diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,140042 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1315.7894736842106, + "eval_steps": 500, + "global_step": 200000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06578947368421052, + "grad_norm": 165.546875, + "learning_rate": 0.0001, + "loss": 21.3446, + "step": 10 + }, + { + "epoch": 0.13157894736842105, + "grad_norm": 92.7155990600586, + "learning_rate": 0.0001, + "loss": 4.7673, + "step": 20 + }, + { + "epoch": 0.19736842105263158, + "grad_norm": 64.37399291992188, + "learning_rate": 0.0001, + "loss": 3.561, + "step": 30 + }, + { + "epoch": 0.2631578947368421, + "grad_norm": 55.39424514770508, + "learning_rate": 0.0001, + "loss": 3.1393, + "step": 40 + }, + { + "epoch": 0.32894736842105265, + "grad_norm": 55.13695526123047, + "learning_rate": 0.0001, + "loss": 2.8728, + "step": 50 + }, + { + "epoch": 0.39473684210526316, + "grad_norm": 51.248966217041016, + "learning_rate": 0.0001, + "loss": 2.6172, + "step": 60 + }, + { + "epoch": 0.4605263157894737, + "grad_norm": 39.10142135620117, + "learning_rate": 0.0001, + "loss": 2.415, + "step": 70 + }, + { + "epoch": 0.5263157894736842, + "grad_norm": 47.69247055053711, + "learning_rate": 0.0001, + "loss": 2.2343, + "step": 80 + }, + { + "epoch": 0.5921052631578947, + "grad_norm": 49.21741485595703, + "learning_rate": 0.0001, + "loss": 2.0767, + "step": 90 + }, + { + "epoch": 0.6578947368421053, + "grad_norm": 44.604793548583984, + "learning_rate": 0.0001, + "loss": 2.0309, + "step": 100 + }, + { + "epoch": 0.7236842105263158, + "grad_norm": 51.46554183959961, + "learning_rate": 0.0001, + "loss": 1.8886, + "step": 110 + }, + { + "epoch": 0.7894736842105263, + "grad_norm": 46.2479133605957, + "learning_rate": 0.0001, + "loss": 1.7757, + "step": 120 + }, + { + "epoch": 0.8552631578947368, + "grad_norm": 49.21684646606445, + "learning_rate": 0.0001, + "loss": 1.6854, + "step": 130 + }, + { + "epoch": 0.9210526315789473, + "grad_norm": 49.37424087524414, + "learning_rate": 0.0001, + "loss": 1.614, + "step": 140 + }, + { + "epoch": 0.9868421052631579, + "grad_norm": 40.79842758178711, + "learning_rate": 0.0001, + "loss": 1.494, + "step": 150 + }, + { + "epoch": 1.0526315789473684, + "grad_norm": 43.07915115356445, + "learning_rate": 0.0001, + "loss": 1.4474, + "step": 160 + }, + { + "epoch": 1.118421052631579, + "grad_norm": 37.799560546875, + "learning_rate": 0.0001, + "loss": 1.3618, + "step": 170 + }, + { + "epoch": 1.1842105263157894, + "grad_norm": 37.14927291870117, + "learning_rate": 0.0001, + "loss": 1.3081, + "step": 180 + }, + { + "epoch": 1.25, + "grad_norm": 35.22480010986328, + "learning_rate": 0.0001, + "loss": 1.2341, + "step": 190 + }, + { + "epoch": 1.3157894736842106, + "grad_norm": 33.31498718261719, + "learning_rate": 0.0001, + "loss": 1.169, + "step": 200 + }, + { + "epoch": 1.381578947368421, + "grad_norm": 27.80243492126465, + "learning_rate": 0.0001, + "loss": 1.0758, + "step": 210 + }, + { + "epoch": 1.4473684210526316, + "grad_norm": 35.85008239746094, + "learning_rate": 0.0001, + "loss": 1.0106, + "step": 220 + }, + { + "epoch": 1.513157894736842, + "grad_norm": 29.738868713378906, + "learning_rate": 0.0001, + "loss": 0.9601, + "step": 230 + }, + { + "epoch": 1.5789473684210527, + "grad_norm": 29.869279861450195, + "learning_rate": 0.0001, + "loss": 0.9334, + "step": 240 + }, + { + "epoch": 1.6447368421052633, + "grad_norm": 23.091772079467773, + "learning_rate": 0.0001, + "loss": 0.8842, + "step": 250 + }, + { + "epoch": 1.7105263157894737, + "grad_norm": 31.321971893310547, + "learning_rate": 0.0001, + "loss": 0.8304, + "step": 260 + }, + { + "epoch": 1.776315789473684, + "grad_norm": 31.007795333862305, + "learning_rate": 0.0001, + "loss": 0.7835, + "step": 270 + }, + { + "epoch": 1.8421052631578947, + "grad_norm": 23.761444091796875, + "learning_rate": 0.0001, + "loss": 0.741, + "step": 280 + }, + { + "epoch": 1.9078947368421053, + "grad_norm": 29.322738647460938, + "learning_rate": 0.0001, + "loss": 0.7034, + "step": 290 + }, + { + "epoch": 1.973684210526316, + "grad_norm": 24.82952308654785, + "learning_rate": 0.0001, + "loss": 0.6807, + "step": 300 + }, + { + "epoch": 2.039473684210526, + "grad_norm": 20.884836196899414, + "learning_rate": 0.0001, + "loss": 0.6234, + "step": 310 + }, + { + "epoch": 2.1052631578947367, + "grad_norm": 22.192235946655273, + "learning_rate": 0.0001, + "loss": 0.5987, + "step": 320 + }, + { + "epoch": 2.1710526315789473, + "grad_norm": 22.435510635375977, + "learning_rate": 0.0001, + "loss": 0.5711, + "step": 330 + }, + { + "epoch": 2.236842105263158, + "grad_norm": 20.451364517211914, + "learning_rate": 0.0001, + "loss": 0.5399, + "step": 340 + }, + { + "epoch": 2.3026315789473686, + "grad_norm": 22.708984375, + "learning_rate": 0.0001, + "loss": 0.5199, + "step": 350 + }, + { + "epoch": 2.3684210526315788, + "grad_norm": 20.40320587158203, + "learning_rate": 0.0001, + "loss": 0.4973, + "step": 360 + }, + { + "epoch": 2.4342105263157894, + "grad_norm": 17.256328582763672, + "learning_rate": 0.0001, + "loss": 0.4806, + "step": 370 + }, + { + "epoch": 2.5, + "grad_norm": 19.842121124267578, + "learning_rate": 0.0001, + "loss": 0.4759, + "step": 380 + }, + { + "epoch": 2.5657894736842106, + "grad_norm": 17.68121337890625, + "learning_rate": 0.0001, + "loss": 0.4528, + "step": 390 + }, + { + "epoch": 2.6315789473684212, + "grad_norm": 19.98098373413086, + "learning_rate": 0.0001, + "loss": 0.4489, + "step": 400 + }, + { + "epoch": 2.6973684210526314, + "grad_norm": 18.062952041625977, + "learning_rate": 0.0001, + "loss": 0.4187, + "step": 410 + }, + { + "epoch": 2.763157894736842, + "grad_norm": 15.641585350036621, + "learning_rate": 0.0001, + "loss": 0.4068, + "step": 420 + }, + { + "epoch": 2.8289473684210527, + "grad_norm": 16.796161651611328, + "learning_rate": 0.0001, + "loss": 0.3884, + "step": 430 + }, + { + "epoch": 2.8947368421052633, + "grad_norm": 15.872922897338867, + "learning_rate": 0.0001, + "loss": 0.3997, + "step": 440 + }, + { + "epoch": 2.9605263157894735, + "grad_norm": 15.037050247192383, + "learning_rate": 0.0001, + "loss": 0.3687, + "step": 450 + }, + { + "epoch": 3.026315789473684, + "grad_norm": 18.485353469848633, + "learning_rate": 0.0001, + "loss": 0.3606, + "step": 460 + }, + { + "epoch": 3.0921052631578947, + "grad_norm": 17.94466209411621, + "learning_rate": 0.0001, + "loss": 0.3568, + "step": 470 + }, + { + "epoch": 3.1578947368421053, + "grad_norm": 15.609881401062012, + "learning_rate": 0.0001, + "loss": 0.3631, + "step": 480 + }, + { + "epoch": 3.223684210526316, + "grad_norm": 15.842222213745117, + "learning_rate": 0.0001, + "loss": 0.3499, + "step": 490 + }, + { + "epoch": 3.2894736842105265, + "grad_norm": 14.633220672607422, + "learning_rate": 0.0001, + "loss": 0.3442, + "step": 500 + }, + { + "epoch": 3.3552631578947367, + "grad_norm": 16.64904022216797, + "learning_rate": 0.0001, + "loss": 0.3311, + "step": 510 + }, + { + "epoch": 3.4210526315789473, + "grad_norm": 15.60223388671875, + "learning_rate": 0.0001, + "loss": 0.3356, + "step": 520 + }, + { + "epoch": 3.486842105263158, + "grad_norm": 14.84737491607666, + "learning_rate": 0.0001, + "loss": 0.3172, + "step": 530 + }, + { + "epoch": 3.5526315789473686, + "grad_norm": 15.952341079711914, + "learning_rate": 0.0001, + "loss": 0.3196, + "step": 540 + }, + { + "epoch": 3.6184210526315788, + "grad_norm": 14.867337226867676, + "learning_rate": 0.0001, + "loss": 0.3137, + "step": 550 + }, + { + "epoch": 3.6842105263157894, + "grad_norm": 12.948201179504395, + "learning_rate": 0.0001, + "loss": 0.2949, + "step": 560 + }, + { + "epoch": 3.75, + "grad_norm": 13.009255409240723, + "learning_rate": 0.0001, + "loss": 0.2955, + "step": 570 + }, + { + "epoch": 3.8157894736842106, + "grad_norm": 13.705317497253418, + "learning_rate": 0.0001, + "loss": 0.2905, + "step": 580 + }, + { + "epoch": 3.8815789473684212, + "grad_norm": 17.22597885131836, + "learning_rate": 0.0001, + "loss": 0.2974, + "step": 590 + }, + { + "epoch": 3.9473684210526314, + "grad_norm": 14.919693946838379, + "learning_rate": 0.0001, + "loss": 0.2898, + "step": 600 + }, + { + "epoch": 4.0131578947368425, + "grad_norm": 18.780914306640625, + "learning_rate": 0.0001, + "loss": 0.2816, + "step": 610 + }, + { + "epoch": 4.078947368421052, + "grad_norm": 16.027746200561523, + "learning_rate": 0.0001, + "loss": 0.2761, + "step": 620 + }, + { + "epoch": 4.144736842105263, + "grad_norm": 13.083127975463867, + "learning_rate": 0.0001, + "loss": 0.2688, + "step": 630 + }, + { + "epoch": 4.2105263157894735, + "grad_norm": 13.55245590209961, + "learning_rate": 0.0001, + "loss": 0.2667, + "step": 640 + }, + { + "epoch": 4.276315789473684, + "grad_norm": 11.151076316833496, + "learning_rate": 0.0001, + "loss": 0.2677, + "step": 650 + }, + { + "epoch": 4.342105263157895, + "grad_norm": 12.883306503295898, + "learning_rate": 0.0001, + "loss": 0.2619, + "step": 660 + }, + { + "epoch": 4.407894736842105, + "grad_norm": 13.993839263916016, + "learning_rate": 0.0001, + "loss": 0.2626, + "step": 670 + }, + { + "epoch": 4.473684210526316, + "grad_norm": 13.596793174743652, + "learning_rate": 0.0001, + "loss": 0.253, + "step": 680 + }, + { + "epoch": 4.5394736842105265, + "grad_norm": 12.375000953674316, + "learning_rate": 0.0001, + "loss": 0.2449, + "step": 690 + }, + { + "epoch": 4.605263157894737, + "grad_norm": 12.333367347717285, + "learning_rate": 0.0001, + "loss": 0.2449, + "step": 700 + }, + { + "epoch": 4.671052631578947, + "grad_norm": 14.13685131072998, + "learning_rate": 0.0001, + "loss": 0.2363, + "step": 710 + }, + { + "epoch": 4.7368421052631575, + "grad_norm": 12.23291015625, + "learning_rate": 0.0001, + "loss": 0.2374, + "step": 720 + }, + { + "epoch": 4.802631578947368, + "grad_norm": 11.502880096435547, + "learning_rate": 0.0001, + "loss": 0.2289, + "step": 730 + }, + { + "epoch": 4.868421052631579, + "grad_norm": 10.85533332824707, + "learning_rate": 0.0001, + "loss": 0.2266, + "step": 740 + }, + { + "epoch": 4.934210526315789, + "grad_norm": 12.013697624206543, + "learning_rate": 0.0001, + "loss": 0.2188, + "step": 750 + }, + { + "epoch": 5.0, + "grad_norm": 13.170636177062988, + "learning_rate": 0.0001, + "loss": 0.2321, + "step": 760 + }, + { + "epoch": 5.065789473684211, + "grad_norm": 12.095258712768555, + "learning_rate": 0.0001, + "loss": 0.2229, + "step": 770 + }, + { + "epoch": 5.131578947368421, + "grad_norm": 10.180475234985352, + "learning_rate": 0.0001, + "loss": 0.2231, + "step": 780 + }, + { + "epoch": 5.197368421052632, + "grad_norm": 13.165190696716309, + "learning_rate": 0.0001, + "loss": 0.223, + "step": 790 + }, + { + "epoch": 5.2631578947368425, + "grad_norm": 10.063823699951172, + "learning_rate": 0.0001, + "loss": 0.2118, + "step": 800 + }, + { + "epoch": 5.328947368421053, + "grad_norm": 12.834861755371094, + "learning_rate": 0.0001, + "loss": 0.217, + "step": 810 + }, + { + "epoch": 5.394736842105263, + "grad_norm": 12.119063377380371, + "learning_rate": 0.0001, + "loss": 0.2053, + "step": 820 + }, + { + "epoch": 5.4605263157894735, + "grad_norm": 12.132641792297363, + "learning_rate": 0.0001, + "loss": 0.206, + "step": 830 + }, + { + "epoch": 5.526315789473684, + "grad_norm": 11.347237586975098, + "learning_rate": 0.0001, + "loss": 0.201, + "step": 840 + }, + { + "epoch": 5.592105263157895, + "grad_norm": 11.609687805175781, + "learning_rate": 0.0001, + "loss": 0.2039, + "step": 850 + }, + { + "epoch": 5.657894736842105, + "grad_norm": 10.659272193908691, + "learning_rate": 0.0001, + "loss": 0.203, + "step": 860 + }, + { + "epoch": 5.723684210526316, + "grad_norm": 12.028623580932617, + "learning_rate": 0.0001, + "loss": 0.21, + "step": 870 + }, + { + "epoch": 5.7894736842105265, + "grad_norm": 10.963968276977539, + "learning_rate": 0.0001, + "loss": 0.2019, + "step": 880 + }, + { + "epoch": 5.855263157894737, + "grad_norm": 10.573838233947754, + "learning_rate": 0.0001, + "loss": 0.206, + "step": 890 + }, + { + "epoch": 5.921052631578947, + "grad_norm": 10.971415519714355, + "learning_rate": 0.0001, + "loss": 0.1975, + "step": 900 + }, + { + "epoch": 5.9868421052631575, + "grad_norm": 12.391592025756836, + "learning_rate": 0.0001, + "loss": 0.2002, + "step": 910 + }, + { + "epoch": 6.052631578947368, + "grad_norm": 11.677957534790039, + "learning_rate": 0.0001, + "loss": 0.1949, + "step": 920 + }, + { + "epoch": 6.118421052631579, + "grad_norm": 9.757022857666016, + "learning_rate": 0.0001, + "loss": 0.1887, + "step": 930 + }, + { + "epoch": 6.184210526315789, + "grad_norm": 10.96141529083252, + "learning_rate": 0.0001, + "loss": 0.1888, + "step": 940 + }, + { + "epoch": 6.25, + "grad_norm": 11.207174301147461, + "learning_rate": 0.0001, + "loss": 0.1881, + "step": 950 + }, + { + "epoch": 6.315789473684211, + "grad_norm": 9.526222229003906, + "learning_rate": 0.0001, + "loss": 0.19, + "step": 960 + }, + { + "epoch": 6.381578947368421, + "grad_norm": 10.266683578491211, + "learning_rate": 0.0001, + "loss": 0.18, + "step": 970 + }, + { + "epoch": 6.447368421052632, + "grad_norm": 10.702817916870117, + "learning_rate": 0.0001, + "loss": 0.1877, + "step": 980 + }, + { + "epoch": 6.5131578947368425, + "grad_norm": 9.538935661315918, + "learning_rate": 0.0001, + "loss": 0.1879, + "step": 990 + }, + { + "epoch": 6.578947368421053, + "grad_norm": 9.56125545501709, + "learning_rate": 0.0001, + "loss": 0.1785, + "step": 1000 + }, + { + "epoch": 6.644736842105263, + "grad_norm": 9.874727249145508, + "learning_rate": 0.0001, + "loss": 0.1856, + "step": 1010 + }, + { + "epoch": 6.7105263157894735, + "grad_norm": 9.713470458984375, + "learning_rate": 0.0001, + "loss": 0.1788, + "step": 1020 + }, + { + "epoch": 6.776315789473684, + "grad_norm": 9.037981986999512, + "learning_rate": 0.0001, + "loss": 0.1694, + "step": 1030 + }, + { + "epoch": 6.842105263157895, + "grad_norm": 10.102965354919434, + "learning_rate": 0.0001, + "loss": 0.1689, + "step": 1040 + }, + { + "epoch": 6.907894736842105, + "grad_norm": 9.202942848205566, + "learning_rate": 0.0001, + "loss": 0.18, + "step": 1050 + }, + { + "epoch": 6.973684210526316, + "grad_norm": 10.504417419433594, + "learning_rate": 0.0001, + "loss": 0.1721, + "step": 1060 + }, + { + "epoch": 7.0394736842105265, + "grad_norm": 8.70566463470459, + "learning_rate": 0.0001, + "loss": 0.1724, + "step": 1070 + }, + { + "epoch": 7.105263157894737, + "grad_norm": 10.730690956115723, + "learning_rate": 0.0001, + "loss": 0.1737, + "step": 1080 + }, + { + "epoch": 7.171052631578948, + "grad_norm": 9.544865608215332, + "learning_rate": 0.0001, + "loss": 0.1752, + "step": 1090 + }, + { + "epoch": 7.2368421052631575, + "grad_norm": 10.796236038208008, + "learning_rate": 0.0001, + "loss": 0.174, + "step": 1100 + }, + { + "epoch": 7.302631578947368, + "grad_norm": 9.174856185913086, + "learning_rate": 0.0001, + "loss": 0.1705, + "step": 1110 + }, + { + "epoch": 7.368421052631579, + "grad_norm": 10.11459732055664, + "learning_rate": 0.0001, + "loss": 0.1612, + "step": 1120 + }, + { + "epoch": 7.434210526315789, + "grad_norm": 10.245781898498535, + "learning_rate": 0.0001, + "loss": 0.1604, + "step": 1130 + }, + { + "epoch": 7.5, + "grad_norm": 9.092036247253418, + "learning_rate": 0.0001, + "loss": 0.1582, + "step": 1140 + }, + { + "epoch": 7.565789473684211, + "grad_norm": 8.524528503417969, + "learning_rate": 0.0001, + "loss": 0.1528, + "step": 1150 + }, + { + "epoch": 7.631578947368421, + "grad_norm": 9.26682186126709, + "learning_rate": 0.0001, + "loss": 0.1543, + "step": 1160 + }, + { + "epoch": 7.697368421052632, + "grad_norm": 10.105231285095215, + "learning_rate": 0.0001, + "loss": 0.1629, + "step": 1170 + }, + { + "epoch": 7.7631578947368425, + "grad_norm": 9.100499153137207, + "learning_rate": 0.0001, + "loss": 0.1575, + "step": 1180 + }, + { + "epoch": 7.828947368421053, + "grad_norm": 7.819246768951416, + "learning_rate": 0.0001, + "loss": 0.1586, + "step": 1190 + }, + { + "epoch": 7.894736842105263, + "grad_norm": 8.291773796081543, + "learning_rate": 0.0001, + "loss": 0.1518, + "step": 1200 + }, + { + "epoch": 7.9605263157894735, + "grad_norm": 8.024109840393066, + "learning_rate": 0.0001, + "loss": 0.1551, + "step": 1210 + }, + { + "epoch": 8.026315789473685, + "grad_norm": 8.904583930969238, + "learning_rate": 0.0001, + "loss": 0.1506, + "step": 1220 + }, + { + "epoch": 8.092105263157896, + "grad_norm": 7.31992769241333, + "learning_rate": 0.0001, + "loss": 0.1453, + "step": 1230 + }, + { + "epoch": 8.157894736842104, + "grad_norm": 9.026426315307617, + "learning_rate": 0.0001, + "loss": 0.1496, + "step": 1240 + }, + { + "epoch": 8.223684210526315, + "grad_norm": 9.364782333374023, + "learning_rate": 0.0001, + "loss": 0.1504, + "step": 1250 + }, + { + "epoch": 8.289473684210526, + "grad_norm": 8.736329078674316, + "learning_rate": 0.0001, + "loss": 0.1445, + "step": 1260 + }, + { + "epoch": 8.355263157894736, + "grad_norm": 11.340313911437988, + "learning_rate": 0.0001, + "loss": 0.1469, + "step": 1270 + }, + { + "epoch": 8.421052631578947, + "grad_norm": 10.251652717590332, + "learning_rate": 0.0001, + "loss": 0.1559, + "step": 1280 + }, + { + "epoch": 8.486842105263158, + "grad_norm": 9.239073753356934, + "learning_rate": 0.0001, + "loss": 0.1456, + "step": 1290 + }, + { + "epoch": 8.552631578947368, + "grad_norm": 8.867827415466309, + "learning_rate": 0.0001, + "loss": 0.1507, + "step": 1300 + }, + { + "epoch": 8.618421052631579, + "grad_norm": 9.638751983642578, + "learning_rate": 0.0001, + "loss": 0.1521, + "step": 1310 + }, + { + "epoch": 8.68421052631579, + "grad_norm": 9.176225662231445, + "learning_rate": 0.0001, + "loss": 0.1474, + "step": 1320 + }, + { + "epoch": 8.75, + "grad_norm": 8.754321098327637, + "learning_rate": 0.0001, + "loss": 0.1423, + "step": 1330 + }, + { + "epoch": 8.81578947368421, + "grad_norm": 9.146409034729004, + "learning_rate": 0.0001, + "loss": 0.1397, + "step": 1340 + }, + { + "epoch": 8.881578947368421, + "grad_norm": 9.756692886352539, + "learning_rate": 0.0001, + "loss": 0.1339, + "step": 1350 + }, + { + "epoch": 8.947368421052632, + "grad_norm": 8.44552230834961, + "learning_rate": 0.0001, + "loss": 0.1433, + "step": 1360 + }, + { + "epoch": 9.013157894736842, + "grad_norm": 9.708316802978516, + "learning_rate": 0.0001, + "loss": 0.1443, + "step": 1370 + }, + { + "epoch": 9.078947368421053, + "grad_norm": 10.184338569641113, + "learning_rate": 0.0001, + "loss": 0.1397, + "step": 1380 + }, + { + "epoch": 9.144736842105264, + "grad_norm": 8.404060363769531, + "learning_rate": 0.0001, + "loss": 0.1458, + "step": 1390 + }, + { + "epoch": 9.210526315789474, + "grad_norm": 6.799143314361572, + "learning_rate": 0.0001, + "loss": 0.1391, + "step": 1400 + }, + { + "epoch": 9.276315789473685, + "grad_norm": 6.993669033050537, + "learning_rate": 0.0001, + "loss": 0.1356, + "step": 1410 + }, + { + "epoch": 9.342105263157896, + "grad_norm": 7.0708537101745605, + "learning_rate": 0.0001, + "loss": 0.1348, + "step": 1420 + }, + { + "epoch": 9.407894736842104, + "grad_norm": 7.106939792633057, + "learning_rate": 0.0001, + "loss": 0.1395, + "step": 1430 + }, + { + "epoch": 9.473684210526315, + "grad_norm": 8.091696739196777, + "learning_rate": 0.0001, + "loss": 0.1347, + "step": 1440 + }, + { + "epoch": 9.539473684210526, + "grad_norm": 7.712802410125732, + "learning_rate": 0.0001, + "loss": 0.136, + "step": 1450 + }, + { + "epoch": 9.605263157894736, + "grad_norm": 7.227226734161377, + "learning_rate": 0.0001, + "loss": 0.1302, + "step": 1460 + }, + { + "epoch": 9.671052631578947, + "grad_norm": 7.4794464111328125, + "learning_rate": 0.0001, + "loss": 0.1238, + "step": 1470 + }, + { + "epoch": 9.736842105263158, + "grad_norm": 8.05570125579834, + "learning_rate": 0.0001, + "loss": 0.134, + "step": 1480 + }, + { + "epoch": 9.802631578947368, + "grad_norm": 8.51727294921875, + "learning_rate": 0.0001, + "loss": 0.1262, + "step": 1490 + }, + { + "epoch": 9.868421052631579, + "grad_norm": 8.46773910522461, + "learning_rate": 0.0001, + "loss": 0.133, + "step": 1500 + }, + { + "epoch": 9.93421052631579, + "grad_norm": 8.314281463623047, + "learning_rate": 0.0001, + "loss": 0.1297, + "step": 1510 + }, + { + "epoch": 10.0, + "grad_norm": 8.289139747619629, + "learning_rate": 0.0001, + "loss": 0.131, + "step": 1520 + }, + { + "epoch": 10.06578947368421, + "grad_norm": 8.90967082977295, + "learning_rate": 0.0001, + "loss": 0.1232, + "step": 1530 + }, + { + "epoch": 10.131578947368421, + "grad_norm": 8.999959945678711, + "learning_rate": 0.0001, + "loss": 0.1273, + "step": 1540 + }, + { + "epoch": 10.197368421052632, + "grad_norm": 8.188413619995117, + "learning_rate": 0.0001, + "loss": 0.1263, + "step": 1550 + }, + { + "epoch": 10.263157894736842, + "grad_norm": 8.14466381072998, + "learning_rate": 0.0001, + "loss": 0.1204, + "step": 1560 + }, + { + "epoch": 10.328947368421053, + "grad_norm": 8.005374908447266, + "learning_rate": 0.0001, + "loss": 0.1232, + "step": 1570 + }, + { + "epoch": 10.394736842105264, + "grad_norm": 8.354129791259766, + "learning_rate": 0.0001, + "loss": 0.1164, + "step": 1580 + }, + { + "epoch": 10.460526315789474, + "grad_norm": 7.809772491455078, + "learning_rate": 0.0001, + "loss": 0.1256, + "step": 1590 + }, + { + "epoch": 10.526315789473685, + "grad_norm": 7.232692241668701, + "learning_rate": 0.0001, + "loss": 0.1226, + "step": 1600 + }, + { + "epoch": 10.592105263157894, + "grad_norm": 7.019472122192383, + "learning_rate": 0.0001, + "loss": 0.1184, + "step": 1610 + }, + { + "epoch": 10.657894736842106, + "grad_norm": 6.870980262756348, + "learning_rate": 0.0001, + "loss": 0.1196, + "step": 1620 + }, + { + "epoch": 10.723684210526315, + "grad_norm": 7.686590671539307, + "learning_rate": 0.0001, + "loss": 0.123, + "step": 1630 + }, + { + "epoch": 10.789473684210526, + "grad_norm": 6.786712169647217, + "learning_rate": 0.0001, + "loss": 0.119, + "step": 1640 + }, + { + "epoch": 10.855263157894736, + "grad_norm": 8.563821792602539, + "learning_rate": 0.0001, + "loss": 0.1186, + "step": 1650 + }, + { + "epoch": 10.921052631578947, + "grad_norm": 8.261466026306152, + "learning_rate": 0.0001, + "loss": 0.1246, + "step": 1660 + }, + { + "epoch": 10.986842105263158, + "grad_norm": 6.972947120666504, + "learning_rate": 0.0001, + "loss": 0.123, + "step": 1670 + }, + { + "epoch": 11.052631578947368, + "grad_norm": 8.273945808410645, + "learning_rate": 0.0001, + "loss": 0.1213, + "step": 1680 + }, + { + "epoch": 11.118421052631579, + "grad_norm": 6.411956310272217, + "learning_rate": 0.0001, + "loss": 0.1123, + "step": 1690 + }, + { + "epoch": 11.18421052631579, + "grad_norm": 7.9852166175842285, + "learning_rate": 0.0001, + "loss": 0.1125, + "step": 1700 + }, + { + "epoch": 11.25, + "grad_norm": 6.184416770935059, + "learning_rate": 0.0001, + "loss": 0.1113, + "step": 1710 + }, + { + "epoch": 11.31578947368421, + "grad_norm": 7.070769786834717, + "learning_rate": 0.0001, + "loss": 0.115, + "step": 1720 + }, + { + "epoch": 11.381578947368421, + "grad_norm": 6.914794921875, + "learning_rate": 0.0001, + "loss": 0.111, + "step": 1730 + }, + { + "epoch": 11.447368421052632, + "grad_norm": 7.74068546295166, + "learning_rate": 0.0001, + "loss": 0.113, + "step": 1740 + }, + { + "epoch": 11.513157894736842, + "grad_norm": 6.931029319763184, + "learning_rate": 0.0001, + "loss": 0.1152, + "step": 1750 + }, + { + "epoch": 11.578947368421053, + "grad_norm": 5.994091510772705, + "learning_rate": 0.0001, + "loss": 0.1119, + "step": 1760 + }, + { + "epoch": 11.644736842105264, + "grad_norm": 6.253146648406982, + "learning_rate": 0.0001, + "loss": 0.1093, + "step": 1770 + }, + { + "epoch": 11.710526315789474, + "grad_norm": 5.8790974617004395, + "learning_rate": 0.0001, + "loss": 0.1122, + "step": 1780 + }, + { + "epoch": 11.776315789473685, + "grad_norm": 6.187469959259033, + "learning_rate": 0.0001, + "loss": 0.1122, + "step": 1790 + }, + { + "epoch": 11.842105263157894, + "grad_norm": 5.537177562713623, + "learning_rate": 0.0001, + "loss": 0.1123, + "step": 1800 + }, + { + "epoch": 11.907894736842106, + "grad_norm": 7.146965503692627, + "learning_rate": 0.0001, + "loss": 0.1118, + "step": 1810 + }, + { + "epoch": 11.973684210526315, + "grad_norm": 6.597342491149902, + "learning_rate": 0.0001, + "loss": 0.1053, + "step": 1820 + }, + { + "epoch": 12.039473684210526, + "grad_norm": 6.711949825286865, + "learning_rate": 0.0001, + "loss": 0.1098, + "step": 1830 + }, + { + "epoch": 12.105263157894736, + "grad_norm": 5.853811264038086, + "learning_rate": 0.0001, + "loss": 0.1072, + "step": 1840 + }, + { + "epoch": 12.171052631578947, + "grad_norm": 5.994247913360596, + "learning_rate": 0.0001, + "loss": 0.1058, + "step": 1850 + }, + { + "epoch": 12.236842105263158, + "grad_norm": 4.701684951782227, + "learning_rate": 0.0001, + "loss": 0.1044, + "step": 1860 + }, + { + "epoch": 12.302631578947368, + "grad_norm": 6.289170742034912, + "learning_rate": 0.0001, + "loss": 0.1025, + "step": 1870 + }, + { + "epoch": 12.368421052631579, + "grad_norm": 4.761482238769531, + "learning_rate": 0.0001, + "loss": 0.1028, + "step": 1880 + }, + { + "epoch": 12.43421052631579, + "grad_norm": 7.5639448165893555, + "learning_rate": 0.0001, + "loss": 0.1028, + "step": 1890 + }, + { + "epoch": 12.5, + "grad_norm": 5.687307357788086, + "learning_rate": 0.0001, + "loss": 0.1056, + "step": 1900 + }, + { + "epoch": 12.56578947368421, + "grad_norm": 6.268471717834473, + "learning_rate": 0.0001, + "loss": 0.0987, + "step": 1910 + }, + { + "epoch": 12.631578947368421, + "grad_norm": 7.3990254402160645, + "learning_rate": 0.0001, + "loss": 0.0981, + "step": 1920 + }, + { + "epoch": 12.697368421052632, + "grad_norm": 6.721970558166504, + "learning_rate": 0.0001, + "loss": 0.1024, + "step": 1930 + }, + { + "epoch": 12.763157894736842, + "grad_norm": 6.922434329986572, + "learning_rate": 0.0001, + "loss": 0.1034, + "step": 1940 + }, + { + "epoch": 12.828947368421053, + "grad_norm": 6.5644097328186035, + "learning_rate": 0.0001, + "loss": 0.1012, + "step": 1950 + }, + { + "epoch": 12.894736842105264, + "grad_norm": 5.8850908279418945, + "learning_rate": 0.0001, + "loss": 0.1011, + "step": 1960 + }, + { + "epoch": 12.960526315789474, + "grad_norm": 7.03394889831543, + "learning_rate": 0.0001, + "loss": 0.1031, + "step": 1970 + }, + { + "epoch": 13.026315789473685, + "grad_norm": 7.228178024291992, + "learning_rate": 0.0001, + "loss": 0.1021, + "step": 1980 + }, + { + "epoch": 13.092105263157896, + "grad_norm": 6.708334922790527, + "learning_rate": 0.0001, + "loss": 0.1003, + "step": 1990 + }, + { + "epoch": 13.157894736842104, + "grad_norm": 6.730384826660156, + "learning_rate": 0.0001, + "loss": 0.1034, + "step": 2000 + }, + { + "epoch": 13.223684210526315, + "grad_norm": 6.330638408660889, + "learning_rate": 0.0001, + "loss": 0.0981, + "step": 2010 + }, + { + "epoch": 13.289473684210526, + "grad_norm": 6.243671417236328, + "learning_rate": 0.0001, + "loss": 0.1005, + "step": 2020 + }, + { + "epoch": 13.355263157894736, + "grad_norm": 7.014003276824951, + "learning_rate": 0.0001, + "loss": 0.0937, + "step": 2030 + }, + { + "epoch": 13.421052631578947, + "grad_norm": 6.188398361206055, + "learning_rate": 0.0001, + "loss": 0.0967, + "step": 2040 + }, + { + "epoch": 13.486842105263158, + "grad_norm": 6.1966938972473145, + "learning_rate": 0.0001, + "loss": 0.1026, + "step": 2050 + }, + { + "epoch": 13.552631578947368, + "grad_norm": 5.604138374328613, + "learning_rate": 0.0001, + "loss": 0.1022, + "step": 2060 + }, + { + "epoch": 13.618421052631579, + "grad_norm": 6.079825401306152, + "learning_rate": 0.0001, + "loss": 0.096, + "step": 2070 + }, + { + "epoch": 13.68421052631579, + "grad_norm": 6.047208786010742, + "learning_rate": 0.0001, + "loss": 0.0943, + "step": 2080 + }, + { + "epoch": 13.75, + "grad_norm": 6.325393199920654, + "learning_rate": 0.0001, + "loss": 0.0975, + "step": 2090 + }, + { + "epoch": 13.81578947368421, + "grad_norm": 5.832712650299072, + "learning_rate": 0.0001, + "loss": 0.0944, + "step": 2100 + }, + { + "epoch": 13.881578947368421, + "grad_norm": 5.263245105743408, + "learning_rate": 0.0001, + "loss": 0.0933, + "step": 2110 + }, + { + "epoch": 13.947368421052632, + "grad_norm": 5.7324934005737305, + "learning_rate": 0.0001, + "loss": 0.0919, + "step": 2120 + }, + { + "epoch": 14.013157894736842, + "grad_norm": 4.923053741455078, + "learning_rate": 0.0001, + "loss": 0.0895, + "step": 2130 + }, + { + "epoch": 14.078947368421053, + "grad_norm": 5.948666095733643, + "learning_rate": 0.0001, + "loss": 0.0914, + "step": 2140 + }, + { + "epoch": 14.144736842105264, + "grad_norm": 5.863275051116943, + "learning_rate": 0.0001, + "loss": 0.0898, + "step": 2150 + }, + { + "epoch": 14.210526315789474, + "grad_norm": 5.876058101654053, + "learning_rate": 0.0001, + "loss": 0.0926, + "step": 2160 + }, + { + "epoch": 14.276315789473685, + "grad_norm": 5.606838703155518, + "learning_rate": 0.0001, + "loss": 0.0927, + "step": 2170 + }, + { + "epoch": 14.342105263157896, + "grad_norm": 5.575480937957764, + "learning_rate": 0.0001, + "loss": 0.0929, + "step": 2180 + }, + { + "epoch": 14.407894736842104, + "grad_norm": 5.6978440284729, + "learning_rate": 0.0001, + "loss": 0.0933, + "step": 2190 + }, + { + "epoch": 14.473684210526315, + "grad_norm": 6.567765712738037, + "learning_rate": 0.0001, + "loss": 0.09, + "step": 2200 + }, + { + "epoch": 14.539473684210526, + "grad_norm": 6.093446254730225, + "learning_rate": 0.0001, + "loss": 0.0933, + "step": 2210 + }, + { + "epoch": 14.605263157894736, + "grad_norm": 5.184361457824707, + "learning_rate": 0.0001, + "loss": 0.0942, + "step": 2220 + }, + { + "epoch": 14.671052631578947, + "grad_norm": 5.555819511413574, + "learning_rate": 0.0001, + "loss": 0.086, + "step": 2230 + }, + { + "epoch": 14.736842105263158, + "grad_norm": 5.339320182800293, + "learning_rate": 0.0001, + "loss": 0.0918, + "step": 2240 + }, + { + "epoch": 14.802631578947368, + "grad_norm": 5.596290588378906, + "learning_rate": 0.0001, + "loss": 0.0912, + "step": 2250 + }, + { + "epoch": 14.868421052631579, + "grad_norm": 4.995247840881348, + "learning_rate": 0.0001, + "loss": 0.0881, + "step": 2260 + }, + { + "epoch": 14.93421052631579, + "grad_norm": 5.6531548500061035, + "learning_rate": 0.0001, + "loss": 0.0871, + "step": 2270 + }, + { + "epoch": 15.0, + "grad_norm": 5.662561893463135, + "learning_rate": 0.0001, + "loss": 0.0852, + "step": 2280 + }, + { + "epoch": 15.06578947368421, + "grad_norm": 5.818594932556152, + "learning_rate": 0.0001, + "loss": 0.087, + "step": 2290 + }, + { + "epoch": 15.131578947368421, + "grad_norm": 6.480447769165039, + "learning_rate": 0.0001, + "loss": 0.0856, + "step": 2300 + }, + { + "epoch": 15.197368421052632, + "grad_norm": 5.37237548828125, + "learning_rate": 0.0001, + "loss": 0.0855, + "step": 2310 + }, + { + "epoch": 15.263157894736842, + "grad_norm": 5.8733696937561035, + "learning_rate": 0.0001, + "loss": 0.0835, + "step": 2320 + }, + { + "epoch": 15.328947368421053, + "grad_norm": 6.209512710571289, + "learning_rate": 0.0001, + "loss": 0.0856, + "step": 2330 + }, + { + "epoch": 15.394736842105264, + "grad_norm": 6.127361297607422, + "learning_rate": 0.0001, + "loss": 0.0864, + "step": 2340 + }, + { + "epoch": 15.460526315789474, + "grad_norm": 5.621913433074951, + "learning_rate": 0.0001, + "loss": 0.0818, + "step": 2350 + }, + { + "epoch": 15.526315789473685, + "grad_norm": 5.90310525894165, + "learning_rate": 0.0001, + "loss": 0.0811, + "step": 2360 + }, + { + "epoch": 15.592105263157894, + "grad_norm": 5.0243239402771, + "learning_rate": 0.0001, + "loss": 0.0819, + "step": 2370 + }, + { + "epoch": 15.657894736842106, + "grad_norm": 6.154341697692871, + "learning_rate": 0.0001, + "loss": 0.0839, + "step": 2380 + }, + { + "epoch": 15.723684210526315, + "grad_norm": 5.303261756896973, + "learning_rate": 0.0001, + "loss": 0.0798, + "step": 2390 + }, + { + "epoch": 15.789473684210526, + "grad_norm": 5.788863658905029, + "learning_rate": 0.0001, + "loss": 0.0804, + "step": 2400 + }, + { + "epoch": 15.855263157894736, + "grad_norm": 5.377126693725586, + "learning_rate": 0.0001, + "loss": 0.0812, + "step": 2410 + }, + { + "epoch": 15.921052631578947, + "grad_norm": 5.7089972496032715, + "learning_rate": 0.0001, + "loss": 0.0777, + "step": 2420 + }, + { + "epoch": 15.986842105263158, + "grad_norm": 5.824652671813965, + "learning_rate": 0.0001, + "loss": 0.0834, + "step": 2430 + }, + { + "epoch": 16.05263157894737, + "grad_norm": 4.787572383880615, + "learning_rate": 0.0001, + "loss": 0.078, + "step": 2440 + }, + { + "epoch": 16.11842105263158, + "grad_norm": 6.0126800537109375, + "learning_rate": 0.0001, + "loss": 0.0794, + "step": 2450 + }, + { + "epoch": 16.18421052631579, + "grad_norm": 6.0084919929504395, + "learning_rate": 0.0001, + "loss": 0.08, + "step": 2460 + }, + { + "epoch": 16.25, + "grad_norm": 5.7310380935668945, + "learning_rate": 0.0001, + "loss": 0.0795, + "step": 2470 + }, + { + "epoch": 16.31578947368421, + "grad_norm": 5.645249366760254, + "learning_rate": 0.0001, + "loss": 0.0772, + "step": 2480 + }, + { + "epoch": 16.38157894736842, + "grad_norm": 4.670828342437744, + "learning_rate": 0.0001, + "loss": 0.0838, + "step": 2490 + }, + { + "epoch": 16.44736842105263, + "grad_norm": 4.980633735656738, + "learning_rate": 0.0001, + "loss": 0.0772, + "step": 2500 + }, + { + "epoch": 16.513157894736842, + "grad_norm": 4.4033660888671875, + "learning_rate": 0.0001, + "loss": 0.0819, + "step": 2510 + }, + { + "epoch": 16.57894736842105, + "grad_norm": 5.503461837768555, + "learning_rate": 0.0001, + "loss": 0.0792, + "step": 2520 + }, + { + "epoch": 16.644736842105264, + "grad_norm": 4.978002071380615, + "learning_rate": 0.0001, + "loss": 0.0797, + "step": 2530 + }, + { + "epoch": 16.710526315789473, + "grad_norm": 4.877954006195068, + "learning_rate": 0.0001, + "loss": 0.0792, + "step": 2540 + }, + { + "epoch": 16.776315789473685, + "grad_norm": 4.718324184417725, + "learning_rate": 0.0001, + "loss": 0.0781, + "step": 2550 + }, + { + "epoch": 16.842105263157894, + "grad_norm": 4.839942455291748, + "learning_rate": 0.0001, + "loss": 0.0768, + "step": 2560 + }, + { + "epoch": 16.907894736842106, + "grad_norm": 4.971443176269531, + "learning_rate": 0.0001, + "loss": 0.081, + "step": 2570 + }, + { + "epoch": 16.973684210526315, + "grad_norm": 6.21569299697876, + "learning_rate": 0.0001, + "loss": 0.0835, + "step": 2580 + }, + { + "epoch": 17.039473684210527, + "grad_norm": 5.315542697906494, + "learning_rate": 0.0001, + "loss": 0.0771, + "step": 2590 + }, + { + "epoch": 17.105263157894736, + "grad_norm": 5.427717685699463, + "learning_rate": 0.0001, + "loss": 0.0807, + "step": 2600 + }, + { + "epoch": 17.17105263157895, + "grad_norm": 4.329200267791748, + "learning_rate": 0.0001, + "loss": 0.0737, + "step": 2610 + }, + { + "epoch": 17.236842105263158, + "grad_norm": 4.520540714263916, + "learning_rate": 0.0001, + "loss": 0.0826, + "step": 2620 + }, + { + "epoch": 17.30263157894737, + "grad_norm": 4.716585636138916, + "learning_rate": 0.0001, + "loss": 0.0765, + "step": 2630 + }, + { + "epoch": 17.36842105263158, + "grad_norm": 5.242930889129639, + "learning_rate": 0.0001, + "loss": 0.0739, + "step": 2640 + }, + { + "epoch": 17.43421052631579, + "grad_norm": 4.814362525939941, + "learning_rate": 0.0001, + "loss": 0.0766, + "step": 2650 + }, + { + "epoch": 17.5, + "grad_norm": 4.858095645904541, + "learning_rate": 0.0001, + "loss": 0.0761, + "step": 2660 + }, + { + "epoch": 17.56578947368421, + "grad_norm": 4.718443393707275, + "learning_rate": 0.0001, + "loss": 0.073, + "step": 2670 + }, + { + "epoch": 17.63157894736842, + "grad_norm": 4.559201240539551, + "learning_rate": 0.0001, + "loss": 0.0749, + "step": 2680 + }, + { + "epoch": 17.69736842105263, + "grad_norm": 3.9688594341278076, + "learning_rate": 0.0001, + "loss": 0.0782, + "step": 2690 + }, + { + "epoch": 17.763157894736842, + "grad_norm": 4.480958938598633, + "learning_rate": 0.0001, + "loss": 0.0776, + "step": 2700 + }, + { + "epoch": 17.82894736842105, + "grad_norm": 5.901112079620361, + "learning_rate": 0.0001, + "loss": 0.0756, + "step": 2710 + }, + { + "epoch": 17.894736842105264, + "grad_norm": 5.043224811553955, + "learning_rate": 0.0001, + "loss": 0.0784, + "step": 2720 + }, + { + "epoch": 17.960526315789473, + "grad_norm": 4.380178451538086, + "learning_rate": 0.0001, + "loss": 0.0725, + "step": 2730 + }, + { + "epoch": 18.026315789473685, + "grad_norm": 3.8729310035705566, + "learning_rate": 0.0001, + "loss": 0.0751, + "step": 2740 + }, + { + "epoch": 18.092105263157894, + "grad_norm": 4.4962639808654785, + "learning_rate": 0.0001, + "loss": 0.0727, + "step": 2750 + }, + { + "epoch": 18.157894736842106, + "grad_norm": 4.605258941650391, + "learning_rate": 0.0001, + "loss": 0.0745, + "step": 2760 + }, + { + "epoch": 18.223684210526315, + "grad_norm": 4.574273586273193, + "learning_rate": 0.0001, + "loss": 0.0758, + "step": 2770 + }, + { + "epoch": 18.289473684210527, + "grad_norm": 4.572812557220459, + "learning_rate": 0.0001, + "loss": 0.0755, + "step": 2780 + }, + { + "epoch": 18.355263157894736, + "grad_norm": 4.3042378425598145, + "learning_rate": 0.0001, + "loss": 0.0707, + "step": 2790 + }, + { + "epoch": 18.42105263157895, + "grad_norm": 4.969744682312012, + "learning_rate": 0.0001, + "loss": 0.0726, + "step": 2800 + }, + { + "epoch": 18.486842105263158, + "grad_norm": 5.771834850311279, + "learning_rate": 0.0001, + "loss": 0.0723, + "step": 2810 + }, + { + "epoch": 18.55263157894737, + "grad_norm": 4.393187046051025, + "learning_rate": 0.0001, + "loss": 0.0732, + "step": 2820 + }, + { + "epoch": 18.61842105263158, + "grad_norm": 4.077311038970947, + "learning_rate": 0.0001, + "loss": 0.0703, + "step": 2830 + }, + { + "epoch": 18.68421052631579, + "grad_norm": 3.5009090900421143, + "learning_rate": 0.0001, + "loss": 0.0705, + "step": 2840 + }, + { + "epoch": 18.75, + "grad_norm": 4.456014156341553, + "learning_rate": 0.0001, + "loss": 0.0685, + "step": 2850 + }, + { + "epoch": 18.81578947368421, + "grad_norm": 4.8032307624816895, + "learning_rate": 0.0001, + "loss": 0.0707, + "step": 2860 + }, + { + "epoch": 18.88157894736842, + "grad_norm": 4.6024603843688965, + "learning_rate": 0.0001, + "loss": 0.0754, + "step": 2870 + }, + { + "epoch": 18.94736842105263, + "grad_norm": 5.125010013580322, + "learning_rate": 0.0001, + "loss": 0.0707, + "step": 2880 + }, + { + "epoch": 19.013157894736842, + "grad_norm": 4.631536483764648, + "learning_rate": 0.0001, + "loss": 0.0743, + "step": 2890 + }, + { + "epoch": 19.07894736842105, + "grad_norm": 4.642434120178223, + "learning_rate": 0.0001, + "loss": 0.0679, + "step": 2900 + }, + { + "epoch": 19.144736842105264, + "grad_norm": 4.4804911613464355, + "learning_rate": 0.0001, + "loss": 0.0688, + "step": 2910 + }, + { + "epoch": 19.210526315789473, + "grad_norm": 3.7083773612976074, + "learning_rate": 0.0001, + "loss": 0.0698, + "step": 2920 + }, + { + "epoch": 19.276315789473685, + "grad_norm": 4.256147384643555, + "learning_rate": 0.0001, + "loss": 0.0725, + "step": 2930 + }, + { + "epoch": 19.342105263157894, + "grad_norm": 4.125244140625, + "learning_rate": 0.0001, + "loss": 0.0674, + "step": 2940 + }, + { + "epoch": 19.407894736842106, + "grad_norm": 4.567220211029053, + "learning_rate": 0.0001, + "loss": 0.0683, + "step": 2950 + }, + { + "epoch": 19.473684210526315, + "grad_norm": 4.298739433288574, + "learning_rate": 0.0001, + "loss": 0.0674, + "step": 2960 + }, + { + "epoch": 19.539473684210527, + "grad_norm": 4.303213596343994, + "learning_rate": 0.0001, + "loss": 0.0704, + "step": 2970 + }, + { + "epoch": 19.605263157894736, + "grad_norm": 4.842376232147217, + "learning_rate": 0.0001, + "loss": 0.0687, + "step": 2980 + }, + { + "epoch": 19.67105263157895, + "grad_norm": 4.336226940155029, + "learning_rate": 0.0001, + "loss": 0.0693, + "step": 2990 + }, + { + "epoch": 19.736842105263158, + "grad_norm": 4.425253868103027, + "learning_rate": 0.0001, + "loss": 0.0674, + "step": 3000 + }, + { + "epoch": 19.80263157894737, + "grad_norm": 4.074360370635986, + "learning_rate": 0.0001, + "loss": 0.0622, + "step": 3010 + }, + { + "epoch": 19.86842105263158, + "grad_norm": 4.191122531890869, + "learning_rate": 0.0001, + "loss": 0.0692, + "step": 3020 + }, + { + "epoch": 19.93421052631579, + "grad_norm": 4.672008514404297, + "learning_rate": 0.0001, + "loss": 0.0653, + "step": 3030 + }, + { + "epoch": 20.0, + "grad_norm": 4.377945423126221, + "learning_rate": 0.0001, + "loss": 0.0627, + "step": 3040 + }, + { + "epoch": 20.06578947368421, + "grad_norm": 4.234347820281982, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 3050 + }, + { + "epoch": 20.13157894736842, + "grad_norm": 4.540591716766357, + "learning_rate": 0.0001, + "loss": 0.0683, + "step": 3060 + }, + { + "epoch": 20.19736842105263, + "grad_norm": 4.472556114196777, + "learning_rate": 0.0001, + "loss": 0.0631, + "step": 3070 + }, + { + "epoch": 20.263157894736842, + "grad_norm": 4.038685321807861, + "learning_rate": 0.0001, + "loss": 0.0646, + "step": 3080 + }, + { + "epoch": 20.32894736842105, + "grad_norm": 4.310518741607666, + "learning_rate": 0.0001, + "loss": 0.066, + "step": 3090 + }, + { + "epoch": 20.394736842105264, + "grad_norm": 3.681676149368286, + "learning_rate": 0.0001, + "loss": 0.0628, + "step": 3100 + }, + { + "epoch": 20.460526315789473, + "grad_norm": 4.144742012023926, + "learning_rate": 0.0001, + "loss": 0.0632, + "step": 3110 + }, + { + "epoch": 20.526315789473685, + "grad_norm": 3.517277479171753, + "learning_rate": 0.0001, + "loss": 0.0645, + "step": 3120 + }, + { + "epoch": 20.592105263157894, + "grad_norm": 3.710193395614624, + "learning_rate": 0.0001, + "loss": 0.0645, + "step": 3130 + }, + { + "epoch": 20.657894736842106, + "grad_norm": 4.236420631408691, + "learning_rate": 0.0001, + "loss": 0.0644, + "step": 3140 + }, + { + "epoch": 20.723684210526315, + "grad_norm": 3.9308488368988037, + "learning_rate": 0.0001, + "loss": 0.0672, + "step": 3150 + }, + { + "epoch": 20.789473684210527, + "grad_norm": 4.319930553436279, + "learning_rate": 0.0001, + "loss": 0.0642, + "step": 3160 + }, + { + "epoch": 20.855263157894736, + "grad_norm": 4.539423942565918, + "learning_rate": 0.0001, + "loss": 0.0676, + "step": 3170 + }, + { + "epoch": 20.92105263157895, + "grad_norm": 4.0434956550598145, + "learning_rate": 0.0001, + "loss": 0.0683, + "step": 3180 + }, + { + "epoch": 20.986842105263158, + "grad_norm": 3.5958993434906006, + "learning_rate": 0.0001, + "loss": 0.0699, + "step": 3190 + }, + { + "epoch": 21.05263157894737, + "grad_norm": 3.827382802963257, + "learning_rate": 0.0001, + "loss": 0.0679, + "step": 3200 + }, + { + "epoch": 21.11842105263158, + "grad_norm": 4.15602445602417, + "learning_rate": 0.0001, + "loss": 0.0655, + "step": 3210 + }, + { + "epoch": 21.18421052631579, + "grad_norm": 4.118391036987305, + "learning_rate": 0.0001, + "loss": 0.0676, + "step": 3220 + }, + { + "epoch": 21.25, + "grad_norm": 4.27808141708374, + "learning_rate": 0.0001, + "loss": 0.0624, + "step": 3230 + }, + { + "epoch": 21.31578947368421, + "grad_norm": 4.284823894500732, + "learning_rate": 0.0001, + "loss": 0.0621, + "step": 3240 + }, + { + "epoch": 21.38157894736842, + "grad_norm": 3.516188621520996, + "learning_rate": 0.0001, + "loss": 0.0621, + "step": 3250 + }, + { + "epoch": 21.44736842105263, + "grad_norm": 3.9423298835754395, + "learning_rate": 0.0001, + "loss": 0.0633, + "step": 3260 + }, + { + "epoch": 21.513157894736842, + "grad_norm": 4.444387435913086, + "learning_rate": 0.0001, + "loss": 0.0624, + "step": 3270 + }, + { + "epoch": 21.57894736842105, + "grad_norm": 4.271636486053467, + "learning_rate": 0.0001, + "loss": 0.0638, + "step": 3280 + }, + { + "epoch": 21.644736842105264, + "grad_norm": 4.359874725341797, + "learning_rate": 0.0001, + "loss": 0.0678, + "step": 3290 + }, + { + "epoch": 21.710526315789473, + "grad_norm": 4.128505229949951, + "learning_rate": 0.0001, + "loss": 0.0632, + "step": 3300 + }, + { + "epoch": 21.776315789473685, + "grad_norm": 3.6902530193328857, + "learning_rate": 0.0001, + "loss": 0.0611, + "step": 3310 + }, + { + "epoch": 21.842105263157894, + "grad_norm": 4.0358805656433105, + "learning_rate": 0.0001, + "loss": 0.059, + "step": 3320 + }, + { + "epoch": 21.907894736842106, + "grad_norm": 3.965226173400879, + "learning_rate": 0.0001, + "loss": 0.062, + "step": 3330 + }, + { + "epoch": 21.973684210526315, + "grad_norm": 3.338334083557129, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 3340 + }, + { + "epoch": 22.039473684210527, + "grad_norm": 4.442704200744629, + "learning_rate": 0.0001, + "loss": 0.0591, + "step": 3350 + }, + { + "epoch": 22.105263157894736, + "grad_norm": 4.312819480895996, + "learning_rate": 0.0001, + "loss": 0.057, + "step": 3360 + }, + { + "epoch": 22.17105263157895, + "grad_norm": 3.8974449634552, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 3370 + }, + { + "epoch": 22.236842105263158, + "grad_norm": 3.41093111038208, + "learning_rate": 0.0001, + "loss": 0.0557, + "step": 3380 + }, + { + "epoch": 22.30263157894737, + "grad_norm": 3.5688576698303223, + "learning_rate": 0.0001, + "loss": 0.0561, + "step": 3390 + }, + { + "epoch": 22.36842105263158, + "grad_norm": 3.9463613033294678, + "learning_rate": 0.0001, + "loss": 0.0595, + "step": 3400 + }, + { + "epoch": 22.43421052631579, + "grad_norm": 4.227533340454102, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 3410 + }, + { + "epoch": 22.5, + "grad_norm": 3.651512861251831, + "learning_rate": 0.0001, + "loss": 0.0568, + "step": 3420 + }, + { + "epoch": 22.56578947368421, + "grad_norm": 3.8400089740753174, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 3430 + }, + { + "epoch": 22.63157894736842, + "grad_norm": 3.567005157470703, + "learning_rate": 0.0001, + "loss": 0.0592, + "step": 3440 + }, + { + "epoch": 22.69736842105263, + "grad_norm": 3.756096363067627, + "learning_rate": 0.0001, + "loss": 0.0565, + "step": 3450 + }, + { + "epoch": 22.763157894736842, + "grad_norm": 3.8833200931549072, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 3460 + }, + { + "epoch": 22.82894736842105, + "grad_norm": 3.5916409492492676, + "learning_rate": 0.0001, + "loss": 0.0604, + "step": 3470 + }, + { + "epoch": 22.894736842105264, + "grad_norm": 3.6396472454071045, + "learning_rate": 0.0001, + "loss": 0.0621, + "step": 3480 + }, + { + "epoch": 22.960526315789473, + "grad_norm": 4.026757717132568, + "learning_rate": 0.0001, + "loss": 0.0609, + "step": 3490 + }, + { + "epoch": 23.026315789473685, + "grad_norm": 3.412045955657959, + "learning_rate": 0.0001, + "loss": 0.0635, + "step": 3500 + }, + { + "epoch": 23.092105263157894, + "grad_norm": 3.8278250694274902, + "learning_rate": 0.0001, + "loss": 0.0607, + "step": 3510 + }, + { + "epoch": 23.157894736842106, + "grad_norm": 4.461212635040283, + "learning_rate": 0.0001, + "loss": 0.0583, + "step": 3520 + }, + { + "epoch": 23.223684210526315, + "grad_norm": 3.1517691612243652, + "learning_rate": 0.0001, + "loss": 0.0564, + "step": 3530 + }, + { + "epoch": 23.289473684210527, + "grad_norm": 3.9116435050964355, + "learning_rate": 0.0001, + "loss": 0.0614, + "step": 3540 + }, + { + "epoch": 23.355263157894736, + "grad_norm": 3.633558511734009, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 3550 + }, + { + "epoch": 23.42105263157895, + "grad_norm": 3.1927671432495117, + "learning_rate": 0.0001, + "loss": 0.0592, + "step": 3560 + }, + { + "epoch": 23.486842105263158, + "grad_norm": 3.9745633602142334, + "learning_rate": 0.0001, + "loss": 0.0603, + "step": 3570 + }, + { + "epoch": 23.55263157894737, + "grad_norm": 4.017717361450195, + "learning_rate": 0.0001, + "loss": 0.0599, + "step": 3580 + }, + { + "epoch": 23.61842105263158, + "grad_norm": 3.3736987113952637, + "learning_rate": 0.0001, + "loss": 0.0561, + "step": 3590 + }, + { + "epoch": 23.68421052631579, + "grad_norm": 3.3666791915893555, + "learning_rate": 0.0001, + "loss": 0.0589, + "step": 3600 + }, + { + "epoch": 23.75, + "grad_norm": 3.2723591327667236, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 3610 + }, + { + "epoch": 23.81578947368421, + "grad_norm": 3.429962396621704, + "learning_rate": 0.0001, + "loss": 0.0572, + "step": 3620 + }, + { + "epoch": 23.88157894736842, + "grad_norm": 4.048157691955566, + "learning_rate": 0.0001, + "loss": 0.0558, + "step": 3630 + }, + { + "epoch": 23.94736842105263, + "grad_norm": 4.095017910003662, + "learning_rate": 0.0001, + "loss": 0.0532, + "step": 3640 + }, + { + "epoch": 24.013157894736842, + "grad_norm": 3.7212188243865967, + "learning_rate": 0.0001, + "loss": 0.0527, + "step": 3650 + }, + { + "epoch": 24.07894736842105, + "grad_norm": 3.424227237701416, + "learning_rate": 0.0001, + "loss": 0.0525, + "step": 3660 + }, + { + "epoch": 24.144736842105264, + "grad_norm": 3.869912624359131, + "learning_rate": 0.0001, + "loss": 0.0529, + "step": 3670 + }, + { + "epoch": 24.210526315789473, + "grad_norm": 3.593370199203491, + "learning_rate": 0.0001, + "loss": 0.0547, + "step": 3680 + }, + { + "epoch": 24.276315789473685, + "grad_norm": 3.430244207382202, + "learning_rate": 0.0001, + "loss": 0.0546, + "step": 3690 + }, + { + "epoch": 24.342105263157894, + "grad_norm": 3.3279993534088135, + "learning_rate": 0.0001, + "loss": 0.0561, + "step": 3700 + }, + { + "epoch": 24.407894736842106, + "grad_norm": 3.5217323303222656, + "learning_rate": 0.0001, + "loss": 0.0588, + "step": 3710 + }, + { + "epoch": 24.473684210526315, + "grad_norm": 3.2047455310821533, + "learning_rate": 0.0001, + "loss": 0.0528, + "step": 3720 + }, + { + "epoch": 24.539473684210527, + "grad_norm": 3.5667405128479004, + "learning_rate": 0.0001, + "loss": 0.0565, + "step": 3730 + }, + { + "epoch": 24.605263157894736, + "grad_norm": 3.1612064838409424, + "learning_rate": 0.0001, + "loss": 0.0575, + "step": 3740 + }, + { + "epoch": 24.67105263157895, + "grad_norm": 3.379948616027832, + "learning_rate": 0.0001, + "loss": 0.0536, + "step": 3750 + }, + { + "epoch": 24.736842105263158, + "grad_norm": 3.004784107208252, + "learning_rate": 0.0001, + "loss": 0.0534, + "step": 3760 + }, + { + "epoch": 24.80263157894737, + "grad_norm": 3.648918867111206, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 3770 + }, + { + "epoch": 24.86842105263158, + "grad_norm": 3.0311264991760254, + "learning_rate": 0.0001, + "loss": 0.06, + "step": 3780 + }, + { + "epoch": 24.93421052631579, + "grad_norm": 3.015455722808838, + "learning_rate": 0.0001, + "loss": 0.0618, + "step": 3790 + }, + { + "epoch": 25.0, + "grad_norm": 3.084538698196411, + "learning_rate": 0.0001, + "loss": 0.0562, + "step": 3800 + }, + { + "epoch": 25.06578947368421, + "grad_norm": 3.5729260444641113, + "learning_rate": 0.0001, + "loss": 0.056, + "step": 3810 + }, + { + "epoch": 25.13157894736842, + "grad_norm": 3.5942578315734863, + "learning_rate": 0.0001, + "loss": 0.0569, + "step": 3820 + }, + { + "epoch": 25.19736842105263, + "grad_norm": 3.8373870849609375, + "learning_rate": 0.0001, + "loss": 0.0578, + "step": 3830 + }, + { + "epoch": 25.263157894736842, + "grad_norm": 3.1174817085266113, + "learning_rate": 0.0001, + "loss": 0.0539, + "step": 3840 + }, + { + "epoch": 25.32894736842105, + "grad_norm": 2.9916906356811523, + "learning_rate": 0.0001, + "loss": 0.0545, + "step": 3850 + }, + { + "epoch": 25.394736842105264, + "grad_norm": 3.7236101627349854, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 3860 + }, + { + "epoch": 25.460526315789473, + "grad_norm": 3.196599006652832, + "learning_rate": 0.0001, + "loss": 0.0613, + "step": 3870 + }, + { + "epoch": 25.526315789473685, + "grad_norm": 3.3245861530303955, + "learning_rate": 0.0001, + "loss": 0.0595, + "step": 3880 + }, + { + "epoch": 25.592105263157894, + "grad_norm": 3.1229844093322754, + "learning_rate": 0.0001, + "loss": 0.0581, + "step": 3890 + }, + { + "epoch": 25.657894736842106, + "grad_norm": 3.090834617614746, + "learning_rate": 0.0001, + "loss": 0.057, + "step": 3900 + }, + { + "epoch": 25.723684210526315, + "grad_norm": 3.7173891067504883, + "learning_rate": 0.0001, + "loss": 0.0553, + "step": 3910 + }, + { + "epoch": 25.789473684210527, + "grad_norm": 3.4529900550842285, + "learning_rate": 0.0001, + "loss": 0.06, + "step": 3920 + }, + { + "epoch": 25.855263157894736, + "grad_norm": 3.41372013092041, + "learning_rate": 0.0001, + "loss": 0.0534, + "step": 3930 + }, + { + "epoch": 25.92105263157895, + "grad_norm": 3.5578503608703613, + "learning_rate": 0.0001, + "loss": 0.0579, + "step": 3940 + }, + { + "epoch": 25.986842105263158, + "grad_norm": 2.5233664512634277, + "learning_rate": 0.0001, + "loss": 0.0571, + "step": 3950 + }, + { + "epoch": 26.05263157894737, + "grad_norm": 3.310196876525879, + "learning_rate": 0.0001, + "loss": 0.0505, + "step": 3960 + }, + { + "epoch": 26.11842105263158, + "grad_norm": 3.0970399379730225, + "learning_rate": 0.0001, + "loss": 0.0532, + "step": 3970 + }, + { + "epoch": 26.18421052631579, + "grad_norm": 2.9945688247680664, + "learning_rate": 0.0001, + "loss": 0.05, + "step": 3980 + }, + { + "epoch": 26.25, + "grad_norm": 2.966948986053467, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 3990 + }, + { + "epoch": 26.31578947368421, + "grad_norm": 3.1473228931427, + "learning_rate": 0.0001, + "loss": 0.0548, + "step": 4000 + }, + { + "epoch": 26.38157894736842, + "grad_norm": 3.781965494155884, + "learning_rate": 0.0001, + "loss": 0.0513, + "step": 4010 + }, + { + "epoch": 26.44736842105263, + "grad_norm": 3.1109609603881836, + "learning_rate": 0.0001, + "loss": 0.0551, + "step": 4020 + }, + { + "epoch": 26.513157894736842, + "grad_norm": 3.153053045272827, + "learning_rate": 0.0001, + "loss": 0.0601, + "step": 4030 + }, + { + "epoch": 26.57894736842105, + "grad_norm": 2.897550582885742, + "learning_rate": 0.0001, + "loss": 0.0563, + "step": 4040 + }, + { + "epoch": 26.644736842105264, + "grad_norm": 3.540456771850586, + "learning_rate": 0.0001, + "loss": 0.0529, + "step": 4050 + }, + { + "epoch": 26.710526315789473, + "grad_norm": 3.526008129119873, + "learning_rate": 0.0001, + "loss": 0.0529, + "step": 4060 + }, + { + "epoch": 26.776315789473685, + "grad_norm": 2.84975004196167, + "learning_rate": 0.0001, + "loss": 0.0546, + "step": 4070 + }, + { + "epoch": 26.842105263157894, + "grad_norm": 2.8743505477905273, + "learning_rate": 0.0001, + "loss": 0.0516, + "step": 4080 + }, + { + "epoch": 26.907894736842106, + "grad_norm": 2.8977339267730713, + "learning_rate": 0.0001, + "loss": 0.0531, + "step": 4090 + }, + { + "epoch": 26.973684210526315, + "grad_norm": 2.9389920234680176, + "learning_rate": 0.0001, + "loss": 0.054, + "step": 4100 + }, + { + "epoch": 27.039473684210527, + "grad_norm": 3.0356078147888184, + "learning_rate": 0.0001, + "loss": 0.0549, + "step": 4110 + }, + { + "epoch": 27.105263157894736, + "grad_norm": 2.933394193649292, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 4120 + }, + { + "epoch": 27.17105263157895, + "grad_norm": 3.342971086502075, + "learning_rate": 0.0001, + "loss": 0.0512, + "step": 4130 + }, + { + "epoch": 27.236842105263158, + "grad_norm": 3.144812822341919, + "learning_rate": 0.0001, + "loss": 0.0526, + "step": 4140 + }, + { + "epoch": 27.30263157894737, + "grad_norm": 2.595964193344116, + "learning_rate": 0.0001, + "loss": 0.0513, + "step": 4150 + }, + { + "epoch": 27.36842105263158, + "grad_norm": 3.5423874855041504, + "learning_rate": 0.0001, + "loss": 0.0504, + "step": 4160 + }, + { + "epoch": 27.43421052631579, + "grad_norm": 3.1405887603759766, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 4170 + }, + { + "epoch": 27.5, + "grad_norm": 3.289807081222534, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 4180 + }, + { + "epoch": 27.56578947368421, + "grad_norm": 3.1032474040985107, + "learning_rate": 0.0001, + "loss": 0.0528, + "step": 4190 + }, + { + "epoch": 27.63157894736842, + "grad_norm": 2.8093903064727783, + "learning_rate": 0.0001, + "loss": 0.0496, + "step": 4200 + }, + { + "epoch": 27.69736842105263, + "grad_norm": 2.896862268447876, + "learning_rate": 0.0001, + "loss": 0.0486, + "step": 4210 + }, + { + "epoch": 27.763157894736842, + "grad_norm": 3.2978949546813965, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 4220 + }, + { + "epoch": 27.82894736842105, + "grad_norm": 3.006734609603882, + "learning_rate": 0.0001, + "loss": 0.0477, + "step": 4230 + }, + { + "epoch": 27.894736842105264, + "grad_norm": 3.1355373859405518, + "learning_rate": 0.0001, + "loss": 0.0491, + "step": 4240 + }, + { + "epoch": 27.960526315789473, + "grad_norm": 2.898782253265381, + "learning_rate": 0.0001, + "loss": 0.0476, + "step": 4250 + }, + { + "epoch": 28.026315789473685, + "grad_norm": 2.8229269981384277, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 4260 + }, + { + "epoch": 28.092105263157894, + "grad_norm": 2.915052890777588, + "learning_rate": 0.0001, + "loss": 0.0515, + "step": 4270 + }, + { + "epoch": 28.157894736842106, + "grad_norm": 3.278546094894409, + "learning_rate": 0.0001, + "loss": 0.0483, + "step": 4280 + }, + { + "epoch": 28.223684210526315, + "grad_norm": 3.0418665409088135, + "learning_rate": 0.0001, + "loss": 0.046, + "step": 4290 + }, + { + "epoch": 28.289473684210527, + "grad_norm": 3.2005770206451416, + "learning_rate": 0.0001, + "loss": 0.0459, + "step": 4300 + }, + { + "epoch": 28.355263157894736, + "grad_norm": 3.2774760723114014, + "learning_rate": 0.0001, + "loss": 0.0503, + "step": 4310 + }, + { + "epoch": 28.42105263157895, + "grad_norm": 3.155532121658325, + "learning_rate": 0.0001, + "loss": 0.0469, + "step": 4320 + }, + { + "epoch": 28.486842105263158, + "grad_norm": 3.1832993030548096, + "learning_rate": 0.0001, + "loss": 0.0483, + "step": 4330 + }, + { + "epoch": 28.55263157894737, + "grad_norm": 3.1676037311553955, + "learning_rate": 0.0001, + "loss": 0.0497, + "step": 4340 + }, + { + "epoch": 28.61842105263158, + "grad_norm": 3.4129903316497803, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 4350 + }, + { + "epoch": 28.68421052631579, + "grad_norm": 3.1655561923980713, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 4360 + }, + { + "epoch": 28.75, + "grad_norm": 2.708021640777588, + "learning_rate": 0.0001, + "loss": 0.0466, + "step": 4370 + }, + { + "epoch": 28.81578947368421, + "grad_norm": 2.68147873878479, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 4380 + }, + { + "epoch": 28.88157894736842, + "grad_norm": 3.428018808364868, + "learning_rate": 0.0001, + "loss": 0.0489, + "step": 4390 + }, + { + "epoch": 28.94736842105263, + "grad_norm": 2.94804048538208, + "learning_rate": 0.0001, + "loss": 0.0513, + "step": 4400 + }, + { + "epoch": 29.013157894736842, + "grad_norm": 2.679884910583496, + "learning_rate": 0.0001, + "loss": 0.0524, + "step": 4410 + }, + { + "epoch": 29.07894736842105, + "grad_norm": 2.392934560775757, + "learning_rate": 0.0001, + "loss": 0.0483, + "step": 4420 + }, + { + "epoch": 29.144736842105264, + "grad_norm": 2.7461166381835938, + "learning_rate": 0.0001, + "loss": 0.0474, + "step": 4430 + }, + { + "epoch": 29.210526315789473, + "grad_norm": 2.788827896118164, + "learning_rate": 0.0001, + "loss": 0.0489, + "step": 4440 + }, + { + "epoch": 29.276315789473685, + "grad_norm": 3.0062499046325684, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 4450 + }, + { + "epoch": 29.342105263157894, + "grad_norm": 3.0573859214782715, + "learning_rate": 0.0001, + "loss": 0.05, + "step": 4460 + }, + { + "epoch": 29.407894736842106, + "grad_norm": 2.665456533432007, + "learning_rate": 0.0001, + "loss": 0.051, + "step": 4470 + }, + { + "epoch": 29.473684210526315, + "grad_norm": 2.522658586502075, + "learning_rate": 0.0001, + "loss": 0.0467, + "step": 4480 + }, + { + "epoch": 29.539473684210527, + "grad_norm": 2.600645065307617, + "learning_rate": 0.0001, + "loss": 0.0489, + "step": 4490 + }, + { + "epoch": 29.605263157894736, + "grad_norm": 2.6609113216400146, + "learning_rate": 0.0001, + "loss": 0.0476, + "step": 4500 + }, + { + "epoch": 29.67105263157895, + "grad_norm": 2.691256284713745, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 4510 + }, + { + "epoch": 29.736842105263158, + "grad_norm": 2.8132810592651367, + "learning_rate": 0.0001, + "loss": 0.0458, + "step": 4520 + }, + { + "epoch": 29.80263157894737, + "grad_norm": 2.914437770843506, + "learning_rate": 0.0001, + "loss": 0.0486, + "step": 4530 + }, + { + "epoch": 29.86842105263158, + "grad_norm": 2.497580051422119, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 4540 + }, + { + "epoch": 29.93421052631579, + "grad_norm": 2.6248748302459717, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 4550 + }, + { + "epoch": 30.0, + "grad_norm": 2.8012468814849854, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 4560 + }, + { + "epoch": 30.06578947368421, + "grad_norm": 2.505524158477783, + "learning_rate": 0.0001, + "loss": 0.047, + "step": 4570 + }, + { + "epoch": 30.13157894736842, + "grad_norm": 2.6117448806762695, + "learning_rate": 0.0001, + "loss": 0.0503, + "step": 4580 + }, + { + "epoch": 30.19736842105263, + "grad_norm": 2.7365365028381348, + "learning_rate": 0.0001, + "loss": 0.0424, + "step": 4590 + }, + { + "epoch": 30.263157894736842, + "grad_norm": 2.8292315006256104, + "learning_rate": 0.0001, + "loss": 0.0467, + "step": 4600 + }, + { + "epoch": 30.32894736842105, + "grad_norm": 2.5775272846221924, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 4610 + }, + { + "epoch": 30.394736842105264, + "grad_norm": 2.720210313796997, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 4620 + }, + { + "epoch": 30.460526315789473, + "grad_norm": 2.5070748329162598, + "learning_rate": 0.0001, + "loss": 0.0499, + "step": 4630 + }, + { + "epoch": 30.526315789473685, + "grad_norm": 2.8825769424438477, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 4640 + }, + { + "epoch": 30.592105263157894, + "grad_norm": 2.553520441055298, + "learning_rate": 0.0001, + "loss": 0.0458, + "step": 4650 + }, + { + "epoch": 30.657894736842106, + "grad_norm": 2.818341016769409, + "learning_rate": 0.0001, + "loss": 0.0488, + "step": 4660 + }, + { + "epoch": 30.723684210526315, + "grad_norm": 3.132817268371582, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 4670 + }, + { + "epoch": 30.789473684210527, + "grad_norm": 2.931819200515747, + "learning_rate": 0.0001, + "loss": 0.0488, + "step": 4680 + }, + { + "epoch": 30.855263157894736, + "grad_norm": 2.6675987243652344, + "learning_rate": 0.0001, + "loss": 0.0449, + "step": 4690 + }, + { + "epoch": 30.92105263157895, + "grad_norm": 3.1334962844848633, + "learning_rate": 0.0001, + "loss": 0.0458, + "step": 4700 + }, + { + "epoch": 30.986842105263158, + "grad_norm": 2.9134418964385986, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 4710 + }, + { + "epoch": 31.05263157894737, + "grad_norm": 2.820773124694824, + "learning_rate": 0.0001, + "loss": 0.0437, + "step": 4720 + }, + { + "epoch": 31.11842105263158, + "grad_norm": 2.4287455081939697, + "learning_rate": 0.0001, + "loss": 0.0434, + "step": 4730 + }, + { + "epoch": 31.18421052631579, + "grad_norm": 2.476181745529175, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 4740 + }, + { + "epoch": 31.25, + "grad_norm": 2.6724979877471924, + "learning_rate": 0.0001, + "loss": 0.043, + "step": 4750 + }, + { + "epoch": 31.31578947368421, + "grad_norm": 2.832930326461792, + "learning_rate": 0.0001, + "loss": 0.0455, + "step": 4760 + }, + { + "epoch": 31.38157894736842, + "grad_norm": 2.3849406242370605, + "learning_rate": 0.0001, + "loss": 0.0481, + "step": 4770 + }, + { + "epoch": 31.44736842105263, + "grad_norm": 2.4562463760375977, + "learning_rate": 0.0001, + "loss": 0.0444, + "step": 4780 + }, + { + "epoch": 31.513157894736842, + "grad_norm": 2.278359889984131, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 4790 + }, + { + "epoch": 31.57894736842105, + "grad_norm": 2.701538562774658, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 4800 + }, + { + "epoch": 31.644736842105264, + "grad_norm": 3.181910276412964, + "learning_rate": 0.0001, + "loss": 0.0451, + "step": 4810 + }, + { + "epoch": 31.710526315789473, + "grad_norm": 2.4333291053771973, + "learning_rate": 0.0001, + "loss": 0.0464, + "step": 4820 + }, + { + "epoch": 31.776315789473685, + "grad_norm": 2.6533899307250977, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 4830 + }, + { + "epoch": 31.842105263157894, + "grad_norm": 2.8121790885925293, + "learning_rate": 0.0001, + "loss": 0.0476, + "step": 4840 + }, + { + "epoch": 31.907894736842106, + "grad_norm": 2.6064651012420654, + "learning_rate": 0.0001, + "loss": 0.0479, + "step": 4850 + }, + { + "epoch": 31.973684210526315, + "grad_norm": 2.4942471981048584, + "learning_rate": 0.0001, + "loss": 0.0494, + "step": 4860 + }, + { + "epoch": 32.03947368421053, + "grad_norm": 2.7892401218414307, + "learning_rate": 0.0001, + "loss": 0.0495, + "step": 4870 + }, + { + "epoch": 32.10526315789474, + "grad_norm": 2.6274139881134033, + "learning_rate": 0.0001, + "loss": 0.0454, + "step": 4880 + }, + { + "epoch": 32.171052631578945, + "grad_norm": 2.729278802871704, + "learning_rate": 0.0001, + "loss": 0.0437, + "step": 4890 + }, + { + "epoch": 32.23684210526316, + "grad_norm": 2.4070258140563965, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 4900 + }, + { + "epoch": 32.30263157894737, + "grad_norm": 2.6891207695007324, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 4910 + }, + { + "epoch": 32.36842105263158, + "grad_norm": 2.2059195041656494, + "learning_rate": 0.0001, + "loss": 0.0466, + "step": 4920 + }, + { + "epoch": 32.43421052631579, + "grad_norm": 2.6894748210906982, + "learning_rate": 0.0001, + "loss": 0.0456, + "step": 4930 + }, + { + "epoch": 32.5, + "grad_norm": 2.526822805404663, + "learning_rate": 0.0001, + "loss": 0.0422, + "step": 4940 + }, + { + "epoch": 32.56578947368421, + "grad_norm": 2.509107828140259, + "learning_rate": 0.0001, + "loss": 0.0439, + "step": 4950 + }, + { + "epoch": 32.63157894736842, + "grad_norm": 2.662522792816162, + "learning_rate": 0.0001, + "loss": 0.0468, + "step": 4960 + }, + { + "epoch": 32.69736842105263, + "grad_norm": 2.4000043869018555, + "learning_rate": 0.0001, + "loss": 0.0448, + "step": 4970 + }, + { + "epoch": 32.76315789473684, + "grad_norm": 2.2575981616973877, + "learning_rate": 0.0001, + "loss": 0.0427, + "step": 4980 + }, + { + "epoch": 32.828947368421055, + "grad_norm": 2.4728028774261475, + "learning_rate": 0.0001, + "loss": 0.0459, + "step": 4990 + }, + { + "epoch": 32.89473684210526, + "grad_norm": 2.4107658863067627, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 5000 + }, + { + "epoch": 32.96052631578947, + "grad_norm": 2.1474099159240723, + "learning_rate": 0.0001, + "loss": 0.045, + "step": 5010 + }, + { + "epoch": 33.026315789473685, + "grad_norm": 2.5930263996124268, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 5020 + }, + { + "epoch": 33.0921052631579, + "grad_norm": 2.652590036392212, + "learning_rate": 0.0001, + "loss": 0.0416, + "step": 5030 + }, + { + "epoch": 33.1578947368421, + "grad_norm": 2.6589486598968506, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 5040 + }, + { + "epoch": 33.223684210526315, + "grad_norm": 2.6637887954711914, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 5050 + }, + { + "epoch": 33.28947368421053, + "grad_norm": 2.2335708141326904, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 5060 + }, + { + "epoch": 33.35526315789474, + "grad_norm": 2.3757381439208984, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 5070 + }, + { + "epoch": 33.421052631578945, + "grad_norm": 2.569889545440674, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 5080 + }, + { + "epoch": 33.48684210526316, + "grad_norm": 2.7300987243652344, + "learning_rate": 0.0001, + "loss": 0.0438, + "step": 5090 + }, + { + "epoch": 33.55263157894737, + "grad_norm": 2.591104745864868, + "learning_rate": 0.0001, + "loss": 0.0446, + "step": 5100 + }, + { + "epoch": 33.61842105263158, + "grad_norm": 2.4056169986724854, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 5110 + }, + { + "epoch": 33.68421052631579, + "grad_norm": 2.4610161781311035, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 5120 + }, + { + "epoch": 33.75, + "grad_norm": 2.3633317947387695, + "learning_rate": 0.0001, + "loss": 0.0457, + "step": 5130 + }, + { + "epoch": 33.81578947368421, + "grad_norm": 2.227242946624756, + "learning_rate": 0.0001, + "loss": 0.0451, + "step": 5140 + }, + { + "epoch": 33.88157894736842, + "grad_norm": 2.552421808242798, + "learning_rate": 0.0001, + "loss": 0.0453, + "step": 5150 + }, + { + "epoch": 33.94736842105263, + "grad_norm": 2.2416305541992188, + "learning_rate": 0.0001, + "loss": 0.0462, + "step": 5160 + }, + { + "epoch": 34.01315789473684, + "grad_norm": 2.6720187664031982, + "learning_rate": 0.0001, + "loss": 0.0449, + "step": 5170 + }, + { + "epoch": 34.078947368421055, + "grad_norm": 2.6561310291290283, + "learning_rate": 0.0001, + "loss": 0.0471, + "step": 5180 + }, + { + "epoch": 34.14473684210526, + "grad_norm": 2.019381523132324, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 5190 + }, + { + "epoch": 34.21052631578947, + "grad_norm": 2.435896873474121, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 5200 + }, + { + "epoch": 34.276315789473685, + "grad_norm": 2.5385663509368896, + "learning_rate": 0.0001, + "loss": 0.0443, + "step": 5210 + }, + { + "epoch": 34.3421052631579, + "grad_norm": 2.449340343475342, + "learning_rate": 0.0001, + "loss": 0.0447, + "step": 5220 + }, + { + "epoch": 34.4078947368421, + "grad_norm": 2.6000161170959473, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 5230 + }, + { + "epoch": 34.473684210526315, + "grad_norm": 2.6699042320251465, + "learning_rate": 0.0001, + "loss": 0.0446, + "step": 5240 + }, + { + "epoch": 34.53947368421053, + "grad_norm": 2.4187822341918945, + "learning_rate": 0.0001, + "loss": 0.0442, + "step": 5250 + }, + { + "epoch": 34.60526315789474, + "grad_norm": 2.450216770172119, + "learning_rate": 0.0001, + "loss": 0.0413, + "step": 5260 + }, + { + "epoch": 34.671052631578945, + "grad_norm": 2.4974207878112793, + "learning_rate": 0.0001, + "loss": 0.042, + "step": 5270 + }, + { + "epoch": 34.73684210526316, + "grad_norm": 2.525740623474121, + "learning_rate": 0.0001, + "loss": 0.0414, + "step": 5280 + }, + { + "epoch": 34.80263157894737, + "grad_norm": 2.640798568725586, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 5290 + }, + { + "epoch": 34.86842105263158, + "grad_norm": 2.456847667694092, + "learning_rate": 0.0001, + "loss": 0.0441, + "step": 5300 + }, + { + "epoch": 34.93421052631579, + "grad_norm": 2.523120164871216, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 5310 + }, + { + "epoch": 35.0, + "grad_norm": 1.9672609567642212, + "learning_rate": 0.0001, + "loss": 0.0419, + "step": 5320 + }, + { + "epoch": 35.06578947368421, + "grad_norm": 2.4729208946228027, + "learning_rate": 0.0001, + "loss": 0.0423, + "step": 5330 + }, + { + "epoch": 35.13157894736842, + "grad_norm": 2.3115689754486084, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 5340 + }, + { + "epoch": 35.19736842105263, + "grad_norm": 2.2606678009033203, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 5350 + }, + { + "epoch": 35.26315789473684, + "grad_norm": 2.3886468410491943, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 5360 + }, + { + "epoch": 35.328947368421055, + "grad_norm": 2.1863503456115723, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 5370 + }, + { + "epoch": 35.39473684210526, + "grad_norm": 2.3488121032714844, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 5380 + }, + { + "epoch": 35.46052631578947, + "grad_norm": 2.360938310623169, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 5390 + }, + { + "epoch": 35.526315789473685, + "grad_norm": 2.5234484672546387, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 5400 + }, + { + "epoch": 35.5921052631579, + "grad_norm": 2.612522602081299, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 5410 + }, + { + "epoch": 35.6578947368421, + "grad_norm": 2.419776678085327, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 5420 + }, + { + "epoch": 35.723684210526315, + "grad_norm": 2.3333466053009033, + "learning_rate": 0.0001, + "loss": 0.0411, + "step": 5430 + }, + { + "epoch": 35.78947368421053, + "grad_norm": 2.3613359928131104, + "learning_rate": 0.0001, + "loss": 0.0461, + "step": 5440 + }, + { + "epoch": 35.85526315789474, + "grad_norm": 1.962768316268921, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 5450 + }, + { + "epoch": 35.921052631578945, + "grad_norm": 2.3597044944763184, + "learning_rate": 0.0001, + "loss": 0.0404, + "step": 5460 + }, + { + "epoch": 35.98684210526316, + "grad_norm": 2.2614336013793945, + "learning_rate": 0.0001, + "loss": 0.044, + "step": 5470 + }, + { + "epoch": 36.05263157894737, + "grad_norm": 2.350679397583008, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 5480 + }, + { + "epoch": 36.11842105263158, + "grad_norm": 2.374591588973999, + "learning_rate": 0.0001, + "loss": 0.0433, + "step": 5490 + }, + { + "epoch": 36.18421052631579, + "grad_norm": 2.3889739513397217, + "learning_rate": 0.0001, + "loss": 0.0422, + "step": 5500 + }, + { + "epoch": 36.25, + "grad_norm": 2.4520390033721924, + "learning_rate": 0.0001, + "loss": 0.0417, + "step": 5510 + }, + { + "epoch": 36.31578947368421, + "grad_norm": 2.349119186401367, + "learning_rate": 0.0001, + "loss": 0.0387, + "step": 5520 + }, + { + "epoch": 36.38157894736842, + "grad_norm": 2.202120780944824, + "learning_rate": 0.0001, + "loss": 0.0401, + "step": 5530 + }, + { + "epoch": 36.44736842105263, + "grad_norm": 2.2714016437530518, + "learning_rate": 0.0001, + "loss": 0.0379, + "step": 5540 + }, + { + "epoch": 36.51315789473684, + "grad_norm": 2.2632577419281006, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 5550 + }, + { + "epoch": 36.578947368421055, + "grad_norm": 2.608222007751465, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 5560 + }, + { + "epoch": 36.64473684210526, + "grad_norm": 2.298335552215576, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 5570 + }, + { + "epoch": 36.71052631578947, + "grad_norm": 2.498187303543091, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 5580 + }, + { + "epoch": 36.776315789473685, + "grad_norm": 2.62980580329895, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 5590 + }, + { + "epoch": 36.8421052631579, + "grad_norm": 2.6925511360168457, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 5600 + }, + { + "epoch": 36.9078947368421, + "grad_norm": 2.493591070175171, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 5610 + }, + { + "epoch": 36.973684210526315, + "grad_norm": 2.0936026573181152, + "learning_rate": 0.0001, + "loss": 0.0396, + "step": 5620 + }, + { + "epoch": 37.03947368421053, + "grad_norm": 2.3660683631896973, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 5630 + }, + { + "epoch": 37.10526315789474, + "grad_norm": 2.5689046382904053, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 5640 + }, + { + "epoch": 37.171052631578945, + "grad_norm": 2.339864730834961, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 5650 + }, + { + "epoch": 37.23684210526316, + "grad_norm": 1.9542019367218018, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 5660 + }, + { + "epoch": 37.30263157894737, + "grad_norm": 2.236436128616333, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 5670 + }, + { + "epoch": 37.36842105263158, + "grad_norm": 2.5332870483398438, + "learning_rate": 0.0001, + "loss": 0.0395, + "step": 5680 + }, + { + "epoch": 37.43421052631579, + "grad_norm": 2.4465630054473877, + "learning_rate": 0.0001, + "loss": 0.0406, + "step": 5690 + }, + { + "epoch": 37.5, + "grad_norm": 2.6095876693725586, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 5700 + }, + { + "epoch": 37.56578947368421, + "grad_norm": 2.1583876609802246, + "learning_rate": 0.0001, + "loss": 0.0374, + "step": 5710 + }, + { + "epoch": 37.63157894736842, + "grad_norm": 2.2866389751434326, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 5720 + }, + { + "epoch": 37.69736842105263, + "grad_norm": 2.5979535579681396, + "learning_rate": 0.0001, + "loss": 0.0421, + "step": 5730 + }, + { + "epoch": 37.76315789473684, + "grad_norm": 2.4028756618499756, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 5740 + }, + { + "epoch": 37.828947368421055, + "grad_norm": 2.268890619277954, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 5750 + }, + { + "epoch": 37.89473684210526, + "grad_norm": 2.3834378719329834, + "learning_rate": 0.0001, + "loss": 0.0403, + "step": 5760 + }, + { + "epoch": 37.96052631578947, + "grad_norm": 2.4863436222076416, + "learning_rate": 0.0001, + "loss": 0.0416, + "step": 5770 + }, + { + "epoch": 38.026315789473685, + "grad_norm": 2.2150402069091797, + "learning_rate": 0.0001, + "loss": 0.0428, + "step": 5780 + }, + { + "epoch": 38.0921052631579, + "grad_norm": 2.104177474975586, + "learning_rate": 0.0001, + "loss": 0.0407, + "step": 5790 + }, + { + "epoch": 38.1578947368421, + "grad_norm": 2.027252674102783, + "learning_rate": 0.0001, + "loss": 0.0367, + "step": 5800 + }, + { + "epoch": 38.223684210526315, + "grad_norm": 2.5156586170196533, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 5810 + }, + { + "epoch": 38.28947368421053, + "grad_norm": 2.186910629272461, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 5820 + }, + { + "epoch": 38.35526315789474, + "grad_norm": 2.3481905460357666, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 5830 + }, + { + "epoch": 38.421052631578945, + "grad_norm": 2.13398814201355, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 5840 + }, + { + "epoch": 38.48684210526316, + "grad_norm": 2.2620978355407715, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 5850 + }, + { + "epoch": 38.55263157894737, + "grad_norm": 2.134782075881958, + "learning_rate": 0.0001, + "loss": 0.0385, + "step": 5860 + }, + { + "epoch": 38.61842105263158, + "grad_norm": 1.88546884059906, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 5870 + }, + { + "epoch": 38.68421052631579, + "grad_norm": 2.0278677940368652, + "learning_rate": 0.0001, + "loss": 0.0396, + "step": 5880 + }, + { + "epoch": 38.75, + "grad_norm": 2.1986711025238037, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 5890 + }, + { + "epoch": 38.81578947368421, + "grad_norm": 2.5093231201171875, + "learning_rate": 0.0001, + "loss": 0.0394, + "step": 5900 + }, + { + "epoch": 38.88157894736842, + "grad_norm": 2.205291509628296, + "learning_rate": 0.0001, + "loss": 0.041, + "step": 5910 + }, + { + "epoch": 38.94736842105263, + "grad_norm": 2.2630720138549805, + "learning_rate": 0.0001, + "loss": 0.042, + "step": 5920 + }, + { + "epoch": 39.01315789473684, + "grad_norm": 2.1553659439086914, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 5930 + }, + { + "epoch": 39.078947368421055, + "grad_norm": 2.4210903644561768, + "learning_rate": 0.0001, + "loss": 0.037, + "step": 5940 + }, + { + "epoch": 39.14473684210526, + "grad_norm": 2.5638651847839355, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 5950 + }, + { + "epoch": 39.21052631578947, + "grad_norm": 2.3888773918151855, + "learning_rate": 0.0001, + "loss": 0.0391, + "step": 5960 + }, + { + "epoch": 39.276315789473685, + "grad_norm": 2.2515876293182373, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 5970 + }, + { + "epoch": 39.3421052631579, + "grad_norm": 2.554680347442627, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 5980 + }, + { + "epoch": 39.4078947368421, + "grad_norm": 2.454371690750122, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 5990 + }, + { + "epoch": 39.473684210526315, + "grad_norm": 2.179824113845825, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 6000 + }, + { + "epoch": 39.53947368421053, + "grad_norm": 2.5795116424560547, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 6010 + }, + { + "epoch": 39.60526315789474, + "grad_norm": 2.0184319019317627, + "learning_rate": 0.0001, + "loss": 0.038, + "step": 6020 + }, + { + "epoch": 39.671052631578945, + "grad_norm": 2.221646547317505, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 6030 + }, + { + "epoch": 39.73684210526316, + "grad_norm": 2.4176130294799805, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 6040 + }, + { + "epoch": 39.80263157894737, + "grad_norm": 2.258746385574341, + "learning_rate": 0.0001, + "loss": 0.0375, + "step": 6050 + }, + { + "epoch": 39.86842105263158, + "grad_norm": 2.2716710567474365, + "learning_rate": 0.0001, + "loss": 0.036, + "step": 6060 + }, + { + "epoch": 39.93421052631579, + "grad_norm": 2.2062830924987793, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 6070 + }, + { + "epoch": 40.0, + "grad_norm": 2.2521860599517822, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 6080 + }, + { + "epoch": 40.06578947368421, + "grad_norm": 2.0055341720581055, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 6090 + }, + { + "epoch": 40.13157894736842, + "grad_norm": 2.273428201675415, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 6100 + }, + { + "epoch": 40.19736842105263, + "grad_norm": 2.1523613929748535, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 6110 + }, + { + "epoch": 40.26315789473684, + "grad_norm": 2.2679247856140137, + "learning_rate": 0.0001, + "loss": 0.039, + "step": 6120 + }, + { + "epoch": 40.328947368421055, + "grad_norm": 2.01351261138916, + "learning_rate": 0.0001, + "loss": 0.0373, + "step": 6130 + }, + { + "epoch": 40.39473684210526, + "grad_norm": 2.1786999702453613, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 6140 + }, + { + "epoch": 40.46052631578947, + "grad_norm": 2.424417734146118, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 6150 + }, + { + "epoch": 40.526315789473685, + "grad_norm": 1.980931043624878, + "learning_rate": 0.0001, + "loss": 0.0409, + "step": 6160 + }, + { + "epoch": 40.5921052631579, + "grad_norm": 2.2606894969940186, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 6170 + }, + { + "epoch": 40.6578947368421, + "grad_norm": 2.0380303859710693, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 6180 + }, + { + "epoch": 40.723684210526315, + "grad_norm": 2.143332004547119, + "learning_rate": 0.0001, + "loss": 0.0415, + "step": 6190 + }, + { + "epoch": 40.78947368421053, + "grad_norm": 2.299978017807007, + "learning_rate": 0.0001, + "loss": 0.0405, + "step": 6200 + }, + { + "epoch": 40.85526315789474, + "grad_norm": 2.454505205154419, + "learning_rate": 0.0001, + "loss": 0.0393, + "step": 6210 + }, + { + "epoch": 40.921052631578945, + "grad_norm": 2.5504910945892334, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 6220 + }, + { + "epoch": 40.98684210526316, + "grad_norm": 2.2120444774627686, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 6230 + }, + { + "epoch": 41.05263157894737, + "grad_norm": 2.1109986305236816, + "learning_rate": 0.0001, + "loss": 0.0386, + "step": 6240 + }, + { + "epoch": 41.11842105263158, + "grad_norm": 2.4341702461242676, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 6250 + }, + { + "epoch": 41.18421052631579, + "grad_norm": 2.1559791564941406, + "learning_rate": 0.0001, + "loss": 0.0398, + "step": 6260 + }, + { + "epoch": 41.25, + "grad_norm": 2.233827590942383, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 6270 + }, + { + "epoch": 41.31578947368421, + "grad_norm": 2.0615148544311523, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 6280 + }, + { + "epoch": 41.38157894736842, + "grad_norm": 2.319382667541504, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 6290 + }, + { + "epoch": 41.44736842105263, + "grad_norm": 2.304764986038208, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 6300 + }, + { + "epoch": 41.51315789473684, + "grad_norm": 2.4223990440368652, + "learning_rate": 0.0001, + "loss": 0.0388, + "step": 6310 + }, + { + "epoch": 41.578947368421055, + "grad_norm": 2.1475377082824707, + "learning_rate": 0.0001, + "loss": 0.0381, + "step": 6320 + }, + { + "epoch": 41.64473684210526, + "grad_norm": 2.036505699157715, + "learning_rate": 0.0001, + "loss": 0.0372, + "step": 6330 + }, + { + "epoch": 41.71052631578947, + "grad_norm": 1.9919437170028687, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 6340 + }, + { + "epoch": 41.776315789473685, + "grad_norm": 2.1360974311828613, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 6350 + }, + { + "epoch": 41.8421052631579, + "grad_norm": 2.1707985401153564, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 6360 + }, + { + "epoch": 41.9078947368421, + "grad_norm": 2.1546218395233154, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 6370 + }, + { + "epoch": 41.973684210526315, + "grad_norm": 1.8794069290161133, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 6380 + }, + { + "epoch": 42.03947368421053, + "grad_norm": 2.156665086746216, + "learning_rate": 0.0001, + "loss": 0.0365, + "step": 6390 + }, + { + "epoch": 42.10526315789474, + "grad_norm": 2.243816614151001, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 6400 + }, + { + "epoch": 42.171052631578945, + "grad_norm": 2.2223100662231445, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 6410 + }, + { + "epoch": 42.23684210526316, + "grad_norm": 2.0650203227996826, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 6420 + }, + { + "epoch": 42.30263157894737, + "grad_norm": 2.1891047954559326, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 6430 + }, + { + "epoch": 42.36842105263158, + "grad_norm": 2.168792963027954, + "learning_rate": 0.0001, + "loss": 0.0358, + "step": 6440 + }, + { + "epoch": 42.43421052631579, + "grad_norm": 2.247410774230957, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 6450 + }, + { + "epoch": 42.5, + "grad_norm": 1.878936529159546, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 6460 + }, + { + "epoch": 42.56578947368421, + "grad_norm": 2.108396530151367, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 6470 + }, + { + "epoch": 42.63157894736842, + "grad_norm": 1.6226104497909546, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 6480 + }, + { + "epoch": 42.69736842105263, + "grad_norm": 2.6258723735809326, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 6490 + }, + { + "epoch": 42.76315789473684, + "grad_norm": 2.1555979251861572, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 6500 + }, + { + "epoch": 42.828947368421055, + "grad_norm": 2.5804638862609863, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 6510 + }, + { + "epoch": 42.89473684210526, + "grad_norm": 2.313135862350464, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 6520 + }, + { + "epoch": 42.96052631578947, + "grad_norm": 2.659311056137085, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 6530 + }, + { + "epoch": 43.026315789473685, + "grad_norm": 2.1776773929595947, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 6540 + }, + { + "epoch": 43.0921052631579, + "grad_norm": 2.1495778560638428, + "learning_rate": 0.0001, + "loss": 0.0382, + "step": 6550 + }, + { + "epoch": 43.1578947368421, + "grad_norm": 2.05283522605896, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 6560 + }, + { + "epoch": 43.223684210526315, + "grad_norm": 2.5983822345733643, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 6570 + }, + { + "epoch": 43.28947368421053, + "grad_norm": 2.5054686069488525, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 6580 + }, + { + "epoch": 43.35526315789474, + "grad_norm": 1.933837890625, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 6590 + }, + { + "epoch": 43.421052631578945, + "grad_norm": 2.3833420276641846, + "learning_rate": 0.0001, + "loss": 0.0376, + "step": 6600 + }, + { + "epoch": 43.48684210526316, + "grad_norm": 2.4395833015441895, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 6610 + }, + { + "epoch": 43.55263157894737, + "grad_norm": 2.5590438842773438, + "learning_rate": 0.0001, + "loss": 0.0383, + "step": 6620 + }, + { + "epoch": 43.61842105263158, + "grad_norm": 2.5415804386138916, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 6630 + }, + { + "epoch": 43.68421052631579, + "grad_norm": 1.9072297811508179, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 6640 + }, + { + "epoch": 43.75, + "grad_norm": 1.8998295068740845, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 6650 + }, + { + "epoch": 43.81578947368421, + "grad_norm": 1.905118465423584, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 6660 + }, + { + "epoch": 43.88157894736842, + "grad_norm": 2.132871627807617, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 6670 + }, + { + "epoch": 43.94736842105263, + "grad_norm": 2.0413613319396973, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 6680 + }, + { + "epoch": 44.01315789473684, + "grad_norm": 2.2114999294281006, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 6690 + }, + { + "epoch": 44.078947368421055, + "grad_norm": 2.3088066577911377, + "learning_rate": 0.0001, + "loss": 0.0378, + "step": 6700 + }, + { + "epoch": 44.14473684210526, + "grad_norm": 1.784862756729126, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 6710 + }, + { + "epoch": 44.21052631578947, + "grad_norm": 2.099011182785034, + "learning_rate": 0.0001, + "loss": 0.0359, + "step": 6720 + }, + { + "epoch": 44.276315789473685, + "grad_norm": 1.912520170211792, + "learning_rate": 0.0001, + "loss": 0.0352, + "step": 6730 + }, + { + "epoch": 44.3421052631579, + "grad_norm": 1.9815537929534912, + "learning_rate": 0.0001, + "loss": 0.0366, + "step": 6740 + }, + { + "epoch": 44.4078947368421, + "grad_norm": 2.396345853805542, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 6750 + }, + { + "epoch": 44.473684210526315, + "grad_norm": 2.2811830043792725, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 6760 + }, + { + "epoch": 44.53947368421053, + "grad_norm": 2.1888914108276367, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 6770 + }, + { + "epoch": 44.60526315789474, + "grad_norm": 2.2141542434692383, + "learning_rate": 0.0001, + "loss": 0.0362, + "step": 6780 + }, + { + "epoch": 44.671052631578945, + "grad_norm": 1.9573568105697632, + "learning_rate": 0.0001, + "loss": 0.0356, + "step": 6790 + }, + { + "epoch": 44.73684210526316, + "grad_norm": 2.2900731563568115, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 6800 + }, + { + "epoch": 44.80263157894737, + "grad_norm": 2.2001209259033203, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 6810 + }, + { + "epoch": 44.86842105263158, + "grad_norm": 2.0125033855438232, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 6820 + }, + { + "epoch": 44.93421052631579, + "grad_norm": 1.908859372138977, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 6830 + }, + { + "epoch": 45.0, + "grad_norm": 2.0976343154907227, + "learning_rate": 0.0001, + "loss": 0.034, + "step": 6840 + }, + { + "epoch": 45.06578947368421, + "grad_norm": 2.10421085357666, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 6850 + }, + { + "epoch": 45.13157894736842, + "grad_norm": 1.7706947326660156, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 6860 + }, + { + "epoch": 45.19736842105263, + "grad_norm": 2.1556997299194336, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 6870 + }, + { + "epoch": 45.26315789473684, + "grad_norm": 2.2130556106567383, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 6880 + }, + { + "epoch": 45.328947368421055, + "grad_norm": 2.1554789543151855, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 6890 + }, + { + "epoch": 45.39473684210526, + "grad_norm": 1.984050989151001, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 6900 + }, + { + "epoch": 45.46052631578947, + "grad_norm": 1.7056621313095093, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 6910 + }, + { + "epoch": 45.526315789473685, + "grad_norm": 1.9881770610809326, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 6920 + }, + { + "epoch": 45.5921052631579, + "grad_norm": 1.829092264175415, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 6930 + }, + { + "epoch": 45.6578947368421, + "grad_norm": 1.8903696537017822, + "learning_rate": 0.0001, + "loss": 0.0299, + "step": 6940 + }, + { + "epoch": 45.723684210526315, + "grad_norm": 2.089012861251831, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 6950 + }, + { + "epoch": 45.78947368421053, + "grad_norm": 2.2835586071014404, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 6960 + }, + { + "epoch": 45.85526315789474, + "grad_norm": 2.3608603477478027, + "learning_rate": 0.0001, + "loss": 0.0363, + "step": 6970 + }, + { + "epoch": 45.921052631578945, + "grad_norm": 2.000751495361328, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 6980 + }, + { + "epoch": 45.98684210526316, + "grad_norm": 2.0586018562316895, + "learning_rate": 0.0001, + "loss": 0.0369, + "step": 6990 + }, + { + "epoch": 46.05263157894737, + "grad_norm": 2.077038526535034, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 7000 + }, + { + "epoch": 46.11842105263158, + "grad_norm": 2.457906484603882, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 7010 + }, + { + "epoch": 46.18421052631579, + "grad_norm": 2.286679267883301, + "learning_rate": 0.0001, + "loss": 0.0348, + "step": 7020 + }, + { + "epoch": 46.25, + "grad_norm": 2.5469555854797363, + "learning_rate": 0.0001, + "loss": 0.0343, + "step": 7030 + }, + { + "epoch": 46.31578947368421, + "grad_norm": 2.1791422367095947, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 7040 + }, + { + "epoch": 46.38157894736842, + "grad_norm": 3.0373752117156982, + "learning_rate": 0.0001, + "loss": 0.0351, + "step": 7050 + }, + { + "epoch": 46.44736842105263, + "grad_norm": 2.291721820831299, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 7060 + }, + { + "epoch": 46.51315789473684, + "grad_norm": 2.208719253540039, + "learning_rate": 0.0001, + "loss": 0.0308, + "step": 7070 + }, + { + "epoch": 46.578947368421055, + "grad_norm": 2.194161891937256, + "learning_rate": 0.0001, + "loss": 0.0345, + "step": 7080 + }, + { + "epoch": 46.64473684210526, + "grad_norm": 2.2741446495056152, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 7090 + }, + { + "epoch": 46.71052631578947, + "grad_norm": 2.3912644386291504, + "learning_rate": 0.0001, + "loss": 0.0355, + "step": 7100 + }, + { + "epoch": 46.776315789473685, + "grad_norm": 2.1767451763153076, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 7110 + }, + { + "epoch": 46.8421052631579, + "grad_norm": 2.3903443813323975, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 7120 + }, + { + "epoch": 46.9078947368421, + "grad_norm": 2.259274482727051, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 7130 + }, + { + "epoch": 46.973684210526315, + "grad_norm": 2.3009188175201416, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 7140 + }, + { + "epoch": 47.03947368421053, + "grad_norm": 2.3467867374420166, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 7150 + }, + { + "epoch": 47.10526315789474, + "grad_norm": 2.426922082901001, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 7160 + }, + { + "epoch": 47.171052631578945, + "grad_norm": 2.1517276763916016, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 7170 + }, + { + "epoch": 47.23684210526316, + "grad_norm": 2.0987422466278076, + "learning_rate": 0.0001, + "loss": 0.0281, + "step": 7180 + }, + { + "epoch": 47.30263157894737, + "grad_norm": 1.8270368576049805, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 7190 + }, + { + "epoch": 47.36842105263158, + "grad_norm": 2.406790018081665, + "learning_rate": 0.0001, + "loss": 0.0296, + "step": 7200 + }, + { + "epoch": 47.43421052631579, + "grad_norm": 1.9560794830322266, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 7210 + }, + { + "epoch": 47.5, + "grad_norm": 1.9922142028808594, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 7220 + }, + { + "epoch": 47.56578947368421, + "grad_norm": 1.9082542657852173, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 7230 + }, + { + "epoch": 47.63157894736842, + "grad_norm": 2.251033306121826, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 7240 + }, + { + "epoch": 47.69736842105263, + "grad_norm": 1.8770537376403809, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 7250 + }, + { + "epoch": 47.76315789473684, + "grad_norm": 2.259347438812256, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 7260 + }, + { + "epoch": 47.828947368421055, + "grad_norm": 2.3631973266601562, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 7270 + }, + { + "epoch": 47.89473684210526, + "grad_norm": 2.6666479110717773, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 7280 + }, + { + "epoch": 47.96052631578947, + "grad_norm": 2.482814073562622, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 7290 + }, + { + "epoch": 48.026315789473685, + "grad_norm": 2.0329630374908447, + "learning_rate": 0.0001, + "loss": 0.0357, + "step": 7300 + }, + { + "epoch": 48.0921052631579, + "grad_norm": 2.2136952877044678, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 7310 + }, + { + "epoch": 48.1578947368421, + "grad_norm": 2.1611454486846924, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 7320 + }, + { + "epoch": 48.223684210526315, + "grad_norm": 2.0240912437438965, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 7330 + }, + { + "epoch": 48.28947368421053, + "grad_norm": 2.6601762771606445, + "learning_rate": 0.0001, + "loss": 0.0338, + "step": 7340 + }, + { + "epoch": 48.35526315789474, + "grad_norm": 2.2921714782714844, + "learning_rate": 0.0001, + "loss": 0.0305, + "step": 7350 + }, + { + "epoch": 48.421052631578945, + "grad_norm": 2.367628812789917, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 7360 + }, + { + "epoch": 48.48684210526316, + "grad_norm": 2.152510166168213, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 7370 + }, + { + "epoch": 48.55263157894737, + "grad_norm": 2.1306285858154297, + "learning_rate": 0.0001, + "loss": 0.0329, + "step": 7380 + }, + { + "epoch": 48.61842105263158, + "grad_norm": 2.3841469287872314, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 7390 + }, + { + "epoch": 48.68421052631579, + "grad_norm": 2.2278404235839844, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 7400 + }, + { + "epoch": 48.75, + "grad_norm": 2.0181496143341064, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 7410 + }, + { + "epoch": 48.81578947368421, + "grad_norm": 1.9607185125350952, + "learning_rate": 0.0001, + "loss": 0.0349, + "step": 7420 + }, + { + "epoch": 48.88157894736842, + "grad_norm": 2.1952409744262695, + "learning_rate": 0.0001, + "loss": 0.0347, + "step": 7430 + }, + { + "epoch": 48.94736842105263, + "grad_norm": 1.9687610864639282, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 7440 + }, + { + "epoch": 49.01315789473684, + "grad_norm": 2.2068471908569336, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 7450 + }, + { + "epoch": 49.078947368421055, + "grad_norm": 2.1997668743133545, + "learning_rate": 0.0001, + "loss": 0.0339, + "step": 7460 + }, + { + "epoch": 49.14473684210526, + "grad_norm": 2.183655261993408, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 7470 + }, + { + "epoch": 49.21052631578947, + "grad_norm": 1.942799687385559, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 7480 + }, + { + "epoch": 49.276315789473685, + "grad_norm": 1.9380285739898682, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 7490 + }, + { + "epoch": 49.3421052631579, + "grad_norm": 2.13240122795105, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 7500 + }, + { + "epoch": 49.4078947368421, + "grad_norm": 2.2831900119781494, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 7510 + }, + { + "epoch": 49.473684210526315, + "grad_norm": 2.127020835876465, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 7520 + }, + { + "epoch": 49.53947368421053, + "grad_norm": 2.251577138900757, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 7530 + }, + { + "epoch": 49.60526315789474, + "grad_norm": 2.340786933898926, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 7540 + }, + { + "epoch": 49.671052631578945, + "grad_norm": 2.267075300216675, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 7550 + }, + { + "epoch": 49.73684210526316, + "grad_norm": 2.334462881088257, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 7560 + }, + { + "epoch": 49.80263157894737, + "grad_norm": 2.038785696029663, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 7570 + }, + { + "epoch": 49.86842105263158, + "grad_norm": 2.192054510116577, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 7580 + }, + { + "epoch": 49.93421052631579, + "grad_norm": 2.1006948947906494, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 7590 + }, + { + "epoch": 50.0, + "grad_norm": 1.9702825546264648, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 7600 + }, + { + "epoch": 50.06578947368421, + "grad_norm": 1.8587983846664429, + "learning_rate": 0.0001, + "loss": 0.0297, + "step": 7610 + }, + { + "epoch": 50.13157894736842, + "grad_norm": 2.164700984954834, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 7620 + }, + { + "epoch": 50.19736842105263, + "grad_norm": 2.0936055183410645, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 7630 + }, + { + "epoch": 50.26315789473684, + "grad_norm": 2.155484914779663, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 7640 + }, + { + "epoch": 50.328947368421055, + "grad_norm": 2.1522960662841797, + "learning_rate": 0.0001, + "loss": 0.035, + "step": 7650 + }, + { + "epoch": 50.39473684210526, + "grad_norm": 1.9459351301193237, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 7660 + }, + { + "epoch": 50.46052631578947, + "grad_norm": 2.1670145988464355, + "learning_rate": 0.0001, + "loss": 0.0305, + "step": 7670 + }, + { + "epoch": 50.526315789473685, + "grad_norm": 2.069352626800537, + "learning_rate": 0.0001, + "loss": 0.0344, + "step": 7680 + }, + { + "epoch": 50.5921052631579, + "grad_norm": 1.8716386556625366, + "learning_rate": 0.0001, + "loss": 0.03, + "step": 7690 + }, + { + "epoch": 50.6578947368421, + "grad_norm": 2.056234836578369, + "learning_rate": 0.0001, + "loss": 0.0303, + "step": 7700 + }, + { + "epoch": 50.723684210526315, + "grad_norm": 2.560983896255493, + "learning_rate": 0.0001, + "loss": 0.0327, + "step": 7710 + }, + { + "epoch": 50.78947368421053, + "grad_norm": 2.280230760574341, + "learning_rate": 0.0001, + "loss": 0.0354, + "step": 7720 + }, + { + "epoch": 50.85526315789474, + "grad_norm": 2.542572498321533, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 7730 + }, + { + "epoch": 50.921052631578945, + "grad_norm": 2.2160181999206543, + "learning_rate": 0.0001, + "loss": 0.033, + "step": 7740 + }, + { + "epoch": 50.98684210526316, + "grad_norm": 1.9975266456604004, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 7750 + }, + { + "epoch": 51.05263157894737, + "grad_norm": 2.1476669311523438, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 7760 + }, + { + "epoch": 51.11842105263158, + "grad_norm": 1.9486243724822998, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 7770 + }, + { + "epoch": 51.18421052631579, + "grad_norm": 1.993624210357666, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 7780 + }, + { + "epoch": 51.25, + "grad_norm": 2.0168542861938477, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 7790 + }, + { + "epoch": 51.31578947368421, + "grad_norm": 2.1617634296417236, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 7800 + }, + { + "epoch": 51.38157894736842, + "grad_norm": 1.82743239402771, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 7810 + }, + { + "epoch": 51.44736842105263, + "grad_norm": 1.7217556238174438, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 7820 + }, + { + "epoch": 51.51315789473684, + "grad_norm": 1.9004185199737549, + "learning_rate": 0.0001, + "loss": 0.0361, + "step": 7830 + }, + { + "epoch": 51.578947368421055, + "grad_norm": 2.210575580596924, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 7840 + }, + { + "epoch": 51.64473684210526, + "grad_norm": 1.8974437713623047, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 7850 + }, + { + "epoch": 51.71052631578947, + "grad_norm": 1.8807051181793213, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 7860 + }, + { + "epoch": 51.776315789473685, + "grad_norm": 1.9475070238113403, + "learning_rate": 0.0001, + "loss": 0.0337, + "step": 7870 + }, + { + "epoch": 51.8421052631579, + "grad_norm": 1.9319655895233154, + "learning_rate": 0.0001, + "loss": 0.0364, + "step": 7880 + }, + { + "epoch": 51.9078947368421, + "grad_norm": 1.9801864624023438, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 7890 + }, + { + "epoch": 51.973684210526315, + "grad_norm": 1.8490853309631348, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 7900 + }, + { + "epoch": 52.03947368421053, + "grad_norm": 1.844796061515808, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 7910 + }, + { + "epoch": 52.10526315789474, + "grad_norm": 2.170541763305664, + "learning_rate": 0.0001, + "loss": 0.0304, + "step": 7920 + }, + { + "epoch": 52.171052631578945, + "grad_norm": 1.8265526294708252, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 7930 + }, + { + "epoch": 52.23684210526316, + "grad_norm": 1.8391716480255127, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 7940 + }, + { + "epoch": 52.30263157894737, + "grad_norm": 2.0495717525482178, + "learning_rate": 0.0001, + "loss": 0.0336, + "step": 7950 + }, + { + "epoch": 52.36842105263158, + "grad_norm": 2.0973620414733887, + "learning_rate": 0.0001, + "loss": 0.0328, + "step": 7960 + }, + { + "epoch": 52.43421052631579, + "grad_norm": 2.0247726440429688, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 7970 + }, + { + "epoch": 52.5, + "grad_norm": 2.3339807987213135, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 7980 + }, + { + "epoch": 52.56578947368421, + "grad_norm": 2.4479763507843018, + "learning_rate": 0.0001, + "loss": 0.0342, + "step": 7990 + }, + { + "epoch": 52.63157894736842, + "grad_norm": 2.1954760551452637, + "learning_rate": 0.0001, + "loss": 0.03, + "step": 8000 + }, + { + "epoch": 52.69736842105263, + "grad_norm": 2.01871919631958, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 8010 + }, + { + "epoch": 52.76315789473684, + "grad_norm": 1.8598896265029907, + "learning_rate": 0.0001, + "loss": 0.0304, + "step": 8020 + }, + { + "epoch": 52.828947368421055, + "grad_norm": 2.171518087387085, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 8030 + }, + { + "epoch": 52.89473684210526, + "grad_norm": 2.029583692550659, + "learning_rate": 0.0001, + "loss": 0.0308, + "step": 8040 + }, + { + "epoch": 52.96052631578947, + "grad_norm": 1.7323065996170044, + "learning_rate": 0.0001, + "loss": 0.0322, + "step": 8050 + }, + { + "epoch": 53.026315789473685, + "grad_norm": 2.0981595516204834, + "learning_rate": 0.0001, + "loss": 0.03, + "step": 8060 + }, + { + "epoch": 53.0921052631579, + "grad_norm": 1.671781063079834, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 8070 + }, + { + "epoch": 53.1578947368421, + "grad_norm": 1.8050312995910645, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 8080 + }, + { + "epoch": 53.223684210526315, + "grad_norm": 2.084514617919922, + "learning_rate": 0.0001, + "loss": 0.0377, + "step": 8090 + }, + { + "epoch": 53.28947368421053, + "grad_norm": 1.9709466695785522, + "learning_rate": 0.0001, + "loss": 0.0304, + "step": 8100 + }, + { + "epoch": 53.35526315789474, + "grad_norm": 1.856992244720459, + "learning_rate": 0.0001, + "loss": 0.0303, + "step": 8110 + }, + { + "epoch": 53.421052631578945, + "grad_norm": 1.892453670501709, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 8120 + }, + { + "epoch": 53.48684210526316, + "grad_norm": 1.9388136863708496, + "learning_rate": 0.0001, + "loss": 0.0303, + "step": 8130 + }, + { + "epoch": 53.55263157894737, + "grad_norm": 1.8416920900344849, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 8140 + }, + { + "epoch": 53.61842105263158, + "grad_norm": 2.1083364486694336, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 8150 + }, + { + "epoch": 53.68421052631579, + "grad_norm": 2.071626663208008, + "learning_rate": 0.0001, + "loss": 0.028, + "step": 8160 + }, + { + "epoch": 53.75, + "grad_norm": 1.8106697797775269, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 8170 + }, + { + "epoch": 53.81578947368421, + "grad_norm": 1.9116227626800537, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 8180 + }, + { + "epoch": 53.88157894736842, + "grad_norm": 1.6516140699386597, + "learning_rate": 0.0001, + "loss": 0.0303, + "step": 8190 + }, + { + "epoch": 53.94736842105263, + "grad_norm": 2.0792529582977295, + "learning_rate": 0.0001, + "loss": 0.0331, + "step": 8200 + }, + { + "epoch": 54.01315789473684, + "grad_norm": 2.022188425064087, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 8210 + }, + { + "epoch": 54.078947368421055, + "grad_norm": 1.862682819366455, + "learning_rate": 0.0001, + "loss": 0.029, + "step": 8220 + }, + { + "epoch": 54.14473684210526, + "grad_norm": 1.7206437587738037, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 8230 + }, + { + "epoch": 54.21052631578947, + "grad_norm": 1.6664676666259766, + "learning_rate": 0.0001, + "loss": 0.0306, + "step": 8240 + }, + { + "epoch": 54.276315789473685, + "grad_norm": 2.4094207286834717, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 8250 + }, + { + "epoch": 54.3421052631579, + "grad_norm": 1.9840387105941772, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 8260 + }, + { + "epoch": 54.4078947368421, + "grad_norm": 2.449998140335083, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 8270 + }, + { + "epoch": 54.473684210526315, + "grad_norm": 2.195594072341919, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 8280 + }, + { + "epoch": 54.53947368421053, + "grad_norm": 2.2626521587371826, + "learning_rate": 0.0001, + "loss": 0.0319, + "step": 8290 + }, + { + "epoch": 54.60526315789474, + "grad_norm": 1.693280816078186, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 8300 + }, + { + "epoch": 54.671052631578945, + "grad_norm": 1.8923887014389038, + "learning_rate": 0.0001, + "loss": 0.0296, + "step": 8310 + }, + { + "epoch": 54.73684210526316, + "grad_norm": 1.639238953590393, + "learning_rate": 0.0001, + "loss": 0.0289, + "step": 8320 + }, + { + "epoch": 54.80263157894737, + "grad_norm": 1.9876776933670044, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 8330 + }, + { + "epoch": 54.86842105263158, + "grad_norm": 1.686047077178955, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 8340 + }, + { + "epoch": 54.93421052631579, + "grad_norm": 2.186433792114258, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 8350 + }, + { + "epoch": 55.0, + "grad_norm": 1.9404265880584717, + "learning_rate": 0.0001, + "loss": 0.0302, + "step": 8360 + }, + { + "epoch": 55.06578947368421, + "grad_norm": 2.0368666648864746, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 8370 + }, + { + "epoch": 55.13157894736842, + "grad_norm": 1.385018229484558, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 8380 + }, + { + "epoch": 55.19736842105263, + "grad_norm": 2.025214433670044, + "learning_rate": 0.0001, + "loss": 0.0291, + "step": 8390 + }, + { + "epoch": 55.26315789473684, + "grad_norm": 1.7362509965896606, + "learning_rate": 0.0001, + "loss": 0.0302, + "step": 8400 + }, + { + "epoch": 55.328947368421055, + "grad_norm": 1.9747296571731567, + "learning_rate": 0.0001, + "loss": 0.0317, + "step": 8410 + }, + { + "epoch": 55.39473684210526, + "grad_norm": 1.8088208436965942, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 8420 + }, + { + "epoch": 55.46052631578947, + "grad_norm": 1.8630869388580322, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 8430 + }, + { + "epoch": 55.526315789473685, + "grad_norm": 1.8169368505477905, + "learning_rate": 0.0001, + "loss": 0.0294, + "step": 8440 + }, + { + "epoch": 55.5921052631579, + "grad_norm": 1.717041254043579, + "learning_rate": 0.0001, + "loss": 0.0304, + "step": 8450 + }, + { + "epoch": 55.6578947368421, + "grad_norm": 2.054370164871216, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 8460 + }, + { + "epoch": 55.723684210526315, + "grad_norm": 1.8718016147613525, + "learning_rate": 0.0001, + "loss": 0.0294, + "step": 8470 + }, + { + "epoch": 55.78947368421053, + "grad_norm": 2.4793713092803955, + "learning_rate": 0.0001, + "loss": 0.0302, + "step": 8480 + }, + { + "epoch": 55.85526315789474, + "grad_norm": 2.1499078273773193, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 8490 + }, + { + "epoch": 55.921052631578945, + "grad_norm": 2.1316916942596436, + "learning_rate": 0.0001, + "loss": 0.032, + "step": 8500 + }, + { + "epoch": 55.98684210526316, + "grad_norm": 2.108400583267212, + "learning_rate": 0.0001, + "loss": 0.0279, + "step": 8510 + }, + { + "epoch": 56.05263157894737, + "grad_norm": 1.7546727657318115, + "learning_rate": 0.0001, + "loss": 0.0305, + "step": 8520 + }, + { + "epoch": 56.11842105263158, + "grad_norm": 1.8155990839004517, + "learning_rate": 0.0001, + "loss": 0.0285, + "step": 8530 + }, + { + "epoch": 56.18421052631579, + "grad_norm": 1.8450534343719482, + "learning_rate": 0.0001, + "loss": 0.0299, + "step": 8540 + }, + { + "epoch": 56.25, + "grad_norm": 1.9651875495910645, + "learning_rate": 0.0001, + "loss": 0.0308, + "step": 8550 + }, + { + "epoch": 56.31578947368421, + "grad_norm": 1.9930384159088135, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 8560 + }, + { + "epoch": 56.38157894736842, + "grad_norm": 2.035289764404297, + "learning_rate": 0.0001, + "loss": 0.0296, + "step": 8570 + }, + { + "epoch": 56.44736842105263, + "grad_norm": 2.206793785095215, + "learning_rate": 0.0001, + "loss": 0.0295, + "step": 8580 + }, + { + "epoch": 56.51315789473684, + "grad_norm": 2.2738990783691406, + "learning_rate": 0.0001, + "loss": 0.0295, + "step": 8590 + }, + { + "epoch": 56.578947368421055, + "grad_norm": 1.6985399723052979, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 8600 + }, + { + "epoch": 56.64473684210526, + "grad_norm": 1.7506465911865234, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 8610 + }, + { + "epoch": 56.71052631578947, + "grad_norm": 1.8177592754364014, + "learning_rate": 0.0001, + "loss": 0.0299, + "step": 8620 + }, + { + "epoch": 56.776315789473685, + "grad_norm": 2.0824978351593018, + "learning_rate": 0.0001, + "loss": 0.0332, + "step": 8630 + }, + { + "epoch": 56.8421052631579, + "grad_norm": 1.8708689212799072, + "learning_rate": 0.0001, + "loss": 0.0294, + "step": 8640 + }, + { + "epoch": 56.9078947368421, + "grad_norm": 1.536291480064392, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 8650 + }, + { + "epoch": 56.973684210526315, + "grad_norm": 1.5639556646347046, + "learning_rate": 0.0001, + "loss": 0.0292, + "step": 8660 + }, + { + "epoch": 57.03947368421053, + "grad_norm": 1.836817979812622, + "learning_rate": 0.0001, + "loss": 0.0284, + "step": 8670 + }, + { + "epoch": 57.10526315789474, + "grad_norm": 1.6977314949035645, + "learning_rate": 0.0001, + "loss": 0.028, + "step": 8680 + }, + { + "epoch": 57.171052631578945, + "grad_norm": 1.9491312503814697, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 8690 + }, + { + "epoch": 57.23684210526316, + "grad_norm": 1.9810190200805664, + "learning_rate": 0.0001, + "loss": 0.0299, + "step": 8700 + }, + { + "epoch": 57.30263157894737, + "grad_norm": 3.380073308944702, + "learning_rate": 0.0001, + "loss": 0.0305, + "step": 8710 + }, + { + "epoch": 57.36842105263158, + "grad_norm": 2.4485232830047607, + "learning_rate": 0.0001, + "loss": 0.0324, + "step": 8720 + }, + { + "epoch": 57.43421052631579, + "grad_norm": 2.817413330078125, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 8730 + }, + { + "epoch": 57.5, + "grad_norm": 2.479456901550293, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 8740 + }, + { + "epoch": 57.56578947368421, + "grad_norm": 2.279740571975708, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 8750 + }, + { + "epoch": 57.63157894736842, + "grad_norm": 2.4175727367401123, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 8760 + }, + { + "epoch": 57.69736842105263, + "grad_norm": 1.8929423093795776, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 8770 + }, + { + "epoch": 57.76315789473684, + "grad_norm": 1.9583559036254883, + "learning_rate": 0.0001, + "loss": 0.0271, + "step": 8780 + }, + { + "epoch": 57.828947368421055, + "grad_norm": 2.2703166007995605, + "learning_rate": 0.0001, + "loss": 0.0274, + "step": 8790 + }, + { + "epoch": 57.89473684210526, + "grad_norm": 2.0460057258605957, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 8800 + }, + { + "epoch": 57.96052631578947, + "grad_norm": 2.1975557804107666, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 8810 + }, + { + "epoch": 58.026315789473685, + "grad_norm": 1.971722960472107, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 8820 + }, + { + "epoch": 58.0921052631579, + "grad_norm": 2.0381031036376953, + "learning_rate": 0.0001, + "loss": 0.0289, + "step": 8830 + }, + { + "epoch": 58.1578947368421, + "grad_norm": 1.650619626045227, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 8840 + }, + { + "epoch": 58.223684210526315, + "grad_norm": 2.122473955154419, + "learning_rate": 0.0001, + "loss": 0.0284, + "step": 8850 + }, + { + "epoch": 58.28947368421053, + "grad_norm": 1.7196934223175049, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 8860 + }, + { + "epoch": 58.35526315789474, + "grad_norm": 1.6575008630752563, + "learning_rate": 0.0001, + "loss": 0.0277, + "step": 8870 + }, + { + "epoch": 58.421052631578945, + "grad_norm": 1.7115286588668823, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 8880 + }, + { + "epoch": 58.48684210526316, + "grad_norm": 1.908919095993042, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 8890 + }, + { + "epoch": 58.55263157894737, + "grad_norm": 1.7055505514144897, + "learning_rate": 0.0001, + "loss": 0.0299, + "step": 8900 + }, + { + "epoch": 58.61842105263158, + "grad_norm": 1.72140371799469, + "learning_rate": 0.0001, + "loss": 0.03, + "step": 8910 + }, + { + "epoch": 58.68421052631579, + "grad_norm": 1.752676248550415, + "learning_rate": 0.0001, + "loss": 0.0288, + "step": 8920 + }, + { + "epoch": 58.75, + "grad_norm": 1.886494517326355, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 8930 + }, + { + "epoch": 58.81578947368421, + "grad_norm": 1.7200045585632324, + "learning_rate": 0.0001, + "loss": 0.0308, + "step": 8940 + }, + { + "epoch": 58.88157894736842, + "grad_norm": 1.7977442741394043, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 8950 + }, + { + "epoch": 58.94736842105263, + "grad_norm": 1.9197369813919067, + "learning_rate": 0.0001, + "loss": 0.0353, + "step": 8960 + }, + { + "epoch": 59.01315789473684, + "grad_norm": 1.717230200767517, + "learning_rate": 0.0001, + "loss": 0.0335, + "step": 8970 + }, + { + "epoch": 59.078947368421055, + "grad_norm": 1.8992480039596558, + "learning_rate": 0.0001, + "loss": 0.0305, + "step": 8980 + }, + { + "epoch": 59.14473684210526, + "grad_norm": 1.9328705072402954, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 8990 + }, + { + "epoch": 59.21052631578947, + "grad_norm": 1.7343662977218628, + "learning_rate": 0.0001, + "loss": 0.0295, + "step": 9000 + }, + { + "epoch": 59.276315789473685, + "grad_norm": 1.6927893161773682, + "learning_rate": 0.0001, + "loss": 0.0326, + "step": 9010 + }, + { + "epoch": 59.3421052631579, + "grad_norm": 1.746718406677246, + "learning_rate": 0.0001, + "loss": 0.0308, + "step": 9020 + }, + { + "epoch": 59.4078947368421, + "grad_norm": 2.095132827758789, + "learning_rate": 0.0001, + "loss": 0.0296, + "step": 9030 + }, + { + "epoch": 59.473684210526315, + "grad_norm": 1.837533950805664, + "learning_rate": 0.0001, + "loss": 0.0346, + "step": 9040 + }, + { + "epoch": 59.53947368421053, + "grad_norm": 1.8166559934616089, + "learning_rate": 0.0001, + "loss": 0.0295, + "step": 9050 + }, + { + "epoch": 59.60526315789474, + "grad_norm": 1.6477857828140259, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 9060 + }, + { + "epoch": 59.671052631578945, + "grad_norm": 1.8890045881271362, + "learning_rate": 0.0001, + "loss": 0.0294, + "step": 9070 + }, + { + "epoch": 59.73684210526316, + "grad_norm": 2.0278661251068115, + "learning_rate": 0.0001, + "loss": 0.0334, + "step": 9080 + }, + { + "epoch": 59.80263157894737, + "grad_norm": 2.0316243171691895, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 9090 + }, + { + "epoch": 59.86842105263158, + "grad_norm": 1.8405317068099976, + "learning_rate": 0.0001, + "loss": 0.0282, + "step": 9100 + }, + { + "epoch": 59.93421052631579, + "grad_norm": 2.397641897201538, + "learning_rate": 0.0001, + "loss": 0.0295, + "step": 9110 + }, + { + "epoch": 60.0, + "grad_norm": 1.7151198387145996, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 9120 + }, + { + "epoch": 60.06578947368421, + "grad_norm": 2.201002359390259, + "learning_rate": 0.0001, + "loss": 0.0294, + "step": 9130 + }, + { + "epoch": 60.13157894736842, + "grad_norm": 1.911597490310669, + "learning_rate": 0.0001, + "loss": 0.0289, + "step": 9140 + }, + { + "epoch": 60.19736842105263, + "grad_norm": 2.3099772930145264, + "learning_rate": 0.0001, + "loss": 0.0279, + "step": 9150 + }, + { + "epoch": 60.26315789473684, + "grad_norm": 2.0865139961242676, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 9160 + }, + { + "epoch": 60.328947368421055, + "grad_norm": 2.1491951942443848, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 9170 + }, + { + "epoch": 60.39473684210526, + "grad_norm": 1.7886714935302734, + "learning_rate": 0.0001, + "loss": 0.0286, + "step": 9180 + }, + { + "epoch": 60.46052631578947, + "grad_norm": 2.1274571418762207, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 9190 + }, + { + "epoch": 60.526315789473685, + "grad_norm": 2.1661720275878906, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 9200 + }, + { + "epoch": 60.5921052631579, + "grad_norm": 1.6278198957443237, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 9210 + }, + { + "epoch": 60.6578947368421, + "grad_norm": 1.8151003122329712, + "learning_rate": 0.0001, + "loss": 0.0259, + "step": 9220 + }, + { + "epoch": 60.723684210526315, + "grad_norm": 1.9003932476043701, + "learning_rate": 0.0001, + "loss": 0.0291, + "step": 9230 + }, + { + "epoch": 60.78947368421053, + "grad_norm": 1.6437238454818726, + "learning_rate": 0.0001, + "loss": 0.0274, + "step": 9240 + }, + { + "epoch": 60.85526315789474, + "grad_norm": 1.418359637260437, + "learning_rate": 0.0001, + "loss": 0.0285, + "step": 9250 + }, + { + "epoch": 60.921052631578945, + "grad_norm": 1.5304903984069824, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 9260 + }, + { + "epoch": 60.98684210526316, + "grad_norm": 1.7247731685638428, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 9270 + }, + { + "epoch": 61.05263157894737, + "grad_norm": 1.9069534540176392, + "learning_rate": 0.0001, + "loss": 0.0318, + "step": 9280 + }, + { + "epoch": 61.11842105263158, + "grad_norm": 1.6469271183013916, + "learning_rate": 0.0001, + "loss": 0.0325, + "step": 9290 + }, + { + "epoch": 61.18421052631579, + "grad_norm": 1.5234088897705078, + "learning_rate": 0.0001, + "loss": 0.028, + "step": 9300 + }, + { + "epoch": 61.25, + "grad_norm": 1.9155454635620117, + "learning_rate": 0.0001, + "loss": 0.0279, + "step": 9310 + }, + { + "epoch": 61.31578947368421, + "grad_norm": 1.3420042991638184, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 9320 + }, + { + "epoch": 61.38157894736842, + "grad_norm": 1.73533296585083, + "learning_rate": 0.0001, + "loss": 0.0323, + "step": 9330 + }, + { + "epoch": 61.44736842105263, + "grad_norm": 2.0660176277160645, + "learning_rate": 0.0001, + "loss": 0.0302, + "step": 9340 + }, + { + "epoch": 61.51315789473684, + "grad_norm": 1.8069857358932495, + "learning_rate": 0.0001, + "loss": 0.0314, + "step": 9350 + }, + { + "epoch": 61.578947368421055, + "grad_norm": 2.0904011726379395, + "learning_rate": 0.0001, + "loss": 0.0303, + "step": 9360 + }, + { + "epoch": 61.64473684210526, + "grad_norm": 1.8158031702041626, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 9370 + }, + { + "epoch": 61.71052631578947, + "grad_norm": 1.410555362701416, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 9380 + }, + { + "epoch": 61.776315789473685, + "grad_norm": 1.4610170125961304, + "learning_rate": 0.0001, + "loss": 0.0333, + "step": 9390 + }, + { + "epoch": 61.8421052631579, + "grad_norm": 1.5231578350067139, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 9400 + }, + { + "epoch": 61.9078947368421, + "grad_norm": 1.829412817955017, + "learning_rate": 0.0001, + "loss": 0.0315, + "step": 9410 + }, + { + "epoch": 61.973684210526315, + "grad_norm": 1.9624525308609009, + "learning_rate": 0.0001, + "loss": 0.0302, + "step": 9420 + }, + { + "epoch": 62.03947368421053, + "grad_norm": 1.5953247547149658, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 9430 + }, + { + "epoch": 62.10526315789474, + "grad_norm": 1.8386650085449219, + "learning_rate": 0.0001, + "loss": 0.0321, + "step": 9440 + }, + { + "epoch": 62.171052631578945, + "grad_norm": 2.2038257122039795, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 9450 + }, + { + "epoch": 62.23684210526316, + "grad_norm": 2.08797550201416, + "learning_rate": 0.0001, + "loss": 0.0288, + "step": 9460 + }, + { + "epoch": 62.30263157894737, + "grad_norm": 1.9139946699142456, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 9470 + }, + { + "epoch": 62.36842105263158, + "grad_norm": 2.331393241882324, + "learning_rate": 0.0001, + "loss": 0.0292, + "step": 9480 + }, + { + "epoch": 62.43421052631579, + "grad_norm": 1.5585893392562866, + "learning_rate": 0.0001, + "loss": 0.0285, + "step": 9490 + }, + { + "epoch": 62.5, + "grad_norm": 1.67988920211792, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 9500 + }, + { + "epoch": 62.56578947368421, + "grad_norm": 1.9753636121749878, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 9510 + }, + { + "epoch": 62.63157894736842, + "grad_norm": 1.5838857889175415, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 9520 + }, + { + "epoch": 62.69736842105263, + "grad_norm": 1.9455825090408325, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 9530 + }, + { + "epoch": 62.76315789473684, + "grad_norm": 2.0940582752227783, + "learning_rate": 0.0001, + "loss": 0.029, + "step": 9540 + }, + { + "epoch": 62.828947368421055, + "grad_norm": 2.082871198654175, + "learning_rate": 0.0001, + "loss": 0.0271, + "step": 9550 + }, + { + "epoch": 62.89473684210526, + "grad_norm": 1.8131487369537354, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 9560 + }, + { + "epoch": 62.96052631578947, + "grad_norm": 1.8822680711746216, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 9570 + }, + { + "epoch": 63.026315789473685, + "grad_norm": 1.9734561443328857, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 9580 + }, + { + "epoch": 63.0921052631579, + "grad_norm": 1.6446675062179565, + "learning_rate": 0.0001, + "loss": 0.0285, + "step": 9590 + }, + { + "epoch": 63.1578947368421, + "grad_norm": 1.587969422340393, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 9600 + }, + { + "epoch": 63.223684210526315, + "grad_norm": 2.2087881565093994, + "learning_rate": 0.0001, + "loss": 0.03, + "step": 9610 + }, + { + "epoch": 63.28947368421053, + "grad_norm": 2.1909730434417725, + "learning_rate": 0.0001, + "loss": 0.0296, + "step": 9620 + }, + { + "epoch": 63.35526315789474, + "grad_norm": 1.4683645963668823, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 9630 + }, + { + "epoch": 63.421052631578945, + "grad_norm": 1.7564547061920166, + "learning_rate": 0.0001, + "loss": 0.0273, + "step": 9640 + }, + { + "epoch": 63.48684210526316, + "grad_norm": 1.922224998474121, + "learning_rate": 0.0001, + "loss": 0.03, + "step": 9650 + }, + { + "epoch": 63.55263157894737, + "grad_norm": 1.8191381692886353, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 9660 + }, + { + "epoch": 63.61842105263158, + "grad_norm": 1.686124324798584, + "learning_rate": 0.0001, + "loss": 0.0307, + "step": 9670 + }, + { + "epoch": 63.68421052631579, + "grad_norm": 1.717603325843811, + "learning_rate": 0.0001, + "loss": 0.0285, + "step": 9680 + }, + { + "epoch": 63.75, + "grad_norm": 1.855259656906128, + "learning_rate": 0.0001, + "loss": 0.0276, + "step": 9690 + }, + { + "epoch": 63.81578947368421, + "grad_norm": 1.6836774349212646, + "learning_rate": 0.0001, + "loss": 0.0275, + "step": 9700 + }, + { + "epoch": 63.88157894736842, + "grad_norm": 1.776174545288086, + "learning_rate": 0.0001, + "loss": 0.0288, + "step": 9710 + }, + { + "epoch": 63.94736842105263, + "grad_norm": 1.868103265762329, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 9720 + }, + { + "epoch": 64.01315789473684, + "grad_norm": 1.6347148418426514, + "learning_rate": 0.0001, + "loss": 0.0286, + "step": 9730 + }, + { + "epoch": 64.07894736842105, + "grad_norm": 1.8645349740982056, + "learning_rate": 0.0001, + "loss": 0.0279, + "step": 9740 + }, + { + "epoch": 64.14473684210526, + "grad_norm": 2.106388807296753, + "learning_rate": 0.0001, + "loss": 0.0289, + "step": 9750 + }, + { + "epoch": 64.21052631578948, + "grad_norm": 1.4443974494934082, + "learning_rate": 0.0001, + "loss": 0.0289, + "step": 9760 + }, + { + "epoch": 64.27631578947368, + "grad_norm": 1.8115508556365967, + "learning_rate": 0.0001, + "loss": 0.0298, + "step": 9770 + }, + { + "epoch": 64.34210526315789, + "grad_norm": 1.6376737356185913, + "learning_rate": 0.0001, + "loss": 0.0275, + "step": 9780 + }, + { + "epoch": 64.40789473684211, + "grad_norm": 1.7943278551101685, + "learning_rate": 0.0001, + "loss": 0.0297, + "step": 9790 + }, + { + "epoch": 64.47368421052632, + "grad_norm": 1.6659159660339355, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 9800 + }, + { + "epoch": 64.53947368421052, + "grad_norm": 1.792837381362915, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 9810 + }, + { + "epoch": 64.60526315789474, + "grad_norm": 1.9182575941085815, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 9820 + }, + { + "epoch": 64.67105263157895, + "grad_norm": 1.6181617975234985, + "learning_rate": 0.0001, + "loss": 0.0284, + "step": 9830 + }, + { + "epoch": 64.73684210526316, + "grad_norm": 1.5407286882400513, + "learning_rate": 0.0001, + "loss": 0.0298, + "step": 9840 + }, + { + "epoch": 64.80263157894737, + "grad_norm": 1.621099591255188, + "learning_rate": 0.0001, + "loss": 0.0295, + "step": 9850 + }, + { + "epoch": 64.86842105263158, + "grad_norm": 1.7339770793914795, + "learning_rate": 0.0001, + "loss": 0.0306, + "step": 9860 + }, + { + "epoch": 64.9342105263158, + "grad_norm": 1.9840919971466064, + "learning_rate": 0.0001, + "loss": 0.0291, + "step": 9870 + }, + { + "epoch": 65.0, + "grad_norm": 1.8245599269866943, + "learning_rate": 0.0001, + "loss": 0.0316, + "step": 9880 + }, + { + "epoch": 65.0657894736842, + "grad_norm": 1.6021732091903687, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 9890 + }, + { + "epoch": 65.13157894736842, + "grad_norm": 2.1346256732940674, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 9900 + }, + { + "epoch": 65.19736842105263, + "grad_norm": 1.537832498550415, + "learning_rate": 0.0001, + "loss": 0.028, + "step": 9910 + }, + { + "epoch": 65.26315789473684, + "grad_norm": 1.70017671585083, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 9920 + }, + { + "epoch": 65.32894736842105, + "grad_norm": 2.058269500732422, + "learning_rate": 0.0001, + "loss": 0.0289, + "step": 9930 + }, + { + "epoch": 65.39473684210526, + "grad_norm": 1.592274785041809, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 9940 + }, + { + "epoch": 65.46052631578948, + "grad_norm": 1.6225742101669312, + "learning_rate": 0.0001, + "loss": 0.0305, + "step": 9950 + }, + { + "epoch": 65.52631578947368, + "grad_norm": 1.3477301597595215, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 9960 + }, + { + "epoch": 65.59210526315789, + "grad_norm": 1.6625304222106934, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 9970 + }, + { + "epoch": 65.65789473684211, + "grad_norm": 1.5278533697128296, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 9980 + }, + { + "epoch": 65.72368421052632, + "grad_norm": 1.9545159339904785, + "learning_rate": 0.0001, + "loss": 0.0279, + "step": 9990 + }, + { + "epoch": 65.78947368421052, + "grad_norm": 2.0630109310150146, + "learning_rate": 0.0001, + "loss": 0.0284, + "step": 10000 + }, + { + "epoch": 65.85526315789474, + "grad_norm": 2.0734527111053467, + "learning_rate": 0.0001, + "loss": 0.0304, + "step": 10010 + }, + { + "epoch": 65.92105263157895, + "grad_norm": 1.8130292892456055, + "learning_rate": 0.0001, + "loss": 0.031, + "step": 10020 + }, + { + "epoch": 65.98684210526316, + "grad_norm": 1.9439656734466553, + "learning_rate": 0.0001, + "loss": 0.03, + "step": 10030 + }, + { + "epoch": 66.05263157894737, + "grad_norm": 1.825518012046814, + "learning_rate": 0.0001, + "loss": 0.0306, + "step": 10040 + }, + { + "epoch": 66.11842105263158, + "grad_norm": 1.7734483480453491, + "learning_rate": 0.0001, + "loss": 0.0288, + "step": 10050 + }, + { + "epoch": 66.1842105263158, + "grad_norm": 1.8506808280944824, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 10060 + }, + { + "epoch": 66.25, + "grad_norm": 1.4367343187332153, + "learning_rate": 0.0001, + "loss": 0.0301, + "step": 10070 + }, + { + "epoch": 66.3157894736842, + "grad_norm": 2.0297582149505615, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 10080 + }, + { + "epoch": 66.38157894736842, + "grad_norm": 2.1654398441314697, + "learning_rate": 0.0001, + "loss": 0.0282, + "step": 10090 + }, + { + "epoch": 66.44736842105263, + "grad_norm": 1.8896561861038208, + "learning_rate": 0.0001, + "loss": 0.0276, + "step": 10100 + }, + { + "epoch": 66.51315789473684, + "grad_norm": 2.0529367923736572, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 10110 + }, + { + "epoch": 66.57894736842105, + "grad_norm": 2.187924385070801, + "learning_rate": 0.0001, + "loss": 0.029, + "step": 10120 + }, + { + "epoch": 66.64473684210526, + "grad_norm": 1.9992048740386963, + "learning_rate": 0.0001, + "loss": 0.0277, + "step": 10130 + }, + { + "epoch": 66.71052631578948, + "grad_norm": 1.930832862854004, + "learning_rate": 0.0001, + "loss": 0.0274, + "step": 10140 + }, + { + "epoch": 66.77631578947368, + "grad_norm": 2.112302780151367, + "learning_rate": 0.0001, + "loss": 0.0292, + "step": 10150 + }, + { + "epoch": 66.84210526315789, + "grad_norm": 2.1016273498535156, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 10160 + }, + { + "epoch": 66.90789473684211, + "grad_norm": 1.839139699935913, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 10170 + }, + { + "epoch": 66.97368421052632, + "grad_norm": 2.132181406021118, + "learning_rate": 0.0001, + "loss": 0.0264, + "step": 10180 + }, + { + "epoch": 67.03947368421052, + "grad_norm": 1.6180822849273682, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 10190 + }, + { + "epoch": 67.10526315789474, + "grad_norm": 2.0724172592163086, + "learning_rate": 0.0001, + "loss": 0.0255, + "step": 10200 + }, + { + "epoch": 67.17105263157895, + "grad_norm": 1.807228446006775, + "learning_rate": 0.0001, + "loss": 0.0264, + "step": 10210 + }, + { + "epoch": 67.23684210526316, + "grad_norm": 1.818137764930725, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 10220 + }, + { + "epoch": 67.30263157894737, + "grad_norm": 1.9353524446487427, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 10230 + }, + { + "epoch": 67.36842105263158, + "grad_norm": 1.9978927373886108, + "learning_rate": 0.0001, + "loss": 0.0259, + "step": 10240 + }, + { + "epoch": 67.4342105263158, + "grad_norm": 1.6602277755737305, + "learning_rate": 0.0001, + "loss": 0.0287, + "step": 10250 + }, + { + "epoch": 67.5, + "grad_norm": 1.6295214891433716, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 10260 + }, + { + "epoch": 67.5657894736842, + "grad_norm": 1.5134141445159912, + "learning_rate": 0.0001, + "loss": 0.0311, + "step": 10270 + }, + { + "epoch": 67.63157894736842, + "grad_norm": 1.7062309980392456, + "learning_rate": 0.0001, + "loss": 0.0288, + "step": 10280 + }, + { + "epoch": 67.69736842105263, + "grad_norm": 2.065563678741455, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 10290 + }, + { + "epoch": 67.76315789473684, + "grad_norm": 1.7206897735595703, + "learning_rate": 0.0001, + "loss": 0.0304, + "step": 10300 + }, + { + "epoch": 67.82894736842105, + "grad_norm": 1.927570104598999, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 10310 + }, + { + "epoch": 67.89473684210526, + "grad_norm": 1.7813613414764404, + "learning_rate": 0.0001, + "loss": 0.0303, + "step": 10320 + }, + { + "epoch": 67.96052631578948, + "grad_norm": 1.7587003707885742, + "learning_rate": 0.0001, + "loss": 0.0264, + "step": 10330 + }, + { + "epoch": 68.02631578947368, + "grad_norm": 1.5077179670333862, + "learning_rate": 0.0001, + "loss": 0.0282, + "step": 10340 + }, + { + "epoch": 68.09210526315789, + "grad_norm": 1.6387265920639038, + "learning_rate": 0.0001, + "loss": 0.0308, + "step": 10350 + }, + { + "epoch": 68.15789473684211, + "grad_norm": 1.3568955659866333, + "learning_rate": 0.0001, + "loss": 0.0303, + "step": 10360 + }, + { + "epoch": 68.22368421052632, + "grad_norm": 1.4115484952926636, + "learning_rate": 0.0001, + "loss": 0.0267, + "step": 10370 + }, + { + "epoch": 68.28947368421052, + "grad_norm": 1.726250171661377, + "learning_rate": 0.0001, + "loss": 0.0264, + "step": 10380 + }, + { + "epoch": 68.35526315789474, + "grad_norm": 1.825256586074829, + "learning_rate": 0.0001, + "loss": 0.0284, + "step": 10390 + }, + { + "epoch": 68.42105263157895, + "grad_norm": 1.7078465223312378, + "learning_rate": 0.0001, + "loss": 0.028, + "step": 10400 + }, + { + "epoch": 68.48684210526316, + "grad_norm": 1.6383626461029053, + "learning_rate": 0.0001, + "loss": 0.0277, + "step": 10410 + }, + { + "epoch": 68.55263157894737, + "grad_norm": 1.635048270225525, + "learning_rate": 0.0001, + "loss": 0.0295, + "step": 10420 + }, + { + "epoch": 68.61842105263158, + "grad_norm": 1.7052724361419678, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 10430 + }, + { + "epoch": 68.6842105263158, + "grad_norm": 1.6822491884231567, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 10440 + }, + { + "epoch": 68.75, + "grad_norm": 1.7057929039001465, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 10450 + }, + { + "epoch": 68.8157894736842, + "grad_norm": 1.5500861406326294, + "learning_rate": 0.0001, + "loss": 0.0282, + "step": 10460 + }, + { + "epoch": 68.88157894736842, + "grad_norm": 1.7019840478897095, + "learning_rate": 0.0001, + "loss": 0.0296, + "step": 10470 + }, + { + "epoch": 68.94736842105263, + "grad_norm": 1.6525379419326782, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 10480 + }, + { + "epoch": 69.01315789473684, + "grad_norm": 2.0071771144866943, + "learning_rate": 0.0001, + "loss": 0.029, + "step": 10490 + }, + { + "epoch": 69.07894736842105, + "grad_norm": 2.170626640319824, + "learning_rate": 0.0001, + "loss": 0.0286, + "step": 10500 + }, + { + "epoch": 69.14473684210526, + "grad_norm": 1.7918614149093628, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 10510 + }, + { + "epoch": 69.21052631578948, + "grad_norm": 1.633731722831726, + "learning_rate": 0.0001, + "loss": 0.0277, + "step": 10520 + }, + { + "epoch": 69.27631578947368, + "grad_norm": 1.8682153224945068, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 10530 + }, + { + "epoch": 69.34210526315789, + "grad_norm": 1.8960853815078735, + "learning_rate": 0.0001, + "loss": 0.0255, + "step": 10540 + }, + { + "epoch": 69.40789473684211, + "grad_norm": 1.7412015199661255, + "learning_rate": 0.0001, + "loss": 0.0267, + "step": 10550 + }, + { + "epoch": 69.47368421052632, + "grad_norm": 1.7792584896087646, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 10560 + }, + { + "epoch": 69.53947368421052, + "grad_norm": 1.578399896621704, + "learning_rate": 0.0001, + "loss": 0.0274, + "step": 10570 + }, + { + "epoch": 69.60526315789474, + "grad_norm": 1.7477600574493408, + "learning_rate": 0.0001, + "loss": 0.0257, + "step": 10580 + }, + { + "epoch": 69.67105263157895, + "grad_norm": 1.8707166910171509, + "learning_rate": 0.0001, + "loss": 0.0264, + "step": 10590 + }, + { + "epoch": 69.73684210526316, + "grad_norm": 1.953047275543213, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 10600 + }, + { + "epoch": 69.80263157894737, + "grad_norm": 1.7453070878982544, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 10610 + }, + { + "epoch": 69.86842105263158, + "grad_norm": 1.3676609992980957, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 10620 + }, + { + "epoch": 69.9342105263158, + "grad_norm": 1.8007850646972656, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 10630 + }, + { + "epoch": 70.0, + "grad_norm": 1.5974959135055542, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 10640 + }, + { + "epoch": 70.0657894736842, + "grad_norm": 1.7148112058639526, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 10650 + }, + { + "epoch": 70.13157894736842, + "grad_norm": 1.8458138704299927, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 10660 + }, + { + "epoch": 70.19736842105263, + "grad_norm": 1.8393112421035767, + "learning_rate": 0.0001, + "loss": 0.0295, + "step": 10670 + }, + { + "epoch": 70.26315789473684, + "grad_norm": 2.173353433609009, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 10680 + }, + { + "epoch": 70.32894736842105, + "grad_norm": 1.9548739194869995, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 10690 + }, + { + "epoch": 70.39473684210526, + "grad_norm": 2.02528715133667, + "learning_rate": 0.0001, + "loss": 0.0279, + "step": 10700 + }, + { + "epoch": 70.46052631578948, + "grad_norm": 1.9165359735488892, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 10710 + }, + { + "epoch": 70.52631578947368, + "grad_norm": 1.8980177640914917, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 10720 + }, + { + "epoch": 70.59210526315789, + "grad_norm": 1.5036553144454956, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 10730 + }, + { + "epoch": 70.65789473684211, + "grad_norm": 1.9608731269836426, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 10740 + }, + { + "epoch": 70.72368421052632, + "grad_norm": 1.665083885192871, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 10750 + }, + { + "epoch": 70.78947368421052, + "grad_norm": 1.6562914848327637, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 10760 + }, + { + "epoch": 70.85526315789474, + "grad_norm": 1.5882655382156372, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 10770 + }, + { + "epoch": 70.92105263157895, + "grad_norm": 1.7006151676177979, + "learning_rate": 0.0001, + "loss": 0.0254, + "step": 10780 + }, + { + "epoch": 70.98684210526316, + "grad_norm": 1.5319530963897705, + "learning_rate": 0.0001, + "loss": 0.0281, + "step": 10790 + }, + { + "epoch": 71.05263157894737, + "grad_norm": 1.9043638706207275, + "learning_rate": 0.0001, + "loss": 0.0284, + "step": 10800 + }, + { + "epoch": 71.11842105263158, + "grad_norm": 1.5125269889831543, + "learning_rate": 0.0001, + "loss": 0.0267, + "step": 10810 + }, + { + "epoch": 71.1842105263158, + "grad_norm": 1.8229715824127197, + "learning_rate": 0.0001, + "loss": 0.0299, + "step": 10820 + }, + { + "epoch": 71.25, + "grad_norm": 1.796139121055603, + "learning_rate": 0.0001, + "loss": 0.0264, + "step": 10830 + }, + { + "epoch": 71.3157894736842, + "grad_norm": 1.8449422121047974, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 10840 + }, + { + "epoch": 71.38157894736842, + "grad_norm": 1.7261402606964111, + "learning_rate": 0.0001, + "loss": 0.0287, + "step": 10850 + }, + { + "epoch": 71.44736842105263, + "grad_norm": 1.5883243083953857, + "learning_rate": 0.0001, + "loss": 0.0298, + "step": 10860 + }, + { + "epoch": 71.51315789473684, + "grad_norm": 1.3273059129714966, + "learning_rate": 0.0001, + "loss": 0.0277, + "step": 10870 + }, + { + "epoch": 71.57894736842105, + "grad_norm": 1.3199368715286255, + "learning_rate": 0.0001, + "loss": 0.0254, + "step": 10880 + }, + { + "epoch": 71.64473684210526, + "grad_norm": 1.3597865104675293, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 10890 + }, + { + "epoch": 71.71052631578948, + "grad_norm": 1.6409027576446533, + "learning_rate": 0.0001, + "loss": 0.0274, + "step": 10900 + }, + { + "epoch": 71.77631578947368, + "grad_norm": 1.7969907522201538, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 10910 + }, + { + "epoch": 71.84210526315789, + "grad_norm": 1.7779749631881714, + "learning_rate": 0.0001, + "loss": 0.0285, + "step": 10920 + }, + { + "epoch": 71.90789473684211, + "grad_norm": 1.8789730072021484, + "learning_rate": 0.0001, + "loss": 0.0281, + "step": 10930 + }, + { + "epoch": 71.97368421052632, + "grad_norm": 1.832434058189392, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 10940 + }, + { + "epoch": 72.03947368421052, + "grad_norm": 1.6766541004180908, + "learning_rate": 0.0001, + "loss": 0.0284, + "step": 10950 + }, + { + "epoch": 72.10526315789474, + "grad_norm": 1.7368793487548828, + "learning_rate": 0.0001, + "loss": 0.0286, + "step": 10960 + }, + { + "epoch": 72.17105263157895, + "grad_norm": 1.8270950317382812, + "learning_rate": 0.0001, + "loss": 0.0313, + "step": 10970 + }, + { + "epoch": 72.23684210526316, + "grad_norm": 1.5050655603408813, + "learning_rate": 0.0001, + "loss": 0.0286, + "step": 10980 + }, + { + "epoch": 72.30263157894737, + "grad_norm": 1.7033870220184326, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 10990 + }, + { + "epoch": 72.36842105263158, + "grad_norm": 1.559373378753662, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 11000 + }, + { + "epoch": 72.4342105263158, + "grad_norm": 1.8907803297042847, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 11010 + }, + { + "epoch": 72.5, + "grad_norm": 1.5523408651351929, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 11020 + }, + { + "epoch": 72.5657894736842, + "grad_norm": 1.4263194799423218, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 11030 + }, + { + "epoch": 72.63157894736842, + "grad_norm": 1.5710784196853638, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 11040 + }, + { + "epoch": 72.69736842105263, + "grad_norm": 1.9038443565368652, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 11050 + }, + { + "epoch": 72.76315789473684, + "grad_norm": 1.9687570333480835, + "learning_rate": 0.0001, + "loss": 0.0271, + "step": 11060 + }, + { + "epoch": 72.82894736842105, + "grad_norm": 1.8713347911834717, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 11070 + }, + { + "epoch": 72.89473684210526, + "grad_norm": 1.827601671218872, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 11080 + }, + { + "epoch": 72.96052631578948, + "grad_norm": 1.7612308263778687, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 11090 + }, + { + "epoch": 73.02631578947368, + "grad_norm": 1.6651338338851929, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 11100 + }, + { + "epoch": 73.09210526315789, + "grad_norm": 1.7223801612854004, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 11110 + }, + { + "epoch": 73.15789473684211, + "grad_norm": 2.2473108768463135, + "learning_rate": 0.0001, + "loss": 0.0256, + "step": 11120 + }, + { + "epoch": 73.22368421052632, + "grad_norm": 1.6378623247146606, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 11130 + }, + { + "epoch": 73.28947368421052, + "grad_norm": 1.7509886026382446, + "learning_rate": 0.0001, + "loss": 0.0274, + "step": 11140 + }, + { + "epoch": 73.35526315789474, + "grad_norm": 1.7899543046951294, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 11150 + }, + { + "epoch": 73.42105263157895, + "grad_norm": 1.7126433849334717, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 11160 + }, + { + "epoch": 73.48684210526316, + "grad_norm": 1.624640941619873, + "learning_rate": 0.0001, + "loss": 0.0258, + "step": 11170 + }, + { + "epoch": 73.55263157894737, + "grad_norm": 1.6922892332077026, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 11180 + }, + { + "epoch": 73.61842105263158, + "grad_norm": 1.6018348932266235, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 11190 + }, + { + "epoch": 73.6842105263158, + "grad_norm": 1.7257733345031738, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 11200 + }, + { + "epoch": 73.75, + "grad_norm": 1.5125445127487183, + "learning_rate": 0.0001, + "loss": 0.0274, + "step": 11210 + }, + { + "epoch": 73.8157894736842, + "grad_norm": 2.309644937515259, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 11220 + }, + { + "epoch": 73.88157894736842, + "grad_norm": 2.2980353832244873, + "learning_rate": 0.0001, + "loss": 0.028, + "step": 11230 + }, + { + "epoch": 73.94736842105263, + "grad_norm": 2.155663251876831, + "learning_rate": 0.0001, + "loss": 0.0271, + "step": 11240 + }, + { + "epoch": 74.01315789473684, + "grad_norm": 1.5754151344299316, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 11250 + }, + { + "epoch": 74.07894736842105, + "grad_norm": 1.6232088804244995, + "learning_rate": 0.0001, + "loss": 0.0257, + "step": 11260 + }, + { + "epoch": 74.14473684210526, + "grad_norm": 2.0059304237365723, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 11270 + }, + { + "epoch": 74.21052631578948, + "grad_norm": 2.166524648666382, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 11280 + }, + { + "epoch": 74.27631578947368, + "grad_norm": 1.8639678955078125, + "learning_rate": 0.0001, + "loss": 0.027, + "step": 11290 + }, + { + "epoch": 74.34210526315789, + "grad_norm": 2.540382146835327, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 11300 + }, + { + "epoch": 74.40789473684211, + "grad_norm": 1.8720356225967407, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 11310 + }, + { + "epoch": 74.47368421052632, + "grad_norm": 1.6244946718215942, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 11320 + }, + { + "epoch": 74.53947368421052, + "grad_norm": 1.4546319246292114, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 11330 + }, + { + "epoch": 74.60526315789474, + "grad_norm": 1.374340534210205, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 11340 + }, + { + "epoch": 74.67105263157895, + "grad_norm": 1.5473732948303223, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 11350 + }, + { + "epoch": 74.73684210526316, + "grad_norm": 1.646817922592163, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 11360 + }, + { + "epoch": 74.80263157894737, + "grad_norm": 1.6465305089950562, + "learning_rate": 0.0001, + "loss": 0.0283, + "step": 11370 + }, + { + "epoch": 74.86842105263158, + "grad_norm": 1.6561059951782227, + "learning_rate": 0.0001, + "loss": 0.0256, + "step": 11380 + }, + { + "epoch": 74.9342105263158, + "grad_norm": 1.7500685453414917, + "learning_rate": 0.0001, + "loss": 0.0276, + "step": 11390 + }, + { + "epoch": 75.0, + "grad_norm": 1.3135260343551636, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 11400 + }, + { + "epoch": 75.0657894736842, + "grad_norm": 1.6338132619857788, + "learning_rate": 0.0001, + "loss": 0.0259, + "step": 11410 + }, + { + "epoch": 75.13157894736842, + "grad_norm": 1.5177695751190186, + "learning_rate": 0.0001, + "loss": 0.0293, + "step": 11420 + }, + { + "epoch": 75.19736842105263, + "grad_norm": 1.5215574502944946, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 11430 + }, + { + "epoch": 75.26315789473684, + "grad_norm": 1.4262157678604126, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 11440 + }, + { + "epoch": 75.32894736842105, + "grad_norm": 1.7465966939926147, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 11450 + }, + { + "epoch": 75.39473684210526, + "grad_norm": 1.7781189680099487, + "learning_rate": 0.0001, + "loss": 0.0258, + "step": 11460 + }, + { + "epoch": 75.46052631578948, + "grad_norm": 1.861718773841858, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 11470 + }, + { + "epoch": 75.52631578947368, + "grad_norm": 1.34523606300354, + "learning_rate": 0.0001, + "loss": 0.0255, + "step": 11480 + }, + { + "epoch": 75.59210526315789, + "grad_norm": 1.6540141105651855, + "learning_rate": 0.0001, + "loss": 0.0277, + "step": 11490 + }, + { + "epoch": 75.65789473684211, + "grad_norm": 1.8050261735916138, + "learning_rate": 0.0001, + "loss": 0.0258, + "step": 11500 + }, + { + "epoch": 75.72368421052632, + "grad_norm": 1.8646867275238037, + "learning_rate": 0.0001, + "loss": 0.0285, + "step": 11510 + }, + { + "epoch": 75.78947368421052, + "grad_norm": 1.4578933715820312, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 11520 + }, + { + "epoch": 75.85526315789474, + "grad_norm": 1.5991803407669067, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 11530 + }, + { + "epoch": 75.92105263157895, + "grad_norm": 1.9933110475540161, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 11540 + }, + { + "epoch": 75.98684210526316, + "grad_norm": 1.5704941749572754, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 11550 + }, + { + "epoch": 76.05263157894737, + "grad_norm": 1.813393235206604, + "learning_rate": 0.0001, + "loss": 0.0259, + "step": 11560 + }, + { + "epoch": 76.11842105263158, + "grad_norm": 2.1795897483825684, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 11570 + }, + { + "epoch": 76.1842105263158, + "grad_norm": 1.719844102859497, + "learning_rate": 0.0001, + "loss": 0.0257, + "step": 11580 + }, + { + "epoch": 76.25, + "grad_norm": 1.964848518371582, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 11590 + }, + { + "epoch": 76.3157894736842, + "grad_norm": 2.0262434482574463, + "learning_rate": 0.0001, + "loss": 0.0276, + "step": 11600 + }, + { + "epoch": 76.38157894736842, + "grad_norm": 2.0098633766174316, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 11610 + }, + { + "epoch": 76.44736842105263, + "grad_norm": 1.8454413414001465, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 11620 + }, + { + "epoch": 76.51315789473684, + "grad_norm": 1.8662750720977783, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 11630 + }, + { + "epoch": 76.57894736842105, + "grad_norm": 1.9370334148406982, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 11640 + }, + { + "epoch": 76.64473684210526, + "grad_norm": 1.6803852319717407, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 11650 + }, + { + "epoch": 76.71052631578948, + "grad_norm": 1.9592006206512451, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 11660 + }, + { + "epoch": 76.77631578947368, + "grad_norm": 1.9452179670333862, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 11670 + }, + { + "epoch": 76.84210526315789, + "grad_norm": 1.9002512693405151, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 11680 + }, + { + "epoch": 76.90789473684211, + "grad_norm": 1.9148516654968262, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 11690 + }, + { + "epoch": 76.97368421052632, + "grad_norm": 1.7390122413635254, + "learning_rate": 0.0001, + "loss": 0.0256, + "step": 11700 + }, + { + "epoch": 77.03947368421052, + "grad_norm": 1.5007272958755493, + "learning_rate": 0.0001, + "loss": 0.0254, + "step": 11710 + }, + { + "epoch": 77.10526315789474, + "grad_norm": 1.6768064498901367, + "learning_rate": 0.0001, + "loss": 0.0257, + "step": 11720 + }, + { + "epoch": 77.17105263157895, + "grad_norm": 1.8090136051177979, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 11730 + }, + { + "epoch": 77.23684210526316, + "grad_norm": 1.8841345310211182, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 11740 + }, + { + "epoch": 77.30263157894737, + "grad_norm": 1.282132863998413, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 11750 + }, + { + "epoch": 77.36842105263158, + "grad_norm": 1.8869783878326416, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 11760 + }, + { + "epoch": 77.4342105263158, + "grad_norm": 1.700897216796875, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 11770 + }, + { + "epoch": 77.5, + "grad_norm": 1.763547420501709, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 11780 + }, + { + "epoch": 77.5657894736842, + "grad_norm": 1.661258339881897, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 11790 + }, + { + "epoch": 77.63157894736842, + "grad_norm": 1.2386846542358398, + "learning_rate": 0.0001, + "loss": 0.0256, + "step": 11800 + }, + { + "epoch": 77.69736842105263, + "grad_norm": 1.4917902946472168, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 11810 + }, + { + "epoch": 77.76315789473684, + "grad_norm": 1.4909294843673706, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 11820 + }, + { + "epoch": 77.82894736842105, + "grad_norm": 1.6174029111862183, + "learning_rate": 0.0001, + "loss": 0.0282, + "step": 11830 + }, + { + "epoch": 77.89473684210526, + "grad_norm": 1.75030517578125, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 11840 + }, + { + "epoch": 77.96052631578948, + "grad_norm": 1.3512479066848755, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 11850 + }, + { + "epoch": 78.02631578947368, + "grad_norm": 1.5821315050125122, + "learning_rate": 0.0001, + "loss": 0.028, + "step": 11860 + }, + { + "epoch": 78.09210526315789, + "grad_norm": 1.8378440141677856, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 11870 + }, + { + "epoch": 78.15789473684211, + "grad_norm": 1.536332130432129, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 11880 + }, + { + "epoch": 78.22368421052632, + "grad_norm": 1.5582396984100342, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 11890 + }, + { + "epoch": 78.28947368421052, + "grad_norm": 1.637697458267212, + "learning_rate": 0.0001, + "loss": 0.0269, + "step": 11900 + }, + { + "epoch": 78.35526315789474, + "grad_norm": 1.7529138326644897, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 11910 + }, + { + "epoch": 78.42105263157895, + "grad_norm": 1.9082077741622925, + "learning_rate": 0.0001, + "loss": 0.0257, + "step": 11920 + }, + { + "epoch": 78.48684210526316, + "grad_norm": 1.5302765369415283, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 11930 + }, + { + "epoch": 78.55263157894737, + "grad_norm": 1.9385207891464233, + "learning_rate": 0.0001, + "loss": 0.0298, + "step": 11940 + }, + { + "epoch": 78.61842105263158, + "grad_norm": 1.6783729791641235, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 11950 + }, + { + "epoch": 78.6842105263158, + "grad_norm": 1.7037731409072876, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 11960 + }, + { + "epoch": 78.75, + "grad_norm": 1.3808066844940186, + "learning_rate": 0.0001, + "loss": 0.0273, + "step": 11970 + }, + { + "epoch": 78.8157894736842, + "grad_norm": 1.4401555061340332, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 11980 + }, + { + "epoch": 78.88157894736842, + "grad_norm": 1.8740235567092896, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 11990 + }, + { + "epoch": 78.94736842105263, + "grad_norm": 1.5552549362182617, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 12000 + }, + { + "epoch": 79.01315789473684, + "grad_norm": 1.5589462518692017, + "learning_rate": 0.0001, + "loss": 0.0258, + "step": 12010 + }, + { + "epoch": 79.07894736842105, + "grad_norm": 1.641662359237671, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 12020 + }, + { + "epoch": 79.14473684210526, + "grad_norm": 1.6106501817703247, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 12030 + }, + { + "epoch": 79.21052631578948, + "grad_norm": 1.9273698329925537, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 12040 + }, + { + "epoch": 79.27631578947368, + "grad_norm": 1.7661243677139282, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 12050 + }, + { + "epoch": 79.34210526315789, + "grad_norm": 1.7702202796936035, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 12060 + }, + { + "epoch": 79.40789473684211, + "grad_norm": 1.2317006587982178, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 12070 + }, + { + "epoch": 79.47368421052632, + "grad_norm": 1.6304638385772705, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 12080 + }, + { + "epoch": 79.53947368421052, + "grad_norm": 1.764953851699829, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 12090 + }, + { + "epoch": 79.60526315789474, + "grad_norm": 1.6987632513046265, + "learning_rate": 0.0001, + "loss": 0.0281, + "step": 12100 + }, + { + "epoch": 79.67105263157895, + "grad_norm": 1.7687889337539673, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 12110 + }, + { + "epoch": 79.73684210526316, + "grad_norm": 1.714768648147583, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 12120 + }, + { + "epoch": 79.80263157894737, + "grad_norm": 1.5168180465698242, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 12130 + }, + { + "epoch": 79.86842105263158, + "grad_norm": 1.6321282386779785, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 12140 + }, + { + "epoch": 79.9342105263158, + "grad_norm": 1.8023918867111206, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 12150 + }, + { + "epoch": 80.0, + "grad_norm": 1.475740909576416, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 12160 + }, + { + "epoch": 80.0657894736842, + "grad_norm": 1.8622595071792603, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 12170 + }, + { + "epoch": 80.13157894736842, + "grad_norm": 1.6295769214630127, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 12180 + }, + { + "epoch": 80.19736842105263, + "grad_norm": 1.6116666793823242, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 12190 + }, + { + "epoch": 80.26315789473684, + "grad_norm": 1.599142074584961, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 12200 + }, + { + "epoch": 80.32894736842105, + "grad_norm": 1.5806418657302856, + "learning_rate": 0.0001, + "loss": 0.0259, + "step": 12210 + }, + { + "epoch": 80.39473684210526, + "grad_norm": 1.4835152626037598, + "learning_rate": 0.0001, + "loss": 0.0276, + "step": 12220 + }, + { + "epoch": 80.46052631578948, + "grad_norm": 1.2303491830825806, + "learning_rate": 0.0001, + "loss": 0.0274, + "step": 12230 + }, + { + "epoch": 80.52631578947368, + "grad_norm": 1.4486466646194458, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 12240 + }, + { + "epoch": 80.59210526315789, + "grad_norm": 1.5165975093841553, + "learning_rate": 0.0001, + "loss": 0.028, + "step": 12250 + }, + { + "epoch": 80.65789473684211, + "grad_norm": 1.4321902990341187, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 12260 + }, + { + "epoch": 80.72368421052632, + "grad_norm": 1.3290172815322876, + "learning_rate": 0.0001, + "loss": 0.0284, + "step": 12270 + }, + { + "epoch": 80.78947368421052, + "grad_norm": 1.4528899192810059, + "learning_rate": 0.0001, + "loss": 0.0272, + "step": 12280 + }, + { + "epoch": 80.85526315789474, + "grad_norm": 1.4954471588134766, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 12290 + }, + { + "epoch": 80.92105263157895, + "grad_norm": 1.686185359954834, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 12300 + }, + { + "epoch": 80.98684210526316, + "grad_norm": 1.8050355911254883, + "learning_rate": 0.0001, + "loss": 0.0273, + "step": 12310 + }, + { + "epoch": 81.05263157894737, + "grad_norm": 1.667278528213501, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 12320 + }, + { + "epoch": 81.11842105263158, + "grad_norm": 1.8313591480255127, + "learning_rate": 0.0001, + "loss": 0.0275, + "step": 12330 + }, + { + "epoch": 81.1842105263158, + "grad_norm": 1.5865910053253174, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 12340 + }, + { + "epoch": 81.25, + "grad_norm": 1.4617630243301392, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 12350 + }, + { + "epoch": 81.3157894736842, + "grad_norm": 1.6927311420440674, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 12360 + }, + { + "epoch": 81.38157894736842, + "grad_norm": 1.7742199897766113, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 12370 + }, + { + "epoch": 81.44736842105263, + "grad_norm": 1.5240896940231323, + "learning_rate": 0.0001, + "loss": 0.0273, + "step": 12380 + }, + { + "epoch": 81.51315789473684, + "grad_norm": 1.980691909790039, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 12390 + }, + { + "epoch": 81.57894736842105, + "grad_norm": 1.9031286239624023, + "learning_rate": 0.0001, + "loss": 0.0254, + "step": 12400 + }, + { + "epoch": 81.64473684210526, + "grad_norm": 1.8978111743927002, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 12410 + }, + { + "epoch": 81.71052631578948, + "grad_norm": 2.1107935905456543, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 12420 + }, + { + "epoch": 81.77631578947368, + "grad_norm": 1.7692242860794067, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 12430 + }, + { + "epoch": 81.84210526315789, + "grad_norm": 1.9986506700515747, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 12440 + }, + { + "epoch": 81.90789473684211, + "grad_norm": 2.076988935470581, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 12450 + }, + { + "epoch": 81.97368421052632, + "grad_norm": 1.7011905908584595, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 12460 + }, + { + "epoch": 82.03947368421052, + "grad_norm": 1.8305109739303589, + "learning_rate": 0.0001, + "loss": 0.0264, + "step": 12470 + }, + { + "epoch": 82.10526315789474, + "grad_norm": 1.6795586347579956, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 12480 + }, + { + "epoch": 82.17105263157895, + "grad_norm": 1.4467089176177979, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 12490 + }, + { + "epoch": 82.23684210526316, + "grad_norm": 1.618145227432251, + "learning_rate": 0.0001, + "loss": 0.0258, + "step": 12500 + }, + { + "epoch": 82.30263157894737, + "grad_norm": 1.5068567991256714, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 12510 + }, + { + "epoch": 82.36842105263158, + "grad_norm": 1.7931199073791504, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 12520 + }, + { + "epoch": 82.4342105263158, + "grad_norm": 1.7545214891433716, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 12530 + }, + { + "epoch": 82.5, + "grad_norm": 1.677007794380188, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 12540 + }, + { + "epoch": 82.5657894736842, + "grad_norm": 1.8432847261428833, + "learning_rate": 0.0001, + "loss": 0.0275, + "step": 12550 + }, + { + "epoch": 82.63157894736842, + "grad_norm": 1.6245819330215454, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 12560 + }, + { + "epoch": 82.69736842105263, + "grad_norm": 1.837830901145935, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 12570 + }, + { + "epoch": 82.76315789473684, + "grad_norm": 1.7958941459655762, + "learning_rate": 0.0001, + "loss": 0.0256, + "step": 12580 + }, + { + "epoch": 82.82894736842105, + "grad_norm": 1.5123578310012817, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 12590 + }, + { + "epoch": 82.89473684210526, + "grad_norm": 1.5450695753097534, + "learning_rate": 0.0001, + "loss": 0.0254, + "step": 12600 + }, + { + "epoch": 82.96052631578948, + "grad_norm": 1.3524283170700073, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 12610 + }, + { + "epoch": 83.02631578947368, + "grad_norm": 1.588718295097351, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 12620 + }, + { + "epoch": 83.09210526315789, + "grad_norm": 1.4721720218658447, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 12630 + }, + { + "epoch": 83.15789473684211, + "grad_norm": 1.411702036857605, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 12640 + }, + { + "epoch": 83.22368421052632, + "grad_norm": 1.5033427476882935, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 12650 + }, + { + "epoch": 83.28947368421052, + "grad_norm": 1.7150914669036865, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 12660 + }, + { + "epoch": 83.35526315789474, + "grad_norm": 1.4532182216644287, + "learning_rate": 0.0001, + "loss": 0.0275, + "step": 12670 + }, + { + "epoch": 83.42105263157895, + "grad_norm": 1.678092360496521, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 12680 + }, + { + "epoch": 83.48684210526316, + "grad_norm": 1.542473554611206, + "learning_rate": 0.0001, + "loss": 0.0255, + "step": 12690 + }, + { + "epoch": 83.55263157894737, + "grad_norm": 1.6771373748779297, + "learning_rate": 0.0001, + "loss": 0.0255, + "step": 12700 + }, + { + "epoch": 83.61842105263158, + "grad_norm": 1.5920690298080444, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 12710 + }, + { + "epoch": 83.6842105263158, + "grad_norm": 1.29483962059021, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 12720 + }, + { + "epoch": 83.75, + "grad_norm": 1.575247883796692, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 12730 + }, + { + "epoch": 83.8157894736842, + "grad_norm": 1.4278053045272827, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 12740 + }, + { + "epoch": 83.88157894736842, + "grad_norm": 1.0531659126281738, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 12750 + }, + { + "epoch": 83.94736842105263, + "grad_norm": 1.2762305736541748, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 12760 + }, + { + "epoch": 84.01315789473684, + "grad_norm": 1.573049545288086, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 12770 + }, + { + "epoch": 84.07894736842105, + "grad_norm": 1.2428479194641113, + "learning_rate": 0.0001, + "loss": 0.0239, + "step": 12780 + }, + { + "epoch": 84.14473684210526, + "grad_norm": 1.6974382400512695, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 12790 + }, + { + "epoch": 84.21052631578948, + "grad_norm": 1.1859573125839233, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 12800 + }, + { + "epoch": 84.27631578947368, + "grad_norm": 1.5613579750061035, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 12810 + }, + { + "epoch": 84.34210526315789, + "grad_norm": 1.359305739402771, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 12820 + }, + { + "epoch": 84.40789473684211, + "grad_norm": 1.6808149814605713, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 12830 + }, + { + "epoch": 84.47368421052632, + "grad_norm": 1.8556708097457886, + "learning_rate": 0.0001, + "loss": 0.0263, + "step": 12840 + }, + { + "epoch": 84.53947368421052, + "grad_norm": 1.8008233308792114, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 12850 + }, + { + "epoch": 84.60526315789474, + "grad_norm": 1.5086830854415894, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 12860 + }, + { + "epoch": 84.67105263157895, + "grad_norm": 1.6331901550292969, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 12870 + }, + { + "epoch": 84.73684210526316, + "grad_norm": 1.1920050382614136, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 12880 + }, + { + "epoch": 84.80263157894737, + "grad_norm": 1.910048484802246, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 12890 + }, + { + "epoch": 84.86842105263158, + "grad_norm": 1.4449541568756104, + "learning_rate": 0.0001, + "loss": 0.0265, + "step": 12900 + }, + { + "epoch": 84.9342105263158, + "grad_norm": 1.3787635564804077, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 12910 + }, + { + "epoch": 85.0, + "grad_norm": 1.3375296592712402, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 12920 + }, + { + "epoch": 85.0657894736842, + "grad_norm": 1.44197416305542, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 12930 + }, + { + "epoch": 85.13157894736842, + "grad_norm": 1.2884734869003296, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 12940 + }, + { + "epoch": 85.19736842105263, + "grad_norm": 1.1492596864700317, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 12950 + }, + { + "epoch": 85.26315789473684, + "grad_norm": 1.6913357973098755, + "learning_rate": 0.0001, + "loss": 0.0278, + "step": 12960 + }, + { + "epoch": 85.32894736842105, + "grad_norm": 1.591509461402893, + "learning_rate": 0.0001, + "loss": 0.0254, + "step": 12970 + }, + { + "epoch": 85.39473684210526, + "grad_norm": 1.9279398918151855, + "learning_rate": 0.0001, + "loss": 0.0256, + "step": 12980 + }, + { + "epoch": 85.46052631578948, + "grad_norm": 1.8467780351638794, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 12990 + }, + { + "epoch": 85.52631578947368, + "grad_norm": 1.6500190496444702, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 13000 + }, + { + "epoch": 85.59210526315789, + "grad_norm": 2.0222458839416504, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 13010 + }, + { + "epoch": 85.65789473684211, + "grad_norm": 1.445722222328186, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 13020 + }, + { + "epoch": 85.72368421052632, + "grad_norm": 1.3406612873077393, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 13030 + }, + { + "epoch": 85.78947368421052, + "grad_norm": 1.2125831842422485, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 13040 + }, + { + "epoch": 85.85526315789474, + "grad_norm": 1.3329436779022217, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 13050 + }, + { + "epoch": 85.92105263157895, + "grad_norm": 1.7026501893997192, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 13060 + }, + { + "epoch": 85.98684210526316, + "grad_norm": 1.6232529878616333, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 13070 + }, + { + "epoch": 86.05263157894737, + "grad_norm": 1.4894771575927734, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 13080 + }, + { + "epoch": 86.11842105263158, + "grad_norm": 1.3056237697601318, + "learning_rate": 0.0001, + "loss": 0.0257, + "step": 13090 + }, + { + "epoch": 86.1842105263158, + "grad_norm": 1.318498134613037, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 13100 + }, + { + "epoch": 86.25, + "grad_norm": 1.4497151374816895, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 13110 + }, + { + "epoch": 86.3157894736842, + "grad_norm": 1.6557412147521973, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 13120 + }, + { + "epoch": 86.38157894736842, + "grad_norm": 1.569457769393921, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 13130 + }, + { + "epoch": 86.44736842105263, + "grad_norm": 1.7060179710388184, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 13140 + }, + { + "epoch": 86.51315789473684, + "grad_norm": 1.4572563171386719, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 13150 + }, + { + "epoch": 86.57894736842105, + "grad_norm": 1.4536716938018799, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 13160 + }, + { + "epoch": 86.64473684210526, + "grad_norm": 1.6783726215362549, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 13170 + }, + { + "epoch": 86.71052631578948, + "grad_norm": 1.4602693319320679, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 13180 + }, + { + "epoch": 86.77631578947368, + "grad_norm": 1.6970704793930054, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 13190 + }, + { + "epoch": 86.84210526315789, + "grad_norm": 1.3816075325012207, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 13200 + }, + { + "epoch": 86.90789473684211, + "grad_norm": 1.6789470911026, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 13210 + }, + { + "epoch": 86.97368421052632, + "grad_norm": 1.5384844541549683, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 13220 + }, + { + "epoch": 87.03947368421052, + "grad_norm": 1.5757942199707031, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 13230 + }, + { + "epoch": 87.10526315789474, + "grad_norm": 1.7311173677444458, + "learning_rate": 0.0001, + "loss": 0.0239, + "step": 13240 + }, + { + "epoch": 87.17105263157895, + "grad_norm": 1.5452516078948975, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 13250 + }, + { + "epoch": 87.23684210526316, + "grad_norm": 1.495526909828186, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 13260 + }, + { + "epoch": 87.30263157894737, + "grad_norm": 1.5925183296203613, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 13270 + }, + { + "epoch": 87.36842105263158, + "grad_norm": 1.58163321018219, + "learning_rate": 0.0001, + "loss": 0.0268, + "step": 13280 + }, + { + "epoch": 87.4342105263158, + "grad_norm": 1.8590866327285767, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 13290 + }, + { + "epoch": 87.5, + "grad_norm": 1.5774991512298584, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 13300 + }, + { + "epoch": 87.5657894736842, + "grad_norm": 1.7061364650726318, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 13310 + }, + { + "epoch": 87.63157894736842, + "grad_norm": 1.4900331497192383, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 13320 + }, + { + "epoch": 87.69736842105263, + "grad_norm": 1.8623180389404297, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 13330 + }, + { + "epoch": 87.76315789473684, + "grad_norm": 1.6893125772476196, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 13340 + }, + { + "epoch": 87.82894736842105, + "grad_norm": 1.8551734685897827, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 13350 + }, + { + "epoch": 87.89473684210526, + "grad_norm": 1.7966662645339966, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 13360 + }, + { + "epoch": 87.96052631578948, + "grad_norm": 1.7751268148422241, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 13370 + }, + { + "epoch": 88.02631578947368, + "grad_norm": 1.7870049476623535, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 13380 + }, + { + "epoch": 88.09210526315789, + "grad_norm": 1.6607742309570312, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 13390 + }, + { + "epoch": 88.15789473684211, + "grad_norm": 1.734802007675171, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 13400 + }, + { + "epoch": 88.22368421052632, + "grad_norm": 2.1333210468292236, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 13410 + }, + { + "epoch": 88.28947368421052, + "grad_norm": 1.473213791847229, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 13420 + }, + { + "epoch": 88.35526315789474, + "grad_norm": 1.6745879650115967, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 13430 + }, + { + "epoch": 88.42105263157895, + "grad_norm": 1.3163137435913086, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 13440 + }, + { + "epoch": 88.48684210526316, + "grad_norm": 1.573905110359192, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 13450 + }, + { + "epoch": 88.55263157894737, + "grad_norm": 1.672709345817566, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 13460 + }, + { + "epoch": 88.61842105263158, + "grad_norm": 1.473910927772522, + "learning_rate": 0.0001, + "loss": 0.0262, + "step": 13470 + }, + { + "epoch": 88.6842105263158, + "grad_norm": 1.4131242036819458, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 13480 + }, + { + "epoch": 88.75, + "grad_norm": 1.6035963296890259, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 13490 + }, + { + "epoch": 88.8157894736842, + "grad_norm": 1.489512324333191, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 13500 + }, + { + "epoch": 88.88157894736842, + "grad_norm": 1.45292329788208, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 13510 + }, + { + "epoch": 88.94736842105263, + "grad_norm": 1.6451809406280518, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 13520 + }, + { + "epoch": 89.01315789473684, + "grad_norm": 1.826170563697815, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 13530 + }, + { + "epoch": 89.07894736842105, + "grad_norm": 1.5260390043258667, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 13540 + }, + { + "epoch": 89.14473684210526, + "grad_norm": 1.5344808101654053, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 13550 + }, + { + "epoch": 89.21052631578948, + "grad_norm": 1.3127132654190063, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 13560 + }, + { + "epoch": 89.27631578947368, + "grad_norm": 1.71023428440094, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 13570 + }, + { + "epoch": 89.34210526315789, + "grad_norm": 1.7097973823547363, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 13580 + }, + { + "epoch": 89.40789473684211, + "grad_norm": 1.8148266077041626, + "learning_rate": 0.0001, + "loss": 0.0258, + "step": 13590 + }, + { + "epoch": 89.47368421052632, + "grad_norm": 1.3944507837295532, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 13600 + }, + { + "epoch": 89.53947368421052, + "grad_norm": 1.421279788017273, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 13610 + }, + { + "epoch": 89.60526315789474, + "grad_norm": 1.6334819793701172, + "learning_rate": 0.0001, + "loss": 0.0257, + "step": 13620 + }, + { + "epoch": 89.67105263157895, + "grad_norm": 1.9979926347732544, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 13630 + }, + { + "epoch": 89.73684210526316, + "grad_norm": 1.5678966045379639, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 13640 + }, + { + "epoch": 89.80263157894737, + "grad_norm": 1.6031328439712524, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 13650 + }, + { + "epoch": 89.86842105263158, + "grad_norm": 1.8089882135391235, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 13660 + }, + { + "epoch": 89.9342105263158, + "grad_norm": 1.2982338666915894, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 13670 + }, + { + "epoch": 90.0, + "grad_norm": 1.3500252962112427, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 13680 + }, + { + "epoch": 90.0657894736842, + "grad_norm": 1.7127699851989746, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 13690 + }, + { + "epoch": 90.13157894736842, + "grad_norm": 1.1409753561019897, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 13700 + }, + { + "epoch": 90.19736842105263, + "grad_norm": 1.4835864305496216, + "learning_rate": 0.0001, + "loss": 0.0255, + "step": 13710 + }, + { + "epoch": 90.26315789473684, + "grad_norm": 1.676373839378357, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 13720 + }, + { + "epoch": 90.32894736842105, + "grad_norm": 1.247381329536438, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 13730 + }, + { + "epoch": 90.39473684210526, + "grad_norm": 1.1744779348373413, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 13740 + }, + { + "epoch": 90.46052631578948, + "grad_norm": 1.8650901317596436, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 13750 + }, + { + "epoch": 90.52631578947368, + "grad_norm": 1.7591873407363892, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 13760 + }, + { + "epoch": 90.59210526315789, + "grad_norm": 1.6825592517852783, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 13770 + }, + { + "epoch": 90.65789473684211, + "grad_norm": 1.285715937614441, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 13780 + }, + { + "epoch": 90.72368421052632, + "grad_norm": 1.576466679573059, + "learning_rate": 0.0001, + "loss": 0.0256, + "step": 13790 + }, + { + "epoch": 90.78947368421052, + "grad_norm": 1.6532832384109497, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 13800 + }, + { + "epoch": 90.85526315789474, + "grad_norm": 1.4761638641357422, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 13810 + }, + { + "epoch": 90.92105263157895, + "grad_norm": 1.490132451057434, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 13820 + }, + { + "epoch": 90.98684210526316, + "grad_norm": 1.9364569187164307, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 13830 + }, + { + "epoch": 91.05263157894737, + "grad_norm": 1.6803781986236572, + "learning_rate": 0.0001, + "loss": 0.0254, + "step": 13840 + }, + { + "epoch": 91.11842105263158, + "grad_norm": 1.5090696811676025, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 13850 + }, + { + "epoch": 91.1842105263158, + "grad_norm": 1.7960859537124634, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 13860 + }, + { + "epoch": 91.25, + "grad_norm": 1.7104315757751465, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 13870 + }, + { + "epoch": 91.3157894736842, + "grad_norm": 1.793373703956604, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 13880 + }, + { + "epoch": 91.38157894736842, + "grad_norm": 2.0539634227752686, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 13890 + }, + { + "epoch": 91.44736842105263, + "grad_norm": 2.1168859004974365, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 13900 + }, + { + "epoch": 91.51315789473684, + "grad_norm": 1.8195390701293945, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 13910 + }, + { + "epoch": 91.57894736842105, + "grad_norm": 1.714077353477478, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 13920 + }, + { + "epoch": 91.64473684210526, + "grad_norm": 1.7049275636672974, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 13930 + }, + { + "epoch": 91.71052631578948, + "grad_norm": 2.158938407897949, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 13940 + }, + { + "epoch": 91.77631578947368, + "grad_norm": 2.1923866271972656, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 13950 + }, + { + "epoch": 91.84210526315789, + "grad_norm": 1.7512110471725464, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 13960 + }, + { + "epoch": 91.90789473684211, + "grad_norm": 1.8669230937957764, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 13970 + }, + { + "epoch": 91.97368421052632, + "grad_norm": 1.6305128335952759, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 13980 + }, + { + "epoch": 92.03947368421052, + "grad_norm": 1.5910475254058838, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 13990 + }, + { + "epoch": 92.10526315789474, + "grad_norm": 1.5273504257202148, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 14000 + }, + { + "epoch": 92.17105263157895, + "grad_norm": 1.7885582447052002, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 14010 + }, + { + "epoch": 92.23684210526316, + "grad_norm": 2.1910252571105957, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 14020 + }, + { + "epoch": 92.30263157894737, + "grad_norm": 1.755102515220642, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 14030 + }, + { + "epoch": 92.36842105263158, + "grad_norm": 1.6435348987579346, + "learning_rate": 0.0001, + "loss": 0.0266, + "step": 14040 + }, + { + "epoch": 92.4342105263158, + "grad_norm": 2.323174238204956, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 14050 + }, + { + "epoch": 92.5, + "grad_norm": 3.1456828117370605, + "learning_rate": 0.0001, + "loss": 0.0257, + "step": 14060 + }, + { + "epoch": 92.5657894736842, + "grad_norm": 2.456490993499756, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 14070 + }, + { + "epoch": 92.63157894736842, + "grad_norm": 1.9856231212615967, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 14080 + }, + { + "epoch": 92.69736842105263, + "grad_norm": 1.8175798654556274, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 14090 + }, + { + "epoch": 92.76315789473684, + "grad_norm": 1.905171513557434, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 14100 + }, + { + "epoch": 92.82894736842105, + "grad_norm": 1.7334188222885132, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 14110 + }, + { + "epoch": 92.89473684210526, + "grad_norm": 1.6266708374023438, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 14120 + }, + { + "epoch": 92.96052631578948, + "grad_norm": 1.4972394704818726, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 14130 + }, + { + "epoch": 93.02631578947368, + "grad_norm": 1.655664086341858, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 14140 + }, + { + "epoch": 93.09210526315789, + "grad_norm": 1.6464513540267944, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 14150 + }, + { + "epoch": 93.15789473684211, + "grad_norm": 1.4033807516098022, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 14160 + }, + { + "epoch": 93.22368421052632, + "grad_norm": 1.5057168006896973, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 14170 + }, + { + "epoch": 93.28947368421052, + "grad_norm": 1.735034465789795, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 14180 + }, + { + "epoch": 93.35526315789474, + "grad_norm": 1.6805418729782104, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 14190 + }, + { + "epoch": 93.42105263157895, + "grad_norm": 1.3809490203857422, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 14200 + }, + { + "epoch": 93.48684210526316, + "grad_norm": 1.5541129112243652, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 14210 + }, + { + "epoch": 93.55263157894737, + "grad_norm": 1.6721612215042114, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 14220 + }, + { + "epoch": 93.61842105263158, + "grad_norm": 1.4749832153320312, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 14230 + }, + { + "epoch": 93.6842105263158, + "grad_norm": 1.229355812072754, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 14240 + }, + { + "epoch": 93.75, + "grad_norm": 1.3654001951217651, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 14250 + }, + { + "epoch": 93.8157894736842, + "grad_norm": 1.3455557823181152, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 14260 + }, + { + "epoch": 93.88157894736842, + "grad_norm": 1.2095595598220825, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 14270 + }, + { + "epoch": 93.94736842105263, + "grad_norm": 1.2949072122573853, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 14280 + }, + { + "epoch": 94.01315789473684, + "grad_norm": 1.7537165880203247, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 14290 + }, + { + "epoch": 94.07894736842105, + "grad_norm": 1.4385466575622559, + "learning_rate": 0.0001, + "loss": 0.0282, + "step": 14300 + }, + { + "epoch": 94.14473684210526, + "grad_norm": 1.1989967823028564, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 14310 + }, + { + "epoch": 94.21052631578948, + "grad_norm": 1.478391408920288, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 14320 + }, + { + "epoch": 94.27631578947368, + "grad_norm": 1.5552846193313599, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 14330 + }, + { + "epoch": 94.34210526315789, + "grad_norm": 1.5184147357940674, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 14340 + }, + { + "epoch": 94.40789473684211, + "grad_norm": 1.4583368301391602, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 14350 + }, + { + "epoch": 94.47368421052632, + "grad_norm": 1.5319230556488037, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 14360 + }, + { + "epoch": 94.53947368421052, + "grad_norm": 1.505807876586914, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 14370 + }, + { + "epoch": 94.60526315789474, + "grad_norm": 1.2531390190124512, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 14380 + }, + { + "epoch": 94.67105263157895, + "grad_norm": 1.9696604013442993, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 14390 + }, + { + "epoch": 94.73684210526316, + "grad_norm": 1.7615699768066406, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 14400 + }, + { + "epoch": 94.80263157894737, + "grad_norm": 1.9439195394515991, + "learning_rate": 0.0001, + "loss": 0.0255, + "step": 14410 + }, + { + "epoch": 94.86842105263158, + "grad_norm": 2.943202018737793, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 14420 + }, + { + "epoch": 94.9342105263158, + "grad_norm": 1.8300869464874268, + "learning_rate": 0.0001, + "loss": 0.0239, + "step": 14430 + }, + { + "epoch": 95.0, + "grad_norm": 2.4264817237854004, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 14440 + }, + { + "epoch": 95.0657894736842, + "grad_norm": 2.130911350250244, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 14450 + }, + { + "epoch": 95.13157894736842, + "grad_norm": 2.132805109024048, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 14460 + }, + { + "epoch": 95.19736842105263, + "grad_norm": 2.0448882579803467, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 14470 + }, + { + "epoch": 95.26315789473684, + "grad_norm": 1.6117432117462158, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 14480 + }, + { + "epoch": 95.32894736842105, + "grad_norm": 1.5426193475723267, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 14490 + }, + { + "epoch": 95.39473684210526, + "grad_norm": 1.6238819360733032, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 14500 + }, + { + "epoch": 95.46052631578948, + "grad_norm": 1.608115553855896, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 14510 + }, + { + "epoch": 95.52631578947368, + "grad_norm": 1.6221609115600586, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 14520 + }, + { + "epoch": 95.59210526315789, + "grad_norm": 1.772195816040039, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 14530 + }, + { + "epoch": 95.65789473684211, + "grad_norm": 1.5958181619644165, + "learning_rate": 0.0001, + "loss": 0.0254, + "step": 14540 + }, + { + "epoch": 95.72368421052632, + "grad_norm": 1.7433152198791504, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 14550 + }, + { + "epoch": 95.78947368421052, + "grad_norm": 1.7404011487960815, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 14560 + }, + { + "epoch": 95.85526315789474, + "grad_norm": 1.8106964826583862, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 14570 + }, + { + "epoch": 95.92105263157895, + "grad_norm": 1.8747131824493408, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 14580 + }, + { + "epoch": 95.98684210526316, + "grad_norm": 1.9325027465820312, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 14590 + }, + { + "epoch": 96.05263157894737, + "grad_norm": 1.6936956644058228, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 14600 + }, + { + "epoch": 96.11842105263158, + "grad_norm": 1.6250704526901245, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 14610 + }, + { + "epoch": 96.1842105263158, + "grad_norm": 2.4609410762786865, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 14620 + }, + { + "epoch": 96.25, + "grad_norm": 2.147636890411377, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 14630 + }, + { + "epoch": 96.3157894736842, + "grad_norm": 1.605124831199646, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 14640 + }, + { + "epoch": 96.38157894736842, + "grad_norm": 1.8906571865081787, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 14650 + }, + { + "epoch": 96.44736842105263, + "grad_norm": 1.9277275800704956, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 14660 + }, + { + "epoch": 96.51315789473684, + "grad_norm": 1.7315579652786255, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 14670 + }, + { + "epoch": 96.57894736842105, + "grad_norm": 1.8327442407608032, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 14680 + }, + { + "epoch": 96.64473684210526, + "grad_norm": 1.5039429664611816, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 14690 + }, + { + "epoch": 96.71052631578948, + "grad_norm": 1.958951473236084, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 14700 + }, + { + "epoch": 96.77631578947368, + "grad_norm": 1.7764480113983154, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 14710 + }, + { + "epoch": 96.84210526315789, + "grad_norm": 1.4950709342956543, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 14720 + }, + { + "epoch": 96.90789473684211, + "grad_norm": 2.5329701900482178, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 14730 + }, + { + "epoch": 96.97368421052632, + "grad_norm": 1.7658509016036987, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 14740 + }, + { + "epoch": 97.03947368421052, + "grad_norm": 1.8286138772964478, + "learning_rate": 0.0001, + "loss": 0.0239, + "step": 14750 + }, + { + "epoch": 97.10526315789474, + "grad_norm": 1.5775742530822754, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 14760 + }, + { + "epoch": 97.17105263157895, + "grad_norm": 1.4077131748199463, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 14770 + }, + { + "epoch": 97.23684210526316, + "grad_norm": 1.6287063360214233, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 14780 + }, + { + "epoch": 97.30263157894737, + "grad_norm": 1.9090032577514648, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 14790 + }, + { + "epoch": 97.36842105263158, + "grad_norm": 1.7820619344711304, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 14800 + }, + { + "epoch": 97.4342105263158, + "grad_norm": 1.4791796207427979, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 14810 + }, + { + "epoch": 97.5, + "grad_norm": 1.6727066040039062, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 14820 + }, + { + "epoch": 97.5657894736842, + "grad_norm": 1.720504641532898, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 14830 + }, + { + "epoch": 97.63157894736842, + "grad_norm": 2.1600022315979004, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 14840 + }, + { + "epoch": 97.69736842105263, + "grad_norm": 2.139911413192749, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 14850 + }, + { + "epoch": 97.76315789473684, + "grad_norm": 1.762117624282837, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 14860 + }, + { + "epoch": 97.82894736842105, + "grad_norm": 1.6421223878860474, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 14870 + }, + { + "epoch": 97.89473684210526, + "grad_norm": 1.2390879392623901, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 14880 + }, + { + "epoch": 97.96052631578948, + "grad_norm": 1.3071866035461426, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 14890 + }, + { + "epoch": 98.02631578947368, + "grad_norm": 1.6224156618118286, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 14900 + }, + { + "epoch": 98.09210526315789, + "grad_norm": 1.703869342803955, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 14910 + }, + { + "epoch": 98.15789473684211, + "grad_norm": 1.475963830947876, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 14920 + }, + { + "epoch": 98.22368421052632, + "grad_norm": 1.451790690422058, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 14930 + }, + { + "epoch": 98.28947368421052, + "grad_norm": 1.3597410917282104, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 14940 + }, + { + "epoch": 98.35526315789474, + "grad_norm": 1.047415018081665, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 14950 + }, + { + "epoch": 98.42105263157895, + "grad_norm": 1.6275583505630493, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 14960 + }, + { + "epoch": 98.48684210526316, + "grad_norm": 1.3515644073486328, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 14970 + }, + { + "epoch": 98.55263157894737, + "grad_norm": 1.5167436599731445, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 14980 + }, + { + "epoch": 98.61842105263158, + "grad_norm": 1.5004242658615112, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 14990 + }, + { + "epoch": 98.6842105263158, + "grad_norm": 1.2739217281341553, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 15000 + }, + { + "epoch": 98.75, + "grad_norm": 1.6664555072784424, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 15010 + }, + { + "epoch": 98.8157894736842, + "grad_norm": 1.635536789894104, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 15020 + }, + { + "epoch": 98.88157894736842, + "grad_norm": 1.3948959112167358, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 15030 + }, + { + "epoch": 98.94736842105263, + "grad_norm": 1.6810851097106934, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 15040 + }, + { + "epoch": 99.01315789473684, + "grad_norm": 1.4952811002731323, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 15050 + }, + { + "epoch": 99.07894736842105, + "grad_norm": 1.319809913635254, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 15060 + }, + { + "epoch": 99.14473684210526, + "grad_norm": 1.7210019826889038, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 15070 + }, + { + "epoch": 99.21052631578948, + "grad_norm": 1.4392873048782349, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 15080 + }, + { + "epoch": 99.27631578947368, + "grad_norm": 1.3925666809082031, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 15090 + }, + { + "epoch": 99.34210526315789, + "grad_norm": 1.5854908227920532, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 15100 + }, + { + "epoch": 99.40789473684211, + "grad_norm": 1.3266748189926147, + "learning_rate": 0.0001, + "loss": 0.0228, + "step": 15110 + }, + { + "epoch": 99.47368421052632, + "grad_norm": 1.3466399908065796, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 15120 + }, + { + "epoch": 99.53947368421052, + "grad_norm": 1.1351549625396729, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 15130 + }, + { + "epoch": 99.60526315789474, + "grad_norm": 1.2604902982711792, + "learning_rate": 0.0001, + "loss": 0.0239, + "step": 15140 + }, + { + "epoch": 99.67105263157895, + "grad_norm": 1.4526969194412231, + "learning_rate": 0.0001, + "loss": 0.0282, + "step": 15150 + }, + { + "epoch": 99.73684210526316, + "grad_norm": 1.7058517932891846, + "learning_rate": 0.0001, + "loss": 0.0252, + "step": 15160 + }, + { + "epoch": 99.80263157894737, + "grad_norm": 1.7141302824020386, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 15170 + }, + { + "epoch": 99.86842105263158, + "grad_norm": 1.6191295385360718, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 15180 + }, + { + "epoch": 99.9342105263158, + "grad_norm": 1.258483648300171, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 15190 + }, + { + "epoch": 100.0, + "grad_norm": 1.4220373630523682, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 15200 + }, + { + "epoch": 100.0657894736842, + "grad_norm": 1.295554518699646, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 15210 + }, + { + "epoch": 100.13157894736842, + "grad_norm": 1.5223777294158936, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 15220 + }, + { + "epoch": 100.19736842105263, + "grad_norm": 0.9295150637626648, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 15230 + }, + { + "epoch": 100.26315789473684, + "grad_norm": 1.522321105003357, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 15240 + }, + { + "epoch": 100.32894736842105, + "grad_norm": 1.380570650100708, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 15250 + }, + { + "epoch": 100.39473684210526, + "grad_norm": 1.745607614517212, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 15260 + }, + { + "epoch": 100.46052631578948, + "grad_norm": 1.6587855815887451, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 15270 + }, + { + "epoch": 100.52631578947368, + "grad_norm": 1.592822551727295, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 15280 + }, + { + "epoch": 100.59210526315789, + "grad_norm": 1.649880290031433, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 15290 + }, + { + "epoch": 100.65789473684211, + "grad_norm": 1.350723385810852, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 15300 + }, + { + "epoch": 100.72368421052632, + "grad_norm": 1.5590640306472778, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 15310 + }, + { + "epoch": 100.78947368421052, + "grad_norm": 1.4502315521240234, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 15320 + }, + { + "epoch": 100.85526315789474, + "grad_norm": 1.206421136856079, + "learning_rate": 0.0001, + "loss": 0.0256, + "step": 15330 + }, + { + "epoch": 100.92105263157895, + "grad_norm": 1.6681444644927979, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 15340 + }, + { + "epoch": 100.98684210526316, + "grad_norm": 1.7128868103027344, + "learning_rate": 0.0001, + "loss": 0.0277, + "step": 15350 + }, + { + "epoch": 101.05263157894737, + "grad_norm": 1.7957628965377808, + "learning_rate": 0.0001, + "loss": 0.0261, + "step": 15360 + }, + { + "epoch": 101.11842105263158, + "grad_norm": 1.8973162174224854, + "learning_rate": 0.0001, + "loss": 0.0247, + "step": 15370 + }, + { + "epoch": 101.1842105263158, + "grad_norm": 1.4725027084350586, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 15380 + }, + { + "epoch": 101.25, + "grad_norm": 1.2609061002731323, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 15390 + }, + { + "epoch": 101.3157894736842, + "grad_norm": 1.3644044399261475, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 15400 + }, + { + "epoch": 101.38157894736842, + "grad_norm": 1.4843460321426392, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 15410 + }, + { + "epoch": 101.44736842105263, + "grad_norm": 1.5529197454452515, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 15420 + }, + { + "epoch": 101.51315789473684, + "grad_norm": 1.5046693086624146, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 15430 + }, + { + "epoch": 101.57894736842105, + "grad_norm": 1.6078213453292847, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 15440 + }, + { + "epoch": 101.64473684210526, + "grad_norm": 1.9121206998825073, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 15450 + }, + { + "epoch": 101.71052631578948, + "grad_norm": 1.69439697265625, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 15460 + }, + { + "epoch": 101.77631578947368, + "grad_norm": 1.3743191957473755, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 15470 + }, + { + "epoch": 101.84210526315789, + "grad_norm": 1.4057716131210327, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 15480 + }, + { + "epoch": 101.90789473684211, + "grad_norm": 1.2741807699203491, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 15490 + }, + { + "epoch": 101.97368421052632, + "grad_norm": 1.209885597229004, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 15500 + }, + { + "epoch": 102.03947368421052, + "grad_norm": 1.3488867282867432, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 15510 + }, + { + "epoch": 102.10526315789474, + "grad_norm": 1.4938287734985352, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 15520 + }, + { + "epoch": 102.17105263157895, + "grad_norm": 1.3326101303100586, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 15530 + }, + { + "epoch": 102.23684210526316, + "grad_norm": 1.462449550628662, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 15540 + }, + { + "epoch": 102.30263157894737, + "grad_norm": 1.2210637331008911, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 15550 + }, + { + "epoch": 102.36842105263158, + "grad_norm": 1.4737353324890137, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 15560 + }, + { + "epoch": 102.4342105263158, + "grad_norm": 1.4356292486190796, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 15570 + }, + { + "epoch": 102.5, + "grad_norm": 1.472893238067627, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 15580 + }, + { + "epoch": 102.5657894736842, + "grad_norm": 1.5573779344558716, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 15590 + }, + { + "epoch": 102.63157894736842, + "grad_norm": 1.5440502166748047, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 15600 + }, + { + "epoch": 102.69736842105263, + "grad_norm": 1.5142933130264282, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 15610 + }, + { + "epoch": 102.76315789473684, + "grad_norm": 1.6982897520065308, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 15620 + }, + { + "epoch": 102.82894736842105, + "grad_norm": 1.8263667821884155, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 15630 + }, + { + "epoch": 102.89473684210526, + "grad_norm": 1.2521591186523438, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 15640 + }, + { + "epoch": 102.96052631578948, + "grad_norm": 1.5161454677581787, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 15650 + }, + { + "epoch": 103.02631578947368, + "grad_norm": 1.4727702140808105, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 15660 + }, + { + "epoch": 103.09210526315789, + "grad_norm": 1.3148939609527588, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 15670 + }, + { + "epoch": 103.15789473684211, + "grad_norm": 1.3606066703796387, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 15680 + }, + { + "epoch": 103.22368421052632, + "grad_norm": 1.4023685455322266, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 15690 + }, + { + "epoch": 103.28947368421052, + "grad_norm": 1.424401879310608, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 15700 + }, + { + "epoch": 103.35526315789474, + "grad_norm": 1.1225404739379883, + "learning_rate": 0.0001, + "loss": 0.0228, + "step": 15710 + }, + { + "epoch": 103.42105263157895, + "grad_norm": 1.4364515542984009, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 15720 + }, + { + "epoch": 103.48684210526316, + "grad_norm": 1.8251726627349854, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 15730 + }, + { + "epoch": 103.55263157894737, + "grad_norm": 1.6310752630233765, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 15740 + }, + { + "epoch": 103.61842105263158, + "grad_norm": 1.3311368227005005, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 15750 + }, + { + "epoch": 103.6842105263158, + "grad_norm": 1.7061165571212769, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 15760 + }, + { + "epoch": 103.75, + "grad_norm": 1.737336277961731, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 15770 + }, + { + "epoch": 103.8157894736842, + "grad_norm": 1.429793119430542, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 15780 + }, + { + "epoch": 103.88157894736842, + "grad_norm": 1.2859523296356201, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 15790 + }, + { + "epoch": 103.94736842105263, + "grad_norm": 1.5139811038970947, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 15800 + }, + { + "epoch": 104.01315789473684, + "grad_norm": 1.5388349294662476, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 15810 + }, + { + "epoch": 104.07894736842105, + "grad_norm": 1.2440307140350342, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 15820 + }, + { + "epoch": 104.14473684210526, + "grad_norm": 1.590995192527771, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 15830 + }, + { + "epoch": 104.21052631578948, + "grad_norm": 1.5027881860733032, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 15840 + }, + { + "epoch": 104.27631578947368, + "grad_norm": 1.2226101160049438, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 15850 + }, + { + "epoch": 104.34210526315789, + "grad_norm": 1.3308379650115967, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 15860 + }, + { + "epoch": 104.40789473684211, + "grad_norm": 1.2079100608825684, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 15870 + }, + { + "epoch": 104.47368421052632, + "grad_norm": 1.1981853246688843, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 15880 + }, + { + "epoch": 104.53947368421052, + "grad_norm": 1.3664216995239258, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 15890 + }, + { + "epoch": 104.60526315789474, + "grad_norm": 1.8628100156784058, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 15900 + }, + { + "epoch": 104.67105263157895, + "grad_norm": 1.2317039966583252, + "learning_rate": 0.0001, + "loss": 0.0255, + "step": 15910 + }, + { + "epoch": 104.73684210526316, + "grad_norm": 1.513042688369751, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 15920 + }, + { + "epoch": 104.80263157894737, + "grad_norm": 1.6402066946029663, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 15930 + }, + { + "epoch": 104.86842105263158, + "grad_norm": 1.7423036098480225, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 15940 + }, + { + "epoch": 104.9342105263158, + "grad_norm": 1.2021863460540771, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 15950 + }, + { + "epoch": 105.0, + "grad_norm": 1.5771297216415405, + "learning_rate": 0.0001, + "loss": 0.0243, + "step": 15960 + }, + { + "epoch": 105.0657894736842, + "grad_norm": 1.2136021852493286, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 15970 + }, + { + "epoch": 105.13157894736842, + "grad_norm": 1.7530934810638428, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 15980 + }, + { + "epoch": 105.19736842105263, + "grad_norm": 1.727021336555481, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 15990 + }, + { + "epoch": 105.26315789473684, + "grad_norm": 1.689032793045044, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 16000 + }, + { + "epoch": 105.32894736842105, + "grad_norm": 2.0343551635742188, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 16010 + }, + { + "epoch": 105.39473684210526, + "grad_norm": 1.865867257118225, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 16020 + }, + { + "epoch": 105.46052631578948, + "grad_norm": 1.5226266384124756, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 16030 + }, + { + "epoch": 105.52631578947368, + "grad_norm": 1.8167226314544678, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 16040 + }, + { + "epoch": 105.59210526315789, + "grad_norm": 1.2684314250946045, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 16050 + }, + { + "epoch": 105.65789473684211, + "grad_norm": 1.7727292776107788, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 16060 + }, + { + "epoch": 105.72368421052632, + "grad_norm": 1.3268436193466187, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 16070 + }, + { + "epoch": 105.78947368421052, + "grad_norm": 1.5590864419937134, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 16080 + }, + { + "epoch": 105.85526315789474, + "grad_norm": 1.4346693754196167, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 16090 + }, + { + "epoch": 105.92105263157895, + "grad_norm": 1.5737860202789307, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 16100 + }, + { + "epoch": 105.98684210526316, + "grad_norm": 1.5371108055114746, + "learning_rate": 0.0001, + "loss": 0.0228, + "step": 16110 + }, + { + "epoch": 106.05263157894737, + "grad_norm": 1.2796305418014526, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 16120 + }, + { + "epoch": 106.11842105263158, + "grad_norm": 1.506816029548645, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 16130 + }, + { + "epoch": 106.1842105263158, + "grad_norm": 1.4355484247207642, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 16140 + }, + { + "epoch": 106.25, + "grad_norm": 1.6774417161941528, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 16150 + }, + { + "epoch": 106.3157894736842, + "grad_norm": 1.5477979183197021, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 16160 + }, + { + "epoch": 106.38157894736842, + "grad_norm": 1.6077028512954712, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 16170 + }, + { + "epoch": 106.44736842105263, + "grad_norm": 1.9327597618103027, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 16180 + }, + { + "epoch": 106.51315789473684, + "grad_norm": 1.2858346700668335, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 16190 + }, + { + "epoch": 106.57894736842105, + "grad_norm": 1.3751716613769531, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 16200 + }, + { + "epoch": 106.64473684210526, + "grad_norm": 1.340836763381958, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 16210 + }, + { + "epoch": 106.71052631578948, + "grad_norm": 1.5455154180526733, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 16220 + }, + { + "epoch": 106.77631578947368, + "grad_norm": 1.066322922706604, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 16230 + }, + { + "epoch": 106.84210526315789, + "grad_norm": 1.5077837705612183, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 16240 + }, + { + "epoch": 106.90789473684211, + "grad_norm": 1.211094856262207, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 16250 + }, + { + "epoch": 106.97368421052632, + "grad_norm": 2.008754014968872, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 16260 + }, + { + "epoch": 107.03947368421052, + "grad_norm": 1.3147034645080566, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 16270 + }, + { + "epoch": 107.10526315789474, + "grad_norm": 1.1280360221862793, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 16280 + }, + { + "epoch": 107.17105263157895, + "grad_norm": 1.3991897106170654, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 16290 + }, + { + "epoch": 107.23684210526316, + "grad_norm": 1.2809622287750244, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 16300 + }, + { + "epoch": 107.30263157894737, + "grad_norm": 1.4164490699768066, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 16310 + }, + { + "epoch": 107.36842105263158, + "grad_norm": 1.374470829963684, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 16320 + }, + { + "epoch": 107.4342105263158, + "grad_norm": 1.3295577764511108, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 16330 + }, + { + "epoch": 107.5, + "grad_norm": 1.4546465873718262, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 16340 + }, + { + "epoch": 107.5657894736842, + "grad_norm": 1.6725012063980103, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 16350 + }, + { + "epoch": 107.63157894736842, + "grad_norm": 1.4987870454788208, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 16360 + }, + { + "epoch": 107.69736842105263, + "grad_norm": 1.4768329858779907, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 16370 + }, + { + "epoch": 107.76315789473684, + "grad_norm": 1.509992003440857, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 16380 + }, + { + "epoch": 107.82894736842105, + "grad_norm": 1.5696868896484375, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 16390 + }, + { + "epoch": 107.89473684210526, + "grad_norm": 1.9608714580535889, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 16400 + }, + { + "epoch": 107.96052631578948, + "grad_norm": 1.597530722618103, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 16410 + }, + { + "epoch": 108.02631578947368, + "grad_norm": 1.544816493988037, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 16420 + }, + { + "epoch": 108.09210526315789, + "grad_norm": 1.8198386430740356, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 16430 + }, + { + "epoch": 108.15789473684211, + "grad_norm": 1.1581134796142578, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 16440 + }, + { + "epoch": 108.22368421052632, + "grad_norm": 1.273421049118042, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 16450 + }, + { + "epoch": 108.28947368421052, + "grad_norm": 1.438262939453125, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 16460 + }, + { + "epoch": 108.35526315789474, + "grad_norm": 1.5786799192428589, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 16470 + }, + { + "epoch": 108.42105263157895, + "grad_norm": 1.8563624620437622, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 16480 + }, + { + "epoch": 108.48684210526316, + "grad_norm": 1.6424920558929443, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 16490 + }, + { + "epoch": 108.55263157894737, + "grad_norm": 1.5354336500167847, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 16500 + }, + { + "epoch": 108.61842105263158, + "grad_norm": 1.7186359167099, + "learning_rate": 0.0001, + "loss": 0.025, + "step": 16510 + }, + { + "epoch": 108.6842105263158, + "grad_norm": 1.3204076290130615, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 16520 + }, + { + "epoch": 108.75, + "grad_norm": 1.1582022905349731, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 16530 + }, + { + "epoch": 108.8157894736842, + "grad_norm": 1.2648552656173706, + "learning_rate": 0.0001, + "loss": 0.0242, + "step": 16540 + }, + { + "epoch": 108.88157894736842, + "grad_norm": 1.2796598672866821, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 16550 + }, + { + "epoch": 108.94736842105263, + "grad_norm": 1.9214965105056763, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 16560 + }, + { + "epoch": 109.01315789473684, + "grad_norm": 1.449998140335083, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 16570 + }, + { + "epoch": 109.07894736842105, + "grad_norm": 1.1618794202804565, + "learning_rate": 0.0001, + "loss": 0.0249, + "step": 16580 + }, + { + "epoch": 109.14473684210526, + "grad_norm": 1.4211252927780151, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 16590 + }, + { + "epoch": 109.21052631578948, + "grad_norm": 1.3328765630722046, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 16600 + }, + { + "epoch": 109.27631578947368, + "grad_norm": 1.715414047241211, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 16610 + }, + { + "epoch": 109.34210526315789, + "grad_norm": 1.2124247550964355, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 16620 + }, + { + "epoch": 109.40789473684211, + "grad_norm": 1.3251482248306274, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 16630 + }, + { + "epoch": 109.47368421052632, + "grad_norm": 1.4357656240463257, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 16640 + }, + { + "epoch": 109.53947368421052, + "grad_norm": 1.2938634157180786, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 16650 + }, + { + "epoch": 109.60526315789474, + "grad_norm": 1.2677600383758545, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 16660 + }, + { + "epoch": 109.67105263157895, + "grad_norm": 1.1516252756118774, + "learning_rate": 0.0001, + "loss": 0.0245, + "step": 16670 + }, + { + "epoch": 109.73684210526316, + "grad_norm": 1.4747333526611328, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 16680 + }, + { + "epoch": 109.80263157894737, + "grad_norm": 1.3867018222808838, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 16690 + }, + { + "epoch": 109.86842105263158, + "grad_norm": 1.8688561916351318, + "learning_rate": 0.0001, + "loss": 0.0233, + "step": 16700 + }, + { + "epoch": 109.9342105263158, + "grad_norm": 1.686368465423584, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 16710 + }, + { + "epoch": 110.0, + "grad_norm": 1.311707615852356, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 16720 + }, + { + "epoch": 110.0657894736842, + "grad_norm": 1.1597585678100586, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 16730 + }, + { + "epoch": 110.13157894736842, + "grad_norm": 1.41483736038208, + "learning_rate": 0.0001, + "loss": 0.0239, + "step": 16740 + }, + { + "epoch": 110.19736842105263, + "grad_norm": 1.4122625589370728, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 16750 + }, + { + "epoch": 110.26315789473684, + "grad_norm": 1.336828589439392, + "learning_rate": 0.0001, + "loss": 0.0248, + "step": 16760 + }, + { + "epoch": 110.32894736842105, + "grad_norm": 1.5870128870010376, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 16770 + }, + { + "epoch": 110.39473684210526, + "grad_norm": 1.2915987968444824, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 16780 + }, + { + "epoch": 110.46052631578948, + "grad_norm": 1.4546549320220947, + "learning_rate": 0.0001, + "loss": 0.0228, + "step": 16790 + }, + { + "epoch": 110.52631578947368, + "grad_norm": 1.3749148845672607, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 16800 + }, + { + "epoch": 110.59210526315789, + "grad_norm": 1.8686497211456299, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 16810 + }, + { + "epoch": 110.65789473684211, + "grad_norm": 1.5140857696533203, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 16820 + }, + { + "epoch": 110.72368421052632, + "grad_norm": 1.222466230392456, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 16830 + }, + { + "epoch": 110.78947368421052, + "grad_norm": 1.4809081554412842, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 16840 + }, + { + "epoch": 110.85526315789474, + "grad_norm": 1.4631025791168213, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 16850 + }, + { + "epoch": 110.92105263157895, + "grad_norm": 1.5033537149429321, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 16860 + }, + { + "epoch": 110.98684210526316, + "grad_norm": 1.5242358446121216, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 16870 + }, + { + "epoch": 111.05263157894737, + "grad_norm": 1.4843236207962036, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 16880 + }, + { + "epoch": 111.11842105263158, + "grad_norm": 1.1458847522735596, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 16890 + }, + { + "epoch": 111.1842105263158, + "grad_norm": 1.1731573343276978, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 16900 + }, + { + "epoch": 111.25, + "grad_norm": 1.4374542236328125, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 16910 + }, + { + "epoch": 111.3157894736842, + "grad_norm": 1.523205041885376, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 16920 + }, + { + "epoch": 111.38157894736842, + "grad_norm": 1.5758098363876343, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 16930 + }, + { + "epoch": 111.44736842105263, + "grad_norm": 1.42531156539917, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 16940 + }, + { + "epoch": 111.51315789473684, + "grad_norm": 1.332920789718628, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 16950 + }, + { + "epoch": 111.57894736842105, + "grad_norm": 1.5037161111831665, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 16960 + }, + { + "epoch": 111.64473684210526, + "grad_norm": 1.2761225700378418, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 16970 + }, + { + "epoch": 111.71052631578948, + "grad_norm": 1.315170168876648, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 16980 + }, + { + "epoch": 111.77631578947368, + "grad_norm": 1.6541320085525513, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 16990 + }, + { + "epoch": 111.84210526315789, + "grad_norm": 1.4604840278625488, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 17000 + }, + { + "epoch": 111.90789473684211, + "grad_norm": 1.7079929113388062, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 17010 + }, + { + "epoch": 111.97368421052632, + "grad_norm": 1.5881198644638062, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 17020 + }, + { + "epoch": 112.03947368421052, + "grad_norm": 1.1928815841674805, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 17030 + }, + { + "epoch": 112.10526315789474, + "grad_norm": 1.283767819404602, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 17040 + }, + { + "epoch": 112.17105263157895, + "grad_norm": 1.0753800868988037, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 17050 + }, + { + "epoch": 112.23684210526316, + "grad_norm": 1.3679052591323853, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 17060 + }, + { + "epoch": 112.30263157894737, + "grad_norm": 1.5422084331512451, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 17070 + }, + { + "epoch": 112.36842105263158, + "grad_norm": 1.0112746953964233, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 17080 + }, + { + "epoch": 112.4342105263158, + "grad_norm": 1.7527899742126465, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 17090 + }, + { + "epoch": 112.5, + "grad_norm": 2.344182252883911, + "learning_rate": 0.0001, + "loss": 0.0312, + "step": 17100 + }, + { + "epoch": 112.5657894736842, + "grad_norm": 2.5137453079223633, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 17110 + }, + { + "epoch": 112.63157894736842, + "grad_norm": 2.357712507247925, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 17120 + }, + { + "epoch": 112.69736842105263, + "grad_norm": 2.287658929824829, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 17130 + }, + { + "epoch": 112.76315789473684, + "grad_norm": 1.9697659015655518, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 17140 + }, + { + "epoch": 112.82894736842105, + "grad_norm": 1.8423213958740234, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 17150 + }, + { + "epoch": 112.89473684210526, + "grad_norm": 1.7179113626480103, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 17160 + }, + { + "epoch": 112.96052631578948, + "grad_norm": 1.2885282039642334, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 17170 + }, + { + "epoch": 113.02631578947368, + "grad_norm": 2.470750331878662, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 17180 + }, + { + "epoch": 113.09210526315789, + "grad_norm": 1.9972500801086426, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 17190 + }, + { + "epoch": 113.15789473684211, + "grad_norm": 1.7662476301193237, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 17200 + }, + { + "epoch": 113.22368421052632, + "grad_norm": 2.526029586791992, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 17210 + }, + { + "epoch": 113.28947368421052, + "grad_norm": 1.8449680805206299, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 17220 + }, + { + "epoch": 113.35526315789474, + "grad_norm": 1.9079487323760986, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 17230 + }, + { + "epoch": 113.42105263157895, + "grad_norm": 1.5968166589736938, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 17240 + }, + { + "epoch": 113.48684210526316, + "grad_norm": 1.7774184942245483, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 17250 + }, + { + "epoch": 113.55263157894737, + "grad_norm": 1.8195269107818604, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 17260 + }, + { + "epoch": 113.61842105263158, + "grad_norm": 2.0075738430023193, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 17270 + }, + { + "epoch": 113.6842105263158, + "grad_norm": 1.6541519165039062, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 17280 + }, + { + "epoch": 113.75, + "grad_norm": 1.6574792861938477, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 17290 + }, + { + "epoch": 113.8157894736842, + "grad_norm": 1.459970235824585, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 17300 + }, + { + "epoch": 113.88157894736842, + "grad_norm": 1.072644829750061, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 17310 + }, + { + "epoch": 113.94736842105263, + "grad_norm": 1.4994428157806396, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 17320 + }, + { + "epoch": 114.01315789473684, + "grad_norm": 1.1813583374023438, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 17330 + }, + { + "epoch": 114.07894736842105, + "grad_norm": 1.115692377090454, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 17340 + }, + { + "epoch": 114.14473684210526, + "grad_norm": 1.6557120084762573, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 17350 + }, + { + "epoch": 114.21052631578948, + "grad_norm": 1.287843942642212, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 17360 + }, + { + "epoch": 114.27631578947368, + "grad_norm": 1.5140693187713623, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 17370 + }, + { + "epoch": 114.34210526315789, + "grad_norm": 1.3632303476333618, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 17380 + }, + { + "epoch": 114.40789473684211, + "grad_norm": 1.5650956630706787, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 17390 + }, + { + "epoch": 114.47368421052632, + "grad_norm": 1.413570761680603, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 17400 + }, + { + "epoch": 114.53947368421052, + "grad_norm": 1.2383382320404053, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 17410 + }, + { + "epoch": 114.60526315789474, + "grad_norm": 1.6793911457061768, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 17420 + }, + { + "epoch": 114.67105263157895, + "grad_norm": 1.5027910470962524, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 17430 + }, + { + "epoch": 114.73684210526316, + "grad_norm": 1.4984204769134521, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 17440 + }, + { + "epoch": 114.80263157894737, + "grad_norm": 1.712581992149353, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 17450 + }, + { + "epoch": 114.86842105263158, + "grad_norm": 1.507750153541565, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 17460 + }, + { + "epoch": 114.9342105263158, + "grad_norm": 1.515345573425293, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 17470 + }, + { + "epoch": 115.0, + "grad_norm": 1.804507851600647, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 17480 + }, + { + "epoch": 115.0657894736842, + "grad_norm": 1.6192446947097778, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 17490 + }, + { + "epoch": 115.13157894736842, + "grad_norm": 1.3027291297912598, + "learning_rate": 0.0001, + "loss": 0.0228, + "step": 17500 + }, + { + "epoch": 115.19736842105263, + "grad_norm": 1.5303033590316772, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 17510 + }, + { + "epoch": 115.26315789473684, + "grad_norm": 1.245597004890442, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 17520 + }, + { + "epoch": 115.32894736842105, + "grad_norm": 1.048466444015503, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 17530 + }, + { + "epoch": 115.39473684210526, + "grad_norm": 1.5736318826675415, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 17540 + }, + { + "epoch": 115.46052631578948, + "grad_norm": 1.8937121629714966, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 17550 + }, + { + "epoch": 115.52631578947368, + "grad_norm": 1.350262999534607, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 17560 + }, + { + "epoch": 115.59210526315789, + "grad_norm": 1.4281275272369385, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 17570 + }, + { + "epoch": 115.65789473684211, + "grad_norm": 1.6036561727523804, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 17580 + }, + { + "epoch": 115.72368421052632, + "grad_norm": 1.1876840591430664, + "learning_rate": 0.0001, + "loss": 0.024, + "step": 17590 + }, + { + "epoch": 115.78947368421052, + "grad_norm": 1.1769388914108276, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 17600 + }, + { + "epoch": 115.85526315789474, + "grad_norm": 1.621465802192688, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 17610 + }, + { + "epoch": 115.92105263157895, + "grad_norm": 3.2968623638153076, + "learning_rate": 0.0001, + "loss": 0.0258, + "step": 17620 + }, + { + "epoch": 115.98684210526316, + "grad_norm": 1.8271244764328003, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 17630 + }, + { + "epoch": 116.05263157894737, + "grad_norm": 1.8423553705215454, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 17640 + }, + { + "epoch": 116.11842105263158, + "grad_norm": 1.65046226978302, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 17650 + }, + { + "epoch": 116.1842105263158, + "grad_norm": 1.7557287216186523, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 17660 + }, + { + "epoch": 116.25, + "grad_norm": 2.067875385284424, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 17670 + }, + { + "epoch": 116.3157894736842, + "grad_norm": 1.5921411514282227, + "learning_rate": 0.0001, + "loss": 0.0246, + "step": 17680 + }, + { + "epoch": 116.38157894736842, + "grad_norm": 1.153791069984436, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 17690 + }, + { + "epoch": 116.44736842105263, + "grad_norm": 1.2742387056350708, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 17700 + }, + { + "epoch": 116.51315789473684, + "grad_norm": 1.384451150894165, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 17710 + }, + { + "epoch": 116.57894736842105, + "grad_norm": 1.3240984678268433, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 17720 + }, + { + "epoch": 116.64473684210526, + "grad_norm": 1.3543736934661865, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 17730 + }, + { + "epoch": 116.71052631578948, + "grad_norm": 1.4543614387512207, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 17740 + }, + { + "epoch": 116.77631578947368, + "grad_norm": 1.2755897045135498, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 17750 + }, + { + "epoch": 116.84210526315789, + "grad_norm": 1.4515208005905151, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 17760 + }, + { + "epoch": 116.90789473684211, + "grad_norm": 1.4449197053909302, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 17770 + }, + { + "epoch": 116.97368421052632, + "grad_norm": 1.1386222839355469, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 17780 + }, + { + "epoch": 117.03947368421052, + "grad_norm": 1.416370153427124, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 17790 + }, + { + "epoch": 117.10526315789474, + "grad_norm": 1.1340827941894531, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 17800 + }, + { + "epoch": 117.17105263157895, + "grad_norm": 1.6249243021011353, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 17810 + }, + { + "epoch": 117.23684210526316, + "grad_norm": 1.2724114656448364, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 17820 + }, + { + "epoch": 117.30263157894737, + "grad_norm": 1.7152994871139526, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 17830 + }, + { + "epoch": 117.36842105263158, + "grad_norm": 1.4186722040176392, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 17840 + }, + { + "epoch": 117.4342105263158, + "grad_norm": 1.5989038944244385, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 17850 + }, + { + "epoch": 117.5, + "grad_norm": 1.5071998834609985, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 17860 + }, + { + "epoch": 117.5657894736842, + "grad_norm": 1.528257131576538, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 17870 + }, + { + "epoch": 117.63157894736842, + "grad_norm": 1.3654208183288574, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 17880 + }, + { + "epoch": 117.69736842105263, + "grad_norm": 2.163045883178711, + "learning_rate": 0.0001, + "loss": 0.0228, + "step": 17890 + }, + { + "epoch": 117.76315789473684, + "grad_norm": 1.3992512226104736, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 17900 + }, + { + "epoch": 117.82894736842105, + "grad_norm": 1.3927537202835083, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 17910 + }, + { + "epoch": 117.89473684210526, + "grad_norm": 1.443617343902588, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 17920 + }, + { + "epoch": 117.96052631578948, + "grad_norm": 1.7115437984466553, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 17930 + }, + { + "epoch": 118.02631578947368, + "grad_norm": 1.743646502494812, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 17940 + }, + { + "epoch": 118.09210526315789, + "grad_norm": 1.8455554246902466, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 17950 + }, + { + "epoch": 118.15789473684211, + "grad_norm": 1.5267865657806396, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 17960 + }, + { + "epoch": 118.22368421052632, + "grad_norm": 1.3369451761245728, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 17970 + }, + { + "epoch": 118.28947368421052, + "grad_norm": 1.7073959112167358, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 17980 + }, + { + "epoch": 118.35526315789474, + "grad_norm": 1.3288969993591309, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 17990 + }, + { + "epoch": 118.42105263157895, + "grad_norm": 1.353158950805664, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 18000 + }, + { + "epoch": 118.48684210526316, + "grad_norm": 1.260098934173584, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 18010 + }, + { + "epoch": 118.55263157894737, + "grad_norm": 1.0010889768600464, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 18020 + }, + { + "epoch": 118.61842105263158, + "grad_norm": 1.4269499778747559, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 18030 + }, + { + "epoch": 118.6842105263158, + "grad_norm": 1.6440515518188477, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 18040 + }, + { + "epoch": 118.75, + "grad_norm": 1.2828301191329956, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 18050 + }, + { + "epoch": 118.8157894736842, + "grad_norm": 1.632926106452942, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 18060 + }, + { + "epoch": 118.88157894736842, + "grad_norm": 1.6261667013168335, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 18070 + }, + { + "epoch": 118.94736842105263, + "grad_norm": 1.401406168937683, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 18080 + }, + { + "epoch": 119.01315789473684, + "grad_norm": 1.7001771926879883, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 18090 + }, + { + "epoch": 119.07894736842105, + "grad_norm": 1.5548099279403687, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 18100 + }, + { + "epoch": 119.14473684210526, + "grad_norm": 1.1037205457687378, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 18110 + }, + { + "epoch": 119.21052631578948, + "grad_norm": 1.5918753147125244, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 18120 + }, + { + "epoch": 119.27631578947368, + "grad_norm": 1.6971482038497925, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 18130 + }, + { + "epoch": 119.34210526315789, + "grad_norm": 2.4634158611297607, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 18140 + }, + { + "epoch": 119.40789473684211, + "grad_norm": 1.9838364124298096, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 18150 + }, + { + "epoch": 119.47368421052632, + "grad_norm": 1.578308343887329, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 18160 + }, + { + "epoch": 119.53947368421052, + "grad_norm": 1.6289037466049194, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 18170 + }, + { + "epoch": 119.60526315789474, + "grad_norm": 1.2893215417861938, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 18180 + }, + { + "epoch": 119.67105263157895, + "grad_norm": 1.372152328491211, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 18190 + }, + { + "epoch": 119.73684210526316, + "grad_norm": 1.3960975408554077, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 18200 + }, + { + "epoch": 119.80263157894737, + "grad_norm": 2.3912997245788574, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 18210 + }, + { + "epoch": 119.86842105263158, + "grad_norm": 2.194430112838745, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 18220 + }, + { + "epoch": 119.9342105263158, + "grad_norm": 1.995544195175171, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 18230 + }, + { + "epoch": 120.0, + "grad_norm": 2.018141031265259, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 18240 + }, + { + "epoch": 120.0657894736842, + "grad_norm": 2.009296178817749, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 18250 + }, + { + "epoch": 120.13157894736842, + "grad_norm": 1.7988462448120117, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 18260 + }, + { + "epoch": 120.19736842105263, + "grad_norm": 1.3400589227676392, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 18270 + }, + { + "epoch": 120.26315789473684, + "grad_norm": 1.8497594594955444, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 18280 + }, + { + "epoch": 120.32894736842105, + "grad_norm": 1.826569676399231, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 18290 + }, + { + "epoch": 120.39473684210526, + "grad_norm": 1.4031181335449219, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 18300 + }, + { + "epoch": 120.46052631578948, + "grad_norm": 1.3137474060058594, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 18310 + }, + { + "epoch": 120.52631578947368, + "grad_norm": 1.4643906354904175, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 18320 + }, + { + "epoch": 120.59210526315789, + "grad_norm": 1.4734902381896973, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 18330 + }, + { + "epoch": 120.65789473684211, + "grad_norm": 1.2963664531707764, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 18340 + }, + { + "epoch": 120.72368421052632, + "grad_norm": 1.8689401149749756, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 18350 + }, + { + "epoch": 120.78947368421052, + "grad_norm": 1.442025899887085, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 18360 + }, + { + "epoch": 120.85526315789474, + "grad_norm": 1.0878459215164185, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 18370 + }, + { + "epoch": 120.92105263157895, + "grad_norm": 1.1373950242996216, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 18380 + }, + { + "epoch": 120.98684210526316, + "grad_norm": 1.359013319015503, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 18390 + }, + { + "epoch": 121.05263157894737, + "grad_norm": 1.154805302619934, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 18400 + }, + { + "epoch": 121.11842105263158, + "grad_norm": 1.2236026525497437, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 18410 + }, + { + "epoch": 121.1842105263158, + "grad_norm": 1.6635630130767822, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 18420 + }, + { + "epoch": 121.25, + "grad_norm": 1.2726155519485474, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 18430 + }, + { + "epoch": 121.3157894736842, + "grad_norm": 1.6328818798065186, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 18440 + }, + { + "epoch": 121.38157894736842, + "grad_norm": 1.1731946468353271, + "learning_rate": 0.0001, + "loss": 0.0227, + "step": 18450 + }, + { + "epoch": 121.44736842105263, + "grad_norm": 1.302308440208435, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 18460 + }, + { + "epoch": 121.51315789473684, + "grad_norm": 1.287872076034546, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 18470 + }, + { + "epoch": 121.57894736842105, + "grad_norm": 1.315284013748169, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 18480 + }, + { + "epoch": 121.64473684210526, + "grad_norm": 1.6699914932250977, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 18490 + }, + { + "epoch": 121.71052631578948, + "grad_norm": 1.2884074449539185, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 18500 + }, + { + "epoch": 121.77631578947368, + "grad_norm": 1.6029454469680786, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 18510 + }, + { + "epoch": 121.84210526315789, + "grad_norm": 1.4443784952163696, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 18520 + }, + { + "epoch": 121.90789473684211, + "grad_norm": 1.3697898387908936, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 18530 + }, + { + "epoch": 121.97368421052632, + "grad_norm": 1.4503991603851318, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 18540 + }, + { + "epoch": 122.03947368421052, + "grad_norm": 2.0889556407928467, + "learning_rate": 0.0001, + "loss": 0.0341, + "step": 18550 + }, + { + "epoch": 122.10526315789474, + "grad_norm": 2.1557774543762207, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 18560 + }, + { + "epoch": 122.17105263157895, + "grad_norm": 1.9375776052474976, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 18570 + }, + { + "epoch": 122.23684210526316, + "grad_norm": 1.9048449993133545, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 18580 + }, + { + "epoch": 122.30263157894737, + "grad_norm": 1.9082547426223755, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 18590 + }, + { + "epoch": 122.36842105263158, + "grad_norm": 1.3321175575256348, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 18600 + }, + { + "epoch": 122.4342105263158, + "grad_norm": 1.7989875078201294, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 18610 + }, + { + "epoch": 122.5, + "grad_norm": 1.6709932088851929, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 18620 + }, + { + "epoch": 122.5657894736842, + "grad_norm": 2.0253422260284424, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 18630 + }, + { + "epoch": 122.63157894736842, + "grad_norm": 1.6843767166137695, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 18640 + }, + { + "epoch": 122.69736842105263, + "grad_norm": 1.6629420518875122, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 18650 + }, + { + "epoch": 122.76315789473684, + "grad_norm": 1.5332101583480835, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 18660 + }, + { + "epoch": 122.82894736842105, + "grad_norm": 1.3796052932739258, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 18670 + }, + { + "epoch": 122.89473684210526, + "grad_norm": 1.4094196557998657, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 18680 + }, + { + "epoch": 122.96052631578948, + "grad_norm": 1.7053511142730713, + "learning_rate": 0.0001, + "loss": 0.0253, + "step": 18690 + }, + { + "epoch": 123.02631578947368, + "grad_norm": 1.2467565536499023, + "learning_rate": 0.0001, + "loss": 0.0232, + "step": 18700 + }, + { + "epoch": 123.09210526315789, + "grad_norm": 1.2807624340057373, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 18710 + }, + { + "epoch": 123.15789473684211, + "grad_norm": 2.067713499069214, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 18720 + }, + { + "epoch": 123.22368421052632, + "grad_norm": 1.4443409442901611, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 18730 + }, + { + "epoch": 123.28947368421052, + "grad_norm": 1.0428988933563232, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 18740 + }, + { + "epoch": 123.35526315789474, + "grad_norm": 1.4765625, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 18750 + }, + { + "epoch": 123.42105263157895, + "grad_norm": 1.2120792865753174, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 18760 + }, + { + "epoch": 123.48684210526316, + "grad_norm": 1.4466508626937866, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 18770 + }, + { + "epoch": 123.55263157894737, + "grad_norm": 1.5829192399978638, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 18780 + }, + { + "epoch": 123.61842105263158, + "grad_norm": 1.7138676643371582, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 18790 + }, + { + "epoch": 123.6842105263158, + "grad_norm": 1.3385616540908813, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 18800 + }, + { + "epoch": 123.75, + "grad_norm": 1.5545923709869385, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 18810 + }, + { + "epoch": 123.8157894736842, + "grad_norm": 1.66548752784729, + "learning_rate": 0.0001, + "loss": 0.0234, + "step": 18820 + }, + { + "epoch": 123.88157894736842, + "grad_norm": 1.5020532608032227, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 18830 + }, + { + "epoch": 123.94736842105263, + "grad_norm": 1.4988902807235718, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 18840 + }, + { + "epoch": 124.01315789473684, + "grad_norm": 1.415207862854004, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 18850 + }, + { + "epoch": 124.07894736842105, + "grad_norm": 1.9516119956970215, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 18860 + }, + { + "epoch": 124.14473684210526, + "grad_norm": 1.5451509952545166, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 18870 + }, + { + "epoch": 124.21052631578948, + "grad_norm": 2.0787911415100098, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 18880 + }, + { + "epoch": 124.27631578947368, + "grad_norm": 1.574526071548462, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 18890 + }, + { + "epoch": 124.34210526315789, + "grad_norm": 1.3802706003189087, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 18900 + }, + { + "epoch": 124.40789473684211, + "grad_norm": 1.6244707107543945, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 18910 + }, + { + "epoch": 124.47368421052632, + "grad_norm": 2.145286798477173, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 18920 + }, + { + "epoch": 124.53947368421052, + "grad_norm": 1.4865652322769165, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 18930 + }, + { + "epoch": 124.60526315789474, + "grad_norm": 1.6736758947372437, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 18940 + }, + { + "epoch": 124.67105263157895, + "grad_norm": 1.779492735862732, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 18950 + }, + { + "epoch": 124.73684210526316, + "grad_norm": 1.4367070198059082, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 18960 + }, + { + "epoch": 124.80263157894737, + "grad_norm": 2.134930372238159, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 18970 + }, + { + "epoch": 124.86842105263158, + "grad_norm": 1.5714412927627563, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 18980 + }, + { + "epoch": 124.9342105263158, + "grad_norm": 1.3601255416870117, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 18990 + }, + { + "epoch": 125.0, + "grad_norm": 1.4811025857925415, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 19000 + }, + { + "epoch": 125.0657894736842, + "grad_norm": 1.156876564025879, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 19010 + }, + { + "epoch": 125.13157894736842, + "grad_norm": 1.284725546836853, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 19020 + }, + { + "epoch": 125.19736842105263, + "grad_norm": 1.6561628580093384, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 19030 + }, + { + "epoch": 125.26315789473684, + "grad_norm": 1.505050778388977, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 19040 + }, + { + "epoch": 125.32894736842105, + "grad_norm": 1.6309056282043457, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 19050 + }, + { + "epoch": 125.39473684210526, + "grad_norm": 1.736699104309082, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 19060 + }, + { + "epoch": 125.46052631578948, + "grad_norm": 1.9428766965866089, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 19070 + }, + { + "epoch": 125.52631578947368, + "grad_norm": 1.7686620950698853, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 19080 + }, + { + "epoch": 125.59210526315789, + "grad_norm": 1.7622973918914795, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 19090 + }, + { + "epoch": 125.65789473684211, + "grad_norm": 1.5690042972564697, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 19100 + }, + { + "epoch": 125.72368421052632, + "grad_norm": 1.8227565288543701, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 19110 + }, + { + "epoch": 125.78947368421052, + "grad_norm": 1.6870558261871338, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 19120 + }, + { + "epoch": 125.85526315789474, + "grad_norm": 1.479076862335205, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 19130 + }, + { + "epoch": 125.92105263157895, + "grad_norm": 1.2277092933654785, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 19140 + }, + { + "epoch": 125.98684210526316, + "grad_norm": 1.489528775215149, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 19150 + }, + { + "epoch": 126.05263157894737, + "grad_norm": 1.0452980995178223, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 19160 + }, + { + "epoch": 126.11842105263158, + "grad_norm": 1.3360462188720703, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 19170 + }, + { + "epoch": 126.1842105263158, + "grad_norm": 1.3254673480987549, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 19180 + }, + { + "epoch": 126.25, + "grad_norm": 1.2667726278305054, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 19190 + }, + { + "epoch": 126.3157894736842, + "grad_norm": 1.4161853790283203, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 19200 + }, + { + "epoch": 126.38157894736842, + "grad_norm": 1.2914543151855469, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 19210 + }, + { + "epoch": 126.44736842105263, + "grad_norm": 1.231687307357788, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 19220 + }, + { + "epoch": 126.51315789473684, + "grad_norm": 1.5462013483047485, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 19230 + }, + { + "epoch": 126.57894736842105, + "grad_norm": 1.4174609184265137, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 19240 + }, + { + "epoch": 126.64473684210526, + "grad_norm": 1.4570950269699097, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 19250 + }, + { + "epoch": 126.71052631578948, + "grad_norm": 1.3737624883651733, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 19260 + }, + { + "epoch": 126.77631578947368, + "grad_norm": 1.2217376232147217, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 19270 + }, + { + "epoch": 126.84210526315789, + "grad_norm": 1.2423151731491089, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 19280 + }, + { + "epoch": 126.90789473684211, + "grad_norm": 0.9811283349990845, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 19290 + }, + { + "epoch": 126.97368421052632, + "grad_norm": 1.0705347061157227, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 19300 + }, + { + "epoch": 127.03947368421052, + "grad_norm": 1.479683518409729, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 19310 + }, + { + "epoch": 127.10526315789474, + "grad_norm": 1.0638930797576904, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 19320 + }, + { + "epoch": 127.17105263157895, + "grad_norm": 1.0353878736495972, + "learning_rate": 0.0001, + "loss": 0.0235, + "step": 19330 + }, + { + "epoch": 127.23684210526316, + "grad_norm": 1.5880337953567505, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 19340 + }, + { + "epoch": 127.30263157894737, + "grad_norm": 1.4664833545684814, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 19350 + }, + { + "epoch": 127.36842105263158, + "grad_norm": 1.302016019821167, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 19360 + }, + { + "epoch": 127.4342105263158, + "grad_norm": 1.1787173748016357, + "learning_rate": 0.0001, + "loss": 0.0225, + "step": 19370 + }, + { + "epoch": 127.5, + "grad_norm": 1.5672588348388672, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 19380 + }, + { + "epoch": 127.5657894736842, + "grad_norm": 1.434388518333435, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 19390 + }, + { + "epoch": 127.63157894736842, + "grad_norm": 1.571679949760437, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 19400 + }, + { + "epoch": 127.69736842105263, + "grad_norm": 1.2628552913665771, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 19410 + }, + { + "epoch": 127.76315789473684, + "grad_norm": 1.4900308847427368, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 19420 + }, + { + "epoch": 127.82894736842105, + "grad_norm": 1.2159069776535034, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 19430 + }, + { + "epoch": 127.89473684210526, + "grad_norm": 1.3993160724639893, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 19440 + }, + { + "epoch": 127.96052631578948, + "grad_norm": 1.1536054611206055, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 19450 + }, + { + "epoch": 128.02631578947367, + "grad_norm": 1.333641767501831, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 19460 + }, + { + "epoch": 128.0921052631579, + "grad_norm": 1.1968632936477661, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 19470 + }, + { + "epoch": 128.1578947368421, + "grad_norm": 1.3755112886428833, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 19480 + }, + { + "epoch": 128.22368421052633, + "grad_norm": 1.2440341711044312, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 19490 + }, + { + "epoch": 128.28947368421052, + "grad_norm": 1.4638385772705078, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 19500 + }, + { + "epoch": 128.35526315789474, + "grad_norm": 1.5661355257034302, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 19510 + }, + { + "epoch": 128.42105263157896, + "grad_norm": 1.3939542770385742, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 19520 + }, + { + "epoch": 128.48684210526315, + "grad_norm": 1.1770873069763184, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 19530 + }, + { + "epoch": 128.55263157894737, + "grad_norm": 1.2070765495300293, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 19540 + }, + { + "epoch": 128.6184210526316, + "grad_norm": 1.6263254880905151, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 19550 + }, + { + "epoch": 128.68421052631578, + "grad_norm": 1.2368308305740356, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 19560 + }, + { + "epoch": 128.75, + "grad_norm": 1.4440819025039673, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 19570 + }, + { + "epoch": 128.81578947368422, + "grad_norm": 1.1472845077514648, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 19580 + }, + { + "epoch": 128.8815789473684, + "grad_norm": 1.410331130027771, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 19590 + }, + { + "epoch": 128.94736842105263, + "grad_norm": 1.265761137008667, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 19600 + }, + { + "epoch": 129.01315789473685, + "grad_norm": 1.2967408895492554, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 19610 + }, + { + "epoch": 129.07894736842104, + "grad_norm": 1.5309879779815674, + "learning_rate": 0.0001, + "loss": 0.0215, + "step": 19620 + }, + { + "epoch": 129.14473684210526, + "grad_norm": 1.5370832681655884, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 19630 + }, + { + "epoch": 129.21052631578948, + "grad_norm": 1.3466033935546875, + "learning_rate": 0.0001, + "loss": 0.0226, + "step": 19640 + }, + { + "epoch": 129.27631578947367, + "grad_norm": 1.4539897441864014, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 19650 + }, + { + "epoch": 129.3421052631579, + "grad_norm": 1.633927583694458, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 19660 + }, + { + "epoch": 129.4078947368421, + "grad_norm": 1.246416449546814, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 19670 + }, + { + "epoch": 129.47368421052633, + "grad_norm": 1.3420946598052979, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 19680 + }, + { + "epoch": 129.53947368421052, + "grad_norm": 1.3653072118759155, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 19690 + }, + { + "epoch": 129.60526315789474, + "grad_norm": 1.0461795330047607, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 19700 + }, + { + "epoch": 129.67105263157896, + "grad_norm": 1.4482468366622925, + "learning_rate": 0.0001, + "loss": 0.0241, + "step": 19710 + }, + { + "epoch": 129.73684210526315, + "grad_norm": 1.0564301013946533, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 19720 + }, + { + "epoch": 129.80263157894737, + "grad_norm": 1.5804928541183472, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 19730 + }, + { + "epoch": 129.8684210526316, + "grad_norm": 1.6412838697433472, + "learning_rate": 0.0001, + "loss": 0.0237, + "step": 19740 + }, + { + "epoch": 129.93421052631578, + "grad_norm": 1.5836437940597534, + "learning_rate": 0.0001, + "loss": 0.0223, + "step": 19750 + }, + { + "epoch": 130.0, + "grad_norm": 1.197196125984192, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 19760 + }, + { + "epoch": 130.06578947368422, + "grad_norm": 0.96909499168396, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 19770 + }, + { + "epoch": 130.1315789473684, + "grad_norm": 1.4088032245635986, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 19780 + }, + { + "epoch": 130.19736842105263, + "grad_norm": 1.3101266622543335, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 19790 + }, + { + "epoch": 130.26315789473685, + "grad_norm": 1.375200867652893, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 19800 + }, + { + "epoch": 130.32894736842104, + "grad_norm": 1.243313193321228, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 19810 + }, + { + "epoch": 130.39473684210526, + "grad_norm": 1.121595025062561, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 19820 + }, + { + "epoch": 130.46052631578948, + "grad_norm": 2.108215093612671, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 19830 + }, + { + "epoch": 130.52631578947367, + "grad_norm": 2.1370813846588135, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 19840 + }, + { + "epoch": 130.5921052631579, + "grad_norm": 1.728285789489746, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 19850 + }, + { + "epoch": 130.6578947368421, + "grad_norm": 2.018031358718872, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 19860 + }, + { + "epoch": 130.72368421052633, + "grad_norm": 1.6380020380020142, + "learning_rate": 0.0001, + "loss": 0.026, + "step": 19870 + }, + { + "epoch": 130.78947368421052, + "grad_norm": 1.7799023389816284, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 19880 + }, + { + "epoch": 130.85526315789474, + "grad_norm": 1.419378638267517, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 19890 + }, + { + "epoch": 130.92105263157896, + "grad_norm": 1.769496202468872, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 19900 + }, + { + "epoch": 130.98684210526315, + "grad_norm": 1.9092391729354858, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 19910 + }, + { + "epoch": 131.05263157894737, + "grad_norm": 1.5821865797042847, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 19920 + }, + { + "epoch": 131.1184210526316, + "grad_norm": 1.5526002645492554, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 19930 + }, + { + "epoch": 131.18421052631578, + "grad_norm": 1.4830267429351807, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 19940 + }, + { + "epoch": 131.25, + "grad_norm": 1.3607596158981323, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 19950 + }, + { + "epoch": 131.31578947368422, + "grad_norm": 1.4321061372756958, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 19960 + }, + { + "epoch": 131.3815789473684, + "grad_norm": 1.343908667564392, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 19970 + }, + { + "epoch": 131.44736842105263, + "grad_norm": 1.047028660774231, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 19980 + }, + { + "epoch": 131.51315789473685, + "grad_norm": 1.2941601276397705, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 19990 + }, + { + "epoch": 131.57894736842104, + "grad_norm": 1.480362892150879, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 20000 + }, + { + "epoch": 131.64473684210526, + "grad_norm": 1.8121306896209717, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 20010 + }, + { + "epoch": 131.71052631578948, + "grad_norm": 1.2839956283569336, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 20020 + }, + { + "epoch": 131.77631578947367, + "grad_norm": 1.1678411960601807, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 20030 + }, + { + "epoch": 131.8421052631579, + "grad_norm": 1.2261030673980713, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 20040 + }, + { + "epoch": 131.9078947368421, + "grad_norm": 1.1341450214385986, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 20050 + }, + { + "epoch": 131.97368421052633, + "grad_norm": 1.0404419898986816, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 20060 + }, + { + "epoch": 132.03947368421052, + "grad_norm": 1.3259118795394897, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 20070 + }, + { + "epoch": 132.10526315789474, + "grad_norm": 1.6572375297546387, + "learning_rate": 0.0001, + "loss": 0.0239, + "step": 20080 + }, + { + "epoch": 132.17105263157896, + "grad_norm": 1.5282135009765625, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 20090 + }, + { + "epoch": 132.23684210526315, + "grad_norm": 1.3130202293395996, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 20100 + }, + { + "epoch": 132.30263157894737, + "grad_norm": 1.7606295347213745, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 20110 + }, + { + "epoch": 132.3684210526316, + "grad_norm": 1.5770694017410278, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 20120 + }, + { + "epoch": 132.43421052631578, + "grad_norm": 1.3118009567260742, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 20130 + }, + { + "epoch": 132.5, + "grad_norm": 1.220281958580017, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 20140 + }, + { + "epoch": 132.56578947368422, + "grad_norm": 1.5257964134216309, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 20150 + }, + { + "epoch": 132.6315789473684, + "grad_norm": 1.567521333694458, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 20160 + }, + { + "epoch": 132.69736842105263, + "grad_norm": 1.1980758905410767, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 20170 + }, + { + "epoch": 132.76315789473685, + "grad_norm": 1.453986644744873, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 20180 + }, + { + "epoch": 132.82894736842104, + "grad_norm": 1.4468498229980469, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 20190 + }, + { + "epoch": 132.89473684210526, + "grad_norm": 1.3943188190460205, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 20200 + }, + { + "epoch": 132.96052631578948, + "grad_norm": 1.7826237678527832, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 20210 + }, + { + "epoch": 133.02631578947367, + "grad_norm": 1.3166335821151733, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 20220 + }, + { + "epoch": 133.0921052631579, + "grad_norm": 1.3654383420944214, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 20230 + }, + { + "epoch": 133.1578947368421, + "grad_norm": 1.322871208190918, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 20240 + }, + { + "epoch": 133.22368421052633, + "grad_norm": 1.2936899662017822, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 20250 + }, + { + "epoch": 133.28947368421052, + "grad_norm": 0.9831916093826294, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 20260 + }, + { + "epoch": 133.35526315789474, + "grad_norm": 1.345542311668396, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 20270 + }, + { + "epoch": 133.42105263157896, + "grad_norm": 1.4100401401519775, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 20280 + }, + { + "epoch": 133.48684210526315, + "grad_norm": 1.1066662073135376, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 20290 + }, + { + "epoch": 133.55263157894737, + "grad_norm": 1.3817609548568726, + "learning_rate": 0.0001, + "loss": 0.0228, + "step": 20300 + }, + { + "epoch": 133.6184210526316, + "grad_norm": 1.107347846031189, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 20310 + }, + { + "epoch": 133.68421052631578, + "grad_norm": 0.8152048587799072, + "learning_rate": 0.0001, + "loss": 0.0214, + "step": 20320 + }, + { + "epoch": 133.75, + "grad_norm": 1.1018624305725098, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 20330 + }, + { + "epoch": 133.81578947368422, + "grad_norm": 1.5733144283294678, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 20340 + }, + { + "epoch": 133.8815789473684, + "grad_norm": 1.2820268869400024, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 20350 + }, + { + "epoch": 133.94736842105263, + "grad_norm": 1.4256007671356201, + "learning_rate": 0.0001, + "loss": 0.0238, + "step": 20360 + }, + { + "epoch": 134.01315789473685, + "grad_norm": 1.2284027338027954, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 20370 + }, + { + "epoch": 134.07894736842104, + "grad_norm": 1.1770983934402466, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 20380 + }, + { + "epoch": 134.14473684210526, + "grad_norm": 1.1191388368606567, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 20390 + }, + { + "epoch": 134.21052631578948, + "grad_norm": 1.1628490686416626, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 20400 + }, + { + "epoch": 134.27631578947367, + "grad_norm": 1.2943140268325806, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 20410 + }, + { + "epoch": 134.3421052631579, + "grad_norm": 1.0791293382644653, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 20420 + }, + { + "epoch": 134.4078947368421, + "grad_norm": 1.308792233467102, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 20430 + }, + { + "epoch": 134.47368421052633, + "grad_norm": 1.2937871217727661, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 20440 + }, + { + "epoch": 134.53947368421052, + "grad_norm": 1.2588351964950562, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 20450 + }, + { + "epoch": 134.60526315789474, + "grad_norm": 1.3284668922424316, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 20460 + }, + { + "epoch": 134.67105263157896, + "grad_norm": 1.3896305561065674, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 20470 + }, + { + "epoch": 134.73684210526315, + "grad_norm": 1.4380180835723877, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 20480 + }, + { + "epoch": 134.80263157894737, + "grad_norm": 1.1442885398864746, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 20490 + }, + { + "epoch": 134.8684210526316, + "grad_norm": 1.3140195608139038, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 20500 + }, + { + "epoch": 134.93421052631578, + "grad_norm": 1.2796006202697754, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 20510 + }, + { + "epoch": 135.0, + "grad_norm": 1.2553552389144897, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 20520 + }, + { + "epoch": 135.06578947368422, + "grad_norm": 1.573137640953064, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 20530 + }, + { + "epoch": 135.1315789473684, + "grad_norm": 1.4506351947784424, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 20540 + }, + { + "epoch": 135.19736842105263, + "grad_norm": 1.219154953956604, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 20550 + }, + { + "epoch": 135.26315789473685, + "grad_norm": 1.235091209411621, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 20560 + }, + { + "epoch": 135.32894736842104, + "grad_norm": 1.7018022537231445, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 20570 + }, + { + "epoch": 135.39473684210526, + "grad_norm": 1.2762569189071655, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 20580 + }, + { + "epoch": 135.46052631578948, + "grad_norm": 1.5831774473190308, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 20590 + }, + { + "epoch": 135.52631578947367, + "grad_norm": 1.3347238302230835, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 20600 + }, + { + "epoch": 135.5921052631579, + "grad_norm": 1.5127307176589966, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 20610 + }, + { + "epoch": 135.6578947368421, + "grad_norm": 1.3495934009552002, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 20620 + }, + { + "epoch": 135.72368421052633, + "grad_norm": 1.6535933017730713, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 20630 + }, + { + "epoch": 135.78947368421052, + "grad_norm": 1.341562032699585, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 20640 + }, + { + "epoch": 135.85526315789474, + "grad_norm": 1.2143185138702393, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 20650 + }, + { + "epoch": 135.92105263157896, + "grad_norm": 1.1850231885910034, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 20660 + }, + { + "epoch": 135.98684210526315, + "grad_norm": 1.3171666860580444, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 20670 + }, + { + "epoch": 136.05263157894737, + "grad_norm": 1.7448344230651855, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 20680 + }, + { + "epoch": 136.1184210526316, + "grad_norm": 1.0638881921768188, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 20690 + }, + { + "epoch": 136.18421052631578, + "grad_norm": 0.8317880034446716, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 20700 + }, + { + "epoch": 136.25, + "grad_norm": 1.029839277267456, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 20710 + }, + { + "epoch": 136.31578947368422, + "grad_norm": 0.9803492426872253, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 20720 + }, + { + "epoch": 136.3815789473684, + "grad_norm": 1.7811894416809082, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 20730 + }, + { + "epoch": 136.44736842105263, + "grad_norm": 1.0440852642059326, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 20740 + }, + { + "epoch": 136.51315789473685, + "grad_norm": 1.3539479970932007, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 20750 + }, + { + "epoch": 136.57894736842104, + "grad_norm": 1.2563800811767578, + "learning_rate": 0.0001, + "loss": 0.0219, + "step": 20760 + }, + { + "epoch": 136.64473684210526, + "grad_norm": 0.9620851874351501, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 20770 + }, + { + "epoch": 136.71052631578948, + "grad_norm": 1.340428352355957, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 20780 + }, + { + "epoch": 136.77631578947367, + "grad_norm": 1.2113215923309326, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 20790 + }, + { + "epoch": 136.8421052631579, + "grad_norm": 1.3657537698745728, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 20800 + }, + { + "epoch": 136.9078947368421, + "grad_norm": 1.3240277767181396, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 20810 + }, + { + "epoch": 136.97368421052633, + "grad_norm": 1.2962788343429565, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 20820 + }, + { + "epoch": 137.03947368421052, + "grad_norm": 0.9829322695732117, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 20830 + }, + { + "epoch": 137.10526315789474, + "grad_norm": 1.1034486293792725, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 20840 + }, + { + "epoch": 137.17105263157896, + "grad_norm": 1.254267692565918, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 20850 + }, + { + "epoch": 137.23684210526315, + "grad_norm": 1.059151530265808, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 20860 + }, + { + "epoch": 137.30263157894737, + "grad_norm": 1.5754255056381226, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 20870 + }, + { + "epoch": 137.3684210526316, + "grad_norm": 1.3503801822662354, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 20880 + }, + { + "epoch": 137.43421052631578, + "grad_norm": 1.3268914222717285, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 20890 + }, + { + "epoch": 137.5, + "grad_norm": 1.3608646392822266, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 20900 + }, + { + "epoch": 137.56578947368422, + "grad_norm": 1.1551671028137207, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 20910 + }, + { + "epoch": 137.6315789473684, + "grad_norm": 0.8847004175186157, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 20920 + }, + { + "epoch": 137.69736842105263, + "grad_norm": 1.4831230640411377, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 20930 + }, + { + "epoch": 137.76315789473685, + "grad_norm": 1.1870131492614746, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 20940 + }, + { + "epoch": 137.82894736842104, + "grad_norm": 1.3143157958984375, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 20950 + }, + { + "epoch": 137.89473684210526, + "grad_norm": 1.1769039630889893, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 20960 + }, + { + "epoch": 137.96052631578948, + "grad_norm": 1.852351427078247, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 20970 + }, + { + "epoch": 138.02631578947367, + "grad_norm": 1.6631145477294922, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 20980 + }, + { + "epoch": 138.0921052631579, + "grad_norm": 1.4467190504074097, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 20990 + }, + { + "epoch": 138.1578947368421, + "grad_norm": 1.4761419296264648, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 21000 + }, + { + "epoch": 138.22368421052633, + "grad_norm": 1.160988211631775, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 21010 + }, + { + "epoch": 138.28947368421052, + "grad_norm": 1.591023325920105, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 21020 + }, + { + "epoch": 138.35526315789474, + "grad_norm": 1.4847773313522339, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 21030 + }, + { + "epoch": 138.42105263157896, + "grad_norm": 1.3351836204528809, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 21040 + }, + { + "epoch": 138.48684210526315, + "grad_norm": 1.6248340606689453, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 21050 + }, + { + "epoch": 138.55263157894737, + "grad_norm": 1.3487075567245483, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 21060 + }, + { + "epoch": 138.6184210526316, + "grad_norm": 1.291077733039856, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 21070 + }, + { + "epoch": 138.68421052631578, + "grad_norm": 1.6398788690567017, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 21080 + }, + { + "epoch": 138.75, + "grad_norm": 1.8707720041275024, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 21090 + }, + { + "epoch": 138.81578947368422, + "grad_norm": 1.5530805587768555, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 21100 + }, + { + "epoch": 138.8815789473684, + "grad_norm": 1.3435490131378174, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 21110 + }, + { + "epoch": 138.94736842105263, + "grad_norm": 1.51068115234375, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 21120 + }, + { + "epoch": 139.01315789473685, + "grad_norm": 1.293702244758606, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 21130 + }, + { + "epoch": 139.07894736842104, + "grad_norm": 1.422736406326294, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 21140 + }, + { + "epoch": 139.14473684210526, + "grad_norm": 1.303194522857666, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 21150 + }, + { + "epoch": 139.21052631578948, + "grad_norm": 1.724423885345459, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 21160 + }, + { + "epoch": 139.27631578947367, + "grad_norm": 1.5041424036026, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 21170 + }, + { + "epoch": 139.3421052631579, + "grad_norm": 1.4980031251907349, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 21180 + }, + { + "epoch": 139.4078947368421, + "grad_norm": 1.6551101207733154, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 21190 + }, + { + "epoch": 139.47368421052633, + "grad_norm": 1.4190785884857178, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 21200 + }, + { + "epoch": 139.53947368421052, + "grad_norm": 1.5306510925292969, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 21210 + }, + { + "epoch": 139.60526315789474, + "grad_norm": 1.395179033279419, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 21220 + }, + { + "epoch": 139.67105263157896, + "grad_norm": 1.9223673343658447, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 21230 + }, + { + "epoch": 139.73684210526315, + "grad_norm": 1.42075777053833, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 21240 + }, + { + "epoch": 139.80263157894737, + "grad_norm": 1.3706669807434082, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 21250 + }, + { + "epoch": 139.8684210526316, + "grad_norm": 1.5455049276351929, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 21260 + }, + { + "epoch": 139.93421052631578, + "grad_norm": 1.6650147438049316, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 21270 + }, + { + "epoch": 140.0, + "grad_norm": 1.3898757696151733, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 21280 + }, + { + "epoch": 140.06578947368422, + "grad_norm": 1.516356110572815, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 21290 + }, + { + "epoch": 140.1315789473684, + "grad_norm": 1.162001609802246, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 21300 + }, + { + "epoch": 140.19736842105263, + "grad_norm": 1.5784013271331787, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 21310 + }, + { + "epoch": 140.26315789473685, + "grad_norm": 1.3628367185592651, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 21320 + }, + { + "epoch": 140.32894736842104, + "grad_norm": 1.2204854488372803, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 21330 + }, + { + "epoch": 140.39473684210526, + "grad_norm": 1.3196606636047363, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 21340 + }, + { + "epoch": 140.46052631578948, + "grad_norm": 1.3891950845718384, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 21350 + }, + { + "epoch": 140.52631578947367, + "grad_norm": 0.994315505027771, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 21360 + }, + { + "epoch": 140.5921052631579, + "grad_norm": 1.3768020868301392, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 21370 + }, + { + "epoch": 140.6578947368421, + "grad_norm": 0.8438733816146851, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 21380 + }, + { + "epoch": 140.72368421052633, + "grad_norm": 1.5164715051651, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 21390 + }, + { + "epoch": 140.78947368421052, + "grad_norm": 1.3153681755065918, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 21400 + }, + { + "epoch": 140.85526315789474, + "grad_norm": 1.4804083108901978, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 21410 + }, + { + "epoch": 140.92105263157896, + "grad_norm": 1.6807725429534912, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 21420 + }, + { + "epoch": 140.98684210526315, + "grad_norm": 1.5503164529800415, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 21430 + }, + { + "epoch": 141.05263157894737, + "grad_norm": 1.3315825462341309, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 21440 + }, + { + "epoch": 141.1184210526316, + "grad_norm": 1.2883803844451904, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 21450 + }, + { + "epoch": 141.18421052631578, + "grad_norm": 1.409468173980713, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 21460 + }, + { + "epoch": 141.25, + "grad_norm": 1.7659132480621338, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 21470 + }, + { + "epoch": 141.31578947368422, + "grad_norm": 1.2036762237548828, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 21480 + }, + { + "epoch": 141.3815789473684, + "grad_norm": 1.3691455125808716, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 21490 + }, + { + "epoch": 141.44736842105263, + "grad_norm": 1.2026904821395874, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 21500 + }, + { + "epoch": 141.51315789473685, + "grad_norm": 1.3784390687942505, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 21510 + }, + { + "epoch": 141.57894736842104, + "grad_norm": 1.1604245901107788, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 21520 + }, + { + "epoch": 141.64473684210526, + "grad_norm": 1.6574337482452393, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 21530 + }, + { + "epoch": 141.71052631578948, + "grad_norm": 1.6064780950546265, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 21540 + }, + { + "epoch": 141.77631578947367, + "grad_norm": 1.4753048419952393, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 21550 + }, + { + "epoch": 141.8421052631579, + "grad_norm": 1.6422748565673828, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 21560 + }, + { + "epoch": 141.9078947368421, + "grad_norm": 1.4057989120483398, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 21570 + }, + { + "epoch": 141.97368421052633, + "grad_norm": 1.7741460800170898, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 21580 + }, + { + "epoch": 142.03947368421052, + "grad_norm": 1.278627872467041, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 21590 + }, + { + "epoch": 142.10526315789474, + "grad_norm": 1.6472762823104858, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 21600 + }, + { + "epoch": 142.17105263157896, + "grad_norm": 1.5067228078842163, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 21610 + }, + { + "epoch": 142.23684210526315, + "grad_norm": 1.04597008228302, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 21620 + }, + { + "epoch": 142.30263157894737, + "grad_norm": 1.3342257738113403, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 21630 + }, + { + "epoch": 142.3684210526316, + "grad_norm": 1.3006070852279663, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 21640 + }, + { + "epoch": 142.43421052631578, + "grad_norm": 1.0908511877059937, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 21650 + }, + { + "epoch": 142.5, + "grad_norm": 1.156730055809021, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 21660 + }, + { + "epoch": 142.56578947368422, + "grad_norm": 1.5827045440673828, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 21670 + }, + { + "epoch": 142.6315789473684, + "grad_norm": 1.2614614963531494, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 21680 + }, + { + "epoch": 142.69736842105263, + "grad_norm": 1.386438012123108, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 21690 + }, + { + "epoch": 142.76315789473685, + "grad_norm": 1.0151652097702026, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 21700 + }, + { + "epoch": 142.82894736842104, + "grad_norm": 1.2075785398483276, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 21710 + }, + { + "epoch": 142.89473684210526, + "grad_norm": 1.2344067096710205, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 21720 + }, + { + "epoch": 142.96052631578948, + "grad_norm": 1.3653433322906494, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 21730 + }, + { + "epoch": 143.02631578947367, + "grad_norm": 1.1140528917312622, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 21740 + }, + { + "epoch": 143.0921052631579, + "grad_norm": 1.543173909187317, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 21750 + }, + { + "epoch": 143.1578947368421, + "grad_norm": 1.6480592489242554, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 21760 + }, + { + "epoch": 143.22368421052633, + "grad_norm": 1.5367249250411987, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 21770 + }, + { + "epoch": 143.28947368421052, + "grad_norm": 1.3302656412124634, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 21780 + }, + { + "epoch": 143.35526315789474, + "grad_norm": 1.3698561191558838, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 21790 + }, + { + "epoch": 143.42105263157896, + "grad_norm": 1.6688646078109741, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 21800 + }, + { + "epoch": 143.48684210526315, + "grad_norm": 0.9735197424888611, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 21810 + }, + { + "epoch": 143.55263157894737, + "grad_norm": 1.394163727760315, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 21820 + }, + { + "epoch": 143.6184210526316, + "grad_norm": 1.4685237407684326, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 21830 + }, + { + "epoch": 143.68421052631578, + "grad_norm": 1.2259248495101929, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 21840 + }, + { + "epoch": 143.75, + "grad_norm": 1.0960712432861328, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 21850 + }, + { + "epoch": 143.81578947368422, + "grad_norm": 1.4755326509475708, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 21860 + }, + { + "epoch": 143.8815789473684, + "grad_norm": 1.1997923851013184, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 21870 + }, + { + "epoch": 143.94736842105263, + "grad_norm": 1.0807325839996338, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 21880 + }, + { + "epoch": 144.01315789473685, + "grad_norm": 1.120715618133545, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 21890 + }, + { + "epoch": 144.07894736842104, + "grad_norm": 1.401428461074829, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 21900 + }, + { + "epoch": 144.14473684210526, + "grad_norm": 1.0058794021606445, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 21910 + }, + { + "epoch": 144.21052631578948, + "grad_norm": 1.4190456867218018, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 21920 + }, + { + "epoch": 144.27631578947367, + "grad_norm": 1.0206199884414673, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 21930 + }, + { + "epoch": 144.3421052631579, + "grad_norm": 1.3440083265304565, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 21940 + }, + { + "epoch": 144.4078947368421, + "grad_norm": 1.0611833333969116, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 21950 + }, + { + "epoch": 144.47368421052633, + "grad_norm": 1.4812649488449097, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 21960 + }, + { + "epoch": 144.53947368421052, + "grad_norm": 1.3473953008651733, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 21970 + }, + { + "epoch": 144.60526315789474, + "grad_norm": 1.229573130607605, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 21980 + }, + { + "epoch": 144.67105263157896, + "grad_norm": 1.0524625778198242, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 21990 + }, + { + "epoch": 144.73684210526315, + "grad_norm": 1.317591905593872, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 22000 + }, + { + "epoch": 144.80263157894737, + "grad_norm": 1.3425631523132324, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 22010 + }, + { + "epoch": 144.8684210526316, + "grad_norm": 1.0790326595306396, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 22020 + }, + { + "epoch": 144.93421052631578, + "grad_norm": 1.0952657461166382, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 22030 + }, + { + "epoch": 145.0, + "grad_norm": 1.3941048383712769, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 22040 + }, + { + "epoch": 145.06578947368422, + "grad_norm": 1.4520479440689087, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 22050 + }, + { + "epoch": 145.1315789473684, + "grad_norm": 1.3955209255218506, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 22060 + }, + { + "epoch": 145.19736842105263, + "grad_norm": 1.3725961446762085, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 22070 + }, + { + "epoch": 145.26315789473685, + "grad_norm": 1.536729335784912, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 22080 + }, + { + "epoch": 145.32894736842104, + "grad_norm": 1.2806124687194824, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 22090 + }, + { + "epoch": 145.39473684210526, + "grad_norm": 1.2393194437026978, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 22100 + }, + { + "epoch": 145.46052631578948, + "grad_norm": 1.6960039138793945, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 22110 + }, + { + "epoch": 145.52631578947367, + "grad_norm": 1.5707310438156128, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 22120 + }, + { + "epoch": 145.5921052631579, + "grad_norm": 1.2192633152008057, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 22130 + }, + { + "epoch": 145.6578947368421, + "grad_norm": 1.4482849836349487, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 22140 + }, + { + "epoch": 145.72368421052633, + "grad_norm": 1.1989715099334717, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 22150 + }, + { + "epoch": 145.78947368421052, + "grad_norm": 1.3403609991073608, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 22160 + }, + { + "epoch": 145.85526315789474, + "grad_norm": 1.599914789199829, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 22170 + }, + { + "epoch": 145.92105263157896, + "grad_norm": 1.602010726928711, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 22180 + }, + { + "epoch": 145.98684210526315, + "grad_norm": 1.2780475616455078, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 22190 + }, + { + "epoch": 146.05263157894737, + "grad_norm": 1.600659728050232, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 22200 + }, + { + "epoch": 146.1184210526316, + "grad_norm": 1.0254467725753784, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 22210 + }, + { + "epoch": 146.18421052631578, + "grad_norm": 1.2700848579406738, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 22220 + }, + { + "epoch": 146.25, + "grad_norm": 1.2667077779769897, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 22230 + }, + { + "epoch": 146.31578947368422, + "grad_norm": 1.057997703552246, + "learning_rate": 0.0001, + "loss": 0.0222, + "step": 22240 + }, + { + "epoch": 146.3815789473684, + "grad_norm": 1.3093483448028564, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 22250 + }, + { + "epoch": 146.44736842105263, + "grad_norm": 1.0066701173782349, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 22260 + }, + { + "epoch": 146.51315789473685, + "grad_norm": 1.212918758392334, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 22270 + }, + { + "epoch": 146.57894736842104, + "grad_norm": 1.3692690134048462, + "learning_rate": 0.0001, + "loss": 0.0236, + "step": 22280 + }, + { + "epoch": 146.64473684210526, + "grad_norm": 1.1842776536941528, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 22290 + }, + { + "epoch": 146.71052631578948, + "grad_norm": 1.0390976667404175, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 22300 + }, + { + "epoch": 146.77631578947367, + "grad_norm": 0.8841727375984192, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 22310 + }, + { + "epoch": 146.8421052631579, + "grad_norm": 1.2545442581176758, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 22320 + }, + { + "epoch": 146.9078947368421, + "grad_norm": 1.3116580247879028, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 22330 + }, + { + "epoch": 146.97368421052633, + "grad_norm": 0.9272478222846985, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 22340 + }, + { + "epoch": 147.03947368421052, + "grad_norm": 1.2526774406433105, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 22350 + }, + { + "epoch": 147.10526315789474, + "grad_norm": 1.276404857635498, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 22360 + }, + { + "epoch": 147.17105263157896, + "grad_norm": 1.035636305809021, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 22370 + }, + { + "epoch": 147.23684210526315, + "grad_norm": 1.1112178564071655, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 22380 + }, + { + "epoch": 147.30263157894737, + "grad_norm": 1.3240015506744385, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 22390 + }, + { + "epoch": 147.3684210526316, + "grad_norm": 1.081715703010559, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 22400 + }, + { + "epoch": 147.43421052631578, + "grad_norm": 1.243489384651184, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 22410 + }, + { + "epoch": 147.5, + "grad_norm": 1.5017218589782715, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 22420 + }, + { + "epoch": 147.56578947368422, + "grad_norm": 0.9764308929443359, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 22430 + }, + { + "epoch": 147.6315789473684, + "grad_norm": 1.4389960765838623, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 22440 + }, + { + "epoch": 147.69736842105263, + "grad_norm": 1.4686914682388306, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 22450 + }, + { + "epoch": 147.76315789473685, + "grad_norm": 1.3333423137664795, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 22460 + }, + { + "epoch": 147.82894736842104, + "grad_norm": 1.3306684494018555, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 22470 + }, + { + "epoch": 147.89473684210526, + "grad_norm": 1.4141942262649536, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 22480 + }, + { + "epoch": 147.96052631578948, + "grad_norm": 1.542427659034729, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 22490 + }, + { + "epoch": 148.02631578947367, + "grad_norm": 1.7996158599853516, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 22500 + }, + { + "epoch": 148.0921052631579, + "grad_norm": 0.9638149738311768, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 22510 + }, + { + "epoch": 148.1578947368421, + "grad_norm": 1.074876070022583, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 22520 + }, + { + "epoch": 148.22368421052633, + "grad_norm": 1.0280288457870483, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 22530 + }, + { + "epoch": 148.28947368421052, + "grad_norm": 1.0511701107025146, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 22540 + }, + { + "epoch": 148.35526315789474, + "grad_norm": 1.678219199180603, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 22550 + }, + { + "epoch": 148.42105263157896, + "grad_norm": 1.0990034341812134, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 22560 + }, + { + "epoch": 148.48684210526315, + "grad_norm": 1.601233720779419, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 22570 + }, + { + "epoch": 148.55263157894737, + "grad_norm": 1.4822688102722168, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 22580 + }, + { + "epoch": 148.6184210526316, + "grad_norm": 1.492310881614685, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 22590 + }, + { + "epoch": 148.68421052631578, + "grad_norm": 1.3018604516983032, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 22600 + }, + { + "epoch": 148.75, + "grad_norm": 1.4880129098892212, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 22610 + }, + { + "epoch": 148.81578947368422, + "grad_norm": 1.6518675088882446, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 22620 + }, + { + "epoch": 148.8815789473684, + "grad_norm": 1.6061004400253296, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 22630 + }, + { + "epoch": 148.94736842105263, + "grad_norm": 1.5879257917404175, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 22640 + }, + { + "epoch": 149.01315789473685, + "grad_norm": 1.7779239416122437, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 22650 + }, + { + "epoch": 149.07894736842104, + "grad_norm": 1.654737949371338, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 22660 + }, + { + "epoch": 149.14473684210526, + "grad_norm": 1.3545973300933838, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 22670 + }, + { + "epoch": 149.21052631578948, + "grad_norm": 1.3350344896316528, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 22680 + }, + { + "epoch": 149.27631578947367, + "grad_norm": 1.253635287284851, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 22690 + }, + { + "epoch": 149.3421052631579, + "grad_norm": 1.4453550577163696, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 22700 + }, + { + "epoch": 149.4078947368421, + "grad_norm": 1.421644926071167, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 22710 + }, + { + "epoch": 149.47368421052633, + "grad_norm": 1.5055420398712158, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 22720 + }, + { + "epoch": 149.53947368421052, + "grad_norm": 1.4135552644729614, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 22730 + }, + { + "epoch": 149.60526315789474, + "grad_norm": 1.5352897644042969, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 22740 + }, + { + "epoch": 149.67105263157896, + "grad_norm": 1.561647891998291, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 22750 + }, + { + "epoch": 149.73684210526315, + "grad_norm": 1.3064342737197876, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 22760 + }, + { + "epoch": 149.80263157894737, + "grad_norm": 1.1616345643997192, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 22770 + }, + { + "epoch": 149.8684210526316, + "grad_norm": 1.3709357976913452, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 22780 + }, + { + "epoch": 149.93421052631578, + "grad_norm": 1.3530012369155884, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 22790 + }, + { + "epoch": 150.0, + "grad_norm": 1.3412154912948608, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 22800 + }, + { + "epoch": 150.06578947368422, + "grad_norm": 1.2247885465621948, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 22810 + }, + { + "epoch": 150.1315789473684, + "grad_norm": 1.5520859956741333, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 22820 + }, + { + "epoch": 150.19736842105263, + "grad_norm": 1.8670061826705933, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 22830 + }, + { + "epoch": 150.26315789473685, + "grad_norm": 2.187220573425293, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 22840 + }, + { + "epoch": 150.32894736842104, + "grad_norm": 1.9027713537216187, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 22850 + }, + { + "epoch": 150.39473684210526, + "grad_norm": 1.7409223318099976, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 22860 + }, + { + "epoch": 150.46052631578948, + "grad_norm": 1.7857537269592285, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 22870 + }, + { + "epoch": 150.52631578947367, + "grad_norm": 1.6792347431182861, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 22880 + }, + { + "epoch": 150.5921052631579, + "grad_norm": 1.2226988077163696, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 22890 + }, + { + "epoch": 150.6578947368421, + "grad_norm": 2.2091164588928223, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 22900 + }, + { + "epoch": 150.72368421052633, + "grad_norm": 2.2626655101776123, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 22910 + }, + { + "epoch": 150.78947368421052, + "grad_norm": 2.42525315284729, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 22920 + }, + { + "epoch": 150.85526315789474, + "grad_norm": 1.7006795406341553, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 22930 + }, + { + "epoch": 150.92105263157896, + "grad_norm": 1.7463268041610718, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 22940 + }, + { + "epoch": 150.98684210526315, + "grad_norm": 1.5665723085403442, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 22950 + }, + { + "epoch": 151.05263157894737, + "grad_norm": 1.7291148900985718, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 22960 + }, + { + "epoch": 151.1184210526316, + "grad_norm": 1.3778690099716187, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 22970 + }, + { + "epoch": 151.18421052631578, + "grad_norm": 1.612257480621338, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 22980 + }, + { + "epoch": 151.25, + "grad_norm": 1.3075007200241089, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 22990 + }, + { + "epoch": 151.31578947368422, + "grad_norm": 1.2902040481567383, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 23000 + }, + { + "epoch": 151.3815789473684, + "grad_norm": 1.3315701484680176, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 23010 + }, + { + "epoch": 151.44736842105263, + "grad_norm": 1.0458896160125732, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 23020 + }, + { + "epoch": 151.51315789473685, + "grad_norm": 1.1520403623580933, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 23030 + }, + { + "epoch": 151.57894736842104, + "grad_norm": 1.5853742361068726, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 23040 + }, + { + "epoch": 151.64473684210526, + "grad_norm": 1.2538849115371704, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 23050 + }, + { + "epoch": 151.71052631578948, + "grad_norm": 1.6762452125549316, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 23060 + }, + { + "epoch": 151.77631578947367, + "grad_norm": 1.7446788549423218, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 23070 + }, + { + "epoch": 151.8421052631579, + "grad_norm": 0.9342503547668457, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 23080 + }, + { + "epoch": 151.9078947368421, + "grad_norm": 1.1515876054763794, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 23090 + }, + { + "epoch": 151.97368421052633, + "grad_norm": 1.6669334173202515, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 23100 + }, + { + "epoch": 152.03947368421052, + "grad_norm": 1.5257030725479126, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 23110 + }, + { + "epoch": 152.10526315789474, + "grad_norm": 1.6215834617614746, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 23120 + }, + { + "epoch": 152.17105263157896, + "grad_norm": 1.7707445621490479, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 23130 + }, + { + "epoch": 152.23684210526315, + "grad_norm": 1.1109079122543335, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 23140 + }, + { + "epoch": 152.30263157894737, + "grad_norm": 1.362051010131836, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 23150 + }, + { + "epoch": 152.3684210526316, + "grad_norm": 1.205551028251648, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 23160 + }, + { + "epoch": 152.43421052631578, + "grad_norm": 1.1164907217025757, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 23170 + }, + { + "epoch": 152.5, + "grad_norm": 1.4803227186203003, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 23180 + }, + { + "epoch": 152.56578947368422, + "grad_norm": 1.282638430595398, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 23190 + }, + { + "epoch": 152.6315789473684, + "grad_norm": 1.3923507928848267, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 23200 + }, + { + "epoch": 152.69736842105263, + "grad_norm": 1.3010979890823364, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 23210 + }, + { + "epoch": 152.76315789473685, + "grad_norm": 1.3184207677841187, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 23220 + }, + { + "epoch": 152.82894736842104, + "grad_norm": 1.2217152118682861, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 23230 + }, + { + "epoch": 152.89473684210526, + "grad_norm": 3.6102325916290283, + "learning_rate": 0.0001, + "loss": 0.0229, + "step": 23240 + }, + { + "epoch": 152.96052631578948, + "grad_norm": 1.7692070007324219, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 23250 + }, + { + "epoch": 153.02631578947367, + "grad_norm": 1.6135303974151611, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 23260 + }, + { + "epoch": 153.0921052631579, + "grad_norm": 1.7957147359848022, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 23270 + }, + { + "epoch": 153.1578947368421, + "grad_norm": 1.8844883441925049, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 23280 + }, + { + "epoch": 153.22368421052633, + "grad_norm": 1.8668259382247925, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 23290 + }, + { + "epoch": 153.28947368421052, + "grad_norm": 1.8679157495498657, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 23300 + }, + { + "epoch": 153.35526315789474, + "grad_norm": 1.436536192893982, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 23310 + }, + { + "epoch": 153.42105263157896, + "grad_norm": 1.4835171699523926, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 23320 + }, + { + "epoch": 153.48684210526315, + "grad_norm": 1.3552966117858887, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 23330 + }, + { + "epoch": 153.55263157894737, + "grad_norm": 1.5863488912582397, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 23340 + }, + { + "epoch": 153.6184210526316, + "grad_norm": 1.2556802034378052, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 23350 + }, + { + "epoch": 153.68421052631578, + "grad_norm": 1.7405736446380615, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 23360 + }, + { + "epoch": 153.75, + "grad_norm": 1.4233875274658203, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 23370 + }, + { + "epoch": 153.81578947368422, + "grad_norm": 1.3720194101333618, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 23380 + }, + { + "epoch": 153.8815789473684, + "grad_norm": 1.481738567352295, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 23390 + }, + { + "epoch": 153.94736842105263, + "grad_norm": 1.8668274879455566, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 23400 + }, + { + "epoch": 154.01315789473685, + "grad_norm": 1.2795329093933105, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 23410 + }, + { + "epoch": 154.07894736842104, + "grad_norm": 1.5393232107162476, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 23420 + }, + { + "epoch": 154.14473684210526, + "grad_norm": 1.5710614919662476, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 23430 + }, + { + "epoch": 154.21052631578948, + "grad_norm": 1.5256733894348145, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 23440 + }, + { + "epoch": 154.27631578947367, + "grad_norm": 1.4345033168792725, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 23450 + }, + { + "epoch": 154.3421052631579, + "grad_norm": 1.4099990129470825, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 23460 + }, + { + "epoch": 154.4078947368421, + "grad_norm": 1.6725177764892578, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 23470 + }, + { + "epoch": 154.47368421052633, + "grad_norm": 1.6998838186264038, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 23480 + }, + { + "epoch": 154.53947368421052, + "grad_norm": 1.105302095413208, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 23490 + }, + { + "epoch": 154.60526315789474, + "grad_norm": 1.0682851076126099, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 23500 + }, + { + "epoch": 154.67105263157896, + "grad_norm": 1.4762492179870605, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 23510 + }, + { + "epoch": 154.73684210526315, + "grad_norm": 1.3171006441116333, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 23520 + }, + { + "epoch": 154.80263157894737, + "grad_norm": 1.1788697242736816, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 23530 + }, + { + "epoch": 154.8684210526316, + "grad_norm": 1.425657868385315, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 23540 + }, + { + "epoch": 154.93421052631578, + "grad_norm": 1.547235369682312, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 23550 + }, + { + "epoch": 155.0, + "grad_norm": 1.6571357250213623, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 23560 + }, + { + "epoch": 155.06578947368422, + "grad_norm": 3.123242139816284, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 23570 + }, + { + "epoch": 155.1315789473684, + "grad_norm": 1.6413636207580566, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 23580 + }, + { + "epoch": 155.19736842105263, + "grad_norm": 1.3690893650054932, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 23590 + }, + { + "epoch": 155.26315789473685, + "grad_norm": 1.3904107809066772, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 23600 + }, + { + "epoch": 155.32894736842104, + "grad_norm": 1.816664457321167, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 23610 + }, + { + "epoch": 155.39473684210526, + "grad_norm": 1.4407306909561157, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 23620 + }, + { + "epoch": 155.46052631578948, + "grad_norm": 1.6005239486694336, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 23630 + }, + { + "epoch": 155.52631578947367, + "grad_norm": 1.4299267530441284, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 23640 + }, + { + "epoch": 155.5921052631579, + "grad_norm": 1.0841288566589355, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 23650 + }, + { + "epoch": 155.6578947368421, + "grad_norm": 1.3383357524871826, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 23660 + }, + { + "epoch": 155.72368421052633, + "grad_norm": 1.0255568027496338, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 23670 + }, + { + "epoch": 155.78947368421052, + "grad_norm": 1.336609959602356, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 23680 + }, + { + "epoch": 155.85526315789474, + "grad_norm": 1.3863122463226318, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 23690 + }, + { + "epoch": 155.92105263157896, + "grad_norm": 1.3259609937667847, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 23700 + }, + { + "epoch": 155.98684210526315, + "grad_norm": 1.0066629648208618, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 23710 + }, + { + "epoch": 156.05263157894737, + "grad_norm": 1.1141328811645508, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 23720 + }, + { + "epoch": 156.1184210526316, + "grad_norm": 1.0179667472839355, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 23730 + }, + { + "epoch": 156.18421052631578, + "grad_norm": 1.498150110244751, + "learning_rate": 0.0001, + "loss": 0.023, + "step": 23740 + }, + { + "epoch": 156.25, + "grad_norm": 1.844730257987976, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 23750 + }, + { + "epoch": 156.31578947368422, + "grad_norm": 1.3886311054229736, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 23760 + }, + { + "epoch": 156.3815789473684, + "grad_norm": 1.3924232721328735, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 23770 + }, + { + "epoch": 156.44736842105263, + "grad_norm": 1.7710256576538086, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 23780 + }, + { + "epoch": 156.51315789473685, + "grad_norm": 1.4801042079925537, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 23790 + }, + { + "epoch": 156.57894736842104, + "grad_norm": 1.1061052083969116, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 23800 + }, + { + "epoch": 156.64473684210526, + "grad_norm": 1.3435893058776855, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 23810 + }, + { + "epoch": 156.71052631578948, + "grad_norm": 1.2930083274841309, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 23820 + }, + { + "epoch": 156.77631578947367, + "grad_norm": 1.1935070753097534, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 23830 + }, + { + "epoch": 156.8421052631579, + "grad_norm": 1.4816370010375977, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 23840 + }, + { + "epoch": 156.9078947368421, + "grad_norm": 1.39240300655365, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 23850 + }, + { + "epoch": 156.97368421052633, + "grad_norm": 1.3462928533554077, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 23860 + }, + { + "epoch": 157.03947368421052, + "grad_norm": 1.2192853689193726, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 23870 + }, + { + "epoch": 157.10526315789474, + "grad_norm": 1.2664424180984497, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 23880 + }, + { + "epoch": 157.17105263157896, + "grad_norm": 1.3305250406265259, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 23890 + }, + { + "epoch": 157.23684210526315, + "grad_norm": 1.512176513671875, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 23900 + }, + { + "epoch": 157.30263157894737, + "grad_norm": 1.5272941589355469, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 23910 + }, + { + "epoch": 157.3684210526316, + "grad_norm": 1.281473994255066, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 23920 + }, + { + "epoch": 157.43421052631578, + "grad_norm": 1.4534245729446411, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 23930 + }, + { + "epoch": 157.5, + "grad_norm": 1.065725564956665, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 23940 + }, + { + "epoch": 157.56578947368422, + "grad_norm": 1.2938498258590698, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 23950 + }, + { + "epoch": 157.6315789473684, + "grad_norm": 1.5139905214309692, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 23960 + }, + { + "epoch": 157.69736842105263, + "grad_norm": 1.1256234645843506, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 23970 + }, + { + "epoch": 157.76315789473685, + "grad_norm": 1.3171970844268799, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 23980 + }, + { + "epoch": 157.82894736842104, + "grad_norm": 1.7550626993179321, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 23990 + }, + { + "epoch": 157.89473684210526, + "grad_norm": 1.2105461359024048, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 24000 + }, + { + "epoch": 157.96052631578948, + "grad_norm": 1.7495108842849731, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 24010 + }, + { + "epoch": 158.02631578947367, + "grad_norm": 1.5057815313339233, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 24020 + }, + { + "epoch": 158.0921052631579, + "grad_norm": 1.364128828048706, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 24030 + }, + { + "epoch": 158.1578947368421, + "grad_norm": 1.6452478170394897, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 24040 + }, + { + "epoch": 158.22368421052633, + "grad_norm": 1.2079576253890991, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 24050 + }, + { + "epoch": 158.28947368421052, + "grad_norm": 1.1615091562271118, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 24060 + }, + { + "epoch": 158.35526315789474, + "grad_norm": 1.7258861064910889, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 24070 + }, + { + "epoch": 158.42105263157896, + "grad_norm": 1.5624957084655762, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 24080 + }, + { + "epoch": 158.48684210526315, + "grad_norm": 1.3713676929473877, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 24090 + }, + { + "epoch": 158.55263157894737, + "grad_norm": 0.9600792527198792, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 24100 + }, + { + "epoch": 158.6184210526316, + "grad_norm": 1.1764682531356812, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 24110 + }, + { + "epoch": 158.68421052631578, + "grad_norm": 0.8807045221328735, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 24120 + }, + { + "epoch": 158.75, + "grad_norm": 1.495734691619873, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 24130 + }, + { + "epoch": 158.81578947368422, + "grad_norm": 1.2996577024459839, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 24140 + }, + { + "epoch": 158.8815789473684, + "grad_norm": 1.3335479497909546, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 24150 + }, + { + "epoch": 158.94736842105263, + "grad_norm": 1.3486828804016113, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 24160 + }, + { + "epoch": 159.01315789473685, + "grad_norm": 1.1436874866485596, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 24170 + }, + { + "epoch": 159.07894736842104, + "grad_norm": 1.0566798448562622, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 24180 + }, + { + "epoch": 159.14473684210526, + "grad_norm": 1.3232210874557495, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 24190 + }, + { + "epoch": 159.21052631578948, + "grad_norm": 1.1854642629623413, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 24200 + }, + { + "epoch": 159.27631578947367, + "grad_norm": 1.3037360906600952, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 24210 + }, + { + "epoch": 159.3421052631579, + "grad_norm": 1.1624709367752075, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 24220 + }, + { + "epoch": 159.4078947368421, + "grad_norm": 1.279187798500061, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 24230 + }, + { + "epoch": 159.47368421052633, + "grad_norm": 1.1820547580718994, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 24240 + }, + { + "epoch": 159.53947368421052, + "grad_norm": 1.3856960535049438, + "learning_rate": 0.0001, + "loss": 0.0207, + "step": 24250 + }, + { + "epoch": 159.60526315789474, + "grad_norm": 1.538688063621521, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 24260 + }, + { + "epoch": 159.67105263157896, + "grad_norm": 1.3758467435836792, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 24270 + }, + { + "epoch": 159.73684210526315, + "grad_norm": 1.637129306793213, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 24280 + }, + { + "epoch": 159.80263157894737, + "grad_norm": 1.3896411657333374, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 24290 + }, + { + "epoch": 159.8684210526316, + "grad_norm": 1.1721456050872803, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 24300 + }, + { + "epoch": 159.93421052631578, + "grad_norm": 1.9932554960250854, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 24310 + }, + { + "epoch": 160.0, + "grad_norm": 1.3891863822937012, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 24320 + }, + { + "epoch": 160.06578947368422, + "grad_norm": 1.4516315460205078, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 24330 + }, + { + "epoch": 160.1315789473684, + "grad_norm": 1.426121473312378, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 24340 + }, + { + "epoch": 160.19736842105263, + "grad_norm": 1.5220839977264404, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 24350 + }, + { + "epoch": 160.26315789473685, + "grad_norm": 1.4482239484786987, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 24360 + }, + { + "epoch": 160.32894736842104, + "grad_norm": 1.5488955974578857, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 24370 + }, + { + "epoch": 160.39473684210526, + "grad_norm": 1.3807427883148193, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 24380 + }, + { + "epoch": 160.46052631578948, + "grad_norm": 1.0160894393920898, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 24390 + }, + { + "epoch": 160.52631578947367, + "grad_norm": 1.2377235889434814, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 24400 + }, + { + "epoch": 160.5921052631579, + "grad_norm": 0.987001895904541, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 24410 + }, + { + "epoch": 160.6578947368421, + "grad_norm": 1.317531704902649, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 24420 + }, + { + "epoch": 160.72368421052633, + "grad_norm": 1.521753191947937, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 24430 + }, + { + "epoch": 160.78947368421052, + "grad_norm": 1.166321039199829, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 24440 + }, + { + "epoch": 160.85526315789474, + "grad_norm": 1.1440471410751343, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 24450 + }, + { + "epoch": 160.92105263157896, + "grad_norm": 1.3936820030212402, + "learning_rate": 0.0001, + "loss": 0.0218, + "step": 24460 + }, + { + "epoch": 160.98684210526315, + "grad_norm": 1.1521437168121338, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 24470 + }, + { + "epoch": 161.05263157894737, + "grad_norm": 1.0062541961669922, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 24480 + }, + { + "epoch": 161.1184210526316, + "grad_norm": 1.126672625541687, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 24490 + }, + { + "epoch": 161.18421052631578, + "grad_norm": 0.8864695429801941, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 24500 + }, + { + "epoch": 161.25, + "grad_norm": 1.3956273794174194, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 24510 + }, + { + "epoch": 161.31578947368422, + "grad_norm": 1.4042059183120728, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 24520 + }, + { + "epoch": 161.3815789473684, + "grad_norm": 1.1768255233764648, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 24530 + }, + { + "epoch": 161.44736842105263, + "grad_norm": 1.468287467956543, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 24540 + }, + { + "epoch": 161.51315789473685, + "grad_norm": 1.4978389739990234, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 24550 + }, + { + "epoch": 161.57894736842104, + "grad_norm": 0.9304846525192261, + "learning_rate": 0.0001, + "loss": 0.0216, + "step": 24560 + }, + { + "epoch": 161.64473684210526, + "grad_norm": 1.3616132736206055, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 24570 + }, + { + "epoch": 161.71052631578948, + "grad_norm": 1.0822436809539795, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 24580 + }, + { + "epoch": 161.77631578947367, + "grad_norm": 1.1985223293304443, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 24590 + }, + { + "epoch": 161.8421052631579, + "grad_norm": 1.1661827564239502, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 24600 + }, + { + "epoch": 161.9078947368421, + "grad_norm": 1.2504955530166626, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 24610 + }, + { + "epoch": 161.97368421052633, + "grad_norm": 1.2397123575210571, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 24620 + }, + { + "epoch": 162.03947368421052, + "grad_norm": 1.0715943574905396, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 24630 + }, + { + "epoch": 162.10526315789474, + "grad_norm": 1.1244536638259888, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 24640 + }, + { + "epoch": 162.17105263157896, + "grad_norm": 1.3353065252304077, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 24650 + }, + { + "epoch": 162.23684210526315, + "grad_norm": 1.408204436302185, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 24660 + }, + { + "epoch": 162.30263157894737, + "grad_norm": 1.3387621641159058, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 24670 + }, + { + "epoch": 162.3684210526316, + "grad_norm": 1.7283587455749512, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 24680 + }, + { + "epoch": 162.43421052631578, + "grad_norm": 1.5374019145965576, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 24690 + }, + { + "epoch": 162.5, + "grad_norm": 1.2795945405960083, + "learning_rate": 0.0001, + "loss": 0.0213, + "step": 24700 + }, + { + "epoch": 162.56578947368422, + "grad_norm": 1.537787675857544, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 24710 + }, + { + "epoch": 162.6315789473684, + "grad_norm": 1.3099159002304077, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 24720 + }, + { + "epoch": 162.69736842105263, + "grad_norm": 1.4521063566207886, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 24730 + }, + { + "epoch": 162.76315789473685, + "grad_norm": 1.0471484661102295, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 24740 + }, + { + "epoch": 162.82894736842104, + "grad_norm": 1.6101007461547852, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 24750 + }, + { + "epoch": 162.89473684210526, + "grad_norm": 1.3530237674713135, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 24760 + }, + { + "epoch": 162.96052631578948, + "grad_norm": 1.4511295557022095, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 24770 + }, + { + "epoch": 163.02631578947367, + "grad_norm": 1.6022499799728394, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 24780 + }, + { + "epoch": 163.0921052631579, + "grad_norm": 1.4935600757598877, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 24790 + }, + { + "epoch": 163.1578947368421, + "grad_norm": 1.327233910560608, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 24800 + }, + { + "epoch": 163.22368421052633, + "grad_norm": 1.6861796379089355, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 24810 + }, + { + "epoch": 163.28947368421052, + "grad_norm": 1.4635359048843384, + "learning_rate": 0.0001, + "loss": 0.0224, + "step": 24820 + }, + { + "epoch": 163.35526315789474, + "grad_norm": 1.0945830345153809, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 24830 + }, + { + "epoch": 163.42105263157896, + "grad_norm": 1.359348177909851, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 24840 + }, + { + "epoch": 163.48684210526315, + "grad_norm": 1.269120216369629, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 24850 + }, + { + "epoch": 163.55263157894737, + "grad_norm": 1.2218973636627197, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 24860 + }, + { + "epoch": 163.6184210526316, + "grad_norm": 1.1602321863174438, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 24870 + }, + { + "epoch": 163.68421052631578, + "grad_norm": 1.5064598321914673, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 24880 + }, + { + "epoch": 163.75, + "grad_norm": 1.5285717248916626, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 24890 + }, + { + "epoch": 163.81578947368422, + "grad_norm": 1.0176746845245361, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 24900 + }, + { + "epoch": 163.8815789473684, + "grad_norm": 1.0195715427398682, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 24910 + }, + { + "epoch": 163.94736842105263, + "grad_norm": 1.253736972808838, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 24920 + }, + { + "epoch": 164.01315789473685, + "grad_norm": 1.370121717453003, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 24930 + }, + { + "epoch": 164.07894736842104, + "grad_norm": 1.2714835405349731, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 24940 + }, + { + "epoch": 164.14473684210526, + "grad_norm": 1.4336186647415161, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 24950 + }, + { + "epoch": 164.21052631578948, + "grad_norm": 1.3978872299194336, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 24960 + }, + { + "epoch": 164.27631578947367, + "grad_norm": 1.19012451171875, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 24970 + }, + { + "epoch": 164.3421052631579, + "grad_norm": 1.0551053285598755, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 24980 + }, + { + "epoch": 164.4078947368421, + "grad_norm": 1.8229308128356934, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 24990 + }, + { + "epoch": 164.47368421052633, + "grad_norm": 0.7936663627624512, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 25000 + }, + { + "epoch": 164.53947368421052, + "grad_norm": 1.1978473663330078, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 25010 + }, + { + "epoch": 164.60526315789474, + "grad_norm": 1.305327296257019, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 25020 + }, + { + "epoch": 164.67105263157896, + "grad_norm": 1.2756356000900269, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 25030 + }, + { + "epoch": 164.73684210526315, + "grad_norm": 0.8239328861236572, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 25040 + }, + { + "epoch": 164.80263157894737, + "grad_norm": 1.0603528022766113, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 25050 + }, + { + "epoch": 164.8684210526316, + "grad_norm": 1.0184378623962402, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 25060 + }, + { + "epoch": 164.93421052631578, + "grad_norm": 1.0379877090454102, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 25070 + }, + { + "epoch": 165.0, + "grad_norm": 1.1607532501220703, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 25080 + }, + { + "epoch": 165.06578947368422, + "grad_norm": 1.4055289030075073, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 25090 + }, + { + "epoch": 165.1315789473684, + "grad_norm": 1.3009381294250488, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 25100 + }, + { + "epoch": 165.19736842105263, + "grad_norm": 1.4090827703475952, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 25110 + }, + { + "epoch": 165.26315789473685, + "grad_norm": 1.2835659980773926, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 25120 + }, + { + "epoch": 165.32894736842104, + "grad_norm": 1.6038185358047485, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 25130 + }, + { + "epoch": 165.39473684210526, + "grad_norm": 1.2217997312545776, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 25140 + }, + { + "epoch": 165.46052631578948, + "grad_norm": 1.1289339065551758, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 25150 + }, + { + "epoch": 165.52631578947367, + "grad_norm": 1.2177705764770508, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 25160 + }, + { + "epoch": 165.5921052631579, + "grad_norm": 1.6059798002243042, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 25170 + }, + { + "epoch": 165.6578947368421, + "grad_norm": 1.921615481376648, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 25180 + }, + { + "epoch": 165.72368421052633, + "grad_norm": 1.20645010471344, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 25190 + }, + { + "epoch": 165.78947368421052, + "grad_norm": 1.2785398960113525, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 25200 + }, + { + "epoch": 165.85526315789474, + "grad_norm": 1.6781924962997437, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 25210 + }, + { + "epoch": 165.92105263157896, + "grad_norm": 0.9301381707191467, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 25220 + }, + { + "epoch": 165.98684210526315, + "grad_norm": 1.2392312288284302, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 25230 + }, + { + "epoch": 166.05263157894737, + "grad_norm": 1.5881590843200684, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 25240 + }, + { + "epoch": 166.1184210526316, + "grad_norm": 1.0536082983016968, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 25250 + }, + { + "epoch": 166.18421052631578, + "grad_norm": 1.475879192352295, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 25260 + }, + { + "epoch": 166.25, + "grad_norm": 1.0223356485366821, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 25270 + }, + { + "epoch": 166.31578947368422, + "grad_norm": 1.4303029775619507, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 25280 + }, + { + "epoch": 166.3815789473684, + "grad_norm": 1.3278456926345825, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 25290 + }, + { + "epoch": 166.44736842105263, + "grad_norm": 1.6226589679718018, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 25300 + }, + { + "epoch": 166.51315789473685, + "grad_norm": 1.4535454511642456, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 25310 + }, + { + "epoch": 166.57894736842104, + "grad_norm": 1.3479703664779663, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 25320 + }, + { + "epoch": 166.64473684210526, + "grad_norm": 1.2640882730484009, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 25330 + }, + { + "epoch": 166.71052631578948, + "grad_norm": 0.9760680198669434, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 25340 + }, + { + "epoch": 166.77631578947367, + "grad_norm": 1.1063249111175537, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 25350 + }, + { + "epoch": 166.8421052631579, + "grad_norm": 1.4282013177871704, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 25360 + }, + { + "epoch": 166.9078947368421, + "grad_norm": 1.7794874906539917, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 25370 + }, + { + "epoch": 166.97368421052633, + "grad_norm": 1.1673434972763062, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 25380 + }, + { + "epoch": 167.03947368421052, + "grad_norm": 1.1143444776535034, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 25390 + }, + { + "epoch": 167.10526315789474, + "grad_norm": 1.4228686094284058, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 25400 + }, + { + "epoch": 167.17105263157896, + "grad_norm": 1.154889464378357, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 25410 + }, + { + "epoch": 167.23684210526315, + "grad_norm": 1.4954147338867188, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 25420 + }, + { + "epoch": 167.30263157894737, + "grad_norm": 1.0341747999191284, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 25430 + }, + { + "epoch": 167.3684210526316, + "grad_norm": 1.333762526512146, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 25440 + }, + { + "epoch": 167.43421052631578, + "grad_norm": 1.4361519813537598, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 25450 + }, + { + "epoch": 167.5, + "grad_norm": 1.2971889972686768, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 25460 + }, + { + "epoch": 167.56578947368422, + "grad_norm": 1.3549476861953735, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 25470 + }, + { + "epoch": 167.6315789473684, + "grad_norm": 1.283470630645752, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 25480 + }, + { + "epoch": 167.69736842105263, + "grad_norm": 1.2950304746627808, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 25490 + }, + { + "epoch": 167.76315789473685, + "grad_norm": 1.133094072341919, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 25500 + }, + { + "epoch": 167.82894736842104, + "grad_norm": 1.097690463066101, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 25510 + }, + { + "epoch": 167.89473684210526, + "grad_norm": 1.6458208560943604, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 25520 + }, + { + "epoch": 167.96052631578948, + "grad_norm": 1.4381608963012695, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 25530 + }, + { + "epoch": 168.02631578947367, + "grad_norm": 1.225541591644287, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 25540 + }, + { + "epoch": 168.0921052631579, + "grad_norm": 1.082775592803955, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 25550 + }, + { + "epoch": 168.1578947368421, + "grad_norm": 1.2318446636199951, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 25560 + }, + { + "epoch": 168.22368421052633, + "grad_norm": 1.5468242168426514, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 25570 + }, + { + "epoch": 168.28947368421052, + "grad_norm": 1.3016541004180908, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 25580 + }, + { + "epoch": 168.35526315789474, + "grad_norm": 1.2177584171295166, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 25590 + }, + { + "epoch": 168.42105263157896, + "grad_norm": 1.384822130203247, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 25600 + }, + { + "epoch": 168.48684210526315, + "grad_norm": 1.5428193807601929, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 25610 + }, + { + "epoch": 168.55263157894737, + "grad_norm": 1.5225193500518799, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 25620 + }, + { + "epoch": 168.6184210526316, + "grad_norm": 1.4411053657531738, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 25630 + }, + { + "epoch": 168.68421052631578, + "grad_norm": 1.386864185333252, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 25640 + }, + { + "epoch": 168.75, + "grad_norm": 1.2823785543441772, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 25650 + }, + { + "epoch": 168.81578947368422, + "grad_norm": 1.1967064142227173, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 25660 + }, + { + "epoch": 168.8815789473684, + "grad_norm": 1.3014733791351318, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 25670 + }, + { + "epoch": 168.94736842105263, + "grad_norm": 1.2117063999176025, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 25680 + }, + { + "epoch": 169.01315789473685, + "grad_norm": 1.3568642139434814, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 25690 + }, + { + "epoch": 169.07894736842104, + "grad_norm": 0.8560911417007446, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 25700 + }, + { + "epoch": 169.14473684210526, + "grad_norm": 1.223082184791565, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 25710 + }, + { + "epoch": 169.21052631578948, + "grad_norm": 1.027945637702942, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 25720 + }, + { + "epoch": 169.27631578947367, + "grad_norm": 1.2788870334625244, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 25730 + }, + { + "epoch": 169.3421052631579, + "grad_norm": 1.5068094730377197, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 25740 + }, + { + "epoch": 169.4078947368421, + "grad_norm": 1.1870827674865723, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 25750 + }, + { + "epoch": 169.47368421052633, + "grad_norm": 1.2578105926513672, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 25760 + }, + { + "epoch": 169.53947368421052, + "grad_norm": 1.2269151210784912, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 25770 + }, + { + "epoch": 169.60526315789474, + "grad_norm": 1.482102870941162, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 25780 + }, + { + "epoch": 169.67105263157896, + "grad_norm": 1.2745014429092407, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 25790 + }, + { + "epoch": 169.73684210526315, + "grad_norm": 1.4906076192855835, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 25800 + }, + { + "epoch": 169.80263157894737, + "grad_norm": 0.9935338497161865, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 25810 + }, + { + "epoch": 169.8684210526316, + "grad_norm": 1.6479783058166504, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 25820 + }, + { + "epoch": 169.93421052631578, + "grad_norm": 1.0743275880813599, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 25830 + }, + { + "epoch": 170.0, + "grad_norm": 1.0243290662765503, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 25840 + }, + { + "epoch": 170.06578947368422, + "grad_norm": 1.5142858028411865, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 25850 + }, + { + "epoch": 170.1315789473684, + "grad_norm": 0.9342432618141174, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 25860 + }, + { + "epoch": 170.19736842105263, + "grad_norm": 1.4976930618286133, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 25870 + }, + { + "epoch": 170.26315789473685, + "grad_norm": 1.565292477607727, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 25880 + }, + { + "epoch": 170.32894736842104, + "grad_norm": 1.1127831935882568, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 25890 + }, + { + "epoch": 170.39473684210526, + "grad_norm": 1.198778748512268, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 25900 + }, + { + "epoch": 170.46052631578948, + "grad_norm": 1.2573102712631226, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 25910 + }, + { + "epoch": 170.52631578947367, + "grad_norm": 1.2907289266586304, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 25920 + }, + { + "epoch": 170.5921052631579, + "grad_norm": 1.4278053045272827, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 25930 + }, + { + "epoch": 170.6578947368421, + "grad_norm": 1.3394439220428467, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 25940 + }, + { + "epoch": 170.72368421052633, + "grad_norm": 1.1726481914520264, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 25950 + }, + { + "epoch": 170.78947368421052, + "grad_norm": 1.369882583618164, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 25960 + }, + { + "epoch": 170.85526315789474, + "grad_norm": 1.3367946147918701, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 25970 + }, + { + "epoch": 170.92105263157896, + "grad_norm": 1.3555912971496582, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 25980 + }, + { + "epoch": 170.98684210526315, + "grad_norm": 1.1395307779312134, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 25990 + }, + { + "epoch": 171.05263157894737, + "grad_norm": 1.1588562726974487, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 26000 + }, + { + "epoch": 171.1184210526316, + "grad_norm": 0.930858850479126, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 26010 + }, + { + "epoch": 171.18421052631578, + "grad_norm": 0.9874327778816223, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 26020 + }, + { + "epoch": 171.25, + "grad_norm": 1.0993543863296509, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 26030 + }, + { + "epoch": 171.31578947368422, + "grad_norm": 1.1052964925765991, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 26040 + }, + { + "epoch": 171.3815789473684, + "grad_norm": 1.25705885887146, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 26050 + }, + { + "epoch": 171.44736842105263, + "grad_norm": 0.7609035968780518, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 26060 + }, + { + "epoch": 171.51315789473685, + "grad_norm": 0.779438316822052, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 26070 + }, + { + "epoch": 171.57894736842104, + "grad_norm": 0.9345868229866028, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 26080 + }, + { + "epoch": 171.64473684210526, + "grad_norm": 1.416816234588623, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 26090 + }, + { + "epoch": 171.71052631578948, + "grad_norm": 1.5457994937896729, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 26100 + }, + { + "epoch": 171.77631578947367, + "grad_norm": 1.3715592622756958, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 26110 + }, + { + "epoch": 171.8421052631579, + "grad_norm": 1.3556997776031494, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 26120 + }, + { + "epoch": 171.9078947368421, + "grad_norm": 1.2203997373580933, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 26130 + }, + { + "epoch": 171.97368421052633, + "grad_norm": 1.4606289863586426, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 26140 + }, + { + "epoch": 172.03947368421052, + "grad_norm": 1.5704396963119507, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 26150 + }, + { + "epoch": 172.10526315789474, + "grad_norm": 1.2381999492645264, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 26160 + }, + { + "epoch": 172.17105263157896, + "grad_norm": 1.518921136856079, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 26170 + }, + { + "epoch": 172.23684210526315, + "grad_norm": 1.4631500244140625, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 26180 + }, + { + "epoch": 172.30263157894737, + "grad_norm": 1.0997428894042969, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 26190 + }, + { + "epoch": 172.3684210526316, + "grad_norm": 1.2960116863250732, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 26200 + }, + { + "epoch": 172.43421052631578, + "grad_norm": 1.2885228395462036, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 26210 + }, + { + "epoch": 172.5, + "grad_norm": 1.0690890550613403, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 26220 + }, + { + "epoch": 172.56578947368422, + "grad_norm": 1.4473509788513184, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 26230 + }, + { + "epoch": 172.6315789473684, + "grad_norm": 0.9004012942314148, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 26240 + }, + { + "epoch": 172.69736842105263, + "grad_norm": 1.4914005994796753, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 26250 + }, + { + "epoch": 172.76315789473685, + "grad_norm": 1.30367112159729, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 26260 + }, + { + "epoch": 172.82894736842104, + "grad_norm": 1.3740594387054443, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 26270 + }, + { + "epoch": 172.89473684210526, + "grad_norm": 1.5620779991149902, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 26280 + }, + { + "epoch": 172.96052631578948, + "grad_norm": 1.559910774230957, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 26290 + }, + { + "epoch": 173.02631578947367, + "grad_norm": 1.3926976919174194, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 26300 + }, + { + "epoch": 173.0921052631579, + "grad_norm": 1.1394861936569214, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 26310 + }, + { + "epoch": 173.1578947368421, + "grad_norm": 1.7277958393096924, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 26320 + }, + { + "epoch": 173.22368421052633, + "grad_norm": 1.507736325263977, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 26330 + }, + { + "epoch": 173.28947368421052, + "grad_norm": 1.7787176370620728, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 26340 + }, + { + "epoch": 173.35526315789474, + "grad_norm": 1.5719088315963745, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 26350 + }, + { + "epoch": 173.42105263157896, + "grad_norm": 1.6575725078582764, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 26360 + }, + { + "epoch": 173.48684210526315, + "grad_norm": 1.9089598655700684, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 26370 + }, + { + "epoch": 173.55263157894737, + "grad_norm": 1.8127702474594116, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 26380 + }, + { + "epoch": 173.6184210526316, + "grad_norm": 2.279390573501587, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 26390 + }, + { + "epoch": 173.68421052631578, + "grad_norm": 1.7834361791610718, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 26400 + }, + { + "epoch": 173.75, + "grad_norm": 2.3080971240997314, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 26410 + }, + { + "epoch": 173.81578947368422, + "grad_norm": 1.2840913534164429, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 26420 + }, + { + "epoch": 173.8815789473684, + "grad_norm": 1.8161916732788086, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 26430 + }, + { + "epoch": 173.94736842105263, + "grad_norm": 2.031912088394165, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 26440 + }, + { + "epoch": 174.01315789473685, + "grad_norm": 1.5057018995285034, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 26450 + }, + { + "epoch": 174.07894736842104, + "grad_norm": 1.6998172998428345, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 26460 + }, + { + "epoch": 174.14473684210526, + "grad_norm": 1.6755802631378174, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 26470 + }, + { + "epoch": 174.21052631578948, + "grad_norm": 1.45081627368927, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 26480 + }, + { + "epoch": 174.27631578947367, + "grad_norm": 1.4480472803115845, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 26490 + }, + { + "epoch": 174.3421052631579, + "grad_norm": 1.2978633642196655, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 26500 + }, + { + "epoch": 174.4078947368421, + "grad_norm": 1.4956743717193604, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 26510 + }, + { + "epoch": 174.47368421052633, + "grad_norm": 1.184404969215393, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 26520 + }, + { + "epoch": 174.53947368421052, + "grad_norm": 1.209301233291626, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 26530 + }, + { + "epoch": 174.60526315789474, + "grad_norm": 1.2950125932693481, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 26540 + }, + { + "epoch": 174.67105263157896, + "grad_norm": 1.8026961088180542, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 26550 + }, + { + "epoch": 174.73684210526315, + "grad_norm": 1.2904764413833618, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 26560 + }, + { + "epoch": 174.80263157894737, + "grad_norm": 1.365148663520813, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 26570 + }, + { + "epoch": 174.8684210526316, + "grad_norm": 1.2511751651763916, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 26580 + }, + { + "epoch": 174.93421052631578, + "grad_norm": 1.4057785272598267, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 26590 + }, + { + "epoch": 175.0, + "grad_norm": 1.572277545928955, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 26600 + }, + { + "epoch": 175.06578947368422, + "grad_norm": 1.2321019172668457, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 26610 + }, + { + "epoch": 175.1315789473684, + "grad_norm": 1.2357227802276611, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 26620 + }, + { + "epoch": 175.19736842105263, + "grad_norm": 1.3698261976242065, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 26630 + }, + { + "epoch": 175.26315789473685, + "grad_norm": 1.2478382587432861, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 26640 + }, + { + "epoch": 175.32894736842104, + "grad_norm": 1.074097990989685, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 26650 + }, + { + "epoch": 175.39473684210526, + "grad_norm": 0.874796986579895, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 26660 + }, + { + "epoch": 175.46052631578948, + "grad_norm": 1.2885454893112183, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 26670 + }, + { + "epoch": 175.52631578947367, + "grad_norm": 1.255919337272644, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 26680 + }, + { + "epoch": 175.5921052631579, + "grad_norm": 1.7973171472549438, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 26690 + }, + { + "epoch": 175.6578947368421, + "grad_norm": 1.4382481575012207, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 26700 + }, + { + "epoch": 175.72368421052633, + "grad_norm": 1.3824963569641113, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 26710 + }, + { + "epoch": 175.78947368421052, + "grad_norm": 1.2797788381576538, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 26720 + }, + { + "epoch": 175.85526315789474, + "grad_norm": 1.3257101774215698, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 26730 + }, + { + "epoch": 175.92105263157896, + "grad_norm": 1.3095109462738037, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 26740 + }, + { + "epoch": 175.98684210526315, + "grad_norm": 1.2997289896011353, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 26750 + }, + { + "epoch": 176.05263157894737, + "grad_norm": 1.637865662574768, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 26760 + }, + { + "epoch": 176.1184210526316, + "grad_norm": 1.384114384651184, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 26770 + }, + { + "epoch": 176.18421052631578, + "grad_norm": 1.4303343296051025, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 26780 + }, + { + "epoch": 176.25, + "grad_norm": 1.5146687030792236, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 26790 + }, + { + "epoch": 176.31578947368422, + "grad_norm": 1.5877227783203125, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 26800 + }, + { + "epoch": 176.3815789473684, + "grad_norm": 1.3442049026489258, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 26810 + }, + { + "epoch": 176.44736842105263, + "grad_norm": 1.266574740409851, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 26820 + }, + { + "epoch": 176.51315789473685, + "grad_norm": 1.4955312013626099, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 26830 + }, + { + "epoch": 176.57894736842104, + "grad_norm": 0.9446787238121033, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 26840 + }, + { + "epoch": 176.64473684210526, + "grad_norm": 0.9993739724159241, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 26850 + }, + { + "epoch": 176.71052631578948, + "grad_norm": 1.365684151649475, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 26860 + }, + { + "epoch": 176.77631578947367, + "grad_norm": 1.4473934173583984, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 26870 + }, + { + "epoch": 176.8421052631579, + "grad_norm": 1.6463544368743896, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 26880 + }, + { + "epoch": 176.9078947368421, + "grad_norm": 1.3166974782943726, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 26890 + }, + { + "epoch": 176.97368421052633, + "grad_norm": 1.214211106300354, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 26900 + }, + { + "epoch": 177.03947368421052, + "grad_norm": 1.529462218284607, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 26910 + }, + { + "epoch": 177.10526315789474, + "grad_norm": 1.0761915445327759, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 26920 + }, + { + "epoch": 177.17105263157896, + "grad_norm": 1.2023556232452393, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 26930 + }, + { + "epoch": 177.23684210526315, + "grad_norm": 1.5598783493041992, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 26940 + }, + { + "epoch": 177.30263157894737, + "grad_norm": 1.2606966495513916, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 26950 + }, + { + "epoch": 177.3684210526316, + "grad_norm": 1.6140894889831543, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 26960 + }, + { + "epoch": 177.43421052631578, + "grad_norm": 1.3021560907363892, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 26970 + }, + { + "epoch": 177.5, + "grad_norm": 1.0216045379638672, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 26980 + }, + { + "epoch": 177.56578947368422, + "grad_norm": 0.9635993242263794, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 26990 + }, + { + "epoch": 177.6315789473684, + "grad_norm": 1.2217941284179688, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 27000 + }, + { + "epoch": 177.69736842105263, + "grad_norm": 1.3411375284194946, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 27010 + }, + { + "epoch": 177.76315789473685, + "grad_norm": 0.7628015279769897, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 27020 + }, + { + "epoch": 177.82894736842104, + "grad_norm": 0.9084556698799133, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 27030 + }, + { + "epoch": 177.89473684210526, + "grad_norm": 1.1490297317504883, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 27040 + }, + { + "epoch": 177.96052631578948, + "grad_norm": 1.4998666048049927, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 27050 + }, + { + "epoch": 178.02631578947367, + "grad_norm": 1.2536399364471436, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 27060 + }, + { + "epoch": 178.0921052631579, + "grad_norm": 1.217274785041809, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 27070 + }, + { + "epoch": 178.1578947368421, + "grad_norm": 1.3491017818450928, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 27080 + }, + { + "epoch": 178.22368421052633, + "grad_norm": 1.25252366065979, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 27090 + }, + { + "epoch": 178.28947368421052, + "grad_norm": 1.0801442861557007, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 27100 + }, + { + "epoch": 178.35526315789474, + "grad_norm": 1.302809238433838, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 27110 + }, + { + "epoch": 178.42105263157896, + "grad_norm": 1.4321649074554443, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 27120 + }, + { + "epoch": 178.48684210526315, + "grad_norm": 1.3368439674377441, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 27130 + }, + { + "epoch": 178.55263157894737, + "grad_norm": 1.2113497257232666, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 27140 + }, + { + "epoch": 178.6184210526316, + "grad_norm": 1.0615336894989014, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 27150 + }, + { + "epoch": 178.68421052631578, + "grad_norm": 1.075102686882019, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 27160 + }, + { + "epoch": 178.75, + "grad_norm": 0.9051514863967896, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 27170 + }, + { + "epoch": 178.81578947368422, + "grad_norm": 1.019240140914917, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 27180 + }, + { + "epoch": 178.8815789473684, + "grad_norm": 1.2126598358154297, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 27190 + }, + { + "epoch": 178.94736842105263, + "grad_norm": 1.2346746921539307, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 27200 + }, + { + "epoch": 179.01315789473685, + "grad_norm": 1.2096296548843384, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 27210 + }, + { + "epoch": 179.07894736842104, + "grad_norm": 0.973487913608551, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 27220 + }, + { + "epoch": 179.14473684210526, + "grad_norm": 1.368411660194397, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 27230 + }, + { + "epoch": 179.21052631578948, + "grad_norm": 1.2822602987289429, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 27240 + }, + { + "epoch": 179.27631578947367, + "grad_norm": 1.2959389686584473, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 27250 + }, + { + "epoch": 179.3421052631579, + "grad_norm": 1.3127459287643433, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 27260 + }, + { + "epoch": 179.4078947368421, + "grad_norm": 1.122056007385254, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 27270 + }, + { + "epoch": 179.47368421052633, + "grad_norm": 1.3028178215026855, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 27280 + }, + { + "epoch": 179.53947368421052, + "grad_norm": 0.9754687547683716, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 27290 + }, + { + "epoch": 179.60526315789474, + "grad_norm": 1.238944411277771, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 27300 + }, + { + "epoch": 179.67105263157896, + "grad_norm": 1.3136788606643677, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 27310 + }, + { + "epoch": 179.73684210526315, + "grad_norm": 1.3043994903564453, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 27320 + }, + { + "epoch": 179.80263157894737, + "grad_norm": 1.0866022109985352, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 27330 + }, + { + "epoch": 179.8684210526316, + "grad_norm": 1.4017601013183594, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 27340 + }, + { + "epoch": 179.93421052631578, + "grad_norm": 1.2640719413757324, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 27350 + }, + { + "epoch": 180.0, + "grad_norm": 1.5062077045440674, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 27360 + }, + { + "epoch": 180.06578947368422, + "grad_norm": 1.3499562740325928, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 27370 + }, + { + "epoch": 180.1315789473684, + "grad_norm": 1.028475284576416, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 27380 + }, + { + "epoch": 180.19736842105263, + "grad_norm": 1.1006380319595337, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 27390 + }, + { + "epoch": 180.26315789473685, + "grad_norm": 1.2591434717178345, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 27400 + }, + { + "epoch": 180.32894736842104, + "grad_norm": 1.4151883125305176, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 27410 + }, + { + "epoch": 180.39473684210526, + "grad_norm": 1.132414698600769, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 27420 + }, + { + "epoch": 180.46052631578948, + "grad_norm": 1.3552273511886597, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 27430 + }, + { + "epoch": 180.52631578947367, + "grad_norm": 0.8924891352653503, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 27440 + }, + { + "epoch": 180.5921052631579, + "grad_norm": 1.676880955696106, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 27450 + }, + { + "epoch": 180.6578947368421, + "grad_norm": 1.1529208421707153, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 27460 + }, + { + "epoch": 180.72368421052633, + "grad_norm": 1.1190521717071533, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 27470 + }, + { + "epoch": 180.78947368421052, + "grad_norm": 1.293173909187317, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 27480 + }, + { + "epoch": 180.85526315789474, + "grad_norm": 0.9071879386901855, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 27490 + }, + { + "epoch": 180.92105263157896, + "grad_norm": 1.2806898355484009, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 27500 + }, + { + "epoch": 180.98684210526315, + "grad_norm": 0.9975317120552063, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 27510 + }, + { + "epoch": 181.05263157894737, + "grad_norm": 1.2514346837997437, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 27520 + }, + { + "epoch": 181.1184210526316, + "grad_norm": 1.361411452293396, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 27530 + }, + { + "epoch": 181.18421052631578, + "grad_norm": 0.9159877896308899, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 27540 + }, + { + "epoch": 181.25, + "grad_norm": 1.0003042221069336, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 27550 + }, + { + "epoch": 181.31578947368422, + "grad_norm": 1.133141040802002, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 27560 + }, + { + "epoch": 181.3815789473684, + "grad_norm": 1.0461962223052979, + "learning_rate": 0.0001, + "loss": 0.0201, + "step": 27570 + }, + { + "epoch": 181.44736842105263, + "grad_norm": 1.3540297746658325, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 27580 + }, + { + "epoch": 181.51315789473685, + "grad_norm": 1.3331037759780884, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 27590 + }, + { + "epoch": 181.57894736842104, + "grad_norm": 1.4690834283828735, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 27600 + }, + { + "epoch": 181.64473684210526, + "grad_norm": 1.3080083131790161, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 27610 + }, + { + "epoch": 181.71052631578948, + "grad_norm": 1.1954478025436401, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 27620 + }, + { + "epoch": 181.77631578947367, + "grad_norm": 1.5837913751602173, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 27630 + }, + { + "epoch": 181.8421052631579, + "grad_norm": 1.9183199405670166, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 27640 + }, + { + "epoch": 181.9078947368421, + "grad_norm": 1.468712568283081, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 27650 + }, + { + "epoch": 181.97368421052633, + "grad_norm": 1.183106780052185, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 27660 + }, + { + "epoch": 182.03947368421052, + "grad_norm": 1.315909743309021, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 27670 + }, + { + "epoch": 182.10526315789474, + "grad_norm": 1.3399372100830078, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 27680 + }, + { + "epoch": 182.17105263157896, + "grad_norm": 1.3758108615875244, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 27690 + }, + { + "epoch": 182.23684210526315, + "grad_norm": 1.372221827507019, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 27700 + }, + { + "epoch": 182.30263157894737, + "grad_norm": 1.1618051528930664, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 27710 + }, + { + "epoch": 182.3684210526316, + "grad_norm": 1.5792875289916992, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 27720 + }, + { + "epoch": 182.43421052631578, + "grad_norm": 1.4284778833389282, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 27730 + }, + { + "epoch": 182.5, + "grad_norm": 1.1605561971664429, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 27740 + }, + { + "epoch": 182.56578947368422, + "grad_norm": 1.394383192062378, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 27750 + }, + { + "epoch": 182.6315789473684, + "grad_norm": 1.2640798091888428, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 27760 + }, + { + "epoch": 182.69736842105263, + "grad_norm": 1.0631877183914185, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 27770 + }, + { + "epoch": 182.76315789473685, + "grad_norm": 1.3253459930419922, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 27780 + }, + { + "epoch": 182.82894736842104, + "grad_norm": 1.4501514434814453, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 27790 + }, + { + "epoch": 182.89473684210526, + "grad_norm": 1.629292607307434, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 27800 + }, + { + "epoch": 182.96052631578948, + "grad_norm": 1.373126745223999, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 27810 + }, + { + "epoch": 183.02631578947367, + "grad_norm": 1.4406814575195312, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 27820 + }, + { + "epoch": 183.0921052631579, + "grad_norm": 1.3722494840621948, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 27830 + }, + { + "epoch": 183.1578947368421, + "grad_norm": 1.8163440227508545, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 27840 + }, + { + "epoch": 183.22368421052633, + "grad_norm": 2.5917444229125977, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 27850 + }, + { + "epoch": 183.28947368421052, + "grad_norm": 1.976516604423523, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 27860 + }, + { + "epoch": 183.35526315789474, + "grad_norm": 1.48430335521698, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 27870 + }, + { + "epoch": 183.42105263157896, + "grad_norm": 1.908106803894043, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 27880 + }, + { + "epoch": 183.48684210526315, + "grad_norm": 1.8086661100387573, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 27890 + }, + { + "epoch": 183.55263157894737, + "grad_norm": 1.7461923360824585, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 27900 + }, + { + "epoch": 183.6184210526316, + "grad_norm": 1.1997524499893188, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 27910 + }, + { + "epoch": 183.68421052631578, + "grad_norm": 1.2785133123397827, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 27920 + }, + { + "epoch": 183.75, + "grad_norm": 0.9899089932441711, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 27930 + }, + { + "epoch": 183.81578947368422, + "grad_norm": 1.36931574344635, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 27940 + }, + { + "epoch": 183.8815789473684, + "grad_norm": 1.0563100576400757, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 27950 + }, + { + "epoch": 183.94736842105263, + "grad_norm": 1.2459518909454346, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 27960 + }, + { + "epoch": 184.01315789473685, + "grad_norm": 1.7021054029464722, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 27970 + }, + { + "epoch": 184.07894736842104, + "grad_norm": 1.1068536043167114, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 27980 + }, + { + "epoch": 184.14473684210526, + "grad_norm": 1.299325704574585, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 27990 + }, + { + "epoch": 184.21052631578948, + "grad_norm": 1.3241454362869263, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 28000 + }, + { + "epoch": 184.27631578947367, + "grad_norm": 1.4273960590362549, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 28010 + }, + { + "epoch": 184.3421052631579, + "grad_norm": 1.1758840084075928, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 28020 + }, + { + "epoch": 184.4078947368421, + "grad_norm": 1.7336351871490479, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 28030 + }, + { + "epoch": 184.47368421052633, + "grad_norm": 1.4646220207214355, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 28040 + }, + { + "epoch": 184.53947368421052, + "grad_norm": 1.5751539468765259, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 28050 + }, + { + "epoch": 184.60526315789474, + "grad_norm": 1.675756812095642, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 28060 + }, + { + "epoch": 184.67105263157896, + "grad_norm": 1.7213208675384521, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 28070 + }, + { + "epoch": 184.73684210526315, + "grad_norm": 1.4388047456741333, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 28080 + }, + { + "epoch": 184.80263157894737, + "grad_norm": 1.40472412109375, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 28090 + }, + { + "epoch": 184.8684210526316, + "grad_norm": 1.187379240989685, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 28100 + }, + { + "epoch": 184.93421052631578, + "grad_norm": 1.5341135263442993, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 28110 + }, + { + "epoch": 185.0, + "grad_norm": 1.2194042205810547, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 28120 + }, + { + "epoch": 185.06578947368422, + "grad_norm": 1.183134913444519, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 28130 + }, + { + "epoch": 185.1315789473684, + "grad_norm": 1.4038760662078857, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 28140 + }, + { + "epoch": 185.19736842105263, + "grad_norm": 1.3915314674377441, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 28150 + }, + { + "epoch": 185.26315789473685, + "grad_norm": 1.3534201383590698, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 28160 + }, + { + "epoch": 185.32894736842104, + "grad_norm": 1.2670302391052246, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 28170 + }, + { + "epoch": 185.39473684210526, + "grad_norm": 1.6847999095916748, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 28180 + }, + { + "epoch": 185.46052631578948, + "grad_norm": 1.430661678314209, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 28190 + }, + { + "epoch": 185.52631578947367, + "grad_norm": 1.600559949874878, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 28200 + }, + { + "epoch": 185.5921052631579, + "grad_norm": 1.3401246070861816, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 28210 + }, + { + "epoch": 185.6578947368421, + "grad_norm": 1.1347683668136597, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 28220 + }, + { + "epoch": 185.72368421052633, + "grad_norm": 1.1868791580200195, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 28230 + }, + { + "epoch": 185.78947368421052, + "grad_norm": 1.1938930749893188, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 28240 + }, + { + "epoch": 185.85526315789474, + "grad_norm": 0.9007596969604492, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 28250 + }, + { + "epoch": 185.92105263157896, + "grad_norm": 1.0700163841247559, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 28260 + }, + { + "epoch": 185.98684210526315, + "grad_norm": 1.3188432455062866, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 28270 + }, + { + "epoch": 186.05263157894737, + "grad_norm": 1.2520577907562256, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 28280 + }, + { + "epoch": 186.1184210526316, + "grad_norm": 1.2639751434326172, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 28290 + }, + { + "epoch": 186.18421052631578, + "grad_norm": 1.2425426244735718, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 28300 + }, + { + "epoch": 186.25, + "grad_norm": 1.1979525089263916, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 28310 + }, + { + "epoch": 186.31578947368422, + "grad_norm": 1.25445556640625, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 28320 + }, + { + "epoch": 186.3815789473684, + "grad_norm": 1.0609337091445923, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 28330 + }, + { + "epoch": 186.44736842105263, + "grad_norm": 0.9719675183296204, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 28340 + }, + { + "epoch": 186.51315789473685, + "grad_norm": 1.2194287776947021, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 28350 + }, + { + "epoch": 186.57894736842104, + "grad_norm": 1.2803702354431152, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 28360 + }, + { + "epoch": 186.64473684210526, + "grad_norm": 1.472983717918396, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 28370 + }, + { + "epoch": 186.71052631578948, + "grad_norm": 0.7579768896102905, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 28380 + }, + { + "epoch": 186.77631578947367, + "grad_norm": 1.4877924919128418, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 28390 + }, + { + "epoch": 186.8421052631579, + "grad_norm": 1.1992541551589966, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 28400 + }, + { + "epoch": 186.9078947368421, + "grad_norm": 1.5354152917861938, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 28410 + }, + { + "epoch": 186.97368421052633, + "grad_norm": 1.009834885597229, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 28420 + }, + { + "epoch": 187.03947368421052, + "grad_norm": 1.4068772792816162, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 28430 + }, + { + "epoch": 187.10526315789474, + "grad_norm": 1.7721872329711914, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 28440 + }, + { + "epoch": 187.17105263157896, + "grad_norm": 1.3178023099899292, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 28450 + }, + { + "epoch": 187.23684210526315, + "grad_norm": 2.538278341293335, + "learning_rate": 0.0001, + "loss": 0.0277, + "step": 28460 + }, + { + "epoch": 187.30263157894737, + "grad_norm": 2.6935033798217773, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 28470 + }, + { + "epoch": 187.3684210526316, + "grad_norm": 2.583784341812134, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 28480 + }, + { + "epoch": 187.43421052631578, + "grad_norm": 2.7853147983551025, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 28490 + }, + { + "epoch": 187.5, + "grad_norm": 1.8979226350784302, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 28500 + }, + { + "epoch": 187.56578947368422, + "grad_norm": 1.951745867729187, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 28510 + }, + { + "epoch": 187.6315789473684, + "grad_norm": 1.744847059249878, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 28520 + }, + { + "epoch": 187.69736842105263, + "grad_norm": 1.6811963319778442, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 28530 + }, + { + "epoch": 187.76315789473685, + "grad_norm": 1.182780146598816, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 28540 + }, + { + "epoch": 187.82894736842104, + "grad_norm": 1.3196076154708862, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 28550 + }, + { + "epoch": 187.89473684210526, + "grad_norm": 1.376051425933838, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 28560 + }, + { + "epoch": 187.96052631578948, + "grad_norm": 1.3644719123840332, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 28570 + }, + { + "epoch": 188.02631578947367, + "grad_norm": 1.3114805221557617, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 28580 + }, + { + "epoch": 188.0921052631579, + "grad_norm": 1.1968399286270142, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 28590 + }, + { + "epoch": 188.1578947368421, + "grad_norm": 1.2592320442199707, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 28600 + }, + { + "epoch": 188.22368421052633, + "grad_norm": 1.2056187391281128, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 28610 + }, + { + "epoch": 188.28947368421052, + "grad_norm": 1.101921558380127, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 28620 + }, + { + "epoch": 188.35526315789474, + "grad_norm": 1.2965214252471924, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 28630 + }, + { + "epoch": 188.42105263157896, + "grad_norm": 1.5886129140853882, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 28640 + }, + { + "epoch": 188.48684210526315, + "grad_norm": 1.6472376585006714, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 28650 + }, + { + "epoch": 188.55263157894737, + "grad_norm": 1.4192408323287964, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 28660 + }, + { + "epoch": 188.6184210526316, + "grad_norm": 1.955345630645752, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 28670 + }, + { + "epoch": 188.68421052631578, + "grad_norm": 1.7373226881027222, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 28680 + }, + { + "epoch": 188.75, + "grad_norm": 1.6695311069488525, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 28690 + }, + { + "epoch": 188.81578947368422, + "grad_norm": 1.6247828006744385, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 28700 + }, + { + "epoch": 188.8815789473684, + "grad_norm": 1.587103009223938, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 28710 + }, + { + "epoch": 188.94736842105263, + "grad_norm": 1.3761903047561646, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 28720 + }, + { + "epoch": 189.01315789473685, + "grad_norm": 1.054154872894287, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 28730 + }, + { + "epoch": 189.07894736842104, + "grad_norm": 1.7570017576217651, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 28740 + }, + { + "epoch": 189.14473684210526, + "grad_norm": 1.302839994430542, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 28750 + }, + { + "epoch": 189.21052631578948, + "grad_norm": 1.2320873737335205, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 28760 + }, + { + "epoch": 189.27631578947367, + "grad_norm": 1.2856324911117554, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 28770 + }, + { + "epoch": 189.3421052631579, + "grad_norm": 1.2495955228805542, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 28780 + }, + { + "epoch": 189.4078947368421, + "grad_norm": 1.2113772630691528, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 28790 + }, + { + "epoch": 189.47368421052633, + "grad_norm": 1.1812059879302979, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 28800 + }, + { + "epoch": 189.53947368421052, + "grad_norm": 1.3910290002822876, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 28810 + }, + { + "epoch": 189.60526315789474, + "grad_norm": 1.076119303703308, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 28820 + }, + { + "epoch": 189.67105263157896, + "grad_norm": 2.222041606903076, + "learning_rate": 0.0001, + "loss": 0.0251, + "step": 28830 + }, + { + "epoch": 189.73684210526315, + "grad_norm": 2.265124559402466, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 28840 + }, + { + "epoch": 189.80263157894737, + "grad_norm": 1.6033040285110474, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 28850 + }, + { + "epoch": 189.8684210526316, + "grad_norm": 1.4938453435897827, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 28860 + }, + { + "epoch": 189.93421052631578, + "grad_norm": 1.3724675178527832, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 28870 + }, + { + "epoch": 190.0, + "grad_norm": 1.3057665824890137, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 28880 + }, + { + "epoch": 190.06578947368422, + "grad_norm": 1.1445728540420532, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 28890 + }, + { + "epoch": 190.1315789473684, + "grad_norm": 1.2155309915542603, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 28900 + }, + { + "epoch": 190.19736842105263, + "grad_norm": 1.1960302591323853, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 28910 + }, + { + "epoch": 190.26315789473685, + "grad_norm": 1.3511428833007812, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 28920 + }, + { + "epoch": 190.32894736842104, + "grad_norm": 0.9308839440345764, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 28930 + }, + { + "epoch": 190.39473684210526, + "grad_norm": 1.409009575843811, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 28940 + }, + { + "epoch": 190.46052631578948, + "grad_norm": 1.1859124898910522, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 28950 + }, + { + "epoch": 190.52631578947367, + "grad_norm": 1.6667457818984985, + "learning_rate": 0.0001, + "loss": 0.0231, + "step": 28960 + }, + { + "epoch": 190.5921052631579, + "grad_norm": 1.8634475469589233, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 28970 + }, + { + "epoch": 190.6578947368421, + "grad_norm": 1.645556092262268, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 28980 + }, + { + "epoch": 190.72368421052633, + "grad_norm": 1.3310197591781616, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 28990 + }, + { + "epoch": 190.78947368421052, + "grad_norm": 1.2974885702133179, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 29000 + }, + { + "epoch": 190.85526315789474, + "grad_norm": 1.270050048828125, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 29010 + }, + { + "epoch": 190.92105263157896, + "grad_norm": 1.1336300373077393, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 29020 + }, + { + "epoch": 190.98684210526315, + "grad_norm": 1.3560301065444946, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 29030 + }, + { + "epoch": 191.05263157894737, + "grad_norm": 1.405979871749878, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 29040 + }, + { + "epoch": 191.1184210526316, + "grad_norm": 1.3937962055206299, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 29050 + }, + { + "epoch": 191.18421052631578, + "grad_norm": 1.3739968538284302, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 29060 + }, + { + "epoch": 191.25, + "grad_norm": 1.259650707244873, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 29070 + }, + { + "epoch": 191.31578947368422, + "grad_norm": 0.9018769860267639, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 29080 + }, + { + "epoch": 191.3815789473684, + "grad_norm": 1.1630486249923706, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 29090 + }, + { + "epoch": 191.44736842105263, + "grad_norm": 1.2646576166152954, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 29100 + }, + { + "epoch": 191.51315789473685, + "grad_norm": 1.3670384883880615, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 29110 + }, + { + "epoch": 191.57894736842104, + "grad_norm": 1.4089142084121704, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 29120 + }, + { + "epoch": 191.64473684210526, + "grad_norm": 1.2056618928909302, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 29130 + }, + { + "epoch": 191.71052631578948, + "grad_norm": 1.2704377174377441, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 29140 + }, + { + "epoch": 191.77631578947367, + "grad_norm": 1.3765251636505127, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 29150 + }, + { + "epoch": 191.8421052631579, + "grad_norm": 1.2344194650650024, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 29160 + }, + { + "epoch": 191.9078947368421, + "grad_norm": 1.1715292930603027, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 29170 + }, + { + "epoch": 191.97368421052633, + "grad_norm": 1.3913319110870361, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 29180 + }, + { + "epoch": 192.03947368421052, + "grad_norm": 1.4549134969711304, + "learning_rate": 0.0001, + "loss": 0.0202, + "step": 29190 + }, + { + "epoch": 192.10526315789474, + "grad_norm": 1.2978013753890991, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 29200 + }, + { + "epoch": 192.17105263157896, + "grad_norm": 1.2289888858795166, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 29210 + }, + { + "epoch": 192.23684210526315, + "grad_norm": 0.9477553367614746, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 29220 + }, + { + "epoch": 192.30263157894737, + "grad_norm": 1.459433674812317, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 29230 + }, + { + "epoch": 192.3684210526316, + "grad_norm": 1.5326049327850342, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 29240 + }, + { + "epoch": 192.43421052631578, + "grad_norm": 1.2914258241653442, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 29250 + }, + { + "epoch": 192.5, + "grad_norm": 1.216827630996704, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 29260 + }, + { + "epoch": 192.56578947368422, + "grad_norm": 1.4195771217346191, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 29270 + }, + { + "epoch": 192.6315789473684, + "grad_norm": 1.1989006996154785, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 29280 + }, + { + "epoch": 192.69736842105263, + "grad_norm": 1.2557868957519531, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 29290 + }, + { + "epoch": 192.76315789473685, + "grad_norm": 1.556070327758789, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 29300 + }, + { + "epoch": 192.82894736842104, + "grad_norm": 1.3581067323684692, + "learning_rate": 0.0001, + "loss": 0.0208, + "step": 29310 + }, + { + "epoch": 192.89473684210526, + "grad_norm": 1.4777802228927612, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 29320 + }, + { + "epoch": 192.96052631578948, + "grad_norm": 1.00076162815094, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 29330 + }, + { + "epoch": 193.02631578947367, + "grad_norm": 1.448732852935791, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 29340 + }, + { + "epoch": 193.0921052631579, + "grad_norm": 1.2730473279953003, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 29350 + }, + { + "epoch": 193.1578947368421, + "grad_norm": 1.256740689277649, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 29360 + }, + { + "epoch": 193.22368421052633, + "grad_norm": 1.0726568698883057, + "learning_rate": 0.0001, + "loss": 0.0198, + "step": 29370 + }, + { + "epoch": 193.28947368421052, + "grad_norm": 0.9188181757926941, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 29380 + }, + { + "epoch": 193.35526315789474, + "grad_norm": 1.2872679233551025, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 29390 + }, + { + "epoch": 193.42105263157896, + "grad_norm": 1.520761251449585, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 29400 + }, + { + "epoch": 193.48684210526315, + "grad_norm": 1.1832773685455322, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 29410 + }, + { + "epoch": 193.55263157894737, + "grad_norm": 1.4867194890975952, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 29420 + }, + { + "epoch": 193.6184210526316, + "grad_norm": 1.7297718524932861, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 29430 + }, + { + "epoch": 193.68421052631578, + "grad_norm": 1.251755952835083, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 29440 + }, + { + "epoch": 193.75, + "grad_norm": 1.3701705932617188, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 29450 + }, + { + "epoch": 193.81578947368422, + "grad_norm": 1.7457464933395386, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 29460 + }, + { + "epoch": 193.8815789473684, + "grad_norm": 1.126453161239624, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 29470 + }, + { + "epoch": 193.94736842105263, + "grad_norm": 1.2443026304244995, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 29480 + }, + { + "epoch": 194.01315789473685, + "grad_norm": 1.2029883861541748, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 29490 + }, + { + "epoch": 194.07894736842104, + "grad_norm": 1.449203610420227, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 29500 + }, + { + "epoch": 194.14473684210526, + "grad_norm": 1.0315684080123901, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 29510 + }, + { + "epoch": 194.21052631578948, + "grad_norm": 1.3804000616073608, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 29520 + }, + { + "epoch": 194.27631578947367, + "grad_norm": 1.527992844581604, + "learning_rate": 0.0001, + "loss": 0.0194, + "step": 29530 + }, + { + "epoch": 194.3421052631579, + "grad_norm": 1.5879637002944946, + "learning_rate": 0.0001, + "loss": 0.0244, + "step": 29540 + }, + { + "epoch": 194.4078947368421, + "grad_norm": 1.9236878156661987, + "learning_rate": 0.0001, + "loss": 0.0197, + "step": 29550 + }, + { + "epoch": 194.47368421052633, + "grad_norm": 1.539232611656189, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 29560 + }, + { + "epoch": 194.53947368421052, + "grad_norm": 1.3699829578399658, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 29570 + }, + { + "epoch": 194.60526315789474, + "grad_norm": 1.3466382026672363, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 29580 + }, + { + "epoch": 194.67105263157896, + "grad_norm": 1.6446095705032349, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 29590 + }, + { + "epoch": 194.73684210526315, + "grad_norm": 1.2877060174942017, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 29600 + }, + { + "epoch": 194.80263157894737, + "grad_norm": 1.1062644720077515, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 29610 + }, + { + "epoch": 194.8684210526316, + "grad_norm": 0.854597270488739, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 29620 + }, + { + "epoch": 194.93421052631578, + "grad_norm": 1.1438567638397217, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 29630 + }, + { + "epoch": 195.0, + "grad_norm": 1.058065414428711, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 29640 + }, + { + "epoch": 195.06578947368422, + "grad_norm": 1.3158515691757202, + "learning_rate": 0.0001, + "loss": 0.0212, + "step": 29650 + }, + { + "epoch": 195.1315789473684, + "grad_norm": 1.3402299880981445, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 29660 + }, + { + "epoch": 195.19736842105263, + "grad_norm": 1.5896971225738525, + "learning_rate": 0.0001, + "loss": 0.022, + "step": 29670 + }, + { + "epoch": 195.26315789473685, + "grad_norm": 1.113895297050476, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 29680 + }, + { + "epoch": 195.32894736842104, + "grad_norm": 0.8616934418678284, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 29690 + }, + { + "epoch": 195.39473684210526, + "grad_norm": 1.0356990098953247, + "learning_rate": 0.0001, + "loss": 0.0206, + "step": 29700 + }, + { + "epoch": 195.46052631578948, + "grad_norm": 1.1201450824737549, + "learning_rate": 0.0001, + "loss": 0.0205, + "step": 29710 + }, + { + "epoch": 195.52631578947367, + "grad_norm": 1.386657476425171, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 29720 + }, + { + "epoch": 195.5921052631579, + "grad_norm": 0.8346795439720154, + "learning_rate": 0.0001, + "loss": 0.0221, + "step": 29730 + }, + { + "epoch": 195.6578947368421, + "grad_norm": 1.4521796703338623, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 29740 + }, + { + "epoch": 195.72368421052633, + "grad_norm": 1.4133926630020142, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 29750 + }, + { + "epoch": 195.78947368421052, + "grad_norm": 0.9600488543510437, + "learning_rate": 0.0001, + "loss": 0.021, + "step": 29760 + }, + { + "epoch": 195.85526315789474, + "grad_norm": 1.1314250230789185, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 29770 + }, + { + "epoch": 195.92105263157896, + "grad_norm": 0.8754208087921143, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 29780 + }, + { + "epoch": 195.98684210526315, + "grad_norm": 1.3597506284713745, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 29790 + }, + { + "epoch": 196.05263157894737, + "grad_norm": 1.1357085704803467, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 29800 + }, + { + "epoch": 196.1184210526316, + "grad_norm": 1.1614354848861694, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 29810 + }, + { + "epoch": 196.18421052631578, + "grad_norm": 1.2745589017868042, + "learning_rate": 0.0001, + "loss": 0.0209, + "step": 29820 + }, + { + "epoch": 196.25, + "grad_norm": 1.1572297811508179, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 29830 + }, + { + "epoch": 196.31578947368422, + "grad_norm": 0.7867621183395386, + "learning_rate": 0.0001, + "loss": 0.02, + "step": 29840 + }, + { + "epoch": 196.3815789473684, + "grad_norm": 1.308093786239624, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 29850 + }, + { + "epoch": 196.44736842105263, + "grad_norm": 1.2609182596206665, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 29860 + }, + { + "epoch": 196.51315789473685, + "grad_norm": 1.3445231914520264, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 29870 + }, + { + "epoch": 196.57894736842104, + "grad_norm": 1.0773708820343018, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 29880 + }, + { + "epoch": 196.64473684210526, + "grad_norm": 0.9694660902023315, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 29890 + }, + { + "epoch": 196.71052631578948, + "grad_norm": 1.11964750289917, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 29900 + }, + { + "epoch": 196.77631578947367, + "grad_norm": 1.1886194944381714, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 29910 + }, + { + "epoch": 196.8421052631579, + "grad_norm": 1.4369953870773315, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 29920 + }, + { + "epoch": 196.9078947368421, + "grad_norm": 0.9446222186088562, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 29930 + }, + { + "epoch": 196.97368421052633, + "grad_norm": 1.308263897895813, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 29940 + }, + { + "epoch": 197.03947368421052, + "grad_norm": 1.479591965675354, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 29950 + }, + { + "epoch": 197.10526315789474, + "grad_norm": 1.261594295501709, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 29960 + }, + { + "epoch": 197.17105263157896, + "grad_norm": 1.597878098487854, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 29970 + }, + { + "epoch": 197.23684210526315, + "grad_norm": 0.9023445248603821, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 29980 + }, + { + "epoch": 197.30263157894737, + "grad_norm": 1.7900540828704834, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 29990 + }, + { + "epoch": 197.3684210526316, + "grad_norm": 1.0138733386993408, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 30000 + }, + { + "epoch": 197.43421052631578, + "grad_norm": 1.617836594581604, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 30010 + }, + { + "epoch": 197.5, + "grad_norm": 1.0943124294281006, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 30020 + }, + { + "epoch": 197.56578947368422, + "grad_norm": 1.1068415641784668, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 30030 + }, + { + "epoch": 197.6315789473684, + "grad_norm": 1.248450756072998, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 30040 + }, + { + "epoch": 197.69736842105263, + "grad_norm": 1.4986473321914673, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 30050 + }, + { + "epoch": 197.76315789473685, + "grad_norm": 1.2006663084030151, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 30060 + }, + { + "epoch": 197.82894736842104, + "grad_norm": 1.0944031476974487, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 30070 + }, + { + "epoch": 197.89473684210526, + "grad_norm": 1.6953439712524414, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 30080 + }, + { + "epoch": 197.96052631578948, + "grad_norm": 1.1064839363098145, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 30090 + }, + { + "epoch": 198.02631578947367, + "grad_norm": 2.015153646469116, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 30100 + }, + { + "epoch": 198.0921052631579, + "grad_norm": 1.8545198440551758, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 30110 + }, + { + "epoch": 198.1578947368421, + "grad_norm": 1.5696916580200195, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 30120 + }, + { + "epoch": 198.22368421052633, + "grad_norm": 1.4251947402954102, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 30130 + }, + { + "epoch": 198.28947368421052, + "grad_norm": 1.1599140167236328, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 30140 + }, + { + "epoch": 198.35526315789474, + "grad_norm": 1.4993690252304077, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 30150 + }, + { + "epoch": 198.42105263157896, + "grad_norm": 1.4265055656433105, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 30160 + }, + { + "epoch": 198.48684210526315, + "grad_norm": 1.101048231124878, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 30170 + }, + { + "epoch": 198.55263157894737, + "grad_norm": 1.5096710920333862, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 30180 + }, + { + "epoch": 198.6184210526316, + "grad_norm": 1.190572738647461, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 30190 + }, + { + "epoch": 198.68421052631578, + "grad_norm": 1.591253399848938, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 30200 + }, + { + "epoch": 198.75, + "grad_norm": 1.0739487409591675, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 30210 + }, + { + "epoch": 198.81578947368422, + "grad_norm": 1.2973365783691406, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 30220 + }, + { + "epoch": 198.8815789473684, + "grad_norm": 1.2122310400009155, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 30230 + }, + { + "epoch": 198.94736842105263, + "grad_norm": 1.2195587158203125, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 30240 + }, + { + "epoch": 199.01315789473685, + "grad_norm": 1.0152171850204468, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 30250 + }, + { + "epoch": 199.07894736842104, + "grad_norm": 1.404311180114746, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 30260 + }, + { + "epoch": 199.14473684210526, + "grad_norm": 1.4140353202819824, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 30270 + }, + { + "epoch": 199.21052631578948, + "grad_norm": 1.4403727054595947, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 30280 + }, + { + "epoch": 199.27631578947367, + "grad_norm": 1.9959533214569092, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 30290 + }, + { + "epoch": 199.3421052631579, + "grad_norm": 1.6239718198776245, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 30300 + }, + { + "epoch": 199.4078947368421, + "grad_norm": 1.3269397020339966, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 30310 + }, + { + "epoch": 199.47368421052633, + "grad_norm": 1.6701898574829102, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 30320 + }, + { + "epoch": 199.53947368421052, + "grad_norm": 1.5344353914260864, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 30330 + }, + { + "epoch": 199.60526315789474, + "grad_norm": 1.4121617078781128, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 30340 + }, + { + "epoch": 199.67105263157896, + "grad_norm": 0.9731596112251282, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 30350 + }, + { + "epoch": 199.73684210526315, + "grad_norm": 1.1787141561508179, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 30360 + }, + { + "epoch": 199.80263157894737, + "grad_norm": 1.0008739233016968, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 30370 + }, + { + "epoch": 199.8684210526316, + "grad_norm": 1.2544138431549072, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 30380 + }, + { + "epoch": 199.93421052631578, + "grad_norm": 1.1031585931777954, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 30390 + }, + { + "epoch": 200.0, + "grad_norm": 1.5565379858016968, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 30400 + }, + { + "epoch": 200.06578947368422, + "grad_norm": 1.0128217935562134, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 30410 + }, + { + "epoch": 200.1315789473684, + "grad_norm": 0.9004884362220764, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 30420 + }, + { + "epoch": 200.19736842105263, + "grad_norm": 0.7593161463737488, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 30430 + }, + { + "epoch": 200.26315789473685, + "grad_norm": 1.4337648153305054, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 30440 + }, + { + "epoch": 200.32894736842104, + "grad_norm": 1.2471925020217896, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 30450 + }, + { + "epoch": 200.39473684210526, + "grad_norm": 1.1293731927871704, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 30460 + }, + { + "epoch": 200.46052631578948, + "grad_norm": 0.8459559679031372, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 30470 + }, + { + "epoch": 200.52631578947367, + "grad_norm": 1.5371299982070923, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 30480 + }, + { + "epoch": 200.5921052631579, + "grad_norm": 1.3424179553985596, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 30490 + }, + { + "epoch": 200.6578947368421, + "grad_norm": 1.5373995304107666, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 30500 + }, + { + "epoch": 200.72368421052633, + "grad_norm": 0.9894046783447266, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 30510 + }, + { + "epoch": 200.78947368421052, + "grad_norm": 1.7582042217254639, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 30520 + }, + { + "epoch": 200.85526315789474, + "grad_norm": 1.3425977230072021, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 30530 + }, + { + "epoch": 200.92105263157896, + "grad_norm": 1.3770979642868042, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 30540 + }, + { + "epoch": 200.98684210526315, + "grad_norm": 1.40471351146698, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 30550 + }, + { + "epoch": 201.05263157894737, + "grad_norm": 1.337066411972046, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 30560 + }, + { + "epoch": 201.1184210526316, + "grad_norm": 1.1256475448608398, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 30570 + }, + { + "epoch": 201.18421052631578, + "grad_norm": 1.3861485719680786, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 30580 + }, + { + "epoch": 201.25, + "grad_norm": 1.1584820747375488, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 30590 + }, + { + "epoch": 201.31578947368422, + "grad_norm": 1.3375623226165771, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 30600 + }, + { + "epoch": 201.3815789473684, + "grad_norm": 1.2848438024520874, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 30610 + }, + { + "epoch": 201.44736842105263, + "grad_norm": 1.2044715881347656, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 30620 + }, + { + "epoch": 201.51315789473685, + "grad_norm": 1.1717559099197388, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 30630 + }, + { + "epoch": 201.57894736842104, + "grad_norm": 1.0431238412857056, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 30640 + }, + { + "epoch": 201.64473684210526, + "grad_norm": 1.0847781896591187, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 30650 + }, + { + "epoch": 201.71052631578948, + "grad_norm": 1.1109511852264404, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 30660 + }, + { + "epoch": 201.77631578947367, + "grad_norm": 1.094912052154541, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 30670 + }, + { + "epoch": 201.8421052631579, + "grad_norm": 1.0548181533813477, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 30680 + }, + { + "epoch": 201.9078947368421, + "grad_norm": 1.3336670398712158, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 30690 + }, + { + "epoch": 201.97368421052633, + "grad_norm": 1.0279159545898438, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 30700 + }, + { + "epoch": 202.03947368421052, + "grad_norm": 0.7135113477706909, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 30710 + }, + { + "epoch": 202.10526315789474, + "grad_norm": 1.0755541324615479, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 30720 + }, + { + "epoch": 202.17105263157896, + "grad_norm": 1.3156121969223022, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 30730 + }, + { + "epoch": 202.23684210526315, + "grad_norm": 1.4101308584213257, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 30740 + }, + { + "epoch": 202.30263157894737, + "grad_norm": 1.3764532804489136, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 30750 + }, + { + "epoch": 202.3684210526316, + "grad_norm": 0.882384181022644, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 30760 + }, + { + "epoch": 202.43421052631578, + "grad_norm": 1.6236560344696045, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 30770 + }, + { + "epoch": 202.5, + "grad_norm": 1.4815741777420044, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 30780 + }, + { + "epoch": 202.56578947368422, + "grad_norm": 1.2555485963821411, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 30790 + }, + { + "epoch": 202.6315789473684, + "grad_norm": 1.3122458457946777, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 30800 + }, + { + "epoch": 202.69736842105263, + "grad_norm": 1.042381763458252, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 30810 + }, + { + "epoch": 202.76315789473685, + "grad_norm": 0.9814920425415039, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 30820 + }, + { + "epoch": 202.82894736842104, + "grad_norm": 1.0357002019882202, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 30830 + }, + { + "epoch": 202.89473684210526, + "grad_norm": 1.281749963760376, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 30840 + }, + { + "epoch": 202.96052631578948, + "grad_norm": 1.321096658706665, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 30850 + }, + { + "epoch": 203.02631578947367, + "grad_norm": 1.1207501888275146, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 30860 + }, + { + "epoch": 203.0921052631579, + "grad_norm": 1.5637404918670654, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 30870 + }, + { + "epoch": 203.1578947368421, + "grad_norm": 1.2370407581329346, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 30880 + }, + { + "epoch": 203.22368421052633, + "grad_norm": 1.0421314239501953, + "learning_rate": 0.0001, + "loss": 0.0217, + "step": 30890 + }, + { + "epoch": 203.28947368421052, + "grad_norm": 1.0301634073257446, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 30900 + }, + { + "epoch": 203.35526315789474, + "grad_norm": 1.2774720191955566, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 30910 + }, + { + "epoch": 203.42105263157896, + "grad_norm": 1.2703475952148438, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 30920 + }, + { + "epoch": 203.48684210526315, + "grad_norm": 1.1837942600250244, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 30930 + }, + { + "epoch": 203.55263157894737, + "grad_norm": 1.0560357570648193, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 30940 + }, + { + "epoch": 203.6184210526316, + "grad_norm": 1.2957569360733032, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 30950 + }, + { + "epoch": 203.68421052631578, + "grad_norm": 1.4275071620941162, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 30960 + }, + { + "epoch": 203.75, + "grad_norm": 1.513159155845642, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 30970 + }, + { + "epoch": 203.81578947368422, + "grad_norm": 1.6335196495056152, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 30980 + }, + { + "epoch": 203.8815789473684, + "grad_norm": 1.5469692945480347, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 30990 + }, + { + "epoch": 203.94736842105263, + "grad_norm": 1.5226917266845703, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 31000 + }, + { + "epoch": 204.01315789473685, + "grad_norm": 1.3117252588272095, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 31010 + }, + { + "epoch": 204.07894736842104, + "grad_norm": 1.800711989402771, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 31020 + }, + { + "epoch": 204.14473684210526, + "grad_norm": 1.6700514554977417, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 31030 + }, + { + "epoch": 204.21052631578948, + "grad_norm": 1.702384352684021, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 31040 + }, + { + "epoch": 204.27631578947367, + "grad_norm": 1.678499698638916, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 31050 + }, + { + "epoch": 204.3421052631579, + "grad_norm": 1.4093444347381592, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 31060 + }, + { + "epoch": 204.4078947368421, + "grad_norm": 1.5333293676376343, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 31070 + }, + { + "epoch": 204.47368421052633, + "grad_norm": 1.2159759998321533, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 31080 + }, + { + "epoch": 204.53947368421052, + "grad_norm": 1.358657956123352, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 31090 + }, + { + "epoch": 204.60526315789474, + "grad_norm": 1.1089929342269897, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 31100 + }, + { + "epoch": 204.67105263157896, + "grad_norm": 1.3180047273635864, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 31110 + }, + { + "epoch": 204.73684210526315, + "grad_norm": 1.2154042720794678, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 31120 + }, + { + "epoch": 204.80263157894737, + "grad_norm": 1.4112086296081543, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 31130 + }, + { + "epoch": 204.8684210526316, + "grad_norm": 1.432087779045105, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 31140 + }, + { + "epoch": 204.93421052631578, + "grad_norm": 1.3358913660049438, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 31150 + }, + { + "epoch": 205.0, + "grad_norm": 1.435907006263733, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 31160 + }, + { + "epoch": 205.06578947368422, + "grad_norm": 1.0711824893951416, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 31170 + }, + { + "epoch": 205.1315789473684, + "grad_norm": 1.3709452152252197, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 31180 + }, + { + "epoch": 205.19736842105263, + "grad_norm": 1.1924278736114502, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 31190 + }, + { + "epoch": 205.26315789473685, + "grad_norm": 1.3899904489517212, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 31200 + }, + { + "epoch": 205.32894736842104, + "grad_norm": 1.3233460187911987, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 31210 + }, + { + "epoch": 205.39473684210526, + "grad_norm": 1.4931349754333496, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 31220 + }, + { + "epoch": 205.46052631578948, + "grad_norm": 1.203282117843628, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 31230 + }, + { + "epoch": 205.52631578947367, + "grad_norm": 1.3920016288757324, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 31240 + }, + { + "epoch": 205.5921052631579, + "grad_norm": 1.077797770500183, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 31250 + }, + { + "epoch": 205.6578947368421, + "grad_norm": 1.7095680236816406, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 31260 + }, + { + "epoch": 205.72368421052633, + "grad_norm": 0.7705028057098389, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 31270 + }, + { + "epoch": 205.78947368421052, + "grad_norm": 1.1185240745544434, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 31280 + }, + { + "epoch": 205.85526315789474, + "grad_norm": 1.0305986404418945, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 31290 + }, + { + "epoch": 205.92105263157896, + "grad_norm": 1.5906319618225098, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 31300 + }, + { + "epoch": 205.98684210526315, + "grad_norm": 1.2245663404464722, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 31310 + }, + { + "epoch": 206.05263157894737, + "grad_norm": 1.706349492073059, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 31320 + }, + { + "epoch": 206.1184210526316, + "grad_norm": 1.1370997428894043, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 31330 + }, + { + "epoch": 206.18421052631578, + "grad_norm": 0.6681815385818481, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 31340 + }, + { + "epoch": 206.25, + "grad_norm": 1.397417664527893, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 31350 + }, + { + "epoch": 206.31578947368422, + "grad_norm": 1.0024604797363281, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 31360 + }, + { + "epoch": 206.3815789473684, + "grad_norm": 1.329154372215271, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 31370 + }, + { + "epoch": 206.44736842105263, + "grad_norm": 1.4420346021652222, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 31380 + }, + { + "epoch": 206.51315789473685, + "grad_norm": 1.2937798500061035, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 31390 + }, + { + "epoch": 206.57894736842104, + "grad_norm": 1.2045661211013794, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 31400 + }, + { + "epoch": 206.64473684210526, + "grad_norm": 1.0258898735046387, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 31410 + }, + { + "epoch": 206.71052631578948, + "grad_norm": 1.1112213134765625, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 31420 + }, + { + "epoch": 206.77631578947367, + "grad_norm": 1.2808059453964233, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 31430 + }, + { + "epoch": 206.8421052631579, + "grad_norm": 1.4980825185775757, + "learning_rate": 0.0001, + "loss": 0.0195, + "step": 31440 + }, + { + "epoch": 206.9078947368421, + "grad_norm": 1.1103272438049316, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 31450 + }, + { + "epoch": 206.97368421052633, + "grad_norm": 0.9099360704421997, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 31460 + }, + { + "epoch": 207.03947368421052, + "grad_norm": 1.1088165044784546, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 31470 + }, + { + "epoch": 207.10526315789474, + "grad_norm": 1.3863370418548584, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 31480 + }, + { + "epoch": 207.17105263157896, + "grad_norm": 1.522480845451355, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 31490 + }, + { + "epoch": 207.23684210526315, + "grad_norm": 1.1153115034103394, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 31500 + }, + { + "epoch": 207.30263157894737, + "grad_norm": 1.211844563484192, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 31510 + }, + { + "epoch": 207.3684210526316, + "grad_norm": 1.3280556201934814, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 31520 + }, + { + "epoch": 207.43421052631578, + "grad_norm": 1.232966423034668, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 31530 + }, + { + "epoch": 207.5, + "grad_norm": 1.3021212816238403, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 31540 + }, + { + "epoch": 207.56578947368422, + "grad_norm": 1.4837563037872314, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 31550 + }, + { + "epoch": 207.6315789473684, + "grad_norm": 1.122427225112915, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 31560 + }, + { + "epoch": 207.69736842105263, + "grad_norm": 1.358843207359314, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 31570 + }, + { + "epoch": 207.76315789473685, + "grad_norm": 1.1522997617721558, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 31580 + }, + { + "epoch": 207.82894736842104, + "grad_norm": 1.1149085760116577, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 31590 + }, + { + "epoch": 207.89473684210526, + "grad_norm": 1.3184139728546143, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 31600 + }, + { + "epoch": 207.96052631578948, + "grad_norm": 1.1251814365386963, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 31610 + }, + { + "epoch": 208.02631578947367, + "grad_norm": 1.4511667490005493, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 31620 + }, + { + "epoch": 208.0921052631579, + "grad_norm": 1.4698028564453125, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 31630 + }, + { + "epoch": 208.1578947368421, + "grad_norm": 1.1822978258132935, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 31640 + }, + { + "epoch": 208.22368421052633, + "grad_norm": 1.1715060472488403, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 31650 + }, + { + "epoch": 208.28947368421052, + "grad_norm": 1.4276723861694336, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 31660 + }, + { + "epoch": 208.35526315789474, + "grad_norm": 1.4519996643066406, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 31670 + }, + { + "epoch": 208.42105263157896, + "grad_norm": 1.0336174964904785, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 31680 + }, + { + "epoch": 208.48684210526315, + "grad_norm": 1.6115700006484985, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 31690 + }, + { + "epoch": 208.55263157894737, + "grad_norm": 1.6710377931594849, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 31700 + }, + { + "epoch": 208.6184210526316, + "grad_norm": 1.5397769212722778, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 31710 + }, + { + "epoch": 208.68421052631578, + "grad_norm": 1.5007388591766357, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 31720 + }, + { + "epoch": 208.75, + "grad_norm": 1.3674216270446777, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 31730 + }, + { + "epoch": 208.81578947368422, + "grad_norm": 2.207770347595215, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 31740 + }, + { + "epoch": 208.8815789473684, + "grad_norm": 1.5573480129241943, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 31750 + }, + { + "epoch": 208.94736842105263, + "grad_norm": 1.5399261713027954, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 31760 + }, + { + "epoch": 209.01315789473685, + "grad_norm": 1.6872302293777466, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 31770 + }, + { + "epoch": 209.07894736842104, + "grad_norm": 1.3373833894729614, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 31780 + }, + { + "epoch": 209.14473684210526, + "grad_norm": 1.6845489740371704, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 31790 + }, + { + "epoch": 209.21052631578948, + "grad_norm": 1.3818607330322266, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 31800 + }, + { + "epoch": 209.27631578947367, + "grad_norm": 1.3296219110488892, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 31810 + }, + { + "epoch": 209.3421052631579, + "grad_norm": 1.1584928035736084, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 31820 + }, + { + "epoch": 209.4078947368421, + "grad_norm": 1.3986483812332153, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 31830 + }, + { + "epoch": 209.47368421052633, + "grad_norm": 1.7007300853729248, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 31840 + }, + { + "epoch": 209.53947368421052, + "grad_norm": 0.916429877281189, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 31850 + }, + { + "epoch": 209.60526315789474, + "grad_norm": 1.1400072574615479, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 31860 + }, + { + "epoch": 209.67105263157896, + "grad_norm": 1.3716983795166016, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 31870 + }, + { + "epoch": 209.73684210526315, + "grad_norm": 0.9862516522407532, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 31880 + }, + { + "epoch": 209.80263157894737, + "grad_norm": 1.1696069240570068, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 31890 + }, + { + "epoch": 209.8684210526316, + "grad_norm": 1.4392577409744263, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 31900 + }, + { + "epoch": 209.93421052631578, + "grad_norm": 2.1823062896728516, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 31910 + }, + { + "epoch": 210.0, + "grad_norm": 1.6324290037155151, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 31920 + }, + { + "epoch": 210.06578947368422, + "grad_norm": 1.2188938856124878, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 31930 + }, + { + "epoch": 210.1315789473684, + "grad_norm": 1.401517391204834, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 31940 + }, + { + "epoch": 210.19736842105263, + "grad_norm": 1.3221532106399536, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 31950 + }, + { + "epoch": 210.26315789473685, + "grad_norm": 1.5217102766036987, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 31960 + }, + { + "epoch": 210.32894736842104, + "grad_norm": 1.4652286767959595, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 31970 + }, + { + "epoch": 210.39473684210526, + "grad_norm": 1.2757086753845215, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 31980 + }, + { + "epoch": 210.46052631578948, + "grad_norm": 1.227782130241394, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 31990 + }, + { + "epoch": 210.52631578947367, + "grad_norm": 1.3850603103637695, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 32000 + }, + { + "epoch": 210.5921052631579, + "grad_norm": 1.2859461307525635, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 32010 + }, + { + "epoch": 210.6578947368421, + "grad_norm": 1.3917819261550903, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 32020 + }, + { + "epoch": 210.72368421052633, + "grad_norm": 1.4318441152572632, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 32030 + }, + { + "epoch": 210.78947368421052, + "grad_norm": 1.5002471208572388, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 32040 + }, + { + "epoch": 210.85526315789474, + "grad_norm": 1.2311240434646606, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 32050 + }, + { + "epoch": 210.92105263157896, + "grad_norm": 1.293976902961731, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 32060 + }, + { + "epoch": 210.98684210526315, + "grad_norm": 1.1462241411209106, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 32070 + }, + { + "epoch": 211.05263157894737, + "grad_norm": 1.0642205476760864, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 32080 + }, + { + "epoch": 211.1184210526316, + "grad_norm": 1.0781315565109253, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 32090 + }, + { + "epoch": 211.18421052631578, + "grad_norm": 1.2331435680389404, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 32100 + }, + { + "epoch": 211.25, + "grad_norm": 1.129051685333252, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 32110 + }, + { + "epoch": 211.31578947368422, + "grad_norm": 1.301344871520996, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 32120 + }, + { + "epoch": 211.3815789473684, + "grad_norm": 1.6704833507537842, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 32130 + }, + { + "epoch": 211.44736842105263, + "grad_norm": 1.4150813817977905, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 32140 + }, + { + "epoch": 211.51315789473685, + "grad_norm": 1.3065874576568604, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 32150 + }, + { + "epoch": 211.57894736842104, + "grad_norm": 1.0689506530761719, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 32160 + }, + { + "epoch": 211.64473684210526, + "grad_norm": 1.220293641090393, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 32170 + }, + { + "epoch": 211.71052631578948, + "grad_norm": 1.4087119102478027, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 32180 + }, + { + "epoch": 211.77631578947367, + "grad_norm": 1.3689287900924683, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 32190 + }, + { + "epoch": 211.8421052631579, + "grad_norm": 1.4669215679168701, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 32200 + }, + { + "epoch": 211.9078947368421, + "grad_norm": 1.2780801057815552, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 32210 + }, + { + "epoch": 211.97368421052633, + "grad_norm": 1.0613449811935425, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 32220 + }, + { + "epoch": 212.03947368421052, + "grad_norm": 1.206862211227417, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 32230 + }, + { + "epoch": 212.10526315789474, + "grad_norm": 1.3805917501449585, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 32240 + }, + { + "epoch": 212.17105263157896, + "grad_norm": 1.1278748512268066, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 32250 + }, + { + "epoch": 212.23684210526315, + "grad_norm": 1.1782323122024536, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 32260 + }, + { + "epoch": 212.30263157894737, + "grad_norm": 1.0774714946746826, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 32270 + }, + { + "epoch": 212.3684210526316, + "grad_norm": 1.1292855739593506, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 32280 + }, + { + "epoch": 212.43421052631578, + "grad_norm": 1.4095853567123413, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 32290 + }, + { + "epoch": 212.5, + "grad_norm": 1.196883201599121, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 32300 + }, + { + "epoch": 212.56578947368422, + "grad_norm": 1.2091253995895386, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 32310 + }, + { + "epoch": 212.6315789473684, + "grad_norm": 1.3275787830352783, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 32320 + }, + { + "epoch": 212.69736842105263, + "grad_norm": 1.531229019165039, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 32330 + }, + { + "epoch": 212.76315789473685, + "grad_norm": 1.4241570234298706, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 32340 + }, + { + "epoch": 212.82894736842104, + "grad_norm": 0.965205192565918, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 32350 + }, + { + "epoch": 212.89473684210526, + "grad_norm": 1.515239953994751, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 32360 + }, + { + "epoch": 212.96052631578948, + "grad_norm": 1.5352081060409546, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 32370 + }, + { + "epoch": 213.02631578947367, + "grad_norm": 1.2615565061569214, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 32380 + }, + { + "epoch": 213.0921052631579, + "grad_norm": 1.6691360473632812, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 32390 + }, + { + "epoch": 213.1578947368421, + "grad_norm": 1.4845948219299316, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 32400 + }, + { + "epoch": 213.22368421052633, + "grad_norm": 1.1609926223754883, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 32410 + }, + { + "epoch": 213.28947368421052, + "grad_norm": 1.4472464323043823, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 32420 + }, + { + "epoch": 213.35526315789474, + "grad_norm": 1.1727287769317627, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 32430 + }, + { + "epoch": 213.42105263157896, + "grad_norm": 1.0012319087982178, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 32440 + }, + { + "epoch": 213.48684210526315, + "grad_norm": 1.4010659456253052, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 32450 + }, + { + "epoch": 213.55263157894737, + "grad_norm": 1.6938822269439697, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 32460 + }, + { + "epoch": 213.6184210526316, + "grad_norm": 1.7426364421844482, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 32470 + }, + { + "epoch": 213.68421052631578, + "grad_norm": 1.147688627243042, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 32480 + }, + { + "epoch": 213.75, + "grad_norm": 1.278310775756836, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 32490 + }, + { + "epoch": 213.81578947368422, + "grad_norm": 1.183099627494812, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 32500 + }, + { + "epoch": 213.8815789473684, + "grad_norm": 1.738754391670227, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 32510 + }, + { + "epoch": 213.94736842105263, + "grad_norm": 1.2291311025619507, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 32520 + }, + { + "epoch": 214.01315789473685, + "grad_norm": 1.1750850677490234, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 32530 + }, + { + "epoch": 214.07894736842104, + "grad_norm": 1.2301994562149048, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 32540 + }, + { + "epoch": 214.14473684210526, + "grad_norm": 1.2301639318466187, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 32550 + }, + { + "epoch": 214.21052631578948, + "grad_norm": 1.4040155410766602, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 32560 + }, + { + "epoch": 214.27631578947367, + "grad_norm": 1.4484095573425293, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 32570 + }, + { + "epoch": 214.3421052631579, + "grad_norm": 1.0612156391143799, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 32580 + }, + { + "epoch": 214.4078947368421, + "grad_norm": 1.1924538612365723, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 32590 + }, + { + "epoch": 214.47368421052633, + "grad_norm": 1.3676021099090576, + "learning_rate": 0.0001, + "loss": 0.0193, + "step": 32600 + }, + { + "epoch": 214.53947368421052, + "grad_norm": 1.155792474746704, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 32610 + }, + { + "epoch": 214.60526315789474, + "grad_norm": 1.2802067995071411, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 32620 + }, + { + "epoch": 214.67105263157896, + "grad_norm": 1.1130114793777466, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 32630 + }, + { + "epoch": 214.73684210526315, + "grad_norm": 1.3467772006988525, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 32640 + }, + { + "epoch": 214.80263157894737, + "grad_norm": 1.1575422286987305, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 32650 + }, + { + "epoch": 214.8684210526316, + "grad_norm": 1.0883058309555054, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 32660 + }, + { + "epoch": 214.93421052631578, + "grad_norm": 1.3685483932495117, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 32670 + }, + { + "epoch": 215.0, + "grad_norm": 1.452791690826416, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 32680 + }, + { + "epoch": 215.06578947368422, + "grad_norm": 1.2236331701278687, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 32690 + }, + { + "epoch": 215.1315789473684, + "grad_norm": 1.1579644680023193, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 32700 + }, + { + "epoch": 215.19736842105263, + "grad_norm": 1.2398250102996826, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 32710 + }, + { + "epoch": 215.26315789473685, + "grad_norm": 0.9351480603218079, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 32720 + }, + { + "epoch": 215.32894736842104, + "grad_norm": 0.8759018778800964, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 32730 + }, + { + "epoch": 215.39473684210526, + "grad_norm": 1.077101230621338, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 32740 + }, + { + "epoch": 215.46052631578948, + "grad_norm": 0.8699200749397278, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 32750 + }, + { + "epoch": 215.52631578947367, + "grad_norm": 1.2081657648086548, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 32760 + }, + { + "epoch": 215.5921052631579, + "grad_norm": 1.218841791152954, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 32770 + }, + { + "epoch": 215.6578947368421, + "grad_norm": 1.0985344648361206, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 32780 + }, + { + "epoch": 215.72368421052633, + "grad_norm": 1.242873191833496, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 32790 + }, + { + "epoch": 215.78947368421052, + "grad_norm": 0.956916868686676, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 32800 + }, + { + "epoch": 215.85526315789474, + "grad_norm": 0.9822515249252319, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 32810 + }, + { + "epoch": 215.92105263157896, + "grad_norm": 0.8483900427818298, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 32820 + }, + { + "epoch": 215.98684210526315, + "grad_norm": 1.0709314346313477, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 32830 + }, + { + "epoch": 216.05263157894737, + "grad_norm": 1.0873550176620483, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 32840 + }, + { + "epoch": 216.1184210526316, + "grad_norm": 1.20504891872406, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 32850 + }, + { + "epoch": 216.18421052631578, + "grad_norm": 1.1691912412643433, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 32860 + }, + { + "epoch": 216.25, + "grad_norm": 1.352332592010498, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 32870 + }, + { + "epoch": 216.31578947368422, + "grad_norm": 0.8410949110984802, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 32880 + }, + { + "epoch": 216.3815789473684, + "grad_norm": 1.0081835985183716, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 32890 + }, + { + "epoch": 216.44736842105263, + "grad_norm": 0.9765644073486328, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 32900 + }, + { + "epoch": 216.51315789473685, + "grad_norm": 1.4321998357772827, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 32910 + }, + { + "epoch": 216.57894736842104, + "grad_norm": 1.1078122854232788, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 32920 + }, + { + "epoch": 216.64473684210526, + "grad_norm": 1.004533290863037, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 32930 + }, + { + "epoch": 216.71052631578948, + "grad_norm": 1.0383466482162476, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 32940 + }, + { + "epoch": 216.77631578947367, + "grad_norm": 1.178542137145996, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 32950 + }, + { + "epoch": 216.8421052631579, + "grad_norm": 1.2944022417068481, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 32960 + }, + { + "epoch": 216.9078947368421, + "grad_norm": 1.0292662382125854, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 32970 + }, + { + "epoch": 216.97368421052633, + "grad_norm": 1.4745246171951294, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 32980 + }, + { + "epoch": 217.03947368421052, + "grad_norm": 1.0737231969833374, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 32990 + }, + { + "epoch": 217.10526315789474, + "grad_norm": 1.2109375, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 33000 + }, + { + "epoch": 217.17105263157896, + "grad_norm": 1.142307162284851, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 33010 + }, + { + "epoch": 217.23684210526315, + "grad_norm": 0.9450036883354187, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 33020 + }, + { + "epoch": 217.30263157894737, + "grad_norm": 0.9609930515289307, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 33030 + }, + { + "epoch": 217.3684210526316, + "grad_norm": 0.949567437171936, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 33040 + }, + { + "epoch": 217.43421052631578, + "grad_norm": 1.1803269386291504, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 33050 + }, + { + "epoch": 217.5, + "grad_norm": 1.2131317853927612, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 33060 + }, + { + "epoch": 217.56578947368422, + "grad_norm": 1.3640658855438232, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 33070 + }, + { + "epoch": 217.6315789473684, + "grad_norm": 1.1110533475875854, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 33080 + }, + { + "epoch": 217.69736842105263, + "grad_norm": 1.4728652238845825, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 33090 + }, + { + "epoch": 217.76315789473685, + "grad_norm": 1.1803345680236816, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 33100 + }, + { + "epoch": 217.82894736842104, + "grad_norm": 1.0455313920974731, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 33110 + }, + { + "epoch": 217.89473684210526, + "grad_norm": 1.290387749671936, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 33120 + }, + { + "epoch": 217.96052631578948, + "grad_norm": 1.0073187351226807, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 33130 + }, + { + "epoch": 218.02631578947367, + "grad_norm": 1.0683584213256836, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 33140 + }, + { + "epoch": 218.0921052631579, + "grad_norm": 1.0977116823196411, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 33150 + }, + { + "epoch": 218.1578947368421, + "grad_norm": 1.2289358377456665, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 33160 + }, + { + "epoch": 218.22368421052633, + "grad_norm": 1.4504176378250122, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 33170 + }, + { + "epoch": 218.28947368421052, + "grad_norm": 1.3620048761367798, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 33180 + }, + { + "epoch": 218.35526315789474, + "grad_norm": 1.0863347053527832, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 33190 + }, + { + "epoch": 218.42105263157896, + "grad_norm": 1.5031284093856812, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 33200 + }, + { + "epoch": 218.48684210526315, + "grad_norm": 1.6940463781356812, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 33210 + }, + { + "epoch": 218.55263157894737, + "grad_norm": 1.5782625675201416, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 33220 + }, + { + "epoch": 218.6184210526316, + "grad_norm": 1.5026166439056396, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 33230 + }, + { + "epoch": 218.68421052631578, + "grad_norm": 0.9451717734336853, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 33240 + }, + { + "epoch": 218.75, + "grad_norm": 1.007091999053955, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 33250 + }, + { + "epoch": 218.81578947368422, + "grad_norm": 1.0861047506332397, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 33260 + }, + { + "epoch": 218.8815789473684, + "grad_norm": 1.3030095100402832, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 33270 + }, + { + "epoch": 218.94736842105263, + "grad_norm": 1.0647660493850708, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 33280 + }, + { + "epoch": 219.01315789473685, + "grad_norm": 1.160729169845581, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 33290 + }, + { + "epoch": 219.07894736842104, + "grad_norm": 0.992527425289154, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 33300 + }, + { + "epoch": 219.14473684210526, + "grad_norm": 1.6091036796569824, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 33310 + }, + { + "epoch": 219.21052631578948, + "grad_norm": 1.16289222240448, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 33320 + }, + { + "epoch": 219.27631578947367, + "grad_norm": 0.95487380027771, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 33330 + }, + { + "epoch": 219.3421052631579, + "grad_norm": 0.9959771633148193, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 33340 + }, + { + "epoch": 219.4078947368421, + "grad_norm": 1.1076931953430176, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 33350 + }, + { + "epoch": 219.47368421052633, + "grad_norm": 1.2252377271652222, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 33360 + }, + { + "epoch": 219.53947368421052, + "grad_norm": 1.2380799055099487, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 33370 + }, + { + "epoch": 219.60526315789474, + "grad_norm": 1.33864164352417, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 33380 + }, + { + "epoch": 219.67105263157896, + "grad_norm": 1.1309316158294678, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 33390 + }, + { + "epoch": 219.73684210526315, + "grad_norm": 1.1230326890945435, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 33400 + }, + { + "epoch": 219.80263157894737, + "grad_norm": 1.306256890296936, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 33410 + }, + { + "epoch": 219.8684210526316, + "grad_norm": 1.3912127017974854, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 33420 + }, + { + "epoch": 219.93421052631578, + "grad_norm": 1.529556155204773, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 33430 + }, + { + "epoch": 220.0, + "grad_norm": 1.9190083742141724, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 33440 + }, + { + "epoch": 220.06578947368422, + "grad_norm": 1.1662191152572632, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 33450 + }, + { + "epoch": 220.1315789473684, + "grad_norm": 1.6552345752716064, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 33460 + }, + { + "epoch": 220.19736842105263, + "grad_norm": 1.3389320373535156, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 33470 + }, + { + "epoch": 220.26315789473685, + "grad_norm": 1.5310899019241333, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 33480 + }, + { + "epoch": 220.32894736842104, + "grad_norm": 1.621925950050354, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 33490 + }, + { + "epoch": 220.39473684210526, + "grad_norm": 1.3250995874404907, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 33500 + }, + { + "epoch": 220.46052631578948, + "grad_norm": 1.3257274627685547, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 33510 + }, + { + "epoch": 220.52631578947367, + "grad_norm": 1.333977460861206, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 33520 + }, + { + "epoch": 220.5921052631579, + "grad_norm": 1.4061793088912964, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 33530 + }, + { + "epoch": 220.6578947368421, + "grad_norm": 1.223670244216919, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 33540 + }, + { + "epoch": 220.72368421052633, + "grad_norm": 1.1718958616256714, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 33550 + }, + { + "epoch": 220.78947368421052, + "grad_norm": 1.171622395515442, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 33560 + }, + { + "epoch": 220.85526315789474, + "grad_norm": 1.121756911277771, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 33570 + }, + { + "epoch": 220.92105263157896, + "grad_norm": 1.4940133094787598, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 33580 + }, + { + "epoch": 220.98684210526315, + "grad_norm": 0.976639449596405, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 33590 + }, + { + "epoch": 221.05263157894737, + "grad_norm": 1.2112866640090942, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 33600 + }, + { + "epoch": 221.1184210526316, + "grad_norm": 1.3244744539260864, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 33610 + }, + { + "epoch": 221.18421052631578, + "grad_norm": 1.2948193550109863, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 33620 + }, + { + "epoch": 221.25, + "grad_norm": 1.39680016040802, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 33630 + }, + { + "epoch": 221.31578947368422, + "grad_norm": 1.0296803712844849, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 33640 + }, + { + "epoch": 221.3815789473684, + "grad_norm": 1.1246095895767212, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 33650 + }, + { + "epoch": 221.44736842105263, + "grad_norm": 1.0090177059173584, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 33660 + }, + { + "epoch": 221.51315789473685, + "grad_norm": 1.1224722862243652, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 33670 + }, + { + "epoch": 221.57894736842104, + "grad_norm": 1.6427732706069946, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 33680 + }, + { + "epoch": 221.64473684210526, + "grad_norm": 1.3435227870941162, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 33690 + }, + { + "epoch": 221.71052631578948, + "grad_norm": 1.7769924402236938, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 33700 + }, + { + "epoch": 221.77631578947367, + "grad_norm": 1.2814626693725586, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 33710 + }, + { + "epoch": 221.8421052631579, + "grad_norm": 1.3737503290176392, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 33720 + }, + { + "epoch": 221.9078947368421, + "grad_norm": 1.2347252368927002, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 33730 + }, + { + "epoch": 221.97368421052633, + "grad_norm": 1.3277086019515991, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 33740 + }, + { + "epoch": 222.03947368421052, + "grad_norm": 1.7966270446777344, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 33750 + }, + { + "epoch": 222.10526315789474, + "grad_norm": 1.3284343481063843, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 33760 + }, + { + "epoch": 222.17105263157896, + "grad_norm": 1.6279863119125366, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 33770 + }, + { + "epoch": 222.23684210526315, + "grad_norm": 1.5999658107757568, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 33780 + }, + { + "epoch": 222.30263157894737, + "grad_norm": 1.5054606199264526, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 33790 + }, + { + "epoch": 222.3684210526316, + "grad_norm": 1.4686840772628784, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 33800 + }, + { + "epoch": 222.43421052631578, + "grad_norm": 1.1565569639205933, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 33810 + }, + { + "epoch": 222.5, + "grad_norm": 1.396141529083252, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 33820 + }, + { + "epoch": 222.56578947368422, + "grad_norm": 1.1285852193832397, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 33830 + }, + { + "epoch": 222.6315789473684, + "grad_norm": 1.093670129776001, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 33840 + }, + { + "epoch": 222.69736842105263, + "grad_norm": 1.443837285041809, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 33850 + }, + { + "epoch": 222.76315789473685, + "grad_norm": 1.3673397302627563, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 33860 + }, + { + "epoch": 222.82894736842104, + "grad_norm": 1.2065106630325317, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 33870 + }, + { + "epoch": 222.89473684210526, + "grad_norm": 1.3784736394882202, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 33880 + }, + { + "epoch": 222.96052631578948, + "grad_norm": 1.6203807592391968, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 33890 + }, + { + "epoch": 223.02631578947367, + "grad_norm": 1.314103126525879, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 33900 + }, + { + "epoch": 223.0921052631579, + "grad_norm": 1.6348367929458618, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 33910 + }, + { + "epoch": 223.1578947368421, + "grad_norm": 1.20957350730896, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 33920 + }, + { + "epoch": 223.22368421052633, + "grad_norm": 1.1228580474853516, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 33930 + }, + { + "epoch": 223.28947368421052, + "grad_norm": 1.4203588962554932, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 33940 + }, + { + "epoch": 223.35526315789474, + "grad_norm": 1.2360961437225342, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 33950 + }, + { + "epoch": 223.42105263157896, + "grad_norm": 0.9158451557159424, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 33960 + }, + { + "epoch": 223.48684210526315, + "grad_norm": 1.6221871376037598, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 33970 + }, + { + "epoch": 223.55263157894737, + "grad_norm": 1.559161901473999, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 33980 + }, + { + "epoch": 223.6184210526316, + "grad_norm": 1.876190423965454, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 33990 + }, + { + "epoch": 223.68421052631578, + "grad_norm": 1.5894197225570679, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 34000 + }, + { + "epoch": 223.75, + "grad_norm": 1.2699931859970093, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 34010 + }, + { + "epoch": 223.81578947368422, + "grad_norm": 1.1223138570785522, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 34020 + }, + { + "epoch": 223.8815789473684, + "grad_norm": 1.2991082668304443, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 34030 + }, + { + "epoch": 223.94736842105263, + "grad_norm": 1.1831358671188354, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 34040 + }, + { + "epoch": 224.01315789473685, + "grad_norm": 0.8384124636650085, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 34050 + }, + { + "epoch": 224.07894736842104, + "grad_norm": 1.0878987312316895, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 34060 + }, + { + "epoch": 224.14473684210526, + "grad_norm": 1.440589189529419, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 34070 + }, + { + "epoch": 224.21052631578948, + "grad_norm": 1.6876184940338135, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 34080 + }, + { + "epoch": 224.27631578947367, + "grad_norm": 1.424138069152832, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 34090 + }, + { + "epoch": 224.3421052631579, + "grad_norm": 1.2443312406539917, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 34100 + }, + { + "epoch": 224.4078947368421, + "grad_norm": 1.3643524646759033, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 34110 + }, + { + "epoch": 224.47368421052633, + "grad_norm": 1.1311956644058228, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 34120 + }, + { + "epoch": 224.53947368421052, + "grad_norm": 1.0207782983779907, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 34130 + }, + { + "epoch": 224.60526315789474, + "grad_norm": 1.3237141370773315, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 34140 + }, + { + "epoch": 224.67105263157896, + "grad_norm": 1.5647715330123901, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 34150 + }, + { + "epoch": 224.73684210526315, + "grad_norm": 1.4168190956115723, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 34160 + }, + { + "epoch": 224.80263157894737, + "grad_norm": 1.3588078022003174, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 34170 + }, + { + "epoch": 224.8684210526316, + "grad_norm": 1.0193902254104614, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 34180 + }, + { + "epoch": 224.93421052631578, + "grad_norm": 1.3121767044067383, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 34190 + }, + { + "epoch": 225.0, + "grad_norm": 1.4055311679840088, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 34200 + }, + { + "epoch": 225.06578947368422, + "grad_norm": 1.3310362100601196, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 34210 + }, + { + "epoch": 225.1315789473684, + "grad_norm": 1.2854124307632446, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 34220 + }, + { + "epoch": 225.19736842105263, + "grad_norm": 1.1616575717926025, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 34230 + }, + { + "epoch": 225.26315789473685, + "grad_norm": 1.6716482639312744, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 34240 + }, + { + "epoch": 225.32894736842104, + "grad_norm": 1.0361127853393555, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 34250 + }, + { + "epoch": 225.39473684210526, + "grad_norm": 1.1146401166915894, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 34260 + }, + { + "epoch": 225.46052631578948, + "grad_norm": 1.2672135829925537, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 34270 + }, + { + "epoch": 225.52631578947367, + "grad_norm": 1.557548999786377, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 34280 + }, + { + "epoch": 225.5921052631579, + "grad_norm": 1.0384529829025269, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 34290 + }, + { + "epoch": 225.6578947368421, + "grad_norm": 1.3350151777267456, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 34300 + }, + { + "epoch": 225.72368421052633, + "grad_norm": 1.3474798202514648, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 34310 + }, + { + "epoch": 225.78947368421052, + "grad_norm": 0.9546133875846863, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 34320 + }, + { + "epoch": 225.85526315789474, + "grad_norm": 1.325697660446167, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 34330 + }, + { + "epoch": 225.92105263157896, + "grad_norm": 1.2987803220748901, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 34340 + }, + { + "epoch": 225.98684210526315, + "grad_norm": 1.4725877046585083, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 34350 + }, + { + "epoch": 226.05263157894737, + "grad_norm": 1.2885562181472778, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 34360 + }, + { + "epoch": 226.1184210526316, + "grad_norm": 1.1747262477874756, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 34370 + }, + { + "epoch": 226.18421052631578, + "grad_norm": 1.6619282960891724, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 34380 + }, + { + "epoch": 226.25, + "grad_norm": 1.142620325088501, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 34390 + }, + { + "epoch": 226.31578947368422, + "grad_norm": 1.11537766456604, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 34400 + }, + { + "epoch": 226.3815789473684, + "grad_norm": 0.9072920083999634, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 34410 + }, + { + "epoch": 226.44736842105263, + "grad_norm": 0.9555834531784058, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 34420 + }, + { + "epoch": 226.51315789473685, + "grad_norm": 1.3261579275131226, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 34430 + }, + { + "epoch": 226.57894736842104, + "grad_norm": 1.6103168725967407, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 34440 + }, + { + "epoch": 226.64473684210526, + "grad_norm": 1.1931551694869995, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 34450 + }, + { + "epoch": 226.71052631578948, + "grad_norm": 1.633232593536377, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 34460 + }, + { + "epoch": 226.77631578947367, + "grad_norm": 1.4821163415908813, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 34470 + }, + { + "epoch": 226.8421052631579, + "grad_norm": 0.9790869951248169, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 34480 + }, + { + "epoch": 226.9078947368421, + "grad_norm": 0.8639764785766602, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 34490 + }, + { + "epoch": 226.97368421052633, + "grad_norm": 1.0887365341186523, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 34500 + }, + { + "epoch": 227.03947368421052, + "grad_norm": 0.8437522649765015, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 34510 + }, + { + "epoch": 227.10526315789474, + "grad_norm": 1.2919409275054932, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 34520 + }, + { + "epoch": 227.17105263157896, + "grad_norm": 1.2690752744674683, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 34530 + }, + { + "epoch": 227.23684210526315, + "grad_norm": 1.1717513799667358, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 34540 + }, + { + "epoch": 227.30263157894737, + "grad_norm": 1.1742141246795654, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 34550 + }, + { + "epoch": 227.3684210526316, + "grad_norm": 1.2285854816436768, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 34560 + }, + { + "epoch": 227.43421052631578, + "grad_norm": 1.6287882328033447, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 34570 + }, + { + "epoch": 227.5, + "grad_norm": 1.4671905040740967, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 34580 + }, + { + "epoch": 227.56578947368422, + "grad_norm": 1.2717926502227783, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 34590 + }, + { + "epoch": 227.6315789473684, + "grad_norm": 1.2138713598251343, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 34600 + }, + { + "epoch": 227.69736842105263, + "grad_norm": 1.219657301902771, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 34610 + }, + { + "epoch": 227.76315789473685, + "grad_norm": 1.2700283527374268, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 34620 + }, + { + "epoch": 227.82894736842104, + "grad_norm": 1.2568398714065552, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 34630 + }, + { + "epoch": 227.89473684210526, + "grad_norm": 1.308393120765686, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 34640 + }, + { + "epoch": 227.96052631578948, + "grad_norm": 1.6060470342636108, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 34650 + }, + { + "epoch": 228.02631578947367, + "grad_norm": 1.4891085624694824, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 34660 + }, + { + "epoch": 228.0921052631579, + "grad_norm": 1.2525702714920044, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 34670 + }, + { + "epoch": 228.1578947368421, + "grad_norm": 1.2235273122787476, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 34680 + }, + { + "epoch": 228.22368421052633, + "grad_norm": 1.2164841890335083, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 34690 + }, + { + "epoch": 228.28947368421052, + "grad_norm": 1.487396001815796, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 34700 + }, + { + "epoch": 228.35526315789474, + "grad_norm": 1.3770133256912231, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 34710 + }, + { + "epoch": 228.42105263157896, + "grad_norm": 1.5073363780975342, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 34720 + }, + { + "epoch": 228.48684210526315, + "grad_norm": 1.1958956718444824, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 34730 + }, + { + "epoch": 228.55263157894737, + "grad_norm": 1.1365365982055664, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 34740 + }, + { + "epoch": 228.6184210526316, + "grad_norm": 0.9599331021308899, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 34750 + }, + { + "epoch": 228.68421052631578, + "grad_norm": 0.7286871671676636, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 34760 + }, + { + "epoch": 228.75, + "grad_norm": 0.8963000774383545, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 34770 + }, + { + "epoch": 228.81578947368422, + "grad_norm": 1.069594144821167, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 34780 + }, + { + "epoch": 228.8815789473684, + "grad_norm": 1.107754111289978, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 34790 + }, + { + "epoch": 228.94736842105263, + "grad_norm": 0.8711170554161072, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 34800 + }, + { + "epoch": 229.01315789473685, + "grad_norm": 1.2165173292160034, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 34810 + }, + { + "epoch": 229.07894736842104, + "grad_norm": 1.3709280490875244, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 34820 + }, + { + "epoch": 229.14473684210526, + "grad_norm": 0.9676075577735901, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 34830 + }, + { + "epoch": 229.21052631578948, + "grad_norm": 1.192239761352539, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 34840 + }, + { + "epoch": 229.27631578947367, + "grad_norm": 1.1368480920791626, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 34850 + }, + { + "epoch": 229.3421052631579, + "grad_norm": 1.0366135835647583, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 34860 + }, + { + "epoch": 229.4078947368421, + "grad_norm": 1.52436101436615, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 34870 + }, + { + "epoch": 229.47368421052633, + "grad_norm": 1.2649922370910645, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 34880 + }, + { + "epoch": 229.53947368421052, + "grad_norm": 1.4526771306991577, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 34890 + }, + { + "epoch": 229.60526315789474, + "grad_norm": 1.317568063735962, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 34900 + }, + { + "epoch": 229.67105263157896, + "grad_norm": 1.5620092153549194, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 34910 + }, + { + "epoch": 229.73684210526315, + "grad_norm": 1.2714239358901978, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 34920 + }, + { + "epoch": 229.80263157894737, + "grad_norm": 1.5597683191299438, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 34930 + }, + { + "epoch": 229.8684210526316, + "grad_norm": 1.3051207065582275, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 34940 + }, + { + "epoch": 229.93421052631578, + "grad_norm": 1.688822865486145, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 34950 + }, + { + "epoch": 230.0, + "grad_norm": 1.4721719026565552, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 34960 + }, + { + "epoch": 230.06578947368422, + "grad_norm": 1.6833611726760864, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 34970 + }, + { + "epoch": 230.1315789473684, + "grad_norm": 1.749559998512268, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 34980 + }, + { + "epoch": 230.19736842105263, + "grad_norm": 1.0270739793777466, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 34990 + }, + { + "epoch": 230.26315789473685, + "grad_norm": 1.5380287170410156, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 35000 + }, + { + "epoch": 230.32894736842104, + "grad_norm": 1.2859152555465698, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 35010 + }, + { + "epoch": 230.39473684210526, + "grad_norm": 1.2448034286499023, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 35020 + }, + { + "epoch": 230.46052631578948, + "grad_norm": 1.1096408367156982, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 35030 + }, + { + "epoch": 230.52631578947367, + "grad_norm": 1.2876746654510498, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 35040 + }, + { + "epoch": 230.5921052631579, + "grad_norm": 1.4161434173583984, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 35050 + }, + { + "epoch": 230.6578947368421, + "grad_norm": 1.1465400457382202, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 35060 + }, + { + "epoch": 230.72368421052633, + "grad_norm": 1.4166468381881714, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 35070 + }, + { + "epoch": 230.78947368421052, + "grad_norm": 1.3646936416625977, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 35080 + }, + { + "epoch": 230.85526315789474, + "grad_norm": 1.3088558912277222, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 35090 + }, + { + "epoch": 230.92105263157896, + "grad_norm": 1.1793044805526733, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 35100 + }, + { + "epoch": 230.98684210526315, + "grad_norm": 1.1064832210540771, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 35110 + }, + { + "epoch": 231.05263157894737, + "grad_norm": 1.2353724241256714, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 35120 + }, + { + "epoch": 231.1184210526316, + "grad_norm": 1.4452154636383057, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 35130 + }, + { + "epoch": 231.18421052631578, + "grad_norm": 1.1226993799209595, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 35140 + }, + { + "epoch": 231.25, + "grad_norm": 1.0637484788894653, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 35150 + }, + { + "epoch": 231.31578947368422, + "grad_norm": 0.9938138723373413, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 35160 + }, + { + "epoch": 231.3815789473684, + "grad_norm": 1.5435380935668945, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 35170 + }, + { + "epoch": 231.44736842105263, + "grad_norm": 1.042462944984436, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 35180 + }, + { + "epoch": 231.51315789473685, + "grad_norm": 1.0510119199752808, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 35190 + }, + { + "epoch": 231.57894736842104, + "grad_norm": 1.3831288814544678, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 35200 + }, + { + "epoch": 231.64473684210526, + "grad_norm": 1.462022066116333, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 35210 + }, + { + "epoch": 231.71052631578948, + "grad_norm": 1.0927327871322632, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 35220 + }, + { + "epoch": 231.77631578947367, + "grad_norm": 1.1333094835281372, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 35230 + }, + { + "epoch": 231.8421052631579, + "grad_norm": 1.2732223272323608, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 35240 + }, + { + "epoch": 231.9078947368421, + "grad_norm": 1.2944512367248535, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 35250 + }, + { + "epoch": 231.97368421052633, + "grad_norm": 1.2028173208236694, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 35260 + }, + { + "epoch": 232.03947368421052, + "grad_norm": 1.0721412897109985, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 35270 + }, + { + "epoch": 232.10526315789474, + "grad_norm": 1.3471068143844604, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 35280 + }, + { + "epoch": 232.17105263157896, + "grad_norm": 1.085255742073059, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 35290 + }, + { + "epoch": 232.23684210526315, + "grad_norm": 1.335943579673767, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 35300 + }, + { + "epoch": 232.30263157894737, + "grad_norm": 1.0885100364685059, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 35310 + }, + { + "epoch": 232.3684210526316, + "grad_norm": 1.2813364267349243, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 35320 + }, + { + "epoch": 232.43421052631578, + "grad_norm": 1.1324126720428467, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 35330 + }, + { + "epoch": 232.5, + "grad_norm": 1.5118268728256226, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 35340 + }, + { + "epoch": 232.56578947368422, + "grad_norm": 1.3967148065567017, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 35350 + }, + { + "epoch": 232.6315789473684, + "grad_norm": 1.3626654148101807, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 35360 + }, + { + "epoch": 232.69736842105263, + "grad_norm": 0.9588110446929932, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 35370 + }, + { + "epoch": 232.76315789473685, + "grad_norm": 1.7593961954116821, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 35380 + }, + { + "epoch": 232.82894736842104, + "grad_norm": 1.4406646490097046, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 35390 + }, + { + "epoch": 232.89473684210526, + "grad_norm": 1.0357986688613892, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 35400 + }, + { + "epoch": 232.96052631578948, + "grad_norm": 1.051256775856018, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 35410 + }, + { + "epoch": 233.02631578947367, + "grad_norm": 1.338333249092102, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 35420 + }, + { + "epoch": 233.0921052631579, + "grad_norm": 1.0235182046890259, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 35430 + }, + { + "epoch": 233.1578947368421, + "grad_norm": 1.265726923942566, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 35440 + }, + { + "epoch": 233.22368421052633, + "grad_norm": 1.3721272945404053, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 35450 + }, + { + "epoch": 233.28947368421052, + "grad_norm": 1.0401647090911865, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 35460 + }, + { + "epoch": 233.35526315789474, + "grad_norm": 1.3844105005264282, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 35470 + }, + { + "epoch": 233.42105263157896, + "grad_norm": 1.977495551109314, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 35480 + }, + { + "epoch": 233.48684210526315, + "grad_norm": 1.13108229637146, + "learning_rate": 0.0001, + "loss": 0.0191, + "step": 35490 + }, + { + "epoch": 233.55263157894737, + "grad_norm": 1.4336754083633423, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 35500 + }, + { + "epoch": 233.6184210526316, + "grad_norm": 1.2272390127182007, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 35510 + }, + { + "epoch": 233.68421052631578, + "grad_norm": 0.978657066822052, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 35520 + }, + { + "epoch": 233.75, + "grad_norm": 1.2305833101272583, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 35530 + }, + { + "epoch": 233.81578947368422, + "grad_norm": 1.1648582220077515, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 35540 + }, + { + "epoch": 233.8815789473684, + "grad_norm": 1.3043948411941528, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 35550 + }, + { + "epoch": 233.94736842105263, + "grad_norm": 1.4503754377365112, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 35560 + }, + { + "epoch": 234.01315789473685, + "grad_norm": 1.3145157098770142, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 35570 + }, + { + "epoch": 234.07894736842104, + "grad_norm": 0.9834286570549011, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 35580 + }, + { + "epoch": 234.14473684210526, + "grad_norm": 1.5456626415252686, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 35590 + }, + { + "epoch": 234.21052631578948, + "grad_norm": 1.1707838773727417, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 35600 + }, + { + "epoch": 234.27631578947367, + "grad_norm": 1.4479657411575317, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 35610 + }, + { + "epoch": 234.3421052631579, + "grad_norm": 1.352433204650879, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 35620 + }, + { + "epoch": 234.4078947368421, + "grad_norm": 1.1182633638381958, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 35630 + }, + { + "epoch": 234.47368421052633, + "grad_norm": 0.9117094278335571, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 35640 + }, + { + "epoch": 234.53947368421052, + "grad_norm": 1.345800757408142, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 35650 + }, + { + "epoch": 234.60526315789474, + "grad_norm": 1.145963430404663, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 35660 + }, + { + "epoch": 234.67105263157896, + "grad_norm": 0.9094061851501465, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 35670 + }, + { + "epoch": 234.73684210526315, + "grad_norm": 1.2878849506378174, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 35680 + }, + { + "epoch": 234.80263157894737, + "grad_norm": 1.4541486501693726, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 35690 + }, + { + "epoch": 234.8684210526316, + "grad_norm": 0.971204400062561, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 35700 + }, + { + "epoch": 234.93421052631578, + "grad_norm": 1.1860767602920532, + "learning_rate": 0.0001, + "loss": 0.0184, + "step": 35710 + }, + { + "epoch": 235.0, + "grad_norm": 1.2985316514968872, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 35720 + }, + { + "epoch": 235.06578947368422, + "grad_norm": 1.0510177612304688, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 35730 + }, + { + "epoch": 235.1315789473684, + "grad_norm": 0.9717345833778381, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 35740 + }, + { + "epoch": 235.19736842105263, + "grad_norm": 1.131279468536377, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 35750 + }, + { + "epoch": 235.26315789473685, + "grad_norm": 1.148485541343689, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 35760 + }, + { + "epoch": 235.32894736842104, + "grad_norm": 0.8427010774612427, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 35770 + }, + { + "epoch": 235.39473684210526, + "grad_norm": 0.973970890045166, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 35780 + }, + { + "epoch": 235.46052631578948, + "grad_norm": 0.9726606607437134, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 35790 + }, + { + "epoch": 235.52631578947367, + "grad_norm": 0.849587082862854, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 35800 + }, + { + "epoch": 235.5921052631579, + "grad_norm": 1.053238868713379, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 35810 + }, + { + "epoch": 235.6578947368421, + "grad_norm": 1.2742220163345337, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 35820 + }, + { + "epoch": 235.72368421052633, + "grad_norm": 1.3794277906417847, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 35830 + }, + { + "epoch": 235.78947368421052, + "grad_norm": 1.2692276239395142, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 35840 + }, + { + "epoch": 235.85526315789474, + "grad_norm": 1.1622906923294067, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 35850 + }, + { + "epoch": 235.92105263157896, + "grad_norm": 1.1229708194732666, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 35860 + }, + { + "epoch": 235.98684210526315, + "grad_norm": 1.1721322536468506, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 35870 + }, + { + "epoch": 236.05263157894737, + "grad_norm": 1.2149195671081543, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 35880 + }, + { + "epoch": 236.1184210526316, + "grad_norm": 1.552794337272644, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 35890 + }, + { + "epoch": 236.18421052631578, + "grad_norm": 0.9946601986885071, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 35900 + }, + { + "epoch": 236.25, + "grad_norm": 1.1157256364822388, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 35910 + }, + { + "epoch": 236.31578947368422, + "grad_norm": 1.2444145679473877, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 35920 + }, + { + "epoch": 236.3815789473684, + "grad_norm": 1.2110340595245361, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 35930 + }, + { + "epoch": 236.44736842105263, + "grad_norm": 1.4335511922836304, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 35940 + }, + { + "epoch": 236.51315789473685, + "grad_norm": 1.3083791732788086, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 35950 + }, + { + "epoch": 236.57894736842104, + "grad_norm": 0.8935396075248718, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 35960 + }, + { + "epoch": 236.64473684210526, + "grad_norm": 0.9433857798576355, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 35970 + }, + { + "epoch": 236.71052631578948, + "grad_norm": 1.1531612873077393, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 35980 + }, + { + "epoch": 236.77631578947367, + "grad_norm": 1.431472659111023, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 35990 + }, + { + "epoch": 236.8421052631579, + "grad_norm": 1.6425496339797974, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 36000 + }, + { + "epoch": 236.9078947368421, + "grad_norm": 1.0566476583480835, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 36010 + }, + { + "epoch": 236.97368421052633, + "grad_norm": 0.8310641050338745, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 36020 + }, + { + "epoch": 237.03947368421052, + "grad_norm": 1.0086852312088013, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 36030 + }, + { + "epoch": 237.10526315789474, + "grad_norm": 1.44770085811615, + "learning_rate": 0.0001, + "loss": 0.0196, + "step": 36040 + }, + { + "epoch": 237.17105263157896, + "grad_norm": 1.002258539199829, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 36050 + }, + { + "epoch": 237.23684210526315, + "grad_norm": 1.0949816703796387, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 36060 + }, + { + "epoch": 237.30263157894737, + "grad_norm": 0.9487596750259399, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 36070 + }, + { + "epoch": 237.3684210526316, + "grad_norm": 0.8451818227767944, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 36080 + }, + { + "epoch": 237.43421052631578, + "grad_norm": 1.455590844154358, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 36090 + }, + { + "epoch": 237.5, + "grad_norm": 1.2920894622802734, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 36100 + }, + { + "epoch": 237.56578947368422, + "grad_norm": 1.3782984018325806, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 36110 + }, + { + "epoch": 237.6315789473684, + "grad_norm": 1.0426079034805298, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 36120 + }, + { + "epoch": 237.69736842105263, + "grad_norm": 1.128311276435852, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 36130 + }, + { + "epoch": 237.76315789473685, + "grad_norm": 1.0771710872650146, + "learning_rate": 0.0001, + "loss": 0.0183, + "step": 36140 + }, + { + "epoch": 237.82894736842104, + "grad_norm": 0.8696498274803162, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 36150 + }, + { + "epoch": 237.89473684210526, + "grad_norm": 1.1533684730529785, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 36160 + }, + { + "epoch": 237.96052631578948, + "grad_norm": 1.2097066640853882, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 36170 + }, + { + "epoch": 238.02631578947367, + "grad_norm": 1.1316962242126465, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 36180 + }, + { + "epoch": 238.0921052631579, + "grad_norm": 1.1402578353881836, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 36190 + }, + { + "epoch": 238.1578947368421, + "grad_norm": 1.0534335374832153, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 36200 + }, + { + "epoch": 238.22368421052633, + "grad_norm": 1.2206014394760132, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 36210 + }, + { + "epoch": 238.28947368421052, + "grad_norm": 0.9274782538414001, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 36220 + }, + { + "epoch": 238.35526315789474, + "grad_norm": 1.1802715063095093, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 36230 + }, + { + "epoch": 238.42105263157896, + "grad_norm": 1.3021156787872314, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 36240 + }, + { + "epoch": 238.48684210526315, + "grad_norm": 0.9791660904884338, + "learning_rate": 0.0001, + "loss": 0.0188, + "step": 36250 + }, + { + "epoch": 238.55263157894737, + "grad_norm": 1.458195686340332, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 36260 + }, + { + "epoch": 238.6184210526316, + "grad_norm": 1.2013226747512817, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 36270 + }, + { + "epoch": 238.68421052631578, + "grad_norm": 0.897367000579834, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 36280 + }, + { + "epoch": 238.75, + "grad_norm": 1.430230736732483, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 36290 + }, + { + "epoch": 238.81578947368422, + "grad_norm": 1.2003045082092285, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 36300 + }, + { + "epoch": 238.8815789473684, + "grad_norm": 0.8546904921531677, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 36310 + }, + { + "epoch": 238.94736842105263, + "grad_norm": 0.9247255325317383, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 36320 + }, + { + "epoch": 239.01315789473685, + "grad_norm": 1.1530381441116333, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 36330 + }, + { + "epoch": 239.07894736842104, + "grad_norm": 1.2787343263626099, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 36340 + }, + { + "epoch": 239.14473684210526, + "grad_norm": 1.2188760042190552, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 36350 + }, + { + "epoch": 239.21052631578948, + "grad_norm": 1.1100471019744873, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 36360 + }, + { + "epoch": 239.27631578947367, + "grad_norm": 0.8758446574211121, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 36370 + }, + { + "epoch": 239.3421052631579, + "grad_norm": 0.9716992974281311, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 36380 + }, + { + "epoch": 239.4078947368421, + "grad_norm": 1.394768476486206, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 36390 + }, + { + "epoch": 239.47368421052633, + "grad_norm": 1.2768748998641968, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 36400 + }, + { + "epoch": 239.53947368421052, + "grad_norm": 1.1910803318023682, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 36410 + }, + { + "epoch": 239.60526315789474, + "grad_norm": 1.2653837203979492, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 36420 + }, + { + "epoch": 239.67105263157896, + "grad_norm": 1.348326325416565, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 36430 + }, + { + "epoch": 239.73684210526315, + "grad_norm": 1.2382081747055054, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 36440 + }, + { + "epoch": 239.80263157894737, + "grad_norm": 1.3600709438323975, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 36450 + }, + { + "epoch": 239.8684210526316, + "grad_norm": 0.9798856377601624, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 36460 + }, + { + "epoch": 239.93421052631578, + "grad_norm": 0.7852506637573242, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 36470 + }, + { + "epoch": 240.0, + "grad_norm": 1.0045945644378662, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 36480 + }, + { + "epoch": 240.06578947368422, + "grad_norm": 0.8587341904640198, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 36490 + }, + { + "epoch": 240.1315789473684, + "grad_norm": 1.4854038953781128, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 36500 + }, + { + "epoch": 240.19736842105263, + "grad_norm": 1.3243857622146606, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 36510 + }, + { + "epoch": 240.26315789473685, + "grad_norm": 1.048046588897705, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 36520 + }, + { + "epoch": 240.32894736842104, + "grad_norm": 1.2763868570327759, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 36530 + }, + { + "epoch": 240.39473684210526, + "grad_norm": 0.910820722579956, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 36540 + }, + { + "epoch": 240.46052631578948, + "grad_norm": 1.2709863185882568, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 36550 + }, + { + "epoch": 240.52631578947367, + "grad_norm": 1.4138668775558472, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 36560 + }, + { + "epoch": 240.5921052631579, + "grad_norm": 1.7599332332611084, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 36570 + }, + { + "epoch": 240.6578947368421, + "grad_norm": 1.1151326894760132, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 36580 + }, + { + "epoch": 240.72368421052633, + "grad_norm": 0.9767569899559021, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 36590 + }, + { + "epoch": 240.78947368421052, + "grad_norm": 0.9586136341094971, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 36600 + }, + { + "epoch": 240.85526315789474, + "grad_norm": 0.9484444856643677, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 36610 + }, + { + "epoch": 240.92105263157896, + "grad_norm": 1.5435513257980347, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 36620 + }, + { + "epoch": 240.98684210526315, + "grad_norm": 1.2928390502929688, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 36630 + }, + { + "epoch": 241.05263157894737, + "grad_norm": 1.3825304508209229, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 36640 + }, + { + "epoch": 241.1184210526316, + "grad_norm": 1.3325434923171997, + "learning_rate": 0.0001, + "loss": 0.0189, + "step": 36650 + }, + { + "epoch": 241.18421052631578, + "grad_norm": 1.4383955001831055, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 36660 + }, + { + "epoch": 241.25, + "grad_norm": 1.0420336723327637, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 36670 + }, + { + "epoch": 241.31578947368422, + "grad_norm": 1.0319572687149048, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 36680 + }, + { + "epoch": 241.3815789473684, + "grad_norm": 1.3533285856246948, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 36690 + }, + { + "epoch": 241.44736842105263, + "grad_norm": 1.112142562866211, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 36700 + }, + { + "epoch": 241.51315789473685, + "grad_norm": 1.3508294820785522, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 36710 + }, + { + "epoch": 241.57894736842104, + "grad_norm": 0.8377172946929932, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 36720 + }, + { + "epoch": 241.64473684210526, + "grad_norm": 1.2829350233078003, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 36730 + }, + { + "epoch": 241.71052631578948, + "grad_norm": 1.1019694805145264, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 36740 + }, + { + "epoch": 241.77631578947367, + "grad_norm": 1.6284654140472412, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 36750 + }, + { + "epoch": 241.8421052631579, + "grad_norm": 1.409332036972046, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 36760 + }, + { + "epoch": 241.9078947368421, + "grad_norm": 1.5580686330795288, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 36770 + }, + { + "epoch": 241.97368421052633, + "grad_norm": 1.2454417943954468, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 36780 + }, + { + "epoch": 242.03947368421052, + "grad_norm": 1.194043755531311, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 36790 + }, + { + "epoch": 242.10526315789474, + "grad_norm": 1.0807819366455078, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 36800 + }, + { + "epoch": 242.17105263157896, + "grad_norm": 1.1862661838531494, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 36810 + }, + { + "epoch": 242.23684210526315, + "grad_norm": 1.3441433906555176, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 36820 + }, + { + "epoch": 242.30263157894737, + "grad_norm": 1.0832732915878296, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 36830 + }, + { + "epoch": 242.3684210526316, + "grad_norm": 0.8166968822479248, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 36840 + }, + { + "epoch": 242.43421052631578, + "grad_norm": 1.1842600107192993, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 36850 + }, + { + "epoch": 242.5, + "grad_norm": 1.0731406211853027, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 36860 + }, + { + "epoch": 242.56578947368422, + "grad_norm": 1.1145586967468262, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 36870 + }, + { + "epoch": 242.6315789473684, + "grad_norm": 1.2087843418121338, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 36880 + }, + { + "epoch": 242.69736842105263, + "grad_norm": 0.9884072542190552, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 36890 + }, + { + "epoch": 242.76315789473685, + "grad_norm": 1.011466383934021, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 36900 + }, + { + "epoch": 242.82894736842104, + "grad_norm": 0.9709348678588867, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 36910 + }, + { + "epoch": 242.89473684210526, + "grad_norm": 1.2150779962539673, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 36920 + }, + { + "epoch": 242.96052631578948, + "grad_norm": 1.3461240530014038, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 36930 + }, + { + "epoch": 243.02631578947367, + "grad_norm": 1.4247745275497437, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 36940 + }, + { + "epoch": 243.0921052631579, + "grad_norm": 1.213616132736206, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 36950 + }, + { + "epoch": 243.1578947368421, + "grad_norm": 1.3313192129135132, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 36960 + }, + { + "epoch": 243.22368421052633, + "grad_norm": 1.5116862058639526, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 36970 + }, + { + "epoch": 243.28947368421052, + "grad_norm": 1.200487494468689, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 36980 + }, + { + "epoch": 243.35526315789474, + "grad_norm": 1.5582853555679321, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 36990 + }, + { + "epoch": 243.42105263157896, + "grad_norm": 1.1975919008255005, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 37000 + }, + { + "epoch": 243.48684210526315, + "grad_norm": 1.3097623586654663, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 37010 + }, + { + "epoch": 243.55263157894737, + "grad_norm": 1.28339421749115, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 37020 + }, + { + "epoch": 243.6184210526316, + "grad_norm": 1.0238935947418213, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 37030 + }, + { + "epoch": 243.68421052631578, + "grad_norm": 1.203718900680542, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 37040 + }, + { + "epoch": 243.75, + "grad_norm": 1.2539606094360352, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 37050 + }, + { + "epoch": 243.81578947368422, + "grad_norm": 1.581852674484253, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 37060 + }, + { + "epoch": 243.8815789473684, + "grad_norm": 0.8200912475585938, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 37070 + }, + { + "epoch": 243.94736842105263, + "grad_norm": 1.1216977834701538, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 37080 + }, + { + "epoch": 244.01315789473685, + "grad_norm": 1.295255422592163, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 37090 + }, + { + "epoch": 244.07894736842104, + "grad_norm": 0.9966356158256531, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 37100 + }, + { + "epoch": 244.14473684210526, + "grad_norm": 1.278610110282898, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 37110 + }, + { + "epoch": 244.21052631578948, + "grad_norm": 1.1381927728652954, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 37120 + }, + { + "epoch": 244.27631578947367, + "grad_norm": 1.3754022121429443, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 37130 + }, + { + "epoch": 244.3421052631579, + "grad_norm": 1.0089901685714722, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 37140 + }, + { + "epoch": 244.4078947368421, + "grad_norm": 1.4417517185211182, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 37150 + }, + { + "epoch": 244.47368421052633, + "grad_norm": 1.4486968517303467, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 37160 + }, + { + "epoch": 244.53947368421052, + "grad_norm": 1.428544044494629, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 37170 + }, + { + "epoch": 244.60526315789474, + "grad_norm": 1.239670753479004, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 37180 + }, + { + "epoch": 244.67105263157896, + "grad_norm": 0.8541073203086853, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 37190 + }, + { + "epoch": 244.73684210526315, + "grad_norm": 1.2242367267608643, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 37200 + }, + { + "epoch": 244.80263157894737, + "grad_norm": 1.1044442653656006, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 37210 + }, + { + "epoch": 244.8684210526316, + "grad_norm": 1.4338637590408325, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 37220 + }, + { + "epoch": 244.93421052631578, + "grad_norm": 1.1796600818634033, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 37230 + }, + { + "epoch": 245.0, + "grad_norm": 1.4116781949996948, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 37240 + }, + { + "epoch": 245.06578947368422, + "grad_norm": 1.2402573823928833, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 37250 + }, + { + "epoch": 245.1315789473684, + "grad_norm": 1.3560236692428589, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 37260 + }, + { + "epoch": 245.19736842105263, + "grad_norm": 1.0826292037963867, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 37270 + }, + { + "epoch": 245.26315789473685, + "grad_norm": 1.4319058656692505, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 37280 + }, + { + "epoch": 245.32894736842104, + "grad_norm": 1.4913933277130127, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 37290 + }, + { + "epoch": 245.39473684210526, + "grad_norm": 1.6130610704421997, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 37300 + }, + { + "epoch": 245.46052631578948, + "grad_norm": 1.419812560081482, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 37310 + }, + { + "epoch": 245.52631578947367, + "grad_norm": 1.1114962100982666, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 37320 + }, + { + "epoch": 245.5921052631579, + "grad_norm": 0.8215615749359131, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 37330 + }, + { + "epoch": 245.6578947368421, + "grad_norm": 0.9287834763526917, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 37340 + }, + { + "epoch": 245.72368421052633, + "grad_norm": 1.5063482522964478, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 37350 + }, + { + "epoch": 245.78947368421052, + "grad_norm": 1.3184797763824463, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 37360 + }, + { + "epoch": 245.85526315789474, + "grad_norm": 1.147472858428955, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 37370 + }, + { + "epoch": 245.92105263157896, + "grad_norm": 1.3581057786941528, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 37380 + }, + { + "epoch": 245.98684210526315, + "grad_norm": 0.9130412340164185, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 37390 + }, + { + "epoch": 246.05263157894737, + "grad_norm": 1.0670521259307861, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 37400 + }, + { + "epoch": 246.1184210526316, + "grad_norm": 1.1035175323486328, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 37410 + }, + { + "epoch": 246.18421052631578, + "grad_norm": 1.24159574508667, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 37420 + }, + { + "epoch": 246.25, + "grad_norm": 1.571832537651062, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 37430 + }, + { + "epoch": 246.31578947368422, + "grad_norm": 1.1885900497436523, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 37440 + }, + { + "epoch": 246.3815789473684, + "grad_norm": 1.5651880502700806, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 37450 + }, + { + "epoch": 246.44736842105263, + "grad_norm": 1.3197070360183716, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 37460 + }, + { + "epoch": 246.51315789473685, + "grad_norm": 1.3817650079727173, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 37470 + }, + { + "epoch": 246.57894736842104, + "grad_norm": 1.5466119050979614, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 37480 + }, + { + "epoch": 246.64473684210526, + "grad_norm": 1.2215481996536255, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 37490 + }, + { + "epoch": 246.71052631578948, + "grad_norm": 1.4967329502105713, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 37500 + }, + { + "epoch": 246.77631578947367, + "grad_norm": 1.4192287921905518, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 37510 + }, + { + "epoch": 246.8421052631579, + "grad_norm": 1.3284549713134766, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 37520 + }, + { + "epoch": 246.9078947368421, + "grad_norm": 1.3947001695632935, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 37530 + }, + { + "epoch": 246.97368421052633, + "grad_norm": 1.2742048501968384, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 37540 + }, + { + "epoch": 247.03947368421052, + "grad_norm": 1.200553059577942, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 37550 + }, + { + "epoch": 247.10526315789474, + "grad_norm": 1.2947498559951782, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 37560 + }, + { + "epoch": 247.17105263157896, + "grad_norm": 1.950449824333191, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 37570 + }, + { + "epoch": 247.23684210526315, + "grad_norm": 1.7037091255187988, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 37580 + }, + { + "epoch": 247.30263157894737, + "grad_norm": 1.2691845893859863, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 37590 + }, + { + "epoch": 247.3684210526316, + "grad_norm": 0.9007494449615479, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 37600 + }, + { + "epoch": 247.43421052631578, + "grad_norm": 1.374837040901184, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 37610 + }, + { + "epoch": 247.5, + "grad_norm": 1.1083370447158813, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 37620 + }, + { + "epoch": 247.56578947368422, + "grad_norm": 1.2770500183105469, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 37630 + }, + { + "epoch": 247.6315789473684, + "grad_norm": 1.4671261310577393, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 37640 + }, + { + "epoch": 247.69736842105263, + "grad_norm": 1.6228381395339966, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 37650 + }, + { + "epoch": 247.76315789473685, + "grad_norm": 1.6250126361846924, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 37660 + }, + { + "epoch": 247.82894736842104, + "grad_norm": 1.4353737831115723, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 37670 + }, + { + "epoch": 247.89473684210526, + "grad_norm": 1.6463098526000977, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 37680 + }, + { + "epoch": 247.96052631578948, + "grad_norm": 1.2910631895065308, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 37690 + }, + { + "epoch": 248.02631578947367, + "grad_norm": 1.0633682012557983, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 37700 + }, + { + "epoch": 248.0921052631579, + "grad_norm": 1.6399755477905273, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 37710 + }, + { + "epoch": 248.1578947368421, + "grad_norm": 1.4523167610168457, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 37720 + }, + { + "epoch": 248.22368421052633, + "grad_norm": 1.307390570640564, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 37730 + }, + { + "epoch": 248.28947368421052, + "grad_norm": 1.414101243019104, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 37740 + }, + { + "epoch": 248.35526315789474, + "grad_norm": 1.1803747415542603, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 37750 + }, + { + "epoch": 248.42105263157896, + "grad_norm": 1.416965365409851, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 37760 + }, + { + "epoch": 248.48684210526315, + "grad_norm": 1.0817677974700928, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 37770 + }, + { + "epoch": 248.55263157894737, + "grad_norm": 1.3911010026931763, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 37780 + }, + { + "epoch": 248.6184210526316, + "grad_norm": 1.0438708066940308, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 37790 + }, + { + "epoch": 248.68421052631578, + "grad_norm": 1.1672842502593994, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 37800 + }, + { + "epoch": 248.75, + "grad_norm": 1.5514910221099854, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 37810 + }, + { + "epoch": 248.81578947368422, + "grad_norm": 1.686729073524475, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 37820 + }, + { + "epoch": 248.8815789473684, + "grad_norm": 1.720362901687622, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 37830 + }, + { + "epoch": 248.94736842105263, + "grad_norm": 1.4286376237869263, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 37840 + }, + { + "epoch": 249.01315789473685, + "grad_norm": 1.447860836982727, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 37850 + }, + { + "epoch": 249.07894736842104, + "grad_norm": 1.2636222839355469, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 37860 + }, + { + "epoch": 249.14473684210526, + "grad_norm": 1.2671912908554077, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 37870 + }, + { + "epoch": 249.21052631578948, + "grad_norm": 0.9959325194358826, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 37880 + }, + { + "epoch": 249.27631578947367, + "grad_norm": 1.2429472208023071, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 37890 + }, + { + "epoch": 249.3421052631579, + "grad_norm": 1.1575770378112793, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 37900 + }, + { + "epoch": 249.4078947368421, + "grad_norm": 1.075370192527771, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 37910 + }, + { + "epoch": 249.47368421052633, + "grad_norm": 1.1879488229751587, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 37920 + }, + { + "epoch": 249.53947368421052, + "grad_norm": 1.0791507959365845, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 37930 + }, + { + "epoch": 249.60526315789474, + "grad_norm": 0.9675527811050415, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 37940 + }, + { + "epoch": 249.67105263157896, + "grad_norm": 1.2315634489059448, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 37950 + }, + { + "epoch": 249.73684210526315, + "grad_norm": 0.8122895956039429, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 37960 + }, + { + "epoch": 249.80263157894737, + "grad_norm": 1.209394931793213, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 37970 + }, + { + "epoch": 249.8684210526316, + "grad_norm": 1.001671314239502, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 37980 + }, + { + "epoch": 249.93421052631578, + "grad_norm": 1.5136141777038574, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 37990 + }, + { + "epoch": 250.0, + "grad_norm": 1.4685696363449097, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 38000 + }, + { + "epoch": 250.06578947368422, + "grad_norm": 1.1837599277496338, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 38010 + }, + { + "epoch": 250.1315789473684, + "grad_norm": 1.018049955368042, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 38020 + }, + { + "epoch": 250.19736842105263, + "grad_norm": 1.0847623348236084, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 38030 + }, + { + "epoch": 250.26315789473685, + "grad_norm": 1.4358677864074707, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 38040 + }, + { + "epoch": 250.32894736842104, + "grad_norm": 1.3127168416976929, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 38050 + }, + { + "epoch": 250.39473684210526, + "grad_norm": 1.6243581771850586, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 38060 + }, + { + "epoch": 250.46052631578948, + "grad_norm": 1.3248577117919922, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 38070 + }, + { + "epoch": 250.52631578947367, + "grad_norm": 1.2250198125839233, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 38080 + }, + { + "epoch": 250.5921052631579, + "grad_norm": 1.238546371459961, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 38090 + }, + { + "epoch": 250.6578947368421, + "grad_norm": 1.3515560626983643, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 38100 + }, + { + "epoch": 250.72368421052633, + "grad_norm": 1.272128701210022, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 38110 + }, + { + "epoch": 250.78947368421052, + "grad_norm": 1.1292731761932373, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 38120 + }, + { + "epoch": 250.85526315789474, + "grad_norm": 1.018415927886963, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 38130 + }, + { + "epoch": 250.92105263157896, + "grad_norm": 1.367689609527588, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 38140 + }, + { + "epoch": 250.98684210526315, + "grad_norm": 1.3254410028457642, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 38150 + }, + { + "epoch": 251.05263157894737, + "grad_norm": 1.6456955671310425, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 38160 + }, + { + "epoch": 251.1184210526316, + "grad_norm": 1.1170157194137573, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 38170 + }, + { + "epoch": 251.18421052631578, + "grad_norm": 0.998716413974762, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 38180 + }, + { + "epoch": 251.25, + "grad_norm": 0.9729195833206177, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 38190 + }, + { + "epoch": 251.31578947368422, + "grad_norm": 1.274336576461792, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 38200 + }, + { + "epoch": 251.3815789473684, + "grad_norm": 1.0042622089385986, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 38210 + }, + { + "epoch": 251.44736842105263, + "grad_norm": 1.3611550331115723, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 38220 + }, + { + "epoch": 251.51315789473685, + "grad_norm": 1.4068595170974731, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 38230 + }, + { + "epoch": 251.57894736842104, + "grad_norm": 1.0429598093032837, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 38240 + }, + { + "epoch": 251.64473684210526, + "grad_norm": 1.2321966886520386, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 38250 + }, + { + "epoch": 251.71052631578948, + "grad_norm": 1.5043584108352661, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 38260 + }, + { + "epoch": 251.77631578947367, + "grad_norm": 1.3396488428115845, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 38270 + }, + { + "epoch": 251.8421052631579, + "grad_norm": 1.1610597372055054, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 38280 + }, + { + "epoch": 251.9078947368421, + "grad_norm": 1.2713470458984375, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 38290 + }, + { + "epoch": 251.97368421052633, + "grad_norm": 1.6690809726715088, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 38300 + }, + { + "epoch": 252.03947368421052, + "grad_norm": 1.3469371795654297, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 38310 + }, + { + "epoch": 252.10526315789474, + "grad_norm": 1.1202424764633179, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 38320 + }, + { + "epoch": 252.17105263157896, + "grad_norm": 1.3286770582199097, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 38330 + }, + { + "epoch": 252.23684210526315, + "grad_norm": 1.2388663291931152, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 38340 + }, + { + "epoch": 252.30263157894737, + "grad_norm": 1.1364609003067017, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 38350 + }, + { + "epoch": 252.3684210526316, + "grad_norm": 1.43731689453125, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 38360 + }, + { + "epoch": 252.43421052631578, + "grad_norm": 1.337084412574768, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 38370 + }, + { + "epoch": 252.5, + "grad_norm": 1.568783164024353, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 38380 + }, + { + "epoch": 252.56578947368422, + "grad_norm": 1.5150007009506226, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 38390 + }, + { + "epoch": 252.6315789473684, + "grad_norm": 1.7503875494003296, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 38400 + }, + { + "epoch": 252.69736842105263, + "grad_norm": 1.123337745666504, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 38410 + }, + { + "epoch": 252.76315789473685, + "grad_norm": 1.359995722770691, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 38420 + }, + { + "epoch": 252.82894736842104, + "grad_norm": 1.1792511940002441, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 38430 + }, + { + "epoch": 252.89473684210526, + "grad_norm": 1.2069591283798218, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 38440 + }, + { + "epoch": 252.96052631578948, + "grad_norm": 1.2496885061264038, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 38450 + }, + { + "epoch": 253.02631578947367, + "grad_norm": 1.0140652656555176, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 38460 + }, + { + "epoch": 253.0921052631579, + "grad_norm": 1.251288652420044, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 38470 + }, + { + "epoch": 253.1578947368421, + "grad_norm": 1.283728837966919, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 38480 + }, + { + "epoch": 253.22368421052633, + "grad_norm": 1.3904523849487305, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 38490 + }, + { + "epoch": 253.28947368421052, + "grad_norm": 1.363517165184021, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 38500 + }, + { + "epoch": 253.35526315789474, + "grad_norm": 1.120867133140564, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 38510 + }, + { + "epoch": 253.42105263157896, + "grad_norm": 1.360049843788147, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 38520 + }, + { + "epoch": 253.48684210526315, + "grad_norm": 1.39319908618927, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 38530 + }, + { + "epoch": 253.55263157894737, + "grad_norm": 1.0926111936569214, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 38540 + }, + { + "epoch": 253.6184210526316, + "grad_norm": 1.4008203744888306, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 38550 + }, + { + "epoch": 253.68421052631578, + "grad_norm": 0.974804699420929, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 38560 + }, + { + "epoch": 253.75, + "grad_norm": 1.2252269983291626, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 38570 + }, + { + "epoch": 253.81578947368422, + "grad_norm": 1.2093294858932495, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 38580 + }, + { + "epoch": 253.8815789473684, + "grad_norm": 1.3294168710708618, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 38590 + }, + { + "epoch": 253.94736842105263, + "grad_norm": 1.3876254558563232, + "learning_rate": 0.0001, + "loss": 0.019, + "step": 38600 + }, + { + "epoch": 254.01315789473685, + "grad_norm": 1.2935510873794556, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 38610 + }, + { + "epoch": 254.07894736842104, + "grad_norm": 1.1402019262313843, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 38620 + }, + { + "epoch": 254.14473684210526, + "grad_norm": 1.167262315750122, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 38630 + }, + { + "epoch": 254.21052631578948, + "grad_norm": 1.2734203338623047, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 38640 + }, + { + "epoch": 254.27631578947367, + "grad_norm": 1.4504673480987549, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 38650 + }, + { + "epoch": 254.3421052631579, + "grad_norm": 1.499895691871643, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 38660 + }, + { + "epoch": 254.4078947368421, + "grad_norm": 0.8160152435302734, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 38670 + }, + { + "epoch": 254.47368421052633, + "grad_norm": 1.31825852394104, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 38680 + }, + { + "epoch": 254.53947368421052, + "grad_norm": 1.0252249240875244, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 38690 + }, + { + "epoch": 254.60526315789474, + "grad_norm": 1.2557110786437988, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 38700 + }, + { + "epoch": 254.67105263157896, + "grad_norm": 1.0297420024871826, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 38710 + }, + { + "epoch": 254.73684210526315, + "grad_norm": 1.1971931457519531, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 38720 + }, + { + "epoch": 254.80263157894737, + "grad_norm": 0.9345195889472961, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 38730 + }, + { + "epoch": 254.8684210526316, + "grad_norm": 1.0931565761566162, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 38740 + }, + { + "epoch": 254.93421052631578, + "grad_norm": 1.1553313732147217, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 38750 + }, + { + "epoch": 255.0, + "grad_norm": 0.8764203786849976, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 38760 + }, + { + "epoch": 255.06578947368422, + "grad_norm": 1.2318146228790283, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 38770 + }, + { + "epoch": 255.1315789473684, + "grad_norm": 1.1166127920150757, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 38780 + }, + { + "epoch": 255.19736842105263, + "grad_norm": 1.0156936645507812, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 38790 + }, + { + "epoch": 255.26315789473685, + "grad_norm": 1.1941726207733154, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 38800 + }, + { + "epoch": 255.32894736842104, + "grad_norm": 1.557681679725647, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 38810 + }, + { + "epoch": 255.39473684210526, + "grad_norm": 1.3124456405639648, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 38820 + }, + { + "epoch": 255.46052631578948, + "grad_norm": 1.4480106830596924, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 38830 + }, + { + "epoch": 255.52631578947367, + "grad_norm": 1.780182957649231, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 38840 + }, + { + "epoch": 255.5921052631579, + "grad_norm": 1.6105772256851196, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 38850 + }, + { + "epoch": 255.6578947368421, + "grad_norm": 1.3613028526306152, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 38860 + }, + { + "epoch": 255.72368421052633, + "grad_norm": 1.5420035123825073, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 38870 + }, + { + "epoch": 255.78947368421052, + "grad_norm": 2.7019877433776855, + "learning_rate": 0.0001, + "loss": 0.0199, + "step": 38880 + }, + { + "epoch": 255.85526315789474, + "grad_norm": 2.009016752243042, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 38890 + }, + { + "epoch": 255.92105263157896, + "grad_norm": 1.6684277057647705, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 38900 + }, + { + "epoch": 255.98684210526315, + "grad_norm": 1.6645160913467407, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 38910 + }, + { + "epoch": 256.05263157894734, + "grad_norm": 1.6908752918243408, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 38920 + }, + { + "epoch": 256.11842105263156, + "grad_norm": 1.4804624319076538, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 38930 + }, + { + "epoch": 256.1842105263158, + "grad_norm": 1.5433467626571655, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 38940 + }, + { + "epoch": 256.25, + "grad_norm": 1.4467060565948486, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 38950 + }, + { + "epoch": 256.3157894736842, + "grad_norm": 1.5117400884628296, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 38960 + }, + { + "epoch": 256.38157894736844, + "grad_norm": 1.5240405797958374, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 38970 + }, + { + "epoch": 256.44736842105266, + "grad_norm": 1.3311848640441895, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 38980 + }, + { + "epoch": 256.5131578947368, + "grad_norm": 1.3353846073150635, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 38990 + }, + { + "epoch": 256.57894736842104, + "grad_norm": 2.797309398651123, + "learning_rate": 0.0001, + "loss": 0.0371, + "step": 39000 + }, + { + "epoch": 256.64473684210526, + "grad_norm": 2.06365966796875, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 39010 + }, + { + "epoch": 256.7105263157895, + "grad_norm": 2.192547082901001, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 39020 + }, + { + "epoch": 256.7763157894737, + "grad_norm": 1.957229733467102, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 39030 + }, + { + "epoch": 256.8421052631579, + "grad_norm": 1.7908846139907837, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 39040 + }, + { + "epoch": 256.9078947368421, + "grad_norm": 1.4980082511901855, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 39050 + }, + { + "epoch": 256.9736842105263, + "grad_norm": 1.5401355028152466, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 39060 + }, + { + "epoch": 257.0394736842105, + "grad_norm": 1.2455133199691772, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 39070 + }, + { + "epoch": 257.10526315789474, + "grad_norm": 1.4379152059555054, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 39080 + }, + { + "epoch": 257.17105263157896, + "grad_norm": 1.4791769981384277, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 39090 + }, + { + "epoch": 257.2368421052632, + "grad_norm": 1.1306533813476562, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 39100 + }, + { + "epoch": 257.30263157894734, + "grad_norm": 1.0871785879135132, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 39110 + }, + { + "epoch": 257.36842105263156, + "grad_norm": 1.423475980758667, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 39120 + }, + { + "epoch": 257.4342105263158, + "grad_norm": 1.7465128898620605, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 39130 + }, + { + "epoch": 257.5, + "grad_norm": 1.4371193647384644, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 39140 + }, + { + "epoch": 257.5657894736842, + "grad_norm": 1.4257088899612427, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 39150 + }, + { + "epoch": 257.63157894736844, + "grad_norm": 1.1062278747558594, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 39160 + }, + { + "epoch": 257.69736842105266, + "grad_norm": 0.7491597533226013, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 39170 + }, + { + "epoch": 257.7631578947368, + "grad_norm": 1.2030974626541138, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 39180 + }, + { + "epoch": 257.82894736842104, + "grad_norm": 0.9825753569602966, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 39190 + }, + { + "epoch": 257.89473684210526, + "grad_norm": 1.3896033763885498, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 39200 + }, + { + "epoch": 257.9605263157895, + "grad_norm": 1.0874238014221191, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 39210 + }, + { + "epoch": 258.0263157894737, + "grad_norm": 1.1241742372512817, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 39220 + }, + { + "epoch": 258.0921052631579, + "grad_norm": 1.2342971563339233, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 39230 + }, + { + "epoch": 258.1578947368421, + "grad_norm": 1.249884843826294, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 39240 + }, + { + "epoch": 258.2236842105263, + "grad_norm": 1.0342882871627808, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 39250 + }, + { + "epoch": 258.2894736842105, + "grad_norm": 1.2886340618133545, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 39260 + }, + { + "epoch": 258.35526315789474, + "grad_norm": 1.3061116933822632, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 39270 + }, + { + "epoch": 258.42105263157896, + "grad_norm": 1.4507635831832886, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 39280 + }, + { + "epoch": 258.4868421052632, + "grad_norm": 1.4641185998916626, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 39290 + }, + { + "epoch": 258.55263157894734, + "grad_norm": 1.3404735326766968, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 39300 + }, + { + "epoch": 258.61842105263156, + "grad_norm": 0.9776154160499573, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 39310 + }, + { + "epoch": 258.6842105263158, + "grad_norm": 1.0288668870925903, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 39320 + }, + { + "epoch": 258.75, + "grad_norm": 1.1790403127670288, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 39330 + }, + { + "epoch": 258.8157894736842, + "grad_norm": 0.8931066989898682, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 39340 + }, + { + "epoch": 258.88157894736844, + "grad_norm": 1.307145118713379, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 39350 + }, + { + "epoch": 258.94736842105266, + "grad_norm": 1.0468474626541138, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 39360 + }, + { + "epoch": 259.0131578947368, + "grad_norm": 0.9798098206520081, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 39370 + }, + { + "epoch": 259.07894736842104, + "grad_norm": 0.8931979537010193, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 39380 + }, + { + "epoch": 259.14473684210526, + "grad_norm": 1.3130055665969849, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 39390 + }, + { + "epoch": 259.2105263157895, + "grad_norm": 1.2557275295257568, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 39400 + }, + { + "epoch": 259.2763157894737, + "grad_norm": 1.2544786930084229, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 39410 + }, + { + "epoch": 259.3421052631579, + "grad_norm": 1.079939365386963, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 39420 + }, + { + "epoch": 259.4078947368421, + "grad_norm": 1.3079148530960083, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 39430 + }, + { + "epoch": 259.4736842105263, + "grad_norm": 1.2881779670715332, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 39440 + }, + { + "epoch": 259.5394736842105, + "grad_norm": 1.4759607315063477, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 39450 + }, + { + "epoch": 259.60526315789474, + "grad_norm": 1.1630066633224487, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 39460 + }, + { + "epoch": 259.67105263157896, + "grad_norm": 1.894482970237732, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 39470 + }, + { + "epoch": 259.7368421052632, + "grad_norm": 1.7164617776870728, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 39480 + }, + { + "epoch": 259.80263157894734, + "grad_norm": 1.365029215812683, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 39490 + }, + { + "epoch": 259.86842105263156, + "grad_norm": 1.2268500328063965, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 39500 + }, + { + "epoch": 259.9342105263158, + "grad_norm": 1.182262897491455, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 39510 + }, + { + "epoch": 260.0, + "grad_norm": 1.3532487154006958, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 39520 + }, + { + "epoch": 260.0657894736842, + "grad_norm": 1.1759036779403687, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 39530 + }, + { + "epoch": 260.13157894736844, + "grad_norm": 1.0273208618164062, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 39540 + }, + { + "epoch": 260.19736842105266, + "grad_norm": 1.1280847787857056, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 39550 + }, + { + "epoch": 260.2631578947368, + "grad_norm": 1.3776977062225342, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 39560 + }, + { + "epoch": 260.32894736842104, + "grad_norm": 1.1340792179107666, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 39570 + }, + { + "epoch": 260.39473684210526, + "grad_norm": 1.010206699371338, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 39580 + }, + { + "epoch": 260.4605263157895, + "grad_norm": 1.4387060403823853, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 39590 + }, + { + "epoch": 260.5263157894737, + "grad_norm": 1.1363633871078491, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 39600 + }, + { + "epoch": 260.5921052631579, + "grad_norm": 1.204533338546753, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 39610 + }, + { + "epoch": 260.6578947368421, + "grad_norm": 1.3235585689544678, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 39620 + }, + { + "epoch": 260.7236842105263, + "grad_norm": 1.2263247966766357, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 39630 + }, + { + "epoch": 260.7894736842105, + "grad_norm": 1.1299937963485718, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 39640 + }, + { + "epoch": 260.85526315789474, + "grad_norm": 1.2160582542419434, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 39650 + }, + { + "epoch": 260.92105263157896, + "grad_norm": 1.1547380685806274, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 39660 + }, + { + "epoch": 260.9868421052632, + "grad_norm": 1.1193982362747192, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 39670 + }, + { + "epoch": 261.05263157894734, + "grad_norm": 1.039002776145935, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 39680 + }, + { + "epoch": 261.11842105263156, + "grad_norm": 1.2692370414733887, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 39690 + }, + { + "epoch": 261.1842105263158, + "grad_norm": 1.4166964292526245, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 39700 + }, + { + "epoch": 261.25, + "grad_norm": 1.2513854503631592, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 39710 + }, + { + "epoch": 261.3157894736842, + "grad_norm": 1.340706467628479, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 39720 + }, + { + "epoch": 261.38157894736844, + "grad_norm": 1.2143534421920776, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 39730 + }, + { + "epoch": 261.44736842105266, + "grad_norm": 1.6067289113998413, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 39740 + }, + { + "epoch": 261.5131578947368, + "grad_norm": 1.3668274879455566, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 39750 + }, + { + "epoch": 261.57894736842104, + "grad_norm": 0.8968744874000549, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 39760 + }, + { + "epoch": 261.64473684210526, + "grad_norm": 1.5481125116348267, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 39770 + }, + { + "epoch": 261.7105263157895, + "grad_norm": 1.5846647024154663, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 39780 + }, + { + "epoch": 261.7763157894737, + "grad_norm": 0.9274570345878601, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 39790 + }, + { + "epoch": 261.8421052631579, + "grad_norm": 1.1757594347000122, + "learning_rate": 0.0001, + "loss": 0.0181, + "step": 39800 + }, + { + "epoch": 261.9078947368421, + "grad_norm": 1.5246598720550537, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 39810 + }, + { + "epoch": 261.9736842105263, + "grad_norm": 1.5944026708602905, + "learning_rate": 0.0001, + "loss": 0.0187, + "step": 39820 + }, + { + "epoch": 262.0394736842105, + "grad_norm": 1.3532087802886963, + "learning_rate": 0.0001, + "loss": 0.0177, + "step": 39830 + }, + { + "epoch": 262.10526315789474, + "grad_norm": 1.1372880935668945, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 39840 + }, + { + "epoch": 262.17105263157896, + "grad_norm": 1.1915911436080933, + "learning_rate": 0.0001, + "loss": 0.0186, + "step": 39850 + }, + { + "epoch": 262.2368421052632, + "grad_norm": 1.1271971464157104, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 39860 + }, + { + "epoch": 262.30263157894734, + "grad_norm": 1.3422695398330688, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 39870 + }, + { + "epoch": 262.36842105263156, + "grad_norm": 1.7832303047180176, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 39880 + }, + { + "epoch": 262.4342105263158, + "grad_norm": 1.4639548063278198, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 39890 + }, + { + "epoch": 262.5, + "grad_norm": 1.2754369974136353, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 39900 + }, + { + "epoch": 262.5657894736842, + "grad_norm": 1.5328514575958252, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 39910 + }, + { + "epoch": 262.63157894736844, + "grad_norm": 1.320682168006897, + "learning_rate": 0.0001, + "loss": 0.0176, + "step": 39920 + }, + { + "epoch": 262.69736842105266, + "grad_norm": 0.9552030563354492, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 39930 + }, + { + "epoch": 262.7631578947368, + "grad_norm": 1.2544821500778198, + "learning_rate": 0.0001, + "loss": 0.0211, + "step": 39940 + }, + { + "epoch": 262.82894736842104, + "grad_norm": 1.4101918935775757, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 39950 + }, + { + "epoch": 262.89473684210526, + "grad_norm": 1.5442532300949097, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 39960 + }, + { + "epoch": 262.9605263157895, + "grad_norm": 1.2423205375671387, + "learning_rate": 0.0001, + "loss": 0.0173, + "step": 39970 + }, + { + "epoch": 263.0263157894737, + "grad_norm": 1.0011227130889893, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 39980 + }, + { + "epoch": 263.0921052631579, + "grad_norm": 1.0351630449295044, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 39990 + }, + { + "epoch": 263.1578947368421, + "grad_norm": 1.29893159866333, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 40000 + }, + { + "epoch": 263.2236842105263, + "grad_norm": 1.5265520811080933, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 40010 + }, + { + "epoch": 263.2894736842105, + "grad_norm": 1.2051048278808594, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 40020 + }, + { + "epoch": 263.35526315789474, + "grad_norm": 1.576757788658142, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 40030 + }, + { + "epoch": 263.42105263157896, + "grad_norm": 1.4314970970153809, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 40040 + }, + { + "epoch": 263.4868421052632, + "grad_norm": 1.0734888315200806, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 40050 + }, + { + "epoch": 263.55263157894734, + "grad_norm": 1.0169308185577393, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 40060 + }, + { + "epoch": 263.61842105263156, + "grad_norm": 1.0590006113052368, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 40070 + }, + { + "epoch": 263.6842105263158, + "grad_norm": 1.1199071407318115, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 40080 + }, + { + "epoch": 263.75, + "grad_norm": 0.9885268211364746, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 40090 + }, + { + "epoch": 263.8157894736842, + "grad_norm": 0.7480038404464722, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 40100 + }, + { + "epoch": 263.88157894736844, + "grad_norm": 0.7771443128585815, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 40110 + }, + { + "epoch": 263.94736842105266, + "grad_norm": 1.1133038997650146, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 40120 + }, + { + "epoch": 264.0131578947368, + "grad_norm": 0.8933395147323608, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 40130 + }, + { + "epoch": 264.07894736842104, + "grad_norm": 1.2622970342636108, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 40140 + }, + { + "epoch": 264.14473684210526, + "grad_norm": 1.2373851537704468, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 40150 + }, + { + "epoch": 264.2105263157895, + "grad_norm": 1.3317362070083618, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 40160 + }, + { + "epoch": 264.2763157894737, + "grad_norm": 1.274640440940857, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 40170 + }, + { + "epoch": 264.3421052631579, + "grad_norm": 0.8638913035392761, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 40180 + }, + { + "epoch": 264.4078947368421, + "grad_norm": 1.1487349271774292, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 40190 + }, + { + "epoch": 264.4736842105263, + "grad_norm": 1.4805721044540405, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 40200 + }, + { + "epoch": 264.5394736842105, + "grad_norm": 1.180604338645935, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 40210 + }, + { + "epoch": 264.60526315789474, + "grad_norm": 1.2854849100112915, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 40220 + }, + { + "epoch": 264.67105263157896, + "grad_norm": 1.2706739902496338, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 40230 + }, + { + "epoch": 264.7368421052632, + "grad_norm": 1.0163190364837646, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 40240 + }, + { + "epoch": 264.80263157894734, + "grad_norm": 1.3690663576126099, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 40250 + }, + { + "epoch": 264.86842105263156, + "grad_norm": 0.8744670748710632, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 40260 + }, + { + "epoch": 264.9342105263158, + "grad_norm": 1.1227748394012451, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 40270 + }, + { + "epoch": 265.0, + "grad_norm": 1.1143121719360352, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 40280 + }, + { + "epoch": 265.0657894736842, + "grad_norm": 1.5000947713851929, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 40290 + }, + { + "epoch": 265.13157894736844, + "grad_norm": 1.3426282405853271, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 40300 + }, + { + "epoch": 265.19736842105266, + "grad_norm": 0.9148724675178528, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 40310 + }, + { + "epoch": 265.2631578947368, + "grad_norm": 0.9080812335014343, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 40320 + }, + { + "epoch": 265.32894736842104, + "grad_norm": 0.8644049167633057, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 40330 + }, + { + "epoch": 265.39473684210526, + "grad_norm": 0.9863361120223999, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 40340 + }, + { + "epoch": 265.4605263157895, + "grad_norm": 1.1581717729568481, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 40350 + }, + { + "epoch": 265.5263157894737, + "grad_norm": 0.8015313148498535, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 40360 + }, + { + "epoch": 265.5921052631579, + "grad_norm": 0.9356069564819336, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 40370 + }, + { + "epoch": 265.6578947368421, + "grad_norm": 1.1475307941436768, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 40380 + }, + { + "epoch": 265.7236842105263, + "grad_norm": 1.0415571928024292, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 40390 + }, + { + "epoch": 265.7894736842105, + "grad_norm": 1.4499433040618896, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 40400 + }, + { + "epoch": 265.85526315789474, + "grad_norm": 1.0016824007034302, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 40410 + }, + { + "epoch": 265.92105263157896, + "grad_norm": 1.426163911819458, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 40420 + }, + { + "epoch": 265.9868421052632, + "grad_norm": 1.2102915048599243, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 40430 + }, + { + "epoch": 266.05263157894734, + "grad_norm": 1.2748351097106934, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 40440 + }, + { + "epoch": 266.11842105263156, + "grad_norm": 1.0148237943649292, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 40450 + }, + { + "epoch": 266.1842105263158, + "grad_norm": 1.6188982725143433, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 40460 + }, + { + "epoch": 266.25, + "grad_norm": 1.2381527423858643, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 40470 + }, + { + "epoch": 266.3157894736842, + "grad_norm": 1.1073635816574097, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 40480 + }, + { + "epoch": 266.38157894736844, + "grad_norm": 1.1717376708984375, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 40490 + }, + { + "epoch": 266.44736842105266, + "grad_norm": 1.3967905044555664, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 40500 + }, + { + "epoch": 266.5131578947368, + "grad_norm": 0.78745037317276, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 40510 + }, + { + "epoch": 266.57894736842104, + "grad_norm": 1.2975760698318481, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 40520 + }, + { + "epoch": 266.64473684210526, + "grad_norm": 1.0333595275878906, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 40530 + }, + { + "epoch": 266.7105263157895, + "grad_norm": 1.132156252861023, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 40540 + }, + { + "epoch": 266.7763157894737, + "grad_norm": 1.1081570386886597, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 40550 + }, + { + "epoch": 266.8421052631579, + "grad_norm": 1.6667684316635132, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 40560 + }, + { + "epoch": 266.9078947368421, + "grad_norm": 1.6296488046646118, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 40570 + }, + { + "epoch": 266.9736842105263, + "grad_norm": 1.4970145225524902, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 40580 + }, + { + "epoch": 267.0394736842105, + "grad_norm": 1.2086564302444458, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 40590 + }, + { + "epoch": 267.10526315789474, + "grad_norm": 1.0567036867141724, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 40600 + }, + { + "epoch": 267.17105263157896, + "grad_norm": 1.2320477962493896, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 40610 + }, + { + "epoch": 267.2368421052632, + "grad_norm": 1.1581894159317017, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 40620 + }, + { + "epoch": 267.30263157894734, + "grad_norm": 1.2620066404342651, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 40630 + }, + { + "epoch": 267.36842105263156, + "grad_norm": 1.0989741086959839, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 40640 + }, + { + "epoch": 267.4342105263158, + "grad_norm": 1.0343804359436035, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 40650 + }, + { + "epoch": 267.5, + "grad_norm": 1.1863844394683838, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 40660 + }, + { + "epoch": 267.5657894736842, + "grad_norm": 1.5389338731765747, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 40670 + }, + { + "epoch": 267.63157894736844, + "grad_norm": 0.8726987242698669, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 40680 + }, + { + "epoch": 267.69736842105266, + "grad_norm": 0.9518386721611023, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 40690 + }, + { + "epoch": 267.7631578947368, + "grad_norm": 0.9239757657051086, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 40700 + }, + { + "epoch": 267.82894736842104, + "grad_norm": 0.800014078617096, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 40710 + }, + { + "epoch": 267.89473684210526, + "grad_norm": 1.1381514072418213, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 40720 + }, + { + "epoch": 267.9605263157895, + "grad_norm": 1.2737356424331665, + "learning_rate": 0.0001, + "loss": 0.0185, + "step": 40730 + }, + { + "epoch": 268.0263157894737, + "grad_norm": 1.2936004400253296, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 40740 + }, + { + "epoch": 268.0921052631579, + "grad_norm": 1.2660059928894043, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 40750 + }, + { + "epoch": 268.1578947368421, + "grad_norm": 1.2411798238754272, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 40760 + }, + { + "epoch": 268.2236842105263, + "grad_norm": 0.9876865744590759, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 40770 + }, + { + "epoch": 268.2894736842105, + "grad_norm": 1.146823525428772, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 40780 + }, + { + "epoch": 268.35526315789474, + "grad_norm": 1.0858898162841797, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 40790 + }, + { + "epoch": 268.42105263157896, + "grad_norm": 1.208295464515686, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 40800 + }, + { + "epoch": 268.4868421052632, + "grad_norm": 1.1481982469558716, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 40810 + }, + { + "epoch": 268.55263157894734, + "grad_norm": 1.2117033004760742, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 40820 + }, + { + "epoch": 268.61842105263156, + "grad_norm": 1.550671100616455, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 40830 + }, + { + "epoch": 268.6842105263158, + "grad_norm": 1.131887435913086, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 40840 + }, + { + "epoch": 268.75, + "grad_norm": 1.248721957206726, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 40850 + }, + { + "epoch": 268.8157894736842, + "grad_norm": 1.0527727603912354, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 40860 + }, + { + "epoch": 268.88157894736844, + "grad_norm": 1.243199348449707, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 40870 + }, + { + "epoch": 268.94736842105266, + "grad_norm": 1.0433778762817383, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 40880 + }, + { + "epoch": 269.0131578947368, + "grad_norm": 1.5813816785812378, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 40890 + }, + { + "epoch": 269.07894736842104, + "grad_norm": 1.3357555866241455, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 40900 + }, + { + "epoch": 269.14473684210526, + "grad_norm": 1.4037662744522095, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 40910 + }, + { + "epoch": 269.2105263157895, + "grad_norm": 1.459270715713501, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 40920 + }, + { + "epoch": 269.2763157894737, + "grad_norm": 1.2540459632873535, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 40930 + }, + { + "epoch": 269.3421052631579, + "grad_norm": 0.7612592577934265, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 40940 + }, + { + "epoch": 269.4078947368421, + "grad_norm": 1.0705971717834473, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 40950 + }, + { + "epoch": 269.4736842105263, + "grad_norm": 1.2538421154022217, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 40960 + }, + { + "epoch": 269.5394736842105, + "grad_norm": 1.143507719039917, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 40970 + }, + { + "epoch": 269.60526315789474, + "grad_norm": 1.3670626878738403, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 40980 + }, + { + "epoch": 269.67105263157896, + "grad_norm": 1.126387596130371, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 40990 + }, + { + "epoch": 269.7368421052632, + "grad_norm": 1.3656985759735107, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 41000 + }, + { + "epoch": 269.80263157894734, + "grad_norm": 1.4171274900436401, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 41010 + }, + { + "epoch": 269.86842105263156, + "grad_norm": 1.4547640085220337, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 41020 + }, + { + "epoch": 269.9342105263158, + "grad_norm": 1.1633445024490356, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 41030 + }, + { + "epoch": 270.0, + "grad_norm": 1.2674797773361206, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 41040 + }, + { + "epoch": 270.0657894736842, + "grad_norm": 1.2385079860687256, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 41050 + }, + { + "epoch": 270.13157894736844, + "grad_norm": 1.0688042640686035, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 41060 + }, + { + "epoch": 270.19736842105266, + "grad_norm": 1.8457244634628296, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 41070 + }, + { + "epoch": 270.2631578947368, + "grad_norm": 1.4692785739898682, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 41080 + }, + { + "epoch": 270.32894736842104, + "grad_norm": 1.0676112174987793, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 41090 + }, + { + "epoch": 270.39473684210526, + "grad_norm": 1.3465666770935059, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 41100 + }, + { + "epoch": 270.4605263157895, + "grad_norm": 1.012242317199707, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 41110 + }, + { + "epoch": 270.5263157894737, + "grad_norm": 1.1431010961532593, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 41120 + }, + { + "epoch": 270.5921052631579, + "grad_norm": 1.2994465827941895, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 41130 + }, + { + "epoch": 270.6578947368421, + "grad_norm": 1.2632924318313599, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 41140 + }, + { + "epoch": 270.7236842105263, + "grad_norm": 1.0502218008041382, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 41150 + }, + { + "epoch": 270.7894736842105, + "grad_norm": 1.1967225074768066, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 41160 + }, + { + "epoch": 270.85526315789474, + "grad_norm": 1.095524787902832, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 41170 + }, + { + "epoch": 270.92105263157896, + "grad_norm": 1.2695966958999634, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 41180 + }, + { + "epoch": 270.9868421052632, + "grad_norm": 1.425173282623291, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 41190 + }, + { + "epoch": 271.05263157894734, + "grad_norm": 0.9129456877708435, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 41200 + }, + { + "epoch": 271.11842105263156, + "grad_norm": 0.9223254919052124, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 41210 + }, + { + "epoch": 271.1842105263158, + "grad_norm": 1.0759285688400269, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 41220 + }, + { + "epoch": 271.25, + "grad_norm": 1.4797776937484741, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 41230 + }, + { + "epoch": 271.3157894736842, + "grad_norm": 1.368739128112793, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 41240 + }, + { + "epoch": 271.38157894736844, + "grad_norm": 1.1188558340072632, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 41250 + }, + { + "epoch": 271.44736842105266, + "grad_norm": 1.3473429679870605, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 41260 + }, + { + "epoch": 271.5131578947368, + "grad_norm": 1.3430105447769165, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 41270 + }, + { + "epoch": 271.57894736842104, + "grad_norm": 1.654737949371338, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 41280 + }, + { + "epoch": 271.64473684210526, + "grad_norm": 1.832235336303711, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 41290 + }, + { + "epoch": 271.7105263157895, + "grad_norm": 1.3615492582321167, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 41300 + }, + { + "epoch": 271.7763157894737, + "grad_norm": 1.5935784578323364, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 41310 + }, + { + "epoch": 271.8421052631579, + "grad_norm": 1.6195662021636963, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 41320 + }, + { + "epoch": 271.9078947368421, + "grad_norm": 1.3322649002075195, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 41330 + }, + { + "epoch": 271.9736842105263, + "grad_norm": 1.2479629516601562, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 41340 + }, + { + "epoch": 272.0394736842105, + "grad_norm": 1.4976459741592407, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 41350 + }, + { + "epoch": 272.10526315789474, + "grad_norm": 1.2839045524597168, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 41360 + }, + { + "epoch": 272.17105263157896, + "grad_norm": 1.2758049964904785, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 41370 + }, + { + "epoch": 272.2368421052632, + "grad_norm": 1.352516531944275, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 41380 + }, + { + "epoch": 272.30263157894734, + "grad_norm": 1.0206512212753296, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 41390 + }, + { + "epoch": 272.36842105263156, + "grad_norm": 1.2636960744857788, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 41400 + }, + { + "epoch": 272.4342105263158, + "grad_norm": 1.058984398841858, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 41410 + }, + { + "epoch": 272.5, + "grad_norm": 1.2583855390548706, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 41420 + }, + { + "epoch": 272.5657894736842, + "grad_norm": 1.1971688270568848, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 41430 + }, + { + "epoch": 272.63157894736844, + "grad_norm": 1.403083324432373, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 41440 + }, + { + "epoch": 272.69736842105266, + "grad_norm": 1.0412906408309937, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 41450 + }, + { + "epoch": 272.7631578947368, + "grad_norm": 1.2968218326568604, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 41460 + }, + { + "epoch": 272.82894736842104, + "grad_norm": 1.4638139009475708, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 41470 + }, + { + "epoch": 272.89473684210526, + "grad_norm": 1.3034776449203491, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 41480 + }, + { + "epoch": 272.9605263157895, + "grad_norm": 1.0589145421981812, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 41490 + }, + { + "epoch": 273.0263157894737, + "grad_norm": 0.905729353427887, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 41500 + }, + { + "epoch": 273.0921052631579, + "grad_norm": 1.093269944190979, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 41510 + }, + { + "epoch": 273.1578947368421, + "grad_norm": 1.5292916297912598, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 41520 + }, + { + "epoch": 273.2236842105263, + "grad_norm": 1.5569329261779785, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 41530 + }, + { + "epoch": 273.2894736842105, + "grad_norm": 1.620352029800415, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 41540 + }, + { + "epoch": 273.35526315789474, + "grad_norm": 1.3029897212982178, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 41550 + }, + { + "epoch": 273.42105263157896, + "grad_norm": 1.1392931938171387, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 41560 + }, + { + "epoch": 273.4868421052632, + "grad_norm": 1.3479526042938232, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 41570 + }, + { + "epoch": 273.55263157894734, + "grad_norm": 1.3470922708511353, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 41580 + }, + { + "epoch": 273.61842105263156, + "grad_norm": 1.2211447954177856, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 41590 + }, + { + "epoch": 273.6842105263158, + "grad_norm": 1.1938384771347046, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 41600 + }, + { + "epoch": 273.75, + "grad_norm": 1.1164087057113647, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 41610 + }, + { + "epoch": 273.8157894736842, + "grad_norm": 1.1596921682357788, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 41620 + }, + { + "epoch": 273.88157894736844, + "grad_norm": 0.9100044369697571, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 41630 + }, + { + "epoch": 273.94736842105266, + "grad_norm": 0.9539213180541992, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 41640 + }, + { + "epoch": 274.0131578947368, + "grad_norm": 0.9775730967521667, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 41650 + }, + { + "epoch": 274.07894736842104, + "grad_norm": 1.1786597967147827, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 41660 + }, + { + "epoch": 274.14473684210526, + "grad_norm": 0.8824886679649353, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 41670 + }, + { + "epoch": 274.2105263157895, + "grad_norm": 1.173654556274414, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 41680 + }, + { + "epoch": 274.2763157894737, + "grad_norm": 1.509282112121582, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 41690 + }, + { + "epoch": 274.3421052631579, + "grad_norm": 1.3084309101104736, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 41700 + }, + { + "epoch": 274.4078947368421, + "grad_norm": 1.2768287658691406, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 41710 + }, + { + "epoch": 274.4736842105263, + "grad_norm": 1.142025351524353, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 41720 + }, + { + "epoch": 274.5394736842105, + "grad_norm": 1.13783597946167, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 41730 + }, + { + "epoch": 274.60526315789474, + "grad_norm": 0.8221064805984497, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 41740 + }, + { + "epoch": 274.67105263157896, + "grad_norm": 1.6410062313079834, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 41750 + }, + { + "epoch": 274.7368421052632, + "grad_norm": 1.4255626201629639, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 41760 + }, + { + "epoch": 274.80263157894734, + "grad_norm": 1.1363221406936646, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 41770 + }, + { + "epoch": 274.86842105263156, + "grad_norm": 1.3313068151474, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 41780 + }, + { + "epoch": 274.9342105263158, + "grad_norm": 1.4202282428741455, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 41790 + }, + { + "epoch": 275.0, + "grad_norm": 1.3749666213989258, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 41800 + }, + { + "epoch": 275.0657894736842, + "grad_norm": 1.2333338260650635, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 41810 + }, + { + "epoch": 275.13157894736844, + "grad_norm": 1.221083164215088, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 41820 + }, + { + "epoch": 275.19736842105266, + "grad_norm": 0.8739851713180542, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 41830 + }, + { + "epoch": 275.2631578947368, + "grad_norm": 1.3110480308532715, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 41840 + }, + { + "epoch": 275.32894736842104, + "grad_norm": 0.8947159647941589, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 41850 + }, + { + "epoch": 275.39473684210526, + "grad_norm": 1.2646197080612183, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 41860 + }, + { + "epoch": 275.4605263157895, + "grad_norm": 1.0885008573532104, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 41870 + }, + { + "epoch": 275.5263157894737, + "grad_norm": 1.4110907316207886, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 41880 + }, + { + "epoch": 275.5921052631579, + "grad_norm": 1.312502145767212, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 41890 + }, + { + "epoch": 275.6578947368421, + "grad_norm": 1.3466676473617554, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 41900 + }, + { + "epoch": 275.7236842105263, + "grad_norm": 1.6430773735046387, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 41910 + }, + { + "epoch": 275.7894736842105, + "grad_norm": 1.3257335424423218, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 41920 + }, + { + "epoch": 275.85526315789474, + "grad_norm": 1.3992869853973389, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 41930 + }, + { + "epoch": 275.92105263157896, + "grad_norm": 1.3640083074569702, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 41940 + }, + { + "epoch": 275.9868421052632, + "grad_norm": 1.2238960266113281, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 41950 + }, + { + "epoch": 276.05263157894734, + "grad_norm": 0.9982905983924866, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 41960 + }, + { + "epoch": 276.11842105263156, + "grad_norm": 1.0933977365493774, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 41970 + }, + { + "epoch": 276.1842105263158, + "grad_norm": 1.1937322616577148, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 41980 + }, + { + "epoch": 276.25, + "grad_norm": 1.1956714391708374, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 41990 + }, + { + "epoch": 276.3157894736842, + "grad_norm": 1.256412386894226, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 42000 + }, + { + "epoch": 276.38157894736844, + "grad_norm": 1.035623550415039, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 42010 + }, + { + "epoch": 276.44736842105266, + "grad_norm": 1.2642019987106323, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 42020 + }, + { + "epoch": 276.5131578947368, + "grad_norm": 1.7125762701034546, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 42030 + }, + { + "epoch": 276.57894736842104, + "grad_norm": 1.3358557224273682, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 42040 + }, + { + "epoch": 276.64473684210526, + "grad_norm": 1.3947197198867798, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 42050 + }, + { + "epoch": 276.7105263157895, + "grad_norm": 1.3491345643997192, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 42060 + }, + { + "epoch": 276.7763157894737, + "grad_norm": 1.3073524236679077, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 42070 + }, + { + "epoch": 276.8421052631579, + "grad_norm": 1.403847098350525, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 42080 + }, + { + "epoch": 276.9078947368421, + "grad_norm": 1.1999281644821167, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 42090 + }, + { + "epoch": 276.9736842105263, + "grad_norm": 1.2854633331298828, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 42100 + }, + { + "epoch": 277.0394736842105, + "grad_norm": 1.1524097919464111, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 42110 + }, + { + "epoch": 277.10526315789474, + "grad_norm": 1.1198937892913818, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 42120 + }, + { + "epoch": 277.17105263157896, + "grad_norm": 0.9820287823677063, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 42130 + }, + { + "epoch": 277.2368421052632, + "grad_norm": 0.7458430528640747, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 42140 + }, + { + "epoch": 277.30263157894734, + "grad_norm": 1.2689908742904663, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 42150 + }, + { + "epoch": 277.36842105263156, + "grad_norm": 1.062583088874817, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 42160 + }, + { + "epoch": 277.4342105263158, + "grad_norm": 0.82270747423172, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 42170 + }, + { + "epoch": 277.5, + "grad_norm": 0.8221926093101501, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 42180 + }, + { + "epoch": 277.5657894736842, + "grad_norm": 0.8966209888458252, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 42190 + }, + { + "epoch": 277.63157894736844, + "grad_norm": 1.6077913045883179, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 42200 + }, + { + "epoch": 277.69736842105266, + "grad_norm": 0.9948518872261047, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 42210 + }, + { + "epoch": 277.7631578947368, + "grad_norm": 1.1815922260284424, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 42220 + }, + { + "epoch": 277.82894736842104, + "grad_norm": 1.0743731260299683, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 42230 + }, + { + "epoch": 277.89473684210526, + "grad_norm": 0.9307351112365723, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 42240 + }, + { + "epoch": 277.9605263157895, + "grad_norm": 1.2313666343688965, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 42250 + }, + { + "epoch": 278.0263157894737, + "grad_norm": 1.1681599617004395, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 42260 + }, + { + "epoch": 278.0921052631579, + "grad_norm": 1.4541995525360107, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 42270 + }, + { + "epoch": 278.1578947368421, + "grad_norm": 1.107507586479187, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 42280 + }, + { + "epoch": 278.2236842105263, + "grad_norm": 0.9326557517051697, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 42290 + }, + { + "epoch": 278.2894736842105, + "grad_norm": 1.512211799621582, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 42300 + }, + { + "epoch": 278.35526315789474, + "grad_norm": 1.3133347034454346, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 42310 + }, + { + "epoch": 278.42105263157896, + "grad_norm": 1.0264031887054443, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 42320 + }, + { + "epoch": 278.4868421052632, + "grad_norm": 1.1678309440612793, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 42330 + }, + { + "epoch": 278.55263157894734, + "grad_norm": 0.7482702732086182, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 42340 + }, + { + "epoch": 278.61842105263156, + "grad_norm": 1.0552853345870972, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 42350 + }, + { + "epoch": 278.6842105263158, + "grad_norm": 1.301750659942627, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 42360 + }, + { + "epoch": 278.75, + "grad_norm": 0.8871774673461914, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 42370 + }, + { + "epoch": 278.8157894736842, + "grad_norm": 0.9362806677818298, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 42380 + }, + { + "epoch": 278.88157894736844, + "grad_norm": 0.6715094447135925, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 42390 + }, + { + "epoch": 278.94736842105266, + "grad_norm": 1.3032416105270386, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 42400 + }, + { + "epoch": 279.0131578947368, + "grad_norm": 1.2517324686050415, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 42410 + }, + { + "epoch": 279.07894736842104, + "grad_norm": 1.0311671495437622, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 42420 + }, + { + "epoch": 279.14473684210526, + "grad_norm": 0.9779248237609863, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 42430 + }, + { + "epoch": 279.2105263157895, + "grad_norm": 1.322513461112976, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 42440 + }, + { + "epoch": 279.2763157894737, + "grad_norm": 1.117714285850525, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 42450 + }, + { + "epoch": 279.3421052631579, + "grad_norm": 1.0339843034744263, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 42460 + }, + { + "epoch": 279.4078947368421, + "grad_norm": 1.3660122156143188, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 42470 + }, + { + "epoch": 279.4736842105263, + "grad_norm": 1.191132664680481, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 42480 + }, + { + "epoch": 279.5394736842105, + "grad_norm": 1.476515293121338, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 42490 + }, + { + "epoch": 279.60526315789474, + "grad_norm": 1.0748845338821411, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 42500 + }, + { + "epoch": 279.67105263157896, + "grad_norm": 1.480494499206543, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 42510 + }, + { + "epoch": 279.7368421052632, + "grad_norm": 1.1140382289886475, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 42520 + }, + { + "epoch": 279.80263157894734, + "grad_norm": 1.4050570726394653, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 42530 + }, + { + "epoch": 279.86842105263156, + "grad_norm": 1.3111275434494019, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 42540 + }, + { + "epoch": 279.9342105263158, + "grad_norm": 1.2099816799163818, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 42550 + }, + { + "epoch": 280.0, + "grad_norm": 1.2732938528060913, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 42560 + }, + { + "epoch": 280.0657894736842, + "grad_norm": 1.4480763673782349, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 42570 + }, + { + "epoch": 280.13157894736844, + "grad_norm": 1.1580476760864258, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 42580 + }, + { + "epoch": 280.19736842105266, + "grad_norm": 1.4821127653121948, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 42590 + }, + { + "epoch": 280.2631578947368, + "grad_norm": 1.0352067947387695, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 42600 + }, + { + "epoch": 280.32894736842104, + "grad_norm": 1.536171793937683, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 42610 + }, + { + "epoch": 280.39473684210526, + "grad_norm": 1.3197144269943237, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 42620 + }, + { + "epoch": 280.4605263157895, + "grad_norm": 1.0145988464355469, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 42630 + }, + { + "epoch": 280.5263157894737, + "grad_norm": 1.5612324476242065, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 42640 + }, + { + "epoch": 280.5921052631579, + "grad_norm": 1.013548493385315, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 42650 + }, + { + "epoch": 280.6578947368421, + "grad_norm": 1.0804232358932495, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 42660 + }, + { + "epoch": 280.7236842105263, + "grad_norm": 1.183570146560669, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 42670 + }, + { + "epoch": 280.7894736842105, + "grad_norm": 1.551816463470459, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 42680 + }, + { + "epoch": 280.85526315789474, + "grad_norm": 1.5829601287841797, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 42690 + }, + { + "epoch": 280.92105263157896, + "grad_norm": 1.6313118934631348, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 42700 + }, + { + "epoch": 280.9868421052632, + "grad_norm": 0.820428192615509, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 42710 + }, + { + "epoch": 281.05263157894734, + "grad_norm": 1.293191909790039, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 42720 + }, + { + "epoch": 281.11842105263156, + "grad_norm": 1.3593038320541382, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 42730 + }, + { + "epoch": 281.1842105263158, + "grad_norm": 1.3805264234542847, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 42740 + }, + { + "epoch": 281.25, + "grad_norm": 1.3258028030395508, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 42750 + }, + { + "epoch": 281.3157894736842, + "grad_norm": 1.2791032791137695, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 42760 + }, + { + "epoch": 281.38157894736844, + "grad_norm": 1.3307384252548218, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 42770 + }, + { + "epoch": 281.44736842105266, + "grad_norm": 1.1572662591934204, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 42780 + }, + { + "epoch": 281.5131578947368, + "grad_norm": 1.5701806545257568, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 42790 + }, + { + "epoch": 281.57894736842104, + "grad_norm": 1.2336055040359497, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 42800 + }, + { + "epoch": 281.64473684210526, + "grad_norm": 1.0266544818878174, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 42810 + }, + { + "epoch": 281.7105263157895, + "grad_norm": 0.840146541595459, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 42820 + }, + { + "epoch": 281.7763157894737, + "grad_norm": 1.1825062036514282, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 42830 + }, + { + "epoch": 281.8421052631579, + "grad_norm": 1.0842541456222534, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 42840 + }, + { + "epoch": 281.9078947368421, + "grad_norm": 1.5062053203582764, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 42850 + }, + { + "epoch": 281.9736842105263, + "grad_norm": 0.8112286925315857, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 42860 + }, + { + "epoch": 282.0394736842105, + "grad_norm": 1.1789262294769287, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 42870 + }, + { + "epoch": 282.10526315789474, + "grad_norm": 1.375529170036316, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 42880 + }, + { + "epoch": 282.17105263157896, + "grad_norm": 1.330922245979309, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 42890 + }, + { + "epoch": 282.2368421052632, + "grad_norm": 1.3827651739120483, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 42900 + }, + { + "epoch": 282.30263157894734, + "grad_norm": 1.2131527662277222, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 42910 + }, + { + "epoch": 282.36842105263156, + "grad_norm": 1.3317664861679077, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 42920 + }, + { + "epoch": 282.4342105263158, + "grad_norm": 1.1144113540649414, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 42930 + }, + { + "epoch": 282.5, + "grad_norm": 1.6057426929473877, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 42940 + }, + { + "epoch": 282.5657894736842, + "grad_norm": 1.3409185409545898, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 42950 + }, + { + "epoch": 282.63157894736844, + "grad_norm": 1.5420650243759155, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 42960 + }, + { + "epoch": 282.69736842105266, + "grad_norm": 1.1373865604400635, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 42970 + }, + { + "epoch": 282.7631578947368, + "grad_norm": 1.2429486513137817, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 42980 + }, + { + "epoch": 282.82894736842104, + "grad_norm": 0.9992790222167969, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 42990 + }, + { + "epoch": 282.89473684210526, + "grad_norm": 1.142056941986084, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 43000 + }, + { + "epoch": 282.9605263157895, + "grad_norm": 1.0192174911499023, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 43010 + }, + { + "epoch": 283.0263157894737, + "grad_norm": 0.9924356937408447, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 43020 + }, + { + "epoch": 283.0921052631579, + "grad_norm": 0.8189639449119568, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 43030 + }, + { + "epoch": 283.1578947368421, + "grad_norm": 0.9445114731788635, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 43040 + }, + { + "epoch": 283.2236842105263, + "grad_norm": 1.3871409893035889, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 43050 + }, + { + "epoch": 283.2894736842105, + "grad_norm": 1.086130142211914, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 43060 + }, + { + "epoch": 283.35526315789474, + "grad_norm": 0.9964144825935364, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 43070 + }, + { + "epoch": 283.42105263157896, + "grad_norm": 1.0563546419143677, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 43080 + }, + { + "epoch": 283.4868421052632, + "grad_norm": 1.270878553390503, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 43090 + }, + { + "epoch": 283.55263157894734, + "grad_norm": 1.3381757736206055, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 43100 + }, + { + "epoch": 283.61842105263156, + "grad_norm": 1.1857870817184448, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 43110 + }, + { + "epoch": 283.6842105263158, + "grad_norm": 1.2086889743804932, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 43120 + }, + { + "epoch": 283.75, + "grad_norm": 1.3116366863250732, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 43130 + }, + { + "epoch": 283.8157894736842, + "grad_norm": 1.1488587856292725, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 43140 + }, + { + "epoch": 283.88157894736844, + "grad_norm": 0.9865978360176086, + "learning_rate": 0.0001, + "loss": 0.0175, + "step": 43150 + }, + { + "epoch": 283.94736842105266, + "grad_norm": 0.9599931240081787, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 43160 + }, + { + "epoch": 284.0131578947368, + "grad_norm": 1.4896612167358398, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 43170 + }, + { + "epoch": 284.07894736842104, + "grad_norm": 1.335207223892212, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 43180 + }, + { + "epoch": 284.14473684210526, + "grad_norm": 0.7765811085700989, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 43190 + }, + { + "epoch": 284.2105263157895, + "grad_norm": 1.1684138774871826, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 43200 + }, + { + "epoch": 284.2763157894737, + "grad_norm": 1.3540507555007935, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 43210 + }, + { + "epoch": 284.3421052631579, + "grad_norm": 1.5758388042449951, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 43220 + }, + { + "epoch": 284.4078947368421, + "grad_norm": 1.411137580871582, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 43230 + }, + { + "epoch": 284.4736842105263, + "grad_norm": 1.1263060569763184, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 43240 + }, + { + "epoch": 284.5394736842105, + "grad_norm": 1.4823040962219238, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 43250 + }, + { + "epoch": 284.60526315789474, + "grad_norm": 1.3825323581695557, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 43260 + }, + { + "epoch": 284.67105263157896, + "grad_norm": 1.1096550226211548, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 43270 + }, + { + "epoch": 284.7368421052632, + "grad_norm": 1.0073033571243286, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 43280 + }, + { + "epoch": 284.80263157894734, + "grad_norm": 1.08073091506958, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 43290 + }, + { + "epoch": 284.86842105263156, + "grad_norm": 0.6875160336494446, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 43300 + }, + { + "epoch": 284.9342105263158, + "grad_norm": 1.0610051155090332, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 43310 + }, + { + "epoch": 285.0, + "grad_norm": 0.9582310318946838, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 43320 + }, + { + "epoch": 285.0657894736842, + "grad_norm": 0.8970026969909668, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 43330 + }, + { + "epoch": 285.13157894736844, + "grad_norm": 0.8817126750946045, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 43340 + }, + { + "epoch": 285.19736842105266, + "grad_norm": 1.2000654935836792, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 43350 + }, + { + "epoch": 285.2631578947368, + "grad_norm": 1.3868666887283325, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 43360 + }, + { + "epoch": 285.32894736842104, + "grad_norm": 0.9346180558204651, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 43370 + }, + { + "epoch": 285.39473684210526, + "grad_norm": 0.9020205736160278, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 43380 + }, + { + "epoch": 285.4605263157895, + "grad_norm": 1.231022834777832, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 43390 + }, + { + "epoch": 285.5263157894737, + "grad_norm": 1.107666015625, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 43400 + }, + { + "epoch": 285.5921052631579, + "grad_norm": 1.5066663026809692, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 43410 + }, + { + "epoch": 285.6578947368421, + "grad_norm": 1.7655541896820068, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 43420 + }, + { + "epoch": 285.7236842105263, + "grad_norm": 1.2582757472991943, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 43430 + }, + { + "epoch": 285.7894736842105, + "grad_norm": 1.1627318859100342, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 43440 + }, + { + "epoch": 285.85526315789474, + "grad_norm": 1.3102083206176758, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 43450 + }, + { + "epoch": 285.92105263157896, + "grad_norm": 1.2415940761566162, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 43460 + }, + { + "epoch": 285.9868421052632, + "grad_norm": 1.494537353515625, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 43470 + }, + { + "epoch": 286.05263157894734, + "grad_norm": 1.4216185808181763, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 43480 + }, + { + "epoch": 286.11842105263156, + "grad_norm": 1.2979694604873657, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 43490 + }, + { + "epoch": 286.1842105263158, + "grad_norm": 0.9436559677124023, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 43500 + }, + { + "epoch": 286.25, + "grad_norm": 1.34527587890625, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 43510 + }, + { + "epoch": 286.3157894736842, + "grad_norm": 1.5451651811599731, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 43520 + }, + { + "epoch": 286.38157894736844, + "grad_norm": 1.5682357549667358, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 43530 + }, + { + "epoch": 286.44736842105266, + "grad_norm": 1.3602068424224854, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 43540 + }, + { + "epoch": 286.5131578947368, + "grad_norm": 1.146419644355774, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 43550 + }, + { + "epoch": 286.57894736842104, + "grad_norm": 0.9245599508285522, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 43560 + }, + { + "epoch": 286.64473684210526, + "grad_norm": 0.9519632458686829, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 43570 + }, + { + "epoch": 286.7105263157895, + "grad_norm": 1.0367894172668457, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 43580 + }, + { + "epoch": 286.7763157894737, + "grad_norm": 1.3567640781402588, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 43590 + }, + { + "epoch": 286.8421052631579, + "grad_norm": 1.3016269207000732, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 43600 + }, + { + "epoch": 286.9078947368421, + "grad_norm": 1.4324815273284912, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 43610 + }, + { + "epoch": 286.9736842105263, + "grad_norm": 1.0870546102523804, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 43620 + }, + { + "epoch": 287.0394736842105, + "grad_norm": 1.5067437887191772, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 43630 + }, + { + "epoch": 287.10526315789474, + "grad_norm": 1.3528972864151, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 43640 + }, + { + "epoch": 287.17105263157896, + "grad_norm": 1.3971104621887207, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 43650 + }, + { + "epoch": 287.2368421052632, + "grad_norm": 1.1645212173461914, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 43660 + }, + { + "epoch": 287.30263157894734, + "grad_norm": 1.295596718788147, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 43670 + }, + { + "epoch": 287.36842105263156, + "grad_norm": 0.900801420211792, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 43680 + }, + { + "epoch": 287.4342105263158, + "grad_norm": 1.128415584564209, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 43690 + }, + { + "epoch": 287.5, + "grad_norm": 1.3298012018203735, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 43700 + }, + { + "epoch": 287.5657894736842, + "grad_norm": 1.2503217458724976, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 43710 + }, + { + "epoch": 287.63157894736844, + "grad_norm": 1.2108162641525269, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 43720 + }, + { + "epoch": 287.69736842105266, + "grad_norm": 1.3603051900863647, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 43730 + }, + { + "epoch": 287.7631578947368, + "grad_norm": 1.156879186630249, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 43740 + }, + { + "epoch": 287.82894736842104, + "grad_norm": 1.0096931457519531, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 43750 + }, + { + "epoch": 287.89473684210526, + "grad_norm": 1.3088841438293457, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 43760 + }, + { + "epoch": 287.9605263157895, + "grad_norm": 1.609463095664978, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 43770 + }, + { + "epoch": 288.0263157894737, + "grad_norm": 1.2793583869934082, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 43780 + }, + { + "epoch": 288.0921052631579, + "grad_norm": 1.3340468406677246, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 43790 + }, + { + "epoch": 288.1578947368421, + "grad_norm": 1.108888030052185, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 43800 + }, + { + "epoch": 288.2236842105263, + "grad_norm": 1.1814451217651367, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 43810 + }, + { + "epoch": 288.2894736842105, + "grad_norm": 1.0461862087249756, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 43820 + }, + { + "epoch": 288.35526315789474, + "grad_norm": 1.1520538330078125, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 43830 + }, + { + "epoch": 288.42105263157896, + "grad_norm": 1.4166773557662964, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 43840 + }, + { + "epoch": 288.4868421052632, + "grad_norm": 1.0651955604553223, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 43850 + }, + { + "epoch": 288.55263157894734, + "grad_norm": 1.5009607076644897, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 43860 + }, + { + "epoch": 288.61842105263156, + "grad_norm": 1.2824265956878662, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 43870 + }, + { + "epoch": 288.6842105263158, + "grad_norm": 1.1175551414489746, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 43880 + }, + { + "epoch": 288.75, + "grad_norm": 1.3193415403366089, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 43890 + }, + { + "epoch": 288.8157894736842, + "grad_norm": 1.3196364641189575, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 43900 + }, + { + "epoch": 288.88157894736844, + "grad_norm": 1.0883409976959229, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 43910 + }, + { + "epoch": 288.94736842105266, + "grad_norm": 1.2597954273223877, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 43920 + }, + { + "epoch": 289.0131578947368, + "grad_norm": 1.32917058467865, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 43930 + }, + { + "epoch": 289.07894736842104, + "grad_norm": 1.2906899452209473, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 43940 + }, + { + "epoch": 289.14473684210526, + "grad_norm": 0.9421364068984985, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 43950 + }, + { + "epoch": 289.2105263157895, + "grad_norm": 0.7365257143974304, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 43960 + }, + { + "epoch": 289.2763157894737, + "grad_norm": 1.0555346012115479, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 43970 + }, + { + "epoch": 289.3421052631579, + "grad_norm": 1.3130031824111938, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 43980 + }, + { + "epoch": 289.4078947368421, + "grad_norm": 1.474686622619629, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 43990 + }, + { + "epoch": 289.4736842105263, + "grad_norm": 1.148776650428772, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 44000 + }, + { + "epoch": 289.5394736842105, + "grad_norm": 1.4202920198440552, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 44010 + }, + { + "epoch": 289.60526315789474, + "grad_norm": 1.267279863357544, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 44020 + }, + { + "epoch": 289.67105263157896, + "grad_norm": 1.1646653413772583, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 44030 + }, + { + "epoch": 289.7368421052632, + "grad_norm": 1.241349458694458, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 44040 + }, + { + "epoch": 289.80263157894734, + "grad_norm": 1.201326847076416, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 44050 + }, + { + "epoch": 289.86842105263156, + "grad_norm": 1.1308850049972534, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 44060 + }, + { + "epoch": 289.9342105263158, + "grad_norm": 1.6807210445404053, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 44070 + }, + { + "epoch": 290.0, + "grad_norm": 1.081008791923523, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 44080 + }, + { + "epoch": 290.0657894736842, + "grad_norm": 1.1021829843521118, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 44090 + }, + { + "epoch": 290.13157894736844, + "grad_norm": 1.4114763736724854, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 44100 + }, + { + "epoch": 290.19736842105266, + "grad_norm": 1.2354848384857178, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 44110 + }, + { + "epoch": 290.2631578947368, + "grad_norm": 1.4642871618270874, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 44120 + }, + { + "epoch": 290.32894736842104, + "grad_norm": 0.9256936311721802, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 44130 + }, + { + "epoch": 290.39473684210526, + "grad_norm": 1.1769204139709473, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 44140 + }, + { + "epoch": 290.4605263157895, + "grad_norm": 1.3560692071914673, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 44150 + }, + { + "epoch": 290.5263157894737, + "grad_norm": 1.3157851696014404, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 44160 + }, + { + "epoch": 290.5921052631579, + "grad_norm": 1.5221748352050781, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 44170 + }, + { + "epoch": 290.6578947368421, + "grad_norm": 1.3193734884262085, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 44180 + }, + { + "epoch": 290.7236842105263, + "grad_norm": 1.5841879844665527, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 44190 + }, + { + "epoch": 290.7894736842105, + "grad_norm": 1.5025423765182495, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 44200 + }, + { + "epoch": 290.85526315789474, + "grad_norm": 1.6240133047103882, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 44210 + }, + { + "epoch": 290.92105263157896, + "grad_norm": 1.4948194026947021, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 44220 + }, + { + "epoch": 290.9868421052632, + "grad_norm": 1.317069411277771, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 44230 + }, + { + "epoch": 291.05263157894734, + "grad_norm": 1.513387680053711, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 44240 + }, + { + "epoch": 291.11842105263156, + "grad_norm": 1.4927419424057007, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 44250 + }, + { + "epoch": 291.1842105263158, + "grad_norm": 1.4577031135559082, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 44260 + }, + { + "epoch": 291.25, + "grad_norm": 1.673728585243225, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 44270 + }, + { + "epoch": 291.3157894736842, + "grad_norm": 0.9547315835952759, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 44280 + }, + { + "epoch": 291.38157894736844, + "grad_norm": 0.90570068359375, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 44290 + }, + { + "epoch": 291.44736842105266, + "grad_norm": 1.1997649669647217, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 44300 + }, + { + "epoch": 291.5131578947368, + "grad_norm": 1.1620087623596191, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 44310 + }, + { + "epoch": 291.57894736842104, + "grad_norm": 1.410526990890503, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 44320 + }, + { + "epoch": 291.64473684210526, + "grad_norm": 1.3183605670928955, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 44330 + }, + { + "epoch": 291.7105263157895, + "grad_norm": 1.7681739330291748, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 44340 + }, + { + "epoch": 291.7763157894737, + "grad_norm": 1.3306128978729248, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 44350 + }, + { + "epoch": 291.8421052631579, + "grad_norm": 1.658753514289856, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 44360 + }, + { + "epoch": 291.9078947368421, + "grad_norm": 1.2005608081817627, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 44370 + }, + { + "epoch": 291.9736842105263, + "grad_norm": 1.5707415342330933, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 44380 + }, + { + "epoch": 292.0394736842105, + "grad_norm": 1.211821436882019, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 44390 + }, + { + "epoch": 292.10526315789474, + "grad_norm": 1.5584156513214111, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 44400 + }, + { + "epoch": 292.17105263157896, + "grad_norm": 1.2665433883666992, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 44410 + }, + { + "epoch": 292.2368421052632, + "grad_norm": 0.9688684344291687, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 44420 + }, + { + "epoch": 292.30263157894734, + "grad_norm": 1.0552864074707031, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 44430 + }, + { + "epoch": 292.36842105263156, + "grad_norm": 1.3883039951324463, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 44440 + }, + { + "epoch": 292.4342105263158, + "grad_norm": 1.1752902269363403, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 44450 + }, + { + "epoch": 292.5, + "grad_norm": 1.1989336013793945, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 44460 + }, + { + "epoch": 292.5657894736842, + "grad_norm": 1.2750636339187622, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 44470 + }, + { + "epoch": 292.63157894736844, + "grad_norm": 1.1853424310684204, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 44480 + }, + { + "epoch": 292.69736842105266, + "grad_norm": 0.9358903765678406, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 44490 + }, + { + "epoch": 292.7631578947368, + "grad_norm": 1.2150110006332397, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 44500 + }, + { + "epoch": 292.82894736842104, + "grad_norm": 1.1464403867721558, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 44510 + }, + { + "epoch": 292.89473684210526, + "grad_norm": 1.5787771940231323, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 44520 + }, + { + "epoch": 292.9605263157895, + "grad_norm": 1.234574794769287, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 44530 + }, + { + "epoch": 293.0263157894737, + "grad_norm": 1.4504281282424927, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 44540 + }, + { + "epoch": 293.0921052631579, + "grad_norm": 1.2790708541870117, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 44550 + }, + { + "epoch": 293.1578947368421, + "grad_norm": 1.0297752618789673, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 44560 + }, + { + "epoch": 293.2236842105263, + "grad_norm": 1.4982763528823853, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 44570 + }, + { + "epoch": 293.2894736842105, + "grad_norm": 1.4405145645141602, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 44580 + }, + { + "epoch": 293.35526315789474, + "grad_norm": 1.618911862373352, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 44590 + }, + { + "epoch": 293.42105263157896, + "grad_norm": 1.1812571287155151, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 44600 + }, + { + "epoch": 293.4868421052632, + "grad_norm": 1.3874218463897705, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 44610 + }, + { + "epoch": 293.55263157894734, + "grad_norm": 1.1860712766647339, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 44620 + }, + { + "epoch": 293.61842105263156, + "grad_norm": 1.5251539945602417, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 44630 + }, + { + "epoch": 293.6842105263158, + "grad_norm": 1.2755794525146484, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 44640 + }, + { + "epoch": 293.75, + "grad_norm": 1.8280200958251953, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 44650 + }, + { + "epoch": 293.8157894736842, + "grad_norm": 1.7811310291290283, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 44660 + }, + { + "epoch": 293.88157894736844, + "grad_norm": 1.8284488916397095, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 44670 + }, + { + "epoch": 293.94736842105266, + "grad_norm": 1.3257440328598022, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 44680 + }, + { + "epoch": 294.0131578947368, + "grad_norm": 1.3897215127944946, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 44690 + }, + { + "epoch": 294.07894736842104, + "grad_norm": 1.2009508609771729, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 44700 + }, + { + "epoch": 294.14473684210526, + "grad_norm": 1.5008352994918823, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 44710 + }, + { + "epoch": 294.2105263157895, + "grad_norm": 1.0818425416946411, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 44720 + }, + { + "epoch": 294.2763157894737, + "grad_norm": 1.3108577728271484, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 44730 + }, + { + "epoch": 294.3421052631579, + "grad_norm": 1.39351224899292, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 44740 + }, + { + "epoch": 294.4078947368421, + "grad_norm": 1.2173326015472412, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 44750 + }, + { + "epoch": 294.4736842105263, + "grad_norm": 1.1741549968719482, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 44760 + }, + { + "epoch": 294.5394736842105, + "grad_norm": 1.123326301574707, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 44770 + }, + { + "epoch": 294.60526315789474, + "grad_norm": 1.1277564764022827, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 44780 + }, + { + "epoch": 294.67105263157896, + "grad_norm": 0.9613653421401978, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 44790 + }, + { + "epoch": 294.7368421052632, + "grad_norm": 0.9356263279914856, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 44800 + }, + { + "epoch": 294.80263157894734, + "grad_norm": 0.9894914031028748, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 44810 + }, + { + "epoch": 294.86842105263156, + "grad_norm": 1.1962043046951294, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 44820 + }, + { + "epoch": 294.9342105263158, + "grad_norm": 1.2527235746383667, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 44830 + }, + { + "epoch": 295.0, + "grad_norm": 1.2737168073654175, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 44840 + }, + { + "epoch": 295.0657894736842, + "grad_norm": 1.6003625392913818, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 44850 + }, + { + "epoch": 295.13157894736844, + "grad_norm": 1.2156645059585571, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 44860 + }, + { + "epoch": 295.19736842105266, + "grad_norm": 1.3037317991256714, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 44870 + }, + { + "epoch": 295.2631578947368, + "grad_norm": 1.2797876596450806, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 44880 + }, + { + "epoch": 295.32894736842104, + "grad_norm": 0.9522796273231506, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 44890 + }, + { + "epoch": 295.39473684210526, + "grad_norm": 0.9512258172035217, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 44900 + }, + { + "epoch": 295.4605263157895, + "grad_norm": 0.9569043517112732, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 44910 + }, + { + "epoch": 295.5263157894737, + "grad_norm": 0.8486437797546387, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 44920 + }, + { + "epoch": 295.5921052631579, + "grad_norm": 0.7993052005767822, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 44930 + }, + { + "epoch": 295.6578947368421, + "grad_norm": 0.8292239308357239, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 44940 + }, + { + "epoch": 295.7236842105263, + "grad_norm": 0.9200776219367981, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 44950 + }, + { + "epoch": 295.7894736842105, + "grad_norm": 1.134240984916687, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 44960 + }, + { + "epoch": 295.85526315789474, + "grad_norm": 1.2464627027511597, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 44970 + }, + { + "epoch": 295.92105263157896, + "grad_norm": 1.2314313650131226, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 44980 + }, + { + "epoch": 295.9868421052632, + "grad_norm": 1.2230592966079712, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 44990 + }, + { + "epoch": 296.05263157894734, + "grad_norm": 1.0356382131576538, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 45000 + }, + { + "epoch": 296.11842105263156, + "grad_norm": 1.4570183753967285, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 45010 + }, + { + "epoch": 296.1842105263158, + "grad_norm": 0.9928843975067139, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 45020 + }, + { + "epoch": 296.25, + "grad_norm": 1.1260870695114136, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 45030 + }, + { + "epoch": 296.3157894736842, + "grad_norm": 1.1443604230880737, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 45040 + }, + { + "epoch": 296.38157894736844, + "grad_norm": 1.091615915298462, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 45050 + }, + { + "epoch": 296.44736842105266, + "grad_norm": 0.9866099953651428, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45060 + }, + { + "epoch": 296.5131578947368, + "grad_norm": 1.350821852684021, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 45070 + }, + { + "epoch": 296.57894736842104, + "grad_norm": 0.8730394244194031, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 45080 + }, + { + "epoch": 296.64473684210526, + "grad_norm": 1.3271095752716064, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 45090 + }, + { + "epoch": 296.7105263157895, + "grad_norm": 1.1160370111465454, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 45100 + }, + { + "epoch": 296.7763157894737, + "grad_norm": 1.0997018814086914, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 45110 + }, + { + "epoch": 296.8421052631579, + "grad_norm": 1.1898871660232544, + "learning_rate": 0.0001, + "loss": 0.0179, + "step": 45120 + }, + { + "epoch": 296.9078947368421, + "grad_norm": 0.9266688227653503, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 45130 + }, + { + "epoch": 296.9736842105263, + "grad_norm": 1.0709036588668823, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 45140 + }, + { + "epoch": 297.0394736842105, + "grad_norm": 1.0864067077636719, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 45150 + }, + { + "epoch": 297.10526315789474, + "grad_norm": 0.7684114575386047, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 45160 + }, + { + "epoch": 297.17105263157896, + "grad_norm": 1.0278398990631104, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 45170 + }, + { + "epoch": 297.2368421052632, + "grad_norm": 1.1107127666473389, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 45180 + }, + { + "epoch": 297.30263157894734, + "grad_norm": 0.9575175046920776, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 45190 + }, + { + "epoch": 297.36842105263156, + "grad_norm": 1.2704232931137085, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 45200 + }, + { + "epoch": 297.4342105263158, + "grad_norm": 1.383664608001709, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 45210 + }, + { + "epoch": 297.5, + "grad_norm": 0.8386159539222717, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 45220 + }, + { + "epoch": 297.5657894736842, + "grad_norm": 1.1953688859939575, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 45230 + }, + { + "epoch": 297.63157894736844, + "grad_norm": 1.0901776552200317, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 45240 + }, + { + "epoch": 297.69736842105266, + "grad_norm": 1.1900759935379028, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 45250 + }, + { + "epoch": 297.7631578947368, + "grad_norm": 0.9617878794670105, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 45260 + }, + { + "epoch": 297.82894736842104, + "grad_norm": 1.275898814201355, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 45270 + }, + { + "epoch": 297.89473684210526, + "grad_norm": 1.2279614210128784, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 45280 + }, + { + "epoch": 297.9605263157895, + "grad_norm": 1.4091862440109253, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 45290 + }, + { + "epoch": 298.0263157894737, + "grad_norm": 1.1640609502792358, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 45300 + }, + { + "epoch": 298.0921052631579, + "grad_norm": 1.000942587852478, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45310 + }, + { + "epoch": 298.1578947368421, + "grad_norm": 0.9416703581809998, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 45320 + }, + { + "epoch": 298.2236842105263, + "grad_norm": 1.3636903762817383, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 45330 + }, + { + "epoch": 298.2894736842105, + "grad_norm": 0.9758198261260986, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 45340 + }, + { + "epoch": 298.35526315789474, + "grad_norm": 1.266970157623291, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 45350 + }, + { + "epoch": 298.42105263157896, + "grad_norm": 1.5186119079589844, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 45360 + }, + { + "epoch": 298.4868421052632, + "grad_norm": 1.2090457677841187, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 45370 + }, + { + "epoch": 298.55263157894734, + "grad_norm": 1.673222541809082, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 45380 + }, + { + "epoch": 298.61842105263156, + "grad_norm": 1.3236820697784424, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 45390 + }, + { + "epoch": 298.6842105263158, + "grad_norm": 1.2713639736175537, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 45400 + }, + { + "epoch": 298.75, + "grad_norm": 1.2878004312515259, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 45410 + }, + { + "epoch": 298.8157894736842, + "grad_norm": 1.013006567955017, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 45420 + }, + { + "epoch": 298.88157894736844, + "grad_norm": 1.3307374715805054, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 45430 + }, + { + "epoch": 298.94736842105266, + "grad_norm": 1.2865517139434814, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45440 + }, + { + "epoch": 299.0131578947368, + "grad_norm": 1.2010281085968018, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 45450 + }, + { + "epoch": 299.07894736842104, + "grad_norm": 1.3164724111557007, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 45460 + }, + { + "epoch": 299.14473684210526, + "grad_norm": 1.2685626745224, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 45470 + }, + { + "epoch": 299.2105263157895, + "grad_norm": 1.0890898704528809, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 45480 + }, + { + "epoch": 299.2763157894737, + "grad_norm": 1.3427873849868774, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 45490 + }, + { + "epoch": 299.3421052631579, + "grad_norm": 1.1876996755599976, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 45500 + }, + { + "epoch": 299.4078947368421, + "grad_norm": 1.2783021926879883, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 45510 + }, + { + "epoch": 299.4736842105263, + "grad_norm": 1.2588651180267334, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45520 + }, + { + "epoch": 299.5394736842105, + "grad_norm": 1.005475640296936, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 45530 + }, + { + "epoch": 299.60526315789474, + "grad_norm": 1.3194289207458496, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 45540 + }, + { + "epoch": 299.67105263157896, + "grad_norm": 1.593381643295288, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 45550 + }, + { + "epoch": 299.7368421052632, + "grad_norm": 1.1744670867919922, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 45560 + }, + { + "epoch": 299.80263157894734, + "grad_norm": 1.151694893836975, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 45570 + }, + { + "epoch": 299.86842105263156, + "grad_norm": 0.9926699995994568, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 45580 + }, + { + "epoch": 299.9342105263158, + "grad_norm": 1.3312798738479614, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 45590 + }, + { + "epoch": 300.0, + "grad_norm": 1.242369294166565, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 45600 + }, + { + "epoch": 300.0657894736842, + "grad_norm": 1.139743447303772, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 45610 + }, + { + "epoch": 300.13157894736844, + "grad_norm": 1.255859375, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 45620 + }, + { + "epoch": 300.19736842105266, + "grad_norm": 1.1239902973175049, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 45630 + }, + { + "epoch": 300.2631578947368, + "grad_norm": 1.4652752876281738, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 45640 + }, + { + "epoch": 300.32894736842104, + "grad_norm": 1.098900556564331, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 45650 + }, + { + "epoch": 300.39473684210526, + "grad_norm": 0.6829715371131897, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45660 + }, + { + "epoch": 300.4605263157895, + "grad_norm": 1.10642409324646, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45670 + }, + { + "epoch": 300.5263157894737, + "grad_norm": 0.9976691007614136, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45680 + }, + { + "epoch": 300.5921052631579, + "grad_norm": 1.3558682203292847, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 45690 + }, + { + "epoch": 300.6578947368421, + "grad_norm": 1.131054162979126, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 45700 + }, + { + "epoch": 300.7236842105263, + "grad_norm": 1.3741534948349, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 45710 + }, + { + "epoch": 300.7894736842105, + "grad_norm": 1.3326232433319092, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 45720 + }, + { + "epoch": 300.85526315789474, + "grad_norm": 1.5174059867858887, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 45730 + }, + { + "epoch": 300.92105263157896, + "grad_norm": 1.343550443649292, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 45740 + }, + { + "epoch": 300.9868421052632, + "grad_norm": 1.3771088123321533, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 45750 + }, + { + "epoch": 301.05263157894734, + "grad_norm": 1.3173880577087402, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 45760 + }, + { + "epoch": 301.11842105263156, + "grad_norm": 1.4637528657913208, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 45770 + }, + { + "epoch": 301.1842105263158, + "grad_norm": 1.0429658889770508, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 45780 + }, + { + "epoch": 301.25, + "grad_norm": 1.3570419549942017, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 45790 + }, + { + "epoch": 301.3157894736842, + "grad_norm": 1.0001505613327026, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 45800 + }, + { + "epoch": 301.38157894736844, + "grad_norm": 1.192205786705017, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45810 + }, + { + "epoch": 301.44736842105266, + "grad_norm": 1.4170939922332764, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 45820 + }, + { + "epoch": 301.5131578947368, + "grad_norm": 1.2274196147918701, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45830 + }, + { + "epoch": 301.57894736842104, + "grad_norm": 1.2040157318115234, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 45840 + }, + { + "epoch": 301.64473684210526, + "grad_norm": 1.2414194345474243, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 45850 + }, + { + "epoch": 301.7105263157895, + "grad_norm": 1.185573935508728, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 45860 + }, + { + "epoch": 301.7763157894737, + "grad_norm": 1.1964563131332397, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 45870 + }, + { + "epoch": 301.8421052631579, + "grad_norm": 1.363934874534607, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 45880 + }, + { + "epoch": 301.9078947368421, + "grad_norm": 1.0584746599197388, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 45890 + }, + { + "epoch": 301.9736842105263, + "grad_norm": 1.0964704751968384, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 45900 + }, + { + "epoch": 302.0394736842105, + "grad_norm": 0.9291341304779053, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45910 + }, + { + "epoch": 302.10526315789474, + "grad_norm": 1.3490403890609741, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 45920 + }, + { + "epoch": 302.17105263157896, + "grad_norm": 1.4308061599731445, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 45930 + }, + { + "epoch": 302.2368421052632, + "grad_norm": 1.307144045829773, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 45940 + }, + { + "epoch": 302.30263157894734, + "grad_norm": 1.7374738454818726, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 45950 + }, + { + "epoch": 302.36842105263156, + "grad_norm": 1.5400711297988892, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 45960 + }, + { + "epoch": 302.4342105263158, + "grad_norm": 1.6926573514938354, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 45970 + }, + { + "epoch": 302.5, + "grad_norm": 1.3912023305892944, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 45980 + }, + { + "epoch": 302.5657894736842, + "grad_norm": 1.3487814664840698, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 45990 + }, + { + "epoch": 302.63157894736844, + "grad_norm": 1.3603285551071167, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 46000 + }, + { + "epoch": 302.69736842105266, + "grad_norm": 1.107670545578003, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 46010 + }, + { + "epoch": 302.7631578947368, + "grad_norm": 1.2520265579223633, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 46020 + }, + { + "epoch": 302.82894736842104, + "grad_norm": 1.2443839311599731, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 46030 + }, + { + "epoch": 302.89473684210526, + "grad_norm": 1.4623485803604126, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 46040 + }, + { + "epoch": 302.9605263157895, + "grad_norm": 1.1879698038101196, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 46050 + }, + { + "epoch": 303.0263157894737, + "grad_norm": 1.3571995496749878, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 46060 + }, + { + "epoch": 303.0921052631579, + "grad_norm": 1.199619174003601, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 46070 + }, + { + "epoch": 303.1578947368421, + "grad_norm": 1.4226540327072144, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 46080 + }, + { + "epoch": 303.2236842105263, + "grad_norm": 1.490116834640503, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 46090 + }, + { + "epoch": 303.2894736842105, + "grad_norm": 1.325772762298584, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 46100 + }, + { + "epoch": 303.35526315789474, + "grad_norm": 1.2829171419143677, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 46110 + }, + { + "epoch": 303.42105263157896, + "grad_norm": 1.164736032485962, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 46120 + }, + { + "epoch": 303.4868421052632, + "grad_norm": 1.147537350654602, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 46130 + }, + { + "epoch": 303.55263157894734, + "grad_norm": 0.7918049693107605, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 46140 + }, + { + "epoch": 303.61842105263156, + "grad_norm": 1.1674093008041382, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 46150 + }, + { + "epoch": 303.6842105263158, + "grad_norm": 1.2786973714828491, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 46160 + }, + { + "epoch": 303.75, + "grad_norm": 1.233464241027832, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 46170 + }, + { + "epoch": 303.8157894736842, + "grad_norm": 1.2651668787002563, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 46180 + }, + { + "epoch": 303.88157894736844, + "grad_norm": 1.4438832998275757, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 46190 + }, + { + "epoch": 303.94736842105266, + "grad_norm": 1.30561363697052, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 46200 + }, + { + "epoch": 304.0131578947368, + "grad_norm": 1.2133570909500122, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 46210 + }, + { + "epoch": 304.07894736842104, + "grad_norm": 1.1664997339248657, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 46220 + }, + { + "epoch": 304.14473684210526, + "grad_norm": 0.8901975750923157, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 46230 + }, + { + "epoch": 304.2105263157895, + "grad_norm": 0.9484691619873047, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 46240 + }, + { + "epoch": 304.2763157894737, + "grad_norm": 1.0902068614959717, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 46250 + }, + { + "epoch": 304.3421052631579, + "grad_norm": 0.908757209777832, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 46260 + }, + { + "epoch": 304.4078947368421, + "grad_norm": 1.406864047050476, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 46270 + }, + { + "epoch": 304.4736842105263, + "grad_norm": 1.625902533531189, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 46280 + }, + { + "epoch": 304.5394736842105, + "grad_norm": 1.1790637969970703, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 46290 + }, + { + "epoch": 304.60526315789474, + "grad_norm": 1.3175780773162842, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 46300 + }, + { + "epoch": 304.67105263157896, + "grad_norm": 1.1840614080429077, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 46310 + }, + { + "epoch": 304.7368421052632, + "grad_norm": 1.3935436010360718, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 46320 + }, + { + "epoch": 304.80263157894734, + "grad_norm": 1.0257351398468018, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 46330 + }, + { + "epoch": 304.86842105263156, + "grad_norm": 0.9773437976837158, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 46340 + }, + { + "epoch": 304.9342105263158, + "grad_norm": 1.1705121994018555, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 46350 + }, + { + "epoch": 305.0, + "grad_norm": 1.569278359413147, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 46360 + }, + { + "epoch": 305.0657894736842, + "grad_norm": 1.1362944841384888, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 46370 + }, + { + "epoch": 305.13157894736844, + "grad_norm": 0.9014955759048462, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 46380 + }, + { + "epoch": 305.19736842105266, + "grad_norm": 1.377353549003601, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 46390 + }, + { + "epoch": 305.2631578947368, + "grad_norm": 1.3454692363739014, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 46400 + }, + { + "epoch": 305.32894736842104, + "grad_norm": 1.1798051595687866, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 46410 + }, + { + "epoch": 305.39473684210526, + "grad_norm": 1.0870208740234375, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 46420 + }, + { + "epoch": 305.4605263157895, + "grad_norm": 0.8006429672241211, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 46430 + }, + { + "epoch": 305.5263157894737, + "grad_norm": 0.890498161315918, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 46440 + }, + { + "epoch": 305.5921052631579, + "grad_norm": 1.2902061939239502, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 46450 + }, + { + "epoch": 305.6578947368421, + "grad_norm": 1.146141529083252, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 46460 + }, + { + "epoch": 305.7236842105263, + "grad_norm": 0.9566381573677063, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 46470 + }, + { + "epoch": 305.7894736842105, + "grad_norm": 1.5867373943328857, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 46480 + }, + { + "epoch": 305.85526315789474, + "grad_norm": 1.2137548923492432, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 46490 + }, + { + "epoch": 305.92105263157896, + "grad_norm": 1.305253505706787, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 46500 + }, + { + "epoch": 305.9868421052632, + "grad_norm": 0.8301891088485718, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 46510 + }, + { + "epoch": 306.05263157894734, + "grad_norm": 0.9847671389579773, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 46520 + }, + { + "epoch": 306.11842105263156, + "grad_norm": 1.592354655265808, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 46530 + }, + { + "epoch": 306.1842105263158, + "grad_norm": 1.2078450918197632, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 46540 + }, + { + "epoch": 306.25, + "grad_norm": 1.000321865081787, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 46550 + }, + { + "epoch": 306.3157894736842, + "grad_norm": 1.207289695739746, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 46560 + }, + { + "epoch": 306.38157894736844, + "grad_norm": 1.2162929773330688, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 46570 + }, + { + "epoch": 306.44736842105266, + "grad_norm": 1.147230863571167, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 46580 + }, + { + "epoch": 306.5131578947368, + "grad_norm": 1.2170518636703491, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 46590 + }, + { + "epoch": 306.57894736842104, + "grad_norm": 1.1582554578781128, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 46600 + }, + { + "epoch": 306.64473684210526, + "grad_norm": 1.2563729286193848, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 46610 + }, + { + "epoch": 306.7105263157895, + "grad_norm": 0.9959396123886108, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 46620 + }, + { + "epoch": 306.7763157894737, + "grad_norm": 1.2448713779449463, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 46630 + }, + { + "epoch": 306.8421052631579, + "grad_norm": 1.0271637439727783, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 46640 + }, + { + "epoch": 306.9078947368421, + "grad_norm": 1.0369125604629517, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 46650 + }, + { + "epoch": 306.9736842105263, + "grad_norm": 1.0116463899612427, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 46660 + }, + { + "epoch": 307.0394736842105, + "grad_norm": 1.3489338159561157, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 46670 + }, + { + "epoch": 307.10526315789474, + "grad_norm": 1.214111328125, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 46680 + }, + { + "epoch": 307.17105263157896, + "grad_norm": 1.4091652631759644, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 46690 + }, + { + "epoch": 307.2368421052632, + "grad_norm": 1.3401622772216797, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 46700 + }, + { + "epoch": 307.30263157894734, + "grad_norm": 1.0717833042144775, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 46710 + }, + { + "epoch": 307.36842105263156, + "grad_norm": 1.215564489364624, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 46720 + }, + { + "epoch": 307.4342105263158, + "grad_norm": 1.126434326171875, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 46730 + }, + { + "epoch": 307.5, + "grad_norm": 1.1707587242126465, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 46740 + }, + { + "epoch": 307.5657894736842, + "grad_norm": 1.0138065814971924, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 46750 + }, + { + "epoch": 307.63157894736844, + "grad_norm": 0.8410609364509583, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 46760 + }, + { + "epoch": 307.69736842105266, + "grad_norm": 1.0997475385665894, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 46770 + }, + { + "epoch": 307.7631578947368, + "grad_norm": 1.0100196599960327, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 46780 + }, + { + "epoch": 307.82894736842104, + "grad_norm": 1.0084043741226196, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 46790 + }, + { + "epoch": 307.89473684210526, + "grad_norm": 1.1131081581115723, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 46800 + }, + { + "epoch": 307.9605263157895, + "grad_norm": 0.966968297958374, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 46810 + }, + { + "epoch": 308.0263157894737, + "grad_norm": 0.7994092106819153, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 46820 + }, + { + "epoch": 308.0921052631579, + "grad_norm": 1.2726207971572876, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 46830 + }, + { + "epoch": 308.1578947368421, + "grad_norm": 1.2597211599349976, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 46840 + }, + { + "epoch": 308.2236842105263, + "grad_norm": 1.6768962144851685, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 46850 + }, + { + "epoch": 308.2894736842105, + "grad_norm": 1.068867564201355, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 46860 + }, + { + "epoch": 308.35526315789474, + "grad_norm": 1.2445541620254517, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 46870 + }, + { + "epoch": 308.42105263157896, + "grad_norm": 1.1465497016906738, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 46880 + }, + { + "epoch": 308.4868421052632, + "grad_norm": 1.4472042322158813, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 46890 + }, + { + "epoch": 308.55263157894734, + "grad_norm": 1.262213945388794, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 46900 + }, + { + "epoch": 308.61842105263156, + "grad_norm": 1.1717816591262817, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 46910 + }, + { + "epoch": 308.6842105263158, + "grad_norm": 0.995729923248291, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 46920 + }, + { + "epoch": 308.75, + "grad_norm": 1.394285798072815, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 46930 + }, + { + "epoch": 308.8157894736842, + "grad_norm": 1.8521760702133179, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 46940 + }, + { + "epoch": 308.88157894736844, + "grad_norm": 1.5286895036697388, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 46950 + }, + { + "epoch": 308.94736842105266, + "grad_norm": 0.9966525435447693, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 46960 + }, + { + "epoch": 309.0131578947368, + "grad_norm": 1.0583655834197998, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 46970 + }, + { + "epoch": 309.07894736842104, + "grad_norm": 1.0200626850128174, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 46980 + }, + { + "epoch": 309.14473684210526, + "grad_norm": 1.2817143201828003, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 46990 + }, + { + "epoch": 309.2105263157895, + "grad_norm": 1.4597275257110596, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 47000 + }, + { + "epoch": 309.2763157894737, + "grad_norm": 1.026604413986206, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 47010 + }, + { + "epoch": 309.3421052631579, + "grad_norm": 1.1650899648666382, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 47020 + }, + { + "epoch": 309.4078947368421, + "grad_norm": 1.0153883695602417, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 47030 + }, + { + "epoch": 309.4736842105263, + "grad_norm": 1.2171595096588135, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 47040 + }, + { + "epoch": 309.5394736842105, + "grad_norm": 1.2337496280670166, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 47050 + }, + { + "epoch": 309.60526315789474, + "grad_norm": 1.2742234468460083, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 47060 + }, + { + "epoch": 309.67105263157896, + "grad_norm": 1.307140588760376, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 47070 + }, + { + "epoch": 309.7368421052632, + "grad_norm": 0.6933286190032959, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 47080 + }, + { + "epoch": 309.80263157894734, + "grad_norm": 1.448660135269165, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 47090 + }, + { + "epoch": 309.86842105263156, + "grad_norm": 1.2095212936401367, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 47100 + }, + { + "epoch": 309.9342105263158, + "grad_norm": 1.179511308670044, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 47110 + }, + { + "epoch": 310.0, + "grad_norm": 1.0663738250732422, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 47120 + }, + { + "epoch": 310.0657894736842, + "grad_norm": 1.395041823387146, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 47130 + }, + { + "epoch": 310.13157894736844, + "grad_norm": 1.5173497200012207, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 47140 + }, + { + "epoch": 310.19736842105266, + "grad_norm": 1.4677331447601318, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 47150 + }, + { + "epoch": 310.2631578947368, + "grad_norm": 1.263916015625, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 47160 + }, + { + "epoch": 310.32894736842104, + "grad_norm": 1.796497106552124, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 47170 + }, + { + "epoch": 310.39473684210526, + "grad_norm": 1.4208660125732422, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 47180 + }, + { + "epoch": 310.4605263157895, + "grad_norm": 1.3126176595687866, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 47190 + }, + { + "epoch": 310.5263157894737, + "grad_norm": 1.1969044208526611, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 47200 + }, + { + "epoch": 310.5921052631579, + "grad_norm": 1.3965052366256714, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 47210 + }, + { + "epoch": 310.6578947368421, + "grad_norm": 1.1293995380401611, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 47220 + }, + { + "epoch": 310.7236842105263, + "grad_norm": 1.213976263999939, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 47230 + }, + { + "epoch": 310.7894736842105, + "grad_norm": 1.2718894481658936, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 47240 + }, + { + "epoch": 310.85526315789474, + "grad_norm": 1.3019602298736572, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 47250 + }, + { + "epoch": 310.92105263157896, + "grad_norm": 1.1459777355194092, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 47260 + }, + { + "epoch": 310.9868421052632, + "grad_norm": 0.9818292856216431, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 47270 + }, + { + "epoch": 311.05263157894734, + "grad_norm": 1.271238088607788, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 47280 + }, + { + "epoch": 311.11842105263156, + "grad_norm": 1.5601221323013306, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 47290 + }, + { + "epoch": 311.1842105263158, + "grad_norm": 1.2876839637756348, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 47300 + }, + { + "epoch": 311.25, + "grad_norm": 1.2727855443954468, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 47310 + }, + { + "epoch": 311.3157894736842, + "grad_norm": 1.0672112703323364, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 47320 + }, + { + "epoch": 311.38157894736844, + "grad_norm": 1.3671296834945679, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 47330 + }, + { + "epoch": 311.44736842105266, + "grad_norm": 0.953478991985321, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 47340 + }, + { + "epoch": 311.5131578947368, + "grad_norm": 1.3494404554367065, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 47350 + }, + { + "epoch": 311.57894736842104, + "grad_norm": 1.256812572479248, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 47360 + }, + { + "epoch": 311.64473684210526, + "grad_norm": 1.1076347827911377, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 47370 + }, + { + "epoch": 311.7105263157895, + "grad_norm": 1.1264309883117676, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 47380 + }, + { + "epoch": 311.7763157894737, + "grad_norm": 1.082269310951233, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 47390 + }, + { + "epoch": 311.8421052631579, + "grad_norm": 1.2496287822723389, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 47400 + }, + { + "epoch": 311.9078947368421, + "grad_norm": 1.2073965072631836, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 47410 + }, + { + "epoch": 311.9736842105263, + "grad_norm": 1.4305161237716675, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 47420 + }, + { + "epoch": 312.0394736842105, + "grad_norm": 1.231873631477356, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 47430 + }, + { + "epoch": 312.10526315789474, + "grad_norm": 1.3474899530410767, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 47440 + }, + { + "epoch": 312.17105263157896, + "grad_norm": 1.301870346069336, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 47450 + }, + { + "epoch": 312.2368421052632, + "grad_norm": 1.3354082107543945, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 47460 + }, + { + "epoch": 312.30263157894734, + "grad_norm": 1.112229347229004, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 47470 + }, + { + "epoch": 312.36842105263156, + "grad_norm": 0.764408528804779, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 47480 + }, + { + "epoch": 312.4342105263158, + "grad_norm": 0.8579666018486023, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 47490 + }, + { + "epoch": 312.5, + "grad_norm": 0.7732484340667725, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 47500 + }, + { + "epoch": 312.5657894736842, + "grad_norm": 1.0402549505233765, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 47510 + }, + { + "epoch": 312.63157894736844, + "grad_norm": 0.876788318157196, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 47520 + }, + { + "epoch": 312.69736842105266, + "grad_norm": 0.8869333267211914, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 47530 + }, + { + "epoch": 312.7631578947368, + "grad_norm": 1.4594961404800415, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 47540 + }, + { + "epoch": 312.82894736842104, + "grad_norm": 1.2416234016418457, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 47550 + }, + { + "epoch": 312.89473684210526, + "grad_norm": 1.632511854171753, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 47560 + }, + { + "epoch": 312.9605263157895, + "grad_norm": 1.8223305940628052, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 47570 + }, + { + "epoch": 313.0263157894737, + "grad_norm": 1.4376325607299805, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 47580 + }, + { + "epoch": 313.0921052631579, + "grad_norm": 1.3378567695617676, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 47590 + }, + { + "epoch": 313.1578947368421, + "grad_norm": 1.1414756774902344, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 47600 + }, + { + "epoch": 313.2236842105263, + "grad_norm": 1.2612454891204834, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 47610 + }, + { + "epoch": 313.2894736842105, + "grad_norm": 1.2293567657470703, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 47620 + }, + { + "epoch": 313.35526315789474, + "grad_norm": 1.0517131090164185, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 47630 + }, + { + "epoch": 313.42105263157896, + "grad_norm": 1.222216248512268, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 47640 + }, + { + "epoch": 313.4868421052632, + "grad_norm": 0.9757271409034729, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 47650 + }, + { + "epoch": 313.55263157894734, + "grad_norm": 0.8784149289131165, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 47660 + }, + { + "epoch": 313.61842105263156, + "grad_norm": 1.1013492345809937, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 47670 + }, + { + "epoch": 313.6842105263158, + "grad_norm": 1.3613831996917725, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 47680 + }, + { + "epoch": 313.75, + "grad_norm": 1.004358172416687, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 47690 + }, + { + "epoch": 313.8157894736842, + "grad_norm": 1.2566474676132202, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 47700 + }, + { + "epoch": 313.88157894736844, + "grad_norm": 1.362387776374817, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 47710 + }, + { + "epoch": 313.94736842105266, + "grad_norm": 0.9462046027183533, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 47720 + }, + { + "epoch": 314.0131578947368, + "grad_norm": 1.5367804765701294, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 47730 + }, + { + "epoch": 314.07894736842104, + "grad_norm": 1.370335340499878, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 47740 + }, + { + "epoch": 314.14473684210526, + "grad_norm": 1.0723284482955933, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 47750 + }, + { + "epoch": 314.2105263157895, + "grad_norm": 1.293366551399231, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 47760 + }, + { + "epoch": 314.2763157894737, + "grad_norm": 1.2416729927062988, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 47770 + }, + { + "epoch": 314.3421052631579, + "grad_norm": 0.9720730781555176, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 47780 + }, + { + "epoch": 314.4078947368421, + "grad_norm": 1.1090688705444336, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 47790 + }, + { + "epoch": 314.4736842105263, + "grad_norm": 1.4181932210922241, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 47800 + }, + { + "epoch": 314.5394736842105, + "grad_norm": 1.1214467287063599, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 47810 + }, + { + "epoch": 314.60526315789474, + "grad_norm": 1.126190185546875, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 47820 + }, + { + "epoch": 314.67105263157896, + "grad_norm": 1.3774467706680298, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 47830 + }, + { + "epoch": 314.7368421052632, + "grad_norm": 1.2156517505645752, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 47840 + }, + { + "epoch": 314.80263157894734, + "grad_norm": 0.9228918552398682, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 47850 + }, + { + "epoch": 314.86842105263156, + "grad_norm": 1.2994784116744995, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 47860 + }, + { + "epoch": 314.9342105263158, + "grad_norm": 1.2928920984268188, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 47870 + }, + { + "epoch": 315.0, + "grad_norm": 0.9638012647628784, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 47880 + }, + { + "epoch": 315.0657894736842, + "grad_norm": 0.971041738986969, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 47890 + }, + { + "epoch": 315.13157894736844, + "grad_norm": 0.9184520244598389, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 47900 + }, + { + "epoch": 315.19736842105266, + "grad_norm": 0.8455377221107483, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 47910 + }, + { + "epoch": 315.2631578947368, + "grad_norm": 0.7738654017448425, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 47920 + }, + { + "epoch": 315.32894736842104, + "grad_norm": 1.2264034748077393, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 47930 + }, + { + "epoch": 315.39473684210526, + "grad_norm": 0.7875370979309082, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 47940 + }, + { + "epoch": 315.4605263157895, + "grad_norm": 0.7856130599975586, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 47950 + }, + { + "epoch": 315.5263157894737, + "grad_norm": 1.2851800918579102, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 47960 + }, + { + "epoch": 315.5921052631579, + "grad_norm": 1.3538479804992676, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 47970 + }, + { + "epoch": 315.6578947368421, + "grad_norm": 1.427662968635559, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 47980 + }, + { + "epoch": 315.7236842105263, + "grad_norm": 1.2748855352401733, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 47990 + }, + { + "epoch": 315.7894736842105, + "grad_norm": 1.1658332347869873, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 48000 + }, + { + "epoch": 315.85526315789474, + "grad_norm": 1.1847903728485107, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 48010 + }, + { + "epoch": 315.92105263157896, + "grad_norm": 1.2371987104415894, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 48020 + }, + { + "epoch": 315.9868421052632, + "grad_norm": 1.256600260734558, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 48030 + }, + { + "epoch": 316.05263157894734, + "grad_norm": 1.1105140447616577, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 48040 + }, + { + "epoch": 316.11842105263156, + "grad_norm": 1.4505999088287354, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 48050 + }, + { + "epoch": 316.1842105263158, + "grad_norm": 1.5613057613372803, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 48060 + }, + { + "epoch": 316.25, + "grad_norm": 1.2023935317993164, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 48070 + }, + { + "epoch": 316.3157894736842, + "grad_norm": 1.123512864112854, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 48080 + }, + { + "epoch": 316.38157894736844, + "grad_norm": 1.049616813659668, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 48090 + }, + { + "epoch": 316.44736842105266, + "grad_norm": 0.9086410403251648, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 48100 + }, + { + "epoch": 316.5131578947368, + "grad_norm": 0.9956292510032654, + "learning_rate": 0.0001, + "loss": 0.0192, + "step": 48110 + }, + { + "epoch": 316.57894736842104, + "grad_norm": 1.0076509714126587, + "learning_rate": 0.0001, + "loss": 0.0167, + "step": 48120 + }, + { + "epoch": 316.64473684210526, + "grad_norm": 0.9078097343444824, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 48130 + }, + { + "epoch": 316.7105263157895, + "grad_norm": 1.2351521253585815, + "learning_rate": 0.0001, + "loss": 0.018, + "step": 48140 + }, + { + "epoch": 316.7763157894737, + "grad_norm": 0.9075007438659668, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 48150 + }, + { + "epoch": 316.8421052631579, + "grad_norm": 1.0333648920059204, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 48160 + }, + { + "epoch": 316.9078947368421, + "grad_norm": 1.42146635055542, + "learning_rate": 0.0001, + "loss": 0.0204, + "step": 48170 + }, + { + "epoch": 316.9736842105263, + "grad_norm": 1.4142286777496338, + "learning_rate": 0.0001, + "loss": 0.0182, + "step": 48180 + }, + { + "epoch": 317.0394736842105, + "grad_norm": 1.693477749824524, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 48190 + }, + { + "epoch": 317.10526315789474, + "grad_norm": 1.183012843132019, + "learning_rate": 0.0001, + "loss": 0.0203, + "step": 48200 + }, + { + "epoch": 317.17105263157896, + "grad_norm": 1.2209391593933105, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 48210 + }, + { + "epoch": 317.2368421052632, + "grad_norm": 1.294472098350525, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 48220 + }, + { + "epoch": 317.30263157894734, + "grad_norm": 1.6887290477752686, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 48230 + }, + { + "epoch": 317.36842105263156, + "grad_norm": 1.9167300462722778, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 48240 + }, + { + "epoch": 317.4342105263158, + "grad_norm": 1.2060270309448242, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 48250 + }, + { + "epoch": 317.5, + "grad_norm": 0.9500623941421509, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 48260 + }, + { + "epoch": 317.5657894736842, + "grad_norm": 1.399133563041687, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 48270 + }, + { + "epoch": 317.63157894736844, + "grad_norm": 1.2392226457595825, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 48280 + }, + { + "epoch": 317.69736842105266, + "grad_norm": 1.2007087469100952, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 48290 + }, + { + "epoch": 317.7631578947368, + "grad_norm": 1.4812746047973633, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 48300 + }, + { + "epoch": 317.82894736842104, + "grad_norm": 1.3363618850708008, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 48310 + }, + { + "epoch": 317.89473684210526, + "grad_norm": 1.3857090473175049, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 48320 + }, + { + "epoch": 317.9605263157895, + "grad_norm": 1.5482040643692017, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 48330 + }, + { + "epoch": 318.0263157894737, + "grad_norm": 1.3046433925628662, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 48340 + }, + { + "epoch": 318.0921052631579, + "grad_norm": 1.4874904155731201, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 48350 + }, + { + "epoch": 318.1578947368421, + "grad_norm": 1.252055048942566, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 48360 + }, + { + "epoch": 318.2236842105263, + "grad_norm": 1.297682762145996, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 48370 + }, + { + "epoch": 318.2894736842105, + "grad_norm": 1.7662724256515503, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 48380 + }, + { + "epoch": 318.35526315789474, + "grad_norm": 1.5304863452911377, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 48390 + }, + { + "epoch": 318.42105263157896, + "grad_norm": 1.3960685729980469, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 48400 + }, + { + "epoch": 318.4868421052632, + "grad_norm": 1.506137490272522, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 48410 + }, + { + "epoch": 318.55263157894734, + "grad_norm": 1.1885483264923096, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 48420 + }, + { + "epoch": 318.61842105263156, + "grad_norm": 1.2131962776184082, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 48430 + }, + { + "epoch": 318.6842105263158, + "grad_norm": 1.2580586671829224, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 48440 + }, + { + "epoch": 318.75, + "grad_norm": 1.2965505123138428, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 48450 + }, + { + "epoch": 318.8157894736842, + "grad_norm": 1.243948221206665, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 48460 + }, + { + "epoch": 318.88157894736844, + "grad_norm": 0.9847695231437683, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 48470 + }, + { + "epoch": 318.94736842105266, + "grad_norm": 1.0571852922439575, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 48480 + }, + { + "epoch": 319.0131578947368, + "grad_norm": 0.8944369554519653, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 48490 + }, + { + "epoch": 319.07894736842104, + "grad_norm": 1.3923020362854004, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 48500 + }, + { + "epoch": 319.14473684210526, + "grad_norm": 1.0978920459747314, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 48510 + }, + { + "epoch": 319.2105263157895, + "grad_norm": 1.073713779449463, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 48520 + }, + { + "epoch": 319.2763157894737, + "grad_norm": 0.8668067455291748, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 48530 + }, + { + "epoch": 319.3421052631579, + "grad_norm": 0.8067622184753418, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 48540 + }, + { + "epoch": 319.4078947368421, + "grad_norm": 0.9731525182723999, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 48550 + }, + { + "epoch": 319.4736842105263, + "grad_norm": 0.9085482358932495, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 48560 + }, + { + "epoch": 319.5394736842105, + "grad_norm": 1.2935781478881836, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 48570 + }, + { + "epoch": 319.60526315789474, + "grad_norm": 2.343533515930176, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 48580 + }, + { + "epoch": 319.67105263157896, + "grad_norm": 1.5465039014816284, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 48590 + }, + { + "epoch": 319.7368421052632, + "grad_norm": 1.2827335596084595, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 48600 + }, + { + "epoch": 319.80263157894734, + "grad_norm": 1.5653749704360962, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 48610 + }, + { + "epoch": 319.86842105263156, + "grad_norm": 1.6465096473693848, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 48620 + }, + { + "epoch": 319.9342105263158, + "grad_norm": 1.4881956577301025, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 48630 + }, + { + "epoch": 320.0, + "grad_norm": 1.5080645084381104, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 48640 + }, + { + "epoch": 320.0657894736842, + "grad_norm": 1.516532063484192, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 48650 + }, + { + "epoch": 320.13157894736844, + "grad_norm": 1.2939443588256836, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 48660 + }, + { + "epoch": 320.19736842105266, + "grad_norm": 1.2543706893920898, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 48670 + }, + { + "epoch": 320.2631578947368, + "grad_norm": 1.0210703611373901, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 48680 + }, + { + "epoch": 320.32894736842104, + "grad_norm": 1.2187316417694092, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 48690 + }, + { + "epoch": 320.39473684210526, + "grad_norm": 0.8297109007835388, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 48700 + }, + { + "epoch": 320.4605263157895, + "grad_norm": 1.0933237075805664, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 48710 + }, + { + "epoch": 320.5263157894737, + "grad_norm": 0.9736988544464111, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 48720 + }, + { + "epoch": 320.5921052631579, + "grad_norm": 1.174633502960205, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 48730 + }, + { + "epoch": 320.6578947368421, + "grad_norm": 1.419814109802246, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 48740 + }, + { + "epoch": 320.7236842105263, + "grad_norm": 0.9784946441650391, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 48750 + }, + { + "epoch": 320.7894736842105, + "grad_norm": 0.8846175670623779, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 48760 + }, + { + "epoch": 320.85526315789474, + "grad_norm": 1.217024326324463, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 48770 + }, + { + "epoch": 320.92105263157896, + "grad_norm": 1.0269222259521484, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 48780 + }, + { + "epoch": 320.9868421052632, + "grad_norm": 1.176324486732483, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 48790 + }, + { + "epoch": 321.05263157894734, + "grad_norm": 1.1904447078704834, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 48800 + }, + { + "epoch": 321.11842105263156, + "grad_norm": 1.0044549703598022, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 48810 + }, + { + "epoch": 321.1842105263158, + "grad_norm": 1.2409372329711914, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 48820 + }, + { + "epoch": 321.25, + "grad_norm": 1.3968594074249268, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 48830 + }, + { + "epoch": 321.3157894736842, + "grad_norm": 1.2159334421157837, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 48840 + }, + { + "epoch": 321.38157894736844, + "grad_norm": 1.4838895797729492, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 48850 + }, + { + "epoch": 321.44736842105266, + "grad_norm": 1.58181893825531, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 48860 + }, + { + "epoch": 321.5131578947368, + "grad_norm": 1.1558905839920044, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 48870 + }, + { + "epoch": 321.57894736842104, + "grad_norm": 1.1674673557281494, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 48880 + }, + { + "epoch": 321.64473684210526, + "grad_norm": 1.2923530340194702, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 48890 + }, + { + "epoch": 321.7105263157895, + "grad_norm": 1.4258642196655273, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 48900 + }, + { + "epoch": 321.7763157894737, + "grad_norm": 1.1080553531646729, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 48910 + }, + { + "epoch": 321.8421052631579, + "grad_norm": 1.5684802532196045, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 48920 + }, + { + "epoch": 321.9078947368421, + "grad_norm": 1.5006873607635498, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 48930 + }, + { + "epoch": 321.9736842105263, + "grad_norm": 1.164925217628479, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 48940 + }, + { + "epoch": 322.0394736842105, + "grad_norm": 1.4386217594146729, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 48950 + }, + { + "epoch": 322.10526315789474, + "grad_norm": 1.5981786251068115, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 48960 + }, + { + "epoch": 322.17105263157896, + "grad_norm": 1.1611731052398682, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 48970 + }, + { + "epoch": 322.2368421052632, + "grad_norm": 1.0494667291641235, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 48980 + }, + { + "epoch": 322.30263157894734, + "grad_norm": 1.397523045539856, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 48990 + }, + { + "epoch": 322.36842105263156, + "grad_norm": 1.5347926616668701, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 49000 + }, + { + "epoch": 322.4342105263158, + "grad_norm": 1.1124234199523926, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 49010 + }, + { + "epoch": 322.5, + "grad_norm": 1.2991044521331787, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 49020 + }, + { + "epoch": 322.5657894736842, + "grad_norm": 1.0345944166183472, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 49030 + }, + { + "epoch": 322.63157894736844, + "grad_norm": 1.2403799295425415, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 49040 + }, + { + "epoch": 322.69736842105266, + "grad_norm": 1.202883005142212, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 49050 + }, + { + "epoch": 322.7631578947368, + "grad_norm": 1.2251741886138916, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 49060 + }, + { + "epoch": 322.82894736842104, + "grad_norm": 1.258322834968567, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 49070 + }, + { + "epoch": 322.89473684210526, + "grad_norm": 1.1949845552444458, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 49080 + }, + { + "epoch": 322.9605263157895, + "grad_norm": 1.3184500932693481, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 49090 + }, + { + "epoch": 323.0263157894737, + "grad_norm": 1.1563595533370972, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49100 + }, + { + "epoch": 323.0921052631579, + "grad_norm": 1.0812554359436035, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 49110 + }, + { + "epoch": 323.1578947368421, + "grad_norm": 1.1537151336669922, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49120 + }, + { + "epoch": 323.2236842105263, + "grad_norm": 1.4239085912704468, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49130 + }, + { + "epoch": 323.2894736842105, + "grad_norm": 1.167175531387329, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 49140 + }, + { + "epoch": 323.35526315789474, + "grad_norm": 1.0651171207427979, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 49150 + }, + { + "epoch": 323.42105263157896, + "grad_norm": 1.1401432752609253, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49160 + }, + { + "epoch": 323.4868421052632, + "grad_norm": 1.0716900825500488, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 49170 + }, + { + "epoch": 323.55263157894734, + "grad_norm": 0.9888905882835388, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 49180 + }, + { + "epoch": 323.61842105263156, + "grad_norm": 1.1560784578323364, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 49190 + }, + { + "epoch": 323.6842105263158, + "grad_norm": 1.1108367443084717, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 49200 + }, + { + "epoch": 323.75, + "grad_norm": 0.7860233783721924, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 49210 + }, + { + "epoch": 323.8157894736842, + "grad_norm": 1.1012498140335083, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 49220 + }, + { + "epoch": 323.88157894736844, + "grad_norm": 0.9723058938980103, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 49230 + }, + { + "epoch": 323.94736842105266, + "grad_norm": 1.2430423498153687, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 49240 + }, + { + "epoch": 324.0131578947368, + "grad_norm": 1.3332782983779907, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49250 + }, + { + "epoch": 324.07894736842104, + "grad_norm": 1.341118335723877, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 49260 + }, + { + "epoch": 324.14473684210526, + "grad_norm": 1.2712905406951904, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 49270 + }, + { + "epoch": 324.2105263157895, + "grad_norm": 0.7532151937484741, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 49280 + }, + { + "epoch": 324.2763157894737, + "grad_norm": 1.4031445980072021, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 49290 + }, + { + "epoch": 324.3421052631579, + "grad_norm": 0.9208416938781738, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 49300 + }, + { + "epoch": 324.4078947368421, + "grad_norm": 1.2247257232666016, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 49310 + }, + { + "epoch": 324.4736842105263, + "grad_norm": 1.0223255157470703, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49320 + }, + { + "epoch": 324.5394736842105, + "grad_norm": 1.1954952478408813, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 49330 + }, + { + "epoch": 324.60526315789474, + "grad_norm": 1.2614434957504272, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 49340 + }, + { + "epoch": 324.67105263157896, + "grad_norm": 1.0313371419906616, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 49350 + }, + { + "epoch": 324.7368421052632, + "grad_norm": 1.0749729871749878, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 49360 + }, + { + "epoch": 324.80263157894734, + "grad_norm": 1.2034530639648438, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 49370 + }, + { + "epoch": 324.86842105263156, + "grad_norm": 1.1916738748550415, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 49380 + }, + { + "epoch": 324.9342105263158, + "grad_norm": 0.9107323288917542, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 49390 + }, + { + "epoch": 325.0, + "grad_norm": 1.1662636995315552, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 49400 + }, + { + "epoch": 325.0657894736842, + "grad_norm": 1.0498578548431396, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 49410 + }, + { + "epoch": 325.13157894736844, + "grad_norm": 1.193319320678711, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 49420 + }, + { + "epoch": 325.19736842105266, + "grad_norm": 1.2099950313568115, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 49430 + }, + { + "epoch": 325.2631578947368, + "grad_norm": 1.0245522260665894, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 49440 + }, + { + "epoch": 325.32894736842104, + "grad_norm": 1.201566219329834, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 49450 + }, + { + "epoch": 325.39473684210526, + "grad_norm": 0.9498182535171509, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 49460 + }, + { + "epoch": 325.4605263157895, + "grad_norm": 1.081298828125, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 49470 + }, + { + "epoch": 325.5263157894737, + "grad_norm": 1.2874219417572021, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 49480 + }, + { + "epoch": 325.5921052631579, + "grad_norm": 1.1696557998657227, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 49490 + }, + { + "epoch": 325.6578947368421, + "grad_norm": 1.0769000053405762, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 49500 + }, + { + "epoch": 325.7236842105263, + "grad_norm": 1.134009838104248, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 49510 + }, + { + "epoch": 325.7894736842105, + "grad_norm": 0.9424777626991272, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 49520 + }, + { + "epoch": 325.85526315789474, + "grad_norm": 1.0407861471176147, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49530 + }, + { + "epoch": 325.92105263157896, + "grad_norm": 0.9889567494392395, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 49540 + }, + { + "epoch": 325.9868421052632, + "grad_norm": 0.6618396043777466, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 49550 + }, + { + "epoch": 326.05263157894734, + "grad_norm": 1.2362364530563354, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 49560 + }, + { + "epoch": 326.11842105263156, + "grad_norm": 1.2794060707092285, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 49570 + }, + { + "epoch": 326.1842105263158, + "grad_norm": 1.1880918741226196, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 49580 + }, + { + "epoch": 326.25, + "grad_norm": 1.0328410863876343, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 49590 + }, + { + "epoch": 326.3157894736842, + "grad_norm": 1.3989026546478271, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49600 + }, + { + "epoch": 326.38157894736844, + "grad_norm": 0.8209923505783081, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 49610 + }, + { + "epoch": 326.44736842105266, + "grad_norm": 1.04630446434021, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 49620 + }, + { + "epoch": 326.5131578947368, + "grad_norm": 1.4371641874313354, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 49630 + }, + { + "epoch": 326.57894736842104, + "grad_norm": 1.1267870664596558, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 49640 + }, + { + "epoch": 326.64473684210526, + "grad_norm": 1.2246888875961304, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 49650 + }, + { + "epoch": 326.7105263157895, + "grad_norm": 1.1389400959014893, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 49660 + }, + { + "epoch": 326.7763157894737, + "grad_norm": 0.806943953037262, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49670 + }, + { + "epoch": 326.8421052631579, + "grad_norm": 1.1246274709701538, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 49680 + }, + { + "epoch": 326.9078947368421, + "grad_norm": 1.3414881229400635, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 49690 + }, + { + "epoch": 326.9736842105263, + "grad_norm": 0.9751403331756592, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 49700 + }, + { + "epoch": 327.0394736842105, + "grad_norm": 0.9873465299606323, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 49710 + }, + { + "epoch": 327.10526315789474, + "grad_norm": 1.0995386838912964, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 49720 + }, + { + "epoch": 327.17105263157896, + "grad_norm": 1.18962824344635, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 49730 + }, + { + "epoch": 327.2368421052632, + "grad_norm": 1.1961159706115723, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 49740 + }, + { + "epoch": 327.30263157894734, + "grad_norm": 1.5299499034881592, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 49750 + }, + { + "epoch": 327.36842105263156, + "grad_norm": 1.2997653484344482, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 49760 + }, + { + "epoch": 327.4342105263158, + "grad_norm": 1.19481360912323, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 49770 + }, + { + "epoch": 327.5, + "grad_norm": 1.1865720748901367, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 49780 + }, + { + "epoch": 327.5657894736842, + "grad_norm": 1.3562146425247192, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 49790 + }, + { + "epoch": 327.63157894736844, + "grad_norm": 1.2696094512939453, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 49800 + }, + { + "epoch": 327.69736842105266, + "grad_norm": 1.2589432001113892, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 49810 + }, + { + "epoch": 327.7631578947368, + "grad_norm": 0.9365496039390564, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 49820 + }, + { + "epoch": 327.82894736842104, + "grad_norm": 1.6220346689224243, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 49830 + }, + { + "epoch": 327.89473684210526, + "grad_norm": 1.4582602977752686, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 49840 + }, + { + "epoch": 327.9605263157895, + "grad_norm": 1.0291483402252197, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 49850 + }, + { + "epoch": 328.0263157894737, + "grad_norm": 1.1116678714752197, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 49860 + }, + { + "epoch": 328.0921052631579, + "grad_norm": 1.533398985862732, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 49870 + }, + { + "epoch": 328.1578947368421, + "grad_norm": 1.562609076499939, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 49880 + }, + { + "epoch": 328.2236842105263, + "grad_norm": 1.3016607761383057, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 49890 + }, + { + "epoch": 328.2894736842105, + "grad_norm": 1.1702256202697754, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 49900 + }, + { + "epoch": 328.35526315789474, + "grad_norm": 1.1809632778167725, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 49910 + }, + { + "epoch": 328.42105263157896, + "grad_norm": 1.109625220298767, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 49920 + }, + { + "epoch": 328.4868421052632, + "grad_norm": 1.376266360282898, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 49930 + }, + { + "epoch": 328.55263157894734, + "grad_norm": 1.2670068740844727, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 49940 + }, + { + "epoch": 328.61842105263156, + "grad_norm": 1.1520689725875854, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 49950 + }, + { + "epoch": 328.6842105263158, + "grad_norm": 1.171880841255188, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 49960 + }, + { + "epoch": 328.75, + "grad_norm": 1.2454252243041992, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 49970 + }, + { + "epoch": 328.8157894736842, + "grad_norm": 0.9792435169219971, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 49980 + }, + { + "epoch": 328.88157894736844, + "grad_norm": 1.4465330839157104, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 49990 + }, + { + "epoch": 328.94736842105266, + "grad_norm": 1.2347135543823242, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 50000 + }, + { + "epoch": 329.0131578947368, + "grad_norm": 1.15986967086792, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 50010 + }, + { + "epoch": 329.07894736842104, + "grad_norm": 1.2300969362258911, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 50020 + }, + { + "epoch": 329.14473684210526, + "grad_norm": 0.9923980832099915, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 50030 + }, + { + "epoch": 329.2105263157895, + "grad_norm": 1.0011688470840454, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50040 + }, + { + "epoch": 329.2763157894737, + "grad_norm": 0.9253886938095093, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 50050 + }, + { + "epoch": 329.3421052631579, + "grad_norm": 0.8614893555641174, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 50060 + }, + { + "epoch": 329.4078947368421, + "grad_norm": 1.411759614944458, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 50070 + }, + { + "epoch": 329.4736842105263, + "grad_norm": 0.7994163632392883, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 50080 + }, + { + "epoch": 329.5394736842105, + "grad_norm": 0.9142425060272217, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 50090 + }, + { + "epoch": 329.60526315789474, + "grad_norm": 0.7740334272384644, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 50100 + }, + { + "epoch": 329.67105263157896, + "grad_norm": 0.847666323184967, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 50110 + }, + { + "epoch": 329.7368421052632, + "grad_norm": 1.1491421461105347, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 50120 + }, + { + "epoch": 329.80263157894734, + "grad_norm": 0.9505961537361145, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 50130 + }, + { + "epoch": 329.86842105263156, + "grad_norm": 1.0270600318908691, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 50140 + }, + { + "epoch": 329.9342105263158, + "grad_norm": 1.1158539056777954, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 50150 + }, + { + "epoch": 330.0, + "grad_norm": 1.6066099405288696, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 50160 + }, + { + "epoch": 330.0657894736842, + "grad_norm": 1.3317687511444092, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 50170 + }, + { + "epoch": 330.13157894736844, + "grad_norm": 1.1555328369140625, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 50180 + }, + { + "epoch": 330.19736842105266, + "grad_norm": 1.1379915475845337, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50190 + }, + { + "epoch": 330.2631578947368, + "grad_norm": 1.0150611400604248, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 50200 + }, + { + "epoch": 330.32894736842104, + "grad_norm": 1.422431468963623, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 50210 + }, + { + "epoch": 330.39473684210526, + "grad_norm": 1.1463091373443604, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 50220 + }, + { + "epoch": 330.4605263157895, + "grad_norm": 1.2713172435760498, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 50230 + }, + { + "epoch": 330.5263157894737, + "grad_norm": 1.2657397985458374, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 50240 + }, + { + "epoch": 330.5921052631579, + "grad_norm": 1.109223484992981, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 50250 + }, + { + "epoch": 330.6578947368421, + "grad_norm": 1.1030226945877075, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 50260 + }, + { + "epoch": 330.7236842105263, + "grad_norm": 1.2392481565475464, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 50270 + }, + { + "epoch": 330.7894736842105, + "grad_norm": 1.1051968336105347, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 50280 + }, + { + "epoch": 330.85526315789474, + "grad_norm": 1.110648274421692, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50290 + }, + { + "epoch": 330.92105263157896, + "grad_norm": 1.3679182529449463, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 50300 + }, + { + "epoch": 330.9868421052632, + "grad_norm": 1.1558316946029663, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 50310 + }, + { + "epoch": 331.05263157894734, + "grad_norm": 0.9238816499710083, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 50320 + }, + { + "epoch": 331.11842105263156, + "grad_norm": 1.363661766052246, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 50330 + }, + { + "epoch": 331.1842105263158, + "grad_norm": 1.5912078619003296, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 50340 + }, + { + "epoch": 331.25, + "grad_norm": 1.1807664632797241, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 50350 + }, + { + "epoch": 331.3157894736842, + "grad_norm": 1.0620604753494263, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 50360 + }, + { + "epoch": 331.38157894736844, + "grad_norm": 1.0566785335540771, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 50370 + }, + { + "epoch": 331.44736842105266, + "grad_norm": 0.9269898533821106, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 50380 + }, + { + "epoch": 331.5131578947368, + "grad_norm": 1.0795518159866333, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 50390 + }, + { + "epoch": 331.57894736842104, + "grad_norm": 1.1941287517547607, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 50400 + }, + { + "epoch": 331.64473684210526, + "grad_norm": 1.3070482015609741, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 50410 + }, + { + "epoch": 331.7105263157895, + "grad_norm": 1.1180983781814575, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 50420 + }, + { + "epoch": 331.7763157894737, + "grad_norm": 1.0409196615219116, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 50430 + }, + { + "epoch": 331.8421052631579, + "grad_norm": 1.5609703063964844, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 50440 + }, + { + "epoch": 331.9078947368421, + "grad_norm": 1.2838364839553833, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 50450 + }, + { + "epoch": 331.9736842105263, + "grad_norm": 1.4142061471939087, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 50460 + }, + { + "epoch": 332.0394736842105, + "grad_norm": 1.0563913583755493, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 50470 + }, + { + "epoch": 332.10526315789474, + "grad_norm": 1.106583833694458, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 50480 + }, + { + "epoch": 332.17105263157896, + "grad_norm": 1.0735349655151367, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 50490 + }, + { + "epoch": 332.2368421052632, + "grad_norm": 1.5495939254760742, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 50500 + }, + { + "epoch": 332.30263157894734, + "grad_norm": 1.1184154748916626, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 50510 + }, + { + "epoch": 332.36842105263156, + "grad_norm": 1.4668242931365967, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50520 + }, + { + "epoch": 332.4342105263158, + "grad_norm": 1.1980876922607422, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 50530 + }, + { + "epoch": 332.5, + "grad_norm": 1.3636940717697144, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 50540 + }, + { + "epoch": 332.5657894736842, + "grad_norm": 1.2938543558120728, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 50550 + }, + { + "epoch": 332.63157894736844, + "grad_norm": 1.1288648843765259, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 50560 + }, + { + "epoch": 332.69736842105266, + "grad_norm": 1.0192151069641113, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50570 + }, + { + "epoch": 332.7631578947368, + "grad_norm": 1.2933300733566284, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 50580 + }, + { + "epoch": 332.82894736842104, + "grad_norm": 1.2441681623458862, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 50590 + }, + { + "epoch": 332.89473684210526, + "grad_norm": 1.2795456647872925, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 50600 + }, + { + "epoch": 332.9605263157895, + "grad_norm": 1.3279870748519897, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 50610 + }, + { + "epoch": 333.0263157894737, + "grad_norm": 1.3241667747497559, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 50620 + }, + { + "epoch": 333.0921052631579, + "grad_norm": 0.7530394196510315, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 50630 + }, + { + "epoch": 333.1578947368421, + "grad_norm": 1.23948073387146, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 50640 + }, + { + "epoch": 333.2236842105263, + "grad_norm": 1.1512643098831177, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 50650 + }, + { + "epoch": 333.2894736842105, + "grad_norm": 1.1450495719909668, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 50660 + }, + { + "epoch": 333.35526315789474, + "grad_norm": 1.0570194721221924, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50670 + }, + { + "epoch": 333.42105263157896, + "grad_norm": 1.017793893814087, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 50680 + }, + { + "epoch": 333.4868421052632, + "grad_norm": 1.2568644285202026, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 50690 + }, + { + "epoch": 333.55263157894734, + "grad_norm": 1.0966501235961914, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50700 + }, + { + "epoch": 333.61842105263156, + "grad_norm": 1.404511570930481, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 50710 + }, + { + "epoch": 333.6842105263158, + "grad_norm": 0.9019613265991211, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 50720 + }, + { + "epoch": 333.75, + "grad_norm": 1.206984519958496, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 50730 + }, + { + "epoch": 333.8157894736842, + "grad_norm": 0.9531241059303284, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 50740 + }, + { + "epoch": 333.88157894736844, + "grad_norm": 1.3552753925323486, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50750 + }, + { + "epoch": 333.94736842105266, + "grad_norm": 1.5873048305511475, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 50760 + }, + { + "epoch": 334.0131578947368, + "grad_norm": 1.065086007118225, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 50770 + }, + { + "epoch": 334.07894736842104, + "grad_norm": 1.4782254695892334, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 50780 + }, + { + "epoch": 334.14473684210526, + "grad_norm": 1.0018125772476196, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 50790 + }, + { + "epoch": 334.2105263157895, + "grad_norm": 1.2389479875564575, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 50800 + }, + { + "epoch": 334.2763157894737, + "grad_norm": 1.1885042190551758, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 50810 + }, + { + "epoch": 334.3421052631579, + "grad_norm": 0.9912749528884888, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 50820 + }, + { + "epoch": 334.4078947368421, + "grad_norm": 1.206180214881897, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 50830 + }, + { + "epoch": 334.4736842105263, + "grad_norm": 1.087846279144287, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 50840 + }, + { + "epoch": 334.5394736842105, + "grad_norm": 1.4583332538604736, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 50850 + }, + { + "epoch": 334.60526315789474, + "grad_norm": 1.2611576318740845, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50860 + }, + { + "epoch": 334.67105263157896, + "grad_norm": 1.2593576908111572, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 50870 + }, + { + "epoch": 334.7368421052632, + "grad_norm": 0.8674020171165466, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 50880 + }, + { + "epoch": 334.80263157894734, + "grad_norm": 0.9973218441009521, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 50890 + }, + { + "epoch": 334.86842105263156, + "grad_norm": 1.1739544868469238, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 50900 + }, + { + "epoch": 334.9342105263158, + "grad_norm": 1.2401018142700195, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 50910 + }, + { + "epoch": 335.0, + "grad_norm": 1.03706955909729, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 50920 + }, + { + "epoch": 335.0657894736842, + "grad_norm": 1.213478684425354, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 50930 + }, + { + "epoch": 335.13157894736844, + "grad_norm": 1.3878175020217896, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 50940 + }, + { + "epoch": 335.19736842105266, + "grad_norm": 1.3380154371261597, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 50950 + }, + { + "epoch": 335.2631578947368, + "grad_norm": 0.9307578802108765, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 50960 + }, + { + "epoch": 335.32894736842104, + "grad_norm": 1.1519443988800049, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 50970 + }, + { + "epoch": 335.39473684210526, + "grad_norm": 1.3810474872589111, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 50980 + }, + { + "epoch": 335.4605263157895, + "grad_norm": 1.1275713443756104, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 50990 + }, + { + "epoch": 335.5263157894737, + "grad_norm": 1.1235930919647217, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 51000 + }, + { + "epoch": 335.5921052631579, + "grad_norm": 1.6423934698104858, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 51010 + }, + { + "epoch": 335.6578947368421, + "grad_norm": 1.104622721672058, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 51020 + }, + { + "epoch": 335.7236842105263, + "grad_norm": 1.2324247360229492, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 51030 + }, + { + "epoch": 335.7894736842105, + "grad_norm": 1.1801762580871582, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 51040 + }, + { + "epoch": 335.85526315789474, + "grad_norm": 1.164038896560669, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 51050 + }, + { + "epoch": 335.92105263157896, + "grad_norm": 0.7976166009902954, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 51060 + }, + { + "epoch": 335.9868421052632, + "grad_norm": 1.0072071552276611, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 51070 + }, + { + "epoch": 336.05263157894734, + "grad_norm": 0.8578206896781921, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 51080 + }, + { + "epoch": 336.11842105263156, + "grad_norm": 1.0572422742843628, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 51090 + }, + { + "epoch": 336.1842105263158, + "grad_norm": 0.9827244281768799, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 51100 + }, + { + "epoch": 336.25, + "grad_norm": 1.35405695438385, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 51110 + }, + { + "epoch": 336.3157894736842, + "grad_norm": 0.9394972920417786, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 51120 + }, + { + "epoch": 336.38157894736844, + "grad_norm": 1.3125892877578735, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 51130 + }, + { + "epoch": 336.44736842105266, + "grad_norm": 1.182879090309143, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 51140 + }, + { + "epoch": 336.5131578947368, + "grad_norm": 0.9829082489013672, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 51150 + }, + { + "epoch": 336.57894736842104, + "grad_norm": 1.1734791994094849, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 51160 + }, + { + "epoch": 336.64473684210526, + "grad_norm": 1.2387040853500366, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 51170 + }, + { + "epoch": 336.7105263157895, + "grad_norm": 1.2995988130569458, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 51180 + }, + { + "epoch": 336.7763157894737, + "grad_norm": 1.099161982536316, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 51190 + }, + { + "epoch": 336.8421052631579, + "grad_norm": 1.1866588592529297, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 51200 + }, + { + "epoch": 336.9078947368421, + "grad_norm": 1.2611371278762817, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 51210 + }, + { + "epoch": 336.9736842105263, + "grad_norm": 1.6526949405670166, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 51220 + }, + { + "epoch": 337.0394736842105, + "grad_norm": 1.461340069770813, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 51230 + }, + { + "epoch": 337.10526315789474, + "grad_norm": 1.127563238143921, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 51240 + }, + { + "epoch": 337.17105263157896, + "grad_norm": 1.2652572393417358, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 51250 + }, + { + "epoch": 337.2368421052632, + "grad_norm": 1.25266695022583, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 51260 + }, + { + "epoch": 337.30263157894734, + "grad_norm": 1.2490793466567993, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 51270 + }, + { + "epoch": 337.36842105263156, + "grad_norm": 1.2546192407608032, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 51280 + }, + { + "epoch": 337.4342105263158, + "grad_norm": 1.2390152215957642, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 51290 + }, + { + "epoch": 337.5, + "grad_norm": 1.2711284160614014, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 51300 + }, + { + "epoch": 337.5657894736842, + "grad_norm": 1.480649471282959, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 51310 + }, + { + "epoch": 337.63157894736844, + "grad_norm": 1.043129563331604, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 51320 + }, + { + "epoch": 337.69736842105266, + "grad_norm": 0.9065507054328918, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 51330 + }, + { + "epoch": 337.7631578947368, + "grad_norm": 1.0650672912597656, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 51340 + }, + { + "epoch": 337.82894736842104, + "grad_norm": 1.24941086769104, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 51350 + }, + { + "epoch": 337.89473684210526, + "grad_norm": 1.2355936765670776, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 51360 + }, + { + "epoch": 337.9605263157895, + "grad_norm": 1.2188667058944702, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 51370 + }, + { + "epoch": 338.0263157894737, + "grad_norm": 1.136441707611084, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 51380 + }, + { + "epoch": 338.0921052631579, + "grad_norm": 1.5337886810302734, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 51390 + }, + { + "epoch": 338.1578947368421, + "grad_norm": 1.4767624139785767, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 51400 + }, + { + "epoch": 338.2236842105263, + "grad_norm": 1.1894986629486084, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 51410 + }, + { + "epoch": 338.2894736842105, + "grad_norm": 1.316213846206665, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 51420 + }, + { + "epoch": 338.35526315789474, + "grad_norm": 1.614834189414978, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 51430 + }, + { + "epoch": 338.42105263157896, + "grad_norm": 1.146628737449646, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 51440 + }, + { + "epoch": 338.4868421052632, + "grad_norm": 1.2770928144454956, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 51450 + }, + { + "epoch": 338.55263157894734, + "grad_norm": 1.2232598066329956, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 51460 + }, + { + "epoch": 338.61842105263156, + "grad_norm": 1.278428554534912, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 51470 + }, + { + "epoch": 338.6842105263158, + "grad_norm": 1.1535056829452515, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 51480 + }, + { + "epoch": 338.75, + "grad_norm": 0.9330645203590393, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 51490 + }, + { + "epoch": 338.8157894736842, + "grad_norm": 1.2083619832992554, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 51500 + }, + { + "epoch": 338.88157894736844, + "grad_norm": 1.0455271005630493, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 51510 + }, + { + "epoch": 338.94736842105266, + "grad_norm": 1.051611065864563, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 51520 + }, + { + "epoch": 339.0131578947368, + "grad_norm": 0.9898776412010193, + "learning_rate": 0.0001, + "loss": 0.0169, + "step": 51530 + }, + { + "epoch": 339.07894736842104, + "grad_norm": 1.3422266244888306, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 51540 + }, + { + "epoch": 339.14473684210526, + "grad_norm": 0.925194501876831, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 51550 + }, + { + "epoch": 339.2105263157895, + "grad_norm": 1.2911876440048218, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 51560 + }, + { + "epoch": 339.2763157894737, + "grad_norm": 1.5111730098724365, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 51570 + }, + { + "epoch": 339.3421052631579, + "grad_norm": 1.3134963512420654, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 51580 + }, + { + "epoch": 339.4078947368421, + "grad_norm": 1.3850778341293335, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 51590 + }, + { + "epoch": 339.4736842105263, + "grad_norm": 0.9667593836784363, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 51600 + }, + { + "epoch": 339.5394736842105, + "grad_norm": 1.3687058687210083, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 51610 + }, + { + "epoch": 339.60526315789474, + "grad_norm": 1.0629100799560547, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 51620 + }, + { + "epoch": 339.67105263157896, + "grad_norm": 0.9805474877357483, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 51630 + }, + { + "epoch": 339.7368421052632, + "grad_norm": 1.029873251914978, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 51640 + }, + { + "epoch": 339.80263157894734, + "grad_norm": 1.1182074546813965, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 51650 + }, + { + "epoch": 339.86842105263156, + "grad_norm": 0.8829177618026733, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 51660 + }, + { + "epoch": 339.9342105263158, + "grad_norm": 1.0989736318588257, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 51670 + }, + { + "epoch": 340.0, + "grad_norm": 1.5837504863739014, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 51680 + }, + { + "epoch": 340.0657894736842, + "grad_norm": 1.524735689163208, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 51690 + }, + { + "epoch": 340.13157894736844, + "grad_norm": 1.6917763948440552, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 51700 + }, + { + "epoch": 340.19736842105266, + "grad_norm": 1.0942375659942627, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 51710 + }, + { + "epoch": 340.2631578947368, + "grad_norm": 1.0912152528762817, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 51720 + }, + { + "epoch": 340.32894736842104, + "grad_norm": 0.9950433373451233, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 51730 + }, + { + "epoch": 340.39473684210526, + "grad_norm": 1.1365388631820679, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 51740 + }, + { + "epoch": 340.4605263157895, + "grad_norm": 1.5053037405014038, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 51750 + }, + { + "epoch": 340.5263157894737, + "grad_norm": 1.1808347702026367, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 51760 + }, + { + "epoch": 340.5921052631579, + "grad_norm": 1.6994234323501587, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 51770 + }, + { + "epoch": 340.6578947368421, + "grad_norm": 1.4310306310653687, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 51780 + }, + { + "epoch": 340.7236842105263, + "grad_norm": 1.2134596109390259, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 51790 + }, + { + "epoch": 340.7894736842105, + "grad_norm": 1.7126917839050293, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 51800 + }, + { + "epoch": 340.85526315789474, + "grad_norm": 1.0184993743896484, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 51810 + }, + { + "epoch": 340.92105263157896, + "grad_norm": 1.3017053604125977, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 51820 + }, + { + "epoch": 340.9868421052632, + "grad_norm": 0.8897969126701355, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 51830 + }, + { + "epoch": 341.05263157894734, + "grad_norm": 1.1662821769714355, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 51840 + }, + { + "epoch": 341.11842105263156, + "grad_norm": 1.338837742805481, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 51850 + }, + { + "epoch": 341.1842105263158, + "grad_norm": 1.1382125616073608, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 51860 + }, + { + "epoch": 341.25, + "grad_norm": 0.9120957851409912, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 51870 + }, + { + "epoch": 341.3157894736842, + "grad_norm": 1.134007215499878, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 51880 + }, + { + "epoch": 341.38157894736844, + "grad_norm": 1.2043037414550781, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 51890 + }, + { + "epoch": 341.44736842105266, + "grad_norm": 1.6688247919082642, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 51900 + }, + { + "epoch": 341.5131578947368, + "grad_norm": 1.2863478660583496, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 51910 + }, + { + "epoch": 341.57894736842104, + "grad_norm": 1.4102164506912231, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 51920 + }, + { + "epoch": 341.64473684210526, + "grad_norm": 1.6692852973937988, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 51930 + }, + { + "epoch": 341.7105263157895, + "grad_norm": 1.386616826057434, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 51940 + }, + { + "epoch": 341.7763157894737, + "grad_norm": 1.1404385566711426, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 51950 + }, + { + "epoch": 341.8421052631579, + "grad_norm": 1.3173713684082031, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 51960 + }, + { + "epoch": 341.9078947368421, + "grad_norm": 1.272148847579956, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 51970 + }, + { + "epoch": 341.9736842105263, + "grad_norm": 1.2954585552215576, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 51980 + }, + { + "epoch": 342.0394736842105, + "grad_norm": 1.546271800994873, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 51990 + }, + { + "epoch": 342.10526315789474, + "grad_norm": 1.21526300907135, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 52000 + }, + { + "epoch": 342.17105263157896, + "grad_norm": 1.2381492853164673, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 52010 + }, + { + "epoch": 342.2368421052632, + "grad_norm": 1.5428966283798218, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 52020 + }, + { + "epoch": 342.30263157894734, + "grad_norm": 1.5430893898010254, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 52030 + }, + { + "epoch": 342.36842105263156, + "grad_norm": 0.9203903079032898, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 52040 + }, + { + "epoch": 342.4342105263158, + "grad_norm": 0.9490885734558105, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 52050 + }, + { + "epoch": 342.5, + "grad_norm": 0.8359211087226868, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 52060 + }, + { + "epoch": 342.5657894736842, + "grad_norm": 1.2397630214691162, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 52070 + }, + { + "epoch": 342.63157894736844, + "grad_norm": 1.1610674858093262, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 52080 + }, + { + "epoch": 342.69736842105266, + "grad_norm": 1.3798439502716064, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 52090 + }, + { + "epoch": 342.7631578947368, + "grad_norm": 1.1632894277572632, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 52100 + }, + { + "epoch": 342.82894736842104, + "grad_norm": 1.3754557371139526, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 52110 + }, + { + "epoch": 342.89473684210526, + "grad_norm": 1.087170124053955, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 52120 + }, + { + "epoch": 342.9605263157895, + "grad_norm": 0.9013633131980896, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 52130 + }, + { + "epoch": 343.0263157894737, + "grad_norm": 1.5039817094802856, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 52140 + }, + { + "epoch": 343.0921052631579, + "grad_norm": 1.4814598560333252, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 52150 + }, + { + "epoch": 343.1578947368421, + "grad_norm": 1.120323896408081, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 52160 + }, + { + "epoch": 343.2236842105263, + "grad_norm": 0.9336338639259338, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 52170 + }, + { + "epoch": 343.2894736842105, + "grad_norm": 0.7774641513824463, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 52180 + }, + { + "epoch": 343.35526315789474, + "grad_norm": 0.8035191297531128, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 52190 + }, + { + "epoch": 343.42105263157896, + "grad_norm": 0.9542168974876404, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 52200 + }, + { + "epoch": 343.4868421052632, + "grad_norm": 1.1261277198791504, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 52210 + }, + { + "epoch": 343.55263157894734, + "grad_norm": 1.2652689218521118, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 52220 + }, + { + "epoch": 343.61842105263156, + "grad_norm": 1.4694147109985352, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 52230 + }, + { + "epoch": 343.6842105263158, + "grad_norm": 0.8322300910949707, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 52240 + }, + { + "epoch": 343.75, + "grad_norm": 1.4689432382583618, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 52250 + }, + { + "epoch": 343.8157894736842, + "grad_norm": 1.446466088294983, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 52260 + }, + { + "epoch": 343.88157894736844, + "grad_norm": 1.2933214902877808, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 52270 + }, + { + "epoch": 343.94736842105266, + "grad_norm": 1.245603322982788, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 52280 + }, + { + "epoch": 344.0131578947368, + "grad_norm": 1.0786410570144653, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 52290 + }, + { + "epoch": 344.07894736842104, + "grad_norm": 1.2536273002624512, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 52300 + }, + { + "epoch": 344.14473684210526, + "grad_norm": 1.1757087707519531, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 52310 + }, + { + "epoch": 344.2105263157895, + "grad_norm": 1.397163987159729, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 52320 + }, + { + "epoch": 344.2763157894737, + "grad_norm": 1.2626746892929077, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 52330 + }, + { + "epoch": 344.3421052631579, + "grad_norm": 1.5072098970413208, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 52340 + }, + { + "epoch": 344.4078947368421, + "grad_norm": 1.0162240266799927, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 52350 + }, + { + "epoch": 344.4736842105263, + "grad_norm": 1.1723800897598267, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 52360 + }, + { + "epoch": 344.5394736842105, + "grad_norm": 1.722672939300537, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 52370 + }, + { + "epoch": 344.60526315789474, + "grad_norm": 1.4377808570861816, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 52380 + }, + { + "epoch": 344.67105263157896, + "grad_norm": 1.4362269639968872, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 52390 + }, + { + "epoch": 344.7368421052632, + "grad_norm": 1.1657408475875854, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 52400 + }, + { + "epoch": 344.80263157894734, + "grad_norm": 1.3093397617340088, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 52410 + }, + { + "epoch": 344.86842105263156, + "grad_norm": 1.1783092021942139, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 52420 + }, + { + "epoch": 344.9342105263158, + "grad_norm": 1.0397982597351074, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 52430 + }, + { + "epoch": 345.0, + "grad_norm": 1.1493473052978516, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 52440 + }, + { + "epoch": 345.0657894736842, + "grad_norm": 1.1874574422836304, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 52450 + }, + { + "epoch": 345.13157894736844, + "grad_norm": 1.2612534761428833, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 52460 + }, + { + "epoch": 345.19736842105266, + "grad_norm": 1.2384288311004639, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 52470 + }, + { + "epoch": 345.2631578947368, + "grad_norm": 1.2733972072601318, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 52480 + }, + { + "epoch": 345.32894736842104, + "grad_norm": 1.046722650527954, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 52490 + }, + { + "epoch": 345.39473684210526, + "grad_norm": 1.7517563104629517, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 52500 + }, + { + "epoch": 345.4605263157895, + "grad_norm": 1.5715398788452148, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 52510 + }, + { + "epoch": 345.5263157894737, + "grad_norm": 1.4876675605773926, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 52520 + }, + { + "epoch": 345.5921052631579, + "grad_norm": 1.2595746517181396, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 52530 + }, + { + "epoch": 345.6578947368421, + "grad_norm": 1.4354819059371948, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 52540 + }, + { + "epoch": 345.7236842105263, + "grad_norm": 1.2486364841461182, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 52550 + }, + { + "epoch": 345.7894736842105, + "grad_norm": 1.4385180473327637, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 52560 + }, + { + "epoch": 345.85526315789474, + "grad_norm": 1.4834891557693481, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 52570 + }, + { + "epoch": 345.92105263157896, + "grad_norm": 1.1149605512619019, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 52580 + }, + { + "epoch": 345.9868421052632, + "grad_norm": 1.0604519844055176, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 52590 + }, + { + "epoch": 346.05263157894734, + "grad_norm": 1.482948899269104, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 52600 + }, + { + "epoch": 346.11842105263156, + "grad_norm": 0.9433545470237732, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 52610 + }, + { + "epoch": 346.1842105263158, + "grad_norm": 1.2080388069152832, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 52620 + }, + { + "epoch": 346.25, + "grad_norm": 1.2017712593078613, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 52630 + }, + { + "epoch": 346.3157894736842, + "grad_norm": 1.3862873315811157, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 52640 + }, + { + "epoch": 346.38157894736844, + "grad_norm": 1.4891222715377808, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 52650 + }, + { + "epoch": 346.44736842105266, + "grad_norm": 1.356292724609375, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 52660 + }, + { + "epoch": 346.5131578947368, + "grad_norm": 1.215113639831543, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 52670 + }, + { + "epoch": 346.57894736842104, + "grad_norm": 1.551318883895874, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 52680 + }, + { + "epoch": 346.64473684210526, + "grad_norm": 1.6980267763137817, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 52690 + }, + { + "epoch": 346.7105263157895, + "grad_norm": 1.528921365737915, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 52700 + }, + { + "epoch": 346.7763157894737, + "grad_norm": 1.3663548231124878, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 52710 + }, + { + "epoch": 346.8421052631579, + "grad_norm": 1.3794395923614502, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 52720 + }, + { + "epoch": 346.9078947368421, + "grad_norm": 1.5531320571899414, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 52730 + }, + { + "epoch": 346.9736842105263, + "grad_norm": 1.017933964729309, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 52740 + }, + { + "epoch": 347.0394736842105, + "grad_norm": 1.3421839475631714, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 52750 + }, + { + "epoch": 347.10526315789474, + "grad_norm": 1.4590574502944946, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 52760 + }, + { + "epoch": 347.17105263157896, + "grad_norm": 1.309173345565796, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 52770 + }, + { + "epoch": 347.2368421052632, + "grad_norm": 1.1551851034164429, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 52780 + }, + { + "epoch": 347.30263157894734, + "grad_norm": 1.4142370223999023, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 52790 + }, + { + "epoch": 347.36842105263156, + "grad_norm": 1.2437849044799805, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 52800 + }, + { + "epoch": 347.4342105263158, + "grad_norm": 1.08604097366333, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 52810 + }, + { + "epoch": 347.5, + "grad_norm": 1.0768139362335205, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 52820 + }, + { + "epoch": 347.5657894736842, + "grad_norm": 1.507045865058899, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 52830 + }, + { + "epoch": 347.63157894736844, + "grad_norm": 1.1751208305358887, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 52840 + }, + { + "epoch": 347.69736842105266, + "grad_norm": 1.4803016185760498, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 52850 + }, + { + "epoch": 347.7631578947368, + "grad_norm": 1.3335362672805786, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 52860 + }, + { + "epoch": 347.82894736842104, + "grad_norm": 0.9887474179267883, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 52870 + }, + { + "epoch": 347.89473684210526, + "grad_norm": 1.2500858306884766, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 52880 + }, + { + "epoch": 347.9605263157895, + "grad_norm": 1.0655533075332642, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 52890 + }, + { + "epoch": 348.0263157894737, + "grad_norm": 1.797389268875122, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 52900 + }, + { + "epoch": 348.0921052631579, + "grad_norm": 1.0135571956634521, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 52910 + }, + { + "epoch": 348.1578947368421, + "grad_norm": 1.4118088483810425, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 52920 + }, + { + "epoch": 348.2236842105263, + "grad_norm": 1.5907139778137207, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 52930 + }, + { + "epoch": 348.2894736842105, + "grad_norm": 0.9017720818519592, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 52940 + }, + { + "epoch": 348.35526315789474, + "grad_norm": 1.0268571376800537, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 52950 + }, + { + "epoch": 348.42105263157896, + "grad_norm": 1.0724031925201416, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 52960 + }, + { + "epoch": 348.4868421052632, + "grad_norm": 0.9979811906814575, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 52970 + }, + { + "epoch": 348.55263157894734, + "grad_norm": 1.0473777055740356, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 52980 + }, + { + "epoch": 348.61842105263156, + "grad_norm": 1.4823224544525146, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 52990 + }, + { + "epoch": 348.6842105263158, + "grad_norm": 1.1118677854537964, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 53000 + }, + { + "epoch": 348.75, + "grad_norm": 1.2485218048095703, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 53010 + }, + { + "epoch": 348.8157894736842, + "grad_norm": 1.1463638544082642, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 53020 + }, + { + "epoch": 348.88157894736844, + "grad_norm": 0.8539924025535583, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 53030 + }, + { + "epoch": 348.94736842105266, + "grad_norm": 0.826951801776886, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 53040 + }, + { + "epoch": 349.0131578947368, + "grad_norm": 1.3441721200942993, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 53050 + }, + { + "epoch": 349.07894736842104, + "grad_norm": 1.0839155912399292, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 53060 + }, + { + "epoch": 349.14473684210526, + "grad_norm": 1.1949419975280762, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 53070 + }, + { + "epoch": 349.2105263157895, + "grad_norm": 0.8186034560203552, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 53080 + }, + { + "epoch": 349.2763157894737, + "grad_norm": 0.8655272126197815, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 53090 + }, + { + "epoch": 349.3421052631579, + "grad_norm": 0.7736495733261108, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 53100 + }, + { + "epoch": 349.4078947368421, + "grad_norm": 0.711251437664032, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 53110 + }, + { + "epoch": 349.4736842105263, + "grad_norm": 1.2453712224960327, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 53120 + }, + { + "epoch": 349.5394736842105, + "grad_norm": 1.0265164375305176, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 53130 + }, + { + "epoch": 349.60526315789474, + "grad_norm": 0.9887539744377136, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 53140 + }, + { + "epoch": 349.67105263157896, + "grad_norm": 0.9116423726081848, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 53150 + }, + { + "epoch": 349.7368421052632, + "grad_norm": 1.176967978477478, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 53160 + }, + { + "epoch": 349.80263157894734, + "grad_norm": 1.2313488721847534, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 53170 + }, + { + "epoch": 349.86842105263156, + "grad_norm": 0.7793358564376831, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 53180 + }, + { + "epoch": 349.9342105263158, + "grad_norm": 0.9506605863571167, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 53190 + }, + { + "epoch": 350.0, + "grad_norm": 0.9445679783821106, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 53200 + }, + { + "epoch": 350.0657894736842, + "grad_norm": 1.0125964879989624, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 53210 + }, + { + "epoch": 350.13157894736844, + "grad_norm": 1.2516371011734009, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 53220 + }, + { + "epoch": 350.19736842105266, + "grad_norm": 1.4000440835952759, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 53230 + }, + { + "epoch": 350.2631578947368, + "grad_norm": 1.0942318439483643, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 53240 + }, + { + "epoch": 350.32894736842104, + "grad_norm": 1.0864887237548828, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 53250 + }, + { + "epoch": 350.39473684210526, + "grad_norm": 1.2966852188110352, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 53260 + }, + { + "epoch": 350.4605263157895, + "grad_norm": 1.227264404296875, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 53270 + }, + { + "epoch": 350.5263157894737, + "grad_norm": 1.1729018688201904, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 53280 + }, + { + "epoch": 350.5921052631579, + "grad_norm": 0.7288147211074829, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 53290 + }, + { + "epoch": 350.6578947368421, + "grad_norm": 0.893334150314331, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 53300 + }, + { + "epoch": 350.7236842105263, + "grad_norm": 1.2939788103103638, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 53310 + }, + { + "epoch": 350.7894736842105, + "grad_norm": 0.9002676010131836, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 53320 + }, + { + "epoch": 350.85526315789474, + "grad_norm": 0.9034085869789124, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 53330 + }, + { + "epoch": 350.92105263157896, + "grad_norm": 1.2274662256240845, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 53340 + }, + { + "epoch": 350.9868421052632, + "grad_norm": 0.969467043876648, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 53350 + }, + { + "epoch": 351.05263157894734, + "grad_norm": 1.1215009689331055, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 53360 + }, + { + "epoch": 351.11842105263156, + "grad_norm": 1.4852532148361206, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 53370 + }, + { + "epoch": 351.1842105263158, + "grad_norm": 1.1253795623779297, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 53380 + }, + { + "epoch": 351.25, + "grad_norm": 0.8905407786369324, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 53390 + }, + { + "epoch": 351.3157894736842, + "grad_norm": 1.2344647645950317, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 53400 + }, + { + "epoch": 351.38157894736844, + "grad_norm": 0.9669228196144104, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 53410 + }, + { + "epoch": 351.44736842105266, + "grad_norm": 1.300685167312622, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 53420 + }, + { + "epoch": 351.5131578947368, + "grad_norm": 1.1283105611801147, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 53430 + }, + { + "epoch": 351.57894736842104, + "grad_norm": 1.390939474105835, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 53440 + }, + { + "epoch": 351.64473684210526, + "grad_norm": 1.187654972076416, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 53450 + }, + { + "epoch": 351.7105263157895, + "grad_norm": 0.9280898571014404, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 53460 + }, + { + "epoch": 351.7763157894737, + "grad_norm": 1.1642065048217773, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 53470 + }, + { + "epoch": 351.8421052631579, + "grad_norm": 1.3112080097198486, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 53480 + }, + { + "epoch": 351.9078947368421, + "grad_norm": 1.1810340881347656, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 53490 + }, + { + "epoch": 351.9736842105263, + "grad_norm": 1.1890909671783447, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 53500 + }, + { + "epoch": 352.0394736842105, + "grad_norm": 0.9350360035896301, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 53510 + }, + { + "epoch": 352.10526315789474, + "grad_norm": 1.0225274562835693, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 53520 + }, + { + "epoch": 352.17105263157896, + "grad_norm": 1.1939771175384521, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 53530 + }, + { + "epoch": 352.2368421052632, + "grad_norm": 0.9395789504051208, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 53540 + }, + { + "epoch": 352.30263157894734, + "grad_norm": 1.4980489015579224, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 53550 + }, + { + "epoch": 352.36842105263156, + "grad_norm": 1.4125310182571411, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 53560 + }, + { + "epoch": 352.4342105263158, + "grad_norm": 1.1465486288070679, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 53570 + }, + { + "epoch": 352.5, + "grad_norm": 1.5160516500473022, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 53580 + }, + { + "epoch": 352.5657894736842, + "grad_norm": 1.0091547966003418, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 53590 + }, + { + "epoch": 352.63157894736844, + "grad_norm": 1.6452295780181885, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 53600 + }, + { + "epoch": 352.69736842105266, + "grad_norm": 0.8045058846473694, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 53610 + }, + { + "epoch": 352.7631578947368, + "grad_norm": 0.9372885823249817, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 53620 + }, + { + "epoch": 352.82894736842104, + "grad_norm": 1.2400946617126465, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 53630 + }, + { + "epoch": 352.89473684210526, + "grad_norm": 1.2363208532333374, + "learning_rate": 0.0001, + "loss": 0.0174, + "step": 53640 + }, + { + "epoch": 352.9605263157895, + "grad_norm": 1.4583326578140259, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 53650 + }, + { + "epoch": 353.0263157894737, + "grad_norm": 1.4135735034942627, + "learning_rate": 0.0001, + "loss": 0.0178, + "step": 53660 + }, + { + "epoch": 353.0921052631579, + "grad_norm": 0.8179410099983215, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 53670 + }, + { + "epoch": 353.1578947368421, + "grad_norm": 1.1129777431488037, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 53680 + }, + { + "epoch": 353.2236842105263, + "grad_norm": 0.9328417778015137, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 53690 + }, + { + "epoch": 353.2894736842105, + "grad_norm": 1.0493443012237549, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 53700 + }, + { + "epoch": 353.35526315789474, + "grad_norm": 1.4180327653884888, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 53710 + }, + { + "epoch": 353.42105263157896, + "grad_norm": 1.10849130153656, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 53720 + }, + { + "epoch": 353.4868421052632, + "grad_norm": 0.9594805836677551, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 53730 + }, + { + "epoch": 353.55263157894734, + "grad_norm": 1.55135977268219, + "learning_rate": 0.0001, + "loss": 0.0168, + "step": 53740 + }, + { + "epoch": 353.61842105263156, + "grad_norm": 1.0522472858428955, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 53750 + }, + { + "epoch": 353.6842105263158, + "grad_norm": 1.2985424995422363, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 53760 + }, + { + "epoch": 353.75, + "grad_norm": 1.2076297998428345, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 53770 + }, + { + "epoch": 353.8157894736842, + "grad_norm": 1.4441065788269043, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 53780 + }, + { + "epoch": 353.88157894736844, + "grad_norm": 1.4848010540008545, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 53790 + }, + { + "epoch": 353.94736842105266, + "grad_norm": 1.2929956912994385, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 53800 + }, + { + "epoch": 354.0131578947368, + "grad_norm": 1.4430453777313232, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 53810 + }, + { + "epoch": 354.07894736842104, + "grad_norm": 1.5679028034210205, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 53820 + }, + { + "epoch": 354.14473684210526, + "grad_norm": 0.8929752111434937, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 53830 + }, + { + "epoch": 354.2105263157895, + "grad_norm": 1.3934887647628784, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 53840 + }, + { + "epoch": 354.2763157894737, + "grad_norm": 1.3150675296783447, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 53850 + }, + { + "epoch": 354.3421052631579, + "grad_norm": 1.1540673971176147, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 53860 + }, + { + "epoch": 354.4078947368421, + "grad_norm": 1.2295771837234497, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 53870 + }, + { + "epoch": 354.4736842105263, + "grad_norm": 1.2992318868637085, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 53880 + }, + { + "epoch": 354.5394736842105, + "grad_norm": 1.3661577701568604, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 53890 + }, + { + "epoch": 354.60526315789474, + "grad_norm": 1.475778579711914, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 53900 + }, + { + "epoch": 354.67105263157896, + "grad_norm": 1.3246492147445679, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 53910 + }, + { + "epoch": 354.7368421052632, + "grad_norm": 1.3882511854171753, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 53920 + }, + { + "epoch": 354.80263157894734, + "grad_norm": 0.9738257527351379, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 53930 + }, + { + "epoch": 354.86842105263156, + "grad_norm": 1.405836820602417, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 53940 + }, + { + "epoch": 354.9342105263158, + "grad_norm": 1.3983672857284546, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 53950 + }, + { + "epoch": 355.0, + "grad_norm": 1.1816673278808594, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 53960 + }, + { + "epoch": 355.0657894736842, + "grad_norm": 0.901597797870636, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 53970 + }, + { + "epoch": 355.13157894736844, + "grad_norm": 0.8046585321426392, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 53980 + }, + { + "epoch": 355.19736842105266, + "grad_norm": 0.8949022889137268, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 53990 + }, + { + "epoch": 355.2631578947368, + "grad_norm": 0.8433843851089478, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 54000 + }, + { + "epoch": 355.32894736842104, + "grad_norm": 1.3329582214355469, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 54010 + }, + { + "epoch": 355.39473684210526, + "grad_norm": 1.203683614730835, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 54020 + }, + { + "epoch": 355.4605263157895, + "grad_norm": 1.1506283283233643, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 54030 + }, + { + "epoch": 355.5263157894737, + "grad_norm": 1.4512953758239746, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 54040 + }, + { + "epoch": 355.5921052631579, + "grad_norm": 1.065973162651062, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 54050 + }, + { + "epoch": 355.6578947368421, + "grad_norm": 0.8735188245773315, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 54060 + }, + { + "epoch": 355.7236842105263, + "grad_norm": 1.3658928871154785, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 54070 + }, + { + "epoch": 355.7894736842105, + "grad_norm": 1.037252426147461, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 54080 + }, + { + "epoch": 355.85526315789474, + "grad_norm": 1.2813448905944824, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 54090 + }, + { + "epoch": 355.92105263157896, + "grad_norm": 1.114138126373291, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 54100 + }, + { + "epoch": 355.9868421052632, + "grad_norm": 1.242543339729309, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 54110 + }, + { + "epoch": 356.05263157894734, + "grad_norm": 1.8601607084274292, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 54120 + }, + { + "epoch": 356.11842105263156, + "grad_norm": 1.521649718284607, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 54130 + }, + { + "epoch": 356.1842105263158, + "grad_norm": 1.109571099281311, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 54140 + }, + { + "epoch": 356.25, + "grad_norm": 1.3010332584381104, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 54150 + }, + { + "epoch": 356.3157894736842, + "grad_norm": 1.2290462255477905, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 54160 + }, + { + "epoch": 356.38157894736844, + "grad_norm": 1.0519379377365112, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 54170 + }, + { + "epoch": 356.44736842105266, + "grad_norm": 1.036097764968872, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 54180 + }, + { + "epoch": 356.5131578947368, + "grad_norm": 1.120662808418274, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 54190 + }, + { + "epoch": 356.57894736842104, + "grad_norm": 0.9310142993927002, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 54200 + }, + { + "epoch": 356.64473684210526, + "grad_norm": 1.009954810142517, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 54210 + }, + { + "epoch": 356.7105263157895, + "grad_norm": 0.9205268621444702, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 54220 + }, + { + "epoch": 356.7763157894737, + "grad_norm": 1.1177328824996948, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 54230 + }, + { + "epoch": 356.8421052631579, + "grad_norm": 1.0612447261810303, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 54240 + }, + { + "epoch": 356.9078947368421, + "grad_norm": 0.7625839710235596, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 54250 + }, + { + "epoch": 356.9736842105263, + "grad_norm": 0.8989898562431335, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 54260 + }, + { + "epoch": 357.0394736842105, + "grad_norm": 1.3405532836914062, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 54270 + }, + { + "epoch": 357.10526315789474, + "grad_norm": 1.3939285278320312, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 54280 + }, + { + "epoch": 357.17105263157896, + "grad_norm": 1.1620155572891235, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 54290 + }, + { + "epoch": 357.2368421052632, + "grad_norm": 1.2777410745620728, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 54300 + }, + { + "epoch": 357.30263157894734, + "grad_norm": 1.271978735923767, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 54310 + }, + { + "epoch": 357.36842105263156, + "grad_norm": 1.060716152191162, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 54320 + }, + { + "epoch": 357.4342105263158, + "grad_norm": 1.1990467309951782, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 54330 + }, + { + "epoch": 357.5, + "grad_norm": 1.0680278539657593, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 54340 + }, + { + "epoch": 357.5657894736842, + "grad_norm": 1.2417722940444946, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 54350 + }, + { + "epoch": 357.63157894736844, + "grad_norm": 1.0456074476242065, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 54360 + }, + { + "epoch": 357.69736842105266, + "grad_norm": 1.1133095026016235, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 54370 + }, + { + "epoch": 357.7631578947368, + "grad_norm": 1.355271816253662, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 54380 + }, + { + "epoch": 357.82894736842104, + "grad_norm": 1.0156669616699219, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 54390 + }, + { + "epoch": 357.89473684210526, + "grad_norm": 1.571312427520752, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 54400 + }, + { + "epoch": 357.9605263157895, + "grad_norm": 1.2970470190048218, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 54410 + }, + { + "epoch": 358.0263157894737, + "grad_norm": 1.343517780303955, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 54420 + }, + { + "epoch": 358.0921052631579, + "grad_norm": 1.6380618810653687, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 54430 + }, + { + "epoch": 358.1578947368421, + "grad_norm": 1.1087614297866821, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 54440 + }, + { + "epoch": 358.2236842105263, + "grad_norm": 1.523401141166687, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 54450 + }, + { + "epoch": 358.2894736842105, + "grad_norm": 1.1688175201416016, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 54460 + }, + { + "epoch": 358.35526315789474, + "grad_norm": 1.3475173711776733, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 54470 + }, + { + "epoch": 358.42105263157896, + "grad_norm": 1.240882396697998, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 54480 + }, + { + "epoch": 358.4868421052632, + "grad_norm": 1.5280466079711914, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 54490 + }, + { + "epoch": 358.55263157894734, + "grad_norm": 1.313010334968567, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 54500 + }, + { + "epoch": 358.61842105263156, + "grad_norm": 1.4535280466079712, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 54510 + }, + { + "epoch": 358.6842105263158, + "grad_norm": 0.7499117255210876, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 54520 + }, + { + "epoch": 358.75, + "grad_norm": 1.1150604486465454, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 54530 + }, + { + "epoch": 358.8157894736842, + "grad_norm": 1.5090540647506714, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 54540 + }, + { + "epoch": 358.88157894736844, + "grad_norm": 1.2874553203582764, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 54550 + }, + { + "epoch": 358.94736842105266, + "grad_norm": 1.4591070413589478, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 54560 + }, + { + "epoch": 359.0131578947368, + "grad_norm": 1.5048354864120483, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 54570 + }, + { + "epoch": 359.07894736842104, + "grad_norm": 1.1533069610595703, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 54580 + }, + { + "epoch": 359.14473684210526, + "grad_norm": 1.287916660308838, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 54590 + }, + { + "epoch": 359.2105263157895, + "grad_norm": 1.1660511493682861, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 54600 + }, + { + "epoch": 359.2763157894737, + "grad_norm": 1.1380329132080078, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 54610 + }, + { + "epoch": 359.3421052631579, + "grad_norm": 1.1352312564849854, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 54620 + }, + { + "epoch": 359.4078947368421, + "grad_norm": 1.5034499168395996, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 54630 + }, + { + "epoch": 359.4736842105263, + "grad_norm": 1.6546825170516968, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 54640 + }, + { + "epoch": 359.5394736842105, + "grad_norm": 1.4479390382766724, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 54650 + }, + { + "epoch": 359.60526315789474, + "grad_norm": 1.404312014579773, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 54660 + }, + { + "epoch": 359.67105263157896, + "grad_norm": 1.507351040840149, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 54670 + }, + { + "epoch": 359.7368421052632, + "grad_norm": 1.2614494562149048, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 54680 + }, + { + "epoch": 359.80263157894734, + "grad_norm": 0.779187798500061, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 54690 + }, + { + "epoch": 359.86842105263156, + "grad_norm": 1.3693275451660156, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 54700 + }, + { + "epoch": 359.9342105263158, + "grad_norm": 1.4360358715057373, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 54710 + }, + { + "epoch": 360.0, + "grad_norm": 1.4990230798721313, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 54720 + }, + { + "epoch": 360.0657894736842, + "grad_norm": 1.2829705476760864, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 54730 + }, + { + "epoch": 360.13157894736844, + "grad_norm": 0.8983616232872009, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 54740 + }, + { + "epoch": 360.19736842105266, + "grad_norm": 1.4845367670059204, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 54750 + }, + { + "epoch": 360.2631578947368, + "grad_norm": 1.124158501625061, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 54760 + }, + { + "epoch": 360.32894736842104, + "grad_norm": 1.3822499513626099, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 54770 + }, + { + "epoch": 360.39473684210526, + "grad_norm": 1.2073116302490234, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 54780 + }, + { + "epoch": 360.4605263157895, + "grad_norm": 1.0493237972259521, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 54790 + }, + { + "epoch": 360.5263157894737, + "grad_norm": 1.4061884880065918, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 54800 + }, + { + "epoch": 360.5921052631579, + "grad_norm": 2.0674805641174316, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 54810 + }, + { + "epoch": 360.6578947368421, + "grad_norm": 1.6307419538497925, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 54820 + }, + { + "epoch": 360.7236842105263, + "grad_norm": 1.5388418436050415, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 54830 + }, + { + "epoch": 360.7894736842105, + "grad_norm": 1.1315034627914429, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 54840 + }, + { + "epoch": 360.85526315789474, + "grad_norm": 1.5357550382614136, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 54850 + }, + { + "epoch": 360.92105263157896, + "grad_norm": 1.019702672958374, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 54860 + }, + { + "epoch": 360.9868421052632, + "grad_norm": 1.2998785972595215, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 54870 + }, + { + "epoch": 361.05263157894734, + "grad_norm": 0.9051557183265686, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 54880 + }, + { + "epoch": 361.11842105263156, + "grad_norm": 1.275030493736267, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 54890 + }, + { + "epoch": 361.1842105263158, + "grad_norm": 0.9858371615409851, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 54900 + }, + { + "epoch": 361.25, + "grad_norm": 1.1971203088760376, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 54910 + }, + { + "epoch": 361.3157894736842, + "grad_norm": 1.2768568992614746, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 54920 + }, + { + "epoch": 361.38157894736844, + "grad_norm": 1.199203372001648, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 54930 + }, + { + "epoch": 361.44736842105266, + "grad_norm": 0.8393948078155518, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 54940 + }, + { + "epoch": 361.5131578947368, + "grad_norm": 1.0057693719863892, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 54950 + }, + { + "epoch": 361.57894736842104, + "grad_norm": 1.328454852104187, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 54960 + }, + { + "epoch": 361.64473684210526, + "grad_norm": 1.4820215702056885, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 54970 + }, + { + "epoch": 361.7105263157895, + "grad_norm": 1.2273774147033691, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 54980 + }, + { + "epoch": 361.7763157894737, + "grad_norm": 0.9895418882369995, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 54990 + }, + { + "epoch": 361.8421052631579, + "grad_norm": 1.2177479267120361, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 55000 + }, + { + "epoch": 361.9078947368421, + "grad_norm": 1.5505794286727905, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 55010 + }, + { + "epoch": 361.9736842105263, + "grad_norm": 1.2999932765960693, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 55020 + }, + { + "epoch": 362.0394736842105, + "grad_norm": 1.0745863914489746, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 55030 + }, + { + "epoch": 362.10526315789474, + "grad_norm": 1.1478854417800903, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 55040 + }, + { + "epoch": 362.17105263157896, + "grad_norm": 1.1435706615447998, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 55050 + }, + { + "epoch": 362.2368421052632, + "grad_norm": 1.1603302955627441, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 55060 + }, + { + "epoch": 362.30263157894734, + "grad_norm": 1.1007798910140991, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 55070 + }, + { + "epoch": 362.36842105263156, + "grad_norm": 0.8749071359634399, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 55080 + }, + { + "epoch": 362.4342105263158, + "grad_norm": 1.287734031677246, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 55090 + }, + { + "epoch": 362.5, + "grad_norm": 1.153018593788147, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 55100 + }, + { + "epoch": 362.5657894736842, + "grad_norm": 1.2814031839370728, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 55110 + }, + { + "epoch": 362.63157894736844, + "grad_norm": 1.211867094039917, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 55120 + }, + { + "epoch": 362.69736842105266, + "grad_norm": 1.1564773321151733, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 55130 + }, + { + "epoch": 362.7631578947368, + "grad_norm": 0.9832426309585571, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 55140 + }, + { + "epoch": 362.82894736842104, + "grad_norm": 1.05033278465271, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 55150 + }, + { + "epoch": 362.89473684210526, + "grad_norm": 1.2688597440719604, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 55160 + }, + { + "epoch": 362.9605263157895, + "grad_norm": 1.314858078956604, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 55170 + }, + { + "epoch": 363.0263157894737, + "grad_norm": 1.2743005752563477, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 55180 + }, + { + "epoch": 363.0921052631579, + "grad_norm": 1.3456010818481445, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 55190 + }, + { + "epoch": 363.1578947368421, + "grad_norm": 1.2974659204483032, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 55200 + }, + { + "epoch": 363.2236842105263, + "grad_norm": 1.0319002866744995, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 55210 + }, + { + "epoch": 363.2894736842105, + "grad_norm": 1.074663519859314, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 55220 + }, + { + "epoch": 363.35526315789474, + "grad_norm": 1.0586227178573608, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 55230 + }, + { + "epoch": 363.42105263157896, + "grad_norm": 1.1722004413604736, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 55240 + }, + { + "epoch": 363.4868421052632, + "grad_norm": 1.1225117444992065, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 55250 + }, + { + "epoch": 363.55263157894734, + "grad_norm": 0.9017395973205566, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 55260 + }, + { + "epoch": 363.61842105263156, + "grad_norm": 1.1716728210449219, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 55270 + }, + { + "epoch": 363.6842105263158, + "grad_norm": 0.8839274644851685, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 55280 + }, + { + "epoch": 363.75, + "grad_norm": 1.1430319547653198, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 55290 + }, + { + "epoch": 363.8157894736842, + "grad_norm": 1.2584179639816284, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 55300 + }, + { + "epoch": 363.88157894736844, + "grad_norm": 1.3413207530975342, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 55310 + }, + { + "epoch": 363.94736842105266, + "grad_norm": 1.4143167734146118, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 55320 + }, + { + "epoch": 364.0131578947368, + "grad_norm": 1.290185570716858, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 55330 + }, + { + "epoch": 364.07894736842104, + "grad_norm": 1.241997241973877, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 55340 + }, + { + "epoch": 364.14473684210526, + "grad_norm": 1.200785756111145, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 55350 + }, + { + "epoch": 364.2105263157895, + "grad_norm": 1.1258078813552856, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 55360 + }, + { + "epoch": 364.2763157894737, + "grad_norm": 1.1537657976150513, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 55370 + }, + { + "epoch": 364.3421052631579, + "grad_norm": 1.3146229982376099, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 55380 + }, + { + "epoch": 364.4078947368421, + "grad_norm": 1.3080146312713623, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 55390 + }, + { + "epoch": 364.4736842105263, + "grad_norm": 1.3526312112808228, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 55400 + }, + { + "epoch": 364.5394736842105, + "grad_norm": 1.6095331907272339, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 55410 + }, + { + "epoch": 364.60526315789474, + "grad_norm": 1.4046205282211304, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 55420 + }, + { + "epoch": 364.67105263157896, + "grad_norm": 1.0746979713439941, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 55430 + }, + { + "epoch": 364.7368421052632, + "grad_norm": 1.3545022010803223, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 55440 + }, + { + "epoch": 364.80263157894734, + "grad_norm": 1.36902916431427, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 55450 + }, + { + "epoch": 364.86842105263156, + "grad_norm": 1.1085587739944458, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 55460 + }, + { + "epoch": 364.9342105263158, + "grad_norm": 0.8094392418861389, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 55470 + }, + { + "epoch": 365.0, + "grad_norm": 0.8859060406684875, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 55480 + }, + { + "epoch": 365.0657894736842, + "grad_norm": 1.0482323169708252, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 55490 + }, + { + "epoch": 365.13157894736844, + "grad_norm": 1.1933088302612305, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 55500 + }, + { + "epoch": 365.19736842105266, + "grad_norm": 1.1254971027374268, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 55510 + }, + { + "epoch": 365.2631578947368, + "grad_norm": 1.3154945373535156, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 55520 + }, + { + "epoch": 365.32894736842104, + "grad_norm": 0.9752078056335449, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 55530 + }, + { + "epoch": 365.39473684210526, + "grad_norm": 1.297957420349121, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 55540 + }, + { + "epoch": 365.4605263157895, + "grad_norm": 0.6452957391738892, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 55550 + }, + { + "epoch": 365.5263157894737, + "grad_norm": 1.1209876537322998, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 55560 + }, + { + "epoch": 365.5921052631579, + "grad_norm": 1.1463710069656372, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 55570 + }, + { + "epoch": 365.6578947368421, + "grad_norm": 0.9191970825195312, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 55580 + }, + { + "epoch": 365.7236842105263, + "grad_norm": 1.2849448919296265, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 55590 + }, + { + "epoch": 365.7894736842105, + "grad_norm": 1.0626157522201538, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 55600 + }, + { + "epoch": 365.85526315789474, + "grad_norm": 1.1464340686798096, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 55610 + }, + { + "epoch": 365.92105263157896, + "grad_norm": 0.7038142681121826, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 55620 + }, + { + "epoch": 365.9868421052632, + "grad_norm": 1.4112557172775269, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 55630 + }, + { + "epoch": 366.05263157894734, + "grad_norm": 1.230036735534668, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 55640 + }, + { + "epoch": 366.11842105263156, + "grad_norm": 0.9362917542457581, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 55650 + }, + { + "epoch": 366.1842105263158, + "grad_norm": 1.2693713903427124, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 55660 + }, + { + "epoch": 366.25, + "grad_norm": 1.3503483533859253, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 55670 + }, + { + "epoch": 366.3157894736842, + "grad_norm": 1.1417988538742065, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 55680 + }, + { + "epoch": 366.38157894736844, + "grad_norm": 1.155261516571045, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 55690 + }, + { + "epoch": 366.44736842105266, + "grad_norm": 1.2800606489181519, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 55700 + }, + { + "epoch": 366.5131578947368, + "grad_norm": 1.3735575675964355, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 55710 + }, + { + "epoch": 366.57894736842104, + "grad_norm": 1.050938367843628, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 55720 + }, + { + "epoch": 366.64473684210526, + "grad_norm": 0.9390007257461548, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 55730 + }, + { + "epoch": 366.7105263157895, + "grad_norm": 1.1883047819137573, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 55740 + }, + { + "epoch": 366.7763157894737, + "grad_norm": 1.5340079069137573, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 55750 + }, + { + "epoch": 366.8421052631579, + "grad_norm": 1.2325025796890259, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 55760 + }, + { + "epoch": 366.9078947368421, + "grad_norm": 1.1925488710403442, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 55770 + }, + { + "epoch": 366.9736842105263, + "grad_norm": 1.3321561813354492, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 55780 + }, + { + "epoch": 367.0394736842105, + "grad_norm": 1.380933165550232, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 55790 + }, + { + "epoch": 367.10526315789474, + "grad_norm": 1.200136423110962, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 55800 + }, + { + "epoch": 367.17105263157896, + "grad_norm": 0.8746941685676575, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 55810 + }, + { + "epoch": 367.2368421052632, + "grad_norm": 1.1686458587646484, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 55820 + }, + { + "epoch": 367.30263157894734, + "grad_norm": 0.9047589302062988, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 55830 + }, + { + "epoch": 367.36842105263156, + "grad_norm": 1.1924694776535034, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 55840 + }, + { + "epoch": 367.4342105263158, + "grad_norm": 1.4154642820358276, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 55850 + }, + { + "epoch": 367.5, + "grad_norm": 1.3152343034744263, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 55860 + }, + { + "epoch": 367.5657894736842, + "grad_norm": 1.2152726650238037, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 55870 + }, + { + "epoch": 367.63157894736844, + "grad_norm": 1.0928616523742676, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 55880 + }, + { + "epoch": 367.69736842105266, + "grad_norm": 1.3509669303894043, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 55890 + }, + { + "epoch": 367.7631578947368, + "grad_norm": 1.1209146976470947, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 55900 + }, + { + "epoch": 367.82894736842104, + "grad_norm": 1.1768624782562256, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 55910 + }, + { + "epoch": 367.89473684210526, + "grad_norm": 1.249316930770874, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 55920 + }, + { + "epoch": 367.9605263157895, + "grad_norm": 1.1277846097946167, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 55930 + }, + { + "epoch": 368.0263157894737, + "grad_norm": 1.326911449432373, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 55940 + }, + { + "epoch": 368.0921052631579, + "grad_norm": 1.5124350786209106, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 55950 + }, + { + "epoch": 368.1578947368421, + "grad_norm": 0.921715259552002, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 55960 + }, + { + "epoch": 368.2236842105263, + "grad_norm": 1.4262442588806152, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 55970 + }, + { + "epoch": 368.2894736842105, + "grad_norm": 1.1071768999099731, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 55980 + }, + { + "epoch": 368.35526315789474, + "grad_norm": 1.1342467069625854, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 55990 + }, + { + "epoch": 368.42105263157896, + "grad_norm": 1.218801736831665, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 56000 + }, + { + "epoch": 368.4868421052632, + "grad_norm": 1.243741750717163, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 56010 + }, + { + "epoch": 368.55263157894734, + "grad_norm": 1.1212338209152222, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 56020 + }, + { + "epoch": 368.61842105263156, + "grad_norm": 1.3740099668502808, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 56030 + }, + { + "epoch": 368.6842105263158, + "grad_norm": 1.2437816858291626, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 56040 + }, + { + "epoch": 368.75, + "grad_norm": 1.3597102165222168, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 56050 + }, + { + "epoch": 368.8157894736842, + "grad_norm": 1.264675498008728, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 56060 + }, + { + "epoch": 368.88157894736844, + "grad_norm": 1.0519795417785645, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 56070 + }, + { + "epoch": 368.94736842105266, + "grad_norm": 1.288150429725647, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 56080 + }, + { + "epoch": 369.0131578947368, + "grad_norm": 1.3312660455703735, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 56090 + }, + { + "epoch": 369.07894736842104, + "grad_norm": 1.0909931659698486, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 56100 + }, + { + "epoch": 369.14473684210526, + "grad_norm": 1.1233609914779663, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 56110 + }, + { + "epoch": 369.2105263157895, + "grad_norm": 1.1134326457977295, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 56120 + }, + { + "epoch": 369.2763157894737, + "grad_norm": 1.4635008573532104, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 56130 + }, + { + "epoch": 369.3421052631579, + "grad_norm": 1.5184545516967773, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 56140 + }, + { + "epoch": 369.4078947368421, + "grad_norm": 1.1048532724380493, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 56150 + }, + { + "epoch": 369.4736842105263, + "grad_norm": 1.1498737335205078, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 56160 + }, + { + "epoch": 369.5394736842105, + "grad_norm": 1.2778600454330444, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 56170 + }, + { + "epoch": 369.60526315789474, + "grad_norm": 1.0318385362625122, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 56180 + }, + { + "epoch": 369.67105263157896, + "grad_norm": 0.9718679785728455, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 56190 + }, + { + "epoch": 369.7368421052632, + "grad_norm": 1.2061275243759155, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 56200 + }, + { + "epoch": 369.80263157894734, + "grad_norm": 1.2633416652679443, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 56210 + }, + { + "epoch": 369.86842105263156, + "grad_norm": 1.3707966804504395, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 56220 + }, + { + "epoch": 369.9342105263158, + "grad_norm": 1.2290523052215576, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 56230 + }, + { + "epoch": 370.0, + "grad_norm": 0.9901204109191895, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 56240 + }, + { + "epoch": 370.0657894736842, + "grad_norm": 1.1951624155044556, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 56250 + }, + { + "epoch": 370.13157894736844, + "grad_norm": 1.1654387712478638, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 56260 + }, + { + "epoch": 370.19736842105266, + "grad_norm": 1.5832988023757935, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 56270 + }, + { + "epoch": 370.2631578947368, + "grad_norm": 1.0225611925125122, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 56280 + }, + { + "epoch": 370.32894736842104, + "grad_norm": 1.5608348846435547, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 56290 + }, + { + "epoch": 370.39473684210526, + "grad_norm": 1.4639538526535034, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 56300 + }, + { + "epoch": 370.4605263157895, + "grad_norm": 1.4396363496780396, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 56310 + }, + { + "epoch": 370.5263157894737, + "grad_norm": 0.938585638999939, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 56320 + }, + { + "epoch": 370.5921052631579, + "grad_norm": 1.232274055480957, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 56330 + }, + { + "epoch": 370.6578947368421, + "grad_norm": 1.1240586042404175, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 56340 + }, + { + "epoch": 370.7236842105263, + "grad_norm": 1.164602279663086, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 56350 + }, + { + "epoch": 370.7894736842105, + "grad_norm": 1.4600279331207275, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 56360 + }, + { + "epoch": 370.85526315789474, + "grad_norm": 1.1497042179107666, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 56370 + }, + { + "epoch": 370.92105263157896, + "grad_norm": 1.1283025741577148, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 56380 + }, + { + "epoch": 370.9868421052632, + "grad_norm": 1.196718454360962, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 56390 + }, + { + "epoch": 371.05263157894734, + "grad_norm": 0.8738747239112854, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 56400 + }, + { + "epoch": 371.11842105263156, + "grad_norm": 1.2236226797103882, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 56410 + }, + { + "epoch": 371.1842105263158, + "grad_norm": 1.3389105796813965, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 56420 + }, + { + "epoch": 371.25, + "grad_norm": 1.1783345937728882, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 56430 + }, + { + "epoch": 371.3157894736842, + "grad_norm": 1.1184672117233276, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 56440 + }, + { + "epoch": 371.38157894736844, + "grad_norm": 1.3328872919082642, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 56450 + }, + { + "epoch": 371.44736842105266, + "grad_norm": 1.3304328918457031, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 56460 + }, + { + "epoch": 371.5131578947368, + "grad_norm": 1.4431946277618408, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 56470 + }, + { + "epoch": 371.57894736842104, + "grad_norm": 1.3655774593353271, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 56480 + }, + { + "epoch": 371.64473684210526, + "grad_norm": 0.9467471837997437, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 56490 + }, + { + "epoch": 371.7105263157895, + "grad_norm": 1.276974081993103, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 56500 + }, + { + "epoch": 371.7763157894737, + "grad_norm": 1.1539723873138428, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 56510 + }, + { + "epoch": 371.8421052631579, + "grad_norm": 1.0478674173355103, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 56520 + }, + { + "epoch": 371.9078947368421, + "grad_norm": 1.2943617105484009, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 56530 + }, + { + "epoch": 371.9736842105263, + "grad_norm": 1.0241856575012207, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 56540 + }, + { + "epoch": 372.0394736842105, + "grad_norm": 1.068509817123413, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 56550 + }, + { + "epoch": 372.10526315789474, + "grad_norm": 1.422802209854126, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 56560 + }, + { + "epoch": 372.17105263157896, + "grad_norm": 1.3704060316085815, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 56570 + }, + { + "epoch": 372.2368421052632, + "grad_norm": 1.3845621347427368, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 56580 + }, + { + "epoch": 372.30263157894734, + "grad_norm": 1.221064805984497, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 56590 + }, + { + "epoch": 372.36842105263156, + "grad_norm": 0.9670616388320923, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 56600 + }, + { + "epoch": 372.4342105263158, + "grad_norm": 1.3151499032974243, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 56610 + }, + { + "epoch": 372.5, + "grad_norm": 1.210457682609558, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 56620 + }, + { + "epoch": 372.5657894736842, + "grad_norm": 1.1957846879959106, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 56630 + }, + { + "epoch": 372.63157894736844, + "grad_norm": 0.8912261724472046, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 56640 + }, + { + "epoch": 372.69736842105266, + "grad_norm": 1.1011767387390137, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 56650 + }, + { + "epoch": 372.7631578947368, + "grad_norm": 1.0120614767074585, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 56660 + }, + { + "epoch": 372.82894736842104, + "grad_norm": 1.5126779079437256, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 56670 + }, + { + "epoch": 372.89473684210526, + "grad_norm": 1.7110366821289062, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 56680 + }, + { + "epoch": 372.9605263157895, + "grad_norm": 1.3137553930282593, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 56690 + }, + { + "epoch": 373.0263157894737, + "grad_norm": 1.3267024755477905, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 56700 + }, + { + "epoch": 373.0921052631579, + "grad_norm": 1.0713841915130615, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 56710 + }, + { + "epoch": 373.1578947368421, + "grad_norm": 1.2255971431732178, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 56720 + }, + { + "epoch": 373.2236842105263, + "grad_norm": 1.1243081092834473, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 56730 + }, + { + "epoch": 373.2894736842105, + "grad_norm": 0.909152090549469, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 56740 + }, + { + "epoch": 373.35526315789474, + "grad_norm": 0.9241997003555298, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 56750 + }, + { + "epoch": 373.42105263157896, + "grad_norm": 0.7907554507255554, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 56760 + }, + { + "epoch": 373.4868421052632, + "grad_norm": 1.4424355030059814, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 56770 + }, + { + "epoch": 373.55263157894734, + "grad_norm": 0.7469786405563354, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 56780 + }, + { + "epoch": 373.61842105263156, + "grad_norm": 1.0877963304519653, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 56790 + }, + { + "epoch": 373.6842105263158, + "grad_norm": 1.020845890045166, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 56800 + }, + { + "epoch": 373.75, + "grad_norm": 0.8404970765113831, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 56810 + }, + { + "epoch": 373.8157894736842, + "grad_norm": 1.032677412033081, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 56820 + }, + { + "epoch": 373.88157894736844, + "grad_norm": 1.0752617120742798, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 56830 + }, + { + "epoch": 373.94736842105266, + "grad_norm": 0.9850388169288635, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 56840 + }, + { + "epoch": 374.0131578947368, + "grad_norm": 0.8292937874794006, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 56850 + }, + { + "epoch": 374.07894736842104, + "grad_norm": 1.0605348348617554, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 56860 + }, + { + "epoch": 374.14473684210526, + "grad_norm": 1.2353730201721191, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 56870 + }, + { + "epoch": 374.2105263157895, + "grad_norm": 0.9276547431945801, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 56880 + }, + { + "epoch": 374.2763157894737, + "grad_norm": 1.3930708169937134, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 56890 + }, + { + "epoch": 374.3421052631579, + "grad_norm": 1.2086683511734009, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 56900 + }, + { + "epoch": 374.4078947368421, + "grad_norm": 1.2379398345947266, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 56910 + }, + { + "epoch": 374.4736842105263, + "grad_norm": 1.1630572080612183, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 56920 + }, + { + "epoch": 374.5394736842105, + "grad_norm": 1.2085312604904175, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 56930 + }, + { + "epoch": 374.60526315789474, + "grad_norm": 1.1504122018814087, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 56940 + }, + { + "epoch": 374.67105263157896, + "grad_norm": 1.1568752527236938, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 56950 + }, + { + "epoch": 374.7368421052632, + "grad_norm": 1.122700572013855, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 56960 + }, + { + "epoch": 374.80263157894734, + "grad_norm": 0.869761049747467, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 56970 + }, + { + "epoch": 374.86842105263156, + "grad_norm": 1.219969630241394, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 56980 + }, + { + "epoch": 374.9342105263158, + "grad_norm": 1.0810168981552124, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 56990 + }, + { + "epoch": 375.0, + "grad_norm": 0.8487389087677002, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 57000 + }, + { + "epoch": 375.0657894736842, + "grad_norm": 1.3084897994995117, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 57010 + }, + { + "epoch": 375.13157894736844, + "grad_norm": 1.0866003036499023, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 57020 + }, + { + "epoch": 375.19736842105266, + "grad_norm": 1.215729832649231, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 57030 + }, + { + "epoch": 375.2631578947368, + "grad_norm": 1.321714162826538, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 57040 + }, + { + "epoch": 375.32894736842104, + "grad_norm": 1.21835458278656, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 57050 + }, + { + "epoch": 375.39473684210526, + "grad_norm": 1.352271318435669, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 57060 + }, + { + "epoch": 375.4605263157895, + "grad_norm": 1.1468392610549927, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 57070 + }, + { + "epoch": 375.5263157894737, + "grad_norm": 0.9708956480026245, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 57080 + }, + { + "epoch": 375.5921052631579, + "grad_norm": 1.2509098052978516, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 57090 + }, + { + "epoch": 375.6578947368421, + "grad_norm": 0.8894394636154175, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 57100 + }, + { + "epoch": 375.7236842105263, + "grad_norm": 1.4251313209533691, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 57110 + }, + { + "epoch": 375.7894736842105, + "grad_norm": 1.3613892793655396, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 57120 + }, + { + "epoch": 375.85526315789474, + "grad_norm": 1.2195764780044556, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 57130 + }, + { + "epoch": 375.92105263157896, + "grad_norm": 1.2804150581359863, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 57140 + }, + { + "epoch": 375.9868421052632, + "grad_norm": 1.2926859855651855, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 57150 + }, + { + "epoch": 376.05263157894734, + "grad_norm": 1.2950412034988403, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 57160 + }, + { + "epoch": 376.11842105263156, + "grad_norm": 1.122531771659851, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 57170 + }, + { + "epoch": 376.1842105263158, + "grad_norm": 1.0365455150604248, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 57180 + }, + { + "epoch": 376.25, + "grad_norm": 0.9025917649269104, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 57190 + }, + { + "epoch": 376.3157894736842, + "grad_norm": 1.360355257987976, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 57200 + }, + { + "epoch": 376.38157894736844, + "grad_norm": 1.0496774911880493, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 57210 + }, + { + "epoch": 376.44736842105266, + "grad_norm": 0.9033643007278442, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 57220 + }, + { + "epoch": 376.5131578947368, + "grad_norm": 1.0611474514007568, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 57230 + }, + { + "epoch": 376.57894736842104, + "grad_norm": 1.022553563117981, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 57240 + }, + { + "epoch": 376.64473684210526, + "grad_norm": 1.3348984718322754, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 57250 + }, + { + "epoch": 376.7105263157895, + "grad_norm": 1.362504005432129, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 57260 + }, + { + "epoch": 376.7763157894737, + "grad_norm": 1.2323849201202393, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 57270 + }, + { + "epoch": 376.8421052631579, + "grad_norm": 1.3414735794067383, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 57280 + }, + { + "epoch": 376.9078947368421, + "grad_norm": 1.2798577547073364, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 57290 + }, + { + "epoch": 376.9736842105263, + "grad_norm": 0.9005674719810486, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 57300 + }, + { + "epoch": 377.0394736842105, + "grad_norm": 1.0160363912582397, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 57310 + }, + { + "epoch": 377.10526315789474, + "grad_norm": 1.1084673404693604, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 57320 + }, + { + "epoch": 377.17105263157896, + "grad_norm": 1.4065401554107666, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 57330 + }, + { + "epoch": 377.2368421052632, + "grad_norm": 1.1100188493728638, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 57340 + }, + { + "epoch": 377.30263157894734, + "grad_norm": 0.9800202250480652, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 57350 + }, + { + "epoch": 377.36842105263156, + "grad_norm": 1.887536644935608, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 57360 + }, + { + "epoch": 377.4342105263158, + "grad_norm": 1.2453606128692627, + "learning_rate": 0.0001, + "loss": 0.0164, + "step": 57370 + }, + { + "epoch": 377.5, + "grad_norm": 1.2225714921951294, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 57380 + }, + { + "epoch": 377.5657894736842, + "grad_norm": 1.2892736196517944, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 57390 + }, + { + "epoch": 377.63157894736844, + "grad_norm": 1.242989420890808, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 57400 + }, + { + "epoch": 377.69736842105266, + "grad_norm": 1.3066517114639282, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 57410 + }, + { + "epoch": 377.7631578947368, + "grad_norm": 1.126461386680603, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 57420 + }, + { + "epoch": 377.82894736842104, + "grad_norm": 1.0878877639770508, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 57430 + }, + { + "epoch": 377.89473684210526, + "grad_norm": 1.2524346113204956, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 57440 + }, + { + "epoch": 377.9605263157895, + "grad_norm": 1.3451015949249268, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 57450 + }, + { + "epoch": 378.0263157894737, + "grad_norm": 1.1526377201080322, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 57460 + }, + { + "epoch": 378.0921052631579, + "grad_norm": 0.9030545353889465, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 57470 + }, + { + "epoch": 378.1578947368421, + "grad_norm": 0.7027451992034912, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 57480 + }, + { + "epoch": 378.2236842105263, + "grad_norm": 0.7617095708847046, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 57490 + }, + { + "epoch": 378.2894736842105, + "grad_norm": 1.2447706460952759, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 57500 + }, + { + "epoch": 378.35526315789474, + "grad_norm": 1.3532993793487549, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 57510 + }, + { + "epoch": 378.42105263157896, + "grad_norm": 1.154651165008545, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 57520 + }, + { + "epoch": 378.4868421052632, + "grad_norm": 1.0880413055419922, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 57530 + }, + { + "epoch": 378.55263157894734, + "grad_norm": 0.9911805987358093, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 57540 + }, + { + "epoch": 378.61842105263156, + "grad_norm": 1.3125237226486206, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 57550 + }, + { + "epoch": 378.6842105263158, + "grad_norm": 1.2095872163772583, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 57560 + }, + { + "epoch": 378.75, + "grad_norm": 1.300068736076355, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 57570 + }, + { + "epoch": 378.8157894736842, + "grad_norm": 0.9355725049972534, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 57580 + }, + { + "epoch": 378.88157894736844, + "grad_norm": 1.0073492527008057, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 57590 + }, + { + "epoch": 378.94736842105266, + "grad_norm": 1.3095647096633911, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 57600 + }, + { + "epoch": 379.0131578947368, + "grad_norm": 1.3785039186477661, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 57610 + }, + { + "epoch": 379.07894736842104, + "grad_norm": 1.2601360082626343, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 57620 + }, + { + "epoch": 379.14473684210526, + "grad_norm": 1.2998868227005005, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 57630 + }, + { + "epoch": 379.2105263157895, + "grad_norm": 1.2391390800476074, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 57640 + }, + { + "epoch": 379.2763157894737, + "grad_norm": 1.320417046546936, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 57650 + }, + { + "epoch": 379.3421052631579, + "grad_norm": 1.2020084857940674, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 57660 + }, + { + "epoch": 379.4078947368421, + "grad_norm": 1.2088592052459717, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 57670 + }, + { + "epoch": 379.4736842105263, + "grad_norm": 0.9938647747039795, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 57680 + }, + { + "epoch": 379.5394736842105, + "grad_norm": 1.057723879814148, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 57690 + }, + { + "epoch": 379.60526315789474, + "grad_norm": 0.9933873414993286, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 57700 + }, + { + "epoch": 379.67105263157896, + "grad_norm": 0.7416632771492004, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 57710 + }, + { + "epoch": 379.7368421052632, + "grad_norm": 1.292233943939209, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 57720 + }, + { + "epoch": 379.80263157894734, + "grad_norm": 1.1518487930297852, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 57730 + }, + { + "epoch": 379.86842105263156, + "grad_norm": 1.1070126295089722, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 57740 + }, + { + "epoch": 379.9342105263158, + "grad_norm": 0.9124812483787537, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 57750 + }, + { + "epoch": 380.0, + "grad_norm": 1.2951626777648926, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 57760 + }, + { + "epoch": 380.0657894736842, + "grad_norm": 1.2988245487213135, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 57770 + }, + { + "epoch": 380.13157894736844, + "grad_norm": 1.5885379314422607, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 57780 + }, + { + "epoch": 380.19736842105266, + "grad_norm": 1.343135118484497, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 57790 + }, + { + "epoch": 380.2631578947368, + "grad_norm": 1.0885659456253052, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 57800 + }, + { + "epoch": 380.32894736842104, + "grad_norm": 1.7573096752166748, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 57810 + }, + { + "epoch": 380.39473684210526, + "grad_norm": 1.4243195056915283, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 57820 + }, + { + "epoch": 380.4605263157895, + "grad_norm": 1.1941052675247192, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 57830 + }, + { + "epoch": 380.5263157894737, + "grad_norm": 1.3269431591033936, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 57840 + }, + { + "epoch": 380.5921052631579, + "grad_norm": 1.0225571393966675, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 57850 + }, + { + "epoch": 380.6578947368421, + "grad_norm": 1.412798523902893, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 57860 + }, + { + "epoch": 380.7236842105263, + "grad_norm": 1.382020354270935, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 57870 + }, + { + "epoch": 380.7894736842105, + "grad_norm": 1.5677051544189453, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 57880 + }, + { + "epoch": 380.85526315789474, + "grad_norm": 1.2945635318756104, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 57890 + }, + { + "epoch": 380.92105263157896, + "grad_norm": 1.3230671882629395, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 57900 + }, + { + "epoch": 380.9868421052632, + "grad_norm": 1.1088383197784424, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 57910 + }, + { + "epoch": 381.05263157894734, + "grad_norm": 1.479811668395996, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 57920 + }, + { + "epoch": 381.11842105263156, + "grad_norm": 1.2625958919525146, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 57930 + }, + { + "epoch": 381.1842105263158, + "grad_norm": 1.2790255546569824, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 57940 + }, + { + "epoch": 381.25, + "grad_norm": 1.589355707168579, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 57950 + }, + { + "epoch": 381.3157894736842, + "grad_norm": 1.3272757530212402, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 57960 + }, + { + "epoch": 381.38157894736844, + "grad_norm": 0.8817294239997864, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 57970 + }, + { + "epoch": 381.44736842105266, + "grad_norm": 1.2385287284851074, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 57980 + }, + { + "epoch": 381.5131578947368, + "grad_norm": 1.324324131011963, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 57990 + }, + { + "epoch": 381.57894736842104, + "grad_norm": 1.4512399435043335, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 58000 + }, + { + "epoch": 381.64473684210526, + "grad_norm": 1.277093529701233, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 58010 + }, + { + "epoch": 381.7105263157895, + "grad_norm": 1.15549635887146, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 58020 + }, + { + "epoch": 381.7763157894737, + "grad_norm": 0.7976158261299133, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 58030 + }, + { + "epoch": 381.8421052631579, + "grad_norm": 1.189330816268921, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 58040 + }, + { + "epoch": 381.9078947368421, + "grad_norm": 1.5687916278839111, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 58050 + }, + { + "epoch": 381.9736842105263, + "grad_norm": 1.33320152759552, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 58060 + }, + { + "epoch": 382.0394736842105, + "grad_norm": 1.0838332176208496, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 58070 + }, + { + "epoch": 382.10526315789474, + "grad_norm": 1.0593252182006836, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 58080 + }, + { + "epoch": 382.17105263157896, + "grad_norm": 1.1587085723876953, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 58090 + }, + { + "epoch": 382.2368421052632, + "grad_norm": 1.2897980213165283, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 58100 + }, + { + "epoch": 382.30263157894734, + "grad_norm": 1.4974095821380615, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 58110 + }, + { + "epoch": 382.36842105263156, + "grad_norm": 1.2067298889160156, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 58120 + }, + { + "epoch": 382.4342105263158, + "grad_norm": 1.151006817817688, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 58130 + }, + { + "epoch": 382.5, + "grad_norm": 0.9617295861244202, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 58140 + }, + { + "epoch": 382.5657894736842, + "grad_norm": 1.2148723602294922, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 58150 + }, + { + "epoch": 382.63157894736844, + "grad_norm": 0.9488323926925659, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 58160 + }, + { + "epoch": 382.69736842105266, + "grad_norm": 1.2400751113891602, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 58170 + }, + { + "epoch": 382.7631578947368, + "grad_norm": 1.123399019241333, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 58180 + }, + { + "epoch": 382.82894736842104, + "grad_norm": 1.2090132236480713, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 58190 + }, + { + "epoch": 382.89473684210526, + "grad_norm": 1.1165274381637573, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 58200 + }, + { + "epoch": 382.9605263157895, + "grad_norm": 1.304680585861206, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 58210 + }, + { + "epoch": 383.0263157894737, + "grad_norm": 1.3738752603530884, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 58220 + }, + { + "epoch": 383.0921052631579, + "grad_norm": 0.8342259526252747, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 58230 + }, + { + "epoch": 383.1578947368421, + "grad_norm": 1.0694853067398071, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 58240 + }, + { + "epoch": 383.2236842105263, + "grad_norm": 1.22238028049469, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 58250 + }, + { + "epoch": 383.2894736842105, + "grad_norm": 0.9267398715019226, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 58260 + }, + { + "epoch": 383.35526315789474, + "grad_norm": 0.9709849953651428, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 58270 + }, + { + "epoch": 383.42105263157896, + "grad_norm": 1.3545488119125366, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 58280 + }, + { + "epoch": 383.4868421052632, + "grad_norm": 1.2838683128356934, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 58290 + }, + { + "epoch": 383.55263157894734, + "grad_norm": 0.764008641242981, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 58300 + }, + { + "epoch": 383.61842105263156, + "grad_norm": 0.821171224117279, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 58310 + }, + { + "epoch": 383.6842105263158, + "grad_norm": 0.8788691759109497, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 58320 + }, + { + "epoch": 383.75, + "grad_norm": 0.9442429542541504, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 58330 + }, + { + "epoch": 383.8157894736842, + "grad_norm": 1.0095479488372803, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 58340 + }, + { + "epoch": 383.88157894736844, + "grad_norm": 1.0548217296600342, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 58350 + }, + { + "epoch": 383.94736842105266, + "grad_norm": 0.9758161902427673, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 58360 + }, + { + "epoch": 384.0131578947368, + "grad_norm": 1.2047322988510132, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 58370 + }, + { + "epoch": 384.07894736842104, + "grad_norm": 0.9442251324653625, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 58380 + }, + { + "epoch": 384.14473684210526, + "grad_norm": 1.4745181798934937, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 58390 + }, + { + "epoch": 384.2105263157895, + "grad_norm": 1.0786917209625244, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 58400 + }, + { + "epoch": 384.2763157894737, + "grad_norm": 0.8698955774307251, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 58410 + }, + { + "epoch": 384.3421052631579, + "grad_norm": 1.3999569416046143, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 58420 + }, + { + "epoch": 384.4078947368421, + "grad_norm": 1.203365445137024, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 58430 + }, + { + "epoch": 384.4736842105263, + "grad_norm": 1.1867121458053589, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 58440 + }, + { + "epoch": 384.5394736842105, + "grad_norm": 1.7259279489517212, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 58450 + }, + { + "epoch": 384.60526315789474, + "grad_norm": 1.2636997699737549, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 58460 + }, + { + "epoch": 384.67105263157896, + "grad_norm": 1.0157842636108398, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 58470 + }, + { + "epoch": 384.7368421052632, + "grad_norm": 1.3222954273223877, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 58480 + }, + { + "epoch": 384.80263157894734, + "grad_norm": 1.118708610534668, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 58490 + }, + { + "epoch": 384.86842105263156, + "grad_norm": 0.8435554504394531, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 58500 + }, + { + "epoch": 384.9342105263158, + "grad_norm": 0.8175836205482483, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 58510 + }, + { + "epoch": 385.0, + "grad_norm": 1.041205883026123, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 58520 + }, + { + "epoch": 385.0657894736842, + "grad_norm": 0.7904459238052368, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 58530 + }, + { + "epoch": 385.13157894736844, + "grad_norm": 1.1305633783340454, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 58540 + }, + { + "epoch": 385.19736842105266, + "grad_norm": 0.7400175333023071, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 58550 + }, + { + "epoch": 385.2631578947368, + "grad_norm": 0.870387077331543, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 58560 + }, + { + "epoch": 385.32894736842104, + "grad_norm": 0.8553933501243591, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 58570 + }, + { + "epoch": 385.39473684210526, + "grad_norm": 0.9809901714324951, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 58580 + }, + { + "epoch": 385.4605263157895, + "grad_norm": 0.8871316313743591, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 58590 + }, + { + "epoch": 385.5263157894737, + "grad_norm": 1.1074786186218262, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 58600 + }, + { + "epoch": 385.5921052631579, + "grad_norm": 0.7304058074951172, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 58610 + }, + { + "epoch": 385.6578947368421, + "grad_norm": 1.168735384941101, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 58620 + }, + { + "epoch": 385.7236842105263, + "grad_norm": 0.8042843341827393, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 58630 + }, + { + "epoch": 385.7894736842105, + "grad_norm": 0.9500011801719666, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 58640 + }, + { + "epoch": 385.85526315789474, + "grad_norm": 0.9175438284873962, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 58650 + }, + { + "epoch": 385.92105263157896, + "grad_norm": 1.302797555923462, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 58660 + }, + { + "epoch": 385.9868421052632, + "grad_norm": 1.2630707025527954, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 58670 + }, + { + "epoch": 386.05263157894734, + "grad_norm": 0.7925125360488892, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 58680 + }, + { + "epoch": 386.11842105263156, + "grad_norm": 0.8696709275245667, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 58690 + }, + { + "epoch": 386.1842105263158, + "grad_norm": 1.196243166923523, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 58700 + }, + { + "epoch": 386.25, + "grad_norm": 1.1323686838150024, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 58710 + }, + { + "epoch": 386.3157894736842, + "grad_norm": 0.9869086742401123, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 58720 + }, + { + "epoch": 386.38157894736844, + "grad_norm": 1.3381530046463013, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 58730 + }, + { + "epoch": 386.44736842105266, + "grad_norm": 1.3545961380004883, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 58740 + }, + { + "epoch": 386.5131578947368, + "grad_norm": 1.1433833837509155, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 58750 + }, + { + "epoch": 386.57894736842104, + "grad_norm": 1.3282660245895386, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 58760 + }, + { + "epoch": 386.64473684210526, + "grad_norm": 1.518915057182312, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 58770 + }, + { + "epoch": 386.7105263157895, + "grad_norm": 1.1334174871444702, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 58780 + }, + { + "epoch": 386.7763157894737, + "grad_norm": 1.2663590908050537, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 58790 + }, + { + "epoch": 386.8421052631579, + "grad_norm": 1.148506999015808, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 58800 + }, + { + "epoch": 386.9078947368421, + "grad_norm": 1.2444261312484741, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 58810 + }, + { + "epoch": 386.9736842105263, + "grad_norm": 1.1038265228271484, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 58820 + }, + { + "epoch": 387.0394736842105, + "grad_norm": 0.9502344131469727, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 58830 + }, + { + "epoch": 387.10526315789474, + "grad_norm": 1.0720306634902954, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 58840 + }, + { + "epoch": 387.17105263157896, + "grad_norm": 1.243300199508667, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 58850 + }, + { + "epoch": 387.2368421052632, + "grad_norm": 0.916761577129364, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 58860 + }, + { + "epoch": 387.30263157894734, + "grad_norm": 1.0160788297653198, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 58870 + }, + { + "epoch": 387.36842105263156, + "grad_norm": 1.1823748350143433, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 58880 + }, + { + "epoch": 387.4342105263158, + "grad_norm": 1.0291345119476318, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 58890 + }, + { + "epoch": 387.5, + "grad_norm": 1.0827511548995972, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 58900 + }, + { + "epoch": 387.5657894736842, + "grad_norm": 1.3324905633926392, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 58910 + }, + { + "epoch": 387.63157894736844, + "grad_norm": 1.159676432609558, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 58920 + }, + { + "epoch": 387.69736842105266, + "grad_norm": 1.6756174564361572, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 58930 + }, + { + "epoch": 387.7631578947368, + "grad_norm": 1.63514244556427, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 58940 + }, + { + "epoch": 387.82894736842104, + "grad_norm": 1.3889161348342896, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 58950 + }, + { + "epoch": 387.89473684210526, + "grad_norm": 1.27745521068573, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 58960 + }, + { + "epoch": 387.9605263157895, + "grad_norm": 1.3350027799606323, + "learning_rate": 0.0001, + "loss": 0.017, + "step": 58970 + }, + { + "epoch": 388.0263157894737, + "grad_norm": 1.5261503458023071, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 58980 + }, + { + "epoch": 388.0921052631579, + "grad_norm": 1.6746344566345215, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 58990 + }, + { + "epoch": 388.1578947368421, + "grad_norm": 1.0707603693008423, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 59000 + }, + { + "epoch": 388.2236842105263, + "grad_norm": 1.0903775691986084, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 59010 + }, + { + "epoch": 388.2894736842105, + "grad_norm": 1.2945852279663086, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 59020 + }, + { + "epoch": 388.35526315789474, + "grad_norm": 0.8267077803611755, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 59030 + }, + { + "epoch": 388.42105263157896, + "grad_norm": 1.4991341829299927, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 59040 + }, + { + "epoch": 388.4868421052632, + "grad_norm": 1.3253583908081055, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 59050 + }, + { + "epoch": 388.55263157894734, + "grad_norm": 0.9886115789413452, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 59060 + }, + { + "epoch": 388.61842105263156, + "grad_norm": 1.1212751865386963, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 59070 + }, + { + "epoch": 388.6842105263158, + "grad_norm": 0.9368122220039368, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 59080 + }, + { + "epoch": 388.75, + "grad_norm": 1.0069820880889893, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 59090 + }, + { + "epoch": 388.8157894736842, + "grad_norm": 1.3330034017562866, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 59100 + }, + { + "epoch": 388.88157894736844, + "grad_norm": 1.5406800508499146, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 59110 + }, + { + "epoch": 388.94736842105266, + "grad_norm": 1.2882165908813477, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 59120 + }, + { + "epoch": 389.0131578947368, + "grad_norm": 1.138337254524231, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 59130 + }, + { + "epoch": 389.07894736842104, + "grad_norm": 1.0221810340881348, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 59140 + }, + { + "epoch": 389.14473684210526, + "grad_norm": 1.281477928161621, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 59150 + }, + { + "epoch": 389.2105263157895, + "grad_norm": 1.1402971744537354, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 59160 + }, + { + "epoch": 389.2763157894737, + "grad_norm": 0.914799153804779, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 59170 + }, + { + "epoch": 389.3421052631579, + "grad_norm": 1.0505526065826416, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 59180 + }, + { + "epoch": 389.4078947368421, + "grad_norm": 1.2654054164886475, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 59190 + }, + { + "epoch": 389.4736842105263, + "grad_norm": 1.0078588724136353, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 59200 + }, + { + "epoch": 389.5394736842105, + "grad_norm": 0.7537126541137695, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 59210 + }, + { + "epoch": 389.60526315789474, + "grad_norm": 1.1669580936431885, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 59220 + }, + { + "epoch": 389.67105263157896, + "grad_norm": 1.2333866357803345, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 59230 + }, + { + "epoch": 389.7368421052632, + "grad_norm": 1.1542506217956543, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 59240 + }, + { + "epoch": 389.80263157894734, + "grad_norm": 1.2886197566986084, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 59250 + }, + { + "epoch": 389.86842105263156, + "grad_norm": 1.0714430809020996, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 59260 + }, + { + "epoch": 389.9342105263158, + "grad_norm": 0.9094032049179077, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 59270 + }, + { + "epoch": 390.0, + "grad_norm": 1.1040136814117432, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 59280 + }, + { + "epoch": 390.0657894736842, + "grad_norm": 0.876976490020752, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 59290 + }, + { + "epoch": 390.13157894736844, + "grad_norm": 0.86655193567276, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 59300 + }, + { + "epoch": 390.19736842105266, + "grad_norm": 1.018251657485962, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 59310 + }, + { + "epoch": 390.2631578947368, + "grad_norm": 1.1418795585632324, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 59320 + }, + { + "epoch": 390.32894736842104, + "grad_norm": 1.039706826210022, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 59330 + }, + { + "epoch": 390.39473684210526, + "grad_norm": 1.0030986070632935, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 59340 + }, + { + "epoch": 390.4605263157895, + "grad_norm": 1.1898283958435059, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 59350 + }, + { + "epoch": 390.5263157894737, + "grad_norm": 0.8979069590568542, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 59360 + }, + { + "epoch": 390.5921052631579, + "grad_norm": 1.1258528232574463, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 59370 + }, + { + "epoch": 390.6578947368421, + "grad_norm": 1.3766101598739624, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 59380 + }, + { + "epoch": 390.7236842105263, + "grad_norm": 1.3407342433929443, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 59390 + }, + { + "epoch": 390.7894736842105, + "grad_norm": 0.9154465198516846, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 59400 + }, + { + "epoch": 390.85526315789474, + "grad_norm": 1.2275203466415405, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 59410 + }, + { + "epoch": 390.92105263157896, + "grad_norm": 1.105592131614685, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 59420 + }, + { + "epoch": 390.9868421052632, + "grad_norm": 1.2495988607406616, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 59430 + }, + { + "epoch": 391.05263157894734, + "grad_norm": 1.3405605554580688, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 59440 + }, + { + "epoch": 391.11842105263156, + "grad_norm": 1.2704845666885376, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 59450 + }, + { + "epoch": 391.1842105263158, + "grad_norm": 1.029582142829895, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 59460 + }, + { + "epoch": 391.25, + "grad_norm": 0.8142033219337463, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 59470 + }, + { + "epoch": 391.3157894736842, + "grad_norm": 1.6257590055465698, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 59480 + }, + { + "epoch": 391.38157894736844, + "grad_norm": 1.2035874128341675, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 59490 + }, + { + "epoch": 391.44736842105266, + "grad_norm": 0.7711625099182129, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 59500 + }, + { + "epoch": 391.5131578947368, + "grad_norm": 1.076262354850769, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 59510 + }, + { + "epoch": 391.57894736842104, + "grad_norm": 1.2418280839920044, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 59520 + }, + { + "epoch": 391.64473684210526, + "grad_norm": 1.1036405563354492, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 59530 + }, + { + "epoch": 391.7105263157895, + "grad_norm": 1.4087510108947754, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 59540 + }, + { + "epoch": 391.7763157894737, + "grad_norm": 0.9523179531097412, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 59550 + }, + { + "epoch": 391.8421052631579, + "grad_norm": 1.2418298721313477, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 59560 + }, + { + "epoch": 391.9078947368421, + "grad_norm": 1.00105881690979, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 59570 + }, + { + "epoch": 391.9736842105263, + "grad_norm": 1.2458897829055786, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 59580 + }, + { + "epoch": 392.0394736842105, + "grad_norm": 1.1352124214172363, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 59590 + }, + { + "epoch": 392.10526315789474, + "grad_norm": 1.072657823562622, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 59600 + }, + { + "epoch": 392.17105263157896, + "grad_norm": 1.4077866077423096, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 59610 + }, + { + "epoch": 392.2368421052632, + "grad_norm": 0.9759162664413452, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 59620 + }, + { + "epoch": 392.30263157894734, + "grad_norm": 1.0127092599868774, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 59630 + }, + { + "epoch": 392.36842105263156, + "grad_norm": 1.2567495107650757, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 59640 + }, + { + "epoch": 392.4342105263158, + "grad_norm": 1.346420168876648, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 59650 + }, + { + "epoch": 392.5, + "grad_norm": 1.125648856163025, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 59660 + }, + { + "epoch": 392.5657894736842, + "grad_norm": 1.004570484161377, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 59670 + }, + { + "epoch": 392.63157894736844, + "grad_norm": 1.5204274654388428, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 59680 + }, + { + "epoch": 392.69736842105266, + "grad_norm": 1.4187020063400269, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 59690 + }, + { + "epoch": 392.7631578947368, + "grad_norm": 1.4313414096832275, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 59700 + }, + { + "epoch": 392.82894736842104, + "grad_norm": 1.398133635520935, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 59710 + }, + { + "epoch": 392.89473684210526, + "grad_norm": 1.279089093208313, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 59720 + }, + { + "epoch": 392.9605263157895, + "grad_norm": 1.511715292930603, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 59730 + }, + { + "epoch": 393.0263157894737, + "grad_norm": 1.0526278018951416, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 59740 + }, + { + "epoch": 393.0921052631579, + "grad_norm": 1.0821075439453125, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 59750 + }, + { + "epoch": 393.1578947368421, + "grad_norm": 1.2545526027679443, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 59760 + }, + { + "epoch": 393.2236842105263, + "grad_norm": 0.8105134963989258, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 59770 + }, + { + "epoch": 393.2894736842105, + "grad_norm": 1.4059394598007202, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 59780 + }, + { + "epoch": 393.35526315789474, + "grad_norm": 1.157401204109192, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 59790 + }, + { + "epoch": 393.42105263157896, + "grad_norm": 0.9264048337936401, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 59800 + }, + { + "epoch": 393.4868421052632, + "grad_norm": 0.9000381827354431, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 59810 + }, + { + "epoch": 393.55263157894734, + "grad_norm": 1.0139583349227905, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 59820 + }, + { + "epoch": 393.61842105263156, + "grad_norm": 0.9514049887657166, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 59830 + }, + { + "epoch": 393.6842105263158, + "grad_norm": 1.2830848693847656, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 59840 + }, + { + "epoch": 393.75, + "grad_norm": 0.9854030013084412, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 59850 + }, + { + "epoch": 393.8157894736842, + "grad_norm": 1.196966290473938, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 59860 + }, + { + "epoch": 393.88157894736844, + "grad_norm": 1.024109125137329, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 59870 + }, + { + "epoch": 393.94736842105266, + "grad_norm": 1.1376116275787354, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 59880 + }, + { + "epoch": 394.0131578947368, + "grad_norm": 1.670544147491455, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 59890 + }, + { + "epoch": 394.07894736842104, + "grad_norm": 1.2976588010787964, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 59900 + }, + { + "epoch": 394.14473684210526, + "grad_norm": 1.2100739479064941, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 59910 + }, + { + "epoch": 394.2105263157895, + "grad_norm": 1.5674223899841309, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 59920 + }, + { + "epoch": 394.2763157894737, + "grad_norm": 1.4684234857559204, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 59930 + }, + { + "epoch": 394.3421052631579, + "grad_norm": 1.1361076831817627, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 59940 + }, + { + "epoch": 394.4078947368421, + "grad_norm": 1.0538053512573242, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 59950 + }, + { + "epoch": 394.4736842105263, + "grad_norm": 1.3147025108337402, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 59960 + }, + { + "epoch": 394.5394736842105, + "grad_norm": 1.4038904905319214, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 59970 + }, + { + "epoch": 394.60526315789474, + "grad_norm": 1.126792073249817, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 59980 + }, + { + "epoch": 394.67105263157896, + "grad_norm": 1.2345644235610962, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 59990 + }, + { + "epoch": 394.7368421052632, + "grad_norm": 1.2304173707962036, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 60000 + }, + { + "epoch": 394.80263157894734, + "grad_norm": 1.4393924474716187, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 60010 + }, + { + "epoch": 394.86842105263156, + "grad_norm": 1.3260493278503418, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 60020 + }, + { + "epoch": 394.9342105263158, + "grad_norm": 1.1898632049560547, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 60030 + }, + { + "epoch": 395.0, + "grad_norm": 1.163697361946106, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 60040 + }, + { + "epoch": 395.0657894736842, + "grad_norm": 1.0178470611572266, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 60050 + }, + { + "epoch": 395.13157894736844, + "grad_norm": 1.5706970691680908, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 60060 + }, + { + "epoch": 395.19736842105266, + "grad_norm": 1.2328516244888306, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 60070 + }, + { + "epoch": 395.2631578947368, + "grad_norm": 1.3404053449630737, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 60080 + }, + { + "epoch": 395.32894736842104, + "grad_norm": 1.3610615730285645, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 60090 + }, + { + "epoch": 395.39473684210526, + "grad_norm": 1.486354947090149, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 60100 + }, + { + "epoch": 395.4605263157895, + "grad_norm": 1.392723560333252, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 60110 + }, + { + "epoch": 395.5263157894737, + "grad_norm": 1.290682077407837, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 60120 + }, + { + "epoch": 395.5921052631579, + "grad_norm": 0.9399741291999817, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 60130 + }, + { + "epoch": 395.6578947368421, + "grad_norm": 1.4426350593566895, + "learning_rate": 0.0001, + "loss": 0.0165, + "step": 60140 + }, + { + "epoch": 395.7236842105263, + "grad_norm": 1.2685151100158691, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 60150 + }, + { + "epoch": 395.7894736842105, + "grad_norm": 0.9836694002151489, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 60160 + }, + { + "epoch": 395.85526315789474, + "grad_norm": 1.0677541494369507, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 60170 + }, + { + "epoch": 395.92105263157896, + "grad_norm": 1.1794040203094482, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 60180 + }, + { + "epoch": 395.9868421052632, + "grad_norm": 1.7301592826843262, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 60190 + }, + { + "epoch": 396.05263157894734, + "grad_norm": 1.1919747591018677, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 60200 + }, + { + "epoch": 396.11842105263156, + "grad_norm": 1.3119267225265503, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 60210 + }, + { + "epoch": 396.1842105263158, + "grad_norm": 1.2656959295272827, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 60220 + }, + { + "epoch": 396.25, + "grad_norm": 1.2613000869750977, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 60230 + }, + { + "epoch": 396.3157894736842, + "grad_norm": 1.25479257106781, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 60240 + }, + { + "epoch": 396.38157894736844, + "grad_norm": 1.2844350337982178, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 60250 + }, + { + "epoch": 396.44736842105266, + "grad_norm": 1.0911136865615845, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 60260 + }, + { + "epoch": 396.5131578947368, + "grad_norm": 1.0953329801559448, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 60270 + }, + { + "epoch": 396.57894736842104, + "grad_norm": 1.4672765731811523, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 60280 + }, + { + "epoch": 396.64473684210526, + "grad_norm": 1.0957013368606567, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 60290 + }, + { + "epoch": 396.7105263157895, + "grad_norm": 1.128840684890747, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 60300 + }, + { + "epoch": 396.7763157894737, + "grad_norm": 0.9832814931869507, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 60310 + }, + { + "epoch": 396.8421052631579, + "grad_norm": 1.023591160774231, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 60320 + }, + { + "epoch": 396.9078947368421, + "grad_norm": 0.9705013036727905, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 60330 + }, + { + "epoch": 396.9736842105263, + "grad_norm": 1.1543126106262207, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 60340 + }, + { + "epoch": 397.0394736842105, + "grad_norm": 0.7225308418273926, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 60350 + }, + { + "epoch": 397.10526315789474, + "grad_norm": 1.5401467084884644, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 60360 + }, + { + "epoch": 397.17105263157896, + "grad_norm": 1.3008463382720947, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 60370 + }, + { + "epoch": 397.2368421052632, + "grad_norm": 0.7832499742507935, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 60380 + }, + { + "epoch": 397.30263157894734, + "grad_norm": 0.9722589254379272, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 60390 + }, + { + "epoch": 397.36842105263156, + "grad_norm": 0.9584972858428955, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 60400 + }, + { + "epoch": 397.4342105263158, + "grad_norm": 1.1281046867370605, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 60410 + }, + { + "epoch": 397.5, + "grad_norm": 0.9634712338447571, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 60420 + }, + { + "epoch": 397.5657894736842, + "grad_norm": 0.8092069029808044, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 60430 + }, + { + "epoch": 397.63157894736844, + "grad_norm": 0.941852331161499, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 60440 + }, + { + "epoch": 397.69736842105266, + "grad_norm": 1.1537492275238037, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 60450 + }, + { + "epoch": 397.7631578947368, + "grad_norm": 1.2303005456924438, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 60460 + }, + { + "epoch": 397.82894736842104, + "grad_norm": 0.761372983455658, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 60470 + }, + { + "epoch": 397.89473684210526, + "grad_norm": 0.8683788180351257, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 60480 + }, + { + "epoch": 397.9605263157895, + "grad_norm": 0.7769721150398254, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 60490 + }, + { + "epoch": 398.0263157894737, + "grad_norm": 0.8893076777458191, + "learning_rate": 0.0001, + "loss": 0.0171, + "step": 60500 + }, + { + "epoch": 398.0921052631579, + "grad_norm": 1.5258387327194214, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 60510 + }, + { + "epoch": 398.1578947368421, + "grad_norm": 1.3462433815002441, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 60520 + }, + { + "epoch": 398.2236842105263, + "grad_norm": 0.9892566800117493, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 60530 + }, + { + "epoch": 398.2894736842105, + "grad_norm": 1.024162769317627, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 60540 + }, + { + "epoch": 398.35526315789474, + "grad_norm": 0.9343032240867615, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 60550 + }, + { + "epoch": 398.42105263157896, + "grad_norm": 1.1336026191711426, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 60560 + }, + { + "epoch": 398.4868421052632, + "grad_norm": 1.1892248392105103, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 60570 + }, + { + "epoch": 398.55263157894734, + "grad_norm": 1.184756875038147, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 60580 + }, + { + "epoch": 398.61842105263156, + "grad_norm": 1.1197586059570312, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 60590 + }, + { + "epoch": 398.6842105263158, + "grad_norm": 1.4261908531188965, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 60600 + }, + { + "epoch": 398.75, + "grad_norm": 1.3351398706436157, + "learning_rate": 0.0001, + "loss": 0.0172, + "step": 60610 + }, + { + "epoch": 398.8157894736842, + "grad_norm": 1.4747203588485718, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 60620 + }, + { + "epoch": 398.88157894736844, + "grad_norm": 1.32119882106781, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 60630 + }, + { + "epoch": 398.94736842105266, + "grad_norm": 1.3960144519805908, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 60640 + }, + { + "epoch": 399.0131578947368, + "grad_norm": 1.1450289487838745, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 60650 + }, + { + "epoch": 399.07894736842104, + "grad_norm": 1.1878561973571777, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 60660 + }, + { + "epoch": 399.14473684210526, + "grad_norm": 1.0264465808868408, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 60670 + }, + { + "epoch": 399.2105263157895, + "grad_norm": 1.1604735851287842, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 60680 + }, + { + "epoch": 399.2763157894737, + "grad_norm": 1.373092532157898, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 60690 + }, + { + "epoch": 399.3421052631579, + "grad_norm": 1.260588526725769, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 60700 + }, + { + "epoch": 399.4078947368421, + "grad_norm": 1.2193946838378906, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 60710 + }, + { + "epoch": 399.4736842105263, + "grad_norm": 1.2826273441314697, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 60720 + }, + { + "epoch": 399.5394736842105, + "grad_norm": 0.9738739132881165, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 60730 + }, + { + "epoch": 399.60526315789474, + "grad_norm": 1.2081356048583984, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 60740 + }, + { + "epoch": 399.67105263157896, + "grad_norm": 1.198371171951294, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 60750 + }, + { + "epoch": 399.7368421052632, + "grad_norm": 1.064581274986267, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 60760 + }, + { + "epoch": 399.80263157894734, + "grad_norm": 0.9279325604438782, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 60770 + }, + { + "epoch": 399.86842105263156, + "grad_norm": 0.7750979661941528, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 60780 + }, + { + "epoch": 399.9342105263158, + "grad_norm": 0.9537990689277649, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 60790 + }, + { + "epoch": 400.0, + "grad_norm": 1.227004885673523, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 60800 + }, + { + "epoch": 400.0657894736842, + "grad_norm": 0.8979701399803162, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 60810 + }, + { + "epoch": 400.13157894736844, + "grad_norm": 1.022169828414917, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 60820 + }, + { + "epoch": 400.19736842105266, + "grad_norm": 1.129315733909607, + "learning_rate": 0.0001, + "loss": 0.0156, + "step": 60830 + }, + { + "epoch": 400.2631578947368, + "grad_norm": 1.0634799003601074, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 60840 + }, + { + "epoch": 400.32894736842104, + "grad_norm": 1.2709938287734985, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 60850 + }, + { + "epoch": 400.39473684210526, + "grad_norm": 1.0252529382705688, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 60860 + }, + { + "epoch": 400.4605263157895, + "grad_norm": 1.4631614685058594, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 60870 + }, + { + "epoch": 400.5263157894737, + "grad_norm": 1.0839378833770752, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 60880 + }, + { + "epoch": 400.5921052631579, + "grad_norm": 0.9850987792015076, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 60890 + }, + { + "epoch": 400.6578947368421, + "grad_norm": 1.6288450956344604, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 60900 + }, + { + "epoch": 400.7236842105263, + "grad_norm": 1.1877349615097046, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 60910 + }, + { + "epoch": 400.7894736842105, + "grad_norm": 1.2342051267623901, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 60920 + }, + { + "epoch": 400.85526315789474, + "grad_norm": 1.1505542993545532, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 60930 + }, + { + "epoch": 400.92105263157896, + "grad_norm": 0.6840497851371765, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 60940 + }, + { + "epoch": 400.9868421052632, + "grad_norm": 1.1132681369781494, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 60950 + }, + { + "epoch": 401.05263157894734, + "grad_norm": 0.9815819263458252, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 60960 + }, + { + "epoch": 401.11842105263156, + "grad_norm": 1.063504695892334, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 60970 + }, + { + "epoch": 401.1842105263158, + "grad_norm": 0.8839133977890015, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 60980 + }, + { + "epoch": 401.25, + "grad_norm": 1.3246194124221802, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 60990 + }, + { + "epoch": 401.3157894736842, + "grad_norm": 1.150536060333252, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 61000 + }, + { + "epoch": 401.38157894736844, + "grad_norm": 1.1703293323516846, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 61010 + }, + { + "epoch": 401.44736842105266, + "grad_norm": 1.1966885328292847, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 61020 + }, + { + "epoch": 401.5131578947368, + "grad_norm": 1.2253130674362183, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 61030 + }, + { + "epoch": 401.57894736842104, + "grad_norm": 0.6977288126945496, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 61040 + }, + { + "epoch": 401.64473684210526, + "grad_norm": 1.388411045074463, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 61050 + }, + { + "epoch": 401.7105263157895, + "grad_norm": 1.45159113407135, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 61060 + }, + { + "epoch": 401.7763157894737, + "grad_norm": 1.221388578414917, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 61070 + }, + { + "epoch": 401.8421052631579, + "grad_norm": 1.2289520502090454, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 61080 + }, + { + "epoch": 401.9078947368421, + "grad_norm": 1.2441534996032715, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 61090 + }, + { + "epoch": 401.9736842105263, + "grad_norm": 1.4623695611953735, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 61100 + }, + { + "epoch": 402.0394736842105, + "grad_norm": 1.220936894416809, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 61110 + }, + { + "epoch": 402.10526315789474, + "grad_norm": 1.2815364599227905, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61120 + }, + { + "epoch": 402.17105263157896, + "grad_norm": 1.2547277212142944, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 61130 + }, + { + "epoch": 402.2368421052632, + "grad_norm": 0.9268215894699097, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 61140 + }, + { + "epoch": 402.30263157894734, + "grad_norm": 1.4284296035766602, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 61150 + }, + { + "epoch": 402.36842105263156, + "grad_norm": 1.183083415031433, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 61160 + }, + { + "epoch": 402.4342105263158, + "grad_norm": 1.3317415714263916, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 61170 + }, + { + "epoch": 402.5, + "grad_norm": 0.9961628317832947, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 61180 + }, + { + "epoch": 402.5657894736842, + "grad_norm": 1.011443018913269, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 61190 + }, + { + "epoch": 402.63157894736844, + "grad_norm": 1.214849829673767, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 61200 + }, + { + "epoch": 402.69736842105266, + "grad_norm": 1.2691972255706787, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 61210 + }, + { + "epoch": 402.7631578947368, + "grad_norm": 0.8713511228561401, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 61220 + }, + { + "epoch": 402.82894736842104, + "grad_norm": 1.2095887660980225, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 61230 + }, + { + "epoch": 402.89473684210526, + "grad_norm": 1.3552407026290894, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 61240 + }, + { + "epoch": 402.9605263157895, + "grad_norm": 1.3354055881500244, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61250 + }, + { + "epoch": 403.0263157894737, + "grad_norm": 1.0809153318405151, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 61260 + }, + { + "epoch": 403.0921052631579, + "grad_norm": 0.9687100052833557, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 61270 + }, + { + "epoch": 403.1578947368421, + "grad_norm": 1.0305453538894653, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 61280 + }, + { + "epoch": 403.2236842105263, + "grad_norm": 1.0682860612869263, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 61290 + }, + { + "epoch": 403.2894736842105, + "grad_norm": 1.3021142482757568, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 61300 + }, + { + "epoch": 403.35526315789474, + "grad_norm": 1.2152711153030396, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 61310 + }, + { + "epoch": 403.42105263157896, + "grad_norm": 1.1612188816070557, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 61320 + }, + { + "epoch": 403.4868421052632, + "grad_norm": 1.2532055377960205, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 61330 + }, + { + "epoch": 403.55263157894734, + "grad_norm": 1.164185881614685, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 61340 + }, + { + "epoch": 403.61842105263156, + "grad_norm": 1.5072556734085083, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 61350 + }, + { + "epoch": 403.6842105263158, + "grad_norm": 1.4515098333358765, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61360 + }, + { + "epoch": 403.75, + "grad_norm": 1.3423659801483154, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61370 + }, + { + "epoch": 403.8157894736842, + "grad_norm": 1.5325146913528442, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 61380 + }, + { + "epoch": 403.88157894736844, + "grad_norm": 1.323291301727295, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61390 + }, + { + "epoch": 403.94736842105266, + "grad_norm": 1.3427553176879883, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 61400 + }, + { + "epoch": 404.0131578947368, + "grad_norm": 1.0898405313491821, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 61410 + }, + { + "epoch": 404.07894736842104, + "grad_norm": 1.0834609270095825, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 61420 + }, + { + "epoch": 404.14473684210526, + "grad_norm": 1.5015757083892822, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 61430 + }, + { + "epoch": 404.2105263157895, + "grad_norm": 1.4444079399108887, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61440 + }, + { + "epoch": 404.2763157894737, + "grad_norm": 1.0211026668548584, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61450 + }, + { + "epoch": 404.3421052631579, + "grad_norm": 1.4435980319976807, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 61460 + }, + { + "epoch": 404.4078947368421, + "grad_norm": 0.9525414109230042, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 61470 + }, + { + "epoch": 404.4736842105263, + "grad_norm": 1.289888620376587, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 61480 + }, + { + "epoch": 404.5394736842105, + "grad_norm": 1.652472734451294, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 61490 + }, + { + "epoch": 404.60526315789474, + "grad_norm": 1.2742666006088257, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 61500 + }, + { + "epoch": 404.67105263157896, + "grad_norm": 0.8357386589050293, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 61510 + }, + { + "epoch": 404.7368421052632, + "grad_norm": 0.9393722414970398, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 61520 + }, + { + "epoch": 404.80263157894734, + "grad_norm": 1.255674123764038, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 61530 + }, + { + "epoch": 404.86842105263156, + "grad_norm": 1.0511587858200073, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 61540 + }, + { + "epoch": 404.9342105263158, + "grad_norm": 0.9492988586425781, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 61550 + }, + { + "epoch": 405.0, + "grad_norm": 1.1605418920516968, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 61560 + }, + { + "epoch": 405.0657894736842, + "grad_norm": 1.098800778388977, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 61570 + }, + { + "epoch": 405.13157894736844, + "grad_norm": 1.138261079788208, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 61580 + }, + { + "epoch": 405.19736842105266, + "grad_norm": 1.2927042245864868, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 61590 + }, + { + "epoch": 405.2631578947368, + "grad_norm": 1.1841543912887573, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 61600 + }, + { + "epoch": 405.32894736842104, + "grad_norm": 1.0832710266113281, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 61610 + }, + { + "epoch": 405.39473684210526, + "grad_norm": 1.35568106174469, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 61620 + }, + { + "epoch": 405.4605263157895, + "grad_norm": 1.0459115505218506, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 61630 + }, + { + "epoch": 405.5263157894737, + "grad_norm": 1.2705081701278687, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 61640 + }, + { + "epoch": 405.5921052631579, + "grad_norm": 0.9033043384552002, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 61650 + }, + { + "epoch": 405.6578947368421, + "grad_norm": 1.2756457328796387, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 61660 + }, + { + "epoch": 405.7236842105263, + "grad_norm": 1.5137383937835693, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 61670 + }, + { + "epoch": 405.7894736842105, + "grad_norm": 0.9483539462089539, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 61680 + }, + { + "epoch": 405.85526315789474, + "grad_norm": 1.1545567512512207, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 61690 + }, + { + "epoch": 405.92105263157896, + "grad_norm": 0.8387587666511536, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 61700 + }, + { + "epoch": 405.9868421052632, + "grad_norm": 0.9866654276847839, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61710 + }, + { + "epoch": 406.05263157894734, + "grad_norm": 0.9306283593177795, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 61720 + }, + { + "epoch": 406.11842105263156, + "grad_norm": 1.089195966720581, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61730 + }, + { + "epoch": 406.1842105263158, + "grad_norm": 1.0379259586334229, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 61740 + }, + { + "epoch": 406.25, + "grad_norm": 0.8618190288543701, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 61750 + }, + { + "epoch": 406.3157894736842, + "grad_norm": 1.3050987720489502, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 61760 + }, + { + "epoch": 406.38157894736844, + "grad_norm": 1.1287416219711304, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 61770 + }, + { + "epoch": 406.44736842105266, + "grad_norm": 0.9702133536338806, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 61780 + }, + { + "epoch": 406.5131578947368, + "grad_norm": 0.9845720529556274, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 61790 + }, + { + "epoch": 406.57894736842104, + "grad_norm": 1.2057915925979614, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 61800 + }, + { + "epoch": 406.64473684210526, + "grad_norm": 1.1550894975662231, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 61810 + }, + { + "epoch": 406.7105263157895, + "grad_norm": 1.2562119960784912, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 61820 + }, + { + "epoch": 406.7763157894737, + "grad_norm": 1.1351388692855835, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 61830 + }, + { + "epoch": 406.8421052631579, + "grad_norm": 1.1127445697784424, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 61840 + }, + { + "epoch": 406.9078947368421, + "grad_norm": 1.1058247089385986, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 61850 + }, + { + "epoch": 406.9736842105263, + "grad_norm": 0.9056158661842346, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 61860 + }, + { + "epoch": 407.0394736842105, + "grad_norm": 1.2106934785842896, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 61870 + }, + { + "epoch": 407.10526315789474, + "grad_norm": 1.2813785076141357, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 61880 + }, + { + "epoch": 407.17105263157896, + "grad_norm": 1.3114577531814575, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 61890 + }, + { + "epoch": 407.2368421052632, + "grad_norm": 1.1716445684432983, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 61900 + }, + { + "epoch": 407.30263157894734, + "grad_norm": 1.2091526985168457, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 61910 + }, + { + "epoch": 407.36842105263156, + "grad_norm": 0.9812834858894348, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 61920 + }, + { + "epoch": 407.4342105263158, + "grad_norm": 0.9171328544616699, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 61930 + }, + { + "epoch": 407.5, + "grad_norm": 0.8399735689163208, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 61940 + }, + { + "epoch": 407.5657894736842, + "grad_norm": 1.0794377326965332, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 61950 + }, + { + "epoch": 407.63157894736844, + "grad_norm": 1.1794744729995728, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 61960 + }, + { + "epoch": 407.69736842105266, + "grad_norm": 1.2339738607406616, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 61970 + }, + { + "epoch": 407.7631578947368, + "grad_norm": 1.1554800271987915, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 61980 + }, + { + "epoch": 407.82894736842104, + "grad_norm": 0.769442081451416, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 61990 + }, + { + "epoch": 407.89473684210526, + "grad_norm": 0.9051490426063538, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 62000 + }, + { + "epoch": 407.9605263157895, + "grad_norm": 0.8999045491218567, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 62010 + }, + { + "epoch": 408.0263157894737, + "grad_norm": 1.5016242265701294, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 62020 + }, + { + "epoch": 408.0921052631579, + "grad_norm": 1.4286948442459106, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 62030 + }, + { + "epoch": 408.1578947368421, + "grad_norm": 1.1384950876235962, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 62040 + }, + { + "epoch": 408.2236842105263, + "grad_norm": 1.6049597263336182, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 62050 + }, + { + "epoch": 408.2894736842105, + "grad_norm": 1.4698026180267334, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 62060 + }, + { + "epoch": 408.35526315789474, + "grad_norm": 0.9157135486602783, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 62070 + }, + { + "epoch": 408.42105263157896, + "grad_norm": 1.380117654800415, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 62080 + }, + { + "epoch": 408.4868421052632, + "grad_norm": 1.374470829963684, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 62090 + }, + { + "epoch": 408.55263157894734, + "grad_norm": 1.056437611579895, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 62100 + }, + { + "epoch": 408.61842105263156, + "grad_norm": 1.3114112615585327, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 62110 + }, + { + "epoch": 408.6842105263158, + "grad_norm": 1.187699794769287, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 62120 + }, + { + "epoch": 408.75, + "grad_norm": 1.2368489503860474, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 62130 + }, + { + "epoch": 408.8157894736842, + "grad_norm": 1.251200556755066, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 62140 + }, + { + "epoch": 408.88157894736844, + "grad_norm": 1.1192817687988281, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 62150 + }, + { + "epoch": 408.94736842105266, + "grad_norm": 1.289333701133728, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 62160 + }, + { + "epoch": 409.0131578947368, + "grad_norm": 1.1822633743286133, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 62170 + }, + { + "epoch": 409.07894736842104, + "grad_norm": 0.9145843982696533, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 62180 + }, + { + "epoch": 409.14473684210526, + "grad_norm": 1.0487996339797974, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 62190 + }, + { + "epoch": 409.2105263157895, + "grad_norm": 1.1056833267211914, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 62200 + }, + { + "epoch": 409.2763157894737, + "grad_norm": 1.2669976949691772, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 62210 + }, + { + "epoch": 409.3421052631579, + "grad_norm": 0.501373827457428, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 62220 + }, + { + "epoch": 409.4078947368421, + "grad_norm": 1.0608346462249756, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 62230 + }, + { + "epoch": 409.4736842105263, + "grad_norm": 1.1155251264572144, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 62240 + }, + { + "epoch": 409.5394736842105, + "grad_norm": 1.1546201705932617, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 62250 + }, + { + "epoch": 409.60526315789474, + "grad_norm": 1.0794380903244019, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 62260 + }, + { + "epoch": 409.67105263157896, + "grad_norm": 1.337540626525879, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 62270 + }, + { + "epoch": 409.7368421052632, + "grad_norm": 1.1186962127685547, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 62280 + }, + { + "epoch": 409.80263157894734, + "grad_norm": 1.0747431516647339, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 62290 + }, + { + "epoch": 409.86842105263156, + "grad_norm": 1.2456459999084473, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 62300 + }, + { + "epoch": 409.9342105263158, + "grad_norm": 1.2872223854064941, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 62310 + }, + { + "epoch": 410.0, + "grad_norm": 1.3749200105667114, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 62320 + }, + { + "epoch": 410.0657894736842, + "grad_norm": 1.159676432609558, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 62330 + }, + { + "epoch": 410.13157894736844, + "grad_norm": 1.3028908967971802, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 62340 + }, + { + "epoch": 410.19736842105266, + "grad_norm": 0.9679288864135742, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 62350 + }, + { + "epoch": 410.2631578947368, + "grad_norm": 1.2073003053665161, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 62360 + }, + { + "epoch": 410.32894736842104, + "grad_norm": 1.3585573434829712, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 62370 + }, + { + "epoch": 410.39473684210526, + "grad_norm": 1.4543274641036987, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 62380 + }, + { + "epoch": 410.4605263157895, + "grad_norm": 1.1905204057693481, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 62390 + }, + { + "epoch": 410.5263157894737, + "grad_norm": 1.5460284948349, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 62400 + }, + { + "epoch": 410.5921052631579, + "grad_norm": 1.3457225561141968, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 62410 + }, + { + "epoch": 410.6578947368421, + "grad_norm": 1.2957866191864014, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 62420 + }, + { + "epoch": 410.7236842105263, + "grad_norm": 1.269675374031067, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 62430 + }, + { + "epoch": 410.7894736842105, + "grad_norm": 1.0206223726272583, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 62440 + }, + { + "epoch": 410.85526315789474, + "grad_norm": 1.2072843313217163, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 62450 + }, + { + "epoch": 410.92105263157896, + "grad_norm": 0.9825667142868042, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 62460 + }, + { + "epoch": 410.9868421052632, + "grad_norm": 1.3091356754302979, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 62470 + }, + { + "epoch": 411.05263157894734, + "grad_norm": 1.3110969066619873, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 62480 + }, + { + "epoch": 411.11842105263156, + "grad_norm": 1.2493022680282593, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 62490 + }, + { + "epoch": 411.1842105263158, + "grad_norm": 1.1567295789718628, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 62500 + }, + { + "epoch": 411.25, + "grad_norm": 1.2471519708633423, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 62510 + }, + { + "epoch": 411.3157894736842, + "grad_norm": 0.6940414905548096, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 62520 + }, + { + "epoch": 411.38157894736844, + "grad_norm": 0.9060186743736267, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 62530 + }, + { + "epoch": 411.44736842105266, + "grad_norm": 1.0752925872802734, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 62540 + }, + { + "epoch": 411.5131578947368, + "grad_norm": 1.0197933912277222, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 62550 + }, + { + "epoch": 411.57894736842104, + "grad_norm": 0.9554465413093567, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 62560 + }, + { + "epoch": 411.64473684210526, + "grad_norm": 1.5054553747177124, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 62570 + }, + { + "epoch": 411.7105263157895, + "grad_norm": 1.1572545766830444, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 62580 + }, + { + "epoch": 411.7763157894737, + "grad_norm": 1.1669964790344238, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 62590 + }, + { + "epoch": 411.8421052631579, + "grad_norm": 1.5255320072174072, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 62600 + }, + { + "epoch": 411.9078947368421, + "grad_norm": 1.322806715965271, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 62610 + }, + { + "epoch": 411.9736842105263, + "grad_norm": 1.0405142307281494, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 62620 + }, + { + "epoch": 412.0394736842105, + "grad_norm": 1.3252968788146973, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 62630 + }, + { + "epoch": 412.10526315789474, + "grad_norm": 0.9440876841545105, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 62640 + }, + { + "epoch": 412.17105263157896, + "grad_norm": 1.6244385242462158, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 62650 + }, + { + "epoch": 412.2368421052632, + "grad_norm": 0.9535412192344666, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 62660 + }, + { + "epoch": 412.30263157894734, + "grad_norm": 1.2329378128051758, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 62670 + }, + { + "epoch": 412.36842105263156, + "grad_norm": 0.9353330135345459, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 62680 + }, + { + "epoch": 412.4342105263158, + "grad_norm": 1.4033151865005493, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 62690 + }, + { + "epoch": 412.5, + "grad_norm": 1.1142581701278687, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 62700 + }, + { + "epoch": 412.5657894736842, + "grad_norm": 1.250436544418335, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 62710 + }, + { + "epoch": 412.63157894736844, + "grad_norm": 1.180382490158081, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 62720 + }, + { + "epoch": 412.69736842105266, + "grad_norm": 1.2273600101470947, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 62730 + }, + { + "epoch": 412.7631578947368, + "grad_norm": 0.9863313436508179, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 62740 + }, + { + "epoch": 412.82894736842104, + "grad_norm": 1.3861109018325806, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 62750 + }, + { + "epoch": 412.89473684210526, + "grad_norm": 1.238087773323059, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 62760 + }, + { + "epoch": 412.9605263157895, + "grad_norm": 1.0432476997375488, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 62770 + }, + { + "epoch": 413.0263157894737, + "grad_norm": 1.2043246030807495, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 62780 + }, + { + "epoch": 413.0921052631579, + "grad_norm": 1.2075798511505127, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 62790 + }, + { + "epoch": 413.1578947368421, + "grad_norm": 0.8619531393051147, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 62800 + }, + { + "epoch": 413.2236842105263, + "grad_norm": 1.2081847190856934, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 62810 + }, + { + "epoch": 413.2894736842105, + "grad_norm": 1.0683201551437378, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 62820 + }, + { + "epoch": 413.35526315789474, + "grad_norm": 1.2931760549545288, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 62830 + }, + { + "epoch": 413.42105263157896, + "grad_norm": 0.9082301259040833, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 62840 + }, + { + "epoch": 413.4868421052632, + "grad_norm": 1.485244631767273, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 62850 + }, + { + "epoch": 413.55263157894734, + "grad_norm": 1.0334351062774658, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 62860 + }, + { + "epoch": 413.61842105263156, + "grad_norm": 1.2499173879623413, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 62870 + }, + { + "epoch": 413.6842105263158, + "grad_norm": 1.141554832458496, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 62880 + }, + { + "epoch": 413.75, + "grad_norm": 1.2541395425796509, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 62890 + }, + { + "epoch": 413.8157894736842, + "grad_norm": 1.2289867401123047, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 62900 + }, + { + "epoch": 413.88157894736844, + "grad_norm": 1.5980114936828613, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 62910 + }, + { + "epoch": 413.94736842105266, + "grad_norm": 0.9283551573753357, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 62920 + }, + { + "epoch": 414.0131578947368, + "grad_norm": 1.0979995727539062, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 62930 + }, + { + "epoch": 414.07894736842104, + "grad_norm": 0.8771706819534302, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 62940 + }, + { + "epoch": 414.14473684210526, + "grad_norm": 1.0368901491165161, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 62950 + }, + { + "epoch": 414.2105263157895, + "grad_norm": 1.0799942016601562, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 62960 + }, + { + "epoch": 414.2763157894737, + "grad_norm": 1.2244056463241577, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 62970 + }, + { + "epoch": 414.3421052631579, + "grad_norm": 0.9773150086402893, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 62980 + }, + { + "epoch": 414.4078947368421, + "grad_norm": 1.0069860219955444, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 62990 + }, + { + "epoch": 414.4736842105263, + "grad_norm": 0.9400529265403748, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 63000 + }, + { + "epoch": 414.5394736842105, + "grad_norm": 1.3097772598266602, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 63010 + }, + { + "epoch": 414.60526315789474, + "grad_norm": 1.036408543586731, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 63020 + }, + { + "epoch": 414.67105263157896, + "grad_norm": 0.8755251169204712, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 63030 + }, + { + "epoch": 414.7368421052632, + "grad_norm": 0.8445789813995361, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 63040 + }, + { + "epoch": 414.80263157894734, + "grad_norm": 1.4002634286880493, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 63050 + }, + { + "epoch": 414.86842105263156, + "grad_norm": 1.1597133874893188, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 63060 + }, + { + "epoch": 414.9342105263158, + "grad_norm": 1.0446454286575317, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 63070 + }, + { + "epoch": 415.0, + "grad_norm": 1.177336573600769, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 63080 + }, + { + "epoch": 415.0657894736842, + "grad_norm": 1.3958925008773804, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 63090 + }, + { + "epoch": 415.13157894736844, + "grad_norm": 1.2025551795959473, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 63100 + }, + { + "epoch": 415.19736842105266, + "grad_norm": 1.3240025043487549, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 63110 + }, + { + "epoch": 415.2631578947368, + "grad_norm": 1.2941898107528687, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 63120 + }, + { + "epoch": 415.32894736842104, + "grad_norm": 1.2462248802185059, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 63130 + }, + { + "epoch": 415.39473684210526, + "grad_norm": 1.523111343383789, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 63140 + }, + { + "epoch": 415.4605263157895, + "grad_norm": 1.2437251806259155, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 63150 + }, + { + "epoch": 415.5263157894737, + "grad_norm": 1.0009071826934814, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 63160 + }, + { + "epoch": 415.5921052631579, + "grad_norm": 1.008365511894226, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 63170 + }, + { + "epoch": 415.6578947368421, + "grad_norm": 1.136559009552002, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 63180 + }, + { + "epoch": 415.7236842105263, + "grad_norm": 0.6877762675285339, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 63190 + }, + { + "epoch": 415.7894736842105, + "grad_norm": 1.1004842519760132, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 63200 + }, + { + "epoch": 415.85526315789474, + "grad_norm": 0.9160225987434387, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 63210 + }, + { + "epoch": 415.92105263157896, + "grad_norm": 1.2179346084594727, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 63220 + }, + { + "epoch": 415.9868421052632, + "grad_norm": 1.299672245979309, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 63230 + }, + { + "epoch": 416.05263157894734, + "grad_norm": 1.1643931865692139, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 63240 + }, + { + "epoch": 416.11842105263156, + "grad_norm": 1.36052405834198, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 63250 + }, + { + "epoch": 416.1842105263158, + "grad_norm": 1.2271571159362793, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 63260 + }, + { + "epoch": 416.25, + "grad_norm": 1.2492791414260864, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 63270 + }, + { + "epoch": 416.3157894736842, + "grad_norm": 1.2201145887374878, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 63280 + }, + { + "epoch": 416.38157894736844, + "grad_norm": 1.615103006362915, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 63290 + }, + { + "epoch": 416.44736842105266, + "grad_norm": 1.0031245946884155, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 63300 + }, + { + "epoch": 416.5131578947368, + "grad_norm": 1.1543900966644287, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 63310 + }, + { + "epoch": 416.57894736842104, + "grad_norm": 1.0617386102676392, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 63320 + }, + { + "epoch": 416.64473684210526, + "grad_norm": 1.2433397769927979, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 63330 + }, + { + "epoch": 416.7105263157895, + "grad_norm": 1.307660698890686, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 63340 + }, + { + "epoch": 416.7763157894737, + "grad_norm": 1.402388334274292, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 63350 + }, + { + "epoch": 416.8421052631579, + "grad_norm": 1.2867043018341064, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 63360 + }, + { + "epoch": 416.9078947368421, + "grad_norm": 1.1766114234924316, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 63370 + }, + { + "epoch": 416.9736842105263, + "grad_norm": 1.2115635871887207, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 63380 + }, + { + "epoch": 417.0394736842105, + "grad_norm": 1.2058789730072021, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 63390 + }, + { + "epoch": 417.10526315789474, + "grad_norm": 1.194610834121704, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 63400 + }, + { + "epoch": 417.17105263157896, + "grad_norm": 0.9211453199386597, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 63410 + }, + { + "epoch": 417.2368421052632, + "grad_norm": 1.093869924545288, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 63420 + }, + { + "epoch": 417.30263157894734, + "grad_norm": 0.9869666695594788, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 63430 + }, + { + "epoch": 417.36842105263156, + "grad_norm": 1.1537610292434692, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 63440 + }, + { + "epoch": 417.4342105263158, + "grad_norm": 0.9441319108009338, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 63450 + }, + { + "epoch": 417.5, + "grad_norm": 1.3330210447311401, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 63460 + }, + { + "epoch": 417.5657894736842, + "grad_norm": 1.4678821563720703, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 63470 + }, + { + "epoch": 417.63157894736844, + "grad_norm": 1.0894778966903687, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 63480 + }, + { + "epoch": 417.69736842105266, + "grad_norm": 1.3861744403839111, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 63490 + }, + { + "epoch": 417.7631578947368, + "grad_norm": 1.1523667573928833, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 63500 + }, + { + "epoch": 417.82894736842104, + "grad_norm": 1.058275818824768, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 63510 + }, + { + "epoch": 417.89473684210526, + "grad_norm": 1.2407093048095703, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 63520 + }, + { + "epoch": 417.9605263157895, + "grad_norm": 0.861865222454071, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 63530 + }, + { + "epoch": 418.0263157894737, + "grad_norm": 1.3533257246017456, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 63540 + }, + { + "epoch": 418.0921052631579, + "grad_norm": 0.8494672179222107, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 63550 + }, + { + "epoch": 418.1578947368421, + "grad_norm": 0.9071248769760132, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 63560 + }, + { + "epoch": 418.2236842105263, + "grad_norm": 1.4930342435836792, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 63570 + }, + { + "epoch": 418.2894736842105, + "grad_norm": 1.4266362190246582, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 63580 + }, + { + "epoch": 418.35526315789474, + "grad_norm": 1.1286511421203613, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 63590 + }, + { + "epoch": 418.42105263157896, + "grad_norm": 1.0800775289535522, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 63600 + }, + { + "epoch": 418.4868421052632, + "grad_norm": 1.0998226404190063, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 63610 + }, + { + "epoch": 418.55263157894734, + "grad_norm": 1.3095847368240356, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 63620 + }, + { + "epoch": 418.61842105263156, + "grad_norm": 1.0724563598632812, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 63630 + }, + { + "epoch": 418.6842105263158, + "grad_norm": 0.9985296726226807, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 63640 + }, + { + "epoch": 418.75, + "grad_norm": 1.2534633874893188, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 63650 + }, + { + "epoch": 418.8157894736842, + "grad_norm": 1.4504799842834473, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 63660 + }, + { + "epoch": 418.88157894736844, + "grad_norm": 1.180578589439392, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 63670 + }, + { + "epoch": 418.94736842105266, + "grad_norm": 1.6236175298690796, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 63680 + }, + { + "epoch": 419.0131578947368, + "grad_norm": 1.1032344102859497, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 63690 + }, + { + "epoch": 419.07894736842104, + "grad_norm": 1.1350429058074951, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 63700 + }, + { + "epoch": 419.14473684210526, + "grad_norm": 1.1316243410110474, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 63710 + }, + { + "epoch": 419.2105263157895, + "grad_norm": 1.0160430669784546, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 63720 + }, + { + "epoch": 419.2763157894737, + "grad_norm": 0.9599151015281677, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 63730 + }, + { + "epoch": 419.3421052631579, + "grad_norm": 1.0853590965270996, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 63740 + }, + { + "epoch": 419.4078947368421, + "grad_norm": 1.0868717432022095, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 63750 + }, + { + "epoch": 419.4736842105263, + "grad_norm": 0.9881595373153687, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 63760 + }, + { + "epoch": 419.5394736842105, + "grad_norm": 1.1161201000213623, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 63770 + }, + { + "epoch": 419.60526315789474, + "grad_norm": 0.8587058186531067, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 63780 + }, + { + "epoch": 419.67105263157896, + "grad_norm": 0.959867537021637, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 63790 + }, + { + "epoch": 419.7368421052632, + "grad_norm": 1.1373491287231445, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 63800 + }, + { + "epoch": 419.80263157894734, + "grad_norm": 1.2124371528625488, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 63810 + }, + { + "epoch": 419.86842105263156, + "grad_norm": 1.1370086669921875, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 63820 + }, + { + "epoch": 419.9342105263158, + "grad_norm": 1.2051914930343628, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 63830 + }, + { + "epoch": 420.0, + "grad_norm": 1.598705530166626, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 63840 + }, + { + "epoch": 420.0657894736842, + "grad_norm": 1.2667107582092285, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 63850 + }, + { + "epoch": 420.13157894736844, + "grad_norm": 1.4038127660751343, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 63860 + }, + { + "epoch": 420.19736842105266, + "grad_norm": 1.233218789100647, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 63870 + }, + { + "epoch": 420.2631578947368, + "grad_norm": 1.2096288204193115, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 63880 + }, + { + "epoch": 420.32894736842104, + "grad_norm": 1.050657033920288, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 63890 + }, + { + "epoch": 420.39473684210526, + "grad_norm": 1.7145295143127441, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 63900 + }, + { + "epoch": 420.4605263157895, + "grad_norm": 1.58820378780365, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 63910 + }, + { + "epoch": 420.5263157894737, + "grad_norm": 0.9367581009864807, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 63920 + }, + { + "epoch": 420.5921052631579, + "grad_norm": 1.4414033889770508, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 63930 + }, + { + "epoch": 420.6578947368421, + "grad_norm": 1.310313105583191, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 63940 + }, + { + "epoch": 420.7236842105263, + "grad_norm": 0.850493848323822, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 63950 + }, + { + "epoch": 420.7894736842105, + "grad_norm": 1.3179919719696045, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 63960 + }, + { + "epoch": 420.85526315789474, + "grad_norm": 1.3989521265029907, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 63970 + }, + { + "epoch": 420.92105263157896, + "grad_norm": 1.3190523386001587, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 63980 + }, + { + "epoch": 420.9868421052632, + "grad_norm": 0.8655247688293457, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 63990 + }, + { + "epoch": 421.05263157894734, + "grad_norm": 1.5341448783874512, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 64000 + }, + { + "epoch": 421.11842105263156, + "grad_norm": 1.0463502407073975, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 64010 + }, + { + "epoch": 421.1842105263158, + "grad_norm": 0.6681509613990784, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 64020 + }, + { + "epoch": 421.25, + "grad_norm": 1.323490858078003, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 64030 + }, + { + "epoch": 421.3157894736842, + "grad_norm": 0.8754792213439941, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 64040 + }, + { + "epoch": 421.38157894736844, + "grad_norm": 1.2560418844223022, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 64050 + }, + { + "epoch": 421.44736842105266, + "grad_norm": 1.2175922393798828, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 64060 + }, + { + "epoch": 421.5131578947368, + "grad_norm": 1.4118285179138184, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 64070 + }, + { + "epoch": 421.57894736842104, + "grad_norm": 1.3266385793685913, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 64080 + }, + { + "epoch": 421.64473684210526, + "grad_norm": 1.113729476928711, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 64090 + }, + { + "epoch": 421.7105263157895, + "grad_norm": 1.134850025177002, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 64100 + }, + { + "epoch": 421.7763157894737, + "grad_norm": 0.9437716603279114, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 64110 + }, + { + "epoch": 421.8421052631579, + "grad_norm": 1.0176291465759277, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 64120 + }, + { + "epoch": 421.9078947368421, + "grad_norm": 1.2647275924682617, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 64130 + }, + { + "epoch": 421.9736842105263, + "grad_norm": 0.7509058713912964, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 64140 + }, + { + "epoch": 422.0394736842105, + "grad_norm": 0.9064149260520935, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 64150 + }, + { + "epoch": 422.10526315789474, + "grad_norm": 1.20452880859375, + "learning_rate": 0.0001, + "loss": 0.0159, + "step": 64160 + }, + { + "epoch": 422.17105263157896, + "grad_norm": 1.04318368434906, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 64170 + }, + { + "epoch": 422.2368421052632, + "grad_norm": 1.1167349815368652, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 64180 + }, + { + "epoch": 422.30263157894734, + "grad_norm": 1.3725717067718506, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 64190 + }, + { + "epoch": 422.36842105263156, + "grad_norm": 1.0042188167572021, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 64200 + }, + { + "epoch": 422.4342105263158, + "grad_norm": 0.822090208530426, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 64210 + }, + { + "epoch": 422.5, + "grad_norm": 1.2058206796646118, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 64220 + }, + { + "epoch": 422.5657894736842, + "grad_norm": 0.9390090703964233, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 64230 + }, + { + "epoch": 422.63157894736844, + "grad_norm": 1.168169379234314, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 64240 + }, + { + "epoch": 422.69736842105266, + "grad_norm": 1.0627769231796265, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 64250 + }, + { + "epoch": 422.7631578947368, + "grad_norm": 0.819271981716156, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 64260 + }, + { + "epoch": 422.82894736842104, + "grad_norm": 1.1444475650787354, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 64270 + }, + { + "epoch": 422.89473684210526, + "grad_norm": 1.187440276145935, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 64280 + }, + { + "epoch": 422.9605263157895, + "grad_norm": 1.1766507625579834, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 64290 + }, + { + "epoch": 423.0263157894737, + "grad_norm": 1.2091718912124634, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 64300 + }, + { + "epoch": 423.0921052631579, + "grad_norm": 0.8333958983421326, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 64310 + }, + { + "epoch": 423.1578947368421, + "grad_norm": 1.0338822603225708, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 64320 + }, + { + "epoch": 423.2236842105263, + "grad_norm": 1.3641862869262695, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 64330 + }, + { + "epoch": 423.2894736842105, + "grad_norm": 1.1286553144454956, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 64340 + }, + { + "epoch": 423.35526315789474, + "grad_norm": 1.0030713081359863, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 64350 + }, + { + "epoch": 423.42105263157896, + "grad_norm": 0.8953898549079895, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 64360 + }, + { + "epoch": 423.4868421052632, + "grad_norm": 0.9812647700309753, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 64370 + }, + { + "epoch": 423.55263157894734, + "grad_norm": 1.1874929666519165, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 64380 + }, + { + "epoch": 423.61842105263156, + "grad_norm": 1.0838314294815063, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 64390 + }, + { + "epoch": 423.6842105263158, + "grad_norm": 1.1809245347976685, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 64400 + }, + { + "epoch": 423.75, + "grad_norm": 1.1488882303237915, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 64410 + }, + { + "epoch": 423.8157894736842, + "grad_norm": 1.5134193897247314, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 64420 + }, + { + "epoch": 423.88157894736844, + "grad_norm": 1.3163988590240479, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 64430 + }, + { + "epoch": 423.94736842105266, + "grad_norm": 1.3154017925262451, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 64440 + }, + { + "epoch": 424.0131578947368, + "grad_norm": 0.980704665184021, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 64450 + }, + { + "epoch": 424.07894736842104, + "grad_norm": 1.4469650983810425, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 64460 + }, + { + "epoch": 424.14473684210526, + "grad_norm": 1.6586270332336426, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 64470 + }, + { + "epoch": 424.2105263157895, + "grad_norm": 1.164552092552185, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 64480 + }, + { + "epoch": 424.2763157894737, + "grad_norm": 1.2287418842315674, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 64490 + }, + { + "epoch": 424.3421052631579, + "grad_norm": 1.2211633920669556, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 64500 + }, + { + "epoch": 424.4078947368421, + "grad_norm": 0.9924276471138, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 64510 + }, + { + "epoch": 424.4736842105263, + "grad_norm": 1.504193663597107, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 64520 + }, + { + "epoch": 424.5394736842105, + "grad_norm": 1.217288613319397, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 64530 + }, + { + "epoch": 424.60526315789474, + "grad_norm": 0.943494439125061, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 64540 + }, + { + "epoch": 424.67105263157896, + "grad_norm": 1.0527056455612183, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 64550 + }, + { + "epoch": 424.7368421052632, + "grad_norm": 0.7593855261802673, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 64560 + }, + { + "epoch": 424.80263157894734, + "grad_norm": 0.9078150391578674, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 64570 + }, + { + "epoch": 424.86842105263156, + "grad_norm": 1.2566509246826172, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 64580 + }, + { + "epoch": 424.9342105263158, + "grad_norm": 0.9278975129127502, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 64590 + }, + { + "epoch": 425.0, + "grad_norm": 1.0791572332382202, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 64600 + }, + { + "epoch": 425.0657894736842, + "grad_norm": 1.1237635612487793, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 64610 + }, + { + "epoch": 425.13157894736844, + "grad_norm": 1.0109001398086548, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 64620 + }, + { + "epoch": 425.19736842105266, + "grad_norm": 1.1927449703216553, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 64630 + }, + { + "epoch": 425.2631578947368, + "grad_norm": 0.9462395310401917, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 64640 + }, + { + "epoch": 425.32894736842104, + "grad_norm": 1.2187925577163696, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 64650 + }, + { + "epoch": 425.39473684210526, + "grad_norm": 0.9968974590301514, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 64660 + }, + { + "epoch": 425.4605263157895, + "grad_norm": 1.0321686267852783, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 64670 + }, + { + "epoch": 425.5263157894737, + "grad_norm": 0.9968619346618652, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 64680 + }, + { + "epoch": 425.5921052631579, + "grad_norm": 1.1771488189697266, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 64690 + }, + { + "epoch": 425.6578947368421, + "grad_norm": 1.3401336669921875, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 64700 + }, + { + "epoch": 425.7236842105263, + "grad_norm": 1.4088808298110962, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 64710 + }, + { + "epoch": 425.7894736842105, + "grad_norm": 0.976675808429718, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 64720 + }, + { + "epoch": 425.85526315789474, + "grad_norm": 1.2826030254364014, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 64730 + }, + { + "epoch": 425.92105263157896, + "grad_norm": 0.7183810472488403, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 64740 + }, + { + "epoch": 425.9868421052632, + "grad_norm": 1.3914217948913574, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 64750 + }, + { + "epoch": 426.05263157894734, + "grad_norm": 1.1009012460708618, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 64760 + }, + { + "epoch": 426.11842105263156, + "grad_norm": 1.2613898515701294, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 64770 + }, + { + "epoch": 426.1842105263158, + "grad_norm": 1.666262149810791, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 64780 + }, + { + "epoch": 426.25, + "grad_norm": 1.3029481172561646, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 64790 + }, + { + "epoch": 426.3157894736842, + "grad_norm": 1.0970615148544312, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 64800 + }, + { + "epoch": 426.38157894736844, + "grad_norm": 0.8303019404411316, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 64810 + }, + { + "epoch": 426.44736842105266, + "grad_norm": 1.0774695873260498, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 64820 + }, + { + "epoch": 426.5131578947368, + "grad_norm": 1.2249752283096313, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 64830 + }, + { + "epoch": 426.57894736842104, + "grad_norm": 1.4472177028656006, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 64840 + }, + { + "epoch": 426.64473684210526, + "grad_norm": 1.1271650791168213, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 64850 + }, + { + "epoch": 426.7105263157895, + "grad_norm": 1.2733358144760132, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 64860 + }, + { + "epoch": 426.7763157894737, + "grad_norm": 0.8488833904266357, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 64870 + }, + { + "epoch": 426.8421052631579, + "grad_norm": 0.9887498021125793, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 64880 + }, + { + "epoch": 426.9078947368421, + "grad_norm": 1.160360336303711, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 64890 + }, + { + "epoch": 426.9736842105263, + "grad_norm": 1.204254388809204, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 64900 + }, + { + "epoch": 427.0394736842105, + "grad_norm": 1.3775442838668823, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 64910 + }, + { + "epoch": 427.10526315789474, + "grad_norm": 1.2718161344528198, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 64920 + }, + { + "epoch": 427.17105263157896, + "grad_norm": 1.3851796388626099, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 64930 + }, + { + "epoch": 427.2368421052632, + "grad_norm": 1.498613715171814, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 64940 + }, + { + "epoch": 427.30263157894734, + "grad_norm": 0.9563484191894531, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 64950 + }, + { + "epoch": 427.36842105263156, + "grad_norm": 1.0629527568817139, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 64960 + }, + { + "epoch": 427.4342105263158, + "grad_norm": 1.2376042604446411, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 64970 + }, + { + "epoch": 427.5, + "grad_norm": 1.0314258337020874, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 64980 + }, + { + "epoch": 427.5657894736842, + "grad_norm": 1.2879270315170288, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 64990 + }, + { + "epoch": 427.63157894736844, + "grad_norm": 0.971474289894104, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 65000 + }, + { + "epoch": 427.69736842105266, + "grad_norm": 0.9196737408638, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 65010 + }, + { + "epoch": 427.7631578947368, + "grad_norm": 0.862593412399292, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 65020 + }, + { + "epoch": 427.82894736842104, + "grad_norm": 1.113861083984375, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 65030 + }, + { + "epoch": 427.89473684210526, + "grad_norm": 1.189150333404541, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 65040 + }, + { + "epoch": 427.9605263157895, + "grad_norm": 1.0264296531677246, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 65050 + }, + { + "epoch": 428.0263157894737, + "grad_norm": 1.2653850317001343, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 65060 + }, + { + "epoch": 428.0921052631579, + "grad_norm": 1.260859489440918, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 65070 + }, + { + "epoch": 428.1578947368421, + "grad_norm": 1.6127079725265503, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 65080 + }, + { + "epoch": 428.2236842105263, + "grad_norm": 1.2209359407424927, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 65090 + }, + { + "epoch": 428.2894736842105, + "grad_norm": 1.2434695959091187, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 65100 + }, + { + "epoch": 428.35526315789474, + "grad_norm": 1.273813009262085, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 65110 + }, + { + "epoch": 428.42105263157896, + "grad_norm": 1.580181360244751, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 65120 + }, + { + "epoch": 428.4868421052632, + "grad_norm": 1.0633710622787476, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 65130 + }, + { + "epoch": 428.55263157894734, + "grad_norm": 1.0825523138046265, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 65140 + }, + { + "epoch": 428.61842105263156, + "grad_norm": 1.143089771270752, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 65150 + }, + { + "epoch": 428.6842105263158, + "grad_norm": 0.7292158603668213, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 65160 + }, + { + "epoch": 428.75, + "grad_norm": 1.3087058067321777, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 65170 + }, + { + "epoch": 428.8157894736842, + "grad_norm": 1.2735743522644043, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 65180 + }, + { + "epoch": 428.88157894736844, + "grad_norm": 1.169930100440979, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 65190 + }, + { + "epoch": 428.94736842105266, + "grad_norm": 1.1776015758514404, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 65200 + }, + { + "epoch": 429.0131578947368, + "grad_norm": 1.1052436828613281, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 65210 + }, + { + "epoch": 429.07894736842104, + "grad_norm": 0.8525586724281311, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 65220 + }, + { + "epoch": 429.14473684210526, + "grad_norm": 0.9319892525672913, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 65230 + }, + { + "epoch": 429.2105263157895, + "grad_norm": 0.9886743426322937, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 65240 + }, + { + "epoch": 429.2763157894737, + "grad_norm": 0.850257158279419, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 65250 + }, + { + "epoch": 429.3421052631579, + "grad_norm": 1.0104154348373413, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 65260 + }, + { + "epoch": 429.4078947368421, + "grad_norm": 0.8095145225524902, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 65270 + }, + { + "epoch": 429.4736842105263, + "grad_norm": 1.1854900121688843, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 65280 + }, + { + "epoch": 429.5394736842105, + "grad_norm": 1.049673080444336, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 65290 + }, + { + "epoch": 429.60526315789474, + "grad_norm": 1.3695430755615234, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 65300 + }, + { + "epoch": 429.67105263157896, + "grad_norm": 1.3096168041229248, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 65310 + }, + { + "epoch": 429.7368421052632, + "grad_norm": 1.11677086353302, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 65320 + }, + { + "epoch": 429.80263157894734, + "grad_norm": 1.0054956674575806, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 65330 + }, + { + "epoch": 429.86842105263156, + "grad_norm": 1.0261496305465698, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 65340 + }, + { + "epoch": 429.9342105263158, + "grad_norm": 0.8702152967453003, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 65350 + }, + { + "epoch": 430.0, + "grad_norm": 1.1188616752624512, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 65360 + }, + { + "epoch": 430.0657894736842, + "grad_norm": 0.9277343153953552, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 65370 + }, + { + "epoch": 430.13157894736844, + "grad_norm": 0.8487063646316528, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 65380 + }, + { + "epoch": 430.19736842105266, + "grad_norm": 1.0749578475952148, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 65390 + }, + { + "epoch": 430.2631578947368, + "grad_norm": 1.4141275882720947, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 65400 + }, + { + "epoch": 430.32894736842104, + "grad_norm": 1.4153060913085938, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 65410 + }, + { + "epoch": 430.39473684210526, + "grad_norm": 1.0824086666107178, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 65420 + }, + { + "epoch": 430.4605263157895, + "grad_norm": 1.1492947340011597, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 65430 + }, + { + "epoch": 430.5263157894737, + "grad_norm": 0.9777079820632935, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 65440 + }, + { + "epoch": 430.5921052631579, + "grad_norm": 1.278460144996643, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 65450 + }, + { + "epoch": 430.6578947368421, + "grad_norm": 1.0033302307128906, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 65460 + }, + { + "epoch": 430.7236842105263, + "grad_norm": 1.1041438579559326, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 65470 + }, + { + "epoch": 430.7894736842105, + "grad_norm": 1.6266603469848633, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 65480 + }, + { + "epoch": 430.85526315789474, + "grad_norm": 1.2963449954986572, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 65490 + }, + { + "epoch": 430.92105263157896, + "grad_norm": 1.226628303527832, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 65500 + }, + { + "epoch": 430.9868421052632, + "grad_norm": 1.3734506368637085, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 65510 + }, + { + "epoch": 431.05263157894734, + "grad_norm": 1.5446348190307617, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 65520 + }, + { + "epoch": 431.11842105263156, + "grad_norm": 1.2585902214050293, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 65530 + }, + { + "epoch": 431.1842105263158, + "grad_norm": 1.1303677558898926, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 65540 + }, + { + "epoch": 431.25, + "grad_norm": 1.005300521850586, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 65550 + }, + { + "epoch": 431.3157894736842, + "grad_norm": 1.325517177581787, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 65560 + }, + { + "epoch": 431.38157894736844, + "grad_norm": 0.9297947883605957, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 65570 + }, + { + "epoch": 431.44736842105266, + "grad_norm": 1.0581676959991455, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 65580 + }, + { + "epoch": 431.5131578947368, + "grad_norm": 0.9958070516586304, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 65590 + }, + { + "epoch": 431.57894736842104, + "grad_norm": 1.3091784715652466, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 65600 + }, + { + "epoch": 431.64473684210526, + "grad_norm": 1.091407060623169, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 65610 + }, + { + "epoch": 431.7105263157895, + "grad_norm": 1.4302153587341309, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 65620 + }, + { + "epoch": 431.7763157894737, + "grad_norm": 1.0654196739196777, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 65630 + }, + { + "epoch": 431.8421052631579, + "grad_norm": 1.413366436958313, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 65640 + }, + { + "epoch": 431.9078947368421, + "grad_norm": 1.09171462059021, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 65650 + }, + { + "epoch": 431.9736842105263, + "grad_norm": 1.5487229824066162, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 65660 + }, + { + "epoch": 432.0394736842105, + "grad_norm": 1.2298110723495483, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 65670 + }, + { + "epoch": 432.10526315789474, + "grad_norm": 1.2410331964492798, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 65680 + }, + { + "epoch": 432.17105263157896, + "grad_norm": 1.3613755702972412, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 65690 + }, + { + "epoch": 432.2368421052632, + "grad_norm": 1.2268283367156982, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 65700 + }, + { + "epoch": 432.30263157894734, + "grad_norm": 1.8424831628799438, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 65710 + }, + { + "epoch": 432.36842105263156, + "grad_norm": 1.5524377822875977, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 65720 + }, + { + "epoch": 432.4342105263158, + "grad_norm": 1.7905634641647339, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 65730 + }, + { + "epoch": 432.5, + "grad_norm": 1.3174190521240234, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 65740 + }, + { + "epoch": 432.5657894736842, + "grad_norm": 1.387406587600708, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 65750 + }, + { + "epoch": 432.63157894736844, + "grad_norm": 1.184760570526123, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 65760 + }, + { + "epoch": 432.69736842105266, + "grad_norm": 1.0794061422348022, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 65770 + }, + { + "epoch": 432.7631578947368, + "grad_norm": 1.1350884437561035, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 65780 + }, + { + "epoch": 432.82894736842104, + "grad_norm": 1.20332670211792, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 65790 + }, + { + "epoch": 432.89473684210526, + "grad_norm": 1.2901190519332886, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 65800 + }, + { + "epoch": 432.9605263157895, + "grad_norm": 0.93093341588974, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 65810 + }, + { + "epoch": 433.0263157894737, + "grad_norm": 1.0623509883880615, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 65820 + }, + { + "epoch": 433.0921052631579, + "grad_norm": 1.2282602787017822, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 65830 + }, + { + "epoch": 433.1578947368421, + "grad_norm": 1.1212533712387085, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 65840 + }, + { + "epoch": 433.2236842105263, + "grad_norm": 1.0316697359085083, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 65850 + }, + { + "epoch": 433.2894736842105, + "grad_norm": 0.9327832460403442, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 65860 + }, + { + "epoch": 433.35526315789474, + "grad_norm": 1.0273808240890503, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 65870 + }, + { + "epoch": 433.42105263157896, + "grad_norm": 0.8748319745063782, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 65880 + }, + { + "epoch": 433.4868421052632, + "grad_norm": 0.8934853076934814, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 65890 + }, + { + "epoch": 433.55263157894734, + "grad_norm": 0.9318240880966187, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 65900 + }, + { + "epoch": 433.61842105263156, + "grad_norm": 1.1659666299819946, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 65910 + }, + { + "epoch": 433.6842105263158, + "grad_norm": 0.9181238412857056, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 65920 + }, + { + "epoch": 433.75, + "grad_norm": 0.6898985505104065, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 65930 + }, + { + "epoch": 433.8157894736842, + "grad_norm": 0.89139324426651, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 65940 + }, + { + "epoch": 433.88157894736844, + "grad_norm": 1.3110442161560059, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 65950 + }, + { + "epoch": 433.94736842105266, + "grad_norm": 1.3319469690322876, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 65960 + }, + { + "epoch": 434.0131578947368, + "grad_norm": 0.9168400168418884, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 65970 + }, + { + "epoch": 434.07894736842104, + "grad_norm": 1.151136875152588, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 65980 + }, + { + "epoch": 434.14473684210526, + "grad_norm": 1.2665919065475464, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 65990 + }, + { + "epoch": 434.2105263157895, + "grad_norm": 1.0252939462661743, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 66000 + }, + { + "epoch": 434.2763157894737, + "grad_norm": 1.1638813018798828, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 66010 + }, + { + "epoch": 434.3421052631579, + "grad_norm": 0.9939305186271667, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 66020 + }, + { + "epoch": 434.4078947368421, + "grad_norm": 1.4579282999038696, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 66030 + }, + { + "epoch": 434.4736842105263, + "grad_norm": 1.1966112852096558, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 66040 + }, + { + "epoch": 434.5394736842105, + "grad_norm": 1.0983270406723022, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 66050 + }, + { + "epoch": 434.60526315789474, + "grad_norm": 1.0120450258255005, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 66060 + }, + { + "epoch": 434.67105263157896, + "grad_norm": 1.0957145690917969, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 66070 + }, + { + "epoch": 434.7368421052632, + "grad_norm": 1.030774474143982, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 66080 + }, + { + "epoch": 434.80263157894734, + "grad_norm": 1.2620954513549805, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 66090 + }, + { + "epoch": 434.86842105263156, + "grad_norm": 0.9031997919082642, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 66100 + }, + { + "epoch": 434.9342105263158, + "grad_norm": 1.1857893466949463, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 66110 + }, + { + "epoch": 435.0, + "grad_norm": 1.083285927772522, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 66120 + }, + { + "epoch": 435.0657894736842, + "grad_norm": 1.357467770576477, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 66130 + }, + { + "epoch": 435.13157894736844, + "grad_norm": 1.4382094144821167, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 66140 + }, + { + "epoch": 435.19736842105266, + "grad_norm": 1.0308399200439453, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 66150 + }, + { + "epoch": 435.2631578947368, + "grad_norm": 1.303141713142395, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 66160 + }, + { + "epoch": 435.32894736842104, + "grad_norm": 1.2150816917419434, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 66170 + }, + { + "epoch": 435.39473684210526, + "grad_norm": 0.9982725977897644, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 66180 + }, + { + "epoch": 435.4605263157895, + "grad_norm": 0.9391024112701416, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 66190 + }, + { + "epoch": 435.5263157894737, + "grad_norm": 0.7449613809585571, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 66200 + }, + { + "epoch": 435.5921052631579, + "grad_norm": 1.597276210784912, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 66210 + }, + { + "epoch": 435.6578947368421, + "grad_norm": 1.5920016765594482, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 66220 + }, + { + "epoch": 435.7236842105263, + "grad_norm": 1.151368260383606, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 66230 + }, + { + "epoch": 435.7894736842105, + "grad_norm": 1.2534699440002441, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 66240 + }, + { + "epoch": 435.85526315789474, + "grad_norm": 1.220913052558899, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 66250 + }, + { + "epoch": 435.92105263157896, + "grad_norm": 1.2110836505889893, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 66260 + }, + { + "epoch": 435.9868421052632, + "grad_norm": 0.7796202898025513, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 66270 + }, + { + "epoch": 436.05263157894734, + "grad_norm": 1.0769124031066895, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 66280 + }, + { + "epoch": 436.11842105263156, + "grad_norm": 0.732404887676239, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 66290 + }, + { + "epoch": 436.1842105263158, + "grad_norm": 0.9934023022651672, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 66300 + }, + { + "epoch": 436.25, + "grad_norm": 1.068806529045105, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 66310 + }, + { + "epoch": 436.3157894736842, + "grad_norm": 0.819196343421936, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 66320 + }, + { + "epoch": 436.38157894736844, + "grad_norm": 1.0213623046875, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 66330 + }, + { + "epoch": 436.44736842105266, + "grad_norm": 1.4873377084732056, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 66340 + }, + { + "epoch": 436.5131578947368, + "grad_norm": 0.9596549272537231, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 66350 + }, + { + "epoch": 436.57894736842104, + "grad_norm": 1.3335829973220825, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 66360 + }, + { + "epoch": 436.64473684210526, + "grad_norm": 1.1521795988082886, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 66370 + }, + { + "epoch": 436.7105263157895, + "grad_norm": 1.2757539749145508, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 66380 + }, + { + "epoch": 436.7763157894737, + "grad_norm": 1.143549919128418, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 66390 + }, + { + "epoch": 436.8421052631579, + "grad_norm": 1.0633903741836548, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 66400 + }, + { + "epoch": 436.9078947368421, + "grad_norm": 0.8937993049621582, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 66410 + }, + { + "epoch": 436.9736842105263, + "grad_norm": 1.0790894031524658, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 66420 + }, + { + "epoch": 437.0394736842105, + "grad_norm": 1.1991922855377197, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 66430 + }, + { + "epoch": 437.10526315789474, + "grad_norm": 1.1077334880828857, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 66440 + }, + { + "epoch": 437.17105263157896, + "grad_norm": 0.9122137427330017, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 66450 + }, + { + "epoch": 437.2368421052632, + "grad_norm": 1.2790870666503906, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 66460 + }, + { + "epoch": 437.30263157894734, + "grad_norm": 1.557098388671875, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 66470 + }, + { + "epoch": 437.36842105263156, + "grad_norm": 1.4872727394104004, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 66480 + }, + { + "epoch": 437.4342105263158, + "grad_norm": 1.1132712364196777, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 66490 + }, + { + "epoch": 437.5, + "grad_norm": 1.3181506395339966, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 66500 + }, + { + "epoch": 437.5657894736842, + "grad_norm": 0.976253867149353, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 66510 + }, + { + "epoch": 437.63157894736844, + "grad_norm": 0.8042446970939636, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 66520 + }, + { + "epoch": 437.69736842105266, + "grad_norm": 0.6995433568954468, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 66530 + }, + { + "epoch": 437.7631578947368, + "grad_norm": 1.2673261165618896, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 66540 + }, + { + "epoch": 437.82894736842104, + "grad_norm": 1.1991665363311768, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 66550 + }, + { + "epoch": 437.89473684210526, + "grad_norm": 1.2135796546936035, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 66560 + }, + { + "epoch": 437.9605263157895, + "grad_norm": 1.4307721853256226, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 66570 + }, + { + "epoch": 438.0263157894737, + "grad_norm": 1.061484694480896, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 66580 + }, + { + "epoch": 438.0921052631579, + "grad_norm": 1.2536671161651611, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 66590 + }, + { + "epoch": 438.1578947368421, + "grad_norm": 1.211685299873352, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 66600 + }, + { + "epoch": 438.2236842105263, + "grad_norm": 0.9517151117324829, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 66610 + }, + { + "epoch": 438.2894736842105, + "grad_norm": 1.3910490274429321, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 66620 + }, + { + "epoch": 438.35526315789474, + "grad_norm": 1.2333219051361084, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 66630 + }, + { + "epoch": 438.42105263157896, + "grad_norm": 1.4747133255004883, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 66640 + }, + { + "epoch": 438.4868421052632, + "grad_norm": 1.4502803087234497, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 66650 + }, + { + "epoch": 438.55263157894734, + "grad_norm": 1.0021837949752808, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 66660 + }, + { + "epoch": 438.61842105263156, + "grad_norm": 1.2110358476638794, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 66670 + }, + { + "epoch": 438.6842105263158, + "grad_norm": 1.2951200008392334, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 66680 + }, + { + "epoch": 438.75, + "grad_norm": 1.4728100299835205, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 66690 + }, + { + "epoch": 438.8157894736842, + "grad_norm": 1.3396544456481934, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 66700 + }, + { + "epoch": 438.88157894736844, + "grad_norm": 0.9344916343688965, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 66710 + }, + { + "epoch": 438.94736842105266, + "grad_norm": 0.8151391744613647, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 66720 + }, + { + "epoch": 439.0131578947368, + "grad_norm": 0.8161630630493164, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 66730 + }, + { + "epoch": 439.07894736842104, + "grad_norm": 1.26533043384552, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 66740 + }, + { + "epoch": 439.14473684210526, + "grad_norm": 1.3989818096160889, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 66750 + }, + { + "epoch": 439.2105263157895, + "grad_norm": 1.301026463508606, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 66760 + }, + { + "epoch": 439.2763157894737, + "grad_norm": 0.9039576649665833, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 66770 + }, + { + "epoch": 439.3421052631579, + "grad_norm": 1.453680396080017, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 66780 + }, + { + "epoch": 439.4078947368421, + "grad_norm": 1.273836612701416, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 66790 + }, + { + "epoch": 439.4736842105263, + "grad_norm": 1.5056235790252686, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 66800 + }, + { + "epoch": 439.5394736842105, + "grad_norm": 1.0324246883392334, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 66810 + }, + { + "epoch": 439.60526315789474, + "grad_norm": 1.2244229316711426, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 66820 + }, + { + "epoch": 439.67105263157896, + "grad_norm": 1.2605669498443604, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 66830 + }, + { + "epoch": 439.7368421052632, + "grad_norm": 1.2557387351989746, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 66840 + }, + { + "epoch": 439.80263157894734, + "grad_norm": 1.641819953918457, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 66850 + }, + { + "epoch": 439.86842105263156, + "grad_norm": 1.0714577436447144, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 66860 + }, + { + "epoch": 439.9342105263158, + "grad_norm": 1.5716043710708618, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 66870 + }, + { + "epoch": 440.0, + "grad_norm": 1.3139293193817139, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 66880 + }, + { + "epoch": 440.0657894736842, + "grad_norm": 1.3056998252868652, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 66890 + }, + { + "epoch": 440.13157894736844, + "grad_norm": 1.1983897686004639, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 66900 + }, + { + "epoch": 440.19736842105266, + "grad_norm": 1.2643394470214844, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 66910 + }, + { + "epoch": 440.2631578947368, + "grad_norm": 1.501151204109192, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 66920 + }, + { + "epoch": 440.32894736842104, + "grad_norm": 1.387885570526123, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 66930 + }, + { + "epoch": 440.39473684210526, + "grad_norm": 1.3784682750701904, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 66940 + }, + { + "epoch": 440.4605263157895, + "grad_norm": 0.9655961394309998, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 66950 + }, + { + "epoch": 440.5263157894737, + "grad_norm": 0.8454356789588928, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 66960 + }, + { + "epoch": 440.5921052631579, + "grad_norm": 1.001245141029358, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 66970 + }, + { + "epoch": 440.6578947368421, + "grad_norm": 1.1278152465820312, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 66980 + }, + { + "epoch": 440.7236842105263, + "grad_norm": 0.9985861778259277, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 66990 + }, + { + "epoch": 440.7894736842105, + "grad_norm": 1.476648211479187, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 67000 + }, + { + "epoch": 440.85526315789474, + "grad_norm": 1.0903433561325073, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 67010 + }, + { + "epoch": 440.92105263157896, + "grad_norm": 1.2222623825073242, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 67020 + }, + { + "epoch": 440.9868421052632, + "grad_norm": 1.298284888267517, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 67030 + }, + { + "epoch": 441.05263157894734, + "grad_norm": 1.4199997186660767, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 67040 + }, + { + "epoch": 441.11842105263156, + "grad_norm": 0.9580684900283813, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 67050 + }, + { + "epoch": 441.1842105263158, + "grad_norm": 1.2852739095687866, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 67060 + }, + { + "epoch": 441.25, + "grad_norm": 0.9127504825592041, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 67070 + }, + { + "epoch": 441.3157894736842, + "grad_norm": 1.0054773092269897, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 67080 + }, + { + "epoch": 441.38157894736844, + "grad_norm": 1.0165013074874878, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 67090 + }, + { + "epoch": 441.44736842105266, + "grad_norm": 1.144066572189331, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 67100 + }, + { + "epoch": 441.5131578947368, + "grad_norm": 0.9844335913658142, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 67110 + }, + { + "epoch": 441.57894736842104, + "grad_norm": 1.0714805126190186, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 67120 + }, + { + "epoch": 441.64473684210526, + "grad_norm": 1.3069833517074585, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 67130 + }, + { + "epoch": 441.7105263157895, + "grad_norm": 1.189154028892517, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 67140 + }, + { + "epoch": 441.7763157894737, + "grad_norm": 1.2548660039901733, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 67150 + }, + { + "epoch": 441.8421052631579, + "grad_norm": 1.6267309188842773, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 67160 + }, + { + "epoch": 441.9078947368421, + "grad_norm": 1.1438740491867065, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 67170 + }, + { + "epoch": 441.9736842105263, + "grad_norm": 1.1230552196502686, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 67180 + }, + { + "epoch": 442.0394736842105, + "grad_norm": 1.0231391191482544, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 67190 + }, + { + "epoch": 442.10526315789474, + "grad_norm": 1.1591103076934814, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 67200 + }, + { + "epoch": 442.17105263157896, + "grad_norm": 1.3125256299972534, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 67210 + }, + { + "epoch": 442.2368421052632, + "grad_norm": 0.9967691898345947, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 67220 + }, + { + "epoch": 442.30263157894734, + "grad_norm": 1.2353827953338623, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 67230 + }, + { + "epoch": 442.36842105263156, + "grad_norm": 0.9676706194877625, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 67240 + }, + { + "epoch": 442.4342105263158, + "grad_norm": 0.89171302318573, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 67250 + }, + { + "epoch": 442.5, + "grad_norm": 0.8268073797225952, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 67260 + }, + { + "epoch": 442.5657894736842, + "grad_norm": 0.8292796015739441, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 67270 + }, + { + "epoch": 442.63157894736844, + "grad_norm": 1.1891852617263794, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 67280 + }, + { + "epoch": 442.69736842105266, + "grad_norm": 1.0386085510253906, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 67290 + }, + { + "epoch": 442.7631578947368, + "grad_norm": 1.4309957027435303, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 67300 + }, + { + "epoch": 442.82894736842104, + "grad_norm": 1.046149730682373, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 67310 + }, + { + "epoch": 442.89473684210526, + "grad_norm": 1.2012232542037964, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 67320 + }, + { + "epoch": 442.9605263157895, + "grad_norm": 0.963539183139801, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 67330 + }, + { + "epoch": 443.0263157894737, + "grad_norm": 0.8693314790725708, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 67340 + }, + { + "epoch": 443.0921052631579, + "grad_norm": 1.503976821899414, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 67350 + }, + { + "epoch": 443.1578947368421, + "grad_norm": 0.8034859895706177, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 67360 + }, + { + "epoch": 443.2236842105263, + "grad_norm": 1.4391164779663086, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 67370 + }, + { + "epoch": 443.2894736842105, + "grad_norm": 1.2415558099746704, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 67380 + }, + { + "epoch": 443.35526315789474, + "grad_norm": 1.0258815288543701, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 67390 + }, + { + "epoch": 443.42105263157896, + "grad_norm": 0.7889366149902344, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 67400 + }, + { + "epoch": 443.4868421052632, + "grad_norm": 0.8310263752937317, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 67410 + }, + { + "epoch": 443.55263157894734, + "grad_norm": 1.102107286453247, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 67420 + }, + { + "epoch": 443.61842105263156, + "grad_norm": 0.891818106174469, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 67430 + }, + { + "epoch": 443.6842105263158, + "grad_norm": 1.2549525499343872, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 67440 + }, + { + "epoch": 443.75, + "grad_norm": 1.475273847579956, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 67450 + }, + { + "epoch": 443.8157894736842, + "grad_norm": 1.5199368000030518, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 67460 + }, + { + "epoch": 443.88157894736844, + "grad_norm": 1.0391242504119873, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 67470 + }, + { + "epoch": 443.94736842105266, + "grad_norm": 1.2882357835769653, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 67480 + }, + { + "epoch": 444.0131578947368, + "grad_norm": 1.4327231645584106, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 67490 + }, + { + "epoch": 444.07894736842104, + "grad_norm": 1.25886869430542, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 67500 + }, + { + "epoch": 444.14473684210526, + "grad_norm": 0.9697027802467346, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 67510 + }, + { + "epoch": 444.2105263157895, + "grad_norm": 1.3872424364089966, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 67520 + }, + { + "epoch": 444.2763157894737, + "grad_norm": 1.1043860912322998, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 67530 + }, + { + "epoch": 444.3421052631579, + "grad_norm": 1.202193021774292, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 67540 + }, + { + "epoch": 444.4078947368421, + "grad_norm": 1.0167324542999268, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 67550 + }, + { + "epoch": 444.4736842105263, + "grad_norm": 0.9748640656471252, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 67560 + }, + { + "epoch": 444.5394736842105, + "grad_norm": 1.1496931314468384, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 67570 + }, + { + "epoch": 444.60526315789474, + "grad_norm": 1.193697452545166, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 67580 + }, + { + "epoch": 444.67105263157896, + "grad_norm": 1.2025853395462036, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 67590 + }, + { + "epoch": 444.7368421052632, + "grad_norm": 0.9726876020431519, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 67600 + }, + { + "epoch": 444.80263157894734, + "grad_norm": 0.8311697840690613, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 67610 + }, + { + "epoch": 444.86842105263156, + "grad_norm": 0.8062876462936401, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 67620 + }, + { + "epoch": 444.9342105263158, + "grad_norm": 0.8064841628074646, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 67630 + }, + { + "epoch": 445.0, + "grad_norm": 0.9301232695579529, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 67640 + }, + { + "epoch": 445.0657894736842, + "grad_norm": 1.1378428936004639, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 67650 + }, + { + "epoch": 445.13157894736844, + "grad_norm": 1.1521905660629272, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 67660 + }, + { + "epoch": 445.19736842105266, + "grad_norm": 1.2799955606460571, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 67670 + }, + { + "epoch": 445.2631578947368, + "grad_norm": 1.008537769317627, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 67680 + }, + { + "epoch": 445.32894736842104, + "grad_norm": 1.2457149028778076, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 67690 + }, + { + "epoch": 445.39473684210526, + "grad_norm": 1.1175915002822876, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 67700 + }, + { + "epoch": 445.4605263157895, + "grad_norm": 1.0198794603347778, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 67710 + }, + { + "epoch": 445.5263157894737, + "grad_norm": 1.1624159812927246, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 67720 + }, + { + "epoch": 445.5921052631579, + "grad_norm": 1.1959896087646484, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 67730 + }, + { + "epoch": 445.6578947368421, + "grad_norm": 1.2008717060089111, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 67740 + }, + { + "epoch": 445.7236842105263, + "grad_norm": 1.321126937866211, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 67750 + }, + { + "epoch": 445.7894736842105, + "grad_norm": 1.1660131216049194, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 67760 + }, + { + "epoch": 445.85526315789474, + "grad_norm": 1.2999013662338257, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 67770 + }, + { + "epoch": 445.92105263157896, + "grad_norm": 0.8440237045288086, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 67780 + }, + { + "epoch": 445.9868421052632, + "grad_norm": 1.0197548866271973, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 67790 + }, + { + "epoch": 446.05263157894734, + "grad_norm": 1.0732282400131226, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 67800 + }, + { + "epoch": 446.11842105263156, + "grad_norm": 1.2190271615982056, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 67810 + }, + { + "epoch": 446.1842105263158, + "grad_norm": 1.098029375076294, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 67820 + }, + { + "epoch": 446.25, + "grad_norm": 1.064057469367981, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 67830 + }, + { + "epoch": 446.3157894736842, + "grad_norm": 1.0440378189086914, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 67840 + }, + { + "epoch": 446.38157894736844, + "grad_norm": 1.2264587879180908, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 67850 + }, + { + "epoch": 446.44736842105266, + "grad_norm": 0.7877911925315857, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 67860 + }, + { + "epoch": 446.5131578947368, + "grad_norm": 1.3286290168762207, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 67870 + }, + { + "epoch": 446.57894736842104, + "grad_norm": 0.9419989585876465, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 67880 + }, + { + "epoch": 446.64473684210526, + "grad_norm": 1.018985629081726, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 67890 + }, + { + "epoch": 446.7105263157895, + "grad_norm": 0.9366230964660645, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 67900 + }, + { + "epoch": 446.7763157894737, + "grad_norm": 0.7390005588531494, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 67910 + }, + { + "epoch": 446.8421052631579, + "grad_norm": 0.7992033362388611, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 67920 + }, + { + "epoch": 446.9078947368421, + "grad_norm": 1.0633325576782227, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 67930 + }, + { + "epoch": 446.9736842105263, + "grad_norm": 1.1128759384155273, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 67940 + }, + { + "epoch": 447.0394736842105, + "grad_norm": 1.405713677406311, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 67950 + }, + { + "epoch": 447.10526315789474, + "grad_norm": 1.0440222024917603, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 67960 + }, + { + "epoch": 447.17105263157896, + "grad_norm": 1.1741487979888916, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 67970 + }, + { + "epoch": 447.2368421052632, + "grad_norm": 1.0820094347000122, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 67980 + }, + { + "epoch": 447.30263157894734, + "grad_norm": 1.0338839292526245, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 67990 + }, + { + "epoch": 447.36842105263156, + "grad_norm": 1.3230096101760864, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 68000 + }, + { + "epoch": 447.4342105263158, + "grad_norm": 0.8464519381523132, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 68010 + }, + { + "epoch": 447.5, + "grad_norm": 1.3465580940246582, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 68020 + }, + { + "epoch": 447.5657894736842, + "grad_norm": 1.2503917217254639, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 68030 + }, + { + "epoch": 447.63157894736844, + "grad_norm": 1.2269294261932373, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 68040 + }, + { + "epoch": 447.69736842105266, + "grad_norm": 0.9101136922836304, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 68050 + }, + { + "epoch": 447.7631578947368, + "grad_norm": 1.1109895706176758, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 68060 + }, + { + "epoch": 447.82894736842104, + "grad_norm": 1.2800877094268799, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 68070 + }, + { + "epoch": 447.89473684210526, + "grad_norm": 1.3578050136566162, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 68080 + }, + { + "epoch": 447.9605263157895, + "grad_norm": 1.1380995512008667, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 68090 + }, + { + "epoch": 448.0263157894737, + "grad_norm": 1.510892629623413, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 68100 + }, + { + "epoch": 448.0921052631579, + "grad_norm": 1.1936112642288208, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 68110 + }, + { + "epoch": 448.1578947368421, + "grad_norm": 1.3569926023483276, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 68120 + }, + { + "epoch": 448.2236842105263, + "grad_norm": 1.0964100360870361, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 68130 + }, + { + "epoch": 448.2894736842105, + "grad_norm": 0.7216523289680481, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 68140 + }, + { + "epoch": 448.35526315789474, + "grad_norm": 1.2212159633636475, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 68150 + }, + { + "epoch": 448.42105263157896, + "grad_norm": 1.0280985832214355, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 68160 + }, + { + "epoch": 448.4868421052632, + "grad_norm": 1.3027311563491821, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 68170 + }, + { + "epoch": 448.55263157894734, + "grad_norm": 1.462243914604187, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 68180 + }, + { + "epoch": 448.61842105263156, + "grad_norm": 1.231683373451233, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 68190 + }, + { + "epoch": 448.6842105263158, + "grad_norm": 1.2947146892547607, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 68200 + }, + { + "epoch": 448.75, + "grad_norm": 1.0188627243041992, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 68210 + }, + { + "epoch": 448.8157894736842, + "grad_norm": 1.1780815124511719, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 68220 + }, + { + "epoch": 448.88157894736844, + "grad_norm": 1.1166077852249146, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 68230 + }, + { + "epoch": 448.94736842105266, + "grad_norm": 0.9867680668830872, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 68240 + }, + { + "epoch": 449.0131578947368, + "grad_norm": 1.014134168624878, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 68250 + }, + { + "epoch": 449.07894736842104, + "grad_norm": 0.8704817891120911, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 68260 + }, + { + "epoch": 449.14473684210526, + "grad_norm": 1.1056900024414062, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 68270 + }, + { + "epoch": 449.2105263157895, + "grad_norm": 1.131632924079895, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 68280 + }, + { + "epoch": 449.2763157894737, + "grad_norm": 1.3262722492218018, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 68290 + }, + { + "epoch": 449.3421052631579, + "grad_norm": 1.328003168106079, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 68300 + }, + { + "epoch": 449.4078947368421, + "grad_norm": 1.1644665002822876, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 68310 + }, + { + "epoch": 449.4736842105263, + "grad_norm": 1.419346809387207, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 68320 + }, + { + "epoch": 449.5394736842105, + "grad_norm": 0.9281734228134155, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 68330 + }, + { + "epoch": 449.60526315789474, + "grad_norm": 1.394044280052185, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 68340 + }, + { + "epoch": 449.67105263157896, + "grad_norm": 1.435802936553955, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 68350 + }, + { + "epoch": 449.7368421052632, + "grad_norm": 1.2658971548080444, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 68360 + }, + { + "epoch": 449.80263157894734, + "grad_norm": 1.4946037530899048, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 68370 + }, + { + "epoch": 449.86842105263156, + "grad_norm": 0.7400012016296387, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 68380 + }, + { + "epoch": 449.9342105263158, + "grad_norm": 1.1774287223815918, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 68390 + }, + { + "epoch": 450.0, + "grad_norm": 1.2723482847213745, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 68400 + }, + { + "epoch": 450.0657894736842, + "grad_norm": 1.0337828397750854, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 68410 + }, + { + "epoch": 450.13157894736844, + "grad_norm": 0.836054265499115, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 68420 + }, + { + "epoch": 450.19736842105266, + "grad_norm": 1.1785285472869873, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 68430 + }, + { + "epoch": 450.2631578947368, + "grad_norm": 1.1145614385604858, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 68440 + }, + { + "epoch": 450.32894736842104, + "grad_norm": 1.3522475957870483, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 68450 + }, + { + "epoch": 450.39473684210526, + "grad_norm": 1.1406996250152588, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 68460 + }, + { + "epoch": 450.4605263157895, + "grad_norm": 1.2381994724273682, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 68470 + }, + { + "epoch": 450.5263157894737, + "grad_norm": 1.4865307807922363, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 68480 + }, + { + "epoch": 450.5921052631579, + "grad_norm": 1.2501206398010254, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 68490 + }, + { + "epoch": 450.6578947368421, + "grad_norm": 1.3360950946807861, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 68500 + }, + { + "epoch": 450.7236842105263, + "grad_norm": 1.5637435913085938, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 68510 + }, + { + "epoch": 450.7894736842105, + "grad_norm": 1.383474349975586, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 68520 + }, + { + "epoch": 450.85526315789474, + "grad_norm": 1.3459497690200806, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 68530 + }, + { + "epoch": 450.92105263157896, + "grad_norm": 0.9514352679252625, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 68540 + }, + { + "epoch": 450.9868421052632, + "grad_norm": 1.1830934286117554, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 68550 + }, + { + "epoch": 451.05263157894734, + "grad_norm": 1.131381630897522, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 68560 + }, + { + "epoch": 451.11842105263156, + "grad_norm": 0.9725364446640015, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 68570 + }, + { + "epoch": 451.1842105263158, + "grad_norm": 1.4071309566497803, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 68580 + }, + { + "epoch": 451.25, + "grad_norm": 1.218841552734375, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 68590 + }, + { + "epoch": 451.3157894736842, + "grad_norm": 1.2471004724502563, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 68600 + }, + { + "epoch": 451.38157894736844, + "grad_norm": 1.2200270891189575, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 68610 + }, + { + "epoch": 451.44736842105266, + "grad_norm": 0.7983037233352661, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 68620 + }, + { + "epoch": 451.5131578947368, + "grad_norm": 1.0153566598892212, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 68630 + }, + { + "epoch": 451.57894736842104, + "grad_norm": 0.8792093396186829, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 68640 + }, + { + "epoch": 451.64473684210526, + "grad_norm": 1.1908783912658691, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 68650 + }, + { + "epoch": 451.7105263157895, + "grad_norm": 1.2272428274154663, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 68660 + }, + { + "epoch": 451.7763157894737, + "grad_norm": 0.9203829765319824, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 68670 + }, + { + "epoch": 451.8421052631579, + "grad_norm": 1.4180694818496704, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 68680 + }, + { + "epoch": 451.9078947368421, + "grad_norm": 0.8548322319984436, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 68690 + }, + { + "epoch": 451.9736842105263, + "grad_norm": 1.259071946144104, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 68700 + }, + { + "epoch": 452.0394736842105, + "grad_norm": 1.4840049743652344, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 68710 + }, + { + "epoch": 452.10526315789474, + "grad_norm": 0.9539170861244202, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 68720 + }, + { + "epoch": 452.17105263157896, + "grad_norm": 1.287971019744873, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 68730 + }, + { + "epoch": 452.2368421052632, + "grad_norm": 1.5346850156784058, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 68740 + }, + { + "epoch": 452.30263157894734, + "grad_norm": 1.4423147439956665, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 68750 + }, + { + "epoch": 452.36842105263156, + "grad_norm": 1.0236411094665527, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 68760 + }, + { + "epoch": 452.4342105263158, + "grad_norm": 0.9541664719581604, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 68770 + }, + { + "epoch": 452.5, + "grad_norm": 0.7958992719650269, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 68780 + }, + { + "epoch": 452.5657894736842, + "grad_norm": 1.2157082557678223, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 68790 + }, + { + "epoch": 452.63157894736844, + "grad_norm": 0.8588937520980835, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 68800 + }, + { + "epoch": 452.69736842105266, + "grad_norm": 1.442825436592102, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 68810 + }, + { + "epoch": 452.7631578947368, + "grad_norm": 1.1870979070663452, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 68820 + }, + { + "epoch": 452.82894736842104, + "grad_norm": 1.28781259059906, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 68830 + }, + { + "epoch": 452.89473684210526, + "grad_norm": 0.9649443030357361, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 68840 + }, + { + "epoch": 452.9605263157895, + "grad_norm": 0.9508198499679565, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 68850 + }, + { + "epoch": 453.0263157894737, + "grad_norm": 1.5486016273498535, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 68860 + }, + { + "epoch": 453.0921052631579, + "grad_norm": 1.217083215713501, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 68870 + }, + { + "epoch": 453.1578947368421, + "grad_norm": 1.2917876243591309, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 68880 + }, + { + "epoch": 453.2236842105263, + "grad_norm": 1.0706918239593506, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 68890 + }, + { + "epoch": 453.2894736842105, + "grad_norm": 1.453224778175354, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 68900 + }, + { + "epoch": 453.35526315789474, + "grad_norm": 1.209774374961853, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 68910 + }, + { + "epoch": 453.42105263157896, + "grad_norm": 0.9678460955619812, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 68920 + }, + { + "epoch": 453.4868421052632, + "grad_norm": 1.2222375869750977, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 68930 + }, + { + "epoch": 453.55263157894734, + "grad_norm": 0.9485657215118408, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 68940 + }, + { + "epoch": 453.61842105263156, + "grad_norm": 1.2582299709320068, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 68950 + }, + { + "epoch": 453.6842105263158, + "grad_norm": 1.2892476320266724, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 68960 + }, + { + "epoch": 453.75, + "grad_norm": 1.3842599391937256, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 68970 + }, + { + "epoch": 453.8157894736842, + "grad_norm": 1.095976710319519, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 68980 + }, + { + "epoch": 453.88157894736844, + "grad_norm": 1.2450053691864014, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 68990 + }, + { + "epoch": 453.94736842105266, + "grad_norm": 0.8790797591209412, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 69000 + }, + { + "epoch": 454.0131578947368, + "grad_norm": 1.2739403247833252, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 69010 + }, + { + "epoch": 454.07894736842104, + "grad_norm": 1.2191137075424194, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 69020 + }, + { + "epoch": 454.14473684210526, + "grad_norm": 1.4736047983169556, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 69030 + }, + { + "epoch": 454.2105263157895, + "grad_norm": 0.9068169593811035, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 69040 + }, + { + "epoch": 454.2763157894737, + "grad_norm": 1.5603197813034058, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 69050 + }, + { + "epoch": 454.3421052631579, + "grad_norm": 1.4251413345336914, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 69060 + }, + { + "epoch": 454.4078947368421, + "grad_norm": 1.295698642730713, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 69070 + }, + { + "epoch": 454.4736842105263, + "grad_norm": 1.536226511001587, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 69080 + }, + { + "epoch": 454.5394736842105, + "grad_norm": 1.6740570068359375, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 69090 + }, + { + "epoch": 454.60526315789474, + "grad_norm": 1.091256856918335, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 69100 + }, + { + "epoch": 454.67105263157896, + "grad_norm": 0.9562748670578003, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 69110 + }, + { + "epoch": 454.7368421052632, + "grad_norm": 1.3585184812545776, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 69120 + }, + { + "epoch": 454.80263157894734, + "grad_norm": 1.2994377613067627, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 69130 + }, + { + "epoch": 454.86842105263156, + "grad_norm": 1.152564525604248, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 69140 + }, + { + "epoch": 454.9342105263158, + "grad_norm": 1.6018366813659668, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 69150 + }, + { + "epoch": 455.0, + "grad_norm": 1.2731026411056519, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 69160 + }, + { + "epoch": 455.0657894736842, + "grad_norm": 1.0202285051345825, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 69170 + }, + { + "epoch": 455.13157894736844, + "grad_norm": 1.0699840784072876, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 69180 + }, + { + "epoch": 455.19736842105266, + "grad_norm": 1.36112380027771, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 69190 + }, + { + "epoch": 455.2631578947368, + "grad_norm": 1.1160879135131836, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 69200 + }, + { + "epoch": 455.32894736842104, + "grad_norm": 1.1175243854522705, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 69210 + }, + { + "epoch": 455.39473684210526, + "grad_norm": 1.1258723735809326, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 69220 + }, + { + "epoch": 455.4605263157895, + "grad_norm": 1.0541523694992065, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 69230 + }, + { + "epoch": 455.5263157894737, + "grad_norm": 0.8870919942855835, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 69240 + }, + { + "epoch": 455.5921052631579, + "grad_norm": 1.1106444597244263, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 69250 + }, + { + "epoch": 455.6578947368421, + "grad_norm": 1.2560456991195679, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 69260 + }, + { + "epoch": 455.7236842105263, + "grad_norm": 0.831874668598175, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 69270 + }, + { + "epoch": 455.7894736842105, + "grad_norm": 1.3205691576004028, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 69280 + }, + { + "epoch": 455.85526315789474, + "grad_norm": 1.1079440116882324, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 69290 + }, + { + "epoch": 455.92105263157896, + "grad_norm": 1.0580300092697144, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 69300 + }, + { + "epoch": 455.9868421052632, + "grad_norm": 1.5250871181488037, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 69310 + }, + { + "epoch": 456.05263157894734, + "grad_norm": 1.4987032413482666, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 69320 + }, + { + "epoch": 456.11842105263156, + "grad_norm": 1.686471939086914, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 69330 + }, + { + "epoch": 456.1842105263158, + "grad_norm": 1.4890836477279663, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 69340 + }, + { + "epoch": 456.25, + "grad_norm": 1.3004556894302368, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 69350 + }, + { + "epoch": 456.3157894736842, + "grad_norm": 1.3867583274841309, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 69360 + }, + { + "epoch": 456.38157894736844, + "grad_norm": 1.4305163621902466, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 69370 + }, + { + "epoch": 456.44736842105266, + "grad_norm": 0.9518430233001709, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 69380 + }, + { + "epoch": 456.5131578947368, + "grad_norm": 1.2672168016433716, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 69390 + }, + { + "epoch": 456.57894736842104, + "grad_norm": 1.313014030456543, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 69400 + }, + { + "epoch": 456.64473684210526, + "grad_norm": 0.7348408699035645, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 69410 + }, + { + "epoch": 456.7105263157895, + "grad_norm": 1.0799474716186523, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 69420 + }, + { + "epoch": 456.7763157894737, + "grad_norm": 0.9113180637359619, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 69430 + }, + { + "epoch": 456.8421052631579, + "grad_norm": 0.8408834934234619, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 69440 + }, + { + "epoch": 456.9078947368421, + "grad_norm": 0.9288105964660645, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 69450 + }, + { + "epoch": 456.9736842105263, + "grad_norm": 1.152346134185791, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 69460 + }, + { + "epoch": 457.0394736842105, + "grad_norm": 1.110255241394043, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 69470 + }, + { + "epoch": 457.10526315789474, + "grad_norm": 0.9234119057655334, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 69480 + }, + { + "epoch": 457.17105263157896, + "grad_norm": 1.131162405014038, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 69490 + }, + { + "epoch": 457.2368421052632, + "grad_norm": 1.2793774604797363, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 69500 + }, + { + "epoch": 457.30263157894734, + "grad_norm": 0.9325578808784485, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 69510 + }, + { + "epoch": 457.36842105263156, + "grad_norm": 0.8188279867172241, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 69520 + }, + { + "epoch": 457.4342105263158, + "grad_norm": 1.163884162902832, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 69530 + }, + { + "epoch": 457.5, + "grad_norm": 0.8260605335235596, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 69540 + }, + { + "epoch": 457.5657894736842, + "grad_norm": 0.9177809357643127, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 69550 + }, + { + "epoch": 457.63157894736844, + "grad_norm": 1.1885771751403809, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 69560 + }, + { + "epoch": 457.69736842105266, + "grad_norm": 0.9311556816101074, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 69570 + }, + { + "epoch": 457.7631578947368, + "grad_norm": 1.2804585695266724, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 69580 + }, + { + "epoch": 457.82894736842104, + "grad_norm": 1.344652771949768, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 69590 + }, + { + "epoch": 457.89473684210526, + "grad_norm": 1.4862884283065796, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 69600 + }, + { + "epoch": 457.9605263157895, + "grad_norm": 1.1936073303222656, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 69610 + }, + { + "epoch": 458.0263157894737, + "grad_norm": 1.193771243095398, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 69620 + }, + { + "epoch": 458.0921052631579, + "grad_norm": 1.1889840364456177, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 69630 + }, + { + "epoch": 458.1578947368421, + "grad_norm": 1.3708604574203491, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 69640 + }, + { + "epoch": 458.2236842105263, + "grad_norm": 0.5873807072639465, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 69650 + }, + { + "epoch": 458.2894736842105, + "grad_norm": 1.2545485496520996, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 69660 + }, + { + "epoch": 458.35526315789474, + "grad_norm": 1.2837920188903809, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 69670 + }, + { + "epoch": 458.42105263157896, + "grad_norm": 1.2512160539627075, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 69680 + }, + { + "epoch": 458.4868421052632, + "grad_norm": 0.9939183592796326, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 69690 + }, + { + "epoch": 458.55263157894734, + "grad_norm": 0.8284690380096436, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 69700 + }, + { + "epoch": 458.61842105263156, + "grad_norm": 1.1884512901306152, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 69710 + }, + { + "epoch": 458.6842105263158, + "grad_norm": 1.069785475730896, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 69720 + }, + { + "epoch": 458.75, + "grad_norm": 1.294502854347229, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 69730 + }, + { + "epoch": 458.8157894736842, + "grad_norm": 1.4734914302825928, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 69740 + }, + { + "epoch": 458.88157894736844, + "grad_norm": 0.9889464974403381, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 69750 + }, + { + "epoch": 458.94736842105266, + "grad_norm": 1.268761396408081, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 69760 + }, + { + "epoch": 459.0131578947368, + "grad_norm": 1.004045009613037, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 69770 + }, + { + "epoch": 459.07894736842104, + "grad_norm": 0.9832544326782227, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 69780 + }, + { + "epoch": 459.14473684210526, + "grad_norm": 1.2626286745071411, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 69790 + }, + { + "epoch": 459.2105263157895, + "grad_norm": 1.5737202167510986, + "learning_rate": 0.0001, + "loss": 0.0166, + "step": 69800 + }, + { + "epoch": 459.2763157894737, + "grad_norm": 1.1485472917556763, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 69810 + }, + { + "epoch": 459.3421052631579, + "grad_norm": 0.9378661513328552, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 69820 + }, + { + "epoch": 459.4078947368421, + "grad_norm": 0.9889057874679565, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 69830 + }, + { + "epoch": 459.4736842105263, + "grad_norm": 0.7639946341514587, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 69840 + }, + { + "epoch": 459.5394736842105, + "grad_norm": 1.084297776222229, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 69850 + }, + { + "epoch": 459.60526315789474, + "grad_norm": 1.2830708026885986, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 69860 + }, + { + "epoch": 459.67105263157896, + "grad_norm": 0.9912347793579102, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 69870 + }, + { + "epoch": 459.7368421052632, + "grad_norm": 1.7279759645462036, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 69880 + }, + { + "epoch": 459.80263157894734, + "grad_norm": 1.2045061588287354, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 69890 + }, + { + "epoch": 459.86842105263156, + "grad_norm": 1.4160776138305664, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 69900 + }, + { + "epoch": 459.9342105263158, + "grad_norm": 0.9676499366760254, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 69910 + }, + { + "epoch": 460.0, + "grad_norm": 0.8560894131660461, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 69920 + }, + { + "epoch": 460.0657894736842, + "grad_norm": 1.248684287071228, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 69930 + }, + { + "epoch": 460.13157894736844, + "grad_norm": 0.8064592480659485, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 69940 + }, + { + "epoch": 460.19736842105266, + "grad_norm": 1.0934714078903198, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 69950 + }, + { + "epoch": 460.2631578947368, + "grad_norm": 1.0918971300125122, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 69960 + }, + { + "epoch": 460.32894736842104, + "grad_norm": 1.164820909500122, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 69970 + }, + { + "epoch": 460.39473684210526, + "grad_norm": 0.8863160610198975, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 69980 + }, + { + "epoch": 460.4605263157895, + "grad_norm": 1.3123438358306885, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 69990 + }, + { + "epoch": 460.5263157894737, + "grad_norm": 1.3433982133865356, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 70000 + }, + { + "epoch": 460.5921052631579, + "grad_norm": 1.089854121208191, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 70010 + }, + { + "epoch": 460.6578947368421, + "grad_norm": 1.2574924230575562, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 70020 + }, + { + "epoch": 460.7236842105263, + "grad_norm": 1.087710976600647, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 70030 + }, + { + "epoch": 460.7894736842105, + "grad_norm": 1.1236116886138916, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 70040 + }, + { + "epoch": 460.85526315789474, + "grad_norm": 1.117353081703186, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 70050 + }, + { + "epoch": 460.92105263157896, + "grad_norm": 1.1128649711608887, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 70060 + }, + { + "epoch": 460.9868421052632, + "grad_norm": 1.0730403661727905, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 70070 + }, + { + "epoch": 461.05263157894734, + "grad_norm": 1.179450273513794, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 70080 + }, + { + "epoch": 461.11842105263156, + "grad_norm": 1.2371575832366943, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 70090 + }, + { + "epoch": 461.1842105263158, + "grad_norm": 1.42441725730896, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 70100 + }, + { + "epoch": 461.25, + "grad_norm": 1.179229497909546, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 70110 + }, + { + "epoch": 461.3157894736842, + "grad_norm": 0.8924153447151184, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 70120 + }, + { + "epoch": 461.38157894736844, + "grad_norm": 1.3798414468765259, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 70130 + }, + { + "epoch": 461.44736842105266, + "grad_norm": 1.3410454988479614, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 70140 + }, + { + "epoch": 461.5131578947368, + "grad_norm": 0.9191851019859314, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 70150 + }, + { + "epoch": 461.57894736842104, + "grad_norm": 1.0787506103515625, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 70160 + }, + { + "epoch": 461.64473684210526, + "grad_norm": 1.022510290145874, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 70170 + }, + { + "epoch": 461.7105263157895, + "grad_norm": 1.0433993339538574, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 70180 + }, + { + "epoch": 461.7763157894737, + "grad_norm": 1.2744972705841064, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 70190 + }, + { + "epoch": 461.8421052631579, + "grad_norm": 1.1916465759277344, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 70200 + }, + { + "epoch": 461.9078947368421, + "grad_norm": 1.2578779458999634, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 70210 + }, + { + "epoch": 461.9736842105263, + "grad_norm": 1.0983797311782837, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 70220 + }, + { + "epoch": 462.0394736842105, + "grad_norm": 1.3907415866851807, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 70230 + }, + { + "epoch": 462.10526315789474, + "grad_norm": 1.0465130805969238, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 70240 + }, + { + "epoch": 462.17105263157896, + "grad_norm": 1.1414015293121338, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 70250 + }, + { + "epoch": 462.2368421052632, + "grad_norm": 1.4109493494033813, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 70260 + }, + { + "epoch": 462.30263157894734, + "grad_norm": 1.1486443281173706, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 70270 + }, + { + "epoch": 462.36842105263156, + "grad_norm": 1.240187168121338, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 70280 + }, + { + "epoch": 462.4342105263158, + "grad_norm": 1.2985851764678955, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 70290 + }, + { + "epoch": 462.5, + "grad_norm": 1.3922537565231323, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 70300 + }, + { + "epoch": 462.5657894736842, + "grad_norm": 1.1055222749710083, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 70310 + }, + { + "epoch": 462.63157894736844, + "grad_norm": 1.1833173036575317, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 70320 + }, + { + "epoch": 462.69736842105266, + "grad_norm": 1.226995587348938, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 70330 + }, + { + "epoch": 462.7631578947368, + "grad_norm": 1.0808686017990112, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 70340 + }, + { + "epoch": 462.82894736842104, + "grad_norm": 1.443953275680542, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 70350 + }, + { + "epoch": 462.89473684210526, + "grad_norm": 1.205458402633667, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 70360 + }, + { + "epoch": 462.9605263157895, + "grad_norm": 1.6020748615264893, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 70370 + }, + { + "epoch": 463.0263157894737, + "grad_norm": 1.2139602899551392, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 70380 + }, + { + "epoch": 463.0921052631579, + "grad_norm": 1.0997470617294312, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 70390 + }, + { + "epoch": 463.1578947368421, + "grad_norm": 0.7857748866081238, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 70400 + }, + { + "epoch": 463.2236842105263, + "grad_norm": 1.3937057256698608, + "learning_rate": 0.0001, + "loss": 0.0151, + "step": 70410 + }, + { + "epoch": 463.2894736842105, + "grad_norm": 1.2709908485412598, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 70420 + }, + { + "epoch": 463.35526315789474, + "grad_norm": 1.2485954761505127, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 70430 + }, + { + "epoch": 463.42105263157896, + "grad_norm": 1.241417646408081, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 70440 + }, + { + "epoch": 463.4868421052632, + "grad_norm": 1.4835032224655151, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 70450 + }, + { + "epoch": 463.55263157894734, + "grad_norm": 1.4047082662582397, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 70460 + }, + { + "epoch": 463.61842105263156, + "grad_norm": 1.533784031867981, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 70470 + }, + { + "epoch": 463.6842105263158, + "grad_norm": 1.3087002038955688, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 70480 + }, + { + "epoch": 463.75, + "grad_norm": 1.1302224397659302, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 70490 + }, + { + "epoch": 463.8157894736842, + "grad_norm": 0.9580501317977905, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 70500 + }, + { + "epoch": 463.88157894736844, + "grad_norm": 0.8459773063659668, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 70510 + }, + { + "epoch": 463.94736842105266, + "grad_norm": 1.1055728197097778, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 70520 + }, + { + "epoch": 464.0131578947368, + "grad_norm": 1.2454581260681152, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 70530 + }, + { + "epoch": 464.07894736842104, + "grad_norm": 1.3780239820480347, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 70540 + }, + { + "epoch": 464.14473684210526, + "grad_norm": 1.2484023571014404, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 70550 + }, + { + "epoch": 464.2105263157895, + "grad_norm": 1.0500701665878296, + "learning_rate": 0.0001, + "loss": 0.0161, + "step": 70560 + }, + { + "epoch": 464.2763157894737, + "grad_norm": 0.9749228358268738, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 70570 + }, + { + "epoch": 464.3421052631579, + "grad_norm": 1.0054138898849487, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 70580 + }, + { + "epoch": 464.4078947368421, + "grad_norm": 1.2907658815383911, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 70590 + }, + { + "epoch": 464.4736842105263, + "grad_norm": 1.1337512731552124, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 70600 + }, + { + "epoch": 464.5394736842105, + "grad_norm": 1.3894444704055786, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 70610 + }, + { + "epoch": 464.60526315789474, + "grad_norm": 1.5000969171524048, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 70620 + }, + { + "epoch": 464.67105263157896, + "grad_norm": 1.1303669214248657, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 70630 + }, + { + "epoch": 464.7368421052632, + "grad_norm": 0.8212418556213379, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 70640 + }, + { + "epoch": 464.80263157894734, + "grad_norm": 1.0030567646026611, + "learning_rate": 0.0001, + "loss": 0.016, + "step": 70650 + }, + { + "epoch": 464.86842105263156, + "grad_norm": 1.0976468324661255, + "learning_rate": 0.0001, + "loss": 0.0157, + "step": 70660 + }, + { + "epoch": 464.9342105263158, + "grad_norm": 1.2463607788085938, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 70670 + }, + { + "epoch": 465.0, + "grad_norm": 0.891297459602356, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 70680 + }, + { + "epoch": 465.0657894736842, + "grad_norm": 1.2342113256454468, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 70690 + }, + { + "epoch": 465.13157894736844, + "grad_norm": 0.8251991868019104, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 70700 + }, + { + "epoch": 465.19736842105266, + "grad_norm": 0.9566839337348938, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 70710 + }, + { + "epoch": 465.2631578947368, + "grad_norm": 0.8764354586601257, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 70720 + }, + { + "epoch": 465.32894736842104, + "grad_norm": 1.0923231840133667, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 70730 + }, + { + "epoch": 465.39473684210526, + "grad_norm": 1.1273107528686523, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 70740 + }, + { + "epoch": 465.4605263157895, + "grad_norm": 1.488062858581543, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 70750 + }, + { + "epoch": 465.5263157894737, + "grad_norm": 1.199041724205017, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 70760 + }, + { + "epoch": 465.5921052631579, + "grad_norm": 1.3205357789993286, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 70770 + }, + { + "epoch": 465.6578947368421, + "grad_norm": 1.4035863876342773, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 70780 + }, + { + "epoch": 465.7236842105263, + "grad_norm": 1.1187028884887695, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 70790 + }, + { + "epoch": 465.7894736842105, + "grad_norm": 1.0568621158599854, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 70800 + }, + { + "epoch": 465.85526315789474, + "grad_norm": 1.3467577695846558, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 70810 + }, + { + "epoch": 465.92105263157896, + "grad_norm": 1.3389008045196533, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 70820 + }, + { + "epoch": 465.9868421052632, + "grad_norm": 1.372363567352295, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 70830 + }, + { + "epoch": 466.05263157894734, + "grad_norm": 1.0468113422393799, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 70840 + }, + { + "epoch": 466.11842105263156, + "grad_norm": 0.9883679747581482, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 70850 + }, + { + "epoch": 466.1842105263158, + "grad_norm": 0.710003674030304, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 70860 + }, + { + "epoch": 466.25, + "grad_norm": 1.3106662034988403, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 70870 + }, + { + "epoch": 466.3157894736842, + "grad_norm": 1.022836685180664, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 70880 + }, + { + "epoch": 466.38157894736844, + "grad_norm": 1.1058964729309082, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 70890 + }, + { + "epoch": 466.44736842105266, + "grad_norm": 0.9566864371299744, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 70900 + }, + { + "epoch": 466.5131578947368, + "grad_norm": 1.0572712421417236, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 70910 + }, + { + "epoch": 466.57894736842104, + "grad_norm": 0.6852365136146545, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 70920 + }, + { + "epoch": 466.64473684210526, + "grad_norm": 0.9085026383399963, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 70930 + }, + { + "epoch": 466.7105263157895, + "grad_norm": 1.1825387477874756, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 70940 + }, + { + "epoch": 466.7763157894737, + "grad_norm": 1.0037176609039307, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 70950 + }, + { + "epoch": 466.8421052631579, + "grad_norm": 1.0735546350479126, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 70960 + }, + { + "epoch": 466.9078947368421, + "grad_norm": 1.3803229331970215, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 70970 + }, + { + "epoch": 466.9736842105263, + "grad_norm": 0.845694363117218, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 70980 + }, + { + "epoch": 467.0394736842105, + "grad_norm": 1.186300277709961, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 70990 + }, + { + "epoch": 467.10526315789474, + "grad_norm": 1.171091079711914, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 71000 + }, + { + "epoch": 467.17105263157896, + "grad_norm": 1.3238587379455566, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 71010 + }, + { + "epoch": 467.2368421052632, + "grad_norm": 1.0442191362380981, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 71020 + }, + { + "epoch": 467.30263157894734, + "grad_norm": 0.9936031103134155, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 71030 + }, + { + "epoch": 467.36842105263156, + "grad_norm": 0.8942242860794067, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 71040 + }, + { + "epoch": 467.4342105263158, + "grad_norm": 1.1458888053894043, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 71050 + }, + { + "epoch": 467.5, + "grad_norm": 1.038323163986206, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 71060 + }, + { + "epoch": 467.5657894736842, + "grad_norm": 1.2702325582504272, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 71070 + }, + { + "epoch": 467.63157894736844, + "grad_norm": 1.037888526916504, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 71080 + }, + { + "epoch": 467.69736842105266, + "grad_norm": 1.5120564699172974, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 71090 + }, + { + "epoch": 467.7631578947368, + "grad_norm": 1.0400851964950562, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 71100 + }, + { + "epoch": 467.82894736842104, + "grad_norm": 1.0576149225234985, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 71110 + }, + { + "epoch": 467.89473684210526, + "grad_norm": 1.1322977542877197, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 71120 + }, + { + "epoch": 467.9605263157895, + "grad_norm": 1.2082958221435547, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 71130 + }, + { + "epoch": 468.0263157894737, + "grad_norm": 0.967847466468811, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 71140 + }, + { + "epoch": 468.0921052631579, + "grad_norm": 1.2820326089859009, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 71150 + }, + { + "epoch": 468.1578947368421, + "grad_norm": 1.3719661235809326, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 71160 + }, + { + "epoch": 468.2236842105263, + "grad_norm": 1.6810905933380127, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 71170 + }, + { + "epoch": 468.2894736842105, + "grad_norm": 0.994966447353363, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 71180 + }, + { + "epoch": 468.35526315789474, + "grad_norm": 1.2583346366882324, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 71190 + }, + { + "epoch": 468.42105263157896, + "grad_norm": 1.1598786115646362, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 71200 + }, + { + "epoch": 468.4868421052632, + "grad_norm": 1.2424436807632446, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 71210 + }, + { + "epoch": 468.55263157894734, + "grad_norm": 1.1447334289550781, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 71220 + }, + { + "epoch": 468.61842105263156, + "grad_norm": 0.9471054077148438, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 71230 + }, + { + "epoch": 468.6842105263158, + "grad_norm": 1.2661805152893066, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 71240 + }, + { + "epoch": 468.75, + "grad_norm": 1.1477277278900146, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 71250 + }, + { + "epoch": 468.8157894736842, + "grad_norm": 1.0798248052597046, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 71260 + }, + { + "epoch": 468.88157894736844, + "grad_norm": 1.0489845275878906, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 71270 + }, + { + "epoch": 468.94736842105266, + "grad_norm": 1.2444311380386353, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 71280 + }, + { + "epoch": 469.0131578947368, + "grad_norm": 1.1508184671401978, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 71290 + }, + { + "epoch": 469.07894736842104, + "grad_norm": 1.0724323987960815, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 71300 + }, + { + "epoch": 469.14473684210526, + "grad_norm": 1.3677020072937012, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 71310 + }, + { + "epoch": 469.2105263157895, + "grad_norm": 0.8909698128700256, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 71320 + }, + { + "epoch": 469.2763157894737, + "grad_norm": 1.2063177824020386, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 71330 + }, + { + "epoch": 469.3421052631579, + "grad_norm": 1.4263769388198853, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 71340 + }, + { + "epoch": 469.4078947368421, + "grad_norm": 1.343396782875061, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 71350 + }, + { + "epoch": 469.4736842105263, + "grad_norm": 1.0640443563461304, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 71360 + }, + { + "epoch": 469.5394736842105, + "grad_norm": 1.131130576133728, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 71370 + }, + { + "epoch": 469.60526315789474, + "grad_norm": 0.8437464833259583, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 71380 + }, + { + "epoch": 469.67105263157896, + "grad_norm": 0.8952385187149048, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 71390 + }, + { + "epoch": 469.7368421052632, + "grad_norm": 0.8962030410766602, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 71400 + }, + { + "epoch": 469.80263157894734, + "grad_norm": 0.953825056552887, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 71410 + }, + { + "epoch": 469.86842105263156, + "grad_norm": 0.8790578246116638, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 71420 + }, + { + "epoch": 469.9342105263158, + "grad_norm": 1.3036227226257324, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 71430 + }, + { + "epoch": 470.0, + "grad_norm": 0.9618934392929077, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 71440 + }, + { + "epoch": 470.0657894736842, + "grad_norm": 0.908938467502594, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 71450 + }, + { + "epoch": 470.13157894736844, + "grad_norm": 1.156169056892395, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 71460 + }, + { + "epoch": 470.19736842105266, + "grad_norm": 0.8988484144210815, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 71470 + }, + { + "epoch": 470.2631578947368, + "grad_norm": 1.0380802154541016, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 71480 + }, + { + "epoch": 470.32894736842104, + "grad_norm": 1.1827173233032227, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 71490 + }, + { + "epoch": 470.39473684210526, + "grad_norm": 0.9451979994773865, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 71500 + }, + { + "epoch": 470.4605263157895, + "grad_norm": 0.8743581771850586, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 71510 + }, + { + "epoch": 470.5263157894737, + "grad_norm": 0.9873955249786377, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 71520 + }, + { + "epoch": 470.5921052631579, + "grad_norm": 0.9436299204826355, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 71530 + }, + { + "epoch": 470.6578947368421, + "grad_norm": 0.8567780256271362, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 71540 + }, + { + "epoch": 470.7236842105263, + "grad_norm": 1.0895605087280273, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 71550 + }, + { + "epoch": 470.7894736842105, + "grad_norm": 1.3269134759902954, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 71560 + }, + { + "epoch": 470.85526315789474, + "grad_norm": 1.2100285291671753, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 71570 + }, + { + "epoch": 470.92105263157896, + "grad_norm": 1.2074404954910278, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 71580 + }, + { + "epoch": 470.9868421052632, + "grad_norm": 1.3929208517074585, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 71590 + }, + { + "epoch": 471.05263157894734, + "grad_norm": 1.3554884195327759, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 71600 + }, + { + "epoch": 471.11842105263156, + "grad_norm": 1.3486567735671997, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 71610 + }, + { + "epoch": 471.1842105263158, + "grad_norm": 0.9685068726539612, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 71620 + }, + { + "epoch": 471.25, + "grad_norm": 1.542588233947754, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 71630 + }, + { + "epoch": 471.3157894736842, + "grad_norm": 1.5774450302124023, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 71640 + }, + { + "epoch": 471.38157894736844, + "grad_norm": 1.1362987756729126, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 71650 + }, + { + "epoch": 471.44736842105266, + "grad_norm": 1.0866764783859253, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 71660 + }, + { + "epoch": 471.5131578947368, + "grad_norm": 1.3600131273269653, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 71670 + }, + { + "epoch": 471.57894736842104, + "grad_norm": 1.3982903957366943, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 71680 + }, + { + "epoch": 471.64473684210526, + "grad_norm": 1.3128663301467896, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 71690 + }, + { + "epoch": 471.7105263157895, + "grad_norm": 1.2559845447540283, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 71700 + }, + { + "epoch": 471.7763157894737, + "grad_norm": 0.9143256545066833, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 71710 + }, + { + "epoch": 471.8421052631579, + "grad_norm": 1.2619221210479736, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 71720 + }, + { + "epoch": 471.9078947368421, + "grad_norm": 1.3303905725479126, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 71730 + }, + { + "epoch": 471.9736842105263, + "grad_norm": 1.1719235181808472, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 71740 + }, + { + "epoch": 472.0394736842105, + "grad_norm": 1.1172962188720703, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 71750 + }, + { + "epoch": 472.10526315789474, + "grad_norm": 1.4545469284057617, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 71760 + }, + { + "epoch": 472.17105263157896, + "grad_norm": 0.806731641292572, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 71770 + }, + { + "epoch": 472.2368421052632, + "grad_norm": 1.0789495706558228, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 71780 + }, + { + "epoch": 472.30263157894734, + "grad_norm": 1.059844732284546, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 71790 + }, + { + "epoch": 472.36842105263156, + "grad_norm": 1.3796194791793823, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 71800 + }, + { + "epoch": 472.4342105263158, + "grad_norm": 1.1321066617965698, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 71810 + }, + { + "epoch": 472.5, + "grad_norm": 0.8206098675727844, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 71820 + }, + { + "epoch": 472.5657894736842, + "grad_norm": 0.9593137502670288, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 71830 + }, + { + "epoch": 472.63157894736844, + "grad_norm": 1.0930147171020508, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 71840 + }, + { + "epoch": 472.69736842105266, + "grad_norm": 1.3240526914596558, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 71850 + }, + { + "epoch": 472.7631578947368, + "grad_norm": 1.3134987354278564, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 71860 + }, + { + "epoch": 472.82894736842104, + "grad_norm": 1.409283995628357, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 71870 + }, + { + "epoch": 472.89473684210526, + "grad_norm": 0.9180440306663513, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 71880 + }, + { + "epoch": 472.9605263157895, + "grad_norm": 0.8743194937705994, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 71890 + }, + { + "epoch": 473.0263157894737, + "grad_norm": 1.0829284191131592, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 71900 + }, + { + "epoch": 473.0921052631579, + "grad_norm": 1.0314230918884277, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 71910 + }, + { + "epoch": 473.1578947368421, + "grad_norm": 1.0930405855178833, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 71920 + }, + { + "epoch": 473.2236842105263, + "grad_norm": 1.387052059173584, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 71930 + }, + { + "epoch": 473.2894736842105, + "grad_norm": 1.1095280647277832, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 71940 + }, + { + "epoch": 473.35526315789474, + "grad_norm": 1.1823383569717407, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 71950 + }, + { + "epoch": 473.42105263157896, + "grad_norm": 1.2319684028625488, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 71960 + }, + { + "epoch": 473.4868421052632, + "grad_norm": 1.3296653032302856, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 71970 + }, + { + "epoch": 473.55263157894734, + "grad_norm": 1.1372100114822388, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 71980 + }, + { + "epoch": 473.61842105263156, + "grad_norm": 1.3765093088150024, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 71990 + }, + { + "epoch": 473.6842105263158, + "grad_norm": 1.0475393533706665, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 72000 + }, + { + "epoch": 473.75, + "grad_norm": 0.8706821203231812, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 72010 + }, + { + "epoch": 473.8157894736842, + "grad_norm": 1.1622743606567383, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 72020 + }, + { + "epoch": 473.88157894736844, + "grad_norm": 1.101678729057312, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 72030 + }, + { + "epoch": 473.94736842105266, + "grad_norm": 1.4966200590133667, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 72040 + }, + { + "epoch": 474.0131578947368, + "grad_norm": 0.8615991473197937, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 72050 + }, + { + "epoch": 474.07894736842104, + "grad_norm": 1.0953624248504639, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 72060 + }, + { + "epoch": 474.14473684210526, + "grad_norm": 1.2191064357757568, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 72070 + }, + { + "epoch": 474.2105263157895, + "grad_norm": 0.9888071417808533, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 72080 + }, + { + "epoch": 474.2763157894737, + "grad_norm": 1.138601541519165, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 72090 + }, + { + "epoch": 474.3421052631579, + "grad_norm": 0.9573279023170471, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 72100 + }, + { + "epoch": 474.4078947368421, + "grad_norm": 1.5391374826431274, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 72110 + }, + { + "epoch": 474.4736842105263, + "grad_norm": 1.4656072854995728, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 72120 + }, + { + "epoch": 474.5394736842105, + "grad_norm": 1.4683994054794312, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 72130 + }, + { + "epoch": 474.60526315789474, + "grad_norm": 1.0790272951126099, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 72140 + }, + { + "epoch": 474.67105263157896, + "grad_norm": 1.186042070388794, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 72150 + }, + { + "epoch": 474.7368421052632, + "grad_norm": 1.169363021850586, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 72160 + }, + { + "epoch": 474.80263157894734, + "grad_norm": 1.1990368366241455, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 72170 + }, + { + "epoch": 474.86842105263156, + "grad_norm": 1.200440764427185, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 72180 + }, + { + "epoch": 474.9342105263158, + "grad_norm": 1.1117382049560547, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 72190 + }, + { + "epoch": 475.0, + "grad_norm": 1.0739638805389404, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 72200 + }, + { + "epoch": 475.0657894736842, + "grad_norm": 1.1158939599990845, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 72210 + }, + { + "epoch": 475.13157894736844, + "grad_norm": 1.6269727945327759, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 72220 + }, + { + "epoch": 475.19736842105266, + "grad_norm": 1.75429368019104, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 72230 + }, + { + "epoch": 475.2631578947368, + "grad_norm": 1.5069760084152222, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 72240 + }, + { + "epoch": 475.32894736842104, + "grad_norm": 1.1883662939071655, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 72250 + }, + { + "epoch": 475.39473684210526, + "grad_norm": 1.3766677379608154, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 72260 + }, + { + "epoch": 475.4605263157895, + "grad_norm": 1.2644368410110474, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 72270 + }, + { + "epoch": 475.5263157894737, + "grad_norm": 1.295596718788147, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 72280 + }, + { + "epoch": 475.5921052631579, + "grad_norm": 1.2578319311141968, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 72290 + }, + { + "epoch": 475.6578947368421, + "grad_norm": 1.1036814451217651, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 72300 + }, + { + "epoch": 475.7236842105263, + "grad_norm": 1.1769661903381348, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 72310 + }, + { + "epoch": 475.7894736842105, + "grad_norm": 0.9555389285087585, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 72320 + }, + { + "epoch": 475.85526315789474, + "grad_norm": 1.3688865900039673, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 72330 + }, + { + "epoch": 475.92105263157896, + "grad_norm": 1.4835799932479858, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 72340 + }, + { + "epoch": 475.9868421052632, + "grad_norm": 1.1539369821548462, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 72350 + }, + { + "epoch": 476.05263157894734, + "grad_norm": 1.3297797441482544, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 72360 + }, + { + "epoch": 476.11842105263156, + "grad_norm": 1.5591670274734497, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 72370 + }, + { + "epoch": 476.1842105263158, + "grad_norm": 1.2825270891189575, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 72380 + }, + { + "epoch": 476.25, + "grad_norm": 0.7293868064880371, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 72390 + }, + { + "epoch": 476.3157894736842, + "grad_norm": 1.2907273769378662, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 72400 + }, + { + "epoch": 476.38157894736844, + "grad_norm": 0.7899346351623535, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 72410 + }, + { + "epoch": 476.44736842105266, + "grad_norm": 1.0589332580566406, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 72420 + }, + { + "epoch": 476.5131578947368, + "grad_norm": 1.1962878704071045, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 72430 + }, + { + "epoch": 476.57894736842104, + "grad_norm": 1.0695892572402954, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 72440 + }, + { + "epoch": 476.64473684210526, + "grad_norm": 0.8697053790092468, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 72450 + }, + { + "epoch": 476.7105263157895, + "grad_norm": 0.8389332890510559, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 72460 + }, + { + "epoch": 476.7763157894737, + "grad_norm": 1.0303555727005005, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 72470 + }, + { + "epoch": 476.8421052631579, + "grad_norm": 0.8726432919502258, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 72480 + }, + { + "epoch": 476.9078947368421, + "grad_norm": 1.1818912029266357, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 72490 + }, + { + "epoch": 476.9736842105263, + "grad_norm": 1.0324101448059082, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 72500 + }, + { + "epoch": 477.0394736842105, + "grad_norm": 1.1190623044967651, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 72510 + }, + { + "epoch": 477.10526315789474, + "grad_norm": 1.3751294612884521, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 72520 + }, + { + "epoch": 477.17105263157896, + "grad_norm": 1.0744208097457886, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 72530 + }, + { + "epoch": 477.2368421052632, + "grad_norm": 1.5082820653915405, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 72540 + }, + { + "epoch": 477.30263157894734, + "grad_norm": 0.6048269867897034, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 72550 + }, + { + "epoch": 477.36842105263156, + "grad_norm": 1.0494825839996338, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 72560 + }, + { + "epoch": 477.4342105263158, + "grad_norm": 1.2498975992202759, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 72570 + }, + { + "epoch": 477.5, + "grad_norm": 0.980216383934021, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 72580 + }, + { + "epoch": 477.5657894736842, + "grad_norm": 1.551719069480896, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 72590 + }, + { + "epoch": 477.63157894736844, + "grad_norm": 0.9513759016990662, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 72600 + }, + { + "epoch": 477.69736842105266, + "grad_norm": 1.2828733921051025, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 72610 + }, + { + "epoch": 477.7631578947368, + "grad_norm": 0.9803738594055176, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 72620 + }, + { + "epoch": 477.82894736842104, + "grad_norm": 1.0510716438293457, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 72630 + }, + { + "epoch": 477.89473684210526, + "grad_norm": 0.7186201214790344, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 72640 + }, + { + "epoch": 477.9605263157895, + "grad_norm": 1.2259303331375122, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 72650 + }, + { + "epoch": 478.0263157894737, + "grad_norm": 0.948920726776123, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 72660 + }, + { + "epoch": 478.0921052631579, + "grad_norm": 1.4578410387039185, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 72670 + }, + { + "epoch": 478.1578947368421, + "grad_norm": 1.2089077234268188, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 72680 + }, + { + "epoch": 478.2236842105263, + "grad_norm": 1.2857468128204346, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 72690 + }, + { + "epoch": 478.2894736842105, + "grad_norm": 0.8931973576545715, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 72700 + }, + { + "epoch": 478.35526315789474, + "grad_norm": 0.7080501317977905, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 72710 + }, + { + "epoch": 478.42105263157896, + "grad_norm": 1.3679475784301758, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 72720 + }, + { + "epoch": 478.4868421052632, + "grad_norm": 1.0242080688476562, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 72730 + }, + { + "epoch": 478.55263157894734, + "grad_norm": 1.1003470420837402, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 72740 + }, + { + "epoch": 478.61842105263156, + "grad_norm": 1.2705953121185303, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 72750 + }, + { + "epoch": 478.6842105263158, + "grad_norm": 0.9441986680030823, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 72760 + }, + { + "epoch": 478.75, + "grad_norm": 1.438714861869812, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 72770 + }, + { + "epoch": 478.8157894736842, + "grad_norm": 1.4461417198181152, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 72780 + }, + { + "epoch": 478.88157894736844, + "grad_norm": 1.4734560251235962, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 72790 + }, + { + "epoch": 478.94736842105266, + "grad_norm": 1.064142107963562, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 72800 + }, + { + "epoch": 479.0131578947368, + "grad_norm": 0.7491738796234131, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 72810 + }, + { + "epoch": 479.07894736842104, + "grad_norm": 1.0148205757141113, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 72820 + }, + { + "epoch": 479.14473684210526, + "grad_norm": 0.9256365299224854, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 72830 + }, + { + "epoch": 479.2105263157895, + "grad_norm": 1.0574623346328735, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 72840 + }, + { + "epoch": 479.2763157894737, + "grad_norm": 1.2569656372070312, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 72850 + }, + { + "epoch": 479.3421052631579, + "grad_norm": 0.737408459186554, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 72860 + }, + { + "epoch": 479.4078947368421, + "grad_norm": 1.0380948781967163, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 72870 + }, + { + "epoch": 479.4736842105263, + "grad_norm": 0.9933061599731445, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 72880 + }, + { + "epoch": 479.5394736842105, + "grad_norm": 1.3550677299499512, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 72890 + }, + { + "epoch": 479.60526315789474, + "grad_norm": 1.2398386001586914, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 72900 + }, + { + "epoch": 479.67105263157896, + "grad_norm": 1.213581919670105, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 72910 + }, + { + "epoch": 479.7368421052632, + "grad_norm": 1.4044569730758667, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 72920 + }, + { + "epoch": 479.80263157894734, + "grad_norm": 0.956494152545929, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 72930 + }, + { + "epoch": 479.86842105263156, + "grad_norm": 0.9288977384567261, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 72940 + }, + { + "epoch": 479.9342105263158, + "grad_norm": 1.0925542116165161, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 72950 + }, + { + "epoch": 480.0, + "grad_norm": 1.3831939697265625, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 72960 + }, + { + "epoch": 480.0657894736842, + "grad_norm": 1.177885890007019, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 72970 + }, + { + "epoch": 480.13157894736844, + "grad_norm": 1.154605507850647, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 72980 + }, + { + "epoch": 480.19736842105266, + "grad_norm": 1.4074510335922241, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 72990 + }, + { + "epoch": 480.2631578947368, + "grad_norm": 1.3587064743041992, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 73000 + }, + { + "epoch": 480.32894736842104, + "grad_norm": 1.0905214548110962, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 73010 + }, + { + "epoch": 480.39473684210526, + "grad_norm": 1.2721132040023804, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 73020 + }, + { + "epoch": 480.4605263157895, + "grad_norm": 1.2455694675445557, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 73030 + }, + { + "epoch": 480.5263157894737, + "grad_norm": 1.1374874114990234, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 73040 + }, + { + "epoch": 480.5921052631579, + "grad_norm": 0.9991102814674377, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 73050 + }, + { + "epoch": 480.6578947368421, + "grad_norm": 0.9660846590995789, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 73060 + }, + { + "epoch": 480.7236842105263, + "grad_norm": 1.3656002283096313, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 73070 + }, + { + "epoch": 480.7894736842105, + "grad_norm": 1.039703607559204, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 73080 + }, + { + "epoch": 480.85526315789474, + "grad_norm": 1.1544702053070068, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 73090 + }, + { + "epoch": 480.92105263157896, + "grad_norm": 1.4217747449874878, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 73100 + }, + { + "epoch": 480.9868421052632, + "grad_norm": 1.363659143447876, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 73110 + }, + { + "epoch": 481.05263157894734, + "grad_norm": 1.2060271501541138, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 73120 + }, + { + "epoch": 481.11842105263156, + "grad_norm": 0.9276877045631409, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 73130 + }, + { + "epoch": 481.1842105263158, + "grad_norm": 1.148481845855713, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 73140 + }, + { + "epoch": 481.25, + "grad_norm": 1.3816297054290771, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 73150 + }, + { + "epoch": 481.3157894736842, + "grad_norm": 1.1494829654693604, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 73160 + }, + { + "epoch": 481.38157894736844, + "grad_norm": 0.8974570035934448, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 73170 + }, + { + "epoch": 481.44736842105266, + "grad_norm": 1.2027701139450073, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 73180 + }, + { + "epoch": 481.5131578947368, + "grad_norm": 1.0080323219299316, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 73190 + }, + { + "epoch": 481.57894736842104, + "grad_norm": 1.2009938955307007, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 73200 + }, + { + "epoch": 481.64473684210526, + "grad_norm": 1.2606686353683472, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 73210 + }, + { + "epoch": 481.7105263157895, + "grad_norm": 0.9282734990119934, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 73220 + }, + { + "epoch": 481.7763157894737, + "grad_norm": 0.6285842657089233, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 73230 + }, + { + "epoch": 481.8421052631579, + "grad_norm": 0.895392894744873, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 73240 + }, + { + "epoch": 481.9078947368421, + "grad_norm": 1.0975970029830933, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 73250 + }, + { + "epoch": 481.9736842105263, + "grad_norm": 1.0048805475234985, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 73260 + }, + { + "epoch": 482.0394736842105, + "grad_norm": 1.4439254999160767, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 73270 + }, + { + "epoch": 482.10526315789474, + "grad_norm": 1.0936566591262817, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 73280 + }, + { + "epoch": 482.17105263157896, + "grad_norm": 1.1018260717391968, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 73290 + }, + { + "epoch": 482.2368421052632, + "grad_norm": 1.48947012424469, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 73300 + }, + { + "epoch": 482.30263157894734, + "grad_norm": 0.8502678871154785, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 73310 + }, + { + "epoch": 482.36842105263156, + "grad_norm": 1.0486079454421997, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 73320 + }, + { + "epoch": 482.4342105263158, + "grad_norm": 1.3065072298049927, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 73330 + }, + { + "epoch": 482.5, + "grad_norm": 1.218176007270813, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 73340 + }, + { + "epoch": 482.5657894736842, + "grad_norm": 1.2767457962036133, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 73350 + }, + { + "epoch": 482.63157894736844, + "grad_norm": 1.322757363319397, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 73360 + }, + { + "epoch": 482.69736842105266, + "grad_norm": 1.248364806175232, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 73370 + }, + { + "epoch": 482.7631578947368, + "grad_norm": 0.9732344746589661, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 73380 + }, + { + "epoch": 482.82894736842104, + "grad_norm": 1.0928685665130615, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 73390 + }, + { + "epoch": 482.89473684210526, + "grad_norm": 1.0037014484405518, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 73400 + }, + { + "epoch": 482.9605263157895, + "grad_norm": 1.0865426063537598, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 73410 + }, + { + "epoch": 483.0263157894737, + "grad_norm": 1.5085911750793457, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 73420 + }, + { + "epoch": 483.0921052631579, + "grad_norm": 1.8017462491989136, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 73430 + }, + { + "epoch": 483.1578947368421, + "grad_norm": 1.266747236251831, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 73440 + }, + { + "epoch": 483.2236842105263, + "grad_norm": 1.4259337186813354, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 73450 + }, + { + "epoch": 483.2894736842105, + "grad_norm": 1.2923461198806763, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 73460 + }, + { + "epoch": 483.35526315789474, + "grad_norm": 1.453090786933899, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 73470 + }, + { + "epoch": 483.42105263157896, + "grad_norm": 1.3885729312896729, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 73480 + }, + { + "epoch": 483.4868421052632, + "grad_norm": 0.8967188000679016, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 73490 + }, + { + "epoch": 483.55263157894734, + "grad_norm": 1.2121909856796265, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 73500 + }, + { + "epoch": 483.61842105263156, + "grad_norm": 1.01747465133667, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 73510 + }, + { + "epoch": 483.6842105263158, + "grad_norm": 1.1471728086471558, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 73520 + }, + { + "epoch": 483.75, + "grad_norm": 1.196329116821289, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 73530 + }, + { + "epoch": 483.8157894736842, + "grad_norm": 1.0889172554016113, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 73540 + }, + { + "epoch": 483.88157894736844, + "grad_norm": 1.1677290201187134, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 73550 + }, + { + "epoch": 483.94736842105266, + "grad_norm": 1.2402957677841187, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 73560 + }, + { + "epoch": 484.0131578947368, + "grad_norm": 1.241837739944458, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 73570 + }, + { + "epoch": 484.07894736842104, + "grad_norm": 1.1311156749725342, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 73580 + }, + { + "epoch": 484.14473684210526, + "grad_norm": 1.0363242626190186, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 73590 + }, + { + "epoch": 484.2105263157895, + "grad_norm": 1.2085379362106323, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 73600 + }, + { + "epoch": 484.2763157894737, + "grad_norm": 0.8469356298446655, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 73610 + }, + { + "epoch": 484.3421052631579, + "grad_norm": 1.0864289999008179, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 73620 + }, + { + "epoch": 484.4078947368421, + "grad_norm": 0.8979195952415466, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 73630 + }, + { + "epoch": 484.4736842105263, + "grad_norm": 1.0335921049118042, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 73640 + }, + { + "epoch": 484.5394736842105, + "grad_norm": 0.8344136476516724, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 73650 + }, + { + "epoch": 484.60526315789474, + "grad_norm": 1.1805672645568848, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 73660 + }, + { + "epoch": 484.67105263157896, + "grad_norm": 0.9891030192375183, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 73670 + }, + { + "epoch": 484.7368421052632, + "grad_norm": 1.2835693359375, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 73680 + }, + { + "epoch": 484.80263157894734, + "grad_norm": 1.1186614036560059, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 73690 + }, + { + "epoch": 484.86842105263156, + "grad_norm": 0.9351660013198853, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 73700 + }, + { + "epoch": 484.9342105263158, + "grad_norm": 1.1274996995925903, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 73710 + }, + { + "epoch": 485.0, + "grad_norm": 1.3561069965362549, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 73720 + }, + { + "epoch": 485.0657894736842, + "grad_norm": 1.0583314895629883, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 73730 + }, + { + "epoch": 485.13157894736844, + "grad_norm": 1.318225383758545, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 73740 + }, + { + "epoch": 485.19736842105266, + "grad_norm": 1.3357988595962524, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 73750 + }, + { + "epoch": 485.2631578947368, + "grad_norm": 0.7873854637145996, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 73760 + }, + { + "epoch": 485.32894736842104, + "grad_norm": 0.9330838322639465, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 73770 + }, + { + "epoch": 485.39473684210526, + "grad_norm": 0.956305205821991, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 73780 + }, + { + "epoch": 485.4605263157895, + "grad_norm": 1.252623200416565, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 73790 + }, + { + "epoch": 485.5263157894737, + "grad_norm": 1.2766034603118896, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 73800 + }, + { + "epoch": 485.5921052631579, + "grad_norm": 1.2142257690429688, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 73810 + }, + { + "epoch": 485.6578947368421, + "grad_norm": 0.972432017326355, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 73820 + }, + { + "epoch": 485.7236842105263, + "grad_norm": 1.1014715433120728, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 73830 + }, + { + "epoch": 485.7894736842105, + "grad_norm": 1.1053544282913208, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 73840 + }, + { + "epoch": 485.85526315789474, + "grad_norm": 1.4604548215866089, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 73850 + }, + { + "epoch": 485.92105263157896, + "grad_norm": 0.9957262277603149, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 73860 + }, + { + "epoch": 485.9868421052632, + "grad_norm": 0.8840813636779785, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 73870 + }, + { + "epoch": 486.05263157894734, + "grad_norm": 1.2641267776489258, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 73880 + }, + { + "epoch": 486.11842105263156, + "grad_norm": 0.9049034714698792, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 73890 + }, + { + "epoch": 486.1842105263158, + "grad_norm": 0.798561155796051, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 73900 + }, + { + "epoch": 486.25, + "grad_norm": 1.3658287525177002, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 73910 + }, + { + "epoch": 486.3157894736842, + "grad_norm": 1.3198788166046143, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 73920 + }, + { + "epoch": 486.38157894736844, + "grad_norm": 1.0078285932540894, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 73930 + }, + { + "epoch": 486.44736842105266, + "grad_norm": 1.053542971611023, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 73940 + }, + { + "epoch": 486.5131578947368, + "grad_norm": 0.8640962243080139, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 73950 + }, + { + "epoch": 486.57894736842104, + "grad_norm": 1.2860034704208374, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 73960 + }, + { + "epoch": 486.64473684210526, + "grad_norm": 1.0493406057357788, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 73970 + }, + { + "epoch": 486.7105263157895, + "grad_norm": 0.9338852763175964, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 73980 + }, + { + "epoch": 486.7763157894737, + "grad_norm": 1.1906099319458008, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 73990 + }, + { + "epoch": 486.8421052631579, + "grad_norm": 1.1527206897735596, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 74000 + }, + { + "epoch": 486.9078947368421, + "grad_norm": 1.1856426000595093, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 74010 + }, + { + "epoch": 486.9736842105263, + "grad_norm": 1.0806999206542969, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 74020 + }, + { + "epoch": 487.0394736842105, + "grad_norm": 0.960417628288269, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 74030 + }, + { + "epoch": 487.10526315789474, + "grad_norm": 0.9834040403366089, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 74040 + }, + { + "epoch": 487.17105263157896, + "grad_norm": 0.9878292679786682, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 74050 + }, + { + "epoch": 487.2368421052632, + "grad_norm": 1.11509370803833, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 74060 + }, + { + "epoch": 487.30263157894734, + "grad_norm": 1.1951452493667603, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 74070 + }, + { + "epoch": 487.36842105263156, + "grad_norm": 0.9179181456565857, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 74080 + }, + { + "epoch": 487.4342105263158, + "grad_norm": 1.1499123573303223, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 74090 + }, + { + "epoch": 487.5, + "grad_norm": 1.035010576248169, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 74100 + }, + { + "epoch": 487.5657894736842, + "grad_norm": 1.0398813486099243, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 74110 + }, + { + "epoch": 487.63157894736844, + "grad_norm": 1.191170573234558, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 74120 + }, + { + "epoch": 487.69736842105266, + "grad_norm": 1.0563814640045166, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 74130 + }, + { + "epoch": 487.7631578947368, + "grad_norm": 1.0885233879089355, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 74140 + }, + { + "epoch": 487.82894736842104, + "grad_norm": 1.0386284589767456, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 74150 + }, + { + "epoch": 487.89473684210526, + "grad_norm": 1.3478586673736572, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 74160 + }, + { + "epoch": 487.9605263157895, + "grad_norm": 0.8383530378341675, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 74170 + }, + { + "epoch": 488.0263157894737, + "grad_norm": 1.2732715606689453, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 74180 + }, + { + "epoch": 488.0921052631579, + "grad_norm": 1.2103279829025269, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 74190 + }, + { + "epoch": 488.1578947368421, + "grad_norm": 1.6459661722183228, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 74200 + }, + { + "epoch": 488.2236842105263, + "grad_norm": 1.0316122770309448, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 74210 + }, + { + "epoch": 488.2894736842105, + "grad_norm": 1.0485596656799316, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 74220 + }, + { + "epoch": 488.35526315789474, + "grad_norm": 1.27117121219635, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 74230 + }, + { + "epoch": 488.42105263157896, + "grad_norm": 1.240209698677063, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 74240 + }, + { + "epoch": 488.4868421052632, + "grad_norm": 0.7314859628677368, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 74250 + }, + { + "epoch": 488.55263157894734, + "grad_norm": 1.4494084119796753, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 74260 + }, + { + "epoch": 488.61842105263156, + "grad_norm": 1.7090212106704712, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 74270 + }, + { + "epoch": 488.6842105263158, + "grad_norm": 1.248016119003296, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 74280 + }, + { + "epoch": 488.75, + "grad_norm": 1.0164270401000977, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 74290 + }, + { + "epoch": 488.8157894736842, + "grad_norm": 0.8657320141792297, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 74300 + }, + { + "epoch": 488.88157894736844, + "grad_norm": 1.136674165725708, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 74310 + }, + { + "epoch": 488.94736842105266, + "grad_norm": 1.108292579650879, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 74320 + }, + { + "epoch": 489.0131578947368, + "grad_norm": 0.756219744682312, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 74330 + }, + { + "epoch": 489.07894736842104, + "grad_norm": 1.0228742361068726, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 74340 + }, + { + "epoch": 489.14473684210526, + "grad_norm": 0.9281026721000671, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 74350 + }, + { + "epoch": 489.2105263157895, + "grad_norm": 0.9329313039779663, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 74360 + }, + { + "epoch": 489.2763157894737, + "grad_norm": 0.8658884167671204, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 74370 + }, + { + "epoch": 489.3421052631579, + "grad_norm": 0.9127900004386902, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 74380 + }, + { + "epoch": 489.4078947368421, + "grad_norm": 1.145900011062622, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 74390 + }, + { + "epoch": 489.4736842105263, + "grad_norm": 1.131519079208374, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 74400 + }, + { + "epoch": 489.5394736842105, + "grad_norm": 1.071823000907898, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 74410 + }, + { + "epoch": 489.60526315789474, + "grad_norm": 0.8739954233169556, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 74420 + }, + { + "epoch": 489.67105263157896, + "grad_norm": 1.0112184286117554, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 74430 + }, + { + "epoch": 489.7368421052632, + "grad_norm": 0.9393345713615417, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 74440 + }, + { + "epoch": 489.80263157894734, + "grad_norm": 1.0487806797027588, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 74450 + }, + { + "epoch": 489.86842105263156, + "grad_norm": 1.1946709156036377, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 74460 + }, + { + "epoch": 489.9342105263158, + "grad_norm": 1.154524803161621, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 74470 + }, + { + "epoch": 490.0, + "grad_norm": 1.5723423957824707, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 74480 + }, + { + "epoch": 490.0657894736842, + "grad_norm": 1.4050586223602295, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 74490 + }, + { + "epoch": 490.13157894736844, + "grad_norm": 1.064405918121338, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 74500 + }, + { + "epoch": 490.19736842105266, + "grad_norm": 1.1484532356262207, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 74510 + }, + { + "epoch": 490.2631578947368, + "grad_norm": 0.8688686490058899, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 74520 + }, + { + "epoch": 490.32894736842104, + "grad_norm": 1.151997447013855, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 74530 + }, + { + "epoch": 490.39473684210526, + "grad_norm": 1.0896145105361938, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 74540 + }, + { + "epoch": 490.4605263157895, + "grad_norm": 1.170395016670227, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 74550 + }, + { + "epoch": 490.5263157894737, + "grad_norm": 1.2518529891967773, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 74560 + }, + { + "epoch": 490.5921052631579, + "grad_norm": 1.385285496711731, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 74570 + }, + { + "epoch": 490.6578947368421, + "grad_norm": 1.2214215993881226, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 74580 + }, + { + "epoch": 490.7236842105263, + "grad_norm": 1.1095465421676636, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 74590 + }, + { + "epoch": 490.7894736842105, + "grad_norm": 1.1326744556427002, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 74600 + }, + { + "epoch": 490.85526315789474, + "grad_norm": 1.0107364654541016, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 74610 + }, + { + "epoch": 490.92105263157896, + "grad_norm": 1.571817398071289, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 74620 + }, + { + "epoch": 490.9868421052632, + "grad_norm": 1.312083125114441, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 74630 + }, + { + "epoch": 491.05263157894734, + "grad_norm": 1.4065839052200317, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 74640 + }, + { + "epoch": 491.11842105263156, + "grad_norm": 1.1010332107543945, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 74650 + }, + { + "epoch": 491.1842105263158, + "grad_norm": 1.1380709409713745, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 74660 + }, + { + "epoch": 491.25, + "grad_norm": 1.1120402812957764, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 74670 + }, + { + "epoch": 491.3157894736842, + "grad_norm": 1.5647122859954834, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 74680 + }, + { + "epoch": 491.38157894736844, + "grad_norm": 1.1048648357391357, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 74690 + }, + { + "epoch": 491.44736842105266, + "grad_norm": 1.0886578559875488, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 74700 + }, + { + "epoch": 491.5131578947368, + "grad_norm": 1.111810564994812, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 74710 + }, + { + "epoch": 491.57894736842104, + "grad_norm": 1.0857470035552979, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 74720 + }, + { + "epoch": 491.64473684210526, + "grad_norm": 1.4645519256591797, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 74730 + }, + { + "epoch": 491.7105263157895, + "grad_norm": 1.5546987056732178, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 74740 + }, + { + "epoch": 491.7763157894737, + "grad_norm": 1.0544028282165527, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 74750 + }, + { + "epoch": 491.8421052631579, + "grad_norm": 1.1025646924972534, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 74760 + }, + { + "epoch": 491.9078947368421, + "grad_norm": 1.238974928855896, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 74770 + }, + { + "epoch": 491.9736842105263, + "grad_norm": 1.3790466785430908, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 74780 + }, + { + "epoch": 492.0394736842105, + "grad_norm": 1.2064578533172607, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 74790 + }, + { + "epoch": 492.10526315789474, + "grad_norm": 0.9122753143310547, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 74800 + }, + { + "epoch": 492.17105263157896, + "grad_norm": 1.4887069463729858, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 74810 + }, + { + "epoch": 492.2368421052632, + "grad_norm": 1.2702773809432983, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 74820 + }, + { + "epoch": 492.30263157894734, + "grad_norm": 1.2110621929168701, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 74830 + }, + { + "epoch": 492.36842105263156, + "grad_norm": 1.53854238986969, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 74840 + }, + { + "epoch": 492.4342105263158, + "grad_norm": 1.8626569509506226, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 74850 + }, + { + "epoch": 492.5, + "grad_norm": 1.666455864906311, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 74860 + }, + { + "epoch": 492.5657894736842, + "grad_norm": 1.684195876121521, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 74870 + }, + { + "epoch": 492.63157894736844, + "grad_norm": 1.1221078634262085, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 74880 + }, + { + "epoch": 492.69736842105266, + "grad_norm": 1.2585291862487793, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 74890 + }, + { + "epoch": 492.7631578947368, + "grad_norm": 1.376713514328003, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 74900 + }, + { + "epoch": 492.82894736842104, + "grad_norm": 0.9732930064201355, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 74910 + }, + { + "epoch": 492.89473684210526, + "grad_norm": 1.3538066148757935, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 74920 + }, + { + "epoch": 492.9605263157895, + "grad_norm": 1.5460445880889893, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 74930 + }, + { + "epoch": 493.0263157894737, + "grad_norm": 1.332470417022705, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 74940 + }, + { + "epoch": 493.0921052631579, + "grad_norm": 1.0886743068695068, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 74950 + }, + { + "epoch": 493.1578947368421, + "grad_norm": 0.8579538464546204, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 74960 + }, + { + "epoch": 493.2236842105263, + "grad_norm": 1.033247470855713, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 74970 + }, + { + "epoch": 493.2894736842105, + "grad_norm": 1.0347689390182495, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 74980 + }, + { + "epoch": 493.35526315789474, + "grad_norm": 1.149769902229309, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 74990 + }, + { + "epoch": 493.42105263157896, + "grad_norm": 0.8017956614494324, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 75000 + }, + { + "epoch": 493.4868421052632, + "grad_norm": 0.933280348777771, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 75010 + }, + { + "epoch": 493.55263157894734, + "grad_norm": 1.4169752597808838, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 75020 + }, + { + "epoch": 493.61842105263156, + "grad_norm": 1.0053260326385498, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 75030 + }, + { + "epoch": 493.6842105263158, + "grad_norm": 1.0985119342803955, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 75040 + }, + { + "epoch": 493.75, + "grad_norm": 0.89968341588974, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 75050 + }, + { + "epoch": 493.8157894736842, + "grad_norm": 1.0092484951019287, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 75060 + }, + { + "epoch": 493.88157894736844, + "grad_norm": 0.8683974742889404, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 75070 + }, + { + "epoch": 493.94736842105266, + "grad_norm": 1.1474254131317139, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 75080 + }, + { + "epoch": 494.0131578947368, + "grad_norm": 0.9479317665100098, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 75090 + }, + { + "epoch": 494.07894736842104, + "grad_norm": 1.194884181022644, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 75100 + }, + { + "epoch": 494.14473684210526, + "grad_norm": 1.3148432970046997, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 75110 + }, + { + "epoch": 494.2105263157895, + "grad_norm": 1.1125714778900146, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 75120 + }, + { + "epoch": 494.2763157894737, + "grad_norm": 1.0506175756454468, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 75130 + }, + { + "epoch": 494.3421052631579, + "grad_norm": 1.3244073390960693, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 75140 + }, + { + "epoch": 494.4078947368421, + "grad_norm": 1.082236647605896, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 75150 + }, + { + "epoch": 494.4736842105263, + "grad_norm": 1.702755093574524, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 75160 + }, + { + "epoch": 494.5394736842105, + "grad_norm": 1.0720016956329346, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 75170 + }, + { + "epoch": 494.60526315789474, + "grad_norm": 0.8905903100967407, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 75180 + }, + { + "epoch": 494.67105263157896, + "grad_norm": 0.9259252548217773, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 75190 + }, + { + "epoch": 494.7368421052632, + "grad_norm": 1.1381369829177856, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 75200 + }, + { + "epoch": 494.80263157894734, + "grad_norm": 1.1923221349716187, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 75210 + }, + { + "epoch": 494.86842105263156, + "grad_norm": 1.3231667280197144, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 75220 + }, + { + "epoch": 494.9342105263158, + "grad_norm": 1.1546229124069214, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 75230 + }, + { + "epoch": 495.0, + "grad_norm": 0.942509114742279, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 75240 + }, + { + "epoch": 495.0657894736842, + "grad_norm": 1.1736472845077515, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 75250 + }, + { + "epoch": 495.13157894736844, + "grad_norm": 1.43409264087677, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 75260 + }, + { + "epoch": 495.19736842105266, + "grad_norm": 1.1117990016937256, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 75270 + }, + { + "epoch": 495.2631578947368, + "grad_norm": 1.196996808052063, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 75280 + }, + { + "epoch": 495.32894736842104, + "grad_norm": 1.3209561109542847, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 75290 + }, + { + "epoch": 495.39473684210526, + "grad_norm": 1.633660078048706, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 75300 + }, + { + "epoch": 495.4605263157895, + "grad_norm": 1.3215038776397705, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 75310 + }, + { + "epoch": 495.5263157894737, + "grad_norm": 1.1978789567947388, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 75320 + }, + { + "epoch": 495.5921052631579, + "grad_norm": 1.508163332939148, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 75330 + }, + { + "epoch": 495.6578947368421, + "grad_norm": 1.5541365146636963, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 75340 + }, + { + "epoch": 495.7236842105263, + "grad_norm": 1.6904091835021973, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 75350 + }, + { + "epoch": 495.7894736842105, + "grad_norm": 1.0997204780578613, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 75360 + }, + { + "epoch": 495.85526315789474, + "grad_norm": 1.1885972023010254, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 75370 + }, + { + "epoch": 495.92105263157896, + "grad_norm": 1.2370365858078003, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 75380 + }, + { + "epoch": 495.9868421052632, + "grad_norm": 1.3986064195632935, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 75390 + }, + { + "epoch": 496.05263157894734, + "grad_norm": 1.3072096109390259, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 75400 + }, + { + "epoch": 496.11842105263156, + "grad_norm": 1.2789565324783325, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 75410 + }, + { + "epoch": 496.1842105263158, + "grad_norm": 1.0061566829681396, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 75420 + }, + { + "epoch": 496.25, + "grad_norm": 0.9549350738525391, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 75430 + }, + { + "epoch": 496.3157894736842, + "grad_norm": 0.9655198454856873, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 75440 + }, + { + "epoch": 496.38157894736844, + "grad_norm": 0.9944902062416077, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 75450 + }, + { + "epoch": 496.44736842105266, + "grad_norm": 1.0632195472717285, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 75460 + }, + { + "epoch": 496.5131578947368, + "grad_norm": 1.0134518146514893, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 75470 + }, + { + "epoch": 496.57894736842104, + "grad_norm": 1.3689377307891846, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 75480 + }, + { + "epoch": 496.64473684210526, + "grad_norm": 1.2085026502609253, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 75490 + }, + { + "epoch": 496.7105263157895, + "grad_norm": 0.7359601259231567, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 75500 + }, + { + "epoch": 496.7763157894737, + "grad_norm": 1.2168084383010864, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 75510 + }, + { + "epoch": 496.8421052631579, + "grad_norm": 0.99058598279953, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 75520 + }, + { + "epoch": 496.9078947368421, + "grad_norm": 0.9866731762886047, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 75530 + }, + { + "epoch": 496.9736842105263, + "grad_norm": 0.7462350726127625, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 75540 + }, + { + "epoch": 497.0394736842105, + "grad_norm": 0.8804934620857239, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 75550 + }, + { + "epoch": 497.10526315789474, + "grad_norm": 0.9820599555969238, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 75560 + }, + { + "epoch": 497.17105263157896, + "grad_norm": 0.8656883239746094, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 75570 + }, + { + "epoch": 497.2368421052632, + "grad_norm": 0.9848073124885559, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 75580 + }, + { + "epoch": 497.30263157894734, + "grad_norm": 0.7762583494186401, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 75590 + }, + { + "epoch": 497.36842105263156, + "grad_norm": 0.9925325512886047, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 75600 + }, + { + "epoch": 497.4342105263158, + "grad_norm": 0.905873715877533, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 75610 + }, + { + "epoch": 497.5, + "grad_norm": 0.8699862957000732, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 75620 + }, + { + "epoch": 497.5657894736842, + "grad_norm": 1.1519784927368164, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 75630 + }, + { + "epoch": 497.63157894736844, + "grad_norm": 1.285287857055664, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 75640 + }, + { + "epoch": 497.69736842105266, + "grad_norm": 0.9361952543258667, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 75650 + }, + { + "epoch": 497.7631578947368, + "grad_norm": 1.113816499710083, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 75660 + }, + { + "epoch": 497.82894736842104, + "grad_norm": 1.1863288879394531, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 75670 + }, + { + "epoch": 497.89473684210526, + "grad_norm": 0.6203667521476746, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 75680 + }, + { + "epoch": 497.9605263157895, + "grad_norm": 1.0878044366836548, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 75690 + }, + { + "epoch": 498.0263157894737, + "grad_norm": 0.5921301245689392, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 75700 + }, + { + "epoch": 498.0921052631579, + "grad_norm": 0.9742683172225952, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 75710 + }, + { + "epoch": 498.1578947368421, + "grad_norm": 1.1154931783676147, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 75720 + }, + { + "epoch": 498.2236842105263, + "grad_norm": 1.0638843774795532, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 75730 + }, + { + "epoch": 498.2894736842105, + "grad_norm": 1.2512115240097046, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 75740 + }, + { + "epoch": 498.35526315789474, + "grad_norm": 1.373955488204956, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 75750 + }, + { + "epoch": 498.42105263157896, + "grad_norm": 1.1924251317977905, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 75760 + }, + { + "epoch": 498.4868421052632, + "grad_norm": 1.0624274015426636, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 75770 + }, + { + "epoch": 498.55263157894734, + "grad_norm": 0.7194404602050781, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 75780 + }, + { + "epoch": 498.61842105263156, + "grad_norm": 1.0660557746887207, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 75790 + }, + { + "epoch": 498.6842105263158, + "grad_norm": 1.237527847290039, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 75800 + }, + { + "epoch": 498.75, + "grad_norm": 1.1295133829116821, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 75810 + }, + { + "epoch": 498.8157894736842, + "grad_norm": 0.8984070420265198, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 75820 + }, + { + "epoch": 498.88157894736844, + "grad_norm": 1.035714864730835, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 75830 + }, + { + "epoch": 498.94736842105266, + "grad_norm": 1.1130855083465576, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 75840 + }, + { + "epoch": 499.0131578947368, + "grad_norm": 1.1051881313323975, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 75850 + }, + { + "epoch": 499.07894736842104, + "grad_norm": 0.8507428765296936, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 75860 + }, + { + "epoch": 499.14473684210526, + "grad_norm": 0.9365002512931824, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 75870 + }, + { + "epoch": 499.2105263157895, + "grad_norm": 1.0836772918701172, + "learning_rate": 0.0001, + "loss": 0.0155, + "step": 75880 + }, + { + "epoch": 499.2763157894737, + "grad_norm": 1.0799518823623657, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 75890 + }, + { + "epoch": 499.3421052631579, + "grad_norm": 1.1762809753417969, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 75900 + }, + { + "epoch": 499.4078947368421, + "grad_norm": 0.9116426110267639, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 75910 + }, + { + "epoch": 499.4736842105263, + "grad_norm": 1.1221206188201904, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 75920 + }, + { + "epoch": 499.5394736842105, + "grad_norm": 0.9344303011894226, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 75930 + }, + { + "epoch": 499.60526315789474, + "grad_norm": 1.2901678085327148, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 75940 + }, + { + "epoch": 499.67105263157896, + "grad_norm": 1.0376948118209839, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 75950 + }, + { + "epoch": 499.7368421052632, + "grad_norm": 0.9793575406074524, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 75960 + }, + { + "epoch": 499.80263157894734, + "grad_norm": 1.1645046472549438, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 75970 + }, + { + "epoch": 499.86842105263156, + "grad_norm": 0.8754516243934631, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 75980 + }, + { + "epoch": 499.9342105263158, + "grad_norm": 1.0461459159851074, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 75990 + }, + { + "epoch": 500.0, + "grad_norm": 0.9663628339767456, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 76000 + }, + { + "epoch": 500.0657894736842, + "grad_norm": 1.32105553150177, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 76010 + }, + { + "epoch": 500.13157894736844, + "grad_norm": 1.3787779808044434, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 76020 + }, + { + "epoch": 500.19736842105266, + "grad_norm": 1.2713334560394287, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 76030 + }, + { + "epoch": 500.2631578947368, + "grad_norm": 1.2451242208480835, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 76040 + }, + { + "epoch": 500.32894736842104, + "grad_norm": 1.1290003061294556, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 76050 + }, + { + "epoch": 500.39473684210526, + "grad_norm": 1.2039153575897217, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 76060 + }, + { + "epoch": 500.4605263157895, + "grad_norm": 1.2840030193328857, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 76070 + }, + { + "epoch": 500.5263157894737, + "grad_norm": 1.0132211446762085, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 76080 + }, + { + "epoch": 500.5921052631579, + "grad_norm": 0.8630693554878235, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 76090 + }, + { + "epoch": 500.6578947368421, + "grad_norm": 1.200491189956665, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 76100 + }, + { + "epoch": 500.7236842105263, + "grad_norm": 1.0840117931365967, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 76110 + }, + { + "epoch": 500.7894736842105, + "grad_norm": 1.058173418045044, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 76120 + }, + { + "epoch": 500.85526315789474, + "grad_norm": 0.7691683769226074, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 76130 + }, + { + "epoch": 500.92105263157896, + "grad_norm": 1.3571420907974243, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 76140 + }, + { + "epoch": 500.9868421052632, + "grad_norm": 1.397667646408081, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 76150 + }, + { + "epoch": 501.05263157894734, + "grad_norm": 1.2376160621643066, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 76160 + }, + { + "epoch": 501.11842105263156, + "grad_norm": 1.193163514137268, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 76170 + }, + { + "epoch": 501.1842105263158, + "grad_norm": 1.4034628868103027, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 76180 + }, + { + "epoch": 501.25, + "grad_norm": 1.207105278968811, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 76190 + }, + { + "epoch": 501.3157894736842, + "grad_norm": 0.7556562423706055, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 76200 + }, + { + "epoch": 501.38157894736844, + "grad_norm": 1.2998090982437134, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 76210 + }, + { + "epoch": 501.44736842105266, + "grad_norm": 0.9922952055931091, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 76220 + }, + { + "epoch": 501.5131578947368, + "grad_norm": 1.225348949432373, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 76230 + }, + { + "epoch": 501.57894736842104, + "grad_norm": 1.3138529062271118, + "learning_rate": 0.0001, + "loss": 0.0154, + "step": 76240 + }, + { + "epoch": 501.64473684210526, + "grad_norm": 0.9411857724189758, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 76250 + }, + { + "epoch": 501.7105263157895, + "grad_norm": 1.271523356437683, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 76260 + }, + { + "epoch": 501.7763157894737, + "grad_norm": 1.2197598218917847, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 76270 + }, + { + "epoch": 501.8421052631579, + "grad_norm": 1.3674578666687012, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 76280 + }, + { + "epoch": 501.9078947368421, + "grad_norm": 1.3369501829147339, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 76290 + }, + { + "epoch": 501.9736842105263, + "grad_norm": 1.6227846145629883, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 76300 + }, + { + "epoch": 502.0394736842105, + "grad_norm": 1.3651342391967773, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 76310 + }, + { + "epoch": 502.10526315789474, + "grad_norm": 1.1167755126953125, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 76320 + }, + { + "epoch": 502.17105263157896, + "grad_norm": 1.0122556686401367, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 76330 + }, + { + "epoch": 502.2368421052632, + "grad_norm": 1.339971899986267, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 76340 + }, + { + "epoch": 502.30263157894734, + "grad_norm": 1.1776334047317505, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 76350 + }, + { + "epoch": 502.36842105263156, + "grad_norm": 1.1204994916915894, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 76360 + }, + { + "epoch": 502.4342105263158, + "grad_norm": 1.1037061214447021, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 76370 + }, + { + "epoch": 502.5, + "grad_norm": 1.4613289833068848, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 76380 + }, + { + "epoch": 502.5657894736842, + "grad_norm": 1.3934388160705566, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 76390 + }, + { + "epoch": 502.63157894736844, + "grad_norm": 1.470294713973999, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 76400 + }, + { + "epoch": 502.69736842105266, + "grad_norm": 1.2108807563781738, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 76410 + }, + { + "epoch": 502.7631578947368, + "grad_norm": 1.1446878910064697, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 76420 + }, + { + "epoch": 502.82894736842104, + "grad_norm": 1.1863682270050049, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 76430 + }, + { + "epoch": 502.89473684210526, + "grad_norm": 1.2745025157928467, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 76440 + }, + { + "epoch": 502.9605263157895, + "grad_norm": 1.2708007097244263, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 76450 + }, + { + "epoch": 503.0263157894737, + "grad_norm": 1.109881043434143, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 76460 + }, + { + "epoch": 503.0921052631579, + "grad_norm": 0.837130606174469, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 76470 + }, + { + "epoch": 503.1578947368421, + "grad_norm": 1.1610591411590576, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 76480 + }, + { + "epoch": 503.2236842105263, + "grad_norm": 0.9062598943710327, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 76490 + }, + { + "epoch": 503.2894736842105, + "grad_norm": 0.880432665348053, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 76500 + }, + { + "epoch": 503.35526315789474, + "grad_norm": 1.1003761291503906, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 76510 + }, + { + "epoch": 503.42105263157896, + "grad_norm": 1.0578047037124634, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 76520 + }, + { + "epoch": 503.4868421052632, + "grad_norm": 1.265828251838684, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 76530 + }, + { + "epoch": 503.55263157894734, + "grad_norm": 0.7461774945259094, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 76540 + }, + { + "epoch": 503.61842105263156, + "grad_norm": 1.1446738243103027, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 76550 + }, + { + "epoch": 503.6842105263158, + "grad_norm": 1.0880557298660278, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 76560 + }, + { + "epoch": 503.75, + "grad_norm": 1.399858832359314, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 76570 + }, + { + "epoch": 503.8157894736842, + "grad_norm": 1.1367653608322144, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 76580 + }, + { + "epoch": 503.88157894736844, + "grad_norm": 1.018934965133667, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 76590 + }, + { + "epoch": 503.94736842105266, + "grad_norm": 1.066989779472351, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 76600 + }, + { + "epoch": 504.0131578947368, + "grad_norm": 0.9867700338363647, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 76610 + }, + { + "epoch": 504.07894736842104, + "grad_norm": 1.0449360609054565, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 76620 + }, + { + "epoch": 504.14473684210526, + "grad_norm": 0.7353350520133972, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 76630 + }, + { + "epoch": 504.2105263157895, + "grad_norm": 0.8877101540565491, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 76640 + }, + { + "epoch": 504.2763157894737, + "grad_norm": 1.084471583366394, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 76650 + }, + { + "epoch": 504.3421052631579, + "grad_norm": 1.1777323484420776, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 76660 + }, + { + "epoch": 504.4078947368421, + "grad_norm": 1.193827748298645, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 76670 + }, + { + "epoch": 504.4736842105263, + "grad_norm": 1.2224369049072266, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 76680 + }, + { + "epoch": 504.5394736842105, + "grad_norm": 1.1256766319274902, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 76690 + }, + { + "epoch": 504.60526315789474, + "grad_norm": 1.0881433486938477, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 76700 + }, + { + "epoch": 504.67105263157896, + "grad_norm": 0.9593660831451416, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 76710 + }, + { + "epoch": 504.7368421052632, + "grad_norm": 1.1365207433700562, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 76720 + }, + { + "epoch": 504.80263157894734, + "grad_norm": 1.1816807985305786, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 76730 + }, + { + "epoch": 504.86842105263156, + "grad_norm": 1.0027751922607422, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 76740 + }, + { + "epoch": 504.9342105263158, + "grad_norm": 0.7428399920463562, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 76750 + }, + { + "epoch": 505.0, + "grad_norm": 1.1550109386444092, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 76760 + }, + { + "epoch": 505.0657894736842, + "grad_norm": 0.9820820093154907, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 76770 + }, + { + "epoch": 505.13157894736844, + "grad_norm": 1.3091676235198975, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 76780 + }, + { + "epoch": 505.19736842105266, + "grad_norm": 1.0327069759368896, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 76790 + }, + { + "epoch": 505.2631578947368, + "grad_norm": 0.9336044788360596, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 76800 + }, + { + "epoch": 505.32894736842104, + "grad_norm": 1.4007383584976196, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 76810 + }, + { + "epoch": 505.39473684210526, + "grad_norm": 1.2266377210617065, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 76820 + }, + { + "epoch": 505.4605263157895, + "grad_norm": 1.1764065027236938, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 76830 + }, + { + "epoch": 505.5263157894737, + "grad_norm": 1.091961145401001, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 76840 + }, + { + "epoch": 505.5921052631579, + "grad_norm": 0.9492750763893127, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 76850 + }, + { + "epoch": 505.6578947368421, + "grad_norm": 1.088575839996338, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 76860 + }, + { + "epoch": 505.7236842105263, + "grad_norm": 0.8977634906768799, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 76870 + }, + { + "epoch": 505.7894736842105, + "grad_norm": 0.9890562891960144, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 76880 + }, + { + "epoch": 505.85526315789474, + "grad_norm": 0.8245396018028259, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 76890 + }, + { + "epoch": 505.92105263157896, + "grad_norm": 0.810986340045929, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 76900 + }, + { + "epoch": 505.9868421052632, + "grad_norm": 1.3287197351455688, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 76910 + }, + { + "epoch": 506.05263157894734, + "grad_norm": 0.7860926985740662, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 76920 + }, + { + "epoch": 506.11842105263156, + "grad_norm": 1.179496169090271, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 76930 + }, + { + "epoch": 506.1842105263158, + "grad_norm": 1.606441617012024, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 76940 + }, + { + "epoch": 506.25, + "grad_norm": 1.499569058418274, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 76950 + }, + { + "epoch": 506.3157894736842, + "grad_norm": 1.24739670753479, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 76960 + }, + { + "epoch": 506.38157894736844, + "grad_norm": 1.2441381216049194, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 76970 + }, + { + "epoch": 506.44736842105266, + "grad_norm": 0.8672195672988892, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 76980 + }, + { + "epoch": 506.5131578947368, + "grad_norm": 0.9006538391113281, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 76990 + }, + { + "epoch": 506.57894736842104, + "grad_norm": 0.9952192902565002, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 77000 + }, + { + "epoch": 506.64473684210526, + "grad_norm": 0.8276636004447937, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 77010 + }, + { + "epoch": 506.7105263157895, + "grad_norm": 1.2697261571884155, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 77020 + }, + { + "epoch": 506.7763157894737, + "grad_norm": 1.3981074094772339, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 77030 + }, + { + "epoch": 506.8421052631579, + "grad_norm": 1.0681496858596802, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 77040 + }, + { + "epoch": 506.9078947368421, + "grad_norm": 0.8381850719451904, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 77050 + }, + { + "epoch": 506.9736842105263, + "grad_norm": 1.236061692237854, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 77060 + }, + { + "epoch": 507.0394736842105, + "grad_norm": 0.8843864798545837, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 77070 + }, + { + "epoch": 507.10526315789474, + "grad_norm": 1.0024884939193726, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 77080 + }, + { + "epoch": 507.17105263157896, + "grad_norm": 1.0010193586349487, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 77090 + }, + { + "epoch": 507.2368421052632, + "grad_norm": 1.3840446472167969, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 77100 + }, + { + "epoch": 507.30263157894734, + "grad_norm": 0.9459224343299866, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 77110 + }, + { + "epoch": 507.36842105263156, + "grad_norm": 1.2796701192855835, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 77120 + }, + { + "epoch": 507.4342105263158, + "grad_norm": 0.9489369988441467, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 77130 + }, + { + "epoch": 507.5, + "grad_norm": 0.9329638481140137, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 77140 + }, + { + "epoch": 507.5657894736842, + "grad_norm": 1.1059828996658325, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 77150 + }, + { + "epoch": 507.63157894736844, + "grad_norm": 0.9566002488136292, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 77160 + }, + { + "epoch": 507.69736842105266, + "grad_norm": 1.44321608543396, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 77170 + }, + { + "epoch": 507.7631578947368, + "grad_norm": 0.9149665832519531, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 77180 + }, + { + "epoch": 507.82894736842104, + "grad_norm": 1.2634365558624268, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 77190 + }, + { + "epoch": 507.89473684210526, + "grad_norm": 1.0077171325683594, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 77200 + }, + { + "epoch": 507.9605263157895, + "grad_norm": 1.3527032136917114, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 77210 + }, + { + "epoch": 508.0263157894737, + "grad_norm": 1.3523765802383423, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 77220 + }, + { + "epoch": 508.0921052631579, + "grad_norm": 1.2622536420822144, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 77230 + }, + { + "epoch": 508.1578947368421, + "grad_norm": 1.2173060178756714, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 77240 + }, + { + "epoch": 508.2236842105263, + "grad_norm": 1.077372431755066, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 77250 + }, + { + "epoch": 508.2894736842105, + "grad_norm": 1.0975558757781982, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 77260 + }, + { + "epoch": 508.35526315789474, + "grad_norm": 1.3480600118637085, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 77270 + }, + { + "epoch": 508.42105263157896, + "grad_norm": 1.1693116426467896, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 77280 + }, + { + "epoch": 508.4868421052632, + "grad_norm": 1.1305088996887207, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 77290 + }, + { + "epoch": 508.55263157894734, + "grad_norm": 1.2774187326431274, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 77300 + }, + { + "epoch": 508.61842105263156, + "grad_norm": 1.1564953327178955, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 77310 + }, + { + "epoch": 508.6842105263158, + "grad_norm": 0.8752122521400452, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 77320 + }, + { + "epoch": 508.75, + "grad_norm": 1.2619119882583618, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 77330 + }, + { + "epoch": 508.8157894736842, + "grad_norm": 0.9893479943275452, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 77340 + }, + { + "epoch": 508.88157894736844, + "grad_norm": 1.0533159971237183, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 77350 + }, + { + "epoch": 508.94736842105266, + "grad_norm": 1.032997488975525, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 77360 + }, + { + "epoch": 509.0131578947368, + "grad_norm": 1.0692557096481323, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 77370 + }, + { + "epoch": 509.07894736842104, + "grad_norm": 0.8767366409301758, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 77380 + }, + { + "epoch": 509.14473684210526, + "grad_norm": 1.1660325527191162, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 77390 + }, + { + "epoch": 509.2105263157895, + "grad_norm": 0.8173626661300659, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 77400 + }, + { + "epoch": 509.2763157894737, + "grad_norm": 0.7611241936683655, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 77410 + }, + { + "epoch": 509.3421052631579, + "grad_norm": 1.0339752435684204, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 77420 + }, + { + "epoch": 509.4078947368421, + "grad_norm": 1.1537483930587769, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 77430 + }, + { + "epoch": 509.4736842105263, + "grad_norm": 1.1419742107391357, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 77440 + }, + { + "epoch": 509.5394736842105, + "grad_norm": 1.0991747379302979, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 77450 + }, + { + "epoch": 509.60526315789474, + "grad_norm": 1.048770785331726, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 77460 + }, + { + "epoch": 509.67105263157896, + "grad_norm": 1.1474910974502563, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 77470 + }, + { + "epoch": 509.7368421052632, + "grad_norm": 1.519187331199646, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 77480 + }, + { + "epoch": 509.80263157894734, + "grad_norm": 1.7529782056808472, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 77490 + }, + { + "epoch": 509.86842105263156, + "grad_norm": 1.0271803140640259, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 77500 + }, + { + "epoch": 509.9342105263158, + "grad_norm": 1.1275503635406494, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 77510 + }, + { + "epoch": 510.0, + "grad_norm": 1.0205459594726562, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 77520 + }, + { + "epoch": 510.0657894736842, + "grad_norm": 1.088582158088684, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 77530 + }, + { + "epoch": 510.13157894736844, + "grad_norm": 0.9400649070739746, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 77540 + }, + { + "epoch": 510.19736842105266, + "grad_norm": 1.1834319829940796, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 77550 + }, + { + "epoch": 510.2631578947368, + "grad_norm": 1.0135151147842407, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 77560 + }, + { + "epoch": 510.32894736842104, + "grad_norm": 1.5176630020141602, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 77570 + }, + { + "epoch": 510.39473684210526, + "grad_norm": 1.283932089805603, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 77580 + }, + { + "epoch": 510.4605263157895, + "grad_norm": 1.1694949865341187, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 77590 + }, + { + "epoch": 510.5263157894737, + "grad_norm": 0.772484540939331, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 77600 + }, + { + "epoch": 510.5921052631579, + "grad_norm": 1.1480525732040405, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 77610 + }, + { + "epoch": 510.6578947368421, + "grad_norm": 1.1137710809707642, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 77620 + }, + { + "epoch": 510.7236842105263, + "grad_norm": 0.9193239212036133, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 77630 + }, + { + "epoch": 510.7894736842105, + "grad_norm": 0.9958206415176392, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 77640 + }, + { + "epoch": 510.85526315789474, + "grad_norm": 0.8010199069976807, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 77650 + }, + { + "epoch": 510.92105263157896, + "grad_norm": 1.4143106937408447, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 77660 + }, + { + "epoch": 510.9868421052632, + "grad_norm": 0.9478830695152283, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 77670 + }, + { + "epoch": 511.05263157894734, + "grad_norm": 1.2224031686782837, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 77680 + }, + { + "epoch": 511.11842105263156, + "grad_norm": 1.0343413352966309, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 77690 + }, + { + "epoch": 511.1842105263158, + "grad_norm": 1.211520791053772, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 77700 + }, + { + "epoch": 511.25, + "grad_norm": 1.017286777496338, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 77710 + }, + { + "epoch": 511.3157894736842, + "grad_norm": 0.9983707666397095, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 77720 + }, + { + "epoch": 511.38157894736844, + "grad_norm": 1.5086129903793335, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 77730 + }, + { + "epoch": 511.44736842105266, + "grad_norm": 1.1469098329544067, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 77740 + }, + { + "epoch": 511.5131578947368, + "grad_norm": 1.2588064670562744, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 77750 + }, + { + "epoch": 511.57894736842104, + "grad_norm": 0.9466657638549805, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 77760 + }, + { + "epoch": 511.64473684210526, + "grad_norm": 1.3773024082183838, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 77770 + }, + { + "epoch": 511.7105263157895, + "grad_norm": 1.0853861570358276, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 77780 + }, + { + "epoch": 511.7763157894737, + "grad_norm": 1.2546428442001343, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 77790 + }, + { + "epoch": 511.8421052631579, + "grad_norm": 1.0669403076171875, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 77800 + }, + { + "epoch": 511.9078947368421, + "grad_norm": 1.2506325244903564, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 77810 + }, + { + "epoch": 511.9736842105263, + "grad_norm": 1.007456660270691, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 77820 + }, + { + "epoch": 512.0394736842105, + "grad_norm": 1.0313053131103516, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 77830 + }, + { + "epoch": 512.1052631578947, + "grad_norm": 1.1146388053894043, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 77840 + }, + { + "epoch": 512.171052631579, + "grad_norm": 1.1103726625442505, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 77850 + }, + { + "epoch": 512.2368421052631, + "grad_norm": 1.240702748298645, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 77860 + }, + { + "epoch": 512.3026315789474, + "grad_norm": 1.3598905801773071, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 77870 + }, + { + "epoch": 512.3684210526316, + "grad_norm": 1.3365449905395508, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 77880 + }, + { + "epoch": 512.4342105263158, + "grad_norm": 0.8507790565490723, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 77890 + }, + { + "epoch": 512.5, + "grad_norm": 0.9919387698173523, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 77900 + }, + { + "epoch": 512.5657894736842, + "grad_norm": 0.742108941078186, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 77910 + }, + { + "epoch": 512.6315789473684, + "grad_norm": 1.1056491136550903, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 77920 + }, + { + "epoch": 512.6973684210526, + "grad_norm": 1.045460820198059, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 77930 + }, + { + "epoch": 512.7631578947369, + "grad_norm": 1.2098149061203003, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 77940 + }, + { + "epoch": 512.828947368421, + "grad_norm": 1.1252753734588623, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 77950 + }, + { + "epoch": 512.8947368421053, + "grad_norm": 1.4744460582733154, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 77960 + }, + { + "epoch": 512.9605263157895, + "grad_norm": 0.7933367490768433, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 77970 + }, + { + "epoch": 513.0263157894736, + "grad_norm": 1.1913303136825562, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 77980 + }, + { + "epoch": 513.0921052631579, + "grad_norm": 1.1555919647216797, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 77990 + }, + { + "epoch": 513.1578947368421, + "grad_norm": 1.4469823837280273, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 78000 + }, + { + "epoch": 513.2236842105264, + "grad_norm": 1.104987382888794, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 78010 + }, + { + "epoch": 513.2894736842105, + "grad_norm": 1.0023167133331299, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 78020 + }, + { + "epoch": 513.3552631578947, + "grad_norm": 1.0223348140716553, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 78030 + }, + { + "epoch": 513.421052631579, + "grad_norm": 1.0552802085876465, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 78040 + }, + { + "epoch": 513.4868421052631, + "grad_norm": 1.0780552625656128, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 78050 + }, + { + "epoch": 513.5526315789474, + "grad_norm": 1.1177747249603271, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 78060 + }, + { + "epoch": 513.6184210526316, + "grad_norm": 1.3587802648544312, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 78070 + }, + { + "epoch": 513.6842105263158, + "grad_norm": 1.1935032606124878, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 78080 + }, + { + "epoch": 513.75, + "grad_norm": 1.1695393323898315, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 78090 + }, + { + "epoch": 513.8157894736842, + "grad_norm": 0.7090479135513306, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 78100 + }, + { + "epoch": 513.8815789473684, + "grad_norm": 1.5053452253341675, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 78110 + }, + { + "epoch": 513.9473684210526, + "grad_norm": 1.4671512842178345, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 78120 + }, + { + "epoch": 514.0131578947369, + "grad_norm": 1.2938599586486816, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 78130 + }, + { + "epoch": 514.078947368421, + "grad_norm": 1.09611177444458, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 78140 + }, + { + "epoch": 514.1447368421053, + "grad_norm": 1.0053191184997559, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 78150 + }, + { + "epoch": 514.2105263157895, + "grad_norm": 0.8893487453460693, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 78160 + }, + { + "epoch": 514.2763157894736, + "grad_norm": 1.13539457321167, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 78170 + }, + { + "epoch": 514.3421052631579, + "grad_norm": 1.3186606168746948, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 78180 + }, + { + "epoch": 514.4078947368421, + "grad_norm": 1.296586036682129, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 78190 + }, + { + "epoch": 514.4736842105264, + "grad_norm": 1.1459144353866577, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 78200 + }, + { + "epoch": 514.5394736842105, + "grad_norm": 0.9058517813682556, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 78210 + }, + { + "epoch": 514.6052631578947, + "grad_norm": 1.4812514781951904, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 78220 + }, + { + "epoch": 514.671052631579, + "grad_norm": 1.5883773565292358, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 78230 + }, + { + "epoch": 514.7368421052631, + "grad_norm": 1.2800959348678589, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 78240 + }, + { + "epoch": 514.8026315789474, + "grad_norm": 0.9361305236816406, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 78250 + }, + { + "epoch": 514.8684210526316, + "grad_norm": 1.3019955158233643, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 78260 + }, + { + "epoch": 514.9342105263158, + "grad_norm": 1.1924444437026978, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 78270 + }, + { + "epoch": 515.0, + "grad_norm": 1.103654384613037, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 78280 + }, + { + "epoch": 515.0657894736842, + "grad_norm": 1.3044533729553223, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 78290 + }, + { + "epoch": 515.1315789473684, + "grad_norm": 0.844568133354187, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 78300 + }, + { + "epoch": 515.1973684210526, + "grad_norm": 1.071467399597168, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 78310 + }, + { + "epoch": 515.2631578947369, + "grad_norm": 0.9115777015686035, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 78320 + }, + { + "epoch": 515.328947368421, + "grad_norm": 0.8897896409034729, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 78330 + }, + { + "epoch": 515.3947368421053, + "grad_norm": 1.273638367652893, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 78340 + }, + { + "epoch": 515.4605263157895, + "grad_norm": 0.7495373487472534, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 78350 + }, + { + "epoch": 515.5263157894736, + "grad_norm": 1.1438860893249512, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 78360 + }, + { + "epoch": 515.5921052631579, + "grad_norm": 0.8679733276367188, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 78370 + }, + { + "epoch": 515.6578947368421, + "grad_norm": 1.0994764566421509, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 78380 + }, + { + "epoch": 515.7236842105264, + "grad_norm": 1.1603001356124878, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 78390 + }, + { + "epoch": 515.7894736842105, + "grad_norm": 1.049238920211792, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 78400 + }, + { + "epoch": 515.8552631578947, + "grad_norm": 1.0663156509399414, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 78410 + }, + { + "epoch": 515.921052631579, + "grad_norm": 0.7960723638534546, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 78420 + }, + { + "epoch": 515.9868421052631, + "grad_norm": 0.9979690909385681, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 78430 + }, + { + "epoch": 516.0526315789474, + "grad_norm": 0.8607260584831238, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 78440 + }, + { + "epoch": 516.1184210526316, + "grad_norm": 1.2933063507080078, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 78450 + }, + { + "epoch": 516.1842105263158, + "grad_norm": 0.9800442457199097, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 78460 + }, + { + "epoch": 516.25, + "grad_norm": 1.1290419101715088, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 78470 + }, + { + "epoch": 516.3157894736842, + "grad_norm": 1.2492495775222778, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 78480 + }, + { + "epoch": 516.3815789473684, + "grad_norm": 1.2778388261795044, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 78490 + }, + { + "epoch": 516.4473684210526, + "grad_norm": 1.31378173828125, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 78500 + }, + { + "epoch": 516.5131578947369, + "grad_norm": 0.9775269627571106, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 78510 + }, + { + "epoch": 516.578947368421, + "grad_norm": 0.8219498991966248, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 78520 + }, + { + "epoch": 516.6447368421053, + "grad_norm": 0.7330734729766846, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 78530 + }, + { + "epoch": 516.7105263157895, + "grad_norm": 0.9409795999526978, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 78540 + }, + { + "epoch": 516.7763157894736, + "grad_norm": 1.4014899730682373, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 78550 + }, + { + "epoch": 516.8421052631579, + "grad_norm": 1.3369898796081543, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 78560 + }, + { + "epoch": 516.9078947368421, + "grad_norm": 0.7518098950386047, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 78570 + }, + { + "epoch": 516.9736842105264, + "grad_norm": 0.977200448513031, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 78580 + }, + { + "epoch": 517.0394736842105, + "grad_norm": 0.9853875041007996, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 78590 + }, + { + "epoch": 517.1052631578947, + "grad_norm": 0.9010801911354065, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 78600 + }, + { + "epoch": 517.171052631579, + "grad_norm": 0.7902107238769531, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 78610 + }, + { + "epoch": 517.2368421052631, + "grad_norm": 0.7945990562438965, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 78620 + }, + { + "epoch": 517.3026315789474, + "grad_norm": 0.8374760746955872, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 78630 + }, + { + "epoch": 517.3684210526316, + "grad_norm": 0.8473724722862244, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 78640 + }, + { + "epoch": 517.4342105263158, + "grad_norm": 1.1389491558074951, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 78650 + }, + { + "epoch": 517.5, + "grad_norm": 1.1223610639572144, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 78660 + }, + { + "epoch": 517.5657894736842, + "grad_norm": 0.9881941080093384, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 78670 + }, + { + "epoch": 517.6315789473684, + "grad_norm": 1.021945834159851, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 78680 + }, + { + "epoch": 517.6973684210526, + "grad_norm": 0.9731574058532715, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 78690 + }, + { + "epoch": 517.7631578947369, + "grad_norm": 1.2203140258789062, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 78700 + }, + { + "epoch": 517.828947368421, + "grad_norm": 1.5560880899429321, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 78710 + }, + { + "epoch": 517.8947368421053, + "grad_norm": 1.2174243927001953, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 78720 + }, + { + "epoch": 517.9605263157895, + "grad_norm": 1.308231234550476, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 78730 + }, + { + "epoch": 518.0263157894736, + "grad_norm": 1.0401585102081299, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 78740 + }, + { + "epoch": 518.0921052631579, + "grad_norm": 1.1766376495361328, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 78750 + }, + { + "epoch": 518.1578947368421, + "grad_norm": 1.2370878458023071, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 78760 + }, + { + "epoch": 518.2236842105264, + "grad_norm": 1.2282683849334717, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 78770 + }, + { + "epoch": 518.2894736842105, + "grad_norm": 1.6565943956375122, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 78780 + }, + { + "epoch": 518.3552631578947, + "grad_norm": 1.6417547464370728, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 78790 + }, + { + "epoch": 518.421052631579, + "grad_norm": 1.1247665882110596, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 78800 + }, + { + "epoch": 518.4868421052631, + "grad_norm": 1.1730952262878418, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 78810 + }, + { + "epoch": 518.5526315789474, + "grad_norm": 1.2181925773620605, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 78820 + }, + { + "epoch": 518.6184210526316, + "grad_norm": 1.0916987657546997, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 78830 + }, + { + "epoch": 518.6842105263158, + "grad_norm": 0.9300577640533447, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 78840 + }, + { + "epoch": 518.75, + "grad_norm": 1.0200120210647583, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 78850 + }, + { + "epoch": 518.8157894736842, + "grad_norm": 1.1790283918380737, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 78860 + }, + { + "epoch": 518.8815789473684, + "grad_norm": 1.4343522787094116, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 78870 + }, + { + "epoch": 518.9473684210526, + "grad_norm": 1.2748308181762695, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 78880 + }, + { + "epoch": 519.0131578947369, + "grad_norm": 1.3263063430786133, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 78890 + }, + { + "epoch": 519.078947368421, + "grad_norm": 0.8143279552459717, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 78900 + }, + { + "epoch": 519.1447368421053, + "grad_norm": 0.8361022472381592, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 78910 + }, + { + "epoch": 519.2105263157895, + "grad_norm": 1.0913989543914795, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 78920 + }, + { + "epoch": 519.2763157894736, + "grad_norm": 1.3421107530593872, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 78930 + }, + { + "epoch": 519.3421052631579, + "grad_norm": 1.0078271627426147, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 78940 + }, + { + "epoch": 519.4078947368421, + "grad_norm": 1.1363328695297241, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 78950 + }, + { + "epoch": 519.4736842105264, + "grad_norm": 1.1798573732376099, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 78960 + }, + { + "epoch": 519.5394736842105, + "grad_norm": 1.3588061332702637, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 78970 + }, + { + "epoch": 519.6052631578947, + "grad_norm": 0.9301666021347046, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 78980 + }, + { + "epoch": 519.671052631579, + "grad_norm": 1.2737902402877808, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 78990 + }, + { + "epoch": 519.7368421052631, + "grad_norm": 1.1327887773513794, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 79000 + }, + { + "epoch": 519.8026315789474, + "grad_norm": 1.0071492195129395, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 79010 + }, + { + "epoch": 519.8684210526316, + "grad_norm": 1.227921724319458, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 79020 + }, + { + "epoch": 519.9342105263158, + "grad_norm": 1.2975742816925049, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 79030 + }, + { + "epoch": 520.0, + "grad_norm": 1.392256259918213, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 79040 + }, + { + "epoch": 520.0657894736842, + "grad_norm": 1.469244360923767, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 79050 + }, + { + "epoch": 520.1315789473684, + "grad_norm": 0.9325305223464966, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 79060 + }, + { + "epoch": 520.1973684210526, + "grad_norm": 1.2577964067459106, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 79070 + }, + { + "epoch": 520.2631578947369, + "grad_norm": 1.347522497177124, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 79080 + }, + { + "epoch": 520.328947368421, + "grad_norm": 1.0849223136901855, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 79090 + }, + { + "epoch": 520.3947368421053, + "grad_norm": 1.3118022680282593, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 79100 + }, + { + "epoch": 520.4605263157895, + "grad_norm": 1.0926787853240967, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 79110 + }, + { + "epoch": 520.5263157894736, + "grad_norm": 1.3140438795089722, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 79120 + }, + { + "epoch": 520.5921052631579, + "grad_norm": 1.4527376890182495, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 79130 + }, + { + "epoch": 520.6578947368421, + "grad_norm": 0.9648956656455994, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 79140 + }, + { + "epoch": 520.7236842105264, + "grad_norm": 0.9755305051803589, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 79150 + }, + { + "epoch": 520.7894736842105, + "grad_norm": 1.085988163948059, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 79160 + }, + { + "epoch": 520.8552631578947, + "grad_norm": 0.982824444770813, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 79170 + }, + { + "epoch": 520.921052631579, + "grad_norm": 1.0134260654449463, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 79180 + }, + { + "epoch": 520.9868421052631, + "grad_norm": 1.0143835544586182, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 79190 + }, + { + "epoch": 521.0526315789474, + "grad_norm": 1.0507410764694214, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 79200 + }, + { + "epoch": 521.1184210526316, + "grad_norm": 0.9346352815628052, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 79210 + }, + { + "epoch": 521.1842105263158, + "grad_norm": 0.6871551871299744, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 79220 + }, + { + "epoch": 521.25, + "grad_norm": 1.1676675081253052, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 79230 + }, + { + "epoch": 521.3157894736842, + "grad_norm": 0.6791279911994934, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 79240 + }, + { + "epoch": 521.3815789473684, + "grad_norm": 0.735756516456604, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 79250 + }, + { + "epoch": 521.4473684210526, + "grad_norm": 1.068332314491272, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 79260 + }, + { + "epoch": 521.5131578947369, + "grad_norm": 1.0991226434707642, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 79270 + }, + { + "epoch": 521.578947368421, + "grad_norm": 0.9471065402030945, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 79280 + }, + { + "epoch": 521.6447368421053, + "grad_norm": 1.1460009813308716, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 79290 + }, + { + "epoch": 521.7105263157895, + "grad_norm": 1.2541179656982422, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 79300 + }, + { + "epoch": 521.7763157894736, + "grad_norm": 1.3320528268814087, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 79310 + }, + { + "epoch": 521.8421052631579, + "grad_norm": 1.2252389192581177, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 79320 + }, + { + "epoch": 521.9078947368421, + "grad_norm": 1.0492901802062988, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 79330 + }, + { + "epoch": 521.9736842105264, + "grad_norm": 0.755221962928772, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 79340 + }, + { + "epoch": 522.0394736842105, + "grad_norm": 0.9267183542251587, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 79350 + }, + { + "epoch": 522.1052631578947, + "grad_norm": 1.2305091619491577, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 79360 + }, + { + "epoch": 522.171052631579, + "grad_norm": 1.3195940256118774, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 79370 + }, + { + "epoch": 522.2368421052631, + "grad_norm": 1.2802159786224365, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 79380 + }, + { + "epoch": 522.3026315789474, + "grad_norm": 1.2666503190994263, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 79390 + }, + { + "epoch": 522.3684210526316, + "grad_norm": 1.2319374084472656, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 79400 + }, + { + "epoch": 522.4342105263158, + "grad_norm": 1.361430287361145, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 79410 + }, + { + "epoch": 522.5, + "grad_norm": 1.0288817882537842, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 79420 + }, + { + "epoch": 522.5657894736842, + "grad_norm": 1.1962461471557617, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 79430 + }, + { + "epoch": 522.6315789473684, + "grad_norm": 1.5438764095306396, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 79440 + }, + { + "epoch": 522.6973684210526, + "grad_norm": 1.5173245668411255, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 79450 + }, + { + "epoch": 522.7631578947369, + "grad_norm": 1.4040913581848145, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 79460 + }, + { + "epoch": 522.828947368421, + "grad_norm": 1.4810621738433838, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 79470 + }, + { + "epoch": 522.8947368421053, + "grad_norm": 1.279215931892395, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 79480 + }, + { + "epoch": 522.9605263157895, + "grad_norm": 1.1824666261672974, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 79490 + }, + { + "epoch": 523.0263157894736, + "grad_norm": 1.2264102697372437, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 79500 + }, + { + "epoch": 523.0921052631579, + "grad_norm": 1.2089831829071045, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 79510 + }, + { + "epoch": 523.1578947368421, + "grad_norm": 1.6189210414886475, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 79520 + }, + { + "epoch": 523.2236842105264, + "grad_norm": 1.5252612829208374, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 79530 + }, + { + "epoch": 523.2894736842105, + "grad_norm": 1.3406212329864502, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 79540 + }, + { + "epoch": 523.3552631578947, + "grad_norm": 1.1579642295837402, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 79550 + }, + { + "epoch": 523.421052631579, + "grad_norm": 1.012661337852478, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 79560 + }, + { + "epoch": 523.4868421052631, + "grad_norm": 0.8289533257484436, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 79570 + }, + { + "epoch": 523.5526315789474, + "grad_norm": 1.1507660150527954, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 79580 + }, + { + "epoch": 523.6184210526316, + "grad_norm": 1.1765552759170532, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 79590 + }, + { + "epoch": 523.6842105263158, + "grad_norm": 1.2260336875915527, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 79600 + }, + { + "epoch": 523.75, + "grad_norm": 1.341565728187561, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 79610 + }, + { + "epoch": 523.8157894736842, + "grad_norm": 0.9649227261543274, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 79620 + }, + { + "epoch": 523.8815789473684, + "grad_norm": 1.075209617614746, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 79630 + }, + { + "epoch": 523.9473684210526, + "grad_norm": 1.4141005277633667, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 79640 + }, + { + "epoch": 524.0131578947369, + "grad_norm": 1.0523087978363037, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 79650 + }, + { + "epoch": 524.078947368421, + "grad_norm": 1.1604161262512207, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 79660 + }, + { + "epoch": 524.1447368421053, + "grad_norm": 0.8030272722244263, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 79670 + }, + { + "epoch": 524.2105263157895, + "grad_norm": 0.9838864207267761, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 79680 + }, + { + "epoch": 524.2763157894736, + "grad_norm": 1.2279644012451172, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 79690 + }, + { + "epoch": 524.3421052631579, + "grad_norm": 1.4876803159713745, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 79700 + }, + { + "epoch": 524.4078947368421, + "grad_norm": 1.1998071670532227, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 79710 + }, + { + "epoch": 524.4736842105264, + "grad_norm": 1.1491254568099976, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 79720 + }, + { + "epoch": 524.5394736842105, + "grad_norm": 1.4121824502944946, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 79730 + }, + { + "epoch": 524.6052631578947, + "grad_norm": 0.9082379341125488, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 79740 + }, + { + "epoch": 524.671052631579, + "grad_norm": 1.1881701946258545, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 79750 + }, + { + "epoch": 524.7368421052631, + "grad_norm": 1.3639180660247803, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 79760 + }, + { + "epoch": 524.8026315789474, + "grad_norm": 1.2215701341629028, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 79770 + }, + { + "epoch": 524.8684210526316, + "grad_norm": 0.7113543748855591, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 79780 + }, + { + "epoch": 524.9342105263158, + "grad_norm": 1.2361555099487305, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 79790 + }, + { + "epoch": 525.0, + "grad_norm": 1.0537495613098145, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 79800 + }, + { + "epoch": 525.0657894736842, + "grad_norm": 1.0916693210601807, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 79810 + }, + { + "epoch": 525.1315789473684, + "grad_norm": 1.286116600036621, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 79820 + }, + { + "epoch": 525.1973684210526, + "grad_norm": 1.1319963932037354, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 79830 + }, + { + "epoch": 525.2631578947369, + "grad_norm": 1.149547815322876, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 79840 + }, + { + "epoch": 525.328947368421, + "grad_norm": 1.2941960096359253, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 79850 + }, + { + "epoch": 525.3947368421053, + "grad_norm": 0.9617957472801208, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 79860 + }, + { + "epoch": 525.4605263157895, + "grad_norm": 1.2239959239959717, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 79870 + }, + { + "epoch": 525.5263157894736, + "grad_norm": 1.0392863750457764, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 79880 + }, + { + "epoch": 525.5921052631579, + "grad_norm": 1.2805525064468384, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 79890 + }, + { + "epoch": 525.6578947368421, + "grad_norm": 1.201557993888855, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 79900 + }, + { + "epoch": 525.7236842105264, + "grad_norm": 1.165624737739563, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 79910 + }, + { + "epoch": 525.7894736842105, + "grad_norm": 1.2472856044769287, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 79920 + }, + { + "epoch": 525.8552631578947, + "grad_norm": 0.8875550627708435, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 79930 + }, + { + "epoch": 525.921052631579, + "grad_norm": 1.0350874662399292, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 79940 + }, + { + "epoch": 525.9868421052631, + "grad_norm": 1.2986034154891968, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 79950 + }, + { + "epoch": 526.0526315789474, + "grad_norm": 1.1310527324676514, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 79960 + }, + { + "epoch": 526.1184210526316, + "grad_norm": 1.1110562086105347, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 79970 + }, + { + "epoch": 526.1842105263158, + "grad_norm": 1.1927745342254639, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 79980 + }, + { + "epoch": 526.25, + "grad_norm": 1.0965501070022583, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 79990 + }, + { + "epoch": 526.3157894736842, + "grad_norm": 1.4071152210235596, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 80000 + }, + { + "epoch": 526.3815789473684, + "grad_norm": 1.0651636123657227, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 80010 + }, + { + "epoch": 526.4473684210526, + "grad_norm": 1.3853951692581177, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 80020 + }, + { + "epoch": 526.5131578947369, + "grad_norm": 1.1486934423446655, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 80030 + }, + { + "epoch": 526.578947368421, + "grad_norm": 1.088996171951294, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 80040 + }, + { + "epoch": 526.6447368421053, + "grad_norm": 1.4724292755126953, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 80050 + }, + { + "epoch": 526.7105263157895, + "grad_norm": 1.1345316171646118, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 80060 + }, + { + "epoch": 526.7763157894736, + "grad_norm": 1.065085768699646, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 80070 + }, + { + "epoch": 526.8421052631579, + "grad_norm": 1.199918270111084, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 80080 + }, + { + "epoch": 526.9078947368421, + "grad_norm": 1.019289255142212, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 80090 + }, + { + "epoch": 526.9736842105264, + "grad_norm": 1.2872799634933472, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 80100 + }, + { + "epoch": 527.0394736842105, + "grad_norm": 1.3231974840164185, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 80110 + }, + { + "epoch": 527.1052631578947, + "grad_norm": 1.0737744569778442, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 80120 + }, + { + "epoch": 527.171052631579, + "grad_norm": 0.7930030822753906, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 80130 + }, + { + "epoch": 527.2368421052631, + "grad_norm": 1.4024803638458252, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 80140 + }, + { + "epoch": 527.3026315789474, + "grad_norm": 0.9458417892456055, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 80150 + }, + { + "epoch": 527.3684210526316, + "grad_norm": 1.3698056936264038, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 80160 + }, + { + "epoch": 527.4342105263158, + "grad_norm": 0.9708862900733948, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 80170 + }, + { + "epoch": 527.5, + "grad_norm": 0.9804370403289795, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 80180 + }, + { + "epoch": 527.5657894736842, + "grad_norm": 1.2980481386184692, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 80190 + }, + { + "epoch": 527.6315789473684, + "grad_norm": 1.3205853700637817, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 80200 + }, + { + "epoch": 527.6973684210526, + "grad_norm": 0.9288889765739441, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 80210 + }, + { + "epoch": 527.7631578947369, + "grad_norm": 1.5022870302200317, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 80220 + }, + { + "epoch": 527.828947368421, + "grad_norm": 1.310860276222229, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 80230 + }, + { + "epoch": 527.8947368421053, + "grad_norm": 1.0890276432037354, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 80240 + }, + { + "epoch": 527.9605263157895, + "grad_norm": 1.25932776927948, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 80250 + }, + { + "epoch": 528.0263157894736, + "grad_norm": 1.037662386894226, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 80260 + }, + { + "epoch": 528.0921052631579, + "grad_norm": 1.1433483362197876, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 80270 + }, + { + "epoch": 528.1578947368421, + "grad_norm": 1.212409257888794, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 80280 + }, + { + "epoch": 528.2236842105264, + "grad_norm": 1.226341962814331, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 80290 + }, + { + "epoch": 528.2894736842105, + "grad_norm": 1.4430090188980103, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 80300 + }, + { + "epoch": 528.3552631578947, + "grad_norm": 1.1568644046783447, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 80310 + }, + { + "epoch": 528.421052631579, + "grad_norm": 1.183970332145691, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 80320 + }, + { + "epoch": 528.4868421052631, + "grad_norm": 1.3099122047424316, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 80330 + }, + { + "epoch": 528.5526315789474, + "grad_norm": 1.2902318239212036, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 80340 + }, + { + "epoch": 528.6184210526316, + "grad_norm": 1.0034809112548828, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 80350 + }, + { + "epoch": 528.6842105263158, + "grad_norm": 0.9680561423301697, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 80360 + }, + { + "epoch": 528.75, + "grad_norm": 1.1589789390563965, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 80370 + }, + { + "epoch": 528.8157894736842, + "grad_norm": 1.2295933961868286, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 80380 + }, + { + "epoch": 528.8815789473684, + "grad_norm": 1.5712645053863525, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 80390 + }, + { + "epoch": 528.9473684210526, + "grad_norm": 1.4577996730804443, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 80400 + }, + { + "epoch": 529.0131578947369, + "grad_norm": 1.1000990867614746, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 80410 + }, + { + "epoch": 529.078947368421, + "grad_norm": 1.2241917848587036, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 80420 + }, + { + "epoch": 529.1447368421053, + "grad_norm": 1.0974723100662231, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 80430 + }, + { + "epoch": 529.2105263157895, + "grad_norm": 0.9757032990455627, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 80440 + }, + { + "epoch": 529.2763157894736, + "grad_norm": 1.317093849182129, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 80450 + }, + { + "epoch": 529.3421052631579, + "grad_norm": 1.1558804512023926, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 80460 + }, + { + "epoch": 529.4078947368421, + "grad_norm": 1.0101091861724854, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 80470 + }, + { + "epoch": 529.4736842105264, + "grad_norm": 1.2322200536727905, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 80480 + }, + { + "epoch": 529.5394736842105, + "grad_norm": 1.0501878261566162, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 80490 + }, + { + "epoch": 529.6052631578947, + "grad_norm": 1.1091357469558716, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 80500 + }, + { + "epoch": 529.671052631579, + "grad_norm": 1.2675368785858154, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 80510 + }, + { + "epoch": 529.7368421052631, + "grad_norm": 1.0387545824050903, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 80520 + }, + { + "epoch": 529.8026315789474, + "grad_norm": 1.0804754495620728, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 80530 + }, + { + "epoch": 529.8684210526316, + "grad_norm": 1.0754399299621582, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 80540 + }, + { + "epoch": 529.9342105263158, + "grad_norm": 1.0641555786132812, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 80550 + }, + { + "epoch": 530.0, + "grad_norm": 1.3832042217254639, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 80560 + }, + { + "epoch": 530.0657894736842, + "grad_norm": 1.0079296827316284, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 80570 + }, + { + "epoch": 530.1315789473684, + "grad_norm": 0.7935236692428589, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 80580 + }, + { + "epoch": 530.1973684210526, + "grad_norm": 1.2828476428985596, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 80590 + }, + { + "epoch": 530.2631578947369, + "grad_norm": 1.0356460809707642, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 80600 + }, + { + "epoch": 530.328947368421, + "grad_norm": 1.1222877502441406, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 80610 + }, + { + "epoch": 530.3947368421053, + "grad_norm": 0.8804997801780701, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 80620 + }, + { + "epoch": 530.4605263157895, + "grad_norm": 1.159664273262024, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 80630 + }, + { + "epoch": 530.5263157894736, + "grad_norm": 1.4239946603775024, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 80640 + }, + { + "epoch": 530.5921052631579, + "grad_norm": 1.7034857273101807, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 80650 + }, + { + "epoch": 530.6578947368421, + "grad_norm": 1.6954020261764526, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 80660 + }, + { + "epoch": 530.7236842105264, + "grad_norm": 1.291788935661316, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 80670 + }, + { + "epoch": 530.7894736842105, + "grad_norm": 2.2857964038848877, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 80680 + }, + { + "epoch": 530.8552631578947, + "grad_norm": 1.9390625953674316, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 80690 + }, + { + "epoch": 530.921052631579, + "grad_norm": 1.31684410572052, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 80700 + }, + { + "epoch": 530.9868421052631, + "grad_norm": 1.6871960163116455, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 80710 + }, + { + "epoch": 531.0526315789474, + "grad_norm": 1.471971869468689, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 80720 + }, + { + "epoch": 531.1184210526316, + "grad_norm": 1.5256435871124268, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 80730 + }, + { + "epoch": 531.1842105263158, + "grad_norm": 1.4224085807800293, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 80740 + }, + { + "epoch": 531.25, + "grad_norm": 1.179168701171875, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 80750 + }, + { + "epoch": 531.3157894736842, + "grad_norm": 1.2524338960647583, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 80760 + }, + { + "epoch": 531.3815789473684, + "grad_norm": 1.2265777587890625, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 80770 + }, + { + "epoch": 531.4473684210526, + "grad_norm": 1.4335110187530518, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 80780 + }, + { + "epoch": 531.5131578947369, + "grad_norm": 1.5284316539764404, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 80790 + }, + { + "epoch": 531.578947368421, + "grad_norm": 1.42241370677948, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 80800 + }, + { + "epoch": 531.6447368421053, + "grad_norm": 1.530569314956665, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 80810 + }, + { + "epoch": 531.7105263157895, + "grad_norm": 0.7496344447135925, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 80820 + }, + { + "epoch": 531.7763157894736, + "grad_norm": 1.7025457620620728, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 80830 + }, + { + "epoch": 531.8421052631579, + "grad_norm": 1.1982378959655762, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 80840 + }, + { + "epoch": 531.9078947368421, + "grad_norm": 1.0408469438552856, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 80850 + }, + { + "epoch": 531.9736842105264, + "grad_norm": 1.2916014194488525, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 80860 + }, + { + "epoch": 532.0394736842105, + "grad_norm": 1.271699070930481, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 80870 + }, + { + "epoch": 532.1052631578947, + "grad_norm": 1.313772439956665, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 80880 + }, + { + "epoch": 532.171052631579, + "grad_norm": 1.413476824760437, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 80890 + }, + { + "epoch": 532.2368421052631, + "grad_norm": 1.160772442817688, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 80900 + }, + { + "epoch": 532.3026315789474, + "grad_norm": 1.0700751543045044, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 80910 + }, + { + "epoch": 532.3684210526316, + "grad_norm": 1.1918394565582275, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 80920 + }, + { + "epoch": 532.4342105263158, + "grad_norm": 0.9265491366386414, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 80930 + }, + { + "epoch": 532.5, + "grad_norm": 0.8995620012283325, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 80940 + }, + { + "epoch": 532.5657894736842, + "grad_norm": 1.0701876878738403, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 80950 + }, + { + "epoch": 532.6315789473684, + "grad_norm": 1.1642286777496338, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 80960 + }, + { + "epoch": 532.6973684210526, + "grad_norm": 1.027518630027771, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 80970 + }, + { + "epoch": 532.7631578947369, + "grad_norm": 1.2189431190490723, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 80980 + }, + { + "epoch": 532.828947368421, + "grad_norm": 1.1508433818817139, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 80990 + }, + { + "epoch": 532.8947368421053, + "grad_norm": 1.2702674865722656, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 81000 + }, + { + "epoch": 532.9605263157895, + "grad_norm": 1.1186507940292358, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 81010 + }, + { + "epoch": 533.0263157894736, + "grad_norm": 1.3020246028900146, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 81020 + }, + { + "epoch": 533.0921052631579, + "grad_norm": 1.0293231010437012, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 81030 + }, + { + "epoch": 533.1578947368421, + "grad_norm": 1.2916498184204102, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 81040 + }, + { + "epoch": 533.2236842105264, + "grad_norm": 1.2180614471435547, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 81050 + }, + { + "epoch": 533.2894736842105, + "grad_norm": 0.9033607840538025, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 81060 + }, + { + "epoch": 533.3552631578947, + "grad_norm": 1.1329870223999023, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 81070 + }, + { + "epoch": 533.421052631579, + "grad_norm": 0.9711558818817139, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 81080 + }, + { + "epoch": 533.4868421052631, + "grad_norm": 1.2064048051834106, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 81090 + }, + { + "epoch": 533.5526315789474, + "grad_norm": 1.200500726699829, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 81100 + }, + { + "epoch": 533.6184210526316, + "grad_norm": 1.3031874895095825, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 81110 + }, + { + "epoch": 533.6842105263158, + "grad_norm": 1.249746322631836, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 81120 + }, + { + "epoch": 533.75, + "grad_norm": 1.4198533296585083, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 81130 + }, + { + "epoch": 533.8157894736842, + "grad_norm": 1.0344791412353516, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 81140 + }, + { + "epoch": 533.8815789473684, + "grad_norm": 1.2430800199508667, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 81150 + }, + { + "epoch": 533.9473684210526, + "grad_norm": 1.0078041553497314, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 81160 + }, + { + "epoch": 534.0131578947369, + "grad_norm": 1.1461764574050903, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 81170 + }, + { + "epoch": 534.078947368421, + "grad_norm": 1.1230483055114746, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 81180 + }, + { + "epoch": 534.1447368421053, + "grad_norm": 1.142557144165039, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 81190 + }, + { + "epoch": 534.2105263157895, + "grad_norm": 1.1560312509536743, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 81200 + }, + { + "epoch": 534.2763157894736, + "grad_norm": 1.1191767454147339, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 81210 + }, + { + "epoch": 534.3421052631579, + "grad_norm": 0.7681576013565063, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 81220 + }, + { + "epoch": 534.4078947368421, + "grad_norm": 0.7272207736968994, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 81230 + }, + { + "epoch": 534.4736842105264, + "grad_norm": 0.9689534306526184, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 81240 + }, + { + "epoch": 534.5394736842105, + "grad_norm": 1.1911404132843018, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 81250 + }, + { + "epoch": 534.6052631578947, + "grad_norm": 1.2596523761749268, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 81260 + }, + { + "epoch": 534.671052631579, + "grad_norm": 1.0386167764663696, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 81270 + }, + { + "epoch": 534.7368421052631, + "grad_norm": 0.7270429730415344, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 81280 + }, + { + "epoch": 534.8026315789474, + "grad_norm": 0.9695709347724915, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 81290 + }, + { + "epoch": 534.8684210526316, + "grad_norm": 1.0142713785171509, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 81300 + }, + { + "epoch": 534.9342105263158, + "grad_norm": 1.2047271728515625, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 81310 + }, + { + "epoch": 535.0, + "grad_norm": 1.0015687942504883, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 81320 + }, + { + "epoch": 535.0657894736842, + "grad_norm": 1.3443199396133423, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 81330 + }, + { + "epoch": 535.1315789473684, + "grad_norm": 1.1419992446899414, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 81340 + }, + { + "epoch": 535.1973684210526, + "grad_norm": 1.3758596181869507, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 81350 + }, + { + "epoch": 535.2631578947369, + "grad_norm": 1.384379267692566, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 81360 + }, + { + "epoch": 535.328947368421, + "grad_norm": 0.993719220161438, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 81370 + }, + { + "epoch": 535.3947368421053, + "grad_norm": 0.9903963804244995, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 81380 + }, + { + "epoch": 535.4605263157895, + "grad_norm": 1.840928077697754, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 81390 + }, + { + "epoch": 535.5263157894736, + "grad_norm": 1.8315232992172241, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 81400 + }, + { + "epoch": 535.5921052631579, + "grad_norm": 1.0046427249908447, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 81410 + }, + { + "epoch": 535.6578947368421, + "grad_norm": 1.2056866884231567, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 81420 + }, + { + "epoch": 535.7236842105264, + "grad_norm": 1.0682623386383057, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 81430 + }, + { + "epoch": 535.7894736842105, + "grad_norm": 1.022611141204834, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 81440 + }, + { + "epoch": 535.8552631578947, + "grad_norm": 0.8441836833953857, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 81450 + }, + { + "epoch": 535.921052631579, + "grad_norm": 0.9319405555725098, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 81460 + }, + { + "epoch": 535.9868421052631, + "grad_norm": 1.3196568489074707, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 81470 + }, + { + "epoch": 536.0526315789474, + "grad_norm": 0.9573938846588135, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 81480 + }, + { + "epoch": 536.1184210526316, + "grad_norm": 1.1913952827453613, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 81490 + }, + { + "epoch": 536.1842105263158, + "grad_norm": 1.4894800186157227, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 81500 + }, + { + "epoch": 536.25, + "grad_norm": 1.2386541366577148, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 81510 + }, + { + "epoch": 536.3157894736842, + "grad_norm": 1.4592559337615967, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 81520 + }, + { + "epoch": 536.3815789473684, + "grad_norm": 1.2014906406402588, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 81530 + }, + { + "epoch": 536.4473684210526, + "grad_norm": 0.9843008518218994, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 81540 + }, + { + "epoch": 536.5131578947369, + "grad_norm": 1.1141507625579834, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 81550 + }, + { + "epoch": 536.578947368421, + "grad_norm": 1.0911942720413208, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 81560 + }, + { + "epoch": 536.6447368421053, + "grad_norm": 0.9761918783187866, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 81570 + }, + { + "epoch": 536.7105263157895, + "grad_norm": 1.1536732912063599, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 81580 + }, + { + "epoch": 536.7763157894736, + "grad_norm": 0.9874551296234131, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 81590 + }, + { + "epoch": 536.8421052631579, + "grad_norm": 1.2264351844787598, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 81600 + }, + { + "epoch": 536.9078947368421, + "grad_norm": 1.0342381000518799, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 81610 + }, + { + "epoch": 536.9736842105264, + "grad_norm": 0.8672528266906738, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 81620 + }, + { + "epoch": 537.0394736842105, + "grad_norm": 1.4549974203109741, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 81630 + }, + { + "epoch": 537.1052631578947, + "grad_norm": 1.4086586236953735, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 81640 + }, + { + "epoch": 537.171052631579, + "grad_norm": 0.8391483426094055, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 81650 + }, + { + "epoch": 537.2368421052631, + "grad_norm": 1.2846829891204834, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 81660 + }, + { + "epoch": 537.3026315789474, + "grad_norm": 0.9655391573905945, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 81670 + }, + { + "epoch": 537.3684210526316, + "grad_norm": 1.599820613861084, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 81680 + }, + { + "epoch": 537.4342105263158, + "grad_norm": 0.971474826335907, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 81690 + }, + { + "epoch": 537.5, + "grad_norm": 0.8270283937454224, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 81700 + }, + { + "epoch": 537.5657894736842, + "grad_norm": 1.2286924123764038, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 81710 + }, + { + "epoch": 537.6315789473684, + "grad_norm": 0.9310139417648315, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 81720 + }, + { + "epoch": 537.6973684210526, + "grad_norm": 0.9931386113166809, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 81730 + }, + { + "epoch": 537.7631578947369, + "grad_norm": 1.1506545543670654, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 81740 + }, + { + "epoch": 537.828947368421, + "grad_norm": 0.7511075735092163, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 81750 + }, + { + "epoch": 537.8947368421053, + "grad_norm": 1.1868315935134888, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 81760 + }, + { + "epoch": 537.9605263157895, + "grad_norm": 1.2605093717575073, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 81770 + }, + { + "epoch": 538.0263157894736, + "grad_norm": 0.8703752756118774, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 81780 + }, + { + "epoch": 538.0921052631579, + "grad_norm": 0.8889430165290833, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 81790 + }, + { + "epoch": 538.1578947368421, + "grad_norm": 1.4555449485778809, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 81800 + }, + { + "epoch": 538.2236842105264, + "grad_norm": 0.8129600882530212, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 81810 + }, + { + "epoch": 538.2894736842105, + "grad_norm": 0.6337363123893738, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 81820 + }, + { + "epoch": 538.3552631578947, + "grad_norm": 1.1285789012908936, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 81830 + }, + { + "epoch": 538.421052631579, + "grad_norm": 1.1603407859802246, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 81840 + }, + { + "epoch": 538.4868421052631, + "grad_norm": 0.8500170707702637, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 81850 + }, + { + "epoch": 538.5526315789474, + "grad_norm": 0.9496303200721741, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 81860 + }, + { + "epoch": 538.6184210526316, + "grad_norm": 1.0320367813110352, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 81870 + }, + { + "epoch": 538.6842105263158, + "grad_norm": 0.8967204689979553, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 81880 + }, + { + "epoch": 538.75, + "grad_norm": 1.439362645149231, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 81890 + }, + { + "epoch": 538.8157894736842, + "grad_norm": 1.2981364727020264, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 81900 + }, + { + "epoch": 538.8815789473684, + "grad_norm": 0.7729794979095459, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 81910 + }, + { + "epoch": 538.9473684210526, + "grad_norm": 0.9322214722633362, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 81920 + }, + { + "epoch": 539.0131578947369, + "grad_norm": 1.2167713642120361, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 81930 + }, + { + "epoch": 539.078947368421, + "grad_norm": 0.9902080297470093, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 81940 + }, + { + "epoch": 539.1447368421053, + "grad_norm": 1.3800474405288696, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 81950 + }, + { + "epoch": 539.2105263157895, + "grad_norm": 1.2342554330825806, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 81960 + }, + { + "epoch": 539.2763157894736, + "grad_norm": 1.1256905794143677, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 81970 + }, + { + "epoch": 539.3421052631579, + "grad_norm": 0.8514827489852905, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 81980 + }, + { + "epoch": 539.4078947368421, + "grad_norm": 1.0054851770401, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 81990 + }, + { + "epoch": 539.4736842105264, + "grad_norm": 1.3685269355773926, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 82000 + }, + { + "epoch": 539.5394736842105, + "grad_norm": 0.8945045471191406, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 82010 + }, + { + "epoch": 539.6052631578947, + "grad_norm": 0.7146797776222229, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 82020 + }, + { + "epoch": 539.671052631579, + "grad_norm": 1.4394199848175049, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 82030 + }, + { + "epoch": 539.7368421052631, + "grad_norm": 1.3503402471542358, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 82040 + }, + { + "epoch": 539.8026315789474, + "grad_norm": 0.9955316185951233, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 82050 + }, + { + "epoch": 539.8684210526316, + "grad_norm": 1.2270481586456299, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 82060 + }, + { + "epoch": 539.9342105263158, + "grad_norm": 0.8091347217559814, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 82070 + }, + { + "epoch": 540.0, + "grad_norm": 0.9882093071937561, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 82080 + }, + { + "epoch": 540.0657894736842, + "grad_norm": 1.3003027439117432, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 82090 + }, + { + "epoch": 540.1315789473684, + "grad_norm": 1.113666296005249, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 82100 + }, + { + "epoch": 540.1973684210526, + "grad_norm": 1.5681219100952148, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 82110 + }, + { + "epoch": 540.2631578947369, + "grad_norm": 0.7766351103782654, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 82120 + }, + { + "epoch": 540.328947368421, + "grad_norm": 1.0728567838668823, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 82130 + }, + { + "epoch": 540.3947368421053, + "grad_norm": 0.9811382293701172, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 82140 + }, + { + "epoch": 540.4605263157895, + "grad_norm": 0.8526586890220642, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 82150 + }, + { + "epoch": 540.5263157894736, + "grad_norm": 0.9959100484848022, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 82160 + }, + { + "epoch": 540.5921052631579, + "grad_norm": 1.1363943815231323, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 82170 + }, + { + "epoch": 540.6578947368421, + "grad_norm": 1.0341233015060425, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 82180 + }, + { + "epoch": 540.7236842105264, + "grad_norm": 1.2639089822769165, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 82190 + }, + { + "epoch": 540.7894736842105, + "grad_norm": 1.197579026222229, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 82200 + }, + { + "epoch": 540.8552631578947, + "grad_norm": 1.078098177909851, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 82210 + }, + { + "epoch": 540.921052631579, + "grad_norm": 1.026748538017273, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 82220 + }, + { + "epoch": 540.9868421052631, + "grad_norm": 1.1622309684753418, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 82230 + }, + { + "epoch": 541.0526315789474, + "grad_norm": 0.9618615508079529, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 82240 + }, + { + "epoch": 541.1184210526316, + "grad_norm": 1.2612966299057007, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 82250 + }, + { + "epoch": 541.1842105263158, + "grad_norm": 1.0327303409576416, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 82260 + }, + { + "epoch": 541.25, + "grad_norm": 1.160564661026001, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 82270 + }, + { + "epoch": 541.3157894736842, + "grad_norm": 1.1718395948410034, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 82280 + }, + { + "epoch": 541.3815789473684, + "grad_norm": 0.9807857275009155, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 82290 + }, + { + "epoch": 541.4473684210526, + "grad_norm": 1.2698783874511719, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 82300 + }, + { + "epoch": 541.5131578947369, + "grad_norm": 1.2093521356582642, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 82310 + }, + { + "epoch": 541.578947368421, + "grad_norm": 1.0178953409194946, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 82320 + }, + { + "epoch": 541.6447368421053, + "grad_norm": 0.9447916150093079, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 82330 + }, + { + "epoch": 541.7105263157895, + "grad_norm": 1.0798600912094116, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 82340 + }, + { + "epoch": 541.7763157894736, + "grad_norm": 1.1212877035140991, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 82350 + }, + { + "epoch": 541.8421052631579, + "grad_norm": 1.1646664142608643, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 82360 + }, + { + "epoch": 541.9078947368421, + "grad_norm": 1.073519229888916, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 82370 + }, + { + "epoch": 541.9736842105264, + "grad_norm": 0.8553018569946289, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 82380 + }, + { + "epoch": 542.0394736842105, + "grad_norm": 1.2172927856445312, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 82390 + }, + { + "epoch": 542.1052631578947, + "grad_norm": 1.0330595970153809, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 82400 + }, + { + "epoch": 542.171052631579, + "grad_norm": 0.9683526158332825, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 82410 + }, + { + "epoch": 542.2368421052631, + "grad_norm": 1.1833120584487915, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 82420 + }, + { + "epoch": 542.3026315789474, + "grad_norm": 1.3812700510025024, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 82430 + }, + { + "epoch": 542.3684210526316, + "grad_norm": 1.6291792392730713, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 82440 + }, + { + "epoch": 542.4342105263158, + "grad_norm": 1.2887747287750244, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 82450 + }, + { + "epoch": 542.5, + "grad_norm": 1.5787415504455566, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 82460 + }, + { + "epoch": 542.5657894736842, + "grad_norm": 1.3304678201675415, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 82470 + }, + { + "epoch": 542.6315789473684, + "grad_norm": 1.3938226699829102, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 82480 + }, + { + "epoch": 542.6973684210526, + "grad_norm": 1.4595518112182617, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 82490 + }, + { + "epoch": 542.7631578947369, + "grad_norm": 1.0828487873077393, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 82500 + }, + { + "epoch": 542.828947368421, + "grad_norm": 1.4781620502471924, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 82510 + }, + { + "epoch": 542.8947368421053, + "grad_norm": 1.2808284759521484, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 82520 + }, + { + "epoch": 542.9605263157895, + "grad_norm": 1.0565862655639648, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 82530 + }, + { + "epoch": 543.0263157894736, + "grad_norm": 1.2909079790115356, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 82540 + }, + { + "epoch": 543.0921052631579, + "grad_norm": 1.1060192584991455, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 82550 + }, + { + "epoch": 543.1578947368421, + "grad_norm": 0.9638274908065796, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 82560 + }, + { + "epoch": 543.2236842105264, + "grad_norm": 1.082889437675476, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 82570 + }, + { + "epoch": 543.2894736842105, + "grad_norm": 0.6525837182998657, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 82580 + }, + { + "epoch": 543.3552631578947, + "grad_norm": 1.2893084287643433, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 82590 + }, + { + "epoch": 543.421052631579, + "grad_norm": 1.0150879621505737, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 82600 + }, + { + "epoch": 543.4868421052631, + "grad_norm": 0.5977818965911865, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 82610 + }, + { + "epoch": 543.5526315789474, + "grad_norm": 1.1314287185668945, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 82620 + }, + { + "epoch": 543.6184210526316, + "grad_norm": 1.184816837310791, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 82630 + }, + { + "epoch": 543.6842105263158, + "grad_norm": 0.9463902711868286, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 82640 + }, + { + "epoch": 543.75, + "grad_norm": 1.0592985153198242, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 82650 + }, + { + "epoch": 543.8157894736842, + "grad_norm": 1.3953194618225098, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 82660 + }, + { + "epoch": 543.8815789473684, + "grad_norm": 1.4499620199203491, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 82670 + }, + { + "epoch": 543.9473684210526, + "grad_norm": 1.2184102535247803, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 82680 + }, + { + "epoch": 544.0131578947369, + "grad_norm": 0.9526715874671936, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 82690 + }, + { + "epoch": 544.078947368421, + "grad_norm": 1.111823320388794, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 82700 + }, + { + "epoch": 544.1447368421053, + "grad_norm": 1.0925346612930298, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 82710 + }, + { + "epoch": 544.2105263157895, + "grad_norm": 1.0318348407745361, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 82720 + }, + { + "epoch": 544.2763157894736, + "grad_norm": 1.3430664539337158, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 82730 + }, + { + "epoch": 544.3421052631579, + "grad_norm": 0.8877522945404053, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 82740 + }, + { + "epoch": 544.4078947368421, + "grad_norm": 1.2313371896743774, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 82750 + }, + { + "epoch": 544.4736842105264, + "grad_norm": 1.1085177659988403, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 82760 + }, + { + "epoch": 544.5394736842105, + "grad_norm": 1.0655664205551147, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 82770 + }, + { + "epoch": 544.6052631578947, + "grad_norm": 1.3107541799545288, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 82780 + }, + { + "epoch": 544.671052631579, + "grad_norm": 1.2717570066452026, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 82790 + }, + { + "epoch": 544.7368421052631, + "grad_norm": 1.365206003189087, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 82800 + }, + { + "epoch": 544.8026315789474, + "grad_norm": 1.2015581130981445, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 82810 + }, + { + "epoch": 544.8684210526316, + "grad_norm": 0.8265142440795898, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 82820 + }, + { + "epoch": 544.9342105263158, + "grad_norm": 1.278316617012024, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 82830 + }, + { + "epoch": 545.0, + "grad_norm": 0.8285160064697266, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 82840 + }, + { + "epoch": 545.0657894736842, + "grad_norm": 1.131121277809143, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 82850 + }, + { + "epoch": 545.1315789473684, + "grad_norm": 1.1002274751663208, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 82860 + }, + { + "epoch": 545.1973684210526, + "grad_norm": 1.3011295795440674, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 82870 + }, + { + "epoch": 545.2631578947369, + "grad_norm": 0.9043525457382202, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 82880 + }, + { + "epoch": 545.328947368421, + "grad_norm": 1.1586050987243652, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 82890 + }, + { + "epoch": 545.3947368421053, + "grad_norm": 1.1788783073425293, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 82900 + }, + { + "epoch": 545.4605263157895, + "grad_norm": 1.1490991115570068, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 82910 + }, + { + "epoch": 545.5263157894736, + "grad_norm": 0.9973250031471252, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 82920 + }, + { + "epoch": 545.5921052631579, + "grad_norm": 0.9318479299545288, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 82930 + }, + { + "epoch": 545.6578947368421, + "grad_norm": 0.8547037839889526, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 82940 + }, + { + "epoch": 545.7236842105264, + "grad_norm": 1.1414095163345337, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 82950 + }, + { + "epoch": 545.7894736842105, + "grad_norm": 1.1906640529632568, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 82960 + }, + { + "epoch": 545.8552631578947, + "grad_norm": 0.7530783414840698, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 82970 + }, + { + "epoch": 545.921052631579, + "grad_norm": 1.2797421216964722, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 82980 + }, + { + "epoch": 545.9868421052631, + "grad_norm": 0.995582640171051, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 82990 + }, + { + "epoch": 546.0526315789474, + "grad_norm": 1.1054130792617798, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 83000 + }, + { + "epoch": 546.1184210526316, + "grad_norm": 1.2854208946228027, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 83010 + }, + { + "epoch": 546.1842105263158, + "grad_norm": 1.1343199014663696, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 83020 + }, + { + "epoch": 546.25, + "grad_norm": 0.953602135181427, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 83030 + }, + { + "epoch": 546.3157894736842, + "grad_norm": 1.223362922668457, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 83040 + }, + { + "epoch": 546.3815789473684, + "grad_norm": 1.4338486194610596, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 83050 + }, + { + "epoch": 546.4473684210526, + "grad_norm": 1.1713676452636719, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 83060 + }, + { + "epoch": 546.5131578947369, + "grad_norm": 1.1168043613433838, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 83070 + }, + { + "epoch": 546.578947368421, + "grad_norm": 1.1231666803359985, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 83080 + }, + { + "epoch": 546.6447368421053, + "grad_norm": 1.1550521850585938, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 83090 + }, + { + "epoch": 546.7105263157895, + "grad_norm": 1.1135015487670898, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 83100 + }, + { + "epoch": 546.7763157894736, + "grad_norm": 1.2623625993728638, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 83110 + }, + { + "epoch": 546.8421052631579, + "grad_norm": 0.7757622003555298, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 83120 + }, + { + "epoch": 546.9078947368421, + "grad_norm": 0.9508805274963379, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 83130 + }, + { + "epoch": 546.9736842105264, + "grad_norm": 0.7631132006645203, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 83140 + }, + { + "epoch": 547.0394736842105, + "grad_norm": 0.9571039080619812, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 83150 + }, + { + "epoch": 547.1052631578947, + "grad_norm": 0.9324824213981628, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 83160 + }, + { + "epoch": 547.171052631579, + "grad_norm": 1.1061806678771973, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 83170 + }, + { + "epoch": 547.2368421052631, + "grad_norm": 0.9221788048744202, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 83180 + }, + { + "epoch": 547.3026315789474, + "grad_norm": 1.1765472888946533, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 83190 + }, + { + "epoch": 547.3684210526316, + "grad_norm": 1.0740410089492798, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 83200 + }, + { + "epoch": 547.4342105263158, + "grad_norm": 1.1589864492416382, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 83210 + }, + { + "epoch": 547.5, + "grad_norm": 1.1935651302337646, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 83220 + }, + { + "epoch": 547.5657894736842, + "grad_norm": 0.8413220047950745, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 83230 + }, + { + "epoch": 547.6315789473684, + "grad_norm": 0.8780201077461243, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 83240 + }, + { + "epoch": 547.6973684210526, + "grad_norm": 1.1799852848052979, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 83250 + }, + { + "epoch": 547.7631578947369, + "grad_norm": 0.9087440371513367, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 83260 + }, + { + "epoch": 547.828947368421, + "grad_norm": 1.065704584121704, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 83270 + }, + { + "epoch": 547.8947368421053, + "grad_norm": 1.0333939790725708, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 83280 + }, + { + "epoch": 547.9605263157895, + "grad_norm": 0.951385498046875, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 83290 + }, + { + "epoch": 548.0263157894736, + "grad_norm": 0.7980932593345642, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 83300 + }, + { + "epoch": 548.0921052631579, + "grad_norm": 1.1457808017730713, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 83310 + }, + { + "epoch": 548.1578947368421, + "grad_norm": 0.9355278015136719, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 83320 + }, + { + "epoch": 548.2236842105264, + "grad_norm": 0.7948923707008362, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 83330 + }, + { + "epoch": 548.2894736842105, + "grad_norm": 0.9692309498786926, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 83340 + }, + { + "epoch": 548.3552631578947, + "grad_norm": 1.2351458072662354, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 83350 + }, + { + "epoch": 548.421052631579, + "grad_norm": 0.8993010520935059, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 83360 + }, + { + "epoch": 548.4868421052631, + "grad_norm": 1.3239619731903076, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 83370 + }, + { + "epoch": 548.5526315789474, + "grad_norm": 1.2244657278060913, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 83380 + }, + { + "epoch": 548.6184210526316, + "grad_norm": 1.208211064338684, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 83390 + }, + { + "epoch": 548.6842105263158, + "grad_norm": 1.225913405418396, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 83400 + }, + { + "epoch": 548.75, + "grad_norm": 1.019867181777954, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 83410 + }, + { + "epoch": 548.8157894736842, + "grad_norm": 1.138984203338623, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 83420 + }, + { + "epoch": 548.8815789473684, + "grad_norm": 1.0787227153778076, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 83430 + }, + { + "epoch": 548.9473684210526, + "grad_norm": 1.3269912004470825, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 83440 + }, + { + "epoch": 549.0131578947369, + "grad_norm": 1.1982839107513428, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 83450 + }, + { + "epoch": 549.078947368421, + "grad_norm": 1.4429033994674683, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 83460 + }, + { + "epoch": 549.1447368421053, + "grad_norm": 1.1085598468780518, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 83470 + }, + { + "epoch": 549.2105263157895, + "grad_norm": 1.2021725177764893, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 83480 + }, + { + "epoch": 549.2763157894736, + "grad_norm": 0.8998311161994934, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 83490 + }, + { + "epoch": 549.3421052631579, + "grad_norm": 1.5632169246673584, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 83500 + }, + { + "epoch": 549.4078947368421, + "grad_norm": 1.549469232559204, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 83510 + }, + { + "epoch": 549.4736842105264, + "grad_norm": 1.4514491558074951, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 83520 + }, + { + "epoch": 549.5394736842105, + "grad_norm": 1.0409729480743408, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 83530 + }, + { + "epoch": 549.6052631578947, + "grad_norm": 1.5041582584381104, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 83540 + }, + { + "epoch": 549.671052631579, + "grad_norm": 0.9774188995361328, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 83550 + }, + { + "epoch": 549.7368421052631, + "grad_norm": 1.5498707294464111, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 83560 + }, + { + "epoch": 549.8026315789474, + "grad_norm": 1.5098849534988403, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 83570 + }, + { + "epoch": 549.8684210526316, + "grad_norm": 1.2632389068603516, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 83580 + }, + { + "epoch": 549.9342105263158, + "grad_norm": 1.1932131052017212, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 83590 + }, + { + "epoch": 550.0, + "grad_norm": 1.4617688655853271, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 83600 + }, + { + "epoch": 550.0657894736842, + "grad_norm": 1.0467870235443115, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 83610 + }, + { + "epoch": 550.1315789473684, + "grad_norm": 1.3570033311843872, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 83620 + }, + { + "epoch": 550.1973684210526, + "grad_norm": 1.4261776208877563, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 83630 + }, + { + "epoch": 550.2631578947369, + "grad_norm": 1.2804388999938965, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 83640 + }, + { + "epoch": 550.328947368421, + "grad_norm": 1.119381070137024, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 83650 + }, + { + "epoch": 550.3947368421053, + "grad_norm": 1.0963352918624878, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 83660 + }, + { + "epoch": 550.4605263157895, + "grad_norm": 0.940981924533844, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 83670 + }, + { + "epoch": 550.5263157894736, + "grad_norm": 1.4243806600570679, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 83680 + }, + { + "epoch": 550.5921052631579, + "grad_norm": 1.049736738204956, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 83690 + }, + { + "epoch": 550.6578947368421, + "grad_norm": 1.2986576557159424, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 83700 + }, + { + "epoch": 550.7236842105264, + "grad_norm": 0.9634553790092468, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 83710 + }, + { + "epoch": 550.7894736842105, + "grad_norm": 1.209815263748169, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 83720 + }, + { + "epoch": 550.8552631578947, + "grad_norm": 1.3946318626403809, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 83730 + }, + { + "epoch": 550.921052631579, + "grad_norm": 1.1185719966888428, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 83740 + }, + { + "epoch": 550.9868421052631, + "grad_norm": 0.8527510762214661, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 83750 + }, + { + "epoch": 551.0526315789474, + "grad_norm": 1.235743761062622, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 83760 + }, + { + "epoch": 551.1184210526316, + "grad_norm": 1.2538394927978516, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 83770 + }, + { + "epoch": 551.1842105263158, + "grad_norm": 1.089035987854004, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 83780 + }, + { + "epoch": 551.25, + "grad_norm": 1.1372734308242798, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 83790 + }, + { + "epoch": 551.3157894736842, + "grad_norm": 1.026092290878296, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 83800 + }, + { + "epoch": 551.3815789473684, + "grad_norm": 1.2157622575759888, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 83810 + }, + { + "epoch": 551.4473684210526, + "grad_norm": 1.1005877256393433, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 83820 + }, + { + "epoch": 551.5131578947369, + "grad_norm": 1.059844970703125, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 83830 + }, + { + "epoch": 551.578947368421, + "grad_norm": 1.1124544143676758, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 83840 + }, + { + "epoch": 551.6447368421053, + "grad_norm": 1.0764391422271729, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 83850 + }, + { + "epoch": 551.7105263157895, + "grad_norm": 1.031297206878662, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 83860 + }, + { + "epoch": 551.7763157894736, + "grad_norm": 1.022562026977539, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 83870 + }, + { + "epoch": 551.8421052631579, + "grad_norm": 1.1150662899017334, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 83880 + }, + { + "epoch": 551.9078947368421, + "grad_norm": 1.0998282432556152, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 83890 + }, + { + "epoch": 551.9736842105264, + "grad_norm": 1.265975832939148, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 83900 + }, + { + "epoch": 552.0394736842105, + "grad_norm": 1.0744695663452148, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 83910 + }, + { + "epoch": 552.1052631578947, + "grad_norm": 1.1602157354354858, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 83920 + }, + { + "epoch": 552.171052631579, + "grad_norm": 1.2619503736495972, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 83930 + }, + { + "epoch": 552.2368421052631, + "grad_norm": 1.3881173133850098, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 83940 + }, + { + "epoch": 552.3026315789474, + "grad_norm": 1.3225895166397095, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 83950 + }, + { + "epoch": 552.3684210526316, + "grad_norm": 1.4558875560760498, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 83960 + }, + { + "epoch": 552.4342105263158, + "grad_norm": 0.8859315514564514, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 83970 + }, + { + "epoch": 552.5, + "grad_norm": 0.9375401735305786, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 83980 + }, + { + "epoch": 552.5657894736842, + "grad_norm": 1.0013401508331299, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 83990 + }, + { + "epoch": 552.6315789473684, + "grad_norm": 0.8222454190254211, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 84000 + }, + { + "epoch": 552.6973684210526, + "grad_norm": 1.0796682834625244, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 84010 + }, + { + "epoch": 552.7631578947369, + "grad_norm": 1.3614517450332642, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 84020 + }, + { + "epoch": 552.828947368421, + "grad_norm": 1.4528874158859253, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 84030 + }, + { + "epoch": 552.8947368421053, + "grad_norm": 1.2762951850891113, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 84040 + }, + { + "epoch": 552.9605263157895, + "grad_norm": 1.0010347366333008, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 84050 + }, + { + "epoch": 553.0263157894736, + "grad_norm": 1.4103955030441284, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 84060 + }, + { + "epoch": 553.0921052631579, + "grad_norm": 1.2792900800704956, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 84070 + }, + { + "epoch": 553.1578947368421, + "grad_norm": 1.1283965110778809, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 84080 + }, + { + "epoch": 553.2236842105264, + "grad_norm": 1.2378798723220825, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 84090 + }, + { + "epoch": 553.2894736842105, + "grad_norm": 1.0513310432434082, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 84100 + }, + { + "epoch": 553.3552631578947, + "grad_norm": 1.0277812480926514, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 84110 + }, + { + "epoch": 553.421052631579, + "grad_norm": 1.211808681488037, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 84120 + }, + { + "epoch": 553.4868421052631, + "grad_norm": 1.0594956874847412, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 84130 + }, + { + "epoch": 553.5526315789474, + "grad_norm": 1.1370322704315186, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 84140 + }, + { + "epoch": 553.6184210526316, + "grad_norm": 0.897812008857727, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 84150 + }, + { + "epoch": 553.6842105263158, + "grad_norm": 0.9729959964752197, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 84160 + }, + { + "epoch": 553.75, + "grad_norm": 1.1582231521606445, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 84170 + }, + { + "epoch": 553.8157894736842, + "grad_norm": 1.115011215209961, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 84180 + }, + { + "epoch": 553.8815789473684, + "grad_norm": 1.1389967203140259, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 84190 + }, + { + "epoch": 553.9473684210526, + "grad_norm": 1.1534051895141602, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 84200 + }, + { + "epoch": 554.0131578947369, + "grad_norm": 1.1648868322372437, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 84210 + }, + { + "epoch": 554.078947368421, + "grad_norm": 0.9899840354919434, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 84220 + }, + { + "epoch": 554.1447368421053, + "grad_norm": 1.097878336906433, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 84230 + }, + { + "epoch": 554.2105263157895, + "grad_norm": 1.1488139629364014, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 84240 + }, + { + "epoch": 554.2763157894736, + "grad_norm": 1.2230936288833618, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 84250 + }, + { + "epoch": 554.3421052631579, + "grad_norm": 1.2412559986114502, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 84260 + }, + { + "epoch": 554.4078947368421, + "grad_norm": 1.044669270515442, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 84270 + }, + { + "epoch": 554.4736842105264, + "grad_norm": 0.9399918913841248, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 84280 + }, + { + "epoch": 554.5394736842105, + "grad_norm": 0.8007739782333374, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 84290 + }, + { + "epoch": 554.6052631578947, + "grad_norm": 0.68852698802948, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 84300 + }, + { + "epoch": 554.671052631579, + "grad_norm": 1.3844374418258667, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 84310 + }, + { + "epoch": 554.7368421052631, + "grad_norm": 0.897927463054657, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 84320 + }, + { + "epoch": 554.8026315789474, + "grad_norm": 1.1353065967559814, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 84330 + }, + { + "epoch": 554.8684210526316, + "grad_norm": 1.1590604782104492, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 84340 + }, + { + "epoch": 554.9342105263158, + "grad_norm": 1.2227908372879028, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 84350 + }, + { + "epoch": 555.0, + "grad_norm": 1.2319128513336182, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 84360 + }, + { + "epoch": 555.0657894736842, + "grad_norm": 1.0367213487625122, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 84370 + }, + { + "epoch": 555.1315789473684, + "grad_norm": 1.069209098815918, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 84380 + }, + { + "epoch": 555.1973684210526, + "grad_norm": 1.0245308876037598, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 84390 + }, + { + "epoch": 555.2631578947369, + "grad_norm": 1.2172003984451294, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 84400 + }, + { + "epoch": 555.328947368421, + "grad_norm": 1.0955407619476318, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 84410 + }, + { + "epoch": 555.3947368421053, + "grad_norm": 1.0502513647079468, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 84420 + }, + { + "epoch": 555.4605263157895, + "grad_norm": 1.2674477100372314, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 84430 + }, + { + "epoch": 555.5263157894736, + "grad_norm": 1.4851372241973877, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 84440 + }, + { + "epoch": 555.5921052631579, + "grad_norm": 1.4355062246322632, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 84450 + }, + { + "epoch": 555.6578947368421, + "grad_norm": 1.2910557985305786, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 84460 + }, + { + "epoch": 555.7236842105264, + "grad_norm": 0.9084872603416443, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 84470 + }, + { + "epoch": 555.7894736842105, + "grad_norm": 1.1866637468338013, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 84480 + }, + { + "epoch": 555.8552631578947, + "grad_norm": 1.1410084962844849, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 84490 + }, + { + "epoch": 555.921052631579, + "grad_norm": 1.256248116493225, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 84500 + }, + { + "epoch": 555.9868421052631, + "grad_norm": 1.1614428758621216, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 84510 + }, + { + "epoch": 556.0526315789474, + "grad_norm": 1.1941181421279907, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 84520 + }, + { + "epoch": 556.1184210526316, + "grad_norm": 1.1115273237228394, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 84530 + }, + { + "epoch": 556.1842105263158, + "grad_norm": 0.8539004325866699, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 84540 + }, + { + "epoch": 556.25, + "grad_norm": 1.1488637924194336, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 84550 + }, + { + "epoch": 556.3157894736842, + "grad_norm": 1.1279138326644897, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 84560 + }, + { + "epoch": 556.3815789473684, + "grad_norm": 1.3774553537368774, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 84570 + }, + { + "epoch": 556.4473684210526, + "grad_norm": 1.3250354528427124, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 84580 + }, + { + "epoch": 556.5131578947369, + "grad_norm": 1.1280083656311035, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 84590 + }, + { + "epoch": 556.578947368421, + "grad_norm": 0.9719551205635071, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 84600 + }, + { + "epoch": 556.6447368421053, + "grad_norm": 0.8734564781188965, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 84610 + }, + { + "epoch": 556.7105263157895, + "grad_norm": 1.252884864807129, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 84620 + }, + { + "epoch": 556.7763157894736, + "grad_norm": 1.2169554233551025, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 84630 + }, + { + "epoch": 556.8421052631579, + "grad_norm": 1.3537654876708984, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 84640 + }, + { + "epoch": 556.9078947368421, + "grad_norm": 1.1690727472305298, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 84650 + }, + { + "epoch": 556.9736842105264, + "grad_norm": 1.4341109991073608, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 84660 + }, + { + "epoch": 557.0394736842105, + "grad_norm": 1.1728025674819946, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 84670 + }, + { + "epoch": 557.1052631578947, + "grad_norm": 1.1351969242095947, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 84680 + }, + { + "epoch": 557.171052631579, + "grad_norm": 1.049938440322876, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 84690 + }, + { + "epoch": 557.2368421052631, + "grad_norm": 1.0241278409957886, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 84700 + }, + { + "epoch": 557.3026315789474, + "grad_norm": 1.357133150100708, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 84710 + }, + { + "epoch": 557.3684210526316, + "grad_norm": 1.2477604150772095, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 84720 + }, + { + "epoch": 557.4342105263158, + "grad_norm": 1.1618016958236694, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 84730 + }, + { + "epoch": 557.5, + "grad_norm": 1.4906044006347656, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 84740 + }, + { + "epoch": 557.5657894736842, + "grad_norm": 1.6575461626052856, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 84750 + }, + { + "epoch": 557.6315789473684, + "grad_norm": 1.5979561805725098, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 84760 + }, + { + "epoch": 557.6973684210526, + "grad_norm": 1.49028480052948, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 84770 + }, + { + "epoch": 557.7631578947369, + "grad_norm": 1.476001501083374, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 84780 + }, + { + "epoch": 557.828947368421, + "grad_norm": 1.1101016998291016, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 84790 + }, + { + "epoch": 557.8947368421053, + "grad_norm": 1.2816787958145142, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 84800 + }, + { + "epoch": 557.9605263157895, + "grad_norm": 1.4827154874801636, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 84810 + }, + { + "epoch": 558.0263157894736, + "grad_norm": 1.2179068326950073, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 84820 + }, + { + "epoch": 558.0921052631579, + "grad_norm": 1.0725390911102295, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 84830 + }, + { + "epoch": 558.1578947368421, + "grad_norm": 1.1772629022598267, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 84840 + }, + { + "epoch": 558.2236842105264, + "grad_norm": 1.1219836473464966, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 84850 + }, + { + "epoch": 558.2894736842105, + "grad_norm": 1.1598541736602783, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 84860 + }, + { + "epoch": 558.3552631578947, + "grad_norm": 1.2729027271270752, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 84870 + }, + { + "epoch": 558.421052631579, + "grad_norm": 1.107276201248169, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 84880 + }, + { + "epoch": 558.4868421052631, + "grad_norm": 0.9644288420677185, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 84890 + }, + { + "epoch": 558.5526315789474, + "grad_norm": 0.8253970742225647, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 84900 + }, + { + "epoch": 558.6184210526316, + "grad_norm": 0.98391193151474, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 84910 + }, + { + "epoch": 558.6842105263158, + "grad_norm": 1.0832492113113403, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 84920 + }, + { + "epoch": 558.75, + "grad_norm": 0.9063038229942322, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 84930 + }, + { + "epoch": 558.8157894736842, + "grad_norm": 1.0386995077133179, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 84940 + }, + { + "epoch": 558.8815789473684, + "grad_norm": 0.7958237528800964, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 84950 + }, + { + "epoch": 558.9473684210526, + "grad_norm": 0.860396683216095, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 84960 + }, + { + "epoch": 559.0131578947369, + "grad_norm": 0.9728363752365112, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 84970 + }, + { + "epoch": 559.078947368421, + "grad_norm": 1.1759837865829468, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 84980 + }, + { + "epoch": 559.1447368421053, + "grad_norm": 1.0870031118392944, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 84990 + }, + { + "epoch": 559.2105263157895, + "grad_norm": 1.1532487869262695, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 85000 + }, + { + "epoch": 559.2763157894736, + "grad_norm": 1.2575640678405762, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 85010 + }, + { + "epoch": 559.3421052631579, + "grad_norm": 1.3786592483520508, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 85020 + }, + { + "epoch": 559.4078947368421, + "grad_norm": 0.8900845050811768, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 85030 + }, + { + "epoch": 559.4736842105264, + "grad_norm": 1.2417876720428467, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 85040 + }, + { + "epoch": 559.5394736842105, + "grad_norm": 1.2613035440444946, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 85050 + }, + { + "epoch": 559.6052631578947, + "grad_norm": 0.8748085498809814, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 85060 + }, + { + "epoch": 559.671052631579, + "grad_norm": 1.059360384941101, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 85070 + }, + { + "epoch": 559.7368421052631, + "grad_norm": 0.8595458269119263, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 85080 + }, + { + "epoch": 559.8026315789474, + "grad_norm": 1.0253055095672607, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 85090 + }, + { + "epoch": 559.8684210526316, + "grad_norm": 1.1457302570343018, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 85100 + }, + { + "epoch": 559.9342105263158, + "grad_norm": 1.5385023355484009, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 85110 + }, + { + "epoch": 560.0, + "grad_norm": 1.041575312614441, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 85120 + }, + { + "epoch": 560.0657894736842, + "grad_norm": 0.9644649624824524, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 85130 + }, + { + "epoch": 560.1315789473684, + "grad_norm": 1.3442351818084717, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 85140 + }, + { + "epoch": 560.1973684210526, + "grad_norm": 1.4101388454437256, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 85150 + }, + { + "epoch": 560.2631578947369, + "grad_norm": 1.3312458992004395, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 85160 + }, + { + "epoch": 560.328947368421, + "grad_norm": 1.2617542743682861, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 85170 + }, + { + "epoch": 560.3947368421053, + "grad_norm": 0.9706904888153076, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 85180 + }, + { + "epoch": 560.4605263157895, + "grad_norm": 1.0391241312026978, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 85190 + }, + { + "epoch": 560.5263157894736, + "grad_norm": 0.7928785085678101, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 85200 + }, + { + "epoch": 560.5921052631579, + "grad_norm": 0.8561776280403137, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 85210 + }, + { + "epoch": 560.6578947368421, + "grad_norm": 1.097593903541565, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 85220 + }, + { + "epoch": 560.7236842105264, + "grad_norm": 0.9008827805519104, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 85230 + }, + { + "epoch": 560.7894736842105, + "grad_norm": 1.3910067081451416, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 85240 + }, + { + "epoch": 560.8552631578947, + "grad_norm": 1.2383750677108765, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 85250 + }, + { + "epoch": 560.921052631579, + "grad_norm": 1.2001198530197144, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 85260 + }, + { + "epoch": 560.9868421052631, + "grad_norm": 1.0926671028137207, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 85270 + }, + { + "epoch": 561.0526315789474, + "grad_norm": 1.1885424852371216, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 85280 + }, + { + "epoch": 561.1184210526316, + "grad_norm": 1.3044886589050293, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 85290 + }, + { + "epoch": 561.1842105263158, + "grad_norm": 1.361910104751587, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 85300 + }, + { + "epoch": 561.25, + "grad_norm": 1.2132842540740967, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 85310 + }, + { + "epoch": 561.3157894736842, + "grad_norm": 1.2285622358322144, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 85320 + }, + { + "epoch": 561.3815789473684, + "grad_norm": 1.0628094673156738, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 85330 + }, + { + "epoch": 561.4473684210526, + "grad_norm": 1.0431807041168213, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 85340 + }, + { + "epoch": 561.5131578947369, + "grad_norm": 0.8674909472465515, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 85350 + }, + { + "epoch": 561.578947368421, + "grad_norm": 0.8657084107398987, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 85360 + }, + { + "epoch": 561.6447368421053, + "grad_norm": 1.0542387962341309, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 85370 + }, + { + "epoch": 561.7105263157895, + "grad_norm": 1.1342475414276123, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 85380 + }, + { + "epoch": 561.7763157894736, + "grad_norm": 1.0846221446990967, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 85390 + }, + { + "epoch": 561.8421052631579, + "grad_norm": 1.0843048095703125, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 85400 + }, + { + "epoch": 561.9078947368421, + "grad_norm": 1.266693115234375, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 85410 + }, + { + "epoch": 561.9736842105264, + "grad_norm": 1.3362053632736206, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 85420 + }, + { + "epoch": 562.0394736842105, + "grad_norm": 1.1382287740707397, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 85430 + }, + { + "epoch": 562.1052631578947, + "grad_norm": 1.0653843879699707, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 85440 + }, + { + "epoch": 562.171052631579, + "grad_norm": 0.9367539882659912, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 85450 + }, + { + "epoch": 562.2368421052631, + "grad_norm": 0.8476194143295288, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 85460 + }, + { + "epoch": 562.3026315789474, + "grad_norm": 1.0958266258239746, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 85470 + }, + { + "epoch": 562.3684210526316, + "grad_norm": 1.2579114437103271, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 85480 + }, + { + "epoch": 562.4342105263158, + "grad_norm": 1.1456224918365479, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 85490 + }, + { + "epoch": 562.5, + "grad_norm": 1.2678587436676025, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 85500 + }, + { + "epoch": 562.5657894736842, + "grad_norm": 1.0048190355300903, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 85510 + }, + { + "epoch": 562.6315789473684, + "grad_norm": 0.7200008630752563, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 85520 + }, + { + "epoch": 562.6973684210526, + "grad_norm": 1.4088075160980225, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 85530 + }, + { + "epoch": 562.7631578947369, + "grad_norm": 1.2613880634307861, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 85540 + }, + { + "epoch": 562.828947368421, + "grad_norm": 1.0894513130187988, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 85550 + }, + { + "epoch": 562.8947368421053, + "grad_norm": 1.0057373046875, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 85560 + }, + { + "epoch": 562.9605263157895, + "grad_norm": 1.1327048540115356, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 85570 + }, + { + "epoch": 563.0263157894736, + "grad_norm": 1.3295555114746094, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 85580 + }, + { + "epoch": 563.0921052631579, + "grad_norm": 1.098968505859375, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 85590 + }, + { + "epoch": 563.1578947368421, + "grad_norm": 1.1151443719863892, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 85600 + }, + { + "epoch": 563.2236842105264, + "grad_norm": 1.261618971824646, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 85610 + }, + { + "epoch": 563.2894736842105, + "grad_norm": 0.6959012150764465, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 85620 + }, + { + "epoch": 563.3552631578947, + "grad_norm": 0.9894868731498718, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 85630 + }, + { + "epoch": 563.421052631579, + "grad_norm": 0.8784093260765076, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 85640 + }, + { + "epoch": 563.4868421052631, + "grad_norm": 0.5873738527297974, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 85650 + }, + { + "epoch": 563.5526315789474, + "grad_norm": 0.7651575803756714, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 85660 + }, + { + "epoch": 563.6184210526316, + "grad_norm": 0.6802495718002319, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 85670 + }, + { + "epoch": 563.6842105263158, + "grad_norm": 0.948125422000885, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 85680 + }, + { + "epoch": 563.75, + "grad_norm": 1.1198630332946777, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 85690 + }, + { + "epoch": 563.8157894736842, + "grad_norm": 1.2532132863998413, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 85700 + }, + { + "epoch": 563.8815789473684, + "grad_norm": 0.9288182854652405, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 85710 + }, + { + "epoch": 563.9473684210526, + "grad_norm": 1.1498973369598389, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 85720 + }, + { + "epoch": 564.0131578947369, + "grad_norm": 0.9289548397064209, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 85730 + }, + { + "epoch": 564.078947368421, + "grad_norm": 0.6125806570053101, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 85740 + }, + { + "epoch": 564.1447368421053, + "grad_norm": 0.762289822101593, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 85750 + }, + { + "epoch": 564.2105263157895, + "grad_norm": 1.2654229402542114, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 85760 + }, + { + "epoch": 564.2763157894736, + "grad_norm": 1.2169716358184814, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 85770 + }, + { + "epoch": 564.3421052631579, + "grad_norm": 1.0589436292648315, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 85780 + }, + { + "epoch": 564.4078947368421, + "grad_norm": 0.9136533141136169, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 85790 + }, + { + "epoch": 564.4736842105264, + "grad_norm": 0.831851601600647, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 85800 + }, + { + "epoch": 564.5394736842105, + "grad_norm": 0.9365316033363342, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 85810 + }, + { + "epoch": 564.6052631578947, + "grad_norm": 1.4530662298202515, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 85820 + }, + { + "epoch": 564.671052631579, + "grad_norm": 1.1239123344421387, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 85830 + }, + { + "epoch": 564.7368421052631, + "grad_norm": 1.2323276996612549, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 85840 + }, + { + "epoch": 564.8026315789474, + "grad_norm": 0.9077661633491516, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 85850 + }, + { + "epoch": 564.8684210526316, + "grad_norm": 1.2481967210769653, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 85860 + }, + { + "epoch": 564.9342105263158, + "grad_norm": 1.2315006256103516, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 85870 + }, + { + "epoch": 565.0, + "grad_norm": 1.2639683485031128, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 85880 + }, + { + "epoch": 565.0657894736842, + "grad_norm": 1.2742642164230347, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 85890 + }, + { + "epoch": 565.1315789473684, + "grad_norm": 1.1369564533233643, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 85900 + }, + { + "epoch": 565.1973684210526, + "grad_norm": 1.4705597162246704, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 85910 + }, + { + "epoch": 565.2631578947369, + "grad_norm": 1.326097846031189, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 85920 + }, + { + "epoch": 565.328947368421, + "grad_norm": 0.9751961827278137, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 85930 + }, + { + "epoch": 565.3947368421053, + "grad_norm": 1.3746706247329712, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 85940 + }, + { + "epoch": 565.4605263157895, + "grad_norm": 1.2249608039855957, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 85950 + }, + { + "epoch": 565.5263157894736, + "grad_norm": 1.0640735626220703, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 85960 + }, + { + "epoch": 565.5921052631579, + "grad_norm": 1.0786769390106201, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 85970 + }, + { + "epoch": 565.6578947368421, + "grad_norm": 1.5025138854980469, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 85980 + }, + { + "epoch": 565.7236842105264, + "grad_norm": 1.311964511871338, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 85990 + }, + { + "epoch": 565.7894736842105, + "grad_norm": 1.1438058614730835, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 86000 + }, + { + "epoch": 565.8552631578947, + "grad_norm": 0.921307384967804, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 86010 + }, + { + "epoch": 565.921052631579, + "grad_norm": 1.0109132528305054, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 86020 + }, + { + "epoch": 565.9868421052631, + "grad_norm": 1.3036121129989624, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 86030 + }, + { + "epoch": 566.0526315789474, + "grad_norm": 1.1101711988449097, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 86040 + }, + { + "epoch": 566.1184210526316, + "grad_norm": 1.3747954368591309, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 86050 + }, + { + "epoch": 566.1842105263158, + "grad_norm": 1.133420705795288, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 86060 + }, + { + "epoch": 566.25, + "grad_norm": 1.0541547536849976, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 86070 + }, + { + "epoch": 566.3157894736842, + "grad_norm": 1.0774303674697876, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 86080 + }, + { + "epoch": 566.3815789473684, + "grad_norm": 1.3107317686080933, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 86090 + }, + { + "epoch": 566.4473684210526, + "grad_norm": 0.8362441658973694, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 86100 + }, + { + "epoch": 566.5131578947369, + "grad_norm": 1.1291522979736328, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 86110 + }, + { + "epoch": 566.578947368421, + "grad_norm": 1.062232494354248, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 86120 + }, + { + "epoch": 566.6447368421053, + "grad_norm": 1.1691906452178955, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 86130 + }, + { + "epoch": 566.7105263157895, + "grad_norm": 1.1058992147445679, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 86140 + }, + { + "epoch": 566.7763157894736, + "grad_norm": 0.6713123321533203, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 86150 + }, + { + "epoch": 566.8421052631579, + "grad_norm": 0.7876428365707397, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 86160 + }, + { + "epoch": 566.9078947368421, + "grad_norm": 1.0768256187438965, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 86170 + }, + { + "epoch": 566.9736842105264, + "grad_norm": 1.6893032789230347, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 86180 + }, + { + "epoch": 567.0394736842105, + "grad_norm": 0.7960553765296936, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 86190 + }, + { + "epoch": 567.1052631578947, + "grad_norm": 0.6956373453140259, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 86200 + }, + { + "epoch": 567.171052631579, + "grad_norm": 1.1536741256713867, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 86210 + }, + { + "epoch": 567.2368421052631, + "grad_norm": 0.8399643301963806, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 86220 + }, + { + "epoch": 567.3026315789474, + "grad_norm": 0.8098105192184448, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 86230 + }, + { + "epoch": 567.3684210526316, + "grad_norm": 0.8427768349647522, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 86240 + }, + { + "epoch": 567.4342105263158, + "grad_norm": 0.9141896963119507, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 86250 + }, + { + "epoch": 567.5, + "grad_norm": 1.6003632545471191, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 86260 + }, + { + "epoch": 567.5657894736842, + "grad_norm": 1.1154911518096924, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 86270 + }, + { + "epoch": 567.6315789473684, + "grad_norm": 1.1013606786727905, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 86280 + }, + { + "epoch": 567.6973684210526, + "grad_norm": 1.0994194746017456, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 86290 + }, + { + "epoch": 567.7631578947369, + "grad_norm": 0.9053112864494324, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 86300 + }, + { + "epoch": 567.828947368421, + "grad_norm": 1.4429203271865845, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 86310 + }, + { + "epoch": 567.8947368421053, + "grad_norm": 1.0973800420761108, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 86320 + }, + { + "epoch": 567.9605263157895, + "grad_norm": 1.2074874639511108, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 86330 + }, + { + "epoch": 568.0263157894736, + "grad_norm": 1.1671373844146729, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 86340 + }, + { + "epoch": 568.0921052631579, + "grad_norm": 1.0695538520812988, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 86350 + }, + { + "epoch": 568.1578947368421, + "grad_norm": 1.335986852645874, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 86360 + }, + { + "epoch": 568.2236842105264, + "grad_norm": 1.4699814319610596, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 86370 + }, + { + "epoch": 568.2894736842105, + "grad_norm": 1.071112871170044, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 86380 + }, + { + "epoch": 568.3552631578947, + "grad_norm": 1.1350902318954468, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 86390 + }, + { + "epoch": 568.421052631579, + "grad_norm": 1.3547534942626953, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 86400 + }, + { + "epoch": 568.4868421052631, + "grad_norm": 1.5289572477340698, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 86410 + }, + { + "epoch": 568.5526315789474, + "grad_norm": 1.0102062225341797, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 86420 + }, + { + "epoch": 568.6184210526316, + "grad_norm": 0.8501527309417725, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 86430 + }, + { + "epoch": 568.6842105263158, + "grad_norm": 0.9667913317680359, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 86440 + }, + { + "epoch": 568.75, + "grad_norm": 1.1724212169647217, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 86450 + }, + { + "epoch": 568.8157894736842, + "grad_norm": 1.0859756469726562, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 86460 + }, + { + "epoch": 568.8815789473684, + "grad_norm": 1.2196053266525269, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 86470 + }, + { + "epoch": 568.9473684210526, + "grad_norm": 0.9872120022773743, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 86480 + }, + { + "epoch": 569.0131578947369, + "grad_norm": 1.1090824604034424, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 86490 + }, + { + "epoch": 569.078947368421, + "grad_norm": 1.0156619548797607, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 86500 + }, + { + "epoch": 569.1447368421053, + "grad_norm": 0.6294759511947632, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 86510 + }, + { + "epoch": 569.2105263157895, + "grad_norm": 0.8278417587280273, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 86520 + }, + { + "epoch": 569.2763157894736, + "grad_norm": 0.8335527777671814, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 86530 + }, + { + "epoch": 569.3421052631579, + "grad_norm": 0.728941798210144, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 86540 + }, + { + "epoch": 569.4078947368421, + "grad_norm": 1.1835122108459473, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 86550 + }, + { + "epoch": 569.4736842105264, + "grad_norm": 1.3987102508544922, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 86560 + }, + { + "epoch": 569.5394736842105, + "grad_norm": 1.288018822669983, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 86570 + }, + { + "epoch": 569.6052631578947, + "grad_norm": 1.1396912336349487, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 86580 + }, + { + "epoch": 569.671052631579, + "grad_norm": 0.8793830871582031, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 86590 + }, + { + "epoch": 569.7368421052631, + "grad_norm": 0.8946493268013, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 86600 + }, + { + "epoch": 569.8026315789474, + "grad_norm": 0.8746724724769592, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 86610 + }, + { + "epoch": 569.8684210526316, + "grad_norm": 0.7413333654403687, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 86620 + }, + { + "epoch": 569.9342105263158, + "grad_norm": 1.0053696632385254, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 86630 + }, + { + "epoch": 570.0, + "grad_norm": 1.109494924545288, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 86640 + }, + { + "epoch": 570.0657894736842, + "grad_norm": 1.0057399272918701, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 86650 + }, + { + "epoch": 570.1315789473684, + "grad_norm": 0.9789668321609497, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 86660 + }, + { + "epoch": 570.1973684210526, + "grad_norm": 1.0310949087142944, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 86670 + }, + { + "epoch": 570.2631578947369, + "grad_norm": 0.9389269351959229, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 86680 + }, + { + "epoch": 570.328947368421, + "grad_norm": 0.8357144594192505, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 86690 + }, + { + "epoch": 570.3947368421053, + "grad_norm": 0.850374698638916, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 86700 + }, + { + "epoch": 570.4605263157895, + "grad_norm": 0.8948403596878052, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 86710 + }, + { + "epoch": 570.5263157894736, + "grad_norm": 1.1358330249786377, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 86720 + }, + { + "epoch": 570.5921052631579, + "grad_norm": 1.393713355064392, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 86730 + }, + { + "epoch": 570.6578947368421, + "grad_norm": 1.0343717336654663, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 86740 + }, + { + "epoch": 570.7236842105264, + "grad_norm": 0.8011754751205444, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 86750 + }, + { + "epoch": 570.7894736842105, + "grad_norm": 1.0685820579528809, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 86760 + }, + { + "epoch": 570.8552631578947, + "grad_norm": 1.2427990436553955, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 86770 + }, + { + "epoch": 570.921052631579, + "grad_norm": 1.2266359329223633, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 86780 + }, + { + "epoch": 570.9868421052631, + "grad_norm": 1.432370662689209, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 86790 + }, + { + "epoch": 571.0526315789474, + "grad_norm": 1.2090051174163818, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 86800 + }, + { + "epoch": 571.1184210526316, + "grad_norm": 0.8846063017845154, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 86810 + }, + { + "epoch": 571.1842105263158, + "grad_norm": 1.1082240343093872, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 86820 + }, + { + "epoch": 571.25, + "grad_norm": 1.0534160137176514, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 86830 + }, + { + "epoch": 571.3157894736842, + "grad_norm": 1.1436423063278198, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 86840 + }, + { + "epoch": 571.3815789473684, + "grad_norm": 0.8122831583023071, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 86850 + }, + { + "epoch": 571.4473684210526, + "grad_norm": 1.0820256471633911, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 86860 + }, + { + "epoch": 571.5131578947369, + "grad_norm": 0.848965585231781, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 86870 + }, + { + "epoch": 571.578947368421, + "grad_norm": 1.2625044584274292, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 86880 + }, + { + "epoch": 571.6447368421053, + "grad_norm": 1.3473550081253052, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 86890 + }, + { + "epoch": 571.7105263157895, + "grad_norm": 1.336229920387268, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 86900 + }, + { + "epoch": 571.7763157894736, + "grad_norm": 0.9691115617752075, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 86910 + }, + { + "epoch": 571.8421052631579, + "grad_norm": 1.2057015895843506, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 86920 + }, + { + "epoch": 571.9078947368421, + "grad_norm": 1.0640344619750977, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 86930 + }, + { + "epoch": 571.9736842105264, + "grad_norm": 1.1055980920791626, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 86940 + }, + { + "epoch": 572.0394736842105, + "grad_norm": 0.6833211779594421, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 86950 + }, + { + "epoch": 572.1052631578947, + "grad_norm": 1.1310468912124634, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 86960 + }, + { + "epoch": 572.171052631579, + "grad_norm": 1.280174970626831, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 86970 + }, + { + "epoch": 572.2368421052631, + "grad_norm": 1.0595585107803345, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 86980 + }, + { + "epoch": 572.3026315789474, + "grad_norm": 1.1463285684585571, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 86990 + }, + { + "epoch": 572.3684210526316, + "grad_norm": 1.0951709747314453, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 87000 + }, + { + "epoch": 572.4342105263158, + "grad_norm": 1.1616261005401611, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 87010 + }, + { + "epoch": 572.5, + "grad_norm": 1.0127562284469604, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 87020 + }, + { + "epoch": 572.5657894736842, + "grad_norm": 1.3694489002227783, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 87030 + }, + { + "epoch": 572.6315789473684, + "grad_norm": 1.5818332433700562, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 87040 + }, + { + "epoch": 572.6973684210526, + "grad_norm": 1.6021660566329956, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 87050 + }, + { + "epoch": 572.7631578947369, + "grad_norm": 1.4695814847946167, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 87060 + }, + { + "epoch": 572.828947368421, + "grad_norm": 1.1281092166900635, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 87070 + }, + { + "epoch": 572.8947368421053, + "grad_norm": 1.168941617012024, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 87080 + }, + { + "epoch": 572.9605263157895, + "grad_norm": 1.1322778463363647, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 87090 + }, + { + "epoch": 573.0263157894736, + "grad_norm": 1.4014652967453003, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 87100 + }, + { + "epoch": 573.0921052631579, + "grad_norm": 1.2505130767822266, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 87110 + }, + { + "epoch": 573.1578947368421, + "grad_norm": 1.029102087020874, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 87120 + }, + { + "epoch": 573.2236842105264, + "grad_norm": 1.2195322513580322, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 87130 + }, + { + "epoch": 573.2894736842105, + "grad_norm": 1.3658270835876465, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 87140 + }, + { + "epoch": 573.3552631578947, + "grad_norm": 1.0435492992401123, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 87150 + }, + { + "epoch": 573.421052631579, + "grad_norm": 1.1018818616867065, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 87160 + }, + { + "epoch": 573.4868421052631, + "grad_norm": 0.9364510774612427, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 87170 + }, + { + "epoch": 573.5526315789474, + "grad_norm": 1.258358359336853, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 87180 + }, + { + "epoch": 573.6184210526316, + "grad_norm": 1.2375657558441162, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 87190 + }, + { + "epoch": 573.6842105263158, + "grad_norm": 1.2008768320083618, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 87200 + }, + { + "epoch": 573.75, + "grad_norm": 0.818011999130249, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 87210 + }, + { + "epoch": 573.8157894736842, + "grad_norm": 1.2211856842041016, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 87220 + }, + { + "epoch": 573.8815789473684, + "grad_norm": 1.0101572275161743, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 87230 + }, + { + "epoch": 573.9473684210526, + "grad_norm": 1.2342889308929443, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 87240 + }, + { + "epoch": 574.0131578947369, + "grad_norm": 0.9465478658676147, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 87250 + }, + { + "epoch": 574.078947368421, + "grad_norm": 0.9019796848297119, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 87260 + }, + { + "epoch": 574.1447368421053, + "grad_norm": 1.0364303588867188, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 87270 + }, + { + "epoch": 574.2105263157895, + "grad_norm": 1.4639605283737183, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 87280 + }, + { + "epoch": 574.2763157894736, + "grad_norm": 1.2540547847747803, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 87290 + }, + { + "epoch": 574.3421052631579, + "grad_norm": 1.2065496444702148, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 87300 + }, + { + "epoch": 574.4078947368421, + "grad_norm": 1.3892549276351929, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 87310 + }, + { + "epoch": 574.4736842105264, + "grad_norm": 1.1445990800857544, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 87320 + }, + { + "epoch": 574.5394736842105, + "grad_norm": 1.227105975151062, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 87330 + }, + { + "epoch": 574.6052631578947, + "grad_norm": 1.262370228767395, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 87340 + }, + { + "epoch": 574.671052631579, + "grad_norm": 1.0574274063110352, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 87350 + }, + { + "epoch": 574.7368421052631, + "grad_norm": 0.9377652406692505, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 87360 + }, + { + "epoch": 574.8026315789474, + "grad_norm": 0.9124358296394348, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 87370 + }, + { + "epoch": 574.8684210526316, + "grad_norm": 1.0424607992172241, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 87380 + }, + { + "epoch": 574.9342105263158, + "grad_norm": 1.2752419710159302, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 87390 + }, + { + "epoch": 575.0, + "grad_norm": 1.1712169647216797, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 87400 + }, + { + "epoch": 575.0657894736842, + "grad_norm": 1.0214799642562866, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 87410 + }, + { + "epoch": 575.1315789473684, + "grad_norm": 0.9856082797050476, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 87420 + }, + { + "epoch": 575.1973684210526, + "grad_norm": 1.4361339807510376, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 87430 + }, + { + "epoch": 575.2631578947369, + "grad_norm": 1.2261438369750977, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 87440 + }, + { + "epoch": 575.328947368421, + "grad_norm": 1.267825722694397, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 87450 + }, + { + "epoch": 575.3947368421053, + "grad_norm": 1.1174856424331665, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 87460 + }, + { + "epoch": 575.4605263157895, + "grad_norm": 1.3079813718795776, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 87470 + }, + { + "epoch": 575.5263157894736, + "grad_norm": 1.29449462890625, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 87480 + }, + { + "epoch": 575.5921052631579, + "grad_norm": 1.4480912685394287, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 87490 + }, + { + "epoch": 575.6578947368421, + "grad_norm": 1.2274054288864136, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 87500 + }, + { + "epoch": 575.7236842105264, + "grad_norm": 1.114770770072937, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 87510 + }, + { + "epoch": 575.7894736842105, + "grad_norm": 1.2212456464767456, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 87520 + }, + { + "epoch": 575.8552631578947, + "grad_norm": 0.8942835927009583, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 87530 + }, + { + "epoch": 575.921052631579, + "grad_norm": 1.293633222579956, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 87540 + }, + { + "epoch": 575.9868421052631, + "grad_norm": 1.2598819732666016, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 87550 + }, + { + "epoch": 576.0526315789474, + "grad_norm": 1.32999849319458, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 87560 + }, + { + "epoch": 576.1184210526316, + "grad_norm": 1.2802047729492188, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 87570 + }, + { + "epoch": 576.1842105263158, + "grad_norm": 1.7709383964538574, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 87580 + }, + { + "epoch": 576.25, + "grad_norm": 1.3085800409317017, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 87590 + }, + { + "epoch": 576.3157894736842, + "grad_norm": 1.2775263786315918, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 87600 + }, + { + "epoch": 576.3815789473684, + "grad_norm": 1.5602463483810425, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 87610 + }, + { + "epoch": 576.4473684210526, + "grad_norm": 1.5633126497268677, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 87620 + }, + { + "epoch": 576.5131578947369, + "grad_norm": 1.0983535051345825, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 87630 + }, + { + "epoch": 576.578947368421, + "grad_norm": 1.0312081575393677, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 87640 + }, + { + "epoch": 576.6447368421053, + "grad_norm": 1.3446412086486816, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 87650 + }, + { + "epoch": 576.7105263157895, + "grad_norm": 1.1297847032546997, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 87660 + }, + { + "epoch": 576.7763157894736, + "grad_norm": 1.5967837572097778, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 87670 + }, + { + "epoch": 576.8421052631579, + "grad_norm": 1.1126961708068848, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 87680 + }, + { + "epoch": 576.9078947368421, + "grad_norm": 1.0181384086608887, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 87690 + }, + { + "epoch": 576.9736842105264, + "grad_norm": 0.9504018425941467, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 87700 + }, + { + "epoch": 577.0394736842105, + "grad_norm": 1.2466793060302734, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 87710 + }, + { + "epoch": 577.1052631578947, + "grad_norm": 0.9084358215332031, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 87720 + }, + { + "epoch": 577.171052631579, + "grad_norm": 1.0614901781082153, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 87730 + }, + { + "epoch": 577.2368421052631, + "grad_norm": 1.036545991897583, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 87740 + }, + { + "epoch": 577.3026315789474, + "grad_norm": 0.8779444098472595, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 87750 + }, + { + "epoch": 577.3684210526316, + "grad_norm": 0.7535855174064636, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 87760 + }, + { + "epoch": 577.4342105263158, + "grad_norm": 1.1626191139221191, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 87770 + }, + { + "epoch": 577.5, + "grad_norm": 1.0589649677276611, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 87780 + }, + { + "epoch": 577.5657894736842, + "grad_norm": 1.277850866317749, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 87790 + }, + { + "epoch": 577.6315789473684, + "grad_norm": 0.9819774031639099, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 87800 + }, + { + "epoch": 577.6973684210526, + "grad_norm": 0.9045615792274475, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 87810 + }, + { + "epoch": 577.7631578947369, + "grad_norm": 1.1192935705184937, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 87820 + }, + { + "epoch": 577.828947368421, + "grad_norm": 1.2856429815292358, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 87830 + }, + { + "epoch": 577.8947368421053, + "grad_norm": 1.3235225677490234, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 87840 + }, + { + "epoch": 577.9605263157895, + "grad_norm": 0.8897786140441895, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 87850 + }, + { + "epoch": 578.0263157894736, + "grad_norm": 0.9557298421859741, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 87860 + }, + { + "epoch": 578.0921052631579, + "grad_norm": 1.0050173997879028, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 87870 + }, + { + "epoch": 578.1578947368421, + "grad_norm": 0.8767371773719788, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 87880 + }, + { + "epoch": 578.2236842105264, + "grad_norm": 1.2130120992660522, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 87890 + }, + { + "epoch": 578.2894736842105, + "grad_norm": 1.0449823141098022, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 87900 + }, + { + "epoch": 578.3552631578947, + "grad_norm": 0.840207040309906, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 87910 + }, + { + "epoch": 578.421052631579, + "grad_norm": 0.847531795501709, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 87920 + }, + { + "epoch": 578.4868421052631, + "grad_norm": 1.5373563766479492, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 87930 + }, + { + "epoch": 578.5526315789474, + "grad_norm": 1.208318829536438, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 87940 + }, + { + "epoch": 578.6184210526316, + "grad_norm": 1.1403499841690063, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 87950 + }, + { + "epoch": 578.6842105263158, + "grad_norm": 1.375062108039856, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 87960 + }, + { + "epoch": 578.75, + "grad_norm": 0.9031561017036438, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 87970 + }, + { + "epoch": 578.8157894736842, + "grad_norm": 1.3210184574127197, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 87980 + }, + { + "epoch": 578.8815789473684, + "grad_norm": 1.013850450515747, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 87990 + }, + { + "epoch": 578.9473684210526, + "grad_norm": 1.104063630104065, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 88000 + }, + { + "epoch": 579.0131578947369, + "grad_norm": 0.8655098080635071, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 88010 + }, + { + "epoch": 579.078947368421, + "grad_norm": 1.320491075515747, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 88020 + }, + { + "epoch": 579.1447368421053, + "grad_norm": 1.221070408821106, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 88030 + }, + { + "epoch": 579.2105263157895, + "grad_norm": 1.0820084810256958, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 88040 + }, + { + "epoch": 579.2763157894736, + "grad_norm": 1.2061529159545898, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 88050 + }, + { + "epoch": 579.3421052631579, + "grad_norm": 0.9127936959266663, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 88060 + }, + { + "epoch": 579.4078947368421, + "grad_norm": 1.4120718240737915, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 88070 + }, + { + "epoch": 579.4736842105264, + "grad_norm": 1.0153974294662476, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 88080 + }, + { + "epoch": 579.5394736842105, + "grad_norm": 1.3774847984313965, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 88090 + }, + { + "epoch": 579.6052631578947, + "grad_norm": 1.1112881898880005, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 88100 + }, + { + "epoch": 579.671052631579, + "grad_norm": 1.1673177480697632, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 88110 + }, + { + "epoch": 579.7368421052631, + "grad_norm": 1.160556674003601, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 88120 + }, + { + "epoch": 579.8026315789474, + "grad_norm": 1.118118405342102, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 88130 + }, + { + "epoch": 579.8684210526316, + "grad_norm": 0.7637477517127991, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 88140 + }, + { + "epoch": 579.9342105263158, + "grad_norm": 1.3707367181777954, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 88150 + }, + { + "epoch": 580.0, + "grad_norm": 1.009476900100708, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 88160 + }, + { + "epoch": 580.0657894736842, + "grad_norm": 1.3438063859939575, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 88170 + }, + { + "epoch": 580.1315789473684, + "grad_norm": 1.126423716545105, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 88180 + }, + { + "epoch": 580.1973684210526, + "grad_norm": 1.1746346950531006, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 88190 + }, + { + "epoch": 580.2631578947369, + "grad_norm": 1.0661026239395142, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 88200 + }, + { + "epoch": 580.328947368421, + "grad_norm": 0.9947375059127808, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 88210 + }, + { + "epoch": 580.3947368421053, + "grad_norm": 1.178560733795166, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 88220 + }, + { + "epoch": 580.4605263157895, + "grad_norm": 1.3273268938064575, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 88230 + }, + { + "epoch": 580.5263157894736, + "grad_norm": 1.040791392326355, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 88240 + }, + { + "epoch": 580.5921052631579, + "grad_norm": 1.4010847806930542, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 88250 + }, + { + "epoch": 580.6578947368421, + "grad_norm": 1.2355036735534668, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 88260 + }, + { + "epoch": 580.7236842105264, + "grad_norm": 0.9420546889305115, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 88270 + }, + { + "epoch": 580.7894736842105, + "grad_norm": 1.1292775869369507, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 88280 + }, + { + "epoch": 580.8552631578947, + "grad_norm": 1.118950366973877, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 88290 + }, + { + "epoch": 580.921052631579, + "grad_norm": 1.3669413328170776, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 88300 + }, + { + "epoch": 580.9868421052631, + "grad_norm": 1.1475030183792114, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 88310 + }, + { + "epoch": 581.0526315789474, + "grad_norm": 0.9832250475883484, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 88320 + }, + { + "epoch": 581.1184210526316, + "grad_norm": 1.0179389715194702, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 88330 + }, + { + "epoch": 581.1842105263158, + "grad_norm": 1.026995301246643, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 88340 + }, + { + "epoch": 581.25, + "grad_norm": 1.0577194690704346, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 88350 + }, + { + "epoch": 581.3157894736842, + "grad_norm": 0.8815690875053406, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 88360 + }, + { + "epoch": 581.3815789473684, + "grad_norm": 0.9633040428161621, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 88370 + }, + { + "epoch": 581.4473684210526, + "grad_norm": 1.289242148399353, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 88380 + }, + { + "epoch": 581.5131578947369, + "grad_norm": 0.8594449758529663, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 88390 + }, + { + "epoch": 581.578947368421, + "grad_norm": 0.8207187652587891, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 88400 + }, + { + "epoch": 581.6447368421053, + "grad_norm": 1.0943405628204346, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 88410 + }, + { + "epoch": 581.7105263157895, + "grad_norm": 0.834723711013794, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 88420 + }, + { + "epoch": 581.7763157894736, + "grad_norm": 1.02744722366333, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 88430 + }, + { + "epoch": 581.8421052631579, + "grad_norm": 1.1640406847000122, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 88440 + }, + { + "epoch": 581.9078947368421, + "grad_norm": 1.0775976181030273, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 88450 + }, + { + "epoch": 581.9736842105264, + "grad_norm": 1.163527250289917, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 88460 + }, + { + "epoch": 582.0394736842105, + "grad_norm": 0.8929876089096069, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 88470 + }, + { + "epoch": 582.1052631578947, + "grad_norm": 1.3309000730514526, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 88480 + }, + { + "epoch": 582.171052631579, + "grad_norm": 1.0838176012039185, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 88490 + }, + { + "epoch": 582.2368421052631, + "grad_norm": 1.5269906520843506, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 88500 + }, + { + "epoch": 582.3026315789474, + "grad_norm": 0.8997194170951843, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 88510 + }, + { + "epoch": 582.3684210526316, + "grad_norm": 1.0836443901062012, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 88520 + }, + { + "epoch": 582.4342105263158, + "grad_norm": 0.7760714888572693, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 88530 + }, + { + "epoch": 582.5, + "grad_norm": 1.0541595220565796, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 88540 + }, + { + "epoch": 582.5657894736842, + "grad_norm": 1.0740338563919067, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 88550 + }, + { + "epoch": 582.6315789473684, + "grad_norm": 1.234104871749878, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 88560 + }, + { + "epoch": 582.6973684210526, + "grad_norm": 1.1293065547943115, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 88570 + }, + { + "epoch": 582.7631578947369, + "grad_norm": 0.9713568091392517, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 88580 + }, + { + "epoch": 582.828947368421, + "grad_norm": 0.9887568950653076, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 88590 + }, + { + "epoch": 582.8947368421053, + "grad_norm": 1.0662295818328857, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 88600 + }, + { + "epoch": 582.9605263157895, + "grad_norm": 1.09245765209198, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 88610 + }, + { + "epoch": 583.0263157894736, + "grad_norm": 1.12832510471344, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 88620 + }, + { + "epoch": 583.0921052631579, + "grad_norm": 1.6330440044403076, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 88630 + }, + { + "epoch": 583.1578947368421, + "grad_norm": 1.4116557836532593, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 88640 + }, + { + "epoch": 583.2236842105264, + "grad_norm": 1.313237190246582, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 88650 + }, + { + "epoch": 583.2894736842105, + "grad_norm": 1.496812105178833, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 88660 + }, + { + "epoch": 583.3552631578947, + "grad_norm": 1.3389391899108887, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 88670 + }, + { + "epoch": 583.421052631579, + "grad_norm": 0.9950214624404907, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 88680 + }, + { + "epoch": 583.4868421052631, + "grad_norm": 1.0509663820266724, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 88690 + }, + { + "epoch": 583.5526315789474, + "grad_norm": 1.2093925476074219, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 88700 + }, + { + "epoch": 583.6184210526316, + "grad_norm": 1.2887691259384155, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 88710 + }, + { + "epoch": 583.6842105263158, + "grad_norm": 1.2735077142715454, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 88720 + }, + { + "epoch": 583.75, + "grad_norm": 1.4364982843399048, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 88730 + }, + { + "epoch": 583.8157894736842, + "grad_norm": 1.1002883911132812, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 88740 + }, + { + "epoch": 583.8815789473684, + "grad_norm": 0.9703975915908813, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 88750 + }, + { + "epoch": 583.9473684210526, + "grad_norm": 0.8523468971252441, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 88760 + }, + { + "epoch": 584.0131578947369, + "grad_norm": 1.2249200344085693, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 88770 + }, + { + "epoch": 584.078947368421, + "grad_norm": 1.7450804710388184, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 88780 + }, + { + "epoch": 584.1447368421053, + "grad_norm": 1.176170825958252, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 88790 + }, + { + "epoch": 584.2105263157895, + "grad_norm": 0.7471736073493958, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 88800 + }, + { + "epoch": 584.2763157894736, + "grad_norm": 1.824398159980774, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 88810 + }, + { + "epoch": 584.3421052631579, + "grad_norm": 1.3423678874969482, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 88820 + }, + { + "epoch": 584.4078947368421, + "grad_norm": 1.2872895002365112, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 88830 + }, + { + "epoch": 584.4736842105264, + "grad_norm": 1.6336323022842407, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 88840 + }, + { + "epoch": 584.5394736842105, + "grad_norm": 1.0463390350341797, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 88850 + }, + { + "epoch": 584.6052631578947, + "grad_norm": 1.3081940412521362, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 88860 + }, + { + "epoch": 584.671052631579, + "grad_norm": 1.1893011331558228, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 88870 + }, + { + "epoch": 584.7368421052631, + "grad_norm": 1.2873516082763672, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 88880 + }, + { + "epoch": 584.8026315789474, + "grad_norm": 0.9724097847938538, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 88890 + }, + { + "epoch": 584.8684210526316, + "grad_norm": 0.8140742778778076, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 88900 + }, + { + "epoch": 584.9342105263158, + "grad_norm": 1.4383323192596436, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 88910 + }, + { + "epoch": 585.0, + "grad_norm": 0.8941511511802673, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 88920 + }, + { + "epoch": 585.0657894736842, + "grad_norm": 1.3522826433181763, + "learning_rate": 0.0001, + "loss": 0.0152, + "step": 88930 + }, + { + "epoch": 585.1315789473684, + "grad_norm": 1.0675338506698608, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 88940 + }, + { + "epoch": 585.1973684210526, + "grad_norm": 1.4727058410644531, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 88950 + }, + { + "epoch": 585.2631578947369, + "grad_norm": 1.2497466802597046, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 88960 + }, + { + "epoch": 585.328947368421, + "grad_norm": 0.8785649538040161, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 88970 + }, + { + "epoch": 585.3947368421053, + "grad_norm": 1.2095212936401367, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 88980 + }, + { + "epoch": 585.4605263157895, + "grad_norm": 1.3274887800216675, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 88990 + }, + { + "epoch": 585.5263157894736, + "grad_norm": 1.0932297706604004, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 89000 + }, + { + "epoch": 585.5921052631579, + "grad_norm": 1.1494570970535278, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 89010 + }, + { + "epoch": 585.6578947368421, + "grad_norm": 1.5100291967391968, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 89020 + }, + { + "epoch": 585.7236842105264, + "grad_norm": 1.3086682558059692, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 89030 + }, + { + "epoch": 585.7894736842105, + "grad_norm": 1.1666781902313232, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 89040 + }, + { + "epoch": 585.8552631578947, + "grad_norm": 0.9592698216438293, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 89050 + }, + { + "epoch": 585.921052631579, + "grad_norm": 0.6694148182868958, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 89060 + }, + { + "epoch": 585.9868421052631, + "grad_norm": 1.0263710021972656, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 89070 + }, + { + "epoch": 586.0526315789474, + "grad_norm": 0.6993970274925232, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 89080 + }, + { + "epoch": 586.1184210526316, + "grad_norm": 0.8670745491981506, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 89090 + }, + { + "epoch": 586.1842105263158, + "grad_norm": 0.7880876064300537, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 89100 + }, + { + "epoch": 586.25, + "grad_norm": 0.9657894968986511, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 89110 + }, + { + "epoch": 586.3157894736842, + "grad_norm": 0.7474163174629211, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 89120 + }, + { + "epoch": 586.3815789473684, + "grad_norm": 1.1458895206451416, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 89130 + }, + { + "epoch": 586.4473684210526, + "grad_norm": 1.0431797504425049, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 89140 + }, + { + "epoch": 586.5131578947369, + "grad_norm": 0.9211791157722473, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 89150 + }, + { + "epoch": 586.578947368421, + "grad_norm": 1.1954262256622314, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 89160 + }, + { + "epoch": 586.6447368421053, + "grad_norm": 1.164695143699646, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 89170 + }, + { + "epoch": 586.7105263157895, + "grad_norm": 1.2543039321899414, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 89180 + }, + { + "epoch": 586.7763157894736, + "grad_norm": 1.077324390411377, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 89190 + }, + { + "epoch": 586.8421052631579, + "grad_norm": 0.7734859585762024, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 89200 + }, + { + "epoch": 586.9078947368421, + "grad_norm": 1.3108208179473877, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 89210 + }, + { + "epoch": 586.9736842105264, + "grad_norm": 1.516038179397583, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 89220 + }, + { + "epoch": 587.0394736842105, + "grad_norm": 1.5509519577026367, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 89230 + }, + { + "epoch": 587.1052631578947, + "grad_norm": 1.3086330890655518, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 89240 + }, + { + "epoch": 587.171052631579, + "grad_norm": 1.3632980585098267, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 89250 + }, + { + "epoch": 587.2368421052631, + "grad_norm": 1.1639214754104614, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 89260 + }, + { + "epoch": 587.3026315789474, + "grad_norm": 1.0251145362854004, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 89270 + }, + { + "epoch": 587.3684210526316, + "grad_norm": 1.2207335233688354, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 89280 + }, + { + "epoch": 587.4342105263158, + "grad_norm": 1.3697402477264404, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 89290 + }, + { + "epoch": 587.5, + "grad_norm": 1.4069492816925049, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 89300 + }, + { + "epoch": 587.5657894736842, + "grad_norm": 1.0171831846237183, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 89310 + }, + { + "epoch": 587.6315789473684, + "grad_norm": 1.1951297521591187, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 89320 + }, + { + "epoch": 587.6973684210526, + "grad_norm": 0.9013453125953674, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 89330 + }, + { + "epoch": 587.7631578947369, + "grad_norm": 1.1464810371398926, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 89340 + }, + { + "epoch": 587.828947368421, + "grad_norm": 1.0271261930465698, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 89350 + }, + { + "epoch": 587.8947368421053, + "grad_norm": 1.106770396232605, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 89360 + }, + { + "epoch": 587.9605263157895, + "grad_norm": 0.9565857648849487, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 89370 + }, + { + "epoch": 588.0263157894736, + "grad_norm": 1.2642189264297485, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 89380 + }, + { + "epoch": 588.0921052631579, + "grad_norm": 1.1177488565444946, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 89390 + }, + { + "epoch": 588.1578947368421, + "grad_norm": 0.8865790367126465, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 89400 + }, + { + "epoch": 588.2236842105264, + "grad_norm": 1.007317066192627, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 89410 + }, + { + "epoch": 588.2894736842105, + "grad_norm": 1.041730523109436, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 89420 + }, + { + "epoch": 588.3552631578947, + "grad_norm": 1.389570713043213, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 89430 + }, + { + "epoch": 588.421052631579, + "grad_norm": 1.1564185619354248, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 89440 + }, + { + "epoch": 588.4868421052631, + "grad_norm": 1.2922906875610352, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 89450 + }, + { + "epoch": 588.5526315789474, + "grad_norm": 1.2938793897628784, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 89460 + }, + { + "epoch": 588.6184210526316, + "grad_norm": 0.9030840992927551, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 89470 + }, + { + "epoch": 588.6842105263158, + "grad_norm": 1.131744384765625, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 89480 + }, + { + "epoch": 588.75, + "grad_norm": 1.0331127643585205, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 89490 + }, + { + "epoch": 588.8157894736842, + "grad_norm": 0.9357874989509583, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 89500 + }, + { + "epoch": 588.8815789473684, + "grad_norm": 1.4031165838241577, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 89510 + }, + { + "epoch": 588.9473684210526, + "grad_norm": 1.0815694332122803, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 89520 + }, + { + "epoch": 589.0131578947369, + "grad_norm": 1.360446810722351, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 89530 + }, + { + "epoch": 589.078947368421, + "grad_norm": 1.0539952516555786, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 89540 + }, + { + "epoch": 589.1447368421053, + "grad_norm": 0.9572492837905884, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 89550 + }, + { + "epoch": 589.2105263157895, + "grad_norm": 1.1082371473312378, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 89560 + }, + { + "epoch": 589.2763157894736, + "grad_norm": 1.2691287994384766, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 89570 + }, + { + "epoch": 589.3421052631579, + "grad_norm": 1.6313940286636353, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 89580 + }, + { + "epoch": 589.4078947368421, + "grad_norm": 0.9120645523071289, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 89590 + }, + { + "epoch": 589.4736842105264, + "grad_norm": 1.050746202468872, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 89600 + }, + { + "epoch": 589.5394736842105, + "grad_norm": 1.3637466430664062, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 89610 + }, + { + "epoch": 589.6052631578947, + "grad_norm": 1.2993441820144653, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 89620 + }, + { + "epoch": 589.671052631579, + "grad_norm": 1.2083455324172974, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 89630 + }, + { + "epoch": 589.7368421052631, + "grad_norm": 0.9543986320495605, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 89640 + }, + { + "epoch": 589.8026315789474, + "grad_norm": 1.0169148445129395, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 89650 + }, + { + "epoch": 589.8684210526316, + "grad_norm": 0.9722794890403748, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 89660 + }, + { + "epoch": 589.9342105263158, + "grad_norm": 1.2600023746490479, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 89670 + }, + { + "epoch": 590.0, + "grad_norm": 1.084512710571289, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 89680 + }, + { + "epoch": 590.0657894736842, + "grad_norm": 0.9002571105957031, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 89690 + }, + { + "epoch": 590.1315789473684, + "grad_norm": 1.2132498025894165, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 89700 + }, + { + "epoch": 590.1973684210526, + "grad_norm": 1.4505194425582886, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 89710 + }, + { + "epoch": 590.2631578947369, + "grad_norm": 1.2071669101715088, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 89720 + }, + { + "epoch": 590.328947368421, + "grad_norm": 1.1249711513519287, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 89730 + }, + { + "epoch": 590.3947368421053, + "grad_norm": 0.9531528353691101, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 89740 + }, + { + "epoch": 590.4605263157895, + "grad_norm": 1.5327131748199463, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 89750 + }, + { + "epoch": 590.5263157894736, + "grad_norm": 1.2193597555160522, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 89760 + }, + { + "epoch": 590.5921052631579, + "grad_norm": 1.114341378211975, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 89770 + }, + { + "epoch": 590.6578947368421, + "grad_norm": 0.7572506070137024, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 89780 + }, + { + "epoch": 590.7236842105264, + "grad_norm": 1.0355122089385986, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 89790 + }, + { + "epoch": 590.7894736842105, + "grad_norm": 1.0882655382156372, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 89800 + }, + { + "epoch": 590.8552631578947, + "grad_norm": 1.2581169605255127, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 89810 + }, + { + "epoch": 590.921052631579, + "grad_norm": 1.334328055381775, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 89820 + }, + { + "epoch": 590.9868421052631, + "grad_norm": 1.0767223834991455, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 89830 + }, + { + "epoch": 591.0526315789474, + "grad_norm": 1.1012510061264038, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 89840 + }, + { + "epoch": 591.1184210526316, + "grad_norm": 0.7526869773864746, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 89850 + }, + { + "epoch": 591.1842105263158, + "grad_norm": 1.0117813348770142, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 89860 + }, + { + "epoch": 591.25, + "grad_norm": 1.2263928651809692, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 89870 + }, + { + "epoch": 591.3157894736842, + "grad_norm": 0.8752433657646179, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 89880 + }, + { + "epoch": 591.3815789473684, + "grad_norm": 1.4674887657165527, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 89890 + }, + { + "epoch": 591.4473684210526, + "grad_norm": 1.1503700017929077, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 89900 + }, + { + "epoch": 591.5131578947369, + "grad_norm": 1.218532681465149, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 89910 + }, + { + "epoch": 591.578947368421, + "grad_norm": 1.5845961570739746, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 89920 + }, + { + "epoch": 591.6447368421053, + "grad_norm": 1.585045576095581, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 89930 + }, + { + "epoch": 591.7105263157895, + "grad_norm": 1.2569578886032104, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 89940 + }, + { + "epoch": 591.7763157894736, + "grad_norm": 1.1375163793563843, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 89950 + }, + { + "epoch": 591.8421052631579, + "grad_norm": 0.8566263914108276, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 89960 + }, + { + "epoch": 591.9078947368421, + "grad_norm": 1.2207688093185425, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 89970 + }, + { + "epoch": 591.9736842105264, + "grad_norm": 1.099183440208435, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 89980 + }, + { + "epoch": 592.0394736842105, + "grad_norm": 1.3385899066925049, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 89990 + }, + { + "epoch": 592.1052631578947, + "grad_norm": 1.2732150554656982, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 90000 + }, + { + "epoch": 592.171052631579, + "grad_norm": 1.053571105003357, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 90010 + }, + { + "epoch": 592.2368421052631, + "grad_norm": 0.9966652393341064, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 90020 + }, + { + "epoch": 592.3026315789474, + "grad_norm": 1.0630309581756592, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 90030 + }, + { + "epoch": 592.3684210526316, + "grad_norm": 1.1324174404144287, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 90040 + }, + { + "epoch": 592.4342105263158, + "grad_norm": 1.2332628965377808, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 90050 + }, + { + "epoch": 592.5, + "grad_norm": 1.151996374130249, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 90060 + }, + { + "epoch": 592.5657894736842, + "grad_norm": 0.7464222311973572, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 90070 + }, + { + "epoch": 592.6315789473684, + "grad_norm": 0.8995870351791382, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 90080 + }, + { + "epoch": 592.6973684210526, + "grad_norm": 1.467040777206421, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 90090 + }, + { + "epoch": 592.7631578947369, + "grad_norm": 0.7274995446205139, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 90100 + }, + { + "epoch": 592.828947368421, + "grad_norm": 1.107151985168457, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 90110 + }, + { + "epoch": 592.8947368421053, + "grad_norm": 1.1053555011749268, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 90120 + }, + { + "epoch": 592.9605263157895, + "grad_norm": 0.884556770324707, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 90130 + }, + { + "epoch": 593.0263157894736, + "grad_norm": 1.09501051902771, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 90140 + }, + { + "epoch": 593.0921052631579, + "grad_norm": 1.0981241464614868, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 90150 + }, + { + "epoch": 593.1578947368421, + "grad_norm": 0.5903021097183228, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 90160 + }, + { + "epoch": 593.2236842105264, + "grad_norm": 0.7272729873657227, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 90170 + }, + { + "epoch": 593.2894736842105, + "grad_norm": 1.2663187980651855, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 90180 + }, + { + "epoch": 593.3552631578947, + "grad_norm": 1.210874080657959, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 90190 + }, + { + "epoch": 593.421052631579, + "grad_norm": 1.0026808977127075, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 90200 + }, + { + "epoch": 593.4868421052631, + "grad_norm": 1.1828995943069458, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 90210 + }, + { + "epoch": 593.5526315789474, + "grad_norm": 1.2319470643997192, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 90220 + }, + { + "epoch": 593.6184210526316, + "grad_norm": 1.5883439779281616, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 90230 + }, + { + "epoch": 593.6842105263158, + "grad_norm": 1.1823478937149048, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 90240 + }, + { + "epoch": 593.75, + "grad_norm": 1.1434694528579712, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 90250 + }, + { + "epoch": 593.8157894736842, + "grad_norm": 0.8349093794822693, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 90260 + }, + { + "epoch": 593.8815789473684, + "grad_norm": 1.1716779470443726, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 90270 + }, + { + "epoch": 593.9473684210526, + "grad_norm": 0.9748353362083435, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 90280 + }, + { + "epoch": 594.0131578947369, + "grad_norm": 1.1021101474761963, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 90290 + }, + { + "epoch": 594.078947368421, + "grad_norm": 1.007872223854065, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 90300 + }, + { + "epoch": 594.1447368421053, + "grad_norm": 0.8488720059394836, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 90310 + }, + { + "epoch": 594.2105263157895, + "grad_norm": 0.9892115592956543, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 90320 + }, + { + "epoch": 594.2763157894736, + "grad_norm": 1.2533457279205322, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 90330 + }, + { + "epoch": 594.3421052631579, + "grad_norm": 1.1002429723739624, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 90340 + }, + { + "epoch": 594.4078947368421, + "grad_norm": 1.0091798305511475, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 90350 + }, + { + "epoch": 594.4736842105264, + "grad_norm": 1.074102520942688, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 90360 + }, + { + "epoch": 594.5394736842105, + "grad_norm": 0.9539269804954529, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 90370 + }, + { + "epoch": 594.6052631578947, + "grad_norm": 1.0919673442840576, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 90380 + }, + { + "epoch": 594.671052631579, + "grad_norm": 1.65645170211792, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 90390 + }, + { + "epoch": 594.7368421052631, + "grad_norm": 1.5577173233032227, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 90400 + }, + { + "epoch": 594.8026315789474, + "grad_norm": 0.9961990714073181, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 90410 + }, + { + "epoch": 594.8684210526316, + "grad_norm": 1.1721851825714111, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 90420 + }, + { + "epoch": 594.9342105263158, + "grad_norm": 1.203532099723816, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 90430 + }, + { + "epoch": 595.0, + "grad_norm": 1.3049278259277344, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 90440 + }, + { + "epoch": 595.0657894736842, + "grad_norm": 1.4618241786956787, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 90450 + }, + { + "epoch": 595.1315789473684, + "grad_norm": 1.3204786777496338, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 90460 + }, + { + "epoch": 595.1973684210526, + "grad_norm": 1.2331825494766235, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 90470 + }, + { + "epoch": 595.2631578947369, + "grad_norm": 1.2492101192474365, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 90480 + }, + { + "epoch": 595.328947368421, + "grad_norm": 1.1810252666473389, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 90490 + }, + { + "epoch": 595.3947368421053, + "grad_norm": 1.0970319509506226, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 90500 + }, + { + "epoch": 595.4605263157895, + "grad_norm": 1.2086048126220703, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 90510 + }, + { + "epoch": 595.5263157894736, + "grad_norm": 0.9637195467948914, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 90520 + }, + { + "epoch": 595.5921052631579, + "grad_norm": 1.4612727165222168, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 90530 + }, + { + "epoch": 595.6578947368421, + "grad_norm": 1.3866544961929321, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 90540 + }, + { + "epoch": 595.7236842105264, + "grad_norm": 1.3224684000015259, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 90550 + }, + { + "epoch": 595.7894736842105, + "grad_norm": 1.1373405456542969, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 90560 + }, + { + "epoch": 595.8552631578947, + "grad_norm": 0.909879207611084, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 90570 + }, + { + "epoch": 595.921052631579, + "grad_norm": 0.965241551399231, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 90580 + }, + { + "epoch": 595.9868421052631, + "grad_norm": 1.0295147895812988, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 90590 + }, + { + "epoch": 596.0526315789474, + "grad_norm": 1.1842830181121826, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 90600 + }, + { + "epoch": 596.1184210526316, + "grad_norm": 1.119468331336975, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 90610 + }, + { + "epoch": 596.1842105263158, + "grad_norm": 1.0015408992767334, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 90620 + }, + { + "epoch": 596.25, + "grad_norm": 1.0252635478973389, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 90630 + }, + { + "epoch": 596.3157894736842, + "grad_norm": 1.0679528713226318, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 90640 + }, + { + "epoch": 596.3815789473684, + "grad_norm": 1.1197189092636108, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 90650 + }, + { + "epoch": 596.4473684210526, + "grad_norm": 1.1711959838867188, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 90660 + }, + { + "epoch": 596.5131578947369, + "grad_norm": 0.8267062306404114, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 90670 + }, + { + "epoch": 596.578947368421, + "grad_norm": 0.9089040756225586, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 90680 + }, + { + "epoch": 596.6447368421053, + "grad_norm": 1.1336896419525146, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 90690 + }, + { + "epoch": 596.7105263157895, + "grad_norm": 1.5351234674453735, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 90700 + }, + { + "epoch": 596.7763157894736, + "grad_norm": 1.4574313163757324, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 90710 + }, + { + "epoch": 596.8421052631579, + "grad_norm": 1.3572088479995728, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 90720 + }, + { + "epoch": 596.9078947368421, + "grad_norm": 1.556077480316162, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 90730 + }, + { + "epoch": 596.9736842105264, + "grad_norm": 1.4981848001480103, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 90740 + }, + { + "epoch": 597.0394736842105, + "grad_norm": 1.3176113367080688, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 90750 + }, + { + "epoch": 597.1052631578947, + "grad_norm": 1.1030333042144775, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 90760 + }, + { + "epoch": 597.171052631579, + "grad_norm": 1.3109503984451294, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 90770 + }, + { + "epoch": 597.2368421052631, + "grad_norm": 1.4163578748703003, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 90780 + }, + { + "epoch": 597.3026315789474, + "grad_norm": 1.3910415172576904, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 90790 + }, + { + "epoch": 597.3684210526316, + "grad_norm": 1.3340750932693481, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 90800 + }, + { + "epoch": 597.4342105263158, + "grad_norm": 0.9749845862388611, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 90810 + }, + { + "epoch": 597.5, + "grad_norm": 0.9810044169425964, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 90820 + }, + { + "epoch": 597.5657894736842, + "grad_norm": 1.392261266708374, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 90830 + }, + { + "epoch": 597.6315789473684, + "grad_norm": 1.2727516889572144, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 90840 + }, + { + "epoch": 597.6973684210526, + "grad_norm": 1.2531763315200806, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 90850 + }, + { + "epoch": 597.7631578947369, + "grad_norm": 1.1090060472488403, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 90860 + }, + { + "epoch": 597.828947368421, + "grad_norm": 1.283146619796753, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 90870 + }, + { + "epoch": 597.8947368421053, + "grad_norm": 0.8939560055732727, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 90880 + }, + { + "epoch": 597.9605263157895, + "grad_norm": 0.7372931838035583, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 90890 + }, + { + "epoch": 598.0263157894736, + "grad_norm": 0.992813229560852, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 90900 + }, + { + "epoch": 598.0921052631579, + "grad_norm": 1.0757131576538086, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 90910 + }, + { + "epoch": 598.1578947368421, + "grad_norm": 1.3756095170974731, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 90920 + }, + { + "epoch": 598.2236842105264, + "grad_norm": 1.1609660387039185, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 90930 + }, + { + "epoch": 598.2894736842105, + "grad_norm": 1.197822093963623, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 90940 + }, + { + "epoch": 598.3552631578947, + "grad_norm": 1.0572071075439453, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 90950 + }, + { + "epoch": 598.421052631579, + "grad_norm": 1.026586890220642, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 90960 + }, + { + "epoch": 598.4868421052631, + "grad_norm": 1.0258917808532715, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 90970 + }, + { + "epoch": 598.5526315789474, + "grad_norm": 1.1755049228668213, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 90980 + }, + { + "epoch": 598.6184210526316, + "grad_norm": 1.2228240966796875, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 90990 + }, + { + "epoch": 598.6842105263158, + "grad_norm": 1.14155912399292, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 91000 + }, + { + "epoch": 598.75, + "grad_norm": 1.355281114578247, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 91010 + }, + { + "epoch": 598.8157894736842, + "grad_norm": 0.7199444770812988, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 91020 + }, + { + "epoch": 598.8815789473684, + "grad_norm": 0.8066603541374207, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 91030 + }, + { + "epoch": 598.9473684210526, + "grad_norm": 0.8587542772293091, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 91040 + }, + { + "epoch": 599.0131578947369, + "grad_norm": 0.8503077030181885, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 91050 + }, + { + "epoch": 599.078947368421, + "grad_norm": 0.5482606887817383, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 91060 + }, + { + "epoch": 599.1447368421053, + "grad_norm": 1.3587243556976318, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 91070 + }, + { + "epoch": 599.2105263157895, + "grad_norm": 1.3960087299346924, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 91080 + }, + { + "epoch": 599.2763157894736, + "grad_norm": 1.0723680257797241, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 91090 + }, + { + "epoch": 599.3421052631579, + "grad_norm": 0.911795437335968, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 91100 + }, + { + "epoch": 599.4078947368421, + "grad_norm": 1.3153270483016968, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 91110 + }, + { + "epoch": 599.4736842105264, + "grad_norm": 1.3088375329971313, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 91120 + }, + { + "epoch": 599.5394736842105, + "grad_norm": 1.0322259664535522, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 91130 + }, + { + "epoch": 599.6052631578947, + "grad_norm": 0.8440555334091187, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 91140 + }, + { + "epoch": 599.671052631579, + "grad_norm": 1.4241567850112915, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 91150 + }, + { + "epoch": 599.7368421052631, + "grad_norm": 1.298012375831604, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 91160 + }, + { + "epoch": 599.8026315789474, + "grad_norm": 2.0893771648406982, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 91170 + }, + { + "epoch": 599.8684210526316, + "grad_norm": 1.2253968715667725, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 91180 + }, + { + "epoch": 599.9342105263158, + "grad_norm": 1.2711297273635864, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 91190 + }, + { + "epoch": 600.0, + "grad_norm": 0.9054577350616455, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 91200 + }, + { + "epoch": 600.0657894736842, + "grad_norm": 0.9020711779594421, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 91210 + }, + { + "epoch": 600.1315789473684, + "grad_norm": 1.009748935699463, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 91220 + }, + { + "epoch": 600.1973684210526, + "grad_norm": 1.466187834739685, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 91230 + }, + { + "epoch": 600.2631578947369, + "grad_norm": 1.3021607398986816, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 91240 + }, + { + "epoch": 600.328947368421, + "grad_norm": 1.0254582166671753, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 91250 + }, + { + "epoch": 600.3947368421053, + "grad_norm": 0.8893698453903198, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 91260 + }, + { + "epoch": 600.4605263157895, + "grad_norm": 1.190147042274475, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 91270 + }, + { + "epoch": 600.5263157894736, + "grad_norm": 1.1104352474212646, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 91280 + }, + { + "epoch": 600.5921052631579, + "grad_norm": 1.2664473056793213, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 91290 + }, + { + "epoch": 600.6578947368421, + "grad_norm": 1.136460781097412, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 91300 + }, + { + "epoch": 600.7236842105264, + "grad_norm": 1.4673341512680054, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 91310 + }, + { + "epoch": 600.7894736842105, + "grad_norm": 1.533826231956482, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 91320 + }, + { + "epoch": 600.8552631578947, + "grad_norm": 1.178153157234192, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 91330 + }, + { + "epoch": 600.921052631579, + "grad_norm": 1.365976095199585, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 91340 + }, + { + "epoch": 600.9868421052631, + "grad_norm": 1.1566143035888672, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 91350 + }, + { + "epoch": 601.0526315789474, + "grad_norm": 1.1433640718460083, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 91360 + }, + { + "epoch": 601.1184210526316, + "grad_norm": 0.9018469452857971, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 91370 + }, + { + "epoch": 601.1842105263158, + "grad_norm": 1.1393685340881348, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 91380 + }, + { + "epoch": 601.25, + "grad_norm": 1.1455858945846558, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 91390 + }, + { + "epoch": 601.3157894736842, + "grad_norm": 1.4033070802688599, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 91400 + }, + { + "epoch": 601.3815789473684, + "grad_norm": 1.4642856121063232, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 91410 + }, + { + "epoch": 601.4473684210526, + "grad_norm": 1.0535764694213867, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 91420 + }, + { + "epoch": 601.5131578947369, + "grad_norm": 1.0785901546478271, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 91430 + }, + { + "epoch": 601.578947368421, + "grad_norm": 1.2098617553710938, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 91440 + }, + { + "epoch": 601.6447368421053, + "grad_norm": 1.4227787256240845, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 91450 + }, + { + "epoch": 601.7105263157895, + "grad_norm": 1.245746374130249, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 91460 + }, + { + "epoch": 601.7763157894736, + "grad_norm": 1.1183278560638428, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 91470 + }, + { + "epoch": 601.8421052631579, + "grad_norm": 1.2257438898086548, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 91480 + }, + { + "epoch": 601.9078947368421, + "grad_norm": 0.8656227588653564, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 91490 + }, + { + "epoch": 601.9736842105264, + "grad_norm": 0.9605566263198853, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 91500 + }, + { + "epoch": 602.0394736842105, + "grad_norm": 1.148573637008667, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 91510 + }, + { + "epoch": 602.1052631578947, + "grad_norm": 1.1873161792755127, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 91520 + }, + { + "epoch": 602.171052631579, + "grad_norm": 1.2313774824142456, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 91530 + }, + { + "epoch": 602.2368421052631, + "grad_norm": 0.932478666305542, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 91540 + }, + { + "epoch": 602.3026315789474, + "grad_norm": 1.0965076684951782, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 91550 + }, + { + "epoch": 602.3684210526316, + "grad_norm": 1.0548715591430664, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 91560 + }, + { + "epoch": 602.4342105263158, + "grad_norm": 0.6713628172874451, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 91570 + }, + { + "epoch": 602.5, + "grad_norm": 1.1099224090576172, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 91580 + }, + { + "epoch": 602.5657894736842, + "grad_norm": 1.04884672164917, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 91590 + }, + { + "epoch": 602.6315789473684, + "grad_norm": 0.7628430128097534, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 91600 + }, + { + "epoch": 602.6973684210526, + "grad_norm": 0.8330525755882263, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 91610 + }, + { + "epoch": 602.7631578947369, + "grad_norm": 1.0476446151733398, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 91620 + }, + { + "epoch": 602.828947368421, + "grad_norm": 0.9488280415534973, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 91630 + }, + { + "epoch": 602.8947368421053, + "grad_norm": 1.0100157260894775, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 91640 + }, + { + "epoch": 602.9605263157895, + "grad_norm": 1.3274532556533813, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 91650 + }, + { + "epoch": 603.0263157894736, + "grad_norm": 1.0656237602233887, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 91660 + }, + { + "epoch": 603.0921052631579, + "grad_norm": 1.239275336265564, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 91670 + }, + { + "epoch": 603.1578947368421, + "grad_norm": 1.4684178829193115, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 91680 + }, + { + "epoch": 603.2236842105264, + "grad_norm": 1.0184123516082764, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 91690 + }, + { + "epoch": 603.2894736842105, + "grad_norm": 0.9157920479774475, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 91700 + }, + { + "epoch": 603.3552631578947, + "grad_norm": 1.0523053407669067, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 91710 + }, + { + "epoch": 603.421052631579, + "grad_norm": 1.4203752279281616, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 91720 + }, + { + "epoch": 603.4868421052631, + "grad_norm": 1.1770908832550049, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 91730 + }, + { + "epoch": 603.5526315789474, + "grad_norm": 1.181809425354004, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 91740 + }, + { + "epoch": 603.6184210526316, + "grad_norm": 1.3628486394882202, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 91750 + }, + { + "epoch": 603.6842105263158, + "grad_norm": 1.0041279792785645, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 91760 + }, + { + "epoch": 603.75, + "grad_norm": 1.1971017122268677, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 91770 + }, + { + "epoch": 603.8157894736842, + "grad_norm": 0.9669057726860046, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 91780 + }, + { + "epoch": 603.8815789473684, + "grad_norm": 1.1059627532958984, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 91790 + }, + { + "epoch": 603.9473684210526, + "grad_norm": 1.0041719675064087, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 91800 + }, + { + "epoch": 604.0131578947369, + "grad_norm": 0.8442330360412598, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 91810 + }, + { + "epoch": 604.078947368421, + "grad_norm": 0.9047984480857849, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 91820 + }, + { + "epoch": 604.1447368421053, + "grad_norm": 1.000754714012146, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 91830 + }, + { + "epoch": 604.2105263157895, + "grad_norm": 1.2270958423614502, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 91840 + }, + { + "epoch": 604.2763157894736, + "grad_norm": 0.8679501414299011, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 91850 + }, + { + "epoch": 604.3421052631579, + "grad_norm": 0.7244850397109985, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 91860 + }, + { + "epoch": 604.4078947368421, + "grad_norm": 0.9245374202728271, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 91870 + }, + { + "epoch": 604.4736842105264, + "grad_norm": 0.977651834487915, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 91880 + }, + { + "epoch": 604.5394736842105, + "grad_norm": 0.9714836478233337, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 91890 + }, + { + "epoch": 604.6052631578947, + "grad_norm": 1.11795973777771, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 91900 + }, + { + "epoch": 604.671052631579, + "grad_norm": 1.1345611810684204, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 91910 + }, + { + "epoch": 604.7368421052631, + "grad_norm": 1.1849992275238037, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 91920 + }, + { + "epoch": 604.8026315789474, + "grad_norm": 1.2668788433074951, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 91930 + }, + { + "epoch": 604.8684210526316, + "grad_norm": 1.0477993488311768, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 91940 + }, + { + "epoch": 604.9342105263158, + "grad_norm": 1.0195016860961914, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 91950 + }, + { + "epoch": 605.0, + "grad_norm": 0.9753181338310242, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 91960 + }, + { + "epoch": 605.0657894736842, + "grad_norm": 0.8940220475196838, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 91970 + }, + { + "epoch": 605.1315789473684, + "grad_norm": 0.779227077960968, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 91980 + }, + { + "epoch": 605.1973684210526, + "grad_norm": 1.1637389659881592, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 91990 + }, + { + "epoch": 605.2631578947369, + "grad_norm": 1.1492772102355957, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 92000 + }, + { + "epoch": 605.328947368421, + "grad_norm": 1.265363097190857, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 92010 + }, + { + "epoch": 605.3947368421053, + "grad_norm": 1.1617438793182373, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 92020 + }, + { + "epoch": 605.4605263157895, + "grad_norm": 1.0030673742294312, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 92030 + }, + { + "epoch": 605.5263157894736, + "grad_norm": 1.7177019119262695, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 92040 + }, + { + "epoch": 605.5921052631579, + "grad_norm": 1.1032168865203857, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 92050 + }, + { + "epoch": 605.6578947368421, + "grad_norm": 1.0121941566467285, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 92060 + }, + { + "epoch": 605.7236842105264, + "grad_norm": 1.0975619554519653, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 92070 + }, + { + "epoch": 605.7894736842105, + "grad_norm": 0.9619658589363098, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 92080 + }, + { + "epoch": 605.8552631578947, + "grad_norm": 0.8327913284301758, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 92090 + }, + { + "epoch": 605.921052631579, + "grad_norm": 1.230811357498169, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 92100 + }, + { + "epoch": 605.9868421052631, + "grad_norm": 1.1441431045532227, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 92110 + }, + { + "epoch": 606.0526315789474, + "grad_norm": 0.8227072954177856, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 92120 + }, + { + "epoch": 606.1184210526316, + "grad_norm": 0.9248983860015869, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 92130 + }, + { + "epoch": 606.1842105263158, + "grad_norm": 1.613922119140625, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 92140 + }, + { + "epoch": 606.25, + "grad_norm": 1.2741233110427856, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 92150 + }, + { + "epoch": 606.3157894736842, + "grad_norm": 1.0718578100204468, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 92160 + }, + { + "epoch": 606.3815789473684, + "grad_norm": 1.7179508209228516, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 92170 + }, + { + "epoch": 606.4473684210526, + "grad_norm": 1.6176236867904663, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 92180 + }, + { + "epoch": 606.5131578947369, + "grad_norm": 1.308016300201416, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 92190 + }, + { + "epoch": 606.578947368421, + "grad_norm": 1.2682429552078247, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 92200 + }, + { + "epoch": 606.6447368421053, + "grad_norm": 1.2215003967285156, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 92210 + }, + { + "epoch": 606.7105263157895, + "grad_norm": 1.3663808107376099, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 92220 + }, + { + "epoch": 606.7763157894736, + "grad_norm": 0.8408277630805969, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 92230 + }, + { + "epoch": 606.8421052631579, + "grad_norm": 0.7498778104782104, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 92240 + }, + { + "epoch": 606.9078947368421, + "grad_norm": 1.0909346342086792, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 92250 + }, + { + "epoch": 606.9736842105264, + "grad_norm": 0.9541800618171692, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 92260 + }, + { + "epoch": 607.0394736842105, + "grad_norm": 1.0058972835540771, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 92270 + }, + { + "epoch": 607.1052631578947, + "grad_norm": 0.4746664762496948, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 92280 + }, + { + "epoch": 607.171052631579, + "grad_norm": 0.893031656742096, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 92290 + }, + { + "epoch": 607.2368421052631, + "grad_norm": 1.1034398078918457, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 92300 + }, + { + "epoch": 607.3026315789474, + "grad_norm": 1.0481274127960205, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 92310 + }, + { + "epoch": 607.3684210526316, + "grad_norm": 1.0464096069335938, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 92320 + }, + { + "epoch": 607.4342105263158, + "grad_norm": 1.2462482452392578, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 92330 + }, + { + "epoch": 607.5, + "grad_norm": 1.3764554262161255, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 92340 + }, + { + "epoch": 607.5657894736842, + "grad_norm": 1.0368061065673828, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 92350 + }, + { + "epoch": 607.6315789473684, + "grad_norm": 0.9703469276428223, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 92360 + }, + { + "epoch": 607.6973684210526, + "grad_norm": 1.2898932695388794, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 92370 + }, + { + "epoch": 607.7631578947369, + "grad_norm": 0.900486946105957, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 92380 + }, + { + "epoch": 607.828947368421, + "grad_norm": 1.2100414037704468, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 92390 + }, + { + "epoch": 607.8947368421053, + "grad_norm": 1.1729727983474731, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 92400 + }, + { + "epoch": 607.9605263157895, + "grad_norm": 1.127228856086731, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 92410 + }, + { + "epoch": 608.0263157894736, + "grad_norm": 0.8651881217956543, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 92420 + }, + { + "epoch": 608.0921052631579, + "grad_norm": 0.8712637424468994, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 92430 + }, + { + "epoch": 608.1578947368421, + "grad_norm": 1.1293796300888062, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 92440 + }, + { + "epoch": 608.2236842105264, + "grad_norm": 1.2297700643539429, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 92450 + }, + { + "epoch": 608.2894736842105, + "grad_norm": 1.038426399230957, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 92460 + }, + { + "epoch": 608.3552631578947, + "grad_norm": 1.3792036771774292, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 92470 + }, + { + "epoch": 608.421052631579, + "grad_norm": 1.0525060892105103, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 92480 + }, + { + "epoch": 608.4868421052631, + "grad_norm": 1.0307869911193848, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 92490 + }, + { + "epoch": 608.5526315789474, + "grad_norm": 0.7510896325111389, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 92500 + }, + { + "epoch": 608.6184210526316, + "grad_norm": 0.973735511302948, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 92510 + }, + { + "epoch": 608.6842105263158, + "grad_norm": 1.3305103778839111, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 92520 + }, + { + "epoch": 608.75, + "grad_norm": 0.8219487071037292, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 92530 + }, + { + "epoch": 608.8157894736842, + "grad_norm": 0.836087703704834, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 92540 + }, + { + "epoch": 608.8815789473684, + "grad_norm": 1.397853970527649, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 92550 + }, + { + "epoch": 608.9473684210526, + "grad_norm": 1.1062546968460083, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 92560 + }, + { + "epoch": 609.0131578947369, + "grad_norm": 0.8973669409751892, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 92570 + }, + { + "epoch": 609.078947368421, + "grad_norm": 1.047875165939331, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 92580 + }, + { + "epoch": 609.1447368421053, + "grad_norm": 1.2423298358917236, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 92590 + }, + { + "epoch": 609.2105263157895, + "grad_norm": 0.6068233251571655, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 92600 + }, + { + "epoch": 609.2763157894736, + "grad_norm": 1.0823287963867188, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 92610 + }, + { + "epoch": 609.3421052631579, + "grad_norm": 0.9071270227432251, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 92620 + }, + { + "epoch": 609.4078947368421, + "grad_norm": 0.6887641549110413, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 92630 + }, + { + "epoch": 609.4736842105264, + "grad_norm": 1.3606575727462769, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 92640 + }, + { + "epoch": 609.5394736842105, + "grad_norm": 0.9844822287559509, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 92650 + }, + { + "epoch": 609.6052631578947, + "grad_norm": 1.0416951179504395, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 92660 + }, + { + "epoch": 609.671052631579, + "grad_norm": 1.3930866718292236, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 92670 + }, + { + "epoch": 609.7368421052631, + "grad_norm": 1.41727614402771, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 92680 + }, + { + "epoch": 609.8026315789474, + "grad_norm": 1.0249476432800293, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 92690 + }, + { + "epoch": 609.8684210526316, + "grad_norm": 1.2941230535507202, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 92700 + }, + { + "epoch": 609.9342105263158, + "grad_norm": 1.1592159271240234, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 92710 + }, + { + "epoch": 610.0, + "grad_norm": 1.3490889072418213, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 92720 + }, + { + "epoch": 610.0657894736842, + "grad_norm": 1.1179836988449097, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 92730 + }, + { + "epoch": 610.1315789473684, + "grad_norm": 1.387890100479126, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 92740 + }, + { + "epoch": 610.1973684210526, + "grad_norm": 1.3158421516418457, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 92750 + }, + { + "epoch": 610.2631578947369, + "grad_norm": 1.1624884605407715, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 92760 + }, + { + "epoch": 610.328947368421, + "grad_norm": 0.9471017718315125, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 92770 + }, + { + "epoch": 610.3947368421053, + "grad_norm": 1.1989469528198242, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 92780 + }, + { + "epoch": 610.4605263157895, + "grad_norm": 1.1440564393997192, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 92790 + }, + { + "epoch": 610.5263157894736, + "grad_norm": 1.0892585515975952, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 92800 + }, + { + "epoch": 610.5921052631579, + "grad_norm": 1.0231949090957642, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 92810 + }, + { + "epoch": 610.6578947368421, + "grad_norm": 1.5775789022445679, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 92820 + }, + { + "epoch": 610.7236842105264, + "grad_norm": 1.2640076875686646, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 92830 + }, + { + "epoch": 610.7894736842105, + "grad_norm": 1.052940011024475, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 92840 + }, + { + "epoch": 610.8552631578947, + "grad_norm": 1.000012993812561, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 92850 + }, + { + "epoch": 610.921052631579, + "grad_norm": 1.0184035301208496, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 92860 + }, + { + "epoch": 610.9868421052631, + "grad_norm": 1.1735719442367554, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 92870 + }, + { + "epoch": 611.0526315789474, + "grad_norm": 1.5585322380065918, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 92880 + }, + { + "epoch": 611.1184210526316, + "grad_norm": 1.2368836402893066, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 92890 + }, + { + "epoch": 611.1842105263158, + "grad_norm": 0.9293583035469055, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 92900 + }, + { + "epoch": 611.25, + "grad_norm": 1.3896769285202026, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 92910 + }, + { + "epoch": 611.3157894736842, + "grad_norm": 1.0448602437973022, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 92920 + }, + { + "epoch": 611.3815789473684, + "grad_norm": 1.4385607242584229, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 92930 + }, + { + "epoch": 611.4473684210526, + "grad_norm": 1.271680474281311, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 92940 + }, + { + "epoch": 611.5131578947369, + "grad_norm": 1.3433717489242554, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 92950 + }, + { + "epoch": 611.578947368421, + "grad_norm": 1.157219409942627, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 92960 + }, + { + "epoch": 611.6447368421053, + "grad_norm": 1.4146461486816406, + "learning_rate": 0.0001, + "loss": 0.0162, + "step": 92970 + }, + { + "epoch": 611.7105263157895, + "grad_norm": 1.070226788520813, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 92980 + }, + { + "epoch": 611.7763157894736, + "grad_norm": 1.0087385177612305, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 92990 + }, + { + "epoch": 611.8421052631579, + "grad_norm": 1.1002082824707031, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 93000 + }, + { + "epoch": 611.9078947368421, + "grad_norm": 1.0607959032058716, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 93010 + }, + { + "epoch": 611.9736842105264, + "grad_norm": 0.7643837928771973, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 93020 + }, + { + "epoch": 612.0394736842105, + "grad_norm": 1.0315144062042236, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 93030 + }, + { + "epoch": 612.1052631578947, + "grad_norm": 1.1846758127212524, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 93040 + }, + { + "epoch": 612.171052631579, + "grad_norm": 1.1276209354400635, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 93050 + }, + { + "epoch": 612.2368421052631, + "grad_norm": 1.1060857772827148, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 93060 + }, + { + "epoch": 612.3026315789474, + "grad_norm": 0.8714224696159363, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 93070 + }, + { + "epoch": 612.3684210526316, + "grad_norm": 1.2322101593017578, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 93080 + }, + { + "epoch": 612.4342105263158, + "grad_norm": 1.2291126251220703, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 93090 + }, + { + "epoch": 612.5, + "grad_norm": 1.4525691270828247, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 93100 + }, + { + "epoch": 612.5657894736842, + "grad_norm": 0.7913857698440552, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 93110 + }, + { + "epoch": 612.6315789473684, + "grad_norm": 1.1557925939559937, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 93120 + }, + { + "epoch": 612.6973684210526, + "grad_norm": 0.9045695066452026, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 93130 + }, + { + "epoch": 612.7631578947369, + "grad_norm": 1.2888610363006592, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 93140 + }, + { + "epoch": 612.828947368421, + "grad_norm": 1.0652800798416138, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 93150 + }, + { + "epoch": 612.8947368421053, + "grad_norm": 1.246463656425476, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 93160 + }, + { + "epoch": 612.9605263157895, + "grad_norm": 0.9784409403800964, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 93170 + }, + { + "epoch": 613.0263157894736, + "grad_norm": 1.0943495035171509, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 93180 + }, + { + "epoch": 613.0921052631579, + "grad_norm": 1.1042827367782593, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 93190 + }, + { + "epoch": 613.1578947368421, + "grad_norm": 1.0616756677627563, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 93200 + }, + { + "epoch": 613.2236842105264, + "grad_norm": 1.2859280109405518, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 93210 + }, + { + "epoch": 613.2894736842105, + "grad_norm": 1.3570408821105957, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 93220 + }, + { + "epoch": 613.3552631578947, + "grad_norm": 0.9719215035438538, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 93230 + }, + { + "epoch": 613.421052631579, + "grad_norm": 1.1417118310928345, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 93240 + }, + { + "epoch": 613.4868421052631, + "grad_norm": 1.616381287574768, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 93250 + }, + { + "epoch": 613.5526315789474, + "grad_norm": 1.248619794845581, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 93260 + }, + { + "epoch": 613.6184210526316, + "grad_norm": 1.056206226348877, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 93270 + }, + { + "epoch": 613.6842105263158, + "grad_norm": 0.8425273299217224, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 93280 + }, + { + "epoch": 613.75, + "grad_norm": 0.8737837076187134, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 93290 + }, + { + "epoch": 613.8157894736842, + "grad_norm": 0.8249803185462952, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 93300 + }, + { + "epoch": 613.8815789473684, + "grad_norm": 1.2743202447891235, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 93310 + }, + { + "epoch": 613.9473684210526, + "grad_norm": 0.8276104927062988, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 93320 + }, + { + "epoch": 614.0131578947369, + "grad_norm": 0.7873020768165588, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 93330 + }, + { + "epoch": 614.078947368421, + "grad_norm": 0.9627916812896729, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 93340 + }, + { + "epoch": 614.1447368421053, + "grad_norm": 1.3203983306884766, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 93350 + }, + { + "epoch": 614.2105263157895, + "grad_norm": 0.8902283906936646, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 93360 + }, + { + "epoch": 614.2763157894736, + "grad_norm": 1.3011919260025024, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 93370 + }, + { + "epoch": 614.3421052631579, + "grad_norm": 1.173582673072815, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 93380 + }, + { + "epoch": 614.4078947368421, + "grad_norm": 0.8310754299163818, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 93390 + }, + { + "epoch": 614.4736842105264, + "grad_norm": 1.0314079523086548, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 93400 + }, + { + "epoch": 614.5394736842105, + "grad_norm": 0.9039306640625, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 93410 + }, + { + "epoch": 614.6052631578947, + "grad_norm": 0.8027017116546631, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 93420 + }, + { + "epoch": 614.671052631579, + "grad_norm": 0.8218366503715515, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 93430 + }, + { + "epoch": 614.7368421052631, + "grad_norm": 0.8938267827033997, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 93440 + }, + { + "epoch": 614.8026315789474, + "grad_norm": 0.6793553829193115, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 93450 + }, + { + "epoch": 614.8684210526316, + "grad_norm": 1.1240465641021729, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 93460 + }, + { + "epoch": 614.9342105263158, + "grad_norm": 1.2319480180740356, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 93470 + }, + { + "epoch": 615.0, + "grad_norm": 1.1686429977416992, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 93480 + }, + { + "epoch": 615.0657894736842, + "grad_norm": 1.3799974918365479, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 93490 + }, + { + "epoch": 615.1315789473684, + "grad_norm": 0.9599565863609314, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 93500 + }, + { + "epoch": 615.1973684210526, + "grad_norm": 0.8217589855194092, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 93510 + }, + { + "epoch": 615.2631578947369, + "grad_norm": 1.090279221534729, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 93520 + }, + { + "epoch": 615.328947368421, + "grad_norm": 0.7684426307678223, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 93530 + }, + { + "epoch": 615.3947368421053, + "grad_norm": 1.3158767223358154, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 93540 + }, + { + "epoch": 615.4605263157895, + "grad_norm": 1.1429448127746582, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 93550 + }, + { + "epoch": 615.5263157894736, + "grad_norm": 1.2864338159561157, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 93560 + }, + { + "epoch": 615.5921052631579, + "grad_norm": 1.0945682525634766, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 93570 + }, + { + "epoch": 615.6578947368421, + "grad_norm": 0.966946542263031, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 93580 + }, + { + "epoch": 615.7236842105264, + "grad_norm": 0.7336428761482239, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 93590 + }, + { + "epoch": 615.7894736842105, + "grad_norm": 0.6933264136314392, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 93600 + }, + { + "epoch": 615.8552631578947, + "grad_norm": 1.118443489074707, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 93610 + }, + { + "epoch": 615.921052631579, + "grad_norm": 1.2565184831619263, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 93620 + }, + { + "epoch": 615.9868421052631, + "grad_norm": 1.2820919752120972, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 93630 + }, + { + "epoch": 616.0526315789474, + "grad_norm": 1.0800106525421143, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 93640 + }, + { + "epoch": 616.1184210526316, + "grad_norm": 1.172425627708435, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 93650 + }, + { + "epoch": 616.1842105263158, + "grad_norm": 1.2287852764129639, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 93660 + }, + { + "epoch": 616.25, + "grad_norm": 1.2166169881820679, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 93670 + }, + { + "epoch": 616.3157894736842, + "grad_norm": 0.8677031397819519, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 93680 + }, + { + "epoch": 616.3815789473684, + "grad_norm": 1.3982559442520142, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 93690 + }, + { + "epoch": 616.4473684210526, + "grad_norm": 1.3162497282028198, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 93700 + }, + { + "epoch": 616.5131578947369, + "grad_norm": 1.3209203481674194, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 93710 + }, + { + "epoch": 616.578947368421, + "grad_norm": 1.223204493522644, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 93720 + }, + { + "epoch": 616.6447368421053, + "grad_norm": 1.7229893207550049, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 93730 + }, + { + "epoch": 616.7105263157895, + "grad_norm": 1.5067758560180664, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 93740 + }, + { + "epoch": 616.7763157894736, + "grad_norm": 1.7268178462982178, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 93750 + }, + { + "epoch": 616.8421052631579, + "grad_norm": 1.096575140953064, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 93760 + }, + { + "epoch": 616.9078947368421, + "grad_norm": 1.145380973815918, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 93770 + }, + { + "epoch": 616.9736842105264, + "grad_norm": 1.3325169086456299, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 93780 + }, + { + "epoch": 617.0394736842105, + "grad_norm": 1.6675764322280884, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 93790 + }, + { + "epoch": 617.1052631578947, + "grad_norm": 1.2384144067764282, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 93800 + }, + { + "epoch": 617.171052631579, + "grad_norm": 1.222592830657959, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 93810 + }, + { + "epoch": 617.2368421052631, + "grad_norm": 1.2035316228866577, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 93820 + }, + { + "epoch": 617.3026315789474, + "grad_norm": 1.2100892066955566, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 93830 + }, + { + "epoch": 617.3684210526316, + "grad_norm": 1.365777850151062, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 93840 + }, + { + "epoch": 617.4342105263158, + "grad_norm": 1.5900193452835083, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 93850 + }, + { + "epoch": 617.5, + "grad_norm": 1.3592753410339355, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 93860 + }, + { + "epoch": 617.5657894736842, + "grad_norm": 1.417954683303833, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 93870 + }, + { + "epoch": 617.6315789473684, + "grad_norm": 1.517095923423767, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 93880 + }, + { + "epoch": 617.6973684210526, + "grad_norm": 1.3372291326522827, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 93890 + }, + { + "epoch": 617.7631578947369, + "grad_norm": 1.4415357112884521, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 93900 + }, + { + "epoch": 617.828947368421, + "grad_norm": 0.986171543598175, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 93910 + }, + { + "epoch": 617.8947368421053, + "grad_norm": 1.1450518369674683, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 93920 + }, + { + "epoch": 617.9605263157895, + "grad_norm": 1.439809799194336, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 93930 + }, + { + "epoch": 618.0263157894736, + "grad_norm": 1.4576247930526733, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 93940 + }, + { + "epoch": 618.0921052631579, + "grad_norm": 1.307482361793518, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 93950 + }, + { + "epoch": 618.1578947368421, + "grad_norm": 1.5274423360824585, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 93960 + }, + { + "epoch": 618.2236842105264, + "grad_norm": 1.0210192203521729, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 93970 + }, + { + "epoch": 618.2894736842105, + "grad_norm": 1.1291130781173706, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 93980 + }, + { + "epoch": 618.3552631578947, + "grad_norm": 0.7608104944229126, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 93990 + }, + { + "epoch": 618.421052631579, + "grad_norm": 0.9358824491500854, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 94000 + }, + { + "epoch": 618.4868421052631, + "grad_norm": 0.9126713871955872, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 94010 + }, + { + "epoch": 618.5526315789474, + "grad_norm": 0.9021777510643005, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 94020 + }, + { + "epoch": 618.6184210526316, + "grad_norm": 0.9748514890670776, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 94030 + }, + { + "epoch": 618.6842105263158, + "grad_norm": 0.7533281445503235, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 94040 + }, + { + "epoch": 618.75, + "grad_norm": 0.7163051962852478, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 94050 + }, + { + "epoch": 618.8157894736842, + "grad_norm": 1.0862754583358765, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 94060 + }, + { + "epoch": 618.8815789473684, + "grad_norm": 0.8490597009658813, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 94070 + }, + { + "epoch": 618.9473684210526, + "grad_norm": 1.2494447231292725, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 94080 + }, + { + "epoch": 619.0131578947369, + "grad_norm": 1.0552870035171509, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 94090 + }, + { + "epoch": 619.078947368421, + "grad_norm": 0.8766233921051025, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 94100 + }, + { + "epoch": 619.1447368421053, + "grad_norm": 1.181583285331726, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 94110 + }, + { + "epoch": 619.2105263157895, + "grad_norm": 1.25386381149292, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 94120 + }, + { + "epoch": 619.2763157894736, + "grad_norm": 1.0907665491104126, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 94130 + }, + { + "epoch": 619.3421052631579, + "grad_norm": 1.258827567100525, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 94140 + }, + { + "epoch": 619.4078947368421, + "grad_norm": 1.2584177255630493, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 94150 + }, + { + "epoch": 619.4736842105264, + "grad_norm": 1.2424556016921997, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 94160 + }, + { + "epoch": 619.5394736842105, + "grad_norm": 1.395603060722351, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 94170 + }, + { + "epoch": 619.6052631578947, + "grad_norm": 0.9532821774482727, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 94180 + }, + { + "epoch": 619.671052631579, + "grad_norm": 1.263140082359314, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 94190 + }, + { + "epoch": 619.7368421052631, + "grad_norm": 0.8259878754615784, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 94200 + }, + { + "epoch": 619.8026315789474, + "grad_norm": 1.4339922666549683, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 94210 + }, + { + "epoch": 619.8684210526316, + "grad_norm": 0.9723814725875854, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 94220 + }, + { + "epoch": 619.9342105263158, + "grad_norm": 0.9586044549942017, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 94230 + }, + { + "epoch": 620.0, + "grad_norm": 0.9816604852676392, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 94240 + }, + { + "epoch": 620.0657894736842, + "grad_norm": 0.8294615149497986, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 94250 + }, + { + "epoch": 620.1315789473684, + "grad_norm": 0.8866937160491943, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 94260 + }, + { + "epoch": 620.1973684210526, + "grad_norm": 0.9527875781059265, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 94270 + }, + { + "epoch": 620.2631578947369, + "grad_norm": 1.1666209697723389, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 94280 + }, + { + "epoch": 620.328947368421, + "grad_norm": 0.9513602256774902, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 94290 + }, + { + "epoch": 620.3947368421053, + "grad_norm": 0.7834760546684265, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 94300 + }, + { + "epoch": 620.4605263157895, + "grad_norm": 0.9751595258712769, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 94310 + }, + { + "epoch": 620.5263157894736, + "grad_norm": 1.2259643077850342, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 94320 + }, + { + "epoch": 620.5921052631579, + "grad_norm": 0.9753211736679077, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 94330 + }, + { + "epoch": 620.6578947368421, + "grad_norm": 1.5966567993164062, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 94340 + }, + { + "epoch": 620.7236842105264, + "grad_norm": 1.2838525772094727, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 94350 + }, + { + "epoch": 620.7894736842105, + "grad_norm": 1.416764497756958, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 94360 + }, + { + "epoch": 620.8552631578947, + "grad_norm": 1.317114233970642, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 94370 + }, + { + "epoch": 620.921052631579, + "grad_norm": 1.6000310182571411, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 94380 + }, + { + "epoch": 620.9868421052631, + "grad_norm": 1.296604037284851, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 94390 + }, + { + "epoch": 621.0526315789474, + "grad_norm": 1.1899832487106323, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 94400 + }, + { + "epoch": 621.1184210526316, + "grad_norm": 1.3651827573776245, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 94410 + }, + { + "epoch": 621.1842105263158, + "grad_norm": 1.230488657951355, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 94420 + }, + { + "epoch": 621.25, + "grad_norm": 1.2421315908432007, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 94430 + }, + { + "epoch": 621.3157894736842, + "grad_norm": 1.5289520025253296, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 94440 + }, + { + "epoch": 621.3815789473684, + "grad_norm": 1.2667958736419678, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 94450 + }, + { + "epoch": 621.4473684210526, + "grad_norm": 1.2462661266326904, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 94460 + }, + { + "epoch": 621.5131578947369, + "grad_norm": 1.1925386190414429, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 94470 + }, + { + "epoch": 621.578947368421, + "grad_norm": 0.7232915759086609, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 94480 + }, + { + "epoch": 621.6447368421053, + "grad_norm": 0.9784261584281921, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 94490 + }, + { + "epoch": 621.7105263157895, + "grad_norm": 0.8584251999855042, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 94500 + }, + { + "epoch": 621.7763157894736, + "grad_norm": 1.0461781024932861, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 94510 + }, + { + "epoch": 621.8421052631579, + "grad_norm": 0.9463827013969421, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 94520 + }, + { + "epoch": 621.9078947368421, + "grad_norm": 0.9517367482185364, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 94530 + }, + { + "epoch": 621.9736842105264, + "grad_norm": 1.1361017227172852, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 94540 + }, + { + "epoch": 622.0394736842105, + "grad_norm": 1.1935226917266846, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 94550 + }, + { + "epoch": 622.1052631578947, + "grad_norm": 1.004470705986023, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 94560 + }, + { + "epoch": 622.171052631579, + "grad_norm": 1.2536550760269165, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 94570 + }, + { + "epoch": 622.2368421052631, + "grad_norm": 1.262121319770813, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 94580 + }, + { + "epoch": 622.3026315789474, + "grad_norm": 1.241873860359192, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 94590 + }, + { + "epoch": 622.3684210526316, + "grad_norm": 0.9200069308280945, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 94600 + }, + { + "epoch": 622.4342105263158, + "grad_norm": 0.8964002132415771, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 94610 + }, + { + "epoch": 622.5, + "grad_norm": 1.2735228538513184, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 94620 + }, + { + "epoch": 622.5657894736842, + "grad_norm": 1.320853590965271, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 94630 + }, + { + "epoch": 622.6315789473684, + "grad_norm": 1.3654546737670898, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 94640 + }, + { + "epoch": 622.6973684210526, + "grad_norm": 0.8524348139762878, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 94650 + }, + { + "epoch": 622.7631578947369, + "grad_norm": 1.1246964931488037, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 94660 + }, + { + "epoch": 622.828947368421, + "grad_norm": 1.0840219259262085, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 94670 + }, + { + "epoch": 622.8947368421053, + "grad_norm": 0.9175322651863098, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 94680 + }, + { + "epoch": 622.9605263157895, + "grad_norm": 0.6897749900817871, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 94690 + }, + { + "epoch": 623.0263157894736, + "grad_norm": 1.313354730606079, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 94700 + }, + { + "epoch": 623.0921052631579, + "grad_norm": 0.9614347815513611, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 94710 + }, + { + "epoch": 623.1578947368421, + "grad_norm": 1.1975575685501099, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 94720 + }, + { + "epoch": 623.2236842105264, + "grad_norm": 1.0742084980010986, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 94730 + }, + { + "epoch": 623.2894736842105, + "grad_norm": 0.9996278882026672, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 94740 + }, + { + "epoch": 623.3552631578947, + "grad_norm": 1.3011069297790527, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 94750 + }, + { + "epoch": 623.421052631579, + "grad_norm": 0.7885226011276245, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 94760 + }, + { + "epoch": 623.4868421052631, + "grad_norm": 0.9483335018157959, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 94770 + }, + { + "epoch": 623.5526315789474, + "grad_norm": 1.2516369819641113, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 94780 + }, + { + "epoch": 623.6184210526316, + "grad_norm": 1.041446566581726, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 94790 + }, + { + "epoch": 623.6842105263158, + "grad_norm": 1.1910645961761475, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 94800 + }, + { + "epoch": 623.75, + "grad_norm": 1.1067719459533691, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 94810 + }, + { + "epoch": 623.8157894736842, + "grad_norm": 1.0154963731765747, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 94820 + }, + { + "epoch": 623.8815789473684, + "grad_norm": 1.08232843875885, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 94830 + }, + { + "epoch": 623.9473684210526, + "grad_norm": 1.1486544609069824, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 94840 + }, + { + "epoch": 624.0131578947369, + "grad_norm": 1.1820085048675537, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 94850 + }, + { + "epoch": 624.078947368421, + "grad_norm": 1.095815658569336, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 94860 + }, + { + "epoch": 624.1447368421053, + "grad_norm": 0.9863499999046326, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 94870 + }, + { + "epoch": 624.2105263157895, + "grad_norm": 1.2064094543457031, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 94880 + }, + { + "epoch": 624.2763157894736, + "grad_norm": 0.7593415379524231, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 94890 + }, + { + "epoch": 624.3421052631579, + "grad_norm": 0.7509251832962036, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 94900 + }, + { + "epoch": 624.4078947368421, + "grad_norm": 0.8254883289337158, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 94910 + }, + { + "epoch": 624.4736842105264, + "grad_norm": 1.2074943780899048, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 94920 + }, + { + "epoch": 624.5394736842105, + "grad_norm": 1.1988282203674316, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 94930 + }, + { + "epoch": 624.6052631578947, + "grad_norm": 1.4289294481277466, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 94940 + }, + { + "epoch": 624.671052631579, + "grad_norm": 1.4623303413391113, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 94950 + }, + { + "epoch": 624.7368421052631, + "grad_norm": 1.0225207805633545, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 94960 + }, + { + "epoch": 624.8026315789474, + "grad_norm": 1.1724567413330078, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 94970 + }, + { + "epoch": 624.8684210526316, + "grad_norm": 1.0636459589004517, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 94980 + }, + { + "epoch": 624.9342105263158, + "grad_norm": 1.551671028137207, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 94990 + }, + { + "epoch": 625.0, + "grad_norm": 1.9914339780807495, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 95000 + }, + { + "epoch": 625.0657894736842, + "grad_norm": 1.3119971752166748, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 95010 + }, + { + "epoch": 625.1315789473684, + "grad_norm": 1.2091944217681885, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 95020 + }, + { + "epoch": 625.1973684210526, + "grad_norm": 1.333477258682251, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 95030 + }, + { + "epoch": 625.2631578947369, + "grad_norm": 1.5082767009735107, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 95040 + }, + { + "epoch": 625.328947368421, + "grad_norm": 1.2716140747070312, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 95050 + }, + { + "epoch": 625.3947368421053, + "grad_norm": 1.2596821784973145, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 95060 + }, + { + "epoch": 625.4605263157895, + "grad_norm": 1.198418140411377, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 95070 + }, + { + "epoch": 625.5263157894736, + "grad_norm": 1.1842753887176514, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 95080 + }, + { + "epoch": 625.5921052631579, + "grad_norm": 1.162773847579956, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 95090 + }, + { + "epoch": 625.6578947368421, + "grad_norm": 0.8941752910614014, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 95100 + }, + { + "epoch": 625.7236842105264, + "grad_norm": 0.9903761744499207, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 95110 + }, + { + "epoch": 625.7894736842105, + "grad_norm": 1.0713928937911987, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 95120 + }, + { + "epoch": 625.8552631578947, + "grad_norm": 1.1026970148086548, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 95130 + }, + { + "epoch": 625.921052631579, + "grad_norm": 1.091262936592102, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 95140 + }, + { + "epoch": 625.9868421052631, + "grad_norm": 1.0287009477615356, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 95150 + }, + { + "epoch": 626.0526315789474, + "grad_norm": 0.9312505125999451, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 95160 + }, + { + "epoch": 626.1184210526316, + "grad_norm": 1.1395642757415771, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 95170 + }, + { + "epoch": 626.1842105263158, + "grad_norm": 1.1278079748153687, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 95180 + }, + { + "epoch": 626.25, + "grad_norm": 1.112074613571167, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 95190 + }, + { + "epoch": 626.3157894736842, + "grad_norm": 1.41941499710083, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 95200 + }, + { + "epoch": 626.3815789473684, + "grad_norm": 1.5267459154129028, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 95210 + }, + { + "epoch": 626.4473684210526, + "grad_norm": 1.2732293605804443, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 95220 + }, + { + "epoch": 626.5131578947369, + "grad_norm": 1.3131200075149536, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 95230 + }, + { + "epoch": 626.578947368421, + "grad_norm": 1.115815281867981, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 95240 + }, + { + "epoch": 626.6447368421053, + "grad_norm": 1.6979179382324219, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 95250 + }, + { + "epoch": 626.7105263157895, + "grad_norm": 1.280093789100647, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 95260 + }, + { + "epoch": 626.7763157894736, + "grad_norm": 1.349902629852295, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 95270 + }, + { + "epoch": 626.8421052631579, + "grad_norm": 1.4500685930252075, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 95280 + }, + { + "epoch": 626.9078947368421, + "grad_norm": 1.2372456789016724, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 95290 + }, + { + "epoch": 626.9736842105264, + "grad_norm": 1.7116293907165527, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 95300 + }, + { + "epoch": 627.0394736842105, + "grad_norm": 1.3504589796066284, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 95310 + }, + { + "epoch": 627.1052631578947, + "grad_norm": 1.3713061809539795, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 95320 + }, + { + "epoch": 627.171052631579, + "grad_norm": 1.5668679475784302, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 95330 + }, + { + "epoch": 627.2368421052631, + "grad_norm": 1.5698161125183105, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 95340 + }, + { + "epoch": 627.3026315789474, + "grad_norm": 1.002941370010376, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 95350 + }, + { + "epoch": 627.3684210526316, + "grad_norm": 1.125559687614441, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 95360 + }, + { + "epoch": 627.4342105263158, + "grad_norm": 0.9617181420326233, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 95370 + }, + { + "epoch": 627.5, + "grad_norm": 0.9981349110603333, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 95380 + }, + { + "epoch": 627.5657894736842, + "grad_norm": 1.1463162899017334, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 95390 + }, + { + "epoch": 627.6315789473684, + "grad_norm": 1.363034725189209, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 95400 + }, + { + "epoch": 627.6973684210526, + "grad_norm": 1.0812790393829346, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 95410 + }, + { + "epoch": 627.7631578947369, + "grad_norm": 0.9944791793823242, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 95420 + }, + { + "epoch": 627.828947368421, + "grad_norm": 1.1499660015106201, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 95430 + }, + { + "epoch": 627.8947368421053, + "grad_norm": 1.1649742126464844, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 95440 + }, + { + "epoch": 627.9605263157895, + "grad_norm": 0.9862028360366821, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 95450 + }, + { + "epoch": 628.0263157894736, + "grad_norm": 1.0337918996810913, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 95460 + }, + { + "epoch": 628.0921052631579, + "grad_norm": 1.1768397092819214, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 95470 + }, + { + "epoch": 628.1578947368421, + "grad_norm": 1.1246834993362427, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 95480 + }, + { + "epoch": 628.2236842105264, + "grad_norm": 0.6552825570106506, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 95490 + }, + { + "epoch": 628.2894736842105, + "grad_norm": 0.9042338728904724, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 95500 + }, + { + "epoch": 628.3552631578947, + "grad_norm": 1.1680855751037598, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 95510 + }, + { + "epoch": 628.421052631579, + "grad_norm": 1.1862854957580566, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 95520 + }, + { + "epoch": 628.4868421052631, + "grad_norm": 0.9259408712387085, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 95530 + }, + { + "epoch": 628.5526315789474, + "grad_norm": 1.0121794939041138, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 95540 + }, + { + "epoch": 628.6184210526316, + "grad_norm": 1.021821141242981, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 95550 + }, + { + "epoch": 628.6842105263158, + "grad_norm": 1.0887248516082764, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 95560 + }, + { + "epoch": 628.75, + "grad_norm": 1.1664204597473145, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 95570 + }, + { + "epoch": 628.8157894736842, + "grad_norm": 0.9954600930213928, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 95580 + }, + { + "epoch": 628.8815789473684, + "grad_norm": 1.174320101737976, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 95590 + }, + { + "epoch": 628.9473684210526, + "grad_norm": 1.5383800268173218, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 95600 + }, + { + "epoch": 629.0131578947369, + "grad_norm": 1.5063879489898682, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 95610 + }, + { + "epoch": 629.078947368421, + "grad_norm": 1.2808583974838257, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 95620 + }, + { + "epoch": 629.1447368421053, + "grad_norm": 1.3587077856063843, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 95630 + }, + { + "epoch": 629.2105263157895, + "grad_norm": 1.1514275074005127, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 95640 + }, + { + "epoch": 629.2763157894736, + "grad_norm": 1.3568569421768188, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 95650 + }, + { + "epoch": 629.3421052631579, + "grad_norm": 1.1111990213394165, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 95660 + }, + { + "epoch": 629.4078947368421, + "grad_norm": 1.160679578781128, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 95670 + }, + { + "epoch": 629.4736842105264, + "grad_norm": 0.875325083732605, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 95680 + }, + { + "epoch": 629.5394736842105, + "grad_norm": 0.676986575126648, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 95690 + }, + { + "epoch": 629.6052631578947, + "grad_norm": 1.0533658266067505, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 95700 + }, + { + "epoch": 629.671052631579, + "grad_norm": 1.126704454421997, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 95710 + }, + { + "epoch": 629.7368421052631, + "grad_norm": 0.875869631767273, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 95720 + }, + { + "epoch": 629.8026315789474, + "grad_norm": 0.843349277973175, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 95730 + }, + { + "epoch": 629.8684210526316, + "grad_norm": 1.0324947834014893, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 95740 + }, + { + "epoch": 629.9342105263158, + "grad_norm": 1.0827348232269287, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 95750 + }, + { + "epoch": 630.0, + "grad_norm": 0.8079311847686768, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 95760 + }, + { + "epoch": 630.0657894736842, + "grad_norm": 1.0577661991119385, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 95770 + }, + { + "epoch": 630.1315789473684, + "grad_norm": 1.0803674459457397, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 95780 + }, + { + "epoch": 630.1973684210526, + "grad_norm": 1.1568626165390015, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 95790 + }, + { + "epoch": 630.2631578947369, + "grad_norm": 0.7139347195625305, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 95800 + }, + { + "epoch": 630.328947368421, + "grad_norm": 1.54314386844635, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 95810 + }, + { + "epoch": 630.3947368421053, + "grad_norm": 0.8538906574249268, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 95820 + }, + { + "epoch": 630.4605263157895, + "grad_norm": 1.0766382217407227, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 95830 + }, + { + "epoch": 630.5263157894736, + "grad_norm": 0.9496257901191711, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 95840 + }, + { + "epoch": 630.5921052631579, + "grad_norm": 1.2234865427017212, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 95850 + }, + { + "epoch": 630.6578947368421, + "grad_norm": 0.8329818248748779, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 95860 + }, + { + "epoch": 630.7236842105264, + "grad_norm": 1.100710153579712, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 95870 + }, + { + "epoch": 630.7894736842105, + "grad_norm": 0.9329621195793152, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 95880 + }, + { + "epoch": 630.8552631578947, + "grad_norm": 1.2335596084594727, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 95890 + }, + { + "epoch": 630.921052631579, + "grad_norm": 1.0950738191604614, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 95900 + }, + { + "epoch": 630.9868421052631, + "grad_norm": 0.9743636846542358, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 95910 + }, + { + "epoch": 631.0526315789474, + "grad_norm": 0.9001186490058899, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 95920 + }, + { + "epoch": 631.1184210526316, + "grad_norm": 1.3222036361694336, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 95930 + }, + { + "epoch": 631.1842105263158, + "grad_norm": 1.2145307064056396, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 95940 + }, + { + "epoch": 631.25, + "grad_norm": 1.1518114805221558, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 95950 + }, + { + "epoch": 631.3157894736842, + "grad_norm": 1.0182396173477173, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 95960 + }, + { + "epoch": 631.3815789473684, + "grad_norm": 0.998622477054596, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 95970 + }, + { + "epoch": 631.4473684210526, + "grad_norm": 0.9129418730735779, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 95980 + }, + { + "epoch": 631.5131578947369, + "grad_norm": 0.8914371728897095, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 95990 + }, + { + "epoch": 631.578947368421, + "grad_norm": 1.324182152748108, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 96000 + }, + { + "epoch": 631.6447368421053, + "grad_norm": 1.3271923065185547, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 96010 + }, + { + "epoch": 631.7105263157895, + "grad_norm": 1.2438828945159912, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 96020 + }, + { + "epoch": 631.7763157894736, + "grad_norm": 0.808535635471344, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 96030 + }, + { + "epoch": 631.8421052631579, + "grad_norm": 1.3767280578613281, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 96040 + }, + { + "epoch": 631.9078947368421, + "grad_norm": 1.3440818786621094, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 96050 + }, + { + "epoch": 631.9736842105264, + "grad_norm": 1.1138218641281128, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 96060 + }, + { + "epoch": 632.0394736842105, + "grad_norm": 0.868145227432251, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 96070 + }, + { + "epoch": 632.1052631578947, + "grad_norm": 1.3606065511703491, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 96080 + }, + { + "epoch": 632.171052631579, + "grad_norm": 1.3854315280914307, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 96090 + }, + { + "epoch": 632.2368421052631, + "grad_norm": 1.2756513357162476, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 96100 + }, + { + "epoch": 632.3026315789474, + "grad_norm": 1.1794759035110474, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 96110 + }, + { + "epoch": 632.3684210526316, + "grad_norm": 1.2816321849822998, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 96120 + }, + { + "epoch": 632.4342105263158, + "grad_norm": 1.0967599153518677, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 96130 + }, + { + "epoch": 632.5, + "grad_norm": 1.3079113960266113, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 96140 + }, + { + "epoch": 632.5657894736842, + "grad_norm": 1.0151442289352417, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 96150 + }, + { + "epoch": 632.6315789473684, + "grad_norm": 2.014241933822632, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 96160 + }, + { + "epoch": 632.6973684210526, + "grad_norm": 1.0446006059646606, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 96170 + }, + { + "epoch": 632.7631578947369, + "grad_norm": 1.2573105096817017, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 96180 + }, + { + "epoch": 632.828947368421, + "grad_norm": 1.1705162525177002, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 96190 + }, + { + "epoch": 632.8947368421053, + "grad_norm": 1.3962786197662354, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 96200 + }, + { + "epoch": 632.9605263157895, + "grad_norm": 1.2889972925186157, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 96210 + }, + { + "epoch": 633.0263157894736, + "grad_norm": 1.30446457862854, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 96220 + }, + { + "epoch": 633.0921052631579, + "grad_norm": 1.0984090566635132, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 96230 + }, + { + "epoch": 633.1578947368421, + "grad_norm": 1.0214024782180786, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 96240 + }, + { + "epoch": 633.2236842105264, + "grad_norm": 1.3582532405853271, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 96250 + }, + { + "epoch": 633.2894736842105, + "grad_norm": 1.1658719778060913, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 96260 + }, + { + "epoch": 633.3552631578947, + "grad_norm": 1.1683430671691895, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 96270 + }, + { + "epoch": 633.421052631579, + "grad_norm": 1.399514079093933, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 96280 + }, + { + "epoch": 633.4868421052631, + "grad_norm": 1.0703054666519165, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 96290 + }, + { + "epoch": 633.5526315789474, + "grad_norm": 1.1495949029922485, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 96300 + }, + { + "epoch": 633.6184210526316, + "grad_norm": 1.1547905206680298, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 96310 + }, + { + "epoch": 633.6842105263158, + "grad_norm": 1.3389787673950195, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 96320 + }, + { + "epoch": 633.75, + "grad_norm": 0.622372031211853, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 96330 + }, + { + "epoch": 633.8157894736842, + "grad_norm": 0.7797669768333435, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 96340 + }, + { + "epoch": 633.8815789473684, + "grad_norm": 1.225666880607605, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 96350 + }, + { + "epoch": 633.9473684210526, + "grad_norm": 1.2764816284179688, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 96360 + }, + { + "epoch": 634.0131578947369, + "grad_norm": 1.3104716539382935, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 96370 + }, + { + "epoch": 634.078947368421, + "grad_norm": 1.122225046157837, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 96380 + }, + { + "epoch": 634.1447368421053, + "grad_norm": 1.331106424331665, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 96390 + }, + { + "epoch": 634.2105263157895, + "grad_norm": 1.1977465152740479, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 96400 + }, + { + "epoch": 634.2763157894736, + "grad_norm": 1.4676520824432373, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 96410 + }, + { + "epoch": 634.3421052631579, + "grad_norm": 1.1884233951568604, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 96420 + }, + { + "epoch": 634.4078947368421, + "grad_norm": 1.448885440826416, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 96430 + }, + { + "epoch": 634.4736842105264, + "grad_norm": 1.1831235885620117, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 96440 + }, + { + "epoch": 634.5394736842105, + "grad_norm": 1.1921072006225586, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 96450 + }, + { + "epoch": 634.6052631578947, + "grad_norm": 0.9832690954208374, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 96460 + }, + { + "epoch": 634.671052631579, + "grad_norm": 1.3597534894943237, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 96470 + }, + { + "epoch": 634.7368421052631, + "grad_norm": 1.2656805515289307, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 96480 + }, + { + "epoch": 634.8026315789474, + "grad_norm": 0.9572046995162964, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 96490 + }, + { + "epoch": 634.8684210526316, + "grad_norm": 1.0890827178955078, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 96500 + }, + { + "epoch": 634.9342105263158, + "grad_norm": 1.2112818956375122, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 96510 + }, + { + "epoch": 635.0, + "grad_norm": 1.3493852615356445, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 96520 + }, + { + "epoch": 635.0657894736842, + "grad_norm": 0.8873164057731628, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 96530 + }, + { + "epoch": 635.1315789473684, + "grad_norm": 1.3361443281173706, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 96540 + }, + { + "epoch": 635.1973684210526, + "grad_norm": 1.2855825424194336, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 96550 + }, + { + "epoch": 635.2631578947369, + "grad_norm": 1.0987671613693237, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 96560 + }, + { + "epoch": 635.328947368421, + "grad_norm": 0.7303910255432129, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 96570 + }, + { + "epoch": 635.3947368421053, + "grad_norm": 1.2419792413711548, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 96580 + }, + { + "epoch": 635.4605263157895, + "grad_norm": 1.311336874961853, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 96590 + }, + { + "epoch": 635.5263157894736, + "grad_norm": 1.0877469778060913, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 96600 + }, + { + "epoch": 635.5921052631579, + "grad_norm": 1.0030274391174316, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 96610 + }, + { + "epoch": 635.6578947368421, + "grad_norm": 1.3998702764511108, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 96620 + }, + { + "epoch": 635.7236842105264, + "grad_norm": 0.7409038543701172, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 96630 + }, + { + "epoch": 635.7894736842105, + "grad_norm": 0.9985183477401733, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 96640 + }, + { + "epoch": 635.8552631578947, + "grad_norm": 1.0499337911605835, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 96650 + }, + { + "epoch": 635.921052631579, + "grad_norm": 0.9747737050056458, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 96660 + }, + { + "epoch": 635.9868421052631, + "grad_norm": 1.1201547384262085, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 96670 + }, + { + "epoch": 636.0526315789474, + "grad_norm": 1.0508447885513306, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 96680 + }, + { + "epoch": 636.1184210526316, + "grad_norm": 0.8244642019271851, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 96690 + }, + { + "epoch": 636.1842105263158, + "grad_norm": 1.1868029832839966, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 96700 + }, + { + "epoch": 636.25, + "grad_norm": 1.2624951601028442, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 96710 + }, + { + "epoch": 636.3157894736842, + "grad_norm": 1.0682224035263062, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 96720 + }, + { + "epoch": 636.3815789473684, + "grad_norm": 1.52485990524292, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 96730 + }, + { + "epoch": 636.4473684210526, + "grad_norm": 1.0763731002807617, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 96740 + }, + { + "epoch": 636.5131578947369, + "grad_norm": 1.3827406167984009, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 96750 + }, + { + "epoch": 636.578947368421, + "grad_norm": 1.2517787218093872, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 96760 + }, + { + "epoch": 636.6447368421053, + "grad_norm": 1.0848556756973267, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 96770 + }, + { + "epoch": 636.7105263157895, + "grad_norm": 0.9772914052009583, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 96780 + }, + { + "epoch": 636.7763157894736, + "grad_norm": 1.1397275924682617, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 96790 + }, + { + "epoch": 636.8421052631579, + "grad_norm": 0.940963089466095, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 96800 + }, + { + "epoch": 636.9078947368421, + "grad_norm": 0.8980696797370911, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 96810 + }, + { + "epoch": 636.9736842105264, + "grad_norm": 0.9884552359580994, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 96820 + }, + { + "epoch": 637.0394736842105, + "grad_norm": 1.1750383377075195, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 96830 + }, + { + "epoch": 637.1052631578947, + "grad_norm": 1.1034425497055054, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 96840 + }, + { + "epoch": 637.171052631579, + "grad_norm": 0.7074925899505615, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 96850 + }, + { + "epoch": 637.2368421052631, + "grad_norm": 0.977379322052002, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 96860 + }, + { + "epoch": 637.3026315789474, + "grad_norm": 0.942234456539154, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 96870 + }, + { + "epoch": 637.3684210526316, + "grad_norm": 1.336562156677246, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 96880 + }, + { + "epoch": 637.4342105263158, + "grad_norm": 1.1461960077285767, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 96890 + }, + { + "epoch": 637.5, + "grad_norm": 1.023284912109375, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 96900 + }, + { + "epoch": 637.5657894736842, + "grad_norm": 1.1981929540634155, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 96910 + }, + { + "epoch": 637.6315789473684, + "grad_norm": 1.3196550607681274, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 96920 + }, + { + "epoch": 637.6973684210526, + "grad_norm": 1.042585015296936, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 96930 + }, + { + "epoch": 637.7631578947369, + "grad_norm": 0.9002341628074646, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 96940 + }, + { + "epoch": 637.828947368421, + "grad_norm": 0.8911952376365662, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 96950 + }, + { + "epoch": 637.8947368421053, + "grad_norm": 1.2505193948745728, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 96960 + }, + { + "epoch": 637.9605263157895, + "grad_norm": 1.0482158660888672, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 96970 + }, + { + "epoch": 638.0263157894736, + "grad_norm": 1.1949135065078735, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 96980 + }, + { + "epoch": 638.0921052631579, + "grad_norm": 1.1491917371749878, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 96990 + }, + { + "epoch": 638.1578947368421, + "grad_norm": 1.0446337461471558, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 97000 + }, + { + "epoch": 638.2236842105264, + "grad_norm": 1.1364582777023315, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 97010 + }, + { + "epoch": 638.2894736842105, + "grad_norm": 0.856816291809082, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 97020 + }, + { + "epoch": 638.3552631578947, + "grad_norm": 0.8455696702003479, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 97030 + }, + { + "epoch": 638.421052631579, + "grad_norm": 1.37261164188385, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 97040 + }, + { + "epoch": 638.4868421052631, + "grad_norm": 1.065354585647583, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 97050 + }, + { + "epoch": 638.5526315789474, + "grad_norm": 1.0042915344238281, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 97060 + }, + { + "epoch": 638.6184210526316, + "grad_norm": 0.9727343320846558, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 97070 + }, + { + "epoch": 638.6842105263158, + "grad_norm": 0.724888026714325, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 97080 + }, + { + "epoch": 638.75, + "grad_norm": 1.3290162086486816, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 97090 + }, + { + "epoch": 638.8157894736842, + "grad_norm": 1.107700228691101, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 97100 + }, + { + "epoch": 638.8815789473684, + "grad_norm": 1.076773762702942, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 97110 + }, + { + "epoch": 638.9473684210526, + "grad_norm": 0.8548887968063354, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 97120 + }, + { + "epoch": 639.0131578947369, + "grad_norm": 1.2806280851364136, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 97130 + }, + { + "epoch": 639.078947368421, + "grad_norm": 0.99432772397995, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 97140 + }, + { + "epoch": 639.1447368421053, + "grad_norm": 1.1284815073013306, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 97150 + }, + { + "epoch": 639.2105263157895, + "grad_norm": 1.4430863857269287, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 97160 + }, + { + "epoch": 639.2763157894736, + "grad_norm": 0.9166154265403748, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 97170 + }, + { + "epoch": 639.3421052631579, + "grad_norm": 1.2260555028915405, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 97180 + }, + { + "epoch": 639.4078947368421, + "grad_norm": 1.1222833395004272, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 97190 + }, + { + "epoch": 639.4736842105264, + "grad_norm": 1.1462934017181396, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 97200 + }, + { + "epoch": 639.5394736842105, + "grad_norm": 1.0073168277740479, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 97210 + }, + { + "epoch": 639.6052631578947, + "grad_norm": 1.0227516889572144, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 97220 + }, + { + "epoch": 639.671052631579, + "grad_norm": 0.9892854690551758, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 97230 + }, + { + "epoch": 639.7368421052631, + "grad_norm": 0.918364942073822, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 97240 + }, + { + "epoch": 639.8026315789474, + "grad_norm": 0.8302596211433411, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 97250 + }, + { + "epoch": 639.8684210526316, + "grad_norm": 1.3149603605270386, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 97260 + }, + { + "epoch": 639.9342105263158, + "grad_norm": 1.0401725769042969, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 97270 + }, + { + "epoch": 640.0, + "grad_norm": 0.8277792930603027, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 97280 + }, + { + "epoch": 640.0657894736842, + "grad_norm": 0.8482030034065247, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 97290 + }, + { + "epoch": 640.1315789473684, + "grad_norm": 1.0147836208343506, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 97300 + }, + { + "epoch": 640.1973684210526, + "grad_norm": 1.1419466733932495, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 97310 + }, + { + "epoch": 640.2631578947369, + "grad_norm": 0.7621695399284363, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 97320 + }, + { + "epoch": 640.328947368421, + "grad_norm": 1.1174577474594116, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 97330 + }, + { + "epoch": 640.3947368421053, + "grad_norm": 0.8022199273109436, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 97340 + }, + { + "epoch": 640.4605263157895, + "grad_norm": 0.8935840129852295, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 97350 + }, + { + "epoch": 640.5263157894736, + "grad_norm": 1.3368089199066162, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 97360 + }, + { + "epoch": 640.5921052631579, + "grad_norm": 1.0535829067230225, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 97370 + }, + { + "epoch": 640.6578947368421, + "grad_norm": 1.1254475116729736, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 97380 + }, + { + "epoch": 640.7236842105264, + "grad_norm": 1.1946035623550415, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 97390 + }, + { + "epoch": 640.7894736842105, + "grad_norm": 1.2451772689819336, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 97400 + }, + { + "epoch": 640.8552631578947, + "grad_norm": 0.8775976896286011, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 97410 + }, + { + "epoch": 640.921052631579, + "grad_norm": 1.0031647682189941, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 97420 + }, + { + "epoch": 640.9868421052631, + "grad_norm": 1.1627060174942017, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 97430 + }, + { + "epoch": 641.0526315789474, + "grad_norm": 0.9640074372291565, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 97440 + }, + { + "epoch": 641.1184210526316, + "grad_norm": 1.0985318422317505, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 97450 + }, + { + "epoch": 641.1842105263158, + "grad_norm": 0.9738800525665283, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 97460 + }, + { + "epoch": 641.25, + "grad_norm": 0.9755899906158447, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 97470 + }, + { + "epoch": 641.3157894736842, + "grad_norm": 1.0237241983413696, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 97480 + }, + { + "epoch": 641.3815789473684, + "grad_norm": 0.8942602872848511, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 97490 + }, + { + "epoch": 641.4473684210526, + "grad_norm": 0.7703356742858887, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 97500 + }, + { + "epoch": 641.5131578947369, + "grad_norm": 1.2847163677215576, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 97510 + }, + { + "epoch": 641.578947368421, + "grad_norm": 0.8879604339599609, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 97520 + }, + { + "epoch": 641.6447368421053, + "grad_norm": 1.2082008123397827, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 97530 + }, + { + "epoch": 641.7105263157895, + "grad_norm": 1.3666983842849731, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 97540 + }, + { + "epoch": 641.7763157894736, + "grad_norm": 1.454520583152771, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 97550 + }, + { + "epoch": 641.8421052631579, + "grad_norm": 1.0853079557418823, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 97560 + }, + { + "epoch": 641.9078947368421, + "grad_norm": 0.8811721205711365, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 97570 + }, + { + "epoch": 641.9736842105264, + "grad_norm": 1.1882359981536865, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 97580 + }, + { + "epoch": 642.0394736842105, + "grad_norm": 1.194444179534912, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 97590 + }, + { + "epoch": 642.1052631578947, + "grad_norm": 1.390655517578125, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 97600 + }, + { + "epoch": 642.171052631579, + "grad_norm": 0.7747687697410583, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 97610 + }, + { + "epoch": 642.2368421052631, + "grad_norm": 0.9693976640701294, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 97620 + }, + { + "epoch": 642.3026315789474, + "grad_norm": 1.255947470664978, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 97630 + }, + { + "epoch": 642.3684210526316, + "grad_norm": 1.0807551145553589, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 97640 + }, + { + "epoch": 642.4342105263158, + "grad_norm": 1.077979326248169, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 97650 + }, + { + "epoch": 642.5, + "grad_norm": 0.8917891979217529, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 97660 + }, + { + "epoch": 642.5657894736842, + "grad_norm": 1.0527293682098389, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 97670 + }, + { + "epoch": 642.6315789473684, + "grad_norm": 0.8535796403884888, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 97680 + }, + { + "epoch": 642.6973684210526, + "grad_norm": 0.9337286949157715, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 97690 + }, + { + "epoch": 642.7631578947369, + "grad_norm": 1.0351840257644653, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 97700 + }, + { + "epoch": 642.828947368421, + "grad_norm": 0.8320999145507812, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 97710 + }, + { + "epoch": 642.8947368421053, + "grad_norm": 1.2720904350280762, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 97720 + }, + { + "epoch": 642.9605263157895, + "grad_norm": 1.19514799118042, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 97730 + }, + { + "epoch": 643.0263157894736, + "grad_norm": 1.1567554473876953, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 97740 + }, + { + "epoch": 643.0921052631579, + "grad_norm": 0.992160975933075, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 97750 + }, + { + "epoch": 643.1578947368421, + "grad_norm": 0.9545310735702515, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 97760 + }, + { + "epoch": 643.2236842105264, + "grad_norm": 0.5216129422187805, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 97770 + }, + { + "epoch": 643.2894736842105, + "grad_norm": 1.1363471746444702, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 97780 + }, + { + "epoch": 643.3552631578947, + "grad_norm": 1.0840483903884888, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 97790 + }, + { + "epoch": 643.421052631579, + "grad_norm": 0.9298526048660278, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 97800 + }, + { + "epoch": 643.4868421052631, + "grad_norm": 0.9495653510093689, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 97810 + }, + { + "epoch": 643.5526315789474, + "grad_norm": 0.9317263960838318, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 97820 + }, + { + "epoch": 643.6184210526316, + "grad_norm": 1.0475777387619019, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 97830 + }, + { + "epoch": 643.6842105263158, + "grad_norm": 1.4607027769088745, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 97840 + }, + { + "epoch": 643.75, + "grad_norm": 0.8541473150253296, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 97850 + }, + { + "epoch": 643.8157894736842, + "grad_norm": 0.600893497467041, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 97860 + }, + { + "epoch": 643.8815789473684, + "grad_norm": 0.9018236994743347, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 97870 + }, + { + "epoch": 643.9473684210526, + "grad_norm": 0.7661747932434082, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 97880 + }, + { + "epoch": 644.0131578947369, + "grad_norm": 0.8002960085868835, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 97890 + }, + { + "epoch": 644.078947368421, + "grad_norm": 0.8436306715011597, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 97900 + }, + { + "epoch": 644.1447368421053, + "grad_norm": 1.3472542762756348, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 97910 + }, + { + "epoch": 644.2105263157895, + "grad_norm": 0.8664248585700989, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 97920 + }, + { + "epoch": 644.2763157894736, + "grad_norm": 0.9880051612854004, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 97930 + }, + { + "epoch": 644.3421052631579, + "grad_norm": 1.1191985607147217, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 97940 + }, + { + "epoch": 644.4078947368421, + "grad_norm": 1.2010605335235596, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 97950 + }, + { + "epoch": 644.4736842105264, + "grad_norm": 1.250929594039917, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 97960 + }, + { + "epoch": 644.5394736842105, + "grad_norm": 1.0687397718429565, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 97970 + }, + { + "epoch": 644.6052631578947, + "grad_norm": 0.9296792149543762, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 97980 + }, + { + "epoch": 644.671052631579, + "grad_norm": 1.3585340976715088, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 97990 + }, + { + "epoch": 644.7368421052631, + "grad_norm": 1.1659291982650757, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 98000 + }, + { + "epoch": 644.8026315789474, + "grad_norm": 1.242610216140747, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 98010 + }, + { + "epoch": 644.8684210526316, + "grad_norm": 1.096759557723999, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 98020 + }, + { + "epoch": 644.9342105263158, + "grad_norm": 1.0130127668380737, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 98030 + }, + { + "epoch": 645.0, + "grad_norm": 1.1907083988189697, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 98040 + }, + { + "epoch": 645.0657894736842, + "grad_norm": 1.232130765914917, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 98050 + }, + { + "epoch": 645.1315789473684, + "grad_norm": 1.1238503456115723, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 98060 + }, + { + "epoch": 645.1973684210526, + "grad_norm": 0.9487515091896057, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 98070 + }, + { + "epoch": 645.2631578947369, + "grad_norm": 0.9860408902168274, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 98080 + }, + { + "epoch": 645.328947368421, + "grad_norm": 0.7561169862747192, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 98090 + }, + { + "epoch": 645.3947368421053, + "grad_norm": 0.9736877679824829, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 98100 + }, + { + "epoch": 645.4605263157895, + "grad_norm": 0.6840393543243408, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 98110 + }, + { + "epoch": 645.5263157894736, + "grad_norm": 1.2354158163070679, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 98120 + }, + { + "epoch": 645.5921052631579, + "grad_norm": 1.306350827217102, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 98130 + }, + { + "epoch": 645.6578947368421, + "grad_norm": 1.1757615804672241, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 98140 + }, + { + "epoch": 645.7236842105264, + "grad_norm": 1.1148210763931274, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 98150 + }, + { + "epoch": 645.7894736842105, + "grad_norm": 1.281851887702942, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 98160 + }, + { + "epoch": 645.8552631578947, + "grad_norm": 0.8599856495857239, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 98170 + }, + { + "epoch": 645.921052631579, + "grad_norm": 0.9599358439445496, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 98180 + }, + { + "epoch": 645.9868421052631, + "grad_norm": 1.0771713256835938, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 98190 + }, + { + "epoch": 646.0526315789474, + "grad_norm": 0.7740371227264404, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 98200 + }, + { + "epoch": 646.1184210526316, + "grad_norm": 1.125326156616211, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 98210 + }, + { + "epoch": 646.1842105263158, + "grad_norm": 1.2309529781341553, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 98220 + }, + { + "epoch": 646.25, + "grad_norm": 1.0347673892974854, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 98230 + }, + { + "epoch": 646.3157894736842, + "grad_norm": 1.0844132900238037, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 98240 + }, + { + "epoch": 646.3815789473684, + "grad_norm": 0.7745186686515808, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 98250 + }, + { + "epoch": 646.4473684210526, + "grad_norm": 1.0603491067886353, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 98260 + }, + { + "epoch": 646.5131578947369, + "grad_norm": 1.0565520524978638, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 98270 + }, + { + "epoch": 646.578947368421, + "grad_norm": 1.1339266300201416, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 98280 + }, + { + "epoch": 646.6447368421053, + "grad_norm": 1.2667466402053833, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 98290 + }, + { + "epoch": 646.7105263157895, + "grad_norm": 1.1827952861785889, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 98300 + }, + { + "epoch": 646.7763157894736, + "grad_norm": 1.1136367321014404, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 98310 + }, + { + "epoch": 646.8421052631579, + "grad_norm": 0.8930481672286987, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 98320 + }, + { + "epoch": 646.9078947368421, + "grad_norm": 1.1677948236465454, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 98330 + }, + { + "epoch": 646.9736842105264, + "grad_norm": 1.003639817237854, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 98340 + }, + { + "epoch": 647.0394736842105, + "grad_norm": 0.9792555570602417, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 98350 + }, + { + "epoch": 647.1052631578947, + "grad_norm": 1.3798192739486694, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 98360 + }, + { + "epoch": 647.171052631579, + "grad_norm": 0.9844328165054321, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 98370 + }, + { + "epoch": 647.2368421052631, + "grad_norm": 1.2482646703720093, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 98380 + }, + { + "epoch": 647.3026315789474, + "grad_norm": 0.796341598033905, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 98390 + }, + { + "epoch": 647.3684210526316, + "grad_norm": 0.8844649791717529, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 98400 + }, + { + "epoch": 647.4342105263158, + "grad_norm": 1.4429231882095337, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 98410 + }, + { + "epoch": 647.5, + "grad_norm": 1.1434082984924316, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 98420 + }, + { + "epoch": 647.5657894736842, + "grad_norm": 1.153358817100525, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 98430 + }, + { + "epoch": 647.6315789473684, + "grad_norm": 1.1155855655670166, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 98440 + }, + { + "epoch": 647.6973684210526, + "grad_norm": 0.8420267701148987, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 98450 + }, + { + "epoch": 647.7631578947369, + "grad_norm": 0.9819120764732361, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 98460 + }, + { + "epoch": 647.828947368421, + "grad_norm": 1.6169120073318481, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 98470 + }, + { + "epoch": 647.8947368421053, + "grad_norm": 1.1635892391204834, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 98480 + }, + { + "epoch": 647.9605263157895, + "grad_norm": 1.0484620332717896, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 98490 + }, + { + "epoch": 648.0263157894736, + "grad_norm": 0.8859636783599854, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 98500 + }, + { + "epoch": 648.0921052631579, + "grad_norm": 0.9191065430641174, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 98510 + }, + { + "epoch": 648.1578947368421, + "grad_norm": 1.2865121364593506, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 98520 + }, + { + "epoch": 648.2236842105264, + "grad_norm": 1.1271616220474243, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 98530 + }, + { + "epoch": 648.2894736842105, + "grad_norm": 1.0781117677688599, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 98540 + }, + { + "epoch": 648.3552631578947, + "grad_norm": 0.8963111639022827, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 98550 + }, + { + "epoch": 648.421052631579, + "grad_norm": 0.8486378192901611, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 98560 + }, + { + "epoch": 648.4868421052631, + "grad_norm": 0.7802874445915222, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 98570 + }, + { + "epoch": 648.5526315789474, + "grad_norm": 0.9380501508712769, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 98580 + }, + { + "epoch": 648.6184210526316, + "grad_norm": 1.1357520818710327, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 98590 + }, + { + "epoch": 648.6842105263158, + "grad_norm": 0.9585915207862854, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 98600 + }, + { + "epoch": 648.75, + "grad_norm": 1.170572280883789, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 98610 + }, + { + "epoch": 648.8157894736842, + "grad_norm": 0.7782623767852783, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 98620 + }, + { + "epoch": 648.8815789473684, + "grad_norm": 1.2383381128311157, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 98630 + }, + { + "epoch": 648.9473684210526, + "grad_norm": 1.1335386037826538, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 98640 + }, + { + "epoch": 649.0131578947369, + "grad_norm": 1.0189485549926758, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 98650 + }, + { + "epoch": 649.078947368421, + "grad_norm": 1.0290753841400146, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 98660 + }, + { + "epoch": 649.1447368421053, + "grad_norm": 1.2437363862991333, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 98670 + }, + { + "epoch": 649.2105263157895, + "grad_norm": 0.781124472618103, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 98680 + }, + { + "epoch": 649.2763157894736, + "grad_norm": 1.2116012573242188, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 98690 + }, + { + "epoch": 649.3421052631579, + "grad_norm": 0.9548416137695312, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 98700 + }, + { + "epoch": 649.4078947368421, + "grad_norm": 0.7918787002563477, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 98710 + }, + { + "epoch": 649.4736842105264, + "grad_norm": 0.8988084197044373, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 98720 + }, + { + "epoch": 649.5394736842105, + "grad_norm": 0.9593010544776917, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 98730 + }, + { + "epoch": 649.6052631578947, + "grad_norm": 1.205809235572815, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 98740 + }, + { + "epoch": 649.671052631579, + "grad_norm": 1.1642436981201172, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 98750 + }, + { + "epoch": 649.7368421052631, + "grad_norm": 0.8834133744239807, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 98760 + }, + { + "epoch": 649.8026315789474, + "grad_norm": 1.1076525449752808, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 98770 + }, + { + "epoch": 649.8684210526316, + "grad_norm": 1.2798569202423096, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 98780 + }, + { + "epoch": 649.9342105263158, + "grad_norm": 0.937585175037384, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 98790 + }, + { + "epoch": 650.0, + "grad_norm": 1.0356471538543701, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 98800 + }, + { + "epoch": 650.0657894736842, + "grad_norm": 1.1217914819717407, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 98810 + }, + { + "epoch": 650.1315789473684, + "grad_norm": 0.9628757238388062, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 98820 + }, + { + "epoch": 650.1973684210526, + "grad_norm": 1.3987805843353271, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 98830 + }, + { + "epoch": 650.2631578947369, + "grad_norm": 1.4442050457000732, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 98840 + }, + { + "epoch": 650.328947368421, + "grad_norm": 1.1787656545639038, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 98850 + }, + { + "epoch": 650.3947368421053, + "grad_norm": 1.3678606748580933, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 98860 + }, + { + "epoch": 650.4605263157895, + "grad_norm": 1.1847083568572998, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 98870 + }, + { + "epoch": 650.5263157894736, + "grad_norm": 1.195621132850647, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 98880 + }, + { + "epoch": 650.5921052631579, + "grad_norm": 1.2353485822677612, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 98890 + }, + { + "epoch": 650.6578947368421, + "grad_norm": 0.8940504789352417, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 98900 + }, + { + "epoch": 650.7236842105264, + "grad_norm": 1.0895377397537231, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 98910 + }, + { + "epoch": 650.7894736842105, + "grad_norm": 1.0993573665618896, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 98920 + }, + { + "epoch": 650.8552631578947, + "grad_norm": 1.2110718488693237, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 98930 + }, + { + "epoch": 650.921052631579, + "grad_norm": 0.9158070683479309, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 98940 + }, + { + "epoch": 650.9868421052631, + "grad_norm": 1.1650333404541016, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 98950 + }, + { + "epoch": 651.0526315789474, + "grad_norm": 1.2735977172851562, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 98960 + }, + { + "epoch": 651.1184210526316, + "grad_norm": 1.1164957284927368, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 98970 + }, + { + "epoch": 651.1842105263158, + "grad_norm": 1.1054691076278687, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 98980 + }, + { + "epoch": 651.25, + "grad_norm": 1.2364662885665894, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 98990 + }, + { + "epoch": 651.3157894736842, + "grad_norm": 1.0556890964508057, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 99000 + }, + { + "epoch": 651.3815789473684, + "grad_norm": 1.3006346225738525, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 99010 + }, + { + "epoch": 651.4473684210526, + "grad_norm": 1.3828588724136353, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 99020 + }, + { + "epoch": 651.5131578947369, + "grad_norm": 1.1058744192123413, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 99030 + }, + { + "epoch": 651.578947368421, + "grad_norm": 1.0962953567504883, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 99040 + }, + { + "epoch": 651.6447368421053, + "grad_norm": 1.1675875186920166, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 99050 + }, + { + "epoch": 651.7105263157895, + "grad_norm": 1.486832857131958, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 99060 + }, + { + "epoch": 651.7763157894736, + "grad_norm": 0.9721072912216187, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 99070 + }, + { + "epoch": 651.8421052631579, + "grad_norm": 1.1138964891433716, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 99080 + }, + { + "epoch": 651.9078947368421, + "grad_norm": 0.9725443124771118, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 99090 + }, + { + "epoch": 651.9736842105264, + "grad_norm": 1.2599581480026245, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 99100 + }, + { + "epoch": 652.0394736842105, + "grad_norm": 0.8848244547843933, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 99110 + }, + { + "epoch": 652.1052631578947, + "grad_norm": 1.0993847846984863, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 99120 + }, + { + "epoch": 652.171052631579, + "grad_norm": 1.1078648567199707, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 99130 + }, + { + "epoch": 652.2368421052631, + "grad_norm": 1.1154159307479858, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 99140 + }, + { + "epoch": 652.3026315789474, + "grad_norm": 1.2033315896987915, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 99150 + }, + { + "epoch": 652.3684210526316, + "grad_norm": 0.8731604218482971, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 99160 + }, + { + "epoch": 652.4342105263158, + "grad_norm": 1.3480206727981567, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 99170 + }, + { + "epoch": 652.5, + "grad_norm": 1.2460421323776245, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 99180 + }, + { + "epoch": 652.5657894736842, + "grad_norm": 1.5589008331298828, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 99190 + }, + { + "epoch": 652.6315789473684, + "grad_norm": 1.400813341140747, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 99200 + }, + { + "epoch": 652.6973684210526, + "grad_norm": 1.5220674276351929, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 99210 + }, + { + "epoch": 652.7631578947369, + "grad_norm": 1.3454396724700928, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 99220 + }, + { + "epoch": 652.828947368421, + "grad_norm": 1.3195019960403442, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 99230 + }, + { + "epoch": 652.8947368421053, + "grad_norm": 1.0843956470489502, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 99240 + }, + { + "epoch": 652.9605263157895, + "grad_norm": 1.2075053453445435, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 99250 + }, + { + "epoch": 653.0263157894736, + "grad_norm": 1.3347957134246826, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 99260 + }, + { + "epoch": 653.0921052631579, + "grad_norm": 1.1405830383300781, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 99270 + }, + { + "epoch": 653.1578947368421, + "grad_norm": 0.9808728098869324, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 99280 + }, + { + "epoch": 653.2236842105264, + "grad_norm": 1.1668258905410767, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 99290 + }, + { + "epoch": 653.2894736842105, + "grad_norm": 1.2372167110443115, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 99300 + }, + { + "epoch": 653.3552631578947, + "grad_norm": 1.0494773387908936, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 99310 + }, + { + "epoch": 653.421052631579, + "grad_norm": 1.203221082687378, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 99320 + }, + { + "epoch": 653.4868421052631, + "grad_norm": 1.0435203313827515, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 99330 + }, + { + "epoch": 653.5526315789474, + "grad_norm": 1.1974724531173706, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 99340 + }, + { + "epoch": 653.6184210526316, + "grad_norm": 0.9907897710800171, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 99350 + }, + { + "epoch": 653.6842105263158, + "grad_norm": 0.9244227409362793, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 99360 + }, + { + "epoch": 653.75, + "grad_norm": 0.8877261877059937, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 99370 + }, + { + "epoch": 653.8157894736842, + "grad_norm": 0.8094327449798584, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 99380 + }, + { + "epoch": 653.8815789473684, + "grad_norm": 1.055003046989441, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 99390 + }, + { + "epoch": 653.9473684210526, + "grad_norm": 1.1381503343582153, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 99400 + }, + { + "epoch": 654.0131578947369, + "grad_norm": 0.7804838418960571, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 99410 + }, + { + "epoch": 654.078947368421, + "grad_norm": 1.3052879571914673, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 99420 + }, + { + "epoch": 654.1447368421053, + "grad_norm": 0.9438977241516113, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 99430 + }, + { + "epoch": 654.2105263157895, + "grad_norm": 1.324198603630066, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 99440 + }, + { + "epoch": 654.2763157894736, + "grad_norm": 1.0651750564575195, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 99450 + }, + { + "epoch": 654.3421052631579, + "grad_norm": 1.2382241487503052, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 99460 + }, + { + "epoch": 654.4078947368421, + "grad_norm": 1.0247857570648193, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 99470 + }, + { + "epoch": 654.4736842105264, + "grad_norm": 0.8664435744285583, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 99480 + }, + { + "epoch": 654.5394736842105, + "grad_norm": 1.1170216798782349, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 99490 + }, + { + "epoch": 654.6052631578947, + "grad_norm": 0.9370788931846619, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 99500 + }, + { + "epoch": 654.671052631579, + "grad_norm": 1.2386893033981323, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 99510 + }, + { + "epoch": 654.7368421052631, + "grad_norm": 1.4200820922851562, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 99520 + }, + { + "epoch": 654.8026315789474, + "grad_norm": 1.3450512886047363, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 99530 + }, + { + "epoch": 654.8684210526316, + "grad_norm": 1.1321581602096558, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 99540 + }, + { + "epoch": 654.9342105263158, + "grad_norm": 1.3326202630996704, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 99550 + }, + { + "epoch": 655.0, + "grad_norm": 0.9542607665061951, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 99560 + }, + { + "epoch": 655.0657894736842, + "grad_norm": 1.4707971811294556, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 99570 + }, + { + "epoch": 655.1315789473684, + "grad_norm": 1.3422001600265503, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 99580 + }, + { + "epoch": 655.1973684210526, + "grad_norm": 1.2660077810287476, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 99590 + }, + { + "epoch": 655.2631578947369, + "grad_norm": 1.3600062131881714, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 99600 + }, + { + "epoch": 655.328947368421, + "grad_norm": 1.2667269706726074, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 99610 + }, + { + "epoch": 655.3947368421053, + "grad_norm": 1.327304482460022, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 99620 + }, + { + "epoch": 655.4605263157895, + "grad_norm": 1.309645414352417, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 99630 + }, + { + "epoch": 655.5263157894736, + "grad_norm": 0.8108488917350769, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 99640 + }, + { + "epoch": 655.5921052631579, + "grad_norm": 0.985109806060791, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 99650 + }, + { + "epoch": 655.6578947368421, + "grad_norm": 0.8888941407203674, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 99660 + }, + { + "epoch": 655.7236842105264, + "grad_norm": 1.006456971168518, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 99670 + }, + { + "epoch": 655.7894736842105, + "grad_norm": 1.1159669160842896, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 99680 + }, + { + "epoch": 655.8552631578947, + "grad_norm": 0.9909751415252686, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 99690 + }, + { + "epoch": 655.921052631579, + "grad_norm": 1.045996904373169, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 99700 + }, + { + "epoch": 655.9868421052631, + "grad_norm": 1.0817979574203491, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 99710 + }, + { + "epoch": 656.0526315789474, + "grad_norm": 1.2081537246704102, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 99720 + }, + { + "epoch": 656.1184210526316, + "grad_norm": 0.9195104241371155, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 99730 + }, + { + "epoch": 656.1842105263158, + "grad_norm": 1.1696057319641113, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 99740 + }, + { + "epoch": 656.25, + "grad_norm": 1.0604538917541504, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 99750 + }, + { + "epoch": 656.3157894736842, + "grad_norm": 1.1938915252685547, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 99760 + }, + { + "epoch": 656.3815789473684, + "grad_norm": 1.0641862154006958, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 99770 + }, + { + "epoch": 656.4473684210526, + "grad_norm": 0.7114510536193848, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 99780 + }, + { + "epoch": 656.5131578947369, + "grad_norm": 1.3345415592193604, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 99790 + }, + { + "epoch": 656.578947368421, + "grad_norm": 1.2074189186096191, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 99800 + }, + { + "epoch": 656.6447368421053, + "grad_norm": 1.2852963209152222, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 99810 + }, + { + "epoch": 656.7105263157895, + "grad_norm": 0.9789184331893921, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 99820 + }, + { + "epoch": 656.7763157894736, + "grad_norm": 1.1512428522109985, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 99830 + }, + { + "epoch": 656.8421052631579, + "grad_norm": 0.9603427052497864, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 99840 + }, + { + "epoch": 656.9078947368421, + "grad_norm": 0.7649758458137512, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 99850 + }, + { + "epoch": 656.9736842105264, + "grad_norm": 1.2759963274002075, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 99860 + }, + { + "epoch": 657.0394736842105, + "grad_norm": 0.8978010416030884, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 99870 + }, + { + "epoch": 657.1052631578947, + "grad_norm": 1.0761810541152954, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 99880 + }, + { + "epoch": 657.171052631579, + "grad_norm": 1.0069093704223633, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 99890 + }, + { + "epoch": 657.2368421052631, + "grad_norm": 1.188727855682373, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 99900 + }, + { + "epoch": 657.3026315789474, + "grad_norm": 1.0814998149871826, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 99910 + }, + { + "epoch": 657.3684210526316, + "grad_norm": 1.085719347000122, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 99920 + }, + { + "epoch": 657.4342105263158, + "grad_norm": 1.0952857732772827, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 99930 + }, + { + "epoch": 657.5, + "grad_norm": 0.7331225275993347, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 99940 + }, + { + "epoch": 657.5657894736842, + "grad_norm": 1.202723503112793, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 99950 + }, + { + "epoch": 657.6315789473684, + "grad_norm": 1.0389039516448975, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 99960 + }, + { + "epoch": 657.6973684210526, + "grad_norm": 0.9595016241073608, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 99970 + }, + { + "epoch": 657.7631578947369, + "grad_norm": 0.8788406848907471, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 99980 + }, + { + "epoch": 657.828947368421, + "grad_norm": 1.0672800540924072, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 99990 + }, + { + "epoch": 657.8947368421053, + "grad_norm": 1.2291871309280396, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 100000 + }, + { + "epoch": 657.9605263157895, + "grad_norm": 1.470028042793274, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 100010 + }, + { + "epoch": 658.0263157894736, + "grad_norm": 1.5565192699432373, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 100020 + }, + { + "epoch": 658.0921052631579, + "grad_norm": 1.5067261457443237, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 100030 + }, + { + "epoch": 658.1578947368421, + "grad_norm": 1.442273497581482, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 100040 + }, + { + "epoch": 658.2236842105264, + "grad_norm": 1.4687352180480957, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 100050 + }, + { + "epoch": 658.2894736842105, + "grad_norm": 1.2669801712036133, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 100060 + }, + { + "epoch": 658.3552631578947, + "grad_norm": 0.9612995386123657, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 100070 + }, + { + "epoch": 658.421052631579, + "grad_norm": 1.0884873867034912, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 100080 + }, + { + "epoch": 658.4868421052631, + "grad_norm": 1.34589421749115, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 100090 + }, + { + "epoch": 658.5526315789474, + "grad_norm": 1.1699200868606567, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 100100 + }, + { + "epoch": 658.6184210526316, + "grad_norm": 0.9280645847320557, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 100110 + }, + { + "epoch": 658.6842105263158, + "grad_norm": 1.307676911354065, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 100120 + }, + { + "epoch": 658.75, + "grad_norm": 1.0661935806274414, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 100130 + }, + { + "epoch": 658.8157894736842, + "grad_norm": 0.9746012687683105, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 100140 + }, + { + "epoch": 658.8815789473684, + "grad_norm": 0.9165105223655701, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 100150 + }, + { + "epoch": 658.9473684210526, + "grad_norm": 1.2205272912979126, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 100160 + }, + { + "epoch": 659.0131578947369, + "grad_norm": 0.8076446652412415, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 100170 + }, + { + "epoch": 659.078947368421, + "grad_norm": 0.9755529165267944, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 100180 + }, + { + "epoch": 659.1447368421053, + "grad_norm": 1.021377682685852, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 100190 + }, + { + "epoch": 659.2105263157895, + "grad_norm": 1.2089534997940063, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 100200 + }, + { + "epoch": 659.2763157894736, + "grad_norm": 1.0684491395950317, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 100210 + }, + { + "epoch": 659.3421052631579, + "grad_norm": 1.0578230619430542, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 100220 + }, + { + "epoch": 659.4078947368421, + "grad_norm": 1.1062239408493042, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 100230 + }, + { + "epoch": 659.4736842105264, + "grad_norm": 1.1410943269729614, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 100240 + }, + { + "epoch": 659.5394736842105, + "grad_norm": 1.0092296600341797, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 100250 + }, + { + "epoch": 659.6052631578947, + "grad_norm": 1.231850266456604, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 100260 + }, + { + "epoch": 659.671052631579, + "grad_norm": 1.1187002658843994, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 100270 + }, + { + "epoch": 659.7368421052631, + "grad_norm": 1.5265558958053589, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 100280 + }, + { + "epoch": 659.8026315789474, + "grad_norm": 1.3116233348846436, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 100290 + }, + { + "epoch": 659.8684210526316, + "grad_norm": 1.1131277084350586, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 100300 + }, + { + "epoch": 659.9342105263158, + "grad_norm": 0.9748517274856567, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 100310 + }, + { + "epoch": 660.0, + "grad_norm": 0.9195544719696045, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 100320 + }, + { + "epoch": 660.0657894736842, + "grad_norm": 1.3856765031814575, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 100330 + }, + { + "epoch": 660.1315789473684, + "grad_norm": 1.0203367471694946, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 100340 + }, + { + "epoch": 660.1973684210526, + "grad_norm": 1.344549536705017, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 100350 + }, + { + "epoch": 660.2631578947369, + "grad_norm": 1.2398855686187744, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 100360 + }, + { + "epoch": 660.328947368421, + "grad_norm": 1.2883145809173584, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 100370 + }, + { + "epoch": 660.3947368421053, + "grad_norm": 1.2523137331008911, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 100380 + }, + { + "epoch": 660.4605263157895, + "grad_norm": 0.7788773775100708, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 100390 + }, + { + "epoch": 660.5263157894736, + "grad_norm": 1.165600061416626, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 100400 + }, + { + "epoch": 660.5921052631579, + "grad_norm": 0.8959761261940002, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 100410 + }, + { + "epoch": 660.6578947368421, + "grad_norm": 0.9459083676338196, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 100420 + }, + { + "epoch": 660.7236842105264, + "grad_norm": 1.0725597143173218, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 100430 + }, + { + "epoch": 660.7894736842105, + "grad_norm": 1.0163500308990479, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 100440 + }, + { + "epoch": 660.8552631578947, + "grad_norm": 1.0939759016036987, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 100450 + }, + { + "epoch": 660.921052631579, + "grad_norm": 1.2891944646835327, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 100460 + }, + { + "epoch": 660.9868421052631, + "grad_norm": 0.9678558111190796, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 100470 + }, + { + "epoch": 661.0526315789474, + "grad_norm": 0.6295179724693298, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 100480 + }, + { + "epoch": 661.1184210526316, + "grad_norm": 1.092713475227356, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 100490 + }, + { + "epoch": 661.1842105263158, + "grad_norm": 0.7096255421638489, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 100500 + }, + { + "epoch": 661.25, + "grad_norm": 1.1376711130142212, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 100510 + }, + { + "epoch": 661.3157894736842, + "grad_norm": 1.0104148387908936, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 100520 + }, + { + "epoch": 661.3815789473684, + "grad_norm": 0.9783036112785339, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 100530 + }, + { + "epoch": 661.4473684210526, + "grad_norm": 1.1273771524429321, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 100540 + }, + { + "epoch": 661.5131578947369, + "grad_norm": 1.2987968921661377, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 100550 + }, + { + "epoch": 661.578947368421, + "grad_norm": 1.0228668451309204, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 100560 + }, + { + "epoch": 661.6447368421053, + "grad_norm": 0.901421844959259, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 100570 + }, + { + "epoch": 661.7105263157895, + "grad_norm": 1.1462628841400146, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 100580 + }, + { + "epoch": 661.7763157894736, + "grad_norm": 0.7442108392715454, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 100590 + }, + { + "epoch": 661.8421052631579, + "grad_norm": 1.2875144481658936, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 100600 + }, + { + "epoch": 661.9078947368421, + "grad_norm": 1.1649748086929321, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 100610 + }, + { + "epoch": 661.9736842105264, + "grad_norm": 1.0540515184402466, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 100620 + }, + { + "epoch": 662.0394736842105, + "grad_norm": 1.0686025619506836, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 100630 + }, + { + "epoch": 662.1052631578947, + "grad_norm": 0.9818456768989563, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 100640 + }, + { + "epoch": 662.171052631579, + "grad_norm": 0.9439074397087097, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 100650 + }, + { + "epoch": 662.2368421052631, + "grad_norm": 0.9941381216049194, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 100660 + }, + { + "epoch": 662.3026315789474, + "grad_norm": 1.1093741655349731, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 100670 + }, + { + "epoch": 662.3684210526316, + "grad_norm": 1.1444611549377441, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 100680 + }, + { + "epoch": 662.4342105263158, + "grad_norm": 1.0933109521865845, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 100690 + }, + { + "epoch": 662.5, + "grad_norm": 1.2394286394119263, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 100700 + }, + { + "epoch": 662.5657894736842, + "grad_norm": 1.3169022798538208, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 100710 + }, + { + "epoch": 662.6315789473684, + "grad_norm": 0.8559953570365906, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 100720 + }, + { + "epoch": 662.6973684210526, + "grad_norm": 1.2426526546478271, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 100730 + }, + { + "epoch": 662.7631578947369, + "grad_norm": 1.1118953227996826, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 100740 + }, + { + "epoch": 662.828947368421, + "grad_norm": 0.955522894859314, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 100750 + }, + { + "epoch": 662.8947368421053, + "grad_norm": 1.0989824533462524, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 100760 + }, + { + "epoch": 662.9605263157895, + "grad_norm": 1.0677485466003418, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 100770 + }, + { + "epoch": 663.0263157894736, + "grad_norm": 0.7644389271736145, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 100780 + }, + { + "epoch": 663.0921052631579, + "grad_norm": 1.139380931854248, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 100790 + }, + { + "epoch": 663.1578947368421, + "grad_norm": 0.9676651954650879, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 100800 + }, + { + "epoch": 663.2236842105264, + "grad_norm": 1.2177777290344238, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 100810 + }, + { + "epoch": 663.2894736842105, + "grad_norm": 1.339358925819397, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 100820 + }, + { + "epoch": 663.3552631578947, + "grad_norm": 1.3607467412948608, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 100830 + }, + { + "epoch": 663.421052631579, + "grad_norm": 1.0772532224655151, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 100840 + }, + { + "epoch": 663.4868421052631, + "grad_norm": 1.1806657314300537, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 100850 + }, + { + "epoch": 663.5526315789474, + "grad_norm": 0.8904920220375061, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 100860 + }, + { + "epoch": 663.6184210526316, + "grad_norm": 1.1465924978256226, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 100870 + }, + { + "epoch": 663.6842105263158, + "grad_norm": 1.0607256889343262, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 100880 + }, + { + "epoch": 663.75, + "grad_norm": 1.294001579284668, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 100890 + }, + { + "epoch": 663.8157894736842, + "grad_norm": 1.168131947517395, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 100900 + }, + { + "epoch": 663.8815789473684, + "grad_norm": 0.9382193684577942, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 100910 + }, + { + "epoch": 663.9473684210526, + "grad_norm": 1.172577142715454, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 100920 + }, + { + "epoch": 664.0131578947369, + "grad_norm": 1.1000980138778687, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 100930 + }, + { + "epoch": 664.078947368421, + "grad_norm": 1.3834381103515625, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 100940 + }, + { + "epoch": 664.1447368421053, + "grad_norm": 1.354061484336853, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 100950 + }, + { + "epoch": 664.2105263157895, + "grad_norm": 1.3133666515350342, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 100960 + }, + { + "epoch": 664.2763157894736, + "grad_norm": 0.8378098011016846, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 100970 + }, + { + "epoch": 664.3421052631579, + "grad_norm": 0.8867501616477966, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 100980 + }, + { + "epoch": 664.4078947368421, + "grad_norm": 1.0622562170028687, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 100990 + }, + { + "epoch": 664.4736842105264, + "grad_norm": 0.6690018773078918, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 101000 + }, + { + "epoch": 664.5394736842105, + "grad_norm": 0.7047531008720398, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 101010 + }, + { + "epoch": 664.6052631578947, + "grad_norm": 1.0882129669189453, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 101020 + }, + { + "epoch": 664.671052631579, + "grad_norm": 0.8649616241455078, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 101030 + }, + { + "epoch": 664.7368421052631, + "grad_norm": 0.8693536520004272, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 101040 + }, + { + "epoch": 664.8026315789474, + "grad_norm": 1.3576804399490356, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 101050 + }, + { + "epoch": 664.8684210526316, + "grad_norm": 1.0976057052612305, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 101060 + }, + { + "epoch": 664.9342105263158, + "grad_norm": 1.4118653535842896, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 101070 + }, + { + "epoch": 665.0, + "grad_norm": 0.913599967956543, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 101080 + }, + { + "epoch": 665.0657894736842, + "grad_norm": 1.0430513620376587, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 101090 + }, + { + "epoch": 665.1315789473684, + "grad_norm": 1.3427150249481201, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 101100 + }, + { + "epoch": 665.1973684210526, + "grad_norm": 1.5117545127868652, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 101110 + }, + { + "epoch": 665.2631578947369, + "grad_norm": 0.9170265197753906, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 101120 + }, + { + "epoch": 665.328947368421, + "grad_norm": 1.115020513534546, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 101130 + }, + { + "epoch": 665.3947368421053, + "grad_norm": 0.7325767874717712, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 101140 + }, + { + "epoch": 665.4605263157895, + "grad_norm": 1.1029810905456543, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 101150 + }, + { + "epoch": 665.5263157894736, + "grad_norm": 1.1868432760238647, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 101160 + }, + { + "epoch": 665.5921052631579, + "grad_norm": 0.9201452136039734, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 101170 + }, + { + "epoch": 665.6578947368421, + "grad_norm": 1.1828328371047974, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 101180 + }, + { + "epoch": 665.7236842105264, + "grad_norm": 1.1864783763885498, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 101190 + }, + { + "epoch": 665.7894736842105, + "grad_norm": 1.2099254131317139, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 101200 + }, + { + "epoch": 665.8552631578947, + "grad_norm": 1.1975241899490356, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 101210 + }, + { + "epoch": 665.921052631579, + "grad_norm": 1.1442780494689941, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 101220 + }, + { + "epoch": 665.9868421052631, + "grad_norm": 1.2307586669921875, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 101230 + }, + { + "epoch": 666.0526315789474, + "grad_norm": 0.816516101360321, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 101240 + }, + { + "epoch": 666.1184210526316, + "grad_norm": 1.2257626056671143, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 101250 + }, + { + "epoch": 666.1842105263158, + "grad_norm": 1.1634641885757446, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 101260 + }, + { + "epoch": 666.25, + "grad_norm": 1.2208856344223022, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 101270 + }, + { + "epoch": 666.3157894736842, + "grad_norm": 0.8225875496864319, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 101280 + }, + { + "epoch": 666.3815789473684, + "grad_norm": 1.0692633390426636, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 101290 + }, + { + "epoch": 666.4473684210526, + "grad_norm": 1.2944279909133911, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 101300 + }, + { + "epoch": 666.5131578947369, + "grad_norm": 1.2648913860321045, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 101310 + }, + { + "epoch": 666.578947368421, + "grad_norm": 1.435753345489502, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 101320 + }, + { + "epoch": 666.6447368421053, + "grad_norm": 1.2368513345718384, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 101330 + }, + { + "epoch": 666.7105263157895, + "grad_norm": 1.1920435428619385, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 101340 + }, + { + "epoch": 666.7763157894736, + "grad_norm": 1.2127162218093872, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 101350 + }, + { + "epoch": 666.8421052631579, + "grad_norm": 1.2593845129013062, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 101360 + }, + { + "epoch": 666.9078947368421, + "grad_norm": 1.0275031328201294, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 101370 + }, + { + "epoch": 666.9736842105264, + "grad_norm": 1.224514126777649, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 101380 + }, + { + "epoch": 667.0394736842105, + "grad_norm": 1.1807442903518677, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 101390 + }, + { + "epoch": 667.1052631578947, + "grad_norm": 0.8523136377334595, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 101400 + }, + { + "epoch": 667.171052631579, + "grad_norm": 1.0002000331878662, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 101410 + }, + { + "epoch": 667.2368421052631, + "grad_norm": 1.0773143768310547, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 101420 + }, + { + "epoch": 667.3026315789474, + "grad_norm": 1.2809265851974487, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 101430 + }, + { + "epoch": 667.3684210526316, + "grad_norm": 1.272909164428711, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 101440 + }, + { + "epoch": 667.4342105263158, + "grad_norm": 1.0183367729187012, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 101450 + }, + { + "epoch": 667.5, + "grad_norm": 1.0988458395004272, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 101460 + }, + { + "epoch": 667.5657894736842, + "grad_norm": 1.024822473526001, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 101470 + }, + { + "epoch": 667.6315789473684, + "grad_norm": 1.2332916259765625, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 101480 + }, + { + "epoch": 667.6973684210526, + "grad_norm": 1.0866150856018066, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 101490 + }, + { + "epoch": 667.7631578947369, + "grad_norm": 1.0552276372909546, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 101500 + }, + { + "epoch": 667.828947368421, + "grad_norm": 1.0964107513427734, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 101510 + }, + { + "epoch": 667.8947368421053, + "grad_norm": 1.264575719833374, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 101520 + }, + { + "epoch": 667.9605263157895, + "grad_norm": 1.4266283512115479, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 101530 + }, + { + "epoch": 668.0263157894736, + "grad_norm": 0.7991641759872437, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 101540 + }, + { + "epoch": 668.0921052631579, + "grad_norm": 1.0575485229492188, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 101550 + }, + { + "epoch": 668.1578947368421, + "grad_norm": 1.040031909942627, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 101560 + }, + { + "epoch": 668.2236842105264, + "grad_norm": 0.8988039493560791, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 101570 + }, + { + "epoch": 668.2894736842105, + "grad_norm": 1.1382830142974854, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 101580 + }, + { + "epoch": 668.3552631578947, + "grad_norm": 1.2425233125686646, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 101590 + }, + { + "epoch": 668.421052631579, + "grad_norm": 1.2461456060409546, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 101600 + }, + { + "epoch": 668.4868421052631, + "grad_norm": 0.828624427318573, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 101610 + }, + { + "epoch": 668.5526315789474, + "grad_norm": 0.6338958144187927, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 101620 + }, + { + "epoch": 668.6184210526316, + "grad_norm": 0.9959868788719177, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 101630 + }, + { + "epoch": 668.6842105263158, + "grad_norm": 0.9457986354827881, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 101640 + }, + { + "epoch": 668.75, + "grad_norm": 1.2366830110549927, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 101650 + }, + { + "epoch": 668.8157894736842, + "grad_norm": 1.192163348197937, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 101660 + }, + { + "epoch": 668.8815789473684, + "grad_norm": 0.9253160953521729, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 101670 + }, + { + "epoch": 668.9473684210526, + "grad_norm": 0.8594035506248474, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 101680 + }, + { + "epoch": 669.0131578947369, + "grad_norm": 1.1185978651046753, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 101690 + }, + { + "epoch": 669.078947368421, + "grad_norm": 0.9524565935134888, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 101700 + }, + { + "epoch": 669.1447368421053, + "grad_norm": 1.0104931592941284, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 101710 + }, + { + "epoch": 669.2105263157895, + "grad_norm": 1.1492105722427368, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 101720 + }, + { + "epoch": 669.2763157894736, + "grad_norm": 1.1701027154922485, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 101730 + }, + { + "epoch": 669.3421052631579, + "grad_norm": 1.386569619178772, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 101740 + }, + { + "epoch": 669.4078947368421, + "grad_norm": 1.1431201696395874, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 101750 + }, + { + "epoch": 669.4736842105264, + "grad_norm": 1.031060814857483, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 101760 + }, + { + "epoch": 669.5394736842105, + "grad_norm": 0.8447512984275818, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 101770 + }, + { + "epoch": 669.6052631578947, + "grad_norm": 1.1162457466125488, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 101780 + }, + { + "epoch": 669.671052631579, + "grad_norm": 1.321597933769226, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 101790 + }, + { + "epoch": 669.7368421052631, + "grad_norm": 1.5164763927459717, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 101800 + }, + { + "epoch": 669.8026315789474, + "grad_norm": 1.3314334154129028, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 101810 + }, + { + "epoch": 669.8684210526316, + "grad_norm": 1.0497831106185913, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 101820 + }, + { + "epoch": 669.9342105263158, + "grad_norm": 1.1639678478240967, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 101830 + }, + { + "epoch": 670.0, + "grad_norm": 1.2581437826156616, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 101840 + }, + { + "epoch": 670.0657894736842, + "grad_norm": 1.3124868869781494, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 101850 + }, + { + "epoch": 670.1315789473684, + "grad_norm": 1.232029676437378, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 101860 + }, + { + "epoch": 670.1973684210526, + "grad_norm": 1.5936830043792725, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 101870 + }, + { + "epoch": 670.2631578947369, + "grad_norm": 1.1136298179626465, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 101880 + }, + { + "epoch": 670.328947368421, + "grad_norm": 1.2041126489639282, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 101890 + }, + { + "epoch": 670.3947368421053, + "grad_norm": 1.0076220035552979, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 101900 + }, + { + "epoch": 670.4605263157895, + "grad_norm": 1.001729965209961, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 101910 + }, + { + "epoch": 670.5263157894736, + "grad_norm": 1.1370996236801147, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 101920 + }, + { + "epoch": 670.5921052631579, + "grad_norm": 1.3312040567398071, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 101930 + }, + { + "epoch": 670.6578947368421, + "grad_norm": 1.0733331441879272, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 101940 + }, + { + "epoch": 670.7236842105264, + "grad_norm": 1.2168687582015991, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 101950 + }, + { + "epoch": 670.7894736842105, + "grad_norm": 1.0879305601119995, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 101960 + }, + { + "epoch": 670.8552631578947, + "grad_norm": 1.084376573562622, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 101970 + }, + { + "epoch": 670.921052631579, + "grad_norm": 1.1504342555999756, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 101980 + }, + { + "epoch": 670.9868421052631, + "grad_norm": 1.2068742513656616, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 101990 + }, + { + "epoch": 671.0526315789474, + "grad_norm": 1.264833688735962, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 102000 + }, + { + "epoch": 671.1184210526316, + "grad_norm": 0.7933449149131775, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 102010 + }, + { + "epoch": 671.1842105263158, + "grad_norm": 0.9847843647003174, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 102020 + }, + { + "epoch": 671.25, + "grad_norm": 1.2704505920410156, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 102030 + }, + { + "epoch": 671.3157894736842, + "grad_norm": 1.4738961458206177, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 102040 + }, + { + "epoch": 671.3815789473684, + "grad_norm": 1.338627576828003, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 102050 + }, + { + "epoch": 671.4473684210526, + "grad_norm": 1.2926578521728516, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 102060 + }, + { + "epoch": 671.5131578947369, + "grad_norm": 1.442939043045044, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 102070 + }, + { + "epoch": 671.578947368421, + "grad_norm": 1.1999456882476807, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 102080 + }, + { + "epoch": 671.6447368421053, + "grad_norm": 1.2807422876358032, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 102090 + }, + { + "epoch": 671.7105263157895, + "grad_norm": 1.0529870986938477, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 102100 + }, + { + "epoch": 671.7763157894736, + "grad_norm": 1.044051170349121, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 102110 + }, + { + "epoch": 671.8421052631579, + "grad_norm": 0.934209406375885, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 102120 + }, + { + "epoch": 671.9078947368421, + "grad_norm": 1.350449562072754, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 102130 + }, + { + "epoch": 671.9736842105264, + "grad_norm": 1.2345993518829346, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 102140 + }, + { + "epoch": 672.0394736842105, + "grad_norm": 1.41543447971344, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 102150 + }, + { + "epoch": 672.1052631578947, + "grad_norm": 0.7719045877456665, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 102160 + }, + { + "epoch": 672.171052631579, + "grad_norm": 1.0525262355804443, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 102170 + }, + { + "epoch": 672.2368421052631, + "grad_norm": 1.1869667768478394, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 102180 + }, + { + "epoch": 672.3026315789474, + "grad_norm": 1.0475441217422485, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 102190 + }, + { + "epoch": 672.3684210526316, + "grad_norm": 1.173707127571106, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 102200 + }, + { + "epoch": 672.4342105263158, + "grad_norm": 1.1738572120666504, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 102210 + }, + { + "epoch": 672.5, + "grad_norm": 1.0514370203018188, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 102220 + }, + { + "epoch": 672.5657894736842, + "grad_norm": 1.058618426322937, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 102230 + }, + { + "epoch": 672.6315789473684, + "grad_norm": 1.6183222532272339, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 102240 + }, + { + "epoch": 672.6973684210526, + "grad_norm": 1.2186968326568604, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 102250 + }, + { + "epoch": 672.7631578947369, + "grad_norm": 0.9119374752044678, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 102260 + }, + { + "epoch": 672.828947368421, + "grad_norm": 1.2468377351760864, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 102270 + }, + { + "epoch": 672.8947368421053, + "grad_norm": 0.8254573941230774, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 102280 + }, + { + "epoch": 672.9605263157895, + "grad_norm": 0.8752316832542419, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 102290 + }, + { + "epoch": 673.0263157894736, + "grad_norm": 0.8799929618835449, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 102300 + }, + { + "epoch": 673.0921052631579, + "grad_norm": 1.0996203422546387, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 102310 + }, + { + "epoch": 673.1578947368421, + "grad_norm": 1.4787687063217163, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 102320 + }, + { + "epoch": 673.2236842105264, + "grad_norm": 0.9628654718399048, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 102330 + }, + { + "epoch": 673.2894736842105, + "grad_norm": 1.1669740676879883, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 102340 + }, + { + "epoch": 673.3552631578947, + "grad_norm": 0.9935758709907532, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 102350 + }, + { + "epoch": 673.421052631579, + "grad_norm": 1.2170686721801758, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 102360 + }, + { + "epoch": 673.4868421052631, + "grad_norm": 1.3537840843200684, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 102370 + }, + { + "epoch": 673.5526315789474, + "grad_norm": 1.2032088041305542, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 102380 + }, + { + "epoch": 673.6184210526316, + "grad_norm": 1.0807600021362305, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 102390 + }, + { + "epoch": 673.6842105263158, + "grad_norm": 1.3539401292800903, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 102400 + }, + { + "epoch": 673.75, + "grad_norm": 0.9600739479064941, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 102410 + }, + { + "epoch": 673.8157894736842, + "grad_norm": 0.8437861204147339, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 102420 + }, + { + "epoch": 673.8815789473684, + "grad_norm": 1.0616252422332764, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 102430 + }, + { + "epoch": 673.9473684210526, + "grad_norm": 1.1971665620803833, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 102440 + }, + { + "epoch": 674.0131578947369, + "grad_norm": 1.0292773246765137, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 102450 + }, + { + "epoch": 674.078947368421, + "grad_norm": 1.0635182857513428, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 102460 + }, + { + "epoch": 674.1447368421053, + "grad_norm": 1.1411423683166504, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 102470 + }, + { + "epoch": 674.2105263157895, + "grad_norm": 1.0148522853851318, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 102480 + }, + { + "epoch": 674.2763157894736, + "grad_norm": 1.2934777736663818, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 102490 + }, + { + "epoch": 674.3421052631579, + "grad_norm": 1.151720404624939, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 102500 + }, + { + "epoch": 674.4078947368421, + "grad_norm": 0.8841307759284973, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 102510 + }, + { + "epoch": 674.4736842105264, + "grad_norm": 0.8646332621574402, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 102520 + }, + { + "epoch": 674.5394736842105, + "grad_norm": 0.8606105446815491, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 102530 + }, + { + "epoch": 674.6052631578947, + "grad_norm": 0.7609214186668396, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 102540 + }, + { + "epoch": 674.671052631579, + "grad_norm": 1.0096818208694458, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 102550 + }, + { + "epoch": 674.7368421052631, + "grad_norm": 1.0138216018676758, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 102560 + }, + { + "epoch": 674.8026315789474, + "grad_norm": 1.2651227712631226, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 102570 + }, + { + "epoch": 674.8684210526316, + "grad_norm": 1.3024859428405762, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 102580 + }, + { + "epoch": 674.9342105263158, + "grad_norm": 0.9451261758804321, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 102590 + }, + { + "epoch": 675.0, + "grad_norm": 1.1993613243103027, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 102600 + }, + { + "epoch": 675.0657894736842, + "grad_norm": 1.2021538019180298, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 102610 + }, + { + "epoch": 675.1315789473684, + "grad_norm": 0.7005917429924011, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 102620 + }, + { + "epoch": 675.1973684210526, + "grad_norm": 0.9240332245826721, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 102630 + }, + { + "epoch": 675.2631578947369, + "grad_norm": 1.2300150394439697, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 102640 + }, + { + "epoch": 675.328947368421, + "grad_norm": 1.2758888006210327, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 102650 + }, + { + "epoch": 675.3947368421053, + "grad_norm": 1.0356707572937012, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 102660 + }, + { + "epoch": 675.4605263157895, + "grad_norm": 0.9405995011329651, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 102670 + }, + { + "epoch": 675.5263157894736, + "grad_norm": 1.1242586374282837, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 102680 + }, + { + "epoch": 675.5921052631579, + "grad_norm": 1.0802112817764282, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 102690 + }, + { + "epoch": 675.6578947368421, + "grad_norm": 0.9207102656364441, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 102700 + }, + { + "epoch": 675.7236842105264, + "grad_norm": 0.9859259128570557, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 102710 + }, + { + "epoch": 675.7894736842105, + "grad_norm": 1.63200044631958, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 102720 + }, + { + "epoch": 675.8552631578947, + "grad_norm": 1.0292948484420776, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 102730 + }, + { + "epoch": 675.921052631579, + "grad_norm": 0.9589559435844421, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 102740 + }, + { + "epoch": 675.9868421052631, + "grad_norm": 1.2861278057098389, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 102750 + }, + { + "epoch": 676.0526315789474, + "grad_norm": 1.23167884349823, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 102760 + }, + { + "epoch": 676.1184210526316, + "grad_norm": 1.2074878215789795, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 102770 + }, + { + "epoch": 676.1842105263158, + "grad_norm": 1.1100069284439087, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 102780 + }, + { + "epoch": 676.25, + "grad_norm": 1.2543814182281494, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 102790 + }, + { + "epoch": 676.3157894736842, + "grad_norm": 1.0590333938598633, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 102800 + }, + { + "epoch": 676.3815789473684, + "grad_norm": 1.1635903120040894, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 102810 + }, + { + "epoch": 676.4473684210526, + "grad_norm": 1.191482663154602, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 102820 + }, + { + "epoch": 676.5131578947369, + "grad_norm": 1.2780073881149292, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 102830 + }, + { + "epoch": 676.578947368421, + "grad_norm": 1.0318385362625122, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 102840 + }, + { + "epoch": 676.6447368421053, + "grad_norm": 1.411617636680603, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 102850 + }, + { + "epoch": 676.7105263157895, + "grad_norm": 1.007462739944458, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 102860 + }, + { + "epoch": 676.7763157894736, + "grad_norm": 1.653554916381836, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 102870 + }, + { + "epoch": 676.8421052631579, + "grad_norm": 1.147131085395813, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 102880 + }, + { + "epoch": 676.9078947368421, + "grad_norm": 0.8459810018539429, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 102890 + }, + { + "epoch": 676.9736842105264, + "grad_norm": 1.1392914056777954, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 102900 + }, + { + "epoch": 677.0394736842105, + "grad_norm": 1.1632038354873657, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 102910 + }, + { + "epoch": 677.1052631578947, + "grad_norm": 1.4160902500152588, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 102920 + }, + { + "epoch": 677.171052631579, + "grad_norm": 1.1566230058670044, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 102930 + }, + { + "epoch": 677.2368421052631, + "grad_norm": 0.9785585403442383, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 102940 + }, + { + "epoch": 677.3026315789474, + "grad_norm": 0.8845245242118835, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 102950 + }, + { + "epoch": 677.3684210526316, + "grad_norm": 0.8199566006660461, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 102960 + }, + { + "epoch": 677.4342105263158, + "grad_norm": 0.8677225708961487, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 102970 + }, + { + "epoch": 677.5, + "grad_norm": 1.06745445728302, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 102980 + }, + { + "epoch": 677.5657894736842, + "grad_norm": 1.011222004890442, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 102990 + }, + { + "epoch": 677.6315789473684, + "grad_norm": 1.0868799686431885, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 103000 + }, + { + "epoch": 677.6973684210526, + "grad_norm": 0.8394410610198975, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 103010 + }, + { + "epoch": 677.7631578947369, + "grad_norm": 1.1427472829818726, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 103020 + }, + { + "epoch": 677.828947368421, + "grad_norm": 1.2967380285263062, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 103030 + }, + { + "epoch": 677.8947368421053, + "grad_norm": 1.249349594116211, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 103040 + }, + { + "epoch": 677.9605263157895, + "grad_norm": 1.3048033714294434, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 103050 + }, + { + "epoch": 678.0263157894736, + "grad_norm": 1.421112298965454, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 103060 + }, + { + "epoch": 678.0921052631579, + "grad_norm": 1.029354214668274, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 103070 + }, + { + "epoch": 678.1578947368421, + "grad_norm": 0.6975602507591248, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 103080 + }, + { + "epoch": 678.2236842105264, + "grad_norm": 1.1515804529190063, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 103090 + }, + { + "epoch": 678.2894736842105, + "grad_norm": 1.4265718460083008, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 103100 + }, + { + "epoch": 678.3552631578947, + "grad_norm": 1.0537846088409424, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 103110 + }, + { + "epoch": 678.421052631579, + "grad_norm": 1.3082852363586426, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 103120 + }, + { + "epoch": 678.4868421052631, + "grad_norm": 1.1465588808059692, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 103130 + }, + { + "epoch": 678.5526315789474, + "grad_norm": 1.254515290260315, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 103140 + }, + { + "epoch": 678.6184210526316, + "grad_norm": 0.8402789831161499, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 103150 + }, + { + "epoch": 678.6842105263158, + "grad_norm": 1.0215173959732056, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 103160 + }, + { + "epoch": 678.75, + "grad_norm": 1.0827964544296265, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 103170 + }, + { + "epoch": 678.8157894736842, + "grad_norm": 1.0201925039291382, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 103180 + }, + { + "epoch": 678.8815789473684, + "grad_norm": 0.9389650821685791, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 103190 + }, + { + "epoch": 678.9473684210526, + "grad_norm": 0.8603837490081787, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 103200 + }, + { + "epoch": 679.0131578947369, + "grad_norm": 1.0995327234268188, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 103210 + }, + { + "epoch": 679.078947368421, + "grad_norm": 1.0585341453552246, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 103220 + }, + { + "epoch": 679.1447368421053, + "grad_norm": 1.1244828701019287, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 103230 + }, + { + "epoch": 679.2105263157895, + "grad_norm": 1.1929583549499512, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 103240 + }, + { + "epoch": 679.2763157894736, + "grad_norm": 1.2523797750473022, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 103250 + }, + { + "epoch": 679.3421052631579, + "grad_norm": 1.0820051431655884, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 103260 + }, + { + "epoch": 679.4078947368421, + "grad_norm": 1.3185031414031982, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 103270 + }, + { + "epoch": 679.4736842105264, + "grad_norm": 1.0388109683990479, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 103280 + }, + { + "epoch": 679.5394736842105, + "grad_norm": 1.443453073501587, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 103290 + }, + { + "epoch": 679.6052631578947, + "grad_norm": 0.7869113087654114, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 103300 + }, + { + "epoch": 679.671052631579, + "grad_norm": 1.2342416048049927, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 103310 + }, + { + "epoch": 679.7368421052631, + "grad_norm": 1.1199147701263428, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 103320 + }, + { + "epoch": 679.8026315789474, + "grad_norm": 0.9589998126029968, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 103330 + }, + { + "epoch": 679.8684210526316, + "grad_norm": 1.0581178665161133, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 103340 + }, + { + "epoch": 679.9342105263158, + "grad_norm": 0.9367907643318176, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 103350 + }, + { + "epoch": 680.0, + "grad_norm": 0.9200935363769531, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 103360 + }, + { + "epoch": 680.0657894736842, + "grad_norm": 1.1545464992523193, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 103370 + }, + { + "epoch": 680.1315789473684, + "grad_norm": 0.9586318731307983, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 103380 + }, + { + "epoch": 680.1973684210526, + "grad_norm": 1.0395398139953613, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 103390 + }, + { + "epoch": 680.2631578947369, + "grad_norm": 1.193278431892395, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 103400 + }, + { + "epoch": 680.328947368421, + "grad_norm": 0.9284635186195374, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 103410 + }, + { + "epoch": 680.3947368421053, + "grad_norm": 1.0416210889816284, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 103420 + }, + { + "epoch": 680.4605263157895, + "grad_norm": 0.880230188369751, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 103430 + }, + { + "epoch": 680.5263157894736, + "grad_norm": 1.139072299003601, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 103440 + }, + { + "epoch": 680.5921052631579, + "grad_norm": 0.859987735748291, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 103450 + }, + { + "epoch": 680.6578947368421, + "grad_norm": 1.0812146663665771, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 103460 + }, + { + "epoch": 680.7236842105264, + "grad_norm": 0.973387598991394, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 103470 + }, + { + "epoch": 680.7894736842105, + "grad_norm": 1.28573739528656, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 103480 + }, + { + "epoch": 680.8552631578947, + "grad_norm": 1.2095247507095337, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 103490 + }, + { + "epoch": 680.921052631579, + "grad_norm": 1.1945544481277466, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 103500 + }, + { + "epoch": 680.9868421052631, + "grad_norm": 1.0555740594863892, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 103510 + }, + { + "epoch": 681.0526315789474, + "grad_norm": 1.0830953121185303, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 103520 + }, + { + "epoch": 681.1184210526316, + "grad_norm": 1.1533199548721313, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 103530 + }, + { + "epoch": 681.1842105263158, + "grad_norm": 1.2779477834701538, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 103540 + }, + { + "epoch": 681.25, + "grad_norm": 1.3541412353515625, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 103550 + }, + { + "epoch": 681.3157894736842, + "grad_norm": 1.2908533811569214, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 103560 + }, + { + "epoch": 681.3815789473684, + "grad_norm": 1.2065902948379517, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 103570 + }, + { + "epoch": 681.4473684210526, + "grad_norm": 0.755403995513916, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 103580 + }, + { + "epoch": 681.5131578947369, + "grad_norm": 1.087852954864502, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 103590 + }, + { + "epoch": 681.578947368421, + "grad_norm": 0.9179571270942688, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 103600 + }, + { + "epoch": 681.6447368421053, + "grad_norm": 1.2405117750167847, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 103610 + }, + { + "epoch": 681.7105263157895, + "grad_norm": 0.9832314252853394, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 103620 + }, + { + "epoch": 681.7763157894736, + "grad_norm": 1.0479704141616821, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 103630 + }, + { + "epoch": 681.8421052631579, + "grad_norm": 1.2243036031723022, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 103640 + }, + { + "epoch": 681.9078947368421, + "grad_norm": 0.8835751414299011, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 103650 + }, + { + "epoch": 681.9736842105264, + "grad_norm": 0.982793927192688, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 103660 + }, + { + "epoch": 682.0394736842105, + "grad_norm": 1.2806956768035889, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 103670 + }, + { + "epoch": 682.1052631578947, + "grad_norm": 1.2741878032684326, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 103680 + }, + { + "epoch": 682.171052631579, + "grad_norm": 0.92679762840271, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 103690 + }, + { + "epoch": 682.2368421052631, + "grad_norm": 1.367266058921814, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 103700 + }, + { + "epoch": 682.3026315789474, + "grad_norm": 0.7617621421813965, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 103710 + }, + { + "epoch": 682.3684210526316, + "grad_norm": 0.740490734577179, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 103720 + }, + { + "epoch": 682.4342105263158, + "grad_norm": 1.2656317949295044, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 103730 + }, + { + "epoch": 682.5, + "grad_norm": 1.286602258682251, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 103740 + }, + { + "epoch": 682.5657894736842, + "grad_norm": 1.0341092348098755, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 103750 + }, + { + "epoch": 682.6315789473684, + "grad_norm": 1.1948630809783936, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 103760 + }, + { + "epoch": 682.6973684210526, + "grad_norm": 1.1949267387390137, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 103770 + }, + { + "epoch": 682.7631578947369, + "grad_norm": 1.2278507947921753, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 103780 + }, + { + "epoch": 682.828947368421, + "grad_norm": 1.1813137531280518, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 103790 + }, + { + "epoch": 682.8947368421053, + "grad_norm": 1.0005921125411987, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 103800 + }, + { + "epoch": 682.9605263157895, + "grad_norm": 0.7229200005531311, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 103810 + }, + { + "epoch": 683.0263157894736, + "grad_norm": 0.7326030135154724, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 103820 + }, + { + "epoch": 683.0921052631579, + "grad_norm": 0.5357984900474548, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 103830 + }, + { + "epoch": 683.1578947368421, + "grad_norm": 0.856106162071228, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 103840 + }, + { + "epoch": 683.2236842105264, + "grad_norm": 0.9525607228279114, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 103850 + }, + { + "epoch": 683.2894736842105, + "grad_norm": 0.6489855647087097, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 103860 + }, + { + "epoch": 683.3552631578947, + "grad_norm": 0.7851767539978027, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 103870 + }, + { + "epoch": 683.421052631579, + "grad_norm": 1.2372000217437744, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 103880 + }, + { + "epoch": 683.4868421052631, + "grad_norm": 1.1379144191741943, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 103890 + }, + { + "epoch": 683.5526315789474, + "grad_norm": 1.1280021667480469, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 103900 + }, + { + "epoch": 683.6184210526316, + "grad_norm": 1.049955129623413, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 103910 + }, + { + "epoch": 683.6842105263158, + "grad_norm": 1.1678504943847656, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 103920 + }, + { + "epoch": 683.75, + "grad_norm": 1.31575345993042, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 103930 + }, + { + "epoch": 683.8157894736842, + "grad_norm": 1.1992285251617432, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 103940 + }, + { + "epoch": 683.8815789473684, + "grad_norm": 1.1740840673446655, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 103950 + }, + { + "epoch": 683.9473684210526, + "grad_norm": 1.3050742149353027, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 103960 + }, + { + "epoch": 684.0131578947369, + "grad_norm": 1.0441380739212036, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 103970 + }, + { + "epoch": 684.078947368421, + "grad_norm": 0.8772562146186829, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 103980 + }, + { + "epoch": 684.1447368421053, + "grad_norm": 1.1009024381637573, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 103990 + }, + { + "epoch": 684.2105263157895, + "grad_norm": 1.1562106609344482, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 104000 + }, + { + "epoch": 684.2763157894736, + "grad_norm": 1.0368518829345703, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 104010 + }, + { + "epoch": 684.3421052631579, + "grad_norm": 0.992682933807373, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 104020 + }, + { + "epoch": 684.4078947368421, + "grad_norm": 0.7431752681732178, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 104030 + }, + { + "epoch": 684.4736842105264, + "grad_norm": 1.2687865495681763, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 104040 + }, + { + "epoch": 684.5394736842105, + "grad_norm": 1.1064850091934204, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 104050 + }, + { + "epoch": 684.6052631578947, + "grad_norm": 1.0989872217178345, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 104060 + }, + { + "epoch": 684.671052631579, + "grad_norm": 0.931109607219696, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 104070 + }, + { + "epoch": 684.7368421052631, + "grad_norm": 1.1777174472808838, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 104080 + }, + { + "epoch": 684.8026315789474, + "grad_norm": 1.218532919883728, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 104090 + }, + { + "epoch": 684.8684210526316, + "grad_norm": 1.0839264392852783, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 104100 + }, + { + "epoch": 684.9342105263158, + "grad_norm": 1.5299314260482788, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 104110 + }, + { + "epoch": 685.0, + "grad_norm": 0.85472172498703, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 104120 + }, + { + "epoch": 685.0657894736842, + "grad_norm": 1.038342833518982, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 104130 + }, + { + "epoch": 685.1315789473684, + "grad_norm": 1.3401973247528076, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 104140 + }, + { + "epoch": 685.1973684210526, + "grad_norm": 1.3317383527755737, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 104150 + }, + { + "epoch": 685.2631578947369, + "grad_norm": 1.0047595500946045, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 104160 + }, + { + "epoch": 685.328947368421, + "grad_norm": 1.0131652355194092, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 104170 + }, + { + "epoch": 685.3947368421053, + "grad_norm": 0.9121875166893005, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 104180 + }, + { + "epoch": 685.4605263157895, + "grad_norm": 1.079406976699829, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 104190 + }, + { + "epoch": 685.5263157894736, + "grad_norm": 1.008108139038086, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 104200 + }, + { + "epoch": 685.5921052631579, + "grad_norm": 0.8241605758666992, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 104210 + }, + { + "epoch": 685.6578947368421, + "grad_norm": 0.8410286903381348, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 104220 + }, + { + "epoch": 685.7236842105264, + "grad_norm": 0.8288211226463318, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 104230 + }, + { + "epoch": 685.7894736842105, + "grad_norm": 0.8989503383636475, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 104240 + }, + { + "epoch": 685.8552631578947, + "grad_norm": 1.1565848588943481, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 104250 + }, + { + "epoch": 685.921052631579, + "grad_norm": 1.376077651977539, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 104260 + }, + { + "epoch": 685.9868421052631, + "grad_norm": 0.8136918544769287, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 104270 + }, + { + "epoch": 686.0526315789474, + "grad_norm": 1.2481287717819214, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 104280 + }, + { + "epoch": 686.1184210526316, + "grad_norm": 1.1149044036865234, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 104290 + }, + { + "epoch": 686.1842105263158, + "grad_norm": 1.1859511137008667, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 104300 + }, + { + "epoch": 686.25, + "grad_norm": 1.0750795602798462, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 104310 + }, + { + "epoch": 686.3157894736842, + "grad_norm": 1.4063059091567993, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 104320 + }, + { + "epoch": 686.3815789473684, + "grad_norm": 1.4898396730422974, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 104330 + }, + { + "epoch": 686.4473684210526, + "grad_norm": 1.1081674098968506, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 104340 + }, + { + "epoch": 686.5131578947369, + "grad_norm": 1.1555651426315308, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 104350 + }, + { + "epoch": 686.578947368421, + "grad_norm": 1.2798792123794556, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 104360 + }, + { + "epoch": 686.6447368421053, + "grad_norm": 0.9958081841468811, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 104370 + }, + { + "epoch": 686.7105263157895, + "grad_norm": 1.3393999338150024, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 104380 + }, + { + "epoch": 686.7763157894736, + "grad_norm": 1.0677820444107056, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 104390 + }, + { + "epoch": 686.8421052631579, + "grad_norm": 1.1201198101043701, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 104400 + }, + { + "epoch": 686.9078947368421, + "grad_norm": 0.9454928040504456, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 104410 + }, + { + "epoch": 686.9736842105264, + "grad_norm": 1.056349515914917, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 104420 + }, + { + "epoch": 687.0394736842105, + "grad_norm": 0.84330153465271, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 104430 + }, + { + "epoch": 687.1052631578947, + "grad_norm": 0.9636723399162292, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 104440 + }, + { + "epoch": 687.171052631579, + "grad_norm": 1.4594964981079102, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 104450 + }, + { + "epoch": 687.2368421052631, + "grad_norm": 0.8333598375320435, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 104460 + }, + { + "epoch": 687.3026315789474, + "grad_norm": 0.9445642828941345, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 104470 + }, + { + "epoch": 687.3684210526316, + "grad_norm": 1.2898783683776855, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 104480 + }, + { + "epoch": 687.4342105263158, + "grad_norm": 1.1669278144836426, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 104490 + }, + { + "epoch": 687.5, + "grad_norm": 0.9558044075965881, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 104500 + }, + { + "epoch": 687.5657894736842, + "grad_norm": 1.2216863632202148, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 104510 + }, + { + "epoch": 687.6315789473684, + "grad_norm": 1.3271684646606445, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 104520 + }, + { + "epoch": 687.6973684210526, + "grad_norm": 1.0442551374435425, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 104530 + }, + { + "epoch": 687.7631578947369, + "grad_norm": 0.6984831094741821, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 104540 + }, + { + "epoch": 687.828947368421, + "grad_norm": 1.0056344270706177, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 104550 + }, + { + "epoch": 687.8947368421053, + "grad_norm": 1.0924835205078125, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 104560 + }, + { + "epoch": 687.9605263157895, + "grad_norm": 1.1241261959075928, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 104570 + }, + { + "epoch": 688.0263157894736, + "grad_norm": 0.9026856422424316, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 104580 + }, + { + "epoch": 688.0921052631579, + "grad_norm": 0.7907440662384033, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 104590 + }, + { + "epoch": 688.1578947368421, + "grad_norm": 1.012350082397461, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 104600 + }, + { + "epoch": 688.2236842105264, + "grad_norm": 1.0234317779541016, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 104610 + }, + { + "epoch": 688.2894736842105, + "grad_norm": 0.9007984399795532, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 104620 + }, + { + "epoch": 688.3552631578947, + "grad_norm": 1.339667797088623, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 104630 + }, + { + "epoch": 688.421052631579, + "grad_norm": 1.140592336654663, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 104640 + }, + { + "epoch": 688.4868421052631, + "grad_norm": 1.2752139568328857, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 104650 + }, + { + "epoch": 688.5526315789474, + "grad_norm": 1.20328950881958, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 104660 + }, + { + "epoch": 688.6184210526316, + "grad_norm": 1.0453274250030518, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 104670 + }, + { + "epoch": 688.6842105263158, + "grad_norm": 1.0228806734085083, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 104680 + }, + { + "epoch": 688.75, + "grad_norm": 1.1263914108276367, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 104690 + }, + { + "epoch": 688.8157894736842, + "grad_norm": 1.1457710266113281, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 104700 + }, + { + "epoch": 688.8815789473684, + "grad_norm": 1.1575244665145874, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 104710 + }, + { + "epoch": 688.9473684210526, + "grad_norm": 1.1374318599700928, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 104720 + }, + { + "epoch": 689.0131578947369, + "grad_norm": 1.1628844738006592, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 104730 + }, + { + "epoch": 689.078947368421, + "grad_norm": 1.267177939414978, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 104740 + }, + { + "epoch": 689.1447368421053, + "grad_norm": 1.0953381061553955, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 104750 + }, + { + "epoch": 689.2105263157895, + "grad_norm": 0.6190617680549622, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 104760 + }, + { + "epoch": 689.2763157894736, + "grad_norm": 1.0876604318618774, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 104770 + }, + { + "epoch": 689.3421052631579, + "grad_norm": 1.1195549964904785, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 104780 + }, + { + "epoch": 689.4078947368421, + "grad_norm": 1.1582069396972656, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 104790 + }, + { + "epoch": 689.4736842105264, + "grad_norm": 1.3708412647247314, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 104800 + }, + { + "epoch": 689.5394736842105, + "grad_norm": 1.0523473024368286, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 104810 + }, + { + "epoch": 689.6052631578947, + "grad_norm": 1.0299116373062134, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 104820 + }, + { + "epoch": 689.671052631579, + "grad_norm": 1.1898280382156372, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 104830 + }, + { + "epoch": 689.7368421052631, + "grad_norm": 1.1457937955856323, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 104840 + }, + { + "epoch": 689.8026315789474, + "grad_norm": 1.2077155113220215, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 104850 + }, + { + "epoch": 689.8684210526316, + "grad_norm": 1.064317226409912, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 104860 + }, + { + "epoch": 689.9342105263158, + "grad_norm": 1.3832712173461914, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 104870 + }, + { + "epoch": 690.0, + "grad_norm": 1.2490742206573486, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 104880 + }, + { + "epoch": 690.0657894736842, + "grad_norm": 0.9746840000152588, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 104890 + }, + { + "epoch": 690.1315789473684, + "grad_norm": 1.291522741317749, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 104900 + }, + { + "epoch": 690.1973684210526, + "grad_norm": 1.422899603843689, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 104910 + }, + { + "epoch": 690.2631578947369, + "grad_norm": 1.692259669303894, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 104920 + }, + { + "epoch": 690.328947368421, + "grad_norm": 1.206990361213684, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 104930 + }, + { + "epoch": 690.3947368421053, + "grad_norm": 1.1877973079681396, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 104940 + }, + { + "epoch": 690.4605263157895, + "grad_norm": 0.9999209642410278, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 104950 + }, + { + "epoch": 690.5263157894736, + "grad_norm": 0.9556674361228943, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 104960 + }, + { + "epoch": 690.5921052631579, + "grad_norm": 1.3551650047302246, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 104970 + }, + { + "epoch": 690.6578947368421, + "grad_norm": 0.9526252746582031, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 104980 + }, + { + "epoch": 690.7236842105264, + "grad_norm": 1.16046941280365, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 104990 + }, + { + "epoch": 690.7894736842105, + "grad_norm": 1.410556674003601, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 105000 + }, + { + "epoch": 690.8552631578947, + "grad_norm": 1.1336820125579834, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 105010 + }, + { + "epoch": 690.921052631579, + "grad_norm": 1.0210304260253906, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 105020 + }, + { + "epoch": 690.9868421052631, + "grad_norm": 0.7669985890388489, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 105030 + }, + { + "epoch": 691.0526315789474, + "grad_norm": 1.2497632503509521, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 105040 + }, + { + "epoch": 691.1184210526316, + "grad_norm": 0.9652649760246277, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 105050 + }, + { + "epoch": 691.1842105263158, + "grad_norm": 1.0892058610916138, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 105060 + }, + { + "epoch": 691.25, + "grad_norm": 1.201240062713623, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 105070 + }, + { + "epoch": 691.3157894736842, + "grad_norm": 1.2225768566131592, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 105080 + }, + { + "epoch": 691.3815789473684, + "grad_norm": 1.4184614419937134, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 105090 + }, + { + "epoch": 691.4473684210526, + "grad_norm": 0.9505151510238647, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 105100 + }, + { + "epoch": 691.5131578947369, + "grad_norm": 0.7423208355903625, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 105110 + }, + { + "epoch": 691.578947368421, + "grad_norm": 1.2950553894042969, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 105120 + }, + { + "epoch": 691.6447368421053, + "grad_norm": 1.050185203552246, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 105130 + }, + { + "epoch": 691.7105263157895, + "grad_norm": 1.229802131652832, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 105140 + }, + { + "epoch": 691.7763157894736, + "grad_norm": 1.34513258934021, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 105150 + }, + { + "epoch": 691.8421052631579, + "grad_norm": 0.9867190718650818, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 105160 + }, + { + "epoch": 691.9078947368421, + "grad_norm": 1.0777831077575684, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 105170 + }, + { + "epoch": 691.9736842105264, + "grad_norm": 0.8535577058792114, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 105180 + }, + { + "epoch": 692.0394736842105, + "grad_norm": 1.4894434213638306, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 105190 + }, + { + "epoch": 692.1052631578947, + "grad_norm": 1.5268137454986572, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 105200 + }, + { + "epoch": 692.171052631579, + "grad_norm": 0.9532687067985535, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 105210 + }, + { + "epoch": 692.2368421052631, + "grad_norm": 1.5903687477111816, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 105220 + }, + { + "epoch": 692.3026315789474, + "grad_norm": 1.4972559213638306, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 105230 + }, + { + "epoch": 692.3684210526316, + "grad_norm": 1.5704691410064697, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 105240 + }, + { + "epoch": 692.4342105263158, + "grad_norm": 1.3427348136901855, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 105250 + }, + { + "epoch": 692.5, + "grad_norm": 1.3334673643112183, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 105260 + }, + { + "epoch": 692.5657894736842, + "grad_norm": 1.3983582258224487, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 105270 + }, + { + "epoch": 692.6315789473684, + "grad_norm": 1.0505462884902954, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 105280 + }, + { + "epoch": 692.6973684210526, + "grad_norm": 1.273409128189087, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 105290 + }, + { + "epoch": 692.7631578947369, + "grad_norm": 1.1249748468399048, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 105300 + }, + { + "epoch": 692.828947368421, + "grad_norm": 1.0098894834518433, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 105310 + }, + { + "epoch": 692.8947368421053, + "grad_norm": 0.9849072694778442, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 105320 + }, + { + "epoch": 692.9605263157895, + "grad_norm": 1.1056095361709595, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 105330 + }, + { + "epoch": 693.0263157894736, + "grad_norm": 1.139883041381836, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 105340 + }, + { + "epoch": 693.0921052631579, + "grad_norm": 1.1363656520843506, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 105350 + }, + { + "epoch": 693.1578947368421, + "grad_norm": 1.1857936382293701, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 105360 + }, + { + "epoch": 693.2236842105264, + "grad_norm": 0.88065105676651, + "learning_rate": 0.0001, + "loss": 0.015, + "step": 105370 + }, + { + "epoch": 693.2894736842105, + "grad_norm": 0.8013133406639099, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 105380 + }, + { + "epoch": 693.3552631578947, + "grad_norm": 1.1217681169509888, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 105390 + }, + { + "epoch": 693.421052631579, + "grad_norm": 1.2208178043365479, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 105400 + }, + { + "epoch": 693.4868421052631, + "grad_norm": 1.139967918395996, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 105410 + }, + { + "epoch": 693.5526315789474, + "grad_norm": 1.063003659248352, + "learning_rate": 0.0001, + "loss": 0.0158, + "step": 105420 + }, + { + "epoch": 693.6184210526316, + "grad_norm": 1.4261283874511719, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 105430 + }, + { + "epoch": 693.6842105263158, + "grad_norm": 1.0778611898422241, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 105440 + }, + { + "epoch": 693.75, + "grad_norm": 1.0890920162200928, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 105450 + }, + { + "epoch": 693.8157894736842, + "grad_norm": 1.4902729988098145, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 105460 + }, + { + "epoch": 693.8815789473684, + "grad_norm": 1.185309886932373, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 105470 + }, + { + "epoch": 693.9473684210526, + "grad_norm": 1.2872449159622192, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 105480 + }, + { + "epoch": 694.0131578947369, + "grad_norm": 1.1898432970046997, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 105490 + }, + { + "epoch": 694.078947368421, + "grad_norm": 1.2668589353561401, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 105500 + }, + { + "epoch": 694.1447368421053, + "grad_norm": 1.3832660913467407, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 105510 + }, + { + "epoch": 694.2105263157895, + "grad_norm": 1.297754168510437, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 105520 + }, + { + "epoch": 694.2763157894736, + "grad_norm": 1.1962448358535767, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 105530 + }, + { + "epoch": 694.3421052631579, + "grad_norm": 1.3533624410629272, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 105540 + }, + { + "epoch": 694.4078947368421, + "grad_norm": 1.2034482955932617, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 105550 + }, + { + "epoch": 694.4736842105264, + "grad_norm": 1.1491233110427856, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 105560 + }, + { + "epoch": 694.5394736842105, + "grad_norm": 1.1336445808410645, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 105570 + }, + { + "epoch": 694.6052631578947, + "grad_norm": 1.028238296508789, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 105580 + }, + { + "epoch": 694.671052631579, + "grad_norm": 1.0678424835205078, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 105590 + }, + { + "epoch": 694.7368421052631, + "grad_norm": 1.087962031364441, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 105600 + }, + { + "epoch": 694.8026315789474, + "grad_norm": 1.0385785102844238, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 105610 + }, + { + "epoch": 694.8684210526316, + "grad_norm": 1.196866512298584, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 105620 + }, + { + "epoch": 694.9342105263158, + "grad_norm": 1.0130568742752075, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 105630 + }, + { + "epoch": 695.0, + "grad_norm": 0.7995595932006836, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 105640 + }, + { + "epoch": 695.0657894736842, + "grad_norm": 1.008529782295227, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 105650 + }, + { + "epoch": 695.1315789473684, + "grad_norm": 0.806501030921936, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 105660 + }, + { + "epoch": 695.1973684210526, + "grad_norm": 0.8974860906600952, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 105670 + }, + { + "epoch": 695.2631578947369, + "grad_norm": 1.2004984617233276, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 105680 + }, + { + "epoch": 695.328947368421, + "grad_norm": 1.1275862455368042, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 105690 + }, + { + "epoch": 695.3947368421053, + "grad_norm": 0.9173672199249268, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 105700 + }, + { + "epoch": 695.4605263157895, + "grad_norm": 0.8183532953262329, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 105710 + }, + { + "epoch": 695.5263157894736, + "grad_norm": 0.8390060663223267, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 105720 + }, + { + "epoch": 695.5921052631579, + "grad_norm": 1.1580220460891724, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 105730 + }, + { + "epoch": 695.6578947368421, + "grad_norm": 0.9561638832092285, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 105740 + }, + { + "epoch": 695.7236842105264, + "grad_norm": 1.305795431137085, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 105750 + }, + { + "epoch": 695.7894736842105, + "grad_norm": 1.044447422027588, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 105760 + }, + { + "epoch": 695.8552631578947, + "grad_norm": 1.3991165161132812, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 105770 + }, + { + "epoch": 695.921052631579, + "grad_norm": 0.8383837342262268, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 105780 + }, + { + "epoch": 695.9868421052631, + "grad_norm": 0.9691088795661926, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 105790 + }, + { + "epoch": 696.0526315789474, + "grad_norm": 1.0391768217086792, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 105800 + }, + { + "epoch": 696.1184210526316, + "grad_norm": 0.8894152641296387, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 105810 + }, + { + "epoch": 696.1842105263158, + "grad_norm": 0.7640244364738464, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 105820 + }, + { + "epoch": 696.25, + "grad_norm": 0.6968852877616882, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 105830 + }, + { + "epoch": 696.3157894736842, + "grad_norm": 0.7941989302635193, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 105840 + }, + { + "epoch": 696.3815789473684, + "grad_norm": 1.2468845844268799, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 105850 + }, + { + "epoch": 696.4473684210526, + "grad_norm": 1.1141244173049927, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 105860 + }, + { + "epoch": 696.5131578947369, + "grad_norm": 0.9799408912658691, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 105870 + }, + { + "epoch": 696.578947368421, + "grad_norm": 0.7694076299667358, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 105880 + }, + { + "epoch": 696.6447368421053, + "grad_norm": 1.0681445598602295, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 105890 + }, + { + "epoch": 696.7105263157895, + "grad_norm": 0.7481711506843567, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 105900 + }, + { + "epoch": 696.7763157894736, + "grad_norm": 0.9661341309547424, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 105910 + }, + { + "epoch": 696.8421052631579, + "grad_norm": 1.1508374214172363, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 105920 + }, + { + "epoch": 696.9078947368421, + "grad_norm": 1.2137349843978882, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 105930 + }, + { + "epoch": 696.9736842105264, + "grad_norm": 1.2096316814422607, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 105940 + }, + { + "epoch": 697.0394736842105, + "grad_norm": 1.1053109169006348, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 105950 + }, + { + "epoch": 697.1052631578947, + "grad_norm": 1.0717750787734985, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 105960 + }, + { + "epoch": 697.171052631579, + "grad_norm": 1.2490699291229248, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 105970 + }, + { + "epoch": 697.2368421052631, + "grad_norm": 1.2428292036056519, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 105980 + }, + { + "epoch": 697.3026315789474, + "grad_norm": 1.023769736289978, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 105990 + }, + { + "epoch": 697.3684210526316, + "grad_norm": 1.1357252597808838, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 106000 + }, + { + "epoch": 697.4342105263158, + "grad_norm": 1.178993582725525, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 106010 + }, + { + "epoch": 697.5, + "grad_norm": 0.642010509967804, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 106020 + }, + { + "epoch": 697.5657894736842, + "grad_norm": 0.8353654742240906, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 106030 + }, + { + "epoch": 697.6315789473684, + "grad_norm": 0.9867295622825623, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 106040 + }, + { + "epoch": 697.6973684210526, + "grad_norm": 1.1230249404907227, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 106050 + }, + { + "epoch": 697.7631578947369, + "grad_norm": 1.3012531995773315, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 106060 + }, + { + "epoch": 697.828947368421, + "grad_norm": 1.0835860967636108, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 106070 + }, + { + "epoch": 697.8947368421053, + "grad_norm": 1.1090937852859497, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 106080 + }, + { + "epoch": 697.9605263157895, + "grad_norm": 1.16708505153656, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 106090 + }, + { + "epoch": 698.0263157894736, + "grad_norm": 1.0519673824310303, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 106100 + }, + { + "epoch": 698.0921052631579, + "grad_norm": 1.2105119228363037, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 106110 + }, + { + "epoch": 698.1578947368421, + "grad_norm": 1.2713031768798828, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 106120 + }, + { + "epoch": 698.2236842105264, + "grad_norm": 0.7641732692718506, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 106130 + }, + { + "epoch": 698.2894736842105, + "grad_norm": 1.30423903465271, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 106140 + }, + { + "epoch": 698.3552631578947, + "grad_norm": 1.076094388961792, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 106150 + }, + { + "epoch": 698.421052631579, + "grad_norm": 1.1429264545440674, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 106160 + }, + { + "epoch": 698.4868421052631, + "grad_norm": 1.1242903470993042, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 106170 + }, + { + "epoch": 698.5526315789474, + "grad_norm": 0.9787787199020386, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 106180 + }, + { + "epoch": 698.6184210526316, + "grad_norm": 1.1341170072555542, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 106190 + }, + { + "epoch": 698.6842105263158, + "grad_norm": 0.9176144003868103, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 106200 + }, + { + "epoch": 698.75, + "grad_norm": 1.141721487045288, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 106210 + }, + { + "epoch": 698.8157894736842, + "grad_norm": 0.9301637411117554, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 106220 + }, + { + "epoch": 698.8815789473684, + "grad_norm": 1.2631868124008179, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 106230 + }, + { + "epoch": 698.9473684210526, + "grad_norm": 0.9568750262260437, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 106240 + }, + { + "epoch": 699.0131578947369, + "grad_norm": 1.0320165157318115, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 106250 + }, + { + "epoch": 699.078947368421, + "grad_norm": 0.9375261664390564, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 106260 + }, + { + "epoch": 699.1447368421053, + "grad_norm": 1.236198902130127, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 106270 + }, + { + "epoch": 699.2105263157895, + "grad_norm": 0.9655860662460327, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 106280 + }, + { + "epoch": 699.2763157894736, + "grad_norm": 0.9262257814407349, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 106290 + }, + { + "epoch": 699.3421052631579, + "grad_norm": 1.303225040435791, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 106300 + }, + { + "epoch": 699.4078947368421, + "grad_norm": 1.1554994583129883, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 106310 + }, + { + "epoch": 699.4736842105264, + "grad_norm": 1.3132860660552979, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 106320 + }, + { + "epoch": 699.5394736842105, + "grad_norm": 0.9363493919372559, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 106330 + }, + { + "epoch": 699.6052631578947, + "grad_norm": 1.1558855772018433, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 106340 + }, + { + "epoch": 699.671052631579, + "grad_norm": 1.206477403640747, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 106350 + }, + { + "epoch": 699.7368421052631, + "grad_norm": 1.2269618511199951, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 106360 + }, + { + "epoch": 699.8026315789474, + "grad_norm": 1.058182716369629, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 106370 + }, + { + "epoch": 699.8684210526316, + "grad_norm": 1.0236554145812988, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 106380 + }, + { + "epoch": 699.9342105263158, + "grad_norm": 1.0067118406295776, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 106390 + }, + { + "epoch": 700.0, + "grad_norm": 1.0427459478378296, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 106400 + }, + { + "epoch": 700.0657894736842, + "grad_norm": 1.3142248392105103, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 106410 + }, + { + "epoch": 700.1315789473684, + "grad_norm": 0.9090234041213989, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 106420 + }, + { + "epoch": 700.1973684210526, + "grad_norm": 0.9273867607116699, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 106430 + }, + { + "epoch": 700.2631578947369, + "grad_norm": 0.958476722240448, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 106440 + }, + { + "epoch": 700.328947368421, + "grad_norm": 1.0677157640457153, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 106450 + }, + { + "epoch": 700.3947368421053, + "grad_norm": 1.2199952602386475, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 106460 + }, + { + "epoch": 700.4605263157895, + "grad_norm": 1.244336724281311, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 106470 + }, + { + "epoch": 700.5263157894736, + "grad_norm": 1.1296188831329346, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 106480 + }, + { + "epoch": 700.5921052631579, + "grad_norm": 1.4885672330856323, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 106490 + }, + { + "epoch": 700.6578947368421, + "grad_norm": 1.0214803218841553, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 106500 + }, + { + "epoch": 700.7236842105264, + "grad_norm": 1.2202999591827393, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 106510 + }, + { + "epoch": 700.7894736842105, + "grad_norm": 0.9407332539558411, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 106520 + }, + { + "epoch": 700.8552631578947, + "grad_norm": 0.8566510677337646, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 106530 + }, + { + "epoch": 700.921052631579, + "grad_norm": 0.9324583411216736, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 106540 + }, + { + "epoch": 700.9868421052631, + "grad_norm": 1.044553279876709, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 106550 + }, + { + "epoch": 701.0526315789474, + "grad_norm": 1.0946259498596191, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 106560 + }, + { + "epoch": 701.1184210526316, + "grad_norm": 1.0644162893295288, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 106570 + }, + { + "epoch": 701.1842105263158, + "grad_norm": 1.1168071031570435, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 106580 + }, + { + "epoch": 701.25, + "grad_norm": 1.2700804471969604, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 106590 + }, + { + "epoch": 701.3157894736842, + "grad_norm": 1.0045099258422852, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 106600 + }, + { + "epoch": 701.3815789473684, + "grad_norm": 1.2132641077041626, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 106610 + }, + { + "epoch": 701.4473684210526, + "grad_norm": 0.9734482169151306, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 106620 + }, + { + "epoch": 701.5131578947369, + "grad_norm": 0.9646698832511902, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 106630 + }, + { + "epoch": 701.578947368421, + "grad_norm": 1.0157219171524048, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 106640 + }, + { + "epoch": 701.6447368421053, + "grad_norm": 0.8552806973457336, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 106650 + }, + { + "epoch": 701.7105263157895, + "grad_norm": 0.9953233003616333, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 106660 + }, + { + "epoch": 701.7763157894736, + "grad_norm": 1.2354696989059448, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 106670 + }, + { + "epoch": 701.8421052631579, + "grad_norm": 1.019411563873291, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 106680 + }, + { + "epoch": 701.9078947368421, + "grad_norm": 1.2094115018844604, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 106690 + }, + { + "epoch": 701.9736842105264, + "grad_norm": 1.2663511037826538, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 106700 + }, + { + "epoch": 702.0394736842105, + "grad_norm": 1.2119556665420532, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 106710 + }, + { + "epoch": 702.1052631578947, + "grad_norm": 1.0109612941741943, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 106720 + }, + { + "epoch": 702.171052631579, + "grad_norm": 1.126444697380066, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 106730 + }, + { + "epoch": 702.2368421052631, + "grad_norm": 1.0462061166763306, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 106740 + }, + { + "epoch": 702.3026315789474, + "grad_norm": 1.3411667346954346, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 106750 + }, + { + "epoch": 702.3684210526316, + "grad_norm": 1.215775728225708, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 106760 + }, + { + "epoch": 702.4342105263158, + "grad_norm": 1.4780688285827637, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 106770 + }, + { + "epoch": 702.5, + "grad_norm": 1.3486248254776, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 106780 + }, + { + "epoch": 702.5657894736842, + "grad_norm": 1.1051580905914307, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 106790 + }, + { + "epoch": 702.6315789473684, + "grad_norm": 1.1159751415252686, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 106800 + }, + { + "epoch": 702.6973684210526, + "grad_norm": 0.8592166900634766, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 106810 + }, + { + "epoch": 702.7631578947369, + "grad_norm": 1.0782039165496826, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 106820 + }, + { + "epoch": 702.828947368421, + "grad_norm": 1.10999596118927, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 106830 + }, + { + "epoch": 702.8947368421053, + "grad_norm": 1.2560298442840576, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 106840 + }, + { + "epoch": 702.9605263157895, + "grad_norm": 1.123194694519043, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 106850 + }, + { + "epoch": 703.0263157894736, + "grad_norm": 1.0512282848358154, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 106860 + }, + { + "epoch": 703.0921052631579, + "grad_norm": 1.1006485223770142, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 106870 + }, + { + "epoch": 703.1578947368421, + "grad_norm": 0.8787376284599304, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 106880 + }, + { + "epoch": 703.2236842105264, + "grad_norm": 1.178478479385376, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 106890 + }, + { + "epoch": 703.2894736842105, + "grad_norm": 1.2921770811080933, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 106900 + }, + { + "epoch": 703.3552631578947, + "grad_norm": 1.2834266424179077, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 106910 + }, + { + "epoch": 703.421052631579, + "grad_norm": 1.2144252061843872, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 106920 + }, + { + "epoch": 703.4868421052631, + "grad_norm": 1.4216984510421753, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 106930 + }, + { + "epoch": 703.5526315789474, + "grad_norm": 1.094687581062317, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 106940 + }, + { + "epoch": 703.6184210526316, + "grad_norm": 0.8012235164642334, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 106950 + }, + { + "epoch": 703.6842105263158, + "grad_norm": 1.1857279539108276, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 106960 + }, + { + "epoch": 703.75, + "grad_norm": 1.2665016651153564, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 106970 + }, + { + "epoch": 703.8157894736842, + "grad_norm": 0.9577199220657349, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 106980 + }, + { + "epoch": 703.8815789473684, + "grad_norm": 0.9756731986999512, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 106990 + }, + { + "epoch": 703.9473684210526, + "grad_norm": 1.3572697639465332, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 107000 + }, + { + "epoch": 704.0131578947369, + "grad_norm": 0.9276780486106873, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 107010 + }, + { + "epoch": 704.078947368421, + "grad_norm": 0.9649149775505066, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 107020 + }, + { + "epoch": 704.1447368421053, + "grad_norm": 0.9579582810401917, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 107030 + }, + { + "epoch": 704.2105263157895, + "grad_norm": 1.0553480386734009, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 107040 + }, + { + "epoch": 704.2763157894736, + "grad_norm": 1.0931501388549805, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 107050 + }, + { + "epoch": 704.3421052631579, + "grad_norm": 1.0343230962753296, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 107060 + }, + { + "epoch": 704.4078947368421, + "grad_norm": 1.1571636199951172, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 107070 + }, + { + "epoch": 704.4736842105264, + "grad_norm": 1.0774343013763428, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 107080 + }, + { + "epoch": 704.5394736842105, + "grad_norm": 0.9952540397644043, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 107090 + }, + { + "epoch": 704.6052631578947, + "grad_norm": 1.2612237930297852, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 107100 + }, + { + "epoch": 704.671052631579, + "grad_norm": 1.1099755764007568, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 107110 + }, + { + "epoch": 704.7368421052631, + "grad_norm": 1.1933823823928833, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 107120 + }, + { + "epoch": 704.8026315789474, + "grad_norm": 0.8420557379722595, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 107130 + }, + { + "epoch": 704.8684210526316, + "grad_norm": 0.7003465294837952, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 107140 + }, + { + "epoch": 704.9342105263158, + "grad_norm": 1.202143907546997, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 107150 + }, + { + "epoch": 705.0, + "grad_norm": 0.8362906575202942, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 107160 + }, + { + "epoch": 705.0657894736842, + "grad_norm": 1.445649266242981, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 107170 + }, + { + "epoch": 705.1315789473684, + "grad_norm": 1.4059619903564453, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 107180 + }, + { + "epoch": 705.1973684210526, + "grad_norm": 1.178699016571045, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 107190 + }, + { + "epoch": 705.2631578947369, + "grad_norm": 1.397766351699829, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 107200 + }, + { + "epoch": 705.328947368421, + "grad_norm": 1.5875097513198853, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 107210 + }, + { + "epoch": 705.3947368421053, + "grad_norm": 1.234609842300415, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 107220 + }, + { + "epoch": 705.4605263157895, + "grad_norm": 1.3890899419784546, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 107230 + }, + { + "epoch": 705.5263157894736, + "grad_norm": 1.17513906955719, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 107240 + }, + { + "epoch": 705.5921052631579, + "grad_norm": 0.8957661986351013, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 107250 + }, + { + "epoch": 705.6578947368421, + "grad_norm": 0.950888991355896, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 107260 + }, + { + "epoch": 705.7236842105264, + "grad_norm": 1.2250102758407593, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 107270 + }, + { + "epoch": 705.7894736842105, + "grad_norm": 1.09373939037323, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 107280 + }, + { + "epoch": 705.8552631578947, + "grad_norm": 1.0987354516983032, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 107290 + }, + { + "epoch": 705.921052631579, + "grad_norm": 0.9803661704063416, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 107300 + }, + { + "epoch": 705.9868421052631, + "grad_norm": 0.8128421306610107, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 107310 + }, + { + "epoch": 706.0526315789474, + "grad_norm": 0.9702147841453552, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 107320 + }, + { + "epoch": 706.1184210526316, + "grad_norm": 1.0280282497406006, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 107330 + }, + { + "epoch": 706.1842105263158, + "grad_norm": 1.0692169666290283, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 107340 + }, + { + "epoch": 706.25, + "grad_norm": 1.2259516716003418, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 107350 + }, + { + "epoch": 706.3157894736842, + "grad_norm": 1.2134467363357544, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 107360 + }, + { + "epoch": 706.3815789473684, + "grad_norm": 1.4001606702804565, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 107370 + }, + { + "epoch": 706.4473684210526, + "grad_norm": 0.8867390155792236, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 107380 + }, + { + "epoch": 706.5131578947369, + "grad_norm": 1.1521034240722656, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 107390 + }, + { + "epoch": 706.578947368421, + "grad_norm": 1.07185959815979, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 107400 + }, + { + "epoch": 706.6447368421053, + "grad_norm": 0.9920993447303772, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 107410 + }, + { + "epoch": 706.7105263157895, + "grad_norm": 1.0084408521652222, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 107420 + }, + { + "epoch": 706.7763157894736, + "grad_norm": 1.267495036125183, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 107430 + }, + { + "epoch": 706.8421052631579, + "grad_norm": 1.3698124885559082, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 107440 + }, + { + "epoch": 706.9078947368421, + "grad_norm": 1.062820315361023, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 107450 + }, + { + "epoch": 706.9736842105264, + "grad_norm": 1.5721125602722168, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 107460 + }, + { + "epoch": 707.0394736842105, + "grad_norm": 1.1249845027923584, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 107470 + }, + { + "epoch": 707.1052631578947, + "grad_norm": 1.3193303346633911, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 107480 + }, + { + "epoch": 707.171052631579, + "grad_norm": 1.1248865127563477, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 107490 + }, + { + "epoch": 707.2368421052631, + "grad_norm": 1.0584726333618164, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 107500 + }, + { + "epoch": 707.3026315789474, + "grad_norm": 1.1499197483062744, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 107510 + }, + { + "epoch": 707.3684210526316, + "grad_norm": 1.3799810409545898, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 107520 + }, + { + "epoch": 707.4342105263158, + "grad_norm": 1.3305660486221313, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 107530 + }, + { + "epoch": 707.5, + "grad_norm": 1.3056739568710327, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 107540 + }, + { + "epoch": 707.5657894736842, + "grad_norm": 0.9053949117660522, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 107550 + }, + { + "epoch": 707.6315789473684, + "grad_norm": 0.6791939735412598, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 107560 + }, + { + "epoch": 707.6973684210526, + "grad_norm": 1.0320830345153809, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 107570 + }, + { + "epoch": 707.7631578947369, + "grad_norm": 0.8447424173355103, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 107580 + }, + { + "epoch": 707.828947368421, + "grad_norm": 0.9377061724662781, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 107590 + }, + { + "epoch": 707.8947368421053, + "grad_norm": 1.2281724214553833, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 107600 + }, + { + "epoch": 707.9605263157895, + "grad_norm": 1.1441106796264648, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 107610 + }, + { + "epoch": 708.0263157894736, + "grad_norm": 1.3841345310211182, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 107620 + }, + { + "epoch": 708.0921052631579, + "grad_norm": 1.2582640647888184, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 107630 + }, + { + "epoch": 708.1578947368421, + "grad_norm": 1.3789803981781006, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 107640 + }, + { + "epoch": 708.2236842105264, + "grad_norm": 1.2278392314910889, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 107650 + }, + { + "epoch": 708.2894736842105, + "grad_norm": 1.1727294921875, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 107660 + }, + { + "epoch": 708.3552631578947, + "grad_norm": 0.9205034375190735, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 107670 + }, + { + "epoch": 708.421052631579, + "grad_norm": 0.9984912872314453, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 107680 + }, + { + "epoch": 708.4868421052631, + "grad_norm": 0.7507139444351196, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 107690 + }, + { + "epoch": 708.5526315789474, + "grad_norm": 1.054671287536621, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 107700 + }, + { + "epoch": 708.6184210526316, + "grad_norm": 0.9641456604003906, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 107710 + }, + { + "epoch": 708.6842105263158, + "grad_norm": 1.3411998748779297, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 107720 + }, + { + "epoch": 708.75, + "grad_norm": 0.993855357170105, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 107730 + }, + { + "epoch": 708.8157894736842, + "grad_norm": 0.6834460496902466, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 107740 + }, + { + "epoch": 708.8815789473684, + "grad_norm": 0.7746645212173462, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 107750 + }, + { + "epoch": 708.9473684210526, + "grad_norm": 0.9106453061103821, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 107760 + }, + { + "epoch": 709.0131578947369, + "grad_norm": 1.1743462085723877, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 107770 + }, + { + "epoch": 709.078947368421, + "grad_norm": 1.0879472494125366, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 107780 + }, + { + "epoch": 709.1447368421053, + "grad_norm": 0.9474685192108154, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 107790 + }, + { + "epoch": 709.2105263157895, + "grad_norm": 1.2010844945907593, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 107800 + }, + { + "epoch": 709.2763157894736, + "grad_norm": 1.3946421146392822, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 107810 + }, + { + "epoch": 709.3421052631579, + "grad_norm": 1.2751479148864746, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 107820 + }, + { + "epoch": 709.4078947368421, + "grad_norm": 1.0376313924789429, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 107830 + }, + { + "epoch": 709.4736842105264, + "grad_norm": 0.96278977394104, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 107840 + }, + { + "epoch": 709.5394736842105, + "grad_norm": 0.7817369699478149, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 107850 + }, + { + "epoch": 709.6052631578947, + "grad_norm": 1.010752558708191, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 107860 + }, + { + "epoch": 709.671052631579, + "grad_norm": 1.0647060871124268, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 107870 + }, + { + "epoch": 709.7368421052631, + "grad_norm": 0.7408843636512756, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 107880 + }, + { + "epoch": 709.8026315789474, + "grad_norm": 0.9281281232833862, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 107890 + }, + { + "epoch": 709.8684210526316, + "grad_norm": 1.3812792301177979, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 107900 + }, + { + "epoch": 709.9342105263158, + "grad_norm": 1.2687792778015137, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 107910 + }, + { + "epoch": 710.0, + "grad_norm": 0.8348847031593323, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 107920 + }, + { + "epoch": 710.0657894736842, + "grad_norm": 1.199946641921997, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 107930 + }, + { + "epoch": 710.1315789473684, + "grad_norm": 0.9090976119041443, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 107940 + }, + { + "epoch": 710.1973684210526, + "grad_norm": 1.042677879333496, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 107950 + }, + { + "epoch": 710.2631578947369, + "grad_norm": 1.2170066833496094, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 107960 + }, + { + "epoch": 710.328947368421, + "grad_norm": 1.3705898523330688, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 107970 + }, + { + "epoch": 710.3947368421053, + "grad_norm": 1.2710314989089966, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 107980 + }, + { + "epoch": 710.4605263157895, + "grad_norm": 1.175453543663025, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 107990 + }, + { + "epoch": 710.5263157894736, + "grad_norm": 0.9529065489768982, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 108000 + }, + { + "epoch": 710.5921052631579, + "grad_norm": 1.1746102571487427, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 108010 + }, + { + "epoch": 710.6578947368421, + "grad_norm": 1.0855262279510498, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 108020 + }, + { + "epoch": 710.7236842105264, + "grad_norm": 1.1520380973815918, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 108030 + }, + { + "epoch": 710.7894736842105, + "grad_norm": 0.9170784950256348, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 108040 + }, + { + "epoch": 710.8552631578947, + "grad_norm": 1.1136395931243896, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 108050 + }, + { + "epoch": 710.921052631579, + "grad_norm": 1.207287073135376, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 108060 + }, + { + "epoch": 710.9868421052631, + "grad_norm": 1.001600980758667, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 108070 + }, + { + "epoch": 711.0526315789474, + "grad_norm": 0.9510000348091125, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 108080 + }, + { + "epoch": 711.1184210526316, + "grad_norm": 0.7180987000465393, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 108090 + }, + { + "epoch": 711.1842105263158, + "grad_norm": 0.977580189704895, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 108100 + }, + { + "epoch": 711.25, + "grad_norm": 0.843988299369812, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 108110 + }, + { + "epoch": 711.3157894736842, + "grad_norm": 1.3870481252670288, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 108120 + }, + { + "epoch": 711.3815789473684, + "grad_norm": 1.0824686288833618, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 108130 + }, + { + "epoch": 711.4473684210526, + "grad_norm": 1.089383602142334, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 108140 + }, + { + "epoch": 711.5131578947369, + "grad_norm": 0.9979451298713684, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 108150 + }, + { + "epoch": 711.578947368421, + "grad_norm": 0.9893368482589722, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 108160 + }, + { + "epoch": 711.6447368421053, + "grad_norm": 1.0383979082107544, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 108170 + }, + { + "epoch": 711.7105263157895, + "grad_norm": 1.280486822128296, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 108180 + }, + { + "epoch": 711.7763157894736, + "grad_norm": 1.3183673620224, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 108190 + }, + { + "epoch": 711.8421052631579, + "grad_norm": 1.2419326305389404, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 108200 + }, + { + "epoch": 711.9078947368421, + "grad_norm": 1.3799538612365723, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 108210 + }, + { + "epoch": 711.9736842105264, + "grad_norm": 0.9520628452301025, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 108220 + }, + { + "epoch": 712.0394736842105, + "grad_norm": 0.9893721342086792, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 108230 + }, + { + "epoch": 712.1052631578947, + "grad_norm": 1.260324239730835, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 108240 + }, + { + "epoch": 712.171052631579, + "grad_norm": 1.377862572669983, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 108250 + }, + { + "epoch": 712.2368421052631, + "grad_norm": 1.4345449209213257, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 108260 + }, + { + "epoch": 712.3026315789474, + "grad_norm": 1.0738847255706787, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 108270 + }, + { + "epoch": 712.3684210526316, + "grad_norm": 1.1670902967453003, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 108280 + }, + { + "epoch": 712.4342105263158, + "grad_norm": 1.362013578414917, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 108290 + }, + { + "epoch": 712.5, + "grad_norm": 1.2504547834396362, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 108300 + }, + { + "epoch": 712.5657894736842, + "grad_norm": 1.6184645891189575, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 108310 + }, + { + "epoch": 712.6315789473684, + "grad_norm": 1.2186678647994995, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 108320 + }, + { + "epoch": 712.6973684210526, + "grad_norm": 1.0406875610351562, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 108330 + }, + { + "epoch": 712.7631578947369, + "grad_norm": 0.9314107894897461, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 108340 + }, + { + "epoch": 712.828947368421, + "grad_norm": 0.996528148651123, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 108350 + }, + { + "epoch": 712.8947368421053, + "grad_norm": 0.9486305117607117, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 108360 + }, + { + "epoch": 712.9605263157895, + "grad_norm": 0.9168351292610168, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 108370 + }, + { + "epoch": 713.0263157894736, + "grad_norm": 0.9039626717567444, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 108380 + }, + { + "epoch": 713.0921052631579, + "grad_norm": 0.9132272601127625, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 108390 + }, + { + "epoch": 713.1578947368421, + "grad_norm": 0.9673351049423218, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 108400 + }, + { + "epoch": 713.2236842105264, + "grad_norm": 0.8779450058937073, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 108410 + }, + { + "epoch": 713.2894736842105, + "grad_norm": 1.4076229333877563, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 108420 + }, + { + "epoch": 713.3552631578947, + "grad_norm": 1.1083792448043823, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 108430 + }, + { + "epoch": 713.421052631579, + "grad_norm": 1.1614620685577393, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 108440 + }, + { + "epoch": 713.4868421052631, + "grad_norm": 1.1655492782592773, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 108450 + }, + { + "epoch": 713.5526315789474, + "grad_norm": 0.9747868776321411, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 108460 + }, + { + "epoch": 713.6184210526316, + "grad_norm": 1.1151975393295288, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 108470 + }, + { + "epoch": 713.6842105263158, + "grad_norm": 1.0362054109573364, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 108480 + }, + { + "epoch": 713.75, + "grad_norm": 1.196274757385254, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 108490 + }, + { + "epoch": 713.8157894736842, + "grad_norm": 0.6207993030548096, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 108500 + }, + { + "epoch": 713.8815789473684, + "grad_norm": 0.8537484407424927, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 108510 + }, + { + "epoch": 713.9473684210526, + "grad_norm": 0.7432962656021118, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 108520 + }, + { + "epoch": 714.0131578947369, + "grad_norm": 1.1963402032852173, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 108530 + }, + { + "epoch": 714.078947368421, + "grad_norm": 1.0920330286026, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 108540 + }, + { + "epoch": 714.1447368421053, + "grad_norm": 0.5513865351676941, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 108550 + }, + { + "epoch": 714.2105263157895, + "grad_norm": 1.2096713781356812, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 108560 + }, + { + "epoch": 714.2763157894736, + "grad_norm": 0.7468422651290894, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 108570 + }, + { + "epoch": 714.3421052631579, + "grad_norm": 1.296449899673462, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 108580 + }, + { + "epoch": 714.4078947368421, + "grad_norm": 1.2958561182022095, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 108590 + }, + { + "epoch": 714.4736842105264, + "grad_norm": 1.0128918886184692, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 108600 + }, + { + "epoch": 714.5394736842105, + "grad_norm": 0.9160696864128113, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 108610 + }, + { + "epoch": 714.6052631578947, + "grad_norm": 0.9371721148490906, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 108620 + }, + { + "epoch": 714.671052631579, + "grad_norm": 0.8780196905136108, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 108630 + }, + { + "epoch": 714.7368421052631, + "grad_norm": 0.8234481811523438, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 108640 + }, + { + "epoch": 714.8026315789474, + "grad_norm": 0.9354177713394165, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 108650 + }, + { + "epoch": 714.8684210526316, + "grad_norm": 1.4554506540298462, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 108660 + }, + { + "epoch": 714.9342105263158, + "grad_norm": 0.8353884220123291, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 108670 + }, + { + "epoch": 715.0, + "grad_norm": 1.3016752004623413, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 108680 + }, + { + "epoch": 715.0657894736842, + "grad_norm": 1.0093426704406738, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 108690 + }, + { + "epoch": 715.1315789473684, + "grad_norm": 0.8471440672874451, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 108700 + }, + { + "epoch": 715.1973684210526, + "grad_norm": 1.381130576133728, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 108710 + }, + { + "epoch": 715.2631578947369, + "grad_norm": 1.0571134090423584, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 108720 + }, + { + "epoch": 715.328947368421, + "grad_norm": 1.1896861791610718, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 108730 + }, + { + "epoch": 715.3947368421053, + "grad_norm": 0.7548841834068298, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 108740 + }, + { + "epoch": 715.4605263157895, + "grad_norm": 0.652093231678009, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 108750 + }, + { + "epoch": 715.5263157894736, + "grad_norm": 0.6288610696792603, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 108760 + }, + { + "epoch": 715.5921052631579, + "grad_norm": 0.9039884209632874, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 108770 + }, + { + "epoch": 715.6578947368421, + "grad_norm": 0.7594443559646606, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 108780 + }, + { + "epoch": 715.7236842105264, + "grad_norm": 1.3326084613800049, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 108790 + }, + { + "epoch": 715.7894736842105, + "grad_norm": 0.7681350708007812, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 108800 + }, + { + "epoch": 715.8552631578947, + "grad_norm": 0.6910384893417358, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 108810 + }, + { + "epoch": 715.921052631579, + "grad_norm": 0.9494452476501465, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 108820 + }, + { + "epoch": 715.9868421052631, + "grad_norm": 1.248620867729187, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 108830 + }, + { + "epoch": 716.0526315789474, + "grad_norm": 1.2778465747833252, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 108840 + }, + { + "epoch": 716.1184210526316, + "grad_norm": 1.229967474937439, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 108850 + }, + { + "epoch": 716.1842105263158, + "grad_norm": 1.2221544981002808, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 108860 + }, + { + "epoch": 716.25, + "grad_norm": 0.9114399552345276, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 108870 + }, + { + "epoch": 716.3157894736842, + "grad_norm": 1.2433297634124756, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 108880 + }, + { + "epoch": 716.3815789473684, + "grad_norm": 1.0392719507217407, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 108890 + }, + { + "epoch": 716.4473684210526, + "grad_norm": 1.3560720682144165, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 108900 + }, + { + "epoch": 716.5131578947369, + "grad_norm": 1.7873417139053345, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 108910 + }, + { + "epoch": 716.578947368421, + "grad_norm": 1.2388242483139038, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 108920 + }, + { + "epoch": 716.6447368421053, + "grad_norm": 1.307459831237793, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 108930 + }, + { + "epoch": 716.7105263157895, + "grad_norm": 1.3190605640411377, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 108940 + }, + { + "epoch": 716.7763157894736, + "grad_norm": 1.2416794300079346, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 108950 + }, + { + "epoch": 716.8421052631579, + "grad_norm": 1.4050918817520142, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 108960 + }, + { + "epoch": 716.9078947368421, + "grad_norm": 0.8353002667427063, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 108970 + }, + { + "epoch": 716.9736842105264, + "grad_norm": 1.285539984703064, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 108980 + }, + { + "epoch": 717.0394736842105, + "grad_norm": 0.93118816614151, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 108990 + }, + { + "epoch": 717.1052631578947, + "grad_norm": 1.2005360126495361, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 109000 + }, + { + "epoch": 717.171052631579, + "grad_norm": 1.0540269613265991, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 109010 + }, + { + "epoch": 717.2368421052631, + "grad_norm": 1.0876415967941284, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 109020 + }, + { + "epoch": 717.3026315789474, + "grad_norm": 1.1824675798416138, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 109030 + }, + { + "epoch": 717.3684210526316, + "grad_norm": 0.9963378310203552, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 109040 + }, + { + "epoch": 717.4342105263158, + "grad_norm": 1.1265755891799927, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 109050 + }, + { + "epoch": 717.5, + "grad_norm": 0.9112656712532043, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 109060 + }, + { + "epoch": 717.5657894736842, + "grad_norm": 0.6385452151298523, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 109070 + }, + { + "epoch": 717.6315789473684, + "grad_norm": 1.0386406183242798, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 109080 + }, + { + "epoch": 717.6973684210526, + "grad_norm": 0.5495930314064026, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 109090 + }, + { + "epoch": 717.7631578947369, + "grad_norm": 1.1156800985336304, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 109100 + }, + { + "epoch": 717.828947368421, + "grad_norm": 1.2771714925765991, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 109110 + }, + { + "epoch": 717.8947368421053, + "grad_norm": 1.0908472537994385, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 109120 + }, + { + "epoch": 717.9605263157895, + "grad_norm": 0.8165896534919739, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 109130 + }, + { + "epoch": 718.0263157894736, + "grad_norm": 1.1818957328796387, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 109140 + }, + { + "epoch": 718.0921052631579, + "grad_norm": 1.2370299100875854, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 109150 + }, + { + "epoch": 718.1578947368421, + "grad_norm": 1.1179412603378296, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 109160 + }, + { + "epoch": 718.2236842105264, + "grad_norm": 1.040429711341858, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 109170 + }, + { + "epoch": 718.2894736842105, + "grad_norm": 1.090136170387268, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 109180 + }, + { + "epoch": 718.3552631578947, + "grad_norm": 1.2007248401641846, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 109190 + }, + { + "epoch": 718.421052631579, + "grad_norm": 1.092756986618042, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 109200 + }, + { + "epoch": 718.4868421052631, + "grad_norm": 0.8665720820426941, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 109210 + }, + { + "epoch": 718.5526315789474, + "grad_norm": 1.117387056350708, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 109220 + }, + { + "epoch": 718.6184210526316, + "grad_norm": 1.2737882137298584, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 109230 + }, + { + "epoch": 718.6842105263158, + "grad_norm": 0.8910441398620605, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 109240 + }, + { + "epoch": 718.75, + "grad_norm": 1.0049889087677002, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 109250 + }, + { + "epoch": 718.8157894736842, + "grad_norm": 1.1752616167068481, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 109260 + }, + { + "epoch": 718.8815789473684, + "grad_norm": 0.8606551885604858, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 109270 + }, + { + "epoch": 718.9473684210526, + "grad_norm": 0.8159741163253784, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 109280 + }, + { + "epoch": 719.0131578947369, + "grad_norm": 1.0030544996261597, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 109290 + }, + { + "epoch": 719.078947368421, + "grad_norm": 0.8993750810623169, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 109300 + }, + { + "epoch": 719.1447368421053, + "grad_norm": 1.2870763540267944, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 109310 + }, + { + "epoch": 719.2105263157895, + "grad_norm": 0.9233137369155884, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 109320 + }, + { + "epoch": 719.2763157894736, + "grad_norm": 0.928329586982727, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 109330 + }, + { + "epoch": 719.3421052631579, + "grad_norm": 1.117807388305664, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 109340 + }, + { + "epoch": 719.4078947368421, + "grad_norm": 1.040723204612732, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 109350 + }, + { + "epoch": 719.4736842105264, + "grad_norm": 1.066552996635437, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 109360 + }, + { + "epoch": 719.5394736842105, + "grad_norm": 0.8651625514030457, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 109370 + }, + { + "epoch": 719.6052631578947, + "grad_norm": 1.3908514976501465, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 109380 + }, + { + "epoch": 719.671052631579, + "grad_norm": 1.2088861465454102, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 109390 + }, + { + "epoch": 719.7368421052631, + "grad_norm": 1.2017627954483032, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 109400 + }, + { + "epoch": 719.8026315789474, + "grad_norm": 1.2163819074630737, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 109410 + }, + { + "epoch": 719.8684210526316, + "grad_norm": 0.8632364273071289, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 109420 + }, + { + "epoch": 719.9342105263158, + "grad_norm": 1.2523313760757446, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 109430 + }, + { + "epoch": 720.0, + "grad_norm": 1.2396034002304077, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 109440 + }, + { + "epoch": 720.0657894736842, + "grad_norm": 1.3992606401443481, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 109450 + }, + { + "epoch": 720.1315789473684, + "grad_norm": 1.1208072900772095, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 109460 + }, + { + "epoch": 720.1973684210526, + "grad_norm": 1.3922314643859863, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 109470 + }, + { + "epoch": 720.2631578947369, + "grad_norm": 1.2266921997070312, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 109480 + }, + { + "epoch": 720.328947368421, + "grad_norm": 0.8874131441116333, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 109490 + }, + { + "epoch": 720.3947368421053, + "grad_norm": 0.9725931882858276, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 109500 + }, + { + "epoch": 720.4605263157895, + "grad_norm": 1.0284030437469482, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 109510 + }, + { + "epoch": 720.5263157894736, + "grad_norm": 0.9212563633918762, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 109520 + }, + { + "epoch": 720.5921052631579, + "grad_norm": 1.033521294593811, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 109530 + }, + { + "epoch": 720.6578947368421, + "grad_norm": 0.933686375617981, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 109540 + }, + { + "epoch": 720.7236842105264, + "grad_norm": 1.2563905715942383, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 109550 + }, + { + "epoch": 720.7894736842105, + "grad_norm": 1.322197675704956, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 109560 + }, + { + "epoch": 720.8552631578947, + "grad_norm": 1.221634864807129, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 109570 + }, + { + "epoch": 720.921052631579, + "grad_norm": 1.2580702304840088, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 109580 + }, + { + "epoch": 720.9868421052631, + "grad_norm": 0.6149132251739502, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 109590 + }, + { + "epoch": 721.0526315789474, + "grad_norm": 1.1910752058029175, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 109600 + }, + { + "epoch": 721.1184210526316, + "grad_norm": 0.7073631286621094, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 109610 + }, + { + "epoch": 721.1842105263158, + "grad_norm": 1.3418338298797607, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 109620 + }, + { + "epoch": 721.25, + "grad_norm": 1.2676738500595093, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 109630 + }, + { + "epoch": 721.3157894736842, + "grad_norm": 1.0815623998641968, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 109640 + }, + { + "epoch": 721.3815789473684, + "grad_norm": 0.6487982273101807, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 109650 + }, + { + "epoch": 721.4473684210526, + "grad_norm": 1.0579499006271362, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 109660 + }, + { + "epoch": 721.5131578947369, + "grad_norm": 1.0880662202835083, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 109670 + }, + { + "epoch": 721.578947368421, + "grad_norm": 1.020585298538208, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 109680 + }, + { + "epoch": 721.6447368421053, + "grad_norm": 0.9497626423835754, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 109690 + }, + { + "epoch": 721.7105263157895, + "grad_norm": 1.3067365884780884, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 109700 + }, + { + "epoch": 721.7763157894736, + "grad_norm": 1.3649437427520752, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 109710 + }, + { + "epoch": 721.8421052631579, + "grad_norm": 1.4548853635787964, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 109720 + }, + { + "epoch": 721.9078947368421, + "grad_norm": 1.0740547180175781, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 109730 + }, + { + "epoch": 721.9736842105264, + "grad_norm": 1.0808460712432861, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 109740 + }, + { + "epoch": 722.0394736842105, + "grad_norm": 0.738896906375885, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 109750 + }, + { + "epoch": 722.1052631578947, + "grad_norm": 1.3755104541778564, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 109760 + }, + { + "epoch": 722.171052631579, + "grad_norm": 1.0921390056610107, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 109770 + }, + { + "epoch": 722.2368421052631, + "grad_norm": 1.5075722932815552, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 109780 + }, + { + "epoch": 722.3026315789474, + "grad_norm": 1.332755446434021, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 109790 + }, + { + "epoch": 722.3684210526316, + "grad_norm": 0.9924520254135132, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 109800 + }, + { + "epoch": 722.4342105263158, + "grad_norm": 1.1538974046707153, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 109810 + }, + { + "epoch": 722.5, + "grad_norm": 0.8574804663658142, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 109820 + }, + { + "epoch": 722.5657894736842, + "grad_norm": 1.1876112222671509, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 109830 + }, + { + "epoch": 722.6315789473684, + "grad_norm": 1.1805399656295776, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 109840 + }, + { + "epoch": 722.6973684210526, + "grad_norm": 1.1132640838623047, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 109850 + }, + { + "epoch": 722.7631578947369, + "grad_norm": 1.2226154804229736, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 109860 + }, + { + "epoch": 722.828947368421, + "grad_norm": 1.0198978185653687, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 109870 + }, + { + "epoch": 722.8947368421053, + "grad_norm": 0.758260190486908, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 109880 + }, + { + "epoch": 722.9605263157895, + "grad_norm": 1.051596999168396, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 109890 + }, + { + "epoch": 723.0263157894736, + "grad_norm": 1.2912297248840332, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 109900 + }, + { + "epoch": 723.0921052631579, + "grad_norm": 1.3294340372085571, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 109910 + }, + { + "epoch": 723.1578947368421, + "grad_norm": 1.156272053718567, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 109920 + }, + { + "epoch": 723.2236842105264, + "grad_norm": 0.9503754377365112, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 109930 + }, + { + "epoch": 723.2894736842105, + "grad_norm": 1.5646758079528809, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 109940 + }, + { + "epoch": 723.3552631578947, + "grad_norm": 1.065543532371521, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 109950 + }, + { + "epoch": 723.421052631579, + "grad_norm": 1.1717690229415894, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 109960 + }, + { + "epoch": 723.4868421052631, + "grad_norm": 0.8114318251609802, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 109970 + }, + { + "epoch": 723.5526315789474, + "grad_norm": 1.155068278312683, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 109980 + }, + { + "epoch": 723.6184210526316, + "grad_norm": 1.220639705657959, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 109990 + }, + { + "epoch": 723.6842105263158, + "grad_norm": 0.996774435043335, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 110000 + }, + { + "epoch": 723.75, + "grad_norm": 1.1410325765609741, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 110010 + }, + { + "epoch": 723.8157894736842, + "grad_norm": 1.0671559572219849, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 110020 + }, + { + "epoch": 723.8815789473684, + "grad_norm": 1.0174076557159424, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 110030 + }, + { + "epoch": 723.9473684210526, + "grad_norm": 1.0742820501327515, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 110040 + }, + { + "epoch": 724.0131578947369, + "grad_norm": 0.996814489364624, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 110050 + }, + { + "epoch": 724.078947368421, + "grad_norm": 1.142468810081482, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 110060 + }, + { + "epoch": 724.1447368421053, + "grad_norm": 1.2725366353988647, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 110070 + }, + { + "epoch": 724.2105263157895, + "grad_norm": 1.052403211593628, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 110080 + }, + { + "epoch": 724.2763157894736, + "grad_norm": 1.1229180097579956, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 110090 + }, + { + "epoch": 724.3421052631579, + "grad_norm": 0.9793723821640015, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 110100 + }, + { + "epoch": 724.4078947368421, + "grad_norm": 0.8089613318443298, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 110110 + }, + { + "epoch": 724.4736842105264, + "grad_norm": 1.0036156177520752, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 110120 + }, + { + "epoch": 724.5394736842105, + "grad_norm": 1.4579956531524658, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 110130 + }, + { + "epoch": 724.6052631578947, + "grad_norm": 1.225488543510437, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 110140 + }, + { + "epoch": 724.671052631579, + "grad_norm": 0.904151201248169, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 110150 + }, + { + "epoch": 724.7368421052631, + "grad_norm": 0.8515450954437256, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 110160 + }, + { + "epoch": 724.8026315789474, + "grad_norm": 0.9605996608734131, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 110170 + }, + { + "epoch": 724.8684210526316, + "grad_norm": 0.8468832969665527, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 110180 + }, + { + "epoch": 724.9342105263158, + "grad_norm": 1.0464105606079102, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 110190 + }, + { + "epoch": 725.0, + "grad_norm": 0.886685848236084, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 110200 + }, + { + "epoch": 725.0657894736842, + "grad_norm": 1.0475046634674072, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 110210 + }, + { + "epoch": 725.1315789473684, + "grad_norm": 1.0469486713409424, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 110220 + }, + { + "epoch": 725.1973684210526, + "grad_norm": 1.3123482465744019, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 110230 + }, + { + "epoch": 725.2631578947369, + "grad_norm": 1.2292877435684204, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 110240 + }, + { + "epoch": 725.328947368421, + "grad_norm": 1.54679536819458, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 110250 + }, + { + "epoch": 725.3947368421053, + "grad_norm": 1.5144623517990112, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 110260 + }, + { + "epoch": 725.4605263157895, + "grad_norm": 1.4638532400131226, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 110270 + }, + { + "epoch": 725.5263157894736, + "grad_norm": 1.037933349609375, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 110280 + }, + { + "epoch": 725.5921052631579, + "grad_norm": 0.8391295671463013, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 110290 + }, + { + "epoch": 725.6578947368421, + "grad_norm": 1.3161094188690186, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 110300 + }, + { + "epoch": 725.7236842105264, + "grad_norm": 1.3753902912139893, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 110310 + }, + { + "epoch": 725.7894736842105, + "grad_norm": 1.369210124015808, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 110320 + }, + { + "epoch": 725.8552631578947, + "grad_norm": 0.8835994005203247, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 110330 + }, + { + "epoch": 725.921052631579, + "grad_norm": 1.2269823551177979, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 110340 + }, + { + "epoch": 725.9868421052631, + "grad_norm": 1.24722158908844, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 110350 + }, + { + "epoch": 726.0526315789474, + "grad_norm": 1.065712571144104, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 110360 + }, + { + "epoch": 726.1184210526316, + "grad_norm": 1.5890785455703735, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 110370 + }, + { + "epoch": 726.1842105263158, + "grad_norm": 0.9685258269309998, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 110380 + }, + { + "epoch": 726.25, + "grad_norm": 1.2705445289611816, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 110390 + }, + { + "epoch": 726.3157894736842, + "grad_norm": 1.1773557662963867, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 110400 + }, + { + "epoch": 726.3815789473684, + "grad_norm": 1.0990701913833618, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 110410 + }, + { + "epoch": 726.4473684210526, + "grad_norm": 1.3985167741775513, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 110420 + }, + { + "epoch": 726.5131578947369, + "grad_norm": 1.0265055894851685, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 110430 + }, + { + "epoch": 726.578947368421, + "grad_norm": 0.6000217199325562, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 110440 + }, + { + "epoch": 726.6447368421053, + "grad_norm": 1.1134717464447021, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 110450 + }, + { + "epoch": 726.7105263157895, + "grad_norm": 1.1539477109909058, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 110460 + }, + { + "epoch": 726.7763157894736, + "grad_norm": 1.103757619857788, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 110470 + }, + { + "epoch": 726.8421052631579, + "grad_norm": 0.9701244235038757, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 110480 + }, + { + "epoch": 726.9078947368421, + "grad_norm": 1.1969598531723022, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 110490 + }, + { + "epoch": 726.9736842105264, + "grad_norm": 1.2793946266174316, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 110500 + }, + { + "epoch": 727.0394736842105, + "grad_norm": 1.4208569526672363, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 110510 + }, + { + "epoch": 727.1052631578947, + "grad_norm": 1.100865364074707, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 110520 + }, + { + "epoch": 727.171052631579, + "grad_norm": 1.3485084772109985, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 110530 + }, + { + "epoch": 727.2368421052631, + "grad_norm": 1.2594733238220215, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 110540 + }, + { + "epoch": 727.3026315789474, + "grad_norm": 1.0424339771270752, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 110550 + }, + { + "epoch": 727.3684210526316, + "grad_norm": 0.9489828944206238, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 110560 + }, + { + "epoch": 727.4342105263158, + "grad_norm": 1.0845290422439575, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 110570 + }, + { + "epoch": 727.5, + "grad_norm": 1.0237302780151367, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 110580 + }, + { + "epoch": 727.5657894736842, + "grad_norm": 0.6209089756011963, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 110590 + }, + { + "epoch": 727.6315789473684, + "grad_norm": 0.8243921995162964, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 110600 + }, + { + "epoch": 727.6973684210526, + "grad_norm": 1.0948792695999146, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 110610 + }, + { + "epoch": 727.7631578947369, + "grad_norm": 1.0297280550003052, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 110620 + }, + { + "epoch": 727.828947368421, + "grad_norm": 1.0834107398986816, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 110630 + }, + { + "epoch": 727.8947368421053, + "grad_norm": 1.2613449096679688, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 110640 + }, + { + "epoch": 727.9605263157895, + "grad_norm": 1.1736477613449097, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 110650 + }, + { + "epoch": 728.0263157894736, + "grad_norm": 0.9048214554786682, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 110660 + }, + { + "epoch": 728.0921052631579, + "grad_norm": 0.9805360436439514, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 110670 + }, + { + "epoch": 728.1578947368421, + "grad_norm": 0.9766744375228882, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 110680 + }, + { + "epoch": 728.2236842105264, + "grad_norm": 1.171241283416748, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 110690 + }, + { + "epoch": 728.2894736842105, + "grad_norm": 1.0495202541351318, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 110700 + }, + { + "epoch": 728.3552631578947, + "grad_norm": 1.1976003646850586, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 110710 + }, + { + "epoch": 728.421052631579, + "grad_norm": 1.323385238647461, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 110720 + }, + { + "epoch": 728.4868421052631, + "grad_norm": 0.8279552459716797, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 110730 + }, + { + "epoch": 728.5526315789474, + "grad_norm": 1.1523821353912354, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 110740 + }, + { + "epoch": 728.6184210526316, + "grad_norm": 1.1618291139602661, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 110750 + }, + { + "epoch": 728.6842105263158, + "grad_norm": 1.517456293106079, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 110760 + }, + { + "epoch": 728.75, + "grad_norm": 1.259458303451538, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 110770 + }, + { + "epoch": 728.8157894736842, + "grad_norm": 1.2021534442901611, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 110780 + }, + { + "epoch": 728.8815789473684, + "grad_norm": 0.7205750346183777, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 110790 + }, + { + "epoch": 728.9473684210526, + "grad_norm": 0.7980080246925354, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 110800 + }, + { + "epoch": 729.0131578947369, + "grad_norm": 1.2943599224090576, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 110810 + }, + { + "epoch": 729.078947368421, + "grad_norm": 1.2890563011169434, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 110820 + }, + { + "epoch": 729.1447368421053, + "grad_norm": 0.8411272764205933, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 110830 + }, + { + "epoch": 729.2105263157895, + "grad_norm": 1.1245949268341064, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 110840 + }, + { + "epoch": 729.2763157894736, + "grad_norm": 0.9203550815582275, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 110850 + }, + { + "epoch": 729.3421052631579, + "grad_norm": 1.0467205047607422, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 110860 + }, + { + "epoch": 729.4078947368421, + "grad_norm": 1.1412601470947266, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 110870 + }, + { + "epoch": 729.4736842105264, + "grad_norm": 0.8333537578582764, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 110880 + }, + { + "epoch": 729.5394736842105, + "grad_norm": 0.9673253893852234, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 110890 + }, + { + "epoch": 729.6052631578947, + "grad_norm": 0.9576820135116577, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 110900 + }, + { + "epoch": 729.671052631579, + "grad_norm": 1.1713014841079712, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 110910 + }, + { + "epoch": 729.7368421052631, + "grad_norm": 1.1785231828689575, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 110920 + }, + { + "epoch": 729.8026315789474, + "grad_norm": 1.2538374662399292, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 110930 + }, + { + "epoch": 729.8684210526316, + "grad_norm": 1.0921484231948853, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 110940 + }, + { + "epoch": 729.9342105263158, + "grad_norm": 1.2144492864608765, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 110950 + }, + { + "epoch": 730.0, + "grad_norm": 1.5016517639160156, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 110960 + }, + { + "epoch": 730.0657894736842, + "grad_norm": 1.3069437742233276, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 110970 + }, + { + "epoch": 730.1315789473684, + "grad_norm": 1.0793986320495605, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 110980 + }, + { + "epoch": 730.1973684210526, + "grad_norm": 0.8832784295082092, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 110990 + }, + { + "epoch": 730.2631578947369, + "grad_norm": 0.7944328188896179, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 111000 + }, + { + "epoch": 730.328947368421, + "grad_norm": 1.1215214729309082, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 111010 + }, + { + "epoch": 730.3947368421053, + "grad_norm": 1.332367181777954, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 111020 + }, + { + "epoch": 730.4605263157895, + "grad_norm": 1.3931553363800049, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 111030 + }, + { + "epoch": 730.5263157894736, + "grad_norm": 1.4220699071884155, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 111040 + }, + { + "epoch": 730.5921052631579, + "grad_norm": 1.1629732847213745, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 111050 + }, + { + "epoch": 730.6578947368421, + "grad_norm": 1.2412759065628052, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 111060 + }, + { + "epoch": 730.7236842105264, + "grad_norm": 1.228760838508606, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 111070 + }, + { + "epoch": 730.7894736842105, + "grad_norm": 1.2496362924575806, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 111080 + }, + { + "epoch": 730.8552631578947, + "grad_norm": 0.9673229455947876, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 111090 + }, + { + "epoch": 730.921052631579, + "grad_norm": 1.0482990741729736, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 111100 + }, + { + "epoch": 730.9868421052631, + "grad_norm": 1.0728371143341064, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 111110 + }, + { + "epoch": 731.0526315789474, + "grad_norm": 0.8858627080917358, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 111120 + }, + { + "epoch": 731.1184210526316, + "grad_norm": 1.158208966255188, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 111130 + }, + { + "epoch": 731.1842105263158, + "grad_norm": 1.145789623260498, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 111140 + }, + { + "epoch": 731.25, + "grad_norm": 1.142052412033081, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 111150 + }, + { + "epoch": 731.3157894736842, + "grad_norm": 1.0644676685333252, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 111160 + }, + { + "epoch": 731.3815789473684, + "grad_norm": 1.0623435974121094, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 111170 + }, + { + "epoch": 731.4473684210526, + "grad_norm": 1.053824543952942, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 111180 + }, + { + "epoch": 731.5131578947369, + "grad_norm": 1.2041244506835938, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 111190 + }, + { + "epoch": 731.578947368421, + "grad_norm": 1.0749359130859375, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 111200 + }, + { + "epoch": 731.6447368421053, + "grad_norm": 1.109894037246704, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 111210 + }, + { + "epoch": 731.7105263157895, + "grad_norm": 1.581756830215454, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 111220 + }, + { + "epoch": 731.7763157894736, + "grad_norm": 0.937778115272522, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 111230 + }, + { + "epoch": 731.8421052631579, + "grad_norm": 1.134636402130127, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 111240 + }, + { + "epoch": 731.9078947368421, + "grad_norm": 1.137024998664856, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 111250 + }, + { + "epoch": 731.9736842105264, + "grad_norm": 0.980426013469696, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 111260 + }, + { + "epoch": 732.0394736842105, + "grad_norm": 1.3249105215072632, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 111270 + }, + { + "epoch": 732.1052631578947, + "grad_norm": 1.0460824966430664, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 111280 + }, + { + "epoch": 732.171052631579, + "grad_norm": 1.2906813621520996, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 111290 + }, + { + "epoch": 732.2368421052631, + "grad_norm": 0.8858234286308289, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 111300 + }, + { + "epoch": 732.3026315789474, + "grad_norm": 1.170811653137207, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 111310 + }, + { + "epoch": 732.3684210526316, + "grad_norm": 1.3404710292816162, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 111320 + }, + { + "epoch": 732.4342105263158, + "grad_norm": 1.3341257572174072, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 111330 + }, + { + "epoch": 732.5, + "grad_norm": 1.3876317739486694, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 111340 + }, + { + "epoch": 732.5657894736842, + "grad_norm": 0.7830882668495178, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 111350 + }, + { + "epoch": 732.6315789473684, + "grad_norm": 1.436397910118103, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 111360 + }, + { + "epoch": 732.6973684210526, + "grad_norm": 0.7687216997146606, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 111370 + }, + { + "epoch": 732.7631578947369, + "grad_norm": 0.7303744554519653, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 111380 + }, + { + "epoch": 732.828947368421, + "grad_norm": 1.1733373403549194, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 111390 + }, + { + "epoch": 732.8947368421053, + "grad_norm": 0.9514874219894409, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 111400 + }, + { + "epoch": 732.9605263157895, + "grad_norm": 0.594891369342804, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 111410 + }, + { + "epoch": 733.0263157894736, + "grad_norm": 0.945779025554657, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 111420 + }, + { + "epoch": 733.0921052631579, + "grad_norm": 0.6920212507247925, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 111430 + }, + { + "epoch": 733.1578947368421, + "grad_norm": 0.796112060546875, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 111440 + }, + { + "epoch": 733.2236842105264, + "grad_norm": 0.9636341333389282, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 111450 + }, + { + "epoch": 733.2894736842105, + "grad_norm": 1.2237485647201538, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 111460 + }, + { + "epoch": 733.3552631578947, + "grad_norm": 1.8574937582015991, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 111470 + }, + { + "epoch": 733.421052631579, + "grad_norm": 1.866210699081421, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 111480 + }, + { + "epoch": 733.4868421052631, + "grad_norm": 1.6350382566452026, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 111490 + }, + { + "epoch": 733.5526315789474, + "grad_norm": 1.0349119901657104, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 111500 + }, + { + "epoch": 733.6184210526316, + "grad_norm": 1.5205327272415161, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 111510 + }, + { + "epoch": 733.6842105263158, + "grad_norm": 1.2245243787765503, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 111520 + }, + { + "epoch": 733.75, + "grad_norm": 1.118973731994629, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 111530 + }, + { + "epoch": 733.8157894736842, + "grad_norm": 1.170804500579834, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 111540 + }, + { + "epoch": 733.8815789473684, + "grad_norm": 1.130577802658081, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 111550 + }, + { + "epoch": 733.9473684210526, + "grad_norm": 1.4491939544677734, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 111560 + }, + { + "epoch": 734.0131578947369, + "grad_norm": 1.1310603618621826, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 111570 + }, + { + "epoch": 734.078947368421, + "grad_norm": 0.9815219640731812, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 111580 + }, + { + "epoch": 734.1447368421053, + "grad_norm": 1.254376769065857, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 111590 + }, + { + "epoch": 734.2105263157895, + "grad_norm": 0.9605775475502014, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 111600 + }, + { + "epoch": 734.2763157894736, + "grad_norm": 1.2415673732757568, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 111610 + }, + { + "epoch": 734.3421052631579, + "grad_norm": 1.124882698059082, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 111620 + }, + { + "epoch": 734.4078947368421, + "grad_norm": 0.7746277451515198, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 111630 + }, + { + "epoch": 734.4736842105264, + "grad_norm": 1.1267927885055542, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 111640 + }, + { + "epoch": 734.5394736842105, + "grad_norm": 0.798928439617157, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 111650 + }, + { + "epoch": 734.6052631578947, + "grad_norm": 1.297399878501892, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 111660 + }, + { + "epoch": 734.671052631579, + "grad_norm": 0.66205894947052, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 111670 + }, + { + "epoch": 734.7368421052631, + "grad_norm": 0.7813223004341125, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 111680 + }, + { + "epoch": 734.8026315789474, + "grad_norm": 0.9377914667129517, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 111690 + }, + { + "epoch": 734.8684210526316, + "grad_norm": 1.1338937282562256, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 111700 + }, + { + "epoch": 734.9342105263158, + "grad_norm": 1.4318599700927734, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 111710 + }, + { + "epoch": 735.0, + "grad_norm": 1.221707820892334, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 111720 + }, + { + "epoch": 735.0657894736842, + "grad_norm": 1.2542157173156738, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 111730 + }, + { + "epoch": 735.1315789473684, + "grad_norm": 1.02726149559021, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 111740 + }, + { + "epoch": 735.1973684210526, + "grad_norm": 0.7633141875267029, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 111750 + }, + { + "epoch": 735.2631578947369, + "grad_norm": 1.315930962562561, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 111760 + }, + { + "epoch": 735.328947368421, + "grad_norm": 1.430780291557312, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 111770 + }, + { + "epoch": 735.3947368421053, + "grad_norm": 1.1580687761306763, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 111780 + }, + { + "epoch": 735.4605263157895, + "grad_norm": 1.3785609006881714, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 111790 + }, + { + "epoch": 735.5263157894736, + "grad_norm": 1.3708674907684326, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 111800 + }, + { + "epoch": 735.5921052631579, + "grad_norm": 1.1644020080566406, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 111810 + }, + { + "epoch": 735.6578947368421, + "grad_norm": 1.1379879713058472, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 111820 + }, + { + "epoch": 735.7236842105264, + "grad_norm": 0.817884087562561, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 111830 + }, + { + "epoch": 735.7894736842105, + "grad_norm": 0.846930742263794, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 111840 + }, + { + "epoch": 735.8552631578947, + "grad_norm": 0.9146661162376404, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 111850 + }, + { + "epoch": 735.921052631579, + "grad_norm": 1.0802667140960693, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 111860 + }, + { + "epoch": 735.9868421052631, + "grad_norm": 0.7806511521339417, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 111870 + }, + { + "epoch": 736.0526315789474, + "grad_norm": 0.9969675540924072, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 111880 + }, + { + "epoch": 736.1184210526316, + "grad_norm": 1.1050714254379272, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 111890 + }, + { + "epoch": 736.1842105263158, + "grad_norm": 1.1047745943069458, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 111900 + }, + { + "epoch": 736.25, + "grad_norm": 1.2120915651321411, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 111910 + }, + { + "epoch": 736.3157894736842, + "grad_norm": 0.9838027358055115, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 111920 + }, + { + "epoch": 736.3815789473684, + "grad_norm": 1.0756142139434814, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 111930 + }, + { + "epoch": 736.4473684210526, + "grad_norm": 0.8673560619354248, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 111940 + }, + { + "epoch": 736.5131578947369, + "grad_norm": 0.8475706577301025, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 111950 + }, + { + "epoch": 736.578947368421, + "grad_norm": 0.9812502264976501, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 111960 + }, + { + "epoch": 736.6447368421053, + "grad_norm": 1.0416502952575684, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 111970 + }, + { + "epoch": 736.7105263157895, + "grad_norm": 1.1706211566925049, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 111980 + }, + { + "epoch": 736.7763157894736, + "grad_norm": 1.0563849210739136, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 111990 + }, + { + "epoch": 736.8421052631579, + "grad_norm": 1.2770580053329468, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 112000 + }, + { + "epoch": 736.9078947368421, + "grad_norm": 1.0203648805618286, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 112010 + }, + { + "epoch": 736.9736842105264, + "grad_norm": 0.937138557434082, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 112020 + }, + { + "epoch": 737.0394736842105, + "grad_norm": 1.422008752822876, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 112030 + }, + { + "epoch": 737.1052631578947, + "grad_norm": 1.0362669229507446, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 112040 + }, + { + "epoch": 737.171052631579, + "grad_norm": 0.7629768252372742, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 112050 + }, + { + "epoch": 737.2368421052631, + "grad_norm": 0.8714403510093689, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 112060 + }, + { + "epoch": 737.3026315789474, + "grad_norm": 1.0577564239501953, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 112070 + }, + { + "epoch": 737.3684210526316, + "grad_norm": 1.1749887466430664, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 112080 + }, + { + "epoch": 737.4342105263158, + "grad_norm": 1.071063756942749, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 112090 + }, + { + "epoch": 737.5, + "grad_norm": 1.1474461555480957, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 112100 + }, + { + "epoch": 737.5657894736842, + "grad_norm": 1.089721441268921, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 112110 + }, + { + "epoch": 737.6315789473684, + "grad_norm": 1.0660192966461182, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 112120 + }, + { + "epoch": 737.6973684210526, + "grad_norm": 0.9403685331344604, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 112130 + }, + { + "epoch": 737.7631578947369, + "grad_norm": 0.7262328267097473, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 112140 + }, + { + "epoch": 737.828947368421, + "grad_norm": 0.9046671986579895, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 112150 + }, + { + "epoch": 737.8947368421053, + "grad_norm": 1.0580425262451172, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 112160 + }, + { + "epoch": 737.9605263157895, + "grad_norm": 0.8231117129325867, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 112170 + }, + { + "epoch": 738.0263157894736, + "grad_norm": 0.9785894751548767, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 112180 + }, + { + "epoch": 738.0921052631579, + "grad_norm": 1.0782703161239624, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 112190 + }, + { + "epoch": 738.1578947368421, + "grad_norm": 1.0809273719787598, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 112200 + }, + { + "epoch": 738.2236842105264, + "grad_norm": 1.4004234075546265, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 112210 + }, + { + "epoch": 738.2894736842105, + "grad_norm": 1.5561052560806274, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 112220 + }, + { + "epoch": 738.3552631578947, + "grad_norm": 1.2708110809326172, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 112230 + }, + { + "epoch": 738.421052631579, + "grad_norm": 1.1724753379821777, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 112240 + }, + { + "epoch": 738.4868421052631, + "grad_norm": 1.3249428272247314, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 112250 + }, + { + "epoch": 738.5526315789474, + "grad_norm": 1.2601690292358398, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 112260 + }, + { + "epoch": 738.6184210526316, + "grad_norm": 1.4754626750946045, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 112270 + }, + { + "epoch": 738.6842105263158, + "grad_norm": 1.0365551710128784, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 112280 + }, + { + "epoch": 738.75, + "grad_norm": 1.195425033569336, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 112290 + }, + { + "epoch": 738.8157894736842, + "grad_norm": 0.8115053772926331, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 112300 + }, + { + "epoch": 738.8815789473684, + "grad_norm": 1.1184942722320557, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 112310 + }, + { + "epoch": 738.9473684210526, + "grad_norm": 0.7952884435653687, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 112320 + }, + { + "epoch": 739.0131578947369, + "grad_norm": 0.909673810005188, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 112330 + }, + { + "epoch": 739.078947368421, + "grad_norm": 0.9384917616844177, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 112340 + }, + { + "epoch": 739.1447368421053, + "grad_norm": 0.9624817967414856, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 112350 + }, + { + "epoch": 739.2105263157895, + "grad_norm": 1.2342602014541626, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 112360 + }, + { + "epoch": 739.2763157894736, + "grad_norm": 0.8785709142684937, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 112370 + }, + { + "epoch": 739.3421052631579, + "grad_norm": 1.1421648263931274, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 112380 + }, + { + "epoch": 739.4078947368421, + "grad_norm": 0.7869369387626648, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 112390 + }, + { + "epoch": 739.4736842105264, + "grad_norm": 0.9102826118469238, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 112400 + }, + { + "epoch": 739.5394736842105, + "grad_norm": 1.3796066045761108, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 112410 + }, + { + "epoch": 739.6052631578947, + "grad_norm": 0.9464783072471619, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 112420 + }, + { + "epoch": 739.671052631579, + "grad_norm": 1.226456642150879, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 112430 + }, + { + "epoch": 739.7368421052631, + "grad_norm": 0.8695907592773438, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 112440 + }, + { + "epoch": 739.8026315789474, + "grad_norm": 1.0778448581695557, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 112450 + }, + { + "epoch": 739.8684210526316, + "grad_norm": 1.033981204032898, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 112460 + }, + { + "epoch": 739.9342105263158, + "grad_norm": 1.3811094760894775, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 112470 + }, + { + "epoch": 740.0, + "grad_norm": 1.0558196306228638, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 112480 + }, + { + "epoch": 740.0657894736842, + "grad_norm": 1.4138654470443726, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 112490 + }, + { + "epoch": 740.1315789473684, + "grad_norm": 1.170053482055664, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 112500 + }, + { + "epoch": 740.1973684210526, + "grad_norm": 1.0176713466644287, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 112510 + }, + { + "epoch": 740.2631578947369, + "grad_norm": 1.2900663614273071, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 112520 + }, + { + "epoch": 740.328947368421, + "grad_norm": 1.0003573894500732, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 112530 + }, + { + "epoch": 740.3947368421053, + "grad_norm": 1.079323410987854, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 112540 + }, + { + "epoch": 740.4605263157895, + "grad_norm": 0.878826916217804, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 112550 + }, + { + "epoch": 740.5263157894736, + "grad_norm": 0.9983590841293335, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 112560 + }, + { + "epoch": 740.5921052631579, + "grad_norm": 1.1599466800689697, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 112570 + }, + { + "epoch": 740.6578947368421, + "grad_norm": 1.4644721746444702, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 112580 + }, + { + "epoch": 740.7236842105264, + "grad_norm": 1.0327931642532349, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 112590 + }, + { + "epoch": 740.7894736842105, + "grad_norm": 1.1933623552322388, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 112600 + }, + { + "epoch": 740.8552631578947, + "grad_norm": 1.1514612436294556, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 112610 + }, + { + "epoch": 740.921052631579, + "grad_norm": 1.074176549911499, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 112620 + }, + { + "epoch": 740.9868421052631, + "grad_norm": 1.3565348386764526, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 112630 + }, + { + "epoch": 741.0526315789474, + "grad_norm": 1.487715721130371, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 112640 + }, + { + "epoch": 741.1184210526316, + "grad_norm": 1.228196620941162, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 112650 + }, + { + "epoch": 741.1842105263158, + "grad_norm": 0.9726716876029968, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 112660 + }, + { + "epoch": 741.25, + "grad_norm": 1.1228713989257812, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 112670 + }, + { + "epoch": 741.3157894736842, + "grad_norm": 1.3492858409881592, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 112680 + }, + { + "epoch": 741.3815789473684, + "grad_norm": 1.2448452711105347, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 112690 + }, + { + "epoch": 741.4473684210526, + "grad_norm": 1.2772343158721924, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 112700 + }, + { + "epoch": 741.5131578947369, + "grad_norm": 0.9293051958084106, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 112710 + }, + { + "epoch": 741.578947368421, + "grad_norm": 0.7797158360481262, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 112720 + }, + { + "epoch": 741.6447368421053, + "grad_norm": 1.2524482011795044, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 112730 + }, + { + "epoch": 741.7105263157895, + "grad_norm": 1.0010104179382324, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 112740 + }, + { + "epoch": 741.7763157894736, + "grad_norm": 0.8157704472541809, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 112750 + }, + { + "epoch": 741.8421052631579, + "grad_norm": 0.7338435649871826, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 112760 + }, + { + "epoch": 741.9078947368421, + "grad_norm": 0.8441011905670166, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 112770 + }, + { + "epoch": 741.9736842105264, + "grad_norm": 0.5987163186073303, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 112780 + }, + { + "epoch": 742.0394736842105, + "grad_norm": 0.5863604545593262, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 112790 + }, + { + "epoch": 742.1052631578947, + "grad_norm": 1.1553442478179932, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 112800 + }, + { + "epoch": 742.171052631579, + "grad_norm": 1.0437456369400024, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 112810 + }, + { + "epoch": 742.2368421052631, + "grad_norm": 1.0978097915649414, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 112820 + }, + { + "epoch": 742.3026315789474, + "grad_norm": 1.2839778661727905, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 112830 + }, + { + "epoch": 742.3684210526316, + "grad_norm": 0.8955215215682983, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 112840 + }, + { + "epoch": 742.4342105263158, + "grad_norm": 0.9545695781707764, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 112850 + }, + { + "epoch": 742.5, + "grad_norm": 0.9076725840568542, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 112860 + }, + { + "epoch": 742.5657894736842, + "grad_norm": 0.9868484735488892, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 112870 + }, + { + "epoch": 742.6315789473684, + "grad_norm": 0.9689452052116394, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 112880 + }, + { + "epoch": 742.6973684210526, + "grad_norm": 0.9611213207244873, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 112890 + }, + { + "epoch": 742.7631578947369, + "grad_norm": 1.3017029762268066, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 112900 + }, + { + "epoch": 742.828947368421, + "grad_norm": 1.010025978088379, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 112910 + }, + { + "epoch": 742.8947368421053, + "grad_norm": 1.1082737445831299, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 112920 + }, + { + "epoch": 742.9605263157895, + "grad_norm": 1.2095134258270264, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 112930 + }, + { + "epoch": 743.0263157894736, + "grad_norm": 0.7735888361930847, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 112940 + }, + { + "epoch": 743.0921052631579, + "grad_norm": 0.8232780694961548, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 112950 + }, + { + "epoch": 743.1578947368421, + "grad_norm": 1.1837214231491089, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 112960 + }, + { + "epoch": 743.2236842105264, + "grad_norm": 1.0164154767990112, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 112970 + }, + { + "epoch": 743.2894736842105, + "grad_norm": 0.9214721918106079, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 112980 + }, + { + "epoch": 743.3552631578947, + "grad_norm": 1.178781270980835, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 112990 + }, + { + "epoch": 743.421052631579, + "grad_norm": 0.8632880449295044, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 113000 + }, + { + "epoch": 743.4868421052631, + "grad_norm": 0.8442752957344055, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 113010 + }, + { + "epoch": 743.5526315789474, + "grad_norm": 1.3869895935058594, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 113020 + }, + { + "epoch": 743.6184210526316, + "grad_norm": 0.642612636089325, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 113030 + }, + { + "epoch": 743.6842105263158, + "grad_norm": 0.9901149868965149, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 113040 + }, + { + "epoch": 743.75, + "grad_norm": 0.6913559436798096, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 113050 + }, + { + "epoch": 743.8157894736842, + "grad_norm": 1.3106902837753296, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 113060 + }, + { + "epoch": 743.8815789473684, + "grad_norm": 0.6828902959823608, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 113070 + }, + { + "epoch": 743.9473684210526, + "grad_norm": 1.2042633295059204, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 113080 + }, + { + "epoch": 744.0131578947369, + "grad_norm": 1.0320390462875366, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 113090 + }, + { + "epoch": 744.078947368421, + "grad_norm": 1.210471272468567, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 113100 + }, + { + "epoch": 744.1447368421053, + "grad_norm": 0.7670679092407227, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 113110 + }, + { + "epoch": 744.2105263157895, + "grad_norm": 1.2954962253570557, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 113120 + }, + { + "epoch": 744.2763157894736, + "grad_norm": 1.0254065990447998, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 113130 + }, + { + "epoch": 744.3421052631579, + "grad_norm": 0.9328817129135132, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 113140 + }, + { + "epoch": 744.4078947368421, + "grad_norm": 1.0787217617034912, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 113150 + }, + { + "epoch": 744.4736842105264, + "grad_norm": 1.0308198928833008, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 113160 + }, + { + "epoch": 744.5394736842105, + "grad_norm": 1.011286973953247, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 113170 + }, + { + "epoch": 744.6052631578947, + "grad_norm": 0.8020488023757935, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 113180 + }, + { + "epoch": 744.671052631579, + "grad_norm": 1.1139671802520752, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 113190 + }, + { + "epoch": 744.7368421052631, + "grad_norm": 1.1352884769439697, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 113200 + }, + { + "epoch": 744.8026315789474, + "grad_norm": 0.8926935791969299, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 113210 + }, + { + "epoch": 744.8684210526316, + "grad_norm": 1.2045756578445435, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 113220 + }, + { + "epoch": 744.9342105263158, + "grad_norm": 1.2373918294906616, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 113230 + }, + { + "epoch": 745.0, + "grad_norm": 0.9970014095306396, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 113240 + }, + { + "epoch": 745.0657894736842, + "grad_norm": 0.9644217491149902, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 113250 + }, + { + "epoch": 745.1315789473684, + "grad_norm": 1.1484285593032837, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 113260 + }, + { + "epoch": 745.1973684210526, + "grad_norm": 0.9639143347740173, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 113270 + }, + { + "epoch": 745.2631578947369, + "grad_norm": 1.1494964361190796, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 113280 + }, + { + "epoch": 745.328947368421, + "grad_norm": 1.025691032409668, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 113290 + }, + { + "epoch": 745.3947368421053, + "grad_norm": 1.154371976852417, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 113300 + }, + { + "epoch": 745.4605263157895, + "grad_norm": 1.3313137292861938, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 113310 + }, + { + "epoch": 745.5263157894736, + "grad_norm": 1.094545841217041, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 113320 + }, + { + "epoch": 745.5921052631579, + "grad_norm": 1.2096467018127441, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 113330 + }, + { + "epoch": 745.6578947368421, + "grad_norm": 1.1005749702453613, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 113340 + }, + { + "epoch": 745.7236842105264, + "grad_norm": 1.1122779846191406, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 113350 + }, + { + "epoch": 745.7894736842105, + "grad_norm": 1.2181123495101929, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 113360 + }, + { + "epoch": 745.8552631578947, + "grad_norm": 1.4004229307174683, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 113370 + }, + { + "epoch": 745.921052631579, + "grad_norm": 0.9129208922386169, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 113380 + }, + { + "epoch": 745.9868421052631, + "grad_norm": 1.1694635152816772, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 113390 + }, + { + "epoch": 746.0526315789474, + "grad_norm": 1.1359633207321167, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 113400 + }, + { + "epoch": 746.1184210526316, + "grad_norm": 1.1821686029434204, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 113410 + }, + { + "epoch": 746.1842105263158, + "grad_norm": 1.0882134437561035, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 113420 + }, + { + "epoch": 746.25, + "grad_norm": 1.2553226947784424, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 113430 + }, + { + "epoch": 746.3157894736842, + "grad_norm": 0.9115428924560547, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 113440 + }, + { + "epoch": 746.3815789473684, + "grad_norm": 1.281915307044983, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 113450 + }, + { + "epoch": 746.4473684210526, + "grad_norm": 1.3433202505111694, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 113460 + }, + { + "epoch": 746.5131578947369, + "grad_norm": 1.5758907794952393, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 113470 + }, + { + "epoch": 746.578947368421, + "grad_norm": 1.3677822351455688, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 113480 + }, + { + "epoch": 746.6447368421053, + "grad_norm": 1.343830943107605, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 113490 + }, + { + "epoch": 746.7105263157895, + "grad_norm": 1.2050989866256714, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 113500 + }, + { + "epoch": 746.7763157894736, + "grad_norm": 0.8132672905921936, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 113510 + }, + { + "epoch": 746.8421052631579, + "grad_norm": 1.0796236991882324, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 113520 + }, + { + "epoch": 746.9078947368421, + "grad_norm": 0.9713119268417358, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 113530 + }, + { + "epoch": 746.9736842105264, + "grad_norm": 1.068225383758545, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 113540 + }, + { + "epoch": 747.0394736842105, + "grad_norm": 0.9908331036567688, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 113550 + }, + { + "epoch": 747.1052631578947, + "grad_norm": 0.8865001797676086, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 113560 + }, + { + "epoch": 747.171052631579, + "grad_norm": 0.9770333766937256, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 113570 + }, + { + "epoch": 747.2368421052631, + "grad_norm": 0.9718663692474365, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 113580 + }, + { + "epoch": 747.3026315789474, + "grad_norm": 0.9248496890068054, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 113590 + }, + { + "epoch": 747.3684210526316, + "grad_norm": 0.9006205797195435, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 113600 + }, + { + "epoch": 747.4342105263158, + "grad_norm": 1.0532220602035522, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 113610 + }, + { + "epoch": 747.5, + "grad_norm": 1.0480901002883911, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 113620 + }, + { + "epoch": 747.5657894736842, + "grad_norm": 1.1913716793060303, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 113630 + }, + { + "epoch": 747.6315789473684, + "grad_norm": 1.3170071840286255, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 113640 + }, + { + "epoch": 747.6973684210526, + "grad_norm": 0.8311954140663147, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 113650 + }, + { + "epoch": 747.7631578947369, + "grad_norm": 1.102514624595642, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 113660 + }, + { + "epoch": 747.828947368421, + "grad_norm": 1.3776170015335083, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 113670 + }, + { + "epoch": 747.8947368421053, + "grad_norm": 1.1094131469726562, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 113680 + }, + { + "epoch": 747.9605263157895, + "grad_norm": 1.3613204956054688, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 113690 + }, + { + "epoch": 748.0263157894736, + "grad_norm": 1.2817715406417847, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 113700 + }, + { + "epoch": 748.0921052631579, + "grad_norm": 1.1699529886245728, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 113710 + }, + { + "epoch": 748.1578947368421, + "grad_norm": 1.2629177570343018, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 113720 + }, + { + "epoch": 748.2236842105264, + "grad_norm": 1.0418188571929932, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 113730 + }, + { + "epoch": 748.2894736842105, + "grad_norm": 1.2210304737091064, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 113740 + }, + { + "epoch": 748.3552631578947, + "grad_norm": 0.6313312649726868, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 113750 + }, + { + "epoch": 748.421052631579, + "grad_norm": 1.2036254405975342, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 113760 + }, + { + "epoch": 748.4868421052631, + "grad_norm": 0.9804714918136597, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 113770 + }, + { + "epoch": 748.5526315789474, + "grad_norm": 0.9679644107818604, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 113780 + }, + { + "epoch": 748.6184210526316, + "grad_norm": 0.7930268049240112, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 113790 + }, + { + "epoch": 748.6842105263158, + "grad_norm": 1.0785940885543823, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 113800 + }, + { + "epoch": 748.75, + "grad_norm": 1.1710313558578491, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 113810 + }, + { + "epoch": 748.8157894736842, + "grad_norm": 0.9718161225318909, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 113820 + }, + { + "epoch": 748.8815789473684, + "grad_norm": 1.169264316558838, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 113830 + }, + { + "epoch": 748.9473684210526, + "grad_norm": 0.9664254188537598, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 113840 + }, + { + "epoch": 749.0131578947369, + "grad_norm": 0.7254254817962646, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 113850 + }, + { + "epoch": 749.078947368421, + "grad_norm": 0.9984303116798401, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 113860 + }, + { + "epoch": 749.1447368421053, + "grad_norm": 1.2939634323120117, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 113870 + }, + { + "epoch": 749.2105263157895, + "grad_norm": 1.12336003780365, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 113880 + }, + { + "epoch": 749.2763157894736, + "grad_norm": 1.6549626588821411, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 113890 + }, + { + "epoch": 749.3421052631579, + "grad_norm": 1.5198620557785034, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 113900 + }, + { + "epoch": 749.4078947368421, + "grad_norm": 1.2456414699554443, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 113910 + }, + { + "epoch": 749.4736842105264, + "grad_norm": 1.2373472452163696, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 113920 + }, + { + "epoch": 749.5394736842105, + "grad_norm": 1.309753179550171, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 113930 + }, + { + "epoch": 749.6052631578947, + "grad_norm": 0.9735221862792969, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 113940 + }, + { + "epoch": 749.671052631579, + "grad_norm": 0.6393201351165771, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 113950 + }, + { + "epoch": 749.7368421052631, + "grad_norm": 1.0009740591049194, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 113960 + }, + { + "epoch": 749.8026315789474, + "grad_norm": 0.8014363646507263, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 113970 + }, + { + "epoch": 749.8684210526316, + "grad_norm": 0.9935765266418457, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 113980 + }, + { + "epoch": 749.9342105263158, + "grad_norm": 1.1239501237869263, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 113990 + }, + { + "epoch": 750.0, + "grad_norm": 1.2619990110397339, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 114000 + }, + { + "epoch": 750.0657894736842, + "grad_norm": 0.8633625507354736, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 114010 + }, + { + "epoch": 750.1315789473684, + "grad_norm": 1.195512294769287, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 114020 + }, + { + "epoch": 750.1973684210526, + "grad_norm": 1.052501916885376, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 114030 + }, + { + "epoch": 750.2631578947369, + "grad_norm": 0.899624228477478, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 114040 + }, + { + "epoch": 750.328947368421, + "grad_norm": 0.7613295316696167, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 114050 + }, + { + "epoch": 750.3947368421053, + "grad_norm": 1.224990725517273, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 114060 + }, + { + "epoch": 750.4605263157895, + "grad_norm": 1.157004475593567, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 114070 + }, + { + "epoch": 750.5263157894736, + "grad_norm": 1.021988034248352, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 114080 + }, + { + "epoch": 750.5921052631579, + "grad_norm": 1.2240087985992432, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 114090 + }, + { + "epoch": 750.6578947368421, + "grad_norm": 0.8899611830711365, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 114100 + }, + { + "epoch": 750.7236842105264, + "grad_norm": 1.063750982284546, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 114110 + }, + { + "epoch": 750.7894736842105, + "grad_norm": 0.6514599919319153, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 114120 + }, + { + "epoch": 750.8552631578947, + "grad_norm": 0.8705139756202698, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 114130 + }, + { + "epoch": 750.921052631579, + "grad_norm": 1.2214490175247192, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 114140 + }, + { + "epoch": 750.9868421052631, + "grad_norm": 1.1715788841247559, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 114150 + }, + { + "epoch": 751.0526315789474, + "grad_norm": 1.076686143875122, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 114160 + }, + { + "epoch": 751.1184210526316, + "grad_norm": 1.0638906955718994, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 114170 + }, + { + "epoch": 751.1842105263158, + "grad_norm": 0.9506366848945618, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 114180 + }, + { + "epoch": 751.25, + "grad_norm": 0.9974748492240906, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 114190 + }, + { + "epoch": 751.3157894736842, + "grad_norm": 1.044983983039856, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 114200 + }, + { + "epoch": 751.3815789473684, + "grad_norm": 0.7726526856422424, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 114210 + }, + { + "epoch": 751.4473684210526, + "grad_norm": 0.9929822683334351, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 114220 + }, + { + "epoch": 751.5131578947369, + "grad_norm": 1.2815465927124023, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 114230 + }, + { + "epoch": 751.578947368421, + "grad_norm": 1.2115155458450317, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 114240 + }, + { + "epoch": 751.6447368421053, + "grad_norm": 1.184339165687561, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 114250 + }, + { + "epoch": 751.7105263157895, + "grad_norm": 1.2363086938858032, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 114260 + }, + { + "epoch": 751.7763157894736, + "grad_norm": 1.2934776544570923, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 114270 + }, + { + "epoch": 751.8421052631579, + "grad_norm": 1.1832584142684937, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 114280 + }, + { + "epoch": 751.9078947368421, + "grad_norm": 1.02647066116333, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 114290 + }, + { + "epoch": 751.9736842105264, + "grad_norm": 1.5524499416351318, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 114300 + }, + { + "epoch": 752.0394736842105, + "grad_norm": 1.193410038948059, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 114310 + }, + { + "epoch": 752.1052631578947, + "grad_norm": 1.1393669843673706, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 114320 + }, + { + "epoch": 752.171052631579, + "grad_norm": 0.7503915429115295, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 114330 + }, + { + "epoch": 752.2368421052631, + "grad_norm": 1.088619351387024, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 114340 + }, + { + "epoch": 752.3026315789474, + "grad_norm": 0.9867507815361023, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 114350 + }, + { + "epoch": 752.3684210526316, + "grad_norm": 1.2592015266418457, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 114360 + }, + { + "epoch": 752.4342105263158, + "grad_norm": 1.365531086921692, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 114370 + }, + { + "epoch": 752.5, + "grad_norm": 1.318558931350708, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 114380 + }, + { + "epoch": 752.5657894736842, + "grad_norm": 0.9499292373657227, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 114390 + }, + { + "epoch": 752.6315789473684, + "grad_norm": 1.3132816553115845, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 114400 + }, + { + "epoch": 752.6973684210526, + "grad_norm": 1.6993619203567505, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 114410 + }, + { + "epoch": 752.7631578947369, + "grad_norm": 1.5415050983428955, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 114420 + }, + { + "epoch": 752.828947368421, + "grad_norm": 1.364233136177063, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 114430 + }, + { + "epoch": 752.8947368421053, + "grad_norm": 1.6695607900619507, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 114440 + }, + { + "epoch": 752.9605263157895, + "grad_norm": 1.379301905632019, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 114450 + }, + { + "epoch": 753.0263157894736, + "grad_norm": 1.2977728843688965, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 114460 + }, + { + "epoch": 753.0921052631579, + "grad_norm": 1.069703459739685, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 114470 + }, + { + "epoch": 753.1578947368421, + "grad_norm": 1.064315915107727, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 114480 + }, + { + "epoch": 753.2236842105264, + "grad_norm": 1.2300894260406494, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 114490 + }, + { + "epoch": 753.2894736842105, + "grad_norm": 1.0416491031646729, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 114500 + }, + { + "epoch": 753.3552631578947, + "grad_norm": 1.2523179054260254, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 114510 + }, + { + "epoch": 753.421052631579, + "grad_norm": 1.2075591087341309, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 114520 + }, + { + "epoch": 753.4868421052631, + "grad_norm": 1.3077597618103027, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 114530 + }, + { + "epoch": 753.5526315789474, + "grad_norm": 0.7280270457267761, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 114540 + }, + { + "epoch": 753.6184210526316, + "grad_norm": 1.2432559728622437, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 114550 + }, + { + "epoch": 753.6842105263158, + "grad_norm": 1.0678569078445435, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 114560 + }, + { + "epoch": 753.75, + "grad_norm": 0.9261232614517212, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 114570 + }, + { + "epoch": 753.8157894736842, + "grad_norm": 1.2366281747817993, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 114580 + }, + { + "epoch": 753.8815789473684, + "grad_norm": 1.1832107305526733, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 114590 + }, + { + "epoch": 753.9473684210526, + "grad_norm": 1.4531916379928589, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 114600 + }, + { + "epoch": 754.0131578947369, + "grad_norm": 1.1988731622695923, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 114610 + }, + { + "epoch": 754.078947368421, + "grad_norm": 1.1996492147445679, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 114620 + }, + { + "epoch": 754.1447368421053, + "grad_norm": 1.2475368976593018, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 114630 + }, + { + "epoch": 754.2105263157895, + "grad_norm": 0.7737396359443665, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 114640 + }, + { + "epoch": 754.2763157894736, + "grad_norm": 1.1298565864562988, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 114650 + }, + { + "epoch": 754.3421052631579, + "grad_norm": 0.8025405406951904, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 114660 + }, + { + "epoch": 754.4078947368421, + "grad_norm": 1.1314117908477783, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 114670 + }, + { + "epoch": 754.4736842105264, + "grad_norm": 1.032278299331665, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 114680 + }, + { + "epoch": 754.5394736842105, + "grad_norm": 1.1019865274429321, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 114690 + }, + { + "epoch": 754.6052631578947, + "grad_norm": 1.3335840702056885, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 114700 + }, + { + "epoch": 754.671052631579, + "grad_norm": 0.6844580769538879, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 114710 + }, + { + "epoch": 754.7368421052631, + "grad_norm": 0.8246498107910156, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 114720 + }, + { + "epoch": 754.8026315789474, + "grad_norm": 1.2790837287902832, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 114730 + }, + { + "epoch": 754.8684210526316, + "grad_norm": 1.169844388961792, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 114740 + }, + { + "epoch": 754.9342105263158, + "grad_norm": 0.9610524773597717, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 114750 + }, + { + "epoch": 755.0, + "grad_norm": 0.9811365604400635, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 114760 + }, + { + "epoch": 755.0657894736842, + "grad_norm": 1.2406882047653198, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 114770 + }, + { + "epoch": 755.1315789473684, + "grad_norm": 1.348677635192871, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 114780 + }, + { + "epoch": 755.1973684210526, + "grad_norm": 1.0763145685195923, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 114790 + }, + { + "epoch": 755.2631578947369, + "grad_norm": 0.8977418541908264, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 114800 + }, + { + "epoch": 755.328947368421, + "grad_norm": 1.2123111486434937, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 114810 + }, + { + "epoch": 755.3947368421053, + "grad_norm": 1.1371431350708008, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 114820 + }, + { + "epoch": 755.4605263157895, + "grad_norm": 1.1556872129440308, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 114830 + }, + { + "epoch": 755.5263157894736, + "grad_norm": 1.5574454069137573, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 114840 + }, + { + "epoch": 755.5921052631579, + "grad_norm": 1.1535435914993286, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 114850 + }, + { + "epoch": 755.6578947368421, + "grad_norm": 1.2058866024017334, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 114860 + }, + { + "epoch": 755.7236842105264, + "grad_norm": 0.8264251947402954, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 114870 + }, + { + "epoch": 755.7894736842105, + "grad_norm": 1.1852251291275024, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 114880 + }, + { + "epoch": 755.8552631578947, + "grad_norm": 0.5516109466552734, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 114890 + }, + { + "epoch": 755.921052631579, + "grad_norm": 0.8325181007385254, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 114900 + }, + { + "epoch": 755.9868421052631, + "grad_norm": 0.7445228695869446, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 114910 + }, + { + "epoch": 756.0526315789474, + "grad_norm": 1.4654862880706787, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 114920 + }, + { + "epoch": 756.1184210526316, + "grad_norm": 1.1833246946334839, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 114930 + }, + { + "epoch": 756.1842105263158, + "grad_norm": 0.8253543972969055, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 114940 + }, + { + "epoch": 756.25, + "grad_norm": 1.0825392007827759, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 114950 + }, + { + "epoch": 756.3157894736842, + "grad_norm": 0.9824175834655762, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 114960 + }, + { + "epoch": 756.3815789473684, + "grad_norm": 1.0814141035079956, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 114970 + }, + { + "epoch": 756.4473684210526, + "grad_norm": 0.9042399525642395, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 114980 + }, + { + "epoch": 756.5131578947369, + "grad_norm": 1.2757545709609985, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 114990 + }, + { + "epoch": 756.578947368421, + "grad_norm": 1.336796760559082, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 115000 + }, + { + "epoch": 756.6447368421053, + "grad_norm": 1.2338722944259644, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 115010 + }, + { + "epoch": 756.7105263157895, + "grad_norm": 0.8805379271507263, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 115020 + }, + { + "epoch": 756.7763157894736, + "grad_norm": 1.1899020671844482, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 115030 + }, + { + "epoch": 756.8421052631579, + "grad_norm": 1.0832873582839966, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 115040 + }, + { + "epoch": 756.9078947368421, + "grad_norm": 0.9258546233177185, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 115050 + }, + { + "epoch": 756.9736842105264, + "grad_norm": 0.8678163290023804, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 115060 + }, + { + "epoch": 757.0394736842105, + "grad_norm": 0.9894230365753174, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 115070 + }, + { + "epoch": 757.1052631578947, + "grad_norm": 1.470314383506775, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 115080 + }, + { + "epoch": 757.171052631579, + "grad_norm": 1.0584030151367188, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 115090 + }, + { + "epoch": 757.2368421052631, + "grad_norm": 1.0429503917694092, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 115100 + }, + { + "epoch": 757.3026315789474, + "grad_norm": 1.2286428213119507, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 115110 + }, + { + "epoch": 757.3684210526316, + "grad_norm": 1.1292539834976196, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 115120 + }, + { + "epoch": 757.4342105263158, + "grad_norm": 0.7512161135673523, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 115130 + }, + { + "epoch": 757.5, + "grad_norm": 1.228721261024475, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 115140 + }, + { + "epoch": 757.5657894736842, + "grad_norm": 1.2214051485061646, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 115150 + }, + { + "epoch": 757.6315789473684, + "grad_norm": 1.1014398336410522, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 115160 + }, + { + "epoch": 757.6973684210526, + "grad_norm": 0.9443309307098389, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 115170 + }, + { + "epoch": 757.7631578947369, + "grad_norm": 0.7845607995986938, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 115180 + }, + { + "epoch": 757.828947368421, + "grad_norm": 1.0893877744674683, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 115190 + }, + { + "epoch": 757.8947368421053, + "grad_norm": 0.7762365937232971, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 115200 + }, + { + "epoch": 757.9605263157895, + "grad_norm": 0.9474076628684998, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 115210 + }, + { + "epoch": 758.0263157894736, + "grad_norm": 1.041471242904663, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 115220 + }, + { + "epoch": 758.0921052631579, + "grad_norm": 1.2159472703933716, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 115230 + }, + { + "epoch": 758.1578947368421, + "grad_norm": 1.0471974611282349, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 115240 + }, + { + "epoch": 758.2236842105264, + "grad_norm": 0.8706479072570801, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 115250 + }, + { + "epoch": 758.2894736842105, + "grad_norm": 0.847398042678833, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 115260 + }, + { + "epoch": 758.3552631578947, + "grad_norm": 1.139387845993042, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 115270 + }, + { + "epoch": 758.421052631579, + "grad_norm": 1.2678453922271729, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 115280 + }, + { + "epoch": 758.4868421052631, + "grad_norm": 1.1725306510925293, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 115290 + }, + { + "epoch": 758.5526315789474, + "grad_norm": 1.2793757915496826, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 115300 + }, + { + "epoch": 758.6184210526316, + "grad_norm": 1.028630018234253, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 115310 + }, + { + "epoch": 758.6842105263158, + "grad_norm": 1.1012976169586182, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 115320 + }, + { + "epoch": 758.75, + "grad_norm": 0.8987807035446167, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 115330 + }, + { + "epoch": 758.8157894736842, + "grad_norm": 1.493779182434082, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 115340 + }, + { + "epoch": 758.8815789473684, + "grad_norm": 1.4419828653335571, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 115350 + }, + { + "epoch": 758.9473684210526, + "grad_norm": 1.4344781637191772, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 115360 + }, + { + "epoch": 759.0131578947369, + "grad_norm": 0.9399645924568176, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 115370 + }, + { + "epoch": 759.078947368421, + "grad_norm": 0.8678160309791565, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 115380 + }, + { + "epoch": 759.1447368421053, + "grad_norm": 1.2153128385543823, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 115390 + }, + { + "epoch": 759.2105263157895, + "grad_norm": 1.31186044216156, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 115400 + }, + { + "epoch": 759.2763157894736, + "grad_norm": 1.637511968612671, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 115410 + }, + { + "epoch": 759.3421052631579, + "grad_norm": 1.3562475442886353, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 115420 + }, + { + "epoch": 759.4078947368421, + "grad_norm": 1.192093014717102, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 115430 + }, + { + "epoch": 759.4736842105264, + "grad_norm": 1.5335882902145386, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 115440 + }, + { + "epoch": 759.5394736842105, + "grad_norm": 1.1234040260314941, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 115450 + }, + { + "epoch": 759.6052631578947, + "grad_norm": 1.1577470302581787, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 115460 + }, + { + "epoch": 759.671052631579, + "grad_norm": 1.1491925716400146, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 115470 + }, + { + "epoch": 759.7368421052631, + "grad_norm": 1.103284239768982, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 115480 + }, + { + "epoch": 759.8026315789474, + "grad_norm": 1.1908904314041138, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 115490 + }, + { + "epoch": 759.8684210526316, + "grad_norm": 1.180321216583252, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 115500 + }, + { + "epoch": 759.9342105263158, + "grad_norm": 1.2648427486419678, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 115510 + }, + { + "epoch": 760.0, + "grad_norm": 1.4859260320663452, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 115520 + }, + { + "epoch": 760.0657894736842, + "grad_norm": 1.3933957815170288, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 115530 + }, + { + "epoch": 760.1315789473684, + "grad_norm": 1.4522415399551392, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 115540 + }, + { + "epoch": 760.1973684210526, + "grad_norm": 1.2445298433303833, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 115550 + }, + { + "epoch": 760.2631578947369, + "grad_norm": 1.222784399986267, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 115560 + }, + { + "epoch": 760.328947368421, + "grad_norm": 0.9933916330337524, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 115570 + }, + { + "epoch": 760.3947368421053, + "grad_norm": 1.02178955078125, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 115580 + }, + { + "epoch": 760.4605263157895, + "grad_norm": 1.0598787069320679, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 115590 + }, + { + "epoch": 760.5263157894736, + "grad_norm": 1.264055609703064, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 115600 + }, + { + "epoch": 760.5921052631579, + "grad_norm": 1.1147860288619995, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 115610 + }, + { + "epoch": 760.6578947368421, + "grad_norm": 1.159653902053833, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 115620 + }, + { + "epoch": 760.7236842105264, + "grad_norm": 0.6607643961906433, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 115630 + }, + { + "epoch": 760.7894736842105, + "grad_norm": 0.9183317422866821, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 115640 + }, + { + "epoch": 760.8552631578947, + "grad_norm": 1.034013032913208, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 115650 + }, + { + "epoch": 760.921052631579, + "grad_norm": 0.9568067789077759, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 115660 + }, + { + "epoch": 760.9868421052631, + "grad_norm": 1.138213872909546, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 115670 + }, + { + "epoch": 761.0526315789474, + "grad_norm": 1.0073471069335938, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 115680 + }, + { + "epoch": 761.1184210526316, + "grad_norm": 1.5636231899261475, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 115690 + }, + { + "epoch": 761.1842105263158, + "grad_norm": 1.0607483386993408, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 115700 + }, + { + "epoch": 761.25, + "grad_norm": 1.3943238258361816, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 115710 + }, + { + "epoch": 761.3157894736842, + "grad_norm": 0.7510659694671631, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 115720 + }, + { + "epoch": 761.3815789473684, + "grad_norm": 1.033464789390564, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 115730 + }, + { + "epoch": 761.4473684210526, + "grad_norm": 1.1388704776763916, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 115740 + }, + { + "epoch": 761.5131578947369, + "grad_norm": 1.0201410055160522, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 115750 + }, + { + "epoch": 761.578947368421, + "grad_norm": 0.690221905708313, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 115760 + }, + { + "epoch": 761.6447368421053, + "grad_norm": 0.7212555408477783, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 115770 + }, + { + "epoch": 761.7105263157895, + "grad_norm": 1.3436615467071533, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 115780 + }, + { + "epoch": 761.7763157894736, + "grad_norm": 1.2216511964797974, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 115790 + }, + { + "epoch": 761.8421052631579, + "grad_norm": 1.0771437883377075, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 115800 + }, + { + "epoch": 761.9078947368421, + "grad_norm": 1.3642146587371826, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 115810 + }, + { + "epoch": 761.9736842105264, + "grad_norm": 1.320507287979126, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 115820 + }, + { + "epoch": 762.0394736842105, + "grad_norm": 0.8073870539665222, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 115830 + }, + { + "epoch": 762.1052631578947, + "grad_norm": 1.161726951599121, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 115840 + }, + { + "epoch": 762.171052631579, + "grad_norm": 1.1861587762832642, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 115850 + }, + { + "epoch": 762.2368421052631, + "grad_norm": 0.9289435744285583, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 115860 + }, + { + "epoch": 762.3026315789474, + "grad_norm": 1.0481675863265991, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 115870 + }, + { + "epoch": 762.3684210526316, + "grad_norm": 1.2852026224136353, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 115880 + }, + { + "epoch": 762.4342105263158, + "grad_norm": 1.4661617279052734, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 115890 + }, + { + "epoch": 762.5, + "grad_norm": 1.2878676652908325, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 115900 + }, + { + "epoch": 762.5657894736842, + "grad_norm": 1.1841388940811157, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 115910 + }, + { + "epoch": 762.6315789473684, + "grad_norm": 1.1691356897354126, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 115920 + }, + { + "epoch": 762.6973684210526, + "grad_norm": 1.0297003984451294, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 115930 + }, + { + "epoch": 762.7631578947369, + "grad_norm": 1.1073315143585205, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 115940 + }, + { + "epoch": 762.828947368421, + "grad_norm": 1.3465665578842163, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 115950 + }, + { + "epoch": 762.8947368421053, + "grad_norm": 1.0311391353607178, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 115960 + }, + { + "epoch": 762.9605263157895, + "grad_norm": 0.8130193948745728, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 115970 + }, + { + "epoch": 763.0263157894736, + "grad_norm": 0.788371741771698, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 115980 + }, + { + "epoch": 763.0921052631579, + "grad_norm": 1.0673997402191162, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 115990 + }, + { + "epoch": 763.1578947368421, + "grad_norm": 0.967298150062561, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 116000 + }, + { + "epoch": 763.2236842105264, + "grad_norm": 0.8940585255622864, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 116010 + }, + { + "epoch": 763.2894736842105, + "grad_norm": 0.8840467929840088, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 116020 + }, + { + "epoch": 763.3552631578947, + "grad_norm": 1.0176132917404175, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 116030 + }, + { + "epoch": 763.421052631579, + "grad_norm": 1.144092321395874, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 116040 + }, + { + "epoch": 763.4868421052631, + "grad_norm": 1.1976016759872437, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 116050 + }, + { + "epoch": 763.5526315789474, + "grad_norm": 1.0607638359069824, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 116060 + }, + { + "epoch": 763.6184210526316, + "grad_norm": 1.0160369873046875, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 116070 + }, + { + "epoch": 763.6842105263158, + "grad_norm": 0.8879520297050476, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 116080 + }, + { + "epoch": 763.75, + "grad_norm": 1.279135823249817, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 116090 + }, + { + "epoch": 763.8157894736842, + "grad_norm": 0.8777012228965759, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 116100 + }, + { + "epoch": 763.8815789473684, + "grad_norm": 1.0412824153900146, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 116110 + }, + { + "epoch": 763.9473684210526, + "grad_norm": 1.1568728685379028, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 116120 + }, + { + "epoch": 764.0131578947369, + "grad_norm": 0.8233476281166077, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 116130 + }, + { + "epoch": 764.078947368421, + "grad_norm": 0.7248982191085815, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 116140 + }, + { + "epoch": 764.1447368421053, + "grad_norm": 0.712303876876831, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 116150 + }, + { + "epoch": 764.2105263157895, + "grad_norm": 1.0194206237792969, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 116160 + }, + { + "epoch": 764.2763157894736, + "grad_norm": 1.2041058540344238, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 116170 + }, + { + "epoch": 764.3421052631579, + "grad_norm": 0.784016489982605, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 116180 + }, + { + "epoch": 764.4078947368421, + "grad_norm": 1.4073185920715332, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 116190 + }, + { + "epoch": 764.4736842105264, + "grad_norm": 1.0075128078460693, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 116200 + }, + { + "epoch": 764.5394736842105, + "grad_norm": 0.6819977164268494, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 116210 + }, + { + "epoch": 764.6052631578947, + "grad_norm": 1.2221415042877197, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 116220 + }, + { + "epoch": 764.671052631579, + "grad_norm": 1.6122931241989136, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 116230 + }, + { + "epoch": 764.7368421052631, + "grad_norm": 1.30339515209198, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 116240 + }, + { + "epoch": 764.8026315789474, + "grad_norm": 1.2761629819869995, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 116250 + }, + { + "epoch": 764.8684210526316, + "grad_norm": 1.287692904472351, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 116260 + }, + { + "epoch": 764.9342105263158, + "grad_norm": 0.9369876384735107, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 116270 + }, + { + "epoch": 765.0, + "grad_norm": 0.9917391538619995, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 116280 + }, + { + "epoch": 765.0657894736842, + "grad_norm": 0.8295028805732727, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 116290 + }, + { + "epoch": 765.1315789473684, + "grad_norm": 1.1534699201583862, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 116300 + }, + { + "epoch": 765.1973684210526, + "grad_norm": 0.8612220883369446, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 116310 + }, + { + "epoch": 765.2631578947369, + "grad_norm": 1.1963870525360107, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 116320 + }, + { + "epoch": 765.328947368421, + "grad_norm": 1.3065626621246338, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 116330 + }, + { + "epoch": 765.3947368421053, + "grad_norm": 0.979164719581604, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 116340 + }, + { + "epoch": 765.4605263157895, + "grad_norm": 0.9245379567146301, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 116350 + }, + { + "epoch": 765.5263157894736, + "grad_norm": 0.8577090501785278, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 116360 + }, + { + "epoch": 765.5921052631579, + "grad_norm": 0.9338501691818237, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 116370 + }, + { + "epoch": 765.6578947368421, + "grad_norm": 1.0019291639328003, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 116380 + }, + { + "epoch": 765.7236842105264, + "grad_norm": 1.2914475202560425, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 116390 + }, + { + "epoch": 765.7894736842105, + "grad_norm": 1.1972627639770508, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 116400 + }, + { + "epoch": 765.8552631578947, + "grad_norm": 1.0993999242782593, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 116410 + }, + { + "epoch": 765.921052631579, + "grad_norm": 0.9448077082633972, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 116420 + }, + { + "epoch": 765.9868421052631, + "grad_norm": 0.893061637878418, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 116430 + }, + { + "epoch": 766.0526315789474, + "grad_norm": 1.3330209255218506, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 116440 + }, + { + "epoch": 766.1184210526316, + "grad_norm": 1.363829255104065, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 116450 + }, + { + "epoch": 766.1842105263158, + "grad_norm": 1.1652305126190186, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 116460 + }, + { + "epoch": 766.25, + "grad_norm": 1.1729694604873657, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 116470 + }, + { + "epoch": 766.3157894736842, + "grad_norm": 0.8965724110603333, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 116480 + }, + { + "epoch": 766.3815789473684, + "grad_norm": 0.8969491720199585, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 116490 + }, + { + "epoch": 766.4473684210526, + "grad_norm": 1.0770326852798462, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 116500 + }, + { + "epoch": 766.5131578947369, + "grad_norm": 0.9563101530075073, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 116510 + }, + { + "epoch": 766.578947368421, + "grad_norm": 1.18818199634552, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 116520 + }, + { + "epoch": 766.6447368421053, + "grad_norm": 0.7398898005485535, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 116530 + }, + { + "epoch": 766.7105263157895, + "grad_norm": 0.6611107587814331, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 116540 + }, + { + "epoch": 766.7763157894736, + "grad_norm": 0.8535279631614685, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 116550 + }, + { + "epoch": 766.8421052631579, + "grad_norm": 1.0214197635650635, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 116560 + }, + { + "epoch": 766.9078947368421, + "grad_norm": 1.0022668838500977, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 116570 + }, + { + "epoch": 766.9736842105264, + "grad_norm": 1.2458581924438477, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 116580 + }, + { + "epoch": 767.0394736842105, + "grad_norm": 1.3370084762573242, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 116590 + }, + { + "epoch": 767.1052631578947, + "grad_norm": 0.9627024531364441, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 116600 + }, + { + "epoch": 767.171052631579, + "grad_norm": 1.2679235935211182, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 116610 + }, + { + "epoch": 767.2368421052631, + "grad_norm": 0.9105582237243652, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 116620 + }, + { + "epoch": 767.3026315789474, + "grad_norm": 0.9305436015129089, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 116630 + }, + { + "epoch": 767.3684210526316, + "grad_norm": 0.8848668336868286, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 116640 + }, + { + "epoch": 767.4342105263158, + "grad_norm": 1.0530251264572144, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 116650 + }, + { + "epoch": 767.5, + "grad_norm": 0.8415973782539368, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 116660 + }, + { + "epoch": 767.5657894736842, + "grad_norm": 1.0781415700912476, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 116670 + }, + { + "epoch": 767.6315789473684, + "grad_norm": 0.8499521017074585, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 116680 + }, + { + "epoch": 767.6973684210526, + "grad_norm": 1.271653175354004, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 116690 + }, + { + "epoch": 767.7631578947369, + "grad_norm": 1.3526909351348877, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 116700 + }, + { + "epoch": 767.828947368421, + "grad_norm": 1.1030946969985962, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 116710 + }, + { + "epoch": 767.8947368421053, + "grad_norm": 1.2905566692352295, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 116720 + }, + { + "epoch": 767.9605263157895, + "grad_norm": 1.2587246894836426, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 116730 + }, + { + "epoch": 768.0263157894736, + "grad_norm": 1.263112187385559, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 116740 + }, + { + "epoch": 768.0921052631579, + "grad_norm": 1.2640330791473389, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 116750 + }, + { + "epoch": 768.1578947368421, + "grad_norm": 0.9966381788253784, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 116760 + }, + { + "epoch": 768.2236842105264, + "grad_norm": 1.5357413291931152, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 116770 + }, + { + "epoch": 768.2894736842105, + "grad_norm": 1.3589855432510376, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 116780 + }, + { + "epoch": 768.3552631578947, + "grad_norm": 1.3303495645523071, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 116790 + }, + { + "epoch": 768.421052631579, + "grad_norm": 1.531052827835083, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 116800 + }, + { + "epoch": 768.4868421052631, + "grad_norm": 1.2647902965545654, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 116810 + }, + { + "epoch": 768.5526315789474, + "grad_norm": 1.2003931999206543, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 116820 + }, + { + "epoch": 768.6184210526316, + "grad_norm": 1.2271924018859863, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 116830 + }, + { + "epoch": 768.6842105263158, + "grad_norm": 0.8011854887008667, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 116840 + }, + { + "epoch": 768.75, + "grad_norm": 1.0051759481430054, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 116850 + }, + { + "epoch": 768.8157894736842, + "grad_norm": 1.0065410137176514, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 116860 + }, + { + "epoch": 768.8815789473684, + "grad_norm": 1.094523310661316, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 116870 + }, + { + "epoch": 768.9473684210526, + "grad_norm": 0.9108685255050659, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 116880 + }, + { + "epoch": 769.0131578947369, + "grad_norm": 1.0537774562835693, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 116890 + }, + { + "epoch": 769.078947368421, + "grad_norm": 0.8847028613090515, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 116900 + }, + { + "epoch": 769.1447368421053, + "grad_norm": 0.8502101302146912, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 116910 + }, + { + "epoch": 769.2105263157895, + "grad_norm": 1.3079115152359009, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 116920 + }, + { + "epoch": 769.2763157894736, + "grad_norm": 0.997676432132721, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 116930 + }, + { + "epoch": 769.3421052631579, + "grad_norm": 0.9060066342353821, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 116940 + }, + { + "epoch": 769.4078947368421, + "grad_norm": 0.9685508012771606, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 116950 + }, + { + "epoch": 769.4736842105264, + "grad_norm": 1.053354024887085, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 116960 + }, + { + "epoch": 769.5394736842105, + "grad_norm": 0.901861310005188, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 116970 + }, + { + "epoch": 769.6052631578947, + "grad_norm": 1.1374804973602295, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 116980 + }, + { + "epoch": 769.671052631579, + "grad_norm": 1.060011386871338, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 116990 + }, + { + "epoch": 769.7368421052631, + "grad_norm": 0.8617300391197205, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 117000 + }, + { + "epoch": 769.8026315789474, + "grad_norm": 0.7569366097450256, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 117010 + }, + { + "epoch": 769.8684210526316, + "grad_norm": 0.9865979552268982, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 117020 + }, + { + "epoch": 769.9342105263158, + "grad_norm": 0.6194046139717102, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 117030 + }, + { + "epoch": 770.0, + "grad_norm": 1.2223562002182007, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 117040 + }, + { + "epoch": 770.0657894736842, + "grad_norm": 0.9539296627044678, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 117050 + }, + { + "epoch": 770.1315789473684, + "grad_norm": 0.7826547622680664, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 117060 + }, + { + "epoch": 770.1973684210526, + "grad_norm": 1.4138188362121582, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 117070 + }, + { + "epoch": 770.2631578947369, + "grad_norm": 1.2407159805297852, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 117080 + }, + { + "epoch": 770.328947368421, + "grad_norm": 1.1824477910995483, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 117090 + }, + { + "epoch": 770.3947368421053, + "grad_norm": 0.9199476838111877, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 117100 + }, + { + "epoch": 770.4605263157895, + "grad_norm": 0.7619976997375488, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 117110 + }, + { + "epoch": 770.5263157894736, + "grad_norm": 0.8713205456733704, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 117120 + }, + { + "epoch": 770.5921052631579, + "grad_norm": 0.819759726524353, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 117130 + }, + { + "epoch": 770.6578947368421, + "grad_norm": 1.4600467681884766, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 117140 + }, + { + "epoch": 770.7236842105264, + "grad_norm": 0.7182674407958984, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 117150 + }, + { + "epoch": 770.7894736842105, + "grad_norm": 1.2647074460983276, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 117160 + }, + { + "epoch": 770.8552631578947, + "grad_norm": 1.4599173069000244, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 117170 + }, + { + "epoch": 770.921052631579, + "grad_norm": 1.161414384841919, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 117180 + }, + { + "epoch": 770.9868421052631, + "grad_norm": 0.9526516199111938, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 117190 + }, + { + "epoch": 771.0526315789474, + "grad_norm": 1.2501287460327148, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 117200 + }, + { + "epoch": 771.1184210526316, + "grad_norm": 1.2030541896820068, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 117210 + }, + { + "epoch": 771.1842105263158, + "grad_norm": 1.1028085947036743, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 117220 + }, + { + "epoch": 771.25, + "grad_norm": 1.2376686334609985, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 117230 + }, + { + "epoch": 771.3157894736842, + "grad_norm": 1.1704808473587036, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 117240 + }, + { + "epoch": 771.3815789473684, + "grad_norm": 0.899161159992218, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 117250 + }, + { + "epoch": 771.4473684210526, + "grad_norm": 1.2865201234817505, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 117260 + }, + { + "epoch": 771.5131578947369, + "grad_norm": 1.0590287446975708, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 117270 + }, + { + "epoch": 771.578947368421, + "grad_norm": 0.7633146047592163, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 117280 + }, + { + "epoch": 771.6447368421053, + "grad_norm": 0.6437667012214661, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 117290 + }, + { + "epoch": 771.7105263157895, + "grad_norm": 1.379522442817688, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 117300 + }, + { + "epoch": 771.7763157894736, + "grad_norm": 1.1769373416900635, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 117310 + }, + { + "epoch": 771.8421052631579, + "grad_norm": 1.229821801185608, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 117320 + }, + { + "epoch": 771.9078947368421, + "grad_norm": 0.8484160304069519, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 117330 + }, + { + "epoch": 771.9736842105264, + "grad_norm": 0.7620605230331421, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 117340 + }, + { + "epoch": 772.0394736842105, + "grad_norm": 1.064082384109497, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 117350 + }, + { + "epoch": 772.1052631578947, + "grad_norm": 0.9610821604728699, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 117360 + }, + { + "epoch": 772.171052631579, + "grad_norm": 1.0888431072235107, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 117370 + }, + { + "epoch": 772.2368421052631, + "grad_norm": 0.9147794842720032, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 117380 + }, + { + "epoch": 772.3026315789474, + "grad_norm": 0.930996298789978, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 117390 + }, + { + "epoch": 772.3684210526316, + "grad_norm": 0.9899475574493408, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 117400 + }, + { + "epoch": 772.4342105263158, + "grad_norm": 1.1888105869293213, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 117410 + }, + { + "epoch": 772.5, + "grad_norm": 1.1909250020980835, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 117420 + }, + { + "epoch": 772.5657894736842, + "grad_norm": 1.144278645515442, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 117430 + }, + { + "epoch": 772.6315789473684, + "grad_norm": 1.2057489156723022, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 117440 + }, + { + "epoch": 772.6973684210526, + "grad_norm": 1.240365982055664, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 117450 + }, + { + "epoch": 772.7631578947369, + "grad_norm": 1.1910370588302612, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 117460 + }, + { + "epoch": 772.828947368421, + "grad_norm": 1.0955934524536133, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 117470 + }, + { + "epoch": 772.8947368421053, + "grad_norm": 1.1824718713760376, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 117480 + }, + { + "epoch": 772.9605263157895, + "grad_norm": 1.323282241821289, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 117490 + }, + { + "epoch": 773.0263157894736, + "grad_norm": 0.9789089560508728, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 117500 + }, + { + "epoch": 773.0921052631579, + "grad_norm": 1.0450570583343506, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 117510 + }, + { + "epoch": 773.1578947368421, + "grad_norm": 1.3305141925811768, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 117520 + }, + { + "epoch": 773.2236842105264, + "grad_norm": 1.228081226348877, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 117530 + }, + { + "epoch": 773.2894736842105, + "grad_norm": 1.1966365575790405, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 117540 + }, + { + "epoch": 773.3552631578947, + "grad_norm": 1.154728651046753, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 117550 + }, + { + "epoch": 773.421052631579, + "grad_norm": 1.119430422782898, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 117560 + }, + { + "epoch": 773.4868421052631, + "grad_norm": 1.3745265007019043, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 117570 + }, + { + "epoch": 773.5526315789474, + "grad_norm": 1.3830089569091797, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 117580 + }, + { + "epoch": 773.6184210526316, + "grad_norm": 1.2149348258972168, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 117590 + }, + { + "epoch": 773.6842105263158, + "grad_norm": 1.084110975265503, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 117600 + }, + { + "epoch": 773.75, + "grad_norm": 1.2367154359817505, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 117610 + }, + { + "epoch": 773.8157894736842, + "grad_norm": 1.1649274826049805, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 117620 + }, + { + "epoch": 773.8815789473684, + "grad_norm": 1.4875725507736206, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 117630 + }, + { + "epoch": 773.9473684210526, + "grad_norm": 0.9839734435081482, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 117640 + }, + { + "epoch": 774.0131578947369, + "grad_norm": 1.041127324104309, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 117650 + }, + { + "epoch": 774.078947368421, + "grad_norm": 1.2891390323638916, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 117660 + }, + { + "epoch": 774.1447368421053, + "grad_norm": 1.126435399055481, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 117670 + }, + { + "epoch": 774.2105263157895, + "grad_norm": 1.295599341392517, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 117680 + }, + { + "epoch": 774.2763157894736, + "grad_norm": 1.0001301765441895, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 117690 + }, + { + "epoch": 774.3421052631579, + "grad_norm": 0.824128270149231, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 117700 + }, + { + "epoch": 774.4078947368421, + "grad_norm": 1.071433663368225, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 117710 + }, + { + "epoch": 774.4736842105264, + "grad_norm": 0.8961533308029175, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 117720 + }, + { + "epoch": 774.5394736842105, + "grad_norm": 0.9159356355667114, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 117730 + }, + { + "epoch": 774.6052631578947, + "grad_norm": 0.8452175259590149, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 117740 + }, + { + "epoch": 774.671052631579, + "grad_norm": 0.9821557998657227, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 117750 + }, + { + "epoch": 774.7368421052631, + "grad_norm": 1.0779387950897217, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 117760 + }, + { + "epoch": 774.8026315789474, + "grad_norm": 1.1578203439712524, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 117770 + }, + { + "epoch": 774.8684210526316, + "grad_norm": 1.1006511449813843, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 117780 + }, + { + "epoch": 774.9342105263158, + "grad_norm": 1.2114787101745605, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 117790 + }, + { + "epoch": 775.0, + "grad_norm": 0.8628996014595032, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 117800 + }, + { + "epoch": 775.0657894736842, + "grad_norm": 1.1864206790924072, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 117810 + }, + { + "epoch": 775.1315789473684, + "grad_norm": 1.050344467163086, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 117820 + }, + { + "epoch": 775.1973684210526, + "grad_norm": 1.0661941766738892, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 117830 + }, + { + "epoch": 775.2631578947369, + "grad_norm": 0.982932984828949, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 117840 + }, + { + "epoch": 775.328947368421, + "grad_norm": 1.0128767490386963, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 117850 + }, + { + "epoch": 775.3947368421053, + "grad_norm": 1.2008044719696045, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 117860 + }, + { + "epoch": 775.4605263157895, + "grad_norm": 1.156033992767334, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 117870 + }, + { + "epoch": 775.5263157894736, + "grad_norm": 1.492291808128357, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 117880 + }, + { + "epoch": 775.5921052631579, + "grad_norm": 0.656265914440155, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 117890 + }, + { + "epoch": 775.6578947368421, + "grad_norm": 1.0817625522613525, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 117900 + }, + { + "epoch": 775.7236842105264, + "grad_norm": 0.741335391998291, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 117910 + }, + { + "epoch": 775.7894736842105, + "grad_norm": 0.9007006883621216, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 117920 + }, + { + "epoch": 775.8552631578947, + "grad_norm": 0.8411262035369873, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 117930 + }, + { + "epoch": 775.921052631579, + "grad_norm": 0.8096522688865662, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 117940 + }, + { + "epoch": 775.9868421052631, + "grad_norm": 0.6779792308807373, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 117950 + }, + { + "epoch": 776.0526315789474, + "grad_norm": 0.9828864336013794, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 117960 + }, + { + "epoch": 776.1184210526316, + "grad_norm": 1.536733627319336, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 117970 + }, + { + "epoch": 776.1842105263158, + "grad_norm": 1.2087494134902954, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 117980 + }, + { + "epoch": 776.25, + "grad_norm": 0.7931889295578003, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 117990 + }, + { + "epoch": 776.3157894736842, + "grad_norm": 0.8839361667633057, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 118000 + }, + { + "epoch": 776.3815789473684, + "grad_norm": 1.1379063129425049, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 118010 + }, + { + "epoch": 776.4473684210526, + "grad_norm": 1.1438642740249634, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 118020 + }, + { + "epoch": 776.5131578947369, + "grad_norm": 0.9373273253440857, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 118030 + }, + { + "epoch": 776.578947368421, + "grad_norm": 1.2530593872070312, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 118040 + }, + { + "epoch": 776.6447368421053, + "grad_norm": 1.2131603956222534, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 118050 + }, + { + "epoch": 776.7105263157895, + "grad_norm": 0.7886523008346558, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 118060 + }, + { + "epoch": 776.7763157894736, + "grad_norm": 0.6613753437995911, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 118070 + }, + { + "epoch": 776.8421052631579, + "grad_norm": 1.1990413665771484, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 118080 + }, + { + "epoch": 776.9078947368421, + "grad_norm": 1.0473389625549316, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 118090 + }, + { + "epoch": 776.9736842105264, + "grad_norm": 0.9223041534423828, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 118100 + }, + { + "epoch": 777.0394736842105, + "grad_norm": 0.7598844170570374, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 118110 + }, + { + "epoch": 777.1052631578947, + "grad_norm": 1.1341654062271118, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 118120 + }, + { + "epoch": 777.171052631579, + "grad_norm": 0.5702991485595703, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 118130 + }, + { + "epoch": 777.2368421052631, + "grad_norm": 1.0409194231033325, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 118140 + }, + { + "epoch": 777.3026315789474, + "grad_norm": 1.469417691230774, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 118150 + }, + { + "epoch": 777.3684210526316, + "grad_norm": 1.1192512512207031, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 118160 + }, + { + "epoch": 777.4342105263158, + "grad_norm": 1.1567856073379517, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 118170 + }, + { + "epoch": 777.5, + "grad_norm": 0.8657984733581543, + "learning_rate": 0.0001, + "loss": 0.0163, + "step": 118180 + }, + { + "epoch": 777.5657894736842, + "grad_norm": 0.8693804740905762, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 118190 + }, + { + "epoch": 777.6315789473684, + "grad_norm": 1.0661799907684326, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 118200 + }, + { + "epoch": 777.6973684210526, + "grad_norm": 0.9497994184494019, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 118210 + }, + { + "epoch": 777.7631578947369, + "grad_norm": 1.2563682794570923, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 118220 + }, + { + "epoch": 777.828947368421, + "grad_norm": 0.7826574444770813, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 118230 + }, + { + "epoch": 777.8947368421053, + "grad_norm": 1.0407278537750244, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 118240 + }, + { + "epoch": 777.9605263157895, + "grad_norm": 1.288908839225769, + "learning_rate": 0.0001, + "loss": 0.0149, + "step": 118250 + }, + { + "epoch": 778.0263157894736, + "grad_norm": 1.0953668355941772, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 118260 + }, + { + "epoch": 778.0921052631579, + "grad_norm": 1.315590500831604, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 118270 + }, + { + "epoch": 778.1578947368421, + "grad_norm": 1.1540820598602295, + "learning_rate": 0.0001, + "loss": 0.0141, + "step": 118280 + }, + { + "epoch": 778.2236842105264, + "grad_norm": 1.0182602405548096, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 118290 + }, + { + "epoch": 778.2894736842105, + "grad_norm": 1.1212661266326904, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 118300 + }, + { + "epoch": 778.3552631578947, + "grad_norm": 0.8415754437446594, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 118310 + }, + { + "epoch": 778.421052631579, + "grad_norm": 0.8753746747970581, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 118320 + }, + { + "epoch": 778.4868421052631, + "grad_norm": 1.123030662536621, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 118330 + }, + { + "epoch": 778.5526315789474, + "grad_norm": 1.058523416519165, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 118340 + }, + { + "epoch": 778.6184210526316, + "grad_norm": 1.2867323160171509, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 118350 + }, + { + "epoch": 778.6842105263158, + "grad_norm": 1.0465610027313232, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 118360 + }, + { + "epoch": 778.75, + "grad_norm": 1.1913409233093262, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 118370 + }, + { + "epoch": 778.8157894736842, + "grad_norm": 1.0035961866378784, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 118380 + }, + { + "epoch": 778.8815789473684, + "grad_norm": 1.2081618309020996, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 118390 + }, + { + "epoch": 778.9473684210526, + "grad_norm": 0.9412795901298523, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 118400 + }, + { + "epoch": 779.0131578947369, + "grad_norm": 1.1386052370071411, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 118410 + }, + { + "epoch": 779.078947368421, + "grad_norm": 1.215187907218933, + "learning_rate": 0.0001, + "loss": 0.0143, + "step": 118420 + }, + { + "epoch": 779.1447368421053, + "grad_norm": 1.0938812494277954, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 118430 + }, + { + "epoch": 779.2105263157895, + "grad_norm": 1.063369631767273, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 118440 + }, + { + "epoch": 779.2763157894736, + "grad_norm": 1.0048210620880127, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 118450 + }, + { + "epoch": 779.3421052631579, + "grad_norm": 1.2882784605026245, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 118460 + }, + { + "epoch": 779.4078947368421, + "grad_norm": 0.9867761731147766, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 118470 + }, + { + "epoch": 779.4736842105264, + "grad_norm": 1.0761276483535767, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 118480 + }, + { + "epoch": 779.5394736842105, + "grad_norm": 0.9819127321243286, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 118490 + }, + { + "epoch": 779.6052631578947, + "grad_norm": 1.317988395690918, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 118500 + }, + { + "epoch": 779.671052631579, + "grad_norm": 0.8267877101898193, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 118510 + }, + { + "epoch": 779.7368421052631, + "grad_norm": 1.0201878547668457, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 118520 + }, + { + "epoch": 779.8026315789474, + "grad_norm": 0.907877504825592, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 118530 + }, + { + "epoch": 779.8684210526316, + "grad_norm": 0.8943087458610535, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 118540 + }, + { + "epoch": 779.9342105263158, + "grad_norm": 1.0807287693023682, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 118550 + }, + { + "epoch": 780.0, + "grad_norm": 1.523391842842102, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 118560 + }, + { + "epoch": 780.0657894736842, + "grad_norm": 0.7130796313285828, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 118570 + }, + { + "epoch": 780.1315789473684, + "grad_norm": 1.3061423301696777, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 118580 + }, + { + "epoch": 780.1973684210526, + "grad_norm": 1.1210944652557373, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 118590 + }, + { + "epoch": 780.2631578947369, + "grad_norm": 1.2665170431137085, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 118600 + }, + { + "epoch": 780.328947368421, + "grad_norm": 1.1752995252609253, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 118610 + }, + { + "epoch": 780.3947368421053, + "grad_norm": 1.102726936340332, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 118620 + }, + { + "epoch": 780.4605263157895, + "grad_norm": 1.2804216146469116, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 118630 + }, + { + "epoch": 780.5263157894736, + "grad_norm": 1.0606364011764526, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 118640 + }, + { + "epoch": 780.5921052631579, + "grad_norm": 0.9021885991096497, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 118650 + }, + { + "epoch": 780.6578947368421, + "grad_norm": 1.3025189638137817, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 118660 + }, + { + "epoch": 780.7236842105264, + "grad_norm": 1.1469581127166748, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 118670 + }, + { + "epoch": 780.7894736842105, + "grad_norm": 1.1277015209197998, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 118680 + }, + { + "epoch": 780.8552631578947, + "grad_norm": 1.2387878894805908, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 118690 + }, + { + "epoch": 780.921052631579, + "grad_norm": 0.9669885635375977, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 118700 + }, + { + "epoch": 780.9868421052631, + "grad_norm": 1.0439066886901855, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 118710 + }, + { + "epoch": 781.0526315789474, + "grad_norm": 0.8005104660987854, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 118720 + }, + { + "epoch": 781.1184210526316, + "grad_norm": 1.0265685319900513, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 118730 + }, + { + "epoch": 781.1842105263158, + "grad_norm": 1.4636842012405396, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 118740 + }, + { + "epoch": 781.25, + "grad_norm": 1.312772512435913, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 118750 + }, + { + "epoch": 781.3157894736842, + "grad_norm": 1.1143829822540283, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 118760 + }, + { + "epoch": 781.3815789473684, + "grad_norm": 1.134764313697815, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 118770 + }, + { + "epoch": 781.4473684210526, + "grad_norm": 1.432435393333435, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 118780 + }, + { + "epoch": 781.5131578947369, + "grad_norm": 1.2253336906433105, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 118790 + }, + { + "epoch": 781.578947368421, + "grad_norm": 1.319845199584961, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 118800 + }, + { + "epoch": 781.6447368421053, + "grad_norm": 1.283031940460205, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 118810 + }, + { + "epoch": 781.7105263157895, + "grad_norm": 1.425012469291687, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 118820 + }, + { + "epoch": 781.7763157894736, + "grad_norm": 0.8615345358848572, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 118830 + }, + { + "epoch": 781.8421052631579, + "grad_norm": 1.3266713619232178, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 118840 + }, + { + "epoch": 781.9078947368421, + "grad_norm": 1.3085308074951172, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 118850 + }, + { + "epoch": 781.9736842105264, + "grad_norm": 1.1384379863739014, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 118860 + }, + { + "epoch": 782.0394736842105, + "grad_norm": 1.471619963645935, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 118870 + }, + { + "epoch": 782.1052631578947, + "grad_norm": 1.2752647399902344, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 118880 + }, + { + "epoch": 782.171052631579, + "grad_norm": 1.337410569190979, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 118890 + }, + { + "epoch": 782.2368421052631, + "grad_norm": 1.0045582056045532, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 118900 + }, + { + "epoch": 782.3026315789474, + "grad_norm": 1.1922293901443481, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 118910 + }, + { + "epoch": 782.3684210526316, + "grad_norm": 1.145751714706421, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 118920 + }, + { + "epoch": 782.4342105263158, + "grad_norm": 1.2008522748947144, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 118930 + }, + { + "epoch": 782.5, + "grad_norm": 1.2413567304611206, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 118940 + }, + { + "epoch": 782.5657894736842, + "grad_norm": 0.9739688634872437, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 118950 + }, + { + "epoch": 782.6315789473684, + "grad_norm": 0.9391964077949524, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 118960 + }, + { + "epoch": 782.6973684210526, + "grad_norm": 1.4125076532363892, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 118970 + }, + { + "epoch": 782.7631578947369, + "grad_norm": 1.2959308624267578, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 118980 + }, + { + "epoch": 782.828947368421, + "grad_norm": 1.301820993423462, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 118990 + }, + { + "epoch": 782.8947368421053, + "grad_norm": 1.200832724571228, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 119000 + }, + { + "epoch": 782.9605263157895, + "grad_norm": 0.8034687042236328, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 119010 + }, + { + "epoch": 783.0263157894736, + "grad_norm": 1.1737399101257324, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 119020 + }, + { + "epoch": 783.0921052631579, + "grad_norm": 1.0292094945907593, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 119030 + }, + { + "epoch": 783.1578947368421, + "grad_norm": 0.9858736395835876, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 119040 + }, + { + "epoch": 783.2236842105264, + "grad_norm": 1.2504321336746216, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 119050 + }, + { + "epoch": 783.2894736842105, + "grad_norm": 1.0716443061828613, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 119060 + }, + { + "epoch": 783.3552631578947, + "grad_norm": 0.9858033657073975, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 119070 + }, + { + "epoch": 783.421052631579, + "grad_norm": 1.0344947576522827, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 119080 + }, + { + "epoch": 783.4868421052631, + "grad_norm": 1.1620103120803833, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 119090 + }, + { + "epoch": 783.5526315789474, + "grad_norm": 1.2477577924728394, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 119100 + }, + { + "epoch": 783.6184210526316, + "grad_norm": 0.96187824010849, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 119110 + }, + { + "epoch": 783.6842105263158, + "grad_norm": 0.9657493829727173, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 119120 + }, + { + "epoch": 783.75, + "grad_norm": 1.1220613718032837, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 119130 + }, + { + "epoch": 783.8157894736842, + "grad_norm": 0.9884085059165955, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 119140 + }, + { + "epoch": 783.8815789473684, + "grad_norm": 1.1578776836395264, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 119150 + }, + { + "epoch": 783.9473684210526, + "grad_norm": 1.015238642692566, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 119160 + }, + { + "epoch": 784.0131578947369, + "grad_norm": 1.1379379034042358, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 119170 + }, + { + "epoch": 784.078947368421, + "grad_norm": 1.041599154472351, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 119180 + }, + { + "epoch": 784.1447368421053, + "grad_norm": 1.3538576364517212, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 119190 + }, + { + "epoch": 784.2105263157895, + "grad_norm": 1.2984648942947388, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 119200 + }, + { + "epoch": 784.2763157894736, + "grad_norm": 1.5270977020263672, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 119210 + }, + { + "epoch": 784.3421052631579, + "grad_norm": 0.9184948801994324, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 119220 + }, + { + "epoch": 784.4078947368421, + "grad_norm": 1.3463494777679443, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 119230 + }, + { + "epoch": 784.4736842105264, + "grad_norm": 1.1868815422058105, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 119240 + }, + { + "epoch": 784.5394736842105, + "grad_norm": 1.198378324508667, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 119250 + }, + { + "epoch": 784.6052631578947, + "grad_norm": 1.215132236480713, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 119260 + }, + { + "epoch": 784.671052631579, + "grad_norm": 1.1473467350006104, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 119270 + }, + { + "epoch": 784.7368421052631, + "grad_norm": 1.5379295349121094, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 119280 + }, + { + "epoch": 784.8026315789474, + "grad_norm": 1.4966095685958862, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 119290 + }, + { + "epoch": 784.8684210526316, + "grad_norm": 1.0885952711105347, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 119300 + }, + { + "epoch": 784.9342105263158, + "grad_norm": 0.980161726474762, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 119310 + }, + { + "epoch": 785.0, + "grad_norm": 0.916150689125061, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 119320 + }, + { + "epoch": 785.0657894736842, + "grad_norm": 1.2834523916244507, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 119330 + }, + { + "epoch": 785.1315789473684, + "grad_norm": 1.2613166570663452, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 119340 + }, + { + "epoch": 785.1973684210526, + "grad_norm": 1.0938130617141724, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 119350 + }, + { + "epoch": 785.2631578947369, + "grad_norm": 1.0228303670883179, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 119360 + }, + { + "epoch": 785.328947368421, + "grad_norm": 0.9948782920837402, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 119370 + }, + { + "epoch": 785.3947368421053, + "grad_norm": 1.0518125295639038, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 119380 + }, + { + "epoch": 785.4605263157895, + "grad_norm": 0.9449158906936646, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 119390 + }, + { + "epoch": 785.5263157894736, + "grad_norm": 0.914734959602356, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 119400 + }, + { + "epoch": 785.5921052631579, + "grad_norm": 1.529738426208496, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 119410 + }, + { + "epoch": 785.6578947368421, + "grad_norm": 1.1134679317474365, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 119420 + }, + { + "epoch": 785.7236842105264, + "grad_norm": 0.9557541012763977, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 119430 + }, + { + "epoch": 785.7894736842105, + "grad_norm": 1.2106385231018066, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 119440 + }, + { + "epoch": 785.8552631578947, + "grad_norm": 0.9082741141319275, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 119450 + }, + { + "epoch": 785.921052631579, + "grad_norm": 1.3996853828430176, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 119460 + }, + { + "epoch": 785.9868421052631, + "grad_norm": 1.2628298997879028, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 119470 + }, + { + "epoch": 786.0526315789474, + "grad_norm": 1.445421814918518, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 119480 + }, + { + "epoch": 786.1184210526316, + "grad_norm": 0.8487147092819214, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 119490 + }, + { + "epoch": 786.1842105263158, + "grad_norm": 0.9393507242202759, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 119500 + }, + { + "epoch": 786.25, + "grad_norm": 1.010326862335205, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 119510 + }, + { + "epoch": 786.3157894736842, + "grad_norm": 1.1670511960983276, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 119520 + }, + { + "epoch": 786.3815789473684, + "grad_norm": 0.9680734872817993, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 119530 + }, + { + "epoch": 786.4473684210526, + "grad_norm": 1.2515125274658203, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 119540 + }, + { + "epoch": 786.5131578947369, + "grad_norm": 1.0361891984939575, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 119550 + }, + { + "epoch": 786.578947368421, + "grad_norm": 0.6271700859069824, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 119560 + }, + { + "epoch": 786.6447368421053, + "grad_norm": 0.9543516635894775, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 119570 + }, + { + "epoch": 786.7105263157895, + "grad_norm": 1.0879392623901367, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 119580 + }, + { + "epoch": 786.7763157894736, + "grad_norm": 0.9335300922393799, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 119590 + }, + { + "epoch": 786.8421052631579, + "grad_norm": 1.244016408920288, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 119600 + }, + { + "epoch": 786.9078947368421, + "grad_norm": 1.3877922296524048, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 119610 + }, + { + "epoch": 786.9736842105264, + "grad_norm": 1.0246111154556274, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 119620 + }, + { + "epoch": 787.0394736842105, + "grad_norm": 1.1666784286499023, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 119630 + }, + { + "epoch": 787.1052631578947, + "grad_norm": 1.422247052192688, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 119640 + }, + { + "epoch": 787.171052631579, + "grad_norm": 1.2223421335220337, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 119650 + }, + { + "epoch": 787.2368421052631, + "grad_norm": 1.0068063735961914, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 119660 + }, + { + "epoch": 787.3026315789474, + "grad_norm": 1.4940046072006226, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 119670 + }, + { + "epoch": 787.3684210526316, + "grad_norm": 0.9652587175369263, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 119680 + }, + { + "epoch": 787.4342105263158, + "grad_norm": 0.7590309977531433, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 119690 + }, + { + "epoch": 787.5, + "grad_norm": 1.0170762538909912, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 119700 + }, + { + "epoch": 787.5657894736842, + "grad_norm": 1.0947462320327759, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 119710 + }, + { + "epoch": 787.6315789473684, + "grad_norm": 0.9564235806465149, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 119720 + }, + { + "epoch": 787.6973684210526, + "grad_norm": 0.8482645153999329, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 119730 + }, + { + "epoch": 787.7631578947369, + "grad_norm": 1.2533555030822754, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 119740 + }, + { + "epoch": 787.828947368421, + "grad_norm": 1.3869478702545166, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 119750 + }, + { + "epoch": 787.8947368421053, + "grad_norm": 1.1893796920776367, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 119760 + }, + { + "epoch": 787.9605263157895, + "grad_norm": 1.3825172185897827, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 119770 + }, + { + "epoch": 788.0263157894736, + "grad_norm": 1.1921755075454712, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 119780 + }, + { + "epoch": 788.0921052631579, + "grad_norm": 1.175467610359192, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 119790 + }, + { + "epoch": 788.1578947368421, + "grad_norm": 1.1586354970932007, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 119800 + }, + { + "epoch": 788.2236842105264, + "grad_norm": 0.9912821054458618, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 119810 + }, + { + "epoch": 788.2894736842105, + "grad_norm": 0.786872923374176, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 119820 + }, + { + "epoch": 788.3552631578947, + "grad_norm": 0.9939967393875122, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 119830 + }, + { + "epoch": 788.421052631579, + "grad_norm": 0.7620839476585388, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 119840 + }, + { + "epoch": 788.4868421052631, + "grad_norm": 1.0876280069351196, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 119850 + }, + { + "epoch": 788.5526315789474, + "grad_norm": 0.9076918959617615, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 119860 + }, + { + "epoch": 788.6184210526316, + "grad_norm": 1.2486871480941772, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 119870 + }, + { + "epoch": 788.6842105263158, + "grad_norm": 0.9349822402000427, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 119880 + }, + { + "epoch": 788.75, + "grad_norm": 1.160534381866455, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 119890 + }, + { + "epoch": 788.8157894736842, + "grad_norm": 1.1158519983291626, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 119900 + }, + { + "epoch": 788.8815789473684, + "grad_norm": 1.0565320253372192, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 119910 + }, + { + "epoch": 788.9473684210526, + "grad_norm": 1.1070889234542847, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 119920 + }, + { + "epoch": 789.0131578947369, + "grad_norm": 1.0946141481399536, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 119930 + }, + { + "epoch": 789.078947368421, + "grad_norm": 1.1808589696884155, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 119940 + }, + { + "epoch": 789.1447368421053, + "grad_norm": 1.0160439014434814, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 119950 + }, + { + "epoch": 789.2105263157895, + "grad_norm": 1.3254737854003906, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 119960 + }, + { + "epoch": 789.2763157894736, + "grad_norm": 1.3260244131088257, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 119970 + }, + { + "epoch": 789.3421052631579, + "grad_norm": 1.212821125984192, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 119980 + }, + { + "epoch": 789.4078947368421, + "grad_norm": 1.2427418231964111, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 119990 + }, + { + "epoch": 789.4736842105264, + "grad_norm": 0.9601447582244873, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 120000 + }, + { + "epoch": 789.5394736842105, + "grad_norm": 1.4246236085891724, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 120010 + }, + { + "epoch": 789.6052631578947, + "grad_norm": 1.162190556526184, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 120020 + }, + { + "epoch": 789.671052631579, + "grad_norm": 1.3025704622268677, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 120030 + }, + { + "epoch": 789.7368421052631, + "grad_norm": 1.1935677528381348, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 120040 + }, + { + "epoch": 789.8026315789474, + "grad_norm": 2.3716793060302734, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 120050 + }, + { + "epoch": 789.8684210526316, + "grad_norm": 1.407984733581543, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 120060 + }, + { + "epoch": 789.9342105263158, + "grad_norm": 1.2061392068862915, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 120070 + }, + { + "epoch": 790.0, + "grad_norm": 1.1397764682769775, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 120080 + }, + { + "epoch": 790.0657894736842, + "grad_norm": 1.4993038177490234, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 120090 + }, + { + "epoch": 790.1315789473684, + "grad_norm": 1.1924669742584229, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 120100 + }, + { + "epoch": 790.1973684210526, + "grad_norm": 1.2470619678497314, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 120110 + }, + { + "epoch": 790.2631578947369, + "grad_norm": 1.23881196975708, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 120120 + }, + { + "epoch": 790.328947368421, + "grad_norm": 1.0482486486434937, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 120130 + }, + { + "epoch": 790.3947368421053, + "grad_norm": 1.1907408237457275, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 120140 + }, + { + "epoch": 790.4605263157895, + "grad_norm": 1.216784119606018, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 120150 + }, + { + "epoch": 790.5263157894736, + "grad_norm": 0.947479248046875, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 120160 + }, + { + "epoch": 790.5921052631579, + "grad_norm": 1.0760152339935303, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 120170 + }, + { + "epoch": 790.6578947368421, + "grad_norm": 0.7800330519676208, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 120180 + }, + { + "epoch": 790.7236842105264, + "grad_norm": 1.2009683847427368, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 120190 + }, + { + "epoch": 790.7894736842105, + "grad_norm": 0.9596481919288635, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 120200 + }, + { + "epoch": 790.8552631578947, + "grad_norm": 1.0723578929901123, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 120210 + }, + { + "epoch": 790.921052631579, + "grad_norm": 0.8646221160888672, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 120220 + }, + { + "epoch": 790.9868421052631, + "grad_norm": 0.9819400310516357, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 120230 + }, + { + "epoch": 791.0526315789474, + "grad_norm": 0.9730360507965088, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 120240 + }, + { + "epoch": 791.1184210526316, + "grad_norm": 1.2054215669631958, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 120250 + }, + { + "epoch": 791.1842105263158, + "grad_norm": 1.0366238355636597, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 120260 + }, + { + "epoch": 791.25, + "grad_norm": 0.992953360080719, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 120270 + }, + { + "epoch": 791.3157894736842, + "grad_norm": 0.7910591959953308, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 120280 + }, + { + "epoch": 791.3815789473684, + "grad_norm": 0.9786763191223145, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 120290 + }, + { + "epoch": 791.4473684210526, + "grad_norm": 0.9105993509292603, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 120300 + }, + { + "epoch": 791.5131578947369, + "grad_norm": 1.005461573600769, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 120310 + }, + { + "epoch": 791.578947368421, + "grad_norm": 0.9919746518135071, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 120320 + }, + { + "epoch": 791.6447368421053, + "grad_norm": 0.6113591194152832, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 120330 + }, + { + "epoch": 791.7105263157895, + "grad_norm": 0.7895347476005554, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 120340 + }, + { + "epoch": 791.7763157894736, + "grad_norm": 1.1017674207687378, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 120350 + }, + { + "epoch": 791.8421052631579, + "grad_norm": 0.9249036908149719, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 120360 + }, + { + "epoch": 791.9078947368421, + "grad_norm": 1.0315765142440796, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 120370 + }, + { + "epoch": 791.9736842105264, + "grad_norm": 0.9553092122077942, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 120380 + }, + { + "epoch": 792.0394736842105, + "grad_norm": 0.9283358454704285, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 120390 + }, + { + "epoch": 792.1052631578947, + "grad_norm": 1.0717108249664307, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 120400 + }, + { + "epoch": 792.171052631579, + "grad_norm": 1.008732557296753, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 120410 + }, + { + "epoch": 792.2368421052631, + "grad_norm": 0.6750414967536926, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 120420 + }, + { + "epoch": 792.3026315789474, + "grad_norm": 0.9144452810287476, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 120430 + }, + { + "epoch": 792.3684210526316, + "grad_norm": 0.8606979250907898, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 120440 + }, + { + "epoch": 792.4342105263158, + "grad_norm": 0.9034025073051453, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 120450 + }, + { + "epoch": 792.5, + "grad_norm": 0.8836445808410645, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 120460 + }, + { + "epoch": 792.5657894736842, + "grad_norm": 1.0012311935424805, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 120470 + }, + { + "epoch": 792.6315789473684, + "grad_norm": 1.7558934688568115, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 120480 + }, + { + "epoch": 792.6973684210526, + "grad_norm": 1.1554054021835327, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 120490 + }, + { + "epoch": 792.7631578947369, + "grad_norm": 0.9315431714057922, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 120500 + }, + { + "epoch": 792.828947368421, + "grad_norm": 1.0039079189300537, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 120510 + }, + { + "epoch": 792.8947368421053, + "grad_norm": 0.9370085597038269, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 120520 + }, + { + "epoch": 792.9605263157895, + "grad_norm": 1.2092498540878296, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 120530 + }, + { + "epoch": 793.0263157894736, + "grad_norm": 1.0734901428222656, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 120540 + }, + { + "epoch": 793.0921052631579, + "grad_norm": 1.0039688348770142, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 120550 + }, + { + "epoch": 793.1578947368421, + "grad_norm": 0.904447078704834, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 120560 + }, + { + "epoch": 793.2236842105264, + "grad_norm": 0.8521278500556946, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 120570 + }, + { + "epoch": 793.2894736842105, + "grad_norm": 0.7753221988677979, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 120580 + }, + { + "epoch": 793.3552631578947, + "grad_norm": 1.0764966011047363, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 120590 + }, + { + "epoch": 793.421052631579, + "grad_norm": 1.086124062538147, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 120600 + }, + { + "epoch": 793.4868421052631, + "grad_norm": 0.8036643862724304, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 120610 + }, + { + "epoch": 793.5526315789474, + "grad_norm": 1.0477961301803589, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 120620 + }, + { + "epoch": 793.6184210526316, + "grad_norm": 0.8894155621528625, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 120630 + }, + { + "epoch": 793.6842105263158, + "grad_norm": 1.6042909622192383, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 120640 + }, + { + "epoch": 793.75, + "grad_norm": 0.8697584867477417, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 120650 + }, + { + "epoch": 793.8157894736842, + "grad_norm": 1.1683377027511597, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 120660 + }, + { + "epoch": 793.8815789473684, + "grad_norm": 1.1286085844039917, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 120670 + }, + { + "epoch": 793.9473684210526, + "grad_norm": 1.295102834701538, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 120680 + }, + { + "epoch": 794.0131578947369, + "grad_norm": 1.4578384160995483, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 120690 + }, + { + "epoch": 794.078947368421, + "grad_norm": 1.1735413074493408, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 120700 + }, + { + "epoch": 794.1447368421053, + "grad_norm": 0.7384294867515564, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 120710 + }, + { + "epoch": 794.2105263157895, + "grad_norm": 0.9748033285140991, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 120720 + }, + { + "epoch": 794.2763157894736, + "grad_norm": 1.08341646194458, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 120730 + }, + { + "epoch": 794.3421052631579, + "grad_norm": 1.2374165058135986, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 120740 + }, + { + "epoch": 794.4078947368421, + "grad_norm": 0.8181259632110596, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 120750 + }, + { + "epoch": 794.4736842105264, + "grad_norm": 1.0374884605407715, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 120760 + }, + { + "epoch": 794.5394736842105, + "grad_norm": 1.3677926063537598, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 120770 + }, + { + "epoch": 794.6052631578947, + "grad_norm": 0.8347147107124329, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 120780 + }, + { + "epoch": 794.671052631579, + "grad_norm": 0.8443951606750488, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 120790 + }, + { + "epoch": 794.7368421052631, + "grad_norm": 0.8409653902053833, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 120800 + }, + { + "epoch": 794.8026315789474, + "grad_norm": 0.8954428434371948, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 120810 + }, + { + "epoch": 794.8684210526316, + "grad_norm": 1.0372549295425415, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 120820 + }, + { + "epoch": 794.9342105263158, + "grad_norm": 0.8429449796676636, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 120830 + }, + { + "epoch": 795.0, + "grad_norm": 0.6495620608329773, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 120840 + }, + { + "epoch": 795.0657894736842, + "grad_norm": 1.2131069898605347, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 120850 + }, + { + "epoch": 795.1315789473684, + "grad_norm": 1.1015368700027466, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 120860 + }, + { + "epoch": 795.1973684210526, + "grad_norm": 0.9811515212059021, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 120870 + }, + { + "epoch": 795.2631578947369, + "grad_norm": 1.299547791481018, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 120880 + }, + { + "epoch": 795.328947368421, + "grad_norm": 1.1710044145584106, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 120890 + }, + { + "epoch": 795.3947368421053, + "grad_norm": 1.1657590866088867, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 120900 + }, + { + "epoch": 795.4605263157895, + "grad_norm": 0.8403371572494507, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 120910 + }, + { + "epoch": 795.5263157894736, + "grad_norm": 0.9806166887283325, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 120920 + }, + { + "epoch": 795.5921052631579, + "grad_norm": 1.4901376962661743, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 120930 + }, + { + "epoch": 795.6578947368421, + "grad_norm": 0.8995088338851929, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 120940 + }, + { + "epoch": 795.7236842105264, + "grad_norm": 1.037126064300537, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 120950 + }, + { + "epoch": 795.7894736842105, + "grad_norm": 1.0357317924499512, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 120960 + }, + { + "epoch": 795.8552631578947, + "grad_norm": 0.8257767558097839, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 120970 + }, + { + "epoch": 795.921052631579, + "grad_norm": 0.8299721479415894, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 120980 + }, + { + "epoch": 795.9868421052631, + "grad_norm": 1.0660762786865234, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 120990 + }, + { + "epoch": 796.0526315789474, + "grad_norm": 1.2139171361923218, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 121000 + }, + { + "epoch": 796.1184210526316, + "grad_norm": 1.1926602125167847, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 121010 + }, + { + "epoch": 796.1842105263158, + "grad_norm": 1.40194571018219, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 121020 + }, + { + "epoch": 796.25, + "grad_norm": 1.023759126663208, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 121030 + }, + { + "epoch": 796.3157894736842, + "grad_norm": 1.3803359270095825, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 121040 + }, + { + "epoch": 796.3815789473684, + "grad_norm": 1.2790648937225342, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 121050 + }, + { + "epoch": 796.4473684210526, + "grad_norm": 0.9783958792686462, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 121060 + }, + { + "epoch": 796.5131578947369, + "grad_norm": 1.3217369318008423, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 121070 + }, + { + "epoch": 796.578947368421, + "grad_norm": 1.2040969133377075, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 121080 + }, + { + "epoch": 796.6447368421053, + "grad_norm": 1.263850212097168, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 121090 + }, + { + "epoch": 796.7105263157895, + "grad_norm": 0.976997971534729, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 121100 + }, + { + "epoch": 796.7763157894736, + "grad_norm": 1.194297194480896, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 121110 + }, + { + "epoch": 796.8421052631579, + "grad_norm": 1.2234728336334229, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 121120 + }, + { + "epoch": 796.9078947368421, + "grad_norm": 1.7128822803497314, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 121130 + }, + { + "epoch": 796.9736842105264, + "grad_norm": 1.269959807395935, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 121140 + }, + { + "epoch": 797.0394736842105, + "grad_norm": 1.3292056322097778, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 121150 + }, + { + "epoch": 797.1052631578947, + "grad_norm": 0.8442802429199219, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 121160 + }, + { + "epoch": 797.171052631579, + "grad_norm": 0.9467982649803162, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 121170 + }, + { + "epoch": 797.2368421052631, + "grad_norm": 0.8293236494064331, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 121180 + }, + { + "epoch": 797.3026315789474, + "grad_norm": 0.5392968654632568, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 121190 + }, + { + "epoch": 797.3684210526316, + "grad_norm": 0.9286937713623047, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 121200 + }, + { + "epoch": 797.4342105263158, + "grad_norm": 0.6233976483345032, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 121210 + }, + { + "epoch": 797.5, + "grad_norm": 0.9368897080421448, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 121220 + }, + { + "epoch": 797.5657894736842, + "grad_norm": 0.8141809105873108, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 121230 + }, + { + "epoch": 797.6315789473684, + "grad_norm": 0.7223671674728394, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 121240 + }, + { + "epoch": 797.6973684210526, + "grad_norm": 1.1252926588058472, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 121250 + }, + { + "epoch": 797.7631578947369, + "grad_norm": 0.971419095993042, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 121260 + }, + { + "epoch": 797.828947368421, + "grad_norm": 1.5512263774871826, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 121270 + }, + { + "epoch": 797.8947368421053, + "grad_norm": 2.1177520751953125, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 121280 + }, + { + "epoch": 797.9605263157895, + "grad_norm": 1.3696571588516235, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 121290 + }, + { + "epoch": 798.0263157894736, + "grad_norm": 2.183379888534546, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 121300 + }, + { + "epoch": 798.0921052631579, + "grad_norm": 1.6594129800796509, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 121310 + }, + { + "epoch": 798.1578947368421, + "grad_norm": 1.3081175088882446, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 121320 + }, + { + "epoch": 798.2236842105264, + "grad_norm": 1.0478174686431885, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 121330 + }, + { + "epoch": 798.2894736842105, + "grad_norm": 1.081154227256775, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 121340 + }, + { + "epoch": 798.3552631578947, + "grad_norm": 1.172153353691101, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 121350 + }, + { + "epoch": 798.421052631579, + "grad_norm": 0.9921495318412781, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 121360 + }, + { + "epoch": 798.4868421052631, + "grad_norm": 0.8902541399002075, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 121370 + }, + { + "epoch": 798.5526315789474, + "grad_norm": 1.2591131925582886, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 121380 + }, + { + "epoch": 798.6184210526316, + "grad_norm": 1.258408546447754, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 121390 + }, + { + "epoch": 798.6842105263158, + "grad_norm": 0.9958367943763733, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 121400 + }, + { + "epoch": 798.75, + "grad_norm": 1.3845791816711426, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 121410 + }, + { + "epoch": 798.8157894736842, + "grad_norm": 1.2385156154632568, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 121420 + }, + { + "epoch": 798.8815789473684, + "grad_norm": 0.9330929517745972, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 121430 + }, + { + "epoch": 798.9473684210526, + "grad_norm": 1.2166731357574463, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 121440 + }, + { + "epoch": 799.0131578947369, + "grad_norm": 1.1683727502822876, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 121450 + }, + { + "epoch": 799.078947368421, + "grad_norm": 1.0285605192184448, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 121460 + }, + { + "epoch": 799.1447368421053, + "grad_norm": 0.9653977155685425, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 121470 + }, + { + "epoch": 799.2105263157895, + "grad_norm": 1.072914481163025, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 121480 + }, + { + "epoch": 799.2763157894736, + "grad_norm": 1.1126468181610107, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 121490 + }, + { + "epoch": 799.3421052631579, + "grad_norm": 1.0155147314071655, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 121500 + }, + { + "epoch": 799.4078947368421, + "grad_norm": 1.1577136516571045, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 121510 + }, + { + "epoch": 799.4736842105264, + "grad_norm": 1.2593013048171997, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 121520 + }, + { + "epoch": 799.5394736842105, + "grad_norm": 1.4376327991485596, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 121530 + }, + { + "epoch": 799.6052631578947, + "grad_norm": 1.1606967449188232, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 121540 + }, + { + "epoch": 799.671052631579, + "grad_norm": 1.1336442232131958, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 121550 + }, + { + "epoch": 799.7368421052631, + "grad_norm": 0.9730342626571655, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 121560 + }, + { + "epoch": 799.8026315789474, + "grad_norm": 1.030070185661316, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 121570 + }, + { + "epoch": 799.8684210526316, + "grad_norm": 0.6735216975212097, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 121580 + }, + { + "epoch": 799.9342105263158, + "grad_norm": 1.4537135362625122, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 121590 + }, + { + "epoch": 800.0, + "grad_norm": 0.9319224953651428, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 121600 + }, + { + "epoch": 800.0657894736842, + "grad_norm": 0.779762864112854, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 121610 + }, + { + "epoch": 800.1315789473684, + "grad_norm": 0.7982974648475647, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 121620 + }, + { + "epoch": 800.1973684210526, + "grad_norm": 0.9684889316558838, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 121630 + }, + { + "epoch": 800.2631578947369, + "grad_norm": 0.9351498484611511, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 121640 + }, + { + "epoch": 800.328947368421, + "grad_norm": 0.9390810132026672, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 121650 + }, + { + "epoch": 800.3947368421053, + "grad_norm": 0.9394250512123108, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 121660 + }, + { + "epoch": 800.4605263157895, + "grad_norm": 0.8497784733772278, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 121670 + }, + { + "epoch": 800.5263157894736, + "grad_norm": 0.9997221827507019, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 121680 + }, + { + "epoch": 800.5921052631579, + "grad_norm": 1.0118328332901, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 121690 + }, + { + "epoch": 800.6578947368421, + "grad_norm": 1.0781736373901367, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 121700 + }, + { + "epoch": 800.7236842105264, + "grad_norm": 0.9491978287696838, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 121710 + }, + { + "epoch": 800.7894736842105, + "grad_norm": 1.0116422176361084, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 121720 + }, + { + "epoch": 800.8552631578947, + "grad_norm": 1.0374714136123657, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 121730 + }, + { + "epoch": 800.921052631579, + "grad_norm": 0.8975595831871033, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 121740 + }, + { + "epoch": 800.9868421052631, + "grad_norm": 1.0354864597320557, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 121750 + }, + { + "epoch": 801.0526315789474, + "grad_norm": 1.3337525129318237, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 121760 + }, + { + "epoch": 801.1184210526316, + "grad_norm": 1.3484176397323608, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 121770 + }, + { + "epoch": 801.1842105263158, + "grad_norm": 0.9428836703300476, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 121780 + }, + { + "epoch": 801.25, + "grad_norm": 1.2294591665267944, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 121790 + }, + { + "epoch": 801.3157894736842, + "grad_norm": 1.2167091369628906, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 121800 + }, + { + "epoch": 801.3815789473684, + "grad_norm": 1.2219659090042114, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 121810 + }, + { + "epoch": 801.4473684210526, + "grad_norm": 1.1897832155227661, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 121820 + }, + { + "epoch": 801.5131578947369, + "grad_norm": 1.3615158796310425, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 121830 + }, + { + "epoch": 801.578947368421, + "grad_norm": 0.8604697585105896, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 121840 + }, + { + "epoch": 801.6447368421053, + "grad_norm": 1.149682879447937, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 121850 + }, + { + "epoch": 801.7105263157895, + "grad_norm": 0.803564190864563, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 121860 + }, + { + "epoch": 801.7763157894736, + "grad_norm": 1.223574161529541, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 121870 + }, + { + "epoch": 801.8421052631579, + "grad_norm": 0.9793342351913452, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 121880 + }, + { + "epoch": 801.9078947368421, + "grad_norm": 0.9008523225784302, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 121890 + }, + { + "epoch": 801.9736842105264, + "grad_norm": 0.9227433204650879, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 121900 + }, + { + "epoch": 802.0394736842105, + "grad_norm": 0.8591435551643372, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 121910 + }, + { + "epoch": 802.1052631578947, + "grad_norm": 1.1136678457260132, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 121920 + }, + { + "epoch": 802.171052631579, + "grad_norm": 0.9162068367004395, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 121930 + }, + { + "epoch": 802.2368421052631, + "grad_norm": 0.9551861882209778, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 121940 + }, + { + "epoch": 802.3026315789474, + "grad_norm": 0.9675922989845276, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 121950 + }, + { + "epoch": 802.3684210526316, + "grad_norm": 1.1318920850753784, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 121960 + }, + { + "epoch": 802.4342105263158, + "grad_norm": 0.9588920474052429, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 121970 + }, + { + "epoch": 802.5, + "grad_norm": 0.6656215190887451, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 121980 + }, + { + "epoch": 802.5657894736842, + "grad_norm": 1.1019165515899658, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 121990 + }, + { + "epoch": 802.6315789473684, + "grad_norm": 1.2802983522415161, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 122000 + }, + { + "epoch": 802.6973684210526, + "grad_norm": 1.1623402833938599, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 122010 + }, + { + "epoch": 802.7631578947369, + "grad_norm": 1.039617896080017, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 122020 + }, + { + "epoch": 802.828947368421, + "grad_norm": 0.8688013553619385, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 122030 + }, + { + "epoch": 802.8947368421053, + "grad_norm": 1.22115957736969, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 122040 + }, + { + "epoch": 802.9605263157895, + "grad_norm": 0.9164858460426331, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 122050 + }, + { + "epoch": 803.0263157894736, + "grad_norm": 0.8192176818847656, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 122060 + }, + { + "epoch": 803.0921052631579, + "grad_norm": 1.2811557054519653, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 122070 + }, + { + "epoch": 803.1578947368421, + "grad_norm": 1.3830119371414185, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 122080 + }, + { + "epoch": 803.2236842105264, + "grad_norm": 1.146261215209961, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 122090 + }, + { + "epoch": 803.2894736842105, + "grad_norm": 0.9297433495521545, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 122100 + }, + { + "epoch": 803.3552631578947, + "grad_norm": 0.8575513958930969, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 122110 + }, + { + "epoch": 803.421052631579, + "grad_norm": 1.1129348278045654, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 122120 + }, + { + "epoch": 803.4868421052631, + "grad_norm": 1.1914259195327759, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 122130 + }, + { + "epoch": 803.5526315789474, + "grad_norm": 0.8768858909606934, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 122140 + }, + { + "epoch": 803.6184210526316, + "grad_norm": 0.8389908671379089, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 122150 + }, + { + "epoch": 803.6842105263158, + "grad_norm": 1.1169975996017456, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 122160 + }, + { + "epoch": 803.75, + "grad_norm": 0.8531729578971863, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 122170 + }, + { + "epoch": 803.8157894736842, + "grad_norm": 1.1040211915969849, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 122180 + }, + { + "epoch": 803.8815789473684, + "grad_norm": 0.9061696529388428, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 122190 + }, + { + "epoch": 803.9473684210526, + "grad_norm": 0.9023482799530029, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 122200 + }, + { + "epoch": 804.0131578947369, + "grad_norm": 1.1508461236953735, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 122210 + }, + { + "epoch": 804.078947368421, + "grad_norm": 1.0282151699066162, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 122220 + }, + { + "epoch": 804.1447368421053, + "grad_norm": 1.2764521837234497, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 122230 + }, + { + "epoch": 804.2105263157895, + "grad_norm": 1.1191545724868774, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 122240 + }, + { + "epoch": 804.2763157894736, + "grad_norm": 0.988823413848877, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 122250 + }, + { + "epoch": 804.3421052631579, + "grad_norm": 1.095762848854065, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 122260 + }, + { + "epoch": 804.4078947368421, + "grad_norm": 1.3492151498794556, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 122270 + }, + { + "epoch": 804.4736842105264, + "grad_norm": 1.2106963396072388, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 122280 + }, + { + "epoch": 804.5394736842105, + "grad_norm": 1.2582029104232788, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 122290 + }, + { + "epoch": 804.6052631578947, + "grad_norm": 1.3207285404205322, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 122300 + }, + { + "epoch": 804.671052631579, + "grad_norm": 1.0762017965316772, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 122310 + }, + { + "epoch": 804.7368421052631, + "grad_norm": 0.871727466583252, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 122320 + }, + { + "epoch": 804.8026315789474, + "grad_norm": 0.9750553369522095, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 122330 + }, + { + "epoch": 804.8684210526316, + "grad_norm": 1.4661288261413574, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 122340 + }, + { + "epoch": 804.9342105263158, + "grad_norm": 1.1938623189926147, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 122350 + }, + { + "epoch": 805.0, + "grad_norm": 1.210606575012207, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 122360 + }, + { + "epoch": 805.0657894736842, + "grad_norm": 0.9685527086257935, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 122370 + }, + { + "epoch": 805.1315789473684, + "grad_norm": 0.7988755702972412, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 122380 + }, + { + "epoch": 805.1973684210526, + "grad_norm": 1.0523570775985718, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 122390 + }, + { + "epoch": 805.2631578947369, + "grad_norm": 0.9532704949378967, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 122400 + }, + { + "epoch": 805.328947368421, + "grad_norm": 0.774535059928894, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 122410 + }, + { + "epoch": 805.3947368421053, + "grad_norm": 1.0820705890655518, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 122420 + }, + { + "epoch": 805.4605263157895, + "grad_norm": 1.2834467887878418, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 122430 + }, + { + "epoch": 805.5263157894736, + "grad_norm": 1.2776272296905518, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 122440 + }, + { + "epoch": 805.5921052631579, + "grad_norm": 1.7061843872070312, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 122450 + }, + { + "epoch": 805.6578947368421, + "grad_norm": 1.2151910066604614, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 122460 + }, + { + "epoch": 805.7236842105264, + "grad_norm": 0.9397733211517334, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 122470 + }, + { + "epoch": 805.7894736842105, + "grad_norm": 1.0783069133758545, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 122480 + }, + { + "epoch": 805.8552631578947, + "grad_norm": 0.965991735458374, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 122490 + }, + { + "epoch": 805.921052631579, + "grad_norm": 1.1885125637054443, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 122500 + }, + { + "epoch": 805.9868421052631, + "grad_norm": 1.4462835788726807, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 122510 + }, + { + "epoch": 806.0526315789474, + "grad_norm": 1.4112342596054077, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 122520 + }, + { + "epoch": 806.1184210526316, + "grad_norm": 1.3368967771530151, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 122530 + }, + { + "epoch": 806.1842105263158, + "grad_norm": 0.9588228464126587, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 122540 + }, + { + "epoch": 806.25, + "grad_norm": 1.0012880563735962, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 122550 + }, + { + "epoch": 806.3157894736842, + "grad_norm": 1.0490740537643433, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 122560 + }, + { + "epoch": 806.3815789473684, + "grad_norm": 1.1824147701263428, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 122570 + }, + { + "epoch": 806.4473684210526, + "grad_norm": 1.1107277870178223, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 122580 + }, + { + "epoch": 806.5131578947369, + "grad_norm": 0.9902931451797485, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 122590 + }, + { + "epoch": 806.578947368421, + "grad_norm": 1.0864098072052002, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 122600 + }, + { + "epoch": 806.6447368421053, + "grad_norm": 1.0239638090133667, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 122610 + }, + { + "epoch": 806.7105263157895, + "grad_norm": 0.9125197529792786, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 122620 + }, + { + "epoch": 806.7763157894736, + "grad_norm": 0.8427323698997498, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 122630 + }, + { + "epoch": 806.8421052631579, + "grad_norm": 0.6944894790649414, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 122640 + }, + { + "epoch": 806.9078947368421, + "grad_norm": 0.6341553926467896, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 122650 + }, + { + "epoch": 806.9736842105264, + "grad_norm": 1.362230896949768, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 122660 + }, + { + "epoch": 807.0394736842105, + "grad_norm": 1.2482671737670898, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 122670 + }, + { + "epoch": 807.1052631578947, + "grad_norm": 1.413817048072815, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 122680 + }, + { + "epoch": 807.171052631579, + "grad_norm": 1.4257447719573975, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 122690 + }, + { + "epoch": 807.2368421052631, + "grad_norm": 1.0907763242721558, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 122700 + }, + { + "epoch": 807.3026315789474, + "grad_norm": 1.1896060705184937, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 122710 + }, + { + "epoch": 807.3684210526316, + "grad_norm": 0.9209076166152954, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 122720 + }, + { + "epoch": 807.4342105263158, + "grad_norm": 0.7475315928459167, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 122730 + }, + { + "epoch": 807.5, + "grad_norm": 1.2551970481872559, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 122740 + }, + { + "epoch": 807.5657894736842, + "grad_norm": 1.2004441022872925, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 122750 + }, + { + "epoch": 807.6315789473684, + "grad_norm": 1.0100356340408325, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 122760 + }, + { + "epoch": 807.6973684210526, + "grad_norm": 1.3553928136825562, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 122770 + }, + { + "epoch": 807.7631578947369, + "grad_norm": 0.8858333826065063, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 122780 + }, + { + "epoch": 807.828947368421, + "grad_norm": 1.5102218389511108, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 122790 + }, + { + "epoch": 807.8947368421053, + "grad_norm": 1.0069150924682617, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 122800 + }, + { + "epoch": 807.9605263157895, + "grad_norm": 0.7301906943321228, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 122810 + }, + { + "epoch": 808.0263157894736, + "grad_norm": 1.0068855285644531, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 122820 + }, + { + "epoch": 808.0921052631579, + "grad_norm": 0.9534813165664673, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 122830 + }, + { + "epoch": 808.1578947368421, + "grad_norm": 0.9146772027015686, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 122840 + }, + { + "epoch": 808.2236842105264, + "grad_norm": 0.9505394697189331, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 122850 + }, + { + "epoch": 808.2894736842105, + "grad_norm": 1.0315290689468384, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 122860 + }, + { + "epoch": 808.3552631578947, + "grad_norm": 1.236918568611145, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 122870 + }, + { + "epoch": 808.421052631579, + "grad_norm": 1.0912704467773438, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 122880 + }, + { + "epoch": 808.4868421052631, + "grad_norm": 0.8193346261978149, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 122890 + }, + { + "epoch": 808.5526315789474, + "grad_norm": 1.54730224609375, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 122900 + }, + { + "epoch": 808.6184210526316, + "grad_norm": 1.441839575767517, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 122910 + }, + { + "epoch": 808.6842105263158, + "grad_norm": 1.055781602859497, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 122920 + }, + { + "epoch": 808.75, + "grad_norm": 1.353714108467102, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 122930 + }, + { + "epoch": 808.8157894736842, + "grad_norm": 1.2414506673812866, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 122940 + }, + { + "epoch": 808.8815789473684, + "grad_norm": 1.2906715869903564, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 122950 + }, + { + "epoch": 808.9473684210526, + "grad_norm": 0.7939502596855164, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 122960 + }, + { + "epoch": 809.0131578947369, + "grad_norm": 0.9539815783500671, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 122970 + }, + { + "epoch": 809.078947368421, + "grad_norm": 0.7728420495986938, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 122980 + }, + { + "epoch": 809.1447368421053, + "grad_norm": 0.9012200236320496, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 122990 + }, + { + "epoch": 809.2105263157895, + "grad_norm": 0.874159574508667, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 123000 + }, + { + "epoch": 809.2763157894736, + "grad_norm": 1.1063437461853027, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 123010 + }, + { + "epoch": 809.3421052631579, + "grad_norm": 1.1049076318740845, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 123020 + }, + { + "epoch": 809.4078947368421, + "grad_norm": 1.4535138607025146, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 123030 + }, + { + "epoch": 809.4736842105264, + "grad_norm": 1.022336483001709, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 123040 + }, + { + "epoch": 809.5394736842105, + "grad_norm": 1.1350669860839844, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 123050 + }, + { + "epoch": 809.6052631578947, + "grad_norm": 1.5300946235656738, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 123060 + }, + { + "epoch": 809.671052631579, + "grad_norm": 1.1498500108718872, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 123070 + }, + { + "epoch": 809.7368421052631, + "grad_norm": 0.6810249090194702, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 123080 + }, + { + "epoch": 809.8026315789474, + "grad_norm": 0.9116702675819397, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 123090 + }, + { + "epoch": 809.8684210526316, + "grad_norm": 1.095033049583435, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 123100 + }, + { + "epoch": 809.9342105263158, + "grad_norm": 0.9382158517837524, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 123110 + }, + { + "epoch": 810.0, + "grad_norm": 1.0285440683364868, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 123120 + }, + { + "epoch": 810.0657894736842, + "grad_norm": 0.9891853332519531, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 123130 + }, + { + "epoch": 810.1315789473684, + "grad_norm": 0.9508638381958008, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 123140 + }, + { + "epoch": 810.1973684210526, + "grad_norm": 0.9087507724761963, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 123150 + }, + { + "epoch": 810.2631578947369, + "grad_norm": 1.0318292379379272, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 123160 + }, + { + "epoch": 810.328947368421, + "grad_norm": 0.8435489535331726, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 123170 + }, + { + "epoch": 810.3947368421053, + "grad_norm": 0.7418573498725891, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 123180 + }, + { + "epoch": 810.4605263157895, + "grad_norm": 1.2154449224472046, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 123190 + }, + { + "epoch": 810.5263157894736, + "grad_norm": 1.1838921308517456, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 123200 + }, + { + "epoch": 810.5921052631579, + "grad_norm": 0.6878968477249146, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 123210 + }, + { + "epoch": 810.6578947368421, + "grad_norm": 1.2046258449554443, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 123220 + }, + { + "epoch": 810.7236842105264, + "grad_norm": 1.1671987771987915, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 123230 + }, + { + "epoch": 810.7894736842105, + "grad_norm": 1.0558589696884155, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 123240 + }, + { + "epoch": 810.8552631578947, + "grad_norm": 1.035437822341919, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 123250 + }, + { + "epoch": 810.921052631579, + "grad_norm": 1.2086669206619263, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 123260 + }, + { + "epoch": 810.9868421052631, + "grad_norm": 1.086695909500122, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 123270 + }, + { + "epoch": 811.0526315789474, + "grad_norm": 1.3100385665893555, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 123280 + }, + { + "epoch": 811.1184210526316, + "grad_norm": 1.3624625205993652, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 123290 + }, + { + "epoch": 811.1842105263158, + "grad_norm": 1.1712080240249634, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 123300 + }, + { + "epoch": 811.25, + "grad_norm": 1.2696281671524048, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 123310 + }, + { + "epoch": 811.3157894736842, + "grad_norm": 0.7880839109420776, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 123320 + }, + { + "epoch": 811.3815789473684, + "grad_norm": 1.3143435716629028, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 123330 + }, + { + "epoch": 811.4473684210526, + "grad_norm": 1.0152329206466675, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 123340 + }, + { + "epoch": 811.5131578947369, + "grad_norm": 0.9424976110458374, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 123350 + }, + { + "epoch": 811.578947368421, + "grad_norm": 1.0948466062545776, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 123360 + }, + { + "epoch": 811.6447368421053, + "grad_norm": 0.9606415033340454, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 123370 + }, + { + "epoch": 811.7105263157895, + "grad_norm": 1.1549152135849, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 123380 + }, + { + "epoch": 811.7763157894736, + "grad_norm": 0.8370715975761414, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 123390 + }, + { + "epoch": 811.8421052631579, + "grad_norm": 1.1757169961929321, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 123400 + }, + { + "epoch": 811.9078947368421, + "grad_norm": 1.3213396072387695, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 123410 + }, + { + "epoch": 811.9736842105264, + "grad_norm": 1.3721901178359985, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 123420 + }, + { + "epoch": 812.0394736842105, + "grad_norm": 1.2278730869293213, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 123430 + }, + { + "epoch": 812.1052631578947, + "grad_norm": 0.8597956895828247, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 123440 + }, + { + "epoch": 812.171052631579, + "grad_norm": 1.0379050970077515, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 123450 + }, + { + "epoch": 812.2368421052631, + "grad_norm": 1.107707142829895, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 123460 + }, + { + "epoch": 812.3026315789474, + "grad_norm": 1.37968909740448, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 123470 + }, + { + "epoch": 812.3684210526316, + "grad_norm": 1.6168029308319092, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 123480 + }, + { + "epoch": 812.4342105263158, + "grad_norm": 1.3366464376449585, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 123490 + }, + { + "epoch": 812.5, + "grad_norm": 1.2438991069793701, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 123500 + }, + { + "epoch": 812.5657894736842, + "grad_norm": 1.4187861680984497, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 123510 + }, + { + "epoch": 812.6315789473684, + "grad_norm": 1.350807547569275, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 123520 + }, + { + "epoch": 812.6973684210526, + "grad_norm": 1.1984448432922363, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 123530 + }, + { + "epoch": 812.7631578947369, + "grad_norm": 0.9718255400657654, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 123540 + }, + { + "epoch": 812.828947368421, + "grad_norm": 0.8007934093475342, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 123550 + }, + { + "epoch": 812.8947368421053, + "grad_norm": 1.1120939254760742, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 123560 + }, + { + "epoch": 812.9605263157895, + "grad_norm": 0.8073641657829285, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 123570 + }, + { + "epoch": 813.0263157894736, + "grad_norm": 1.3328661918640137, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 123580 + }, + { + "epoch": 813.0921052631579, + "grad_norm": 1.2822130918502808, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 123590 + }, + { + "epoch": 813.1578947368421, + "grad_norm": 1.1979964971542358, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 123600 + }, + { + "epoch": 813.2236842105264, + "grad_norm": 1.2304644584655762, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 123610 + }, + { + "epoch": 813.2894736842105, + "grad_norm": 0.8856472373008728, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 123620 + }, + { + "epoch": 813.3552631578947, + "grad_norm": 1.14380943775177, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 123630 + }, + { + "epoch": 813.421052631579, + "grad_norm": 1.169329047203064, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 123640 + }, + { + "epoch": 813.4868421052631, + "grad_norm": 1.1275250911712646, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 123650 + }, + { + "epoch": 813.5526315789474, + "grad_norm": 0.9524691700935364, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 123660 + }, + { + "epoch": 813.6184210526316, + "grad_norm": 1.1175603866577148, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 123670 + }, + { + "epoch": 813.6842105263158, + "grad_norm": 1.0256012678146362, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 123680 + }, + { + "epoch": 813.75, + "grad_norm": 0.9166593551635742, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 123690 + }, + { + "epoch": 813.8157894736842, + "grad_norm": 1.1852658987045288, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 123700 + }, + { + "epoch": 813.8815789473684, + "grad_norm": 1.0424803495407104, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 123710 + }, + { + "epoch": 813.9473684210526, + "grad_norm": 0.9205971956253052, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 123720 + }, + { + "epoch": 814.0131578947369, + "grad_norm": 0.8064239621162415, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 123730 + }, + { + "epoch": 814.078947368421, + "grad_norm": 1.1456804275512695, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 123740 + }, + { + "epoch": 814.1447368421053, + "grad_norm": 1.1536784172058105, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 123750 + }, + { + "epoch": 814.2105263157895, + "grad_norm": 0.9535488486289978, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 123760 + }, + { + "epoch": 814.2763157894736, + "grad_norm": 0.8925880193710327, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 123770 + }, + { + "epoch": 814.3421052631579, + "grad_norm": 0.7770874500274658, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 123780 + }, + { + "epoch": 814.4078947368421, + "grad_norm": 0.7658482193946838, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 123790 + }, + { + "epoch": 814.4736842105264, + "grad_norm": 1.1080727577209473, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 123800 + }, + { + "epoch": 814.5394736842105, + "grad_norm": 0.9930034279823303, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 123810 + }, + { + "epoch": 814.6052631578947, + "grad_norm": 1.04108464717865, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 123820 + }, + { + "epoch": 814.671052631579, + "grad_norm": 1.4060074090957642, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 123830 + }, + { + "epoch": 814.7368421052631, + "grad_norm": 1.135053277015686, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 123840 + }, + { + "epoch": 814.8026315789474, + "grad_norm": 1.102472186088562, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 123850 + }, + { + "epoch": 814.8684210526316, + "grad_norm": 1.0958255529403687, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 123860 + }, + { + "epoch": 814.9342105263158, + "grad_norm": 1.2109631299972534, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 123870 + }, + { + "epoch": 815.0, + "grad_norm": 0.8327732086181641, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 123880 + }, + { + "epoch": 815.0657894736842, + "grad_norm": 0.830962598323822, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 123890 + }, + { + "epoch": 815.1315789473684, + "grad_norm": 1.0585317611694336, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 123900 + }, + { + "epoch": 815.1973684210526, + "grad_norm": 1.095977783203125, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 123910 + }, + { + "epoch": 815.2631578947369, + "grad_norm": 1.2704319953918457, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 123920 + }, + { + "epoch": 815.328947368421, + "grad_norm": 0.957330584526062, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 123930 + }, + { + "epoch": 815.3947368421053, + "grad_norm": 0.8996625542640686, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 123940 + }, + { + "epoch": 815.4605263157895, + "grad_norm": 1.2398024797439575, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 123950 + }, + { + "epoch": 815.5263157894736, + "grad_norm": 1.1726700067520142, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 123960 + }, + { + "epoch": 815.5921052631579, + "grad_norm": 1.1765739917755127, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 123970 + }, + { + "epoch": 815.6578947368421, + "grad_norm": 1.4318251609802246, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 123980 + }, + { + "epoch": 815.7236842105264, + "grad_norm": 1.3720372915267944, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 123990 + }, + { + "epoch": 815.7894736842105, + "grad_norm": 0.9442108273506165, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 124000 + }, + { + "epoch": 815.8552631578947, + "grad_norm": 1.4249804019927979, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 124010 + }, + { + "epoch": 815.921052631579, + "grad_norm": 1.5111039876937866, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 124020 + }, + { + "epoch": 815.9868421052631, + "grad_norm": 1.065873146057129, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 124030 + }, + { + "epoch": 816.0526315789474, + "grad_norm": 1.224717617034912, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 124040 + }, + { + "epoch": 816.1184210526316, + "grad_norm": 0.8708517551422119, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 124050 + }, + { + "epoch": 816.1842105263158, + "grad_norm": 1.0297471284866333, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 124060 + }, + { + "epoch": 816.25, + "grad_norm": 1.1556639671325684, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 124070 + }, + { + "epoch": 816.3157894736842, + "grad_norm": 1.0219916105270386, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 124080 + }, + { + "epoch": 816.3815789473684, + "grad_norm": 0.9908708930015564, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 124090 + }, + { + "epoch": 816.4473684210526, + "grad_norm": 0.8453224897384644, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 124100 + }, + { + "epoch": 816.5131578947369, + "grad_norm": 0.6190137267112732, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 124110 + }, + { + "epoch": 816.578947368421, + "grad_norm": 1.4555507898330688, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 124120 + }, + { + "epoch": 816.6447368421053, + "grad_norm": 1.2438726425170898, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 124130 + }, + { + "epoch": 816.7105263157895, + "grad_norm": 1.2123976945877075, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 124140 + }, + { + "epoch": 816.7763157894736, + "grad_norm": 1.1773884296417236, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 124150 + }, + { + "epoch": 816.8421052631579, + "grad_norm": 1.0776981115341187, + "learning_rate": 0.0001, + "loss": 0.0136, + "step": 124160 + }, + { + "epoch": 816.9078947368421, + "grad_norm": 1.0977414846420288, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 124170 + }, + { + "epoch": 816.9736842105264, + "grad_norm": 0.9133142232894897, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 124180 + }, + { + "epoch": 817.0394736842105, + "grad_norm": 1.3177136182785034, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 124190 + }, + { + "epoch": 817.1052631578947, + "grad_norm": 1.1052703857421875, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 124200 + }, + { + "epoch": 817.171052631579, + "grad_norm": 1.2231838703155518, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 124210 + }, + { + "epoch": 817.2368421052631, + "grad_norm": 1.0773632526397705, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 124220 + }, + { + "epoch": 817.3026315789474, + "grad_norm": 0.7591347098350525, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 124230 + }, + { + "epoch": 817.3684210526316, + "grad_norm": 1.1627017259597778, + "learning_rate": 0.0001, + "loss": 0.0145, + "step": 124240 + }, + { + "epoch": 817.4342105263158, + "grad_norm": 0.88426274061203, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 124250 + }, + { + "epoch": 817.5, + "grad_norm": 1.0880540609359741, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 124260 + }, + { + "epoch": 817.5657894736842, + "grad_norm": 1.1447336673736572, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 124270 + }, + { + "epoch": 817.6315789473684, + "grad_norm": 0.9627189636230469, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 124280 + }, + { + "epoch": 817.6973684210526, + "grad_norm": 0.8809316158294678, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 124290 + }, + { + "epoch": 817.7631578947369, + "grad_norm": 1.1471461057662964, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 124300 + }, + { + "epoch": 817.828947368421, + "grad_norm": 0.9346367716789246, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 124310 + }, + { + "epoch": 817.8947368421053, + "grad_norm": 1.247454047203064, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 124320 + }, + { + "epoch": 817.9605263157895, + "grad_norm": 1.3746150732040405, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 124330 + }, + { + "epoch": 818.0263157894736, + "grad_norm": 1.183838963508606, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 124340 + }, + { + "epoch": 818.0921052631579, + "grad_norm": 0.9270862936973572, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 124350 + }, + { + "epoch": 818.1578947368421, + "grad_norm": 0.9272816777229309, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 124360 + }, + { + "epoch": 818.2236842105264, + "grad_norm": 1.0511780977249146, + "learning_rate": 0.0001, + "loss": 0.0139, + "step": 124370 + }, + { + "epoch": 818.2894736842105, + "grad_norm": 1.2871315479278564, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 124380 + }, + { + "epoch": 818.3552631578947, + "grad_norm": 1.2518093585968018, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 124390 + }, + { + "epoch": 818.421052631579, + "grad_norm": 1.498147964477539, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 124400 + }, + { + "epoch": 818.4868421052631, + "grad_norm": 2.1815390586853027, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 124410 + }, + { + "epoch": 818.5526315789474, + "grad_norm": 1.512572169303894, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 124420 + }, + { + "epoch": 818.6184210526316, + "grad_norm": 1.39220130443573, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 124430 + }, + { + "epoch": 818.6842105263158, + "grad_norm": 1.40886390209198, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 124440 + }, + { + "epoch": 818.75, + "grad_norm": 1.2492808103561401, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 124450 + }, + { + "epoch": 818.8157894736842, + "grad_norm": 1.3465094566345215, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 124460 + }, + { + "epoch": 818.8815789473684, + "grad_norm": 1.4258430004119873, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 124470 + }, + { + "epoch": 818.9473684210526, + "grad_norm": 1.4436920881271362, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 124480 + }, + { + "epoch": 819.0131578947369, + "grad_norm": 1.0473710298538208, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 124490 + }, + { + "epoch": 819.078947368421, + "grad_norm": 1.4626469612121582, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 124500 + }, + { + "epoch": 819.1447368421053, + "grad_norm": 1.2455971240997314, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 124510 + }, + { + "epoch": 819.2105263157895, + "grad_norm": 0.8882140517234802, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 124520 + }, + { + "epoch": 819.2763157894736, + "grad_norm": 1.2379828691482544, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 124530 + }, + { + "epoch": 819.3421052631579, + "grad_norm": 0.7983285188674927, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 124540 + }, + { + "epoch": 819.4078947368421, + "grad_norm": 0.8106863498687744, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 124550 + }, + { + "epoch": 819.4736842105264, + "grad_norm": 1.1622852087020874, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 124560 + }, + { + "epoch": 819.5394736842105, + "grad_norm": 0.733343243598938, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 124570 + }, + { + "epoch": 819.6052631578947, + "grad_norm": 0.8690012693405151, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 124580 + }, + { + "epoch": 819.671052631579, + "grad_norm": 0.7145095467567444, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 124590 + }, + { + "epoch": 819.7368421052631, + "grad_norm": 1.0075265169143677, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 124600 + }, + { + "epoch": 819.8026315789474, + "grad_norm": 1.1366093158721924, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 124610 + }, + { + "epoch": 819.8684210526316, + "grad_norm": 0.9843047261238098, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 124620 + }, + { + "epoch": 819.9342105263158, + "grad_norm": 0.7279674410820007, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 124630 + }, + { + "epoch": 820.0, + "grad_norm": 1.1037843227386475, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 124640 + }, + { + "epoch": 820.0657894736842, + "grad_norm": 0.7516036033630371, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 124650 + }, + { + "epoch": 820.1315789473684, + "grad_norm": 1.1608238220214844, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 124660 + }, + { + "epoch": 820.1973684210526, + "grad_norm": 1.3053244352340698, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 124670 + }, + { + "epoch": 820.2631578947369, + "grad_norm": 1.0844759941101074, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 124680 + }, + { + "epoch": 820.328947368421, + "grad_norm": 0.9442291259765625, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 124690 + }, + { + "epoch": 820.3947368421053, + "grad_norm": 1.1379389762878418, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 124700 + }, + { + "epoch": 820.4605263157895, + "grad_norm": 0.6525788903236389, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 124710 + }, + { + "epoch": 820.5263157894736, + "grad_norm": 1.278929352760315, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 124720 + }, + { + "epoch": 820.5921052631579, + "grad_norm": 1.4533745050430298, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 124730 + }, + { + "epoch": 820.6578947368421, + "grad_norm": 1.0017926692962646, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 124740 + }, + { + "epoch": 820.7236842105264, + "grad_norm": 0.9450610876083374, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 124750 + }, + { + "epoch": 820.7894736842105, + "grad_norm": 0.6079602837562561, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 124760 + }, + { + "epoch": 820.8552631578947, + "grad_norm": 1.112248420715332, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 124770 + }, + { + "epoch": 820.921052631579, + "grad_norm": 1.2943469285964966, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 124780 + }, + { + "epoch": 820.9868421052631, + "grad_norm": 1.1327763795852661, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 124790 + }, + { + "epoch": 821.0526315789474, + "grad_norm": 1.0445799827575684, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 124800 + }, + { + "epoch": 821.1184210526316, + "grad_norm": 1.081302285194397, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 124810 + }, + { + "epoch": 821.1842105263158, + "grad_norm": 1.0378952026367188, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 124820 + }, + { + "epoch": 821.25, + "grad_norm": 0.7026200890541077, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 124830 + }, + { + "epoch": 821.3157894736842, + "grad_norm": 0.709734320640564, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 124840 + }, + { + "epoch": 821.3815789473684, + "grad_norm": 1.214118480682373, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 124850 + }, + { + "epoch": 821.4473684210526, + "grad_norm": 1.0140697956085205, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 124860 + }, + { + "epoch": 821.5131578947369, + "grad_norm": 1.1845144033432007, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 124870 + }, + { + "epoch": 821.578947368421, + "grad_norm": 1.131510853767395, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 124880 + }, + { + "epoch": 821.6447368421053, + "grad_norm": 1.0710123777389526, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 124890 + }, + { + "epoch": 821.7105263157895, + "grad_norm": 0.9462993741035461, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 124900 + }, + { + "epoch": 821.7763157894736, + "grad_norm": 1.1488789319992065, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 124910 + }, + { + "epoch": 821.8421052631579, + "grad_norm": 0.8153425455093384, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 124920 + }, + { + "epoch": 821.9078947368421, + "grad_norm": 1.136583924293518, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 124930 + }, + { + "epoch": 821.9736842105264, + "grad_norm": 1.1596461534500122, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 124940 + }, + { + "epoch": 822.0394736842105, + "grad_norm": 1.1383651494979858, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 124950 + }, + { + "epoch": 822.1052631578947, + "grad_norm": 1.0268265008926392, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 124960 + }, + { + "epoch": 822.171052631579, + "grad_norm": 1.239668607711792, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 124970 + }, + { + "epoch": 822.2368421052631, + "grad_norm": 1.1452547311782837, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 124980 + }, + { + "epoch": 822.3026315789474, + "grad_norm": 0.804125189781189, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 124990 + }, + { + "epoch": 822.3684210526316, + "grad_norm": 1.5917028188705444, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 125000 + }, + { + "epoch": 822.4342105263158, + "grad_norm": 1.280947208404541, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 125010 + }, + { + "epoch": 822.5, + "grad_norm": 0.9528753161430359, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 125020 + }, + { + "epoch": 822.5657894736842, + "grad_norm": 0.9198108911514282, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 125030 + }, + { + "epoch": 822.6315789473684, + "grad_norm": 0.8453339338302612, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 125040 + }, + { + "epoch": 822.6973684210526, + "grad_norm": 1.1574821472167969, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125050 + }, + { + "epoch": 822.7631578947369, + "grad_norm": 1.1364651918411255, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 125060 + }, + { + "epoch": 822.828947368421, + "grad_norm": 0.9139201641082764, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 125070 + }, + { + "epoch": 822.8947368421053, + "grad_norm": 0.8016061782836914, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 125080 + }, + { + "epoch": 822.9605263157895, + "grad_norm": 1.3205960988998413, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125090 + }, + { + "epoch": 823.0263157894736, + "grad_norm": 0.8638978600502014, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 125100 + }, + { + "epoch": 823.0921052631579, + "grad_norm": 0.964411199092865, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125110 + }, + { + "epoch": 823.1578947368421, + "grad_norm": 1.1022132635116577, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 125120 + }, + { + "epoch": 823.2236842105264, + "grad_norm": 1.2288185358047485, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 125130 + }, + { + "epoch": 823.2894736842105, + "grad_norm": 1.1592532396316528, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 125140 + }, + { + "epoch": 823.3552631578947, + "grad_norm": 0.9944072961807251, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 125150 + }, + { + "epoch": 823.421052631579, + "grad_norm": 0.8100464344024658, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 125160 + }, + { + "epoch": 823.4868421052631, + "grad_norm": 0.9860333204269409, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 125170 + }, + { + "epoch": 823.5526315789474, + "grad_norm": 1.0454355478286743, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 125180 + }, + { + "epoch": 823.6184210526316, + "grad_norm": 1.0982762575149536, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 125190 + }, + { + "epoch": 823.6842105263158, + "grad_norm": 1.372549057006836, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 125200 + }, + { + "epoch": 823.75, + "grad_norm": 1.4132963418960571, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 125210 + }, + { + "epoch": 823.8157894736842, + "grad_norm": 1.2512835264205933, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 125220 + }, + { + "epoch": 823.8815789473684, + "grad_norm": 1.114558219909668, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125230 + }, + { + "epoch": 823.9473684210526, + "grad_norm": 1.0687578916549683, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 125240 + }, + { + "epoch": 824.0131578947369, + "grad_norm": 1.5718590021133423, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 125250 + }, + { + "epoch": 824.078947368421, + "grad_norm": 1.0337969064712524, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 125260 + }, + { + "epoch": 824.1447368421053, + "grad_norm": 1.0111421346664429, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 125270 + }, + { + "epoch": 824.2105263157895, + "grad_norm": 1.1369024515151978, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 125280 + }, + { + "epoch": 824.2763157894736, + "grad_norm": 0.7964611649513245, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 125290 + }, + { + "epoch": 824.3421052631579, + "grad_norm": 1.2055116891860962, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125300 + }, + { + "epoch": 824.4078947368421, + "grad_norm": 1.0731807947158813, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 125310 + }, + { + "epoch": 824.4736842105264, + "grad_norm": 1.1437373161315918, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 125320 + }, + { + "epoch": 824.5394736842105, + "grad_norm": 1.1605925559997559, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 125330 + }, + { + "epoch": 824.6052631578947, + "grad_norm": 1.286482334136963, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 125340 + }, + { + "epoch": 824.671052631579, + "grad_norm": 0.9673534035682678, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 125350 + }, + { + "epoch": 824.7368421052631, + "grad_norm": 1.3695217370986938, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 125360 + }, + { + "epoch": 824.8026315789474, + "grad_norm": 1.1328647136688232, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 125370 + }, + { + "epoch": 824.8684210526316, + "grad_norm": 1.0601879358291626, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 125380 + }, + { + "epoch": 824.9342105263158, + "grad_norm": 0.9178951978683472, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 125390 + }, + { + "epoch": 825.0, + "grad_norm": 0.9925839900970459, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125400 + }, + { + "epoch": 825.0657894736842, + "grad_norm": 1.2685902118682861, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 125410 + }, + { + "epoch": 825.1315789473684, + "grad_norm": 1.3963377475738525, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 125420 + }, + { + "epoch": 825.1973684210526, + "grad_norm": 1.2463805675506592, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 125430 + }, + { + "epoch": 825.2631578947369, + "grad_norm": 0.8058486580848694, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 125440 + }, + { + "epoch": 825.328947368421, + "grad_norm": 1.0413463115692139, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 125450 + }, + { + "epoch": 825.3947368421053, + "grad_norm": 1.3171190023422241, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 125460 + }, + { + "epoch": 825.4605263157895, + "grad_norm": 1.252544641494751, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 125470 + }, + { + "epoch": 825.5263157894736, + "grad_norm": 1.0242067575454712, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 125480 + }, + { + "epoch": 825.5921052631579, + "grad_norm": 0.9414500594139099, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 125490 + }, + { + "epoch": 825.6578947368421, + "grad_norm": 1.213117003440857, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 125500 + }, + { + "epoch": 825.7236842105264, + "grad_norm": 1.1076947450637817, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 125510 + }, + { + "epoch": 825.7894736842105, + "grad_norm": 1.1053651571273804, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 125520 + }, + { + "epoch": 825.8552631578947, + "grad_norm": 0.7674948573112488, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 125530 + }, + { + "epoch": 825.921052631579, + "grad_norm": 1.0354715585708618, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 125540 + }, + { + "epoch": 825.9868421052631, + "grad_norm": 0.8403087854385376, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 125550 + }, + { + "epoch": 826.0526315789474, + "grad_norm": 1.164014458656311, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 125560 + }, + { + "epoch": 826.1184210526316, + "grad_norm": 0.902004599571228, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 125570 + }, + { + "epoch": 826.1842105263158, + "grad_norm": 1.306944489479065, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 125580 + }, + { + "epoch": 826.25, + "grad_norm": 1.1703041791915894, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 125590 + }, + { + "epoch": 826.3157894736842, + "grad_norm": 1.309346079826355, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 125600 + }, + { + "epoch": 826.3815789473684, + "grad_norm": 0.9344452619552612, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 125610 + }, + { + "epoch": 826.4473684210526, + "grad_norm": 1.1551661491394043, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 125620 + }, + { + "epoch": 826.5131578947369, + "grad_norm": 0.8393580913543701, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 125630 + }, + { + "epoch": 826.578947368421, + "grad_norm": 1.1995357275009155, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 125640 + }, + { + "epoch": 826.6447368421053, + "grad_norm": 1.1376581192016602, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 125650 + }, + { + "epoch": 826.7105263157895, + "grad_norm": 1.103240966796875, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 125660 + }, + { + "epoch": 826.7763157894736, + "grad_norm": 0.8122648000717163, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125670 + }, + { + "epoch": 826.8421052631579, + "grad_norm": 0.9855820536613464, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 125680 + }, + { + "epoch": 826.9078947368421, + "grad_norm": 1.3283075094223022, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 125690 + }, + { + "epoch": 826.9736842105264, + "grad_norm": 1.198071837425232, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 125700 + }, + { + "epoch": 827.0394736842105, + "grad_norm": 1.1746746301651, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 125710 + }, + { + "epoch": 827.1052631578947, + "grad_norm": 1.1474931240081787, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 125720 + }, + { + "epoch": 827.171052631579, + "grad_norm": 0.8816200494766235, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125730 + }, + { + "epoch": 827.2368421052631, + "grad_norm": 1.0781453847885132, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 125740 + }, + { + "epoch": 827.3026315789474, + "grad_norm": 0.9956436157226562, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 125750 + }, + { + "epoch": 827.3684210526316, + "grad_norm": 0.9633864164352417, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 125760 + }, + { + "epoch": 827.4342105263158, + "grad_norm": 0.7504329085350037, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 125770 + }, + { + "epoch": 827.5, + "grad_norm": 0.7806046009063721, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 125780 + }, + { + "epoch": 827.5657894736842, + "grad_norm": 1.3514283895492554, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125790 + }, + { + "epoch": 827.6315789473684, + "grad_norm": 0.7963207960128784, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 125800 + }, + { + "epoch": 827.6973684210526, + "grad_norm": 1.0604304075241089, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 125810 + }, + { + "epoch": 827.7631578947369, + "grad_norm": 1.343090295791626, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 125820 + }, + { + "epoch": 827.828947368421, + "grad_norm": 1.3664075136184692, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 125830 + }, + { + "epoch": 827.8947368421053, + "grad_norm": 1.1324230432510376, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 125840 + }, + { + "epoch": 827.9605263157895, + "grad_norm": 1.1731352806091309, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 125850 + }, + { + "epoch": 828.0263157894736, + "grad_norm": 1.25088369846344, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 125860 + }, + { + "epoch": 828.0921052631579, + "grad_norm": 0.9906530976295471, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 125870 + }, + { + "epoch": 828.1578947368421, + "grad_norm": 0.8870477676391602, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 125880 + }, + { + "epoch": 828.2236842105264, + "grad_norm": 0.904126763343811, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 125890 + }, + { + "epoch": 828.2894736842105, + "grad_norm": 1.1464385986328125, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 125900 + }, + { + "epoch": 828.3552631578947, + "grad_norm": 0.9195071458816528, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 125910 + }, + { + "epoch": 828.421052631579, + "grad_norm": 1.2309972047805786, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 125920 + }, + { + "epoch": 828.4868421052631, + "grad_norm": 0.7952999472618103, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 125930 + }, + { + "epoch": 828.5526315789474, + "grad_norm": 1.1572850942611694, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 125940 + }, + { + "epoch": 828.6184210526316, + "grad_norm": 1.0016225576400757, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 125950 + }, + { + "epoch": 828.6842105263158, + "grad_norm": 1.1131701469421387, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 125960 + }, + { + "epoch": 828.75, + "grad_norm": 1.075382113456726, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 125970 + }, + { + "epoch": 828.8157894736842, + "grad_norm": 1.0743154287338257, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 125980 + }, + { + "epoch": 828.8815789473684, + "grad_norm": 0.8774272203445435, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 125990 + }, + { + "epoch": 828.9473684210526, + "grad_norm": 0.7976047396659851, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 126000 + }, + { + "epoch": 829.0131578947369, + "grad_norm": 0.9983826875686646, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 126010 + }, + { + "epoch": 829.078947368421, + "grad_norm": 1.0128384828567505, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 126020 + }, + { + "epoch": 829.1447368421053, + "grad_norm": 1.2371277809143066, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 126030 + }, + { + "epoch": 829.2105263157895, + "grad_norm": 1.121740460395813, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 126040 + }, + { + "epoch": 829.2763157894736, + "grad_norm": 1.0720088481903076, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 126050 + }, + { + "epoch": 829.3421052631579, + "grad_norm": 0.8553463816642761, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 126060 + }, + { + "epoch": 829.4078947368421, + "grad_norm": 0.9494578838348389, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 126070 + }, + { + "epoch": 829.4736842105264, + "grad_norm": 0.7240245342254639, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 126080 + }, + { + "epoch": 829.5394736842105, + "grad_norm": 0.9537514448165894, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 126090 + }, + { + "epoch": 829.6052631578947, + "grad_norm": 0.9568378329277039, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 126100 + }, + { + "epoch": 829.671052631579, + "grad_norm": 1.081212043762207, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 126110 + }, + { + "epoch": 829.7368421052631, + "grad_norm": 0.7988075017929077, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 126120 + }, + { + "epoch": 829.8026315789474, + "grad_norm": 0.926447868347168, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 126130 + }, + { + "epoch": 829.8684210526316, + "grad_norm": 0.8456553816795349, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 126140 + }, + { + "epoch": 829.9342105263158, + "grad_norm": 0.9388708472251892, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 126150 + }, + { + "epoch": 830.0, + "grad_norm": 1.1751067638397217, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 126160 + }, + { + "epoch": 830.0657894736842, + "grad_norm": 0.8441213369369507, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 126170 + }, + { + "epoch": 830.1315789473684, + "grad_norm": 0.8216992020606995, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 126180 + }, + { + "epoch": 830.1973684210526, + "grad_norm": 1.0158005952835083, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 126190 + }, + { + "epoch": 830.2631578947369, + "grad_norm": 1.3386634588241577, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 126200 + }, + { + "epoch": 830.328947368421, + "grad_norm": 1.026985764503479, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 126210 + }, + { + "epoch": 830.3947368421053, + "grad_norm": 1.194299578666687, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 126220 + }, + { + "epoch": 830.4605263157895, + "grad_norm": 1.0776842832565308, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 126230 + }, + { + "epoch": 830.5263157894736, + "grad_norm": 1.1848527193069458, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 126240 + }, + { + "epoch": 830.5921052631579, + "grad_norm": 1.341431736946106, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 126250 + }, + { + "epoch": 830.6578947368421, + "grad_norm": 1.150313138961792, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 126260 + }, + { + "epoch": 830.7236842105264, + "grad_norm": 1.0594727993011475, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 126270 + }, + { + "epoch": 830.7894736842105, + "grad_norm": 0.9882242679595947, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 126280 + }, + { + "epoch": 830.8552631578947, + "grad_norm": 1.1118570566177368, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 126290 + }, + { + "epoch": 830.921052631579, + "grad_norm": 1.4360451698303223, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 126300 + }, + { + "epoch": 830.9868421052631, + "grad_norm": 1.0150532722473145, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 126310 + }, + { + "epoch": 831.0526315789474, + "grad_norm": 1.010431170463562, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 126320 + }, + { + "epoch": 831.1184210526316, + "grad_norm": 1.2884689569473267, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 126330 + }, + { + "epoch": 831.1842105263158, + "grad_norm": 1.212661623954773, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 126340 + }, + { + "epoch": 831.25, + "grad_norm": 1.3419666290283203, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 126350 + }, + { + "epoch": 831.3157894736842, + "grad_norm": 1.1499714851379395, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 126360 + }, + { + "epoch": 831.3815789473684, + "grad_norm": 1.0944184064865112, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 126370 + }, + { + "epoch": 831.4473684210526, + "grad_norm": 1.3042875528335571, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 126380 + }, + { + "epoch": 831.5131578947369, + "grad_norm": 1.3170181512832642, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 126390 + }, + { + "epoch": 831.578947368421, + "grad_norm": 1.0547919273376465, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 126400 + }, + { + "epoch": 831.6447368421053, + "grad_norm": 1.129002571105957, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 126410 + }, + { + "epoch": 831.7105263157895, + "grad_norm": 1.3356235027313232, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 126420 + }, + { + "epoch": 831.7763157894736, + "grad_norm": 1.2888990640640259, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 126430 + }, + { + "epoch": 831.8421052631579, + "grad_norm": 1.0148605108261108, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 126440 + }, + { + "epoch": 831.9078947368421, + "grad_norm": 0.9079254865646362, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 126450 + }, + { + "epoch": 831.9736842105264, + "grad_norm": 1.0899399518966675, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 126460 + }, + { + "epoch": 832.0394736842105, + "grad_norm": 0.915304958820343, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 126470 + }, + { + "epoch": 832.1052631578947, + "grad_norm": 1.0055623054504395, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 126480 + }, + { + "epoch": 832.171052631579, + "grad_norm": 0.8581055998802185, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 126490 + }, + { + "epoch": 832.2368421052631, + "grad_norm": 0.8408956527709961, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 126500 + }, + { + "epoch": 832.3026315789474, + "grad_norm": 0.7524006366729736, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 126510 + }, + { + "epoch": 832.3684210526316, + "grad_norm": 0.799748420715332, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 126520 + }, + { + "epoch": 832.4342105263158, + "grad_norm": 0.9387583136558533, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 126530 + }, + { + "epoch": 832.5, + "grad_norm": 0.9900626540184021, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 126540 + }, + { + "epoch": 832.5657894736842, + "grad_norm": 1.16420578956604, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 126550 + }, + { + "epoch": 832.6315789473684, + "grad_norm": 1.023986577987671, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 126560 + }, + { + "epoch": 832.6973684210526, + "grad_norm": 0.950329601764679, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 126570 + }, + { + "epoch": 832.7631578947369, + "grad_norm": 1.0447871685028076, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 126580 + }, + { + "epoch": 832.828947368421, + "grad_norm": 0.6059691905975342, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 126590 + }, + { + "epoch": 832.8947368421053, + "grad_norm": 1.1729786396026611, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 126600 + }, + { + "epoch": 832.9605263157895, + "grad_norm": 1.0164276361465454, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 126610 + }, + { + "epoch": 833.0263157894736, + "grad_norm": 1.1366938352584839, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 126620 + }, + { + "epoch": 833.0921052631579, + "grad_norm": 1.0589661598205566, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 126630 + }, + { + "epoch": 833.1578947368421, + "grad_norm": 0.9996610879898071, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 126640 + }, + { + "epoch": 833.2236842105264, + "grad_norm": 1.13376784324646, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 126650 + }, + { + "epoch": 833.2894736842105, + "grad_norm": 1.0717856884002686, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 126660 + }, + { + "epoch": 833.3552631578947, + "grad_norm": 0.7824698090553284, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 126670 + }, + { + "epoch": 833.421052631579, + "grad_norm": 1.0761967897415161, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 126680 + }, + { + "epoch": 833.4868421052631, + "grad_norm": 1.2236111164093018, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 126690 + }, + { + "epoch": 833.5526315789474, + "grad_norm": 1.143056035041809, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 126700 + }, + { + "epoch": 833.6184210526316, + "grad_norm": 1.0561918020248413, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 126710 + }, + { + "epoch": 833.6842105263158, + "grad_norm": 1.1367357969284058, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 126720 + }, + { + "epoch": 833.75, + "grad_norm": 0.7675701379776001, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 126730 + }, + { + "epoch": 833.8157894736842, + "grad_norm": 1.025897741317749, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 126740 + }, + { + "epoch": 833.8815789473684, + "grad_norm": 0.9761922359466553, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 126750 + }, + { + "epoch": 833.9473684210526, + "grad_norm": 0.9018344879150391, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 126760 + }, + { + "epoch": 834.0131578947369, + "grad_norm": 1.203495979309082, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 126770 + }, + { + "epoch": 834.078947368421, + "grad_norm": 1.1950404644012451, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 126780 + }, + { + "epoch": 834.1447368421053, + "grad_norm": 0.943814218044281, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 126790 + }, + { + "epoch": 834.2105263157895, + "grad_norm": 1.0648579597473145, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 126800 + }, + { + "epoch": 834.2763157894736, + "grad_norm": 1.0198975801467896, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 126810 + }, + { + "epoch": 834.3421052631579, + "grad_norm": 1.195588231086731, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 126820 + }, + { + "epoch": 834.4078947368421, + "grad_norm": 0.9909479022026062, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 126830 + }, + { + "epoch": 834.4736842105264, + "grad_norm": 0.8295359015464783, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 126840 + }, + { + "epoch": 834.5394736842105, + "grad_norm": 1.1163146495819092, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 126850 + }, + { + "epoch": 834.6052631578947, + "grad_norm": 0.9339954853057861, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 126860 + }, + { + "epoch": 834.671052631579, + "grad_norm": 1.0907495021820068, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 126870 + }, + { + "epoch": 834.7368421052631, + "grad_norm": 0.7249103784561157, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 126880 + }, + { + "epoch": 834.8026315789474, + "grad_norm": 0.9385533928871155, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 126890 + }, + { + "epoch": 834.8684210526316, + "grad_norm": 1.03643000125885, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 126900 + }, + { + "epoch": 834.9342105263158, + "grad_norm": 1.1739977598190308, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 126910 + }, + { + "epoch": 835.0, + "grad_norm": 0.7921239733695984, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 126920 + }, + { + "epoch": 835.0657894736842, + "grad_norm": 0.7429852485656738, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 126930 + }, + { + "epoch": 835.1315789473684, + "grad_norm": 0.8239948153495789, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 126940 + }, + { + "epoch": 835.1973684210526, + "grad_norm": 0.9873680472373962, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 126950 + }, + { + "epoch": 835.2631578947369, + "grad_norm": 0.8534329533576965, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 126960 + }, + { + "epoch": 835.328947368421, + "grad_norm": 1.498870849609375, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 126970 + }, + { + "epoch": 835.3947368421053, + "grad_norm": 0.9700299501419067, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 126980 + }, + { + "epoch": 835.4605263157895, + "grad_norm": 0.9935576915740967, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 126990 + }, + { + "epoch": 835.5263157894736, + "grad_norm": 1.1261264085769653, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 127000 + }, + { + "epoch": 835.5921052631579, + "grad_norm": 1.2722344398498535, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 127010 + }, + { + "epoch": 835.6578947368421, + "grad_norm": 1.2571996450424194, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 127020 + }, + { + "epoch": 835.7236842105264, + "grad_norm": 1.4331727027893066, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 127030 + }, + { + "epoch": 835.7894736842105, + "grad_norm": 1.5384089946746826, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 127040 + }, + { + "epoch": 835.8552631578947, + "grad_norm": 1.356110692024231, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 127050 + }, + { + "epoch": 835.921052631579, + "grad_norm": 0.8997991681098938, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 127060 + }, + { + "epoch": 835.9868421052631, + "grad_norm": 0.8456292152404785, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 127070 + }, + { + "epoch": 836.0526315789474, + "grad_norm": 1.1669671535491943, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 127080 + }, + { + "epoch": 836.1184210526316, + "grad_norm": 1.0110538005828857, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 127090 + }, + { + "epoch": 836.1842105263158, + "grad_norm": 1.2209850549697876, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 127100 + }, + { + "epoch": 836.25, + "grad_norm": 1.2250803709030151, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 127110 + }, + { + "epoch": 836.3157894736842, + "grad_norm": 0.9316526055335999, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 127120 + }, + { + "epoch": 836.3815789473684, + "grad_norm": 0.9690991044044495, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 127130 + }, + { + "epoch": 836.4473684210526, + "grad_norm": 0.7517539858818054, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 127140 + }, + { + "epoch": 836.5131578947369, + "grad_norm": 1.0882537364959717, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 127150 + }, + { + "epoch": 836.578947368421, + "grad_norm": 1.2422233819961548, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 127160 + }, + { + "epoch": 836.6447368421053, + "grad_norm": 1.1308391094207764, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 127170 + }, + { + "epoch": 836.7105263157895, + "grad_norm": 1.181129813194275, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 127180 + }, + { + "epoch": 836.7763157894736, + "grad_norm": 1.0255203247070312, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 127190 + }, + { + "epoch": 836.8421052631579, + "grad_norm": 1.0835888385772705, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 127200 + }, + { + "epoch": 836.9078947368421, + "grad_norm": 1.4097410440444946, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 127210 + }, + { + "epoch": 836.9736842105264, + "grad_norm": 0.8870857954025269, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 127220 + }, + { + "epoch": 837.0394736842105, + "grad_norm": 0.9946476221084595, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 127230 + }, + { + "epoch": 837.1052631578947, + "grad_norm": 1.268383264541626, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 127240 + }, + { + "epoch": 837.171052631579, + "grad_norm": 1.1119439601898193, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 127250 + }, + { + "epoch": 837.2368421052631, + "grad_norm": 1.3202515840530396, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 127260 + }, + { + "epoch": 837.3026315789474, + "grad_norm": 1.216822624206543, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 127270 + }, + { + "epoch": 837.3684210526316, + "grad_norm": 1.5065245628356934, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 127280 + }, + { + "epoch": 837.4342105263158, + "grad_norm": 1.358711838722229, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 127290 + }, + { + "epoch": 837.5, + "grad_norm": 1.0266478061676025, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 127300 + }, + { + "epoch": 837.5657894736842, + "grad_norm": 1.2015262842178345, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 127310 + }, + { + "epoch": 837.6315789473684, + "grad_norm": 0.8070095777511597, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 127320 + }, + { + "epoch": 837.6973684210526, + "grad_norm": 1.3548043966293335, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 127330 + }, + { + "epoch": 837.7631578947369, + "grad_norm": 1.2841241359710693, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 127340 + }, + { + "epoch": 837.828947368421, + "grad_norm": 1.2051345109939575, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 127350 + }, + { + "epoch": 837.8947368421053, + "grad_norm": 1.303202748298645, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 127360 + }, + { + "epoch": 837.9605263157895, + "grad_norm": 0.9011682271957397, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 127370 + }, + { + "epoch": 838.0263157894736, + "grad_norm": 1.1267516613006592, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 127380 + }, + { + "epoch": 838.0921052631579, + "grad_norm": 1.045426368713379, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 127390 + }, + { + "epoch": 838.1578947368421, + "grad_norm": 1.0214307308197021, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 127400 + }, + { + "epoch": 838.2236842105264, + "grad_norm": 1.4649877548217773, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 127410 + }, + { + "epoch": 838.2894736842105, + "grad_norm": 1.314589262008667, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 127420 + }, + { + "epoch": 838.3552631578947, + "grad_norm": 1.1437108516693115, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 127430 + }, + { + "epoch": 838.421052631579, + "grad_norm": 1.1604773998260498, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 127440 + }, + { + "epoch": 838.4868421052631, + "grad_norm": 1.3208593130111694, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 127450 + }, + { + "epoch": 838.5526315789474, + "grad_norm": 1.03213369846344, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 127460 + }, + { + "epoch": 838.6184210526316, + "grad_norm": 1.0011709928512573, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 127470 + }, + { + "epoch": 838.6842105263158, + "grad_norm": 1.0269711017608643, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 127480 + }, + { + "epoch": 838.75, + "grad_norm": 1.070455551147461, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 127490 + }, + { + "epoch": 838.8157894736842, + "grad_norm": 0.789607584476471, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 127500 + }, + { + "epoch": 838.8815789473684, + "grad_norm": 0.8468894958496094, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 127510 + }, + { + "epoch": 838.9473684210526, + "grad_norm": 0.7091095447540283, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 127520 + }, + { + "epoch": 839.0131578947369, + "grad_norm": 1.0333023071289062, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 127530 + }, + { + "epoch": 839.078947368421, + "grad_norm": 1.031185269355774, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 127540 + }, + { + "epoch": 839.1447368421053, + "grad_norm": 0.9632649421691895, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 127550 + }, + { + "epoch": 839.2105263157895, + "grad_norm": 0.7596598863601685, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 127560 + }, + { + "epoch": 839.2763157894736, + "grad_norm": 0.7665521502494812, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 127570 + }, + { + "epoch": 839.3421052631579, + "grad_norm": 0.8565236926078796, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 127580 + }, + { + "epoch": 839.4078947368421, + "grad_norm": 1.3483563661575317, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 127590 + }, + { + "epoch": 839.4736842105264, + "grad_norm": 1.4640027284622192, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 127600 + }, + { + "epoch": 839.5394736842105, + "grad_norm": 1.413806438446045, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 127610 + }, + { + "epoch": 839.6052631578947, + "grad_norm": 1.3078571557998657, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 127620 + }, + { + "epoch": 839.671052631579, + "grad_norm": 0.8377428650856018, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 127630 + }, + { + "epoch": 839.7368421052631, + "grad_norm": 1.2343833446502686, + "learning_rate": 0.0001, + "loss": 0.0142, + "step": 127640 + }, + { + "epoch": 839.8026315789474, + "grad_norm": 0.7730600833892822, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 127650 + }, + { + "epoch": 839.8684210526316, + "grad_norm": 0.9938012957572937, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 127660 + }, + { + "epoch": 839.9342105263158, + "grad_norm": 0.7523202896118164, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 127670 + }, + { + "epoch": 840.0, + "grad_norm": 0.7400522828102112, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 127680 + }, + { + "epoch": 840.0657894736842, + "grad_norm": 0.9633582234382629, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 127690 + }, + { + "epoch": 840.1315789473684, + "grad_norm": 1.1232542991638184, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 127700 + }, + { + "epoch": 840.1973684210526, + "grad_norm": 1.1191346645355225, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 127710 + }, + { + "epoch": 840.2631578947369, + "grad_norm": 1.43672513961792, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 127720 + }, + { + "epoch": 840.328947368421, + "grad_norm": 0.8802378177642822, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 127730 + }, + { + "epoch": 840.3947368421053, + "grad_norm": 1.0515570640563965, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 127740 + }, + { + "epoch": 840.4605263157895, + "grad_norm": 0.9687642455101013, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 127750 + }, + { + "epoch": 840.5263157894736, + "grad_norm": 0.8646338582038879, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 127760 + }, + { + "epoch": 840.5921052631579, + "grad_norm": 0.8392863869667053, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 127770 + }, + { + "epoch": 840.6578947368421, + "grad_norm": 0.8507763147354126, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 127780 + }, + { + "epoch": 840.7236842105264, + "grad_norm": 0.9287623167037964, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 127790 + }, + { + "epoch": 840.7894736842105, + "grad_norm": 0.7224423289299011, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 127800 + }, + { + "epoch": 840.8552631578947, + "grad_norm": 0.9184572100639343, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 127810 + }, + { + "epoch": 840.921052631579, + "grad_norm": 1.360131859779358, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 127820 + }, + { + "epoch": 840.9868421052631, + "grad_norm": 1.099462628364563, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 127830 + }, + { + "epoch": 841.0526315789474, + "grad_norm": 1.0513808727264404, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 127840 + }, + { + "epoch": 841.1184210526316, + "grad_norm": 1.0300471782684326, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 127850 + }, + { + "epoch": 841.1842105263158, + "grad_norm": 1.2935221195220947, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 127860 + }, + { + "epoch": 841.25, + "grad_norm": 1.2731430530548096, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 127870 + }, + { + "epoch": 841.3157894736842, + "grad_norm": 0.963191032409668, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 127880 + }, + { + "epoch": 841.3815789473684, + "grad_norm": 1.1909409761428833, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 127890 + }, + { + "epoch": 841.4473684210526, + "grad_norm": 1.1539998054504395, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 127900 + }, + { + "epoch": 841.5131578947369, + "grad_norm": 1.3037039041519165, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 127910 + }, + { + "epoch": 841.578947368421, + "grad_norm": 1.0298134088516235, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 127920 + }, + { + "epoch": 841.6447368421053, + "grad_norm": 0.9822731018066406, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 127930 + }, + { + "epoch": 841.7105263157895, + "grad_norm": 0.9702892303466797, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 127940 + }, + { + "epoch": 841.7763157894736, + "grad_norm": 0.8605859279632568, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 127950 + }, + { + "epoch": 841.8421052631579, + "grad_norm": 1.282814383506775, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 127960 + }, + { + "epoch": 841.9078947368421, + "grad_norm": 0.9928876161575317, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 127970 + }, + { + "epoch": 841.9736842105264, + "grad_norm": 1.6558021306991577, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 127980 + }, + { + "epoch": 842.0394736842105, + "grad_norm": 0.8925629258155823, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 127990 + }, + { + "epoch": 842.1052631578947, + "grad_norm": 0.8495152592658997, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 128000 + }, + { + "epoch": 842.171052631579, + "grad_norm": 0.8504860997200012, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 128010 + }, + { + "epoch": 842.2368421052631, + "grad_norm": 1.1367663145065308, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 128020 + }, + { + "epoch": 842.3026315789474, + "grad_norm": 0.9067413210868835, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 128030 + }, + { + "epoch": 842.3684210526316, + "grad_norm": 1.0044593811035156, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 128040 + }, + { + "epoch": 842.4342105263158, + "grad_norm": 1.2345927953720093, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 128050 + }, + { + "epoch": 842.5, + "grad_norm": 1.3313305377960205, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 128060 + }, + { + "epoch": 842.5657894736842, + "grad_norm": 1.3206311464309692, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 128070 + }, + { + "epoch": 842.6315789473684, + "grad_norm": 0.9550430774688721, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 128080 + }, + { + "epoch": 842.6973684210526, + "grad_norm": 1.156746745109558, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 128090 + }, + { + "epoch": 842.7631578947369, + "grad_norm": 1.0178402662277222, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 128100 + }, + { + "epoch": 842.828947368421, + "grad_norm": 1.0084766149520874, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 128110 + }, + { + "epoch": 842.8947368421053, + "grad_norm": 1.0070236921310425, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 128120 + }, + { + "epoch": 842.9605263157895, + "grad_norm": 0.9018028378486633, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 128130 + }, + { + "epoch": 843.0263157894736, + "grad_norm": 0.7280605435371399, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 128140 + }, + { + "epoch": 843.0921052631579, + "grad_norm": 1.2799079418182373, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 128150 + }, + { + "epoch": 843.1578947368421, + "grad_norm": 1.1699326038360596, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 128160 + }, + { + "epoch": 843.2236842105264, + "grad_norm": 1.3281631469726562, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 128170 + }, + { + "epoch": 843.2894736842105, + "grad_norm": 1.5978515148162842, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 128180 + }, + { + "epoch": 843.3552631578947, + "grad_norm": 1.2502210140228271, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 128190 + }, + { + "epoch": 843.421052631579, + "grad_norm": 1.4333372116088867, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 128200 + }, + { + "epoch": 843.4868421052631, + "grad_norm": 0.8813333511352539, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 128210 + }, + { + "epoch": 843.5526315789474, + "grad_norm": 1.101881980895996, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 128220 + }, + { + "epoch": 843.6184210526316, + "grad_norm": 0.9753175377845764, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 128230 + }, + { + "epoch": 843.6842105263158, + "grad_norm": 1.0207269191741943, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 128240 + }, + { + "epoch": 843.75, + "grad_norm": 0.8684105277061462, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 128250 + }, + { + "epoch": 843.8157894736842, + "grad_norm": 0.8891376852989197, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 128260 + }, + { + "epoch": 843.8815789473684, + "grad_norm": 0.7383885979652405, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 128270 + }, + { + "epoch": 843.9473684210526, + "grad_norm": 0.9492682814598083, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 128280 + }, + { + "epoch": 844.0131578947369, + "grad_norm": 0.815500020980835, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 128290 + }, + { + "epoch": 844.078947368421, + "grad_norm": 0.8752801418304443, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 128300 + }, + { + "epoch": 844.1447368421053, + "grad_norm": 1.1680850982666016, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 128310 + }, + { + "epoch": 844.2105263157895, + "grad_norm": 1.0762124061584473, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 128320 + }, + { + "epoch": 844.2763157894736, + "grad_norm": 1.2257249355316162, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 128330 + }, + { + "epoch": 844.3421052631579, + "grad_norm": 1.2510292530059814, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 128340 + }, + { + "epoch": 844.4078947368421, + "grad_norm": 1.0363690853118896, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 128350 + }, + { + "epoch": 844.4736842105264, + "grad_norm": 1.1200023889541626, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 128360 + }, + { + "epoch": 844.5394736842105, + "grad_norm": 1.0740878582000732, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 128370 + }, + { + "epoch": 844.6052631578947, + "grad_norm": 1.1896917819976807, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 128380 + }, + { + "epoch": 844.671052631579, + "grad_norm": 1.0779705047607422, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 128390 + }, + { + "epoch": 844.7368421052631, + "grad_norm": 0.555634617805481, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 128400 + }, + { + "epoch": 844.8026315789474, + "grad_norm": 0.9556017518043518, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 128410 + }, + { + "epoch": 844.8684210526316, + "grad_norm": 0.8419582843780518, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 128420 + }, + { + "epoch": 844.9342105263158, + "grad_norm": 1.0701279640197754, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 128430 + }, + { + "epoch": 845.0, + "grad_norm": 1.3151577711105347, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 128440 + }, + { + "epoch": 845.0657894736842, + "grad_norm": 1.086671233177185, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 128450 + }, + { + "epoch": 845.1315789473684, + "grad_norm": 1.2113654613494873, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 128460 + }, + { + "epoch": 845.1973684210526, + "grad_norm": 1.2643672227859497, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 128470 + }, + { + "epoch": 845.2631578947369, + "grad_norm": 1.0714949369430542, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 128480 + }, + { + "epoch": 845.328947368421, + "grad_norm": 0.7351527214050293, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 128490 + }, + { + "epoch": 845.3947368421053, + "grad_norm": 1.0877636671066284, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 128500 + }, + { + "epoch": 845.4605263157895, + "grad_norm": 0.6363227963447571, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 128510 + }, + { + "epoch": 845.5263157894736, + "grad_norm": 0.7102157473564148, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 128520 + }, + { + "epoch": 845.5921052631579, + "grad_norm": 0.9936819076538086, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 128530 + }, + { + "epoch": 845.6578947368421, + "grad_norm": 0.7047979831695557, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 128540 + }, + { + "epoch": 845.7236842105264, + "grad_norm": 1.017305850982666, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 128550 + }, + { + "epoch": 845.7894736842105, + "grad_norm": 1.3176766633987427, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 128560 + }, + { + "epoch": 845.8552631578947, + "grad_norm": 1.1096950769424438, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 128570 + }, + { + "epoch": 845.921052631579, + "grad_norm": 1.1976255178451538, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 128580 + }, + { + "epoch": 845.9868421052631, + "grad_norm": 1.0794117450714111, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 128590 + }, + { + "epoch": 846.0526315789474, + "grad_norm": 0.8604280948638916, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 128600 + }, + { + "epoch": 846.1184210526316, + "grad_norm": 0.8841588497161865, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 128610 + }, + { + "epoch": 846.1842105263158, + "grad_norm": 0.9039338827133179, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 128620 + }, + { + "epoch": 846.25, + "grad_norm": 0.7028020024299622, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 128630 + }, + { + "epoch": 846.3157894736842, + "grad_norm": 1.018131136894226, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 128640 + }, + { + "epoch": 846.3815789473684, + "grad_norm": 1.4011059999465942, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 128650 + }, + { + "epoch": 846.4473684210526, + "grad_norm": 1.176498293876648, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 128660 + }, + { + "epoch": 846.5131578947369, + "grad_norm": 1.150488257408142, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 128670 + }, + { + "epoch": 846.578947368421, + "grad_norm": 1.1439628601074219, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 128680 + }, + { + "epoch": 846.6447368421053, + "grad_norm": 1.0313776731491089, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 128690 + }, + { + "epoch": 846.7105263157895, + "grad_norm": 0.8566111922264099, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 128700 + }, + { + "epoch": 846.7763157894736, + "grad_norm": 1.240422010421753, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 128710 + }, + { + "epoch": 846.8421052631579, + "grad_norm": 0.8956599831581116, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 128720 + }, + { + "epoch": 846.9078947368421, + "grad_norm": 1.075857400894165, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 128730 + }, + { + "epoch": 846.9736842105264, + "grad_norm": 1.1086944341659546, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 128740 + }, + { + "epoch": 847.0394736842105, + "grad_norm": 0.7189145684242249, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 128750 + }, + { + "epoch": 847.1052631578947, + "grad_norm": 1.0675982236862183, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 128760 + }, + { + "epoch": 847.171052631579, + "grad_norm": 1.0329749584197998, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 128770 + }, + { + "epoch": 847.2368421052631, + "grad_norm": 0.8450028896331787, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 128780 + }, + { + "epoch": 847.3026315789474, + "grad_norm": 1.0242254734039307, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 128790 + }, + { + "epoch": 847.3684210526316, + "grad_norm": 1.0955355167388916, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 128800 + }, + { + "epoch": 847.4342105263158, + "grad_norm": 0.9389364123344421, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 128810 + }, + { + "epoch": 847.5, + "grad_norm": 0.7814875245094299, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 128820 + }, + { + "epoch": 847.5657894736842, + "grad_norm": 1.0729169845581055, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 128830 + }, + { + "epoch": 847.6315789473684, + "grad_norm": 1.7011371850967407, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 128840 + }, + { + "epoch": 847.6973684210526, + "grad_norm": 1.1857264041900635, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 128850 + }, + { + "epoch": 847.7631578947369, + "grad_norm": 1.284214973449707, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 128860 + }, + { + "epoch": 847.828947368421, + "grad_norm": 1.111433982849121, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 128870 + }, + { + "epoch": 847.8947368421053, + "grad_norm": 0.7958429455757141, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 128880 + }, + { + "epoch": 847.9605263157895, + "grad_norm": 0.8999338150024414, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 128890 + }, + { + "epoch": 848.0263157894736, + "grad_norm": 1.1462279558181763, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 128900 + }, + { + "epoch": 848.0921052631579, + "grad_norm": 1.2270578145980835, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 128910 + }, + { + "epoch": 848.1578947368421, + "grad_norm": 1.2196954488754272, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 128920 + }, + { + "epoch": 848.2236842105264, + "grad_norm": 1.0865422487258911, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 128930 + }, + { + "epoch": 848.2894736842105, + "grad_norm": 0.8074581623077393, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 128940 + }, + { + "epoch": 848.3552631578947, + "grad_norm": 1.2923625707626343, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 128950 + }, + { + "epoch": 848.421052631579, + "grad_norm": 1.020020842552185, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 128960 + }, + { + "epoch": 848.4868421052631, + "grad_norm": 0.9365369081497192, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 128970 + }, + { + "epoch": 848.5526315789474, + "grad_norm": 1.216340184211731, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 128980 + }, + { + "epoch": 848.6184210526316, + "grad_norm": 1.2271922826766968, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 128990 + }, + { + "epoch": 848.6842105263158, + "grad_norm": 1.1109647750854492, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 129000 + }, + { + "epoch": 848.75, + "grad_norm": 1.0088245868682861, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 129010 + }, + { + "epoch": 848.8157894736842, + "grad_norm": 1.1272104978561401, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 129020 + }, + { + "epoch": 848.8815789473684, + "grad_norm": 1.0856529474258423, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 129030 + }, + { + "epoch": 848.9473684210526, + "grad_norm": 0.8239693641662598, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 129040 + }, + { + "epoch": 849.0131578947369, + "grad_norm": 1.0656987428665161, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 129050 + }, + { + "epoch": 849.078947368421, + "grad_norm": 1.2168710231781006, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 129060 + }, + { + "epoch": 849.1447368421053, + "grad_norm": 1.1486916542053223, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 129070 + }, + { + "epoch": 849.2105263157895, + "grad_norm": 1.0890756845474243, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 129080 + }, + { + "epoch": 849.2763157894736, + "grad_norm": 1.3131881952285767, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 129090 + }, + { + "epoch": 849.3421052631579, + "grad_norm": 0.8249554634094238, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 129100 + }, + { + "epoch": 849.4078947368421, + "grad_norm": 0.9621846079826355, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 129110 + }, + { + "epoch": 849.4736842105264, + "grad_norm": 0.8949623703956604, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 129120 + }, + { + "epoch": 849.5394736842105, + "grad_norm": 0.9129248261451721, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 129130 + }, + { + "epoch": 849.6052631578947, + "grad_norm": 1.3539313077926636, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 129140 + }, + { + "epoch": 849.671052631579, + "grad_norm": 1.445449948310852, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 129150 + }, + { + "epoch": 849.7368421052631, + "grad_norm": 1.0846349000930786, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 129160 + }, + { + "epoch": 849.8026315789474, + "grad_norm": 1.1930545568466187, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 129170 + }, + { + "epoch": 849.8684210526316, + "grad_norm": 1.0064821243286133, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 129180 + }, + { + "epoch": 849.9342105263158, + "grad_norm": 0.9630594849586487, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 129190 + }, + { + "epoch": 850.0, + "grad_norm": 1.0653307437896729, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 129200 + }, + { + "epoch": 850.0657894736842, + "grad_norm": 0.947507381439209, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 129210 + }, + { + "epoch": 850.1315789473684, + "grad_norm": 0.9130157232284546, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 129220 + }, + { + "epoch": 850.1973684210526, + "grad_norm": 0.8457781076431274, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 129230 + }, + { + "epoch": 850.2631578947369, + "grad_norm": 0.7123493552207947, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 129240 + }, + { + "epoch": 850.328947368421, + "grad_norm": 0.9977707862854004, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 129250 + }, + { + "epoch": 850.3947368421053, + "grad_norm": 0.7980003356933594, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 129260 + }, + { + "epoch": 850.4605263157895, + "grad_norm": 1.2628130912780762, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 129270 + }, + { + "epoch": 850.5263157894736, + "grad_norm": 1.2784351110458374, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 129280 + }, + { + "epoch": 850.5921052631579, + "grad_norm": 0.8135380148887634, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 129290 + }, + { + "epoch": 850.6578947368421, + "grad_norm": 1.2080432176589966, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 129300 + }, + { + "epoch": 850.7236842105264, + "grad_norm": 1.1570539474487305, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 129310 + }, + { + "epoch": 850.7894736842105, + "grad_norm": 0.8722181916236877, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 129320 + }, + { + "epoch": 850.8552631578947, + "grad_norm": 0.9665051698684692, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 129330 + }, + { + "epoch": 850.921052631579, + "grad_norm": 1.156315565109253, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 129340 + }, + { + "epoch": 850.9868421052631, + "grad_norm": 1.3095405101776123, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 129350 + }, + { + "epoch": 851.0526315789474, + "grad_norm": 1.2862575054168701, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 129360 + }, + { + "epoch": 851.1184210526316, + "grad_norm": 1.2178730964660645, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 129370 + }, + { + "epoch": 851.1842105263158, + "grad_norm": 1.331790804862976, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 129380 + }, + { + "epoch": 851.25, + "grad_norm": 1.1525365114212036, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 129390 + }, + { + "epoch": 851.3157894736842, + "grad_norm": 1.2628202438354492, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 129400 + }, + { + "epoch": 851.3815789473684, + "grad_norm": 0.9051490426063538, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 129410 + }, + { + "epoch": 851.4473684210526, + "grad_norm": 0.9689465165138245, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 129420 + }, + { + "epoch": 851.5131578947369, + "grad_norm": 0.8615492582321167, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 129430 + }, + { + "epoch": 851.578947368421, + "grad_norm": 0.9767460823059082, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 129440 + }, + { + "epoch": 851.6447368421053, + "grad_norm": 0.9622799158096313, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 129450 + }, + { + "epoch": 851.7105263157895, + "grad_norm": 1.0682460069656372, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 129460 + }, + { + "epoch": 851.7763157894736, + "grad_norm": 0.5867149233818054, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 129470 + }, + { + "epoch": 851.8421052631579, + "grad_norm": 0.9323984384536743, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 129480 + }, + { + "epoch": 851.9078947368421, + "grad_norm": 1.0186138153076172, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 129490 + }, + { + "epoch": 851.9736842105264, + "grad_norm": 1.1094682216644287, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 129500 + }, + { + "epoch": 852.0394736842105, + "grad_norm": 1.0459119081497192, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 129510 + }, + { + "epoch": 852.1052631578947, + "grad_norm": 0.9179003238677979, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 129520 + }, + { + "epoch": 852.171052631579, + "grad_norm": 1.0056957006454468, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 129530 + }, + { + "epoch": 852.2368421052631, + "grad_norm": 0.6981986165046692, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 129540 + }, + { + "epoch": 852.3026315789474, + "grad_norm": 1.2573353052139282, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 129550 + }, + { + "epoch": 852.3684210526316, + "grad_norm": 0.9952992796897888, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 129560 + }, + { + "epoch": 852.4342105263158, + "grad_norm": 0.9949916005134583, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 129570 + }, + { + "epoch": 852.5, + "grad_norm": 1.2157580852508545, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 129580 + }, + { + "epoch": 852.5657894736842, + "grad_norm": 1.0513194799423218, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 129590 + }, + { + "epoch": 852.6315789473684, + "grad_norm": 1.3504682779312134, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 129600 + }, + { + "epoch": 852.6973684210526, + "grad_norm": 0.9804072380065918, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 129610 + }, + { + "epoch": 852.7631578947369, + "grad_norm": 1.3309262990951538, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 129620 + }, + { + "epoch": 852.828947368421, + "grad_norm": 1.3208234310150146, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 129630 + }, + { + "epoch": 852.8947368421053, + "grad_norm": 1.8901704549789429, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 129640 + }, + { + "epoch": 852.9605263157895, + "grad_norm": 1.7452239990234375, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 129650 + }, + { + "epoch": 853.0263157894736, + "grad_norm": 1.2675105333328247, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 129660 + }, + { + "epoch": 853.0921052631579, + "grad_norm": 0.9307915568351746, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 129670 + }, + { + "epoch": 853.1578947368421, + "grad_norm": 1.136796236038208, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 129680 + }, + { + "epoch": 853.2236842105264, + "grad_norm": 1.2172702550888062, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 129690 + }, + { + "epoch": 853.2894736842105, + "grad_norm": 1.3970834016799927, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 129700 + }, + { + "epoch": 853.3552631578947, + "grad_norm": 1.3139710426330566, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 129710 + }, + { + "epoch": 853.421052631579, + "grad_norm": 1.085668683052063, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 129720 + }, + { + "epoch": 853.4868421052631, + "grad_norm": 1.054573655128479, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 129730 + }, + { + "epoch": 853.5526315789474, + "grad_norm": 1.22200345993042, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 129740 + }, + { + "epoch": 853.6184210526316, + "grad_norm": 1.0596305131912231, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 129750 + }, + { + "epoch": 853.6842105263158, + "grad_norm": 1.0495727062225342, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 129760 + }, + { + "epoch": 853.75, + "grad_norm": 1.4085642099380493, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 129770 + }, + { + "epoch": 853.8157894736842, + "grad_norm": 1.1661970615386963, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 129780 + }, + { + "epoch": 853.8815789473684, + "grad_norm": 1.3750501871109009, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 129790 + }, + { + "epoch": 853.9473684210526, + "grad_norm": 1.349413275718689, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 129800 + }, + { + "epoch": 854.0131578947369, + "grad_norm": 1.1775285005569458, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 129810 + }, + { + "epoch": 854.078947368421, + "grad_norm": 1.127249836921692, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 129820 + }, + { + "epoch": 854.1447368421053, + "grad_norm": 1.1303577423095703, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 129830 + }, + { + "epoch": 854.2105263157895, + "grad_norm": 1.0692039728164673, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 129840 + }, + { + "epoch": 854.2763157894736, + "grad_norm": 1.2041025161743164, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 129850 + }, + { + "epoch": 854.3421052631579, + "grad_norm": 0.8241481184959412, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 129860 + }, + { + "epoch": 854.4078947368421, + "grad_norm": 0.9176292419433594, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 129870 + }, + { + "epoch": 854.4736842105264, + "grad_norm": 1.2800862789154053, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 129880 + }, + { + "epoch": 854.5394736842105, + "grad_norm": 0.7797620892524719, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 129890 + }, + { + "epoch": 854.6052631578947, + "grad_norm": 1.4259511232376099, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 129900 + }, + { + "epoch": 854.671052631579, + "grad_norm": 1.1848866939544678, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 129910 + }, + { + "epoch": 854.7368421052631, + "grad_norm": 0.8558827042579651, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 129920 + }, + { + "epoch": 854.8026315789474, + "grad_norm": 0.9844433069229126, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 129930 + }, + { + "epoch": 854.8684210526316, + "grad_norm": 1.2203624248504639, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 129940 + }, + { + "epoch": 854.9342105263158, + "grad_norm": 1.2477107048034668, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 129950 + }, + { + "epoch": 855.0, + "grad_norm": 1.1950006484985352, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 129960 + }, + { + "epoch": 855.0657894736842, + "grad_norm": 0.8927521109580994, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 129970 + }, + { + "epoch": 855.1315789473684, + "grad_norm": 1.3346248865127563, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 129980 + }, + { + "epoch": 855.1973684210526, + "grad_norm": 1.2857704162597656, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 129990 + }, + { + "epoch": 855.2631578947369, + "grad_norm": 1.3358838558197021, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 130000 + }, + { + "epoch": 855.328947368421, + "grad_norm": 1.1128495931625366, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 130010 + }, + { + "epoch": 855.3947368421053, + "grad_norm": 1.34784734249115, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 130020 + }, + { + "epoch": 855.4605263157895, + "grad_norm": 1.322873592376709, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 130030 + }, + { + "epoch": 855.5263157894736, + "grad_norm": 0.7924045324325562, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 130040 + }, + { + "epoch": 855.5921052631579, + "grad_norm": 0.9343613982200623, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 130050 + }, + { + "epoch": 855.6578947368421, + "grad_norm": 0.847163736820221, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 130060 + }, + { + "epoch": 855.7236842105264, + "grad_norm": 0.8668592572212219, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 130070 + }, + { + "epoch": 855.7894736842105, + "grad_norm": 1.1902350187301636, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 130080 + }, + { + "epoch": 855.8552631578947, + "grad_norm": 0.9959059357643127, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 130090 + }, + { + "epoch": 855.921052631579, + "grad_norm": 0.9965035319328308, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 130100 + }, + { + "epoch": 855.9868421052631, + "grad_norm": 0.8111009001731873, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 130110 + }, + { + "epoch": 856.0526315789474, + "grad_norm": 0.8355821967124939, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 130120 + }, + { + "epoch": 856.1184210526316, + "grad_norm": 0.9395269155502319, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 130130 + }, + { + "epoch": 856.1842105263158, + "grad_norm": 0.7983924150466919, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 130140 + }, + { + "epoch": 856.25, + "grad_norm": 1.0904862880706787, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 130150 + }, + { + "epoch": 856.3157894736842, + "grad_norm": 0.8742513656616211, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 130160 + }, + { + "epoch": 856.3815789473684, + "grad_norm": 1.308323860168457, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 130170 + }, + { + "epoch": 856.4473684210526, + "grad_norm": 0.660823404788971, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 130180 + }, + { + "epoch": 856.5131578947369, + "grad_norm": 0.7933557033538818, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 130190 + }, + { + "epoch": 856.578947368421, + "grad_norm": 1.5122641324996948, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 130200 + }, + { + "epoch": 856.6447368421053, + "grad_norm": 1.1528301239013672, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 130210 + }, + { + "epoch": 856.7105263157895, + "grad_norm": 0.9372194409370422, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 130220 + }, + { + "epoch": 856.7763157894736, + "grad_norm": 1.312410831451416, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 130230 + }, + { + "epoch": 856.8421052631579, + "grad_norm": 0.9367921948432922, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 130240 + }, + { + "epoch": 856.9078947368421, + "grad_norm": 1.0946214199066162, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 130250 + }, + { + "epoch": 856.9736842105264, + "grad_norm": 1.2462278604507446, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 130260 + }, + { + "epoch": 857.0394736842105, + "grad_norm": 0.9840075373649597, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 130270 + }, + { + "epoch": 857.1052631578947, + "grad_norm": 1.2592564821243286, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 130280 + }, + { + "epoch": 857.171052631579, + "grad_norm": 1.078933835029602, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 130290 + }, + { + "epoch": 857.2368421052631, + "grad_norm": 1.3728272914886475, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 130300 + }, + { + "epoch": 857.3026315789474, + "grad_norm": 1.05517578125, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 130310 + }, + { + "epoch": 857.3684210526316, + "grad_norm": 1.3198639154434204, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 130320 + }, + { + "epoch": 857.4342105263158, + "grad_norm": 1.3628894090652466, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 130330 + }, + { + "epoch": 857.5, + "grad_norm": 1.2717257738113403, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 130340 + }, + { + "epoch": 857.5657894736842, + "grad_norm": 1.429996371269226, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 130350 + }, + { + "epoch": 857.6315789473684, + "grad_norm": 0.9314458966255188, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 130360 + }, + { + "epoch": 857.6973684210526, + "grad_norm": 1.0293179750442505, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 130370 + }, + { + "epoch": 857.7631578947369, + "grad_norm": 0.7930873036384583, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 130380 + }, + { + "epoch": 857.828947368421, + "grad_norm": 1.0124858617782593, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 130390 + }, + { + "epoch": 857.8947368421053, + "grad_norm": 0.6720251441001892, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 130400 + }, + { + "epoch": 857.9605263157895, + "grad_norm": 1.2420949935913086, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 130410 + }, + { + "epoch": 858.0263157894736, + "grad_norm": 1.295690655708313, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 130420 + }, + { + "epoch": 858.0921052631579, + "grad_norm": 0.9636945128440857, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 130430 + }, + { + "epoch": 858.1578947368421, + "grad_norm": 0.9246024489402771, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 130440 + }, + { + "epoch": 858.2236842105264, + "grad_norm": 1.116898536682129, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 130450 + }, + { + "epoch": 858.2894736842105, + "grad_norm": 1.295310378074646, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 130460 + }, + { + "epoch": 858.3552631578947, + "grad_norm": 1.0627254247665405, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 130470 + }, + { + "epoch": 858.421052631579, + "grad_norm": 0.8008264899253845, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 130480 + }, + { + "epoch": 858.4868421052631, + "grad_norm": 1.239762783050537, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 130490 + }, + { + "epoch": 858.5526315789474, + "grad_norm": 0.9881368279457092, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 130500 + }, + { + "epoch": 858.6184210526316, + "grad_norm": 1.0925045013427734, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 130510 + }, + { + "epoch": 858.6842105263158, + "grad_norm": 0.9376081824302673, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 130520 + }, + { + "epoch": 858.75, + "grad_norm": 0.8281996250152588, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 130530 + }, + { + "epoch": 858.8157894736842, + "grad_norm": 0.5148002505302429, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 130540 + }, + { + "epoch": 858.8815789473684, + "grad_norm": 0.9477270245552063, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 130550 + }, + { + "epoch": 858.9473684210526, + "grad_norm": 1.0532981157302856, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 130560 + }, + { + "epoch": 859.0131578947369, + "grad_norm": 1.0642023086547852, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 130570 + }, + { + "epoch": 859.078947368421, + "grad_norm": 1.0734174251556396, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 130580 + }, + { + "epoch": 859.1447368421053, + "grad_norm": 1.0683674812316895, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 130590 + }, + { + "epoch": 859.2105263157895, + "grad_norm": 0.8307974934577942, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 130600 + }, + { + "epoch": 859.2763157894736, + "grad_norm": 0.9671717882156372, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 130610 + }, + { + "epoch": 859.3421052631579, + "grad_norm": 0.8904266953468323, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 130620 + }, + { + "epoch": 859.4078947368421, + "grad_norm": 1.097537875175476, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 130630 + }, + { + "epoch": 859.4736842105264, + "grad_norm": 0.6235876679420471, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 130640 + }, + { + "epoch": 859.5394736842105, + "grad_norm": 1.1195687055587769, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 130650 + }, + { + "epoch": 859.6052631578947, + "grad_norm": 1.0346473455429077, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 130660 + }, + { + "epoch": 859.671052631579, + "grad_norm": 1.2493486404418945, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 130670 + }, + { + "epoch": 859.7368421052631, + "grad_norm": 1.1328728199005127, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 130680 + }, + { + "epoch": 859.8026315789474, + "grad_norm": 1.057497501373291, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 130690 + }, + { + "epoch": 859.8684210526316, + "grad_norm": 1.1181135177612305, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 130700 + }, + { + "epoch": 859.9342105263158, + "grad_norm": 1.242897391319275, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 130710 + }, + { + "epoch": 860.0, + "grad_norm": 1.0586951971054077, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 130720 + }, + { + "epoch": 860.0657894736842, + "grad_norm": 1.2389774322509766, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 130730 + }, + { + "epoch": 860.1315789473684, + "grad_norm": 1.0785973072052002, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 130740 + }, + { + "epoch": 860.1973684210526, + "grad_norm": 1.104970932006836, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 130750 + }, + { + "epoch": 860.2631578947369, + "grad_norm": 0.8328081965446472, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 130760 + }, + { + "epoch": 860.328947368421, + "grad_norm": 0.955934464931488, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 130770 + }, + { + "epoch": 860.3947368421053, + "grad_norm": 1.1552305221557617, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 130780 + }, + { + "epoch": 860.4605263157895, + "grad_norm": 1.000320553779602, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 130790 + }, + { + "epoch": 860.5263157894736, + "grad_norm": 0.9015398025512695, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 130800 + }, + { + "epoch": 860.5921052631579, + "grad_norm": 0.6215592622756958, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 130810 + }, + { + "epoch": 860.6578947368421, + "grad_norm": 0.9939150810241699, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 130820 + }, + { + "epoch": 860.7236842105264, + "grad_norm": 1.01790452003479, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 130830 + }, + { + "epoch": 860.7894736842105, + "grad_norm": 1.468485951423645, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 130840 + }, + { + "epoch": 860.8552631578947, + "grad_norm": 1.1106871366500854, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 130850 + }, + { + "epoch": 860.921052631579, + "grad_norm": 0.834820568561554, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 130860 + }, + { + "epoch": 860.9868421052631, + "grad_norm": 0.9590187072753906, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 130870 + }, + { + "epoch": 861.0526315789474, + "grad_norm": 1.1876466274261475, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 130880 + }, + { + "epoch": 861.1184210526316, + "grad_norm": 0.968468427658081, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 130890 + }, + { + "epoch": 861.1842105263158, + "grad_norm": 0.8767639398574829, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 130900 + }, + { + "epoch": 861.25, + "grad_norm": 0.9714983105659485, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 130910 + }, + { + "epoch": 861.3157894736842, + "grad_norm": 1.2602828741073608, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 130920 + }, + { + "epoch": 861.3815789473684, + "grad_norm": 1.2543740272521973, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 130930 + }, + { + "epoch": 861.4473684210526, + "grad_norm": 1.088036060333252, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 130940 + }, + { + "epoch": 861.5131578947369, + "grad_norm": 1.3729690313339233, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 130950 + }, + { + "epoch": 861.578947368421, + "grad_norm": 1.1072582006454468, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 130960 + }, + { + "epoch": 861.6447368421053, + "grad_norm": 1.1380093097686768, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 130970 + }, + { + "epoch": 861.7105263157895, + "grad_norm": 1.0508384704589844, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 130980 + }, + { + "epoch": 861.7763157894736, + "grad_norm": 1.1020636558532715, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 130990 + }, + { + "epoch": 861.8421052631579, + "grad_norm": 1.138003945350647, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 131000 + }, + { + "epoch": 861.9078947368421, + "grad_norm": 1.2972642183303833, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 131010 + }, + { + "epoch": 861.9736842105264, + "grad_norm": 0.911027729511261, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 131020 + }, + { + "epoch": 862.0394736842105, + "grad_norm": 0.9824381470680237, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 131030 + }, + { + "epoch": 862.1052631578947, + "grad_norm": 1.227348804473877, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 131040 + }, + { + "epoch": 862.171052631579, + "grad_norm": 0.9096339344978333, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 131050 + }, + { + "epoch": 862.2368421052631, + "grad_norm": 1.2303411960601807, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 131060 + }, + { + "epoch": 862.3026315789474, + "grad_norm": 0.8208591938018799, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 131070 + }, + { + "epoch": 862.3684210526316, + "grad_norm": 1.1471190452575684, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 131080 + }, + { + "epoch": 862.4342105263158, + "grad_norm": 0.8580765128135681, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 131090 + }, + { + "epoch": 862.5, + "grad_norm": 0.9216636419296265, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 131100 + }, + { + "epoch": 862.5657894736842, + "grad_norm": 1.3407820463180542, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 131110 + }, + { + "epoch": 862.6315789473684, + "grad_norm": 1.359041452407837, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 131120 + }, + { + "epoch": 862.6973684210526, + "grad_norm": 1.231490969657898, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 131130 + }, + { + "epoch": 862.7631578947369, + "grad_norm": 1.0086109638214111, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 131140 + }, + { + "epoch": 862.828947368421, + "grad_norm": 0.9800273180007935, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 131150 + }, + { + "epoch": 862.8947368421053, + "grad_norm": 1.3657548427581787, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 131160 + }, + { + "epoch": 862.9605263157895, + "grad_norm": 1.1648259162902832, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 131170 + }, + { + "epoch": 863.0263157894736, + "grad_norm": 1.3625643253326416, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 131180 + }, + { + "epoch": 863.0921052631579, + "grad_norm": 0.833634614944458, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 131190 + }, + { + "epoch": 863.1578947368421, + "grad_norm": 0.7656792402267456, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 131200 + }, + { + "epoch": 863.2236842105264, + "grad_norm": 1.2618483304977417, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 131210 + }, + { + "epoch": 863.2894736842105, + "grad_norm": 1.133224606513977, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 131220 + }, + { + "epoch": 863.3552631578947, + "grad_norm": 1.1126824617385864, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 131230 + }, + { + "epoch": 863.421052631579, + "grad_norm": 1.0469779968261719, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 131240 + }, + { + "epoch": 863.4868421052631, + "grad_norm": 1.0890519618988037, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 131250 + }, + { + "epoch": 863.5526315789474, + "grad_norm": 1.1236612796783447, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 131260 + }, + { + "epoch": 863.6184210526316, + "grad_norm": 1.1163815259933472, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 131270 + }, + { + "epoch": 863.6842105263158, + "grad_norm": 0.9706016182899475, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 131280 + }, + { + "epoch": 863.75, + "grad_norm": 0.7366560697555542, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 131290 + }, + { + "epoch": 863.8157894736842, + "grad_norm": 1.2588547468185425, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 131300 + }, + { + "epoch": 863.8815789473684, + "grad_norm": 0.7898572087287903, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 131310 + }, + { + "epoch": 863.9473684210526, + "grad_norm": 1.2470197677612305, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 131320 + }, + { + "epoch": 864.0131578947369, + "grad_norm": 1.060998797416687, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 131330 + }, + { + "epoch": 864.078947368421, + "grad_norm": 0.6772517561912537, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 131340 + }, + { + "epoch": 864.1447368421053, + "grad_norm": 1.205532193183899, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 131350 + }, + { + "epoch": 864.2105263157895, + "grad_norm": 1.150336742401123, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 131360 + }, + { + "epoch": 864.2763157894736, + "grad_norm": 0.8620902895927429, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 131370 + }, + { + "epoch": 864.3421052631579, + "grad_norm": 1.0216280221939087, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 131380 + }, + { + "epoch": 864.4078947368421, + "grad_norm": 1.0433382987976074, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 131390 + }, + { + "epoch": 864.4736842105264, + "grad_norm": 1.3569798469543457, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 131400 + }, + { + "epoch": 864.5394736842105, + "grad_norm": 1.1768606901168823, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 131410 + }, + { + "epoch": 864.6052631578947, + "grad_norm": 1.2419917583465576, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 131420 + }, + { + "epoch": 864.671052631579, + "grad_norm": 1.128338098526001, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 131430 + }, + { + "epoch": 864.7368421052631, + "grad_norm": 1.1112442016601562, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 131440 + }, + { + "epoch": 864.8026315789474, + "grad_norm": 1.0152097940444946, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 131450 + }, + { + "epoch": 864.8684210526316, + "grad_norm": 0.9926842451095581, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 131460 + }, + { + "epoch": 864.9342105263158, + "grad_norm": 0.9706729054450989, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 131470 + }, + { + "epoch": 865.0, + "grad_norm": 0.9864161610603333, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 131480 + }, + { + "epoch": 865.0657894736842, + "grad_norm": 1.234376072883606, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 131490 + }, + { + "epoch": 865.1315789473684, + "grad_norm": 0.8708391189575195, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 131500 + }, + { + "epoch": 865.1973684210526, + "grad_norm": 1.2367292642593384, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 131510 + }, + { + "epoch": 865.2631578947369, + "grad_norm": 1.1945315599441528, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 131520 + }, + { + "epoch": 865.328947368421, + "grad_norm": 1.3687530755996704, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 131530 + }, + { + "epoch": 865.3947368421053, + "grad_norm": 1.2335397005081177, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 131540 + }, + { + "epoch": 865.4605263157895, + "grad_norm": 0.9434360265731812, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 131550 + }, + { + "epoch": 865.5263157894736, + "grad_norm": 1.020836591720581, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 131560 + }, + { + "epoch": 865.5921052631579, + "grad_norm": 1.0214914083480835, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 131570 + }, + { + "epoch": 865.6578947368421, + "grad_norm": 1.2193523645401, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 131580 + }, + { + "epoch": 865.7236842105264, + "grad_norm": 1.0998847484588623, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 131590 + }, + { + "epoch": 865.7894736842105, + "grad_norm": 1.1614649295806885, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 131600 + }, + { + "epoch": 865.8552631578947, + "grad_norm": 1.3147051334381104, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 131610 + }, + { + "epoch": 865.921052631579, + "grad_norm": 0.9823493957519531, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 131620 + }, + { + "epoch": 865.9868421052631, + "grad_norm": 0.8709498643875122, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 131630 + }, + { + "epoch": 866.0526315789474, + "grad_norm": 1.4504932165145874, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 131640 + }, + { + "epoch": 866.1184210526316, + "grad_norm": 0.9600393772125244, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 131650 + }, + { + "epoch": 866.1842105263158, + "grad_norm": 0.9422047734260559, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 131660 + }, + { + "epoch": 866.25, + "grad_norm": 0.9015654921531677, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 131670 + }, + { + "epoch": 866.3157894736842, + "grad_norm": 1.2097015380859375, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 131680 + }, + { + "epoch": 866.3815789473684, + "grad_norm": 1.0797349214553833, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 131690 + }, + { + "epoch": 866.4473684210526, + "grad_norm": 1.1002148389816284, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 131700 + }, + { + "epoch": 866.5131578947369, + "grad_norm": 0.8693884611129761, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 131710 + }, + { + "epoch": 866.578947368421, + "grad_norm": 1.2665678262710571, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 131720 + }, + { + "epoch": 866.6447368421053, + "grad_norm": 1.0744692087173462, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 131730 + }, + { + "epoch": 866.7105263157895, + "grad_norm": 1.1713916063308716, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 131740 + }, + { + "epoch": 866.7763157894736, + "grad_norm": 1.3538668155670166, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 131750 + }, + { + "epoch": 866.8421052631579, + "grad_norm": 0.8203177452087402, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 131760 + }, + { + "epoch": 866.9078947368421, + "grad_norm": 1.1548283100128174, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 131770 + }, + { + "epoch": 866.9736842105264, + "grad_norm": 1.0071430206298828, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 131780 + }, + { + "epoch": 867.0394736842105, + "grad_norm": 1.1559392213821411, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 131790 + }, + { + "epoch": 867.1052631578947, + "grad_norm": 1.0900834798812866, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 131800 + }, + { + "epoch": 867.171052631579, + "grad_norm": 1.1441543102264404, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 131810 + }, + { + "epoch": 867.2368421052631, + "grad_norm": 1.2302360534667969, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 131820 + }, + { + "epoch": 867.3026315789474, + "grad_norm": 1.0534881353378296, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 131830 + }, + { + "epoch": 867.3684210526316, + "grad_norm": 0.8932411074638367, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 131840 + }, + { + "epoch": 867.4342105263158, + "grad_norm": 0.6699873805046082, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 131850 + }, + { + "epoch": 867.5, + "grad_norm": 0.7454432845115662, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 131860 + }, + { + "epoch": 867.5657894736842, + "grad_norm": 1.1074482202529907, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 131870 + }, + { + "epoch": 867.6315789473684, + "grad_norm": 1.2334349155426025, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 131880 + }, + { + "epoch": 867.6973684210526, + "grad_norm": 0.9547383189201355, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 131890 + }, + { + "epoch": 867.7631578947369, + "grad_norm": 1.069530963897705, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 131900 + }, + { + "epoch": 867.828947368421, + "grad_norm": 1.2506078481674194, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 131910 + }, + { + "epoch": 867.8947368421053, + "grad_norm": 0.9932133555412292, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 131920 + }, + { + "epoch": 867.9605263157895, + "grad_norm": 1.2245303392410278, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 131930 + }, + { + "epoch": 868.0263157894736, + "grad_norm": 1.5292211771011353, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 131940 + }, + { + "epoch": 868.0921052631579, + "grad_norm": 1.3680943250656128, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 131950 + }, + { + "epoch": 868.1578947368421, + "grad_norm": 1.1934568881988525, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 131960 + }, + { + "epoch": 868.2236842105264, + "grad_norm": 0.9152090549468994, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 131970 + }, + { + "epoch": 868.2894736842105, + "grad_norm": 0.8579458594322205, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 131980 + }, + { + "epoch": 868.3552631578947, + "grad_norm": 0.9107769131660461, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 131990 + }, + { + "epoch": 868.421052631579, + "grad_norm": 1.1969023942947388, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 132000 + }, + { + "epoch": 868.4868421052631, + "grad_norm": 0.9776577949523926, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 132010 + }, + { + "epoch": 868.5526315789474, + "grad_norm": 1.3878238201141357, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 132020 + }, + { + "epoch": 868.6184210526316, + "grad_norm": 0.997664213180542, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 132030 + }, + { + "epoch": 868.6842105263158, + "grad_norm": 1.237343430519104, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 132040 + }, + { + "epoch": 868.75, + "grad_norm": 0.9770588874816895, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 132050 + }, + { + "epoch": 868.8157894736842, + "grad_norm": 1.209973692893982, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 132060 + }, + { + "epoch": 868.8815789473684, + "grad_norm": 1.109294056892395, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 132070 + }, + { + "epoch": 868.9473684210526, + "grad_norm": 0.8828414082527161, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 132080 + }, + { + "epoch": 869.0131578947369, + "grad_norm": 0.8478983044624329, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 132090 + }, + { + "epoch": 869.078947368421, + "grad_norm": 1.0058600902557373, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 132100 + }, + { + "epoch": 869.1447368421053, + "grad_norm": 1.5055391788482666, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 132110 + }, + { + "epoch": 869.2105263157895, + "grad_norm": 1.3304319381713867, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 132120 + }, + { + "epoch": 869.2763157894736, + "grad_norm": 1.3461639881134033, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 132130 + }, + { + "epoch": 869.3421052631579, + "grad_norm": 1.318153977394104, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 132140 + }, + { + "epoch": 869.4078947368421, + "grad_norm": 1.3171056509017944, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 132150 + }, + { + "epoch": 869.4736842105264, + "grad_norm": 1.2487008571624756, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 132160 + }, + { + "epoch": 869.5394736842105, + "grad_norm": 0.9375091195106506, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 132170 + }, + { + "epoch": 869.6052631578947, + "grad_norm": 1.3363336324691772, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 132180 + }, + { + "epoch": 869.671052631579, + "grad_norm": 1.0731987953186035, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 132190 + }, + { + "epoch": 869.7368421052631, + "grad_norm": 1.6114320755004883, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 132200 + }, + { + "epoch": 869.8026315789474, + "grad_norm": 1.3398220539093018, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 132210 + }, + { + "epoch": 869.8684210526316, + "grad_norm": 0.9430725574493408, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 132220 + }, + { + "epoch": 869.9342105263158, + "grad_norm": 0.9981265068054199, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 132230 + }, + { + "epoch": 870.0, + "grad_norm": 1.1120591163635254, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 132240 + }, + { + "epoch": 870.0657894736842, + "grad_norm": 1.0576080083847046, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 132250 + }, + { + "epoch": 870.1315789473684, + "grad_norm": 1.1692156791687012, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 132260 + }, + { + "epoch": 870.1973684210526, + "grad_norm": 1.4510517120361328, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 132270 + }, + { + "epoch": 870.2631578947369, + "grad_norm": 0.8253922462463379, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 132280 + }, + { + "epoch": 870.328947368421, + "grad_norm": 1.1040948629379272, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 132290 + }, + { + "epoch": 870.3947368421053, + "grad_norm": 1.4819238185882568, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 132300 + }, + { + "epoch": 870.4605263157895, + "grad_norm": 0.8338831663131714, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 132310 + }, + { + "epoch": 870.5263157894736, + "grad_norm": 1.5496101379394531, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 132320 + }, + { + "epoch": 870.5921052631579, + "grad_norm": 1.3076415061950684, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 132330 + }, + { + "epoch": 870.6578947368421, + "grad_norm": 1.3276525735855103, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 132340 + }, + { + "epoch": 870.7236842105264, + "grad_norm": 1.18726646900177, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 132350 + }, + { + "epoch": 870.7894736842105, + "grad_norm": 1.3741918802261353, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 132360 + }, + { + "epoch": 870.8552631578947, + "grad_norm": 0.9850346446037292, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 132370 + }, + { + "epoch": 870.921052631579, + "grad_norm": 0.5921350717544556, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 132380 + }, + { + "epoch": 870.9868421052631, + "grad_norm": 1.010513424873352, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 132390 + }, + { + "epoch": 871.0526315789474, + "grad_norm": 1.3716336488723755, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 132400 + }, + { + "epoch": 871.1184210526316, + "grad_norm": 0.9653910994529724, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 132410 + }, + { + "epoch": 871.1842105263158, + "grad_norm": 1.3690600395202637, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 132420 + }, + { + "epoch": 871.25, + "grad_norm": 1.072267770767212, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 132430 + }, + { + "epoch": 871.3157894736842, + "grad_norm": 1.4082567691802979, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 132440 + }, + { + "epoch": 871.3815789473684, + "grad_norm": 1.0181151628494263, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 132450 + }, + { + "epoch": 871.4473684210526, + "grad_norm": 0.8527328968048096, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 132460 + }, + { + "epoch": 871.5131578947369, + "grad_norm": 1.0901014804840088, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 132470 + }, + { + "epoch": 871.578947368421, + "grad_norm": 1.1375715732574463, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 132480 + }, + { + "epoch": 871.6447368421053, + "grad_norm": 1.0507901906967163, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 132490 + }, + { + "epoch": 871.7105263157895, + "grad_norm": 0.819428563117981, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 132500 + }, + { + "epoch": 871.7763157894736, + "grad_norm": 0.7174791693687439, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 132510 + }, + { + "epoch": 871.8421052631579, + "grad_norm": 1.1212362051010132, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 132520 + }, + { + "epoch": 871.9078947368421, + "grad_norm": 1.310887098312378, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 132530 + }, + { + "epoch": 871.9736842105264, + "grad_norm": 0.9797291159629822, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 132540 + }, + { + "epoch": 872.0394736842105, + "grad_norm": 1.042771816253662, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 132550 + }, + { + "epoch": 872.1052631578947, + "grad_norm": 1.1746186017990112, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 132560 + }, + { + "epoch": 872.171052631579, + "grad_norm": 1.1553187370300293, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 132570 + }, + { + "epoch": 872.2368421052631, + "grad_norm": 0.9846141338348389, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 132580 + }, + { + "epoch": 872.3026315789474, + "grad_norm": 1.0329734086990356, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 132590 + }, + { + "epoch": 872.3684210526316, + "grad_norm": 1.0318963527679443, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 132600 + }, + { + "epoch": 872.4342105263158, + "grad_norm": 0.97999507188797, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 132610 + }, + { + "epoch": 872.5, + "grad_norm": 0.8947683572769165, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 132620 + }, + { + "epoch": 872.5657894736842, + "grad_norm": 0.7579332590103149, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 132630 + }, + { + "epoch": 872.6315789473684, + "grad_norm": 0.8904950022697449, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 132640 + }, + { + "epoch": 872.6973684210526, + "grad_norm": 1.2979966402053833, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 132650 + }, + { + "epoch": 872.7631578947369, + "grad_norm": 0.7980130314826965, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 132660 + }, + { + "epoch": 872.828947368421, + "grad_norm": 0.6705465912818909, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 132670 + }, + { + "epoch": 872.8947368421053, + "grad_norm": 0.5814407467842102, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 132680 + }, + { + "epoch": 872.9605263157895, + "grad_norm": 0.9677609205245972, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 132690 + }, + { + "epoch": 873.0263157894736, + "grad_norm": 0.7871490716934204, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 132700 + }, + { + "epoch": 873.0921052631579, + "grad_norm": 1.0843896865844727, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 132710 + }, + { + "epoch": 873.1578947368421, + "grad_norm": 1.4221380949020386, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 132720 + }, + { + "epoch": 873.2236842105264, + "grad_norm": 0.7746054530143738, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 132730 + }, + { + "epoch": 873.2894736842105, + "grad_norm": 1.0406986474990845, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 132740 + }, + { + "epoch": 873.3552631578947, + "grad_norm": 1.3779292106628418, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 132750 + }, + { + "epoch": 873.421052631579, + "grad_norm": 1.179019808769226, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 132760 + }, + { + "epoch": 873.4868421052631, + "grad_norm": 0.9589442014694214, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 132770 + }, + { + "epoch": 873.5526315789474, + "grad_norm": 0.8380019664764404, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 132780 + }, + { + "epoch": 873.6184210526316, + "grad_norm": 1.0861836671829224, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 132790 + }, + { + "epoch": 873.6842105263158, + "grad_norm": 0.7795262932777405, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 132800 + }, + { + "epoch": 873.75, + "grad_norm": 1.4798517227172852, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 132810 + }, + { + "epoch": 873.8157894736842, + "grad_norm": 0.8960368037223816, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 132820 + }, + { + "epoch": 873.8815789473684, + "grad_norm": 1.1715494394302368, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 132830 + }, + { + "epoch": 873.9473684210526, + "grad_norm": 1.0302726030349731, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 132840 + }, + { + "epoch": 874.0131578947369, + "grad_norm": 0.8323149085044861, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 132850 + }, + { + "epoch": 874.078947368421, + "grad_norm": 1.0214720964431763, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 132860 + }, + { + "epoch": 874.1447368421053, + "grad_norm": 1.2060658931732178, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 132870 + }, + { + "epoch": 874.2105263157895, + "grad_norm": 1.1269394159317017, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 132880 + }, + { + "epoch": 874.2763157894736, + "grad_norm": 1.2563400268554688, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 132890 + }, + { + "epoch": 874.3421052631579, + "grad_norm": 0.6155217885971069, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 132900 + }, + { + "epoch": 874.4078947368421, + "grad_norm": 1.0993677377700806, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 132910 + }, + { + "epoch": 874.4736842105264, + "grad_norm": 0.8634936809539795, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 132920 + }, + { + "epoch": 874.5394736842105, + "grad_norm": 1.18904709815979, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 132930 + }, + { + "epoch": 874.6052631578947, + "grad_norm": 0.8078858852386475, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 132940 + }, + { + "epoch": 874.671052631579, + "grad_norm": 0.8726778626441956, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 132950 + }, + { + "epoch": 874.7368421052631, + "grad_norm": 0.8983378410339355, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 132960 + }, + { + "epoch": 874.8026315789474, + "grad_norm": 0.6301218271255493, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 132970 + }, + { + "epoch": 874.8684210526316, + "grad_norm": 1.143539547920227, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 132980 + }, + { + "epoch": 874.9342105263158, + "grad_norm": 0.7233585119247437, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 132990 + }, + { + "epoch": 875.0, + "grad_norm": 0.7568589448928833, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 133000 + }, + { + "epoch": 875.0657894736842, + "grad_norm": 1.0015631914138794, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 133010 + }, + { + "epoch": 875.1315789473684, + "grad_norm": 1.3138691186904907, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 133020 + }, + { + "epoch": 875.1973684210526, + "grad_norm": 1.1672554016113281, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 133030 + }, + { + "epoch": 875.2631578947369, + "grad_norm": 1.2994216680526733, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 133040 + }, + { + "epoch": 875.328947368421, + "grad_norm": 1.1386960744857788, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 133050 + }, + { + "epoch": 875.3947368421053, + "grad_norm": 0.8103448152542114, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 133060 + }, + { + "epoch": 875.4605263157895, + "grad_norm": 1.0586600303649902, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 133070 + }, + { + "epoch": 875.5263157894736, + "grad_norm": 1.086573839187622, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 133080 + }, + { + "epoch": 875.5921052631579, + "grad_norm": 0.9082050323486328, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 133090 + }, + { + "epoch": 875.6578947368421, + "grad_norm": 1.2547775506973267, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 133100 + }, + { + "epoch": 875.7236842105264, + "grad_norm": 1.1525391340255737, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 133110 + }, + { + "epoch": 875.7894736842105, + "grad_norm": 1.0197055339813232, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 133120 + }, + { + "epoch": 875.8552631578947, + "grad_norm": 1.1045457124710083, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 133130 + }, + { + "epoch": 875.921052631579, + "grad_norm": 1.19781494140625, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 133140 + }, + { + "epoch": 875.9868421052631, + "grad_norm": 1.0859558582305908, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 133150 + }, + { + "epoch": 876.0526315789474, + "grad_norm": 1.278917670249939, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 133160 + }, + { + "epoch": 876.1184210526316, + "grad_norm": 0.9689110517501831, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 133170 + }, + { + "epoch": 876.1842105263158, + "grad_norm": 1.1780256032943726, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 133180 + }, + { + "epoch": 876.25, + "grad_norm": 1.1999872922897339, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 133190 + }, + { + "epoch": 876.3157894736842, + "grad_norm": 1.1459710597991943, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 133200 + }, + { + "epoch": 876.3815789473684, + "grad_norm": 1.1914170980453491, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 133210 + }, + { + "epoch": 876.4473684210526, + "grad_norm": 1.8968595266342163, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 133220 + }, + { + "epoch": 876.5131578947369, + "grad_norm": 1.641607642173767, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 133230 + }, + { + "epoch": 876.578947368421, + "grad_norm": 1.1843949556350708, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 133240 + }, + { + "epoch": 876.6447368421053, + "grad_norm": 1.2296756505966187, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 133250 + }, + { + "epoch": 876.7105263157895, + "grad_norm": 1.4620548486709595, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 133260 + }, + { + "epoch": 876.7763157894736, + "grad_norm": 1.4632091522216797, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 133270 + }, + { + "epoch": 876.8421052631579, + "grad_norm": 1.3262276649475098, + "learning_rate": 0.0001, + "loss": 0.0134, + "step": 133280 + }, + { + "epoch": 876.9078947368421, + "grad_norm": 1.4855178594589233, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 133290 + }, + { + "epoch": 876.9736842105264, + "grad_norm": 1.28376042842865, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 133300 + }, + { + "epoch": 877.0394736842105, + "grad_norm": 1.2845184803009033, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 133310 + }, + { + "epoch": 877.1052631578947, + "grad_norm": 1.154748797416687, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 133320 + }, + { + "epoch": 877.171052631579, + "grad_norm": 1.3325797319412231, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 133330 + }, + { + "epoch": 877.2368421052631, + "grad_norm": 1.2600542306900024, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 133340 + }, + { + "epoch": 877.3026315789474, + "grad_norm": 1.4029486179351807, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 133350 + }, + { + "epoch": 877.3684210526316, + "grad_norm": 1.3744686841964722, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 133360 + }, + { + "epoch": 877.4342105263158, + "grad_norm": 1.225288987159729, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 133370 + }, + { + "epoch": 877.5, + "grad_norm": 1.006150245666504, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 133380 + }, + { + "epoch": 877.5657894736842, + "grad_norm": 0.9903148412704468, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 133390 + }, + { + "epoch": 877.6315789473684, + "grad_norm": 1.0531034469604492, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 133400 + }, + { + "epoch": 877.6973684210526, + "grad_norm": 1.0496103763580322, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 133410 + }, + { + "epoch": 877.7631578947369, + "grad_norm": 1.3180263042449951, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 133420 + }, + { + "epoch": 877.828947368421, + "grad_norm": 1.455904245376587, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 133430 + }, + { + "epoch": 877.8947368421053, + "grad_norm": 0.9219629168510437, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 133440 + }, + { + "epoch": 877.9605263157895, + "grad_norm": 1.1099201440811157, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 133450 + }, + { + "epoch": 878.0263157894736, + "grad_norm": 1.0845720767974854, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 133460 + }, + { + "epoch": 878.0921052631579, + "grad_norm": 1.3005256652832031, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 133470 + }, + { + "epoch": 878.1578947368421, + "grad_norm": 1.2297483682632446, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 133480 + }, + { + "epoch": 878.2236842105264, + "grad_norm": 1.2901902198791504, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 133490 + }, + { + "epoch": 878.2894736842105, + "grad_norm": 1.3723936080932617, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 133500 + }, + { + "epoch": 878.3552631578947, + "grad_norm": 1.08962082862854, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 133510 + }, + { + "epoch": 878.421052631579, + "grad_norm": 1.2790974378585815, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 133520 + }, + { + "epoch": 878.4868421052631, + "grad_norm": 1.027013897895813, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 133530 + }, + { + "epoch": 878.5526315789474, + "grad_norm": 1.4130576848983765, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 133540 + }, + { + "epoch": 878.6184210526316, + "grad_norm": 1.0892243385314941, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 133550 + }, + { + "epoch": 878.6842105263158, + "grad_norm": 1.1556276082992554, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 133560 + }, + { + "epoch": 878.75, + "grad_norm": 1.008480191230774, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 133570 + }, + { + "epoch": 878.8157894736842, + "grad_norm": 0.7436794638633728, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 133580 + }, + { + "epoch": 878.8815789473684, + "grad_norm": 1.1126219034194946, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 133590 + }, + { + "epoch": 878.9473684210526, + "grad_norm": 1.165494441986084, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 133600 + }, + { + "epoch": 879.0131578947369, + "grad_norm": 1.1165974140167236, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 133610 + }, + { + "epoch": 879.078947368421, + "grad_norm": 1.1761646270751953, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 133620 + }, + { + "epoch": 879.1447368421053, + "grad_norm": 1.1715446710586548, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 133630 + }, + { + "epoch": 879.2105263157895, + "grad_norm": 1.2596334218978882, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 133640 + }, + { + "epoch": 879.2763157894736, + "grad_norm": 0.70237797498703, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 133650 + }, + { + "epoch": 879.3421052631579, + "grad_norm": 0.857401967048645, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 133660 + }, + { + "epoch": 879.4078947368421, + "grad_norm": 0.7992585301399231, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 133670 + }, + { + "epoch": 879.4736842105264, + "grad_norm": 0.7546584606170654, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 133680 + }, + { + "epoch": 879.5394736842105, + "grad_norm": 1.1993327140808105, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 133690 + }, + { + "epoch": 879.6052631578947, + "grad_norm": 1.0572268962860107, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 133700 + }, + { + "epoch": 879.671052631579, + "grad_norm": 1.2813950777053833, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 133710 + }, + { + "epoch": 879.7368421052631, + "grad_norm": 0.7247679233551025, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 133720 + }, + { + "epoch": 879.8026315789474, + "grad_norm": 0.8819660544395447, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 133730 + }, + { + "epoch": 879.8684210526316, + "grad_norm": 0.9946263432502747, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 133740 + }, + { + "epoch": 879.9342105263158, + "grad_norm": 0.8821104168891907, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 133750 + }, + { + "epoch": 880.0, + "grad_norm": 0.6893178820610046, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 133760 + }, + { + "epoch": 880.0657894736842, + "grad_norm": 1.0600444078445435, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 133770 + }, + { + "epoch": 880.1315789473684, + "grad_norm": 1.0986602306365967, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 133780 + }, + { + "epoch": 880.1973684210526, + "grad_norm": 0.8387561440467834, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 133790 + }, + { + "epoch": 880.2631578947369, + "grad_norm": 0.8584687113761902, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 133800 + }, + { + "epoch": 880.328947368421, + "grad_norm": 0.9088717699050903, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 133810 + }, + { + "epoch": 880.3947368421053, + "grad_norm": 0.9743188619613647, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 133820 + }, + { + "epoch": 880.4605263157895, + "grad_norm": 0.6925018429756165, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 133830 + }, + { + "epoch": 880.5263157894736, + "grad_norm": 0.9171350002288818, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 133840 + }, + { + "epoch": 880.5921052631579, + "grad_norm": 0.8944483399391174, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 133850 + }, + { + "epoch": 880.6578947368421, + "grad_norm": 0.859816312789917, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 133860 + }, + { + "epoch": 880.7236842105264, + "grad_norm": 0.8540825247764587, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 133870 + }, + { + "epoch": 880.7894736842105, + "grad_norm": 0.8680028319358826, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 133880 + }, + { + "epoch": 880.8552631578947, + "grad_norm": 0.6797283291816711, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 133890 + }, + { + "epoch": 880.921052631579, + "grad_norm": 1.095505952835083, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 133900 + }, + { + "epoch": 880.9868421052631, + "grad_norm": 0.8483147025108337, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 133910 + }, + { + "epoch": 881.0526315789474, + "grad_norm": 1.0879533290863037, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 133920 + }, + { + "epoch": 881.1184210526316, + "grad_norm": 1.1705760955810547, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 133930 + }, + { + "epoch": 881.1842105263158, + "grad_norm": 1.3736196756362915, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 133940 + }, + { + "epoch": 881.25, + "grad_norm": 1.5451818704605103, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 133950 + }, + { + "epoch": 881.3157894736842, + "grad_norm": 1.1037559509277344, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 133960 + }, + { + "epoch": 881.3815789473684, + "grad_norm": 1.054043173789978, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 133970 + }, + { + "epoch": 881.4473684210526, + "grad_norm": 1.1496354341506958, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 133980 + }, + { + "epoch": 881.5131578947369, + "grad_norm": 0.895317792892456, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 133990 + }, + { + "epoch": 881.578947368421, + "grad_norm": 0.9729178547859192, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 134000 + }, + { + "epoch": 881.6447368421053, + "grad_norm": 0.8765199184417725, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 134010 + }, + { + "epoch": 881.7105263157895, + "grad_norm": 1.216593861579895, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 134020 + }, + { + "epoch": 881.7763157894736, + "grad_norm": 0.9411874413490295, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 134030 + }, + { + "epoch": 881.8421052631579, + "grad_norm": 1.250806450843811, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 134040 + }, + { + "epoch": 881.9078947368421, + "grad_norm": 1.06573486328125, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 134050 + }, + { + "epoch": 881.9736842105264, + "grad_norm": 1.1607677936553955, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 134060 + }, + { + "epoch": 882.0394736842105, + "grad_norm": 1.0911805629730225, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 134070 + }, + { + "epoch": 882.1052631578947, + "grad_norm": 0.9352615475654602, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 134080 + }, + { + "epoch": 882.171052631579, + "grad_norm": 1.1933131217956543, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 134090 + }, + { + "epoch": 882.2368421052631, + "grad_norm": 1.0852186679840088, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 134100 + }, + { + "epoch": 882.3026315789474, + "grad_norm": 1.5493214130401611, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 134110 + }, + { + "epoch": 882.3684210526316, + "grad_norm": 1.0032248497009277, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 134120 + }, + { + "epoch": 882.4342105263158, + "grad_norm": 0.861934244632721, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 134130 + }, + { + "epoch": 882.5, + "grad_norm": 1.3053967952728271, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 134140 + }, + { + "epoch": 882.5657894736842, + "grad_norm": 1.0918253660202026, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 134150 + }, + { + "epoch": 882.6315789473684, + "grad_norm": 1.0111234188079834, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 134160 + }, + { + "epoch": 882.6973684210526, + "grad_norm": 1.3519738912582397, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 134170 + }, + { + "epoch": 882.7631578947369, + "grad_norm": 1.0138260126113892, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 134180 + }, + { + "epoch": 882.828947368421, + "grad_norm": 1.0171897411346436, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 134190 + }, + { + "epoch": 882.8947368421053, + "grad_norm": 0.7954549789428711, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 134200 + }, + { + "epoch": 882.9605263157895, + "grad_norm": 0.9768205285072327, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 134210 + }, + { + "epoch": 883.0263157894736, + "grad_norm": 1.317543625831604, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 134220 + }, + { + "epoch": 883.0921052631579, + "grad_norm": 1.5799720287322998, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 134230 + }, + { + "epoch": 883.1578947368421, + "grad_norm": 1.1066539287567139, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 134240 + }, + { + "epoch": 883.2236842105264, + "grad_norm": 0.8365485668182373, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 134250 + }, + { + "epoch": 883.2894736842105, + "grad_norm": 1.3858592510223389, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 134260 + }, + { + "epoch": 883.3552631578947, + "grad_norm": 0.8898154497146606, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 134270 + }, + { + "epoch": 883.421052631579, + "grad_norm": 0.8104990124702454, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 134280 + }, + { + "epoch": 883.4868421052631, + "grad_norm": 1.3037996292114258, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 134290 + }, + { + "epoch": 883.5526315789474, + "grad_norm": 1.1328245401382446, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 134300 + }, + { + "epoch": 883.6184210526316, + "grad_norm": 1.0135562419891357, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 134310 + }, + { + "epoch": 883.6842105263158, + "grad_norm": 0.8955759406089783, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 134320 + }, + { + "epoch": 883.75, + "grad_norm": 0.7221687436103821, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 134330 + }, + { + "epoch": 883.8157894736842, + "grad_norm": 0.8941370844841003, + "learning_rate": 0.0001, + "loss": 0.0132, + "step": 134340 + }, + { + "epoch": 883.8815789473684, + "grad_norm": 1.185426115989685, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 134350 + }, + { + "epoch": 883.9473684210526, + "grad_norm": 1.2640587091445923, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 134360 + }, + { + "epoch": 884.0131578947369, + "grad_norm": 1.3449172973632812, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 134370 + }, + { + "epoch": 884.078947368421, + "grad_norm": 1.0787019729614258, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 134380 + }, + { + "epoch": 884.1447368421053, + "grad_norm": 1.3704967498779297, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 134390 + }, + { + "epoch": 884.2105263157895, + "grad_norm": 1.1371135711669922, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 134400 + }, + { + "epoch": 884.2763157894736, + "grad_norm": 1.1591137647628784, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 134410 + }, + { + "epoch": 884.3421052631579, + "grad_norm": 1.0808539390563965, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 134420 + }, + { + "epoch": 884.4078947368421, + "grad_norm": 0.9412270784378052, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 134430 + }, + { + "epoch": 884.4736842105264, + "grad_norm": 0.8797613978385925, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 134440 + }, + { + "epoch": 884.5394736842105, + "grad_norm": 1.1469194889068604, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 134450 + }, + { + "epoch": 884.6052631578947, + "grad_norm": 1.2293258905410767, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 134460 + }, + { + "epoch": 884.671052631579, + "grad_norm": 0.9875232577323914, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 134470 + }, + { + "epoch": 884.7368421052631, + "grad_norm": 0.9115191698074341, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 134480 + }, + { + "epoch": 884.8026315789474, + "grad_norm": 1.4155162572860718, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 134490 + }, + { + "epoch": 884.8684210526316, + "grad_norm": 1.0972641706466675, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 134500 + }, + { + "epoch": 884.9342105263158, + "grad_norm": 1.0389680862426758, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 134510 + }, + { + "epoch": 885.0, + "grad_norm": 1.0352895259857178, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 134520 + }, + { + "epoch": 885.0657894736842, + "grad_norm": 0.6757728457450867, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 134530 + }, + { + "epoch": 885.1315789473684, + "grad_norm": 1.3461501598358154, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 134540 + }, + { + "epoch": 885.1973684210526, + "grad_norm": 0.9551567435264587, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 134550 + }, + { + "epoch": 885.2631578947369, + "grad_norm": 1.348388910293579, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 134560 + }, + { + "epoch": 885.328947368421, + "grad_norm": 1.1484230756759644, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 134570 + }, + { + "epoch": 885.3947368421053, + "grad_norm": 1.0391778945922852, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 134580 + }, + { + "epoch": 885.4605263157895, + "grad_norm": 0.9618095755577087, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 134590 + }, + { + "epoch": 885.5263157894736, + "grad_norm": 0.9640491604804993, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 134600 + }, + { + "epoch": 885.5921052631579, + "grad_norm": 1.0603336095809937, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 134610 + }, + { + "epoch": 885.6578947368421, + "grad_norm": 0.8449630737304688, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 134620 + }, + { + "epoch": 885.7236842105264, + "grad_norm": 0.8843861222267151, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 134630 + }, + { + "epoch": 885.7894736842105, + "grad_norm": 1.1363091468811035, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 134640 + }, + { + "epoch": 885.8552631578947, + "grad_norm": 1.238927960395813, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 134650 + }, + { + "epoch": 885.921052631579, + "grad_norm": 1.1133671998977661, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 134660 + }, + { + "epoch": 885.9868421052631, + "grad_norm": 1.23615300655365, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 134670 + }, + { + "epoch": 886.0526315789474, + "grad_norm": 1.0656203031539917, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 134680 + }, + { + "epoch": 886.1184210526316, + "grad_norm": 1.0231038331985474, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 134690 + }, + { + "epoch": 886.1842105263158, + "grad_norm": 1.6583019495010376, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 134700 + }, + { + "epoch": 886.25, + "grad_norm": 1.5105453729629517, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 134710 + }, + { + "epoch": 886.3157894736842, + "grad_norm": 1.3457199335098267, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 134720 + }, + { + "epoch": 886.3815789473684, + "grad_norm": 1.0339312553405762, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 134730 + }, + { + "epoch": 886.4473684210526, + "grad_norm": 0.9141952395439148, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 134740 + }, + { + "epoch": 886.5131578947369, + "grad_norm": 1.3014699220657349, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 134750 + }, + { + "epoch": 886.578947368421, + "grad_norm": 0.8726618885993958, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 134760 + }, + { + "epoch": 886.6447368421053, + "grad_norm": 1.2617624998092651, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 134770 + }, + { + "epoch": 886.7105263157895, + "grad_norm": 0.9956019520759583, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 134780 + }, + { + "epoch": 886.7763157894736, + "grad_norm": 1.3004852533340454, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 134790 + }, + { + "epoch": 886.8421052631579, + "grad_norm": 1.200190782546997, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 134800 + }, + { + "epoch": 886.9078947368421, + "grad_norm": 1.1957974433898926, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 134810 + }, + { + "epoch": 886.9736842105264, + "grad_norm": 1.3454686403274536, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 134820 + }, + { + "epoch": 887.0394736842105, + "grad_norm": 0.9715621471405029, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 134830 + }, + { + "epoch": 887.1052631578947, + "grad_norm": 1.2138034105300903, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 134840 + }, + { + "epoch": 887.171052631579, + "grad_norm": 1.0837821960449219, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 134850 + }, + { + "epoch": 887.2368421052631, + "grad_norm": 0.9954550266265869, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 134860 + }, + { + "epoch": 887.3026315789474, + "grad_norm": 1.319032907485962, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 134870 + }, + { + "epoch": 887.3684210526316, + "grad_norm": 1.0620183944702148, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 134880 + }, + { + "epoch": 887.4342105263158, + "grad_norm": 1.06571626663208, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 134890 + }, + { + "epoch": 887.5, + "grad_norm": 1.1749192476272583, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 134900 + }, + { + "epoch": 887.5657894736842, + "grad_norm": 1.475423812866211, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 134910 + }, + { + "epoch": 887.6315789473684, + "grad_norm": 1.3862122297286987, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 134920 + }, + { + "epoch": 887.6973684210526, + "grad_norm": 1.3569873571395874, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 134930 + }, + { + "epoch": 887.7631578947369, + "grad_norm": 1.0890201330184937, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 134940 + }, + { + "epoch": 887.828947368421, + "grad_norm": 1.0563850402832031, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 134950 + }, + { + "epoch": 887.8947368421053, + "grad_norm": 1.4732695817947388, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 134960 + }, + { + "epoch": 887.9605263157895, + "grad_norm": 0.7517303824424744, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 134970 + }, + { + "epoch": 888.0263157894736, + "grad_norm": 0.9856480956077576, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 134980 + }, + { + "epoch": 888.0921052631579, + "grad_norm": 0.9564664363861084, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 134990 + }, + { + "epoch": 888.1578947368421, + "grad_norm": 0.8272193670272827, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 135000 + }, + { + "epoch": 888.2236842105264, + "grad_norm": 1.3324556350708008, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 135010 + }, + { + "epoch": 888.2894736842105, + "grad_norm": 1.2476513385772705, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 135020 + }, + { + "epoch": 888.3552631578947, + "grad_norm": 1.0319244861602783, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 135030 + }, + { + "epoch": 888.421052631579, + "grad_norm": 0.6704224944114685, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 135040 + }, + { + "epoch": 888.4868421052631, + "grad_norm": 0.8636611700057983, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 135050 + }, + { + "epoch": 888.5526315789474, + "grad_norm": 1.0018370151519775, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 135060 + }, + { + "epoch": 888.6184210526316, + "grad_norm": 1.3011027574539185, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 135070 + }, + { + "epoch": 888.6842105263158, + "grad_norm": 0.8524222373962402, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 135080 + }, + { + "epoch": 888.75, + "grad_norm": 1.3557988405227661, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 135090 + }, + { + "epoch": 888.8157894736842, + "grad_norm": 0.79920494556427, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 135100 + }, + { + "epoch": 888.8815789473684, + "grad_norm": 0.8106809258460999, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 135110 + }, + { + "epoch": 888.9473684210526, + "grad_norm": 0.9653734564781189, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 135120 + }, + { + "epoch": 889.0131578947369, + "grad_norm": 1.4216874837875366, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 135130 + }, + { + "epoch": 889.078947368421, + "grad_norm": 1.148635983467102, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 135140 + }, + { + "epoch": 889.1447368421053, + "grad_norm": 1.2149169445037842, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 135150 + }, + { + "epoch": 889.2105263157895, + "grad_norm": 1.0104612112045288, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 135160 + }, + { + "epoch": 889.2763157894736, + "grad_norm": 0.984620988368988, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 135170 + }, + { + "epoch": 889.3421052631579, + "grad_norm": 1.2821751832962036, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 135180 + }, + { + "epoch": 889.4078947368421, + "grad_norm": 1.0690802335739136, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 135190 + }, + { + "epoch": 889.4736842105264, + "grad_norm": 1.02140474319458, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 135200 + }, + { + "epoch": 889.5394736842105, + "grad_norm": 1.0375328063964844, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 135210 + }, + { + "epoch": 889.6052631578947, + "grad_norm": 0.9176158308982849, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 135220 + }, + { + "epoch": 889.671052631579, + "grad_norm": 0.9485794305801392, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 135230 + }, + { + "epoch": 889.7368421052631, + "grad_norm": 0.9150336980819702, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 135240 + }, + { + "epoch": 889.8026315789474, + "grad_norm": 0.9094645380973816, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 135250 + }, + { + "epoch": 889.8684210526316, + "grad_norm": 0.6861184239387512, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 135260 + }, + { + "epoch": 889.9342105263158, + "grad_norm": 0.9334322810173035, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 135270 + }, + { + "epoch": 890.0, + "grad_norm": 1.377838134765625, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 135280 + }, + { + "epoch": 890.0657894736842, + "grad_norm": 1.133252739906311, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 135290 + }, + { + "epoch": 890.1315789473684, + "grad_norm": 1.1310118436813354, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 135300 + }, + { + "epoch": 890.1973684210526, + "grad_norm": 0.7168204188346863, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 135310 + }, + { + "epoch": 890.2631578947369, + "grad_norm": 0.8758770227432251, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 135320 + }, + { + "epoch": 890.328947368421, + "grad_norm": 1.1734673976898193, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 135330 + }, + { + "epoch": 890.3947368421053, + "grad_norm": 1.19082772731781, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 135340 + }, + { + "epoch": 890.4605263157895, + "grad_norm": 0.8569977283477783, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 135350 + }, + { + "epoch": 890.5263157894736, + "grad_norm": 1.1632040739059448, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 135360 + }, + { + "epoch": 890.5921052631579, + "grad_norm": 1.2661454677581787, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 135370 + }, + { + "epoch": 890.6578947368421, + "grad_norm": 0.9945191144943237, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 135380 + }, + { + "epoch": 890.7236842105264, + "grad_norm": 1.382722020149231, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 135390 + }, + { + "epoch": 890.7894736842105, + "grad_norm": 1.317020297050476, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 135400 + }, + { + "epoch": 890.8552631578947, + "grad_norm": 1.1985100507736206, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 135410 + }, + { + "epoch": 890.921052631579, + "grad_norm": 0.7880237698554993, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 135420 + }, + { + "epoch": 890.9868421052631, + "grad_norm": 1.2420897483825684, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 135430 + }, + { + "epoch": 891.0526315789474, + "grad_norm": 1.693311333656311, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 135440 + }, + { + "epoch": 891.1184210526316, + "grad_norm": 1.6864817142486572, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 135450 + }, + { + "epoch": 891.1842105263158, + "grad_norm": 1.1330416202545166, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 135460 + }, + { + "epoch": 891.25, + "grad_norm": 1.0997774600982666, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 135470 + }, + { + "epoch": 891.3157894736842, + "grad_norm": 0.924976646900177, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 135480 + }, + { + "epoch": 891.3815789473684, + "grad_norm": 1.182472825050354, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 135490 + }, + { + "epoch": 891.4473684210526, + "grad_norm": 1.1451120376586914, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 135500 + }, + { + "epoch": 891.5131578947369, + "grad_norm": 0.876240611076355, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 135510 + }, + { + "epoch": 891.578947368421, + "grad_norm": 1.0297558307647705, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 135520 + }, + { + "epoch": 891.6447368421053, + "grad_norm": 0.6283572912216187, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 135530 + }, + { + "epoch": 891.7105263157895, + "grad_norm": 1.2140889167785645, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 135540 + }, + { + "epoch": 891.7763157894736, + "grad_norm": 1.0670727491378784, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 135550 + }, + { + "epoch": 891.8421052631579, + "grad_norm": 1.0269482135772705, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 135560 + }, + { + "epoch": 891.9078947368421, + "grad_norm": 1.1649383306503296, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 135570 + }, + { + "epoch": 891.9736842105264, + "grad_norm": 1.085970163345337, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 135580 + }, + { + "epoch": 892.0394736842105, + "grad_norm": 1.1776764392852783, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 135590 + }, + { + "epoch": 892.1052631578947, + "grad_norm": 1.2964081764221191, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 135600 + }, + { + "epoch": 892.171052631579, + "grad_norm": 1.1058318614959717, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 135610 + }, + { + "epoch": 892.2368421052631, + "grad_norm": 0.9314119815826416, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 135620 + }, + { + "epoch": 892.3026315789474, + "grad_norm": 1.4460657835006714, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 135630 + }, + { + "epoch": 892.3684210526316, + "grad_norm": 0.9084354043006897, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 135640 + }, + { + "epoch": 892.4342105263158, + "grad_norm": 1.1502026319503784, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 135650 + }, + { + "epoch": 892.5, + "grad_norm": 1.2194117307662964, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 135660 + }, + { + "epoch": 892.5657894736842, + "grad_norm": 0.9721207618713379, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 135670 + }, + { + "epoch": 892.6315789473684, + "grad_norm": 0.7545431852340698, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 135680 + }, + { + "epoch": 892.6973684210526, + "grad_norm": 0.8242561221122742, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 135690 + }, + { + "epoch": 892.7631578947369, + "grad_norm": 0.8406139612197876, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 135700 + }, + { + "epoch": 892.828947368421, + "grad_norm": 1.0956982374191284, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 135710 + }, + { + "epoch": 892.8947368421053, + "grad_norm": 0.8513756990432739, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 135720 + }, + { + "epoch": 892.9605263157895, + "grad_norm": 1.1040880680084229, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 135730 + }, + { + "epoch": 893.0263157894736, + "grad_norm": 1.076196551322937, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 135740 + }, + { + "epoch": 893.0921052631579, + "grad_norm": 1.1001917123794556, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 135750 + }, + { + "epoch": 893.1578947368421, + "grad_norm": 1.5378665924072266, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 135760 + }, + { + "epoch": 893.2236842105264, + "grad_norm": 1.166962742805481, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 135770 + }, + { + "epoch": 893.2894736842105, + "grad_norm": 0.90530925989151, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 135780 + }, + { + "epoch": 893.3552631578947, + "grad_norm": 1.342469334602356, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 135790 + }, + { + "epoch": 893.421052631579, + "grad_norm": 1.1305315494537354, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 135800 + }, + { + "epoch": 893.4868421052631, + "grad_norm": 0.8744671940803528, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 135810 + }, + { + "epoch": 893.5526315789474, + "grad_norm": 1.1060692071914673, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 135820 + }, + { + "epoch": 893.6184210526316, + "grad_norm": 1.1510794162750244, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 135830 + }, + { + "epoch": 893.6842105263158, + "grad_norm": 1.0159302949905396, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 135840 + }, + { + "epoch": 893.75, + "grad_norm": 1.0674599409103394, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 135850 + }, + { + "epoch": 893.8157894736842, + "grad_norm": 0.7770407199859619, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 135860 + }, + { + "epoch": 893.8815789473684, + "grad_norm": 1.1732828617095947, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 135870 + }, + { + "epoch": 893.9473684210526, + "grad_norm": 0.732951283454895, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 135880 + }, + { + "epoch": 894.0131578947369, + "grad_norm": 0.8434010148048401, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 135890 + }, + { + "epoch": 894.078947368421, + "grad_norm": 0.9561564922332764, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 135900 + }, + { + "epoch": 894.1447368421053, + "grad_norm": 1.377158761024475, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 135910 + }, + { + "epoch": 894.2105263157895, + "grad_norm": 1.1105989217758179, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 135920 + }, + { + "epoch": 894.2763157894736, + "grad_norm": 1.2619684934616089, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 135930 + }, + { + "epoch": 894.3421052631579, + "grad_norm": 1.026212453842163, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 135940 + }, + { + "epoch": 894.4078947368421, + "grad_norm": 0.7990017533302307, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 135950 + }, + { + "epoch": 894.4736842105264, + "grad_norm": 0.9422027468681335, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 135960 + }, + { + "epoch": 894.5394736842105, + "grad_norm": 0.796430230140686, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 135970 + }, + { + "epoch": 894.6052631578947, + "grad_norm": 1.2357088327407837, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 135980 + }, + { + "epoch": 894.671052631579, + "grad_norm": 1.2409306764602661, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 135990 + }, + { + "epoch": 894.7368421052631, + "grad_norm": 1.2816016674041748, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 136000 + }, + { + "epoch": 894.8026315789474, + "grad_norm": 0.6768025755882263, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 136010 + }, + { + "epoch": 894.8684210526316, + "grad_norm": 0.9706000685691833, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 136020 + }, + { + "epoch": 894.9342105263158, + "grad_norm": 0.9608473777770996, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 136030 + }, + { + "epoch": 895.0, + "grad_norm": 1.143330693244934, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 136040 + }, + { + "epoch": 895.0657894736842, + "grad_norm": 0.8923169374465942, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 136050 + }, + { + "epoch": 895.1315789473684, + "grad_norm": 0.7669290900230408, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 136060 + }, + { + "epoch": 895.1973684210526, + "grad_norm": 0.9958640933036804, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 136070 + }, + { + "epoch": 895.2631578947369, + "grad_norm": 1.1290862560272217, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 136080 + }, + { + "epoch": 895.328947368421, + "grad_norm": 0.9855304956436157, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 136090 + }, + { + "epoch": 895.3947368421053, + "grad_norm": 0.9755940437316895, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 136100 + }, + { + "epoch": 895.4605263157895, + "grad_norm": 0.8567885160446167, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 136110 + }, + { + "epoch": 895.5263157894736, + "grad_norm": 1.0460125207901, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 136120 + }, + { + "epoch": 895.5921052631579, + "grad_norm": 1.2655810117721558, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 136130 + }, + { + "epoch": 895.6578947368421, + "grad_norm": 0.951598048210144, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 136140 + }, + { + "epoch": 895.7236842105264, + "grad_norm": 1.1518741846084595, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 136150 + }, + { + "epoch": 895.7894736842105, + "grad_norm": 0.9149439334869385, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 136160 + }, + { + "epoch": 895.8552631578947, + "grad_norm": 1.0804107189178467, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 136170 + }, + { + "epoch": 895.921052631579, + "grad_norm": 1.2547401189804077, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 136180 + }, + { + "epoch": 895.9868421052631, + "grad_norm": 1.0589017868041992, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 136190 + }, + { + "epoch": 896.0526315789474, + "grad_norm": 1.040658950805664, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 136200 + }, + { + "epoch": 896.1184210526316, + "grad_norm": 1.1037824153900146, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 136210 + }, + { + "epoch": 896.1842105263158, + "grad_norm": 1.4946725368499756, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 136220 + }, + { + "epoch": 896.25, + "grad_norm": 1.519361972808838, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 136230 + }, + { + "epoch": 896.3157894736842, + "grad_norm": 1.1634271144866943, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 136240 + }, + { + "epoch": 896.3815789473684, + "grad_norm": 1.3984472751617432, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 136250 + }, + { + "epoch": 896.4473684210526, + "grad_norm": 1.7315332889556885, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 136260 + }, + { + "epoch": 896.5131578947369, + "grad_norm": 1.5922656059265137, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 136270 + }, + { + "epoch": 896.578947368421, + "grad_norm": 1.4069749116897583, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 136280 + }, + { + "epoch": 896.6447368421053, + "grad_norm": 1.244490146636963, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 136290 + }, + { + "epoch": 896.7105263157895, + "grad_norm": 1.1110283136367798, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 136300 + }, + { + "epoch": 896.7763157894736, + "grad_norm": 1.0167648792266846, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 136310 + }, + { + "epoch": 896.8421052631579, + "grad_norm": 1.137952208518982, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 136320 + }, + { + "epoch": 896.9078947368421, + "grad_norm": 1.0673314332962036, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 136330 + }, + { + "epoch": 896.9736842105264, + "grad_norm": 1.012425184249878, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 136340 + }, + { + "epoch": 897.0394736842105, + "grad_norm": 0.7594325542449951, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 136350 + }, + { + "epoch": 897.1052631578947, + "grad_norm": 1.1178792715072632, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 136360 + }, + { + "epoch": 897.171052631579, + "grad_norm": 1.0943617820739746, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 136370 + }, + { + "epoch": 897.2368421052631, + "grad_norm": 0.9920550584793091, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 136380 + }, + { + "epoch": 897.3026315789474, + "grad_norm": 1.0962131023406982, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 136390 + }, + { + "epoch": 897.3684210526316, + "grad_norm": 0.8344330787658691, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 136400 + }, + { + "epoch": 897.4342105263158, + "grad_norm": 1.1164144277572632, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 136410 + }, + { + "epoch": 897.5, + "grad_norm": 1.07270348072052, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 136420 + }, + { + "epoch": 897.5657894736842, + "grad_norm": 0.843373715877533, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 136430 + }, + { + "epoch": 897.6315789473684, + "grad_norm": 0.8556021451950073, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 136440 + }, + { + "epoch": 897.6973684210526, + "grad_norm": 0.8964880704879761, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 136450 + }, + { + "epoch": 897.7631578947369, + "grad_norm": 1.153839111328125, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 136460 + }, + { + "epoch": 897.828947368421, + "grad_norm": 1.0658332109451294, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 136470 + }, + { + "epoch": 897.8947368421053, + "grad_norm": 1.2250584363937378, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 136480 + }, + { + "epoch": 897.9605263157895, + "grad_norm": 1.1773885488510132, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 136490 + }, + { + "epoch": 898.0263157894736, + "grad_norm": 1.2498626708984375, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 136500 + }, + { + "epoch": 898.0921052631579, + "grad_norm": 0.980217695236206, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 136510 + }, + { + "epoch": 898.1578947368421, + "grad_norm": 0.8119854927062988, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 136520 + }, + { + "epoch": 898.2236842105264, + "grad_norm": 0.8097248077392578, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 136530 + }, + { + "epoch": 898.2894736842105, + "grad_norm": 1.080000638961792, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 136540 + }, + { + "epoch": 898.3552631578947, + "grad_norm": 1.0661876201629639, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 136550 + }, + { + "epoch": 898.421052631579, + "grad_norm": 1.1550641059875488, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 136560 + }, + { + "epoch": 898.4868421052631, + "grad_norm": 1.1951998472213745, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 136570 + }, + { + "epoch": 898.5526315789474, + "grad_norm": 0.6485775709152222, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 136580 + }, + { + "epoch": 898.6184210526316, + "grad_norm": 0.8973692059516907, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 136590 + }, + { + "epoch": 898.6842105263158, + "grad_norm": 0.7221608757972717, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 136600 + }, + { + "epoch": 898.75, + "grad_norm": 0.7160447835922241, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 136610 + }, + { + "epoch": 898.8157894736842, + "grad_norm": 0.9838132262229919, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 136620 + }, + { + "epoch": 898.8815789473684, + "grad_norm": 0.8849008679389954, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 136630 + }, + { + "epoch": 898.9473684210526, + "grad_norm": 1.1569770574569702, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 136640 + }, + { + "epoch": 899.0131578947369, + "grad_norm": 0.594029426574707, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 136650 + }, + { + "epoch": 899.078947368421, + "grad_norm": 0.8497085571289062, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 136660 + }, + { + "epoch": 899.1447368421053, + "grad_norm": 0.8578495383262634, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 136670 + }, + { + "epoch": 899.2105263157895, + "grad_norm": 1.1420329809188843, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 136680 + }, + { + "epoch": 899.2763157894736, + "grad_norm": 1.0156878232955933, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 136690 + }, + { + "epoch": 899.3421052631579, + "grad_norm": 0.8283445239067078, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 136700 + }, + { + "epoch": 899.4078947368421, + "grad_norm": 0.8237717747688293, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 136710 + }, + { + "epoch": 899.4736842105264, + "grad_norm": 0.9158399105072021, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 136720 + }, + { + "epoch": 899.5394736842105, + "grad_norm": 0.850222110748291, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 136730 + }, + { + "epoch": 899.6052631578947, + "grad_norm": 1.1023249626159668, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 136740 + }, + { + "epoch": 899.671052631579, + "grad_norm": 0.9188584685325623, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 136750 + }, + { + "epoch": 899.7368421052631, + "grad_norm": 1.0491944551467896, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 136760 + }, + { + "epoch": 899.8026315789474, + "grad_norm": 0.7473717927932739, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 136770 + }, + { + "epoch": 899.8684210526316, + "grad_norm": 0.8892393112182617, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 136780 + }, + { + "epoch": 899.9342105263158, + "grad_norm": 1.0245060920715332, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 136790 + }, + { + "epoch": 900.0, + "grad_norm": 1.3008759021759033, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 136800 + }, + { + "epoch": 900.0657894736842, + "grad_norm": 1.4540356397628784, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 136810 + }, + { + "epoch": 900.1315789473684, + "grad_norm": 1.342063069343567, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 136820 + }, + { + "epoch": 900.1973684210526, + "grad_norm": 1.1454083919525146, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 136830 + }, + { + "epoch": 900.2631578947369, + "grad_norm": 1.6022448539733887, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 136840 + }, + { + "epoch": 900.328947368421, + "grad_norm": 0.9221236109733582, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 136850 + }, + { + "epoch": 900.3947368421053, + "grad_norm": 1.3476216793060303, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 136860 + }, + { + "epoch": 900.4605263157895, + "grad_norm": 0.8298095464706421, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 136870 + }, + { + "epoch": 900.5263157894736, + "grad_norm": 0.7791982293128967, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 136880 + }, + { + "epoch": 900.5921052631579, + "grad_norm": 1.068069577217102, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 136890 + }, + { + "epoch": 900.6578947368421, + "grad_norm": 0.7247030138969421, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 136900 + }, + { + "epoch": 900.7236842105264, + "grad_norm": 0.8441376686096191, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 136910 + }, + { + "epoch": 900.7894736842105, + "grad_norm": 1.0622183084487915, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 136920 + }, + { + "epoch": 900.8552631578947, + "grad_norm": 0.9633458256721497, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 136930 + }, + { + "epoch": 900.921052631579, + "grad_norm": 1.0135974884033203, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 136940 + }, + { + "epoch": 900.9868421052631, + "grad_norm": 0.9505342841148376, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 136950 + }, + { + "epoch": 901.0526315789474, + "grad_norm": 0.7464510202407837, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 136960 + }, + { + "epoch": 901.1184210526316, + "grad_norm": 1.1256731748580933, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 136970 + }, + { + "epoch": 901.1842105263158, + "grad_norm": 1.1023762226104736, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 136980 + }, + { + "epoch": 901.25, + "grad_norm": 1.3419615030288696, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 136990 + }, + { + "epoch": 901.3157894736842, + "grad_norm": 1.1724894046783447, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 137000 + }, + { + "epoch": 901.3815789473684, + "grad_norm": 1.1471381187438965, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 137010 + }, + { + "epoch": 901.4473684210526, + "grad_norm": 1.5692983865737915, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 137020 + }, + { + "epoch": 901.5131578947369, + "grad_norm": 1.1495898962020874, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 137030 + }, + { + "epoch": 901.578947368421, + "grad_norm": 0.7561367154121399, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 137040 + }, + { + "epoch": 901.6447368421053, + "grad_norm": 0.8060752749443054, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 137050 + }, + { + "epoch": 901.7105263157895, + "grad_norm": 1.0309655666351318, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 137060 + }, + { + "epoch": 901.7763157894736, + "grad_norm": 1.3897480964660645, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 137070 + }, + { + "epoch": 901.8421052631579, + "grad_norm": 1.1561673879623413, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 137080 + }, + { + "epoch": 901.9078947368421, + "grad_norm": 1.1159383058547974, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 137090 + }, + { + "epoch": 901.9736842105264, + "grad_norm": 1.2140202522277832, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 137100 + }, + { + "epoch": 902.0394736842105, + "grad_norm": 1.0120538473129272, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 137110 + }, + { + "epoch": 902.1052631578947, + "grad_norm": 1.3522586822509766, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 137120 + }, + { + "epoch": 902.171052631579, + "grad_norm": 1.1707468032836914, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 137130 + }, + { + "epoch": 902.2368421052631, + "grad_norm": 1.209934949874878, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 137140 + }, + { + "epoch": 902.3026315789474, + "grad_norm": 0.8782662153244019, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 137150 + }, + { + "epoch": 902.3684210526316, + "grad_norm": 0.9009442329406738, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 137160 + }, + { + "epoch": 902.4342105263158, + "grad_norm": 1.024381399154663, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 137170 + }, + { + "epoch": 902.5, + "grad_norm": 1.0990251302719116, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 137180 + }, + { + "epoch": 902.5657894736842, + "grad_norm": 1.1666136980056763, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 137190 + }, + { + "epoch": 902.6315789473684, + "grad_norm": 1.1644891500473022, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 137200 + }, + { + "epoch": 902.6973684210526, + "grad_norm": 1.3089399337768555, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 137210 + }, + { + "epoch": 902.7631578947369, + "grad_norm": 1.2203880548477173, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 137220 + }, + { + "epoch": 902.828947368421, + "grad_norm": 1.0612380504608154, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 137230 + }, + { + "epoch": 902.8947368421053, + "grad_norm": 1.1838760375976562, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 137240 + }, + { + "epoch": 902.9605263157895, + "grad_norm": 1.0632514953613281, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 137250 + }, + { + "epoch": 903.0263157894736, + "grad_norm": 0.9610679745674133, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 137260 + }, + { + "epoch": 903.0921052631579, + "grad_norm": 0.670998215675354, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 137270 + }, + { + "epoch": 903.1578947368421, + "grad_norm": 0.9140186905860901, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 137280 + }, + { + "epoch": 903.2236842105264, + "grad_norm": 1.182522177696228, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 137290 + }, + { + "epoch": 903.2894736842105, + "grad_norm": 0.9858769774436951, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 137300 + }, + { + "epoch": 903.3552631578947, + "grad_norm": 1.1477785110473633, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 137310 + }, + { + "epoch": 903.421052631579, + "grad_norm": 1.2154635190963745, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 137320 + }, + { + "epoch": 903.4868421052631, + "grad_norm": 0.9853901863098145, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 137330 + }, + { + "epoch": 903.5526315789474, + "grad_norm": 1.2945221662521362, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 137340 + }, + { + "epoch": 903.6184210526316, + "grad_norm": 1.0276622772216797, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 137350 + }, + { + "epoch": 903.6842105263158, + "grad_norm": 0.8371639251708984, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 137360 + }, + { + "epoch": 903.75, + "grad_norm": 1.3738861083984375, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 137370 + }, + { + "epoch": 903.8157894736842, + "grad_norm": 0.9806889295578003, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 137380 + }, + { + "epoch": 903.8815789473684, + "grad_norm": 1.3452116250991821, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 137390 + }, + { + "epoch": 903.9473684210526, + "grad_norm": 1.2465362548828125, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 137400 + }, + { + "epoch": 904.0131578947369, + "grad_norm": 0.8776252269744873, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 137410 + }, + { + "epoch": 904.078947368421, + "grad_norm": 0.8679521083831787, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 137420 + }, + { + "epoch": 904.1447368421053, + "grad_norm": 1.0201027393341064, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 137430 + }, + { + "epoch": 904.2105263157895, + "grad_norm": 1.2104623317718506, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 137440 + }, + { + "epoch": 904.2763157894736, + "grad_norm": 0.9057734608650208, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 137450 + }, + { + "epoch": 904.3421052631579, + "grad_norm": 1.0167741775512695, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 137460 + }, + { + "epoch": 904.4078947368421, + "grad_norm": 0.5943556427955627, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 137470 + }, + { + "epoch": 904.4736842105264, + "grad_norm": 0.8920695781707764, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 137480 + }, + { + "epoch": 904.5394736842105, + "grad_norm": 0.9045788645744324, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 137490 + }, + { + "epoch": 904.6052631578947, + "grad_norm": 0.9521625638008118, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 137500 + }, + { + "epoch": 904.671052631579, + "grad_norm": 0.8317375779151917, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 137510 + }, + { + "epoch": 904.7368421052631, + "grad_norm": 0.6917269229888916, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 137520 + }, + { + "epoch": 904.8026315789474, + "grad_norm": 1.1361898183822632, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 137530 + }, + { + "epoch": 904.8684210526316, + "grad_norm": 1.2201398611068726, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 137540 + }, + { + "epoch": 904.9342105263158, + "grad_norm": 0.8375370502471924, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 137550 + }, + { + "epoch": 905.0, + "grad_norm": 1.1140497922897339, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 137560 + }, + { + "epoch": 905.0657894736842, + "grad_norm": 1.0850721597671509, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 137570 + }, + { + "epoch": 905.1315789473684, + "grad_norm": 0.9781196117401123, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 137580 + }, + { + "epoch": 905.1973684210526, + "grad_norm": 1.1050325632095337, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 137590 + }, + { + "epoch": 905.2631578947369, + "grad_norm": 0.8729236721992493, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 137600 + }, + { + "epoch": 905.328947368421, + "grad_norm": 1.088930368423462, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 137610 + }, + { + "epoch": 905.3947368421053, + "grad_norm": 1.292374849319458, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 137620 + }, + { + "epoch": 905.4605263157895, + "grad_norm": 0.9945433735847473, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 137630 + }, + { + "epoch": 905.5263157894736, + "grad_norm": 1.1098923683166504, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 137640 + }, + { + "epoch": 905.5921052631579, + "grad_norm": 1.15111243724823, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 137650 + }, + { + "epoch": 905.6578947368421, + "grad_norm": 0.8712033629417419, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 137660 + }, + { + "epoch": 905.7236842105264, + "grad_norm": 1.0952752828598022, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 137670 + }, + { + "epoch": 905.7894736842105, + "grad_norm": 1.2315001487731934, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 137680 + }, + { + "epoch": 905.8552631578947, + "grad_norm": 1.5495014190673828, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 137690 + }, + { + "epoch": 905.921052631579, + "grad_norm": 1.7285585403442383, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 137700 + }, + { + "epoch": 905.9868421052631, + "grad_norm": 1.7616963386535645, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 137710 + }, + { + "epoch": 906.0526315789474, + "grad_norm": 1.4196006059646606, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 137720 + }, + { + "epoch": 906.1184210526316, + "grad_norm": 1.3745293617248535, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 137730 + }, + { + "epoch": 906.1842105263158, + "grad_norm": 0.8986107707023621, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 137740 + }, + { + "epoch": 906.25, + "grad_norm": 0.9261276721954346, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 137750 + }, + { + "epoch": 906.3157894736842, + "grad_norm": 1.2320727109909058, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 137760 + }, + { + "epoch": 906.3815789473684, + "grad_norm": 1.3749173879623413, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 137770 + }, + { + "epoch": 906.4473684210526, + "grad_norm": 1.2523115873336792, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 137780 + }, + { + "epoch": 906.5131578947369, + "grad_norm": 0.9561618566513062, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 137790 + }, + { + "epoch": 906.578947368421, + "grad_norm": 1.0065858364105225, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 137800 + }, + { + "epoch": 906.6447368421053, + "grad_norm": 1.0665801763534546, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 137810 + }, + { + "epoch": 906.7105263157895, + "grad_norm": 1.0746973752975464, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 137820 + }, + { + "epoch": 906.7763157894736, + "grad_norm": 0.6593524217605591, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 137830 + }, + { + "epoch": 906.8421052631579, + "grad_norm": 0.9232041239738464, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 137840 + }, + { + "epoch": 906.9078947368421, + "grad_norm": 0.9710672497749329, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 137850 + }, + { + "epoch": 906.9736842105264, + "grad_norm": 1.034181833267212, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 137860 + }, + { + "epoch": 907.0394736842105, + "grad_norm": 0.5571140646934509, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 137870 + }, + { + "epoch": 907.1052631578947, + "grad_norm": 1.2709097862243652, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 137880 + }, + { + "epoch": 907.171052631579, + "grad_norm": 1.4115272760391235, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 137890 + }, + { + "epoch": 907.2368421052631, + "grad_norm": 1.275320291519165, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 137900 + }, + { + "epoch": 907.3026315789474, + "grad_norm": 0.980765163898468, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 137910 + }, + { + "epoch": 907.3684210526316, + "grad_norm": 0.9128990173339844, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 137920 + }, + { + "epoch": 907.4342105263158, + "grad_norm": 0.8334782719612122, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 137930 + }, + { + "epoch": 907.5, + "grad_norm": 0.7317301630973816, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 137940 + }, + { + "epoch": 907.5657894736842, + "grad_norm": 1.2653545141220093, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 137950 + }, + { + "epoch": 907.6315789473684, + "grad_norm": 1.2031755447387695, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 137960 + }, + { + "epoch": 907.6973684210526, + "grad_norm": 0.6481014490127563, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 137970 + }, + { + "epoch": 907.7631578947369, + "grad_norm": 1.1213470697402954, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 137980 + }, + { + "epoch": 907.828947368421, + "grad_norm": 1.4939583539962769, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 137990 + }, + { + "epoch": 907.8947368421053, + "grad_norm": 1.7831441164016724, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 138000 + }, + { + "epoch": 907.9605263157895, + "grad_norm": 1.7211501598358154, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 138010 + }, + { + "epoch": 908.0263157894736, + "grad_norm": 1.5099759101867676, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 138020 + }, + { + "epoch": 908.0921052631579, + "grad_norm": 1.137043833732605, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 138030 + }, + { + "epoch": 908.1578947368421, + "grad_norm": 1.2676162719726562, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 138040 + }, + { + "epoch": 908.2236842105264, + "grad_norm": 0.994552731513977, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 138050 + }, + { + "epoch": 908.2894736842105, + "grad_norm": 1.0138901472091675, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 138060 + }, + { + "epoch": 908.3552631578947, + "grad_norm": 0.9943720698356628, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 138070 + }, + { + "epoch": 908.421052631579, + "grad_norm": 1.1211268901824951, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 138080 + }, + { + "epoch": 908.4868421052631, + "grad_norm": 1.2332216501235962, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 138090 + }, + { + "epoch": 908.5526315789474, + "grad_norm": 1.1603976488113403, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 138100 + }, + { + "epoch": 908.6184210526316, + "grad_norm": 0.789871871471405, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 138110 + }, + { + "epoch": 908.6842105263158, + "grad_norm": 1.0970319509506226, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 138120 + }, + { + "epoch": 908.75, + "grad_norm": 1.0479586124420166, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 138130 + }, + { + "epoch": 908.8157894736842, + "grad_norm": 0.932201623916626, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 138140 + }, + { + "epoch": 908.8815789473684, + "grad_norm": 0.9041379690170288, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 138150 + }, + { + "epoch": 908.9473684210526, + "grad_norm": 0.9784705638885498, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 138160 + }, + { + "epoch": 909.0131578947369, + "grad_norm": 1.0539556741714478, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 138170 + }, + { + "epoch": 909.078947368421, + "grad_norm": 1.054840087890625, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 138180 + }, + { + "epoch": 909.1447368421053, + "grad_norm": 0.817751944065094, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 138190 + }, + { + "epoch": 909.2105263157895, + "grad_norm": 1.2767053842544556, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 138200 + }, + { + "epoch": 909.2763157894736, + "grad_norm": 0.9269699454307556, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 138210 + }, + { + "epoch": 909.3421052631579, + "grad_norm": 0.7192962765693665, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 138220 + }, + { + "epoch": 909.4078947368421, + "grad_norm": 1.0182446241378784, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 138230 + }, + { + "epoch": 909.4736842105264, + "grad_norm": 0.8939855694770813, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 138240 + }, + { + "epoch": 909.5394736842105, + "grad_norm": 1.3759862184524536, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 138250 + }, + { + "epoch": 909.6052631578947, + "grad_norm": 1.0938113927841187, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 138260 + }, + { + "epoch": 909.671052631579, + "grad_norm": 1.2169560194015503, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 138270 + }, + { + "epoch": 909.7368421052631, + "grad_norm": 0.8270349502563477, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 138280 + }, + { + "epoch": 909.8026315789474, + "grad_norm": 0.9773039221763611, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 138290 + }, + { + "epoch": 909.8684210526316, + "grad_norm": 0.9139556884765625, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 138300 + }, + { + "epoch": 909.9342105263158, + "grad_norm": 1.0602258443832397, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 138310 + }, + { + "epoch": 910.0, + "grad_norm": 1.1161372661590576, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 138320 + }, + { + "epoch": 910.0657894736842, + "grad_norm": 1.1189671754837036, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 138330 + }, + { + "epoch": 910.1315789473684, + "grad_norm": 0.8698909282684326, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 138340 + }, + { + "epoch": 910.1973684210526, + "grad_norm": 0.9428373575210571, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 138350 + }, + { + "epoch": 910.2631578947369, + "grad_norm": 0.6504638195037842, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 138360 + }, + { + "epoch": 910.328947368421, + "grad_norm": 0.8180758357048035, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 138370 + }, + { + "epoch": 910.3947368421053, + "grad_norm": 1.1417499780654907, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 138380 + }, + { + "epoch": 910.4605263157895, + "grad_norm": 0.8425560593605042, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 138390 + }, + { + "epoch": 910.5263157894736, + "grad_norm": 1.2811640501022339, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 138400 + }, + { + "epoch": 910.5921052631579, + "grad_norm": 1.0059995651245117, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 138410 + }, + { + "epoch": 910.6578947368421, + "grad_norm": 1.4118963479995728, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 138420 + }, + { + "epoch": 910.7236842105264, + "grad_norm": 1.055229663848877, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 138430 + }, + { + "epoch": 910.7894736842105, + "grad_norm": 1.1390751600265503, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 138440 + }, + { + "epoch": 910.8552631578947, + "grad_norm": 1.3545476198196411, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 138450 + }, + { + "epoch": 910.921052631579, + "grad_norm": 1.0266139507293701, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 138460 + }, + { + "epoch": 910.9868421052631, + "grad_norm": 1.0077507495880127, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 138470 + }, + { + "epoch": 911.0526315789474, + "grad_norm": 1.1615369319915771, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 138480 + }, + { + "epoch": 911.1184210526316, + "grad_norm": 1.2945265769958496, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 138490 + }, + { + "epoch": 911.1842105263158, + "grad_norm": 0.7563729286193848, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 138500 + }, + { + "epoch": 911.25, + "grad_norm": 0.941920280456543, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 138510 + }, + { + "epoch": 911.3157894736842, + "grad_norm": 0.8620559573173523, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 138520 + }, + { + "epoch": 911.3815789473684, + "grad_norm": 1.0454643964767456, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 138530 + }, + { + "epoch": 911.4473684210526, + "grad_norm": 1.2060420513153076, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 138540 + }, + { + "epoch": 911.5131578947369, + "grad_norm": 1.0942254066467285, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 138550 + }, + { + "epoch": 911.578947368421, + "grad_norm": 0.7879404425621033, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 138560 + }, + { + "epoch": 911.6447368421053, + "grad_norm": 1.4239997863769531, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 138570 + }, + { + "epoch": 911.7105263157895, + "grad_norm": 1.384261965751648, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 138580 + }, + { + "epoch": 911.7763157894736, + "grad_norm": 1.2264975309371948, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 138590 + }, + { + "epoch": 911.8421052631579, + "grad_norm": 1.1380722522735596, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 138600 + }, + { + "epoch": 911.9078947368421, + "grad_norm": 1.4093822240829468, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 138610 + }, + { + "epoch": 911.9736842105264, + "grad_norm": 0.9658661484718323, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 138620 + }, + { + "epoch": 912.0394736842105, + "grad_norm": 1.6494758129119873, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 138630 + }, + { + "epoch": 912.1052631578947, + "grad_norm": 1.223433494567871, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 138640 + }, + { + "epoch": 912.171052631579, + "grad_norm": 1.1293435096740723, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 138650 + }, + { + "epoch": 912.2368421052631, + "grad_norm": 1.3169997930526733, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 138660 + }, + { + "epoch": 912.3026315789474, + "grad_norm": 1.1258878707885742, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 138670 + }, + { + "epoch": 912.3684210526316, + "grad_norm": 1.0870180130004883, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 138680 + }, + { + "epoch": 912.4342105263158, + "grad_norm": 1.2303227186203003, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 138690 + }, + { + "epoch": 912.5, + "grad_norm": 1.3241380453109741, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 138700 + }, + { + "epoch": 912.5657894736842, + "grad_norm": 0.9058986306190491, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 138710 + }, + { + "epoch": 912.6315789473684, + "grad_norm": 1.2740260362625122, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 138720 + }, + { + "epoch": 912.6973684210526, + "grad_norm": 1.073053240776062, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 138730 + }, + { + "epoch": 912.7631578947369, + "grad_norm": 0.9886819124221802, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 138740 + }, + { + "epoch": 912.828947368421, + "grad_norm": 1.464430332183838, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 138750 + }, + { + "epoch": 912.8947368421053, + "grad_norm": 1.4598714113235474, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 138760 + }, + { + "epoch": 912.9605263157895, + "grad_norm": 1.233940839767456, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 138770 + }, + { + "epoch": 913.0263157894736, + "grad_norm": 0.9200678467750549, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 138780 + }, + { + "epoch": 913.0921052631579, + "grad_norm": 0.763102650642395, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 138790 + }, + { + "epoch": 913.1578947368421, + "grad_norm": 0.944960355758667, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 138800 + }, + { + "epoch": 913.2236842105264, + "grad_norm": 0.9596688747406006, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 138810 + }, + { + "epoch": 913.2894736842105, + "grad_norm": 0.8753271102905273, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 138820 + }, + { + "epoch": 913.3552631578947, + "grad_norm": 0.8596283197402954, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 138830 + }, + { + "epoch": 913.421052631579, + "grad_norm": 0.9290410280227661, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 138840 + }, + { + "epoch": 913.4868421052631, + "grad_norm": 0.703997015953064, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 138850 + }, + { + "epoch": 913.5526315789474, + "grad_norm": 0.7406630516052246, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 138860 + }, + { + "epoch": 913.6184210526316, + "grad_norm": 0.5769844055175781, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 138870 + }, + { + "epoch": 913.6842105263158, + "grad_norm": 0.8807429075241089, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 138880 + }, + { + "epoch": 913.75, + "grad_norm": 1.0065295696258545, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 138890 + }, + { + "epoch": 913.8157894736842, + "grad_norm": 0.9491748213768005, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 138900 + }, + { + "epoch": 913.8815789473684, + "grad_norm": 0.8654152750968933, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 138910 + }, + { + "epoch": 913.9473684210526, + "grad_norm": 0.9335588216781616, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 138920 + }, + { + "epoch": 914.0131578947369, + "grad_norm": 1.165829062461853, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 138930 + }, + { + "epoch": 914.078947368421, + "grad_norm": 0.8912755250930786, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 138940 + }, + { + "epoch": 914.1447368421053, + "grad_norm": 1.1820372343063354, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 138950 + }, + { + "epoch": 914.2105263157895, + "grad_norm": 1.3358891010284424, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 138960 + }, + { + "epoch": 914.2763157894736, + "grad_norm": 0.878984272480011, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 138970 + }, + { + "epoch": 914.3421052631579, + "grad_norm": 1.2799499034881592, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 138980 + }, + { + "epoch": 914.4078947368421, + "grad_norm": 0.9388821125030518, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 138990 + }, + { + "epoch": 914.4736842105264, + "grad_norm": 1.0505149364471436, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 139000 + }, + { + "epoch": 914.5394736842105, + "grad_norm": 0.869010329246521, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 139010 + }, + { + "epoch": 914.6052631578947, + "grad_norm": 0.8790243864059448, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 139020 + }, + { + "epoch": 914.671052631579, + "grad_norm": 0.7046595811843872, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 139030 + }, + { + "epoch": 914.7368421052631, + "grad_norm": 0.9645804166793823, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 139040 + }, + { + "epoch": 914.8026315789474, + "grad_norm": 1.3589966297149658, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 139050 + }, + { + "epoch": 914.8684210526316, + "grad_norm": 1.1540025472640991, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 139060 + }, + { + "epoch": 914.9342105263158, + "grad_norm": 0.794073224067688, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 139070 + }, + { + "epoch": 915.0, + "grad_norm": 0.9464465975761414, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 139080 + }, + { + "epoch": 915.0657894736842, + "grad_norm": 1.0088104009628296, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 139090 + }, + { + "epoch": 915.1315789473684, + "grad_norm": 1.040697693824768, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 139100 + }, + { + "epoch": 915.1973684210526, + "grad_norm": 1.2374768257141113, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 139110 + }, + { + "epoch": 915.2631578947369, + "grad_norm": 1.1254475116729736, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 139120 + }, + { + "epoch": 915.328947368421, + "grad_norm": 1.0068753957748413, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 139130 + }, + { + "epoch": 915.3947368421053, + "grad_norm": 1.0009926557540894, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 139140 + }, + { + "epoch": 915.4605263157895, + "grad_norm": 1.104837417602539, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 139150 + }, + { + "epoch": 915.5263157894736, + "grad_norm": 0.9055717587471008, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 139160 + }, + { + "epoch": 915.5921052631579, + "grad_norm": 1.2613590955734253, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 139170 + }, + { + "epoch": 915.6578947368421, + "grad_norm": 0.9277036786079407, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 139180 + }, + { + "epoch": 915.7236842105264, + "grad_norm": 0.8551871180534363, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 139190 + }, + { + "epoch": 915.7894736842105, + "grad_norm": 1.1076236963272095, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 139200 + }, + { + "epoch": 915.8552631578947, + "grad_norm": 1.120894432067871, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 139210 + }, + { + "epoch": 915.921052631579, + "grad_norm": 0.8614113926887512, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 139220 + }, + { + "epoch": 915.9868421052631, + "grad_norm": 0.8355217576026917, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 139230 + }, + { + "epoch": 916.0526315789474, + "grad_norm": 0.9857189655303955, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 139240 + }, + { + "epoch": 916.1184210526316, + "grad_norm": 0.8549261689186096, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 139250 + }, + { + "epoch": 916.1842105263158, + "grad_norm": 1.0027446746826172, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 139260 + }, + { + "epoch": 916.25, + "grad_norm": 0.9751135110855103, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 139270 + }, + { + "epoch": 916.3157894736842, + "grad_norm": 1.3326411247253418, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 139280 + }, + { + "epoch": 916.3815789473684, + "grad_norm": 0.8798542618751526, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 139290 + }, + { + "epoch": 916.4473684210526, + "grad_norm": 1.432790994644165, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 139300 + }, + { + "epoch": 916.5131578947369, + "grad_norm": 1.3682509660720825, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 139310 + }, + { + "epoch": 916.578947368421, + "grad_norm": 0.9973912835121155, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 139320 + }, + { + "epoch": 916.6447368421053, + "grad_norm": 1.2774707078933716, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 139330 + }, + { + "epoch": 916.7105263157895, + "grad_norm": 1.1853746175765991, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 139340 + }, + { + "epoch": 916.7763157894736, + "grad_norm": 1.0939549207687378, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 139350 + }, + { + "epoch": 916.8421052631579, + "grad_norm": 1.6797022819519043, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 139360 + }, + { + "epoch": 916.9078947368421, + "grad_norm": 0.9860899448394775, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 139370 + }, + { + "epoch": 916.9736842105264, + "grad_norm": 1.0874377489089966, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 139380 + }, + { + "epoch": 917.0394736842105, + "grad_norm": 0.8851874470710754, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 139390 + }, + { + "epoch": 917.1052631578947, + "grad_norm": 1.039806604385376, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 139400 + }, + { + "epoch": 917.171052631579, + "grad_norm": 1.1718597412109375, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 139410 + }, + { + "epoch": 917.2368421052631, + "grad_norm": 0.9298036098480225, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 139420 + }, + { + "epoch": 917.3026315789474, + "grad_norm": 1.1916438341140747, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 139430 + }, + { + "epoch": 917.3684210526316, + "grad_norm": 1.2922661304473877, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 139440 + }, + { + "epoch": 917.4342105263158, + "grad_norm": 1.0661784410476685, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 139450 + }, + { + "epoch": 917.5, + "grad_norm": 1.1906532049179077, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 139460 + }, + { + "epoch": 917.5657894736842, + "grad_norm": 1.1400047540664673, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 139470 + }, + { + "epoch": 917.6315789473684, + "grad_norm": 1.0732362270355225, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 139480 + }, + { + "epoch": 917.6973684210526, + "grad_norm": 0.8187721371650696, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 139490 + }, + { + "epoch": 917.7631578947369, + "grad_norm": 0.8629974722862244, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 139500 + }, + { + "epoch": 917.828947368421, + "grad_norm": 1.0016518831253052, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 139510 + }, + { + "epoch": 917.8947368421053, + "grad_norm": 0.9016565680503845, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 139520 + }, + { + "epoch": 917.9605263157895, + "grad_norm": 0.7531505227088928, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 139530 + }, + { + "epoch": 918.0263157894736, + "grad_norm": 0.846524178981781, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 139540 + }, + { + "epoch": 918.0921052631579, + "grad_norm": 1.01736581325531, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 139550 + }, + { + "epoch": 918.1578947368421, + "grad_norm": 0.9176644086837769, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 139560 + }, + { + "epoch": 918.2236842105264, + "grad_norm": 1.1797696352005005, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 139570 + }, + { + "epoch": 918.2894736842105, + "grad_norm": 0.9965087175369263, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 139580 + }, + { + "epoch": 918.3552631578947, + "grad_norm": 1.053809642791748, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 139590 + }, + { + "epoch": 918.421052631579, + "grad_norm": 0.8142527341842651, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 139600 + }, + { + "epoch": 918.4868421052631, + "grad_norm": 1.2485212087631226, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 139610 + }, + { + "epoch": 918.5526315789474, + "grad_norm": 0.9339953660964966, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 139620 + }, + { + "epoch": 918.6184210526316, + "grad_norm": 1.036667823791504, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 139630 + }, + { + "epoch": 918.6842105263158, + "grad_norm": 0.8027102947235107, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 139640 + }, + { + "epoch": 918.75, + "grad_norm": 0.7558383345603943, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 139650 + }, + { + "epoch": 918.8157894736842, + "grad_norm": 0.6426530480384827, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 139660 + }, + { + "epoch": 918.8815789473684, + "grad_norm": 0.6960042715072632, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 139670 + }, + { + "epoch": 918.9473684210526, + "grad_norm": 0.6835951209068298, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 139680 + }, + { + "epoch": 919.0131578947369, + "grad_norm": 1.270308017730713, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 139690 + }, + { + "epoch": 919.078947368421, + "grad_norm": 1.1782221794128418, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 139700 + }, + { + "epoch": 919.1447368421053, + "grad_norm": 1.0767455101013184, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 139710 + }, + { + "epoch": 919.2105263157895, + "grad_norm": 0.8092596530914307, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 139720 + }, + { + "epoch": 919.2763157894736, + "grad_norm": 0.9447590112686157, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 139730 + }, + { + "epoch": 919.3421052631579, + "grad_norm": 1.1488889455795288, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 139740 + }, + { + "epoch": 919.4078947368421, + "grad_norm": 0.6854469180107117, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 139750 + }, + { + "epoch": 919.4736842105264, + "grad_norm": 0.9892016649246216, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 139760 + }, + { + "epoch": 919.5394736842105, + "grad_norm": 1.0678046941757202, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 139770 + }, + { + "epoch": 919.6052631578947, + "grad_norm": 1.0261939764022827, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 139780 + }, + { + "epoch": 919.671052631579, + "grad_norm": 0.7115074396133423, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 139790 + }, + { + "epoch": 919.7368421052631, + "grad_norm": 0.8169745802879333, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 139800 + }, + { + "epoch": 919.8026315789474, + "grad_norm": 0.951320469379425, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 139810 + }, + { + "epoch": 919.8684210526316, + "grad_norm": 1.1289328336715698, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 139820 + }, + { + "epoch": 919.9342105263158, + "grad_norm": 0.8910213112831116, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 139830 + }, + { + "epoch": 920.0, + "grad_norm": 1.091223120689392, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 139840 + }, + { + "epoch": 920.0657894736842, + "grad_norm": 1.1098347902297974, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 139850 + }, + { + "epoch": 920.1315789473684, + "grad_norm": 0.8560648560523987, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 139860 + }, + { + "epoch": 920.1973684210526, + "grad_norm": 0.8434080481529236, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 139870 + }, + { + "epoch": 920.2631578947369, + "grad_norm": 1.10075044631958, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 139880 + }, + { + "epoch": 920.328947368421, + "grad_norm": 1.0007864236831665, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 139890 + }, + { + "epoch": 920.3947368421053, + "grad_norm": 0.9732034802436829, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 139900 + }, + { + "epoch": 920.4605263157895, + "grad_norm": 0.9646409749984741, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 139910 + }, + { + "epoch": 920.5263157894736, + "grad_norm": 0.9488770961761475, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 139920 + }, + { + "epoch": 920.5921052631579, + "grad_norm": 1.1824666261672974, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 139930 + }, + { + "epoch": 920.6578947368421, + "grad_norm": 0.9511621594429016, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 139940 + }, + { + "epoch": 920.7236842105264, + "grad_norm": 1.0742391347885132, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 139950 + }, + { + "epoch": 920.7894736842105, + "grad_norm": 1.4730793237686157, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 139960 + }, + { + "epoch": 920.8552631578947, + "grad_norm": 1.3852665424346924, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 139970 + }, + { + "epoch": 920.921052631579, + "grad_norm": 1.0065243244171143, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 139980 + }, + { + "epoch": 920.9868421052631, + "grad_norm": 1.2180964946746826, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 139990 + }, + { + "epoch": 921.0526315789474, + "grad_norm": 1.0379401445388794, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 140000 + }, + { + "epoch": 921.1184210526316, + "grad_norm": 1.255040168762207, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 140010 + }, + { + "epoch": 921.1842105263158, + "grad_norm": 1.2452787160873413, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 140020 + }, + { + "epoch": 921.25, + "grad_norm": 1.25413978099823, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 140030 + }, + { + "epoch": 921.3157894736842, + "grad_norm": 0.6751289963722229, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 140040 + }, + { + "epoch": 921.3815789473684, + "grad_norm": 1.1533355712890625, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 140050 + }, + { + "epoch": 921.4473684210526, + "grad_norm": 0.6121186017990112, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 140060 + }, + { + "epoch": 921.5131578947369, + "grad_norm": 1.2446112632751465, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 140070 + }, + { + "epoch": 921.578947368421, + "grad_norm": 1.057027816772461, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 140080 + }, + { + "epoch": 921.6447368421053, + "grad_norm": 0.7790706753730774, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 140090 + }, + { + "epoch": 921.7105263157895, + "grad_norm": 0.9158756732940674, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 140100 + }, + { + "epoch": 921.7763157894736, + "grad_norm": 0.7222694754600525, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 140110 + }, + { + "epoch": 921.8421052631579, + "grad_norm": 1.073899269104004, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 140120 + }, + { + "epoch": 921.9078947368421, + "grad_norm": 1.1622812747955322, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 140130 + }, + { + "epoch": 921.9736842105264, + "grad_norm": 1.201113224029541, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 140140 + }, + { + "epoch": 922.0394736842105, + "grad_norm": 1.0527993440628052, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 140150 + }, + { + "epoch": 922.1052631578947, + "grad_norm": 0.7997422814369202, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 140160 + }, + { + "epoch": 922.171052631579, + "grad_norm": 0.9327260851860046, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 140170 + }, + { + "epoch": 922.2368421052631, + "grad_norm": 0.844416618347168, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 140180 + }, + { + "epoch": 922.3026315789474, + "grad_norm": 0.9128046035766602, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 140190 + }, + { + "epoch": 922.3684210526316, + "grad_norm": 0.7824735045433044, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 140200 + }, + { + "epoch": 922.4342105263158, + "grad_norm": 0.961676836013794, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 140210 + }, + { + "epoch": 922.5, + "grad_norm": 0.7900063991546631, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 140220 + }, + { + "epoch": 922.5657894736842, + "grad_norm": 1.3658584356307983, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 140230 + }, + { + "epoch": 922.6315789473684, + "grad_norm": 1.0145326852798462, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 140240 + }, + { + "epoch": 922.6973684210526, + "grad_norm": 1.2029974460601807, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 140250 + }, + { + "epoch": 922.7631578947369, + "grad_norm": 1.2081828117370605, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 140260 + }, + { + "epoch": 922.828947368421, + "grad_norm": 1.177101492881775, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 140270 + }, + { + "epoch": 922.8947368421053, + "grad_norm": 1.2548049688339233, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 140280 + }, + { + "epoch": 922.9605263157895, + "grad_norm": 1.279402732849121, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 140290 + }, + { + "epoch": 923.0263157894736, + "grad_norm": 0.8556655645370483, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 140300 + }, + { + "epoch": 923.0921052631579, + "grad_norm": 0.7919434905052185, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 140310 + }, + { + "epoch": 923.1578947368421, + "grad_norm": 1.0582438707351685, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 140320 + }, + { + "epoch": 923.2236842105264, + "grad_norm": 0.8901892304420471, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 140330 + }, + { + "epoch": 923.2894736842105, + "grad_norm": 0.9587327837944031, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 140340 + }, + { + "epoch": 923.3552631578947, + "grad_norm": 0.9833970665931702, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 140350 + }, + { + "epoch": 923.421052631579, + "grad_norm": 0.9788154363632202, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 140360 + }, + { + "epoch": 923.4868421052631, + "grad_norm": 0.7878212332725525, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 140370 + }, + { + "epoch": 923.5526315789474, + "grad_norm": 0.7902630567550659, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 140380 + }, + { + "epoch": 923.6184210526316, + "grad_norm": 1.0521742105484009, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 140390 + }, + { + "epoch": 923.6842105263158, + "grad_norm": 1.1913330554962158, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 140400 + }, + { + "epoch": 923.75, + "grad_norm": 0.9974690079689026, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 140410 + }, + { + "epoch": 923.8157894736842, + "grad_norm": 0.7740110158920288, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 140420 + }, + { + "epoch": 923.8815789473684, + "grad_norm": 0.651350736618042, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 140430 + }, + { + "epoch": 923.9473684210526, + "grad_norm": 0.8533127903938293, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 140440 + }, + { + "epoch": 924.0131578947369, + "grad_norm": 1.2042335271835327, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 140450 + }, + { + "epoch": 924.078947368421, + "grad_norm": 1.0779392719268799, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 140460 + }, + { + "epoch": 924.1447368421053, + "grad_norm": 1.2157610654830933, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 140470 + }, + { + "epoch": 924.2105263157895, + "grad_norm": 1.7160738706588745, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 140480 + }, + { + "epoch": 924.2763157894736, + "grad_norm": 0.9967711567878723, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 140490 + }, + { + "epoch": 924.3421052631579, + "grad_norm": 0.9991487264633179, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 140500 + }, + { + "epoch": 924.4078947368421, + "grad_norm": 1.028993844985962, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 140510 + }, + { + "epoch": 924.4736842105264, + "grad_norm": 1.1722681522369385, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 140520 + }, + { + "epoch": 924.5394736842105, + "grad_norm": 0.9538052678108215, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 140530 + }, + { + "epoch": 924.6052631578947, + "grad_norm": 0.9765692353248596, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 140540 + }, + { + "epoch": 924.671052631579, + "grad_norm": 1.1148061752319336, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 140550 + }, + { + "epoch": 924.7368421052631, + "grad_norm": 1.2510484457015991, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 140560 + }, + { + "epoch": 924.8026315789474, + "grad_norm": 0.6285231709480286, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 140570 + }, + { + "epoch": 924.8684210526316, + "grad_norm": 0.8659400939941406, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 140580 + }, + { + "epoch": 924.9342105263158, + "grad_norm": 1.054602861404419, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 140590 + }, + { + "epoch": 925.0, + "grad_norm": 1.0485122203826904, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 140600 + }, + { + "epoch": 925.0657894736842, + "grad_norm": 1.1340656280517578, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 140610 + }, + { + "epoch": 925.1315789473684, + "grad_norm": 0.945837140083313, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 140620 + }, + { + "epoch": 925.1973684210526, + "grad_norm": 0.9775026440620422, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 140630 + }, + { + "epoch": 925.2631578947369, + "grad_norm": 0.979673445224762, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 140640 + }, + { + "epoch": 925.328947368421, + "grad_norm": 1.1637605428695679, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 140650 + }, + { + "epoch": 925.3947368421053, + "grad_norm": 1.587935209274292, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 140660 + }, + { + "epoch": 925.4605263157895, + "grad_norm": 1.3600460290908813, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 140670 + }, + { + "epoch": 925.5263157894736, + "grad_norm": 1.253381609916687, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 140680 + }, + { + "epoch": 925.5921052631579, + "grad_norm": 1.4726488590240479, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 140690 + }, + { + "epoch": 925.6578947368421, + "grad_norm": 1.187400221824646, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 140700 + }, + { + "epoch": 925.7236842105264, + "grad_norm": 1.0697247982025146, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 140710 + }, + { + "epoch": 925.7894736842105, + "grad_norm": 1.059090256690979, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 140720 + }, + { + "epoch": 925.8552631578947, + "grad_norm": 1.2741175889968872, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 140730 + }, + { + "epoch": 925.921052631579, + "grad_norm": 1.1128478050231934, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 140740 + }, + { + "epoch": 925.9868421052631, + "grad_norm": 0.7608397006988525, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 140750 + }, + { + "epoch": 926.0526315789474, + "grad_norm": 1.069777250289917, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 140760 + }, + { + "epoch": 926.1184210526316, + "grad_norm": 1.0431427955627441, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 140770 + }, + { + "epoch": 926.1842105263158, + "grad_norm": 1.0773409605026245, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 140780 + }, + { + "epoch": 926.25, + "grad_norm": 1.208251714706421, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 140790 + }, + { + "epoch": 926.3157894736842, + "grad_norm": 1.0173733234405518, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 140800 + }, + { + "epoch": 926.3815789473684, + "grad_norm": 0.7675154209136963, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 140810 + }, + { + "epoch": 926.4473684210526, + "grad_norm": 1.237313985824585, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 140820 + }, + { + "epoch": 926.5131578947369, + "grad_norm": 0.9644690752029419, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 140830 + }, + { + "epoch": 926.578947368421, + "grad_norm": 1.1485686302185059, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 140840 + }, + { + "epoch": 926.6447368421053, + "grad_norm": 1.82488214969635, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 140850 + }, + { + "epoch": 926.7105263157895, + "grad_norm": 1.2388635873794556, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 140860 + }, + { + "epoch": 926.7763157894736, + "grad_norm": 1.3352988958358765, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 140870 + }, + { + "epoch": 926.8421052631579, + "grad_norm": 1.1297030448913574, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 140880 + }, + { + "epoch": 926.9078947368421, + "grad_norm": 1.162842869758606, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 140890 + }, + { + "epoch": 926.9736842105264, + "grad_norm": 1.2064037322998047, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 140900 + }, + { + "epoch": 927.0394736842105, + "grad_norm": 1.08427894115448, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 140910 + }, + { + "epoch": 927.1052631578947, + "grad_norm": 1.1430708169937134, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 140920 + }, + { + "epoch": 927.171052631579, + "grad_norm": 1.0991872549057007, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 140930 + }, + { + "epoch": 927.2368421052631, + "grad_norm": 1.4145089387893677, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 140940 + }, + { + "epoch": 927.3026315789474, + "grad_norm": 0.8903805613517761, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 140950 + }, + { + "epoch": 927.3684210526316, + "grad_norm": 1.02116858959198, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 140960 + }, + { + "epoch": 927.4342105263158, + "grad_norm": 1.196993350982666, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 140970 + }, + { + "epoch": 927.5, + "grad_norm": 1.419411063194275, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 140980 + }, + { + "epoch": 927.5657894736842, + "grad_norm": 1.4672049283981323, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 140990 + }, + { + "epoch": 927.6315789473684, + "grad_norm": 0.8084446787834167, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 141000 + }, + { + "epoch": 927.6973684210526, + "grad_norm": 1.2789831161499023, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 141010 + }, + { + "epoch": 927.7631578947369, + "grad_norm": 0.6588245630264282, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 141020 + }, + { + "epoch": 927.828947368421, + "grad_norm": 0.9887574911117554, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 141030 + }, + { + "epoch": 927.8947368421053, + "grad_norm": 0.8936231136322021, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 141040 + }, + { + "epoch": 927.9605263157895, + "grad_norm": 0.9490195512771606, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 141050 + }, + { + "epoch": 928.0263157894736, + "grad_norm": 1.2485547065734863, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 141060 + }, + { + "epoch": 928.0921052631579, + "grad_norm": 1.1075419187545776, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 141070 + }, + { + "epoch": 928.1578947368421, + "grad_norm": 1.1165497303009033, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 141080 + }, + { + "epoch": 928.2236842105264, + "grad_norm": 1.352881669998169, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 141090 + }, + { + "epoch": 928.2894736842105, + "grad_norm": 1.4939100742340088, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 141100 + }, + { + "epoch": 928.3552631578947, + "grad_norm": 1.2259972095489502, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 141110 + }, + { + "epoch": 928.421052631579, + "grad_norm": 0.6991260647773743, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 141120 + }, + { + "epoch": 928.4868421052631, + "grad_norm": 0.9250067472457886, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 141130 + }, + { + "epoch": 928.5526315789474, + "grad_norm": 1.0126291513442993, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 141140 + }, + { + "epoch": 928.6184210526316, + "grad_norm": 1.092654824256897, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 141150 + }, + { + "epoch": 928.6842105263158, + "grad_norm": 1.143467903137207, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 141160 + }, + { + "epoch": 928.75, + "grad_norm": 1.1244598627090454, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 141170 + }, + { + "epoch": 928.8157894736842, + "grad_norm": 0.8869340419769287, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 141180 + }, + { + "epoch": 928.8815789473684, + "grad_norm": 1.0493642091751099, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 141190 + }, + { + "epoch": 928.9473684210526, + "grad_norm": 1.048553228378296, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 141200 + }, + { + "epoch": 929.0131578947369, + "grad_norm": 1.0063235759735107, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 141210 + }, + { + "epoch": 929.078947368421, + "grad_norm": 0.803489625453949, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 141220 + }, + { + "epoch": 929.1447368421053, + "grad_norm": 0.7754166126251221, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 141230 + }, + { + "epoch": 929.2105263157895, + "grad_norm": 1.123893141746521, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 141240 + }, + { + "epoch": 929.2763157894736, + "grad_norm": 0.8346624970436096, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 141250 + }, + { + "epoch": 929.3421052631579, + "grad_norm": 1.2501662969589233, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 141260 + }, + { + "epoch": 929.4078947368421, + "grad_norm": 0.9028252959251404, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 141270 + }, + { + "epoch": 929.4736842105264, + "grad_norm": 1.1409492492675781, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 141280 + }, + { + "epoch": 929.5394736842105, + "grad_norm": 0.795866847038269, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 141290 + }, + { + "epoch": 929.6052631578947, + "grad_norm": 1.0980581045150757, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 141300 + }, + { + "epoch": 929.671052631579, + "grad_norm": 0.9573304057121277, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 141310 + }, + { + "epoch": 929.7368421052631, + "grad_norm": 0.9308114051818848, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 141320 + }, + { + "epoch": 929.8026315789474, + "grad_norm": 1.13672935962677, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 141330 + }, + { + "epoch": 929.8684210526316, + "grad_norm": 1.0716716051101685, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 141340 + }, + { + "epoch": 929.9342105263158, + "grad_norm": 0.8969205617904663, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 141350 + }, + { + "epoch": 930.0, + "grad_norm": 0.7731922268867493, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 141360 + }, + { + "epoch": 930.0657894736842, + "grad_norm": 1.054961085319519, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 141370 + }, + { + "epoch": 930.1315789473684, + "grad_norm": 1.0062198638916016, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 141380 + }, + { + "epoch": 930.1973684210526, + "grad_norm": 0.8831532597541809, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 141390 + }, + { + "epoch": 930.2631578947369, + "grad_norm": 0.94657963514328, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 141400 + }, + { + "epoch": 930.328947368421, + "grad_norm": 0.7149097919464111, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 141410 + }, + { + "epoch": 930.3947368421053, + "grad_norm": 1.1722291707992554, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 141420 + }, + { + "epoch": 930.4605263157895, + "grad_norm": 1.2141605615615845, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 141430 + }, + { + "epoch": 930.5263157894736, + "grad_norm": 1.0134645700454712, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 141440 + }, + { + "epoch": 930.5921052631579, + "grad_norm": 1.4195690155029297, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 141450 + }, + { + "epoch": 930.6578947368421, + "grad_norm": 1.0043601989746094, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 141460 + }, + { + "epoch": 930.7236842105264, + "grad_norm": 0.9379559755325317, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 141470 + }, + { + "epoch": 930.7894736842105, + "grad_norm": 1.1322745084762573, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 141480 + }, + { + "epoch": 930.8552631578947, + "grad_norm": 1.1862339973449707, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 141490 + }, + { + "epoch": 930.921052631579, + "grad_norm": 1.0361219644546509, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 141500 + }, + { + "epoch": 930.9868421052631, + "grad_norm": 1.4550143480300903, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 141510 + }, + { + "epoch": 931.0526315789474, + "grad_norm": 1.0843679904937744, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 141520 + }, + { + "epoch": 931.1184210526316, + "grad_norm": 1.4889967441558838, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 141530 + }, + { + "epoch": 931.1842105263158, + "grad_norm": 1.130838394165039, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 141540 + }, + { + "epoch": 931.25, + "grad_norm": 1.1887781620025635, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 141550 + }, + { + "epoch": 931.3157894736842, + "grad_norm": 0.8891682028770447, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 141560 + }, + { + "epoch": 931.3815789473684, + "grad_norm": 1.069966197013855, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 141570 + }, + { + "epoch": 931.4473684210526, + "grad_norm": 1.2888097763061523, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 141580 + }, + { + "epoch": 931.5131578947369, + "grad_norm": 0.7364023327827454, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 141590 + }, + { + "epoch": 931.578947368421, + "grad_norm": 1.0230684280395508, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 141600 + }, + { + "epoch": 931.6447368421053, + "grad_norm": 1.1566507816314697, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 141610 + }, + { + "epoch": 931.7105263157895, + "grad_norm": 1.0186455249786377, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 141620 + }, + { + "epoch": 931.7763157894736, + "grad_norm": 1.209118127822876, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 141630 + }, + { + "epoch": 931.8421052631579, + "grad_norm": 0.766610324382782, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 141640 + }, + { + "epoch": 931.9078947368421, + "grad_norm": 1.0066529512405396, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 141650 + }, + { + "epoch": 931.9736842105264, + "grad_norm": 0.9950891137123108, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 141660 + }, + { + "epoch": 932.0394736842105, + "grad_norm": 1.1890411376953125, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 141670 + }, + { + "epoch": 932.1052631578947, + "grad_norm": 0.9757727384567261, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 141680 + }, + { + "epoch": 932.171052631579, + "grad_norm": 0.7399367690086365, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 141690 + }, + { + "epoch": 932.2368421052631, + "grad_norm": 0.8191750645637512, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 141700 + }, + { + "epoch": 932.3026315789474, + "grad_norm": 1.0755620002746582, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 141710 + }, + { + "epoch": 932.3684210526316, + "grad_norm": 1.099184513092041, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 141720 + }, + { + "epoch": 932.4342105263158, + "grad_norm": 0.948433518409729, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 141730 + }, + { + "epoch": 932.5, + "grad_norm": 1.2848048210144043, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 141740 + }, + { + "epoch": 932.5657894736842, + "grad_norm": 1.0524227619171143, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 141750 + }, + { + "epoch": 932.6315789473684, + "grad_norm": 1.1864573955535889, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 141760 + }, + { + "epoch": 932.6973684210526, + "grad_norm": 1.1344093084335327, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 141770 + }, + { + "epoch": 932.7631578947369, + "grad_norm": 1.0128337144851685, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 141780 + }, + { + "epoch": 932.828947368421, + "grad_norm": 0.9922648668289185, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 141790 + }, + { + "epoch": 932.8947368421053, + "grad_norm": 1.1825958490371704, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 141800 + }, + { + "epoch": 932.9605263157895, + "grad_norm": 1.0141911506652832, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 141810 + }, + { + "epoch": 933.0263157894736, + "grad_norm": 1.0173567533493042, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 141820 + }, + { + "epoch": 933.0921052631579, + "grad_norm": 1.0617711544036865, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 141830 + }, + { + "epoch": 933.1578947368421, + "grad_norm": 1.0690877437591553, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 141840 + }, + { + "epoch": 933.2236842105264, + "grad_norm": 0.7836157083511353, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 141850 + }, + { + "epoch": 933.2894736842105, + "grad_norm": 0.8588806986808777, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 141860 + }, + { + "epoch": 933.3552631578947, + "grad_norm": 0.8288969993591309, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 141870 + }, + { + "epoch": 933.421052631579, + "grad_norm": 0.8952768445014954, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 141880 + }, + { + "epoch": 933.4868421052631, + "grad_norm": 0.7412847280502319, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 141890 + }, + { + "epoch": 933.5526315789474, + "grad_norm": 0.5983102321624756, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 141900 + }, + { + "epoch": 933.6184210526316, + "grad_norm": 0.8310821652412415, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 141910 + }, + { + "epoch": 933.6842105263158, + "grad_norm": 1.0020363330841064, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 141920 + }, + { + "epoch": 933.75, + "grad_norm": 0.9288676381111145, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 141930 + }, + { + "epoch": 933.8157894736842, + "grad_norm": 0.8493418097496033, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 141940 + }, + { + "epoch": 933.8815789473684, + "grad_norm": 1.2082266807556152, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 141950 + }, + { + "epoch": 933.9473684210526, + "grad_norm": 1.1100561618804932, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 141960 + }, + { + "epoch": 934.0131578947369, + "grad_norm": 1.3064969778060913, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 141970 + }, + { + "epoch": 934.078947368421, + "grad_norm": 1.045897126197815, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 141980 + }, + { + "epoch": 934.1447368421053, + "grad_norm": 1.0338056087493896, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 141990 + }, + { + "epoch": 934.2105263157895, + "grad_norm": 0.882927656173706, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 142000 + }, + { + "epoch": 934.2763157894736, + "grad_norm": 0.8924429416656494, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 142010 + }, + { + "epoch": 934.3421052631579, + "grad_norm": 1.2137278318405151, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 142020 + }, + { + "epoch": 934.4078947368421, + "grad_norm": 1.2545146942138672, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 142030 + }, + { + "epoch": 934.4736842105264, + "grad_norm": 1.1060088872909546, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 142040 + }, + { + "epoch": 934.5394736842105, + "grad_norm": 0.8980741500854492, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 142050 + }, + { + "epoch": 934.6052631578947, + "grad_norm": 1.3434455394744873, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 142060 + }, + { + "epoch": 934.671052631579, + "grad_norm": 1.541412591934204, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 142070 + }, + { + "epoch": 934.7368421052631, + "grad_norm": 0.7517480850219727, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 142080 + }, + { + "epoch": 934.8026315789474, + "grad_norm": 0.9728532433509827, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 142090 + }, + { + "epoch": 934.8684210526316, + "grad_norm": 0.956839382648468, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 142100 + }, + { + "epoch": 934.9342105263158, + "grad_norm": 0.6664096117019653, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 142110 + }, + { + "epoch": 935.0, + "grad_norm": 0.6346357464790344, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 142120 + }, + { + "epoch": 935.0657894736842, + "grad_norm": 0.8186964392662048, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 142130 + }, + { + "epoch": 935.1315789473684, + "grad_norm": 1.007627248764038, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 142140 + }, + { + "epoch": 935.1973684210526, + "grad_norm": 0.8972962498664856, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 142150 + }, + { + "epoch": 935.2631578947369, + "grad_norm": 0.863339364528656, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 142160 + }, + { + "epoch": 935.328947368421, + "grad_norm": 1.014875054359436, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 142170 + }, + { + "epoch": 935.3947368421053, + "grad_norm": 1.0720738172531128, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 142180 + }, + { + "epoch": 935.4605263157895, + "grad_norm": 0.8214248418807983, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 142190 + }, + { + "epoch": 935.5263157894736, + "grad_norm": 0.9831026792526245, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 142200 + }, + { + "epoch": 935.5921052631579, + "grad_norm": 1.0691466331481934, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 142210 + }, + { + "epoch": 935.6578947368421, + "grad_norm": 1.3758347034454346, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 142220 + }, + { + "epoch": 935.7236842105264, + "grad_norm": 1.7172006368637085, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 142230 + }, + { + "epoch": 935.7894736842105, + "grad_norm": 1.1777650117874146, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 142240 + }, + { + "epoch": 935.8552631578947, + "grad_norm": 1.176995038986206, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 142250 + }, + { + "epoch": 935.921052631579, + "grad_norm": 1.0358529090881348, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 142260 + }, + { + "epoch": 935.9868421052631, + "grad_norm": 1.0105960369110107, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 142270 + }, + { + "epoch": 936.0526315789474, + "grad_norm": 1.2849767208099365, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 142280 + }, + { + "epoch": 936.1184210526316, + "grad_norm": 1.315524935722351, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 142290 + }, + { + "epoch": 936.1842105263158, + "grad_norm": 1.1446727514266968, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 142300 + }, + { + "epoch": 936.25, + "grad_norm": 1.1680877208709717, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 142310 + }, + { + "epoch": 936.3157894736842, + "grad_norm": 1.1997969150543213, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 142320 + }, + { + "epoch": 936.3815789473684, + "grad_norm": 1.043163537979126, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 142330 + }, + { + "epoch": 936.4473684210526, + "grad_norm": 1.2314527034759521, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 142340 + }, + { + "epoch": 936.5131578947369, + "grad_norm": 0.8051645755767822, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 142350 + }, + { + "epoch": 936.578947368421, + "grad_norm": 0.8518595099449158, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 142360 + }, + { + "epoch": 936.6447368421053, + "grad_norm": 1.2750550508499146, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 142370 + }, + { + "epoch": 936.7105263157895, + "grad_norm": 0.7354443073272705, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 142380 + }, + { + "epoch": 936.7763157894736, + "grad_norm": 0.8145555853843689, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 142390 + }, + { + "epoch": 936.8421052631579, + "grad_norm": 1.1294350624084473, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 142400 + }, + { + "epoch": 936.9078947368421, + "grad_norm": 0.8376908302307129, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 142410 + }, + { + "epoch": 936.9736842105264, + "grad_norm": 0.9719381332397461, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 142420 + }, + { + "epoch": 937.0394736842105, + "grad_norm": 0.9003921747207642, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 142430 + }, + { + "epoch": 937.1052631578947, + "grad_norm": 1.2315677404403687, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 142440 + }, + { + "epoch": 937.171052631579, + "grad_norm": 1.2337232828140259, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 142450 + }, + { + "epoch": 937.2368421052631, + "grad_norm": 0.866338312625885, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 142460 + }, + { + "epoch": 937.3026315789474, + "grad_norm": 1.071451187133789, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 142470 + }, + { + "epoch": 937.3684210526316, + "grad_norm": 1.0966578722000122, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 142480 + }, + { + "epoch": 937.4342105263158, + "grad_norm": 0.882290780544281, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 142490 + }, + { + "epoch": 937.5, + "grad_norm": 0.9930370450019836, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 142500 + }, + { + "epoch": 937.5657894736842, + "grad_norm": 0.729529857635498, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 142510 + }, + { + "epoch": 937.6315789473684, + "grad_norm": 0.8702946305274963, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 142520 + }, + { + "epoch": 937.6973684210526, + "grad_norm": 0.8152609467506409, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 142530 + }, + { + "epoch": 937.7631578947369, + "grad_norm": 1.1005128622055054, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 142540 + }, + { + "epoch": 937.828947368421, + "grad_norm": 1.302595615386963, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 142550 + }, + { + "epoch": 937.8947368421053, + "grad_norm": 1.0444657802581787, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 142560 + }, + { + "epoch": 937.9605263157895, + "grad_norm": 0.947675883769989, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 142570 + }, + { + "epoch": 938.0263157894736, + "grad_norm": 0.942642867565155, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 142580 + }, + { + "epoch": 938.0921052631579, + "grad_norm": 1.1774204969406128, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 142590 + }, + { + "epoch": 938.1578947368421, + "grad_norm": 1.1025643348693848, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 142600 + }, + { + "epoch": 938.2236842105264, + "grad_norm": 1.1018234491348267, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 142610 + }, + { + "epoch": 938.2894736842105, + "grad_norm": 1.5480759143829346, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 142620 + }, + { + "epoch": 938.3552631578947, + "grad_norm": 1.1027711629867554, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 142630 + }, + { + "epoch": 938.421052631579, + "grad_norm": 0.9052314162254333, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 142640 + }, + { + "epoch": 938.4868421052631, + "grad_norm": 1.1163532733917236, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 142650 + }, + { + "epoch": 938.5526315789474, + "grad_norm": 1.5282114744186401, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 142660 + }, + { + "epoch": 938.6184210526316, + "grad_norm": 1.0808197259902954, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 142670 + }, + { + "epoch": 938.6842105263158, + "grad_norm": 0.9166187644004822, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 142680 + }, + { + "epoch": 938.75, + "grad_norm": 1.2333557605743408, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 142690 + }, + { + "epoch": 938.8157894736842, + "grad_norm": 1.2331442832946777, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 142700 + }, + { + "epoch": 938.8815789473684, + "grad_norm": 0.7584700584411621, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 142710 + }, + { + "epoch": 938.9473684210526, + "grad_norm": 0.8076496720314026, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 142720 + }, + { + "epoch": 939.0131578947369, + "grad_norm": 1.1252182722091675, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 142730 + }, + { + "epoch": 939.078947368421, + "grad_norm": 0.7238128185272217, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 142740 + }, + { + "epoch": 939.1447368421053, + "grad_norm": 0.9610776305198669, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 142750 + }, + { + "epoch": 939.2105263157895, + "grad_norm": 1.0641119480133057, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 142760 + }, + { + "epoch": 939.2763157894736, + "grad_norm": 1.4816539287567139, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 142770 + }, + { + "epoch": 939.3421052631579, + "grad_norm": 1.748676061630249, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 142780 + }, + { + "epoch": 939.4078947368421, + "grad_norm": 1.8980330228805542, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 142790 + }, + { + "epoch": 939.4736842105264, + "grad_norm": 1.2307404279708862, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 142800 + }, + { + "epoch": 939.5394736842105, + "grad_norm": 1.277494192123413, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 142810 + }, + { + "epoch": 939.6052631578947, + "grad_norm": 1.3489617109298706, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 142820 + }, + { + "epoch": 939.671052631579, + "grad_norm": 1.0544668436050415, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 142830 + }, + { + "epoch": 939.7368421052631, + "grad_norm": 1.1810656785964966, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 142840 + }, + { + "epoch": 939.8026315789474, + "grad_norm": 0.905704140663147, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 142850 + }, + { + "epoch": 939.8684210526316, + "grad_norm": 1.0323855876922607, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 142860 + }, + { + "epoch": 939.9342105263158, + "grad_norm": 1.2508949041366577, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 142870 + }, + { + "epoch": 940.0, + "grad_norm": 1.1149414777755737, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 142880 + }, + { + "epoch": 940.0657894736842, + "grad_norm": 1.280163288116455, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 142890 + }, + { + "epoch": 940.1315789473684, + "grad_norm": 1.270648717880249, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 142900 + }, + { + "epoch": 940.1973684210526, + "grad_norm": 0.8688375949859619, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 142910 + }, + { + "epoch": 940.2631578947369, + "grad_norm": 1.2751940488815308, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 142920 + }, + { + "epoch": 940.328947368421, + "grad_norm": 1.1398617029190063, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 142930 + }, + { + "epoch": 940.3947368421053, + "grad_norm": 0.7770438194274902, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 142940 + }, + { + "epoch": 940.4605263157895, + "grad_norm": 1.470767855644226, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 142950 + }, + { + "epoch": 940.5263157894736, + "grad_norm": 1.3184136152267456, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 142960 + }, + { + "epoch": 940.5921052631579, + "grad_norm": 0.9164167642593384, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 142970 + }, + { + "epoch": 940.6578947368421, + "grad_norm": 1.149084210395813, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 142980 + }, + { + "epoch": 940.7236842105264, + "grad_norm": 1.2743107080459595, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 142990 + }, + { + "epoch": 940.7894736842105, + "grad_norm": 0.6566851139068604, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 143000 + }, + { + "epoch": 940.8552631578947, + "grad_norm": 0.6921648979187012, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 143010 + }, + { + "epoch": 940.921052631579, + "grad_norm": 0.9658145904541016, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 143020 + }, + { + "epoch": 940.9868421052631, + "grad_norm": 1.0340406894683838, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 143030 + }, + { + "epoch": 941.0526315789474, + "grad_norm": 1.061692237854004, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 143040 + }, + { + "epoch": 941.1184210526316, + "grad_norm": 0.8725217580795288, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 143050 + }, + { + "epoch": 941.1842105263158, + "grad_norm": 1.1090641021728516, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 143060 + }, + { + "epoch": 941.25, + "grad_norm": 1.313765287399292, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 143070 + }, + { + "epoch": 941.3157894736842, + "grad_norm": 0.8687812089920044, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 143080 + }, + { + "epoch": 941.3815789473684, + "grad_norm": 1.066633701324463, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 143090 + }, + { + "epoch": 941.4473684210526, + "grad_norm": 1.1810579299926758, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 143100 + }, + { + "epoch": 941.5131578947369, + "grad_norm": 1.057855486869812, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 143110 + }, + { + "epoch": 941.578947368421, + "grad_norm": 0.7218169569969177, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 143120 + }, + { + "epoch": 941.6447368421053, + "grad_norm": 1.1620811223983765, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 143130 + }, + { + "epoch": 941.7105263157895, + "grad_norm": 1.2973016500473022, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 143140 + }, + { + "epoch": 941.7763157894736, + "grad_norm": 1.3135859966278076, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 143150 + }, + { + "epoch": 941.8421052631579, + "grad_norm": 0.8703665733337402, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 143160 + }, + { + "epoch": 941.9078947368421, + "grad_norm": 0.9815681576728821, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 143170 + }, + { + "epoch": 941.9736842105264, + "grad_norm": 1.3823065757751465, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 143180 + }, + { + "epoch": 942.0394736842105, + "grad_norm": 0.974730372428894, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 143190 + }, + { + "epoch": 942.1052631578947, + "grad_norm": 0.7570364475250244, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 143200 + }, + { + "epoch": 942.171052631579, + "grad_norm": 1.0894103050231934, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 143210 + }, + { + "epoch": 942.2368421052631, + "grad_norm": 1.1041042804718018, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 143220 + }, + { + "epoch": 942.3026315789474, + "grad_norm": 1.2231684923171997, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 143230 + }, + { + "epoch": 942.3684210526316, + "grad_norm": 1.2694424390792847, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 143240 + }, + { + "epoch": 942.4342105263158, + "grad_norm": 0.8678814172744751, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 143250 + }, + { + "epoch": 942.5, + "grad_norm": 0.9810147285461426, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 143260 + }, + { + "epoch": 942.5657894736842, + "grad_norm": 0.6947391629219055, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 143270 + }, + { + "epoch": 942.6315789473684, + "grad_norm": 1.0292843580245972, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 143280 + }, + { + "epoch": 942.6973684210526, + "grad_norm": 0.93792724609375, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 143290 + }, + { + "epoch": 942.7631578947369, + "grad_norm": 0.827596127986908, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 143300 + }, + { + "epoch": 942.828947368421, + "grad_norm": 1.1035057306289673, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 143310 + }, + { + "epoch": 942.8947368421053, + "grad_norm": 1.2476816177368164, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 143320 + }, + { + "epoch": 942.9605263157895, + "grad_norm": 1.1506506204605103, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 143330 + }, + { + "epoch": 943.0263157894736, + "grad_norm": 0.9531042575836182, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 143340 + }, + { + "epoch": 943.0921052631579, + "grad_norm": 1.2359414100646973, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 143350 + }, + { + "epoch": 943.1578947368421, + "grad_norm": 1.264983057975769, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 143360 + }, + { + "epoch": 943.2236842105264, + "grad_norm": 1.0347520112991333, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 143370 + }, + { + "epoch": 943.2894736842105, + "grad_norm": 1.0723800659179688, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 143380 + }, + { + "epoch": 943.3552631578947, + "grad_norm": 1.2043391466140747, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 143390 + }, + { + "epoch": 943.421052631579, + "grad_norm": 0.9921755790710449, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 143400 + }, + { + "epoch": 943.4868421052631, + "grad_norm": 1.0856060981750488, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 143410 + }, + { + "epoch": 943.5526315789474, + "grad_norm": 1.3713430166244507, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 143420 + }, + { + "epoch": 943.6184210526316, + "grad_norm": 1.2866997718811035, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 143430 + }, + { + "epoch": 943.6842105263158, + "grad_norm": 1.1391348838806152, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 143440 + }, + { + "epoch": 943.75, + "grad_norm": 0.8373425006866455, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 143450 + }, + { + "epoch": 943.8157894736842, + "grad_norm": 0.9597638845443726, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 143460 + }, + { + "epoch": 943.8815789473684, + "grad_norm": 0.904534637928009, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 143470 + }, + { + "epoch": 943.9473684210526, + "grad_norm": 0.8183276057243347, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 143480 + }, + { + "epoch": 944.0131578947369, + "grad_norm": 0.7242514491081238, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 143490 + }, + { + "epoch": 944.078947368421, + "grad_norm": 0.8846292495727539, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 143500 + }, + { + "epoch": 944.1447368421053, + "grad_norm": 0.9731491804122925, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 143510 + }, + { + "epoch": 944.2105263157895, + "grad_norm": 0.6665592193603516, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 143520 + }, + { + "epoch": 944.2763157894736, + "grad_norm": 0.75457763671875, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 143530 + }, + { + "epoch": 944.3421052631579, + "grad_norm": 0.9339364767074585, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 143540 + }, + { + "epoch": 944.4078947368421, + "grad_norm": 0.8604110479354858, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 143550 + }, + { + "epoch": 944.4736842105264, + "grad_norm": 1.0775866508483887, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 143560 + }, + { + "epoch": 944.5394736842105, + "grad_norm": 1.1625889539718628, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 143570 + }, + { + "epoch": 944.6052631578947, + "grad_norm": 1.0786641836166382, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 143580 + }, + { + "epoch": 944.671052631579, + "grad_norm": 0.6444934010505676, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 143590 + }, + { + "epoch": 944.7368421052631, + "grad_norm": 0.9392868876457214, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 143600 + }, + { + "epoch": 944.8026315789474, + "grad_norm": 1.0685558319091797, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 143610 + }, + { + "epoch": 944.8684210526316, + "grad_norm": 1.4054182767868042, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 143620 + }, + { + "epoch": 944.9342105263158, + "grad_norm": 1.2101002931594849, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 143630 + }, + { + "epoch": 945.0, + "grad_norm": 1.5603113174438477, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 143640 + }, + { + "epoch": 945.0657894736842, + "grad_norm": 1.0184569358825684, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 143650 + }, + { + "epoch": 945.1315789473684, + "grad_norm": 0.7555046081542969, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 143660 + }, + { + "epoch": 945.1973684210526, + "grad_norm": 0.8718606233596802, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 143670 + }, + { + "epoch": 945.2631578947369, + "grad_norm": 1.0357540845870972, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 143680 + }, + { + "epoch": 945.328947368421, + "grad_norm": 0.9740762710571289, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 143690 + }, + { + "epoch": 945.3947368421053, + "grad_norm": 0.9810712337493896, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 143700 + }, + { + "epoch": 945.4605263157895, + "grad_norm": 0.9435747265815735, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 143710 + }, + { + "epoch": 945.5263157894736, + "grad_norm": 1.1019375324249268, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 143720 + }, + { + "epoch": 945.5921052631579, + "grad_norm": 0.8351660370826721, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 143730 + }, + { + "epoch": 945.6578947368421, + "grad_norm": 0.8502940535545349, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 143740 + }, + { + "epoch": 945.7236842105264, + "grad_norm": 0.7618118524551392, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 143750 + }, + { + "epoch": 945.7894736842105, + "grad_norm": 1.3105641603469849, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 143760 + }, + { + "epoch": 945.8552631578947, + "grad_norm": 0.9641187787055969, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 143770 + }, + { + "epoch": 945.921052631579, + "grad_norm": 1.303466796875, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 143780 + }, + { + "epoch": 945.9868421052631, + "grad_norm": 1.175897479057312, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 143790 + }, + { + "epoch": 946.0526315789474, + "grad_norm": 1.1815438270568848, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 143800 + }, + { + "epoch": 946.1184210526316, + "grad_norm": 1.078926920890808, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 143810 + }, + { + "epoch": 946.1842105263158, + "grad_norm": 0.9007603526115417, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 143820 + }, + { + "epoch": 946.25, + "grad_norm": 1.0532934665679932, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 143830 + }, + { + "epoch": 946.3157894736842, + "grad_norm": 1.0850658416748047, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 143840 + }, + { + "epoch": 946.3815789473684, + "grad_norm": 0.9188476800918579, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 143850 + }, + { + "epoch": 946.4473684210526, + "grad_norm": 0.854782223701477, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 143860 + }, + { + "epoch": 946.5131578947369, + "grad_norm": 0.5295913815498352, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 143870 + }, + { + "epoch": 946.578947368421, + "grad_norm": 0.8378633856773376, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 143880 + }, + { + "epoch": 946.6447368421053, + "grad_norm": 1.1583908796310425, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 143890 + }, + { + "epoch": 946.7105263157895, + "grad_norm": 0.8956261873245239, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 143900 + }, + { + "epoch": 946.7763157894736, + "grad_norm": 1.1853934526443481, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 143910 + }, + { + "epoch": 946.8421052631579, + "grad_norm": 0.9609845876693726, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 143920 + }, + { + "epoch": 946.9078947368421, + "grad_norm": 0.7970736622810364, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 143930 + }, + { + "epoch": 946.9736842105264, + "grad_norm": 1.046696424484253, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 143940 + }, + { + "epoch": 947.0394736842105, + "grad_norm": 1.5037174224853516, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 143950 + }, + { + "epoch": 947.1052631578947, + "grad_norm": 1.2524503469467163, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 143960 + }, + { + "epoch": 947.171052631579, + "grad_norm": 0.7371581792831421, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 143970 + }, + { + "epoch": 947.2368421052631, + "grad_norm": 0.883450984954834, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 143980 + }, + { + "epoch": 947.3026315789474, + "grad_norm": 1.1274125576019287, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 143990 + }, + { + "epoch": 947.3684210526316, + "grad_norm": 1.2218645811080933, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 144000 + }, + { + "epoch": 947.4342105263158, + "grad_norm": 0.9843568205833435, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 144010 + }, + { + "epoch": 947.5, + "grad_norm": 0.8719574213027954, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 144020 + }, + { + "epoch": 947.5657894736842, + "grad_norm": 0.8319658637046814, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 144030 + }, + { + "epoch": 947.6315789473684, + "grad_norm": 0.8840770721435547, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 144040 + }, + { + "epoch": 947.6973684210526, + "grad_norm": 1.1723517179489136, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 144050 + }, + { + "epoch": 947.7631578947369, + "grad_norm": 0.925780177116394, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 144060 + }, + { + "epoch": 947.828947368421, + "grad_norm": 0.7539234161376953, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 144070 + }, + { + "epoch": 947.8947368421053, + "grad_norm": 1.0115832090377808, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 144080 + }, + { + "epoch": 947.9605263157895, + "grad_norm": 1.076154112815857, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 144090 + }, + { + "epoch": 948.0263157894736, + "grad_norm": 1.1594144105911255, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 144100 + }, + { + "epoch": 948.0921052631579, + "grad_norm": 0.8752143383026123, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 144110 + }, + { + "epoch": 948.1578947368421, + "grad_norm": 0.816047191619873, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 144120 + }, + { + "epoch": 948.2236842105264, + "grad_norm": 0.9532915949821472, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 144130 + }, + { + "epoch": 948.2894736842105, + "grad_norm": 1.082932710647583, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 144140 + }, + { + "epoch": 948.3552631578947, + "grad_norm": 0.8170779347419739, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 144150 + }, + { + "epoch": 948.421052631579, + "grad_norm": 1.189034104347229, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 144160 + }, + { + "epoch": 948.4868421052631, + "grad_norm": 1.0919978618621826, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 144170 + }, + { + "epoch": 948.5526315789474, + "grad_norm": 1.05934476852417, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 144180 + }, + { + "epoch": 948.6184210526316, + "grad_norm": 0.8420997858047485, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 144190 + }, + { + "epoch": 948.6842105263158, + "grad_norm": 1.1011571884155273, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 144200 + }, + { + "epoch": 948.75, + "grad_norm": 0.7747015953063965, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 144210 + }, + { + "epoch": 948.8157894736842, + "grad_norm": 1.1915805339813232, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 144220 + }, + { + "epoch": 948.8815789473684, + "grad_norm": 1.0432209968566895, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 144230 + }, + { + "epoch": 948.9473684210526, + "grad_norm": 0.9377793669700623, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 144240 + }, + { + "epoch": 949.0131578947369, + "grad_norm": 0.8904802203178406, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 144250 + }, + { + "epoch": 949.078947368421, + "grad_norm": 1.0895966291427612, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 144260 + }, + { + "epoch": 949.1447368421053, + "grad_norm": 1.2289841175079346, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 144270 + }, + { + "epoch": 949.2105263157895, + "grad_norm": 0.9211072325706482, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 144280 + }, + { + "epoch": 949.2763157894736, + "grad_norm": 1.0456809997558594, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 144290 + }, + { + "epoch": 949.3421052631579, + "grad_norm": 0.9694221019744873, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 144300 + }, + { + "epoch": 949.4078947368421, + "grad_norm": 0.9555386304855347, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 144310 + }, + { + "epoch": 949.4736842105264, + "grad_norm": 0.9614490866661072, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 144320 + }, + { + "epoch": 949.5394736842105, + "grad_norm": 1.557354211807251, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 144330 + }, + { + "epoch": 949.6052631578947, + "grad_norm": 1.0724836587905884, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 144340 + }, + { + "epoch": 949.671052631579, + "grad_norm": 1.0988471508026123, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 144350 + }, + { + "epoch": 949.7368421052631, + "grad_norm": 1.2191790342330933, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 144360 + }, + { + "epoch": 949.8026315789474, + "grad_norm": 1.3018871545791626, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 144370 + }, + { + "epoch": 949.8684210526316, + "grad_norm": 0.7598888874053955, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 144380 + }, + { + "epoch": 949.9342105263158, + "grad_norm": 0.9216148257255554, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 144390 + }, + { + "epoch": 950.0, + "grad_norm": 1.0104155540466309, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 144400 + }, + { + "epoch": 950.0657894736842, + "grad_norm": 1.0783321857452393, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 144410 + }, + { + "epoch": 950.1315789473684, + "grad_norm": 1.3584924936294556, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 144420 + }, + { + "epoch": 950.1973684210526, + "grad_norm": 1.443320393562317, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 144430 + }, + { + "epoch": 950.2631578947369, + "grad_norm": 1.016124963760376, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 144440 + }, + { + "epoch": 950.328947368421, + "grad_norm": 1.2364863157272339, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 144450 + }, + { + "epoch": 950.3947368421053, + "grad_norm": 1.195246696472168, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 144460 + }, + { + "epoch": 950.4605263157895, + "grad_norm": 1.5904250144958496, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 144470 + }, + { + "epoch": 950.5263157894736, + "grad_norm": 1.104093074798584, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 144480 + }, + { + "epoch": 950.5921052631579, + "grad_norm": 1.0603477954864502, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 144490 + }, + { + "epoch": 950.6578947368421, + "grad_norm": 1.1990110874176025, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 144500 + }, + { + "epoch": 950.7236842105264, + "grad_norm": 1.1858569383621216, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 144510 + }, + { + "epoch": 950.7894736842105, + "grad_norm": 0.9741637706756592, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 144520 + }, + { + "epoch": 950.8552631578947, + "grad_norm": 0.9094181656837463, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 144530 + }, + { + "epoch": 950.921052631579, + "grad_norm": 1.022029995918274, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 144540 + }, + { + "epoch": 950.9868421052631, + "grad_norm": 0.9624213576316833, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 144550 + }, + { + "epoch": 951.0526315789474, + "grad_norm": 1.2031329870224, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 144560 + }, + { + "epoch": 951.1184210526316, + "grad_norm": 1.1659698486328125, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 144570 + }, + { + "epoch": 951.1842105263158, + "grad_norm": 1.0349775552749634, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 144580 + }, + { + "epoch": 951.25, + "grad_norm": 1.165041208267212, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 144590 + }, + { + "epoch": 951.3157894736842, + "grad_norm": 1.2488925457000732, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 144600 + }, + { + "epoch": 951.3815789473684, + "grad_norm": 1.0464668273925781, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 144610 + }, + { + "epoch": 951.4473684210526, + "grad_norm": 0.7799762487411499, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 144620 + }, + { + "epoch": 951.5131578947369, + "grad_norm": 0.6983115077018738, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 144630 + }, + { + "epoch": 951.578947368421, + "grad_norm": 1.1071887016296387, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 144640 + }, + { + "epoch": 951.6447368421053, + "grad_norm": 0.8498964309692383, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 144650 + }, + { + "epoch": 951.7105263157895, + "grad_norm": 0.8821257948875427, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 144660 + }, + { + "epoch": 951.7763157894736, + "grad_norm": 0.9334793090820312, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 144670 + }, + { + "epoch": 951.8421052631579, + "grad_norm": 1.2155643701553345, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 144680 + }, + { + "epoch": 951.9078947368421, + "grad_norm": 0.8632481694221497, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 144690 + }, + { + "epoch": 951.9736842105264, + "grad_norm": 1.1241223812103271, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 144700 + }, + { + "epoch": 952.0394736842105, + "grad_norm": 1.0715571641921997, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 144710 + }, + { + "epoch": 952.1052631578947, + "grad_norm": 1.1770998239517212, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 144720 + }, + { + "epoch": 952.171052631579, + "grad_norm": 1.2517147064208984, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 144730 + }, + { + "epoch": 952.2368421052631, + "grad_norm": 1.0367016792297363, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 144740 + }, + { + "epoch": 952.3026315789474, + "grad_norm": 0.8607826232910156, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 144750 + }, + { + "epoch": 952.3684210526316, + "grad_norm": 1.0406628847122192, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 144760 + }, + { + "epoch": 952.4342105263158, + "grad_norm": 0.97654128074646, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 144770 + }, + { + "epoch": 952.5, + "grad_norm": 0.7601302266120911, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 144780 + }, + { + "epoch": 952.5657894736842, + "grad_norm": 0.7820169925689697, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 144790 + }, + { + "epoch": 952.6315789473684, + "grad_norm": 0.8376019597053528, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 144800 + }, + { + "epoch": 952.6973684210526, + "grad_norm": 1.023985505104065, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 144810 + }, + { + "epoch": 952.7631578947369, + "grad_norm": 0.743279218673706, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 144820 + }, + { + "epoch": 952.828947368421, + "grad_norm": 1.3411636352539062, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 144830 + }, + { + "epoch": 952.8947368421053, + "grad_norm": 1.0974597930908203, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 144840 + }, + { + "epoch": 952.9605263157895, + "grad_norm": 1.0889559984207153, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 144850 + }, + { + "epoch": 953.0263157894736, + "grad_norm": 1.1542203426361084, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 144860 + }, + { + "epoch": 953.0921052631579, + "grad_norm": 1.394636631011963, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 144870 + }, + { + "epoch": 953.1578947368421, + "grad_norm": 1.16937255859375, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 144880 + }, + { + "epoch": 953.2236842105264, + "grad_norm": 1.2651804685592651, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 144890 + }, + { + "epoch": 953.2894736842105, + "grad_norm": 1.2077585458755493, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 144900 + }, + { + "epoch": 953.3552631578947, + "grad_norm": 1.1569517850875854, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 144910 + }, + { + "epoch": 953.421052631579, + "grad_norm": 1.34952712059021, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 144920 + }, + { + "epoch": 953.4868421052631, + "grad_norm": 1.0990208387374878, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 144930 + }, + { + "epoch": 953.5526315789474, + "grad_norm": 0.9614241719245911, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 144940 + }, + { + "epoch": 953.6184210526316, + "grad_norm": 1.349439024925232, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 144950 + }, + { + "epoch": 953.6842105263158, + "grad_norm": 1.501696228981018, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 144960 + }, + { + "epoch": 953.75, + "grad_norm": 1.1203855276107788, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 144970 + }, + { + "epoch": 953.8157894736842, + "grad_norm": 1.2413620948791504, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 144980 + }, + { + "epoch": 953.8815789473684, + "grad_norm": 1.3755046129226685, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 144990 + }, + { + "epoch": 953.9473684210526, + "grad_norm": 1.0295451879501343, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 145000 + }, + { + "epoch": 954.0131578947369, + "grad_norm": 1.3562036752700806, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 145010 + }, + { + "epoch": 954.078947368421, + "grad_norm": 1.4135730266571045, + "learning_rate": 0.0001, + "loss": 0.0144, + "step": 145020 + }, + { + "epoch": 954.1447368421053, + "grad_norm": 1.433528184890747, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 145030 + }, + { + "epoch": 954.2105263157895, + "grad_norm": 1.3277394771575928, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 145040 + }, + { + "epoch": 954.2763157894736, + "grad_norm": 0.9119917154312134, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 145050 + }, + { + "epoch": 954.3421052631579, + "grad_norm": 1.1164143085479736, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 145060 + }, + { + "epoch": 954.4078947368421, + "grad_norm": 1.1159082651138306, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 145070 + }, + { + "epoch": 954.4736842105264, + "grad_norm": 1.0839544534683228, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 145080 + }, + { + "epoch": 954.5394736842105, + "grad_norm": 0.9123933911323547, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 145090 + }, + { + "epoch": 954.6052631578947, + "grad_norm": 0.9606594443321228, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 145100 + }, + { + "epoch": 954.671052631579, + "grad_norm": 0.8391530513763428, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 145110 + }, + { + "epoch": 954.7368421052631, + "grad_norm": 0.8009461164474487, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 145120 + }, + { + "epoch": 954.8026315789474, + "grad_norm": 1.1401244401931763, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 145130 + }, + { + "epoch": 954.8684210526316, + "grad_norm": 1.1111904382705688, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 145140 + }, + { + "epoch": 954.9342105263158, + "grad_norm": 0.8661221265792847, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 145150 + }, + { + "epoch": 955.0, + "grad_norm": 1.0058561563491821, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 145160 + }, + { + "epoch": 955.0657894736842, + "grad_norm": 1.0411880016326904, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 145170 + }, + { + "epoch": 955.1315789473684, + "grad_norm": 1.146001935005188, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 145180 + }, + { + "epoch": 955.1973684210526, + "grad_norm": 1.115849256515503, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 145190 + }, + { + "epoch": 955.2631578947369, + "grad_norm": 0.991651713848114, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 145200 + }, + { + "epoch": 955.328947368421, + "grad_norm": 1.1527934074401855, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 145210 + }, + { + "epoch": 955.3947368421053, + "grad_norm": 1.2311185598373413, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 145220 + }, + { + "epoch": 955.4605263157895, + "grad_norm": 1.1631335020065308, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 145230 + }, + { + "epoch": 955.5263157894736, + "grad_norm": 0.657287061214447, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 145240 + }, + { + "epoch": 955.5921052631579, + "grad_norm": 0.9889928698539734, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 145250 + }, + { + "epoch": 955.6578947368421, + "grad_norm": 0.9157257676124573, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 145260 + }, + { + "epoch": 955.7236842105264, + "grad_norm": 0.983443558216095, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 145270 + }, + { + "epoch": 955.7894736842105, + "grad_norm": 1.1161870956420898, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 145280 + }, + { + "epoch": 955.8552631578947, + "grad_norm": 0.9558767080307007, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 145290 + }, + { + "epoch": 955.921052631579, + "grad_norm": 1.0487935543060303, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 145300 + }, + { + "epoch": 955.9868421052631, + "grad_norm": 0.7724936604499817, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 145310 + }, + { + "epoch": 956.0526315789474, + "grad_norm": 1.0620529651641846, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 145320 + }, + { + "epoch": 956.1184210526316, + "grad_norm": 1.1085588932037354, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 145330 + }, + { + "epoch": 956.1842105263158, + "grad_norm": 1.2441157102584839, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 145340 + }, + { + "epoch": 956.25, + "grad_norm": 1.0353556871414185, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 145350 + }, + { + "epoch": 956.3157894736842, + "grad_norm": 1.4679001569747925, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 145360 + }, + { + "epoch": 956.3815789473684, + "grad_norm": 1.2972333431243896, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 145370 + }, + { + "epoch": 956.4473684210526, + "grad_norm": 0.9384067058563232, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 145380 + }, + { + "epoch": 956.5131578947369, + "grad_norm": 1.0650990009307861, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 145390 + }, + { + "epoch": 956.578947368421, + "grad_norm": 1.5110204219818115, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 145400 + }, + { + "epoch": 956.6447368421053, + "grad_norm": 0.7420325875282288, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 145410 + }, + { + "epoch": 956.7105263157895, + "grad_norm": 1.3069480657577515, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 145420 + }, + { + "epoch": 956.7763157894736, + "grad_norm": 1.2609871625900269, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 145430 + }, + { + "epoch": 956.8421052631579, + "grad_norm": 1.0048247575759888, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 145440 + }, + { + "epoch": 956.9078947368421, + "grad_norm": 1.0545076131820679, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 145450 + }, + { + "epoch": 956.9736842105264, + "grad_norm": 0.8621100187301636, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 145460 + }, + { + "epoch": 957.0394736842105, + "grad_norm": 1.0694085359573364, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 145470 + }, + { + "epoch": 957.1052631578947, + "grad_norm": 1.2032498121261597, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 145480 + }, + { + "epoch": 957.171052631579, + "grad_norm": 0.7613986134529114, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 145490 + }, + { + "epoch": 957.2368421052631, + "grad_norm": 0.8016976714134216, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 145500 + }, + { + "epoch": 957.3026315789474, + "grad_norm": 1.1267141103744507, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 145510 + }, + { + "epoch": 957.3684210526316, + "grad_norm": 1.0526936054229736, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 145520 + }, + { + "epoch": 957.4342105263158, + "grad_norm": 1.1312087774276733, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 145530 + }, + { + "epoch": 957.5, + "grad_norm": 0.8225216865539551, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 145540 + }, + { + "epoch": 957.5657894736842, + "grad_norm": 1.3235195875167847, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 145550 + }, + { + "epoch": 957.6315789473684, + "grad_norm": 0.9246519804000854, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 145560 + }, + { + "epoch": 957.6973684210526, + "grad_norm": 1.227251648902893, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 145570 + }, + { + "epoch": 957.7631578947369, + "grad_norm": 1.1541143655776978, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 145580 + }, + { + "epoch": 957.828947368421, + "grad_norm": 1.001869797706604, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 145590 + }, + { + "epoch": 957.8947368421053, + "grad_norm": 1.1103819608688354, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 145600 + }, + { + "epoch": 957.9605263157895, + "grad_norm": 1.1512665748596191, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 145610 + }, + { + "epoch": 958.0263157894736, + "grad_norm": 1.135569453239441, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 145620 + }, + { + "epoch": 958.0921052631579, + "grad_norm": 0.9894860982894897, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 145630 + }, + { + "epoch": 958.1578947368421, + "grad_norm": 1.2264050245285034, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 145640 + }, + { + "epoch": 958.2236842105264, + "grad_norm": 0.9081289768218994, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 145650 + }, + { + "epoch": 958.2894736842105, + "grad_norm": 1.1225621700286865, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 145660 + }, + { + "epoch": 958.3552631578947, + "grad_norm": 1.4110243320465088, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 145670 + }, + { + "epoch": 958.421052631579, + "grad_norm": 1.2238539457321167, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 145680 + }, + { + "epoch": 958.4868421052631, + "grad_norm": 0.7691843509674072, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 145690 + }, + { + "epoch": 958.5526315789474, + "grad_norm": 1.0191400051116943, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 145700 + }, + { + "epoch": 958.6184210526316, + "grad_norm": 1.1331918239593506, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 145710 + }, + { + "epoch": 958.6842105263158, + "grad_norm": 1.3987983465194702, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 145720 + }, + { + "epoch": 958.75, + "grad_norm": 1.1458479166030884, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 145730 + }, + { + "epoch": 958.8157894736842, + "grad_norm": 0.9019910097122192, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 145740 + }, + { + "epoch": 958.8815789473684, + "grad_norm": 1.2355693578720093, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 145750 + }, + { + "epoch": 958.9473684210526, + "grad_norm": 1.0789332389831543, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 145760 + }, + { + "epoch": 959.0131578947369, + "grad_norm": 1.0482940673828125, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 145770 + }, + { + "epoch": 959.078947368421, + "grad_norm": 1.026578426361084, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 145780 + }, + { + "epoch": 959.1447368421053, + "grad_norm": 1.144579529762268, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 145790 + }, + { + "epoch": 959.2105263157895, + "grad_norm": 1.0373647212982178, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 145800 + }, + { + "epoch": 959.2763157894736, + "grad_norm": 1.0401736497879028, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 145810 + }, + { + "epoch": 959.3421052631579, + "grad_norm": 1.3450522422790527, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 145820 + }, + { + "epoch": 959.4078947368421, + "grad_norm": 0.786882221698761, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 145830 + }, + { + "epoch": 959.4736842105264, + "grad_norm": 1.122706413269043, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 145840 + }, + { + "epoch": 959.5394736842105, + "grad_norm": 1.224389672279358, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 145850 + }, + { + "epoch": 959.6052631578947, + "grad_norm": 0.8540865778923035, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 145860 + }, + { + "epoch": 959.671052631579, + "grad_norm": 1.0747767686843872, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 145870 + }, + { + "epoch": 959.7368421052631, + "grad_norm": 0.7939700484275818, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 145880 + }, + { + "epoch": 959.8026315789474, + "grad_norm": 0.817993700504303, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 145890 + }, + { + "epoch": 959.8684210526316, + "grad_norm": 0.7488849759101868, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 145900 + }, + { + "epoch": 959.9342105263158, + "grad_norm": 1.1702375411987305, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 145910 + }, + { + "epoch": 960.0, + "grad_norm": 1.0733966827392578, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 145920 + }, + { + "epoch": 960.0657894736842, + "grad_norm": 0.836497962474823, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 145930 + }, + { + "epoch": 960.1315789473684, + "grad_norm": 1.1901671886444092, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 145940 + }, + { + "epoch": 960.1973684210526, + "grad_norm": 1.657729148864746, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 145950 + }, + { + "epoch": 960.2631578947369, + "grad_norm": 1.0714272260665894, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 145960 + }, + { + "epoch": 960.328947368421, + "grad_norm": 1.1627241373062134, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 145970 + }, + { + "epoch": 960.3947368421053, + "grad_norm": 0.7160118222236633, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 145980 + }, + { + "epoch": 960.4605263157895, + "grad_norm": 1.2324270009994507, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 145990 + }, + { + "epoch": 960.5263157894736, + "grad_norm": 0.9793133735656738, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 146000 + }, + { + "epoch": 960.5921052631579, + "grad_norm": 1.2766015529632568, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 146010 + }, + { + "epoch": 960.6578947368421, + "grad_norm": 0.7441828846931458, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 146020 + }, + { + "epoch": 960.7236842105264, + "grad_norm": 1.006022572517395, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 146030 + }, + { + "epoch": 960.7894736842105, + "grad_norm": 0.781061053276062, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 146040 + }, + { + "epoch": 960.8552631578947, + "grad_norm": 1.0156453847885132, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 146050 + }, + { + "epoch": 960.921052631579, + "grad_norm": 1.5238510370254517, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 146060 + }, + { + "epoch": 960.9868421052631, + "grad_norm": 1.1294466257095337, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 146070 + }, + { + "epoch": 961.0526315789474, + "grad_norm": 1.103088140487671, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 146080 + }, + { + "epoch": 961.1184210526316, + "grad_norm": 1.1292765140533447, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 146090 + }, + { + "epoch": 961.1842105263158, + "grad_norm": 1.060682773590088, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 146100 + }, + { + "epoch": 961.25, + "grad_norm": 0.584579348564148, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 146110 + }, + { + "epoch": 961.3157894736842, + "grad_norm": 0.8006397485733032, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 146120 + }, + { + "epoch": 961.3815789473684, + "grad_norm": 0.8636497855186462, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 146130 + }, + { + "epoch": 961.4473684210526, + "grad_norm": 0.9201158881187439, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 146140 + }, + { + "epoch": 961.5131578947369, + "grad_norm": 1.3234913349151611, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 146150 + }, + { + "epoch": 961.578947368421, + "grad_norm": 0.9483774900436401, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 146160 + }, + { + "epoch": 961.6447368421053, + "grad_norm": 0.9402949213981628, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 146170 + }, + { + "epoch": 961.7105263157895, + "grad_norm": 0.809597909450531, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 146180 + }, + { + "epoch": 961.7763157894736, + "grad_norm": 1.229201078414917, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 146190 + }, + { + "epoch": 961.8421052631579, + "grad_norm": 1.1061270236968994, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 146200 + }, + { + "epoch": 961.9078947368421, + "grad_norm": 0.8046864867210388, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 146210 + }, + { + "epoch": 961.9736842105264, + "grad_norm": 0.7022920846939087, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 146220 + }, + { + "epoch": 962.0394736842105, + "grad_norm": 1.0460927486419678, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 146230 + }, + { + "epoch": 962.1052631578947, + "grad_norm": 1.061414361000061, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 146240 + }, + { + "epoch": 962.171052631579, + "grad_norm": 0.8802205324172974, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 146250 + }, + { + "epoch": 962.2368421052631, + "grad_norm": 1.0647673606872559, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 146260 + }, + { + "epoch": 962.3026315789474, + "grad_norm": 1.1186468601226807, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 146270 + }, + { + "epoch": 962.3684210526316, + "grad_norm": 1.0472915172576904, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 146280 + }, + { + "epoch": 962.4342105263158, + "grad_norm": 0.8644057512283325, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 146290 + }, + { + "epoch": 962.5, + "grad_norm": 0.8172399997711182, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 146300 + }, + { + "epoch": 962.5657894736842, + "grad_norm": 0.9596365094184875, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 146310 + }, + { + "epoch": 962.6315789473684, + "grad_norm": 0.8653599619865417, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 146320 + }, + { + "epoch": 962.6973684210526, + "grad_norm": 0.9772462844848633, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 146330 + }, + { + "epoch": 962.7631578947369, + "grad_norm": 1.0098869800567627, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 146340 + }, + { + "epoch": 962.828947368421, + "grad_norm": 1.2209604978561401, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 146350 + }, + { + "epoch": 962.8947368421053, + "grad_norm": 0.9694502949714661, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 146360 + }, + { + "epoch": 962.9605263157895, + "grad_norm": 0.981524646282196, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 146370 + }, + { + "epoch": 963.0263157894736, + "grad_norm": 1.0773963928222656, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 146380 + }, + { + "epoch": 963.0921052631579, + "grad_norm": 1.1981137990951538, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 146390 + }, + { + "epoch": 963.1578947368421, + "grad_norm": 1.1366533041000366, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 146400 + }, + { + "epoch": 963.2236842105264, + "grad_norm": 1.1924341917037964, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 146410 + }, + { + "epoch": 963.2894736842105, + "grad_norm": 0.9857580065727234, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 146420 + }, + { + "epoch": 963.3552631578947, + "grad_norm": 1.2033591270446777, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 146430 + }, + { + "epoch": 963.421052631579, + "grad_norm": 1.2146192789077759, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 146440 + }, + { + "epoch": 963.4868421052631, + "grad_norm": 1.1512936353683472, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 146450 + }, + { + "epoch": 963.5526315789474, + "grad_norm": 0.6414240598678589, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 146460 + }, + { + "epoch": 963.6184210526316, + "grad_norm": 0.7209314107894897, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 146470 + }, + { + "epoch": 963.6842105263158, + "grad_norm": 0.8891003727912903, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 146480 + }, + { + "epoch": 963.75, + "grad_norm": 0.7356894016265869, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 146490 + }, + { + "epoch": 963.8157894736842, + "grad_norm": 0.9845222234725952, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 146500 + }, + { + "epoch": 963.8815789473684, + "grad_norm": 1.4602724313735962, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 146510 + }, + { + "epoch": 963.9473684210526, + "grad_norm": 0.9599493741989136, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 146520 + }, + { + "epoch": 964.0131578947369, + "grad_norm": 1.1864409446716309, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 146530 + }, + { + "epoch": 964.078947368421, + "grad_norm": 1.6686468124389648, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 146540 + }, + { + "epoch": 964.1447368421053, + "grad_norm": 1.6375855207443237, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 146550 + }, + { + "epoch": 964.2105263157895, + "grad_norm": 1.250057339668274, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 146560 + }, + { + "epoch": 964.2763157894736, + "grad_norm": 1.3468494415283203, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 146570 + }, + { + "epoch": 964.3421052631579, + "grad_norm": 1.369868516921997, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 146580 + }, + { + "epoch": 964.4078947368421, + "grad_norm": 0.8721538186073303, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 146590 + }, + { + "epoch": 964.4736842105264, + "grad_norm": 1.0958600044250488, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 146600 + }, + { + "epoch": 964.5394736842105, + "grad_norm": 1.1461358070373535, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 146610 + }, + { + "epoch": 964.6052631578947, + "grad_norm": 1.3515912294387817, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 146620 + }, + { + "epoch": 964.671052631579, + "grad_norm": 1.3492871522903442, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 146630 + }, + { + "epoch": 964.7368421052631, + "grad_norm": 0.7146100997924805, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 146640 + }, + { + "epoch": 964.8026315789474, + "grad_norm": 1.150172233581543, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 146650 + }, + { + "epoch": 964.8684210526316, + "grad_norm": 1.4243590831756592, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 146660 + }, + { + "epoch": 964.9342105263158, + "grad_norm": 1.0917390584945679, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 146670 + }, + { + "epoch": 965.0, + "grad_norm": 0.888741135597229, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 146680 + }, + { + "epoch": 965.0657894736842, + "grad_norm": 0.8921936750411987, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 146690 + }, + { + "epoch": 965.1315789473684, + "grad_norm": 1.1336225271224976, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 146700 + }, + { + "epoch": 965.1973684210526, + "grad_norm": 1.2168223857879639, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 146710 + }, + { + "epoch": 965.2631578947369, + "grad_norm": 0.9628434181213379, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 146720 + }, + { + "epoch": 965.328947368421, + "grad_norm": 0.9987488389015198, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 146730 + }, + { + "epoch": 965.3947368421053, + "grad_norm": 1.300894856452942, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 146740 + }, + { + "epoch": 965.4605263157895, + "grad_norm": 3.1058502197265625, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 146750 + }, + { + "epoch": 965.5263157894736, + "grad_norm": 1.6994332075119019, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 146760 + }, + { + "epoch": 965.5921052631579, + "grad_norm": 0.7340602874755859, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 146770 + }, + { + "epoch": 965.6578947368421, + "grad_norm": 0.9732682108879089, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 146780 + }, + { + "epoch": 965.7236842105264, + "grad_norm": 1.0283761024475098, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 146790 + }, + { + "epoch": 965.7894736842105, + "grad_norm": 0.8493642807006836, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 146800 + }, + { + "epoch": 965.8552631578947, + "grad_norm": 0.8938899636268616, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 146810 + }, + { + "epoch": 965.921052631579, + "grad_norm": 0.8142083287239075, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 146820 + }, + { + "epoch": 965.9868421052631, + "grad_norm": 0.9816213250160217, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 146830 + }, + { + "epoch": 966.0526315789474, + "grad_norm": 0.7372590899467468, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 146840 + }, + { + "epoch": 966.1184210526316, + "grad_norm": 1.1608000993728638, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 146850 + }, + { + "epoch": 966.1842105263158, + "grad_norm": 1.0333927869796753, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 146860 + }, + { + "epoch": 966.25, + "grad_norm": 1.392578125, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 146870 + }, + { + "epoch": 966.3157894736842, + "grad_norm": 1.003764271736145, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 146880 + }, + { + "epoch": 966.3815789473684, + "grad_norm": 1.026443362236023, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 146890 + }, + { + "epoch": 966.4473684210526, + "grad_norm": 0.8039220571517944, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 146900 + }, + { + "epoch": 966.5131578947369, + "grad_norm": 1.3538212776184082, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 146910 + }, + { + "epoch": 966.578947368421, + "grad_norm": 1.2053329944610596, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 146920 + }, + { + "epoch": 966.6447368421053, + "grad_norm": 1.1715484857559204, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 146930 + }, + { + "epoch": 966.7105263157895, + "grad_norm": 1.387887716293335, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 146940 + }, + { + "epoch": 966.7763157894736, + "grad_norm": 0.7918421626091003, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 146950 + }, + { + "epoch": 966.8421052631579, + "grad_norm": 1.122507929801941, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 146960 + }, + { + "epoch": 966.9078947368421, + "grad_norm": 1.20033597946167, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 146970 + }, + { + "epoch": 966.9736842105264, + "grad_norm": 1.156383991241455, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 146980 + }, + { + "epoch": 967.0394736842105, + "grad_norm": 0.760665774345398, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 146990 + }, + { + "epoch": 967.1052631578947, + "grad_norm": 0.9186726808547974, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 147000 + }, + { + "epoch": 967.171052631579, + "grad_norm": 1.0184344053268433, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 147010 + }, + { + "epoch": 967.2368421052631, + "grad_norm": 1.3089451789855957, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 147020 + }, + { + "epoch": 967.3026315789474, + "grad_norm": 0.8527283072471619, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 147030 + }, + { + "epoch": 967.3684210526316, + "grad_norm": 1.0763221979141235, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 147040 + }, + { + "epoch": 967.4342105263158, + "grad_norm": 1.0858137607574463, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 147050 + }, + { + "epoch": 967.5, + "grad_norm": 0.9243344068527222, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 147060 + }, + { + "epoch": 967.5657894736842, + "grad_norm": 0.9228748083114624, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 147070 + }, + { + "epoch": 967.6315789473684, + "grad_norm": 1.0944608449935913, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 147080 + }, + { + "epoch": 967.6973684210526, + "grad_norm": 0.9257031679153442, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 147090 + }, + { + "epoch": 967.7631578947369, + "grad_norm": 1.181267261505127, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 147100 + }, + { + "epoch": 967.828947368421, + "grad_norm": 1.0577452182769775, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 147110 + }, + { + "epoch": 967.8947368421053, + "grad_norm": 0.8401322960853577, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 147120 + }, + { + "epoch": 967.9605263157895, + "grad_norm": 0.9169796705245972, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 147130 + }, + { + "epoch": 968.0263157894736, + "grad_norm": 0.8156391978263855, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 147140 + }, + { + "epoch": 968.0921052631579, + "grad_norm": 1.2525750398635864, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 147150 + }, + { + "epoch": 968.1578947368421, + "grad_norm": 1.0658366680145264, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 147160 + }, + { + "epoch": 968.2236842105264, + "grad_norm": 0.9549158215522766, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 147170 + }, + { + "epoch": 968.2894736842105, + "grad_norm": 1.1841901540756226, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 147180 + }, + { + "epoch": 968.3552631578947, + "grad_norm": 0.855679452419281, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 147190 + }, + { + "epoch": 968.421052631579, + "grad_norm": 1.03905189037323, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 147200 + }, + { + "epoch": 968.4868421052631, + "grad_norm": 0.7143714427947998, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 147210 + }, + { + "epoch": 968.5526315789474, + "grad_norm": 0.8746243715286255, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 147220 + }, + { + "epoch": 968.6184210526316, + "grad_norm": 0.8814296126365662, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 147230 + }, + { + "epoch": 968.6842105263158, + "grad_norm": 0.7824357151985168, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 147240 + }, + { + "epoch": 968.75, + "grad_norm": 0.8747170567512512, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 147250 + }, + { + "epoch": 968.8157894736842, + "grad_norm": 1.2082691192626953, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 147260 + }, + { + "epoch": 968.8815789473684, + "grad_norm": 1.2725180387496948, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 147270 + }, + { + "epoch": 968.9473684210526, + "grad_norm": 1.4602564573287964, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 147280 + }, + { + "epoch": 969.0131578947369, + "grad_norm": 0.9811068773269653, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 147290 + }, + { + "epoch": 969.078947368421, + "grad_norm": 1.359781265258789, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 147300 + }, + { + "epoch": 969.1447368421053, + "grad_norm": 1.2391654253005981, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 147310 + }, + { + "epoch": 969.2105263157895, + "grad_norm": 1.193811297416687, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 147320 + }, + { + "epoch": 969.2763157894736, + "grad_norm": 0.9369627237319946, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 147330 + }, + { + "epoch": 969.3421052631579, + "grad_norm": 1.2277706861495972, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 147340 + }, + { + "epoch": 969.4078947368421, + "grad_norm": 0.6797551512718201, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 147350 + }, + { + "epoch": 969.4736842105264, + "grad_norm": 0.8890407085418701, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 147360 + }, + { + "epoch": 969.5394736842105, + "grad_norm": 0.6497656106948853, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 147370 + }, + { + "epoch": 969.6052631578947, + "grad_norm": 1.0025545358657837, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 147380 + }, + { + "epoch": 969.671052631579, + "grad_norm": 1.0839303731918335, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 147390 + }, + { + "epoch": 969.7368421052631, + "grad_norm": 0.9364657402038574, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 147400 + }, + { + "epoch": 969.8026315789474, + "grad_norm": 0.725238561630249, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 147410 + }, + { + "epoch": 969.8684210526316, + "grad_norm": 0.7093734741210938, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 147420 + }, + { + "epoch": 969.9342105263158, + "grad_norm": 1.0567338466644287, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 147430 + }, + { + "epoch": 970.0, + "grad_norm": 1.1926231384277344, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 147440 + }, + { + "epoch": 970.0657894736842, + "grad_norm": 0.7869142889976501, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 147450 + }, + { + "epoch": 970.1315789473684, + "grad_norm": 1.23822820186615, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 147460 + }, + { + "epoch": 970.1973684210526, + "grad_norm": 1.007323145866394, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 147470 + }, + { + "epoch": 970.2631578947369, + "grad_norm": 1.408695101737976, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 147480 + }, + { + "epoch": 970.328947368421, + "grad_norm": 1.1631524562835693, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 147490 + }, + { + "epoch": 970.3947368421053, + "grad_norm": 0.9714431166648865, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 147500 + }, + { + "epoch": 970.4605263157895, + "grad_norm": 0.8139010071754456, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 147510 + }, + { + "epoch": 970.5263157894736, + "grad_norm": 1.2274115085601807, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 147520 + }, + { + "epoch": 970.5921052631579, + "grad_norm": 1.0801491737365723, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 147530 + }, + { + "epoch": 970.6578947368421, + "grad_norm": 0.7565146088600159, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 147540 + }, + { + "epoch": 970.7236842105264, + "grad_norm": 0.9171380400657654, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 147550 + }, + { + "epoch": 970.7894736842105, + "grad_norm": 0.9315175414085388, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 147560 + }, + { + "epoch": 970.8552631578947, + "grad_norm": 0.9675357341766357, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 147570 + }, + { + "epoch": 970.921052631579, + "grad_norm": 0.8446849584579468, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 147580 + }, + { + "epoch": 970.9868421052631, + "grad_norm": 1.2151329517364502, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 147590 + }, + { + "epoch": 971.0526315789474, + "grad_norm": 0.8625166416168213, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 147600 + }, + { + "epoch": 971.1184210526316, + "grad_norm": 0.904151439666748, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 147610 + }, + { + "epoch": 971.1842105263158, + "grad_norm": 1.3609187602996826, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 147620 + }, + { + "epoch": 971.25, + "grad_norm": 2.187743663787842, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 147630 + }, + { + "epoch": 971.3157894736842, + "grad_norm": 2.0440964698791504, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 147640 + }, + { + "epoch": 971.3815789473684, + "grad_norm": 1.9944370985031128, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 147650 + }, + { + "epoch": 971.4473684210526, + "grad_norm": 1.8147251605987549, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 147660 + }, + { + "epoch": 971.5131578947369, + "grad_norm": 1.0225766897201538, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 147670 + }, + { + "epoch": 971.578947368421, + "grad_norm": 1.2411375045776367, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 147680 + }, + { + "epoch": 971.6447368421053, + "grad_norm": 1.3855613470077515, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 147690 + }, + { + "epoch": 971.7105263157895, + "grad_norm": 1.0319336652755737, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 147700 + }, + { + "epoch": 971.7763157894736, + "grad_norm": 1.0877528190612793, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 147710 + }, + { + "epoch": 971.8421052631579, + "grad_norm": 1.5878164768218994, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 147720 + }, + { + "epoch": 971.9078947368421, + "grad_norm": 1.735569715499878, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 147730 + }, + { + "epoch": 971.9736842105264, + "grad_norm": 1.2337061166763306, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 147740 + }, + { + "epoch": 972.0394736842105, + "grad_norm": 0.926618754863739, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 147750 + }, + { + "epoch": 972.1052631578947, + "grad_norm": 0.951611340045929, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 147760 + }, + { + "epoch": 972.171052631579, + "grad_norm": 1.1412699222564697, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 147770 + }, + { + "epoch": 972.2368421052631, + "grad_norm": 1.0385075807571411, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 147780 + }, + { + "epoch": 972.3026315789474, + "grad_norm": 0.8071399331092834, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 147790 + }, + { + "epoch": 972.3684210526316, + "grad_norm": 1.131365418434143, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 147800 + }, + { + "epoch": 972.4342105263158, + "grad_norm": 1.10077702999115, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 147810 + }, + { + "epoch": 972.5, + "grad_norm": 1.1625159978866577, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 147820 + }, + { + "epoch": 972.5657894736842, + "grad_norm": 0.8516972064971924, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 147830 + }, + { + "epoch": 972.6315789473684, + "grad_norm": 0.9543007612228394, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 147840 + }, + { + "epoch": 972.6973684210526, + "grad_norm": 0.7978515028953552, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 147850 + }, + { + "epoch": 972.7631578947369, + "grad_norm": 1.0908616781234741, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 147860 + }, + { + "epoch": 972.828947368421, + "grad_norm": 1.0295151472091675, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 147870 + }, + { + "epoch": 972.8947368421053, + "grad_norm": 0.67076176404953, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 147880 + }, + { + "epoch": 972.9605263157895, + "grad_norm": 1.1055384874343872, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 147890 + }, + { + "epoch": 973.0263157894736, + "grad_norm": 0.9399993419647217, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 147900 + }, + { + "epoch": 973.0921052631579, + "grad_norm": 0.9942353367805481, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 147910 + }, + { + "epoch": 973.1578947368421, + "grad_norm": 0.7977069020271301, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 147920 + }, + { + "epoch": 973.2236842105264, + "grad_norm": 0.940716028213501, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 147930 + }, + { + "epoch": 973.2894736842105, + "grad_norm": 0.5818641185760498, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 147940 + }, + { + "epoch": 973.3552631578947, + "grad_norm": 0.9071598052978516, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 147950 + }, + { + "epoch": 973.421052631579, + "grad_norm": 0.9769167900085449, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 147960 + }, + { + "epoch": 973.4868421052631, + "grad_norm": 0.7918068766593933, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 147970 + }, + { + "epoch": 973.5526315789474, + "grad_norm": 1.44162917137146, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 147980 + }, + { + "epoch": 973.6184210526316, + "grad_norm": 1.3190975189208984, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 147990 + }, + { + "epoch": 973.6842105263158, + "grad_norm": 0.8876311182975769, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 148000 + }, + { + "epoch": 973.75, + "grad_norm": 0.6711481213569641, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 148010 + }, + { + "epoch": 973.8157894736842, + "grad_norm": 0.9575647115707397, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 148020 + }, + { + "epoch": 973.8815789473684, + "grad_norm": 1.1577619314193726, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 148030 + }, + { + "epoch": 973.9473684210526, + "grad_norm": 1.1602065563201904, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 148040 + }, + { + "epoch": 974.0131578947369, + "grad_norm": 1.1503347158432007, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 148050 + }, + { + "epoch": 974.078947368421, + "grad_norm": 1.1186825037002563, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 148060 + }, + { + "epoch": 974.1447368421053, + "grad_norm": 1.2692493200302124, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 148070 + }, + { + "epoch": 974.2105263157895, + "grad_norm": 1.3095154762268066, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 148080 + }, + { + "epoch": 974.2763157894736, + "grad_norm": 1.1788098812103271, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 148090 + }, + { + "epoch": 974.3421052631579, + "grad_norm": 0.8337370753288269, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 148100 + }, + { + "epoch": 974.4078947368421, + "grad_norm": 0.8174479007720947, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 148110 + }, + { + "epoch": 974.4736842105264, + "grad_norm": 1.0070465803146362, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 148120 + }, + { + "epoch": 974.5394736842105, + "grad_norm": 1.2746952772140503, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 148130 + }, + { + "epoch": 974.6052631578947, + "grad_norm": 1.2314714193344116, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 148140 + }, + { + "epoch": 974.671052631579, + "grad_norm": 0.9068085551261902, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 148150 + }, + { + "epoch": 974.7368421052631, + "grad_norm": 1.3437057733535767, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 148160 + }, + { + "epoch": 974.8026315789474, + "grad_norm": 1.2839545011520386, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 148170 + }, + { + "epoch": 974.8684210526316, + "grad_norm": 1.1615701913833618, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 148180 + }, + { + "epoch": 974.9342105263158, + "grad_norm": 1.104874849319458, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 148190 + }, + { + "epoch": 975.0, + "grad_norm": 1.0236458778381348, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 148200 + }, + { + "epoch": 975.0657894736842, + "grad_norm": 0.8276492953300476, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 148210 + }, + { + "epoch": 975.1315789473684, + "grad_norm": 0.7698920369148254, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 148220 + }, + { + "epoch": 975.1973684210526, + "grad_norm": 1.0697041749954224, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 148230 + }, + { + "epoch": 975.2631578947369, + "grad_norm": 0.6203532218933105, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 148240 + }, + { + "epoch": 975.328947368421, + "grad_norm": 1.2870906591415405, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 148250 + }, + { + "epoch": 975.3947368421053, + "grad_norm": 1.1638849973678589, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 148260 + }, + { + "epoch": 975.4605263157895, + "grad_norm": 0.8770232200622559, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 148270 + }, + { + "epoch": 975.5263157894736, + "grad_norm": 0.8042388558387756, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 148280 + }, + { + "epoch": 975.5921052631579, + "grad_norm": 1.1777013540267944, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 148290 + }, + { + "epoch": 975.6578947368421, + "grad_norm": 1.2170054912567139, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 148300 + }, + { + "epoch": 975.7236842105264, + "grad_norm": 1.1343997716903687, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 148310 + }, + { + "epoch": 975.7894736842105, + "grad_norm": 0.9868319034576416, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 148320 + }, + { + "epoch": 975.8552631578947, + "grad_norm": 1.1468241214752197, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 148330 + }, + { + "epoch": 975.921052631579, + "grad_norm": 0.9679098129272461, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 148340 + }, + { + "epoch": 975.9868421052631, + "grad_norm": 1.3126945495605469, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 148350 + }, + { + "epoch": 976.0526315789474, + "grad_norm": 1.1734507083892822, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 148360 + }, + { + "epoch": 976.1184210526316, + "grad_norm": 0.9262369871139526, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 148370 + }, + { + "epoch": 976.1842105263158, + "grad_norm": 1.1285349130630493, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 148380 + }, + { + "epoch": 976.25, + "grad_norm": 0.9451377987861633, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 148390 + }, + { + "epoch": 976.3157894736842, + "grad_norm": 1.0291858911514282, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 148400 + }, + { + "epoch": 976.3815789473684, + "grad_norm": 1.141808271408081, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 148410 + }, + { + "epoch": 976.4473684210526, + "grad_norm": 0.9214851260185242, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 148420 + }, + { + "epoch": 976.5131578947369, + "grad_norm": 1.1481177806854248, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 148430 + }, + { + "epoch": 976.578947368421, + "grad_norm": 1.2398234605789185, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 148440 + }, + { + "epoch": 976.6447368421053, + "grad_norm": 0.9766506552696228, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 148450 + }, + { + "epoch": 976.7105263157895, + "grad_norm": 1.284214735031128, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 148460 + }, + { + "epoch": 976.7763157894736, + "grad_norm": 1.0955685377120972, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 148470 + }, + { + "epoch": 976.8421052631579, + "grad_norm": 1.0362792015075684, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 148480 + }, + { + "epoch": 976.9078947368421, + "grad_norm": 1.067434310913086, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 148490 + }, + { + "epoch": 976.9736842105264, + "grad_norm": 0.7627929449081421, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 148500 + }, + { + "epoch": 977.0394736842105, + "grad_norm": 0.9590473175048828, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 148510 + }, + { + "epoch": 977.1052631578947, + "grad_norm": 0.8360632061958313, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 148520 + }, + { + "epoch": 977.171052631579, + "grad_norm": 0.7430837154388428, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 148530 + }, + { + "epoch": 977.2368421052631, + "grad_norm": 0.9567493796348572, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 148540 + }, + { + "epoch": 977.3026315789474, + "grad_norm": 1.1349761486053467, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 148550 + }, + { + "epoch": 977.3684210526316, + "grad_norm": 1.2582765817642212, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 148560 + }, + { + "epoch": 977.4342105263158, + "grad_norm": 1.0692235231399536, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 148570 + }, + { + "epoch": 977.5, + "grad_norm": 1.2210439443588257, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 148580 + }, + { + "epoch": 977.5657894736842, + "grad_norm": 0.9458500742912292, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 148590 + }, + { + "epoch": 977.6315789473684, + "grad_norm": 1.099303960800171, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 148600 + }, + { + "epoch": 977.6973684210526, + "grad_norm": 0.9207699298858643, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 148610 + }, + { + "epoch": 977.7631578947369, + "grad_norm": 1.0685609579086304, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 148620 + }, + { + "epoch": 977.828947368421, + "grad_norm": 1.1274516582489014, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 148630 + }, + { + "epoch": 977.8947368421053, + "grad_norm": 1.022341012954712, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 148640 + }, + { + "epoch": 977.9605263157895, + "grad_norm": 1.0944082736968994, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 148650 + }, + { + "epoch": 978.0263157894736, + "grad_norm": 1.1251676082611084, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 148660 + }, + { + "epoch": 978.0921052631579, + "grad_norm": 0.7965459823608398, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 148670 + }, + { + "epoch": 978.1578947368421, + "grad_norm": 0.8056169152259827, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 148680 + }, + { + "epoch": 978.2236842105264, + "grad_norm": 0.9947016835212708, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 148690 + }, + { + "epoch": 978.2894736842105, + "grad_norm": 1.521198034286499, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 148700 + }, + { + "epoch": 978.3552631578947, + "grad_norm": 1.4726166725158691, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 148710 + }, + { + "epoch": 978.421052631579, + "grad_norm": 1.2374329566955566, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 148720 + }, + { + "epoch": 978.4868421052631, + "grad_norm": 1.090682029724121, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 148730 + }, + { + "epoch": 978.5526315789474, + "grad_norm": 1.163394570350647, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 148740 + }, + { + "epoch": 978.6184210526316, + "grad_norm": 1.0703188180923462, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 148750 + }, + { + "epoch": 978.6842105263158, + "grad_norm": 1.2367560863494873, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 148760 + }, + { + "epoch": 978.75, + "grad_norm": 1.329300880432129, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 148770 + }, + { + "epoch": 978.8157894736842, + "grad_norm": 1.083126425743103, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 148780 + }, + { + "epoch": 978.8815789473684, + "grad_norm": 0.7448536157608032, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 148790 + }, + { + "epoch": 978.9473684210526, + "grad_norm": 1.0757248401641846, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 148800 + }, + { + "epoch": 979.0131578947369, + "grad_norm": 0.8935508131980896, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 148810 + }, + { + "epoch": 979.078947368421, + "grad_norm": 1.3141487836837769, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 148820 + }, + { + "epoch": 979.1447368421053, + "grad_norm": 0.9629881978034973, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 148830 + }, + { + "epoch": 979.2105263157895, + "grad_norm": 0.9605873823165894, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 148840 + }, + { + "epoch": 979.2763157894736, + "grad_norm": 1.030029058456421, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 148850 + }, + { + "epoch": 979.3421052631579, + "grad_norm": 1.113380789756775, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 148860 + }, + { + "epoch": 979.4078947368421, + "grad_norm": 1.1046963930130005, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 148870 + }, + { + "epoch": 979.4736842105264, + "grad_norm": 1.0526591539382935, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 148880 + }, + { + "epoch": 979.5394736842105, + "grad_norm": 1.6122370958328247, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 148890 + }, + { + "epoch": 979.6052631578947, + "grad_norm": 1.042912244796753, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 148900 + }, + { + "epoch": 979.671052631579, + "grad_norm": 1.0555074214935303, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 148910 + }, + { + "epoch": 979.7368421052631, + "grad_norm": 0.8293302655220032, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 148920 + }, + { + "epoch": 979.8026315789474, + "grad_norm": 0.7137147188186646, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 148930 + }, + { + "epoch": 979.8684210526316, + "grad_norm": 1.0557599067687988, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 148940 + }, + { + "epoch": 979.9342105263158, + "grad_norm": 1.147873044013977, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 148950 + }, + { + "epoch": 980.0, + "grad_norm": 0.8311463594436646, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 148960 + }, + { + "epoch": 980.0657894736842, + "grad_norm": 1.0311176776885986, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 148970 + }, + { + "epoch": 980.1315789473684, + "grad_norm": 0.8780924677848816, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 148980 + }, + { + "epoch": 980.1973684210526, + "grad_norm": 1.235633373260498, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 148990 + }, + { + "epoch": 980.2631578947369, + "grad_norm": 1.298965573310852, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 149000 + }, + { + "epoch": 980.328947368421, + "grad_norm": 0.9419639706611633, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 149010 + }, + { + "epoch": 980.3947368421053, + "grad_norm": 1.0413622856140137, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 149020 + }, + { + "epoch": 980.4605263157895, + "grad_norm": 0.7547554969787598, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 149030 + }, + { + "epoch": 980.5263157894736, + "grad_norm": 0.7510955333709717, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 149040 + }, + { + "epoch": 980.5921052631579, + "grad_norm": 0.9087187647819519, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 149050 + }, + { + "epoch": 980.6578947368421, + "grad_norm": 1.227208137512207, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 149060 + }, + { + "epoch": 980.7236842105264, + "grad_norm": 0.9901775121688843, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 149070 + }, + { + "epoch": 980.7894736842105, + "grad_norm": 1.1645238399505615, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 149080 + }, + { + "epoch": 980.8552631578947, + "grad_norm": 0.9713094234466553, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 149090 + }, + { + "epoch": 980.921052631579, + "grad_norm": 1.0313467979431152, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 149100 + }, + { + "epoch": 980.9868421052631, + "grad_norm": 0.6963071227073669, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 149110 + }, + { + "epoch": 981.0526315789474, + "grad_norm": 1.0932525396347046, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 149120 + }, + { + "epoch": 981.1184210526316, + "grad_norm": 1.1071008443832397, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 149130 + }, + { + "epoch": 981.1842105263158, + "grad_norm": 1.3636506795883179, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 149140 + }, + { + "epoch": 981.25, + "grad_norm": 0.9255510568618774, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 149150 + }, + { + "epoch": 981.3157894736842, + "grad_norm": 0.9791567921638489, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 149160 + }, + { + "epoch": 981.3815789473684, + "grad_norm": 0.8821341395378113, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 149170 + }, + { + "epoch": 981.4473684210526, + "grad_norm": 0.9280576705932617, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 149180 + }, + { + "epoch": 981.5131578947369, + "grad_norm": 0.7593532800674438, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 149190 + }, + { + "epoch": 981.578947368421, + "grad_norm": 1.2766715288162231, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 149200 + }, + { + "epoch": 981.6447368421053, + "grad_norm": 0.7514734864234924, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 149210 + }, + { + "epoch": 981.7105263157895, + "grad_norm": 0.6935755014419556, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 149220 + }, + { + "epoch": 981.7763157894736, + "grad_norm": 1.0693104267120361, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 149230 + }, + { + "epoch": 981.8421052631579, + "grad_norm": 1.1052137613296509, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 149240 + }, + { + "epoch": 981.9078947368421, + "grad_norm": 1.1656912565231323, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 149250 + }, + { + "epoch": 981.9736842105264, + "grad_norm": 1.1336760520935059, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 149260 + }, + { + "epoch": 982.0394736842105, + "grad_norm": 1.118245244026184, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 149270 + }, + { + "epoch": 982.1052631578947, + "grad_norm": 1.0137847661972046, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 149280 + }, + { + "epoch": 982.171052631579, + "grad_norm": 1.1577128171920776, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 149290 + }, + { + "epoch": 982.2368421052631, + "grad_norm": 0.8299029469490051, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 149300 + }, + { + "epoch": 982.3026315789474, + "grad_norm": 1.0582658052444458, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 149310 + }, + { + "epoch": 982.3684210526316, + "grad_norm": 1.0629546642303467, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 149320 + }, + { + "epoch": 982.4342105263158, + "grad_norm": 1.2285642623901367, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 149330 + }, + { + "epoch": 982.5, + "grad_norm": 1.1044747829437256, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 149340 + }, + { + "epoch": 982.5657894736842, + "grad_norm": 1.2919867038726807, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 149350 + }, + { + "epoch": 982.6315789473684, + "grad_norm": 0.8287044763565063, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 149360 + }, + { + "epoch": 982.6973684210526, + "grad_norm": 0.9002633094787598, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 149370 + }, + { + "epoch": 982.7631578947369, + "grad_norm": 1.065321683883667, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 149380 + }, + { + "epoch": 982.828947368421, + "grad_norm": 1.0330926179885864, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 149390 + }, + { + "epoch": 982.8947368421053, + "grad_norm": 0.989842414855957, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 149400 + }, + { + "epoch": 982.9605263157895, + "grad_norm": 0.905006468296051, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 149410 + }, + { + "epoch": 983.0263157894736, + "grad_norm": 1.150158405303955, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 149420 + }, + { + "epoch": 983.0921052631579, + "grad_norm": 0.8461920619010925, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 149430 + }, + { + "epoch": 983.1578947368421, + "grad_norm": 0.8330326676368713, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 149440 + }, + { + "epoch": 983.2236842105264, + "grad_norm": 1.2453408241271973, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 149450 + }, + { + "epoch": 983.2894736842105, + "grad_norm": 1.0621312856674194, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 149460 + }, + { + "epoch": 983.3552631578947, + "grad_norm": 1.0289655923843384, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 149470 + }, + { + "epoch": 983.421052631579, + "grad_norm": 0.8648130297660828, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 149480 + }, + { + "epoch": 983.4868421052631, + "grad_norm": 1.0741007328033447, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 149490 + }, + { + "epoch": 983.5526315789474, + "grad_norm": 0.8858330845832825, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 149500 + }, + { + "epoch": 983.6184210526316, + "grad_norm": 1.25303053855896, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 149510 + }, + { + "epoch": 983.6842105263158, + "grad_norm": 1.2206391096115112, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 149520 + }, + { + "epoch": 983.75, + "grad_norm": 0.9137042164802551, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 149530 + }, + { + "epoch": 983.8157894736842, + "grad_norm": 1.025648593902588, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 149540 + }, + { + "epoch": 983.8815789473684, + "grad_norm": 1.0423038005828857, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 149550 + }, + { + "epoch": 983.9473684210526, + "grad_norm": 1.0618716478347778, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 149560 + }, + { + "epoch": 984.0131578947369, + "grad_norm": 1.030285120010376, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 149570 + }, + { + "epoch": 984.078947368421, + "grad_norm": 0.8739547729492188, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 149580 + }, + { + "epoch": 984.1447368421053, + "grad_norm": 1.1206870079040527, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 149590 + }, + { + "epoch": 984.2105263157895, + "grad_norm": 1.278941035270691, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 149600 + }, + { + "epoch": 984.2763157894736, + "grad_norm": 1.7599936723709106, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 149610 + }, + { + "epoch": 984.3421052631579, + "grad_norm": 1.6677428483963013, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 149620 + }, + { + "epoch": 984.4078947368421, + "grad_norm": 3.871687412261963, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 149630 + }, + { + "epoch": 984.4736842105264, + "grad_norm": 2.421388864517212, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 149640 + }, + { + "epoch": 984.5394736842105, + "grad_norm": 1.2500579357147217, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 149650 + }, + { + "epoch": 984.6052631578947, + "grad_norm": 1.5379196405410767, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 149660 + }, + { + "epoch": 984.671052631579, + "grad_norm": 1.7158979177474976, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 149670 + }, + { + "epoch": 984.7368421052631, + "grad_norm": 1.4845489263534546, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 149680 + }, + { + "epoch": 984.8026315789474, + "grad_norm": 1.1629608869552612, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 149690 + }, + { + "epoch": 984.8684210526316, + "grad_norm": 1.415179967880249, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 149700 + }, + { + "epoch": 984.9342105263158, + "grad_norm": 1.1638578176498413, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 149710 + }, + { + "epoch": 985.0, + "grad_norm": 0.9588046669960022, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 149720 + }, + { + "epoch": 985.0657894736842, + "grad_norm": 1.2830522060394287, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 149730 + }, + { + "epoch": 985.1315789473684, + "grad_norm": 0.9208570122718811, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 149740 + }, + { + "epoch": 985.1973684210526, + "grad_norm": 0.5856902003288269, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 149750 + }, + { + "epoch": 985.2631578947369, + "grad_norm": 0.9705900549888611, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 149760 + }, + { + "epoch": 985.328947368421, + "grad_norm": 1.1862601041793823, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 149770 + }, + { + "epoch": 985.3947368421053, + "grad_norm": 0.9023541808128357, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 149780 + }, + { + "epoch": 985.4605263157895, + "grad_norm": 1.044999361038208, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 149790 + }, + { + "epoch": 985.5263157894736, + "grad_norm": 0.9709221124649048, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 149800 + }, + { + "epoch": 985.5921052631579, + "grad_norm": 1.0954318046569824, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 149810 + }, + { + "epoch": 985.6578947368421, + "grad_norm": 1.156111478805542, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 149820 + }, + { + "epoch": 985.7236842105264, + "grad_norm": 0.8454775810241699, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 149830 + }, + { + "epoch": 985.7894736842105, + "grad_norm": 0.880204439163208, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 149840 + }, + { + "epoch": 985.8552631578947, + "grad_norm": 1.2712900638580322, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 149850 + }, + { + "epoch": 985.921052631579, + "grad_norm": 1.0623518228530884, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 149860 + }, + { + "epoch": 985.9868421052631, + "grad_norm": 0.7809160947799683, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 149870 + }, + { + "epoch": 986.0526315789474, + "grad_norm": 0.9675153493881226, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 149880 + }, + { + "epoch": 986.1184210526316, + "grad_norm": 0.8535943031311035, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 149890 + }, + { + "epoch": 986.1842105263158, + "grad_norm": 0.9090612530708313, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 149900 + }, + { + "epoch": 986.25, + "grad_norm": 0.7848944067955017, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 149910 + }, + { + "epoch": 986.3157894736842, + "grad_norm": 1.056888461112976, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 149920 + }, + { + "epoch": 986.3815789473684, + "grad_norm": 0.9026320576667786, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 149930 + }, + { + "epoch": 986.4473684210526, + "grad_norm": 1.167649745941162, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 149940 + }, + { + "epoch": 986.5131578947369, + "grad_norm": 1.0454814434051514, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 149950 + }, + { + "epoch": 986.578947368421, + "grad_norm": 1.3439925909042358, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 149960 + }, + { + "epoch": 986.6447368421053, + "grad_norm": 1.020798921585083, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 149970 + }, + { + "epoch": 986.7105263157895, + "grad_norm": 1.1719021797180176, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 149980 + }, + { + "epoch": 986.7763157894736, + "grad_norm": 1.0493474006652832, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 149990 + }, + { + "epoch": 986.8421052631579, + "grad_norm": 0.8416689038276672, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 150000 + }, + { + "epoch": 986.9078947368421, + "grad_norm": 0.9856203198432922, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 150010 + }, + { + "epoch": 986.9736842105264, + "grad_norm": 0.9143205881118774, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 150020 + }, + { + "epoch": 987.0394736842105, + "grad_norm": 0.9702136516571045, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 150030 + }, + { + "epoch": 987.1052631578947, + "grad_norm": 0.7779248356819153, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 150040 + }, + { + "epoch": 987.171052631579, + "grad_norm": 1.1739939451217651, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 150050 + }, + { + "epoch": 987.2368421052631, + "grad_norm": 1.0114635229110718, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 150060 + }, + { + "epoch": 987.3026315789474, + "grad_norm": 1.0713098049163818, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 150070 + }, + { + "epoch": 987.3684210526316, + "grad_norm": 0.9097891449928284, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 150080 + }, + { + "epoch": 987.4342105263158, + "grad_norm": 1.1985975503921509, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 150090 + }, + { + "epoch": 987.5, + "grad_norm": 1.0965287685394287, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 150100 + }, + { + "epoch": 987.5657894736842, + "grad_norm": 0.873046338558197, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 150110 + }, + { + "epoch": 987.6315789473684, + "grad_norm": 1.016603708267212, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 150120 + }, + { + "epoch": 987.6973684210526, + "grad_norm": 0.9413048028945923, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 150130 + }, + { + "epoch": 987.7631578947369, + "grad_norm": 1.0264689922332764, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 150140 + }, + { + "epoch": 987.828947368421, + "grad_norm": 0.916080117225647, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 150150 + }, + { + "epoch": 987.8947368421053, + "grad_norm": 1.110188364982605, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 150160 + }, + { + "epoch": 987.9605263157895, + "grad_norm": 1.0848978757858276, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 150170 + }, + { + "epoch": 988.0263157894736, + "grad_norm": 0.9753615856170654, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 150180 + }, + { + "epoch": 988.0921052631579, + "grad_norm": 1.2816380262374878, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 150190 + }, + { + "epoch": 988.1578947368421, + "grad_norm": 1.2264199256896973, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 150200 + }, + { + "epoch": 988.2236842105264, + "grad_norm": 1.3432106971740723, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 150210 + }, + { + "epoch": 988.2894736842105, + "grad_norm": 1.5210658311843872, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 150220 + }, + { + "epoch": 988.3552631578947, + "grad_norm": 1.3455909490585327, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 150230 + }, + { + "epoch": 988.421052631579, + "grad_norm": 1.2642890214920044, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 150240 + }, + { + "epoch": 988.4868421052631, + "grad_norm": 1.0704741477966309, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 150250 + }, + { + "epoch": 988.5526315789474, + "grad_norm": 1.0484412908554077, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 150260 + }, + { + "epoch": 988.6184210526316, + "grad_norm": 0.8334908485412598, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 150270 + }, + { + "epoch": 988.6842105263158, + "grad_norm": 0.8684769868850708, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 150280 + }, + { + "epoch": 988.75, + "grad_norm": 0.8539243936538696, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 150290 + }, + { + "epoch": 988.8157894736842, + "grad_norm": 0.8212605118751526, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 150300 + }, + { + "epoch": 988.8815789473684, + "grad_norm": 1.1426969766616821, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 150310 + }, + { + "epoch": 988.9473684210526, + "grad_norm": 1.2686213254928589, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 150320 + }, + { + "epoch": 989.0131578947369, + "grad_norm": 1.0295464992523193, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 150330 + }, + { + "epoch": 989.078947368421, + "grad_norm": 0.9937509298324585, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 150340 + }, + { + "epoch": 989.1447368421053, + "grad_norm": 0.8517789840698242, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 150350 + }, + { + "epoch": 989.2105263157895, + "grad_norm": 1.2996900081634521, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 150360 + }, + { + "epoch": 989.2763157894736, + "grad_norm": 0.7057400345802307, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 150370 + }, + { + "epoch": 989.3421052631579, + "grad_norm": 0.8088485598564148, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 150380 + }, + { + "epoch": 989.4078947368421, + "grad_norm": 0.9495823979377747, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 150390 + }, + { + "epoch": 989.4736842105264, + "grad_norm": 1.176836609840393, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 150400 + }, + { + "epoch": 989.5394736842105, + "grad_norm": 1.438713788986206, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 150410 + }, + { + "epoch": 989.6052631578947, + "grad_norm": 1.0466101169586182, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 150420 + }, + { + "epoch": 989.671052631579, + "grad_norm": 0.9159618020057678, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 150430 + }, + { + "epoch": 989.7368421052631, + "grad_norm": 1.128126859664917, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 150440 + }, + { + "epoch": 989.8026315789474, + "grad_norm": 0.6916261911392212, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 150450 + }, + { + "epoch": 989.8684210526316, + "grad_norm": 1.2188067436218262, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 150460 + }, + { + "epoch": 989.9342105263158, + "grad_norm": 1.330909013748169, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 150470 + }, + { + "epoch": 990.0, + "grad_norm": 1.1757625341415405, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 150480 + }, + { + "epoch": 990.0657894736842, + "grad_norm": 1.4033007621765137, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 150490 + }, + { + "epoch": 990.1315789473684, + "grad_norm": 1.0067665576934814, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 150500 + }, + { + "epoch": 990.1973684210526, + "grad_norm": 1.1867026090621948, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 150510 + }, + { + "epoch": 990.2631578947369, + "grad_norm": 1.306778073310852, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 150520 + }, + { + "epoch": 990.328947368421, + "grad_norm": 1.033635139465332, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 150530 + }, + { + "epoch": 990.3947368421053, + "grad_norm": 1.0581187009811401, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 150540 + }, + { + "epoch": 990.4605263157895, + "grad_norm": 1.396301507949829, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 150550 + }, + { + "epoch": 990.5263157894736, + "grad_norm": 1.2779043912887573, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 150560 + }, + { + "epoch": 990.5921052631579, + "grad_norm": 1.0077518224716187, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 150570 + }, + { + "epoch": 990.6578947368421, + "grad_norm": 1.1723346710205078, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 150580 + }, + { + "epoch": 990.7236842105264, + "grad_norm": 1.0024058818817139, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 150590 + }, + { + "epoch": 990.7894736842105, + "grad_norm": 1.120664358139038, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 150600 + }, + { + "epoch": 990.8552631578947, + "grad_norm": 1.2388149499893188, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 150610 + }, + { + "epoch": 990.921052631579, + "grad_norm": 1.3285748958587646, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 150620 + }, + { + "epoch": 990.9868421052631, + "grad_norm": 0.933190643787384, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 150630 + }, + { + "epoch": 991.0526315789474, + "grad_norm": 1.2380552291870117, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 150640 + }, + { + "epoch": 991.1184210526316, + "grad_norm": 1.042554497718811, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 150650 + }, + { + "epoch": 991.1842105263158, + "grad_norm": 1.0768581628799438, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 150660 + }, + { + "epoch": 991.25, + "grad_norm": 0.9357948899269104, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 150670 + }, + { + "epoch": 991.3157894736842, + "grad_norm": 1.2259186506271362, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 150680 + }, + { + "epoch": 991.3815789473684, + "grad_norm": 0.9076092839241028, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 150690 + }, + { + "epoch": 991.4473684210526, + "grad_norm": 1.0756639242172241, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 150700 + }, + { + "epoch": 991.5131578947369, + "grad_norm": 0.8654159307479858, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 150710 + }, + { + "epoch": 991.578947368421, + "grad_norm": 0.5353500843048096, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 150720 + }, + { + "epoch": 991.6447368421053, + "grad_norm": 1.0881078243255615, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 150730 + }, + { + "epoch": 991.7105263157895, + "grad_norm": 0.8417906165122986, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 150740 + }, + { + "epoch": 991.7763157894736, + "grad_norm": 1.3755824565887451, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 150750 + }, + { + "epoch": 991.8421052631579, + "grad_norm": 1.0867851972579956, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 150760 + }, + { + "epoch": 991.9078947368421, + "grad_norm": 1.0686115026474, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 150770 + }, + { + "epoch": 991.9736842105264, + "grad_norm": 0.9699664115905762, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 150780 + }, + { + "epoch": 992.0394736842105, + "grad_norm": 1.1987810134887695, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 150790 + }, + { + "epoch": 992.1052631578947, + "grad_norm": 0.8176727294921875, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 150800 + }, + { + "epoch": 992.171052631579, + "grad_norm": 0.8529284596443176, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 150810 + }, + { + "epoch": 992.2368421052631, + "grad_norm": 1.179165005683899, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 150820 + }, + { + "epoch": 992.3026315789474, + "grad_norm": 0.9331127405166626, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 150830 + }, + { + "epoch": 992.3684210526316, + "grad_norm": 1.0699684619903564, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 150840 + }, + { + "epoch": 992.4342105263158, + "grad_norm": 1.0798276662826538, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 150850 + }, + { + "epoch": 992.5, + "grad_norm": 1.126177430152893, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 150860 + }, + { + "epoch": 992.5657894736842, + "grad_norm": 1.269126057624817, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 150870 + }, + { + "epoch": 992.6315789473684, + "grad_norm": 1.2540189027786255, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 150880 + }, + { + "epoch": 992.6973684210526, + "grad_norm": 0.9607136845588684, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 150890 + }, + { + "epoch": 992.7631578947369, + "grad_norm": 1.192214846611023, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 150900 + }, + { + "epoch": 992.828947368421, + "grad_norm": 0.9914960265159607, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 150910 + }, + { + "epoch": 992.8947368421053, + "grad_norm": 0.728797435760498, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 150920 + }, + { + "epoch": 992.9605263157895, + "grad_norm": 0.8233456611633301, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 150930 + }, + { + "epoch": 993.0263157894736, + "grad_norm": 0.8725035190582275, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 150940 + }, + { + "epoch": 993.0921052631579, + "grad_norm": 1.1844197511672974, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 150950 + }, + { + "epoch": 993.1578947368421, + "grad_norm": 0.7963541150093079, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 150960 + }, + { + "epoch": 993.2236842105264, + "grad_norm": 1.0642098188400269, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 150970 + }, + { + "epoch": 993.2894736842105, + "grad_norm": 0.7806707620620728, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 150980 + }, + { + "epoch": 993.3552631578947, + "grad_norm": 0.9475646615028381, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 150990 + }, + { + "epoch": 993.421052631579, + "grad_norm": 0.715223491191864, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 151000 + }, + { + "epoch": 993.4868421052631, + "grad_norm": 1.004929542541504, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 151010 + }, + { + "epoch": 993.5526315789474, + "grad_norm": 0.9862182140350342, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 151020 + }, + { + "epoch": 993.6184210526316, + "grad_norm": 0.9281184673309326, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 151030 + }, + { + "epoch": 993.6842105263158, + "grad_norm": 1.0093213319778442, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 151040 + }, + { + "epoch": 993.75, + "grad_norm": 1.3052515983581543, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 151050 + }, + { + "epoch": 993.8157894736842, + "grad_norm": 1.137257695198059, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 151060 + }, + { + "epoch": 993.8815789473684, + "grad_norm": 1.3269919157028198, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 151070 + }, + { + "epoch": 993.9473684210526, + "grad_norm": 1.2152782678604126, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 151080 + }, + { + "epoch": 994.0131578947369, + "grad_norm": 1.2013344764709473, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 151090 + }, + { + "epoch": 994.078947368421, + "grad_norm": 1.0185185670852661, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 151100 + }, + { + "epoch": 994.1447368421053, + "grad_norm": 1.1128599643707275, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 151110 + }, + { + "epoch": 994.2105263157895, + "grad_norm": 1.1425211429595947, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 151120 + }, + { + "epoch": 994.2763157894736, + "grad_norm": 1.0568586587905884, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 151130 + }, + { + "epoch": 994.3421052631579, + "grad_norm": 1.080623984336853, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 151140 + }, + { + "epoch": 994.4078947368421, + "grad_norm": 1.4741694927215576, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 151150 + }, + { + "epoch": 994.4736842105264, + "grad_norm": 2.169677972793579, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 151160 + }, + { + "epoch": 994.5394736842105, + "grad_norm": 2.1970841884613037, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 151170 + }, + { + "epoch": 994.6052631578947, + "grad_norm": 1.8922151327133179, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 151180 + }, + { + "epoch": 994.671052631579, + "grad_norm": 1.2851279973983765, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 151190 + }, + { + "epoch": 994.7368421052631, + "grad_norm": 1.3797351121902466, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 151200 + }, + { + "epoch": 994.8026315789474, + "grad_norm": 1.318306565284729, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 151210 + }, + { + "epoch": 994.8684210526316, + "grad_norm": 1.115478754043579, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 151220 + }, + { + "epoch": 994.9342105263158, + "grad_norm": 1.0066384077072144, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 151230 + }, + { + "epoch": 995.0, + "grad_norm": 1.1105729341506958, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 151240 + }, + { + "epoch": 995.0657894736842, + "grad_norm": 1.399383544921875, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 151250 + }, + { + "epoch": 995.1315789473684, + "grad_norm": 1.160544991493225, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 151260 + }, + { + "epoch": 995.1973684210526, + "grad_norm": 0.9492695331573486, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 151270 + }, + { + "epoch": 995.2631578947369, + "grad_norm": 1.0131347179412842, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 151280 + }, + { + "epoch": 995.328947368421, + "grad_norm": 0.9944673776626587, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 151290 + }, + { + "epoch": 995.3947368421053, + "grad_norm": 1.1447815895080566, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 151300 + }, + { + "epoch": 995.4605263157895, + "grad_norm": 1.2639286518096924, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 151310 + }, + { + "epoch": 995.5263157894736, + "grad_norm": 0.9355853199958801, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 151320 + }, + { + "epoch": 995.5921052631579, + "grad_norm": 1.3493982553482056, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 151330 + }, + { + "epoch": 995.6578947368421, + "grad_norm": 1.0639145374298096, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 151340 + }, + { + "epoch": 995.7236842105264, + "grad_norm": 0.9887959957122803, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 151350 + }, + { + "epoch": 995.7894736842105, + "grad_norm": 1.3102302551269531, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 151360 + }, + { + "epoch": 995.8552631578947, + "grad_norm": 1.0294629335403442, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 151370 + }, + { + "epoch": 995.921052631579, + "grad_norm": 1.3485372066497803, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 151380 + }, + { + "epoch": 995.9868421052631, + "grad_norm": 1.2144447565078735, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 151390 + }, + { + "epoch": 996.0526315789474, + "grad_norm": 0.7425600290298462, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 151400 + }, + { + "epoch": 996.1184210526316, + "grad_norm": 1.1900805234909058, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 151410 + }, + { + "epoch": 996.1842105263158, + "grad_norm": 0.8845036029815674, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 151420 + }, + { + "epoch": 996.25, + "grad_norm": 0.965152382850647, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 151430 + }, + { + "epoch": 996.3157894736842, + "grad_norm": 1.1030590534210205, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 151440 + }, + { + "epoch": 996.3815789473684, + "grad_norm": 0.915751576423645, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 151450 + }, + { + "epoch": 996.4473684210526, + "grad_norm": 0.9805639982223511, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 151460 + }, + { + "epoch": 996.5131578947369, + "grad_norm": 0.8894463777542114, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 151470 + }, + { + "epoch": 996.578947368421, + "grad_norm": 1.0241661071777344, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 151480 + }, + { + "epoch": 996.6447368421053, + "grad_norm": 1.265334129333496, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 151490 + }, + { + "epoch": 996.7105263157895, + "grad_norm": 0.9865074753761292, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 151500 + }, + { + "epoch": 996.7763157894736, + "grad_norm": 0.9851489067077637, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 151510 + }, + { + "epoch": 996.8421052631579, + "grad_norm": 0.9616159200668335, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 151520 + }, + { + "epoch": 996.9078947368421, + "grad_norm": 1.051690936088562, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 151530 + }, + { + "epoch": 996.9736842105264, + "grad_norm": 0.7457857131958008, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 151540 + }, + { + "epoch": 997.0394736842105, + "grad_norm": 1.3598742485046387, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 151550 + }, + { + "epoch": 997.1052631578947, + "grad_norm": 1.0818504095077515, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 151560 + }, + { + "epoch": 997.171052631579, + "grad_norm": 1.1341614723205566, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 151570 + }, + { + "epoch": 997.2368421052631, + "grad_norm": 1.3437490463256836, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 151580 + }, + { + "epoch": 997.3026315789474, + "grad_norm": 0.998374879360199, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 151590 + }, + { + "epoch": 997.3684210526316, + "grad_norm": 1.2805155515670776, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 151600 + }, + { + "epoch": 997.4342105263158, + "grad_norm": 1.1499323844909668, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 151610 + }, + { + "epoch": 997.5, + "grad_norm": 1.3379521369934082, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 151620 + }, + { + "epoch": 997.5657894736842, + "grad_norm": 1.0792102813720703, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 151630 + }, + { + "epoch": 997.6315789473684, + "grad_norm": 1.2518432140350342, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 151640 + }, + { + "epoch": 997.6973684210526, + "grad_norm": 1.0179204940795898, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 151650 + }, + { + "epoch": 997.7631578947369, + "grad_norm": 0.97569340467453, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 151660 + }, + { + "epoch": 997.828947368421, + "grad_norm": 1.1537328958511353, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 151670 + }, + { + "epoch": 997.8947368421053, + "grad_norm": 1.0750672817230225, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 151680 + }, + { + "epoch": 997.9605263157895, + "grad_norm": 0.9960630536079407, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 151690 + }, + { + "epoch": 998.0263157894736, + "grad_norm": 0.7261005640029907, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 151700 + }, + { + "epoch": 998.0921052631579, + "grad_norm": 1.1404473781585693, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 151710 + }, + { + "epoch": 998.1578947368421, + "grad_norm": 1.0211684703826904, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 151720 + }, + { + "epoch": 998.2236842105264, + "grad_norm": 0.8978713750839233, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 151730 + }, + { + "epoch": 998.2894736842105, + "grad_norm": 1.1412054300308228, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 151740 + }, + { + "epoch": 998.3552631578947, + "grad_norm": 0.9569369554519653, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 151750 + }, + { + "epoch": 998.421052631579, + "grad_norm": 1.211241602897644, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 151760 + }, + { + "epoch": 998.4868421052631, + "grad_norm": 1.1356269121170044, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 151770 + }, + { + "epoch": 998.5526315789474, + "grad_norm": 1.2875263690948486, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 151780 + }, + { + "epoch": 998.6184210526316, + "grad_norm": 0.9279645085334778, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 151790 + }, + { + "epoch": 998.6842105263158, + "grad_norm": 1.1666724681854248, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 151800 + }, + { + "epoch": 998.75, + "grad_norm": 1.2689951658248901, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 151810 + }, + { + "epoch": 998.8157894736842, + "grad_norm": 0.8863130807876587, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 151820 + }, + { + "epoch": 998.8815789473684, + "grad_norm": 1.002847671508789, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 151830 + }, + { + "epoch": 998.9473684210526, + "grad_norm": 1.0156797170639038, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 151840 + }, + { + "epoch": 999.0131578947369, + "grad_norm": 1.1730620861053467, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 151850 + }, + { + "epoch": 999.078947368421, + "grad_norm": 1.2348865270614624, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 151860 + }, + { + "epoch": 999.1447368421053, + "grad_norm": 1.2273298501968384, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 151870 + }, + { + "epoch": 999.2105263157895, + "grad_norm": 1.1343059539794922, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 151880 + }, + { + "epoch": 999.2763157894736, + "grad_norm": 1.2365325689315796, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 151890 + }, + { + "epoch": 999.3421052631579, + "grad_norm": 0.8025848269462585, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 151900 + }, + { + "epoch": 999.4078947368421, + "grad_norm": 1.318703055381775, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 151910 + }, + { + "epoch": 999.4736842105264, + "grad_norm": 0.9021982550621033, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 151920 + }, + { + "epoch": 999.5394736842105, + "grad_norm": 1.2123674154281616, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 151930 + }, + { + "epoch": 999.6052631578947, + "grad_norm": 1.0211516618728638, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 151940 + }, + { + "epoch": 999.671052631579, + "grad_norm": 0.8161410689353943, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 151950 + }, + { + "epoch": 999.7368421052631, + "grad_norm": 0.7967602610588074, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 151960 + }, + { + "epoch": 999.8026315789474, + "grad_norm": 1.032511591911316, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 151970 + }, + { + "epoch": 999.8684210526316, + "grad_norm": 0.9636939764022827, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 151980 + }, + { + "epoch": 999.9342105263158, + "grad_norm": 0.7413888573646545, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 151990 + }, + { + "epoch": 1000.0, + "grad_norm": 1.27151358127594, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 152000 + }, + { + "epoch": 1000.0657894736842, + "grad_norm": 1.314128041267395, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 152010 + }, + { + "epoch": 1000.1315789473684, + "grad_norm": 1.3646230697631836, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 152020 + }, + { + "epoch": 1000.1973684210526, + "grad_norm": 1.1015452146530151, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 152030 + }, + { + "epoch": 1000.2631578947369, + "grad_norm": 1.1657509803771973, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 152040 + }, + { + "epoch": 1000.328947368421, + "grad_norm": 1.5875916481018066, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 152050 + }, + { + "epoch": 1000.3947368421053, + "grad_norm": 0.9831330180168152, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 152060 + }, + { + "epoch": 1000.4605263157895, + "grad_norm": 1.056758999824524, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 152070 + }, + { + "epoch": 1000.5263157894736, + "grad_norm": 0.9852983355522156, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 152080 + }, + { + "epoch": 1000.5921052631579, + "grad_norm": 1.0023329257965088, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 152090 + }, + { + "epoch": 1000.6578947368421, + "grad_norm": 0.8812468647956848, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 152100 + }, + { + "epoch": 1000.7236842105264, + "grad_norm": 1.064321756362915, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 152110 + }, + { + "epoch": 1000.7894736842105, + "grad_norm": 0.9033085107803345, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 152120 + }, + { + "epoch": 1000.8552631578947, + "grad_norm": 1.2103757858276367, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 152130 + }, + { + "epoch": 1000.921052631579, + "grad_norm": 0.9150286316871643, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 152140 + }, + { + "epoch": 1000.9868421052631, + "grad_norm": 0.9393565058708191, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 152150 + }, + { + "epoch": 1001.0526315789474, + "grad_norm": 1.163619875907898, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 152160 + }, + { + "epoch": 1001.1184210526316, + "grad_norm": 0.695565938949585, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 152170 + }, + { + "epoch": 1001.1842105263158, + "grad_norm": 0.837020754814148, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 152180 + }, + { + "epoch": 1001.25, + "grad_norm": 0.9212221503257751, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 152190 + }, + { + "epoch": 1001.3157894736842, + "grad_norm": 1.2509287595748901, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 152200 + }, + { + "epoch": 1001.3815789473684, + "grad_norm": 1.2080440521240234, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 152210 + }, + { + "epoch": 1001.4473684210526, + "grad_norm": 0.846920907497406, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 152220 + }, + { + "epoch": 1001.5131578947369, + "grad_norm": 1.3782720565795898, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 152230 + }, + { + "epoch": 1001.578947368421, + "grad_norm": 1.222617506980896, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 152240 + }, + { + "epoch": 1001.6447368421053, + "grad_norm": 0.953065812587738, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 152250 + }, + { + "epoch": 1001.7105263157895, + "grad_norm": 0.9601914882659912, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 152260 + }, + { + "epoch": 1001.7763157894736, + "grad_norm": 0.7010998725891113, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 152270 + }, + { + "epoch": 1001.8421052631579, + "grad_norm": 1.2162070274353027, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 152280 + }, + { + "epoch": 1001.9078947368421, + "grad_norm": 0.6597650647163391, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 152290 + }, + { + "epoch": 1001.9736842105264, + "grad_norm": 1.0291619300842285, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 152300 + }, + { + "epoch": 1002.0394736842105, + "grad_norm": 1.1354762315750122, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 152310 + }, + { + "epoch": 1002.1052631578947, + "grad_norm": 0.8112242221832275, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 152320 + }, + { + "epoch": 1002.171052631579, + "grad_norm": 1.0882664918899536, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 152330 + }, + { + "epoch": 1002.2368421052631, + "grad_norm": 0.919173538684845, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 152340 + }, + { + "epoch": 1002.3026315789474, + "grad_norm": 1.0165963172912598, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 152350 + }, + { + "epoch": 1002.3684210526316, + "grad_norm": 1.0748934745788574, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 152360 + }, + { + "epoch": 1002.4342105263158, + "grad_norm": 1.1250693798065186, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 152370 + }, + { + "epoch": 1002.5, + "grad_norm": 1.0457911491394043, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 152380 + }, + { + "epoch": 1002.5657894736842, + "grad_norm": 1.0000548362731934, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 152390 + }, + { + "epoch": 1002.6315789473684, + "grad_norm": 1.0795695781707764, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 152400 + }, + { + "epoch": 1002.6973684210526, + "grad_norm": 0.7151395082473755, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 152410 + }, + { + "epoch": 1002.7631578947369, + "grad_norm": 0.9494915008544922, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 152420 + }, + { + "epoch": 1002.828947368421, + "grad_norm": 0.8552742004394531, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 152430 + }, + { + "epoch": 1002.8947368421053, + "grad_norm": 0.8879970908164978, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 152440 + }, + { + "epoch": 1002.9605263157895, + "grad_norm": 1.2099488973617554, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 152450 + }, + { + "epoch": 1003.0263157894736, + "grad_norm": 0.8229483366012573, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 152460 + }, + { + "epoch": 1003.0921052631579, + "grad_norm": 0.604310154914856, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 152470 + }, + { + "epoch": 1003.1578947368421, + "grad_norm": 1.2384097576141357, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 152480 + }, + { + "epoch": 1003.2236842105264, + "grad_norm": 0.9572075009346008, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 152490 + }, + { + "epoch": 1003.2894736842105, + "grad_norm": 1.0197408199310303, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 152500 + }, + { + "epoch": 1003.3552631578947, + "grad_norm": 0.9657206535339355, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 152510 + }, + { + "epoch": 1003.421052631579, + "grad_norm": 0.9685333967208862, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 152520 + }, + { + "epoch": 1003.4868421052631, + "grad_norm": 1.1198996305465698, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 152530 + }, + { + "epoch": 1003.5526315789474, + "grad_norm": 1.1644282341003418, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 152540 + }, + { + "epoch": 1003.6184210526316, + "grad_norm": 0.9588817954063416, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 152550 + }, + { + "epoch": 1003.6842105263158, + "grad_norm": 0.7863638997077942, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 152560 + }, + { + "epoch": 1003.75, + "grad_norm": 1.0634634494781494, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 152570 + }, + { + "epoch": 1003.8157894736842, + "grad_norm": 1.312638759613037, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 152580 + }, + { + "epoch": 1003.8815789473684, + "grad_norm": 1.0929923057556152, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 152590 + }, + { + "epoch": 1003.9473684210526, + "grad_norm": 1.4399696588516235, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 152600 + }, + { + "epoch": 1004.0131578947369, + "grad_norm": 1.2716460227966309, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 152610 + }, + { + "epoch": 1004.078947368421, + "grad_norm": 1.0641130208969116, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 152620 + }, + { + "epoch": 1004.1447368421053, + "grad_norm": 0.9772552847862244, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 152630 + }, + { + "epoch": 1004.2105263157895, + "grad_norm": 0.9149281978607178, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 152640 + }, + { + "epoch": 1004.2763157894736, + "grad_norm": 1.0702228546142578, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 152650 + }, + { + "epoch": 1004.3421052631579, + "grad_norm": 1.0155905485153198, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 152660 + }, + { + "epoch": 1004.4078947368421, + "grad_norm": 1.0387847423553467, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 152670 + }, + { + "epoch": 1004.4736842105264, + "grad_norm": 1.1399638652801514, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 152680 + }, + { + "epoch": 1004.5394736842105, + "grad_norm": 1.349084734916687, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 152690 + }, + { + "epoch": 1004.6052631578947, + "grad_norm": 1.1907349824905396, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 152700 + }, + { + "epoch": 1004.671052631579, + "grad_norm": 1.4231032133102417, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 152710 + }, + { + "epoch": 1004.7368421052631, + "grad_norm": 1.0504357814788818, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 152720 + }, + { + "epoch": 1004.8026315789474, + "grad_norm": 0.8143443465232849, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 152730 + }, + { + "epoch": 1004.8684210526316, + "grad_norm": 1.1146156787872314, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 152740 + }, + { + "epoch": 1004.9342105263158, + "grad_norm": 1.0395196676254272, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 152750 + }, + { + "epoch": 1005.0, + "grad_norm": 1.0455608367919922, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 152760 + }, + { + "epoch": 1005.0657894736842, + "grad_norm": 0.8449133038520813, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 152770 + }, + { + "epoch": 1005.1315789473684, + "grad_norm": 0.9601693749427795, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 152780 + }, + { + "epoch": 1005.1973684210526, + "grad_norm": 0.9671313762664795, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 152790 + }, + { + "epoch": 1005.2631578947369, + "grad_norm": 0.8322471976280212, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 152800 + }, + { + "epoch": 1005.328947368421, + "grad_norm": 0.9465487003326416, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 152810 + }, + { + "epoch": 1005.3947368421053, + "grad_norm": 0.8888710141181946, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 152820 + }, + { + "epoch": 1005.4605263157895, + "grad_norm": 1.081684947013855, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 152830 + }, + { + "epoch": 1005.5263157894736, + "grad_norm": 0.9783938527107239, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 152840 + }, + { + "epoch": 1005.5921052631579, + "grad_norm": 1.3762357234954834, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 152850 + }, + { + "epoch": 1005.6578947368421, + "grad_norm": 1.3803174495697021, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 152860 + }, + { + "epoch": 1005.7236842105264, + "grad_norm": 1.0648322105407715, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 152870 + }, + { + "epoch": 1005.7894736842105, + "grad_norm": 1.019877314567566, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 152880 + }, + { + "epoch": 1005.8552631578947, + "grad_norm": 1.0689724683761597, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 152890 + }, + { + "epoch": 1005.921052631579, + "grad_norm": 1.0110927820205688, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 152900 + }, + { + "epoch": 1005.9868421052631, + "grad_norm": 0.8026441335678101, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 152910 + }, + { + "epoch": 1006.0526315789474, + "grad_norm": 0.9327909350395203, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 152920 + }, + { + "epoch": 1006.1184210526316, + "grad_norm": 0.9593319296836853, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 152930 + }, + { + "epoch": 1006.1842105263158, + "grad_norm": 0.9734617471694946, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 152940 + }, + { + "epoch": 1006.25, + "grad_norm": 0.8981104493141174, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 152950 + }, + { + "epoch": 1006.3157894736842, + "grad_norm": 0.6825907826423645, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 152960 + }, + { + "epoch": 1006.3815789473684, + "grad_norm": 1.2038465738296509, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 152970 + }, + { + "epoch": 1006.4473684210526, + "grad_norm": 1.119794487953186, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 152980 + }, + { + "epoch": 1006.5131578947369, + "grad_norm": 1.064179539680481, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 152990 + }, + { + "epoch": 1006.578947368421, + "grad_norm": 1.1156647205352783, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 153000 + }, + { + "epoch": 1006.6447368421053, + "grad_norm": 1.3254365921020508, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 153010 + }, + { + "epoch": 1006.7105263157895, + "grad_norm": 1.4035636186599731, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 153020 + }, + { + "epoch": 1006.7763157894736, + "grad_norm": 1.1695518493652344, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153030 + }, + { + "epoch": 1006.8421052631579, + "grad_norm": 1.270020842552185, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 153040 + }, + { + "epoch": 1006.9078947368421, + "grad_norm": 1.3155039548873901, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 153050 + }, + { + "epoch": 1006.9736842105264, + "grad_norm": 0.8515135645866394, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 153060 + }, + { + "epoch": 1007.0394736842105, + "grad_norm": 0.9089174866676331, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 153070 + }, + { + "epoch": 1007.1052631578947, + "grad_norm": 1.1836307048797607, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 153080 + }, + { + "epoch": 1007.171052631579, + "grad_norm": 1.1035048961639404, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 153090 + }, + { + "epoch": 1007.2368421052631, + "grad_norm": 1.2062311172485352, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 153100 + }, + { + "epoch": 1007.3026315789474, + "grad_norm": 1.3278850317001343, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 153110 + }, + { + "epoch": 1007.3684210526316, + "grad_norm": 1.200394630432129, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153120 + }, + { + "epoch": 1007.4342105263158, + "grad_norm": 1.2503161430358887, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 153130 + }, + { + "epoch": 1007.5, + "grad_norm": 1.327488899230957, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 153140 + }, + { + "epoch": 1007.5657894736842, + "grad_norm": 0.6946982145309448, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 153150 + }, + { + "epoch": 1007.6315789473684, + "grad_norm": 0.8254473805427551, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 153160 + }, + { + "epoch": 1007.6973684210526, + "grad_norm": 0.8691716194152832, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 153170 + }, + { + "epoch": 1007.7631578947369, + "grad_norm": 1.0709201097488403, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 153180 + }, + { + "epoch": 1007.828947368421, + "grad_norm": 1.361719012260437, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 153190 + }, + { + "epoch": 1007.8947368421053, + "grad_norm": 1.2241108417510986, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 153200 + }, + { + "epoch": 1007.9605263157895, + "grad_norm": 1.4689909219741821, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 153210 + }, + { + "epoch": 1008.0263157894736, + "grad_norm": 1.4543124437332153, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 153220 + }, + { + "epoch": 1008.0921052631579, + "grad_norm": 1.4135468006134033, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 153230 + }, + { + "epoch": 1008.1578947368421, + "grad_norm": 1.1939821243286133, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 153240 + }, + { + "epoch": 1008.2236842105264, + "grad_norm": 1.4653980731964111, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 153250 + }, + { + "epoch": 1008.2894736842105, + "grad_norm": 1.4187058210372925, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153260 + }, + { + "epoch": 1008.3552631578947, + "grad_norm": 1.3027122020721436, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 153270 + }, + { + "epoch": 1008.421052631579, + "grad_norm": 1.2403491735458374, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153280 + }, + { + "epoch": 1008.4868421052631, + "grad_norm": 0.8869355916976929, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153290 + }, + { + "epoch": 1008.5526315789474, + "grad_norm": 1.038567066192627, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 153300 + }, + { + "epoch": 1008.6184210526316, + "grad_norm": 1.1018482446670532, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 153310 + }, + { + "epoch": 1008.6842105263158, + "grad_norm": 0.9891707301139832, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 153320 + }, + { + "epoch": 1008.75, + "grad_norm": 1.3126126527786255, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 153330 + }, + { + "epoch": 1008.8157894736842, + "grad_norm": 1.0261906385421753, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 153340 + }, + { + "epoch": 1008.8815789473684, + "grad_norm": 1.0585100650787354, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 153350 + }, + { + "epoch": 1008.9473684210526, + "grad_norm": 0.7990329265594482, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 153360 + }, + { + "epoch": 1009.0131578947369, + "grad_norm": 0.703535795211792, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 153370 + }, + { + "epoch": 1009.078947368421, + "grad_norm": 0.7562394142150879, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 153380 + }, + { + "epoch": 1009.1447368421053, + "grad_norm": 0.6611744165420532, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 153390 + }, + { + "epoch": 1009.2105263157895, + "grad_norm": 0.9061070084571838, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 153400 + }, + { + "epoch": 1009.2763157894736, + "grad_norm": 1.1760482788085938, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 153410 + }, + { + "epoch": 1009.3421052631579, + "grad_norm": 1.6927746534347534, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 153420 + }, + { + "epoch": 1009.4078947368421, + "grad_norm": 1.375290870666504, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 153430 + }, + { + "epoch": 1009.4736842105264, + "grad_norm": 1.3933255672454834, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153440 + }, + { + "epoch": 1009.5394736842105, + "grad_norm": 1.4885764122009277, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153450 + }, + { + "epoch": 1009.6052631578947, + "grad_norm": 0.8054714798927307, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 153460 + }, + { + "epoch": 1009.671052631579, + "grad_norm": 0.9713701605796814, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 153470 + }, + { + "epoch": 1009.7368421052631, + "grad_norm": 1.0270944833755493, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 153480 + }, + { + "epoch": 1009.8026315789474, + "grad_norm": 1.0508967638015747, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 153490 + }, + { + "epoch": 1009.8684210526316, + "grad_norm": 1.0318694114685059, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 153500 + }, + { + "epoch": 1009.9342105263158, + "grad_norm": 1.081775426864624, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 153510 + }, + { + "epoch": 1010.0, + "grad_norm": 0.6069177389144897, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 153520 + }, + { + "epoch": 1010.0657894736842, + "grad_norm": 0.6230463981628418, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 153530 + }, + { + "epoch": 1010.1315789473684, + "grad_norm": 0.8272234201431274, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 153540 + }, + { + "epoch": 1010.1973684210526, + "grad_norm": 1.0551385879516602, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 153550 + }, + { + "epoch": 1010.2631578947369, + "grad_norm": 1.039683222770691, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 153560 + }, + { + "epoch": 1010.328947368421, + "grad_norm": 0.7899230122566223, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 153570 + }, + { + "epoch": 1010.3947368421053, + "grad_norm": 0.8216849565505981, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153580 + }, + { + "epoch": 1010.4605263157895, + "grad_norm": 0.9571535587310791, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 153590 + }, + { + "epoch": 1010.5263157894736, + "grad_norm": 1.1962392330169678, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 153600 + }, + { + "epoch": 1010.5921052631579, + "grad_norm": 0.8668336868286133, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 153610 + }, + { + "epoch": 1010.6578947368421, + "grad_norm": 1.1946544647216797, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 153620 + }, + { + "epoch": 1010.7236842105264, + "grad_norm": 0.8684710264205933, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 153630 + }, + { + "epoch": 1010.7894736842105, + "grad_norm": 1.0615448951721191, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 153640 + }, + { + "epoch": 1010.8552631578947, + "grad_norm": 1.3180712461471558, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 153650 + }, + { + "epoch": 1010.921052631579, + "grad_norm": 1.4111500978469849, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 153660 + }, + { + "epoch": 1010.9868421052631, + "grad_norm": 1.3890069723129272, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 153670 + }, + { + "epoch": 1011.0526315789474, + "grad_norm": 0.9825431108474731, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 153680 + }, + { + "epoch": 1011.1184210526316, + "grad_norm": 0.8479264974594116, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 153690 + }, + { + "epoch": 1011.1842105263158, + "grad_norm": 1.2472234964370728, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 153700 + }, + { + "epoch": 1011.25, + "grad_norm": 1.0906670093536377, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 153710 + }, + { + "epoch": 1011.3157894736842, + "grad_norm": 1.144405722618103, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 153720 + }, + { + "epoch": 1011.3815789473684, + "grad_norm": 0.8326566219329834, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 153730 + }, + { + "epoch": 1011.4473684210526, + "grad_norm": 1.1683098077774048, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 153740 + }, + { + "epoch": 1011.5131578947369, + "grad_norm": 0.7336865663528442, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 153750 + }, + { + "epoch": 1011.578947368421, + "grad_norm": 1.1023454666137695, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 153760 + }, + { + "epoch": 1011.6447368421053, + "grad_norm": 1.045922040939331, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 153770 + }, + { + "epoch": 1011.7105263157895, + "grad_norm": 0.9625352621078491, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 153780 + }, + { + "epoch": 1011.7763157894736, + "grad_norm": 1.1642197370529175, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 153790 + }, + { + "epoch": 1011.8421052631579, + "grad_norm": 1.1206856966018677, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 153800 + }, + { + "epoch": 1011.9078947368421, + "grad_norm": 1.251189947128296, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153810 + }, + { + "epoch": 1011.9736842105264, + "grad_norm": 0.9257858991622925, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 153820 + }, + { + "epoch": 1012.0394736842105, + "grad_norm": 0.9416394233703613, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 153830 + }, + { + "epoch": 1012.1052631578947, + "grad_norm": 0.7011066675186157, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153840 + }, + { + "epoch": 1012.171052631579, + "grad_norm": 0.9953075051307678, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 153850 + }, + { + "epoch": 1012.2368421052631, + "grad_norm": 1.0580404996871948, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 153860 + }, + { + "epoch": 1012.3026315789474, + "grad_norm": 0.9159753918647766, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 153870 + }, + { + "epoch": 1012.3684210526316, + "grad_norm": 1.2467912435531616, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 153880 + }, + { + "epoch": 1012.4342105263158, + "grad_norm": 0.8988513350486755, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 153890 + }, + { + "epoch": 1012.5, + "grad_norm": 0.8975652456283569, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 153900 + }, + { + "epoch": 1012.5657894736842, + "grad_norm": 0.9276600480079651, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 153910 + }, + { + "epoch": 1012.6315789473684, + "grad_norm": 1.1802819967269897, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 153920 + }, + { + "epoch": 1012.6973684210526, + "grad_norm": 1.5919638872146606, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 153930 + }, + { + "epoch": 1012.7631578947369, + "grad_norm": 0.8894348740577698, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 153940 + }, + { + "epoch": 1012.828947368421, + "grad_norm": 0.9059845209121704, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 153950 + }, + { + "epoch": 1012.8947368421053, + "grad_norm": 0.7619372010231018, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 153960 + }, + { + "epoch": 1012.9605263157895, + "grad_norm": 1.105398416519165, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 153970 + }, + { + "epoch": 1013.0263157894736, + "grad_norm": 0.6441566944122314, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 153980 + }, + { + "epoch": 1013.0921052631579, + "grad_norm": 1.1921231746673584, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 153990 + }, + { + "epoch": 1013.1578947368421, + "grad_norm": 1.3228693008422852, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 154000 + }, + { + "epoch": 1013.2236842105264, + "grad_norm": 1.2285857200622559, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 154010 + }, + { + "epoch": 1013.2894736842105, + "grad_norm": 0.5511453747749329, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 154020 + }, + { + "epoch": 1013.3552631578947, + "grad_norm": 0.8343952894210815, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 154030 + }, + { + "epoch": 1013.421052631579, + "grad_norm": 0.9927038550376892, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 154040 + }, + { + "epoch": 1013.4868421052631, + "grad_norm": 0.9461319446563721, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 154050 + }, + { + "epoch": 1013.5526315789474, + "grad_norm": 1.170270562171936, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 154060 + }, + { + "epoch": 1013.6184210526316, + "grad_norm": 1.1801016330718994, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 154070 + }, + { + "epoch": 1013.6842105263158, + "grad_norm": 1.064570665359497, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 154080 + }, + { + "epoch": 1013.75, + "grad_norm": 0.9943475127220154, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 154090 + }, + { + "epoch": 1013.8157894736842, + "grad_norm": 1.1326581239700317, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 154100 + }, + { + "epoch": 1013.8815789473684, + "grad_norm": 1.2644318342208862, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 154110 + }, + { + "epoch": 1013.9473684210526, + "grad_norm": 0.9471270442008972, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 154120 + }, + { + "epoch": 1014.0131578947369, + "grad_norm": 1.1460727453231812, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 154130 + }, + { + "epoch": 1014.078947368421, + "grad_norm": 1.2603262662887573, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 154140 + }, + { + "epoch": 1014.1447368421053, + "grad_norm": 0.8152709007263184, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 154150 + }, + { + "epoch": 1014.2105263157895, + "grad_norm": 0.7039467692375183, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 154160 + }, + { + "epoch": 1014.2763157894736, + "grad_norm": 1.0728949308395386, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 154170 + }, + { + "epoch": 1014.3421052631579, + "grad_norm": 0.7830701470375061, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 154180 + }, + { + "epoch": 1014.4078947368421, + "grad_norm": 0.8367193937301636, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 154190 + }, + { + "epoch": 1014.4736842105264, + "grad_norm": 1.973212480545044, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 154200 + }, + { + "epoch": 1014.5394736842105, + "grad_norm": 1.815258502960205, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 154210 + }, + { + "epoch": 1014.6052631578947, + "grad_norm": 2.393010377883911, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 154220 + }, + { + "epoch": 1014.671052631579, + "grad_norm": 1.425652027130127, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 154230 + }, + { + "epoch": 1014.7368421052631, + "grad_norm": 1.2232511043548584, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 154240 + }, + { + "epoch": 1014.8026315789474, + "grad_norm": 1.2349510192871094, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 154250 + }, + { + "epoch": 1014.8684210526316, + "grad_norm": 1.3472286462783813, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 154260 + }, + { + "epoch": 1014.9342105263158, + "grad_norm": 1.1970043182373047, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 154270 + }, + { + "epoch": 1015.0, + "grad_norm": 0.9629507660865784, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 154280 + }, + { + "epoch": 1015.0657894736842, + "grad_norm": 0.9816302061080933, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 154290 + }, + { + "epoch": 1015.1315789473684, + "grad_norm": 1.125002384185791, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 154300 + }, + { + "epoch": 1015.1973684210526, + "grad_norm": 1.284298062324524, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 154310 + }, + { + "epoch": 1015.2631578947369, + "grad_norm": 1.4155656099319458, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 154320 + }, + { + "epoch": 1015.328947368421, + "grad_norm": 1.0492305755615234, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 154330 + }, + { + "epoch": 1015.3947368421053, + "grad_norm": 1.006813406944275, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 154340 + }, + { + "epoch": 1015.4605263157895, + "grad_norm": 1.409071683883667, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 154350 + }, + { + "epoch": 1015.5263157894736, + "grad_norm": 1.6593658924102783, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 154360 + }, + { + "epoch": 1015.5921052631579, + "grad_norm": 1.4697237014770508, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 154370 + }, + { + "epoch": 1015.6578947368421, + "grad_norm": 1.9840725660324097, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 154380 + }, + { + "epoch": 1015.7236842105264, + "grad_norm": 1.4490352869033813, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 154390 + }, + { + "epoch": 1015.7894736842105, + "grad_norm": 1.1679290533065796, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 154400 + }, + { + "epoch": 1015.8552631578947, + "grad_norm": 1.219805121421814, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 154410 + }, + { + "epoch": 1015.921052631579, + "grad_norm": 1.1907403469085693, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 154420 + }, + { + "epoch": 1015.9868421052631, + "grad_norm": 1.4506357908248901, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 154430 + }, + { + "epoch": 1016.0526315789474, + "grad_norm": 1.450147032737732, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 154440 + }, + { + "epoch": 1016.1184210526316, + "grad_norm": 1.0505656003952026, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 154450 + }, + { + "epoch": 1016.1842105263158, + "grad_norm": 1.4850653409957886, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 154460 + }, + { + "epoch": 1016.25, + "grad_norm": 1.165228247642517, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 154470 + }, + { + "epoch": 1016.3157894736842, + "grad_norm": 0.9636247754096985, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 154480 + }, + { + "epoch": 1016.3815789473684, + "grad_norm": 1.0024343729019165, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 154490 + }, + { + "epoch": 1016.4473684210526, + "grad_norm": 0.8500187397003174, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 154500 + }, + { + "epoch": 1016.5131578947369, + "grad_norm": 1.01841139793396, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 154510 + }, + { + "epoch": 1016.578947368421, + "grad_norm": 0.774018406867981, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 154520 + }, + { + "epoch": 1016.6447368421053, + "grad_norm": 1.0345419645309448, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 154530 + }, + { + "epoch": 1016.7105263157895, + "grad_norm": 1.3381010293960571, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 154540 + }, + { + "epoch": 1016.7763157894736, + "grad_norm": 1.0903173685073853, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 154550 + }, + { + "epoch": 1016.8421052631579, + "grad_norm": 0.7370033860206604, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 154560 + }, + { + "epoch": 1016.9078947368421, + "grad_norm": 0.9964326620101929, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 154570 + }, + { + "epoch": 1016.9736842105264, + "grad_norm": 1.0719693899154663, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 154580 + }, + { + "epoch": 1017.0394736842105, + "grad_norm": 1.0311167240142822, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 154590 + }, + { + "epoch": 1017.1052631578947, + "grad_norm": 1.1289986371994019, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 154600 + }, + { + "epoch": 1017.171052631579, + "grad_norm": 0.8267912864685059, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 154610 + }, + { + "epoch": 1017.2368421052631, + "grad_norm": 1.1450597047805786, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 154620 + }, + { + "epoch": 1017.3026315789474, + "grad_norm": 0.8567523956298828, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 154630 + }, + { + "epoch": 1017.3684210526316, + "grad_norm": 0.6963806748390198, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 154640 + }, + { + "epoch": 1017.4342105263158, + "grad_norm": 0.7073983550071716, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 154650 + }, + { + "epoch": 1017.5, + "grad_norm": 0.6711755394935608, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 154660 + }, + { + "epoch": 1017.5657894736842, + "grad_norm": 0.7169049382209778, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 154670 + }, + { + "epoch": 1017.6315789473684, + "grad_norm": 0.6535640954971313, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 154680 + }, + { + "epoch": 1017.6973684210526, + "grad_norm": 0.5502938032150269, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 154690 + }, + { + "epoch": 1017.7631578947369, + "grad_norm": 0.9529855847358704, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 154700 + }, + { + "epoch": 1017.828947368421, + "grad_norm": 1.1591817140579224, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 154710 + }, + { + "epoch": 1017.8947368421053, + "grad_norm": 1.1533323526382446, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 154720 + }, + { + "epoch": 1017.9605263157895, + "grad_norm": 1.2087000608444214, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 154730 + }, + { + "epoch": 1018.0263157894736, + "grad_norm": 0.936263918876648, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 154740 + }, + { + "epoch": 1018.0921052631579, + "grad_norm": 1.1925992965698242, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 154750 + }, + { + "epoch": 1018.1578947368421, + "grad_norm": 1.5353156328201294, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 154760 + }, + { + "epoch": 1018.2236842105264, + "grad_norm": 1.3622137308120728, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 154770 + }, + { + "epoch": 1018.2894736842105, + "grad_norm": 1.236350417137146, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 154780 + }, + { + "epoch": 1018.3552631578947, + "grad_norm": 1.0077983140945435, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 154790 + }, + { + "epoch": 1018.421052631579, + "grad_norm": 1.119370460510254, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 154800 + }, + { + "epoch": 1018.4868421052631, + "grad_norm": 1.009660005569458, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 154810 + }, + { + "epoch": 1018.5526315789474, + "grad_norm": 1.031388759613037, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 154820 + }, + { + "epoch": 1018.6184210526316, + "grad_norm": 0.6950995326042175, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 154830 + }, + { + "epoch": 1018.6842105263158, + "grad_norm": 0.9287242293357849, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 154840 + }, + { + "epoch": 1018.75, + "grad_norm": 0.8696134090423584, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 154850 + }, + { + "epoch": 1018.8157894736842, + "grad_norm": 0.8959367275238037, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 154860 + }, + { + "epoch": 1018.8815789473684, + "grad_norm": 1.239129900932312, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 154870 + }, + { + "epoch": 1018.9473684210526, + "grad_norm": 0.8386613726615906, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 154880 + }, + { + "epoch": 1019.0131578947369, + "grad_norm": 1.1517994403839111, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 154890 + }, + { + "epoch": 1019.078947368421, + "grad_norm": 1.2050758600234985, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 154900 + }, + { + "epoch": 1019.1447368421053, + "grad_norm": 1.15315842628479, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 154910 + }, + { + "epoch": 1019.2105263157895, + "grad_norm": 0.8363189697265625, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 154920 + }, + { + "epoch": 1019.2763157894736, + "grad_norm": 0.8743520379066467, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 154930 + }, + { + "epoch": 1019.3421052631579, + "grad_norm": 1.1668791770935059, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 154940 + }, + { + "epoch": 1019.4078947368421, + "grad_norm": 1.0123094320297241, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 154950 + }, + { + "epoch": 1019.4736842105264, + "grad_norm": 0.5643197894096375, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 154960 + }, + { + "epoch": 1019.5394736842105, + "grad_norm": 0.9855898022651672, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 154970 + }, + { + "epoch": 1019.6052631578947, + "grad_norm": 0.9356517791748047, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 154980 + }, + { + "epoch": 1019.671052631579, + "grad_norm": 0.853830099105835, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 154990 + }, + { + "epoch": 1019.7368421052631, + "grad_norm": 1.0371325016021729, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 155000 + }, + { + "epoch": 1019.8026315789474, + "grad_norm": 0.9705318212509155, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 155010 + }, + { + "epoch": 1019.8684210526316, + "grad_norm": 0.9544471502304077, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 155020 + }, + { + "epoch": 1019.9342105263158, + "grad_norm": 1.049113392829895, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 155030 + }, + { + "epoch": 1020.0, + "grad_norm": 1.123701572418213, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 155040 + }, + { + "epoch": 1020.0657894736842, + "grad_norm": 1.035402536392212, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 155050 + }, + { + "epoch": 1020.1315789473684, + "grad_norm": 1.1114774942398071, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 155060 + }, + { + "epoch": 1020.1973684210526, + "grad_norm": 0.7469269037246704, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 155070 + }, + { + "epoch": 1020.2631578947369, + "grad_norm": 1.2120532989501953, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 155080 + }, + { + "epoch": 1020.328947368421, + "grad_norm": 0.9209999442100525, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 155090 + }, + { + "epoch": 1020.3947368421053, + "grad_norm": 1.0287814140319824, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 155100 + }, + { + "epoch": 1020.4605263157895, + "grad_norm": 1.212156057357788, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 155110 + }, + { + "epoch": 1020.5263157894736, + "grad_norm": 1.101295828819275, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 155120 + }, + { + "epoch": 1020.5921052631579, + "grad_norm": 1.2472134828567505, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 155130 + }, + { + "epoch": 1020.6578947368421, + "grad_norm": 1.4082173109054565, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 155140 + }, + { + "epoch": 1020.7236842105264, + "grad_norm": 1.0719764232635498, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 155150 + }, + { + "epoch": 1020.7894736842105, + "grad_norm": 1.4822707176208496, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 155160 + }, + { + "epoch": 1020.8552631578947, + "grad_norm": 1.1437565088272095, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 155170 + }, + { + "epoch": 1020.921052631579, + "grad_norm": 1.258529543876648, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 155180 + }, + { + "epoch": 1020.9868421052631, + "grad_norm": 1.0920777320861816, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 155190 + }, + { + "epoch": 1021.0526315789474, + "grad_norm": 1.4850847721099854, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 155200 + }, + { + "epoch": 1021.1184210526316, + "grad_norm": 0.7385463118553162, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 155210 + }, + { + "epoch": 1021.1842105263158, + "grad_norm": 1.168807864189148, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 155220 + }, + { + "epoch": 1021.25, + "grad_norm": 1.1659116744995117, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 155230 + }, + { + "epoch": 1021.3157894736842, + "grad_norm": 0.993746817111969, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 155240 + }, + { + "epoch": 1021.3815789473684, + "grad_norm": 1.1375447511672974, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 155250 + }, + { + "epoch": 1021.4473684210526, + "grad_norm": 0.8850059509277344, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 155260 + }, + { + "epoch": 1021.5131578947369, + "grad_norm": 0.8182586431503296, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 155270 + }, + { + "epoch": 1021.578947368421, + "grad_norm": 0.7995529770851135, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 155280 + }, + { + "epoch": 1021.6447368421053, + "grad_norm": 1.0963636636734009, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 155290 + }, + { + "epoch": 1021.7105263157895, + "grad_norm": 1.1766555309295654, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 155300 + }, + { + "epoch": 1021.7763157894736, + "grad_norm": 1.4533417224884033, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 155310 + }, + { + "epoch": 1021.8421052631579, + "grad_norm": 0.9066959023475647, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 155320 + }, + { + "epoch": 1021.9078947368421, + "grad_norm": 0.9514365196228027, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 155330 + }, + { + "epoch": 1021.9736842105264, + "grad_norm": 0.9749160408973694, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 155340 + }, + { + "epoch": 1022.0394736842105, + "grad_norm": 1.3155367374420166, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 155350 + }, + { + "epoch": 1022.1052631578947, + "grad_norm": 0.8676218390464783, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 155360 + }, + { + "epoch": 1022.171052631579, + "grad_norm": 1.047354817390442, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 155370 + }, + { + "epoch": 1022.2368421052631, + "grad_norm": 1.1073765754699707, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 155380 + }, + { + "epoch": 1022.3026315789474, + "grad_norm": 1.0393955707550049, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 155390 + }, + { + "epoch": 1022.3684210526316, + "grad_norm": 0.886330783367157, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 155400 + }, + { + "epoch": 1022.4342105263158, + "grad_norm": 0.9492865800857544, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 155410 + }, + { + "epoch": 1022.5, + "grad_norm": 1.4241212606430054, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 155420 + }, + { + "epoch": 1022.5657894736842, + "grad_norm": 1.7502048015594482, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 155430 + }, + { + "epoch": 1022.6315789473684, + "grad_norm": 1.311353087425232, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 155440 + }, + { + "epoch": 1022.6973684210526, + "grad_norm": 1.1434524059295654, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 155450 + }, + { + "epoch": 1022.7631578947369, + "grad_norm": 1.5268412828445435, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 155460 + }, + { + "epoch": 1022.828947368421, + "grad_norm": 1.0521647930145264, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 155470 + }, + { + "epoch": 1022.8947368421053, + "grad_norm": 1.2657438516616821, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 155480 + }, + { + "epoch": 1022.9605263157895, + "grad_norm": 1.5762451887130737, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 155490 + }, + { + "epoch": 1023.0263157894736, + "grad_norm": 0.9546888470649719, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 155500 + }, + { + "epoch": 1023.0921052631579, + "grad_norm": 0.966810941696167, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 155510 + }, + { + "epoch": 1023.1578947368421, + "grad_norm": 1.2217552661895752, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 155520 + }, + { + "epoch": 1023.2236842105264, + "grad_norm": 1.2993433475494385, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 155530 + }, + { + "epoch": 1023.2894736842105, + "grad_norm": 1.1217420101165771, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 155540 + }, + { + "epoch": 1023.3552631578947, + "grad_norm": 1.1864551305770874, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 155550 + }, + { + "epoch": 1023.421052631579, + "grad_norm": 1.1899709701538086, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 155560 + }, + { + "epoch": 1023.4868421052631, + "grad_norm": 1.223408818244934, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 155570 + }, + { + "epoch": 1023.5526315789474, + "grad_norm": 1.22601318359375, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 155580 + }, + { + "epoch": 1023.6184210526316, + "grad_norm": 0.761501133441925, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 155590 + }, + { + "epoch": 1023.6842105263158, + "grad_norm": 1.0578773021697998, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 155600 + }, + { + "epoch": 1023.75, + "grad_norm": 1.3183938264846802, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 155610 + }, + { + "epoch": 1023.8157894736842, + "grad_norm": 1.4918771982192993, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 155620 + }, + { + "epoch": 1023.8815789473684, + "grad_norm": 1.0870895385742188, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 155630 + }, + { + "epoch": 1023.9473684210526, + "grad_norm": 1.3157042264938354, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 155640 + }, + { + "epoch": 1024.0131578947369, + "grad_norm": 1.3045361042022705, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 155650 + }, + { + "epoch": 1024.078947368421, + "grad_norm": 1.0080130100250244, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 155660 + }, + { + "epoch": 1024.1447368421052, + "grad_norm": 1.2834631204605103, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 155670 + }, + { + "epoch": 1024.2105263157894, + "grad_norm": 1.2468620538711548, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 155680 + }, + { + "epoch": 1024.2763157894738, + "grad_norm": 0.9517583250999451, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 155690 + }, + { + "epoch": 1024.342105263158, + "grad_norm": 1.0547385215759277, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 155700 + }, + { + "epoch": 1024.407894736842, + "grad_norm": 1.4880155324935913, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 155710 + }, + { + "epoch": 1024.4736842105262, + "grad_norm": 1.133327603340149, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 155720 + }, + { + "epoch": 1024.5394736842106, + "grad_norm": 1.1422955989837646, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 155730 + }, + { + "epoch": 1024.6052631578948, + "grad_norm": 1.3525456190109253, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 155740 + }, + { + "epoch": 1024.671052631579, + "grad_norm": 1.063536524772644, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 155750 + }, + { + "epoch": 1024.7368421052631, + "grad_norm": 1.0734652280807495, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 155760 + }, + { + "epoch": 1024.8026315789473, + "grad_norm": 1.1811069250106812, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 155770 + }, + { + "epoch": 1024.8684210526317, + "grad_norm": 0.923505425453186, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 155780 + }, + { + "epoch": 1024.9342105263158, + "grad_norm": 0.7430877685546875, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 155790 + }, + { + "epoch": 1025.0, + "grad_norm": 0.5086251497268677, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 155800 + }, + { + "epoch": 1025.0657894736842, + "grad_norm": 0.8926265239715576, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 155810 + }, + { + "epoch": 1025.1315789473683, + "grad_norm": 1.6458979845046997, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 155820 + }, + { + "epoch": 1025.1973684210527, + "grad_norm": 0.9550696611404419, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 155830 + }, + { + "epoch": 1025.2631578947369, + "grad_norm": 1.3163942098617554, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 155840 + }, + { + "epoch": 1025.328947368421, + "grad_norm": 1.4571993350982666, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 155850 + }, + { + "epoch": 1025.3947368421052, + "grad_norm": 1.3019025325775146, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 155860 + }, + { + "epoch": 1025.4605263157894, + "grad_norm": 1.154705286026001, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 155870 + }, + { + "epoch": 1025.5263157894738, + "grad_norm": 1.1474753618240356, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 155880 + }, + { + "epoch": 1025.592105263158, + "grad_norm": 0.7636915445327759, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 155890 + }, + { + "epoch": 1025.657894736842, + "grad_norm": 1.04159414768219, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 155900 + }, + { + "epoch": 1025.7236842105262, + "grad_norm": 1.030527114868164, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 155910 + }, + { + "epoch": 1025.7894736842106, + "grad_norm": 1.1383177042007446, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 155920 + }, + { + "epoch": 1025.8552631578948, + "grad_norm": 1.3946573734283447, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 155930 + }, + { + "epoch": 1025.921052631579, + "grad_norm": 1.2071908712387085, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 155940 + }, + { + "epoch": 1025.9868421052631, + "grad_norm": 1.3407089710235596, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 155950 + }, + { + "epoch": 1026.0526315789473, + "grad_norm": 0.9338118433952332, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 155960 + }, + { + "epoch": 1026.1184210526317, + "grad_norm": 1.0861817598342896, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 155970 + }, + { + "epoch": 1026.1842105263158, + "grad_norm": 1.192749261856079, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 155980 + }, + { + "epoch": 1026.25, + "grad_norm": 1.2024480104446411, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 155990 + }, + { + "epoch": 1026.3157894736842, + "grad_norm": 1.0778899192810059, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 156000 + }, + { + "epoch": 1026.3815789473683, + "grad_norm": 1.0766499042510986, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 156010 + }, + { + "epoch": 1026.4473684210527, + "grad_norm": 1.1861462593078613, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 156020 + }, + { + "epoch": 1026.5131578947369, + "grad_norm": 1.0940054655075073, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 156030 + }, + { + "epoch": 1026.578947368421, + "grad_norm": 1.3079572916030884, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 156040 + }, + { + "epoch": 1026.6447368421052, + "grad_norm": 1.1062140464782715, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 156050 + }, + { + "epoch": 1026.7105263157894, + "grad_norm": 1.2502998113632202, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 156060 + }, + { + "epoch": 1026.7763157894738, + "grad_norm": 0.586124837398529, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 156070 + }, + { + "epoch": 1026.842105263158, + "grad_norm": 0.6738919019699097, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 156080 + }, + { + "epoch": 1026.907894736842, + "grad_norm": 0.9908142685890198, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 156090 + }, + { + "epoch": 1026.9736842105262, + "grad_norm": 1.2288180589675903, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 156100 + }, + { + "epoch": 1027.0394736842106, + "grad_norm": 1.325110673904419, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 156110 + }, + { + "epoch": 1027.1052631578948, + "grad_norm": 1.1032160520553589, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 156120 + }, + { + "epoch": 1027.171052631579, + "grad_norm": 1.2170844078063965, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 156130 + }, + { + "epoch": 1027.2368421052631, + "grad_norm": 0.6858425140380859, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 156140 + }, + { + "epoch": 1027.3026315789473, + "grad_norm": 1.3081910610198975, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 156150 + }, + { + "epoch": 1027.3684210526317, + "grad_norm": 1.1171272993087769, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 156160 + }, + { + "epoch": 1027.4342105263158, + "grad_norm": 1.35430109500885, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 156170 + }, + { + "epoch": 1027.5, + "grad_norm": 1.3601711988449097, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 156180 + }, + { + "epoch": 1027.5657894736842, + "grad_norm": 1.4882049560546875, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 156190 + }, + { + "epoch": 1027.6315789473683, + "grad_norm": 0.9452913999557495, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 156200 + }, + { + "epoch": 1027.6973684210527, + "grad_norm": 1.0488688945770264, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 156210 + }, + { + "epoch": 1027.7631578947369, + "grad_norm": 1.0508980751037598, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 156220 + }, + { + "epoch": 1027.828947368421, + "grad_norm": 1.4618175029754639, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 156230 + }, + { + "epoch": 1027.8947368421052, + "grad_norm": 0.8647827506065369, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 156240 + }, + { + "epoch": 1027.9605263157894, + "grad_norm": 1.1985787153244019, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 156250 + }, + { + "epoch": 1028.0263157894738, + "grad_norm": 1.6828774213790894, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 156260 + }, + { + "epoch": 1028.092105263158, + "grad_norm": 1.015224814414978, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 156270 + }, + { + "epoch": 1028.157894736842, + "grad_norm": 1.148722767829895, + "learning_rate": 0.0001, + "loss": 0.0131, + "step": 156280 + }, + { + "epoch": 1028.2236842105262, + "grad_norm": 0.7410078048706055, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 156290 + }, + { + "epoch": 1028.2894736842106, + "grad_norm": 0.7929697036743164, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 156300 + }, + { + "epoch": 1028.3552631578948, + "grad_norm": 1.1712801456451416, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 156310 + }, + { + "epoch": 1028.421052631579, + "grad_norm": 0.8382496237754822, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 156320 + }, + { + "epoch": 1028.4868421052631, + "grad_norm": 0.9845030307769775, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 156330 + }, + { + "epoch": 1028.5526315789473, + "grad_norm": 0.8377711176872253, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 156340 + }, + { + "epoch": 1028.6184210526317, + "grad_norm": 0.874224066734314, + "learning_rate": 0.0001, + "loss": 0.0147, + "step": 156350 + }, + { + "epoch": 1028.6842105263158, + "grad_norm": 1.0614688396453857, + "learning_rate": 0.0001, + "loss": 0.0133, + "step": 156360 + }, + { + "epoch": 1028.75, + "grad_norm": 0.947669267654419, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 156370 + }, + { + "epoch": 1028.8157894736842, + "grad_norm": 0.8915820121765137, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 156380 + }, + { + "epoch": 1028.8815789473683, + "grad_norm": 1.1027073860168457, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 156390 + }, + { + "epoch": 1028.9473684210527, + "grad_norm": 0.9126038551330566, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 156400 + }, + { + "epoch": 1029.0131578947369, + "grad_norm": 1.0201152563095093, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 156410 + }, + { + "epoch": 1029.078947368421, + "grad_norm": 0.7695257067680359, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 156420 + }, + { + "epoch": 1029.1447368421052, + "grad_norm": 1.1396857500076294, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 156430 + }, + { + "epoch": 1029.2105263157894, + "grad_norm": 1.5026952028274536, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 156440 + }, + { + "epoch": 1029.2763157894738, + "grad_norm": 1.048147201538086, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 156450 + }, + { + "epoch": 1029.342105263158, + "grad_norm": 1.0914058685302734, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 156460 + }, + { + "epoch": 1029.407894736842, + "grad_norm": 0.9121741056442261, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 156470 + }, + { + "epoch": 1029.4736842105262, + "grad_norm": 1.072133183479309, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 156480 + }, + { + "epoch": 1029.5394736842106, + "grad_norm": 1.3796448707580566, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 156490 + }, + { + "epoch": 1029.6052631578948, + "grad_norm": 1.3151845932006836, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 156500 + }, + { + "epoch": 1029.671052631579, + "grad_norm": 0.7077886462211609, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 156510 + }, + { + "epoch": 1029.7368421052631, + "grad_norm": 0.8222764134407043, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 156520 + }, + { + "epoch": 1029.8026315789473, + "grad_norm": 1.199191689491272, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 156530 + }, + { + "epoch": 1029.8684210526317, + "grad_norm": 1.1930209398269653, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 156540 + }, + { + "epoch": 1029.9342105263158, + "grad_norm": 1.1827071905136108, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 156550 + }, + { + "epoch": 1030.0, + "grad_norm": 1.1457871198654175, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 156560 + }, + { + "epoch": 1030.0657894736842, + "grad_norm": 1.4931516647338867, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 156570 + }, + { + "epoch": 1030.1315789473683, + "grad_norm": 1.4106398820877075, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 156580 + }, + { + "epoch": 1030.1973684210527, + "grad_norm": 1.3719513416290283, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 156590 + }, + { + "epoch": 1030.2631578947369, + "grad_norm": 0.9716209173202515, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 156600 + }, + { + "epoch": 1030.328947368421, + "grad_norm": 1.2926265001296997, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 156610 + }, + { + "epoch": 1030.3947368421052, + "grad_norm": 0.9236207604408264, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 156620 + }, + { + "epoch": 1030.4605263157894, + "grad_norm": 0.8633676767349243, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 156630 + }, + { + "epoch": 1030.5263157894738, + "grad_norm": 0.5557491183280945, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 156640 + }, + { + "epoch": 1030.592105263158, + "grad_norm": 1.302513837814331, + "learning_rate": 0.0001, + "loss": 0.0148, + "step": 156650 + }, + { + "epoch": 1030.657894736842, + "grad_norm": 0.9874489307403564, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 156660 + }, + { + "epoch": 1030.7236842105262, + "grad_norm": 1.4245355129241943, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 156670 + }, + { + "epoch": 1030.7894736842106, + "grad_norm": 1.2239059209823608, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 156680 + }, + { + "epoch": 1030.8552631578948, + "grad_norm": 1.1711642742156982, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 156690 + }, + { + "epoch": 1030.921052631579, + "grad_norm": 1.1151902675628662, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 156700 + }, + { + "epoch": 1030.9868421052631, + "grad_norm": 1.4098039865493774, + "learning_rate": 0.0001, + "loss": 0.0153, + "step": 156710 + }, + { + "epoch": 1031.0526315789473, + "grad_norm": 1.140479326248169, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 156720 + }, + { + "epoch": 1031.1184210526317, + "grad_norm": 1.3065927028656006, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 156730 + }, + { + "epoch": 1031.1842105263158, + "grad_norm": 0.9672998785972595, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 156740 + }, + { + "epoch": 1031.25, + "grad_norm": 1.2625794410705566, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 156750 + }, + { + "epoch": 1031.3157894736842, + "grad_norm": 1.0192890167236328, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 156760 + }, + { + "epoch": 1031.3815789473683, + "grad_norm": 1.1616865396499634, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 156770 + }, + { + "epoch": 1031.4473684210527, + "grad_norm": 0.9945463538169861, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 156780 + }, + { + "epoch": 1031.5131578947369, + "grad_norm": 0.9444164037704468, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 156790 + }, + { + "epoch": 1031.578947368421, + "grad_norm": 0.7794418931007385, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 156800 + }, + { + "epoch": 1031.6447368421052, + "grad_norm": 0.908078670501709, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 156810 + }, + { + "epoch": 1031.7105263157894, + "grad_norm": 0.7587025165557861, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 156820 + }, + { + "epoch": 1031.7763157894738, + "grad_norm": 1.2462350130081177, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 156830 + }, + { + "epoch": 1031.842105263158, + "grad_norm": 1.1004960536956787, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 156840 + }, + { + "epoch": 1031.907894736842, + "grad_norm": 0.6818149089813232, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 156850 + }, + { + "epoch": 1031.9736842105262, + "grad_norm": 1.0181989669799805, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 156860 + }, + { + "epoch": 1032.0394736842106, + "grad_norm": 1.3769375085830688, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 156870 + }, + { + "epoch": 1032.1052631578948, + "grad_norm": 1.301367998123169, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 156880 + }, + { + "epoch": 1032.171052631579, + "grad_norm": 1.4614466428756714, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 156890 + }, + { + "epoch": 1032.2368421052631, + "grad_norm": 1.141822099685669, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 156900 + }, + { + "epoch": 1032.3026315789473, + "grad_norm": 0.8680024147033691, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 156910 + }, + { + "epoch": 1032.3684210526317, + "grad_norm": 1.0369664430618286, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 156920 + }, + { + "epoch": 1032.4342105263158, + "grad_norm": 1.025201439857483, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 156930 + }, + { + "epoch": 1032.5, + "grad_norm": 0.8427085876464844, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 156940 + }, + { + "epoch": 1032.5657894736842, + "grad_norm": 1.090246558189392, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 156950 + }, + { + "epoch": 1032.6315789473683, + "grad_norm": 0.9469527006149292, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 156960 + }, + { + "epoch": 1032.6973684210527, + "grad_norm": 0.9149708151817322, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 156970 + }, + { + "epoch": 1032.7631578947369, + "grad_norm": 0.9780547618865967, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 156980 + }, + { + "epoch": 1032.828947368421, + "grad_norm": 1.3159875869750977, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 156990 + }, + { + "epoch": 1032.8947368421052, + "grad_norm": 0.9465406537055969, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 157000 + }, + { + "epoch": 1032.9605263157894, + "grad_norm": 1.0383718013763428, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 157010 + }, + { + "epoch": 1033.0263157894738, + "grad_norm": 0.9563059210777283, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 157020 + }, + { + "epoch": 1033.092105263158, + "grad_norm": 0.8377731442451477, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 157030 + }, + { + "epoch": 1033.157894736842, + "grad_norm": 0.7005869150161743, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 157040 + }, + { + "epoch": 1033.2236842105262, + "grad_norm": 0.8357473611831665, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 157050 + }, + { + "epoch": 1033.2894736842106, + "grad_norm": 0.877869188785553, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 157060 + }, + { + "epoch": 1033.3552631578948, + "grad_norm": 1.0630418062210083, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 157070 + }, + { + "epoch": 1033.421052631579, + "grad_norm": 1.6160914897918701, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 157080 + }, + { + "epoch": 1033.4868421052631, + "grad_norm": 1.2976188659667969, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 157090 + }, + { + "epoch": 1033.5526315789473, + "grad_norm": 1.338534951210022, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 157100 + }, + { + "epoch": 1033.6184210526317, + "grad_norm": 1.0486924648284912, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 157110 + }, + { + "epoch": 1033.6842105263158, + "grad_norm": 0.957435667514801, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 157120 + }, + { + "epoch": 1033.75, + "grad_norm": 1.0611435174942017, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 157130 + }, + { + "epoch": 1033.8157894736842, + "grad_norm": 1.471934199333191, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 157140 + }, + { + "epoch": 1033.8815789473683, + "grad_norm": 1.0346896648406982, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 157150 + }, + { + "epoch": 1033.9473684210527, + "grad_norm": 1.5586978197097778, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 157160 + }, + { + "epoch": 1034.0131578947369, + "grad_norm": 1.5314991474151611, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 157170 + }, + { + "epoch": 1034.078947368421, + "grad_norm": 1.3219138383865356, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 157180 + }, + { + "epoch": 1034.1447368421052, + "grad_norm": 0.7580617070198059, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 157190 + }, + { + "epoch": 1034.2105263157894, + "grad_norm": 1.2841469049453735, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 157200 + }, + { + "epoch": 1034.2763157894738, + "grad_norm": 0.8952348828315735, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 157210 + }, + { + "epoch": 1034.342105263158, + "grad_norm": 1.0828073024749756, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 157220 + }, + { + "epoch": 1034.407894736842, + "grad_norm": 1.3251006603240967, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 157230 + }, + { + "epoch": 1034.4736842105262, + "grad_norm": 0.8319105505943298, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 157240 + }, + { + "epoch": 1034.5394736842106, + "grad_norm": 0.6340340971946716, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 157250 + }, + { + "epoch": 1034.6052631578948, + "grad_norm": 1.0439341068267822, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 157260 + }, + { + "epoch": 1034.671052631579, + "grad_norm": 0.9129518270492554, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 157270 + }, + { + "epoch": 1034.7368421052631, + "grad_norm": 1.1103804111480713, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 157280 + }, + { + "epoch": 1034.8026315789473, + "grad_norm": 1.1742290258407593, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 157290 + }, + { + "epoch": 1034.8684210526317, + "grad_norm": 1.2057838439941406, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 157300 + }, + { + "epoch": 1034.9342105263158, + "grad_norm": 0.9219057559967041, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 157310 + }, + { + "epoch": 1035.0, + "grad_norm": 0.6480765342712402, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 157320 + }, + { + "epoch": 1035.0657894736842, + "grad_norm": 0.9799567461013794, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 157330 + }, + { + "epoch": 1035.1315789473683, + "grad_norm": 1.2882983684539795, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 157340 + }, + { + "epoch": 1035.1973684210527, + "grad_norm": 0.887322187423706, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 157350 + }, + { + "epoch": 1035.2631578947369, + "grad_norm": 0.8471446633338928, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 157360 + }, + { + "epoch": 1035.328947368421, + "grad_norm": 0.8873788118362427, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 157370 + }, + { + "epoch": 1035.3947368421052, + "grad_norm": 1.0279638767242432, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 157380 + }, + { + "epoch": 1035.4605263157894, + "grad_norm": 0.9138223528862, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 157390 + }, + { + "epoch": 1035.5263157894738, + "grad_norm": 1.492469072341919, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 157400 + }, + { + "epoch": 1035.592105263158, + "grad_norm": 1.4782291650772095, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 157410 + }, + { + "epoch": 1035.657894736842, + "grad_norm": 1.0031368732452393, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 157420 + }, + { + "epoch": 1035.7236842105262, + "grad_norm": 1.0063573122024536, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 157430 + }, + { + "epoch": 1035.7894736842106, + "grad_norm": 1.1823172569274902, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 157440 + }, + { + "epoch": 1035.8552631578948, + "grad_norm": 0.9905596971511841, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 157450 + }, + { + "epoch": 1035.921052631579, + "grad_norm": 0.9706788063049316, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 157460 + }, + { + "epoch": 1035.9868421052631, + "grad_norm": 1.1120086908340454, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 157470 + }, + { + "epoch": 1036.0526315789473, + "grad_norm": 1.1422266960144043, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 157480 + }, + { + "epoch": 1036.1184210526317, + "grad_norm": 1.3374935388565063, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 157490 + }, + { + "epoch": 1036.1842105263158, + "grad_norm": 1.3589179515838623, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 157500 + }, + { + "epoch": 1036.25, + "grad_norm": 1.1102532148361206, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 157510 + }, + { + "epoch": 1036.3157894736842, + "grad_norm": 1.357604742050171, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 157520 + }, + { + "epoch": 1036.3815789473683, + "grad_norm": 1.054775595664978, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 157530 + }, + { + "epoch": 1036.4473684210527, + "grad_norm": 1.0131275653839111, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 157540 + }, + { + "epoch": 1036.5131578947369, + "grad_norm": 0.73055100440979, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 157550 + }, + { + "epoch": 1036.578947368421, + "grad_norm": 0.8774827122688293, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 157560 + }, + { + "epoch": 1036.6447368421052, + "grad_norm": 1.4485974311828613, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 157570 + }, + { + "epoch": 1036.7105263157894, + "grad_norm": 1.3411765098571777, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 157580 + }, + { + "epoch": 1036.7763157894738, + "grad_norm": 1.4272361993789673, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 157590 + }, + { + "epoch": 1036.842105263158, + "grad_norm": 1.3204584121704102, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 157600 + }, + { + "epoch": 1036.907894736842, + "grad_norm": 1.5034716129302979, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 157610 + }, + { + "epoch": 1036.9736842105262, + "grad_norm": 1.6326338052749634, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 157620 + }, + { + "epoch": 1037.0394736842106, + "grad_norm": 0.9135128259658813, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 157630 + }, + { + "epoch": 1037.1052631578948, + "grad_norm": 1.148511290550232, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 157640 + }, + { + "epoch": 1037.171052631579, + "grad_norm": 1.2435686588287354, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 157650 + }, + { + "epoch": 1037.2368421052631, + "grad_norm": 0.9658685326576233, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 157660 + }, + { + "epoch": 1037.3026315789473, + "grad_norm": 1.3001043796539307, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 157670 + }, + { + "epoch": 1037.3684210526317, + "grad_norm": 1.0400381088256836, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 157680 + }, + { + "epoch": 1037.4342105263158, + "grad_norm": 0.8855817914009094, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 157690 + }, + { + "epoch": 1037.5, + "grad_norm": 1.0237966775894165, + "learning_rate": 0.0001, + "loss": 0.0069, + "step": 157700 + }, + { + "epoch": 1037.5657894736842, + "grad_norm": 1.0766881704330444, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 157710 + }, + { + "epoch": 1037.6315789473683, + "grad_norm": 1.1923819780349731, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 157720 + }, + { + "epoch": 1037.6973684210527, + "grad_norm": 1.0676945447921753, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 157730 + }, + { + "epoch": 1037.7631578947369, + "grad_norm": 0.5870857238769531, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 157740 + }, + { + "epoch": 1037.828947368421, + "grad_norm": 0.9011507034301758, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 157750 + }, + { + "epoch": 1037.8947368421052, + "grad_norm": 1.1840976476669312, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 157760 + }, + { + "epoch": 1037.9605263157894, + "grad_norm": 1.0965074300765991, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 157770 + }, + { + "epoch": 1038.0263157894738, + "grad_norm": 0.8631574511528015, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 157780 + }, + { + "epoch": 1038.092105263158, + "grad_norm": 1.3120415210723877, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 157790 + }, + { + "epoch": 1038.157894736842, + "grad_norm": 1.1779505014419556, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 157800 + }, + { + "epoch": 1038.2236842105262, + "grad_norm": 1.042224645614624, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 157810 + }, + { + "epoch": 1038.2894736842106, + "grad_norm": 0.9169209599494934, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 157820 + }, + { + "epoch": 1038.3552631578948, + "grad_norm": 1.129440188407898, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 157830 + }, + { + "epoch": 1038.421052631579, + "grad_norm": 0.9302949905395508, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 157840 + }, + { + "epoch": 1038.4868421052631, + "grad_norm": 1.3047235012054443, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 157850 + }, + { + "epoch": 1038.5526315789473, + "grad_norm": 1.4257992506027222, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 157860 + }, + { + "epoch": 1038.6184210526317, + "grad_norm": 1.1425153017044067, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 157870 + }, + { + "epoch": 1038.6842105263158, + "grad_norm": 1.1358925104141235, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 157880 + }, + { + "epoch": 1038.75, + "grad_norm": 0.9900014400482178, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 157890 + }, + { + "epoch": 1038.8157894736842, + "grad_norm": 0.9139213562011719, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 157900 + }, + { + "epoch": 1038.8815789473683, + "grad_norm": 1.2955433130264282, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 157910 + }, + { + "epoch": 1038.9473684210527, + "grad_norm": 1.2196770906448364, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 157920 + }, + { + "epoch": 1039.0131578947369, + "grad_norm": 1.0511000156402588, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 157930 + }, + { + "epoch": 1039.078947368421, + "grad_norm": 1.1892273426055908, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 157940 + }, + { + "epoch": 1039.1447368421052, + "grad_norm": 0.8552981019020081, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 157950 + }, + { + "epoch": 1039.2105263157894, + "grad_norm": 0.9567652940750122, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 157960 + }, + { + "epoch": 1039.2763157894738, + "grad_norm": 0.9324297904968262, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 157970 + }, + { + "epoch": 1039.342105263158, + "grad_norm": 0.8563692569732666, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 157980 + }, + { + "epoch": 1039.407894736842, + "grad_norm": 1.0775697231292725, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 157990 + }, + { + "epoch": 1039.4736842105262, + "grad_norm": 1.0669533014297485, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 158000 + }, + { + "epoch": 1039.5394736842106, + "grad_norm": 1.2241381406784058, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 158010 + }, + { + "epoch": 1039.6052631578948, + "grad_norm": 1.0516544580459595, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 158020 + }, + { + "epoch": 1039.671052631579, + "grad_norm": 1.1746127605438232, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 158030 + }, + { + "epoch": 1039.7368421052631, + "grad_norm": 0.8839475512504578, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 158040 + }, + { + "epoch": 1039.8026315789473, + "grad_norm": 0.8313326835632324, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 158050 + }, + { + "epoch": 1039.8684210526317, + "grad_norm": 1.1891549825668335, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 158060 + }, + { + "epoch": 1039.9342105263158, + "grad_norm": 0.9349226355552673, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 158070 + }, + { + "epoch": 1040.0, + "grad_norm": 1.3668376207351685, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 158080 + }, + { + "epoch": 1040.0657894736842, + "grad_norm": 1.0891430377960205, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 158090 + }, + { + "epoch": 1040.1315789473683, + "grad_norm": 1.2689098119735718, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 158100 + }, + { + "epoch": 1040.1973684210527, + "grad_norm": 1.3867899179458618, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 158110 + }, + { + "epoch": 1040.2631578947369, + "grad_norm": 1.1549094915390015, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 158120 + }, + { + "epoch": 1040.328947368421, + "grad_norm": 1.0771297216415405, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 158130 + }, + { + "epoch": 1040.3947368421052, + "grad_norm": 0.6854061484336853, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 158140 + }, + { + "epoch": 1040.4605263157894, + "grad_norm": 0.7592263221740723, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 158150 + }, + { + "epoch": 1040.5263157894738, + "grad_norm": 0.7394665479660034, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 158160 + }, + { + "epoch": 1040.592105263158, + "grad_norm": 1.2182101011276245, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 158170 + }, + { + "epoch": 1040.657894736842, + "grad_norm": 1.0940667390823364, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 158180 + }, + { + "epoch": 1040.7236842105262, + "grad_norm": 1.154238224029541, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 158190 + }, + { + "epoch": 1040.7894736842106, + "grad_norm": 1.0772144794464111, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 158200 + }, + { + "epoch": 1040.8552631578948, + "grad_norm": 0.7135392427444458, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 158210 + }, + { + "epoch": 1040.921052631579, + "grad_norm": 0.6920236945152283, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 158220 + }, + { + "epoch": 1040.9868421052631, + "grad_norm": 0.9873380661010742, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 158230 + }, + { + "epoch": 1041.0526315789473, + "grad_norm": 1.1470561027526855, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 158240 + }, + { + "epoch": 1041.1184210526317, + "grad_norm": 1.1026843786239624, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 158250 + }, + { + "epoch": 1041.1842105263158, + "grad_norm": 0.9850237369537354, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 158260 + }, + { + "epoch": 1041.25, + "grad_norm": 0.8078471422195435, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 158270 + }, + { + "epoch": 1041.3157894736842, + "grad_norm": 0.9304453134536743, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 158280 + }, + { + "epoch": 1041.3815789473683, + "grad_norm": 1.4547866582870483, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 158290 + }, + { + "epoch": 1041.4473684210527, + "grad_norm": 1.2319010496139526, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 158300 + }, + { + "epoch": 1041.5131578947369, + "grad_norm": 0.9949256181716919, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 158310 + }, + { + "epoch": 1041.578947368421, + "grad_norm": 0.8701847195625305, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 158320 + }, + { + "epoch": 1041.6447368421052, + "grad_norm": 1.0713963508605957, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 158330 + }, + { + "epoch": 1041.7105263157894, + "grad_norm": 1.043212652206421, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 158340 + }, + { + "epoch": 1041.7763157894738, + "grad_norm": 1.2368879318237305, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 158350 + }, + { + "epoch": 1041.842105263158, + "grad_norm": 1.1877309083938599, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 158360 + }, + { + "epoch": 1041.907894736842, + "grad_norm": 1.265149712562561, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 158370 + }, + { + "epoch": 1041.9736842105262, + "grad_norm": 1.0216879844665527, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 158380 + }, + { + "epoch": 1042.0394736842106, + "grad_norm": 0.9168482422828674, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 158390 + }, + { + "epoch": 1042.1052631578948, + "grad_norm": 1.1165847778320312, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 158400 + }, + { + "epoch": 1042.171052631579, + "grad_norm": 1.5731300115585327, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 158410 + }, + { + "epoch": 1042.2368421052631, + "grad_norm": 1.1611987352371216, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 158420 + }, + { + "epoch": 1042.3026315789473, + "grad_norm": 1.191115140914917, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 158430 + }, + { + "epoch": 1042.3684210526317, + "grad_norm": 0.9672328233718872, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 158440 + }, + { + "epoch": 1042.4342105263158, + "grad_norm": 1.1341195106506348, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 158450 + }, + { + "epoch": 1042.5, + "grad_norm": 1.0045264959335327, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 158460 + }, + { + "epoch": 1042.5657894736842, + "grad_norm": 1.13510000705719, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 158470 + }, + { + "epoch": 1042.6315789473683, + "grad_norm": 0.9069583415985107, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 158480 + }, + { + "epoch": 1042.6973684210527, + "grad_norm": 1.0426357984542847, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 158490 + }, + { + "epoch": 1042.7631578947369, + "grad_norm": 1.1042143106460571, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 158500 + }, + { + "epoch": 1042.828947368421, + "grad_norm": 1.1533740758895874, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 158510 + }, + { + "epoch": 1042.8947368421052, + "grad_norm": 1.0955190658569336, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 158520 + }, + { + "epoch": 1042.9605263157894, + "grad_norm": 1.552657961845398, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 158530 + }, + { + "epoch": 1043.0263157894738, + "grad_norm": 0.9286198019981384, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 158540 + }, + { + "epoch": 1043.092105263158, + "grad_norm": 0.8410740494728088, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 158550 + }, + { + "epoch": 1043.157894736842, + "grad_norm": 1.1539233922958374, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 158560 + }, + { + "epoch": 1043.2236842105262, + "grad_norm": 1.0042409896850586, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 158570 + }, + { + "epoch": 1043.2894736842106, + "grad_norm": 1.2449952363967896, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 158580 + }, + { + "epoch": 1043.3552631578948, + "grad_norm": 1.4503757953643799, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 158590 + }, + { + "epoch": 1043.421052631579, + "grad_norm": 1.995700478553772, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 158600 + }, + { + "epoch": 1043.4868421052631, + "grad_norm": 1.310492992401123, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 158610 + }, + { + "epoch": 1043.5526315789473, + "grad_norm": 1.1206649541854858, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 158620 + }, + { + "epoch": 1043.6184210526317, + "grad_norm": 1.135241985321045, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 158630 + }, + { + "epoch": 1043.6842105263158, + "grad_norm": 1.2304860353469849, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 158640 + }, + { + "epoch": 1043.75, + "grad_norm": 1.1069141626358032, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 158650 + }, + { + "epoch": 1043.8157894736842, + "grad_norm": 1.013854742050171, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 158660 + }, + { + "epoch": 1043.8815789473683, + "grad_norm": 0.8795697093009949, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 158670 + }, + { + "epoch": 1043.9473684210527, + "grad_norm": 1.4216660261154175, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 158680 + }, + { + "epoch": 1044.0131578947369, + "grad_norm": 1.174999475479126, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 158690 + }, + { + "epoch": 1044.078947368421, + "grad_norm": 1.3855254650115967, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 158700 + }, + { + "epoch": 1044.1447368421052, + "grad_norm": 0.9125514626502991, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 158710 + }, + { + "epoch": 1044.2105263157894, + "grad_norm": 0.9567851424217224, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 158720 + }, + { + "epoch": 1044.2763157894738, + "grad_norm": 0.9960494041442871, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 158730 + }, + { + "epoch": 1044.342105263158, + "grad_norm": 0.8722960948944092, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 158740 + }, + { + "epoch": 1044.407894736842, + "grad_norm": 0.9810813665390015, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 158750 + }, + { + "epoch": 1044.4736842105262, + "grad_norm": 1.2386258840560913, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 158760 + }, + { + "epoch": 1044.5394736842106, + "grad_norm": 1.000181794166565, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 158770 + }, + { + "epoch": 1044.6052631578948, + "grad_norm": 0.927920937538147, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 158780 + }, + { + "epoch": 1044.671052631579, + "grad_norm": 0.8654837608337402, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 158790 + }, + { + "epoch": 1044.7368421052631, + "grad_norm": 0.9293181896209717, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 158800 + }, + { + "epoch": 1044.8026315789473, + "grad_norm": 1.0467267036437988, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 158810 + }, + { + "epoch": 1044.8684210526317, + "grad_norm": 0.9053267240524292, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 158820 + }, + { + "epoch": 1044.9342105263158, + "grad_norm": 1.1089938879013062, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 158830 + }, + { + "epoch": 1045.0, + "grad_norm": 1.0741143226623535, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 158840 + }, + { + "epoch": 1045.0657894736842, + "grad_norm": 0.8008183836936951, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 158850 + }, + { + "epoch": 1045.1315789473683, + "grad_norm": 0.9834315776824951, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 158860 + }, + { + "epoch": 1045.1973684210527, + "grad_norm": 0.8978926539421082, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 158870 + }, + { + "epoch": 1045.2631578947369, + "grad_norm": 0.7383962273597717, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 158880 + }, + { + "epoch": 1045.328947368421, + "grad_norm": 0.7250087261199951, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 158890 + }, + { + "epoch": 1045.3947368421052, + "grad_norm": 1.158569097518921, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 158900 + }, + { + "epoch": 1045.4605263157894, + "grad_norm": 1.0793335437774658, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 158910 + }, + { + "epoch": 1045.5263157894738, + "grad_norm": 1.2114217281341553, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 158920 + }, + { + "epoch": 1045.592105263158, + "grad_norm": 1.464314579963684, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 158930 + }, + { + "epoch": 1045.657894736842, + "grad_norm": 1.0885869264602661, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 158940 + }, + { + "epoch": 1045.7236842105262, + "grad_norm": 1.1773313283920288, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 158950 + }, + { + "epoch": 1045.7894736842106, + "grad_norm": 1.345479965209961, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 158960 + }, + { + "epoch": 1045.8552631578948, + "grad_norm": 1.1355724334716797, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 158970 + }, + { + "epoch": 1045.921052631579, + "grad_norm": 1.21616792678833, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 158980 + }, + { + "epoch": 1045.9868421052631, + "grad_norm": 0.895064651966095, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 158990 + }, + { + "epoch": 1046.0526315789473, + "grad_norm": 1.1562937498092651, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 159000 + }, + { + "epoch": 1046.1184210526317, + "grad_norm": 1.1668152809143066, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 159010 + }, + { + "epoch": 1046.1842105263158, + "grad_norm": 1.03926682472229, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 159020 + }, + { + "epoch": 1046.25, + "grad_norm": 1.0261049270629883, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 159030 + }, + { + "epoch": 1046.3157894736842, + "grad_norm": 1.0766314268112183, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 159040 + }, + { + "epoch": 1046.3815789473683, + "grad_norm": 0.6256817579269409, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 159050 + }, + { + "epoch": 1046.4473684210527, + "grad_norm": 0.874019980430603, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 159060 + }, + { + "epoch": 1046.5131578947369, + "grad_norm": 1.0828053951263428, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 159070 + }, + { + "epoch": 1046.578947368421, + "grad_norm": 1.11966872215271, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 159080 + }, + { + "epoch": 1046.6447368421052, + "grad_norm": 0.9557555913925171, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 159090 + }, + { + "epoch": 1046.7105263157894, + "grad_norm": 1.2064489126205444, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 159100 + }, + { + "epoch": 1046.7763157894738, + "grad_norm": 1.3529369831085205, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 159110 + }, + { + "epoch": 1046.842105263158, + "grad_norm": 0.934829592704773, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 159120 + }, + { + "epoch": 1046.907894736842, + "grad_norm": 1.274330735206604, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 159130 + }, + { + "epoch": 1046.9736842105262, + "grad_norm": 1.0497575998306274, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 159140 + }, + { + "epoch": 1047.0394736842106, + "grad_norm": 1.1160510778427124, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 159150 + }, + { + "epoch": 1047.1052631578948, + "grad_norm": 1.325922966003418, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 159160 + }, + { + "epoch": 1047.171052631579, + "grad_norm": 1.4824130535125732, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 159170 + }, + { + "epoch": 1047.2368421052631, + "grad_norm": 1.2637231349945068, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 159180 + }, + { + "epoch": 1047.3026315789473, + "grad_norm": 1.0419176816940308, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 159190 + }, + { + "epoch": 1047.3684210526317, + "grad_norm": 1.0075671672821045, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 159200 + }, + { + "epoch": 1047.4342105263158, + "grad_norm": 0.9810393452644348, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 159210 + }, + { + "epoch": 1047.5, + "grad_norm": 0.9505169987678528, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 159220 + }, + { + "epoch": 1047.5657894736842, + "grad_norm": 0.9999101758003235, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 159230 + }, + { + "epoch": 1047.6315789473683, + "grad_norm": 0.7792953848838806, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 159240 + }, + { + "epoch": 1047.6973684210527, + "grad_norm": 1.1231037378311157, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 159250 + }, + { + "epoch": 1047.7631578947369, + "grad_norm": 1.1759856939315796, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 159260 + }, + { + "epoch": 1047.828947368421, + "grad_norm": 0.9894112348556519, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 159270 + }, + { + "epoch": 1047.8947368421052, + "grad_norm": 0.9776474833488464, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 159280 + }, + { + "epoch": 1047.9605263157894, + "grad_norm": 1.2519046068191528, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 159290 + }, + { + "epoch": 1048.0263157894738, + "grad_norm": 1.459942102432251, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 159300 + }, + { + "epoch": 1048.092105263158, + "grad_norm": 1.0710073709487915, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 159310 + }, + { + "epoch": 1048.157894736842, + "grad_norm": 0.962834358215332, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 159320 + }, + { + "epoch": 1048.2236842105262, + "grad_norm": 0.8130425810813904, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 159330 + }, + { + "epoch": 1048.2894736842106, + "grad_norm": 0.9603180289268494, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 159340 + }, + { + "epoch": 1048.3552631578948, + "grad_norm": 0.8210400342941284, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 159350 + }, + { + "epoch": 1048.421052631579, + "grad_norm": 1.1121681928634644, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 159360 + }, + { + "epoch": 1048.4868421052631, + "grad_norm": 1.0714201927185059, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 159370 + }, + { + "epoch": 1048.5526315789473, + "grad_norm": 0.9843019247055054, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 159380 + }, + { + "epoch": 1048.6184210526317, + "grad_norm": 1.1511754989624023, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 159390 + }, + { + "epoch": 1048.6842105263158, + "grad_norm": 0.6616480946540833, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 159400 + }, + { + "epoch": 1048.75, + "grad_norm": 1.1609398126602173, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 159410 + }, + { + "epoch": 1048.8157894736842, + "grad_norm": 0.9391231536865234, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 159420 + }, + { + "epoch": 1048.8815789473683, + "grad_norm": 0.9525076150894165, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 159430 + }, + { + "epoch": 1048.9473684210527, + "grad_norm": 0.9699547290802002, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 159440 + }, + { + "epoch": 1049.0131578947369, + "grad_norm": 0.7067571878433228, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 159450 + }, + { + "epoch": 1049.078947368421, + "grad_norm": 0.7832255959510803, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 159460 + }, + { + "epoch": 1049.1447368421052, + "grad_norm": 0.945345938205719, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 159470 + }, + { + "epoch": 1049.2105263157894, + "grad_norm": 1.0841176509857178, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 159480 + }, + { + "epoch": 1049.2763157894738, + "grad_norm": 1.3195167779922485, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 159490 + }, + { + "epoch": 1049.342105263158, + "grad_norm": 1.5180623531341553, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 159500 + }, + { + "epoch": 1049.407894736842, + "grad_norm": 0.7986946105957031, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 159510 + }, + { + "epoch": 1049.4736842105262, + "grad_norm": 1.2574127912521362, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 159520 + }, + { + "epoch": 1049.5394736842106, + "grad_norm": 1.320098876953125, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 159530 + }, + { + "epoch": 1049.6052631578948, + "grad_norm": 0.890644371509552, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 159540 + }, + { + "epoch": 1049.671052631579, + "grad_norm": 1.4320951700210571, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 159550 + }, + { + "epoch": 1049.7368421052631, + "grad_norm": 1.1257306337356567, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 159560 + }, + { + "epoch": 1049.8026315789473, + "grad_norm": 1.2194310426712036, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 159570 + }, + { + "epoch": 1049.8684210526317, + "grad_norm": 0.7975109219551086, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 159580 + }, + { + "epoch": 1049.9342105263158, + "grad_norm": 1.1322660446166992, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 159590 + }, + { + "epoch": 1050.0, + "grad_norm": 1.1089693307876587, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 159600 + }, + { + "epoch": 1050.0657894736842, + "grad_norm": 1.135398507118225, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 159610 + }, + { + "epoch": 1050.1315789473683, + "grad_norm": 1.2235279083251953, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 159620 + }, + { + "epoch": 1050.1973684210527, + "grad_norm": 1.2901211977005005, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 159630 + }, + { + "epoch": 1050.2631578947369, + "grad_norm": 1.0014219284057617, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 159640 + }, + { + "epoch": 1050.328947368421, + "grad_norm": 1.5262455940246582, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 159650 + }, + { + "epoch": 1050.3947368421052, + "grad_norm": 0.9812429547309875, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 159660 + }, + { + "epoch": 1050.4605263157894, + "grad_norm": 0.8760225176811218, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 159670 + }, + { + "epoch": 1050.5263157894738, + "grad_norm": 0.8203020691871643, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 159680 + }, + { + "epoch": 1050.592105263158, + "grad_norm": 1.3625808954238892, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 159690 + }, + { + "epoch": 1050.657894736842, + "grad_norm": 0.927275538444519, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 159700 + }, + { + "epoch": 1050.7236842105262, + "grad_norm": 1.043666124343872, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 159710 + }, + { + "epoch": 1050.7894736842106, + "grad_norm": 0.8062131404876709, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 159720 + }, + { + "epoch": 1050.8552631578948, + "grad_norm": 0.9691109657287598, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 159730 + }, + { + "epoch": 1050.921052631579, + "grad_norm": 1.3453434705734253, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 159740 + }, + { + "epoch": 1050.9868421052631, + "grad_norm": 1.1522691249847412, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 159750 + }, + { + "epoch": 1051.0526315789473, + "grad_norm": 1.2776633501052856, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 159760 + }, + { + "epoch": 1051.1184210526317, + "grad_norm": 1.5653990507125854, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 159770 + }, + { + "epoch": 1051.1842105263158, + "grad_norm": 1.3178924322128296, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 159780 + }, + { + "epoch": 1051.25, + "grad_norm": 1.3187295198440552, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 159790 + }, + { + "epoch": 1051.3157894736842, + "grad_norm": 0.9678728580474854, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 159800 + }, + { + "epoch": 1051.3815789473683, + "grad_norm": 0.9071379899978638, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 159810 + }, + { + "epoch": 1051.4473684210527, + "grad_norm": 1.175668478012085, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 159820 + }, + { + "epoch": 1051.5131578947369, + "grad_norm": 1.0487818717956543, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 159830 + }, + { + "epoch": 1051.578947368421, + "grad_norm": 0.8464121222496033, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 159840 + }, + { + "epoch": 1051.6447368421052, + "grad_norm": 1.1520172357559204, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 159850 + }, + { + "epoch": 1051.7105263157894, + "grad_norm": 0.6816383600234985, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 159860 + }, + { + "epoch": 1051.7763157894738, + "grad_norm": 1.1880346536636353, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 159870 + }, + { + "epoch": 1051.842105263158, + "grad_norm": 1.3333251476287842, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 159880 + }, + { + "epoch": 1051.907894736842, + "grad_norm": 1.1161757707595825, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 159890 + }, + { + "epoch": 1051.9736842105262, + "grad_norm": 1.5317634344100952, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 159900 + }, + { + "epoch": 1052.0394736842106, + "grad_norm": 1.2690035104751587, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 159910 + }, + { + "epoch": 1052.1052631578948, + "grad_norm": 1.0578007698059082, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 159920 + }, + { + "epoch": 1052.171052631579, + "grad_norm": 1.1168135404586792, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 159930 + }, + { + "epoch": 1052.2368421052631, + "grad_norm": 1.1406240463256836, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 159940 + }, + { + "epoch": 1052.3026315789473, + "grad_norm": 0.972987949848175, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 159950 + }, + { + "epoch": 1052.3684210526317, + "grad_norm": 0.7099289894104004, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 159960 + }, + { + "epoch": 1052.4342105263158, + "grad_norm": 0.7676605582237244, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 159970 + }, + { + "epoch": 1052.5, + "grad_norm": 0.9532294273376465, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 159980 + }, + { + "epoch": 1052.5657894736842, + "grad_norm": 0.8012232780456543, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 159990 + }, + { + "epoch": 1052.6315789473683, + "grad_norm": 1.0869078636169434, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 160000 + }, + { + "epoch": 1052.6973684210527, + "grad_norm": 0.9576671123504639, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 160010 + }, + { + "epoch": 1052.7631578947369, + "grad_norm": 0.9968484044075012, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 160020 + }, + { + "epoch": 1052.828947368421, + "grad_norm": 1.0012370347976685, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 160030 + }, + { + "epoch": 1052.8947368421052, + "grad_norm": 0.6246411800384521, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 160040 + }, + { + "epoch": 1052.9605263157894, + "grad_norm": 0.8564648032188416, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 160050 + }, + { + "epoch": 1053.0263157894738, + "grad_norm": 1.1350836753845215, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 160060 + }, + { + "epoch": 1053.092105263158, + "grad_norm": 0.8760911822319031, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 160070 + }, + { + "epoch": 1053.157894736842, + "grad_norm": 1.1660544872283936, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 160080 + }, + { + "epoch": 1053.2236842105262, + "grad_norm": 0.8360348343849182, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 160090 + }, + { + "epoch": 1053.2894736842106, + "grad_norm": 0.754542887210846, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 160100 + }, + { + "epoch": 1053.3552631578948, + "grad_norm": 1.2186027765274048, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 160110 + }, + { + "epoch": 1053.421052631579, + "grad_norm": 0.9988258481025696, + "learning_rate": 0.0001, + "loss": 0.0137, + "step": 160120 + }, + { + "epoch": 1053.4868421052631, + "grad_norm": 1.007232904434204, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 160130 + }, + { + "epoch": 1053.5526315789473, + "grad_norm": 1.2770189046859741, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 160140 + }, + { + "epoch": 1053.6184210526317, + "grad_norm": 1.163527488708496, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 160150 + }, + { + "epoch": 1053.6842105263158, + "grad_norm": 0.8490967154502869, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 160160 + }, + { + "epoch": 1053.75, + "grad_norm": 1.0161746740341187, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 160170 + }, + { + "epoch": 1053.8157894736842, + "grad_norm": 1.1093944311141968, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 160180 + }, + { + "epoch": 1053.8815789473683, + "grad_norm": 0.9495304822921753, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 160190 + }, + { + "epoch": 1053.9473684210527, + "grad_norm": 0.9384229779243469, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 160200 + }, + { + "epoch": 1054.0131578947369, + "grad_norm": 1.0374761819839478, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 160210 + }, + { + "epoch": 1054.078947368421, + "grad_norm": 1.3228940963745117, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 160220 + }, + { + "epoch": 1054.1447368421052, + "grad_norm": 1.1004372835159302, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 160230 + }, + { + "epoch": 1054.2105263157894, + "grad_norm": 1.346169352531433, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 160240 + }, + { + "epoch": 1054.2763157894738, + "grad_norm": 1.0477455854415894, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 160250 + }, + { + "epoch": 1054.342105263158, + "grad_norm": 1.119246244430542, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 160260 + }, + { + "epoch": 1054.407894736842, + "grad_norm": 1.2886263132095337, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 160270 + }, + { + "epoch": 1054.4736842105262, + "grad_norm": 0.7763897180557251, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 160280 + }, + { + "epoch": 1054.5394736842106, + "grad_norm": 0.8887063264846802, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 160290 + }, + { + "epoch": 1054.6052631578948, + "grad_norm": 1.2421995401382446, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 160300 + }, + { + "epoch": 1054.671052631579, + "grad_norm": 1.0714391469955444, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 160310 + }, + { + "epoch": 1054.7368421052631, + "grad_norm": 1.002956509590149, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 160320 + }, + { + "epoch": 1054.8026315789473, + "grad_norm": 1.0658334493637085, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 160330 + }, + { + "epoch": 1054.8684210526317, + "grad_norm": 0.7606706619262695, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 160340 + }, + { + "epoch": 1054.9342105263158, + "grad_norm": 0.995364785194397, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 160350 + }, + { + "epoch": 1055.0, + "grad_norm": 0.8916927576065063, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 160360 + }, + { + "epoch": 1055.0657894736842, + "grad_norm": 1.03330659866333, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 160370 + }, + { + "epoch": 1055.1315789473683, + "grad_norm": 0.8269760012626648, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 160380 + }, + { + "epoch": 1055.1973684210527, + "grad_norm": 0.9928635358810425, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 160390 + }, + { + "epoch": 1055.2631578947369, + "grad_norm": 1.2529430389404297, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 160400 + }, + { + "epoch": 1055.328947368421, + "grad_norm": 0.6669118404388428, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 160410 + }, + { + "epoch": 1055.3947368421052, + "grad_norm": 1.3909178972244263, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 160420 + }, + { + "epoch": 1055.4605263157894, + "grad_norm": 1.6014965772628784, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 160430 + }, + { + "epoch": 1055.5263157894738, + "grad_norm": 1.5059226751327515, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 160440 + }, + { + "epoch": 1055.592105263158, + "grad_norm": 0.8995516896247864, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 160450 + }, + { + "epoch": 1055.657894736842, + "grad_norm": 0.8544006943702698, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 160460 + }, + { + "epoch": 1055.7236842105262, + "grad_norm": 1.2715330123901367, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 160470 + }, + { + "epoch": 1055.7894736842106, + "grad_norm": 0.8547405004501343, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 160480 + }, + { + "epoch": 1055.8552631578948, + "grad_norm": 0.8670430183410645, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 160490 + }, + { + "epoch": 1055.921052631579, + "grad_norm": 0.853753387928009, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 160500 + }, + { + "epoch": 1055.9868421052631, + "grad_norm": 0.8636345267295837, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 160510 + }, + { + "epoch": 1056.0526315789473, + "grad_norm": 0.9810386896133423, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 160520 + }, + { + "epoch": 1056.1184210526317, + "grad_norm": 1.062951683998108, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 160530 + }, + { + "epoch": 1056.1842105263158, + "grad_norm": 1.1501662731170654, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 160540 + }, + { + "epoch": 1056.25, + "grad_norm": 1.5023175477981567, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 160550 + }, + { + "epoch": 1056.3157894736842, + "grad_norm": 1.048349380493164, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 160560 + }, + { + "epoch": 1056.3815789473683, + "grad_norm": 1.1809251308441162, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 160570 + }, + { + "epoch": 1056.4473684210527, + "grad_norm": 1.038750171661377, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 160580 + }, + { + "epoch": 1056.5131578947369, + "grad_norm": 1.030869483947754, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 160590 + }, + { + "epoch": 1056.578947368421, + "grad_norm": 1.0279252529144287, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 160600 + }, + { + "epoch": 1056.6447368421052, + "grad_norm": 0.8072715997695923, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 160610 + }, + { + "epoch": 1056.7105263157894, + "grad_norm": 1.0479940176010132, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 160620 + }, + { + "epoch": 1056.7763157894738, + "grad_norm": 0.9599220156669617, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 160630 + }, + { + "epoch": 1056.842105263158, + "grad_norm": 0.6841326355934143, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 160640 + }, + { + "epoch": 1056.907894736842, + "grad_norm": 1.1303530931472778, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 160650 + }, + { + "epoch": 1056.9736842105262, + "grad_norm": 1.0210925340652466, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 160660 + }, + { + "epoch": 1057.0394736842106, + "grad_norm": 0.7038403153419495, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 160670 + }, + { + "epoch": 1057.1052631578948, + "grad_norm": 1.3282930850982666, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 160680 + }, + { + "epoch": 1057.171052631579, + "grad_norm": 1.007394790649414, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 160690 + }, + { + "epoch": 1057.2368421052631, + "grad_norm": 0.7837812304496765, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 160700 + }, + { + "epoch": 1057.3026315789473, + "grad_norm": 0.9380816221237183, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 160710 + }, + { + "epoch": 1057.3684210526317, + "grad_norm": 1.3225820064544678, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 160720 + }, + { + "epoch": 1057.4342105263158, + "grad_norm": 0.6032253503799438, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 160730 + }, + { + "epoch": 1057.5, + "grad_norm": 1.0029069185256958, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 160740 + }, + { + "epoch": 1057.5657894736842, + "grad_norm": 0.8361908197402954, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 160750 + }, + { + "epoch": 1057.6315789473683, + "grad_norm": 1.2243342399597168, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 160760 + }, + { + "epoch": 1057.6973684210527, + "grad_norm": 0.9138869643211365, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 160770 + }, + { + "epoch": 1057.7631578947369, + "grad_norm": 1.0361911058425903, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 160780 + }, + { + "epoch": 1057.828947368421, + "grad_norm": 0.9030035138130188, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 160790 + }, + { + "epoch": 1057.8947368421052, + "grad_norm": 0.996807873249054, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 160800 + }, + { + "epoch": 1057.9605263157894, + "grad_norm": 0.8536729216575623, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 160810 + }, + { + "epoch": 1058.0263157894738, + "grad_norm": 1.0060828924179077, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 160820 + }, + { + "epoch": 1058.092105263158, + "grad_norm": 0.9179419875144958, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 160830 + }, + { + "epoch": 1058.157894736842, + "grad_norm": 0.8911098837852478, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 160840 + }, + { + "epoch": 1058.2236842105262, + "grad_norm": 1.0588878393173218, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 160850 + }, + { + "epoch": 1058.2894736842106, + "grad_norm": 0.6525148153305054, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 160860 + }, + { + "epoch": 1058.3552631578948, + "grad_norm": 0.6334079504013062, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 160870 + }, + { + "epoch": 1058.421052631579, + "grad_norm": 0.8859434127807617, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 160880 + }, + { + "epoch": 1058.4868421052631, + "grad_norm": 0.7293907999992371, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 160890 + }, + { + "epoch": 1058.5526315789473, + "grad_norm": 0.8205122351646423, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 160900 + }, + { + "epoch": 1058.6184210526317, + "grad_norm": 0.8700542449951172, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 160910 + }, + { + "epoch": 1058.6842105263158, + "grad_norm": 1.1697430610656738, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 160920 + }, + { + "epoch": 1058.75, + "grad_norm": 1.5094969272613525, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 160930 + }, + { + "epoch": 1058.8157894736842, + "grad_norm": 1.085866928100586, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 160940 + }, + { + "epoch": 1058.8815789473683, + "grad_norm": 0.935643196105957, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 160950 + }, + { + "epoch": 1058.9473684210527, + "grad_norm": 0.8698868751525879, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 160960 + }, + { + "epoch": 1059.0131578947369, + "grad_norm": 1.2459019422531128, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 160970 + }, + { + "epoch": 1059.078947368421, + "grad_norm": 1.4872862100601196, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 160980 + }, + { + "epoch": 1059.1447368421052, + "grad_norm": 1.7374746799468994, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 160990 + }, + { + "epoch": 1059.2105263157894, + "grad_norm": 1.4759396314620972, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 161000 + }, + { + "epoch": 1059.2763157894738, + "grad_norm": 1.3807487487792969, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 161010 + }, + { + "epoch": 1059.342105263158, + "grad_norm": 1.0829836130142212, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 161020 + }, + { + "epoch": 1059.407894736842, + "grad_norm": 1.3213567733764648, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 161030 + }, + { + "epoch": 1059.4736842105262, + "grad_norm": 1.1159719228744507, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 161040 + }, + { + "epoch": 1059.5394736842106, + "grad_norm": 1.0388392210006714, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 161050 + }, + { + "epoch": 1059.6052631578948, + "grad_norm": 1.4326999187469482, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 161060 + }, + { + "epoch": 1059.671052631579, + "grad_norm": 0.9729658365249634, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 161070 + }, + { + "epoch": 1059.7368421052631, + "grad_norm": 0.9425380229949951, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 161080 + }, + { + "epoch": 1059.8026315789473, + "grad_norm": 0.8282036185264587, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 161090 + }, + { + "epoch": 1059.8684210526317, + "grad_norm": 0.782004714012146, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 161100 + }, + { + "epoch": 1059.9342105263158, + "grad_norm": 0.8745496273040771, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 161110 + }, + { + "epoch": 1060.0, + "grad_norm": 1.0883170366287231, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 161120 + }, + { + "epoch": 1060.0657894736842, + "grad_norm": 0.7825089693069458, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 161130 + }, + { + "epoch": 1060.1315789473683, + "grad_norm": 0.7291836738586426, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 161140 + }, + { + "epoch": 1060.1973684210527, + "grad_norm": 0.9234983325004578, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 161150 + }, + { + "epoch": 1060.2631578947369, + "grad_norm": 1.0557537078857422, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 161160 + }, + { + "epoch": 1060.328947368421, + "grad_norm": 1.190638542175293, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 161170 + }, + { + "epoch": 1060.3947368421052, + "grad_norm": 1.2052501440048218, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 161180 + }, + { + "epoch": 1060.4605263157894, + "grad_norm": 0.8857777714729309, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 161190 + }, + { + "epoch": 1060.5263157894738, + "grad_norm": 0.9795964956283569, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 161200 + }, + { + "epoch": 1060.592105263158, + "grad_norm": 1.1422735452651978, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 161210 + }, + { + "epoch": 1060.657894736842, + "grad_norm": 0.8671726584434509, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 161220 + }, + { + "epoch": 1060.7236842105262, + "grad_norm": 1.1022329330444336, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 161230 + }, + { + "epoch": 1060.7894736842106, + "grad_norm": 1.2287969589233398, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 161240 + }, + { + "epoch": 1060.8552631578948, + "grad_norm": 1.1893408298492432, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 161250 + }, + { + "epoch": 1060.921052631579, + "grad_norm": 1.085235595703125, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 161260 + }, + { + "epoch": 1060.9868421052631, + "grad_norm": 1.2641836404800415, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 161270 + }, + { + "epoch": 1061.0526315789473, + "grad_norm": 0.9672636985778809, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 161280 + }, + { + "epoch": 1061.1184210526317, + "grad_norm": 0.8965618014335632, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 161290 + }, + { + "epoch": 1061.1842105263158, + "grad_norm": 0.9255189299583435, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 161300 + }, + { + "epoch": 1061.25, + "grad_norm": 0.9481072425842285, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 161310 + }, + { + "epoch": 1061.3157894736842, + "grad_norm": 1.216856837272644, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 161320 + }, + { + "epoch": 1061.3815789473683, + "grad_norm": 1.3531123399734497, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 161330 + }, + { + "epoch": 1061.4473684210527, + "grad_norm": 0.8785354495048523, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 161340 + }, + { + "epoch": 1061.5131578947369, + "grad_norm": 1.1641191244125366, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 161350 + }, + { + "epoch": 1061.578947368421, + "grad_norm": 1.3598567247390747, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 161360 + }, + { + "epoch": 1061.6447368421052, + "grad_norm": 0.9496684670448303, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 161370 + }, + { + "epoch": 1061.7105263157894, + "grad_norm": 1.231241226196289, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 161380 + }, + { + "epoch": 1061.7763157894738, + "grad_norm": 1.052296757698059, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 161390 + }, + { + "epoch": 1061.842105263158, + "grad_norm": 0.9695555567741394, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 161400 + }, + { + "epoch": 1061.907894736842, + "grad_norm": 1.0984798669815063, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 161410 + }, + { + "epoch": 1061.9736842105262, + "grad_norm": 0.8881370425224304, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 161420 + }, + { + "epoch": 1062.0394736842106, + "grad_norm": 0.9678500294685364, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 161430 + }, + { + "epoch": 1062.1052631578948, + "grad_norm": 1.1650303602218628, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 161440 + }, + { + "epoch": 1062.171052631579, + "grad_norm": 1.0884886980056763, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 161450 + }, + { + "epoch": 1062.2368421052631, + "grad_norm": 0.9503740668296814, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 161460 + }, + { + "epoch": 1062.3026315789473, + "grad_norm": 0.8754977583885193, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 161470 + }, + { + "epoch": 1062.3684210526317, + "grad_norm": 1.0203204154968262, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 161480 + }, + { + "epoch": 1062.4342105263158, + "grad_norm": 0.9040229320526123, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 161490 + }, + { + "epoch": 1062.5, + "grad_norm": 1.0845030546188354, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 161500 + }, + { + "epoch": 1062.5657894736842, + "grad_norm": 0.950481116771698, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 161510 + }, + { + "epoch": 1062.6315789473683, + "grad_norm": 1.1162766218185425, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 161520 + }, + { + "epoch": 1062.6973684210527, + "grad_norm": 1.0046415328979492, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 161530 + }, + { + "epoch": 1062.7631578947369, + "grad_norm": 0.8391357064247131, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 161540 + }, + { + "epoch": 1062.828947368421, + "grad_norm": 0.9628133773803711, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 161550 + }, + { + "epoch": 1062.8947368421052, + "grad_norm": 0.9832002520561218, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 161560 + }, + { + "epoch": 1062.9605263157894, + "grad_norm": 1.2245110273361206, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 161570 + }, + { + "epoch": 1063.0263157894738, + "grad_norm": 1.3383867740631104, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 161580 + }, + { + "epoch": 1063.092105263158, + "grad_norm": 1.1894196271896362, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 161590 + }, + { + "epoch": 1063.157894736842, + "grad_norm": 1.3893300294876099, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 161600 + }, + { + "epoch": 1063.2236842105262, + "grad_norm": 0.8097324371337891, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 161610 + }, + { + "epoch": 1063.2894736842106, + "grad_norm": 0.9299864172935486, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 161620 + }, + { + "epoch": 1063.3552631578948, + "grad_norm": 1.031350016593933, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 161630 + }, + { + "epoch": 1063.421052631579, + "grad_norm": 1.079654574394226, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 161640 + }, + { + "epoch": 1063.4868421052631, + "grad_norm": 1.1577768325805664, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 161650 + }, + { + "epoch": 1063.5526315789473, + "grad_norm": 1.0904746055603027, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 161660 + }, + { + "epoch": 1063.6184210526317, + "grad_norm": 1.2395423650741577, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 161670 + }, + { + "epoch": 1063.6842105263158, + "grad_norm": 1.0951871871948242, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 161680 + }, + { + "epoch": 1063.75, + "grad_norm": 1.186826467514038, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 161690 + }, + { + "epoch": 1063.8157894736842, + "grad_norm": 0.7495224475860596, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 161700 + }, + { + "epoch": 1063.8815789473683, + "grad_norm": 1.0695480108261108, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 161710 + }, + { + "epoch": 1063.9473684210527, + "grad_norm": 0.743630051612854, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 161720 + }, + { + "epoch": 1064.0131578947369, + "grad_norm": 0.8561877608299255, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 161730 + }, + { + "epoch": 1064.078947368421, + "grad_norm": 0.8216092586517334, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 161740 + }, + { + "epoch": 1064.1447368421052, + "grad_norm": 0.8354560136795044, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 161750 + }, + { + "epoch": 1064.2105263157894, + "grad_norm": 1.2202868461608887, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 161760 + }, + { + "epoch": 1064.2763157894738, + "grad_norm": 0.7838713526725769, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 161770 + }, + { + "epoch": 1064.342105263158, + "grad_norm": 1.285569190979004, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 161780 + }, + { + "epoch": 1064.407894736842, + "grad_norm": 0.9280838370323181, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 161790 + }, + { + "epoch": 1064.4736842105262, + "grad_norm": 0.8232817649841309, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 161800 + }, + { + "epoch": 1064.5394736842106, + "grad_norm": 1.015890121459961, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 161810 + }, + { + "epoch": 1064.6052631578948, + "grad_norm": 0.8009679317474365, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 161820 + }, + { + "epoch": 1064.671052631579, + "grad_norm": 0.6812798976898193, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 161830 + }, + { + "epoch": 1064.7368421052631, + "grad_norm": 1.3212003707885742, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 161840 + }, + { + "epoch": 1064.8026315789473, + "grad_norm": 0.8784815669059753, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 161850 + }, + { + "epoch": 1064.8684210526317, + "grad_norm": 1.0952194929122925, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 161860 + }, + { + "epoch": 1064.9342105263158, + "grad_norm": 1.038833498954773, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 161870 + }, + { + "epoch": 1065.0, + "grad_norm": 1.1288541555404663, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 161880 + }, + { + "epoch": 1065.0657894736842, + "grad_norm": 1.0623427629470825, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 161890 + }, + { + "epoch": 1065.1315789473683, + "grad_norm": 1.2442352771759033, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 161900 + }, + { + "epoch": 1065.1973684210527, + "grad_norm": 1.1538206338882446, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 161910 + }, + { + "epoch": 1065.2631578947369, + "grad_norm": 1.1074141263961792, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 161920 + }, + { + "epoch": 1065.328947368421, + "grad_norm": 1.0948708057403564, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 161930 + }, + { + "epoch": 1065.3947368421052, + "grad_norm": 0.8019455671310425, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 161940 + }, + { + "epoch": 1065.4605263157894, + "grad_norm": 0.6959496736526489, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 161950 + }, + { + "epoch": 1065.5263157894738, + "grad_norm": 0.9164180755615234, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 161960 + }, + { + "epoch": 1065.592105263158, + "grad_norm": 1.390481948852539, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 161970 + }, + { + "epoch": 1065.657894736842, + "grad_norm": 1.1339566707611084, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 161980 + }, + { + "epoch": 1065.7236842105262, + "grad_norm": 0.8999402523040771, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 161990 + }, + { + "epoch": 1065.7894736842106, + "grad_norm": 1.1724854707717896, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 162000 + }, + { + "epoch": 1065.8552631578948, + "grad_norm": 1.1292765140533447, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 162010 + }, + { + "epoch": 1065.921052631579, + "grad_norm": 1.3481417894363403, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 162020 + }, + { + "epoch": 1065.9868421052631, + "grad_norm": 1.1352733373641968, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 162030 + }, + { + "epoch": 1066.0526315789473, + "grad_norm": 1.0196239948272705, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 162040 + }, + { + "epoch": 1066.1184210526317, + "grad_norm": 1.2338160276412964, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 162050 + }, + { + "epoch": 1066.1842105263158, + "grad_norm": 0.9089396595954895, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 162060 + }, + { + "epoch": 1066.25, + "grad_norm": 0.8388129472732544, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 162070 + }, + { + "epoch": 1066.3157894736842, + "grad_norm": 1.036230206489563, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 162080 + }, + { + "epoch": 1066.3815789473683, + "grad_norm": 0.9070473909378052, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 162090 + }, + { + "epoch": 1066.4473684210527, + "grad_norm": 1.3986698389053345, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 162100 + }, + { + "epoch": 1066.5131578947369, + "grad_norm": 1.2524380683898926, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 162110 + }, + { + "epoch": 1066.578947368421, + "grad_norm": 1.3897756338119507, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 162120 + }, + { + "epoch": 1066.6447368421052, + "grad_norm": 1.2259730100631714, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 162130 + }, + { + "epoch": 1066.7105263157894, + "grad_norm": 1.0453299283981323, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 162140 + }, + { + "epoch": 1066.7763157894738, + "grad_norm": 1.3548429012298584, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 162150 + }, + { + "epoch": 1066.842105263158, + "grad_norm": 0.7889332175254822, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 162160 + }, + { + "epoch": 1066.907894736842, + "grad_norm": 0.773144006729126, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 162170 + }, + { + "epoch": 1066.9736842105262, + "grad_norm": 0.8035649657249451, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 162180 + }, + { + "epoch": 1067.0394736842106, + "grad_norm": 0.7761321067810059, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 162190 + }, + { + "epoch": 1067.1052631578948, + "grad_norm": 0.8864235877990723, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 162200 + }, + { + "epoch": 1067.171052631579, + "grad_norm": 1.0513126850128174, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 162210 + }, + { + "epoch": 1067.2368421052631, + "grad_norm": 0.9010508060455322, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 162220 + }, + { + "epoch": 1067.3026315789473, + "grad_norm": 1.1405491828918457, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 162230 + }, + { + "epoch": 1067.3684210526317, + "grad_norm": 0.7161151170730591, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 162240 + }, + { + "epoch": 1067.4342105263158, + "grad_norm": 0.7078927755355835, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 162250 + }, + { + "epoch": 1067.5, + "grad_norm": 0.9383971095085144, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 162260 + }, + { + "epoch": 1067.5657894736842, + "grad_norm": 0.7436659336090088, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 162270 + }, + { + "epoch": 1067.6315789473683, + "grad_norm": 0.8315485715866089, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 162280 + }, + { + "epoch": 1067.6973684210527, + "grad_norm": 1.013370156288147, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 162290 + }, + { + "epoch": 1067.7631578947369, + "grad_norm": 1.2187751531600952, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 162300 + }, + { + "epoch": 1067.828947368421, + "grad_norm": 1.2132238149642944, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 162310 + }, + { + "epoch": 1067.8947368421052, + "grad_norm": 1.1550824642181396, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 162320 + }, + { + "epoch": 1067.9605263157894, + "grad_norm": 1.193505883216858, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 162330 + }, + { + "epoch": 1068.0263157894738, + "grad_norm": 0.9410684108734131, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 162340 + }, + { + "epoch": 1068.092105263158, + "grad_norm": 1.1005580425262451, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 162350 + }, + { + "epoch": 1068.157894736842, + "grad_norm": 1.2267907857894897, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 162360 + }, + { + "epoch": 1068.2236842105262, + "grad_norm": 1.1919841766357422, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 162370 + }, + { + "epoch": 1068.2894736842106, + "grad_norm": 1.14139986038208, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 162380 + }, + { + "epoch": 1068.3552631578948, + "grad_norm": 0.8994879722595215, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 162390 + }, + { + "epoch": 1068.421052631579, + "grad_norm": 1.3189268112182617, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 162400 + }, + { + "epoch": 1068.4868421052631, + "grad_norm": 1.0653045177459717, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 162410 + }, + { + "epoch": 1068.5526315789473, + "grad_norm": 1.248468041419983, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 162420 + }, + { + "epoch": 1068.6184210526317, + "grad_norm": 1.4759578704833984, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 162430 + }, + { + "epoch": 1068.6842105263158, + "grad_norm": 1.3343039751052856, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 162440 + }, + { + "epoch": 1068.75, + "grad_norm": 0.91098952293396, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 162450 + }, + { + "epoch": 1068.8157894736842, + "grad_norm": 1.612722635269165, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 162460 + }, + { + "epoch": 1068.8815789473683, + "grad_norm": 1.5344117879867554, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 162470 + }, + { + "epoch": 1068.9473684210527, + "grad_norm": 1.2199701070785522, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 162480 + }, + { + "epoch": 1069.0131578947369, + "grad_norm": 1.03998601436615, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 162490 + }, + { + "epoch": 1069.078947368421, + "grad_norm": 1.0132770538330078, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 162500 + }, + { + "epoch": 1069.1447368421052, + "grad_norm": 1.1793491840362549, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 162510 + }, + { + "epoch": 1069.2105263157894, + "grad_norm": 1.0435994863510132, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 162520 + }, + { + "epoch": 1069.2763157894738, + "grad_norm": 0.7933632135391235, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 162530 + }, + { + "epoch": 1069.342105263158, + "grad_norm": 1.094799280166626, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 162540 + }, + { + "epoch": 1069.407894736842, + "grad_norm": 1.0293395519256592, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 162550 + }, + { + "epoch": 1069.4736842105262, + "grad_norm": 0.8514150381088257, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 162560 + }, + { + "epoch": 1069.5394736842106, + "grad_norm": 1.075880765914917, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 162570 + }, + { + "epoch": 1069.6052631578948, + "grad_norm": 0.8023967742919922, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 162580 + }, + { + "epoch": 1069.671052631579, + "grad_norm": 0.9331603050231934, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 162590 + }, + { + "epoch": 1069.7368421052631, + "grad_norm": 0.8573759198188782, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 162600 + }, + { + "epoch": 1069.8026315789473, + "grad_norm": 1.105576515197754, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 162610 + }, + { + "epoch": 1069.8684210526317, + "grad_norm": 1.3647798299789429, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 162620 + }, + { + "epoch": 1069.9342105263158, + "grad_norm": 0.9643642902374268, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 162630 + }, + { + "epoch": 1070.0, + "grad_norm": 1.3293102979660034, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 162640 + }, + { + "epoch": 1070.0657894736842, + "grad_norm": 1.421432375907898, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 162650 + }, + { + "epoch": 1070.1315789473683, + "grad_norm": 1.0988101959228516, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 162660 + }, + { + "epoch": 1070.1973684210527, + "grad_norm": 1.185173511505127, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 162670 + }, + { + "epoch": 1070.2631578947369, + "grad_norm": 0.96378093957901, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 162680 + }, + { + "epoch": 1070.328947368421, + "grad_norm": 1.0372625589370728, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 162690 + }, + { + "epoch": 1070.3947368421052, + "grad_norm": 0.8378893136978149, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 162700 + }, + { + "epoch": 1070.4605263157894, + "grad_norm": 1.0191593170166016, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 162710 + }, + { + "epoch": 1070.5263157894738, + "grad_norm": 1.2982977628707886, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 162720 + }, + { + "epoch": 1070.592105263158, + "grad_norm": 1.0704700946807861, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 162730 + }, + { + "epoch": 1070.657894736842, + "grad_norm": 1.0267466306686401, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 162740 + }, + { + "epoch": 1070.7236842105262, + "grad_norm": 1.131195068359375, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 162750 + }, + { + "epoch": 1070.7894736842106, + "grad_norm": 0.9964534044265747, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 162760 + }, + { + "epoch": 1070.8552631578948, + "grad_norm": 1.135125994682312, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 162770 + }, + { + "epoch": 1070.921052631579, + "grad_norm": 0.5625234842300415, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 162780 + }, + { + "epoch": 1070.9868421052631, + "grad_norm": 1.0314369201660156, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 162790 + }, + { + "epoch": 1071.0526315789473, + "grad_norm": 1.1041908264160156, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 162800 + }, + { + "epoch": 1071.1184210526317, + "grad_norm": 1.4432227611541748, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 162810 + }, + { + "epoch": 1071.1842105263158, + "grad_norm": 1.1122026443481445, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 162820 + }, + { + "epoch": 1071.25, + "grad_norm": 0.8845628499984741, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 162830 + }, + { + "epoch": 1071.3157894736842, + "grad_norm": 0.9934724569320679, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 162840 + }, + { + "epoch": 1071.3815789473683, + "grad_norm": 1.2053556442260742, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 162850 + }, + { + "epoch": 1071.4473684210527, + "grad_norm": 1.0750372409820557, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 162860 + }, + { + "epoch": 1071.5131578947369, + "grad_norm": 1.1821308135986328, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 162870 + }, + { + "epoch": 1071.578947368421, + "grad_norm": 0.8854652047157288, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 162880 + }, + { + "epoch": 1071.6447368421052, + "grad_norm": 0.9452785849571228, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 162890 + }, + { + "epoch": 1071.7105263157894, + "grad_norm": 0.4714410901069641, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 162900 + }, + { + "epoch": 1071.7763157894738, + "grad_norm": 0.9950037598609924, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 162910 + }, + { + "epoch": 1071.842105263158, + "grad_norm": 1.0008114576339722, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 162920 + }, + { + "epoch": 1071.907894736842, + "grad_norm": 0.9882568120956421, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 162930 + }, + { + "epoch": 1071.9736842105262, + "grad_norm": 0.9975512623786926, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 162940 + }, + { + "epoch": 1072.0394736842106, + "grad_norm": 0.9865131974220276, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 162950 + }, + { + "epoch": 1072.1052631578948, + "grad_norm": 0.8073837757110596, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 162960 + }, + { + "epoch": 1072.171052631579, + "grad_norm": 1.311487078666687, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 162970 + }, + { + "epoch": 1072.2368421052631, + "grad_norm": 1.1827518939971924, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 162980 + }, + { + "epoch": 1072.3026315789473, + "grad_norm": 1.183228611946106, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 162990 + }, + { + "epoch": 1072.3684210526317, + "grad_norm": 1.4253993034362793, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 163000 + }, + { + "epoch": 1072.4342105263158, + "grad_norm": 1.0491241216659546, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 163010 + }, + { + "epoch": 1072.5, + "grad_norm": 1.536728858947754, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 163020 + }, + { + "epoch": 1072.5657894736842, + "grad_norm": 0.9010704159736633, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 163030 + }, + { + "epoch": 1072.6315789473683, + "grad_norm": 0.9637726545333862, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 163040 + }, + { + "epoch": 1072.6973684210527, + "grad_norm": 1.410753846168518, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 163050 + }, + { + "epoch": 1072.7631578947369, + "grad_norm": 1.4014109373092651, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 163060 + }, + { + "epoch": 1072.828947368421, + "grad_norm": 1.3508261442184448, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 163070 + }, + { + "epoch": 1072.8947368421052, + "grad_norm": 1.2983468770980835, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 163080 + }, + { + "epoch": 1072.9605263157894, + "grad_norm": 1.4878889322280884, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 163090 + }, + { + "epoch": 1073.0263157894738, + "grad_norm": 1.155154824256897, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 163100 + }, + { + "epoch": 1073.092105263158, + "grad_norm": 1.2900110483169556, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 163110 + }, + { + "epoch": 1073.157894736842, + "grad_norm": 0.711246907711029, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 163120 + }, + { + "epoch": 1073.2236842105262, + "grad_norm": 1.4569929838180542, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 163130 + }, + { + "epoch": 1073.2894736842106, + "grad_norm": 1.12445068359375, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 163140 + }, + { + "epoch": 1073.3552631578948, + "grad_norm": 0.9855101704597473, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 163150 + }, + { + "epoch": 1073.421052631579, + "grad_norm": 0.9957090020179749, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 163160 + }, + { + "epoch": 1073.4868421052631, + "grad_norm": 0.8900773525238037, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 163170 + }, + { + "epoch": 1073.5526315789473, + "grad_norm": 0.8382232785224915, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 163180 + }, + { + "epoch": 1073.6184210526317, + "grad_norm": 1.113542914390564, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 163190 + }, + { + "epoch": 1073.6842105263158, + "grad_norm": 1.017556071281433, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 163200 + }, + { + "epoch": 1073.75, + "grad_norm": 1.1955180168151855, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 163210 + }, + { + "epoch": 1073.8157894736842, + "grad_norm": 1.223476529121399, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 163220 + }, + { + "epoch": 1073.8815789473683, + "grad_norm": 0.9942741394042969, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 163230 + }, + { + "epoch": 1073.9473684210527, + "grad_norm": 1.0802150964736938, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 163240 + }, + { + "epoch": 1074.0131578947369, + "grad_norm": 0.9715554714202881, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 163250 + }, + { + "epoch": 1074.078947368421, + "grad_norm": 1.1486918926239014, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 163260 + }, + { + "epoch": 1074.1447368421052, + "grad_norm": 1.1075643301010132, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 163270 + }, + { + "epoch": 1074.2105263157894, + "grad_norm": 1.1682664155960083, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 163280 + }, + { + "epoch": 1074.2763157894738, + "grad_norm": 1.3140562772750854, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 163290 + }, + { + "epoch": 1074.342105263158, + "grad_norm": 1.2166996002197266, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 163300 + }, + { + "epoch": 1074.407894736842, + "grad_norm": 1.1994695663452148, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 163310 + }, + { + "epoch": 1074.4736842105262, + "grad_norm": 0.7714230418205261, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 163320 + }, + { + "epoch": 1074.5394736842106, + "grad_norm": 0.611962080001831, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 163330 + }, + { + "epoch": 1074.6052631578948, + "grad_norm": 0.8996819853782654, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 163340 + }, + { + "epoch": 1074.671052631579, + "grad_norm": 0.9892165064811707, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 163350 + }, + { + "epoch": 1074.7368421052631, + "grad_norm": 1.245927333831787, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 163360 + }, + { + "epoch": 1074.8026315789473, + "grad_norm": 1.5583741664886475, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 163370 + }, + { + "epoch": 1074.8684210526317, + "grad_norm": 1.4896721839904785, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 163380 + }, + { + "epoch": 1074.9342105263158, + "grad_norm": 1.5544887781143188, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 163390 + }, + { + "epoch": 1075.0, + "grad_norm": 1.562259554862976, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 163400 + }, + { + "epoch": 1075.0657894736842, + "grad_norm": 1.1588703393936157, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 163410 + }, + { + "epoch": 1075.1315789473683, + "grad_norm": 1.3577895164489746, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 163420 + }, + { + "epoch": 1075.1973684210527, + "grad_norm": 1.0716686248779297, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 163430 + }, + { + "epoch": 1075.2631578947369, + "grad_norm": 1.1159995794296265, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 163440 + }, + { + "epoch": 1075.328947368421, + "grad_norm": 1.1047301292419434, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 163450 + }, + { + "epoch": 1075.3947368421052, + "grad_norm": 1.001532793045044, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 163460 + }, + { + "epoch": 1075.4605263157894, + "grad_norm": 1.4024605751037598, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 163470 + }, + { + "epoch": 1075.5263157894738, + "grad_norm": 1.3084982633590698, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 163480 + }, + { + "epoch": 1075.592105263158, + "grad_norm": 1.0299196243286133, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 163490 + }, + { + "epoch": 1075.657894736842, + "grad_norm": 0.941424548625946, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 163500 + }, + { + "epoch": 1075.7236842105262, + "grad_norm": 1.2184175252914429, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 163510 + }, + { + "epoch": 1075.7894736842106, + "grad_norm": 1.1736630201339722, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 163520 + }, + { + "epoch": 1075.8552631578948, + "grad_norm": 0.897025465965271, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 163530 + }, + { + "epoch": 1075.921052631579, + "grad_norm": 0.8720511198043823, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 163540 + }, + { + "epoch": 1075.9868421052631, + "grad_norm": 1.0315616130828857, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 163550 + }, + { + "epoch": 1076.0526315789473, + "grad_norm": 1.194000005722046, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 163560 + }, + { + "epoch": 1076.1184210526317, + "grad_norm": 1.108793020248413, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 163570 + }, + { + "epoch": 1076.1842105263158, + "grad_norm": 1.059388518333435, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 163580 + }, + { + "epoch": 1076.25, + "grad_norm": 1.1141692399978638, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 163590 + }, + { + "epoch": 1076.3157894736842, + "grad_norm": 0.8344478011131287, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 163600 + }, + { + "epoch": 1076.3815789473683, + "grad_norm": 1.0367026329040527, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 163610 + }, + { + "epoch": 1076.4473684210527, + "grad_norm": 1.339621663093567, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 163620 + }, + { + "epoch": 1076.5131578947369, + "grad_norm": 1.3752906322479248, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 163630 + }, + { + "epoch": 1076.578947368421, + "grad_norm": 0.8494938015937805, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 163640 + }, + { + "epoch": 1076.6447368421052, + "grad_norm": 0.6599777936935425, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 163650 + }, + { + "epoch": 1076.7105263157894, + "grad_norm": 1.3162485361099243, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 163660 + }, + { + "epoch": 1076.7763157894738, + "grad_norm": 0.9044468998908997, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 163670 + }, + { + "epoch": 1076.842105263158, + "grad_norm": 0.8075153231620789, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 163680 + }, + { + "epoch": 1076.907894736842, + "grad_norm": 1.406198501586914, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 163690 + }, + { + "epoch": 1076.9736842105262, + "grad_norm": 1.080063819885254, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 163700 + }, + { + "epoch": 1077.0394736842106, + "grad_norm": 1.1820083856582642, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 163710 + }, + { + "epoch": 1077.1052631578948, + "grad_norm": 1.0260475873947144, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 163720 + }, + { + "epoch": 1077.171052631579, + "grad_norm": 1.063698410987854, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 163730 + }, + { + "epoch": 1077.2368421052631, + "grad_norm": 0.7983181476593018, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 163740 + }, + { + "epoch": 1077.3026315789473, + "grad_norm": 0.8934413194656372, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 163750 + }, + { + "epoch": 1077.3684210526317, + "grad_norm": 0.701677680015564, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 163760 + }, + { + "epoch": 1077.4342105263158, + "grad_norm": 0.9841487407684326, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 163770 + }, + { + "epoch": 1077.5, + "grad_norm": 0.8910136222839355, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 163780 + }, + { + "epoch": 1077.5657894736842, + "grad_norm": 1.0197700262069702, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 163790 + }, + { + "epoch": 1077.6315789473683, + "grad_norm": 1.3116474151611328, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 163800 + }, + { + "epoch": 1077.6973684210527, + "grad_norm": 0.8362675905227661, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 163810 + }, + { + "epoch": 1077.7631578947369, + "grad_norm": 0.6043428182601929, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 163820 + }, + { + "epoch": 1077.828947368421, + "grad_norm": 1.1942695379257202, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 163830 + }, + { + "epoch": 1077.8947368421052, + "grad_norm": 0.9171984195709229, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 163840 + }, + { + "epoch": 1077.9605263157894, + "grad_norm": 1.100325584411621, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 163850 + }, + { + "epoch": 1078.0263157894738, + "grad_norm": 1.150246024131775, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 163860 + }, + { + "epoch": 1078.092105263158, + "grad_norm": 0.9796252250671387, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 163870 + }, + { + "epoch": 1078.157894736842, + "grad_norm": 0.7340827584266663, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 163880 + }, + { + "epoch": 1078.2236842105262, + "grad_norm": 0.9303230047225952, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 163890 + }, + { + "epoch": 1078.2894736842106, + "grad_norm": 0.9361478090286255, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 163900 + }, + { + "epoch": 1078.3552631578948, + "grad_norm": 1.0189319849014282, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 163910 + }, + { + "epoch": 1078.421052631579, + "grad_norm": 1.059217929840088, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 163920 + }, + { + "epoch": 1078.4868421052631, + "grad_norm": 1.3725210428237915, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 163930 + }, + { + "epoch": 1078.5526315789473, + "grad_norm": 1.200869083404541, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 163940 + }, + { + "epoch": 1078.6184210526317, + "grad_norm": 1.561780571937561, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 163950 + }, + { + "epoch": 1078.6842105263158, + "grad_norm": 1.174270510673523, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 163960 + }, + { + "epoch": 1078.75, + "grad_norm": 1.1414722204208374, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 163970 + }, + { + "epoch": 1078.8157894736842, + "grad_norm": 1.0247485637664795, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 163980 + }, + { + "epoch": 1078.8815789473683, + "grad_norm": 0.9565194249153137, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 163990 + }, + { + "epoch": 1078.9473684210527, + "grad_norm": 0.9359107613563538, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 164000 + }, + { + "epoch": 1079.0131578947369, + "grad_norm": 0.8053498268127441, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 164010 + }, + { + "epoch": 1079.078947368421, + "grad_norm": 1.111828088760376, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 164020 + }, + { + "epoch": 1079.1447368421052, + "grad_norm": 0.9666885137557983, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 164030 + }, + { + "epoch": 1079.2105263157894, + "grad_norm": 1.089706301689148, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 164040 + }, + { + "epoch": 1079.2763157894738, + "grad_norm": 1.2634003162384033, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 164050 + }, + { + "epoch": 1079.342105263158, + "grad_norm": 1.2256748676300049, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 164060 + }, + { + "epoch": 1079.407894736842, + "grad_norm": 1.0512341260910034, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 164070 + }, + { + "epoch": 1079.4736842105262, + "grad_norm": 1.1967103481292725, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 164080 + }, + { + "epoch": 1079.5394736842106, + "grad_norm": 0.9088690876960754, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 164090 + }, + { + "epoch": 1079.6052631578948, + "grad_norm": 0.8924160599708557, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 164100 + }, + { + "epoch": 1079.671052631579, + "grad_norm": 0.8244693875312805, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 164110 + }, + { + "epoch": 1079.7368421052631, + "grad_norm": 1.0724085569381714, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 164120 + }, + { + "epoch": 1079.8026315789473, + "grad_norm": 0.8203881978988647, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 164130 + }, + { + "epoch": 1079.8684210526317, + "grad_norm": 0.8975365161895752, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 164140 + }, + { + "epoch": 1079.9342105263158, + "grad_norm": 1.132312297821045, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 164150 + }, + { + "epoch": 1080.0, + "grad_norm": 1.0990959405899048, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 164160 + }, + { + "epoch": 1080.0657894736842, + "grad_norm": 0.9807849526405334, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 164170 + }, + { + "epoch": 1080.1315789473683, + "grad_norm": 1.4156996011734009, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 164180 + }, + { + "epoch": 1080.1973684210527, + "grad_norm": 1.217441439628601, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 164190 + }, + { + "epoch": 1080.2631578947369, + "grad_norm": 1.1044880151748657, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 164200 + }, + { + "epoch": 1080.328947368421, + "grad_norm": 1.1214570999145508, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 164210 + }, + { + "epoch": 1080.3947368421052, + "grad_norm": 0.7764980792999268, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 164220 + }, + { + "epoch": 1080.4605263157894, + "grad_norm": 1.1083272695541382, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 164230 + }, + { + "epoch": 1080.5263157894738, + "grad_norm": 0.908808171749115, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 164240 + }, + { + "epoch": 1080.592105263158, + "grad_norm": 0.9142073392868042, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 164250 + }, + { + "epoch": 1080.657894736842, + "grad_norm": 0.9220647215843201, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 164260 + }, + { + "epoch": 1080.7236842105262, + "grad_norm": 0.9497620463371277, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 164270 + }, + { + "epoch": 1080.7894736842106, + "grad_norm": 0.9928162693977356, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 164280 + }, + { + "epoch": 1080.8552631578948, + "grad_norm": 1.177316427230835, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 164290 + }, + { + "epoch": 1080.921052631579, + "grad_norm": 1.1457185745239258, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 164300 + }, + { + "epoch": 1080.9868421052631, + "grad_norm": 0.5666669011116028, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 164310 + }, + { + "epoch": 1081.0526315789473, + "grad_norm": 0.9142729640007019, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 164320 + }, + { + "epoch": 1081.1184210526317, + "grad_norm": 0.9657456874847412, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 164330 + }, + { + "epoch": 1081.1842105263158, + "grad_norm": 0.9326075315475464, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 164340 + }, + { + "epoch": 1081.25, + "grad_norm": 0.8688045740127563, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 164350 + }, + { + "epoch": 1081.3157894736842, + "grad_norm": 1.195733666419983, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 164360 + }, + { + "epoch": 1081.3815789473683, + "grad_norm": 1.1410993337631226, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 164370 + }, + { + "epoch": 1081.4473684210527, + "grad_norm": 0.9133111238479614, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 164380 + }, + { + "epoch": 1081.5131578947369, + "grad_norm": 0.9332903027534485, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 164390 + }, + { + "epoch": 1081.578947368421, + "grad_norm": 1.2633785009384155, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 164400 + }, + { + "epoch": 1081.6447368421052, + "grad_norm": 1.1889288425445557, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 164410 + }, + { + "epoch": 1081.7105263157894, + "grad_norm": 0.9122311472892761, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 164420 + }, + { + "epoch": 1081.7763157894738, + "grad_norm": 0.9126380085945129, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 164430 + }, + { + "epoch": 1081.842105263158, + "grad_norm": 0.9555377960205078, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 164440 + }, + { + "epoch": 1081.907894736842, + "grad_norm": 1.0733895301818848, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 164450 + }, + { + "epoch": 1081.9736842105262, + "grad_norm": 1.0887807607650757, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 164460 + }, + { + "epoch": 1082.0394736842106, + "grad_norm": 0.8500633835792542, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 164470 + }, + { + "epoch": 1082.1052631578948, + "grad_norm": 0.6595375537872314, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 164480 + }, + { + "epoch": 1082.171052631579, + "grad_norm": 1.3263654708862305, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 164490 + }, + { + "epoch": 1082.2368421052631, + "grad_norm": 0.9079999327659607, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 164500 + }, + { + "epoch": 1082.3026315789473, + "grad_norm": 0.9589836001396179, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 164510 + }, + { + "epoch": 1082.3684210526317, + "grad_norm": 1.1960152387619019, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 164520 + }, + { + "epoch": 1082.4342105263158, + "grad_norm": 0.9973219037055969, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 164530 + }, + { + "epoch": 1082.5, + "grad_norm": 1.1780554056167603, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 164540 + }, + { + "epoch": 1082.5657894736842, + "grad_norm": 1.2360888719558716, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 164550 + }, + { + "epoch": 1082.6315789473683, + "grad_norm": 1.0589853525161743, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 164560 + }, + { + "epoch": 1082.6973684210527, + "grad_norm": 1.2129298448562622, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 164570 + }, + { + "epoch": 1082.7631578947369, + "grad_norm": 0.8589812517166138, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 164580 + }, + { + "epoch": 1082.828947368421, + "grad_norm": 1.226012110710144, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 164590 + }, + { + "epoch": 1082.8947368421052, + "grad_norm": 0.921205461025238, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 164600 + }, + { + "epoch": 1082.9605263157894, + "grad_norm": 0.9451828598976135, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 164610 + }, + { + "epoch": 1083.0263157894738, + "grad_norm": 0.8518921732902527, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 164620 + }, + { + "epoch": 1083.092105263158, + "grad_norm": 0.7957374453544617, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 164630 + }, + { + "epoch": 1083.157894736842, + "grad_norm": 0.6601923108100891, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 164640 + }, + { + "epoch": 1083.2236842105262, + "grad_norm": 0.8923590779304504, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 164650 + }, + { + "epoch": 1083.2894736842106, + "grad_norm": 1.051577091217041, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 164660 + }, + { + "epoch": 1083.3552631578948, + "grad_norm": 1.3525593280792236, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 164670 + }, + { + "epoch": 1083.421052631579, + "grad_norm": 1.278694987297058, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 164680 + }, + { + "epoch": 1083.4868421052631, + "grad_norm": 1.5232925415039062, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 164690 + }, + { + "epoch": 1083.5526315789473, + "grad_norm": 1.3283989429473877, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 164700 + }, + { + "epoch": 1083.6184210526317, + "grad_norm": 1.3318917751312256, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 164710 + }, + { + "epoch": 1083.6842105263158, + "grad_norm": 1.2124128341674805, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 164720 + }, + { + "epoch": 1083.75, + "grad_norm": 1.336235761642456, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 164730 + }, + { + "epoch": 1083.8157894736842, + "grad_norm": 0.9546015858650208, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 164740 + }, + { + "epoch": 1083.8815789473683, + "grad_norm": 0.8885062336921692, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 164750 + }, + { + "epoch": 1083.9473684210527, + "grad_norm": 1.1417690515518188, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 164760 + }, + { + "epoch": 1084.0131578947369, + "grad_norm": 0.9051141738891602, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 164770 + }, + { + "epoch": 1084.078947368421, + "grad_norm": 1.195184588432312, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 164780 + }, + { + "epoch": 1084.1447368421052, + "grad_norm": 1.2856130599975586, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 164790 + }, + { + "epoch": 1084.2105263157894, + "grad_norm": 0.9908321499824524, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 164800 + }, + { + "epoch": 1084.2763157894738, + "grad_norm": 1.1323515176773071, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 164810 + }, + { + "epoch": 1084.342105263158, + "grad_norm": 1.0925931930541992, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 164820 + }, + { + "epoch": 1084.407894736842, + "grad_norm": 0.8730666637420654, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 164830 + }, + { + "epoch": 1084.4736842105262, + "grad_norm": 1.1487864255905151, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 164840 + }, + { + "epoch": 1084.5394736842106, + "grad_norm": 0.6327576041221619, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 164850 + }, + { + "epoch": 1084.6052631578948, + "grad_norm": 1.2673507928848267, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 164860 + }, + { + "epoch": 1084.671052631579, + "grad_norm": 1.0878818035125732, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 164870 + }, + { + "epoch": 1084.7368421052631, + "grad_norm": 1.1536747217178345, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 164880 + }, + { + "epoch": 1084.8026315789473, + "grad_norm": 1.274584174156189, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 164890 + }, + { + "epoch": 1084.8684210526317, + "grad_norm": 1.102174162864685, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 164900 + }, + { + "epoch": 1084.9342105263158, + "grad_norm": 0.8976937532424927, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 164910 + }, + { + "epoch": 1085.0, + "grad_norm": 1.2239928245544434, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 164920 + }, + { + "epoch": 1085.0657894736842, + "grad_norm": 0.8770923614501953, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 164930 + }, + { + "epoch": 1085.1315789473683, + "grad_norm": 0.9570636749267578, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 164940 + }, + { + "epoch": 1085.1973684210527, + "grad_norm": 0.9466798901557922, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 164950 + }, + { + "epoch": 1085.2631578947369, + "grad_norm": 0.897685170173645, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 164960 + }, + { + "epoch": 1085.328947368421, + "grad_norm": 0.710432767868042, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 164970 + }, + { + "epoch": 1085.3947368421052, + "grad_norm": 0.8062248826026917, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 164980 + }, + { + "epoch": 1085.4605263157894, + "grad_norm": 0.7032548785209656, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 164990 + }, + { + "epoch": 1085.5263157894738, + "grad_norm": 1.0174980163574219, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 165000 + }, + { + "epoch": 1085.592105263158, + "grad_norm": 0.7526353597640991, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 165010 + }, + { + "epoch": 1085.657894736842, + "grad_norm": 0.815987229347229, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 165020 + }, + { + "epoch": 1085.7236842105262, + "grad_norm": 0.6938906908035278, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 165030 + }, + { + "epoch": 1085.7894736842106, + "grad_norm": 1.025226354598999, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 165040 + }, + { + "epoch": 1085.8552631578948, + "grad_norm": 1.1357896327972412, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 165050 + }, + { + "epoch": 1085.921052631579, + "grad_norm": 1.0124887228012085, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 165060 + }, + { + "epoch": 1085.9868421052631, + "grad_norm": 0.9484216570854187, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 165070 + }, + { + "epoch": 1086.0526315789473, + "grad_norm": 1.2238960266113281, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 165080 + }, + { + "epoch": 1086.1184210526317, + "grad_norm": 1.4172849655151367, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 165090 + }, + { + "epoch": 1086.1842105263158, + "grad_norm": 1.292891263961792, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 165100 + }, + { + "epoch": 1086.25, + "grad_norm": 1.20212984085083, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 165110 + }, + { + "epoch": 1086.3157894736842, + "grad_norm": 1.3747832775115967, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 165120 + }, + { + "epoch": 1086.3815789473683, + "grad_norm": 1.1830034255981445, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 165130 + }, + { + "epoch": 1086.4473684210527, + "grad_norm": 1.2273931503295898, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 165140 + }, + { + "epoch": 1086.5131578947369, + "grad_norm": 0.9580655097961426, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 165150 + }, + { + "epoch": 1086.578947368421, + "grad_norm": 1.0733976364135742, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 165160 + }, + { + "epoch": 1086.6447368421052, + "grad_norm": 1.256765604019165, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 165170 + }, + { + "epoch": 1086.7105263157894, + "grad_norm": 0.8285326361656189, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 165180 + }, + { + "epoch": 1086.7763157894738, + "grad_norm": 0.8502132296562195, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 165190 + }, + { + "epoch": 1086.842105263158, + "grad_norm": 0.5578171610832214, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 165200 + }, + { + "epoch": 1086.907894736842, + "grad_norm": 0.8829189538955688, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 165210 + }, + { + "epoch": 1086.9736842105262, + "grad_norm": 0.8555329442024231, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 165220 + }, + { + "epoch": 1087.0394736842106, + "grad_norm": 1.1692776679992676, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 165230 + }, + { + "epoch": 1087.1052631578948, + "grad_norm": 0.903022825717926, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 165240 + }, + { + "epoch": 1087.171052631579, + "grad_norm": 0.9635505676269531, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 165250 + }, + { + "epoch": 1087.2368421052631, + "grad_norm": 1.0092943906784058, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 165260 + }, + { + "epoch": 1087.3026315789473, + "grad_norm": 0.9053812026977539, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 165270 + }, + { + "epoch": 1087.3684210526317, + "grad_norm": 1.0822906494140625, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 165280 + }, + { + "epoch": 1087.4342105263158, + "grad_norm": 0.9517032504081726, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 165290 + }, + { + "epoch": 1087.5, + "grad_norm": 0.995897114276886, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 165300 + }, + { + "epoch": 1087.5657894736842, + "grad_norm": 1.121558427810669, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 165310 + }, + { + "epoch": 1087.6315789473683, + "grad_norm": 0.905768632888794, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 165320 + }, + { + "epoch": 1087.6973684210527, + "grad_norm": 0.9619491100311279, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 165330 + }, + { + "epoch": 1087.7631578947369, + "grad_norm": 0.804125964641571, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 165340 + }, + { + "epoch": 1087.828947368421, + "grad_norm": 0.9737436175346375, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 165350 + }, + { + "epoch": 1087.8947368421052, + "grad_norm": 1.1739447116851807, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 165360 + }, + { + "epoch": 1087.9605263157894, + "grad_norm": 1.081647515296936, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 165370 + }, + { + "epoch": 1088.0263157894738, + "grad_norm": 1.38816499710083, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 165380 + }, + { + "epoch": 1088.092105263158, + "grad_norm": 1.0949757099151611, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 165390 + }, + { + "epoch": 1088.157894736842, + "grad_norm": 1.0412479639053345, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 165400 + }, + { + "epoch": 1088.2236842105262, + "grad_norm": 1.2765264511108398, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 165410 + }, + { + "epoch": 1088.2894736842106, + "grad_norm": 1.1357839107513428, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 165420 + }, + { + "epoch": 1088.3552631578948, + "grad_norm": 1.2164040803909302, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 165430 + }, + { + "epoch": 1088.421052631579, + "grad_norm": 1.2040680646896362, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 165440 + }, + { + "epoch": 1088.4868421052631, + "grad_norm": 1.342111349105835, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 165450 + }, + { + "epoch": 1088.5526315789473, + "grad_norm": 1.203928828239441, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 165460 + }, + { + "epoch": 1088.6184210526317, + "grad_norm": 1.0916163921356201, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 165470 + }, + { + "epoch": 1088.6842105263158, + "grad_norm": 0.970431387424469, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 165480 + }, + { + "epoch": 1088.75, + "grad_norm": 1.1007429361343384, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 165490 + }, + { + "epoch": 1088.8157894736842, + "grad_norm": 1.2328646183013916, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 165500 + }, + { + "epoch": 1088.8815789473683, + "grad_norm": 1.3475733995437622, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 165510 + }, + { + "epoch": 1088.9473684210527, + "grad_norm": 1.0184584856033325, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 165520 + }, + { + "epoch": 1089.0131578947369, + "grad_norm": 0.9720950722694397, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 165530 + }, + { + "epoch": 1089.078947368421, + "grad_norm": 1.327864646911621, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 165540 + }, + { + "epoch": 1089.1447368421052, + "grad_norm": 0.9376356601715088, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 165550 + }, + { + "epoch": 1089.2105263157894, + "grad_norm": 1.0245535373687744, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 165560 + }, + { + "epoch": 1089.2763157894738, + "grad_norm": 1.088114857673645, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 165570 + }, + { + "epoch": 1089.342105263158, + "grad_norm": 0.981323778629303, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 165580 + }, + { + "epoch": 1089.407894736842, + "grad_norm": 1.1533432006835938, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 165590 + }, + { + "epoch": 1089.4736842105262, + "grad_norm": 0.8093254566192627, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 165600 + }, + { + "epoch": 1089.5394736842106, + "grad_norm": 0.9100672006607056, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 165610 + }, + { + "epoch": 1089.6052631578948, + "grad_norm": 0.8746789693832397, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 165620 + }, + { + "epoch": 1089.671052631579, + "grad_norm": 1.4913291931152344, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 165630 + }, + { + "epoch": 1089.7368421052631, + "grad_norm": 1.1756936311721802, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 165640 + }, + { + "epoch": 1089.8026315789473, + "grad_norm": 1.144705891609192, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 165650 + }, + { + "epoch": 1089.8684210526317, + "grad_norm": 1.0096395015716553, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 165660 + }, + { + "epoch": 1089.9342105263158, + "grad_norm": 1.4962252378463745, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 165670 + }, + { + "epoch": 1090.0, + "grad_norm": 1.0682179927825928, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 165680 + }, + { + "epoch": 1090.0657894736842, + "grad_norm": 1.1246052980422974, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 165690 + }, + { + "epoch": 1090.1315789473683, + "grad_norm": 1.1388136148452759, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 165700 + }, + { + "epoch": 1090.1973684210527, + "grad_norm": 1.017364740371704, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 165710 + }, + { + "epoch": 1090.2631578947369, + "grad_norm": 1.1747958660125732, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 165720 + }, + { + "epoch": 1090.328947368421, + "grad_norm": 1.1064643859863281, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 165730 + }, + { + "epoch": 1090.3947368421052, + "grad_norm": 0.9353105425834656, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 165740 + }, + { + "epoch": 1090.4605263157894, + "grad_norm": 1.1124236583709717, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 165750 + }, + { + "epoch": 1090.5263157894738, + "grad_norm": 1.1237322092056274, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 165760 + }, + { + "epoch": 1090.592105263158, + "grad_norm": 0.8977600932121277, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 165770 + }, + { + "epoch": 1090.657894736842, + "grad_norm": 1.2883377075195312, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 165780 + }, + { + "epoch": 1090.7236842105262, + "grad_norm": 1.2734546661376953, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 165790 + }, + { + "epoch": 1090.7894736842106, + "grad_norm": 1.1414625644683838, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 165800 + }, + { + "epoch": 1090.8552631578948, + "grad_norm": 1.0614356994628906, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 165810 + }, + { + "epoch": 1090.921052631579, + "grad_norm": 1.0727033615112305, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 165820 + }, + { + "epoch": 1090.9868421052631, + "grad_norm": 1.1432024240493774, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 165830 + }, + { + "epoch": 1091.0526315789473, + "grad_norm": 1.158418893814087, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 165840 + }, + { + "epoch": 1091.1184210526317, + "grad_norm": 1.033921718597412, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 165850 + }, + { + "epoch": 1091.1842105263158, + "grad_norm": 1.0645828247070312, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 165860 + }, + { + "epoch": 1091.25, + "grad_norm": 0.5794889330863953, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 165870 + }, + { + "epoch": 1091.3157894736842, + "grad_norm": 1.1075804233551025, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 165880 + }, + { + "epoch": 1091.3815789473683, + "grad_norm": 0.9454206228256226, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 165890 + }, + { + "epoch": 1091.4473684210527, + "grad_norm": 1.1715730428695679, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 165900 + }, + { + "epoch": 1091.5131578947369, + "grad_norm": 1.0313235521316528, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 165910 + }, + { + "epoch": 1091.578947368421, + "grad_norm": 1.3936482667922974, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 165920 + }, + { + "epoch": 1091.6447368421052, + "grad_norm": 0.8240237832069397, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 165930 + }, + { + "epoch": 1091.7105263157894, + "grad_norm": 1.0866410732269287, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 165940 + }, + { + "epoch": 1091.7763157894738, + "grad_norm": 1.3597463369369507, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 165950 + }, + { + "epoch": 1091.842105263158, + "grad_norm": 1.3477791547775269, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 165960 + }, + { + "epoch": 1091.907894736842, + "grad_norm": 1.2000268697738647, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 165970 + }, + { + "epoch": 1091.9736842105262, + "grad_norm": 0.7252658009529114, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 165980 + }, + { + "epoch": 1092.0394736842106, + "grad_norm": 1.189745545387268, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 165990 + }, + { + "epoch": 1092.1052631578948, + "grad_norm": 0.7382572889328003, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 166000 + }, + { + "epoch": 1092.171052631579, + "grad_norm": 1.0772287845611572, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 166010 + }, + { + "epoch": 1092.2368421052631, + "grad_norm": 0.7473660111427307, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 166020 + }, + { + "epoch": 1092.3026315789473, + "grad_norm": 0.8912314176559448, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 166030 + }, + { + "epoch": 1092.3684210526317, + "grad_norm": 1.1493737697601318, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 166040 + }, + { + "epoch": 1092.4342105263158, + "grad_norm": 1.3039054870605469, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 166050 + }, + { + "epoch": 1092.5, + "grad_norm": 0.9073991775512695, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 166060 + }, + { + "epoch": 1092.5657894736842, + "grad_norm": 0.8854196667671204, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 166070 + }, + { + "epoch": 1092.6315789473683, + "grad_norm": 1.2598876953125, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 166080 + }, + { + "epoch": 1092.6973684210527, + "grad_norm": 1.2058098316192627, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 166090 + }, + { + "epoch": 1092.7631578947369, + "grad_norm": 1.188262701034546, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 166100 + }, + { + "epoch": 1092.828947368421, + "grad_norm": 1.5130947828292847, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 166110 + }, + { + "epoch": 1092.8947368421052, + "grad_norm": 1.2979497909545898, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 166120 + }, + { + "epoch": 1092.9605263157894, + "grad_norm": 1.0811450481414795, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 166130 + }, + { + "epoch": 1093.0263157894738, + "grad_norm": 1.0868312120437622, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 166140 + }, + { + "epoch": 1093.092105263158, + "grad_norm": 0.8259110450744629, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 166150 + }, + { + "epoch": 1093.157894736842, + "grad_norm": 1.2230782508850098, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 166160 + }, + { + "epoch": 1093.2236842105262, + "grad_norm": 1.296341896057129, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 166170 + }, + { + "epoch": 1093.2894736842106, + "grad_norm": 1.0726011991500854, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 166180 + }, + { + "epoch": 1093.3552631578948, + "grad_norm": 0.7867605686187744, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 166190 + }, + { + "epoch": 1093.421052631579, + "grad_norm": 0.8794106245040894, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 166200 + }, + { + "epoch": 1093.4868421052631, + "grad_norm": 1.1672301292419434, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 166210 + }, + { + "epoch": 1093.5526315789473, + "grad_norm": 0.9970499277114868, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 166220 + }, + { + "epoch": 1093.6184210526317, + "grad_norm": 1.1926120519638062, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 166230 + }, + { + "epoch": 1093.6842105263158, + "grad_norm": 0.935606837272644, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 166240 + }, + { + "epoch": 1093.75, + "grad_norm": 1.3509941101074219, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 166250 + }, + { + "epoch": 1093.8157894736842, + "grad_norm": 1.2598907947540283, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 166260 + }, + { + "epoch": 1093.8815789473683, + "grad_norm": 1.3617310523986816, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 166270 + }, + { + "epoch": 1093.9473684210527, + "grad_norm": 1.6481096744537354, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 166280 + }, + { + "epoch": 1094.0131578947369, + "grad_norm": 1.0241049528121948, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 166290 + }, + { + "epoch": 1094.078947368421, + "grad_norm": 1.2445513010025024, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 166300 + }, + { + "epoch": 1094.1447368421052, + "grad_norm": 1.102005124092102, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 166310 + }, + { + "epoch": 1094.2105263157894, + "grad_norm": 1.0038352012634277, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 166320 + }, + { + "epoch": 1094.2763157894738, + "grad_norm": 1.2613288164138794, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 166330 + }, + { + "epoch": 1094.342105263158, + "grad_norm": 0.9229909181594849, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 166340 + }, + { + "epoch": 1094.407894736842, + "grad_norm": 0.9548853635787964, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 166350 + }, + { + "epoch": 1094.4736842105262, + "grad_norm": 0.8751519322395325, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 166360 + }, + { + "epoch": 1094.5394736842106, + "grad_norm": 0.5805840492248535, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 166370 + }, + { + "epoch": 1094.6052631578948, + "grad_norm": 1.1700973510742188, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 166380 + }, + { + "epoch": 1094.671052631579, + "grad_norm": 0.8679378032684326, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 166390 + }, + { + "epoch": 1094.7368421052631, + "grad_norm": 0.843585193157196, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 166400 + }, + { + "epoch": 1094.8026315789473, + "grad_norm": 1.0492405891418457, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 166410 + }, + { + "epoch": 1094.8684210526317, + "grad_norm": 1.0488659143447876, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 166420 + }, + { + "epoch": 1094.9342105263158, + "grad_norm": 1.3835662603378296, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 166430 + }, + { + "epoch": 1095.0, + "grad_norm": 0.7628530859947205, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 166440 + }, + { + "epoch": 1095.0657894736842, + "grad_norm": 1.0557432174682617, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 166450 + }, + { + "epoch": 1095.1315789473683, + "grad_norm": 1.1266205310821533, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 166460 + }, + { + "epoch": 1095.1973684210527, + "grad_norm": 1.0164369344711304, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 166470 + }, + { + "epoch": 1095.2631578947369, + "grad_norm": 1.1279717683792114, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 166480 + }, + { + "epoch": 1095.328947368421, + "grad_norm": 1.44341242313385, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 166490 + }, + { + "epoch": 1095.3947368421052, + "grad_norm": 1.4984021186828613, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 166500 + }, + { + "epoch": 1095.4605263157894, + "grad_norm": 0.9742573499679565, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 166510 + }, + { + "epoch": 1095.5263157894738, + "grad_norm": 1.1598995923995972, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 166520 + }, + { + "epoch": 1095.592105263158, + "grad_norm": 1.0693007707595825, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 166530 + }, + { + "epoch": 1095.657894736842, + "grad_norm": 1.192436695098877, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 166540 + }, + { + "epoch": 1095.7236842105262, + "grad_norm": 1.156217098236084, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 166550 + }, + { + "epoch": 1095.7894736842106, + "grad_norm": 1.08530592918396, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 166560 + }, + { + "epoch": 1095.8552631578948, + "grad_norm": 1.1809860467910767, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 166570 + }, + { + "epoch": 1095.921052631579, + "grad_norm": 0.9600285887718201, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 166580 + }, + { + "epoch": 1095.9868421052631, + "grad_norm": 1.0501923561096191, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 166590 + }, + { + "epoch": 1096.0526315789473, + "grad_norm": 1.4869877099990845, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 166600 + }, + { + "epoch": 1096.1184210526317, + "grad_norm": 1.0109316110610962, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 166610 + }, + { + "epoch": 1096.1842105263158, + "grad_norm": 1.2201900482177734, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 166620 + }, + { + "epoch": 1096.25, + "grad_norm": 1.0076817274093628, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 166630 + }, + { + "epoch": 1096.3157894736842, + "grad_norm": 0.9542809128761292, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 166640 + }, + { + "epoch": 1096.3815789473683, + "grad_norm": 0.8175939321517944, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 166650 + }, + { + "epoch": 1096.4473684210527, + "grad_norm": 1.0511505603790283, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 166660 + }, + { + "epoch": 1096.5131578947369, + "grad_norm": 0.8790225982666016, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 166670 + }, + { + "epoch": 1096.578947368421, + "grad_norm": 0.8520594835281372, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 166680 + }, + { + "epoch": 1096.6447368421052, + "grad_norm": 0.8566333055496216, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 166690 + }, + { + "epoch": 1096.7105263157894, + "grad_norm": 1.1941945552825928, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 166700 + }, + { + "epoch": 1096.7763157894738, + "grad_norm": 0.8733620047569275, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 166710 + }, + { + "epoch": 1096.842105263158, + "grad_norm": 1.1847484111785889, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 166720 + }, + { + "epoch": 1096.907894736842, + "grad_norm": 0.9977523684501648, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 166730 + }, + { + "epoch": 1096.9736842105262, + "grad_norm": 1.088883638381958, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 166740 + }, + { + "epoch": 1097.0394736842106, + "grad_norm": 1.1181623935699463, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 166750 + }, + { + "epoch": 1097.1052631578948, + "grad_norm": 3.2963168621063232, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 166760 + }, + { + "epoch": 1097.171052631579, + "grad_norm": 1.2029335498809814, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 166770 + }, + { + "epoch": 1097.2368421052631, + "grad_norm": 1.2045027017593384, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 166780 + }, + { + "epoch": 1097.3026315789473, + "grad_norm": 1.1241041421890259, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 166790 + }, + { + "epoch": 1097.3684210526317, + "grad_norm": 1.700153112411499, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 166800 + }, + { + "epoch": 1097.4342105263158, + "grad_norm": 1.5076239109039307, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 166810 + }, + { + "epoch": 1097.5, + "grad_norm": 1.4871948957443237, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 166820 + }, + { + "epoch": 1097.5657894736842, + "grad_norm": 1.3231090307235718, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 166830 + }, + { + "epoch": 1097.6315789473683, + "grad_norm": 1.1733216047286987, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 166840 + }, + { + "epoch": 1097.6973684210527, + "grad_norm": 1.3139346837997437, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 166850 + }, + { + "epoch": 1097.7631578947369, + "grad_norm": 1.0993858575820923, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 166860 + }, + { + "epoch": 1097.828947368421, + "grad_norm": 1.0795350074768066, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 166870 + }, + { + "epoch": 1097.8947368421052, + "grad_norm": 1.0457212924957275, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 166880 + }, + { + "epoch": 1097.9605263157894, + "grad_norm": 0.9968882203102112, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 166890 + }, + { + "epoch": 1098.0263157894738, + "grad_norm": 1.2826480865478516, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 166900 + }, + { + "epoch": 1098.092105263158, + "grad_norm": 1.4935877323150635, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 166910 + }, + { + "epoch": 1098.157894736842, + "grad_norm": 1.1641865968704224, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 166920 + }, + { + "epoch": 1098.2236842105262, + "grad_norm": 1.0109764337539673, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 166930 + }, + { + "epoch": 1098.2894736842106, + "grad_norm": 1.3171255588531494, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 166940 + }, + { + "epoch": 1098.3552631578948, + "grad_norm": 1.3384495973587036, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 166950 + }, + { + "epoch": 1098.421052631579, + "grad_norm": 1.485098123550415, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 166960 + }, + { + "epoch": 1098.4868421052631, + "grad_norm": 1.4696592092514038, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 166970 + }, + { + "epoch": 1098.5526315789473, + "grad_norm": 0.9190577864646912, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 166980 + }, + { + "epoch": 1098.6184210526317, + "grad_norm": 1.0120787620544434, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 166990 + }, + { + "epoch": 1098.6842105263158, + "grad_norm": 1.3007423877716064, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 167000 + }, + { + "epoch": 1098.75, + "grad_norm": 1.21133291721344, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 167010 + }, + { + "epoch": 1098.8157894736842, + "grad_norm": 0.8098885416984558, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 167020 + }, + { + "epoch": 1098.8815789473683, + "grad_norm": 1.0924862623214722, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 167030 + }, + { + "epoch": 1098.9473684210527, + "grad_norm": 1.058971643447876, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 167040 + }, + { + "epoch": 1099.0131578947369, + "grad_norm": 1.1815778017044067, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 167050 + }, + { + "epoch": 1099.078947368421, + "grad_norm": 1.1275389194488525, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 167060 + }, + { + "epoch": 1099.1447368421052, + "grad_norm": 1.2118959426879883, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 167070 + }, + { + "epoch": 1099.2105263157894, + "grad_norm": 1.3578826189041138, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 167080 + }, + { + "epoch": 1099.2763157894738, + "grad_norm": 1.1233108043670654, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 167090 + }, + { + "epoch": 1099.342105263158, + "grad_norm": 0.8814030885696411, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 167100 + }, + { + "epoch": 1099.407894736842, + "grad_norm": 1.0305200815200806, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 167110 + }, + { + "epoch": 1099.4736842105262, + "grad_norm": 1.1122325658798218, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 167120 + }, + { + "epoch": 1099.5394736842106, + "grad_norm": 0.9248008728027344, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 167130 + }, + { + "epoch": 1099.6052631578948, + "grad_norm": 0.889100968837738, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 167140 + }, + { + "epoch": 1099.671052631579, + "grad_norm": 1.2280583381652832, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 167150 + }, + { + "epoch": 1099.7368421052631, + "grad_norm": 1.2300212383270264, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 167160 + }, + { + "epoch": 1099.8026315789473, + "grad_norm": 1.211428165435791, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 167170 + }, + { + "epoch": 1099.8684210526317, + "grad_norm": 1.1295406818389893, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 167180 + }, + { + "epoch": 1099.9342105263158, + "grad_norm": 1.1120741367340088, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 167190 + }, + { + "epoch": 1100.0, + "grad_norm": 0.8799479603767395, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 167200 + }, + { + "epoch": 1100.0657894736842, + "grad_norm": 1.0118703842163086, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 167210 + }, + { + "epoch": 1100.1315789473683, + "grad_norm": 1.097957968711853, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 167220 + }, + { + "epoch": 1100.1973684210527, + "grad_norm": 0.8075319528579712, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 167230 + }, + { + "epoch": 1100.2631578947369, + "grad_norm": 1.2604179382324219, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 167240 + }, + { + "epoch": 1100.328947368421, + "grad_norm": 1.090760350227356, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 167250 + }, + { + "epoch": 1100.3947368421052, + "grad_norm": 0.9950432181358337, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 167260 + }, + { + "epoch": 1100.4605263157894, + "grad_norm": 0.7708602547645569, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 167270 + }, + { + "epoch": 1100.5263157894738, + "grad_norm": 0.8636929392814636, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 167280 + }, + { + "epoch": 1100.592105263158, + "grad_norm": 0.7915971279144287, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 167290 + }, + { + "epoch": 1100.657894736842, + "grad_norm": 0.8626055121421814, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 167300 + }, + { + "epoch": 1100.7236842105262, + "grad_norm": 0.6943460702896118, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 167310 + }, + { + "epoch": 1100.7894736842106, + "grad_norm": 1.0812398195266724, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 167320 + }, + { + "epoch": 1100.8552631578948, + "grad_norm": 0.906474232673645, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 167330 + }, + { + "epoch": 1100.921052631579, + "grad_norm": 0.9703337550163269, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 167340 + }, + { + "epoch": 1100.9868421052631, + "grad_norm": 0.8789888024330139, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 167350 + }, + { + "epoch": 1101.0526315789473, + "grad_norm": 0.9876759648323059, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 167360 + }, + { + "epoch": 1101.1184210526317, + "grad_norm": 1.0288928747177124, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 167370 + }, + { + "epoch": 1101.1842105263158, + "grad_norm": 0.9884411692619324, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 167380 + }, + { + "epoch": 1101.25, + "grad_norm": 1.231924057006836, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 167390 + }, + { + "epoch": 1101.3157894736842, + "grad_norm": 1.2032873630523682, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 167400 + }, + { + "epoch": 1101.3815789473683, + "grad_norm": 1.1129034757614136, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 167410 + }, + { + "epoch": 1101.4473684210527, + "grad_norm": 1.0674737691879272, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 167420 + }, + { + "epoch": 1101.5131578947369, + "grad_norm": 0.9787297248840332, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 167430 + }, + { + "epoch": 1101.578947368421, + "grad_norm": 0.8333871364593506, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 167440 + }, + { + "epoch": 1101.6447368421052, + "grad_norm": 1.1701922416687012, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 167450 + }, + { + "epoch": 1101.7105263157894, + "grad_norm": 1.1655021905899048, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 167460 + }, + { + "epoch": 1101.7763157894738, + "grad_norm": 1.2170398235321045, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 167470 + }, + { + "epoch": 1101.842105263158, + "grad_norm": 1.1292792558670044, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 167480 + }, + { + "epoch": 1101.907894736842, + "grad_norm": 1.0343544483184814, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 167490 + }, + { + "epoch": 1101.9736842105262, + "grad_norm": 1.292896032333374, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 167500 + }, + { + "epoch": 1102.0394736842106, + "grad_norm": 1.3598896265029907, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 167510 + }, + { + "epoch": 1102.1052631578948, + "grad_norm": 1.1134599447250366, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 167520 + }, + { + "epoch": 1102.171052631579, + "grad_norm": 1.4782142639160156, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 167530 + }, + { + "epoch": 1102.2368421052631, + "grad_norm": 1.0224602222442627, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 167540 + }, + { + "epoch": 1102.3026315789473, + "grad_norm": 1.1568275690078735, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 167550 + }, + { + "epoch": 1102.3684210526317, + "grad_norm": 0.9794890284538269, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 167560 + }, + { + "epoch": 1102.4342105263158, + "grad_norm": 0.9508534073829651, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 167570 + }, + { + "epoch": 1102.5, + "grad_norm": 1.202999234199524, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 167580 + }, + { + "epoch": 1102.5657894736842, + "grad_norm": 1.2646609544754028, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 167590 + }, + { + "epoch": 1102.6315789473683, + "grad_norm": 1.1292835474014282, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 167600 + }, + { + "epoch": 1102.6973684210527, + "grad_norm": 0.8157707452774048, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 167610 + }, + { + "epoch": 1102.7631578947369, + "grad_norm": 0.9448849558830261, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 167620 + }, + { + "epoch": 1102.828947368421, + "grad_norm": 0.878303587436676, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 167630 + }, + { + "epoch": 1102.8947368421052, + "grad_norm": 1.097684383392334, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 167640 + }, + { + "epoch": 1102.9605263157894, + "grad_norm": 1.0110867023468018, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 167650 + }, + { + "epoch": 1103.0263157894738, + "grad_norm": 1.1155486106872559, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 167660 + }, + { + "epoch": 1103.092105263158, + "grad_norm": 1.0845309495925903, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 167670 + }, + { + "epoch": 1103.157894736842, + "grad_norm": 0.8324429392814636, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 167680 + }, + { + "epoch": 1103.2236842105262, + "grad_norm": 0.8664148449897766, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 167690 + }, + { + "epoch": 1103.2894736842106, + "grad_norm": 0.7451772689819336, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 167700 + }, + { + "epoch": 1103.3552631578948, + "grad_norm": 0.9398016929626465, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 167710 + }, + { + "epoch": 1103.421052631579, + "grad_norm": 1.0360288619995117, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 167720 + }, + { + "epoch": 1103.4868421052631, + "grad_norm": 1.274431824684143, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 167730 + }, + { + "epoch": 1103.5526315789473, + "grad_norm": 0.5587978363037109, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 167740 + }, + { + "epoch": 1103.6184210526317, + "grad_norm": 0.7810954451560974, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 167750 + }, + { + "epoch": 1103.6842105263158, + "grad_norm": 0.8118604421615601, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 167760 + }, + { + "epoch": 1103.75, + "grad_norm": 0.6753564476966858, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 167770 + }, + { + "epoch": 1103.8157894736842, + "grad_norm": 0.609535276889801, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 167780 + }, + { + "epoch": 1103.8815789473683, + "grad_norm": 0.8553572297096252, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 167790 + }, + { + "epoch": 1103.9473684210527, + "grad_norm": 1.0933605432510376, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 167800 + }, + { + "epoch": 1104.0131578947369, + "grad_norm": 1.4074715375900269, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 167810 + }, + { + "epoch": 1104.078947368421, + "grad_norm": 0.9313386678695679, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 167820 + }, + { + "epoch": 1104.1447368421052, + "grad_norm": 1.2255233526229858, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 167830 + }, + { + "epoch": 1104.2105263157894, + "grad_norm": 0.958168089389801, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 167840 + }, + { + "epoch": 1104.2763157894738, + "grad_norm": 0.7276482582092285, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 167850 + }, + { + "epoch": 1104.342105263158, + "grad_norm": 1.2092912197113037, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 167860 + }, + { + "epoch": 1104.407894736842, + "grad_norm": 0.9933399558067322, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 167870 + }, + { + "epoch": 1104.4736842105262, + "grad_norm": 0.8450823426246643, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 167880 + }, + { + "epoch": 1104.5394736842106, + "grad_norm": 1.043594479560852, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 167890 + }, + { + "epoch": 1104.6052631578948, + "grad_norm": 1.107570767402649, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 167900 + }, + { + "epoch": 1104.671052631579, + "grad_norm": 1.1680465936660767, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 167910 + }, + { + "epoch": 1104.7368421052631, + "grad_norm": 1.4395219087600708, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 167920 + }, + { + "epoch": 1104.8026315789473, + "grad_norm": 1.0182297229766846, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 167930 + }, + { + "epoch": 1104.8684210526317, + "grad_norm": 0.7282902598381042, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 167940 + }, + { + "epoch": 1104.9342105263158, + "grad_norm": 1.1767057180404663, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 167950 + }, + { + "epoch": 1105.0, + "grad_norm": 0.9892759323120117, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 167960 + }, + { + "epoch": 1105.0657894736842, + "grad_norm": 0.8956315517425537, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 167970 + }, + { + "epoch": 1105.1315789473683, + "grad_norm": 0.707263708114624, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 167980 + }, + { + "epoch": 1105.1973684210527, + "grad_norm": 0.895297110080719, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 167990 + }, + { + "epoch": 1105.2631578947369, + "grad_norm": 0.7965859174728394, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 168000 + }, + { + "epoch": 1105.328947368421, + "grad_norm": 1.3010401725769043, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 168010 + }, + { + "epoch": 1105.3947368421052, + "grad_norm": 1.0735414028167725, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 168020 + }, + { + "epoch": 1105.4605263157894, + "grad_norm": 1.0381656885147095, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 168030 + }, + { + "epoch": 1105.5263157894738, + "grad_norm": 0.8729901909828186, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 168040 + }, + { + "epoch": 1105.592105263158, + "grad_norm": 0.9868913888931274, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 168050 + }, + { + "epoch": 1105.657894736842, + "grad_norm": 1.1911859512329102, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 168060 + }, + { + "epoch": 1105.7236842105262, + "grad_norm": 1.3197442293167114, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 168070 + }, + { + "epoch": 1105.7894736842106, + "grad_norm": 1.240401029586792, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 168080 + }, + { + "epoch": 1105.8552631578948, + "grad_norm": 0.9005335569381714, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 168090 + }, + { + "epoch": 1105.921052631579, + "grad_norm": 0.9704708456993103, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 168100 + }, + { + "epoch": 1105.9868421052631, + "grad_norm": 0.759002149105072, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 168110 + }, + { + "epoch": 1106.0526315789473, + "grad_norm": 0.8771544694900513, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 168120 + }, + { + "epoch": 1106.1184210526317, + "grad_norm": 1.3510820865631104, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 168130 + }, + { + "epoch": 1106.1842105263158, + "grad_norm": 1.1021738052368164, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 168140 + }, + { + "epoch": 1106.25, + "grad_norm": 1.1089617013931274, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 168150 + }, + { + "epoch": 1106.3157894736842, + "grad_norm": 1.5470319986343384, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 168160 + }, + { + "epoch": 1106.3815789473683, + "grad_norm": 0.8695709109306335, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 168170 + }, + { + "epoch": 1106.4473684210527, + "grad_norm": 1.176269769668579, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 168180 + }, + { + "epoch": 1106.5131578947369, + "grad_norm": 1.0304797887802124, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 168190 + }, + { + "epoch": 1106.578947368421, + "grad_norm": 1.2818888425827026, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 168200 + }, + { + "epoch": 1106.6447368421052, + "grad_norm": 1.1938977241516113, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 168210 + }, + { + "epoch": 1106.7105263157894, + "grad_norm": 0.8075238466262817, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 168220 + }, + { + "epoch": 1106.7763157894738, + "grad_norm": 1.112987756729126, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 168230 + }, + { + "epoch": 1106.842105263158, + "grad_norm": 1.1107630729675293, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 168240 + }, + { + "epoch": 1106.907894736842, + "grad_norm": 0.9863311052322388, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 168250 + }, + { + "epoch": 1106.9736842105262, + "grad_norm": 1.1156419515609741, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 168260 + }, + { + "epoch": 1107.0394736842106, + "grad_norm": 1.1970049142837524, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 168270 + }, + { + "epoch": 1107.1052631578948, + "grad_norm": 1.1724064350128174, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 168280 + }, + { + "epoch": 1107.171052631579, + "grad_norm": 1.0076320171356201, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 168290 + }, + { + "epoch": 1107.2368421052631, + "grad_norm": 1.0176210403442383, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 168300 + }, + { + "epoch": 1107.3026315789473, + "grad_norm": 1.0968610048294067, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 168310 + }, + { + "epoch": 1107.3684210526317, + "grad_norm": 1.47584867477417, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 168320 + }, + { + "epoch": 1107.4342105263158, + "grad_norm": 1.49242103099823, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 168330 + }, + { + "epoch": 1107.5, + "grad_norm": 1.3437186479568481, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 168340 + }, + { + "epoch": 1107.5657894736842, + "grad_norm": 1.1982983350753784, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 168350 + }, + { + "epoch": 1107.6315789473683, + "grad_norm": 1.1239218711853027, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 168360 + }, + { + "epoch": 1107.6973684210527, + "grad_norm": 0.9983989000320435, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 168370 + }, + { + "epoch": 1107.7631578947369, + "grad_norm": 1.2701680660247803, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 168380 + }, + { + "epoch": 1107.828947368421, + "grad_norm": 1.07079017162323, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 168390 + }, + { + "epoch": 1107.8947368421052, + "grad_norm": 1.092276930809021, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 168400 + }, + { + "epoch": 1107.9605263157894, + "grad_norm": 0.7285662293434143, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 168410 + }, + { + "epoch": 1108.0263157894738, + "grad_norm": 1.0730990171432495, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 168420 + }, + { + "epoch": 1108.092105263158, + "grad_norm": 1.0767719745635986, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 168430 + }, + { + "epoch": 1108.157894736842, + "grad_norm": 1.1680963039398193, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 168440 + }, + { + "epoch": 1108.2236842105262, + "grad_norm": 1.048923373222351, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 168450 + }, + { + "epoch": 1108.2894736842106, + "grad_norm": 0.9691452980041504, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 168460 + }, + { + "epoch": 1108.3552631578948, + "grad_norm": 0.694308340549469, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 168470 + }, + { + "epoch": 1108.421052631579, + "grad_norm": 0.8766660094261169, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 168480 + }, + { + "epoch": 1108.4868421052631, + "grad_norm": 0.8163343071937561, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 168490 + }, + { + "epoch": 1108.5526315789473, + "grad_norm": 0.7243715524673462, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 168500 + }, + { + "epoch": 1108.6184210526317, + "grad_norm": 1.2673771381378174, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 168510 + }, + { + "epoch": 1108.6842105263158, + "grad_norm": 0.9088281989097595, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 168520 + }, + { + "epoch": 1108.75, + "grad_norm": 1.3097550868988037, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 168530 + }, + { + "epoch": 1108.8157894736842, + "grad_norm": 1.2778091430664062, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 168540 + }, + { + "epoch": 1108.8815789473683, + "grad_norm": 1.2676957845687866, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 168550 + }, + { + "epoch": 1108.9473684210527, + "grad_norm": 1.185399055480957, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 168560 + }, + { + "epoch": 1109.0131578947369, + "grad_norm": 0.9912101626396179, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 168570 + }, + { + "epoch": 1109.078947368421, + "grad_norm": 1.0790884494781494, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 168580 + }, + { + "epoch": 1109.1447368421052, + "grad_norm": 0.9652689695358276, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 168590 + }, + { + "epoch": 1109.2105263157894, + "grad_norm": 1.2606117725372314, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 168600 + }, + { + "epoch": 1109.2763157894738, + "grad_norm": 1.2370003461837769, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 168610 + }, + { + "epoch": 1109.342105263158, + "grad_norm": 0.9363791346549988, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 168620 + }, + { + "epoch": 1109.407894736842, + "grad_norm": 1.1172534227371216, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 168630 + }, + { + "epoch": 1109.4736842105262, + "grad_norm": 1.1383373737335205, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 168640 + }, + { + "epoch": 1109.5394736842106, + "grad_norm": 1.16228449344635, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 168650 + }, + { + "epoch": 1109.6052631578948, + "grad_norm": 1.1701244115829468, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 168660 + }, + { + "epoch": 1109.671052631579, + "grad_norm": 1.5518008470535278, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 168670 + }, + { + "epoch": 1109.7368421052631, + "grad_norm": 1.4797471761703491, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 168680 + }, + { + "epoch": 1109.8026315789473, + "grad_norm": 1.0961973667144775, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 168690 + }, + { + "epoch": 1109.8684210526317, + "grad_norm": 1.1600146293640137, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 168700 + }, + { + "epoch": 1109.9342105263158, + "grad_norm": 1.3564647436141968, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 168710 + }, + { + "epoch": 1110.0, + "grad_norm": 1.7049609422683716, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 168720 + }, + { + "epoch": 1110.0657894736842, + "grad_norm": 1.5665743350982666, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 168730 + }, + { + "epoch": 1110.1315789473683, + "grad_norm": 1.5646759271621704, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 168740 + }, + { + "epoch": 1110.1973684210527, + "grad_norm": 1.1867520809173584, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 168750 + }, + { + "epoch": 1110.2631578947369, + "grad_norm": 1.6456133127212524, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 168760 + }, + { + "epoch": 1110.328947368421, + "grad_norm": 1.2314730882644653, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 168770 + }, + { + "epoch": 1110.3947368421052, + "grad_norm": 1.0836608409881592, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 168780 + }, + { + "epoch": 1110.4605263157894, + "grad_norm": 0.9682186245918274, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 168790 + }, + { + "epoch": 1110.5263157894738, + "grad_norm": 0.8563161492347717, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 168800 + }, + { + "epoch": 1110.592105263158, + "grad_norm": 0.9814671277999878, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 168810 + }, + { + "epoch": 1110.657894736842, + "grad_norm": 0.7647870182991028, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 168820 + }, + { + "epoch": 1110.7236842105262, + "grad_norm": 1.3109147548675537, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 168830 + }, + { + "epoch": 1110.7894736842106, + "grad_norm": 1.0601048469543457, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 168840 + }, + { + "epoch": 1110.8552631578948, + "grad_norm": 1.160309910774231, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 168850 + }, + { + "epoch": 1110.921052631579, + "grad_norm": 1.0141721963882446, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 168860 + }, + { + "epoch": 1110.9868421052631, + "grad_norm": 0.9558742046356201, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 168870 + }, + { + "epoch": 1111.0526315789473, + "grad_norm": 1.0279165506362915, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 168880 + }, + { + "epoch": 1111.1184210526317, + "grad_norm": 1.179625153541565, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 168890 + }, + { + "epoch": 1111.1842105263158, + "grad_norm": 1.070167064666748, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 168900 + }, + { + "epoch": 1111.25, + "grad_norm": 1.2725509405136108, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 168910 + }, + { + "epoch": 1111.3157894736842, + "grad_norm": 0.9104771018028259, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 168920 + }, + { + "epoch": 1111.3815789473683, + "grad_norm": 0.937410831451416, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 168930 + }, + { + "epoch": 1111.4473684210527, + "grad_norm": 1.037760615348816, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 168940 + }, + { + "epoch": 1111.5131578947369, + "grad_norm": 1.1063820123672485, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 168950 + }, + { + "epoch": 1111.578947368421, + "grad_norm": 1.0770851373672485, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 168960 + }, + { + "epoch": 1111.6447368421052, + "grad_norm": 1.1745645999908447, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 168970 + }, + { + "epoch": 1111.7105263157894, + "grad_norm": 0.936163604259491, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 168980 + }, + { + "epoch": 1111.7763157894738, + "grad_norm": 1.031320333480835, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 168990 + }, + { + "epoch": 1111.842105263158, + "grad_norm": 0.6643326878547668, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 169000 + }, + { + "epoch": 1111.907894736842, + "grad_norm": 0.8796061873435974, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 169010 + }, + { + "epoch": 1111.9736842105262, + "grad_norm": 0.9710099697113037, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 169020 + }, + { + "epoch": 1112.0394736842106, + "grad_norm": 1.134057879447937, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 169030 + }, + { + "epoch": 1112.1052631578948, + "grad_norm": 0.7296460270881653, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 169040 + }, + { + "epoch": 1112.171052631579, + "grad_norm": 0.7189831733703613, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 169050 + }, + { + "epoch": 1112.2368421052631, + "grad_norm": 0.8894656896591187, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 169060 + }, + { + "epoch": 1112.3026315789473, + "grad_norm": 1.1131048202514648, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 169070 + }, + { + "epoch": 1112.3684210526317, + "grad_norm": 1.5262320041656494, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 169080 + }, + { + "epoch": 1112.4342105263158, + "grad_norm": 1.2792640924453735, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 169090 + }, + { + "epoch": 1112.5, + "grad_norm": 1.091755747795105, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 169100 + }, + { + "epoch": 1112.5657894736842, + "grad_norm": 1.1923725605010986, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 169110 + }, + { + "epoch": 1112.6315789473683, + "grad_norm": 1.2157421112060547, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 169120 + }, + { + "epoch": 1112.6973684210527, + "grad_norm": 1.0996853113174438, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 169130 + }, + { + "epoch": 1112.7631578947369, + "grad_norm": 1.3527673482894897, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 169140 + }, + { + "epoch": 1112.828947368421, + "grad_norm": 1.1760993003845215, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 169150 + }, + { + "epoch": 1112.8947368421052, + "grad_norm": 1.1394774913787842, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 169160 + }, + { + "epoch": 1112.9605263157894, + "grad_norm": 1.1786078214645386, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 169170 + }, + { + "epoch": 1113.0263157894738, + "grad_norm": 2.485809326171875, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 169180 + }, + { + "epoch": 1113.092105263158, + "grad_norm": 1.372261881828308, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 169190 + }, + { + "epoch": 1113.157894736842, + "grad_norm": 1.125187873840332, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 169200 + }, + { + "epoch": 1113.2236842105262, + "grad_norm": 1.4014942646026611, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 169210 + }, + { + "epoch": 1113.2894736842106, + "grad_norm": 1.0641945600509644, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 169220 + }, + { + "epoch": 1113.3552631578948, + "grad_norm": 1.023398756980896, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 169230 + }, + { + "epoch": 1113.421052631579, + "grad_norm": 0.8850655555725098, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 169240 + }, + { + "epoch": 1113.4868421052631, + "grad_norm": 1.0332579612731934, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 169250 + }, + { + "epoch": 1113.5526315789473, + "grad_norm": 1.3429882526397705, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 169260 + }, + { + "epoch": 1113.6184210526317, + "grad_norm": 1.1904481649398804, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 169270 + }, + { + "epoch": 1113.6842105263158, + "grad_norm": 0.8941307067871094, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 169280 + }, + { + "epoch": 1113.75, + "grad_norm": 1.1564146280288696, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 169290 + }, + { + "epoch": 1113.8157894736842, + "grad_norm": 0.770451545715332, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 169300 + }, + { + "epoch": 1113.8815789473683, + "grad_norm": 0.9108574986457825, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 169310 + }, + { + "epoch": 1113.9473684210527, + "grad_norm": 0.8220398426055908, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 169320 + }, + { + "epoch": 1114.0131578947369, + "grad_norm": 1.039478063583374, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 169330 + }, + { + "epoch": 1114.078947368421, + "grad_norm": 1.078933835029602, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 169340 + }, + { + "epoch": 1114.1447368421052, + "grad_norm": 1.0920974016189575, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 169350 + }, + { + "epoch": 1114.2105263157894, + "grad_norm": 0.9281700253486633, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 169360 + }, + { + "epoch": 1114.2763157894738, + "grad_norm": 1.1259106397628784, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 169370 + }, + { + "epoch": 1114.342105263158, + "grad_norm": 0.9117035269737244, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 169380 + }, + { + "epoch": 1114.407894736842, + "grad_norm": 1.178030252456665, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 169390 + }, + { + "epoch": 1114.4736842105262, + "grad_norm": 1.0540664196014404, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 169400 + }, + { + "epoch": 1114.5394736842106, + "grad_norm": 1.3689993619918823, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 169410 + }, + { + "epoch": 1114.6052631578948, + "grad_norm": 1.2431306838989258, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 169420 + }, + { + "epoch": 1114.671052631579, + "grad_norm": 1.0379416942596436, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 169430 + }, + { + "epoch": 1114.7368421052631, + "grad_norm": 1.2257938385009766, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 169440 + }, + { + "epoch": 1114.8026315789473, + "grad_norm": 0.9797086119651794, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 169450 + }, + { + "epoch": 1114.8684210526317, + "grad_norm": 1.3930553197860718, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 169460 + }, + { + "epoch": 1114.9342105263158, + "grad_norm": 1.1438504457473755, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 169470 + }, + { + "epoch": 1115.0, + "grad_norm": 1.0164341926574707, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 169480 + }, + { + "epoch": 1115.0657894736842, + "grad_norm": 0.706180989742279, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 169490 + }, + { + "epoch": 1115.1315789473683, + "grad_norm": 1.0323935747146606, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 169500 + }, + { + "epoch": 1115.1973684210527, + "grad_norm": 0.8739573359489441, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 169510 + }, + { + "epoch": 1115.2631578947369, + "grad_norm": 1.4711079597473145, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 169520 + }, + { + "epoch": 1115.328947368421, + "grad_norm": 1.3129510879516602, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 169530 + }, + { + "epoch": 1115.3947368421052, + "grad_norm": 1.3260138034820557, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 169540 + }, + { + "epoch": 1115.4605263157894, + "grad_norm": 1.2280200719833374, + "learning_rate": 0.0001, + "loss": 0.013, + "step": 169550 + }, + { + "epoch": 1115.5263157894738, + "grad_norm": 1.0579249858856201, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 169560 + }, + { + "epoch": 1115.592105263158, + "grad_norm": 1.5779436826705933, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 169570 + }, + { + "epoch": 1115.657894736842, + "grad_norm": 1.2979265451431274, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 169580 + }, + { + "epoch": 1115.7236842105262, + "grad_norm": 1.2990245819091797, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 169590 + }, + { + "epoch": 1115.7894736842106, + "grad_norm": 1.1901843547821045, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 169600 + }, + { + "epoch": 1115.8552631578948, + "grad_norm": 1.2466164827346802, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 169610 + }, + { + "epoch": 1115.921052631579, + "grad_norm": 0.8972548842430115, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 169620 + }, + { + "epoch": 1115.9868421052631, + "grad_norm": 0.7799301743507385, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 169630 + }, + { + "epoch": 1116.0526315789473, + "grad_norm": 0.7389102578163147, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 169640 + }, + { + "epoch": 1116.1184210526317, + "grad_norm": 1.1879955530166626, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 169650 + }, + { + "epoch": 1116.1842105263158, + "grad_norm": 1.0571030378341675, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 169660 + }, + { + "epoch": 1116.25, + "grad_norm": 1.1208561658859253, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 169670 + }, + { + "epoch": 1116.3157894736842, + "grad_norm": 1.211946964263916, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 169680 + }, + { + "epoch": 1116.3815789473683, + "grad_norm": 1.0168685913085938, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 169690 + }, + { + "epoch": 1116.4473684210527, + "grad_norm": 1.4225287437438965, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 169700 + }, + { + "epoch": 1116.5131578947369, + "grad_norm": 0.7239480018615723, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 169710 + }, + { + "epoch": 1116.578947368421, + "grad_norm": 1.2720476388931274, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 169720 + }, + { + "epoch": 1116.6447368421052, + "grad_norm": 1.250244140625, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 169730 + }, + { + "epoch": 1116.7105263157894, + "grad_norm": 1.0670952796936035, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 169740 + }, + { + "epoch": 1116.7763157894738, + "grad_norm": 1.2624378204345703, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 169750 + }, + { + "epoch": 1116.842105263158, + "grad_norm": 1.0680029392242432, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 169760 + }, + { + "epoch": 1116.907894736842, + "grad_norm": 0.9182707071304321, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 169770 + }, + { + "epoch": 1116.9736842105262, + "grad_norm": 1.0356745719909668, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 169780 + }, + { + "epoch": 1117.0394736842106, + "grad_norm": 1.2450051307678223, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 169790 + }, + { + "epoch": 1117.1052631578948, + "grad_norm": 1.1483206748962402, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 169800 + }, + { + "epoch": 1117.171052631579, + "grad_norm": 1.0393849611282349, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 169810 + }, + { + "epoch": 1117.2368421052631, + "grad_norm": 1.1354923248291016, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 169820 + }, + { + "epoch": 1117.3026315789473, + "grad_norm": 1.2320741415023804, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 169830 + }, + { + "epoch": 1117.3684210526317, + "grad_norm": 1.2553433179855347, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 169840 + }, + { + "epoch": 1117.4342105263158, + "grad_norm": 1.142922043800354, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 169850 + }, + { + "epoch": 1117.5, + "grad_norm": 1.2326382398605347, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 169860 + }, + { + "epoch": 1117.5657894736842, + "grad_norm": 1.5461610555648804, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 169870 + }, + { + "epoch": 1117.6315789473683, + "grad_norm": 0.9742788076400757, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 169880 + }, + { + "epoch": 1117.6973684210527, + "grad_norm": 0.898607075214386, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 169890 + }, + { + "epoch": 1117.7631578947369, + "grad_norm": 0.6093039512634277, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 169900 + }, + { + "epoch": 1117.828947368421, + "grad_norm": 1.1345704793930054, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 169910 + }, + { + "epoch": 1117.8947368421052, + "grad_norm": 0.851069450378418, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 169920 + }, + { + "epoch": 1117.9605263157894, + "grad_norm": 0.9209760427474976, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 169930 + }, + { + "epoch": 1118.0263157894738, + "grad_norm": 0.86602783203125, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 169940 + }, + { + "epoch": 1118.092105263158, + "grad_norm": 0.698943555355072, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 169950 + }, + { + "epoch": 1118.157894736842, + "grad_norm": 0.8509097695350647, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 169960 + }, + { + "epoch": 1118.2236842105262, + "grad_norm": 0.893696665763855, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 169970 + }, + { + "epoch": 1118.2894736842106, + "grad_norm": 0.7406723499298096, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 169980 + }, + { + "epoch": 1118.3552631578948, + "grad_norm": 1.147310495376587, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 169990 + }, + { + "epoch": 1118.421052631579, + "grad_norm": 0.8037264347076416, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 170000 + }, + { + "epoch": 1118.4868421052631, + "grad_norm": 0.5971798300743103, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 170010 + }, + { + "epoch": 1118.5526315789473, + "grad_norm": 0.7508800625801086, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 170020 + }, + { + "epoch": 1118.6184210526317, + "grad_norm": 1.1436516046524048, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 170030 + }, + { + "epoch": 1118.6842105263158, + "grad_norm": 1.146233320236206, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 170040 + }, + { + "epoch": 1118.75, + "grad_norm": 1.1808003187179565, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 170050 + }, + { + "epoch": 1118.8157894736842, + "grad_norm": 0.8754172325134277, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 170060 + }, + { + "epoch": 1118.8815789473683, + "grad_norm": 1.1422688961029053, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 170070 + }, + { + "epoch": 1118.9473684210527, + "grad_norm": 0.8434144854545593, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 170080 + }, + { + "epoch": 1119.0131578947369, + "grad_norm": 1.1790432929992676, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 170090 + }, + { + "epoch": 1119.078947368421, + "grad_norm": 0.9969106316566467, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 170100 + }, + { + "epoch": 1119.1447368421052, + "grad_norm": 0.6672012209892273, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 170110 + }, + { + "epoch": 1119.2105263157894, + "grad_norm": 1.309625267982483, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 170120 + }, + { + "epoch": 1119.2763157894738, + "grad_norm": 1.0300084352493286, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 170130 + }, + { + "epoch": 1119.342105263158, + "grad_norm": 0.9487712979316711, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 170140 + }, + { + "epoch": 1119.407894736842, + "grad_norm": 1.0787352323532104, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 170150 + }, + { + "epoch": 1119.4736842105262, + "grad_norm": 1.1711540222167969, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 170160 + }, + { + "epoch": 1119.5394736842106, + "grad_norm": 0.9721617102622986, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 170170 + }, + { + "epoch": 1119.6052631578948, + "grad_norm": 1.088510513305664, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 170180 + }, + { + "epoch": 1119.671052631579, + "grad_norm": 1.2319920063018799, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 170190 + }, + { + "epoch": 1119.7368421052631, + "grad_norm": 0.7855985760688782, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 170200 + }, + { + "epoch": 1119.8026315789473, + "grad_norm": 0.8179022669792175, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 170210 + }, + { + "epoch": 1119.8684210526317, + "grad_norm": 1.2478657960891724, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 170220 + }, + { + "epoch": 1119.9342105263158, + "grad_norm": 0.9301165342330933, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 170230 + }, + { + "epoch": 1120.0, + "grad_norm": 1.096358299255371, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 170240 + }, + { + "epoch": 1120.0657894736842, + "grad_norm": 1.134454369544983, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 170250 + }, + { + "epoch": 1120.1315789473683, + "grad_norm": 1.2220538854599, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 170260 + }, + { + "epoch": 1120.1973684210527, + "grad_norm": 1.1378227472305298, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 170270 + }, + { + "epoch": 1120.2631578947369, + "grad_norm": 0.8785101771354675, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 170280 + }, + { + "epoch": 1120.328947368421, + "grad_norm": 0.8152796626091003, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 170290 + }, + { + "epoch": 1120.3947368421052, + "grad_norm": 0.6250335574150085, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 170300 + }, + { + "epoch": 1120.4605263157894, + "grad_norm": 1.0436937808990479, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 170310 + }, + { + "epoch": 1120.5263157894738, + "grad_norm": 0.9515871405601501, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 170320 + }, + { + "epoch": 1120.592105263158, + "grad_norm": 1.3222743272781372, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 170330 + }, + { + "epoch": 1120.657894736842, + "grad_norm": 0.754511833190918, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 170340 + }, + { + "epoch": 1120.7236842105262, + "grad_norm": 1.0005298852920532, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 170350 + }, + { + "epoch": 1120.7894736842106, + "grad_norm": 1.1745997667312622, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 170360 + }, + { + "epoch": 1120.8552631578948, + "grad_norm": 1.2459282875061035, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 170370 + }, + { + "epoch": 1120.921052631579, + "grad_norm": 1.0530474185943604, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 170380 + }, + { + "epoch": 1120.9868421052631, + "grad_norm": 1.2831578254699707, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 170390 + }, + { + "epoch": 1121.0526315789473, + "grad_norm": 1.1489259004592896, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 170400 + }, + { + "epoch": 1121.1184210526317, + "grad_norm": 0.929141104221344, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 170410 + }, + { + "epoch": 1121.1842105263158, + "grad_norm": 1.1473424434661865, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 170420 + }, + { + "epoch": 1121.25, + "grad_norm": 1.2042150497436523, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 170430 + }, + { + "epoch": 1121.3157894736842, + "grad_norm": 0.9099306464195251, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 170440 + }, + { + "epoch": 1121.3815789473683, + "grad_norm": 1.141692876815796, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 170450 + }, + { + "epoch": 1121.4473684210527, + "grad_norm": 1.392948865890503, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 170460 + }, + { + "epoch": 1121.5131578947369, + "grad_norm": 0.8575266599655151, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 170470 + }, + { + "epoch": 1121.578947368421, + "grad_norm": 0.9454750418663025, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 170480 + }, + { + "epoch": 1121.6447368421052, + "grad_norm": 0.644149899482727, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 170490 + }, + { + "epoch": 1121.7105263157894, + "grad_norm": 1.050371527671814, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 170500 + }, + { + "epoch": 1121.7763157894738, + "grad_norm": 1.0545912981033325, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 170510 + }, + { + "epoch": 1121.842105263158, + "grad_norm": 0.8763865828514099, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 170520 + }, + { + "epoch": 1121.907894736842, + "grad_norm": 1.6924924850463867, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 170530 + }, + { + "epoch": 1121.9736842105262, + "grad_norm": 1.3894912004470825, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 170540 + }, + { + "epoch": 1122.0394736842106, + "grad_norm": 1.1801551580429077, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 170550 + }, + { + "epoch": 1122.1052631578948, + "grad_norm": 1.1211940050125122, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 170560 + }, + { + "epoch": 1122.171052631579, + "grad_norm": 1.0931165218353271, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 170570 + }, + { + "epoch": 1122.2368421052631, + "grad_norm": 1.0842170715332031, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 170580 + }, + { + "epoch": 1122.3026315789473, + "grad_norm": 1.0453462600708008, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 170590 + }, + { + "epoch": 1122.3684210526317, + "grad_norm": 0.7612990736961365, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 170600 + }, + { + "epoch": 1122.4342105263158, + "grad_norm": 1.1230723857879639, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 170610 + }, + { + "epoch": 1122.5, + "grad_norm": 0.7165056467056274, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 170620 + }, + { + "epoch": 1122.5657894736842, + "grad_norm": 1.0598911046981812, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 170630 + }, + { + "epoch": 1122.6315789473683, + "grad_norm": 1.3187886476516724, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 170640 + }, + { + "epoch": 1122.6973684210527, + "grad_norm": 1.140576720237732, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 170650 + }, + { + "epoch": 1122.7631578947369, + "grad_norm": 0.73148512840271, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 170660 + }, + { + "epoch": 1122.828947368421, + "grad_norm": 1.5248452425003052, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 170670 + }, + { + "epoch": 1122.8947368421052, + "grad_norm": 0.9757792949676514, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 170680 + }, + { + "epoch": 1122.9605263157894, + "grad_norm": 0.7404382228851318, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 170690 + }, + { + "epoch": 1123.0263157894738, + "grad_norm": 1.0916646718978882, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 170700 + }, + { + "epoch": 1123.092105263158, + "grad_norm": 1.196270227432251, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 170710 + }, + { + "epoch": 1123.157894736842, + "grad_norm": 1.4013103246688843, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 170720 + }, + { + "epoch": 1123.2236842105262, + "grad_norm": 1.210837960243225, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 170730 + }, + { + "epoch": 1123.2894736842106, + "grad_norm": 1.1967648267745972, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 170740 + }, + { + "epoch": 1123.3552631578948, + "grad_norm": 1.1867121458053589, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 170750 + }, + { + "epoch": 1123.421052631579, + "grad_norm": 0.9490767121315002, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 170760 + }, + { + "epoch": 1123.4868421052631, + "grad_norm": 0.8652997016906738, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 170770 + }, + { + "epoch": 1123.5526315789473, + "grad_norm": 1.082627534866333, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 170780 + }, + { + "epoch": 1123.6184210526317, + "grad_norm": 1.2542318105697632, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 170790 + }, + { + "epoch": 1123.6842105263158, + "grad_norm": 1.3510973453521729, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 170800 + }, + { + "epoch": 1123.75, + "grad_norm": 1.249051570892334, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 170810 + }, + { + "epoch": 1123.8157894736842, + "grad_norm": 1.2348275184631348, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 170820 + }, + { + "epoch": 1123.8815789473683, + "grad_norm": 1.2740799188613892, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 170830 + }, + { + "epoch": 1123.9473684210527, + "grad_norm": 1.133602499961853, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 170840 + }, + { + "epoch": 1124.0131578947369, + "grad_norm": 0.8562770485877991, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 170850 + }, + { + "epoch": 1124.078947368421, + "grad_norm": 1.2140693664550781, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 170860 + }, + { + "epoch": 1124.1447368421052, + "grad_norm": 1.1629786491394043, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 170870 + }, + { + "epoch": 1124.2105263157894, + "grad_norm": 0.9350384473800659, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 170880 + }, + { + "epoch": 1124.2763157894738, + "grad_norm": 0.7206969261169434, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 170890 + }, + { + "epoch": 1124.342105263158, + "grad_norm": 1.0412704944610596, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 170900 + }, + { + "epoch": 1124.407894736842, + "grad_norm": 1.0339332818984985, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 170910 + }, + { + "epoch": 1124.4736842105262, + "grad_norm": 0.9266976714134216, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 170920 + }, + { + "epoch": 1124.5394736842106, + "grad_norm": 1.0310457944869995, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 170930 + }, + { + "epoch": 1124.6052631578948, + "grad_norm": 1.2762702703475952, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 170940 + }, + { + "epoch": 1124.671052631579, + "grad_norm": 1.1744694709777832, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 170950 + }, + { + "epoch": 1124.7368421052631, + "grad_norm": 1.1720898151397705, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 170960 + }, + { + "epoch": 1124.8026315789473, + "grad_norm": 0.9738285541534424, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 170970 + }, + { + "epoch": 1124.8684210526317, + "grad_norm": 0.9768696427345276, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 170980 + }, + { + "epoch": 1124.9342105263158, + "grad_norm": 1.0169459581375122, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 170990 + }, + { + "epoch": 1125.0, + "grad_norm": 0.9950452446937561, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 171000 + }, + { + "epoch": 1125.0657894736842, + "grad_norm": 0.8835775256156921, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 171010 + }, + { + "epoch": 1125.1315789473683, + "grad_norm": 0.8104636073112488, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 171020 + }, + { + "epoch": 1125.1973684210527, + "grad_norm": 0.9511426091194153, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 171030 + }, + { + "epoch": 1125.2631578947369, + "grad_norm": 0.873656153678894, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 171040 + }, + { + "epoch": 1125.328947368421, + "grad_norm": 1.1368869543075562, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 171050 + }, + { + "epoch": 1125.3947368421052, + "grad_norm": 0.568688154220581, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 171060 + }, + { + "epoch": 1125.4605263157894, + "grad_norm": 0.5979208946228027, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 171070 + }, + { + "epoch": 1125.5263157894738, + "grad_norm": 0.9214772582054138, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 171080 + }, + { + "epoch": 1125.592105263158, + "grad_norm": 0.7531283497810364, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 171090 + }, + { + "epoch": 1125.657894736842, + "grad_norm": 1.2580556869506836, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 171100 + }, + { + "epoch": 1125.7236842105262, + "grad_norm": 0.930986225605011, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 171110 + }, + { + "epoch": 1125.7894736842106, + "grad_norm": 1.245627999305725, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 171120 + }, + { + "epoch": 1125.8552631578948, + "grad_norm": 0.7773181796073914, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 171130 + }, + { + "epoch": 1125.921052631579, + "grad_norm": 1.173557996749878, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 171140 + }, + { + "epoch": 1125.9868421052631, + "grad_norm": 0.8408874869346619, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 171150 + }, + { + "epoch": 1126.0526315789473, + "grad_norm": 0.6218984127044678, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 171160 + }, + { + "epoch": 1126.1184210526317, + "grad_norm": 0.9372327923774719, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 171170 + }, + { + "epoch": 1126.1842105263158, + "grad_norm": 0.9183964729309082, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 171180 + }, + { + "epoch": 1126.25, + "grad_norm": 1.0843031406402588, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 171190 + }, + { + "epoch": 1126.3157894736842, + "grad_norm": 1.221227765083313, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 171200 + }, + { + "epoch": 1126.3815789473683, + "grad_norm": 1.0891140699386597, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 171210 + }, + { + "epoch": 1126.4473684210527, + "grad_norm": 1.0184756517410278, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 171220 + }, + { + "epoch": 1126.5131578947369, + "grad_norm": 0.9050408601760864, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 171230 + }, + { + "epoch": 1126.578947368421, + "grad_norm": 0.754024088382721, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 171240 + }, + { + "epoch": 1126.6447368421052, + "grad_norm": 1.1629414558410645, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 171250 + }, + { + "epoch": 1126.7105263157894, + "grad_norm": 1.1155112981796265, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 171260 + }, + { + "epoch": 1126.7763157894738, + "grad_norm": 1.1457107067108154, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 171270 + }, + { + "epoch": 1126.842105263158, + "grad_norm": 1.5924310684204102, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 171280 + }, + { + "epoch": 1126.907894736842, + "grad_norm": 1.267576813697815, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 171290 + }, + { + "epoch": 1126.9736842105262, + "grad_norm": 1.2480442523956299, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 171300 + }, + { + "epoch": 1127.0394736842106, + "grad_norm": 0.923325777053833, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 171310 + }, + { + "epoch": 1127.1052631578948, + "grad_norm": 1.2825509309768677, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 171320 + }, + { + "epoch": 1127.171052631579, + "grad_norm": 1.1516602039337158, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 171330 + }, + { + "epoch": 1127.2368421052631, + "grad_norm": 0.9105165004730225, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 171340 + }, + { + "epoch": 1127.3026315789473, + "grad_norm": 1.116930603981018, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 171350 + }, + { + "epoch": 1127.3684210526317, + "grad_norm": 1.105677604675293, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 171360 + }, + { + "epoch": 1127.4342105263158, + "grad_norm": 1.1379443407058716, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 171370 + }, + { + "epoch": 1127.5, + "grad_norm": 0.8313212394714355, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 171380 + }, + { + "epoch": 1127.5657894736842, + "grad_norm": 0.9692210555076599, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 171390 + }, + { + "epoch": 1127.6315789473683, + "grad_norm": 0.9176498651504517, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 171400 + }, + { + "epoch": 1127.6973684210527, + "grad_norm": 1.2443631887435913, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 171410 + }, + { + "epoch": 1127.7631578947369, + "grad_norm": 1.4867162704467773, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 171420 + }, + { + "epoch": 1127.828947368421, + "grad_norm": 1.2587157487869263, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 171430 + }, + { + "epoch": 1127.8947368421052, + "grad_norm": 1.1008973121643066, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 171440 + }, + { + "epoch": 1127.9605263157894, + "grad_norm": 1.035931944847107, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 171450 + }, + { + "epoch": 1128.0263157894738, + "grad_norm": 0.9197618961334229, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 171460 + }, + { + "epoch": 1128.092105263158, + "grad_norm": 0.9170363545417786, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 171470 + }, + { + "epoch": 1128.157894736842, + "grad_norm": 1.0054898262023926, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 171480 + }, + { + "epoch": 1128.2236842105262, + "grad_norm": 0.6774921417236328, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 171490 + }, + { + "epoch": 1128.2894736842106, + "grad_norm": 1.0647435188293457, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 171500 + }, + { + "epoch": 1128.3552631578948, + "grad_norm": 0.9801605343818665, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 171510 + }, + { + "epoch": 1128.421052631579, + "grad_norm": 0.9069273471832275, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 171520 + }, + { + "epoch": 1128.4868421052631, + "grad_norm": 0.8292950987815857, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 171530 + }, + { + "epoch": 1128.5526315789473, + "grad_norm": 1.0863277912139893, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 171540 + }, + { + "epoch": 1128.6184210526317, + "grad_norm": 1.3724346160888672, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 171550 + }, + { + "epoch": 1128.6842105263158, + "grad_norm": 0.905957043170929, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 171560 + }, + { + "epoch": 1128.75, + "grad_norm": 0.5370518565177917, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 171570 + }, + { + "epoch": 1128.8157894736842, + "grad_norm": 1.248266339302063, + "learning_rate": 0.0001, + "loss": 0.0069, + "step": 171580 + }, + { + "epoch": 1128.8815789473683, + "grad_norm": 1.00150728225708, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 171590 + }, + { + "epoch": 1128.9473684210527, + "grad_norm": 1.0399243831634521, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 171600 + }, + { + "epoch": 1129.0131578947369, + "grad_norm": 1.1588037014007568, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 171610 + }, + { + "epoch": 1129.078947368421, + "grad_norm": 1.0992803573608398, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 171620 + }, + { + "epoch": 1129.1447368421052, + "grad_norm": 1.2114496231079102, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 171630 + }, + { + "epoch": 1129.2105263157894, + "grad_norm": 1.3456045389175415, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 171640 + }, + { + "epoch": 1129.2763157894738, + "grad_norm": 1.063812017440796, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 171650 + }, + { + "epoch": 1129.342105263158, + "grad_norm": 0.7482918500900269, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 171660 + }, + { + "epoch": 1129.407894736842, + "grad_norm": 0.9320112466812134, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 171670 + }, + { + "epoch": 1129.4736842105262, + "grad_norm": 1.2601550817489624, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 171680 + }, + { + "epoch": 1129.5394736842106, + "grad_norm": 0.9702651500701904, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 171690 + }, + { + "epoch": 1129.6052631578948, + "grad_norm": 0.6806167960166931, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 171700 + }, + { + "epoch": 1129.671052631579, + "grad_norm": 0.8016168475151062, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 171710 + }, + { + "epoch": 1129.7368421052631, + "grad_norm": 1.0033786296844482, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 171720 + }, + { + "epoch": 1129.8026315789473, + "grad_norm": 1.152343988418579, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 171730 + }, + { + "epoch": 1129.8684210526317, + "grad_norm": 1.0898712873458862, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 171740 + }, + { + "epoch": 1129.9342105263158, + "grad_norm": 1.0133626461029053, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 171750 + }, + { + "epoch": 1130.0, + "grad_norm": 1.0823217630386353, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 171760 + }, + { + "epoch": 1130.0657894736842, + "grad_norm": 0.853507936000824, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 171770 + }, + { + "epoch": 1130.1315789473683, + "grad_norm": 1.2531830072402954, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 171780 + }, + { + "epoch": 1130.1973684210527, + "grad_norm": 1.178091287612915, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 171790 + }, + { + "epoch": 1130.2631578947369, + "grad_norm": 1.0549789667129517, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 171800 + }, + { + "epoch": 1130.328947368421, + "grad_norm": 1.3365063667297363, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 171810 + }, + { + "epoch": 1130.3947368421052, + "grad_norm": 1.022502064704895, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 171820 + }, + { + "epoch": 1130.4605263157894, + "grad_norm": 0.9836164116859436, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 171830 + }, + { + "epoch": 1130.5263157894738, + "grad_norm": 1.1745048761367798, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 171840 + }, + { + "epoch": 1130.592105263158, + "grad_norm": 1.0340749025344849, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 171850 + }, + { + "epoch": 1130.657894736842, + "grad_norm": 0.8751736283302307, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 171860 + }, + { + "epoch": 1130.7236842105262, + "grad_norm": 1.1623742580413818, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 171870 + }, + { + "epoch": 1130.7894736842106, + "grad_norm": 0.9135785102844238, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 171880 + }, + { + "epoch": 1130.8552631578948, + "grad_norm": 0.9940324425697327, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 171890 + }, + { + "epoch": 1130.921052631579, + "grad_norm": 0.944202721118927, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 171900 + }, + { + "epoch": 1130.9868421052631, + "grad_norm": 1.2017103433609009, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 171910 + }, + { + "epoch": 1131.0526315789473, + "grad_norm": 0.7654322385787964, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 171920 + }, + { + "epoch": 1131.1184210526317, + "grad_norm": 0.7834600210189819, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 171930 + }, + { + "epoch": 1131.1842105263158, + "grad_norm": 0.751923143863678, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 171940 + }, + { + "epoch": 1131.25, + "grad_norm": 0.9888975024223328, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 171950 + }, + { + "epoch": 1131.3157894736842, + "grad_norm": 1.2195971012115479, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 171960 + }, + { + "epoch": 1131.3815789473683, + "grad_norm": 1.110573410987854, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 171970 + }, + { + "epoch": 1131.4473684210527, + "grad_norm": 1.1125825643539429, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 171980 + }, + { + "epoch": 1131.5131578947369, + "grad_norm": 0.6448672413825989, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 171990 + }, + { + "epoch": 1131.578947368421, + "grad_norm": 1.2890968322753906, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 172000 + }, + { + "epoch": 1131.6447368421052, + "grad_norm": 1.0350221395492554, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 172010 + }, + { + "epoch": 1131.7105263157894, + "grad_norm": 0.9488757252693176, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 172020 + }, + { + "epoch": 1131.7763157894738, + "grad_norm": 0.7833364009857178, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 172030 + }, + { + "epoch": 1131.842105263158, + "grad_norm": 1.1103259325027466, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 172040 + }, + { + "epoch": 1131.907894736842, + "grad_norm": 1.4510931968688965, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 172050 + }, + { + "epoch": 1131.9736842105262, + "grad_norm": 1.293509840965271, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 172060 + }, + { + "epoch": 1132.0394736842106, + "grad_norm": 1.0810400247573853, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 172070 + }, + { + "epoch": 1132.1052631578948, + "grad_norm": 1.3170888423919678, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 172080 + }, + { + "epoch": 1132.171052631579, + "grad_norm": 0.8184434771537781, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 172090 + }, + { + "epoch": 1132.2368421052631, + "grad_norm": 1.073915958404541, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 172100 + }, + { + "epoch": 1132.3026315789473, + "grad_norm": 1.019290566444397, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 172110 + }, + { + "epoch": 1132.3684210526317, + "grad_norm": 1.3928847312927246, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 172120 + }, + { + "epoch": 1132.4342105263158, + "grad_norm": 1.158503770828247, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 172130 + }, + { + "epoch": 1132.5, + "grad_norm": 1.2527748346328735, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 172140 + }, + { + "epoch": 1132.5657894736842, + "grad_norm": 1.0773019790649414, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 172150 + }, + { + "epoch": 1132.6315789473683, + "grad_norm": 1.3126847743988037, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 172160 + }, + { + "epoch": 1132.6973684210527, + "grad_norm": 0.9994803071022034, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 172170 + }, + { + "epoch": 1132.7631578947369, + "grad_norm": 1.4703785181045532, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 172180 + }, + { + "epoch": 1132.828947368421, + "grad_norm": 0.6720287799835205, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 172190 + }, + { + "epoch": 1132.8947368421052, + "grad_norm": 0.6046428084373474, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 172200 + }, + { + "epoch": 1132.9605263157894, + "grad_norm": 1.031443476676941, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 172210 + }, + { + "epoch": 1133.0263157894738, + "grad_norm": 1.2193036079406738, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 172220 + }, + { + "epoch": 1133.092105263158, + "grad_norm": 1.1105955839157104, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 172230 + }, + { + "epoch": 1133.157894736842, + "grad_norm": 1.0140831470489502, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 172240 + }, + { + "epoch": 1133.2236842105262, + "grad_norm": 0.8403400778770447, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 172250 + }, + { + "epoch": 1133.2894736842106, + "grad_norm": 1.143967628479004, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 172260 + }, + { + "epoch": 1133.3552631578948, + "grad_norm": 0.6676881909370422, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 172270 + }, + { + "epoch": 1133.421052631579, + "grad_norm": 0.8999795913696289, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 172280 + }, + { + "epoch": 1133.4868421052631, + "grad_norm": 0.9858129024505615, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 172290 + }, + { + "epoch": 1133.5526315789473, + "grad_norm": 0.9326895475387573, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 172300 + }, + { + "epoch": 1133.6184210526317, + "grad_norm": 1.069669246673584, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 172310 + }, + { + "epoch": 1133.6842105263158, + "grad_norm": 0.8025325536727905, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 172320 + }, + { + "epoch": 1133.75, + "grad_norm": 0.5994061231613159, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 172330 + }, + { + "epoch": 1133.8157894736842, + "grad_norm": 0.6835926175117493, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 172340 + }, + { + "epoch": 1133.8815789473683, + "grad_norm": 0.6774199604988098, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 172350 + }, + { + "epoch": 1133.9473684210527, + "grad_norm": 1.1998493671417236, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 172360 + }, + { + "epoch": 1134.0131578947369, + "grad_norm": 1.2091012001037598, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 172370 + }, + { + "epoch": 1134.078947368421, + "grad_norm": 0.9642314910888672, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 172380 + }, + { + "epoch": 1134.1447368421052, + "grad_norm": 0.9943333864212036, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 172390 + }, + { + "epoch": 1134.2105263157894, + "grad_norm": 1.449242115020752, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 172400 + }, + { + "epoch": 1134.2763157894738, + "grad_norm": 1.4561964273452759, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 172410 + }, + { + "epoch": 1134.342105263158, + "grad_norm": 1.191004991531372, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 172420 + }, + { + "epoch": 1134.407894736842, + "grad_norm": 1.171547293663025, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 172430 + }, + { + "epoch": 1134.4736842105262, + "grad_norm": 1.08799409866333, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 172440 + }, + { + "epoch": 1134.5394736842106, + "grad_norm": 1.141973614692688, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 172450 + }, + { + "epoch": 1134.6052631578948, + "grad_norm": 1.2193361520767212, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 172460 + }, + { + "epoch": 1134.671052631579, + "grad_norm": 1.011452078819275, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 172470 + }, + { + "epoch": 1134.7368421052631, + "grad_norm": 1.046140432357788, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 172480 + }, + { + "epoch": 1134.8026315789473, + "grad_norm": 0.9603179693222046, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 172490 + }, + { + "epoch": 1134.8684210526317, + "grad_norm": 1.3100394010543823, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 172500 + }, + { + "epoch": 1134.9342105263158, + "grad_norm": 0.8967265486717224, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 172510 + }, + { + "epoch": 1135.0, + "grad_norm": 0.750058650970459, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 172520 + }, + { + "epoch": 1135.0657894736842, + "grad_norm": 0.9799665808677673, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 172530 + }, + { + "epoch": 1135.1315789473683, + "grad_norm": 0.871820330619812, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 172540 + }, + { + "epoch": 1135.1973684210527, + "grad_norm": 1.1122573614120483, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 172550 + }, + { + "epoch": 1135.2631578947369, + "grad_norm": 0.9300641417503357, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 172560 + }, + { + "epoch": 1135.328947368421, + "grad_norm": 1.2777584791183472, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 172570 + }, + { + "epoch": 1135.3947368421052, + "grad_norm": 0.886059582233429, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 172580 + }, + { + "epoch": 1135.4605263157894, + "grad_norm": 1.118418574333191, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 172590 + }, + { + "epoch": 1135.5263157894738, + "grad_norm": 1.219286561012268, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 172600 + }, + { + "epoch": 1135.592105263158, + "grad_norm": 1.4709956645965576, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 172610 + }, + { + "epoch": 1135.657894736842, + "grad_norm": 1.124354600906372, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 172620 + }, + { + "epoch": 1135.7236842105262, + "grad_norm": 0.929551362991333, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 172630 + }, + { + "epoch": 1135.7894736842106, + "grad_norm": 1.3097996711730957, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 172640 + }, + { + "epoch": 1135.8552631578948, + "grad_norm": 1.0899239778518677, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 172650 + }, + { + "epoch": 1135.921052631579, + "grad_norm": 1.0806041955947876, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 172660 + }, + { + "epoch": 1135.9868421052631, + "grad_norm": 1.1731756925582886, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 172670 + }, + { + "epoch": 1136.0526315789473, + "grad_norm": 0.7187557816505432, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 172680 + }, + { + "epoch": 1136.1184210526317, + "grad_norm": 1.014794945716858, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 172690 + }, + { + "epoch": 1136.1842105263158, + "grad_norm": 0.9141129851341248, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 172700 + }, + { + "epoch": 1136.25, + "grad_norm": 0.8806213736534119, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 172710 + }, + { + "epoch": 1136.3157894736842, + "grad_norm": 1.0453318357467651, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 172720 + }, + { + "epoch": 1136.3815789473683, + "grad_norm": 1.0713512897491455, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 172730 + }, + { + "epoch": 1136.4473684210527, + "grad_norm": 0.8867670893669128, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 172740 + }, + { + "epoch": 1136.5131578947369, + "grad_norm": 1.3366000652313232, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 172750 + }, + { + "epoch": 1136.578947368421, + "grad_norm": 1.2236058712005615, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 172760 + }, + { + "epoch": 1136.6447368421052, + "grad_norm": 1.2140995264053345, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 172770 + }, + { + "epoch": 1136.7105263157894, + "grad_norm": 1.1668179035186768, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 172780 + }, + { + "epoch": 1136.7763157894738, + "grad_norm": 0.9051017761230469, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 172790 + }, + { + "epoch": 1136.842105263158, + "grad_norm": 1.346951961517334, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 172800 + }, + { + "epoch": 1136.907894736842, + "grad_norm": 0.9546437859535217, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 172810 + }, + { + "epoch": 1136.9736842105262, + "grad_norm": 1.1562156677246094, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 172820 + }, + { + "epoch": 1137.0394736842106, + "grad_norm": 1.1238209009170532, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 172830 + }, + { + "epoch": 1137.1052631578948, + "grad_norm": 1.0962235927581787, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 172840 + }, + { + "epoch": 1137.171052631579, + "grad_norm": 1.1757200956344604, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 172850 + }, + { + "epoch": 1137.2368421052631, + "grad_norm": 0.6541777849197388, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 172860 + }, + { + "epoch": 1137.3026315789473, + "grad_norm": 0.7562914490699768, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 172870 + }, + { + "epoch": 1137.3684210526317, + "grad_norm": 0.8109030723571777, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 172880 + }, + { + "epoch": 1137.4342105263158, + "grad_norm": 0.924420952796936, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 172890 + }, + { + "epoch": 1137.5, + "grad_norm": 0.7896656394004822, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 172900 + }, + { + "epoch": 1137.5657894736842, + "grad_norm": 1.00142502784729, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 172910 + }, + { + "epoch": 1137.6315789473683, + "grad_norm": 0.8568731546401978, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 172920 + }, + { + "epoch": 1137.6973684210527, + "grad_norm": 1.0580912828445435, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 172930 + }, + { + "epoch": 1137.7631578947369, + "grad_norm": 0.8699727058410645, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 172940 + }, + { + "epoch": 1137.828947368421, + "grad_norm": 0.8225347399711609, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 172950 + }, + { + "epoch": 1137.8947368421052, + "grad_norm": 1.0345213413238525, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 172960 + }, + { + "epoch": 1137.9605263157894, + "grad_norm": 1.1980303525924683, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 172970 + }, + { + "epoch": 1138.0263157894738, + "grad_norm": 1.3538132905960083, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 172980 + }, + { + "epoch": 1138.092105263158, + "grad_norm": 1.0862656831741333, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 172990 + }, + { + "epoch": 1138.157894736842, + "grad_norm": 1.2043201923370361, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 173000 + }, + { + "epoch": 1138.2236842105262, + "grad_norm": 1.0616967678070068, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 173010 + }, + { + "epoch": 1138.2894736842106, + "grad_norm": 1.2403620481491089, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 173020 + }, + { + "epoch": 1138.3552631578948, + "grad_norm": 1.2264503240585327, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 173030 + }, + { + "epoch": 1138.421052631579, + "grad_norm": 0.9958550333976746, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 173040 + }, + { + "epoch": 1138.4868421052631, + "grad_norm": 0.9058167934417725, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 173050 + }, + { + "epoch": 1138.5526315789473, + "grad_norm": 0.7711786031723022, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 173060 + }, + { + "epoch": 1138.6184210526317, + "grad_norm": 1.0303704738616943, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 173070 + }, + { + "epoch": 1138.6842105263158, + "grad_norm": 1.2294689416885376, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 173080 + }, + { + "epoch": 1138.75, + "grad_norm": 0.888333797454834, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 173090 + }, + { + "epoch": 1138.8157894736842, + "grad_norm": 0.8880026936531067, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 173100 + }, + { + "epoch": 1138.8815789473683, + "grad_norm": 0.8499730229377747, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 173110 + }, + { + "epoch": 1138.9473684210527, + "grad_norm": 1.0166504383087158, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 173120 + }, + { + "epoch": 1139.0131578947369, + "grad_norm": 1.199263334274292, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 173130 + }, + { + "epoch": 1139.078947368421, + "grad_norm": 0.9333541393280029, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 173140 + }, + { + "epoch": 1139.1447368421052, + "grad_norm": 1.0832608938217163, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 173150 + }, + { + "epoch": 1139.2105263157894, + "grad_norm": 1.0656371116638184, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 173160 + }, + { + "epoch": 1139.2763157894738, + "grad_norm": 0.7567489147186279, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 173170 + }, + { + "epoch": 1139.342105263158, + "grad_norm": 1.146492600440979, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 173180 + }, + { + "epoch": 1139.407894736842, + "grad_norm": 1.0995131731033325, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 173190 + }, + { + "epoch": 1139.4736842105262, + "grad_norm": 1.1685936450958252, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 173200 + }, + { + "epoch": 1139.5394736842106, + "grad_norm": 0.8442726731300354, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 173210 + }, + { + "epoch": 1139.6052631578948, + "grad_norm": 0.9669975638389587, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 173220 + }, + { + "epoch": 1139.671052631579, + "grad_norm": 1.19166100025177, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 173230 + }, + { + "epoch": 1139.7368421052631, + "grad_norm": 0.6347991228103638, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 173240 + }, + { + "epoch": 1139.8026315789473, + "grad_norm": 0.877065122127533, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 173250 + }, + { + "epoch": 1139.8684210526317, + "grad_norm": 0.9608317017555237, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 173260 + }, + { + "epoch": 1139.9342105263158, + "grad_norm": 1.243171215057373, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 173270 + }, + { + "epoch": 1140.0, + "grad_norm": 0.7330761551856995, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 173280 + }, + { + "epoch": 1140.0657894736842, + "grad_norm": 1.0452905893325806, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 173290 + }, + { + "epoch": 1140.1315789473683, + "grad_norm": 1.095406413078308, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 173300 + }, + { + "epoch": 1140.1973684210527, + "grad_norm": 0.8945803642272949, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 173310 + }, + { + "epoch": 1140.2631578947369, + "grad_norm": 1.0646826028823853, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 173320 + }, + { + "epoch": 1140.328947368421, + "grad_norm": 0.9003171324729919, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 173330 + }, + { + "epoch": 1140.3947368421052, + "grad_norm": 1.0605850219726562, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 173340 + }, + { + "epoch": 1140.4605263157894, + "grad_norm": 1.0738884210586548, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 173350 + }, + { + "epoch": 1140.5263157894738, + "grad_norm": 1.067591667175293, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 173360 + }, + { + "epoch": 1140.592105263158, + "grad_norm": 1.4844350814819336, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 173370 + }, + { + "epoch": 1140.657894736842, + "grad_norm": 1.0386178493499756, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 173380 + }, + { + "epoch": 1140.7236842105262, + "grad_norm": 1.306445837020874, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 173390 + }, + { + "epoch": 1140.7894736842106, + "grad_norm": 1.4519286155700684, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 173400 + }, + { + "epoch": 1140.8552631578948, + "grad_norm": 1.0992698669433594, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 173410 + }, + { + "epoch": 1140.921052631579, + "grad_norm": 1.0953896045684814, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 173420 + }, + { + "epoch": 1140.9868421052631, + "grad_norm": 1.086705207824707, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 173430 + }, + { + "epoch": 1141.0526315789473, + "grad_norm": 0.9892106056213379, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 173440 + }, + { + "epoch": 1141.1184210526317, + "grad_norm": 0.8099537491798401, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 173450 + }, + { + "epoch": 1141.1842105263158, + "grad_norm": 1.2578682899475098, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 173460 + }, + { + "epoch": 1141.25, + "grad_norm": 1.0416673421859741, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 173470 + }, + { + "epoch": 1141.3157894736842, + "grad_norm": 0.9099404215812683, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 173480 + }, + { + "epoch": 1141.3815789473683, + "grad_norm": 1.2768899202346802, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 173490 + }, + { + "epoch": 1141.4473684210527, + "grad_norm": 1.1993544101715088, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 173500 + }, + { + "epoch": 1141.5131578947369, + "grad_norm": 1.094346046447754, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 173510 + }, + { + "epoch": 1141.578947368421, + "grad_norm": 0.8274684548377991, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 173520 + }, + { + "epoch": 1141.6447368421052, + "grad_norm": 1.1275930404663086, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 173530 + }, + { + "epoch": 1141.7105263157894, + "grad_norm": 1.065172553062439, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 173540 + }, + { + "epoch": 1141.7763157894738, + "grad_norm": 0.9865522980690002, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 173550 + }, + { + "epoch": 1141.842105263158, + "grad_norm": 0.7311487793922424, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 173560 + }, + { + "epoch": 1141.907894736842, + "grad_norm": 0.9053587317466736, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 173570 + }, + { + "epoch": 1141.9736842105262, + "grad_norm": 1.1042989492416382, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 173580 + }, + { + "epoch": 1142.0394736842106, + "grad_norm": 1.2687427997589111, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 173590 + }, + { + "epoch": 1142.1052631578948, + "grad_norm": 0.8820356726646423, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 173600 + }, + { + "epoch": 1142.171052631579, + "grad_norm": 1.3813494443893433, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 173610 + }, + { + "epoch": 1142.2368421052631, + "grad_norm": 0.895897626876831, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 173620 + }, + { + "epoch": 1142.3026315789473, + "grad_norm": 1.020932674407959, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 173630 + }, + { + "epoch": 1142.3684210526317, + "grad_norm": 1.1267672777175903, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 173640 + }, + { + "epoch": 1142.4342105263158, + "grad_norm": 0.8028258085250854, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 173650 + }, + { + "epoch": 1142.5, + "grad_norm": 1.3700445890426636, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 173660 + }, + { + "epoch": 1142.5657894736842, + "grad_norm": 0.9799003005027771, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 173670 + }, + { + "epoch": 1142.6315789473683, + "grad_norm": 0.9328917264938354, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 173680 + }, + { + "epoch": 1142.6973684210527, + "grad_norm": 0.8575257658958435, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 173690 + }, + { + "epoch": 1142.7631578947369, + "grad_norm": 0.9841787219047546, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 173700 + }, + { + "epoch": 1142.828947368421, + "grad_norm": 1.0814399719238281, + "learning_rate": 0.0001, + "loss": 0.014, + "step": 173710 + }, + { + "epoch": 1142.8947368421052, + "grad_norm": 1.0300018787384033, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 173720 + }, + { + "epoch": 1142.9605263157894, + "grad_norm": 0.9641240239143372, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 173730 + }, + { + "epoch": 1143.0263157894738, + "grad_norm": 1.5655041933059692, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 173740 + }, + { + "epoch": 1143.092105263158, + "grad_norm": 1.1116403341293335, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 173750 + }, + { + "epoch": 1143.157894736842, + "grad_norm": 0.8238686919212341, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 173760 + }, + { + "epoch": 1143.2236842105262, + "grad_norm": 1.036594033241272, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 173770 + }, + { + "epoch": 1143.2894736842106, + "grad_norm": 1.2605559825897217, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 173780 + }, + { + "epoch": 1143.3552631578948, + "grad_norm": 0.9970645904541016, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 173790 + }, + { + "epoch": 1143.421052631579, + "grad_norm": 0.645720362663269, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 173800 + }, + { + "epoch": 1143.4868421052631, + "grad_norm": 1.0435887575149536, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 173810 + }, + { + "epoch": 1143.5526315789473, + "grad_norm": 1.0935068130493164, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 173820 + }, + { + "epoch": 1143.6184210526317, + "grad_norm": 0.9884122610092163, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 173830 + }, + { + "epoch": 1143.6842105263158, + "grad_norm": 1.100342035293579, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 173840 + }, + { + "epoch": 1143.75, + "grad_norm": 0.95659339427948, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 173850 + }, + { + "epoch": 1143.8157894736842, + "grad_norm": 0.9715131521224976, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 173860 + }, + { + "epoch": 1143.8815789473683, + "grad_norm": 0.7474709153175354, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 173870 + }, + { + "epoch": 1143.9473684210527, + "grad_norm": 0.9470142126083374, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 173880 + }, + { + "epoch": 1144.0131578947369, + "grad_norm": 0.9878813624382019, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 173890 + }, + { + "epoch": 1144.078947368421, + "grad_norm": 0.6697216629981995, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 173900 + }, + { + "epoch": 1144.1447368421052, + "grad_norm": 1.1067986488342285, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 173910 + }, + { + "epoch": 1144.2105263157894, + "grad_norm": 1.1652238368988037, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 173920 + }, + { + "epoch": 1144.2763157894738, + "grad_norm": 1.5187675952911377, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 173930 + }, + { + "epoch": 1144.342105263158, + "grad_norm": 0.8939927220344543, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 173940 + }, + { + "epoch": 1144.407894736842, + "grad_norm": 1.0155541896820068, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 173950 + }, + { + "epoch": 1144.4736842105262, + "grad_norm": 1.0913894176483154, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 173960 + }, + { + "epoch": 1144.5394736842106, + "grad_norm": 0.9633883237838745, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 173970 + }, + { + "epoch": 1144.6052631578948, + "grad_norm": 0.845365047454834, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 173980 + }, + { + "epoch": 1144.671052631579, + "grad_norm": 0.9477745890617371, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 173990 + }, + { + "epoch": 1144.7368421052631, + "grad_norm": 1.1216838359832764, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 174000 + }, + { + "epoch": 1144.8026315789473, + "grad_norm": 1.5633032321929932, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 174010 + }, + { + "epoch": 1144.8684210526317, + "grad_norm": 1.2574188709259033, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 174020 + }, + { + "epoch": 1144.9342105263158, + "grad_norm": 1.3190844058990479, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 174030 + }, + { + "epoch": 1145.0, + "grad_norm": 1.2496864795684814, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 174040 + }, + { + "epoch": 1145.0657894736842, + "grad_norm": 1.188367486000061, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 174050 + }, + { + "epoch": 1145.1315789473683, + "grad_norm": 1.2431753873825073, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 174060 + }, + { + "epoch": 1145.1973684210527, + "grad_norm": 1.5690101385116577, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 174070 + }, + { + "epoch": 1145.2631578947369, + "grad_norm": 1.4982401132583618, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 174080 + }, + { + "epoch": 1145.328947368421, + "grad_norm": 1.3239631652832031, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 174090 + }, + { + "epoch": 1145.3947368421052, + "grad_norm": 1.079105257987976, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 174100 + }, + { + "epoch": 1145.4605263157894, + "grad_norm": 1.0493974685668945, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 174110 + }, + { + "epoch": 1145.5263157894738, + "grad_norm": 1.0567365884780884, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 174120 + }, + { + "epoch": 1145.592105263158, + "grad_norm": 1.0888392925262451, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 174130 + }, + { + "epoch": 1145.657894736842, + "grad_norm": 1.156753420829773, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 174140 + }, + { + "epoch": 1145.7236842105262, + "grad_norm": 0.7009889483451843, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 174150 + }, + { + "epoch": 1145.7894736842106, + "grad_norm": 1.242331862449646, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 174160 + }, + { + "epoch": 1145.8552631578948, + "grad_norm": 1.1424720287322998, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 174170 + }, + { + "epoch": 1145.921052631579, + "grad_norm": 1.1428437232971191, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 174180 + }, + { + "epoch": 1145.9868421052631, + "grad_norm": 1.0217548608779907, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 174190 + }, + { + "epoch": 1146.0526315789473, + "grad_norm": 0.9081407189369202, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 174200 + }, + { + "epoch": 1146.1184210526317, + "grad_norm": 1.0314104557037354, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 174210 + }, + { + "epoch": 1146.1842105263158, + "grad_norm": 0.9840436577796936, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 174220 + }, + { + "epoch": 1146.25, + "grad_norm": 1.1434142589569092, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 174230 + }, + { + "epoch": 1146.3157894736842, + "grad_norm": 0.9102608561515808, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 174240 + }, + { + "epoch": 1146.3815789473683, + "grad_norm": 1.0038232803344727, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 174250 + }, + { + "epoch": 1146.4473684210527, + "grad_norm": 0.7302023768424988, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 174260 + }, + { + "epoch": 1146.5131578947369, + "grad_norm": 1.0826613903045654, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 174270 + }, + { + "epoch": 1146.578947368421, + "grad_norm": 1.095320224761963, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 174280 + }, + { + "epoch": 1146.6447368421052, + "grad_norm": 1.0644302368164062, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 174290 + }, + { + "epoch": 1146.7105263157894, + "grad_norm": 1.1615333557128906, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 174300 + }, + { + "epoch": 1146.7763157894738, + "grad_norm": 1.3274446725845337, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 174310 + }, + { + "epoch": 1146.842105263158, + "grad_norm": 1.1300023794174194, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 174320 + }, + { + "epoch": 1146.907894736842, + "grad_norm": 1.0761096477508545, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 174330 + }, + { + "epoch": 1146.9736842105262, + "grad_norm": 0.9165819883346558, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 174340 + }, + { + "epoch": 1147.0394736842106, + "grad_norm": 0.8142982721328735, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 174350 + }, + { + "epoch": 1147.1052631578948, + "grad_norm": 0.7436504364013672, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 174360 + }, + { + "epoch": 1147.171052631579, + "grad_norm": 0.9768326878547668, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 174370 + }, + { + "epoch": 1147.2368421052631, + "grad_norm": 1.2096675634384155, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 174380 + }, + { + "epoch": 1147.3026315789473, + "grad_norm": 1.1367160081863403, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 174390 + }, + { + "epoch": 1147.3684210526317, + "grad_norm": 0.680374801158905, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 174400 + }, + { + "epoch": 1147.4342105263158, + "grad_norm": 0.9637120962142944, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 174410 + }, + { + "epoch": 1147.5, + "grad_norm": 0.9413122534751892, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 174420 + }, + { + "epoch": 1147.5657894736842, + "grad_norm": 1.165696620941162, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 174430 + }, + { + "epoch": 1147.6315789473683, + "grad_norm": 1.0719096660614014, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 174440 + }, + { + "epoch": 1147.6973684210527, + "grad_norm": 0.9276707768440247, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 174450 + }, + { + "epoch": 1147.7631578947369, + "grad_norm": 0.7522660493850708, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 174460 + }, + { + "epoch": 1147.828947368421, + "grad_norm": 0.9146892428398132, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 174470 + }, + { + "epoch": 1147.8947368421052, + "grad_norm": 1.5214803218841553, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 174480 + }, + { + "epoch": 1147.9605263157894, + "grad_norm": 1.152605652809143, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 174490 + }, + { + "epoch": 1148.0263157894738, + "grad_norm": 1.000752568244934, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 174500 + }, + { + "epoch": 1148.092105263158, + "grad_norm": 1.0302700996398926, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 174510 + }, + { + "epoch": 1148.157894736842, + "grad_norm": 1.0237771272659302, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 174520 + }, + { + "epoch": 1148.2236842105262, + "grad_norm": 1.1548134088516235, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 174530 + }, + { + "epoch": 1148.2894736842106, + "grad_norm": 1.4400047063827515, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 174540 + }, + { + "epoch": 1148.3552631578948, + "grad_norm": 0.9746676087379456, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 174550 + }, + { + "epoch": 1148.421052631579, + "grad_norm": 0.8200980424880981, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 174560 + }, + { + "epoch": 1148.4868421052631, + "grad_norm": 1.2456996440887451, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 174570 + }, + { + "epoch": 1148.5526315789473, + "grad_norm": 1.0017765760421753, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 174580 + }, + { + "epoch": 1148.6184210526317, + "grad_norm": 0.7824232578277588, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 174590 + }, + { + "epoch": 1148.6842105263158, + "grad_norm": 1.0675746202468872, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 174600 + }, + { + "epoch": 1148.75, + "grad_norm": 0.8915340304374695, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 174610 + }, + { + "epoch": 1148.8157894736842, + "grad_norm": 1.1041761636734009, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 174620 + }, + { + "epoch": 1148.8815789473683, + "grad_norm": 0.9389930367469788, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 174630 + }, + { + "epoch": 1148.9473684210527, + "grad_norm": 0.7159057259559631, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 174640 + }, + { + "epoch": 1149.0131578947369, + "grad_norm": 0.8780013918876648, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 174650 + }, + { + "epoch": 1149.078947368421, + "grad_norm": 0.7802185416221619, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 174660 + }, + { + "epoch": 1149.1447368421052, + "grad_norm": 1.1969313621520996, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 174670 + }, + { + "epoch": 1149.2105263157894, + "grad_norm": 1.0634304285049438, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 174680 + }, + { + "epoch": 1149.2763157894738, + "grad_norm": 0.9525381326675415, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 174690 + }, + { + "epoch": 1149.342105263158, + "grad_norm": 0.895878791809082, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 174700 + }, + { + "epoch": 1149.407894736842, + "grad_norm": 0.641148030757904, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 174710 + }, + { + "epoch": 1149.4736842105262, + "grad_norm": 1.0341429710388184, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 174720 + }, + { + "epoch": 1149.5394736842106, + "grad_norm": 0.9922910928726196, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 174730 + }, + { + "epoch": 1149.6052631578948, + "grad_norm": 1.1714662313461304, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 174740 + }, + { + "epoch": 1149.671052631579, + "grad_norm": 2.198077440261841, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 174750 + }, + { + "epoch": 1149.7368421052631, + "grad_norm": 1.9137755632400513, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 174760 + }, + { + "epoch": 1149.8026315789473, + "grad_norm": 1.6982780694961548, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 174770 + }, + { + "epoch": 1149.8684210526317, + "grad_norm": 1.3892414569854736, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 174780 + }, + { + "epoch": 1149.9342105263158, + "grad_norm": 1.2968597412109375, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 174790 + }, + { + "epoch": 1150.0, + "grad_norm": 1.1849431991577148, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 174800 + }, + { + "epoch": 1150.0657894736842, + "grad_norm": 1.133933663368225, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 174810 + }, + { + "epoch": 1150.1315789473683, + "grad_norm": 1.385817050933838, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 174820 + }, + { + "epoch": 1150.1973684210527, + "grad_norm": 1.2556873559951782, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 174830 + }, + { + "epoch": 1150.2631578947369, + "grad_norm": 1.142346978187561, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 174840 + }, + { + "epoch": 1150.328947368421, + "grad_norm": 1.2064703702926636, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 174850 + }, + { + "epoch": 1150.3947368421052, + "grad_norm": 1.0505454540252686, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 174860 + }, + { + "epoch": 1150.4605263157894, + "grad_norm": 1.084917664527893, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 174870 + }, + { + "epoch": 1150.5263157894738, + "grad_norm": 1.289945125579834, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 174880 + }, + { + "epoch": 1150.592105263158, + "grad_norm": 0.8230373859405518, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 174890 + }, + { + "epoch": 1150.657894736842, + "grad_norm": 1.1008986234664917, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 174900 + }, + { + "epoch": 1150.7236842105262, + "grad_norm": 1.0355833768844604, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 174910 + }, + { + "epoch": 1150.7894736842106, + "grad_norm": 1.206417441368103, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 174920 + }, + { + "epoch": 1150.8552631578948, + "grad_norm": 1.393118977546692, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 174930 + }, + { + "epoch": 1150.921052631579, + "grad_norm": 1.0129419565200806, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 174940 + }, + { + "epoch": 1150.9868421052631, + "grad_norm": 0.9136284589767456, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 174950 + }, + { + "epoch": 1151.0526315789473, + "grad_norm": 0.9406630992889404, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 174960 + }, + { + "epoch": 1151.1184210526317, + "grad_norm": 1.0768885612487793, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 174970 + }, + { + "epoch": 1151.1842105263158, + "grad_norm": 0.9179930686950684, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 174980 + }, + { + "epoch": 1151.25, + "grad_norm": 0.6786910891532898, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 174990 + }, + { + "epoch": 1151.3157894736842, + "grad_norm": 1.1306889057159424, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 175000 + }, + { + "epoch": 1151.3815789473683, + "grad_norm": 1.0117748975753784, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 175010 + }, + { + "epoch": 1151.4473684210527, + "grad_norm": 0.661830484867096, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 175020 + }, + { + "epoch": 1151.5131578947369, + "grad_norm": 1.0096147060394287, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 175030 + }, + { + "epoch": 1151.578947368421, + "grad_norm": 1.1573773622512817, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 175040 + }, + { + "epoch": 1151.6447368421052, + "grad_norm": 1.0948268175125122, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 175050 + }, + { + "epoch": 1151.7105263157894, + "grad_norm": 0.9008634090423584, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 175060 + }, + { + "epoch": 1151.7763157894738, + "grad_norm": 0.8874291181564331, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 175070 + }, + { + "epoch": 1151.842105263158, + "grad_norm": 1.33906090259552, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 175080 + }, + { + "epoch": 1151.907894736842, + "grad_norm": 1.3272651433944702, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 175090 + }, + { + "epoch": 1151.9736842105262, + "grad_norm": 1.1627230644226074, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 175100 + }, + { + "epoch": 1152.0394736842106, + "grad_norm": 0.9578849077224731, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 175110 + }, + { + "epoch": 1152.1052631578948, + "grad_norm": 1.016489863395691, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 175120 + }, + { + "epoch": 1152.171052631579, + "grad_norm": 0.9531344771385193, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 175130 + }, + { + "epoch": 1152.2368421052631, + "grad_norm": 0.9982447028160095, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 175140 + }, + { + "epoch": 1152.3026315789473, + "grad_norm": 1.3460252285003662, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 175150 + }, + { + "epoch": 1152.3684210526317, + "grad_norm": 1.0662846565246582, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 175160 + }, + { + "epoch": 1152.4342105263158, + "grad_norm": 0.7010901570320129, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 175170 + }, + { + "epoch": 1152.5, + "grad_norm": 0.8454627990722656, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 175180 + }, + { + "epoch": 1152.5657894736842, + "grad_norm": 0.858896017074585, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 175190 + }, + { + "epoch": 1152.6315789473683, + "grad_norm": 1.0066238641738892, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 175200 + }, + { + "epoch": 1152.6973684210527, + "grad_norm": 1.1515685319900513, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 175210 + }, + { + "epoch": 1152.7631578947369, + "grad_norm": 0.8878205418586731, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 175220 + }, + { + "epoch": 1152.828947368421, + "grad_norm": 1.0133531093597412, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 175230 + }, + { + "epoch": 1152.8947368421052, + "grad_norm": 1.2004742622375488, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 175240 + }, + { + "epoch": 1152.9605263157894, + "grad_norm": 0.8571202754974365, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 175250 + }, + { + "epoch": 1153.0263157894738, + "grad_norm": 1.2629226446151733, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 175260 + }, + { + "epoch": 1153.092105263158, + "grad_norm": 1.3697537183761597, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 175270 + }, + { + "epoch": 1153.157894736842, + "grad_norm": 1.2100975513458252, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 175280 + }, + { + "epoch": 1153.2236842105262, + "grad_norm": 1.1854854822158813, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 175290 + }, + { + "epoch": 1153.2894736842106, + "grad_norm": 0.7720757126808167, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 175300 + }, + { + "epoch": 1153.3552631578948, + "grad_norm": 1.1668815612792969, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 175310 + }, + { + "epoch": 1153.421052631579, + "grad_norm": 1.0737518072128296, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 175320 + }, + { + "epoch": 1153.4868421052631, + "grad_norm": 1.1186960935592651, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 175330 + }, + { + "epoch": 1153.5526315789473, + "grad_norm": 0.9653819799423218, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 175340 + }, + { + "epoch": 1153.6184210526317, + "grad_norm": 0.9481167793273926, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 175350 + }, + { + "epoch": 1153.6842105263158, + "grad_norm": 1.0212024450302124, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 175360 + }, + { + "epoch": 1153.75, + "grad_norm": 1.3355201482772827, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 175370 + }, + { + "epoch": 1153.8157894736842, + "grad_norm": 0.727803111076355, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 175380 + }, + { + "epoch": 1153.8815789473683, + "grad_norm": 0.894546627998352, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 175390 + }, + { + "epoch": 1153.9473684210527, + "grad_norm": 1.0446175336837769, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 175400 + }, + { + "epoch": 1154.0131578947369, + "grad_norm": 1.3837840557098389, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 175410 + }, + { + "epoch": 1154.078947368421, + "grad_norm": 1.131395936012268, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 175420 + }, + { + "epoch": 1154.1447368421052, + "grad_norm": 1.1270420551300049, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 175430 + }, + { + "epoch": 1154.2105263157894, + "grad_norm": 0.8780666589736938, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 175440 + }, + { + "epoch": 1154.2763157894738, + "grad_norm": 1.054305911064148, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 175450 + }, + { + "epoch": 1154.342105263158, + "grad_norm": 0.7585221529006958, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 175460 + }, + { + "epoch": 1154.407894736842, + "grad_norm": 0.8007470965385437, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 175470 + }, + { + "epoch": 1154.4736842105262, + "grad_norm": 1.0967533588409424, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 175480 + }, + { + "epoch": 1154.5394736842106, + "grad_norm": 1.0877718925476074, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 175490 + }, + { + "epoch": 1154.6052631578948, + "grad_norm": 0.965691864490509, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 175500 + }, + { + "epoch": 1154.671052631579, + "grad_norm": 0.9504064917564392, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 175510 + }, + { + "epoch": 1154.7368421052631, + "grad_norm": 1.0463320016860962, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 175520 + }, + { + "epoch": 1154.8026315789473, + "grad_norm": 1.1356477737426758, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 175530 + }, + { + "epoch": 1154.8684210526317, + "grad_norm": 0.937968909740448, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 175540 + }, + { + "epoch": 1154.9342105263158, + "grad_norm": 1.2012879848480225, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 175550 + }, + { + "epoch": 1155.0, + "grad_norm": 1.171942949295044, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 175560 + }, + { + "epoch": 1155.0657894736842, + "grad_norm": 0.92462557554245, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 175570 + }, + { + "epoch": 1155.1315789473683, + "grad_norm": 1.2336483001708984, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 175580 + }, + { + "epoch": 1155.1973684210527, + "grad_norm": 1.019647240638733, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 175590 + }, + { + "epoch": 1155.2631578947369, + "grad_norm": 1.0296791791915894, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 175600 + }, + { + "epoch": 1155.328947368421, + "grad_norm": 1.081726312637329, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 175610 + }, + { + "epoch": 1155.3947368421052, + "grad_norm": 0.6053563356399536, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 175620 + }, + { + "epoch": 1155.4605263157894, + "grad_norm": 0.908021867275238, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 175630 + }, + { + "epoch": 1155.5263157894738, + "grad_norm": 0.8451210856437683, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 175640 + }, + { + "epoch": 1155.592105263158, + "grad_norm": 1.068265676498413, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 175650 + }, + { + "epoch": 1155.657894736842, + "grad_norm": 1.213500738143921, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 175660 + }, + { + "epoch": 1155.7236842105262, + "grad_norm": 0.8982007503509521, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 175670 + }, + { + "epoch": 1155.7894736842106, + "grad_norm": 1.0422526597976685, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 175680 + }, + { + "epoch": 1155.8552631578948, + "grad_norm": 0.9385272264480591, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 175690 + }, + { + "epoch": 1155.921052631579, + "grad_norm": 0.9605172872543335, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 175700 + }, + { + "epoch": 1155.9868421052631, + "grad_norm": 1.1489660739898682, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 175710 + }, + { + "epoch": 1156.0526315789473, + "grad_norm": 1.1852326393127441, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 175720 + }, + { + "epoch": 1156.1184210526317, + "grad_norm": 1.5356533527374268, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 175730 + }, + { + "epoch": 1156.1842105263158, + "grad_norm": 0.974061131477356, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 175740 + }, + { + "epoch": 1156.25, + "grad_norm": 1.0487468242645264, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 175750 + }, + { + "epoch": 1156.3157894736842, + "grad_norm": 1.5691155195236206, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 175760 + }, + { + "epoch": 1156.3815789473683, + "grad_norm": 1.4372376203536987, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 175770 + }, + { + "epoch": 1156.4473684210527, + "grad_norm": 1.508100152015686, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 175780 + }, + { + "epoch": 1156.5131578947369, + "grad_norm": 1.0872918367385864, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 175790 + }, + { + "epoch": 1156.578947368421, + "grad_norm": 1.0140292644500732, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 175800 + }, + { + "epoch": 1156.6447368421052, + "grad_norm": 0.9516561031341553, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 175810 + }, + { + "epoch": 1156.7105263157894, + "grad_norm": 1.1463650465011597, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 175820 + }, + { + "epoch": 1156.7763157894738, + "grad_norm": 1.2068666219711304, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 175830 + }, + { + "epoch": 1156.842105263158, + "grad_norm": 0.8994778394699097, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 175840 + }, + { + "epoch": 1156.907894736842, + "grad_norm": 1.115564227104187, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 175850 + }, + { + "epoch": 1156.9736842105262, + "grad_norm": 1.5093863010406494, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 175860 + }, + { + "epoch": 1157.0394736842106, + "grad_norm": 1.3060439825057983, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 175870 + }, + { + "epoch": 1157.1052631578948, + "grad_norm": 1.0380960702896118, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 175880 + }, + { + "epoch": 1157.171052631579, + "grad_norm": 1.381862998008728, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 175890 + }, + { + "epoch": 1157.2368421052631, + "grad_norm": 0.9802920818328857, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 175900 + }, + { + "epoch": 1157.3026315789473, + "grad_norm": 1.1407344341278076, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 175910 + }, + { + "epoch": 1157.3684210526317, + "grad_norm": 1.3060024976730347, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 175920 + }, + { + "epoch": 1157.4342105263158, + "grad_norm": 1.3562568426132202, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 175930 + }, + { + "epoch": 1157.5, + "grad_norm": 0.958991527557373, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 175940 + }, + { + "epoch": 1157.5657894736842, + "grad_norm": 1.1298139095306396, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 175950 + }, + { + "epoch": 1157.6315789473683, + "grad_norm": 1.4933613538742065, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 175960 + }, + { + "epoch": 1157.6973684210527, + "grad_norm": 1.4154495000839233, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 175970 + }, + { + "epoch": 1157.7631578947369, + "grad_norm": 1.487290382385254, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 175980 + }, + { + "epoch": 1157.828947368421, + "grad_norm": 1.0696330070495605, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 175990 + }, + { + "epoch": 1157.8947368421052, + "grad_norm": 1.3961727619171143, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 176000 + }, + { + "epoch": 1157.9605263157894, + "grad_norm": 0.8423775434494019, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 176010 + }, + { + "epoch": 1158.0263157894738, + "grad_norm": 1.1189703941345215, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 176020 + }, + { + "epoch": 1158.092105263158, + "grad_norm": 1.04086434841156, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 176030 + }, + { + "epoch": 1158.157894736842, + "grad_norm": 0.8030532002449036, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 176040 + }, + { + "epoch": 1158.2236842105262, + "grad_norm": 1.1623908281326294, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 176050 + }, + { + "epoch": 1158.2894736842106, + "grad_norm": 0.7452375292778015, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 176060 + }, + { + "epoch": 1158.3552631578948, + "grad_norm": 0.625102162361145, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 176070 + }, + { + "epoch": 1158.421052631579, + "grad_norm": 0.6674894690513611, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 176080 + }, + { + "epoch": 1158.4868421052631, + "grad_norm": 0.7138943076133728, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 176090 + }, + { + "epoch": 1158.5526315789473, + "grad_norm": 0.9019205570220947, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 176100 + }, + { + "epoch": 1158.6184210526317, + "grad_norm": 1.1200653314590454, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 176110 + }, + { + "epoch": 1158.6842105263158, + "grad_norm": 1.2586793899536133, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 176120 + }, + { + "epoch": 1158.75, + "grad_norm": 0.7260456085205078, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 176130 + }, + { + "epoch": 1158.8157894736842, + "grad_norm": 1.0455162525177002, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 176140 + }, + { + "epoch": 1158.8815789473683, + "grad_norm": 1.5511831045150757, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 176150 + }, + { + "epoch": 1158.9473684210527, + "grad_norm": 0.8422067165374756, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 176160 + }, + { + "epoch": 1159.0131578947369, + "grad_norm": 1.0608569383621216, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 176170 + }, + { + "epoch": 1159.078947368421, + "grad_norm": 0.9599637985229492, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 176180 + }, + { + "epoch": 1159.1447368421052, + "grad_norm": 1.0301839113235474, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 176190 + }, + { + "epoch": 1159.2105263157894, + "grad_norm": 1.3413852453231812, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 176200 + }, + { + "epoch": 1159.2763157894738, + "grad_norm": 1.2173924446105957, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 176210 + }, + { + "epoch": 1159.342105263158, + "grad_norm": 1.487339735031128, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 176220 + }, + { + "epoch": 1159.407894736842, + "grad_norm": 1.342084527015686, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 176230 + }, + { + "epoch": 1159.4736842105262, + "grad_norm": 1.627140998840332, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 176240 + }, + { + "epoch": 1159.5394736842106, + "grad_norm": 1.2196797132492065, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 176250 + }, + { + "epoch": 1159.6052631578948, + "grad_norm": 1.0979526042938232, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 176260 + }, + { + "epoch": 1159.671052631579, + "grad_norm": 1.0794157981872559, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 176270 + }, + { + "epoch": 1159.7368421052631, + "grad_norm": 1.344915747642517, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 176280 + }, + { + "epoch": 1159.8026315789473, + "grad_norm": 0.9110754728317261, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 176290 + }, + { + "epoch": 1159.8684210526317, + "grad_norm": 0.9595354795455933, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 176300 + }, + { + "epoch": 1159.9342105263158, + "grad_norm": 1.3430472612380981, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 176310 + }, + { + "epoch": 1160.0, + "grad_norm": 0.8704079985618591, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 176320 + }, + { + "epoch": 1160.0657894736842, + "grad_norm": 1.069422721862793, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 176330 + }, + { + "epoch": 1160.1315789473683, + "grad_norm": 1.1828012466430664, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 176340 + }, + { + "epoch": 1160.1973684210527, + "grad_norm": 1.0887290239334106, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 176350 + }, + { + "epoch": 1160.2631578947369, + "grad_norm": 0.9942824244499207, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 176360 + }, + { + "epoch": 1160.328947368421, + "grad_norm": 0.9858255982398987, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 176370 + }, + { + "epoch": 1160.3947368421052, + "grad_norm": 0.7866831421852112, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 176380 + }, + { + "epoch": 1160.4605263157894, + "grad_norm": 0.8747726082801819, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 176390 + }, + { + "epoch": 1160.5263157894738, + "grad_norm": 1.004457712173462, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 176400 + }, + { + "epoch": 1160.592105263158, + "grad_norm": 0.9716295599937439, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 176410 + }, + { + "epoch": 1160.657894736842, + "grad_norm": 1.2135223150253296, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 176420 + }, + { + "epoch": 1160.7236842105262, + "grad_norm": 1.085508942604065, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 176430 + }, + { + "epoch": 1160.7894736842106, + "grad_norm": 1.4997029304504395, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 176440 + }, + { + "epoch": 1160.8552631578948, + "grad_norm": 1.238146185874939, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 176450 + }, + { + "epoch": 1160.921052631579, + "grad_norm": 0.9297826290130615, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 176460 + }, + { + "epoch": 1160.9868421052631, + "grad_norm": 0.8368382453918457, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 176470 + }, + { + "epoch": 1161.0526315789473, + "grad_norm": 0.933539092540741, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 176480 + }, + { + "epoch": 1161.1184210526317, + "grad_norm": 1.1759393215179443, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 176490 + }, + { + "epoch": 1161.1842105263158, + "grad_norm": 1.2678519487380981, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 176500 + }, + { + "epoch": 1161.25, + "grad_norm": 1.227339506149292, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 176510 + }, + { + "epoch": 1161.3157894736842, + "grad_norm": 1.1603562831878662, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 176520 + }, + { + "epoch": 1161.3815789473683, + "grad_norm": 1.2368062734603882, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 176530 + }, + { + "epoch": 1161.4473684210527, + "grad_norm": 1.3081183433532715, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 176540 + }, + { + "epoch": 1161.5131578947369, + "grad_norm": 1.4025623798370361, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 176550 + }, + { + "epoch": 1161.578947368421, + "grad_norm": 1.1546919345855713, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 176560 + }, + { + "epoch": 1161.6447368421052, + "grad_norm": 0.9056599736213684, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 176570 + }, + { + "epoch": 1161.7105263157894, + "grad_norm": 1.22064208984375, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 176580 + }, + { + "epoch": 1161.7763157894738, + "grad_norm": 0.9950551390647888, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 176590 + }, + { + "epoch": 1161.842105263158, + "grad_norm": 1.0174490213394165, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 176600 + }, + { + "epoch": 1161.907894736842, + "grad_norm": 1.21022629737854, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 176610 + }, + { + "epoch": 1161.9736842105262, + "grad_norm": 0.6943854093551636, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 176620 + }, + { + "epoch": 1162.0394736842106, + "grad_norm": 1.3050177097320557, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 176630 + }, + { + "epoch": 1162.1052631578948, + "grad_norm": 1.040520429611206, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 176640 + }, + { + "epoch": 1162.171052631579, + "grad_norm": 1.3651999235153198, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 176650 + }, + { + "epoch": 1162.2368421052631, + "grad_norm": 1.1482195854187012, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 176660 + }, + { + "epoch": 1162.3026315789473, + "grad_norm": 1.137037754058838, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 176670 + }, + { + "epoch": 1162.3684210526317, + "grad_norm": 1.381179690361023, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 176680 + }, + { + "epoch": 1162.4342105263158, + "grad_norm": 1.0104210376739502, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 176690 + }, + { + "epoch": 1162.5, + "grad_norm": 1.2539334297180176, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 176700 + }, + { + "epoch": 1162.5657894736842, + "grad_norm": 1.0029774904251099, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 176710 + }, + { + "epoch": 1162.6315789473683, + "grad_norm": 0.8852845430374146, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 176720 + }, + { + "epoch": 1162.6973684210527, + "grad_norm": 1.0239177942276, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 176730 + }, + { + "epoch": 1162.7631578947369, + "grad_norm": 0.8226960897445679, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 176740 + }, + { + "epoch": 1162.828947368421, + "grad_norm": 0.9960774779319763, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 176750 + }, + { + "epoch": 1162.8947368421052, + "grad_norm": 1.1968601942062378, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 176760 + }, + { + "epoch": 1162.9605263157894, + "grad_norm": 1.1123733520507812, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 176770 + }, + { + "epoch": 1163.0263157894738, + "grad_norm": 1.0978655815124512, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 176780 + }, + { + "epoch": 1163.092105263158, + "grad_norm": 0.8756230473518372, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 176790 + }, + { + "epoch": 1163.157894736842, + "grad_norm": 1.0591200590133667, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 176800 + }, + { + "epoch": 1163.2236842105262, + "grad_norm": 0.7175271511077881, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 176810 + }, + { + "epoch": 1163.2894736842106, + "grad_norm": 1.0549403429031372, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 176820 + }, + { + "epoch": 1163.3552631578948, + "grad_norm": 1.0469660758972168, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 176830 + }, + { + "epoch": 1163.421052631579, + "grad_norm": 0.8197566270828247, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 176840 + }, + { + "epoch": 1163.4868421052631, + "grad_norm": 1.2558757066726685, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 176850 + }, + { + "epoch": 1163.5526315789473, + "grad_norm": 1.1719180345535278, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 176860 + }, + { + "epoch": 1163.6184210526317, + "grad_norm": 0.817057728767395, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 176870 + }, + { + "epoch": 1163.6842105263158, + "grad_norm": 1.178382158279419, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 176880 + }, + { + "epoch": 1163.75, + "grad_norm": 1.0112900733947754, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 176890 + }, + { + "epoch": 1163.8157894736842, + "grad_norm": 0.7621281743049622, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 176900 + }, + { + "epoch": 1163.8815789473683, + "grad_norm": 0.6787197589874268, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 176910 + }, + { + "epoch": 1163.9473684210527, + "grad_norm": 1.0531107187271118, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 176920 + }, + { + "epoch": 1164.0131578947369, + "grad_norm": 0.5585743188858032, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 176930 + }, + { + "epoch": 1164.078947368421, + "grad_norm": 0.8682098388671875, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 176940 + }, + { + "epoch": 1164.1447368421052, + "grad_norm": 0.7695916295051575, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 176950 + }, + { + "epoch": 1164.2105263157894, + "grad_norm": 1.2778445482254028, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 176960 + }, + { + "epoch": 1164.2763157894738, + "grad_norm": 1.0614190101623535, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 176970 + }, + { + "epoch": 1164.342105263158, + "grad_norm": 0.7645097374916077, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 176980 + }, + { + "epoch": 1164.407894736842, + "grad_norm": 1.0178488492965698, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 176990 + }, + { + "epoch": 1164.4736842105262, + "grad_norm": 1.3799580335617065, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 177000 + }, + { + "epoch": 1164.5394736842106, + "grad_norm": 1.1391440629959106, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 177010 + }, + { + "epoch": 1164.6052631578948, + "grad_norm": 1.0461714267730713, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 177020 + }, + { + "epoch": 1164.671052631579, + "grad_norm": 1.119397521018982, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 177030 + }, + { + "epoch": 1164.7368421052631, + "grad_norm": 1.3247554302215576, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 177040 + }, + { + "epoch": 1164.8026315789473, + "grad_norm": 0.9581509828567505, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 177050 + }, + { + "epoch": 1164.8684210526317, + "grad_norm": 1.0519541501998901, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 177060 + }, + { + "epoch": 1164.9342105263158, + "grad_norm": 0.7811246514320374, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 177070 + }, + { + "epoch": 1165.0, + "grad_norm": 0.8886945843696594, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 177080 + }, + { + "epoch": 1165.0657894736842, + "grad_norm": 1.0160313844680786, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 177090 + }, + { + "epoch": 1165.1315789473683, + "grad_norm": 1.3136074542999268, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 177100 + }, + { + "epoch": 1165.1973684210527, + "grad_norm": 1.1090233325958252, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 177110 + }, + { + "epoch": 1165.2631578947369, + "grad_norm": 1.0425786972045898, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 177120 + }, + { + "epoch": 1165.328947368421, + "grad_norm": 1.0848468542099, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 177130 + }, + { + "epoch": 1165.3947368421052, + "grad_norm": 0.8333765268325806, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 177140 + }, + { + "epoch": 1165.4605263157894, + "grad_norm": 0.8007072806358337, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 177150 + }, + { + "epoch": 1165.5263157894738, + "grad_norm": 1.0657711029052734, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 177160 + }, + { + "epoch": 1165.592105263158, + "grad_norm": 0.8942683339118958, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 177170 + }, + { + "epoch": 1165.657894736842, + "grad_norm": 0.7064841985702515, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 177180 + }, + { + "epoch": 1165.7236842105262, + "grad_norm": 0.673353910446167, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 177190 + }, + { + "epoch": 1165.7894736842106, + "grad_norm": 1.201241135597229, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 177200 + }, + { + "epoch": 1165.8552631578948, + "grad_norm": 1.1128406524658203, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 177210 + }, + { + "epoch": 1165.921052631579, + "grad_norm": 1.0577025413513184, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 177220 + }, + { + "epoch": 1165.9868421052631, + "grad_norm": 1.063428282737732, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 177230 + }, + { + "epoch": 1166.0526315789473, + "grad_norm": 0.6475265622138977, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 177240 + }, + { + "epoch": 1166.1184210526317, + "grad_norm": 1.0673624277114868, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 177250 + }, + { + "epoch": 1166.1842105263158, + "grad_norm": 1.2042251825332642, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 177260 + }, + { + "epoch": 1166.25, + "grad_norm": 1.023403286933899, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 177270 + }, + { + "epoch": 1166.3157894736842, + "grad_norm": 1.0554587841033936, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 177280 + }, + { + "epoch": 1166.3815789473683, + "grad_norm": 1.2385607957839966, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 177290 + }, + { + "epoch": 1166.4473684210527, + "grad_norm": 1.058170199394226, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 177300 + }, + { + "epoch": 1166.5131578947369, + "grad_norm": 0.7787966728210449, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 177310 + }, + { + "epoch": 1166.578947368421, + "grad_norm": 0.8278951048851013, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 177320 + }, + { + "epoch": 1166.6447368421052, + "grad_norm": 0.945210337638855, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 177330 + }, + { + "epoch": 1166.7105263157894, + "grad_norm": 0.8070635795593262, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 177340 + }, + { + "epoch": 1166.7763157894738, + "grad_norm": 1.1100777387619019, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 177350 + }, + { + "epoch": 1166.842105263158, + "grad_norm": 1.3282232284545898, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 177360 + }, + { + "epoch": 1166.907894736842, + "grad_norm": 1.6687939167022705, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 177370 + }, + { + "epoch": 1166.9736842105262, + "grad_norm": 1.4565303325653076, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 177380 + }, + { + "epoch": 1167.0394736842106, + "grad_norm": 0.7736444473266602, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 177390 + }, + { + "epoch": 1167.1052631578948, + "grad_norm": 1.261600375175476, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 177400 + }, + { + "epoch": 1167.171052631579, + "grad_norm": 1.2047302722930908, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 177410 + }, + { + "epoch": 1167.2368421052631, + "grad_norm": 1.3666282892227173, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 177420 + }, + { + "epoch": 1167.3026315789473, + "grad_norm": 1.41166353225708, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 177430 + }, + { + "epoch": 1167.3684210526317, + "grad_norm": 0.9169084429740906, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 177440 + }, + { + "epoch": 1167.4342105263158, + "grad_norm": 1.0971057415008545, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 177450 + }, + { + "epoch": 1167.5, + "grad_norm": 1.0250972509384155, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 177460 + }, + { + "epoch": 1167.5657894736842, + "grad_norm": 1.008253812789917, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 177470 + }, + { + "epoch": 1167.6315789473683, + "grad_norm": 1.0217920541763306, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 177480 + }, + { + "epoch": 1167.6973684210527, + "grad_norm": 1.0006431341171265, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 177490 + }, + { + "epoch": 1167.7631578947369, + "grad_norm": 0.8362782001495361, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 177500 + }, + { + "epoch": 1167.828947368421, + "grad_norm": 1.3864368200302124, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 177510 + }, + { + "epoch": 1167.8947368421052, + "grad_norm": 1.3755213022232056, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 177520 + }, + { + "epoch": 1167.9605263157894, + "grad_norm": 1.060314416885376, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 177530 + }, + { + "epoch": 1168.0263157894738, + "grad_norm": 1.2582261562347412, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 177540 + }, + { + "epoch": 1168.092105263158, + "grad_norm": 1.0791796445846558, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 177550 + }, + { + "epoch": 1168.157894736842, + "grad_norm": 1.2090390920639038, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 177560 + }, + { + "epoch": 1168.2236842105262, + "grad_norm": 0.7330916523933411, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 177570 + }, + { + "epoch": 1168.2894736842106, + "grad_norm": 0.9363530874252319, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 177580 + }, + { + "epoch": 1168.3552631578948, + "grad_norm": 0.9685211777687073, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 177590 + }, + { + "epoch": 1168.421052631579, + "grad_norm": 1.0913938283920288, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 177600 + }, + { + "epoch": 1168.4868421052631, + "grad_norm": 1.035131812095642, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 177610 + }, + { + "epoch": 1168.5526315789473, + "grad_norm": 1.0176951885223389, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 177620 + }, + { + "epoch": 1168.6184210526317, + "grad_norm": 0.8525509834289551, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 177630 + }, + { + "epoch": 1168.6842105263158, + "grad_norm": 1.0058506727218628, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 177640 + }, + { + "epoch": 1168.75, + "grad_norm": 1.1106820106506348, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 177650 + }, + { + "epoch": 1168.8157894736842, + "grad_norm": 0.9219995737075806, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 177660 + }, + { + "epoch": 1168.8815789473683, + "grad_norm": 0.9162684082984924, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 177670 + }, + { + "epoch": 1168.9473684210527, + "grad_norm": 1.331979513168335, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 177680 + }, + { + "epoch": 1169.0131578947369, + "grad_norm": 0.8545548915863037, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 177690 + }, + { + "epoch": 1169.078947368421, + "grad_norm": 0.989903450012207, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 177700 + }, + { + "epoch": 1169.1447368421052, + "grad_norm": 0.9872443079948425, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 177710 + }, + { + "epoch": 1169.2105263157894, + "grad_norm": 1.1131963729858398, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 177720 + }, + { + "epoch": 1169.2763157894738, + "grad_norm": 0.7441876530647278, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 177730 + }, + { + "epoch": 1169.342105263158, + "grad_norm": 1.1648389101028442, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 177740 + }, + { + "epoch": 1169.407894736842, + "grad_norm": 1.0851612091064453, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 177750 + }, + { + "epoch": 1169.4736842105262, + "grad_norm": 0.9316413402557373, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 177760 + }, + { + "epoch": 1169.5394736842106, + "grad_norm": 0.8847388029098511, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 177770 + }, + { + "epoch": 1169.6052631578948, + "grad_norm": 1.1352200508117676, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 177780 + }, + { + "epoch": 1169.671052631579, + "grad_norm": 0.9478182792663574, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 177790 + }, + { + "epoch": 1169.7368421052631, + "grad_norm": 0.7659247517585754, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 177800 + }, + { + "epoch": 1169.8026315789473, + "grad_norm": 0.7848868370056152, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 177810 + }, + { + "epoch": 1169.8684210526317, + "grad_norm": 1.0635731220245361, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 177820 + }, + { + "epoch": 1169.9342105263158, + "grad_norm": 1.3124653100967407, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 177830 + }, + { + "epoch": 1170.0, + "grad_norm": 0.9985669255256653, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 177840 + }, + { + "epoch": 1170.0657894736842, + "grad_norm": 1.075380563735962, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 177850 + }, + { + "epoch": 1170.1315789473683, + "grad_norm": 0.7401938438415527, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 177860 + }, + { + "epoch": 1170.1973684210527, + "grad_norm": 1.1559667587280273, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 177870 + }, + { + "epoch": 1170.2631578947369, + "grad_norm": 1.152563452720642, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 177880 + }, + { + "epoch": 1170.328947368421, + "grad_norm": 0.8326547145843506, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 177890 + }, + { + "epoch": 1170.3947368421052, + "grad_norm": 1.1279677152633667, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 177900 + }, + { + "epoch": 1170.4605263157894, + "grad_norm": 1.192924976348877, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 177910 + }, + { + "epoch": 1170.5263157894738, + "grad_norm": 1.3840651512145996, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 177920 + }, + { + "epoch": 1170.592105263158, + "grad_norm": 1.016034483909607, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 177930 + }, + { + "epoch": 1170.657894736842, + "grad_norm": 1.2145895957946777, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 177940 + }, + { + "epoch": 1170.7236842105262, + "grad_norm": 0.9925963878631592, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 177950 + }, + { + "epoch": 1170.7894736842106, + "grad_norm": 0.9628350734710693, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 177960 + }, + { + "epoch": 1170.8552631578948, + "grad_norm": 0.8890961408615112, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 177970 + }, + { + "epoch": 1170.921052631579, + "grad_norm": 0.8459914922714233, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 177980 + }, + { + "epoch": 1170.9868421052631, + "grad_norm": 0.828228235244751, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 177990 + }, + { + "epoch": 1171.0526315789473, + "grad_norm": 0.8761423230171204, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 178000 + }, + { + "epoch": 1171.1184210526317, + "grad_norm": 0.9206410050392151, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 178010 + }, + { + "epoch": 1171.1842105263158, + "grad_norm": 0.7937294840812683, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 178020 + }, + { + "epoch": 1171.25, + "grad_norm": 0.9361086487770081, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178030 + }, + { + "epoch": 1171.3157894736842, + "grad_norm": 1.000895380973816, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 178040 + }, + { + "epoch": 1171.3815789473683, + "grad_norm": 1.2588021755218506, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 178050 + }, + { + "epoch": 1171.4473684210527, + "grad_norm": 1.0151118040084839, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 178060 + }, + { + "epoch": 1171.5131578947369, + "grad_norm": 1.0695226192474365, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 178070 + }, + { + "epoch": 1171.578947368421, + "grad_norm": 0.9043155908584595, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 178080 + }, + { + "epoch": 1171.6447368421052, + "grad_norm": 0.9957794547080994, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 178090 + }, + { + "epoch": 1171.7105263157894, + "grad_norm": 0.9040594100952148, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 178100 + }, + { + "epoch": 1171.7763157894738, + "grad_norm": 0.9921096563339233, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 178110 + }, + { + "epoch": 1171.842105263158, + "grad_norm": 1.4011787176132202, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 178120 + }, + { + "epoch": 1171.907894736842, + "grad_norm": 1.1821715831756592, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 178130 + }, + { + "epoch": 1171.9736842105262, + "grad_norm": 1.4412405490875244, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 178140 + }, + { + "epoch": 1172.0394736842106, + "grad_norm": 1.3233447074890137, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 178150 + }, + { + "epoch": 1172.1052631578948, + "grad_norm": 1.3075172901153564, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 178160 + }, + { + "epoch": 1172.171052631579, + "grad_norm": 1.162680983543396, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178170 + }, + { + "epoch": 1172.2368421052631, + "grad_norm": 0.6982220411300659, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178180 + }, + { + "epoch": 1172.3026315789473, + "grad_norm": 1.1954939365386963, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 178190 + }, + { + "epoch": 1172.3684210526317, + "grad_norm": 1.6869287490844727, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 178200 + }, + { + "epoch": 1172.4342105263158, + "grad_norm": 1.649885654449463, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 178210 + }, + { + "epoch": 1172.5, + "grad_norm": 1.5803041458129883, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 178220 + }, + { + "epoch": 1172.5657894736842, + "grad_norm": 1.7074077129364014, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 178230 + }, + { + "epoch": 1172.6315789473683, + "grad_norm": 1.3916934728622437, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 178240 + }, + { + "epoch": 1172.6973684210527, + "grad_norm": 1.668851613998413, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 178250 + }, + { + "epoch": 1172.7631578947369, + "grad_norm": 1.1936376094818115, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 178260 + }, + { + "epoch": 1172.828947368421, + "grad_norm": 1.4797992706298828, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 178270 + }, + { + "epoch": 1172.8947368421052, + "grad_norm": 1.1677192449569702, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 178280 + }, + { + "epoch": 1172.9605263157894, + "grad_norm": 1.2152477502822876, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 178290 + }, + { + "epoch": 1173.0263157894738, + "grad_norm": 1.248241662979126, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 178300 + }, + { + "epoch": 1173.092105263158, + "grad_norm": 1.2586883306503296, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178310 + }, + { + "epoch": 1173.157894736842, + "grad_norm": 1.2856673002243042, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 178320 + }, + { + "epoch": 1173.2236842105262, + "grad_norm": 1.19162917137146, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 178330 + }, + { + "epoch": 1173.2894736842106, + "grad_norm": 0.8176268935203552, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178340 + }, + { + "epoch": 1173.3552631578948, + "grad_norm": 0.7139531970024109, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 178350 + }, + { + "epoch": 1173.421052631579, + "grad_norm": 0.7016176581382751, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 178360 + }, + { + "epoch": 1173.4868421052631, + "grad_norm": 1.0311745405197144, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 178370 + }, + { + "epoch": 1173.5526315789473, + "grad_norm": 0.8565056920051575, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 178380 + }, + { + "epoch": 1173.6184210526317, + "grad_norm": 1.1574746370315552, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 178390 + }, + { + "epoch": 1173.6842105263158, + "grad_norm": 1.0975223779678345, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 178400 + }, + { + "epoch": 1173.75, + "grad_norm": 0.849980890750885, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 178410 + }, + { + "epoch": 1173.8157894736842, + "grad_norm": 0.8010478019714355, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 178420 + }, + { + "epoch": 1173.8815789473683, + "grad_norm": 0.7263656258583069, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 178430 + }, + { + "epoch": 1173.9473684210527, + "grad_norm": 0.9674796462059021, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 178440 + }, + { + "epoch": 1174.0131578947369, + "grad_norm": 0.8462510108947754, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 178450 + }, + { + "epoch": 1174.078947368421, + "grad_norm": 0.921765923500061, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 178460 + }, + { + "epoch": 1174.1447368421052, + "grad_norm": 1.0591344833374023, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 178470 + }, + { + "epoch": 1174.2105263157894, + "grad_norm": 1.122679352760315, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 178480 + }, + { + "epoch": 1174.2763157894738, + "grad_norm": 0.8474733233451843, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 178490 + }, + { + "epoch": 1174.342105263158, + "grad_norm": 0.9375333189964294, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 178500 + }, + { + "epoch": 1174.407894736842, + "grad_norm": 0.9587281942367554, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 178510 + }, + { + "epoch": 1174.4736842105262, + "grad_norm": 0.998461902141571, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 178520 + }, + { + "epoch": 1174.5394736842106, + "grad_norm": 1.0688536167144775, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 178530 + }, + { + "epoch": 1174.6052631578948, + "grad_norm": 0.768993079662323, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 178540 + }, + { + "epoch": 1174.671052631579, + "grad_norm": 0.9926314353942871, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 178550 + }, + { + "epoch": 1174.7368421052631, + "grad_norm": 1.068148136138916, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 178560 + }, + { + "epoch": 1174.8026315789473, + "grad_norm": 0.9381814002990723, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 178570 + }, + { + "epoch": 1174.8684210526317, + "grad_norm": 1.102551817893982, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178580 + }, + { + "epoch": 1174.9342105263158, + "grad_norm": 0.8753682374954224, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 178590 + }, + { + "epoch": 1175.0, + "grad_norm": 1.0447521209716797, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 178600 + }, + { + "epoch": 1175.0657894736842, + "grad_norm": 1.3192112445831299, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 178610 + }, + { + "epoch": 1175.1315789473683, + "grad_norm": 1.2548503875732422, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 178620 + }, + { + "epoch": 1175.1973684210527, + "grad_norm": 1.1852110624313354, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 178630 + }, + { + "epoch": 1175.2631578947369, + "grad_norm": 1.1667160987854004, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 178640 + }, + { + "epoch": 1175.328947368421, + "grad_norm": 0.9693193435668945, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 178650 + }, + { + "epoch": 1175.3947368421052, + "grad_norm": 1.279008150100708, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 178660 + }, + { + "epoch": 1175.4605263157894, + "grad_norm": 0.8803616762161255, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 178670 + }, + { + "epoch": 1175.5263157894738, + "grad_norm": 0.7935562133789062, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 178680 + }, + { + "epoch": 1175.592105263158, + "grad_norm": 0.8752424120903015, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 178690 + }, + { + "epoch": 1175.657894736842, + "grad_norm": 1.2736647129058838, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 178700 + }, + { + "epoch": 1175.7236842105262, + "grad_norm": 0.9050102829933167, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 178710 + }, + { + "epoch": 1175.7894736842106, + "grad_norm": 0.9366145730018616, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 178720 + }, + { + "epoch": 1175.8552631578948, + "grad_norm": 1.0141912698745728, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 178730 + }, + { + "epoch": 1175.921052631579, + "grad_norm": 0.8957210183143616, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178740 + }, + { + "epoch": 1175.9868421052631, + "grad_norm": 0.743053674697876, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 178750 + }, + { + "epoch": 1176.0526315789473, + "grad_norm": 0.8863754868507385, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 178760 + }, + { + "epoch": 1176.1184210526317, + "grad_norm": 0.7674370408058167, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 178770 + }, + { + "epoch": 1176.1842105263158, + "grad_norm": 0.6406316757202148, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 178780 + }, + { + "epoch": 1176.25, + "grad_norm": 0.5896703600883484, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178790 + }, + { + "epoch": 1176.3157894736842, + "grad_norm": 0.7623488903045654, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 178800 + }, + { + "epoch": 1176.3815789473683, + "grad_norm": 0.7528663277626038, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 178810 + }, + { + "epoch": 1176.4473684210527, + "grad_norm": 0.9548066854476929, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 178820 + }, + { + "epoch": 1176.5131578947369, + "grad_norm": 0.880696177482605, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 178830 + }, + { + "epoch": 1176.578947368421, + "grad_norm": 1.145169734954834, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 178840 + }, + { + "epoch": 1176.6447368421052, + "grad_norm": 1.2576274871826172, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 178850 + }, + { + "epoch": 1176.7105263157894, + "grad_norm": 1.057856798171997, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 178860 + }, + { + "epoch": 1176.7763157894738, + "grad_norm": 0.9875898957252502, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 178870 + }, + { + "epoch": 1176.842105263158, + "grad_norm": 1.0104905366897583, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 178880 + }, + { + "epoch": 1176.907894736842, + "grad_norm": 0.7599539756774902, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178890 + }, + { + "epoch": 1176.9736842105262, + "grad_norm": 0.6184892058372498, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 178900 + }, + { + "epoch": 1177.0394736842106, + "grad_norm": 1.0564872026443481, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 178910 + }, + { + "epoch": 1177.1052631578948, + "grad_norm": 0.9271602034568787, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 178920 + }, + { + "epoch": 1177.171052631579, + "grad_norm": 0.9372125267982483, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 178930 + }, + { + "epoch": 1177.2368421052631, + "grad_norm": 0.8659113645553589, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 178940 + }, + { + "epoch": 1177.3026315789473, + "grad_norm": 0.9990260601043701, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 178950 + }, + { + "epoch": 1177.3684210526317, + "grad_norm": 0.9762459993362427, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 178960 + }, + { + "epoch": 1177.4342105263158, + "grad_norm": 0.8299205899238586, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 178970 + }, + { + "epoch": 1177.5, + "grad_norm": 1.0260875225067139, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 178980 + }, + { + "epoch": 1177.5657894736842, + "grad_norm": 0.8214814066886902, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 178990 + }, + { + "epoch": 1177.6315789473683, + "grad_norm": 0.8492797017097473, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 179000 + }, + { + "epoch": 1177.6973684210527, + "grad_norm": 1.093478798866272, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 179010 + }, + { + "epoch": 1177.7631578947369, + "grad_norm": 1.2081449031829834, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 179020 + }, + { + "epoch": 1177.828947368421, + "grad_norm": 1.1940277814865112, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 179030 + }, + { + "epoch": 1177.8947368421052, + "grad_norm": 0.8136548399925232, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 179040 + }, + { + "epoch": 1177.9605263157894, + "grad_norm": 0.9597692489624023, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 179050 + }, + { + "epoch": 1178.0263157894738, + "grad_norm": 1.0133211612701416, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 179060 + }, + { + "epoch": 1178.092105263158, + "grad_norm": 1.08301842212677, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 179070 + }, + { + "epoch": 1178.157894736842, + "grad_norm": 0.9614161252975464, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 179080 + }, + { + "epoch": 1178.2236842105262, + "grad_norm": 1.0885823965072632, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 179090 + }, + { + "epoch": 1178.2894736842106, + "grad_norm": 0.9387257695198059, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 179100 + }, + { + "epoch": 1178.3552631578948, + "grad_norm": 0.7102392911911011, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 179110 + }, + { + "epoch": 1178.421052631579, + "grad_norm": 0.9195134043693542, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 179120 + }, + { + "epoch": 1178.4868421052631, + "grad_norm": 1.0749733448028564, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 179130 + }, + { + "epoch": 1178.5526315789473, + "grad_norm": 1.2915416955947876, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 179140 + }, + { + "epoch": 1178.6184210526317, + "grad_norm": 0.8285413980484009, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 179150 + }, + { + "epoch": 1178.6842105263158, + "grad_norm": 0.7751388549804688, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 179160 + }, + { + "epoch": 1178.75, + "grad_norm": 0.8202561736106873, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 179170 + }, + { + "epoch": 1178.8157894736842, + "grad_norm": 0.9537473320960999, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 179180 + }, + { + "epoch": 1178.8815789473683, + "grad_norm": 0.9641271829605103, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 179190 + }, + { + "epoch": 1178.9473684210527, + "grad_norm": 0.962111234664917, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 179200 + }, + { + "epoch": 1179.0131578947369, + "grad_norm": 0.8706178665161133, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 179210 + }, + { + "epoch": 1179.078947368421, + "grad_norm": 1.0734243392944336, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 179220 + }, + { + "epoch": 1179.1447368421052, + "grad_norm": 1.075181484222412, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 179230 + }, + { + "epoch": 1179.2105263157894, + "grad_norm": 1.1446199417114258, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 179240 + }, + { + "epoch": 1179.2763157894738, + "grad_norm": 0.9987981915473938, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 179250 + }, + { + "epoch": 1179.342105263158, + "grad_norm": 0.8230014443397522, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 179260 + }, + { + "epoch": 1179.407894736842, + "grad_norm": 0.7945327162742615, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 179270 + }, + { + "epoch": 1179.4736842105262, + "grad_norm": 0.7237135171890259, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 179280 + }, + { + "epoch": 1179.5394736842106, + "grad_norm": 0.8276739716529846, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 179290 + }, + { + "epoch": 1179.6052631578948, + "grad_norm": 1.0555814504623413, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 179300 + }, + { + "epoch": 1179.671052631579, + "grad_norm": 1.0211066007614136, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 179310 + }, + { + "epoch": 1179.7368421052631, + "grad_norm": 1.0281317234039307, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 179320 + }, + { + "epoch": 1179.8026315789473, + "grad_norm": 1.0497641563415527, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 179330 + }, + { + "epoch": 1179.8684210526317, + "grad_norm": 1.2660293579101562, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 179340 + }, + { + "epoch": 1179.9342105263158, + "grad_norm": 0.6678763628005981, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 179350 + }, + { + "epoch": 1180.0, + "grad_norm": 1.2870025634765625, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 179360 + }, + { + "epoch": 1180.0657894736842, + "grad_norm": 1.0261781215667725, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 179370 + }, + { + "epoch": 1180.1315789473683, + "grad_norm": 0.9007861614227295, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 179380 + }, + { + "epoch": 1180.1973684210527, + "grad_norm": 0.9087187647819519, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 179390 + }, + { + "epoch": 1180.2631578947369, + "grad_norm": 1.1201528310775757, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 179400 + }, + { + "epoch": 1180.328947368421, + "grad_norm": 1.268721342086792, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 179410 + }, + { + "epoch": 1180.3947368421052, + "grad_norm": 1.1803576946258545, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 179420 + }, + { + "epoch": 1180.4605263157894, + "grad_norm": 0.8738627433776855, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 179430 + }, + { + "epoch": 1180.5263157894738, + "grad_norm": 0.9588783383369446, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 179440 + }, + { + "epoch": 1180.592105263158, + "grad_norm": 0.8148517608642578, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 179450 + }, + { + "epoch": 1180.657894736842, + "grad_norm": 0.8594145774841309, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 179460 + }, + { + "epoch": 1180.7236842105262, + "grad_norm": 0.658765435218811, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 179470 + }, + { + "epoch": 1180.7894736842106, + "grad_norm": 0.9419595003128052, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 179480 + }, + { + "epoch": 1180.8552631578948, + "grad_norm": 0.9967935681343079, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 179490 + }, + { + "epoch": 1180.921052631579, + "grad_norm": 1.151293158531189, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 179500 + }, + { + "epoch": 1180.9868421052631, + "grad_norm": 1.1010416746139526, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 179510 + }, + { + "epoch": 1181.0526315789473, + "grad_norm": 1.184219241142273, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 179520 + }, + { + "epoch": 1181.1184210526317, + "grad_norm": 1.5013480186462402, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 179530 + }, + { + "epoch": 1181.1842105263158, + "grad_norm": 1.2943904399871826, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 179540 + }, + { + "epoch": 1181.25, + "grad_norm": 1.3031779527664185, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 179550 + }, + { + "epoch": 1181.3157894736842, + "grad_norm": 0.875571072101593, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 179560 + }, + { + "epoch": 1181.3815789473683, + "grad_norm": 1.010248064994812, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 179570 + }, + { + "epoch": 1181.4473684210527, + "grad_norm": 1.0535480976104736, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 179580 + }, + { + "epoch": 1181.5131578947369, + "grad_norm": 0.9515687823295593, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 179590 + }, + { + "epoch": 1181.578947368421, + "grad_norm": 0.5328004360198975, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 179600 + }, + { + "epoch": 1181.6447368421052, + "grad_norm": 1.2475734949111938, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 179610 + }, + { + "epoch": 1181.7105263157894, + "grad_norm": 0.8466393351554871, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 179620 + }, + { + "epoch": 1181.7763157894738, + "grad_norm": 0.8406497240066528, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 179630 + }, + { + "epoch": 1181.842105263158, + "grad_norm": 1.0086272954940796, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 179640 + }, + { + "epoch": 1181.907894736842, + "grad_norm": 0.9933298826217651, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 179650 + }, + { + "epoch": 1181.9736842105262, + "grad_norm": 1.049748420715332, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 179660 + }, + { + "epoch": 1182.0394736842106, + "grad_norm": 1.0045021772384644, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 179670 + }, + { + "epoch": 1182.1052631578948, + "grad_norm": 0.8957365155220032, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 179680 + }, + { + "epoch": 1182.171052631579, + "grad_norm": 0.8856828212738037, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 179690 + }, + { + "epoch": 1182.2368421052631, + "grad_norm": 1.2047641277313232, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 179700 + }, + { + "epoch": 1182.3026315789473, + "grad_norm": 1.1860121488571167, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 179710 + }, + { + "epoch": 1182.3684210526317, + "grad_norm": 1.2543531656265259, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 179720 + }, + { + "epoch": 1182.4342105263158, + "grad_norm": 1.3099639415740967, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 179730 + }, + { + "epoch": 1182.5, + "grad_norm": 1.1823962926864624, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 179740 + }, + { + "epoch": 1182.5657894736842, + "grad_norm": 0.9950479865074158, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 179750 + }, + { + "epoch": 1182.6315789473683, + "grad_norm": 1.14492928981781, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 179760 + }, + { + "epoch": 1182.6973684210527, + "grad_norm": 0.985609233379364, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 179770 + }, + { + "epoch": 1182.7631578947369, + "grad_norm": 1.0433059930801392, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 179780 + }, + { + "epoch": 1182.828947368421, + "grad_norm": 0.9120721220970154, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 179790 + }, + { + "epoch": 1182.8947368421052, + "grad_norm": 0.8850807547569275, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 179800 + }, + { + "epoch": 1182.9605263157894, + "grad_norm": 0.6139315962791443, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 179810 + }, + { + "epoch": 1183.0263157894738, + "grad_norm": 1.1187626123428345, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 179820 + }, + { + "epoch": 1183.092105263158, + "grad_norm": 1.0548551082611084, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 179830 + }, + { + "epoch": 1183.157894736842, + "grad_norm": 0.9442790746688843, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 179840 + }, + { + "epoch": 1183.2236842105262, + "grad_norm": 0.7005117535591125, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 179850 + }, + { + "epoch": 1183.2894736842106, + "grad_norm": 0.5735732913017273, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 179860 + }, + { + "epoch": 1183.3552631578948, + "grad_norm": 1.027298092842102, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 179870 + }, + { + "epoch": 1183.421052631579, + "grad_norm": 1.3360284566879272, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 179880 + }, + { + "epoch": 1183.4868421052631, + "grad_norm": 0.8555198907852173, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 179890 + }, + { + "epoch": 1183.5526315789473, + "grad_norm": 1.2100673913955688, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 179900 + }, + { + "epoch": 1183.6184210526317, + "grad_norm": 0.8696701526641846, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 179910 + }, + { + "epoch": 1183.6842105263158, + "grad_norm": 1.044405460357666, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 179920 + }, + { + "epoch": 1183.75, + "grad_norm": 0.6577524542808533, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 179930 + }, + { + "epoch": 1183.8157894736842, + "grad_norm": 0.8520231246948242, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 179940 + }, + { + "epoch": 1183.8815789473683, + "grad_norm": 0.5790631175041199, + "learning_rate": 0.0001, + "loss": 0.0067, + "step": 179950 + }, + { + "epoch": 1183.9473684210527, + "grad_norm": 0.6821280717849731, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 179960 + }, + { + "epoch": 1184.0131578947369, + "grad_norm": 1.1946996450424194, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 179970 + }, + { + "epoch": 1184.078947368421, + "grad_norm": 0.8206828832626343, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 179980 + }, + { + "epoch": 1184.1447368421052, + "grad_norm": 1.200190782546997, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 179990 + }, + { + "epoch": 1184.2105263157894, + "grad_norm": 1.0176090002059937, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 180000 + }, + { + "epoch": 1184.2763157894738, + "grad_norm": 1.791040301322937, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 180010 + }, + { + "epoch": 1184.342105263158, + "grad_norm": 1.1457858085632324, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 180020 + }, + { + "epoch": 1184.407894736842, + "grad_norm": 1.3513076305389404, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 180030 + }, + { + "epoch": 1184.4736842105262, + "grad_norm": 1.1888035535812378, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 180040 + }, + { + "epoch": 1184.5394736842106, + "grad_norm": 1.3418779373168945, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 180050 + }, + { + "epoch": 1184.6052631578948, + "grad_norm": 1.2430517673492432, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 180060 + }, + { + "epoch": 1184.671052631579, + "grad_norm": 1.299909234046936, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 180070 + }, + { + "epoch": 1184.7368421052631, + "grad_norm": 1.187268614768982, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 180080 + }, + { + "epoch": 1184.8026315789473, + "grad_norm": 1.0313128232955933, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 180090 + }, + { + "epoch": 1184.8684210526317, + "grad_norm": 0.9773435592651367, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 180100 + }, + { + "epoch": 1184.9342105263158, + "grad_norm": 0.9462606310844421, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 180110 + }, + { + "epoch": 1185.0, + "grad_norm": 1.008164882659912, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 180120 + }, + { + "epoch": 1185.0657894736842, + "grad_norm": 1.0677521228790283, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 180130 + }, + { + "epoch": 1185.1315789473683, + "grad_norm": 1.056257963180542, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 180140 + }, + { + "epoch": 1185.1973684210527, + "grad_norm": 1.3330624103546143, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 180150 + }, + { + "epoch": 1185.2631578947369, + "grad_norm": 1.2008347511291504, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 180160 + }, + { + "epoch": 1185.328947368421, + "grad_norm": 1.0260757207870483, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 180170 + }, + { + "epoch": 1185.3947368421052, + "grad_norm": 0.9635398983955383, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 180180 + }, + { + "epoch": 1185.4605263157894, + "grad_norm": 0.9844223260879517, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 180190 + }, + { + "epoch": 1185.5263157894738, + "grad_norm": 0.9323477745056152, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 180200 + }, + { + "epoch": 1185.592105263158, + "grad_norm": 0.7835142612457275, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 180210 + }, + { + "epoch": 1185.657894736842, + "grad_norm": 0.8820164799690247, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 180220 + }, + { + "epoch": 1185.7236842105262, + "grad_norm": 1.1750354766845703, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 180230 + }, + { + "epoch": 1185.7894736842106, + "grad_norm": 1.058611512184143, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 180240 + }, + { + "epoch": 1185.8552631578948, + "grad_norm": 1.0352505445480347, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 180250 + }, + { + "epoch": 1185.921052631579, + "grad_norm": 0.8906688690185547, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 180260 + }, + { + "epoch": 1185.9868421052631, + "grad_norm": 1.0255763530731201, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 180270 + }, + { + "epoch": 1186.0526315789473, + "grad_norm": 0.9030811190605164, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 180280 + }, + { + "epoch": 1186.1184210526317, + "grad_norm": 0.7949104905128479, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 180290 + }, + { + "epoch": 1186.1842105263158, + "grad_norm": 1.2452713251113892, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 180300 + }, + { + "epoch": 1186.25, + "grad_norm": 0.8592615127563477, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 180310 + }, + { + "epoch": 1186.3157894736842, + "grad_norm": 1.0579206943511963, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 180320 + }, + { + "epoch": 1186.3815789473683, + "grad_norm": 1.0151481628417969, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 180330 + }, + { + "epoch": 1186.4473684210527, + "grad_norm": 1.2412348985671997, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 180340 + }, + { + "epoch": 1186.5131578947369, + "grad_norm": 0.9836140275001526, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 180350 + }, + { + "epoch": 1186.578947368421, + "grad_norm": 0.8268551826477051, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 180360 + }, + { + "epoch": 1186.6447368421052, + "grad_norm": 1.1718807220458984, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 180370 + }, + { + "epoch": 1186.7105263157894, + "grad_norm": 0.9186016321182251, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 180380 + }, + { + "epoch": 1186.7763157894738, + "grad_norm": 1.0187981128692627, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 180390 + }, + { + "epoch": 1186.842105263158, + "grad_norm": 0.7766441106796265, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 180400 + }, + { + "epoch": 1186.907894736842, + "grad_norm": 1.1324797868728638, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 180410 + }, + { + "epoch": 1186.9736842105262, + "grad_norm": 1.3124502897262573, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 180420 + }, + { + "epoch": 1187.0394736842106, + "grad_norm": 1.0421258211135864, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 180430 + }, + { + "epoch": 1187.1052631578948, + "grad_norm": 0.9878388047218323, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 180440 + }, + { + "epoch": 1187.171052631579, + "grad_norm": 1.404099941253662, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 180450 + }, + { + "epoch": 1187.2368421052631, + "grad_norm": 0.8869650959968567, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 180460 + }, + { + "epoch": 1187.3026315789473, + "grad_norm": 0.7417500615119934, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 180470 + }, + { + "epoch": 1187.3684210526317, + "grad_norm": 0.8958377838134766, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 180480 + }, + { + "epoch": 1187.4342105263158, + "grad_norm": 1.3181413412094116, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 180490 + }, + { + "epoch": 1187.5, + "grad_norm": 1.1831432580947876, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 180500 + }, + { + "epoch": 1187.5657894736842, + "grad_norm": 0.9017171263694763, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 180510 + }, + { + "epoch": 1187.6315789473683, + "grad_norm": 1.0367988348007202, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 180520 + }, + { + "epoch": 1187.6973684210527, + "grad_norm": 0.9045135974884033, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 180530 + }, + { + "epoch": 1187.7631578947369, + "grad_norm": 0.8042669296264648, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 180540 + }, + { + "epoch": 1187.828947368421, + "grad_norm": 1.19108247756958, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 180550 + }, + { + "epoch": 1187.8947368421052, + "grad_norm": 1.238985300064087, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 180560 + }, + { + "epoch": 1187.9605263157894, + "grad_norm": 1.5841645002365112, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 180570 + }, + { + "epoch": 1188.0263157894738, + "grad_norm": 1.1452237367630005, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 180580 + }, + { + "epoch": 1188.092105263158, + "grad_norm": 1.4237656593322754, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 180590 + }, + { + "epoch": 1188.157894736842, + "grad_norm": 1.343073844909668, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 180600 + }, + { + "epoch": 1188.2236842105262, + "grad_norm": 0.9501973986625671, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 180610 + }, + { + "epoch": 1188.2894736842106, + "grad_norm": 0.8718429803848267, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 180620 + }, + { + "epoch": 1188.3552631578948, + "grad_norm": 1.1177846193313599, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 180630 + }, + { + "epoch": 1188.421052631579, + "grad_norm": 0.7141475677490234, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 180640 + }, + { + "epoch": 1188.4868421052631, + "grad_norm": 0.9596945643424988, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 180650 + }, + { + "epoch": 1188.5526315789473, + "grad_norm": 1.1708953380584717, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 180660 + }, + { + "epoch": 1188.6184210526317, + "grad_norm": 1.1487609148025513, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 180670 + }, + { + "epoch": 1188.6842105263158, + "grad_norm": 0.9982112050056458, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 180680 + }, + { + "epoch": 1188.75, + "grad_norm": 1.1738011837005615, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 180690 + }, + { + "epoch": 1188.8157894736842, + "grad_norm": 1.2586804628372192, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 180700 + }, + { + "epoch": 1188.8815789473683, + "grad_norm": 1.6938642263412476, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 180710 + }, + { + "epoch": 1188.9473684210527, + "grad_norm": 1.3859210014343262, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 180720 + }, + { + "epoch": 1189.0131578947369, + "grad_norm": 1.3656494617462158, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 180730 + }, + { + "epoch": 1189.078947368421, + "grad_norm": 1.3069894313812256, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 180740 + }, + { + "epoch": 1189.1447368421052, + "grad_norm": 1.5368648767471313, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 180750 + }, + { + "epoch": 1189.2105263157894, + "grad_norm": 0.9646233916282654, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 180760 + }, + { + "epoch": 1189.2763157894738, + "grad_norm": 1.1789246797561646, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 180770 + }, + { + "epoch": 1189.342105263158, + "grad_norm": 0.9047682881355286, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 180780 + }, + { + "epoch": 1189.407894736842, + "grad_norm": 1.1768347024917603, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 180790 + }, + { + "epoch": 1189.4736842105262, + "grad_norm": 0.8927736878395081, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 180800 + }, + { + "epoch": 1189.5394736842106, + "grad_norm": 1.3227018117904663, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 180810 + }, + { + "epoch": 1189.6052631578948, + "grad_norm": 1.1930198669433594, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 180820 + }, + { + "epoch": 1189.671052631579, + "grad_norm": 0.9631125926971436, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 180830 + }, + { + "epoch": 1189.7368421052631, + "grad_norm": 0.9095155000686646, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 180840 + }, + { + "epoch": 1189.8026315789473, + "grad_norm": 0.9870935082435608, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 180850 + }, + { + "epoch": 1189.8684210526317, + "grad_norm": 0.9359210133552551, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 180860 + }, + { + "epoch": 1189.9342105263158, + "grad_norm": 1.2630891799926758, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 180870 + }, + { + "epoch": 1190.0, + "grad_norm": 0.9118783473968506, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 180880 + }, + { + "epoch": 1190.0657894736842, + "grad_norm": 0.9447109699249268, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 180890 + }, + { + "epoch": 1190.1315789473683, + "grad_norm": 0.8619827032089233, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 180900 + }, + { + "epoch": 1190.1973684210527, + "grad_norm": 1.0921404361724854, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 180910 + }, + { + "epoch": 1190.2631578947369, + "grad_norm": 0.7819488048553467, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 180920 + }, + { + "epoch": 1190.328947368421, + "grad_norm": 0.9675580263137817, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 180930 + }, + { + "epoch": 1190.3947368421052, + "grad_norm": 0.9420514106750488, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 180940 + }, + { + "epoch": 1190.4605263157894, + "grad_norm": 1.3097189664840698, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 180950 + }, + { + "epoch": 1190.5263157894738, + "grad_norm": 1.0128648281097412, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 180960 + }, + { + "epoch": 1190.592105263158, + "grad_norm": 0.833601713180542, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 180970 + }, + { + "epoch": 1190.657894736842, + "grad_norm": 0.7364785671234131, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 180980 + }, + { + "epoch": 1190.7236842105262, + "grad_norm": 1.0391943454742432, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 180990 + }, + { + "epoch": 1190.7894736842106, + "grad_norm": 0.9360489845275879, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 181000 + }, + { + "epoch": 1190.8552631578948, + "grad_norm": 0.8230446577072144, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 181010 + }, + { + "epoch": 1190.921052631579, + "grad_norm": 0.6308891773223877, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 181020 + }, + { + "epoch": 1190.9868421052631, + "grad_norm": 0.9240980744361877, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 181030 + }, + { + "epoch": 1191.0526315789473, + "grad_norm": 0.9708207845687866, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 181040 + }, + { + "epoch": 1191.1184210526317, + "grad_norm": 1.2800960540771484, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 181050 + }, + { + "epoch": 1191.1842105263158, + "grad_norm": 0.9510250687599182, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 181060 + }, + { + "epoch": 1191.25, + "grad_norm": 0.8674155473709106, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 181070 + }, + { + "epoch": 1191.3157894736842, + "grad_norm": 1.276930332183838, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 181080 + }, + { + "epoch": 1191.3815789473683, + "grad_norm": 1.3920434713363647, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 181090 + }, + { + "epoch": 1191.4473684210527, + "grad_norm": 0.7923642992973328, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 181100 + }, + { + "epoch": 1191.5131578947369, + "grad_norm": 1.4579869508743286, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 181110 + }, + { + "epoch": 1191.578947368421, + "grad_norm": 0.9362051486968994, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 181120 + }, + { + "epoch": 1191.6447368421052, + "grad_norm": 1.371772050857544, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 181130 + }, + { + "epoch": 1191.7105263157894, + "grad_norm": 1.1880755424499512, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 181140 + }, + { + "epoch": 1191.7763157894738, + "grad_norm": 0.8417680859565735, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 181150 + }, + { + "epoch": 1191.842105263158, + "grad_norm": 0.9738719463348389, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 181160 + }, + { + "epoch": 1191.907894736842, + "grad_norm": 0.8332647085189819, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 181170 + }, + { + "epoch": 1191.9736842105262, + "grad_norm": 0.8110590577125549, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 181180 + }, + { + "epoch": 1192.0394736842106, + "grad_norm": 0.7889165878295898, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 181190 + }, + { + "epoch": 1192.1052631578948, + "grad_norm": 0.9631662368774414, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 181200 + }, + { + "epoch": 1192.171052631579, + "grad_norm": 0.9708088040351868, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 181210 + }, + { + "epoch": 1192.2368421052631, + "grad_norm": 0.9359713196754456, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 181220 + }, + { + "epoch": 1192.3026315789473, + "grad_norm": 0.9870832562446594, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 181230 + }, + { + "epoch": 1192.3684210526317, + "grad_norm": 1.1724073886871338, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 181240 + }, + { + "epoch": 1192.4342105263158, + "grad_norm": 0.7455830574035645, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 181250 + }, + { + "epoch": 1192.5, + "grad_norm": 0.9996033906936646, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 181260 + }, + { + "epoch": 1192.5657894736842, + "grad_norm": 1.123407006263733, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 181270 + }, + { + "epoch": 1192.6315789473683, + "grad_norm": 0.9888333678245544, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 181280 + }, + { + "epoch": 1192.6973684210527, + "grad_norm": 1.7157537937164307, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 181290 + }, + { + "epoch": 1192.7631578947369, + "grad_norm": 1.2598707675933838, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 181300 + }, + { + "epoch": 1192.828947368421, + "grad_norm": 1.1619699001312256, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 181310 + }, + { + "epoch": 1192.8947368421052, + "grad_norm": 0.9161400198936462, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 181320 + }, + { + "epoch": 1192.9605263157894, + "grad_norm": 0.7341195344924927, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 181330 + }, + { + "epoch": 1193.0263157894738, + "grad_norm": 1.1331208944320679, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 181340 + }, + { + "epoch": 1193.092105263158, + "grad_norm": 1.149287223815918, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 181350 + }, + { + "epoch": 1193.157894736842, + "grad_norm": 0.9564964175224304, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 181360 + }, + { + "epoch": 1193.2236842105262, + "grad_norm": 1.252368450164795, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 181370 + }, + { + "epoch": 1193.2894736842106, + "grad_norm": 1.285278558731079, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 181380 + }, + { + "epoch": 1193.3552631578948, + "grad_norm": 1.0179182291030884, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 181390 + }, + { + "epoch": 1193.421052631579, + "grad_norm": 0.8512329459190369, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 181400 + }, + { + "epoch": 1193.4868421052631, + "grad_norm": 1.3857789039611816, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 181410 + }, + { + "epoch": 1193.5526315789473, + "grad_norm": 1.1037719249725342, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 181420 + }, + { + "epoch": 1193.6184210526317, + "grad_norm": 1.2100577354431152, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 181430 + }, + { + "epoch": 1193.6842105263158, + "grad_norm": 1.3522088527679443, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 181440 + }, + { + "epoch": 1193.75, + "grad_norm": 1.1661986112594604, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 181450 + }, + { + "epoch": 1193.8157894736842, + "grad_norm": 1.2195343971252441, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 181460 + }, + { + "epoch": 1193.8815789473683, + "grad_norm": 1.2342994213104248, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 181470 + }, + { + "epoch": 1193.9473684210527, + "grad_norm": 1.23150634765625, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 181480 + }, + { + "epoch": 1194.0131578947369, + "grad_norm": 1.1036239862442017, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 181490 + }, + { + "epoch": 1194.078947368421, + "grad_norm": 0.7650617361068726, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 181500 + }, + { + "epoch": 1194.1447368421052, + "grad_norm": 0.7626112699508667, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 181510 + }, + { + "epoch": 1194.2105263157894, + "grad_norm": 1.1375484466552734, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 181520 + }, + { + "epoch": 1194.2763157894738, + "grad_norm": 0.9652150869369507, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 181530 + }, + { + "epoch": 1194.342105263158, + "grad_norm": 0.9495952725410461, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 181540 + }, + { + "epoch": 1194.407894736842, + "grad_norm": 1.1295645236968994, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 181550 + }, + { + "epoch": 1194.4736842105262, + "grad_norm": 0.7198823094367981, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 181560 + }, + { + "epoch": 1194.5394736842106, + "grad_norm": 1.061182975769043, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 181570 + }, + { + "epoch": 1194.6052631578948, + "grad_norm": 0.6715997457504272, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 181580 + }, + { + "epoch": 1194.671052631579, + "grad_norm": 0.8791295289993286, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 181590 + }, + { + "epoch": 1194.7368421052631, + "grad_norm": 1.9613913297653198, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 181600 + }, + { + "epoch": 1194.8026315789473, + "grad_norm": 2.2601711750030518, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 181610 + }, + { + "epoch": 1194.8684210526317, + "grad_norm": 2.3166682720184326, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 181620 + }, + { + "epoch": 1194.9342105263158, + "grad_norm": 1.4985716342926025, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 181630 + }, + { + "epoch": 1195.0, + "grad_norm": 1.3064441680908203, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 181640 + }, + { + "epoch": 1195.0657894736842, + "grad_norm": 0.9696851372718811, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 181650 + }, + { + "epoch": 1195.1315789473683, + "grad_norm": 1.3249530792236328, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 181660 + }, + { + "epoch": 1195.1973684210527, + "grad_norm": 1.042757511138916, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 181670 + }, + { + "epoch": 1195.2631578947369, + "grad_norm": 0.9609398245811462, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 181680 + }, + { + "epoch": 1195.328947368421, + "grad_norm": 1.0072219371795654, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 181690 + }, + { + "epoch": 1195.3947368421052, + "grad_norm": 1.1560006141662598, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 181700 + }, + { + "epoch": 1195.4605263157894, + "grad_norm": 0.9536867737770081, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 181710 + }, + { + "epoch": 1195.5263157894738, + "grad_norm": 0.8431065678596497, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 181720 + }, + { + "epoch": 1195.592105263158, + "grad_norm": 1.448400855064392, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 181730 + }, + { + "epoch": 1195.657894736842, + "grad_norm": 1.354210615158081, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 181740 + }, + { + "epoch": 1195.7236842105262, + "grad_norm": 1.0818076133728027, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 181750 + }, + { + "epoch": 1195.7894736842106, + "grad_norm": 1.0603787899017334, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 181760 + }, + { + "epoch": 1195.8552631578948, + "grad_norm": 1.0252753496170044, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 181770 + }, + { + "epoch": 1195.921052631579, + "grad_norm": 0.9185944199562073, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 181780 + }, + { + "epoch": 1195.9868421052631, + "grad_norm": 1.012385368347168, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 181790 + }, + { + "epoch": 1196.0526315789473, + "grad_norm": 1.1767807006835938, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 181800 + }, + { + "epoch": 1196.1184210526317, + "grad_norm": 0.9487605094909668, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 181810 + }, + { + "epoch": 1196.1842105263158, + "grad_norm": 1.0731463432312012, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 181820 + }, + { + "epoch": 1196.25, + "grad_norm": 1.1926857233047485, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 181830 + }, + { + "epoch": 1196.3157894736842, + "grad_norm": 1.1458866596221924, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 181840 + }, + { + "epoch": 1196.3815789473683, + "grad_norm": 0.7594784498214722, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 181850 + }, + { + "epoch": 1196.4473684210527, + "grad_norm": 0.7159861326217651, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 181860 + }, + { + "epoch": 1196.5131578947369, + "grad_norm": 0.9384440183639526, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 181870 + }, + { + "epoch": 1196.578947368421, + "grad_norm": 0.7661808729171753, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 181880 + }, + { + "epoch": 1196.6447368421052, + "grad_norm": 0.8647274971008301, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 181890 + }, + { + "epoch": 1196.7105263157894, + "grad_norm": 0.8873572945594788, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 181900 + }, + { + "epoch": 1196.7763157894738, + "grad_norm": 1.2721664905548096, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 181910 + }, + { + "epoch": 1196.842105263158, + "grad_norm": 0.8040602207183838, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 181920 + }, + { + "epoch": 1196.907894736842, + "grad_norm": 1.413497805595398, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 181930 + }, + { + "epoch": 1196.9736842105262, + "grad_norm": 1.1012861728668213, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 181940 + }, + { + "epoch": 1197.0394736842106, + "grad_norm": 1.0302972793579102, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 181950 + }, + { + "epoch": 1197.1052631578948, + "grad_norm": 0.9582118391990662, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 181960 + }, + { + "epoch": 1197.171052631579, + "grad_norm": 1.0968737602233887, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 181970 + }, + { + "epoch": 1197.2368421052631, + "grad_norm": 1.0602166652679443, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 181980 + }, + { + "epoch": 1197.3026315789473, + "grad_norm": 0.890673816204071, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 181990 + }, + { + "epoch": 1197.3684210526317, + "grad_norm": 0.9959932565689087, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 182000 + }, + { + "epoch": 1197.4342105263158, + "grad_norm": 1.0189555883407593, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 182010 + }, + { + "epoch": 1197.5, + "grad_norm": 1.3010244369506836, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 182020 + }, + { + "epoch": 1197.5657894736842, + "grad_norm": 0.6490765810012817, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 182030 + }, + { + "epoch": 1197.6315789473683, + "grad_norm": 0.9288578629493713, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 182040 + }, + { + "epoch": 1197.6973684210527, + "grad_norm": 0.9825459122657776, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 182050 + }, + { + "epoch": 1197.7631578947369, + "grad_norm": 1.1905490159988403, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 182060 + }, + { + "epoch": 1197.828947368421, + "grad_norm": 0.7103608846664429, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 182070 + }, + { + "epoch": 1197.8947368421052, + "grad_norm": 0.9443063735961914, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 182080 + }, + { + "epoch": 1197.9605263157894, + "grad_norm": 1.0908981561660767, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 182090 + }, + { + "epoch": 1198.0263157894738, + "grad_norm": 1.0232374668121338, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 182100 + }, + { + "epoch": 1198.092105263158, + "grad_norm": 0.8261113166809082, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 182110 + }, + { + "epoch": 1198.157894736842, + "grad_norm": 0.9809024930000305, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 182120 + }, + { + "epoch": 1198.2236842105262, + "grad_norm": 1.3282575607299805, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 182130 + }, + { + "epoch": 1198.2894736842106, + "grad_norm": 0.756084144115448, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 182140 + }, + { + "epoch": 1198.3552631578948, + "grad_norm": 1.1468161344528198, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 182150 + }, + { + "epoch": 1198.421052631579, + "grad_norm": 1.3206136226654053, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 182160 + }, + { + "epoch": 1198.4868421052631, + "grad_norm": 1.339464545249939, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 182170 + }, + { + "epoch": 1198.5526315789473, + "grad_norm": 1.1766377687454224, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 182180 + }, + { + "epoch": 1198.6184210526317, + "grad_norm": 1.342929482460022, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 182190 + }, + { + "epoch": 1198.6842105263158, + "grad_norm": 1.5300862789154053, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 182200 + }, + { + "epoch": 1198.75, + "grad_norm": 1.2704681158065796, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 182210 + }, + { + "epoch": 1198.8157894736842, + "grad_norm": 0.9477614164352417, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 182220 + }, + { + "epoch": 1198.8815789473683, + "grad_norm": 1.055249810218811, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 182230 + }, + { + "epoch": 1198.9473684210527, + "grad_norm": 1.057106375694275, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 182240 + }, + { + "epoch": 1199.0131578947369, + "grad_norm": 1.096887469291687, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 182250 + }, + { + "epoch": 1199.078947368421, + "grad_norm": 1.1574534177780151, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 182260 + }, + { + "epoch": 1199.1447368421052, + "grad_norm": 1.2946901321411133, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 182270 + }, + { + "epoch": 1199.2105263157894, + "grad_norm": 1.0303276777267456, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 182280 + }, + { + "epoch": 1199.2763157894738, + "grad_norm": 0.9668704867362976, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 182290 + }, + { + "epoch": 1199.342105263158, + "grad_norm": 1.6884338855743408, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 182300 + }, + { + "epoch": 1199.407894736842, + "grad_norm": 1.1282835006713867, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 182310 + }, + { + "epoch": 1199.4736842105262, + "grad_norm": 1.2376283407211304, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 182320 + }, + { + "epoch": 1199.5394736842106, + "grad_norm": 1.081881046295166, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 182330 + }, + { + "epoch": 1199.6052631578948, + "grad_norm": 1.080176830291748, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 182340 + }, + { + "epoch": 1199.671052631579, + "grad_norm": 1.160550832748413, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 182350 + }, + { + "epoch": 1199.7368421052631, + "grad_norm": 1.1847903728485107, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 182360 + }, + { + "epoch": 1199.8026315789473, + "grad_norm": 1.0338932275772095, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 182370 + }, + { + "epoch": 1199.8684210526317, + "grad_norm": 0.7202876806259155, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 182380 + }, + { + "epoch": 1199.9342105263158, + "grad_norm": 0.9999865293502808, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 182390 + }, + { + "epoch": 1200.0, + "grad_norm": 0.7838411331176758, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 182400 + }, + { + "epoch": 1200.0657894736842, + "grad_norm": 0.8725451827049255, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 182410 + }, + { + "epoch": 1200.1315789473683, + "grad_norm": 0.8963289856910706, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 182420 + }, + { + "epoch": 1200.1973684210527, + "grad_norm": 0.8601619601249695, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 182430 + }, + { + "epoch": 1200.2631578947369, + "grad_norm": 0.9362215399742126, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 182440 + }, + { + "epoch": 1200.328947368421, + "grad_norm": 0.7226735949516296, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 182450 + }, + { + "epoch": 1200.3947368421052, + "grad_norm": 1.1684337854385376, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 182460 + }, + { + "epoch": 1200.4605263157894, + "grad_norm": 1.1520575284957886, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 182470 + }, + { + "epoch": 1200.5263157894738, + "grad_norm": 1.0707088708877563, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 182480 + }, + { + "epoch": 1200.592105263158, + "grad_norm": 0.8464767336845398, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 182490 + }, + { + "epoch": 1200.657894736842, + "grad_norm": 1.186996579170227, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 182500 + }, + { + "epoch": 1200.7236842105262, + "grad_norm": 1.0348249673843384, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 182510 + }, + { + "epoch": 1200.7894736842106, + "grad_norm": 1.1279314756393433, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 182520 + }, + { + "epoch": 1200.8552631578948, + "grad_norm": 0.6997672319412231, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 182530 + }, + { + "epoch": 1200.921052631579, + "grad_norm": 0.8470837473869324, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 182540 + }, + { + "epoch": 1200.9868421052631, + "grad_norm": 1.178248405456543, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 182550 + }, + { + "epoch": 1201.0526315789473, + "grad_norm": 0.958922266960144, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 182560 + }, + { + "epoch": 1201.1184210526317, + "grad_norm": 0.8927515745162964, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 182570 + }, + { + "epoch": 1201.1842105263158, + "grad_norm": 1.0204429626464844, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 182580 + }, + { + "epoch": 1201.25, + "grad_norm": 0.7796499729156494, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 182590 + }, + { + "epoch": 1201.3157894736842, + "grad_norm": 0.7244349718093872, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 182600 + }, + { + "epoch": 1201.3815789473683, + "grad_norm": 0.921933114528656, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 182610 + }, + { + "epoch": 1201.4473684210527, + "grad_norm": 1.1822832822799683, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 182620 + }, + { + "epoch": 1201.5131578947369, + "grad_norm": 0.8840344548225403, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 182630 + }, + { + "epoch": 1201.578947368421, + "grad_norm": 1.2693527936935425, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 182640 + }, + { + "epoch": 1201.6447368421052, + "grad_norm": 1.1028697490692139, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 182650 + }, + { + "epoch": 1201.7105263157894, + "grad_norm": 0.9558727145195007, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 182660 + }, + { + "epoch": 1201.7763157894738, + "grad_norm": 1.0722416639328003, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 182670 + }, + { + "epoch": 1201.842105263158, + "grad_norm": 1.0818010568618774, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 182680 + }, + { + "epoch": 1201.907894736842, + "grad_norm": 0.9133005142211914, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 182690 + }, + { + "epoch": 1201.9736842105262, + "grad_norm": 0.8337648510932922, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 182700 + }, + { + "epoch": 1202.0394736842106, + "grad_norm": 1.0010408163070679, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 182710 + }, + { + "epoch": 1202.1052631578948, + "grad_norm": 1.2349421977996826, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 182720 + }, + { + "epoch": 1202.171052631579, + "grad_norm": 0.9251318573951721, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 182730 + }, + { + "epoch": 1202.2368421052631, + "grad_norm": 1.2574422359466553, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 182740 + }, + { + "epoch": 1202.3026315789473, + "grad_norm": 1.0148321390151978, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 182750 + }, + { + "epoch": 1202.3684210526317, + "grad_norm": 0.4935104548931122, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 182760 + }, + { + "epoch": 1202.4342105263158, + "grad_norm": 0.8940833806991577, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 182770 + }, + { + "epoch": 1202.5, + "grad_norm": 0.8071444034576416, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 182780 + }, + { + "epoch": 1202.5657894736842, + "grad_norm": 0.8158524036407471, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 182790 + }, + { + "epoch": 1202.6315789473683, + "grad_norm": 1.1093024015426636, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 182800 + }, + { + "epoch": 1202.6973684210527, + "grad_norm": 1.2366114854812622, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 182810 + }, + { + "epoch": 1202.7631578947369, + "grad_norm": 1.1967785358428955, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 182820 + }, + { + "epoch": 1202.828947368421, + "grad_norm": 1.1936818361282349, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 182830 + }, + { + "epoch": 1202.8947368421052, + "grad_norm": 1.7148412466049194, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 182840 + }, + { + "epoch": 1202.9605263157894, + "grad_norm": 1.4570777416229248, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 182850 + }, + { + "epoch": 1203.0263157894738, + "grad_norm": 1.157327651977539, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 182860 + }, + { + "epoch": 1203.092105263158, + "grad_norm": 1.0844436883926392, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 182870 + }, + { + "epoch": 1203.157894736842, + "grad_norm": 1.446609616279602, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 182880 + }, + { + "epoch": 1203.2236842105262, + "grad_norm": 0.9722105264663696, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 182890 + }, + { + "epoch": 1203.2894736842106, + "grad_norm": 0.864902675151825, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 182900 + }, + { + "epoch": 1203.3552631578948, + "grad_norm": 0.9735894203186035, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 182910 + }, + { + "epoch": 1203.421052631579, + "grad_norm": 0.9694196581840515, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 182920 + }, + { + "epoch": 1203.4868421052631, + "grad_norm": 1.1758695840835571, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 182930 + }, + { + "epoch": 1203.5526315789473, + "grad_norm": 0.9834887981414795, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 182940 + }, + { + "epoch": 1203.6184210526317, + "grad_norm": 1.0233968496322632, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 182950 + }, + { + "epoch": 1203.6842105263158, + "grad_norm": 0.826490044593811, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 182960 + }, + { + "epoch": 1203.75, + "grad_norm": 0.7306733727455139, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 182970 + }, + { + "epoch": 1203.8157894736842, + "grad_norm": 1.019582986831665, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 182980 + }, + { + "epoch": 1203.8815789473683, + "grad_norm": 1.1735548973083496, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 182990 + }, + { + "epoch": 1203.9473684210527, + "grad_norm": 1.2922629117965698, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 183000 + }, + { + "epoch": 1204.0131578947369, + "grad_norm": 1.170229434967041, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 183010 + }, + { + "epoch": 1204.078947368421, + "grad_norm": 1.1170730590820312, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 183020 + }, + { + "epoch": 1204.1447368421052, + "grad_norm": 1.4809566736221313, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 183030 + }, + { + "epoch": 1204.2105263157894, + "grad_norm": 1.3102599382400513, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 183040 + }, + { + "epoch": 1204.2763157894738, + "grad_norm": 1.1147892475128174, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 183050 + }, + { + "epoch": 1204.342105263158, + "grad_norm": 1.0742824077606201, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 183060 + }, + { + "epoch": 1204.407894736842, + "grad_norm": 1.0442990064620972, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 183070 + }, + { + "epoch": 1204.4736842105262, + "grad_norm": 0.6480495929718018, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 183080 + }, + { + "epoch": 1204.5394736842106, + "grad_norm": 1.12603759765625, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 183090 + }, + { + "epoch": 1204.6052631578948, + "grad_norm": 0.9349260926246643, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 183100 + }, + { + "epoch": 1204.671052631579, + "grad_norm": 1.4842894077301025, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 183110 + }, + { + "epoch": 1204.7368421052631, + "grad_norm": 1.003530502319336, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 183120 + }, + { + "epoch": 1204.8026315789473, + "grad_norm": 1.3181918859481812, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 183130 + }, + { + "epoch": 1204.8684210526317, + "grad_norm": 0.9079726934432983, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 183140 + }, + { + "epoch": 1204.9342105263158, + "grad_norm": 1.0107637643814087, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 183150 + }, + { + "epoch": 1205.0, + "grad_norm": 1.043506145477295, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 183160 + }, + { + "epoch": 1205.0657894736842, + "grad_norm": 0.9948956966400146, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 183170 + }, + { + "epoch": 1205.1315789473683, + "grad_norm": 0.8911522626876831, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 183180 + }, + { + "epoch": 1205.1973684210527, + "grad_norm": 0.9188016057014465, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 183190 + }, + { + "epoch": 1205.2631578947369, + "grad_norm": 0.9119938611984253, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 183200 + }, + { + "epoch": 1205.328947368421, + "grad_norm": 1.064100742340088, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 183210 + }, + { + "epoch": 1205.3947368421052, + "grad_norm": 1.1177867650985718, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 183220 + }, + { + "epoch": 1205.4605263157894, + "grad_norm": 0.7622190117835999, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 183230 + }, + { + "epoch": 1205.5263157894738, + "grad_norm": 0.9194398522377014, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 183240 + }, + { + "epoch": 1205.592105263158, + "grad_norm": 1.1614681482315063, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 183250 + }, + { + "epoch": 1205.657894736842, + "grad_norm": 1.296439290046692, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 183260 + }, + { + "epoch": 1205.7236842105262, + "grad_norm": 1.368787169456482, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 183270 + }, + { + "epoch": 1205.7894736842106, + "grad_norm": 1.1311159133911133, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 183280 + }, + { + "epoch": 1205.8552631578948, + "grad_norm": 1.0988332033157349, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 183290 + }, + { + "epoch": 1205.921052631579, + "grad_norm": 0.6983150839805603, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 183300 + }, + { + "epoch": 1205.9868421052631, + "grad_norm": 0.8750330805778503, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 183310 + }, + { + "epoch": 1206.0526315789473, + "grad_norm": 1.087065577507019, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 183320 + }, + { + "epoch": 1206.1184210526317, + "grad_norm": 0.9251524806022644, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 183330 + }, + { + "epoch": 1206.1842105263158, + "grad_norm": 0.998687744140625, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 183340 + }, + { + "epoch": 1206.25, + "grad_norm": 0.6075114011764526, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 183350 + }, + { + "epoch": 1206.3157894736842, + "grad_norm": 0.7300073504447937, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 183360 + }, + { + "epoch": 1206.3815789473683, + "grad_norm": 0.7422103881835938, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 183370 + }, + { + "epoch": 1206.4473684210527, + "grad_norm": 0.9821145534515381, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 183380 + }, + { + "epoch": 1206.5131578947369, + "grad_norm": 0.9073783159255981, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 183390 + }, + { + "epoch": 1206.578947368421, + "grad_norm": 0.854741096496582, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 183400 + }, + { + "epoch": 1206.6447368421052, + "grad_norm": 1.1214978694915771, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 183410 + }, + { + "epoch": 1206.7105263157894, + "grad_norm": 0.9807927012443542, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 183420 + }, + { + "epoch": 1206.7763157894738, + "grad_norm": 0.8988727927207947, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 183430 + }, + { + "epoch": 1206.842105263158, + "grad_norm": 0.840644896030426, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 183440 + }, + { + "epoch": 1206.907894736842, + "grad_norm": 1.0273395776748657, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 183450 + }, + { + "epoch": 1206.9736842105262, + "grad_norm": 1.0159443616867065, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 183460 + }, + { + "epoch": 1207.0394736842106, + "grad_norm": 1.0389869213104248, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 183470 + }, + { + "epoch": 1207.1052631578948, + "grad_norm": 0.9145666360855103, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 183480 + }, + { + "epoch": 1207.171052631579, + "grad_norm": 0.9767587184906006, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 183490 + }, + { + "epoch": 1207.2368421052631, + "grad_norm": 1.1191926002502441, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 183500 + }, + { + "epoch": 1207.3026315789473, + "grad_norm": 0.898288905620575, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 183510 + }, + { + "epoch": 1207.3684210526317, + "grad_norm": 0.7566203474998474, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 183520 + }, + { + "epoch": 1207.4342105263158, + "grad_norm": 1.134048342704773, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 183530 + }, + { + "epoch": 1207.5, + "grad_norm": 1.08811354637146, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 183540 + }, + { + "epoch": 1207.5657894736842, + "grad_norm": 1.2516783475875854, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 183550 + }, + { + "epoch": 1207.6315789473683, + "grad_norm": 1.2968010902404785, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 183560 + }, + { + "epoch": 1207.6973684210527, + "grad_norm": 0.9975584149360657, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 183570 + }, + { + "epoch": 1207.7631578947369, + "grad_norm": 1.1204050779342651, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 183580 + }, + { + "epoch": 1207.828947368421, + "grad_norm": 1.050008773803711, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 183590 + }, + { + "epoch": 1207.8947368421052, + "grad_norm": 0.832999050617218, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 183600 + }, + { + "epoch": 1207.9605263157894, + "grad_norm": 0.9619593024253845, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 183610 + }, + { + "epoch": 1208.0263157894738, + "grad_norm": 0.7968380451202393, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 183620 + }, + { + "epoch": 1208.092105263158, + "grad_norm": 0.9513537883758545, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 183630 + }, + { + "epoch": 1208.157894736842, + "grad_norm": 1.0646263360977173, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 183640 + }, + { + "epoch": 1208.2236842105262, + "grad_norm": 0.8924401998519897, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 183650 + }, + { + "epoch": 1208.2894736842106, + "grad_norm": 0.9576424360275269, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 183660 + }, + { + "epoch": 1208.3552631578948, + "grad_norm": 1.0064516067504883, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 183670 + }, + { + "epoch": 1208.421052631579, + "grad_norm": 1.2385472059249878, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 183680 + }, + { + "epoch": 1208.4868421052631, + "grad_norm": 1.1335607767105103, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 183690 + }, + { + "epoch": 1208.5526315789473, + "grad_norm": 1.0447804927825928, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 183700 + }, + { + "epoch": 1208.6184210526317, + "grad_norm": 0.9965493679046631, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 183710 + }, + { + "epoch": 1208.6842105263158, + "grad_norm": 0.8367345929145813, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 183720 + }, + { + "epoch": 1208.75, + "grad_norm": 1.0351852178573608, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 183730 + }, + { + "epoch": 1208.8157894736842, + "grad_norm": 0.9388834834098816, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 183740 + }, + { + "epoch": 1208.8815789473683, + "grad_norm": 0.9737825393676758, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 183750 + }, + { + "epoch": 1208.9473684210527, + "grad_norm": 1.0484927892684937, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 183760 + }, + { + "epoch": 1209.0131578947369, + "grad_norm": 0.9659373760223389, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 183770 + }, + { + "epoch": 1209.078947368421, + "grad_norm": 1.433643102645874, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 183780 + }, + { + "epoch": 1209.1447368421052, + "grad_norm": 0.7290152311325073, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 183790 + }, + { + "epoch": 1209.2105263157894, + "grad_norm": 1.0148568153381348, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 183800 + }, + { + "epoch": 1209.2763157894738, + "grad_norm": 1.4373056888580322, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 183810 + }, + { + "epoch": 1209.342105263158, + "grad_norm": 0.8225641846656799, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 183820 + }, + { + "epoch": 1209.407894736842, + "grad_norm": 1.236804485321045, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 183830 + }, + { + "epoch": 1209.4736842105262, + "grad_norm": 1.466613531112671, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 183840 + }, + { + "epoch": 1209.5394736842106, + "grad_norm": 1.2647264003753662, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 183850 + }, + { + "epoch": 1209.6052631578948, + "grad_norm": 0.9265692234039307, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 183860 + }, + { + "epoch": 1209.671052631579, + "grad_norm": 1.0921601057052612, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 183870 + }, + { + "epoch": 1209.7368421052631, + "grad_norm": 1.2621147632598877, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 183880 + }, + { + "epoch": 1209.8026315789473, + "grad_norm": 1.0996668338775635, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 183890 + }, + { + "epoch": 1209.8684210526317, + "grad_norm": 1.0238312482833862, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 183900 + }, + { + "epoch": 1209.9342105263158, + "grad_norm": 1.0571686029434204, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 183910 + }, + { + "epoch": 1210.0, + "grad_norm": 1.0116900205612183, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 183920 + }, + { + "epoch": 1210.0657894736842, + "grad_norm": 1.5669469833374023, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 183930 + }, + { + "epoch": 1210.1315789473683, + "grad_norm": 1.1383708715438843, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 183940 + }, + { + "epoch": 1210.1973684210527, + "grad_norm": 1.053849697113037, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 183950 + }, + { + "epoch": 1210.2631578947369, + "grad_norm": 0.9576135873794556, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 183960 + }, + { + "epoch": 1210.328947368421, + "grad_norm": 1.0139400959014893, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 183970 + }, + { + "epoch": 1210.3947368421052, + "grad_norm": 0.9644301533699036, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 183980 + }, + { + "epoch": 1210.4605263157894, + "grad_norm": 0.9945560693740845, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 183990 + }, + { + "epoch": 1210.5263157894738, + "grad_norm": 1.0940089225769043, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 184000 + }, + { + "epoch": 1210.592105263158, + "grad_norm": 0.9017602801322937, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 184010 + }, + { + "epoch": 1210.657894736842, + "grad_norm": 0.8119890093803406, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 184020 + }, + { + "epoch": 1210.7236842105262, + "grad_norm": 0.7484346628189087, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 184030 + }, + { + "epoch": 1210.7894736842106, + "grad_norm": 0.6174297332763672, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 184040 + }, + { + "epoch": 1210.8552631578948, + "grad_norm": 0.9243563413619995, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 184050 + }, + { + "epoch": 1210.921052631579, + "grad_norm": 1.0928142070770264, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 184060 + }, + { + "epoch": 1210.9868421052631, + "grad_norm": 1.0260541439056396, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 184070 + }, + { + "epoch": 1211.0526315789473, + "grad_norm": 0.8601813316345215, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 184080 + }, + { + "epoch": 1211.1184210526317, + "grad_norm": 1.062245488166809, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 184090 + }, + { + "epoch": 1211.1842105263158, + "grad_norm": 0.8199011087417603, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 184100 + }, + { + "epoch": 1211.25, + "grad_norm": 0.9992640614509583, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 184110 + }, + { + "epoch": 1211.3157894736842, + "grad_norm": 1.1344804763793945, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 184120 + }, + { + "epoch": 1211.3815789473683, + "grad_norm": 0.9198042750358582, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 184130 + }, + { + "epoch": 1211.4473684210527, + "grad_norm": 1.2158046960830688, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 184140 + }, + { + "epoch": 1211.5131578947369, + "grad_norm": 1.368562936782837, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 184150 + }, + { + "epoch": 1211.578947368421, + "grad_norm": 0.9911054372787476, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 184160 + }, + { + "epoch": 1211.6447368421052, + "grad_norm": 1.076811671257019, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 184170 + }, + { + "epoch": 1211.7105263157894, + "grad_norm": 1.632210373878479, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 184180 + }, + { + "epoch": 1211.7763157894738, + "grad_norm": 1.2261195182800293, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 184190 + }, + { + "epoch": 1211.842105263158, + "grad_norm": 1.1790894269943237, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 184200 + }, + { + "epoch": 1211.907894736842, + "grad_norm": 0.8446111083030701, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 184210 + }, + { + "epoch": 1211.9736842105262, + "grad_norm": 0.72014319896698, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 184220 + }, + { + "epoch": 1212.0394736842106, + "grad_norm": 0.8168789744377136, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 184230 + }, + { + "epoch": 1212.1052631578948, + "grad_norm": 0.8248627781867981, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 184240 + }, + { + "epoch": 1212.171052631579, + "grad_norm": 0.6290561556816101, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 184250 + }, + { + "epoch": 1212.2368421052631, + "grad_norm": 0.9064732789993286, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 184260 + }, + { + "epoch": 1212.3026315789473, + "grad_norm": 1.5276134014129639, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 184270 + }, + { + "epoch": 1212.3684210526317, + "grad_norm": 1.2133339643478394, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 184280 + }, + { + "epoch": 1212.4342105263158, + "grad_norm": 1.096923589706421, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 184290 + }, + { + "epoch": 1212.5, + "grad_norm": 1.0609931945800781, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 184300 + }, + { + "epoch": 1212.5657894736842, + "grad_norm": 1.0884696245193481, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 184310 + }, + { + "epoch": 1212.6315789473683, + "grad_norm": 1.2373757362365723, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 184320 + }, + { + "epoch": 1212.6973684210527, + "grad_norm": 0.8663111925125122, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 184330 + }, + { + "epoch": 1212.7631578947369, + "grad_norm": 0.8895679116249084, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 184340 + }, + { + "epoch": 1212.828947368421, + "grad_norm": 0.9693534970283508, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 184350 + }, + { + "epoch": 1212.8947368421052, + "grad_norm": 1.173888921737671, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 184360 + }, + { + "epoch": 1212.9605263157894, + "grad_norm": 0.8452108502388, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 184370 + }, + { + "epoch": 1213.0263157894738, + "grad_norm": 1.301694393157959, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 184380 + }, + { + "epoch": 1213.092105263158, + "grad_norm": 1.157357931137085, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 184390 + }, + { + "epoch": 1213.157894736842, + "grad_norm": 0.9105615019798279, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 184400 + }, + { + "epoch": 1213.2236842105262, + "grad_norm": 1.3666666746139526, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 184410 + }, + { + "epoch": 1213.2894736842106, + "grad_norm": 1.4840264320373535, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 184420 + }, + { + "epoch": 1213.3552631578948, + "grad_norm": 1.0419845581054688, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 184430 + }, + { + "epoch": 1213.421052631579, + "grad_norm": 1.0189485549926758, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 184440 + }, + { + "epoch": 1213.4868421052631, + "grad_norm": 1.2533512115478516, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 184450 + }, + { + "epoch": 1213.5526315789473, + "grad_norm": 0.8926238417625427, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 184460 + }, + { + "epoch": 1213.6184210526317, + "grad_norm": 1.0614726543426514, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 184470 + }, + { + "epoch": 1213.6842105263158, + "grad_norm": 1.0628024339675903, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 184480 + }, + { + "epoch": 1213.75, + "grad_norm": 1.3046667575836182, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 184490 + }, + { + "epoch": 1213.8157894736842, + "grad_norm": 1.021803379058838, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 184500 + }, + { + "epoch": 1213.8815789473683, + "grad_norm": 1.1435785293579102, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 184510 + }, + { + "epoch": 1213.9473684210527, + "grad_norm": 1.1126259565353394, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 184520 + }, + { + "epoch": 1214.0131578947369, + "grad_norm": 1.026728630065918, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 184530 + }, + { + "epoch": 1214.078947368421, + "grad_norm": 0.6541023254394531, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 184540 + }, + { + "epoch": 1214.1447368421052, + "grad_norm": 0.8954967260360718, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 184550 + }, + { + "epoch": 1214.2105263157894, + "grad_norm": 1.3695642948150635, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 184560 + }, + { + "epoch": 1214.2763157894738, + "grad_norm": 1.1998350620269775, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 184570 + }, + { + "epoch": 1214.342105263158, + "grad_norm": 1.03053617477417, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 184580 + }, + { + "epoch": 1214.407894736842, + "grad_norm": 1.0831443071365356, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 184590 + }, + { + "epoch": 1214.4736842105262, + "grad_norm": 0.8419860005378723, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 184600 + }, + { + "epoch": 1214.5394736842106, + "grad_norm": 1.1064237356185913, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 184610 + }, + { + "epoch": 1214.6052631578948, + "grad_norm": 1.0451138019561768, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 184620 + }, + { + "epoch": 1214.671052631579, + "grad_norm": 0.9039570093154907, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 184630 + }, + { + "epoch": 1214.7368421052631, + "grad_norm": 1.0026644468307495, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 184640 + }, + { + "epoch": 1214.8026315789473, + "grad_norm": 0.8823190927505493, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 184650 + }, + { + "epoch": 1214.8684210526317, + "grad_norm": 1.510465383529663, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 184660 + }, + { + "epoch": 1214.9342105263158, + "grad_norm": 0.9225495457649231, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 184670 + }, + { + "epoch": 1215.0, + "grad_norm": 0.866723895072937, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 184680 + }, + { + "epoch": 1215.0657894736842, + "grad_norm": 1.0360679626464844, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 184690 + }, + { + "epoch": 1215.1315789473683, + "grad_norm": 1.0180871486663818, + "learning_rate": 0.0001, + "loss": 0.0125, + "step": 184700 + }, + { + "epoch": 1215.1973684210527, + "grad_norm": 0.8230453729629517, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 184710 + }, + { + "epoch": 1215.2631578947369, + "grad_norm": 1.398345947265625, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 184720 + }, + { + "epoch": 1215.328947368421, + "grad_norm": 1.5286283493041992, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 184730 + }, + { + "epoch": 1215.3947368421052, + "grad_norm": 1.097556710243225, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 184740 + }, + { + "epoch": 1215.4605263157894, + "grad_norm": 1.1276756525039673, + "learning_rate": 0.0001, + "loss": 0.0129, + "step": 184750 + }, + { + "epoch": 1215.5263157894738, + "grad_norm": 1.013379693031311, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 184760 + }, + { + "epoch": 1215.592105263158, + "grad_norm": 1.1437040567398071, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 184770 + }, + { + "epoch": 1215.657894736842, + "grad_norm": 0.8518174290657043, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 184780 + }, + { + "epoch": 1215.7236842105262, + "grad_norm": 1.0094599723815918, + "learning_rate": 0.0001, + "loss": 0.0146, + "step": 184790 + }, + { + "epoch": 1215.7894736842106, + "grad_norm": 1.0971324443817139, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 184800 + }, + { + "epoch": 1215.8552631578948, + "grad_norm": 1.0762872695922852, + "learning_rate": 0.0001, + "loss": 0.0135, + "step": 184810 + }, + { + "epoch": 1215.921052631579, + "grad_norm": 0.6933231353759766, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 184820 + }, + { + "epoch": 1215.9868421052631, + "grad_norm": 1.1836282014846802, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 184830 + }, + { + "epoch": 1216.0526315789473, + "grad_norm": 1.4843765497207642, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 184840 + }, + { + "epoch": 1216.1184210526317, + "grad_norm": 1.573395013809204, + "learning_rate": 0.0001, + "loss": 0.0138, + "step": 184850 + }, + { + "epoch": 1216.1842105263158, + "grad_norm": 1.349432110786438, + "learning_rate": 0.0001, + "loss": 0.0126, + "step": 184860 + }, + { + "epoch": 1216.25, + "grad_norm": 0.9781970381736755, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 184870 + }, + { + "epoch": 1216.3157894736842, + "grad_norm": 0.9339350461959839, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 184880 + }, + { + "epoch": 1216.3815789473683, + "grad_norm": 1.192179799079895, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 184890 + }, + { + "epoch": 1216.4473684210527, + "grad_norm": 0.9777132272720337, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 184900 + }, + { + "epoch": 1216.5131578947369, + "grad_norm": 1.1878960132598877, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 184910 + }, + { + "epoch": 1216.578947368421, + "grad_norm": 1.1332019567489624, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 184920 + }, + { + "epoch": 1216.6447368421052, + "grad_norm": 1.2628083229064941, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 184930 + }, + { + "epoch": 1216.7105263157894, + "grad_norm": 1.167966365814209, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 184940 + }, + { + "epoch": 1216.7763157894738, + "grad_norm": 1.3506724834442139, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 184950 + }, + { + "epoch": 1216.842105263158, + "grad_norm": 1.2869248390197754, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 184960 + }, + { + "epoch": 1216.907894736842, + "grad_norm": 1.2013112306594849, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 184970 + }, + { + "epoch": 1216.9736842105262, + "grad_norm": 0.8679819107055664, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 184980 + }, + { + "epoch": 1217.0394736842106, + "grad_norm": 1.0381109714508057, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 184990 + }, + { + "epoch": 1217.1052631578948, + "grad_norm": 1.11387038230896, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 185000 + }, + { + "epoch": 1217.171052631579, + "grad_norm": 1.2010740041732788, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 185010 + }, + { + "epoch": 1217.2368421052631, + "grad_norm": 1.2932566404342651, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 185020 + }, + { + "epoch": 1217.3026315789473, + "grad_norm": 0.7184022665023804, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 185030 + }, + { + "epoch": 1217.3684210526317, + "grad_norm": 0.8820980787277222, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 185040 + }, + { + "epoch": 1217.4342105263158, + "grad_norm": 1.4294477701187134, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 185050 + }, + { + "epoch": 1217.5, + "grad_norm": 1.082810878753662, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 185060 + }, + { + "epoch": 1217.5657894736842, + "grad_norm": 1.033191442489624, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 185070 + }, + { + "epoch": 1217.6315789473683, + "grad_norm": 1.1163650751113892, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 185080 + }, + { + "epoch": 1217.6973684210527, + "grad_norm": 1.2836352586746216, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 185090 + }, + { + "epoch": 1217.7631578947369, + "grad_norm": 1.1807303428649902, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 185100 + }, + { + "epoch": 1217.828947368421, + "grad_norm": 1.0362707376480103, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 185110 + }, + { + "epoch": 1217.8947368421052, + "grad_norm": 1.2636171579360962, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 185120 + }, + { + "epoch": 1217.9605263157894, + "grad_norm": 1.2539921998977661, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 185130 + }, + { + "epoch": 1218.0263157894738, + "grad_norm": 1.5820645093917847, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 185140 + }, + { + "epoch": 1218.092105263158, + "grad_norm": 0.8721255660057068, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 185150 + }, + { + "epoch": 1218.157894736842, + "grad_norm": 1.0245574712753296, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 185160 + }, + { + "epoch": 1218.2236842105262, + "grad_norm": 1.1424921751022339, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 185170 + }, + { + "epoch": 1218.2894736842106, + "grad_norm": 1.0817832946777344, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 185180 + }, + { + "epoch": 1218.3552631578948, + "grad_norm": 0.9851661920547485, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 185190 + }, + { + "epoch": 1218.421052631579, + "grad_norm": 1.0707749128341675, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 185200 + }, + { + "epoch": 1218.4868421052631, + "grad_norm": 0.903893768787384, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 185210 + }, + { + "epoch": 1218.5526315789473, + "grad_norm": 0.8631360530853271, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 185220 + }, + { + "epoch": 1218.6184210526317, + "grad_norm": 0.8446455001831055, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 185230 + }, + { + "epoch": 1218.6842105263158, + "grad_norm": 1.238858938217163, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 185240 + }, + { + "epoch": 1218.75, + "grad_norm": 1.4525588750839233, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 185250 + }, + { + "epoch": 1218.8157894736842, + "grad_norm": 0.7506535649299622, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 185260 + }, + { + "epoch": 1218.8815789473683, + "grad_norm": 1.0195599794387817, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 185270 + }, + { + "epoch": 1218.9473684210527, + "grad_norm": 1.0614231824874878, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 185280 + }, + { + "epoch": 1219.0131578947369, + "grad_norm": 0.9390943050384521, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 185290 + }, + { + "epoch": 1219.078947368421, + "grad_norm": 1.175963282585144, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 185300 + }, + { + "epoch": 1219.1447368421052, + "grad_norm": 1.5767515897750854, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 185310 + }, + { + "epoch": 1219.2105263157894, + "grad_norm": 1.0393364429473877, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 185320 + }, + { + "epoch": 1219.2763157894738, + "grad_norm": 1.084050178527832, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 185330 + }, + { + "epoch": 1219.342105263158, + "grad_norm": 1.1142622232437134, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 185340 + }, + { + "epoch": 1219.407894736842, + "grad_norm": 1.1076176166534424, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 185350 + }, + { + "epoch": 1219.4736842105262, + "grad_norm": 0.9142469167709351, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 185360 + }, + { + "epoch": 1219.5394736842106, + "grad_norm": 0.9830288887023926, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 185370 + }, + { + "epoch": 1219.6052631578948, + "grad_norm": 1.4786913394927979, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 185380 + }, + { + "epoch": 1219.671052631579, + "grad_norm": 1.2091387510299683, + "learning_rate": 0.0001, + "loss": 0.0122, + "step": 185390 + }, + { + "epoch": 1219.7368421052631, + "grad_norm": 1.051276683807373, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 185400 + }, + { + "epoch": 1219.8026315789473, + "grad_norm": 1.0199636220932007, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 185410 + }, + { + "epoch": 1219.8684210526317, + "grad_norm": 0.9174479246139526, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 185420 + }, + { + "epoch": 1219.9342105263158, + "grad_norm": 0.9192227721214294, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 185430 + }, + { + "epoch": 1220.0, + "grad_norm": 0.7987057566642761, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 185440 + }, + { + "epoch": 1220.0657894736842, + "grad_norm": 0.6848961710929871, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 185450 + }, + { + "epoch": 1220.1315789473683, + "grad_norm": 0.7822224497795105, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 185460 + }, + { + "epoch": 1220.1973684210527, + "grad_norm": 0.8950042724609375, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 185470 + }, + { + "epoch": 1220.2631578947369, + "grad_norm": 0.982462465763092, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 185480 + }, + { + "epoch": 1220.328947368421, + "grad_norm": 1.3260400295257568, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 185490 + }, + { + "epoch": 1220.3947368421052, + "grad_norm": 1.1258111000061035, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 185500 + }, + { + "epoch": 1220.4605263157894, + "grad_norm": 0.9215371608734131, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 185510 + }, + { + "epoch": 1220.5263157894738, + "grad_norm": 1.1126407384872437, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 185520 + }, + { + "epoch": 1220.592105263158, + "grad_norm": 0.8571876287460327, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 185530 + }, + { + "epoch": 1220.657894736842, + "grad_norm": 1.0084202289581299, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 185540 + }, + { + "epoch": 1220.7236842105262, + "grad_norm": 1.0304898023605347, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 185550 + }, + { + "epoch": 1220.7894736842106, + "grad_norm": 1.4203600883483887, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 185560 + }, + { + "epoch": 1220.8552631578948, + "grad_norm": 0.695814311504364, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 185570 + }, + { + "epoch": 1220.921052631579, + "grad_norm": 1.066893219947815, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 185580 + }, + { + "epoch": 1220.9868421052631, + "grad_norm": 1.0553237199783325, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 185590 + }, + { + "epoch": 1221.0526315789473, + "grad_norm": 1.2047573328018188, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 185600 + }, + { + "epoch": 1221.1184210526317, + "grad_norm": 1.031988263130188, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 185610 + }, + { + "epoch": 1221.1842105263158, + "grad_norm": 1.2671812772750854, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 185620 + }, + { + "epoch": 1221.25, + "grad_norm": 1.1938546895980835, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 185630 + }, + { + "epoch": 1221.3157894736842, + "grad_norm": 0.8404895067214966, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 185640 + }, + { + "epoch": 1221.3815789473683, + "grad_norm": 1.2843692302703857, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 185650 + }, + { + "epoch": 1221.4473684210527, + "grad_norm": 1.1143099069595337, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 185660 + }, + { + "epoch": 1221.5131578947369, + "grad_norm": 1.0575257539749146, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 185670 + }, + { + "epoch": 1221.578947368421, + "grad_norm": 0.7534637451171875, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 185680 + }, + { + "epoch": 1221.6447368421052, + "grad_norm": 0.7828577756881714, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 185690 + }, + { + "epoch": 1221.7105263157894, + "grad_norm": 0.9473252296447754, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 185700 + }, + { + "epoch": 1221.7763157894738, + "grad_norm": 1.1639002561569214, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 185710 + }, + { + "epoch": 1221.842105263158, + "grad_norm": 0.9955407977104187, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 185720 + }, + { + "epoch": 1221.907894736842, + "grad_norm": 1.1388436555862427, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 185730 + }, + { + "epoch": 1221.9736842105262, + "grad_norm": 0.793958842754364, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 185740 + }, + { + "epoch": 1222.0394736842106, + "grad_norm": 0.7527614235877991, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 185750 + }, + { + "epoch": 1222.1052631578948, + "grad_norm": 1.2795637845993042, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 185760 + }, + { + "epoch": 1222.171052631579, + "grad_norm": 0.7451528906822205, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 185770 + }, + { + "epoch": 1222.2368421052631, + "grad_norm": 0.9207596778869629, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 185780 + }, + { + "epoch": 1222.3026315789473, + "grad_norm": 0.8541901111602783, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 185790 + }, + { + "epoch": 1222.3684210526317, + "grad_norm": 0.9875356554985046, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 185800 + }, + { + "epoch": 1222.4342105263158, + "grad_norm": 0.8189533948898315, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 185810 + }, + { + "epoch": 1222.5, + "grad_norm": 1.3536303043365479, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 185820 + }, + { + "epoch": 1222.5657894736842, + "grad_norm": 1.253621220588684, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 185830 + }, + { + "epoch": 1222.6315789473683, + "grad_norm": 0.9833441376686096, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 185840 + }, + { + "epoch": 1222.6973684210527, + "grad_norm": 1.02719247341156, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 185850 + }, + { + "epoch": 1222.7631578947369, + "grad_norm": 0.8571970462799072, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 185860 + }, + { + "epoch": 1222.828947368421, + "grad_norm": 1.270446538925171, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 185870 + }, + { + "epoch": 1222.8947368421052, + "grad_norm": 1.1519957780838013, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 185880 + }, + { + "epoch": 1222.9605263157894, + "grad_norm": 1.149924397468567, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 185890 + }, + { + "epoch": 1223.0263157894738, + "grad_norm": 1.0503630638122559, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 185900 + }, + { + "epoch": 1223.092105263158, + "grad_norm": 1.033896565437317, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 185910 + }, + { + "epoch": 1223.157894736842, + "grad_norm": 1.530225396156311, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 185920 + }, + { + "epoch": 1223.2236842105262, + "grad_norm": 1.317832112312317, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 185930 + }, + { + "epoch": 1223.2894736842106, + "grad_norm": 1.1852115392684937, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 185940 + }, + { + "epoch": 1223.3552631578948, + "grad_norm": 1.0423250198364258, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 185950 + }, + { + "epoch": 1223.421052631579, + "grad_norm": 1.0078924894332886, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 185960 + }, + { + "epoch": 1223.4868421052631, + "grad_norm": 1.1069872379302979, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 185970 + }, + { + "epoch": 1223.5526315789473, + "grad_norm": 1.058595061302185, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 185980 + }, + { + "epoch": 1223.6184210526317, + "grad_norm": 1.1351420879364014, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 185990 + }, + { + "epoch": 1223.6842105263158, + "grad_norm": 1.0550578832626343, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 186000 + }, + { + "epoch": 1223.75, + "grad_norm": 0.7912912964820862, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 186010 + }, + { + "epoch": 1223.8157894736842, + "grad_norm": 1.267266035079956, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 186020 + }, + { + "epoch": 1223.8815789473683, + "grad_norm": 0.7863309979438782, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 186030 + }, + { + "epoch": 1223.9473684210527, + "grad_norm": 0.8405379056930542, + "learning_rate": 0.0001, + "loss": 0.0067, + "step": 186040 + }, + { + "epoch": 1224.0131578947369, + "grad_norm": 0.7136426568031311, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 186050 + }, + { + "epoch": 1224.078947368421, + "grad_norm": 1.2677913904190063, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 186060 + }, + { + "epoch": 1224.1447368421052, + "grad_norm": 1.0475759506225586, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 186070 + }, + { + "epoch": 1224.2105263157894, + "grad_norm": 0.9506308436393738, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 186080 + }, + { + "epoch": 1224.2763157894738, + "grad_norm": 0.9816416501998901, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 186090 + }, + { + "epoch": 1224.342105263158, + "grad_norm": 0.6930526494979858, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 186100 + }, + { + "epoch": 1224.407894736842, + "grad_norm": 0.9830896258354187, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 186110 + }, + { + "epoch": 1224.4736842105262, + "grad_norm": 0.9465078115463257, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 186120 + }, + { + "epoch": 1224.5394736842106, + "grad_norm": 0.8693153858184814, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 186130 + }, + { + "epoch": 1224.6052631578948, + "grad_norm": 1.0287957191467285, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 186140 + }, + { + "epoch": 1224.671052631579, + "grad_norm": 0.7358331084251404, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 186150 + }, + { + "epoch": 1224.7368421052631, + "grad_norm": 0.6375886797904968, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 186160 + }, + { + "epoch": 1224.8026315789473, + "grad_norm": 1.0094578266143799, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 186170 + }, + { + "epoch": 1224.8684210526317, + "grad_norm": 1.2570643424987793, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 186180 + }, + { + "epoch": 1224.9342105263158, + "grad_norm": 0.9317885637283325, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 186190 + }, + { + "epoch": 1225.0, + "grad_norm": 0.8335903286933899, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 186200 + }, + { + "epoch": 1225.0657894736842, + "grad_norm": 1.1470184326171875, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 186210 + }, + { + "epoch": 1225.1315789473683, + "grad_norm": 1.1468976736068726, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 186220 + }, + { + "epoch": 1225.1973684210527, + "grad_norm": 1.1850436925888062, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 186230 + }, + { + "epoch": 1225.2631578947369, + "grad_norm": 1.3890918493270874, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 186240 + }, + { + "epoch": 1225.328947368421, + "grad_norm": 0.9903066754341125, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 186250 + }, + { + "epoch": 1225.3947368421052, + "grad_norm": 0.9706083536148071, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 186260 + }, + { + "epoch": 1225.4605263157894, + "grad_norm": 1.4619114398956299, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 186270 + }, + { + "epoch": 1225.5263157894738, + "grad_norm": 0.8819053173065186, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 186280 + }, + { + "epoch": 1225.592105263158, + "grad_norm": 0.8979442715644836, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 186290 + }, + { + "epoch": 1225.657894736842, + "grad_norm": 0.9516298770904541, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 186300 + }, + { + "epoch": 1225.7236842105262, + "grad_norm": 1.1198757886886597, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 186310 + }, + { + "epoch": 1225.7894736842106, + "grad_norm": 1.2486748695373535, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 186320 + }, + { + "epoch": 1225.8552631578948, + "grad_norm": 1.025698184967041, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 186330 + }, + { + "epoch": 1225.921052631579, + "grad_norm": 0.6521353125572205, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 186340 + }, + { + "epoch": 1225.9868421052631, + "grad_norm": 0.7902568578720093, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 186350 + }, + { + "epoch": 1226.0526315789473, + "grad_norm": 1.071388840675354, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 186360 + }, + { + "epoch": 1226.1184210526317, + "grad_norm": 0.9144065976142883, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 186370 + }, + { + "epoch": 1226.1842105263158, + "grad_norm": 1.1299687623977661, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 186380 + }, + { + "epoch": 1226.25, + "grad_norm": 1.0512938499450684, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 186390 + }, + { + "epoch": 1226.3157894736842, + "grad_norm": 0.8974717259407043, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 186400 + }, + { + "epoch": 1226.3815789473683, + "grad_norm": 0.9535547494888306, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 186410 + }, + { + "epoch": 1226.4473684210527, + "grad_norm": 1.0984748601913452, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 186420 + }, + { + "epoch": 1226.5131578947369, + "grad_norm": 1.2447376251220703, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 186430 + }, + { + "epoch": 1226.578947368421, + "grad_norm": 1.0115010738372803, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 186440 + }, + { + "epoch": 1226.6447368421052, + "grad_norm": 1.0430307388305664, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 186450 + }, + { + "epoch": 1226.7105263157894, + "grad_norm": 0.9099544882774353, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 186460 + }, + { + "epoch": 1226.7763157894738, + "grad_norm": 0.878151535987854, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 186470 + }, + { + "epoch": 1226.842105263158, + "grad_norm": 0.6946768164634705, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 186480 + }, + { + "epoch": 1226.907894736842, + "grad_norm": 1.0372909307479858, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 186490 + }, + { + "epoch": 1226.9736842105262, + "grad_norm": 0.8411084413528442, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 186500 + }, + { + "epoch": 1227.0394736842106, + "grad_norm": 0.7090082764625549, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 186510 + }, + { + "epoch": 1227.1052631578948, + "grad_norm": 0.802839457988739, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 186520 + }, + { + "epoch": 1227.171052631579, + "grad_norm": 0.8215923309326172, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 186530 + }, + { + "epoch": 1227.2368421052631, + "grad_norm": 0.8801704049110413, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 186540 + }, + { + "epoch": 1227.3026315789473, + "grad_norm": 1.15470552444458, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 186550 + }, + { + "epoch": 1227.3684210526317, + "grad_norm": 1.114592432975769, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 186560 + }, + { + "epoch": 1227.4342105263158, + "grad_norm": 1.192267656326294, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 186570 + }, + { + "epoch": 1227.5, + "grad_norm": 0.8330889940261841, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 186580 + }, + { + "epoch": 1227.5657894736842, + "grad_norm": 1.0712906122207642, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 186590 + }, + { + "epoch": 1227.6315789473683, + "grad_norm": 1.0091384649276733, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 186600 + }, + { + "epoch": 1227.6973684210527, + "grad_norm": 0.8430705070495605, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 186610 + }, + { + "epoch": 1227.7631578947369, + "grad_norm": 1.0369099378585815, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 186620 + }, + { + "epoch": 1227.828947368421, + "grad_norm": 1.0996822118759155, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 186630 + }, + { + "epoch": 1227.8947368421052, + "grad_norm": 1.3173720836639404, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 186640 + }, + { + "epoch": 1227.9605263157894, + "grad_norm": 1.1714130640029907, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 186650 + }, + { + "epoch": 1228.0263157894738, + "grad_norm": 1.1778959035873413, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 186660 + }, + { + "epoch": 1228.092105263158, + "grad_norm": 1.0300238132476807, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 186670 + }, + { + "epoch": 1228.157894736842, + "grad_norm": 0.9473615288734436, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 186680 + }, + { + "epoch": 1228.2236842105262, + "grad_norm": 1.171370267868042, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 186690 + }, + { + "epoch": 1228.2894736842106, + "grad_norm": 0.8373340964317322, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 186700 + }, + { + "epoch": 1228.3552631578948, + "grad_norm": 0.9212744832038879, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 186710 + }, + { + "epoch": 1228.421052631579, + "grad_norm": 1.379812240600586, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 186720 + }, + { + "epoch": 1228.4868421052631, + "grad_norm": 1.5933481454849243, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 186730 + }, + { + "epoch": 1228.5526315789473, + "grad_norm": 1.3223012685775757, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 186740 + }, + { + "epoch": 1228.6184210526317, + "grad_norm": 1.4536499977111816, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 186750 + }, + { + "epoch": 1228.6842105263158, + "grad_norm": 1.071915626525879, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 186760 + }, + { + "epoch": 1228.75, + "grad_norm": 1.0385130643844604, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 186770 + }, + { + "epoch": 1228.8157894736842, + "grad_norm": 1.3961035013198853, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 186780 + }, + { + "epoch": 1228.8815789473683, + "grad_norm": 1.3020026683807373, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 186790 + }, + { + "epoch": 1228.9473684210527, + "grad_norm": 1.341375708580017, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 186800 + }, + { + "epoch": 1229.0131578947369, + "grad_norm": 1.4074076414108276, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 186810 + }, + { + "epoch": 1229.078947368421, + "grad_norm": 0.9496380090713501, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 186820 + }, + { + "epoch": 1229.1447368421052, + "grad_norm": 1.0725762844085693, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 186830 + }, + { + "epoch": 1229.2105263157894, + "grad_norm": 0.9692988991737366, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 186840 + }, + { + "epoch": 1229.2763157894738, + "grad_norm": 0.9738875031471252, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 186850 + }, + { + "epoch": 1229.342105263158, + "grad_norm": 1.2129768133163452, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 186860 + }, + { + "epoch": 1229.407894736842, + "grad_norm": 0.7136619091033936, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 186870 + }, + { + "epoch": 1229.4736842105262, + "grad_norm": 1.1828364133834839, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 186880 + }, + { + "epoch": 1229.5394736842106, + "grad_norm": 1.1648519039154053, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 186890 + }, + { + "epoch": 1229.6052631578948, + "grad_norm": 0.9487463235855103, + "learning_rate": 0.0001, + "loss": 0.0066, + "step": 186900 + }, + { + "epoch": 1229.671052631579, + "grad_norm": 1.2276160717010498, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 186910 + }, + { + "epoch": 1229.7368421052631, + "grad_norm": 0.9763864278793335, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 186920 + }, + { + "epoch": 1229.8026315789473, + "grad_norm": 0.8818470239639282, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 186930 + }, + { + "epoch": 1229.8684210526317, + "grad_norm": 0.7005442380905151, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 186940 + }, + { + "epoch": 1229.9342105263158, + "grad_norm": 0.5166844129562378, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 186950 + }, + { + "epoch": 1230.0, + "grad_norm": 0.9323204755783081, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 186960 + }, + { + "epoch": 1230.0657894736842, + "grad_norm": 1.0536624193191528, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 186970 + }, + { + "epoch": 1230.1315789473683, + "grad_norm": 0.9364405274391174, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 186980 + }, + { + "epoch": 1230.1973684210527, + "grad_norm": 0.9843827486038208, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 186990 + }, + { + "epoch": 1230.2631578947369, + "grad_norm": 1.1612672805786133, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 187000 + }, + { + "epoch": 1230.328947368421, + "grad_norm": 1.0113646984100342, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 187010 + }, + { + "epoch": 1230.3947368421052, + "grad_norm": 0.8987430334091187, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 187020 + }, + { + "epoch": 1230.4605263157894, + "grad_norm": 1.0051621198654175, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 187030 + }, + { + "epoch": 1230.5263157894738, + "grad_norm": 1.093727469444275, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 187040 + }, + { + "epoch": 1230.592105263158, + "grad_norm": 0.9883322715759277, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 187050 + }, + { + "epoch": 1230.657894736842, + "grad_norm": 0.8294075131416321, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 187060 + }, + { + "epoch": 1230.7236842105262, + "grad_norm": 0.8644272685050964, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 187070 + }, + { + "epoch": 1230.7894736842106, + "grad_norm": 0.7558962106704712, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 187080 + }, + { + "epoch": 1230.8552631578948, + "grad_norm": 0.6551307439804077, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 187090 + }, + { + "epoch": 1230.921052631579, + "grad_norm": 0.8390691876411438, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 187100 + }, + { + "epoch": 1230.9868421052631, + "grad_norm": 1.2046642303466797, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 187110 + }, + { + "epoch": 1231.0526315789473, + "grad_norm": 0.6695582866668701, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 187120 + }, + { + "epoch": 1231.1184210526317, + "grad_norm": 0.7703352570533752, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 187130 + }, + { + "epoch": 1231.1842105263158, + "grad_norm": 0.9928059577941895, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 187140 + }, + { + "epoch": 1231.25, + "grad_norm": 0.8067262768745422, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 187150 + }, + { + "epoch": 1231.3157894736842, + "grad_norm": 0.8623690009117126, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 187160 + }, + { + "epoch": 1231.3815789473683, + "grad_norm": 0.8938329219818115, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 187170 + }, + { + "epoch": 1231.4473684210527, + "grad_norm": 1.010252594947815, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 187180 + }, + { + "epoch": 1231.5131578947369, + "grad_norm": 1.1144150495529175, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 187190 + }, + { + "epoch": 1231.578947368421, + "grad_norm": 0.7431076765060425, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 187200 + }, + { + "epoch": 1231.6447368421052, + "grad_norm": 0.8599720597267151, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 187210 + }, + { + "epoch": 1231.7105263157894, + "grad_norm": 1.280928134918213, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 187220 + }, + { + "epoch": 1231.7763157894738, + "grad_norm": 0.8571041822433472, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 187230 + }, + { + "epoch": 1231.842105263158, + "grad_norm": 0.6886258125305176, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 187240 + }, + { + "epoch": 1231.907894736842, + "grad_norm": 0.9688119888305664, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 187250 + }, + { + "epoch": 1231.9736842105262, + "grad_norm": 1.306778907775879, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 187260 + }, + { + "epoch": 1232.0394736842106, + "grad_norm": 1.23270845413208, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 187270 + }, + { + "epoch": 1232.1052631578948, + "grad_norm": 1.2252572774887085, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 187280 + }, + { + "epoch": 1232.171052631579, + "grad_norm": 0.9559009075164795, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 187290 + }, + { + "epoch": 1232.2368421052631, + "grad_norm": 0.895037829875946, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 187300 + }, + { + "epoch": 1232.3026315789473, + "grad_norm": 1.1871989965438843, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 187310 + }, + { + "epoch": 1232.3684210526317, + "grad_norm": 1.0235185623168945, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 187320 + }, + { + "epoch": 1232.4342105263158, + "grad_norm": 0.5289400815963745, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 187330 + }, + { + "epoch": 1232.5, + "grad_norm": 1.077367901802063, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 187340 + }, + { + "epoch": 1232.5657894736842, + "grad_norm": 1.0103833675384521, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 187350 + }, + { + "epoch": 1232.6315789473683, + "grad_norm": 1.0976219177246094, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 187360 + }, + { + "epoch": 1232.6973684210527, + "grad_norm": 0.8010794520378113, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 187370 + }, + { + "epoch": 1232.7631578947369, + "grad_norm": 0.8650757074356079, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 187380 + }, + { + "epoch": 1232.828947368421, + "grad_norm": 0.9312655329704285, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 187390 + }, + { + "epoch": 1232.8947368421052, + "grad_norm": 0.9070029854774475, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 187400 + }, + { + "epoch": 1232.9605263157894, + "grad_norm": 0.7533953189849854, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 187410 + }, + { + "epoch": 1233.0263157894738, + "grad_norm": 0.9384796023368835, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 187420 + }, + { + "epoch": 1233.092105263158, + "grad_norm": 1.097226858139038, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 187430 + }, + { + "epoch": 1233.157894736842, + "grad_norm": 1.156470537185669, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 187440 + }, + { + "epoch": 1233.2236842105262, + "grad_norm": 0.7621616125106812, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 187450 + }, + { + "epoch": 1233.2894736842106, + "grad_norm": 1.3856050968170166, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 187460 + }, + { + "epoch": 1233.3552631578948, + "grad_norm": 0.9687317609786987, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 187470 + }, + { + "epoch": 1233.421052631579, + "grad_norm": 0.9934854507446289, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 187480 + }, + { + "epoch": 1233.4868421052631, + "grad_norm": 1.1821582317352295, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 187490 + }, + { + "epoch": 1233.5526315789473, + "grad_norm": 0.8907933235168457, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 187500 + }, + { + "epoch": 1233.6184210526317, + "grad_norm": 0.7803938388824463, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 187510 + }, + { + "epoch": 1233.6842105263158, + "grad_norm": 0.8478217124938965, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 187520 + }, + { + "epoch": 1233.75, + "grad_norm": 0.9070845246315002, + "learning_rate": 0.0001, + "loss": 0.0128, + "step": 187530 + }, + { + "epoch": 1233.8157894736842, + "grad_norm": 0.7729270458221436, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 187540 + }, + { + "epoch": 1233.8815789473683, + "grad_norm": 0.689509391784668, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 187550 + }, + { + "epoch": 1233.9473684210527, + "grad_norm": 1.0114260911941528, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 187560 + }, + { + "epoch": 1234.0131578947369, + "grad_norm": 0.8790208697319031, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 187570 + }, + { + "epoch": 1234.078947368421, + "grad_norm": 0.7868586182594299, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 187580 + }, + { + "epoch": 1234.1447368421052, + "grad_norm": 1.1828399896621704, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 187590 + }, + { + "epoch": 1234.2105263157894, + "grad_norm": 2.568375825881958, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 187600 + }, + { + "epoch": 1234.2763157894738, + "grad_norm": 1.1999150514602661, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 187610 + }, + { + "epoch": 1234.342105263158, + "grad_norm": 1.172085165977478, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 187620 + }, + { + "epoch": 1234.407894736842, + "grad_norm": 1.260359287261963, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 187630 + }, + { + "epoch": 1234.4736842105262, + "grad_norm": 0.9659546613693237, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 187640 + }, + { + "epoch": 1234.5394736842106, + "grad_norm": 1.1871107816696167, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 187650 + }, + { + "epoch": 1234.6052631578948, + "grad_norm": 0.8396361470222473, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 187660 + }, + { + "epoch": 1234.671052631579, + "grad_norm": 0.862738311290741, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 187670 + }, + { + "epoch": 1234.7368421052631, + "grad_norm": 0.8556384444236755, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 187680 + }, + { + "epoch": 1234.8026315789473, + "grad_norm": 1.10312819480896, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 187690 + }, + { + "epoch": 1234.8684210526317, + "grad_norm": 0.9752662777900696, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 187700 + }, + { + "epoch": 1234.9342105263158, + "grad_norm": 0.7204729914665222, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 187710 + }, + { + "epoch": 1235.0, + "grad_norm": 0.7936938405036926, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 187720 + }, + { + "epoch": 1235.0657894736842, + "grad_norm": 0.8540555238723755, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 187730 + }, + { + "epoch": 1235.1315789473683, + "grad_norm": 1.0517683029174805, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 187740 + }, + { + "epoch": 1235.1973684210527, + "grad_norm": 0.9040259718894958, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 187750 + }, + { + "epoch": 1235.2631578947369, + "grad_norm": 1.0970168113708496, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 187760 + }, + { + "epoch": 1235.328947368421, + "grad_norm": 0.7497133016586304, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 187770 + }, + { + "epoch": 1235.3947368421052, + "grad_norm": 1.210512638092041, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 187780 + }, + { + "epoch": 1235.4605263157894, + "grad_norm": 1.1530228853225708, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 187790 + }, + { + "epoch": 1235.5263157894738, + "grad_norm": 1.0944901704788208, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 187800 + }, + { + "epoch": 1235.592105263158, + "grad_norm": 0.8939099311828613, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 187810 + }, + { + "epoch": 1235.657894736842, + "grad_norm": 0.756377100944519, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 187820 + }, + { + "epoch": 1235.7236842105262, + "grad_norm": 0.9245010614395142, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 187830 + }, + { + "epoch": 1235.7894736842106, + "grad_norm": 1.1319273710250854, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 187840 + }, + { + "epoch": 1235.8552631578948, + "grad_norm": 1.1174309253692627, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 187850 + }, + { + "epoch": 1235.921052631579, + "grad_norm": 0.8425665497779846, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 187860 + }, + { + "epoch": 1235.9868421052631, + "grad_norm": 0.9204140901565552, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 187870 + }, + { + "epoch": 1236.0526315789473, + "grad_norm": 0.7384737730026245, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 187880 + }, + { + "epoch": 1236.1184210526317, + "grad_norm": 1.1339808702468872, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 187890 + }, + { + "epoch": 1236.1842105263158, + "grad_norm": 1.197279453277588, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 187900 + }, + { + "epoch": 1236.25, + "grad_norm": 0.8099234700202942, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 187910 + }, + { + "epoch": 1236.3157894736842, + "grad_norm": 0.9687013626098633, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 187920 + }, + { + "epoch": 1236.3815789473683, + "grad_norm": 0.9288181066513062, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 187930 + }, + { + "epoch": 1236.4473684210527, + "grad_norm": 1.2456539869308472, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 187940 + }, + { + "epoch": 1236.5131578947369, + "grad_norm": 1.1154669523239136, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 187950 + }, + { + "epoch": 1236.578947368421, + "grad_norm": 1.3738938570022583, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 187960 + }, + { + "epoch": 1236.6447368421052, + "grad_norm": 1.0610955953598022, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 187970 + }, + { + "epoch": 1236.7105263157894, + "grad_norm": 1.108542799949646, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 187980 + }, + { + "epoch": 1236.7763157894738, + "grad_norm": 0.896397054195404, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 187990 + }, + { + "epoch": 1236.842105263158, + "grad_norm": 0.7802370190620422, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 188000 + }, + { + "epoch": 1236.907894736842, + "grad_norm": 0.5918530225753784, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 188010 + }, + { + "epoch": 1236.9736842105262, + "grad_norm": 1.0109541416168213, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 188020 + }, + { + "epoch": 1237.0394736842106, + "grad_norm": 0.9148463010787964, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 188030 + }, + { + "epoch": 1237.1052631578948, + "grad_norm": 0.9371543526649475, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 188040 + }, + { + "epoch": 1237.171052631579, + "grad_norm": 0.9670152068138123, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 188050 + }, + { + "epoch": 1237.2368421052631, + "grad_norm": 0.973000705242157, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 188060 + }, + { + "epoch": 1237.3026315789473, + "grad_norm": 0.9884973764419556, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 188070 + }, + { + "epoch": 1237.3684210526317, + "grad_norm": 1.0578480958938599, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 188080 + }, + { + "epoch": 1237.4342105263158, + "grad_norm": 0.9570189118385315, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 188090 + }, + { + "epoch": 1237.5, + "grad_norm": 1.118773102760315, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 188100 + }, + { + "epoch": 1237.5657894736842, + "grad_norm": 1.1802111864089966, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 188110 + }, + { + "epoch": 1237.6315789473683, + "grad_norm": 0.8796427845954895, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 188120 + }, + { + "epoch": 1237.6973684210527, + "grad_norm": 1.2683212757110596, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 188130 + }, + { + "epoch": 1237.7631578947369, + "grad_norm": 1.0439941883087158, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 188140 + }, + { + "epoch": 1237.828947368421, + "grad_norm": 0.969838559627533, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 188150 + }, + { + "epoch": 1237.8947368421052, + "grad_norm": 1.0440396070480347, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 188160 + }, + { + "epoch": 1237.9605263157894, + "grad_norm": 1.1314268112182617, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 188170 + }, + { + "epoch": 1238.0263157894738, + "grad_norm": 1.2484370470046997, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 188180 + }, + { + "epoch": 1238.092105263158, + "grad_norm": 0.7492143511772156, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 188190 + }, + { + "epoch": 1238.157894736842, + "grad_norm": 1.2526135444641113, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 188200 + }, + { + "epoch": 1238.2236842105262, + "grad_norm": 1.2862526178359985, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 188210 + }, + { + "epoch": 1238.2894736842106, + "grad_norm": 1.1841567754745483, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 188220 + }, + { + "epoch": 1238.3552631578948, + "grad_norm": 1.1451932191848755, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 188230 + }, + { + "epoch": 1238.421052631579, + "grad_norm": 0.9696784019470215, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 188240 + }, + { + "epoch": 1238.4868421052631, + "grad_norm": 1.0971126556396484, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 188250 + }, + { + "epoch": 1238.5526315789473, + "grad_norm": 0.7748174071311951, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 188260 + }, + { + "epoch": 1238.6184210526317, + "grad_norm": 0.851367712020874, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 188270 + }, + { + "epoch": 1238.6842105263158, + "grad_norm": 0.7852613925933838, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 188280 + }, + { + "epoch": 1238.75, + "grad_norm": 0.9901844263076782, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 188290 + }, + { + "epoch": 1238.8157894736842, + "grad_norm": 1.0415254831314087, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 188300 + }, + { + "epoch": 1238.8815789473683, + "grad_norm": 1.2643136978149414, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 188310 + }, + { + "epoch": 1238.9473684210527, + "grad_norm": 1.2420594692230225, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 188320 + }, + { + "epoch": 1239.0131578947369, + "grad_norm": 0.9030695557594299, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 188330 + }, + { + "epoch": 1239.078947368421, + "grad_norm": 0.8474811911582947, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 188340 + }, + { + "epoch": 1239.1447368421052, + "grad_norm": 0.9705641865730286, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 188350 + }, + { + "epoch": 1239.2105263157894, + "grad_norm": 0.9619945883750916, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 188360 + }, + { + "epoch": 1239.2763157894738, + "grad_norm": 1.1420098543167114, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 188370 + }, + { + "epoch": 1239.342105263158, + "grad_norm": 1.0340029001235962, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 188380 + }, + { + "epoch": 1239.407894736842, + "grad_norm": 0.9889524579048157, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 188390 + }, + { + "epoch": 1239.4736842105262, + "grad_norm": 1.768046259880066, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 188400 + }, + { + "epoch": 1239.5394736842106, + "grad_norm": 1.4465337991714478, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 188410 + }, + { + "epoch": 1239.6052631578948, + "grad_norm": 1.0993311405181885, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 188420 + }, + { + "epoch": 1239.671052631579, + "grad_norm": 1.098059892654419, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 188430 + }, + { + "epoch": 1239.7368421052631, + "grad_norm": 1.0775386095046997, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 188440 + }, + { + "epoch": 1239.8026315789473, + "grad_norm": 0.6998803019523621, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 188450 + }, + { + "epoch": 1239.8684210526317, + "grad_norm": 0.9273382425308228, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 188460 + }, + { + "epoch": 1239.9342105263158, + "grad_norm": 1.2026686668395996, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 188470 + }, + { + "epoch": 1240.0, + "grad_norm": 0.9668415784835815, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 188480 + }, + { + "epoch": 1240.0657894736842, + "grad_norm": 1.1113210916519165, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 188490 + }, + { + "epoch": 1240.1315789473683, + "grad_norm": 1.2677863836288452, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 188500 + }, + { + "epoch": 1240.1973684210527, + "grad_norm": 1.4234356880187988, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 188510 + }, + { + "epoch": 1240.2631578947369, + "grad_norm": 1.1672581434249878, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 188520 + }, + { + "epoch": 1240.328947368421, + "grad_norm": 1.075741171836853, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 188530 + }, + { + "epoch": 1240.3947368421052, + "grad_norm": 0.8153349161148071, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 188540 + }, + { + "epoch": 1240.4605263157894, + "grad_norm": 1.0249247550964355, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 188550 + }, + { + "epoch": 1240.5263157894738, + "grad_norm": 1.0031867027282715, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 188560 + }, + { + "epoch": 1240.592105263158, + "grad_norm": 1.103637933731079, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 188570 + }, + { + "epoch": 1240.657894736842, + "grad_norm": 1.0994689464569092, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 188580 + }, + { + "epoch": 1240.7236842105262, + "grad_norm": 1.0447285175323486, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 188590 + }, + { + "epoch": 1240.7894736842106, + "grad_norm": 0.9495458006858826, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 188600 + }, + { + "epoch": 1240.8552631578948, + "grad_norm": 0.9653858542442322, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 188610 + }, + { + "epoch": 1240.921052631579, + "grad_norm": 1.0502097606658936, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 188620 + }, + { + "epoch": 1240.9868421052631, + "grad_norm": 0.9432629942893982, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 188630 + }, + { + "epoch": 1241.0526315789473, + "grad_norm": 0.9657227396965027, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 188640 + }, + { + "epoch": 1241.1184210526317, + "grad_norm": 0.8866987228393555, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 188650 + }, + { + "epoch": 1241.1842105263158, + "grad_norm": 0.9814696907997131, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 188660 + }, + { + "epoch": 1241.25, + "grad_norm": 1.1600935459136963, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 188670 + }, + { + "epoch": 1241.3157894736842, + "grad_norm": 1.2622950077056885, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 188680 + }, + { + "epoch": 1241.3815789473683, + "grad_norm": 0.7318976521492004, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 188690 + }, + { + "epoch": 1241.4473684210527, + "grad_norm": 1.3622748851776123, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 188700 + }, + { + "epoch": 1241.5131578947369, + "grad_norm": 1.0387088060379028, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 188710 + }, + { + "epoch": 1241.578947368421, + "grad_norm": 0.8503026366233826, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 188720 + }, + { + "epoch": 1241.6447368421052, + "grad_norm": 0.909067690372467, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 188730 + }, + { + "epoch": 1241.7105263157894, + "grad_norm": 0.9165451526641846, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 188740 + }, + { + "epoch": 1241.7763157894738, + "grad_norm": 0.7640308737754822, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 188750 + }, + { + "epoch": 1241.842105263158, + "grad_norm": 0.9033883810043335, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 188760 + }, + { + "epoch": 1241.907894736842, + "grad_norm": 1.0701746940612793, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 188770 + }, + { + "epoch": 1241.9736842105262, + "grad_norm": 0.9798920154571533, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 188780 + }, + { + "epoch": 1242.0394736842106, + "grad_norm": 1.3275381326675415, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 188790 + }, + { + "epoch": 1242.1052631578948, + "grad_norm": 1.1964999437332153, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 188800 + }, + { + "epoch": 1242.171052631579, + "grad_norm": 1.0140482187271118, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 188810 + }, + { + "epoch": 1242.2368421052631, + "grad_norm": 0.8349454998970032, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 188820 + }, + { + "epoch": 1242.3026315789473, + "grad_norm": 1.2491860389709473, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 188830 + }, + { + "epoch": 1242.3684210526317, + "grad_norm": 1.1465342044830322, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 188840 + }, + { + "epoch": 1242.4342105263158, + "grad_norm": 0.7169458866119385, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 188850 + }, + { + "epoch": 1242.5, + "grad_norm": 0.887615978717804, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 188860 + }, + { + "epoch": 1242.5657894736842, + "grad_norm": 1.243327260017395, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 188870 + }, + { + "epoch": 1242.6315789473683, + "grad_norm": 1.3320894241333008, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 188880 + }, + { + "epoch": 1242.6973684210527, + "grad_norm": 1.0858453512191772, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 188890 + }, + { + "epoch": 1242.7631578947369, + "grad_norm": 1.123133659362793, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 188900 + }, + { + "epoch": 1242.828947368421, + "grad_norm": 0.7207273840904236, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 188910 + }, + { + "epoch": 1242.8947368421052, + "grad_norm": 0.6698071956634521, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 188920 + }, + { + "epoch": 1242.9605263157894, + "grad_norm": 1.2865219116210938, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 188930 + }, + { + "epoch": 1243.0263157894738, + "grad_norm": 0.8424243927001953, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 188940 + }, + { + "epoch": 1243.092105263158, + "grad_norm": 1.2220584154129028, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 188950 + }, + { + "epoch": 1243.157894736842, + "grad_norm": 1.234723687171936, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 188960 + }, + { + "epoch": 1243.2236842105262, + "grad_norm": 1.3230215311050415, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 188970 + }, + { + "epoch": 1243.2894736842106, + "grad_norm": 1.1419968605041504, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 188980 + }, + { + "epoch": 1243.3552631578948, + "grad_norm": 1.2557247877120972, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 188990 + }, + { + "epoch": 1243.421052631579, + "grad_norm": 1.0815895795822144, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 189000 + }, + { + "epoch": 1243.4868421052631, + "grad_norm": 0.997104823589325, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 189010 + }, + { + "epoch": 1243.5526315789473, + "grad_norm": 0.776548445224762, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 189020 + }, + { + "epoch": 1243.6184210526317, + "grad_norm": 0.8415836691856384, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 189030 + }, + { + "epoch": 1243.6842105263158, + "grad_norm": 0.7960478663444519, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 189040 + }, + { + "epoch": 1243.75, + "grad_norm": 0.7596374750137329, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 189050 + }, + { + "epoch": 1243.8157894736842, + "grad_norm": 1.2631161212921143, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 189060 + }, + { + "epoch": 1243.8815789473683, + "grad_norm": 0.959852397441864, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 189070 + }, + { + "epoch": 1243.9473684210527, + "grad_norm": 0.6801808476448059, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 189080 + }, + { + "epoch": 1244.0131578947369, + "grad_norm": 1.1035255193710327, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 189090 + }, + { + "epoch": 1244.078947368421, + "grad_norm": 0.7732435464859009, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 189100 + }, + { + "epoch": 1244.1447368421052, + "grad_norm": 1.119423747062683, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 189110 + }, + { + "epoch": 1244.2105263157894, + "grad_norm": 1.1290099620819092, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 189120 + }, + { + "epoch": 1244.2763157894738, + "grad_norm": 0.9945160150527954, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 189130 + }, + { + "epoch": 1244.342105263158, + "grad_norm": 1.1095064878463745, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 189140 + }, + { + "epoch": 1244.407894736842, + "grad_norm": 0.930774450302124, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 189150 + }, + { + "epoch": 1244.4736842105262, + "grad_norm": 0.7640677094459534, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 189160 + }, + { + "epoch": 1244.5394736842106, + "grad_norm": 0.8537974953651428, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 189170 + }, + { + "epoch": 1244.6052631578948, + "grad_norm": 0.8385094404220581, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 189180 + }, + { + "epoch": 1244.671052631579, + "grad_norm": 0.8964349627494812, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 189190 + }, + { + "epoch": 1244.7368421052631, + "grad_norm": 0.9953314065933228, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 189200 + }, + { + "epoch": 1244.8026315789473, + "grad_norm": 1.099317193031311, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 189210 + }, + { + "epoch": 1244.8684210526317, + "grad_norm": 1.2956123352050781, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 189220 + }, + { + "epoch": 1244.9342105263158, + "grad_norm": 1.0341782569885254, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 189230 + }, + { + "epoch": 1245.0, + "grad_norm": 1.3250845670700073, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 189240 + }, + { + "epoch": 1245.0657894736842, + "grad_norm": 0.8931382298469543, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 189250 + }, + { + "epoch": 1245.1315789473683, + "grad_norm": 0.8672465682029724, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 189260 + }, + { + "epoch": 1245.1973684210527, + "grad_norm": 1.1268553733825684, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 189270 + }, + { + "epoch": 1245.2631578947369, + "grad_norm": 0.9678467512130737, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 189280 + }, + { + "epoch": 1245.328947368421, + "grad_norm": 1.09657883644104, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 189290 + }, + { + "epoch": 1245.3947368421052, + "grad_norm": 1.1582295894622803, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 189300 + }, + { + "epoch": 1245.4605263157894, + "grad_norm": 0.817107617855072, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 189310 + }, + { + "epoch": 1245.5263157894738, + "grad_norm": 1.2293273210525513, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 189320 + }, + { + "epoch": 1245.592105263158, + "grad_norm": 0.8553192019462585, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 189330 + }, + { + "epoch": 1245.657894736842, + "grad_norm": 1.2988234758377075, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 189340 + }, + { + "epoch": 1245.7236842105262, + "grad_norm": 1.1800487041473389, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 189350 + }, + { + "epoch": 1245.7894736842106, + "grad_norm": 1.1548720598220825, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 189360 + }, + { + "epoch": 1245.8552631578948, + "grad_norm": 1.1566232442855835, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 189370 + }, + { + "epoch": 1245.921052631579, + "grad_norm": 1.2034006118774414, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 189380 + }, + { + "epoch": 1245.9868421052631, + "grad_norm": 1.0221554040908813, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 189390 + }, + { + "epoch": 1246.0526315789473, + "grad_norm": 0.8397990465164185, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 189400 + }, + { + "epoch": 1246.1184210526317, + "grad_norm": 1.0938118696212769, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 189410 + }, + { + "epoch": 1246.1842105263158, + "grad_norm": 0.7125457525253296, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 189420 + }, + { + "epoch": 1246.25, + "grad_norm": 1.0024956464767456, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 189430 + }, + { + "epoch": 1246.3157894736842, + "grad_norm": 0.7446456551551819, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 189440 + }, + { + "epoch": 1246.3815789473683, + "grad_norm": 0.9926702380180359, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 189450 + }, + { + "epoch": 1246.4473684210527, + "grad_norm": 1.1048181056976318, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 189460 + }, + { + "epoch": 1246.5131578947369, + "grad_norm": 1.3652530908584595, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 189470 + }, + { + "epoch": 1246.578947368421, + "grad_norm": 1.2224739789962769, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 189480 + }, + { + "epoch": 1246.6447368421052, + "grad_norm": 1.0956324338912964, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 189490 + }, + { + "epoch": 1246.7105263157894, + "grad_norm": 0.8215267658233643, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 189500 + }, + { + "epoch": 1246.7763157894738, + "grad_norm": 1.3271948099136353, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 189510 + }, + { + "epoch": 1246.842105263158, + "grad_norm": 1.0132023096084595, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 189520 + }, + { + "epoch": 1246.907894736842, + "grad_norm": 1.202150821685791, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 189530 + }, + { + "epoch": 1246.9736842105262, + "grad_norm": 1.2758268117904663, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 189540 + }, + { + "epoch": 1247.0394736842106, + "grad_norm": 1.0313605070114136, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 189550 + }, + { + "epoch": 1247.1052631578948, + "grad_norm": 1.0390371084213257, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 189560 + }, + { + "epoch": 1247.171052631579, + "grad_norm": 0.8408495187759399, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 189570 + }, + { + "epoch": 1247.2368421052631, + "grad_norm": 1.059502124786377, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 189580 + }, + { + "epoch": 1247.3026315789473, + "grad_norm": 1.236569881439209, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 189590 + }, + { + "epoch": 1247.3684210526317, + "grad_norm": 1.1000412702560425, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 189600 + }, + { + "epoch": 1247.4342105263158, + "grad_norm": 1.3047072887420654, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 189610 + }, + { + "epoch": 1247.5, + "grad_norm": 1.1245132684707642, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 189620 + }, + { + "epoch": 1247.5657894736842, + "grad_norm": 1.1942509412765503, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 189630 + }, + { + "epoch": 1247.6315789473683, + "grad_norm": 1.323743462562561, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 189640 + }, + { + "epoch": 1247.6973684210527, + "grad_norm": 1.3108394145965576, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 189650 + }, + { + "epoch": 1247.7631578947369, + "grad_norm": 1.1331356763839722, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 189660 + }, + { + "epoch": 1247.828947368421, + "grad_norm": 1.2502070665359497, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 189670 + }, + { + "epoch": 1247.8947368421052, + "grad_norm": 0.7357776165008545, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 189680 + }, + { + "epoch": 1247.9605263157894, + "grad_norm": 1.009076714515686, + "learning_rate": 0.0001, + "loss": 0.0115, + "step": 189690 + }, + { + "epoch": 1248.0263157894738, + "grad_norm": 0.9644945859909058, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 189700 + }, + { + "epoch": 1248.092105263158, + "grad_norm": 0.8089728355407715, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 189710 + }, + { + "epoch": 1248.157894736842, + "grad_norm": 1.2572730779647827, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 189720 + }, + { + "epoch": 1248.2236842105262, + "grad_norm": 1.3105838298797607, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 189730 + }, + { + "epoch": 1248.2894736842106, + "grad_norm": 0.7587280869483948, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 189740 + }, + { + "epoch": 1248.3552631578948, + "grad_norm": 0.8644336462020874, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 189750 + }, + { + "epoch": 1248.421052631579, + "grad_norm": 0.8798325657844543, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 189760 + }, + { + "epoch": 1248.4868421052631, + "grad_norm": 1.5717847347259521, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 189770 + }, + { + "epoch": 1248.5526315789473, + "grad_norm": 0.9737314581871033, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 189780 + }, + { + "epoch": 1248.6184210526317, + "grad_norm": 0.884158194065094, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 189790 + }, + { + "epoch": 1248.6842105263158, + "grad_norm": 0.8840878009796143, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 189800 + }, + { + "epoch": 1248.75, + "grad_norm": 0.7942262291908264, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 189810 + }, + { + "epoch": 1248.8157894736842, + "grad_norm": 1.0044453144073486, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 189820 + }, + { + "epoch": 1248.8815789473683, + "grad_norm": 1.1081262826919556, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 189830 + }, + { + "epoch": 1248.9473684210527, + "grad_norm": 0.7932543754577637, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 189840 + }, + { + "epoch": 1249.0131578947369, + "grad_norm": 1.0459403991699219, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 189850 + }, + { + "epoch": 1249.078947368421, + "grad_norm": 1.1057350635528564, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 189860 + }, + { + "epoch": 1249.1447368421052, + "grad_norm": 0.8458811640739441, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 189870 + }, + { + "epoch": 1249.2105263157894, + "grad_norm": 0.7159251570701599, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 189880 + }, + { + "epoch": 1249.2763157894738, + "grad_norm": 1.037407636642456, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 189890 + }, + { + "epoch": 1249.342105263158, + "grad_norm": 0.9437480568885803, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 189900 + }, + { + "epoch": 1249.407894736842, + "grad_norm": 0.7939618229866028, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 189910 + }, + { + "epoch": 1249.4736842105262, + "grad_norm": 1.138870358467102, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 189920 + }, + { + "epoch": 1249.5394736842106, + "grad_norm": 0.7695747017860413, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 189930 + }, + { + "epoch": 1249.6052631578948, + "grad_norm": 1.0361729860305786, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 189940 + }, + { + "epoch": 1249.671052631579, + "grad_norm": 0.9108820557594299, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 189950 + }, + { + "epoch": 1249.7368421052631, + "grad_norm": 1.085376501083374, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 189960 + }, + { + "epoch": 1249.8026315789473, + "grad_norm": 1.2229604721069336, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 189970 + }, + { + "epoch": 1249.8684210526317, + "grad_norm": 0.9447299242019653, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 189980 + }, + { + "epoch": 1249.9342105263158, + "grad_norm": 0.8941596746444702, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 189990 + }, + { + "epoch": 1250.0, + "grad_norm": 0.8298214673995972, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 190000 + }, + { + "epoch": 1250.0657894736842, + "grad_norm": 0.8514341711997986, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 190010 + }, + { + "epoch": 1250.1315789473683, + "grad_norm": 1.1375254392623901, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 190020 + }, + { + "epoch": 1250.1973684210527, + "grad_norm": 0.9819296598434448, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 190030 + }, + { + "epoch": 1250.2631578947369, + "grad_norm": 0.9162175059318542, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 190040 + }, + { + "epoch": 1250.328947368421, + "grad_norm": 1.0678719282150269, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 190050 + }, + { + "epoch": 1250.3947368421052, + "grad_norm": 1.3395860195159912, + "learning_rate": 0.0001, + "loss": 0.0123, + "step": 190060 + }, + { + "epoch": 1250.4605263157894, + "grad_norm": 0.8305793404579163, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 190070 + }, + { + "epoch": 1250.5263157894738, + "grad_norm": 1.182700514793396, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 190080 + }, + { + "epoch": 1250.592105263158, + "grad_norm": 0.9990321397781372, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 190090 + }, + { + "epoch": 1250.657894736842, + "grad_norm": 0.9627804756164551, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 190100 + }, + { + "epoch": 1250.7236842105262, + "grad_norm": 0.7819858193397522, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 190110 + }, + { + "epoch": 1250.7894736842106, + "grad_norm": 0.8119869232177734, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 190120 + }, + { + "epoch": 1250.8552631578948, + "grad_norm": 1.0260744094848633, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 190130 + }, + { + "epoch": 1250.921052631579, + "grad_norm": 1.2213586568832397, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 190140 + }, + { + "epoch": 1250.9868421052631, + "grad_norm": 1.0840460062026978, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 190150 + }, + { + "epoch": 1251.0526315789473, + "grad_norm": 1.0639532804489136, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 190160 + }, + { + "epoch": 1251.1184210526317, + "grad_norm": 1.0878303050994873, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 190170 + }, + { + "epoch": 1251.1842105263158, + "grad_norm": 0.787267804145813, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 190180 + }, + { + "epoch": 1251.25, + "grad_norm": 1.283235788345337, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 190190 + }, + { + "epoch": 1251.3157894736842, + "grad_norm": 1.402185320854187, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 190200 + }, + { + "epoch": 1251.3815789473683, + "grad_norm": 0.9384086728096008, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 190210 + }, + { + "epoch": 1251.4473684210527, + "grad_norm": 1.2631064653396606, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 190220 + }, + { + "epoch": 1251.5131578947369, + "grad_norm": 1.1802366971969604, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 190230 + }, + { + "epoch": 1251.578947368421, + "grad_norm": 1.0927965641021729, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 190240 + }, + { + "epoch": 1251.6447368421052, + "grad_norm": 1.3226182460784912, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 190250 + }, + { + "epoch": 1251.7105263157894, + "grad_norm": 1.1238583326339722, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 190260 + }, + { + "epoch": 1251.7763157894738, + "grad_norm": 1.0334173440933228, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 190270 + }, + { + "epoch": 1251.842105263158, + "grad_norm": 0.7510884404182434, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 190280 + }, + { + "epoch": 1251.907894736842, + "grad_norm": 1.0210719108581543, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 190290 + }, + { + "epoch": 1251.9736842105262, + "grad_norm": 1.1662030220031738, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 190300 + }, + { + "epoch": 1252.0394736842106, + "grad_norm": 0.8975061774253845, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 190310 + }, + { + "epoch": 1252.1052631578948, + "grad_norm": 1.1046485900878906, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 190320 + }, + { + "epoch": 1252.171052631579, + "grad_norm": 0.740519106388092, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 190330 + }, + { + "epoch": 1252.2368421052631, + "grad_norm": 0.7718915939331055, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 190340 + }, + { + "epoch": 1252.3026315789473, + "grad_norm": 0.7174364328384399, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 190350 + }, + { + "epoch": 1252.3684210526317, + "grad_norm": 0.7045212388038635, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 190360 + }, + { + "epoch": 1252.4342105263158, + "grad_norm": 1.1751577854156494, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 190370 + }, + { + "epoch": 1252.5, + "grad_norm": 1.107194423675537, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 190380 + }, + { + "epoch": 1252.5657894736842, + "grad_norm": 1.6648300886154175, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 190390 + }, + { + "epoch": 1252.6315789473683, + "grad_norm": 1.0686296224594116, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 190400 + }, + { + "epoch": 1252.6973684210527, + "grad_norm": 1.0623538494110107, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 190410 + }, + { + "epoch": 1252.7631578947369, + "grad_norm": 1.1247642040252686, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 190420 + }, + { + "epoch": 1252.828947368421, + "grad_norm": 1.0035089254379272, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 190430 + }, + { + "epoch": 1252.8947368421052, + "grad_norm": 1.277760624885559, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 190440 + }, + { + "epoch": 1252.9605263157894, + "grad_norm": 1.3326517343521118, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 190450 + }, + { + "epoch": 1253.0263157894738, + "grad_norm": 1.0908524990081787, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 190460 + }, + { + "epoch": 1253.092105263158, + "grad_norm": 1.1250838041305542, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 190470 + }, + { + "epoch": 1253.157894736842, + "grad_norm": 1.1381192207336426, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 190480 + }, + { + "epoch": 1253.2236842105262, + "grad_norm": 1.1070564985275269, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 190490 + }, + { + "epoch": 1253.2894736842106, + "grad_norm": 0.9667354226112366, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 190500 + }, + { + "epoch": 1253.3552631578948, + "grad_norm": 0.9640436172485352, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 190510 + }, + { + "epoch": 1253.421052631579, + "grad_norm": 1.0675090551376343, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 190520 + }, + { + "epoch": 1253.4868421052631, + "grad_norm": 0.9862016439437866, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 190530 + }, + { + "epoch": 1253.5526315789473, + "grad_norm": 0.98412024974823, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 190540 + }, + { + "epoch": 1253.6184210526317, + "grad_norm": 0.9340914487838745, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 190550 + }, + { + "epoch": 1253.6842105263158, + "grad_norm": 1.179900884628296, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 190560 + }, + { + "epoch": 1253.75, + "grad_norm": 1.0011744499206543, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 190570 + }, + { + "epoch": 1253.8157894736842, + "grad_norm": 0.9150945544242859, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 190580 + }, + { + "epoch": 1253.8815789473683, + "grad_norm": 1.025476098060608, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 190590 + }, + { + "epoch": 1253.9473684210527, + "grad_norm": 0.7332446575164795, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 190600 + }, + { + "epoch": 1254.0131578947369, + "grad_norm": 0.9624845385551453, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 190610 + }, + { + "epoch": 1254.078947368421, + "grad_norm": 0.9401580691337585, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 190620 + }, + { + "epoch": 1254.1447368421052, + "grad_norm": 1.178200602531433, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 190630 + }, + { + "epoch": 1254.2105263157894, + "grad_norm": 0.8018969297409058, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 190640 + }, + { + "epoch": 1254.2763157894738, + "grad_norm": 1.0022549629211426, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 190650 + }, + { + "epoch": 1254.342105263158, + "grad_norm": 1.0292798280715942, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 190660 + }, + { + "epoch": 1254.407894736842, + "grad_norm": 0.9748183488845825, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 190670 + }, + { + "epoch": 1254.4736842105262, + "grad_norm": 1.1642075777053833, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 190680 + }, + { + "epoch": 1254.5394736842106, + "grad_norm": 1.0161871910095215, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 190690 + }, + { + "epoch": 1254.6052631578948, + "grad_norm": 1.2374141216278076, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 190700 + }, + { + "epoch": 1254.671052631579, + "grad_norm": 0.6536117196083069, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 190710 + }, + { + "epoch": 1254.7368421052631, + "grad_norm": 0.9045120477676392, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 190720 + }, + { + "epoch": 1254.8026315789473, + "grad_norm": 0.969821035861969, + "learning_rate": 0.0001, + "loss": 0.0066, + "step": 190730 + }, + { + "epoch": 1254.8684210526317, + "grad_norm": 0.9699207544326782, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 190740 + }, + { + "epoch": 1254.9342105263158, + "grad_norm": 1.0693953037261963, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 190750 + }, + { + "epoch": 1255.0, + "grad_norm": 1.0095750093460083, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 190760 + }, + { + "epoch": 1255.0657894736842, + "grad_norm": 1.1202609539031982, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 190770 + }, + { + "epoch": 1255.1315789473683, + "grad_norm": 1.095145344734192, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 190780 + }, + { + "epoch": 1255.1973684210527, + "grad_norm": 0.8262442350387573, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 190790 + }, + { + "epoch": 1255.2631578947369, + "grad_norm": 1.1005247831344604, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 190800 + }, + { + "epoch": 1255.328947368421, + "grad_norm": 1.0275697708129883, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 190810 + }, + { + "epoch": 1255.3947368421052, + "grad_norm": 0.8765363097190857, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 190820 + }, + { + "epoch": 1255.4605263157894, + "grad_norm": 0.8573766946792603, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 190830 + }, + { + "epoch": 1255.5263157894738, + "grad_norm": 1.2033637762069702, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 190840 + }, + { + "epoch": 1255.592105263158, + "grad_norm": 0.8023409843444824, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 190850 + }, + { + "epoch": 1255.657894736842, + "grad_norm": 0.9117615222930908, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 190860 + }, + { + "epoch": 1255.7236842105262, + "grad_norm": 1.0993090867996216, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 190870 + }, + { + "epoch": 1255.7894736842106, + "grad_norm": 1.0232125520706177, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 190880 + }, + { + "epoch": 1255.8552631578948, + "grad_norm": 1.2864488363265991, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 190890 + }, + { + "epoch": 1255.921052631579, + "grad_norm": 0.8744462132453918, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 190900 + }, + { + "epoch": 1255.9868421052631, + "grad_norm": 0.8951719403266907, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 190910 + }, + { + "epoch": 1256.0526315789473, + "grad_norm": 1.058536410331726, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 190920 + }, + { + "epoch": 1256.1184210526317, + "grad_norm": 0.9087652564048767, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 190930 + }, + { + "epoch": 1256.1842105263158, + "grad_norm": 1.1572648286819458, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 190940 + }, + { + "epoch": 1256.25, + "grad_norm": 1.497307300567627, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 190950 + }, + { + "epoch": 1256.3157894736842, + "grad_norm": 1.1288655996322632, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 190960 + }, + { + "epoch": 1256.3815789473683, + "grad_norm": 1.1092500686645508, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 190970 + }, + { + "epoch": 1256.4473684210527, + "grad_norm": 0.9487274885177612, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 190980 + }, + { + "epoch": 1256.5131578947369, + "grad_norm": 1.2593556642532349, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 190990 + }, + { + "epoch": 1256.578947368421, + "grad_norm": 1.0957036018371582, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 191000 + }, + { + "epoch": 1256.6447368421052, + "grad_norm": 1.3564567565917969, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 191010 + }, + { + "epoch": 1256.7105263157894, + "grad_norm": 1.0658830404281616, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 191020 + }, + { + "epoch": 1256.7763157894738, + "grad_norm": 1.4636614322662354, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 191030 + }, + { + "epoch": 1256.842105263158, + "grad_norm": 0.8907326459884644, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 191040 + }, + { + "epoch": 1256.907894736842, + "grad_norm": 0.6591998934745789, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 191050 + }, + { + "epoch": 1256.9736842105262, + "grad_norm": 1.2480213642120361, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 191060 + }, + { + "epoch": 1257.0394736842106, + "grad_norm": 1.0565965175628662, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 191070 + }, + { + "epoch": 1257.1052631578948, + "grad_norm": 0.7769169807434082, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 191080 + }, + { + "epoch": 1257.171052631579, + "grad_norm": 1.2173153162002563, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 191090 + }, + { + "epoch": 1257.2368421052631, + "grad_norm": 1.1387425661087036, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 191100 + }, + { + "epoch": 1257.3026315789473, + "grad_norm": 1.099096417427063, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 191110 + }, + { + "epoch": 1257.3684210526317, + "grad_norm": 0.9480217695236206, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 191120 + }, + { + "epoch": 1257.4342105263158, + "grad_norm": 0.8401440382003784, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 191130 + }, + { + "epoch": 1257.5, + "grad_norm": 0.9184837341308594, + "learning_rate": 0.0001, + "loss": 0.0121, + "step": 191140 + }, + { + "epoch": 1257.5657894736842, + "grad_norm": 0.9384816288948059, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 191150 + }, + { + "epoch": 1257.6315789473683, + "grad_norm": 0.8797683119773865, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 191160 + }, + { + "epoch": 1257.6973684210527, + "grad_norm": 0.781887412071228, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 191170 + }, + { + "epoch": 1257.7631578947369, + "grad_norm": 0.9485580325126648, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 191180 + }, + { + "epoch": 1257.828947368421, + "grad_norm": 0.7003353834152222, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 191190 + }, + { + "epoch": 1257.8947368421052, + "grad_norm": 0.9563307166099548, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 191200 + }, + { + "epoch": 1257.9605263157894, + "grad_norm": 0.787761926651001, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 191210 + }, + { + "epoch": 1258.0263157894738, + "grad_norm": 1.2450077533721924, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 191220 + }, + { + "epoch": 1258.092105263158, + "grad_norm": 1.280927062034607, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 191230 + }, + { + "epoch": 1258.157894736842, + "grad_norm": 1.0110337734222412, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 191240 + }, + { + "epoch": 1258.2236842105262, + "grad_norm": 1.0578523874282837, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 191250 + }, + { + "epoch": 1258.2894736842106, + "grad_norm": 1.5664889812469482, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 191260 + }, + { + "epoch": 1258.3552631578948, + "grad_norm": 1.2868566513061523, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 191270 + }, + { + "epoch": 1258.421052631579, + "grad_norm": 1.0075466632843018, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 191280 + }, + { + "epoch": 1258.4868421052631, + "grad_norm": 1.2369372844696045, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 191290 + }, + { + "epoch": 1258.5526315789473, + "grad_norm": 1.0888575315475464, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 191300 + }, + { + "epoch": 1258.6184210526317, + "grad_norm": 1.023420810699463, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 191310 + }, + { + "epoch": 1258.6842105263158, + "grad_norm": 1.248289704322815, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 191320 + }, + { + "epoch": 1258.75, + "grad_norm": 1.0365052223205566, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 191330 + }, + { + "epoch": 1258.8157894736842, + "grad_norm": 1.4416338205337524, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 191340 + }, + { + "epoch": 1258.8815789473683, + "grad_norm": 1.3358416557312012, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 191350 + }, + { + "epoch": 1258.9473684210527, + "grad_norm": 1.1580281257629395, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 191360 + }, + { + "epoch": 1259.0131578947369, + "grad_norm": 1.4239611625671387, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 191370 + }, + { + "epoch": 1259.078947368421, + "grad_norm": 0.7980217337608337, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 191380 + }, + { + "epoch": 1259.1447368421052, + "grad_norm": 0.9824530482292175, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 191390 + }, + { + "epoch": 1259.2105263157894, + "grad_norm": 0.770296573638916, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 191400 + }, + { + "epoch": 1259.2763157894738, + "grad_norm": 0.780419647693634, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 191410 + }, + { + "epoch": 1259.342105263158, + "grad_norm": 0.8854151964187622, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 191420 + }, + { + "epoch": 1259.407894736842, + "grad_norm": 0.6854922771453857, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 191430 + }, + { + "epoch": 1259.4736842105262, + "grad_norm": 0.6790972948074341, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 191440 + }, + { + "epoch": 1259.5394736842106, + "grad_norm": 0.5925989747047424, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 191450 + }, + { + "epoch": 1259.6052631578948, + "grad_norm": 1.307077407836914, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 191460 + }, + { + "epoch": 1259.671052631579, + "grad_norm": 0.9079083800315857, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 191470 + }, + { + "epoch": 1259.7368421052631, + "grad_norm": 1.258397102355957, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 191480 + }, + { + "epoch": 1259.8026315789473, + "grad_norm": 1.126745343208313, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 191490 + }, + { + "epoch": 1259.8684210526317, + "grad_norm": 0.9693688154220581, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 191500 + }, + { + "epoch": 1259.9342105263158, + "grad_norm": 1.0901410579681396, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 191510 + }, + { + "epoch": 1260.0, + "grad_norm": 1.0153412818908691, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 191520 + }, + { + "epoch": 1260.0657894736842, + "grad_norm": 1.094736099243164, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 191530 + }, + { + "epoch": 1260.1315789473683, + "grad_norm": 1.3164678812026978, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 191540 + }, + { + "epoch": 1260.1973684210527, + "grad_norm": 1.0634385347366333, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 191550 + }, + { + "epoch": 1260.2631578947369, + "grad_norm": 1.0030722618103027, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 191560 + }, + { + "epoch": 1260.328947368421, + "grad_norm": 1.0075671672821045, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 191570 + }, + { + "epoch": 1260.3947368421052, + "grad_norm": 1.2293148040771484, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 191580 + }, + { + "epoch": 1260.4605263157894, + "grad_norm": 0.9783899784088135, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 191590 + }, + { + "epoch": 1260.5263157894738, + "grad_norm": 1.0404366254806519, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 191600 + }, + { + "epoch": 1260.592105263158, + "grad_norm": 1.1873605251312256, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 191610 + }, + { + "epoch": 1260.657894736842, + "grad_norm": 0.8735949397087097, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 191620 + }, + { + "epoch": 1260.7236842105262, + "grad_norm": 1.2900155782699585, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 191630 + }, + { + "epoch": 1260.7894736842106, + "grad_norm": 1.2164313793182373, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 191640 + }, + { + "epoch": 1260.8552631578948, + "grad_norm": 1.0002045631408691, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 191650 + }, + { + "epoch": 1260.921052631579, + "grad_norm": 1.3534318208694458, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 191660 + }, + { + "epoch": 1260.9868421052631, + "grad_norm": 1.0810010433197021, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 191670 + }, + { + "epoch": 1261.0526315789473, + "grad_norm": 1.0390048027038574, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 191680 + }, + { + "epoch": 1261.1184210526317, + "grad_norm": 0.8042935729026794, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 191690 + }, + { + "epoch": 1261.1842105263158, + "grad_norm": 0.8306818008422852, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 191700 + }, + { + "epoch": 1261.25, + "grad_norm": 0.769751787185669, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 191710 + }, + { + "epoch": 1261.3157894736842, + "grad_norm": 0.807361364364624, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 191720 + }, + { + "epoch": 1261.3815789473683, + "grad_norm": 0.6746041178703308, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 191730 + }, + { + "epoch": 1261.4473684210527, + "grad_norm": 0.6681817173957825, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 191740 + }, + { + "epoch": 1261.5131578947369, + "grad_norm": 0.5453993082046509, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 191750 + }, + { + "epoch": 1261.578947368421, + "grad_norm": 0.9125059247016907, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 191760 + }, + { + "epoch": 1261.6447368421052, + "grad_norm": 0.5915899276733398, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 191770 + }, + { + "epoch": 1261.7105263157894, + "grad_norm": 0.9361303448677063, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 191780 + }, + { + "epoch": 1261.7763157894738, + "grad_norm": 1.2069631814956665, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 191790 + }, + { + "epoch": 1261.842105263158, + "grad_norm": 1.1386783123016357, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 191800 + }, + { + "epoch": 1261.907894736842, + "grad_norm": 1.0958198308944702, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 191810 + }, + { + "epoch": 1261.9736842105262, + "grad_norm": 0.7236484885215759, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 191820 + }, + { + "epoch": 1262.0394736842106, + "grad_norm": 1.066319465637207, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 191830 + }, + { + "epoch": 1262.1052631578948, + "grad_norm": 1.1681187152862549, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 191840 + }, + { + "epoch": 1262.171052631579, + "grad_norm": 0.9392849206924438, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 191850 + }, + { + "epoch": 1262.2368421052631, + "grad_norm": 1.0458626747131348, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 191860 + }, + { + "epoch": 1262.3026315789473, + "grad_norm": 0.7124366164207458, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 191870 + }, + { + "epoch": 1262.3684210526317, + "grad_norm": 0.9901890158653259, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 191880 + }, + { + "epoch": 1262.4342105263158, + "grad_norm": 1.156256914138794, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 191890 + }, + { + "epoch": 1262.5, + "grad_norm": 1.051873803138733, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 191900 + }, + { + "epoch": 1262.5657894736842, + "grad_norm": 1.0366870164871216, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 191910 + }, + { + "epoch": 1262.6315789473683, + "grad_norm": 0.843497633934021, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 191920 + }, + { + "epoch": 1262.6973684210527, + "grad_norm": 0.8482083678245544, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 191930 + }, + { + "epoch": 1262.7631578947369, + "grad_norm": 0.7245591282844543, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 191940 + }, + { + "epoch": 1262.828947368421, + "grad_norm": 0.7226964831352234, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 191950 + }, + { + "epoch": 1262.8947368421052, + "grad_norm": 1.0393447875976562, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 191960 + }, + { + "epoch": 1262.9605263157894, + "grad_norm": 0.9947481155395508, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 191970 + }, + { + "epoch": 1263.0263157894738, + "grad_norm": 1.087018609046936, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 191980 + }, + { + "epoch": 1263.092105263158, + "grad_norm": 1.0347148180007935, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 191990 + }, + { + "epoch": 1263.157894736842, + "grad_norm": 0.8795108795166016, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 192000 + }, + { + "epoch": 1263.2236842105262, + "grad_norm": 0.9262383580207825, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 192010 + }, + { + "epoch": 1263.2894736842106, + "grad_norm": 0.9647265076637268, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 192020 + }, + { + "epoch": 1263.3552631578948, + "grad_norm": 1.0269702672958374, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 192030 + }, + { + "epoch": 1263.421052631579, + "grad_norm": 0.9326910972595215, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 192040 + }, + { + "epoch": 1263.4868421052631, + "grad_norm": 1.2422527074813843, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 192050 + }, + { + "epoch": 1263.5526315789473, + "grad_norm": 1.082977056503296, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 192060 + }, + { + "epoch": 1263.6184210526317, + "grad_norm": 1.06131112575531, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 192070 + }, + { + "epoch": 1263.6842105263158, + "grad_norm": 1.269061803817749, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 192080 + }, + { + "epoch": 1263.75, + "grad_norm": 0.993816614151001, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 192090 + }, + { + "epoch": 1263.8157894736842, + "grad_norm": 0.9369046688079834, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 192100 + }, + { + "epoch": 1263.8815789473683, + "grad_norm": 1.1662249565124512, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 192110 + }, + { + "epoch": 1263.9473684210527, + "grad_norm": 0.8694877028465271, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 192120 + }, + { + "epoch": 1264.0131578947369, + "grad_norm": 1.0363537073135376, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 192130 + }, + { + "epoch": 1264.078947368421, + "grad_norm": 0.8358498215675354, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 192140 + }, + { + "epoch": 1264.1447368421052, + "grad_norm": 0.7027297616004944, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 192150 + }, + { + "epoch": 1264.2105263157894, + "grad_norm": 0.8918916583061218, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 192160 + }, + { + "epoch": 1264.2763157894738, + "grad_norm": 0.9327743053436279, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 192170 + }, + { + "epoch": 1264.342105263158, + "grad_norm": 1.0644272565841675, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 192180 + }, + { + "epoch": 1264.407894736842, + "grad_norm": 1.1869374513626099, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 192190 + }, + { + "epoch": 1264.4736842105262, + "grad_norm": 0.9971968531608582, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 192200 + }, + { + "epoch": 1264.5394736842106, + "grad_norm": 0.745521068572998, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 192210 + }, + { + "epoch": 1264.6052631578948, + "grad_norm": 1.1426355838775635, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 192220 + }, + { + "epoch": 1264.671052631579, + "grad_norm": 1.11016845703125, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 192230 + }, + { + "epoch": 1264.7368421052631, + "grad_norm": 0.9929028153419495, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 192240 + }, + { + "epoch": 1264.8026315789473, + "grad_norm": 0.9268110394477844, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 192250 + }, + { + "epoch": 1264.8684210526317, + "grad_norm": 0.8309184312820435, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 192260 + }, + { + "epoch": 1264.9342105263158, + "grad_norm": 1.2837961912155151, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 192270 + }, + { + "epoch": 1265.0, + "grad_norm": 0.8434421420097351, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 192280 + }, + { + "epoch": 1265.0657894736842, + "grad_norm": 1.0207735300064087, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 192290 + }, + { + "epoch": 1265.1315789473683, + "grad_norm": 0.9556507468223572, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 192300 + }, + { + "epoch": 1265.1973684210527, + "grad_norm": 1.1968109607696533, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 192310 + }, + { + "epoch": 1265.2631578947369, + "grad_norm": 1.016579270362854, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 192320 + }, + { + "epoch": 1265.328947368421, + "grad_norm": 1.248950481414795, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 192330 + }, + { + "epoch": 1265.3947368421052, + "grad_norm": 1.086604118347168, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 192340 + }, + { + "epoch": 1265.4605263157894, + "grad_norm": 1.240754246711731, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 192350 + }, + { + "epoch": 1265.5263157894738, + "grad_norm": 1.088182806968689, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 192360 + }, + { + "epoch": 1265.592105263158, + "grad_norm": 1.2884389162063599, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 192370 + }, + { + "epoch": 1265.657894736842, + "grad_norm": 0.9595872759819031, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 192380 + }, + { + "epoch": 1265.7236842105262, + "grad_norm": 0.7716920375823975, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 192390 + }, + { + "epoch": 1265.7894736842106, + "grad_norm": 1.2177424430847168, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 192400 + }, + { + "epoch": 1265.8552631578948, + "grad_norm": 1.1005266904830933, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 192410 + }, + { + "epoch": 1265.921052631579, + "grad_norm": 0.8893570899963379, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 192420 + }, + { + "epoch": 1265.9868421052631, + "grad_norm": 1.0129755735397339, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 192430 + }, + { + "epoch": 1266.0526315789473, + "grad_norm": 1.1341480016708374, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 192440 + }, + { + "epoch": 1266.1184210526317, + "grad_norm": 0.907776951789856, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 192450 + }, + { + "epoch": 1266.1842105263158, + "grad_norm": 0.811395525932312, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 192460 + }, + { + "epoch": 1266.25, + "grad_norm": 1.0590198040008545, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 192470 + }, + { + "epoch": 1266.3157894736842, + "grad_norm": 1.0062720775604248, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 192480 + }, + { + "epoch": 1266.3815789473683, + "grad_norm": 0.9959651827812195, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 192490 + }, + { + "epoch": 1266.4473684210527, + "grad_norm": 1.1516484022140503, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 192500 + }, + { + "epoch": 1266.5131578947369, + "grad_norm": 0.8557493090629578, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 192510 + }, + { + "epoch": 1266.578947368421, + "grad_norm": 1.0222887992858887, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 192520 + }, + { + "epoch": 1266.6447368421052, + "grad_norm": 1.160636067390442, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 192530 + }, + { + "epoch": 1266.7105263157894, + "grad_norm": 0.7170030474662781, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 192540 + }, + { + "epoch": 1266.7763157894738, + "grad_norm": 1.1373835802078247, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 192550 + }, + { + "epoch": 1266.842105263158, + "grad_norm": 0.9175273776054382, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 192560 + }, + { + "epoch": 1266.907894736842, + "grad_norm": 1.1107465028762817, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 192570 + }, + { + "epoch": 1266.9736842105262, + "grad_norm": 1.1369487047195435, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 192580 + }, + { + "epoch": 1267.0394736842106, + "grad_norm": 1.0154566764831543, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 192590 + }, + { + "epoch": 1267.1052631578948, + "grad_norm": 0.7436667084693909, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 192600 + }, + { + "epoch": 1267.171052631579, + "grad_norm": 1.1619658470153809, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 192610 + }, + { + "epoch": 1267.2368421052631, + "grad_norm": 1.0842703580856323, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 192620 + }, + { + "epoch": 1267.3026315789473, + "grad_norm": 1.0601835250854492, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 192630 + }, + { + "epoch": 1267.3684210526317, + "grad_norm": 1.2090107202529907, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 192640 + }, + { + "epoch": 1267.4342105263158, + "grad_norm": 0.9081798195838928, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 192650 + }, + { + "epoch": 1267.5, + "grad_norm": 0.960776686668396, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 192660 + }, + { + "epoch": 1267.5657894736842, + "grad_norm": 0.7369850277900696, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 192670 + }, + { + "epoch": 1267.6315789473683, + "grad_norm": 0.9591938853263855, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 192680 + }, + { + "epoch": 1267.6973684210527, + "grad_norm": 0.9522967338562012, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 192690 + }, + { + "epoch": 1267.7631578947369, + "grad_norm": 1.203101396560669, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 192700 + }, + { + "epoch": 1267.828947368421, + "grad_norm": 0.974402666091919, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 192710 + }, + { + "epoch": 1267.8947368421052, + "grad_norm": 0.6702833771705627, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 192720 + }, + { + "epoch": 1267.9605263157894, + "grad_norm": 0.7159844636917114, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 192730 + }, + { + "epoch": 1268.0263157894738, + "grad_norm": 0.7871840596199036, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 192740 + }, + { + "epoch": 1268.092105263158, + "grad_norm": 1.0035476684570312, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 192750 + }, + { + "epoch": 1268.157894736842, + "grad_norm": 1.134181022644043, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 192760 + }, + { + "epoch": 1268.2236842105262, + "grad_norm": 1.1951532363891602, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 192770 + }, + { + "epoch": 1268.2894736842106, + "grad_norm": 0.7619327902793884, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 192780 + }, + { + "epoch": 1268.3552631578948, + "grad_norm": 1.1061769723892212, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 192790 + }, + { + "epoch": 1268.421052631579, + "grad_norm": 0.7267657518386841, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 192800 + }, + { + "epoch": 1268.4868421052631, + "grad_norm": 1.2342023849487305, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 192810 + }, + { + "epoch": 1268.5526315789473, + "grad_norm": 0.790826141834259, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 192820 + }, + { + "epoch": 1268.6184210526317, + "grad_norm": 0.762381374835968, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 192830 + }, + { + "epoch": 1268.6842105263158, + "grad_norm": 1.3754265308380127, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 192840 + }, + { + "epoch": 1268.75, + "grad_norm": 1.1752783060073853, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 192850 + }, + { + "epoch": 1268.8157894736842, + "grad_norm": 1.3542336225509644, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 192860 + }, + { + "epoch": 1268.8815789473683, + "grad_norm": 1.3783767223358154, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 192870 + }, + { + "epoch": 1268.9473684210527, + "grad_norm": 0.9072512984275818, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 192880 + }, + { + "epoch": 1269.0131578947369, + "grad_norm": 1.1490826606750488, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 192890 + }, + { + "epoch": 1269.078947368421, + "grad_norm": 1.1428316831588745, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 192900 + }, + { + "epoch": 1269.1447368421052, + "grad_norm": 1.2847250699996948, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 192910 + }, + { + "epoch": 1269.2105263157894, + "grad_norm": 0.9286653995513916, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 192920 + }, + { + "epoch": 1269.2763157894738, + "grad_norm": 0.81577467918396, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 192930 + }, + { + "epoch": 1269.342105263158, + "grad_norm": 0.6981829404830933, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 192940 + }, + { + "epoch": 1269.407894736842, + "grad_norm": 0.916454553604126, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 192950 + }, + { + "epoch": 1269.4736842105262, + "grad_norm": 0.8955065608024597, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 192960 + }, + { + "epoch": 1269.5394736842106, + "grad_norm": 1.2273223400115967, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 192970 + }, + { + "epoch": 1269.6052631578948, + "grad_norm": 1.2199914455413818, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 192980 + }, + { + "epoch": 1269.671052631579, + "grad_norm": 1.2514991760253906, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 192990 + }, + { + "epoch": 1269.7368421052631, + "grad_norm": 1.1191940307617188, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 193000 + }, + { + "epoch": 1269.8026315789473, + "grad_norm": 1.1365535259246826, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 193010 + }, + { + "epoch": 1269.8684210526317, + "grad_norm": 1.1664906740188599, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 193020 + }, + { + "epoch": 1269.9342105263158, + "grad_norm": 1.2412478923797607, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 193030 + }, + { + "epoch": 1270.0, + "grad_norm": 1.1647230386734009, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 193040 + }, + { + "epoch": 1270.0657894736842, + "grad_norm": 1.4412037134170532, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 193050 + }, + { + "epoch": 1270.1315789473683, + "grad_norm": 1.1689636707305908, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 193060 + }, + { + "epoch": 1270.1973684210527, + "grad_norm": 0.9392591118812561, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 193070 + }, + { + "epoch": 1270.2631578947369, + "grad_norm": 1.0944244861602783, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 193080 + }, + { + "epoch": 1270.328947368421, + "grad_norm": 0.9576421976089478, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 193090 + }, + { + "epoch": 1270.3947368421052, + "grad_norm": 0.7435740828514099, + "learning_rate": 0.0001, + "loss": 0.0065, + "step": 193100 + }, + { + "epoch": 1270.4605263157894, + "grad_norm": 0.8688104152679443, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 193110 + }, + { + "epoch": 1270.5263157894738, + "grad_norm": 0.8202447295188904, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 193120 + }, + { + "epoch": 1270.592105263158, + "grad_norm": 0.7284652590751648, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 193130 + }, + { + "epoch": 1270.657894736842, + "grad_norm": 0.9387590289115906, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 193140 + }, + { + "epoch": 1270.7236842105262, + "grad_norm": 1.2218446731567383, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 193150 + }, + { + "epoch": 1270.7894736842106, + "grad_norm": 0.6693400144577026, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 193160 + }, + { + "epoch": 1270.8552631578948, + "grad_norm": 1.0726978778839111, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 193170 + }, + { + "epoch": 1270.921052631579, + "grad_norm": 1.0772373676300049, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 193180 + }, + { + "epoch": 1270.9868421052631, + "grad_norm": 1.2476823329925537, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 193190 + }, + { + "epoch": 1271.0526315789473, + "grad_norm": 1.162440299987793, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 193200 + }, + { + "epoch": 1271.1184210526317, + "grad_norm": 0.7666929960250854, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 193210 + }, + { + "epoch": 1271.1842105263158, + "grad_norm": 1.0631024837493896, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 193220 + }, + { + "epoch": 1271.25, + "grad_norm": 0.8989170789718628, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 193230 + }, + { + "epoch": 1271.3157894736842, + "grad_norm": 1.1688525676727295, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 193240 + }, + { + "epoch": 1271.3815789473683, + "grad_norm": 1.0052133798599243, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 193250 + }, + { + "epoch": 1271.4473684210527, + "grad_norm": 0.9958306550979614, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 193260 + }, + { + "epoch": 1271.5131578947369, + "grad_norm": 1.302128553390503, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 193270 + }, + { + "epoch": 1271.578947368421, + "grad_norm": 0.8450573086738586, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 193280 + }, + { + "epoch": 1271.6447368421052, + "grad_norm": 1.1243925094604492, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 193290 + }, + { + "epoch": 1271.7105263157894, + "grad_norm": 1.0591107606887817, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 193300 + }, + { + "epoch": 1271.7763157894738, + "grad_norm": 1.174922227859497, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 193310 + }, + { + "epoch": 1271.842105263158, + "grad_norm": 0.8074167966842651, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 193320 + }, + { + "epoch": 1271.907894736842, + "grad_norm": 0.9306144714355469, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 193330 + }, + { + "epoch": 1271.9736842105262, + "grad_norm": 0.8695573806762695, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 193340 + }, + { + "epoch": 1272.0394736842106, + "grad_norm": 0.9562103152275085, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 193350 + }, + { + "epoch": 1272.1052631578948, + "grad_norm": 0.7408207058906555, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 193360 + }, + { + "epoch": 1272.171052631579, + "grad_norm": 0.8762840628623962, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 193370 + }, + { + "epoch": 1272.2368421052631, + "grad_norm": 1.0107637643814087, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 193380 + }, + { + "epoch": 1272.3026315789473, + "grad_norm": 0.9742252230644226, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 193390 + }, + { + "epoch": 1272.3684210526317, + "grad_norm": 1.0357519388198853, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 193400 + }, + { + "epoch": 1272.4342105263158, + "grad_norm": 1.0647118091583252, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 193410 + }, + { + "epoch": 1272.5, + "grad_norm": 1.2739837169647217, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 193420 + }, + { + "epoch": 1272.5657894736842, + "grad_norm": 1.3156754970550537, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 193430 + }, + { + "epoch": 1272.6315789473683, + "grad_norm": 1.3240246772766113, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 193440 + }, + { + "epoch": 1272.6973684210527, + "grad_norm": 1.2527782917022705, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 193450 + }, + { + "epoch": 1272.7631578947369, + "grad_norm": 1.097855567932129, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 193460 + }, + { + "epoch": 1272.828947368421, + "grad_norm": 0.7602299451828003, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 193470 + }, + { + "epoch": 1272.8947368421052, + "grad_norm": 0.9691528677940369, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 193480 + }, + { + "epoch": 1272.9605263157894, + "grad_norm": 0.8665773868560791, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 193490 + }, + { + "epoch": 1273.0263157894738, + "grad_norm": 0.9522979855537415, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 193500 + }, + { + "epoch": 1273.092105263158, + "grad_norm": 1.1000076532363892, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 193510 + }, + { + "epoch": 1273.157894736842, + "grad_norm": 1.0523219108581543, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 193520 + }, + { + "epoch": 1273.2236842105262, + "grad_norm": 0.7484111785888672, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 193530 + }, + { + "epoch": 1273.2894736842106, + "grad_norm": 0.6804783344268799, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 193540 + }, + { + "epoch": 1273.3552631578948, + "grad_norm": 1.133036494255066, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 193550 + }, + { + "epoch": 1273.421052631579, + "grad_norm": 0.9207621812820435, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 193560 + }, + { + "epoch": 1273.4868421052631, + "grad_norm": 0.9967194199562073, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 193570 + }, + { + "epoch": 1273.5526315789473, + "grad_norm": 0.6356325745582581, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 193580 + }, + { + "epoch": 1273.6184210526317, + "grad_norm": 0.9811447262763977, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 193590 + }, + { + "epoch": 1273.6842105263158, + "grad_norm": 0.9721751809120178, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 193600 + }, + { + "epoch": 1273.75, + "grad_norm": 0.8101963996887207, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 193610 + }, + { + "epoch": 1273.8157894736842, + "grad_norm": 1.3644793033599854, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 193620 + }, + { + "epoch": 1273.8815789473683, + "grad_norm": 0.827865719795227, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 193630 + }, + { + "epoch": 1273.9473684210527, + "grad_norm": 1.0075935125350952, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 193640 + }, + { + "epoch": 1274.0131578947369, + "grad_norm": 1.2060139179229736, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 193650 + }, + { + "epoch": 1274.078947368421, + "grad_norm": 1.1146782636642456, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 193660 + }, + { + "epoch": 1274.1447368421052, + "grad_norm": 1.0539047718048096, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 193670 + }, + { + "epoch": 1274.2105263157894, + "grad_norm": 0.9264514446258545, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 193680 + }, + { + "epoch": 1274.2763157894738, + "grad_norm": 0.6521208882331848, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 193690 + }, + { + "epoch": 1274.342105263158, + "grad_norm": 0.6478739380836487, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 193700 + }, + { + "epoch": 1274.407894736842, + "grad_norm": 0.8767127990722656, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 193710 + }, + { + "epoch": 1274.4736842105262, + "grad_norm": 1.1907069683074951, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 193720 + }, + { + "epoch": 1274.5394736842106, + "grad_norm": 1.0393046140670776, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 193730 + }, + { + "epoch": 1274.6052631578948, + "grad_norm": 0.9379701614379883, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 193740 + }, + { + "epoch": 1274.671052631579, + "grad_norm": 0.976507306098938, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 193750 + }, + { + "epoch": 1274.7368421052631, + "grad_norm": 0.986788272857666, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 193760 + }, + { + "epoch": 1274.8026315789473, + "grad_norm": 1.1617448329925537, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 193770 + }, + { + "epoch": 1274.8684210526317, + "grad_norm": 1.0417400598526, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 193780 + }, + { + "epoch": 1274.9342105263158, + "grad_norm": 0.8060194253921509, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 193790 + }, + { + "epoch": 1275.0, + "grad_norm": 1.053001880645752, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 193800 + }, + { + "epoch": 1275.0657894736842, + "grad_norm": 1.0343172550201416, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 193810 + }, + { + "epoch": 1275.1315789473683, + "grad_norm": 0.8656930923461914, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 193820 + }, + { + "epoch": 1275.1973684210527, + "grad_norm": 1.0269204378128052, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 193830 + }, + { + "epoch": 1275.2631578947369, + "grad_norm": 1.2087546586990356, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 193840 + }, + { + "epoch": 1275.328947368421, + "grad_norm": 0.8350012898445129, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 193850 + }, + { + "epoch": 1275.3947368421052, + "grad_norm": 0.9556059241294861, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 193860 + }, + { + "epoch": 1275.4605263157894, + "grad_norm": 1.050558090209961, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 193870 + }, + { + "epoch": 1275.5263157894738, + "grad_norm": 1.1796860694885254, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 193880 + }, + { + "epoch": 1275.592105263158, + "grad_norm": 0.9585551023483276, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 193890 + }, + { + "epoch": 1275.657894736842, + "grad_norm": 1.2745286226272583, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 193900 + }, + { + "epoch": 1275.7236842105262, + "grad_norm": 0.8697680234909058, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 193910 + }, + { + "epoch": 1275.7894736842106, + "grad_norm": 1.0319898128509521, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 193920 + }, + { + "epoch": 1275.8552631578948, + "grad_norm": 0.8313315510749817, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 193930 + }, + { + "epoch": 1275.921052631579, + "grad_norm": 1.148795247077942, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 193940 + }, + { + "epoch": 1275.9868421052631, + "grad_norm": 0.8364288210868835, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 193950 + }, + { + "epoch": 1276.0526315789473, + "grad_norm": 0.909010648727417, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 193960 + }, + { + "epoch": 1276.1184210526317, + "grad_norm": 0.9541910290718079, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 193970 + }, + { + "epoch": 1276.1842105263158, + "grad_norm": 1.6058369874954224, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 193980 + }, + { + "epoch": 1276.25, + "grad_norm": 3.058375835418701, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 193990 + }, + { + "epoch": 1276.3157894736842, + "grad_norm": 2.173565149307251, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 194000 + }, + { + "epoch": 1276.3815789473683, + "grad_norm": 1.741365671157837, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 194010 + }, + { + "epoch": 1276.4473684210527, + "grad_norm": 1.5380631685256958, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 194020 + }, + { + "epoch": 1276.5131578947369, + "grad_norm": 1.3168596029281616, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 194030 + }, + { + "epoch": 1276.578947368421, + "grad_norm": 1.2907633781433105, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 194040 + }, + { + "epoch": 1276.6447368421052, + "grad_norm": 1.0855131149291992, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 194050 + }, + { + "epoch": 1276.7105263157894, + "grad_norm": 1.2930248975753784, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 194060 + }, + { + "epoch": 1276.7763157894738, + "grad_norm": 1.222780466079712, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 194070 + }, + { + "epoch": 1276.842105263158, + "grad_norm": 1.4316956996917725, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 194080 + }, + { + "epoch": 1276.907894736842, + "grad_norm": 1.0720902681350708, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 194090 + }, + { + "epoch": 1276.9736842105262, + "grad_norm": 1.6173840761184692, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 194100 + }, + { + "epoch": 1277.0394736842106, + "grad_norm": 1.3037574291229248, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 194110 + }, + { + "epoch": 1277.1052631578948, + "grad_norm": 1.301659345626831, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 194120 + }, + { + "epoch": 1277.171052631579, + "grad_norm": 1.402639627456665, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 194130 + }, + { + "epoch": 1277.2368421052631, + "grad_norm": 1.272783637046814, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 194140 + }, + { + "epoch": 1277.3026315789473, + "grad_norm": 0.7505586743354797, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 194150 + }, + { + "epoch": 1277.3684210526317, + "grad_norm": 0.9745710492134094, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 194160 + }, + { + "epoch": 1277.4342105263158, + "grad_norm": 1.12748384475708, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 194170 + }, + { + "epoch": 1277.5, + "grad_norm": 1.296922206878662, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 194180 + }, + { + "epoch": 1277.5657894736842, + "grad_norm": 1.1418737173080444, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 194190 + }, + { + "epoch": 1277.6315789473683, + "grad_norm": 0.7893008589744568, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 194200 + }, + { + "epoch": 1277.6973684210527, + "grad_norm": 0.9640665650367737, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 194210 + }, + { + "epoch": 1277.7631578947369, + "grad_norm": 1.2299898862838745, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 194220 + }, + { + "epoch": 1277.828947368421, + "grad_norm": 0.9324010610580444, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 194230 + }, + { + "epoch": 1277.8947368421052, + "grad_norm": 0.841152012348175, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 194240 + }, + { + "epoch": 1277.9605263157894, + "grad_norm": 1.026451826095581, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 194250 + }, + { + "epoch": 1278.0263157894738, + "grad_norm": 0.8263790011405945, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 194260 + }, + { + "epoch": 1278.092105263158, + "grad_norm": 0.8429759740829468, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 194270 + }, + { + "epoch": 1278.157894736842, + "grad_norm": 0.9431121945381165, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 194280 + }, + { + "epoch": 1278.2236842105262, + "grad_norm": 0.8703896999359131, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 194290 + }, + { + "epoch": 1278.2894736842106, + "grad_norm": 0.7083398699760437, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 194300 + }, + { + "epoch": 1278.3552631578948, + "grad_norm": 1.1701858043670654, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 194310 + }, + { + "epoch": 1278.421052631579, + "grad_norm": 1.2647616863250732, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 194320 + }, + { + "epoch": 1278.4868421052631, + "grad_norm": 0.9830572009086609, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 194330 + }, + { + "epoch": 1278.5526315789473, + "grad_norm": 0.9235027432441711, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 194340 + }, + { + "epoch": 1278.6184210526317, + "grad_norm": 0.9852763414382935, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 194350 + }, + { + "epoch": 1278.6842105263158, + "grad_norm": 0.9919136762619019, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 194360 + }, + { + "epoch": 1278.75, + "grad_norm": 0.9679889678955078, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 194370 + }, + { + "epoch": 1278.8157894736842, + "grad_norm": 1.1155283451080322, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 194380 + }, + { + "epoch": 1278.8815789473683, + "grad_norm": 1.1698013544082642, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 194390 + }, + { + "epoch": 1278.9473684210527, + "grad_norm": 1.2240992784500122, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 194400 + }, + { + "epoch": 1279.0131578947369, + "grad_norm": 1.3296053409576416, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 194410 + }, + { + "epoch": 1279.078947368421, + "grad_norm": 1.4155553579330444, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 194420 + }, + { + "epoch": 1279.1447368421052, + "grad_norm": 1.3145627975463867, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 194430 + }, + { + "epoch": 1279.2105263157894, + "grad_norm": 1.4205524921417236, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 194440 + }, + { + "epoch": 1279.2763157894738, + "grad_norm": 0.8720544576644897, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 194450 + }, + { + "epoch": 1279.342105263158, + "grad_norm": 0.9181710481643677, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 194460 + }, + { + "epoch": 1279.407894736842, + "grad_norm": 1.1017323732376099, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 194470 + }, + { + "epoch": 1279.4736842105262, + "grad_norm": 0.9298954606056213, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 194480 + }, + { + "epoch": 1279.5394736842106, + "grad_norm": 0.9003967046737671, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 194490 + }, + { + "epoch": 1279.6052631578948, + "grad_norm": 1.0150399208068848, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 194500 + }, + { + "epoch": 1279.671052631579, + "grad_norm": 0.9780325889587402, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 194510 + }, + { + "epoch": 1279.7368421052631, + "grad_norm": 0.9293487668037415, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 194520 + }, + { + "epoch": 1279.8026315789473, + "grad_norm": 0.82585608959198, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 194530 + }, + { + "epoch": 1279.8684210526317, + "grad_norm": 0.9033555388450623, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 194540 + }, + { + "epoch": 1279.9342105263158, + "grad_norm": 1.1674561500549316, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 194550 + }, + { + "epoch": 1280.0, + "grad_norm": 0.9872167110443115, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 194560 + }, + { + "epoch": 1280.0657894736842, + "grad_norm": 0.7526688575744629, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 194570 + }, + { + "epoch": 1280.1315789473683, + "grad_norm": 1.100469708442688, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 194580 + }, + { + "epoch": 1280.1973684210527, + "grad_norm": 1.1157853603363037, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 194590 + }, + { + "epoch": 1280.2631578947369, + "grad_norm": 1.3560817241668701, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 194600 + }, + { + "epoch": 1280.328947368421, + "grad_norm": 1.132397174835205, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 194610 + }, + { + "epoch": 1280.3947368421052, + "grad_norm": 1.0123302936553955, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 194620 + }, + { + "epoch": 1280.4605263157894, + "grad_norm": 0.9225643873214722, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 194630 + }, + { + "epoch": 1280.5263157894738, + "grad_norm": 0.7562076449394226, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 194640 + }, + { + "epoch": 1280.592105263158, + "grad_norm": 0.5493844747543335, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 194650 + }, + { + "epoch": 1280.657894736842, + "grad_norm": 0.9773281216621399, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 194660 + }, + { + "epoch": 1280.7236842105262, + "grad_norm": 0.8132543563842773, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 194670 + }, + { + "epoch": 1280.7894736842106, + "grad_norm": 1.0290168523788452, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 194680 + }, + { + "epoch": 1280.8552631578948, + "grad_norm": 1.0300383567810059, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 194690 + }, + { + "epoch": 1280.921052631579, + "grad_norm": 0.831333339214325, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 194700 + }, + { + "epoch": 1280.9868421052631, + "grad_norm": 1.1776654720306396, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 194710 + }, + { + "epoch": 1281.0526315789473, + "grad_norm": 1.3399312496185303, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 194720 + }, + { + "epoch": 1281.1184210526317, + "grad_norm": 1.3882694244384766, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 194730 + }, + { + "epoch": 1281.1842105263158, + "grad_norm": 1.010141372680664, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 194740 + }, + { + "epoch": 1281.25, + "grad_norm": 1.1796314716339111, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 194750 + }, + { + "epoch": 1281.3157894736842, + "grad_norm": 0.9938530325889587, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 194760 + }, + { + "epoch": 1281.3815789473683, + "grad_norm": 0.8807727098464966, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 194770 + }, + { + "epoch": 1281.4473684210527, + "grad_norm": 0.9626410603523254, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 194780 + }, + { + "epoch": 1281.5131578947369, + "grad_norm": 0.9949114322662354, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 194790 + }, + { + "epoch": 1281.578947368421, + "grad_norm": 0.6117430925369263, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 194800 + }, + { + "epoch": 1281.6447368421052, + "grad_norm": 0.8383743762969971, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 194810 + }, + { + "epoch": 1281.7105263157894, + "grad_norm": 1.091646432876587, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 194820 + }, + { + "epoch": 1281.7763157894738, + "grad_norm": 0.7561153769493103, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 194830 + }, + { + "epoch": 1281.842105263158, + "grad_norm": 0.7222174406051636, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 194840 + }, + { + "epoch": 1281.907894736842, + "grad_norm": 0.7637395262718201, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 194850 + }, + { + "epoch": 1281.9736842105262, + "grad_norm": 1.0813342332839966, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 194860 + }, + { + "epoch": 1282.0394736842106, + "grad_norm": 1.0132391452789307, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 194870 + }, + { + "epoch": 1282.1052631578948, + "grad_norm": 0.9855432510375977, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 194880 + }, + { + "epoch": 1282.171052631579, + "grad_norm": 1.1103159189224243, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 194890 + }, + { + "epoch": 1282.2368421052631, + "grad_norm": 1.3338035345077515, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 194900 + }, + { + "epoch": 1282.3026315789473, + "grad_norm": 1.1458650827407837, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 194910 + }, + { + "epoch": 1282.3684210526317, + "grad_norm": 1.2011997699737549, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 194920 + }, + { + "epoch": 1282.4342105263158, + "grad_norm": 0.8735988736152649, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 194930 + }, + { + "epoch": 1282.5, + "grad_norm": 0.8202658295631409, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 194940 + }, + { + "epoch": 1282.5657894736842, + "grad_norm": 1.0125209093093872, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 194950 + }, + { + "epoch": 1282.6315789473683, + "grad_norm": 1.1227607727050781, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 194960 + }, + { + "epoch": 1282.6973684210527, + "grad_norm": 1.05240797996521, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 194970 + }, + { + "epoch": 1282.7631578947369, + "grad_norm": 1.0156041383743286, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 194980 + }, + { + "epoch": 1282.828947368421, + "grad_norm": 1.0174468755722046, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 194990 + }, + { + "epoch": 1282.8947368421052, + "grad_norm": 1.300093412399292, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 195000 + }, + { + "epoch": 1282.9605263157894, + "grad_norm": 1.298466444015503, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 195010 + }, + { + "epoch": 1283.0263157894738, + "grad_norm": 1.0919368267059326, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 195020 + }, + { + "epoch": 1283.092105263158, + "grad_norm": 1.0547558069229126, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 195030 + }, + { + "epoch": 1283.157894736842, + "grad_norm": 1.1440446376800537, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 195040 + }, + { + "epoch": 1283.2236842105262, + "grad_norm": 0.7482719421386719, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 195050 + }, + { + "epoch": 1283.2894736842106, + "grad_norm": 1.0653194189071655, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 195060 + }, + { + "epoch": 1283.3552631578948, + "grad_norm": 1.172141432762146, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 195070 + }, + { + "epoch": 1283.421052631579, + "grad_norm": 0.7871286273002625, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 195080 + }, + { + "epoch": 1283.4868421052631, + "grad_norm": 0.9559378027915955, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 195090 + }, + { + "epoch": 1283.5526315789473, + "grad_norm": 1.331890344619751, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 195100 + }, + { + "epoch": 1283.6184210526317, + "grad_norm": 0.9049778580665588, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 195110 + }, + { + "epoch": 1283.6842105263158, + "grad_norm": 0.6896979808807373, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 195120 + }, + { + "epoch": 1283.75, + "grad_norm": 0.6551532745361328, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 195130 + }, + { + "epoch": 1283.8157894736842, + "grad_norm": 0.8988133072853088, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 195140 + }, + { + "epoch": 1283.8815789473683, + "grad_norm": 0.7520022392272949, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 195150 + }, + { + "epoch": 1283.9473684210527, + "grad_norm": 1.120882511138916, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 195160 + }, + { + "epoch": 1284.0131578947369, + "grad_norm": 0.8684665560722351, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 195170 + }, + { + "epoch": 1284.078947368421, + "grad_norm": 0.9502143859863281, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 195180 + }, + { + "epoch": 1284.1447368421052, + "grad_norm": 1.1483829021453857, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 195190 + }, + { + "epoch": 1284.2105263157894, + "grad_norm": 1.1794594526290894, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 195200 + }, + { + "epoch": 1284.2763157894738, + "grad_norm": 0.681542694568634, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 195210 + }, + { + "epoch": 1284.342105263158, + "grad_norm": 0.8862521648406982, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 195220 + }, + { + "epoch": 1284.407894736842, + "grad_norm": 0.9744770526885986, + "learning_rate": 0.0001, + "loss": 0.0069, + "step": 195230 + }, + { + "epoch": 1284.4736842105262, + "grad_norm": 0.7451329827308655, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 195240 + }, + { + "epoch": 1284.5394736842106, + "grad_norm": 0.7159759402275085, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 195250 + }, + { + "epoch": 1284.6052631578948, + "grad_norm": 1.3413786888122559, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 195260 + }, + { + "epoch": 1284.671052631579, + "grad_norm": 1.1536619663238525, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 195270 + }, + { + "epoch": 1284.7368421052631, + "grad_norm": 0.8164981007575989, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 195280 + }, + { + "epoch": 1284.8026315789473, + "grad_norm": 0.990378737449646, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 195290 + }, + { + "epoch": 1284.8684210526317, + "grad_norm": 0.8975867629051208, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 195300 + }, + { + "epoch": 1284.9342105263158, + "grad_norm": 0.9112573862075806, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 195310 + }, + { + "epoch": 1285.0, + "grad_norm": 0.8136677145957947, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 195320 + }, + { + "epoch": 1285.0657894736842, + "grad_norm": 0.9761696457862854, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 195330 + }, + { + "epoch": 1285.1315789473683, + "grad_norm": 0.5923354029655457, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 195340 + }, + { + "epoch": 1285.1973684210527, + "grad_norm": 0.8915498852729797, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 195350 + }, + { + "epoch": 1285.2631578947369, + "grad_norm": 1.0586600303649902, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 195360 + }, + { + "epoch": 1285.328947368421, + "grad_norm": 1.0132033824920654, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 195370 + }, + { + "epoch": 1285.3947368421052, + "grad_norm": 1.0920168161392212, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 195380 + }, + { + "epoch": 1285.4605263157894, + "grad_norm": 0.8626097440719604, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 195390 + }, + { + "epoch": 1285.5263157894738, + "grad_norm": 0.9792760014533997, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 195400 + }, + { + "epoch": 1285.592105263158, + "grad_norm": 1.0166680812835693, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 195410 + }, + { + "epoch": 1285.657894736842, + "grad_norm": 0.970467209815979, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 195420 + }, + { + "epoch": 1285.7236842105262, + "grad_norm": 1.0889147520065308, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 195430 + }, + { + "epoch": 1285.7894736842106, + "grad_norm": 0.7250308394432068, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 195440 + }, + { + "epoch": 1285.8552631578948, + "grad_norm": 0.8987478613853455, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 195450 + }, + { + "epoch": 1285.921052631579, + "grad_norm": 0.7027731537818909, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 195460 + }, + { + "epoch": 1285.9868421052631, + "grad_norm": 1.0836803913116455, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 195470 + }, + { + "epoch": 1286.0526315789473, + "grad_norm": 0.9599834084510803, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 195480 + }, + { + "epoch": 1286.1184210526317, + "grad_norm": 1.119584083557129, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 195490 + }, + { + "epoch": 1286.1842105263158, + "grad_norm": 1.0971158742904663, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 195500 + }, + { + "epoch": 1286.25, + "grad_norm": 0.7488318085670471, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 195510 + }, + { + "epoch": 1286.3157894736842, + "grad_norm": 0.7044021487236023, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 195520 + }, + { + "epoch": 1286.3815789473683, + "grad_norm": 1.1740487813949585, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 195530 + }, + { + "epoch": 1286.4473684210527, + "grad_norm": 1.206044316291809, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 195540 + }, + { + "epoch": 1286.5131578947369, + "grad_norm": 0.9043120741844177, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 195550 + }, + { + "epoch": 1286.578947368421, + "grad_norm": 1.060771107673645, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 195560 + }, + { + "epoch": 1286.6447368421052, + "grad_norm": 1.237007975578308, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 195570 + }, + { + "epoch": 1286.7105263157894, + "grad_norm": 1.2525277137756348, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 195580 + }, + { + "epoch": 1286.7763157894738, + "grad_norm": 0.9757577180862427, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 195590 + }, + { + "epoch": 1286.842105263158, + "grad_norm": 0.8785386085510254, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 195600 + }, + { + "epoch": 1286.907894736842, + "grad_norm": 1.1863728761672974, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 195610 + }, + { + "epoch": 1286.9736842105262, + "grad_norm": 0.9649335741996765, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 195620 + }, + { + "epoch": 1287.0394736842106, + "grad_norm": 0.8843129873275757, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 195630 + }, + { + "epoch": 1287.1052631578948, + "grad_norm": 1.302202820777893, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 195640 + }, + { + "epoch": 1287.171052631579, + "grad_norm": 1.2117173671722412, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 195650 + }, + { + "epoch": 1287.2368421052631, + "grad_norm": 1.168351411819458, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 195660 + }, + { + "epoch": 1287.3026315789473, + "grad_norm": 1.0641189813613892, + "learning_rate": 0.0001, + "loss": 0.0124, + "step": 195670 + }, + { + "epoch": 1287.3684210526317, + "grad_norm": 1.3389551639556885, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 195680 + }, + { + "epoch": 1287.4342105263158, + "grad_norm": 0.8608621954917908, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 195690 + }, + { + "epoch": 1287.5, + "grad_norm": 0.9058868885040283, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 195700 + }, + { + "epoch": 1287.5657894736842, + "grad_norm": 1.3241692781448364, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 195710 + }, + { + "epoch": 1287.6315789473683, + "grad_norm": 1.0901117324829102, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 195720 + }, + { + "epoch": 1287.6973684210527, + "grad_norm": 1.2744159698486328, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 195730 + }, + { + "epoch": 1287.7631578947369, + "grad_norm": 0.9215327501296997, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 195740 + }, + { + "epoch": 1287.828947368421, + "grad_norm": 0.8423858284950256, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 195750 + }, + { + "epoch": 1287.8947368421052, + "grad_norm": 1.16117262840271, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 195760 + }, + { + "epoch": 1287.9605263157894, + "grad_norm": 0.973017156124115, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 195770 + }, + { + "epoch": 1288.0263157894738, + "grad_norm": 0.9248650074005127, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 195780 + }, + { + "epoch": 1288.092105263158, + "grad_norm": 1.0818302631378174, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 195790 + }, + { + "epoch": 1288.157894736842, + "grad_norm": 0.8217904567718506, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 195800 + }, + { + "epoch": 1288.2236842105262, + "grad_norm": 1.021702527999878, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 195810 + }, + { + "epoch": 1288.2894736842106, + "grad_norm": 0.8209040760993958, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 195820 + }, + { + "epoch": 1288.3552631578948, + "grad_norm": 0.7926214933395386, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 195830 + }, + { + "epoch": 1288.421052631579, + "grad_norm": 0.9760251045227051, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 195840 + }, + { + "epoch": 1288.4868421052631, + "grad_norm": 0.9303892850875854, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 195850 + }, + { + "epoch": 1288.5526315789473, + "grad_norm": 1.2029811143875122, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 195860 + }, + { + "epoch": 1288.6184210526317, + "grad_norm": 1.0326858758926392, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 195870 + }, + { + "epoch": 1288.6842105263158, + "grad_norm": 0.8558474779129028, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 195880 + }, + { + "epoch": 1288.75, + "grad_norm": 0.9762908220291138, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 195890 + }, + { + "epoch": 1288.8157894736842, + "grad_norm": 0.9533197283744812, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 195900 + }, + { + "epoch": 1288.8815789473683, + "grad_norm": 1.0016499757766724, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 195910 + }, + { + "epoch": 1288.9473684210527, + "grad_norm": 1.404975414276123, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 195920 + }, + { + "epoch": 1289.0131578947369, + "grad_norm": 1.2138851881027222, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 195930 + }, + { + "epoch": 1289.078947368421, + "grad_norm": 1.0937894582748413, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 195940 + }, + { + "epoch": 1289.1447368421052, + "grad_norm": 0.9936800599098206, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 195950 + }, + { + "epoch": 1289.2105263157894, + "grad_norm": 1.2059030532836914, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 195960 + }, + { + "epoch": 1289.2763157894738, + "grad_norm": 1.204706072807312, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 195970 + }, + { + "epoch": 1289.342105263158, + "grad_norm": 0.8758791089057922, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 195980 + }, + { + "epoch": 1289.407894736842, + "grad_norm": 0.9688547849655151, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 195990 + }, + { + "epoch": 1289.4736842105262, + "grad_norm": 0.8228425979614258, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 196000 + }, + { + "epoch": 1289.5394736842106, + "grad_norm": 0.6902533173561096, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 196010 + }, + { + "epoch": 1289.6052631578948, + "grad_norm": 1.0823955535888672, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 196020 + }, + { + "epoch": 1289.671052631579, + "grad_norm": 1.114900827407837, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 196030 + }, + { + "epoch": 1289.7368421052631, + "grad_norm": 1.076406717300415, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 196040 + }, + { + "epoch": 1289.8026315789473, + "grad_norm": 0.841337263584137, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 196050 + }, + { + "epoch": 1289.8684210526317, + "grad_norm": 0.953133761882782, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 196060 + }, + { + "epoch": 1289.9342105263158, + "grad_norm": 0.9329866766929626, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 196070 + }, + { + "epoch": 1290.0, + "grad_norm": 1.4121922254562378, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 196080 + }, + { + "epoch": 1290.0657894736842, + "grad_norm": 1.1717429161071777, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 196090 + }, + { + "epoch": 1290.1315789473683, + "grad_norm": 1.0481109619140625, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 196100 + }, + { + "epoch": 1290.1973684210527, + "grad_norm": 1.3172156810760498, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 196110 + }, + { + "epoch": 1290.2631578947369, + "grad_norm": 1.1237008571624756, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 196120 + }, + { + "epoch": 1290.328947368421, + "grad_norm": 1.2435554265975952, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 196130 + }, + { + "epoch": 1290.3947368421052, + "grad_norm": 0.9842655062675476, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 196140 + }, + { + "epoch": 1290.4605263157894, + "grad_norm": 0.9428750276565552, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 196150 + }, + { + "epoch": 1290.5263157894738, + "grad_norm": 0.8579443693161011, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 196160 + }, + { + "epoch": 1290.592105263158, + "grad_norm": 0.9661730527877808, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 196170 + }, + { + "epoch": 1290.657894736842, + "grad_norm": 1.2594717741012573, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 196180 + }, + { + "epoch": 1290.7236842105262, + "grad_norm": 1.2238619327545166, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 196190 + }, + { + "epoch": 1290.7894736842106, + "grad_norm": 1.045121431350708, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 196200 + }, + { + "epoch": 1290.8552631578948, + "grad_norm": 1.2637920379638672, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 196210 + }, + { + "epoch": 1290.921052631579, + "grad_norm": 1.0736345052719116, + "learning_rate": 0.0001, + "loss": 0.0068, + "step": 196220 + }, + { + "epoch": 1290.9868421052631, + "grad_norm": 1.1902934312820435, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 196230 + }, + { + "epoch": 1291.0526315789473, + "grad_norm": 1.206912875175476, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 196240 + }, + { + "epoch": 1291.1184210526317, + "grad_norm": 0.6837294697761536, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 196250 + }, + { + "epoch": 1291.1842105263158, + "grad_norm": 0.8661401271820068, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 196260 + }, + { + "epoch": 1291.25, + "grad_norm": 0.7421029210090637, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 196270 + }, + { + "epoch": 1291.3157894736842, + "grad_norm": 1.000644326210022, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 196280 + }, + { + "epoch": 1291.3815789473683, + "grad_norm": 0.9609134793281555, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 196290 + }, + { + "epoch": 1291.4473684210527, + "grad_norm": 0.8988838791847229, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 196300 + }, + { + "epoch": 1291.5131578947369, + "grad_norm": 1.2548774480819702, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 196310 + }, + { + "epoch": 1291.578947368421, + "grad_norm": 1.2236343622207642, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 196320 + }, + { + "epoch": 1291.6447368421052, + "grad_norm": 0.982333242893219, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 196330 + }, + { + "epoch": 1291.7105263157894, + "grad_norm": 1.1261245012283325, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 196340 + }, + { + "epoch": 1291.7763157894738, + "grad_norm": 1.1063497066497803, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 196350 + }, + { + "epoch": 1291.842105263158, + "grad_norm": 0.8248695731163025, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 196360 + }, + { + "epoch": 1291.907894736842, + "grad_norm": 1.017749309539795, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 196370 + }, + { + "epoch": 1291.9736842105262, + "grad_norm": 0.7296943664550781, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 196380 + }, + { + "epoch": 1292.0394736842106, + "grad_norm": 0.9445720314979553, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 196390 + }, + { + "epoch": 1292.1052631578948, + "grad_norm": 2.0632238388061523, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 196400 + }, + { + "epoch": 1292.171052631579, + "grad_norm": 1.4882488250732422, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 196410 + }, + { + "epoch": 1292.2368421052631, + "grad_norm": 1.1946419477462769, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 196420 + }, + { + "epoch": 1292.3026315789473, + "grad_norm": 1.3664677143096924, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 196430 + }, + { + "epoch": 1292.3684210526317, + "grad_norm": 1.171695590019226, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 196440 + }, + { + "epoch": 1292.4342105263158, + "grad_norm": 1.2387498617172241, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 196450 + }, + { + "epoch": 1292.5, + "grad_norm": 1.0718955993652344, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 196460 + }, + { + "epoch": 1292.5657894736842, + "grad_norm": 1.0958354473114014, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 196470 + }, + { + "epoch": 1292.6315789473683, + "grad_norm": 1.5984266996383667, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 196480 + }, + { + "epoch": 1292.6973684210527, + "grad_norm": 1.1815001964569092, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 196490 + }, + { + "epoch": 1292.7631578947369, + "grad_norm": 1.5802987813949585, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 196500 + }, + { + "epoch": 1292.828947368421, + "grad_norm": 1.259151816368103, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 196510 + }, + { + "epoch": 1292.8947368421052, + "grad_norm": 1.411533236503601, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 196520 + }, + { + "epoch": 1292.9605263157894, + "grad_norm": 0.8868615031242371, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 196530 + }, + { + "epoch": 1293.0263157894738, + "grad_norm": 0.9721224904060364, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 196540 + }, + { + "epoch": 1293.092105263158, + "grad_norm": 1.0972973108291626, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 196550 + }, + { + "epoch": 1293.157894736842, + "grad_norm": 0.9356346130371094, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 196560 + }, + { + "epoch": 1293.2236842105262, + "grad_norm": 1.2877929210662842, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 196570 + }, + { + "epoch": 1293.2894736842106, + "grad_norm": 0.9061616659164429, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 196580 + }, + { + "epoch": 1293.3552631578948, + "grad_norm": 1.1051673889160156, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 196590 + }, + { + "epoch": 1293.421052631579, + "grad_norm": 1.0051326751708984, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 196600 + }, + { + "epoch": 1293.4868421052631, + "grad_norm": 1.1201657056808472, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 196610 + }, + { + "epoch": 1293.5526315789473, + "grad_norm": 0.9499081969261169, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 196620 + }, + { + "epoch": 1293.6184210526317, + "grad_norm": 0.87836092710495, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 196630 + }, + { + "epoch": 1293.6842105263158, + "grad_norm": 0.5928868651390076, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 196640 + }, + { + "epoch": 1293.75, + "grad_norm": 0.7514234781265259, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 196650 + }, + { + "epoch": 1293.8157894736842, + "grad_norm": 0.8907245397567749, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 196660 + }, + { + "epoch": 1293.8815789473683, + "grad_norm": 0.7311668395996094, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 196670 + }, + { + "epoch": 1293.9473684210527, + "grad_norm": 1.2829216718673706, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 196680 + }, + { + "epoch": 1294.0131578947369, + "grad_norm": 0.797347366809845, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 196690 + }, + { + "epoch": 1294.078947368421, + "grad_norm": 1.1499863862991333, + "learning_rate": 0.0001, + "loss": 0.0069, + "step": 196700 + }, + { + "epoch": 1294.1447368421052, + "grad_norm": 1.033013939857483, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 196710 + }, + { + "epoch": 1294.2105263157894, + "grad_norm": 0.9540650248527527, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 196720 + }, + { + "epoch": 1294.2763157894738, + "grad_norm": 1.2450796365737915, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 196730 + }, + { + "epoch": 1294.342105263158, + "grad_norm": 0.8691481947898865, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 196740 + }, + { + "epoch": 1294.407894736842, + "grad_norm": 1.0715320110321045, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 196750 + }, + { + "epoch": 1294.4736842105262, + "grad_norm": 1.0476354360580444, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 196760 + }, + { + "epoch": 1294.5394736842106, + "grad_norm": 0.9503270983695984, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 196770 + }, + { + "epoch": 1294.6052631578948, + "grad_norm": 1.1776961088180542, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 196780 + }, + { + "epoch": 1294.671052631579, + "grad_norm": 0.6950992345809937, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 196790 + }, + { + "epoch": 1294.7368421052631, + "grad_norm": 1.3217885494232178, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 196800 + }, + { + "epoch": 1294.8026315789473, + "grad_norm": 0.9721072912216187, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 196810 + }, + { + "epoch": 1294.8684210526317, + "grad_norm": 0.996015191078186, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 196820 + }, + { + "epoch": 1294.9342105263158, + "grad_norm": 1.1851248741149902, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 196830 + }, + { + "epoch": 1295.0, + "grad_norm": 1.0879162549972534, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 196840 + }, + { + "epoch": 1295.0657894736842, + "grad_norm": 0.9492610692977905, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 196850 + }, + { + "epoch": 1295.1315789473683, + "grad_norm": 1.0667566061019897, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 196860 + }, + { + "epoch": 1295.1973684210527, + "grad_norm": 0.8629001975059509, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 196870 + }, + { + "epoch": 1295.2631578947369, + "grad_norm": 0.9111774563789368, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 196880 + }, + { + "epoch": 1295.328947368421, + "grad_norm": 0.9569632411003113, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 196890 + }, + { + "epoch": 1295.3947368421052, + "grad_norm": 0.8886310458183289, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 196900 + }, + { + "epoch": 1295.4605263157894, + "grad_norm": 1.119640588760376, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 196910 + }, + { + "epoch": 1295.5263157894738, + "grad_norm": 0.7103597521781921, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 196920 + }, + { + "epoch": 1295.592105263158, + "grad_norm": 1.0032562017440796, + "learning_rate": 0.0001, + "loss": 0.007, + "step": 196930 + }, + { + "epoch": 1295.657894736842, + "grad_norm": 0.6882836818695068, + "learning_rate": 0.0001, + "loss": 0.0065, + "step": 196940 + }, + { + "epoch": 1295.7236842105262, + "grad_norm": 1.1084208488464355, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 196950 + }, + { + "epoch": 1295.7894736842106, + "grad_norm": 1.101213812828064, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 196960 + }, + { + "epoch": 1295.8552631578948, + "grad_norm": 1.2237820625305176, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 196970 + }, + { + "epoch": 1295.921052631579, + "grad_norm": 1.164002537727356, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 196980 + }, + { + "epoch": 1295.9868421052631, + "grad_norm": 0.5948657989501953, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 196990 + }, + { + "epoch": 1296.0526315789473, + "grad_norm": 0.8001142144203186, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 197000 + }, + { + "epoch": 1296.1184210526317, + "grad_norm": 0.9679588675498962, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 197010 + }, + { + "epoch": 1296.1842105263158, + "grad_norm": 0.7849892377853394, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 197020 + }, + { + "epoch": 1296.25, + "grad_norm": 0.9673242568969727, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 197030 + }, + { + "epoch": 1296.3157894736842, + "grad_norm": 0.968097984790802, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 197040 + }, + { + "epoch": 1296.3815789473683, + "grad_norm": 0.8999632596969604, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 197050 + }, + { + "epoch": 1296.4473684210527, + "grad_norm": 0.7191337943077087, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 197060 + }, + { + "epoch": 1296.5131578947369, + "grad_norm": 1.1006282567977905, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 197070 + }, + { + "epoch": 1296.578947368421, + "grad_norm": 1.0084878206253052, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 197080 + }, + { + "epoch": 1296.6447368421052, + "grad_norm": 0.9073280096054077, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 197090 + }, + { + "epoch": 1296.7105263157894, + "grad_norm": 0.8795638680458069, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 197100 + }, + { + "epoch": 1296.7763157894738, + "grad_norm": 1.0686129331588745, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 197110 + }, + { + "epoch": 1296.842105263158, + "grad_norm": 0.8052754402160645, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 197120 + }, + { + "epoch": 1296.907894736842, + "grad_norm": 0.8181116580963135, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 197130 + }, + { + "epoch": 1296.9736842105262, + "grad_norm": 1.0895122289657593, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 197140 + }, + { + "epoch": 1297.0394736842106, + "grad_norm": 0.7319552898406982, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 197150 + }, + { + "epoch": 1297.1052631578948, + "grad_norm": 1.2623764276504517, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 197160 + }, + { + "epoch": 1297.171052631579, + "grad_norm": 1.238411784172058, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 197170 + }, + { + "epoch": 1297.2368421052631, + "grad_norm": 1.0991114377975464, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 197180 + }, + { + "epoch": 1297.3026315789473, + "grad_norm": 0.8547118306159973, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 197190 + }, + { + "epoch": 1297.3684210526317, + "grad_norm": 1.1389788389205933, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 197200 + }, + { + "epoch": 1297.4342105263158, + "grad_norm": 1.2709906101226807, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 197210 + }, + { + "epoch": 1297.5, + "grad_norm": 0.9706898331642151, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 197220 + }, + { + "epoch": 1297.5657894736842, + "grad_norm": 0.8353281021118164, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 197230 + }, + { + "epoch": 1297.6315789473683, + "grad_norm": 1.0063377618789673, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 197240 + }, + { + "epoch": 1297.6973684210527, + "grad_norm": 1.0630806684494019, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 197250 + }, + { + "epoch": 1297.7631578947369, + "grad_norm": 0.7940020561218262, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 197260 + }, + { + "epoch": 1297.828947368421, + "grad_norm": 1.0338921546936035, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 197270 + }, + { + "epoch": 1297.8947368421052, + "grad_norm": 1.1591367721557617, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 197280 + }, + { + "epoch": 1297.9605263157894, + "grad_norm": 0.9339560270309448, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 197290 + }, + { + "epoch": 1298.0263157894738, + "grad_norm": 1.0255118608474731, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 197300 + }, + { + "epoch": 1298.092105263158, + "grad_norm": 1.0539302825927734, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 197310 + }, + { + "epoch": 1298.157894736842, + "grad_norm": 0.9863162636756897, + "learning_rate": 0.0001, + "loss": 0.0069, + "step": 197320 + }, + { + "epoch": 1298.2236842105262, + "grad_norm": 1.1610512733459473, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 197330 + }, + { + "epoch": 1298.2894736842106, + "grad_norm": 0.9269651174545288, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 197340 + }, + { + "epoch": 1298.3552631578948, + "grad_norm": 0.8783165812492371, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 197350 + }, + { + "epoch": 1298.421052631579, + "grad_norm": 0.9454626441001892, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 197360 + }, + { + "epoch": 1298.4868421052631, + "grad_norm": 0.9759495854377747, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 197370 + }, + { + "epoch": 1298.5526315789473, + "grad_norm": 1.3680849075317383, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 197380 + }, + { + "epoch": 1298.6184210526317, + "grad_norm": 1.3781981468200684, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 197390 + }, + { + "epoch": 1298.6842105263158, + "grad_norm": 1.1003533601760864, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 197400 + }, + { + "epoch": 1298.75, + "grad_norm": 1.1373398303985596, + "learning_rate": 0.0001, + "loss": 0.011, + "step": 197410 + }, + { + "epoch": 1298.8157894736842, + "grad_norm": 0.7512350082397461, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 197420 + }, + { + "epoch": 1298.8815789473683, + "grad_norm": 1.035928726196289, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 197430 + }, + { + "epoch": 1298.9473684210527, + "grad_norm": 1.0015102624893188, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 197440 + }, + { + "epoch": 1299.0131578947369, + "grad_norm": 1.1075206995010376, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 197450 + }, + { + "epoch": 1299.078947368421, + "grad_norm": 1.2172952890396118, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 197460 + }, + { + "epoch": 1299.1447368421052, + "grad_norm": 1.0585336685180664, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 197470 + }, + { + "epoch": 1299.2105263157894, + "grad_norm": 0.9066409468650818, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 197480 + }, + { + "epoch": 1299.2763157894738, + "grad_norm": 1.2567695379257202, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 197490 + }, + { + "epoch": 1299.342105263158, + "grad_norm": 1.0604040622711182, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 197500 + }, + { + "epoch": 1299.407894736842, + "grad_norm": 1.0578597784042358, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 197510 + }, + { + "epoch": 1299.4736842105262, + "grad_norm": 0.9691057205200195, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 197520 + }, + { + "epoch": 1299.5394736842106, + "grad_norm": 1.079813003540039, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 197530 + }, + { + "epoch": 1299.6052631578948, + "grad_norm": 1.141811490058899, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 197540 + }, + { + "epoch": 1299.671052631579, + "grad_norm": 0.8607233166694641, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 197550 + }, + { + "epoch": 1299.7368421052631, + "grad_norm": 0.815772533416748, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 197560 + }, + { + "epoch": 1299.8026315789473, + "grad_norm": 0.8933233022689819, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 197570 + }, + { + "epoch": 1299.8684210526317, + "grad_norm": 0.9021427035331726, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 197580 + }, + { + "epoch": 1299.9342105263158, + "grad_norm": 0.763812780380249, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 197590 + }, + { + "epoch": 1300.0, + "grad_norm": 0.9004136323928833, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 197600 + }, + { + "epoch": 1300.0657894736842, + "grad_norm": 0.7930411100387573, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 197610 + }, + { + "epoch": 1300.1315789473683, + "grad_norm": 1.1747939586639404, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 197620 + }, + { + "epoch": 1300.1973684210527, + "grad_norm": 0.7587085962295532, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 197630 + }, + { + "epoch": 1300.2631578947369, + "grad_norm": 0.8571013808250427, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 197640 + }, + { + "epoch": 1300.328947368421, + "grad_norm": 1.2042564153671265, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 197650 + }, + { + "epoch": 1300.3947368421052, + "grad_norm": 1.0559279918670654, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 197660 + }, + { + "epoch": 1300.4605263157894, + "grad_norm": 1.0643962621688843, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 197670 + }, + { + "epoch": 1300.5263157894738, + "grad_norm": 1.2402312755584717, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 197680 + }, + { + "epoch": 1300.592105263158, + "grad_norm": 1.1355198621749878, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 197690 + }, + { + "epoch": 1300.657894736842, + "grad_norm": 0.8521161675453186, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 197700 + }, + { + "epoch": 1300.7236842105262, + "grad_norm": 0.7998043894767761, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 197710 + }, + { + "epoch": 1300.7894736842106, + "grad_norm": 1.1688848733901978, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 197720 + }, + { + "epoch": 1300.8552631578948, + "grad_norm": 1.0704905986785889, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 197730 + }, + { + "epoch": 1300.921052631579, + "grad_norm": 0.9947075247764587, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 197740 + }, + { + "epoch": 1300.9868421052631, + "grad_norm": 1.168898582458496, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 197750 + }, + { + "epoch": 1301.0526315789473, + "grad_norm": 1.1258999109268188, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 197760 + }, + { + "epoch": 1301.1184210526317, + "grad_norm": 0.9728697538375854, + "learning_rate": 0.0001, + "loss": 0.0113, + "step": 197770 + }, + { + "epoch": 1301.1842105263158, + "grad_norm": 0.923896849155426, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 197780 + }, + { + "epoch": 1301.25, + "grad_norm": 1.1840898990631104, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 197790 + }, + { + "epoch": 1301.3157894736842, + "grad_norm": 1.1185284852981567, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 197800 + }, + { + "epoch": 1301.3815789473683, + "grad_norm": 0.7468376159667969, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 197810 + }, + { + "epoch": 1301.4473684210527, + "grad_norm": 0.6839302778244019, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 197820 + }, + { + "epoch": 1301.5131578947369, + "grad_norm": 0.9034473299980164, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 197830 + }, + { + "epoch": 1301.578947368421, + "grad_norm": 1.357205867767334, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 197840 + }, + { + "epoch": 1301.6447368421052, + "grad_norm": 1.2651640176773071, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 197850 + }, + { + "epoch": 1301.7105263157894, + "grad_norm": 0.9837512969970703, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 197860 + }, + { + "epoch": 1301.7763157894738, + "grad_norm": 1.1584852933883667, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 197870 + }, + { + "epoch": 1301.842105263158, + "grad_norm": 0.8707510232925415, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 197880 + }, + { + "epoch": 1301.907894736842, + "grad_norm": 1.073400855064392, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 197890 + }, + { + "epoch": 1301.9736842105262, + "grad_norm": 1.0655863285064697, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 197900 + }, + { + "epoch": 1302.0394736842106, + "grad_norm": 0.9237596988677979, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 197910 + }, + { + "epoch": 1302.1052631578948, + "grad_norm": 1.0519744157791138, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 197920 + }, + { + "epoch": 1302.171052631579, + "grad_norm": 0.7526938319206238, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 197930 + }, + { + "epoch": 1302.2368421052631, + "grad_norm": 0.9896842837333679, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 197940 + }, + { + "epoch": 1302.3026315789473, + "grad_norm": 0.7161365151405334, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 197950 + }, + { + "epoch": 1302.3684210526317, + "grad_norm": 1.2991057634353638, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 197960 + }, + { + "epoch": 1302.4342105263158, + "grad_norm": 1.0487098693847656, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 197970 + }, + { + "epoch": 1302.5, + "grad_norm": 1.1503612995147705, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 197980 + }, + { + "epoch": 1302.5657894736842, + "grad_norm": 1.0851579904556274, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 197990 + }, + { + "epoch": 1302.6315789473683, + "grad_norm": 1.2855541706085205, + "learning_rate": 0.0001, + "loss": 0.0066, + "step": 198000 + }, + { + "epoch": 1302.6973684210527, + "grad_norm": 1.1550692319869995, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 198010 + }, + { + "epoch": 1302.7631578947369, + "grad_norm": 0.8146882653236389, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 198020 + }, + { + "epoch": 1302.828947368421, + "grad_norm": 0.6463797092437744, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 198030 + }, + { + "epoch": 1302.8947368421052, + "grad_norm": 1.289481520652771, + "learning_rate": 0.0001, + "loss": 0.0114, + "step": 198040 + }, + { + "epoch": 1302.9605263157894, + "grad_norm": 0.8075095415115356, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 198050 + }, + { + "epoch": 1303.0263157894738, + "grad_norm": 1.2249948978424072, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 198060 + }, + { + "epoch": 1303.092105263158, + "grad_norm": 0.8465560674667358, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 198070 + }, + { + "epoch": 1303.157894736842, + "grad_norm": 0.8450498580932617, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 198080 + }, + { + "epoch": 1303.2236842105262, + "grad_norm": 1.2542957067489624, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 198090 + }, + { + "epoch": 1303.2894736842106, + "grad_norm": 0.9559205770492554, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 198100 + }, + { + "epoch": 1303.3552631578948, + "grad_norm": 0.6290387511253357, + "learning_rate": 0.0001, + "loss": 0.0091, + "step": 198110 + }, + { + "epoch": 1303.421052631579, + "grad_norm": 0.8307482600212097, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 198120 + }, + { + "epoch": 1303.4868421052631, + "grad_norm": 0.7818741202354431, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 198130 + }, + { + "epoch": 1303.5526315789473, + "grad_norm": 0.7035477757453918, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 198140 + }, + { + "epoch": 1303.6184210526317, + "grad_norm": 0.9868258833885193, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 198150 + }, + { + "epoch": 1303.6842105263158, + "grad_norm": 1.5310142040252686, + "learning_rate": 0.0001, + "loss": 0.0127, + "step": 198160 + }, + { + "epoch": 1303.75, + "grad_norm": 1.3995954990386963, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 198170 + }, + { + "epoch": 1303.8157894736842, + "grad_norm": 1.1260607242584229, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 198180 + }, + { + "epoch": 1303.8815789473683, + "grad_norm": 1.8549559116363525, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 198190 + }, + { + "epoch": 1303.9473684210527, + "grad_norm": 1.5865100622177124, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 198200 + }, + { + "epoch": 1304.0131578947369, + "grad_norm": 1.6978919506072998, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 198210 + }, + { + "epoch": 1304.078947368421, + "grad_norm": 1.3336944580078125, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 198220 + }, + { + "epoch": 1304.1447368421052, + "grad_norm": 1.2469390630722046, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 198230 + }, + { + "epoch": 1304.2105263157894, + "grad_norm": 1.321698546409607, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 198240 + }, + { + "epoch": 1304.2763157894738, + "grad_norm": 0.9702907800674438, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 198250 + }, + { + "epoch": 1304.342105263158, + "grad_norm": 1.213561773300171, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 198260 + }, + { + "epoch": 1304.407894736842, + "grad_norm": 1.2231154441833496, + "learning_rate": 0.0001, + "loss": 0.0108, + "step": 198270 + }, + { + "epoch": 1304.4736842105262, + "grad_norm": 1.1985797882080078, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 198280 + }, + { + "epoch": 1304.5394736842106, + "grad_norm": 0.9450637102127075, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 198290 + }, + { + "epoch": 1304.6052631578948, + "grad_norm": 0.9720757007598877, + "learning_rate": 0.0001, + "loss": 0.0096, + "step": 198300 + }, + { + "epoch": 1304.671052631579, + "grad_norm": 0.849722683429718, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 198310 + }, + { + "epoch": 1304.7368421052631, + "grad_norm": 1.1238338947296143, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 198320 + }, + { + "epoch": 1304.8026315789473, + "grad_norm": 1.1874395608901978, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 198330 + }, + { + "epoch": 1304.8684210526317, + "grad_norm": 1.0838104486465454, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 198340 + }, + { + "epoch": 1304.9342105263158, + "grad_norm": 1.154099464416504, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 198350 + }, + { + "epoch": 1305.0, + "grad_norm": 1.3620731830596924, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 198360 + }, + { + "epoch": 1305.0657894736842, + "grad_norm": 1.2841746807098389, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 198370 + }, + { + "epoch": 1305.1315789473683, + "grad_norm": 0.7634298801422119, + "learning_rate": 0.0001, + "loss": 0.0118, + "step": 198380 + }, + { + "epoch": 1305.1973684210527, + "grad_norm": 0.82984858751297, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 198390 + }, + { + "epoch": 1305.2631578947369, + "grad_norm": 1.093189001083374, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 198400 + }, + { + "epoch": 1305.328947368421, + "grad_norm": 0.9606587290763855, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 198410 + }, + { + "epoch": 1305.3947368421052, + "grad_norm": 0.9093089699745178, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 198420 + }, + { + "epoch": 1305.4605263157894, + "grad_norm": 0.9972700476646423, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 198430 + }, + { + "epoch": 1305.5263157894738, + "grad_norm": 0.724269688129425, + "learning_rate": 0.0001, + "loss": 0.012, + "step": 198440 + }, + { + "epoch": 1305.592105263158, + "grad_norm": 0.6498937606811523, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 198450 + }, + { + "epoch": 1305.657894736842, + "grad_norm": 0.9374182224273682, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 198460 + }, + { + "epoch": 1305.7236842105262, + "grad_norm": 1.0840044021606445, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 198470 + }, + { + "epoch": 1305.7894736842106, + "grad_norm": 0.8806322813034058, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 198480 + }, + { + "epoch": 1305.8552631578948, + "grad_norm": 1.1491776704788208, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 198490 + }, + { + "epoch": 1305.921052631579, + "grad_norm": 1.1558951139450073, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 198500 + }, + { + "epoch": 1305.9868421052631, + "grad_norm": 0.8697763085365295, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 198510 + }, + { + "epoch": 1306.0526315789473, + "grad_norm": 0.7972436547279358, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 198520 + }, + { + "epoch": 1306.1184210526317, + "grad_norm": 0.7652742266654968, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 198530 + }, + { + "epoch": 1306.1842105263158, + "grad_norm": 1.0677361488342285, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 198540 + }, + { + "epoch": 1306.25, + "grad_norm": 0.9967530965805054, + "learning_rate": 0.0001, + "loss": 0.0071, + "step": 198550 + }, + { + "epoch": 1306.3157894736842, + "grad_norm": 0.8501853942871094, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 198560 + }, + { + "epoch": 1306.3815789473683, + "grad_norm": 1.3013569116592407, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 198570 + }, + { + "epoch": 1306.4473684210527, + "grad_norm": 0.9409443140029907, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 198580 + }, + { + "epoch": 1306.5131578947369, + "grad_norm": 0.9747039675712585, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 198590 + }, + { + "epoch": 1306.578947368421, + "grad_norm": 1.0571503639221191, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 198600 + }, + { + "epoch": 1306.6447368421052, + "grad_norm": 0.7892196178436279, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 198610 + }, + { + "epoch": 1306.7105263157894, + "grad_norm": 1.2711453437805176, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 198620 + }, + { + "epoch": 1306.7763157894738, + "grad_norm": 1.0646253824234009, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 198630 + }, + { + "epoch": 1306.842105263158, + "grad_norm": 0.9739760160446167, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 198640 + }, + { + "epoch": 1306.907894736842, + "grad_norm": 1.054294466972351, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 198650 + }, + { + "epoch": 1306.9736842105262, + "grad_norm": 0.9815489649772644, + "learning_rate": 0.0001, + "loss": 0.0111, + "step": 198660 + }, + { + "epoch": 1307.0394736842106, + "grad_norm": 1.2176196575164795, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 198670 + }, + { + "epoch": 1307.1052631578948, + "grad_norm": 0.6826998591423035, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 198680 + }, + { + "epoch": 1307.171052631579, + "grad_norm": 0.6777370572090149, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 198690 + }, + { + "epoch": 1307.2368421052631, + "grad_norm": 0.7783268690109253, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 198700 + }, + { + "epoch": 1307.3026315789473, + "grad_norm": 0.9428658485412598, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 198710 + }, + { + "epoch": 1307.3684210526317, + "grad_norm": 0.6998884081840515, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 198720 + }, + { + "epoch": 1307.4342105263158, + "grad_norm": 0.6625298261642456, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 198730 + }, + { + "epoch": 1307.5, + "grad_norm": 0.868622362613678, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 198740 + }, + { + "epoch": 1307.5657894736842, + "grad_norm": 0.9482408165931702, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 198750 + }, + { + "epoch": 1307.6315789473683, + "grad_norm": 0.9562262296676636, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 198760 + }, + { + "epoch": 1307.6973684210527, + "grad_norm": 1.1496707201004028, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 198770 + }, + { + "epoch": 1307.7631578947369, + "grad_norm": 1.1873894929885864, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 198780 + }, + { + "epoch": 1307.828947368421, + "grad_norm": 1.321275234222412, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 198790 + }, + { + "epoch": 1307.8947368421052, + "grad_norm": 1.3467177152633667, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 198800 + }, + { + "epoch": 1307.9605263157894, + "grad_norm": 1.229738473892212, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 198810 + }, + { + "epoch": 1308.0263157894738, + "grad_norm": 1.2855587005615234, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 198820 + }, + { + "epoch": 1308.092105263158, + "grad_norm": 0.9853659272193909, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 198830 + }, + { + "epoch": 1308.157894736842, + "grad_norm": 1.0061135292053223, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 198840 + }, + { + "epoch": 1308.2236842105262, + "grad_norm": 0.6921624541282654, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 198850 + }, + { + "epoch": 1308.2894736842106, + "grad_norm": 0.9409551024436951, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 198860 + }, + { + "epoch": 1308.3552631578948, + "grad_norm": 1.0803308486938477, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 198870 + }, + { + "epoch": 1308.421052631579, + "grad_norm": 1.107574224472046, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 198880 + }, + { + "epoch": 1308.4868421052631, + "grad_norm": 1.182572603225708, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 198890 + }, + { + "epoch": 1308.5526315789473, + "grad_norm": 1.107548713684082, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 198900 + }, + { + "epoch": 1308.6184210526317, + "grad_norm": 1.1584941148757935, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 198910 + }, + { + "epoch": 1308.6842105263158, + "grad_norm": 1.4747897386550903, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 198920 + }, + { + "epoch": 1308.75, + "grad_norm": 1.3047585487365723, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 198930 + }, + { + "epoch": 1308.8157894736842, + "grad_norm": 1.0019290447235107, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 198940 + }, + { + "epoch": 1308.8815789473683, + "grad_norm": 0.7977175712585449, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 198950 + }, + { + "epoch": 1308.9473684210527, + "grad_norm": 1.2986831665039062, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 198960 + }, + { + "epoch": 1309.0131578947369, + "grad_norm": 1.226833701133728, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 198970 + }, + { + "epoch": 1309.078947368421, + "grad_norm": 0.9073089957237244, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 198980 + }, + { + "epoch": 1309.1447368421052, + "grad_norm": 0.9509008526802063, + "learning_rate": 0.0001, + "loss": 0.0073, + "step": 198990 + }, + { + "epoch": 1309.2105263157894, + "grad_norm": 1.083253026008606, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 199000 + }, + { + "epoch": 1309.2763157894738, + "grad_norm": 0.731686532497406, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 199010 + }, + { + "epoch": 1309.342105263158, + "grad_norm": 1.5306459665298462, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 199020 + }, + { + "epoch": 1309.407894736842, + "grad_norm": 1.169585108757019, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 199030 + }, + { + "epoch": 1309.4736842105262, + "grad_norm": 1.225286841392517, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 199040 + }, + { + "epoch": 1309.5394736842106, + "grad_norm": 1.1703866720199585, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 199050 + }, + { + "epoch": 1309.6052631578948, + "grad_norm": 1.381737470626831, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 199060 + }, + { + "epoch": 1309.671052631579, + "grad_norm": 0.9777848720550537, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 199070 + }, + { + "epoch": 1309.7368421052631, + "grad_norm": 1.1087908744812012, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 199080 + }, + { + "epoch": 1309.8026315789473, + "grad_norm": 1.5419447422027588, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 199090 + }, + { + "epoch": 1309.8684210526317, + "grad_norm": 1.1349118947982788, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 199100 + }, + { + "epoch": 1309.9342105263158, + "grad_norm": 1.1746926307678223, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 199110 + }, + { + "epoch": 1310.0, + "grad_norm": 0.8852264881134033, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 199120 + }, + { + "epoch": 1310.0657894736842, + "grad_norm": 1.0382320880889893, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 199130 + }, + { + "epoch": 1310.1315789473683, + "grad_norm": 1.3453278541564941, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 199140 + }, + { + "epoch": 1310.1973684210527, + "grad_norm": 1.1044265031814575, + "learning_rate": 0.0001, + "loss": 0.0085, + "step": 199150 + }, + { + "epoch": 1310.2631578947369, + "grad_norm": 0.6985461711883545, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 199160 + }, + { + "epoch": 1310.328947368421, + "grad_norm": 1.2022053003311157, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 199170 + }, + { + "epoch": 1310.3947368421052, + "grad_norm": 1.3073654174804688, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 199180 + }, + { + "epoch": 1310.4605263157894, + "grad_norm": 0.9767325520515442, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 199190 + }, + { + "epoch": 1310.5263157894738, + "grad_norm": 0.7886372208595276, + "learning_rate": 0.0001, + "loss": 0.0083, + "step": 199200 + }, + { + "epoch": 1310.592105263158, + "grad_norm": 1.1879262924194336, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 199210 + }, + { + "epoch": 1310.657894736842, + "grad_norm": 1.3642725944519043, + "learning_rate": 0.0001, + "loss": 0.0072, + "step": 199220 + }, + { + "epoch": 1310.7236842105262, + "grad_norm": 0.7498663663864136, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 199230 + }, + { + "epoch": 1310.7894736842106, + "grad_norm": 0.6963070034980774, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 199240 + }, + { + "epoch": 1310.8552631578948, + "grad_norm": 1.091447114944458, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 199250 + }, + { + "epoch": 1310.921052631579, + "grad_norm": 0.7097912430763245, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 199260 + }, + { + "epoch": 1310.9868421052631, + "grad_norm": 0.9089406728744507, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 199270 + }, + { + "epoch": 1311.0526315789473, + "grad_norm": 1.0399571657180786, + "learning_rate": 0.0001, + "loss": 0.0104, + "step": 199280 + }, + { + "epoch": 1311.1184210526317, + "grad_norm": 0.8863736987113953, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 199290 + }, + { + "epoch": 1311.1842105263158, + "grad_norm": 0.7154498100280762, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 199300 + }, + { + "epoch": 1311.25, + "grad_norm": 0.644045352935791, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 199310 + }, + { + "epoch": 1311.3157894736842, + "grad_norm": 1.04582941532135, + "learning_rate": 0.0001, + "loss": 0.0075, + "step": 199320 + }, + { + "epoch": 1311.3815789473683, + "grad_norm": 0.9396390318870544, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 199330 + }, + { + "epoch": 1311.4473684210527, + "grad_norm": 0.959603488445282, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 199340 + }, + { + "epoch": 1311.5131578947369, + "grad_norm": 1.1740236282348633, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 199350 + }, + { + "epoch": 1311.578947368421, + "grad_norm": 0.7795823216438293, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 199360 + }, + { + "epoch": 1311.6447368421052, + "grad_norm": 1.1692177057266235, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 199370 + }, + { + "epoch": 1311.7105263157894, + "grad_norm": 1.053812026977539, + "learning_rate": 0.0001, + "loss": 0.0069, + "step": 199380 + }, + { + "epoch": 1311.7763157894738, + "grad_norm": 1.0118703842163086, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 199390 + }, + { + "epoch": 1311.842105263158, + "grad_norm": 0.7939363718032837, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 199400 + }, + { + "epoch": 1311.907894736842, + "grad_norm": 1.0312461853027344, + "learning_rate": 0.0001, + "loss": 0.0084, + "step": 199410 + }, + { + "epoch": 1311.9736842105262, + "grad_norm": 1.237866759300232, + "learning_rate": 0.0001, + "loss": 0.0074, + "step": 199420 + }, + { + "epoch": 1312.0394736842106, + "grad_norm": 1.2162584066390991, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 199430 + }, + { + "epoch": 1312.1052631578948, + "grad_norm": 0.9375420212745667, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 199440 + }, + { + "epoch": 1312.171052631579, + "grad_norm": 1.0901834964752197, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 199450 + }, + { + "epoch": 1312.2368421052631, + "grad_norm": 1.2144420146942139, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 199460 + }, + { + "epoch": 1312.3026315789473, + "grad_norm": 1.3519539833068848, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 199470 + }, + { + "epoch": 1312.3684210526317, + "grad_norm": 0.9426065683364868, + "learning_rate": 0.0001, + "loss": 0.0082, + "step": 199480 + }, + { + "epoch": 1312.4342105263158, + "grad_norm": 0.7949636578559875, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 199490 + }, + { + "epoch": 1312.5, + "grad_norm": 1.197507619857788, + "learning_rate": 0.0001, + "loss": 0.0098, + "step": 199500 + }, + { + "epoch": 1312.5657894736842, + "grad_norm": 1.0171011686325073, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 199510 + }, + { + "epoch": 1312.6315789473683, + "grad_norm": 0.9687016010284424, + "learning_rate": 0.0001, + "loss": 0.0078, + "step": 199520 + }, + { + "epoch": 1312.6973684210527, + "grad_norm": 1.1884241104125977, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 199530 + }, + { + "epoch": 1312.7631578947369, + "grad_norm": 1.3759465217590332, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 199540 + }, + { + "epoch": 1312.828947368421, + "grad_norm": 1.368860125541687, + "learning_rate": 0.0001, + "loss": 0.0089, + "step": 199550 + }, + { + "epoch": 1312.8947368421052, + "grad_norm": 1.2088021039962769, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 199560 + }, + { + "epoch": 1312.9605263157894, + "grad_norm": 0.524134635925293, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 199570 + }, + { + "epoch": 1313.0263157894738, + "grad_norm": 1.3162397146224976, + "learning_rate": 0.0001, + "loss": 0.009, + "step": 199580 + }, + { + "epoch": 1313.092105263158, + "grad_norm": 1.575825810432434, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 199590 + }, + { + "epoch": 1313.157894736842, + "grad_norm": 1.1224581003189087, + "learning_rate": 0.0001, + "loss": 0.0076, + "step": 199600 + }, + { + "epoch": 1313.2236842105262, + "grad_norm": 1.0737751722335815, + "learning_rate": 0.0001, + "loss": 0.0097, + "step": 199610 + }, + { + "epoch": 1313.2894736842106, + "grad_norm": 0.9804463982582092, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 199620 + }, + { + "epoch": 1313.3552631578948, + "grad_norm": 0.9724069833755493, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 199630 + }, + { + "epoch": 1313.421052631579, + "grad_norm": 1.1374396085739136, + "learning_rate": 0.0001, + "loss": 0.0117, + "step": 199640 + }, + { + "epoch": 1313.4868421052631, + "grad_norm": 0.8667737245559692, + "learning_rate": 0.0001, + "loss": 0.0077, + "step": 199650 + }, + { + "epoch": 1313.5526315789473, + "grad_norm": 0.9983107447624207, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 199660 + }, + { + "epoch": 1313.6184210526317, + "grad_norm": 1.2259620428085327, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 199670 + }, + { + "epoch": 1313.6842105263158, + "grad_norm": 0.9851988554000854, + "learning_rate": 0.0001, + "loss": 0.0105, + "step": 199680 + }, + { + "epoch": 1313.75, + "grad_norm": 0.5888131260871887, + "learning_rate": 0.0001, + "loss": 0.0101, + "step": 199690 + }, + { + "epoch": 1313.8157894736842, + "grad_norm": 0.9664670825004578, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 199700 + }, + { + "epoch": 1313.8815789473683, + "grad_norm": 0.9594868421554565, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 199710 + }, + { + "epoch": 1313.9473684210527, + "grad_norm": 0.9215731620788574, + "learning_rate": 0.0001, + "loss": 0.0092, + "step": 199720 + }, + { + "epoch": 1314.0131578947369, + "grad_norm": 1.1164031028747559, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 199730 + }, + { + "epoch": 1314.078947368421, + "grad_norm": 0.9203608632087708, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 199740 + }, + { + "epoch": 1314.1447368421052, + "grad_norm": 0.9199415445327759, + "learning_rate": 0.0001, + "loss": 0.0116, + "step": 199750 + }, + { + "epoch": 1314.2105263157894, + "grad_norm": 1.0740537643432617, + "learning_rate": 0.0001, + "loss": 0.0099, + "step": 199760 + }, + { + "epoch": 1314.2763157894738, + "grad_norm": 0.9334034323692322, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 199770 + }, + { + "epoch": 1314.342105263158, + "grad_norm": 1.0269443988800049, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 199780 + }, + { + "epoch": 1314.407894736842, + "grad_norm": 0.8074368834495544, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 199790 + }, + { + "epoch": 1314.4736842105262, + "grad_norm": 0.929581880569458, + "learning_rate": 0.0001, + "loss": 0.0094, + "step": 199800 + }, + { + "epoch": 1314.5394736842106, + "grad_norm": 0.8619239926338196, + "learning_rate": 0.0001, + "loss": 0.0088, + "step": 199810 + }, + { + "epoch": 1314.6052631578948, + "grad_norm": 1.4143906831741333, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 199820 + }, + { + "epoch": 1314.671052631579, + "grad_norm": 1.1911793947219849, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 199830 + }, + { + "epoch": 1314.7368421052631, + "grad_norm": 1.1507445573806763, + "learning_rate": 0.0001, + "loss": 0.0103, + "step": 199840 + }, + { + "epoch": 1314.8026315789473, + "grad_norm": 1.1112452745437622, + "learning_rate": 0.0001, + "loss": 0.0106, + "step": 199850 + }, + { + "epoch": 1314.8684210526317, + "grad_norm": 1.0850579738616943, + "learning_rate": 0.0001, + "loss": 0.0087, + "step": 199860 + }, + { + "epoch": 1314.9342105263158, + "grad_norm": 1.0612223148345947, + "learning_rate": 0.0001, + "loss": 0.01, + "step": 199870 + }, + { + "epoch": 1315.0, + "grad_norm": 0.6841676831245422, + "learning_rate": 0.0001, + "loss": 0.0107, + "step": 199880 + }, + { + "epoch": 1315.0657894736842, + "grad_norm": 0.7603421211242676, + "learning_rate": 0.0001, + "loss": 0.0093, + "step": 199890 + }, + { + "epoch": 1315.1315789473683, + "grad_norm": 0.862991988658905, + "learning_rate": 0.0001, + "loss": 0.0112, + "step": 199900 + }, + { + "epoch": 1315.1973684210527, + "grad_norm": 0.9214925169944763, + "learning_rate": 0.0001, + "loss": 0.0119, + "step": 199910 + }, + { + "epoch": 1315.2631578947369, + "grad_norm": 1.2615870237350464, + "learning_rate": 0.0001, + "loss": 0.0109, + "step": 199920 + }, + { + "epoch": 1315.328947368421, + "grad_norm": 1.203710675239563, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 199930 + }, + { + "epoch": 1315.3947368421052, + "grad_norm": 1.0401464700698853, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 199940 + }, + { + "epoch": 1315.4605263157894, + "grad_norm": 0.9347955584526062, + "learning_rate": 0.0001, + "loss": 0.0095, + "step": 199950 + }, + { + "epoch": 1315.5263157894738, + "grad_norm": 0.8496779799461365, + "learning_rate": 0.0001, + "loss": 0.0102, + "step": 199960 + }, + { + "epoch": 1315.592105263158, + "grad_norm": 1.1234538555145264, + "learning_rate": 0.0001, + "loss": 0.0081, + "step": 199970 + }, + { + "epoch": 1315.657894736842, + "grad_norm": 1.2898207902908325, + "learning_rate": 0.0001, + "loss": 0.008, + "step": 199980 + }, + { + "epoch": 1315.7236842105262, + "grad_norm": 0.773065984249115, + "learning_rate": 0.0001, + "loss": 0.0086, + "step": 199990 + }, + { + "epoch": 1315.7894736842106, + "grad_norm": 1.2071137428283691, + "learning_rate": 0.0001, + "loss": 0.0079, + "step": 200000 + }, + { + "epoch": 1315.7894736842106, + "step": 200000, + "total_flos": 0.0, + "train_loss": 0.01848614209484309, + "train_runtime": 93663.6535, + "train_samples_per_second": 136.659, + "train_steps_per_second": 2.135 + } + ], + "logging_steps": 10, + "max_steps": 200000, + "num_input_tokens_seen": 0, + "num_train_epochs": 1316, + "save_steps": 5000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +}