{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.97123130034522, "eval_steps": 500, "global_step": 347500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0028768699654775605, "grad_norm": 1.419504165649414, "learning_rate": 0.0019999424626006906, "loss": 3.3594, "step": 10 }, { "epoch": 0.005753739930955121, "grad_norm": 1.1211611032485962, "learning_rate": 0.0019998849252013807, "loss": 3.0838, "step": 20 }, { "epoch": 0.00863060989643268, "grad_norm": 1.4362672567367554, "learning_rate": 0.0019998273878020713, "loss": 2.7944, "step": 30 }, { "epoch": 0.011507479861910242, "grad_norm": 1.532305121421814, "learning_rate": 0.001999769850402762, "loss": 2.4625, "step": 40 }, { "epoch": 0.014384349827387802, "grad_norm": 3.1493453979492188, "learning_rate": 0.0019997123130034524, "loss": 2.258, "step": 50 }, { "epoch": 0.01726121979286536, "grad_norm": 1.2425230741500854, "learning_rate": 0.0019996547756041426, "loss": 1.9695, "step": 60 }, { "epoch": 0.020138089758342925, "grad_norm": 6.2710771560668945, "learning_rate": 0.001999597238204833, "loss": 2.1391, "step": 70 }, { "epoch": 0.023014959723820484, "grad_norm": 1.063030481338501, "learning_rate": 0.0019995397008055237, "loss": 2.1663, "step": 80 }, { "epoch": 0.025891829689298044, "grad_norm": 1.093984603881836, "learning_rate": 0.0019994821634062142, "loss": 1.9186, "step": 90 }, { "epoch": 0.028768699654775604, "grad_norm": 1.0841283798217773, "learning_rate": 0.0019994246260069044, "loss": 1.7937, "step": 100 }, { "epoch": 0.03164556962025317, "grad_norm": 1.093656063079834, "learning_rate": 0.001999367088607595, "loss": 1.8707, "step": 110 }, { "epoch": 0.03452243958573072, "grad_norm": 1.0813652276992798, "learning_rate": 0.0019993095512082855, "loss": 1.7024, "step": 120 }, { "epoch": 0.037399309551208286, "grad_norm": 2.0296854972839355, "learning_rate": 0.0019992520138089756, "loss": 1.8544, "step": 130 }, { "epoch": 0.04027617951668585, "grad_norm": 1.7927192449569702, "learning_rate": 0.001999194476409666, "loss": 1.5814, "step": 140 }, { "epoch": 0.043153049482163405, "grad_norm": 1.2801340818405151, "learning_rate": 0.0019991369390103568, "loss": 1.7538, "step": 150 }, { "epoch": 0.04602991944764097, "grad_norm": 2.2886276245117188, "learning_rate": 0.0019990794016110473, "loss": 1.8289, "step": 160 }, { "epoch": 0.048906789413118525, "grad_norm": 1.1125091314315796, "learning_rate": 0.001999021864211738, "loss": 1.6972, "step": 170 }, { "epoch": 0.05178365937859609, "grad_norm": 1.0439566373825073, "learning_rate": 0.001998964326812428, "loss": 1.6807, "step": 180 }, { "epoch": 0.05466052934407365, "grad_norm": 1.2183302640914917, "learning_rate": 0.0019989067894131186, "loss": 1.5161, "step": 190 }, { "epoch": 0.05753739930955121, "grad_norm": 1.1651142835617065, "learning_rate": 0.001998849252013809, "loss": 1.7126, "step": 200 }, { "epoch": 0.06041426927502877, "grad_norm": 1.8648242950439453, "learning_rate": 0.0019987917146144993, "loss": 1.7528, "step": 210 }, { "epoch": 0.06329113924050633, "grad_norm": 1.8443872928619385, "learning_rate": 0.00199873417721519, "loss": 1.5103, "step": 220 }, { "epoch": 0.0661680092059839, "grad_norm": 1.3439974784851074, "learning_rate": 0.0019986766398158804, "loss": 1.6984, "step": 230 }, { "epoch": 0.06904487917146145, "grad_norm": 1.5209980010986328, "learning_rate": 0.0019986191024165705, "loss": 1.6114, "step": 240 }, { "epoch": 0.07192174913693901, "grad_norm": 1.662189245223999, "learning_rate": 0.001998561565017261, "loss": 1.6044, "step": 250 }, { "epoch": 0.07479861910241657, "grad_norm": 1.3202059268951416, "learning_rate": 0.0019985040276179517, "loss": 1.5029, "step": 260 }, { "epoch": 0.07767548906789414, "grad_norm": 1.0785328149795532, "learning_rate": 0.0019984464902186422, "loss": 1.6662, "step": 270 }, { "epoch": 0.0805523590333717, "grad_norm": 1.365518569946289, "learning_rate": 0.001998388952819333, "loss": 1.7472, "step": 280 }, { "epoch": 0.08342922899884925, "grad_norm": 1.9179788827896118, "learning_rate": 0.001998331415420023, "loss": 1.5191, "step": 290 }, { "epoch": 0.08630609896432681, "grad_norm": 1.9505764245986938, "learning_rate": 0.0019982738780207135, "loss": 1.6792, "step": 300 }, { "epoch": 0.08918296892980437, "grad_norm": 0.7513627409934998, "learning_rate": 0.001998216340621404, "loss": 1.4233, "step": 310 }, { "epoch": 0.09205983889528194, "grad_norm": 1.8972502946853638, "learning_rate": 0.001998158803222094, "loss": 1.4923, "step": 320 }, { "epoch": 0.0949367088607595, "grad_norm": 1.1989878416061401, "learning_rate": 0.0019981012658227847, "loss": 1.446, "step": 330 }, { "epoch": 0.09781357882623705, "grad_norm": 1.1401382684707642, "learning_rate": 0.0019980437284234753, "loss": 1.6401, "step": 340 }, { "epoch": 0.10069044879171461, "grad_norm": 1.7582546472549438, "learning_rate": 0.001997986191024166, "loss": 1.4546, "step": 350 }, { "epoch": 0.10356731875719218, "grad_norm": 2.004795551300049, "learning_rate": 0.001997928653624856, "loss": 1.5469, "step": 360 }, { "epoch": 0.10644418872266974, "grad_norm": 1.1646779775619507, "learning_rate": 0.0019978711162255466, "loss": 1.477, "step": 370 }, { "epoch": 0.1093210586881473, "grad_norm": 2.4609429836273193, "learning_rate": 0.001997813578826237, "loss": 1.5681, "step": 380 }, { "epoch": 0.11219792865362485, "grad_norm": 1.7073465585708618, "learning_rate": 0.0019977560414269277, "loss": 1.4822, "step": 390 }, { "epoch": 0.11507479861910241, "grad_norm": 1.2496469020843506, "learning_rate": 0.0019976985040276183, "loss": 1.37, "step": 400 }, { "epoch": 0.11795166858457998, "grad_norm": 1.9079952239990234, "learning_rate": 0.0019976409666283084, "loss": 1.4909, "step": 410 }, { "epoch": 0.12082853855005754, "grad_norm": 1.1610264778137207, "learning_rate": 0.001997583429228999, "loss": 1.3676, "step": 420 }, { "epoch": 0.1237054085155351, "grad_norm": 1.2856301069259644, "learning_rate": 0.001997525891829689, "loss": 1.3672, "step": 430 }, { "epoch": 0.12658227848101267, "grad_norm": 1.279110074043274, "learning_rate": 0.0019974683544303797, "loss": 1.3617, "step": 440 }, { "epoch": 0.12945914844649023, "grad_norm": 1.6225695610046387, "learning_rate": 0.00199741081703107, "loss": 1.3535, "step": 450 }, { "epoch": 0.1323360184119678, "grad_norm": 1.4623076915740967, "learning_rate": 0.0019973532796317608, "loss": 1.5523, "step": 460 }, { "epoch": 0.13521288837744533, "grad_norm": 1.317874550819397, "learning_rate": 0.0019972957422324513, "loss": 1.3043, "step": 470 }, { "epoch": 0.1380897583429229, "grad_norm": 1.4665484428405762, "learning_rate": 0.0019972382048331415, "loss": 1.3322, "step": 480 }, { "epoch": 0.14096662830840045, "grad_norm": 1.6096346378326416, "learning_rate": 0.001997180667433832, "loss": 1.2078, "step": 490 }, { "epoch": 0.14384349827387802, "grad_norm": 0.7990589737892151, "learning_rate": 0.0019971231300345226, "loss": 1.4591, "step": 500 }, { "epoch": 0.14672036823935558, "grad_norm": 2.5407285690307617, "learning_rate": 0.001997065592635213, "loss": 1.4772, "step": 510 }, { "epoch": 0.14959723820483314, "grad_norm": 0.8593369722366333, "learning_rate": 0.0019970080552359033, "loss": 1.3858, "step": 520 }, { "epoch": 0.1524741081703107, "grad_norm": 1.2544381618499756, "learning_rate": 0.001996950517836594, "loss": 1.5497, "step": 530 }, { "epoch": 0.15535097813578827, "grad_norm": 0.9747873544692993, "learning_rate": 0.001996892980437284, "loss": 1.4721, "step": 540 }, { "epoch": 0.15822784810126583, "grad_norm": 1.4865474700927734, "learning_rate": 0.0019968354430379746, "loss": 1.4513, "step": 550 }, { "epoch": 0.1611047180667434, "grad_norm": 1.0564674139022827, "learning_rate": 0.001996777905638665, "loss": 1.6666, "step": 560 }, { "epoch": 0.16398158803222093, "grad_norm": 1.555554747581482, "learning_rate": 0.0019967203682393557, "loss": 1.179, "step": 570 }, { "epoch": 0.1668584579976985, "grad_norm": 1.1934908628463745, "learning_rate": 0.0019966628308400462, "loss": 1.2644, "step": 580 }, { "epoch": 0.16973532796317606, "grad_norm": 5.0148725509643555, "learning_rate": 0.001996605293440737, "loss": 1.3785, "step": 590 }, { "epoch": 0.17261219792865362, "grad_norm": 1.4111299514770508, "learning_rate": 0.001996547756041427, "loss": 1.3436, "step": 600 }, { "epoch": 0.17548906789413118, "grad_norm": 0.9251930117607117, "learning_rate": 0.0019964902186421175, "loss": 1.2073, "step": 610 }, { "epoch": 0.17836593785960875, "grad_norm": 2.7978174686431885, "learning_rate": 0.001996432681242808, "loss": 1.165, "step": 620 }, { "epoch": 0.1812428078250863, "grad_norm": 1.0367852449417114, "learning_rate": 0.001996375143843498, "loss": 1.354, "step": 630 }, { "epoch": 0.18411967779056387, "grad_norm": 1.6369867324829102, "learning_rate": 0.0019963176064441888, "loss": 1.348, "step": 640 }, { "epoch": 0.18699654775604144, "grad_norm": 1.8084880113601685, "learning_rate": 0.0019962600690448793, "loss": 1.3365, "step": 650 }, { "epoch": 0.189873417721519, "grad_norm": 1.4041026830673218, "learning_rate": 0.0019962025316455695, "loss": 1.3326, "step": 660 }, { "epoch": 0.19275028768699654, "grad_norm": 1.1745176315307617, "learning_rate": 0.00199614499424626, "loss": 1.4871, "step": 670 }, { "epoch": 0.1956271576524741, "grad_norm": 0.9133937358856201, "learning_rate": 0.0019960874568469506, "loss": 1.2391, "step": 680 }, { "epoch": 0.19850402761795166, "grad_norm": 0.9865404367446899, "learning_rate": 0.001996029919447641, "loss": 1.2137, "step": 690 }, { "epoch": 0.20138089758342922, "grad_norm": 1.4198219776153564, "learning_rate": 0.0019959723820483317, "loss": 1.2488, "step": 700 }, { "epoch": 0.2042577675489068, "grad_norm": 1.0046555995941162, "learning_rate": 0.001995914844649022, "loss": 1.2267, "step": 710 }, { "epoch": 0.20713463751438435, "grad_norm": 1.1232091188430786, "learning_rate": 0.0019958573072497124, "loss": 1.4553, "step": 720 }, { "epoch": 0.21001150747986191, "grad_norm": 1.8577485084533691, "learning_rate": 0.001995799769850403, "loss": 1.2269, "step": 730 }, { "epoch": 0.21288837744533948, "grad_norm": 1.081331729888916, "learning_rate": 0.001995742232451093, "loss": 1.3561, "step": 740 }, { "epoch": 0.21576524741081704, "grad_norm": 0.8581048846244812, "learning_rate": 0.0019956846950517837, "loss": 1.2755, "step": 750 }, { "epoch": 0.2186421173762946, "grad_norm": 1.028489351272583, "learning_rate": 0.0019956271576524742, "loss": 1.2155, "step": 760 }, { "epoch": 0.22151898734177214, "grad_norm": 1.7235716581344604, "learning_rate": 0.001995569620253165, "loss": 1.262, "step": 770 }, { "epoch": 0.2243958573072497, "grad_norm": 1.1932514905929565, "learning_rate": 0.001995512082853855, "loss": 1.3043, "step": 780 }, { "epoch": 0.22727272727272727, "grad_norm": 1.65322744846344, "learning_rate": 0.0019954545454545455, "loss": 1.4386, "step": 790 }, { "epoch": 0.23014959723820483, "grad_norm": 0.9812535047531128, "learning_rate": 0.001995397008055236, "loss": 1.2044, "step": 800 }, { "epoch": 0.2330264672036824, "grad_norm": 1.613416314125061, "learning_rate": 0.0019953394706559266, "loss": 1.2277, "step": 810 }, { "epoch": 0.23590333716915995, "grad_norm": 1.821332335472107, "learning_rate": 0.0019952819332566167, "loss": 1.158, "step": 820 }, { "epoch": 0.23878020713463752, "grad_norm": 1.1805943250656128, "learning_rate": 0.0019952243958573073, "loss": 1.1717, "step": 830 }, { "epoch": 0.24165707710011508, "grad_norm": 2.5672314167022705, "learning_rate": 0.001995166858457998, "loss": 1.4009, "step": 840 }, { "epoch": 0.24453394706559264, "grad_norm": 0.712100625038147, "learning_rate": 0.001995109321058688, "loss": 1.1548, "step": 850 }, { "epoch": 0.2474108170310702, "grad_norm": 1.2638142108917236, "learning_rate": 0.0019950517836593786, "loss": 1.25, "step": 860 }, { "epoch": 0.25028768699654774, "grad_norm": 2.099332094192505, "learning_rate": 0.001994994246260069, "loss": 1.1792, "step": 870 }, { "epoch": 0.25316455696202533, "grad_norm": 1.0583020448684692, "learning_rate": 0.0019949367088607597, "loss": 1.1354, "step": 880 }, { "epoch": 0.25604142692750287, "grad_norm": 1.296217441558838, "learning_rate": 0.00199487917146145, "loss": 1.2825, "step": 890 }, { "epoch": 0.25891829689298046, "grad_norm": 1.9236681461334229, "learning_rate": 0.0019948216340621404, "loss": 1.2062, "step": 900 }, { "epoch": 0.261795166858458, "grad_norm": 1.7678754329681396, "learning_rate": 0.001994764096662831, "loss": 1.3499, "step": 910 }, { "epoch": 0.2646720368239356, "grad_norm": 0.9345505833625793, "learning_rate": 0.0019947065592635215, "loss": 1.2175, "step": 920 }, { "epoch": 0.2675489067894131, "grad_norm": 2.1252307891845703, "learning_rate": 0.0019946490218642116, "loss": 1.379, "step": 930 }, { "epoch": 0.27042577675489066, "grad_norm": 1.3713995218276978, "learning_rate": 0.001994591484464902, "loss": 1.3862, "step": 940 }, { "epoch": 0.27330264672036825, "grad_norm": 0.9749099016189575, "learning_rate": 0.0019945339470655928, "loss": 1.2285, "step": 950 }, { "epoch": 0.2761795166858458, "grad_norm": 1.0502796173095703, "learning_rate": 0.001994476409666283, "loss": 1.316, "step": 960 }, { "epoch": 0.2790563866513234, "grad_norm": 1.960335612297058, "learning_rate": 0.0019944188722669735, "loss": 1.3467, "step": 970 }, { "epoch": 0.2819332566168009, "grad_norm": 1.0714914798736572, "learning_rate": 0.001994361334867664, "loss": 1.147, "step": 980 }, { "epoch": 0.2848101265822785, "grad_norm": 0.8825795650482178, "learning_rate": 0.0019943037974683546, "loss": 1.0933, "step": 990 }, { "epoch": 0.28768699654775604, "grad_norm": 1.2604235410690308, "learning_rate": 0.001994246260069045, "loss": 1.3006, "step": 1000 }, { "epoch": 0.2905638665132336, "grad_norm": 1.0448836088180542, "learning_rate": 0.0019941887226697353, "loss": 1.4282, "step": 1010 }, { "epoch": 0.29344073647871116, "grad_norm": 1.2276064157485962, "learning_rate": 0.001994131185270426, "loss": 1.2927, "step": 1020 }, { "epoch": 0.2963176064441887, "grad_norm": 0.8656520843505859, "learning_rate": 0.0019940736478711164, "loss": 1.2802, "step": 1030 }, { "epoch": 0.2991944764096663, "grad_norm": 1.5254170894622803, "learning_rate": 0.0019940161104718065, "loss": 1.1968, "step": 1040 }, { "epoch": 0.3020713463751438, "grad_norm": 1.1627863645553589, "learning_rate": 0.001993958573072497, "loss": 1.1506, "step": 1050 }, { "epoch": 0.3049482163406214, "grad_norm": 1.5869239568710327, "learning_rate": 0.0019939010356731877, "loss": 1.1081, "step": 1060 }, { "epoch": 0.30782508630609895, "grad_norm": 1.44916832447052, "learning_rate": 0.001993843498273878, "loss": 1.1683, "step": 1070 }, { "epoch": 0.31070195627157654, "grad_norm": 1.518163800239563, "learning_rate": 0.0019937859608745684, "loss": 1.2482, "step": 1080 }, { "epoch": 0.3135788262370541, "grad_norm": 0.9846799969673157, "learning_rate": 0.001993728423475259, "loss": 1.36, "step": 1090 }, { "epoch": 0.31645569620253167, "grad_norm": 1.3731496334075928, "learning_rate": 0.0019936708860759495, "loss": 1.1984, "step": 1100 }, { "epoch": 0.3193325661680092, "grad_norm": 1.0617108345031738, "learning_rate": 0.00199361334867664, "loss": 1.2313, "step": 1110 }, { "epoch": 0.3222094361334868, "grad_norm": 1.1671142578125, "learning_rate": 0.00199355581127733, "loss": 1.3583, "step": 1120 }, { "epoch": 0.32508630609896433, "grad_norm": 1.6956230401992798, "learning_rate": 0.0019934982738780208, "loss": 1.3803, "step": 1130 }, { "epoch": 0.32796317606444186, "grad_norm": 0.9052206873893738, "learning_rate": 0.0019934407364787113, "loss": 1.1405, "step": 1140 }, { "epoch": 0.33084004602991945, "grad_norm": 1.0280311107635498, "learning_rate": 0.0019933831990794014, "loss": 1.1118, "step": 1150 }, { "epoch": 0.333716915995397, "grad_norm": 0.905697226524353, "learning_rate": 0.001993325661680092, "loss": 1.1945, "step": 1160 }, { "epoch": 0.3365937859608746, "grad_norm": 1.0427004098892212, "learning_rate": 0.0019932681242807826, "loss": 1.0592, "step": 1170 }, { "epoch": 0.3394706559263521, "grad_norm": 2.393909454345703, "learning_rate": 0.001993210586881473, "loss": 1.2884, "step": 1180 }, { "epoch": 0.3423475258918297, "grad_norm": 0.7737524509429932, "learning_rate": 0.0019931530494821633, "loss": 1.2437, "step": 1190 }, { "epoch": 0.34522439585730724, "grad_norm": 0.9790889024734497, "learning_rate": 0.001993095512082854, "loss": 1.066, "step": 1200 }, { "epoch": 0.34810126582278483, "grad_norm": 2.3595457077026367, "learning_rate": 0.0019930379746835444, "loss": 1.2634, "step": 1210 }, { "epoch": 0.35097813578826237, "grad_norm": 1.0944223403930664, "learning_rate": 0.001992980437284235, "loss": 1.4825, "step": 1220 }, { "epoch": 0.3538550057537399, "grad_norm": 0.8723865747451782, "learning_rate": 0.001992922899884925, "loss": 1.3907, "step": 1230 }, { "epoch": 0.3567318757192175, "grad_norm": 1.928821086883545, "learning_rate": 0.0019928653624856157, "loss": 1.2505, "step": 1240 }, { "epoch": 0.35960874568469503, "grad_norm": 1.8351126909255981, "learning_rate": 0.0019928078250863062, "loss": 1.2184, "step": 1250 }, { "epoch": 0.3624856156501726, "grad_norm": 0.9060549139976501, "learning_rate": 0.0019927502876869964, "loss": 1.301, "step": 1260 }, { "epoch": 0.36536248561565016, "grad_norm": 0.7639679908752441, "learning_rate": 0.001992692750287687, "loss": 1.4239, "step": 1270 }, { "epoch": 0.36823935558112775, "grad_norm": 1.905387282371521, "learning_rate": 0.0019926352128883775, "loss": 1.0929, "step": 1280 }, { "epoch": 0.3711162255466053, "grad_norm": 1.220094919204712, "learning_rate": 0.001992577675489068, "loss": 1.0855, "step": 1290 }, { "epoch": 0.3739930955120829, "grad_norm": 1.050036072731018, "learning_rate": 0.0019925201380897586, "loss": 1.0725, "step": 1300 }, { "epoch": 0.3768699654775604, "grad_norm": 0.8610907793045044, "learning_rate": 0.0019924626006904487, "loss": 1.2481, "step": 1310 }, { "epoch": 0.379746835443038, "grad_norm": 1.3730653524398804, "learning_rate": 0.0019924050632911393, "loss": 1.2052, "step": 1320 }, { "epoch": 0.38262370540851554, "grad_norm": 0.8091867566108704, "learning_rate": 0.00199234752589183, "loss": 1.1958, "step": 1330 }, { "epoch": 0.38550057537399307, "grad_norm": 2.0912654399871826, "learning_rate": 0.00199228998849252, "loss": 1.3637, "step": 1340 }, { "epoch": 0.38837744533947066, "grad_norm": 1.2530285120010376, "learning_rate": 0.0019922324510932106, "loss": 1.1906, "step": 1350 }, { "epoch": 0.3912543153049482, "grad_norm": 0.9528944492340088, "learning_rate": 0.001992174913693901, "loss": 1.225, "step": 1360 }, { "epoch": 0.3941311852704258, "grad_norm": 1.2748920917510986, "learning_rate": 0.0019921173762945913, "loss": 1.1591, "step": 1370 }, { "epoch": 0.3970080552359033, "grad_norm": 1.0720257759094238, "learning_rate": 0.001992059838895282, "loss": 1.0756, "step": 1380 }, { "epoch": 0.3998849252013809, "grad_norm": 1.2356206178665161, "learning_rate": 0.0019920023014959724, "loss": 1.2069, "step": 1390 }, { "epoch": 0.40276179516685845, "grad_norm": 1.3012460470199585, "learning_rate": 0.001991944764096663, "loss": 1.2432, "step": 1400 }, { "epoch": 0.40563866513233604, "grad_norm": 1.8373066186904907, "learning_rate": 0.0019918872266973535, "loss": 1.0438, "step": 1410 }, { "epoch": 0.4085155350978136, "grad_norm": 1.3349945545196533, "learning_rate": 0.001991829689298044, "loss": 1.2417, "step": 1420 }, { "epoch": 0.41139240506329117, "grad_norm": 1.1423624753952026, "learning_rate": 0.001991772151898734, "loss": 1.2203, "step": 1430 }, { "epoch": 0.4142692750287687, "grad_norm": 1.2877459526062012, "learning_rate": 0.0019917146144994248, "loss": 1.5226, "step": 1440 }, { "epoch": 0.41714614499424624, "grad_norm": 1.6739119291305542, "learning_rate": 0.001991657077100115, "loss": 1.0819, "step": 1450 }, { "epoch": 0.42002301495972383, "grad_norm": 1.911141037940979, "learning_rate": 0.0019915995397008055, "loss": 1.3198, "step": 1460 }, { "epoch": 0.42289988492520136, "grad_norm": 1.0851370096206665, "learning_rate": 0.001991542002301496, "loss": 1.3043, "step": 1470 }, { "epoch": 0.42577675489067895, "grad_norm": 1.5101398229599, "learning_rate": 0.0019914844649021866, "loss": 1.0712, "step": 1480 }, { "epoch": 0.4286536248561565, "grad_norm": 1.2868901491165161, "learning_rate": 0.0019914269275028767, "loss": 1.1402, "step": 1490 }, { "epoch": 0.4315304948216341, "grad_norm": 1.0508310794830322, "learning_rate": 0.0019913693901035673, "loss": 1.1407, "step": 1500 }, { "epoch": 0.4344073647871116, "grad_norm": 1.2738195657730103, "learning_rate": 0.001991311852704258, "loss": 1.0183, "step": 1510 }, { "epoch": 0.4372842347525892, "grad_norm": 1.5147947072982788, "learning_rate": 0.0019912543153049484, "loss": 1.3101, "step": 1520 }, { "epoch": 0.44016110471806674, "grad_norm": 1.4651899337768555, "learning_rate": 0.001991196777905639, "loss": 1.0608, "step": 1530 }, { "epoch": 0.4430379746835443, "grad_norm": 1.2352889776229858, "learning_rate": 0.001991139240506329, "loss": 1.1749, "step": 1540 }, { "epoch": 0.44591484464902187, "grad_norm": 0.9093248844146729, "learning_rate": 0.0019910817031070197, "loss": 1.1227, "step": 1550 }, { "epoch": 0.4487917146144994, "grad_norm": 2.4938669204711914, "learning_rate": 0.00199102416570771, "loss": 1.2681, "step": 1560 }, { "epoch": 0.451668584579977, "grad_norm": 1.2509527206420898, "learning_rate": 0.0019909666283084004, "loss": 1.4412, "step": 1570 }, { "epoch": 0.45454545454545453, "grad_norm": 1.618718147277832, "learning_rate": 0.001990909090909091, "loss": 1.2392, "step": 1580 }, { "epoch": 0.4574223245109321, "grad_norm": 1.1020851135253906, "learning_rate": 0.0019908515535097815, "loss": 1.1146, "step": 1590 }, { "epoch": 0.46029919447640966, "grad_norm": 1.4705857038497925, "learning_rate": 0.001990794016110472, "loss": 1.1623, "step": 1600 }, { "epoch": 0.46317606444188725, "grad_norm": 1.3387444019317627, "learning_rate": 0.001990736478711162, "loss": 1.0116, "step": 1610 }, { "epoch": 0.4660529344073648, "grad_norm": 1.5661818981170654, "learning_rate": 0.0019906789413118527, "loss": 1.1401, "step": 1620 }, { "epoch": 0.4689298043728424, "grad_norm": 1.1832319498062134, "learning_rate": 0.0019906214039125433, "loss": 1.1712, "step": 1630 }, { "epoch": 0.4718066743383199, "grad_norm": 1.7297683954238892, "learning_rate": 0.001990563866513234, "loss": 1.1323, "step": 1640 }, { "epoch": 0.47468354430379744, "grad_norm": 1.7858330011367798, "learning_rate": 0.001990506329113924, "loss": 1.2161, "step": 1650 }, { "epoch": 0.47756041426927504, "grad_norm": 1.5483311414718628, "learning_rate": 0.0019904487917146146, "loss": 1.2007, "step": 1660 }, { "epoch": 0.48043728423475257, "grad_norm": 2.3646721839904785, "learning_rate": 0.001990391254315305, "loss": 1.1716, "step": 1670 }, { "epoch": 0.48331415420023016, "grad_norm": 1.4982868432998657, "learning_rate": 0.0019903337169159953, "loss": 1.0299, "step": 1680 }, { "epoch": 0.4861910241657077, "grad_norm": 1.2012912034988403, "learning_rate": 0.001990276179516686, "loss": 0.9916, "step": 1690 }, { "epoch": 0.4890678941311853, "grad_norm": 1.9408661127090454, "learning_rate": 0.0019902186421173764, "loss": 1.1313, "step": 1700 }, { "epoch": 0.4919447640966628, "grad_norm": 0.9590497612953186, "learning_rate": 0.001990161104718067, "loss": 1.0682, "step": 1710 }, { "epoch": 0.4948216340621404, "grad_norm": 0.9569056034088135, "learning_rate": 0.001990103567318757, "loss": 1.2894, "step": 1720 }, { "epoch": 0.49769850402761795, "grad_norm": 1.1346347332000732, "learning_rate": 0.0019900460299194477, "loss": 1.0254, "step": 1730 }, { "epoch": 0.5005753739930955, "grad_norm": 0.9489582180976868, "learning_rate": 0.001989988492520138, "loss": 1.2964, "step": 1740 }, { "epoch": 0.503452243958573, "grad_norm": 1.7076536417007446, "learning_rate": 0.0019899309551208288, "loss": 1.2165, "step": 1750 }, { "epoch": 0.5063291139240507, "grad_norm": 1.78108811378479, "learning_rate": 0.001989873417721519, "loss": 1.2362, "step": 1760 }, { "epoch": 0.5092059838895282, "grad_norm": 1.979296088218689, "learning_rate": 0.0019898158803222095, "loss": 0.9953, "step": 1770 }, { "epoch": 0.5120828538550057, "grad_norm": 0.8563864231109619, "learning_rate": 0.0019897583429229, "loss": 1.1409, "step": 1780 }, { "epoch": 0.5149597238204833, "grad_norm": 1.357371211051941, "learning_rate": 0.00198970080552359, "loss": 1.223, "step": 1790 }, { "epoch": 0.5178365937859609, "grad_norm": 1.1099735498428345, "learning_rate": 0.0019896432681242807, "loss": 1.045, "step": 1800 }, { "epoch": 0.5207134637514385, "grad_norm": 1.56416654586792, "learning_rate": 0.0019895857307249713, "loss": 1.0973, "step": 1810 }, { "epoch": 0.523590333716916, "grad_norm": 1.4478991031646729, "learning_rate": 0.001989528193325662, "loss": 0.9735, "step": 1820 }, { "epoch": 0.5264672036823935, "grad_norm": 1.0988730192184448, "learning_rate": 0.0019894706559263524, "loss": 1.1909, "step": 1830 }, { "epoch": 0.5293440736478712, "grad_norm": 1.542137861251831, "learning_rate": 0.0019894131185270426, "loss": 1.0506, "step": 1840 }, { "epoch": 0.5322209436133487, "grad_norm": 1.3839938640594482, "learning_rate": 0.001989355581127733, "loss": 1.5149, "step": 1850 }, { "epoch": 0.5350978135788262, "grad_norm": 2.2513692378997803, "learning_rate": 0.0019892980437284237, "loss": 1.0691, "step": 1860 }, { "epoch": 0.5379746835443038, "grad_norm": 1.4683278799057007, "learning_rate": 0.001989240506329114, "loss": 1.0003, "step": 1870 }, { "epoch": 0.5408515535097813, "grad_norm": 1.353415846824646, "learning_rate": 0.0019891829689298044, "loss": 1.1235, "step": 1880 }, { "epoch": 0.543728423475259, "grad_norm": 1.0725497007369995, "learning_rate": 0.001989125431530495, "loss": 1.2705, "step": 1890 }, { "epoch": 0.5466052934407365, "grad_norm": 1.075757384300232, "learning_rate": 0.001989067894131185, "loss": 0.9456, "step": 1900 }, { "epoch": 0.549482163406214, "grad_norm": 1.1518001556396484, "learning_rate": 0.0019890103567318756, "loss": 1.1131, "step": 1910 }, { "epoch": 0.5523590333716916, "grad_norm": 1.3473302125930786, "learning_rate": 0.001988952819332566, "loss": 1.1263, "step": 1920 }, { "epoch": 0.5552359033371692, "grad_norm": 1.448154330253601, "learning_rate": 0.0019888952819332568, "loss": 1.3686, "step": 1930 }, { "epoch": 0.5581127733026467, "grad_norm": 1.5840996503829956, "learning_rate": 0.0019888377445339473, "loss": 1.3998, "step": 1940 }, { "epoch": 0.5609896432681243, "grad_norm": 1.0925464630126953, "learning_rate": 0.0019887802071346375, "loss": 0.8116, "step": 1950 }, { "epoch": 0.5638665132336018, "grad_norm": 0.9342069029808044, "learning_rate": 0.001988722669735328, "loss": 1.1799, "step": 1960 }, { "epoch": 0.5667433831990794, "grad_norm": 1.8812541961669922, "learning_rate": 0.0019886651323360186, "loss": 1.0909, "step": 1970 }, { "epoch": 0.569620253164557, "grad_norm": 1.3844459056854248, "learning_rate": 0.0019886075949367087, "loss": 0.9537, "step": 1980 }, { "epoch": 0.5724971231300345, "grad_norm": 1.6110179424285889, "learning_rate": 0.0019885500575373993, "loss": 1.0022, "step": 1990 }, { "epoch": 0.5753739930955121, "grad_norm": 0.9503207802772522, "learning_rate": 0.00198849252013809, "loss": 1.2013, "step": 2000 }, { "epoch": 0.5782508630609896, "grad_norm": 1.3695456981658936, "learning_rate": 0.0019884349827387804, "loss": 1.1015, "step": 2010 }, { "epoch": 0.5811277330264673, "grad_norm": 0.925671398639679, "learning_rate": 0.0019883774453394705, "loss": 1.0937, "step": 2020 }, { "epoch": 0.5840046029919448, "grad_norm": 1.4539326429367065, "learning_rate": 0.001988319907940161, "loss": 1.2805, "step": 2030 }, { "epoch": 0.5868814729574223, "grad_norm": 1.5072892904281616, "learning_rate": 0.0019882623705408517, "loss": 1.2911, "step": 2040 }, { "epoch": 0.5897583429228999, "grad_norm": 1.1692432165145874, "learning_rate": 0.0019882048331415422, "loss": 1.0746, "step": 2050 }, { "epoch": 0.5926352128883774, "grad_norm": 1.4937007427215576, "learning_rate": 0.0019881472957422324, "loss": 1.2046, "step": 2060 }, { "epoch": 0.595512082853855, "grad_norm": 1.165658950805664, "learning_rate": 0.001988089758342923, "loss": 1.153, "step": 2070 }, { "epoch": 0.5983889528193326, "grad_norm": 1.553802490234375, "learning_rate": 0.0019880322209436135, "loss": 1.0758, "step": 2080 }, { "epoch": 0.6012658227848101, "grad_norm": 1.1166480779647827, "learning_rate": 0.0019879746835443036, "loss": 1.1285, "step": 2090 }, { "epoch": 0.6041426927502876, "grad_norm": 1.0810225009918213, "learning_rate": 0.001987917146144994, "loss": 1.1945, "step": 2100 }, { "epoch": 0.6070195627157653, "grad_norm": 1.1552541255950928, "learning_rate": 0.0019878596087456847, "loss": 1.0816, "step": 2110 }, { "epoch": 0.6098964326812428, "grad_norm": 1.8602944612503052, "learning_rate": 0.0019878020713463753, "loss": 1.0879, "step": 2120 }, { "epoch": 0.6127733026467204, "grad_norm": 1.7887593507766724, "learning_rate": 0.001987744533947066, "loss": 1.4673, "step": 2130 }, { "epoch": 0.6156501726121979, "grad_norm": 2.0889949798583984, "learning_rate": 0.001987686996547756, "loss": 1.1466, "step": 2140 }, { "epoch": 0.6185270425776754, "grad_norm": 1.0758466720581055, "learning_rate": 0.0019876294591484466, "loss": 0.9284, "step": 2150 }, { "epoch": 0.6214039125431531, "grad_norm": 0.764085054397583, "learning_rate": 0.001987571921749137, "loss": 1.265, "step": 2160 }, { "epoch": 0.6242807825086306, "grad_norm": 1.5303157567977905, "learning_rate": 0.0019875143843498273, "loss": 0.9683, "step": 2170 }, { "epoch": 0.6271576524741082, "grad_norm": 1.903631567955017, "learning_rate": 0.001987456846950518, "loss": 1.1476, "step": 2180 }, { "epoch": 0.6300345224395857, "grad_norm": 1.1043081283569336, "learning_rate": 0.0019873993095512084, "loss": 1.0603, "step": 2190 }, { "epoch": 0.6329113924050633, "grad_norm": 1.4814070463180542, "learning_rate": 0.0019873417721518985, "loss": 1.3567, "step": 2200 }, { "epoch": 0.6357882623705409, "grad_norm": 1.3865660429000854, "learning_rate": 0.001987284234752589, "loss": 1.2184, "step": 2210 }, { "epoch": 0.6386651323360184, "grad_norm": 1.3294655084609985, "learning_rate": 0.0019872266973532796, "loss": 1.0751, "step": 2220 }, { "epoch": 0.6415420023014959, "grad_norm": 1.0471417903900146, "learning_rate": 0.00198716915995397, "loss": 1.0053, "step": 2230 }, { "epoch": 0.6444188722669736, "grad_norm": 2.512223958969116, "learning_rate": 0.0019871116225546608, "loss": 1.1523, "step": 2240 }, { "epoch": 0.6472957422324511, "grad_norm": 1.0791349411010742, "learning_rate": 0.001987054085155351, "loss": 0.9855, "step": 2250 }, { "epoch": 0.6501726121979287, "grad_norm": 1.2368220090866089, "learning_rate": 0.0019869965477560415, "loss": 0.9073, "step": 2260 }, { "epoch": 0.6530494821634062, "grad_norm": 1.2770906686782837, "learning_rate": 0.001986939010356732, "loss": 1.1127, "step": 2270 }, { "epoch": 0.6559263521288837, "grad_norm": 0.8503343462944031, "learning_rate": 0.001986881472957422, "loss": 1.076, "step": 2280 }, { "epoch": 0.6588032220943614, "grad_norm": 1.2640718221664429, "learning_rate": 0.0019868239355581127, "loss": 1.0305, "step": 2290 }, { "epoch": 0.6616800920598389, "grad_norm": 1.9009108543395996, "learning_rate": 0.0019867663981588033, "loss": 1.3735, "step": 2300 }, { "epoch": 0.6645569620253164, "grad_norm": 1.650413990020752, "learning_rate": 0.001986708860759494, "loss": 1.0301, "step": 2310 }, { "epoch": 0.667433831990794, "grad_norm": 1.1294419765472412, "learning_rate": 0.001986651323360184, "loss": 1.1696, "step": 2320 }, { "epoch": 0.6703107019562716, "grad_norm": 1.1544643640518188, "learning_rate": 0.0019865937859608745, "loss": 1.0681, "step": 2330 }, { "epoch": 0.6731875719217492, "grad_norm": 1.0104166269302368, "learning_rate": 0.001986536248561565, "loss": 1.2411, "step": 2340 }, { "epoch": 0.6760644418872267, "grad_norm": 1.6547104120254517, "learning_rate": 0.0019864787111622557, "loss": 1.111, "step": 2350 }, { "epoch": 0.6789413118527042, "grad_norm": 1.5467240810394287, "learning_rate": 0.0019864211737629462, "loss": 1.0991, "step": 2360 }, { "epoch": 0.6818181818181818, "grad_norm": 0.7136487364768982, "learning_rate": 0.0019863636363636364, "loss": 1.2118, "step": 2370 }, { "epoch": 0.6846950517836594, "grad_norm": 1.5438659191131592, "learning_rate": 0.001986306098964327, "loss": 1.2331, "step": 2380 }, { "epoch": 0.687571921749137, "grad_norm": 1.3143078088760376, "learning_rate": 0.001986248561565017, "loss": 1.0245, "step": 2390 }, { "epoch": 0.6904487917146145, "grad_norm": 1.0916210412979126, "learning_rate": 0.0019861910241657076, "loss": 1.3278, "step": 2400 }, { "epoch": 0.693325661680092, "grad_norm": 0.9966866374015808, "learning_rate": 0.001986133486766398, "loss": 1.1222, "step": 2410 }, { "epoch": 0.6962025316455697, "grad_norm": 2.029536724090576, "learning_rate": 0.0019860759493670888, "loss": 1.1478, "step": 2420 }, { "epoch": 0.6990794016110472, "grad_norm": 1.0790008306503296, "learning_rate": 0.0019860184119677793, "loss": 1.0195, "step": 2430 }, { "epoch": 0.7019562715765247, "grad_norm": 1.4162993431091309, "learning_rate": 0.0019859608745684694, "loss": 1.1587, "step": 2440 }, { "epoch": 0.7048331415420023, "grad_norm": 1.75681471824646, "learning_rate": 0.00198590333716916, "loss": 1.2466, "step": 2450 }, { "epoch": 0.7077100115074798, "grad_norm": 1.4498282670974731, "learning_rate": 0.0019858457997698506, "loss": 1.0991, "step": 2460 }, { "epoch": 0.7105868814729575, "grad_norm": 1.5853737592697144, "learning_rate": 0.001985788262370541, "loss": 1.1666, "step": 2470 }, { "epoch": 0.713463751438435, "grad_norm": 1.18956458568573, "learning_rate": 0.0019857307249712313, "loss": 0.9452, "step": 2480 }, { "epoch": 0.7163406214039125, "grad_norm": 1.209795355796814, "learning_rate": 0.001985673187571922, "loss": 1.0283, "step": 2490 }, { "epoch": 0.7192174913693901, "grad_norm": 1.6157821416854858, "learning_rate": 0.001985615650172612, "loss": 1.1193, "step": 2500 }, { "epoch": 0.7220943613348677, "grad_norm": 1.0460468530654907, "learning_rate": 0.0019855581127733025, "loss": 0.913, "step": 2510 }, { "epoch": 0.7249712313003452, "grad_norm": 1.308314561843872, "learning_rate": 0.001985500575373993, "loss": 0.9647, "step": 2520 }, { "epoch": 0.7278481012658228, "grad_norm": 1.4910897016525269, "learning_rate": 0.0019854430379746837, "loss": 1.112, "step": 2530 }, { "epoch": 0.7307249712313003, "grad_norm": 1.4519686698913574, "learning_rate": 0.0019853855005753742, "loss": 0.9908, "step": 2540 }, { "epoch": 0.733601841196778, "grad_norm": 1.9154868125915527, "learning_rate": 0.0019853279631760644, "loss": 1.1336, "step": 2550 }, { "epoch": 0.7364787111622555, "grad_norm": 1.1915075778961182, "learning_rate": 0.001985270425776755, "loss": 1.1009, "step": 2560 }, { "epoch": 0.739355581127733, "grad_norm": 1.7265255451202393, "learning_rate": 0.0019852128883774455, "loss": 1.226, "step": 2570 }, { "epoch": 0.7422324510932106, "grad_norm": 1.3097074031829834, "learning_rate": 0.001985155350978136, "loss": 1.1547, "step": 2580 }, { "epoch": 0.7451093210586881, "grad_norm": 1.3561947345733643, "learning_rate": 0.001985097813578826, "loss": 0.9026, "step": 2590 }, { "epoch": 0.7479861910241657, "grad_norm": 1.7366279363632202, "learning_rate": 0.0019850402761795167, "loss": 1.2703, "step": 2600 }, { "epoch": 0.7508630609896433, "grad_norm": 1.1506048440933228, "learning_rate": 0.001984982738780207, "loss": 1.0609, "step": 2610 }, { "epoch": 0.7537399309551208, "grad_norm": 1.623457670211792, "learning_rate": 0.0019849252013808974, "loss": 1.0386, "step": 2620 }, { "epoch": 0.7566168009205984, "grad_norm": 2.064485788345337, "learning_rate": 0.001984867663981588, "loss": 1.1046, "step": 2630 }, { "epoch": 0.759493670886076, "grad_norm": 1.1150943040847778, "learning_rate": 0.0019848101265822786, "loss": 0.9199, "step": 2640 }, { "epoch": 0.7623705408515535, "grad_norm": 1.5453550815582275, "learning_rate": 0.001984752589182969, "loss": 1.1604, "step": 2650 }, { "epoch": 0.7652474108170311, "grad_norm": 0.8655397891998291, "learning_rate": 0.0019846950517836597, "loss": 0.9235, "step": 2660 }, { "epoch": 0.7681242807825086, "grad_norm": 1.7893282175064087, "learning_rate": 0.00198463751438435, "loss": 1.167, "step": 2670 }, { "epoch": 0.7710011507479861, "grad_norm": 1.5718913078308105, "learning_rate": 0.0019845799769850404, "loss": 1.3688, "step": 2680 }, { "epoch": 0.7738780207134638, "grad_norm": 1.6273021697998047, "learning_rate": 0.001984522439585731, "loss": 1.2593, "step": 2690 }, { "epoch": 0.7767548906789413, "grad_norm": 1.394400954246521, "learning_rate": 0.001984464902186421, "loss": 1.0772, "step": 2700 }, { "epoch": 0.7796317606444189, "grad_norm": 1.3420010805130005, "learning_rate": 0.0019844073647871116, "loss": 1.0377, "step": 2710 }, { "epoch": 0.7825086306098964, "grad_norm": 1.320313811302185, "learning_rate": 0.001984349827387802, "loss": 1.0817, "step": 2720 }, { "epoch": 0.785385500575374, "grad_norm": 1.1175507307052612, "learning_rate": 0.0019842922899884923, "loss": 1.0268, "step": 2730 }, { "epoch": 0.7882623705408516, "grad_norm": 1.1981478929519653, "learning_rate": 0.001984234752589183, "loss": 1.0841, "step": 2740 }, { "epoch": 0.7911392405063291, "grad_norm": 1.0887706279754639, "learning_rate": 0.0019841772151898735, "loss": 1.1559, "step": 2750 }, { "epoch": 0.7940161104718066, "grad_norm": 1.4930484294891357, "learning_rate": 0.001984119677790564, "loss": 1.1151, "step": 2760 }, { "epoch": 0.7968929804372842, "grad_norm": 1.4985054731369019, "learning_rate": 0.0019840621403912546, "loss": 1.2146, "step": 2770 }, { "epoch": 0.7997698504027618, "grad_norm": 1.2439887523651123, "learning_rate": 0.0019840046029919447, "loss": 1.0407, "step": 2780 }, { "epoch": 0.8026467203682394, "grad_norm": 1.2655129432678223, "learning_rate": 0.0019839470655926353, "loss": 1.1077, "step": 2790 }, { "epoch": 0.8055235903337169, "grad_norm": 0.7593598961830139, "learning_rate": 0.001983889528193326, "loss": 1.1794, "step": 2800 }, { "epoch": 0.8084004602991944, "grad_norm": 1.7235441207885742, "learning_rate": 0.001983831990794016, "loss": 1.1549, "step": 2810 }, { "epoch": 0.8112773302646721, "grad_norm": 1.6465835571289062, "learning_rate": 0.0019837744533947065, "loss": 0.8759, "step": 2820 }, { "epoch": 0.8141542002301496, "grad_norm": 1.7207396030426025, "learning_rate": 0.001983716915995397, "loss": 1.1865, "step": 2830 }, { "epoch": 0.8170310701956272, "grad_norm": 0.732351541519165, "learning_rate": 0.0019836593785960877, "loss": 1.2276, "step": 2840 }, { "epoch": 0.8199079401611047, "grad_norm": 1.6982070207595825, "learning_rate": 0.001983601841196778, "loss": 1.0619, "step": 2850 }, { "epoch": 0.8227848101265823, "grad_norm": 1.2225158214569092, "learning_rate": 0.0019835443037974684, "loss": 1.1086, "step": 2860 }, { "epoch": 0.8256616800920599, "grad_norm": 0.9607563018798828, "learning_rate": 0.001983486766398159, "loss": 1.1686, "step": 2870 }, { "epoch": 0.8285385500575374, "grad_norm": 1.0626153945922852, "learning_rate": 0.0019834292289988495, "loss": 1.0251, "step": 2880 }, { "epoch": 0.8314154200230149, "grad_norm": 2.205998420715332, "learning_rate": 0.0019833716915995396, "loss": 1.2291, "step": 2890 }, { "epoch": 0.8342922899884925, "grad_norm": 1.7462562322616577, "learning_rate": 0.00198331415420023, "loss": 1.1605, "step": 2900 }, { "epoch": 0.8371691599539701, "grad_norm": 1.4589816331863403, "learning_rate": 0.0019832566168009207, "loss": 1.0879, "step": 2910 }, { "epoch": 0.8400460299194477, "grad_norm": 1.1594676971435547, "learning_rate": 0.001983199079401611, "loss": 1.1843, "step": 2920 }, { "epoch": 0.8429228998849252, "grad_norm": 1.213435411453247, "learning_rate": 0.0019831415420023014, "loss": 1.0248, "step": 2930 }, { "epoch": 0.8457997698504027, "grad_norm": 1.2894361019134521, "learning_rate": 0.001983084004602992, "loss": 1.0897, "step": 2940 }, { "epoch": 0.8486766398158804, "grad_norm": 1.789259433746338, "learning_rate": 0.0019830264672036826, "loss": 1.1473, "step": 2950 }, { "epoch": 0.8515535097813579, "grad_norm": 1.6865073442459106, "learning_rate": 0.001982968929804373, "loss": 1.1008, "step": 2960 }, { "epoch": 0.8544303797468354, "grad_norm": 1.1920877695083618, "learning_rate": 0.0019829113924050633, "loss": 1.0641, "step": 2970 }, { "epoch": 0.857307249712313, "grad_norm": 1.0654942989349365, "learning_rate": 0.001982853855005754, "loss": 1.1559, "step": 2980 }, { "epoch": 0.8601841196777905, "grad_norm": 1.1836347579956055, "learning_rate": 0.0019827963176064444, "loss": 1.2503, "step": 2990 }, { "epoch": 0.8630609896432682, "grad_norm": 0.8849923610687256, "learning_rate": 0.0019827387802071345, "loss": 0.9709, "step": 3000 }, { "epoch": 0.8659378596087457, "grad_norm": 1.140437126159668, "learning_rate": 0.001982681242807825, "loss": 1.0224, "step": 3010 }, { "epoch": 0.8688147295742232, "grad_norm": 1.2252036333084106, "learning_rate": 0.0019826237054085157, "loss": 1.0078, "step": 3020 }, { "epoch": 0.8716915995397008, "grad_norm": 2.1807026863098145, "learning_rate": 0.0019825661680092058, "loss": 1.1172, "step": 3030 }, { "epoch": 0.8745684695051784, "grad_norm": 2.5682358741760254, "learning_rate": 0.0019825086306098963, "loss": 1.2915, "step": 3040 }, { "epoch": 0.877445339470656, "grad_norm": 1.7341612577438354, "learning_rate": 0.001982451093210587, "loss": 0.8932, "step": 3050 }, { "epoch": 0.8803222094361335, "grad_norm": 1.277787208557129, "learning_rate": 0.0019823935558112775, "loss": 1.0869, "step": 3060 }, { "epoch": 0.883199079401611, "grad_norm": 1.8742495775222778, "learning_rate": 0.001982336018411968, "loss": 1.027, "step": 3070 }, { "epoch": 0.8860759493670886, "grad_norm": 1.2894729375839233, "learning_rate": 0.001982278481012658, "loss": 1.1139, "step": 3080 }, { "epoch": 0.8889528193325662, "grad_norm": 1.0320303440093994, "learning_rate": 0.0019822209436133487, "loss": 0.9888, "step": 3090 }, { "epoch": 0.8918296892980437, "grad_norm": 1.363417625427246, "learning_rate": 0.0019821634062140393, "loss": 0.9385, "step": 3100 }, { "epoch": 0.8947065592635213, "grad_norm": 1.1431595087051392, "learning_rate": 0.0019821058688147294, "loss": 1.1534, "step": 3110 }, { "epoch": 0.8975834292289988, "grad_norm": 0.9495234489440918, "learning_rate": 0.00198204833141542, "loss": 1.1655, "step": 3120 }, { "epoch": 0.9004602991944765, "grad_norm": 1.001463770866394, "learning_rate": 0.0019819907940161106, "loss": 0.9903, "step": 3130 }, { "epoch": 0.903337169159954, "grad_norm": 1.123374342918396, "learning_rate": 0.001981933256616801, "loss": 0.7814, "step": 3140 }, { "epoch": 0.9062140391254315, "grad_norm": 1.208539605140686, "learning_rate": 0.0019818757192174912, "loss": 0.9529, "step": 3150 }, { "epoch": 0.9090909090909091, "grad_norm": 0.9905529618263245, "learning_rate": 0.001981818181818182, "loss": 1.1053, "step": 3160 }, { "epoch": 0.9119677790563866, "grad_norm": 1.9348362684249878, "learning_rate": 0.0019817606444188724, "loss": 1.0951, "step": 3170 }, { "epoch": 0.9148446490218642, "grad_norm": 0.8622063994407654, "learning_rate": 0.001981703107019563, "loss": 1.0422, "step": 3180 }, { "epoch": 0.9177215189873418, "grad_norm": 0.9667761921882629, "learning_rate": 0.001981645569620253, "loss": 1.2929, "step": 3190 }, { "epoch": 0.9205983889528193, "grad_norm": 0.724045991897583, "learning_rate": 0.0019815880322209436, "loss": 1.1409, "step": 3200 }, { "epoch": 0.9234752589182968, "grad_norm": 0.9431407451629639, "learning_rate": 0.001981530494821634, "loss": 0.9969, "step": 3210 }, { "epoch": 0.9263521288837745, "grad_norm": 3.020303726196289, "learning_rate": 0.0019814729574223243, "loss": 1.0908, "step": 3220 }, { "epoch": 0.929228998849252, "grad_norm": 1.0814417600631714, "learning_rate": 0.001981415420023015, "loss": 0.9896, "step": 3230 }, { "epoch": 0.9321058688147296, "grad_norm": 1.3365960121154785, "learning_rate": 0.0019813578826237055, "loss": 1.049, "step": 3240 }, { "epoch": 0.9349827387802071, "grad_norm": 1.1527985334396362, "learning_rate": 0.001981300345224396, "loss": 1.0323, "step": 3250 }, { "epoch": 0.9378596087456847, "grad_norm": 1.057244896888733, "learning_rate": 0.0019812428078250866, "loss": 1.0766, "step": 3260 }, { "epoch": 0.9407364787111623, "grad_norm": 3.30214262008667, "learning_rate": 0.0019811852704257767, "loss": 1.1114, "step": 3270 }, { "epoch": 0.9436133486766398, "grad_norm": 1.7627907991409302, "learning_rate": 0.0019811277330264673, "loss": 1.1051, "step": 3280 }, { "epoch": 0.9464902186421174, "grad_norm": 1.3863722085952759, "learning_rate": 0.001981070195627158, "loss": 1.1855, "step": 3290 }, { "epoch": 0.9493670886075949, "grad_norm": 1.2765904664993286, "learning_rate": 0.001981012658227848, "loss": 1.0078, "step": 3300 }, { "epoch": 0.9522439585730725, "grad_norm": 1.2161865234375, "learning_rate": 0.0019809551208285385, "loss": 0.8655, "step": 3310 }, { "epoch": 0.9551208285385501, "grad_norm": 1.497111201286316, "learning_rate": 0.001980897583429229, "loss": 1.071, "step": 3320 }, { "epoch": 0.9579976985040276, "grad_norm": 1.6067687273025513, "learning_rate": 0.0019808400460299192, "loss": 1.176, "step": 3330 }, { "epoch": 0.9608745684695051, "grad_norm": 1.923779010772705, "learning_rate": 0.00198078250863061, "loss": 1.0453, "step": 3340 }, { "epoch": 0.9637514384349828, "grad_norm": 1.9379603862762451, "learning_rate": 0.0019807249712313004, "loss": 1.0477, "step": 3350 }, { "epoch": 0.9666283084004603, "grad_norm": 1.0502737760543823, "learning_rate": 0.001980667433831991, "loss": 0.9201, "step": 3360 }, { "epoch": 0.9695051783659379, "grad_norm": 0.9488188028335571, "learning_rate": 0.0019806098964326815, "loss": 1.1399, "step": 3370 }, { "epoch": 0.9723820483314154, "grad_norm": 2.035947561264038, "learning_rate": 0.0019805523590333716, "loss": 1.0815, "step": 3380 }, { "epoch": 0.9752589182968929, "grad_norm": 1.7095320224761963, "learning_rate": 0.001980494821634062, "loss": 0.7425, "step": 3390 }, { "epoch": 0.9781357882623706, "grad_norm": 1.4154778718948364, "learning_rate": 0.0019804372842347527, "loss": 1.124, "step": 3400 }, { "epoch": 0.9810126582278481, "grad_norm": 1.4947386980056763, "learning_rate": 0.001980379746835443, "loss": 1.0452, "step": 3410 }, { "epoch": 0.9838895281933256, "grad_norm": 1.3692169189453125, "learning_rate": 0.0019803222094361334, "loss": 1.3447, "step": 3420 }, { "epoch": 0.9867663981588032, "grad_norm": 1.5567779541015625, "learning_rate": 0.001980264672036824, "loss": 1.0554, "step": 3430 }, { "epoch": 0.9896432681242808, "grad_norm": 1.1634666919708252, "learning_rate": 0.001980207134637514, "loss": 1.0476, "step": 3440 }, { "epoch": 0.9925201380897584, "grad_norm": 1.3775889873504639, "learning_rate": 0.0019801495972382047, "loss": 1.0816, "step": 3450 }, { "epoch": 0.9953970080552359, "grad_norm": 0.8871949315071106, "learning_rate": 0.0019800920598388953, "loss": 0.8835, "step": 3460 }, { "epoch": 0.9982738780207134, "grad_norm": 2.1718974113464355, "learning_rate": 0.001980034522439586, "loss": 1.131, "step": 3470 }, { "epoch": 1.001150747986191, "grad_norm": 1.5198698043823242, "learning_rate": 0.0019799769850402764, "loss": 0.9563, "step": 3480 }, { "epoch": 1.0040276179516685, "grad_norm": 2.37674880027771, "learning_rate": 0.001979919447640967, "loss": 1.188, "step": 3490 }, { "epoch": 1.006904487917146, "grad_norm": 0.9692029356956482, "learning_rate": 0.001979861910241657, "loss": 0.887, "step": 3500 }, { "epoch": 1.0097813578826238, "grad_norm": 1.1864540576934814, "learning_rate": 0.0019798043728423476, "loss": 0.8816, "step": 3510 }, { "epoch": 1.0126582278481013, "grad_norm": 1.3895000219345093, "learning_rate": 0.0019797468354430378, "loss": 0.8597, "step": 3520 }, { "epoch": 1.0155350978135789, "grad_norm": 1.1202113628387451, "learning_rate": 0.0019796892980437283, "loss": 0.9101, "step": 3530 }, { "epoch": 1.0184119677790564, "grad_norm": 1.626765251159668, "learning_rate": 0.001979631760644419, "loss": 1.1947, "step": 3540 }, { "epoch": 1.021288837744534, "grad_norm": 0.810778021812439, "learning_rate": 0.0019795742232451095, "loss": 0.9718, "step": 3550 }, { "epoch": 1.0241657077100115, "grad_norm": 1.666556715965271, "learning_rate": 0.0019795166858457996, "loss": 1.0414, "step": 3560 }, { "epoch": 1.027042577675489, "grad_norm": 1.0484448671340942, "learning_rate": 0.00197945914844649, "loss": 0.8634, "step": 3570 }, { "epoch": 1.0299194476409665, "grad_norm": 1.5254807472229004, "learning_rate": 0.0019794016110471807, "loss": 0.7881, "step": 3580 }, { "epoch": 1.032796317606444, "grad_norm": 1.9065008163452148, "learning_rate": 0.0019793440736478713, "loss": 1.0728, "step": 3590 }, { "epoch": 1.0356731875719218, "grad_norm": 1.7831215858459473, "learning_rate": 0.001979286536248562, "loss": 1.0372, "step": 3600 }, { "epoch": 1.0385500575373994, "grad_norm": 0.9267882704734802, "learning_rate": 0.001979228998849252, "loss": 1.0197, "step": 3610 }, { "epoch": 1.041426927502877, "grad_norm": 1.550209641456604, "learning_rate": 0.0019791714614499425, "loss": 1.1263, "step": 3620 }, { "epoch": 1.0443037974683544, "grad_norm": 1.360363245010376, "learning_rate": 0.001979113924050633, "loss": 1.1514, "step": 3630 }, { "epoch": 1.047180667433832, "grad_norm": 1.9527623653411865, "learning_rate": 0.0019790563866513232, "loss": 1.047, "step": 3640 }, { "epoch": 1.0500575373993095, "grad_norm": 1.9566019773483276, "learning_rate": 0.001978998849252014, "loss": 1.2642, "step": 3650 }, { "epoch": 1.052934407364787, "grad_norm": 1.304473876953125, "learning_rate": 0.0019789413118527044, "loss": 1.0326, "step": 3660 }, { "epoch": 1.0558112773302646, "grad_norm": 1.1376185417175293, "learning_rate": 0.001978883774453395, "loss": 1.048, "step": 3670 }, { "epoch": 1.0586881472957423, "grad_norm": 1.236975908279419, "learning_rate": 0.001978826237054085, "loss": 1.0746, "step": 3680 }, { "epoch": 1.0615650172612199, "grad_norm": 0.9195955395698547, "learning_rate": 0.0019787686996547756, "loss": 1.1517, "step": 3690 }, { "epoch": 1.0644418872266974, "grad_norm": 1.6330602169036865, "learning_rate": 0.001978711162255466, "loss": 0.8852, "step": 3700 }, { "epoch": 1.067318757192175, "grad_norm": 2.1730737686157227, "learning_rate": 0.0019786536248561568, "loss": 1.01, "step": 3710 }, { "epoch": 1.0701956271576525, "grad_norm": 1.2995496988296509, "learning_rate": 0.001978596087456847, "loss": 0.9692, "step": 3720 }, { "epoch": 1.07307249712313, "grad_norm": 1.4668521881103516, "learning_rate": 0.0019785385500575375, "loss": 0.9927, "step": 3730 }, { "epoch": 1.0759493670886076, "grad_norm": 1.3647336959838867, "learning_rate": 0.001978481012658228, "loss": 0.9221, "step": 3740 }, { "epoch": 1.078826237054085, "grad_norm": 0.8763877749443054, "learning_rate": 0.001978423475258918, "loss": 0.9663, "step": 3750 }, { "epoch": 1.0817031070195626, "grad_norm": 2.367017984390259, "learning_rate": 0.0019783659378596087, "loss": 0.8883, "step": 3760 }, { "epoch": 1.0845799769850402, "grad_norm": 1.1824783086776733, "learning_rate": 0.0019783084004602993, "loss": 1.1034, "step": 3770 }, { "epoch": 1.087456846950518, "grad_norm": 0.5068567991256714, "learning_rate": 0.00197825086306099, "loss": 0.8904, "step": 3780 }, { "epoch": 1.0903337169159955, "grad_norm": 1.567819595336914, "learning_rate": 0.0019781933256616804, "loss": 1.2142, "step": 3790 }, { "epoch": 1.093210586881473, "grad_norm": 0.9774667024612427, "learning_rate": 0.0019781357882623705, "loss": 0.9095, "step": 3800 }, { "epoch": 1.0960874568469505, "grad_norm": 1.6065609455108643, "learning_rate": 0.001978078250863061, "loss": 1.0493, "step": 3810 }, { "epoch": 1.098964326812428, "grad_norm": 1.3869130611419678, "learning_rate": 0.0019780207134637517, "loss": 0.8461, "step": 3820 }, { "epoch": 1.1018411967779056, "grad_norm": 0.9380619525909424, "learning_rate": 0.001977963176064442, "loss": 0.9807, "step": 3830 }, { "epoch": 1.1047180667433831, "grad_norm": 1.0051279067993164, "learning_rate": 0.0019779056386651324, "loss": 0.9965, "step": 3840 }, { "epoch": 1.1075949367088607, "grad_norm": 1.4290648698806763, "learning_rate": 0.001977848101265823, "loss": 0.8878, "step": 3850 }, { "epoch": 1.1104718066743384, "grad_norm": 1.7787108421325684, "learning_rate": 0.001977790563866513, "loss": 1.3437, "step": 3860 }, { "epoch": 1.113348676639816, "grad_norm": 0.8408524990081787, "learning_rate": 0.0019777330264672036, "loss": 0.8479, "step": 3870 }, { "epoch": 1.1162255466052935, "grad_norm": 0.8109760880470276, "learning_rate": 0.001977675489067894, "loss": 0.9878, "step": 3880 }, { "epoch": 1.119102416570771, "grad_norm": 0.6840512156486511, "learning_rate": 0.0019776179516685847, "loss": 0.838, "step": 3890 }, { "epoch": 1.1219792865362486, "grad_norm": 1.672937035560608, "learning_rate": 0.0019775604142692753, "loss": 0.925, "step": 3900 }, { "epoch": 1.124856156501726, "grad_norm": 1.7959917783737183, "learning_rate": 0.0019775028768699654, "loss": 0.8537, "step": 3910 }, { "epoch": 1.1277330264672036, "grad_norm": 0.9200351238250732, "learning_rate": 0.001977445339470656, "loss": 0.9238, "step": 3920 }, { "epoch": 1.1306098964326812, "grad_norm": 2.2264244556427, "learning_rate": 0.0019773878020713466, "loss": 1.1732, "step": 3930 }, { "epoch": 1.1334867663981587, "grad_norm": 0.8701280355453491, "learning_rate": 0.0019773302646720367, "loss": 0.9349, "step": 3940 }, { "epoch": 1.1363636363636362, "grad_norm": 1.1128617525100708, "learning_rate": 0.0019772727272727273, "loss": 0.9541, "step": 3950 }, { "epoch": 1.139240506329114, "grad_norm": 2.0115485191345215, "learning_rate": 0.001977215189873418, "loss": 0.9105, "step": 3960 }, { "epoch": 1.1421173762945915, "grad_norm": 1.2236874103546143, "learning_rate": 0.0019771576524741084, "loss": 0.9834, "step": 3970 }, { "epoch": 1.144994246260069, "grad_norm": 1.7742794752120972, "learning_rate": 0.0019771001150747985, "loss": 1.0799, "step": 3980 }, { "epoch": 1.1478711162255466, "grad_norm": 1.4273407459259033, "learning_rate": 0.001977042577675489, "loss": 1.1247, "step": 3990 }, { "epoch": 1.1507479861910241, "grad_norm": 1.4844794273376465, "learning_rate": 0.0019769850402761796, "loss": 0.9489, "step": 4000 }, { "epoch": 1.1536248561565017, "grad_norm": 1.3504869937896729, "learning_rate": 0.00197692750287687, "loss": 0.9568, "step": 4010 }, { "epoch": 1.1565017261219792, "grad_norm": 1.3399893045425415, "learning_rate": 0.0019768699654775603, "loss": 0.8624, "step": 4020 }, { "epoch": 1.159378596087457, "grad_norm": 1.9990952014923096, "learning_rate": 0.001976812428078251, "loss": 1.2588, "step": 4030 }, { "epoch": 1.1622554660529345, "grad_norm": 1.3336102962493896, "learning_rate": 0.0019767548906789415, "loss": 0.9206, "step": 4040 }, { "epoch": 1.165132336018412, "grad_norm": 1.0298529863357544, "learning_rate": 0.0019766973532796316, "loss": 1.1031, "step": 4050 }, { "epoch": 1.1680092059838896, "grad_norm": 1.49605131149292, "learning_rate": 0.001976639815880322, "loss": 0.9686, "step": 4060 }, { "epoch": 1.1708860759493671, "grad_norm": 1.6984894275665283, "learning_rate": 0.0019765822784810127, "loss": 1.1291, "step": 4070 }, { "epoch": 1.1737629459148446, "grad_norm": 1.0119837522506714, "learning_rate": 0.0019765247410817033, "loss": 0.9964, "step": 4080 }, { "epoch": 1.1766398158803222, "grad_norm": 1.3916276693344116, "learning_rate": 0.001976467203682394, "loss": 0.8691, "step": 4090 }, { "epoch": 1.1795166858457997, "grad_norm": 1.6199504137039185, "learning_rate": 0.001976409666283084, "loss": 1.14, "step": 4100 }, { "epoch": 1.1823935558112773, "grad_norm": 2.3295702934265137, "learning_rate": 0.0019763521288837745, "loss": 0.9356, "step": 4110 }, { "epoch": 1.1852704257767548, "grad_norm": 0.8110642433166504, "learning_rate": 0.001976294591484465, "loss": 1.024, "step": 4120 }, { "epoch": 1.1881472957422325, "grad_norm": 1.369902491569519, "learning_rate": 0.0019762370540851552, "loss": 0.9089, "step": 4130 }, { "epoch": 1.19102416570771, "grad_norm": 1.5710242986679077, "learning_rate": 0.001976179516685846, "loss": 0.9915, "step": 4140 }, { "epoch": 1.1939010356731876, "grad_norm": 2.186274528503418, "learning_rate": 0.0019761219792865364, "loss": 0.8487, "step": 4150 }, { "epoch": 1.1967779056386652, "grad_norm": 1.605449914932251, "learning_rate": 0.0019760644418872265, "loss": 1.3079, "step": 4160 }, { "epoch": 1.1996547756041427, "grad_norm": 1.9519639015197754, "learning_rate": 0.001976006904487917, "loss": 0.9664, "step": 4170 }, { "epoch": 1.2025316455696202, "grad_norm": 0.8661849498748779, "learning_rate": 0.0019759493670886076, "loss": 1.2456, "step": 4180 }, { "epoch": 1.2054085155350978, "grad_norm": 1.1621534824371338, "learning_rate": 0.001975891829689298, "loss": 1.1914, "step": 4190 }, { "epoch": 1.2082853855005753, "grad_norm": 0.9994075298309326, "learning_rate": 0.0019758342922899888, "loss": 1.1054, "step": 4200 }, { "epoch": 1.211162255466053, "grad_norm": 1.0234143733978271, "learning_rate": 0.001975776754890679, "loss": 1.2797, "step": 4210 }, { "epoch": 1.2140391254315306, "grad_norm": 1.9508427381515503, "learning_rate": 0.0019757192174913694, "loss": 1.1113, "step": 4220 }, { "epoch": 1.2169159953970081, "grad_norm": 1.5993883609771729, "learning_rate": 0.00197566168009206, "loss": 1.1683, "step": 4230 }, { "epoch": 1.2197928653624857, "grad_norm": 1.0844930410385132, "learning_rate": 0.00197560414269275, "loss": 0.9509, "step": 4240 }, { "epoch": 1.2226697353279632, "grad_norm": 1.589860439300537, "learning_rate": 0.0019755466052934407, "loss": 0.9798, "step": 4250 }, { "epoch": 1.2255466052934407, "grad_norm": 1.104877233505249, "learning_rate": 0.0019754890678941313, "loss": 1.0626, "step": 4260 }, { "epoch": 1.2284234752589183, "grad_norm": 1.830490231513977, "learning_rate": 0.0019754315304948214, "loss": 1.0581, "step": 4270 }, { "epoch": 1.2313003452243958, "grad_norm": 1.0457144975662231, "learning_rate": 0.001975373993095512, "loss": 0.969, "step": 4280 }, { "epoch": 1.2341772151898733, "grad_norm": 1.2137161493301392, "learning_rate": 0.0019753164556962025, "loss": 0.942, "step": 4290 }, { "epoch": 1.2370540851553509, "grad_norm": 1.2080312967300415, "learning_rate": 0.001975258918296893, "loss": 0.9921, "step": 4300 }, { "epoch": 1.2399309551208286, "grad_norm": 1.497583031654358, "learning_rate": 0.0019752013808975837, "loss": 0.9225, "step": 4310 }, { "epoch": 1.2428078250863062, "grad_norm": 1.235602855682373, "learning_rate": 0.001975143843498274, "loss": 1.0492, "step": 4320 }, { "epoch": 1.2456846950517837, "grad_norm": 1.7684283256530762, "learning_rate": 0.0019750863060989643, "loss": 1.3476, "step": 4330 }, { "epoch": 1.2485615650172612, "grad_norm": 2.1912920475006104, "learning_rate": 0.001975028768699655, "loss": 0.8555, "step": 4340 }, { "epoch": 1.2514384349827388, "grad_norm": 1.1973387002944946, "learning_rate": 0.001974971231300345, "loss": 0.9116, "step": 4350 }, { "epoch": 1.2543153049482163, "grad_norm": 1.175037145614624, "learning_rate": 0.0019749136939010356, "loss": 1.0989, "step": 4360 }, { "epoch": 1.2571921749136938, "grad_norm": 1.3830311298370361, "learning_rate": 0.001974856156501726, "loss": 1.1365, "step": 4370 }, { "epoch": 1.2600690448791716, "grad_norm": 1.7697116136550903, "learning_rate": 0.0019747986191024167, "loss": 0.9799, "step": 4380 }, { "epoch": 1.2629459148446491, "grad_norm": 0.7304568886756897, "learning_rate": 0.001974741081703107, "loss": 0.9396, "step": 4390 }, { "epoch": 1.2658227848101267, "grad_norm": 0.9510437250137329, "learning_rate": 0.0019746835443037974, "loss": 0.8884, "step": 4400 }, { "epoch": 1.2686996547756042, "grad_norm": 2.1159169673919678, "learning_rate": 0.001974626006904488, "loss": 0.9491, "step": 4410 }, { "epoch": 1.2715765247410817, "grad_norm": 1.7333095073699951, "learning_rate": 0.0019745684695051786, "loss": 1.0814, "step": 4420 }, { "epoch": 1.2744533947065593, "grad_norm": 0.9509525299072266, "learning_rate": 0.001974510932105869, "loss": 0.8808, "step": 4430 }, { "epoch": 1.2773302646720368, "grad_norm": 1.1975380182266235, "learning_rate": 0.0019744533947065592, "loss": 1.099, "step": 4440 }, { "epoch": 1.2802071346375143, "grad_norm": 0.9692880511283875, "learning_rate": 0.00197439585730725, "loss": 1.1129, "step": 4450 }, { "epoch": 1.2830840046029919, "grad_norm": 0.8535281419754028, "learning_rate": 0.00197433831990794, "loss": 0.9774, "step": 4460 }, { "epoch": 1.2859608745684694, "grad_norm": 1.1920522451400757, "learning_rate": 0.0019742807825086305, "loss": 1.0859, "step": 4470 }, { "epoch": 1.288837744533947, "grad_norm": 1.1145492792129517, "learning_rate": 0.001974223245109321, "loss": 0.937, "step": 4480 }, { "epoch": 1.2917146144994247, "grad_norm": 1.3097046613693237, "learning_rate": 0.0019741657077100116, "loss": 0.8712, "step": 4490 }, { "epoch": 1.2945914844649022, "grad_norm": 1.4572352170944214, "learning_rate": 0.001974108170310702, "loss": 0.8998, "step": 4500 }, { "epoch": 1.2974683544303798, "grad_norm": 1.2513011693954468, "learning_rate": 0.0019740506329113923, "loss": 0.9, "step": 4510 }, { "epoch": 1.3003452243958573, "grad_norm": 1.5702314376831055, "learning_rate": 0.001973993095512083, "loss": 1.1248, "step": 4520 }, { "epoch": 1.3032220943613348, "grad_norm": 1.5799827575683594, "learning_rate": 0.0019739355581127735, "loss": 1.0167, "step": 4530 }, { "epoch": 1.3060989643268124, "grad_norm": 1.112506628036499, "learning_rate": 0.001973878020713464, "loss": 0.8106, "step": 4540 }, { "epoch": 1.30897583429229, "grad_norm": 1.2525438070297241, "learning_rate": 0.001973820483314154, "loss": 1.0912, "step": 4550 }, { "epoch": 1.3118527042577677, "grad_norm": 1.4961788654327393, "learning_rate": 0.0019737629459148447, "loss": 1.3398, "step": 4560 }, { "epoch": 1.3147295742232452, "grad_norm": 1.1297551393508911, "learning_rate": 0.001973705408515535, "loss": 0.8768, "step": 4570 }, { "epoch": 1.3176064441887227, "grad_norm": 1.5524013042449951, "learning_rate": 0.0019736478711162254, "loss": 0.9749, "step": 4580 }, { "epoch": 1.3204833141542003, "grad_norm": 1.0857199430465698, "learning_rate": 0.001973590333716916, "loss": 0.8668, "step": 4590 }, { "epoch": 1.3233601841196778, "grad_norm": 1.3670754432678223, "learning_rate": 0.0019735327963176065, "loss": 0.8534, "step": 4600 }, { "epoch": 1.3262370540851554, "grad_norm": 1.4406400918960571, "learning_rate": 0.001973475258918297, "loss": 1.0181, "step": 4610 }, { "epoch": 1.3291139240506329, "grad_norm": 0.9931894540786743, "learning_rate": 0.0019734177215189877, "loss": 0.9421, "step": 4620 }, { "epoch": 1.3319907940161104, "grad_norm": 1.0862350463867188, "learning_rate": 0.001973360184119678, "loss": 1.0723, "step": 4630 }, { "epoch": 1.334867663981588, "grad_norm": 1.4078381061553955, "learning_rate": 0.0019733026467203684, "loss": 0.9257, "step": 4640 }, { "epoch": 1.3377445339470655, "grad_norm": 1.4880276918411255, "learning_rate": 0.001973245109321059, "loss": 1.0598, "step": 4650 }, { "epoch": 1.340621403912543, "grad_norm": 2.80163311958313, "learning_rate": 0.001973187571921749, "loss": 1.19, "step": 4660 }, { "epoch": 1.3434982738780208, "grad_norm": 1.1303561925888062, "learning_rate": 0.0019731300345224396, "loss": 0.9685, "step": 4670 }, { "epoch": 1.3463751438434983, "grad_norm": 1.5893036127090454, "learning_rate": 0.00197307249712313, "loss": 1.0631, "step": 4680 }, { "epoch": 1.3492520138089759, "grad_norm": 1.4756367206573486, "learning_rate": 0.0019730149597238203, "loss": 1.1602, "step": 4690 }, { "epoch": 1.3521288837744534, "grad_norm": 0.8547199964523315, "learning_rate": 0.001972957422324511, "loss": 0.9952, "step": 4700 }, { "epoch": 1.355005753739931, "grad_norm": 1.5565555095672607, "learning_rate": 0.0019728998849252014, "loss": 1.012, "step": 4710 }, { "epoch": 1.3578826237054085, "grad_norm": 0.9381767511367798, "learning_rate": 0.001972842347525892, "loss": 0.9266, "step": 4720 }, { "epoch": 1.360759493670886, "grad_norm": 1.0723708868026733, "learning_rate": 0.0019727848101265826, "loss": 1.0572, "step": 4730 }, { "epoch": 1.3636363636363638, "grad_norm": 1.337672472000122, "learning_rate": 0.0019727272727272727, "loss": 0.9376, "step": 4740 }, { "epoch": 1.3665132336018413, "grad_norm": 1.3056554794311523, "learning_rate": 0.0019726697353279633, "loss": 0.9063, "step": 4750 }, { "epoch": 1.3693901035673188, "grad_norm": 2.174103260040283, "learning_rate": 0.001972612197928654, "loss": 0.873, "step": 4760 }, { "epoch": 1.3722669735327964, "grad_norm": 1.1627870798110962, "learning_rate": 0.001972554660529344, "loss": 1.0957, "step": 4770 }, { "epoch": 1.375143843498274, "grad_norm": 1.8050310611724854, "learning_rate": 0.0019724971231300345, "loss": 0.8467, "step": 4780 }, { "epoch": 1.3780207134637514, "grad_norm": 1.756394624710083, "learning_rate": 0.001972439585730725, "loss": 1.0368, "step": 4790 }, { "epoch": 1.380897583429229, "grad_norm": 1.4744620323181152, "learning_rate": 0.0019723820483314156, "loss": 1.0039, "step": 4800 }, { "epoch": 1.3837744533947065, "grad_norm": 1.2139177322387695, "learning_rate": 0.0019723245109321058, "loss": 1.2518, "step": 4810 }, { "epoch": 1.386651323360184, "grad_norm": 1.473384976387024, "learning_rate": 0.0019722669735327963, "loss": 0.9055, "step": 4820 }, { "epoch": 1.3895281933256616, "grad_norm": 0.9024837017059326, "learning_rate": 0.001972209436133487, "loss": 0.895, "step": 4830 }, { "epoch": 1.3924050632911391, "grad_norm": 0.9150998592376709, "learning_rate": 0.0019721518987341775, "loss": 1.083, "step": 4840 }, { "epoch": 1.3952819332566169, "grad_norm": 1.0609196424484253, "learning_rate": 0.0019720943613348676, "loss": 0.8848, "step": 4850 }, { "epoch": 1.3981588032220944, "grad_norm": 1.3346481323242188, "learning_rate": 0.001972036823935558, "loss": 0.9817, "step": 4860 }, { "epoch": 1.401035673187572, "grad_norm": 1.0721640586853027, "learning_rate": 0.0019719792865362487, "loss": 0.8686, "step": 4870 }, { "epoch": 1.4039125431530495, "grad_norm": 1.735984206199646, "learning_rate": 0.001971921749136939, "loss": 0.9002, "step": 4880 }, { "epoch": 1.406789413118527, "grad_norm": 1.079898476600647, "learning_rate": 0.0019718642117376294, "loss": 0.9207, "step": 4890 }, { "epoch": 1.4096662830840045, "grad_norm": 1.1808559894561768, "learning_rate": 0.00197180667433832, "loss": 1.0915, "step": 4900 }, { "epoch": 1.412543153049482, "grad_norm": 1.38386070728302, "learning_rate": 0.0019717491369390105, "loss": 0.9683, "step": 4910 }, { "epoch": 1.4154200230149598, "grad_norm": 1.2516733407974243, "learning_rate": 0.001971691599539701, "loss": 0.857, "step": 4920 }, { "epoch": 1.4182968929804374, "grad_norm": 1.8045285940170288, "learning_rate": 0.0019716340621403912, "loss": 1.0051, "step": 4930 }, { "epoch": 1.421173762945915, "grad_norm": 1.622131586074829, "learning_rate": 0.001971576524741082, "loss": 1.0274, "step": 4940 }, { "epoch": 1.4240506329113924, "grad_norm": 0.8755598068237305, "learning_rate": 0.0019715189873417724, "loss": 0.8969, "step": 4950 }, { "epoch": 1.42692750287687, "grad_norm": 0.8398337960243225, "learning_rate": 0.0019714614499424625, "loss": 0.8956, "step": 4960 }, { "epoch": 1.4298043728423475, "grad_norm": 1.6829875707626343, "learning_rate": 0.001971403912543153, "loss": 1.0449, "step": 4970 }, { "epoch": 1.432681242807825, "grad_norm": 1.346276044845581, "learning_rate": 0.0019713463751438436, "loss": 0.8942, "step": 4980 }, { "epoch": 1.4355581127733026, "grad_norm": 0.8268054127693176, "learning_rate": 0.0019712888377445338, "loss": 1.0097, "step": 4990 }, { "epoch": 1.4384349827387801, "grad_norm": 0.9226120710372925, "learning_rate": 0.0019712313003452243, "loss": 0.9943, "step": 5000 }, { "epoch": 1.4413118527042577, "grad_norm": 1.1503942012786865, "learning_rate": 0.001971173762945915, "loss": 0.8249, "step": 5010 }, { "epoch": 1.4441887226697352, "grad_norm": 1.381833791732788, "learning_rate": 0.0019711162255466055, "loss": 0.9594, "step": 5020 }, { "epoch": 1.447065592635213, "grad_norm": 1.6412419080734253, "learning_rate": 0.001971058688147296, "loss": 0.8913, "step": 5030 }, { "epoch": 1.4499424626006905, "grad_norm": 0.654193103313446, "learning_rate": 0.001971001150747986, "loss": 1.0585, "step": 5040 }, { "epoch": 1.452819332566168, "grad_norm": 1.1640472412109375, "learning_rate": 0.0019709436133486767, "loss": 1.1507, "step": 5050 }, { "epoch": 1.4556962025316456, "grad_norm": 1.0732415914535522, "learning_rate": 0.0019708860759493673, "loss": 0.87, "step": 5060 }, { "epoch": 1.458573072497123, "grad_norm": 1.4005697965621948, "learning_rate": 0.0019708285385500574, "loss": 1.015, "step": 5070 }, { "epoch": 1.4614499424626006, "grad_norm": 1.2918628454208374, "learning_rate": 0.001970771001150748, "loss": 1.1029, "step": 5080 }, { "epoch": 1.4643268124280784, "grad_norm": 1.1716855764389038, "learning_rate": 0.0019707134637514385, "loss": 1.1102, "step": 5090 }, { "epoch": 1.467203682393556, "grad_norm": 1.7274417877197266, "learning_rate": 0.0019706559263521287, "loss": 1.0968, "step": 5100 }, { "epoch": 1.4700805523590335, "grad_norm": 1.4044312238693237, "learning_rate": 0.0019705983889528192, "loss": 0.9945, "step": 5110 }, { "epoch": 1.472957422324511, "grad_norm": 0.850128173828125, "learning_rate": 0.00197054085155351, "loss": 1.1015, "step": 5120 }, { "epoch": 1.4758342922899885, "grad_norm": 1.4120447635650635, "learning_rate": 0.0019704833141542004, "loss": 0.9604, "step": 5130 }, { "epoch": 1.478711162255466, "grad_norm": 1.7259286642074585, "learning_rate": 0.001970425776754891, "loss": 1.2369, "step": 5140 }, { "epoch": 1.4815880322209436, "grad_norm": 1.7238929271697998, "learning_rate": 0.001970368239355581, "loss": 0.7856, "step": 5150 }, { "epoch": 1.4844649021864211, "grad_norm": 1.438337802886963, "learning_rate": 0.0019703107019562716, "loss": 1.005, "step": 5160 }, { "epoch": 1.4873417721518987, "grad_norm": 1.587588906288147, "learning_rate": 0.001970253164556962, "loss": 1.0235, "step": 5170 }, { "epoch": 1.4902186421173762, "grad_norm": 1.291944146156311, "learning_rate": 0.0019701956271576523, "loss": 0.9263, "step": 5180 }, { "epoch": 1.4930955120828537, "grad_norm": 1.9082858562469482, "learning_rate": 0.001970138089758343, "loss": 1.0977, "step": 5190 }, { "epoch": 1.4959723820483315, "grad_norm": 0.9472231268882751, "learning_rate": 0.0019700805523590334, "loss": 0.9969, "step": 5200 }, { "epoch": 1.498849252013809, "grad_norm": 1.946329951286316, "learning_rate": 0.001970023014959724, "loss": 1.0048, "step": 5210 }, { "epoch": 1.5017261219792866, "grad_norm": 0.8890090584754944, "learning_rate": 0.001969965477560414, "loss": 0.9621, "step": 5220 }, { "epoch": 1.504602991944764, "grad_norm": 0.8991700410842896, "learning_rate": 0.0019699079401611047, "loss": 0.6887, "step": 5230 }, { "epoch": 1.5074798619102416, "grad_norm": 1.060975432395935, "learning_rate": 0.0019698504027617953, "loss": 0.8554, "step": 5240 }, { "epoch": 1.5103567318757192, "grad_norm": 0.5989450216293335, "learning_rate": 0.001969792865362486, "loss": 0.8711, "step": 5250 }, { "epoch": 1.513233601841197, "grad_norm": 1.978723168373108, "learning_rate": 0.001969735327963176, "loss": 0.9968, "step": 5260 }, { "epoch": 1.5161104718066745, "grad_norm": 1.1841212511062622, "learning_rate": 0.0019696777905638665, "loss": 0.7199, "step": 5270 }, { "epoch": 1.518987341772152, "grad_norm": 1.8612483739852905, "learning_rate": 0.001969620253164557, "loss": 1.1627, "step": 5280 }, { "epoch": 1.5218642117376295, "grad_norm": 1.8044832944869995, "learning_rate": 0.001969562715765247, "loss": 1.106, "step": 5290 }, { "epoch": 1.524741081703107, "grad_norm": 0.7040504813194275, "learning_rate": 0.0019695051783659378, "loss": 0.924, "step": 5300 }, { "epoch": 1.5276179516685846, "grad_norm": 1.386450171470642, "learning_rate": 0.0019694476409666283, "loss": 0.9235, "step": 5310 }, { "epoch": 1.5304948216340621, "grad_norm": 1.4929349422454834, "learning_rate": 0.001969390103567319, "loss": 0.9315, "step": 5320 }, { "epoch": 1.5333716915995397, "grad_norm": 2.821376323699951, "learning_rate": 0.0019693325661680095, "loss": 1.1448, "step": 5330 }, { "epoch": 1.5362485615650172, "grad_norm": 1.2993786334991455, "learning_rate": 0.0019692750287686996, "loss": 1.0205, "step": 5340 }, { "epoch": 1.5391254315304947, "grad_norm": 1.5067421197891235, "learning_rate": 0.00196921749136939, "loss": 1.0639, "step": 5350 }, { "epoch": 1.5420023014959723, "grad_norm": 1.8693431615829468, "learning_rate": 0.0019691599539700807, "loss": 1.1162, "step": 5360 }, { "epoch": 1.5448791714614498, "grad_norm": 2.16433048248291, "learning_rate": 0.001969102416570771, "loss": 1.3113, "step": 5370 }, { "epoch": 1.5477560414269274, "grad_norm": 1.4317307472229004, "learning_rate": 0.0019690448791714614, "loss": 1.1487, "step": 5380 }, { "epoch": 1.5506329113924051, "grad_norm": 1.2483785152435303, "learning_rate": 0.001968987341772152, "loss": 0.9957, "step": 5390 }, { "epoch": 1.5535097813578826, "grad_norm": 0.9866055250167847, "learning_rate": 0.001968929804372842, "loss": 1.3447, "step": 5400 }, { "epoch": 1.5563866513233602, "grad_norm": 1.3847243785858154, "learning_rate": 0.0019688722669735327, "loss": 1.0859, "step": 5410 }, { "epoch": 1.5592635212888377, "grad_norm": 1.1621203422546387, "learning_rate": 0.0019688147295742232, "loss": 0.9812, "step": 5420 }, { "epoch": 1.5621403912543153, "grad_norm": 1.5820848941802979, "learning_rate": 0.001968757192174914, "loss": 1.1165, "step": 5430 }, { "epoch": 1.565017261219793, "grad_norm": 1.143669843673706, "learning_rate": 0.0019686996547756044, "loss": 0.9931, "step": 5440 }, { "epoch": 1.5678941311852705, "grad_norm": 1.207303524017334, "learning_rate": 0.001968642117376295, "loss": 0.8409, "step": 5450 }, { "epoch": 1.570771001150748, "grad_norm": 1.0417759418487549, "learning_rate": 0.001968584579976985, "loss": 0.7466, "step": 5460 }, { "epoch": 1.5736478711162256, "grad_norm": 1.6089386940002441, "learning_rate": 0.0019685270425776756, "loss": 1.1087, "step": 5470 }, { "epoch": 1.5765247410817032, "grad_norm": 0.9392552971839905, "learning_rate": 0.0019684695051783658, "loss": 0.7803, "step": 5480 }, { "epoch": 1.5794016110471807, "grad_norm": 2.425974130630493, "learning_rate": 0.0019684119677790563, "loss": 0.9001, "step": 5490 }, { "epoch": 1.5822784810126582, "grad_norm": 2.094043731689453, "learning_rate": 0.001968354430379747, "loss": 1.0867, "step": 5500 }, { "epoch": 1.5851553509781358, "grad_norm": 1.785237431526184, "learning_rate": 0.0019682968929804374, "loss": 0.9392, "step": 5510 }, { "epoch": 1.5880322209436133, "grad_norm": 1.7874128818511963, "learning_rate": 0.0019682393555811276, "loss": 1.1452, "step": 5520 }, { "epoch": 1.5909090909090908, "grad_norm": 1.4970083236694336, "learning_rate": 0.001968181818181818, "loss": 0.9483, "step": 5530 }, { "epoch": 1.5937859608745684, "grad_norm": 1.694322943687439, "learning_rate": 0.0019681242807825087, "loss": 0.9964, "step": 5540 }, { "epoch": 1.596662830840046, "grad_norm": 2.4150028228759766, "learning_rate": 0.0019680667433831993, "loss": 1.0782, "step": 5550 }, { "epoch": 1.5995397008055234, "grad_norm": 1.5449193716049194, "learning_rate": 0.00196800920598389, "loss": 1.3805, "step": 5560 }, { "epoch": 1.6024165707710012, "grad_norm": 1.376084804534912, "learning_rate": 0.00196795166858458, "loss": 0.8734, "step": 5570 }, { "epoch": 1.6052934407364787, "grad_norm": 2.0378100872039795, "learning_rate": 0.0019678941311852705, "loss": 0.9161, "step": 5580 }, { "epoch": 1.6081703107019563, "grad_norm": 0.748805046081543, "learning_rate": 0.0019678365937859607, "loss": 1.106, "step": 5590 }, { "epoch": 1.6110471806674338, "grad_norm": 0.7187327742576599, "learning_rate": 0.0019677790563866512, "loss": 0.9669, "step": 5600 }, { "epoch": 1.6139240506329116, "grad_norm": 1.2659200429916382, "learning_rate": 0.001967721518987342, "loss": 1.1205, "step": 5610 }, { "epoch": 1.616800920598389, "grad_norm": 0.9723079800605774, "learning_rate": 0.0019676639815880323, "loss": 1.0028, "step": 5620 }, { "epoch": 1.6196777905638666, "grad_norm": 2.0927560329437256, "learning_rate": 0.001967606444188723, "loss": 0.7559, "step": 5630 }, { "epoch": 1.6225546605293442, "grad_norm": 1.6257706880569458, "learning_rate": 0.001967548906789413, "loss": 0.8471, "step": 5640 }, { "epoch": 1.6254315304948217, "grad_norm": 1.927612543106079, "learning_rate": 0.0019674913693901036, "loss": 0.9671, "step": 5650 }, { "epoch": 1.6283084004602992, "grad_norm": 1.2510167360305786, "learning_rate": 0.001967433831990794, "loss": 0.9332, "step": 5660 }, { "epoch": 1.6311852704257768, "grad_norm": 1.7062827348709106, "learning_rate": 0.0019673762945914847, "loss": 0.8595, "step": 5670 }, { "epoch": 1.6340621403912543, "grad_norm": 0.9790545701980591, "learning_rate": 0.001967318757192175, "loss": 1.1493, "step": 5680 }, { "epoch": 1.6369390103567318, "grad_norm": 1.377563238143921, "learning_rate": 0.0019672612197928654, "loss": 0.9663, "step": 5690 }, { "epoch": 1.6398158803222094, "grad_norm": 1.4733402729034424, "learning_rate": 0.001967203682393556, "loss": 0.8227, "step": 5700 }, { "epoch": 1.642692750287687, "grad_norm": 1.5283524990081787, "learning_rate": 0.001967146144994246, "loss": 0.9254, "step": 5710 }, { "epoch": 1.6455696202531644, "grad_norm": 1.9953604936599731, "learning_rate": 0.0019670886075949367, "loss": 0.9818, "step": 5720 }, { "epoch": 1.648446490218642, "grad_norm": 1.3435944318771362, "learning_rate": 0.0019670310701956273, "loss": 1.0041, "step": 5730 }, { "epoch": 1.6513233601841195, "grad_norm": 1.0958802700042725, "learning_rate": 0.001966973532796318, "loss": 0.8462, "step": 5740 }, { "epoch": 1.6542002301495973, "grad_norm": 0.8559074997901917, "learning_rate": 0.0019669159953970084, "loss": 0.942, "step": 5750 }, { "epoch": 1.6570771001150748, "grad_norm": 0.9003471732139587, "learning_rate": 0.0019668584579976985, "loss": 1.0694, "step": 5760 }, { "epoch": 1.6599539700805523, "grad_norm": 1.7816433906555176, "learning_rate": 0.001966800920598389, "loss": 1.0329, "step": 5770 }, { "epoch": 1.6628308400460299, "grad_norm": 2.1422488689422607, "learning_rate": 0.0019667433831990796, "loss": 1.003, "step": 5780 }, { "epoch": 1.6657077100115076, "grad_norm": 1.2588142156600952, "learning_rate": 0.0019666858457997698, "loss": 1.1338, "step": 5790 }, { "epoch": 1.6685845799769852, "grad_norm": 1.2118977308273315, "learning_rate": 0.0019666283084004603, "loss": 0.8802, "step": 5800 }, { "epoch": 1.6714614499424627, "grad_norm": 1.257858157157898, "learning_rate": 0.001966570771001151, "loss": 0.9912, "step": 5810 }, { "epoch": 1.6743383199079402, "grad_norm": 1.529067039489746, "learning_rate": 0.001966513233601841, "loss": 1.0501, "step": 5820 }, { "epoch": 1.6772151898734178, "grad_norm": 1.0805238485336304, "learning_rate": 0.0019664556962025316, "loss": 0.7604, "step": 5830 }, { "epoch": 1.6800920598388953, "grad_norm": 1.2759897708892822, "learning_rate": 0.001966398158803222, "loss": 0.9399, "step": 5840 }, { "epoch": 1.6829689298043728, "grad_norm": 1.7296700477600098, "learning_rate": 0.0019663406214039127, "loss": 0.865, "step": 5850 }, { "epoch": 1.6858457997698504, "grad_norm": 1.113838791847229, "learning_rate": 0.0019662830840046033, "loss": 0.9835, "step": 5860 }, { "epoch": 1.688722669735328, "grad_norm": 2.4163668155670166, "learning_rate": 0.0019662255466052934, "loss": 1.016, "step": 5870 }, { "epoch": 1.6915995397008055, "grad_norm": 1.3507329225540161, "learning_rate": 0.001966168009205984, "loss": 0.9529, "step": 5880 }, { "epoch": 1.694476409666283, "grad_norm": 1.8439642190933228, "learning_rate": 0.0019661104718066745, "loss": 0.9984, "step": 5890 }, { "epoch": 1.6973532796317605, "grad_norm": 0.8246824741363525, "learning_rate": 0.0019660529344073647, "loss": 1.0383, "step": 5900 }, { "epoch": 1.700230149597238, "grad_norm": 1.2345151901245117, "learning_rate": 0.0019659953970080552, "loss": 0.9222, "step": 5910 }, { "epoch": 1.7031070195627158, "grad_norm": 1.0021382570266724, "learning_rate": 0.001965937859608746, "loss": 1.0414, "step": 5920 }, { "epoch": 1.7059838895281934, "grad_norm": 0.8158469796180725, "learning_rate": 0.001965880322209436, "loss": 0.9634, "step": 5930 }, { "epoch": 1.7088607594936709, "grad_norm": 2.2509491443634033, "learning_rate": 0.0019658227848101265, "loss": 1.2325, "step": 5940 }, { "epoch": 1.7117376294591484, "grad_norm": 1.0486177206039429, "learning_rate": 0.001965765247410817, "loss": 1.0711, "step": 5950 }, { "epoch": 1.714614499424626, "grad_norm": 1.0958157777786255, "learning_rate": 0.0019657077100115076, "loss": 0.9252, "step": 5960 }, { "epoch": 1.7174913693901037, "grad_norm": 1.9800505638122559, "learning_rate": 0.001965650172612198, "loss": 1.0058, "step": 5970 }, { "epoch": 1.7203682393555813, "grad_norm": 1.424269437789917, "learning_rate": 0.0019655926352128883, "loss": 1.0238, "step": 5980 }, { "epoch": 1.7232451093210588, "grad_norm": 1.0805635452270508, "learning_rate": 0.001965535097813579, "loss": 0.9598, "step": 5990 }, { "epoch": 1.7261219792865363, "grad_norm": 1.8274463415145874, "learning_rate": 0.0019654775604142694, "loss": 1.1278, "step": 6000 }, { "epoch": 1.7289988492520139, "grad_norm": 1.6597094535827637, "learning_rate": 0.0019654200230149596, "loss": 0.853, "step": 6010 }, { "epoch": 1.7318757192174914, "grad_norm": 1.2848405838012695, "learning_rate": 0.00196536248561565, "loss": 0.864, "step": 6020 }, { "epoch": 1.734752589182969, "grad_norm": 1.196657657623291, "learning_rate": 0.0019653049482163407, "loss": 1.0856, "step": 6030 }, { "epoch": 1.7376294591484465, "grad_norm": 1.6231333017349243, "learning_rate": 0.0019652474108170313, "loss": 0.8153, "step": 6040 }, { "epoch": 1.740506329113924, "grad_norm": 1.8734976053237915, "learning_rate": 0.0019651898734177214, "loss": 0.9965, "step": 6050 }, { "epoch": 1.7433831990794015, "grad_norm": 1.4256383180618286, "learning_rate": 0.001965132336018412, "loss": 1.1432, "step": 6060 }, { "epoch": 1.746260069044879, "grad_norm": 1.014739990234375, "learning_rate": 0.0019650747986191025, "loss": 0.8431, "step": 6070 }, { "epoch": 1.7491369390103566, "grad_norm": 1.233275294303894, "learning_rate": 0.001965017261219793, "loss": 1.2006, "step": 6080 }, { "epoch": 1.7520138089758341, "grad_norm": 2.9766714572906494, "learning_rate": 0.001964959723820483, "loss": 0.8519, "step": 6090 }, { "epoch": 1.754890678941312, "grad_norm": 1.3367804288864136, "learning_rate": 0.0019649021864211738, "loss": 0.9656, "step": 6100 }, { "epoch": 1.7577675489067894, "grad_norm": 2.772707462310791, "learning_rate": 0.0019648446490218643, "loss": 1.0743, "step": 6110 }, { "epoch": 1.760644418872267, "grad_norm": 1.319153070449829, "learning_rate": 0.0019647871116225545, "loss": 1.0967, "step": 6120 }, { "epoch": 1.7635212888377445, "grad_norm": 1.538698434829712, "learning_rate": 0.001964729574223245, "loss": 0.8955, "step": 6130 }, { "epoch": 1.766398158803222, "grad_norm": 1.7634150981903076, "learning_rate": 0.0019646720368239356, "loss": 1.0108, "step": 6140 }, { "epoch": 1.7692750287686998, "grad_norm": 0.8294065594673157, "learning_rate": 0.001964614499424626, "loss": 0.9295, "step": 6150 }, { "epoch": 1.7721518987341773, "grad_norm": 0.9945739507675171, "learning_rate": 0.0019645569620253167, "loss": 0.7457, "step": 6160 }, { "epoch": 1.7750287686996549, "grad_norm": 0.7011457085609436, "learning_rate": 0.001964499424626007, "loss": 0.7021, "step": 6170 }, { "epoch": 1.7779056386651324, "grad_norm": 0.9480445981025696, "learning_rate": 0.0019644418872266974, "loss": 0.8853, "step": 6180 }, { "epoch": 1.78078250863061, "grad_norm": 0.8187589049339294, "learning_rate": 0.001964384349827388, "loss": 0.9493, "step": 6190 }, { "epoch": 1.7836593785960875, "grad_norm": 0.9639753103256226, "learning_rate": 0.001964326812428078, "loss": 0.9574, "step": 6200 }, { "epoch": 1.786536248561565, "grad_norm": 1.9420804977416992, "learning_rate": 0.0019642692750287687, "loss": 1.2418, "step": 6210 }, { "epoch": 1.7894131185270425, "grad_norm": 0.8306291699409485, "learning_rate": 0.0019642117376294592, "loss": 0.8075, "step": 6220 }, { "epoch": 1.79228998849252, "grad_norm": 1.2441740036010742, "learning_rate": 0.0019641542002301494, "loss": 1.0424, "step": 6230 }, { "epoch": 1.7951668584579976, "grad_norm": 2.5973711013793945, "learning_rate": 0.00196409666283084, "loss": 0.9699, "step": 6240 }, { "epoch": 1.7980437284234752, "grad_norm": 1.4608010053634644, "learning_rate": 0.0019640391254315305, "loss": 0.8701, "step": 6250 }, { "epoch": 1.8009205983889527, "grad_norm": 0.8442615270614624, "learning_rate": 0.001963981588032221, "loss": 0.9272, "step": 6260 }, { "epoch": 1.8037974683544302, "grad_norm": 1.0396840572357178, "learning_rate": 0.0019639240506329116, "loss": 0.9025, "step": 6270 }, { "epoch": 1.806674338319908, "grad_norm": 2.1565096378326416, "learning_rate": 0.001963866513233602, "loss": 0.885, "step": 6280 }, { "epoch": 1.8095512082853855, "grad_norm": 1.1060386896133423, "learning_rate": 0.0019638089758342923, "loss": 0.8444, "step": 6290 }, { "epoch": 1.812428078250863, "grad_norm": 0.9115983247756958, "learning_rate": 0.001963751438434983, "loss": 0.9932, "step": 6300 }, { "epoch": 1.8153049482163406, "grad_norm": 0.9901440739631653, "learning_rate": 0.001963693901035673, "loss": 1.0261, "step": 6310 }, { "epoch": 1.8181818181818183, "grad_norm": 1.4480090141296387, "learning_rate": 0.0019636363636363636, "loss": 1.1321, "step": 6320 }, { "epoch": 1.8210586881472959, "grad_norm": 1.0342729091644287, "learning_rate": 0.001963578826237054, "loss": 1.0014, "step": 6330 }, { "epoch": 1.8239355581127734, "grad_norm": 1.4175995588302612, "learning_rate": 0.0019635212888377447, "loss": 1.1237, "step": 6340 }, { "epoch": 1.826812428078251, "grad_norm": 1.2950087785720825, "learning_rate": 0.001963463751438435, "loss": 1.173, "step": 6350 }, { "epoch": 1.8296892980437285, "grad_norm": 1.2976531982421875, "learning_rate": 0.0019634062140391254, "loss": 0.8536, "step": 6360 }, { "epoch": 1.832566168009206, "grad_norm": 1.513678789138794, "learning_rate": 0.001963348676639816, "loss": 1.1871, "step": 6370 }, { "epoch": 1.8354430379746836, "grad_norm": 2.133594512939453, "learning_rate": 0.0019632911392405065, "loss": 1.0422, "step": 6380 }, { "epoch": 1.838319907940161, "grad_norm": 4.111898899078369, "learning_rate": 0.001963233601841197, "loss": 1.0554, "step": 6390 }, { "epoch": 1.8411967779056386, "grad_norm": 1.3178684711456299, "learning_rate": 0.0019631760644418872, "loss": 1.0886, "step": 6400 }, { "epoch": 1.8440736478711162, "grad_norm": 1.2806378602981567, "learning_rate": 0.001963118527042578, "loss": 0.9394, "step": 6410 }, { "epoch": 1.8469505178365937, "grad_norm": 0.9576355814933777, "learning_rate": 0.001963060989643268, "loss": 0.8754, "step": 6420 }, { "epoch": 1.8498273878020712, "grad_norm": 1.0051460266113281, "learning_rate": 0.0019630034522439585, "loss": 0.9167, "step": 6430 }, { "epoch": 1.8527042577675488, "grad_norm": 1.3363070487976074, "learning_rate": 0.001962945914844649, "loss": 1.0938, "step": 6440 }, { "epoch": 1.8555811277330263, "grad_norm": 1.0241373777389526, "learning_rate": 0.0019628883774453396, "loss": 0.9522, "step": 6450 }, { "epoch": 1.858457997698504, "grad_norm": 1.478050947189331, "learning_rate": 0.00196283084004603, "loss": 1.0155, "step": 6460 }, { "epoch": 1.8613348676639816, "grad_norm": 1.7375435829162598, "learning_rate": 0.0019627733026467203, "loss": 1.0942, "step": 6470 }, { "epoch": 1.8642117376294591, "grad_norm": 1.2981493473052979, "learning_rate": 0.001962715765247411, "loss": 0.9188, "step": 6480 }, { "epoch": 1.8670886075949367, "grad_norm": 1.366586446762085, "learning_rate": 0.0019626582278481014, "loss": 1.1003, "step": 6490 }, { "epoch": 1.8699654775604144, "grad_norm": 1.0113660097122192, "learning_rate": 0.001962600690448792, "loss": 0.8451, "step": 6500 }, { "epoch": 1.872842347525892, "grad_norm": 1.8289450407028198, "learning_rate": 0.001962543153049482, "loss": 0.9642, "step": 6510 }, { "epoch": 1.8757192174913695, "grad_norm": 1.1142241954803467, "learning_rate": 0.0019624856156501727, "loss": 0.9683, "step": 6520 }, { "epoch": 1.878596087456847, "grad_norm": 0.7744626402854919, "learning_rate": 0.001962428078250863, "loss": 0.9278, "step": 6530 }, { "epoch": 1.8814729574223246, "grad_norm": 1.0185136795043945, "learning_rate": 0.0019623705408515534, "loss": 0.9568, "step": 6540 }, { "epoch": 1.884349827387802, "grad_norm": 1.9642219543457031, "learning_rate": 0.001962313003452244, "loss": 1.2604, "step": 6550 }, { "epoch": 1.8872266973532796, "grad_norm": 0.9570397138595581, "learning_rate": 0.0019622554660529345, "loss": 0.8827, "step": 6560 }, { "epoch": 1.8901035673187572, "grad_norm": 1.0775362253189087, "learning_rate": 0.001962197928653625, "loss": 1.187, "step": 6570 }, { "epoch": 1.8929804372842347, "grad_norm": 1.8182833194732666, "learning_rate": 0.0019621403912543156, "loss": 0.8504, "step": 6580 }, { "epoch": 1.8958573072497122, "grad_norm": 1.4623595476150513, "learning_rate": 0.0019620828538550058, "loss": 0.9719, "step": 6590 }, { "epoch": 1.8987341772151898, "grad_norm": 1.3596614599227905, "learning_rate": 0.0019620253164556963, "loss": 0.7904, "step": 6600 }, { "epoch": 1.9016110471806673, "grad_norm": 1.1858710050582886, "learning_rate": 0.001961967779056387, "loss": 1.1603, "step": 6610 }, { "epoch": 1.9044879171461448, "grad_norm": 1.8223772048950195, "learning_rate": 0.001961910241657077, "loss": 1.0339, "step": 6620 }, { "epoch": 1.9073647871116226, "grad_norm": 1.1996177434921265, "learning_rate": 0.0019618527042577676, "loss": 1.0542, "step": 6630 }, { "epoch": 1.9102416570771001, "grad_norm": 1.001421570777893, "learning_rate": 0.0019617951668584577, "loss": 0.7553, "step": 6640 }, { "epoch": 1.9131185270425777, "grad_norm": 1.0026689767837524, "learning_rate": 0.0019617376294591483, "loss": 0.9952, "step": 6650 }, { "epoch": 1.9159953970080552, "grad_norm": 1.8716826438903809, "learning_rate": 0.001961680092059839, "loss": 1.0894, "step": 6660 }, { "epoch": 1.9188722669735327, "grad_norm": 1.1237506866455078, "learning_rate": 0.0019616225546605294, "loss": 0.8939, "step": 6670 }, { "epoch": 1.9217491369390105, "grad_norm": 1.2003110647201538, "learning_rate": 0.00196156501726122, "loss": 1.0153, "step": 6680 }, { "epoch": 1.924626006904488, "grad_norm": 1.6478465795516968, "learning_rate": 0.0019615074798619105, "loss": 1.1918, "step": 6690 }, { "epoch": 1.9275028768699656, "grad_norm": 0.9771629571914673, "learning_rate": 0.0019614499424626007, "loss": 0.9274, "step": 6700 }, { "epoch": 1.9303797468354431, "grad_norm": 1.73612642288208, "learning_rate": 0.0019613924050632912, "loss": 0.9238, "step": 6710 }, { "epoch": 1.9332566168009206, "grad_norm": 1.7791178226470947, "learning_rate": 0.001961334867663982, "loss": 0.818, "step": 6720 }, { "epoch": 1.9361334867663982, "grad_norm": 0.8848963379859924, "learning_rate": 0.001961277330264672, "loss": 1.114, "step": 6730 }, { "epoch": 1.9390103567318757, "grad_norm": 1.0899040699005127, "learning_rate": 0.0019612197928653625, "loss": 0.8894, "step": 6740 }, { "epoch": 1.9418872266973533, "grad_norm": 1.245506763458252, "learning_rate": 0.001961162255466053, "loss": 0.8312, "step": 6750 }, { "epoch": 1.9447640966628308, "grad_norm": 1.9296483993530273, "learning_rate": 0.001961104718066743, "loss": 0.9443, "step": 6760 }, { "epoch": 1.9476409666283083, "grad_norm": 1.2338852882385254, "learning_rate": 0.0019610471806674338, "loss": 0.9879, "step": 6770 }, { "epoch": 1.9505178365937859, "grad_norm": 1.284226417541504, "learning_rate": 0.0019609896432681243, "loss": 0.8974, "step": 6780 }, { "epoch": 1.9533947065592634, "grad_norm": 1.6487181186676025, "learning_rate": 0.001960932105868815, "loss": 0.9208, "step": 6790 }, { "epoch": 1.956271576524741, "grad_norm": 1.8867918252944946, "learning_rate": 0.0019608745684695054, "loss": 0.9106, "step": 6800 }, { "epoch": 1.9591484464902187, "grad_norm": 1.4253908395767212, "learning_rate": 0.0019608170310701956, "loss": 0.8954, "step": 6810 }, { "epoch": 1.9620253164556962, "grad_norm": 0.9619266986846924, "learning_rate": 0.001960759493670886, "loss": 0.9369, "step": 6820 }, { "epoch": 1.9649021864211738, "grad_norm": 1.170180082321167, "learning_rate": 0.0019607019562715767, "loss": 0.8892, "step": 6830 }, { "epoch": 1.9677790563866513, "grad_norm": 0.8833518028259277, "learning_rate": 0.001960644418872267, "loss": 0.7976, "step": 6840 }, { "epoch": 1.9706559263521288, "grad_norm": 1.2135647535324097, "learning_rate": 0.0019605868814729574, "loss": 1.1288, "step": 6850 }, { "epoch": 1.9735327963176066, "grad_norm": 1.5933599472045898, "learning_rate": 0.001960529344073648, "loss": 0.9854, "step": 6860 }, { "epoch": 1.9764096662830841, "grad_norm": 1.8876241445541382, "learning_rate": 0.0019604718066743385, "loss": 1.121, "step": 6870 }, { "epoch": 1.9792865362485617, "grad_norm": 1.6394834518432617, "learning_rate": 0.0019604142692750287, "loss": 0.9213, "step": 6880 }, { "epoch": 1.9821634062140392, "grad_norm": 1.0560781955718994, "learning_rate": 0.0019603567318757192, "loss": 0.9307, "step": 6890 }, { "epoch": 1.9850402761795167, "grad_norm": 2.484130382537842, "learning_rate": 0.00196029919447641, "loss": 0.9312, "step": 6900 }, { "epoch": 1.9879171461449943, "grad_norm": 0.9542770981788635, "learning_rate": 0.0019602416570771003, "loss": 0.9019, "step": 6910 }, { "epoch": 1.9907940161104718, "grad_norm": 1.0500613451004028, "learning_rate": 0.0019601841196777905, "loss": 0.9909, "step": 6920 }, { "epoch": 1.9936708860759493, "grad_norm": 1.5530672073364258, "learning_rate": 0.001960126582278481, "loss": 1.0028, "step": 6930 }, { "epoch": 1.9965477560414269, "grad_norm": 1.2526204586029053, "learning_rate": 0.0019600690448791716, "loss": 0.9323, "step": 6940 }, { "epoch": 1.9994246260069044, "grad_norm": 1.0207527875900269, "learning_rate": 0.0019600115074798617, "loss": 0.8718, "step": 6950 }, { "epoch": 2.002301495972382, "grad_norm": 1.2006338834762573, "learning_rate": 0.0019599539700805523, "loss": 0.7317, "step": 6960 }, { "epoch": 2.0051783659378595, "grad_norm": 1.0252270698547363, "learning_rate": 0.001959896432681243, "loss": 0.8728, "step": 6970 }, { "epoch": 2.008055235903337, "grad_norm": 0.9490408897399902, "learning_rate": 0.0019598388952819334, "loss": 0.9126, "step": 6980 }, { "epoch": 2.0109321058688145, "grad_norm": 3.008258819580078, "learning_rate": 0.001959781357882624, "loss": 0.8858, "step": 6990 }, { "epoch": 2.013808975834292, "grad_norm": 1.3778491020202637, "learning_rate": 0.001959723820483314, "loss": 0.8736, "step": 7000 }, { "epoch": 2.01668584579977, "grad_norm": 1.8759028911590576, "learning_rate": 0.0019596662830840047, "loss": 0.6675, "step": 7010 }, { "epoch": 2.0195627157652476, "grad_norm": 1.5899657011032104, "learning_rate": 0.0019596087456846953, "loss": 0.8471, "step": 7020 }, { "epoch": 2.022439585730725, "grad_norm": 1.2791998386383057, "learning_rate": 0.0019595512082853854, "loss": 0.795, "step": 7030 }, { "epoch": 2.0253164556962027, "grad_norm": 1.6339887380599976, "learning_rate": 0.001959493670886076, "loss": 0.7431, "step": 7040 }, { "epoch": 2.02819332566168, "grad_norm": 1.1396450996398926, "learning_rate": 0.0019594361334867665, "loss": 0.94, "step": 7050 }, { "epoch": 2.0310701956271577, "grad_norm": 1.6637332439422607, "learning_rate": 0.0019593785960874566, "loss": 0.9133, "step": 7060 }, { "epoch": 2.0339470655926353, "grad_norm": 1.2948411703109741, "learning_rate": 0.001959321058688147, "loss": 1.0848, "step": 7070 }, { "epoch": 2.036823935558113, "grad_norm": 0.8659104108810425, "learning_rate": 0.0019592635212888378, "loss": 0.8465, "step": 7080 }, { "epoch": 2.0397008055235903, "grad_norm": 4.163944721221924, "learning_rate": 0.0019592059838895283, "loss": 0.7752, "step": 7090 }, { "epoch": 2.042577675489068, "grad_norm": 1.2813076972961426, "learning_rate": 0.001959148446490219, "loss": 0.7602, "step": 7100 }, { "epoch": 2.0454545454545454, "grad_norm": 1.461391568183899, "learning_rate": 0.001959090909090909, "loss": 0.8799, "step": 7110 }, { "epoch": 2.048331415420023, "grad_norm": 1.5577610731124878, "learning_rate": 0.0019590333716915996, "loss": 0.8732, "step": 7120 }, { "epoch": 2.0512082853855005, "grad_norm": 2.005366563796997, "learning_rate": 0.00195897583429229, "loss": 0.8927, "step": 7130 }, { "epoch": 2.054085155350978, "grad_norm": 1.5695126056671143, "learning_rate": 0.0019589182968929803, "loss": 1.0748, "step": 7140 }, { "epoch": 2.0569620253164556, "grad_norm": 1.0716651678085327, "learning_rate": 0.001958860759493671, "loss": 0.8507, "step": 7150 }, { "epoch": 2.059838895281933, "grad_norm": 1.0001213550567627, "learning_rate": 0.0019588032220943614, "loss": 0.6663, "step": 7160 }, { "epoch": 2.0627157652474106, "grad_norm": 1.1310083866119385, "learning_rate": 0.001958745684695052, "loss": 0.8159, "step": 7170 }, { "epoch": 2.065592635212888, "grad_norm": 1.7745803594589233, "learning_rate": 0.001958688147295742, "loss": 0.9331, "step": 7180 }, { "epoch": 2.068469505178366, "grad_norm": 0.6914129853248596, "learning_rate": 0.0019586306098964327, "loss": 0.7979, "step": 7190 }, { "epoch": 2.0713463751438437, "grad_norm": 0.7069788575172424, "learning_rate": 0.0019585730724971232, "loss": 0.9563, "step": 7200 }, { "epoch": 2.074223245109321, "grad_norm": 1.4323127269744873, "learning_rate": 0.001958515535097814, "loss": 0.922, "step": 7210 }, { "epoch": 2.0771001150747987, "grad_norm": 1.0945005416870117, "learning_rate": 0.001958457997698504, "loss": 0.9676, "step": 7220 }, { "epoch": 2.0799769850402763, "grad_norm": 1.2511271238327026, "learning_rate": 0.0019584004602991945, "loss": 0.8463, "step": 7230 }, { "epoch": 2.082853855005754, "grad_norm": 1.4378536939620972, "learning_rate": 0.001958342922899885, "loss": 0.9271, "step": 7240 }, { "epoch": 2.0857307249712314, "grad_norm": 1.2770190238952637, "learning_rate": 0.001958285385500575, "loss": 1.0115, "step": 7250 }, { "epoch": 2.088607594936709, "grad_norm": 1.8121916055679321, "learning_rate": 0.0019582278481012657, "loss": 0.9031, "step": 7260 }, { "epoch": 2.0914844649021864, "grad_norm": 1.3418368101119995, "learning_rate": 0.0019581703107019563, "loss": 1.0234, "step": 7270 }, { "epoch": 2.094361334867664, "grad_norm": 1.6501801013946533, "learning_rate": 0.001958112773302647, "loss": 1.0748, "step": 7280 }, { "epoch": 2.0972382048331415, "grad_norm": 1.0035535097122192, "learning_rate": 0.0019580552359033374, "loss": 0.7984, "step": 7290 }, { "epoch": 2.100115074798619, "grad_norm": 1.105301856994629, "learning_rate": 0.0019579976985040276, "loss": 0.8317, "step": 7300 }, { "epoch": 2.1029919447640966, "grad_norm": 1.2318569421768188, "learning_rate": 0.001957940161104718, "loss": 0.8027, "step": 7310 }, { "epoch": 2.105868814729574, "grad_norm": 1.922943115234375, "learning_rate": 0.0019578826237054087, "loss": 1.0012, "step": 7320 }, { "epoch": 2.1087456846950516, "grad_norm": 1.387296438217163, "learning_rate": 0.001957825086306099, "loss": 0.9644, "step": 7330 }, { "epoch": 2.111622554660529, "grad_norm": 3.0226733684539795, "learning_rate": 0.0019577675489067894, "loss": 1.1553, "step": 7340 }, { "epoch": 2.1144994246260067, "grad_norm": 1.8396462202072144, "learning_rate": 0.00195771001150748, "loss": 0.92, "step": 7350 }, { "epoch": 2.1173762945914847, "grad_norm": 1.972549557685852, "learning_rate": 0.00195765247410817, "loss": 0.9262, "step": 7360 }, { "epoch": 2.1202531645569622, "grad_norm": 0.8375453948974609, "learning_rate": 0.0019575949367088607, "loss": 0.7595, "step": 7370 }, { "epoch": 2.1231300345224398, "grad_norm": 1.1547627449035645, "learning_rate": 0.001957537399309551, "loss": 0.8037, "step": 7380 }, { "epoch": 2.1260069044879173, "grad_norm": 0.964600145816803, "learning_rate": 0.0019574798619102418, "loss": 0.9332, "step": 7390 }, { "epoch": 2.128883774453395, "grad_norm": 1.3696529865264893, "learning_rate": 0.0019574223245109323, "loss": 1.0023, "step": 7400 }, { "epoch": 2.1317606444188724, "grad_norm": 2.2802999019622803, "learning_rate": 0.001957364787111623, "loss": 0.8031, "step": 7410 }, { "epoch": 2.13463751438435, "grad_norm": 4.2220258712768555, "learning_rate": 0.001957307249712313, "loss": 0.7257, "step": 7420 }, { "epoch": 2.1375143843498274, "grad_norm": 1.352393627166748, "learning_rate": 0.0019572497123130036, "loss": 0.8273, "step": 7430 }, { "epoch": 2.140391254315305, "grad_norm": 1.0505796670913696, "learning_rate": 0.0019571921749136937, "loss": 0.903, "step": 7440 }, { "epoch": 2.1432681242807825, "grad_norm": 1.7660630941390991, "learning_rate": 0.0019571346375143843, "loss": 0.8703, "step": 7450 }, { "epoch": 2.14614499424626, "grad_norm": 1.1148422956466675, "learning_rate": 0.001957077100115075, "loss": 0.8907, "step": 7460 }, { "epoch": 2.1490218642117376, "grad_norm": 3.4447412490844727, "learning_rate": 0.001957019562715765, "loss": 1.1235, "step": 7470 }, { "epoch": 2.151898734177215, "grad_norm": 1.3223644495010376, "learning_rate": 0.0019569620253164556, "loss": 0.9713, "step": 7480 }, { "epoch": 2.1547756041426926, "grad_norm": 0.9210221767425537, "learning_rate": 0.001956904487917146, "loss": 0.8428, "step": 7490 }, { "epoch": 2.15765247410817, "grad_norm": 1.2732173204421997, "learning_rate": 0.0019568469505178367, "loss": 0.867, "step": 7500 }, { "epoch": 2.1605293440736477, "grad_norm": 1.870651125907898, "learning_rate": 0.0019567894131185272, "loss": 1.0539, "step": 7510 }, { "epoch": 2.1634062140391253, "grad_norm": 1.4687660932540894, "learning_rate": 0.001956731875719218, "loss": 0.9516, "step": 7520 }, { "epoch": 2.166283084004603, "grad_norm": 1.3182060718536377, "learning_rate": 0.001956674338319908, "loss": 1.0015, "step": 7530 }, { "epoch": 2.1691599539700803, "grad_norm": 1.8979246616363525, "learning_rate": 0.0019566168009205985, "loss": 0.8024, "step": 7540 }, { "epoch": 2.1720368239355583, "grad_norm": 1.9328176975250244, "learning_rate": 0.0019565592635212886, "loss": 0.8093, "step": 7550 }, { "epoch": 2.174913693901036, "grad_norm": 1.6875832080841064, "learning_rate": 0.001956501726121979, "loss": 0.9317, "step": 7560 }, { "epoch": 2.1777905638665134, "grad_norm": 1.5850751399993896, "learning_rate": 0.0019564441887226698, "loss": 0.8693, "step": 7570 }, { "epoch": 2.180667433831991, "grad_norm": 1.0952749252319336, "learning_rate": 0.0019563866513233603, "loss": 0.8966, "step": 7580 }, { "epoch": 2.1835443037974684, "grad_norm": 0.8979012370109558, "learning_rate": 0.0019563291139240505, "loss": 1.0452, "step": 7590 }, { "epoch": 2.186421173762946, "grad_norm": 1.7960313558578491, "learning_rate": 0.001956271576524741, "loss": 0.7054, "step": 7600 }, { "epoch": 2.1892980437284235, "grad_norm": 1.2270758152008057, "learning_rate": 0.0019562140391254316, "loss": 1.0169, "step": 7610 }, { "epoch": 2.192174913693901, "grad_norm": 0.7222998738288879, "learning_rate": 0.001956156501726122, "loss": 0.9439, "step": 7620 }, { "epoch": 2.1950517836593786, "grad_norm": 1.1007078886032104, "learning_rate": 0.0019560989643268127, "loss": 0.9163, "step": 7630 }, { "epoch": 2.197928653624856, "grad_norm": 1.577715516090393, "learning_rate": 0.001956041426927503, "loss": 1.0526, "step": 7640 }, { "epoch": 2.2008055235903337, "grad_norm": 1.5268994569778442, "learning_rate": 0.0019559838895281934, "loss": 0.9789, "step": 7650 }, { "epoch": 2.203682393555811, "grad_norm": 1.3567746877670288, "learning_rate": 0.001955926352128884, "loss": 0.9476, "step": 7660 }, { "epoch": 2.2065592635212887, "grad_norm": 1.064050555229187, "learning_rate": 0.001955868814729574, "loss": 0.8664, "step": 7670 }, { "epoch": 2.2094361334867663, "grad_norm": 1.3304646015167236, "learning_rate": 0.0019558112773302647, "loss": 0.7896, "step": 7680 }, { "epoch": 2.212313003452244, "grad_norm": 1.5740517377853394, "learning_rate": 0.0019557537399309552, "loss": 0.8391, "step": 7690 }, { "epoch": 2.2151898734177213, "grad_norm": 0.9226892590522766, "learning_rate": 0.001955696202531646, "loss": 0.858, "step": 7700 }, { "epoch": 2.218066743383199, "grad_norm": 1.012349009513855, "learning_rate": 0.001955638665132336, "loss": 0.8485, "step": 7710 }, { "epoch": 2.220943613348677, "grad_norm": 1.041441798210144, "learning_rate": 0.0019555811277330265, "loss": 1.0249, "step": 7720 }, { "epoch": 2.2238204833141544, "grad_norm": 1.5240931510925293, "learning_rate": 0.001955523590333717, "loss": 0.9126, "step": 7730 }, { "epoch": 2.226697353279632, "grad_norm": 2.401684522628784, "learning_rate": 0.0019554660529344076, "loss": 1.0351, "step": 7740 }, { "epoch": 2.2295742232451095, "grad_norm": 1.7391464710235596, "learning_rate": 0.0019554085155350977, "loss": 0.9616, "step": 7750 }, { "epoch": 2.232451093210587, "grad_norm": 1.2305257320404053, "learning_rate": 0.0019553509781357883, "loss": 1.2042, "step": 7760 }, { "epoch": 2.2353279631760645, "grad_norm": 0.7424312233924866, "learning_rate": 0.001955293440736479, "loss": 0.7953, "step": 7770 }, { "epoch": 2.238204833141542, "grad_norm": 0.9165663123130798, "learning_rate": 0.001955235903337169, "loss": 0.8423, "step": 7780 }, { "epoch": 2.2410817031070196, "grad_norm": 1.1613860130310059, "learning_rate": 0.0019551783659378596, "loss": 0.8637, "step": 7790 }, { "epoch": 2.243958573072497, "grad_norm": 1.8664753437042236, "learning_rate": 0.00195512082853855, "loss": 0.9206, "step": 7800 }, { "epoch": 2.2468354430379747, "grad_norm": 1.171364426612854, "learning_rate": 0.0019550632911392407, "loss": 0.9488, "step": 7810 }, { "epoch": 2.249712313003452, "grad_norm": 2.309968948364258, "learning_rate": 0.0019550057537399313, "loss": 0.8201, "step": 7820 }, { "epoch": 2.2525891829689297, "grad_norm": 0.9490132331848145, "learning_rate": 0.0019549482163406214, "loss": 0.7772, "step": 7830 }, { "epoch": 2.2554660529344073, "grad_norm": 1.0413613319396973, "learning_rate": 0.001954890678941312, "loss": 0.9519, "step": 7840 }, { "epoch": 2.258342922899885, "grad_norm": 1.1290113925933838, "learning_rate": 0.0019548331415420025, "loss": 0.8574, "step": 7850 }, { "epoch": 2.2612197928653623, "grad_norm": 1.8768174648284912, "learning_rate": 0.0019547756041426926, "loss": 0.9971, "step": 7860 }, { "epoch": 2.26409666283084, "grad_norm": 1.0770388841629028, "learning_rate": 0.001954718066743383, "loss": 0.7906, "step": 7870 }, { "epoch": 2.2669735327963174, "grad_norm": 1.644849181175232, "learning_rate": 0.0019546605293440738, "loss": 0.9238, "step": 7880 }, { "epoch": 2.2698504027617954, "grad_norm": 1.2938050031661987, "learning_rate": 0.001954602991944764, "loss": 0.88, "step": 7890 }, { "epoch": 2.2727272727272725, "grad_norm": 0.583979606628418, "learning_rate": 0.0019545454545454545, "loss": 1.0036, "step": 7900 }, { "epoch": 2.2756041426927505, "grad_norm": 2.43570876121521, "learning_rate": 0.001954487917146145, "loss": 0.9871, "step": 7910 }, { "epoch": 2.278481012658228, "grad_norm": 0.7680619955062866, "learning_rate": 0.0019544303797468356, "loss": 1.2062, "step": 7920 }, { "epoch": 2.2813578826237055, "grad_norm": 1.055029273033142, "learning_rate": 0.001954372842347526, "loss": 0.9753, "step": 7930 }, { "epoch": 2.284234752589183, "grad_norm": 2.548293113708496, "learning_rate": 0.0019543153049482163, "loss": 0.9638, "step": 7940 }, { "epoch": 2.2871116225546606, "grad_norm": 1.1223955154418945, "learning_rate": 0.001954257767548907, "loss": 1.0733, "step": 7950 }, { "epoch": 2.289988492520138, "grad_norm": 1.4215928316116333, "learning_rate": 0.0019542002301495974, "loss": 0.7711, "step": 7960 }, { "epoch": 2.2928653624856157, "grad_norm": 1.0003468990325928, "learning_rate": 0.0019541426927502875, "loss": 1.0717, "step": 7970 }, { "epoch": 2.295742232451093, "grad_norm": 4.125150680541992, "learning_rate": 0.001954085155350978, "loss": 1.0291, "step": 7980 }, { "epoch": 2.2986191024165707, "grad_norm": 1.7558395862579346, "learning_rate": 0.0019540276179516687, "loss": 0.9463, "step": 7990 }, { "epoch": 2.3014959723820483, "grad_norm": 1.696690320968628, "learning_rate": 0.0019539700805523592, "loss": 0.8639, "step": 8000 }, { "epoch": 2.304372842347526, "grad_norm": 2.2557711601257324, "learning_rate": 0.0019539125431530494, "loss": 1.0849, "step": 8010 }, { "epoch": 2.3072497123130034, "grad_norm": 1.191096305847168, "learning_rate": 0.00195385500575374, "loss": 0.7273, "step": 8020 }, { "epoch": 2.310126582278481, "grad_norm": 1.5635251998901367, "learning_rate": 0.0019537974683544305, "loss": 0.9235, "step": 8030 }, { "epoch": 2.3130034522439584, "grad_norm": 1.5115479230880737, "learning_rate": 0.001953739930955121, "loss": 0.9659, "step": 8040 }, { "epoch": 2.315880322209436, "grad_norm": 1.4599906206130981, "learning_rate": 0.001953682393555811, "loss": 0.7626, "step": 8050 }, { "epoch": 2.318757192174914, "grad_norm": 1.927894949913025, "learning_rate": 0.0019536248561565018, "loss": 1.0131, "step": 8060 }, { "epoch": 2.321634062140391, "grad_norm": 1.2091424465179443, "learning_rate": 0.0019535673187571923, "loss": 0.9488, "step": 8070 }, { "epoch": 2.324510932105869, "grad_norm": 1.1572847366333008, "learning_rate": 0.0019535097813578825, "loss": 1.0628, "step": 8080 }, { "epoch": 2.3273878020713465, "grad_norm": 1.2359932661056519, "learning_rate": 0.001953452243958573, "loss": 0.9884, "step": 8090 }, { "epoch": 2.330264672036824, "grad_norm": 1.1201573610305786, "learning_rate": 0.0019533947065592636, "loss": 0.9085, "step": 8100 }, { "epoch": 2.3331415420023016, "grad_norm": 0.9349533915519714, "learning_rate": 0.001953337169159954, "loss": 0.8863, "step": 8110 }, { "epoch": 2.336018411967779, "grad_norm": 1.2600281238555908, "learning_rate": 0.0019532796317606447, "loss": 1.0072, "step": 8120 }, { "epoch": 2.3388952819332567, "grad_norm": 1.5152078866958618, "learning_rate": 0.001953222094361335, "loss": 1.1875, "step": 8130 }, { "epoch": 2.3417721518987342, "grad_norm": 1.0113236904144287, "learning_rate": 0.0019531645569620254, "loss": 0.8635, "step": 8140 }, { "epoch": 2.3446490218642118, "grad_norm": 0.9207028746604919, "learning_rate": 0.001953107019562716, "loss": 0.8255, "step": 8150 }, { "epoch": 2.3475258918296893, "grad_norm": 1.8572219610214233, "learning_rate": 0.001953049482163406, "loss": 1.0013, "step": 8160 }, { "epoch": 2.350402761795167, "grad_norm": 1.1398611068725586, "learning_rate": 0.0019529919447640967, "loss": 0.7661, "step": 8170 }, { "epoch": 2.3532796317606444, "grad_norm": 1.2987456321716309, "learning_rate": 0.0019529344073647872, "loss": 0.9004, "step": 8180 }, { "epoch": 2.356156501726122, "grad_norm": 2.381772756576538, "learning_rate": 0.0019528768699654776, "loss": 0.877, "step": 8190 }, { "epoch": 2.3590333716915994, "grad_norm": 1.936873435974121, "learning_rate": 0.0019528193325661681, "loss": 1.0258, "step": 8200 }, { "epoch": 2.361910241657077, "grad_norm": 0.9593486189842224, "learning_rate": 0.0019527617951668587, "loss": 0.9407, "step": 8210 }, { "epoch": 2.3647871116225545, "grad_norm": 1.1759594678878784, "learning_rate": 0.0019527042577675488, "loss": 0.877, "step": 8220 }, { "epoch": 2.367663981588032, "grad_norm": 0.9277821183204651, "learning_rate": 0.0019526467203682394, "loss": 1.0286, "step": 8230 }, { "epoch": 2.3705408515535096, "grad_norm": 0.9409446716308594, "learning_rate": 0.00195258918296893, "loss": 0.9259, "step": 8240 }, { "epoch": 2.3734177215189876, "grad_norm": 1.1157596111297607, "learning_rate": 0.0019525316455696203, "loss": 1.1652, "step": 8250 }, { "epoch": 2.376294591484465, "grad_norm": 1.2556533813476562, "learning_rate": 0.0019524741081703109, "loss": 1.0928, "step": 8260 }, { "epoch": 2.3791714614499426, "grad_norm": 1.479189395904541, "learning_rate": 0.001952416570771001, "loss": 0.968, "step": 8270 }, { "epoch": 2.38204833141542, "grad_norm": 1.1129392385482788, "learning_rate": 0.0019523590333716916, "loss": 0.9161, "step": 8280 }, { "epoch": 2.3849252013808977, "grad_norm": 1.2509753704071045, "learning_rate": 0.0019523014959723821, "loss": 0.9067, "step": 8290 }, { "epoch": 2.3878020713463752, "grad_norm": 1.9917405843734741, "learning_rate": 0.0019522439585730725, "loss": 1.1422, "step": 8300 }, { "epoch": 2.3906789413118528, "grad_norm": 1.8247809410095215, "learning_rate": 0.001952186421173763, "loss": 1.013, "step": 8310 }, { "epoch": 2.3935558112773303, "grad_norm": 1.5688642263412476, "learning_rate": 0.0019521288837744536, "loss": 0.9296, "step": 8320 }, { "epoch": 2.396432681242808, "grad_norm": 1.2623876333236694, "learning_rate": 0.0019520713463751437, "loss": 0.9092, "step": 8330 }, { "epoch": 2.3993095512082854, "grad_norm": 1.3285412788391113, "learning_rate": 0.0019520138089758343, "loss": 1.0299, "step": 8340 }, { "epoch": 2.402186421173763, "grad_norm": 1.2163448333740234, "learning_rate": 0.0019519562715765249, "loss": 0.7711, "step": 8350 }, { "epoch": 2.4050632911392404, "grad_norm": 0.9343546628952026, "learning_rate": 0.0019518987341772152, "loss": 0.8169, "step": 8360 }, { "epoch": 2.407940161104718, "grad_norm": 1.635824203491211, "learning_rate": 0.0019518411967779058, "loss": 0.825, "step": 8370 }, { "epoch": 2.4108170310701955, "grad_norm": 2.6182994842529297, "learning_rate": 0.0019517836593785961, "loss": 1.1233, "step": 8380 }, { "epoch": 2.413693901035673, "grad_norm": 2.1868648529052734, "learning_rate": 0.0019517261219792865, "loss": 0.9542, "step": 8390 }, { "epoch": 2.4165707710011506, "grad_norm": 1.8683584928512573, "learning_rate": 0.001951668584579977, "loss": 1.152, "step": 8400 }, { "epoch": 2.419447640966628, "grad_norm": 2.119143486022949, "learning_rate": 0.0019516110471806674, "loss": 0.9149, "step": 8410 }, { "epoch": 2.422324510932106, "grad_norm": 1.6816182136535645, "learning_rate": 0.001951553509781358, "loss": 1.2377, "step": 8420 }, { "epoch": 2.425201380897583, "grad_norm": 1.2837886810302734, "learning_rate": 0.0019514959723820485, "loss": 0.8213, "step": 8430 }, { "epoch": 2.428078250863061, "grad_norm": 1.3884743452072144, "learning_rate": 0.0019514384349827388, "loss": 0.84, "step": 8440 }, { "epoch": 2.4309551208285387, "grad_norm": 0.9140376448631287, "learning_rate": 0.0019513808975834292, "loss": 0.8084, "step": 8450 }, { "epoch": 2.4338319907940162, "grad_norm": 1.484212040901184, "learning_rate": 0.0019513233601841198, "loss": 0.8698, "step": 8460 }, { "epoch": 2.4367088607594938, "grad_norm": 1.5302411317825317, "learning_rate": 0.00195126582278481, "loss": 0.8612, "step": 8470 }, { "epoch": 2.4395857307249713, "grad_norm": 1.0857800245285034, "learning_rate": 0.0019512082853855007, "loss": 0.901, "step": 8480 }, { "epoch": 2.442462600690449, "grad_norm": 2.5188374519348145, "learning_rate": 0.001951150747986191, "loss": 0.9424, "step": 8490 }, { "epoch": 2.4453394706559264, "grad_norm": 1.2587001323699951, "learning_rate": 0.0019510932105868816, "loss": 0.9277, "step": 8500 }, { "epoch": 2.448216340621404, "grad_norm": 1.5466957092285156, "learning_rate": 0.001951035673187572, "loss": 0.9731, "step": 8510 }, { "epoch": 2.4510932105868815, "grad_norm": 1.342121958732605, "learning_rate": 0.0019509781357882623, "loss": 0.9542, "step": 8520 }, { "epoch": 2.453970080552359, "grad_norm": 1.9152941703796387, "learning_rate": 0.0019509205983889528, "loss": 0.9238, "step": 8530 }, { "epoch": 2.4568469505178365, "grad_norm": 1.8211227655410767, "learning_rate": 0.0019508630609896434, "loss": 1.1506, "step": 8540 }, { "epoch": 2.459723820483314, "grad_norm": 1.5048052072525024, "learning_rate": 0.0019508055235903338, "loss": 0.8883, "step": 8550 }, { "epoch": 2.4626006904487916, "grad_norm": 1.3672796487808228, "learning_rate": 0.0019507479861910243, "loss": 1.0678, "step": 8560 }, { "epoch": 2.465477560414269, "grad_norm": 0.9574085474014282, "learning_rate": 0.0019506904487917147, "loss": 0.7567, "step": 8570 }, { "epoch": 2.4683544303797467, "grad_norm": 1.1707379817962646, "learning_rate": 0.001950632911392405, "loss": 0.8101, "step": 8580 }, { "epoch": 2.471231300345224, "grad_norm": 1.1880557537078857, "learning_rate": 0.0019505753739930956, "loss": 0.8847, "step": 8590 }, { "epoch": 2.4741081703107017, "grad_norm": 1.2083334922790527, "learning_rate": 0.001950517836593786, "loss": 0.842, "step": 8600 }, { "epoch": 2.4769850402761797, "grad_norm": 0.6042840480804443, "learning_rate": 0.0019504602991944765, "loss": 0.9357, "step": 8610 }, { "epoch": 2.4798619102416573, "grad_norm": 1.3951247930526733, "learning_rate": 0.001950402761795167, "loss": 0.9099, "step": 8620 }, { "epoch": 2.482738780207135, "grad_norm": 1.5446107387542725, "learning_rate": 0.0019503452243958572, "loss": 0.9881, "step": 8630 }, { "epoch": 2.4856156501726123, "grad_norm": 1.204432725906372, "learning_rate": 0.0019502876869965477, "loss": 0.8871, "step": 8640 }, { "epoch": 2.48849252013809, "grad_norm": 1.2827576398849487, "learning_rate": 0.0019502301495972383, "loss": 0.9441, "step": 8650 }, { "epoch": 2.4913693901035674, "grad_norm": 1.3948862552642822, "learning_rate": 0.0019501726121979287, "loss": 0.9785, "step": 8660 }, { "epoch": 2.494246260069045, "grad_norm": 1.3911007642745972, "learning_rate": 0.0019501150747986192, "loss": 1.0089, "step": 8670 }, { "epoch": 2.4971231300345225, "grad_norm": 2.0501368045806885, "learning_rate": 0.0019500575373993098, "loss": 0.8229, "step": 8680 }, { "epoch": 2.5, "grad_norm": 0.9772994518280029, "learning_rate": 0.00195, "loss": 0.8274, "step": 8690 }, { "epoch": 2.5028768699654775, "grad_norm": 1.5598622560501099, "learning_rate": 0.0019499424626006905, "loss": 0.9916, "step": 8700 }, { "epoch": 2.505753739930955, "grad_norm": 0.6726496815681458, "learning_rate": 0.0019498849252013808, "loss": 0.7327, "step": 8710 }, { "epoch": 2.5086306098964326, "grad_norm": 1.343847393989563, "learning_rate": 0.0019498273878020714, "loss": 0.885, "step": 8720 }, { "epoch": 2.51150747986191, "grad_norm": 1.0659171342849731, "learning_rate": 0.001949769850402762, "loss": 0.872, "step": 8730 }, { "epoch": 2.5143843498273877, "grad_norm": 1.768241286277771, "learning_rate": 0.0019497123130034523, "loss": 1.0369, "step": 8740 }, { "epoch": 2.517261219792865, "grad_norm": 1.7006547451019287, "learning_rate": 0.0019496547756041426, "loss": 1.0219, "step": 8750 }, { "epoch": 2.520138089758343, "grad_norm": 1.5477248430252075, "learning_rate": 0.0019495972382048332, "loss": 1.1503, "step": 8760 }, { "epoch": 2.5230149597238203, "grad_norm": 1.1865389347076416, "learning_rate": 0.0019495397008055236, "loss": 0.9066, "step": 8770 }, { "epoch": 2.5258918296892983, "grad_norm": 1.214784860610962, "learning_rate": 0.0019494821634062141, "loss": 0.8341, "step": 8780 }, { "epoch": 2.5287686996547754, "grad_norm": 2.518641233444214, "learning_rate": 0.0019494246260069047, "loss": 0.9466, "step": 8790 }, { "epoch": 2.5316455696202533, "grad_norm": 1.0057027339935303, "learning_rate": 0.001949367088607595, "loss": 0.8899, "step": 8800 }, { "epoch": 2.534522439585731, "grad_norm": 1.2704590559005737, "learning_rate": 0.0019493095512082854, "loss": 0.8948, "step": 8810 }, { "epoch": 2.5373993095512084, "grad_norm": 1.7684038877487183, "learning_rate": 0.0019492520138089757, "loss": 1.1125, "step": 8820 }, { "epoch": 2.540276179516686, "grad_norm": 1.0664440393447876, "learning_rate": 0.0019491944764096663, "loss": 0.8584, "step": 8830 }, { "epoch": 2.5431530494821635, "grad_norm": 2.6912283897399902, "learning_rate": 0.0019491369390103569, "loss": 0.8513, "step": 8840 }, { "epoch": 2.546029919447641, "grad_norm": 1.7656352519989014, "learning_rate": 0.0019490794016110472, "loss": 0.9949, "step": 8850 }, { "epoch": 2.5489067894131185, "grad_norm": 2.433647632598877, "learning_rate": 0.0019490218642117378, "loss": 0.9487, "step": 8860 }, { "epoch": 2.551783659378596, "grad_norm": 1.6599570512771606, "learning_rate": 0.0019489643268124281, "loss": 0.7566, "step": 8870 }, { "epoch": 2.5546605293440736, "grad_norm": 0.8110421895980835, "learning_rate": 0.0019489067894131185, "loss": 1.0476, "step": 8880 }, { "epoch": 2.557537399309551, "grad_norm": 1.4409786462783813, "learning_rate": 0.001948849252013809, "loss": 1.034, "step": 8890 }, { "epoch": 2.5604142692750287, "grad_norm": 1.4055508375167847, "learning_rate": 0.0019487917146144996, "loss": 1.2387, "step": 8900 }, { "epoch": 2.5632911392405062, "grad_norm": 1.401912808418274, "learning_rate": 0.00194873417721519, "loss": 0.9069, "step": 8910 }, { "epoch": 2.5661680092059838, "grad_norm": 1.9212065935134888, "learning_rate": 0.0019486766398158805, "loss": 0.9111, "step": 8920 }, { "epoch": 2.5690448791714613, "grad_norm": 1.5834681987762451, "learning_rate": 0.0019486191024165708, "loss": 0.7862, "step": 8930 }, { "epoch": 2.571921749136939, "grad_norm": 1.4219070672988892, "learning_rate": 0.0019485615650172612, "loss": 0.8674, "step": 8940 }, { "epoch": 2.574798619102417, "grad_norm": 3.6004443168640137, "learning_rate": 0.0019485040276179518, "loss": 0.7264, "step": 8950 }, { "epoch": 2.577675489067894, "grad_norm": 1.0378286838531494, "learning_rate": 0.001948446490218642, "loss": 0.8898, "step": 8960 }, { "epoch": 2.580552359033372, "grad_norm": 1.090950846672058, "learning_rate": 0.0019483889528193327, "loss": 0.8433, "step": 8970 }, { "epoch": 2.5834292289988494, "grad_norm": 0.8400689363479614, "learning_rate": 0.0019483314154200232, "loss": 0.7697, "step": 8980 }, { "epoch": 2.586306098964327, "grad_norm": 1.7134366035461426, "learning_rate": 0.0019482738780207134, "loss": 0.8622, "step": 8990 }, { "epoch": 2.5891829689298045, "grad_norm": 1.5084662437438965, "learning_rate": 0.001948216340621404, "loss": 0.7603, "step": 9000 }, { "epoch": 2.592059838895282, "grad_norm": 1.0074585676193237, "learning_rate": 0.0019481588032220945, "loss": 0.805, "step": 9010 }, { "epoch": 2.5949367088607596, "grad_norm": 1.1579524278640747, "learning_rate": 0.0019481012658227848, "loss": 0.9263, "step": 9020 }, { "epoch": 2.597813578826237, "grad_norm": 1.6965339183807373, "learning_rate": 0.0019480437284234754, "loss": 1.0709, "step": 9030 }, { "epoch": 2.6006904487917146, "grad_norm": 1.5393484830856323, "learning_rate": 0.001947986191024166, "loss": 0.7543, "step": 9040 }, { "epoch": 2.603567318757192, "grad_norm": 1.899540662765503, "learning_rate": 0.001947928653624856, "loss": 0.9878, "step": 9050 }, { "epoch": 2.6064441887226697, "grad_norm": 2.3296940326690674, "learning_rate": 0.0019478711162255467, "loss": 0.783, "step": 9060 }, { "epoch": 2.6093210586881472, "grad_norm": 1.0595093965530396, "learning_rate": 0.001947813578826237, "loss": 0.8715, "step": 9070 }, { "epoch": 2.6121979286536248, "grad_norm": 2.165076494216919, "learning_rate": 0.0019477560414269276, "loss": 0.9191, "step": 9080 }, { "epoch": 2.6150747986191023, "grad_norm": 1.1173702478408813, "learning_rate": 0.0019476985040276181, "loss": 0.8864, "step": 9090 }, { "epoch": 2.61795166858458, "grad_norm": 1.3017303943634033, "learning_rate": 0.0019476409666283083, "loss": 1.0844, "step": 9100 }, { "epoch": 2.6208285385500574, "grad_norm": 1.1371068954467773, "learning_rate": 0.0019475834292289988, "loss": 0.8332, "step": 9110 }, { "epoch": 2.6237054085155354, "grad_norm": 1.5168851613998413, "learning_rate": 0.0019475258918296894, "loss": 0.911, "step": 9120 }, { "epoch": 2.6265822784810124, "grad_norm": 1.3774545192718506, "learning_rate": 0.0019474683544303797, "loss": 1.1253, "step": 9130 }, { "epoch": 2.6294591484464904, "grad_norm": 1.2903246879577637, "learning_rate": 0.0019474108170310703, "loss": 0.9745, "step": 9140 }, { "epoch": 2.6323360184119675, "grad_norm": 1.1642348766326904, "learning_rate": 0.0019473532796317609, "loss": 0.8236, "step": 9150 }, { "epoch": 2.6352128883774455, "grad_norm": 1.3183895349502563, "learning_rate": 0.001947295742232451, "loss": 1.0209, "step": 9160 }, { "epoch": 2.638089758342923, "grad_norm": 0.7434096336364746, "learning_rate": 0.0019472382048331416, "loss": 1.0001, "step": 9170 }, { "epoch": 2.6409666283084006, "grad_norm": 1.810347080230713, "learning_rate": 0.001947180667433832, "loss": 1.0964, "step": 9180 }, { "epoch": 2.643843498273878, "grad_norm": 1.3180047273635864, "learning_rate": 0.0019471231300345225, "loss": 0.9044, "step": 9190 }, { "epoch": 2.6467203682393556, "grad_norm": 2.2050509452819824, "learning_rate": 0.001947065592635213, "loss": 1.0075, "step": 9200 }, { "epoch": 2.649597238204833, "grad_norm": 2.6828725337982178, "learning_rate": 0.0019470080552359034, "loss": 1.1051, "step": 9210 }, { "epoch": 2.6524741081703107, "grad_norm": 1.4838149547576904, "learning_rate": 0.0019469505178365937, "loss": 1.0327, "step": 9220 }, { "epoch": 2.6553509781357882, "grad_norm": 0.8549264669418335, "learning_rate": 0.0019468929804372843, "loss": 1.074, "step": 9230 }, { "epoch": 2.6582278481012658, "grad_norm": 1.1734830141067505, "learning_rate": 0.0019468354430379746, "loss": 0.862, "step": 9240 }, { "epoch": 2.6611047180667433, "grad_norm": 1.4587979316711426, "learning_rate": 0.0019467779056386652, "loss": 0.795, "step": 9250 }, { "epoch": 2.663981588032221, "grad_norm": 1.085004448890686, "learning_rate": 0.0019467203682393558, "loss": 1.1652, "step": 9260 }, { "epoch": 2.6668584579976984, "grad_norm": 0.8462554812431335, "learning_rate": 0.0019466628308400461, "loss": 0.806, "step": 9270 }, { "epoch": 2.669735327963176, "grad_norm": 1.2369089126586914, "learning_rate": 0.0019466052934407365, "loss": 0.8317, "step": 9280 }, { "epoch": 2.672612197928654, "grad_norm": 1.1308356523513794, "learning_rate": 0.0019465477560414268, "loss": 0.9559, "step": 9290 }, { "epoch": 2.675489067894131, "grad_norm": 1.4724388122558594, "learning_rate": 0.0019464902186421174, "loss": 1.1467, "step": 9300 }, { "epoch": 2.678365937859609, "grad_norm": 0.7562936544418335, "learning_rate": 0.001946432681242808, "loss": 0.9033, "step": 9310 }, { "epoch": 2.681242807825086, "grad_norm": 1.3887816667556763, "learning_rate": 0.0019463751438434983, "loss": 0.9168, "step": 9320 }, { "epoch": 2.684119677790564, "grad_norm": 1.6408956050872803, "learning_rate": 0.0019463176064441888, "loss": 1.1522, "step": 9330 }, { "epoch": 2.6869965477560416, "grad_norm": 1.1456571817398071, "learning_rate": 0.0019462600690448792, "loss": 0.9872, "step": 9340 }, { "epoch": 2.689873417721519, "grad_norm": 1.3562453985214233, "learning_rate": 0.0019462025316455695, "loss": 1.3, "step": 9350 }, { "epoch": 2.6927502876869966, "grad_norm": 0.8078615665435791, "learning_rate": 0.00194614499424626, "loss": 0.9056, "step": 9360 }, { "epoch": 2.695627157652474, "grad_norm": 0.996971845626831, "learning_rate": 0.0019460874568469507, "loss": 0.9149, "step": 9370 }, { "epoch": 2.6985040276179517, "grad_norm": 1.2057723999023438, "learning_rate": 0.001946029919447641, "loss": 1.0053, "step": 9380 }, { "epoch": 2.7013808975834293, "grad_norm": 0.9210729598999023, "learning_rate": 0.0019459723820483316, "loss": 0.9195, "step": 9390 }, { "epoch": 2.704257767548907, "grad_norm": 1.6597269773483276, "learning_rate": 0.0019459148446490217, "loss": 0.8766, "step": 9400 }, { "epoch": 2.7071346375143843, "grad_norm": 1.424978494644165, "learning_rate": 0.0019458573072497123, "loss": 0.8786, "step": 9410 }, { "epoch": 2.710011507479862, "grad_norm": 1.6821414232254028, "learning_rate": 0.0019457997698504028, "loss": 1.0419, "step": 9420 }, { "epoch": 2.7128883774453394, "grad_norm": 1.7547922134399414, "learning_rate": 0.0019457422324510932, "loss": 0.9462, "step": 9430 }, { "epoch": 2.715765247410817, "grad_norm": 1.2511738538742065, "learning_rate": 0.0019456846950517837, "loss": 0.9329, "step": 9440 }, { "epoch": 2.7186421173762945, "grad_norm": 0.769887387752533, "learning_rate": 0.0019456271576524743, "loss": 0.9397, "step": 9450 }, { "epoch": 2.721518987341772, "grad_norm": 1.8105272054672241, "learning_rate": 0.0019455696202531644, "loss": 1.077, "step": 9460 }, { "epoch": 2.7243958573072495, "grad_norm": 1.0262171030044556, "learning_rate": 0.001945512082853855, "loss": 0.9634, "step": 9470 }, { "epoch": 2.7272727272727275, "grad_norm": 0.967701256275177, "learning_rate": 0.0019454545454545456, "loss": 0.8507, "step": 9480 }, { "epoch": 2.7301495972382046, "grad_norm": 1.8298648595809937, "learning_rate": 0.001945397008055236, "loss": 0.9728, "step": 9490 }, { "epoch": 2.7330264672036826, "grad_norm": 0.9710360765457153, "learning_rate": 0.0019453394706559265, "loss": 0.8113, "step": 9500 }, { "epoch": 2.7359033371691597, "grad_norm": 1.218746304512024, "learning_rate": 0.0019452819332566168, "loss": 1.1973, "step": 9510 }, { "epoch": 2.7387802071346377, "grad_norm": 1.487638235092163, "learning_rate": 0.0019452243958573072, "loss": 0.9521, "step": 9520 }, { "epoch": 2.741657077100115, "grad_norm": 1.2616091966629028, "learning_rate": 0.0019451668584579977, "loss": 0.9437, "step": 9530 }, { "epoch": 2.7445339470655927, "grad_norm": 1.2209819555282593, "learning_rate": 0.001945109321058688, "loss": 1.056, "step": 9540 }, { "epoch": 2.7474108170310703, "grad_norm": 2.168330669403076, "learning_rate": 0.0019450517836593787, "loss": 0.9999, "step": 9550 }, { "epoch": 2.750287686996548, "grad_norm": 0.9788414835929871, "learning_rate": 0.0019449942462600692, "loss": 0.9986, "step": 9560 }, { "epoch": 2.7531645569620253, "grad_norm": 1.2155368328094482, "learning_rate": 0.0019449367088607596, "loss": 0.7624, "step": 9570 }, { "epoch": 2.756041426927503, "grad_norm": 0.9551307559013367, "learning_rate": 0.00194487917146145, "loss": 0.9715, "step": 9580 }, { "epoch": 2.7589182968929804, "grad_norm": 1.6312178373336792, "learning_rate": 0.0019448216340621405, "loss": 0.8891, "step": 9590 }, { "epoch": 2.761795166858458, "grad_norm": 1.5237886905670166, "learning_rate": 0.0019447640966628308, "loss": 0.9224, "step": 9600 }, { "epoch": 2.7646720368239355, "grad_norm": 1.5498988628387451, "learning_rate": 0.0019447065592635214, "loss": 0.9248, "step": 9610 }, { "epoch": 2.767548906789413, "grad_norm": 1.738914132118225, "learning_rate": 0.001944649021864212, "loss": 0.838, "step": 9620 }, { "epoch": 2.7704257767548905, "grad_norm": 1.8876851797103882, "learning_rate": 0.0019445914844649023, "loss": 0.9875, "step": 9630 }, { "epoch": 2.773302646720368, "grad_norm": 1.515108585357666, "learning_rate": 0.0019445339470655926, "loss": 0.8844, "step": 9640 }, { "epoch": 2.776179516685846, "grad_norm": 1.8955955505371094, "learning_rate": 0.001944476409666283, "loss": 0.9851, "step": 9650 }, { "epoch": 2.779056386651323, "grad_norm": 1.062726616859436, "learning_rate": 0.0019444188722669736, "loss": 1.1325, "step": 9660 }, { "epoch": 2.781933256616801, "grad_norm": 1.0081530809402466, "learning_rate": 0.0019443613348676641, "loss": 0.9318, "step": 9670 }, { "epoch": 2.7848101265822782, "grad_norm": 1.324537754058838, "learning_rate": 0.0019443037974683545, "loss": 0.7981, "step": 9680 }, { "epoch": 2.787686996547756, "grad_norm": 0.8715140223503113, "learning_rate": 0.001944246260069045, "loss": 1.2803, "step": 9690 }, { "epoch": 2.7905638665132337, "grad_norm": 2.1051409244537354, "learning_rate": 0.0019441887226697354, "loss": 0.9272, "step": 9700 }, { "epoch": 2.7934407364787113, "grad_norm": 1.1768436431884766, "learning_rate": 0.0019441311852704257, "loss": 0.8323, "step": 9710 }, { "epoch": 2.796317606444189, "grad_norm": 2.2806591987609863, "learning_rate": 0.0019440736478711163, "loss": 1.0929, "step": 9720 }, { "epoch": 2.7991944764096663, "grad_norm": 2.439995527267456, "learning_rate": 0.0019440161104718068, "loss": 0.9475, "step": 9730 }, { "epoch": 2.802071346375144, "grad_norm": 1.32634437084198, "learning_rate": 0.0019439585730724972, "loss": 0.8363, "step": 9740 }, { "epoch": 2.8049482163406214, "grad_norm": 1.2134461402893066, "learning_rate": 0.0019439010356731878, "loss": 1.1045, "step": 9750 }, { "epoch": 2.807825086306099, "grad_norm": 1.8965120315551758, "learning_rate": 0.0019438434982738779, "loss": 0.9116, "step": 9760 }, { "epoch": 2.8107019562715765, "grad_norm": 1.0737544298171997, "learning_rate": 0.0019437859608745685, "loss": 1.1173, "step": 9770 }, { "epoch": 2.813578826237054, "grad_norm": 0.811289370059967, "learning_rate": 0.001943728423475259, "loss": 0.9294, "step": 9780 }, { "epoch": 2.8164556962025316, "grad_norm": 1.149965524673462, "learning_rate": 0.0019436708860759494, "loss": 0.8608, "step": 9790 }, { "epoch": 2.819332566168009, "grad_norm": 3.1589910984039307, "learning_rate": 0.00194361334867664, "loss": 0.776, "step": 9800 }, { "epoch": 2.8222094361334866, "grad_norm": 1.6410484313964844, "learning_rate": 0.0019435558112773305, "loss": 0.9104, "step": 9810 }, { "epoch": 2.825086306098964, "grad_norm": 1.7657420635223389, "learning_rate": 0.0019434982738780206, "loss": 0.7596, "step": 9820 }, { "epoch": 2.8279631760644417, "grad_norm": 1.5684417486190796, "learning_rate": 0.0019434407364787112, "loss": 0.9385, "step": 9830 }, { "epoch": 2.8308400460299197, "grad_norm": 1.2317842245101929, "learning_rate": 0.0019433831990794018, "loss": 0.8726, "step": 9840 }, { "epoch": 2.8337169159953968, "grad_norm": 1.1552478075027466, "learning_rate": 0.001943325661680092, "loss": 0.9417, "step": 9850 }, { "epoch": 2.8365937859608747, "grad_norm": 1.36764395236969, "learning_rate": 0.0019432681242807827, "loss": 0.8187, "step": 9860 }, { "epoch": 2.8394706559263523, "grad_norm": 1.3493925333023071, "learning_rate": 0.0019432105868814728, "loss": 0.9517, "step": 9870 }, { "epoch": 2.84234752589183, "grad_norm": 1.5807702541351318, "learning_rate": 0.0019431530494821634, "loss": 1.0036, "step": 9880 }, { "epoch": 2.8452243958573074, "grad_norm": 0.8984076380729675, "learning_rate": 0.001943095512082854, "loss": 0.7935, "step": 9890 }, { "epoch": 2.848101265822785, "grad_norm": 1.557304859161377, "learning_rate": 0.0019430379746835443, "loss": 0.9069, "step": 9900 }, { "epoch": 2.8509781357882624, "grad_norm": 1.3384888172149658, "learning_rate": 0.0019429804372842348, "loss": 1.0977, "step": 9910 }, { "epoch": 2.85385500575374, "grad_norm": 1.1413497924804688, "learning_rate": 0.0019429228998849254, "loss": 0.9294, "step": 9920 }, { "epoch": 2.8567318757192175, "grad_norm": 1.7425166368484497, "learning_rate": 0.0019428653624856155, "loss": 1.0509, "step": 9930 }, { "epoch": 2.859608745684695, "grad_norm": 1.6623531579971313, "learning_rate": 0.001942807825086306, "loss": 0.8776, "step": 9940 }, { "epoch": 2.8624856156501726, "grad_norm": 2.7085747718811035, "learning_rate": 0.0019427502876869967, "loss": 0.8294, "step": 9950 }, { "epoch": 2.86536248561565, "grad_norm": 1.8535360097885132, "learning_rate": 0.001942692750287687, "loss": 0.8522, "step": 9960 }, { "epoch": 2.8682393555811276, "grad_norm": 0.990894079208374, "learning_rate": 0.0019426352128883776, "loss": 1.0293, "step": 9970 }, { "epoch": 2.871116225546605, "grad_norm": 1.3137481212615967, "learning_rate": 0.001942577675489068, "loss": 1.1573, "step": 9980 }, { "epoch": 2.8739930955120827, "grad_norm": 0.703386127948761, "learning_rate": 0.0019425201380897583, "loss": 0.9737, "step": 9990 }, { "epoch": 2.8768699654775602, "grad_norm": 1.2294261455535889, "learning_rate": 0.0019424626006904488, "loss": 0.9546, "step": 10000 }, { "epoch": 2.879746835443038, "grad_norm": 1.4345906972885132, "learning_rate": 0.0019424050632911392, "loss": 0.9871, "step": 10010 }, { "epoch": 2.8826237054085153, "grad_norm": 1.1779391765594482, "learning_rate": 0.0019423475258918297, "loss": 0.8685, "step": 10020 }, { "epoch": 2.8855005753739933, "grad_norm": 0.9985438585281372, "learning_rate": 0.0019422899884925203, "loss": 1.0751, "step": 10030 }, { "epoch": 2.8883774453394704, "grad_norm": 1.7880563735961914, "learning_rate": 0.0019422324510932106, "loss": 0.8127, "step": 10040 }, { "epoch": 2.8912543153049484, "grad_norm": 1.3340530395507812, "learning_rate": 0.001942174913693901, "loss": 1.0383, "step": 10050 }, { "epoch": 2.894131185270426, "grad_norm": 1.5336236953735352, "learning_rate": 0.0019421173762945916, "loss": 0.8183, "step": 10060 }, { "epoch": 2.8970080552359034, "grad_norm": 1.1468186378479004, "learning_rate": 0.001942059838895282, "loss": 0.8689, "step": 10070 }, { "epoch": 2.899884925201381, "grad_norm": 1.6780444383621216, "learning_rate": 0.0019420023014959725, "loss": 0.9941, "step": 10080 }, { "epoch": 2.9027617951668585, "grad_norm": 1.3421902656555176, "learning_rate": 0.0019419447640966628, "loss": 0.9103, "step": 10090 }, { "epoch": 2.905638665132336, "grad_norm": 2.760612726211548, "learning_rate": 0.0019418872266973534, "loss": 0.8157, "step": 10100 }, { "epoch": 2.9085155350978136, "grad_norm": 1.6155283451080322, "learning_rate": 0.0019418296892980437, "loss": 0.8758, "step": 10110 }, { "epoch": 2.911392405063291, "grad_norm": 2.0452563762664795, "learning_rate": 0.001941772151898734, "loss": 1.0736, "step": 10120 }, { "epoch": 2.9142692750287686, "grad_norm": 1.2594943046569824, "learning_rate": 0.0019417146144994246, "loss": 0.942, "step": 10130 }, { "epoch": 2.917146144994246, "grad_norm": 0.8925260901451111, "learning_rate": 0.0019416570771001152, "loss": 0.9204, "step": 10140 }, { "epoch": 2.9200230149597237, "grad_norm": 2.0639302730560303, "learning_rate": 0.0019415995397008055, "loss": 0.9365, "step": 10150 }, { "epoch": 2.9228998849252013, "grad_norm": 1.3345987796783447, "learning_rate": 0.0019415420023014961, "loss": 0.8658, "step": 10160 }, { "epoch": 2.925776754890679, "grad_norm": 0.9715394377708435, "learning_rate": 0.0019414844649021865, "loss": 1.1358, "step": 10170 }, { "epoch": 2.9286536248561568, "grad_norm": 1.460862159729004, "learning_rate": 0.0019414269275028768, "loss": 0.9359, "step": 10180 }, { "epoch": 2.931530494821634, "grad_norm": 0.8752611875534058, "learning_rate": 0.0019413693901035674, "loss": 0.9865, "step": 10190 }, { "epoch": 2.934407364787112, "grad_norm": 1.5155397653579712, "learning_rate": 0.0019413118527042577, "loss": 0.7926, "step": 10200 }, { "epoch": 2.937284234752589, "grad_norm": 1.0131645202636719, "learning_rate": 0.0019412543153049483, "loss": 0.9294, "step": 10210 }, { "epoch": 2.940161104718067, "grad_norm": 1.6838555335998535, "learning_rate": 0.0019411967779056388, "loss": 0.7444, "step": 10220 }, { "epoch": 2.9430379746835444, "grad_norm": 1.5970168113708496, "learning_rate": 0.001941139240506329, "loss": 0.85, "step": 10230 }, { "epoch": 2.945914844649022, "grad_norm": 1.2418962717056274, "learning_rate": 0.0019410817031070195, "loss": 0.8999, "step": 10240 }, { "epoch": 2.9487917146144995, "grad_norm": 1.3981451988220215, "learning_rate": 0.00194102416570771, "loss": 1.116, "step": 10250 }, { "epoch": 2.951668584579977, "grad_norm": 0.9303348064422607, "learning_rate": 0.0019409666283084004, "loss": 1.0203, "step": 10260 }, { "epoch": 2.9545454545454546, "grad_norm": 1.1711763143539429, "learning_rate": 0.001940909090909091, "loss": 0.7673, "step": 10270 }, { "epoch": 2.957422324510932, "grad_norm": 1.7598403692245483, "learning_rate": 0.0019408515535097816, "loss": 0.958, "step": 10280 }, { "epoch": 2.9602991944764097, "grad_norm": 1.3975249528884888, "learning_rate": 0.0019407940161104717, "loss": 1.0082, "step": 10290 }, { "epoch": 2.963176064441887, "grad_norm": 1.5428487062454224, "learning_rate": 0.0019407364787111623, "loss": 0.8138, "step": 10300 }, { "epoch": 2.9660529344073647, "grad_norm": 1.3649888038635254, "learning_rate": 0.0019406789413118528, "loss": 0.9527, "step": 10310 }, { "epoch": 2.9689298043728423, "grad_norm": 1.543027639389038, "learning_rate": 0.0019406214039125432, "loss": 0.8928, "step": 10320 }, { "epoch": 2.97180667433832, "grad_norm": 1.6138179302215576, "learning_rate": 0.0019405638665132337, "loss": 0.8452, "step": 10330 }, { "epoch": 2.9746835443037973, "grad_norm": 1.1925491094589233, "learning_rate": 0.001940506329113924, "loss": 0.8859, "step": 10340 }, { "epoch": 2.977560414269275, "grad_norm": 2.1699321269989014, "learning_rate": 0.0019404487917146144, "loss": 0.9649, "step": 10350 }, { "epoch": 2.9804372842347524, "grad_norm": 1.4096827507019043, "learning_rate": 0.001940391254315305, "loss": 0.7714, "step": 10360 }, { "epoch": 2.9833141542002304, "grad_norm": 1.1021465063095093, "learning_rate": 0.0019403337169159954, "loss": 0.9131, "step": 10370 }, { "epoch": 2.9861910241657075, "grad_norm": 1.3874281644821167, "learning_rate": 0.001940276179516686, "loss": 0.9547, "step": 10380 }, { "epoch": 2.9890678941311855, "grad_norm": 1.0345709323883057, "learning_rate": 0.0019402186421173765, "loss": 0.8729, "step": 10390 }, { "epoch": 2.991944764096663, "grad_norm": 1.1353133916854858, "learning_rate": 0.0019401611047180668, "loss": 1.0305, "step": 10400 }, { "epoch": 2.9948216340621405, "grad_norm": 0.9944214224815369, "learning_rate": 0.0019401035673187572, "loss": 0.8292, "step": 10410 }, { "epoch": 2.997698504027618, "grad_norm": 1.4519401788711548, "learning_rate": 0.0019400460299194477, "loss": 1.019, "step": 10420 }, { "epoch": 3.0005753739930956, "grad_norm": 0.7208581566810608, "learning_rate": 0.001939988492520138, "loss": 0.9092, "step": 10430 }, { "epoch": 3.003452243958573, "grad_norm": 2.077338218688965, "learning_rate": 0.0019399309551208286, "loss": 0.8399, "step": 10440 }, { "epoch": 3.0063291139240507, "grad_norm": 1.791563868522644, "learning_rate": 0.001939873417721519, "loss": 0.9253, "step": 10450 }, { "epoch": 3.009205983889528, "grad_norm": 1.708034873008728, "learning_rate": 0.0019398158803222096, "loss": 0.6685, "step": 10460 }, { "epoch": 3.0120828538550057, "grad_norm": 1.1688939332962036, "learning_rate": 0.0019397583429229, "loss": 0.9764, "step": 10470 }, { "epoch": 3.0149597238204833, "grad_norm": 0.9953864216804504, "learning_rate": 0.0019397008055235903, "loss": 0.8292, "step": 10480 }, { "epoch": 3.017836593785961, "grad_norm": 1.4922785758972168, "learning_rate": 0.0019396432681242808, "loss": 0.9967, "step": 10490 }, { "epoch": 3.0207134637514383, "grad_norm": 1.270883560180664, "learning_rate": 0.0019395857307249714, "loss": 0.7672, "step": 10500 }, { "epoch": 3.023590333716916, "grad_norm": 1.0201493501663208, "learning_rate": 0.0019395281933256617, "loss": 0.773, "step": 10510 }, { "epoch": 3.0264672036823934, "grad_norm": 1.510507345199585, "learning_rate": 0.0019394706559263523, "loss": 0.8556, "step": 10520 }, { "epoch": 3.029344073647871, "grad_norm": 1.0200098752975464, "learning_rate": 0.0019394131185270426, "loss": 0.928, "step": 10530 }, { "epoch": 3.0322209436133485, "grad_norm": 1.7743139266967773, "learning_rate": 0.001939355581127733, "loss": 0.9689, "step": 10540 }, { "epoch": 3.035097813578826, "grad_norm": 1.177539348602295, "learning_rate": 0.0019392980437284235, "loss": 0.8974, "step": 10550 }, { "epoch": 3.037974683544304, "grad_norm": 1.5095117092132568, "learning_rate": 0.001939240506329114, "loss": 0.7432, "step": 10560 }, { "epoch": 3.0408515535097815, "grad_norm": 0.9145352244377136, "learning_rate": 0.0019391829689298045, "loss": 0.8759, "step": 10570 }, { "epoch": 3.043728423475259, "grad_norm": 1.8138350248336792, "learning_rate": 0.001939125431530495, "loss": 0.9801, "step": 10580 }, { "epoch": 3.0466052934407366, "grad_norm": 1.0032939910888672, "learning_rate": 0.0019390678941311852, "loss": 0.7406, "step": 10590 }, { "epoch": 3.049482163406214, "grad_norm": 2.4040050506591797, "learning_rate": 0.0019390103567318757, "loss": 0.8667, "step": 10600 }, { "epoch": 3.0523590333716917, "grad_norm": 1.0515053272247314, "learning_rate": 0.0019389528193325663, "loss": 0.6715, "step": 10610 }, { "epoch": 3.055235903337169, "grad_norm": 1.3058674335479736, "learning_rate": 0.0019388952819332566, "loss": 0.8811, "step": 10620 }, { "epoch": 3.0581127733026467, "grad_norm": 1.2219269275665283, "learning_rate": 0.0019388377445339472, "loss": 0.7136, "step": 10630 }, { "epoch": 3.0609896432681243, "grad_norm": 1.3305193185806274, "learning_rate": 0.0019387802071346378, "loss": 0.9408, "step": 10640 }, { "epoch": 3.063866513233602, "grad_norm": 2.145429849624634, "learning_rate": 0.0019387226697353279, "loss": 0.994, "step": 10650 }, { "epoch": 3.0667433831990794, "grad_norm": 0.9261243343353271, "learning_rate": 0.0019386651323360185, "loss": 0.9066, "step": 10660 }, { "epoch": 3.069620253164557, "grad_norm": 1.048318862915039, "learning_rate": 0.0019386075949367088, "loss": 0.7833, "step": 10670 }, { "epoch": 3.0724971231300344, "grad_norm": 0.7529218792915344, "learning_rate": 0.0019385500575373994, "loss": 0.6937, "step": 10680 }, { "epoch": 3.075373993095512, "grad_norm": 2.2413084506988525, "learning_rate": 0.00193849252013809, "loss": 0.8065, "step": 10690 }, { "epoch": 3.0782508630609895, "grad_norm": 0.8340462446212769, "learning_rate": 0.00193843498273878, "loss": 0.9058, "step": 10700 }, { "epoch": 3.081127733026467, "grad_norm": 1.1866815090179443, "learning_rate": 0.0019383774453394706, "loss": 0.8824, "step": 10710 }, { "epoch": 3.0840046029919446, "grad_norm": 1.072367548942566, "learning_rate": 0.0019383199079401612, "loss": 0.5864, "step": 10720 }, { "epoch": 3.0868814729574225, "grad_norm": 1.9526622295379639, "learning_rate": 0.0019382623705408515, "loss": 0.904, "step": 10730 }, { "epoch": 3.0897583429229, "grad_norm": 0.860154390335083, "learning_rate": 0.001938204833141542, "loss": 0.9145, "step": 10740 }, { "epoch": 3.0926352128883776, "grad_norm": 1.4811608791351318, "learning_rate": 0.0019381472957422327, "loss": 0.9509, "step": 10750 }, { "epoch": 3.095512082853855, "grad_norm": 1.5637222528457642, "learning_rate": 0.0019380897583429228, "loss": 0.8657, "step": 10760 }, { "epoch": 3.0983889528193327, "grad_norm": 1.738795280456543, "learning_rate": 0.0019380322209436134, "loss": 0.9516, "step": 10770 }, { "epoch": 3.1012658227848102, "grad_norm": 2.059634208679199, "learning_rate": 0.0019379746835443037, "loss": 0.9145, "step": 10780 }, { "epoch": 3.1041426927502878, "grad_norm": 2.5255091190338135, "learning_rate": 0.0019379171461449943, "loss": 1.1162, "step": 10790 }, { "epoch": 3.1070195627157653, "grad_norm": 1.092995285987854, "learning_rate": 0.0019378596087456848, "loss": 0.8799, "step": 10800 }, { "epoch": 3.109896432681243, "grad_norm": 1.3379158973693848, "learning_rate": 0.0019378020713463752, "loss": 1.0306, "step": 10810 }, { "epoch": 3.1127733026467204, "grad_norm": 0.9740696549415588, "learning_rate": 0.0019377445339470655, "loss": 0.8947, "step": 10820 }, { "epoch": 3.115650172612198, "grad_norm": 1.233745813369751, "learning_rate": 0.001937686996547756, "loss": 0.9688, "step": 10830 }, { "epoch": 3.1185270425776754, "grad_norm": 0.6033667325973511, "learning_rate": 0.0019376294591484464, "loss": 0.9012, "step": 10840 }, { "epoch": 3.121403912543153, "grad_norm": 1.000977635383606, "learning_rate": 0.001937571921749137, "loss": 0.6999, "step": 10850 }, { "epoch": 3.1242807825086305, "grad_norm": 1.2982075214385986, "learning_rate": 0.0019375143843498276, "loss": 0.8636, "step": 10860 }, { "epoch": 3.127157652474108, "grad_norm": 1.1564016342163086, "learning_rate": 0.001937456846950518, "loss": 0.7906, "step": 10870 }, { "epoch": 3.1300345224395856, "grad_norm": 1.0375546216964722, "learning_rate": 0.0019373993095512083, "loss": 0.8898, "step": 10880 }, { "epoch": 3.132911392405063, "grad_norm": 1.2557244300842285, "learning_rate": 0.0019373417721518988, "loss": 0.9908, "step": 10890 }, { "epoch": 3.1357882623705406, "grad_norm": 1.711849570274353, "learning_rate": 0.0019372842347525892, "loss": 0.8562, "step": 10900 }, { "epoch": 3.138665132336018, "grad_norm": 0.8937116265296936, "learning_rate": 0.0019372266973532797, "loss": 0.7537, "step": 10910 }, { "epoch": 3.141542002301496, "grad_norm": 0.8283678889274597, "learning_rate": 0.00193716915995397, "loss": 0.865, "step": 10920 }, { "epoch": 3.1444188722669737, "grad_norm": 1.2784236669540405, "learning_rate": 0.0019371116225546606, "loss": 0.9844, "step": 10930 }, { "epoch": 3.1472957422324512, "grad_norm": 1.9320849180221558, "learning_rate": 0.001937054085155351, "loss": 1.0868, "step": 10940 }, { "epoch": 3.1501726121979288, "grad_norm": 1.245213508605957, "learning_rate": 0.0019369965477560413, "loss": 0.7456, "step": 10950 }, { "epoch": 3.1530494821634063, "grad_norm": 1.1367965936660767, "learning_rate": 0.001936939010356732, "loss": 0.9492, "step": 10960 }, { "epoch": 3.155926352128884, "grad_norm": 0.9688381552696228, "learning_rate": 0.0019368814729574225, "loss": 0.7965, "step": 10970 }, { "epoch": 3.1588032220943614, "grad_norm": 1.2451010942459106, "learning_rate": 0.0019368239355581128, "loss": 0.8359, "step": 10980 }, { "epoch": 3.161680092059839, "grad_norm": 1.0019665956497192, "learning_rate": 0.0019367663981588034, "loss": 1.0566, "step": 10990 }, { "epoch": 3.1645569620253164, "grad_norm": 1.182327389717102, "learning_rate": 0.0019367088607594937, "loss": 0.8244, "step": 11000 }, { "epoch": 3.167433831990794, "grad_norm": 1.2803155183792114, "learning_rate": 0.001936651323360184, "loss": 0.9376, "step": 11010 }, { "epoch": 3.1703107019562715, "grad_norm": 1.5943485498428345, "learning_rate": 0.0019365937859608746, "loss": 1.0114, "step": 11020 }, { "epoch": 3.173187571921749, "grad_norm": 1.1336265802383423, "learning_rate": 0.001936536248561565, "loss": 0.8283, "step": 11030 }, { "epoch": 3.1760644418872266, "grad_norm": 1.5637339353561401, "learning_rate": 0.0019364787111622555, "loss": 0.7533, "step": 11040 }, { "epoch": 3.178941311852704, "grad_norm": 1.5596998929977417, "learning_rate": 0.001936421173762946, "loss": 0.9698, "step": 11050 }, { "epoch": 3.1818181818181817, "grad_norm": 1.3780850172042847, "learning_rate": 0.0019363636363636362, "loss": 0.8125, "step": 11060 }, { "epoch": 3.184695051783659, "grad_norm": 1.440611720085144, "learning_rate": 0.0019363060989643268, "loss": 0.8937, "step": 11070 }, { "epoch": 3.1875719217491367, "grad_norm": 1.2646886110305786, "learning_rate": 0.0019362485615650174, "loss": 0.9034, "step": 11080 }, { "epoch": 3.1904487917146147, "grad_norm": 2.170464277267456, "learning_rate": 0.0019361910241657077, "loss": 0.8052, "step": 11090 }, { "epoch": 3.1933256616800922, "grad_norm": 1.4394841194152832, "learning_rate": 0.0019361334867663983, "loss": 0.7391, "step": 11100 }, { "epoch": 3.1962025316455698, "grad_norm": 1.5903658866882324, "learning_rate": 0.0019360759493670888, "loss": 0.8678, "step": 11110 }, { "epoch": 3.1990794016110473, "grad_norm": 1.0600446462631226, "learning_rate": 0.001936018411967779, "loss": 0.9377, "step": 11120 }, { "epoch": 3.201956271576525, "grad_norm": 2.31017804145813, "learning_rate": 0.0019359608745684695, "loss": 0.9397, "step": 11130 }, { "epoch": 3.2048331415420024, "grad_norm": 0.7927943468093872, "learning_rate": 0.0019359033371691599, "loss": 0.768, "step": 11140 }, { "epoch": 3.20771001150748, "grad_norm": 1.593825340270996, "learning_rate": 0.0019358457997698504, "loss": 0.9504, "step": 11150 }, { "epoch": 3.2105868814729575, "grad_norm": 1.000963568687439, "learning_rate": 0.001935788262370541, "loss": 0.8058, "step": 11160 }, { "epoch": 3.213463751438435, "grad_norm": 0.9596954584121704, "learning_rate": 0.0019357307249712314, "loss": 1.0716, "step": 11170 }, { "epoch": 3.2163406214039125, "grad_norm": 1.7127373218536377, "learning_rate": 0.0019356731875719217, "loss": 0.7105, "step": 11180 }, { "epoch": 3.21921749136939, "grad_norm": 2.123713970184326, "learning_rate": 0.0019356156501726123, "loss": 0.938, "step": 11190 }, { "epoch": 3.2220943613348676, "grad_norm": 2.545238494873047, "learning_rate": 0.0019355581127733026, "loss": 1.0396, "step": 11200 }, { "epoch": 3.224971231300345, "grad_norm": 1.382786512374878, "learning_rate": 0.0019355005753739932, "loss": 0.8156, "step": 11210 }, { "epoch": 3.2278481012658227, "grad_norm": 1.1870611906051636, "learning_rate": 0.0019354430379746837, "loss": 0.8018, "step": 11220 }, { "epoch": 3.2307249712313, "grad_norm": 0.8306632041931152, "learning_rate": 0.001935385500575374, "loss": 0.8448, "step": 11230 }, { "epoch": 3.2336018411967777, "grad_norm": 1.1193691492080688, "learning_rate": 0.0019353279631760644, "loss": 0.8586, "step": 11240 }, { "epoch": 3.2364787111622553, "grad_norm": 0.8669357299804688, "learning_rate": 0.0019352704257767548, "loss": 0.8169, "step": 11250 }, { "epoch": 3.2393555811277333, "grad_norm": 1.0368188619613647, "learning_rate": 0.0019352128883774453, "loss": 0.9223, "step": 11260 }, { "epoch": 3.242232451093211, "grad_norm": 1.183122158050537, "learning_rate": 0.001935155350978136, "loss": 0.8397, "step": 11270 }, { "epoch": 3.2451093210586883, "grad_norm": 1.452423334121704, "learning_rate": 0.0019350978135788263, "loss": 1.0232, "step": 11280 }, { "epoch": 3.247986191024166, "grad_norm": 1.5430601835250854, "learning_rate": 0.0019350402761795168, "loss": 0.7796, "step": 11290 }, { "epoch": 3.2508630609896434, "grad_norm": 1.1845935583114624, "learning_rate": 0.0019349827387802072, "loss": 0.7174, "step": 11300 }, { "epoch": 3.253739930955121, "grad_norm": 1.552002191543579, "learning_rate": 0.0019349252013808975, "loss": 1.0487, "step": 11310 }, { "epoch": 3.2566168009205985, "grad_norm": 2.7430808544158936, "learning_rate": 0.001934867663981588, "loss": 0.8657, "step": 11320 }, { "epoch": 3.259493670886076, "grad_norm": 1.0652494430541992, "learning_rate": 0.0019348101265822786, "loss": 0.8077, "step": 11330 }, { "epoch": 3.2623705408515535, "grad_norm": 1.3633482456207275, "learning_rate": 0.001934752589182969, "loss": 0.8113, "step": 11340 }, { "epoch": 3.265247410817031, "grad_norm": 1.7222607135772705, "learning_rate": 0.0019346950517836596, "loss": 0.912, "step": 11350 }, { "epoch": 3.2681242807825086, "grad_norm": 2.1333813667297363, "learning_rate": 0.0019346375143843497, "loss": 1.0326, "step": 11360 }, { "epoch": 3.271001150747986, "grad_norm": 0.8566032648086548, "learning_rate": 0.0019345799769850403, "loss": 0.8608, "step": 11370 }, { "epoch": 3.2738780207134637, "grad_norm": 1.9308090209960938, "learning_rate": 0.0019345224395857308, "loss": 0.8106, "step": 11380 }, { "epoch": 3.276754890678941, "grad_norm": 1.1002740859985352, "learning_rate": 0.0019344649021864212, "loss": 0.9288, "step": 11390 }, { "epoch": 3.2796317606444187, "grad_norm": 0.8447061777114868, "learning_rate": 0.0019344073647871117, "loss": 0.9777, "step": 11400 }, { "epoch": 3.2825086306098963, "grad_norm": 1.5299509763717651, "learning_rate": 0.0019343498273878023, "loss": 0.9906, "step": 11410 }, { "epoch": 3.285385500575374, "grad_norm": 1.9166722297668457, "learning_rate": 0.0019342922899884924, "loss": 0.8842, "step": 11420 }, { "epoch": 3.288262370540852, "grad_norm": 1.6655739545822144, "learning_rate": 0.001934234752589183, "loss": 0.906, "step": 11430 }, { "epoch": 3.291139240506329, "grad_norm": 2.2260892391204834, "learning_rate": 0.0019341772151898735, "loss": 0.9276, "step": 11440 }, { "epoch": 3.294016110471807, "grad_norm": 1.244666337966919, "learning_rate": 0.001934119677790564, "loss": 0.8431, "step": 11450 }, { "epoch": 3.2968929804372844, "grad_norm": 0.8800835609436035, "learning_rate": 0.0019340621403912545, "loss": 0.8466, "step": 11460 }, { "epoch": 3.299769850402762, "grad_norm": 0.9547956585884094, "learning_rate": 0.0019340046029919446, "loss": 0.8791, "step": 11470 }, { "epoch": 3.3026467203682395, "grad_norm": 1.7121734619140625, "learning_rate": 0.0019339470655926352, "loss": 0.82, "step": 11480 }, { "epoch": 3.305523590333717, "grad_norm": 1.328608751296997, "learning_rate": 0.0019338895281933257, "loss": 0.7433, "step": 11490 }, { "epoch": 3.3084004602991945, "grad_norm": 1.5540766716003418, "learning_rate": 0.001933831990794016, "loss": 1.0715, "step": 11500 }, { "epoch": 3.311277330264672, "grad_norm": 1.1290574073791504, "learning_rate": 0.0019337744533947066, "loss": 0.9541, "step": 11510 }, { "epoch": 3.3141542002301496, "grad_norm": 0.8655827641487122, "learning_rate": 0.0019337169159953972, "loss": 1.1348, "step": 11520 }, { "epoch": 3.317031070195627, "grad_norm": 1.7991814613342285, "learning_rate": 0.0019336593785960873, "loss": 1.1424, "step": 11530 }, { "epoch": 3.3199079401611047, "grad_norm": 1.3513020277023315, "learning_rate": 0.0019336018411967779, "loss": 0.9031, "step": 11540 }, { "epoch": 3.3227848101265822, "grad_norm": 1.9847283363342285, "learning_rate": 0.0019335443037974684, "loss": 0.8822, "step": 11550 }, { "epoch": 3.3256616800920598, "grad_norm": 1.2856907844543457, "learning_rate": 0.0019334867663981588, "loss": 0.9471, "step": 11560 }, { "epoch": 3.3285385500575373, "grad_norm": 2.006516933441162, "learning_rate": 0.0019334292289988494, "loss": 1.0094, "step": 11570 }, { "epoch": 3.331415420023015, "grad_norm": 1.664392113685608, "learning_rate": 0.00193337169159954, "loss": 0.8078, "step": 11580 }, { "epoch": 3.3342922899884924, "grad_norm": 1.167136788368225, "learning_rate": 0.00193331415420023, "loss": 0.8125, "step": 11590 }, { "epoch": 3.33716915995397, "grad_norm": 0.998475968837738, "learning_rate": 0.0019332566168009206, "loss": 0.8756, "step": 11600 }, { "epoch": 3.3400460299194474, "grad_norm": 1.6561107635498047, "learning_rate": 0.001933199079401611, "loss": 0.9001, "step": 11610 }, { "epoch": 3.3429228998849254, "grad_norm": 1.0039352178573608, "learning_rate": 0.0019331415420023015, "loss": 0.8762, "step": 11620 }, { "epoch": 3.345799769850403, "grad_norm": 0.9626198410987854, "learning_rate": 0.001933084004602992, "loss": 0.9202, "step": 11630 }, { "epoch": 3.3486766398158805, "grad_norm": 1.0303176641464233, "learning_rate": 0.0019330264672036824, "loss": 0.8691, "step": 11640 }, { "epoch": 3.351553509781358, "grad_norm": 1.57676362991333, "learning_rate": 0.0019329689298043728, "loss": 1.1598, "step": 11650 }, { "epoch": 3.3544303797468356, "grad_norm": 1.226190447807312, "learning_rate": 0.0019329113924050634, "loss": 0.7368, "step": 11660 }, { "epoch": 3.357307249712313, "grad_norm": 1.0340206623077393, "learning_rate": 0.0019328538550057537, "loss": 0.8203, "step": 11670 }, { "epoch": 3.3601841196777906, "grad_norm": 1.1056431531906128, "learning_rate": 0.0019327963176064443, "loss": 0.7531, "step": 11680 }, { "epoch": 3.363060989643268, "grad_norm": 1.367255687713623, "learning_rate": 0.0019327387802071348, "loss": 0.906, "step": 11690 }, { "epoch": 3.3659378596087457, "grad_norm": 0.8580129146575928, "learning_rate": 0.0019326812428078252, "loss": 0.8244, "step": 11700 }, { "epoch": 3.3688147295742232, "grad_norm": 1.2602227926254272, "learning_rate": 0.0019326237054085155, "loss": 0.8655, "step": 11710 }, { "epoch": 3.3716915995397008, "grad_norm": 1.6075387001037598, "learning_rate": 0.0019325661680092059, "loss": 0.7831, "step": 11720 }, { "epoch": 3.3745684695051783, "grad_norm": 1.1305136680603027, "learning_rate": 0.0019325086306098964, "loss": 0.765, "step": 11730 }, { "epoch": 3.377445339470656, "grad_norm": 2.0547311305999756, "learning_rate": 0.001932451093210587, "loss": 1.0859, "step": 11740 }, { "epoch": 3.3803222094361334, "grad_norm": 3.2706422805786133, "learning_rate": 0.0019323935558112773, "loss": 1.1428, "step": 11750 }, { "epoch": 3.383199079401611, "grad_norm": 1.6334251165390015, "learning_rate": 0.001932336018411968, "loss": 0.8286, "step": 11760 }, { "epoch": 3.3860759493670884, "grad_norm": 1.9904619455337524, "learning_rate": 0.0019322784810126583, "loss": 0.8583, "step": 11770 }, { "epoch": 3.388952819332566, "grad_norm": 1.9465560913085938, "learning_rate": 0.0019322209436133486, "loss": 1.0161, "step": 11780 }, { "epoch": 3.391829689298044, "grad_norm": 1.7594714164733887, "learning_rate": 0.0019321634062140392, "loss": 0.8778, "step": 11790 }, { "epoch": 3.394706559263521, "grad_norm": 1.4417319297790527, "learning_rate": 0.0019321058688147297, "loss": 0.9237, "step": 11800 }, { "epoch": 3.397583429228999, "grad_norm": 1.1435216665267944, "learning_rate": 0.00193204833141542, "loss": 0.8316, "step": 11810 }, { "epoch": 3.4004602991944766, "grad_norm": 1.094033122062683, "learning_rate": 0.0019319907940161106, "loss": 0.7933, "step": 11820 }, { "epoch": 3.403337169159954, "grad_norm": 1.4281302690505981, "learning_rate": 0.0019319332566168008, "loss": 1.0187, "step": 11830 }, { "epoch": 3.4062140391254316, "grad_norm": 2.1248109340667725, "learning_rate": 0.0019318757192174913, "loss": 0.8867, "step": 11840 }, { "epoch": 3.409090909090909, "grad_norm": 2.069969892501831, "learning_rate": 0.001931818181818182, "loss": 0.982, "step": 11850 }, { "epoch": 3.4119677790563867, "grad_norm": 1.2045308351516724, "learning_rate": 0.0019317606444188722, "loss": 0.8959, "step": 11860 }, { "epoch": 3.4148446490218642, "grad_norm": 1.1000314950942993, "learning_rate": 0.0019317031070195628, "loss": 0.9776, "step": 11870 }, { "epoch": 3.4177215189873418, "grad_norm": 1.03598153591156, "learning_rate": 0.0019316455696202534, "loss": 0.8259, "step": 11880 }, { "epoch": 3.4205983889528193, "grad_norm": 1.6494985818862915, "learning_rate": 0.0019315880322209435, "loss": 0.9545, "step": 11890 }, { "epoch": 3.423475258918297, "grad_norm": 1.3576287031173706, "learning_rate": 0.001931530494821634, "loss": 0.9315, "step": 11900 }, { "epoch": 3.4263521288837744, "grad_norm": 1.4553823471069336, "learning_rate": 0.0019314729574223246, "loss": 1.0657, "step": 11910 }, { "epoch": 3.429228998849252, "grad_norm": 1.7806438207626343, "learning_rate": 0.001931415420023015, "loss": 1.0124, "step": 11920 }, { "epoch": 3.4321058688147295, "grad_norm": 1.5362390279769897, "learning_rate": 0.0019313578826237055, "loss": 0.9786, "step": 11930 }, { "epoch": 3.434982738780207, "grad_norm": 2.1266136169433594, "learning_rate": 0.0019313003452243959, "loss": 0.7675, "step": 11940 }, { "epoch": 3.4378596087456845, "grad_norm": 1.1689682006835938, "learning_rate": 0.0019312428078250862, "loss": 1.0526, "step": 11950 }, { "epoch": 3.4407364787111625, "grad_norm": 0.8261664509773254, "learning_rate": 0.0019311852704257768, "loss": 0.7181, "step": 11960 }, { "epoch": 3.4436133486766396, "grad_norm": 0.8304380178451538, "learning_rate": 0.0019311277330264671, "loss": 0.9308, "step": 11970 }, { "epoch": 3.4464902186421176, "grad_norm": 1.143415093421936, "learning_rate": 0.0019310701956271577, "loss": 0.9751, "step": 11980 }, { "epoch": 3.449367088607595, "grad_norm": 0.8218410015106201, "learning_rate": 0.0019310126582278483, "loss": 0.7469, "step": 11990 }, { "epoch": 3.4522439585730726, "grad_norm": 2.022129535675049, "learning_rate": 0.0019309551208285386, "loss": 0.844, "step": 12000 }, { "epoch": 3.45512082853855, "grad_norm": 1.6381821632385254, "learning_rate": 0.001930897583429229, "loss": 0.9134, "step": 12010 }, { "epoch": 3.4579976985040277, "grad_norm": 0.9332036972045898, "learning_rate": 0.0019308400460299195, "loss": 0.818, "step": 12020 }, { "epoch": 3.4608745684695053, "grad_norm": 2.20869779586792, "learning_rate": 0.0019307825086306099, "loss": 1.0035, "step": 12030 }, { "epoch": 3.463751438434983, "grad_norm": 1.3605244159698486, "learning_rate": 0.0019307249712313004, "loss": 0.8783, "step": 12040 }, { "epoch": 3.4666283084004603, "grad_norm": 1.21759831905365, "learning_rate": 0.0019306674338319908, "loss": 0.9303, "step": 12050 }, { "epoch": 3.469505178365938, "grad_norm": 1.9874637126922607, "learning_rate": 0.0019306098964326814, "loss": 0.8549, "step": 12060 }, { "epoch": 3.4723820483314154, "grad_norm": 0.7081024646759033, "learning_rate": 0.0019305523590333717, "loss": 0.8305, "step": 12070 }, { "epoch": 3.475258918296893, "grad_norm": 1.0181529521942139, "learning_rate": 0.001930494821634062, "loss": 0.8732, "step": 12080 }, { "epoch": 3.4781357882623705, "grad_norm": 2.6148862838745117, "learning_rate": 0.0019304372842347526, "loss": 0.9752, "step": 12090 }, { "epoch": 3.481012658227848, "grad_norm": 1.097031593322754, "learning_rate": 0.0019303797468354432, "loss": 0.9174, "step": 12100 }, { "epoch": 3.4838895281933255, "grad_norm": 1.3243721723556519, "learning_rate": 0.0019303222094361335, "loss": 0.8652, "step": 12110 }, { "epoch": 3.486766398158803, "grad_norm": 2.459951400756836, "learning_rate": 0.001930264672036824, "loss": 1.0674, "step": 12120 }, { "epoch": 3.4896432681242806, "grad_norm": 1.2033661603927612, "learning_rate": 0.0019302071346375144, "loss": 0.8879, "step": 12130 }, { "epoch": 3.492520138089758, "grad_norm": 1.9516886472702026, "learning_rate": 0.0019301495972382048, "loss": 0.7909, "step": 12140 }, { "epoch": 3.495397008055236, "grad_norm": 1.7156060934066772, "learning_rate": 0.0019300920598388953, "loss": 0.7909, "step": 12150 }, { "epoch": 3.4982738780207137, "grad_norm": 1.5902131795883179, "learning_rate": 0.0019300345224395857, "loss": 0.9242, "step": 12160 }, { "epoch": 3.501150747986191, "grad_norm": 1.6204830408096313, "learning_rate": 0.0019299769850402763, "loss": 0.9672, "step": 12170 }, { "epoch": 3.5040276179516687, "grad_norm": 1.3292125463485718, "learning_rate": 0.0019299194476409668, "loss": 0.7375, "step": 12180 }, { "epoch": 3.5069044879171463, "grad_norm": 3.111314535140991, "learning_rate": 0.001929861910241657, "loss": 0.9011, "step": 12190 }, { "epoch": 3.509781357882624, "grad_norm": 1.3338146209716797, "learning_rate": 0.0019298043728423475, "loss": 1.0967, "step": 12200 }, { "epoch": 3.5126582278481013, "grad_norm": 1.3085598945617676, "learning_rate": 0.001929746835443038, "loss": 0.8201, "step": 12210 }, { "epoch": 3.515535097813579, "grad_norm": 0.9004116654396057, "learning_rate": 0.0019296892980437284, "loss": 0.9181, "step": 12220 }, { "epoch": 3.5184119677790564, "grad_norm": 1.2740586996078491, "learning_rate": 0.001929631760644419, "loss": 1.15, "step": 12230 }, { "epoch": 3.521288837744534, "grad_norm": 0.7931145429611206, "learning_rate": 0.0019295742232451096, "loss": 1.1193, "step": 12240 }, { "epoch": 3.5241657077100115, "grad_norm": 1.830141305923462, "learning_rate": 0.0019295166858457997, "loss": 0.8215, "step": 12250 }, { "epoch": 3.527042577675489, "grad_norm": 1.1993147134780884, "learning_rate": 0.0019294591484464902, "loss": 0.917, "step": 12260 }, { "epoch": 3.5299194476409665, "grad_norm": 1.6191388368606567, "learning_rate": 0.0019294016110471808, "loss": 0.9445, "step": 12270 }, { "epoch": 3.532796317606444, "grad_norm": 3.426295757293701, "learning_rate": 0.0019293440736478712, "loss": 0.9849, "step": 12280 }, { "epoch": 3.5356731875719216, "grad_norm": 1.4096126556396484, "learning_rate": 0.0019292865362485617, "loss": 0.9741, "step": 12290 }, { "epoch": 3.538550057537399, "grad_norm": 0.90078204870224, "learning_rate": 0.0019292289988492519, "loss": 0.8576, "step": 12300 }, { "epoch": 3.5414269275028767, "grad_norm": 1.1223605871200562, "learning_rate": 0.0019291714614499424, "loss": 1.0397, "step": 12310 }, { "epoch": 3.5443037974683547, "grad_norm": 1.3829821348190308, "learning_rate": 0.001929113924050633, "loss": 0.7655, "step": 12320 }, { "epoch": 3.5471806674338318, "grad_norm": 1.562347650527954, "learning_rate": 0.0019290563866513233, "loss": 0.9938, "step": 12330 }, { "epoch": 3.5500575373993097, "grad_norm": 0.9490426182746887, "learning_rate": 0.001928998849252014, "loss": 0.7986, "step": 12340 }, { "epoch": 3.5529344073647873, "grad_norm": 1.0687024593353271, "learning_rate": 0.0019289413118527045, "loss": 1.0824, "step": 12350 }, { "epoch": 3.555811277330265, "grad_norm": 1.5450325012207031, "learning_rate": 0.0019288837744533946, "loss": 0.8782, "step": 12360 }, { "epoch": 3.5586881472957423, "grad_norm": 1.4311970472335815, "learning_rate": 0.0019288262370540852, "loss": 0.9219, "step": 12370 }, { "epoch": 3.56156501726122, "grad_norm": 0.9708885550498962, "learning_rate": 0.0019287686996547757, "loss": 0.9305, "step": 12380 }, { "epoch": 3.5644418872266974, "grad_norm": 1.8451502323150635, "learning_rate": 0.001928711162255466, "loss": 0.8187, "step": 12390 }, { "epoch": 3.567318757192175, "grad_norm": 1.0974143743515015, "learning_rate": 0.0019286536248561566, "loss": 1.0043, "step": 12400 }, { "epoch": 3.5701956271576525, "grad_norm": 1.9627234935760498, "learning_rate": 0.001928596087456847, "loss": 0.8495, "step": 12410 }, { "epoch": 3.57307249712313, "grad_norm": 1.3205561637878418, "learning_rate": 0.0019285385500575373, "loss": 0.7878, "step": 12420 }, { "epoch": 3.5759493670886076, "grad_norm": 1.584741473197937, "learning_rate": 0.0019284810126582279, "loss": 0.9095, "step": 12430 }, { "epoch": 3.578826237054085, "grad_norm": 1.1215031147003174, "learning_rate": 0.0019284234752589182, "loss": 0.8156, "step": 12440 }, { "epoch": 3.5817031070195626, "grad_norm": 1.2194303274154663, "learning_rate": 0.0019283659378596088, "loss": 0.944, "step": 12450 }, { "epoch": 3.58457997698504, "grad_norm": 1.3321378231048584, "learning_rate": 0.0019283084004602994, "loss": 0.8358, "step": 12460 }, { "epoch": 3.5874568469505177, "grad_norm": 2.1915125846862793, "learning_rate": 0.0019282508630609897, "loss": 0.9703, "step": 12470 }, { "epoch": 3.5903337169159952, "grad_norm": 1.866108775138855, "learning_rate": 0.00192819332566168, "loss": 1.0454, "step": 12480 }, { "epoch": 3.593210586881473, "grad_norm": 1.350526213645935, "learning_rate": 0.0019281357882623706, "loss": 0.8617, "step": 12490 }, { "epoch": 3.5960874568469503, "grad_norm": 0.8016111850738525, "learning_rate": 0.001928078250863061, "loss": 0.8416, "step": 12500 }, { "epoch": 3.5989643268124283, "grad_norm": 1.145646572113037, "learning_rate": 0.0019280207134637515, "loss": 0.938, "step": 12510 }, { "epoch": 3.6018411967779054, "grad_norm": 0.9446151256561279, "learning_rate": 0.0019279631760644419, "loss": 0.7449, "step": 12520 }, { "epoch": 3.6047180667433834, "grad_norm": 1.017399787902832, "learning_rate": 0.0019279056386651324, "loss": 0.7922, "step": 12530 }, { "epoch": 3.607594936708861, "grad_norm": 1.3986520767211914, "learning_rate": 0.0019278481012658228, "loss": 0.96, "step": 12540 }, { "epoch": 3.6104718066743384, "grad_norm": 1.8394269943237305, "learning_rate": 0.0019277905638665131, "loss": 0.9204, "step": 12550 }, { "epoch": 3.613348676639816, "grad_norm": 1.0236701965332031, "learning_rate": 0.0019277330264672037, "loss": 0.9256, "step": 12560 }, { "epoch": 3.6162255466052935, "grad_norm": 1.5001263618469238, "learning_rate": 0.0019276754890678943, "loss": 0.7045, "step": 12570 }, { "epoch": 3.619102416570771, "grad_norm": 1.219671368598938, "learning_rate": 0.0019276179516685846, "loss": 1.0711, "step": 12580 }, { "epoch": 3.6219792865362486, "grad_norm": 3.181907892227173, "learning_rate": 0.0019275604142692752, "loss": 1.0439, "step": 12590 }, { "epoch": 3.624856156501726, "grad_norm": 1.966577410697937, "learning_rate": 0.0019275028768699655, "loss": 0.9066, "step": 12600 }, { "epoch": 3.6277330264672036, "grad_norm": 0.7782929539680481, "learning_rate": 0.0019274453394706559, "loss": 0.7315, "step": 12610 }, { "epoch": 3.630609896432681, "grad_norm": 1.110713243484497, "learning_rate": 0.0019273878020713464, "loss": 0.8151, "step": 12620 }, { "epoch": 3.6334867663981587, "grad_norm": 1.6812167167663574, "learning_rate": 0.0019273302646720368, "loss": 1.1738, "step": 12630 }, { "epoch": 3.6363636363636362, "grad_norm": 1.9635097980499268, "learning_rate": 0.0019272727272727273, "loss": 1.1065, "step": 12640 }, { "epoch": 3.6392405063291138, "grad_norm": 1.2207564115524292, "learning_rate": 0.001927215189873418, "loss": 0.7762, "step": 12650 }, { "epoch": 3.6421173762945918, "grad_norm": 1.3650078773498535, "learning_rate": 0.001927157652474108, "loss": 0.8319, "step": 12660 }, { "epoch": 3.644994246260069, "grad_norm": 1.1909270286560059, "learning_rate": 0.0019271001150747986, "loss": 0.8541, "step": 12670 }, { "epoch": 3.647871116225547, "grad_norm": 1.2508471012115479, "learning_rate": 0.0019270425776754892, "loss": 0.9424, "step": 12680 }, { "epoch": 3.650747986191024, "grad_norm": 0.8922887444496155, "learning_rate": 0.0019269850402761795, "loss": 0.9472, "step": 12690 }, { "epoch": 3.653624856156502, "grad_norm": 1.6659257411956787, "learning_rate": 0.00192692750287687, "loss": 0.778, "step": 12700 }, { "epoch": 3.6565017261219794, "grad_norm": 1.5230718851089478, "learning_rate": 0.0019268699654775606, "loss": 0.9498, "step": 12710 }, { "epoch": 3.659378596087457, "grad_norm": 0.6633215546607971, "learning_rate": 0.0019268124280782508, "loss": 0.8256, "step": 12720 }, { "epoch": 3.6622554660529345, "grad_norm": 1.0842739343643188, "learning_rate": 0.0019267548906789413, "loss": 0.8582, "step": 12730 }, { "epoch": 3.665132336018412, "grad_norm": 1.5407938957214355, "learning_rate": 0.0019266973532796317, "loss": 0.7528, "step": 12740 }, { "epoch": 3.6680092059838896, "grad_norm": 1.728737473487854, "learning_rate": 0.0019266398158803222, "loss": 0.7706, "step": 12750 }, { "epoch": 3.670886075949367, "grad_norm": 0.880730926990509, "learning_rate": 0.0019265822784810128, "loss": 1.1639, "step": 12760 }, { "epoch": 3.6737629459148446, "grad_norm": 1.2134425640106201, "learning_rate": 0.0019265247410817032, "loss": 0.8478, "step": 12770 }, { "epoch": 3.676639815880322, "grad_norm": 2.500502824783325, "learning_rate": 0.0019264672036823935, "loss": 1.0864, "step": 12780 }, { "epoch": 3.6795166858457997, "grad_norm": 2.0511200428009033, "learning_rate": 0.001926409666283084, "loss": 0.9721, "step": 12790 }, { "epoch": 3.6823935558112773, "grad_norm": 1.1868417263031006, "learning_rate": 0.0019263521288837744, "loss": 0.7981, "step": 12800 }, { "epoch": 3.685270425776755, "grad_norm": 1.1493101119995117, "learning_rate": 0.001926294591484465, "loss": 1.0862, "step": 12810 }, { "epoch": 3.6881472957422323, "grad_norm": 1.5828371047973633, "learning_rate": 0.0019262370540851555, "loss": 0.9287, "step": 12820 }, { "epoch": 3.69102416570771, "grad_norm": 1.253804087638855, "learning_rate": 0.0019261795166858459, "loss": 0.7737, "step": 12830 }, { "epoch": 3.6939010356731874, "grad_norm": 1.0178484916687012, "learning_rate": 0.0019261219792865362, "loss": 0.8585, "step": 12840 }, { "epoch": 3.6967779056386654, "grad_norm": 1.1369109153747559, "learning_rate": 0.0019260644418872268, "loss": 0.8586, "step": 12850 }, { "epoch": 3.6996547756041425, "grad_norm": 0.8407977223396301, "learning_rate": 0.0019260069044879171, "loss": 0.9937, "step": 12860 }, { "epoch": 3.7025316455696204, "grad_norm": 1.4187649488449097, "learning_rate": 0.0019259493670886077, "loss": 0.9104, "step": 12870 }, { "epoch": 3.705408515535098, "grad_norm": 1.3835238218307495, "learning_rate": 0.001925891829689298, "loss": 0.9366, "step": 12880 }, { "epoch": 3.7082853855005755, "grad_norm": 0.9814357161521912, "learning_rate": 0.0019258342922899886, "loss": 0.8699, "step": 12890 }, { "epoch": 3.711162255466053, "grad_norm": 1.3629096746444702, "learning_rate": 0.001925776754890679, "loss": 0.9032, "step": 12900 }, { "epoch": 3.7140391254315306, "grad_norm": 1.2587534189224243, "learning_rate": 0.0019257192174913693, "loss": 0.9859, "step": 12910 }, { "epoch": 3.716915995397008, "grad_norm": 1.0687330961227417, "learning_rate": 0.0019256616800920599, "loss": 0.9325, "step": 12920 }, { "epoch": 3.7197928653624857, "grad_norm": 1.0280097723007202, "learning_rate": 0.0019256041426927504, "loss": 0.9318, "step": 12930 }, { "epoch": 3.722669735327963, "grad_norm": 1.696683406829834, "learning_rate": 0.0019255466052934408, "loss": 0.8815, "step": 12940 }, { "epoch": 3.7255466052934407, "grad_norm": 1.096991777420044, "learning_rate": 0.0019254890678941314, "loss": 0.8586, "step": 12950 }, { "epoch": 3.7284234752589183, "grad_norm": 1.019160270690918, "learning_rate": 0.0019254315304948217, "loss": 0.8644, "step": 12960 }, { "epoch": 3.731300345224396, "grad_norm": 1.0301828384399414, "learning_rate": 0.001925373993095512, "loss": 1.0811, "step": 12970 }, { "epoch": 3.7341772151898733, "grad_norm": 1.3130483627319336, "learning_rate": 0.0019253164556962026, "loss": 1.0646, "step": 12980 }, { "epoch": 3.737054085155351, "grad_norm": 0.7845792174339294, "learning_rate": 0.001925258918296893, "loss": 0.9929, "step": 12990 }, { "epoch": 3.7399309551208284, "grad_norm": 1.7676544189453125, "learning_rate": 0.0019252013808975835, "loss": 1.0051, "step": 13000 }, { "epoch": 3.742807825086306, "grad_norm": 1.8262616395950317, "learning_rate": 0.001925143843498274, "loss": 0.78, "step": 13010 }, { "epoch": 3.745684695051784, "grad_norm": 0.9618165493011475, "learning_rate": 0.0019250863060989642, "loss": 0.7276, "step": 13020 }, { "epoch": 3.748561565017261, "grad_norm": 1.3839391469955444, "learning_rate": 0.0019250287686996548, "loss": 0.9829, "step": 13030 }, { "epoch": 3.751438434982739, "grad_norm": 1.210344672203064, "learning_rate": 0.0019249712313003453, "loss": 0.9299, "step": 13040 }, { "epoch": 3.754315304948216, "grad_norm": 0.9532739520072937, "learning_rate": 0.0019249136939010357, "loss": 0.7249, "step": 13050 }, { "epoch": 3.757192174913694, "grad_norm": 1.6559005975723267, "learning_rate": 0.0019248561565017263, "loss": 1.0592, "step": 13060 }, { "epoch": 3.7600690448791716, "grad_norm": 1.7727349996566772, "learning_rate": 0.0019247986191024168, "loss": 0.9852, "step": 13070 }, { "epoch": 3.762945914844649, "grad_norm": 2.2520341873168945, "learning_rate": 0.001924741081703107, "loss": 0.7928, "step": 13080 }, { "epoch": 3.7658227848101267, "grad_norm": 2.021679162979126, "learning_rate": 0.0019246835443037975, "loss": 0.9826, "step": 13090 }, { "epoch": 3.768699654775604, "grad_norm": 1.6604081392288208, "learning_rate": 0.0019246260069044879, "loss": 0.9258, "step": 13100 }, { "epoch": 3.7715765247410817, "grad_norm": 1.0565316677093506, "learning_rate": 0.0019245684695051784, "loss": 0.8878, "step": 13110 }, { "epoch": 3.7744533947065593, "grad_norm": 1.432837963104248, "learning_rate": 0.001924510932105869, "loss": 0.9214, "step": 13120 }, { "epoch": 3.777330264672037, "grad_norm": 1.5348974466323853, "learning_rate": 0.0019244533947065591, "loss": 0.7508, "step": 13130 }, { "epoch": 3.7802071346375143, "grad_norm": 2.455178737640381, "learning_rate": 0.0019243958573072497, "loss": 0.8991, "step": 13140 }, { "epoch": 3.783084004602992, "grad_norm": 1.271081805229187, "learning_rate": 0.0019243383199079402, "loss": 0.8707, "step": 13150 }, { "epoch": 3.7859608745684694, "grad_norm": 1.1129933595657349, "learning_rate": 0.0019242807825086306, "loss": 0.8359, "step": 13160 }, { "epoch": 3.788837744533947, "grad_norm": 1.274943232536316, "learning_rate": 0.0019242232451093212, "loss": 0.9581, "step": 13170 }, { "epoch": 3.7917146144994245, "grad_norm": 0.6448306441307068, "learning_rate": 0.0019241657077100117, "loss": 0.8865, "step": 13180 }, { "epoch": 3.794591484464902, "grad_norm": 1.5562525987625122, "learning_rate": 0.0019241081703107019, "loss": 0.8082, "step": 13190 }, { "epoch": 3.7974683544303796, "grad_norm": 1.7159907817840576, "learning_rate": 0.0019240506329113924, "loss": 1.0776, "step": 13200 }, { "epoch": 3.8003452243958575, "grad_norm": 1.2305517196655273, "learning_rate": 0.0019239930955120828, "loss": 0.9347, "step": 13210 }, { "epoch": 3.8032220943613346, "grad_norm": 1.20650053024292, "learning_rate": 0.0019239355581127733, "loss": 0.9125, "step": 13220 }, { "epoch": 3.8060989643268126, "grad_norm": 0.854028046131134, "learning_rate": 0.0019238780207134639, "loss": 0.6582, "step": 13230 }, { "epoch": 3.80897583429229, "grad_norm": 1.2890095710754395, "learning_rate": 0.0019238204833141542, "loss": 1.0074, "step": 13240 }, { "epoch": 3.8118527042577677, "grad_norm": 1.1639851331710815, "learning_rate": 0.0019237629459148446, "loss": 0.771, "step": 13250 }, { "epoch": 3.814729574223245, "grad_norm": 0.9302098155021667, "learning_rate": 0.0019237054085155351, "loss": 0.8787, "step": 13260 }, { "epoch": 3.8176064441887227, "grad_norm": 1.043768048286438, "learning_rate": 0.0019236478711162255, "loss": 0.8846, "step": 13270 }, { "epoch": 3.8204833141542003, "grad_norm": 1.1083390712738037, "learning_rate": 0.001923590333716916, "loss": 0.7271, "step": 13280 }, { "epoch": 3.823360184119678, "grad_norm": 0.8888152837753296, "learning_rate": 0.0019235327963176066, "loss": 0.765, "step": 13290 }, { "epoch": 3.8262370540851554, "grad_norm": 1.0619746446609497, "learning_rate": 0.001923475258918297, "loss": 0.9593, "step": 13300 }, { "epoch": 3.829113924050633, "grad_norm": 1.1903551816940308, "learning_rate": 0.0019234177215189873, "loss": 0.7979, "step": 13310 }, { "epoch": 3.8319907940161104, "grad_norm": 1.568801760673523, "learning_rate": 0.0019233601841196777, "loss": 0.9002, "step": 13320 }, { "epoch": 3.834867663981588, "grad_norm": 1.397705316543579, "learning_rate": 0.0019233026467203682, "loss": 0.7434, "step": 13330 }, { "epoch": 3.8377445339470655, "grad_norm": 1.1725475788116455, "learning_rate": 0.0019232451093210588, "loss": 0.9433, "step": 13340 }, { "epoch": 3.840621403912543, "grad_norm": 1.4116679430007935, "learning_rate": 0.0019231875719217491, "loss": 0.8121, "step": 13350 }, { "epoch": 3.8434982738780206, "grad_norm": 1.5361456871032715, "learning_rate": 0.0019231300345224397, "loss": 0.8619, "step": 13360 }, { "epoch": 3.846375143843498, "grad_norm": 1.3612314462661743, "learning_rate": 0.00192307249712313, "loss": 0.8358, "step": 13370 }, { "epoch": 3.849252013808976, "grad_norm": 1.0939881801605225, "learning_rate": 0.0019230149597238204, "loss": 0.8601, "step": 13380 }, { "epoch": 3.852128883774453, "grad_norm": 1.5858631134033203, "learning_rate": 0.001922957422324511, "loss": 1.148, "step": 13390 }, { "epoch": 3.855005753739931, "grad_norm": 0.9687299728393555, "learning_rate": 0.0019228998849252015, "loss": 0.9926, "step": 13400 }, { "epoch": 3.8578826237054082, "grad_norm": 1.0697810649871826, "learning_rate": 0.0019228423475258919, "loss": 0.8005, "step": 13410 }, { "epoch": 3.8607594936708862, "grad_norm": 1.8452807664871216, "learning_rate": 0.0019227848101265824, "loss": 0.8283, "step": 13420 }, { "epoch": 3.8636363636363638, "grad_norm": 1.3223754167556763, "learning_rate": 0.0019227272727272726, "loss": 0.7947, "step": 13430 }, { "epoch": 3.8665132336018413, "grad_norm": 1.390507698059082, "learning_rate": 0.0019226697353279631, "loss": 0.9543, "step": 13440 }, { "epoch": 3.869390103567319, "grad_norm": 1.0077508687973022, "learning_rate": 0.0019226121979286537, "loss": 0.8872, "step": 13450 }, { "epoch": 3.8722669735327964, "grad_norm": 1.0420184135437012, "learning_rate": 0.001922554660529344, "loss": 0.8895, "step": 13460 }, { "epoch": 3.875143843498274, "grad_norm": 1.3849806785583496, "learning_rate": 0.0019224971231300346, "loss": 0.8695, "step": 13470 }, { "epoch": 3.8780207134637514, "grad_norm": 1.3816393613815308, "learning_rate": 0.0019224395857307252, "loss": 1.0921, "step": 13480 }, { "epoch": 3.880897583429229, "grad_norm": 1.0337809324264526, "learning_rate": 0.0019223820483314153, "loss": 0.9962, "step": 13490 }, { "epoch": 3.8837744533947065, "grad_norm": 0.9657948613166809, "learning_rate": 0.0019223245109321059, "loss": 0.884, "step": 13500 }, { "epoch": 3.886651323360184, "grad_norm": 0.9787033200263977, "learning_rate": 0.0019222669735327964, "loss": 0.9506, "step": 13510 }, { "epoch": 3.8895281933256616, "grad_norm": 1.103510856628418, "learning_rate": 0.0019222094361334868, "loss": 0.8602, "step": 13520 }, { "epoch": 3.892405063291139, "grad_norm": 1.2826722860336304, "learning_rate": 0.0019221518987341773, "loss": 0.8381, "step": 13530 }, { "epoch": 3.8952819332566166, "grad_norm": 1.7649040222167969, "learning_rate": 0.001922094361334868, "loss": 0.7584, "step": 13540 }, { "epoch": 3.8981588032220946, "grad_norm": 0.7776345610618591, "learning_rate": 0.001922036823935558, "loss": 0.9466, "step": 13550 }, { "epoch": 3.9010356731875717, "grad_norm": 1.1539019346237183, "learning_rate": 0.0019219792865362486, "loss": 1.0134, "step": 13560 }, { "epoch": 3.9039125431530497, "grad_norm": 1.2684111595153809, "learning_rate": 0.001921921749136939, "loss": 1.0179, "step": 13570 }, { "epoch": 3.906789413118527, "grad_norm": 1.3139063119888306, "learning_rate": 0.0019218642117376295, "loss": 0.9536, "step": 13580 }, { "epoch": 3.9096662830840048, "grad_norm": 1.4166256189346313, "learning_rate": 0.00192180667433832, "loss": 0.7834, "step": 13590 }, { "epoch": 3.9125431530494823, "grad_norm": 1.0087776184082031, "learning_rate": 0.0019217491369390104, "loss": 0.9065, "step": 13600 }, { "epoch": 3.91542002301496, "grad_norm": 1.3637316226959229, "learning_rate": 0.0019216915995397008, "loss": 0.8858, "step": 13610 }, { "epoch": 3.9182968929804374, "grad_norm": 1.5976483821868896, "learning_rate": 0.0019216340621403913, "loss": 0.8163, "step": 13620 }, { "epoch": 3.921173762945915, "grad_norm": 1.873005986213684, "learning_rate": 0.0019215765247410817, "loss": 0.8075, "step": 13630 }, { "epoch": 3.9240506329113924, "grad_norm": 0.7583629488945007, "learning_rate": 0.0019215189873417722, "loss": 0.8811, "step": 13640 }, { "epoch": 3.92692750287687, "grad_norm": 1.29827880859375, "learning_rate": 0.0019214614499424628, "loss": 0.9174, "step": 13650 }, { "epoch": 3.9298043728423475, "grad_norm": 1.2228184938430786, "learning_rate": 0.0019214039125431532, "loss": 0.8698, "step": 13660 }, { "epoch": 3.932681242807825, "grad_norm": 1.8220182657241821, "learning_rate": 0.0019213463751438435, "loss": 0.7914, "step": 13670 }, { "epoch": 3.9355581127733026, "grad_norm": 2.380136489868164, "learning_rate": 0.0019212888377445338, "loss": 0.7907, "step": 13680 }, { "epoch": 3.93843498273878, "grad_norm": 1.5849971771240234, "learning_rate": 0.0019212313003452244, "loss": 0.8676, "step": 13690 }, { "epoch": 3.9413118527042577, "grad_norm": 1.03949773311615, "learning_rate": 0.001921173762945915, "loss": 0.8429, "step": 13700 }, { "epoch": 3.944188722669735, "grad_norm": 1.1854979991912842, "learning_rate": 0.0019211162255466053, "loss": 0.9095, "step": 13710 }, { "epoch": 3.9470655926352127, "grad_norm": 2.3211071491241455, "learning_rate": 0.0019210586881472959, "loss": 0.7263, "step": 13720 }, { "epoch": 3.9499424626006903, "grad_norm": 1.5662431716918945, "learning_rate": 0.0019210011507479862, "loss": 0.8188, "step": 13730 }, { "epoch": 3.9528193325661682, "grad_norm": 2.0645792484283447, "learning_rate": 0.0019209436133486766, "loss": 0.8301, "step": 13740 }, { "epoch": 3.9556962025316453, "grad_norm": 1.6131607294082642, "learning_rate": 0.0019208860759493671, "loss": 0.8987, "step": 13750 }, { "epoch": 3.9585730724971233, "grad_norm": 1.479185700416565, "learning_rate": 0.0019208285385500577, "loss": 0.9243, "step": 13760 }, { "epoch": 3.961449942462601, "grad_norm": 1.3394783735275269, "learning_rate": 0.001920771001150748, "loss": 0.887, "step": 13770 }, { "epoch": 3.9643268124280784, "grad_norm": 1.0341458320617676, "learning_rate": 0.0019207134637514386, "loss": 0.7792, "step": 13780 }, { "epoch": 3.967203682393556, "grad_norm": 0.9999095797538757, "learning_rate": 0.0019206559263521287, "loss": 0.8854, "step": 13790 }, { "epoch": 3.9700805523590335, "grad_norm": 1.4076733589172363, "learning_rate": 0.0019205983889528193, "loss": 0.955, "step": 13800 }, { "epoch": 3.972957422324511, "grad_norm": 0.9987596273422241, "learning_rate": 0.0019205408515535099, "loss": 0.8423, "step": 13810 }, { "epoch": 3.9758342922899885, "grad_norm": 1.1267249584197998, "learning_rate": 0.0019204833141542002, "loss": 0.7452, "step": 13820 }, { "epoch": 3.978711162255466, "grad_norm": 1.2240468263626099, "learning_rate": 0.0019204257767548908, "loss": 0.7873, "step": 13830 }, { "epoch": 3.9815880322209436, "grad_norm": 0.7786559462547302, "learning_rate": 0.0019203682393555814, "loss": 1.0415, "step": 13840 }, { "epoch": 3.984464902186421, "grad_norm": 1.4127812385559082, "learning_rate": 0.0019203107019562715, "loss": 1.2031, "step": 13850 }, { "epoch": 3.9873417721518987, "grad_norm": 1.857555627822876, "learning_rate": 0.001920253164556962, "loss": 1.0619, "step": 13860 }, { "epoch": 3.990218642117376, "grad_norm": 1.0483466386795044, "learning_rate": 0.0019201956271576526, "loss": 1.089, "step": 13870 }, { "epoch": 3.9930955120828537, "grad_norm": 1.0028544664382935, "learning_rate": 0.001920138089758343, "loss": 0.8775, "step": 13880 }, { "epoch": 3.9959723820483313, "grad_norm": 1.6830556392669678, "learning_rate": 0.0019200805523590335, "loss": 1.0731, "step": 13890 }, { "epoch": 3.998849252013809, "grad_norm": 2.936338424682617, "learning_rate": 0.0019200230149597236, "loss": 0.7512, "step": 13900 }, { "epoch": 4.001726121979287, "grad_norm": 1.2301766872406006, "learning_rate": 0.0019199654775604142, "loss": 1.0248, "step": 13910 }, { "epoch": 4.004602991944764, "grad_norm": 1.2521291971206665, "learning_rate": 0.0019199079401611048, "loss": 0.8337, "step": 13920 }, { "epoch": 4.007479861910242, "grad_norm": 1.1829417943954468, "learning_rate": 0.0019198504027617951, "loss": 0.6364, "step": 13930 }, { "epoch": 4.010356731875719, "grad_norm": 0.9509701132774353, "learning_rate": 0.0019197928653624857, "loss": 0.735, "step": 13940 }, { "epoch": 4.013233601841197, "grad_norm": 1.231339454650879, "learning_rate": 0.0019197353279631763, "loss": 0.8228, "step": 13950 }, { "epoch": 4.016110471806674, "grad_norm": 0.9001617431640625, "learning_rate": 0.0019196777905638664, "loss": 0.8159, "step": 13960 }, { "epoch": 4.018987341772152, "grad_norm": 1.5728026628494263, "learning_rate": 0.001919620253164557, "loss": 0.9781, "step": 13970 }, { "epoch": 4.021864211737629, "grad_norm": 1.4255387783050537, "learning_rate": 0.0019195627157652475, "loss": 0.7769, "step": 13980 }, { "epoch": 4.024741081703107, "grad_norm": 1.1560014486312866, "learning_rate": 0.0019195051783659379, "loss": 1.0656, "step": 13990 }, { "epoch": 4.027617951668584, "grad_norm": 0.968207836151123, "learning_rate": 0.0019194476409666284, "loss": 0.9535, "step": 14000 }, { "epoch": 4.030494821634062, "grad_norm": 1.5739248991012573, "learning_rate": 0.0019193901035673188, "loss": 0.9234, "step": 14010 }, { "epoch": 4.03337169159954, "grad_norm": 0.7703878879547119, "learning_rate": 0.0019193325661680091, "loss": 0.805, "step": 14020 }, { "epoch": 4.036248561565017, "grad_norm": 1.202100396156311, "learning_rate": 0.0019192750287686997, "loss": 0.7731, "step": 14030 }, { "epoch": 4.039125431530495, "grad_norm": 1.2178089618682861, "learning_rate": 0.00191921749136939, "loss": 0.8584, "step": 14040 }, { "epoch": 4.042002301495972, "grad_norm": 1.9244486093521118, "learning_rate": 0.0019191599539700806, "loss": 0.9856, "step": 14050 }, { "epoch": 4.04487917146145, "grad_norm": 1.5274394750595093, "learning_rate": 0.0019191024165707712, "loss": 0.9009, "step": 14060 }, { "epoch": 4.047756041426927, "grad_norm": 1.2158138751983643, "learning_rate": 0.0019190448791714615, "loss": 0.9067, "step": 14070 }, { "epoch": 4.050632911392405, "grad_norm": 1.1989797353744507, "learning_rate": 0.0019189873417721518, "loss": 0.9592, "step": 14080 }, { "epoch": 4.053509781357882, "grad_norm": 1.210785984992981, "learning_rate": 0.0019189298043728424, "loss": 0.769, "step": 14090 }, { "epoch": 4.05638665132336, "grad_norm": 1.740071177482605, "learning_rate": 0.0019188722669735328, "loss": 0.7598, "step": 14100 }, { "epoch": 4.0592635212888375, "grad_norm": 1.1635239124298096, "learning_rate": 0.0019188147295742233, "loss": 0.7985, "step": 14110 }, { "epoch": 4.0621403912543155, "grad_norm": 1.6770130395889282, "learning_rate": 0.0019187571921749137, "loss": 1.0557, "step": 14120 }, { "epoch": 4.065017261219793, "grad_norm": 1.0562971830368042, "learning_rate": 0.0019186996547756042, "loss": 0.9336, "step": 14130 }, { "epoch": 4.0678941311852705, "grad_norm": 1.0697581768035889, "learning_rate": 0.0019186421173762946, "loss": 0.608, "step": 14140 }, { "epoch": 4.070771001150748, "grad_norm": 0.8473911285400391, "learning_rate": 0.001918584579976985, "loss": 0.9746, "step": 14150 }, { "epoch": 4.073647871116226, "grad_norm": 2.502476215362549, "learning_rate": 0.0019185270425776755, "loss": 0.8615, "step": 14160 }, { "epoch": 4.076524741081703, "grad_norm": 1.9246559143066406, "learning_rate": 0.001918469505178366, "loss": 0.8438, "step": 14170 }, { "epoch": 4.079401611047181, "grad_norm": 1.3773092031478882, "learning_rate": 0.0019184119677790564, "loss": 0.7872, "step": 14180 }, { "epoch": 4.082278481012658, "grad_norm": 1.7903376817703247, "learning_rate": 0.001918354430379747, "loss": 0.8501, "step": 14190 }, { "epoch": 4.085155350978136, "grad_norm": 0.9602370262145996, "learning_rate": 0.0019182968929804373, "loss": 0.9304, "step": 14200 }, { "epoch": 4.088032220943614, "grad_norm": 0.940354585647583, "learning_rate": 0.0019182393555811277, "loss": 0.8333, "step": 14210 }, { "epoch": 4.090909090909091, "grad_norm": 1.4435068368911743, "learning_rate": 0.0019181818181818182, "loss": 0.7287, "step": 14220 }, { "epoch": 4.093785960874569, "grad_norm": 0.9698981046676636, "learning_rate": 0.0019181242807825088, "loss": 0.7304, "step": 14230 }, { "epoch": 4.096662830840046, "grad_norm": 1.241775393486023, "learning_rate": 0.0019180667433831991, "loss": 0.8565, "step": 14240 }, { "epoch": 4.099539700805524, "grad_norm": 1.0165519714355469, "learning_rate": 0.0019180092059838897, "loss": 0.787, "step": 14250 }, { "epoch": 4.102416570771001, "grad_norm": 1.1786009073257446, "learning_rate": 0.0019179516685845798, "loss": 0.8087, "step": 14260 }, { "epoch": 4.105293440736479, "grad_norm": 1.4116036891937256, "learning_rate": 0.0019178941311852704, "loss": 1.136, "step": 14270 }, { "epoch": 4.108170310701956, "grad_norm": 1.1496917009353638, "learning_rate": 0.001917836593785961, "loss": 0.817, "step": 14280 }, { "epoch": 4.111047180667434, "grad_norm": 0.9651239514350891, "learning_rate": 0.0019177790563866513, "loss": 0.751, "step": 14290 }, { "epoch": 4.113924050632911, "grad_norm": 1.1556931734085083, "learning_rate": 0.0019177215189873419, "loss": 0.7968, "step": 14300 }, { "epoch": 4.116800920598389, "grad_norm": 1.1039769649505615, "learning_rate": 0.0019176639815880324, "loss": 0.8287, "step": 14310 }, { "epoch": 4.119677790563866, "grad_norm": 1.3930768966674805, "learning_rate": 0.0019176064441887226, "loss": 0.8074, "step": 14320 }, { "epoch": 4.122554660529344, "grad_norm": 1.5626513957977295, "learning_rate": 0.0019175489067894131, "loss": 0.7631, "step": 14330 }, { "epoch": 4.125431530494821, "grad_norm": 1.314261794090271, "learning_rate": 0.0019174913693901037, "loss": 0.9065, "step": 14340 }, { "epoch": 4.128308400460299, "grad_norm": 1.225821852684021, "learning_rate": 0.001917433831990794, "loss": 0.968, "step": 14350 }, { "epoch": 4.131185270425776, "grad_norm": 0.9961084127426147, "learning_rate": 0.0019173762945914846, "loss": 0.7023, "step": 14360 }, { "epoch": 4.134062140391254, "grad_norm": 1.8206201791763306, "learning_rate": 0.001917318757192175, "loss": 0.9638, "step": 14370 }, { "epoch": 4.136939010356732, "grad_norm": 0.8946329951286316, "learning_rate": 0.0019172612197928653, "loss": 0.9007, "step": 14380 }, { "epoch": 4.139815880322209, "grad_norm": 1.085939645767212, "learning_rate": 0.0019172036823935559, "loss": 0.932, "step": 14390 }, { "epoch": 4.142692750287687, "grad_norm": 0.9975664615631104, "learning_rate": 0.0019171461449942462, "loss": 0.8084, "step": 14400 }, { "epoch": 4.1455696202531644, "grad_norm": 1.3575669527053833, "learning_rate": 0.0019170886075949368, "loss": 0.7226, "step": 14410 }, { "epoch": 4.148446490218642, "grad_norm": 2.1131832599639893, "learning_rate": 0.0019170310701956273, "loss": 0.8337, "step": 14420 }, { "epoch": 4.1513233601841195, "grad_norm": 2.274052381515503, "learning_rate": 0.0019169735327963177, "loss": 0.9246, "step": 14430 }, { "epoch": 4.1542002301495975, "grad_norm": 1.735873818397522, "learning_rate": 0.001916915995397008, "loss": 0.9865, "step": 14440 }, { "epoch": 4.157077100115075, "grad_norm": 0.9538254737854004, "learning_rate": 0.0019168584579976986, "loss": 0.7071, "step": 14450 }, { "epoch": 4.159953970080553, "grad_norm": 1.1453237533569336, "learning_rate": 0.001916800920598389, "loss": 0.9409, "step": 14460 }, { "epoch": 4.16283084004603, "grad_norm": 1.3291646242141724, "learning_rate": 0.0019167433831990795, "loss": 0.8157, "step": 14470 }, { "epoch": 4.165707710011508, "grad_norm": 1.026192545890808, "learning_rate": 0.0019166858457997699, "loss": 0.609, "step": 14480 }, { "epoch": 4.168584579976985, "grad_norm": 1.0879111289978027, "learning_rate": 0.0019166283084004604, "loss": 0.9277, "step": 14490 }, { "epoch": 4.171461449942463, "grad_norm": 0.7762089371681213, "learning_rate": 0.0019165707710011508, "loss": 0.8376, "step": 14500 }, { "epoch": 4.17433831990794, "grad_norm": 1.3927143812179565, "learning_rate": 0.0019165132336018411, "loss": 0.8369, "step": 14510 }, { "epoch": 4.177215189873418, "grad_norm": 0.8470642566680908, "learning_rate": 0.0019164556962025317, "loss": 0.9805, "step": 14520 }, { "epoch": 4.180092059838895, "grad_norm": 1.4948334693908691, "learning_rate": 0.0019163981588032222, "loss": 0.7744, "step": 14530 }, { "epoch": 4.182968929804373, "grad_norm": 0.8533593416213989, "learning_rate": 0.0019163406214039126, "loss": 0.817, "step": 14540 }, { "epoch": 4.185845799769851, "grad_norm": 1.3297921419143677, "learning_rate": 0.0019162830840046031, "loss": 0.9022, "step": 14550 }, { "epoch": 4.188722669735328, "grad_norm": 2.2850124835968018, "learning_rate": 0.0019162255466052935, "loss": 1.0152, "step": 14560 }, { "epoch": 4.191599539700806, "grad_norm": 1.7193505764007568, "learning_rate": 0.0019161680092059838, "loss": 0.9743, "step": 14570 }, { "epoch": 4.194476409666283, "grad_norm": 3.6228904724121094, "learning_rate": 0.0019161104718066744, "loss": 0.9614, "step": 14580 }, { "epoch": 4.197353279631761, "grad_norm": 0.7212798595428467, "learning_rate": 0.0019160529344073648, "loss": 0.9116, "step": 14590 }, { "epoch": 4.200230149597238, "grad_norm": 2.0110251903533936, "learning_rate": 0.0019159953970080553, "loss": 0.9228, "step": 14600 }, { "epoch": 4.203107019562716, "grad_norm": 1.3503351211547852, "learning_rate": 0.0019159378596087459, "loss": 0.7473, "step": 14610 }, { "epoch": 4.205983889528193, "grad_norm": 0.9766534566879272, "learning_rate": 0.001915880322209436, "loss": 0.8035, "step": 14620 }, { "epoch": 4.208860759493671, "grad_norm": 1.9491163492202759, "learning_rate": 0.0019158227848101266, "loss": 0.8025, "step": 14630 }, { "epoch": 4.211737629459148, "grad_norm": 1.1587424278259277, "learning_rate": 0.0019157652474108171, "loss": 1.1787, "step": 14640 }, { "epoch": 4.214614499424626, "grad_norm": 2.981292963027954, "learning_rate": 0.0019157077100115075, "loss": 0.8079, "step": 14650 }, { "epoch": 4.217491369390103, "grad_norm": 0.907227098941803, "learning_rate": 0.001915650172612198, "loss": 0.7774, "step": 14660 }, { "epoch": 4.220368239355581, "grad_norm": 2.8294003009796143, "learning_rate": 0.0019155926352128886, "loss": 0.8656, "step": 14670 }, { "epoch": 4.223245109321058, "grad_norm": 1.235685110092163, "learning_rate": 0.0019155350978135787, "loss": 0.8832, "step": 14680 }, { "epoch": 4.226121979286536, "grad_norm": 2.150513172149658, "learning_rate": 0.0019154775604142693, "loss": 0.999, "step": 14690 }, { "epoch": 4.228998849252013, "grad_norm": 1.04386305809021, "learning_rate": 0.0019154200230149597, "loss": 1.0324, "step": 14700 }, { "epoch": 4.231875719217491, "grad_norm": 1.0973559617996216, "learning_rate": 0.0019153624856156502, "loss": 1.0257, "step": 14710 }, { "epoch": 4.234752589182969, "grad_norm": 2.1443874835968018, "learning_rate": 0.0019153049482163408, "loss": 1.0183, "step": 14720 }, { "epoch": 4.2376294591484465, "grad_norm": 1.3811969757080078, "learning_rate": 0.001915247410817031, "loss": 0.7612, "step": 14730 }, { "epoch": 4.2405063291139244, "grad_norm": 0.9005551338195801, "learning_rate": 0.0019151898734177215, "loss": 0.853, "step": 14740 }, { "epoch": 4.2433831990794015, "grad_norm": 1.4678685665130615, "learning_rate": 0.001915132336018412, "loss": 0.8228, "step": 14750 }, { "epoch": 4.2462600690448795, "grad_norm": 1.3035207986831665, "learning_rate": 0.0019150747986191024, "loss": 0.7534, "step": 14760 }, { "epoch": 4.249136939010357, "grad_norm": 0.753805935382843, "learning_rate": 0.001915017261219793, "loss": 0.9951, "step": 14770 }, { "epoch": 4.252013808975835, "grad_norm": 1.9032026529312134, "learning_rate": 0.0019149597238204835, "loss": 0.8971, "step": 14780 }, { "epoch": 4.254890678941312, "grad_norm": 0.9907655119895935, "learning_rate": 0.0019149021864211736, "loss": 0.7573, "step": 14790 }, { "epoch": 4.25776754890679, "grad_norm": 1.689954161643982, "learning_rate": 0.0019148446490218642, "loss": 1.074, "step": 14800 }, { "epoch": 4.260644418872267, "grad_norm": 1.6718696355819702, "learning_rate": 0.0019147871116225546, "loss": 0.9506, "step": 14810 }, { "epoch": 4.263521288837745, "grad_norm": 1.6092369556427002, "learning_rate": 0.0019147295742232451, "loss": 0.9366, "step": 14820 }, { "epoch": 4.266398158803222, "grad_norm": 1.1821403503417969, "learning_rate": 0.0019146720368239357, "loss": 0.8198, "step": 14830 }, { "epoch": 4.2692750287687, "grad_norm": 2.236215829849243, "learning_rate": 0.001914614499424626, "loss": 1.231, "step": 14840 }, { "epoch": 4.272151898734177, "grad_norm": 1.0024847984313965, "learning_rate": 0.0019145569620253164, "loss": 0.7623, "step": 14850 }, { "epoch": 4.275028768699655, "grad_norm": 1.051520586013794, "learning_rate": 0.001914499424626007, "loss": 0.9089, "step": 14860 }, { "epoch": 4.277905638665132, "grad_norm": 1.66872239112854, "learning_rate": 0.0019144418872266973, "loss": 0.8395, "step": 14870 }, { "epoch": 4.28078250863061, "grad_norm": 1.1302324533462524, "learning_rate": 0.0019143843498273879, "loss": 0.8326, "step": 14880 }, { "epoch": 4.283659378596088, "grad_norm": 1.4884791374206543, "learning_rate": 0.0019143268124280784, "loss": 1.09, "step": 14890 }, { "epoch": 4.286536248561565, "grad_norm": 0.9870990514755249, "learning_rate": 0.0019142692750287688, "loss": 0.881, "step": 14900 }, { "epoch": 4.289413118527043, "grad_norm": 0.9207090735435486, "learning_rate": 0.0019142117376294591, "loss": 0.912, "step": 14910 }, { "epoch": 4.29228998849252, "grad_norm": 0.8490313291549683, "learning_rate": 0.0019141542002301497, "loss": 0.812, "step": 14920 }, { "epoch": 4.295166858457998, "grad_norm": 1.670006513595581, "learning_rate": 0.00191409666283084, "loss": 0.9233, "step": 14930 }, { "epoch": 4.298043728423475, "grad_norm": 1.6011018753051758, "learning_rate": 0.0019140391254315306, "loss": 0.8202, "step": 14940 }, { "epoch": 4.300920598388953, "grad_norm": 1.3559141159057617, "learning_rate": 0.001913981588032221, "loss": 0.8641, "step": 14950 }, { "epoch": 4.30379746835443, "grad_norm": 1.626880407333374, "learning_rate": 0.0019139240506329115, "loss": 0.8757, "step": 14960 }, { "epoch": 4.306674338319908, "grad_norm": 2.2946672439575195, "learning_rate": 0.0019138665132336018, "loss": 0.9016, "step": 14970 }, { "epoch": 4.309551208285385, "grad_norm": 1.4554295539855957, "learning_rate": 0.0019138089758342922, "loss": 0.6756, "step": 14980 }, { "epoch": 4.312428078250863, "grad_norm": 1.1498780250549316, "learning_rate": 0.0019137514384349828, "loss": 0.7797, "step": 14990 }, { "epoch": 4.31530494821634, "grad_norm": 2.1178014278411865, "learning_rate": 0.0019136939010356733, "loss": 0.8662, "step": 15000 }, { "epoch": 4.318181818181818, "grad_norm": 1.6267242431640625, "learning_rate": 0.0019136363636363637, "loss": 0.8855, "step": 15010 }, { "epoch": 4.321058688147295, "grad_norm": 1.317181944847107, "learning_rate": 0.0019135788262370542, "loss": 0.7787, "step": 15020 }, { "epoch": 4.323935558112773, "grad_norm": 1.738329529762268, "learning_rate": 0.0019135212888377446, "loss": 0.8808, "step": 15030 }, { "epoch": 4.3268124280782505, "grad_norm": 1.303429126739502, "learning_rate": 0.001913463751438435, "loss": 0.9308, "step": 15040 }, { "epoch": 4.3296892980437285, "grad_norm": 0.9937182068824768, "learning_rate": 0.0019134062140391255, "loss": 0.7461, "step": 15050 }, { "epoch": 4.332566168009206, "grad_norm": 2.002376079559326, "learning_rate": 0.0019133486766398158, "loss": 0.9939, "step": 15060 }, { "epoch": 4.3354430379746836, "grad_norm": 1.599635362625122, "learning_rate": 0.0019132911392405064, "loss": 0.8163, "step": 15070 }, { "epoch": 4.338319907940161, "grad_norm": 1.4877634048461914, "learning_rate": 0.001913233601841197, "loss": 0.7658, "step": 15080 }, { "epoch": 4.341196777905639, "grad_norm": 1.0583934783935547, "learning_rate": 0.001913176064441887, "loss": 0.7819, "step": 15090 }, { "epoch": 4.344073647871117, "grad_norm": 1.4342187643051147, "learning_rate": 0.0019131185270425777, "loss": 0.9725, "step": 15100 }, { "epoch": 4.346950517836594, "grad_norm": 0.9989875555038452, "learning_rate": 0.0019130609896432682, "loss": 0.9363, "step": 15110 }, { "epoch": 4.349827387802072, "grad_norm": 1.2413967847824097, "learning_rate": 0.0019130034522439586, "loss": 1.0369, "step": 15120 }, { "epoch": 4.352704257767549, "grad_norm": 1.0453126430511475, "learning_rate": 0.0019129459148446491, "loss": 0.7218, "step": 15130 }, { "epoch": 4.355581127733027, "grad_norm": 1.9950605630874634, "learning_rate": 0.0019128883774453397, "loss": 0.9917, "step": 15140 }, { "epoch": 4.358457997698504, "grad_norm": 1.0171782970428467, "learning_rate": 0.0019128308400460298, "loss": 0.8184, "step": 15150 }, { "epoch": 4.361334867663982, "grad_norm": 1.1622955799102783, "learning_rate": 0.0019127733026467204, "loss": 0.7921, "step": 15160 }, { "epoch": 4.364211737629459, "grad_norm": 1.2268381118774414, "learning_rate": 0.0019127157652474107, "loss": 0.8894, "step": 15170 }, { "epoch": 4.367088607594937, "grad_norm": 1.8722233772277832, "learning_rate": 0.0019126582278481013, "loss": 1.1365, "step": 15180 }, { "epoch": 4.369965477560414, "grad_norm": 1.1794180870056152, "learning_rate": 0.0019126006904487919, "loss": 0.8033, "step": 15190 }, { "epoch": 4.372842347525892, "grad_norm": 1.2619484663009644, "learning_rate": 0.0019125431530494822, "loss": 1.043, "step": 15200 }, { "epoch": 4.375719217491369, "grad_norm": 1.2766659259796143, "learning_rate": 0.0019124856156501726, "loss": 0.7481, "step": 15210 }, { "epoch": 4.378596087456847, "grad_norm": 1.6383092403411865, "learning_rate": 0.0019124280782508631, "loss": 1.0266, "step": 15220 }, { "epoch": 4.381472957422324, "grad_norm": 1.095786452293396, "learning_rate": 0.0019123705408515535, "loss": 0.7193, "step": 15230 }, { "epoch": 4.384349827387802, "grad_norm": 1.1112394332885742, "learning_rate": 0.001912313003452244, "loss": 0.9647, "step": 15240 }, { "epoch": 4.387226697353279, "grad_norm": 0.7824285626411438, "learning_rate": 0.0019122554660529346, "loss": 0.906, "step": 15250 }, { "epoch": 4.390103567318757, "grad_norm": 1.8862688541412354, "learning_rate": 0.001912197928653625, "loss": 0.8476, "step": 15260 }, { "epoch": 4.392980437284235, "grad_norm": 1.4559017419815063, "learning_rate": 0.0019121403912543153, "loss": 0.6673, "step": 15270 }, { "epoch": 4.395857307249712, "grad_norm": 1.0109845399856567, "learning_rate": 0.0019120828538550056, "loss": 0.7126, "step": 15280 }, { "epoch": 4.39873417721519, "grad_norm": 2.1197261810302734, "learning_rate": 0.0019120253164556962, "loss": 0.9379, "step": 15290 }, { "epoch": 4.401611047180667, "grad_norm": 1.1078637838363647, "learning_rate": 0.0019119677790563868, "loss": 1.0745, "step": 15300 }, { "epoch": 4.404487917146145, "grad_norm": 1.6841177940368652, "learning_rate": 0.0019119102416570771, "loss": 0.7316, "step": 15310 }, { "epoch": 4.407364787111622, "grad_norm": 2.7643370628356934, "learning_rate": 0.0019118527042577677, "loss": 0.85, "step": 15320 }, { "epoch": 4.4102416570771, "grad_norm": 1.4755374193191528, "learning_rate": 0.001911795166858458, "loss": 1.1264, "step": 15330 }, { "epoch": 4.4131185270425775, "grad_norm": 0.9028780460357666, "learning_rate": 0.0019117376294591484, "loss": 0.8445, "step": 15340 }, { "epoch": 4.415995397008055, "grad_norm": 2.003946542739868, "learning_rate": 0.001911680092059839, "loss": 0.8331, "step": 15350 }, { "epoch": 4.4188722669735325, "grad_norm": 1.2825521230697632, "learning_rate": 0.0019116225546605295, "loss": 0.7674, "step": 15360 }, { "epoch": 4.4217491369390105, "grad_norm": 1.0530322790145874, "learning_rate": 0.0019115650172612198, "loss": 0.8315, "step": 15370 }, { "epoch": 4.424626006904488, "grad_norm": 1.4116262197494507, "learning_rate": 0.0019115074798619104, "loss": 0.9495, "step": 15380 }, { "epoch": 4.427502876869966, "grad_norm": 1.6569758653640747, "learning_rate": 0.0019114499424626005, "loss": 0.8311, "step": 15390 }, { "epoch": 4.430379746835443, "grad_norm": 1.660268783569336, "learning_rate": 0.001911392405063291, "loss": 0.6971, "step": 15400 }, { "epoch": 4.433256616800921, "grad_norm": 1.6160058975219727, "learning_rate": 0.0019113348676639817, "loss": 0.7566, "step": 15410 }, { "epoch": 4.436133486766398, "grad_norm": 2.5840353965759277, "learning_rate": 0.001911277330264672, "loss": 0.7773, "step": 15420 }, { "epoch": 4.439010356731876, "grad_norm": 0.694007396697998, "learning_rate": 0.0019112197928653626, "loss": 0.8706, "step": 15430 }, { "epoch": 4.441887226697354, "grad_norm": 1.2641443014144897, "learning_rate": 0.0019111622554660531, "loss": 0.973, "step": 15440 }, { "epoch": 4.444764096662831, "grad_norm": 1.147080659866333, "learning_rate": 0.0019111047180667433, "loss": 0.9104, "step": 15450 }, { "epoch": 4.447640966628309, "grad_norm": 1.3793820142745972, "learning_rate": 0.0019110471806674338, "loss": 0.9661, "step": 15460 }, { "epoch": 4.450517836593786, "grad_norm": 1.1988273859024048, "learning_rate": 0.0019109896432681244, "loss": 0.9618, "step": 15470 }, { "epoch": 4.453394706559264, "grad_norm": 0.9540511965751648, "learning_rate": 0.0019109321058688148, "loss": 0.8356, "step": 15480 }, { "epoch": 4.456271576524741, "grad_norm": 1.0699450969696045, "learning_rate": 0.0019108745684695053, "loss": 0.7873, "step": 15490 }, { "epoch": 4.459148446490219, "grad_norm": 1.5651378631591797, "learning_rate": 0.0019108170310701959, "loss": 0.9192, "step": 15500 }, { "epoch": 4.462025316455696, "grad_norm": 1.6874133348464966, "learning_rate": 0.001910759493670886, "loss": 0.9585, "step": 15510 }, { "epoch": 4.464902186421174, "grad_norm": 1.255953073501587, "learning_rate": 0.0019107019562715766, "loss": 0.6974, "step": 15520 }, { "epoch": 4.467779056386651, "grad_norm": 1.038178563117981, "learning_rate": 0.001910644418872267, "loss": 0.7882, "step": 15530 }, { "epoch": 4.470655926352129, "grad_norm": 1.3960912227630615, "learning_rate": 0.0019105868814729575, "loss": 0.781, "step": 15540 }, { "epoch": 4.473532796317606, "grad_norm": 1.1011765003204346, "learning_rate": 0.001910529344073648, "loss": 0.8738, "step": 15550 }, { "epoch": 4.476409666283084, "grad_norm": 1.0418028831481934, "learning_rate": 0.0019104718066743382, "loss": 0.9693, "step": 15560 }, { "epoch": 4.479286536248561, "grad_norm": 2.4160616397857666, "learning_rate": 0.0019104142692750287, "loss": 0.903, "step": 15570 }, { "epoch": 4.482163406214039, "grad_norm": 1.463049054145813, "learning_rate": 0.0019103567318757193, "loss": 0.8845, "step": 15580 }, { "epoch": 4.485040276179516, "grad_norm": 1.295709252357483, "learning_rate": 0.0019102991944764097, "loss": 0.7486, "step": 15590 }, { "epoch": 4.487917146144994, "grad_norm": 0.8238698840141296, "learning_rate": 0.0019102416570771002, "loss": 1.048, "step": 15600 }, { "epoch": 4.490794016110472, "grad_norm": 0.9502568244934082, "learning_rate": 0.0019101841196777908, "loss": 0.8941, "step": 15610 }, { "epoch": 4.493670886075949, "grad_norm": 1.1022804975509644, "learning_rate": 0.001910126582278481, "loss": 0.9761, "step": 15620 }, { "epoch": 4.496547756041427, "grad_norm": 1.1343930959701538, "learning_rate": 0.0019100690448791715, "loss": 0.8868, "step": 15630 }, { "epoch": 4.499424626006904, "grad_norm": 2.0388996601104736, "learning_rate": 0.0019100115074798618, "loss": 0.7925, "step": 15640 }, { "epoch": 4.502301495972382, "grad_norm": 1.5923800468444824, "learning_rate": 0.0019099539700805524, "loss": 0.9477, "step": 15650 }, { "epoch": 4.5051783659378595, "grad_norm": 1.6118159294128418, "learning_rate": 0.001909896432681243, "loss": 0.963, "step": 15660 }, { "epoch": 4.5080552359033375, "grad_norm": 1.3596023321151733, "learning_rate": 0.0019098388952819333, "loss": 0.7016, "step": 15670 }, { "epoch": 4.5109321058688145, "grad_norm": 1.125800609588623, "learning_rate": 0.0019097813578826236, "loss": 1.0952, "step": 15680 }, { "epoch": 4.5138089758342925, "grad_norm": 1.4780359268188477, "learning_rate": 0.0019097238204833142, "loss": 0.9669, "step": 15690 }, { "epoch": 4.51668584579977, "grad_norm": 0.8493865728378296, "learning_rate": 0.0019096662830840046, "loss": 0.8461, "step": 15700 }, { "epoch": 4.519562715765248, "grad_norm": 1.142212152481079, "learning_rate": 0.0019096087456846951, "loss": 0.7769, "step": 15710 }, { "epoch": 4.522439585730725, "grad_norm": 1.0430763959884644, "learning_rate": 0.0019095512082853857, "loss": 0.761, "step": 15720 }, { "epoch": 4.525316455696203, "grad_norm": 1.2505468130111694, "learning_rate": 0.001909493670886076, "loss": 0.7497, "step": 15730 }, { "epoch": 4.52819332566168, "grad_norm": 0.9511294960975647, "learning_rate": 0.0019094361334867664, "loss": 0.8384, "step": 15740 }, { "epoch": 4.531070195627158, "grad_norm": 0.9782663583755493, "learning_rate": 0.0019093785960874567, "loss": 0.8201, "step": 15750 }, { "epoch": 4.533947065592635, "grad_norm": 1.1000646352767944, "learning_rate": 0.0019093210586881473, "loss": 0.7813, "step": 15760 }, { "epoch": 4.536823935558113, "grad_norm": 1.6823583841323853, "learning_rate": 0.0019092635212888379, "loss": 0.9371, "step": 15770 }, { "epoch": 4.539700805523591, "grad_norm": 2.024325370788574, "learning_rate": 0.0019092059838895282, "loss": 0.7145, "step": 15780 }, { "epoch": 4.542577675489068, "grad_norm": 1.218411922454834, "learning_rate": 0.0019091484464902188, "loss": 0.9241, "step": 15790 }, { "epoch": 4.545454545454545, "grad_norm": 1.5155671834945679, "learning_rate": 0.0019090909090909091, "loss": 0.7203, "step": 15800 }, { "epoch": 4.548331415420023, "grad_norm": 1.3550835847854614, "learning_rate": 0.0019090333716915995, "loss": 0.7671, "step": 15810 }, { "epoch": 4.551208285385501, "grad_norm": 1.518436312675476, "learning_rate": 0.00190897583429229, "loss": 1.1292, "step": 15820 }, { "epoch": 4.554085155350978, "grad_norm": 0.9992713332176208, "learning_rate": 0.0019089182968929806, "loss": 0.7735, "step": 15830 }, { "epoch": 4.556962025316456, "grad_norm": 1.8088299036026, "learning_rate": 0.001908860759493671, "loss": 0.7423, "step": 15840 }, { "epoch": 4.559838895281933, "grad_norm": 0.8560647368431091, "learning_rate": 0.0019088032220943615, "loss": 0.8395, "step": 15850 }, { "epoch": 4.562715765247411, "grad_norm": 1.5414892435073853, "learning_rate": 0.0019087456846950516, "loss": 0.8131, "step": 15860 }, { "epoch": 4.565592635212888, "grad_norm": 1.75486421585083, "learning_rate": 0.0019086881472957422, "loss": 0.8235, "step": 15870 }, { "epoch": 4.568469505178366, "grad_norm": 1.5082987546920776, "learning_rate": 0.0019086306098964328, "loss": 0.9619, "step": 15880 }, { "epoch": 4.571346375143843, "grad_norm": 1.0840848684310913, "learning_rate": 0.001908573072497123, "loss": 0.7213, "step": 15890 }, { "epoch": 4.574223245109321, "grad_norm": 1.3701196908950806, "learning_rate": 0.0019085155350978137, "loss": 0.8754, "step": 15900 }, { "epoch": 4.577100115074798, "grad_norm": 1.280706763267517, "learning_rate": 0.0019084579976985042, "loss": 0.8515, "step": 15910 }, { "epoch": 4.579976985040276, "grad_norm": 1.379064679145813, "learning_rate": 0.0019084004602991944, "loss": 0.8561, "step": 15920 }, { "epoch": 4.582853855005753, "grad_norm": 1.4378596544265747, "learning_rate": 0.001908342922899885, "loss": 0.9069, "step": 15930 }, { "epoch": 4.585730724971231, "grad_norm": 1.1159641742706299, "learning_rate": 0.0019082853855005755, "loss": 0.9952, "step": 15940 }, { "epoch": 4.588607594936709, "grad_norm": 2.0422606468200684, "learning_rate": 0.0019082278481012658, "loss": 0.8624, "step": 15950 }, { "epoch": 4.591484464902186, "grad_norm": 1.4231417179107666, "learning_rate": 0.0019081703107019564, "loss": 0.8982, "step": 15960 }, { "epoch": 4.5943613348676635, "grad_norm": 1.202480435371399, "learning_rate": 0.0019081127733026467, "loss": 0.9112, "step": 15970 }, { "epoch": 4.5972382048331415, "grad_norm": 1.997246265411377, "learning_rate": 0.001908055235903337, "loss": 0.7988, "step": 15980 }, { "epoch": 4.6001150747986195, "grad_norm": 1.7690699100494385, "learning_rate": 0.0019079976985040277, "loss": 1.0205, "step": 15990 }, { "epoch": 4.602991944764097, "grad_norm": 1.037468671798706, "learning_rate": 0.001907940161104718, "loss": 0.9245, "step": 16000 }, { "epoch": 4.6058688147295745, "grad_norm": 1.8129833936691284, "learning_rate": 0.0019078826237054086, "loss": 0.9181, "step": 16010 }, { "epoch": 4.608745684695052, "grad_norm": 1.8345943689346313, "learning_rate": 0.0019078250863060991, "loss": 0.9839, "step": 16020 }, { "epoch": 4.61162255466053, "grad_norm": 1.5491620302200317, "learning_rate": 0.0019077675489067895, "loss": 0.8707, "step": 16030 }, { "epoch": 4.614499424626007, "grad_norm": 2.0150582790374756, "learning_rate": 0.0019077100115074798, "loss": 1.0483, "step": 16040 }, { "epoch": 4.617376294591485, "grad_norm": 1.2781273126602173, "learning_rate": 0.0019076524741081704, "loss": 0.7943, "step": 16050 }, { "epoch": 4.620253164556962, "grad_norm": 2.0504848957061768, "learning_rate": 0.0019075949367088607, "loss": 0.882, "step": 16060 }, { "epoch": 4.62313003452244, "grad_norm": 1.4154834747314453, "learning_rate": 0.0019075373993095513, "loss": 1.1324, "step": 16070 }, { "epoch": 4.626006904487917, "grad_norm": 1.7906861305236816, "learning_rate": 0.0019074798619102416, "loss": 0.8993, "step": 16080 }, { "epoch": 4.628883774453395, "grad_norm": 1.7117464542388916, "learning_rate": 0.0019074223245109322, "loss": 1.0555, "step": 16090 }, { "epoch": 4.631760644418872, "grad_norm": 1.258230447769165, "learning_rate": 0.0019073647871116226, "loss": 0.7547, "step": 16100 }, { "epoch": 4.63463751438435, "grad_norm": 1.304571509361267, "learning_rate": 0.001907307249712313, "loss": 0.9737, "step": 16110 }, { "epoch": 4.637514384349828, "grad_norm": 1.677359938621521, "learning_rate": 0.0019072497123130035, "loss": 0.9111, "step": 16120 }, { "epoch": 4.640391254315305, "grad_norm": 2.1526341438293457, "learning_rate": 0.001907192174913694, "loss": 1.1639, "step": 16130 }, { "epoch": 4.643268124280782, "grad_norm": 1.0300054550170898, "learning_rate": 0.0019071346375143844, "loss": 0.7965, "step": 16140 }, { "epoch": 4.64614499424626, "grad_norm": 1.7806105613708496, "learning_rate": 0.001907077100115075, "loss": 0.9101, "step": 16150 }, { "epoch": 4.649021864211738, "grad_norm": 2.0558419227600098, "learning_rate": 0.0019070195627157653, "loss": 0.8863, "step": 16160 }, { "epoch": 4.651898734177215, "grad_norm": 1.6845026016235352, "learning_rate": 0.0019069620253164556, "loss": 1.0905, "step": 16170 }, { "epoch": 4.654775604142693, "grad_norm": 2.2281529903411865, "learning_rate": 0.0019069044879171462, "loss": 0.8384, "step": 16180 }, { "epoch": 4.65765247410817, "grad_norm": 3.4963862895965576, "learning_rate": 0.0019068469505178368, "loss": 0.9437, "step": 16190 }, { "epoch": 4.660529344073648, "grad_norm": 1.1035865545272827, "learning_rate": 0.0019067894131185271, "loss": 0.8596, "step": 16200 }, { "epoch": 4.663406214039125, "grad_norm": 1.1547868251800537, "learning_rate": 0.0019067318757192177, "loss": 0.9013, "step": 16210 }, { "epoch": 4.666283084004603, "grad_norm": 1.0208265781402588, "learning_rate": 0.0019066743383199078, "loss": 0.9787, "step": 16220 }, { "epoch": 4.66915995397008, "grad_norm": 0.8052436709403992, "learning_rate": 0.0019066168009205984, "loss": 0.7771, "step": 16230 }, { "epoch": 4.672036823935558, "grad_norm": 1.5430006980895996, "learning_rate": 0.001906559263521289, "loss": 0.8776, "step": 16240 }, { "epoch": 4.674913693901035, "grad_norm": 1.0528212785720825, "learning_rate": 0.0019065017261219793, "loss": 0.9839, "step": 16250 }, { "epoch": 4.677790563866513, "grad_norm": 1.9119752645492554, "learning_rate": 0.0019064441887226698, "loss": 0.8588, "step": 16260 }, { "epoch": 4.6806674338319905, "grad_norm": 1.8162941932678223, "learning_rate": 0.0019063866513233604, "loss": 0.8541, "step": 16270 }, { "epoch": 4.6835443037974684, "grad_norm": 1.472411036491394, "learning_rate": 0.0019063291139240505, "loss": 0.8718, "step": 16280 }, { "epoch": 4.6864211737629455, "grad_norm": 1.211539387702942, "learning_rate": 0.001906271576524741, "loss": 0.9314, "step": 16290 }, { "epoch": 4.6892980437284235, "grad_norm": 1.5943621397018433, "learning_rate": 0.0019062140391254317, "loss": 0.9337, "step": 16300 }, { "epoch": 4.692174913693901, "grad_norm": 1.8499529361724854, "learning_rate": 0.001906156501726122, "loss": 0.955, "step": 16310 }, { "epoch": 4.695051783659379, "grad_norm": 0.6720134615898132, "learning_rate": 0.0019060989643268126, "loss": 0.9177, "step": 16320 }, { "epoch": 4.697928653624857, "grad_norm": 2.879891872406006, "learning_rate": 0.001906041426927503, "loss": 0.8606, "step": 16330 }, { "epoch": 4.700805523590334, "grad_norm": 1.9366815090179443, "learning_rate": 0.0019059838895281933, "loss": 0.8211, "step": 16340 }, { "epoch": 4.703682393555812, "grad_norm": 1.2696794271469116, "learning_rate": 0.0019059263521288838, "loss": 0.78, "step": 16350 }, { "epoch": 4.706559263521289, "grad_norm": 1.1952375173568726, "learning_rate": 0.0019058688147295742, "loss": 0.9661, "step": 16360 }, { "epoch": 4.709436133486767, "grad_norm": 1.1584371328353882, "learning_rate": 0.0019058112773302647, "loss": 0.9185, "step": 16370 }, { "epoch": 4.712313003452244, "grad_norm": 1.2501249313354492, "learning_rate": 0.0019057537399309553, "loss": 0.8142, "step": 16380 }, { "epoch": 4.715189873417722, "grad_norm": 1.2774534225463867, "learning_rate": 0.0019056962025316454, "loss": 0.9512, "step": 16390 }, { "epoch": 4.718066743383199, "grad_norm": 2.034247636795044, "learning_rate": 0.001905638665132336, "loss": 0.8888, "step": 16400 }, { "epoch": 4.720943613348677, "grad_norm": 0.6435783505439758, "learning_rate": 0.0019055811277330266, "loss": 0.8559, "step": 16410 }, { "epoch": 4.723820483314154, "grad_norm": 2.1106953620910645, "learning_rate": 0.001905523590333717, "loss": 0.8733, "step": 16420 }, { "epoch": 4.726697353279632, "grad_norm": 1.6343600749969482, "learning_rate": 0.0019054660529344075, "loss": 0.6711, "step": 16430 }, { "epoch": 4.729574223245109, "grad_norm": 1.1325865983963013, "learning_rate": 0.0019054085155350978, "loss": 0.8424, "step": 16440 }, { "epoch": 4.732451093210587, "grad_norm": 1.1420527696609497, "learning_rate": 0.0019053509781357882, "loss": 0.891, "step": 16450 }, { "epoch": 4.735327963176064, "grad_norm": 1.9411427974700928, "learning_rate": 0.0019052934407364787, "loss": 0.8831, "step": 16460 }, { "epoch": 4.738204833141542, "grad_norm": 0.9618431329727173, "learning_rate": 0.001905235903337169, "loss": 0.8074, "step": 16470 }, { "epoch": 4.741081703107019, "grad_norm": 0.673640787601471, "learning_rate": 0.0019051783659378597, "loss": 0.8738, "step": 16480 }, { "epoch": 4.743958573072497, "grad_norm": 1.0413610935211182, "learning_rate": 0.0019051208285385502, "loss": 0.8852, "step": 16490 }, { "epoch": 4.746835443037975, "grad_norm": 1.5171406269073486, "learning_rate": 0.0019050632911392406, "loss": 0.7841, "step": 16500 }, { "epoch": 4.749712313003452, "grad_norm": 2.0350630283355713, "learning_rate": 0.001905005753739931, "loss": 1.0626, "step": 16510 }, { "epoch": 4.75258918296893, "grad_norm": 1.6566169261932373, "learning_rate": 0.0019049482163406215, "loss": 0.8728, "step": 16520 }, { "epoch": 4.755466052934407, "grad_norm": 1.2737905979156494, "learning_rate": 0.0019048906789413118, "loss": 0.7306, "step": 16530 }, { "epoch": 4.758342922899885, "grad_norm": 1.4972856044769287, "learning_rate": 0.0019048331415420024, "loss": 0.9885, "step": 16540 }, { "epoch": 4.761219792865362, "grad_norm": 1.4232702255249023, "learning_rate": 0.0019047756041426927, "loss": 0.7991, "step": 16550 }, { "epoch": 4.76409666283084, "grad_norm": 1.1246229410171509, "learning_rate": 0.0019047180667433833, "loss": 0.8448, "step": 16560 }, { "epoch": 4.766973532796317, "grad_norm": 1.586871862411499, "learning_rate": 0.0019046605293440736, "loss": 0.7996, "step": 16570 }, { "epoch": 4.769850402761795, "grad_norm": 1.7258431911468506, "learning_rate": 0.001904602991944764, "loss": 0.8347, "step": 16580 }, { "epoch": 4.7727272727272725, "grad_norm": 1.3011568784713745, "learning_rate": 0.0019045454545454546, "loss": 0.9742, "step": 16590 }, { "epoch": 4.7756041426927505, "grad_norm": 1.5473041534423828, "learning_rate": 0.0019044879171461451, "loss": 0.805, "step": 16600 }, { "epoch": 4.7784810126582276, "grad_norm": 1.6648759841918945, "learning_rate": 0.0019044303797468355, "loss": 0.7336, "step": 16610 }, { "epoch": 4.7813578826237055, "grad_norm": 1.6587978601455688, "learning_rate": 0.001904372842347526, "loss": 0.7383, "step": 16620 }, { "epoch": 4.784234752589183, "grad_norm": 1.5691472291946411, "learning_rate": 0.0019043153049482164, "loss": 0.9349, "step": 16630 }, { "epoch": 4.787111622554661, "grad_norm": 1.323009729385376, "learning_rate": 0.0019042577675489067, "loss": 0.8323, "step": 16640 }, { "epoch": 4.789988492520138, "grad_norm": 1.1095514297485352, "learning_rate": 0.0019042002301495973, "loss": 0.727, "step": 16650 }, { "epoch": 4.792865362485616, "grad_norm": 0.8255194425582886, "learning_rate": 0.0019041426927502876, "loss": 0.7757, "step": 16660 }, { "epoch": 4.795742232451094, "grad_norm": 1.1312767267227173, "learning_rate": 0.0019040851553509782, "loss": 0.9589, "step": 16670 }, { "epoch": 4.798619102416571, "grad_norm": 1.1986424922943115, "learning_rate": 0.0019040276179516688, "loss": 0.8775, "step": 16680 }, { "epoch": 4.801495972382048, "grad_norm": 1.3379688262939453, "learning_rate": 0.001903970080552359, "loss": 0.9011, "step": 16690 }, { "epoch": 4.804372842347526, "grad_norm": 1.195286750793457, "learning_rate": 0.0019039125431530495, "loss": 1.0586, "step": 16700 }, { "epoch": 4.807249712313004, "grad_norm": 0.9798372387886047, "learning_rate": 0.00190385500575374, "loss": 0.736, "step": 16710 }, { "epoch": 4.810126582278481, "grad_norm": 1.1982429027557373, "learning_rate": 0.0019037974683544304, "loss": 0.7664, "step": 16720 }, { "epoch": 4.813003452243959, "grad_norm": 1.3862576484680176, "learning_rate": 0.001903739930955121, "loss": 0.8086, "step": 16730 }, { "epoch": 4.815880322209436, "grad_norm": 1.924704909324646, "learning_rate": 0.0019036823935558115, "loss": 0.9421, "step": 16740 }, { "epoch": 4.818757192174914, "grad_norm": 1.0472782850265503, "learning_rate": 0.0019036248561565016, "loss": 0.9126, "step": 16750 }, { "epoch": 4.821634062140391, "grad_norm": 0.974543571472168, "learning_rate": 0.0019035673187571922, "loss": 0.9676, "step": 16760 }, { "epoch": 4.824510932105869, "grad_norm": 0.7824651598930359, "learning_rate": 0.0019035097813578825, "loss": 0.9239, "step": 16770 }, { "epoch": 4.827387802071346, "grad_norm": 1.1408143043518066, "learning_rate": 0.001903452243958573, "loss": 0.7876, "step": 16780 }, { "epoch": 4.830264672036824, "grad_norm": 1.6811636686325073, "learning_rate": 0.0019033947065592637, "loss": 0.7402, "step": 16790 }, { "epoch": 4.833141542002301, "grad_norm": 1.0806002616882324, "learning_rate": 0.001903337169159954, "loss": 0.8806, "step": 16800 }, { "epoch": 4.836018411967779, "grad_norm": 1.2443541288375854, "learning_rate": 0.0019032796317606444, "loss": 0.8461, "step": 16810 }, { "epoch": 4.838895281933256, "grad_norm": 0.968848466873169, "learning_rate": 0.001903222094361335, "loss": 0.9667, "step": 16820 }, { "epoch": 4.841772151898734, "grad_norm": 1.3659696578979492, "learning_rate": 0.0019031645569620253, "loss": 0.8968, "step": 16830 }, { "epoch": 4.844649021864212, "grad_norm": 1.9798381328582764, "learning_rate": 0.0019031070195627158, "loss": 0.9686, "step": 16840 }, { "epoch": 4.847525891829689, "grad_norm": 0.9657642841339111, "learning_rate": 0.0019030494821634064, "loss": 0.9875, "step": 16850 }, { "epoch": 4.850402761795166, "grad_norm": 1.3020647764205933, "learning_rate": 0.0019029919447640967, "loss": 0.8838, "step": 16860 }, { "epoch": 4.853279631760644, "grad_norm": 0.6429805159568787, "learning_rate": 0.001902934407364787, "loss": 0.721, "step": 16870 }, { "epoch": 4.856156501726122, "grad_norm": 0.9125068187713623, "learning_rate": 0.0019028768699654777, "loss": 0.9627, "step": 16880 }, { "epoch": 4.859033371691599, "grad_norm": 1.2215850353240967, "learning_rate": 0.001902819332566168, "loss": 0.8304, "step": 16890 }, { "epoch": 4.861910241657077, "grad_norm": 1.7298548221588135, "learning_rate": 0.0019027617951668586, "loss": 0.6967, "step": 16900 }, { "epoch": 4.8647871116225545, "grad_norm": 0.8801998496055603, "learning_rate": 0.001902704257767549, "loss": 0.8484, "step": 16910 }, { "epoch": 4.8676639815880325, "grad_norm": 1.1502504348754883, "learning_rate": 0.0019026467203682395, "loss": 0.8602, "step": 16920 }, { "epoch": 4.87054085155351, "grad_norm": 1.4753952026367188, "learning_rate": 0.0019025891829689298, "loss": 0.9229, "step": 16930 }, { "epoch": 4.8734177215189876, "grad_norm": 1.002806544303894, "learning_rate": 0.0019025316455696202, "loss": 0.8397, "step": 16940 }, { "epoch": 4.876294591484465, "grad_norm": 1.197477102279663, "learning_rate": 0.0019024741081703107, "loss": 1.0376, "step": 16950 }, { "epoch": 4.879171461449943, "grad_norm": 1.8419851064682007, "learning_rate": 0.0019024165707710013, "loss": 0.6566, "step": 16960 }, { "epoch": 4.88204833141542, "grad_norm": 2.1335885524749756, "learning_rate": 0.0019023590333716916, "loss": 0.8202, "step": 16970 }, { "epoch": 4.884925201380898, "grad_norm": 1.1843559741973877, "learning_rate": 0.0019023014959723822, "loss": 0.9799, "step": 16980 }, { "epoch": 4.887802071346375, "grad_norm": 1.0663654804229736, "learning_rate": 0.0019022439585730726, "loss": 0.8803, "step": 16990 }, { "epoch": 4.890678941311853, "grad_norm": 1.2856589555740356, "learning_rate": 0.001902186421173763, "loss": 0.7419, "step": 17000 }, { "epoch": 4.893555811277331, "grad_norm": 1.6439005136489868, "learning_rate": 0.0019021288837744535, "loss": 0.8932, "step": 17010 }, { "epoch": 4.896432681242808, "grad_norm": 0.8066824674606323, "learning_rate": 0.0019020713463751438, "loss": 0.7189, "step": 17020 }, { "epoch": 4.899309551208285, "grad_norm": 0.9582926630973816, "learning_rate": 0.0019020138089758344, "loss": 1.1629, "step": 17030 }, { "epoch": 4.902186421173763, "grad_norm": 1.1069748401641846, "learning_rate": 0.001901956271576525, "loss": 0.7556, "step": 17040 }, { "epoch": 4.905063291139241, "grad_norm": 1.403321623802185, "learning_rate": 0.001901898734177215, "loss": 1.0961, "step": 17050 }, { "epoch": 4.907940161104718, "grad_norm": 1.2217724323272705, "learning_rate": 0.0019018411967779056, "loss": 0.7716, "step": 17060 }, { "epoch": 4.910817031070196, "grad_norm": 3.7459418773651123, "learning_rate": 0.0019017836593785962, "loss": 0.9586, "step": 17070 }, { "epoch": 4.913693901035673, "grad_norm": 1.3955199718475342, "learning_rate": 0.0019017261219792865, "loss": 0.6824, "step": 17080 }, { "epoch": 4.916570771001151, "grad_norm": 1.8270485401153564, "learning_rate": 0.0019016685845799771, "loss": 0.9315, "step": 17090 }, { "epoch": 4.919447640966628, "grad_norm": 1.1200847625732422, "learning_rate": 0.0019016110471806677, "loss": 0.9138, "step": 17100 }, { "epoch": 4.922324510932106, "grad_norm": 0.9481359124183655, "learning_rate": 0.0019015535097813578, "loss": 0.9183, "step": 17110 }, { "epoch": 4.925201380897583, "grad_norm": 2.5261685848236084, "learning_rate": 0.0019014959723820484, "loss": 0.9545, "step": 17120 }, { "epoch": 4.928078250863061, "grad_norm": 2.4814696311950684, "learning_rate": 0.0019014384349827387, "loss": 1.0163, "step": 17130 }, { "epoch": 4.930955120828538, "grad_norm": 1.3310117721557617, "learning_rate": 0.0019013808975834293, "loss": 0.9178, "step": 17140 }, { "epoch": 4.933831990794016, "grad_norm": 0.9492062330245972, "learning_rate": 0.0019013233601841198, "loss": 0.7561, "step": 17150 }, { "epoch": 4.936708860759493, "grad_norm": 1.5139238834381104, "learning_rate": 0.0019012658227848102, "loss": 1.0665, "step": 17160 }, { "epoch": 4.939585730724971, "grad_norm": 2.415599822998047, "learning_rate": 0.0019012082853855005, "loss": 0.9795, "step": 17170 }, { "epoch": 4.942462600690448, "grad_norm": 1.2673468589782715, "learning_rate": 0.001901150747986191, "loss": 0.7308, "step": 17180 }, { "epoch": 4.945339470655926, "grad_norm": 1.90072500705719, "learning_rate": 0.0019010932105868815, "loss": 1.0217, "step": 17190 }, { "epoch": 4.9482163406214035, "grad_norm": 1.0612801313400269, "learning_rate": 0.001901035673187572, "loss": 0.8033, "step": 17200 }, { "epoch": 4.9510932105868815, "grad_norm": 2.57102632522583, "learning_rate": 0.0019009781357882626, "loss": 0.9583, "step": 17210 }, { "epoch": 4.953970080552359, "grad_norm": 1.6036182641983032, "learning_rate": 0.0019009205983889527, "loss": 0.937, "step": 17220 }, { "epoch": 4.9568469505178365, "grad_norm": 1.5023826360702515, "learning_rate": 0.0019008630609896433, "loss": 0.9133, "step": 17230 }, { "epoch": 4.9597238204833145, "grad_norm": 1.242252230644226, "learning_rate": 0.0019008055235903336, "loss": 0.8533, "step": 17240 }, { "epoch": 4.962600690448792, "grad_norm": 1.1571515798568726, "learning_rate": 0.0019007479861910242, "loss": 0.8885, "step": 17250 }, { "epoch": 4.96547756041427, "grad_norm": 2.5350534915924072, "learning_rate": 0.0019006904487917147, "loss": 1.012, "step": 17260 }, { "epoch": 4.968354430379747, "grad_norm": 1.2985031604766846, "learning_rate": 0.001900632911392405, "loss": 0.835, "step": 17270 }, { "epoch": 4.971231300345225, "grad_norm": 1.105857491493225, "learning_rate": 0.0019005753739930954, "loss": 1.0561, "step": 17280 }, { "epoch": 4.974108170310702, "grad_norm": 0.839263916015625, "learning_rate": 0.001900517836593786, "loss": 0.7585, "step": 17290 }, { "epoch": 4.97698504027618, "grad_norm": 1.333565354347229, "learning_rate": 0.0019004602991944764, "loss": 0.9657, "step": 17300 }, { "epoch": 4.979861910241657, "grad_norm": 0.9486253261566162, "learning_rate": 0.001900402761795167, "loss": 0.8529, "step": 17310 }, { "epoch": 4.982738780207135, "grad_norm": 1.092949628829956, "learning_rate": 0.0019003452243958575, "loss": 0.8302, "step": 17320 }, { "epoch": 4.985615650172612, "grad_norm": 1.1809192895889282, "learning_rate": 0.0019002876869965478, "loss": 0.9281, "step": 17330 }, { "epoch": 4.98849252013809, "grad_norm": 0.757474958896637, "learning_rate": 0.0019002301495972382, "loss": 0.8754, "step": 17340 }, { "epoch": 4.991369390103567, "grad_norm": 0.8536441922187805, "learning_rate": 0.0019001726121979285, "loss": 0.8355, "step": 17350 }, { "epoch": 4.994246260069045, "grad_norm": 1.9848542213439941, "learning_rate": 0.001900115074798619, "loss": 0.8559, "step": 17360 }, { "epoch": 4.997123130034522, "grad_norm": 4.449210166931152, "learning_rate": 0.0019000575373993096, "loss": 0.8312, "step": 17370 }, { "epoch": 5.0, "grad_norm": 1.3584418296813965, "learning_rate": 0.0019, "loss": 0.8485, "step": 17380 }, { "epoch": 5.002876869965478, "grad_norm": 1.0302597284317017, "learning_rate": 0.0018999424626006906, "loss": 0.9095, "step": 17390 }, { "epoch": 5.005753739930955, "grad_norm": 1.4962183237075806, "learning_rate": 0.001899884925201381, "loss": 0.7587, "step": 17400 }, { "epoch": 5.008630609896433, "grad_norm": 2.0133204460144043, "learning_rate": 0.0018998273878020713, "loss": 0.8724, "step": 17410 }, { "epoch": 5.01150747986191, "grad_norm": 1.957750916481018, "learning_rate": 0.0018997698504027618, "loss": 0.6612, "step": 17420 }, { "epoch": 5.014384349827388, "grad_norm": 1.261438250541687, "learning_rate": 0.0018997123130034524, "loss": 0.8326, "step": 17430 }, { "epoch": 5.017261219792865, "grad_norm": 1.2428295612335205, "learning_rate": 0.0018996547756041427, "loss": 0.9218, "step": 17440 }, { "epoch": 5.020138089758343, "grad_norm": 1.9613168239593506, "learning_rate": 0.0018995972382048333, "loss": 0.935, "step": 17450 }, { "epoch": 5.02301495972382, "grad_norm": 2.063695192337036, "learning_rate": 0.0018995397008055236, "loss": 0.9534, "step": 17460 }, { "epoch": 5.025891829689298, "grad_norm": 1.5130486488342285, "learning_rate": 0.001899482163406214, "loss": 0.6887, "step": 17470 }, { "epoch": 5.028768699654775, "grad_norm": 1.444243311882019, "learning_rate": 0.0018994246260069046, "loss": 0.8733, "step": 17480 }, { "epoch": 5.031645569620253, "grad_norm": 1.3003138303756714, "learning_rate": 0.001899367088607595, "loss": 0.6622, "step": 17490 }, { "epoch": 5.03452243958573, "grad_norm": 1.5306121110916138, "learning_rate": 0.0018993095512082855, "loss": 0.981, "step": 17500 }, { "epoch": 5.037399309551208, "grad_norm": 0.915668785572052, "learning_rate": 0.001899252013808976, "loss": 0.7753, "step": 17510 }, { "epoch": 5.0402761795166855, "grad_norm": 1.6675065755844116, "learning_rate": 0.0018991944764096662, "loss": 0.9368, "step": 17520 }, { "epoch": 5.0431530494821635, "grad_norm": 0.9808990359306335, "learning_rate": 0.0018991369390103567, "loss": 0.9106, "step": 17530 }, { "epoch": 5.046029919447641, "grad_norm": 0.7667146921157837, "learning_rate": 0.0018990794016110473, "loss": 1.0435, "step": 17540 }, { "epoch": 5.0489067894131185, "grad_norm": 1.130448818206787, "learning_rate": 0.0018990218642117376, "loss": 0.899, "step": 17550 }, { "epoch": 5.0517836593785965, "grad_norm": 1.3232964277267456, "learning_rate": 0.0018989643268124282, "loss": 0.7735, "step": 17560 }, { "epoch": 5.054660529344074, "grad_norm": 1.5162824392318726, "learning_rate": 0.0018989067894131188, "loss": 0.711, "step": 17570 }, { "epoch": 5.057537399309552, "grad_norm": 1.3009463548660278, "learning_rate": 0.0018988492520138089, "loss": 1.1706, "step": 17580 }, { "epoch": 5.060414269275029, "grad_norm": 2.1794471740722656, "learning_rate": 0.0018987917146144995, "loss": 0.9218, "step": 17590 }, { "epoch": 5.063291139240507, "grad_norm": 1.8540313243865967, "learning_rate": 0.0018987341772151898, "loss": 0.8178, "step": 17600 }, { "epoch": 5.066168009205984, "grad_norm": 1.0236400365829468, "learning_rate": 0.0018986766398158804, "loss": 0.7953, "step": 17610 }, { "epoch": 5.069044879171462, "grad_norm": 1.4573720693588257, "learning_rate": 0.001898619102416571, "loss": 0.6329, "step": 17620 }, { "epoch": 5.071921749136939, "grad_norm": 1.8383761644363403, "learning_rate": 0.0018985615650172613, "loss": 0.7533, "step": 17630 }, { "epoch": 5.074798619102417, "grad_norm": 3.452603340148926, "learning_rate": 0.0018985040276179516, "loss": 0.7481, "step": 17640 }, { "epoch": 5.077675489067894, "grad_norm": 1.1307698488235474, "learning_rate": 0.0018984464902186422, "loss": 0.826, "step": 17650 }, { "epoch": 5.080552359033372, "grad_norm": 0.9502083659172058, "learning_rate": 0.0018983889528193325, "loss": 0.8596, "step": 17660 }, { "epoch": 5.083429228998849, "grad_norm": 1.1588160991668701, "learning_rate": 0.001898331415420023, "loss": 0.7136, "step": 17670 }, { "epoch": 5.086306098964327, "grad_norm": 1.7164390087127686, "learning_rate": 0.0018982738780207137, "loss": 0.7165, "step": 17680 }, { "epoch": 5.089182968929804, "grad_norm": 1.4163689613342285, "learning_rate": 0.001898216340621404, "loss": 0.9903, "step": 17690 }, { "epoch": 5.092059838895282, "grad_norm": 0.8994120359420776, "learning_rate": 0.0018981588032220944, "loss": 0.9032, "step": 17700 }, { "epoch": 5.094936708860759, "grad_norm": 1.6373655796051025, "learning_rate": 0.0018981012658227847, "loss": 0.8484, "step": 17710 }, { "epoch": 5.097813578826237, "grad_norm": 1.210391640663147, "learning_rate": 0.0018980437284234753, "loss": 0.8205, "step": 17720 }, { "epoch": 5.100690448791714, "grad_norm": 2.4262077808380127, "learning_rate": 0.0018979861910241658, "loss": 0.846, "step": 17730 }, { "epoch": 5.103567318757192, "grad_norm": 1.756201982498169, "learning_rate": 0.0018979286536248562, "loss": 0.8644, "step": 17740 }, { "epoch": 5.10644418872267, "grad_norm": 1.1988427639007568, "learning_rate": 0.0018978711162255467, "loss": 0.826, "step": 17750 }, { "epoch": 5.109321058688147, "grad_norm": 1.4780081510543823, "learning_rate": 0.001897813578826237, "loss": 0.7659, "step": 17760 }, { "epoch": 5.112197928653625, "grad_norm": 1.6028764247894287, "learning_rate": 0.0018977560414269274, "loss": 0.841, "step": 17770 }, { "epoch": 5.115074798619102, "grad_norm": 0.9906308054924011, "learning_rate": 0.001897698504027618, "loss": 0.7796, "step": 17780 }, { "epoch": 5.11795166858458, "grad_norm": 2.007732629776001, "learning_rate": 0.0018976409666283086, "loss": 0.7678, "step": 17790 }, { "epoch": 5.120828538550057, "grad_norm": 0.8613387942314148, "learning_rate": 0.001897583429228999, "loss": 0.7467, "step": 17800 }, { "epoch": 5.123705408515535, "grad_norm": 1.1087634563446045, "learning_rate": 0.0018975258918296895, "loss": 0.8046, "step": 17810 }, { "epoch": 5.1265822784810124, "grad_norm": 1.116878867149353, "learning_rate": 0.0018974683544303796, "loss": 0.9212, "step": 17820 }, { "epoch": 5.12945914844649, "grad_norm": 1.387540578842163, "learning_rate": 0.0018974108170310702, "loss": 0.7846, "step": 17830 }, { "epoch": 5.1323360184119675, "grad_norm": 1.39962899684906, "learning_rate": 0.0018973532796317607, "loss": 0.7166, "step": 17840 }, { "epoch": 5.1352128883774455, "grad_norm": 1.3345894813537598, "learning_rate": 0.001897295742232451, "loss": 0.7076, "step": 17850 }, { "epoch": 5.138089758342923, "grad_norm": 0.8761097192764282, "learning_rate": 0.0018972382048331416, "loss": 0.788, "step": 17860 }, { "epoch": 5.140966628308401, "grad_norm": 1.152152180671692, "learning_rate": 0.0018971806674338322, "loss": 0.7218, "step": 17870 }, { "epoch": 5.143843498273878, "grad_norm": 0.6809861063957214, "learning_rate": 0.0018971231300345223, "loss": 0.786, "step": 17880 }, { "epoch": 5.146720368239356, "grad_norm": 0.8334994316101074, "learning_rate": 0.001897065592635213, "loss": 0.9586, "step": 17890 }, { "epoch": 5.149597238204833, "grad_norm": 1.5758264064788818, "learning_rate": 0.0018970080552359035, "loss": 1.0043, "step": 17900 }, { "epoch": 5.152474108170311, "grad_norm": 1.8146833181381226, "learning_rate": 0.0018969505178365938, "loss": 0.8243, "step": 17910 }, { "epoch": 5.155350978135789, "grad_norm": 1.5808809995651245, "learning_rate": 0.0018968929804372844, "loss": 0.8112, "step": 17920 }, { "epoch": 5.158227848101266, "grad_norm": 2.16937255859375, "learning_rate": 0.0018968354430379747, "loss": 0.8256, "step": 17930 }, { "epoch": 5.161104718066744, "grad_norm": 1.6838842630386353, "learning_rate": 0.001896777905638665, "loss": 0.865, "step": 17940 }, { "epoch": 5.163981588032221, "grad_norm": 0.99716717004776, "learning_rate": 0.0018967203682393556, "loss": 0.7805, "step": 17950 }, { "epoch": 5.166858457997699, "grad_norm": 1.3031702041625977, "learning_rate": 0.001896662830840046, "loss": 0.8681, "step": 17960 }, { "epoch": 5.169735327963176, "grad_norm": 0.7492700815200806, "learning_rate": 0.0018966052934407365, "loss": 0.805, "step": 17970 }, { "epoch": 5.172612197928654, "grad_norm": 1.313412070274353, "learning_rate": 0.0018965477560414271, "loss": 0.9346, "step": 17980 }, { "epoch": 5.175489067894131, "grad_norm": 1.3153040409088135, "learning_rate": 0.0018964902186421175, "loss": 0.8703, "step": 17990 }, { "epoch": 5.178365937859609, "grad_norm": 2.3913414478302, "learning_rate": 0.0018964326812428078, "loss": 0.7998, "step": 18000 }, { "epoch": 5.181242807825086, "grad_norm": 1.3475381135940552, "learning_rate": 0.0018963751438434984, "loss": 1.0781, "step": 18010 }, { "epoch": 5.184119677790564, "grad_norm": 1.9115195274353027, "learning_rate": 0.0018963176064441887, "loss": 0.7119, "step": 18020 }, { "epoch": 5.186996547756041, "grad_norm": 0.9604452252388, "learning_rate": 0.0018962600690448793, "loss": 0.6644, "step": 18030 }, { "epoch": 5.189873417721519, "grad_norm": 1.1033101081848145, "learning_rate": 0.0018962025316455696, "loss": 1.1411, "step": 18040 }, { "epoch": 5.192750287686996, "grad_norm": 1.607296109199524, "learning_rate": 0.00189614499424626, "loss": 0.8259, "step": 18050 }, { "epoch": 5.195627157652474, "grad_norm": 1.9536577463150024, "learning_rate": 0.0018960874568469505, "loss": 0.8819, "step": 18060 }, { "epoch": 5.198504027617951, "grad_norm": 1.3305127620697021, "learning_rate": 0.0018960299194476409, "loss": 0.7524, "step": 18070 }, { "epoch": 5.201380897583429, "grad_norm": 1.7142492532730103, "learning_rate": 0.0018959723820483314, "loss": 0.822, "step": 18080 }, { "epoch": 5.204257767548906, "grad_norm": 1.599463701248169, "learning_rate": 0.001895914844649022, "loss": 0.8988, "step": 18090 }, { "epoch": 5.207134637514384, "grad_norm": 1.1632341146469116, "learning_rate": 0.0018958573072497124, "loss": 0.9088, "step": 18100 }, { "epoch": 5.210011507479862, "grad_norm": 1.099673867225647, "learning_rate": 0.0018957997698504027, "loss": 0.8524, "step": 18110 }, { "epoch": 5.212888377445339, "grad_norm": 1.3757288455963135, "learning_rate": 0.0018957422324510933, "loss": 0.9335, "step": 18120 }, { "epoch": 5.215765247410817, "grad_norm": 1.2022992372512817, "learning_rate": 0.0018956846950517836, "loss": 0.8663, "step": 18130 }, { "epoch": 5.2186421173762945, "grad_norm": 1.122565746307373, "learning_rate": 0.0018956271576524742, "loss": 0.7908, "step": 18140 }, { "epoch": 5.2215189873417724, "grad_norm": 1.3167136907577515, "learning_rate": 0.0018955696202531647, "loss": 0.7454, "step": 18150 }, { "epoch": 5.2243958573072495, "grad_norm": 0.9892534017562866, "learning_rate": 0.001895512082853855, "loss": 0.9723, "step": 18160 }, { "epoch": 5.2272727272727275, "grad_norm": 1.1918003559112549, "learning_rate": 0.0018954545454545454, "loss": 0.7954, "step": 18170 }, { "epoch": 5.230149597238205, "grad_norm": 1.0871391296386719, "learning_rate": 0.0018953970080552358, "loss": 0.7916, "step": 18180 }, { "epoch": 5.233026467203683, "grad_norm": 1.3231825828552246, "learning_rate": 0.0018953394706559263, "loss": 0.7918, "step": 18190 }, { "epoch": 5.23590333716916, "grad_norm": 1.5785701274871826, "learning_rate": 0.001895281933256617, "loss": 0.7999, "step": 18200 }, { "epoch": 5.238780207134638, "grad_norm": 1.1864579916000366, "learning_rate": 0.0018952243958573073, "loss": 0.7684, "step": 18210 }, { "epoch": 5.241657077100115, "grad_norm": 0.7623103857040405, "learning_rate": 0.0018951668584579978, "loss": 0.8411, "step": 18220 }, { "epoch": 5.244533947065593, "grad_norm": 1.33597993850708, "learning_rate": 0.0018951093210586882, "loss": 0.7148, "step": 18230 }, { "epoch": 5.24741081703107, "grad_norm": 1.0741827487945557, "learning_rate": 0.0018950517836593785, "loss": 0.8265, "step": 18240 }, { "epoch": 5.250287686996548, "grad_norm": 0.8497599363327026, "learning_rate": 0.001894994246260069, "loss": 0.837, "step": 18250 }, { "epoch": 5.253164556962025, "grad_norm": 1.9282947778701782, "learning_rate": 0.0018949367088607596, "loss": 0.9281, "step": 18260 }, { "epoch": 5.256041426927503, "grad_norm": 1.9306257963180542, "learning_rate": 0.00189487917146145, "loss": 0.9189, "step": 18270 }, { "epoch": 5.258918296892981, "grad_norm": 1.292352318763733, "learning_rate": 0.0018948216340621406, "loss": 0.739, "step": 18280 }, { "epoch": 5.261795166858458, "grad_norm": 1.010333776473999, "learning_rate": 0.0018947640966628307, "loss": 0.8567, "step": 18290 }, { "epoch": 5.264672036823936, "grad_norm": 1.30037260055542, "learning_rate": 0.0018947065592635213, "loss": 0.9424, "step": 18300 }, { "epoch": 5.267548906789413, "grad_norm": 0.8944627642631531, "learning_rate": 0.0018946490218642118, "loss": 1.0069, "step": 18310 }, { "epoch": 5.270425776754891, "grad_norm": 0.9772331118583679, "learning_rate": 0.0018945914844649022, "loss": 0.9644, "step": 18320 }, { "epoch": 5.273302646720368, "grad_norm": 1.8377894163131714, "learning_rate": 0.0018945339470655927, "loss": 0.814, "step": 18330 }, { "epoch": 5.276179516685846, "grad_norm": 1.1091291904449463, "learning_rate": 0.0018944764096662833, "loss": 0.9383, "step": 18340 }, { "epoch": 5.279056386651323, "grad_norm": 1.478438138961792, "learning_rate": 0.0018944188722669734, "loss": 0.8289, "step": 18350 }, { "epoch": 5.281933256616801, "grad_norm": 1.4942939281463623, "learning_rate": 0.001894361334867664, "loss": 0.9094, "step": 18360 }, { "epoch": 5.284810126582278, "grad_norm": 1.089970350265503, "learning_rate": 0.0018943037974683545, "loss": 0.8572, "step": 18370 }, { "epoch": 5.287686996547756, "grad_norm": 0.8128070831298828, "learning_rate": 0.001894246260069045, "loss": 0.819, "step": 18380 }, { "epoch": 5.290563866513233, "grad_norm": 1.3658853769302368, "learning_rate": 0.0018941887226697355, "loss": 0.8785, "step": 18390 }, { "epoch": 5.293440736478711, "grad_norm": 0.7055487632751465, "learning_rate": 0.0018941311852704258, "loss": 0.8124, "step": 18400 }, { "epoch": 5.296317606444188, "grad_norm": 1.1103487014770508, "learning_rate": 0.0018940736478711162, "loss": 0.8339, "step": 18410 }, { "epoch": 5.299194476409666, "grad_norm": 2.082221031188965, "learning_rate": 0.0018940161104718067, "loss": 0.9867, "step": 18420 }, { "epoch": 5.302071346375143, "grad_norm": 1.1388001441955566, "learning_rate": 0.001893958573072497, "loss": 0.9595, "step": 18430 }, { "epoch": 5.304948216340621, "grad_norm": 0.9302062392234802, "learning_rate": 0.0018939010356731876, "loss": 0.8735, "step": 18440 }, { "epoch": 5.307825086306099, "grad_norm": 1.524078130722046, "learning_rate": 0.0018938434982738782, "loss": 1.0203, "step": 18450 }, { "epoch": 5.3107019562715765, "grad_norm": 1.0803372859954834, "learning_rate": 0.0018937859608745685, "loss": 0.8214, "step": 18460 }, { "epoch": 5.3135788262370545, "grad_norm": 1.214746117591858, "learning_rate": 0.0018937284234752589, "loss": 0.72, "step": 18470 }, { "epoch": 5.3164556962025316, "grad_norm": 1.928853988647461, "learning_rate": 0.0018936708860759495, "loss": 1.0314, "step": 18480 }, { "epoch": 5.3193325661680095, "grad_norm": 2.1038169860839844, "learning_rate": 0.0018936133486766398, "loss": 0.9422, "step": 18490 }, { "epoch": 5.322209436133487, "grad_norm": 1.7815771102905273, "learning_rate": 0.0018935558112773304, "loss": 0.8094, "step": 18500 }, { "epoch": 5.325086306098965, "grad_norm": 1.2687050104141235, "learning_rate": 0.0018934982738780207, "loss": 0.9129, "step": 18510 }, { "epoch": 5.327963176064442, "grad_norm": 2.042266368865967, "learning_rate": 0.0018934407364787113, "loss": 1.0505, "step": 18520 }, { "epoch": 5.33084004602992, "grad_norm": 1.2896252870559692, "learning_rate": 0.0018933831990794016, "loss": 0.9261, "step": 18530 }, { "epoch": 5.333716915995397, "grad_norm": 0.8034769892692566, "learning_rate": 0.001893325661680092, "loss": 0.7082, "step": 18540 }, { "epoch": 5.336593785960875, "grad_norm": 1.2164417505264282, "learning_rate": 0.0018932681242807825, "loss": 0.8054, "step": 18550 }, { "epoch": 5.339470655926352, "grad_norm": 1.6895077228546143, "learning_rate": 0.001893210586881473, "loss": 0.9064, "step": 18560 }, { "epoch": 5.34234752589183, "grad_norm": 1.5258841514587402, "learning_rate": 0.0018931530494821634, "loss": 0.9056, "step": 18570 }, { "epoch": 5.345224395857307, "grad_norm": 1.6245036125183105, "learning_rate": 0.001893095512082854, "loss": 0.8277, "step": 18580 }, { "epoch": 5.348101265822785, "grad_norm": 1.5629271268844604, "learning_rate": 0.0018930379746835444, "loss": 0.9772, "step": 18590 }, { "epoch": 5.350978135788262, "grad_norm": 1.099730372428894, "learning_rate": 0.0018929804372842347, "loss": 0.8994, "step": 18600 }, { "epoch": 5.35385500575374, "grad_norm": 1.438746452331543, "learning_rate": 0.0018929228998849253, "loss": 0.6807, "step": 18610 }, { "epoch": 5.356731875719218, "grad_norm": 2.6135692596435547, "learning_rate": 0.0018928653624856156, "loss": 0.9437, "step": 18620 }, { "epoch": 5.359608745684695, "grad_norm": 1.4440566301345825, "learning_rate": 0.0018928078250863062, "loss": 0.7, "step": 18630 }, { "epoch": 5.362485615650173, "grad_norm": 2.112138032913208, "learning_rate": 0.0018927502876869967, "loss": 0.8017, "step": 18640 }, { "epoch": 5.36536248561565, "grad_norm": 2.3341493606567383, "learning_rate": 0.0018926927502876869, "loss": 0.8614, "step": 18650 }, { "epoch": 5.368239355581128, "grad_norm": 0.7533648610115051, "learning_rate": 0.0018926352128883774, "loss": 0.82, "step": 18660 }, { "epoch": 5.371116225546605, "grad_norm": 1.550812005996704, "learning_rate": 0.001892577675489068, "loss": 0.678, "step": 18670 }, { "epoch": 5.373993095512083, "grad_norm": 1.0166354179382324, "learning_rate": 0.0018925201380897583, "loss": 0.8541, "step": 18680 }, { "epoch": 5.37686996547756, "grad_norm": 1.4697178602218628, "learning_rate": 0.001892462600690449, "loss": 0.8384, "step": 18690 }, { "epoch": 5.379746835443038, "grad_norm": 0.5151211619377136, "learning_rate": 0.0018924050632911395, "loss": 0.8467, "step": 18700 }, { "epoch": 5.382623705408515, "grad_norm": 1.077589511871338, "learning_rate": 0.0018923475258918296, "loss": 0.7856, "step": 18710 }, { "epoch": 5.385500575373993, "grad_norm": 0.9926930665969849, "learning_rate": 0.0018922899884925202, "loss": 0.9442, "step": 18720 }, { "epoch": 5.38837744533947, "grad_norm": 1.1745728254318237, "learning_rate": 0.0018922324510932105, "loss": 0.8009, "step": 18730 }, { "epoch": 5.391254315304948, "grad_norm": 1.3973841667175293, "learning_rate": 0.001892174913693901, "loss": 0.9736, "step": 18740 }, { "epoch": 5.3941311852704255, "grad_norm": 1.5448514223098755, "learning_rate": 0.0018921173762945916, "loss": 0.732, "step": 18750 }, { "epoch": 5.397008055235903, "grad_norm": 0.8903096318244934, "learning_rate": 0.001892059838895282, "loss": 0.825, "step": 18760 }, { "epoch": 5.3998849252013805, "grad_norm": 1.3995296955108643, "learning_rate": 0.0018920023014959723, "loss": 0.8364, "step": 18770 }, { "epoch": 5.4027617951668585, "grad_norm": 2.1865005493164062, "learning_rate": 0.001891944764096663, "loss": 0.8012, "step": 18780 }, { "epoch": 5.4056386651323365, "grad_norm": 1.020517349243164, "learning_rate": 0.0018918872266973532, "loss": 0.8958, "step": 18790 }, { "epoch": 5.408515535097814, "grad_norm": 1.4758598804473877, "learning_rate": 0.0018918296892980438, "loss": 0.9646, "step": 18800 }, { "epoch": 5.4113924050632916, "grad_norm": 3.1398251056671143, "learning_rate": 0.0018917721518987344, "loss": 1.015, "step": 18810 }, { "epoch": 5.414269275028769, "grad_norm": 0.9224035143852234, "learning_rate": 0.0018917146144994247, "loss": 0.824, "step": 18820 }, { "epoch": 5.417146144994247, "grad_norm": 2.039539098739624, "learning_rate": 0.001891657077100115, "loss": 0.9126, "step": 18830 }, { "epoch": 5.420023014959724, "grad_norm": 1.0343718528747559, "learning_rate": 0.0018915995397008056, "loss": 0.9271, "step": 18840 }, { "epoch": 5.422899884925202, "grad_norm": 2.8488221168518066, "learning_rate": 0.001891542002301496, "loss": 0.8383, "step": 18850 }, { "epoch": 5.425776754890679, "grad_norm": 0.9658272862434387, "learning_rate": 0.0018914844649021865, "loss": 0.8989, "step": 18860 }, { "epoch": 5.428653624856157, "grad_norm": 2.5938098430633545, "learning_rate": 0.0018914269275028769, "loss": 0.8645, "step": 18870 }, { "epoch": 5.431530494821634, "grad_norm": 1.4039477109909058, "learning_rate": 0.0018913693901035672, "loss": 0.8805, "step": 18880 }, { "epoch": 5.434407364787112, "grad_norm": 1.5328155755996704, "learning_rate": 0.0018913118527042578, "loss": 0.7395, "step": 18890 }, { "epoch": 5.437284234752589, "grad_norm": 2.183147668838501, "learning_rate": 0.0018912543153049481, "loss": 0.956, "step": 18900 }, { "epoch": 5.440161104718067, "grad_norm": 1.2161823511123657, "learning_rate": 0.0018911967779056387, "loss": 0.6948, "step": 18910 }, { "epoch": 5.443037974683544, "grad_norm": 1.1341174840927124, "learning_rate": 0.0018911392405063293, "loss": 0.6449, "step": 18920 }, { "epoch": 5.445914844649022, "grad_norm": 2.8445520401000977, "learning_rate": 0.0018910817031070196, "loss": 0.8197, "step": 18930 }, { "epoch": 5.448791714614499, "grad_norm": 1.7963396310806274, "learning_rate": 0.00189102416570771, "loss": 0.8691, "step": 18940 }, { "epoch": 5.451668584579977, "grad_norm": 1.137981653213501, "learning_rate": 0.0018909666283084005, "loss": 1.0188, "step": 18950 }, { "epoch": 5.454545454545454, "grad_norm": 1.7461999654769897, "learning_rate": 0.0018909090909090909, "loss": 0.8793, "step": 18960 }, { "epoch": 5.457422324510932, "grad_norm": 0.953266441822052, "learning_rate": 0.0018908515535097814, "loss": 0.8147, "step": 18970 }, { "epoch": 5.460299194476409, "grad_norm": 1.6727842092514038, "learning_rate": 0.0018907940161104718, "loss": 1.0661, "step": 18980 }, { "epoch": 5.463176064441887, "grad_norm": 2.1502082347869873, "learning_rate": 0.0018907364787111624, "loss": 0.8629, "step": 18990 }, { "epoch": 5.466052934407365, "grad_norm": 1.4968808889389038, "learning_rate": 0.0018906789413118527, "loss": 0.9752, "step": 19000 }, { "epoch": 5.468929804372842, "grad_norm": 1.7006076574325562, "learning_rate": 0.001890621403912543, "loss": 0.8582, "step": 19010 }, { "epoch": 5.47180667433832, "grad_norm": 1.7141473293304443, "learning_rate": 0.0018905638665132336, "loss": 0.9328, "step": 19020 }, { "epoch": 5.474683544303797, "grad_norm": 1.4945282936096191, "learning_rate": 0.0018905063291139242, "loss": 0.8558, "step": 19030 }, { "epoch": 5.477560414269275, "grad_norm": 1.566447377204895, "learning_rate": 0.0018904487917146145, "loss": 0.7026, "step": 19040 }, { "epoch": 5.480437284234752, "grad_norm": 2.0675642490386963, "learning_rate": 0.001890391254315305, "loss": 0.672, "step": 19050 }, { "epoch": 5.48331415420023, "grad_norm": 1.0907763242721558, "learning_rate": 0.0018903337169159954, "loss": 0.6769, "step": 19060 }, { "epoch": 5.4861910241657075, "grad_norm": 1.0497710704803467, "learning_rate": 0.0018902761795166858, "loss": 0.8245, "step": 19070 }, { "epoch": 5.4890678941311855, "grad_norm": 0.9769449830055237, "learning_rate": 0.0018902186421173763, "loss": 0.8894, "step": 19080 }, { "epoch": 5.4919447640966625, "grad_norm": 2.910723924636841, "learning_rate": 0.0018901611047180667, "loss": 0.814, "step": 19090 }, { "epoch": 5.4948216340621405, "grad_norm": 0.7506650686264038, "learning_rate": 0.0018901035673187573, "loss": 0.807, "step": 19100 }, { "epoch": 5.497698504027618, "grad_norm": 1.4191780090332031, "learning_rate": 0.0018900460299194478, "loss": 0.8041, "step": 19110 }, { "epoch": 5.500575373993096, "grad_norm": 2.9063937664031982, "learning_rate": 0.001889988492520138, "loss": 0.8656, "step": 19120 }, { "epoch": 5.503452243958573, "grad_norm": 2.0140795707702637, "learning_rate": 0.0018899309551208285, "loss": 1.0006, "step": 19130 }, { "epoch": 5.506329113924051, "grad_norm": 1.6289827823638916, "learning_rate": 0.001889873417721519, "loss": 0.8071, "step": 19140 }, { "epoch": 5.509205983889528, "grad_norm": 0.9991874098777771, "learning_rate": 0.0018898158803222094, "loss": 0.75, "step": 19150 }, { "epoch": 5.512082853855006, "grad_norm": 1.2418668270111084, "learning_rate": 0.0018897583429229, "loss": 0.7276, "step": 19160 }, { "epoch": 5.514959723820484, "grad_norm": 2.0358426570892334, "learning_rate": 0.0018897008055235906, "loss": 0.8933, "step": 19170 }, { "epoch": 5.517836593785961, "grad_norm": 1.230216383934021, "learning_rate": 0.0018896432681242807, "loss": 0.9013, "step": 19180 }, { "epoch": 5.520713463751439, "grad_norm": 1.3119590282440186, "learning_rate": 0.0018895857307249712, "loss": 0.911, "step": 19190 }, { "epoch": 5.523590333716916, "grad_norm": 1.5707626342773438, "learning_rate": 0.0018895281933256616, "loss": 0.8156, "step": 19200 }, { "epoch": 5.526467203682394, "grad_norm": 1.5824109315872192, "learning_rate": 0.0018894706559263522, "loss": 1.1048, "step": 19210 }, { "epoch": 5.529344073647871, "grad_norm": 1.1960432529449463, "learning_rate": 0.0018894131185270427, "loss": 0.8992, "step": 19220 }, { "epoch": 5.532220943613349, "grad_norm": 0.7675307989120483, "learning_rate": 0.001889355581127733, "loss": 0.8292, "step": 19230 }, { "epoch": 5.535097813578826, "grad_norm": 0.97291499376297, "learning_rate": 0.0018892980437284234, "loss": 0.9289, "step": 19240 }, { "epoch": 5.537974683544304, "grad_norm": 1.9254679679870605, "learning_rate": 0.001889240506329114, "loss": 0.7871, "step": 19250 }, { "epoch": 5.540851553509781, "grad_norm": 2.0215158462524414, "learning_rate": 0.0018891829689298043, "loss": 0.7443, "step": 19260 }, { "epoch": 5.543728423475259, "grad_norm": 1.3054957389831543, "learning_rate": 0.001889125431530495, "loss": 1.0494, "step": 19270 }, { "epoch": 5.546605293440736, "grad_norm": 0.9863124489784241, "learning_rate": 0.0018890678941311855, "loss": 0.7613, "step": 19280 }, { "epoch": 5.549482163406214, "grad_norm": 1.260108470916748, "learning_rate": 0.0018890103567318758, "loss": 0.8958, "step": 19290 }, { "epoch": 5.552359033371691, "grad_norm": 0.9793252944946289, "learning_rate": 0.0018889528193325662, "loss": 0.8697, "step": 19300 }, { "epoch": 5.555235903337169, "grad_norm": 1.589274525642395, "learning_rate": 0.0018888952819332565, "loss": 1.0057, "step": 19310 }, { "epoch": 5.558112773302646, "grad_norm": 1.4041541814804077, "learning_rate": 0.001888837744533947, "loss": 0.7927, "step": 19320 }, { "epoch": 5.560989643268124, "grad_norm": 1.5798767805099487, "learning_rate": 0.0018887802071346376, "loss": 0.9224, "step": 19330 }, { "epoch": 5.563866513233602, "grad_norm": 1.1237659454345703, "learning_rate": 0.001888722669735328, "loss": 0.8283, "step": 19340 }, { "epoch": 5.566743383199079, "grad_norm": 1.3648008108139038, "learning_rate": 0.0018886651323360185, "loss": 0.7796, "step": 19350 }, { "epoch": 5.569620253164557, "grad_norm": 1.2551000118255615, "learning_rate": 0.0018886075949367089, "loss": 0.6673, "step": 19360 }, { "epoch": 5.572497123130034, "grad_norm": 0.9499288201332092, "learning_rate": 0.0018885500575373992, "loss": 0.8556, "step": 19370 }, { "epoch": 5.575373993095512, "grad_norm": 0.9102627038955688, "learning_rate": 0.0018884925201380898, "loss": 0.7395, "step": 19380 }, { "epoch": 5.5782508630609895, "grad_norm": 1.1692241430282593, "learning_rate": 0.0018884349827387804, "loss": 0.7776, "step": 19390 }, { "epoch": 5.5811277330264675, "grad_norm": 3.6941919326782227, "learning_rate": 0.0018883774453394707, "loss": 0.7938, "step": 19400 }, { "epoch": 5.584004602991945, "grad_norm": 1.3671131134033203, "learning_rate": 0.0018883199079401613, "loss": 0.8828, "step": 19410 }, { "epoch": 5.5868814729574225, "grad_norm": 1.0438954830169678, "learning_rate": 0.0018882623705408514, "loss": 1.0151, "step": 19420 }, { "epoch": 5.5897583429229, "grad_norm": 1.6201235055923462, "learning_rate": 0.001888204833141542, "loss": 0.8926, "step": 19430 }, { "epoch": 5.592635212888378, "grad_norm": 1.36597740650177, "learning_rate": 0.0018881472957422325, "loss": 0.8673, "step": 19440 }, { "epoch": 5.595512082853855, "grad_norm": 0.9515073299407959, "learning_rate": 0.0018880897583429229, "loss": 0.9063, "step": 19450 }, { "epoch": 5.598388952819333, "grad_norm": 1.668741226196289, "learning_rate": 0.0018880322209436134, "loss": 0.9212, "step": 19460 }, { "epoch": 5.60126582278481, "grad_norm": 0.7779862880706787, "learning_rate": 0.001887974683544304, "loss": 0.8675, "step": 19470 }, { "epoch": 5.604142692750288, "grad_norm": 2.0372817516326904, "learning_rate": 0.0018879171461449941, "loss": 0.8771, "step": 19480 }, { "epoch": 5.607019562715765, "grad_norm": 1.4248881340026855, "learning_rate": 0.0018878596087456847, "loss": 0.7812, "step": 19490 }, { "epoch": 5.609896432681243, "grad_norm": 0.8509798049926758, "learning_rate": 0.0018878020713463753, "loss": 0.9234, "step": 19500 }, { "epoch": 5.612773302646721, "grad_norm": 1.8518143892288208, "learning_rate": 0.0018877445339470656, "loss": 0.8616, "step": 19510 }, { "epoch": 5.615650172612198, "grad_norm": 2.0815610885620117, "learning_rate": 0.0018876869965477562, "loss": 0.9086, "step": 19520 }, { "epoch": 5.618527042577675, "grad_norm": 2.216059684753418, "learning_rate": 0.0018876294591484467, "loss": 0.7255, "step": 19530 }, { "epoch": 5.621403912543153, "grad_norm": 1.2564637660980225, "learning_rate": 0.0018875719217491369, "loss": 0.7633, "step": 19540 }, { "epoch": 5.624280782508631, "grad_norm": 1.2209111452102661, "learning_rate": 0.0018875143843498274, "loss": 0.9476, "step": 19550 }, { "epoch": 5.627157652474108, "grad_norm": 0.7306578159332275, "learning_rate": 0.0018874568469505178, "loss": 0.9338, "step": 19560 }, { "epoch": 5.630034522439586, "grad_norm": 1.6607263088226318, "learning_rate": 0.0018873993095512083, "loss": 0.9606, "step": 19570 }, { "epoch": 5.632911392405063, "grad_norm": 1.147682547569275, "learning_rate": 0.001887341772151899, "loss": 0.7305, "step": 19580 }, { "epoch": 5.635788262370541, "grad_norm": 0.7683880925178528, "learning_rate": 0.0018872842347525893, "loss": 0.8121, "step": 19590 }, { "epoch": 5.638665132336018, "grad_norm": 1.209186315536499, "learning_rate": 0.0018872266973532796, "loss": 1.0677, "step": 19600 }, { "epoch": 5.641542002301496, "grad_norm": 1.172072172164917, "learning_rate": 0.0018871691599539702, "loss": 0.7809, "step": 19610 }, { "epoch": 5.644418872266973, "grad_norm": 0.9895082712173462, "learning_rate": 0.0018871116225546605, "loss": 0.9509, "step": 19620 }, { "epoch": 5.647295742232451, "grad_norm": 2.550826072692871, "learning_rate": 0.001887054085155351, "loss": 1.1593, "step": 19630 }, { "epoch": 5.650172612197928, "grad_norm": 1.3504345417022705, "learning_rate": 0.0018869965477560416, "loss": 0.8662, "step": 19640 }, { "epoch": 5.653049482163406, "grad_norm": 1.1648269891738892, "learning_rate": 0.001886939010356732, "loss": 0.8721, "step": 19650 }, { "epoch": 5.655926352128883, "grad_norm": 1.5731451511383057, "learning_rate": 0.0018868814729574223, "loss": 0.8489, "step": 19660 }, { "epoch": 5.658803222094361, "grad_norm": 1.1394309997558594, "learning_rate": 0.0018868239355581127, "loss": 0.9299, "step": 19670 }, { "epoch": 5.661680092059839, "grad_norm": 1.6290360689163208, "learning_rate": 0.0018867663981588032, "loss": 0.931, "step": 19680 }, { "epoch": 5.6645569620253164, "grad_norm": 1.478316068649292, "learning_rate": 0.0018867088607594938, "loss": 0.7865, "step": 19690 }, { "epoch": 5.6674338319907935, "grad_norm": 1.2662662267684937, "learning_rate": 0.0018866513233601842, "loss": 0.6992, "step": 19700 }, { "epoch": 5.6703107019562715, "grad_norm": 1.2330191135406494, "learning_rate": 0.0018865937859608745, "loss": 0.8183, "step": 19710 }, { "epoch": 5.6731875719217495, "grad_norm": 1.288442850112915, "learning_rate": 0.001886536248561565, "loss": 0.8885, "step": 19720 }, { "epoch": 5.676064441887227, "grad_norm": 1.1256036758422852, "learning_rate": 0.0018864787111622554, "loss": 0.7652, "step": 19730 }, { "epoch": 5.678941311852705, "grad_norm": 2.234650135040283, "learning_rate": 0.001886421173762946, "loss": 0.8283, "step": 19740 }, { "epoch": 5.681818181818182, "grad_norm": 1.0146348476409912, "learning_rate": 0.0018863636363636365, "loss": 0.8233, "step": 19750 }, { "epoch": 5.68469505178366, "grad_norm": 1.6277049779891968, "learning_rate": 0.0018863060989643269, "loss": 0.933, "step": 19760 }, { "epoch": 5.687571921749137, "grad_norm": 1.0435677766799927, "learning_rate": 0.0018862485615650172, "loss": 0.9356, "step": 19770 }, { "epoch": 5.690448791714615, "grad_norm": 1.0783753395080566, "learning_rate": 0.0018861910241657076, "loss": 0.839, "step": 19780 }, { "epoch": 5.693325661680092, "grad_norm": 1.5430575609207153, "learning_rate": 0.0018861334867663981, "loss": 0.8631, "step": 19790 }, { "epoch": 5.69620253164557, "grad_norm": 1.6489195823669434, "learning_rate": 0.0018860759493670887, "loss": 0.8205, "step": 19800 }, { "epoch": 5.699079401611047, "grad_norm": 2.009718656539917, "learning_rate": 0.001886018411967779, "loss": 0.7627, "step": 19810 }, { "epoch": 5.701956271576525, "grad_norm": 1.0821222066879272, "learning_rate": 0.0018859608745684696, "loss": 0.9343, "step": 19820 }, { "epoch": 5.704833141542002, "grad_norm": 1.2532403469085693, "learning_rate": 0.00188590333716916, "loss": 0.9804, "step": 19830 }, { "epoch": 5.70771001150748, "grad_norm": 1.6600769758224487, "learning_rate": 0.0018858457997698503, "loss": 0.9994, "step": 19840 }, { "epoch": 5.710586881472958, "grad_norm": 1.0644265413284302, "learning_rate": 0.0018857882623705409, "loss": 0.8138, "step": 19850 }, { "epoch": 5.713463751438435, "grad_norm": 1.3147419691085815, "learning_rate": 0.0018857307249712314, "loss": 0.7467, "step": 19860 }, { "epoch": 5.716340621403912, "grad_norm": 1.1628472805023193, "learning_rate": 0.0018856731875719218, "loss": 0.9133, "step": 19870 }, { "epoch": 5.71921749136939, "grad_norm": 0.9137598276138306, "learning_rate": 0.0018856156501726124, "loss": 0.7629, "step": 19880 }, { "epoch": 5.722094361334868, "grad_norm": 1.199052333831787, "learning_rate": 0.0018855581127733025, "loss": 0.7202, "step": 19890 }, { "epoch": 5.724971231300345, "grad_norm": 1.7381749153137207, "learning_rate": 0.001885500575373993, "loss": 0.9588, "step": 19900 }, { "epoch": 5.727848101265823, "grad_norm": 1.7585444450378418, "learning_rate": 0.0018854430379746836, "loss": 0.9094, "step": 19910 }, { "epoch": 5.7307249712313, "grad_norm": 1.080928087234497, "learning_rate": 0.001885385500575374, "loss": 0.8067, "step": 19920 }, { "epoch": 5.733601841196778, "grad_norm": 0.9813022613525391, "learning_rate": 0.0018853279631760645, "loss": 1.0359, "step": 19930 }, { "epoch": 5.736478711162255, "grad_norm": 1.8625946044921875, "learning_rate": 0.001885270425776755, "loss": 0.6745, "step": 19940 }, { "epoch": 5.739355581127733, "grad_norm": 1.13152277469635, "learning_rate": 0.0018852128883774452, "loss": 0.9225, "step": 19950 }, { "epoch": 5.74223245109321, "grad_norm": 1.0989748239517212, "learning_rate": 0.0018851553509781358, "loss": 0.9023, "step": 19960 }, { "epoch": 5.745109321058688, "grad_norm": 1.0096322298049927, "learning_rate": 0.0018850978135788263, "loss": 0.7669, "step": 19970 }, { "epoch": 5.747986191024165, "grad_norm": 1.552364706993103, "learning_rate": 0.0018850402761795167, "loss": 0.8031, "step": 19980 }, { "epoch": 5.750863060989643, "grad_norm": 1.1076048612594604, "learning_rate": 0.0018849827387802073, "loss": 0.8168, "step": 19990 }, { "epoch": 5.7537399309551205, "grad_norm": 2.081461191177368, "learning_rate": 0.0018849252013808976, "loss": 0.9662, "step": 20000 }, { "epoch": 5.7566168009205985, "grad_norm": 0.9630710482597351, "learning_rate": 0.001884867663981588, "loss": 0.8075, "step": 20010 }, { "epoch": 5.759493670886076, "grad_norm": 1.0869536399841309, "learning_rate": 0.0018848101265822785, "loss": 0.8514, "step": 20020 }, { "epoch": 5.7623705408515535, "grad_norm": 1.171730875968933, "learning_rate": 0.0018847525891829689, "loss": 1.0538, "step": 20030 }, { "epoch": 5.765247410817031, "grad_norm": 0.90543532371521, "learning_rate": 0.0018846950517836594, "loss": 0.9598, "step": 20040 }, { "epoch": 5.768124280782509, "grad_norm": 1.6682353019714355, "learning_rate": 0.00188463751438435, "loss": 0.9438, "step": 20050 }, { "epoch": 5.771001150747987, "grad_norm": 1.2054930925369263, "learning_rate": 0.0018845799769850403, "loss": 0.936, "step": 20060 }, { "epoch": 5.773878020713464, "grad_norm": 1.6981950998306274, "learning_rate": 0.0018845224395857307, "loss": 0.9175, "step": 20070 }, { "epoch": 5.776754890678942, "grad_norm": 1.4012473821640015, "learning_rate": 0.0018844649021864212, "loss": 0.8625, "step": 20080 }, { "epoch": 5.779631760644419, "grad_norm": 0.8079041838645935, "learning_rate": 0.0018844073647871116, "loss": 1.0502, "step": 20090 }, { "epoch": 5.782508630609897, "grad_norm": 0.8504668474197388, "learning_rate": 0.0018843498273878022, "loss": 0.8177, "step": 20100 }, { "epoch": 5.785385500575374, "grad_norm": 0.9396620392799377, "learning_rate": 0.0018842922899884927, "loss": 0.8061, "step": 20110 }, { "epoch": 5.788262370540852, "grad_norm": 0.8146317601203918, "learning_rate": 0.001884234752589183, "loss": 0.8985, "step": 20120 }, { "epoch": 5.791139240506329, "grad_norm": 1.0469858646392822, "learning_rate": 0.0018841772151898734, "loss": 0.6047, "step": 20130 }, { "epoch": 5.794016110471807, "grad_norm": 1.5707303285598755, "learning_rate": 0.0018841196777905638, "loss": 0.9968, "step": 20140 }, { "epoch": 5.796892980437284, "grad_norm": 1.619463562965393, "learning_rate": 0.0018840621403912543, "loss": 0.9507, "step": 20150 }, { "epoch": 5.799769850402762, "grad_norm": 1.4649269580841064, "learning_rate": 0.0018840046029919449, "loss": 0.8397, "step": 20160 }, { "epoch": 5.802646720368239, "grad_norm": 1.1195826530456543, "learning_rate": 0.0018839470655926352, "loss": 0.8715, "step": 20170 }, { "epoch": 5.805523590333717, "grad_norm": 1.1427017450332642, "learning_rate": 0.0018838895281933258, "loss": 0.6949, "step": 20180 }, { "epoch": 5.808400460299194, "grad_norm": 1.0768096446990967, "learning_rate": 0.0018838319907940161, "loss": 0.8632, "step": 20190 }, { "epoch": 5.811277330264672, "grad_norm": 0.853550136089325, "learning_rate": 0.0018837744533947065, "loss": 0.687, "step": 20200 }, { "epoch": 5.814154200230149, "grad_norm": 0.9977357983589172, "learning_rate": 0.001883716915995397, "loss": 0.8628, "step": 20210 }, { "epoch": 5.817031070195627, "grad_norm": 1.536452293395996, "learning_rate": 0.0018836593785960876, "loss": 0.861, "step": 20220 }, { "epoch": 5.819907940161105, "grad_norm": 0.8924586772918701, "learning_rate": 0.001883601841196778, "loss": 0.8901, "step": 20230 }, { "epoch": 5.822784810126582, "grad_norm": 1.2787834405899048, "learning_rate": 0.0018835443037974685, "loss": 1.0449, "step": 20240 }, { "epoch": 5.82566168009206, "grad_norm": 1.0678391456604004, "learning_rate": 0.0018834867663981587, "loss": 0.8648, "step": 20250 }, { "epoch": 5.828538550057537, "grad_norm": 1.265444040298462, "learning_rate": 0.0018834292289988492, "loss": 0.7044, "step": 20260 }, { "epoch": 5.831415420023015, "grad_norm": 1.6804550886154175, "learning_rate": 0.0018833716915995398, "loss": 0.7585, "step": 20270 }, { "epoch": 5.834292289988492, "grad_norm": 2.8259482383728027, "learning_rate": 0.0018833141542002301, "loss": 1.1082, "step": 20280 }, { "epoch": 5.83716915995397, "grad_norm": 1.4042149782180786, "learning_rate": 0.0018832566168009207, "loss": 0.7592, "step": 20290 }, { "epoch": 5.840046029919447, "grad_norm": 1.3107634782791138, "learning_rate": 0.0018831990794016113, "loss": 0.9282, "step": 20300 }, { "epoch": 5.842922899884925, "grad_norm": 1.4377188682556152, "learning_rate": 0.0018831415420023014, "loss": 1.0324, "step": 20310 }, { "epoch": 5.8457997698504025, "grad_norm": 1.0982204675674438, "learning_rate": 0.001883084004602992, "loss": 0.958, "step": 20320 }, { "epoch": 5.8486766398158805, "grad_norm": 1.3353513479232788, "learning_rate": 0.0018830264672036825, "loss": 1.0105, "step": 20330 }, { "epoch": 5.851553509781358, "grad_norm": 0.8490620851516724, "learning_rate": 0.0018829689298043729, "loss": 0.9433, "step": 20340 }, { "epoch": 5.8544303797468356, "grad_norm": 1.6405433416366577, "learning_rate": 0.0018829113924050634, "loss": 0.9469, "step": 20350 }, { "epoch": 5.857307249712313, "grad_norm": 0.9981609582901001, "learning_rate": 0.0018828538550057538, "loss": 0.9645, "step": 20360 }, { "epoch": 5.860184119677791, "grad_norm": 1.0938726663589478, "learning_rate": 0.0018827963176064441, "loss": 0.8007, "step": 20370 }, { "epoch": 5.863060989643268, "grad_norm": 0.7567389607429504, "learning_rate": 0.0018827387802071347, "loss": 0.8804, "step": 20380 }, { "epoch": 5.865937859608746, "grad_norm": 1.1213295459747314, "learning_rate": 0.001882681242807825, "loss": 0.9088, "step": 20390 }, { "epoch": 5.868814729574224, "grad_norm": 1.0832411050796509, "learning_rate": 0.0018826237054085156, "loss": 0.8266, "step": 20400 }, { "epoch": 5.871691599539701, "grad_norm": 1.4620741605758667, "learning_rate": 0.0018825661680092062, "loss": 0.8424, "step": 20410 }, { "epoch": 5.874568469505179, "grad_norm": 1.2330838441848755, "learning_rate": 0.0018825086306098965, "loss": 0.756, "step": 20420 }, { "epoch": 5.877445339470656, "grad_norm": 1.0376032590866089, "learning_rate": 0.0018824510932105869, "loss": 0.7283, "step": 20430 }, { "epoch": 5.880322209436134, "grad_norm": 1.112954020500183, "learning_rate": 0.0018823935558112774, "loss": 0.9972, "step": 20440 }, { "epoch": 5.883199079401611, "grad_norm": 1.4320244789123535, "learning_rate": 0.0018823360184119678, "loss": 1.0932, "step": 20450 }, { "epoch": 5.886075949367089, "grad_norm": 1.0047359466552734, "learning_rate": 0.0018822784810126583, "loss": 0.7869, "step": 20460 }, { "epoch": 5.888952819332566, "grad_norm": 2.079314708709717, "learning_rate": 0.0018822209436133487, "loss": 0.8744, "step": 20470 }, { "epoch": 5.891829689298044, "grad_norm": 1.3210278749465942, "learning_rate": 0.0018821634062140393, "loss": 0.6896, "step": 20480 }, { "epoch": 5.894706559263521, "grad_norm": 1.0874325037002563, "learning_rate": 0.0018821058688147296, "loss": 0.8171, "step": 20490 }, { "epoch": 5.897583429228999, "grad_norm": 1.917742133140564, "learning_rate": 0.00188204833141542, "loss": 0.8398, "step": 20500 }, { "epoch": 5.900460299194476, "grad_norm": 1.647688627243042, "learning_rate": 0.0018819907940161105, "loss": 0.8424, "step": 20510 }, { "epoch": 5.903337169159954, "grad_norm": 1.2021852731704712, "learning_rate": 0.001881933256616801, "loss": 0.822, "step": 20520 }, { "epoch": 5.906214039125431, "grad_norm": 1.7345024347305298, "learning_rate": 0.0018818757192174914, "loss": 0.7484, "step": 20530 }, { "epoch": 5.909090909090909, "grad_norm": 0.999980092048645, "learning_rate": 0.0018818181818181818, "loss": 0.7366, "step": 20540 }, { "epoch": 5.911967779056386, "grad_norm": 1.2917412519454956, "learning_rate": 0.0018817606444188723, "loss": 0.7301, "step": 20550 }, { "epoch": 5.914844649021864, "grad_norm": 1.7038347721099854, "learning_rate": 0.0018817031070195627, "loss": 0.973, "step": 20560 }, { "epoch": 5.917721518987342, "grad_norm": 1.8782286643981934, "learning_rate": 0.0018816455696202532, "loss": 0.8555, "step": 20570 }, { "epoch": 5.920598388952819, "grad_norm": 1.0450172424316406, "learning_rate": 0.0018815880322209436, "loss": 0.8086, "step": 20580 }, { "epoch": 5.923475258918296, "grad_norm": 1.6046620607376099, "learning_rate": 0.0018815304948216342, "loss": 0.8088, "step": 20590 }, { "epoch": 5.926352128883774, "grad_norm": 1.3494387865066528, "learning_rate": 0.0018814729574223245, "loss": 0.8797, "step": 20600 }, { "epoch": 5.929228998849252, "grad_norm": 3.375515937805176, "learning_rate": 0.0018814154200230148, "loss": 0.7211, "step": 20610 }, { "epoch": 5.9321058688147295, "grad_norm": 2.2165238857269287, "learning_rate": 0.0018813578826237054, "loss": 0.814, "step": 20620 }, { "epoch": 5.934982738780207, "grad_norm": 2.2646195888519287, "learning_rate": 0.001881300345224396, "loss": 0.7696, "step": 20630 }, { "epoch": 5.9378596087456845, "grad_norm": 1.305908203125, "learning_rate": 0.0018812428078250863, "loss": 0.7742, "step": 20640 }, { "epoch": 5.9407364787111625, "grad_norm": 1.3609176874160767, "learning_rate": 0.0018811852704257769, "loss": 0.7209, "step": 20650 }, { "epoch": 5.94361334867664, "grad_norm": 1.6397045850753784, "learning_rate": 0.0018811277330264672, "loss": 0.9066, "step": 20660 }, { "epoch": 5.946490218642118, "grad_norm": 1.7042146921157837, "learning_rate": 0.0018810701956271576, "loss": 0.9508, "step": 20670 }, { "epoch": 5.949367088607595, "grad_norm": 3.4895946979522705, "learning_rate": 0.0018810126582278481, "loss": 0.9749, "step": 20680 }, { "epoch": 5.952243958573073, "grad_norm": 1.2887643575668335, "learning_rate": 0.0018809551208285385, "loss": 0.8332, "step": 20690 }, { "epoch": 5.95512082853855, "grad_norm": 0.8619809746742249, "learning_rate": 0.001880897583429229, "loss": 0.869, "step": 20700 }, { "epoch": 5.957997698504028, "grad_norm": 1.7654900550842285, "learning_rate": 0.0018808400460299196, "loss": 0.7615, "step": 20710 }, { "epoch": 5.960874568469505, "grad_norm": 1.0077670812606812, "learning_rate": 0.0018807825086306097, "loss": 0.7047, "step": 20720 }, { "epoch": 5.963751438434983, "grad_norm": 1.8488881587982178, "learning_rate": 0.0018807249712313003, "loss": 1.0783, "step": 20730 }, { "epoch": 5.966628308400461, "grad_norm": 2.037107467651367, "learning_rate": 0.0018806674338319909, "loss": 1.1843, "step": 20740 }, { "epoch": 5.969505178365938, "grad_norm": 1.517759084701538, "learning_rate": 0.0018806098964326812, "loss": 0.8603, "step": 20750 }, { "epoch": 5.972382048331415, "grad_norm": 1.7072136402130127, "learning_rate": 0.0018805523590333718, "loss": 0.9617, "step": 20760 }, { "epoch": 5.975258918296893, "grad_norm": 0.8894562721252441, "learning_rate": 0.0018804948216340624, "loss": 0.7801, "step": 20770 }, { "epoch": 5.978135788262371, "grad_norm": 1.108513355255127, "learning_rate": 0.0018804372842347525, "loss": 0.786, "step": 20780 }, { "epoch": 5.981012658227848, "grad_norm": 1.790850043296814, "learning_rate": 0.001880379746835443, "loss": 0.7611, "step": 20790 }, { "epoch": 5.983889528193326, "grad_norm": 1.5182914733886719, "learning_rate": 0.0018803222094361336, "loss": 0.7382, "step": 20800 }, { "epoch": 5.986766398158803, "grad_norm": 1.7366286516189575, "learning_rate": 0.001880264672036824, "loss": 1.0125, "step": 20810 }, { "epoch": 5.989643268124281, "grad_norm": 1.7735284566879272, "learning_rate": 0.0018802071346375145, "loss": 1.1309, "step": 20820 }, { "epoch": 5.992520138089758, "grad_norm": 0.9093880653381348, "learning_rate": 0.0018801495972382049, "loss": 0.898, "step": 20830 }, { "epoch": 5.995397008055236, "grad_norm": 1.930391788482666, "learning_rate": 0.0018800920598388952, "loss": 0.863, "step": 20840 }, { "epoch": 5.998273878020713, "grad_norm": 1.8349847793579102, "learning_rate": 0.0018800345224395858, "loss": 0.7008, "step": 20850 }, { "epoch": 6.001150747986191, "grad_norm": 1.2120440006256104, "learning_rate": 0.0018799769850402761, "loss": 0.7232, "step": 20860 }, { "epoch": 6.004027617951668, "grad_norm": 1.1004786491394043, "learning_rate": 0.0018799194476409667, "loss": 0.848, "step": 20870 }, { "epoch": 6.006904487917146, "grad_norm": 1.0580198764801025, "learning_rate": 0.0018798619102416573, "loss": 0.8508, "step": 20880 }, { "epoch": 6.009781357882623, "grad_norm": 2.467038869857788, "learning_rate": 0.0018798043728423476, "loss": 0.9258, "step": 20890 }, { "epoch": 6.012658227848101, "grad_norm": 1.2359974384307861, "learning_rate": 0.001879746835443038, "loss": 0.8776, "step": 20900 }, { "epoch": 6.015535097813578, "grad_norm": 0.8424385190010071, "learning_rate": 0.0018796892980437285, "loss": 0.8569, "step": 20910 }, { "epoch": 6.018411967779056, "grad_norm": 1.6741091012954712, "learning_rate": 0.0018796317606444189, "loss": 0.8323, "step": 20920 }, { "epoch": 6.021288837744534, "grad_norm": 1.6995155811309814, "learning_rate": 0.0018795742232451094, "loss": 0.8926, "step": 20930 }, { "epoch": 6.0241657077100115, "grad_norm": 1.5314511060714722, "learning_rate": 0.0018795166858457998, "loss": 0.7671, "step": 20940 }, { "epoch": 6.0270425776754895, "grad_norm": 1.0952421426773071, "learning_rate": 0.0018794591484464903, "loss": 0.8689, "step": 20950 }, { "epoch": 6.0299194476409665, "grad_norm": 1.1152896881103516, "learning_rate": 0.0018794016110471807, "loss": 0.9452, "step": 20960 }, { "epoch": 6.0327963176064445, "grad_norm": 2.761587142944336, "learning_rate": 0.001879344073647871, "loss": 0.7069, "step": 20970 }, { "epoch": 6.035673187571922, "grad_norm": 2.00854754447937, "learning_rate": 0.0018792865362485616, "loss": 0.7763, "step": 20980 }, { "epoch": 6.0385500575374, "grad_norm": 0.8638267517089844, "learning_rate": 0.0018792289988492522, "loss": 0.7911, "step": 20990 }, { "epoch": 6.041426927502877, "grad_norm": 1.935486912727356, "learning_rate": 0.0018791714614499425, "loss": 0.8312, "step": 21000 }, { "epoch": 6.044303797468355, "grad_norm": 1.449265956878662, "learning_rate": 0.001879113924050633, "loss": 0.7287, "step": 21010 }, { "epoch": 6.047180667433832, "grad_norm": 0.8649268746376038, "learning_rate": 0.0018790563866513234, "loss": 0.7516, "step": 21020 }, { "epoch": 6.05005753739931, "grad_norm": 1.388479232788086, "learning_rate": 0.0018789988492520138, "loss": 0.8857, "step": 21030 }, { "epoch": 6.052934407364787, "grad_norm": 1.0828145742416382, "learning_rate": 0.0018789413118527043, "loss": 0.955, "step": 21040 }, { "epoch": 6.055811277330265, "grad_norm": 1.795047640800476, "learning_rate": 0.0018788837744533947, "loss": 0.807, "step": 21050 }, { "epoch": 6.058688147295742, "grad_norm": 0.9969637989997864, "learning_rate": 0.0018788262370540852, "loss": 0.8409, "step": 21060 }, { "epoch": 6.06156501726122, "grad_norm": 1.6214337348937988, "learning_rate": 0.0018787686996547758, "loss": 0.8891, "step": 21070 }, { "epoch": 6.064441887226697, "grad_norm": 1.462520956993103, "learning_rate": 0.001878711162255466, "loss": 0.8704, "step": 21080 }, { "epoch": 6.067318757192175, "grad_norm": 2.737017869949341, "learning_rate": 0.0018786536248561565, "loss": 0.7397, "step": 21090 }, { "epoch": 6.070195627157652, "grad_norm": 1.1962636709213257, "learning_rate": 0.001878596087456847, "loss": 0.887, "step": 21100 }, { "epoch": 6.07307249712313, "grad_norm": 0.7995173335075378, "learning_rate": 0.0018785385500575374, "loss": 0.5556, "step": 21110 }, { "epoch": 6.075949367088608, "grad_norm": 1.509720802307129, "learning_rate": 0.001878481012658228, "loss": 0.7415, "step": 21120 }, { "epoch": 6.078826237054085, "grad_norm": 1.810882806777954, "learning_rate": 0.0018784234752589185, "loss": 0.7606, "step": 21130 }, { "epoch": 6.081703107019563, "grad_norm": 0.8155317902565002, "learning_rate": 0.0018783659378596087, "loss": 0.9481, "step": 21140 }, { "epoch": 6.08457997698504, "grad_norm": 0.8757128715515137, "learning_rate": 0.0018783084004602992, "loss": 0.8537, "step": 21150 }, { "epoch": 6.087456846950518, "grad_norm": 1.1164675951004028, "learning_rate": 0.0018782508630609896, "loss": 0.8083, "step": 21160 }, { "epoch": 6.090333716915995, "grad_norm": 1.2481883764266968, "learning_rate": 0.0018781933256616801, "loss": 0.8211, "step": 21170 }, { "epoch": 6.093210586881473, "grad_norm": 1.214966058731079, "learning_rate": 0.0018781357882623707, "loss": 0.8381, "step": 21180 }, { "epoch": 6.09608745684695, "grad_norm": 1.470470666885376, "learning_rate": 0.001878078250863061, "loss": 0.8014, "step": 21190 }, { "epoch": 6.098964326812428, "grad_norm": 1.0399184226989746, "learning_rate": 0.0018780207134637514, "loss": 0.7522, "step": 21200 }, { "epoch": 6.101841196777905, "grad_norm": 2.4540066719055176, "learning_rate": 0.001877963176064442, "loss": 1.0069, "step": 21210 }, { "epoch": 6.104718066743383, "grad_norm": 1.4389537572860718, "learning_rate": 0.0018779056386651323, "loss": 0.6978, "step": 21220 }, { "epoch": 6.1075949367088604, "grad_norm": 1.5672307014465332, "learning_rate": 0.0018778481012658229, "loss": 0.7593, "step": 21230 }, { "epoch": 6.110471806674338, "grad_norm": 1.1457616090774536, "learning_rate": 0.0018777905638665134, "loss": 0.6008, "step": 21240 }, { "epoch": 6.1133486766398155, "grad_norm": 0.8774670362472534, "learning_rate": 0.0018777330264672038, "loss": 0.8583, "step": 21250 }, { "epoch": 6.1162255466052935, "grad_norm": 2.0992789268493652, "learning_rate": 0.0018776754890678941, "loss": 0.8935, "step": 21260 }, { "epoch": 6.119102416570771, "grad_norm": 2.50830078125, "learning_rate": 0.0018776179516685845, "loss": 1.1062, "step": 21270 }, { "epoch": 6.121979286536249, "grad_norm": 0.9644056558609009, "learning_rate": 0.001877560414269275, "loss": 0.7612, "step": 21280 }, { "epoch": 6.1248561565017265, "grad_norm": 1.6181970834732056, "learning_rate": 0.0018775028768699656, "loss": 0.9491, "step": 21290 }, { "epoch": 6.127733026467204, "grad_norm": 1.4445216655731201, "learning_rate": 0.001877445339470656, "loss": 0.8793, "step": 21300 }, { "epoch": 6.130609896432682, "grad_norm": 1.0842481851577759, "learning_rate": 0.0018773878020713465, "loss": 0.9314, "step": 21310 }, { "epoch": 6.133486766398159, "grad_norm": 2.322819471359253, "learning_rate": 0.0018773302646720369, "loss": 0.9221, "step": 21320 }, { "epoch": 6.136363636363637, "grad_norm": 1.2760510444641113, "learning_rate": 0.0018772727272727272, "loss": 0.7378, "step": 21330 }, { "epoch": 6.139240506329114, "grad_norm": 1.5524158477783203, "learning_rate": 0.0018772151898734178, "loss": 0.843, "step": 21340 }, { "epoch": 6.142117376294592, "grad_norm": 0.8781824111938477, "learning_rate": 0.0018771576524741083, "loss": 0.9727, "step": 21350 }, { "epoch": 6.144994246260069, "grad_norm": 2.0341787338256836, "learning_rate": 0.0018771001150747987, "loss": 0.8421, "step": 21360 }, { "epoch": 6.147871116225547, "grad_norm": 1.4235459566116333, "learning_rate": 0.001877042577675489, "loss": 0.8264, "step": 21370 }, { "epoch": 6.150747986191024, "grad_norm": 1.0220369100570679, "learning_rate": 0.0018769850402761794, "loss": 0.8184, "step": 21380 }, { "epoch": 6.153624856156502, "grad_norm": 1.0024367570877075, "learning_rate": 0.00187692750287687, "loss": 0.7754, "step": 21390 }, { "epoch": 6.156501726121979, "grad_norm": 0.9520954489707947, "learning_rate": 0.0018768699654775605, "loss": 0.6477, "step": 21400 }, { "epoch": 6.159378596087457, "grad_norm": 1.4417208433151245, "learning_rate": 0.0018768124280782509, "loss": 0.7468, "step": 21410 }, { "epoch": 6.162255466052934, "grad_norm": 0.811743438243866, "learning_rate": 0.0018767548906789414, "loss": 1.081, "step": 21420 }, { "epoch": 6.165132336018412, "grad_norm": 1.1107630729675293, "learning_rate": 0.0018766973532796318, "loss": 0.8186, "step": 21430 }, { "epoch": 6.168009205983889, "grad_norm": 1.0664037466049194, "learning_rate": 0.0018766398158803221, "loss": 0.9398, "step": 21440 }, { "epoch": 6.170886075949367, "grad_norm": 0.8866965770721436, "learning_rate": 0.0018765822784810127, "loss": 0.8354, "step": 21450 }, { "epoch": 6.173762945914845, "grad_norm": 2.0142641067504883, "learning_rate": 0.0018765247410817032, "loss": 1.078, "step": 21460 }, { "epoch": 6.176639815880322, "grad_norm": 0.7548583149909973, "learning_rate": 0.0018764672036823936, "loss": 1.0356, "step": 21470 }, { "epoch": 6.1795166858458, "grad_norm": 2.088870048522949, "learning_rate": 0.0018764096662830842, "loss": 0.8998, "step": 21480 }, { "epoch": 6.182393555811277, "grad_norm": 2.5344927310943604, "learning_rate": 0.0018763521288837745, "loss": 0.8302, "step": 21490 }, { "epoch": 6.185270425776755, "grad_norm": 1.635972499847412, "learning_rate": 0.0018762945914844648, "loss": 0.7378, "step": 21500 }, { "epoch": 6.188147295742232, "grad_norm": 1.7686132192611694, "learning_rate": 0.0018762370540851554, "loss": 0.9782, "step": 21510 }, { "epoch": 6.19102416570771, "grad_norm": 0.7860034704208374, "learning_rate": 0.0018761795166858458, "loss": 0.9571, "step": 21520 }, { "epoch": 6.193901035673187, "grad_norm": 1.37380051612854, "learning_rate": 0.0018761219792865363, "loss": 0.818, "step": 21530 }, { "epoch": 6.196777905638665, "grad_norm": 1.2877702713012695, "learning_rate": 0.0018760644418872269, "loss": 0.8127, "step": 21540 }, { "epoch": 6.1996547756041425, "grad_norm": 1.1177955865859985, "learning_rate": 0.001876006904487917, "loss": 0.6248, "step": 21550 }, { "epoch": 6.2025316455696204, "grad_norm": 1.6725273132324219, "learning_rate": 0.0018759493670886076, "loss": 0.9049, "step": 21560 }, { "epoch": 6.2054085155350975, "grad_norm": 1.4578300714492798, "learning_rate": 0.0018758918296892981, "loss": 0.8437, "step": 21570 }, { "epoch": 6.2082853855005755, "grad_norm": 2.7196199893951416, "learning_rate": 0.0018758342922899885, "loss": 0.9868, "step": 21580 }, { "epoch": 6.211162255466053, "grad_norm": 1.2478384971618652, "learning_rate": 0.001875776754890679, "loss": 0.9194, "step": 21590 }, { "epoch": 6.214039125431531, "grad_norm": 1.3855516910552979, "learning_rate": 0.0018757192174913696, "loss": 0.9542, "step": 21600 }, { "epoch": 6.216915995397008, "grad_norm": 0.9809876680374146, "learning_rate": 0.0018756616800920597, "loss": 0.7618, "step": 21610 }, { "epoch": 6.219792865362486, "grad_norm": 0.9206173419952393, "learning_rate": 0.0018756041426927503, "loss": 0.6819, "step": 21620 }, { "epoch": 6.222669735327963, "grad_norm": 1.418426275253296, "learning_rate": 0.0018755466052934407, "loss": 0.6816, "step": 21630 }, { "epoch": 6.225546605293441, "grad_norm": 1.5597394704818726, "learning_rate": 0.0018754890678941312, "loss": 0.8425, "step": 21640 }, { "epoch": 6.228423475258919, "grad_norm": 1.3273274898529053, "learning_rate": 0.0018754315304948218, "loss": 0.8574, "step": 21650 }, { "epoch": 6.231300345224396, "grad_norm": 1.1552146673202515, "learning_rate": 0.0018753739930955121, "loss": 0.6942, "step": 21660 }, { "epoch": 6.234177215189874, "grad_norm": 1.6573988199234009, "learning_rate": 0.0018753164556962025, "loss": 0.8531, "step": 21670 }, { "epoch": 6.237054085155351, "grad_norm": 0.8608697056770325, "learning_rate": 0.001875258918296893, "loss": 0.7194, "step": 21680 }, { "epoch": 6.239930955120829, "grad_norm": 1.4043080806732178, "learning_rate": 0.0018752013808975834, "loss": 0.7269, "step": 21690 }, { "epoch": 6.242807825086306, "grad_norm": 1.7513506412506104, "learning_rate": 0.001875143843498274, "loss": 1.0182, "step": 21700 }, { "epoch": 6.245684695051784, "grad_norm": 1.7959579229354858, "learning_rate": 0.0018750863060989645, "loss": 1.1571, "step": 21710 }, { "epoch": 6.248561565017261, "grad_norm": 0.7999435663223267, "learning_rate": 0.0018750287686996549, "loss": 0.7924, "step": 21720 }, { "epoch": 6.251438434982739, "grad_norm": 1.4094128608703613, "learning_rate": 0.0018749712313003452, "loss": 0.8123, "step": 21730 }, { "epoch": 6.254315304948216, "grad_norm": 1.8757905960083008, "learning_rate": 0.0018749136939010356, "loss": 0.6417, "step": 21740 }, { "epoch": 6.257192174913694, "grad_norm": 0.9363431930541992, "learning_rate": 0.0018748561565017261, "loss": 0.8212, "step": 21750 }, { "epoch": 6.260069044879171, "grad_norm": 1.617570161819458, "learning_rate": 0.0018747986191024167, "loss": 0.8415, "step": 21760 }, { "epoch": 6.262945914844649, "grad_norm": 1.3066774606704712, "learning_rate": 0.001874741081703107, "loss": 0.8793, "step": 21770 }, { "epoch": 6.265822784810126, "grad_norm": 2.1067123413085938, "learning_rate": 0.0018746835443037976, "loss": 0.834, "step": 21780 }, { "epoch": 6.268699654775604, "grad_norm": 1.348364233970642, "learning_rate": 0.001874626006904488, "loss": 0.9916, "step": 21790 }, { "epoch": 6.271576524741081, "grad_norm": 1.1559237241744995, "learning_rate": 0.0018745684695051783, "loss": 0.6702, "step": 21800 }, { "epoch": 6.274453394706559, "grad_norm": 1.556419014930725, "learning_rate": 0.0018745109321058689, "loss": 0.9208, "step": 21810 }, { "epoch": 6.277330264672036, "grad_norm": 1.2898762226104736, "learning_rate": 0.0018744533947065594, "loss": 0.9423, "step": 21820 }, { "epoch": 6.280207134637514, "grad_norm": 1.40525221824646, "learning_rate": 0.0018743958573072498, "loss": 0.8876, "step": 21830 }, { "epoch": 6.283084004602992, "grad_norm": 1.249350905418396, "learning_rate": 0.0018743383199079403, "loss": 0.7496, "step": 21840 }, { "epoch": 6.285960874568469, "grad_norm": 0.7603589296340942, "learning_rate": 0.0018742807825086305, "loss": 0.7647, "step": 21850 }, { "epoch": 6.288837744533947, "grad_norm": 2.069795846939087, "learning_rate": 0.001874223245109321, "loss": 1.0081, "step": 21860 }, { "epoch": 6.2917146144994245, "grad_norm": 0.9532472491264343, "learning_rate": 0.0018741657077100116, "loss": 0.887, "step": 21870 }, { "epoch": 6.2945914844649025, "grad_norm": 1.6867398023605347, "learning_rate": 0.001874108170310702, "loss": 0.7892, "step": 21880 }, { "epoch": 6.2974683544303796, "grad_norm": 1.7679064273834229, "learning_rate": 0.0018740506329113925, "loss": 0.76, "step": 21890 }, { "epoch": 6.3003452243958575, "grad_norm": 1.4580599069595337, "learning_rate": 0.001873993095512083, "loss": 0.759, "step": 21900 }, { "epoch": 6.303222094361335, "grad_norm": 1.9428375959396362, "learning_rate": 0.0018739355581127732, "loss": 0.8109, "step": 21910 }, { "epoch": 6.306098964326813, "grad_norm": 1.0315141677856445, "learning_rate": 0.0018738780207134638, "loss": 1.0815, "step": 21920 }, { "epoch": 6.30897583429229, "grad_norm": 0.8202006220817566, "learning_rate": 0.0018738204833141543, "loss": 0.726, "step": 21930 }, { "epoch": 6.311852704257768, "grad_norm": 1.9258612394332886, "learning_rate": 0.0018737629459148447, "loss": 0.7006, "step": 21940 }, { "epoch": 6.314729574223245, "grad_norm": 1.4302078485488892, "learning_rate": 0.0018737054085155352, "loss": 0.9425, "step": 21950 }, { "epoch": 6.317606444188723, "grad_norm": 0.8788588643074036, "learning_rate": 0.0018736478711162256, "loss": 0.681, "step": 21960 }, { "epoch": 6.3204833141542, "grad_norm": 1.6552221775054932, "learning_rate": 0.001873590333716916, "loss": 0.6811, "step": 21970 }, { "epoch": 6.323360184119678, "grad_norm": 1.4178245067596436, "learning_rate": 0.0018735327963176065, "loss": 0.7472, "step": 21980 }, { "epoch": 6.326237054085155, "grad_norm": 1.2990282773971558, "learning_rate": 0.0018734752589182968, "loss": 0.9587, "step": 21990 }, { "epoch": 6.329113924050633, "grad_norm": 1.01025390625, "learning_rate": 0.0018734177215189874, "loss": 0.712, "step": 22000 }, { "epoch": 6.331990794016111, "grad_norm": 1.092257022857666, "learning_rate": 0.001873360184119678, "loss": 0.8435, "step": 22010 }, { "epoch": 6.334867663981588, "grad_norm": 1.417286992073059, "learning_rate": 0.0018733026467203683, "loss": 0.8794, "step": 22020 }, { "epoch": 6.337744533947066, "grad_norm": 2.226421356201172, "learning_rate": 0.0018732451093210587, "loss": 0.8765, "step": 22030 }, { "epoch": 6.340621403912543, "grad_norm": 1.603339672088623, "learning_rate": 0.0018731875719217492, "loss": 0.7905, "step": 22040 }, { "epoch": 6.343498273878021, "grad_norm": 1.0556509494781494, "learning_rate": 0.0018731300345224396, "loss": 0.7576, "step": 22050 }, { "epoch": 6.346375143843498, "grad_norm": 1.3989602327346802, "learning_rate": 0.0018730724971231301, "loss": 0.8796, "step": 22060 }, { "epoch": 6.349252013808976, "grad_norm": 1.1199138164520264, "learning_rate": 0.0018730149597238207, "loss": 0.7511, "step": 22070 }, { "epoch": 6.352128883774453, "grad_norm": 1.2293930053710938, "learning_rate": 0.001872957422324511, "loss": 0.8645, "step": 22080 }, { "epoch": 6.355005753739931, "grad_norm": 1.2701517343521118, "learning_rate": 0.0018728998849252014, "loss": 0.75, "step": 22090 }, { "epoch": 6.357882623705408, "grad_norm": 1.9501324892044067, "learning_rate": 0.0018728423475258917, "loss": 0.8534, "step": 22100 }, { "epoch": 6.360759493670886, "grad_norm": 1.2494149208068848, "learning_rate": 0.0018727848101265823, "loss": 0.9568, "step": 22110 }, { "epoch": 6.363636363636363, "grad_norm": 1.7108992338180542, "learning_rate": 0.0018727272727272729, "loss": 0.7783, "step": 22120 }, { "epoch": 6.366513233601841, "grad_norm": 1.253549337387085, "learning_rate": 0.0018726697353279632, "loss": 0.8375, "step": 22130 }, { "epoch": 6.369390103567318, "grad_norm": 1.343493938446045, "learning_rate": 0.0018726121979286536, "loss": 0.7468, "step": 22140 }, { "epoch": 6.372266973532796, "grad_norm": 1.8449023962020874, "learning_rate": 0.0018725546605293441, "loss": 0.831, "step": 22150 }, { "epoch": 6.3751438434982735, "grad_norm": 2.5961663722991943, "learning_rate": 0.0018724971231300345, "loss": 0.84, "step": 22160 }, { "epoch": 6.378020713463751, "grad_norm": 1.0457130670547485, "learning_rate": 0.001872439585730725, "loss": 0.8603, "step": 22170 }, { "epoch": 6.380897583429229, "grad_norm": 1.2282530069351196, "learning_rate": 0.0018723820483314156, "loss": 0.7516, "step": 22180 }, { "epoch": 6.3837744533947065, "grad_norm": 2.1010844707489014, "learning_rate": 0.001872324510932106, "loss": 0.9167, "step": 22190 }, { "epoch": 6.3866513233601845, "grad_norm": 1.8757164478302002, "learning_rate": 0.0018722669735327963, "loss": 0.8312, "step": 22200 }, { "epoch": 6.389528193325662, "grad_norm": 1.4034266471862793, "learning_rate": 0.0018722094361334866, "loss": 0.8255, "step": 22210 }, { "epoch": 6.3924050632911396, "grad_norm": 1.7377581596374512, "learning_rate": 0.0018721518987341772, "loss": 0.8335, "step": 22220 }, { "epoch": 6.395281933256617, "grad_norm": 1.1642025709152222, "learning_rate": 0.0018720943613348678, "loss": 0.8021, "step": 22230 }, { "epoch": 6.398158803222095, "grad_norm": 1.065102458000183, "learning_rate": 0.0018720368239355581, "loss": 0.8456, "step": 22240 }, { "epoch": 6.401035673187572, "grad_norm": 1.67940092086792, "learning_rate": 0.0018719792865362487, "loss": 1.0252, "step": 22250 }, { "epoch": 6.40391254315305, "grad_norm": 2.41098952293396, "learning_rate": 0.001871921749136939, "loss": 0.6849, "step": 22260 }, { "epoch": 6.406789413118527, "grad_norm": 1.489386796951294, "learning_rate": 0.0018718642117376294, "loss": 0.7921, "step": 22270 }, { "epoch": 6.409666283084005, "grad_norm": 1.1807303428649902, "learning_rate": 0.00187180667433832, "loss": 0.768, "step": 22280 }, { "epoch": 6.412543153049482, "grad_norm": 1.6904383897781372, "learning_rate": 0.0018717491369390105, "loss": 0.9135, "step": 22290 }, { "epoch": 6.41542002301496, "grad_norm": 1.6018290519714355, "learning_rate": 0.0018716915995397009, "loss": 0.7134, "step": 22300 }, { "epoch": 6.418296892980437, "grad_norm": 0.9894108176231384, "learning_rate": 0.0018716340621403914, "loss": 0.7287, "step": 22310 }, { "epoch": 6.421173762945915, "grad_norm": 2.0159552097320557, "learning_rate": 0.0018715765247410815, "loss": 1.0507, "step": 22320 }, { "epoch": 6.424050632911392, "grad_norm": 1.112050175666809, "learning_rate": 0.0018715189873417721, "loss": 0.6901, "step": 22330 }, { "epoch": 6.42692750287687, "grad_norm": 1.405189037322998, "learning_rate": 0.0018714614499424627, "loss": 0.8343, "step": 22340 }, { "epoch": 6.429804372842348, "grad_norm": 1.5942046642303467, "learning_rate": 0.001871403912543153, "loss": 0.6779, "step": 22350 }, { "epoch": 6.432681242807825, "grad_norm": 1.5951405763626099, "learning_rate": 0.0018713463751438436, "loss": 0.7793, "step": 22360 }, { "epoch": 6.435558112773303, "grad_norm": 1.1436665058135986, "learning_rate": 0.0018712888377445341, "loss": 0.7937, "step": 22370 }, { "epoch": 6.43843498273878, "grad_norm": 2.025245428085327, "learning_rate": 0.0018712313003452243, "loss": 0.8785, "step": 22380 }, { "epoch": 6.441311852704258, "grad_norm": 1.455393671989441, "learning_rate": 0.0018711737629459148, "loss": 0.7459, "step": 22390 }, { "epoch": 6.444188722669735, "grad_norm": 0.899038553237915, "learning_rate": 0.0018711162255466054, "loss": 0.6492, "step": 22400 }, { "epoch": 6.447065592635213, "grad_norm": 1.4938668012619019, "learning_rate": 0.0018710586881472958, "loss": 0.8921, "step": 22410 }, { "epoch": 6.44994246260069, "grad_norm": 1.5601818561553955, "learning_rate": 0.0018710011507479863, "loss": 0.7502, "step": 22420 }, { "epoch": 6.452819332566168, "grad_norm": 1.8362230062484741, "learning_rate": 0.0018709436133486767, "loss": 0.6929, "step": 22430 }, { "epoch": 6.455696202531645, "grad_norm": 1.120806336402893, "learning_rate": 0.001870886075949367, "loss": 0.7066, "step": 22440 }, { "epoch": 6.458573072497123, "grad_norm": 1.9845675230026245, "learning_rate": 0.0018708285385500576, "loss": 1.0661, "step": 22450 }, { "epoch": 6.4614499424626, "grad_norm": 1.8086544275283813, "learning_rate": 0.001870771001150748, "loss": 0.7657, "step": 22460 }, { "epoch": 6.464326812428078, "grad_norm": 1.160261869430542, "learning_rate": 0.0018707134637514385, "loss": 0.8528, "step": 22470 }, { "epoch": 6.4672036823935555, "grad_norm": 2.1007533073425293, "learning_rate": 0.001870655926352129, "loss": 0.8395, "step": 22480 }, { "epoch": 6.4700805523590335, "grad_norm": 1.7411973476409912, "learning_rate": 0.0018705983889528194, "loss": 0.8346, "step": 22490 }, { "epoch": 6.4729574223245105, "grad_norm": 1.9917887449264526, "learning_rate": 0.0018705408515535097, "loss": 0.8431, "step": 22500 }, { "epoch": 6.4758342922899885, "grad_norm": 1.0555400848388672, "learning_rate": 0.0018704833141542003, "loss": 0.797, "step": 22510 }, { "epoch": 6.4787111622554665, "grad_norm": 1.4290350675582886, "learning_rate": 0.0018704257767548907, "loss": 0.8306, "step": 22520 }, { "epoch": 6.481588032220944, "grad_norm": 1.019796371459961, "learning_rate": 0.0018703682393555812, "loss": 0.7665, "step": 22530 }, { "epoch": 6.484464902186422, "grad_norm": 0.9541434049606323, "learning_rate": 0.0018703107019562716, "loss": 1.0609, "step": 22540 }, { "epoch": 6.487341772151899, "grad_norm": 2.0029289722442627, "learning_rate": 0.0018702531645569621, "loss": 0.8461, "step": 22550 }, { "epoch": 6.490218642117377, "grad_norm": 1.4390504360198975, "learning_rate": 0.0018701956271576525, "loss": 0.9951, "step": 22560 }, { "epoch": 6.493095512082854, "grad_norm": 1.062811255455017, "learning_rate": 0.0018701380897583428, "loss": 0.8792, "step": 22570 }, { "epoch": 6.495972382048332, "grad_norm": 1.1762406826019287, "learning_rate": 0.0018700805523590334, "loss": 0.7647, "step": 22580 }, { "epoch": 6.498849252013809, "grad_norm": 2.1311981678009033, "learning_rate": 0.001870023014959724, "loss": 0.916, "step": 22590 }, { "epoch": 6.501726121979287, "grad_norm": 1.192299723625183, "learning_rate": 0.0018699654775604143, "loss": 0.8369, "step": 22600 }, { "epoch": 6.504602991944764, "grad_norm": 1.2911070585250854, "learning_rate": 0.0018699079401611049, "loss": 0.7759, "step": 22610 }, { "epoch": 6.507479861910242, "grad_norm": 1.2915656566619873, "learning_rate": 0.0018698504027617952, "loss": 0.8836, "step": 22620 }, { "epoch": 6.510356731875719, "grad_norm": 0.7135539054870605, "learning_rate": 0.0018697928653624856, "loss": 0.8231, "step": 22630 }, { "epoch": 6.513233601841197, "grad_norm": 1.0987820625305176, "learning_rate": 0.0018697353279631761, "loss": 0.5867, "step": 22640 }, { "epoch": 6.516110471806674, "grad_norm": 1.2704371213912964, "learning_rate": 0.0018696777905638665, "loss": 1.0008, "step": 22650 }, { "epoch": 6.518987341772152, "grad_norm": 1.2281101942062378, "learning_rate": 0.001869620253164557, "loss": 0.699, "step": 22660 }, { "epoch": 6.521864211737629, "grad_norm": 1.6170904636383057, "learning_rate": 0.0018695627157652476, "loss": 0.88, "step": 22670 }, { "epoch": 6.524741081703107, "grad_norm": 1.2196927070617676, "learning_rate": 0.0018695051783659377, "loss": 0.8399, "step": 22680 }, { "epoch": 6.527617951668585, "grad_norm": 2.6560890674591064, "learning_rate": 0.0018694476409666283, "loss": 0.7663, "step": 22690 }, { "epoch": 6.530494821634062, "grad_norm": 1.9489436149597168, "learning_rate": 0.0018693901035673189, "loss": 0.8961, "step": 22700 }, { "epoch": 6.533371691599539, "grad_norm": 1.2141902446746826, "learning_rate": 0.0018693325661680092, "loss": 1.0317, "step": 22710 }, { "epoch": 6.536248561565017, "grad_norm": 1.3737022876739502, "learning_rate": 0.0018692750287686998, "loss": 0.8202, "step": 22720 }, { "epoch": 6.539125431530495, "grad_norm": 4.233468532562256, "learning_rate": 0.0018692174913693903, "loss": 1.1164, "step": 22730 }, { "epoch": 6.542002301495972, "grad_norm": 1.2638829946517944, "learning_rate": 0.0018691599539700805, "loss": 1.0116, "step": 22740 }, { "epoch": 6.54487917146145, "grad_norm": 1.7522541284561157, "learning_rate": 0.001869102416570771, "loss": 0.8075, "step": 22750 }, { "epoch": 6.547756041426927, "grad_norm": 3.204974412918091, "learning_rate": 0.0018690448791714616, "loss": 0.943, "step": 22760 }, { "epoch": 6.550632911392405, "grad_norm": 1.3280909061431885, "learning_rate": 0.001868987341772152, "loss": 0.7733, "step": 22770 }, { "epoch": 6.553509781357882, "grad_norm": 1.0979479551315308, "learning_rate": 0.0018689298043728425, "loss": 0.8168, "step": 22780 }, { "epoch": 6.55638665132336, "grad_norm": 1.5025073289871216, "learning_rate": 0.0018688722669735328, "loss": 0.8177, "step": 22790 }, { "epoch": 6.5592635212888375, "grad_norm": 1.3205784559249878, "learning_rate": 0.0018688147295742232, "loss": 0.9069, "step": 22800 }, { "epoch": 6.5621403912543155, "grad_norm": 1.9773536920547485, "learning_rate": 0.0018687571921749138, "loss": 0.7824, "step": 22810 }, { "epoch": 6.565017261219793, "grad_norm": 0.8207451701164246, "learning_rate": 0.001868699654775604, "loss": 0.8631, "step": 22820 }, { "epoch": 6.5678941311852705, "grad_norm": 1.4940567016601562, "learning_rate": 0.0018686421173762947, "loss": 0.6464, "step": 22830 }, { "epoch": 6.570771001150748, "grad_norm": 1.4321447610855103, "learning_rate": 0.0018685845799769852, "loss": 0.9006, "step": 22840 }, { "epoch": 6.573647871116226, "grad_norm": 0.8397804498672485, "learning_rate": 0.0018685270425776756, "loss": 1.0259, "step": 22850 }, { "epoch": 6.576524741081704, "grad_norm": 0.8362016081809998, "learning_rate": 0.001868469505178366, "loss": 0.6862, "step": 22860 }, { "epoch": 6.579401611047181, "grad_norm": 1.6837555170059204, "learning_rate": 0.0018684119677790565, "loss": 0.8211, "step": 22870 }, { "epoch": 6.582278481012658, "grad_norm": 0.740195631980896, "learning_rate": 0.0018683544303797468, "loss": 0.9328, "step": 22880 }, { "epoch": 6.585155350978136, "grad_norm": 1.8164467811584473, "learning_rate": 0.0018682968929804374, "loss": 0.7153, "step": 22890 }, { "epoch": 6.588032220943614, "grad_norm": 0.9919025897979736, "learning_rate": 0.0018682393555811277, "loss": 0.665, "step": 22900 }, { "epoch": 6.590909090909091, "grad_norm": 1.4866917133331299, "learning_rate": 0.0018681818181818183, "loss": 0.968, "step": 22910 }, { "epoch": 6.593785960874569, "grad_norm": 1.745827555656433, "learning_rate": 0.0018681242807825087, "loss": 0.9478, "step": 22920 }, { "epoch": 6.596662830840046, "grad_norm": 0.978646993637085, "learning_rate": 0.001868066743383199, "loss": 0.86, "step": 22930 }, { "epoch": 6.599539700805524, "grad_norm": 0.9466322064399719, "learning_rate": 0.0018680092059838896, "loss": 0.8691, "step": 22940 }, { "epoch": 6.602416570771001, "grad_norm": 1.8693230152130127, "learning_rate": 0.0018679516685845801, "loss": 0.6931, "step": 22950 }, { "epoch": 6.605293440736479, "grad_norm": 1.399815559387207, "learning_rate": 0.0018678941311852705, "loss": 0.6974, "step": 22960 }, { "epoch": 6.608170310701956, "grad_norm": 0.9927239418029785, "learning_rate": 0.0018678365937859608, "loss": 0.7944, "step": 22970 }, { "epoch": 6.611047180667434, "grad_norm": 1.0336636304855347, "learning_rate": 0.0018677790563866514, "loss": 0.9568, "step": 22980 }, { "epoch": 6.613924050632911, "grad_norm": 0.8669326901435852, "learning_rate": 0.0018677215189873417, "loss": 0.8216, "step": 22990 }, { "epoch": 6.616800920598389, "grad_norm": 1.3917171955108643, "learning_rate": 0.0018676639815880323, "loss": 1.0032, "step": 23000 }, { "epoch": 6.619677790563866, "grad_norm": 1.891538381576538, "learning_rate": 0.0018676064441887226, "loss": 0.8162, "step": 23010 }, { "epoch": 6.622554660529344, "grad_norm": 1.349731206893921, "learning_rate": 0.0018675489067894132, "loss": 0.9965, "step": 23020 }, { "epoch": 6.625431530494821, "grad_norm": 1.4854298830032349, "learning_rate": 0.0018674913693901036, "loss": 0.9683, "step": 23030 }, { "epoch": 6.628308400460299, "grad_norm": 0.619259238243103, "learning_rate": 0.001867433831990794, "loss": 0.698, "step": 23040 }, { "epoch": 6.631185270425776, "grad_norm": 1.8138798475265503, "learning_rate": 0.0018673762945914845, "loss": 0.9936, "step": 23050 }, { "epoch": 6.634062140391254, "grad_norm": 0.9442638158798218, "learning_rate": 0.001867318757192175, "loss": 0.8658, "step": 23060 }, { "epoch": 6.636939010356732, "grad_norm": 3.312450885772705, "learning_rate": 0.0018672612197928654, "loss": 0.6741, "step": 23070 }, { "epoch": 6.639815880322209, "grad_norm": 1.6112607717514038, "learning_rate": 0.001867203682393556, "loss": 0.8612, "step": 23080 }, { "epoch": 6.642692750287687, "grad_norm": 0.8224934935569763, "learning_rate": 0.0018671461449942463, "loss": 0.7547, "step": 23090 }, { "epoch": 6.6455696202531644, "grad_norm": 0.8839603066444397, "learning_rate": 0.0018670886075949366, "loss": 0.9003, "step": 23100 }, { "epoch": 6.648446490218642, "grad_norm": 1.490288496017456, "learning_rate": 0.0018670310701956272, "loss": 0.8625, "step": 23110 }, { "epoch": 6.6513233601841195, "grad_norm": 1.903262734413147, "learning_rate": 0.0018669735327963176, "loss": 0.8579, "step": 23120 }, { "epoch": 6.6542002301495975, "grad_norm": 1.1322821378707886, "learning_rate": 0.0018669159953970081, "loss": 0.8249, "step": 23130 }, { "epoch": 6.657077100115075, "grad_norm": 2.893160581588745, "learning_rate": 0.0018668584579976987, "loss": 0.9832, "step": 23140 }, { "epoch": 6.659953970080553, "grad_norm": 2.329183340072632, "learning_rate": 0.0018668009205983888, "loss": 0.981, "step": 23150 }, { "epoch": 6.66283084004603, "grad_norm": 1.3591375350952148, "learning_rate": 0.0018667433831990794, "loss": 0.8033, "step": 23160 }, { "epoch": 6.665707710011508, "grad_norm": 0.9950786232948303, "learning_rate": 0.00186668584579977, "loss": 0.8964, "step": 23170 }, { "epoch": 6.668584579976985, "grad_norm": 1.1894279718399048, "learning_rate": 0.0018666283084004603, "loss": 0.9103, "step": 23180 }, { "epoch": 6.671461449942463, "grad_norm": 8.30463981628418, "learning_rate": 0.0018665707710011508, "loss": 0.7502, "step": 23190 }, { "epoch": 6.67433831990794, "grad_norm": 1.5505201816558838, "learning_rate": 0.0018665132336018414, "loss": 0.6876, "step": 23200 }, { "epoch": 6.677215189873418, "grad_norm": 1.8770995140075684, "learning_rate": 0.0018664556962025315, "loss": 0.7131, "step": 23210 }, { "epoch": 6.680092059838895, "grad_norm": 0.9594983458518982, "learning_rate": 0.001866398158803222, "loss": 0.9392, "step": 23220 }, { "epoch": 6.682968929804373, "grad_norm": 0.9133108854293823, "learning_rate": 0.0018663406214039125, "loss": 0.6375, "step": 23230 }, { "epoch": 6.685845799769851, "grad_norm": 1.2609505653381348, "learning_rate": 0.001866283084004603, "loss": 0.8877, "step": 23240 }, { "epoch": 6.688722669735328, "grad_norm": 1.2857723236083984, "learning_rate": 0.0018662255466052936, "loss": 0.6808, "step": 23250 }, { "epoch": 6.691599539700806, "grad_norm": 1.3778085708618164, "learning_rate": 0.001866168009205984, "loss": 0.8039, "step": 23260 }, { "epoch": 6.694476409666283, "grad_norm": 2.0483310222625732, "learning_rate": 0.0018661104718066743, "loss": 0.8685, "step": 23270 }, { "epoch": 6.697353279631761, "grad_norm": 1.0854581594467163, "learning_rate": 0.0018660529344073648, "loss": 0.9453, "step": 23280 }, { "epoch": 6.700230149597238, "grad_norm": 1.1029833555221558, "learning_rate": 0.0018659953970080552, "loss": 0.7563, "step": 23290 }, { "epoch": 6.703107019562716, "grad_norm": 0.9031062126159668, "learning_rate": 0.0018659378596087458, "loss": 0.7692, "step": 23300 }, { "epoch": 6.705983889528193, "grad_norm": 0.847251296043396, "learning_rate": 0.0018658803222094363, "loss": 0.8968, "step": 23310 }, { "epoch": 6.708860759493671, "grad_norm": 2.549675703048706, "learning_rate": 0.0018658227848101267, "loss": 0.8548, "step": 23320 }, { "epoch": 6.711737629459148, "grad_norm": 1.0680137872695923, "learning_rate": 0.001865765247410817, "loss": 0.9114, "step": 23330 }, { "epoch": 6.714614499424626, "grad_norm": 1.7978214025497437, "learning_rate": 0.0018657077100115074, "loss": 0.8544, "step": 23340 }, { "epoch": 6.717491369390103, "grad_norm": 1.0211386680603027, "learning_rate": 0.001865650172612198, "loss": 0.9254, "step": 23350 }, { "epoch": 6.720368239355581, "grad_norm": 1.4849014282226562, "learning_rate": 0.0018655926352128885, "loss": 0.9958, "step": 23360 }, { "epoch": 6.723245109321058, "grad_norm": 1.2521026134490967, "learning_rate": 0.0018655350978135788, "loss": 0.8759, "step": 23370 }, { "epoch": 6.726121979286536, "grad_norm": 1.3056628704071045, "learning_rate": 0.0018654775604142694, "loss": 0.7737, "step": 23380 }, { "epoch": 6.728998849252013, "grad_norm": 0.952542781829834, "learning_rate": 0.0018654200230149597, "loss": 0.6843, "step": 23390 }, { "epoch": 6.731875719217491, "grad_norm": 2.5109705924987793, "learning_rate": 0.00186536248561565, "loss": 1.0789, "step": 23400 }, { "epoch": 6.734752589182969, "grad_norm": 2.120013952255249, "learning_rate": 0.0018653049482163407, "loss": 1.0159, "step": 23410 }, { "epoch": 6.7376294591484465, "grad_norm": 1.9231702089309692, "learning_rate": 0.0018652474108170312, "loss": 1.0643, "step": 23420 }, { "epoch": 6.740506329113924, "grad_norm": 0.6535977721214294, "learning_rate": 0.0018651898734177216, "loss": 0.7663, "step": 23430 }, { "epoch": 6.7433831990794015, "grad_norm": 1.1869900226593018, "learning_rate": 0.0018651323360184121, "loss": 0.8965, "step": 23440 }, { "epoch": 6.7462600690448795, "grad_norm": 1.6200087070465088, "learning_rate": 0.0018650747986191025, "loss": 0.949, "step": 23450 }, { "epoch": 6.749136939010357, "grad_norm": 1.0458989143371582, "learning_rate": 0.0018650172612197928, "loss": 0.7778, "step": 23460 }, { "epoch": 6.752013808975835, "grad_norm": 1.342223048210144, "learning_rate": 0.0018649597238204834, "loss": 0.7854, "step": 23470 }, { "epoch": 6.754890678941312, "grad_norm": 0.8881583213806152, "learning_rate": 0.0018649021864211737, "loss": 0.8457, "step": 23480 }, { "epoch": 6.75776754890679, "grad_norm": 1.4694401025772095, "learning_rate": 0.0018648446490218643, "loss": 0.9197, "step": 23490 }, { "epoch": 6.760644418872267, "grad_norm": 1.6557635068893433, "learning_rate": 0.0018647871116225549, "loss": 0.9266, "step": 23500 }, { "epoch": 6.763521288837745, "grad_norm": 1.3914053440093994, "learning_rate": 0.001864729574223245, "loss": 0.8646, "step": 23510 }, { "epoch": 6.766398158803222, "grad_norm": 1.6667438745498657, "learning_rate": 0.0018646720368239356, "loss": 0.871, "step": 23520 }, { "epoch": 6.7692750287687, "grad_norm": 1.2079215049743652, "learning_rate": 0.0018646144994246261, "loss": 0.7742, "step": 23530 }, { "epoch": 6.772151898734177, "grad_norm": 0.8302589654922485, "learning_rate": 0.0018645569620253165, "loss": 0.8964, "step": 23540 }, { "epoch": 6.775028768699655, "grad_norm": 1.1591852903366089, "learning_rate": 0.001864499424626007, "loss": 0.8556, "step": 23550 }, { "epoch": 6.777905638665132, "grad_norm": 1.8599984645843506, "learning_rate": 0.0018644418872266976, "loss": 1.1433, "step": 23560 }, { "epoch": 6.78078250863061, "grad_norm": 0.9713876247406006, "learning_rate": 0.0018643843498273877, "loss": 0.6629, "step": 23570 }, { "epoch": 6.783659378596088, "grad_norm": 2.579105854034424, "learning_rate": 0.0018643268124280783, "loss": 0.8536, "step": 23580 }, { "epoch": 6.786536248561565, "grad_norm": 1.6977778673171997, "learning_rate": 0.0018642692750287686, "loss": 0.8817, "step": 23590 }, { "epoch": 6.789413118527042, "grad_norm": 1.8639636039733887, "learning_rate": 0.0018642117376294592, "loss": 0.9178, "step": 23600 }, { "epoch": 6.79228998849252, "grad_norm": 1.5324331521987915, "learning_rate": 0.0018641542002301498, "loss": 0.7869, "step": 23610 }, { "epoch": 6.795166858457998, "grad_norm": 1.7861214876174927, "learning_rate": 0.0018640966628308401, "loss": 0.8319, "step": 23620 }, { "epoch": 6.798043728423475, "grad_norm": 2.8779613971710205, "learning_rate": 0.0018640391254315305, "loss": 0.9015, "step": 23630 }, { "epoch": 6.800920598388953, "grad_norm": 1.478090763092041, "learning_rate": 0.001863981588032221, "loss": 0.8843, "step": 23640 }, { "epoch": 6.80379746835443, "grad_norm": 0.8184416890144348, "learning_rate": 0.0018639240506329114, "loss": 0.6783, "step": 23650 }, { "epoch": 6.806674338319908, "grad_norm": 1.122390866279602, "learning_rate": 0.001863866513233602, "loss": 0.8574, "step": 23660 }, { "epoch": 6.809551208285385, "grad_norm": 1.1694358587265015, "learning_rate": 0.0018638089758342925, "loss": 0.8356, "step": 23670 }, { "epoch": 6.812428078250863, "grad_norm": 0.9786534309387207, "learning_rate": 0.0018637514384349828, "loss": 0.9108, "step": 23680 }, { "epoch": 6.81530494821634, "grad_norm": 1.0100358724594116, "learning_rate": 0.0018636939010356732, "loss": 0.7508, "step": 23690 }, { "epoch": 6.818181818181818, "grad_norm": 1.0375893115997314, "learning_rate": 0.0018636363636363635, "loss": 0.7676, "step": 23700 }, { "epoch": 6.821058688147295, "grad_norm": 2.0576350688934326, "learning_rate": 0.001863578826237054, "loss": 0.8879, "step": 23710 }, { "epoch": 6.823935558112773, "grad_norm": 2.170830488204956, "learning_rate": 0.0018635212888377447, "loss": 0.8271, "step": 23720 }, { "epoch": 6.8268124280782505, "grad_norm": 1.3289843797683716, "learning_rate": 0.001863463751438435, "loss": 0.8129, "step": 23730 }, { "epoch": 6.8296892980437285, "grad_norm": 0.7820063829421997, "learning_rate": 0.0018634062140391256, "loss": 0.9523, "step": 23740 }, { "epoch": 6.8325661680092065, "grad_norm": 1.2161520719528198, "learning_rate": 0.001863348676639816, "loss": 1.019, "step": 23750 }, { "epoch": 6.8354430379746836, "grad_norm": 1.3795870542526245, "learning_rate": 0.0018632911392405063, "loss": 0.8378, "step": 23760 }, { "epoch": 6.838319907940161, "grad_norm": 1.40693199634552, "learning_rate": 0.0018632336018411968, "loss": 0.7973, "step": 23770 }, { "epoch": 6.841196777905639, "grad_norm": 1.0707569122314453, "learning_rate": 0.0018631760644418874, "loss": 0.7848, "step": 23780 }, { "epoch": 6.844073647871117, "grad_norm": 1.898012638092041, "learning_rate": 0.0018631185270425777, "loss": 1.0277, "step": 23790 }, { "epoch": 6.846950517836594, "grad_norm": 1.3312591314315796, "learning_rate": 0.001863060989643268, "loss": 0.7998, "step": 23800 }, { "epoch": 6.849827387802072, "grad_norm": 1.0164809226989746, "learning_rate": 0.0018630034522439584, "loss": 1.1155, "step": 23810 }, { "epoch": 6.852704257767549, "grad_norm": 1.6402963399887085, "learning_rate": 0.001862945914844649, "loss": 0.9028, "step": 23820 }, { "epoch": 6.855581127733027, "grad_norm": 0.6914053559303284, "learning_rate": 0.0018628883774453396, "loss": 0.6802, "step": 23830 }, { "epoch": 6.858457997698504, "grad_norm": 0.8059312701225281, "learning_rate": 0.00186283084004603, "loss": 0.65, "step": 23840 }, { "epoch": 6.861334867663982, "grad_norm": 1.0109608173370361, "learning_rate": 0.0018627733026467205, "loss": 0.8658, "step": 23850 }, { "epoch": 6.864211737629459, "grad_norm": 1.1026159524917603, "learning_rate": 0.0018627157652474108, "loss": 1.0285, "step": 23860 }, { "epoch": 6.867088607594937, "grad_norm": 1.2764006853103638, "learning_rate": 0.0018626582278481012, "loss": 0.7748, "step": 23870 }, { "epoch": 6.869965477560414, "grad_norm": 1.4243519306182861, "learning_rate": 0.0018626006904487917, "loss": 0.7727, "step": 23880 }, { "epoch": 6.872842347525892, "grad_norm": 2.134209394454956, "learning_rate": 0.0018625431530494823, "loss": 0.9938, "step": 23890 }, { "epoch": 6.875719217491369, "grad_norm": 0.6667572855949402, "learning_rate": 0.0018624856156501726, "loss": 0.8994, "step": 23900 }, { "epoch": 6.878596087456847, "grad_norm": 1.0676194429397583, "learning_rate": 0.0018624280782508632, "loss": 0.7021, "step": 23910 }, { "epoch": 6.881472957422325, "grad_norm": 2.577207088470459, "learning_rate": 0.0018623705408515533, "loss": 0.992, "step": 23920 }, { "epoch": 6.884349827387802, "grad_norm": 1.584971308708191, "learning_rate": 0.001862313003452244, "loss": 0.7187, "step": 23930 }, { "epoch": 6.887226697353279, "grad_norm": 1.7354816198349, "learning_rate": 0.0018622554660529345, "loss": 0.7734, "step": 23940 }, { "epoch": 6.890103567318757, "grad_norm": 1.8738511800765991, "learning_rate": 0.0018621979286536248, "loss": 0.9186, "step": 23950 }, { "epoch": 6.892980437284235, "grad_norm": 1.7882672548294067, "learning_rate": 0.0018621403912543154, "loss": 0.9008, "step": 23960 }, { "epoch": 6.895857307249712, "grad_norm": 1.0245367288589478, "learning_rate": 0.001862082853855006, "loss": 0.6616, "step": 23970 }, { "epoch": 6.89873417721519, "grad_norm": 1.2876091003417969, "learning_rate": 0.001862025316455696, "loss": 1.0688, "step": 23980 }, { "epoch": 6.901611047180667, "grad_norm": 1.2703688144683838, "learning_rate": 0.0018619677790563866, "loss": 0.8796, "step": 23990 }, { "epoch": 6.904487917146145, "grad_norm": 1.2554259300231934, "learning_rate": 0.0018619102416570772, "loss": 0.9785, "step": 24000 }, { "epoch": 6.907364787111622, "grad_norm": 1.5568939447402954, "learning_rate": 0.0018618527042577675, "loss": 0.7668, "step": 24010 }, { "epoch": 6.9102416570771, "grad_norm": 1.8851290941238403, "learning_rate": 0.0018617951668584581, "loss": 1.0412, "step": 24020 }, { "epoch": 6.9131185270425775, "grad_norm": 1.3927310705184937, "learning_rate": 0.0018617376294591487, "loss": 0.9441, "step": 24030 }, { "epoch": 6.915995397008055, "grad_norm": 2.097687005996704, "learning_rate": 0.0018616800920598388, "loss": 0.863, "step": 24040 }, { "epoch": 6.9188722669735325, "grad_norm": 1.3652503490447998, "learning_rate": 0.0018616225546605294, "loss": 0.7974, "step": 24050 }, { "epoch": 6.9217491369390105, "grad_norm": 1.1643959283828735, "learning_rate": 0.0018615650172612197, "loss": 0.8303, "step": 24060 }, { "epoch": 6.924626006904488, "grad_norm": 0.8903191685676575, "learning_rate": 0.0018615074798619103, "loss": 0.7722, "step": 24070 }, { "epoch": 6.927502876869966, "grad_norm": 1.3339979648590088, "learning_rate": 0.0018614499424626008, "loss": 0.9333, "step": 24080 }, { "epoch": 6.930379746835443, "grad_norm": 0.7173075675964355, "learning_rate": 0.0018613924050632912, "loss": 0.9425, "step": 24090 }, { "epoch": 6.933256616800921, "grad_norm": 1.6851234436035156, "learning_rate": 0.0018613348676639815, "loss": 0.9156, "step": 24100 }, { "epoch": 6.936133486766398, "grad_norm": 1.4369927644729614, "learning_rate": 0.001861277330264672, "loss": 0.9872, "step": 24110 }, { "epoch": 6.939010356731876, "grad_norm": 1.2301831245422363, "learning_rate": 0.0018612197928653625, "loss": 0.8334, "step": 24120 }, { "epoch": 6.941887226697354, "grad_norm": 3.268782377243042, "learning_rate": 0.001861162255466053, "loss": 0.6641, "step": 24130 }, { "epoch": 6.944764096662831, "grad_norm": 1.350490689277649, "learning_rate": 0.0018611047180667436, "loss": 0.7448, "step": 24140 }, { "epoch": 6.947640966628309, "grad_norm": 1.2043043375015259, "learning_rate": 0.001861047180667434, "loss": 0.8726, "step": 24150 }, { "epoch": 6.950517836593786, "grad_norm": 1.431544542312622, "learning_rate": 0.0018609896432681243, "loss": 0.9545, "step": 24160 }, { "epoch": 6.953394706559264, "grad_norm": 2.255891799926758, "learning_rate": 0.0018609321058688146, "loss": 0.9333, "step": 24170 }, { "epoch": 6.956271576524741, "grad_norm": 2.2299513816833496, "learning_rate": 0.0018608745684695052, "loss": 0.8115, "step": 24180 }, { "epoch": 6.959148446490219, "grad_norm": 2.2330050468444824, "learning_rate": 0.0018608170310701957, "loss": 0.9769, "step": 24190 }, { "epoch": 6.962025316455696, "grad_norm": 1.192506194114685, "learning_rate": 0.001860759493670886, "loss": 0.917, "step": 24200 }, { "epoch": 6.964902186421174, "grad_norm": 1.6761173009872437, "learning_rate": 0.0018607019562715767, "loss": 0.9859, "step": 24210 }, { "epoch": 6.967779056386651, "grad_norm": 1.7216356992721558, "learning_rate": 0.001860644418872267, "loss": 1.0061, "step": 24220 }, { "epoch": 6.970655926352129, "grad_norm": 1.1555545330047607, "learning_rate": 0.0018605868814729574, "loss": 0.7564, "step": 24230 }, { "epoch": 6.973532796317606, "grad_norm": 0.8774792551994324, "learning_rate": 0.001860529344073648, "loss": 0.9926, "step": 24240 }, { "epoch": 6.976409666283084, "grad_norm": 1.4658117294311523, "learning_rate": 0.0018604718066743385, "loss": 0.9, "step": 24250 }, { "epoch": 6.979286536248561, "grad_norm": 2.8233776092529297, "learning_rate": 0.0018604142692750288, "loss": 0.9782, "step": 24260 }, { "epoch": 6.982163406214039, "grad_norm": 1.059935212135315, "learning_rate": 0.0018603567318757194, "loss": 0.7847, "step": 24270 }, { "epoch": 6.985040276179516, "grad_norm": 1.2142550945281982, "learning_rate": 0.0018602991944764095, "loss": 0.8952, "step": 24280 }, { "epoch": 6.987917146144994, "grad_norm": 1.5844627618789673, "learning_rate": 0.0018602416570771, "loss": 1.2783, "step": 24290 }, { "epoch": 6.990794016110472, "grad_norm": 0.7321116924285889, "learning_rate": 0.0018601841196777907, "loss": 0.9014, "step": 24300 }, { "epoch": 6.993670886075949, "grad_norm": 0.7748911380767822, "learning_rate": 0.001860126582278481, "loss": 0.7319, "step": 24310 }, { "epoch": 6.996547756041427, "grad_norm": 1.8490021228790283, "learning_rate": 0.0018600690448791716, "loss": 0.7857, "step": 24320 }, { "epoch": 6.999424626006904, "grad_norm": 0.8175060153007507, "learning_rate": 0.0018600115074798621, "loss": 0.8283, "step": 24330 }, { "epoch": 7.002301495972382, "grad_norm": 1.5574413537979126, "learning_rate": 0.0018599539700805523, "loss": 0.6764, "step": 24340 }, { "epoch": 7.0051783659378595, "grad_norm": 0.9588491320610046, "learning_rate": 0.0018598964326812428, "loss": 0.8161, "step": 24350 }, { "epoch": 7.0080552359033375, "grad_norm": 1.327111840248108, "learning_rate": 0.0018598388952819334, "loss": 0.6705, "step": 24360 }, { "epoch": 7.0109321058688145, "grad_norm": 1.3898853063583374, "learning_rate": 0.0018597813578826237, "loss": 0.7721, "step": 24370 }, { "epoch": 7.0138089758342925, "grad_norm": 1.240950107574463, "learning_rate": 0.0018597238204833143, "loss": 0.8399, "step": 24380 }, { "epoch": 7.01668584579977, "grad_norm": 1.0472424030303955, "learning_rate": 0.0018596662830840046, "loss": 0.7074, "step": 24390 }, { "epoch": 7.019562715765248, "grad_norm": 1.1847532987594604, "learning_rate": 0.001859608745684695, "loss": 1.0378, "step": 24400 }, { "epoch": 7.022439585730725, "grad_norm": 1.4694514274597168, "learning_rate": 0.0018595512082853856, "loss": 0.9022, "step": 24410 }, { "epoch": 7.025316455696203, "grad_norm": 0.9820579886436462, "learning_rate": 0.001859493670886076, "loss": 0.8452, "step": 24420 }, { "epoch": 7.02819332566168, "grad_norm": 1.3706533908843994, "learning_rate": 0.0018594361334867665, "loss": 0.916, "step": 24430 }, { "epoch": 7.031070195627158, "grad_norm": 1.5183253288269043, "learning_rate": 0.001859378596087457, "loss": 0.8433, "step": 24440 }, { "epoch": 7.033947065592635, "grad_norm": 1.6215482950210571, "learning_rate": 0.0018593210586881474, "loss": 0.7025, "step": 24450 }, { "epoch": 7.036823935558113, "grad_norm": 1.1402223110198975, "learning_rate": 0.0018592635212888377, "loss": 0.8204, "step": 24460 }, { "epoch": 7.03970080552359, "grad_norm": 1.3184820413589478, "learning_rate": 0.0018592059838895283, "loss": 0.8435, "step": 24470 }, { "epoch": 7.042577675489068, "grad_norm": 1.3389338254928589, "learning_rate": 0.0018591484464902186, "loss": 0.7796, "step": 24480 }, { "epoch": 7.045454545454546, "grad_norm": 1.2381961345672607, "learning_rate": 0.0018590909090909092, "loss": 0.7149, "step": 24490 }, { "epoch": 7.048331415420023, "grad_norm": 1.0322833061218262, "learning_rate": 0.0018590333716915995, "loss": 0.7383, "step": 24500 }, { "epoch": 7.051208285385501, "grad_norm": 1.4243699312210083, "learning_rate": 0.00185897583429229, "loss": 0.9191, "step": 24510 }, { "epoch": 7.054085155350978, "grad_norm": 1.4480832815170288, "learning_rate": 0.0018589182968929805, "loss": 0.8428, "step": 24520 }, { "epoch": 7.056962025316456, "grad_norm": 2.607999086380005, "learning_rate": 0.0018588607594936708, "loss": 0.7755, "step": 24530 }, { "epoch": 7.059838895281933, "grad_norm": 2.3348939418792725, "learning_rate": 0.0018588032220943614, "loss": 0.7696, "step": 24540 }, { "epoch": 7.062715765247411, "grad_norm": 1.6079384088516235, "learning_rate": 0.001858745684695052, "loss": 0.7609, "step": 24550 }, { "epoch": 7.065592635212888, "grad_norm": 2.3335678577423096, "learning_rate": 0.0018586881472957423, "loss": 0.9079, "step": 24560 }, { "epoch": 7.068469505178366, "grad_norm": 2.591440439224243, "learning_rate": 0.0018586306098964328, "loss": 0.7871, "step": 24570 }, { "epoch": 7.071346375143843, "grad_norm": 1.7837632894515991, "learning_rate": 0.0018585730724971232, "loss": 0.9254, "step": 24580 }, { "epoch": 7.074223245109321, "grad_norm": 1.4442585706710815, "learning_rate": 0.0018585155350978135, "loss": 0.7677, "step": 24590 }, { "epoch": 7.077100115074798, "grad_norm": 1.5795238018035889, "learning_rate": 0.001858457997698504, "loss": 0.8592, "step": 24600 }, { "epoch": 7.079976985040276, "grad_norm": 1.4802379608154297, "learning_rate": 0.0018584004602991944, "loss": 0.7525, "step": 24610 }, { "epoch": 7.082853855005753, "grad_norm": 2.2380740642547607, "learning_rate": 0.001858342922899885, "loss": 0.7218, "step": 24620 }, { "epoch": 7.085730724971231, "grad_norm": 1.4628403186798096, "learning_rate": 0.0018582853855005754, "loss": 0.6791, "step": 24630 }, { "epoch": 7.0886075949367084, "grad_norm": 0.9704036116600037, "learning_rate": 0.0018582278481012657, "loss": 0.8888, "step": 24640 }, { "epoch": 7.091484464902186, "grad_norm": 0.7783573865890503, "learning_rate": 0.0018581703107019563, "loss": 0.6821, "step": 24650 }, { "epoch": 7.094361334867664, "grad_norm": 1.417008638381958, "learning_rate": 0.0018581127733026468, "loss": 0.7519, "step": 24660 }, { "epoch": 7.0972382048331415, "grad_norm": 1.7206757068634033, "learning_rate": 0.0018580552359033372, "loss": 0.8463, "step": 24670 }, { "epoch": 7.1001150747986195, "grad_norm": 0.9341565370559692, "learning_rate": 0.0018579976985040277, "loss": 0.7215, "step": 24680 }, { "epoch": 7.102991944764097, "grad_norm": 0.949931800365448, "learning_rate": 0.001857940161104718, "loss": 0.7917, "step": 24690 }, { "epoch": 7.1058688147295745, "grad_norm": 0.9816468358039856, "learning_rate": 0.0018578826237054084, "loss": 0.901, "step": 24700 }, { "epoch": 7.108745684695052, "grad_norm": 1.3926774263381958, "learning_rate": 0.001857825086306099, "loss": 0.87, "step": 24710 }, { "epoch": 7.11162255466053, "grad_norm": 0.8591293692588806, "learning_rate": 0.0018577675489067896, "loss": 0.6885, "step": 24720 }, { "epoch": 7.114499424626007, "grad_norm": 2.3408420085906982, "learning_rate": 0.00185771001150748, "loss": 1.0166, "step": 24730 }, { "epoch": 7.117376294591485, "grad_norm": 2.4298861026763916, "learning_rate": 0.0018576524741081705, "loss": 0.786, "step": 24740 }, { "epoch": 7.120253164556962, "grad_norm": 1.2992955446243286, "learning_rate": 0.0018575949367088606, "loss": 0.8926, "step": 24750 }, { "epoch": 7.12313003452244, "grad_norm": 1.218329906463623, "learning_rate": 0.0018575373993095512, "loss": 0.8135, "step": 24760 }, { "epoch": 7.126006904487917, "grad_norm": 1.6505508422851562, "learning_rate": 0.0018574798619102417, "loss": 0.6851, "step": 24770 }, { "epoch": 7.128883774453395, "grad_norm": 2.135640859603882, "learning_rate": 0.001857422324510932, "loss": 0.7791, "step": 24780 }, { "epoch": 7.131760644418872, "grad_norm": 2.0987541675567627, "learning_rate": 0.0018573647871116226, "loss": 0.7417, "step": 24790 }, { "epoch": 7.13463751438435, "grad_norm": 1.2141493558883667, "learning_rate": 0.0018573072497123132, "loss": 0.7481, "step": 24800 }, { "epoch": 7.137514384349827, "grad_norm": 1.2970774173736572, "learning_rate": 0.0018572497123130033, "loss": 0.9735, "step": 24810 }, { "epoch": 7.140391254315305, "grad_norm": 0.8512192964553833, "learning_rate": 0.001857192174913694, "loss": 0.7247, "step": 24820 }, { "epoch": 7.143268124280782, "grad_norm": 1.0649888515472412, "learning_rate": 0.0018571346375143845, "loss": 0.7652, "step": 24830 }, { "epoch": 7.14614499424626, "grad_norm": 1.1983476877212524, "learning_rate": 0.0018570771001150748, "loss": 0.7379, "step": 24840 }, { "epoch": 7.149021864211738, "grad_norm": 1.3819382190704346, "learning_rate": 0.0018570195627157654, "loss": 0.7843, "step": 24850 }, { "epoch": 7.151898734177215, "grad_norm": 2.844937562942505, "learning_rate": 0.0018569620253164557, "loss": 0.9517, "step": 24860 }, { "epoch": 7.154775604142693, "grad_norm": 0.5879691243171692, "learning_rate": 0.001856904487917146, "loss": 0.834, "step": 24870 }, { "epoch": 7.15765247410817, "grad_norm": 1.0641194581985474, "learning_rate": 0.0018568469505178366, "loss": 0.8402, "step": 24880 }, { "epoch": 7.160529344073648, "grad_norm": 0.8687148094177246, "learning_rate": 0.001856789413118527, "loss": 0.7577, "step": 24890 }, { "epoch": 7.163406214039125, "grad_norm": 1.5790756940841675, "learning_rate": 0.0018567318757192175, "loss": 0.9049, "step": 24900 }, { "epoch": 7.166283084004603, "grad_norm": 0.997796356678009, "learning_rate": 0.0018566743383199081, "loss": 0.7195, "step": 24910 }, { "epoch": 7.16915995397008, "grad_norm": 1.3349343538284302, "learning_rate": 0.0018566168009205985, "loss": 0.8318, "step": 24920 }, { "epoch": 7.172036823935558, "grad_norm": 1.2450493574142456, "learning_rate": 0.0018565592635212888, "loss": 0.872, "step": 24930 }, { "epoch": 7.174913693901035, "grad_norm": 0.8032209277153015, "learning_rate": 0.0018565017261219794, "loss": 0.7336, "step": 24940 }, { "epoch": 7.177790563866513, "grad_norm": 1.335681438446045, "learning_rate": 0.0018564441887226697, "loss": 0.7894, "step": 24950 }, { "epoch": 7.1806674338319905, "grad_norm": 2.3567302227020264, "learning_rate": 0.0018563866513233603, "loss": 0.7455, "step": 24960 }, { "epoch": 7.1835443037974684, "grad_norm": 0.907030463218689, "learning_rate": 0.0018563291139240506, "loss": 0.6447, "step": 24970 }, { "epoch": 7.1864211737629455, "grad_norm": 2.2364609241485596, "learning_rate": 0.0018562715765247412, "loss": 0.7666, "step": 24980 }, { "epoch": 7.1892980437284235, "grad_norm": 1.389204740524292, "learning_rate": 0.0018562140391254315, "loss": 0.7683, "step": 24990 }, { "epoch": 7.192174913693901, "grad_norm": 1.470603585243225, "learning_rate": 0.0018561565017261219, "loss": 0.5975, "step": 25000 }, { "epoch": 7.195051783659379, "grad_norm": 0.9259714484214783, "learning_rate": 0.0018560989643268124, "loss": 0.8064, "step": 25010 }, { "epoch": 7.197928653624857, "grad_norm": 2.702932596206665, "learning_rate": 0.001856041426927503, "loss": 0.7564, "step": 25020 }, { "epoch": 7.200805523590334, "grad_norm": 0.9486038684844971, "learning_rate": 0.0018559838895281934, "loss": 0.707, "step": 25030 }, { "epoch": 7.203682393555812, "grad_norm": 1.3129498958587646, "learning_rate": 0.001855926352128884, "loss": 0.648, "step": 25040 }, { "epoch": 7.206559263521289, "grad_norm": 1.0823421478271484, "learning_rate": 0.0018558688147295743, "loss": 0.8138, "step": 25050 }, { "epoch": 7.209436133486767, "grad_norm": 1.675621509552002, "learning_rate": 0.0018558112773302646, "loss": 0.8294, "step": 25060 }, { "epoch": 7.212313003452244, "grad_norm": 2.635922431945801, "learning_rate": 0.0018557537399309552, "loss": 0.9191, "step": 25070 }, { "epoch": 7.215189873417722, "grad_norm": 0.9677117466926575, "learning_rate": 0.0018556962025316455, "loss": 0.7011, "step": 25080 }, { "epoch": 7.218066743383199, "grad_norm": 1.2999237775802612, "learning_rate": 0.001855638665132336, "loss": 0.7322, "step": 25090 }, { "epoch": 7.220943613348677, "grad_norm": 1.0056226253509521, "learning_rate": 0.0018555811277330267, "loss": 0.7797, "step": 25100 }, { "epoch": 7.223820483314154, "grad_norm": 1.6774603128433228, "learning_rate": 0.0018555235903337168, "loss": 0.9175, "step": 25110 }, { "epoch": 7.226697353279632, "grad_norm": 1.4267737865447998, "learning_rate": 0.0018554660529344074, "loss": 0.728, "step": 25120 }, { "epoch": 7.229574223245109, "grad_norm": 1.5381571054458618, "learning_rate": 0.001855408515535098, "loss": 0.7524, "step": 25130 }, { "epoch": 7.232451093210587, "grad_norm": 1.0352840423583984, "learning_rate": 0.0018553509781357883, "loss": 0.9859, "step": 25140 }, { "epoch": 7.235327963176064, "grad_norm": 1.359215497970581, "learning_rate": 0.0018552934407364788, "loss": 0.912, "step": 25150 }, { "epoch": 7.238204833141542, "grad_norm": 1.4229788780212402, "learning_rate": 0.0018552359033371694, "loss": 0.7681, "step": 25160 }, { "epoch": 7.241081703107019, "grad_norm": 2.5912933349609375, "learning_rate": 0.0018551783659378595, "loss": 1.0243, "step": 25170 }, { "epoch": 7.243958573072497, "grad_norm": 1.33211088180542, "learning_rate": 0.00185512082853855, "loss": 0.6607, "step": 25180 }, { "epoch": 7.246835443037975, "grad_norm": 1.7249139547348022, "learning_rate": 0.0018550632911392404, "loss": 1.0254, "step": 25190 }, { "epoch": 7.249712313003452, "grad_norm": 1.7228559255599976, "learning_rate": 0.001855005753739931, "loss": 0.828, "step": 25200 }, { "epoch": 7.25258918296893, "grad_norm": 1.2607357501983643, "learning_rate": 0.0018549482163406216, "loss": 0.8832, "step": 25210 }, { "epoch": 7.255466052934407, "grad_norm": 1.5627931356430054, "learning_rate": 0.001854890678941312, "loss": 0.7631, "step": 25220 }, { "epoch": 7.258342922899885, "grad_norm": 1.7337769269943237, "learning_rate": 0.0018548331415420023, "loss": 0.8694, "step": 25230 }, { "epoch": 7.261219792865362, "grad_norm": 1.4089868068695068, "learning_rate": 0.0018547756041426928, "loss": 0.8042, "step": 25240 }, { "epoch": 7.26409666283084, "grad_norm": 1.72146475315094, "learning_rate": 0.0018547180667433832, "loss": 0.9548, "step": 25250 }, { "epoch": 7.266973532796317, "grad_norm": 1.127428650856018, "learning_rate": 0.0018546605293440737, "loss": 0.8395, "step": 25260 }, { "epoch": 7.269850402761795, "grad_norm": 1.7483999729156494, "learning_rate": 0.0018546029919447643, "loss": 0.7794, "step": 25270 }, { "epoch": 7.2727272727272725, "grad_norm": 1.8493036031723022, "learning_rate": 0.0018545454545454546, "loss": 1.2785, "step": 25280 }, { "epoch": 7.2756041426927505, "grad_norm": 0.8555125594139099, "learning_rate": 0.001854487917146145, "loss": 0.7608, "step": 25290 }, { "epoch": 7.2784810126582276, "grad_norm": 0.9149812459945679, "learning_rate": 0.0018544303797468353, "loss": 0.716, "step": 25300 }, { "epoch": 7.2813578826237055, "grad_norm": 2.2453453540802, "learning_rate": 0.001854372842347526, "loss": 0.8264, "step": 25310 }, { "epoch": 7.284234752589183, "grad_norm": 1.3846451044082642, "learning_rate": 0.0018543153049482165, "loss": 0.7001, "step": 25320 }, { "epoch": 7.287111622554661, "grad_norm": 1.4603158235549927, "learning_rate": 0.0018542577675489068, "loss": 0.7805, "step": 25330 }, { "epoch": 7.289988492520138, "grad_norm": 2.1943562030792236, "learning_rate": 0.0018542002301495974, "loss": 0.8174, "step": 25340 }, { "epoch": 7.292865362485616, "grad_norm": 2.7880699634552, "learning_rate": 0.0018541426927502877, "loss": 0.8372, "step": 25350 }, { "epoch": 7.295742232451094, "grad_norm": 1.3007844686508179, "learning_rate": 0.001854085155350978, "loss": 0.7885, "step": 25360 }, { "epoch": 7.298619102416571, "grad_norm": 0.8076132535934448, "learning_rate": 0.0018540276179516686, "loss": 1.0145, "step": 25370 }, { "epoch": 7.301495972382049, "grad_norm": 1.1873705387115479, "learning_rate": 0.0018539700805523592, "loss": 0.8168, "step": 25380 }, { "epoch": 7.304372842347526, "grad_norm": 1.810079574584961, "learning_rate": 0.0018539125431530495, "loss": 0.8143, "step": 25390 }, { "epoch": 7.307249712313004, "grad_norm": 1.0113370418548584, "learning_rate": 0.00185385500575374, "loss": 0.8237, "step": 25400 }, { "epoch": 7.310126582278481, "grad_norm": 0.9464476704597473, "learning_rate": 0.0018537974683544305, "loss": 0.7386, "step": 25410 }, { "epoch": 7.313003452243959, "grad_norm": 1.6842612028121948, "learning_rate": 0.0018537399309551208, "loss": 0.819, "step": 25420 }, { "epoch": 7.315880322209436, "grad_norm": 1.4432448148727417, "learning_rate": 0.0018536823935558114, "loss": 0.6956, "step": 25430 }, { "epoch": 7.318757192174914, "grad_norm": 1.5233508348464966, "learning_rate": 0.0018536248561565017, "loss": 0.8435, "step": 25440 }, { "epoch": 7.321634062140391, "grad_norm": 1.6656408309936523, "learning_rate": 0.0018535673187571923, "loss": 0.7946, "step": 25450 }, { "epoch": 7.324510932105869, "grad_norm": 2.2364680767059326, "learning_rate": 0.0018535097813578826, "loss": 0.9319, "step": 25460 }, { "epoch": 7.327387802071346, "grad_norm": 1.9695067405700684, "learning_rate": 0.001853452243958573, "loss": 0.714, "step": 25470 }, { "epoch": 7.330264672036824, "grad_norm": 1.3050671815872192, "learning_rate": 0.0018533947065592635, "loss": 0.9643, "step": 25480 }, { "epoch": 7.333141542002301, "grad_norm": 1.5674315690994263, "learning_rate": 0.001853337169159954, "loss": 0.7869, "step": 25490 }, { "epoch": 7.336018411967779, "grad_norm": 2.1041147708892822, "learning_rate": 0.0018532796317606444, "loss": 0.7433, "step": 25500 }, { "epoch": 7.338895281933256, "grad_norm": 0.9305287599563599, "learning_rate": 0.001853222094361335, "loss": 1.0478, "step": 25510 }, { "epoch": 7.341772151898734, "grad_norm": 0.7607412338256836, "learning_rate": 0.0018531645569620254, "loss": 0.6943, "step": 25520 }, { "epoch": 7.344649021864212, "grad_norm": 1.5356806516647339, "learning_rate": 0.0018531070195627157, "loss": 0.8427, "step": 25530 }, { "epoch": 7.347525891829689, "grad_norm": 1.1366177797317505, "learning_rate": 0.0018530494821634063, "loss": 0.7621, "step": 25540 }, { "epoch": 7.350402761795167, "grad_norm": 1.6972248554229736, "learning_rate": 0.0018529919447640966, "loss": 0.8026, "step": 25550 }, { "epoch": 7.353279631760644, "grad_norm": 0.6020211577415466, "learning_rate": 0.0018529344073647872, "loss": 0.9378, "step": 25560 }, { "epoch": 7.356156501726122, "grad_norm": 1.2380902767181396, "learning_rate": 0.0018528768699654777, "loss": 0.6635, "step": 25570 }, { "epoch": 7.359033371691599, "grad_norm": 1.8311963081359863, "learning_rate": 0.0018528193325661679, "loss": 0.8218, "step": 25580 }, { "epoch": 7.361910241657077, "grad_norm": 1.270279049873352, "learning_rate": 0.0018527617951668584, "loss": 0.7509, "step": 25590 }, { "epoch": 7.3647871116225545, "grad_norm": 1.1848328113555908, "learning_rate": 0.001852704257767549, "loss": 0.6835, "step": 25600 }, { "epoch": 7.3676639815880325, "grad_norm": 1.3584260940551758, "learning_rate": 0.0018526467203682393, "loss": 0.7698, "step": 25610 }, { "epoch": 7.37054085155351, "grad_norm": 1.8174830675125122, "learning_rate": 0.00185258918296893, "loss": 0.8885, "step": 25620 }, { "epoch": 7.3734177215189876, "grad_norm": 0.6471933126449585, "learning_rate": 0.0018525316455696205, "loss": 0.7764, "step": 25630 }, { "epoch": 7.376294591484465, "grad_norm": 1.2672078609466553, "learning_rate": 0.0018524741081703106, "loss": 0.7773, "step": 25640 }, { "epoch": 7.379171461449943, "grad_norm": 1.0243473052978516, "learning_rate": 0.0018524165707710012, "loss": 0.83, "step": 25650 }, { "epoch": 7.38204833141542, "grad_norm": 1.4546617269515991, "learning_rate": 0.0018523590333716915, "loss": 0.9945, "step": 25660 }, { "epoch": 7.384925201380898, "grad_norm": 1.194537878036499, "learning_rate": 0.001852301495972382, "loss": 0.7817, "step": 25670 }, { "epoch": 7.387802071346375, "grad_norm": 1.1048122644424438, "learning_rate": 0.0018522439585730726, "loss": 0.7667, "step": 25680 }, { "epoch": 7.390678941311853, "grad_norm": 1.0624775886535645, "learning_rate": 0.001852186421173763, "loss": 0.8356, "step": 25690 }, { "epoch": 7.39355581127733, "grad_norm": 1.1587172746658325, "learning_rate": 0.0018521288837744533, "loss": 0.8394, "step": 25700 }, { "epoch": 7.396432681242808, "grad_norm": 1.6549172401428223, "learning_rate": 0.001852071346375144, "loss": 0.8917, "step": 25710 }, { "epoch": 7.399309551208285, "grad_norm": 1.334238052368164, "learning_rate": 0.0018520138089758342, "loss": 1.0409, "step": 25720 }, { "epoch": 7.402186421173763, "grad_norm": 1.5422534942626953, "learning_rate": 0.0018519562715765248, "loss": 0.8243, "step": 25730 }, { "epoch": 7.405063291139241, "grad_norm": 0.7743839621543884, "learning_rate": 0.0018518987341772154, "loss": 0.8803, "step": 25740 }, { "epoch": 7.407940161104718, "grad_norm": 1.0739551782608032, "learning_rate": 0.0018518411967779057, "loss": 0.7964, "step": 25750 }, { "epoch": 7.410817031070196, "grad_norm": 0.9014877080917358, "learning_rate": 0.001851783659378596, "loss": 0.8743, "step": 25760 }, { "epoch": 7.413693901035673, "grad_norm": 1.7651056051254272, "learning_rate": 0.0018517261219792864, "loss": 0.8327, "step": 25770 }, { "epoch": 7.416570771001151, "grad_norm": 1.1627341508865356, "learning_rate": 0.001851668584579977, "loss": 0.9377, "step": 25780 }, { "epoch": 7.419447640966628, "grad_norm": 1.2923662662506104, "learning_rate": 0.0018516110471806675, "loss": 0.7003, "step": 25790 }, { "epoch": 7.422324510932106, "grad_norm": 1.0724101066589355, "learning_rate": 0.001851553509781358, "loss": 0.8369, "step": 25800 }, { "epoch": 7.425201380897583, "grad_norm": 0.9204912781715393, "learning_rate": 0.0018514959723820485, "loss": 0.8287, "step": 25810 }, { "epoch": 7.428078250863061, "grad_norm": 1.3042094707489014, "learning_rate": 0.0018514384349827388, "loss": 0.9007, "step": 25820 }, { "epoch": 7.430955120828538, "grad_norm": 1.4003818035125732, "learning_rate": 0.0018513808975834291, "loss": 0.8169, "step": 25830 }, { "epoch": 7.433831990794016, "grad_norm": 1.2925586700439453, "learning_rate": 0.0018513233601841197, "loss": 0.9951, "step": 25840 }, { "epoch": 7.436708860759493, "grad_norm": 1.0759323835372925, "learning_rate": 0.0018512658227848103, "loss": 0.7777, "step": 25850 }, { "epoch": 7.439585730724971, "grad_norm": 0.7794771194458008, "learning_rate": 0.0018512082853855006, "loss": 0.7558, "step": 25860 }, { "epoch": 7.442462600690448, "grad_norm": 1.1285910606384277, "learning_rate": 0.0018511507479861912, "loss": 0.8543, "step": 25870 }, { "epoch": 7.445339470655926, "grad_norm": 1.2926486730575562, "learning_rate": 0.0018510932105868813, "loss": 1.0179, "step": 25880 }, { "epoch": 7.4482163406214035, "grad_norm": 1.6480317115783691, "learning_rate": 0.0018510356731875719, "loss": 0.844, "step": 25890 }, { "epoch": 7.4510932105868815, "grad_norm": 0.988571047782898, "learning_rate": 0.0018509781357882624, "loss": 0.6366, "step": 25900 }, { "epoch": 7.453970080552359, "grad_norm": 2.476210832595825, "learning_rate": 0.0018509205983889528, "loss": 0.8829, "step": 25910 }, { "epoch": 7.4568469505178365, "grad_norm": 1.1099555492401123, "learning_rate": 0.0018508630609896434, "loss": 0.8397, "step": 25920 }, { "epoch": 7.4597238204833145, "grad_norm": 1.3521314859390259, "learning_rate": 0.001850805523590334, "loss": 0.8845, "step": 25930 }, { "epoch": 7.462600690448792, "grad_norm": 1.9664024114608765, "learning_rate": 0.001850747986191024, "loss": 1.1148, "step": 25940 }, { "epoch": 7.46547756041427, "grad_norm": 1.388627290725708, "learning_rate": 0.0018506904487917146, "loss": 0.9637, "step": 25950 }, { "epoch": 7.468354430379747, "grad_norm": 1.7298641204833984, "learning_rate": 0.0018506329113924052, "loss": 0.7682, "step": 25960 }, { "epoch": 7.471231300345225, "grad_norm": 1.8159877061843872, "learning_rate": 0.0018505753739930955, "loss": 0.8016, "step": 25970 }, { "epoch": 7.474108170310702, "grad_norm": 1.4317424297332764, "learning_rate": 0.001850517836593786, "loss": 1.0994, "step": 25980 }, { "epoch": 7.47698504027618, "grad_norm": 2.847421407699585, "learning_rate": 0.0018504602991944764, "loss": 1.0307, "step": 25990 }, { "epoch": 7.479861910241657, "grad_norm": 1.4193998575210571, "learning_rate": 0.0018504027617951668, "loss": 0.8512, "step": 26000 }, { "epoch": 7.482738780207135, "grad_norm": 1.8663699626922607, "learning_rate": 0.0018503452243958573, "loss": 0.732, "step": 26010 }, { "epoch": 7.485615650172612, "grad_norm": 1.2183843851089478, "learning_rate": 0.0018502876869965477, "loss": 0.8326, "step": 26020 }, { "epoch": 7.48849252013809, "grad_norm": 1.3816430568695068, "learning_rate": 0.0018502301495972383, "loss": 0.6717, "step": 26030 }, { "epoch": 7.491369390103567, "grad_norm": 1.3502695560455322, "learning_rate": 0.0018501726121979288, "loss": 0.8076, "step": 26040 }, { "epoch": 7.494246260069045, "grad_norm": 1.286086082458496, "learning_rate": 0.0018501150747986192, "loss": 0.8035, "step": 26050 }, { "epoch": 7.497123130034522, "grad_norm": 1.1031299829483032, "learning_rate": 0.0018500575373993095, "loss": 0.9844, "step": 26060 }, { "epoch": 7.5, "grad_norm": 1.7079063653945923, "learning_rate": 0.00185, "loss": 0.8828, "step": 26070 }, { "epoch": 7.502876869965478, "grad_norm": 1.2819745540618896, "learning_rate": 0.0018499424626006904, "loss": 0.89, "step": 26080 }, { "epoch": 7.505753739930955, "grad_norm": 1.402224063873291, "learning_rate": 0.001849884925201381, "loss": 0.8046, "step": 26090 }, { "epoch": 7.508630609896433, "grad_norm": 1.2609604597091675, "learning_rate": 0.0018498273878020716, "loss": 0.7471, "step": 26100 }, { "epoch": 7.51150747986191, "grad_norm": 0.8292046189308167, "learning_rate": 0.001849769850402762, "loss": 0.9222, "step": 26110 }, { "epoch": 7.514384349827388, "grad_norm": 1.8272953033447266, "learning_rate": 0.0018497123130034523, "loss": 0.9312, "step": 26120 }, { "epoch": 7.517261219792865, "grad_norm": 1.0468683242797852, "learning_rate": 0.0018496547756041426, "loss": 0.6703, "step": 26130 }, { "epoch": 7.520138089758343, "grad_norm": 1.4692282676696777, "learning_rate": 0.0018495972382048332, "loss": 0.9036, "step": 26140 }, { "epoch": 7.52301495972382, "grad_norm": 0.8474666476249695, "learning_rate": 0.0018495397008055237, "loss": 0.8612, "step": 26150 }, { "epoch": 7.525891829689298, "grad_norm": 1.5015193223953247, "learning_rate": 0.001849482163406214, "loss": 0.8789, "step": 26160 }, { "epoch": 7.528768699654775, "grad_norm": 0.8709912896156311, "learning_rate": 0.0018494246260069046, "loss": 0.7754, "step": 26170 }, { "epoch": 7.531645569620253, "grad_norm": 2.106489896774292, "learning_rate": 0.001849367088607595, "loss": 0.7564, "step": 26180 }, { "epoch": 7.53452243958573, "grad_norm": 1.099365234375, "learning_rate": 0.0018493095512082853, "loss": 0.8312, "step": 26190 }, { "epoch": 7.537399309551208, "grad_norm": 2.0011985301971436, "learning_rate": 0.001849252013808976, "loss": 0.7496, "step": 26200 }, { "epoch": 7.5402761795166855, "grad_norm": 1.096512794494629, "learning_rate": 0.0018491944764096665, "loss": 0.8701, "step": 26210 }, { "epoch": 7.5431530494821635, "grad_norm": 1.1570589542388916, "learning_rate": 0.0018491369390103568, "loss": 0.7461, "step": 26220 }, { "epoch": 7.546029919447641, "grad_norm": 1.2001253366470337, "learning_rate": 0.0018490794016110474, "loss": 0.8218, "step": 26230 }, { "epoch": 7.5489067894131185, "grad_norm": 2.746342658996582, "learning_rate": 0.0018490218642117375, "loss": 1.028, "step": 26240 }, { "epoch": 7.5517836593785965, "grad_norm": 1.1376087665557861, "learning_rate": 0.001848964326812428, "loss": 0.8207, "step": 26250 }, { "epoch": 7.554660529344074, "grad_norm": 1.0735546350479126, "learning_rate": 0.0018489067894131186, "loss": 0.7026, "step": 26260 }, { "epoch": 7.557537399309552, "grad_norm": 2.6566781997680664, "learning_rate": 0.001848849252013809, "loss": 0.9124, "step": 26270 }, { "epoch": 7.560414269275029, "grad_norm": 1.4923239946365356, "learning_rate": 0.0018487917146144995, "loss": 0.8327, "step": 26280 }, { "epoch": 7.563291139240507, "grad_norm": 1.0743626356124878, "learning_rate": 0.0018487341772151899, "loss": 0.8953, "step": 26290 }, { "epoch": 7.566168009205984, "grad_norm": 1.095599889755249, "learning_rate": 0.0018486766398158802, "loss": 0.7519, "step": 26300 }, { "epoch": 7.569044879171462, "grad_norm": 1.1043652296066284, "learning_rate": 0.0018486191024165708, "loss": 0.5929, "step": 26310 }, { "epoch": 7.571921749136939, "grad_norm": 2.1262409687042236, "learning_rate": 0.0018485615650172614, "loss": 0.8054, "step": 26320 }, { "epoch": 7.574798619102417, "grad_norm": 1.2371140718460083, "learning_rate": 0.0018485040276179517, "loss": 0.7888, "step": 26330 }, { "epoch": 7.577675489067894, "grad_norm": 1.857625961303711, "learning_rate": 0.0018484464902186423, "loss": 0.7129, "step": 26340 }, { "epoch": 7.580552359033372, "grad_norm": 1.8859577178955078, "learning_rate": 0.0018483889528193324, "loss": 0.8254, "step": 26350 }, { "epoch": 7.583429228998849, "grad_norm": 1.847684621810913, "learning_rate": 0.001848331415420023, "loss": 0.7542, "step": 26360 }, { "epoch": 7.586306098964327, "grad_norm": 1.5646750926971436, "learning_rate": 0.0018482738780207135, "loss": 1.0395, "step": 26370 }, { "epoch": 7.589182968929804, "grad_norm": 0.6707299947738647, "learning_rate": 0.0018482163406214039, "loss": 0.9444, "step": 26380 }, { "epoch": 7.592059838895282, "grad_norm": 1.1217883825302124, "learning_rate": 0.0018481588032220944, "loss": 0.6511, "step": 26390 }, { "epoch": 7.594936708860759, "grad_norm": 0.7305939793586731, "learning_rate": 0.001848101265822785, "loss": 0.7533, "step": 26400 }, { "epoch": 7.597813578826237, "grad_norm": 1.9477771520614624, "learning_rate": 0.0018480437284234751, "loss": 0.7499, "step": 26410 }, { "epoch": 7.600690448791715, "grad_norm": 1.1478917598724365, "learning_rate": 0.0018479861910241657, "loss": 0.8871, "step": 26420 }, { "epoch": 7.603567318757192, "grad_norm": 1.2189853191375732, "learning_rate": 0.0018479286536248563, "loss": 0.7268, "step": 26430 }, { "epoch": 7.606444188722669, "grad_norm": 1.0703136920928955, "learning_rate": 0.0018478711162255466, "loss": 0.8575, "step": 26440 }, { "epoch": 7.609321058688147, "grad_norm": 1.0775883197784424, "learning_rate": 0.0018478135788262372, "loss": 0.7101, "step": 26450 }, { "epoch": 7.612197928653625, "grad_norm": 1.9041147232055664, "learning_rate": 0.0018477560414269275, "loss": 0.8966, "step": 26460 }, { "epoch": 7.615074798619102, "grad_norm": 1.025885820388794, "learning_rate": 0.0018476985040276179, "loss": 1.0425, "step": 26470 }, { "epoch": 7.61795166858458, "grad_norm": 1.0169779062271118, "learning_rate": 0.0018476409666283084, "loss": 0.6853, "step": 26480 }, { "epoch": 7.620828538550057, "grad_norm": 2.576392412185669, "learning_rate": 0.0018475834292289988, "loss": 1.0111, "step": 26490 }, { "epoch": 7.623705408515535, "grad_norm": 2.230243682861328, "learning_rate": 0.0018475258918296893, "loss": 0.9677, "step": 26500 }, { "epoch": 7.6265822784810124, "grad_norm": 1.3974800109863281, "learning_rate": 0.00184746835443038, "loss": 1.0967, "step": 26510 }, { "epoch": 7.62945914844649, "grad_norm": 1.0641435384750366, "learning_rate": 0.0018474108170310703, "loss": 0.6327, "step": 26520 }, { "epoch": 7.6323360184119675, "grad_norm": 1.6282250881195068, "learning_rate": 0.0018473532796317606, "loss": 0.7386, "step": 26530 }, { "epoch": 7.6352128883774455, "grad_norm": 2.085780382156372, "learning_rate": 0.0018472957422324512, "loss": 0.9993, "step": 26540 }, { "epoch": 7.638089758342923, "grad_norm": 1.3524550199508667, "learning_rate": 0.0018472382048331415, "loss": 0.8835, "step": 26550 }, { "epoch": 7.640966628308401, "grad_norm": 1.1985105276107788, "learning_rate": 0.001847180667433832, "loss": 0.963, "step": 26560 }, { "epoch": 7.643843498273878, "grad_norm": 1.4305346012115479, "learning_rate": 0.0018471231300345224, "loss": 1.1697, "step": 26570 }, { "epoch": 7.646720368239356, "grad_norm": 1.5421671867370605, "learning_rate": 0.001847065592635213, "loss": 0.9535, "step": 26580 }, { "epoch": 7.649597238204834, "grad_norm": 1.723829746246338, "learning_rate": 0.0018470080552359033, "loss": 0.9408, "step": 26590 }, { "epoch": 7.652474108170311, "grad_norm": 0.9923287630081177, "learning_rate": 0.0018469505178365937, "loss": 0.8007, "step": 26600 }, { "epoch": 7.655350978135788, "grad_norm": 1.1559356451034546, "learning_rate": 0.0018468929804372842, "loss": 0.8714, "step": 26610 }, { "epoch": 7.658227848101266, "grad_norm": 0.8138514161109924, "learning_rate": 0.0018468354430379748, "loss": 0.7881, "step": 26620 }, { "epoch": 7.661104718066744, "grad_norm": 1.5040456056594849, "learning_rate": 0.0018467779056386652, "loss": 0.9544, "step": 26630 }, { "epoch": 7.663981588032221, "grad_norm": 1.2123726606369019, "learning_rate": 0.0018467203682393557, "loss": 1.0008, "step": 26640 }, { "epoch": 7.666858457997699, "grad_norm": 1.0635079145431519, "learning_rate": 0.001846662830840046, "loss": 0.953, "step": 26650 }, { "epoch": 7.669735327963176, "grad_norm": 0.8213714361190796, "learning_rate": 0.0018466052934407364, "loss": 0.761, "step": 26660 }, { "epoch": 7.672612197928654, "grad_norm": 3.1888434886932373, "learning_rate": 0.001846547756041427, "loss": 0.8453, "step": 26670 }, { "epoch": 7.675489067894131, "grad_norm": 1.2124433517456055, "learning_rate": 0.0018464902186421175, "loss": 1.181, "step": 26680 }, { "epoch": 7.678365937859609, "grad_norm": 1.5944688320159912, "learning_rate": 0.0018464326812428079, "loss": 0.9739, "step": 26690 }, { "epoch": 7.681242807825086, "grad_norm": 1.9617986679077148, "learning_rate": 0.0018463751438434985, "loss": 0.8993, "step": 26700 }, { "epoch": 7.684119677790564, "grad_norm": 0.9678574800491333, "learning_rate": 0.0018463176064441886, "loss": 0.8731, "step": 26710 }, { "epoch": 7.686996547756041, "grad_norm": 2.0827183723449707, "learning_rate": 0.0018462600690448791, "loss": 0.7677, "step": 26720 }, { "epoch": 7.689873417721519, "grad_norm": 0.8766736388206482, "learning_rate": 0.0018462025316455697, "loss": 0.676, "step": 26730 }, { "epoch": 7.692750287686996, "grad_norm": 1.1048827171325684, "learning_rate": 0.00184614499424626, "loss": 0.9688, "step": 26740 }, { "epoch": 7.695627157652474, "grad_norm": 1.2294121980667114, "learning_rate": 0.0018460874568469506, "loss": 0.905, "step": 26750 }, { "epoch": 7.698504027617952, "grad_norm": 0.9476693272590637, "learning_rate": 0.0018460299194476412, "loss": 0.8744, "step": 26760 }, { "epoch": 7.701380897583429, "grad_norm": 1.2215681076049805, "learning_rate": 0.0018459723820483313, "loss": 0.8176, "step": 26770 }, { "epoch": 7.704257767548906, "grad_norm": 1.564151406288147, "learning_rate": 0.0018459148446490219, "loss": 0.7904, "step": 26780 }, { "epoch": 7.707134637514384, "grad_norm": 2.2803313732147217, "learning_rate": 0.0018458573072497124, "loss": 0.8838, "step": 26790 }, { "epoch": 7.710011507479862, "grad_norm": 1.2185486555099487, "learning_rate": 0.0018457997698504028, "loss": 0.8879, "step": 26800 }, { "epoch": 7.712888377445339, "grad_norm": 2.042571544647217, "learning_rate": 0.0018457422324510934, "loss": 0.8574, "step": 26810 }, { "epoch": 7.715765247410817, "grad_norm": 1.233294129371643, "learning_rate": 0.0018456846950517837, "loss": 1.0336, "step": 26820 }, { "epoch": 7.7186421173762945, "grad_norm": 1.2738909721374512, "learning_rate": 0.001845627157652474, "loss": 0.6647, "step": 26830 }, { "epoch": 7.7215189873417724, "grad_norm": 1.7232662439346313, "learning_rate": 0.0018455696202531646, "loss": 0.7995, "step": 26840 }, { "epoch": 7.7243958573072495, "grad_norm": 1.0223969221115112, "learning_rate": 0.001845512082853855, "loss": 0.7674, "step": 26850 }, { "epoch": 7.7272727272727275, "grad_norm": 1.793563961982727, "learning_rate": 0.0018454545454545455, "loss": 1.0397, "step": 26860 }, { "epoch": 7.730149597238205, "grad_norm": 0.8978668451309204, "learning_rate": 0.001845397008055236, "loss": 0.8843, "step": 26870 }, { "epoch": 7.733026467203683, "grad_norm": 2.835134744644165, "learning_rate": 0.0018453394706559264, "loss": 0.8174, "step": 26880 }, { "epoch": 7.73590333716916, "grad_norm": 1.2454029321670532, "learning_rate": 0.0018452819332566168, "loss": 0.7014, "step": 26890 }, { "epoch": 7.738780207134638, "grad_norm": 1.655519723892212, "learning_rate": 0.0018452243958573073, "loss": 0.8529, "step": 26900 }, { "epoch": 7.741657077100115, "grad_norm": 1.405697226524353, "learning_rate": 0.0018451668584579977, "loss": 0.7011, "step": 26910 }, { "epoch": 7.744533947065593, "grad_norm": 1.0947728157043457, "learning_rate": 0.0018451093210586883, "loss": 0.9603, "step": 26920 }, { "epoch": 7.74741081703107, "grad_norm": 0.9518083930015564, "learning_rate": 0.0018450517836593786, "loss": 0.731, "step": 26930 }, { "epoch": 7.750287686996548, "grad_norm": 0.9243097901344299, "learning_rate": 0.0018449942462600692, "loss": 0.7151, "step": 26940 }, { "epoch": 7.753164556962025, "grad_norm": 0.7165273427963257, "learning_rate": 0.0018449367088607595, "loss": 0.7414, "step": 26950 }, { "epoch": 7.756041426927503, "grad_norm": 0.7735826969146729, "learning_rate": 0.0018448791714614499, "loss": 0.992, "step": 26960 }, { "epoch": 7.758918296892981, "grad_norm": 0.6806305646896362, "learning_rate": 0.0018448216340621404, "loss": 0.7704, "step": 26970 }, { "epoch": 7.761795166858458, "grad_norm": 1.479196310043335, "learning_rate": 0.001844764096662831, "loss": 0.7935, "step": 26980 }, { "epoch": 7.764672036823936, "grad_norm": 1.2966054677963257, "learning_rate": 0.0018447065592635213, "loss": 0.7181, "step": 26990 }, { "epoch": 7.767548906789413, "grad_norm": 0.8485930562019348, "learning_rate": 0.001844649021864212, "loss": 0.7997, "step": 27000 }, { "epoch": 7.770425776754891, "grad_norm": 0.9484483003616333, "learning_rate": 0.0018445914844649022, "loss": 0.8588, "step": 27010 }, { "epoch": 7.773302646720368, "grad_norm": 1.8749079704284668, "learning_rate": 0.0018445339470655926, "loss": 0.8456, "step": 27020 }, { "epoch": 7.776179516685846, "grad_norm": 0.7077147960662842, "learning_rate": 0.0018444764096662832, "loss": 0.7939, "step": 27030 }, { "epoch": 7.779056386651323, "grad_norm": 1.5798522233963013, "learning_rate": 0.0018444188722669735, "loss": 0.84, "step": 27040 }, { "epoch": 7.781933256616801, "grad_norm": 2.012892484664917, "learning_rate": 0.001844361334867664, "loss": 1.1157, "step": 27050 }, { "epoch": 7.784810126582278, "grad_norm": 0.8970484733581543, "learning_rate": 0.0018443037974683546, "loss": 0.8612, "step": 27060 }, { "epoch": 7.787686996547756, "grad_norm": 1.6158192157745361, "learning_rate": 0.0018442462600690448, "loss": 0.8797, "step": 27070 }, { "epoch": 7.790563866513233, "grad_norm": 1.6403086185455322, "learning_rate": 0.0018441887226697353, "loss": 0.8813, "step": 27080 }, { "epoch": 7.793440736478711, "grad_norm": 1.4114497900009155, "learning_rate": 0.001844131185270426, "loss": 0.7789, "step": 27090 }, { "epoch": 7.796317606444188, "grad_norm": 1.2437723875045776, "learning_rate": 0.0018440736478711162, "loss": 1.0239, "step": 27100 }, { "epoch": 7.799194476409666, "grad_norm": 1.1607722043991089, "learning_rate": 0.0018440161104718068, "loss": 0.7717, "step": 27110 }, { "epoch": 7.802071346375143, "grad_norm": 1.8224881887435913, "learning_rate": 0.0018439585730724972, "loss": 0.9464, "step": 27120 }, { "epoch": 7.804948216340621, "grad_norm": 0.9371421337127686, "learning_rate": 0.0018439010356731875, "loss": 0.8686, "step": 27130 }, { "epoch": 7.807825086306099, "grad_norm": 1.5418026447296143, "learning_rate": 0.001843843498273878, "loss": 0.7331, "step": 27140 }, { "epoch": 7.8107019562715765, "grad_norm": 1.42243492603302, "learning_rate": 0.0018437859608745684, "loss": 0.8103, "step": 27150 }, { "epoch": 7.8135788262370545, "grad_norm": 1.6278635263442993, "learning_rate": 0.001843728423475259, "loss": 0.8278, "step": 27160 }, { "epoch": 7.8164556962025316, "grad_norm": 1.4501152038574219, "learning_rate": 0.0018436708860759495, "loss": 0.7525, "step": 27170 }, { "epoch": 7.8193325661680095, "grad_norm": 1.0205878019332886, "learning_rate": 0.0018436133486766397, "loss": 0.9515, "step": 27180 }, { "epoch": 7.822209436133487, "grad_norm": 1.8697727918624878, "learning_rate": 0.0018435558112773302, "loss": 0.7648, "step": 27190 }, { "epoch": 7.825086306098965, "grad_norm": 1.3836438655853271, "learning_rate": 0.0018434982738780208, "loss": 1.0539, "step": 27200 }, { "epoch": 7.827963176064442, "grad_norm": 1.3872405290603638, "learning_rate": 0.0018434407364787111, "loss": 0.8265, "step": 27210 }, { "epoch": 7.83084004602992, "grad_norm": 1.5602128505706787, "learning_rate": 0.0018433831990794017, "loss": 0.9569, "step": 27220 }, { "epoch": 7.833716915995397, "grad_norm": 0.7920131087303162, "learning_rate": 0.0018433256616800923, "loss": 0.6322, "step": 27230 }, { "epoch": 7.836593785960875, "grad_norm": 1.6805986166000366, "learning_rate": 0.0018432681242807824, "loss": 0.8245, "step": 27240 }, { "epoch": 7.839470655926352, "grad_norm": 1.0897412300109863, "learning_rate": 0.001843210586881473, "loss": 0.8578, "step": 27250 }, { "epoch": 7.84234752589183, "grad_norm": 0.6987788081169128, "learning_rate": 0.0018431530494821633, "loss": 0.8374, "step": 27260 }, { "epoch": 7.845224395857307, "grad_norm": 1.1635479927062988, "learning_rate": 0.0018430955120828539, "loss": 0.7546, "step": 27270 }, { "epoch": 7.848101265822785, "grad_norm": 1.546025037765503, "learning_rate": 0.0018430379746835444, "loss": 0.8032, "step": 27280 }, { "epoch": 7.850978135788262, "grad_norm": 1.4318809509277344, "learning_rate": 0.0018429804372842348, "loss": 0.7351, "step": 27290 }, { "epoch": 7.85385500575374, "grad_norm": 1.491119623184204, "learning_rate": 0.0018429228998849251, "loss": 0.9432, "step": 27300 }, { "epoch": 7.856731875719218, "grad_norm": 1.246750831604004, "learning_rate": 0.0018428653624856157, "loss": 1.0531, "step": 27310 }, { "epoch": 7.859608745684695, "grad_norm": 0.9244565367698669, "learning_rate": 0.001842807825086306, "loss": 0.9506, "step": 27320 }, { "epoch": 7.862485615650172, "grad_norm": 2.219531774520874, "learning_rate": 0.0018427502876869966, "loss": 0.7935, "step": 27330 }, { "epoch": 7.86536248561565, "grad_norm": 0.9717763662338257, "learning_rate": 0.0018426927502876872, "loss": 0.6821, "step": 27340 }, { "epoch": 7.868239355581128, "grad_norm": 1.3118114471435547, "learning_rate": 0.0018426352128883775, "loss": 0.7913, "step": 27350 }, { "epoch": 7.871116225546605, "grad_norm": 1.0228215456008911, "learning_rate": 0.0018425776754890679, "loss": 0.8698, "step": 27360 }, { "epoch": 7.873993095512083, "grad_norm": 2.998915433883667, "learning_rate": 0.0018425201380897584, "loss": 0.9843, "step": 27370 }, { "epoch": 7.87686996547756, "grad_norm": 1.5856322050094604, "learning_rate": 0.0018424626006904488, "loss": 0.6731, "step": 27380 }, { "epoch": 7.879746835443038, "grad_norm": 1.7046804428100586, "learning_rate": 0.0018424050632911393, "loss": 0.7788, "step": 27390 }, { "epoch": 7.882623705408515, "grad_norm": 1.11419677734375, "learning_rate": 0.0018423475258918297, "loss": 0.7588, "step": 27400 }, { "epoch": 7.885500575373993, "grad_norm": 1.2081594467163086, "learning_rate": 0.0018422899884925203, "loss": 0.7162, "step": 27410 }, { "epoch": 7.88837744533947, "grad_norm": 1.8726354837417603, "learning_rate": 0.0018422324510932106, "loss": 0.7603, "step": 27420 }, { "epoch": 7.891254315304948, "grad_norm": 1.5833780765533447, "learning_rate": 0.001842174913693901, "loss": 0.7891, "step": 27430 }, { "epoch": 7.8941311852704255, "grad_norm": 1.3175402879714966, "learning_rate": 0.0018421173762945915, "loss": 0.9948, "step": 27440 }, { "epoch": 7.897008055235903, "grad_norm": 0.7412970662117004, "learning_rate": 0.001842059838895282, "loss": 0.861, "step": 27450 }, { "epoch": 7.8998849252013805, "grad_norm": 1.3667752742767334, "learning_rate": 0.0018420023014959724, "loss": 1.0526, "step": 27460 }, { "epoch": 7.9027617951668585, "grad_norm": 1.1199915409088135, "learning_rate": 0.001841944764096663, "loss": 0.7306, "step": 27470 }, { "epoch": 7.9056386651323365, "grad_norm": 0.9539288282394409, "learning_rate": 0.0018418872266973533, "loss": 0.7543, "step": 27480 }, { "epoch": 7.908515535097814, "grad_norm": 1.3355215787887573, "learning_rate": 0.0018418296892980437, "loss": 1.0102, "step": 27490 }, { "epoch": 7.911392405063291, "grad_norm": 2.0873641967773438, "learning_rate": 0.0018417721518987342, "loss": 0.8353, "step": 27500 }, { "epoch": 7.914269275028769, "grad_norm": 1.8622257709503174, "learning_rate": 0.0018417146144994246, "loss": 0.9255, "step": 27510 }, { "epoch": 7.917146144994247, "grad_norm": 0.8917598724365234, "learning_rate": 0.0018416570771001152, "loss": 0.8697, "step": 27520 }, { "epoch": 7.920023014959724, "grad_norm": 1.4693819284439087, "learning_rate": 0.0018415995397008057, "loss": 0.8077, "step": 27530 }, { "epoch": 7.922899884925202, "grad_norm": 1.3681637048721313, "learning_rate": 0.0018415420023014958, "loss": 0.7956, "step": 27540 }, { "epoch": 7.925776754890679, "grad_norm": 1.4151690006256104, "learning_rate": 0.0018414844649021864, "loss": 0.7629, "step": 27550 }, { "epoch": 7.928653624856157, "grad_norm": 1.1228783130645752, "learning_rate": 0.001841426927502877, "loss": 0.9397, "step": 27560 }, { "epoch": 7.931530494821634, "grad_norm": 1.1872198581695557, "learning_rate": 0.0018413693901035673, "loss": 0.7956, "step": 27570 }, { "epoch": 7.934407364787112, "grad_norm": 4.287310600280762, "learning_rate": 0.0018413118527042579, "loss": 0.8487, "step": 27580 }, { "epoch": 7.937284234752589, "grad_norm": 0.9980314373970032, "learning_rate": 0.0018412543153049485, "loss": 0.8012, "step": 27590 }, { "epoch": 7.940161104718067, "grad_norm": 1.2608562707901, "learning_rate": 0.0018411967779056386, "loss": 0.8275, "step": 27600 }, { "epoch": 7.943037974683544, "grad_norm": 1.5455505847930908, "learning_rate": 0.0018411392405063291, "loss": 0.8369, "step": 27610 }, { "epoch": 7.945914844649022, "grad_norm": 1.1124552488327026, "learning_rate": 0.0018410817031070195, "loss": 0.8497, "step": 27620 }, { "epoch": 7.948791714614499, "grad_norm": 1.0247080326080322, "learning_rate": 0.00184102416570771, "loss": 0.7734, "step": 27630 }, { "epoch": 7.951668584579977, "grad_norm": 2.1417346000671387, "learning_rate": 0.0018409666283084006, "loss": 0.8292, "step": 27640 }, { "epoch": 7.954545454545455, "grad_norm": 1.3035423755645752, "learning_rate": 0.001840909090909091, "loss": 0.8417, "step": 27650 }, { "epoch": 7.957422324510932, "grad_norm": 1.2864491939544678, "learning_rate": 0.0018408515535097813, "loss": 0.8353, "step": 27660 }, { "epoch": 7.960299194476409, "grad_norm": 0.9795665144920349, "learning_rate": 0.0018407940161104719, "loss": 0.7195, "step": 27670 }, { "epoch": 7.963176064441887, "grad_norm": 1.2985291481018066, "learning_rate": 0.0018407364787111622, "loss": 0.7125, "step": 27680 }, { "epoch": 7.966052934407365, "grad_norm": 1.0833722352981567, "learning_rate": 0.0018406789413118528, "loss": 0.8308, "step": 27690 }, { "epoch": 7.968929804372842, "grad_norm": 1.687317967414856, "learning_rate": 0.0018406214039125434, "loss": 0.7844, "step": 27700 }, { "epoch": 7.97180667433832, "grad_norm": 1.2105580568313599, "learning_rate": 0.0018405638665132337, "loss": 0.8235, "step": 27710 }, { "epoch": 7.974683544303797, "grad_norm": 1.2923005819320679, "learning_rate": 0.001840506329113924, "loss": 0.7473, "step": 27720 }, { "epoch": 7.977560414269275, "grad_norm": 0.6675378680229187, "learning_rate": 0.0018404487917146144, "loss": 0.6112, "step": 27730 }, { "epoch": 7.980437284234752, "grad_norm": 1.766102910041809, "learning_rate": 0.001840391254315305, "loss": 0.9414, "step": 27740 }, { "epoch": 7.98331415420023, "grad_norm": 1.840646743774414, "learning_rate": 0.0018403337169159955, "loss": 0.9147, "step": 27750 }, { "epoch": 7.9861910241657075, "grad_norm": 0.7832748293876648, "learning_rate": 0.0018402761795166859, "loss": 0.8095, "step": 27760 }, { "epoch": 7.9890678941311855, "grad_norm": 1.1009013652801514, "learning_rate": 0.0018402186421173764, "loss": 0.9148, "step": 27770 }, { "epoch": 7.9919447640966625, "grad_norm": 0.7369806170463562, "learning_rate": 0.0018401611047180668, "loss": 0.6413, "step": 27780 }, { "epoch": 7.9948216340621405, "grad_norm": 1.787237286567688, "learning_rate": 0.0018401035673187571, "loss": 0.8728, "step": 27790 }, { "epoch": 7.997698504027618, "grad_norm": 0.6361650228500366, "learning_rate": 0.0018400460299194477, "loss": 0.8344, "step": 27800 }, { "epoch": 8.000575373993096, "grad_norm": 1.2444502115249634, "learning_rate": 0.0018399884925201383, "loss": 0.7144, "step": 27810 }, { "epoch": 8.003452243958574, "grad_norm": 1.3982867002487183, "learning_rate": 0.0018399309551208286, "loss": 0.7993, "step": 27820 }, { "epoch": 8.00632911392405, "grad_norm": 0.7887259721755981, "learning_rate": 0.0018398734177215192, "loss": 0.9281, "step": 27830 }, { "epoch": 8.009205983889528, "grad_norm": 0.7848648428916931, "learning_rate": 0.0018398158803222093, "loss": 0.9161, "step": 27840 }, { "epoch": 8.012082853855006, "grad_norm": 1.4821079969406128, "learning_rate": 0.0018397583429228999, "loss": 0.6469, "step": 27850 }, { "epoch": 8.014959723820484, "grad_norm": 0.9721201062202454, "learning_rate": 0.0018397008055235904, "loss": 0.6584, "step": 27860 }, { "epoch": 8.017836593785962, "grad_norm": 1.5887385606765747, "learning_rate": 0.0018396432681242808, "loss": 0.7322, "step": 27870 }, { "epoch": 8.020713463751438, "grad_norm": 1.5401794910430908, "learning_rate": 0.0018395857307249713, "loss": 0.8314, "step": 27880 }, { "epoch": 8.023590333716916, "grad_norm": 1.3713515996932983, "learning_rate": 0.001839528193325662, "loss": 0.7851, "step": 27890 }, { "epoch": 8.026467203682394, "grad_norm": 1.041642189025879, "learning_rate": 0.001839470655926352, "loss": 0.7473, "step": 27900 }, { "epoch": 8.029344073647872, "grad_norm": 1.5265151262283325, "learning_rate": 0.0018394131185270426, "loss": 0.8215, "step": 27910 }, { "epoch": 8.032220943613348, "grad_norm": 1.8586386442184448, "learning_rate": 0.0018393555811277332, "loss": 0.9313, "step": 27920 }, { "epoch": 8.035097813578826, "grad_norm": 1.1101897954940796, "learning_rate": 0.0018392980437284235, "loss": 0.775, "step": 27930 }, { "epoch": 8.037974683544304, "grad_norm": 1.262751817703247, "learning_rate": 0.001839240506329114, "loss": 0.8612, "step": 27940 }, { "epoch": 8.040851553509782, "grad_norm": 0.8436989188194275, "learning_rate": 0.0018391829689298042, "loss": 0.9109, "step": 27950 }, { "epoch": 8.043728423475258, "grad_norm": 0.7253473997116089, "learning_rate": 0.0018391254315304948, "loss": 0.8072, "step": 27960 }, { "epoch": 8.046605293440736, "grad_norm": 1.235980749130249, "learning_rate": 0.0018390678941311853, "loss": 0.7397, "step": 27970 }, { "epoch": 8.049482163406214, "grad_norm": 1.103413462638855, "learning_rate": 0.0018390103567318757, "loss": 0.7558, "step": 27980 }, { "epoch": 8.052359033371692, "grad_norm": 1.943880558013916, "learning_rate": 0.0018389528193325662, "loss": 0.8502, "step": 27990 }, { "epoch": 8.055235903337168, "grad_norm": 1.444129467010498, "learning_rate": 0.0018388952819332568, "loss": 0.8113, "step": 28000 }, { "epoch": 8.058112773302646, "grad_norm": 0.8083311915397644, "learning_rate": 0.001838837744533947, "loss": 0.8082, "step": 28010 }, { "epoch": 8.060989643268124, "grad_norm": 1.1552622318267822, "learning_rate": 0.0018387802071346375, "loss": 0.7118, "step": 28020 }, { "epoch": 8.063866513233602, "grad_norm": 1.3381234407424927, "learning_rate": 0.001838722669735328, "loss": 0.6552, "step": 28030 }, { "epoch": 8.06674338319908, "grad_norm": 2.8730344772338867, "learning_rate": 0.0018386651323360184, "loss": 0.7207, "step": 28040 }, { "epoch": 8.069620253164556, "grad_norm": 2.2386252880096436, "learning_rate": 0.001838607594936709, "loss": 0.9072, "step": 28050 }, { "epoch": 8.072497123130034, "grad_norm": 1.196751594543457, "learning_rate": 0.0018385500575373995, "loss": 0.6615, "step": 28060 }, { "epoch": 8.075373993095512, "grad_norm": 0.9925245642662048, "learning_rate": 0.0018384925201380897, "loss": 0.6507, "step": 28070 }, { "epoch": 8.07825086306099, "grad_norm": 1.474425196647644, "learning_rate": 0.0018384349827387802, "loss": 0.7888, "step": 28080 }, { "epoch": 8.081127733026467, "grad_norm": 3.152496576309204, "learning_rate": 0.0018383774453394706, "loss": 0.8869, "step": 28090 }, { "epoch": 8.084004602991945, "grad_norm": 1.2862727642059326, "learning_rate": 0.0018383199079401611, "loss": 0.9149, "step": 28100 }, { "epoch": 8.086881472957423, "grad_norm": 1.599048376083374, "learning_rate": 0.0018382623705408517, "loss": 0.8265, "step": 28110 }, { "epoch": 8.0897583429229, "grad_norm": 0.7922273874282837, "learning_rate": 0.001838204833141542, "loss": 0.7344, "step": 28120 }, { "epoch": 8.092635212888377, "grad_norm": 1.1054296493530273, "learning_rate": 0.0018381472957422324, "loss": 0.7434, "step": 28130 }, { "epoch": 8.095512082853855, "grad_norm": 1.065529465675354, "learning_rate": 0.001838089758342923, "loss": 0.757, "step": 28140 }, { "epoch": 8.098388952819333, "grad_norm": 1.3134580850601196, "learning_rate": 0.0018380322209436133, "loss": 0.8775, "step": 28150 }, { "epoch": 8.10126582278481, "grad_norm": 1.1497862339019775, "learning_rate": 0.0018379746835443039, "loss": 0.89, "step": 28160 }, { "epoch": 8.104142692750287, "grad_norm": 1.9116740226745605, "learning_rate": 0.0018379171461449944, "loss": 0.8831, "step": 28170 }, { "epoch": 8.107019562715765, "grad_norm": 1.1110037565231323, "learning_rate": 0.0018378596087456848, "loss": 0.7412, "step": 28180 }, { "epoch": 8.109896432681243, "grad_norm": 1.3593308925628662, "learning_rate": 0.0018378020713463751, "loss": 0.9285, "step": 28190 }, { "epoch": 8.11277330264672, "grad_norm": 1.7339872121810913, "learning_rate": 0.0018377445339470655, "loss": 0.761, "step": 28200 }, { "epoch": 8.115650172612199, "grad_norm": 1.754722237586975, "learning_rate": 0.001837686996547756, "loss": 0.9064, "step": 28210 }, { "epoch": 8.118527042577675, "grad_norm": 0.9037410616874695, "learning_rate": 0.0018376294591484466, "loss": 0.692, "step": 28220 }, { "epoch": 8.121403912543153, "grad_norm": 1.5128544569015503, "learning_rate": 0.001837571921749137, "loss": 0.7005, "step": 28230 }, { "epoch": 8.124280782508631, "grad_norm": 1.1229854822158813, "learning_rate": 0.0018375143843498275, "loss": 0.8564, "step": 28240 }, { "epoch": 8.127157652474109, "grad_norm": 1.97454035282135, "learning_rate": 0.0018374568469505179, "loss": 0.6844, "step": 28250 }, { "epoch": 8.130034522439585, "grad_norm": 1.6559849977493286, "learning_rate": 0.0018373993095512082, "loss": 0.6505, "step": 28260 }, { "epoch": 8.132911392405063, "grad_norm": 1.2266103029251099, "learning_rate": 0.0018373417721518988, "loss": 0.8367, "step": 28270 }, { "epoch": 8.135788262370541, "grad_norm": 2.2028274536132812, "learning_rate": 0.0018372842347525893, "loss": 1.0446, "step": 28280 }, { "epoch": 8.138665132336019, "grad_norm": 1.1911685466766357, "learning_rate": 0.0018372266973532797, "loss": 0.7193, "step": 28290 }, { "epoch": 8.141542002301495, "grad_norm": 1.3003416061401367, "learning_rate": 0.0018371691599539702, "loss": 0.7142, "step": 28300 }, { "epoch": 8.144418872266973, "grad_norm": 1.0305830240249634, "learning_rate": 0.0018371116225546604, "loss": 0.6335, "step": 28310 }, { "epoch": 8.147295742232451, "grad_norm": 1.7524652481079102, "learning_rate": 0.001837054085155351, "loss": 0.8525, "step": 28320 }, { "epoch": 8.15017261219793, "grad_norm": 2.207526922225952, "learning_rate": 0.0018369965477560415, "loss": 0.6421, "step": 28330 }, { "epoch": 8.153049482163405, "grad_norm": 1.6853011846542358, "learning_rate": 0.0018369390103567319, "loss": 0.8596, "step": 28340 }, { "epoch": 8.155926352128883, "grad_norm": 0.7596163749694824, "learning_rate": 0.0018368814729574224, "loss": 0.6435, "step": 28350 }, { "epoch": 8.158803222094361, "grad_norm": 2.2789435386657715, "learning_rate": 0.001836823935558113, "loss": 0.8267, "step": 28360 }, { "epoch": 8.16168009205984, "grad_norm": 1.6968554258346558, "learning_rate": 0.0018367663981588031, "loss": 0.7142, "step": 28370 }, { "epoch": 8.164556962025316, "grad_norm": 0.8369816541671753, "learning_rate": 0.0018367088607594937, "loss": 0.8463, "step": 28380 }, { "epoch": 8.167433831990794, "grad_norm": 1.1027629375457764, "learning_rate": 0.0018366513233601842, "loss": 0.703, "step": 28390 }, { "epoch": 8.170310701956272, "grad_norm": 1.7330313920974731, "learning_rate": 0.0018365937859608746, "loss": 0.7915, "step": 28400 }, { "epoch": 8.17318757192175, "grad_norm": 1.164069414138794, "learning_rate": 0.0018365362485615652, "loss": 0.8388, "step": 28410 }, { "epoch": 8.176064441887227, "grad_norm": 2.282851219177246, "learning_rate": 0.0018364787111622555, "loss": 0.8456, "step": 28420 }, { "epoch": 8.178941311852704, "grad_norm": 1.2070153951644897, "learning_rate": 0.0018364211737629458, "loss": 0.8045, "step": 28430 }, { "epoch": 8.181818181818182, "grad_norm": 1.2730317115783691, "learning_rate": 0.0018363636363636364, "loss": 0.659, "step": 28440 }, { "epoch": 8.18469505178366, "grad_norm": 1.9524853229522705, "learning_rate": 0.0018363060989643268, "loss": 0.9438, "step": 28450 }, { "epoch": 8.187571921749138, "grad_norm": 0.7821930050849915, "learning_rate": 0.0018362485615650173, "loss": 0.7941, "step": 28460 }, { "epoch": 8.190448791714614, "grad_norm": 0.9886065125465393, "learning_rate": 0.0018361910241657079, "loss": 0.7474, "step": 28470 }, { "epoch": 8.193325661680092, "grad_norm": 1.3340349197387695, "learning_rate": 0.0018361334867663982, "loss": 0.6443, "step": 28480 }, { "epoch": 8.19620253164557, "grad_norm": 1.78147554397583, "learning_rate": 0.0018360759493670886, "loss": 0.9379, "step": 28490 }, { "epoch": 8.199079401611048, "grad_norm": 1.3916758298873901, "learning_rate": 0.0018360184119677791, "loss": 0.7655, "step": 28500 }, { "epoch": 8.201956271576524, "grad_norm": 1.4686082601547241, "learning_rate": 0.0018359608745684695, "loss": 0.602, "step": 28510 }, { "epoch": 8.204833141542002, "grad_norm": 1.1195590496063232, "learning_rate": 0.00183590333716916, "loss": 0.7051, "step": 28520 }, { "epoch": 8.20771001150748, "grad_norm": 0.7629860043525696, "learning_rate": 0.0018358457997698504, "loss": 0.7555, "step": 28530 }, { "epoch": 8.210586881472958, "grad_norm": 1.4739998579025269, "learning_rate": 0.001835788262370541, "loss": 0.6729, "step": 28540 }, { "epoch": 8.213463751438436, "grad_norm": 1.482694149017334, "learning_rate": 0.0018357307249712313, "loss": 0.7818, "step": 28550 }, { "epoch": 8.216340621403912, "grad_norm": 1.608647108078003, "learning_rate": 0.0018356731875719217, "loss": 0.8833, "step": 28560 }, { "epoch": 8.21921749136939, "grad_norm": 1.707637071609497, "learning_rate": 0.0018356156501726122, "loss": 0.6781, "step": 28570 }, { "epoch": 8.222094361334868, "grad_norm": 1.0956863164901733, "learning_rate": 0.0018355581127733028, "loss": 0.6699, "step": 28580 }, { "epoch": 8.224971231300346, "grad_norm": 2.0922930240631104, "learning_rate": 0.0018355005753739931, "loss": 0.8002, "step": 28590 }, { "epoch": 8.227848101265822, "grad_norm": 1.9938383102416992, "learning_rate": 0.0018354430379746837, "loss": 0.8917, "step": 28600 }, { "epoch": 8.2307249712313, "grad_norm": 0.7997027039527893, "learning_rate": 0.001835385500575374, "loss": 0.6158, "step": 28610 }, { "epoch": 8.233601841196778, "grad_norm": 1.6779471635818481, "learning_rate": 0.0018353279631760644, "loss": 0.8508, "step": 28620 }, { "epoch": 8.236478711162256, "grad_norm": 2.1293399333953857, "learning_rate": 0.001835270425776755, "loss": 1.0197, "step": 28630 }, { "epoch": 8.239355581127732, "grad_norm": 0.7005317807197571, "learning_rate": 0.0018352128883774455, "loss": 0.7599, "step": 28640 }, { "epoch": 8.24223245109321, "grad_norm": 1.1987130641937256, "learning_rate": 0.0018351553509781359, "loss": 1.0041, "step": 28650 }, { "epoch": 8.245109321058688, "grad_norm": 3.8146004676818848, "learning_rate": 0.0018350978135788264, "loss": 0.8218, "step": 28660 }, { "epoch": 8.247986191024166, "grad_norm": 1.9492127895355225, "learning_rate": 0.0018350402761795166, "loss": 0.7011, "step": 28670 }, { "epoch": 8.250863060989643, "grad_norm": 1.1026982069015503, "learning_rate": 0.0018349827387802071, "loss": 0.8142, "step": 28680 }, { "epoch": 8.25373993095512, "grad_norm": 2.4310100078582764, "learning_rate": 0.0018349252013808977, "loss": 0.7345, "step": 28690 }, { "epoch": 8.256616800920598, "grad_norm": 1.781389832496643, "learning_rate": 0.001834867663981588, "loss": 0.7304, "step": 28700 }, { "epoch": 8.259493670886076, "grad_norm": 1.4661245346069336, "learning_rate": 0.0018348101265822786, "loss": 0.9464, "step": 28710 }, { "epoch": 8.262370540851553, "grad_norm": 0.9246181845664978, "learning_rate": 0.0018347525891829692, "loss": 0.887, "step": 28720 }, { "epoch": 8.26524741081703, "grad_norm": 1.3104420900344849, "learning_rate": 0.0018346950517836593, "loss": 0.7264, "step": 28730 }, { "epoch": 8.268124280782509, "grad_norm": 1.1407403945922852, "learning_rate": 0.0018346375143843499, "loss": 0.7052, "step": 28740 }, { "epoch": 8.271001150747987, "grad_norm": 1.0541510581970215, "learning_rate": 0.0018345799769850404, "loss": 0.8101, "step": 28750 }, { "epoch": 8.273878020713465, "grad_norm": 0.8686559796333313, "learning_rate": 0.0018345224395857308, "loss": 0.9465, "step": 28760 }, { "epoch": 8.27675489067894, "grad_norm": 1.042746663093567, "learning_rate": 0.0018344649021864213, "loss": 0.8879, "step": 28770 }, { "epoch": 8.279631760644419, "grad_norm": 1.060921311378479, "learning_rate": 0.0018344073647871115, "loss": 0.8855, "step": 28780 }, { "epoch": 8.282508630609897, "grad_norm": 0.8233322501182556, "learning_rate": 0.001834349827387802, "loss": 0.8843, "step": 28790 }, { "epoch": 8.285385500575375, "grad_norm": 1.0186163187026978, "learning_rate": 0.0018342922899884926, "loss": 0.8569, "step": 28800 }, { "epoch": 8.288262370540851, "grad_norm": 1.6854857206344604, "learning_rate": 0.001834234752589183, "loss": 0.8205, "step": 28810 }, { "epoch": 8.291139240506329, "grad_norm": 1.7193243503570557, "learning_rate": 0.0018341772151898735, "loss": 0.838, "step": 28820 }, { "epoch": 8.294016110471807, "grad_norm": 1.8176639080047607, "learning_rate": 0.001834119677790564, "loss": 0.7704, "step": 28830 }, { "epoch": 8.296892980437285, "grad_norm": 1.4770944118499756, "learning_rate": 0.0018340621403912542, "loss": 0.7419, "step": 28840 }, { "epoch": 8.299769850402761, "grad_norm": 1.0537525415420532, "learning_rate": 0.0018340046029919448, "loss": 0.843, "step": 28850 }, { "epoch": 8.302646720368239, "grad_norm": 1.0968725681304932, "learning_rate": 0.0018339470655926353, "loss": 0.852, "step": 28860 }, { "epoch": 8.305523590333717, "grad_norm": 1.3248140811920166, "learning_rate": 0.0018338895281933257, "loss": 0.8943, "step": 28870 }, { "epoch": 8.308400460299195, "grad_norm": 1.105769395828247, "learning_rate": 0.0018338319907940162, "loss": 0.8777, "step": 28880 }, { "epoch": 8.311277330264671, "grad_norm": 1.0751581192016602, "learning_rate": 0.0018337744533947066, "loss": 0.8025, "step": 28890 }, { "epoch": 8.31415420023015, "grad_norm": 1.742035150527954, "learning_rate": 0.001833716915995397, "loss": 0.7851, "step": 28900 }, { "epoch": 8.317031070195627, "grad_norm": 0.6277735233306885, "learning_rate": 0.0018336593785960875, "loss": 0.8452, "step": 28910 }, { "epoch": 8.319907940161105, "grad_norm": 2.2407755851745605, "learning_rate": 0.0018336018411967778, "loss": 0.7612, "step": 28920 }, { "epoch": 8.322784810126583, "grad_norm": 2.553577423095703, "learning_rate": 0.0018335443037974684, "loss": 0.7391, "step": 28930 }, { "epoch": 8.32566168009206, "grad_norm": 1.2807927131652832, "learning_rate": 0.001833486766398159, "loss": 0.7112, "step": 28940 }, { "epoch": 8.328538550057537, "grad_norm": 0.9063690304756165, "learning_rate": 0.0018334292289988493, "loss": 1.0151, "step": 28950 }, { "epoch": 8.331415420023015, "grad_norm": 1.6850992441177368, "learning_rate": 0.0018333716915995397, "loss": 0.8235, "step": 28960 }, { "epoch": 8.334292289988493, "grad_norm": 2.372246503829956, "learning_rate": 0.0018333141542002302, "loss": 0.7615, "step": 28970 }, { "epoch": 8.33716915995397, "grad_norm": 1.3806582689285278, "learning_rate": 0.0018332566168009206, "loss": 0.6841, "step": 28980 }, { "epoch": 8.340046029919447, "grad_norm": 1.8372892141342163, "learning_rate": 0.0018331990794016111, "loss": 0.7055, "step": 28990 }, { "epoch": 8.342922899884925, "grad_norm": 1.2506179809570312, "learning_rate": 0.0018331415420023015, "loss": 0.8825, "step": 29000 }, { "epoch": 8.345799769850403, "grad_norm": 1.5719252824783325, "learning_rate": 0.001833084004602992, "loss": 0.7806, "step": 29010 }, { "epoch": 8.34867663981588, "grad_norm": 2.682512044906616, "learning_rate": 0.0018330264672036824, "loss": 0.9956, "step": 29020 }, { "epoch": 8.351553509781358, "grad_norm": 0.9110154509544373, "learning_rate": 0.0018329689298043727, "loss": 0.9901, "step": 29030 }, { "epoch": 8.354430379746836, "grad_norm": 1.0407670736312866, "learning_rate": 0.0018329113924050633, "loss": 0.7914, "step": 29040 }, { "epoch": 8.357307249712314, "grad_norm": 1.308397889137268, "learning_rate": 0.0018328538550057539, "loss": 0.9486, "step": 29050 }, { "epoch": 8.36018411967779, "grad_norm": 1.1846086978912354, "learning_rate": 0.0018327963176064442, "loss": 0.7606, "step": 29060 }, { "epoch": 8.363060989643268, "grad_norm": 0.9856820702552795, "learning_rate": 0.0018327387802071348, "loss": 0.9308, "step": 29070 }, { "epoch": 8.365937859608746, "grad_norm": 2.0349717140197754, "learning_rate": 0.0018326812428078251, "loss": 0.6249, "step": 29080 }, { "epoch": 8.368814729574224, "grad_norm": 1.2142008543014526, "learning_rate": 0.0018326237054085155, "loss": 0.6878, "step": 29090 }, { "epoch": 8.371691599539702, "grad_norm": 1.37223219871521, "learning_rate": 0.001832566168009206, "loss": 0.8619, "step": 29100 }, { "epoch": 8.374568469505178, "grad_norm": 1.6304481029510498, "learning_rate": 0.0018325086306098964, "loss": 0.9982, "step": 29110 }, { "epoch": 8.377445339470656, "grad_norm": 1.3570595979690552, "learning_rate": 0.001832451093210587, "loss": 0.7794, "step": 29120 }, { "epoch": 8.380322209436134, "grad_norm": 1.2945966720581055, "learning_rate": 0.0018323935558112775, "loss": 0.796, "step": 29130 }, { "epoch": 8.383199079401612, "grad_norm": 1.3137257099151611, "learning_rate": 0.0018323360184119676, "loss": 0.8619, "step": 29140 }, { "epoch": 8.386075949367088, "grad_norm": 2.7147624492645264, "learning_rate": 0.0018322784810126582, "loss": 0.8445, "step": 29150 }, { "epoch": 8.388952819332566, "grad_norm": 1.4790574312210083, "learning_rate": 0.0018322209436133488, "loss": 0.9148, "step": 29160 }, { "epoch": 8.391829689298044, "grad_norm": 1.3982446193695068, "learning_rate": 0.0018321634062140391, "loss": 0.8131, "step": 29170 }, { "epoch": 8.394706559263522, "grad_norm": 1.4066474437713623, "learning_rate": 0.0018321058688147297, "loss": 0.7919, "step": 29180 }, { "epoch": 8.397583429228998, "grad_norm": 10.175410270690918, "learning_rate": 0.0018320483314154202, "loss": 1.0048, "step": 29190 }, { "epoch": 8.400460299194476, "grad_norm": 2.5128464698791504, "learning_rate": 0.0018319907940161104, "loss": 0.9425, "step": 29200 }, { "epoch": 8.403337169159954, "grad_norm": 1.09033203125, "learning_rate": 0.001831933256616801, "loss": 0.9076, "step": 29210 }, { "epoch": 8.406214039125432, "grad_norm": 0.8826109766960144, "learning_rate": 0.0018318757192174913, "loss": 0.7417, "step": 29220 }, { "epoch": 8.409090909090908, "grad_norm": 1.9564279317855835, "learning_rate": 0.0018318181818181819, "loss": 0.7751, "step": 29230 }, { "epoch": 8.411967779056386, "grad_norm": 2.742443323135376, "learning_rate": 0.0018317606444188724, "loss": 0.6887, "step": 29240 }, { "epoch": 8.414844649021864, "grad_norm": 0.9618980288505554, "learning_rate": 0.0018317031070195628, "loss": 0.8174, "step": 29250 }, { "epoch": 8.417721518987342, "grad_norm": 0.9466444253921509, "learning_rate": 0.0018316455696202531, "loss": 0.8629, "step": 29260 }, { "epoch": 8.420598388952818, "grad_norm": 1.3364439010620117, "learning_rate": 0.0018315880322209437, "loss": 0.7781, "step": 29270 }, { "epoch": 8.423475258918296, "grad_norm": 1.883719801902771, "learning_rate": 0.001831530494821634, "loss": 0.968, "step": 29280 }, { "epoch": 8.426352128883774, "grad_norm": 1.9239585399627686, "learning_rate": 0.0018314729574223246, "loss": 0.8315, "step": 29290 }, { "epoch": 8.429228998849252, "grad_norm": 0.9598681926727295, "learning_rate": 0.0018314154200230151, "loss": 0.7555, "step": 29300 }, { "epoch": 8.43210586881473, "grad_norm": 1.050160527229309, "learning_rate": 0.0018313578826237055, "loss": 1.0054, "step": 29310 }, { "epoch": 8.434982738780207, "grad_norm": 1.3252346515655518, "learning_rate": 0.0018313003452243958, "loss": 0.8629, "step": 29320 }, { "epoch": 8.437859608745685, "grad_norm": 1.2387768030166626, "learning_rate": 0.0018312428078250864, "loss": 0.6754, "step": 29330 }, { "epoch": 8.440736478711163, "grad_norm": 1.6483831405639648, "learning_rate": 0.0018311852704257768, "loss": 0.8907, "step": 29340 }, { "epoch": 8.44361334867664, "grad_norm": 1.0171812772750854, "learning_rate": 0.0018311277330264673, "loss": 0.7084, "step": 29350 }, { "epoch": 8.446490218642117, "grad_norm": 1.9611561298370361, "learning_rate": 0.0018310701956271577, "loss": 0.8463, "step": 29360 }, { "epoch": 8.449367088607595, "grad_norm": 1.2205286026000977, "learning_rate": 0.0018310126582278482, "loss": 0.8913, "step": 29370 }, { "epoch": 8.452243958573073, "grad_norm": 0.8564532995223999, "learning_rate": 0.0018309551208285386, "loss": 0.8866, "step": 29380 }, { "epoch": 8.45512082853855, "grad_norm": 1.4204715490341187, "learning_rate": 0.001830897583429229, "loss": 0.8085, "step": 29390 }, { "epoch": 8.457997698504027, "grad_norm": 1.1682863235473633, "learning_rate": 0.0018308400460299195, "loss": 0.9366, "step": 29400 }, { "epoch": 8.460874568469505, "grad_norm": 2.3973724842071533, "learning_rate": 0.00183078250863061, "loss": 0.6804, "step": 29410 }, { "epoch": 8.463751438434983, "grad_norm": 1.6135131120681763, "learning_rate": 0.0018307249712313004, "loss": 0.9407, "step": 29420 }, { "epoch": 8.46662830840046, "grad_norm": 1.5902588367462158, "learning_rate": 0.001830667433831991, "loss": 0.9431, "step": 29430 }, { "epoch": 8.469505178365939, "grad_norm": 1.3812003135681152, "learning_rate": 0.0018306098964326813, "loss": 1.0102, "step": 29440 }, { "epoch": 8.472382048331415, "grad_norm": 1.332453727722168, "learning_rate": 0.0018305523590333717, "loss": 1.0075, "step": 29450 }, { "epoch": 8.475258918296893, "grad_norm": 1.1713465452194214, "learning_rate": 0.0018304948216340622, "loss": 0.7705, "step": 29460 }, { "epoch": 8.478135788262371, "grad_norm": 1.0733591318130493, "learning_rate": 0.0018304372842347526, "loss": 0.8669, "step": 29470 }, { "epoch": 8.481012658227849, "grad_norm": 1.4366106986999512, "learning_rate": 0.0018303797468354431, "loss": 0.7314, "step": 29480 }, { "epoch": 8.483889528193325, "grad_norm": 2.2067835330963135, "learning_rate": 0.0018303222094361337, "loss": 0.9192, "step": 29490 }, { "epoch": 8.486766398158803, "grad_norm": 1.6969308853149414, "learning_rate": 0.0018302646720368238, "loss": 0.8258, "step": 29500 }, { "epoch": 8.489643268124281, "grad_norm": 0.9831934571266174, "learning_rate": 0.0018302071346375144, "loss": 0.8338, "step": 29510 }, { "epoch": 8.492520138089759, "grad_norm": 1.761000633239746, "learning_rate": 0.001830149597238205, "loss": 0.8477, "step": 29520 }, { "epoch": 8.495397008055235, "grad_norm": 1.8830769062042236, "learning_rate": 0.0018300920598388953, "loss": 0.7618, "step": 29530 }, { "epoch": 8.498273878020713, "grad_norm": 1.6620934009552002, "learning_rate": 0.0018300345224395859, "loss": 0.6765, "step": 29540 }, { "epoch": 8.501150747986191, "grad_norm": 1.0558830499649048, "learning_rate": 0.0018299769850402764, "loss": 0.8216, "step": 29550 }, { "epoch": 8.50402761795167, "grad_norm": 1.008965015411377, "learning_rate": 0.0018299194476409666, "loss": 0.8966, "step": 29560 }, { "epoch": 8.506904487917145, "grad_norm": 1.0929113626480103, "learning_rate": 0.0018298619102416571, "loss": 0.8883, "step": 29570 }, { "epoch": 8.509781357882623, "grad_norm": 1.561517357826233, "learning_rate": 0.0018298043728423475, "loss": 0.7281, "step": 29580 }, { "epoch": 8.512658227848101, "grad_norm": 1.2137272357940674, "learning_rate": 0.001829746835443038, "loss": 0.7421, "step": 29590 }, { "epoch": 8.51553509781358, "grad_norm": 1.505814552307129, "learning_rate": 0.0018296892980437286, "loss": 0.7964, "step": 29600 }, { "epoch": 8.518411967779056, "grad_norm": 1.5452558994293213, "learning_rate": 0.0018296317606444187, "loss": 0.9026, "step": 29610 }, { "epoch": 8.521288837744533, "grad_norm": 1.457700490951538, "learning_rate": 0.0018295742232451093, "loss": 0.8722, "step": 29620 }, { "epoch": 8.524165707710011, "grad_norm": 1.5183546543121338, "learning_rate": 0.0018295166858457999, "loss": 0.6174, "step": 29630 }, { "epoch": 8.52704257767549, "grad_norm": 0.7331917881965637, "learning_rate": 0.0018294591484464902, "loss": 0.867, "step": 29640 }, { "epoch": 8.529919447640967, "grad_norm": 3.2992758750915527, "learning_rate": 0.0018294016110471808, "loss": 0.8818, "step": 29650 }, { "epoch": 8.532796317606444, "grad_norm": 1.5094389915466309, "learning_rate": 0.0018293440736478713, "loss": 0.9861, "step": 29660 }, { "epoch": 8.535673187571922, "grad_norm": 1.2120412588119507, "learning_rate": 0.0018292865362485615, "loss": 0.6442, "step": 29670 }, { "epoch": 8.5385500575374, "grad_norm": 1.176132321357727, "learning_rate": 0.001829228998849252, "loss": 0.6653, "step": 29680 }, { "epoch": 8.541426927502878, "grad_norm": 1.6436854600906372, "learning_rate": 0.0018291714614499424, "loss": 0.7251, "step": 29690 }, { "epoch": 8.544303797468354, "grad_norm": 0.7459123134613037, "learning_rate": 0.001829113924050633, "loss": 0.8893, "step": 29700 }, { "epoch": 8.547180667433832, "grad_norm": 1.1153957843780518, "learning_rate": 0.0018290563866513235, "loss": 0.7134, "step": 29710 }, { "epoch": 8.55005753739931, "grad_norm": 1.4252715110778809, "learning_rate": 0.0018289988492520138, "loss": 0.7466, "step": 29720 }, { "epoch": 8.552934407364788, "grad_norm": 2.182236433029175, "learning_rate": 0.0018289413118527042, "loss": 0.9275, "step": 29730 }, { "epoch": 8.555811277330264, "grad_norm": 1.089847207069397, "learning_rate": 0.0018288837744533948, "loss": 0.7809, "step": 29740 }, { "epoch": 8.558688147295742, "grad_norm": 1.0835527181625366, "learning_rate": 0.001828826237054085, "loss": 0.8236, "step": 29750 }, { "epoch": 8.56156501726122, "grad_norm": 1.9403541088104248, "learning_rate": 0.0018287686996547757, "loss": 0.8134, "step": 29760 }, { "epoch": 8.564441887226698, "grad_norm": 1.2199541330337524, "learning_rate": 0.0018287111622554662, "loss": 0.8083, "step": 29770 }, { "epoch": 8.567318757192176, "grad_norm": 1.129517674446106, "learning_rate": 0.0018286536248561566, "loss": 0.8988, "step": 29780 }, { "epoch": 8.570195627157652, "grad_norm": 1.4350050687789917, "learning_rate": 0.001828596087456847, "loss": 0.8217, "step": 29790 }, { "epoch": 8.57307249712313, "grad_norm": 1.1368675231933594, "learning_rate": 0.0018285385500575373, "loss": 0.6973, "step": 29800 }, { "epoch": 8.575949367088608, "grad_norm": 1.2349451780319214, "learning_rate": 0.0018284810126582278, "loss": 0.8368, "step": 29810 }, { "epoch": 8.578826237054086, "grad_norm": 1.8184541463851929, "learning_rate": 0.0018284234752589184, "loss": 0.8413, "step": 29820 }, { "epoch": 8.581703107019562, "grad_norm": 0.8301032781600952, "learning_rate": 0.0018283659378596087, "loss": 0.6349, "step": 29830 }, { "epoch": 8.58457997698504, "grad_norm": 2.1172239780426025, "learning_rate": 0.0018283084004602993, "loss": 0.7668, "step": 29840 }, { "epoch": 8.587456846950518, "grad_norm": 2.550769805908203, "learning_rate": 0.0018282508630609897, "loss": 0.903, "step": 29850 }, { "epoch": 8.590333716915996, "grad_norm": 1.3577672243118286, "learning_rate": 0.00182819332566168, "loss": 0.8754, "step": 29860 }, { "epoch": 8.593210586881472, "grad_norm": 0.7620074152946472, "learning_rate": 0.0018281357882623706, "loss": 0.6911, "step": 29870 }, { "epoch": 8.59608745684695, "grad_norm": 1.2359826564788818, "learning_rate": 0.0018280782508630611, "loss": 0.6833, "step": 29880 }, { "epoch": 8.598964326812428, "grad_norm": 1.1350481510162354, "learning_rate": 0.0018280207134637515, "loss": 0.6613, "step": 29890 }, { "epoch": 8.601841196777906, "grad_norm": 1.5581690073013306, "learning_rate": 0.001827963176064442, "loss": 1.1472, "step": 29900 }, { "epoch": 8.604718066743382, "grad_norm": 1.238647222518921, "learning_rate": 0.0018279056386651322, "loss": 0.8886, "step": 29910 }, { "epoch": 8.60759493670886, "grad_norm": 2.078458547592163, "learning_rate": 0.0018278481012658227, "loss": 0.8379, "step": 29920 }, { "epoch": 8.610471806674338, "grad_norm": 1.293851613998413, "learning_rate": 0.0018277905638665133, "loss": 0.7506, "step": 29930 }, { "epoch": 8.613348676639816, "grad_norm": 1.5873796939849854, "learning_rate": 0.0018277330264672037, "loss": 0.8571, "step": 29940 }, { "epoch": 8.616225546605293, "grad_norm": 2.6311697959899902, "learning_rate": 0.0018276754890678942, "loss": 0.8752, "step": 29950 }, { "epoch": 8.61910241657077, "grad_norm": 1.7424522638320923, "learning_rate": 0.0018276179516685848, "loss": 1.0912, "step": 29960 }, { "epoch": 8.621979286536249, "grad_norm": 1.2921173572540283, "learning_rate": 0.001827560414269275, "loss": 0.8792, "step": 29970 }, { "epoch": 8.624856156501727, "grad_norm": 0.8019508719444275, "learning_rate": 0.0018275028768699655, "loss": 0.9399, "step": 29980 }, { "epoch": 8.627733026467205, "grad_norm": 2.3298404216766357, "learning_rate": 0.001827445339470656, "loss": 0.7044, "step": 29990 }, { "epoch": 8.63060989643268, "grad_norm": 0.7848712801933289, "learning_rate": 0.0018273878020713464, "loss": 0.7889, "step": 30000 }, { "epoch": 8.633486766398159, "grad_norm": 2.687455892562866, "learning_rate": 0.001827330264672037, "loss": 0.9679, "step": 30010 }, { "epoch": 8.636363636363637, "grad_norm": 1.7008262872695923, "learning_rate": 0.0018272727272727275, "loss": 0.9502, "step": 30020 }, { "epoch": 8.639240506329115, "grad_norm": 1.7733489274978638, "learning_rate": 0.0018272151898734176, "loss": 0.7496, "step": 30030 }, { "epoch": 8.64211737629459, "grad_norm": 1.545742154121399, "learning_rate": 0.0018271576524741082, "loss": 0.9171, "step": 30040 }, { "epoch": 8.644994246260069, "grad_norm": 1.9069219827651978, "learning_rate": 0.0018271001150747986, "loss": 0.8861, "step": 30050 }, { "epoch": 8.647871116225547, "grad_norm": 1.5136370658874512, "learning_rate": 0.0018270425776754891, "loss": 0.862, "step": 30060 }, { "epoch": 8.650747986191025, "grad_norm": 1.473317265510559, "learning_rate": 0.0018269850402761797, "loss": 0.7502, "step": 30070 }, { "epoch": 8.653624856156501, "grad_norm": 0.8548181653022766, "learning_rate": 0.00182692750287687, "loss": 0.7756, "step": 30080 }, { "epoch": 8.656501726121979, "grad_norm": 1.6288495063781738, "learning_rate": 0.0018268699654775604, "loss": 0.7886, "step": 30090 }, { "epoch": 8.659378596087457, "grad_norm": 1.6361603736877441, "learning_rate": 0.001826812428078251, "loss": 0.9271, "step": 30100 }, { "epoch": 8.662255466052935, "grad_norm": 1.46836256980896, "learning_rate": 0.0018267548906789413, "loss": 0.7539, "step": 30110 }, { "epoch": 8.665132336018411, "grad_norm": 1.4635810852050781, "learning_rate": 0.0018266973532796319, "loss": 0.7023, "step": 30120 }, { "epoch": 8.66800920598389, "grad_norm": 0.7904888987541199, "learning_rate": 0.0018266398158803224, "loss": 0.9001, "step": 30130 }, { "epoch": 8.670886075949367, "grad_norm": 1.4771782159805298, "learning_rate": 0.0018265822784810128, "loss": 0.7497, "step": 30140 }, { "epoch": 8.673762945914845, "grad_norm": 2.304784059524536, "learning_rate": 0.001826524741081703, "loss": 0.9405, "step": 30150 }, { "epoch": 8.676639815880321, "grad_norm": 0.896144688129425, "learning_rate": 0.0018264672036823935, "loss": 0.8148, "step": 30160 }, { "epoch": 8.6795166858458, "grad_norm": 1.8908345699310303, "learning_rate": 0.001826409666283084, "loss": 0.818, "step": 30170 }, { "epoch": 8.682393555811277, "grad_norm": 1.2691435813903809, "learning_rate": 0.0018263521288837746, "loss": 1.1851, "step": 30180 }, { "epoch": 8.685270425776755, "grad_norm": 1.6400020122528076, "learning_rate": 0.001826294591484465, "loss": 0.8964, "step": 30190 }, { "epoch": 8.688147295742233, "grad_norm": 0.8981714844703674, "learning_rate": 0.0018262370540851555, "loss": 0.9357, "step": 30200 }, { "epoch": 8.69102416570771, "grad_norm": 1.136505365371704, "learning_rate": 0.0018261795166858458, "loss": 0.6954, "step": 30210 }, { "epoch": 8.693901035673187, "grad_norm": 2.4913251399993896, "learning_rate": 0.0018261219792865362, "loss": 0.925, "step": 30220 }, { "epoch": 8.696777905638665, "grad_norm": 2.387626886367798, "learning_rate": 0.0018260644418872268, "loss": 0.8949, "step": 30230 }, { "epoch": 8.699654775604143, "grad_norm": 0.7610844969749451, "learning_rate": 0.0018260069044879173, "loss": 0.8346, "step": 30240 }, { "epoch": 8.70253164556962, "grad_norm": 1.744353175163269, "learning_rate": 0.0018259493670886077, "loss": 0.8085, "step": 30250 }, { "epoch": 8.705408515535098, "grad_norm": 0.8312823176383972, "learning_rate": 0.0018258918296892982, "loss": 0.8106, "step": 30260 }, { "epoch": 8.708285385500576, "grad_norm": 1.0267713069915771, "learning_rate": 0.0018258342922899884, "loss": 0.7512, "step": 30270 }, { "epoch": 8.711162255466053, "grad_norm": 1.774261474609375, "learning_rate": 0.001825776754890679, "loss": 0.9248, "step": 30280 }, { "epoch": 8.71403912543153, "grad_norm": 1.3608990907669067, "learning_rate": 0.0018257192174913695, "loss": 0.837, "step": 30290 }, { "epoch": 8.716915995397008, "grad_norm": 1.6517411470413208, "learning_rate": 0.0018256616800920598, "loss": 0.7884, "step": 30300 }, { "epoch": 8.719792865362486, "grad_norm": 1.7385817766189575, "learning_rate": 0.0018256041426927504, "loss": 0.932, "step": 30310 }, { "epoch": 8.722669735327964, "grad_norm": 2.007899045944214, "learning_rate": 0.001825546605293441, "loss": 1.0124, "step": 30320 }, { "epoch": 8.725546605293442, "grad_norm": 0.9572886228561401, "learning_rate": 0.001825489067894131, "loss": 0.9927, "step": 30330 }, { "epoch": 8.728423475258918, "grad_norm": 1.5545563697814941, "learning_rate": 0.0018254315304948217, "loss": 0.8893, "step": 30340 }, { "epoch": 8.731300345224396, "grad_norm": 0.6469119191169739, "learning_rate": 0.0018253739930955122, "loss": 0.9254, "step": 30350 }, { "epoch": 8.734177215189874, "grad_norm": 1.237668514251709, "learning_rate": 0.0018253164556962026, "loss": 0.758, "step": 30360 }, { "epoch": 8.737054085155352, "grad_norm": 1.6859073638916016, "learning_rate": 0.0018252589182968931, "loss": 0.7503, "step": 30370 }, { "epoch": 8.739930955120828, "grad_norm": 2.1890523433685303, "learning_rate": 0.0018252013808975833, "loss": 0.8224, "step": 30380 }, { "epoch": 8.742807825086306, "grad_norm": 1.289080262184143, "learning_rate": 0.0018251438434982738, "loss": 0.6382, "step": 30390 }, { "epoch": 8.745684695051784, "grad_norm": 1.423980951309204, "learning_rate": 0.0018250863060989644, "loss": 0.8438, "step": 30400 }, { "epoch": 8.748561565017262, "grad_norm": 1.2075676918029785, "learning_rate": 0.0018250287686996547, "loss": 0.803, "step": 30410 }, { "epoch": 8.751438434982738, "grad_norm": 0.871540367603302, "learning_rate": 0.0018249712313003453, "loss": 0.847, "step": 30420 }, { "epoch": 8.754315304948216, "grad_norm": 1.1286330223083496, "learning_rate": 0.0018249136939010359, "loss": 0.816, "step": 30430 }, { "epoch": 8.757192174913694, "grad_norm": 1.298512578010559, "learning_rate": 0.001824856156501726, "loss": 0.6499, "step": 30440 }, { "epoch": 8.760069044879172, "grad_norm": 1.0762673616409302, "learning_rate": 0.0018247986191024166, "loss": 0.8894, "step": 30450 }, { "epoch": 8.762945914844648, "grad_norm": 1.998847484588623, "learning_rate": 0.0018247410817031071, "loss": 0.8683, "step": 30460 }, { "epoch": 8.765822784810126, "grad_norm": 1.9354469776153564, "learning_rate": 0.0018246835443037975, "loss": 0.8528, "step": 30470 }, { "epoch": 8.768699654775604, "grad_norm": 2.2475221157073975, "learning_rate": 0.001824626006904488, "loss": 0.9238, "step": 30480 }, { "epoch": 8.771576524741082, "grad_norm": 1.3439334630966187, "learning_rate": 0.0018245684695051784, "loss": 0.9276, "step": 30490 }, { "epoch": 8.774453394706558, "grad_norm": 1.350687026977539, "learning_rate": 0.0018245109321058687, "loss": 0.8086, "step": 30500 }, { "epoch": 8.777330264672036, "grad_norm": 1.2263678312301636, "learning_rate": 0.0018244533947065593, "loss": 0.7076, "step": 30510 }, { "epoch": 8.780207134637514, "grad_norm": 0.9161993861198425, "learning_rate": 0.0018243958573072496, "loss": 0.8789, "step": 30520 }, { "epoch": 8.783084004602992, "grad_norm": 2.069974660873413, "learning_rate": 0.0018243383199079402, "loss": 1.0584, "step": 30530 }, { "epoch": 8.78596087456847, "grad_norm": 2.2347772121429443, "learning_rate": 0.0018242807825086308, "loss": 0.9617, "step": 30540 }, { "epoch": 8.788837744533947, "grad_norm": 1.1024234294891357, "learning_rate": 0.0018242232451093211, "loss": 0.8264, "step": 30550 }, { "epoch": 8.791714614499424, "grad_norm": 1.0356281995773315, "learning_rate": 0.0018241657077100115, "loss": 0.9404, "step": 30560 }, { "epoch": 8.794591484464902, "grad_norm": 2.4274778366088867, "learning_rate": 0.001824108170310702, "loss": 0.7822, "step": 30570 }, { "epoch": 8.79746835443038, "grad_norm": 0.6162936091423035, "learning_rate": 0.0018240506329113924, "loss": 0.9441, "step": 30580 }, { "epoch": 8.800345224395857, "grad_norm": 0.948054313659668, "learning_rate": 0.001823993095512083, "loss": 0.8342, "step": 30590 }, { "epoch": 8.803222094361335, "grad_norm": 1.2159851789474487, "learning_rate": 0.0018239355581127733, "loss": 0.9771, "step": 30600 }, { "epoch": 8.806098964326813, "grad_norm": 1.3762497901916504, "learning_rate": 0.0018238780207134638, "loss": 0.7728, "step": 30610 }, { "epoch": 8.80897583429229, "grad_norm": 0.984481930732727, "learning_rate": 0.0018238204833141542, "loss": 0.7232, "step": 30620 }, { "epoch": 8.811852704257767, "grad_norm": 1.7959823608398438, "learning_rate": 0.0018237629459148445, "loss": 0.8402, "step": 30630 }, { "epoch": 8.814729574223245, "grad_norm": 1.387222409248352, "learning_rate": 0.001823705408515535, "loss": 0.9606, "step": 30640 }, { "epoch": 8.817606444188723, "grad_norm": 1.2293859720230103, "learning_rate": 0.0018236478711162257, "loss": 0.6626, "step": 30650 }, { "epoch": 8.8204833141542, "grad_norm": 1.5982797145843506, "learning_rate": 0.001823590333716916, "loss": 0.8609, "step": 30660 }, { "epoch": 8.823360184119679, "grad_norm": 1.1393017768859863, "learning_rate": 0.0018235327963176066, "loss": 0.8327, "step": 30670 }, { "epoch": 8.826237054085155, "grad_norm": 0.5007483959197998, "learning_rate": 0.001823475258918297, "loss": 0.8095, "step": 30680 }, { "epoch": 8.829113924050633, "grad_norm": 0.9157325029373169, "learning_rate": 0.0018234177215189873, "loss": 0.7127, "step": 30690 }, { "epoch": 8.83199079401611, "grad_norm": 1.408499002456665, "learning_rate": 0.0018233601841196778, "loss": 1.0288, "step": 30700 }, { "epoch": 8.834867663981589, "grad_norm": 0.5990835428237915, "learning_rate": 0.0018233026467203684, "loss": 0.7153, "step": 30710 }, { "epoch": 8.837744533947065, "grad_norm": 1.509465217590332, "learning_rate": 0.0018232451093210587, "loss": 0.7982, "step": 30720 }, { "epoch": 8.840621403912543, "grad_norm": 1.8507287502288818, "learning_rate": 0.0018231875719217493, "loss": 0.7933, "step": 30730 }, { "epoch": 8.843498273878021, "grad_norm": 1.6699249744415283, "learning_rate": 0.0018231300345224394, "loss": 1.0021, "step": 30740 }, { "epoch": 8.846375143843499, "grad_norm": 1.1126128435134888, "learning_rate": 0.00182307249712313, "loss": 0.7221, "step": 30750 }, { "epoch": 8.849252013808975, "grad_norm": 0.7251062989234924, "learning_rate": 0.0018230149597238206, "loss": 0.7795, "step": 30760 }, { "epoch": 8.852128883774453, "grad_norm": 1.582457184791565, "learning_rate": 0.001822957422324511, "loss": 1.0218, "step": 30770 }, { "epoch": 8.855005753739931, "grad_norm": 1.157901406288147, "learning_rate": 0.0018228998849252015, "loss": 0.6979, "step": 30780 }, { "epoch": 8.85788262370541, "grad_norm": 2.3324320316314697, "learning_rate": 0.001822842347525892, "loss": 0.6377, "step": 30790 }, { "epoch": 8.860759493670885, "grad_norm": 1.4635443687438965, "learning_rate": 0.0018227848101265822, "loss": 0.9012, "step": 30800 }, { "epoch": 8.863636363636363, "grad_norm": 1.2245676517486572, "learning_rate": 0.0018227272727272727, "loss": 0.8723, "step": 30810 }, { "epoch": 8.866513233601841, "grad_norm": 1.2703427076339722, "learning_rate": 0.0018226697353279633, "loss": 0.912, "step": 30820 }, { "epoch": 8.86939010356732, "grad_norm": 1.1173900365829468, "learning_rate": 0.0018226121979286536, "loss": 0.8702, "step": 30830 }, { "epoch": 8.872266973532795, "grad_norm": 1.4525032043457031, "learning_rate": 0.0018225546605293442, "loss": 0.8579, "step": 30840 }, { "epoch": 8.875143843498273, "grad_norm": 1.9920169115066528, "learning_rate": 0.0018224971231300346, "loss": 0.8793, "step": 30850 }, { "epoch": 8.878020713463751, "grad_norm": 2.309685707092285, "learning_rate": 0.001822439585730725, "loss": 0.8326, "step": 30860 }, { "epoch": 8.88089758342923, "grad_norm": 0.8488758206367493, "learning_rate": 0.0018223820483314155, "loss": 0.7863, "step": 30870 }, { "epoch": 8.883774453394707, "grad_norm": 1.8982677459716797, "learning_rate": 0.0018223245109321058, "loss": 0.7985, "step": 30880 }, { "epoch": 8.886651323360184, "grad_norm": 1.012325644493103, "learning_rate": 0.0018222669735327964, "loss": 0.9256, "step": 30890 }, { "epoch": 8.889528193325662, "grad_norm": 0.8343455195426941, "learning_rate": 0.001822209436133487, "loss": 0.8157, "step": 30900 }, { "epoch": 8.89240506329114, "grad_norm": 0.8979990482330322, "learning_rate": 0.0018221518987341773, "loss": 0.6306, "step": 30910 }, { "epoch": 8.895281933256618, "grad_norm": 2.3420298099517822, "learning_rate": 0.0018220943613348676, "loss": 0.8432, "step": 30920 }, { "epoch": 8.898158803222094, "grad_norm": 1.2414429187774658, "learning_rate": 0.0018220368239355582, "loss": 0.751, "step": 30930 }, { "epoch": 8.901035673187572, "grad_norm": 1.510711908340454, "learning_rate": 0.0018219792865362486, "loss": 0.8008, "step": 30940 }, { "epoch": 8.90391254315305, "grad_norm": 1.571189522743225, "learning_rate": 0.0018219217491369391, "loss": 0.7702, "step": 30950 }, { "epoch": 8.906789413118528, "grad_norm": 1.4195005893707275, "learning_rate": 0.0018218642117376295, "loss": 0.955, "step": 30960 }, { "epoch": 8.909666283084004, "grad_norm": 0.9883869290351868, "learning_rate": 0.00182180667433832, "loss": 0.7337, "step": 30970 }, { "epoch": 8.912543153049482, "grad_norm": 2.0962045192718506, "learning_rate": 0.0018217491369390104, "loss": 0.7928, "step": 30980 }, { "epoch": 8.91542002301496, "grad_norm": 2.2638180255889893, "learning_rate": 0.0018216915995397007, "loss": 0.8137, "step": 30990 }, { "epoch": 8.918296892980438, "grad_norm": 1.7332496643066406, "learning_rate": 0.0018216340621403913, "loss": 0.9055, "step": 31000 }, { "epoch": 8.921173762945914, "grad_norm": 1.946271538734436, "learning_rate": 0.0018215765247410818, "loss": 0.8765, "step": 31010 }, { "epoch": 8.924050632911392, "grad_norm": 2.0591800212860107, "learning_rate": 0.0018215189873417722, "loss": 0.8556, "step": 31020 }, { "epoch": 8.92692750287687, "grad_norm": 1.4451860189437866, "learning_rate": 0.0018214614499424628, "loss": 0.785, "step": 31030 }, { "epoch": 8.929804372842348, "grad_norm": 2.4090771675109863, "learning_rate": 0.001821403912543153, "loss": 0.7575, "step": 31040 }, { "epoch": 8.932681242807824, "grad_norm": 1.0072046518325806, "learning_rate": 0.0018213463751438435, "loss": 0.6694, "step": 31050 }, { "epoch": 8.935558112773302, "grad_norm": 1.196123480796814, "learning_rate": 0.001821288837744534, "loss": 0.7271, "step": 31060 }, { "epoch": 8.93843498273878, "grad_norm": 1.2666294574737549, "learning_rate": 0.0018212313003452244, "loss": 0.7741, "step": 31070 }, { "epoch": 8.941311852704258, "grad_norm": 2.406135082244873, "learning_rate": 0.001821173762945915, "loss": 0.9836, "step": 31080 }, { "epoch": 8.944188722669736, "grad_norm": 1.493730902671814, "learning_rate": 0.0018211162255466055, "loss": 0.7779, "step": 31090 }, { "epoch": 8.947065592635212, "grad_norm": 1.2768579721450806, "learning_rate": 0.0018210586881472956, "loss": 0.8344, "step": 31100 }, { "epoch": 8.94994246260069, "grad_norm": 0.6711753010749817, "learning_rate": 0.0018210011507479862, "loss": 0.8472, "step": 31110 }, { "epoch": 8.952819332566168, "grad_norm": 0.9889324307441711, "learning_rate": 0.0018209436133486767, "loss": 0.8734, "step": 31120 }, { "epoch": 8.955696202531646, "grad_norm": 0.9320860505104065, "learning_rate": 0.001820886075949367, "loss": 0.6347, "step": 31130 }, { "epoch": 8.958573072497122, "grad_norm": 2.621422052383423, "learning_rate": 0.0018208285385500577, "loss": 0.8967, "step": 31140 }, { "epoch": 8.9614499424626, "grad_norm": 1.6458734273910522, "learning_rate": 0.0018207710011507482, "loss": 0.7372, "step": 31150 }, { "epoch": 8.964326812428078, "grad_norm": 1.81907057762146, "learning_rate": 0.0018207134637514384, "loss": 0.9222, "step": 31160 }, { "epoch": 8.967203682393556, "grad_norm": 7.825479984283447, "learning_rate": 0.001820655926352129, "loss": 0.9945, "step": 31170 }, { "epoch": 8.970080552359033, "grad_norm": 3.5466301441192627, "learning_rate": 0.0018205983889528193, "loss": 0.8721, "step": 31180 }, { "epoch": 8.97295742232451, "grad_norm": 0.7081201672554016, "learning_rate": 0.0018205408515535098, "loss": 0.7803, "step": 31190 }, { "epoch": 8.975834292289989, "grad_norm": 0.8385429978370667, "learning_rate": 0.0018204833141542004, "loss": 0.7645, "step": 31200 }, { "epoch": 8.978711162255467, "grad_norm": 1.6030982732772827, "learning_rate": 0.0018204257767548905, "loss": 0.7557, "step": 31210 }, { "epoch": 8.981588032220944, "grad_norm": 1.1188530921936035, "learning_rate": 0.001820368239355581, "loss": 0.9151, "step": 31220 }, { "epoch": 8.98446490218642, "grad_norm": 1.4223185777664185, "learning_rate": 0.0018203107019562717, "loss": 0.8671, "step": 31230 }, { "epoch": 8.987341772151899, "grad_norm": 1.2132359743118286, "learning_rate": 0.001820253164556962, "loss": 0.7372, "step": 31240 }, { "epoch": 8.990218642117377, "grad_norm": 1.1130422353744507, "learning_rate": 0.0018201956271576526, "loss": 0.6503, "step": 31250 }, { "epoch": 8.993095512082855, "grad_norm": 1.301546335220337, "learning_rate": 0.0018201380897583431, "loss": 0.8186, "step": 31260 }, { "epoch": 8.99597238204833, "grad_norm": 1.0366355180740356, "learning_rate": 0.0018200805523590333, "loss": 0.7307, "step": 31270 }, { "epoch": 8.998849252013809, "grad_norm": 1.519921064376831, "learning_rate": 0.0018200230149597238, "loss": 0.9142, "step": 31280 }, { "epoch": 9.001726121979287, "grad_norm": 2.4087090492248535, "learning_rate": 0.0018199654775604144, "loss": 0.8331, "step": 31290 }, { "epoch": 9.004602991944765, "grad_norm": 0.7895507216453552, "learning_rate": 0.0018199079401611047, "loss": 0.7909, "step": 31300 }, { "epoch": 9.007479861910241, "grad_norm": 1.7239265441894531, "learning_rate": 0.0018198504027617953, "loss": 0.7097, "step": 31310 }, { "epoch": 9.010356731875719, "grad_norm": 1.862830638885498, "learning_rate": 0.0018197928653624856, "loss": 0.7663, "step": 31320 }, { "epoch": 9.013233601841197, "grad_norm": 0.7850019335746765, "learning_rate": 0.001819735327963176, "loss": 0.7055, "step": 31330 }, { "epoch": 9.016110471806675, "grad_norm": 1.4921433925628662, "learning_rate": 0.0018196777905638666, "loss": 0.7382, "step": 31340 }, { "epoch": 9.018987341772151, "grad_norm": 1.0928473472595215, "learning_rate": 0.001819620253164557, "loss": 0.7639, "step": 31350 }, { "epoch": 9.021864211737629, "grad_norm": 2.003458023071289, "learning_rate": 0.0018195627157652475, "loss": 0.6797, "step": 31360 }, { "epoch": 9.024741081703107, "grad_norm": 1.1628434658050537, "learning_rate": 0.001819505178365938, "loss": 0.6049, "step": 31370 }, { "epoch": 9.027617951668585, "grad_norm": 1.7439638376235962, "learning_rate": 0.0018194476409666284, "loss": 0.6612, "step": 31380 }, { "epoch": 9.030494821634061, "grad_norm": 1.1737334728240967, "learning_rate": 0.0018193901035673187, "loss": 0.8215, "step": 31390 }, { "epoch": 9.03337169159954, "grad_norm": 1.2289482355117798, "learning_rate": 0.0018193325661680093, "loss": 0.7115, "step": 31400 }, { "epoch": 9.036248561565017, "grad_norm": 0.9215061068534851, "learning_rate": 0.0018192750287686996, "loss": 0.7387, "step": 31410 }, { "epoch": 9.039125431530495, "grad_norm": 1.029343605041504, "learning_rate": 0.0018192174913693902, "loss": 0.7759, "step": 31420 }, { "epoch": 9.042002301495973, "grad_norm": 1.4599168300628662, "learning_rate": 0.0018191599539700805, "loss": 0.743, "step": 31430 }, { "epoch": 9.04487917146145, "grad_norm": 1.8606659173965454, "learning_rate": 0.0018191024165707711, "loss": 0.7546, "step": 31440 }, { "epoch": 9.047756041426927, "grad_norm": 1.1954801082611084, "learning_rate": 0.0018190448791714615, "loss": 0.7382, "step": 31450 }, { "epoch": 9.050632911392405, "grad_norm": 1.287053108215332, "learning_rate": 0.0018189873417721518, "loss": 0.979, "step": 31460 }, { "epoch": 9.053509781357883, "grad_norm": 1.2281442880630493, "learning_rate": 0.0018189298043728424, "loss": 0.6992, "step": 31470 }, { "epoch": 9.05638665132336, "grad_norm": 2.021197557449341, "learning_rate": 0.001818872266973533, "loss": 0.7774, "step": 31480 }, { "epoch": 9.059263521288837, "grad_norm": 1.1542036533355713, "learning_rate": 0.0018188147295742233, "loss": 0.8801, "step": 31490 }, { "epoch": 9.062140391254315, "grad_norm": 1.6845219135284424, "learning_rate": 0.0018187571921749138, "loss": 0.9096, "step": 31500 }, { "epoch": 9.065017261219793, "grad_norm": 2.3973066806793213, "learning_rate": 0.0018186996547756042, "loss": 0.926, "step": 31510 }, { "epoch": 9.06789413118527, "grad_norm": 1.0066016912460327, "learning_rate": 0.0018186421173762945, "loss": 0.7915, "step": 31520 }, { "epoch": 9.070771001150748, "grad_norm": 1.405548334121704, "learning_rate": 0.001818584579976985, "loss": 0.86, "step": 31530 }, { "epoch": 9.073647871116226, "grad_norm": 0.846468985080719, "learning_rate": 0.0018185270425776754, "loss": 0.7319, "step": 31540 }, { "epoch": 9.076524741081704, "grad_norm": 1.3619649410247803, "learning_rate": 0.001818469505178366, "loss": 0.7939, "step": 31550 }, { "epoch": 9.07940161104718, "grad_norm": 2.2306947708129883, "learning_rate": 0.0018184119677790566, "loss": 0.9021, "step": 31560 }, { "epoch": 9.082278481012658, "grad_norm": 0.9270578622817993, "learning_rate": 0.0018183544303797467, "loss": 0.7456, "step": 31570 }, { "epoch": 9.085155350978136, "grad_norm": 1.631914496421814, "learning_rate": 0.0018182968929804373, "loss": 0.839, "step": 31580 }, { "epoch": 9.088032220943614, "grad_norm": 1.8731189966201782, "learning_rate": 0.0018182393555811278, "loss": 0.982, "step": 31590 }, { "epoch": 9.090909090909092, "grad_norm": 0.801105797290802, "learning_rate": 0.0018181818181818182, "loss": 0.6758, "step": 31600 }, { "epoch": 9.093785960874568, "grad_norm": 1.7409230470657349, "learning_rate": 0.0018181242807825087, "loss": 0.7308, "step": 31610 }, { "epoch": 9.096662830840046, "grad_norm": 1.0462982654571533, "learning_rate": 0.0018180667433831993, "loss": 0.7151, "step": 31620 }, { "epoch": 9.099539700805524, "grad_norm": 1.4180653095245361, "learning_rate": 0.0018180092059838894, "loss": 0.6691, "step": 31630 }, { "epoch": 9.102416570771002, "grad_norm": 2.1369221210479736, "learning_rate": 0.00181795166858458, "loss": 0.8231, "step": 31640 }, { "epoch": 9.105293440736478, "grad_norm": 0.7851033806800842, "learning_rate": 0.0018178941311852703, "loss": 0.5673, "step": 31650 }, { "epoch": 9.108170310701956, "grad_norm": 2.0109994411468506, "learning_rate": 0.001817836593785961, "loss": 0.9239, "step": 31660 }, { "epoch": 9.111047180667434, "grad_norm": 1.3686034679412842, "learning_rate": 0.0018177790563866515, "loss": 0.7797, "step": 31670 }, { "epoch": 9.113924050632912, "grad_norm": 1.6321861743927002, "learning_rate": 0.0018177215189873418, "loss": 0.7866, "step": 31680 }, { "epoch": 9.116800920598388, "grad_norm": 1.8590717315673828, "learning_rate": 0.0018176639815880322, "loss": 0.8258, "step": 31690 }, { "epoch": 9.119677790563866, "grad_norm": 2.8653371334075928, "learning_rate": 0.0018176064441887227, "loss": 0.7817, "step": 31700 }, { "epoch": 9.122554660529344, "grad_norm": 1.857728362083435, "learning_rate": 0.001817548906789413, "loss": 0.7601, "step": 31710 }, { "epoch": 9.125431530494822, "grad_norm": 0.6412099003791809, "learning_rate": 0.0018174913693901036, "loss": 0.8685, "step": 31720 }, { "epoch": 9.128308400460298, "grad_norm": 1.3667612075805664, "learning_rate": 0.0018174338319907942, "loss": 0.9422, "step": 31730 }, { "epoch": 9.131185270425776, "grad_norm": 1.1326979398727417, "learning_rate": 0.0018173762945914846, "loss": 0.7823, "step": 31740 }, { "epoch": 9.134062140391254, "grad_norm": 1.712036371231079, "learning_rate": 0.001817318757192175, "loss": 0.7374, "step": 31750 }, { "epoch": 9.136939010356732, "grad_norm": 1.226344347000122, "learning_rate": 0.0018172612197928653, "loss": 0.6573, "step": 31760 }, { "epoch": 9.13981588032221, "grad_norm": 2.030418634414673, "learning_rate": 0.0018172036823935558, "loss": 0.858, "step": 31770 }, { "epoch": 9.142692750287686, "grad_norm": 1.8013249635696411, "learning_rate": 0.0018171461449942464, "loss": 0.7201, "step": 31780 }, { "epoch": 9.145569620253164, "grad_norm": 1.2688313722610474, "learning_rate": 0.0018170886075949367, "loss": 0.9331, "step": 31790 }, { "epoch": 9.148446490218642, "grad_norm": 1.3769539594650269, "learning_rate": 0.0018170310701956273, "loss": 0.8337, "step": 31800 }, { "epoch": 9.15132336018412, "grad_norm": 0.7820612788200378, "learning_rate": 0.0018169735327963176, "loss": 0.7372, "step": 31810 }, { "epoch": 9.154200230149597, "grad_norm": 0.9015775322914124, "learning_rate": 0.001816915995397008, "loss": 0.9217, "step": 31820 }, { "epoch": 9.157077100115075, "grad_norm": 1.6719250679016113, "learning_rate": 0.0018168584579976985, "loss": 0.7332, "step": 31830 }, { "epoch": 9.159953970080553, "grad_norm": 0.9655701518058777, "learning_rate": 0.0018168009205983891, "loss": 0.8478, "step": 31840 }, { "epoch": 9.16283084004603, "grad_norm": 2.170614242553711, "learning_rate": 0.0018167433831990795, "loss": 0.8541, "step": 31850 }, { "epoch": 9.165707710011507, "grad_norm": 1.408862829208374, "learning_rate": 0.00181668584579977, "loss": 0.8884, "step": 31860 }, { "epoch": 9.168584579976985, "grad_norm": 1.7106808423995972, "learning_rate": 0.0018166283084004602, "loss": 0.8678, "step": 31870 }, { "epoch": 9.171461449942463, "grad_norm": 1.2328897714614868, "learning_rate": 0.0018165707710011507, "loss": 0.6486, "step": 31880 }, { "epoch": 9.17433831990794, "grad_norm": 1.814903736114502, "learning_rate": 0.0018165132336018413, "loss": 0.8548, "step": 31890 }, { "epoch": 9.177215189873417, "grad_norm": 1.0644702911376953, "learning_rate": 0.0018164556962025316, "loss": 0.8906, "step": 31900 }, { "epoch": 9.180092059838895, "grad_norm": 2.5098321437835693, "learning_rate": 0.0018163981588032222, "loss": 0.8247, "step": 31910 }, { "epoch": 9.182968929804373, "grad_norm": 1.513986349105835, "learning_rate": 0.0018163406214039128, "loss": 0.9725, "step": 31920 }, { "epoch": 9.18584579976985, "grad_norm": 0.999826192855835, "learning_rate": 0.0018162830840046029, "loss": 0.7402, "step": 31930 }, { "epoch": 9.188722669735329, "grad_norm": 1.7163335084915161, "learning_rate": 0.0018162255466052935, "loss": 0.9585, "step": 31940 }, { "epoch": 9.191599539700805, "grad_norm": 1.0014989376068115, "learning_rate": 0.001816168009205984, "loss": 0.7359, "step": 31950 }, { "epoch": 9.194476409666283, "grad_norm": 1.2546885013580322, "learning_rate": 0.0018161104718066744, "loss": 0.7107, "step": 31960 }, { "epoch": 9.197353279631761, "grad_norm": 1.8610668182373047, "learning_rate": 0.001816052934407365, "loss": 1.0013, "step": 31970 }, { "epoch": 9.200230149597239, "grad_norm": 1.486322045326233, "learning_rate": 0.0018159953970080555, "loss": 0.8684, "step": 31980 }, { "epoch": 9.203107019562715, "grad_norm": 1.3212895393371582, "learning_rate": 0.0018159378596087456, "loss": 0.9131, "step": 31990 }, { "epoch": 9.205983889528193, "grad_norm": 2.598393440246582, "learning_rate": 0.0018158803222094362, "loss": 0.7046, "step": 32000 }, { "epoch": 9.208860759493671, "grad_norm": 1.6513402462005615, "learning_rate": 0.0018158227848101265, "loss": 0.8648, "step": 32010 }, { "epoch": 9.211737629459149, "grad_norm": 0.8390821814537048, "learning_rate": 0.001815765247410817, "loss": 0.7551, "step": 32020 }, { "epoch": 9.214614499424625, "grad_norm": 0.9125989675521851, "learning_rate": 0.0018157077100115077, "loss": 0.9445, "step": 32030 }, { "epoch": 9.217491369390103, "grad_norm": 1.135109543800354, "learning_rate": 0.0018156501726121978, "loss": 0.8334, "step": 32040 }, { "epoch": 9.220368239355581, "grad_norm": 1.0287225246429443, "learning_rate": 0.0018155926352128884, "loss": 0.6787, "step": 32050 }, { "epoch": 9.22324510932106, "grad_norm": 1.8023669719696045, "learning_rate": 0.001815535097813579, "loss": 0.6359, "step": 32060 }, { "epoch": 9.226121979286535, "grad_norm": 1.0154075622558594, "learning_rate": 0.0018154775604142693, "loss": 0.8305, "step": 32070 }, { "epoch": 9.228998849252013, "grad_norm": 2.1365535259246826, "learning_rate": 0.0018154200230149598, "loss": 0.7378, "step": 32080 }, { "epoch": 9.231875719217491, "grad_norm": 0.9042030572891235, "learning_rate": 0.0018153624856156504, "loss": 0.6528, "step": 32090 }, { "epoch": 9.23475258918297, "grad_norm": 1.3401356935501099, "learning_rate": 0.0018153049482163405, "loss": 0.6537, "step": 32100 }, { "epoch": 9.237629459148447, "grad_norm": 1.6202586889266968, "learning_rate": 0.001815247410817031, "loss": 0.8551, "step": 32110 }, { "epoch": 9.240506329113924, "grad_norm": 2.9768917560577393, "learning_rate": 0.0018151898734177214, "loss": 0.6435, "step": 32120 }, { "epoch": 9.243383199079402, "grad_norm": 1.7707043886184692, "learning_rate": 0.001815132336018412, "loss": 1.0057, "step": 32130 }, { "epoch": 9.24626006904488, "grad_norm": 1.1046398878097534, "learning_rate": 0.0018150747986191026, "loss": 0.762, "step": 32140 }, { "epoch": 9.249136939010357, "grad_norm": 1.2095156908035278, "learning_rate": 0.001815017261219793, "loss": 0.7962, "step": 32150 }, { "epoch": 9.252013808975834, "grad_norm": 0.9108474254608154, "learning_rate": 0.0018149597238204833, "loss": 0.6677, "step": 32160 }, { "epoch": 9.254890678941312, "grad_norm": 1.4902607202529907, "learning_rate": 0.0018149021864211738, "loss": 0.7805, "step": 32170 }, { "epoch": 9.25776754890679, "grad_norm": 1.8586112260818481, "learning_rate": 0.0018148446490218642, "loss": 0.7089, "step": 32180 }, { "epoch": 9.260644418872268, "grad_norm": 0.8149921894073486, "learning_rate": 0.0018147871116225547, "loss": 0.6192, "step": 32190 }, { "epoch": 9.263521288837744, "grad_norm": 1.3310458660125732, "learning_rate": 0.0018147295742232453, "loss": 0.8137, "step": 32200 }, { "epoch": 9.266398158803222, "grad_norm": 1.3051294088363647, "learning_rate": 0.0018146720368239356, "loss": 0.8194, "step": 32210 }, { "epoch": 9.2692750287687, "grad_norm": 1.7165822982788086, "learning_rate": 0.001814614499424626, "loss": 0.8321, "step": 32220 }, { "epoch": 9.272151898734178, "grad_norm": 1.2591263055801392, "learning_rate": 0.0018145569620253163, "loss": 0.7728, "step": 32230 }, { "epoch": 9.275028768699654, "grad_norm": 1.511033058166504, "learning_rate": 0.001814499424626007, "loss": 0.8787, "step": 32240 }, { "epoch": 9.277905638665132, "grad_norm": 1.0500328540802002, "learning_rate": 0.0018144418872266975, "loss": 0.7855, "step": 32250 }, { "epoch": 9.28078250863061, "grad_norm": 1.3474735021591187, "learning_rate": 0.0018143843498273878, "loss": 0.7797, "step": 32260 }, { "epoch": 9.283659378596088, "grad_norm": 1.1573461294174194, "learning_rate": 0.0018143268124280784, "loss": 0.7332, "step": 32270 }, { "epoch": 9.286536248561564, "grad_norm": 1.6204899549484253, "learning_rate": 0.0018142692750287687, "loss": 0.7226, "step": 32280 }, { "epoch": 9.289413118527042, "grad_norm": 0.9998387694358826, "learning_rate": 0.001814211737629459, "loss": 0.7296, "step": 32290 }, { "epoch": 9.29228998849252, "grad_norm": 1.9799506664276123, "learning_rate": 0.0018141542002301496, "loss": 0.7176, "step": 32300 }, { "epoch": 9.295166858457998, "grad_norm": 1.3271043300628662, "learning_rate": 0.0018140966628308402, "loss": 0.8351, "step": 32310 }, { "epoch": 9.298043728423476, "grad_norm": 0.9939965009689331, "learning_rate": 0.0018140391254315305, "loss": 0.8188, "step": 32320 }, { "epoch": 9.300920598388952, "grad_norm": 1.0720683336257935, "learning_rate": 0.001813981588032221, "loss": 0.8867, "step": 32330 }, { "epoch": 9.30379746835443, "grad_norm": 0.8934497237205505, "learning_rate": 0.0018139240506329112, "loss": 0.8817, "step": 32340 }, { "epoch": 9.306674338319908, "grad_norm": 1.5957225561141968, "learning_rate": 0.0018138665132336018, "loss": 0.8348, "step": 32350 }, { "epoch": 9.309551208285386, "grad_norm": 1.3928121328353882, "learning_rate": 0.0018138089758342924, "loss": 0.999, "step": 32360 }, { "epoch": 9.312428078250862, "grad_norm": 0.9765191078186035, "learning_rate": 0.0018137514384349827, "loss": 0.836, "step": 32370 }, { "epoch": 9.31530494821634, "grad_norm": 1.554118275642395, "learning_rate": 0.0018136939010356733, "loss": 0.8365, "step": 32380 }, { "epoch": 9.318181818181818, "grad_norm": 1.5733745098114014, "learning_rate": 0.0018136363636363638, "loss": 0.7775, "step": 32390 }, { "epoch": 9.321058688147296, "grad_norm": 1.4696468114852905, "learning_rate": 0.001813578826237054, "loss": 0.7161, "step": 32400 }, { "epoch": 9.323935558112773, "grad_norm": 2.3077855110168457, "learning_rate": 0.0018135212888377445, "loss": 0.8362, "step": 32410 }, { "epoch": 9.32681242807825, "grad_norm": 1.7175672054290771, "learning_rate": 0.001813463751438435, "loss": 0.9996, "step": 32420 }, { "epoch": 9.329689298043728, "grad_norm": 0.9579077959060669, "learning_rate": 0.0018134062140391254, "loss": 0.8443, "step": 32430 }, { "epoch": 9.332566168009206, "grad_norm": 0.9516798257827759, "learning_rate": 0.001813348676639816, "loss": 0.8182, "step": 32440 }, { "epoch": 9.335443037974684, "grad_norm": 1.3617770671844482, "learning_rate": 0.0018132911392405064, "loss": 0.9459, "step": 32450 }, { "epoch": 9.33831990794016, "grad_norm": 1.1934443712234497, "learning_rate": 0.0018132336018411967, "loss": 0.745, "step": 32460 }, { "epoch": 9.341196777905639, "grad_norm": 1.0805864334106445, "learning_rate": 0.0018131760644418873, "loss": 0.6502, "step": 32470 }, { "epoch": 9.344073647871117, "grad_norm": 2.320502281188965, "learning_rate": 0.0018131185270425776, "loss": 1.0753, "step": 32480 }, { "epoch": 9.346950517836595, "grad_norm": 0.9919232726097107, "learning_rate": 0.0018130609896432682, "loss": 0.6867, "step": 32490 }, { "epoch": 9.34982738780207, "grad_norm": 1.4194365739822388, "learning_rate": 0.0018130034522439587, "loss": 0.8095, "step": 32500 }, { "epoch": 9.352704257767549, "grad_norm": 1.4102240800857544, "learning_rate": 0.001812945914844649, "loss": 0.6824, "step": 32510 }, { "epoch": 9.355581127733027, "grad_norm": 1.447023630142212, "learning_rate": 0.0018128883774453394, "loss": 0.8303, "step": 32520 }, { "epoch": 9.358457997698505, "grad_norm": 1.8917678594589233, "learning_rate": 0.00181283084004603, "loss": 0.8677, "step": 32530 }, { "epoch": 9.361334867663981, "grad_norm": 1.0370620489120483, "learning_rate": 0.0018127733026467203, "loss": 0.5742, "step": 32540 }, { "epoch": 9.364211737629459, "grad_norm": 1.8014072179794312, "learning_rate": 0.001812715765247411, "loss": 0.688, "step": 32550 }, { "epoch": 9.367088607594937, "grad_norm": 1.9045844078063965, "learning_rate": 0.0018126582278481013, "loss": 0.8466, "step": 32560 }, { "epoch": 9.369965477560415, "grad_norm": 1.0467191934585571, "learning_rate": 0.0018126006904487918, "loss": 0.7995, "step": 32570 }, { "epoch": 9.372842347525891, "grad_norm": 1.084712028503418, "learning_rate": 0.0018125431530494822, "loss": 0.7497, "step": 32580 }, { "epoch": 9.375719217491369, "grad_norm": 1.3085861206054688, "learning_rate": 0.0018124856156501725, "loss": 0.8344, "step": 32590 }, { "epoch": 9.378596087456847, "grad_norm": 2.247539520263672, "learning_rate": 0.001812428078250863, "loss": 0.7837, "step": 32600 }, { "epoch": 9.381472957422325, "grad_norm": 1.1599515676498413, "learning_rate": 0.0018123705408515536, "loss": 0.742, "step": 32610 }, { "epoch": 9.384349827387801, "grad_norm": 1.7296576499938965, "learning_rate": 0.001812313003452244, "loss": 0.7457, "step": 32620 }, { "epoch": 9.38722669735328, "grad_norm": 1.7227026224136353, "learning_rate": 0.0018122554660529346, "loss": 0.9629, "step": 32630 }, { "epoch": 9.390103567318757, "grad_norm": 1.8524564504623413, "learning_rate": 0.001812197928653625, "loss": 0.7414, "step": 32640 }, { "epoch": 9.392980437284235, "grad_norm": 1.3144911527633667, "learning_rate": 0.0018121403912543152, "loss": 0.7523, "step": 32650 }, { "epoch": 9.395857307249713, "grad_norm": 1.089426040649414, "learning_rate": 0.0018120828538550058, "loss": 0.7333, "step": 32660 }, { "epoch": 9.39873417721519, "grad_norm": 1.758920431137085, "learning_rate": 0.0018120253164556964, "loss": 1.0237, "step": 32670 }, { "epoch": 9.401611047180667, "grad_norm": 1.3169419765472412, "learning_rate": 0.0018119677790563867, "loss": 0.8265, "step": 32680 }, { "epoch": 9.404487917146145, "grad_norm": 1.6327203512191772, "learning_rate": 0.0018119102416570773, "loss": 0.8509, "step": 32690 }, { "epoch": 9.407364787111623, "grad_norm": 1.7705715894699097, "learning_rate": 0.0018118527042577674, "loss": 0.8069, "step": 32700 }, { "epoch": 9.4102416570771, "grad_norm": 2.3043954372406006, "learning_rate": 0.001811795166858458, "loss": 0.9475, "step": 32710 }, { "epoch": 9.413118527042577, "grad_norm": 1.042267918586731, "learning_rate": 0.0018117376294591485, "loss": 0.7826, "step": 32720 }, { "epoch": 9.415995397008055, "grad_norm": 0.935298502445221, "learning_rate": 0.001811680092059839, "loss": 0.8672, "step": 32730 }, { "epoch": 9.418872266973533, "grad_norm": 1.475396990776062, "learning_rate": 0.0018116225546605295, "loss": 0.9, "step": 32740 }, { "epoch": 9.42174913693901, "grad_norm": 1.2318803071975708, "learning_rate": 0.00181156501726122, "loss": 0.9374, "step": 32750 }, { "epoch": 9.424626006904488, "grad_norm": 1.7154326438903809, "learning_rate": 0.0018115074798619102, "loss": 0.7568, "step": 32760 }, { "epoch": 9.427502876869966, "grad_norm": 2.7373898029327393, "learning_rate": 0.0018114499424626007, "loss": 0.7061, "step": 32770 }, { "epoch": 9.430379746835444, "grad_norm": 1.2553892135620117, "learning_rate": 0.0018113924050632913, "loss": 0.8956, "step": 32780 }, { "epoch": 9.43325661680092, "grad_norm": 1.1517919301986694, "learning_rate": 0.0018113348676639816, "loss": 0.842, "step": 32790 }, { "epoch": 9.436133486766398, "grad_norm": 1.3324604034423828, "learning_rate": 0.0018112773302646722, "loss": 0.7565, "step": 32800 }, { "epoch": 9.439010356731876, "grad_norm": 1.0992389917373657, "learning_rate": 0.0018112197928653623, "loss": 0.773, "step": 32810 }, { "epoch": 9.441887226697354, "grad_norm": 1.615435004234314, "learning_rate": 0.0018111622554660529, "loss": 0.6372, "step": 32820 }, { "epoch": 9.444764096662832, "grad_norm": 1.595409631729126, "learning_rate": 0.0018111047180667434, "loss": 0.9237, "step": 32830 }, { "epoch": 9.447640966628308, "grad_norm": 1.3820974826812744, "learning_rate": 0.0018110471806674338, "loss": 1.0209, "step": 32840 }, { "epoch": 9.450517836593786, "grad_norm": 2.032493829727173, "learning_rate": 0.0018109896432681244, "loss": 0.818, "step": 32850 }, { "epoch": 9.453394706559264, "grad_norm": 1.0135732889175415, "learning_rate": 0.001810932105868815, "loss": 0.7557, "step": 32860 }, { "epoch": 9.456271576524742, "grad_norm": 0.8781247735023499, "learning_rate": 0.001810874568469505, "loss": 0.7506, "step": 32870 }, { "epoch": 9.459148446490218, "grad_norm": 1.4307345151901245, "learning_rate": 0.0018108170310701956, "loss": 0.7089, "step": 32880 }, { "epoch": 9.462025316455696, "grad_norm": 1.4665231704711914, "learning_rate": 0.0018107594936708862, "loss": 0.9575, "step": 32890 }, { "epoch": 9.464902186421174, "grad_norm": 2.0760552883148193, "learning_rate": 0.0018107019562715765, "loss": 0.9595, "step": 32900 }, { "epoch": 9.467779056386652, "grad_norm": 0.8579868674278259, "learning_rate": 0.001810644418872267, "loss": 0.8155, "step": 32910 }, { "epoch": 9.470655926352128, "grad_norm": 1.0764528512954712, "learning_rate": 0.0018105868814729574, "loss": 0.8251, "step": 32920 }, { "epoch": 9.473532796317606, "grad_norm": 1.2130632400512695, "learning_rate": 0.0018105293440736478, "loss": 0.6863, "step": 32930 }, { "epoch": 9.476409666283084, "grad_norm": 1.4572019577026367, "learning_rate": 0.0018104718066743384, "loss": 0.8574, "step": 32940 }, { "epoch": 9.479286536248562, "grad_norm": 1.411190390586853, "learning_rate": 0.0018104142692750287, "loss": 0.6989, "step": 32950 }, { "epoch": 9.482163406214038, "grad_norm": 1.2570444345474243, "learning_rate": 0.0018103567318757193, "loss": 0.6865, "step": 32960 }, { "epoch": 9.485040276179516, "grad_norm": 1.588479995727539, "learning_rate": 0.0018102991944764098, "loss": 0.8368, "step": 32970 }, { "epoch": 9.487917146144994, "grad_norm": 1.1646257638931274, "learning_rate": 0.0018102416570771002, "loss": 0.7242, "step": 32980 }, { "epoch": 9.490794016110472, "grad_norm": 1.3767772912979126, "learning_rate": 0.0018101841196777905, "loss": 0.7178, "step": 32990 }, { "epoch": 9.49367088607595, "grad_norm": 1.811123013496399, "learning_rate": 0.001810126582278481, "loss": 0.8776, "step": 33000 }, { "epoch": 9.496547756041426, "grad_norm": 1.2473517656326294, "learning_rate": 0.0018100690448791714, "loss": 0.7178, "step": 33010 }, { "epoch": 9.499424626006904, "grad_norm": 1.4826092720031738, "learning_rate": 0.001810011507479862, "loss": 0.8537, "step": 33020 }, { "epoch": 9.502301495972382, "grad_norm": 1.7874422073364258, "learning_rate": 0.0018099539700805523, "loss": 0.831, "step": 33030 }, { "epoch": 9.50517836593786, "grad_norm": 1.4714328050613403, "learning_rate": 0.001809896432681243, "loss": 0.7195, "step": 33040 }, { "epoch": 9.508055235903337, "grad_norm": 1.6174490451812744, "learning_rate": 0.0018098388952819333, "loss": 0.9691, "step": 33050 }, { "epoch": 9.510932105868815, "grad_norm": 1.640203595161438, "learning_rate": 0.0018097813578826236, "loss": 0.6962, "step": 33060 }, { "epoch": 9.513808975834293, "grad_norm": 0.7717615365982056, "learning_rate": 0.0018097238204833142, "loss": 0.6856, "step": 33070 }, { "epoch": 9.51668584579977, "grad_norm": 1.7040470838546753, "learning_rate": 0.0018096662830840047, "loss": 0.8316, "step": 33080 }, { "epoch": 9.519562715765247, "grad_norm": 1.7998746633529663, "learning_rate": 0.001809608745684695, "loss": 0.8987, "step": 33090 }, { "epoch": 9.522439585730725, "grad_norm": 1.277180790901184, "learning_rate": 0.0018095512082853856, "loss": 0.838, "step": 33100 }, { "epoch": 9.525316455696203, "grad_norm": 1.3815042972564697, "learning_rate": 0.001809493670886076, "loss": 0.7587, "step": 33110 }, { "epoch": 9.52819332566168, "grad_norm": 1.4650033712387085, "learning_rate": 0.0018094361334867663, "loss": 0.8138, "step": 33120 }, { "epoch": 9.531070195627157, "grad_norm": 0.8375522494316101, "learning_rate": 0.001809378596087457, "loss": 0.6157, "step": 33130 }, { "epoch": 9.533947065592635, "grad_norm": 1.148983359336853, "learning_rate": 0.0018093210586881472, "loss": 0.795, "step": 33140 }, { "epoch": 9.536823935558113, "grad_norm": 2.3447139263153076, "learning_rate": 0.0018092635212888378, "loss": 0.8741, "step": 33150 }, { "epoch": 9.53970080552359, "grad_norm": 2.205822229385376, "learning_rate": 0.0018092059838895284, "loss": 0.9591, "step": 33160 }, { "epoch": 9.542577675489067, "grad_norm": 1.810452938079834, "learning_rate": 0.0018091484464902185, "loss": 0.7086, "step": 33170 }, { "epoch": 9.545454545454545, "grad_norm": 1.2556084394454956, "learning_rate": 0.001809090909090909, "loss": 0.7244, "step": 33180 }, { "epoch": 9.548331415420023, "grad_norm": 1.08841872215271, "learning_rate": 0.0018090333716915996, "loss": 0.802, "step": 33190 }, { "epoch": 9.551208285385501, "grad_norm": 1.4445946216583252, "learning_rate": 0.00180897583429229, "loss": 0.7227, "step": 33200 }, { "epoch": 9.554085155350979, "grad_norm": 1.1581803560256958, "learning_rate": 0.0018089182968929805, "loss": 0.9672, "step": 33210 }, { "epoch": 9.556962025316455, "grad_norm": 1.9899396896362305, "learning_rate": 0.001808860759493671, "loss": 0.9659, "step": 33220 }, { "epoch": 9.559838895281933, "grad_norm": 1.6939752101898193, "learning_rate": 0.0018088032220943612, "loss": 0.6833, "step": 33230 }, { "epoch": 9.562715765247411, "grad_norm": 1.3893895149230957, "learning_rate": 0.0018087456846950518, "loss": 0.9781, "step": 33240 }, { "epoch": 9.565592635212889, "grad_norm": 1.5144100189208984, "learning_rate": 0.0018086881472957424, "loss": 0.8096, "step": 33250 }, { "epoch": 9.568469505178365, "grad_norm": 2.4636929035186768, "learning_rate": 0.0018086306098964327, "loss": 0.8404, "step": 33260 }, { "epoch": 9.571346375143843, "grad_norm": 2.8481736183166504, "learning_rate": 0.0018085730724971233, "loss": 0.8782, "step": 33270 }, { "epoch": 9.574223245109321, "grad_norm": 1.5526742935180664, "learning_rate": 0.0018085155350978136, "loss": 0.7463, "step": 33280 }, { "epoch": 9.5771001150748, "grad_norm": 0.9905989766120911, "learning_rate": 0.001808457997698504, "loss": 0.7825, "step": 33290 }, { "epoch": 9.579976985040275, "grad_norm": 1.8667960166931152, "learning_rate": 0.0018084004602991945, "loss": 0.7294, "step": 33300 }, { "epoch": 9.582853855005753, "grad_norm": 0.882879376411438, "learning_rate": 0.0018083429228998849, "loss": 0.6534, "step": 33310 }, { "epoch": 9.585730724971231, "grad_norm": 1.927337408065796, "learning_rate": 0.0018082853855005754, "loss": 0.9186, "step": 33320 }, { "epoch": 9.58860759493671, "grad_norm": 4.231822490692139, "learning_rate": 0.001808227848101266, "loss": 0.9036, "step": 33330 }, { "epoch": 9.591484464902187, "grad_norm": 1.1002614498138428, "learning_rate": 0.0018081703107019564, "loss": 0.9287, "step": 33340 }, { "epoch": 9.594361334867664, "grad_norm": 1.223043441772461, "learning_rate": 0.0018081127733026467, "loss": 0.818, "step": 33350 }, { "epoch": 9.597238204833141, "grad_norm": 0.9876987934112549, "learning_rate": 0.0018080552359033373, "loss": 0.8088, "step": 33360 }, { "epoch": 9.60011507479862, "grad_norm": 1.3454798460006714, "learning_rate": 0.0018079976985040276, "loss": 0.675, "step": 33370 }, { "epoch": 9.602991944764097, "grad_norm": 1.0559706687927246, "learning_rate": 0.0018079401611047182, "loss": 0.8332, "step": 33380 }, { "epoch": 9.605868814729574, "grad_norm": 1.1935620307922363, "learning_rate": 0.0018078826237054085, "loss": 0.8168, "step": 33390 }, { "epoch": 9.608745684695052, "grad_norm": 2.0812830924987793, "learning_rate": 0.001807825086306099, "loss": 0.8377, "step": 33400 }, { "epoch": 9.61162255466053, "grad_norm": 1.2787150144577026, "learning_rate": 0.0018077675489067894, "loss": 0.7546, "step": 33410 }, { "epoch": 9.614499424626008, "grad_norm": 1.3882465362548828, "learning_rate": 0.0018077100115074798, "loss": 0.8612, "step": 33420 }, { "epoch": 9.617376294591484, "grad_norm": 1.5577356815338135, "learning_rate": 0.0018076524741081703, "loss": 0.9016, "step": 33430 }, { "epoch": 9.620253164556962, "grad_norm": 1.522091269493103, "learning_rate": 0.001807594936708861, "loss": 0.6543, "step": 33440 }, { "epoch": 9.62313003452244, "grad_norm": 1.414304494857788, "learning_rate": 0.0018075373993095513, "loss": 0.6873, "step": 33450 }, { "epoch": 9.626006904487918, "grad_norm": 1.1609710454940796, "learning_rate": 0.0018074798619102418, "loss": 0.8395, "step": 33460 }, { "epoch": 9.628883774453394, "grad_norm": 1.2945685386657715, "learning_rate": 0.0018074223245109322, "loss": 0.7677, "step": 33470 }, { "epoch": 9.631760644418872, "grad_norm": 0.7544395923614502, "learning_rate": 0.0018073647871116225, "loss": 1.0525, "step": 33480 }, { "epoch": 9.63463751438435, "grad_norm": 1.2440065145492554, "learning_rate": 0.001807307249712313, "loss": 0.8415, "step": 33490 }, { "epoch": 9.637514384349828, "grad_norm": 1.3916605710983276, "learning_rate": 0.0018072497123130034, "loss": 0.9392, "step": 33500 }, { "epoch": 9.640391254315304, "grad_norm": 0.998146116733551, "learning_rate": 0.001807192174913694, "loss": 0.8538, "step": 33510 }, { "epoch": 9.643268124280782, "grad_norm": 1.1866930723190308, "learning_rate": 0.0018071346375143846, "loss": 0.8477, "step": 33520 }, { "epoch": 9.64614499424626, "grad_norm": 1.057919979095459, "learning_rate": 0.0018070771001150747, "loss": 1.0893, "step": 33530 }, { "epoch": 9.649021864211738, "grad_norm": 1.6971626281738281, "learning_rate": 0.0018070195627157652, "loss": 0.6628, "step": 33540 }, { "epoch": 9.651898734177216, "grad_norm": 0.5459734201431274, "learning_rate": 0.0018069620253164558, "loss": 0.7454, "step": 33550 }, { "epoch": 9.654775604142692, "grad_norm": 1.0075058937072754, "learning_rate": 0.0018069044879171462, "loss": 0.8829, "step": 33560 }, { "epoch": 9.65765247410817, "grad_norm": 1.568194031715393, "learning_rate": 0.0018068469505178367, "loss": 1.0619, "step": 33570 }, { "epoch": 9.660529344073648, "grad_norm": 1.6022130250930786, "learning_rate": 0.0018067894131185273, "loss": 0.7684, "step": 33580 }, { "epoch": 9.663406214039126, "grad_norm": 1.037498950958252, "learning_rate": 0.0018067318757192174, "loss": 0.7383, "step": 33590 }, { "epoch": 9.666283084004602, "grad_norm": 1.1497538089752197, "learning_rate": 0.001806674338319908, "loss": 0.8212, "step": 33600 }, { "epoch": 9.66915995397008, "grad_norm": 1.1660922765731812, "learning_rate": 0.0018066168009205983, "loss": 0.8193, "step": 33610 }, { "epoch": 9.672036823935558, "grad_norm": 0.9704128503799438, "learning_rate": 0.0018065592635212889, "loss": 0.7016, "step": 33620 }, { "epoch": 9.674913693901036, "grad_norm": 2.013524055480957, "learning_rate": 0.0018065017261219795, "loss": 0.832, "step": 33630 }, { "epoch": 9.677790563866512, "grad_norm": 0.990414023399353, "learning_rate": 0.0018064441887226696, "loss": 0.8665, "step": 33640 }, { "epoch": 9.68066743383199, "grad_norm": 3.6206700801849365, "learning_rate": 0.0018063866513233601, "loss": 1.1217, "step": 33650 }, { "epoch": 9.683544303797468, "grad_norm": 2.5367319583892822, "learning_rate": 0.0018063291139240507, "loss": 0.7556, "step": 33660 }, { "epoch": 9.686421173762946, "grad_norm": 1.7301732301712036, "learning_rate": 0.001806271576524741, "loss": 0.7857, "step": 33670 }, { "epoch": 9.689298043728424, "grad_norm": 1.2285130023956299, "learning_rate": 0.0018062140391254316, "loss": 0.7958, "step": 33680 }, { "epoch": 9.6921749136939, "grad_norm": 0.9174132943153381, "learning_rate": 0.0018061565017261222, "loss": 0.8049, "step": 33690 }, { "epoch": 9.695051783659379, "grad_norm": 1.9068472385406494, "learning_rate": 0.0018060989643268123, "loss": 0.7085, "step": 33700 }, { "epoch": 9.697928653624857, "grad_norm": 2.009364128112793, "learning_rate": 0.0018060414269275029, "loss": 0.8529, "step": 33710 }, { "epoch": 9.700805523590335, "grad_norm": 2.984125852584839, "learning_rate": 0.0018059838895281932, "loss": 0.8256, "step": 33720 }, { "epoch": 9.70368239355581, "grad_norm": 1.1377804279327393, "learning_rate": 0.0018059263521288838, "loss": 0.9458, "step": 33730 }, { "epoch": 9.706559263521289, "grad_norm": 2.1464455127716064, "learning_rate": 0.0018058688147295744, "loss": 0.8828, "step": 33740 }, { "epoch": 9.709436133486767, "grad_norm": 1.630074143409729, "learning_rate": 0.0018058112773302647, "loss": 0.8395, "step": 33750 }, { "epoch": 9.712313003452245, "grad_norm": 1.8357349634170532, "learning_rate": 0.001805753739930955, "loss": 0.9294, "step": 33760 }, { "epoch": 9.715189873417721, "grad_norm": 1.6616662740707397, "learning_rate": 0.0018056962025316456, "loss": 0.8293, "step": 33770 }, { "epoch": 9.718066743383199, "grad_norm": 0.9363522529602051, "learning_rate": 0.001805638665132336, "loss": 0.7222, "step": 33780 }, { "epoch": 9.720943613348677, "grad_norm": 3.147590398788452, "learning_rate": 0.0018055811277330265, "loss": 0.743, "step": 33790 }, { "epoch": 9.723820483314155, "grad_norm": 1.053594708442688, "learning_rate": 0.001805523590333717, "loss": 0.711, "step": 33800 }, { "epoch": 9.726697353279631, "grad_norm": 2.4941680431365967, "learning_rate": 0.0018054660529344074, "loss": 0.8395, "step": 33810 }, { "epoch": 9.729574223245109, "grad_norm": 1.7872027158737183, "learning_rate": 0.0018054085155350978, "loss": 0.8915, "step": 33820 }, { "epoch": 9.732451093210587, "grad_norm": 1.2482669353485107, "learning_rate": 0.0018053509781357881, "loss": 0.827, "step": 33830 }, { "epoch": 9.735327963176065, "grad_norm": 1.1297359466552734, "learning_rate": 0.0018052934407364787, "loss": 0.8054, "step": 33840 }, { "epoch": 9.738204833141541, "grad_norm": 1.5385364294052124, "learning_rate": 0.0018052359033371693, "loss": 0.8396, "step": 33850 }, { "epoch": 9.74108170310702, "grad_norm": 2.159113883972168, "learning_rate": 0.0018051783659378596, "loss": 0.7951, "step": 33860 }, { "epoch": 9.743958573072497, "grad_norm": 0.9342076778411865, "learning_rate": 0.0018051208285385502, "loss": 0.7153, "step": 33870 }, { "epoch": 9.746835443037975, "grad_norm": 1.9587721824645996, "learning_rate": 0.0018050632911392405, "loss": 0.6655, "step": 33880 }, { "epoch": 9.749712313003453, "grad_norm": 1.2587355375289917, "learning_rate": 0.0018050057537399309, "loss": 0.9585, "step": 33890 }, { "epoch": 9.75258918296893, "grad_norm": 3.2436792850494385, "learning_rate": 0.0018049482163406214, "loss": 0.8182, "step": 33900 }, { "epoch": 9.755466052934407, "grad_norm": 1.7755786180496216, "learning_rate": 0.001804890678941312, "loss": 0.9353, "step": 33910 }, { "epoch": 9.758342922899885, "grad_norm": 1.0785576105117798, "learning_rate": 0.0018048331415420023, "loss": 1.0473, "step": 33920 }, { "epoch": 9.761219792865363, "grad_norm": 0.8199670910835266, "learning_rate": 0.001804775604142693, "loss": 0.621, "step": 33930 }, { "epoch": 9.76409666283084, "grad_norm": 1.7545585632324219, "learning_rate": 0.0018047180667433833, "loss": 0.7783, "step": 33940 }, { "epoch": 9.766973532796317, "grad_norm": 1.009718894958496, "learning_rate": 0.0018046605293440736, "loss": 0.7955, "step": 33950 }, { "epoch": 9.769850402761795, "grad_norm": 1.0596433877944946, "learning_rate": 0.0018046029919447642, "loss": 0.6906, "step": 33960 }, { "epoch": 9.772727272727273, "grad_norm": 1.561417818069458, "learning_rate": 0.0018045454545454545, "loss": 0.8831, "step": 33970 }, { "epoch": 9.77560414269275, "grad_norm": 1.863606572151184, "learning_rate": 0.001804487917146145, "loss": 0.8883, "step": 33980 }, { "epoch": 9.778481012658228, "grad_norm": 1.2519254684448242, "learning_rate": 0.0018044303797468356, "loss": 0.8963, "step": 33990 }, { "epoch": 9.781357882623706, "grad_norm": 1.283422589302063, "learning_rate": 0.0018043728423475258, "loss": 0.967, "step": 34000 }, { "epoch": 9.784234752589184, "grad_norm": 1.4972355365753174, "learning_rate": 0.0018043153049482163, "loss": 0.7938, "step": 34010 }, { "epoch": 9.78711162255466, "grad_norm": 1.6890136003494263, "learning_rate": 0.001804257767548907, "loss": 0.8197, "step": 34020 }, { "epoch": 9.789988492520138, "grad_norm": 2.2133593559265137, "learning_rate": 0.0018042002301495972, "loss": 0.828, "step": 34030 }, { "epoch": 9.792865362485616, "grad_norm": 1.2212508916854858, "learning_rate": 0.0018041426927502878, "loss": 0.9449, "step": 34040 }, { "epoch": 9.795742232451094, "grad_norm": 1.0844441652297974, "learning_rate": 0.0018040851553509784, "loss": 0.7384, "step": 34050 }, { "epoch": 9.79861910241657, "grad_norm": 1.205466866493225, "learning_rate": 0.0018040276179516685, "loss": 0.7986, "step": 34060 }, { "epoch": 9.801495972382048, "grad_norm": 0.8708109855651855, "learning_rate": 0.001803970080552359, "loss": 0.7258, "step": 34070 }, { "epoch": 9.804372842347526, "grad_norm": 2.66042160987854, "learning_rate": 0.0018039125431530494, "loss": 0.8224, "step": 34080 }, { "epoch": 9.807249712313004, "grad_norm": 2.2462549209594727, "learning_rate": 0.00180385500575374, "loss": 0.6852, "step": 34090 }, { "epoch": 9.810126582278482, "grad_norm": 1.2268112897872925, "learning_rate": 0.0018037974683544305, "loss": 0.6739, "step": 34100 }, { "epoch": 9.813003452243958, "grad_norm": 1.9519771337509155, "learning_rate": 0.0018037399309551209, "loss": 0.8413, "step": 34110 }, { "epoch": 9.815880322209436, "grad_norm": 1.919744849205017, "learning_rate": 0.0018036823935558112, "loss": 0.9193, "step": 34120 }, { "epoch": 9.818757192174914, "grad_norm": 2.302379608154297, "learning_rate": 0.0018036248561565018, "loss": 0.8386, "step": 34130 }, { "epoch": 9.821634062140392, "grad_norm": 1.1591081619262695, "learning_rate": 0.0018035673187571921, "loss": 0.8369, "step": 34140 }, { "epoch": 9.824510932105868, "grad_norm": 0.602607786655426, "learning_rate": 0.0018035097813578827, "loss": 0.6825, "step": 34150 }, { "epoch": 9.827387802071346, "grad_norm": 1.2528412342071533, "learning_rate": 0.0018034522439585733, "loss": 0.8414, "step": 34160 }, { "epoch": 9.830264672036824, "grad_norm": 1.522674560546875, "learning_rate": 0.0018033947065592636, "loss": 0.9046, "step": 34170 }, { "epoch": 9.833141542002302, "grad_norm": 1.46048903465271, "learning_rate": 0.001803337169159954, "loss": 0.7828, "step": 34180 }, { "epoch": 9.836018411967778, "grad_norm": 1.1245638132095337, "learning_rate": 0.0018032796317606443, "loss": 0.8116, "step": 34190 }, { "epoch": 9.838895281933256, "grad_norm": 1.9769333600997925, "learning_rate": 0.0018032220943613349, "loss": 0.9424, "step": 34200 }, { "epoch": 9.841772151898734, "grad_norm": 1.3658194541931152, "learning_rate": 0.0018031645569620254, "loss": 0.8076, "step": 34210 }, { "epoch": 9.844649021864212, "grad_norm": 1.570499062538147, "learning_rate": 0.0018031070195627158, "loss": 0.9985, "step": 34220 }, { "epoch": 9.84752589182969, "grad_norm": 1.365899920463562, "learning_rate": 0.0018030494821634064, "loss": 0.7074, "step": 34230 }, { "epoch": 9.850402761795166, "grad_norm": 0.9469289779663086, "learning_rate": 0.0018029919447640967, "loss": 0.7882, "step": 34240 }, { "epoch": 9.853279631760644, "grad_norm": 4.6719160079956055, "learning_rate": 0.001802934407364787, "loss": 0.8844, "step": 34250 }, { "epoch": 9.856156501726122, "grad_norm": 0.9497219920158386, "learning_rate": 0.0018028768699654776, "loss": 0.9066, "step": 34260 }, { "epoch": 9.8590333716916, "grad_norm": 1.3232779502868652, "learning_rate": 0.0018028193325661682, "loss": 0.9404, "step": 34270 }, { "epoch": 9.861910241657077, "grad_norm": 2.0928680896759033, "learning_rate": 0.0018027617951668585, "loss": 1.0176, "step": 34280 }, { "epoch": 9.864787111622555, "grad_norm": 1.1695268154144287, "learning_rate": 0.001802704257767549, "loss": 0.9193, "step": 34290 }, { "epoch": 9.867663981588032, "grad_norm": 1.6307833194732666, "learning_rate": 0.0018026467203682392, "loss": 0.945, "step": 34300 }, { "epoch": 9.87054085155351, "grad_norm": 1.1714301109313965, "learning_rate": 0.0018025891829689298, "loss": 0.9836, "step": 34310 }, { "epoch": 9.873417721518987, "grad_norm": 1.750415325164795, "learning_rate": 0.0018025316455696203, "loss": 0.8105, "step": 34320 }, { "epoch": 9.876294591484465, "grad_norm": 1.1656601428985596, "learning_rate": 0.0018024741081703107, "loss": 0.798, "step": 34330 }, { "epoch": 9.879171461449943, "grad_norm": 1.808887004852295, "learning_rate": 0.0018024165707710013, "loss": 0.8024, "step": 34340 }, { "epoch": 9.88204833141542, "grad_norm": 1.4701802730560303, "learning_rate": 0.0018023590333716918, "loss": 0.9524, "step": 34350 }, { "epoch": 9.884925201380897, "grad_norm": 2.001974105834961, "learning_rate": 0.001802301495972382, "loss": 0.7688, "step": 34360 }, { "epoch": 9.887802071346375, "grad_norm": 1.4604164361953735, "learning_rate": 0.0018022439585730725, "loss": 0.7797, "step": 34370 }, { "epoch": 9.890678941311853, "grad_norm": 1.504778265953064, "learning_rate": 0.001802186421173763, "loss": 0.7286, "step": 34380 }, { "epoch": 9.89355581127733, "grad_norm": 3.444260835647583, "learning_rate": 0.0018021288837744534, "loss": 0.8511, "step": 34390 }, { "epoch": 9.896432681242807, "grad_norm": 2.323521614074707, "learning_rate": 0.001802071346375144, "loss": 0.7694, "step": 34400 }, { "epoch": 9.899309551208285, "grad_norm": 1.8694980144500732, "learning_rate": 0.0018020138089758341, "loss": 0.8752, "step": 34410 }, { "epoch": 9.902186421173763, "grad_norm": 0.7902039885520935, "learning_rate": 0.0018019562715765247, "loss": 0.9797, "step": 34420 }, { "epoch": 9.905063291139241, "grad_norm": 1.4111162424087524, "learning_rate": 0.0018018987341772152, "loss": 0.7094, "step": 34430 }, { "epoch": 9.907940161104719, "grad_norm": 1.6406062841415405, "learning_rate": 0.0018018411967779056, "loss": 0.8032, "step": 34440 }, { "epoch": 9.910817031070195, "grad_norm": 2.0651557445526123, "learning_rate": 0.0018017836593785962, "loss": 0.8725, "step": 34450 }, { "epoch": 9.913693901035673, "grad_norm": 1.691847562789917, "learning_rate": 0.0018017261219792867, "loss": 0.8043, "step": 34460 }, { "epoch": 9.916570771001151, "grad_norm": 1.1964226961135864, "learning_rate": 0.0018016685845799768, "loss": 0.879, "step": 34470 }, { "epoch": 9.919447640966629, "grad_norm": 1.6433061361312866, "learning_rate": 0.0018016110471806674, "loss": 0.7883, "step": 34480 }, { "epoch": 9.922324510932105, "grad_norm": 3.835066080093384, "learning_rate": 0.001801553509781358, "loss": 0.8088, "step": 34490 }, { "epoch": 9.925201380897583, "grad_norm": 1.3870519399642944, "learning_rate": 0.0018014959723820483, "loss": 0.8832, "step": 34500 }, { "epoch": 9.928078250863061, "grad_norm": 1.2855952978134155, "learning_rate": 0.0018014384349827389, "loss": 0.6838, "step": 34510 }, { "epoch": 9.93095512082854, "grad_norm": 0.9060441851615906, "learning_rate": 0.0018013808975834292, "loss": 0.8145, "step": 34520 }, { "epoch": 9.933831990794015, "grad_norm": 1.0421961545944214, "learning_rate": 0.0018013233601841196, "loss": 0.9353, "step": 34530 }, { "epoch": 9.936708860759493, "grad_norm": 1.8630436658859253, "learning_rate": 0.0018012658227848101, "loss": 0.7303, "step": 34540 }, { "epoch": 9.939585730724971, "grad_norm": 2.257627010345459, "learning_rate": 0.0018012082853855005, "loss": 0.7412, "step": 34550 }, { "epoch": 9.94246260069045, "grad_norm": 1.1986321210861206, "learning_rate": 0.001801150747986191, "loss": 0.9258, "step": 34560 }, { "epoch": 9.945339470655927, "grad_norm": 1.1508843898773193, "learning_rate": 0.0018010932105868816, "loss": 0.6315, "step": 34570 }, { "epoch": 9.948216340621403, "grad_norm": 1.9081283807754517, "learning_rate": 0.001801035673187572, "loss": 0.9903, "step": 34580 }, { "epoch": 9.951093210586881, "grad_norm": 1.7401052713394165, "learning_rate": 0.0018009781357882623, "loss": 0.7468, "step": 34590 }, { "epoch": 9.95397008055236, "grad_norm": 1.6731455326080322, "learning_rate": 0.0018009205983889529, "loss": 0.7885, "step": 34600 }, { "epoch": 9.956846950517837, "grad_norm": 1.0732303857803345, "learning_rate": 0.0018008630609896432, "loss": 0.8648, "step": 34610 }, { "epoch": 9.959723820483314, "grad_norm": 1.8058369159698486, "learning_rate": 0.0018008055235903338, "loss": 0.8225, "step": 34620 }, { "epoch": 9.962600690448792, "grad_norm": 1.0891706943511963, "learning_rate": 0.0018007479861910244, "loss": 0.8038, "step": 34630 }, { "epoch": 9.96547756041427, "grad_norm": 0.9857622385025024, "learning_rate": 0.0018006904487917147, "loss": 0.8228, "step": 34640 }, { "epoch": 9.968354430379748, "grad_norm": 1.0434199571609497, "learning_rate": 0.001800632911392405, "loss": 0.7622, "step": 34650 }, { "epoch": 9.971231300345224, "grad_norm": 0.8743027448654175, "learning_rate": 0.0018005753739930954, "loss": 0.775, "step": 34660 }, { "epoch": 9.974108170310702, "grad_norm": 1.2224334478378296, "learning_rate": 0.001800517836593786, "loss": 0.7507, "step": 34670 }, { "epoch": 9.97698504027618, "grad_norm": 1.0576993227005005, "learning_rate": 0.0018004602991944765, "loss": 0.8337, "step": 34680 }, { "epoch": 9.979861910241658, "grad_norm": 2.1341114044189453, "learning_rate": 0.0018004027617951669, "loss": 0.8993, "step": 34690 }, { "epoch": 9.982738780207134, "grad_norm": 1.3692209720611572, "learning_rate": 0.0018003452243958574, "loss": 0.7832, "step": 34700 }, { "epoch": 9.985615650172612, "grad_norm": 1.8643431663513184, "learning_rate": 0.0018002876869965478, "loss": 0.7808, "step": 34710 }, { "epoch": 9.98849252013809, "grad_norm": 1.0946372747421265, "learning_rate": 0.0018002301495972381, "loss": 0.8851, "step": 34720 }, { "epoch": 9.991369390103568, "grad_norm": 1.6725207567214966, "learning_rate": 0.0018001726121979287, "loss": 0.845, "step": 34730 }, { "epoch": 9.994246260069044, "grad_norm": 1.755548119544983, "learning_rate": 0.0018001150747986193, "loss": 0.7174, "step": 34740 }, { "epoch": 9.997123130034522, "grad_norm": 1.1261444091796875, "learning_rate": 0.0018000575373993096, "loss": 0.8223, "step": 34750 }, { "epoch": 10.0, "grad_norm": 1.1385167837142944, "learning_rate": 0.0018000000000000002, "loss": 0.8794, "step": 34760 }, { "epoch": 10.002876869965478, "grad_norm": 1.5623152256011963, "learning_rate": 0.0017999424626006903, "loss": 0.8205, "step": 34770 }, { "epoch": 10.005753739930956, "grad_norm": 1.4586520195007324, "learning_rate": 0.0017998849252013809, "loss": 0.8794, "step": 34780 }, { "epoch": 10.008630609896432, "grad_norm": 0.7230918407440186, "learning_rate": 0.0017998273878020714, "loss": 0.7732, "step": 34790 }, { "epoch": 10.01150747986191, "grad_norm": 1.2410329580307007, "learning_rate": 0.0017997698504027618, "loss": 0.8116, "step": 34800 }, { "epoch": 10.014384349827388, "grad_norm": 1.2392666339874268, "learning_rate": 0.0017997123130034523, "loss": 0.7549, "step": 34810 }, { "epoch": 10.017261219792866, "grad_norm": 0.9146358966827393, "learning_rate": 0.001799654775604143, "loss": 0.7186, "step": 34820 }, { "epoch": 10.020138089758342, "grad_norm": 2.3255410194396973, "learning_rate": 0.001799597238204833, "loss": 0.8983, "step": 34830 }, { "epoch": 10.02301495972382, "grad_norm": 0.9976220726966858, "learning_rate": 0.0017995397008055236, "loss": 0.8234, "step": 34840 }, { "epoch": 10.025891829689298, "grad_norm": 0.9188392758369446, "learning_rate": 0.0017994821634062142, "loss": 0.5438, "step": 34850 }, { "epoch": 10.028768699654776, "grad_norm": 1.6420392990112305, "learning_rate": 0.0017994246260069045, "loss": 0.7579, "step": 34860 }, { "epoch": 10.031645569620252, "grad_norm": 1.077669382095337, "learning_rate": 0.001799367088607595, "loss": 0.7437, "step": 34870 }, { "epoch": 10.03452243958573, "grad_norm": 1.4592931270599365, "learning_rate": 0.0017993095512082854, "loss": 0.7804, "step": 34880 }, { "epoch": 10.037399309551208, "grad_norm": 2.203106164932251, "learning_rate": 0.0017992520138089758, "loss": 0.8138, "step": 34890 }, { "epoch": 10.040276179516686, "grad_norm": 1.389029622077942, "learning_rate": 0.0017991944764096663, "loss": 0.6814, "step": 34900 }, { "epoch": 10.043153049482163, "grad_norm": 2.485621690750122, "learning_rate": 0.0017991369390103567, "loss": 0.8951, "step": 34910 }, { "epoch": 10.04602991944764, "grad_norm": 1.5459450483322144, "learning_rate": 0.0017990794016110472, "loss": 0.7457, "step": 34920 }, { "epoch": 10.048906789413119, "grad_norm": 1.4790536165237427, "learning_rate": 0.0017990218642117378, "loss": 0.6945, "step": 34930 }, { "epoch": 10.051783659378597, "grad_norm": 1.374284267425537, "learning_rate": 0.0017989643268124281, "loss": 0.8902, "step": 34940 }, { "epoch": 10.054660529344075, "grad_norm": 1.4654744863510132, "learning_rate": 0.0017989067894131185, "loss": 0.9088, "step": 34950 }, { "epoch": 10.05753739930955, "grad_norm": 1.3871160745620728, "learning_rate": 0.001798849252013809, "loss": 0.8657, "step": 34960 }, { "epoch": 10.060414269275029, "grad_norm": 1.1166225671768188, "learning_rate": 0.0017987917146144994, "loss": 0.6675, "step": 34970 }, { "epoch": 10.063291139240507, "grad_norm": 1.842422366142273, "learning_rate": 0.00179873417721519, "loss": 0.7059, "step": 34980 }, { "epoch": 10.066168009205985, "grad_norm": 1.1186217069625854, "learning_rate": 0.0017986766398158803, "loss": 0.7673, "step": 34990 }, { "epoch": 10.06904487917146, "grad_norm": 0.9485477805137634, "learning_rate": 0.0017986191024165709, "loss": 0.7955, "step": 35000 }, { "epoch": 10.071921749136939, "grad_norm": 2.6782007217407227, "learning_rate": 0.0017985615650172612, "loss": 0.9509, "step": 35010 }, { "epoch": 10.074798619102417, "grad_norm": 1.1942914724349976, "learning_rate": 0.0017985040276179516, "loss": 0.6363, "step": 35020 }, { "epoch": 10.077675489067895, "grad_norm": 0.9901559948921204, "learning_rate": 0.0017984464902186421, "loss": 0.9012, "step": 35030 }, { "epoch": 10.080552359033371, "grad_norm": 1.2504971027374268, "learning_rate": 0.0017983889528193327, "loss": 0.8063, "step": 35040 }, { "epoch": 10.083429228998849, "grad_norm": 1.1626631021499634, "learning_rate": 0.001798331415420023, "loss": 0.7805, "step": 35050 }, { "epoch": 10.086306098964327, "grad_norm": 1.3075705766677856, "learning_rate": 0.0017982738780207136, "loss": 0.6771, "step": 35060 }, { "epoch": 10.089182968929805, "grad_norm": 0.8131011128425598, "learning_rate": 0.001798216340621404, "loss": 0.6712, "step": 35070 }, { "epoch": 10.092059838895281, "grad_norm": 0.8706451058387756, "learning_rate": 0.0017981588032220943, "loss": 0.8206, "step": 35080 }, { "epoch": 10.094936708860759, "grad_norm": 1.182295560836792, "learning_rate": 0.0017981012658227849, "loss": 0.7288, "step": 35090 }, { "epoch": 10.097813578826237, "grad_norm": 1.1621911525726318, "learning_rate": 0.0017980437284234752, "loss": 0.8146, "step": 35100 }, { "epoch": 10.100690448791715, "grad_norm": 1.5716952085494995, "learning_rate": 0.0017979861910241658, "loss": 1.2233, "step": 35110 }, { "epoch": 10.103567318757193, "grad_norm": 0.7988805174827576, "learning_rate": 0.0017979286536248563, "loss": 0.7069, "step": 35120 }, { "epoch": 10.10644418872267, "grad_norm": 1.0879127979278564, "learning_rate": 0.0017978711162255465, "loss": 0.6476, "step": 35130 }, { "epoch": 10.109321058688147, "grad_norm": 1.3978476524353027, "learning_rate": 0.001797813578826237, "loss": 0.6808, "step": 35140 }, { "epoch": 10.112197928653625, "grad_norm": 0.9796364903450012, "learning_rate": 0.0017977560414269276, "loss": 0.6787, "step": 35150 }, { "epoch": 10.115074798619103, "grad_norm": 1.262923240661621, "learning_rate": 0.001797698504027618, "loss": 0.8018, "step": 35160 }, { "epoch": 10.11795166858458, "grad_norm": 1.8724706172943115, "learning_rate": 0.0017976409666283085, "loss": 0.6248, "step": 35170 }, { "epoch": 10.120828538550057, "grad_norm": 1.0046579837799072, "learning_rate": 0.001797583429228999, "loss": 0.7497, "step": 35180 }, { "epoch": 10.123705408515535, "grad_norm": 1.4457645416259766, "learning_rate": 0.0017975258918296892, "loss": 0.7203, "step": 35190 }, { "epoch": 10.126582278481013, "grad_norm": 1.6405397653579712, "learning_rate": 0.0017974683544303798, "loss": 0.8578, "step": 35200 }, { "epoch": 10.12945914844649, "grad_norm": 1.6344918012619019, "learning_rate": 0.0017974108170310701, "loss": 0.8053, "step": 35210 }, { "epoch": 10.132336018411968, "grad_norm": 0.8860941529273987, "learning_rate": 0.0017973532796317607, "loss": 0.8312, "step": 35220 }, { "epoch": 10.135212888377445, "grad_norm": 0.9945207834243774, "learning_rate": 0.0017972957422324513, "loss": 0.725, "step": 35230 }, { "epoch": 10.138089758342923, "grad_norm": 1.1934614181518555, "learning_rate": 0.0017972382048331414, "loss": 0.685, "step": 35240 }, { "epoch": 10.1409666283084, "grad_norm": 1.9156577587127686, "learning_rate": 0.001797180667433832, "loss": 0.6992, "step": 35250 }, { "epoch": 10.143843498273878, "grad_norm": 2.344069480895996, "learning_rate": 0.0017971231300345225, "loss": 0.8112, "step": 35260 }, { "epoch": 10.146720368239356, "grad_norm": 1.0892703533172607, "learning_rate": 0.0017970655926352129, "loss": 0.7528, "step": 35270 }, { "epoch": 10.149597238204834, "grad_norm": 1.446370005607605, "learning_rate": 0.0017970080552359034, "loss": 0.6826, "step": 35280 }, { "epoch": 10.15247410817031, "grad_norm": 1.8092201948165894, "learning_rate": 0.001796950517836594, "loss": 0.7925, "step": 35290 }, { "epoch": 10.155350978135788, "grad_norm": 1.7021218538284302, "learning_rate": 0.0017968929804372841, "loss": 0.8324, "step": 35300 }, { "epoch": 10.158227848101266, "grad_norm": 1.4312963485717773, "learning_rate": 0.0017968354430379747, "loss": 0.8523, "step": 35310 }, { "epoch": 10.161104718066744, "grad_norm": 1.5151021480560303, "learning_rate": 0.0017967779056386652, "loss": 0.707, "step": 35320 }, { "epoch": 10.163981588032222, "grad_norm": 0.8130682706832886, "learning_rate": 0.0017967203682393556, "loss": 0.8005, "step": 35330 }, { "epoch": 10.166858457997698, "grad_norm": 1.062178373336792, "learning_rate": 0.0017966628308400462, "loss": 1.0626, "step": 35340 }, { "epoch": 10.169735327963176, "grad_norm": 1.9405133724212646, "learning_rate": 0.0017966052934407365, "loss": 0.9562, "step": 35350 }, { "epoch": 10.172612197928654, "grad_norm": 1.0074561834335327, "learning_rate": 0.0017965477560414268, "loss": 0.7409, "step": 35360 }, { "epoch": 10.175489067894132, "grad_norm": 2.48303484916687, "learning_rate": 0.0017964902186421174, "loss": 0.7653, "step": 35370 }, { "epoch": 10.178365937859608, "grad_norm": 1.0839847326278687, "learning_rate": 0.0017964326812428078, "loss": 0.8912, "step": 35380 }, { "epoch": 10.181242807825086, "grad_norm": 1.3575588464736938, "learning_rate": 0.0017963751438434983, "loss": 0.7368, "step": 35390 }, { "epoch": 10.184119677790564, "grad_norm": 1.2659127712249756, "learning_rate": 0.0017963176064441889, "loss": 0.6491, "step": 35400 }, { "epoch": 10.186996547756042, "grad_norm": 0.9496157765388489, "learning_rate": 0.0017962600690448792, "loss": 0.7102, "step": 35410 }, { "epoch": 10.189873417721518, "grad_norm": 1.3513522148132324, "learning_rate": 0.0017962025316455696, "loss": 0.7143, "step": 35420 }, { "epoch": 10.192750287686996, "grad_norm": 1.2185286283493042, "learning_rate": 0.0017961449942462601, "loss": 0.6778, "step": 35430 }, { "epoch": 10.195627157652474, "grad_norm": 1.5016800165176392, "learning_rate": 0.0017960874568469505, "loss": 1.0594, "step": 35440 }, { "epoch": 10.198504027617952, "grad_norm": 2.105829954147339, "learning_rate": 0.001796029919447641, "loss": 0.7109, "step": 35450 }, { "epoch": 10.201380897583428, "grad_norm": 1.0535517930984497, "learning_rate": 0.0017959723820483314, "loss": 0.8748, "step": 35460 }, { "epoch": 10.204257767548906, "grad_norm": 1.1231656074523926, "learning_rate": 0.001795914844649022, "loss": 0.5606, "step": 35470 }, { "epoch": 10.207134637514384, "grad_norm": 1.2321929931640625, "learning_rate": 0.0017958573072497123, "loss": 0.7479, "step": 35480 }, { "epoch": 10.210011507479862, "grad_norm": 1.4332473278045654, "learning_rate": 0.0017957997698504027, "loss": 0.8469, "step": 35490 }, { "epoch": 10.21288837744534, "grad_norm": 2.258176565170288, "learning_rate": 0.0017957422324510932, "loss": 0.7917, "step": 35500 }, { "epoch": 10.215765247410816, "grad_norm": 1.3972923755645752, "learning_rate": 0.0017956846950517838, "loss": 0.7868, "step": 35510 }, { "epoch": 10.218642117376294, "grad_norm": 1.1132915019989014, "learning_rate": 0.0017956271576524741, "loss": 0.7008, "step": 35520 }, { "epoch": 10.221518987341772, "grad_norm": 0.7608745694160461, "learning_rate": 0.0017955696202531647, "loss": 0.8028, "step": 35530 }, { "epoch": 10.22439585730725, "grad_norm": 1.1643518209457397, "learning_rate": 0.001795512082853855, "loss": 0.7019, "step": 35540 }, { "epoch": 10.227272727272727, "grad_norm": 1.5673993825912476, "learning_rate": 0.0017954545454545454, "loss": 1.1731, "step": 35550 }, { "epoch": 10.230149597238205, "grad_norm": 0.9549554586410522, "learning_rate": 0.001795397008055236, "loss": 0.7521, "step": 35560 }, { "epoch": 10.233026467203683, "grad_norm": 1.254694938659668, "learning_rate": 0.0017953394706559263, "loss": 0.7924, "step": 35570 }, { "epoch": 10.23590333716916, "grad_norm": 1.472907304763794, "learning_rate": 0.0017952819332566169, "loss": 0.8591, "step": 35580 }, { "epoch": 10.238780207134637, "grad_norm": 1.3620705604553223, "learning_rate": 0.0017952243958573074, "loss": 0.87, "step": 35590 }, { "epoch": 10.241657077100115, "grad_norm": 1.3327504396438599, "learning_rate": 0.0017951668584579976, "loss": 0.749, "step": 35600 }, { "epoch": 10.244533947065593, "grad_norm": 1.3380842208862305, "learning_rate": 0.0017951093210586881, "loss": 0.6764, "step": 35610 }, { "epoch": 10.24741081703107, "grad_norm": 0.9691317677497864, "learning_rate": 0.0017950517836593787, "loss": 0.7756, "step": 35620 }, { "epoch": 10.250287686996547, "grad_norm": 0.8703305721282959, "learning_rate": 0.001794994246260069, "loss": 0.6965, "step": 35630 }, { "epoch": 10.253164556962025, "grad_norm": 1.3229767084121704, "learning_rate": 0.0017949367088607596, "loss": 0.8401, "step": 35640 }, { "epoch": 10.256041426927503, "grad_norm": 1.6500599384307861, "learning_rate": 0.0017948791714614502, "loss": 0.7207, "step": 35650 }, { "epoch": 10.25891829689298, "grad_norm": 0.7306893467903137, "learning_rate": 0.0017948216340621403, "loss": 0.9633, "step": 35660 }, { "epoch": 10.261795166858459, "grad_norm": 0.7703144550323486, "learning_rate": 0.0017947640966628309, "loss": 0.7942, "step": 35670 }, { "epoch": 10.264672036823935, "grad_norm": 1.9994856119155884, "learning_rate": 0.0017947065592635212, "loss": 0.8653, "step": 35680 }, { "epoch": 10.267548906789413, "grad_norm": 1.3064839839935303, "learning_rate": 0.0017946490218642118, "loss": 0.9642, "step": 35690 }, { "epoch": 10.270425776754891, "grad_norm": 1.0251765251159668, "learning_rate": 0.0017945914844649023, "loss": 0.7452, "step": 35700 }, { "epoch": 10.273302646720369, "grad_norm": 2.8218026161193848, "learning_rate": 0.0017945339470655927, "loss": 0.7975, "step": 35710 }, { "epoch": 10.276179516685845, "grad_norm": 1.9883216619491577, "learning_rate": 0.001794476409666283, "loss": 0.9482, "step": 35720 }, { "epoch": 10.279056386651323, "grad_norm": 1.4492194652557373, "learning_rate": 0.0017944188722669736, "loss": 0.6925, "step": 35730 }, { "epoch": 10.281933256616801, "grad_norm": 0.8378463387489319, "learning_rate": 0.001794361334867664, "loss": 0.7933, "step": 35740 }, { "epoch": 10.284810126582279, "grad_norm": 1.7607215642929077, "learning_rate": 0.0017943037974683545, "loss": 0.8855, "step": 35750 }, { "epoch": 10.287686996547755, "grad_norm": 1.3310312032699585, "learning_rate": 0.001794246260069045, "loss": 0.7203, "step": 35760 }, { "epoch": 10.290563866513233, "grad_norm": 1.3458412885665894, "learning_rate": 0.0017941887226697354, "loss": 0.7555, "step": 35770 }, { "epoch": 10.293440736478711, "grad_norm": 1.5991817712783813, "learning_rate": 0.0017941311852704258, "loss": 0.7237, "step": 35780 }, { "epoch": 10.29631760644419, "grad_norm": 1.0823509693145752, "learning_rate": 0.0017940736478711161, "loss": 0.887, "step": 35790 }, { "epoch": 10.299194476409665, "grad_norm": 1.1965454816818237, "learning_rate": 0.0017940161104718067, "loss": 0.5953, "step": 35800 }, { "epoch": 10.302071346375143, "grad_norm": 1.093261957168579, "learning_rate": 0.0017939585730724972, "loss": 0.7887, "step": 35810 }, { "epoch": 10.304948216340621, "grad_norm": 1.9381464719772339, "learning_rate": 0.0017939010356731876, "loss": 0.8144, "step": 35820 }, { "epoch": 10.3078250863061, "grad_norm": 1.5581802129745483, "learning_rate": 0.0017938434982738781, "loss": 0.737, "step": 35830 }, { "epoch": 10.310701956271577, "grad_norm": 1.5563099384307861, "learning_rate": 0.0017937859608745685, "loss": 0.8848, "step": 35840 }, { "epoch": 10.313578826237054, "grad_norm": 1.2144702672958374, "learning_rate": 0.0017937284234752588, "loss": 0.8549, "step": 35850 }, { "epoch": 10.316455696202532, "grad_norm": 1.000900387763977, "learning_rate": 0.0017936708860759494, "loss": 0.9044, "step": 35860 }, { "epoch": 10.31933256616801, "grad_norm": 0.8682152032852173, "learning_rate": 0.00179361334867664, "loss": 1.0369, "step": 35870 }, { "epoch": 10.322209436133488, "grad_norm": 1.3075134754180908, "learning_rate": 0.0017935558112773303, "loss": 0.9272, "step": 35880 }, { "epoch": 10.325086306098964, "grad_norm": 1.728621006011963, "learning_rate": 0.0017934982738780209, "loss": 0.7471, "step": 35890 }, { "epoch": 10.327963176064442, "grad_norm": 1.3002939224243164, "learning_rate": 0.0017934407364787112, "loss": 1.0235, "step": 35900 }, { "epoch": 10.33084004602992, "grad_norm": 1.696681261062622, "learning_rate": 0.0017933831990794016, "loss": 0.8511, "step": 35910 }, { "epoch": 10.333716915995398, "grad_norm": 1.043806791305542, "learning_rate": 0.0017933256616800921, "loss": 0.6986, "step": 35920 }, { "epoch": 10.336593785960874, "grad_norm": 1.026712417602539, "learning_rate": 0.0017932681242807825, "loss": 0.7786, "step": 35930 }, { "epoch": 10.339470655926352, "grad_norm": 1.249638319015503, "learning_rate": 0.001793210586881473, "loss": 0.9353, "step": 35940 }, { "epoch": 10.34234752589183, "grad_norm": 1.005812644958496, "learning_rate": 0.0017931530494821636, "loss": 0.7343, "step": 35950 }, { "epoch": 10.345224395857308, "grad_norm": 1.5454703569412231, "learning_rate": 0.0017930955120828537, "loss": 0.862, "step": 35960 }, { "epoch": 10.348101265822784, "grad_norm": 1.1288011074066162, "learning_rate": 0.0017930379746835443, "loss": 0.7749, "step": 35970 }, { "epoch": 10.350978135788262, "grad_norm": 0.8110828995704651, "learning_rate": 0.0017929804372842349, "loss": 0.9237, "step": 35980 }, { "epoch": 10.35385500575374, "grad_norm": 0.9270837903022766, "learning_rate": 0.0017929228998849252, "loss": 0.8525, "step": 35990 }, { "epoch": 10.356731875719218, "grad_norm": 1.107412576675415, "learning_rate": 0.0017928653624856158, "loss": 0.9056, "step": 36000 }, { "epoch": 10.359608745684696, "grad_norm": 1.365478754043579, "learning_rate": 0.0017928078250863063, "loss": 0.846, "step": 36010 }, { "epoch": 10.362485615650172, "grad_norm": 1.4320238828659058, "learning_rate": 0.0017927502876869965, "loss": 0.8982, "step": 36020 }, { "epoch": 10.36536248561565, "grad_norm": 1.606898546218872, "learning_rate": 0.001792692750287687, "loss": 0.7458, "step": 36030 }, { "epoch": 10.368239355581128, "grad_norm": 0.8663291931152344, "learning_rate": 0.0017926352128883774, "loss": 0.8023, "step": 36040 }, { "epoch": 10.371116225546606, "grad_norm": 1.0523326396942139, "learning_rate": 0.001792577675489068, "loss": 0.8703, "step": 36050 }, { "epoch": 10.373993095512082, "grad_norm": 1.30318021774292, "learning_rate": 0.0017925201380897585, "loss": 0.8201, "step": 36060 }, { "epoch": 10.37686996547756, "grad_norm": 1.3606659173965454, "learning_rate": 0.0017924626006904486, "loss": 0.8325, "step": 36070 }, { "epoch": 10.379746835443038, "grad_norm": 1.2515802383422852, "learning_rate": 0.0017924050632911392, "loss": 0.7293, "step": 36080 }, { "epoch": 10.382623705408516, "grad_norm": 1.2894105911254883, "learning_rate": 0.0017923475258918298, "loss": 0.5567, "step": 36090 }, { "epoch": 10.385500575373992, "grad_norm": 2.4437015056610107, "learning_rate": 0.0017922899884925201, "loss": 1.0973, "step": 36100 }, { "epoch": 10.38837744533947, "grad_norm": 1.2946430444717407, "learning_rate": 0.0017922324510932107, "loss": 0.9473, "step": 36110 }, { "epoch": 10.391254315304948, "grad_norm": 1.7797966003417969, "learning_rate": 0.0017921749136939012, "loss": 0.825, "step": 36120 }, { "epoch": 10.394131185270426, "grad_norm": 0.9923428297042847, "learning_rate": 0.0017921173762945914, "loss": 0.7711, "step": 36130 }, { "epoch": 10.397008055235903, "grad_norm": 1.5641058683395386, "learning_rate": 0.001792059838895282, "loss": 0.7307, "step": 36140 }, { "epoch": 10.39988492520138, "grad_norm": 1.8436301946640015, "learning_rate": 0.0017920023014959723, "loss": 0.8394, "step": 36150 }, { "epoch": 10.402761795166859, "grad_norm": 1.6811139583587646, "learning_rate": 0.0017919447640966629, "loss": 0.751, "step": 36160 }, { "epoch": 10.405638665132336, "grad_norm": 1.1254554986953735, "learning_rate": 0.0017918872266973534, "loss": 0.7251, "step": 36170 }, { "epoch": 10.408515535097813, "grad_norm": 1.3952006101608276, "learning_rate": 0.0017918296892980438, "loss": 0.8112, "step": 36180 }, { "epoch": 10.41139240506329, "grad_norm": 1.3068662881851196, "learning_rate": 0.0017917721518987341, "loss": 0.7345, "step": 36190 }, { "epoch": 10.414269275028769, "grad_norm": 0.857893705368042, "learning_rate": 0.0017917146144994247, "loss": 0.7879, "step": 36200 }, { "epoch": 10.417146144994247, "grad_norm": 2.318305730819702, "learning_rate": 0.001791657077100115, "loss": 0.6896, "step": 36210 }, { "epoch": 10.420023014959725, "grad_norm": 1.066249132156372, "learning_rate": 0.0017915995397008056, "loss": 0.8407, "step": 36220 }, { "epoch": 10.4228998849252, "grad_norm": 1.2354592084884644, "learning_rate": 0.0017915420023014962, "loss": 0.6362, "step": 36230 }, { "epoch": 10.425776754890679, "grad_norm": 1.7143502235412598, "learning_rate": 0.0017914844649021865, "loss": 0.7611, "step": 36240 }, { "epoch": 10.428653624856157, "grad_norm": 1.1025761365890503, "learning_rate": 0.0017914269275028768, "loss": 1.0043, "step": 36250 }, { "epoch": 10.431530494821635, "grad_norm": 1.260986089706421, "learning_rate": 0.0017913693901035672, "loss": 0.629, "step": 36260 }, { "epoch": 10.434407364787111, "grad_norm": 1.5162949562072754, "learning_rate": 0.0017913118527042578, "loss": 0.7636, "step": 36270 }, { "epoch": 10.437284234752589, "grad_norm": 1.5473463535308838, "learning_rate": 0.0017912543153049483, "loss": 0.7351, "step": 36280 }, { "epoch": 10.440161104718067, "grad_norm": 0.977468729019165, "learning_rate": 0.0017911967779056387, "loss": 0.9158, "step": 36290 }, { "epoch": 10.443037974683545, "grad_norm": 0.6956089735031128, "learning_rate": 0.0017911392405063292, "loss": 0.838, "step": 36300 }, { "epoch": 10.445914844649021, "grad_norm": 1.3525463342666626, "learning_rate": 0.0017910817031070196, "loss": 1.0491, "step": 36310 }, { "epoch": 10.448791714614499, "grad_norm": 2.1628971099853516, "learning_rate": 0.00179102416570771, "loss": 0.9325, "step": 36320 }, { "epoch": 10.451668584579977, "grad_norm": 1.1076159477233887, "learning_rate": 0.0017909666283084005, "loss": 0.6818, "step": 36330 }, { "epoch": 10.454545454545455, "grad_norm": 1.503522515296936, "learning_rate": 0.001790909090909091, "loss": 0.771, "step": 36340 }, { "epoch": 10.457422324510933, "grad_norm": 2.134768486022949, "learning_rate": 0.0017908515535097814, "loss": 1.0841, "step": 36350 }, { "epoch": 10.46029919447641, "grad_norm": 1.2989287376403809, "learning_rate": 0.001790794016110472, "loss": 0.9877, "step": 36360 }, { "epoch": 10.463176064441887, "grad_norm": 1.0093733072280884, "learning_rate": 0.001790736478711162, "loss": 0.9315, "step": 36370 }, { "epoch": 10.466052934407365, "grad_norm": 0.9756017327308655, "learning_rate": 0.0017906789413118527, "loss": 0.6705, "step": 36380 }, { "epoch": 10.468929804372843, "grad_norm": 1.0393098592758179, "learning_rate": 0.0017906214039125432, "loss": 0.8387, "step": 36390 }, { "epoch": 10.47180667433832, "grad_norm": 1.1000643968582153, "learning_rate": 0.0017905638665132336, "loss": 0.7438, "step": 36400 }, { "epoch": 10.474683544303797, "grad_norm": 1.0343663692474365, "learning_rate": 0.0017905063291139241, "loss": 0.8895, "step": 36410 }, { "epoch": 10.477560414269275, "grad_norm": 1.314157485961914, "learning_rate": 0.0017904487917146147, "loss": 0.8418, "step": 36420 }, { "epoch": 10.480437284234753, "grad_norm": 1.2726720571517944, "learning_rate": 0.0017903912543153048, "loss": 0.8651, "step": 36430 }, { "epoch": 10.48331415420023, "grad_norm": 1.4596302509307861, "learning_rate": 0.0017903337169159954, "loss": 0.6851, "step": 36440 }, { "epoch": 10.486191024165707, "grad_norm": 1.1454639434814453, "learning_rate": 0.001790276179516686, "loss": 0.8448, "step": 36450 }, { "epoch": 10.489067894131185, "grad_norm": 1.4715814590454102, "learning_rate": 0.0017902186421173763, "loss": 0.7413, "step": 36460 }, { "epoch": 10.491944764096663, "grad_norm": 1.3848273754119873, "learning_rate": 0.0017901611047180669, "loss": 0.7941, "step": 36470 }, { "epoch": 10.49482163406214, "grad_norm": 0.9351450204849243, "learning_rate": 0.0017901035673187572, "loss": 0.6498, "step": 36480 }, { "epoch": 10.497698504027618, "grad_norm": 1.70116126537323, "learning_rate": 0.0017900460299194476, "loss": 0.8611, "step": 36490 }, { "epoch": 10.500575373993096, "grad_norm": 1.1817816495895386, "learning_rate": 0.0017899884925201381, "loss": 0.73, "step": 36500 }, { "epoch": 10.503452243958574, "grad_norm": 1.0114365816116333, "learning_rate": 0.0017899309551208285, "loss": 0.7957, "step": 36510 }, { "epoch": 10.50632911392405, "grad_norm": 1.2636215686798096, "learning_rate": 0.001789873417721519, "loss": 0.7995, "step": 36520 }, { "epoch": 10.509205983889528, "grad_norm": 1.4199225902557373, "learning_rate": 0.0017898158803222096, "loss": 0.727, "step": 36530 }, { "epoch": 10.512082853855006, "grad_norm": 1.5081148147583008, "learning_rate": 0.0017897583429229, "loss": 0.9271, "step": 36540 }, { "epoch": 10.514959723820484, "grad_norm": 1.3371715545654297, "learning_rate": 0.0017897008055235903, "loss": 0.8732, "step": 36550 }, { "epoch": 10.517836593785962, "grad_norm": 0.7416989803314209, "learning_rate": 0.0017896432681242809, "loss": 0.6807, "step": 36560 }, { "epoch": 10.520713463751438, "grad_norm": 1.5230878591537476, "learning_rate": 0.0017895857307249712, "loss": 0.8743, "step": 36570 }, { "epoch": 10.523590333716916, "grad_norm": 0.9081467390060425, "learning_rate": 0.0017895281933256618, "loss": 0.614, "step": 36580 }, { "epoch": 10.526467203682394, "grad_norm": 1.1413432359695435, "learning_rate": 0.0017894706559263523, "loss": 0.8564, "step": 36590 }, { "epoch": 10.529344073647872, "grad_norm": 1.4464040994644165, "learning_rate": 0.0017894131185270427, "loss": 0.733, "step": 36600 }, { "epoch": 10.532220943613348, "grad_norm": 1.3479722738265991, "learning_rate": 0.001789355581127733, "loss": 0.8539, "step": 36610 }, { "epoch": 10.535097813578826, "grad_norm": 0.8069517612457275, "learning_rate": 0.0017892980437284234, "loss": 0.8314, "step": 36620 }, { "epoch": 10.537974683544304, "grad_norm": 0.9896148443222046, "learning_rate": 0.001789240506329114, "loss": 0.6009, "step": 36630 }, { "epoch": 10.540851553509782, "grad_norm": 1.4440317153930664, "learning_rate": 0.0017891829689298045, "loss": 0.6992, "step": 36640 }, { "epoch": 10.543728423475258, "grad_norm": 1.4301493167877197, "learning_rate": 0.0017891254315304948, "loss": 0.8283, "step": 36650 }, { "epoch": 10.546605293440736, "grad_norm": 0.9150304198265076, "learning_rate": 0.0017890678941311854, "loss": 0.7501, "step": 36660 }, { "epoch": 10.549482163406214, "grad_norm": 1.4535702466964722, "learning_rate": 0.0017890103567318758, "loss": 0.8032, "step": 36670 }, { "epoch": 10.552359033371692, "grad_norm": 1.7972710132598877, "learning_rate": 0.001788952819332566, "loss": 0.7352, "step": 36680 }, { "epoch": 10.55523590333717, "grad_norm": 1.608608365058899, "learning_rate": 0.0017888952819332567, "loss": 0.7189, "step": 36690 }, { "epoch": 10.558112773302646, "grad_norm": 1.160932183265686, "learning_rate": 0.0017888377445339472, "loss": 0.9073, "step": 36700 }, { "epoch": 10.560989643268124, "grad_norm": 0.8964403867721558, "learning_rate": 0.0017887802071346376, "loss": 0.8044, "step": 36710 }, { "epoch": 10.563866513233602, "grad_norm": 1.895510196685791, "learning_rate": 0.0017887226697353281, "loss": 0.8286, "step": 36720 }, { "epoch": 10.566743383199078, "grad_norm": 0.5368801951408386, "learning_rate": 0.0017886651323360183, "loss": 0.6166, "step": 36730 }, { "epoch": 10.569620253164556, "grad_norm": 1.5046929121017456, "learning_rate": 0.0017886075949367088, "loss": 1.0063, "step": 36740 }, { "epoch": 10.572497123130034, "grad_norm": 1.4544999599456787, "learning_rate": 0.0017885500575373994, "loss": 0.8779, "step": 36750 }, { "epoch": 10.575373993095512, "grad_norm": 1.503628134727478, "learning_rate": 0.0017884925201380898, "loss": 0.9075, "step": 36760 }, { "epoch": 10.57825086306099, "grad_norm": 1.3489985466003418, "learning_rate": 0.0017884349827387803, "loss": 0.7609, "step": 36770 }, { "epoch": 10.581127733026467, "grad_norm": 1.2486504316329956, "learning_rate": 0.0017883774453394709, "loss": 0.8372, "step": 36780 }, { "epoch": 10.584004602991945, "grad_norm": 1.2643110752105713, "learning_rate": 0.001788319907940161, "loss": 0.9044, "step": 36790 }, { "epoch": 10.586881472957423, "grad_norm": 1.0681873559951782, "learning_rate": 0.0017882623705408516, "loss": 0.837, "step": 36800 }, { "epoch": 10.5897583429229, "grad_norm": 2.711054801940918, "learning_rate": 0.0017882048331415421, "loss": 1.0693, "step": 36810 }, { "epoch": 10.592635212888377, "grad_norm": 1.071498990058899, "learning_rate": 0.0017881472957422325, "loss": 0.9564, "step": 36820 }, { "epoch": 10.595512082853855, "grad_norm": 1.477685570716858, "learning_rate": 0.001788089758342923, "loss": 0.7099, "step": 36830 }, { "epoch": 10.598388952819333, "grad_norm": 0.9965774416923523, "learning_rate": 0.0017880322209436132, "loss": 0.6936, "step": 36840 }, { "epoch": 10.60126582278481, "grad_norm": 1.7309988737106323, "learning_rate": 0.0017879746835443037, "loss": 0.8052, "step": 36850 }, { "epoch": 10.604142692750287, "grad_norm": 1.5653852224349976, "learning_rate": 0.0017879171461449943, "loss": 0.6851, "step": 36860 }, { "epoch": 10.607019562715765, "grad_norm": 3.6751492023468018, "learning_rate": 0.0017878596087456847, "loss": 0.9069, "step": 36870 }, { "epoch": 10.609896432681243, "grad_norm": 0.9153583645820618, "learning_rate": 0.0017878020713463752, "loss": 0.8283, "step": 36880 }, { "epoch": 10.61277330264672, "grad_norm": 1.4257988929748535, "learning_rate": 0.0017877445339470658, "loss": 0.7561, "step": 36890 }, { "epoch": 10.615650172612199, "grad_norm": 1.2694612741470337, "learning_rate": 0.001787686996547756, "loss": 0.7931, "step": 36900 }, { "epoch": 10.618527042577675, "grad_norm": 1.1982687711715698, "learning_rate": 0.0017876294591484465, "loss": 0.7388, "step": 36910 }, { "epoch": 10.621403912543153, "grad_norm": 1.4126371145248413, "learning_rate": 0.001787571921749137, "loss": 0.8807, "step": 36920 }, { "epoch": 10.624280782508631, "grad_norm": 1.0492088794708252, "learning_rate": 0.0017875143843498274, "loss": 0.7088, "step": 36930 }, { "epoch": 10.627157652474109, "grad_norm": 0.9178164601325989, "learning_rate": 0.001787456846950518, "loss": 0.8034, "step": 36940 }, { "epoch": 10.630034522439585, "grad_norm": 1.3353639841079712, "learning_rate": 0.0017873993095512083, "loss": 0.6776, "step": 36950 }, { "epoch": 10.632911392405063, "grad_norm": 1.6773182153701782, "learning_rate": 0.0017873417721518986, "loss": 0.9307, "step": 36960 }, { "epoch": 10.635788262370541, "grad_norm": 1.2603706121444702, "learning_rate": 0.0017872842347525892, "loss": 0.7225, "step": 36970 }, { "epoch": 10.638665132336019, "grad_norm": 1.221575379371643, "learning_rate": 0.0017872266973532796, "loss": 0.9178, "step": 36980 }, { "epoch": 10.641542002301495, "grad_norm": 0.7292558550834656, "learning_rate": 0.0017871691599539701, "loss": 0.8441, "step": 36990 }, { "epoch": 10.644418872266973, "grad_norm": 1.8310247659683228, "learning_rate": 0.0017871116225546607, "loss": 0.7947, "step": 37000 }, { "epoch": 10.647295742232451, "grad_norm": 0.8001111745834351, "learning_rate": 0.001787054085155351, "loss": 0.6773, "step": 37010 }, { "epoch": 10.65017261219793, "grad_norm": 1.629136323928833, "learning_rate": 0.0017869965477560414, "loss": 0.7893, "step": 37020 }, { "epoch": 10.653049482163405, "grad_norm": 1.6845101118087769, "learning_rate": 0.001786939010356732, "loss": 0.6112, "step": 37030 }, { "epoch": 10.655926352128883, "grad_norm": 1.8226784467697144, "learning_rate": 0.0017868814729574223, "loss": 0.8931, "step": 37040 }, { "epoch": 10.658803222094361, "grad_norm": 1.6844961643218994, "learning_rate": 0.0017868239355581129, "loss": 0.7054, "step": 37050 }, { "epoch": 10.66168009205984, "grad_norm": 1.7532950639724731, "learning_rate": 0.0017867663981588032, "loss": 0.7499, "step": 37060 }, { "epoch": 10.664556962025316, "grad_norm": 1.058869481086731, "learning_rate": 0.0017867088607594938, "loss": 0.7789, "step": 37070 }, { "epoch": 10.667433831990794, "grad_norm": 0.8175690770149231, "learning_rate": 0.0017866513233601841, "loss": 0.8898, "step": 37080 }, { "epoch": 10.670310701956272, "grad_norm": 1.2011932134628296, "learning_rate": 0.0017865937859608745, "loss": 0.6833, "step": 37090 }, { "epoch": 10.67318757192175, "grad_norm": 1.5278756618499756, "learning_rate": 0.001786536248561565, "loss": 0.7234, "step": 37100 }, { "epoch": 10.676064441887227, "grad_norm": 1.674028754234314, "learning_rate": 0.0017864787111622556, "loss": 0.7992, "step": 37110 }, { "epoch": 10.678941311852704, "grad_norm": 1.1043180227279663, "learning_rate": 0.001786421173762946, "loss": 0.8309, "step": 37120 }, { "epoch": 10.681818181818182, "grad_norm": 1.3450989723205566, "learning_rate": 0.0017863636363636365, "loss": 0.739, "step": 37130 }, { "epoch": 10.68469505178366, "grad_norm": 1.6563537120819092, "learning_rate": 0.0017863060989643268, "loss": 0.8444, "step": 37140 }, { "epoch": 10.687571921749138, "grad_norm": 1.2697848081588745, "learning_rate": 0.0017862485615650172, "loss": 0.6446, "step": 37150 }, { "epoch": 10.690448791714614, "grad_norm": 1.0335570573806763, "learning_rate": 0.0017861910241657078, "loss": 0.8052, "step": 37160 }, { "epoch": 10.693325661680092, "grad_norm": 0.7914114594459534, "learning_rate": 0.001786133486766398, "loss": 0.7572, "step": 37170 }, { "epoch": 10.69620253164557, "grad_norm": 1.209509015083313, "learning_rate": 0.0017860759493670887, "loss": 0.8209, "step": 37180 }, { "epoch": 10.699079401611048, "grad_norm": 1.4199631214141846, "learning_rate": 0.0017860184119677792, "loss": 0.8233, "step": 37190 }, { "epoch": 10.701956271576524, "grad_norm": 2.255206346511841, "learning_rate": 0.0017859608745684694, "loss": 0.8744, "step": 37200 }, { "epoch": 10.704833141542002, "grad_norm": 1.8392294645309448, "learning_rate": 0.00178590333716916, "loss": 0.7875, "step": 37210 }, { "epoch": 10.70771001150748, "grad_norm": 1.5108178853988647, "learning_rate": 0.0017858457997698505, "loss": 0.8397, "step": 37220 }, { "epoch": 10.710586881472958, "grad_norm": 0.9726626873016357, "learning_rate": 0.0017857882623705408, "loss": 0.6473, "step": 37230 }, { "epoch": 10.713463751438436, "grad_norm": 1.6029525995254517, "learning_rate": 0.0017857307249712314, "loss": 0.8348, "step": 37240 }, { "epoch": 10.716340621403912, "grad_norm": 1.1397449970245361, "learning_rate": 0.001785673187571922, "loss": 0.6363, "step": 37250 }, { "epoch": 10.71921749136939, "grad_norm": 1.8309766054153442, "learning_rate": 0.001785615650172612, "loss": 0.9789, "step": 37260 }, { "epoch": 10.722094361334868, "grad_norm": 0.9500905871391296, "learning_rate": 0.0017855581127733027, "loss": 0.9061, "step": 37270 }, { "epoch": 10.724971231300346, "grad_norm": 1.4720628261566162, "learning_rate": 0.0017855005753739932, "loss": 0.9035, "step": 37280 }, { "epoch": 10.727848101265822, "grad_norm": 1.3549624681472778, "learning_rate": 0.0017854430379746836, "loss": 0.8827, "step": 37290 }, { "epoch": 10.7307249712313, "grad_norm": 1.0231693983078003, "learning_rate": 0.0017853855005753741, "loss": 0.8186, "step": 37300 }, { "epoch": 10.733601841196778, "grad_norm": 1.5674470663070679, "learning_rate": 0.0017853279631760645, "loss": 1.0673, "step": 37310 }, { "epoch": 10.736478711162256, "grad_norm": 0.8352193832397461, "learning_rate": 0.0017852704257767548, "loss": 0.7882, "step": 37320 }, { "epoch": 10.739355581127732, "grad_norm": 0.9225987195968628, "learning_rate": 0.0017852128883774454, "loss": 0.7886, "step": 37330 }, { "epoch": 10.74223245109321, "grad_norm": 1.7826567888259888, "learning_rate": 0.0017851553509781357, "loss": 0.8017, "step": 37340 }, { "epoch": 10.745109321058688, "grad_norm": 1.9051120281219482, "learning_rate": 0.0017850978135788263, "loss": 0.648, "step": 37350 }, { "epoch": 10.747986191024166, "grad_norm": 1.212158441543579, "learning_rate": 0.0017850402761795169, "loss": 0.7958, "step": 37360 }, { "epoch": 10.750863060989643, "grad_norm": 1.3195323944091797, "learning_rate": 0.0017849827387802072, "loss": 0.8243, "step": 37370 }, { "epoch": 10.75373993095512, "grad_norm": 1.5724188089370728, "learning_rate": 0.0017849252013808976, "loss": 0.8271, "step": 37380 }, { "epoch": 10.756616800920598, "grad_norm": 1.9669567346572876, "learning_rate": 0.0017848676639815881, "loss": 0.8433, "step": 37390 }, { "epoch": 10.759493670886076, "grad_norm": 1.9085426330566406, "learning_rate": 0.0017848101265822785, "loss": 0.8865, "step": 37400 }, { "epoch": 10.762370540851553, "grad_norm": 1.1743932962417603, "learning_rate": 0.001784752589182969, "loss": 0.8173, "step": 37410 }, { "epoch": 10.76524741081703, "grad_norm": 1.4598698616027832, "learning_rate": 0.0017846950517836594, "loss": 0.8136, "step": 37420 }, { "epoch": 10.768124280782509, "grad_norm": 1.4073022603988647, "learning_rate": 0.00178463751438435, "loss": 0.974, "step": 37430 }, { "epoch": 10.771001150747987, "grad_norm": 1.4512019157409668, "learning_rate": 0.0017845799769850403, "loss": 0.7304, "step": 37440 }, { "epoch": 10.773878020713465, "grad_norm": 1.5618394613265991, "learning_rate": 0.0017845224395857306, "loss": 0.7854, "step": 37450 }, { "epoch": 10.77675489067894, "grad_norm": 0.9450345635414124, "learning_rate": 0.0017844649021864212, "loss": 0.7764, "step": 37460 }, { "epoch": 10.779631760644419, "grad_norm": 1.3969905376434326, "learning_rate": 0.0017844073647871118, "loss": 0.7191, "step": 37470 }, { "epoch": 10.782508630609897, "grad_norm": 1.437119483947754, "learning_rate": 0.0017843498273878021, "loss": 0.8644, "step": 37480 }, { "epoch": 10.785385500575375, "grad_norm": 1.4042067527770996, "learning_rate": 0.0017842922899884927, "loss": 0.6071, "step": 37490 }, { "epoch": 10.788262370540851, "grad_norm": 2.3697617053985596, "learning_rate": 0.001784234752589183, "loss": 0.7988, "step": 37500 }, { "epoch": 10.791139240506329, "grad_norm": 1.8844534158706665, "learning_rate": 0.0017841772151898734, "loss": 0.8513, "step": 37510 }, { "epoch": 10.794016110471807, "grad_norm": 1.1955546140670776, "learning_rate": 0.001784119677790564, "loss": 0.8848, "step": 37520 }, { "epoch": 10.796892980437285, "grad_norm": 1.186266303062439, "learning_rate": 0.0017840621403912543, "loss": 1.0554, "step": 37530 }, { "epoch": 10.799769850402761, "grad_norm": 0.9643357396125793, "learning_rate": 0.0017840046029919448, "loss": 0.6683, "step": 37540 }, { "epoch": 10.802646720368239, "grad_norm": 1.2844104766845703, "learning_rate": 0.0017839470655926354, "loss": 0.9178, "step": 37550 }, { "epoch": 10.805523590333717, "grad_norm": 0.7210537195205688, "learning_rate": 0.0017838895281933255, "loss": 0.884, "step": 37560 }, { "epoch": 10.808400460299195, "grad_norm": 1.9966034889221191, "learning_rate": 0.001783831990794016, "loss": 0.943, "step": 37570 }, { "epoch": 10.811277330264673, "grad_norm": 1.5663572549819946, "learning_rate": 0.0017837744533947067, "loss": 0.8672, "step": 37580 }, { "epoch": 10.81415420023015, "grad_norm": 1.8581651449203491, "learning_rate": 0.001783716915995397, "loss": 0.9937, "step": 37590 }, { "epoch": 10.817031070195627, "grad_norm": 1.1760762929916382, "learning_rate": 0.0017836593785960876, "loss": 0.8513, "step": 37600 }, { "epoch": 10.819907940161105, "grad_norm": 1.6060348749160767, "learning_rate": 0.0017836018411967781, "loss": 0.881, "step": 37610 }, { "epoch": 10.822784810126583, "grad_norm": 0.8877235054969788, "learning_rate": 0.0017835443037974683, "loss": 0.8989, "step": 37620 }, { "epoch": 10.82566168009206, "grad_norm": 1.1995971202850342, "learning_rate": 0.0017834867663981588, "loss": 1.0157, "step": 37630 }, { "epoch": 10.828538550057537, "grad_norm": 1.4069494009017944, "learning_rate": 0.0017834292289988492, "loss": 0.8709, "step": 37640 }, { "epoch": 10.831415420023015, "grad_norm": 0.933696985244751, "learning_rate": 0.0017833716915995397, "loss": 0.726, "step": 37650 }, { "epoch": 10.834292289988493, "grad_norm": 3.7328197956085205, "learning_rate": 0.0017833141542002303, "loss": 0.8544, "step": 37660 }, { "epoch": 10.83716915995397, "grad_norm": 1.2856297492980957, "learning_rate": 0.0017832566168009204, "loss": 0.7639, "step": 37670 }, { "epoch": 10.840046029919447, "grad_norm": 1.2570675611495972, "learning_rate": 0.001783199079401611, "loss": 0.8114, "step": 37680 }, { "epoch": 10.842922899884925, "grad_norm": 0.9077668190002441, "learning_rate": 0.0017831415420023016, "loss": 0.9203, "step": 37690 }, { "epoch": 10.845799769850403, "grad_norm": 1.0750938653945923, "learning_rate": 0.001783084004602992, "loss": 0.7617, "step": 37700 }, { "epoch": 10.84867663981588, "grad_norm": 1.8730558156967163, "learning_rate": 0.0017830264672036825, "loss": 0.9409, "step": 37710 }, { "epoch": 10.851553509781358, "grad_norm": 1.2781189680099487, "learning_rate": 0.001782968929804373, "loss": 0.8796, "step": 37720 }, { "epoch": 10.854430379746836, "grad_norm": 1.1829804182052612, "learning_rate": 0.0017829113924050632, "loss": 0.8012, "step": 37730 }, { "epoch": 10.857307249712314, "grad_norm": 0.8967404961585999, "learning_rate": 0.0017828538550057537, "loss": 0.9149, "step": 37740 }, { "epoch": 10.86018411967779, "grad_norm": 1.481709599494934, "learning_rate": 0.001782796317606444, "loss": 0.9172, "step": 37750 }, { "epoch": 10.863060989643268, "grad_norm": 1.6241756677627563, "learning_rate": 0.0017827387802071347, "loss": 1.0797, "step": 37760 }, { "epoch": 10.865937859608746, "grad_norm": 0.9238442778587341, "learning_rate": 0.0017826812428078252, "loss": 0.6627, "step": 37770 }, { "epoch": 10.868814729574224, "grad_norm": 1.4320341348648071, "learning_rate": 0.0017826237054085156, "loss": 0.9022, "step": 37780 }, { "epoch": 10.871691599539702, "grad_norm": 1.9534491300582886, "learning_rate": 0.001782566168009206, "loss": 0.7378, "step": 37790 }, { "epoch": 10.874568469505178, "grad_norm": 1.302356243133545, "learning_rate": 0.0017825086306098965, "loss": 0.8278, "step": 37800 }, { "epoch": 10.877445339470656, "grad_norm": 1.7699556350708008, "learning_rate": 0.0017824510932105868, "loss": 0.7029, "step": 37810 }, { "epoch": 10.880322209436134, "grad_norm": 1.544077754020691, "learning_rate": 0.0017823935558112774, "loss": 0.7153, "step": 37820 }, { "epoch": 10.883199079401612, "grad_norm": 1.034019947052002, "learning_rate": 0.001782336018411968, "loss": 0.9754, "step": 37830 }, { "epoch": 10.886075949367088, "grad_norm": 1.6720629930496216, "learning_rate": 0.0017822784810126583, "loss": 0.6941, "step": 37840 }, { "epoch": 10.888952819332566, "grad_norm": 0.9794418215751648, "learning_rate": 0.0017822209436133486, "loss": 0.9382, "step": 37850 }, { "epoch": 10.891829689298044, "grad_norm": 2.015620708465576, "learning_rate": 0.0017821634062140392, "loss": 0.9046, "step": 37860 }, { "epoch": 10.894706559263522, "grad_norm": 1.3928475379943848, "learning_rate": 0.0017821058688147296, "loss": 0.923, "step": 37870 }, { "epoch": 10.897583429228998, "grad_norm": 1.3521335124969482, "learning_rate": 0.0017820483314154201, "loss": 0.8627, "step": 37880 }, { "epoch": 10.900460299194476, "grad_norm": 1.1251085996627808, "learning_rate": 0.0017819907940161105, "loss": 0.7461, "step": 37890 }, { "epoch": 10.903337169159954, "grad_norm": 1.44193434715271, "learning_rate": 0.001781933256616801, "loss": 0.8345, "step": 37900 }, { "epoch": 10.906214039125432, "grad_norm": 1.6459989547729492, "learning_rate": 0.0017818757192174914, "loss": 0.8913, "step": 37910 }, { "epoch": 10.909090909090908, "grad_norm": 3.0791234970092773, "learning_rate": 0.0017818181818181817, "loss": 0.7877, "step": 37920 }, { "epoch": 10.911967779056386, "grad_norm": 0.7920551896095276, "learning_rate": 0.0017817606444188723, "loss": 0.662, "step": 37930 }, { "epoch": 10.914844649021864, "grad_norm": 1.226741075515747, "learning_rate": 0.0017817031070195628, "loss": 0.7695, "step": 37940 }, { "epoch": 10.917721518987342, "grad_norm": 1.243005394935608, "learning_rate": 0.0017816455696202532, "loss": 0.8475, "step": 37950 }, { "epoch": 10.920598388952818, "grad_norm": 1.4612220525741577, "learning_rate": 0.0017815880322209438, "loss": 0.781, "step": 37960 }, { "epoch": 10.923475258918296, "grad_norm": 1.9817386865615845, "learning_rate": 0.001781530494821634, "loss": 0.9462, "step": 37970 }, { "epoch": 10.926352128883774, "grad_norm": 1.3686894178390503, "learning_rate": 0.0017814729574223245, "loss": 0.711, "step": 37980 }, { "epoch": 10.929228998849252, "grad_norm": 1.3394826650619507, "learning_rate": 0.001781415420023015, "loss": 0.7872, "step": 37990 }, { "epoch": 10.93210586881473, "grad_norm": 1.1829066276550293, "learning_rate": 0.0017813578826237054, "loss": 0.7947, "step": 38000 }, { "epoch": 10.934982738780207, "grad_norm": 1.3429315090179443, "learning_rate": 0.001781300345224396, "loss": 0.6983, "step": 38010 }, { "epoch": 10.937859608745685, "grad_norm": 0.887499213218689, "learning_rate": 0.0017812428078250865, "loss": 0.8631, "step": 38020 }, { "epoch": 10.940736478711163, "grad_norm": 0.8405227661132812, "learning_rate": 0.0017811852704257766, "loss": 0.6467, "step": 38030 }, { "epoch": 10.94361334867664, "grad_norm": 1.4152727127075195, "learning_rate": 0.0017811277330264672, "loss": 0.5896, "step": 38040 }, { "epoch": 10.946490218642117, "grad_norm": 1.2270489931106567, "learning_rate": 0.0017810701956271578, "loss": 0.7677, "step": 38050 }, { "epoch": 10.949367088607595, "grad_norm": 1.3830151557922363, "learning_rate": 0.001781012658227848, "loss": 0.9962, "step": 38060 }, { "epoch": 10.952243958573073, "grad_norm": 1.5829205513000488, "learning_rate": 0.0017809551208285387, "loss": 1.0052, "step": 38070 }, { "epoch": 10.95512082853855, "grad_norm": 0.9717665314674377, "learning_rate": 0.0017808975834292292, "loss": 0.8094, "step": 38080 }, { "epoch": 10.957997698504027, "grad_norm": 0.7640013098716736, "learning_rate": 0.0017808400460299194, "loss": 0.6519, "step": 38090 }, { "epoch": 10.960874568469505, "grad_norm": 0.871340274810791, "learning_rate": 0.00178078250863061, "loss": 0.8809, "step": 38100 }, { "epoch": 10.963751438434983, "grad_norm": 2.5998618602752686, "learning_rate": 0.0017807249712313003, "loss": 0.7523, "step": 38110 }, { "epoch": 10.96662830840046, "grad_norm": 1.089733600616455, "learning_rate": 0.0017806674338319908, "loss": 0.6845, "step": 38120 }, { "epoch": 10.969505178365939, "grad_norm": 1.9819934368133545, "learning_rate": 0.0017806098964326814, "loss": 0.8132, "step": 38130 }, { "epoch": 10.972382048331415, "grad_norm": 2.098775625228882, "learning_rate": 0.0017805523590333717, "loss": 0.9306, "step": 38140 }, { "epoch": 10.975258918296893, "grad_norm": 1.2958537340164185, "learning_rate": 0.001780494821634062, "loss": 0.8054, "step": 38150 }, { "epoch": 10.978135788262371, "grad_norm": 1.2923773527145386, "learning_rate": 0.0017804372842347527, "loss": 0.8873, "step": 38160 }, { "epoch": 10.981012658227849, "grad_norm": 1.1242351531982422, "learning_rate": 0.001780379746835443, "loss": 0.688, "step": 38170 }, { "epoch": 10.983889528193325, "grad_norm": 1.0543642044067383, "learning_rate": 0.0017803222094361336, "loss": 0.7174, "step": 38180 }, { "epoch": 10.986766398158803, "grad_norm": 1.384621262550354, "learning_rate": 0.0017802646720368241, "loss": 0.7806, "step": 38190 }, { "epoch": 10.989643268124281, "grad_norm": 0.9414723515510559, "learning_rate": 0.0017802071346375145, "loss": 0.626, "step": 38200 }, { "epoch": 10.992520138089759, "grad_norm": 1.1868816614151, "learning_rate": 0.0017801495972382048, "loss": 0.761, "step": 38210 }, { "epoch": 10.995397008055235, "grad_norm": 1.6276922225952148, "learning_rate": 0.0017800920598388952, "loss": 0.725, "step": 38220 }, { "epoch": 10.998273878020713, "grad_norm": 2.0726799964904785, "learning_rate": 0.0017800345224395857, "loss": 0.6459, "step": 38230 }, { "epoch": 11.001150747986191, "grad_norm": 1.2311623096466064, "learning_rate": 0.0017799769850402763, "loss": 0.6711, "step": 38240 }, { "epoch": 11.00402761795167, "grad_norm": 1.6146705150604248, "learning_rate": 0.0017799194476409666, "loss": 0.7244, "step": 38250 }, { "epoch": 11.006904487917145, "grad_norm": 2.3347928524017334, "learning_rate": 0.0017798619102416572, "loss": 0.7728, "step": 38260 }, { "epoch": 11.009781357882623, "grad_norm": 1.4263043403625488, "learning_rate": 0.0017798043728423476, "loss": 0.6271, "step": 38270 }, { "epoch": 11.012658227848101, "grad_norm": 0.9954443573951721, "learning_rate": 0.001779746835443038, "loss": 0.8693, "step": 38280 }, { "epoch": 11.01553509781358, "grad_norm": 1.2231816053390503, "learning_rate": 0.0017796892980437285, "loss": 0.7526, "step": 38290 }, { "epoch": 11.018411967779056, "grad_norm": 1.4842313528060913, "learning_rate": 0.001779631760644419, "loss": 0.658, "step": 38300 }, { "epoch": 11.021288837744533, "grad_norm": 1.3666818141937256, "learning_rate": 0.0017795742232451094, "loss": 0.7492, "step": 38310 }, { "epoch": 11.024165707710011, "grad_norm": 1.466030478477478, "learning_rate": 0.0017795166858458, "loss": 0.8133, "step": 38320 }, { "epoch": 11.02704257767549, "grad_norm": 1.3553496599197388, "learning_rate": 0.00177945914844649, "loss": 0.8948, "step": 38330 }, { "epoch": 11.029919447640967, "grad_norm": 0.9859044551849365, "learning_rate": 0.0017794016110471806, "loss": 0.7983, "step": 38340 }, { "epoch": 11.032796317606444, "grad_norm": 0.7059655785560608, "learning_rate": 0.0017793440736478712, "loss": 0.5473, "step": 38350 }, { "epoch": 11.035673187571922, "grad_norm": 0.800905168056488, "learning_rate": 0.0017792865362485615, "loss": 0.631, "step": 38360 }, { "epoch": 11.0385500575374, "grad_norm": 1.1067066192626953, "learning_rate": 0.0017792289988492521, "loss": 0.5838, "step": 38370 }, { "epoch": 11.041426927502878, "grad_norm": 2.2467479705810547, "learning_rate": 0.0017791714614499427, "loss": 0.8378, "step": 38380 }, { "epoch": 11.044303797468354, "grad_norm": 0.9406134486198425, "learning_rate": 0.0017791139240506328, "loss": 0.6118, "step": 38390 }, { "epoch": 11.047180667433832, "grad_norm": 1.3979382514953613, "learning_rate": 0.0017790563866513234, "loss": 0.6658, "step": 38400 }, { "epoch": 11.05005753739931, "grad_norm": 1.4850666522979736, "learning_rate": 0.001778998849252014, "loss": 0.7755, "step": 38410 }, { "epoch": 11.052934407364788, "grad_norm": 1.158007264137268, "learning_rate": 0.0017789413118527043, "loss": 0.7699, "step": 38420 }, { "epoch": 11.055811277330264, "grad_norm": 1.229692816734314, "learning_rate": 0.0017788837744533948, "loss": 0.718, "step": 38430 }, { "epoch": 11.058688147295742, "grad_norm": 1.1799068450927734, "learning_rate": 0.0017788262370540852, "loss": 0.684, "step": 38440 }, { "epoch": 11.06156501726122, "grad_norm": 1.229386329650879, "learning_rate": 0.0017787686996547755, "loss": 0.7108, "step": 38450 }, { "epoch": 11.064441887226698, "grad_norm": 0.8230552673339844, "learning_rate": 0.001778711162255466, "loss": 0.786, "step": 38460 }, { "epoch": 11.067318757192174, "grad_norm": 1.1267222166061401, "learning_rate": 0.0017786536248561564, "loss": 0.8402, "step": 38470 }, { "epoch": 11.070195627157652, "grad_norm": 0.7536640763282776, "learning_rate": 0.001778596087456847, "loss": 0.7151, "step": 38480 }, { "epoch": 11.07307249712313, "grad_norm": 1.015946865081787, "learning_rate": 0.0017785385500575376, "loss": 0.9254, "step": 38490 }, { "epoch": 11.075949367088608, "grad_norm": 0.9931955337524414, "learning_rate": 0.0017784810126582277, "loss": 0.6853, "step": 38500 }, { "epoch": 11.078826237054086, "grad_norm": 0.8456212878227234, "learning_rate": 0.0017784234752589183, "loss": 0.663, "step": 38510 }, { "epoch": 11.081703107019562, "grad_norm": 1.5520583391189575, "learning_rate": 0.0017783659378596088, "loss": 0.648, "step": 38520 }, { "epoch": 11.08457997698504, "grad_norm": 1.4319697618484497, "learning_rate": 0.0017783084004602992, "loss": 0.7148, "step": 38530 }, { "epoch": 11.087456846950518, "grad_norm": 2.214715003967285, "learning_rate": 0.0017782508630609897, "loss": 0.7787, "step": 38540 }, { "epoch": 11.090333716915996, "grad_norm": 3.593736171722412, "learning_rate": 0.0017781933256616803, "loss": 0.6916, "step": 38550 }, { "epoch": 11.093210586881472, "grad_norm": 1.2462158203125, "learning_rate": 0.0017781357882623704, "loss": 0.8253, "step": 38560 }, { "epoch": 11.09608745684695, "grad_norm": 0.9458587169647217, "learning_rate": 0.001778078250863061, "loss": 0.7235, "step": 38570 }, { "epoch": 11.098964326812428, "grad_norm": 1.384257435798645, "learning_rate": 0.0017780207134637514, "loss": 0.7123, "step": 38580 }, { "epoch": 11.101841196777906, "grad_norm": 0.7365342974662781, "learning_rate": 0.001777963176064442, "loss": 0.6211, "step": 38590 }, { "epoch": 11.104718066743382, "grad_norm": 0.8886486291885376, "learning_rate": 0.0017779056386651325, "loss": 0.6414, "step": 38600 }, { "epoch": 11.10759493670886, "grad_norm": 0.7487035393714905, "learning_rate": 0.0017778481012658228, "loss": 0.9365, "step": 38610 }, { "epoch": 11.110471806674338, "grad_norm": 3.343512773513794, "learning_rate": 0.0017777905638665132, "loss": 1.0494, "step": 38620 }, { "epoch": 11.113348676639816, "grad_norm": 1.2685718536376953, "learning_rate": 0.0017777330264672037, "loss": 0.8705, "step": 38630 }, { "epoch": 11.116225546605293, "grad_norm": 1.4823150634765625, "learning_rate": 0.001777675489067894, "loss": 0.806, "step": 38640 }, { "epoch": 11.11910241657077, "grad_norm": 1.1969629526138306, "learning_rate": 0.0017776179516685846, "loss": 0.639, "step": 38650 }, { "epoch": 11.121979286536249, "grad_norm": 1.1838350296020508, "learning_rate": 0.0017775604142692752, "loss": 0.8095, "step": 38660 }, { "epoch": 11.124856156501727, "grad_norm": 1.5366109609603882, "learning_rate": 0.0017775028768699656, "loss": 0.7729, "step": 38670 }, { "epoch": 11.127733026467205, "grad_norm": 1.5642192363739014, "learning_rate": 0.001777445339470656, "loss": 0.9868, "step": 38680 }, { "epoch": 11.13060989643268, "grad_norm": 1.3608342409133911, "learning_rate": 0.0017773878020713463, "loss": 0.8507, "step": 38690 }, { "epoch": 11.133486766398159, "grad_norm": 0.8762654066085815, "learning_rate": 0.0017773302646720368, "loss": 0.5483, "step": 38700 }, { "epoch": 11.136363636363637, "grad_norm": 1.517741084098816, "learning_rate": 0.0017772727272727274, "loss": 0.9832, "step": 38710 }, { "epoch": 11.139240506329115, "grad_norm": 1.6149022579193115, "learning_rate": 0.0017772151898734177, "loss": 0.7205, "step": 38720 }, { "epoch": 11.14211737629459, "grad_norm": 2.032851219177246, "learning_rate": 0.0017771576524741083, "loss": 0.8256, "step": 38730 }, { "epoch": 11.144994246260069, "grad_norm": 1.1185762882232666, "learning_rate": 0.0017771001150747986, "loss": 0.6698, "step": 38740 }, { "epoch": 11.147871116225547, "grad_norm": 1.8490439653396606, "learning_rate": 0.001777042577675489, "loss": 0.858, "step": 38750 }, { "epoch": 11.150747986191025, "grad_norm": 1.4286482334136963, "learning_rate": 0.0017769850402761795, "loss": 0.8735, "step": 38760 }, { "epoch": 11.153624856156501, "grad_norm": 1.310461401939392, "learning_rate": 0.0017769275028768701, "loss": 0.6913, "step": 38770 }, { "epoch": 11.156501726121979, "grad_norm": 0.955207109451294, "learning_rate": 0.0017768699654775605, "loss": 0.9241, "step": 38780 }, { "epoch": 11.159378596087457, "grad_norm": 1.081925868988037, "learning_rate": 0.001776812428078251, "loss": 0.7195, "step": 38790 }, { "epoch": 11.162255466052935, "grad_norm": 1.3637028932571411, "learning_rate": 0.0017767548906789412, "loss": 0.8026, "step": 38800 }, { "epoch": 11.165132336018411, "grad_norm": 1.1699970960617065, "learning_rate": 0.0017766973532796317, "loss": 0.7406, "step": 38810 }, { "epoch": 11.16800920598389, "grad_norm": 1.2657692432403564, "learning_rate": 0.0017766398158803223, "loss": 0.7362, "step": 38820 }, { "epoch": 11.170886075949367, "grad_norm": 1.6230964660644531, "learning_rate": 0.0017765822784810126, "loss": 0.6939, "step": 38830 }, { "epoch": 11.173762945914845, "grad_norm": 0.8095999360084534, "learning_rate": 0.0017765247410817032, "loss": 0.6711, "step": 38840 }, { "epoch": 11.176639815880323, "grad_norm": 0.8007609248161316, "learning_rate": 0.0017764672036823938, "loss": 0.7353, "step": 38850 }, { "epoch": 11.1795166858458, "grad_norm": 1.4657965898513794, "learning_rate": 0.0017764096662830839, "loss": 0.8757, "step": 38860 }, { "epoch": 11.182393555811277, "grad_norm": 1.4742828607559204, "learning_rate": 0.0017763521288837745, "loss": 0.7826, "step": 38870 }, { "epoch": 11.185270425776755, "grad_norm": 1.1640260219573975, "learning_rate": 0.001776294591484465, "loss": 0.7859, "step": 38880 }, { "epoch": 11.188147295742233, "grad_norm": 0.7774976491928101, "learning_rate": 0.0017762370540851554, "loss": 0.6694, "step": 38890 }, { "epoch": 11.19102416570771, "grad_norm": 1.9399025440216064, "learning_rate": 0.001776179516685846, "loss": 0.9708, "step": 38900 }, { "epoch": 11.193901035673187, "grad_norm": 2.1852712631225586, "learning_rate": 0.0017761219792865363, "loss": 0.7444, "step": 38910 }, { "epoch": 11.196777905638665, "grad_norm": 1.7930530309677124, "learning_rate": 0.0017760644418872266, "loss": 0.6838, "step": 38920 }, { "epoch": 11.199654775604143, "grad_norm": 1.0810929536819458, "learning_rate": 0.0017760069044879172, "loss": 0.7887, "step": 38930 }, { "epoch": 11.20253164556962, "grad_norm": 1.9233124256134033, "learning_rate": 0.0017759493670886075, "loss": 0.9174, "step": 38940 }, { "epoch": 11.205408515535098, "grad_norm": 0.9415201544761658, "learning_rate": 0.001775891829689298, "loss": 0.8025, "step": 38950 }, { "epoch": 11.208285385500576, "grad_norm": 1.2220155000686646, "learning_rate": 0.0017758342922899887, "loss": 0.8948, "step": 38960 }, { "epoch": 11.211162255466053, "grad_norm": 1.2479208707809448, "learning_rate": 0.001775776754890679, "loss": 0.8362, "step": 38970 }, { "epoch": 11.21403912543153, "grad_norm": 1.585460901260376, "learning_rate": 0.0017757192174913694, "loss": 0.8941, "step": 38980 }, { "epoch": 11.216915995397008, "grad_norm": 1.019208312034607, "learning_rate": 0.00177566168009206, "loss": 0.635, "step": 38990 }, { "epoch": 11.219792865362486, "grad_norm": 2.970740795135498, "learning_rate": 0.0017756041426927503, "loss": 0.9114, "step": 39000 }, { "epoch": 11.222669735327964, "grad_norm": 1.464603304862976, "learning_rate": 0.0017755466052934408, "loss": 0.8647, "step": 39010 }, { "epoch": 11.225546605293442, "grad_norm": 1.149224877357483, "learning_rate": 0.0017754890678941312, "loss": 0.9614, "step": 39020 }, { "epoch": 11.228423475258918, "grad_norm": 1.7943936586380005, "learning_rate": 0.0017754315304948217, "loss": 0.9196, "step": 39030 }, { "epoch": 11.231300345224396, "grad_norm": 1.0853387117385864, "learning_rate": 0.001775373993095512, "loss": 0.79, "step": 39040 }, { "epoch": 11.234177215189874, "grad_norm": 0.9116141200065613, "learning_rate": 0.0017753164556962024, "loss": 0.7918, "step": 39050 }, { "epoch": 11.237054085155352, "grad_norm": 1.495859980583191, "learning_rate": 0.001775258918296893, "loss": 0.7755, "step": 39060 }, { "epoch": 11.239930955120828, "grad_norm": 1.6021956205368042, "learning_rate": 0.0017752013808975836, "loss": 0.7549, "step": 39070 }, { "epoch": 11.242807825086306, "grad_norm": 1.325178623199463, "learning_rate": 0.001775143843498274, "loss": 0.9765, "step": 39080 }, { "epoch": 11.245684695051784, "grad_norm": 1.4870387315750122, "learning_rate": 0.0017750863060989645, "loss": 0.9529, "step": 39090 }, { "epoch": 11.248561565017262, "grad_norm": 1.4479634761810303, "learning_rate": 0.0017750287686996548, "loss": 0.7764, "step": 39100 }, { "epoch": 11.251438434982738, "grad_norm": 1.4720125198364258, "learning_rate": 0.0017749712313003452, "loss": 1.0863, "step": 39110 }, { "epoch": 11.254315304948216, "grad_norm": 1.3391755819320679, "learning_rate": 0.0017749136939010357, "loss": 0.8039, "step": 39120 }, { "epoch": 11.257192174913694, "grad_norm": 1.704700231552124, "learning_rate": 0.001774856156501726, "loss": 0.7009, "step": 39130 }, { "epoch": 11.260069044879172, "grad_norm": 0.8778958320617676, "learning_rate": 0.0017747986191024166, "loss": 0.672, "step": 39140 }, { "epoch": 11.262945914844648, "grad_norm": 1.7676688432693481, "learning_rate": 0.0017747410817031072, "loss": 0.8414, "step": 39150 }, { "epoch": 11.265822784810126, "grad_norm": 1.3360973596572876, "learning_rate": 0.0017746835443037973, "loss": 0.8732, "step": 39160 }, { "epoch": 11.268699654775604, "grad_norm": 1.2250597476959229, "learning_rate": 0.001774626006904488, "loss": 0.8578, "step": 39170 }, { "epoch": 11.271576524741082, "grad_norm": 2.069850206375122, "learning_rate": 0.0017745684695051785, "loss": 0.8395, "step": 39180 }, { "epoch": 11.274453394706558, "grad_norm": 1.4039722681045532, "learning_rate": 0.0017745109321058688, "loss": 0.8238, "step": 39190 }, { "epoch": 11.277330264672036, "grad_norm": 1.4187217950820923, "learning_rate": 0.0017744533947065594, "loss": 0.8328, "step": 39200 }, { "epoch": 11.280207134637514, "grad_norm": 1.2979087829589844, "learning_rate": 0.00177439585730725, "loss": 0.7946, "step": 39210 }, { "epoch": 11.283084004602992, "grad_norm": 1.1849628686904907, "learning_rate": 0.00177433831990794, "loss": 0.8144, "step": 39220 }, { "epoch": 11.28596087456847, "grad_norm": 3.644479751586914, "learning_rate": 0.0017742807825086306, "loss": 0.8586, "step": 39230 }, { "epoch": 11.288837744533947, "grad_norm": 1.4746683835983276, "learning_rate": 0.0017742232451093212, "loss": 0.9099, "step": 39240 }, { "epoch": 11.291714614499424, "grad_norm": 1.4124506711959839, "learning_rate": 0.0017741657077100115, "loss": 0.5539, "step": 39250 }, { "epoch": 11.294591484464902, "grad_norm": 3.1108901500701904, "learning_rate": 0.001774108170310702, "loss": 0.9457, "step": 39260 }, { "epoch": 11.29746835443038, "grad_norm": 1.9063990116119385, "learning_rate": 0.0017740506329113925, "loss": 0.8209, "step": 39270 }, { "epoch": 11.300345224395857, "grad_norm": 1.6135163307189941, "learning_rate": 0.0017739930955120828, "loss": 0.8986, "step": 39280 }, { "epoch": 11.303222094361335, "grad_norm": 1.843361496925354, "learning_rate": 0.0017739355581127734, "loss": 1.0257, "step": 39290 }, { "epoch": 11.306098964326813, "grad_norm": 1.7945703268051147, "learning_rate": 0.0017738780207134637, "loss": 0.8445, "step": 39300 }, { "epoch": 11.30897583429229, "grad_norm": 1.0766873359680176, "learning_rate": 0.0017738204833141543, "loss": 0.9608, "step": 39310 }, { "epoch": 11.311852704257767, "grad_norm": 1.162455677986145, "learning_rate": 0.0017737629459148448, "loss": 0.7037, "step": 39320 }, { "epoch": 11.314729574223245, "grad_norm": 1.1491564512252808, "learning_rate": 0.001773705408515535, "loss": 0.7718, "step": 39330 }, { "epoch": 11.317606444188723, "grad_norm": 1.0425413846969604, "learning_rate": 0.0017736478711162255, "loss": 1.0544, "step": 39340 }, { "epoch": 11.3204833141542, "grad_norm": 1.996312141418457, "learning_rate": 0.001773590333716916, "loss": 0.7822, "step": 39350 }, { "epoch": 11.323360184119679, "grad_norm": 0.9272354245185852, "learning_rate": 0.0017735327963176064, "loss": 0.6927, "step": 39360 }, { "epoch": 11.326237054085155, "grad_norm": 0.9107697606086731, "learning_rate": 0.001773475258918297, "loss": 0.7565, "step": 39370 }, { "epoch": 11.329113924050633, "grad_norm": 1.3211859464645386, "learning_rate": 0.0017734177215189874, "loss": 0.6503, "step": 39380 }, { "epoch": 11.33199079401611, "grad_norm": 0.9887993335723877, "learning_rate": 0.0017733601841196777, "loss": 0.7502, "step": 39390 }, { "epoch": 11.334867663981589, "grad_norm": 2.490600824356079, "learning_rate": 0.0017733026467203683, "loss": 0.7866, "step": 39400 }, { "epoch": 11.337744533947065, "grad_norm": 1.0752073526382446, "learning_rate": 0.0017732451093210586, "loss": 0.8673, "step": 39410 }, { "epoch": 11.340621403912543, "grad_norm": 1.4718722105026245, "learning_rate": 0.0017731875719217492, "loss": 0.6323, "step": 39420 }, { "epoch": 11.343498273878021, "grad_norm": 0.75192791223526, "learning_rate": 0.0017731300345224397, "loss": 0.5904, "step": 39430 }, { "epoch": 11.346375143843499, "grad_norm": 1.5195444822311401, "learning_rate": 0.00177307249712313, "loss": 0.8291, "step": 39440 }, { "epoch": 11.349252013808975, "grad_norm": 1.1101272106170654, "learning_rate": 0.0017730149597238204, "loss": 0.9666, "step": 39450 }, { "epoch": 11.352128883774453, "grad_norm": 1.501969337463379, "learning_rate": 0.001772957422324511, "loss": 0.7442, "step": 39460 }, { "epoch": 11.355005753739931, "grad_norm": 1.878467082977295, "learning_rate": 0.0017728998849252013, "loss": 0.8292, "step": 39470 }, { "epoch": 11.35788262370541, "grad_norm": 0.7303661704063416, "learning_rate": 0.001772842347525892, "loss": 0.7564, "step": 39480 }, { "epoch": 11.360759493670885, "grad_norm": 0.9975404739379883, "learning_rate": 0.0017727848101265823, "loss": 0.7515, "step": 39490 }, { "epoch": 11.363636363636363, "grad_norm": 1.1531981229782104, "learning_rate": 0.0017727272727272728, "loss": 0.7354, "step": 39500 }, { "epoch": 11.366513233601841, "grad_norm": 1.5135746002197266, "learning_rate": 0.0017726697353279632, "loss": 0.8018, "step": 39510 }, { "epoch": 11.36939010356732, "grad_norm": 1.2953985929489136, "learning_rate": 0.0017726121979286535, "loss": 0.7447, "step": 39520 }, { "epoch": 11.372266973532795, "grad_norm": 1.2595925331115723, "learning_rate": 0.001772554660529344, "loss": 0.7676, "step": 39530 }, { "epoch": 11.375143843498273, "grad_norm": 2.0985138416290283, "learning_rate": 0.0017724971231300346, "loss": 0.997, "step": 39540 }, { "epoch": 11.378020713463751, "grad_norm": 1.4850103855133057, "learning_rate": 0.001772439585730725, "loss": 0.7428, "step": 39550 }, { "epoch": 11.38089758342923, "grad_norm": 1.190177321434021, "learning_rate": 0.0017723820483314156, "loss": 1.0162, "step": 39560 }, { "epoch": 11.383774453394707, "grad_norm": 0.9125235676765442, "learning_rate": 0.001772324510932106, "loss": 0.8589, "step": 39570 }, { "epoch": 11.386651323360184, "grad_norm": 2.396928548812866, "learning_rate": 0.0017722669735327963, "loss": 0.6217, "step": 39580 }, { "epoch": 11.389528193325662, "grad_norm": 2.0918185710906982, "learning_rate": 0.0017722094361334868, "loss": 0.8395, "step": 39590 }, { "epoch": 11.39240506329114, "grad_norm": 1.513329267501831, "learning_rate": 0.0017721518987341772, "loss": 0.7608, "step": 39600 }, { "epoch": 11.395281933256618, "grad_norm": 1.0303019285202026, "learning_rate": 0.0017720943613348677, "loss": 0.8352, "step": 39610 }, { "epoch": 11.398158803222094, "grad_norm": 1.462470531463623, "learning_rate": 0.0017720368239355583, "loss": 0.888, "step": 39620 }, { "epoch": 11.401035673187572, "grad_norm": 1.8034875392913818, "learning_rate": 0.0017719792865362484, "loss": 0.6851, "step": 39630 }, { "epoch": 11.40391254315305, "grad_norm": 1.8066388368606567, "learning_rate": 0.001771921749136939, "loss": 1.0028, "step": 39640 }, { "epoch": 11.406789413118528, "grad_norm": 1.672542929649353, "learning_rate": 0.0017718642117376295, "loss": 0.7759, "step": 39650 }, { "epoch": 11.409666283084004, "grad_norm": 2.1228063106536865, "learning_rate": 0.00177180667433832, "loss": 0.785, "step": 39660 }, { "epoch": 11.412543153049482, "grad_norm": 1.065107822418213, "learning_rate": 0.0017717491369390105, "loss": 0.66, "step": 39670 }, { "epoch": 11.41542002301496, "grad_norm": 1.830069661140442, "learning_rate": 0.001771691599539701, "loss": 0.6425, "step": 39680 }, { "epoch": 11.418296892980438, "grad_norm": 1.3220490217208862, "learning_rate": 0.0017716340621403912, "loss": 0.8658, "step": 39690 }, { "epoch": 11.421173762945914, "grad_norm": 1.7532142400741577, "learning_rate": 0.0017715765247410817, "loss": 0.7703, "step": 39700 }, { "epoch": 11.424050632911392, "grad_norm": 1.183615803718567, "learning_rate": 0.001771518987341772, "loss": 0.7649, "step": 39710 }, { "epoch": 11.42692750287687, "grad_norm": 1.0815565586090088, "learning_rate": 0.0017714614499424626, "loss": 0.7539, "step": 39720 }, { "epoch": 11.429804372842348, "grad_norm": 0.8552953004837036, "learning_rate": 0.0017714039125431532, "loss": 0.7486, "step": 39730 }, { "epoch": 11.432681242807826, "grad_norm": 1.6232762336730957, "learning_rate": 0.0017713463751438435, "loss": 0.8486, "step": 39740 }, { "epoch": 11.435558112773302, "grad_norm": 1.8622682094573975, "learning_rate": 0.0017712888377445339, "loss": 0.7129, "step": 39750 }, { "epoch": 11.43843498273878, "grad_norm": 1.849008321762085, "learning_rate": 0.0017712313003452244, "loss": 0.7595, "step": 39760 }, { "epoch": 11.441311852704258, "grad_norm": 0.869610071182251, "learning_rate": 0.0017711737629459148, "loss": 0.708, "step": 39770 }, { "epoch": 11.444188722669736, "grad_norm": 0.9972500801086426, "learning_rate": 0.0017711162255466054, "loss": 0.7675, "step": 39780 }, { "epoch": 11.447065592635212, "grad_norm": 1.7025004625320435, "learning_rate": 0.001771058688147296, "loss": 0.9623, "step": 39790 }, { "epoch": 11.44994246260069, "grad_norm": 2.2542386054992676, "learning_rate": 0.0017710011507479863, "loss": 0.6488, "step": 39800 }, { "epoch": 11.452819332566168, "grad_norm": 1.3009597063064575, "learning_rate": 0.0017709436133486766, "loss": 0.8672, "step": 39810 }, { "epoch": 11.455696202531646, "grad_norm": 0.9313322305679321, "learning_rate": 0.001770886075949367, "loss": 0.6644, "step": 39820 }, { "epoch": 11.458573072497122, "grad_norm": 1.255719542503357, "learning_rate": 0.0017708285385500575, "loss": 0.6846, "step": 39830 }, { "epoch": 11.4614499424626, "grad_norm": 1.7536897659301758, "learning_rate": 0.001770771001150748, "loss": 0.8729, "step": 39840 }, { "epoch": 11.464326812428078, "grad_norm": 2.6407272815704346, "learning_rate": 0.0017707134637514384, "loss": 0.7726, "step": 39850 }, { "epoch": 11.467203682393556, "grad_norm": 1.053979516029358, "learning_rate": 0.001770655926352129, "loss": 0.781, "step": 39860 }, { "epoch": 11.470080552359033, "grad_norm": 2.1544880867004395, "learning_rate": 0.0017705983889528194, "loss": 0.787, "step": 39870 }, { "epoch": 11.47295742232451, "grad_norm": 1.134670376777649, "learning_rate": 0.0017705408515535097, "loss": 0.7086, "step": 39880 }, { "epoch": 11.475834292289989, "grad_norm": 3.9586358070373535, "learning_rate": 0.0017704833141542003, "loss": 0.8039, "step": 39890 }, { "epoch": 11.478711162255467, "grad_norm": 0.8394511342048645, "learning_rate": 0.0017704257767548908, "loss": 0.6925, "step": 39900 }, { "epoch": 11.481588032220944, "grad_norm": 1.843131184577942, "learning_rate": 0.0017703682393555812, "loss": 0.8297, "step": 39910 }, { "epoch": 11.48446490218642, "grad_norm": 0.7565233111381531, "learning_rate": 0.0017703107019562717, "loss": 0.7769, "step": 39920 }, { "epoch": 11.487341772151899, "grad_norm": 1.0454645156860352, "learning_rate": 0.001770253164556962, "loss": 0.6039, "step": 39930 }, { "epoch": 11.490218642117377, "grad_norm": 1.1652761697769165, "learning_rate": 0.0017701956271576524, "loss": 0.6288, "step": 39940 }, { "epoch": 11.493095512082855, "grad_norm": 1.0533826351165771, "learning_rate": 0.001770138089758343, "loss": 0.7428, "step": 39950 }, { "epoch": 11.49597238204833, "grad_norm": 1.785121202468872, "learning_rate": 0.0017700805523590333, "loss": 0.7916, "step": 39960 }, { "epoch": 11.498849252013809, "grad_norm": 1.3777458667755127, "learning_rate": 0.001770023014959724, "loss": 0.7523, "step": 39970 }, { "epoch": 11.501726121979287, "grad_norm": 1.0874395370483398, "learning_rate": 0.0017699654775604145, "loss": 0.832, "step": 39980 }, { "epoch": 11.504602991944765, "grad_norm": 1.1459351778030396, "learning_rate": 0.0017699079401611046, "loss": 0.9267, "step": 39990 }, { "epoch": 11.507479861910241, "grad_norm": 1.0481863021850586, "learning_rate": 0.0017698504027617952, "loss": 0.9008, "step": 40000 }, { "epoch": 11.510356731875719, "grad_norm": 1.0422208309173584, "learning_rate": 0.0017697928653624857, "loss": 0.6936, "step": 40010 }, { "epoch": 11.513233601841197, "grad_norm": 1.6815423965454102, "learning_rate": 0.001769735327963176, "loss": 0.7128, "step": 40020 }, { "epoch": 11.516110471806675, "grad_norm": 1.026604413986206, "learning_rate": 0.0017696777905638666, "loss": 0.8298, "step": 40030 }, { "epoch": 11.518987341772151, "grad_norm": 0.8960224986076355, "learning_rate": 0.0017696202531645572, "loss": 0.7705, "step": 40040 }, { "epoch": 11.521864211737629, "grad_norm": 0.7300949096679688, "learning_rate": 0.0017695627157652473, "loss": 0.8023, "step": 40050 }, { "epoch": 11.524741081703107, "grad_norm": 1.1020900011062622, "learning_rate": 0.001769505178365938, "loss": 0.6803, "step": 40060 }, { "epoch": 11.527617951668585, "grad_norm": 0.7694911956787109, "learning_rate": 0.0017694476409666282, "loss": 0.8855, "step": 40070 }, { "epoch": 11.530494821634061, "grad_norm": 1.1129662990570068, "learning_rate": 0.0017693901035673188, "loss": 0.6988, "step": 40080 }, { "epoch": 11.53337169159954, "grad_norm": 1.1201856136322021, "learning_rate": 0.0017693325661680094, "loss": 1.0005, "step": 40090 }, { "epoch": 11.536248561565017, "grad_norm": 1.6150784492492676, "learning_rate": 0.0017692750287686997, "loss": 0.7984, "step": 40100 }, { "epoch": 11.539125431530495, "grad_norm": 1.9051198959350586, "learning_rate": 0.00176921749136939, "loss": 0.904, "step": 40110 }, { "epoch": 11.542002301495973, "grad_norm": 3.324857473373413, "learning_rate": 0.0017691599539700806, "loss": 0.7599, "step": 40120 }, { "epoch": 11.54487917146145, "grad_norm": 1.6539150476455688, "learning_rate": 0.001769102416570771, "loss": 0.7568, "step": 40130 }, { "epoch": 11.547756041426927, "grad_norm": 3.1187961101531982, "learning_rate": 0.0017690448791714615, "loss": 0.7378, "step": 40140 }, { "epoch": 11.550632911392405, "grad_norm": 1.1213866472244263, "learning_rate": 0.001768987341772152, "loss": 0.6763, "step": 40150 }, { "epoch": 11.553509781357883, "grad_norm": 1.9622868299484253, "learning_rate": 0.0017689298043728422, "loss": 0.8281, "step": 40160 }, { "epoch": 11.55638665132336, "grad_norm": 1.8053247928619385, "learning_rate": 0.0017688722669735328, "loss": 0.757, "step": 40170 }, { "epoch": 11.559263521288837, "grad_norm": 2.339388132095337, "learning_rate": 0.0017688147295742231, "loss": 0.895, "step": 40180 }, { "epoch": 11.562140391254315, "grad_norm": 6.642601490020752, "learning_rate": 0.0017687571921749137, "loss": 0.6911, "step": 40190 }, { "epoch": 11.565017261219793, "grad_norm": 1.1908646821975708, "learning_rate": 0.0017686996547756043, "loss": 0.716, "step": 40200 }, { "epoch": 11.56789413118527, "grad_norm": 0.9892880320549011, "learning_rate": 0.0017686421173762946, "loss": 0.8439, "step": 40210 }, { "epoch": 11.570771001150748, "grad_norm": 0.8531197309494019, "learning_rate": 0.001768584579976985, "loss": 0.664, "step": 40220 }, { "epoch": 11.573647871116226, "grad_norm": 2.4566056728363037, "learning_rate": 0.0017685270425776755, "loss": 0.7985, "step": 40230 }, { "epoch": 11.576524741081704, "grad_norm": 1.746374487876892, "learning_rate": 0.0017684695051783659, "loss": 0.6339, "step": 40240 }, { "epoch": 11.579401611047182, "grad_norm": 1.278593897819519, "learning_rate": 0.0017684119677790564, "loss": 0.9133, "step": 40250 }, { "epoch": 11.582278481012658, "grad_norm": 0.9087414145469666, "learning_rate": 0.001768354430379747, "loss": 0.8056, "step": 40260 }, { "epoch": 11.585155350978136, "grad_norm": 1.2760493755340576, "learning_rate": 0.0017682968929804374, "loss": 0.7455, "step": 40270 }, { "epoch": 11.588032220943614, "grad_norm": 1.0858198404312134, "learning_rate": 0.0017682393555811277, "loss": 0.5709, "step": 40280 }, { "epoch": 11.590909090909092, "grad_norm": 1.0310431718826294, "learning_rate": 0.001768181818181818, "loss": 0.8275, "step": 40290 }, { "epoch": 11.593785960874568, "grad_norm": 1.781174659729004, "learning_rate": 0.0017681242807825086, "loss": 0.9005, "step": 40300 }, { "epoch": 11.596662830840046, "grad_norm": 1.6552194356918335, "learning_rate": 0.0017680667433831992, "loss": 0.931, "step": 40310 }, { "epoch": 11.599539700805524, "grad_norm": 2.969369649887085, "learning_rate": 0.0017680092059838895, "loss": 0.933, "step": 40320 }, { "epoch": 11.602416570771002, "grad_norm": 1.8181360960006714, "learning_rate": 0.00176795166858458, "loss": 0.8536, "step": 40330 }, { "epoch": 11.605293440736478, "grad_norm": 2.4631452560424805, "learning_rate": 0.0017678941311852704, "loss": 0.8671, "step": 40340 }, { "epoch": 11.608170310701956, "grad_norm": 1.9254482984542847, "learning_rate": 0.0017678365937859608, "loss": 0.8687, "step": 40350 }, { "epoch": 11.611047180667434, "grad_norm": 1.6606379747390747, "learning_rate": 0.0017677790563866513, "loss": 0.7937, "step": 40360 }, { "epoch": 11.613924050632912, "grad_norm": 1.448676347732544, "learning_rate": 0.001767721518987342, "loss": 0.8496, "step": 40370 }, { "epoch": 11.616800920598388, "grad_norm": 1.6238163709640503, "learning_rate": 0.0017676639815880323, "loss": 0.8799, "step": 40380 }, { "epoch": 11.619677790563866, "grad_norm": 1.2623740434646606, "learning_rate": 0.0017676064441887228, "loss": 0.8936, "step": 40390 }, { "epoch": 11.622554660529344, "grad_norm": 0.8334515690803528, "learning_rate": 0.001767548906789413, "loss": 0.8054, "step": 40400 }, { "epoch": 11.625431530494822, "grad_norm": 0.9485747218132019, "learning_rate": 0.0017674913693901035, "loss": 0.771, "step": 40410 }, { "epoch": 11.628308400460298, "grad_norm": 1.4779701232910156, "learning_rate": 0.001767433831990794, "loss": 0.7498, "step": 40420 }, { "epoch": 11.631185270425776, "grad_norm": 1.2643306255340576, "learning_rate": 0.0017673762945914844, "loss": 0.8637, "step": 40430 }, { "epoch": 11.634062140391254, "grad_norm": 1.5527831315994263, "learning_rate": 0.001767318757192175, "loss": 0.6622, "step": 40440 }, { "epoch": 11.636939010356732, "grad_norm": 1.2100262641906738, "learning_rate": 0.0017672612197928656, "loss": 0.7872, "step": 40450 }, { "epoch": 11.63981588032221, "grad_norm": 1.3281863927841187, "learning_rate": 0.0017672036823935557, "loss": 1.0174, "step": 40460 }, { "epoch": 11.642692750287686, "grad_norm": 0.9555238485336304, "learning_rate": 0.0017671461449942462, "loss": 0.9703, "step": 40470 }, { "epoch": 11.645569620253164, "grad_norm": 1.3057135343551636, "learning_rate": 0.0017670886075949368, "loss": 0.6653, "step": 40480 }, { "epoch": 11.648446490218642, "grad_norm": 1.6192445755004883, "learning_rate": 0.0017670310701956272, "loss": 0.7183, "step": 40490 }, { "epoch": 11.65132336018412, "grad_norm": 0.9606744050979614, "learning_rate": 0.0017669735327963177, "loss": 0.682, "step": 40500 }, { "epoch": 11.654200230149597, "grad_norm": 2.076953649520874, "learning_rate": 0.0017669159953970083, "loss": 0.82, "step": 40510 }, { "epoch": 11.657077100115075, "grad_norm": 1.485069751739502, "learning_rate": 0.0017668584579976984, "loss": 0.8716, "step": 40520 }, { "epoch": 11.659953970080553, "grad_norm": 1.31619393825531, "learning_rate": 0.001766800920598389, "loss": 0.7854, "step": 40530 }, { "epoch": 11.66283084004603, "grad_norm": 0.9131718873977661, "learning_rate": 0.0017667433831990793, "loss": 0.7071, "step": 40540 }, { "epoch": 11.665707710011507, "grad_norm": 1.4215341806411743, "learning_rate": 0.00176668584579977, "loss": 0.6463, "step": 40550 }, { "epoch": 11.668584579976985, "grad_norm": 1.1506153345108032, "learning_rate": 0.0017666283084004605, "loss": 0.706, "step": 40560 }, { "epoch": 11.671461449942463, "grad_norm": 3.3605504035949707, "learning_rate": 0.0017665707710011508, "loss": 1.0607, "step": 40570 }, { "epoch": 11.67433831990794, "grad_norm": 1.1723687648773193, "learning_rate": 0.0017665132336018412, "loss": 0.8284, "step": 40580 }, { "epoch": 11.677215189873417, "grad_norm": 1.3688257932662964, "learning_rate": 0.0017664556962025317, "loss": 1.2003, "step": 40590 }, { "epoch": 11.680092059838895, "grad_norm": 1.108289122581482, "learning_rate": 0.001766398158803222, "loss": 0.7926, "step": 40600 }, { "epoch": 11.682968929804373, "grad_norm": 0.6570484042167664, "learning_rate": 0.0017663406214039126, "loss": 0.8835, "step": 40610 }, { "epoch": 11.68584579976985, "grad_norm": 1.325938105583191, "learning_rate": 0.0017662830840046032, "loss": 0.9069, "step": 40620 }, { "epoch": 11.688722669735327, "grad_norm": 1.3520209789276123, "learning_rate": 0.0017662255466052935, "loss": 0.8231, "step": 40630 }, { "epoch": 11.691599539700805, "grad_norm": 1.255552053451538, "learning_rate": 0.0017661680092059839, "loss": 0.9039, "step": 40640 }, { "epoch": 11.694476409666283, "grad_norm": 1.5237253904342651, "learning_rate": 0.0017661104718066742, "loss": 0.8707, "step": 40650 }, { "epoch": 11.697353279631761, "grad_norm": 1.5352762937545776, "learning_rate": 0.0017660529344073648, "loss": 0.7888, "step": 40660 }, { "epoch": 11.700230149597239, "grad_norm": 1.2436611652374268, "learning_rate": 0.0017659953970080554, "loss": 0.8429, "step": 40670 }, { "epoch": 11.703107019562715, "grad_norm": 1.354623556137085, "learning_rate": 0.0017659378596087457, "loss": 0.7428, "step": 40680 }, { "epoch": 11.705983889528193, "grad_norm": 1.213472604751587, "learning_rate": 0.0017658803222094363, "loss": 0.6915, "step": 40690 }, { "epoch": 11.708860759493671, "grad_norm": 1.5459473133087158, "learning_rate": 0.0017658227848101266, "loss": 0.7285, "step": 40700 }, { "epoch": 11.711737629459149, "grad_norm": 1.0173319578170776, "learning_rate": 0.001765765247410817, "loss": 0.8017, "step": 40710 }, { "epoch": 11.714614499424625, "grad_norm": 2.720440149307251, "learning_rate": 0.0017657077100115075, "loss": 0.8842, "step": 40720 }, { "epoch": 11.717491369390103, "grad_norm": 1.0936278104782104, "learning_rate": 0.001765650172612198, "loss": 0.7155, "step": 40730 }, { "epoch": 11.720368239355581, "grad_norm": 2.273068428039551, "learning_rate": 0.0017655926352128884, "loss": 0.6863, "step": 40740 }, { "epoch": 11.72324510932106, "grad_norm": 0.6591776013374329, "learning_rate": 0.001765535097813579, "loss": 0.5762, "step": 40750 }, { "epoch": 11.726121979286535, "grad_norm": 0.9791557192802429, "learning_rate": 0.0017654775604142691, "loss": 0.7519, "step": 40760 }, { "epoch": 11.728998849252013, "grad_norm": 1.2962185144424438, "learning_rate": 0.0017654200230149597, "loss": 0.8029, "step": 40770 }, { "epoch": 11.731875719217491, "grad_norm": 1.6417357921600342, "learning_rate": 0.0017653624856156503, "loss": 0.9568, "step": 40780 }, { "epoch": 11.73475258918297, "grad_norm": 0.7812600135803223, "learning_rate": 0.0017653049482163406, "loss": 0.9319, "step": 40790 }, { "epoch": 11.737629459148447, "grad_norm": 2.6658313274383545, "learning_rate": 0.0017652474108170312, "loss": 0.6638, "step": 40800 }, { "epoch": 11.740506329113924, "grad_norm": 1.3583362102508545, "learning_rate": 0.0017651898734177217, "loss": 0.5994, "step": 40810 }, { "epoch": 11.743383199079402, "grad_norm": 0.8990656733512878, "learning_rate": 0.0017651323360184119, "loss": 0.6298, "step": 40820 }, { "epoch": 11.74626006904488, "grad_norm": 1.5558174848556519, "learning_rate": 0.0017650747986191024, "loss": 0.6633, "step": 40830 }, { "epoch": 11.749136939010357, "grad_norm": 1.5452595949172974, "learning_rate": 0.001765017261219793, "loss": 0.7941, "step": 40840 }, { "epoch": 11.752013808975834, "grad_norm": 1.5782183408737183, "learning_rate": 0.0017649597238204833, "loss": 0.8246, "step": 40850 }, { "epoch": 11.754890678941312, "grad_norm": 1.740898609161377, "learning_rate": 0.001764902186421174, "loss": 0.7077, "step": 40860 }, { "epoch": 11.75776754890679, "grad_norm": 2.0494558811187744, "learning_rate": 0.0017648446490218643, "loss": 1.0067, "step": 40870 }, { "epoch": 11.760644418872268, "grad_norm": 1.8212343454360962, "learning_rate": 0.0017647871116225546, "loss": 0.8993, "step": 40880 }, { "epoch": 11.763521288837744, "grad_norm": 1.119139552116394, "learning_rate": 0.0017647295742232452, "loss": 0.7715, "step": 40890 }, { "epoch": 11.766398158803222, "grad_norm": 1.8370437622070312, "learning_rate": 0.0017646720368239355, "loss": 0.7943, "step": 40900 }, { "epoch": 11.7692750287687, "grad_norm": 0.9544287323951721, "learning_rate": 0.001764614499424626, "loss": 0.7584, "step": 40910 }, { "epoch": 11.772151898734178, "grad_norm": 1.3692044019699097, "learning_rate": 0.0017645569620253166, "loss": 0.885, "step": 40920 }, { "epoch": 11.775028768699654, "grad_norm": 0.6028038859367371, "learning_rate": 0.001764499424626007, "loss": 0.7405, "step": 40930 }, { "epoch": 11.777905638665132, "grad_norm": 1.528764009475708, "learning_rate": 0.0017644418872266973, "loss": 1.0135, "step": 40940 }, { "epoch": 11.78078250863061, "grad_norm": 1.5773916244506836, "learning_rate": 0.001764384349827388, "loss": 0.7574, "step": 40950 }, { "epoch": 11.783659378596088, "grad_norm": 1.5406275987625122, "learning_rate": 0.0017643268124280782, "loss": 0.8359, "step": 40960 }, { "epoch": 11.786536248561564, "grad_norm": 2.0924768447875977, "learning_rate": 0.0017642692750287688, "loss": 0.9468, "step": 40970 }, { "epoch": 11.789413118527042, "grad_norm": 1.3442704677581787, "learning_rate": 0.0017642117376294592, "loss": 0.9382, "step": 40980 }, { "epoch": 11.79228998849252, "grad_norm": 1.510805606842041, "learning_rate": 0.0017641542002301495, "loss": 0.883, "step": 40990 }, { "epoch": 11.795166858457998, "grad_norm": 1.535965919494629, "learning_rate": 0.00176409666283084, "loss": 0.7754, "step": 41000 }, { "epoch": 11.798043728423476, "grad_norm": 0.7819586992263794, "learning_rate": 0.0017640391254315304, "loss": 0.9754, "step": 41010 }, { "epoch": 11.800920598388952, "grad_norm": 1.2518099546432495, "learning_rate": 0.001763981588032221, "loss": 0.7588, "step": 41020 }, { "epoch": 11.80379746835443, "grad_norm": 1.4065301418304443, "learning_rate": 0.0017639240506329115, "loss": 0.896, "step": 41030 }, { "epoch": 11.806674338319908, "grad_norm": 1.3660476207733154, "learning_rate": 0.0017638665132336019, "loss": 0.6747, "step": 41040 }, { "epoch": 11.809551208285386, "grad_norm": 0.8480243682861328, "learning_rate": 0.0017638089758342922, "loss": 0.8948, "step": 41050 }, { "epoch": 11.812428078250862, "grad_norm": 1.2334142923355103, "learning_rate": 0.0017637514384349828, "loss": 0.8771, "step": 41060 }, { "epoch": 11.81530494821634, "grad_norm": 3.976346492767334, "learning_rate": 0.0017636939010356731, "loss": 0.8089, "step": 41070 }, { "epoch": 11.818181818181818, "grad_norm": 0.9499451518058777, "learning_rate": 0.0017636363636363637, "loss": 0.8676, "step": 41080 }, { "epoch": 11.821058688147296, "grad_norm": 1.0503579378128052, "learning_rate": 0.001763578826237054, "loss": 0.8479, "step": 41090 }, { "epoch": 11.823935558112773, "grad_norm": 0.9489638209342957, "learning_rate": 0.0017635212888377446, "loss": 0.7361, "step": 41100 }, { "epoch": 11.82681242807825, "grad_norm": 1.755345106124878, "learning_rate": 0.001763463751438435, "loss": 0.898, "step": 41110 }, { "epoch": 11.829689298043728, "grad_norm": 0.781388521194458, "learning_rate": 0.0017634062140391253, "loss": 0.8056, "step": 41120 }, { "epoch": 11.832566168009206, "grad_norm": 1.804663896560669, "learning_rate": 0.0017633486766398159, "loss": 0.9235, "step": 41130 }, { "epoch": 11.835443037974684, "grad_norm": 2.0773937702178955, "learning_rate": 0.0017632911392405064, "loss": 0.7747, "step": 41140 }, { "epoch": 11.83831990794016, "grad_norm": 2.3499863147735596, "learning_rate": 0.0017632336018411968, "loss": 0.9921, "step": 41150 }, { "epoch": 11.841196777905639, "grad_norm": 1.765907645225525, "learning_rate": 0.0017631760644418874, "loss": 0.8527, "step": 41160 }, { "epoch": 11.844073647871117, "grad_norm": 0.8270894289016724, "learning_rate": 0.0017631185270425777, "loss": 0.9062, "step": 41170 }, { "epoch": 11.846950517836595, "grad_norm": 1.672924518585205, "learning_rate": 0.001763060989643268, "loss": 0.6942, "step": 41180 }, { "epoch": 11.84982738780207, "grad_norm": 2.2402050495147705, "learning_rate": 0.0017630034522439586, "loss": 0.9199, "step": 41190 }, { "epoch": 11.852704257767549, "grad_norm": 1.0793445110321045, "learning_rate": 0.0017629459148446492, "loss": 0.6933, "step": 41200 }, { "epoch": 11.855581127733027, "grad_norm": 1.0769144296646118, "learning_rate": 0.0017628883774453395, "loss": 0.7852, "step": 41210 }, { "epoch": 11.858457997698505, "grad_norm": 2.006387948989868, "learning_rate": 0.00176283084004603, "loss": 0.8334, "step": 41220 }, { "epoch": 11.861334867663981, "grad_norm": 1.2446720600128174, "learning_rate": 0.0017627733026467202, "loss": 0.7879, "step": 41230 }, { "epoch": 11.864211737629459, "grad_norm": 2.4952681064605713, "learning_rate": 0.0017627157652474108, "loss": 0.8141, "step": 41240 }, { "epoch": 11.867088607594937, "grad_norm": 4.314261436462402, "learning_rate": 0.0017626582278481013, "loss": 0.8595, "step": 41250 }, { "epoch": 11.869965477560415, "grad_norm": 1.4222205877304077, "learning_rate": 0.0017626006904487917, "loss": 0.7414, "step": 41260 }, { "epoch": 11.872842347525891, "grad_norm": 1.5221434831619263, "learning_rate": 0.0017625431530494823, "loss": 0.8283, "step": 41270 }, { "epoch": 11.875719217491369, "grad_norm": 0.9809785485267639, "learning_rate": 0.0017624856156501728, "loss": 0.8758, "step": 41280 }, { "epoch": 11.878596087456847, "grad_norm": 2.722914934158325, "learning_rate": 0.001762428078250863, "loss": 0.8214, "step": 41290 }, { "epoch": 11.881472957422325, "grad_norm": 1.349058747291565, "learning_rate": 0.0017623705408515535, "loss": 0.8622, "step": 41300 }, { "epoch": 11.884349827387801, "grad_norm": 1.0458720922470093, "learning_rate": 0.001762313003452244, "loss": 0.5757, "step": 41310 }, { "epoch": 11.88722669735328, "grad_norm": 1.520405888557434, "learning_rate": 0.0017622554660529344, "loss": 0.7822, "step": 41320 }, { "epoch": 11.890103567318757, "grad_norm": 1.3460537195205688, "learning_rate": 0.001762197928653625, "loss": 0.7559, "step": 41330 }, { "epoch": 11.892980437284235, "grad_norm": 0.8137878775596619, "learning_rate": 0.0017621403912543153, "loss": 0.7166, "step": 41340 }, { "epoch": 11.895857307249713, "grad_norm": 1.0067379474639893, "learning_rate": 0.0017620828538550057, "loss": 0.6932, "step": 41350 }, { "epoch": 11.89873417721519, "grad_norm": 1.6512330770492554, "learning_rate": 0.0017620253164556962, "loss": 0.8565, "step": 41360 }, { "epoch": 11.901611047180667, "grad_norm": 1.1814950704574585, "learning_rate": 0.0017619677790563866, "loss": 0.6911, "step": 41370 }, { "epoch": 11.904487917146145, "grad_norm": 1.1860209703445435, "learning_rate": 0.0017619102416570772, "loss": 0.6374, "step": 41380 }, { "epoch": 11.907364787111623, "grad_norm": 1.2450004816055298, "learning_rate": 0.0017618527042577677, "loss": 0.7949, "step": 41390 }, { "epoch": 11.9102416570771, "grad_norm": 1.0015501976013184, "learning_rate": 0.001761795166858458, "loss": 1.0304, "step": 41400 }, { "epoch": 11.913118527042577, "grad_norm": 0.7752836346626282, "learning_rate": 0.0017617376294591484, "loss": 0.741, "step": 41410 }, { "epoch": 11.915995397008055, "grad_norm": 1.030463457107544, "learning_rate": 0.001761680092059839, "loss": 0.7621, "step": 41420 }, { "epoch": 11.918872266973533, "grad_norm": 1.5750212669372559, "learning_rate": 0.0017616225546605293, "loss": 0.5714, "step": 41430 }, { "epoch": 11.92174913693901, "grad_norm": 1.4216814041137695, "learning_rate": 0.0017615650172612199, "loss": 0.7543, "step": 41440 }, { "epoch": 11.924626006904488, "grad_norm": 1.4395058155059814, "learning_rate": 0.0017615074798619102, "loss": 0.8371, "step": 41450 }, { "epoch": 11.927502876869966, "grad_norm": 1.6588670015335083, "learning_rate": 0.0017614499424626008, "loss": 0.8465, "step": 41460 }, { "epoch": 11.930379746835444, "grad_norm": 1.2353849411010742, "learning_rate": 0.0017613924050632911, "loss": 0.7515, "step": 41470 }, { "epoch": 11.933256616800922, "grad_norm": 1.2984325885772705, "learning_rate": 0.0017613348676639815, "loss": 0.8298, "step": 41480 }, { "epoch": 11.936133486766398, "grad_norm": 1.506238579750061, "learning_rate": 0.001761277330264672, "loss": 0.9155, "step": 41490 }, { "epoch": 11.939010356731876, "grad_norm": 0.9547949433326721, "learning_rate": 0.0017612197928653626, "loss": 0.9569, "step": 41500 }, { "epoch": 11.941887226697354, "grad_norm": 0.9323287606239319, "learning_rate": 0.001761162255466053, "loss": 0.8194, "step": 41510 }, { "epoch": 11.94476409666283, "grad_norm": 1.4639068841934204, "learning_rate": 0.0017611047180667435, "loss": 0.8364, "step": 41520 }, { "epoch": 11.947640966628308, "grad_norm": 1.1283279657363892, "learning_rate": 0.0017610471806674339, "loss": 0.8786, "step": 41530 }, { "epoch": 11.950517836593786, "grad_norm": 1.618348479270935, "learning_rate": 0.0017609896432681242, "loss": 0.9419, "step": 41540 }, { "epoch": 11.953394706559264, "grad_norm": 1.5180623531341553, "learning_rate": 0.0017609321058688148, "loss": 0.7882, "step": 41550 }, { "epoch": 11.956271576524742, "grad_norm": 0.8856039047241211, "learning_rate": 0.0017608745684695051, "loss": 0.6841, "step": 41560 }, { "epoch": 11.959148446490218, "grad_norm": 1.5872962474822998, "learning_rate": 0.0017608170310701957, "loss": 0.9568, "step": 41570 }, { "epoch": 11.962025316455696, "grad_norm": 2.1354613304138184, "learning_rate": 0.0017607594936708863, "loss": 0.7171, "step": 41580 }, { "epoch": 11.964902186421174, "grad_norm": 1.3272755146026611, "learning_rate": 0.0017607019562715764, "loss": 0.7776, "step": 41590 }, { "epoch": 11.967779056386652, "grad_norm": 1.589171051979065, "learning_rate": 0.001760644418872267, "loss": 0.7278, "step": 41600 }, { "epoch": 11.970655926352128, "grad_norm": 1.335170865058899, "learning_rate": 0.0017605868814729575, "loss": 0.9733, "step": 41610 }, { "epoch": 11.973532796317606, "grad_norm": 1.2665382623672485, "learning_rate": 0.0017605293440736479, "loss": 0.8993, "step": 41620 }, { "epoch": 11.976409666283084, "grad_norm": 0.8348612785339355, "learning_rate": 0.0017604718066743384, "loss": 0.7087, "step": 41630 }, { "epoch": 11.979286536248562, "grad_norm": 1.92404305934906, "learning_rate": 0.001760414269275029, "loss": 0.6844, "step": 41640 }, { "epoch": 11.982163406214038, "grad_norm": 1.0654988288879395, "learning_rate": 0.0017603567318757191, "loss": 0.7773, "step": 41650 }, { "epoch": 11.985040276179516, "grad_norm": 0.9363641142845154, "learning_rate": 0.0017602991944764097, "loss": 0.9271, "step": 41660 }, { "epoch": 11.987917146144994, "grad_norm": 2.312333822250366, "learning_rate": 0.0017602416570771, "loss": 0.6832, "step": 41670 }, { "epoch": 11.990794016110472, "grad_norm": 1.2984769344329834, "learning_rate": 0.0017601841196777906, "loss": 0.8857, "step": 41680 }, { "epoch": 11.99367088607595, "grad_norm": 1.0714982748031616, "learning_rate": 0.0017601265822784812, "loss": 1.0188, "step": 41690 }, { "epoch": 11.996547756041426, "grad_norm": 0.9772949814796448, "learning_rate": 0.0017600690448791715, "loss": 0.6028, "step": 41700 }, { "epoch": 11.999424626006904, "grad_norm": 1.6695308685302734, "learning_rate": 0.0017600115074798619, "loss": 0.5997, "step": 41710 }, { "epoch": 12.002301495972382, "grad_norm": 1.0910227298736572, "learning_rate": 0.0017599539700805524, "loss": 0.6951, "step": 41720 }, { "epoch": 12.00517836593786, "grad_norm": 0.8581985831260681, "learning_rate": 0.0017598964326812428, "loss": 0.7626, "step": 41730 }, { "epoch": 12.008055235903337, "grad_norm": 1.420790672302246, "learning_rate": 0.0017598388952819333, "loss": 0.637, "step": 41740 }, { "epoch": 12.010932105868815, "grad_norm": 1.1747702360153198, "learning_rate": 0.001759781357882624, "loss": 0.6636, "step": 41750 }, { "epoch": 12.013808975834293, "grad_norm": 1.2876065969467163, "learning_rate": 0.0017597238204833142, "loss": 0.7227, "step": 41760 }, { "epoch": 12.01668584579977, "grad_norm": 1.8819942474365234, "learning_rate": 0.0017596662830840046, "loss": 0.8533, "step": 41770 }, { "epoch": 12.019562715765247, "grad_norm": 0.8922308087348938, "learning_rate": 0.001759608745684695, "loss": 0.7862, "step": 41780 }, { "epoch": 12.022439585730725, "grad_norm": 0.939944863319397, "learning_rate": 0.0017595512082853855, "loss": 0.6697, "step": 41790 }, { "epoch": 12.025316455696203, "grad_norm": 1.2209957838058472, "learning_rate": 0.001759493670886076, "loss": 0.634, "step": 41800 }, { "epoch": 12.02819332566168, "grad_norm": 1.2080514430999756, "learning_rate": 0.0017594361334867664, "loss": 0.7324, "step": 41810 }, { "epoch": 12.031070195627157, "grad_norm": 1.1512196063995361, "learning_rate": 0.0017593785960874568, "loss": 0.9537, "step": 41820 }, { "epoch": 12.033947065592635, "grad_norm": 1.4782042503356934, "learning_rate": 0.0017593210586881473, "loss": 0.7239, "step": 41830 }, { "epoch": 12.036823935558113, "grad_norm": 1.6400208473205566, "learning_rate": 0.0017592635212888377, "loss": 0.7074, "step": 41840 }, { "epoch": 12.03970080552359, "grad_norm": 1.556592583656311, "learning_rate": 0.0017592059838895282, "loss": 0.8307, "step": 41850 }, { "epoch": 12.042577675489069, "grad_norm": 1.2047303915023804, "learning_rate": 0.0017591484464902188, "loss": 0.6154, "step": 41860 }, { "epoch": 12.045454545454545, "grad_norm": 1.0155686140060425, "learning_rate": 0.0017590909090909092, "loss": 0.7856, "step": 41870 }, { "epoch": 12.048331415420023, "grad_norm": 2.3169870376586914, "learning_rate": 0.0017590333716915995, "loss": 0.7436, "step": 41880 }, { "epoch": 12.051208285385501, "grad_norm": 1.6347512006759644, "learning_rate": 0.00175897583429229, "loss": 0.7859, "step": 41890 }, { "epoch": 12.054085155350979, "grad_norm": 1.8695006370544434, "learning_rate": 0.0017589182968929804, "loss": 0.8397, "step": 41900 }, { "epoch": 12.056962025316455, "grad_norm": 1.1789566278457642, "learning_rate": 0.001758860759493671, "loss": 0.7517, "step": 41910 }, { "epoch": 12.059838895281933, "grad_norm": 1.4022870063781738, "learning_rate": 0.0017588032220943613, "loss": 0.7093, "step": 41920 }, { "epoch": 12.062715765247411, "grad_norm": 1.0644221305847168, "learning_rate": 0.0017587456846950519, "loss": 0.6772, "step": 41930 }, { "epoch": 12.065592635212889, "grad_norm": 1.751536250114441, "learning_rate": 0.0017586881472957422, "loss": 0.674, "step": 41940 }, { "epoch": 12.068469505178365, "grad_norm": 0.8900517225265503, "learning_rate": 0.0017586306098964326, "loss": 0.7202, "step": 41950 }, { "epoch": 12.071346375143843, "grad_norm": 1.9595224857330322, "learning_rate": 0.0017585730724971231, "loss": 0.8013, "step": 41960 }, { "epoch": 12.074223245109321, "grad_norm": 0.8581684827804565, "learning_rate": 0.0017585155350978137, "loss": 0.5943, "step": 41970 }, { "epoch": 12.0771001150748, "grad_norm": 1.411761999130249, "learning_rate": 0.001758457997698504, "loss": 0.7135, "step": 41980 }, { "epoch": 12.079976985040275, "grad_norm": 1.695314645767212, "learning_rate": 0.0017584004602991946, "loss": 0.8228, "step": 41990 }, { "epoch": 12.082853855005753, "grad_norm": 1.473297357559204, "learning_rate": 0.001758342922899885, "loss": 0.8964, "step": 42000 }, { "epoch": 12.085730724971231, "grad_norm": 0.9604143500328064, "learning_rate": 0.0017582853855005753, "loss": 0.6593, "step": 42010 }, { "epoch": 12.08860759493671, "grad_norm": 2.082395076751709, "learning_rate": 0.0017582278481012659, "loss": 0.729, "step": 42020 }, { "epoch": 12.091484464902187, "grad_norm": 1.0310122966766357, "learning_rate": 0.0017581703107019562, "loss": 0.5771, "step": 42030 }, { "epoch": 12.094361334867664, "grad_norm": 1.2552932500839233, "learning_rate": 0.0017581127733026468, "loss": 0.756, "step": 42040 }, { "epoch": 12.097238204833141, "grad_norm": 1.1642733812332153, "learning_rate": 0.0017580552359033374, "loss": 0.7173, "step": 42050 }, { "epoch": 12.10011507479862, "grad_norm": 0.8208944797515869, "learning_rate": 0.0017579976985040275, "loss": 0.8493, "step": 42060 }, { "epoch": 12.102991944764097, "grad_norm": 1.453558087348938, "learning_rate": 0.001757940161104718, "loss": 0.9202, "step": 42070 }, { "epoch": 12.105868814729574, "grad_norm": 0.9337403178215027, "learning_rate": 0.0017578826237054086, "loss": 0.8, "step": 42080 }, { "epoch": 12.108745684695052, "grad_norm": 0.6512324213981628, "learning_rate": 0.001757825086306099, "loss": 0.6699, "step": 42090 }, { "epoch": 12.11162255466053, "grad_norm": 1.7194454669952393, "learning_rate": 0.0017577675489067895, "loss": 0.7561, "step": 42100 }, { "epoch": 12.114499424626008, "grad_norm": 2.482020854949951, "learning_rate": 0.00175771001150748, "loss": 0.8869, "step": 42110 }, { "epoch": 12.117376294591484, "grad_norm": 1.5243006944656372, "learning_rate": 0.0017576524741081702, "loss": 0.6694, "step": 42120 }, { "epoch": 12.120253164556962, "grad_norm": 2.7406411170959473, "learning_rate": 0.0017575949367088608, "loss": 0.9528, "step": 42130 }, { "epoch": 12.12313003452244, "grad_norm": 2.516836643218994, "learning_rate": 0.0017575373993095511, "loss": 0.7299, "step": 42140 }, { "epoch": 12.126006904487918, "grad_norm": 1.3735767602920532, "learning_rate": 0.0017574798619102417, "loss": 0.8436, "step": 42150 }, { "epoch": 12.128883774453394, "grad_norm": 1.2881425619125366, "learning_rate": 0.0017574223245109323, "loss": 0.6566, "step": 42160 }, { "epoch": 12.131760644418872, "grad_norm": 1.7393771409988403, "learning_rate": 0.0017573647871116226, "loss": 0.8047, "step": 42170 }, { "epoch": 12.13463751438435, "grad_norm": 2.412100315093994, "learning_rate": 0.001757307249712313, "loss": 0.8864, "step": 42180 }, { "epoch": 12.137514384349828, "grad_norm": 1.432815670967102, "learning_rate": 0.0017572497123130035, "loss": 0.5757, "step": 42190 }, { "epoch": 12.140391254315304, "grad_norm": 0.6894439458847046, "learning_rate": 0.0017571921749136939, "loss": 0.6134, "step": 42200 }, { "epoch": 12.143268124280782, "grad_norm": 0.8186745643615723, "learning_rate": 0.0017571346375143844, "loss": 0.6995, "step": 42210 }, { "epoch": 12.14614499424626, "grad_norm": 1.345755934715271, "learning_rate": 0.001757077100115075, "loss": 0.7499, "step": 42220 }, { "epoch": 12.149021864211738, "grad_norm": 0.6712018847465515, "learning_rate": 0.0017570195627157653, "loss": 0.7003, "step": 42230 }, { "epoch": 12.151898734177216, "grad_norm": 1.4716862440109253, "learning_rate": 0.0017569620253164557, "loss": 0.609, "step": 42240 }, { "epoch": 12.154775604142692, "grad_norm": 1.4926848411560059, "learning_rate": 0.001756904487917146, "loss": 0.8058, "step": 42250 }, { "epoch": 12.15765247410817, "grad_norm": 0.9887983798980713, "learning_rate": 0.0017568469505178366, "loss": 0.6317, "step": 42260 }, { "epoch": 12.160529344073648, "grad_norm": 2.6422226428985596, "learning_rate": 0.0017567894131185272, "loss": 0.8197, "step": 42270 }, { "epoch": 12.163406214039126, "grad_norm": 1.6052309274673462, "learning_rate": 0.0017567318757192175, "loss": 0.7052, "step": 42280 }, { "epoch": 12.166283084004602, "grad_norm": 0.841914713382721, "learning_rate": 0.001756674338319908, "loss": 0.6505, "step": 42290 }, { "epoch": 12.16915995397008, "grad_norm": 1.6837472915649414, "learning_rate": 0.0017566168009205984, "loss": 0.9552, "step": 42300 }, { "epoch": 12.172036823935558, "grad_norm": 1.229846715927124, "learning_rate": 0.0017565592635212888, "loss": 0.7407, "step": 42310 }, { "epoch": 12.174913693901036, "grad_norm": 1.0614711046218872, "learning_rate": 0.0017565017261219793, "loss": 1.0784, "step": 42320 }, { "epoch": 12.177790563866512, "grad_norm": 1.2630020380020142, "learning_rate": 0.0017564441887226699, "loss": 0.6773, "step": 42330 }, { "epoch": 12.18066743383199, "grad_norm": 1.1616871356964111, "learning_rate": 0.0017563866513233602, "loss": 0.9165, "step": 42340 }, { "epoch": 12.183544303797468, "grad_norm": 1.1325379610061646, "learning_rate": 0.0017563291139240508, "loss": 0.6354, "step": 42350 }, { "epoch": 12.186421173762946, "grad_norm": 1.0226415395736694, "learning_rate": 0.001756271576524741, "loss": 0.7428, "step": 42360 }, { "epoch": 12.189298043728423, "grad_norm": 1.7012152671813965, "learning_rate": 0.0017562140391254315, "loss": 0.8165, "step": 42370 }, { "epoch": 12.1921749136939, "grad_norm": 1.5428647994995117, "learning_rate": 0.001756156501726122, "loss": 0.8461, "step": 42380 }, { "epoch": 12.195051783659379, "grad_norm": 0.5247207880020142, "learning_rate": 0.0017560989643268124, "loss": 0.706, "step": 42390 }, { "epoch": 12.197928653624857, "grad_norm": 1.4781712293624878, "learning_rate": 0.001756041426927503, "loss": 0.6559, "step": 42400 }, { "epoch": 12.200805523590335, "grad_norm": 1.1146259307861328, "learning_rate": 0.0017559838895281935, "loss": 0.7987, "step": 42410 }, { "epoch": 12.20368239355581, "grad_norm": 1.0554701089859009, "learning_rate": 0.0017559263521288837, "loss": 0.899, "step": 42420 }, { "epoch": 12.206559263521289, "grad_norm": 1.2012579441070557, "learning_rate": 0.0017558688147295742, "loss": 0.7378, "step": 42430 }, { "epoch": 12.209436133486767, "grad_norm": 1.5413615703582764, "learning_rate": 0.0017558112773302648, "loss": 1.0027, "step": 42440 }, { "epoch": 12.212313003452245, "grad_norm": 1.4161874055862427, "learning_rate": 0.0017557537399309551, "loss": 0.7168, "step": 42450 }, { "epoch": 12.215189873417721, "grad_norm": 1.5752708911895752, "learning_rate": 0.0017556962025316457, "loss": 1.0012, "step": 42460 }, { "epoch": 12.218066743383199, "grad_norm": 1.8174434900283813, "learning_rate": 0.0017556386651323363, "loss": 0.9373, "step": 42470 }, { "epoch": 12.220943613348677, "grad_norm": 1.8961776494979858, "learning_rate": 0.0017555811277330264, "loss": 0.8426, "step": 42480 }, { "epoch": 12.223820483314155, "grad_norm": 0.8367596864700317, "learning_rate": 0.001755523590333717, "loss": 0.7, "step": 42490 }, { "epoch": 12.226697353279631, "grad_norm": 1.9692952632904053, "learning_rate": 0.0017554660529344073, "loss": 0.7163, "step": 42500 }, { "epoch": 12.229574223245109, "grad_norm": 1.1058732271194458, "learning_rate": 0.0017554085155350979, "loss": 0.7692, "step": 42510 }, { "epoch": 12.232451093210587, "grad_norm": 1.4095357656478882, "learning_rate": 0.0017553509781357884, "loss": 0.7375, "step": 42520 }, { "epoch": 12.235327963176065, "grad_norm": 0.7416710257530212, "learning_rate": 0.0017552934407364788, "loss": 0.7003, "step": 42530 }, { "epoch": 12.238204833141541, "grad_norm": 1.1081221103668213, "learning_rate": 0.0017552359033371691, "loss": 0.9158, "step": 42540 }, { "epoch": 12.24108170310702, "grad_norm": 1.0655690431594849, "learning_rate": 0.0017551783659378597, "loss": 0.9142, "step": 42550 }, { "epoch": 12.243958573072497, "grad_norm": 1.648884654045105, "learning_rate": 0.00175512082853855, "loss": 0.7631, "step": 42560 }, { "epoch": 12.246835443037975, "grad_norm": 1.0521550178527832, "learning_rate": 0.0017550632911392406, "loss": 0.7411, "step": 42570 }, { "epoch": 12.249712313003453, "grad_norm": 1.0821855068206787, "learning_rate": 0.0017550057537399312, "loss": 0.7351, "step": 42580 }, { "epoch": 12.25258918296893, "grad_norm": 1.3001712560653687, "learning_rate": 0.0017549482163406215, "loss": 0.7694, "step": 42590 }, { "epoch": 12.255466052934407, "grad_norm": 1.6061559915542603, "learning_rate": 0.0017548906789413119, "loss": 0.7644, "step": 42600 }, { "epoch": 12.258342922899885, "grad_norm": 1.0895686149597168, "learning_rate": 0.0017548331415420022, "loss": 0.6959, "step": 42610 }, { "epoch": 12.261219792865363, "grad_norm": 0.7256046533584595, "learning_rate": 0.0017547756041426928, "loss": 0.6914, "step": 42620 }, { "epoch": 12.26409666283084, "grad_norm": 3.8657402992248535, "learning_rate": 0.0017547180667433833, "loss": 1.031, "step": 42630 }, { "epoch": 12.266973532796317, "grad_norm": 1.1905014514923096, "learning_rate": 0.0017546605293440737, "loss": 0.7328, "step": 42640 }, { "epoch": 12.269850402761795, "grad_norm": 1.0110595226287842, "learning_rate": 0.001754602991944764, "loss": 0.5852, "step": 42650 }, { "epoch": 12.272727272727273, "grad_norm": 0.6547088027000427, "learning_rate": 0.0017545454545454546, "loss": 0.7748, "step": 42660 }, { "epoch": 12.27560414269275, "grad_norm": 1.4368822574615479, "learning_rate": 0.001754487917146145, "loss": 0.8652, "step": 42670 }, { "epoch": 12.278481012658228, "grad_norm": 0.9672104716300964, "learning_rate": 0.0017544303797468355, "loss": 0.6789, "step": 42680 }, { "epoch": 12.281357882623706, "grad_norm": 1.1509405374526978, "learning_rate": 0.001754372842347526, "loss": 0.5451, "step": 42690 }, { "epoch": 12.284234752589184, "grad_norm": 1.4146630764007568, "learning_rate": 0.0017543153049482164, "loss": 0.6112, "step": 42700 }, { "epoch": 12.28711162255466, "grad_norm": 0.865215539932251, "learning_rate": 0.0017542577675489068, "loss": 0.7863, "step": 42710 }, { "epoch": 12.289988492520138, "grad_norm": 1.2053312063217163, "learning_rate": 0.0017542002301495971, "loss": 0.7103, "step": 42720 }, { "epoch": 12.292865362485616, "grad_norm": 1.8511152267456055, "learning_rate": 0.0017541426927502877, "loss": 0.8337, "step": 42730 }, { "epoch": 12.295742232451094, "grad_norm": 0.8243930339813232, "learning_rate": 0.0017540851553509782, "loss": 0.7273, "step": 42740 }, { "epoch": 12.29861910241657, "grad_norm": 1.5389989614486694, "learning_rate": 0.0017540276179516686, "loss": 0.7338, "step": 42750 }, { "epoch": 12.301495972382048, "grad_norm": 1.294932246208191, "learning_rate": 0.0017539700805523591, "loss": 0.9216, "step": 42760 }, { "epoch": 12.304372842347526, "grad_norm": 1.0977287292480469, "learning_rate": 0.0017539125431530495, "loss": 0.6893, "step": 42770 }, { "epoch": 12.307249712313004, "grad_norm": 1.0088831186294556, "learning_rate": 0.0017538550057537398, "loss": 0.7212, "step": 42780 }, { "epoch": 12.310126582278482, "grad_norm": 0.8851228952407837, "learning_rate": 0.0017537974683544304, "loss": 0.8605, "step": 42790 }, { "epoch": 12.313003452243958, "grad_norm": 1.1128894090652466, "learning_rate": 0.001753739930955121, "loss": 0.8164, "step": 42800 }, { "epoch": 12.315880322209436, "grad_norm": 1.5805246829986572, "learning_rate": 0.0017536823935558113, "loss": 0.9447, "step": 42810 }, { "epoch": 12.318757192174914, "grad_norm": 1.2413455247879028, "learning_rate": 0.0017536248561565019, "loss": 0.6617, "step": 42820 }, { "epoch": 12.321634062140392, "grad_norm": 1.2462499141693115, "learning_rate": 0.001753567318757192, "loss": 0.7971, "step": 42830 }, { "epoch": 12.324510932105868, "grad_norm": 1.7507872581481934, "learning_rate": 0.0017535097813578826, "loss": 0.932, "step": 42840 }, { "epoch": 12.327387802071346, "grad_norm": 2.032528877258301, "learning_rate": 0.0017534522439585731, "loss": 0.8291, "step": 42850 }, { "epoch": 12.330264672036824, "grad_norm": 2.108039379119873, "learning_rate": 0.0017533947065592635, "loss": 0.7307, "step": 42860 }, { "epoch": 12.333141542002302, "grad_norm": 2.291879415512085, "learning_rate": 0.001753337169159954, "loss": 1.1122, "step": 42870 }, { "epoch": 12.336018411967778, "grad_norm": 1.1180570125579834, "learning_rate": 0.0017532796317606446, "loss": 0.6762, "step": 42880 }, { "epoch": 12.338895281933256, "grad_norm": 1.3773285150527954, "learning_rate": 0.0017532220943613347, "loss": 0.6828, "step": 42890 }, { "epoch": 12.341772151898734, "grad_norm": 1.13467276096344, "learning_rate": 0.0017531645569620253, "loss": 0.9009, "step": 42900 }, { "epoch": 12.344649021864212, "grad_norm": 1.027612328529358, "learning_rate": 0.0017531070195627159, "loss": 0.6619, "step": 42910 }, { "epoch": 12.34752589182969, "grad_norm": 1.0823882818222046, "learning_rate": 0.0017530494821634062, "loss": 0.6263, "step": 42920 }, { "epoch": 12.350402761795166, "grad_norm": 1.1264712810516357, "learning_rate": 0.0017529919447640968, "loss": 0.8238, "step": 42930 }, { "epoch": 12.353279631760644, "grad_norm": 0.9077207446098328, "learning_rate": 0.0017529344073647871, "loss": 0.6636, "step": 42940 }, { "epoch": 12.356156501726122, "grad_norm": 1.0336575508117676, "learning_rate": 0.0017528768699654775, "loss": 0.5981, "step": 42950 }, { "epoch": 12.3590333716916, "grad_norm": 2.714002847671509, "learning_rate": 0.001752819332566168, "loss": 0.8509, "step": 42960 }, { "epoch": 12.361910241657077, "grad_norm": 1.237859845161438, "learning_rate": 0.0017527617951668584, "loss": 0.7515, "step": 42970 }, { "epoch": 12.364787111622555, "grad_norm": 1.0434694290161133, "learning_rate": 0.001752704257767549, "loss": 0.662, "step": 42980 }, { "epoch": 12.367663981588032, "grad_norm": 2.679060697555542, "learning_rate": 0.0017526467203682395, "loss": 0.7151, "step": 42990 }, { "epoch": 12.37054085155351, "grad_norm": 0.6998331546783447, "learning_rate": 0.0017525891829689299, "loss": 0.7284, "step": 43000 }, { "epoch": 12.373417721518987, "grad_norm": 2.060955047607422, "learning_rate": 0.0017525316455696202, "loss": 0.9686, "step": 43010 }, { "epoch": 12.376294591484465, "grad_norm": 0.9013199210166931, "learning_rate": 0.0017524741081703108, "loss": 0.7225, "step": 43020 }, { "epoch": 12.379171461449943, "grad_norm": 1.2504228353500366, "learning_rate": 0.0017524165707710011, "loss": 0.8241, "step": 43030 }, { "epoch": 12.38204833141542, "grad_norm": 1.3501406908035278, "learning_rate": 0.0017523590333716917, "loss": 0.8544, "step": 43040 }, { "epoch": 12.384925201380897, "grad_norm": 0.7153046131134033, "learning_rate": 0.001752301495972382, "loss": 0.6945, "step": 43050 }, { "epoch": 12.387802071346375, "grad_norm": 2.275803327560425, "learning_rate": 0.0017522439585730726, "loss": 0.8467, "step": 43060 }, { "epoch": 12.390678941311853, "grad_norm": 1.7434563636779785, "learning_rate": 0.001752186421173763, "loss": 0.8301, "step": 43070 }, { "epoch": 12.39355581127733, "grad_norm": 1.1224193572998047, "learning_rate": 0.0017521288837744533, "loss": 0.6847, "step": 43080 }, { "epoch": 12.396432681242807, "grad_norm": 0.9178779721260071, "learning_rate": 0.0017520713463751439, "loss": 0.7826, "step": 43090 }, { "epoch": 12.399309551208285, "grad_norm": 0.9152220487594604, "learning_rate": 0.0017520138089758344, "loss": 0.7995, "step": 43100 }, { "epoch": 12.402186421173763, "grad_norm": 0.7273720502853394, "learning_rate": 0.0017519562715765248, "loss": 0.7197, "step": 43110 }, { "epoch": 12.405063291139241, "grad_norm": 1.6136586666107178, "learning_rate": 0.0017518987341772153, "loss": 0.826, "step": 43120 }, { "epoch": 12.407940161104719, "grad_norm": 0.6892317533493042, "learning_rate": 0.0017518411967779057, "loss": 0.7586, "step": 43130 }, { "epoch": 12.410817031070195, "grad_norm": 1.5976498126983643, "learning_rate": 0.001751783659378596, "loss": 0.7525, "step": 43140 }, { "epoch": 12.413693901035673, "grad_norm": 1.3268470764160156, "learning_rate": 0.0017517261219792866, "loss": 0.676, "step": 43150 }, { "epoch": 12.416570771001151, "grad_norm": 1.4668183326721191, "learning_rate": 0.0017516685845799772, "loss": 0.8573, "step": 43160 }, { "epoch": 12.419447640966629, "grad_norm": 1.374508023262024, "learning_rate": 0.0017516110471806675, "loss": 0.9889, "step": 43170 }, { "epoch": 12.422324510932105, "grad_norm": 1.4350115060806274, "learning_rate": 0.001751553509781358, "loss": 0.8056, "step": 43180 }, { "epoch": 12.425201380897583, "grad_norm": 1.3926784992218018, "learning_rate": 0.0017514959723820482, "loss": 0.7349, "step": 43190 }, { "epoch": 12.428078250863061, "grad_norm": 1.4316504001617432, "learning_rate": 0.0017514384349827388, "loss": 0.8153, "step": 43200 }, { "epoch": 12.43095512082854, "grad_norm": 2.51757550239563, "learning_rate": 0.0017513808975834293, "loss": 0.7657, "step": 43210 }, { "epoch": 12.433831990794015, "grad_norm": 1.2229975461959839, "learning_rate": 0.0017513233601841197, "loss": 0.8611, "step": 43220 }, { "epoch": 12.436708860759493, "grad_norm": 1.540246605873108, "learning_rate": 0.0017512658227848102, "loss": 0.7342, "step": 43230 }, { "epoch": 12.439585730724971, "grad_norm": 1.5725774765014648, "learning_rate": 0.0017512082853855008, "loss": 1.0261, "step": 43240 }, { "epoch": 12.44246260069045, "grad_norm": 1.9935753345489502, "learning_rate": 0.001751150747986191, "loss": 1.0755, "step": 43250 }, { "epoch": 12.445339470655925, "grad_norm": 0.784519612789154, "learning_rate": 0.0017510932105868815, "loss": 0.6503, "step": 43260 }, { "epoch": 12.448216340621403, "grad_norm": 1.091731309890747, "learning_rate": 0.001751035673187572, "loss": 0.8087, "step": 43270 }, { "epoch": 12.451093210586881, "grad_norm": 0.9236723780632019, "learning_rate": 0.0017509781357882624, "loss": 0.8534, "step": 43280 }, { "epoch": 12.45397008055236, "grad_norm": 1.5137723684310913, "learning_rate": 0.001750920598388953, "loss": 0.727, "step": 43290 }, { "epoch": 12.456846950517837, "grad_norm": 1.5755972862243652, "learning_rate": 0.0017508630609896433, "loss": 0.6925, "step": 43300 }, { "epoch": 12.459723820483314, "grad_norm": 0.906502366065979, "learning_rate": 0.0017508055235903337, "loss": 0.6829, "step": 43310 }, { "epoch": 12.462600690448792, "grad_norm": 1.6540447473526, "learning_rate": 0.0017507479861910242, "loss": 0.9096, "step": 43320 }, { "epoch": 12.46547756041427, "grad_norm": 1.1860114336013794, "learning_rate": 0.0017506904487917146, "loss": 0.6321, "step": 43330 }, { "epoch": 12.468354430379748, "grad_norm": 0.8401625156402588, "learning_rate": 0.0017506329113924051, "loss": 0.5757, "step": 43340 }, { "epoch": 12.471231300345224, "grad_norm": 1.6269632577896118, "learning_rate": 0.0017505753739930957, "loss": 0.6822, "step": 43350 }, { "epoch": 12.474108170310702, "grad_norm": 1.4908493757247925, "learning_rate": 0.001750517836593786, "loss": 0.8863, "step": 43360 }, { "epoch": 12.47698504027618, "grad_norm": 1.8939093351364136, "learning_rate": 0.0017504602991944764, "loss": 0.7745, "step": 43370 }, { "epoch": 12.479861910241658, "grad_norm": 1.8095738887786865, "learning_rate": 0.001750402761795167, "loss": 0.769, "step": 43380 }, { "epoch": 12.482738780207134, "grad_norm": 1.753185749053955, "learning_rate": 0.0017503452243958573, "loss": 0.7732, "step": 43390 }, { "epoch": 12.485615650172612, "grad_norm": 2.053375482559204, "learning_rate": 0.0017502876869965479, "loss": 0.7982, "step": 43400 }, { "epoch": 12.48849252013809, "grad_norm": 2.195876121520996, "learning_rate": 0.0017502301495972382, "loss": 0.8794, "step": 43410 }, { "epoch": 12.491369390103568, "grad_norm": 1.0939173698425293, "learning_rate": 0.0017501726121979288, "loss": 0.7502, "step": 43420 }, { "epoch": 12.494246260069044, "grad_norm": 1.2176165580749512, "learning_rate": 0.0017501150747986191, "loss": 0.7661, "step": 43430 }, { "epoch": 12.497123130034522, "grad_norm": 1.0604801177978516, "learning_rate": 0.0017500575373993095, "loss": 0.7376, "step": 43440 }, { "epoch": 12.5, "grad_norm": 1.1676785945892334, "learning_rate": 0.00175, "loss": 0.6289, "step": 43450 }, { "epoch": 12.502876869965478, "grad_norm": 2.3047444820404053, "learning_rate": 0.0017499424626006906, "loss": 0.7164, "step": 43460 }, { "epoch": 12.505753739930956, "grad_norm": 2.0714287757873535, "learning_rate": 0.001749884925201381, "loss": 1.0223, "step": 43470 }, { "epoch": 12.508630609896432, "grad_norm": 1.344925880432129, "learning_rate": 0.0017498273878020713, "loss": 0.8869, "step": 43480 }, { "epoch": 12.51150747986191, "grad_norm": 0.8555569052696228, "learning_rate": 0.0017497698504027619, "loss": 0.8018, "step": 43490 }, { "epoch": 12.514384349827388, "grad_norm": 1.6818933486938477, "learning_rate": 0.0017497123130034522, "loss": 0.5054, "step": 43500 }, { "epoch": 12.517261219792866, "grad_norm": 2.561119318008423, "learning_rate": 0.0017496547756041428, "loss": 0.8964, "step": 43510 }, { "epoch": 12.520138089758342, "grad_norm": 1.032948613166809, "learning_rate": 0.0017495972382048331, "loss": 0.962, "step": 43520 }, { "epoch": 12.52301495972382, "grad_norm": 2.1544711589813232, "learning_rate": 0.0017495397008055237, "loss": 1.0087, "step": 43530 }, { "epoch": 12.525891829689298, "grad_norm": 0.9828790426254272, "learning_rate": 0.001749482163406214, "loss": 0.7553, "step": 43540 }, { "epoch": 12.528768699654776, "grad_norm": 1.1680046319961548, "learning_rate": 0.0017494246260069044, "loss": 0.7391, "step": 43550 }, { "epoch": 12.531645569620252, "grad_norm": 2.060163736343384, "learning_rate": 0.001749367088607595, "loss": 0.7932, "step": 43560 }, { "epoch": 12.53452243958573, "grad_norm": 1.1627967357635498, "learning_rate": 0.0017493095512082855, "loss": 0.9541, "step": 43570 }, { "epoch": 12.537399309551208, "grad_norm": 1.9107800722122192, "learning_rate": 0.0017492520138089758, "loss": 0.9414, "step": 43580 }, { "epoch": 12.540276179516686, "grad_norm": 0.9598865509033203, "learning_rate": 0.0017491944764096664, "loss": 0.8482, "step": 43590 }, { "epoch": 12.543153049482163, "grad_norm": 0.8275425434112549, "learning_rate": 0.0017491369390103568, "loss": 1.03, "step": 43600 }, { "epoch": 12.54602991944764, "grad_norm": 1.5795856714248657, "learning_rate": 0.001749079401611047, "loss": 0.9852, "step": 43610 }, { "epoch": 12.548906789413119, "grad_norm": 1.3434478044509888, "learning_rate": 0.0017490218642117377, "loss": 0.7538, "step": 43620 }, { "epoch": 12.551783659378597, "grad_norm": 1.6032252311706543, "learning_rate": 0.001748964326812428, "loss": 0.7352, "step": 43630 }, { "epoch": 12.554660529344073, "grad_norm": 1.2388066053390503, "learning_rate": 0.0017489067894131186, "loss": 0.8831, "step": 43640 }, { "epoch": 12.55753739930955, "grad_norm": 1.7674146890640259, "learning_rate": 0.0017488492520138091, "loss": 0.9174, "step": 43650 }, { "epoch": 12.560414269275029, "grad_norm": 1.5667005777359009, "learning_rate": 0.0017487917146144993, "loss": 0.5991, "step": 43660 }, { "epoch": 12.563291139240507, "grad_norm": 1.3770557641983032, "learning_rate": 0.0017487341772151898, "loss": 1.0163, "step": 43670 }, { "epoch": 12.566168009205985, "grad_norm": 1.3393890857696533, "learning_rate": 0.0017486766398158804, "loss": 0.8387, "step": 43680 }, { "epoch": 12.56904487917146, "grad_norm": 1.267083764076233, "learning_rate": 0.0017486191024165708, "loss": 0.6658, "step": 43690 }, { "epoch": 12.571921749136939, "grad_norm": 1.664942979812622, "learning_rate": 0.0017485615650172613, "loss": 0.7229, "step": 43700 }, { "epoch": 12.574798619102417, "grad_norm": 0.5615156292915344, "learning_rate": 0.0017485040276179519, "loss": 0.9479, "step": 43710 }, { "epoch": 12.577675489067895, "grad_norm": 1.4705928564071655, "learning_rate": 0.001748446490218642, "loss": 0.7544, "step": 43720 }, { "epoch": 12.580552359033371, "grad_norm": 1.174997091293335, "learning_rate": 0.0017483889528193326, "loss": 0.6576, "step": 43730 }, { "epoch": 12.583429228998849, "grad_norm": 0.953891396522522, "learning_rate": 0.001748331415420023, "loss": 0.8278, "step": 43740 }, { "epoch": 12.586306098964327, "grad_norm": 2.090341806411743, "learning_rate": 0.0017482738780207135, "loss": 0.9238, "step": 43750 }, { "epoch": 12.589182968929805, "grad_norm": 0.9811940789222717, "learning_rate": 0.001748216340621404, "loss": 0.6638, "step": 43760 }, { "epoch": 12.592059838895281, "grad_norm": 0.7722921371459961, "learning_rate": 0.0017481588032220944, "loss": 0.7549, "step": 43770 }, { "epoch": 12.594936708860759, "grad_norm": 1.2901767492294312, "learning_rate": 0.0017481012658227847, "loss": 0.9283, "step": 43780 }, { "epoch": 12.597813578826237, "grad_norm": 1.1363533735275269, "learning_rate": 0.0017480437284234753, "loss": 0.7971, "step": 43790 }, { "epoch": 12.600690448791715, "grad_norm": 2.5203020572662354, "learning_rate": 0.0017479861910241657, "loss": 0.8401, "step": 43800 }, { "epoch": 12.603567318757193, "grad_norm": 1.2084561586380005, "learning_rate": 0.0017479286536248562, "loss": 0.8007, "step": 43810 }, { "epoch": 12.60644418872267, "grad_norm": 2.283186674118042, "learning_rate": 0.0017478711162255468, "loss": 0.7623, "step": 43820 }, { "epoch": 12.609321058688147, "grad_norm": 1.718914270401001, "learning_rate": 0.0017478135788262371, "loss": 0.9382, "step": 43830 }, { "epoch": 12.612197928653625, "grad_norm": 1.5039304494857788, "learning_rate": 0.0017477560414269275, "loss": 0.7017, "step": 43840 }, { "epoch": 12.615074798619103, "grad_norm": 0.9611552357673645, "learning_rate": 0.001747698504027618, "loss": 0.7604, "step": 43850 }, { "epoch": 12.61795166858458, "grad_norm": 1.4940004348754883, "learning_rate": 0.0017476409666283084, "loss": 0.8109, "step": 43860 }, { "epoch": 12.620828538550057, "grad_norm": 1.2386915683746338, "learning_rate": 0.001747583429228999, "loss": 0.7288, "step": 43870 }, { "epoch": 12.623705408515535, "grad_norm": 1.320527195930481, "learning_rate": 0.0017475258918296893, "loss": 0.7978, "step": 43880 }, { "epoch": 12.626582278481013, "grad_norm": 1.5693050622940063, "learning_rate": 0.0017474683544303799, "loss": 0.8934, "step": 43890 }, { "epoch": 12.62945914844649, "grad_norm": 1.0431307554244995, "learning_rate": 0.0017474108170310702, "loss": 0.8092, "step": 43900 }, { "epoch": 12.632336018411968, "grad_norm": 1.0285470485687256, "learning_rate": 0.0017473532796317606, "loss": 0.8183, "step": 43910 }, { "epoch": 12.635212888377445, "grad_norm": 2.5297772884368896, "learning_rate": 0.0017472957422324511, "loss": 0.8843, "step": 43920 }, { "epoch": 12.638089758342923, "grad_norm": 1.5238748788833618, "learning_rate": 0.0017472382048331417, "loss": 0.6999, "step": 43930 }, { "epoch": 12.6409666283084, "grad_norm": 1.0552396774291992, "learning_rate": 0.001747180667433832, "loss": 0.8397, "step": 43940 }, { "epoch": 12.643843498273878, "grad_norm": 1.1644823551177979, "learning_rate": 0.0017471231300345226, "loss": 0.6401, "step": 43950 }, { "epoch": 12.646720368239356, "grad_norm": 3.1280624866485596, "learning_rate": 0.001747065592635213, "loss": 0.7731, "step": 43960 }, { "epoch": 12.649597238204834, "grad_norm": 1.0373257398605347, "learning_rate": 0.0017470080552359033, "loss": 0.7944, "step": 43970 }, { "epoch": 12.65247410817031, "grad_norm": 1.2697876691818237, "learning_rate": 0.0017469505178365939, "loss": 0.5957, "step": 43980 }, { "epoch": 12.655350978135788, "grad_norm": 0.9976789355278015, "learning_rate": 0.0017468929804372842, "loss": 0.7504, "step": 43990 }, { "epoch": 12.658227848101266, "grad_norm": 0.9513267278671265, "learning_rate": 0.0017468354430379748, "loss": 0.8727, "step": 44000 }, { "epoch": 12.661104718066744, "grad_norm": 1.455692172050476, "learning_rate": 0.0017467779056386653, "loss": 0.7969, "step": 44010 }, { "epoch": 12.663981588032222, "grad_norm": 2.1606409549713135, "learning_rate": 0.0017467203682393555, "loss": 0.7798, "step": 44020 }, { "epoch": 12.666858457997698, "grad_norm": 1.0554940700531006, "learning_rate": 0.001746662830840046, "loss": 0.685, "step": 44030 }, { "epoch": 12.669735327963176, "grad_norm": 1.0644145011901855, "learning_rate": 0.0017466052934407366, "loss": 0.7117, "step": 44040 }, { "epoch": 12.672612197928654, "grad_norm": 0.8769745826721191, "learning_rate": 0.001746547756041427, "loss": 0.7831, "step": 44050 }, { "epoch": 12.675489067894132, "grad_norm": 2.7077016830444336, "learning_rate": 0.0017464902186421175, "loss": 0.8695, "step": 44060 }, { "epoch": 12.678365937859608, "grad_norm": 1.1505099534988403, "learning_rate": 0.001746432681242808, "loss": 0.8995, "step": 44070 }, { "epoch": 12.681242807825086, "grad_norm": 3.7910079956054688, "learning_rate": 0.0017463751438434982, "loss": 1.0671, "step": 44080 }, { "epoch": 12.684119677790564, "grad_norm": 1.621142864227295, "learning_rate": 0.0017463176064441888, "loss": 0.8473, "step": 44090 }, { "epoch": 12.686996547756042, "grad_norm": 1.8744434118270874, "learning_rate": 0.001746260069044879, "loss": 0.8001, "step": 44100 }, { "epoch": 12.689873417721518, "grad_norm": 1.059545636177063, "learning_rate": 0.0017462025316455697, "loss": 0.8985, "step": 44110 }, { "epoch": 12.692750287686996, "grad_norm": 1.0438387393951416, "learning_rate": 0.0017461449942462602, "loss": 0.7194, "step": 44120 }, { "epoch": 12.695627157652474, "grad_norm": 1.5213667154312134, "learning_rate": 0.0017460874568469506, "loss": 0.7584, "step": 44130 }, { "epoch": 12.698504027617952, "grad_norm": 1.2955483198165894, "learning_rate": 0.001746029919447641, "loss": 0.7833, "step": 44140 }, { "epoch": 12.70138089758343, "grad_norm": 1.4411516189575195, "learning_rate": 0.0017459723820483315, "loss": 0.8158, "step": 44150 }, { "epoch": 12.704257767548906, "grad_norm": 0.6672265529632568, "learning_rate": 0.0017459148446490218, "loss": 0.7186, "step": 44160 }, { "epoch": 12.707134637514384, "grad_norm": 1.1615675687789917, "learning_rate": 0.0017458573072497124, "loss": 0.7159, "step": 44170 }, { "epoch": 12.710011507479862, "grad_norm": 1.4937081336975098, "learning_rate": 0.001745799769850403, "loss": 0.8081, "step": 44180 }, { "epoch": 12.71288837744534, "grad_norm": 1.5156211853027344, "learning_rate": 0.0017457422324510933, "loss": 0.7014, "step": 44190 }, { "epoch": 12.715765247410816, "grad_norm": 2.3009896278381348, "learning_rate": 0.0017456846950517837, "loss": 1.0253, "step": 44200 }, { "epoch": 12.718642117376294, "grad_norm": 0.8162270188331604, "learning_rate": 0.001745627157652474, "loss": 0.5403, "step": 44210 }, { "epoch": 12.721518987341772, "grad_norm": 1.5363694429397583, "learning_rate": 0.0017455696202531646, "loss": 0.9722, "step": 44220 }, { "epoch": 12.72439585730725, "grad_norm": 1.1986229419708252, "learning_rate": 0.0017455120828538551, "loss": 0.785, "step": 44230 }, { "epoch": 12.727272727272727, "grad_norm": 1.0726242065429688, "learning_rate": 0.0017454545454545455, "loss": 0.8575, "step": 44240 }, { "epoch": 12.730149597238205, "grad_norm": 1.0966213941574097, "learning_rate": 0.001745397008055236, "loss": 0.748, "step": 44250 }, { "epoch": 12.733026467203683, "grad_norm": 1.1478263139724731, "learning_rate": 0.0017453394706559264, "loss": 0.7441, "step": 44260 }, { "epoch": 12.73590333716916, "grad_norm": 1.7134467363357544, "learning_rate": 0.0017452819332566167, "loss": 0.6493, "step": 44270 }, { "epoch": 12.738780207134637, "grad_norm": 1.8187775611877441, "learning_rate": 0.0017452243958573073, "loss": 0.831, "step": 44280 }, { "epoch": 12.741657077100115, "grad_norm": 1.2157337665557861, "learning_rate": 0.0017451668584579979, "loss": 0.8418, "step": 44290 }, { "epoch": 12.744533947065593, "grad_norm": 0.8001445531845093, "learning_rate": 0.0017451093210586882, "loss": 0.7595, "step": 44300 }, { "epoch": 12.74741081703107, "grad_norm": 1.1620924472808838, "learning_rate": 0.0017450517836593786, "loss": 0.8259, "step": 44310 }, { "epoch": 12.750287686996547, "grad_norm": 1.0466516017913818, "learning_rate": 0.001744994246260069, "loss": 0.9255, "step": 44320 }, { "epoch": 12.753164556962025, "grad_norm": 1.1457602977752686, "learning_rate": 0.0017449367088607595, "loss": 0.8839, "step": 44330 }, { "epoch": 12.756041426927503, "grad_norm": 1.9992202520370483, "learning_rate": 0.00174487917146145, "loss": 0.8527, "step": 44340 }, { "epoch": 12.75891829689298, "grad_norm": 1.3299520015716553, "learning_rate": 0.0017448216340621404, "loss": 0.755, "step": 44350 }, { "epoch": 12.761795166858459, "grad_norm": 1.637757658958435, "learning_rate": 0.001744764096662831, "loss": 0.6994, "step": 44360 }, { "epoch": 12.764672036823935, "grad_norm": 1.2856264114379883, "learning_rate": 0.0017447065592635213, "loss": 0.664, "step": 44370 }, { "epoch": 12.767548906789413, "grad_norm": 2.000518798828125, "learning_rate": 0.0017446490218642116, "loss": 0.8121, "step": 44380 }, { "epoch": 12.770425776754891, "grad_norm": 1.8666775226593018, "learning_rate": 0.0017445914844649022, "loss": 0.9324, "step": 44390 }, { "epoch": 12.773302646720369, "grad_norm": 1.3842273950576782, "learning_rate": 0.0017445339470655928, "loss": 0.8422, "step": 44400 }, { "epoch": 12.776179516685845, "grad_norm": 2.5378670692443848, "learning_rate": 0.0017444764096662831, "loss": 0.7941, "step": 44410 }, { "epoch": 12.779056386651323, "grad_norm": 2.3648009300231934, "learning_rate": 0.0017444188722669737, "loss": 0.7702, "step": 44420 }, { "epoch": 12.781933256616801, "grad_norm": 1.1936362981796265, "learning_rate": 0.0017443613348676638, "loss": 0.7243, "step": 44430 }, { "epoch": 12.784810126582279, "grad_norm": 1.3046845197677612, "learning_rate": 0.0017443037974683544, "loss": 0.7606, "step": 44440 }, { "epoch": 12.787686996547755, "grad_norm": 2.1205861568450928, "learning_rate": 0.001744246260069045, "loss": 0.7034, "step": 44450 }, { "epoch": 12.790563866513233, "grad_norm": 1.787768006324768, "learning_rate": 0.0017441887226697353, "loss": 0.7533, "step": 44460 }, { "epoch": 12.793440736478711, "grad_norm": 0.7491835355758667, "learning_rate": 0.0017441311852704258, "loss": 0.89, "step": 44470 }, { "epoch": 12.79631760644419, "grad_norm": 2.093254804611206, "learning_rate": 0.0017440736478711164, "loss": 0.6627, "step": 44480 }, { "epoch": 12.799194476409665, "grad_norm": 1.3624591827392578, "learning_rate": 0.0017440161104718065, "loss": 0.6141, "step": 44490 }, { "epoch": 12.802071346375143, "grad_norm": 1.7373849153518677, "learning_rate": 0.001743958573072497, "loss": 0.9352, "step": 44500 }, { "epoch": 12.804948216340621, "grad_norm": 5.4668145179748535, "learning_rate": 0.0017439010356731877, "loss": 1.1593, "step": 44510 }, { "epoch": 12.8078250863061, "grad_norm": 1.4195493459701538, "learning_rate": 0.001743843498273878, "loss": 0.8795, "step": 44520 }, { "epoch": 12.810701956271576, "grad_norm": 0.6584358215332031, "learning_rate": 0.0017437859608745686, "loss": 0.6865, "step": 44530 }, { "epoch": 12.813578826237054, "grad_norm": 1.3905948400497437, "learning_rate": 0.0017437284234752591, "loss": 0.8962, "step": 44540 }, { "epoch": 12.816455696202532, "grad_norm": 2.1502089500427246, "learning_rate": 0.0017436708860759493, "loss": 0.8332, "step": 44550 }, { "epoch": 12.81933256616801, "grad_norm": 1.5526570081710815, "learning_rate": 0.0017436133486766398, "loss": 0.6726, "step": 44560 }, { "epoch": 12.822209436133488, "grad_norm": 1.8993794918060303, "learning_rate": 0.0017435558112773302, "loss": 0.8057, "step": 44570 }, { "epoch": 12.825086306098964, "grad_norm": 1.0211983919143677, "learning_rate": 0.0017434982738780207, "loss": 0.7913, "step": 44580 }, { "epoch": 12.827963176064442, "grad_norm": 1.3693209886550903, "learning_rate": 0.0017434407364787113, "loss": 0.7724, "step": 44590 }, { "epoch": 12.83084004602992, "grad_norm": 0.8831102252006531, "learning_rate": 0.0017433831990794017, "loss": 0.8681, "step": 44600 }, { "epoch": 12.833716915995398, "grad_norm": 1.7227704524993896, "learning_rate": 0.001743325661680092, "loss": 0.9456, "step": 44610 }, { "epoch": 12.836593785960874, "grad_norm": 0.9328121542930603, "learning_rate": 0.0017432681242807826, "loss": 0.6811, "step": 44620 }, { "epoch": 12.839470655926352, "grad_norm": 0.832766592502594, "learning_rate": 0.001743210586881473, "loss": 0.6612, "step": 44630 }, { "epoch": 12.84234752589183, "grad_norm": 1.4948060512542725, "learning_rate": 0.0017431530494821635, "loss": 0.9028, "step": 44640 }, { "epoch": 12.845224395857308, "grad_norm": 1.3035284280776978, "learning_rate": 0.001743095512082854, "loss": 0.6411, "step": 44650 }, { "epoch": 12.848101265822784, "grad_norm": 1.7431743144989014, "learning_rate": 0.0017430379746835444, "loss": 0.8091, "step": 44660 }, { "epoch": 12.850978135788262, "grad_norm": 1.1083344221115112, "learning_rate": 0.0017429804372842347, "loss": 0.811, "step": 44670 }, { "epoch": 12.85385500575374, "grad_norm": 1.282668113708496, "learning_rate": 0.001742922899884925, "loss": 0.8018, "step": 44680 }, { "epoch": 12.856731875719218, "grad_norm": 0.8087635636329651, "learning_rate": 0.0017428653624856157, "loss": 0.7311, "step": 44690 }, { "epoch": 12.859608745684696, "grad_norm": 1.0106515884399414, "learning_rate": 0.0017428078250863062, "loss": 0.782, "step": 44700 }, { "epoch": 12.862485615650172, "grad_norm": 1.9708936214447021, "learning_rate": 0.0017427502876869966, "loss": 1.0697, "step": 44710 }, { "epoch": 12.86536248561565, "grad_norm": 1.6275010108947754, "learning_rate": 0.0017426927502876871, "loss": 0.9571, "step": 44720 }, { "epoch": 12.868239355581128, "grad_norm": 0.9460673332214355, "learning_rate": 0.0017426352128883775, "loss": 0.6896, "step": 44730 }, { "epoch": 12.871116225546606, "grad_norm": 1.422250747680664, "learning_rate": 0.0017425776754890678, "loss": 0.8507, "step": 44740 }, { "epoch": 12.873993095512082, "grad_norm": 4.302846908569336, "learning_rate": 0.0017425201380897584, "loss": 1.0194, "step": 44750 }, { "epoch": 12.87686996547756, "grad_norm": 1.6356905698776245, "learning_rate": 0.001742462600690449, "loss": 0.9871, "step": 44760 }, { "epoch": 12.879746835443038, "grad_norm": 0.9053263068199158, "learning_rate": 0.0017424050632911393, "loss": 0.7033, "step": 44770 }, { "epoch": 12.882623705408516, "grad_norm": 0.8713847994804382, "learning_rate": 0.0017423475258918299, "loss": 0.648, "step": 44780 }, { "epoch": 12.885500575373992, "grad_norm": 1.1895250082015991, "learning_rate": 0.00174228998849252, "loss": 0.5231, "step": 44790 }, { "epoch": 12.88837744533947, "grad_norm": 0.9540327787399292, "learning_rate": 0.0017422324510932106, "loss": 0.7763, "step": 44800 }, { "epoch": 12.891254315304948, "grad_norm": 1.2760534286499023, "learning_rate": 0.0017421749136939011, "loss": 0.9419, "step": 44810 }, { "epoch": 12.894131185270426, "grad_norm": 0.7779159545898438, "learning_rate": 0.0017421173762945915, "loss": 0.922, "step": 44820 }, { "epoch": 12.897008055235903, "grad_norm": 1.5741480588912964, "learning_rate": 0.001742059838895282, "loss": 0.7837, "step": 44830 }, { "epoch": 12.89988492520138, "grad_norm": 1.0145633220672607, "learning_rate": 0.0017420023014959726, "loss": 0.7864, "step": 44840 }, { "epoch": 12.902761795166859, "grad_norm": 1.4484657049179077, "learning_rate": 0.0017419447640966627, "loss": 0.8657, "step": 44850 }, { "epoch": 12.905638665132336, "grad_norm": 1.4885896444320679, "learning_rate": 0.0017418872266973533, "loss": 0.7649, "step": 44860 }, { "epoch": 12.908515535097813, "grad_norm": 1.3765562772750854, "learning_rate": 0.0017418296892980439, "loss": 0.7142, "step": 44870 }, { "epoch": 12.91139240506329, "grad_norm": 1.5561375617980957, "learning_rate": 0.0017417721518987342, "loss": 0.7845, "step": 44880 }, { "epoch": 12.914269275028769, "grad_norm": 1.0890090465545654, "learning_rate": 0.0017417146144994248, "loss": 0.7847, "step": 44890 }, { "epoch": 12.917146144994247, "grad_norm": 1.1521459817886353, "learning_rate": 0.0017416570771001151, "loss": 0.726, "step": 44900 }, { "epoch": 12.920023014959725, "grad_norm": 1.7952691316604614, "learning_rate": 0.0017415995397008055, "loss": 0.6617, "step": 44910 }, { "epoch": 12.9228998849252, "grad_norm": 1.540885090827942, "learning_rate": 0.001741542002301496, "loss": 0.7254, "step": 44920 }, { "epoch": 12.925776754890679, "grad_norm": 0.68072110414505, "learning_rate": 0.0017414844649021864, "loss": 0.6528, "step": 44930 }, { "epoch": 12.928653624856157, "grad_norm": 1.1117802858352661, "learning_rate": 0.001741426927502877, "loss": 0.8652, "step": 44940 }, { "epoch": 12.931530494821635, "grad_norm": 2.326361894607544, "learning_rate": 0.0017413693901035675, "loss": 0.8169, "step": 44950 }, { "epoch": 12.934407364787111, "grad_norm": 1.1005903482437134, "learning_rate": 0.0017413118527042578, "loss": 0.8919, "step": 44960 }, { "epoch": 12.937284234752589, "grad_norm": 0.8150712251663208, "learning_rate": 0.0017412543153049482, "loss": 0.6803, "step": 44970 }, { "epoch": 12.940161104718067, "grad_norm": 0.8228994607925415, "learning_rate": 0.0017411967779056388, "loss": 0.6496, "step": 44980 }, { "epoch": 12.943037974683545, "grad_norm": 1.4033600091934204, "learning_rate": 0.001741139240506329, "loss": 0.7831, "step": 44990 }, { "epoch": 12.945914844649021, "grad_norm": 0.9331263303756714, "learning_rate": 0.0017410817031070197, "loss": 0.8853, "step": 45000 }, { "epoch": 12.948791714614499, "grad_norm": 0.70811527967453, "learning_rate": 0.00174102416570771, "loss": 0.7997, "step": 45010 }, { "epoch": 12.951668584579977, "grad_norm": 1.9911108016967773, "learning_rate": 0.0017409666283084006, "loss": 0.8116, "step": 45020 }, { "epoch": 12.954545454545455, "grad_norm": 1.2241889238357544, "learning_rate": 0.001740909090909091, "loss": 0.7679, "step": 45030 }, { "epoch": 12.957422324510933, "grad_norm": 1.534496545791626, "learning_rate": 0.0017408515535097813, "loss": 0.8165, "step": 45040 }, { "epoch": 12.96029919447641, "grad_norm": 1.2519959211349487, "learning_rate": 0.0017407940161104718, "loss": 0.7769, "step": 45050 }, { "epoch": 12.963176064441887, "grad_norm": 2.0412631034851074, "learning_rate": 0.0017407364787111624, "loss": 0.7904, "step": 45060 }, { "epoch": 12.966052934407365, "grad_norm": 0.9263031482696533, "learning_rate": 0.0017406789413118527, "loss": 0.9201, "step": 45070 }, { "epoch": 12.968929804372843, "grad_norm": 1.1513253450393677, "learning_rate": 0.0017406214039125433, "loss": 0.7703, "step": 45080 }, { "epoch": 12.97180667433832, "grad_norm": 0.9191755652427673, "learning_rate": 0.0017405638665132337, "loss": 0.9037, "step": 45090 }, { "epoch": 12.974683544303797, "grad_norm": 1.149014949798584, "learning_rate": 0.001740506329113924, "loss": 0.943, "step": 45100 }, { "epoch": 12.977560414269275, "grad_norm": 1.8298280239105225, "learning_rate": 0.0017404487917146146, "loss": 0.6464, "step": 45110 }, { "epoch": 12.980437284234753, "grad_norm": 1.0675158500671387, "learning_rate": 0.0017403912543153051, "loss": 0.8361, "step": 45120 }, { "epoch": 12.98331415420023, "grad_norm": 1.144073724746704, "learning_rate": 0.0017403337169159955, "loss": 1.0851, "step": 45130 }, { "epoch": 12.986191024165707, "grad_norm": 2.2315595149993896, "learning_rate": 0.0017402761795166858, "loss": 0.8991, "step": 45140 }, { "epoch": 12.989067894131185, "grad_norm": 1.242087483406067, "learning_rate": 0.0017402186421173762, "loss": 0.7199, "step": 45150 }, { "epoch": 12.991944764096663, "grad_norm": 1.630510926246643, "learning_rate": 0.0017401611047180667, "loss": 0.9154, "step": 45160 }, { "epoch": 12.99482163406214, "grad_norm": 1.1022144556045532, "learning_rate": 0.0017401035673187573, "loss": 0.7141, "step": 45170 }, { "epoch": 12.997698504027618, "grad_norm": 1.4306796789169312, "learning_rate": 0.0017400460299194476, "loss": 0.7765, "step": 45180 }, { "epoch": 13.000575373993096, "grad_norm": 2.0044915676116943, "learning_rate": 0.0017399884925201382, "loss": 0.6914, "step": 45190 }, { "epoch": 13.003452243958574, "grad_norm": 1.052503228187561, "learning_rate": 0.0017399309551208286, "loss": 0.676, "step": 45200 }, { "epoch": 13.00632911392405, "grad_norm": 0.9747355580329895, "learning_rate": 0.001739873417721519, "loss": 0.7551, "step": 45210 }, { "epoch": 13.009205983889528, "grad_norm": 1.7343370914459229, "learning_rate": 0.0017398158803222095, "loss": 0.7984, "step": 45220 }, { "epoch": 13.012082853855006, "grad_norm": 1.0556703805923462, "learning_rate": 0.0017397583429229, "loss": 0.5952, "step": 45230 }, { "epoch": 13.014959723820484, "grad_norm": 1.583356261253357, "learning_rate": 0.0017397008055235904, "loss": 0.8137, "step": 45240 }, { "epoch": 13.017836593785962, "grad_norm": 1.0064170360565186, "learning_rate": 0.001739643268124281, "loss": 0.6644, "step": 45250 }, { "epoch": 13.020713463751438, "grad_norm": 1.4252557754516602, "learning_rate": 0.001739585730724971, "loss": 0.5953, "step": 45260 }, { "epoch": 13.023590333716916, "grad_norm": 2.138573408126831, "learning_rate": 0.0017395281933256616, "loss": 0.5794, "step": 45270 }, { "epoch": 13.026467203682394, "grad_norm": 0.788179337978363, "learning_rate": 0.0017394706559263522, "loss": 0.6384, "step": 45280 }, { "epoch": 13.029344073647872, "grad_norm": 1.3628863096237183, "learning_rate": 0.0017394131185270425, "loss": 0.6303, "step": 45290 }, { "epoch": 13.032220943613348, "grad_norm": 0.8447245359420776, "learning_rate": 0.0017393555811277331, "loss": 0.7068, "step": 45300 }, { "epoch": 13.035097813578826, "grad_norm": 1.5964614152908325, "learning_rate": 0.0017392980437284237, "loss": 0.8082, "step": 45310 }, { "epoch": 13.037974683544304, "grad_norm": 1.3603016138076782, "learning_rate": 0.0017392405063291138, "loss": 0.7282, "step": 45320 }, { "epoch": 13.040851553509782, "grad_norm": 1.6112397909164429, "learning_rate": 0.0017391829689298044, "loss": 0.6312, "step": 45330 }, { "epoch": 13.043728423475258, "grad_norm": 1.1632332801818848, "learning_rate": 0.001739125431530495, "loss": 0.6969, "step": 45340 }, { "epoch": 13.046605293440736, "grad_norm": 1.617533802986145, "learning_rate": 0.0017390678941311853, "loss": 0.7535, "step": 45350 }, { "epoch": 13.049482163406214, "grad_norm": 1.8187838792800903, "learning_rate": 0.0017390103567318758, "loss": 0.5999, "step": 45360 }, { "epoch": 13.052359033371692, "grad_norm": 1.9877591133117676, "learning_rate": 0.0017389528193325662, "loss": 0.8591, "step": 45370 }, { "epoch": 13.055235903337168, "grad_norm": 1.1162465810775757, "learning_rate": 0.0017388952819332565, "loss": 0.7442, "step": 45380 }, { "epoch": 13.058112773302646, "grad_norm": 1.7598623037338257, "learning_rate": 0.001738837744533947, "loss": 0.7585, "step": 45390 }, { "epoch": 13.060989643268124, "grad_norm": 0.9380955100059509, "learning_rate": 0.0017387802071346375, "loss": 0.7806, "step": 45400 }, { "epoch": 13.063866513233602, "grad_norm": 1.638680338859558, "learning_rate": 0.001738722669735328, "loss": 0.9614, "step": 45410 }, { "epoch": 13.06674338319908, "grad_norm": 1.7861448526382446, "learning_rate": 0.0017386651323360186, "loss": 0.8671, "step": 45420 }, { "epoch": 13.069620253164556, "grad_norm": 1.4460607767105103, "learning_rate": 0.001738607594936709, "loss": 1.0255, "step": 45430 }, { "epoch": 13.072497123130034, "grad_norm": 1.0384584665298462, "learning_rate": 0.0017385500575373993, "loss": 0.6553, "step": 45440 }, { "epoch": 13.075373993095512, "grad_norm": 1.43793785572052, "learning_rate": 0.0017384925201380898, "loss": 0.8381, "step": 45450 }, { "epoch": 13.07825086306099, "grad_norm": 0.7522374987602234, "learning_rate": 0.0017384349827387802, "loss": 0.6962, "step": 45460 }, { "epoch": 13.081127733026467, "grad_norm": 1.8524552583694458, "learning_rate": 0.0017383774453394707, "loss": 0.8844, "step": 45470 }, { "epoch": 13.084004602991945, "grad_norm": 1.1824777126312256, "learning_rate": 0.001738319907940161, "loss": 0.5953, "step": 45480 }, { "epoch": 13.086881472957423, "grad_norm": 1.2335251569747925, "learning_rate": 0.0017382623705408517, "loss": 0.6843, "step": 45490 }, { "epoch": 13.0897583429229, "grad_norm": 1.6734987497329712, "learning_rate": 0.001738204833141542, "loss": 0.7138, "step": 45500 }, { "epoch": 13.092635212888377, "grad_norm": 1.2665282487869263, "learning_rate": 0.0017381472957422324, "loss": 0.6489, "step": 45510 }, { "epoch": 13.095512082853855, "grad_norm": 0.8008357882499695, "learning_rate": 0.001738089758342923, "loss": 0.67, "step": 45520 }, { "epoch": 13.098388952819333, "grad_norm": 2.5451953411102295, "learning_rate": 0.0017380322209436135, "loss": 0.8693, "step": 45530 }, { "epoch": 13.10126582278481, "grad_norm": 1.5098559856414795, "learning_rate": 0.0017379746835443038, "loss": 0.7117, "step": 45540 }, { "epoch": 13.104142692750287, "grad_norm": 1.9513239860534668, "learning_rate": 0.0017379171461449944, "loss": 0.7835, "step": 45550 }, { "epoch": 13.107019562715765, "grad_norm": 1.9778774976730347, "learning_rate": 0.0017378596087456847, "loss": 0.7619, "step": 45560 }, { "epoch": 13.109896432681243, "grad_norm": 1.6669962406158447, "learning_rate": 0.001737802071346375, "loss": 0.7141, "step": 45570 }, { "epoch": 13.11277330264672, "grad_norm": 1.042809247970581, "learning_rate": 0.0017377445339470656, "loss": 0.5666, "step": 45580 }, { "epoch": 13.115650172612199, "grad_norm": 1.3175891637802124, "learning_rate": 0.001737686996547756, "loss": 0.6709, "step": 45590 }, { "epoch": 13.118527042577675, "grad_norm": 1.2283635139465332, "learning_rate": 0.0017376294591484466, "loss": 0.8018, "step": 45600 }, { "epoch": 13.121403912543153, "grad_norm": 1.4526540040969849, "learning_rate": 0.0017375719217491371, "loss": 0.8412, "step": 45610 }, { "epoch": 13.124280782508631, "grad_norm": 1.6551202535629272, "learning_rate": 0.0017375143843498273, "loss": 0.8224, "step": 45620 }, { "epoch": 13.127157652474109, "grad_norm": 1.4198780059814453, "learning_rate": 0.0017374568469505178, "loss": 0.6558, "step": 45630 }, { "epoch": 13.130034522439585, "grad_norm": 0.9201130270957947, "learning_rate": 0.0017373993095512084, "loss": 0.8003, "step": 45640 }, { "epoch": 13.132911392405063, "grad_norm": 0.7981874942779541, "learning_rate": 0.0017373417721518987, "loss": 0.9028, "step": 45650 }, { "epoch": 13.135788262370541, "grad_norm": 1.2535974979400635, "learning_rate": 0.0017372842347525893, "loss": 0.7591, "step": 45660 }, { "epoch": 13.138665132336019, "grad_norm": 0.9902228713035583, "learning_rate": 0.0017372266973532799, "loss": 0.5922, "step": 45670 }, { "epoch": 13.141542002301495, "grad_norm": 1.537952184677124, "learning_rate": 0.00173716915995397, "loss": 0.5945, "step": 45680 }, { "epoch": 13.144418872266973, "grad_norm": 1.1361758708953857, "learning_rate": 0.0017371116225546606, "loss": 0.8191, "step": 45690 }, { "epoch": 13.147295742232451, "grad_norm": 1.3602701425552368, "learning_rate": 0.001737054085155351, "loss": 0.8663, "step": 45700 }, { "epoch": 13.15017261219793, "grad_norm": 0.9793403744697571, "learning_rate": 0.0017369965477560415, "loss": 0.6172, "step": 45710 }, { "epoch": 13.153049482163405, "grad_norm": 2.5433545112609863, "learning_rate": 0.001736939010356732, "loss": 0.8408, "step": 45720 }, { "epoch": 13.155926352128883, "grad_norm": 1.3612146377563477, "learning_rate": 0.0017368814729574224, "loss": 0.7033, "step": 45730 }, { "epoch": 13.158803222094361, "grad_norm": 2.3726513385772705, "learning_rate": 0.0017368239355581127, "loss": 0.708, "step": 45740 }, { "epoch": 13.16168009205984, "grad_norm": 1.6891446113586426, "learning_rate": 0.0017367663981588033, "loss": 0.7894, "step": 45750 }, { "epoch": 13.164556962025316, "grad_norm": 4.924036979675293, "learning_rate": 0.0017367088607594936, "loss": 0.7508, "step": 45760 }, { "epoch": 13.167433831990794, "grad_norm": 1.2757351398468018, "learning_rate": 0.0017366513233601842, "loss": 0.9009, "step": 45770 }, { "epoch": 13.170310701956272, "grad_norm": 1.560318112373352, "learning_rate": 0.0017365937859608748, "loss": 0.9472, "step": 45780 }, { "epoch": 13.17318757192175, "grad_norm": 1.5186911821365356, "learning_rate": 0.001736536248561565, "loss": 0.7884, "step": 45790 }, { "epoch": 13.176064441887227, "grad_norm": 1.3507509231567383, "learning_rate": 0.0017364787111622555, "loss": 0.8317, "step": 45800 }, { "epoch": 13.178941311852704, "grad_norm": 1.209346890449524, "learning_rate": 0.001736421173762946, "loss": 0.7025, "step": 45810 }, { "epoch": 13.181818181818182, "grad_norm": 0.9537495970726013, "learning_rate": 0.0017363636363636364, "loss": 0.7218, "step": 45820 }, { "epoch": 13.18469505178366, "grad_norm": 1.8701928853988647, "learning_rate": 0.001736306098964327, "loss": 0.7561, "step": 45830 }, { "epoch": 13.187571921749138, "grad_norm": 2.198683977127075, "learning_rate": 0.0017362485615650173, "loss": 0.8301, "step": 45840 }, { "epoch": 13.190448791714614, "grad_norm": 0.7597553133964539, "learning_rate": 0.0017361910241657078, "loss": 0.6144, "step": 45850 }, { "epoch": 13.193325661680092, "grad_norm": 1.0120275020599365, "learning_rate": 0.0017361334867663982, "loss": 0.6943, "step": 45860 }, { "epoch": 13.19620253164557, "grad_norm": 1.7257471084594727, "learning_rate": 0.0017360759493670885, "loss": 0.8543, "step": 45870 }, { "epoch": 13.199079401611048, "grad_norm": 1.1727482080459595, "learning_rate": 0.001736018411967779, "loss": 0.8295, "step": 45880 }, { "epoch": 13.201956271576524, "grad_norm": 1.0026530027389526, "learning_rate": 0.0017359608745684697, "loss": 0.6474, "step": 45890 }, { "epoch": 13.204833141542002, "grad_norm": 1.2828084230422974, "learning_rate": 0.00173590333716916, "loss": 0.8052, "step": 45900 }, { "epoch": 13.20771001150748, "grad_norm": 1.2175346612930298, "learning_rate": 0.0017358457997698506, "loss": 0.8493, "step": 45910 }, { "epoch": 13.210586881472958, "grad_norm": 1.1134202480316162, "learning_rate": 0.001735788262370541, "loss": 1.0333, "step": 45920 }, { "epoch": 13.213463751438436, "grad_norm": 1.1992902755737305, "learning_rate": 0.0017357307249712313, "loss": 0.6791, "step": 45930 }, { "epoch": 13.216340621403912, "grad_norm": 2.1549158096313477, "learning_rate": 0.0017356731875719218, "loss": 0.7567, "step": 45940 }, { "epoch": 13.21921749136939, "grad_norm": 1.2331916093826294, "learning_rate": 0.0017356156501726122, "loss": 0.7357, "step": 45950 }, { "epoch": 13.222094361334868, "grad_norm": 1.167647123336792, "learning_rate": 0.0017355581127733027, "loss": 0.6145, "step": 45960 }, { "epoch": 13.224971231300346, "grad_norm": 1.0421466827392578, "learning_rate": 0.001735500575373993, "loss": 0.7533, "step": 45970 }, { "epoch": 13.227848101265822, "grad_norm": 1.0180147886276245, "learning_rate": 0.0017354430379746834, "loss": 0.7399, "step": 45980 }, { "epoch": 13.2307249712313, "grad_norm": 0.9432151317596436, "learning_rate": 0.001735385500575374, "loss": 0.7763, "step": 45990 }, { "epoch": 13.233601841196778, "grad_norm": 1.303186297416687, "learning_rate": 0.0017353279631760646, "loss": 0.7163, "step": 46000 }, { "epoch": 13.236478711162256, "grad_norm": 1.2197743654251099, "learning_rate": 0.001735270425776755, "loss": 0.7502, "step": 46010 }, { "epoch": 13.239355581127732, "grad_norm": 1.437817931175232, "learning_rate": 0.0017352128883774455, "loss": 0.6742, "step": 46020 }, { "epoch": 13.24223245109321, "grad_norm": 1.4946049451828003, "learning_rate": 0.0017351553509781358, "loss": 0.672, "step": 46030 }, { "epoch": 13.245109321058688, "grad_norm": 0.6574632525444031, "learning_rate": 0.0017350978135788262, "loss": 0.7931, "step": 46040 }, { "epoch": 13.247986191024166, "grad_norm": 0.8423603177070618, "learning_rate": 0.0017350402761795167, "loss": 0.6928, "step": 46050 }, { "epoch": 13.250863060989643, "grad_norm": 1.0090397596359253, "learning_rate": 0.001734982738780207, "loss": 0.7819, "step": 46060 }, { "epoch": 13.25373993095512, "grad_norm": 1.2798774242401123, "learning_rate": 0.0017349252013808976, "loss": 0.7646, "step": 46070 }, { "epoch": 13.256616800920598, "grad_norm": 1.1648184061050415, "learning_rate": 0.0017348676639815882, "loss": 0.6307, "step": 46080 }, { "epoch": 13.259493670886076, "grad_norm": 1.8632562160491943, "learning_rate": 0.0017348101265822783, "loss": 0.7076, "step": 46090 }, { "epoch": 13.262370540851553, "grad_norm": 2.3406288623809814, "learning_rate": 0.001734752589182969, "loss": 0.8173, "step": 46100 }, { "epoch": 13.26524741081703, "grad_norm": 1.1726360321044922, "learning_rate": 0.0017346950517836595, "loss": 0.7936, "step": 46110 }, { "epoch": 13.268124280782509, "grad_norm": 1.492969036102295, "learning_rate": 0.0017346375143843498, "loss": 0.7296, "step": 46120 }, { "epoch": 13.271001150747987, "grad_norm": 1.0901532173156738, "learning_rate": 0.0017345799769850404, "loss": 0.6924, "step": 46130 }, { "epoch": 13.273878020713465, "grad_norm": 0.9952259063720703, "learning_rate": 0.001734522439585731, "loss": 0.7807, "step": 46140 }, { "epoch": 13.27675489067894, "grad_norm": 1.5966458320617676, "learning_rate": 0.001734464902186421, "loss": 0.7478, "step": 46150 }, { "epoch": 13.279631760644419, "grad_norm": 2.235780715942383, "learning_rate": 0.0017344073647871116, "loss": 0.8784, "step": 46160 }, { "epoch": 13.282508630609897, "grad_norm": 1.173316240310669, "learning_rate": 0.001734349827387802, "loss": 0.7719, "step": 46170 }, { "epoch": 13.285385500575375, "grad_norm": 1.0658448934555054, "learning_rate": 0.0017342922899884925, "loss": 0.6394, "step": 46180 }, { "epoch": 13.288262370540851, "grad_norm": 1.5672379732131958, "learning_rate": 0.0017342347525891831, "loss": 0.6633, "step": 46190 }, { "epoch": 13.291139240506329, "grad_norm": 0.8477551937103271, "learning_rate": 0.0017341772151898735, "loss": 1.0136, "step": 46200 }, { "epoch": 13.294016110471807, "grad_norm": 1.3070261478424072, "learning_rate": 0.0017341196777905638, "loss": 0.7735, "step": 46210 }, { "epoch": 13.296892980437285, "grad_norm": 1.2323068380355835, "learning_rate": 0.0017340621403912544, "loss": 0.839, "step": 46220 }, { "epoch": 13.299769850402761, "grad_norm": 1.455566167831421, "learning_rate": 0.0017340046029919447, "loss": 0.7419, "step": 46230 }, { "epoch": 13.302646720368239, "grad_norm": 1.5488605499267578, "learning_rate": 0.0017339470655926353, "loss": 0.7343, "step": 46240 }, { "epoch": 13.305523590333717, "grad_norm": 0.7097749710083008, "learning_rate": 0.0017338895281933258, "loss": 0.7112, "step": 46250 }, { "epoch": 13.308400460299195, "grad_norm": 0.9847738742828369, "learning_rate": 0.0017338319907940162, "loss": 0.6772, "step": 46260 }, { "epoch": 13.311277330264671, "grad_norm": 1.126173496246338, "learning_rate": 0.0017337744533947065, "loss": 0.6545, "step": 46270 }, { "epoch": 13.31415420023015, "grad_norm": 1.3944990634918213, "learning_rate": 0.0017337169159953969, "loss": 0.9076, "step": 46280 }, { "epoch": 13.317031070195627, "grad_norm": 1.0629899501800537, "learning_rate": 0.0017336593785960874, "loss": 0.7675, "step": 46290 }, { "epoch": 13.319907940161105, "grad_norm": 1.7192130088806152, "learning_rate": 0.001733601841196778, "loss": 0.8997, "step": 46300 }, { "epoch": 13.322784810126583, "grad_norm": 0.8722088932991028, "learning_rate": 0.0017335443037974684, "loss": 0.9859, "step": 46310 }, { "epoch": 13.32566168009206, "grad_norm": 1.0133157968521118, "learning_rate": 0.001733486766398159, "loss": 0.6531, "step": 46320 }, { "epoch": 13.328538550057537, "grad_norm": 0.6192327737808228, "learning_rate": 0.0017334292289988493, "loss": 0.5952, "step": 46330 }, { "epoch": 13.331415420023015, "grad_norm": 1.518522024154663, "learning_rate": 0.0017333716915995396, "loss": 0.8655, "step": 46340 }, { "epoch": 13.334292289988493, "grad_norm": 1.5382205247879028, "learning_rate": 0.0017333141542002302, "loss": 0.8299, "step": 46350 }, { "epoch": 13.33716915995397, "grad_norm": 1.6781316995620728, "learning_rate": 0.0017332566168009207, "loss": 0.5918, "step": 46360 }, { "epoch": 13.340046029919447, "grad_norm": 1.0947550535202026, "learning_rate": 0.001733199079401611, "loss": 0.6993, "step": 46370 }, { "epoch": 13.342922899884925, "grad_norm": 2.6899049282073975, "learning_rate": 0.0017331415420023017, "loss": 1.0354, "step": 46380 }, { "epoch": 13.345799769850403, "grad_norm": 0.8134579658508301, "learning_rate": 0.0017330840046029918, "loss": 0.7538, "step": 46390 }, { "epoch": 13.34867663981588, "grad_norm": 1.7441537380218506, "learning_rate": 0.0017330264672036823, "loss": 0.8752, "step": 46400 }, { "epoch": 13.351553509781358, "grad_norm": 0.9822483658790588, "learning_rate": 0.001732968929804373, "loss": 0.8155, "step": 46410 }, { "epoch": 13.354430379746836, "grad_norm": 2.208508014678955, "learning_rate": 0.0017329113924050633, "loss": 0.9268, "step": 46420 }, { "epoch": 13.357307249712314, "grad_norm": 1.8871276378631592, "learning_rate": 0.0017328538550057538, "loss": 0.6499, "step": 46430 }, { "epoch": 13.36018411967779, "grad_norm": 3.075906991958618, "learning_rate": 0.0017327963176064444, "loss": 0.8918, "step": 46440 }, { "epoch": 13.363060989643268, "grad_norm": 0.988713264465332, "learning_rate": 0.0017327387802071345, "loss": 0.7813, "step": 46450 }, { "epoch": 13.365937859608746, "grad_norm": 1.9220221042633057, "learning_rate": 0.001732681242807825, "loss": 0.6614, "step": 46460 }, { "epoch": 13.368814729574224, "grad_norm": 1.1687595844268799, "learning_rate": 0.0017326237054085156, "loss": 0.6936, "step": 46470 }, { "epoch": 13.371691599539702, "grad_norm": 1.3443013429641724, "learning_rate": 0.001732566168009206, "loss": 0.9514, "step": 46480 }, { "epoch": 13.374568469505178, "grad_norm": 0.896616518497467, "learning_rate": 0.0017325086306098966, "loss": 0.7738, "step": 46490 }, { "epoch": 13.377445339470656, "grad_norm": 1.7803441286087036, "learning_rate": 0.0017324510932105871, "loss": 0.7872, "step": 46500 }, { "epoch": 13.380322209436134, "grad_norm": 0.8565574884414673, "learning_rate": 0.0017323935558112773, "loss": 0.9171, "step": 46510 }, { "epoch": 13.383199079401612, "grad_norm": 1.5642523765563965, "learning_rate": 0.0017323360184119678, "loss": 0.7805, "step": 46520 }, { "epoch": 13.386075949367088, "grad_norm": 1.7797540426254272, "learning_rate": 0.0017322784810126582, "loss": 0.6379, "step": 46530 }, { "epoch": 13.388952819332566, "grad_norm": 1.2512696981430054, "learning_rate": 0.0017322209436133487, "loss": 0.7136, "step": 46540 }, { "epoch": 13.391829689298044, "grad_norm": 1.5537168979644775, "learning_rate": 0.0017321634062140393, "loss": 0.5859, "step": 46550 }, { "epoch": 13.394706559263522, "grad_norm": 0.9090600609779358, "learning_rate": 0.0017321058688147296, "loss": 0.6532, "step": 46560 }, { "epoch": 13.397583429228998, "grad_norm": 3.9459989070892334, "learning_rate": 0.00173204833141542, "loss": 0.8385, "step": 46570 }, { "epoch": 13.400460299194476, "grad_norm": 1.2975707054138184, "learning_rate": 0.0017319907940161105, "loss": 0.716, "step": 46580 }, { "epoch": 13.403337169159954, "grad_norm": 1.5126967430114746, "learning_rate": 0.001731933256616801, "loss": 0.7281, "step": 46590 }, { "epoch": 13.406214039125432, "grad_norm": 1.2715874910354614, "learning_rate": 0.0017318757192174915, "loss": 0.7982, "step": 46600 }, { "epoch": 13.409090909090908, "grad_norm": 1.1019458770751953, "learning_rate": 0.001731818181818182, "loss": 0.6293, "step": 46610 }, { "epoch": 13.411967779056386, "grad_norm": 1.1276477575302124, "learning_rate": 0.0017317606444188724, "loss": 0.7417, "step": 46620 }, { "epoch": 13.414844649021864, "grad_norm": 1.994562029838562, "learning_rate": 0.0017317031070195627, "loss": 0.7678, "step": 46630 }, { "epoch": 13.417721518987342, "grad_norm": 1.5885605812072754, "learning_rate": 0.001731645569620253, "loss": 0.8867, "step": 46640 }, { "epoch": 13.420598388952818, "grad_norm": 1.122656226158142, "learning_rate": 0.0017315880322209436, "loss": 0.7705, "step": 46650 }, { "epoch": 13.423475258918296, "grad_norm": 1.6756949424743652, "learning_rate": 0.0017315304948216342, "loss": 1.0189, "step": 46660 }, { "epoch": 13.426352128883774, "grad_norm": 1.5865504741668701, "learning_rate": 0.0017314729574223245, "loss": 0.8708, "step": 46670 }, { "epoch": 13.429228998849252, "grad_norm": 2.605133533477783, "learning_rate": 0.001731415420023015, "loss": 0.8223, "step": 46680 }, { "epoch": 13.43210586881473, "grad_norm": 1.3608938455581665, "learning_rate": 0.0017313578826237055, "loss": 0.8015, "step": 46690 }, { "epoch": 13.434982738780207, "grad_norm": 1.2396719455718994, "learning_rate": 0.0017313003452243958, "loss": 0.6491, "step": 46700 }, { "epoch": 13.437859608745685, "grad_norm": 1.029171109199524, "learning_rate": 0.0017312428078250864, "loss": 0.9756, "step": 46710 }, { "epoch": 13.440736478711163, "grad_norm": 1.6337509155273438, "learning_rate": 0.001731185270425777, "loss": 0.7778, "step": 46720 }, { "epoch": 13.44361334867664, "grad_norm": 1.3322638273239136, "learning_rate": 0.0017311277330264673, "loss": 0.8896, "step": 46730 }, { "epoch": 13.446490218642117, "grad_norm": 1.2954214811325073, "learning_rate": 0.0017310701956271578, "loss": 0.8461, "step": 46740 }, { "epoch": 13.449367088607595, "grad_norm": 1.6336958408355713, "learning_rate": 0.001731012658227848, "loss": 0.6651, "step": 46750 }, { "epoch": 13.452243958573073, "grad_norm": 1.2758303880691528, "learning_rate": 0.0017309551208285385, "loss": 0.7223, "step": 46760 }, { "epoch": 13.45512082853855, "grad_norm": 0.9713600277900696, "learning_rate": 0.001730897583429229, "loss": 0.7155, "step": 46770 }, { "epoch": 13.457997698504027, "grad_norm": 1.5664825439453125, "learning_rate": 0.0017308400460299194, "loss": 0.8792, "step": 46780 }, { "epoch": 13.460874568469505, "grad_norm": 1.39353346824646, "learning_rate": 0.00173078250863061, "loss": 0.9045, "step": 46790 }, { "epoch": 13.463751438434983, "grad_norm": 2.4114677906036377, "learning_rate": 0.0017307249712313004, "loss": 0.8191, "step": 46800 }, { "epoch": 13.46662830840046, "grad_norm": 1.4480968713760376, "learning_rate": 0.0017306674338319907, "loss": 0.6874, "step": 46810 }, { "epoch": 13.469505178365939, "grad_norm": 1.1889755725860596, "learning_rate": 0.0017306098964326813, "loss": 0.6463, "step": 46820 }, { "epoch": 13.472382048331415, "grad_norm": 1.413877248764038, "learning_rate": 0.0017305523590333718, "loss": 0.9295, "step": 46830 }, { "epoch": 13.475258918296893, "grad_norm": 1.0623704195022583, "learning_rate": 0.0017304948216340622, "loss": 0.7652, "step": 46840 }, { "epoch": 13.478135788262371, "grad_norm": 1.1526187658309937, "learning_rate": 0.0017304372842347527, "loss": 1.0448, "step": 46850 }, { "epoch": 13.481012658227849, "grad_norm": 1.785332202911377, "learning_rate": 0.0017303797468354429, "loss": 0.951, "step": 46860 }, { "epoch": 13.483889528193325, "grad_norm": 0.7870118021965027, "learning_rate": 0.0017303222094361334, "loss": 0.6745, "step": 46870 }, { "epoch": 13.486766398158803, "grad_norm": 2.5617945194244385, "learning_rate": 0.001730264672036824, "loss": 0.6958, "step": 46880 }, { "epoch": 13.489643268124281, "grad_norm": 0.8987767696380615, "learning_rate": 0.0017302071346375143, "loss": 0.8239, "step": 46890 }, { "epoch": 13.492520138089759, "grad_norm": 1.0075916051864624, "learning_rate": 0.001730149597238205, "loss": 0.755, "step": 46900 }, { "epoch": 13.495397008055235, "grad_norm": 0.8815855383872986, "learning_rate": 0.0017300920598388955, "loss": 0.6936, "step": 46910 }, { "epoch": 13.498273878020713, "grad_norm": 1.0469393730163574, "learning_rate": 0.0017300345224395856, "loss": 0.736, "step": 46920 }, { "epoch": 13.501150747986191, "grad_norm": 1.4632267951965332, "learning_rate": 0.0017299769850402762, "loss": 0.7047, "step": 46930 }, { "epoch": 13.50402761795167, "grad_norm": 1.9472332000732422, "learning_rate": 0.0017299194476409667, "loss": 0.7297, "step": 46940 }, { "epoch": 13.506904487917145, "grad_norm": 1.8291981220245361, "learning_rate": 0.001729861910241657, "loss": 0.7701, "step": 46950 }, { "epoch": 13.509781357882623, "grad_norm": 0.9551069736480713, "learning_rate": 0.0017298043728423476, "loss": 0.9228, "step": 46960 }, { "epoch": 13.512658227848101, "grad_norm": 1.2363344430923462, "learning_rate": 0.001729746835443038, "loss": 0.8963, "step": 46970 }, { "epoch": 13.51553509781358, "grad_norm": 1.0926684141159058, "learning_rate": 0.0017296892980437283, "loss": 0.7323, "step": 46980 }, { "epoch": 13.518411967779056, "grad_norm": 0.8924847841262817, "learning_rate": 0.001729631760644419, "loss": 0.7587, "step": 46990 }, { "epoch": 13.521288837744533, "grad_norm": 1.4461195468902588, "learning_rate": 0.0017295742232451092, "loss": 1.051, "step": 47000 }, { "epoch": 13.524165707710011, "grad_norm": 1.0833144187927246, "learning_rate": 0.0017295166858457998, "loss": 0.9718, "step": 47010 }, { "epoch": 13.52704257767549, "grad_norm": 0.9576131701469421, "learning_rate": 0.0017294591484464904, "loss": 0.7495, "step": 47020 }, { "epoch": 13.529919447640967, "grad_norm": 1.4117616415023804, "learning_rate": 0.0017294016110471807, "loss": 0.737, "step": 47030 }, { "epoch": 13.532796317606444, "grad_norm": 1.332690954208374, "learning_rate": 0.001729344073647871, "loss": 0.7906, "step": 47040 }, { "epoch": 13.535673187571922, "grad_norm": 0.9444169998168945, "learning_rate": 0.0017292865362485616, "loss": 0.7711, "step": 47050 }, { "epoch": 13.5385500575374, "grad_norm": 1.6870522499084473, "learning_rate": 0.001729228998849252, "loss": 0.7453, "step": 47060 }, { "epoch": 13.541426927502878, "grad_norm": 1.3322173357009888, "learning_rate": 0.0017291714614499425, "loss": 1.0739, "step": 47070 }, { "epoch": 13.544303797468354, "grad_norm": 1.1175498962402344, "learning_rate": 0.001729113924050633, "loss": 0.7094, "step": 47080 }, { "epoch": 13.547180667433832, "grad_norm": 1.1648143529891968, "learning_rate": 0.0017290563866513235, "loss": 0.8033, "step": 47090 }, { "epoch": 13.55005753739931, "grad_norm": 1.18921959400177, "learning_rate": 0.0017289988492520138, "loss": 0.6715, "step": 47100 }, { "epoch": 13.552934407364788, "grad_norm": 1.0456347465515137, "learning_rate": 0.0017289413118527041, "loss": 0.7726, "step": 47110 }, { "epoch": 13.555811277330264, "grad_norm": 0.8570455312728882, "learning_rate": 0.0017288837744533947, "loss": 0.8083, "step": 47120 }, { "epoch": 13.558688147295742, "grad_norm": 1.6153669357299805, "learning_rate": 0.0017288262370540853, "loss": 0.8948, "step": 47130 }, { "epoch": 13.56156501726122, "grad_norm": 1.5721142292022705, "learning_rate": 0.0017287686996547756, "loss": 0.8435, "step": 47140 }, { "epoch": 13.564441887226698, "grad_norm": 0.9667115211486816, "learning_rate": 0.0017287111622554662, "loss": 0.7197, "step": 47150 }, { "epoch": 13.567318757192176, "grad_norm": 0.7624796628952026, "learning_rate": 0.0017286536248561565, "loss": 0.6902, "step": 47160 }, { "epoch": 13.570195627157652, "grad_norm": 0.8847030401229858, "learning_rate": 0.0017285960874568469, "loss": 0.9768, "step": 47170 }, { "epoch": 13.57307249712313, "grad_norm": 0.989769458770752, "learning_rate": 0.0017285385500575374, "loss": 0.6766, "step": 47180 }, { "epoch": 13.575949367088608, "grad_norm": 2.131347179412842, "learning_rate": 0.001728481012658228, "loss": 0.8051, "step": 47190 }, { "epoch": 13.578826237054086, "grad_norm": 1.0873868465423584, "learning_rate": 0.0017284234752589184, "loss": 0.7282, "step": 47200 }, { "epoch": 13.581703107019562, "grad_norm": 0.9829299449920654, "learning_rate": 0.001728365937859609, "loss": 0.7862, "step": 47210 }, { "epoch": 13.58457997698504, "grad_norm": 2.2185184955596924, "learning_rate": 0.001728308400460299, "loss": 0.7899, "step": 47220 }, { "epoch": 13.587456846950518, "grad_norm": 0.9609283208847046, "learning_rate": 0.0017282508630609896, "loss": 0.6696, "step": 47230 }, { "epoch": 13.590333716915996, "grad_norm": 1.968562364578247, "learning_rate": 0.0017281933256616802, "loss": 0.8527, "step": 47240 }, { "epoch": 13.593210586881472, "grad_norm": 0.9886713027954102, "learning_rate": 0.0017281357882623705, "loss": 0.7743, "step": 47250 }, { "epoch": 13.59608745684695, "grad_norm": 1.1804717779159546, "learning_rate": 0.001728078250863061, "loss": 0.9382, "step": 47260 }, { "epoch": 13.598964326812428, "grad_norm": 1.4595677852630615, "learning_rate": 0.0017280207134637517, "loss": 0.8439, "step": 47270 }, { "epoch": 13.601841196777906, "grad_norm": 1.5845940113067627, "learning_rate": 0.0017279631760644418, "loss": 0.7823, "step": 47280 }, { "epoch": 13.604718066743382, "grad_norm": 0.955737829208374, "learning_rate": 0.0017279056386651323, "loss": 0.6959, "step": 47290 }, { "epoch": 13.60759493670886, "grad_norm": 1.9906020164489746, "learning_rate": 0.001727848101265823, "loss": 0.7444, "step": 47300 }, { "epoch": 13.610471806674338, "grad_norm": 1.0931718349456787, "learning_rate": 0.0017277905638665133, "loss": 0.7305, "step": 47310 }, { "epoch": 13.613348676639816, "grad_norm": 1.5541943311691284, "learning_rate": 0.0017277330264672038, "loss": 0.8905, "step": 47320 }, { "epoch": 13.616225546605293, "grad_norm": 1.814374566078186, "learning_rate": 0.0017276754890678942, "loss": 0.845, "step": 47330 }, { "epoch": 13.61910241657077, "grad_norm": 1.7628198862075806, "learning_rate": 0.0017276179516685845, "loss": 0.9256, "step": 47340 }, { "epoch": 13.621979286536249, "grad_norm": 1.3370646238327026, "learning_rate": 0.001727560414269275, "loss": 0.9817, "step": 47350 }, { "epoch": 13.624856156501727, "grad_norm": 1.3863673210144043, "learning_rate": 0.0017275028768699654, "loss": 0.7242, "step": 47360 }, { "epoch": 13.627733026467205, "grad_norm": 1.3766340017318726, "learning_rate": 0.001727445339470656, "loss": 0.8017, "step": 47370 }, { "epoch": 13.63060989643268, "grad_norm": 0.934953510761261, "learning_rate": 0.0017273878020713466, "loss": 0.8277, "step": 47380 }, { "epoch": 13.633486766398159, "grad_norm": 1.544792652130127, "learning_rate": 0.001727330264672037, "loss": 0.7636, "step": 47390 }, { "epoch": 13.636363636363637, "grad_norm": 0.9483009576797485, "learning_rate": 0.0017272727272727272, "loss": 0.7304, "step": 47400 }, { "epoch": 13.639240506329115, "grad_norm": 1.023368239402771, "learning_rate": 0.0017272151898734178, "loss": 0.7253, "step": 47410 }, { "epoch": 13.64211737629459, "grad_norm": 1.1721770763397217, "learning_rate": 0.0017271576524741082, "loss": 0.72, "step": 47420 }, { "epoch": 13.644994246260069, "grad_norm": 1.121396541595459, "learning_rate": 0.0017271001150747987, "loss": 0.9761, "step": 47430 }, { "epoch": 13.647871116225547, "grad_norm": 1.3666179180145264, "learning_rate": 0.001727042577675489, "loss": 0.7938, "step": 47440 }, { "epoch": 13.650747986191025, "grad_norm": 1.299608826637268, "learning_rate": 0.0017269850402761796, "loss": 0.7337, "step": 47450 }, { "epoch": 13.653624856156501, "grad_norm": 1.1440362930297852, "learning_rate": 0.00172692750287687, "loss": 0.7347, "step": 47460 }, { "epoch": 13.656501726121979, "grad_norm": 1.7114189863204956, "learning_rate": 0.0017268699654775603, "loss": 0.9012, "step": 47470 }, { "epoch": 13.659378596087457, "grad_norm": 0.9293603301048279, "learning_rate": 0.001726812428078251, "loss": 0.5733, "step": 47480 }, { "epoch": 13.662255466052935, "grad_norm": 2.2561070919036865, "learning_rate": 0.0017267548906789415, "loss": 1.105, "step": 47490 }, { "epoch": 13.665132336018411, "grad_norm": 0.9536370038986206, "learning_rate": 0.0017266973532796318, "loss": 0.6692, "step": 47500 }, { "epoch": 13.66800920598389, "grad_norm": 1.3795490264892578, "learning_rate": 0.0017266398158803224, "loss": 1.0706, "step": 47510 }, { "epoch": 13.670886075949367, "grad_norm": 0.8400359749794006, "learning_rate": 0.0017265822784810127, "loss": 0.8699, "step": 47520 }, { "epoch": 13.673762945914845, "grad_norm": 1.1620835065841675, "learning_rate": 0.001726524741081703, "loss": 0.6712, "step": 47530 }, { "epoch": 13.676639815880321, "grad_norm": 1.1401073932647705, "learning_rate": 0.0017264672036823936, "loss": 0.6621, "step": 47540 }, { "epoch": 13.6795166858458, "grad_norm": 1.5041594505310059, "learning_rate": 0.001726409666283084, "loss": 0.875, "step": 47550 }, { "epoch": 13.682393555811277, "grad_norm": 0.962560772895813, "learning_rate": 0.0017263521288837745, "loss": 0.6904, "step": 47560 }, { "epoch": 13.685270425776755, "grad_norm": 1.348441481590271, "learning_rate": 0.001726294591484465, "loss": 0.7968, "step": 47570 }, { "epoch": 13.688147295742233, "grad_norm": 1.053858995437622, "learning_rate": 0.0017262370540851552, "loss": 0.69, "step": 47580 }, { "epoch": 13.69102416570771, "grad_norm": 1.417798399925232, "learning_rate": 0.0017261795166858458, "loss": 0.8146, "step": 47590 }, { "epoch": 13.693901035673187, "grad_norm": 2.2046425342559814, "learning_rate": 0.0017261219792865364, "loss": 0.9402, "step": 47600 }, { "epoch": 13.696777905638665, "grad_norm": 1.0632920265197754, "learning_rate": 0.0017260644418872267, "loss": 0.7397, "step": 47610 }, { "epoch": 13.699654775604143, "grad_norm": 1.266946792602539, "learning_rate": 0.0017260069044879173, "loss": 0.9769, "step": 47620 }, { "epoch": 13.70253164556962, "grad_norm": 1.245592474937439, "learning_rate": 0.0017259493670886076, "loss": 1.1769, "step": 47630 }, { "epoch": 13.705408515535098, "grad_norm": 0.9259340167045593, "learning_rate": 0.001725891829689298, "loss": 0.6122, "step": 47640 }, { "epoch": 13.708285385500576, "grad_norm": 1.7357693910598755, "learning_rate": 0.0017258342922899885, "loss": 0.747, "step": 47650 }, { "epoch": 13.711162255466053, "grad_norm": 0.9084285497665405, "learning_rate": 0.0017257767548906789, "loss": 0.6893, "step": 47660 }, { "epoch": 13.71403912543153, "grad_norm": 1.44425630569458, "learning_rate": 0.0017257192174913694, "loss": 0.8312, "step": 47670 }, { "epoch": 13.716915995397008, "grad_norm": 0.9773368239402771, "learning_rate": 0.00172566168009206, "loss": 0.7454, "step": 47680 }, { "epoch": 13.719792865362486, "grad_norm": 1.6220109462738037, "learning_rate": 0.0017256041426927501, "loss": 0.6408, "step": 47690 }, { "epoch": 13.722669735327964, "grad_norm": 0.6802286505699158, "learning_rate": 0.0017255466052934407, "loss": 0.6739, "step": 47700 }, { "epoch": 13.725546605293442, "grad_norm": 1.0019322633743286, "learning_rate": 0.0017254890678941313, "loss": 0.7899, "step": 47710 }, { "epoch": 13.728423475258918, "grad_norm": 1.3761094808578491, "learning_rate": 0.0017254315304948216, "loss": 0.8778, "step": 47720 }, { "epoch": 13.731300345224396, "grad_norm": 1.35286283493042, "learning_rate": 0.0017253739930955122, "loss": 0.7461, "step": 47730 }, { "epoch": 13.734177215189874, "grad_norm": 1.586390733718872, "learning_rate": 0.0017253164556962027, "loss": 0.9406, "step": 47740 }, { "epoch": 13.737054085155352, "grad_norm": 3.5705552101135254, "learning_rate": 0.0017252589182968929, "loss": 0.7938, "step": 47750 }, { "epoch": 13.739930955120828, "grad_norm": 0.9532987475395203, "learning_rate": 0.0017252013808975834, "loss": 0.6393, "step": 47760 }, { "epoch": 13.742807825086306, "grad_norm": 1.7034809589385986, "learning_rate": 0.001725143843498274, "loss": 0.7803, "step": 47770 }, { "epoch": 13.745684695051784, "grad_norm": 1.1010308265686035, "learning_rate": 0.0017250863060989643, "loss": 0.8984, "step": 47780 }, { "epoch": 13.748561565017262, "grad_norm": 1.0867750644683838, "learning_rate": 0.001725028768699655, "loss": 0.7659, "step": 47790 }, { "epoch": 13.751438434982738, "grad_norm": 2.030506134033203, "learning_rate": 0.0017249712313003453, "loss": 0.7071, "step": 47800 }, { "epoch": 13.754315304948216, "grad_norm": 2.9339818954467773, "learning_rate": 0.0017249136939010356, "loss": 0.7126, "step": 47810 }, { "epoch": 13.757192174913694, "grad_norm": 1.0895793437957764, "learning_rate": 0.0017248561565017262, "loss": 0.7144, "step": 47820 }, { "epoch": 13.760069044879172, "grad_norm": 1.22258460521698, "learning_rate": 0.0017247986191024165, "loss": 0.7242, "step": 47830 }, { "epoch": 13.762945914844648, "grad_norm": 1.1037261486053467, "learning_rate": 0.001724741081703107, "loss": 0.8178, "step": 47840 }, { "epoch": 13.765822784810126, "grad_norm": 1.5679219961166382, "learning_rate": 0.0017246835443037976, "loss": 1.0445, "step": 47850 }, { "epoch": 13.768699654775604, "grad_norm": 2.4133784770965576, "learning_rate": 0.001724626006904488, "loss": 0.827, "step": 47860 }, { "epoch": 13.771576524741082, "grad_norm": 1.6529741287231445, "learning_rate": 0.0017245684695051783, "loss": 0.9004, "step": 47870 }, { "epoch": 13.774453394706558, "grad_norm": 1.1543641090393066, "learning_rate": 0.001724510932105869, "loss": 0.7191, "step": 47880 }, { "epoch": 13.777330264672036, "grad_norm": 1.2051208019256592, "learning_rate": 0.0017244533947065592, "loss": 0.7011, "step": 47890 }, { "epoch": 13.780207134637514, "grad_norm": 1.5152802467346191, "learning_rate": 0.0017243958573072498, "loss": 0.7771, "step": 47900 }, { "epoch": 13.783084004602992, "grad_norm": 1.5033801794052124, "learning_rate": 0.0017243383199079402, "loss": 0.8475, "step": 47910 }, { "epoch": 13.78596087456847, "grad_norm": 3.8649420738220215, "learning_rate": 0.0017242807825086307, "loss": 0.8191, "step": 47920 }, { "epoch": 13.788837744533947, "grad_norm": 1.3635190725326538, "learning_rate": 0.001724223245109321, "loss": 0.885, "step": 47930 }, { "epoch": 13.791714614499424, "grad_norm": 1.5334302186965942, "learning_rate": 0.0017241657077100114, "loss": 0.7985, "step": 47940 }, { "epoch": 13.794591484464902, "grad_norm": 1.4084464311599731, "learning_rate": 0.001724108170310702, "loss": 0.9122, "step": 47950 }, { "epoch": 13.79746835443038, "grad_norm": 0.5781418085098267, "learning_rate": 0.0017240506329113925, "loss": 0.7359, "step": 47960 }, { "epoch": 13.800345224395857, "grad_norm": 1.3659684658050537, "learning_rate": 0.0017239930955120829, "loss": 0.8223, "step": 47970 }, { "epoch": 13.803222094361335, "grad_norm": 1.207136869430542, "learning_rate": 0.0017239355581127735, "loss": 0.6702, "step": 47980 }, { "epoch": 13.806098964326813, "grad_norm": 0.9185501933097839, "learning_rate": 0.0017238780207134638, "loss": 0.7106, "step": 47990 }, { "epoch": 13.80897583429229, "grad_norm": 1.2102080583572388, "learning_rate": 0.0017238204833141541, "loss": 0.9175, "step": 48000 }, { "epoch": 13.811852704257767, "grad_norm": 1.9187850952148438, "learning_rate": 0.0017237629459148447, "loss": 1.0109, "step": 48010 }, { "epoch": 13.814729574223245, "grad_norm": 1.0080560445785522, "learning_rate": 0.001723705408515535, "loss": 0.8743, "step": 48020 }, { "epoch": 13.817606444188723, "grad_norm": 0.9294198751449585, "learning_rate": 0.0017236478711162256, "loss": 0.7206, "step": 48030 }, { "epoch": 13.8204833141542, "grad_norm": 1.2949715852737427, "learning_rate": 0.0017235903337169162, "loss": 0.8382, "step": 48040 }, { "epoch": 13.823360184119679, "grad_norm": 2.1800718307495117, "learning_rate": 0.0017235327963176063, "loss": 0.8928, "step": 48050 }, { "epoch": 13.826237054085155, "grad_norm": 1.4848604202270508, "learning_rate": 0.0017234752589182969, "loss": 0.7064, "step": 48060 }, { "epoch": 13.829113924050633, "grad_norm": 1.5811415910720825, "learning_rate": 0.0017234177215189874, "loss": 0.8007, "step": 48070 }, { "epoch": 13.83199079401611, "grad_norm": 0.9320271015167236, "learning_rate": 0.0017233601841196778, "loss": 0.8191, "step": 48080 }, { "epoch": 13.834867663981589, "grad_norm": 1.0613505840301514, "learning_rate": 0.0017233026467203684, "loss": 0.7042, "step": 48090 }, { "epoch": 13.837744533947065, "grad_norm": 1.0946071147918701, "learning_rate": 0.001723245109321059, "loss": 0.796, "step": 48100 }, { "epoch": 13.840621403912543, "grad_norm": 1.1939512491226196, "learning_rate": 0.001723187571921749, "loss": 0.8534, "step": 48110 }, { "epoch": 13.843498273878021, "grad_norm": 1.0211834907531738, "learning_rate": 0.0017231300345224396, "loss": 0.8371, "step": 48120 }, { "epoch": 13.846375143843499, "grad_norm": 2.9305126667022705, "learning_rate": 0.00172307249712313, "loss": 0.9112, "step": 48130 }, { "epoch": 13.849252013808975, "grad_norm": 1.0385465621948242, "learning_rate": 0.0017230149597238205, "loss": 0.6866, "step": 48140 }, { "epoch": 13.852128883774453, "grad_norm": 1.7540295124053955, "learning_rate": 0.001722957422324511, "loss": 0.8696, "step": 48150 }, { "epoch": 13.855005753739931, "grad_norm": 1.6996392011642456, "learning_rate": 0.0017228998849252014, "loss": 0.7292, "step": 48160 }, { "epoch": 13.85788262370541, "grad_norm": 1.920706033706665, "learning_rate": 0.0017228423475258918, "loss": 0.7456, "step": 48170 }, { "epoch": 13.860759493670885, "grad_norm": 0.9524572491645813, "learning_rate": 0.0017227848101265823, "loss": 0.7408, "step": 48180 }, { "epoch": 13.863636363636363, "grad_norm": 2.186166524887085, "learning_rate": 0.0017227272727272727, "loss": 0.5398, "step": 48190 }, { "epoch": 13.866513233601841, "grad_norm": 2.169950246810913, "learning_rate": 0.0017226697353279633, "loss": 0.7614, "step": 48200 }, { "epoch": 13.86939010356732, "grad_norm": 2.1634151935577393, "learning_rate": 0.0017226121979286538, "loss": 0.719, "step": 48210 }, { "epoch": 13.872266973532795, "grad_norm": 1.7418917417526245, "learning_rate": 0.0017225546605293442, "loss": 0.7235, "step": 48220 }, { "epoch": 13.875143843498273, "grad_norm": 1.753838062286377, "learning_rate": 0.0017224971231300345, "loss": 0.7626, "step": 48230 }, { "epoch": 13.878020713463751, "grad_norm": 1.057031273841858, "learning_rate": 0.0017224395857307249, "loss": 0.7053, "step": 48240 }, { "epoch": 13.88089758342923, "grad_norm": 1.0190430879592896, "learning_rate": 0.0017223820483314154, "loss": 0.6499, "step": 48250 }, { "epoch": 13.883774453394707, "grad_norm": 1.0230193138122559, "learning_rate": 0.001722324510932106, "loss": 0.8713, "step": 48260 }, { "epoch": 13.886651323360184, "grad_norm": 0.8486390709877014, "learning_rate": 0.0017222669735327963, "loss": 0.7972, "step": 48270 }, { "epoch": 13.889528193325662, "grad_norm": 1.449853777885437, "learning_rate": 0.001722209436133487, "loss": 0.8089, "step": 48280 }, { "epoch": 13.89240506329114, "grad_norm": 1.7799426317214966, "learning_rate": 0.0017221518987341772, "loss": 0.8906, "step": 48290 }, { "epoch": 13.895281933256618, "grad_norm": 1.4021073579788208, "learning_rate": 0.0017220943613348676, "loss": 0.9447, "step": 48300 }, { "epoch": 13.898158803222094, "grad_norm": 1.27964186668396, "learning_rate": 0.0017220368239355582, "loss": 0.6544, "step": 48310 }, { "epoch": 13.901035673187572, "grad_norm": 1.3988839387893677, "learning_rate": 0.0017219792865362487, "loss": 0.603, "step": 48320 }, { "epoch": 13.90391254315305, "grad_norm": 1.9768122434616089, "learning_rate": 0.001721921749136939, "loss": 0.742, "step": 48330 }, { "epoch": 13.906789413118528, "grad_norm": 1.469921350479126, "learning_rate": 0.0017218642117376296, "loss": 0.7815, "step": 48340 }, { "epoch": 13.909666283084004, "grad_norm": 1.214491605758667, "learning_rate": 0.0017218066743383198, "loss": 0.7314, "step": 48350 }, { "epoch": 13.912543153049482, "grad_norm": 1.2289789915084839, "learning_rate": 0.0017217491369390103, "loss": 0.7801, "step": 48360 }, { "epoch": 13.91542002301496, "grad_norm": 1.2187681198120117, "learning_rate": 0.0017216915995397009, "loss": 0.6948, "step": 48370 }, { "epoch": 13.918296892980438, "grad_norm": 0.6920222043991089, "learning_rate": 0.0017216340621403912, "loss": 0.7929, "step": 48380 }, { "epoch": 13.921173762945914, "grad_norm": 0.7861948013305664, "learning_rate": 0.0017215765247410818, "loss": 0.6853, "step": 48390 }, { "epoch": 13.924050632911392, "grad_norm": 1.7422631978988647, "learning_rate": 0.0017215189873417724, "loss": 0.7758, "step": 48400 }, { "epoch": 13.92692750287687, "grad_norm": 2.0923430919647217, "learning_rate": 0.0017214614499424625, "loss": 0.9171, "step": 48410 }, { "epoch": 13.929804372842348, "grad_norm": 0.6982320547103882, "learning_rate": 0.001721403912543153, "loss": 0.7709, "step": 48420 }, { "epoch": 13.932681242807824, "grad_norm": 1.487754464149475, "learning_rate": 0.0017213463751438436, "loss": 0.668, "step": 48430 }, { "epoch": 13.935558112773302, "grad_norm": 1.284314751625061, "learning_rate": 0.001721288837744534, "loss": 0.7407, "step": 48440 }, { "epoch": 13.93843498273878, "grad_norm": 1.8532742261886597, "learning_rate": 0.0017212313003452245, "loss": 0.7472, "step": 48450 }, { "epoch": 13.941311852704258, "grad_norm": 0.7490265965461731, "learning_rate": 0.0017211737629459149, "loss": 0.7211, "step": 48460 }, { "epoch": 13.944188722669736, "grad_norm": 1.454643964767456, "learning_rate": 0.0017211162255466052, "loss": 1.0023, "step": 48470 }, { "epoch": 13.947065592635212, "grad_norm": 0.7940648198127747, "learning_rate": 0.0017210586881472958, "loss": 0.8476, "step": 48480 }, { "epoch": 13.94994246260069, "grad_norm": 1.7013686895370483, "learning_rate": 0.0017210011507479861, "loss": 0.8939, "step": 48490 }, { "epoch": 13.952819332566168, "grad_norm": 1.0705227851867676, "learning_rate": 0.0017209436133486767, "loss": 0.6759, "step": 48500 }, { "epoch": 13.955696202531646, "grad_norm": 1.0535194873809814, "learning_rate": 0.0017208860759493673, "loss": 0.8399, "step": 48510 }, { "epoch": 13.958573072497122, "grad_norm": 0.9951322674751282, "learning_rate": 0.0017208285385500574, "loss": 0.5833, "step": 48520 }, { "epoch": 13.9614499424626, "grad_norm": 0.9615360498428345, "learning_rate": 0.001720771001150748, "loss": 0.7572, "step": 48530 }, { "epoch": 13.964326812428078, "grad_norm": 1.8640073537826538, "learning_rate": 0.0017207134637514385, "loss": 0.8466, "step": 48540 }, { "epoch": 13.967203682393556, "grad_norm": 3.090282917022705, "learning_rate": 0.0017206559263521289, "loss": 0.794, "step": 48550 }, { "epoch": 13.970080552359033, "grad_norm": 2.0630228519439697, "learning_rate": 0.0017205983889528194, "loss": 0.7253, "step": 48560 }, { "epoch": 13.97295742232451, "grad_norm": 1.550774097442627, "learning_rate": 0.00172054085155351, "loss": 1.0006, "step": 48570 }, { "epoch": 13.975834292289989, "grad_norm": 1.8945170640945435, "learning_rate": 0.0017204833141542001, "loss": 0.9246, "step": 48580 }, { "epoch": 13.978711162255467, "grad_norm": 0.8359857201576233, "learning_rate": 0.0017204257767548907, "loss": 0.6153, "step": 48590 }, { "epoch": 13.981588032220944, "grad_norm": 1.0502734184265137, "learning_rate": 0.001720368239355581, "loss": 0.7366, "step": 48600 }, { "epoch": 13.98446490218642, "grad_norm": 0.9807635545730591, "learning_rate": 0.0017203107019562716, "loss": 0.6398, "step": 48610 }, { "epoch": 13.987341772151899, "grad_norm": 2.2833139896392822, "learning_rate": 0.0017202531645569622, "loss": 0.8785, "step": 48620 }, { "epoch": 13.990218642117377, "grad_norm": 1.8225739002227783, "learning_rate": 0.0017201956271576525, "loss": 0.7967, "step": 48630 }, { "epoch": 13.993095512082855, "grad_norm": 2.5180203914642334, "learning_rate": 0.0017201380897583429, "loss": 0.7333, "step": 48640 }, { "epoch": 13.99597238204833, "grad_norm": 1.6837170124053955, "learning_rate": 0.0017200805523590334, "loss": 0.674, "step": 48650 }, { "epoch": 13.998849252013809, "grad_norm": 1.9155001640319824, "learning_rate": 0.0017200230149597238, "loss": 0.7579, "step": 48660 }, { "epoch": 14.001726121979287, "grad_norm": 2.6784071922302246, "learning_rate": 0.0017199654775604143, "loss": 0.7485, "step": 48670 }, { "epoch": 14.004602991944765, "grad_norm": 1.1480333805084229, "learning_rate": 0.001719907940161105, "loss": 0.7871, "step": 48680 }, { "epoch": 14.007479861910241, "grad_norm": 1.0315662622451782, "learning_rate": 0.0017198504027617953, "loss": 0.6058, "step": 48690 }, { "epoch": 14.010356731875719, "grad_norm": 2.1004886627197266, "learning_rate": 0.0017197928653624856, "loss": 0.725, "step": 48700 }, { "epoch": 14.013233601841197, "grad_norm": 0.9159685373306274, "learning_rate": 0.001719735327963176, "loss": 0.9242, "step": 48710 }, { "epoch": 14.016110471806675, "grad_norm": 1.5156986713409424, "learning_rate": 0.0017196777905638665, "loss": 0.6353, "step": 48720 }, { "epoch": 14.018987341772151, "grad_norm": 1.038644790649414, "learning_rate": 0.001719620253164557, "loss": 0.6381, "step": 48730 }, { "epoch": 14.021864211737629, "grad_norm": 2.0651614665985107, "learning_rate": 0.0017195627157652474, "loss": 0.6022, "step": 48740 }, { "epoch": 14.024741081703107, "grad_norm": 1.543786644935608, "learning_rate": 0.001719505178365938, "loss": 0.8953, "step": 48750 }, { "epoch": 14.027617951668585, "grad_norm": 2.0030438899993896, "learning_rate": 0.0017194476409666283, "loss": 0.684, "step": 48760 }, { "epoch": 14.030494821634061, "grad_norm": 0.888058602809906, "learning_rate": 0.0017193901035673187, "loss": 0.7641, "step": 48770 }, { "epoch": 14.03337169159954, "grad_norm": 0.9740912318229675, "learning_rate": 0.0017193325661680092, "loss": 0.7402, "step": 48780 }, { "epoch": 14.036248561565017, "grad_norm": 1.9626195430755615, "learning_rate": 0.0017192750287686998, "loss": 0.8135, "step": 48790 }, { "epoch": 14.039125431530495, "grad_norm": 0.8795016407966614, "learning_rate": 0.0017192174913693902, "loss": 0.6709, "step": 48800 }, { "epoch": 14.042002301495973, "grad_norm": 1.677394151687622, "learning_rate": 0.0017191599539700807, "loss": 0.655, "step": 48810 }, { "epoch": 14.04487917146145, "grad_norm": 2.2068068981170654, "learning_rate": 0.0017191024165707708, "loss": 0.7366, "step": 48820 }, { "epoch": 14.047756041426927, "grad_norm": 0.6377515196800232, "learning_rate": 0.0017190448791714614, "loss": 0.8378, "step": 48830 }, { "epoch": 14.050632911392405, "grad_norm": 1.2552872896194458, "learning_rate": 0.001718987341772152, "loss": 0.7512, "step": 48840 }, { "epoch": 14.053509781357883, "grad_norm": 0.951248049736023, "learning_rate": 0.0017189298043728423, "loss": 0.8258, "step": 48850 }, { "epoch": 14.05638665132336, "grad_norm": 0.7387839555740356, "learning_rate": 0.0017188722669735329, "loss": 0.7574, "step": 48860 }, { "epoch": 14.059263521288837, "grad_norm": 2.0390148162841797, "learning_rate": 0.0017188147295742234, "loss": 0.646, "step": 48870 }, { "epoch": 14.062140391254315, "grad_norm": 1.9952754974365234, "learning_rate": 0.0017187571921749136, "loss": 0.7892, "step": 48880 }, { "epoch": 14.065017261219793, "grad_norm": 0.9331260323524475, "learning_rate": 0.0017186996547756041, "loss": 0.7673, "step": 48890 }, { "epoch": 14.06789413118527, "grad_norm": 0.9864317774772644, "learning_rate": 0.0017186421173762947, "loss": 0.8662, "step": 48900 }, { "epoch": 14.070771001150748, "grad_norm": 2.484267234802246, "learning_rate": 0.001718584579976985, "loss": 0.839, "step": 48910 }, { "epoch": 14.073647871116226, "grad_norm": 1.2202757596969604, "learning_rate": 0.0017185270425776756, "loss": 0.7037, "step": 48920 }, { "epoch": 14.076524741081704, "grad_norm": 0.9801111221313477, "learning_rate": 0.001718469505178366, "loss": 0.644, "step": 48930 }, { "epoch": 14.07940161104718, "grad_norm": 1.4847495555877686, "learning_rate": 0.0017184119677790563, "loss": 0.8396, "step": 48940 }, { "epoch": 14.082278481012658, "grad_norm": 0.8400557041168213, "learning_rate": 0.0017183544303797469, "loss": 0.8167, "step": 48950 }, { "epoch": 14.085155350978136, "grad_norm": 1.4256923198699951, "learning_rate": 0.0017182968929804372, "loss": 0.6775, "step": 48960 }, { "epoch": 14.088032220943614, "grad_norm": 1.1905713081359863, "learning_rate": 0.0017182393555811278, "loss": 0.7548, "step": 48970 }, { "epoch": 14.090909090909092, "grad_norm": 2.7652230262756348, "learning_rate": 0.0017181818181818184, "loss": 0.7495, "step": 48980 }, { "epoch": 14.093785960874568, "grad_norm": 1.2484408617019653, "learning_rate": 0.0017181242807825087, "loss": 0.624, "step": 48990 }, { "epoch": 14.096662830840046, "grad_norm": 1.0053801536560059, "learning_rate": 0.001718066743383199, "loss": 0.8237, "step": 49000 }, { "epoch": 14.099539700805524, "grad_norm": 1.711828589439392, "learning_rate": 0.0017180092059838896, "loss": 0.6867, "step": 49010 }, { "epoch": 14.102416570771002, "grad_norm": 0.9324570298194885, "learning_rate": 0.00171795166858458, "loss": 0.5678, "step": 49020 }, { "epoch": 14.105293440736478, "grad_norm": 1.7246416807174683, "learning_rate": 0.0017178941311852705, "loss": 0.9101, "step": 49030 }, { "epoch": 14.108170310701956, "grad_norm": 1.839819312095642, "learning_rate": 0.0017178365937859609, "loss": 0.7458, "step": 49040 }, { "epoch": 14.111047180667434, "grad_norm": 1.8480859994888306, "learning_rate": 0.0017177790563866514, "loss": 0.753, "step": 49050 }, { "epoch": 14.113924050632912, "grad_norm": 1.2814171314239502, "learning_rate": 0.0017177215189873418, "loss": 0.768, "step": 49060 }, { "epoch": 14.116800920598388, "grad_norm": 0.9888001680374146, "learning_rate": 0.0017176639815880321, "loss": 0.7103, "step": 49070 }, { "epoch": 14.119677790563866, "grad_norm": 3.2764458656311035, "learning_rate": 0.0017176064441887227, "loss": 0.6034, "step": 49080 }, { "epoch": 14.122554660529344, "grad_norm": 1.1682586669921875, "learning_rate": 0.0017175489067894133, "loss": 0.7577, "step": 49090 }, { "epoch": 14.125431530494822, "grad_norm": 1.1999603509902954, "learning_rate": 0.0017174913693901036, "loss": 0.8435, "step": 49100 }, { "epoch": 14.128308400460298, "grad_norm": 1.1246856451034546, "learning_rate": 0.0017174338319907942, "loss": 0.7622, "step": 49110 }, { "epoch": 14.131185270425776, "grad_norm": 1.588999629020691, "learning_rate": 0.0017173762945914845, "loss": 0.9404, "step": 49120 }, { "epoch": 14.134062140391254, "grad_norm": 0.836523711681366, "learning_rate": 0.0017173187571921749, "loss": 0.7173, "step": 49130 }, { "epoch": 14.136939010356732, "grad_norm": 3.007352828979492, "learning_rate": 0.0017172612197928654, "loss": 0.7084, "step": 49140 }, { "epoch": 14.13981588032221, "grad_norm": 1.7071741819381714, "learning_rate": 0.001717203682393556, "loss": 0.6817, "step": 49150 }, { "epoch": 14.142692750287686, "grad_norm": 0.8921881914138794, "learning_rate": 0.0017171461449942463, "loss": 0.8056, "step": 49160 }, { "epoch": 14.145569620253164, "grad_norm": 1.132966160774231, "learning_rate": 0.001717088607594937, "loss": 0.8372, "step": 49170 }, { "epoch": 14.148446490218642, "grad_norm": 1.2185556888580322, "learning_rate": 0.001717031070195627, "loss": 0.7553, "step": 49180 }, { "epoch": 14.15132336018412, "grad_norm": 1.0370383262634277, "learning_rate": 0.0017169735327963176, "loss": 0.794, "step": 49190 }, { "epoch": 14.154200230149597, "grad_norm": 0.9039967060089111, "learning_rate": 0.0017169159953970082, "loss": 0.841, "step": 49200 }, { "epoch": 14.157077100115075, "grad_norm": 0.782106339931488, "learning_rate": 0.0017168584579976985, "loss": 0.7059, "step": 49210 }, { "epoch": 14.159953970080553, "grad_norm": 1.8210033178329468, "learning_rate": 0.001716800920598389, "loss": 0.7255, "step": 49220 }, { "epoch": 14.16283084004603, "grad_norm": 1.7029839754104614, "learning_rate": 0.0017167433831990796, "loss": 0.725, "step": 49230 }, { "epoch": 14.165707710011507, "grad_norm": 1.4830050468444824, "learning_rate": 0.0017166858457997698, "loss": 0.9201, "step": 49240 }, { "epoch": 14.168584579976985, "grad_norm": 1.1617658138275146, "learning_rate": 0.0017166283084004603, "loss": 0.7489, "step": 49250 }, { "epoch": 14.171461449942463, "grad_norm": 1.8782503604888916, "learning_rate": 0.0017165707710011509, "loss": 0.7998, "step": 49260 }, { "epoch": 14.17433831990794, "grad_norm": 1.031352162361145, "learning_rate": 0.0017165132336018412, "loss": 0.6758, "step": 49270 }, { "epoch": 14.177215189873417, "grad_norm": 1.1758759021759033, "learning_rate": 0.0017164556962025318, "loss": 0.7363, "step": 49280 }, { "epoch": 14.180092059838895, "grad_norm": 0.9080071449279785, "learning_rate": 0.001716398158803222, "loss": 0.7074, "step": 49290 }, { "epoch": 14.182968929804373, "grad_norm": 1.1277767419815063, "learning_rate": 0.0017163406214039125, "loss": 0.8429, "step": 49300 }, { "epoch": 14.18584579976985, "grad_norm": 0.8121628165245056, "learning_rate": 0.001716283084004603, "loss": 0.8182, "step": 49310 }, { "epoch": 14.188722669735329, "grad_norm": 1.721170425415039, "learning_rate": 0.0017162255466052934, "loss": 0.696, "step": 49320 }, { "epoch": 14.191599539700805, "grad_norm": 1.8347774744033813, "learning_rate": 0.001716168009205984, "loss": 0.7504, "step": 49330 }, { "epoch": 14.194476409666283, "grad_norm": 0.9535900950431824, "learning_rate": 0.0017161104718066745, "loss": 0.7274, "step": 49340 }, { "epoch": 14.197353279631761, "grad_norm": 1.8067591190338135, "learning_rate": 0.0017160529344073647, "loss": 0.7035, "step": 49350 }, { "epoch": 14.200230149597239, "grad_norm": 1.8654934167861938, "learning_rate": 0.0017159953970080552, "loss": 0.7259, "step": 49360 }, { "epoch": 14.203107019562715, "grad_norm": 2.200890064239502, "learning_rate": 0.0017159378596087458, "loss": 0.6521, "step": 49370 }, { "epoch": 14.205983889528193, "grad_norm": 1.2822941541671753, "learning_rate": 0.0017158803222094361, "loss": 0.842, "step": 49380 }, { "epoch": 14.208860759493671, "grad_norm": 1.8520580530166626, "learning_rate": 0.0017158227848101267, "loss": 0.7448, "step": 49390 }, { "epoch": 14.211737629459149, "grad_norm": 1.3088229894638062, "learning_rate": 0.001715765247410817, "loss": 1.0017, "step": 49400 }, { "epoch": 14.214614499424625, "grad_norm": 0.9024635553359985, "learning_rate": 0.0017157077100115074, "loss": 0.7937, "step": 49410 }, { "epoch": 14.217491369390103, "grad_norm": 0.9325520992279053, "learning_rate": 0.001715650172612198, "loss": 0.7801, "step": 49420 }, { "epoch": 14.220368239355581, "grad_norm": 0.8848339319229126, "learning_rate": 0.0017155926352128883, "loss": 0.6777, "step": 49430 }, { "epoch": 14.22324510932106, "grad_norm": 1.429187536239624, "learning_rate": 0.0017155350978135789, "loss": 0.7858, "step": 49440 }, { "epoch": 14.226121979286535, "grad_norm": 2.0137522220611572, "learning_rate": 0.0017154775604142694, "loss": 0.7797, "step": 49450 }, { "epoch": 14.228998849252013, "grad_norm": 2.0555174350738525, "learning_rate": 0.0017154200230149598, "loss": 0.7726, "step": 49460 }, { "epoch": 14.231875719217491, "grad_norm": 1.6086281538009644, "learning_rate": 0.0017153624856156501, "loss": 0.8628, "step": 49470 }, { "epoch": 14.23475258918297, "grad_norm": 2.237774610519409, "learning_rate": 0.0017153049482163407, "loss": 0.8827, "step": 49480 }, { "epoch": 14.237629459148447, "grad_norm": 1.8885987997055054, "learning_rate": 0.001715247410817031, "loss": 0.8525, "step": 49490 }, { "epoch": 14.240506329113924, "grad_norm": 1.5623011589050293, "learning_rate": 0.0017151898734177216, "loss": 0.7821, "step": 49500 }, { "epoch": 14.243383199079402, "grad_norm": 1.1692650318145752, "learning_rate": 0.001715132336018412, "loss": 0.684, "step": 49510 }, { "epoch": 14.24626006904488, "grad_norm": 0.9651261568069458, "learning_rate": 0.0017150747986191025, "loss": 0.6828, "step": 49520 }, { "epoch": 14.249136939010357, "grad_norm": 1.5935896635055542, "learning_rate": 0.0017150172612197929, "loss": 0.7483, "step": 49530 }, { "epoch": 14.252013808975834, "grad_norm": 1.2326463460922241, "learning_rate": 0.0017149597238204832, "loss": 0.841, "step": 49540 }, { "epoch": 14.254890678941312, "grad_norm": 1.2403432130813599, "learning_rate": 0.0017149021864211738, "loss": 0.7506, "step": 49550 }, { "epoch": 14.25776754890679, "grad_norm": 1.5527373552322388, "learning_rate": 0.0017148446490218643, "loss": 0.6924, "step": 49560 }, { "epoch": 14.260644418872268, "grad_norm": 1.618919014930725, "learning_rate": 0.0017147871116225547, "loss": 0.7544, "step": 49570 }, { "epoch": 14.263521288837744, "grad_norm": 1.1310486793518066, "learning_rate": 0.0017147295742232452, "loss": 0.7168, "step": 49580 }, { "epoch": 14.266398158803222, "grad_norm": 0.9474703669548035, "learning_rate": 0.0017146720368239356, "loss": 0.6928, "step": 49590 }, { "epoch": 14.2692750287687, "grad_norm": 1.0682480335235596, "learning_rate": 0.001714614499424626, "loss": 0.7311, "step": 49600 }, { "epoch": 14.272151898734178, "grad_norm": 1.37903892993927, "learning_rate": 0.0017145569620253165, "loss": 0.8465, "step": 49610 }, { "epoch": 14.275028768699654, "grad_norm": 2.1723337173461914, "learning_rate": 0.0017144994246260069, "loss": 0.7438, "step": 49620 }, { "epoch": 14.277905638665132, "grad_norm": 1.4797824621200562, "learning_rate": 0.0017144418872266974, "loss": 0.6911, "step": 49630 }, { "epoch": 14.28078250863061, "grad_norm": 1.345395565032959, "learning_rate": 0.001714384349827388, "loss": 0.7118, "step": 49640 }, { "epoch": 14.283659378596088, "grad_norm": 0.9148531556129456, "learning_rate": 0.0017143268124280781, "loss": 0.6142, "step": 49650 }, { "epoch": 14.286536248561564, "grad_norm": 2.0008771419525146, "learning_rate": 0.0017142692750287687, "loss": 0.6886, "step": 49660 }, { "epoch": 14.289413118527042, "grad_norm": 2.1882545948028564, "learning_rate": 0.0017142117376294592, "loss": 0.8875, "step": 49670 }, { "epoch": 14.29228998849252, "grad_norm": 1.5001322031021118, "learning_rate": 0.0017141542002301496, "loss": 0.9065, "step": 49680 }, { "epoch": 14.295166858457998, "grad_norm": 2.3356127738952637, "learning_rate": 0.0017140966628308402, "loss": 0.7902, "step": 49690 }, { "epoch": 14.298043728423476, "grad_norm": 1.6000597476959229, "learning_rate": 0.0017140391254315307, "loss": 0.7004, "step": 49700 }, { "epoch": 14.300920598388952, "grad_norm": 0.9948853850364685, "learning_rate": 0.0017139815880322208, "loss": 0.632, "step": 49710 }, { "epoch": 14.30379746835443, "grad_norm": 1.4311281442642212, "learning_rate": 0.0017139240506329114, "loss": 0.8014, "step": 49720 }, { "epoch": 14.306674338319908, "grad_norm": 0.6768298149108887, "learning_rate": 0.001713866513233602, "loss": 0.6638, "step": 49730 }, { "epoch": 14.309551208285386, "grad_norm": 1.5462831258773804, "learning_rate": 0.0017138089758342923, "loss": 0.7378, "step": 49740 }, { "epoch": 14.312428078250862, "grad_norm": 0.9139561057090759, "learning_rate": 0.0017137514384349829, "loss": 0.6787, "step": 49750 }, { "epoch": 14.31530494821634, "grad_norm": 0.684279203414917, "learning_rate": 0.0017136939010356732, "loss": 0.8159, "step": 49760 }, { "epoch": 14.318181818181818, "grad_norm": 1.2646175622940063, "learning_rate": 0.0017136363636363636, "loss": 0.6704, "step": 49770 }, { "epoch": 14.321058688147296, "grad_norm": 2.086862325668335, "learning_rate": 0.0017135788262370541, "loss": 0.7202, "step": 49780 }, { "epoch": 14.323935558112773, "grad_norm": 0.8239697217941284, "learning_rate": 0.0017135212888377445, "loss": 0.8359, "step": 49790 }, { "epoch": 14.32681242807825, "grad_norm": 1.4470922946929932, "learning_rate": 0.001713463751438435, "loss": 0.7274, "step": 49800 }, { "epoch": 14.329689298043728, "grad_norm": 1.2208408117294312, "learning_rate": 0.0017134062140391256, "loss": 0.7121, "step": 49810 }, { "epoch": 14.332566168009206, "grad_norm": 1.9641402959823608, "learning_rate": 0.001713348676639816, "loss": 0.9351, "step": 49820 }, { "epoch": 14.335443037974684, "grad_norm": 1.4680179357528687, "learning_rate": 0.0017132911392405063, "loss": 0.8465, "step": 49830 }, { "epoch": 14.33831990794016, "grad_norm": 1.1734052896499634, "learning_rate": 0.0017132336018411969, "loss": 0.7187, "step": 49840 }, { "epoch": 14.341196777905639, "grad_norm": 2.0957159996032715, "learning_rate": 0.0017131760644418872, "loss": 0.7766, "step": 49850 }, { "epoch": 14.344073647871117, "grad_norm": 0.8755671977996826, "learning_rate": 0.0017131185270425778, "loss": 0.5925, "step": 49860 }, { "epoch": 14.346950517836595, "grad_norm": 1.2801762819290161, "learning_rate": 0.0017130609896432681, "loss": 0.7022, "step": 49870 }, { "epoch": 14.34982738780207, "grad_norm": 1.481343150138855, "learning_rate": 0.0017130034522439587, "loss": 0.9027, "step": 49880 }, { "epoch": 14.352704257767549, "grad_norm": 1.597475528717041, "learning_rate": 0.001712945914844649, "loss": 0.6222, "step": 49890 }, { "epoch": 14.355581127733027, "grad_norm": 1.8030122518539429, "learning_rate": 0.0017128883774453394, "loss": 0.7891, "step": 49900 }, { "epoch": 14.358457997698505, "grad_norm": 1.273919939994812, "learning_rate": 0.00171283084004603, "loss": 0.9164, "step": 49910 }, { "epoch": 14.361334867663981, "grad_norm": 2.2381694316864014, "learning_rate": 0.0017127733026467205, "loss": 0.7972, "step": 49920 }, { "epoch": 14.364211737629459, "grad_norm": 1.6964837312698364, "learning_rate": 0.0017127157652474109, "loss": 0.7785, "step": 49930 }, { "epoch": 14.367088607594937, "grad_norm": 1.5965869426727295, "learning_rate": 0.0017126582278481014, "loss": 0.7799, "step": 49940 }, { "epoch": 14.369965477560415, "grad_norm": 1.0500527620315552, "learning_rate": 0.0017126006904487918, "loss": 0.8566, "step": 49950 }, { "epoch": 14.372842347525891, "grad_norm": 1.0212080478668213, "learning_rate": 0.0017125431530494821, "loss": 0.5896, "step": 49960 }, { "epoch": 14.375719217491369, "grad_norm": 2.482170820236206, "learning_rate": 0.0017124856156501727, "loss": 0.6081, "step": 49970 }, { "epoch": 14.378596087456847, "grad_norm": 1.2845369577407837, "learning_rate": 0.001712428078250863, "loss": 0.6572, "step": 49980 }, { "epoch": 14.381472957422325, "grad_norm": 1.2004855871200562, "learning_rate": 0.0017123705408515536, "loss": 0.827, "step": 49990 }, { "epoch": 14.384349827387801, "grad_norm": 1.375794529914856, "learning_rate": 0.0017123130034522442, "loss": 0.6996, "step": 50000 }, { "epoch": 14.38722669735328, "grad_norm": 1.872154712677002, "learning_rate": 0.0017122554660529343, "loss": 0.9056, "step": 50010 }, { "epoch": 14.390103567318757, "grad_norm": 0.9829708337783813, "learning_rate": 0.0017121979286536249, "loss": 0.7704, "step": 50020 }, { "epoch": 14.392980437284235, "grad_norm": 0.9098554849624634, "learning_rate": 0.0017121403912543154, "loss": 0.9445, "step": 50030 }, { "epoch": 14.395857307249713, "grad_norm": 1.102949857711792, "learning_rate": 0.0017120828538550058, "loss": 0.6728, "step": 50040 }, { "epoch": 14.39873417721519, "grad_norm": 0.8033377528190613, "learning_rate": 0.0017120253164556963, "loss": 0.7094, "step": 50050 }, { "epoch": 14.401611047180667, "grad_norm": 3.0833797454833984, "learning_rate": 0.001711967779056387, "loss": 0.7223, "step": 50060 }, { "epoch": 14.404487917146145, "grad_norm": 1.1850537061691284, "learning_rate": 0.001711910241657077, "loss": 0.7992, "step": 50070 }, { "epoch": 14.407364787111623, "grad_norm": 0.9983052015304565, "learning_rate": 0.0017118527042577676, "loss": 0.6354, "step": 50080 }, { "epoch": 14.4102416570771, "grad_norm": 0.8402858376502991, "learning_rate": 0.001711795166858458, "loss": 0.5696, "step": 50090 }, { "epoch": 14.413118527042577, "grad_norm": 2.5372214317321777, "learning_rate": 0.0017117376294591485, "loss": 0.8269, "step": 50100 }, { "epoch": 14.415995397008055, "grad_norm": 1.3518297672271729, "learning_rate": 0.001711680092059839, "loss": 0.8712, "step": 50110 }, { "epoch": 14.418872266973533, "grad_norm": 1.1090081930160522, "learning_rate": 0.0017116225546605292, "loss": 0.6034, "step": 50120 }, { "epoch": 14.42174913693901, "grad_norm": 1.4079265594482422, "learning_rate": 0.0017115650172612198, "loss": 0.9741, "step": 50130 }, { "epoch": 14.424626006904488, "grad_norm": 1.7525811195373535, "learning_rate": 0.0017115074798619103, "loss": 0.6925, "step": 50140 }, { "epoch": 14.427502876869966, "grad_norm": 1.297353982925415, "learning_rate": 0.0017114499424626007, "loss": 0.7829, "step": 50150 }, { "epoch": 14.430379746835444, "grad_norm": 1.399817705154419, "learning_rate": 0.0017113924050632912, "loss": 0.8841, "step": 50160 }, { "epoch": 14.43325661680092, "grad_norm": 1.2016468048095703, "learning_rate": 0.0017113348676639818, "loss": 0.9109, "step": 50170 }, { "epoch": 14.436133486766398, "grad_norm": 1.434670090675354, "learning_rate": 0.001711277330264672, "loss": 0.7977, "step": 50180 }, { "epoch": 14.439010356731876, "grad_norm": 1.56061851978302, "learning_rate": 0.0017112197928653625, "loss": 0.7868, "step": 50190 }, { "epoch": 14.441887226697354, "grad_norm": 2.001554250717163, "learning_rate": 0.0017111622554660528, "loss": 0.7586, "step": 50200 }, { "epoch": 14.444764096662832, "grad_norm": 1.1836808919906616, "learning_rate": 0.0017111047180667434, "loss": 0.7124, "step": 50210 }, { "epoch": 14.447640966628308, "grad_norm": 0.9580890536308289, "learning_rate": 0.001711047180667434, "loss": 0.7424, "step": 50220 }, { "epoch": 14.450517836593786, "grad_norm": 0.7451808452606201, "learning_rate": 0.0017109896432681243, "loss": 0.823, "step": 50230 }, { "epoch": 14.453394706559264, "grad_norm": 0.877938449382782, "learning_rate": 0.0017109321058688147, "loss": 0.7962, "step": 50240 }, { "epoch": 14.456271576524742, "grad_norm": 2.125248670578003, "learning_rate": 0.0017108745684695052, "loss": 0.622, "step": 50250 }, { "epoch": 14.459148446490218, "grad_norm": 1.3020964860916138, "learning_rate": 0.0017108170310701956, "loss": 0.8849, "step": 50260 }, { "epoch": 14.462025316455696, "grad_norm": 1.2711620330810547, "learning_rate": 0.0017107594936708861, "loss": 0.761, "step": 50270 }, { "epoch": 14.464902186421174, "grad_norm": 1.3585102558135986, "learning_rate": 0.0017107019562715767, "loss": 0.704, "step": 50280 }, { "epoch": 14.467779056386652, "grad_norm": 1.3473021984100342, "learning_rate": 0.001710644418872267, "loss": 0.8239, "step": 50290 }, { "epoch": 14.470655926352128, "grad_norm": 0.7287587523460388, "learning_rate": 0.0017105868814729574, "loss": 0.6812, "step": 50300 }, { "epoch": 14.473532796317606, "grad_norm": 1.4451947212219238, "learning_rate": 0.0017105293440736477, "loss": 0.7117, "step": 50310 }, { "epoch": 14.476409666283084, "grad_norm": 1.7214555740356445, "learning_rate": 0.0017104718066743383, "loss": 0.8046, "step": 50320 }, { "epoch": 14.479286536248562, "grad_norm": 1.3906738758087158, "learning_rate": 0.0017104142692750289, "loss": 0.744, "step": 50330 }, { "epoch": 14.482163406214038, "grad_norm": 1.0029250383377075, "learning_rate": 0.0017103567318757192, "loss": 0.7912, "step": 50340 }, { "epoch": 14.485040276179516, "grad_norm": 1.1291496753692627, "learning_rate": 0.0017102991944764098, "loss": 0.8389, "step": 50350 }, { "epoch": 14.487917146144994, "grad_norm": 1.5362346172332764, "learning_rate": 0.0017102416570771001, "loss": 0.7242, "step": 50360 }, { "epoch": 14.490794016110472, "grad_norm": 2.3858225345611572, "learning_rate": 0.0017101841196777905, "loss": 0.7147, "step": 50370 }, { "epoch": 14.49367088607595, "grad_norm": 1.2675501108169556, "learning_rate": 0.001710126582278481, "loss": 0.9615, "step": 50380 }, { "epoch": 14.496547756041426, "grad_norm": 1.8798249959945679, "learning_rate": 0.0017100690448791716, "loss": 0.6509, "step": 50390 }, { "epoch": 14.499424626006904, "grad_norm": 0.6925789713859558, "learning_rate": 0.001710011507479862, "loss": 0.6671, "step": 50400 }, { "epoch": 14.502301495972382, "grad_norm": 2.069464921951294, "learning_rate": 0.0017099539700805525, "loss": 0.8535, "step": 50410 }, { "epoch": 14.50517836593786, "grad_norm": 1.7499221563339233, "learning_rate": 0.0017098964326812429, "loss": 0.9181, "step": 50420 }, { "epoch": 14.508055235903337, "grad_norm": 1.4365447759628296, "learning_rate": 0.0017098388952819332, "loss": 0.7062, "step": 50430 }, { "epoch": 14.510932105868815, "grad_norm": 0.792146623134613, "learning_rate": 0.0017097813578826238, "loss": 0.8497, "step": 50440 }, { "epoch": 14.513808975834293, "grad_norm": 1.1695572137832642, "learning_rate": 0.0017097238204833141, "loss": 0.6868, "step": 50450 }, { "epoch": 14.51668584579977, "grad_norm": 1.7922154664993286, "learning_rate": 0.0017096662830840047, "loss": 0.7581, "step": 50460 }, { "epoch": 14.519562715765247, "grad_norm": 1.4713804721832275, "learning_rate": 0.0017096087456846952, "loss": 0.8162, "step": 50470 }, { "epoch": 14.522439585730725, "grad_norm": 0.9742769598960876, "learning_rate": 0.0017095512082853854, "loss": 0.7579, "step": 50480 }, { "epoch": 14.525316455696203, "grad_norm": 2.448713779449463, "learning_rate": 0.001709493670886076, "loss": 0.7082, "step": 50490 }, { "epoch": 14.52819332566168, "grad_norm": 1.4413917064666748, "learning_rate": 0.0017094361334867665, "loss": 0.6551, "step": 50500 }, { "epoch": 14.531070195627157, "grad_norm": 1.4349724054336548, "learning_rate": 0.0017093785960874569, "loss": 0.8042, "step": 50510 }, { "epoch": 14.533947065592635, "grad_norm": 0.671746015548706, "learning_rate": 0.0017093210586881474, "loss": 0.6765, "step": 50520 }, { "epoch": 14.536823935558113, "grad_norm": 1.4421021938323975, "learning_rate": 0.001709263521288838, "loss": 0.754, "step": 50530 }, { "epoch": 14.53970080552359, "grad_norm": 1.6449283361434937, "learning_rate": 0.0017092059838895281, "loss": 0.7339, "step": 50540 }, { "epoch": 14.542577675489067, "grad_norm": 2.146446943283081, "learning_rate": 0.0017091484464902187, "loss": 0.9798, "step": 50550 }, { "epoch": 14.545454545454545, "grad_norm": 1.8801368474960327, "learning_rate": 0.001709090909090909, "loss": 0.6644, "step": 50560 }, { "epoch": 14.548331415420023, "grad_norm": 1.732142448425293, "learning_rate": 0.0017090333716915996, "loss": 0.8248, "step": 50570 }, { "epoch": 14.551208285385501, "grad_norm": 1.8650811910629272, "learning_rate": 0.0017089758342922901, "loss": 0.7399, "step": 50580 }, { "epoch": 14.554085155350979, "grad_norm": 2.0142807960510254, "learning_rate": 0.0017089182968929805, "loss": 0.8176, "step": 50590 }, { "epoch": 14.556962025316455, "grad_norm": 1.1185312271118164, "learning_rate": 0.0017088607594936708, "loss": 1.007, "step": 50600 }, { "epoch": 14.559838895281933, "grad_norm": 1.4315687417984009, "learning_rate": 0.0017088032220943614, "loss": 0.841, "step": 50610 }, { "epoch": 14.562715765247411, "grad_norm": 1.2363059520721436, "learning_rate": 0.0017087456846950518, "loss": 0.6888, "step": 50620 }, { "epoch": 14.565592635212889, "grad_norm": 1.692244052886963, "learning_rate": 0.0017086881472957423, "loss": 0.8674, "step": 50630 }, { "epoch": 14.568469505178365, "grad_norm": 1.3397393226623535, "learning_rate": 0.0017086306098964329, "loss": 0.7549, "step": 50640 }, { "epoch": 14.571346375143843, "grad_norm": 0.8630361557006836, "learning_rate": 0.0017085730724971232, "loss": 0.6666, "step": 50650 }, { "epoch": 14.574223245109321, "grad_norm": 1.0637460947036743, "learning_rate": 0.0017085155350978136, "loss": 0.6709, "step": 50660 }, { "epoch": 14.5771001150748, "grad_norm": 1.259351372718811, "learning_rate": 0.001708457997698504, "loss": 0.8271, "step": 50670 }, { "epoch": 14.579976985040275, "grad_norm": 1.0612382888793945, "learning_rate": 0.0017084004602991945, "loss": 0.6903, "step": 50680 }, { "epoch": 14.582853855005753, "grad_norm": 1.6057318449020386, "learning_rate": 0.001708342922899885, "loss": 0.769, "step": 50690 }, { "epoch": 14.585730724971231, "grad_norm": 2.1577765941619873, "learning_rate": 0.0017082853855005754, "loss": 0.8407, "step": 50700 }, { "epoch": 14.58860759493671, "grad_norm": 1.434425950050354, "learning_rate": 0.001708227848101266, "loss": 0.6931, "step": 50710 }, { "epoch": 14.591484464902187, "grad_norm": 1.7713072299957275, "learning_rate": 0.0017081703107019563, "loss": 0.8237, "step": 50720 }, { "epoch": 14.594361334867664, "grad_norm": 1.3502601385116577, "learning_rate": 0.0017081127733026467, "loss": 0.7257, "step": 50730 }, { "epoch": 14.597238204833141, "grad_norm": 1.0595778226852417, "learning_rate": 0.0017080552359033372, "loss": 0.7464, "step": 50740 }, { "epoch": 14.60011507479862, "grad_norm": 1.2663577795028687, "learning_rate": 0.0017079976985040278, "loss": 0.6194, "step": 50750 }, { "epoch": 14.602991944764097, "grad_norm": 0.9209491014480591, "learning_rate": 0.0017079401611047181, "loss": 0.7852, "step": 50760 }, { "epoch": 14.605868814729574, "grad_norm": 1.166765570640564, "learning_rate": 0.0017078826237054087, "loss": 0.7377, "step": 50770 }, { "epoch": 14.608745684695052, "grad_norm": 1.492089867591858, "learning_rate": 0.0017078250863060988, "loss": 0.7561, "step": 50780 }, { "epoch": 14.61162255466053, "grad_norm": 0.7369781136512756, "learning_rate": 0.0017077675489067894, "loss": 0.7479, "step": 50790 }, { "epoch": 14.614499424626008, "grad_norm": 2.3252456188201904, "learning_rate": 0.00170771001150748, "loss": 0.8529, "step": 50800 }, { "epoch": 14.617376294591484, "grad_norm": 1.4261034727096558, "learning_rate": 0.0017076524741081703, "loss": 0.9311, "step": 50810 }, { "epoch": 14.620253164556962, "grad_norm": 2.1364212036132812, "learning_rate": 0.0017075949367088609, "loss": 0.7835, "step": 50820 }, { "epoch": 14.62313003452244, "grad_norm": 0.9804750680923462, "learning_rate": 0.0017075373993095514, "loss": 0.7043, "step": 50830 }, { "epoch": 14.626006904487918, "grad_norm": 1.3477896451950073, "learning_rate": 0.0017074798619102416, "loss": 0.7567, "step": 50840 }, { "epoch": 14.628883774453394, "grad_norm": 0.8118675351142883, "learning_rate": 0.0017074223245109321, "loss": 0.8007, "step": 50850 }, { "epoch": 14.631760644418872, "grad_norm": 0.7827019095420837, "learning_rate": 0.0017073647871116227, "loss": 0.6573, "step": 50860 }, { "epoch": 14.63463751438435, "grad_norm": 1.9135297536849976, "learning_rate": 0.001707307249712313, "loss": 0.771, "step": 50870 }, { "epoch": 14.637514384349828, "grad_norm": 1.7248945236206055, "learning_rate": 0.0017072497123130036, "loss": 0.7768, "step": 50880 }, { "epoch": 14.640391254315304, "grad_norm": 0.9033364057540894, "learning_rate": 0.0017071921749136937, "loss": 0.796, "step": 50890 }, { "epoch": 14.643268124280782, "grad_norm": 1.45908784866333, "learning_rate": 0.0017071346375143843, "loss": 0.763, "step": 50900 }, { "epoch": 14.64614499424626, "grad_norm": 1.3680082559585571, "learning_rate": 0.0017070771001150749, "loss": 0.768, "step": 50910 }, { "epoch": 14.649021864211738, "grad_norm": 1.3566985130310059, "learning_rate": 0.0017070195627157652, "loss": 0.8674, "step": 50920 }, { "epoch": 14.651898734177216, "grad_norm": 2.697570323944092, "learning_rate": 0.0017069620253164558, "loss": 0.7324, "step": 50930 }, { "epoch": 14.654775604142692, "grad_norm": 2.169613838195801, "learning_rate": 0.0017069044879171463, "loss": 0.7651, "step": 50940 }, { "epoch": 14.65765247410817, "grad_norm": 2.0616695880889893, "learning_rate": 0.0017068469505178365, "loss": 0.9092, "step": 50950 }, { "epoch": 14.660529344073648, "grad_norm": 1.4135551452636719, "learning_rate": 0.001706789413118527, "loss": 0.7052, "step": 50960 }, { "epoch": 14.663406214039126, "grad_norm": 1.1339809894561768, "learning_rate": 0.0017067318757192176, "loss": 0.7646, "step": 50970 }, { "epoch": 14.666283084004602, "grad_norm": 2.1253371238708496, "learning_rate": 0.001706674338319908, "loss": 0.9169, "step": 50980 }, { "epoch": 14.66915995397008, "grad_norm": 0.9366138577461243, "learning_rate": 0.0017066168009205985, "loss": 0.8631, "step": 50990 }, { "epoch": 14.672036823935558, "grad_norm": 1.0063681602478027, "learning_rate": 0.0017065592635212888, "loss": 0.6933, "step": 51000 }, { "epoch": 14.674913693901036, "grad_norm": 2.043708562850952, "learning_rate": 0.0017065017261219792, "loss": 0.6011, "step": 51010 }, { "epoch": 14.677790563866512, "grad_norm": 1.4451603889465332, "learning_rate": 0.0017064441887226698, "loss": 0.787, "step": 51020 }, { "epoch": 14.68066743383199, "grad_norm": 1.5498435497283936, "learning_rate": 0.00170638665132336, "loss": 0.8404, "step": 51030 }, { "epoch": 14.683544303797468, "grad_norm": 1.4718934297561646, "learning_rate": 0.0017063291139240507, "loss": 0.7605, "step": 51040 }, { "epoch": 14.686421173762946, "grad_norm": 1.4145747423171997, "learning_rate": 0.0017062715765247412, "loss": 0.7195, "step": 51050 }, { "epoch": 14.689298043728424, "grad_norm": 0.9548234939575195, "learning_rate": 0.0017062140391254316, "loss": 0.7714, "step": 51060 }, { "epoch": 14.6921749136939, "grad_norm": 0.8958413600921631, "learning_rate": 0.001706156501726122, "loss": 0.8458, "step": 51070 }, { "epoch": 14.695051783659379, "grad_norm": 1.1451777219772339, "learning_rate": 0.0017060989643268125, "loss": 0.6611, "step": 51080 }, { "epoch": 14.697928653624857, "grad_norm": 1.146896243095398, "learning_rate": 0.0017060414269275028, "loss": 0.7572, "step": 51090 }, { "epoch": 14.700805523590335, "grad_norm": 1.284846544265747, "learning_rate": 0.0017059838895281934, "loss": 0.7101, "step": 51100 }, { "epoch": 14.70368239355581, "grad_norm": 1.3950855731964111, "learning_rate": 0.001705926352128884, "loss": 0.7511, "step": 51110 }, { "epoch": 14.706559263521289, "grad_norm": 1.23857581615448, "learning_rate": 0.0017058688147295743, "loss": 0.6974, "step": 51120 }, { "epoch": 14.709436133486767, "grad_norm": 1.4119813442230225, "learning_rate": 0.0017058112773302647, "loss": 0.8719, "step": 51130 }, { "epoch": 14.712313003452245, "grad_norm": 1.4467332363128662, "learning_rate": 0.001705753739930955, "loss": 0.9058, "step": 51140 }, { "epoch": 14.715189873417721, "grad_norm": 1.128799319267273, "learning_rate": 0.0017056962025316456, "loss": 0.8832, "step": 51150 }, { "epoch": 14.718066743383199, "grad_norm": 0.9263936281204224, "learning_rate": 0.0017056386651323361, "loss": 0.8125, "step": 51160 }, { "epoch": 14.720943613348677, "grad_norm": 0.7207299470901489, "learning_rate": 0.0017055811277330265, "loss": 0.9843, "step": 51170 }, { "epoch": 14.723820483314155, "grad_norm": 2.368314027786255, "learning_rate": 0.001705523590333717, "loss": 0.8027, "step": 51180 }, { "epoch": 14.726697353279631, "grad_norm": 0.7841061949729919, "learning_rate": 0.0017054660529344074, "loss": 0.9442, "step": 51190 }, { "epoch": 14.729574223245109, "grad_norm": 1.1431434154510498, "learning_rate": 0.0017054085155350977, "loss": 0.6693, "step": 51200 }, { "epoch": 14.732451093210587, "grad_norm": 1.498537302017212, "learning_rate": 0.0017053509781357883, "loss": 0.6748, "step": 51210 }, { "epoch": 14.735327963176065, "grad_norm": 1.6656912565231323, "learning_rate": 0.0017052934407364789, "loss": 0.7694, "step": 51220 }, { "epoch": 14.738204833141541, "grad_norm": 1.7472432851791382, "learning_rate": 0.0017052359033371692, "loss": 0.8948, "step": 51230 }, { "epoch": 14.74108170310702, "grad_norm": 1.7628810405731201, "learning_rate": 0.0017051783659378598, "loss": 0.7502, "step": 51240 }, { "epoch": 14.743958573072497, "grad_norm": 1.5100162029266357, "learning_rate": 0.00170512082853855, "loss": 0.8428, "step": 51250 }, { "epoch": 14.746835443037975, "grad_norm": 2.1229312419891357, "learning_rate": 0.0017050632911392405, "loss": 0.7122, "step": 51260 }, { "epoch": 14.749712313003453, "grad_norm": 2.5209949016571045, "learning_rate": 0.001705005753739931, "loss": 0.9077, "step": 51270 }, { "epoch": 14.75258918296893, "grad_norm": 0.936468243598938, "learning_rate": 0.0017049482163406214, "loss": 0.8288, "step": 51280 }, { "epoch": 14.755466052934407, "grad_norm": 1.6386691331863403, "learning_rate": 0.001704890678941312, "loss": 1.0159, "step": 51290 }, { "epoch": 14.758342922899885, "grad_norm": 1.4565660953521729, "learning_rate": 0.0017048331415420025, "loss": 0.7727, "step": 51300 }, { "epoch": 14.761219792865363, "grad_norm": 1.9589591026306152, "learning_rate": 0.0017047756041426926, "loss": 0.843, "step": 51310 }, { "epoch": 14.76409666283084, "grad_norm": 3.9351541996002197, "learning_rate": 0.0017047180667433832, "loss": 0.8283, "step": 51320 }, { "epoch": 14.766973532796317, "grad_norm": 1.9796128273010254, "learning_rate": 0.0017046605293440738, "loss": 1.2887, "step": 51330 }, { "epoch": 14.769850402761795, "grad_norm": 1.4656916856765747, "learning_rate": 0.0017046029919447641, "loss": 0.8214, "step": 51340 }, { "epoch": 14.772727272727273, "grad_norm": 2.0558557510375977, "learning_rate": 0.0017045454545454547, "loss": 0.8909, "step": 51350 }, { "epoch": 14.77560414269275, "grad_norm": 1.6597923040390015, "learning_rate": 0.001704487917146145, "loss": 0.8385, "step": 51360 }, { "epoch": 14.778481012658228, "grad_norm": 0.937873363494873, "learning_rate": 0.0017044303797468354, "loss": 0.8852, "step": 51370 }, { "epoch": 14.781357882623706, "grad_norm": 2.121023178100586, "learning_rate": 0.001704372842347526, "loss": 0.8798, "step": 51380 }, { "epoch": 14.784234752589184, "grad_norm": 1.4405618906021118, "learning_rate": 0.0017043153049482163, "loss": 0.7951, "step": 51390 }, { "epoch": 14.78711162255466, "grad_norm": 1.5065232515335083, "learning_rate": 0.0017042577675489068, "loss": 0.6926, "step": 51400 }, { "epoch": 14.789988492520138, "grad_norm": 1.0401480197906494, "learning_rate": 0.0017042002301495974, "loss": 0.7096, "step": 51410 }, { "epoch": 14.792865362485616, "grad_norm": 1.4704958200454712, "learning_rate": 0.0017041426927502878, "loss": 0.7163, "step": 51420 }, { "epoch": 14.795742232451094, "grad_norm": 1.0713562965393066, "learning_rate": 0.001704085155350978, "loss": 0.9743, "step": 51430 }, { "epoch": 14.79861910241657, "grad_norm": 1.4838943481445312, "learning_rate": 0.0017040276179516687, "loss": 0.8938, "step": 51440 }, { "epoch": 14.801495972382048, "grad_norm": 1.0952439308166504, "learning_rate": 0.001703970080552359, "loss": 0.9107, "step": 51450 }, { "epoch": 14.804372842347526, "grad_norm": 1.3124470710754395, "learning_rate": 0.0017039125431530496, "loss": 0.717, "step": 51460 }, { "epoch": 14.807249712313004, "grad_norm": 1.430262565612793, "learning_rate": 0.00170385500575374, "loss": 0.6453, "step": 51470 }, { "epoch": 14.810126582278482, "grad_norm": 1.9532527923583984, "learning_rate": 0.0017037974683544305, "loss": 0.8267, "step": 51480 }, { "epoch": 14.813003452243958, "grad_norm": 0.9371190071105957, "learning_rate": 0.0017037399309551208, "loss": 0.7387, "step": 51490 }, { "epoch": 14.815880322209436, "grad_norm": 1.7190502882003784, "learning_rate": 0.0017036823935558112, "loss": 0.7537, "step": 51500 }, { "epoch": 14.818757192174914, "grad_norm": 0.8299685120582581, "learning_rate": 0.0017036248561565018, "loss": 0.8607, "step": 51510 }, { "epoch": 14.821634062140392, "grad_norm": 2.2709884643554688, "learning_rate": 0.0017035673187571923, "loss": 0.6907, "step": 51520 }, { "epoch": 14.824510932105868, "grad_norm": 2.998769760131836, "learning_rate": 0.0017035097813578827, "loss": 0.8846, "step": 51530 }, { "epoch": 14.827387802071346, "grad_norm": 2.1858158111572266, "learning_rate": 0.0017034522439585732, "loss": 0.7579, "step": 51540 }, { "epoch": 14.830264672036824, "grad_norm": 1.8084447383880615, "learning_rate": 0.0017033947065592636, "loss": 0.8398, "step": 51550 }, { "epoch": 14.833141542002302, "grad_norm": 1.5596327781677246, "learning_rate": 0.001703337169159954, "loss": 0.9375, "step": 51560 }, { "epoch": 14.836018411967778, "grad_norm": 1.5239996910095215, "learning_rate": 0.0017032796317606445, "loss": 0.8532, "step": 51570 }, { "epoch": 14.838895281933256, "grad_norm": 1.2600529193878174, "learning_rate": 0.0017032220943613348, "loss": 0.7445, "step": 51580 }, { "epoch": 14.841772151898734, "grad_norm": 0.8110621571540833, "learning_rate": 0.0017031645569620254, "loss": 0.6039, "step": 51590 }, { "epoch": 14.844649021864212, "grad_norm": 1.094595193862915, "learning_rate": 0.001703107019562716, "loss": 0.7051, "step": 51600 }, { "epoch": 14.84752589182969, "grad_norm": 1.5396615266799927, "learning_rate": 0.001703049482163406, "loss": 0.6507, "step": 51610 }, { "epoch": 14.850402761795166, "grad_norm": 1.6316962242126465, "learning_rate": 0.0017029919447640967, "loss": 0.7389, "step": 51620 }, { "epoch": 14.853279631760644, "grad_norm": 1.423295021057129, "learning_rate": 0.0017029344073647872, "loss": 0.9316, "step": 51630 }, { "epoch": 14.856156501726122, "grad_norm": 1.6051056385040283, "learning_rate": 0.0017028768699654776, "loss": 0.8011, "step": 51640 }, { "epoch": 14.8590333716916, "grad_norm": 1.6051632165908813, "learning_rate": 0.0017028193325661681, "loss": 0.7451, "step": 51650 }, { "epoch": 14.861910241657077, "grad_norm": 1.0298391580581665, "learning_rate": 0.0017027617951668587, "loss": 0.6196, "step": 51660 }, { "epoch": 14.864787111622555, "grad_norm": 1.4521385431289673, "learning_rate": 0.0017027042577675488, "loss": 0.9053, "step": 51670 }, { "epoch": 14.867663981588032, "grad_norm": 4.238468647003174, "learning_rate": 0.0017026467203682394, "loss": 0.7958, "step": 51680 }, { "epoch": 14.87054085155351, "grad_norm": 0.9676786661148071, "learning_rate": 0.00170258918296893, "loss": 0.711, "step": 51690 }, { "epoch": 14.873417721518987, "grad_norm": 1.3809404373168945, "learning_rate": 0.0017025316455696203, "loss": 0.7281, "step": 51700 }, { "epoch": 14.876294591484465, "grad_norm": 1.1902395486831665, "learning_rate": 0.0017024741081703109, "loss": 0.8354, "step": 51710 }, { "epoch": 14.879171461449943, "grad_norm": 1.4517228603363037, "learning_rate": 0.001702416570771001, "loss": 0.7835, "step": 51720 }, { "epoch": 14.88204833141542, "grad_norm": 1.1184602975845337, "learning_rate": 0.0017023590333716916, "loss": 0.714, "step": 51730 }, { "epoch": 14.884925201380897, "grad_norm": 1.9573578834533691, "learning_rate": 0.0017023014959723821, "loss": 0.7802, "step": 51740 }, { "epoch": 14.887802071346375, "grad_norm": 1.8078997135162354, "learning_rate": 0.0017022439585730725, "loss": 0.7617, "step": 51750 }, { "epoch": 14.890678941311853, "grad_norm": 1.1039862632751465, "learning_rate": 0.001702186421173763, "loss": 0.7658, "step": 51760 }, { "epoch": 14.89355581127733, "grad_norm": 1.2770073413848877, "learning_rate": 0.0017021288837744536, "loss": 0.7033, "step": 51770 }, { "epoch": 14.896432681242807, "grad_norm": 1.4406040906906128, "learning_rate": 0.0017020713463751437, "loss": 0.8967, "step": 51780 }, { "epoch": 14.899309551208285, "grad_norm": 1.9713555574417114, "learning_rate": 0.0017020138089758343, "loss": 0.9045, "step": 51790 }, { "epoch": 14.902186421173763, "grad_norm": 1.1747057437896729, "learning_rate": 0.0017019562715765249, "loss": 0.698, "step": 51800 }, { "epoch": 14.905063291139241, "grad_norm": 2.13315486907959, "learning_rate": 0.0017018987341772152, "loss": 0.9867, "step": 51810 }, { "epoch": 14.907940161104719, "grad_norm": 1.6079550981521606, "learning_rate": 0.0017018411967779058, "loss": 0.891, "step": 51820 }, { "epoch": 14.910817031070195, "grad_norm": 1.6554009914398193, "learning_rate": 0.0017017836593785961, "loss": 0.7507, "step": 51830 }, { "epoch": 14.913693901035673, "grad_norm": 1.8535045385360718, "learning_rate": 0.0017017261219792865, "loss": 0.6123, "step": 51840 }, { "epoch": 14.916570771001151, "grad_norm": 2.0833377838134766, "learning_rate": 0.001701668584579977, "loss": 0.778, "step": 51850 }, { "epoch": 14.919447640966629, "grad_norm": 1.5815566778182983, "learning_rate": 0.0017016110471806674, "loss": 0.7736, "step": 51860 }, { "epoch": 14.922324510932105, "grad_norm": 1.4073420763015747, "learning_rate": 0.001701553509781358, "loss": 0.6873, "step": 51870 }, { "epoch": 14.925201380897583, "grad_norm": 1.3548996448516846, "learning_rate": 0.0017014959723820485, "loss": 0.7526, "step": 51880 }, { "epoch": 14.928078250863061, "grad_norm": 1.100771188735962, "learning_rate": 0.0017014384349827388, "loss": 0.7428, "step": 51890 }, { "epoch": 14.93095512082854, "grad_norm": 0.7266016006469727, "learning_rate": 0.0017013808975834292, "loss": 0.8121, "step": 51900 }, { "epoch": 14.933831990794015, "grad_norm": 1.3554757833480835, "learning_rate": 0.0017013233601841198, "loss": 0.8124, "step": 51910 }, { "epoch": 14.936708860759493, "grad_norm": 1.4201674461364746, "learning_rate": 0.00170126582278481, "loss": 0.9431, "step": 51920 }, { "epoch": 14.939585730724971, "grad_norm": 1.1627576351165771, "learning_rate": 0.0017012082853855007, "loss": 0.7238, "step": 51930 }, { "epoch": 14.94246260069045, "grad_norm": 1.1665880680084229, "learning_rate": 0.001701150747986191, "loss": 0.7914, "step": 51940 }, { "epoch": 14.945339470655927, "grad_norm": 1.3083043098449707, "learning_rate": 0.0017010932105868816, "loss": 0.9252, "step": 51950 }, { "epoch": 14.948216340621403, "grad_norm": 1.1037620306015015, "learning_rate": 0.001701035673187572, "loss": 0.977, "step": 51960 }, { "epoch": 14.951093210586881, "grad_norm": 1.1429318189620972, "learning_rate": 0.0017009781357882623, "loss": 0.8243, "step": 51970 }, { "epoch": 14.95397008055236, "grad_norm": 1.4465725421905518, "learning_rate": 0.0017009205983889528, "loss": 0.8778, "step": 51980 }, { "epoch": 14.956846950517837, "grad_norm": 3.161163330078125, "learning_rate": 0.0017008630609896434, "loss": 0.7086, "step": 51990 }, { "epoch": 14.959723820483314, "grad_norm": 1.2336899042129517, "learning_rate": 0.0017008055235903337, "loss": 0.7088, "step": 52000 }, { "epoch": 14.962600690448792, "grad_norm": 1.0469716787338257, "learning_rate": 0.0017007479861910243, "loss": 0.6379, "step": 52010 }, { "epoch": 14.96547756041427, "grad_norm": 1.0860991477966309, "learning_rate": 0.0017006904487917147, "loss": 0.6604, "step": 52020 }, { "epoch": 14.968354430379748, "grad_norm": 1.443664312362671, "learning_rate": 0.001700632911392405, "loss": 0.8883, "step": 52030 }, { "epoch": 14.971231300345224, "grad_norm": 0.9865883588790894, "learning_rate": 0.0017005753739930956, "loss": 0.8081, "step": 52040 }, { "epoch": 14.974108170310702, "grad_norm": 1.2638343572616577, "learning_rate": 0.001700517836593786, "loss": 0.8756, "step": 52050 }, { "epoch": 14.97698504027618, "grad_norm": 0.9357279539108276, "learning_rate": 0.0017004602991944765, "loss": 0.8471, "step": 52060 }, { "epoch": 14.979861910241658, "grad_norm": 3.5635581016540527, "learning_rate": 0.001700402761795167, "loss": 0.7468, "step": 52070 }, { "epoch": 14.982738780207134, "grad_norm": 0.974120020866394, "learning_rate": 0.0017003452243958572, "loss": 0.7182, "step": 52080 }, { "epoch": 14.985615650172612, "grad_norm": 1.4687730073928833, "learning_rate": 0.0017002876869965477, "loss": 0.7408, "step": 52090 }, { "epoch": 14.98849252013809, "grad_norm": 1.1242616176605225, "learning_rate": 0.0017002301495972383, "loss": 0.7311, "step": 52100 }, { "epoch": 14.991369390103568, "grad_norm": 1.8798552751541138, "learning_rate": 0.0017001726121979286, "loss": 0.7543, "step": 52110 }, { "epoch": 14.994246260069044, "grad_norm": 1.2585479021072388, "learning_rate": 0.0017001150747986192, "loss": 0.6713, "step": 52120 }, { "epoch": 14.997123130034522, "grad_norm": 1.8473964929580688, "learning_rate": 0.0017000575373993098, "loss": 0.6527, "step": 52130 }, { "epoch": 15.0, "grad_norm": 1.7266873121261597, "learning_rate": 0.0017, "loss": 0.7962, "step": 52140 }, { "epoch": 15.002876869965478, "grad_norm": 0.7192155122756958, "learning_rate": 0.0016999424626006905, "loss": 0.599, "step": 52150 }, { "epoch": 15.005753739930956, "grad_norm": 1.113778829574585, "learning_rate": 0.0016998849252013808, "loss": 0.6843, "step": 52160 }, { "epoch": 15.008630609896432, "grad_norm": 1.345284104347229, "learning_rate": 0.0016998273878020714, "loss": 0.8126, "step": 52170 }, { "epoch": 15.01150747986191, "grad_norm": 1.6294482946395874, "learning_rate": 0.001699769850402762, "loss": 0.6338, "step": 52180 }, { "epoch": 15.014384349827388, "grad_norm": 2.4944815635681152, "learning_rate": 0.0016997123130034523, "loss": 0.611, "step": 52190 }, { "epoch": 15.017261219792866, "grad_norm": 1.1206700801849365, "learning_rate": 0.0016996547756041426, "loss": 0.8201, "step": 52200 }, { "epoch": 15.020138089758342, "grad_norm": 1.5284229516983032, "learning_rate": 0.0016995972382048332, "loss": 0.7574, "step": 52210 }, { "epoch": 15.02301495972382, "grad_norm": 1.2678165435791016, "learning_rate": 0.0016995397008055235, "loss": 0.7402, "step": 52220 }, { "epoch": 15.025891829689298, "grad_norm": 1.5119229555130005, "learning_rate": 0.0016994821634062141, "loss": 0.8341, "step": 52230 }, { "epoch": 15.028768699654776, "grad_norm": 0.8145226240158081, "learning_rate": 0.0016994246260069047, "loss": 0.5217, "step": 52240 }, { "epoch": 15.031645569620252, "grad_norm": 0.9961361885070801, "learning_rate": 0.001699367088607595, "loss": 0.8023, "step": 52250 }, { "epoch": 15.03452243958573, "grad_norm": 1.3212428092956543, "learning_rate": 0.0016993095512082854, "loss": 0.6447, "step": 52260 }, { "epoch": 15.037399309551208, "grad_norm": 0.9820305109024048, "learning_rate": 0.0016992520138089757, "loss": 0.7624, "step": 52270 }, { "epoch": 15.040276179516686, "grad_norm": 2.020662307739258, "learning_rate": 0.0016991944764096663, "loss": 0.7345, "step": 52280 }, { "epoch": 15.043153049482163, "grad_norm": 1.7686591148376465, "learning_rate": 0.0016991369390103568, "loss": 0.8418, "step": 52290 }, { "epoch": 15.04602991944764, "grad_norm": 1.0680391788482666, "learning_rate": 0.0016990794016110472, "loss": 0.7164, "step": 52300 }, { "epoch": 15.048906789413119, "grad_norm": 1.1824861764907837, "learning_rate": 0.0016990218642117378, "loss": 0.6758, "step": 52310 }, { "epoch": 15.051783659378597, "grad_norm": 1.6018062829971313, "learning_rate": 0.001698964326812428, "loss": 0.6561, "step": 52320 }, { "epoch": 15.054660529344075, "grad_norm": 1.134976863861084, "learning_rate": 0.0016989067894131185, "loss": 0.8043, "step": 52330 }, { "epoch": 15.05753739930955, "grad_norm": 1.0009092092514038, "learning_rate": 0.001698849252013809, "loss": 0.6201, "step": 52340 }, { "epoch": 15.060414269275029, "grad_norm": 1.5644153356552124, "learning_rate": 0.0016987917146144996, "loss": 0.8948, "step": 52350 }, { "epoch": 15.063291139240507, "grad_norm": 0.7077837586402893, "learning_rate": 0.00169873417721519, "loss": 0.8085, "step": 52360 }, { "epoch": 15.066168009205985, "grad_norm": 2.8405075073242188, "learning_rate": 0.0016986766398158805, "loss": 0.832, "step": 52370 }, { "epoch": 15.06904487917146, "grad_norm": 1.6174918413162231, "learning_rate": 0.0016986191024165708, "loss": 0.7399, "step": 52380 }, { "epoch": 15.071921749136939, "grad_norm": 1.4303685426712036, "learning_rate": 0.0016985615650172612, "loss": 0.734, "step": 52390 }, { "epoch": 15.074798619102417, "grad_norm": 2.723723888397217, "learning_rate": 0.0016985040276179517, "loss": 0.7063, "step": 52400 }, { "epoch": 15.077675489067895, "grad_norm": 2.7746479511260986, "learning_rate": 0.001698446490218642, "loss": 0.8889, "step": 52410 }, { "epoch": 15.080552359033371, "grad_norm": 1.77993643283844, "learning_rate": 0.0016983889528193327, "loss": 0.707, "step": 52420 }, { "epoch": 15.083429228998849, "grad_norm": 1.0632556676864624, "learning_rate": 0.0016983314154200232, "loss": 0.6456, "step": 52430 }, { "epoch": 15.086306098964327, "grad_norm": 0.9022243618965149, "learning_rate": 0.0016982738780207134, "loss": 0.6627, "step": 52440 }, { "epoch": 15.089182968929805, "grad_norm": 1.0311836004257202, "learning_rate": 0.001698216340621404, "loss": 0.7689, "step": 52450 }, { "epoch": 15.092059838895281, "grad_norm": 1.3812192678451538, "learning_rate": 0.0016981588032220945, "loss": 0.7755, "step": 52460 }, { "epoch": 15.094936708860759, "grad_norm": 1.9228150844573975, "learning_rate": 0.0016981012658227848, "loss": 0.8109, "step": 52470 }, { "epoch": 15.097813578826237, "grad_norm": 1.2741495370864868, "learning_rate": 0.0016980437284234754, "loss": 0.9881, "step": 52480 }, { "epoch": 15.100690448791715, "grad_norm": 1.0213333368301392, "learning_rate": 0.001697986191024166, "loss": 0.7063, "step": 52490 }, { "epoch": 15.103567318757193, "grad_norm": 0.9884507060050964, "learning_rate": 0.001697928653624856, "loss": 0.7519, "step": 52500 }, { "epoch": 15.10644418872267, "grad_norm": 1.4542393684387207, "learning_rate": 0.0016978711162255467, "loss": 0.9231, "step": 52510 }, { "epoch": 15.109321058688147, "grad_norm": 1.1931545734405518, "learning_rate": 0.001697813578826237, "loss": 0.6988, "step": 52520 }, { "epoch": 15.112197928653625, "grad_norm": 0.8430798053741455, "learning_rate": 0.0016977560414269276, "loss": 0.6913, "step": 52530 }, { "epoch": 15.115074798619103, "grad_norm": 1.20718252658844, "learning_rate": 0.0016976985040276181, "loss": 0.7267, "step": 52540 }, { "epoch": 15.11795166858458, "grad_norm": 1.7269915342330933, "learning_rate": 0.0016976409666283083, "loss": 0.763, "step": 52550 }, { "epoch": 15.120828538550057, "grad_norm": 1.5916123390197754, "learning_rate": 0.0016975834292289988, "loss": 0.8025, "step": 52560 }, { "epoch": 15.123705408515535, "grad_norm": 2.1077256202697754, "learning_rate": 0.0016975258918296894, "loss": 1.0777, "step": 52570 }, { "epoch": 15.126582278481013, "grad_norm": 1.372161626815796, "learning_rate": 0.0016974683544303797, "loss": 0.6629, "step": 52580 }, { "epoch": 15.12945914844649, "grad_norm": 1.763396978378296, "learning_rate": 0.0016974108170310703, "loss": 0.8141, "step": 52590 }, { "epoch": 15.132336018411968, "grad_norm": 1.327485203742981, "learning_rate": 0.0016973532796317609, "loss": 0.8151, "step": 52600 }, { "epoch": 15.135212888377445, "grad_norm": 1.8336693048477173, "learning_rate": 0.001697295742232451, "loss": 0.8787, "step": 52610 }, { "epoch": 15.138089758342923, "grad_norm": 0.8533892035484314, "learning_rate": 0.0016972382048331416, "loss": 0.6347, "step": 52620 }, { "epoch": 15.1409666283084, "grad_norm": 1.1867055892944336, "learning_rate": 0.001697180667433832, "loss": 0.6086, "step": 52630 }, { "epoch": 15.143843498273878, "grad_norm": 1.4782525300979614, "learning_rate": 0.0016971231300345225, "loss": 0.7783, "step": 52640 }, { "epoch": 15.146720368239356, "grad_norm": 1.0852386951446533, "learning_rate": 0.001697065592635213, "loss": 0.9838, "step": 52650 }, { "epoch": 15.149597238204834, "grad_norm": 1.3910856246948242, "learning_rate": 0.0016970080552359034, "loss": 0.5615, "step": 52660 }, { "epoch": 15.15247410817031, "grad_norm": 1.2932839393615723, "learning_rate": 0.0016969505178365937, "loss": 0.6295, "step": 52670 }, { "epoch": 15.155350978135788, "grad_norm": 1.9071484804153442, "learning_rate": 0.0016968929804372843, "loss": 0.7957, "step": 52680 }, { "epoch": 15.158227848101266, "grad_norm": 1.5241698026657104, "learning_rate": 0.0016968354430379746, "loss": 0.7368, "step": 52690 }, { "epoch": 15.161104718066744, "grad_norm": 0.6959882974624634, "learning_rate": 0.0016967779056386652, "loss": 0.7839, "step": 52700 }, { "epoch": 15.163981588032222, "grad_norm": 1.6588733196258545, "learning_rate": 0.0016967203682393558, "loss": 0.6332, "step": 52710 }, { "epoch": 15.166858457997698, "grad_norm": 0.9929079413414001, "learning_rate": 0.001696662830840046, "loss": 0.9515, "step": 52720 }, { "epoch": 15.169735327963176, "grad_norm": 1.315690517425537, "learning_rate": 0.0016966052934407365, "loss": 0.8769, "step": 52730 }, { "epoch": 15.172612197928654, "grad_norm": 2.0950205326080322, "learning_rate": 0.0016965477560414268, "loss": 0.7134, "step": 52740 }, { "epoch": 15.175489067894132, "grad_norm": 1.1581796407699585, "learning_rate": 0.0016964902186421174, "loss": 0.6396, "step": 52750 }, { "epoch": 15.178365937859608, "grad_norm": 1.3000478744506836, "learning_rate": 0.001696432681242808, "loss": 0.7608, "step": 52760 }, { "epoch": 15.181242807825086, "grad_norm": 1.4130401611328125, "learning_rate": 0.0016963751438434983, "loss": 0.8824, "step": 52770 }, { "epoch": 15.184119677790564, "grad_norm": 1.0824916362762451, "learning_rate": 0.0016963176064441888, "loss": 0.8546, "step": 52780 }, { "epoch": 15.186996547756042, "grad_norm": 1.3994334936141968, "learning_rate": 0.0016962600690448792, "loss": 0.7442, "step": 52790 }, { "epoch": 15.189873417721518, "grad_norm": 1.26249098777771, "learning_rate": 0.0016962025316455695, "loss": 0.7455, "step": 52800 }, { "epoch": 15.192750287686996, "grad_norm": 1.666835904121399, "learning_rate": 0.00169614499424626, "loss": 0.7554, "step": 52810 }, { "epoch": 15.195627157652474, "grad_norm": 1.4462839365005493, "learning_rate": 0.0016960874568469507, "loss": 0.777, "step": 52820 }, { "epoch": 15.198504027617952, "grad_norm": 1.4492697715759277, "learning_rate": 0.001696029919447641, "loss": 0.7371, "step": 52830 }, { "epoch": 15.201380897583428, "grad_norm": 0.8341372609138489, "learning_rate": 0.0016959723820483316, "loss": 0.7737, "step": 52840 }, { "epoch": 15.204257767548906, "grad_norm": 1.3579537868499756, "learning_rate": 0.0016959148446490217, "loss": 0.7142, "step": 52850 }, { "epoch": 15.207134637514384, "grad_norm": 1.6828491687774658, "learning_rate": 0.0016958573072497123, "loss": 0.799, "step": 52860 }, { "epoch": 15.210011507479862, "grad_norm": 1.2299028635025024, "learning_rate": 0.0016957997698504028, "loss": 0.6362, "step": 52870 }, { "epoch": 15.21288837744534, "grad_norm": 1.3328986167907715, "learning_rate": 0.0016957422324510932, "loss": 0.8847, "step": 52880 }, { "epoch": 15.215765247410816, "grad_norm": 1.303722620010376, "learning_rate": 0.0016956846950517837, "loss": 0.9025, "step": 52890 }, { "epoch": 15.218642117376294, "grad_norm": 1.1916860342025757, "learning_rate": 0.0016956271576524743, "loss": 0.7502, "step": 52900 }, { "epoch": 15.221518987341772, "grad_norm": 2.0355947017669678, "learning_rate": 0.0016955696202531644, "loss": 0.8139, "step": 52910 }, { "epoch": 15.22439585730725, "grad_norm": 1.5941323041915894, "learning_rate": 0.001695512082853855, "loss": 0.702, "step": 52920 }, { "epoch": 15.227272727272727, "grad_norm": 1.207518219947815, "learning_rate": 0.0016954545454545456, "loss": 0.6337, "step": 52930 }, { "epoch": 15.230149597238205, "grad_norm": 1.574862003326416, "learning_rate": 0.001695397008055236, "loss": 0.6984, "step": 52940 }, { "epoch": 15.233026467203683, "grad_norm": 1.4079697132110596, "learning_rate": 0.0016953394706559265, "loss": 0.7192, "step": 52950 }, { "epoch": 15.23590333716916, "grad_norm": 0.8003212213516235, "learning_rate": 0.0016952819332566168, "loss": 0.944, "step": 52960 }, { "epoch": 15.238780207134637, "grad_norm": 1.0237529277801514, "learning_rate": 0.0016952243958573072, "loss": 0.6981, "step": 52970 }, { "epoch": 15.241657077100115, "grad_norm": 1.0779173374176025, "learning_rate": 0.0016951668584579977, "loss": 0.8968, "step": 52980 }, { "epoch": 15.244533947065593, "grad_norm": 0.8729848265647888, "learning_rate": 0.001695109321058688, "loss": 0.5822, "step": 52990 }, { "epoch": 15.24741081703107, "grad_norm": 0.8988728523254395, "learning_rate": 0.0016950517836593786, "loss": 0.7476, "step": 53000 }, { "epoch": 15.250287686996547, "grad_norm": 2.3359644412994385, "learning_rate": 0.0016949942462600692, "loss": 0.9636, "step": 53010 }, { "epoch": 15.253164556962025, "grad_norm": 0.6378338932991028, "learning_rate": 0.0016949367088607596, "loss": 0.7736, "step": 53020 }, { "epoch": 15.256041426927503, "grad_norm": 1.9440834522247314, "learning_rate": 0.00169487917146145, "loss": 0.8488, "step": 53030 }, { "epoch": 15.25891829689298, "grad_norm": 1.4755761623382568, "learning_rate": 0.0016948216340621405, "loss": 0.6726, "step": 53040 }, { "epoch": 15.261795166858459, "grad_norm": 1.7749451398849487, "learning_rate": 0.0016947640966628308, "loss": 0.7245, "step": 53050 }, { "epoch": 15.264672036823935, "grad_norm": 1.4723109006881714, "learning_rate": 0.0016947065592635214, "loss": 0.7043, "step": 53060 }, { "epoch": 15.267548906789413, "grad_norm": 0.9940308332443237, "learning_rate": 0.001694649021864212, "loss": 0.7076, "step": 53070 }, { "epoch": 15.270425776754891, "grad_norm": 2.659996509552002, "learning_rate": 0.0016945914844649023, "loss": 0.741, "step": 53080 }, { "epoch": 15.273302646720369, "grad_norm": 0.7545803189277649, "learning_rate": 0.0016945339470655926, "loss": 0.6929, "step": 53090 }, { "epoch": 15.276179516685845, "grad_norm": 1.5030291080474854, "learning_rate": 0.001694476409666283, "loss": 0.7711, "step": 53100 }, { "epoch": 15.279056386651323, "grad_norm": 1.421973466873169, "learning_rate": 0.0016944188722669735, "loss": 0.7077, "step": 53110 }, { "epoch": 15.281933256616801, "grad_norm": 1.4649832248687744, "learning_rate": 0.0016943613348676641, "loss": 0.8176, "step": 53120 }, { "epoch": 15.284810126582279, "grad_norm": 1.3388780355453491, "learning_rate": 0.0016943037974683545, "loss": 0.6518, "step": 53130 }, { "epoch": 15.287686996547755, "grad_norm": 0.7445565462112427, "learning_rate": 0.001694246260069045, "loss": 0.7306, "step": 53140 }, { "epoch": 15.290563866513233, "grad_norm": 1.4061352014541626, "learning_rate": 0.0016941887226697354, "loss": 0.7728, "step": 53150 }, { "epoch": 15.293440736478711, "grad_norm": 1.1693776845932007, "learning_rate": 0.0016941311852704257, "loss": 1.0564, "step": 53160 }, { "epoch": 15.29631760644419, "grad_norm": 1.2051464319229126, "learning_rate": 0.0016940736478711163, "loss": 0.7343, "step": 53170 }, { "epoch": 15.299194476409665, "grad_norm": 1.139875888824463, "learning_rate": 0.0016940161104718068, "loss": 0.9158, "step": 53180 }, { "epoch": 15.302071346375143, "grad_norm": 0.93247389793396, "learning_rate": 0.0016939585730724972, "loss": 0.8042, "step": 53190 }, { "epoch": 15.304948216340621, "grad_norm": 0.7397547364234924, "learning_rate": 0.0016939010356731878, "loss": 0.9239, "step": 53200 }, { "epoch": 15.3078250863061, "grad_norm": 0.9605591297149658, "learning_rate": 0.0016938434982738779, "loss": 0.6686, "step": 53210 }, { "epoch": 15.310701956271577, "grad_norm": 1.3517634868621826, "learning_rate": 0.0016937859608745684, "loss": 0.8333, "step": 53220 }, { "epoch": 15.313578826237054, "grad_norm": 1.9337031841278076, "learning_rate": 0.001693728423475259, "loss": 0.7293, "step": 53230 }, { "epoch": 15.316455696202532, "grad_norm": 2.107353925704956, "learning_rate": 0.0016936708860759494, "loss": 0.7175, "step": 53240 }, { "epoch": 15.31933256616801, "grad_norm": 1.3229432106018066, "learning_rate": 0.00169361334867664, "loss": 0.7796, "step": 53250 }, { "epoch": 15.322209436133488, "grad_norm": 1.4174219369888306, "learning_rate": 0.0016935558112773305, "loss": 0.5776, "step": 53260 }, { "epoch": 15.325086306098964, "grad_norm": 0.770632266998291, "learning_rate": 0.0016934982738780206, "loss": 0.6744, "step": 53270 }, { "epoch": 15.327963176064442, "grad_norm": 1.257704257965088, "learning_rate": 0.0016934407364787112, "loss": 0.6946, "step": 53280 }, { "epoch": 15.33084004602992, "grad_norm": 1.9088298082351685, "learning_rate": 0.0016933831990794017, "loss": 0.8224, "step": 53290 }, { "epoch": 15.333716915995398, "grad_norm": 0.9228498935699463, "learning_rate": 0.001693325661680092, "loss": 0.7789, "step": 53300 }, { "epoch": 15.336593785960874, "grad_norm": 1.5013827085494995, "learning_rate": 0.0016932681242807827, "loss": 0.8762, "step": 53310 }, { "epoch": 15.339470655926352, "grad_norm": 1.23712956905365, "learning_rate": 0.0016932105868814728, "loss": 0.6631, "step": 53320 }, { "epoch": 15.34234752589183, "grad_norm": 1.5703442096710205, "learning_rate": 0.0016931530494821634, "loss": 0.8001, "step": 53330 }, { "epoch": 15.345224395857308, "grad_norm": 1.4738068580627441, "learning_rate": 0.001693095512082854, "loss": 0.8052, "step": 53340 }, { "epoch": 15.348101265822784, "grad_norm": 1.083644151687622, "learning_rate": 0.0016930379746835443, "loss": 0.6466, "step": 53350 }, { "epoch": 15.350978135788262, "grad_norm": 1.1782629489898682, "learning_rate": 0.0016929804372842348, "loss": 0.7028, "step": 53360 }, { "epoch": 15.35385500575374, "grad_norm": 1.2281957864761353, "learning_rate": 0.0016929228998849254, "loss": 0.9243, "step": 53370 }, { "epoch": 15.356731875719218, "grad_norm": 1.048893928527832, "learning_rate": 0.0016928653624856155, "loss": 0.7016, "step": 53380 }, { "epoch": 15.359608745684696, "grad_norm": 1.1861989498138428, "learning_rate": 0.001692807825086306, "loss": 0.6864, "step": 53390 }, { "epoch": 15.362485615650172, "grad_norm": 2.585677146911621, "learning_rate": 0.0016927502876869966, "loss": 0.8192, "step": 53400 }, { "epoch": 15.36536248561565, "grad_norm": 1.9473804235458374, "learning_rate": 0.001692692750287687, "loss": 0.9044, "step": 53410 }, { "epoch": 15.368239355581128, "grad_norm": 1.3215223550796509, "learning_rate": 0.0016926352128883776, "loss": 0.7101, "step": 53420 }, { "epoch": 15.371116225546606, "grad_norm": 1.414343237876892, "learning_rate": 0.001692577675489068, "loss": 0.6815, "step": 53430 }, { "epoch": 15.373993095512082, "grad_norm": 2.0117928981781006, "learning_rate": 0.0016925201380897583, "loss": 0.886, "step": 53440 }, { "epoch": 15.37686996547756, "grad_norm": 0.6506211757659912, "learning_rate": 0.0016924626006904488, "loss": 0.6829, "step": 53450 }, { "epoch": 15.379746835443038, "grad_norm": 1.0368118286132812, "learning_rate": 0.0016924050632911392, "loss": 0.656, "step": 53460 }, { "epoch": 15.382623705408516, "grad_norm": 0.9409059286117554, "learning_rate": 0.0016923475258918297, "loss": 0.704, "step": 53470 }, { "epoch": 15.385500575373992, "grad_norm": 1.303916096687317, "learning_rate": 0.0016922899884925203, "loss": 0.7611, "step": 53480 }, { "epoch": 15.38837744533947, "grad_norm": 0.9526037573814392, "learning_rate": 0.0016922324510932106, "loss": 0.8677, "step": 53490 }, { "epoch": 15.391254315304948, "grad_norm": 1.2567920684814453, "learning_rate": 0.001692174913693901, "loss": 0.7521, "step": 53500 }, { "epoch": 15.394131185270426, "grad_norm": 0.736618161201477, "learning_rate": 0.0016921173762945916, "loss": 0.7506, "step": 53510 }, { "epoch": 15.397008055235903, "grad_norm": 2.666379451751709, "learning_rate": 0.001692059838895282, "loss": 0.8991, "step": 53520 }, { "epoch": 15.39988492520138, "grad_norm": 1.9810787439346313, "learning_rate": 0.0016920023014959725, "loss": 0.7333, "step": 53530 }, { "epoch": 15.402761795166859, "grad_norm": 1.098262906074524, "learning_rate": 0.0016919447640966628, "loss": 0.864, "step": 53540 }, { "epoch": 15.405638665132336, "grad_norm": 1.3930068016052246, "learning_rate": 0.0016918872266973534, "loss": 0.6677, "step": 53550 }, { "epoch": 15.408515535097813, "grad_norm": 0.8945462107658386, "learning_rate": 0.0016918296892980437, "loss": 0.8082, "step": 53560 }, { "epoch": 15.41139240506329, "grad_norm": 1.5067614316940308, "learning_rate": 0.001691772151898734, "loss": 0.7378, "step": 53570 }, { "epoch": 15.414269275028769, "grad_norm": 0.9869092106819153, "learning_rate": 0.0016917146144994246, "loss": 0.6891, "step": 53580 }, { "epoch": 15.417146144994247, "grad_norm": 1.8499497175216675, "learning_rate": 0.0016916570771001152, "loss": 0.8273, "step": 53590 }, { "epoch": 15.420023014959725, "grad_norm": 2.117055654525757, "learning_rate": 0.0016915995397008055, "loss": 0.6755, "step": 53600 }, { "epoch": 15.4228998849252, "grad_norm": 2.118234634399414, "learning_rate": 0.001691542002301496, "loss": 0.8026, "step": 53610 }, { "epoch": 15.425776754890679, "grad_norm": 1.218599557876587, "learning_rate": 0.0016914844649021865, "loss": 0.835, "step": 53620 }, { "epoch": 15.428653624856157, "grad_norm": 0.7034440040588379, "learning_rate": 0.0016914269275028768, "loss": 0.8467, "step": 53630 }, { "epoch": 15.431530494821635, "grad_norm": 0.985251247882843, "learning_rate": 0.0016913693901035674, "loss": 0.7918, "step": 53640 }, { "epoch": 15.434407364787111, "grad_norm": 1.0876638889312744, "learning_rate": 0.0016913118527042577, "loss": 0.8423, "step": 53650 }, { "epoch": 15.437284234752589, "grad_norm": 1.4048985242843628, "learning_rate": 0.0016912543153049483, "loss": 0.7561, "step": 53660 }, { "epoch": 15.440161104718067, "grad_norm": 1.178123950958252, "learning_rate": 0.0016911967779056388, "loss": 0.7103, "step": 53670 }, { "epoch": 15.443037974683545, "grad_norm": 1.076071858406067, "learning_rate": 0.001691139240506329, "loss": 0.9577, "step": 53680 }, { "epoch": 15.445914844649021, "grad_norm": 1.7089426517486572, "learning_rate": 0.0016910817031070195, "loss": 0.8392, "step": 53690 }, { "epoch": 15.448791714614499, "grad_norm": 2.1952555179595947, "learning_rate": 0.00169102416570771, "loss": 0.9336, "step": 53700 }, { "epoch": 15.451668584579977, "grad_norm": 1.5096023082733154, "learning_rate": 0.0016909666283084004, "loss": 0.6813, "step": 53710 }, { "epoch": 15.454545454545455, "grad_norm": 1.362622618675232, "learning_rate": 0.001690909090909091, "loss": 0.6829, "step": 53720 }, { "epoch": 15.457422324510933, "grad_norm": 1.6120198965072632, "learning_rate": 0.0016908515535097816, "loss": 0.8932, "step": 53730 }, { "epoch": 15.46029919447641, "grad_norm": 1.9748445749282837, "learning_rate": 0.0016907940161104717, "loss": 0.6566, "step": 53740 }, { "epoch": 15.463176064441887, "grad_norm": 1.1032816171646118, "learning_rate": 0.0016907364787111623, "loss": 0.9546, "step": 53750 }, { "epoch": 15.466052934407365, "grad_norm": 1.2755810022354126, "learning_rate": 0.0016906789413118528, "loss": 0.7864, "step": 53760 }, { "epoch": 15.468929804372843, "grad_norm": 1.5913782119750977, "learning_rate": 0.0016906214039125432, "loss": 0.7716, "step": 53770 }, { "epoch": 15.47180667433832, "grad_norm": 1.4184728860855103, "learning_rate": 0.0016905638665132337, "loss": 0.7036, "step": 53780 }, { "epoch": 15.474683544303797, "grad_norm": 1.226175308227539, "learning_rate": 0.001690506329113924, "loss": 0.8731, "step": 53790 }, { "epoch": 15.477560414269275, "grad_norm": 1.8460043668746948, "learning_rate": 0.0016904487917146144, "loss": 0.8023, "step": 53800 }, { "epoch": 15.480437284234753, "grad_norm": 0.9157655835151672, "learning_rate": 0.001690391254315305, "loss": 0.6686, "step": 53810 }, { "epoch": 15.48331415420023, "grad_norm": 2.5544369220733643, "learning_rate": 0.0016903337169159953, "loss": 0.8214, "step": 53820 }, { "epoch": 15.486191024165707, "grad_norm": 0.705062985420227, "learning_rate": 0.001690276179516686, "loss": 0.7177, "step": 53830 }, { "epoch": 15.489067894131185, "grad_norm": 1.0924876928329468, "learning_rate": 0.0016902186421173765, "loss": 0.8479, "step": 53840 }, { "epoch": 15.491944764096663, "grad_norm": 1.276867151260376, "learning_rate": 0.0016901611047180668, "loss": 0.8824, "step": 53850 }, { "epoch": 15.49482163406214, "grad_norm": 1.0901706218719482, "learning_rate": 0.0016901035673187572, "loss": 0.9784, "step": 53860 }, { "epoch": 15.497698504027618, "grad_norm": 1.706292986869812, "learning_rate": 0.0016900460299194477, "loss": 0.6267, "step": 53870 }, { "epoch": 15.500575373993096, "grad_norm": 1.1220366954803467, "learning_rate": 0.001689988492520138, "loss": 0.7236, "step": 53880 }, { "epoch": 15.503452243958574, "grad_norm": 1.2200918197631836, "learning_rate": 0.0016899309551208286, "loss": 0.7131, "step": 53890 }, { "epoch": 15.50632911392405, "grad_norm": 1.272170066833496, "learning_rate": 0.001689873417721519, "loss": 0.7635, "step": 53900 }, { "epoch": 15.509205983889528, "grad_norm": 0.8521543741226196, "learning_rate": 0.0016898158803222096, "loss": 0.5053, "step": 53910 }, { "epoch": 15.512082853855006, "grad_norm": 1.6421315670013428, "learning_rate": 0.0016897583429229, "loss": 0.7192, "step": 53920 }, { "epoch": 15.514959723820484, "grad_norm": 2.186854600906372, "learning_rate": 0.0016897008055235902, "loss": 0.629, "step": 53930 }, { "epoch": 15.517836593785962, "grad_norm": 0.8954242467880249, "learning_rate": 0.0016896432681242808, "loss": 0.7829, "step": 53940 }, { "epoch": 15.520713463751438, "grad_norm": 1.7187355756759644, "learning_rate": 0.0016895857307249714, "loss": 0.8473, "step": 53950 }, { "epoch": 15.523590333716916, "grad_norm": 1.07926607131958, "learning_rate": 0.0016895281933256617, "loss": 0.7949, "step": 53960 }, { "epoch": 15.526467203682394, "grad_norm": 0.8082513213157654, "learning_rate": 0.0016894706559263523, "loss": 0.6322, "step": 53970 }, { "epoch": 15.529344073647872, "grad_norm": 2.7341785430908203, "learning_rate": 0.0016894131185270426, "loss": 0.771, "step": 53980 }, { "epoch": 15.532220943613348, "grad_norm": 1.4856091737747192, "learning_rate": 0.001689355581127733, "loss": 0.9012, "step": 53990 }, { "epoch": 15.535097813578826, "grad_norm": 1.4638773202896118, "learning_rate": 0.0016892980437284235, "loss": 0.8275, "step": 54000 }, { "epoch": 15.537974683544304, "grad_norm": 1.6804676055908203, "learning_rate": 0.001689240506329114, "loss": 0.8205, "step": 54010 }, { "epoch": 15.540851553509782, "grad_norm": 1.465952754020691, "learning_rate": 0.0016891829689298045, "loss": 0.7634, "step": 54020 }, { "epoch": 15.543728423475258, "grad_norm": 1.3253254890441895, "learning_rate": 0.001689125431530495, "loss": 0.746, "step": 54030 }, { "epoch": 15.546605293440736, "grad_norm": 1.5533363819122314, "learning_rate": 0.0016890678941311851, "loss": 0.6463, "step": 54040 }, { "epoch": 15.549482163406214, "grad_norm": 0.8719034790992737, "learning_rate": 0.0016890103567318757, "loss": 0.6472, "step": 54050 }, { "epoch": 15.552359033371692, "grad_norm": 0.6943911910057068, "learning_rate": 0.0016889528193325663, "loss": 0.9564, "step": 54060 }, { "epoch": 15.55523590333717, "grad_norm": 1.4031428098678589, "learning_rate": 0.0016888952819332566, "loss": 0.7994, "step": 54070 }, { "epoch": 15.558112773302646, "grad_norm": 1.6951344013214111, "learning_rate": 0.0016888377445339472, "loss": 0.7761, "step": 54080 }, { "epoch": 15.560989643268124, "grad_norm": 1.3461147546768188, "learning_rate": 0.0016887802071346378, "loss": 0.7586, "step": 54090 }, { "epoch": 15.563866513233602, "grad_norm": 1.4502793550491333, "learning_rate": 0.0016887226697353279, "loss": 0.9106, "step": 54100 }, { "epoch": 15.566743383199078, "grad_norm": 1.1759980916976929, "learning_rate": 0.0016886651323360184, "loss": 0.9386, "step": 54110 }, { "epoch": 15.569620253164556, "grad_norm": 2.110184669494629, "learning_rate": 0.0016886075949367088, "loss": 0.7642, "step": 54120 }, { "epoch": 15.572497123130034, "grad_norm": 1.1067237854003906, "learning_rate": 0.0016885500575373994, "loss": 0.8488, "step": 54130 }, { "epoch": 15.575373993095512, "grad_norm": 2.4627766609191895, "learning_rate": 0.00168849252013809, "loss": 0.8496, "step": 54140 }, { "epoch": 15.57825086306099, "grad_norm": 0.9326212406158447, "learning_rate": 0.00168843498273878, "loss": 0.7308, "step": 54150 }, { "epoch": 15.581127733026467, "grad_norm": 1.369486689567566, "learning_rate": 0.0016883774453394706, "loss": 0.8649, "step": 54160 }, { "epoch": 15.584004602991945, "grad_norm": 1.4518038034439087, "learning_rate": 0.0016883199079401612, "loss": 0.5169, "step": 54170 }, { "epoch": 15.586881472957423, "grad_norm": 0.6832488775253296, "learning_rate": 0.0016882623705408515, "loss": 0.7478, "step": 54180 }, { "epoch": 15.5897583429229, "grad_norm": 1.4041802883148193, "learning_rate": 0.001688204833141542, "loss": 0.8321, "step": 54190 }, { "epoch": 15.592635212888377, "grad_norm": 2.004913806915283, "learning_rate": 0.0016881472957422327, "loss": 0.775, "step": 54200 }, { "epoch": 15.595512082853855, "grad_norm": 0.8226330876350403, "learning_rate": 0.0016880897583429228, "loss": 0.7796, "step": 54210 }, { "epoch": 15.598388952819333, "grad_norm": 1.4971903562545776, "learning_rate": 0.0016880322209436133, "loss": 0.6605, "step": 54220 }, { "epoch": 15.60126582278481, "grad_norm": 0.9914678335189819, "learning_rate": 0.0016879746835443037, "loss": 0.7735, "step": 54230 }, { "epoch": 15.604142692750287, "grad_norm": 0.9627618193626404, "learning_rate": 0.0016879171461449943, "loss": 0.7504, "step": 54240 }, { "epoch": 15.607019562715765, "grad_norm": 1.8395634889602661, "learning_rate": 0.0016878596087456848, "loss": 0.7216, "step": 54250 }, { "epoch": 15.609896432681243, "grad_norm": 0.7362044453620911, "learning_rate": 0.0016878020713463752, "loss": 0.725, "step": 54260 }, { "epoch": 15.61277330264672, "grad_norm": 1.0521665811538696, "learning_rate": 0.0016877445339470655, "loss": 0.6688, "step": 54270 }, { "epoch": 15.615650172612199, "grad_norm": 1.1895619630813599, "learning_rate": 0.001687686996547756, "loss": 0.7927, "step": 54280 }, { "epoch": 15.618527042577675, "grad_norm": 1.3475266695022583, "learning_rate": 0.0016876294591484464, "loss": 0.7456, "step": 54290 }, { "epoch": 15.621403912543153, "grad_norm": 1.4853241443634033, "learning_rate": 0.001687571921749137, "loss": 0.7547, "step": 54300 }, { "epoch": 15.624280782508631, "grad_norm": 1.7547080516815186, "learning_rate": 0.0016875143843498276, "loss": 0.7526, "step": 54310 }, { "epoch": 15.627157652474109, "grad_norm": 0.863179624080658, "learning_rate": 0.001687456846950518, "loss": 0.627, "step": 54320 }, { "epoch": 15.630034522439585, "grad_norm": 1.5562525987625122, "learning_rate": 0.0016873993095512083, "loss": 0.7281, "step": 54330 }, { "epoch": 15.632911392405063, "grad_norm": 1.4525810480117798, "learning_rate": 0.0016873417721518988, "loss": 0.845, "step": 54340 }, { "epoch": 15.635788262370541, "grad_norm": 0.9278258085250854, "learning_rate": 0.0016872842347525892, "loss": 0.7304, "step": 54350 }, { "epoch": 15.638665132336019, "grad_norm": 2.0464329719543457, "learning_rate": 0.0016872266973532797, "loss": 0.8849, "step": 54360 }, { "epoch": 15.641542002301495, "grad_norm": 0.8307523727416992, "learning_rate": 0.00168716915995397, "loss": 0.6725, "step": 54370 }, { "epoch": 15.644418872266973, "grad_norm": 1.5140280723571777, "learning_rate": 0.0016871116225546606, "loss": 0.7222, "step": 54380 }, { "epoch": 15.647295742232451, "grad_norm": 1.98822021484375, "learning_rate": 0.001687054085155351, "loss": 0.7917, "step": 54390 }, { "epoch": 15.65017261219793, "grad_norm": 1.302359938621521, "learning_rate": 0.0016869965477560413, "loss": 0.822, "step": 54400 }, { "epoch": 15.653049482163405, "grad_norm": 1.1896822452545166, "learning_rate": 0.001686939010356732, "loss": 0.7592, "step": 54410 }, { "epoch": 15.655926352128883, "grad_norm": 1.6515554189682007, "learning_rate": 0.0016868814729574225, "loss": 0.7642, "step": 54420 }, { "epoch": 15.658803222094361, "grad_norm": 1.223531723022461, "learning_rate": 0.0016868239355581128, "loss": 0.717, "step": 54430 }, { "epoch": 15.66168009205984, "grad_norm": 1.984127402305603, "learning_rate": 0.0016867663981588034, "loss": 0.7588, "step": 54440 }, { "epoch": 15.664556962025316, "grad_norm": 0.9966217279434204, "learning_rate": 0.0016867088607594937, "loss": 0.7499, "step": 54450 }, { "epoch": 15.667433831990794, "grad_norm": 1.7508491277694702, "learning_rate": 0.001686651323360184, "loss": 0.8177, "step": 54460 }, { "epoch": 15.670310701956272, "grad_norm": 2.3648581504821777, "learning_rate": 0.0016865937859608746, "loss": 0.9311, "step": 54470 }, { "epoch": 15.67318757192175, "grad_norm": 1.1289806365966797, "learning_rate": 0.001686536248561565, "loss": 0.8819, "step": 54480 }, { "epoch": 15.676064441887227, "grad_norm": 0.9197402000427246, "learning_rate": 0.0016864787111622555, "loss": 0.7051, "step": 54490 }, { "epoch": 15.678941311852704, "grad_norm": 1.1511263847351074, "learning_rate": 0.001686421173762946, "loss": 0.7022, "step": 54500 }, { "epoch": 15.681818181818182, "grad_norm": 1.1764037609100342, "learning_rate": 0.0016863636363636362, "loss": 0.6546, "step": 54510 }, { "epoch": 15.68469505178366, "grad_norm": 1.124755620956421, "learning_rate": 0.0016863060989643268, "loss": 0.6146, "step": 54520 }, { "epoch": 15.687571921749138, "grad_norm": 0.8523632287979126, "learning_rate": 0.0016862485615650174, "loss": 0.8262, "step": 54530 }, { "epoch": 15.690448791714614, "grad_norm": 1.197803020477295, "learning_rate": 0.0016861910241657077, "loss": 0.6827, "step": 54540 }, { "epoch": 15.693325661680092, "grad_norm": 0.7540414333343506, "learning_rate": 0.0016861334867663983, "loss": 0.7001, "step": 54550 }, { "epoch": 15.69620253164557, "grad_norm": 1.2089250087738037, "learning_rate": 0.0016860759493670888, "loss": 0.709, "step": 54560 }, { "epoch": 15.699079401611048, "grad_norm": 1.785646677017212, "learning_rate": 0.001686018411967779, "loss": 0.8128, "step": 54570 }, { "epoch": 15.701956271576524, "grad_norm": 1.592133641242981, "learning_rate": 0.0016859608745684695, "loss": 0.8303, "step": 54580 }, { "epoch": 15.704833141542002, "grad_norm": 1.6158301830291748, "learning_rate": 0.0016859033371691599, "loss": 0.8336, "step": 54590 }, { "epoch": 15.70771001150748, "grad_norm": 1.3711122274398804, "learning_rate": 0.0016858457997698504, "loss": 0.7319, "step": 54600 }, { "epoch": 15.710586881472958, "grad_norm": 0.856611430644989, "learning_rate": 0.001685788262370541, "loss": 0.6684, "step": 54610 }, { "epoch": 15.713463751438436, "grad_norm": 1.419658899307251, "learning_rate": 0.0016857307249712314, "loss": 0.7377, "step": 54620 }, { "epoch": 15.716340621403912, "grad_norm": 0.764744222164154, "learning_rate": 0.0016856731875719217, "loss": 0.9258, "step": 54630 }, { "epoch": 15.71921749136939, "grad_norm": 1.4048728942871094, "learning_rate": 0.0016856156501726123, "loss": 0.7486, "step": 54640 }, { "epoch": 15.722094361334868, "grad_norm": 1.1249475479125977, "learning_rate": 0.0016855581127733026, "loss": 0.6554, "step": 54650 }, { "epoch": 15.724971231300346, "grad_norm": 0.7694981694221497, "learning_rate": 0.0016855005753739932, "loss": 0.6405, "step": 54660 }, { "epoch": 15.727848101265822, "grad_norm": 2.400824546813965, "learning_rate": 0.0016854430379746837, "loss": 0.6643, "step": 54670 }, { "epoch": 15.7307249712313, "grad_norm": 1.1033802032470703, "learning_rate": 0.001685385500575374, "loss": 0.622, "step": 54680 }, { "epoch": 15.733601841196778, "grad_norm": 1.0648044347763062, "learning_rate": 0.0016853279631760644, "loss": 0.6933, "step": 54690 }, { "epoch": 15.736478711162256, "grad_norm": 1.014441728591919, "learning_rate": 0.0016852704257767548, "loss": 0.8058, "step": 54700 }, { "epoch": 15.739355581127732, "grad_norm": 3.368424415588379, "learning_rate": 0.0016852128883774453, "loss": 0.7302, "step": 54710 }, { "epoch": 15.74223245109321, "grad_norm": 0.8399114012718201, "learning_rate": 0.001685155350978136, "loss": 0.7949, "step": 54720 }, { "epoch": 15.745109321058688, "grad_norm": 0.7170528769493103, "learning_rate": 0.0016850978135788263, "loss": 0.7201, "step": 54730 }, { "epoch": 15.747986191024166, "grad_norm": 1.158556342124939, "learning_rate": 0.0016850402761795168, "loss": 0.8578, "step": 54740 }, { "epoch": 15.750863060989643, "grad_norm": 1.2432926893234253, "learning_rate": 0.0016849827387802072, "loss": 0.6748, "step": 54750 }, { "epoch": 15.75373993095512, "grad_norm": 1.315818428993225, "learning_rate": 0.0016849252013808975, "loss": 0.6681, "step": 54760 }, { "epoch": 15.756616800920598, "grad_norm": 0.9168164730072021, "learning_rate": 0.001684867663981588, "loss": 0.7048, "step": 54770 }, { "epoch": 15.759493670886076, "grad_norm": 0.8045198321342468, "learning_rate": 0.0016848101265822786, "loss": 1.0305, "step": 54780 }, { "epoch": 15.762370540851553, "grad_norm": 1.241408348083496, "learning_rate": 0.001684752589182969, "loss": 0.8385, "step": 54790 }, { "epoch": 15.76524741081703, "grad_norm": 1.5851855278015137, "learning_rate": 0.0016846950517836596, "loss": 0.7873, "step": 54800 }, { "epoch": 15.768124280782509, "grad_norm": 1.5156251192092896, "learning_rate": 0.0016846375143843497, "loss": 0.7118, "step": 54810 }, { "epoch": 15.771001150747987, "grad_norm": 1.1078941822052002, "learning_rate": 0.0016845799769850402, "loss": 0.7774, "step": 54820 }, { "epoch": 15.773878020713465, "grad_norm": 1.2147523164749146, "learning_rate": 0.0016845224395857308, "loss": 0.6531, "step": 54830 }, { "epoch": 15.77675489067894, "grad_norm": 0.8164410591125488, "learning_rate": 0.0016844649021864212, "loss": 0.7012, "step": 54840 }, { "epoch": 15.779631760644419, "grad_norm": 1.172612190246582, "learning_rate": 0.0016844073647871117, "loss": 0.7798, "step": 54850 }, { "epoch": 15.782508630609897, "grad_norm": 1.7105791568756104, "learning_rate": 0.0016843498273878023, "loss": 0.7784, "step": 54860 }, { "epoch": 15.785385500575375, "grad_norm": 1.3029935359954834, "learning_rate": 0.0016842922899884924, "loss": 0.6839, "step": 54870 }, { "epoch": 15.788262370540851, "grad_norm": 0.9093999862670898, "learning_rate": 0.001684234752589183, "loss": 0.9512, "step": 54880 }, { "epoch": 15.791139240506329, "grad_norm": 1.340453028678894, "learning_rate": 0.0016841772151898735, "loss": 0.7898, "step": 54890 }, { "epoch": 15.794016110471807, "grad_norm": 1.3306812047958374, "learning_rate": 0.0016841196777905639, "loss": 0.5856, "step": 54900 }, { "epoch": 15.796892980437285, "grad_norm": 1.6132960319519043, "learning_rate": 0.0016840621403912545, "loss": 0.7537, "step": 54910 }, { "epoch": 15.799769850402761, "grad_norm": 1.625346064567566, "learning_rate": 0.0016840046029919446, "loss": 0.7309, "step": 54920 }, { "epoch": 15.802646720368239, "grad_norm": 1.714422583580017, "learning_rate": 0.0016839470655926351, "loss": 0.7954, "step": 54930 }, { "epoch": 15.805523590333717, "grad_norm": 1.2956526279449463, "learning_rate": 0.0016838895281933257, "loss": 1.0299, "step": 54940 }, { "epoch": 15.808400460299195, "grad_norm": 0.7144485712051392, "learning_rate": 0.001683831990794016, "loss": 0.8956, "step": 54950 }, { "epoch": 15.811277330264673, "grad_norm": 1.1665899753570557, "learning_rate": 0.0016837744533947066, "loss": 0.866, "step": 54960 }, { "epoch": 15.81415420023015, "grad_norm": 1.304112195968628, "learning_rate": 0.0016837169159953972, "loss": 0.8364, "step": 54970 }, { "epoch": 15.817031070195627, "grad_norm": 1.678553581237793, "learning_rate": 0.0016836593785960873, "loss": 0.8244, "step": 54980 }, { "epoch": 15.819907940161105, "grad_norm": 1.6376457214355469, "learning_rate": 0.0016836018411967779, "loss": 0.8204, "step": 54990 }, { "epoch": 15.822784810126583, "grad_norm": 1.4411101341247559, "learning_rate": 0.0016835443037974684, "loss": 0.6731, "step": 55000 }, { "epoch": 15.82566168009206, "grad_norm": 2.312534809112549, "learning_rate": 0.0016834867663981588, "loss": 0.6844, "step": 55010 }, { "epoch": 15.828538550057537, "grad_norm": 1.6647461652755737, "learning_rate": 0.0016834292289988494, "loss": 0.7679, "step": 55020 }, { "epoch": 15.831415420023015, "grad_norm": 1.634221076965332, "learning_rate": 0.00168337169159954, "loss": 0.9814, "step": 55030 }, { "epoch": 15.834292289988493, "grad_norm": 1.1776517629623413, "learning_rate": 0.00168331415420023, "loss": 0.7907, "step": 55040 }, { "epoch": 15.83716915995397, "grad_norm": 1.5067558288574219, "learning_rate": 0.0016832566168009206, "loss": 0.7421, "step": 55050 }, { "epoch": 15.840046029919447, "grad_norm": 1.728089690208435, "learning_rate": 0.001683199079401611, "loss": 0.7975, "step": 55060 }, { "epoch": 15.842922899884925, "grad_norm": 1.545763373374939, "learning_rate": 0.0016831415420023015, "loss": 0.7063, "step": 55070 }, { "epoch": 15.845799769850403, "grad_norm": 1.3508135080337524, "learning_rate": 0.001683084004602992, "loss": 0.7645, "step": 55080 }, { "epoch": 15.84867663981588, "grad_norm": 1.2631471157073975, "learning_rate": 0.0016830264672036824, "loss": 0.6467, "step": 55090 }, { "epoch": 15.851553509781358, "grad_norm": 0.7294857501983643, "learning_rate": 0.0016829689298043728, "loss": 0.5784, "step": 55100 }, { "epoch": 15.854430379746836, "grad_norm": 1.5954837799072266, "learning_rate": 0.0016829113924050633, "loss": 0.8194, "step": 55110 }, { "epoch": 15.857307249712314, "grad_norm": 2.340003490447998, "learning_rate": 0.0016828538550057537, "loss": 0.7727, "step": 55120 }, { "epoch": 15.86018411967779, "grad_norm": 0.5814921259880066, "learning_rate": 0.0016827963176064443, "loss": 0.7404, "step": 55130 }, { "epoch": 15.863060989643268, "grad_norm": 0.839806318283081, "learning_rate": 0.0016827387802071348, "loss": 0.8642, "step": 55140 }, { "epoch": 15.865937859608746, "grad_norm": 1.1575061082839966, "learning_rate": 0.0016826812428078252, "loss": 0.6162, "step": 55150 }, { "epoch": 15.868814729574224, "grad_norm": 1.3208814859390259, "learning_rate": 0.0016826237054085155, "loss": 0.6699, "step": 55160 }, { "epoch": 15.871691599539702, "grad_norm": 0.9683300852775574, "learning_rate": 0.0016825661680092059, "loss": 0.8601, "step": 55170 }, { "epoch": 15.874568469505178, "grad_norm": 1.6546951532363892, "learning_rate": 0.0016825086306098964, "loss": 0.9551, "step": 55180 }, { "epoch": 15.877445339470656, "grad_norm": 3.2850472927093506, "learning_rate": 0.001682451093210587, "loss": 0.9132, "step": 55190 }, { "epoch": 15.880322209436134, "grad_norm": 1.5905482769012451, "learning_rate": 0.0016823935558112773, "loss": 0.6292, "step": 55200 }, { "epoch": 15.883199079401612, "grad_norm": 1.014876365661621, "learning_rate": 0.001682336018411968, "loss": 0.6717, "step": 55210 }, { "epoch": 15.886075949367088, "grad_norm": 1.3248387575149536, "learning_rate": 0.0016822784810126582, "loss": 0.7675, "step": 55220 }, { "epoch": 15.888952819332566, "grad_norm": 1.525429368019104, "learning_rate": 0.0016822209436133486, "loss": 0.8423, "step": 55230 }, { "epoch": 15.891829689298044, "grad_norm": 0.923863410949707, "learning_rate": 0.0016821634062140392, "loss": 0.7079, "step": 55240 }, { "epoch": 15.894706559263522, "grad_norm": 1.0167615413665771, "learning_rate": 0.0016821058688147297, "loss": 0.6576, "step": 55250 }, { "epoch": 15.897583429228998, "grad_norm": 1.2715842723846436, "learning_rate": 0.00168204833141542, "loss": 0.7977, "step": 55260 }, { "epoch": 15.900460299194476, "grad_norm": 1.2772510051727295, "learning_rate": 0.0016819907940161106, "loss": 0.8445, "step": 55270 }, { "epoch": 15.903337169159954, "grad_norm": 1.6835169792175293, "learning_rate": 0.0016819332566168008, "loss": 0.7304, "step": 55280 }, { "epoch": 15.906214039125432, "grad_norm": 1.3272390365600586, "learning_rate": 0.0016818757192174913, "loss": 0.7238, "step": 55290 }, { "epoch": 15.909090909090908, "grad_norm": 1.3273022174835205, "learning_rate": 0.001681818181818182, "loss": 0.9989, "step": 55300 }, { "epoch": 15.911967779056386, "grad_norm": 1.0362075567245483, "learning_rate": 0.0016817606444188722, "loss": 0.7848, "step": 55310 }, { "epoch": 15.914844649021864, "grad_norm": 1.3757216930389404, "learning_rate": 0.0016817031070195628, "loss": 0.6634, "step": 55320 }, { "epoch": 15.917721518987342, "grad_norm": 1.2001781463623047, "learning_rate": 0.0016816455696202534, "loss": 0.952, "step": 55330 }, { "epoch": 15.920598388952818, "grad_norm": 0.9363238215446472, "learning_rate": 0.0016815880322209435, "loss": 0.9992, "step": 55340 }, { "epoch": 15.923475258918296, "grad_norm": 1.9778684377670288, "learning_rate": 0.001681530494821634, "loss": 0.8243, "step": 55350 }, { "epoch": 15.926352128883774, "grad_norm": 1.8321928977966309, "learning_rate": 0.0016814729574223246, "loss": 0.7868, "step": 55360 }, { "epoch": 15.929228998849252, "grad_norm": 0.9875156283378601, "learning_rate": 0.001681415420023015, "loss": 0.6849, "step": 55370 }, { "epoch": 15.93210586881473, "grad_norm": 2.0897271633148193, "learning_rate": 0.0016813578826237055, "loss": 1.0367, "step": 55380 }, { "epoch": 15.934982738780207, "grad_norm": 0.8270977139472961, "learning_rate": 0.0016813003452243959, "loss": 0.5865, "step": 55390 }, { "epoch": 15.937859608745685, "grad_norm": 1.7798585891723633, "learning_rate": 0.0016812428078250862, "loss": 0.8106, "step": 55400 }, { "epoch": 15.940736478711163, "grad_norm": 0.8162516951560974, "learning_rate": 0.0016811852704257768, "loss": 0.6283, "step": 55410 }, { "epoch": 15.94361334867664, "grad_norm": 0.9253304600715637, "learning_rate": 0.0016811277330264671, "loss": 0.8253, "step": 55420 }, { "epoch": 15.946490218642117, "grad_norm": 1.1742221117019653, "learning_rate": 0.0016810701956271577, "loss": 0.8673, "step": 55430 }, { "epoch": 15.949367088607595, "grad_norm": 1.2281851768493652, "learning_rate": 0.0016810126582278483, "loss": 0.7656, "step": 55440 }, { "epoch": 15.952243958573073, "grad_norm": 1.3760372400283813, "learning_rate": 0.0016809551208285386, "loss": 0.7411, "step": 55450 }, { "epoch": 15.95512082853855, "grad_norm": 1.565773844718933, "learning_rate": 0.001680897583429229, "loss": 0.7406, "step": 55460 }, { "epoch": 15.957997698504027, "grad_norm": 2.0910940170288086, "learning_rate": 0.0016808400460299195, "loss": 0.9267, "step": 55470 }, { "epoch": 15.960874568469505, "grad_norm": 2.5331509113311768, "learning_rate": 0.0016807825086306099, "loss": 0.6749, "step": 55480 }, { "epoch": 15.963751438434983, "grad_norm": 2.280701160430908, "learning_rate": 0.0016807249712313004, "loss": 0.8204, "step": 55490 }, { "epoch": 15.96662830840046, "grad_norm": 1.186996340751648, "learning_rate": 0.0016806674338319908, "loss": 0.5605, "step": 55500 }, { "epoch": 15.969505178365939, "grad_norm": 1.5441088676452637, "learning_rate": 0.0016806098964326813, "loss": 0.5957, "step": 55510 }, { "epoch": 15.972382048331415, "grad_norm": 1.0071730613708496, "learning_rate": 0.0016805523590333717, "loss": 0.5458, "step": 55520 }, { "epoch": 15.975258918296893, "grad_norm": 1.6801151037216187, "learning_rate": 0.001680494821634062, "loss": 0.8334, "step": 55530 }, { "epoch": 15.978135788262371, "grad_norm": 0.8913708329200745, "learning_rate": 0.0016804372842347526, "loss": 0.607, "step": 55540 }, { "epoch": 15.981012658227849, "grad_norm": 0.9865275621414185, "learning_rate": 0.0016803797468354432, "loss": 0.7137, "step": 55550 }, { "epoch": 15.983889528193325, "grad_norm": 1.7316879034042358, "learning_rate": 0.0016803222094361335, "loss": 0.7383, "step": 55560 }, { "epoch": 15.986766398158803, "grad_norm": 0.9915218949317932, "learning_rate": 0.001680264672036824, "loss": 0.7688, "step": 55570 }, { "epoch": 15.989643268124281, "grad_norm": 0.9432808756828308, "learning_rate": 0.0016802071346375144, "loss": 0.7386, "step": 55580 }, { "epoch": 15.992520138089759, "grad_norm": 1.1271519660949707, "learning_rate": 0.0016801495972382048, "loss": 0.7013, "step": 55590 }, { "epoch": 15.995397008055235, "grad_norm": 1.8839000463485718, "learning_rate": 0.0016800920598388953, "loss": 0.8491, "step": 55600 }, { "epoch": 15.998273878020713, "grad_norm": 1.7493869066238403, "learning_rate": 0.0016800345224395857, "loss": 0.7293, "step": 55610 }, { "epoch": 16.00115074798619, "grad_norm": 1.02761709690094, "learning_rate": 0.0016799769850402763, "loss": 0.7109, "step": 55620 }, { "epoch": 16.004027617951667, "grad_norm": 1.4696158170700073, "learning_rate": 0.0016799194476409668, "loss": 0.7786, "step": 55630 }, { "epoch": 16.006904487917147, "grad_norm": 1.133388876914978, "learning_rate": 0.001679861910241657, "loss": 0.7161, "step": 55640 }, { "epoch": 16.009781357882623, "grad_norm": 0.7590664029121399, "learning_rate": 0.0016798043728423475, "loss": 0.6427, "step": 55650 }, { "epoch": 16.0126582278481, "grad_norm": 1.124159336090088, "learning_rate": 0.001679746835443038, "loss": 0.6546, "step": 55660 }, { "epoch": 16.01553509781358, "grad_norm": 1.6844911575317383, "learning_rate": 0.0016796892980437284, "loss": 0.8433, "step": 55670 }, { "epoch": 16.018411967779056, "grad_norm": 0.8324518203735352, "learning_rate": 0.001679631760644419, "loss": 0.5918, "step": 55680 }, { "epoch": 16.021288837744535, "grad_norm": 1.3413217067718506, "learning_rate": 0.0016795742232451095, "loss": 0.6769, "step": 55690 }, { "epoch": 16.02416570771001, "grad_norm": 1.7681446075439453, "learning_rate": 0.0016795166858457997, "loss": 0.761, "step": 55700 }, { "epoch": 16.027042577675488, "grad_norm": 1.309334635734558, "learning_rate": 0.0016794591484464902, "loss": 0.6885, "step": 55710 }, { "epoch": 16.029919447640967, "grad_norm": 2.062044858932495, "learning_rate": 0.0016794016110471808, "loss": 0.806, "step": 55720 }, { "epoch": 16.032796317606444, "grad_norm": 1.5603677034378052, "learning_rate": 0.0016793440736478712, "loss": 0.6737, "step": 55730 }, { "epoch": 16.035673187571923, "grad_norm": 1.7024551630020142, "learning_rate": 0.0016792865362485617, "loss": 0.7617, "step": 55740 }, { "epoch": 16.0385500575374, "grad_norm": 1.133318543434143, "learning_rate": 0.0016792289988492518, "loss": 0.5959, "step": 55750 }, { "epoch": 16.041426927502876, "grad_norm": 1.7131155729293823, "learning_rate": 0.0016791714614499424, "loss": 0.8987, "step": 55760 }, { "epoch": 16.044303797468356, "grad_norm": 1.0642141103744507, "learning_rate": 0.001679113924050633, "loss": 0.6675, "step": 55770 }, { "epoch": 16.04718066743383, "grad_norm": 1.5177690982818604, "learning_rate": 0.0016790563866513233, "loss": 0.7454, "step": 55780 }, { "epoch": 16.050057537399308, "grad_norm": 0.7772868275642395, "learning_rate": 0.0016789988492520139, "loss": 0.9142, "step": 55790 }, { "epoch": 16.052934407364788, "grad_norm": 1.6607117652893066, "learning_rate": 0.0016789413118527045, "loss": 0.8364, "step": 55800 }, { "epoch": 16.055811277330264, "grad_norm": 1.6340950727462769, "learning_rate": 0.0016788837744533946, "loss": 0.6421, "step": 55810 }, { "epoch": 16.058688147295744, "grad_norm": 1.7388497591018677, "learning_rate": 0.0016788262370540851, "loss": 0.803, "step": 55820 }, { "epoch": 16.06156501726122, "grad_norm": 0.8321808576583862, "learning_rate": 0.0016787686996547757, "loss": 0.6826, "step": 55830 }, { "epoch": 16.064441887226696, "grad_norm": 1.4914342164993286, "learning_rate": 0.001678711162255466, "loss": 0.8496, "step": 55840 }, { "epoch": 16.067318757192176, "grad_norm": 1.1249382495880127, "learning_rate": 0.0016786536248561566, "loss": 0.7079, "step": 55850 }, { "epoch": 16.070195627157652, "grad_norm": 1.257369875907898, "learning_rate": 0.001678596087456847, "loss": 0.6362, "step": 55860 }, { "epoch": 16.073072497123132, "grad_norm": 1.5919914245605469, "learning_rate": 0.0016785385500575373, "loss": 0.6197, "step": 55870 }, { "epoch": 16.075949367088608, "grad_norm": 1.2454328536987305, "learning_rate": 0.0016784810126582279, "loss": 0.8372, "step": 55880 }, { "epoch": 16.078826237054084, "grad_norm": 0.9563472867012024, "learning_rate": 0.0016784234752589182, "loss": 0.5495, "step": 55890 }, { "epoch": 16.081703107019564, "grad_norm": 1.8395062685012817, "learning_rate": 0.0016783659378596088, "loss": 0.6537, "step": 55900 }, { "epoch": 16.08457997698504, "grad_norm": 1.0587934255599976, "learning_rate": 0.0016783084004602994, "loss": 0.6174, "step": 55910 }, { "epoch": 16.087456846950516, "grad_norm": 0.873705267906189, "learning_rate": 0.0016782508630609897, "loss": 0.6755, "step": 55920 }, { "epoch": 16.090333716915996, "grad_norm": 1.6350171566009521, "learning_rate": 0.00167819332566168, "loss": 0.6426, "step": 55930 }, { "epoch": 16.093210586881472, "grad_norm": 2.291867733001709, "learning_rate": 0.0016781357882623706, "loss": 0.7535, "step": 55940 }, { "epoch": 16.096087456846952, "grad_norm": 1.2719045877456665, "learning_rate": 0.001678078250863061, "loss": 0.8749, "step": 55950 }, { "epoch": 16.09896432681243, "grad_norm": 0.9714142084121704, "learning_rate": 0.0016780207134637515, "loss": 0.6861, "step": 55960 }, { "epoch": 16.101841196777904, "grad_norm": 3.496828079223633, "learning_rate": 0.0016779631760644419, "loss": 0.8609, "step": 55970 }, { "epoch": 16.104718066743384, "grad_norm": 1.6453157663345337, "learning_rate": 0.0016779056386651324, "loss": 0.8368, "step": 55980 }, { "epoch": 16.10759493670886, "grad_norm": 1.2274091243743896, "learning_rate": 0.0016778481012658228, "loss": 0.7526, "step": 55990 }, { "epoch": 16.110471806674337, "grad_norm": 1.420494794845581, "learning_rate": 0.0016777905638665131, "loss": 0.6948, "step": 56000 }, { "epoch": 16.113348676639816, "grad_norm": 1.2714145183563232, "learning_rate": 0.0016777330264672037, "loss": 0.5585, "step": 56010 }, { "epoch": 16.116225546605293, "grad_norm": 1.0938037633895874, "learning_rate": 0.0016776754890678943, "loss": 0.6973, "step": 56020 }, { "epoch": 16.119102416570772, "grad_norm": 1.046505331993103, "learning_rate": 0.0016776179516685846, "loss": 0.574, "step": 56030 }, { "epoch": 16.12197928653625, "grad_norm": 1.4029957056045532, "learning_rate": 0.0016775604142692752, "loss": 0.6833, "step": 56040 }, { "epoch": 16.124856156501725, "grad_norm": 0.9959986805915833, "learning_rate": 0.0016775028768699655, "loss": 0.6428, "step": 56050 }, { "epoch": 16.127733026467205, "grad_norm": 1.632708191871643, "learning_rate": 0.0016774453394706559, "loss": 0.6031, "step": 56060 }, { "epoch": 16.13060989643268, "grad_norm": 1.6110947132110596, "learning_rate": 0.0016773878020713464, "loss": 0.919, "step": 56070 }, { "epoch": 16.13348676639816, "grad_norm": 2.383202314376831, "learning_rate": 0.0016773302646720368, "loss": 0.705, "step": 56080 }, { "epoch": 16.136363636363637, "grad_norm": 1.2571115493774414, "learning_rate": 0.0016772727272727273, "loss": 0.7123, "step": 56090 }, { "epoch": 16.139240506329113, "grad_norm": 1.2813613414764404, "learning_rate": 0.001677215189873418, "loss": 0.6966, "step": 56100 }, { "epoch": 16.142117376294593, "grad_norm": 1.5758622884750366, "learning_rate": 0.001677157652474108, "loss": 0.7521, "step": 56110 }, { "epoch": 16.14499424626007, "grad_norm": 1.3724827766418457, "learning_rate": 0.0016771001150747986, "loss": 0.7814, "step": 56120 }, { "epoch": 16.147871116225545, "grad_norm": 2.0188474655151367, "learning_rate": 0.0016770425776754892, "loss": 0.9152, "step": 56130 }, { "epoch": 16.150747986191025, "grad_norm": 1.040043830871582, "learning_rate": 0.0016769850402761795, "loss": 0.8881, "step": 56140 }, { "epoch": 16.1536248561565, "grad_norm": 1.52669095993042, "learning_rate": 0.00167692750287687, "loss": 0.8954, "step": 56150 }, { "epoch": 16.15650172612198, "grad_norm": 0.6642987132072449, "learning_rate": 0.0016768699654775606, "loss": 0.8438, "step": 56160 }, { "epoch": 16.159378596087457, "grad_norm": 1.6331441402435303, "learning_rate": 0.0016768124280782508, "loss": 0.8573, "step": 56170 }, { "epoch": 16.162255466052933, "grad_norm": 2.7317919731140137, "learning_rate": 0.0016767548906789413, "loss": 0.6234, "step": 56180 }, { "epoch": 16.165132336018413, "grad_norm": 2.5715136528015137, "learning_rate": 0.0016766973532796317, "loss": 0.6856, "step": 56190 }, { "epoch": 16.16800920598389, "grad_norm": 1.532976508140564, "learning_rate": 0.0016766398158803222, "loss": 0.7133, "step": 56200 }, { "epoch": 16.170886075949365, "grad_norm": 1.6491793394088745, "learning_rate": 0.0016765822784810128, "loss": 0.8165, "step": 56210 }, { "epoch": 16.173762945914845, "grad_norm": 1.7924752235412598, "learning_rate": 0.0016765247410817031, "loss": 0.7615, "step": 56220 }, { "epoch": 16.17663981588032, "grad_norm": 1.0487385988235474, "learning_rate": 0.0016764672036823935, "loss": 0.6925, "step": 56230 }, { "epoch": 16.1795166858458, "grad_norm": 1.196157455444336, "learning_rate": 0.001676409666283084, "loss": 0.6225, "step": 56240 }, { "epoch": 16.182393555811277, "grad_norm": 1.249588131904602, "learning_rate": 0.0016763521288837744, "loss": 0.5934, "step": 56250 }, { "epoch": 16.185270425776753, "grad_norm": 1.0900005102157593, "learning_rate": 0.001676294591484465, "loss": 0.7319, "step": 56260 }, { "epoch": 16.188147295742233, "grad_norm": 1.3588839769363403, "learning_rate": 0.0016762370540851555, "loss": 0.5498, "step": 56270 }, { "epoch": 16.19102416570771, "grad_norm": 1.0915426015853882, "learning_rate": 0.0016761795166858459, "loss": 0.8952, "step": 56280 }, { "epoch": 16.19390103567319, "grad_norm": 1.20509672164917, "learning_rate": 0.0016761219792865362, "loss": 0.5807, "step": 56290 }, { "epoch": 16.196777905638665, "grad_norm": 1.9709227085113525, "learning_rate": 0.0016760644418872268, "loss": 0.9162, "step": 56300 }, { "epoch": 16.19965477560414, "grad_norm": 1.222725749015808, "learning_rate": 0.0016760069044879171, "loss": 0.6571, "step": 56310 }, { "epoch": 16.20253164556962, "grad_norm": 0.9494869112968445, "learning_rate": 0.0016759493670886077, "loss": 0.7545, "step": 56320 }, { "epoch": 16.205408515535098, "grad_norm": 1.138251781463623, "learning_rate": 0.001675891829689298, "loss": 0.5844, "step": 56330 }, { "epoch": 16.208285385500574, "grad_norm": 1.2112956047058105, "learning_rate": 0.0016758342922899886, "loss": 0.6034, "step": 56340 }, { "epoch": 16.211162255466053, "grad_norm": 1.978869915008545, "learning_rate": 0.001675776754890679, "loss": 0.91, "step": 56350 }, { "epoch": 16.21403912543153, "grad_norm": 0.7005031108856201, "learning_rate": 0.0016757192174913693, "loss": 0.6303, "step": 56360 }, { "epoch": 16.21691599539701, "grad_norm": 1.1849491596221924, "learning_rate": 0.0016756616800920599, "loss": 0.7442, "step": 56370 }, { "epoch": 16.219792865362486, "grad_norm": 1.6350736618041992, "learning_rate": 0.0016756041426927504, "loss": 0.71, "step": 56380 }, { "epoch": 16.222669735327962, "grad_norm": 1.068589210510254, "learning_rate": 0.0016755466052934408, "loss": 0.8175, "step": 56390 }, { "epoch": 16.22554660529344, "grad_norm": 2.0833945274353027, "learning_rate": 0.0016754890678941313, "loss": 0.719, "step": 56400 }, { "epoch": 16.228423475258918, "grad_norm": 1.2354179620742798, "learning_rate": 0.0016754315304948217, "loss": 0.8893, "step": 56410 }, { "epoch": 16.231300345224398, "grad_norm": 0.8673328161239624, "learning_rate": 0.001675373993095512, "loss": 0.8673, "step": 56420 }, { "epoch": 16.234177215189874, "grad_norm": 1.359466791152954, "learning_rate": 0.0016753164556962026, "loss": 0.776, "step": 56430 }, { "epoch": 16.23705408515535, "grad_norm": 1.586113691329956, "learning_rate": 0.001675258918296893, "loss": 1.0018, "step": 56440 }, { "epoch": 16.23993095512083, "grad_norm": 0.8456898927688599, "learning_rate": 0.0016752013808975835, "loss": 0.8366, "step": 56450 }, { "epoch": 16.242807825086306, "grad_norm": 1.183385968208313, "learning_rate": 0.001675143843498274, "loss": 0.7268, "step": 56460 }, { "epoch": 16.245684695051782, "grad_norm": 1.0146796703338623, "learning_rate": 0.0016750863060989642, "loss": 0.7217, "step": 56470 }, { "epoch": 16.248561565017262, "grad_norm": 2.546718120574951, "learning_rate": 0.0016750287686996548, "loss": 0.8335, "step": 56480 }, { "epoch": 16.251438434982738, "grad_norm": 0.8109592795372009, "learning_rate": 0.0016749712313003453, "loss": 0.7464, "step": 56490 }, { "epoch": 16.254315304948218, "grad_norm": 1.044926404953003, "learning_rate": 0.0016749136939010357, "loss": 0.7745, "step": 56500 }, { "epoch": 16.257192174913694, "grad_norm": 1.489577293395996, "learning_rate": 0.0016748561565017262, "loss": 0.8584, "step": 56510 }, { "epoch": 16.26006904487917, "grad_norm": 0.9414940476417542, "learning_rate": 0.0016747986191024168, "loss": 0.6322, "step": 56520 }, { "epoch": 16.26294591484465, "grad_norm": 1.6070277690887451, "learning_rate": 0.001674741081703107, "loss": 0.6547, "step": 56530 }, { "epoch": 16.265822784810126, "grad_norm": 0.8447463512420654, "learning_rate": 0.0016746835443037975, "loss": 0.8091, "step": 56540 }, { "epoch": 16.268699654775602, "grad_norm": 1.8467206954956055, "learning_rate": 0.0016746260069044879, "loss": 0.8133, "step": 56550 }, { "epoch": 16.271576524741082, "grad_norm": 1.5499637126922607, "learning_rate": 0.0016745684695051784, "loss": 0.7027, "step": 56560 }, { "epoch": 16.27445339470656, "grad_norm": 1.468657374382019, "learning_rate": 0.001674510932105869, "loss": 0.6517, "step": 56570 }, { "epoch": 16.277330264672038, "grad_norm": 1.4332664012908936, "learning_rate": 0.0016744533947065591, "loss": 0.7265, "step": 56580 }, { "epoch": 16.280207134637514, "grad_norm": 1.6753853559494019, "learning_rate": 0.0016743958573072497, "loss": 0.76, "step": 56590 }, { "epoch": 16.28308400460299, "grad_norm": 1.0300391912460327, "learning_rate": 0.0016743383199079402, "loss": 0.7031, "step": 56600 }, { "epoch": 16.28596087456847, "grad_norm": 0.7937681674957275, "learning_rate": 0.0016742807825086306, "loss": 0.6375, "step": 56610 }, { "epoch": 16.288837744533947, "grad_norm": 1.4089218378067017, "learning_rate": 0.0016742232451093212, "loss": 0.8704, "step": 56620 }, { "epoch": 16.291714614499426, "grad_norm": 1.4423937797546387, "learning_rate": 0.0016741657077100117, "loss": 0.5889, "step": 56630 }, { "epoch": 16.294591484464902, "grad_norm": 0.9842143654823303, "learning_rate": 0.0016741081703107018, "loss": 0.636, "step": 56640 }, { "epoch": 16.29746835443038, "grad_norm": 1.0969338417053223, "learning_rate": 0.0016740506329113924, "loss": 0.6395, "step": 56650 }, { "epoch": 16.30034522439586, "grad_norm": 0.8699008226394653, "learning_rate": 0.0016739930955120828, "loss": 0.6782, "step": 56660 }, { "epoch": 16.303222094361335, "grad_norm": 1.196638822555542, "learning_rate": 0.0016739355581127733, "loss": 0.8366, "step": 56670 }, { "epoch": 16.30609896432681, "grad_norm": 1.831987738609314, "learning_rate": 0.0016738780207134639, "loss": 0.595, "step": 56680 }, { "epoch": 16.30897583429229, "grad_norm": 2.109386920928955, "learning_rate": 0.0016738204833141542, "loss": 0.7088, "step": 56690 }, { "epoch": 16.311852704257767, "grad_norm": 3.079275369644165, "learning_rate": 0.0016737629459148446, "loss": 0.6795, "step": 56700 }, { "epoch": 16.314729574223247, "grad_norm": 2.849172830581665, "learning_rate": 0.0016737054085155351, "loss": 0.849, "step": 56710 }, { "epoch": 16.317606444188723, "grad_norm": 1.1127939224243164, "learning_rate": 0.0016736478711162255, "loss": 0.7575, "step": 56720 }, { "epoch": 16.3204833141542, "grad_norm": 1.2201555967330933, "learning_rate": 0.001673590333716916, "loss": 0.7845, "step": 56730 }, { "epoch": 16.32336018411968, "grad_norm": 1.0673887729644775, "learning_rate": 0.0016735327963176066, "loss": 0.6883, "step": 56740 }, { "epoch": 16.326237054085155, "grad_norm": 1.5329209566116333, "learning_rate": 0.001673475258918297, "loss": 0.8098, "step": 56750 }, { "epoch": 16.32911392405063, "grad_norm": 0.8261838555335999, "learning_rate": 0.0016734177215189873, "loss": 0.7167, "step": 56760 }, { "epoch": 16.33199079401611, "grad_norm": 1.2047885656356812, "learning_rate": 0.0016733601841196777, "loss": 0.7743, "step": 56770 }, { "epoch": 16.334867663981587, "grad_norm": 1.1351536512374878, "learning_rate": 0.0016733026467203682, "loss": 0.6233, "step": 56780 }, { "epoch": 16.337744533947067, "grad_norm": 1.6468837261199951, "learning_rate": 0.0016732451093210588, "loss": 0.8168, "step": 56790 }, { "epoch": 16.340621403912543, "grad_norm": 0.8803701996803284, "learning_rate": 0.0016731875719217491, "loss": 0.6676, "step": 56800 }, { "epoch": 16.34349827387802, "grad_norm": 1.455000400543213, "learning_rate": 0.0016731300345224397, "loss": 0.7416, "step": 56810 }, { "epoch": 16.3463751438435, "grad_norm": 1.2738124132156372, "learning_rate": 0.00167307249712313, "loss": 0.6606, "step": 56820 }, { "epoch": 16.349252013808975, "grad_norm": 1.597474217414856, "learning_rate": 0.0016730149597238204, "loss": 0.7231, "step": 56830 }, { "epoch": 16.352128883774455, "grad_norm": 1.7838383913040161, "learning_rate": 0.001672957422324511, "loss": 0.7349, "step": 56840 }, { "epoch": 16.35500575373993, "grad_norm": 1.4587165117263794, "learning_rate": 0.0016728998849252015, "loss": 0.7573, "step": 56850 }, { "epoch": 16.357882623705407, "grad_norm": 1.214585304260254, "learning_rate": 0.0016728423475258919, "loss": 0.6406, "step": 56860 }, { "epoch": 16.360759493670887, "grad_norm": 3.071873188018799, "learning_rate": 0.0016727848101265824, "loss": 0.7994, "step": 56870 }, { "epoch": 16.363636363636363, "grad_norm": 2.0982730388641357, "learning_rate": 0.0016727272727272726, "loss": 0.9246, "step": 56880 }, { "epoch": 16.36651323360184, "grad_norm": 1.918656826019287, "learning_rate": 0.0016726697353279631, "loss": 0.8045, "step": 56890 }, { "epoch": 16.36939010356732, "grad_norm": 0.9435940384864807, "learning_rate": 0.0016726121979286537, "loss": 0.7269, "step": 56900 }, { "epoch": 16.372266973532795, "grad_norm": 1.0497597455978394, "learning_rate": 0.001672554660529344, "loss": 0.8346, "step": 56910 }, { "epoch": 16.375143843498275, "grad_norm": 0.8675594925880432, "learning_rate": 0.0016724971231300346, "loss": 0.8102, "step": 56920 }, { "epoch": 16.37802071346375, "grad_norm": 0.7683021426200867, "learning_rate": 0.0016724395857307252, "loss": 0.8658, "step": 56930 }, { "epoch": 16.380897583429228, "grad_norm": 2.0596351623535156, "learning_rate": 0.0016723820483314153, "loss": 0.7588, "step": 56940 }, { "epoch": 16.383774453394707, "grad_norm": 2.3091671466827393, "learning_rate": 0.0016723245109321059, "loss": 0.7223, "step": 56950 }, { "epoch": 16.386651323360184, "grad_norm": 1.1897261142730713, "learning_rate": 0.0016722669735327964, "loss": 0.7371, "step": 56960 }, { "epoch": 16.389528193325663, "grad_norm": 1.1188530921936035, "learning_rate": 0.0016722094361334868, "loss": 0.7465, "step": 56970 }, { "epoch": 16.39240506329114, "grad_norm": 1.791678547859192, "learning_rate": 0.0016721518987341773, "loss": 0.9087, "step": 56980 }, { "epoch": 16.395281933256616, "grad_norm": 1.2947992086410522, "learning_rate": 0.001672094361334868, "loss": 0.8391, "step": 56990 }, { "epoch": 16.398158803222096, "grad_norm": 1.1390025615692139, "learning_rate": 0.001672036823935558, "loss": 0.8392, "step": 57000 }, { "epoch": 16.40103567318757, "grad_norm": 0.9161383509635925, "learning_rate": 0.0016719792865362486, "loss": 0.7231, "step": 57010 }, { "epoch": 16.403912543153048, "grad_norm": 1.7100629806518555, "learning_rate": 0.001671921749136939, "loss": 0.778, "step": 57020 }, { "epoch": 16.406789413118528, "grad_norm": 1.6574101448059082, "learning_rate": 0.0016718642117376295, "loss": 0.7051, "step": 57030 }, { "epoch": 16.409666283084004, "grad_norm": 1.1335331201553345, "learning_rate": 0.00167180667433832, "loss": 0.7308, "step": 57040 }, { "epoch": 16.412543153049484, "grad_norm": 0.9937106966972351, "learning_rate": 0.0016717491369390104, "loss": 0.6771, "step": 57050 }, { "epoch": 16.41542002301496, "grad_norm": 1.157829761505127, "learning_rate": 0.0016716915995397008, "loss": 0.7161, "step": 57060 }, { "epoch": 16.418296892980436, "grad_norm": 2.238677740097046, "learning_rate": 0.0016716340621403913, "loss": 0.8557, "step": 57070 }, { "epoch": 16.421173762945916, "grad_norm": 3.0847246646881104, "learning_rate": 0.0016715765247410817, "loss": 0.7175, "step": 57080 }, { "epoch": 16.424050632911392, "grad_norm": 1.0002989768981934, "learning_rate": 0.0016715189873417722, "loss": 0.9384, "step": 57090 }, { "epoch": 16.42692750287687, "grad_norm": 2.5067367553710938, "learning_rate": 0.0016714614499424628, "loss": 0.8288, "step": 57100 }, { "epoch": 16.429804372842348, "grad_norm": 0.9541964530944824, "learning_rate": 0.0016714039125431531, "loss": 0.8864, "step": 57110 }, { "epoch": 16.432681242807824, "grad_norm": 0.858173668384552, "learning_rate": 0.0016713463751438435, "loss": 0.6407, "step": 57120 }, { "epoch": 16.435558112773304, "grad_norm": 1.4203870296478271, "learning_rate": 0.0016712888377445338, "loss": 0.6327, "step": 57130 }, { "epoch": 16.43843498273878, "grad_norm": 3.234740972518921, "learning_rate": 0.0016712313003452244, "loss": 1.016, "step": 57140 }, { "epoch": 16.441311852704256, "grad_norm": 1.5803382396697998, "learning_rate": 0.001671173762945915, "loss": 0.7007, "step": 57150 }, { "epoch": 16.444188722669736, "grad_norm": 1.3851786851882935, "learning_rate": 0.0016711162255466053, "loss": 0.7948, "step": 57160 }, { "epoch": 16.447065592635212, "grad_norm": 1.6065267324447632, "learning_rate": 0.0016710586881472959, "loss": 0.7005, "step": 57170 }, { "epoch": 16.449942462600692, "grad_norm": 1.9532017707824707, "learning_rate": 0.0016710011507479862, "loss": 0.7122, "step": 57180 }, { "epoch": 16.45281933256617, "grad_norm": 2.6485719680786133, "learning_rate": 0.0016709436133486766, "loss": 0.7179, "step": 57190 }, { "epoch": 16.455696202531644, "grad_norm": 1.3194576501846313, "learning_rate": 0.0016708860759493671, "loss": 0.7586, "step": 57200 }, { "epoch": 16.458573072497124, "grad_norm": 1.5926820039749146, "learning_rate": 0.0016708285385500577, "loss": 0.7555, "step": 57210 }, { "epoch": 16.4614499424626, "grad_norm": 1.6607033014297485, "learning_rate": 0.001670771001150748, "loss": 0.6712, "step": 57220 }, { "epoch": 16.464326812428077, "grad_norm": 1.5485622882843018, "learning_rate": 0.0016707134637514386, "loss": 0.8198, "step": 57230 }, { "epoch": 16.467203682393556, "grad_norm": 1.3433150053024292, "learning_rate": 0.0016706559263521287, "loss": 0.7432, "step": 57240 }, { "epoch": 16.470080552359033, "grad_norm": 0.954745352268219, "learning_rate": 0.0016705983889528193, "loss": 0.7994, "step": 57250 }, { "epoch": 16.472957422324512, "grad_norm": 2.0990312099456787, "learning_rate": 0.0016705408515535099, "loss": 0.6141, "step": 57260 }, { "epoch": 16.47583429228999, "grad_norm": 1.3581597805023193, "learning_rate": 0.0016704833141542002, "loss": 0.6637, "step": 57270 }, { "epoch": 16.478711162255465, "grad_norm": 2.005920648574829, "learning_rate": 0.0016704257767548908, "loss": 0.9925, "step": 57280 }, { "epoch": 16.481588032220944, "grad_norm": 0.8359763026237488, "learning_rate": 0.0016703682393555813, "loss": 0.566, "step": 57290 }, { "epoch": 16.48446490218642, "grad_norm": 1.052222490310669, "learning_rate": 0.0016703107019562715, "loss": 0.7472, "step": 57300 }, { "epoch": 16.4873417721519, "grad_norm": 1.9072948694229126, "learning_rate": 0.001670253164556962, "loss": 0.7283, "step": 57310 }, { "epoch": 16.490218642117377, "grad_norm": 0.8276464343070984, "learning_rate": 0.0016701956271576526, "loss": 0.8197, "step": 57320 }, { "epoch": 16.493095512082853, "grad_norm": 1.705801010131836, "learning_rate": 0.001670138089758343, "loss": 0.7174, "step": 57330 }, { "epoch": 16.495972382048333, "grad_norm": 1.3157775402069092, "learning_rate": 0.0016700805523590335, "loss": 0.7399, "step": 57340 }, { "epoch": 16.49884925201381, "grad_norm": 2.5289804935455322, "learning_rate": 0.0016700230149597236, "loss": 0.695, "step": 57350 }, { "epoch": 16.501726121979285, "grad_norm": 1.1676064729690552, "learning_rate": 0.0016699654775604142, "loss": 0.7036, "step": 57360 }, { "epoch": 16.504602991944765, "grad_norm": 1.0644882917404175, "learning_rate": 0.0016699079401611048, "loss": 0.8883, "step": 57370 }, { "epoch": 16.50747986191024, "grad_norm": 1.2172038555145264, "learning_rate": 0.0016698504027617951, "loss": 0.7924, "step": 57380 }, { "epoch": 16.51035673187572, "grad_norm": 2.248800754547119, "learning_rate": 0.0016697928653624857, "loss": 0.8836, "step": 57390 }, { "epoch": 16.513233601841197, "grad_norm": 0.8884774446487427, "learning_rate": 0.0016697353279631762, "loss": 0.5595, "step": 57400 }, { "epoch": 16.516110471806673, "grad_norm": 3.9168660640716553, "learning_rate": 0.0016696777905638664, "loss": 0.6523, "step": 57410 }, { "epoch": 16.518987341772153, "grad_norm": 1.1042147874832153, "learning_rate": 0.001669620253164557, "loss": 0.7427, "step": 57420 }, { "epoch": 16.52186421173763, "grad_norm": 1.0226492881774902, "learning_rate": 0.0016695627157652475, "loss": 0.5374, "step": 57430 }, { "epoch": 16.524741081703105, "grad_norm": 1.0675452947616577, "learning_rate": 0.0016695051783659379, "loss": 0.7996, "step": 57440 }, { "epoch": 16.527617951668585, "grad_norm": 1.28988516330719, "learning_rate": 0.0016694476409666284, "loss": 0.6753, "step": 57450 }, { "epoch": 16.53049482163406, "grad_norm": 1.5527799129486084, "learning_rate": 0.0016693901035673188, "loss": 0.7316, "step": 57460 }, { "epoch": 16.53337169159954, "grad_norm": 1.218031644821167, "learning_rate": 0.0016693325661680091, "loss": 0.7998, "step": 57470 }, { "epoch": 16.536248561565017, "grad_norm": 2.4595251083374023, "learning_rate": 0.0016692750287686997, "loss": 0.6477, "step": 57480 }, { "epoch": 16.539125431530493, "grad_norm": 0.8012495040893555, "learning_rate": 0.00166921749136939, "loss": 0.7576, "step": 57490 }, { "epoch": 16.542002301495973, "grad_norm": 0.9589389562606812, "learning_rate": 0.0016691599539700806, "loss": 0.7039, "step": 57500 }, { "epoch": 16.54487917146145, "grad_norm": 0.9223586320877075, "learning_rate": 0.0016691024165707711, "loss": 0.7235, "step": 57510 }, { "epoch": 16.54775604142693, "grad_norm": 1.0798089504241943, "learning_rate": 0.0016690448791714615, "loss": 0.7784, "step": 57520 }, { "epoch": 16.550632911392405, "grad_norm": 1.1265227794647217, "learning_rate": 0.0016689873417721518, "loss": 0.6666, "step": 57530 }, { "epoch": 16.55350978135788, "grad_norm": 0.9911708235740662, "learning_rate": 0.0016689298043728424, "loss": 0.9154, "step": 57540 }, { "epoch": 16.55638665132336, "grad_norm": 1.0893560647964478, "learning_rate": 0.0016688722669735328, "loss": 0.7727, "step": 57550 }, { "epoch": 16.559263521288837, "grad_norm": 1.6835709810256958, "learning_rate": 0.0016688147295742233, "loss": 0.6009, "step": 57560 }, { "epoch": 16.562140391254314, "grad_norm": 1.2364972829818726, "learning_rate": 0.0016687571921749137, "loss": 0.7837, "step": 57570 }, { "epoch": 16.565017261219793, "grad_norm": 0.8838427066802979, "learning_rate": 0.0016686996547756042, "loss": 0.8444, "step": 57580 }, { "epoch": 16.56789413118527, "grad_norm": 3.1367383003234863, "learning_rate": 0.0016686421173762946, "loss": 0.6854, "step": 57590 }, { "epoch": 16.57077100115075, "grad_norm": 1.5007799863815308, "learning_rate": 0.001668584579976985, "loss": 0.8993, "step": 57600 }, { "epoch": 16.573647871116226, "grad_norm": 1.5230345726013184, "learning_rate": 0.0016685270425776755, "loss": 0.7934, "step": 57610 }, { "epoch": 16.576524741081702, "grad_norm": 0.6923266649246216, "learning_rate": 0.001668469505178366, "loss": 0.7419, "step": 57620 }, { "epoch": 16.57940161104718, "grad_norm": 1.0521618127822876, "learning_rate": 0.0016684119677790564, "loss": 0.7388, "step": 57630 }, { "epoch": 16.582278481012658, "grad_norm": 0.9618034958839417, "learning_rate": 0.001668354430379747, "loss": 0.6248, "step": 57640 }, { "epoch": 16.585155350978134, "grad_norm": 1.2632973194122314, "learning_rate": 0.0016682968929804373, "loss": 0.6959, "step": 57650 }, { "epoch": 16.588032220943614, "grad_norm": 2.2406702041625977, "learning_rate": 0.0016682393555811277, "loss": 0.8307, "step": 57660 }, { "epoch": 16.59090909090909, "grad_norm": 1.4507081508636475, "learning_rate": 0.0016681818181818182, "loss": 0.8448, "step": 57670 }, { "epoch": 16.59378596087457, "grad_norm": 0.9421238899230957, "learning_rate": 0.0016681242807825088, "loss": 0.7995, "step": 57680 }, { "epoch": 16.596662830840046, "grad_norm": 1.222519040107727, "learning_rate": 0.0016680667433831991, "loss": 0.7236, "step": 57690 }, { "epoch": 16.599539700805522, "grad_norm": 1.067276120185852, "learning_rate": 0.0016680092059838897, "loss": 0.759, "step": 57700 }, { "epoch": 16.602416570771002, "grad_norm": 1.0751911401748657, "learning_rate": 0.0016679516685845798, "loss": 0.6094, "step": 57710 }, { "epoch": 16.605293440736478, "grad_norm": 1.032848834991455, "learning_rate": 0.0016678941311852704, "loss": 0.7125, "step": 57720 }, { "epoch": 16.608170310701958, "grad_norm": 1.1537102460861206, "learning_rate": 0.001667836593785961, "loss": 0.7639, "step": 57730 }, { "epoch": 16.611047180667434, "grad_norm": 1.4718544483184814, "learning_rate": 0.0016677790563866513, "loss": 0.883, "step": 57740 }, { "epoch": 16.61392405063291, "grad_norm": 2.1900928020477295, "learning_rate": 0.0016677215189873419, "loss": 0.8996, "step": 57750 }, { "epoch": 16.61680092059839, "grad_norm": 0.9930851459503174, "learning_rate": 0.0016676639815880324, "loss": 0.7811, "step": 57760 }, { "epoch": 16.619677790563866, "grad_norm": 0.9625142216682434, "learning_rate": 0.0016676064441887226, "loss": 0.7828, "step": 57770 }, { "epoch": 16.622554660529342, "grad_norm": 1.0095536708831787, "learning_rate": 0.0016675489067894131, "loss": 0.9154, "step": 57780 }, { "epoch": 16.625431530494822, "grad_norm": 1.105212688446045, "learning_rate": 0.0016674913693901037, "loss": 0.8527, "step": 57790 }, { "epoch": 16.6283084004603, "grad_norm": 0.675234317779541, "learning_rate": 0.001667433831990794, "loss": 0.7637, "step": 57800 }, { "epoch": 16.631185270425778, "grad_norm": 1.5885311365127563, "learning_rate": 0.0016673762945914846, "loss": 0.9525, "step": 57810 }, { "epoch": 16.634062140391254, "grad_norm": 2.095540761947632, "learning_rate": 0.001667318757192175, "loss": 0.5682, "step": 57820 }, { "epoch": 16.63693901035673, "grad_norm": 0.6999608874320984, "learning_rate": 0.0016672612197928653, "loss": 0.7759, "step": 57830 }, { "epoch": 16.63981588032221, "grad_norm": 1.2308293581008911, "learning_rate": 0.0016672036823935559, "loss": 0.7673, "step": 57840 }, { "epoch": 16.642692750287686, "grad_norm": 2.2559072971343994, "learning_rate": 0.0016671461449942462, "loss": 1.1824, "step": 57850 }, { "epoch": 16.645569620253166, "grad_norm": 1.5545380115509033, "learning_rate": 0.0016670886075949368, "loss": 0.7298, "step": 57860 }, { "epoch": 16.648446490218642, "grad_norm": 1.7116426229476929, "learning_rate": 0.0016670310701956273, "loss": 0.9132, "step": 57870 }, { "epoch": 16.65132336018412, "grad_norm": 3.283067226409912, "learning_rate": 0.0016669735327963177, "loss": 0.7961, "step": 57880 }, { "epoch": 16.6542002301496, "grad_norm": 1.2405563592910767, "learning_rate": 0.001666915995397008, "loss": 0.7801, "step": 57890 }, { "epoch": 16.657077100115075, "grad_norm": 1.6337575912475586, "learning_rate": 0.0016668584579976986, "loss": 0.9406, "step": 57900 }, { "epoch": 16.65995397008055, "grad_norm": 1.1502721309661865, "learning_rate": 0.001666800920598389, "loss": 0.8458, "step": 57910 }, { "epoch": 16.66283084004603, "grad_norm": 1.650629997253418, "learning_rate": 0.0016667433831990795, "loss": 0.9204, "step": 57920 }, { "epoch": 16.665707710011507, "grad_norm": 0.8598907589912415, "learning_rate": 0.0016666858457997698, "loss": 0.6332, "step": 57930 }, { "epoch": 16.668584579976987, "grad_norm": 1.407340168952942, "learning_rate": 0.0016666283084004604, "loss": 0.8387, "step": 57940 }, { "epoch": 16.671461449942463, "grad_norm": 0.9921379089355469, "learning_rate": 0.0016665707710011508, "loss": 0.7996, "step": 57950 }, { "epoch": 16.67433831990794, "grad_norm": 1.651570439338684, "learning_rate": 0.001666513233601841, "loss": 0.7905, "step": 57960 }, { "epoch": 16.67721518987342, "grad_norm": 1.232186198234558, "learning_rate": 0.0016664556962025317, "loss": 0.8062, "step": 57970 }, { "epoch": 16.680092059838895, "grad_norm": 0.9933435320854187, "learning_rate": 0.0016663981588032222, "loss": 0.6794, "step": 57980 }, { "epoch": 16.682968929804375, "grad_norm": 1.4126801490783691, "learning_rate": 0.0016663406214039126, "loss": 0.7402, "step": 57990 }, { "epoch": 16.68584579976985, "grad_norm": 1.248914122581482, "learning_rate": 0.0016662830840046031, "loss": 0.7746, "step": 58000 }, { "epoch": 16.688722669735327, "grad_norm": 0.9452697038650513, "learning_rate": 0.0016662255466052935, "loss": 0.8223, "step": 58010 }, { "epoch": 16.691599539700807, "grad_norm": 1.0686028003692627, "learning_rate": 0.0016661680092059838, "loss": 0.6949, "step": 58020 }, { "epoch": 16.694476409666283, "grad_norm": 1.7585452795028687, "learning_rate": 0.0016661104718066744, "loss": 0.7151, "step": 58030 }, { "epoch": 16.69735327963176, "grad_norm": 0.6659751534461975, "learning_rate": 0.0016660529344073647, "loss": 0.8502, "step": 58040 }, { "epoch": 16.70023014959724, "grad_norm": 2.0583417415618896, "learning_rate": 0.0016659953970080553, "loss": 0.7858, "step": 58050 }, { "epoch": 16.703107019562715, "grad_norm": 1.737593173980713, "learning_rate": 0.0016659378596087459, "loss": 0.6904, "step": 58060 }, { "epoch": 16.705983889528195, "grad_norm": 1.3815752267837524, "learning_rate": 0.001665880322209436, "loss": 0.8517, "step": 58070 }, { "epoch": 16.70886075949367, "grad_norm": 1.3516817092895508, "learning_rate": 0.0016658227848101266, "loss": 0.6946, "step": 58080 }, { "epoch": 16.711737629459147, "grad_norm": 1.7919245958328247, "learning_rate": 0.0016657652474108171, "loss": 0.7074, "step": 58090 }, { "epoch": 16.714614499424627, "grad_norm": 1.9748620986938477, "learning_rate": 0.0016657077100115075, "loss": 0.7964, "step": 58100 }, { "epoch": 16.717491369390103, "grad_norm": 1.1828391551971436, "learning_rate": 0.001665650172612198, "loss": 0.7924, "step": 58110 }, { "epoch": 16.72036823935558, "grad_norm": 0.588628888130188, "learning_rate": 0.0016655926352128886, "loss": 0.9221, "step": 58120 }, { "epoch": 16.72324510932106, "grad_norm": 1.1893187761306763, "learning_rate": 0.0016655350978135787, "loss": 0.8179, "step": 58130 }, { "epoch": 16.726121979286535, "grad_norm": 1.4925737380981445, "learning_rate": 0.0016654775604142693, "loss": 0.6673, "step": 58140 }, { "epoch": 16.728998849252015, "grad_norm": 1.2743775844573975, "learning_rate": 0.0016654200230149597, "loss": 0.7131, "step": 58150 }, { "epoch": 16.73187571921749, "grad_norm": 1.157593846321106, "learning_rate": 0.0016653624856156502, "loss": 0.6809, "step": 58160 }, { "epoch": 16.734752589182968, "grad_norm": 1.3464372158050537, "learning_rate": 0.0016653049482163408, "loss": 0.7058, "step": 58170 }, { "epoch": 16.737629459148447, "grad_norm": 1.3063695430755615, "learning_rate": 0.001665247410817031, "loss": 0.7162, "step": 58180 }, { "epoch": 16.740506329113924, "grad_norm": 2.484400987625122, "learning_rate": 0.0016651898734177215, "loss": 0.7287, "step": 58190 }, { "epoch": 16.743383199079403, "grad_norm": 0.8465154767036438, "learning_rate": 0.001665132336018412, "loss": 0.5754, "step": 58200 }, { "epoch": 16.74626006904488, "grad_norm": 1.786865472793579, "learning_rate": 0.0016650747986191024, "loss": 0.7519, "step": 58210 }, { "epoch": 16.749136939010356, "grad_norm": 2.09194278717041, "learning_rate": 0.001665017261219793, "loss": 0.5874, "step": 58220 }, { "epoch": 16.752013808975835, "grad_norm": 2.03987717628479, "learning_rate": 0.0016649597238204835, "loss": 0.8819, "step": 58230 }, { "epoch": 16.75489067894131, "grad_norm": 1.738233208656311, "learning_rate": 0.0016649021864211736, "loss": 0.7221, "step": 58240 }, { "epoch": 16.757767548906788, "grad_norm": 1.2379921674728394, "learning_rate": 0.0016648446490218642, "loss": 0.9019, "step": 58250 }, { "epoch": 16.760644418872268, "grad_norm": 2.2933645248413086, "learning_rate": 0.0016647871116225546, "loss": 0.8409, "step": 58260 }, { "epoch": 16.763521288837744, "grad_norm": 0.8882244229316711, "learning_rate": 0.0016647295742232451, "loss": 0.6289, "step": 58270 }, { "epoch": 16.766398158803224, "grad_norm": 1.1161469221115112, "learning_rate": 0.0016646720368239357, "loss": 0.6614, "step": 58280 }, { "epoch": 16.7692750287687, "grad_norm": 2.207594871520996, "learning_rate": 0.001664614499424626, "loss": 0.8095, "step": 58290 }, { "epoch": 16.772151898734176, "grad_norm": 0.6868904232978821, "learning_rate": 0.0016645569620253164, "loss": 0.939, "step": 58300 }, { "epoch": 16.775028768699656, "grad_norm": 1.7974328994750977, "learning_rate": 0.001664499424626007, "loss": 0.7472, "step": 58310 }, { "epoch": 16.777905638665132, "grad_norm": 1.0661958456039429, "learning_rate": 0.0016644418872266973, "loss": 0.7889, "step": 58320 }, { "epoch": 16.780782508630608, "grad_norm": 1.5489716529846191, "learning_rate": 0.0016643843498273878, "loss": 0.7062, "step": 58330 }, { "epoch": 16.783659378596088, "grad_norm": 1.6091759204864502, "learning_rate": 0.0016643268124280784, "loss": 0.7143, "step": 58340 }, { "epoch": 16.786536248561564, "grad_norm": 1.8917492628097534, "learning_rate": 0.0016642692750287688, "loss": 0.8154, "step": 58350 }, { "epoch": 16.789413118527044, "grad_norm": 1.2137668132781982, "learning_rate": 0.001664211737629459, "loss": 0.8576, "step": 58360 }, { "epoch": 16.79228998849252, "grad_norm": 1.3268775939941406, "learning_rate": 0.0016641542002301497, "loss": 0.7151, "step": 58370 }, { "epoch": 16.795166858457996, "grad_norm": 1.616127848625183, "learning_rate": 0.00166409666283084, "loss": 0.8175, "step": 58380 }, { "epoch": 16.798043728423476, "grad_norm": 1.5125138759613037, "learning_rate": 0.0016640391254315306, "loss": 0.7636, "step": 58390 }, { "epoch": 16.800920598388952, "grad_norm": 1.0578017234802246, "learning_rate": 0.001663981588032221, "loss": 0.8866, "step": 58400 }, { "epoch": 16.803797468354432, "grad_norm": 1.5628082752227783, "learning_rate": 0.0016639240506329115, "loss": 0.7511, "step": 58410 }, { "epoch": 16.806674338319908, "grad_norm": 1.0444495677947998, "learning_rate": 0.0016638665132336018, "loss": 0.7653, "step": 58420 }, { "epoch": 16.809551208285384, "grad_norm": 2.087580919265747, "learning_rate": 0.0016638089758342922, "loss": 0.869, "step": 58430 }, { "epoch": 16.812428078250864, "grad_norm": 1.2665380239486694, "learning_rate": 0.0016637514384349828, "loss": 0.875, "step": 58440 }, { "epoch": 16.81530494821634, "grad_norm": 0.6314935088157654, "learning_rate": 0.0016636939010356733, "loss": 0.7243, "step": 58450 }, { "epoch": 16.818181818181817, "grad_norm": 3.1357038021087646, "learning_rate": 0.0016636363636363637, "loss": 0.6852, "step": 58460 }, { "epoch": 16.821058688147296, "grad_norm": 2.682586193084717, "learning_rate": 0.0016635788262370542, "loss": 0.705, "step": 58470 }, { "epoch": 16.823935558112773, "grad_norm": 1.6802573204040527, "learning_rate": 0.0016635212888377446, "loss": 0.7932, "step": 58480 }, { "epoch": 16.826812428078252, "grad_norm": 1.0281683206558228, "learning_rate": 0.001663463751438435, "loss": 0.7989, "step": 58490 }, { "epoch": 16.82968929804373, "grad_norm": 1.6707913875579834, "learning_rate": 0.0016634062140391255, "loss": 0.7514, "step": 58500 }, { "epoch": 16.832566168009205, "grad_norm": 1.487988829612732, "learning_rate": 0.0016633486766398158, "loss": 0.6195, "step": 58510 }, { "epoch": 16.835443037974684, "grad_norm": 1.4251420497894287, "learning_rate": 0.0016632911392405064, "loss": 0.8569, "step": 58520 }, { "epoch": 16.83831990794016, "grad_norm": 2.6032872200012207, "learning_rate": 0.001663233601841197, "loss": 0.7993, "step": 58530 }, { "epoch": 16.841196777905637, "grad_norm": 0.8965581059455872, "learning_rate": 0.001663176064441887, "loss": 0.7438, "step": 58540 }, { "epoch": 16.844073647871117, "grad_norm": 0.9464526772499084, "learning_rate": 0.0016631185270425777, "loss": 0.719, "step": 58550 }, { "epoch": 16.846950517836593, "grad_norm": 1.3424829244613647, "learning_rate": 0.0016630609896432682, "loss": 0.7286, "step": 58560 }, { "epoch": 16.849827387802073, "grad_norm": 0.8926374912261963, "learning_rate": 0.0016630034522439586, "loss": 0.7343, "step": 58570 }, { "epoch": 16.85270425776755, "grad_norm": 1.968994379043579, "learning_rate": 0.0016629459148446491, "loss": 0.8146, "step": 58580 }, { "epoch": 16.855581127733025, "grad_norm": 1.2234852313995361, "learning_rate": 0.0016628883774453397, "loss": 0.6028, "step": 58590 }, { "epoch": 16.858457997698505, "grad_norm": 0.9001519680023193, "learning_rate": 0.0016628308400460298, "loss": 0.7998, "step": 58600 }, { "epoch": 16.86133486766398, "grad_norm": 2.064361333847046, "learning_rate": 0.0016627733026467204, "loss": 0.8062, "step": 58610 }, { "epoch": 16.86421173762946, "grad_norm": 1.0880892276763916, "learning_rate": 0.0016627157652474107, "loss": 0.7515, "step": 58620 }, { "epoch": 16.867088607594937, "grad_norm": 0.7697743773460388, "learning_rate": 0.0016626582278481013, "loss": 0.8225, "step": 58630 }, { "epoch": 16.869965477560413, "grad_norm": 1.5824177265167236, "learning_rate": 0.0016626006904487919, "loss": 0.6963, "step": 58640 }, { "epoch": 16.872842347525893, "grad_norm": 0.8590817451477051, "learning_rate": 0.0016625431530494822, "loss": 0.8097, "step": 58650 }, { "epoch": 16.87571921749137, "grad_norm": 0.9118816256523132, "learning_rate": 0.0016624856156501726, "loss": 0.8061, "step": 58660 }, { "epoch": 16.878596087456845, "grad_norm": 1.4934183359146118, "learning_rate": 0.0016624280782508631, "loss": 0.7076, "step": 58670 }, { "epoch": 16.881472957422325, "grad_norm": 1.082436442375183, "learning_rate": 0.0016623705408515535, "loss": 0.7787, "step": 58680 }, { "epoch": 16.8843498273878, "grad_norm": 1.6111502647399902, "learning_rate": 0.001662313003452244, "loss": 0.6717, "step": 58690 }, { "epoch": 16.88722669735328, "grad_norm": 1.945615530014038, "learning_rate": 0.0016622554660529346, "loss": 0.9139, "step": 58700 }, { "epoch": 16.890103567318757, "grad_norm": 1.0471487045288086, "learning_rate": 0.001662197928653625, "loss": 0.9354, "step": 58710 }, { "epoch": 16.892980437284233, "grad_norm": 1.4514886140823364, "learning_rate": 0.0016621403912543153, "loss": 0.6983, "step": 58720 }, { "epoch": 16.895857307249713, "grad_norm": 1.4926735162734985, "learning_rate": 0.0016620828538550056, "loss": 0.9112, "step": 58730 }, { "epoch": 16.89873417721519, "grad_norm": 1.0508956909179688, "learning_rate": 0.0016620253164556962, "loss": 0.6763, "step": 58740 }, { "epoch": 16.90161104718067, "grad_norm": 0.7465677261352539, "learning_rate": 0.0016619677790563868, "loss": 0.8056, "step": 58750 }, { "epoch": 16.904487917146145, "grad_norm": 1.358984112739563, "learning_rate": 0.0016619102416570771, "loss": 0.7453, "step": 58760 }, { "epoch": 16.90736478711162, "grad_norm": 0.9273386001586914, "learning_rate": 0.0016618527042577677, "loss": 0.6848, "step": 58770 }, { "epoch": 16.9102416570771, "grad_norm": 2.7712833881378174, "learning_rate": 0.001661795166858458, "loss": 0.7345, "step": 58780 }, { "epoch": 16.913118527042577, "grad_norm": 2.1907424926757812, "learning_rate": 0.0016617376294591484, "loss": 0.9232, "step": 58790 }, { "epoch": 16.915995397008054, "grad_norm": 1.5401743650436401, "learning_rate": 0.001661680092059839, "loss": 0.9837, "step": 58800 }, { "epoch": 16.918872266973533, "grad_norm": 1.0307164192199707, "learning_rate": 0.0016616225546605295, "loss": 0.7984, "step": 58810 }, { "epoch": 16.92174913693901, "grad_norm": 1.3656468391418457, "learning_rate": 0.0016615650172612198, "loss": 0.8949, "step": 58820 }, { "epoch": 16.92462600690449, "grad_norm": 0.7887130379676819, "learning_rate": 0.0016615074798619104, "loss": 0.8023, "step": 58830 }, { "epoch": 16.927502876869966, "grad_norm": 2.00730562210083, "learning_rate": 0.0016614499424626005, "loss": 0.9749, "step": 58840 }, { "epoch": 16.930379746835442, "grad_norm": 2.0725443363189697, "learning_rate": 0.001661392405063291, "loss": 0.7031, "step": 58850 }, { "epoch": 16.93325661680092, "grad_norm": 1.4304777383804321, "learning_rate": 0.0016613348676639817, "loss": 0.7949, "step": 58860 }, { "epoch": 16.936133486766398, "grad_norm": 1.372184157371521, "learning_rate": 0.001661277330264672, "loss": 0.7538, "step": 58870 }, { "epoch": 16.939010356731877, "grad_norm": 1.6427000761032104, "learning_rate": 0.0016612197928653626, "loss": 0.7291, "step": 58880 }, { "epoch": 16.941887226697354, "grad_norm": 1.3888617753982544, "learning_rate": 0.0016611622554660531, "loss": 0.8061, "step": 58890 }, { "epoch": 16.94476409666283, "grad_norm": 1.1938763856887817, "learning_rate": 0.0016611047180667433, "loss": 0.7356, "step": 58900 }, { "epoch": 16.94764096662831, "grad_norm": 1.1444182395935059, "learning_rate": 0.0016610471806674338, "loss": 0.6842, "step": 58910 }, { "epoch": 16.950517836593786, "grad_norm": 1.4718968868255615, "learning_rate": 0.0016609896432681244, "loss": 0.8982, "step": 58920 }, { "epoch": 16.953394706559262, "grad_norm": 1.3506693840026855, "learning_rate": 0.0016609321058688147, "loss": 1.0401, "step": 58930 }, { "epoch": 16.956271576524742, "grad_norm": 1.9078072309494019, "learning_rate": 0.0016608745684695053, "loss": 0.8502, "step": 58940 }, { "epoch": 16.959148446490218, "grad_norm": 1.9737205505371094, "learning_rate": 0.0016608170310701959, "loss": 0.8787, "step": 58950 }, { "epoch": 16.962025316455698, "grad_norm": 1.06631338596344, "learning_rate": 0.001660759493670886, "loss": 0.8348, "step": 58960 }, { "epoch": 16.964902186421174, "grad_norm": 1.0894190073013306, "learning_rate": 0.0016607019562715766, "loss": 0.7047, "step": 58970 }, { "epoch": 16.96777905638665, "grad_norm": 1.7173857688903809, "learning_rate": 0.001660644418872267, "loss": 0.9052, "step": 58980 }, { "epoch": 16.97065592635213, "grad_norm": 1.5194041728973389, "learning_rate": 0.0016605868814729575, "loss": 0.8157, "step": 58990 }, { "epoch": 16.973532796317606, "grad_norm": 0.8253759741783142, "learning_rate": 0.001660529344073648, "loss": 0.8128, "step": 59000 }, { "epoch": 16.976409666283082, "grad_norm": 2.6342716217041016, "learning_rate": 0.0016604718066743382, "loss": 0.7573, "step": 59010 }, { "epoch": 16.979286536248562, "grad_norm": 1.0186941623687744, "learning_rate": 0.0016604142692750287, "loss": 0.9725, "step": 59020 }, { "epoch": 16.98216340621404, "grad_norm": 0.8209824562072754, "learning_rate": 0.0016603567318757193, "loss": 0.6978, "step": 59030 }, { "epoch": 16.985040276179518, "grad_norm": 0.6732996106147766, "learning_rate": 0.0016602991944764096, "loss": 0.6714, "step": 59040 }, { "epoch": 16.987917146144994, "grad_norm": 1.2328287363052368, "learning_rate": 0.0016602416570771002, "loss": 0.6485, "step": 59050 }, { "epoch": 16.99079401611047, "grad_norm": 1.1457864046096802, "learning_rate": 0.0016601841196777908, "loss": 0.9559, "step": 59060 }, { "epoch": 16.99367088607595, "grad_norm": 3.6446917057037354, "learning_rate": 0.001660126582278481, "loss": 0.8347, "step": 59070 }, { "epoch": 16.996547756041426, "grad_norm": 1.4033615589141846, "learning_rate": 0.0016600690448791715, "loss": 0.6947, "step": 59080 }, { "epoch": 16.999424626006906, "grad_norm": 0.8708285689353943, "learning_rate": 0.0016600115074798618, "loss": 0.6437, "step": 59090 }, { "epoch": 17.002301495972382, "grad_norm": 0.9686982035636902, "learning_rate": 0.0016599539700805524, "loss": 0.8177, "step": 59100 }, { "epoch": 17.00517836593786, "grad_norm": 1.3431872129440308, "learning_rate": 0.001659896432681243, "loss": 0.6172, "step": 59110 }, { "epoch": 17.00805523590334, "grad_norm": 0.9748004674911499, "learning_rate": 0.0016598388952819333, "loss": 0.7122, "step": 59120 }, { "epoch": 17.010932105868815, "grad_norm": 1.3286701440811157, "learning_rate": 0.0016597813578826236, "loss": 0.7016, "step": 59130 }, { "epoch": 17.01380897583429, "grad_norm": 1.7882270812988281, "learning_rate": 0.0016597238204833142, "loss": 0.5608, "step": 59140 }, { "epoch": 17.01668584579977, "grad_norm": 1.1264170408248901, "learning_rate": 0.0016596662830840046, "loss": 0.6974, "step": 59150 }, { "epoch": 17.019562715765247, "grad_norm": 1.2219669818878174, "learning_rate": 0.0016596087456846951, "loss": 0.963, "step": 59160 }, { "epoch": 17.022439585730726, "grad_norm": 1.533250093460083, "learning_rate": 0.0016595512082853857, "loss": 0.7132, "step": 59170 }, { "epoch": 17.025316455696203, "grad_norm": 1.243544578552246, "learning_rate": 0.001659493670886076, "loss": 0.5249, "step": 59180 }, { "epoch": 17.02819332566168, "grad_norm": 2.0343120098114014, "learning_rate": 0.0016594361334867664, "loss": 0.8577, "step": 59190 }, { "epoch": 17.03107019562716, "grad_norm": 1.5061551332473755, "learning_rate": 0.0016593785960874567, "loss": 0.761, "step": 59200 }, { "epoch": 17.033947065592635, "grad_norm": 1.5388115644454956, "learning_rate": 0.0016593210586881473, "loss": 0.6571, "step": 59210 }, { "epoch": 17.03682393555811, "grad_norm": 1.633588433265686, "learning_rate": 0.0016592635212888378, "loss": 0.7109, "step": 59220 }, { "epoch": 17.03970080552359, "grad_norm": 1.108887791633606, "learning_rate": 0.0016592059838895282, "loss": 0.6592, "step": 59230 }, { "epoch": 17.042577675489067, "grad_norm": 0.8239474892616272, "learning_rate": 0.0016591484464902188, "loss": 0.6724, "step": 59240 }, { "epoch": 17.045454545454547, "grad_norm": 1.2350902557373047, "learning_rate": 0.001659090909090909, "loss": 0.6568, "step": 59250 }, { "epoch": 17.048331415420023, "grad_norm": 1.0973918437957764, "learning_rate": 0.0016590333716915995, "loss": 0.5415, "step": 59260 }, { "epoch": 17.0512082853855, "grad_norm": 1.7573676109313965, "learning_rate": 0.00165897583429229, "loss": 0.8061, "step": 59270 }, { "epoch": 17.05408515535098, "grad_norm": 1.8402761220932007, "learning_rate": 0.0016589182968929806, "loss": 0.6956, "step": 59280 }, { "epoch": 17.056962025316455, "grad_norm": 3.6024270057678223, "learning_rate": 0.001658860759493671, "loss": 0.7429, "step": 59290 }, { "epoch": 17.059838895281935, "grad_norm": 1.0620169639587402, "learning_rate": 0.0016588032220943615, "loss": 0.8365, "step": 59300 }, { "epoch": 17.06271576524741, "grad_norm": 1.4280660152435303, "learning_rate": 0.0016587456846950516, "loss": 0.7486, "step": 59310 }, { "epoch": 17.065592635212887, "grad_norm": 1.4277774095535278, "learning_rate": 0.0016586881472957422, "loss": 0.7629, "step": 59320 }, { "epoch": 17.068469505178367, "grad_norm": 1.189619541168213, "learning_rate": 0.0016586306098964327, "loss": 0.6332, "step": 59330 }, { "epoch": 17.071346375143843, "grad_norm": 0.8044414520263672, "learning_rate": 0.001658573072497123, "loss": 0.5732, "step": 59340 }, { "epoch": 17.07422324510932, "grad_norm": 0.8417288064956665, "learning_rate": 0.0016585155350978137, "loss": 0.7734, "step": 59350 }, { "epoch": 17.0771001150748, "grad_norm": 1.3851962089538574, "learning_rate": 0.0016584579976985042, "loss": 0.7445, "step": 59360 }, { "epoch": 17.079976985040275, "grad_norm": 1.3334276676177979, "learning_rate": 0.0016584004602991944, "loss": 0.7386, "step": 59370 }, { "epoch": 17.082853855005755, "grad_norm": 1.5387380123138428, "learning_rate": 0.001658342922899885, "loss": 0.6895, "step": 59380 }, { "epoch": 17.08573072497123, "grad_norm": 1.4527643918991089, "learning_rate": 0.0016582853855005755, "loss": 0.7397, "step": 59390 }, { "epoch": 17.088607594936708, "grad_norm": 0.9276213645935059, "learning_rate": 0.0016582278481012658, "loss": 0.8576, "step": 59400 }, { "epoch": 17.091484464902187, "grad_norm": 1.2336112260818481, "learning_rate": 0.0016581703107019564, "loss": 0.7824, "step": 59410 }, { "epoch": 17.094361334867664, "grad_norm": 1.0561789274215698, "learning_rate": 0.0016581127733026467, "loss": 0.7144, "step": 59420 }, { "epoch": 17.097238204833143, "grad_norm": 1.2554192543029785, "learning_rate": 0.001658055235903337, "loss": 0.7218, "step": 59430 }, { "epoch": 17.10011507479862, "grad_norm": 1.7984113693237305, "learning_rate": 0.0016579976985040277, "loss": 0.8248, "step": 59440 }, { "epoch": 17.102991944764096, "grad_norm": 2.355945348739624, "learning_rate": 0.001657940161104718, "loss": 0.6072, "step": 59450 }, { "epoch": 17.105868814729575, "grad_norm": 1.0414680242538452, "learning_rate": 0.0016578826237054086, "loss": 0.7506, "step": 59460 }, { "epoch": 17.10874568469505, "grad_norm": 1.0904107093811035, "learning_rate": 0.0016578250863060991, "loss": 0.8066, "step": 59470 }, { "epoch": 17.111622554660528, "grad_norm": 1.54349684715271, "learning_rate": 0.0016577675489067895, "loss": 0.71, "step": 59480 }, { "epoch": 17.114499424626008, "grad_norm": 1.2794157266616821, "learning_rate": 0.0016577100115074798, "loss": 0.6556, "step": 59490 }, { "epoch": 17.117376294591484, "grad_norm": 2.1307613849639893, "learning_rate": 0.0016576524741081704, "loss": 0.7184, "step": 59500 }, { "epoch": 17.120253164556964, "grad_norm": 1.499680995941162, "learning_rate": 0.0016575949367088607, "loss": 0.6181, "step": 59510 }, { "epoch": 17.12313003452244, "grad_norm": 1.3347537517547607, "learning_rate": 0.0016575373993095513, "loss": 0.7014, "step": 59520 }, { "epoch": 17.126006904487916, "grad_norm": 0.7654240131378174, "learning_rate": 0.0016574798619102416, "loss": 0.8066, "step": 59530 }, { "epoch": 17.128883774453396, "grad_norm": 1.4153521060943604, "learning_rate": 0.0016574223245109322, "loss": 0.6339, "step": 59540 }, { "epoch": 17.131760644418872, "grad_norm": 0.8778973817825317, "learning_rate": 0.0016573647871116226, "loss": 0.6904, "step": 59550 }, { "epoch": 17.134637514384348, "grad_norm": 1.4527877569198608, "learning_rate": 0.001657307249712313, "loss": 0.7682, "step": 59560 }, { "epoch": 17.137514384349828, "grad_norm": 1.6142336130142212, "learning_rate": 0.0016572497123130035, "loss": 0.6953, "step": 59570 }, { "epoch": 17.140391254315304, "grad_norm": 1.9164990186691284, "learning_rate": 0.001657192174913694, "loss": 0.761, "step": 59580 }, { "epoch": 17.143268124280784, "grad_norm": 0.8610620498657227, "learning_rate": 0.0016571346375143844, "loss": 0.9057, "step": 59590 }, { "epoch": 17.14614499424626, "grad_norm": 1.3459035158157349, "learning_rate": 0.001657077100115075, "loss": 0.7138, "step": 59600 }, { "epoch": 17.149021864211736, "grad_norm": 0.8784189820289612, "learning_rate": 0.0016570195627157653, "loss": 0.8362, "step": 59610 }, { "epoch": 17.151898734177216, "grad_norm": 1.2744451761245728, "learning_rate": 0.0016569620253164556, "loss": 0.5658, "step": 59620 }, { "epoch": 17.154775604142692, "grad_norm": 1.4435423612594604, "learning_rate": 0.0016569044879171462, "loss": 0.7114, "step": 59630 }, { "epoch": 17.157652474108172, "grad_norm": 0.9257628917694092, "learning_rate": 0.0016568469505178368, "loss": 0.624, "step": 59640 }, { "epoch": 17.160529344073648, "grad_norm": 0.6801641583442688, "learning_rate": 0.0016567894131185271, "loss": 0.7075, "step": 59650 }, { "epoch": 17.163406214039124, "grad_norm": 0.7190287709236145, "learning_rate": 0.0016567318757192177, "loss": 0.8234, "step": 59660 }, { "epoch": 17.166283084004604, "grad_norm": 2.0601398944854736, "learning_rate": 0.0016566743383199078, "loss": 0.7447, "step": 59670 }, { "epoch": 17.16915995397008, "grad_norm": 1.507551908493042, "learning_rate": 0.0016566168009205984, "loss": 0.7085, "step": 59680 }, { "epoch": 17.172036823935557, "grad_norm": 0.7105184197425842, "learning_rate": 0.001656559263521289, "loss": 0.7804, "step": 59690 }, { "epoch": 17.174913693901036, "grad_norm": 3.1987791061401367, "learning_rate": 0.0016565017261219793, "loss": 0.7262, "step": 59700 }, { "epoch": 17.177790563866512, "grad_norm": 1.3217946290969849, "learning_rate": 0.0016564441887226698, "loss": 0.7237, "step": 59710 }, { "epoch": 17.180667433831992, "grad_norm": 1.0319328308105469, "learning_rate": 0.0016563866513233604, "loss": 0.7155, "step": 59720 }, { "epoch": 17.18354430379747, "grad_norm": 1.259107232093811, "learning_rate": 0.0016563291139240505, "loss": 0.6857, "step": 59730 }, { "epoch": 17.186421173762945, "grad_norm": 1.0093765258789062, "learning_rate": 0.001656271576524741, "loss": 0.7256, "step": 59740 }, { "epoch": 17.189298043728424, "grad_norm": 1.0343068838119507, "learning_rate": 0.0016562140391254317, "loss": 1.0307, "step": 59750 }, { "epoch": 17.1921749136939, "grad_norm": 1.742465853691101, "learning_rate": 0.001656156501726122, "loss": 0.7267, "step": 59760 }, { "epoch": 17.19505178365938, "grad_norm": 0.7887401580810547, "learning_rate": 0.0016560989643268126, "loss": 0.7738, "step": 59770 }, { "epoch": 17.197928653624857, "grad_norm": 1.7686930894851685, "learning_rate": 0.001656041426927503, "loss": 0.7852, "step": 59780 }, { "epoch": 17.200805523590333, "grad_norm": 1.37193763256073, "learning_rate": 0.0016559838895281933, "loss": 0.8255, "step": 59790 }, { "epoch": 17.203682393555813, "grad_norm": 0.8450044989585876, "learning_rate": 0.0016559263521288838, "loss": 0.8791, "step": 59800 }, { "epoch": 17.20655926352129, "grad_norm": 1.044519305229187, "learning_rate": 0.0016558688147295742, "loss": 0.5775, "step": 59810 }, { "epoch": 17.209436133486765, "grad_norm": 1.924437165260315, "learning_rate": 0.0016558112773302647, "loss": 0.7716, "step": 59820 }, { "epoch": 17.212313003452245, "grad_norm": 1.3889411687850952, "learning_rate": 0.0016557537399309553, "loss": 0.8307, "step": 59830 }, { "epoch": 17.21518987341772, "grad_norm": 4.137118816375732, "learning_rate": 0.0016556962025316454, "loss": 0.7734, "step": 59840 }, { "epoch": 17.2180667433832, "grad_norm": 1.4445246458053589, "learning_rate": 0.001655638665132336, "loss": 0.5962, "step": 59850 }, { "epoch": 17.220943613348677, "grad_norm": 1.2160651683807373, "learning_rate": 0.0016555811277330266, "loss": 0.7291, "step": 59860 }, { "epoch": 17.223820483314153, "grad_norm": 0.9587355256080627, "learning_rate": 0.001655523590333717, "loss": 0.6762, "step": 59870 }, { "epoch": 17.226697353279633, "grad_norm": 1.534591555595398, "learning_rate": 0.0016554660529344075, "loss": 0.723, "step": 59880 }, { "epoch": 17.22957422324511, "grad_norm": 1.0629353523254395, "learning_rate": 0.0016554085155350978, "loss": 0.6625, "step": 59890 }, { "epoch": 17.232451093210585, "grad_norm": 3.0245065689086914, "learning_rate": 0.0016553509781357882, "loss": 0.8705, "step": 59900 }, { "epoch": 17.235327963176065, "grad_norm": 1.4023977518081665, "learning_rate": 0.0016552934407364787, "loss": 0.6772, "step": 59910 }, { "epoch": 17.23820483314154, "grad_norm": 1.5387015342712402, "learning_rate": 0.001655235903337169, "loss": 0.7335, "step": 59920 }, { "epoch": 17.24108170310702, "grad_norm": 1.1071480512619019, "learning_rate": 0.0016551783659378596, "loss": 0.5996, "step": 59930 }, { "epoch": 17.243958573072497, "grad_norm": 0.9969618916511536, "learning_rate": 0.0016551208285385502, "loss": 0.7047, "step": 59940 }, { "epoch": 17.246835443037973, "grad_norm": 0.5155742168426514, "learning_rate": 0.0016550632911392406, "loss": 0.828, "step": 59950 }, { "epoch": 17.249712313003453, "grad_norm": 0.9685875773429871, "learning_rate": 0.001655005753739931, "loss": 0.7652, "step": 59960 }, { "epoch": 17.25258918296893, "grad_norm": 1.4057542085647583, "learning_rate": 0.0016549482163406215, "loss": 0.7405, "step": 59970 }, { "epoch": 17.25546605293441, "grad_norm": 1.7570215463638306, "learning_rate": 0.0016548906789413118, "loss": 0.6584, "step": 59980 }, { "epoch": 17.258342922899885, "grad_norm": 1.741615653038025, "learning_rate": 0.0016548331415420024, "loss": 0.6735, "step": 59990 }, { "epoch": 17.26121979286536, "grad_norm": 1.5686028003692627, "learning_rate": 0.0016547756041426927, "loss": 0.7642, "step": 60000 }, { "epoch": 17.26409666283084, "grad_norm": 0.7758271098136902, "learning_rate": 0.0016547180667433833, "loss": 0.6188, "step": 60010 }, { "epoch": 17.266973532796317, "grad_norm": 0.8758584260940552, "learning_rate": 0.0016546605293440736, "loss": 0.6203, "step": 60020 }, { "epoch": 17.269850402761794, "grad_norm": 1.145421028137207, "learning_rate": 0.001654602991944764, "loss": 0.7881, "step": 60030 }, { "epoch": 17.272727272727273, "grad_norm": 1.651932716369629, "learning_rate": 0.0016545454545454545, "loss": 0.701, "step": 60040 }, { "epoch": 17.27560414269275, "grad_norm": 0.8021969795227051, "learning_rate": 0.0016544879171461451, "loss": 0.6994, "step": 60050 }, { "epoch": 17.27848101265823, "grad_norm": 1.8284653425216675, "learning_rate": 0.0016544303797468355, "loss": 0.7828, "step": 60060 }, { "epoch": 17.281357882623706, "grad_norm": 0.6942156553268433, "learning_rate": 0.001654372842347526, "loss": 0.7303, "step": 60070 }, { "epoch": 17.28423475258918, "grad_norm": 1.6723213195800781, "learning_rate": 0.0016543153049482164, "loss": 0.6158, "step": 60080 }, { "epoch": 17.28711162255466, "grad_norm": 1.354958415031433, "learning_rate": 0.0016542577675489067, "loss": 0.5813, "step": 60090 }, { "epoch": 17.289988492520138, "grad_norm": 1.0955702066421509, "learning_rate": 0.0016542002301495973, "loss": 0.5988, "step": 60100 }, { "epoch": 17.292865362485614, "grad_norm": 1.6951920986175537, "learning_rate": 0.0016541426927502876, "loss": 0.8545, "step": 60110 }, { "epoch": 17.295742232451094, "grad_norm": 1.5124938488006592, "learning_rate": 0.0016540851553509782, "loss": 0.6056, "step": 60120 }, { "epoch": 17.29861910241657, "grad_norm": 1.144546389579773, "learning_rate": 0.0016540276179516688, "loss": 0.8039, "step": 60130 }, { "epoch": 17.30149597238205, "grad_norm": 1.1788380146026611, "learning_rate": 0.0016539700805523589, "loss": 0.7653, "step": 60140 }, { "epoch": 17.304372842347526, "grad_norm": 1.1643794775009155, "learning_rate": 0.0016539125431530495, "loss": 0.7144, "step": 60150 }, { "epoch": 17.307249712313002, "grad_norm": 1.032637596130371, "learning_rate": 0.00165385500575374, "loss": 0.7346, "step": 60160 }, { "epoch": 17.310126582278482, "grad_norm": 1.0088696479797363, "learning_rate": 0.0016537974683544304, "loss": 0.7217, "step": 60170 }, { "epoch": 17.313003452243958, "grad_norm": 1.1986076831817627, "learning_rate": 0.001653739930955121, "loss": 0.7381, "step": 60180 }, { "epoch": 17.315880322209438, "grad_norm": 1.2390531301498413, "learning_rate": 0.0016536823935558115, "loss": 0.6906, "step": 60190 }, { "epoch": 17.318757192174914, "grad_norm": 1.0683870315551758, "learning_rate": 0.0016536248561565016, "loss": 0.9088, "step": 60200 }, { "epoch": 17.32163406214039, "grad_norm": 1.358199119567871, "learning_rate": 0.0016535673187571922, "loss": 0.7998, "step": 60210 }, { "epoch": 17.32451093210587, "grad_norm": 1.6467041969299316, "learning_rate": 0.0016535097813578825, "loss": 0.9303, "step": 60220 }, { "epoch": 17.327387802071346, "grad_norm": 1.3493940830230713, "learning_rate": 0.001653452243958573, "loss": 0.7734, "step": 60230 }, { "epoch": 17.330264672036822, "grad_norm": 0.8477672338485718, "learning_rate": 0.0016533947065592637, "loss": 0.7834, "step": 60240 }, { "epoch": 17.333141542002302, "grad_norm": 0.7738592624664307, "learning_rate": 0.001653337169159954, "loss": 0.6749, "step": 60250 }, { "epoch": 17.33601841196778, "grad_norm": 1.0950689315795898, "learning_rate": 0.0016532796317606444, "loss": 0.6296, "step": 60260 }, { "epoch": 17.338895281933258, "grad_norm": 1.3064600229263306, "learning_rate": 0.001653222094361335, "loss": 0.8547, "step": 60270 }, { "epoch": 17.341772151898734, "grad_norm": 1.1739875078201294, "learning_rate": 0.0016531645569620253, "loss": 0.6059, "step": 60280 }, { "epoch": 17.34464902186421, "grad_norm": 1.3352633714675903, "learning_rate": 0.0016531070195627158, "loss": 0.7029, "step": 60290 }, { "epoch": 17.34752589182969, "grad_norm": 1.0582956075668335, "learning_rate": 0.0016530494821634064, "loss": 0.6667, "step": 60300 }, { "epoch": 17.350402761795166, "grad_norm": 1.1573442220687866, "learning_rate": 0.0016529919447640967, "loss": 0.8687, "step": 60310 }, { "epoch": 17.353279631760646, "grad_norm": 1.128717303276062, "learning_rate": 0.001652934407364787, "loss": 0.781, "step": 60320 }, { "epoch": 17.356156501726122, "grad_norm": 4.465848445892334, "learning_rate": 0.0016528768699654776, "loss": 0.9407, "step": 60330 }, { "epoch": 17.3590333716916, "grad_norm": 1.1976457834243774, "learning_rate": 0.001652819332566168, "loss": 0.8785, "step": 60340 }, { "epoch": 17.36191024165708, "grad_norm": 1.1707404851913452, "learning_rate": 0.0016527617951668586, "loss": 0.7503, "step": 60350 }, { "epoch": 17.364787111622555, "grad_norm": 1.1824913024902344, "learning_rate": 0.001652704257767549, "loss": 0.7294, "step": 60360 }, { "epoch": 17.36766398158803, "grad_norm": 2.8289995193481445, "learning_rate": 0.0016526467203682395, "loss": 0.748, "step": 60370 }, { "epoch": 17.37054085155351, "grad_norm": 1.5718432664871216, "learning_rate": 0.0016525891829689298, "loss": 0.7427, "step": 60380 }, { "epoch": 17.373417721518987, "grad_norm": 0.7998115420341492, "learning_rate": 0.0016525316455696202, "loss": 0.5805, "step": 60390 }, { "epoch": 17.376294591484466, "grad_norm": 1.601285696029663, "learning_rate": 0.0016524741081703107, "loss": 0.7057, "step": 60400 }, { "epoch": 17.379171461449943, "grad_norm": 1.1376286745071411, "learning_rate": 0.0016524165707710013, "loss": 0.6836, "step": 60410 }, { "epoch": 17.38204833141542, "grad_norm": 1.0254061222076416, "learning_rate": 0.0016523590333716916, "loss": 0.8001, "step": 60420 }, { "epoch": 17.3849252013809, "grad_norm": 1.4930393695831299, "learning_rate": 0.0016523014959723822, "loss": 0.7902, "step": 60430 }, { "epoch": 17.387802071346375, "grad_norm": 1.0859768390655518, "learning_rate": 0.0016522439585730726, "loss": 0.8397, "step": 60440 }, { "epoch": 17.39067894131185, "grad_norm": 1.176641821861267, "learning_rate": 0.001652186421173763, "loss": 0.6408, "step": 60450 }, { "epoch": 17.39355581127733, "grad_norm": 1.7644375562667847, "learning_rate": 0.0016521288837744535, "loss": 0.9667, "step": 60460 }, { "epoch": 17.396432681242807, "grad_norm": 1.2195185422897339, "learning_rate": 0.0016520713463751438, "loss": 0.675, "step": 60470 }, { "epoch": 17.399309551208287, "grad_norm": 1.970190167427063, "learning_rate": 0.0016520138089758344, "loss": 0.9081, "step": 60480 }, { "epoch": 17.402186421173763, "grad_norm": 1.8973708152770996, "learning_rate": 0.001651956271576525, "loss": 0.8265, "step": 60490 }, { "epoch": 17.40506329113924, "grad_norm": 2.2986934185028076, "learning_rate": 0.001651898734177215, "loss": 0.7908, "step": 60500 }, { "epoch": 17.40794016110472, "grad_norm": 1.1508116722106934, "learning_rate": 0.0016518411967779056, "loss": 0.8005, "step": 60510 }, { "epoch": 17.410817031070195, "grad_norm": 0.6037564277648926, "learning_rate": 0.0016517836593785962, "loss": 0.7274, "step": 60520 }, { "epoch": 17.413693901035675, "grad_norm": 1.5268245935440063, "learning_rate": 0.0016517261219792865, "loss": 0.8108, "step": 60530 }, { "epoch": 17.41657077100115, "grad_norm": 1.4025167226791382, "learning_rate": 0.001651668584579977, "loss": 0.8572, "step": 60540 }, { "epoch": 17.419447640966627, "grad_norm": 1.5326683521270752, "learning_rate": 0.0016516110471806677, "loss": 0.6362, "step": 60550 }, { "epoch": 17.422324510932107, "grad_norm": 1.4211088418960571, "learning_rate": 0.0016515535097813578, "loss": 0.6428, "step": 60560 }, { "epoch": 17.425201380897583, "grad_norm": 1.8697000741958618, "learning_rate": 0.0016514959723820484, "loss": 0.6389, "step": 60570 }, { "epoch": 17.42807825086306, "grad_norm": 0.7962353825569153, "learning_rate": 0.0016514384349827387, "loss": 0.7344, "step": 60580 }, { "epoch": 17.43095512082854, "grad_norm": 1.2120805978775024, "learning_rate": 0.0016513808975834293, "loss": 0.9198, "step": 60590 }, { "epoch": 17.433831990794015, "grad_norm": 1.75242018699646, "learning_rate": 0.0016513233601841198, "loss": 0.7064, "step": 60600 }, { "epoch": 17.436708860759495, "grad_norm": 1.9889910221099854, "learning_rate": 0.0016512658227848102, "loss": 0.9906, "step": 60610 }, { "epoch": 17.43958573072497, "grad_norm": 1.170061707496643, "learning_rate": 0.0016512082853855005, "loss": 0.6087, "step": 60620 }, { "epoch": 17.442462600690448, "grad_norm": 1.1340813636779785, "learning_rate": 0.001651150747986191, "loss": 0.6598, "step": 60630 }, { "epoch": 17.445339470655927, "grad_norm": 1.0872076749801636, "learning_rate": 0.0016510932105868814, "loss": 0.6703, "step": 60640 }, { "epoch": 17.448216340621403, "grad_norm": 1.5503253936767578, "learning_rate": 0.001651035673187572, "loss": 0.5916, "step": 60650 }, { "epoch": 17.451093210586883, "grad_norm": 1.6067299842834473, "learning_rate": 0.0016509781357882626, "loss": 0.8217, "step": 60660 }, { "epoch": 17.45397008055236, "grad_norm": 1.5675742626190186, "learning_rate": 0.0016509205983889527, "loss": 0.8029, "step": 60670 }, { "epoch": 17.456846950517836, "grad_norm": 1.1585841178894043, "learning_rate": 0.0016508630609896433, "loss": 0.6882, "step": 60680 }, { "epoch": 17.459723820483315, "grad_norm": 1.6682418584823608, "learning_rate": 0.0016508055235903336, "loss": 0.9129, "step": 60690 }, { "epoch": 17.46260069044879, "grad_norm": 1.4490476846694946, "learning_rate": 0.0016507479861910242, "loss": 0.7866, "step": 60700 }, { "epoch": 17.465477560414268, "grad_norm": 1.3209202289581299, "learning_rate": 0.0016506904487917147, "loss": 0.7969, "step": 60710 }, { "epoch": 17.468354430379748, "grad_norm": 0.6295074224472046, "learning_rate": 0.001650632911392405, "loss": 0.7949, "step": 60720 }, { "epoch": 17.471231300345224, "grad_norm": 2.305023193359375, "learning_rate": 0.0016505753739930954, "loss": 0.7686, "step": 60730 }, { "epoch": 17.474108170310704, "grad_norm": 1.5327619314193726, "learning_rate": 0.001650517836593786, "loss": 0.8029, "step": 60740 }, { "epoch": 17.47698504027618, "grad_norm": 0.7712783813476562, "learning_rate": 0.0016504602991944763, "loss": 0.7822, "step": 60750 }, { "epoch": 17.479861910241656, "grad_norm": 1.3488543033599854, "learning_rate": 0.001650402761795167, "loss": 0.6047, "step": 60760 }, { "epoch": 17.482738780207136, "grad_norm": 0.84311842918396, "learning_rate": 0.0016503452243958575, "loss": 0.8139, "step": 60770 }, { "epoch": 17.485615650172612, "grad_norm": 0.9786946177482605, "learning_rate": 0.0016502876869965478, "loss": 0.7192, "step": 60780 }, { "epoch": 17.488492520138088, "grad_norm": 1.9279555082321167, "learning_rate": 0.0016502301495972382, "loss": 0.8388, "step": 60790 }, { "epoch": 17.491369390103568, "grad_norm": 1.0935732126235962, "learning_rate": 0.0016501726121979285, "loss": 0.6964, "step": 60800 }, { "epoch": 17.494246260069044, "grad_norm": 1.018598198890686, "learning_rate": 0.001650115074798619, "loss": 0.6858, "step": 60810 }, { "epoch": 17.497123130034524, "grad_norm": 1.3241970539093018, "learning_rate": 0.0016500575373993096, "loss": 0.6848, "step": 60820 }, { "epoch": 17.5, "grad_norm": 2.4793355464935303, "learning_rate": 0.00165, "loss": 0.7952, "step": 60830 }, { "epoch": 17.502876869965476, "grad_norm": 1.4433820247650146, "learning_rate": 0.0016499424626006906, "loss": 0.8986, "step": 60840 }, { "epoch": 17.505753739930956, "grad_norm": 1.841988205909729, "learning_rate": 0.001649884925201381, "loss": 0.6826, "step": 60850 }, { "epoch": 17.508630609896432, "grad_norm": 0.6317101120948792, "learning_rate": 0.0016498273878020712, "loss": 0.7031, "step": 60860 }, { "epoch": 17.511507479861912, "grad_norm": 0.8296363949775696, "learning_rate": 0.0016497698504027618, "loss": 0.9707, "step": 60870 }, { "epoch": 17.514384349827388, "grad_norm": 1.143373966217041, "learning_rate": 0.0016497123130034524, "loss": 0.7471, "step": 60880 }, { "epoch": 17.517261219792864, "grad_norm": 1.0517195463180542, "learning_rate": 0.0016496547756041427, "loss": 0.6544, "step": 60890 }, { "epoch": 17.520138089758344, "grad_norm": 1.2931607961654663, "learning_rate": 0.0016495972382048333, "loss": 0.6398, "step": 60900 }, { "epoch": 17.52301495972382, "grad_norm": 0.9061785340309143, "learning_rate": 0.0016495397008055236, "loss": 0.7047, "step": 60910 }, { "epoch": 17.525891829689296, "grad_norm": 0.8698462247848511, "learning_rate": 0.001649482163406214, "loss": 0.7363, "step": 60920 }, { "epoch": 17.528768699654776, "grad_norm": 1.3193267583847046, "learning_rate": 0.0016494246260069045, "loss": 0.9212, "step": 60930 }, { "epoch": 17.531645569620252, "grad_norm": 1.0143165588378906, "learning_rate": 0.001649367088607595, "loss": 0.6814, "step": 60940 }, { "epoch": 17.534522439585732, "grad_norm": 2.48783016204834, "learning_rate": 0.0016493095512082855, "loss": 0.7019, "step": 60950 }, { "epoch": 17.53739930955121, "grad_norm": 1.2614928483963013, "learning_rate": 0.001649252013808976, "loss": 0.9975, "step": 60960 }, { "epoch": 17.540276179516685, "grad_norm": 0.9593960046768188, "learning_rate": 0.0016491944764096662, "loss": 0.6473, "step": 60970 }, { "epoch": 17.543153049482164, "grad_norm": 1.0932130813598633, "learning_rate": 0.0016491369390103567, "loss": 0.7323, "step": 60980 }, { "epoch": 17.54602991944764, "grad_norm": 1.0743132829666138, "learning_rate": 0.0016490794016110473, "loss": 0.7177, "step": 60990 }, { "epoch": 17.548906789413117, "grad_norm": 0.7799952626228333, "learning_rate": 0.0016490218642117376, "loss": 0.636, "step": 61000 }, { "epoch": 17.551783659378597, "grad_norm": 1.2977216243743896, "learning_rate": 0.0016489643268124282, "loss": 0.8308, "step": 61010 }, { "epoch": 17.554660529344073, "grad_norm": 1.7039684057235718, "learning_rate": 0.0016489067894131188, "loss": 0.7338, "step": 61020 }, { "epoch": 17.557537399309552, "grad_norm": 1.399594783782959, "learning_rate": 0.0016488492520138089, "loss": 0.8712, "step": 61030 }, { "epoch": 17.56041426927503, "grad_norm": 1.6073615550994873, "learning_rate": 0.0016487917146144994, "loss": 0.7336, "step": 61040 }, { "epoch": 17.563291139240505, "grad_norm": 3.94059681892395, "learning_rate": 0.0016487341772151898, "loss": 0.8436, "step": 61050 }, { "epoch": 17.566168009205985, "grad_norm": 1.630576729774475, "learning_rate": 0.0016486766398158804, "loss": 1.0217, "step": 61060 }, { "epoch": 17.56904487917146, "grad_norm": 1.234158992767334, "learning_rate": 0.001648619102416571, "loss": 0.7093, "step": 61070 }, { "epoch": 17.57192174913694, "grad_norm": 1.8975822925567627, "learning_rate": 0.0016485615650172613, "loss": 0.7793, "step": 61080 }, { "epoch": 17.574798619102417, "grad_norm": 2.2391715049743652, "learning_rate": 0.0016485040276179516, "loss": 0.8672, "step": 61090 }, { "epoch": 17.577675489067893, "grad_norm": 1.7534018754959106, "learning_rate": 0.0016484464902186422, "loss": 0.8822, "step": 61100 }, { "epoch": 17.580552359033373, "grad_norm": 0.8755640983581543, "learning_rate": 0.0016483889528193325, "loss": 0.7866, "step": 61110 }, { "epoch": 17.58342922899885, "grad_norm": 1.4691890478134155, "learning_rate": 0.001648331415420023, "loss": 0.6502, "step": 61120 }, { "epoch": 17.586306098964325, "grad_norm": 2.2809064388275146, "learning_rate": 0.0016482738780207137, "loss": 0.8948, "step": 61130 }, { "epoch": 17.589182968929805, "grad_norm": 1.5494862794876099, "learning_rate": 0.001648216340621404, "loss": 0.6193, "step": 61140 }, { "epoch": 17.59205983889528, "grad_norm": 1.1962769031524658, "learning_rate": 0.0016481588032220944, "loss": 0.724, "step": 61150 }, { "epoch": 17.59493670886076, "grad_norm": 1.011070966720581, "learning_rate": 0.0016481012658227847, "loss": 0.8305, "step": 61160 }, { "epoch": 17.597813578826237, "grad_norm": 1.4352582693099976, "learning_rate": 0.0016480437284234753, "loss": 0.6827, "step": 61170 }, { "epoch": 17.600690448791713, "grad_norm": 4.25236177444458, "learning_rate": 0.0016479861910241658, "loss": 0.8214, "step": 61180 }, { "epoch": 17.603567318757193, "grad_norm": 1.14589524269104, "learning_rate": 0.0016479286536248562, "loss": 0.8732, "step": 61190 }, { "epoch": 17.60644418872267, "grad_norm": 0.7356566786766052, "learning_rate": 0.0016478711162255467, "loss": 0.658, "step": 61200 }, { "epoch": 17.60932105868815, "grad_norm": 2.066740036010742, "learning_rate": 0.001647813578826237, "loss": 0.7356, "step": 61210 }, { "epoch": 17.612197928653625, "grad_norm": 1.5948162078857422, "learning_rate": 0.0016477560414269274, "loss": 0.7107, "step": 61220 }, { "epoch": 17.6150747986191, "grad_norm": 1.2624098062515259, "learning_rate": 0.001647698504027618, "loss": 0.8791, "step": 61230 }, { "epoch": 17.61795166858458, "grad_norm": 1.3647799491882324, "learning_rate": 0.0016476409666283086, "loss": 0.6873, "step": 61240 }, { "epoch": 17.620828538550057, "grad_norm": 0.7776311635971069, "learning_rate": 0.001647583429228999, "loss": 0.5733, "step": 61250 }, { "epoch": 17.623705408515534, "grad_norm": 0.868355393409729, "learning_rate": 0.0016475258918296895, "loss": 0.9045, "step": 61260 }, { "epoch": 17.626582278481013, "grad_norm": 2.1578314304351807, "learning_rate": 0.0016474683544303796, "loss": 0.768, "step": 61270 }, { "epoch": 17.62945914844649, "grad_norm": 1.358891248703003, "learning_rate": 0.0016474108170310702, "loss": 0.6969, "step": 61280 }, { "epoch": 17.63233601841197, "grad_norm": 1.2732083797454834, "learning_rate": 0.0016473532796317607, "loss": 0.7178, "step": 61290 }, { "epoch": 17.635212888377445, "grad_norm": 1.357895851135254, "learning_rate": 0.001647295742232451, "loss": 0.738, "step": 61300 }, { "epoch": 17.63808975834292, "grad_norm": 2.112776756286621, "learning_rate": 0.0016472382048331416, "loss": 0.7209, "step": 61310 }, { "epoch": 17.6409666283084, "grad_norm": 0.980426013469696, "learning_rate": 0.0016471806674338322, "loss": 0.7397, "step": 61320 }, { "epoch": 17.643843498273878, "grad_norm": 1.3796170949935913, "learning_rate": 0.0016471231300345223, "loss": 0.6961, "step": 61330 }, { "epoch": 17.646720368239357, "grad_norm": 1.2337285280227661, "learning_rate": 0.001647065592635213, "loss": 0.8135, "step": 61340 }, { "epoch": 17.649597238204834, "grad_norm": 1.2544540166854858, "learning_rate": 0.0016470080552359035, "loss": 0.5949, "step": 61350 }, { "epoch": 17.65247410817031, "grad_norm": 1.107194185256958, "learning_rate": 0.0016469505178365938, "loss": 0.7043, "step": 61360 }, { "epoch": 17.65535097813579, "grad_norm": 1.1205874681472778, "learning_rate": 0.0016468929804372844, "loss": 0.6784, "step": 61370 }, { "epoch": 17.658227848101266, "grad_norm": 0.9493898749351501, "learning_rate": 0.0016468354430379747, "loss": 0.9415, "step": 61380 }, { "epoch": 17.661104718066742, "grad_norm": 1.5723776817321777, "learning_rate": 0.001646777905638665, "loss": 0.9939, "step": 61390 }, { "epoch": 17.66398158803222, "grad_norm": 1.67062246799469, "learning_rate": 0.0016467203682393556, "loss": 0.7378, "step": 61400 }, { "epoch": 17.666858457997698, "grad_norm": 0.8651061654090881, "learning_rate": 0.001646662830840046, "loss": 0.7746, "step": 61410 }, { "epoch": 17.669735327963178, "grad_norm": 0.9910094738006592, "learning_rate": 0.0016466052934407365, "loss": 0.73, "step": 61420 }, { "epoch": 17.672612197928654, "grad_norm": 1.3768404722213745, "learning_rate": 0.001646547756041427, "loss": 1.0346, "step": 61430 }, { "epoch": 17.67548906789413, "grad_norm": 2.7488155364990234, "learning_rate": 0.0016464902186421175, "loss": 1.0017, "step": 61440 }, { "epoch": 17.67836593785961, "grad_norm": 1.0385479927062988, "learning_rate": 0.0016464326812428078, "loss": 0.7729, "step": 61450 }, { "epoch": 17.681242807825086, "grad_norm": 1.1582417488098145, "learning_rate": 0.0016463751438434984, "loss": 0.7528, "step": 61460 }, { "epoch": 17.684119677790562, "grad_norm": 0.9767696261405945, "learning_rate": 0.0016463176064441887, "loss": 0.7354, "step": 61470 }, { "epoch": 17.686996547756042, "grad_norm": 2.2510921955108643, "learning_rate": 0.0016462600690448793, "loss": 0.6975, "step": 61480 }, { "epoch": 17.689873417721518, "grad_norm": 1.1501257419586182, "learning_rate": 0.0016462025316455696, "loss": 0.7712, "step": 61490 }, { "epoch": 17.692750287686998, "grad_norm": 1.340937852859497, "learning_rate": 0.00164614499424626, "loss": 0.7824, "step": 61500 }, { "epoch": 17.695627157652474, "grad_norm": 1.1377670764923096, "learning_rate": 0.0016460874568469505, "loss": 0.7573, "step": 61510 }, { "epoch": 17.69850402761795, "grad_norm": 1.0595632791519165, "learning_rate": 0.0016460299194476409, "loss": 0.7348, "step": 61520 }, { "epoch": 17.70138089758343, "grad_norm": 1.588478684425354, "learning_rate": 0.0016459723820483314, "loss": 0.7564, "step": 61530 }, { "epoch": 17.704257767548906, "grad_norm": 3.0204861164093018, "learning_rate": 0.001645914844649022, "loss": 0.8485, "step": 61540 }, { "epoch": 17.707134637514386, "grad_norm": 1.1761393547058105, "learning_rate": 0.0016458573072497124, "loss": 0.7933, "step": 61550 }, { "epoch": 17.710011507479862, "grad_norm": 1.4884743690490723, "learning_rate": 0.0016457997698504027, "loss": 0.7188, "step": 61560 }, { "epoch": 17.71288837744534, "grad_norm": 1.388800859451294, "learning_rate": 0.0016457422324510933, "loss": 0.7732, "step": 61570 }, { "epoch": 17.71576524741082, "grad_norm": 2.1915183067321777, "learning_rate": 0.0016456846950517836, "loss": 0.9756, "step": 61580 }, { "epoch": 17.718642117376294, "grad_norm": 1.6176241636276245, "learning_rate": 0.0016456271576524742, "loss": 0.5943, "step": 61590 }, { "epoch": 17.72151898734177, "grad_norm": 1.1410760879516602, "learning_rate": 0.0016455696202531647, "loss": 0.7719, "step": 61600 }, { "epoch": 17.72439585730725, "grad_norm": 1.3691085577011108, "learning_rate": 0.001645512082853855, "loss": 0.7219, "step": 61610 }, { "epoch": 17.727272727272727, "grad_norm": 1.4033229351043701, "learning_rate": 0.0016454545454545454, "loss": 0.8311, "step": 61620 }, { "epoch": 17.730149597238206, "grad_norm": 2.8459057807922363, "learning_rate": 0.0016453970080552358, "loss": 0.7552, "step": 61630 }, { "epoch": 17.733026467203683, "grad_norm": 1.7783374786376953, "learning_rate": 0.0016453394706559263, "loss": 0.6497, "step": 61640 }, { "epoch": 17.73590333716916, "grad_norm": 1.2539740800857544, "learning_rate": 0.001645281933256617, "loss": 0.6825, "step": 61650 }, { "epoch": 17.73878020713464, "grad_norm": 1.281125783920288, "learning_rate": 0.0016452243958573073, "loss": 0.6865, "step": 61660 }, { "epoch": 17.741657077100115, "grad_norm": 1.4566370248794556, "learning_rate": 0.0016451668584579978, "loss": 0.8566, "step": 61670 }, { "epoch": 17.74453394706559, "grad_norm": 1.7518640756607056, "learning_rate": 0.0016451093210586882, "loss": 0.8322, "step": 61680 }, { "epoch": 17.74741081703107, "grad_norm": 1.9669315814971924, "learning_rate": 0.0016450517836593785, "loss": 0.912, "step": 61690 }, { "epoch": 17.750287686996547, "grad_norm": 0.987561047077179, "learning_rate": 0.001644994246260069, "loss": 0.7819, "step": 61700 }, { "epoch": 17.753164556962027, "grad_norm": 0.9519643187522888, "learning_rate": 0.0016449367088607596, "loss": 0.8122, "step": 61710 }, { "epoch": 17.756041426927503, "grad_norm": 0.8843225836753845, "learning_rate": 0.00164487917146145, "loss": 0.6189, "step": 61720 }, { "epoch": 17.75891829689298, "grad_norm": 0.806267499923706, "learning_rate": 0.0016448216340621406, "loss": 0.8162, "step": 61730 }, { "epoch": 17.76179516685846, "grad_norm": 0.8835660219192505, "learning_rate": 0.0016447640966628307, "loss": 0.6921, "step": 61740 }, { "epoch": 17.764672036823935, "grad_norm": 0.7322413921356201, "learning_rate": 0.0016447065592635212, "loss": 0.9104, "step": 61750 }, { "epoch": 17.767548906789415, "grad_norm": 1.3146289587020874, "learning_rate": 0.0016446490218642118, "loss": 0.8252, "step": 61760 }, { "epoch": 17.77042577675489, "grad_norm": 1.9408349990844727, "learning_rate": 0.0016445914844649022, "loss": 0.7036, "step": 61770 }, { "epoch": 17.773302646720367, "grad_norm": 1.332795262336731, "learning_rate": 0.0016445339470655927, "loss": 0.6715, "step": 61780 }, { "epoch": 17.776179516685847, "grad_norm": 1.2267316579818726, "learning_rate": 0.0016444764096662833, "loss": 0.7998, "step": 61790 }, { "epoch": 17.779056386651323, "grad_norm": 1.175176739692688, "learning_rate": 0.0016444188722669734, "loss": 0.7139, "step": 61800 }, { "epoch": 17.7819332566168, "grad_norm": 1.3185051679611206, "learning_rate": 0.001644361334867664, "loss": 0.8413, "step": 61810 }, { "epoch": 17.78481012658228, "grad_norm": 0.7105361819267273, "learning_rate": 0.0016443037974683545, "loss": 0.8605, "step": 61820 }, { "epoch": 17.787686996547755, "grad_norm": 0.8268961906433105, "learning_rate": 0.0016442462600690449, "loss": 0.6443, "step": 61830 }, { "epoch": 17.790563866513235, "grad_norm": 1.2255083322525024, "learning_rate": 0.0016441887226697355, "loss": 0.7666, "step": 61840 }, { "epoch": 17.79344073647871, "grad_norm": 1.276674747467041, "learning_rate": 0.0016441311852704258, "loss": 0.6987, "step": 61850 }, { "epoch": 17.796317606444187, "grad_norm": 1.1796811819076538, "learning_rate": 0.0016440736478711161, "loss": 0.6829, "step": 61860 }, { "epoch": 17.799194476409667, "grad_norm": 1.4561799764633179, "learning_rate": 0.0016440161104718067, "loss": 0.7518, "step": 61870 }, { "epoch": 17.802071346375143, "grad_norm": 3.1262893676757812, "learning_rate": 0.001643958573072497, "loss": 0.894, "step": 61880 }, { "epoch": 17.80494821634062, "grad_norm": 0.9549388885498047, "learning_rate": 0.0016439010356731876, "loss": 0.6106, "step": 61890 }, { "epoch": 17.8078250863061, "grad_norm": 0.6549584269523621, "learning_rate": 0.0016438434982738782, "loss": 0.7425, "step": 61900 }, { "epoch": 17.810701956271576, "grad_norm": 1.382201075553894, "learning_rate": 0.0016437859608745685, "loss": 0.7337, "step": 61910 }, { "epoch": 17.813578826237055, "grad_norm": 1.6169878244400024, "learning_rate": 0.0016437284234752589, "loss": 0.6284, "step": 61920 }, { "epoch": 17.81645569620253, "grad_norm": 2.219252824783325, "learning_rate": 0.0016436708860759494, "loss": 0.8412, "step": 61930 }, { "epoch": 17.819332566168008, "grad_norm": 2.024982213973999, "learning_rate": 0.0016436133486766398, "loss": 1.0689, "step": 61940 }, { "epoch": 17.822209436133488, "grad_norm": 0.9535984396934509, "learning_rate": 0.0016435558112773304, "loss": 0.8377, "step": 61950 }, { "epoch": 17.825086306098964, "grad_norm": 1.8746533393859863, "learning_rate": 0.0016434982738780207, "loss": 0.783, "step": 61960 }, { "epoch": 17.827963176064443, "grad_norm": 1.1060429811477661, "learning_rate": 0.0016434407364787113, "loss": 0.6296, "step": 61970 }, { "epoch": 17.83084004602992, "grad_norm": 0.920870840549469, "learning_rate": 0.0016433831990794016, "loss": 0.7223, "step": 61980 }, { "epoch": 17.833716915995396, "grad_norm": 1.1279377937316895, "learning_rate": 0.001643325661680092, "loss": 0.8778, "step": 61990 }, { "epoch": 17.836593785960876, "grad_norm": 1.301205039024353, "learning_rate": 0.0016432681242807825, "loss": 0.6733, "step": 62000 }, { "epoch": 17.839470655926352, "grad_norm": 1.6751097440719604, "learning_rate": 0.001643210586881473, "loss": 0.7357, "step": 62010 }, { "epoch": 17.842347525891828, "grad_norm": 1.9840209484100342, "learning_rate": 0.0016431530494821634, "loss": 0.7936, "step": 62020 }, { "epoch": 17.845224395857308, "grad_norm": 1.2424448728561401, "learning_rate": 0.001643095512082854, "loss": 0.7126, "step": 62030 }, { "epoch": 17.848101265822784, "grad_norm": 1.3106849193572998, "learning_rate": 0.0016430379746835443, "loss": 0.7533, "step": 62040 }, { "epoch": 17.850978135788264, "grad_norm": 1.485846757888794, "learning_rate": 0.0016429804372842347, "loss": 0.7734, "step": 62050 }, { "epoch": 17.85385500575374, "grad_norm": 0.8792023658752441, "learning_rate": 0.0016429228998849253, "loss": 0.7573, "step": 62060 }, { "epoch": 17.856731875719216, "grad_norm": 1.0483962297439575, "learning_rate": 0.0016428653624856156, "loss": 0.9279, "step": 62070 }, { "epoch": 17.859608745684696, "grad_norm": 0.9139774441719055, "learning_rate": 0.0016428078250863062, "loss": 0.9259, "step": 62080 }, { "epoch": 17.862485615650172, "grad_norm": 0.7675241827964783, "learning_rate": 0.0016427502876869967, "loss": 0.7555, "step": 62090 }, { "epoch": 17.865362485615652, "grad_norm": 2.59788179397583, "learning_rate": 0.0016426927502876869, "loss": 0.8058, "step": 62100 }, { "epoch": 17.868239355581128, "grad_norm": 1.30596125125885, "learning_rate": 0.0016426352128883774, "loss": 0.5471, "step": 62110 }, { "epoch": 17.871116225546604, "grad_norm": 1.394285798072815, "learning_rate": 0.001642577675489068, "loss": 0.7622, "step": 62120 }, { "epoch": 17.873993095512084, "grad_norm": 4.117196083068848, "learning_rate": 0.0016425201380897583, "loss": 0.7587, "step": 62130 }, { "epoch": 17.87686996547756, "grad_norm": 1.2670485973358154, "learning_rate": 0.001642462600690449, "loss": 0.6975, "step": 62140 }, { "epoch": 17.879746835443036, "grad_norm": 1.277633547782898, "learning_rate": 0.0016424050632911395, "loss": 0.7577, "step": 62150 }, { "epoch": 17.882623705408516, "grad_norm": 2.1363425254821777, "learning_rate": 0.0016423475258918296, "loss": 0.8701, "step": 62160 }, { "epoch": 17.885500575373992, "grad_norm": 1.3117181062698364, "learning_rate": 0.0016422899884925202, "loss": 0.8423, "step": 62170 }, { "epoch": 17.888377445339472, "grad_norm": 1.2763093709945679, "learning_rate": 0.0016422324510932105, "loss": 0.6339, "step": 62180 }, { "epoch": 17.89125431530495, "grad_norm": 1.146437406539917, "learning_rate": 0.001642174913693901, "loss": 0.8057, "step": 62190 }, { "epoch": 17.894131185270425, "grad_norm": 2.4757556915283203, "learning_rate": 0.0016421173762945916, "loss": 0.9138, "step": 62200 }, { "epoch": 17.897008055235904, "grad_norm": 0.9276107549667358, "learning_rate": 0.001642059838895282, "loss": 0.7946, "step": 62210 }, { "epoch": 17.89988492520138, "grad_norm": 1.0812835693359375, "learning_rate": 0.0016420023014959723, "loss": 0.6867, "step": 62220 }, { "epoch": 17.90276179516686, "grad_norm": 1.5610624551773071, "learning_rate": 0.001641944764096663, "loss": 0.733, "step": 62230 }, { "epoch": 17.905638665132336, "grad_norm": 1.7458951473236084, "learning_rate": 0.0016418872266973532, "loss": 0.7868, "step": 62240 }, { "epoch": 17.908515535097813, "grad_norm": 0.9827128648757935, "learning_rate": 0.0016418296892980438, "loss": 0.8682, "step": 62250 }, { "epoch": 17.911392405063292, "grad_norm": 2.598320245742798, "learning_rate": 0.0016417721518987344, "loss": 0.9015, "step": 62260 }, { "epoch": 17.91426927502877, "grad_norm": 1.037909746170044, "learning_rate": 0.0016417146144994247, "loss": 0.644, "step": 62270 }, { "epoch": 17.917146144994245, "grad_norm": 1.363077998161316, "learning_rate": 0.001641657077100115, "loss": 0.7297, "step": 62280 }, { "epoch": 17.920023014959725, "grad_norm": 1.1412729024887085, "learning_rate": 0.0016415995397008056, "loss": 0.7745, "step": 62290 }, { "epoch": 17.9228998849252, "grad_norm": 1.0864595174789429, "learning_rate": 0.001641542002301496, "loss": 0.6756, "step": 62300 }, { "epoch": 17.92577675489068, "grad_norm": 1.1459035873413086, "learning_rate": 0.0016414844649021865, "loss": 0.7862, "step": 62310 }, { "epoch": 17.928653624856157, "grad_norm": 1.6241350173950195, "learning_rate": 0.0016414269275028769, "loss": 0.6828, "step": 62320 }, { "epoch": 17.931530494821633, "grad_norm": 1.2812020778656006, "learning_rate": 0.0016413693901035672, "loss": 0.688, "step": 62330 }, { "epoch": 17.934407364787113, "grad_norm": 1.4547404050827026, "learning_rate": 0.0016413118527042578, "loss": 0.7788, "step": 62340 }, { "epoch": 17.93728423475259, "grad_norm": 1.5906628370285034, "learning_rate": 0.0016412543153049481, "loss": 0.7487, "step": 62350 }, { "epoch": 17.940161104718065, "grad_norm": 1.9080299139022827, "learning_rate": 0.0016411967779056387, "loss": 0.8728, "step": 62360 }, { "epoch": 17.943037974683545, "grad_norm": 0.8937801122665405, "learning_rate": 0.0016411392405063293, "loss": 0.6443, "step": 62370 }, { "epoch": 17.94591484464902, "grad_norm": 1.07843816280365, "learning_rate": 0.0016410817031070196, "loss": 0.6211, "step": 62380 }, { "epoch": 17.9487917146145, "grad_norm": 0.9433837532997131, "learning_rate": 0.00164102416570771, "loss": 0.769, "step": 62390 }, { "epoch": 17.951668584579977, "grad_norm": 1.4329584836959839, "learning_rate": 0.0016409666283084005, "loss": 0.6746, "step": 62400 }, { "epoch": 17.954545454545453, "grad_norm": 1.4648767709732056, "learning_rate": 0.0016409090909090909, "loss": 0.8088, "step": 62410 }, { "epoch": 17.957422324510933, "grad_norm": 1.1611934900283813, "learning_rate": 0.0016408515535097814, "loss": 0.8575, "step": 62420 }, { "epoch": 17.96029919447641, "grad_norm": 1.6701196432113647, "learning_rate": 0.0016407940161104718, "loss": 0.8207, "step": 62430 }, { "epoch": 17.96317606444189, "grad_norm": 0.7918471097946167, "learning_rate": 0.0016407364787111624, "loss": 0.6749, "step": 62440 }, { "epoch": 17.966052934407365, "grad_norm": 1.2251298427581787, "learning_rate": 0.0016406789413118527, "loss": 0.6859, "step": 62450 }, { "epoch": 17.96892980437284, "grad_norm": 4.3447041511535645, "learning_rate": 0.001640621403912543, "loss": 0.8589, "step": 62460 }, { "epoch": 17.97180667433832, "grad_norm": 0.8385584950447083, "learning_rate": 0.0016405638665132336, "loss": 0.567, "step": 62470 }, { "epoch": 17.974683544303797, "grad_norm": 1.236822247505188, "learning_rate": 0.0016405063291139242, "loss": 0.7397, "step": 62480 }, { "epoch": 17.977560414269274, "grad_norm": 0.825174868106842, "learning_rate": 0.0016404487917146145, "loss": 0.7249, "step": 62490 }, { "epoch": 17.980437284234753, "grad_norm": 2.2603633403778076, "learning_rate": 0.001640391254315305, "loss": 0.7933, "step": 62500 }, { "epoch": 17.98331415420023, "grad_norm": 1.5414860248565674, "learning_rate": 0.0016403337169159954, "loss": 0.6943, "step": 62510 }, { "epoch": 17.98619102416571, "grad_norm": 2.683267593383789, "learning_rate": 0.0016402761795166858, "loss": 0.7049, "step": 62520 }, { "epoch": 17.989067894131185, "grad_norm": 1.722283959388733, "learning_rate": 0.0016402186421173763, "loss": 0.6013, "step": 62530 }, { "epoch": 17.99194476409666, "grad_norm": 1.0090157985687256, "learning_rate": 0.0016401611047180667, "loss": 0.8289, "step": 62540 }, { "epoch": 17.99482163406214, "grad_norm": 0.750900149345398, "learning_rate": 0.0016401035673187573, "loss": 0.7606, "step": 62550 }, { "epoch": 17.997698504027618, "grad_norm": 2.319417953491211, "learning_rate": 0.0016400460299194478, "loss": 0.9835, "step": 62560 }, { "epoch": 18.000575373993094, "grad_norm": 1.5516250133514404, "learning_rate": 0.001639988492520138, "loss": 0.7311, "step": 62570 }, { "epoch": 18.003452243958574, "grad_norm": 1.4186574220657349, "learning_rate": 0.0016399309551208285, "loss": 0.7353, "step": 62580 }, { "epoch": 18.00632911392405, "grad_norm": 1.040906310081482, "learning_rate": 0.001639873417721519, "loss": 0.6899, "step": 62590 }, { "epoch": 18.00920598388953, "grad_norm": 1.1968369483947754, "learning_rate": 0.0016398158803222094, "loss": 0.6817, "step": 62600 }, { "epoch": 18.012082853855006, "grad_norm": 1.003503680229187, "learning_rate": 0.0016397583429229, "loss": 0.8122, "step": 62610 }, { "epoch": 18.014959723820482, "grad_norm": 1.348394751548767, "learning_rate": 0.0016397008055235906, "loss": 0.8058, "step": 62620 }, { "epoch": 18.01783659378596, "grad_norm": 1.9828251600265503, "learning_rate": 0.0016396432681242807, "loss": 0.6436, "step": 62630 }, { "epoch": 18.020713463751438, "grad_norm": 3.1804473400115967, "learning_rate": 0.0016395857307249712, "loss": 0.6381, "step": 62640 }, { "epoch": 18.023590333716918, "grad_norm": 0.8824499845504761, "learning_rate": 0.0016395281933256616, "loss": 0.7711, "step": 62650 }, { "epoch": 18.026467203682394, "grad_norm": 0.8785387277603149, "learning_rate": 0.0016394706559263522, "loss": 0.5776, "step": 62660 }, { "epoch": 18.02934407364787, "grad_norm": 1.6163134574890137, "learning_rate": 0.0016394131185270427, "loss": 0.7421, "step": 62670 }, { "epoch": 18.03222094361335, "grad_norm": 0.6165964007377625, "learning_rate": 0.001639355581127733, "loss": 0.7909, "step": 62680 }, { "epoch": 18.035097813578826, "grad_norm": 1.2873300313949585, "learning_rate": 0.0016392980437284234, "loss": 0.64, "step": 62690 }, { "epoch": 18.037974683544302, "grad_norm": 1.8343760967254639, "learning_rate": 0.001639240506329114, "loss": 0.9184, "step": 62700 }, { "epoch": 18.040851553509782, "grad_norm": 1.2254310846328735, "learning_rate": 0.0016391829689298043, "loss": 0.6888, "step": 62710 }, { "epoch": 18.043728423475258, "grad_norm": 1.2030643224716187, "learning_rate": 0.0016391254315304949, "loss": 0.8144, "step": 62720 }, { "epoch": 18.046605293440738, "grad_norm": 1.7394336462020874, "learning_rate": 0.0016390678941311855, "loss": 0.7795, "step": 62730 }, { "epoch": 18.049482163406214, "grad_norm": 1.2561743259429932, "learning_rate": 0.0016390103567318758, "loss": 0.5352, "step": 62740 }, { "epoch": 18.05235903337169, "grad_norm": 1.5099951028823853, "learning_rate": 0.0016389528193325661, "loss": 0.641, "step": 62750 }, { "epoch": 18.05523590333717, "grad_norm": 1.757111668586731, "learning_rate": 0.0016388952819332565, "loss": 0.7395, "step": 62760 }, { "epoch": 18.058112773302646, "grad_norm": 0.913235068321228, "learning_rate": 0.001638837744533947, "loss": 0.6998, "step": 62770 }, { "epoch": 18.060989643268123, "grad_norm": 0.6659649014472961, "learning_rate": 0.0016387802071346376, "loss": 0.6892, "step": 62780 }, { "epoch": 18.063866513233602, "grad_norm": 1.078695297241211, "learning_rate": 0.001638722669735328, "loss": 0.8115, "step": 62790 }, { "epoch": 18.06674338319908, "grad_norm": 1.1570181846618652, "learning_rate": 0.0016386651323360185, "loss": 0.615, "step": 62800 }, { "epoch": 18.069620253164558, "grad_norm": 1.0535943508148193, "learning_rate": 0.0016386075949367089, "loss": 0.6429, "step": 62810 }, { "epoch": 18.072497123130034, "grad_norm": 0.7986834049224854, "learning_rate": 0.0016385500575373992, "loss": 0.6211, "step": 62820 }, { "epoch": 18.07537399309551, "grad_norm": 1.022929072380066, "learning_rate": 0.0016384925201380898, "loss": 0.8833, "step": 62830 }, { "epoch": 18.07825086306099, "grad_norm": 1.1023218631744385, "learning_rate": 0.0016384349827387804, "loss": 0.8658, "step": 62840 }, { "epoch": 18.081127733026467, "grad_norm": 0.7825308442115784, "learning_rate": 0.0016383774453394707, "loss": 0.6763, "step": 62850 }, { "epoch": 18.084004602991946, "grad_norm": 1.0375806093215942, "learning_rate": 0.0016383199079401613, "loss": 0.7756, "step": 62860 }, { "epoch": 18.086881472957423, "grad_norm": 1.9728949069976807, "learning_rate": 0.0016382623705408514, "loss": 0.7855, "step": 62870 }, { "epoch": 18.0897583429229, "grad_norm": 1.2494527101516724, "learning_rate": 0.001638204833141542, "loss": 0.628, "step": 62880 }, { "epoch": 18.09263521288838, "grad_norm": 1.8520783185958862, "learning_rate": 0.0016381472957422325, "loss": 0.737, "step": 62890 }, { "epoch": 18.095512082853855, "grad_norm": 1.1597309112548828, "learning_rate": 0.0016380897583429229, "loss": 0.6748, "step": 62900 }, { "epoch": 18.09838895281933, "grad_norm": 1.145727276802063, "learning_rate": 0.0016380322209436134, "loss": 0.6287, "step": 62910 }, { "epoch": 18.10126582278481, "grad_norm": 1.371907353401184, "learning_rate": 0.001637974683544304, "loss": 0.9429, "step": 62920 }, { "epoch": 18.104142692750287, "grad_norm": 1.6599565744400024, "learning_rate": 0.0016379171461449941, "loss": 0.6041, "step": 62930 }, { "epoch": 18.107019562715767, "grad_norm": 1.0129975080490112, "learning_rate": 0.0016378596087456847, "loss": 0.5244, "step": 62940 }, { "epoch": 18.109896432681243, "grad_norm": 1.5081533193588257, "learning_rate": 0.0016378020713463753, "loss": 0.8946, "step": 62950 }, { "epoch": 18.11277330264672, "grad_norm": 1.3764299154281616, "learning_rate": 0.0016377445339470656, "loss": 0.6438, "step": 62960 }, { "epoch": 18.1156501726122, "grad_norm": 1.465024709701538, "learning_rate": 0.0016376869965477562, "loss": 0.6678, "step": 62970 }, { "epoch": 18.118527042577675, "grad_norm": 1.5584053993225098, "learning_rate": 0.0016376294591484467, "loss": 0.7348, "step": 62980 }, { "epoch": 18.121403912543155, "grad_norm": 1.8556970357894897, "learning_rate": 0.0016375719217491369, "loss": 0.6361, "step": 62990 }, { "epoch": 18.12428078250863, "grad_norm": 1.198457956314087, "learning_rate": 0.0016375143843498274, "loss": 0.8198, "step": 63000 }, { "epoch": 18.127157652474107, "grad_norm": 1.2759405374526978, "learning_rate": 0.0016374568469505178, "loss": 0.6597, "step": 63010 }, { "epoch": 18.130034522439587, "grad_norm": 1.0007150173187256, "learning_rate": 0.0016373993095512083, "loss": 0.6684, "step": 63020 }, { "epoch": 18.132911392405063, "grad_norm": 1.9155125617980957, "learning_rate": 0.001637341772151899, "loss": 0.7503, "step": 63030 }, { "epoch": 18.13578826237054, "grad_norm": 0.7547674179077148, "learning_rate": 0.0016372842347525892, "loss": 0.7505, "step": 63040 }, { "epoch": 18.13866513233602, "grad_norm": 1.9879963397979736, "learning_rate": 0.0016372266973532796, "loss": 0.6515, "step": 63050 }, { "epoch": 18.141542002301495, "grad_norm": 1.831803321838379, "learning_rate": 0.0016371691599539702, "loss": 0.8074, "step": 63060 }, { "epoch": 18.144418872266975, "grad_norm": 1.4899779558181763, "learning_rate": 0.0016371116225546605, "loss": 0.8867, "step": 63070 }, { "epoch": 18.14729574223245, "grad_norm": 1.3742241859436035, "learning_rate": 0.001637054085155351, "loss": 0.82, "step": 63080 }, { "epoch": 18.150172612197927, "grad_norm": 1.1798460483551025, "learning_rate": 0.0016369965477560416, "loss": 0.7076, "step": 63090 }, { "epoch": 18.153049482163407, "grad_norm": 1.0417991876602173, "learning_rate": 0.001636939010356732, "loss": 0.7585, "step": 63100 }, { "epoch": 18.155926352128883, "grad_norm": 0.9457079768180847, "learning_rate": 0.0016368814729574223, "loss": 0.824, "step": 63110 }, { "epoch": 18.15880322209436, "grad_norm": 1.2986032962799072, "learning_rate": 0.0016368239355581127, "loss": 0.6123, "step": 63120 }, { "epoch": 18.16168009205984, "grad_norm": 1.0409269332885742, "learning_rate": 0.0016367663981588032, "loss": 0.7443, "step": 63130 }, { "epoch": 18.164556962025316, "grad_norm": 1.0921179056167603, "learning_rate": 0.0016367088607594938, "loss": 0.6666, "step": 63140 }, { "epoch": 18.167433831990795, "grad_norm": 1.8244327306747437, "learning_rate": 0.0016366513233601841, "loss": 0.7949, "step": 63150 }, { "epoch": 18.17031070195627, "grad_norm": 1.0615192651748657, "learning_rate": 0.0016365937859608745, "loss": 0.8585, "step": 63160 }, { "epoch": 18.173187571921748, "grad_norm": 1.062868595123291, "learning_rate": 0.001636536248561565, "loss": 0.6323, "step": 63170 }, { "epoch": 18.176064441887227, "grad_norm": 1.887640357017517, "learning_rate": 0.0016364787111622554, "loss": 0.783, "step": 63180 }, { "epoch": 18.178941311852704, "grad_norm": 1.1722996234893799, "learning_rate": 0.001636421173762946, "loss": 0.7661, "step": 63190 }, { "epoch": 18.181818181818183, "grad_norm": 0.9146801829338074, "learning_rate": 0.0016363636363636365, "loss": 0.6566, "step": 63200 }, { "epoch": 18.18469505178366, "grad_norm": 1.33780837059021, "learning_rate": 0.0016363060989643269, "loss": 0.7121, "step": 63210 }, { "epoch": 18.187571921749136, "grad_norm": 1.4806541204452515, "learning_rate": 0.0016362485615650172, "loss": 0.7586, "step": 63220 }, { "epoch": 18.190448791714616, "grad_norm": 1.9741322994232178, "learning_rate": 0.0016361910241657076, "loss": 0.7235, "step": 63230 }, { "epoch": 18.193325661680092, "grad_norm": 1.200505018234253, "learning_rate": 0.0016361334867663981, "loss": 0.701, "step": 63240 }, { "epoch": 18.196202531645568, "grad_norm": 1.3111587762832642, "learning_rate": 0.0016360759493670887, "loss": 0.6432, "step": 63250 }, { "epoch": 18.199079401611048, "grad_norm": 0.8969477415084839, "learning_rate": 0.001636018411967779, "loss": 0.7548, "step": 63260 }, { "epoch": 18.201956271576524, "grad_norm": 2.109003782272339, "learning_rate": 0.0016359608745684696, "loss": 0.7902, "step": 63270 }, { "epoch": 18.204833141542004, "grad_norm": 1.719264030456543, "learning_rate": 0.00163590333716916, "loss": 0.7151, "step": 63280 }, { "epoch": 18.20771001150748, "grad_norm": 1.3080443143844604, "learning_rate": 0.0016358457997698503, "loss": 0.7375, "step": 63290 }, { "epoch": 18.210586881472956, "grad_norm": 1.1834824085235596, "learning_rate": 0.0016357882623705409, "loss": 0.5966, "step": 63300 }, { "epoch": 18.213463751438436, "grad_norm": 1.5160562992095947, "learning_rate": 0.0016357307249712314, "loss": 0.7613, "step": 63310 }, { "epoch": 18.216340621403912, "grad_norm": 1.0117777585983276, "learning_rate": 0.0016356731875719218, "loss": 0.7267, "step": 63320 }, { "epoch": 18.219217491369392, "grad_norm": 1.8496603965759277, "learning_rate": 0.0016356156501726123, "loss": 0.748, "step": 63330 }, { "epoch": 18.222094361334868, "grad_norm": 1.814676284790039, "learning_rate": 0.0016355581127733025, "loss": 0.8436, "step": 63340 }, { "epoch": 18.224971231300344, "grad_norm": 1.3093258142471313, "learning_rate": 0.001635500575373993, "loss": 0.7627, "step": 63350 }, { "epoch": 18.227848101265824, "grad_norm": 0.7229008674621582, "learning_rate": 0.0016354430379746836, "loss": 0.5415, "step": 63360 }, { "epoch": 18.2307249712313, "grad_norm": 1.6548188924789429, "learning_rate": 0.001635385500575374, "loss": 0.7021, "step": 63370 }, { "epoch": 18.233601841196776, "grad_norm": 1.498386263847351, "learning_rate": 0.0016353279631760645, "loss": 0.7246, "step": 63380 }, { "epoch": 18.236478711162256, "grad_norm": 0.8984249830245972, "learning_rate": 0.001635270425776755, "loss": 0.7656, "step": 63390 }, { "epoch": 18.239355581127732, "grad_norm": 1.0485115051269531, "learning_rate": 0.0016352128883774452, "loss": 0.6257, "step": 63400 }, { "epoch": 18.242232451093212, "grad_norm": 0.9542832970619202, "learning_rate": 0.0016351553509781358, "loss": 0.6396, "step": 63410 }, { "epoch": 18.24510932105869, "grad_norm": 0.655656635761261, "learning_rate": 0.0016350978135788263, "loss": 0.652, "step": 63420 }, { "epoch": 18.247986191024165, "grad_norm": 0.8931218385696411, "learning_rate": 0.0016350402761795167, "loss": 0.7297, "step": 63430 }, { "epoch": 18.250863060989644, "grad_norm": 1.984167456626892, "learning_rate": 0.0016349827387802073, "loss": 0.6842, "step": 63440 }, { "epoch": 18.25373993095512, "grad_norm": 1.0184417963027954, "learning_rate": 0.0016349252013808976, "loss": 0.6618, "step": 63450 }, { "epoch": 18.256616800920597, "grad_norm": 1.2452970743179321, "learning_rate": 0.001634867663981588, "loss": 0.8869, "step": 63460 }, { "epoch": 18.259493670886076, "grad_norm": 1.2556228637695312, "learning_rate": 0.0016348101265822785, "loss": 0.8313, "step": 63470 }, { "epoch": 18.262370540851553, "grad_norm": 1.1504883766174316, "learning_rate": 0.0016347525891829689, "loss": 0.6501, "step": 63480 }, { "epoch": 18.265247410817032, "grad_norm": 0.9907503128051758, "learning_rate": 0.0016346950517836594, "loss": 0.7305, "step": 63490 }, { "epoch": 18.26812428078251, "grad_norm": 0.9609786868095398, "learning_rate": 0.00163463751438435, "loss": 0.6288, "step": 63500 }, { "epoch": 18.271001150747985, "grad_norm": 1.4323747158050537, "learning_rate": 0.0016345799769850403, "loss": 0.7424, "step": 63510 }, { "epoch": 18.273878020713465, "grad_norm": 1.1576286554336548, "learning_rate": 0.0016345224395857307, "loss": 0.7659, "step": 63520 }, { "epoch": 18.27675489067894, "grad_norm": 1.175004005432129, "learning_rate": 0.0016344649021864212, "loss": 0.7067, "step": 63530 }, { "epoch": 18.27963176064442, "grad_norm": 1.2120918035507202, "learning_rate": 0.0016344073647871116, "loss": 0.7566, "step": 63540 }, { "epoch": 18.282508630609897, "grad_norm": 1.9544799327850342, "learning_rate": 0.0016343498273878022, "loss": 0.8078, "step": 63550 }, { "epoch": 18.285385500575373, "grad_norm": 0.7527849674224854, "learning_rate": 0.0016342922899884927, "loss": 0.7587, "step": 63560 }, { "epoch": 18.288262370540853, "grad_norm": 0.8725761771202087, "learning_rate": 0.001634234752589183, "loss": 0.7595, "step": 63570 }, { "epoch": 18.29113924050633, "grad_norm": 0.8829820156097412, "learning_rate": 0.0016341772151898734, "loss": 0.7247, "step": 63580 }, { "epoch": 18.294016110471805, "grad_norm": 0.8115407824516296, "learning_rate": 0.0016341196777905638, "loss": 0.6813, "step": 63590 }, { "epoch": 18.296892980437285, "grad_norm": 0.7567418217658997, "learning_rate": 0.0016340621403912543, "loss": 0.7431, "step": 63600 }, { "epoch": 18.29976985040276, "grad_norm": 0.6200090050697327, "learning_rate": 0.0016340046029919449, "loss": 0.7155, "step": 63610 }, { "epoch": 18.30264672036824, "grad_norm": 1.0438836812973022, "learning_rate": 0.0016339470655926352, "loss": 0.5564, "step": 63620 }, { "epoch": 18.305523590333717, "grad_norm": 1.2865610122680664, "learning_rate": 0.0016338895281933258, "loss": 0.786, "step": 63630 }, { "epoch": 18.308400460299193, "grad_norm": 1.388419270515442, "learning_rate": 0.0016338319907940161, "loss": 0.7159, "step": 63640 }, { "epoch": 18.311277330264673, "grad_norm": 0.8208091855049133, "learning_rate": 0.0016337744533947065, "loss": 0.6668, "step": 63650 }, { "epoch": 18.31415420023015, "grad_norm": 1.13922119140625, "learning_rate": 0.001633716915995397, "loss": 0.6947, "step": 63660 }, { "epoch": 18.317031070195625, "grad_norm": 1.4501341581344604, "learning_rate": 0.0016336593785960876, "loss": 0.7437, "step": 63670 }, { "epoch": 18.319907940161105, "grad_norm": 1.6654462814331055, "learning_rate": 0.001633601841196778, "loss": 1.0867, "step": 63680 }, { "epoch": 18.32278481012658, "grad_norm": 2.0561656951904297, "learning_rate": 0.0016335443037974685, "loss": 0.6837, "step": 63690 }, { "epoch": 18.32566168009206, "grad_norm": 1.048551321029663, "learning_rate": 0.0016334867663981587, "loss": 0.6861, "step": 63700 }, { "epoch": 18.328538550057537, "grad_norm": 1.0176111459732056, "learning_rate": 0.0016334292289988492, "loss": 0.7427, "step": 63710 }, { "epoch": 18.331415420023013, "grad_norm": 1.043241262435913, "learning_rate": 0.0016333716915995398, "loss": 0.6915, "step": 63720 }, { "epoch": 18.334292289988493, "grad_norm": 0.8812995553016663, "learning_rate": 0.0016333141542002301, "loss": 0.6541, "step": 63730 }, { "epoch": 18.33716915995397, "grad_norm": 1.8271516561508179, "learning_rate": 0.0016332566168009207, "loss": 0.5826, "step": 63740 }, { "epoch": 18.34004602991945, "grad_norm": 1.4312947988510132, "learning_rate": 0.0016331990794016113, "loss": 0.643, "step": 63750 }, { "epoch": 18.342922899884925, "grad_norm": 5.185953140258789, "learning_rate": 0.0016331415420023014, "loss": 0.804, "step": 63760 }, { "epoch": 18.3457997698504, "grad_norm": 0.6883760690689087, "learning_rate": 0.001633084004602992, "loss": 0.7281, "step": 63770 }, { "epoch": 18.34867663981588, "grad_norm": 1.524079442024231, "learning_rate": 0.0016330264672036825, "loss": 0.7515, "step": 63780 }, { "epoch": 18.351553509781358, "grad_norm": 1.2753708362579346, "learning_rate": 0.0016329689298043729, "loss": 0.7048, "step": 63790 }, { "epoch": 18.354430379746834, "grad_norm": 1.6537977457046509, "learning_rate": 0.0016329113924050634, "loss": 0.7107, "step": 63800 }, { "epoch": 18.357307249712314, "grad_norm": 1.674656867980957, "learning_rate": 0.0016328538550057538, "loss": 0.7155, "step": 63810 }, { "epoch": 18.36018411967779, "grad_norm": 2.6104187965393066, "learning_rate": 0.0016327963176064441, "loss": 0.7192, "step": 63820 }, { "epoch": 18.36306098964327, "grad_norm": 1.1565256118774414, "learning_rate": 0.0016327387802071347, "loss": 0.7815, "step": 63830 }, { "epoch": 18.365937859608746, "grad_norm": 0.937880277633667, "learning_rate": 0.001632681242807825, "loss": 0.9335, "step": 63840 }, { "epoch": 18.368814729574222, "grad_norm": 1.4555984735488892, "learning_rate": 0.0016326237054085156, "loss": 0.6899, "step": 63850 }, { "epoch": 18.3716915995397, "grad_norm": 1.5602045059204102, "learning_rate": 0.0016325661680092062, "loss": 0.7191, "step": 63860 }, { "epoch": 18.374568469505178, "grad_norm": 1.4018555879592896, "learning_rate": 0.0016325086306098965, "loss": 0.7265, "step": 63870 }, { "epoch": 18.377445339470658, "grad_norm": 0.9223349690437317, "learning_rate": 0.0016324510932105869, "loss": 0.6725, "step": 63880 }, { "epoch": 18.380322209436134, "grad_norm": 1.6920658349990845, "learning_rate": 0.0016323935558112774, "loss": 0.878, "step": 63890 }, { "epoch": 18.38319907940161, "grad_norm": 1.0852025747299194, "learning_rate": 0.0016323360184119678, "loss": 0.5979, "step": 63900 }, { "epoch": 18.38607594936709, "grad_norm": 3.883279800415039, "learning_rate": 0.0016322784810126583, "loss": 0.6315, "step": 63910 }, { "epoch": 18.388952819332566, "grad_norm": 1.6335667371749878, "learning_rate": 0.0016322209436133487, "loss": 0.8302, "step": 63920 }, { "epoch": 18.391829689298042, "grad_norm": 1.2528318166732788, "learning_rate": 0.0016321634062140392, "loss": 0.745, "step": 63930 }, { "epoch": 18.394706559263522, "grad_norm": 1.0237542390823364, "learning_rate": 0.0016321058688147296, "loss": 0.7103, "step": 63940 }, { "epoch": 18.397583429228998, "grad_norm": 2.8096182346343994, "learning_rate": 0.00163204833141542, "loss": 0.7369, "step": 63950 }, { "epoch": 18.400460299194478, "grad_norm": 2.923696279525757, "learning_rate": 0.0016319907940161105, "loss": 0.858, "step": 63960 }, { "epoch": 18.403337169159954, "grad_norm": 0.8913621306419373, "learning_rate": 0.001631933256616801, "loss": 0.774, "step": 63970 }, { "epoch": 18.40621403912543, "grad_norm": 0.9788017868995667, "learning_rate": 0.0016318757192174914, "loss": 0.7254, "step": 63980 }, { "epoch": 18.40909090909091, "grad_norm": 1.4426398277282715, "learning_rate": 0.0016318181818181818, "loss": 0.6442, "step": 63990 }, { "epoch": 18.411967779056386, "grad_norm": 1.4498575925827026, "learning_rate": 0.0016317606444188723, "loss": 0.6472, "step": 64000 }, { "epoch": 18.414844649021862, "grad_norm": 1.1249091625213623, "learning_rate": 0.0016317031070195627, "loss": 0.7262, "step": 64010 }, { "epoch": 18.417721518987342, "grad_norm": 1.0956135988235474, "learning_rate": 0.0016316455696202532, "loss": 0.879, "step": 64020 }, { "epoch": 18.42059838895282, "grad_norm": 1.633028268814087, "learning_rate": 0.0016315880322209436, "loss": 0.7728, "step": 64030 }, { "epoch": 18.423475258918298, "grad_norm": 0.6527718305587769, "learning_rate": 0.0016315304948216341, "loss": 0.7355, "step": 64040 }, { "epoch": 18.426352128883774, "grad_norm": 0.879540741443634, "learning_rate": 0.0016314729574223245, "loss": 0.7745, "step": 64050 }, { "epoch": 18.42922899884925, "grad_norm": 1.256402850151062, "learning_rate": 0.0016314154200230148, "loss": 0.8281, "step": 64060 }, { "epoch": 18.43210586881473, "grad_norm": 0.9267804622650146, "learning_rate": 0.0016313578826237054, "loss": 0.7447, "step": 64070 }, { "epoch": 18.434982738780207, "grad_norm": 1.11709463596344, "learning_rate": 0.001631300345224396, "loss": 0.8329, "step": 64080 }, { "epoch": 18.437859608745686, "grad_norm": 0.7632858753204346, "learning_rate": 0.0016312428078250863, "loss": 0.906, "step": 64090 }, { "epoch": 18.440736478711163, "grad_norm": 1.0784722566604614, "learning_rate": 0.0016311852704257769, "loss": 0.8435, "step": 64100 }, { "epoch": 18.44361334867664, "grad_norm": 1.1442164182662964, "learning_rate": 0.0016311277330264672, "loss": 0.7234, "step": 64110 }, { "epoch": 18.44649021864212, "grad_norm": 2.334506034851074, "learning_rate": 0.0016310701956271576, "loss": 0.7097, "step": 64120 }, { "epoch": 18.449367088607595, "grad_norm": 1.4635398387908936, "learning_rate": 0.0016310126582278481, "loss": 0.8573, "step": 64130 }, { "epoch": 18.45224395857307, "grad_norm": 1.0789666175842285, "learning_rate": 0.0016309551208285385, "loss": 0.7185, "step": 64140 }, { "epoch": 18.45512082853855, "grad_norm": 0.8573580980300903, "learning_rate": 0.001630897583429229, "loss": 0.7455, "step": 64150 }, { "epoch": 18.457997698504027, "grad_norm": 0.9067633152008057, "learning_rate": 0.0016308400460299196, "loss": 0.7848, "step": 64160 }, { "epoch": 18.460874568469507, "grad_norm": 2.1344351768493652, "learning_rate": 0.0016307825086306097, "loss": 0.7921, "step": 64170 }, { "epoch": 18.463751438434983, "grad_norm": 1.424593448638916, "learning_rate": 0.0016307249712313003, "loss": 1.006, "step": 64180 }, { "epoch": 18.46662830840046, "grad_norm": 1.0195175409317017, "learning_rate": 0.0016306674338319909, "loss": 0.7137, "step": 64190 }, { "epoch": 18.46950517836594, "grad_norm": 1.6456433534622192, "learning_rate": 0.0016306098964326812, "loss": 0.8821, "step": 64200 }, { "epoch": 18.472382048331415, "grad_norm": 0.9977779984474182, "learning_rate": 0.0016305523590333718, "loss": 0.6393, "step": 64210 }, { "epoch": 18.475258918296895, "grad_norm": 1.700003981590271, "learning_rate": 0.0016304948216340623, "loss": 0.8035, "step": 64220 }, { "epoch": 18.47813578826237, "grad_norm": 1.3154226541519165, "learning_rate": 0.0016304372842347525, "loss": 0.711, "step": 64230 }, { "epoch": 18.481012658227847, "grad_norm": 1.0913317203521729, "learning_rate": 0.001630379746835443, "loss": 0.6603, "step": 64240 }, { "epoch": 18.483889528193327, "grad_norm": 1.754755973815918, "learning_rate": 0.0016303222094361336, "loss": 0.6169, "step": 64250 }, { "epoch": 18.486766398158803, "grad_norm": 1.398972749710083, "learning_rate": 0.001630264672036824, "loss": 0.753, "step": 64260 }, { "epoch": 18.48964326812428, "grad_norm": 2.393693447113037, "learning_rate": 0.0016302071346375145, "loss": 0.7819, "step": 64270 }, { "epoch": 18.49252013808976, "grad_norm": 1.5855234861373901, "learning_rate": 0.0016301495972382049, "loss": 0.7749, "step": 64280 }, { "epoch": 18.495397008055235, "grad_norm": 1.5834684371948242, "learning_rate": 0.0016300920598388952, "loss": 0.8471, "step": 64290 }, { "epoch": 18.498273878020715, "grad_norm": 1.4013566970825195, "learning_rate": 0.0016300345224395858, "loss": 0.7381, "step": 64300 }, { "epoch": 18.50115074798619, "grad_norm": 1.9833778142929077, "learning_rate": 0.0016299769850402761, "loss": 0.7669, "step": 64310 }, { "epoch": 18.504027617951667, "grad_norm": 1.1516807079315186, "learning_rate": 0.0016299194476409667, "loss": 0.924, "step": 64320 }, { "epoch": 18.506904487917147, "grad_norm": 1.3751428127288818, "learning_rate": 0.0016298619102416572, "loss": 0.7351, "step": 64330 }, { "epoch": 18.509781357882623, "grad_norm": 1.3056913614273071, "learning_rate": 0.0016298043728423476, "loss": 0.6698, "step": 64340 }, { "epoch": 18.5126582278481, "grad_norm": 1.2121044397354126, "learning_rate": 0.001629746835443038, "loss": 0.6968, "step": 64350 }, { "epoch": 18.51553509781358, "grad_norm": 1.285308599472046, "learning_rate": 0.0016296892980437285, "loss": 0.6603, "step": 64360 }, { "epoch": 18.518411967779056, "grad_norm": 1.2241727113723755, "learning_rate": 0.0016296317606444189, "loss": 0.7313, "step": 64370 }, { "epoch": 18.521288837744535, "grad_norm": 1.239805817604065, "learning_rate": 0.0016295742232451094, "loss": 0.6425, "step": 64380 }, { "epoch": 18.52416570771001, "grad_norm": 1.1234756708145142, "learning_rate": 0.0016295166858457998, "loss": 0.6591, "step": 64390 }, { "epoch": 18.527042577675488, "grad_norm": 1.0526231527328491, "learning_rate": 0.0016294591484464903, "loss": 0.7399, "step": 64400 }, { "epoch": 18.529919447640967, "grad_norm": 2.2278225421905518, "learning_rate": 0.0016294016110471807, "loss": 0.8534, "step": 64410 }, { "epoch": 18.532796317606444, "grad_norm": 1.0091710090637207, "learning_rate": 0.001629344073647871, "loss": 0.7521, "step": 64420 }, { "epoch": 18.535673187571923, "grad_norm": 1.4876219034194946, "learning_rate": 0.0016292865362485616, "loss": 0.6555, "step": 64430 }, { "epoch": 18.5385500575374, "grad_norm": 1.1373106241226196, "learning_rate": 0.0016292289988492522, "loss": 1.0251, "step": 64440 }, { "epoch": 18.541426927502876, "grad_norm": 1.988344430923462, "learning_rate": 0.0016291714614499425, "loss": 0.6685, "step": 64450 }, { "epoch": 18.544303797468356, "grad_norm": 2.505058765411377, "learning_rate": 0.001629113924050633, "loss": 0.7614, "step": 64460 }, { "epoch": 18.54718066743383, "grad_norm": 1.182198405265808, "learning_rate": 0.0016290563866513234, "loss": 1.0708, "step": 64470 }, { "epoch": 18.550057537399308, "grad_norm": 0.9306745529174805, "learning_rate": 0.0016289988492520138, "loss": 0.6235, "step": 64480 }, { "epoch": 18.552934407364788, "grad_norm": 1.3154813051223755, "learning_rate": 0.0016289413118527043, "loss": 0.9944, "step": 64490 }, { "epoch": 18.555811277330264, "grad_norm": 1.1601672172546387, "learning_rate": 0.0016288837744533947, "loss": 0.7, "step": 64500 }, { "epoch": 18.558688147295744, "grad_norm": 0.720703125, "learning_rate": 0.0016288262370540852, "loss": 0.7186, "step": 64510 }, { "epoch": 18.56156501726122, "grad_norm": 1.3637911081314087, "learning_rate": 0.0016287686996547758, "loss": 0.9407, "step": 64520 }, { "epoch": 18.564441887226696, "grad_norm": 1.1306349039077759, "learning_rate": 0.001628711162255466, "loss": 0.8155, "step": 64530 }, { "epoch": 18.567318757192176, "grad_norm": 1.9585617780685425, "learning_rate": 0.0016286536248561565, "loss": 0.6813, "step": 64540 }, { "epoch": 18.570195627157652, "grad_norm": 1.2276777029037476, "learning_rate": 0.001628596087456847, "loss": 0.6529, "step": 64550 }, { "epoch": 18.57307249712313, "grad_norm": 1.1716077327728271, "learning_rate": 0.0016285385500575374, "loss": 0.6798, "step": 64560 }, { "epoch": 18.575949367088608, "grad_norm": 2.017190456390381, "learning_rate": 0.001628481012658228, "loss": 0.7397, "step": 64570 }, { "epoch": 18.578826237054084, "grad_norm": 1.116621494293213, "learning_rate": 0.0016284234752589185, "loss": 0.8084, "step": 64580 }, { "epoch": 18.581703107019564, "grad_norm": 1.2989826202392578, "learning_rate": 0.0016283659378596087, "loss": 0.6715, "step": 64590 }, { "epoch": 18.58457997698504, "grad_norm": 1.8670860528945923, "learning_rate": 0.0016283084004602992, "loss": 0.6529, "step": 64600 }, { "epoch": 18.587456846950516, "grad_norm": 1.093596339225769, "learning_rate": 0.0016282508630609896, "loss": 0.7306, "step": 64610 }, { "epoch": 18.590333716915996, "grad_norm": 2.643115758895874, "learning_rate": 0.0016281933256616801, "loss": 0.9268, "step": 64620 }, { "epoch": 18.593210586881472, "grad_norm": 1.2658318281173706, "learning_rate": 0.0016281357882623707, "loss": 0.9494, "step": 64630 }, { "epoch": 18.596087456846952, "grad_norm": 0.8941522240638733, "learning_rate": 0.001628078250863061, "loss": 0.7372, "step": 64640 }, { "epoch": 18.59896432681243, "grad_norm": 0.9977520108222961, "learning_rate": 0.0016280207134637514, "loss": 0.78, "step": 64650 }, { "epoch": 18.601841196777904, "grad_norm": 1.7273693084716797, "learning_rate": 0.001627963176064442, "loss": 0.7814, "step": 64660 }, { "epoch": 18.604718066743384, "grad_norm": 2.1029388904571533, "learning_rate": 0.0016279056386651323, "loss": 0.7547, "step": 64670 }, { "epoch": 18.60759493670886, "grad_norm": 2.932373046875, "learning_rate": 0.0016278481012658229, "loss": 0.6793, "step": 64680 }, { "epoch": 18.610471806674337, "grad_norm": 1.5548897981643677, "learning_rate": 0.0016277905638665134, "loss": 0.9091, "step": 64690 }, { "epoch": 18.613348676639816, "grad_norm": 1.3369531631469727, "learning_rate": 0.0016277330264672038, "loss": 0.8221, "step": 64700 }, { "epoch": 18.616225546605293, "grad_norm": 2.8670332431793213, "learning_rate": 0.0016276754890678941, "loss": 0.9315, "step": 64710 }, { "epoch": 18.619102416570772, "grad_norm": 1.2827517986297607, "learning_rate": 0.0016276179516685845, "loss": 0.6452, "step": 64720 }, { "epoch": 18.62197928653625, "grad_norm": 1.4498581886291504, "learning_rate": 0.001627560414269275, "loss": 0.7944, "step": 64730 }, { "epoch": 18.624856156501725, "grad_norm": 2.156061887741089, "learning_rate": 0.0016275028768699656, "loss": 0.7831, "step": 64740 }, { "epoch": 18.627733026467205, "grad_norm": 1.1118236780166626, "learning_rate": 0.001627445339470656, "loss": 0.792, "step": 64750 }, { "epoch": 18.63060989643268, "grad_norm": 1.406855583190918, "learning_rate": 0.0016273878020713465, "loss": 0.6809, "step": 64760 }, { "epoch": 18.63348676639816, "grad_norm": 0.6807098984718323, "learning_rate": 0.0016273302646720369, "loss": 0.6378, "step": 64770 }, { "epoch": 18.636363636363637, "grad_norm": 1.4600952863693237, "learning_rate": 0.0016272727272727272, "loss": 0.6513, "step": 64780 }, { "epoch": 18.639240506329113, "grad_norm": 1.312411904335022, "learning_rate": 0.0016272151898734178, "loss": 0.6085, "step": 64790 }, { "epoch": 18.642117376294593, "grad_norm": 1.7580643892288208, "learning_rate": 0.0016271576524741083, "loss": 0.6324, "step": 64800 }, { "epoch": 18.64499424626007, "grad_norm": 1.9265124797821045, "learning_rate": 0.0016271001150747987, "loss": 0.6846, "step": 64810 }, { "epoch": 18.647871116225545, "grad_norm": 0.9399491548538208, "learning_rate": 0.001627042577675489, "loss": 0.6397, "step": 64820 }, { "epoch": 18.650747986191025, "grad_norm": 1.173352599143982, "learning_rate": 0.0016269850402761794, "loss": 0.5721, "step": 64830 }, { "epoch": 18.6536248561565, "grad_norm": 0.7103503942489624, "learning_rate": 0.00162692750287687, "loss": 0.6854, "step": 64840 }, { "epoch": 18.65650172612198, "grad_norm": 0.9512889981269836, "learning_rate": 0.0016268699654775605, "loss": 0.6822, "step": 64850 }, { "epoch": 18.659378596087457, "grad_norm": 1.15985107421875, "learning_rate": 0.0016268124280782508, "loss": 0.7065, "step": 64860 }, { "epoch": 18.662255466052933, "grad_norm": 1.5900241136550903, "learning_rate": 0.0016267548906789414, "loss": 0.5863, "step": 64870 }, { "epoch": 18.665132336018413, "grad_norm": 1.1169629096984863, "learning_rate": 0.0016266973532796318, "loss": 0.6203, "step": 64880 }, { "epoch": 18.66800920598389, "grad_norm": 1.0489120483398438, "learning_rate": 0.001626639815880322, "loss": 0.8491, "step": 64890 }, { "epoch": 18.67088607594937, "grad_norm": 2.4376397132873535, "learning_rate": 0.0016265822784810127, "loss": 0.9409, "step": 64900 }, { "epoch": 18.673762945914845, "grad_norm": 1.0881969928741455, "learning_rate": 0.0016265247410817032, "loss": 0.6923, "step": 64910 }, { "epoch": 18.67663981588032, "grad_norm": 1.2288835048675537, "learning_rate": 0.0016264672036823936, "loss": 0.749, "step": 64920 }, { "epoch": 18.6795166858458, "grad_norm": 0.6392096877098083, "learning_rate": 0.0016264096662830841, "loss": 0.545, "step": 64930 }, { "epoch": 18.682393555811277, "grad_norm": 1.9571973085403442, "learning_rate": 0.0016263521288837745, "loss": 0.8951, "step": 64940 }, { "epoch": 18.685270425776753, "grad_norm": 1.2129623889923096, "learning_rate": 0.0016262945914844648, "loss": 0.6198, "step": 64950 }, { "epoch": 18.688147295742233, "grad_norm": 2.736926555633545, "learning_rate": 0.0016262370540851554, "loss": 0.7752, "step": 64960 }, { "epoch": 18.69102416570771, "grad_norm": 2.4532885551452637, "learning_rate": 0.0016261795166858458, "loss": 0.7329, "step": 64970 }, { "epoch": 18.69390103567319, "grad_norm": 1.577326774597168, "learning_rate": 0.0016261219792865363, "loss": 0.8504, "step": 64980 }, { "epoch": 18.696777905638665, "grad_norm": 1.6073230504989624, "learning_rate": 0.0016260644418872269, "loss": 0.6749, "step": 64990 }, { "epoch": 18.69965477560414, "grad_norm": 1.9032139778137207, "learning_rate": 0.001626006904487917, "loss": 0.9435, "step": 65000 }, { "epoch": 18.70253164556962, "grad_norm": 1.4391111135482788, "learning_rate": 0.0016259493670886076, "loss": 0.8732, "step": 65010 }, { "epoch": 18.705408515535098, "grad_norm": 1.1475640535354614, "learning_rate": 0.0016258918296892981, "loss": 0.7629, "step": 65020 }, { "epoch": 18.708285385500574, "grad_norm": 2.012866973876953, "learning_rate": 0.0016258342922899885, "loss": 0.8802, "step": 65030 }, { "epoch": 18.711162255466053, "grad_norm": 2.4839401245117188, "learning_rate": 0.001625776754890679, "loss": 0.8165, "step": 65040 }, { "epoch": 18.71403912543153, "grad_norm": 0.7780038714408875, "learning_rate": 0.0016257192174913696, "loss": 0.7239, "step": 65050 }, { "epoch": 18.71691599539701, "grad_norm": 1.5569326877593994, "learning_rate": 0.0016256616800920597, "loss": 0.6423, "step": 65060 }, { "epoch": 18.719792865362486, "grad_norm": 1.5431809425354004, "learning_rate": 0.0016256041426927503, "loss": 0.659, "step": 65070 }, { "epoch": 18.722669735327962, "grad_norm": 1.3849472999572754, "learning_rate": 0.0016255466052934407, "loss": 0.723, "step": 65080 }, { "epoch": 18.72554660529344, "grad_norm": 1.0494297742843628, "learning_rate": 0.0016254890678941312, "loss": 0.7571, "step": 65090 }, { "epoch": 18.728423475258918, "grad_norm": 1.2206114530563354, "learning_rate": 0.0016254315304948218, "loss": 0.8234, "step": 65100 }, { "epoch": 18.731300345224398, "grad_norm": 0.924062192440033, "learning_rate": 0.0016253739930955121, "loss": 0.695, "step": 65110 }, { "epoch": 18.734177215189874, "grad_norm": 1.0769439935684204, "learning_rate": 0.0016253164556962025, "loss": 0.8194, "step": 65120 }, { "epoch": 18.73705408515535, "grad_norm": 1.1429589986801147, "learning_rate": 0.001625258918296893, "loss": 0.8166, "step": 65130 }, { "epoch": 18.73993095512083, "grad_norm": 1.915319800376892, "learning_rate": 0.0016252013808975834, "loss": 0.858, "step": 65140 }, { "epoch": 18.742807825086306, "grad_norm": 0.8831771016120911, "learning_rate": 0.001625143843498274, "loss": 0.6354, "step": 65150 }, { "epoch": 18.745684695051782, "grad_norm": 0.9796763062477112, "learning_rate": 0.0016250863060989645, "loss": 0.9299, "step": 65160 }, { "epoch": 18.748561565017262, "grad_norm": 1.643471360206604, "learning_rate": 0.0016250287686996549, "loss": 0.6924, "step": 65170 }, { "epoch": 18.751438434982738, "grad_norm": 1.5462144613265991, "learning_rate": 0.0016249712313003452, "loss": 0.6748, "step": 65180 }, { "epoch": 18.754315304948218, "grad_norm": 1.4860038757324219, "learning_rate": 0.0016249136939010356, "loss": 0.9184, "step": 65190 }, { "epoch": 18.757192174913694, "grad_norm": 0.8399576544761658, "learning_rate": 0.0016248561565017261, "loss": 0.7246, "step": 65200 }, { "epoch": 18.76006904487917, "grad_norm": 0.9074878692626953, "learning_rate": 0.0016247986191024167, "loss": 0.7725, "step": 65210 }, { "epoch": 18.76294591484465, "grad_norm": 1.5314737558364868, "learning_rate": 0.001624741081703107, "loss": 0.7662, "step": 65220 }, { "epoch": 18.765822784810126, "grad_norm": 0.8615428805351257, "learning_rate": 0.0016246835443037976, "loss": 0.6201, "step": 65230 }, { "epoch": 18.768699654775602, "grad_norm": 0.9410349130630493, "learning_rate": 0.001624626006904488, "loss": 0.8173, "step": 65240 }, { "epoch": 18.771576524741082, "grad_norm": 1.035083293914795, "learning_rate": 0.0016245684695051783, "loss": 0.6084, "step": 65250 }, { "epoch": 18.77445339470656, "grad_norm": 2.318746328353882, "learning_rate": 0.0016245109321058689, "loss": 0.9771, "step": 65260 }, { "epoch": 18.777330264672038, "grad_norm": 1.5851638317108154, "learning_rate": 0.0016244533947065594, "loss": 0.7435, "step": 65270 }, { "epoch": 18.780207134637514, "grad_norm": 2.0770976543426514, "learning_rate": 0.0016243958573072498, "loss": 0.7812, "step": 65280 }, { "epoch": 18.78308400460299, "grad_norm": 0.6739270091056824, "learning_rate": 0.0016243383199079403, "loss": 0.9898, "step": 65290 }, { "epoch": 18.78596087456847, "grad_norm": 1.177342414855957, "learning_rate": 0.0016242807825086305, "loss": 0.6495, "step": 65300 }, { "epoch": 18.788837744533947, "grad_norm": 1.0331405401229858, "learning_rate": 0.001624223245109321, "loss": 0.6868, "step": 65310 }, { "epoch": 18.791714614499426, "grad_norm": 1.7065551280975342, "learning_rate": 0.0016241657077100116, "loss": 0.7347, "step": 65320 }, { "epoch": 18.794591484464902, "grad_norm": 1.5430018901824951, "learning_rate": 0.001624108170310702, "loss": 0.6762, "step": 65330 }, { "epoch": 18.79746835443038, "grad_norm": 1.3213433027267456, "learning_rate": 0.0016240506329113925, "loss": 0.7219, "step": 65340 }, { "epoch": 18.80034522439586, "grad_norm": 0.9058123826980591, "learning_rate": 0.001623993095512083, "loss": 0.6938, "step": 65350 }, { "epoch": 18.803222094361335, "grad_norm": 0.8232092261314392, "learning_rate": 0.0016239355581127732, "loss": 0.7405, "step": 65360 }, { "epoch": 18.80609896432681, "grad_norm": 1.1051115989685059, "learning_rate": 0.0016238780207134638, "loss": 0.606, "step": 65370 }, { "epoch": 18.80897583429229, "grad_norm": 1.6933526992797852, "learning_rate": 0.0016238204833141543, "loss": 0.8405, "step": 65380 }, { "epoch": 18.811852704257767, "grad_norm": 1.5272154808044434, "learning_rate": 0.0016237629459148447, "loss": 0.6656, "step": 65390 }, { "epoch": 18.814729574223247, "grad_norm": 1.6047003269195557, "learning_rate": 0.0016237054085155352, "loss": 0.8229, "step": 65400 }, { "epoch": 18.817606444188723, "grad_norm": 1.6775484085083008, "learning_rate": 0.0016236478711162256, "loss": 0.8232, "step": 65410 }, { "epoch": 18.8204833141542, "grad_norm": 3.0668909549713135, "learning_rate": 0.001623590333716916, "loss": 0.6695, "step": 65420 }, { "epoch": 18.82336018411968, "grad_norm": 1.4475016593933105, "learning_rate": 0.0016235327963176065, "loss": 0.5669, "step": 65430 }, { "epoch": 18.826237054085155, "grad_norm": 1.2325693368911743, "learning_rate": 0.0016234752589182968, "loss": 0.8394, "step": 65440 }, { "epoch": 18.82911392405063, "grad_norm": 1.3904633522033691, "learning_rate": 0.0016234177215189874, "loss": 0.8508, "step": 65450 }, { "epoch": 18.83199079401611, "grad_norm": 0.7594445943832397, "learning_rate": 0.001623360184119678, "loss": 0.7112, "step": 65460 }, { "epoch": 18.834867663981587, "grad_norm": 1.135013222694397, "learning_rate": 0.0016233026467203683, "loss": 0.8253, "step": 65470 }, { "epoch": 18.837744533947067, "grad_norm": 1.1983674764633179, "learning_rate": 0.0016232451093210587, "loss": 0.7301, "step": 65480 }, { "epoch": 18.840621403912543, "grad_norm": 1.8146604299545288, "learning_rate": 0.0016231875719217492, "loss": 0.8216, "step": 65490 }, { "epoch": 18.84349827387802, "grad_norm": 1.5915966033935547, "learning_rate": 0.0016231300345224396, "loss": 0.9447, "step": 65500 }, { "epoch": 18.8463751438435, "grad_norm": 1.4368925094604492, "learning_rate": 0.0016230724971231301, "loss": 0.9488, "step": 65510 }, { "epoch": 18.849252013808975, "grad_norm": 1.4292834997177124, "learning_rate": 0.0016230149597238207, "loss": 0.6782, "step": 65520 }, { "epoch": 18.852128883774455, "grad_norm": 1.1893067359924316, "learning_rate": 0.001622957422324511, "loss": 0.7985, "step": 65530 }, { "epoch": 18.85500575373993, "grad_norm": 0.8390885591506958, "learning_rate": 0.0016228998849252014, "loss": 0.7118, "step": 65540 }, { "epoch": 18.857882623705407, "grad_norm": 0.8150290250778198, "learning_rate": 0.0016228423475258917, "loss": 0.7043, "step": 65550 }, { "epoch": 18.860759493670887, "grad_norm": 1.0441012382507324, "learning_rate": 0.0016227848101265823, "loss": 0.7132, "step": 65560 }, { "epoch": 18.863636363636363, "grad_norm": 1.3043445348739624, "learning_rate": 0.0016227272727272729, "loss": 0.6741, "step": 65570 }, { "epoch": 18.86651323360184, "grad_norm": 1.4302263259887695, "learning_rate": 0.0016226697353279632, "loss": 0.7757, "step": 65580 }, { "epoch": 18.86939010356732, "grad_norm": 1.251098394393921, "learning_rate": 0.0016226121979286536, "loss": 0.6924, "step": 65590 }, { "epoch": 18.872266973532795, "grad_norm": 1.786368489265442, "learning_rate": 0.0016225546605293441, "loss": 0.5823, "step": 65600 }, { "epoch": 18.875143843498275, "grad_norm": 1.5147689580917358, "learning_rate": 0.0016224971231300345, "loss": 0.8965, "step": 65610 }, { "epoch": 18.87802071346375, "grad_norm": 1.369570016860962, "learning_rate": 0.001622439585730725, "loss": 0.8274, "step": 65620 }, { "epoch": 18.880897583429228, "grad_norm": 0.9715345501899719, "learning_rate": 0.0016223820483314156, "loss": 0.5766, "step": 65630 }, { "epoch": 18.883774453394707, "grad_norm": 1.446108102798462, "learning_rate": 0.001622324510932106, "loss": 0.7466, "step": 65640 }, { "epoch": 18.886651323360184, "grad_norm": 0.924200713634491, "learning_rate": 0.0016222669735327963, "loss": 0.9016, "step": 65650 }, { "epoch": 18.889528193325663, "grad_norm": 1.8151066303253174, "learning_rate": 0.0016222094361334866, "loss": 0.7517, "step": 65660 }, { "epoch": 18.89240506329114, "grad_norm": 1.753282070159912, "learning_rate": 0.0016221518987341772, "loss": 0.7729, "step": 65670 }, { "epoch": 18.895281933256616, "grad_norm": 0.7543156147003174, "learning_rate": 0.0016220943613348678, "loss": 0.7805, "step": 65680 }, { "epoch": 18.898158803222096, "grad_norm": 1.351148247718811, "learning_rate": 0.0016220368239355581, "loss": 0.9997, "step": 65690 }, { "epoch": 18.90103567318757, "grad_norm": 1.1112951040267944, "learning_rate": 0.0016219792865362487, "loss": 0.7646, "step": 65700 }, { "epoch": 18.903912543153048, "grad_norm": 1.833029866218567, "learning_rate": 0.001621921749136939, "loss": 0.9542, "step": 65710 }, { "epoch": 18.906789413118528, "grad_norm": 1.801137089729309, "learning_rate": 0.0016218642117376294, "loss": 0.6449, "step": 65720 }, { "epoch": 18.909666283084004, "grad_norm": 1.1554889678955078, "learning_rate": 0.00162180667433832, "loss": 0.6197, "step": 65730 }, { "epoch": 18.912543153049484, "grad_norm": 1.5286815166473389, "learning_rate": 0.0016217491369390105, "loss": 0.6253, "step": 65740 }, { "epoch": 18.91542002301496, "grad_norm": 1.0073503255844116, "learning_rate": 0.0016216915995397008, "loss": 0.7044, "step": 65750 }, { "epoch": 18.918296892980436, "grad_norm": 1.1068085432052612, "learning_rate": 0.0016216340621403914, "loss": 0.9652, "step": 65760 }, { "epoch": 18.921173762945916, "grad_norm": 1.9533997774124146, "learning_rate": 0.0016215765247410815, "loss": 0.8149, "step": 65770 }, { "epoch": 18.924050632911392, "grad_norm": 1.458174467086792, "learning_rate": 0.001621518987341772, "loss": 0.664, "step": 65780 }, { "epoch": 18.92692750287687, "grad_norm": 1.1057580709457397, "learning_rate": 0.0016214614499424627, "loss": 0.8871, "step": 65790 }, { "epoch": 18.929804372842348, "grad_norm": 1.2800121307373047, "learning_rate": 0.001621403912543153, "loss": 0.7488, "step": 65800 }, { "epoch": 18.932681242807824, "grad_norm": 3.183347702026367, "learning_rate": 0.0016213463751438436, "loss": 0.8145, "step": 65810 }, { "epoch": 18.935558112773304, "grad_norm": 0.9317610859870911, "learning_rate": 0.0016212888377445341, "loss": 0.7879, "step": 65820 }, { "epoch": 18.93843498273878, "grad_norm": 1.2132285833358765, "learning_rate": 0.0016212313003452243, "loss": 0.7018, "step": 65830 }, { "epoch": 18.941311852704256, "grad_norm": 1.3285374641418457, "learning_rate": 0.0016211737629459148, "loss": 0.883, "step": 65840 }, { "epoch": 18.944188722669736, "grad_norm": 2.002990484237671, "learning_rate": 0.0016211162255466054, "loss": 0.9261, "step": 65850 }, { "epoch": 18.947065592635212, "grad_norm": 1.3570019006729126, "learning_rate": 0.0016210586881472957, "loss": 0.7709, "step": 65860 }, { "epoch": 18.949942462600692, "grad_norm": 1.0150924921035767, "learning_rate": 0.0016210011507479863, "loss": 0.7991, "step": 65870 }, { "epoch": 18.95281933256617, "grad_norm": 1.4460963010787964, "learning_rate": 0.0016209436133486767, "loss": 0.7489, "step": 65880 }, { "epoch": 18.955696202531644, "grad_norm": 1.0266488790512085, "learning_rate": 0.001620886075949367, "loss": 0.719, "step": 65890 }, { "epoch": 18.958573072497124, "grad_norm": 0.9780745506286621, "learning_rate": 0.0016208285385500576, "loss": 0.6925, "step": 65900 }, { "epoch": 18.9614499424626, "grad_norm": 0.8778024315834045, "learning_rate": 0.001620771001150748, "loss": 0.6704, "step": 65910 }, { "epoch": 18.964326812428077, "grad_norm": 1.6874531507492065, "learning_rate": 0.0016207134637514385, "loss": 0.6272, "step": 65920 }, { "epoch": 18.967203682393556, "grad_norm": 1.1952096223831177, "learning_rate": 0.001620655926352129, "loss": 0.6004, "step": 65930 }, { "epoch": 18.970080552359033, "grad_norm": 0.9848041534423828, "learning_rate": 0.0016205983889528194, "loss": 0.7931, "step": 65940 }, { "epoch": 18.972957422324512, "grad_norm": 0.8734918832778931, "learning_rate": 0.0016205408515535097, "loss": 0.5923, "step": 65950 }, { "epoch": 18.97583429228999, "grad_norm": 1.2664743661880493, "learning_rate": 0.0016204833141542003, "loss": 0.9305, "step": 65960 }, { "epoch": 18.978711162255465, "grad_norm": 0.8463718295097351, "learning_rate": 0.0016204257767548906, "loss": 0.7781, "step": 65970 }, { "epoch": 18.981588032220944, "grad_norm": 1.0370330810546875, "learning_rate": 0.0016203682393555812, "loss": 0.604, "step": 65980 }, { "epoch": 18.98446490218642, "grad_norm": 1.8852177858352661, "learning_rate": 0.0016203107019562716, "loss": 0.8179, "step": 65990 }, { "epoch": 18.9873417721519, "grad_norm": 1.4631162881851196, "learning_rate": 0.0016202531645569621, "loss": 0.6638, "step": 66000 }, { "epoch": 18.990218642117377, "grad_norm": 0.9321560859680176, "learning_rate": 0.0016201956271576525, "loss": 0.6972, "step": 66010 }, { "epoch": 18.993095512082853, "grad_norm": 1.6522022485733032, "learning_rate": 0.0016201380897583428, "loss": 0.601, "step": 66020 }, { "epoch": 18.995972382048333, "grad_norm": 1.7573962211608887, "learning_rate": 0.0016200805523590334, "loss": 0.6712, "step": 66030 }, { "epoch": 18.99884925201381, "grad_norm": 1.4284579753875732, "learning_rate": 0.001620023014959724, "loss": 0.7408, "step": 66040 }, { "epoch": 19.001726121979285, "grad_norm": 1.0023818016052246, "learning_rate": 0.0016199654775604143, "loss": 0.62, "step": 66050 }, { "epoch": 19.004602991944765, "grad_norm": 1.9383454322814941, "learning_rate": 0.0016199079401611049, "loss": 0.7617, "step": 66060 }, { "epoch": 19.00747986191024, "grad_norm": 2.6656644344329834, "learning_rate": 0.0016198504027617952, "loss": 0.8304, "step": 66070 }, { "epoch": 19.01035673187572, "grad_norm": 0.8766264319419861, "learning_rate": 0.0016197928653624856, "loss": 0.7147, "step": 66080 }, { "epoch": 19.013233601841197, "grad_norm": 1.1204912662506104, "learning_rate": 0.0016197353279631761, "loss": 0.5793, "step": 66090 }, { "epoch": 19.016110471806673, "grad_norm": 1.2295629978179932, "learning_rate": 0.0016196777905638665, "loss": 0.799, "step": 66100 }, { "epoch": 19.018987341772153, "grad_norm": 1.188543438911438, "learning_rate": 0.001619620253164557, "loss": 0.594, "step": 66110 }, { "epoch": 19.02186421173763, "grad_norm": 1.672853708267212, "learning_rate": 0.0016195627157652476, "loss": 0.6926, "step": 66120 }, { "epoch": 19.024741081703105, "grad_norm": 1.7064144611358643, "learning_rate": 0.0016195051783659377, "loss": 0.6786, "step": 66130 }, { "epoch": 19.027617951668585, "grad_norm": 0.775120198726654, "learning_rate": 0.0016194476409666283, "loss": 0.6891, "step": 66140 }, { "epoch": 19.03049482163406, "grad_norm": 2.047619581222534, "learning_rate": 0.0016193901035673188, "loss": 0.5886, "step": 66150 }, { "epoch": 19.03337169159954, "grad_norm": 1.3214354515075684, "learning_rate": 0.0016193325661680092, "loss": 0.7917, "step": 66160 }, { "epoch": 19.036248561565017, "grad_norm": 1.8492543697357178, "learning_rate": 0.0016192750287686998, "loss": 0.7741, "step": 66170 }, { "epoch": 19.039125431530493, "grad_norm": 1.0595656633377075, "learning_rate": 0.0016192174913693903, "loss": 0.7526, "step": 66180 }, { "epoch": 19.042002301495973, "grad_norm": 2.3879568576812744, "learning_rate": 0.0016191599539700805, "loss": 0.7725, "step": 66190 }, { "epoch": 19.04487917146145, "grad_norm": 1.8657479286193848, "learning_rate": 0.001619102416570771, "loss": 0.8778, "step": 66200 }, { "epoch": 19.04775604142693, "grad_norm": 0.9527375102043152, "learning_rate": 0.0016190448791714616, "loss": 0.6775, "step": 66210 }, { "epoch": 19.050632911392405, "grad_norm": 0.9781647324562073, "learning_rate": 0.001618987341772152, "loss": 0.6105, "step": 66220 }, { "epoch": 19.05350978135788, "grad_norm": 2.0883278846740723, "learning_rate": 0.0016189298043728425, "loss": 0.6958, "step": 66230 }, { "epoch": 19.05638665132336, "grad_norm": 0.9453184008598328, "learning_rate": 0.0016188722669735328, "loss": 0.7598, "step": 66240 }, { "epoch": 19.059263521288837, "grad_norm": 1.3864822387695312, "learning_rate": 0.0016188147295742232, "loss": 0.6758, "step": 66250 }, { "epoch": 19.062140391254314, "grad_norm": 1.152389407157898, "learning_rate": 0.0016187571921749138, "loss": 0.7314, "step": 66260 }, { "epoch": 19.065017261219793, "grad_norm": 1.1811785697937012, "learning_rate": 0.001618699654775604, "loss": 0.7047, "step": 66270 }, { "epoch": 19.06789413118527, "grad_norm": 1.4992564916610718, "learning_rate": 0.0016186421173762947, "loss": 0.8303, "step": 66280 }, { "epoch": 19.07077100115075, "grad_norm": 1.3737016916275024, "learning_rate": 0.0016185845799769852, "loss": 0.6883, "step": 66290 }, { "epoch": 19.073647871116226, "grad_norm": 0.8704187870025635, "learning_rate": 0.0016185270425776756, "loss": 0.6313, "step": 66300 }, { "epoch": 19.076524741081702, "grad_norm": 2.7121963500976562, "learning_rate": 0.001618469505178366, "loss": 0.6908, "step": 66310 }, { "epoch": 19.07940161104718, "grad_norm": 1.8340955972671509, "learning_rate": 0.0016184119677790565, "loss": 0.7808, "step": 66320 }, { "epoch": 19.082278481012658, "grad_norm": 2.065749168395996, "learning_rate": 0.0016183544303797468, "loss": 0.6789, "step": 66330 }, { "epoch": 19.085155350978138, "grad_norm": 1.545401930809021, "learning_rate": 0.0016182968929804374, "loss": 0.8263, "step": 66340 }, { "epoch": 19.088032220943614, "grad_norm": 1.970501184463501, "learning_rate": 0.0016182393555811277, "loss": 0.709, "step": 66350 }, { "epoch": 19.09090909090909, "grad_norm": 1.0339229106903076, "learning_rate": 0.0016181818181818183, "loss": 0.648, "step": 66360 }, { "epoch": 19.09378596087457, "grad_norm": 1.9219262599945068, "learning_rate": 0.0016181242807825087, "loss": 0.5802, "step": 66370 }, { "epoch": 19.096662830840046, "grad_norm": 1.1072887182235718, "learning_rate": 0.001618066743383199, "loss": 0.9036, "step": 66380 }, { "epoch": 19.099539700805522, "grad_norm": 1.1219662427902222, "learning_rate": 0.0016180092059838896, "loss": 0.8541, "step": 66390 }, { "epoch": 19.102416570771002, "grad_norm": 0.9640944600105286, "learning_rate": 0.0016179516685845801, "loss": 0.6326, "step": 66400 }, { "epoch": 19.105293440736478, "grad_norm": 1.1144678592681885, "learning_rate": 0.0016178941311852705, "loss": 0.6417, "step": 66410 }, { "epoch": 19.108170310701958, "grad_norm": 1.0887794494628906, "learning_rate": 0.0016178365937859608, "loss": 0.6392, "step": 66420 }, { "epoch": 19.111047180667434, "grad_norm": 1.4716625213623047, "learning_rate": 0.0016177790563866514, "loss": 0.6585, "step": 66430 }, { "epoch": 19.11392405063291, "grad_norm": 1.025327205657959, "learning_rate": 0.0016177215189873417, "loss": 0.86, "step": 66440 }, { "epoch": 19.11680092059839, "grad_norm": 1.900992751121521, "learning_rate": 0.0016176639815880323, "loss": 0.7251, "step": 66450 }, { "epoch": 19.119677790563866, "grad_norm": 1.5499277114868164, "learning_rate": 0.0016176064441887226, "loss": 0.701, "step": 66460 }, { "epoch": 19.122554660529342, "grad_norm": 1.6159391403198242, "learning_rate": 0.0016175489067894132, "loss": 0.7105, "step": 66470 }, { "epoch": 19.125431530494822, "grad_norm": 1.9184045791625977, "learning_rate": 0.0016174913693901036, "loss": 0.6599, "step": 66480 }, { "epoch": 19.1283084004603, "grad_norm": 1.3100816011428833, "learning_rate": 0.001617433831990794, "loss": 0.7851, "step": 66490 }, { "epoch": 19.131185270425778, "grad_norm": 1.5549057722091675, "learning_rate": 0.0016173762945914845, "loss": 0.8523, "step": 66500 }, { "epoch": 19.134062140391254, "grad_norm": 1.5161664485931396, "learning_rate": 0.001617318757192175, "loss": 0.6296, "step": 66510 }, { "epoch": 19.13693901035673, "grad_norm": 1.3452554941177368, "learning_rate": 0.0016172612197928654, "loss": 0.6364, "step": 66520 }, { "epoch": 19.13981588032221, "grad_norm": 1.5303089618682861, "learning_rate": 0.001617203682393556, "loss": 0.7749, "step": 66530 }, { "epoch": 19.142692750287686, "grad_norm": 1.179609775543213, "learning_rate": 0.0016171461449942463, "loss": 0.737, "step": 66540 }, { "epoch": 19.145569620253166, "grad_norm": 1.0277360677719116, "learning_rate": 0.0016170886075949366, "loss": 0.7115, "step": 66550 }, { "epoch": 19.148446490218642, "grad_norm": 1.6126174926757812, "learning_rate": 0.0016170310701956272, "loss": 0.8192, "step": 66560 }, { "epoch": 19.15132336018412, "grad_norm": 1.829384684562683, "learning_rate": 0.0016169735327963175, "loss": 0.7269, "step": 66570 }, { "epoch": 19.1542002301496, "grad_norm": 1.2494864463806152, "learning_rate": 0.0016169159953970081, "loss": 0.6985, "step": 66580 }, { "epoch": 19.157077100115075, "grad_norm": 2.1385059356689453, "learning_rate": 0.0016168584579976987, "loss": 0.8208, "step": 66590 }, { "epoch": 19.15995397008055, "grad_norm": 2.2538185119628906, "learning_rate": 0.0016168009205983888, "loss": 0.7455, "step": 66600 }, { "epoch": 19.16283084004603, "grad_norm": 1.9104048013687134, "learning_rate": 0.0016167433831990794, "loss": 0.9322, "step": 66610 }, { "epoch": 19.165707710011507, "grad_norm": 1.4291690587997437, "learning_rate": 0.00161668584579977, "loss": 0.7075, "step": 66620 }, { "epoch": 19.168584579976987, "grad_norm": 1.022606372833252, "learning_rate": 0.0016166283084004603, "loss": 0.8114, "step": 66630 }, { "epoch": 19.171461449942463, "grad_norm": 1.5841656923294067, "learning_rate": 0.0016165707710011508, "loss": 0.8018, "step": 66640 }, { "epoch": 19.17433831990794, "grad_norm": 2.1279165744781494, "learning_rate": 0.0016165132336018414, "loss": 0.9106, "step": 66650 }, { "epoch": 19.17721518987342, "grad_norm": 1.1301929950714111, "learning_rate": 0.0016164556962025315, "loss": 0.7028, "step": 66660 }, { "epoch": 19.180092059838895, "grad_norm": 1.2993412017822266, "learning_rate": 0.001616398158803222, "loss": 0.6254, "step": 66670 }, { "epoch": 19.182968929804375, "grad_norm": 1.3671578168869019, "learning_rate": 0.0016163406214039124, "loss": 0.8713, "step": 66680 }, { "epoch": 19.18584579976985, "grad_norm": 1.4094256162643433, "learning_rate": 0.001616283084004603, "loss": 0.7951, "step": 66690 }, { "epoch": 19.188722669735327, "grad_norm": 0.6634832620620728, "learning_rate": 0.0016162255466052936, "loss": 0.7141, "step": 66700 }, { "epoch": 19.191599539700807, "grad_norm": 0.8490317463874817, "learning_rate": 0.001616168009205984, "loss": 0.6639, "step": 66710 }, { "epoch": 19.194476409666283, "grad_norm": 1.375567078590393, "learning_rate": 0.0016161104718066743, "loss": 0.6161, "step": 66720 }, { "epoch": 19.19735327963176, "grad_norm": 1.3384078741073608, "learning_rate": 0.0016160529344073648, "loss": 0.8398, "step": 66730 }, { "epoch": 19.20023014959724, "grad_norm": 0.9412862658500671, "learning_rate": 0.0016159953970080552, "loss": 0.8355, "step": 66740 }, { "epoch": 19.203107019562715, "grad_norm": 1.5655418634414673, "learning_rate": 0.0016159378596087457, "loss": 0.7294, "step": 66750 }, { "epoch": 19.205983889528195, "grad_norm": 1.4915422201156616, "learning_rate": 0.0016158803222094363, "loss": 0.7935, "step": 66760 }, { "epoch": 19.20886075949367, "grad_norm": 1.5494965314865112, "learning_rate": 0.0016158227848101267, "loss": 0.8059, "step": 66770 }, { "epoch": 19.211737629459147, "grad_norm": 1.8301379680633545, "learning_rate": 0.001615765247410817, "loss": 0.7424, "step": 66780 }, { "epoch": 19.214614499424627, "grad_norm": 2.168778896331787, "learning_rate": 0.0016157077100115074, "loss": 0.8033, "step": 66790 }, { "epoch": 19.217491369390103, "grad_norm": 1.0599582195281982, "learning_rate": 0.001615650172612198, "loss": 0.6766, "step": 66800 }, { "epoch": 19.22036823935558, "grad_norm": 2.767003059387207, "learning_rate": 0.0016155926352128885, "loss": 0.7793, "step": 66810 }, { "epoch": 19.22324510932106, "grad_norm": 1.203385591506958, "learning_rate": 0.0016155350978135788, "loss": 0.8521, "step": 66820 }, { "epoch": 19.226121979286535, "grad_norm": 0.9426012635231018, "learning_rate": 0.0016154775604142694, "loss": 0.7939, "step": 66830 }, { "epoch": 19.228998849252015, "grad_norm": 1.381017804145813, "learning_rate": 0.0016154200230149597, "loss": 0.7426, "step": 66840 }, { "epoch": 19.23187571921749, "grad_norm": 0.93629390001297, "learning_rate": 0.00161536248561565, "loss": 0.7182, "step": 66850 }, { "epoch": 19.234752589182968, "grad_norm": 1.4382449388504028, "learning_rate": 0.0016153049482163406, "loss": 0.6563, "step": 66860 }, { "epoch": 19.237629459148447, "grad_norm": 1.1938831806182861, "learning_rate": 0.0016152474108170312, "loss": 0.5723, "step": 66870 }, { "epoch": 19.240506329113924, "grad_norm": 1.9196972846984863, "learning_rate": 0.0016151898734177216, "loss": 0.8358, "step": 66880 }, { "epoch": 19.243383199079403, "grad_norm": 1.0129331350326538, "learning_rate": 0.0016151323360184121, "loss": 0.7576, "step": 66890 }, { "epoch": 19.24626006904488, "grad_norm": 1.4465967416763306, "learning_rate": 0.0016150747986191025, "loss": 0.7363, "step": 66900 }, { "epoch": 19.249136939010356, "grad_norm": 1.4128901958465576, "learning_rate": 0.0016150172612197928, "loss": 0.6133, "step": 66910 }, { "epoch": 19.252013808975835, "grad_norm": 2.2666420936584473, "learning_rate": 0.0016149597238204834, "loss": 0.8134, "step": 66920 }, { "epoch": 19.25489067894131, "grad_norm": 0.6910290122032166, "learning_rate": 0.0016149021864211737, "loss": 0.7269, "step": 66930 }, { "epoch": 19.257767548906788, "grad_norm": 1.4386851787567139, "learning_rate": 0.0016148446490218643, "loss": 0.9537, "step": 66940 }, { "epoch": 19.260644418872268, "grad_norm": 2.429962635040283, "learning_rate": 0.0016147871116225549, "loss": 0.8656, "step": 66950 }, { "epoch": 19.263521288837744, "grad_norm": 1.0183966159820557, "learning_rate": 0.001614729574223245, "loss": 0.6479, "step": 66960 }, { "epoch": 19.266398158803224, "grad_norm": 0.6492007374763489, "learning_rate": 0.0016146720368239355, "loss": 0.7316, "step": 66970 }, { "epoch": 19.2692750287687, "grad_norm": 0.9813397526741028, "learning_rate": 0.0016146144994246261, "loss": 0.7676, "step": 66980 }, { "epoch": 19.272151898734176, "grad_norm": 1.1600956916809082, "learning_rate": 0.0016145569620253165, "loss": 0.8215, "step": 66990 }, { "epoch": 19.275028768699656, "grad_norm": 2.0584676265716553, "learning_rate": 0.001614499424626007, "loss": 0.6747, "step": 67000 }, { "epoch": 19.277905638665132, "grad_norm": 0.6203359365463257, "learning_rate": 0.0016144418872266976, "loss": 0.7441, "step": 67010 }, { "epoch": 19.280782508630608, "grad_norm": 0.8687880635261536, "learning_rate": 0.0016143843498273877, "loss": 0.7098, "step": 67020 }, { "epoch": 19.283659378596088, "grad_norm": 1.1912039518356323, "learning_rate": 0.0016143268124280783, "loss": 0.7084, "step": 67030 }, { "epoch": 19.286536248561564, "grad_norm": 1.3691041469573975, "learning_rate": 0.0016142692750287686, "loss": 0.7464, "step": 67040 }, { "epoch": 19.289413118527044, "grad_norm": 0.9640227556228638, "learning_rate": 0.0016142117376294592, "loss": 0.7509, "step": 67050 }, { "epoch": 19.29228998849252, "grad_norm": 0.9020334482192993, "learning_rate": 0.0016141542002301498, "loss": 0.914, "step": 67060 }, { "epoch": 19.295166858457996, "grad_norm": 0.987053394317627, "learning_rate": 0.00161409666283084, "loss": 0.7418, "step": 67070 }, { "epoch": 19.298043728423476, "grad_norm": 1.114971399307251, "learning_rate": 0.0016140391254315305, "loss": 0.9119, "step": 67080 }, { "epoch": 19.300920598388952, "grad_norm": 1.983737826347351, "learning_rate": 0.001613981588032221, "loss": 0.6641, "step": 67090 }, { "epoch": 19.303797468354432, "grad_norm": 1.2358858585357666, "learning_rate": 0.0016139240506329114, "loss": 0.7872, "step": 67100 }, { "epoch": 19.306674338319908, "grad_norm": 1.1048400402069092, "learning_rate": 0.001613866513233602, "loss": 0.488, "step": 67110 }, { "epoch": 19.309551208285384, "grad_norm": 1.4459463357925415, "learning_rate": 0.0016138089758342925, "loss": 0.6904, "step": 67120 }, { "epoch": 19.312428078250864, "grad_norm": 1.8967735767364502, "learning_rate": 0.0016137514384349828, "loss": 0.656, "step": 67130 }, { "epoch": 19.31530494821634, "grad_norm": 0.8646332025527954, "learning_rate": 0.0016136939010356732, "loss": 0.5837, "step": 67140 }, { "epoch": 19.318181818181817, "grad_norm": 2.0021162033081055, "learning_rate": 0.0016136363636363635, "loss": 0.6518, "step": 67150 }, { "epoch": 19.321058688147296, "grad_norm": 1.6970837116241455, "learning_rate": 0.001613578826237054, "loss": 0.7026, "step": 67160 }, { "epoch": 19.323935558112773, "grad_norm": 1.4956070184707642, "learning_rate": 0.0016135212888377447, "loss": 0.7083, "step": 67170 }, { "epoch": 19.326812428078252, "grad_norm": 1.6135503053665161, "learning_rate": 0.001613463751438435, "loss": 0.7063, "step": 67180 }, { "epoch": 19.32968929804373, "grad_norm": 1.453656792640686, "learning_rate": 0.0016134062140391256, "loss": 0.6103, "step": 67190 }, { "epoch": 19.332566168009205, "grad_norm": 1.4804987907409668, "learning_rate": 0.001613348676639816, "loss": 0.752, "step": 67200 }, { "epoch": 19.335443037974684, "grad_norm": 1.0433851480484009, "learning_rate": 0.0016132911392405063, "loss": 0.5775, "step": 67210 }, { "epoch": 19.33831990794016, "grad_norm": 2.550670862197876, "learning_rate": 0.0016132336018411968, "loss": 0.9033, "step": 67220 }, { "epoch": 19.34119677790564, "grad_norm": 1.160057783126831, "learning_rate": 0.0016131760644418874, "loss": 0.6201, "step": 67230 }, { "epoch": 19.344073647871117, "grad_norm": 1.8895353078842163, "learning_rate": 0.0016131185270425777, "loss": 0.7658, "step": 67240 }, { "epoch": 19.346950517836593, "grad_norm": 1.3013668060302734, "learning_rate": 0.001613060989643268, "loss": 0.7823, "step": 67250 }, { "epoch": 19.349827387802073, "grad_norm": 1.7086586952209473, "learning_rate": 0.0016130034522439584, "loss": 0.6437, "step": 67260 }, { "epoch": 19.35270425776755, "grad_norm": 0.8863638043403625, "learning_rate": 0.001612945914844649, "loss": 0.6067, "step": 67270 }, { "epoch": 19.355581127733025, "grad_norm": 0.940359354019165, "learning_rate": 0.0016128883774453396, "loss": 0.5919, "step": 67280 }, { "epoch": 19.358457997698505, "grad_norm": 0.9673874378204346, "learning_rate": 0.00161283084004603, "loss": 0.7754, "step": 67290 }, { "epoch": 19.36133486766398, "grad_norm": 1.1847723722457886, "learning_rate": 0.0016127733026467205, "loss": 0.6184, "step": 67300 }, { "epoch": 19.36421173762946, "grad_norm": 2.0808918476104736, "learning_rate": 0.0016127157652474108, "loss": 0.7351, "step": 67310 }, { "epoch": 19.367088607594937, "grad_norm": 1.0384278297424316, "learning_rate": 0.0016126582278481012, "loss": 0.7315, "step": 67320 }, { "epoch": 19.369965477560413, "grad_norm": 1.137037754058838, "learning_rate": 0.0016126006904487917, "loss": 0.6766, "step": 67330 }, { "epoch": 19.372842347525893, "grad_norm": 1.9643664360046387, "learning_rate": 0.0016125431530494823, "loss": 0.753, "step": 67340 }, { "epoch": 19.37571921749137, "grad_norm": 1.494973063468933, "learning_rate": 0.0016124856156501726, "loss": 0.6642, "step": 67350 }, { "epoch": 19.378596087456845, "grad_norm": 1.6477329730987549, "learning_rate": 0.0016124280782508632, "loss": 0.8046, "step": 67360 }, { "epoch": 19.381472957422325, "grad_norm": 1.467452883720398, "learning_rate": 0.0016123705408515533, "loss": 0.6439, "step": 67370 }, { "epoch": 19.3843498273878, "grad_norm": 1.6162488460540771, "learning_rate": 0.001612313003452244, "loss": 0.6601, "step": 67380 }, { "epoch": 19.38722669735328, "grad_norm": 1.4771156311035156, "learning_rate": 0.0016122554660529345, "loss": 0.8591, "step": 67390 }, { "epoch": 19.390103567318757, "grad_norm": 0.9845173954963684, "learning_rate": 0.0016121979286536248, "loss": 0.5389, "step": 67400 }, { "epoch": 19.392980437284233, "grad_norm": 1.6886370182037354, "learning_rate": 0.0016121403912543154, "loss": 0.687, "step": 67410 }, { "epoch": 19.395857307249713, "grad_norm": 0.5943196415901184, "learning_rate": 0.001612082853855006, "loss": 0.5677, "step": 67420 }, { "epoch": 19.39873417721519, "grad_norm": 1.3372801542282104, "learning_rate": 0.001612025316455696, "loss": 0.6757, "step": 67430 }, { "epoch": 19.40161104718067, "grad_norm": 0.8295056223869324, "learning_rate": 0.0016119677790563866, "loss": 0.6009, "step": 67440 }, { "epoch": 19.404487917146145, "grad_norm": 0.9899870157241821, "learning_rate": 0.0016119102416570772, "loss": 0.6455, "step": 67450 }, { "epoch": 19.40736478711162, "grad_norm": 1.5016632080078125, "learning_rate": 0.0016118527042577675, "loss": 0.9031, "step": 67460 }, { "epoch": 19.4102416570771, "grad_norm": 1.1576441526412964, "learning_rate": 0.001611795166858458, "loss": 0.743, "step": 67470 }, { "epoch": 19.413118527042577, "grad_norm": 1.1046079397201538, "learning_rate": 0.0016117376294591487, "loss": 0.7456, "step": 67480 }, { "epoch": 19.415995397008054, "grad_norm": 1.3382155895233154, "learning_rate": 0.0016116800920598388, "loss": 0.6261, "step": 67490 }, { "epoch": 19.418872266973533, "grad_norm": 1.795183777809143, "learning_rate": 0.0016116225546605294, "loss": 0.8555, "step": 67500 }, { "epoch": 19.42174913693901, "grad_norm": 1.15218186378479, "learning_rate": 0.0016115650172612197, "loss": 0.5867, "step": 67510 }, { "epoch": 19.42462600690449, "grad_norm": 1.525913953781128, "learning_rate": 0.0016115074798619103, "loss": 0.7096, "step": 67520 }, { "epoch": 19.427502876869966, "grad_norm": 0.9342212677001953, "learning_rate": 0.0016114499424626008, "loss": 0.9251, "step": 67530 }, { "epoch": 19.430379746835442, "grad_norm": 2.0540108680725098, "learning_rate": 0.0016113924050632912, "loss": 0.7409, "step": 67540 }, { "epoch": 19.43325661680092, "grad_norm": 1.0781275033950806, "learning_rate": 0.0016113348676639815, "loss": 0.7546, "step": 67550 }, { "epoch": 19.436133486766398, "grad_norm": 2.4065957069396973, "learning_rate": 0.001611277330264672, "loss": 0.6209, "step": 67560 }, { "epoch": 19.439010356731877, "grad_norm": 1.3410273790359497, "learning_rate": 0.0016112197928653624, "loss": 0.7428, "step": 67570 }, { "epoch": 19.441887226697354, "grad_norm": 2.773444890975952, "learning_rate": 0.001611162255466053, "loss": 0.7494, "step": 67580 }, { "epoch": 19.44476409666283, "grad_norm": 1.1249275207519531, "learning_rate": 0.0016111047180667436, "loss": 0.7243, "step": 67590 }, { "epoch": 19.44764096662831, "grad_norm": 0.9694820046424866, "learning_rate": 0.001611047180667434, "loss": 0.6569, "step": 67600 }, { "epoch": 19.450517836593786, "grad_norm": 1.1360141038894653, "learning_rate": 0.0016109896432681243, "loss": 0.7212, "step": 67610 }, { "epoch": 19.453394706559262, "grad_norm": 1.5810412168502808, "learning_rate": 0.0016109321058688146, "loss": 0.6548, "step": 67620 }, { "epoch": 19.456271576524742, "grad_norm": 1.902716875076294, "learning_rate": 0.0016108745684695052, "loss": 0.9034, "step": 67630 }, { "epoch": 19.459148446490218, "grad_norm": 1.3657169342041016, "learning_rate": 0.0016108170310701957, "loss": 0.6904, "step": 67640 }, { "epoch": 19.462025316455698, "grad_norm": 0.8255701661109924, "learning_rate": 0.001610759493670886, "loss": 0.7926, "step": 67650 }, { "epoch": 19.464902186421174, "grad_norm": 1.5453499555587769, "learning_rate": 0.0016107019562715767, "loss": 0.6838, "step": 67660 }, { "epoch": 19.46777905638665, "grad_norm": 0.903215765953064, "learning_rate": 0.001610644418872267, "loss": 0.7491, "step": 67670 }, { "epoch": 19.47065592635213, "grad_norm": 1.1418970823287964, "learning_rate": 0.0016105868814729573, "loss": 0.5486, "step": 67680 }, { "epoch": 19.473532796317606, "grad_norm": 0.9215083122253418, "learning_rate": 0.001610529344073648, "loss": 0.8318, "step": 67690 }, { "epoch": 19.476409666283082, "grad_norm": 0.9996213316917419, "learning_rate": 0.0016104718066743385, "loss": 0.6421, "step": 67700 }, { "epoch": 19.479286536248562, "grad_norm": 1.3403997421264648, "learning_rate": 0.0016104142692750288, "loss": 0.6264, "step": 67710 }, { "epoch": 19.48216340621404, "grad_norm": 1.0758094787597656, "learning_rate": 0.0016103567318757194, "loss": 0.6308, "step": 67720 }, { "epoch": 19.485040276179518, "grad_norm": 1.1395456790924072, "learning_rate": 0.0016102991944764095, "loss": 0.7192, "step": 67730 }, { "epoch": 19.487917146144994, "grad_norm": 0.8381749391555786, "learning_rate": 0.0016102416570771, "loss": 0.6572, "step": 67740 }, { "epoch": 19.49079401611047, "grad_norm": 1.2887576818466187, "learning_rate": 0.0016101841196777906, "loss": 0.8947, "step": 67750 }, { "epoch": 19.49367088607595, "grad_norm": 0.8139247298240662, "learning_rate": 0.001610126582278481, "loss": 0.6583, "step": 67760 }, { "epoch": 19.496547756041426, "grad_norm": 1.9136885404586792, "learning_rate": 0.0016100690448791716, "loss": 0.609, "step": 67770 }, { "epoch": 19.499424626006906, "grad_norm": 1.2947731018066406, "learning_rate": 0.0016100115074798621, "loss": 0.7825, "step": 67780 }, { "epoch": 19.502301495972382, "grad_norm": 1.0898659229278564, "learning_rate": 0.0016099539700805523, "loss": 0.6961, "step": 67790 }, { "epoch": 19.50517836593786, "grad_norm": 1.8362327814102173, "learning_rate": 0.0016098964326812428, "loss": 0.6712, "step": 67800 }, { "epoch": 19.50805523590334, "grad_norm": 0.9327962398529053, "learning_rate": 0.0016098388952819334, "loss": 0.7227, "step": 67810 }, { "epoch": 19.510932105868815, "grad_norm": 0.8130226135253906, "learning_rate": 0.0016097813578826237, "loss": 0.6672, "step": 67820 }, { "epoch": 19.51380897583429, "grad_norm": 0.9271392226219177, "learning_rate": 0.0016097238204833143, "loss": 0.6323, "step": 67830 }, { "epoch": 19.51668584579977, "grad_norm": 1.6171568632125854, "learning_rate": 0.0016096662830840046, "loss": 0.9237, "step": 67840 }, { "epoch": 19.519562715765247, "grad_norm": 0.762850821018219, "learning_rate": 0.001609608745684695, "loss": 0.6605, "step": 67850 }, { "epoch": 19.522439585730726, "grad_norm": 1.5164544582366943, "learning_rate": 0.0016095512082853855, "loss": 0.7097, "step": 67860 }, { "epoch": 19.525316455696203, "grad_norm": 1.1002510786056519, "learning_rate": 0.001609493670886076, "loss": 0.7565, "step": 67870 }, { "epoch": 19.52819332566168, "grad_norm": 1.2034509181976318, "learning_rate": 0.0016094361334867665, "loss": 0.7313, "step": 67880 }, { "epoch": 19.53107019562716, "grad_norm": 1.5244718790054321, "learning_rate": 0.001609378596087457, "loss": 0.6781, "step": 67890 }, { "epoch": 19.533947065592635, "grad_norm": 1.936995267868042, "learning_rate": 0.0016093210586881474, "loss": 0.5975, "step": 67900 }, { "epoch": 19.53682393555811, "grad_norm": 1.0464138984680176, "learning_rate": 0.0016092635212888377, "loss": 0.7424, "step": 67910 }, { "epoch": 19.53970080552359, "grad_norm": 0.9028892517089844, "learning_rate": 0.0016092059838895283, "loss": 0.6653, "step": 67920 }, { "epoch": 19.542577675489067, "grad_norm": 0.9208365082740784, "learning_rate": 0.0016091484464902186, "loss": 0.7972, "step": 67930 }, { "epoch": 19.545454545454547, "grad_norm": 2.06657075881958, "learning_rate": 0.0016090909090909092, "loss": 0.877, "step": 67940 }, { "epoch": 19.548331415420023, "grad_norm": 1.7261476516723633, "learning_rate": 0.0016090333716915995, "loss": 0.8839, "step": 67950 }, { "epoch": 19.5512082853855, "grad_norm": 1.6488398313522339, "learning_rate": 0.00160897583429229, "loss": 0.6573, "step": 67960 }, { "epoch": 19.55408515535098, "grad_norm": 0.6147963404655457, "learning_rate": 0.0016089182968929804, "loss": 0.8083, "step": 67970 }, { "epoch": 19.556962025316455, "grad_norm": 2.3618524074554443, "learning_rate": 0.0016088607594936708, "loss": 0.7569, "step": 67980 }, { "epoch": 19.559838895281935, "grad_norm": 0.7732771635055542, "learning_rate": 0.0016088032220943614, "loss": 0.8132, "step": 67990 }, { "epoch": 19.56271576524741, "grad_norm": 1.3237413167953491, "learning_rate": 0.001608745684695052, "loss": 0.8396, "step": 68000 }, { "epoch": 19.565592635212887, "grad_norm": 1.6422621011734009, "learning_rate": 0.0016086881472957423, "loss": 0.7561, "step": 68010 }, { "epoch": 19.568469505178367, "grad_norm": 1.1702618598937988, "learning_rate": 0.0016086306098964328, "loss": 0.6517, "step": 68020 }, { "epoch": 19.571346375143843, "grad_norm": 1.3251577615737915, "learning_rate": 0.0016085730724971232, "loss": 0.822, "step": 68030 }, { "epoch": 19.57422324510932, "grad_norm": 1.0085299015045166, "learning_rate": 0.0016085155350978135, "loss": 0.6251, "step": 68040 }, { "epoch": 19.5771001150748, "grad_norm": 0.5134657621383667, "learning_rate": 0.001608457997698504, "loss": 0.7178, "step": 68050 }, { "epoch": 19.579976985040275, "grad_norm": 0.8938401341438293, "learning_rate": 0.0016084004602991944, "loss": 0.778, "step": 68060 }, { "epoch": 19.582853855005755, "grad_norm": 0.9399549961090088, "learning_rate": 0.001608342922899885, "loss": 0.7501, "step": 68070 }, { "epoch": 19.58573072497123, "grad_norm": 0.7468529343605042, "learning_rate": 0.0016082853855005754, "loss": 0.7256, "step": 68080 }, { "epoch": 19.588607594936708, "grad_norm": 0.9131289720535278, "learning_rate": 0.0016082278481012657, "loss": 0.8794, "step": 68090 }, { "epoch": 19.591484464902187, "grad_norm": 1.1094993352890015, "learning_rate": 0.0016081703107019563, "loss": 0.6344, "step": 68100 }, { "epoch": 19.594361334867664, "grad_norm": 1.778163194656372, "learning_rate": 0.0016081127733026468, "loss": 0.7296, "step": 68110 }, { "epoch": 19.59723820483314, "grad_norm": 1.1338696479797363, "learning_rate": 0.0016080552359033372, "loss": 0.664, "step": 68120 }, { "epoch": 19.60011507479862, "grad_norm": 1.0063623189926147, "learning_rate": 0.0016079976985040277, "loss": 0.6351, "step": 68130 }, { "epoch": 19.602991944764096, "grad_norm": 1.1995973587036133, "learning_rate": 0.001607940161104718, "loss": 0.8609, "step": 68140 }, { "epoch": 19.605868814729575, "grad_norm": 1.121773600578308, "learning_rate": 0.0016078826237054084, "loss": 0.9732, "step": 68150 }, { "epoch": 19.60874568469505, "grad_norm": 1.3935506343841553, "learning_rate": 0.001607825086306099, "loss": 0.7852, "step": 68160 }, { "epoch": 19.611622554660528, "grad_norm": 1.2790603637695312, "learning_rate": 0.0016077675489067896, "loss": 0.7078, "step": 68170 }, { "epoch": 19.614499424626008, "grad_norm": 0.8536315560340881, "learning_rate": 0.00160771001150748, "loss": 0.8853, "step": 68180 }, { "epoch": 19.617376294591484, "grad_norm": 1.797666311264038, "learning_rate": 0.0016076524741081705, "loss": 0.6254, "step": 68190 }, { "epoch": 19.620253164556964, "grad_norm": 1.2623569965362549, "learning_rate": 0.0016075949367088606, "loss": 0.8114, "step": 68200 }, { "epoch": 19.62313003452244, "grad_norm": 0.9758418798446655, "learning_rate": 0.0016075373993095512, "loss": 0.8021, "step": 68210 }, { "epoch": 19.626006904487916, "grad_norm": 1.9329493045806885, "learning_rate": 0.0016074798619102417, "loss": 0.8074, "step": 68220 }, { "epoch": 19.628883774453396, "grad_norm": 2.498106002807617, "learning_rate": 0.001607422324510932, "loss": 0.7601, "step": 68230 }, { "epoch": 19.631760644418872, "grad_norm": 1.5181106328964233, "learning_rate": 0.0016073647871116226, "loss": 0.735, "step": 68240 }, { "epoch": 19.634637514384348, "grad_norm": 0.9637396335601807, "learning_rate": 0.0016073072497123132, "loss": 0.629, "step": 68250 }, { "epoch": 19.637514384349828, "grad_norm": 0.9344921708106995, "learning_rate": 0.0016072497123130033, "loss": 0.6712, "step": 68260 }, { "epoch": 19.640391254315304, "grad_norm": 1.398574709892273, "learning_rate": 0.001607192174913694, "loss": 0.5971, "step": 68270 }, { "epoch": 19.643268124280784, "grad_norm": 1.5988203287124634, "learning_rate": 0.0016071346375143845, "loss": 0.771, "step": 68280 }, { "epoch": 19.64614499424626, "grad_norm": 1.0577133893966675, "learning_rate": 0.0016070771001150748, "loss": 0.7661, "step": 68290 }, { "epoch": 19.649021864211736, "grad_norm": 1.0299906730651855, "learning_rate": 0.0016070195627157654, "loss": 0.6539, "step": 68300 }, { "epoch": 19.651898734177216, "grad_norm": 1.0992019176483154, "learning_rate": 0.0016069620253164557, "loss": 0.6629, "step": 68310 }, { "epoch": 19.654775604142692, "grad_norm": 1.8333865404129028, "learning_rate": 0.001606904487917146, "loss": 0.797, "step": 68320 }, { "epoch": 19.657652474108172, "grad_norm": 2.172377109527588, "learning_rate": 0.0016068469505178366, "loss": 0.8212, "step": 68330 }, { "epoch": 19.660529344073648, "grad_norm": 1.9371740818023682, "learning_rate": 0.001606789413118527, "loss": 0.9822, "step": 68340 }, { "epoch": 19.663406214039124, "grad_norm": 2.0204432010650635, "learning_rate": 0.0016067318757192175, "loss": 0.9379, "step": 68350 }, { "epoch": 19.666283084004604, "grad_norm": 0.979749321937561, "learning_rate": 0.001606674338319908, "loss": 0.9366, "step": 68360 }, { "epoch": 19.66915995397008, "grad_norm": 2.0748305320739746, "learning_rate": 0.0016066168009205985, "loss": 0.6957, "step": 68370 }, { "epoch": 19.672036823935557, "grad_norm": 1.4544111490249634, "learning_rate": 0.0016065592635212888, "loss": 0.7812, "step": 68380 }, { "epoch": 19.674913693901036, "grad_norm": 0.7015653848648071, "learning_rate": 0.0016065017261219794, "loss": 0.6943, "step": 68390 }, { "epoch": 19.677790563866512, "grad_norm": 1.6614683866500854, "learning_rate": 0.0016064441887226697, "loss": 0.8161, "step": 68400 }, { "epoch": 19.680667433831992, "grad_norm": 2.3003556728363037, "learning_rate": 0.0016063866513233603, "loss": 0.7041, "step": 68410 }, { "epoch": 19.68354430379747, "grad_norm": 1.3852280378341675, "learning_rate": 0.0016063291139240506, "loss": 0.6673, "step": 68420 }, { "epoch": 19.686421173762945, "grad_norm": 1.5375068187713623, "learning_rate": 0.0016062715765247412, "loss": 0.6658, "step": 68430 }, { "epoch": 19.689298043728424, "grad_norm": 1.7727638483047485, "learning_rate": 0.0016062140391254315, "loss": 0.7669, "step": 68440 }, { "epoch": 19.6921749136939, "grad_norm": 1.6072731018066406, "learning_rate": 0.0016061565017261219, "loss": 0.6842, "step": 68450 }, { "epoch": 19.69505178365938, "grad_norm": 1.6825302839279175, "learning_rate": 0.0016060989643268124, "loss": 0.6014, "step": 68460 }, { "epoch": 19.697928653624857, "grad_norm": 1.6402595043182373, "learning_rate": 0.001606041426927503, "loss": 0.9139, "step": 68470 }, { "epoch": 19.700805523590333, "grad_norm": 1.4076863527297974, "learning_rate": 0.0016059838895281934, "loss": 0.6868, "step": 68480 }, { "epoch": 19.703682393555813, "grad_norm": 1.0824167728424072, "learning_rate": 0.001605926352128884, "loss": 0.7715, "step": 68490 }, { "epoch": 19.70655926352129, "grad_norm": 1.2530145645141602, "learning_rate": 0.0016058688147295743, "loss": 0.777, "step": 68500 }, { "epoch": 19.709436133486765, "grad_norm": 1.5471270084381104, "learning_rate": 0.0016058112773302646, "loss": 0.7284, "step": 68510 }, { "epoch": 19.712313003452245, "grad_norm": 2.026583671569824, "learning_rate": 0.0016057537399309552, "loss": 0.8148, "step": 68520 }, { "epoch": 19.71518987341772, "grad_norm": 1.3079413175582886, "learning_rate": 0.0016056962025316455, "loss": 0.6566, "step": 68530 }, { "epoch": 19.7180667433832, "grad_norm": 1.3165905475616455, "learning_rate": 0.001605638665132336, "loss": 0.58, "step": 68540 }, { "epoch": 19.720943613348677, "grad_norm": 0.8529500961303711, "learning_rate": 0.0016055811277330267, "loss": 0.7316, "step": 68550 }, { "epoch": 19.723820483314153, "grad_norm": 1.5833125114440918, "learning_rate": 0.0016055235903337168, "loss": 0.7714, "step": 68560 }, { "epoch": 19.726697353279633, "grad_norm": 0.8486599326133728, "learning_rate": 0.0016054660529344073, "loss": 0.7537, "step": 68570 }, { "epoch": 19.72957422324511, "grad_norm": 1.0482542514801025, "learning_rate": 0.001605408515535098, "loss": 0.7146, "step": 68580 }, { "epoch": 19.732451093210585, "grad_norm": 1.6282374858856201, "learning_rate": 0.0016053509781357883, "loss": 0.8815, "step": 68590 }, { "epoch": 19.735327963176065, "grad_norm": 0.9838525652885437, "learning_rate": 0.0016052934407364788, "loss": 0.7463, "step": 68600 }, { "epoch": 19.73820483314154, "grad_norm": 0.8816672563552856, "learning_rate": 0.0016052359033371694, "loss": 0.6975, "step": 68610 }, { "epoch": 19.74108170310702, "grad_norm": 1.8369605541229248, "learning_rate": 0.0016051783659378595, "loss": 0.7319, "step": 68620 }, { "epoch": 19.743958573072497, "grad_norm": 1.0761734247207642, "learning_rate": 0.00160512082853855, "loss": 0.673, "step": 68630 }, { "epoch": 19.746835443037973, "grad_norm": 1.3411444425582886, "learning_rate": 0.0016050632911392404, "loss": 0.7789, "step": 68640 }, { "epoch": 19.749712313003453, "grad_norm": 1.6028566360473633, "learning_rate": 0.001605005753739931, "loss": 0.7066, "step": 68650 }, { "epoch": 19.75258918296893, "grad_norm": 1.4246710538864136, "learning_rate": 0.0016049482163406216, "loss": 0.7491, "step": 68660 }, { "epoch": 19.75546605293441, "grad_norm": 0.8484922051429749, "learning_rate": 0.001604890678941312, "loss": 0.7075, "step": 68670 }, { "epoch": 19.758342922899885, "grad_norm": 1.1173782348632812, "learning_rate": 0.0016048331415420022, "loss": 0.7255, "step": 68680 }, { "epoch": 19.76121979286536, "grad_norm": 1.8295707702636719, "learning_rate": 0.0016047756041426928, "loss": 0.6977, "step": 68690 }, { "epoch": 19.76409666283084, "grad_norm": 0.8645023703575134, "learning_rate": 0.0016047180667433832, "loss": 0.6603, "step": 68700 }, { "epoch": 19.766973532796317, "grad_norm": 1.2843488454818726, "learning_rate": 0.0016046605293440737, "loss": 0.7177, "step": 68710 }, { "epoch": 19.769850402761794, "grad_norm": 0.8319230079650879, "learning_rate": 0.0016046029919447643, "loss": 0.7541, "step": 68720 }, { "epoch": 19.772727272727273, "grad_norm": 1.2534589767456055, "learning_rate": 0.0016045454545454546, "loss": 0.7443, "step": 68730 }, { "epoch": 19.77560414269275, "grad_norm": 1.3013901710510254, "learning_rate": 0.001604487917146145, "loss": 0.6177, "step": 68740 }, { "epoch": 19.77848101265823, "grad_norm": 1.3453342914581299, "learning_rate": 0.0016044303797468353, "loss": 0.7586, "step": 68750 }, { "epoch": 19.781357882623706, "grad_norm": 0.8988157510757446, "learning_rate": 0.001604372842347526, "loss": 0.6083, "step": 68760 }, { "epoch": 19.78423475258918, "grad_norm": 2.703766107559204, "learning_rate": 0.0016043153049482165, "loss": 0.8371, "step": 68770 }, { "epoch": 19.78711162255466, "grad_norm": 1.1336966753005981, "learning_rate": 0.0016042577675489068, "loss": 0.6332, "step": 68780 }, { "epoch": 19.789988492520138, "grad_norm": 3.622889280319214, "learning_rate": 0.0016042002301495974, "loss": 1.1136, "step": 68790 }, { "epoch": 19.792865362485614, "grad_norm": 1.2879505157470703, "learning_rate": 0.0016041426927502877, "loss": 0.9547, "step": 68800 }, { "epoch": 19.795742232451094, "grad_norm": 1.4081404209136963, "learning_rate": 0.001604085155350978, "loss": 0.5608, "step": 68810 }, { "epoch": 19.79861910241657, "grad_norm": 1.0303930044174194, "learning_rate": 0.0016040276179516686, "loss": 0.7644, "step": 68820 }, { "epoch": 19.80149597238205, "grad_norm": 1.877928614616394, "learning_rate": 0.0016039700805523592, "loss": 0.7456, "step": 68830 }, { "epoch": 19.804372842347526, "grad_norm": 1.5726298093795776, "learning_rate": 0.0016039125431530495, "loss": 0.713, "step": 68840 }, { "epoch": 19.807249712313002, "grad_norm": 0.7550081610679626, "learning_rate": 0.00160385500575374, "loss": 0.5443, "step": 68850 }, { "epoch": 19.810126582278482, "grad_norm": 1.502548336982727, "learning_rate": 0.0016037974683544304, "loss": 0.8374, "step": 68860 }, { "epoch": 19.813003452243958, "grad_norm": 1.116883397102356, "learning_rate": 0.0016037399309551208, "loss": 0.8049, "step": 68870 }, { "epoch": 19.815880322209438, "grad_norm": 2.2657382488250732, "learning_rate": 0.0016036823935558114, "loss": 0.8498, "step": 68880 }, { "epoch": 19.818757192174914, "grad_norm": 1.2202067375183105, "learning_rate": 0.0016036248561565017, "loss": 1.065, "step": 68890 }, { "epoch": 19.82163406214039, "grad_norm": 1.1665500402450562, "learning_rate": 0.0016035673187571923, "loss": 0.685, "step": 68900 }, { "epoch": 19.82451093210587, "grad_norm": 0.8845846652984619, "learning_rate": 0.0016035097813578826, "loss": 0.6818, "step": 68910 }, { "epoch": 19.827387802071346, "grad_norm": 0.9646705389022827, "learning_rate": 0.001603452243958573, "loss": 0.7488, "step": 68920 }, { "epoch": 19.830264672036822, "grad_norm": 1.3366776704788208, "learning_rate": 0.0016033947065592635, "loss": 0.8298, "step": 68930 }, { "epoch": 19.833141542002302, "grad_norm": 1.852632999420166, "learning_rate": 0.001603337169159954, "loss": 0.813, "step": 68940 }, { "epoch": 19.83601841196778, "grad_norm": 2.074925422668457, "learning_rate": 0.0016032796317606444, "loss": 0.6275, "step": 68950 }, { "epoch": 19.838895281933258, "grad_norm": 0.8529734015464783, "learning_rate": 0.001603222094361335, "loss": 0.7853, "step": 68960 }, { "epoch": 19.841772151898734, "grad_norm": 1.1328643560409546, "learning_rate": 0.0016031645569620253, "loss": 0.7356, "step": 68970 }, { "epoch": 19.84464902186421, "grad_norm": 1.0618454217910767, "learning_rate": 0.0016031070195627157, "loss": 0.8529, "step": 68980 }, { "epoch": 19.84752589182969, "grad_norm": 1.3554611206054688, "learning_rate": 0.0016030494821634063, "loss": 0.7277, "step": 68990 }, { "epoch": 19.850402761795166, "grad_norm": 2.342937469482422, "learning_rate": 0.0016029919447640966, "loss": 1.1119, "step": 69000 }, { "epoch": 19.853279631760643, "grad_norm": 1.0585098266601562, "learning_rate": 0.0016029344073647872, "loss": 0.6421, "step": 69010 }, { "epoch": 19.856156501726122, "grad_norm": 0.9894700050354004, "learning_rate": 0.0016028768699654777, "loss": 0.7616, "step": 69020 }, { "epoch": 19.8590333716916, "grad_norm": 0.9336586594581604, "learning_rate": 0.0016028193325661679, "loss": 0.7538, "step": 69030 }, { "epoch": 19.86191024165708, "grad_norm": 1.7963465452194214, "learning_rate": 0.0016027617951668584, "loss": 0.7179, "step": 69040 }, { "epoch": 19.864787111622555, "grad_norm": 1.6985132694244385, "learning_rate": 0.001602704257767549, "loss": 0.7861, "step": 69050 }, { "epoch": 19.86766398158803, "grad_norm": 1.2751802206039429, "learning_rate": 0.0016026467203682393, "loss": 0.763, "step": 69060 }, { "epoch": 19.87054085155351, "grad_norm": 0.9598271250724792, "learning_rate": 0.00160258918296893, "loss": 0.6776, "step": 69070 }, { "epoch": 19.873417721518987, "grad_norm": 1.9344117641448975, "learning_rate": 0.0016025316455696205, "loss": 0.8425, "step": 69080 }, { "epoch": 19.876294591484466, "grad_norm": 1.4296993017196655, "learning_rate": 0.0016024741081703106, "loss": 0.7402, "step": 69090 }, { "epoch": 19.879171461449943, "grad_norm": 1.6239778995513916, "learning_rate": 0.0016024165707710012, "loss": 0.7688, "step": 69100 }, { "epoch": 19.88204833141542, "grad_norm": 1.1711769104003906, "learning_rate": 0.0016023590333716915, "loss": 0.8124, "step": 69110 }, { "epoch": 19.8849252013809, "grad_norm": 1.523592472076416, "learning_rate": 0.001602301495972382, "loss": 0.6868, "step": 69120 }, { "epoch": 19.887802071346375, "grad_norm": 1.9861901998519897, "learning_rate": 0.0016022439585730726, "loss": 0.8013, "step": 69130 }, { "epoch": 19.89067894131185, "grad_norm": 0.898524820804596, "learning_rate": 0.001602186421173763, "loss": 0.6347, "step": 69140 }, { "epoch": 19.89355581127733, "grad_norm": 1.9984869956970215, "learning_rate": 0.0016021288837744533, "loss": 0.9423, "step": 69150 }, { "epoch": 19.896432681242807, "grad_norm": 2.811690330505371, "learning_rate": 0.001602071346375144, "loss": 0.7903, "step": 69160 }, { "epoch": 19.899309551208287, "grad_norm": 1.191256046295166, "learning_rate": 0.0016020138089758342, "loss": 0.7655, "step": 69170 }, { "epoch": 19.902186421173763, "grad_norm": 0.9828771352767944, "learning_rate": 0.0016019562715765248, "loss": 0.7196, "step": 69180 }, { "epoch": 19.90506329113924, "grad_norm": 2.0170562267303467, "learning_rate": 0.0016018987341772154, "loss": 0.677, "step": 69190 }, { "epoch": 19.90794016110472, "grad_norm": 1.2461789846420288, "learning_rate": 0.0016018411967779057, "loss": 0.7052, "step": 69200 }, { "epoch": 19.910817031070195, "grad_norm": 1.3648561239242554, "learning_rate": 0.001601783659378596, "loss": 0.6411, "step": 69210 }, { "epoch": 19.913693901035675, "grad_norm": 1.7791433334350586, "learning_rate": 0.0016017261219792864, "loss": 0.7645, "step": 69220 }, { "epoch": 19.91657077100115, "grad_norm": 1.469735026359558, "learning_rate": 0.001601668584579977, "loss": 0.6921, "step": 69230 }, { "epoch": 19.919447640966627, "grad_norm": 0.9148920774459839, "learning_rate": 0.0016016110471806675, "loss": 0.8344, "step": 69240 }, { "epoch": 19.922324510932107, "grad_norm": 2.6179776191711426, "learning_rate": 0.0016015535097813579, "loss": 0.9558, "step": 69250 }, { "epoch": 19.925201380897583, "grad_norm": 1.449454665184021, "learning_rate": 0.0016014959723820485, "loss": 0.6478, "step": 69260 }, { "epoch": 19.92807825086306, "grad_norm": 0.7594888806343079, "learning_rate": 0.0016014384349827388, "loss": 0.6452, "step": 69270 }, { "epoch": 19.93095512082854, "grad_norm": 1.1331298351287842, "learning_rate": 0.0016013808975834291, "loss": 0.9862, "step": 69280 }, { "epoch": 19.933831990794015, "grad_norm": 1.0573780536651611, "learning_rate": 0.0016013233601841197, "loss": 0.7214, "step": 69290 }, { "epoch": 19.936708860759495, "grad_norm": 1.1361690759658813, "learning_rate": 0.0016012658227848103, "loss": 0.7482, "step": 69300 }, { "epoch": 19.93958573072497, "grad_norm": 1.4241127967834473, "learning_rate": 0.0016012082853855006, "loss": 0.9085, "step": 69310 }, { "epoch": 19.942462600690448, "grad_norm": 1.3092525005340576, "learning_rate": 0.0016011507479861912, "loss": 0.6481, "step": 69320 }, { "epoch": 19.945339470655927, "grad_norm": 0.9203005433082581, "learning_rate": 0.0016010932105868813, "loss": 0.6865, "step": 69330 }, { "epoch": 19.948216340621403, "grad_norm": 1.5134143829345703, "learning_rate": 0.0016010356731875719, "loss": 0.8166, "step": 69340 }, { "epoch": 19.951093210586883, "grad_norm": 1.988233208656311, "learning_rate": 0.0016009781357882624, "loss": 0.8236, "step": 69350 }, { "epoch": 19.95397008055236, "grad_norm": 1.6225672960281372, "learning_rate": 0.0016009205983889528, "loss": 0.7685, "step": 69360 }, { "epoch": 19.956846950517836, "grad_norm": 1.2837307453155518, "learning_rate": 0.0016008630609896434, "loss": 0.8676, "step": 69370 }, { "epoch": 19.959723820483315, "grad_norm": 1.6388914585113525, "learning_rate": 0.001600805523590334, "loss": 0.8411, "step": 69380 }, { "epoch": 19.96260069044879, "grad_norm": 1.72356116771698, "learning_rate": 0.001600747986191024, "loss": 0.6946, "step": 69390 }, { "epoch": 19.965477560414268, "grad_norm": 2.609605312347412, "learning_rate": 0.0016006904487917146, "loss": 0.752, "step": 69400 }, { "epoch": 19.968354430379748, "grad_norm": 1.6714491844177246, "learning_rate": 0.0016006329113924052, "loss": 0.9107, "step": 69410 }, { "epoch": 19.971231300345224, "grad_norm": 2.1736979484558105, "learning_rate": 0.0016005753739930955, "loss": 0.7467, "step": 69420 }, { "epoch": 19.974108170310704, "grad_norm": 2.0022528171539307, "learning_rate": 0.001600517836593786, "loss": 0.8184, "step": 69430 }, { "epoch": 19.97698504027618, "grad_norm": 1.16141676902771, "learning_rate": 0.0016004602991944764, "loss": 0.6721, "step": 69440 }, { "epoch": 19.979861910241656, "grad_norm": 0.8800134658813477, "learning_rate": 0.0016004027617951668, "loss": 0.5582, "step": 69450 }, { "epoch": 19.982738780207136, "grad_norm": 1.608880877494812, "learning_rate": 0.0016003452243958573, "loss": 0.6896, "step": 69460 }, { "epoch": 19.985615650172612, "grad_norm": 1.3593792915344238, "learning_rate": 0.0016002876869965477, "loss": 0.6306, "step": 69470 }, { "epoch": 19.988492520138088, "grad_norm": 1.2928316593170166, "learning_rate": 0.0016002301495972383, "loss": 0.6843, "step": 69480 }, { "epoch": 19.991369390103568, "grad_norm": 0.9125980734825134, "learning_rate": 0.0016001726121979288, "loss": 0.7256, "step": 69490 }, { "epoch": 19.994246260069044, "grad_norm": 1.9735318422317505, "learning_rate": 0.0016001150747986192, "loss": 0.6049, "step": 69500 }, { "epoch": 19.997123130034524, "grad_norm": 1.182708978652954, "learning_rate": 0.0016000575373993095, "loss": 0.7556, "step": 69510 }, { "epoch": 20.0, "grad_norm": 1.3355590105056763, "learning_rate": 0.0016, "loss": 0.552, "step": 69520 }, { "epoch": 20.002876869965476, "grad_norm": 2.0497021675109863, "learning_rate": 0.0015999424626006904, "loss": 0.6981, "step": 69530 }, { "epoch": 20.005753739930956, "grad_norm": 1.6975727081298828, "learning_rate": 0.001599884925201381, "loss": 0.5894, "step": 69540 }, { "epoch": 20.008630609896432, "grad_norm": 1.192983627319336, "learning_rate": 0.0015998273878020716, "loss": 0.7581, "step": 69550 }, { "epoch": 20.011507479861912, "grad_norm": 0.8886436223983765, "learning_rate": 0.001599769850402762, "loss": 0.707, "step": 69560 }, { "epoch": 20.014384349827388, "grad_norm": 1.1425716876983643, "learning_rate": 0.0015997123130034522, "loss": 0.8564, "step": 69570 }, { "epoch": 20.017261219792864, "grad_norm": 1.207030177116394, "learning_rate": 0.0015996547756041426, "loss": 0.6179, "step": 69580 }, { "epoch": 20.020138089758344, "grad_norm": 1.4482930898666382, "learning_rate": 0.0015995972382048332, "loss": 0.6667, "step": 69590 }, { "epoch": 20.02301495972382, "grad_norm": 0.8675859570503235, "learning_rate": 0.0015995397008055237, "loss": 0.6548, "step": 69600 }, { "epoch": 20.025891829689296, "grad_norm": 1.1801470518112183, "learning_rate": 0.001599482163406214, "loss": 0.8001, "step": 69610 }, { "epoch": 20.028768699654776, "grad_norm": 1.939829707145691, "learning_rate": 0.0015994246260069046, "loss": 0.8379, "step": 69620 }, { "epoch": 20.031645569620252, "grad_norm": 1.2425885200500488, "learning_rate": 0.001599367088607595, "loss": 0.5747, "step": 69630 }, { "epoch": 20.034522439585732, "grad_norm": 0.8724502325057983, "learning_rate": 0.0015993095512082853, "loss": 0.7836, "step": 69640 }, { "epoch": 20.03739930955121, "grad_norm": 0.6716702580451965, "learning_rate": 0.0015992520138089759, "loss": 0.8613, "step": 69650 }, { "epoch": 20.040276179516685, "grad_norm": 1.398411512374878, "learning_rate": 0.0015991944764096665, "loss": 0.6776, "step": 69660 }, { "epoch": 20.043153049482164, "grad_norm": 1.1639668941497803, "learning_rate": 0.0015991369390103568, "loss": 0.7021, "step": 69670 }, { "epoch": 20.04602991944764, "grad_norm": 1.1153135299682617, "learning_rate": 0.0015990794016110474, "loss": 0.7026, "step": 69680 }, { "epoch": 20.048906789413117, "grad_norm": 0.9577275514602661, "learning_rate": 0.0015990218642117375, "loss": 0.5877, "step": 69690 }, { "epoch": 20.051783659378597, "grad_norm": 2.014366865158081, "learning_rate": 0.001598964326812428, "loss": 0.6872, "step": 69700 }, { "epoch": 20.054660529344073, "grad_norm": 1.5453624725341797, "learning_rate": 0.0015989067894131186, "loss": 0.6713, "step": 69710 }, { "epoch": 20.057537399309552, "grad_norm": 1.2864248752593994, "learning_rate": 0.001598849252013809, "loss": 0.7329, "step": 69720 }, { "epoch": 20.06041426927503, "grad_norm": 1.5760207176208496, "learning_rate": 0.0015987917146144995, "loss": 0.7507, "step": 69730 }, { "epoch": 20.063291139240505, "grad_norm": 1.3372868299484253, "learning_rate": 0.0015987341772151899, "loss": 0.607, "step": 69740 }, { "epoch": 20.066168009205985, "grad_norm": 2.654118061065674, "learning_rate": 0.0015986766398158802, "loss": 0.6107, "step": 69750 }, { "epoch": 20.06904487917146, "grad_norm": 1.3196265697479248, "learning_rate": 0.0015986191024165708, "loss": 0.5893, "step": 69760 }, { "epoch": 20.07192174913694, "grad_norm": 1.6593291759490967, "learning_rate": 0.0015985615650172614, "loss": 0.8441, "step": 69770 }, { "epoch": 20.074798619102417, "grad_norm": 0.7533068656921387, "learning_rate": 0.0015985040276179517, "loss": 0.5588, "step": 69780 }, { "epoch": 20.077675489067893, "grad_norm": 0.7544656991958618, "learning_rate": 0.0015984464902186423, "loss": 0.6202, "step": 69790 }, { "epoch": 20.080552359033373, "grad_norm": 1.484055757522583, "learning_rate": 0.0015983889528193324, "loss": 0.744, "step": 69800 }, { "epoch": 20.08342922899885, "grad_norm": 0.9478099942207336, "learning_rate": 0.001598331415420023, "loss": 0.5947, "step": 69810 }, { "epoch": 20.086306098964325, "grad_norm": 2.030427932739258, "learning_rate": 0.0015982738780207135, "loss": 0.6195, "step": 69820 }, { "epoch": 20.089182968929805, "grad_norm": 1.0458863973617554, "learning_rate": 0.0015982163406214039, "loss": 0.7134, "step": 69830 }, { "epoch": 20.09205983889528, "grad_norm": 1.3246351480484009, "learning_rate": 0.0015981588032220944, "loss": 0.9245, "step": 69840 }, { "epoch": 20.09493670886076, "grad_norm": 1.0354012250900269, "learning_rate": 0.001598101265822785, "loss": 0.6158, "step": 69850 }, { "epoch": 20.097813578826237, "grad_norm": 0.7880470752716064, "learning_rate": 0.0015980437284234751, "loss": 0.7188, "step": 69860 }, { "epoch": 20.100690448791713, "grad_norm": 1.3690593242645264, "learning_rate": 0.0015979861910241657, "loss": 0.7593, "step": 69870 }, { "epoch": 20.103567318757193, "grad_norm": 1.0997687578201294, "learning_rate": 0.0015979286536248563, "loss": 0.6124, "step": 69880 }, { "epoch": 20.10644418872267, "grad_norm": 0.8696068525314331, "learning_rate": 0.0015978711162255466, "loss": 0.619, "step": 69890 }, { "epoch": 20.10932105868815, "grad_norm": 0.9688490033149719, "learning_rate": 0.0015978135788262372, "loss": 0.6112, "step": 69900 }, { "epoch": 20.112197928653625, "grad_norm": 1.0506865978240967, "learning_rate": 0.0015977560414269275, "loss": 0.6773, "step": 69910 }, { "epoch": 20.1150747986191, "grad_norm": 1.0812139511108398, "learning_rate": 0.0015976985040276179, "loss": 0.7104, "step": 69920 }, { "epoch": 20.11795166858458, "grad_norm": 1.230534553527832, "learning_rate": 0.0015976409666283084, "loss": 0.6544, "step": 69930 }, { "epoch": 20.120828538550057, "grad_norm": 1.1765719652175903, "learning_rate": 0.0015975834292289988, "loss": 0.7154, "step": 69940 }, { "epoch": 20.123705408515534, "grad_norm": 1.1866297721862793, "learning_rate": 0.0015975258918296893, "loss": 0.554, "step": 69950 }, { "epoch": 20.126582278481013, "grad_norm": 1.0475962162017822, "learning_rate": 0.00159746835443038, "loss": 0.7351, "step": 69960 }, { "epoch": 20.12945914844649, "grad_norm": 1.5426220893859863, "learning_rate": 0.0015974108170310702, "loss": 0.8284, "step": 69970 }, { "epoch": 20.13233601841197, "grad_norm": 1.98219895362854, "learning_rate": 0.0015973532796317606, "loss": 0.6987, "step": 69980 }, { "epoch": 20.135212888377445, "grad_norm": 1.4133068323135376, "learning_rate": 0.0015972957422324512, "loss": 0.6824, "step": 69990 }, { "epoch": 20.13808975834292, "grad_norm": 0.9682452082633972, "learning_rate": 0.0015972382048331415, "loss": 0.5342, "step": 70000 }, { "epoch": 20.1409666283084, "grad_norm": 1.011720061302185, "learning_rate": 0.001597180667433832, "loss": 0.6835, "step": 70010 }, { "epoch": 20.143843498273878, "grad_norm": 1.2343196868896484, "learning_rate": 0.0015971231300345224, "loss": 0.6261, "step": 70020 }, { "epoch": 20.146720368239354, "grad_norm": 2.11262845993042, "learning_rate": 0.001597065592635213, "loss": 0.8499, "step": 70030 }, { "epoch": 20.149597238204834, "grad_norm": 1.078088641166687, "learning_rate": 0.0015970080552359033, "loss": 0.6155, "step": 70040 }, { "epoch": 20.15247410817031, "grad_norm": 2.6665289402008057, "learning_rate": 0.0015969505178365937, "loss": 0.6237, "step": 70050 }, { "epoch": 20.15535097813579, "grad_norm": 1.621127963066101, "learning_rate": 0.0015968929804372842, "loss": 0.7435, "step": 70060 }, { "epoch": 20.158227848101266, "grad_norm": 2.1526520252227783, "learning_rate": 0.0015968354430379748, "loss": 0.6603, "step": 70070 }, { "epoch": 20.161104718066742, "grad_norm": 2.2166924476623535, "learning_rate": 0.0015967779056386652, "loss": 0.9366, "step": 70080 }, { "epoch": 20.16398158803222, "grad_norm": 1.349567174911499, "learning_rate": 0.0015967203682393557, "loss": 0.829, "step": 70090 }, { "epoch": 20.166858457997698, "grad_norm": 1.4165029525756836, "learning_rate": 0.001596662830840046, "loss": 0.8175, "step": 70100 }, { "epoch": 20.169735327963178, "grad_norm": 1.8236247301101685, "learning_rate": 0.0015966052934407364, "loss": 0.6796, "step": 70110 }, { "epoch": 20.172612197928654, "grad_norm": 1.4737954139709473, "learning_rate": 0.001596547756041427, "loss": 0.7696, "step": 70120 }, { "epoch": 20.17548906789413, "grad_norm": 2.115294933319092, "learning_rate": 0.0015964902186421175, "loss": 0.7304, "step": 70130 }, { "epoch": 20.17836593785961, "grad_norm": 1.403247594833374, "learning_rate": 0.0015964326812428079, "loss": 0.6562, "step": 70140 }, { "epoch": 20.181242807825086, "grad_norm": 2.146453857421875, "learning_rate": 0.0015963751438434984, "loss": 0.6285, "step": 70150 }, { "epoch": 20.184119677790562, "grad_norm": 1.3476601839065552, "learning_rate": 0.0015963176064441886, "loss": 0.6422, "step": 70160 }, { "epoch": 20.186996547756042, "grad_norm": 0.7435993552207947, "learning_rate": 0.0015962600690448791, "loss": 0.7515, "step": 70170 }, { "epoch": 20.189873417721518, "grad_norm": 1.0965256690979004, "learning_rate": 0.0015962025316455697, "loss": 0.7449, "step": 70180 }, { "epoch": 20.192750287686998, "grad_norm": 1.7454854249954224, "learning_rate": 0.00159614499424626, "loss": 0.8522, "step": 70190 }, { "epoch": 20.195627157652474, "grad_norm": 2.5556137561798096, "learning_rate": 0.0015960874568469506, "loss": 0.644, "step": 70200 }, { "epoch": 20.19850402761795, "grad_norm": 0.9545226097106934, "learning_rate": 0.0015960299194476412, "loss": 0.5643, "step": 70210 }, { "epoch": 20.20138089758343, "grad_norm": 0.946721613407135, "learning_rate": 0.0015959723820483313, "loss": 0.728, "step": 70220 }, { "epoch": 20.204257767548906, "grad_norm": 1.002720594406128, "learning_rate": 0.0015959148446490219, "loss": 0.6966, "step": 70230 }, { "epoch": 20.207134637514386, "grad_norm": 1.109778642654419, "learning_rate": 0.0015958573072497124, "loss": 0.6229, "step": 70240 }, { "epoch": 20.210011507479862, "grad_norm": 2.9250268936157227, "learning_rate": 0.0015957997698504028, "loss": 0.8161, "step": 70250 }, { "epoch": 20.21288837744534, "grad_norm": 1.070297122001648, "learning_rate": 0.0015957422324510934, "loss": 0.7307, "step": 70260 }, { "epoch": 20.21576524741082, "grad_norm": 0.7581899166107178, "learning_rate": 0.0015956846950517837, "loss": 0.6935, "step": 70270 }, { "epoch": 20.218642117376294, "grad_norm": 2.152833938598633, "learning_rate": 0.001595627157652474, "loss": 0.9637, "step": 70280 }, { "epoch": 20.22151898734177, "grad_norm": 1.3793169260025024, "learning_rate": 0.0015955696202531646, "loss": 0.7327, "step": 70290 }, { "epoch": 20.22439585730725, "grad_norm": 1.9358175992965698, "learning_rate": 0.001595512082853855, "loss": 0.7158, "step": 70300 }, { "epoch": 20.227272727272727, "grad_norm": 1.1470673084259033, "learning_rate": 0.0015954545454545455, "loss": 0.7545, "step": 70310 }, { "epoch": 20.230149597238206, "grad_norm": 1.3891104459762573, "learning_rate": 0.001595397008055236, "loss": 0.6475, "step": 70320 }, { "epoch": 20.233026467203683, "grad_norm": 2.433396100997925, "learning_rate": 0.0015953394706559264, "loss": 0.7937, "step": 70330 }, { "epoch": 20.23590333716916, "grad_norm": 1.3128020763397217, "learning_rate": 0.0015952819332566168, "loss": 0.7667, "step": 70340 }, { "epoch": 20.23878020713464, "grad_norm": 1.025346040725708, "learning_rate": 0.0015952243958573073, "loss": 0.7014, "step": 70350 }, { "epoch": 20.241657077100115, "grad_norm": 1.1260640621185303, "learning_rate": 0.0015951668584579977, "loss": 0.6257, "step": 70360 }, { "epoch": 20.24453394706559, "grad_norm": 1.8370091915130615, "learning_rate": 0.0015951093210586883, "loss": 0.6593, "step": 70370 }, { "epoch": 20.24741081703107, "grad_norm": 0.8484997153282166, "learning_rate": 0.0015950517836593786, "loss": 0.7103, "step": 70380 }, { "epoch": 20.250287686996547, "grad_norm": 1.4068734645843506, "learning_rate": 0.0015949942462600692, "loss": 0.7335, "step": 70390 }, { "epoch": 20.253164556962027, "grad_norm": 1.1801403760910034, "learning_rate": 0.0015949367088607595, "loss": 0.7414, "step": 70400 }, { "epoch": 20.256041426927503, "grad_norm": 2.1868813037872314, "learning_rate": 0.0015948791714614499, "loss": 0.7873, "step": 70410 }, { "epoch": 20.25891829689298, "grad_norm": 1.2132083177566528, "learning_rate": 0.0015948216340621404, "loss": 0.7089, "step": 70420 }, { "epoch": 20.26179516685846, "grad_norm": 2.0219085216522217, "learning_rate": 0.001594764096662831, "loss": 0.8493, "step": 70430 }, { "epoch": 20.264672036823935, "grad_norm": 1.1564035415649414, "learning_rate": 0.0015947065592635213, "loss": 0.9002, "step": 70440 }, { "epoch": 20.267548906789415, "grad_norm": 1.4663794040679932, "learning_rate": 0.001594649021864212, "loss": 0.791, "step": 70450 }, { "epoch": 20.27042577675489, "grad_norm": 0.9990568161010742, "learning_rate": 0.0015945914844649022, "loss": 0.6591, "step": 70460 }, { "epoch": 20.273302646720367, "grad_norm": 1.4507759809494019, "learning_rate": 0.0015945339470655926, "loss": 0.7665, "step": 70470 }, { "epoch": 20.276179516685847, "grad_norm": 1.4473435878753662, "learning_rate": 0.0015944764096662832, "loss": 0.7056, "step": 70480 }, { "epoch": 20.279056386651323, "grad_norm": 1.781071662902832, "learning_rate": 0.0015944188722669735, "loss": 0.9935, "step": 70490 }, { "epoch": 20.2819332566168, "grad_norm": 0.647778332233429, "learning_rate": 0.001594361334867664, "loss": 0.6747, "step": 70500 }, { "epoch": 20.28481012658228, "grad_norm": 0.8977344632148743, "learning_rate": 0.0015943037974683546, "loss": 0.69, "step": 70510 }, { "epoch": 20.287686996547755, "grad_norm": 2.0814051628112793, "learning_rate": 0.0015942462600690448, "loss": 0.7287, "step": 70520 }, { "epoch": 20.290563866513235, "grad_norm": 1.53999662399292, "learning_rate": 0.0015941887226697353, "loss": 0.7094, "step": 70530 }, { "epoch": 20.29344073647871, "grad_norm": 2.827669143676758, "learning_rate": 0.0015941311852704259, "loss": 0.8165, "step": 70540 }, { "epoch": 20.296317606444187, "grad_norm": 1.4987993240356445, "learning_rate": 0.0015940736478711162, "loss": 0.6697, "step": 70550 }, { "epoch": 20.299194476409667, "grad_norm": 1.6476037502288818, "learning_rate": 0.0015940161104718068, "loss": 0.5562, "step": 70560 }, { "epoch": 20.302071346375143, "grad_norm": 0.7383608222007751, "learning_rate": 0.0015939585730724971, "loss": 0.7588, "step": 70570 }, { "epoch": 20.30494821634062, "grad_norm": 1.8882001638412476, "learning_rate": 0.0015939010356731875, "loss": 0.7762, "step": 70580 }, { "epoch": 20.3078250863061, "grad_norm": 0.9468279480934143, "learning_rate": 0.001593843498273878, "loss": 0.8465, "step": 70590 }, { "epoch": 20.310701956271576, "grad_norm": 2.632511854171753, "learning_rate": 0.0015937859608745684, "loss": 0.5957, "step": 70600 }, { "epoch": 20.313578826237055, "grad_norm": 1.3285537958145142, "learning_rate": 0.001593728423475259, "loss": 0.5928, "step": 70610 }, { "epoch": 20.31645569620253, "grad_norm": 2.5162808895111084, "learning_rate": 0.0015936708860759495, "loss": 0.6794, "step": 70620 }, { "epoch": 20.319332566168008, "grad_norm": 0.7644365429878235, "learning_rate": 0.0015936133486766397, "loss": 0.6886, "step": 70630 }, { "epoch": 20.322209436133488, "grad_norm": 1.0716032981872559, "learning_rate": 0.0015935558112773302, "loss": 0.6887, "step": 70640 }, { "epoch": 20.325086306098964, "grad_norm": 1.247299313545227, "learning_rate": 0.0015934982738780208, "loss": 0.6957, "step": 70650 }, { "epoch": 20.327963176064443, "grad_norm": 0.7525609731674194, "learning_rate": 0.0015934407364787111, "loss": 0.8069, "step": 70660 }, { "epoch": 20.33084004602992, "grad_norm": 1.026726484298706, "learning_rate": 0.0015933831990794017, "loss": 0.7164, "step": 70670 }, { "epoch": 20.333716915995396, "grad_norm": 1.4556313753128052, "learning_rate": 0.0015933256616800923, "loss": 0.6744, "step": 70680 }, { "epoch": 20.336593785960876, "grad_norm": 1.1716787815093994, "learning_rate": 0.0015932681242807824, "loss": 0.832, "step": 70690 }, { "epoch": 20.339470655926352, "grad_norm": 0.7640916109085083, "learning_rate": 0.001593210586881473, "loss": 0.8351, "step": 70700 }, { "epoch": 20.342347525891828, "grad_norm": 1.2465661764144897, "learning_rate": 0.0015931530494821633, "loss": 0.7677, "step": 70710 }, { "epoch": 20.345224395857308, "grad_norm": 0.756413996219635, "learning_rate": 0.0015930955120828539, "loss": 0.7854, "step": 70720 }, { "epoch": 20.348101265822784, "grad_norm": 1.0513920783996582, "learning_rate": 0.0015930379746835444, "loss": 0.7621, "step": 70730 }, { "epoch": 20.350978135788264, "grad_norm": 0.9459606409072876, "learning_rate": 0.0015929804372842348, "loss": 0.6571, "step": 70740 }, { "epoch": 20.35385500575374, "grad_norm": 0.6663411855697632, "learning_rate": 0.0015929228998849251, "loss": 0.8056, "step": 70750 }, { "epoch": 20.356731875719216, "grad_norm": 1.2247604131698608, "learning_rate": 0.0015928653624856157, "loss": 0.8318, "step": 70760 }, { "epoch": 20.359608745684696, "grad_norm": 1.5407863855361938, "learning_rate": 0.001592807825086306, "loss": 0.7964, "step": 70770 }, { "epoch": 20.362485615650172, "grad_norm": 0.9983125329017639, "learning_rate": 0.0015927502876869966, "loss": 0.9519, "step": 70780 }, { "epoch": 20.365362485615652, "grad_norm": 1.5984541177749634, "learning_rate": 0.0015926927502876872, "loss": 0.5896, "step": 70790 }, { "epoch": 20.368239355581128, "grad_norm": 2.157597303390503, "learning_rate": 0.0015926352128883775, "loss": 0.6746, "step": 70800 }, { "epoch": 20.371116225546604, "grad_norm": 1.97713303565979, "learning_rate": 0.0015925776754890679, "loss": 0.7945, "step": 70810 }, { "epoch": 20.373993095512084, "grad_norm": 2.723511219024658, "learning_rate": 0.0015925201380897584, "loss": 0.6731, "step": 70820 }, { "epoch": 20.37686996547756, "grad_norm": 1.2637165784835815, "learning_rate": 0.0015924626006904488, "loss": 0.7721, "step": 70830 }, { "epoch": 20.379746835443036, "grad_norm": 1.6020029783248901, "learning_rate": 0.0015924050632911393, "loss": 0.8713, "step": 70840 }, { "epoch": 20.382623705408516, "grad_norm": 1.2814379930496216, "learning_rate": 0.0015923475258918297, "loss": 0.8153, "step": 70850 }, { "epoch": 20.385500575373992, "grad_norm": 1.0224082469940186, "learning_rate": 0.0015922899884925202, "loss": 0.6234, "step": 70860 }, { "epoch": 20.388377445339472, "grad_norm": 1.470167875289917, "learning_rate": 0.0015922324510932106, "loss": 0.6319, "step": 70870 }, { "epoch": 20.39125431530495, "grad_norm": 1.667980670928955, "learning_rate": 0.001592174913693901, "loss": 0.7307, "step": 70880 }, { "epoch": 20.394131185270425, "grad_norm": 0.9047902822494507, "learning_rate": 0.0015921173762945915, "loss": 0.7052, "step": 70890 }, { "epoch": 20.397008055235904, "grad_norm": 1.5163037776947021, "learning_rate": 0.001592059838895282, "loss": 0.7832, "step": 70900 }, { "epoch": 20.39988492520138, "grad_norm": 1.1548008918762207, "learning_rate": 0.0015920023014959724, "loss": 0.5866, "step": 70910 }, { "epoch": 20.402761795166857, "grad_norm": 1.4502708911895752, "learning_rate": 0.001591944764096663, "loss": 0.5929, "step": 70920 }, { "epoch": 20.405638665132336, "grad_norm": 0.9277428984642029, "learning_rate": 0.0015918872266973533, "loss": 0.8146, "step": 70930 }, { "epoch": 20.408515535097813, "grad_norm": 1.7302354574203491, "learning_rate": 0.0015918296892980437, "loss": 0.8073, "step": 70940 }, { "epoch": 20.411392405063292, "grad_norm": 1.147006869316101, "learning_rate": 0.0015917721518987342, "loss": 0.7059, "step": 70950 }, { "epoch": 20.41426927502877, "grad_norm": 1.917470097541809, "learning_rate": 0.0015917146144994246, "loss": 0.7986, "step": 70960 }, { "epoch": 20.417146144994245, "grad_norm": 2.2482199668884277, "learning_rate": 0.0015916570771001151, "loss": 0.8291, "step": 70970 }, { "epoch": 20.420023014959725, "grad_norm": 0.9571822881698608, "learning_rate": 0.0015915995397008057, "loss": 0.5923, "step": 70980 }, { "epoch": 20.4228998849252, "grad_norm": 0.8923220038414001, "learning_rate": 0.0015915420023014958, "loss": 0.5145, "step": 70990 }, { "epoch": 20.42577675489068, "grad_norm": 1.5176783800125122, "learning_rate": 0.0015914844649021864, "loss": 0.8184, "step": 71000 }, { "epoch": 20.428653624856157, "grad_norm": 0.8929154276847839, "learning_rate": 0.001591426927502877, "loss": 0.7389, "step": 71010 }, { "epoch": 20.431530494821633, "grad_norm": 1.6456115245819092, "learning_rate": 0.0015913693901035673, "loss": 0.712, "step": 71020 }, { "epoch": 20.434407364787113, "grad_norm": 0.7897042632102966, "learning_rate": 0.0015913118527042579, "loss": 0.7811, "step": 71030 }, { "epoch": 20.43728423475259, "grad_norm": 1.0907381772994995, "learning_rate": 0.0015912543153049484, "loss": 0.7106, "step": 71040 }, { "epoch": 20.440161104718065, "grad_norm": 0.9215540289878845, "learning_rate": 0.0015911967779056386, "loss": 0.6668, "step": 71050 }, { "epoch": 20.443037974683545, "grad_norm": 1.4210401773452759, "learning_rate": 0.0015911392405063291, "loss": 0.6168, "step": 71060 }, { "epoch": 20.44591484464902, "grad_norm": 1.4187374114990234, "learning_rate": 0.0015910817031070195, "loss": 0.6484, "step": 71070 }, { "epoch": 20.4487917146145, "grad_norm": 1.3310970067977905, "learning_rate": 0.00159102416570771, "loss": 0.6207, "step": 71080 }, { "epoch": 20.451668584579977, "grad_norm": 0.8557272553443909, "learning_rate": 0.0015909666283084006, "loss": 0.6932, "step": 71090 }, { "epoch": 20.454545454545453, "grad_norm": 1.616424322128296, "learning_rate": 0.001590909090909091, "loss": 0.9047, "step": 71100 }, { "epoch": 20.457422324510933, "grad_norm": 1.5169956684112549, "learning_rate": 0.0015908515535097813, "loss": 0.7097, "step": 71110 }, { "epoch": 20.46029919447641, "grad_norm": 1.7087163925170898, "learning_rate": 0.0015907940161104719, "loss": 0.8508, "step": 71120 }, { "epoch": 20.46317606444189, "grad_norm": 1.7954771518707275, "learning_rate": 0.0015907364787111622, "loss": 0.8749, "step": 71130 }, { "epoch": 20.466052934407365, "grad_norm": 1.8173002004623413, "learning_rate": 0.0015906789413118528, "loss": 0.9927, "step": 71140 }, { "epoch": 20.46892980437284, "grad_norm": 0.7583553791046143, "learning_rate": 0.0015906214039125433, "loss": 0.7758, "step": 71150 }, { "epoch": 20.47180667433832, "grad_norm": 1.330991268157959, "learning_rate": 0.0015905638665132337, "loss": 0.7651, "step": 71160 }, { "epoch": 20.474683544303797, "grad_norm": 0.9972723722457886, "learning_rate": 0.001590506329113924, "loss": 0.7043, "step": 71170 }, { "epoch": 20.477560414269274, "grad_norm": 1.8873251676559448, "learning_rate": 0.0015904487917146144, "loss": 0.7527, "step": 71180 }, { "epoch": 20.480437284234753, "grad_norm": 1.3593465089797974, "learning_rate": 0.001590391254315305, "loss": 0.6814, "step": 71190 }, { "epoch": 20.48331415420023, "grad_norm": 1.708030343055725, "learning_rate": 0.0015903337169159955, "loss": 0.7193, "step": 71200 }, { "epoch": 20.48619102416571, "grad_norm": 1.7479768991470337, "learning_rate": 0.0015902761795166859, "loss": 0.6963, "step": 71210 }, { "epoch": 20.489067894131185, "grad_norm": 1.602734923362732, "learning_rate": 0.0015902186421173764, "loss": 0.7536, "step": 71220 }, { "epoch": 20.49194476409666, "grad_norm": 1.468001365661621, "learning_rate": 0.0015901611047180668, "loss": 0.7704, "step": 71230 }, { "epoch": 20.49482163406214, "grad_norm": 1.4907728433609009, "learning_rate": 0.0015901035673187571, "loss": 0.9149, "step": 71240 }, { "epoch": 20.497698504027618, "grad_norm": 1.8709121942520142, "learning_rate": 0.0015900460299194477, "loss": 0.7624, "step": 71250 }, { "epoch": 20.500575373993094, "grad_norm": 1.4108383655548096, "learning_rate": 0.0015899884925201382, "loss": 0.6684, "step": 71260 }, { "epoch": 20.503452243958574, "grad_norm": 1.7257611751556396, "learning_rate": 0.0015899309551208286, "loss": 0.8436, "step": 71270 }, { "epoch": 20.50632911392405, "grad_norm": 1.305317759513855, "learning_rate": 0.0015898734177215192, "loss": 0.5796, "step": 71280 }, { "epoch": 20.50920598388953, "grad_norm": 0.9934525489807129, "learning_rate": 0.0015898158803222093, "loss": 0.6862, "step": 71290 }, { "epoch": 20.512082853855006, "grad_norm": 1.0852283239364624, "learning_rate": 0.0015897583429228999, "loss": 0.7628, "step": 71300 }, { "epoch": 20.514959723820482, "grad_norm": 2.6936209201812744, "learning_rate": 0.0015897008055235904, "loss": 0.7653, "step": 71310 }, { "epoch": 20.51783659378596, "grad_norm": 0.9795163869857788, "learning_rate": 0.0015896432681242808, "loss": 0.7859, "step": 71320 }, { "epoch": 20.520713463751438, "grad_norm": 1.1234441995620728, "learning_rate": 0.0015895857307249713, "loss": 0.7256, "step": 71330 }, { "epoch": 20.523590333716918, "grad_norm": 1.1948808431625366, "learning_rate": 0.001589528193325662, "loss": 0.5864, "step": 71340 }, { "epoch": 20.526467203682394, "grad_norm": 0.741536557674408, "learning_rate": 0.001589470655926352, "loss": 0.6227, "step": 71350 }, { "epoch": 20.52934407364787, "grad_norm": 1.6312921047210693, "learning_rate": 0.0015894131185270426, "loss": 0.8752, "step": 71360 }, { "epoch": 20.53222094361335, "grad_norm": 1.8152821063995361, "learning_rate": 0.0015893555811277332, "loss": 0.6001, "step": 71370 }, { "epoch": 20.535097813578826, "grad_norm": 0.750837504863739, "learning_rate": 0.0015892980437284235, "loss": 0.6311, "step": 71380 }, { "epoch": 20.537974683544302, "grad_norm": 1.3248227834701538, "learning_rate": 0.001589240506329114, "loss": 0.7045, "step": 71390 }, { "epoch": 20.540851553509782, "grad_norm": 1.4773633480072021, "learning_rate": 0.0015891829689298042, "loss": 0.5575, "step": 71400 }, { "epoch": 20.543728423475258, "grad_norm": 1.4078518152236938, "learning_rate": 0.0015891254315304948, "loss": 0.7592, "step": 71410 }, { "epoch": 20.546605293440738, "grad_norm": 1.0243271589279175, "learning_rate": 0.0015890678941311853, "loss": 0.9185, "step": 71420 }, { "epoch": 20.549482163406214, "grad_norm": 1.490212321281433, "learning_rate": 0.0015890103567318757, "loss": 0.7994, "step": 71430 }, { "epoch": 20.55235903337169, "grad_norm": 1.276631474494934, "learning_rate": 0.0015889528193325662, "loss": 0.7817, "step": 71440 }, { "epoch": 20.55523590333717, "grad_norm": 1.649050235748291, "learning_rate": 0.0015888952819332568, "loss": 0.7896, "step": 71450 }, { "epoch": 20.558112773302646, "grad_norm": 1.0928733348846436, "learning_rate": 0.001588837744533947, "loss": 0.8099, "step": 71460 }, { "epoch": 20.560989643268123, "grad_norm": 1.3949496746063232, "learning_rate": 0.0015887802071346375, "loss": 0.7346, "step": 71470 }, { "epoch": 20.563866513233602, "grad_norm": 1.2353097200393677, "learning_rate": 0.001588722669735328, "loss": 0.628, "step": 71480 }, { "epoch": 20.56674338319908, "grad_norm": 1.888598918914795, "learning_rate": 0.0015886651323360184, "loss": 0.6579, "step": 71490 }, { "epoch": 20.569620253164558, "grad_norm": 0.8746137619018555, "learning_rate": 0.001588607594936709, "loss": 0.7031, "step": 71500 }, { "epoch": 20.572497123130034, "grad_norm": 1.2378495931625366, "learning_rate": 0.0015885500575373995, "loss": 0.6107, "step": 71510 }, { "epoch": 20.57537399309551, "grad_norm": 0.9307965040206909, "learning_rate": 0.0015884925201380897, "loss": 0.5726, "step": 71520 }, { "epoch": 20.57825086306099, "grad_norm": 1.2954967021942139, "learning_rate": 0.0015884349827387802, "loss": 0.6909, "step": 71530 }, { "epoch": 20.581127733026467, "grad_norm": 1.362688660621643, "learning_rate": 0.0015883774453394706, "loss": 0.6689, "step": 71540 }, { "epoch": 20.584004602991946, "grad_norm": 1.7493600845336914, "learning_rate": 0.0015883199079401611, "loss": 0.6715, "step": 71550 }, { "epoch": 20.586881472957423, "grad_norm": 1.7347569465637207, "learning_rate": 0.0015882623705408517, "loss": 0.6967, "step": 71560 }, { "epoch": 20.5897583429229, "grad_norm": 1.886245846748352, "learning_rate": 0.001588204833141542, "loss": 0.8364, "step": 71570 }, { "epoch": 20.59263521288838, "grad_norm": 1.2373985052108765, "learning_rate": 0.0015881472957422324, "loss": 0.8585, "step": 71580 }, { "epoch": 20.595512082853855, "grad_norm": 1.7403135299682617, "learning_rate": 0.001588089758342923, "loss": 0.8599, "step": 71590 }, { "epoch": 20.59838895281933, "grad_norm": 1.2980256080627441, "learning_rate": 0.0015880322209436133, "loss": 0.6652, "step": 71600 }, { "epoch": 20.60126582278481, "grad_norm": 1.050050973892212, "learning_rate": 0.0015879746835443039, "loss": 0.6196, "step": 71610 }, { "epoch": 20.604142692750287, "grad_norm": 1.8136893510818481, "learning_rate": 0.0015879171461449944, "loss": 0.6631, "step": 71620 }, { "epoch": 20.607019562715767, "grad_norm": 1.5816748142242432, "learning_rate": 0.0015878596087456848, "loss": 0.795, "step": 71630 }, { "epoch": 20.609896432681243, "grad_norm": 1.329500675201416, "learning_rate": 0.0015878020713463751, "loss": 0.7596, "step": 71640 }, { "epoch": 20.61277330264672, "grad_norm": 1.0616048574447632, "learning_rate": 0.0015877445339470655, "loss": 0.6158, "step": 71650 }, { "epoch": 20.6156501726122, "grad_norm": 1.9760233163833618, "learning_rate": 0.001587686996547756, "loss": 0.6371, "step": 71660 }, { "epoch": 20.618527042577675, "grad_norm": 2.378061532974243, "learning_rate": 0.0015876294591484466, "loss": 0.7586, "step": 71670 }, { "epoch": 20.621403912543155, "grad_norm": 1.8466038703918457, "learning_rate": 0.001587571921749137, "loss": 0.6167, "step": 71680 }, { "epoch": 20.62428078250863, "grad_norm": 2.0947415828704834, "learning_rate": 0.0015875143843498275, "loss": 0.803, "step": 71690 }, { "epoch": 20.627157652474107, "grad_norm": 1.6084612607955933, "learning_rate": 0.0015874568469505179, "loss": 0.8143, "step": 71700 }, { "epoch": 20.630034522439587, "grad_norm": 0.8777037262916565, "learning_rate": 0.0015873993095512082, "loss": 0.7116, "step": 71710 }, { "epoch": 20.632911392405063, "grad_norm": 0.9860398769378662, "learning_rate": 0.0015873417721518988, "loss": 0.5939, "step": 71720 }, { "epoch": 20.63578826237054, "grad_norm": 1.8682254552841187, "learning_rate": 0.0015872842347525893, "loss": 0.7183, "step": 71730 }, { "epoch": 20.63866513233602, "grad_norm": 1.3663045167922974, "learning_rate": 0.0015872266973532797, "loss": 0.7061, "step": 71740 }, { "epoch": 20.641542002301495, "grad_norm": 1.4001176357269287, "learning_rate": 0.0015871691599539702, "loss": 0.6807, "step": 71750 }, { "epoch": 20.644418872266975, "grad_norm": 1.4868485927581787, "learning_rate": 0.0015871116225546604, "loss": 0.6775, "step": 71760 }, { "epoch": 20.64729574223245, "grad_norm": 1.183831810951233, "learning_rate": 0.001587054085155351, "loss": 0.7402, "step": 71770 }, { "epoch": 20.650172612197927, "grad_norm": 1.042061686515808, "learning_rate": 0.0015869965477560415, "loss": 0.8779, "step": 71780 }, { "epoch": 20.653049482163407, "grad_norm": 0.6858664751052856, "learning_rate": 0.0015869390103567318, "loss": 0.8917, "step": 71790 }, { "epoch": 20.655926352128883, "grad_norm": 1.0693727731704712, "learning_rate": 0.0015868814729574224, "loss": 0.771, "step": 71800 }, { "epoch": 20.658803222094363, "grad_norm": 1.2930669784545898, "learning_rate": 0.001586823935558113, "loss": 0.7575, "step": 71810 }, { "epoch": 20.66168009205984, "grad_norm": 1.2944135665893555, "learning_rate": 0.001586766398158803, "loss": 0.6728, "step": 71820 }, { "epoch": 20.664556962025316, "grad_norm": 1.0306379795074463, "learning_rate": 0.0015867088607594937, "loss": 0.6805, "step": 71830 }, { "epoch": 20.667433831990795, "grad_norm": 2.5433475971221924, "learning_rate": 0.0015866513233601842, "loss": 0.873, "step": 71840 }, { "epoch": 20.67031070195627, "grad_norm": 2.33756422996521, "learning_rate": 0.0015865937859608746, "loss": 0.7779, "step": 71850 }, { "epoch": 20.673187571921748, "grad_norm": 0.7946393489837646, "learning_rate": 0.0015865362485615651, "loss": 0.7189, "step": 71860 }, { "epoch": 20.676064441887227, "grad_norm": 1.2395257949829102, "learning_rate": 0.0015864787111622555, "loss": 0.6248, "step": 71870 }, { "epoch": 20.678941311852704, "grad_norm": 3.0030126571655273, "learning_rate": 0.0015864211737629458, "loss": 0.8076, "step": 71880 }, { "epoch": 20.681818181818183, "grad_norm": 1.133510947227478, "learning_rate": 0.0015863636363636364, "loss": 0.8264, "step": 71890 }, { "epoch": 20.68469505178366, "grad_norm": 0.997063398361206, "learning_rate": 0.0015863060989643268, "loss": 0.6584, "step": 71900 }, { "epoch": 20.687571921749136, "grad_norm": 0.8754318952560425, "learning_rate": 0.0015862485615650173, "loss": 0.6656, "step": 71910 }, { "epoch": 20.690448791714616, "grad_norm": 2.34788179397583, "learning_rate": 0.0015861910241657079, "loss": 0.9197, "step": 71920 }, { "epoch": 20.693325661680092, "grad_norm": 2.2884275913238525, "learning_rate": 0.0015861334867663982, "loss": 0.9797, "step": 71930 }, { "epoch": 20.696202531645568, "grad_norm": 1.2286696434020996, "learning_rate": 0.0015860759493670886, "loss": 0.6564, "step": 71940 }, { "epoch": 20.699079401611048, "grad_norm": 1.2544994354248047, "learning_rate": 0.0015860184119677791, "loss": 0.6717, "step": 71950 }, { "epoch": 20.701956271576524, "grad_norm": 1.769021987915039, "learning_rate": 0.0015859608745684695, "loss": 0.8122, "step": 71960 }, { "epoch": 20.704833141542004, "grad_norm": 1.5346863269805908, "learning_rate": 0.00158590333716916, "loss": 0.7651, "step": 71970 }, { "epoch": 20.70771001150748, "grad_norm": 2.1163337230682373, "learning_rate": 0.0015858457997698504, "loss": 0.8904, "step": 71980 }, { "epoch": 20.710586881472956, "grad_norm": 1.8658778667449951, "learning_rate": 0.001585788262370541, "loss": 0.7637, "step": 71990 }, { "epoch": 20.713463751438436, "grad_norm": 0.9749820828437805, "learning_rate": 0.0015857307249712313, "loss": 0.7216, "step": 72000 }, { "epoch": 20.716340621403912, "grad_norm": 0.8849538564682007, "learning_rate": 0.0015856731875719217, "loss": 0.6959, "step": 72010 }, { "epoch": 20.719217491369392, "grad_norm": 1.252118706703186, "learning_rate": 0.0015856156501726122, "loss": 0.758, "step": 72020 }, { "epoch": 20.722094361334868, "grad_norm": 1.6765151023864746, "learning_rate": 0.0015855581127733028, "loss": 0.7994, "step": 72030 }, { "epoch": 20.724971231300344, "grad_norm": 1.493735909461975, "learning_rate": 0.0015855005753739931, "loss": 0.7462, "step": 72040 }, { "epoch": 20.727848101265824, "grad_norm": 1.206807017326355, "learning_rate": 0.0015854430379746837, "loss": 0.6567, "step": 72050 }, { "epoch": 20.7307249712313, "grad_norm": 1.764032244682312, "learning_rate": 0.001585385500575374, "loss": 0.6717, "step": 72060 }, { "epoch": 20.733601841196776, "grad_norm": 1.0467404127120972, "learning_rate": 0.0015853279631760644, "loss": 0.7805, "step": 72070 }, { "epoch": 20.736478711162256, "grad_norm": 1.9398497343063354, "learning_rate": 0.001585270425776755, "loss": 0.759, "step": 72080 }, { "epoch": 20.739355581127732, "grad_norm": 1.2900723218917847, "learning_rate": 0.0015852128883774455, "loss": 0.7603, "step": 72090 }, { "epoch": 20.742232451093212, "grad_norm": 0.8458404541015625, "learning_rate": 0.0015851553509781359, "loss": 0.8472, "step": 72100 }, { "epoch": 20.74510932105869, "grad_norm": 0.7711735963821411, "learning_rate": 0.0015850978135788264, "loss": 0.6835, "step": 72110 }, { "epoch": 20.747986191024165, "grad_norm": 1.2366514205932617, "learning_rate": 0.0015850402761795166, "loss": 0.6696, "step": 72120 }, { "epoch": 20.750863060989644, "grad_norm": 1.01325523853302, "learning_rate": 0.0015849827387802071, "loss": 0.691, "step": 72130 }, { "epoch": 20.75373993095512, "grad_norm": 0.7782756686210632, "learning_rate": 0.0015849252013808977, "loss": 0.7022, "step": 72140 }, { "epoch": 20.756616800920597, "grad_norm": 1.1959624290466309, "learning_rate": 0.001584867663981588, "loss": 0.8161, "step": 72150 }, { "epoch": 20.759493670886076, "grad_norm": 0.6734020709991455, "learning_rate": 0.0015848101265822786, "loss": 0.574, "step": 72160 }, { "epoch": 20.762370540851553, "grad_norm": 1.079888939857483, "learning_rate": 0.0015847525891829692, "loss": 0.7, "step": 72170 }, { "epoch": 20.765247410817032, "grad_norm": 2.5194926261901855, "learning_rate": 0.0015846950517836593, "loss": 0.8641, "step": 72180 }, { "epoch": 20.76812428078251, "grad_norm": 1.2271149158477783, "learning_rate": 0.0015846375143843499, "loss": 0.7838, "step": 72190 }, { "epoch": 20.771001150747985, "grad_norm": 1.333001971244812, "learning_rate": 0.0015845799769850404, "loss": 1.104, "step": 72200 }, { "epoch": 20.773878020713465, "grad_norm": 0.8862433433532715, "learning_rate": 0.0015845224395857308, "loss": 0.6796, "step": 72210 }, { "epoch": 20.77675489067894, "grad_norm": 1.265779733657837, "learning_rate": 0.0015844649021864213, "loss": 0.6301, "step": 72220 }, { "epoch": 20.77963176064442, "grad_norm": 0.8334751725196838, "learning_rate": 0.0015844073647871115, "loss": 0.7054, "step": 72230 }, { "epoch": 20.782508630609897, "grad_norm": 1.5618793964385986, "learning_rate": 0.001584349827387802, "loss": 0.6946, "step": 72240 }, { "epoch": 20.785385500575373, "grad_norm": 1.233224868774414, "learning_rate": 0.0015842922899884926, "loss": 0.721, "step": 72250 }, { "epoch": 20.788262370540853, "grad_norm": 0.8531169295310974, "learning_rate": 0.001584234752589183, "loss": 0.5846, "step": 72260 }, { "epoch": 20.79113924050633, "grad_norm": 1.2631666660308838, "learning_rate": 0.0015841772151898735, "loss": 0.6669, "step": 72270 }, { "epoch": 20.794016110471805, "grad_norm": 1.304180383682251, "learning_rate": 0.001584119677790564, "loss": 0.7853, "step": 72280 }, { "epoch": 20.796892980437285, "grad_norm": 0.9525057077407837, "learning_rate": 0.0015840621403912542, "loss": 0.6798, "step": 72290 }, { "epoch": 20.79976985040276, "grad_norm": 0.8060445189476013, "learning_rate": 0.0015840046029919448, "loss": 0.9126, "step": 72300 }, { "epoch": 20.80264672036824, "grad_norm": 1.5109946727752686, "learning_rate": 0.0015839470655926353, "loss": 0.766, "step": 72310 }, { "epoch": 20.805523590333717, "grad_norm": 0.9305226802825928, "learning_rate": 0.0015838895281933257, "loss": 0.772, "step": 72320 }, { "epoch": 20.808400460299193, "grad_norm": 0.8583582043647766, "learning_rate": 0.0015838319907940162, "loss": 0.7849, "step": 72330 }, { "epoch": 20.811277330264673, "grad_norm": 1.3924256563186646, "learning_rate": 0.0015837744533947066, "loss": 0.9402, "step": 72340 }, { "epoch": 20.81415420023015, "grad_norm": 1.304850459098816, "learning_rate": 0.001583716915995397, "loss": 0.6947, "step": 72350 }, { "epoch": 20.817031070195625, "grad_norm": 0.5921927690505981, "learning_rate": 0.0015836593785960875, "loss": 0.7286, "step": 72360 }, { "epoch": 20.819907940161105, "grad_norm": 1.0579020977020264, "learning_rate": 0.0015836018411967778, "loss": 0.6355, "step": 72370 }, { "epoch": 20.82278481012658, "grad_norm": 1.171701192855835, "learning_rate": 0.0015835443037974684, "loss": 0.6794, "step": 72380 }, { "epoch": 20.82566168009206, "grad_norm": 1.2957191467285156, "learning_rate": 0.001583486766398159, "loss": 0.8576, "step": 72390 }, { "epoch": 20.828538550057537, "grad_norm": 1.1251270771026611, "learning_rate": 0.0015834292289988493, "loss": 0.5029, "step": 72400 }, { "epoch": 20.831415420023013, "grad_norm": 2.5344536304473877, "learning_rate": 0.0015833716915995397, "loss": 1.0602, "step": 72410 }, { "epoch": 20.834292289988493, "grad_norm": 0.6820211410522461, "learning_rate": 0.0015833141542002302, "loss": 0.7247, "step": 72420 }, { "epoch": 20.83716915995397, "grad_norm": 0.9620550274848938, "learning_rate": 0.0015832566168009206, "loss": 0.6843, "step": 72430 }, { "epoch": 20.84004602991945, "grad_norm": 1.713963270187378, "learning_rate": 0.0015831990794016111, "loss": 0.8256, "step": 72440 }, { "epoch": 20.842922899884925, "grad_norm": 3.765813112258911, "learning_rate": 0.0015831415420023015, "loss": 0.6696, "step": 72450 }, { "epoch": 20.8457997698504, "grad_norm": 1.2537449598312378, "learning_rate": 0.001583084004602992, "loss": 0.6612, "step": 72460 }, { "epoch": 20.84867663981588, "grad_norm": 1.1964185237884521, "learning_rate": 0.0015830264672036824, "loss": 0.6535, "step": 72470 }, { "epoch": 20.851553509781358, "grad_norm": 0.703299343585968, "learning_rate": 0.0015829689298043727, "loss": 0.5754, "step": 72480 }, { "epoch": 20.854430379746834, "grad_norm": 1.4864765405654907, "learning_rate": 0.0015829113924050633, "loss": 0.948, "step": 72490 }, { "epoch": 20.857307249712314, "grad_norm": 0.9743189811706543, "learning_rate": 0.0015828538550057539, "loss": 0.6826, "step": 72500 }, { "epoch": 20.86018411967779, "grad_norm": 2.8153562545776367, "learning_rate": 0.0015827963176064442, "loss": 0.8772, "step": 72510 }, { "epoch": 20.86306098964327, "grad_norm": 0.9420079588890076, "learning_rate": 0.0015827387802071348, "loss": 0.8821, "step": 72520 }, { "epoch": 20.865937859608746, "grad_norm": 1.245426058769226, "learning_rate": 0.0015826812428078251, "loss": 0.7063, "step": 72530 }, { "epoch": 20.868814729574222, "grad_norm": 1.5415472984313965, "learning_rate": 0.0015826237054085155, "loss": 0.6218, "step": 72540 }, { "epoch": 20.8716915995397, "grad_norm": 1.9177511930465698, "learning_rate": 0.001582566168009206, "loss": 0.5617, "step": 72550 }, { "epoch": 20.874568469505178, "grad_norm": 0.7761956453323364, "learning_rate": 0.0015825086306098964, "loss": 0.8046, "step": 72560 }, { "epoch": 20.877445339470658, "grad_norm": 1.9209064245224, "learning_rate": 0.001582451093210587, "loss": 0.5382, "step": 72570 }, { "epoch": 20.880322209436134, "grad_norm": 1.0690815448760986, "learning_rate": 0.0015823935558112775, "loss": 0.746, "step": 72580 }, { "epoch": 20.88319907940161, "grad_norm": 1.00520920753479, "learning_rate": 0.0015823360184119676, "loss": 0.7608, "step": 72590 }, { "epoch": 20.88607594936709, "grad_norm": 1.0238817930221558, "learning_rate": 0.0015822784810126582, "loss": 0.7712, "step": 72600 }, { "epoch": 20.888952819332566, "grad_norm": 2.0517284870147705, "learning_rate": 0.0015822209436133488, "loss": 0.7292, "step": 72610 }, { "epoch": 20.891829689298042, "grad_norm": 1.6414984464645386, "learning_rate": 0.0015821634062140391, "loss": 0.8524, "step": 72620 }, { "epoch": 20.894706559263522, "grad_norm": 1.012584924697876, "learning_rate": 0.0015821058688147297, "loss": 0.795, "step": 72630 }, { "epoch": 20.897583429228998, "grad_norm": 1.3291528224945068, "learning_rate": 0.0015820483314154202, "loss": 0.735, "step": 72640 }, { "epoch": 20.900460299194478, "grad_norm": 3.1853771209716797, "learning_rate": 0.0015819907940161104, "loss": 0.7993, "step": 72650 }, { "epoch": 20.903337169159954, "grad_norm": 1.0912425518035889, "learning_rate": 0.001581933256616801, "loss": 0.8189, "step": 72660 }, { "epoch": 20.90621403912543, "grad_norm": 1.8339277505874634, "learning_rate": 0.0015818757192174913, "loss": 0.6638, "step": 72670 }, { "epoch": 20.90909090909091, "grad_norm": 0.9136529564857483, "learning_rate": 0.0015818181818181818, "loss": 0.7243, "step": 72680 }, { "epoch": 20.911967779056386, "grad_norm": 0.8053311705589294, "learning_rate": 0.0015817606444188724, "loss": 0.617, "step": 72690 }, { "epoch": 20.914844649021866, "grad_norm": 1.1053279638290405, "learning_rate": 0.0015817031070195628, "loss": 0.6915, "step": 72700 }, { "epoch": 20.917721518987342, "grad_norm": 1.8958237171173096, "learning_rate": 0.001581645569620253, "loss": 0.9453, "step": 72710 }, { "epoch": 20.92059838895282, "grad_norm": 1.0602831840515137, "learning_rate": 0.0015815880322209437, "loss": 0.6138, "step": 72720 }, { "epoch": 20.923475258918298, "grad_norm": 3.291476011276245, "learning_rate": 0.001581530494821634, "loss": 0.8363, "step": 72730 }, { "epoch": 20.926352128883774, "grad_norm": 2.186617851257324, "learning_rate": 0.0015814729574223246, "loss": 0.8675, "step": 72740 }, { "epoch": 20.92922899884925, "grad_norm": 1.5547387599945068, "learning_rate": 0.0015814154200230151, "loss": 0.822, "step": 72750 }, { "epoch": 20.93210586881473, "grad_norm": 1.0388867855072021, "learning_rate": 0.0015813578826237055, "loss": 0.7962, "step": 72760 }, { "epoch": 20.934982738780207, "grad_norm": 1.168071985244751, "learning_rate": 0.0015813003452243958, "loss": 0.6639, "step": 72770 }, { "epoch": 20.937859608745686, "grad_norm": 0.8100073933601379, "learning_rate": 0.0015812428078250864, "loss": 0.5862, "step": 72780 }, { "epoch": 20.940736478711163, "grad_norm": 1.1791545152664185, "learning_rate": 0.0015811852704257767, "loss": 0.551, "step": 72790 }, { "epoch": 20.94361334867664, "grad_norm": 1.635018229484558, "learning_rate": 0.0015811277330264673, "loss": 0.7562, "step": 72800 }, { "epoch": 20.94649021864212, "grad_norm": 2.364088773727417, "learning_rate": 0.0015810701956271577, "loss": 0.8104, "step": 72810 }, { "epoch": 20.949367088607595, "grad_norm": 1.287036418914795, "learning_rate": 0.0015810126582278482, "loss": 0.7758, "step": 72820 }, { "epoch": 20.95224395857307, "grad_norm": 1.546405553817749, "learning_rate": 0.0015809551208285386, "loss": 0.844, "step": 72830 }, { "epoch": 20.95512082853855, "grad_norm": 1.0591877698898315, "learning_rate": 0.001580897583429229, "loss": 0.6525, "step": 72840 }, { "epoch": 20.957997698504027, "grad_norm": 1.7271194458007812, "learning_rate": 0.0015808400460299195, "loss": 0.8726, "step": 72850 }, { "epoch": 20.960874568469507, "grad_norm": 1.4188830852508545, "learning_rate": 0.00158078250863061, "loss": 0.8481, "step": 72860 }, { "epoch": 20.963751438434983, "grad_norm": 1.474363088607788, "learning_rate": 0.0015807249712313004, "loss": 0.7333, "step": 72870 }, { "epoch": 20.96662830840046, "grad_norm": 1.0043919086456299, "learning_rate": 0.001580667433831991, "loss": 0.7568, "step": 72880 }, { "epoch": 20.96950517836594, "grad_norm": 0.817646324634552, "learning_rate": 0.0015806098964326813, "loss": 0.6948, "step": 72890 }, { "epoch": 20.972382048331415, "grad_norm": 1.3101301193237305, "learning_rate": 0.0015805523590333717, "loss": 0.7319, "step": 72900 }, { "epoch": 20.975258918296895, "grad_norm": 1.0945676565170288, "learning_rate": 0.0015804948216340622, "loss": 0.7236, "step": 72910 }, { "epoch": 20.97813578826237, "grad_norm": 1.4611552953720093, "learning_rate": 0.0015804372842347526, "loss": 0.6774, "step": 72920 }, { "epoch": 20.981012658227847, "grad_norm": 0.8344886898994446, "learning_rate": 0.0015803797468354431, "loss": 0.6823, "step": 72930 }, { "epoch": 20.983889528193327, "grad_norm": 1.7613221406936646, "learning_rate": 0.0015803222094361337, "loss": 0.7987, "step": 72940 }, { "epoch": 20.986766398158803, "grad_norm": 2.0703556537628174, "learning_rate": 0.0015802646720368238, "loss": 0.6116, "step": 72950 }, { "epoch": 20.98964326812428, "grad_norm": 1.4441543817520142, "learning_rate": 0.0015802071346375144, "loss": 0.8598, "step": 72960 }, { "epoch": 20.99252013808976, "grad_norm": 2.920283555984497, "learning_rate": 0.001580149597238205, "loss": 0.8172, "step": 72970 }, { "epoch": 20.995397008055235, "grad_norm": 0.9936712980270386, "learning_rate": 0.0015800920598388953, "loss": 0.8216, "step": 72980 }, { "epoch": 20.998273878020715, "grad_norm": 0.786208987236023, "learning_rate": 0.0015800345224395859, "loss": 0.7662, "step": 72990 }, { "epoch": 21.00115074798619, "grad_norm": 2.291486978530884, "learning_rate": 0.0015799769850402764, "loss": 0.8468, "step": 73000 }, { "epoch": 21.004027617951667, "grad_norm": 2.259767770767212, "learning_rate": 0.0015799194476409666, "loss": 0.7969, "step": 73010 }, { "epoch": 21.006904487917147, "grad_norm": 1.4035664796829224, "learning_rate": 0.0015798619102416571, "loss": 0.8059, "step": 73020 }, { "epoch": 21.009781357882623, "grad_norm": 2.499981641769409, "learning_rate": 0.0015798043728423475, "loss": 0.6616, "step": 73030 }, { "epoch": 21.0126582278481, "grad_norm": 0.874424159526825, "learning_rate": 0.001579746835443038, "loss": 0.6663, "step": 73040 }, { "epoch": 21.01553509781358, "grad_norm": 2.06068754196167, "learning_rate": 0.0015796892980437286, "loss": 0.7758, "step": 73050 }, { "epoch": 21.018411967779056, "grad_norm": 2.024764060974121, "learning_rate": 0.0015796317606444187, "loss": 0.6455, "step": 73060 }, { "epoch": 21.021288837744535, "grad_norm": 1.7902485132217407, "learning_rate": 0.0015795742232451093, "loss": 0.7258, "step": 73070 }, { "epoch": 21.02416570771001, "grad_norm": 1.237860083580017, "learning_rate": 0.0015795166858457999, "loss": 0.6586, "step": 73080 }, { "epoch": 21.027042577675488, "grad_norm": 0.7323033809661865, "learning_rate": 0.0015794591484464902, "loss": 0.5162, "step": 73090 }, { "epoch": 21.029919447640967, "grad_norm": 0.7221056222915649, "learning_rate": 0.0015794016110471808, "loss": 0.6754, "step": 73100 }, { "epoch": 21.032796317606444, "grad_norm": 1.5662142038345337, "learning_rate": 0.0015793440736478713, "loss": 0.618, "step": 73110 }, { "epoch": 21.035673187571923, "grad_norm": 0.9118071794509888, "learning_rate": 0.0015792865362485615, "loss": 0.6484, "step": 73120 }, { "epoch": 21.0385500575374, "grad_norm": 1.3219290971755981, "learning_rate": 0.001579228998849252, "loss": 0.6649, "step": 73130 }, { "epoch": 21.041426927502876, "grad_norm": 1.6019622087478638, "learning_rate": 0.0015791714614499424, "loss": 0.6837, "step": 73140 }, { "epoch": 21.044303797468356, "grad_norm": 1.5193374156951904, "learning_rate": 0.001579113924050633, "loss": 0.7222, "step": 73150 }, { "epoch": 21.04718066743383, "grad_norm": 1.0001533031463623, "learning_rate": 0.0015790563866513235, "loss": 0.559, "step": 73160 }, { "epoch": 21.050057537399308, "grad_norm": 2.0727152824401855, "learning_rate": 0.0015789988492520138, "loss": 0.9053, "step": 73170 }, { "epoch": 21.052934407364788, "grad_norm": 1.9029998779296875, "learning_rate": 0.0015789413118527042, "loss": 0.7475, "step": 73180 }, { "epoch": 21.055811277330264, "grad_norm": 1.6374701261520386, "learning_rate": 0.0015788837744533948, "loss": 0.5111, "step": 73190 }, { "epoch": 21.058688147295744, "grad_norm": 2.028409004211426, "learning_rate": 0.001578826237054085, "loss": 0.7363, "step": 73200 }, { "epoch": 21.06156501726122, "grad_norm": 0.9783104658126831, "learning_rate": 0.0015787686996547757, "loss": 0.7914, "step": 73210 }, { "epoch": 21.064441887226696, "grad_norm": 1.0440435409545898, "learning_rate": 0.0015787111622554662, "loss": 0.6284, "step": 73220 }, { "epoch": 21.067318757192176, "grad_norm": 1.4972755908966064, "learning_rate": 0.0015786536248561566, "loss": 0.6726, "step": 73230 }, { "epoch": 21.070195627157652, "grad_norm": 2.046114683151245, "learning_rate": 0.001578596087456847, "loss": 0.8578, "step": 73240 }, { "epoch": 21.073072497123132, "grad_norm": 1.0053048133850098, "learning_rate": 0.0015785385500575373, "loss": 0.5539, "step": 73250 }, { "epoch": 21.075949367088608, "grad_norm": 1.4147660732269287, "learning_rate": 0.0015784810126582278, "loss": 0.6662, "step": 73260 }, { "epoch": 21.078826237054084, "grad_norm": 1.1792038679122925, "learning_rate": 0.0015784234752589184, "loss": 0.516, "step": 73270 }, { "epoch": 21.081703107019564, "grad_norm": 2.275508403778076, "learning_rate": 0.0015783659378596087, "loss": 0.6926, "step": 73280 }, { "epoch": 21.08457997698504, "grad_norm": 0.9286003112792969, "learning_rate": 0.0015783084004602993, "loss": 0.6787, "step": 73290 }, { "epoch": 21.087456846950516, "grad_norm": 1.7658382654190063, "learning_rate": 0.0015782508630609897, "loss": 0.5879, "step": 73300 }, { "epoch": 21.090333716915996, "grad_norm": 1.2604082822799683, "learning_rate": 0.00157819332566168, "loss": 0.6927, "step": 73310 }, { "epoch": 21.093210586881472, "grad_norm": 1.4978861808776855, "learning_rate": 0.0015781357882623706, "loss": 0.667, "step": 73320 }, { "epoch": 21.096087456846952, "grad_norm": 2.1876046657562256, "learning_rate": 0.0015780782508630611, "loss": 0.5533, "step": 73330 }, { "epoch": 21.09896432681243, "grad_norm": 1.7763912677764893, "learning_rate": 0.0015780207134637515, "loss": 0.7882, "step": 73340 }, { "epoch": 21.101841196777904, "grad_norm": 1.1417040824890137, "learning_rate": 0.001577963176064442, "loss": 0.6414, "step": 73350 }, { "epoch": 21.104718066743384, "grad_norm": 1.5188875198364258, "learning_rate": 0.0015779056386651322, "loss": 0.61, "step": 73360 }, { "epoch": 21.10759493670886, "grad_norm": 1.2744323015213013, "learning_rate": 0.0015778481012658227, "loss": 0.7046, "step": 73370 }, { "epoch": 21.110471806674337, "grad_norm": 1.7802094221115112, "learning_rate": 0.0015777905638665133, "loss": 0.7971, "step": 73380 }, { "epoch": 21.113348676639816, "grad_norm": 1.0274603366851807, "learning_rate": 0.0015777330264672036, "loss": 0.7545, "step": 73390 }, { "epoch": 21.116225546605293, "grad_norm": 2.210408926010132, "learning_rate": 0.0015776754890678942, "loss": 0.7329, "step": 73400 }, { "epoch": 21.119102416570772, "grad_norm": 1.8596603870391846, "learning_rate": 0.0015776179516685848, "loss": 0.595, "step": 73410 }, { "epoch": 21.12197928653625, "grad_norm": 2.809965133666992, "learning_rate": 0.001577560414269275, "loss": 0.8865, "step": 73420 }, { "epoch": 21.124856156501725, "grad_norm": 1.451331615447998, "learning_rate": 0.0015775028768699655, "loss": 0.7298, "step": 73430 }, { "epoch": 21.127733026467205, "grad_norm": 1.5987539291381836, "learning_rate": 0.001577445339470656, "loss": 0.6852, "step": 73440 }, { "epoch": 21.13060989643268, "grad_norm": 0.976849377155304, "learning_rate": 0.0015773878020713464, "loss": 0.6675, "step": 73450 }, { "epoch": 21.13348676639816, "grad_norm": 1.3582696914672852, "learning_rate": 0.001577330264672037, "loss": 0.7738, "step": 73460 }, { "epoch": 21.136363636363637, "grad_norm": 2.4774372577667236, "learning_rate": 0.0015772727272727275, "loss": 0.809, "step": 73470 }, { "epoch": 21.139240506329113, "grad_norm": 1.6324445009231567, "learning_rate": 0.0015772151898734176, "loss": 0.8727, "step": 73480 }, { "epoch": 21.142117376294593, "grad_norm": 1.357921838760376, "learning_rate": 0.0015771576524741082, "loss": 0.772, "step": 73490 }, { "epoch": 21.14499424626007, "grad_norm": 2.027776002883911, "learning_rate": 0.0015771001150747985, "loss": 0.6391, "step": 73500 }, { "epoch": 21.147871116225545, "grad_norm": 1.6554452180862427, "learning_rate": 0.0015770425776754891, "loss": 0.8662, "step": 73510 }, { "epoch": 21.150747986191025, "grad_norm": 1.4277856349945068, "learning_rate": 0.0015769850402761797, "loss": 0.9194, "step": 73520 }, { "epoch": 21.1536248561565, "grad_norm": 1.0642300844192505, "learning_rate": 0.00157692750287687, "loss": 0.8927, "step": 73530 }, { "epoch": 21.15650172612198, "grad_norm": 1.0319857597351074, "learning_rate": 0.0015768699654775604, "loss": 0.4191, "step": 73540 }, { "epoch": 21.159378596087457, "grad_norm": 1.337035894393921, "learning_rate": 0.001576812428078251, "loss": 0.682, "step": 73550 }, { "epoch": 21.162255466052933, "grad_norm": 1.6416476964950562, "learning_rate": 0.0015767548906789413, "loss": 0.7072, "step": 73560 }, { "epoch": 21.165132336018413, "grad_norm": 1.0285413265228271, "learning_rate": 0.0015766973532796318, "loss": 0.5935, "step": 73570 }, { "epoch": 21.16800920598389, "grad_norm": 1.1696385145187378, "learning_rate": 0.0015766398158803224, "loss": 0.7562, "step": 73580 }, { "epoch": 21.170886075949365, "grad_norm": 1.0549073219299316, "learning_rate": 0.0015765822784810128, "loss": 0.8226, "step": 73590 }, { "epoch": 21.173762945914845, "grad_norm": 1.6379940509796143, "learning_rate": 0.001576524741081703, "loss": 0.7357, "step": 73600 }, { "epoch": 21.17663981588032, "grad_norm": 1.2614612579345703, "learning_rate": 0.0015764672036823934, "loss": 0.5221, "step": 73610 }, { "epoch": 21.1795166858458, "grad_norm": 1.5659276247024536, "learning_rate": 0.001576409666283084, "loss": 0.7484, "step": 73620 }, { "epoch": 21.182393555811277, "grad_norm": 1.444663405418396, "learning_rate": 0.0015763521288837746, "loss": 0.7692, "step": 73630 }, { "epoch": 21.185270425776753, "grad_norm": 2.2165653705596924, "learning_rate": 0.001576294591484465, "loss": 0.9782, "step": 73640 }, { "epoch": 21.188147295742233, "grad_norm": 1.5765769481658936, "learning_rate": 0.0015762370540851555, "loss": 0.6439, "step": 73650 }, { "epoch": 21.19102416570771, "grad_norm": 1.6213443279266357, "learning_rate": 0.0015761795166858458, "loss": 0.7922, "step": 73660 }, { "epoch": 21.19390103567319, "grad_norm": 1.173233151435852, "learning_rate": 0.0015761219792865362, "loss": 0.7637, "step": 73670 }, { "epoch": 21.196777905638665, "grad_norm": 2.1697609424591064, "learning_rate": 0.0015760644418872267, "loss": 0.8018, "step": 73680 }, { "epoch": 21.19965477560414, "grad_norm": 1.611158847808838, "learning_rate": 0.0015760069044879173, "loss": 0.5885, "step": 73690 }, { "epoch": 21.20253164556962, "grad_norm": 1.4243528842926025, "learning_rate": 0.0015759493670886077, "loss": 0.8782, "step": 73700 }, { "epoch": 21.205408515535098, "grad_norm": 1.0190166234970093, "learning_rate": 0.0015758918296892982, "loss": 0.6427, "step": 73710 }, { "epoch": 21.208285385500574, "grad_norm": 1.6368944644927979, "learning_rate": 0.0015758342922899884, "loss": 0.7564, "step": 73720 }, { "epoch": 21.211162255466053, "grad_norm": 0.9978936910629272, "learning_rate": 0.001575776754890679, "loss": 0.6552, "step": 73730 }, { "epoch": 21.21403912543153, "grad_norm": 1.3864896297454834, "learning_rate": 0.0015757192174913695, "loss": 0.6389, "step": 73740 }, { "epoch": 21.21691599539701, "grad_norm": 1.3086223602294922, "learning_rate": 0.0015756616800920598, "loss": 0.6368, "step": 73750 }, { "epoch": 21.219792865362486, "grad_norm": 0.9022039771080017, "learning_rate": 0.0015756041426927504, "loss": 0.588, "step": 73760 }, { "epoch": 21.222669735327962, "grad_norm": 0.9380514025688171, "learning_rate": 0.001575546605293441, "loss": 0.6676, "step": 73770 }, { "epoch": 21.22554660529344, "grad_norm": 1.7433247566223145, "learning_rate": 0.001575489067894131, "loss": 0.6714, "step": 73780 }, { "epoch": 21.228423475258918, "grad_norm": 1.676466703414917, "learning_rate": 0.0015754315304948216, "loss": 0.7349, "step": 73790 }, { "epoch": 21.231300345224398, "grad_norm": 0.7822158336639404, "learning_rate": 0.0015753739930955122, "loss": 0.7351, "step": 73800 }, { "epoch": 21.234177215189874, "grad_norm": 1.3150042295455933, "learning_rate": 0.0015753164556962026, "loss": 0.8088, "step": 73810 }, { "epoch": 21.23705408515535, "grad_norm": 1.1375123262405396, "learning_rate": 0.0015752589182968931, "loss": 0.6594, "step": 73820 }, { "epoch": 21.23993095512083, "grad_norm": 0.6115755438804626, "learning_rate": 0.0015752013808975833, "loss": 0.6824, "step": 73830 }, { "epoch": 21.242807825086306, "grad_norm": 1.6637622117996216, "learning_rate": 0.0015751438434982738, "loss": 0.6242, "step": 73840 }, { "epoch": 21.245684695051782, "grad_norm": 1.2989119291305542, "learning_rate": 0.0015750863060989644, "loss": 0.8258, "step": 73850 }, { "epoch": 21.248561565017262, "grad_norm": 1.0329668521881104, "learning_rate": 0.0015750287686996547, "loss": 0.6123, "step": 73860 }, { "epoch": 21.251438434982738, "grad_norm": 1.7471452951431274, "learning_rate": 0.0015749712313003453, "loss": 0.6677, "step": 73870 }, { "epoch": 21.254315304948218, "grad_norm": 1.257802128791809, "learning_rate": 0.0015749136939010359, "loss": 0.6782, "step": 73880 }, { "epoch": 21.257192174913694, "grad_norm": 1.0988026857376099, "learning_rate": 0.001574856156501726, "loss": 1.0041, "step": 73890 }, { "epoch": 21.26006904487917, "grad_norm": 1.804175853729248, "learning_rate": 0.0015747986191024166, "loss": 0.7255, "step": 73900 }, { "epoch": 21.26294591484465, "grad_norm": 1.6082427501678467, "learning_rate": 0.0015747410817031071, "loss": 0.7108, "step": 73910 }, { "epoch": 21.265822784810126, "grad_norm": 1.9445759057998657, "learning_rate": 0.0015746835443037975, "loss": 0.7176, "step": 73920 }, { "epoch": 21.268699654775602, "grad_norm": 0.753009021282196, "learning_rate": 0.001574626006904488, "loss": 0.5916, "step": 73930 }, { "epoch": 21.271576524741082, "grad_norm": 2.4116430282592773, "learning_rate": 0.0015745684695051784, "loss": 0.6928, "step": 73940 }, { "epoch": 21.27445339470656, "grad_norm": 1.5548145771026611, "learning_rate": 0.0015745109321058687, "loss": 0.61, "step": 73950 }, { "epoch": 21.277330264672038, "grad_norm": 0.9695996642112732, "learning_rate": 0.0015744533947065593, "loss": 0.7463, "step": 73960 }, { "epoch": 21.280207134637514, "grad_norm": 1.0583271980285645, "learning_rate": 0.0015743958573072496, "loss": 0.7004, "step": 73970 }, { "epoch": 21.28308400460299, "grad_norm": 0.9442747235298157, "learning_rate": 0.0015743383199079402, "loss": 0.7181, "step": 73980 }, { "epoch": 21.28596087456847, "grad_norm": 1.4554557800292969, "learning_rate": 0.0015742807825086308, "loss": 0.679, "step": 73990 }, { "epoch": 21.288837744533947, "grad_norm": 1.3830480575561523, "learning_rate": 0.001574223245109321, "loss": 0.7279, "step": 74000 }, { "epoch": 21.291714614499426, "grad_norm": 1.9252030849456787, "learning_rate": 0.0015741657077100115, "loss": 0.6424, "step": 74010 }, { "epoch": 21.294591484464902, "grad_norm": 0.9254446029663086, "learning_rate": 0.001574108170310702, "loss": 0.5409, "step": 74020 }, { "epoch": 21.29746835443038, "grad_norm": 0.9886388778686523, "learning_rate": 0.0015740506329113924, "loss": 0.7878, "step": 74030 }, { "epoch": 21.30034522439586, "grad_norm": 1.090336561203003, "learning_rate": 0.001573993095512083, "loss": 0.7534, "step": 74040 }, { "epoch": 21.303222094361335, "grad_norm": 1.6027287244796753, "learning_rate": 0.0015739355581127733, "loss": 0.8163, "step": 74050 }, { "epoch": 21.30609896432681, "grad_norm": 0.8260158896446228, "learning_rate": 0.0015738780207134638, "loss": 0.7405, "step": 74060 }, { "epoch": 21.30897583429229, "grad_norm": 0.6537699103355408, "learning_rate": 0.0015738204833141542, "loss": 0.6133, "step": 74070 }, { "epoch": 21.311852704257767, "grad_norm": 0.8883168697357178, "learning_rate": 0.0015737629459148445, "loss": 0.576, "step": 74080 }, { "epoch": 21.314729574223247, "grad_norm": 1.9021798372268677, "learning_rate": 0.001573705408515535, "loss": 0.8325, "step": 74090 }, { "epoch": 21.317606444188723, "grad_norm": 1.181341528892517, "learning_rate": 0.0015736478711162257, "loss": 0.6597, "step": 74100 }, { "epoch": 21.3204833141542, "grad_norm": 0.8295127749443054, "learning_rate": 0.001573590333716916, "loss": 0.6525, "step": 74110 }, { "epoch": 21.32336018411968, "grad_norm": 1.2747160196304321, "learning_rate": 0.0015735327963176066, "loss": 0.7001, "step": 74120 }, { "epoch": 21.326237054085155, "grad_norm": 0.9186509251594543, "learning_rate": 0.001573475258918297, "loss": 0.9686, "step": 74130 }, { "epoch": 21.32911392405063, "grad_norm": 1.0100271701812744, "learning_rate": 0.0015734177215189873, "loss": 0.6792, "step": 74140 }, { "epoch": 21.33199079401611, "grad_norm": 1.2676104307174683, "learning_rate": 0.0015733601841196778, "loss": 0.7281, "step": 74150 }, { "epoch": 21.334867663981587, "grad_norm": 1.078787922859192, "learning_rate": 0.0015733026467203684, "loss": 0.6283, "step": 74160 }, { "epoch": 21.337744533947067, "grad_norm": 1.9021416902542114, "learning_rate": 0.0015732451093210587, "loss": 0.738, "step": 74170 }, { "epoch": 21.340621403912543, "grad_norm": 0.892328679561615, "learning_rate": 0.0015731875719217493, "loss": 0.5955, "step": 74180 }, { "epoch": 21.34349827387802, "grad_norm": 1.506812572479248, "learning_rate": 0.0015731300345224394, "loss": 0.8446, "step": 74190 }, { "epoch": 21.3463751438435, "grad_norm": 1.3478385210037231, "learning_rate": 0.00157307249712313, "loss": 0.6675, "step": 74200 }, { "epoch": 21.349252013808975, "grad_norm": 1.4978808164596558, "learning_rate": 0.0015730149597238206, "loss": 0.6165, "step": 74210 }, { "epoch": 21.352128883774455, "grad_norm": 1.8150432109832764, "learning_rate": 0.001572957422324511, "loss": 0.7315, "step": 74220 }, { "epoch": 21.35500575373993, "grad_norm": 0.6378611326217651, "learning_rate": 0.0015728998849252015, "loss": 0.8389, "step": 74230 }, { "epoch": 21.357882623705407, "grad_norm": 1.0153883695602417, "learning_rate": 0.001572842347525892, "loss": 0.8856, "step": 74240 }, { "epoch": 21.360759493670887, "grad_norm": 1.7003936767578125, "learning_rate": 0.0015727848101265822, "loss": 0.7171, "step": 74250 }, { "epoch": 21.363636363636363, "grad_norm": 1.435477375984192, "learning_rate": 0.0015727272727272727, "loss": 0.7666, "step": 74260 }, { "epoch": 21.36651323360184, "grad_norm": 2.3838536739349365, "learning_rate": 0.0015726697353279633, "loss": 0.8446, "step": 74270 }, { "epoch": 21.36939010356732, "grad_norm": 1.1493433713912964, "learning_rate": 0.0015726121979286536, "loss": 0.8545, "step": 74280 }, { "epoch": 21.372266973532795, "grad_norm": 1.3860498666763306, "learning_rate": 0.0015725546605293442, "loss": 0.7538, "step": 74290 }, { "epoch": 21.375143843498275, "grad_norm": 2.3675854206085205, "learning_rate": 0.0015724971231300346, "loss": 0.6306, "step": 74300 }, { "epoch": 21.37802071346375, "grad_norm": 1.5328036546707153, "learning_rate": 0.001572439585730725, "loss": 0.9841, "step": 74310 }, { "epoch": 21.380897583429228, "grad_norm": 1.1623560190200806, "learning_rate": 0.0015723820483314155, "loss": 0.5663, "step": 74320 }, { "epoch": 21.383774453394707, "grad_norm": 1.119160532951355, "learning_rate": 0.0015723245109321058, "loss": 0.851, "step": 74330 }, { "epoch": 21.386651323360184, "grad_norm": 1.2786582708358765, "learning_rate": 0.0015722669735327964, "loss": 0.6928, "step": 74340 }, { "epoch": 21.389528193325663, "grad_norm": 2.158643960952759, "learning_rate": 0.001572209436133487, "loss": 0.6406, "step": 74350 }, { "epoch": 21.39240506329114, "grad_norm": 0.838300347328186, "learning_rate": 0.0015721518987341773, "loss": 0.5622, "step": 74360 }, { "epoch": 21.395281933256616, "grad_norm": 2.161200761795044, "learning_rate": 0.0015720943613348676, "loss": 0.7447, "step": 74370 }, { "epoch": 21.398158803222096, "grad_norm": 1.2016979455947876, "learning_rate": 0.0015720368239355582, "loss": 0.583, "step": 74380 }, { "epoch": 21.40103567318757, "grad_norm": 0.8044843077659607, "learning_rate": 0.0015719792865362485, "loss": 0.7604, "step": 74390 }, { "epoch": 21.403912543153048, "grad_norm": 0.9670838117599487, "learning_rate": 0.0015719217491369391, "loss": 0.7094, "step": 74400 }, { "epoch": 21.406789413118528, "grad_norm": 1.1991043090820312, "learning_rate": 0.0015718642117376295, "loss": 0.932, "step": 74410 }, { "epoch": 21.409666283084004, "grad_norm": 1.9696102142333984, "learning_rate": 0.00157180667433832, "loss": 0.5168, "step": 74420 }, { "epoch": 21.412543153049484, "grad_norm": 1.6387861967086792, "learning_rate": 0.0015717491369390104, "loss": 0.9793, "step": 74430 }, { "epoch": 21.41542002301496, "grad_norm": 0.9390121102333069, "learning_rate": 0.0015716915995397007, "loss": 0.5825, "step": 74440 }, { "epoch": 21.418296892980436, "grad_norm": 1.498087763786316, "learning_rate": 0.0015716340621403913, "loss": 0.737, "step": 74450 }, { "epoch": 21.421173762945916, "grad_norm": 1.8453737497329712, "learning_rate": 0.0015715765247410818, "loss": 0.6591, "step": 74460 }, { "epoch": 21.424050632911392, "grad_norm": 1.221845269203186, "learning_rate": 0.0015715189873417722, "loss": 0.73, "step": 74470 }, { "epoch": 21.42692750287687, "grad_norm": 1.684125542640686, "learning_rate": 0.0015714614499424628, "loss": 0.7737, "step": 74480 }, { "epoch": 21.429804372842348, "grad_norm": 1.00730562210083, "learning_rate": 0.001571403912543153, "loss": 0.643, "step": 74490 }, { "epoch": 21.432681242807824, "grad_norm": 1.441741704940796, "learning_rate": 0.0015713463751438434, "loss": 0.6896, "step": 74500 }, { "epoch": 21.435558112773304, "grad_norm": 1.787046194076538, "learning_rate": 0.001571288837744534, "loss": 0.5965, "step": 74510 }, { "epoch": 21.43843498273878, "grad_norm": 1.1891252994537354, "learning_rate": 0.0015712313003452244, "loss": 0.7222, "step": 74520 }, { "epoch": 21.441311852704256, "grad_norm": 1.2350976467132568, "learning_rate": 0.001571173762945915, "loss": 0.6795, "step": 74530 }, { "epoch": 21.444188722669736, "grad_norm": 1.1193592548370361, "learning_rate": 0.0015711162255466055, "loss": 0.7183, "step": 74540 }, { "epoch": 21.447065592635212, "grad_norm": 1.0893725156784058, "learning_rate": 0.0015710586881472956, "loss": 0.6765, "step": 74550 }, { "epoch": 21.449942462600692, "grad_norm": 1.0637600421905518, "learning_rate": 0.0015710011507479862, "loss": 0.7188, "step": 74560 }, { "epoch": 21.45281933256617, "grad_norm": 1.0492435693740845, "learning_rate": 0.0015709436133486767, "loss": 0.7539, "step": 74570 }, { "epoch": 21.455696202531644, "grad_norm": 1.2666394710540771, "learning_rate": 0.001570886075949367, "loss": 0.7828, "step": 74580 }, { "epoch": 21.458573072497124, "grad_norm": 0.8228079080581665, "learning_rate": 0.0015708285385500577, "loss": 0.6392, "step": 74590 }, { "epoch": 21.4614499424626, "grad_norm": 2.2882256507873535, "learning_rate": 0.0015707710011507482, "loss": 0.9775, "step": 74600 }, { "epoch": 21.464326812428077, "grad_norm": 1.9250789880752563, "learning_rate": 0.0015707134637514383, "loss": 0.7826, "step": 74610 }, { "epoch": 21.467203682393556, "grad_norm": 1.7107826471328735, "learning_rate": 0.001570655926352129, "loss": 0.7232, "step": 74620 }, { "epoch": 21.470080552359033, "grad_norm": 0.7974795699119568, "learning_rate": 0.0015705983889528193, "loss": 0.6095, "step": 74630 }, { "epoch": 21.472957422324512, "grad_norm": 0.9098928570747375, "learning_rate": 0.0015705408515535098, "loss": 0.6291, "step": 74640 }, { "epoch": 21.47583429228999, "grad_norm": 3.572514772415161, "learning_rate": 0.0015704833141542004, "loss": 0.5871, "step": 74650 }, { "epoch": 21.478711162255465, "grad_norm": 1.5035401582717896, "learning_rate": 0.0015704257767548905, "loss": 0.8401, "step": 74660 }, { "epoch": 21.481588032220944, "grad_norm": 1.6006509065628052, "learning_rate": 0.001570368239355581, "loss": 0.7013, "step": 74670 }, { "epoch": 21.48446490218642, "grad_norm": 1.7599163055419922, "learning_rate": 0.0015703107019562716, "loss": 0.7645, "step": 74680 }, { "epoch": 21.4873417721519, "grad_norm": 1.880845069885254, "learning_rate": 0.001570253164556962, "loss": 0.6984, "step": 74690 }, { "epoch": 21.490218642117377, "grad_norm": 1.1056064367294312, "learning_rate": 0.0015701956271576526, "loss": 0.6387, "step": 74700 }, { "epoch": 21.493095512082853, "grad_norm": 2.0243980884552, "learning_rate": 0.0015701380897583431, "loss": 0.7776, "step": 74710 }, { "epoch": 21.495972382048333, "grad_norm": 1.8319171667099, "learning_rate": 0.0015700805523590333, "loss": 0.6539, "step": 74720 }, { "epoch": 21.49884925201381, "grad_norm": 1.446397066116333, "learning_rate": 0.0015700230149597238, "loss": 0.6499, "step": 74730 }, { "epoch": 21.501726121979285, "grad_norm": 2.6152288913726807, "learning_rate": 0.0015699654775604144, "loss": 0.704, "step": 74740 }, { "epoch": 21.504602991944765, "grad_norm": 1.2885390520095825, "learning_rate": 0.0015699079401611047, "loss": 0.6391, "step": 74750 }, { "epoch": 21.50747986191024, "grad_norm": 1.971027135848999, "learning_rate": 0.0015698504027617953, "loss": 0.7071, "step": 74760 }, { "epoch": 21.51035673187572, "grad_norm": 1.7601299285888672, "learning_rate": 0.0015697928653624856, "loss": 0.8194, "step": 74770 }, { "epoch": 21.513233601841197, "grad_norm": 0.9127505421638489, "learning_rate": 0.001569735327963176, "loss": 0.6467, "step": 74780 }, { "epoch": 21.516110471806673, "grad_norm": 1.0263015031814575, "learning_rate": 0.0015696777905638665, "loss": 0.6641, "step": 74790 }, { "epoch": 21.518987341772153, "grad_norm": 1.2365504503250122, "learning_rate": 0.001569620253164557, "loss": 0.6677, "step": 74800 }, { "epoch": 21.52186421173763, "grad_norm": 1.2429641485214233, "learning_rate": 0.0015695627157652475, "loss": 0.8099, "step": 74810 }, { "epoch": 21.524741081703105, "grad_norm": 1.4033849239349365, "learning_rate": 0.001569505178365938, "loss": 0.7082, "step": 74820 }, { "epoch": 21.527617951668585, "grad_norm": 0.8129664063453674, "learning_rate": 0.0015694476409666284, "loss": 0.7368, "step": 74830 }, { "epoch": 21.53049482163406, "grad_norm": 1.2085564136505127, "learning_rate": 0.0015693901035673187, "loss": 0.7225, "step": 74840 }, { "epoch": 21.53337169159954, "grad_norm": 1.55295729637146, "learning_rate": 0.0015693325661680093, "loss": 0.7606, "step": 74850 }, { "epoch": 21.536248561565017, "grad_norm": 1.4074288606643677, "learning_rate": 0.0015692750287686996, "loss": 0.6835, "step": 74860 }, { "epoch": 21.539125431530493, "grad_norm": 1.0832794904708862, "learning_rate": 0.0015692174913693902, "loss": 0.8108, "step": 74870 }, { "epoch": 21.542002301495973, "grad_norm": 1.2764339447021484, "learning_rate": 0.0015691599539700805, "loss": 0.7131, "step": 74880 }, { "epoch": 21.54487917146145, "grad_norm": 1.356109857559204, "learning_rate": 0.001569102416570771, "loss": 0.6701, "step": 74890 }, { "epoch": 21.54775604142693, "grad_norm": 1.3957003355026245, "learning_rate": 0.0015690448791714615, "loss": 0.7061, "step": 74900 }, { "epoch": 21.550632911392405, "grad_norm": 1.91456937789917, "learning_rate": 0.0015689873417721518, "loss": 0.6654, "step": 74910 }, { "epoch": 21.55350978135788, "grad_norm": 1.3875858783721924, "learning_rate": 0.0015689298043728424, "loss": 0.5115, "step": 74920 }, { "epoch": 21.55638665132336, "grad_norm": 1.5615556240081787, "learning_rate": 0.001568872266973533, "loss": 0.6255, "step": 74930 }, { "epoch": 21.559263521288837, "grad_norm": 0.8092935681343079, "learning_rate": 0.0015688147295742233, "loss": 0.6501, "step": 74940 }, { "epoch": 21.562140391254314, "grad_norm": 1.0559370517730713, "learning_rate": 0.0015687571921749138, "loss": 0.7247, "step": 74950 }, { "epoch": 21.565017261219793, "grad_norm": 1.2381614446640015, "learning_rate": 0.0015686996547756042, "loss": 0.8391, "step": 74960 }, { "epoch": 21.56789413118527, "grad_norm": 0.9592120051383972, "learning_rate": 0.0015686421173762945, "loss": 0.5477, "step": 74970 }, { "epoch": 21.57077100115075, "grad_norm": 0.869783341884613, "learning_rate": 0.001568584579976985, "loss": 0.7707, "step": 74980 }, { "epoch": 21.573647871116226, "grad_norm": 1.1882983446121216, "learning_rate": 0.0015685270425776754, "loss": 0.8287, "step": 74990 }, { "epoch": 21.576524741081702, "grad_norm": 1.2887296676635742, "learning_rate": 0.001568469505178366, "loss": 0.7954, "step": 75000 }, { "epoch": 21.57940161104718, "grad_norm": 0.8469599485397339, "learning_rate": 0.0015684119677790566, "loss": 0.6627, "step": 75010 }, { "epoch": 21.582278481012658, "grad_norm": 1.272196650505066, "learning_rate": 0.0015683544303797467, "loss": 0.7105, "step": 75020 }, { "epoch": 21.585155350978134, "grad_norm": 1.259643793106079, "learning_rate": 0.0015682968929804373, "loss": 0.7151, "step": 75030 }, { "epoch": 21.588032220943614, "grad_norm": 1.709160566329956, "learning_rate": 0.0015682393555811278, "loss": 0.7746, "step": 75040 }, { "epoch": 21.59090909090909, "grad_norm": 1.3917500972747803, "learning_rate": 0.0015681818181818182, "loss": 0.7331, "step": 75050 }, { "epoch": 21.59378596087457, "grad_norm": 2.1376407146453857, "learning_rate": 0.0015681242807825087, "loss": 0.914, "step": 75060 }, { "epoch": 21.596662830840046, "grad_norm": 1.5305421352386475, "learning_rate": 0.0015680667433831993, "loss": 0.7189, "step": 75070 }, { "epoch": 21.599539700805522, "grad_norm": 0.8970622420310974, "learning_rate": 0.0015680092059838894, "loss": 0.7765, "step": 75080 }, { "epoch": 21.602416570771002, "grad_norm": 2.602857828140259, "learning_rate": 0.00156795166858458, "loss": 0.7701, "step": 75090 }, { "epoch": 21.605293440736478, "grad_norm": 0.9927343726158142, "learning_rate": 0.0015678941311852703, "loss": 0.6595, "step": 75100 }, { "epoch": 21.608170310701958, "grad_norm": 1.3502033948898315, "learning_rate": 0.001567836593785961, "loss": 0.7623, "step": 75110 }, { "epoch": 21.611047180667434, "grad_norm": 3.1775543689727783, "learning_rate": 0.0015677790563866515, "loss": 0.6187, "step": 75120 }, { "epoch": 21.61392405063291, "grad_norm": 1.3258482217788696, "learning_rate": 0.0015677215189873418, "loss": 0.5359, "step": 75130 }, { "epoch": 21.61680092059839, "grad_norm": 0.9618521928787231, "learning_rate": 0.0015676639815880322, "loss": 0.663, "step": 75140 }, { "epoch": 21.619677790563866, "grad_norm": 1.4811264276504517, "learning_rate": 0.0015676064441887227, "loss": 0.7605, "step": 75150 }, { "epoch": 21.622554660529342, "grad_norm": 0.8084525465965271, "learning_rate": 0.001567548906789413, "loss": 0.5684, "step": 75160 }, { "epoch": 21.625431530494822, "grad_norm": 0.9928146004676819, "learning_rate": 0.0015674913693901036, "loss": 0.5786, "step": 75170 }, { "epoch": 21.6283084004603, "grad_norm": 1.5682035684585571, "learning_rate": 0.0015674338319907942, "loss": 0.9392, "step": 75180 }, { "epoch": 21.631185270425778, "grad_norm": 1.015537142753601, "learning_rate": 0.0015673762945914846, "loss": 0.8588, "step": 75190 }, { "epoch": 21.634062140391254, "grad_norm": 1.2425180673599243, "learning_rate": 0.001567318757192175, "loss": 0.8293, "step": 75200 }, { "epoch": 21.63693901035673, "grad_norm": 0.7338656187057495, "learning_rate": 0.0015672612197928652, "loss": 0.611, "step": 75210 }, { "epoch": 21.63981588032221, "grad_norm": 1.3100427389144897, "learning_rate": 0.0015672036823935558, "loss": 0.8992, "step": 75220 }, { "epoch": 21.642692750287686, "grad_norm": 1.030339002609253, "learning_rate": 0.0015671461449942464, "loss": 0.6257, "step": 75230 }, { "epoch": 21.645569620253166, "grad_norm": 1.171303153038025, "learning_rate": 0.0015670886075949367, "loss": 0.7207, "step": 75240 }, { "epoch": 21.648446490218642, "grad_norm": 1.368037223815918, "learning_rate": 0.0015670310701956273, "loss": 0.6959, "step": 75250 }, { "epoch": 21.65132336018412, "grad_norm": 1.2367792129516602, "learning_rate": 0.0015669735327963176, "loss": 0.6846, "step": 75260 }, { "epoch": 21.6542002301496, "grad_norm": 0.9367554783821106, "learning_rate": 0.001566915995397008, "loss": 0.9929, "step": 75270 }, { "epoch": 21.657077100115075, "grad_norm": 1.8040242195129395, "learning_rate": 0.0015668584579976985, "loss": 0.8188, "step": 75280 }, { "epoch": 21.65995397008055, "grad_norm": 1.2363394498825073, "learning_rate": 0.001566800920598389, "loss": 0.5941, "step": 75290 }, { "epoch": 21.66283084004603, "grad_norm": 0.8271229863166809, "learning_rate": 0.0015667433831990795, "loss": 0.7265, "step": 75300 }, { "epoch": 21.665707710011507, "grad_norm": 1.833432912826538, "learning_rate": 0.00156668584579977, "loss": 0.9404, "step": 75310 }, { "epoch": 21.668584579976987, "grad_norm": 1.9746874570846558, "learning_rate": 0.0015666283084004601, "loss": 1.0267, "step": 75320 }, { "epoch": 21.671461449942463, "grad_norm": 0.7461695075035095, "learning_rate": 0.0015665707710011507, "loss": 0.6252, "step": 75330 }, { "epoch": 21.67433831990794, "grad_norm": 2.4253110885620117, "learning_rate": 0.0015665132336018413, "loss": 0.704, "step": 75340 }, { "epoch": 21.67721518987342, "grad_norm": 1.7379050254821777, "learning_rate": 0.0015664556962025316, "loss": 0.7775, "step": 75350 }, { "epoch": 21.680092059838895, "grad_norm": 1.080522060394287, "learning_rate": 0.0015663981588032222, "loss": 0.8326, "step": 75360 }, { "epoch": 21.682968929804375, "grad_norm": 0.9007828831672668, "learning_rate": 0.0015663406214039128, "loss": 0.5069, "step": 75370 }, { "epoch": 21.68584579976985, "grad_norm": 0.9336389303207397, "learning_rate": 0.0015662830840046029, "loss": 0.9102, "step": 75380 }, { "epoch": 21.688722669735327, "grad_norm": 1.0828791856765747, "learning_rate": 0.0015662255466052934, "loss": 0.7638, "step": 75390 }, { "epoch": 21.691599539700807, "grad_norm": 1.2154756784439087, "learning_rate": 0.001566168009205984, "loss": 0.7327, "step": 75400 }, { "epoch": 21.694476409666283, "grad_norm": 1.7849491834640503, "learning_rate": 0.0015661104718066744, "loss": 0.9851, "step": 75410 }, { "epoch": 21.69735327963176, "grad_norm": 1.2880293130874634, "learning_rate": 0.001566052934407365, "loss": 0.671, "step": 75420 }, { "epoch": 21.70023014959724, "grad_norm": 1.6373471021652222, "learning_rate": 0.0015659953970080555, "loss": 0.8253, "step": 75430 }, { "epoch": 21.703107019562715, "grad_norm": 1.2967793941497803, "learning_rate": 0.0015659378596087456, "loss": 0.6316, "step": 75440 }, { "epoch": 21.705983889528195, "grad_norm": 0.8352614045143127, "learning_rate": 0.0015658803222094362, "loss": 0.699, "step": 75450 }, { "epoch": 21.70886075949367, "grad_norm": 3.0227017402648926, "learning_rate": 0.0015658227848101265, "loss": 0.694, "step": 75460 }, { "epoch": 21.711737629459147, "grad_norm": 2.057349681854248, "learning_rate": 0.001565765247410817, "loss": 0.787, "step": 75470 }, { "epoch": 21.714614499424627, "grad_norm": 2.9543416500091553, "learning_rate": 0.0015657077100115077, "loss": 0.8145, "step": 75480 }, { "epoch": 21.717491369390103, "grad_norm": 1.1284090280532837, "learning_rate": 0.0015656501726121978, "loss": 0.7531, "step": 75490 }, { "epoch": 21.72036823935558, "grad_norm": 0.9859688878059387, "learning_rate": 0.0015655926352128883, "loss": 0.6012, "step": 75500 }, { "epoch": 21.72324510932106, "grad_norm": 0.932902455329895, "learning_rate": 0.001565535097813579, "loss": 0.6157, "step": 75510 }, { "epoch": 21.726121979286535, "grad_norm": 1.1872109174728394, "learning_rate": 0.0015654775604142693, "loss": 0.7276, "step": 75520 }, { "epoch": 21.728998849252015, "grad_norm": 1.254440188407898, "learning_rate": 0.0015654200230149598, "loss": 0.6523, "step": 75530 }, { "epoch": 21.73187571921749, "grad_norm": 1.2396076917648315, "learning_rate": 0.0015653624856156504, "loss": 0.5763, "step": 75540 }, { "epoch": 21.734752589182968, "grad_norm": 1.4461618661880493, "learning_rate": 0.0015653049482163405, "loss": 0.7203, "step": 75550 }, { "epoch": 21.737629459148447, "grad_norm": 0.9237532615661621, "learning_rate": 0.001565247410817031, "loss": 0.7146, "step": 75560 }, { "epoch": 21.740506329113924, "grad_norm": 1.432823896408081, "learning_rate": 0.0015651898734177214, "loss": 0.632, "step": 75570 }, { "epoch": 21.743383199079403, "grad_norm": 1.6964631080627441, "learning_rate": 0.001565132336018412, "loss": 0.8751, "step": 75580 }, { "epoch": 21.74626006904488, "grad_norm": 0.708401083946228, "learning_rate": 0.0015650747986191026, "loss": 0.7106, "step": 75590 }, { "epoch": 21.749136939010356, "grad_norm": 1.384973168373108, "learning_rate": 0.001565017261219793, "loss": 0.7873, "step": 75600 }, { "epoch": 21.752013808975835, "grad_norm": 1.289046287536621, "learning_rate": 0.0015649597238204832, "loss": 0.7937, "step": 75610 }, { "epoch": 21.75489067894131, "grad_norm": 1.4609317779541016, "learning_rate": 0.0015649021864211738, "loss": 0.9011, "step": 75620 }, { "epoch": 21.757767548906788, "grad_norm": 1.6202614307403564, "learning_rate": 0.0015648446490218642, "loss": 0.7517, "step": 75630 }, { "epoch": 21.760644418872268, "grad_norm": 1.1444207429885864, "learning_rate": 0.0015647871116225547, "loss": 0.6625, "step": 75640 }, { "epoch": 21.763521288837744, "grad_norm": 1.2191276550292969, "learning_rate": 0.0015647295742232453, "loss": 0.6268, "step": 75650 }, { "epoch": 21.766398158803224, "grad_norm": 1.059578537940979, "learning_rate": 0.0015646720368239356, "loss": 0.7434, "step": 75660 }, { "epoch": 21.7692750287687, "grad_norm": 2.063929557800293, "learning_rate": 0.001564614499424626, "loss": 0.7268, "step": 75670 }, { "epoch": 21.772151898734176, "grad_norm": 0.8179876804351807, "learning_rate": 0.0015645569620253163, "loss": 0.8585, "step": 75680 }, { "epoch": 21.775028768699656, "grad_norm": 1.5439174175262451, "learning_rate": 0.001564499424626007, "loss": 0.7174, "step": 75690 }, { "epoch": 21.777905638665132, "grad_norm": 1.1693512201309204, "learning_rate": 0.0015644418872266975, "loss": 0.5937, "step": 75700 }, { "epoch": 21.780782508630608, "grad_norm": 1.9308836460113525, "learning_rate": 0.0015643843498273878, "loss": 0.5713, "step": 75710 }, { "epoch": 21.783659378596088, "grad_norm": 0.7439401745796204, "learning_rate": 0.0015643268124280784, "loss": 0.6493, "step": 75720 }, { "epoch": 21.786536248561564, "grad_norm": 1.727778434753418, "learning_rate": 0.0015642692750287687, "loss": 0.8779, "step": 75730 }, { "epoch": 21.789413118527044, "grad_norm": 0.7428648471832275, "learning_rate": 0.001564211737629459, "loss": 0.8344, "step": 75740 }, { "epoch": 21.79228998849252, "grad_norm": 1.0003323554992676, "learning_rate": 0.0015641542002301496, "loss": 0.7924, "step": 75750 }, { "epoch": 21.795166858457996, "grad_norm": 0.749866783618927, "learning_rate": 0.0015640966628308402, "loss": 0.7091, "step": 75760 }, { "epoch": 21.798043728423476, "grad_norm": 1.0065739154815674, "learning_rate": 0.0015640391254315305, "loss": 0.8468, "step": 75770 }, { "epoch": 21.800920598388952, "grad_norm": 1.0409119129180908, "learning_rate": 0.001563981588032221, "loss": 0.6552, "step": 75780 }, { "epoch": 21.803797468354432, "grad_norm": 0.7087990641593933, "learning_rate": 0.0015639240506329112, "loss": 0.6323, "step": 75790 }, { "epoch": 21.806674338319908, "grad_norm": 1.391952633857727, "learning_rate": 0.0015638665132336018, "loss": 0.6479, "step": 75800 }, { "epoch": 21.809551208285384, "grad_norm": 1.4513577222824097, "learning_rate": 0.0015638089758342924, "loss": 0.839, "step": 75810 }, { "epoch": 21.812428078250864, "grad_norm": 0.7450350522994995, "learning_rate": 0.0015637514384349827, "loss": 0.7052, "step": 75820 }, { "epoch": 21.81530494821634, "grad_norm": 1.35800039768219, "learning_rate": 0.0015636939010356733, "loss": 0.7045, "step": 75830 }, { "epoch": 21.818181818181817, "grad_norm": 0.8139070868492126, "learning_rate": 0.0015636363636363638, "loss": 0.7352, "step": 75840 }, { "epoch": 21.821058688147296, "grad_norm": 1.4630146026611328, "learning_rate": 0.001563578826237054, "loss": 0.6504, "step": 75850 }, { "epoch": 21.823935558112773, "grad_norm": 2.082353353500366, "learning_rate": 0.0015635212888377445, "loss": 0.9298, "step": 75860 }, { "epoch": 21.826812428078252, "grad_norm": 1.308582067489624, "learning_rate": 0.001563463751438435, "loss": 0.6319, "step": 75870 }, { "epoch": 21.82968929804373, "grad_norm": 0.9516951441764832, "learning_rate": 0.0015634062140391254, "loss": 0.7055, "step": 75880 }, { "epoch": 21.832566168009205, "grad_norm": 2.572021961212158, "learning_rate": 0.001563348676639816, "loss": 0.913, "step": 75890 }, { "epoch": 21.835443037974684, "grad_norm": 2.0389904975891113, "learning_rate": 0.0015632911392405064, "loss": 0.8368, "step": 75900 }, { "epoch": 21.83831990794016, "grad_norm": 1.5522643327713013, "learning_rate": 0.0015632336018411967, "loss": 0.833, "step": 75910 }, { "epoch": 21.841196777905637, "grad_norm": 1.0327037572860718, "learning_rate": 0.0015631760644418873, "loss": 0.5718, "step": 75920 }, { "epoch": 21.844073647871117, "grad_norm": 1.3730623722076416, "learning_rate": 0.0015631185270425776, "loss": 0.8387, "step": 75930 }, { "epoch": 21.846950517836593, "grad_norm": 1.2306610345840454, "learning_rate": 0.0015630609896432682, "loss": 0.9854, "step": 75940 }, { "epoch": 21.849827387802073, "grad_norm": 1.0387526750564575, "learning_rate": 0.0015630034522439587, "loss": 0.7234, "step": 75950 }, { "epoch": 21.85270425776755, "grad_norm": 2.3378536701202393, "learning_rate": 0.001562945914844649, "loss": 0.8072, "step": 75960 }, { "epoch": 21.855581127733025, "grad_norm": 1.91055166721344, "learning_rate": 0.0015628883774453394, "loss": 0.6681, "step": 75970 }, { "epoch": 21.858457997698505, "grad_norm": 1.1711015701293945, "learning_rate": 0.00156283084004603, "loss": 0.6799, "step": 75980 }, { "epoch": 21.86133486766398, "grad_norm": 1.1196823120117188, "learning_rate": 0.0015627733026467203, "loss": 0.6666, "step": 75990 }, { "epoch": 21.86421173762946, "grad_norm": 1.7403942346572876, "learning_rate": 0.001562715765247411, "loss": 0.612, "step": 76000 }, { "epoch": 21.867088607594937, "grad_norm": 1.456777811050415, "learning_rate": 0.0015626582278481013, "loss": 0.7383, "step": 76010 }, { "epoch": 21.869965477560413, "grad_norm": 1.3105422258377075, "learning_rate": 0.0015626006904487918, "loss": 1.0365, "step": 76020 }, { "epoch": 21.872842347525893, "grad_norm": 0.5868180394172668, "learning_rate": 0.0015625431530494822, "loss": 0.5939, "step": 76030 }, { "epoch": 21.87571921749137, "grad_norm": 0.8187174797058105, "learning_rate": 0.0015624856156501725, "loss": 0.7163, "step": 76040 }, { "epoch": 21.878596087456845, "grad_norm": 1.7037020921707153, "learning_rate": 0.001562428078250863, "loss": 0.7026, "step": 76050 }, { "epoch": 21.881472957422325, "grad_norm": 2.1283156871795654, "learning_rate": 0.0015623705408515536, "loss": 0.5803, "step": 76060 }, { "epoch": 21.8843498273878, "grad_norm": 1.35041081905365, "learning_rate": 0.001562313003452244, "loss": 0.8136, "step": 76070 }, { "epoch": 21.88722669735328, "grad_norm": 2.475546360015869, "learning_rate": 0.0015622554660529345, "loss": 0.8917, "step": 76080 }, { "epoch": 21.890103567318757, "grad_norm": 1.2450841665267944, "learning_rate": 0.001562197928653625, "loss": 0.6901, "step": 76090 }, { "epoch": 21.892980437284233, "grad_norm": 1.026928186416626, "learning_rate": 0.0015621403912543152, "loss": 0.6935, "step": 76100 }, { "epoch": 21.895857307249713, "grad_norm": 1.0883454084396362, "learning_rate": 0.0015620828538550058, "loss": 0.833, "step": 76110 }, { "epoch": 21.89873417721519, "grad_norm": 1.7664010524749756, "learning_rate": 0.0015620253164556964, "loss": 0.7472, "step": 76120 }, { "epoch": 21.90161104718067, "grad_norm": 2.328197956085205, "learning_rate": 0.0015619677790563867, "loss": 0.9257, "step": 76130 }, { "epoch": 21.904487917146145, "grad_norm": 1.673464298248291, "learning_rate": 0.0015619102416570773, "loss": 0.76, "step": 76140 }, { "epoch": 21.90736478711162, "grad_norm": 1.4784084558486938, "learning_rate": 0.0015618527042577674, "loss": 0.6428, "step": 76150 }, { "epoch": 21.9102416570771, "grad_norm": 0.815558910369873, "learning_rate": 0.001561795166858458, "loss": 0.6139, "step": 76160 }, { "epoch": 21.913118527042577, "grad_norm": 1.5828808546066284, "learning_rate": 0.0015617376294591485, "loss": 0.7381, "step": 76170 }, { "epoch": 21.915995397008054, "grad_norm": 0.8761647343635559, "learning_rate": 0.0015616800920598389, "loss": 0.6715, "step": 76180 }, { "epoch": 21.918872266973533, "grad_norm": 1.7808668613433838, "learning_rate": 0.0015616225546605295, "loss": 0.9007, "step": 76190 }, { "epoch": 21.92174913693901, "grad_norm": 2.1308200359344482, "learning_rate": 0.00156156501726122, "loss": 0.8507, "step": 76200 }, { "epoch": 21.92462600690449, "grad_norm": 1.464855432510376, "learning_rate": 0.0015615074798619101, "loss": 0.9523, "step": 76210 }, { "epoch": 21.927502876869966, "grad_norm": 1.686346173286438, "learning_rate": 0.0015614499424626007, "loss": 0.7024, "step": 76220 }, { "epoch": 21.930379746835442, "grad_norm": 1.5645208358764648, "learning_rate": 0.0015613924050632913, "loss": 0.8004, "step": 76230 }, { "epoch": 21.93325661680092, "grad_norm": 2.0641028881073, "learning_rate": 0.0015613348676639816, "loss": 0.6784, "step": 76240 }, { "epoch": 21.936133486766398, "grad_norm": 0.9525911808013916, "learning_rate": 0.0015612773302646722, "loss": 0.8151, "step": 76250 }, { "epoch": 21.939010356731877, "grad_norm": 2.2533345222473145, "learning_rate": 0.0015612197928653623, "loss": 0.5992, "step": 76260 }, { "epoch": 21.941887226697354, "grad_norm": 0.8107246160507202, "learning_rate": 0.0015611622554660529, "loss": 0.7363, "step": 76270 }, { "epoch": 21.94476409666283, "grad_norm": 1.156683325767517, "learning_rate": 0.0015611047180667434, "loss": 0.7455, "step": 76280 }, { "epoch": 21.94764096662831, "grad_norm": 3.2113447189331055, "learning_rate": 0.0015610471806674338, "loss": 0.6868, "step": 76290 }, { "epoch": 21.950517836593786, "grad_norm": 1.1186323165893555, "learning_rate": 0.0015609896432681244, "loss": 0.652, "step": 76300 }, { "epoch": 21.953394706559262, "grad_norm": 1.5769400596618652, "learning_rate": 0.001560932105868815, "loss": 0.8462, "step": 76310 }, { "epoch": 21.956271576524742, "grad_norm": 1.0405664443969727, "learning_rate": 0.001560874568469505, "loss": 0.7087, "step": 76320 }, { "epoch": 21.959148446490218, "grad_norm": 1.364494800567627, "learning_rate": 0.0015608170310701956, "loss": 0.7418, "step": 76330 }, { "epoch": 21.962025316455698, "grad_norm": 1.6117584705352783, "learning_rate": 0.0015607594936708862, "loss": 0.8396, "step": 76340 }, { "epoch": 21.964902186421174, "grad_norm": 1.7464261054992676, "learning_rate": 0.0015607019562715765, "loss": 0.7132, "step": 76350 }, { "epoch": 21.96777905638665, "grad_norm": 1.312481164932251, "learning_rate": 0.001560644418872267, "loss": 0.7239, "step": 76360 }, { "epoch": 21.97065592635213, "grad_norm": 1.190272569656372, "learning_rate": 0.0015605868814729574, "loss": 0.7999, "step": 76370 }, { "epoch": 21.973532796317606, "grad_norm": 1.1774485111236572, "learning_rate": 0.0015605293440736478, "loss": 0.8742, "step": 76380 }, { "epoch": 21.976409666283082, "grad_norm": 0.9128437042236328, "learning_rate": 0.0015604718066743383, "loss": 0.5722, "step": 76390 }, { "epoch": 21.979286536248562, "grad_norm": 1.7165483236312866, "learning_rate": 0.0015604142692750287, "loss": 0.8914, "step": 76400 }, { "epoch": 21.98216340621404, "grad_norm": 0.9910653829574585, "learning_rate": 0.0015603567318757193, "loss": 0.6004, "step": 76410 }, { "epoch": 21.985040276179518, "grad_norm": 1.2259910106658936, "learning_rate": 0.0015602991944764098, "loss": 0.7791, "step": 76420 }, { "epoch": 21.987917146144994, "grad_norm": 2.725590467453003, "learning_rate": 0.0015602416570771002, "loss": 0.9272, "step": 76430 }, { "epoch": 21.99079401611047, "grad_norm": 2.0859792232513428, "learning_rate": 0.0015601841196777905, "loss": 0.6682, "step": 76440 }, { "epoch": 21.99367088607595, "grad_norm": 0.9838271737098694, "learning_rate": 0.001560126582278481, "loss": 0.7228, "step": 76450 }, { "epoch": 21.996547756041426, "grad_norm": 0.7848051190376282, "learning_rate": 0.0015600690448791714, "loss": 0.6682, "step": 76460 }, { "epoch": 21.999424626006906, "grad_norm": 0.70064377784729, "learning_rate": 0.001560011507479862, "loss": 0.6998, "step": 76470 }, { "epoch": 22.002301495972382, "grad_norm": 1.000533103942871, "learning_rate": 0.0015599539700805523, "loss": 0.4986, "step": 76480 }, { "epoch": 22.00517836593786, "grad_norm": 1.0129718780517578, "learning_rate": 0.001559896432681243, "loss": 0.565, "step": 76490 }, { "epoch": 22.00805523590334, "grad_norm": 1.7362889051437378, "learning_rate": 0.0015598388952819332, "loss": 0.607, "step": 76500 }, { "epoch": 22.010932105868815, "grad_norm": 2.4949986934661865, "learning_rate": 0.0015597813578826236, "loss": 0.8211, "step": 76510 }, { "epoch": 22.01380897583429, "grad_norm": 1.3199656009674072, "learning_rate": 0.0015597238204833142, "loss": 0.5392, "step": 76520 }, { "epoch": 22.01668584579977, "grad_norm": 2.830078125, "learning_rate": 0.0015596662830840047, "loss": 0.6583, "step": 76530 }, { "epoch": 22.019562715765247, "grad_norm": 0.8942529559135437, "learning_rate": 0.001559608745684695, "loss": 0.6696, "step": 76540 }, { "epoch": 22.022439585730726, "grad_norm": 1.4538472890853882, "learning_rate": 0.0015595512082853856, "loss": 0.733, "step": 76550 }, { "epoch": 22.025316455696203, "grad_norm": 0.933546245098114, "learning_rate": 0.001559493670886076, "loss": 0.6718, "step": 76560 }, { "epoch": 22.02819332566168, "grad_norm": 0.5962854027748108, "learning_rate": 0.0015594361334867663, "loss": 0.6011, "step": 76570 }, { "epoch": 22.03107019562716, "grad_norm": 0.963068962097168, "learning_rate": 0.0015593785960874569, "loss": 0.792, "step": 76580 }, { "epoch": 22.033947065592635, "grad_norm": 1.172493577003479, "learning_rate": 0.0015593210586881472, "loss": 0.6011, "step": 76590 }, { "epoch": 22.03682393555811, "grad_norm": 1.1343966722488403, "learning_rate": 0.0015592635212888378, "loss": 0.6523, "step": 76600 }, { "epoch": 22.03970080552359, "grad_norm": 1.900302529335022, "learning_rate": 0.0015592059838895284, "loss": 0.6274, "step": 76610 }, { "epoch": 22.042577675489067, "grad_norm": 1.040212869644165, "learning_rate": 0.0015591484464902185, "loss": 0.7892, "step": 76620 }, { "epoch": 22.045454545454547, "grad_norm": 1.4508110284805298, "learning_rate": 0.001559090909090909, "loss": 0.629, "step": 76630 }, { "epoch": 22.048331415420023, "grad_norm": 2.223888635635376, "learning_rate": 0.0015590333716915996, "loss": 0.6811, "step": 76640 }, { "epoch": 22.0512082853855, "grad_norm": 1.2126702070236206, "learning_rate": 0.00155897583429229, "loss": 0.7842, "step": 76650 }, { "epoch": 22.05408515535098, "grad_norm": 1.2713441848754883, "learning_rate": 0.0015589182968929805, "loss": 0.8436, "step": 76660 }, { "epoch": 22.056962025316455, "grad_norm": 1.2358375787734985, "learning_rate": 0.001558860759493671, "loss": 0.6839, "step": 76670 }, { "epoch": 22.059838895281935, "grad_norm": 1.554887056350708, "learning_rate": 0.0015588032220943612, "loss": 0.8951, "step": 76680 }, { "epoch": 22.06271576524741, "grad_norm": 1.135341763496399, "learning_rate": 0.0015587456846950518, "loss": 0.8036, "step": 76690 }, { "epoch": 22.065592635212887, "grad_norm": 1.3925224542617798, "learning_rate": 0.0015586881472957424, "loss": 0.6454, "step": 76700 }, { "epoch": 22.068469505178367, "grad_norm": 1.2612086534500122, "learning_rate": 0.0015586306098964327, "loss": 0.6986, "step": 76710 }, { "epoch": 22.071346375143843, "grad_norm": 1.5336003303527832, "learning_rate": 0.0015585730724971233, "loss": 0.8426, "step": 76720 }, { "epoch": 22.07422324510932, "grad_norm": 0.7186029553413391, "learning_rate": 0.0015585155350978136, "loss": 0.5992, "step": 76730 }, { "epoch": 22.0771001150748, "grad_norm": 1.468056082725525, "learning_rate": 0.001558457997698504, "loss": 0.6799, "step": 76740 }, { "epoch": 22.079976985040275, "grad_norm": 0.9665651321411133, "learning_rate": 0.0015584004602991945, "loss": 0.8573, "step": 76750 }, { "epoch": 22.082853855005755, "grad_norm": 2.784306764602661, "learning_rate": 0.0015583429228998849, "loss": 0.5939, "step": 76760 }, { "epoch": 22.08573072497123, "grad_norm": 1.0007392168045044, "learning_rate": 0.0015582853855005754, "loss": 0.6124, "step": 76770 }, { "epoch": 22.088607594936708, "grad_norm": 1.5602220296859741, "learning_rate": 0.001558227848101266, "loss": 0.6657, "step": 76780 }, { "epoch": 22.091484464902187, "grad_norm": 1.5026441812515259, "learning_rate": 0.0015581703107019563, "loss": 0.671, "step": 76790 }, { "epoch": 22.094361334867664, "grad_norm": 1.0236644744873047, "learning_rate": 0.0015581127733026467, "loss": 0.7455, "step": 76800 }, { "epoch": 22.097238204833143, "grad_norm": 1.2070242166519165, "learning_rate": 0.0015580552359033373, "loss": 0.7635, "step": 76810 }, { "epoch": 22.10011507479862, "grad_norm": 1.5731382369995117, "learning_rate": 0.0015579976985040276, "loss": 0.7465, "step": 76820 }, { "epoch": 22.102991944764096, "grad_norm": 1.345345377922058, "learning_rate": 0.0015579401611047182, "loss": 0.646, "step": 76830 }, { "epoch": 22.105868814729575, "grad_norm": 1.6308057308197021, "learning_rate": 0.0015578826237054085, "loss": 0.6132, "step": 76840 }, { "epoch": 22.10874568469505, "grad_norm": 1.8920716047286987, "learning_rate": 0.001557825086306099, "loss": 0.7637, "step": 76850 }, { "epoch": 22.111622554660528, "grad_norm": 1.4295339584350586, "learning_rate": 0.0015577675489067894, "loss": 0.9282, "step": 76860 }, { "epoch": 22.114499424626008, "grad_norm": 1.211828351020813, "learning_rate": 0.0015577100115074798, "loss": 0.8161, "step": 76870 }, { "epoch": 22.117376294591484, "grad_norm": 2.041689157485962, "learning_rate": 0.0015576524741081703, "loss": 0.867, "step": 76880 }, { "epoch": 22.120253164556964, "grad_norm": 1.275324821472168, "learning_rate": 0.001557594936708861, "loss": 0.8106, "step": 76890 }, { "epoch": 22.12313003452244, "grad_norm": 1.3987228870391846, "learning_rate": 0.0015575373993095513, "loss": 0.5932, "step": 76900 }, { "epoch": 22.126006904487916, "grad_norm": 1.1294355392456055, "learning_rate": 0.0015574798619102418, "loss": 0.9012, "step": 76910 }, { "epoch": 22.128883774453396, "grad_norm": 3.6840224266052246, "learning_rate": 0.0015574223245109322, "loss": 0.7196, "step": 76920 }, { "epoch": 22.131760644418872, "grad_norm": 0.842846155166626, "learning_rate": 0.0015573647871116225, "loss": 0.6214, "step": 76930 }, { "epoch": 22.134637514384348, "grad_norm": 1.5221741199493408, "learning_rate": 0.001557307249712313, "loss": 0.7366, "step": 76940 }, { "epoch": 22.137514384349828, "grad_norm": 0.6135041117668152, "learning_rate": 0.0015572497123130034, "loss": 0.6867, "step": 76950 }, { "epoch": 22.140391254315304, "grad_norm": 1.1617978811264038, "learning_rate": 0.001557192174913694, "loss": 0.7148, "step": 76960 }, { "epoch": 22.143268124280784, "grad_norm": 1.3695541620254517, "learning_rate": 0.0015571346375143845, "loss": 0.7217, "step": 76970 }, { "epoch": 22.14614499424626, "grad_norm": 1.231358289718628, "learning_rate": 0.0015570771001150747, "loss": 0.795, "step": 76980 }, { "epoch": 22.149021864211736, "grad_norm": 2.251551389694214, "learning_rate": 0.0015570195627157652, "loss": 0.6282, "step": 76990 }, { "epoch": 22.151898734177216, "grad_norm": 1.1683180332183838, "learning_rate": 0.0015569620253164558, "loss": 0.7404, "step": 77000 }, { "epoch": 22.154775604142692, "grad_norm": 1.476157307624817, "learning_rate": 0.0015569044879171462, "loss": 0.8234, "step": 77010 }, { "epoch": 22.157652474108172, "grad_norm": 1.1364213228225708, "learning_rate": 0.0015568469505178367, "loss": 0.6488, "step": 77020 }, { "epoch": 22.160529344073648, "grad_norm": 1.1369659900665283, "learning_rate": 0.0015567894131185273, "loss": 0.5659, "step": 77030 }, { "epoch": 22.163406214039124, "grad_norm": 1.94343101978302, "learning_rate": 0.0015567318757192174, "loss": 0.7851, "step": 77040 }, { "epoch": 22.166283084004604, "grad_norm": 1.8375831842422485, "learning_rate": 0.001556674338319908, "loss": 0.6827, "step": 77050 }, { "epoch": 22.16915995397008, "grad_norm": 0.7943367958068848, "learning_rate": 0.0015566168009205983, "loss": 0.7241, "step": 77060 }, { "epoch": 22.172036823935557, "grad_norm": 1.071685552597046, "learning_rate": 0.0015565592635212889, "loss": 0.7532, "step": 77070 }, { "epoch": 22.174913693901036, "grad_norm": 0.7545925974845886, "learning_rate": 0.0015565017261219794, "loss": 0.756, "step": 77080 }, { "epoch": 22.177790563866512, "grad_norm": 2.387040615081787, "learning_rate": 0.0015564441887226696, "loss": 0.7765, "step": 77090 }, { "epoch": 22.180667433831992, "grad_norm": 1.1438716650009155, "learning_rate": 0.0015563866513233601, "loss": 0.7492, "step": 77100 }, { "epoch": 22.18354430379747, "grad_norm": 1.0601520538330078, "learning_rate": 0.0015563291139240507, "loss": 0.6255, "step": 77110 }, { "epoch": 22.186421173762945, "grad_norm": 3.440744161605835, "learning_rate": 0.001556271576524741, "loss": 0.8615, "step": 77120 }, { "epoch": 22.189298043728424, "grad_norm": 1.2649255990982056, "learning_rate": 0.0015562140391254316, "loss": 0.8438, "step": 77130 }, { "epoch": 22.1921749136939, "grad_norm": 1.0462024211883545, "learning_rate": 0.0015561565017261222, "loss": 0.75, "step": 77140 }, { "epoch": 22.19505178365938, "grad_norm": 1.397282600402832, "learning_rate": 0.0015560989643268123, "loss": 0.8328, "step": 77150 }, { "epoch": 22.197928653624857, "grad_norm": 2.176582098007202, "learning_rate": 0.0015560414269275029, "loss": 0.6927, "step": 77160 }, { "epoch": 22.200805523590333, "grad_norm": 1.2011991739273071, "learning_rate": 0.0015559838895281932, "loss": 0.793, "step": 77170 }, { "epoch": 22.203682393555813, "grad_norm": 1.1916098594665527, "learning_rate": 0.0015559263521288838, "loss": 0.7515, "step": 77180 }, { "epoch": 22.20655926352129, "grad_norm": 0.5221656560897827, "learning_rate": 0.0015558688147295744, "loss": 0.8702, "step": 77190 }, { "epoch": 22.209436133486765, "grad_norm": 1.9110711812973022, "learning_rate": 0.0015558112773302647, "loss": 0.7113, "step": 77200 }, { "epoch": 22.212313003452245, "grad_norm": 1.3161227703094482, "learning_rate": 0.001555753739930955, "loss": 0.6649, "step": 77210 }, { "epoch": 22.21518987341772, "grad_norm": 1.1232930421829224, "learning_rate": 0.0015556962025316456, "loss": 0.722, "step": 77220 }, { "epoch": 22.2180667433832, "grad_norm": 0.8466033935546875, "learning_rate": 0.001555638665132336, "loss": 0.662, "step": 77230 }, { "epoch": 22.220943613348677, "grad_norm": 1.6049864292144775, "learning_rate": 0.0015555811277330265, "loss": 0.6319, "step": 77240 }, { "epoch": 22.223820483314153, "grad_norm": 1.0992166996002197, "learning_rate": 0.001555523590333717, "loss": 0.5817, "step": 77250 }, { "epoch": 22.226697353279633, "grad_norm": 0.699918270111084, "learning_rate": 0.0015554660529344074, "loss": 0.7749, "step": 77260 }, { "epoch": 22.22957422324511, "grad_norm": 1.0296367406845093, "learning_rate": 0.0015554085155350978, "loss": 0.5535, "step": 77270 }, { "epoch": 22.232451093210585, "grad_norm": 1.5726792812347412, "learning_rate": 0.0015553509781357881, "loss": 0.6983, "step": 77280 }, { "epoch": 22.235327963176065, "grad_norm": 1.0986804962158203, "learning_rate": 0.0015552934407364787, "loss": 0.565, "step": 77290 }, { "epoch": 22.23820483314154, "grad_norm": 0.8876523375511169, "learning_rate": 0.0015552359033371693, "loss": 0.6363, "step": 77300 }, { "epoch": 22.24108170310702, "grad_norm": 1.3145016431808472, "learning_rate": 0.0015551783659378596, "loss": 0.5983, "step": 77310 }, { "epoch": 22.243958573072497, "grad_norm": 0.9220057129859924, "learning_rate": 0.0015551208285385502, "loss": 0.7436, "step": 77320 }, { "epoch": 22.246835443037973, "grad_norm": 1.4911470413208008, "learning_rate": 0.0015550632911392405, "loss": 0.716, "step": 77330 }, { "epoch": 22.249712313003453, "grad_norm": 1.4134390354156494, "learning_rate": 0.0015550057537399309, "loss": 0.6676, "step": 77340 }, { "epoch": 22.25258918296893, "grad_norm": 1.0822051763534546, "learning_rate": 0.0015549482163406214, "loss": 0.7406, "step": 77350 }, { "epoch": 22.25546605293441, "grad_norm": 1.4104502201080322, "learning_rate": 0.001554890678941312, "loss": 0.6758, "step": 77360 }, { "epoch": 22.258342922899885, "grad_norm": 2.321183204650879, "learning_rate": 0.0015548331415420023, "loss": 0.857, "step": 77370 }, { "epoch": 22.26121979286536, "grad_norm": 1.9081357717514038, "learning_rate": 0.001554775604142693, "loss": 0.7682, "step": 77380 }, { "epoch": 22.26409666283084, "grad_norm": 1.3245126008987427, "learning_rate": 0.0015547180667433832, "loss": 0.8595, "step": 77390 }, { "epoch": 22.266973532796317, "grad_norm": 1.2875367403030396, "learning_rate": 0.0015546605293440736, "loss": 0.7581, "step": 77400 }, { "epoch": 22.269850402761794, "grad_norm": 1.603729248046875, "learning_rate": 0.0015546029919447642, "loss": 0.5645, "step": 77410 }, { "epoch": 22.272727272727273, "grad_norm": 1.6815993785858154, "learning_rate": 0.0015545454545454545, "loss": 0.6948, "step": 77420 }, { "epoch": 22.27560414269275, "grad_norm": 1.7129079103469849, "learning_rate": 0.001554487917146145, "loss": 0.6954, "step": 77430 }, { "epoch": 22.27848101265823, "grad_norm": 2.068422555923462, "learning_rate": 0.0015544303797468356, "loss": 0.7088, "step": 77440 }, { "epoch": 22.281357882623706, "grad_norm": 2.4328866004943848, "learning_rate": 0.0015543728423475258, "loss": 0.6391, "step": 77450 }, { "epoch": 22.28423475258918, "grad_norm": 1.196398138999939, "learning_rate": 0.0015543153049482163, "loss": 0.6882, "step": 77460 }, { "epoch": 22.28711162255466, "grad_norm": 1.2631580829620361, "learning_rate": 0.0015542577675489069, "loss": 0.7255, "step": 77470 }, { "epoch": 22.289988492520138, "grad_norm": 1.39301335811615, "learning_rate": 0.0015542002301495972, "loss": 0.702, "step": 77480 }, { "epoch": 22.292865362485614, "grad_norm": 1.3104989528656006, "learning_rate": 0.0015541426927502878, "loss": 0.8173, "step": 77490 }, { "epoch": 22.295742232451094, "grad_norm": 2.0953195095062256, "learning_rate": 0.0015540851553509784, "loss": 0.8155, "step": 77500 }, { "epoch": 22.29861910241657, "grad_norm": 1.7230448722839355, "learning_rate": 0.0015540276179516685, "loss": 0.7731, "step": 77510 }, { "epoch": 22.30149597238205, "grad_norm": 1.0399316549301147, "learning_rate": 0.001553970080552359, "loss": 0.7834, "step": 77520 }, { "epoch": 22.304372842347526, "grad_norm": 1.9067409038543701, "learning_rate": 0.0015539125431530494, "loss": 1.0004, "step": 77530 }, { "epoch": 22.307249712313002, "grad_norm": 1.147297739982605, "learning_rate": 0.00155385500575374, "loss": 0.6633, "step": 77540 }, { "epoch": 22.310126582278482, "grad_norm": 2.25766921043396, "learning_rate": 0.0015537974683544305, "loss": 0.858, "step": 77550 }, { "epoch": 22.313003452243958, "grad_norm": 1.1074726581573486, "learning_rate": 0.0015537399309551209, "loss": 0.7834, "step": 77560 }, { "epoch": 22.315880322209438, "grad_norm": 2.596228837966919, "learning_rate": 0.0015536823935558112, "loss": 0.717, "step": 77570 }, { "epoch": 22.318757192174914, "grad_norm": 1.125535249710083, "learning_rate": 0.0015536248561565018, "loss": 0.726, "step": 77580 }, { "epoch": 22.32163406214039, "grad_norm": 2.011291742324829, "learning_rate": 0.0015535673187571921, "loss": 0.5782, "step": 77590 }, { "epoch": 22.32451093210587, "grad_norm": 1.0944006443023682, "learning_rate": 0.0015535097813578827, "loss": 0.7814, "step": 77600 }, { "epoch": 22.327387802071346, "grad_norm": 1.5105725526809692, "learning_rate": 0.0015534522439585733, "loss": 0.7323, "step": 77610 }, { "epoch": 22.330264672036822, "grad_norm": 1.2274760007858276, "learning_rate": 0.0015533947065592636, "loss": 0.8328, "step": 77620 }, { "epoch": 22.333141542002302, "grad_norm": 1.3221538066864014, "learning_rate": 0.001553337169159954, "loss": 0.5726, "step": 77630 }, { "epoch": 22.33601841196778, "grad_norm": 1.759621500968933, "learning_rate": 0.0015532796317606443, "loss": 0.6373, "step": 77640 }, { "epoch": 22.338895281933258, "grad_norm": 1.3873306512832642, "learning_rate": 0.0015532220943613349, "loss": 0.7061, "step": 77650 }, { "epoch": 22.341772151898734, "grad_norm": 1.512017011642456, "learning_rate": 0.0015531645569620254, "loss": 0.6497, "step": 77660 }, { "epoch": 22.34464902186421, "grad_norm": 1.948335886001587, "learning_rate": 0.0015531070195627158, "loss": 0.8691, "step": 77670 }, { "epoch": 22.34752589182969, "grad_norm": 1.0361568927764893, "learning_rate": 0.0015530494821634063, "loss": 0.6967, "step": 77680 }, { "epoch": 22.350402761795166, "grad_norm": 1.416222333908081, "learning_rate": 0.0015529919447640967, "loss": 0.5949, "step": 77690 }, { "epoch": 22.353279631760646, "grad_norm": 0.6587647199630737, "learning_rate": 0.001552934407364787, "loss": 0.8409, "step": 77700 }, { "epoch": 22.356156501726122, "grad_norm": 0.933779776096344, "learning_rate": 0.0015528768699654776, "loss": 0.6851, "step": 77710 }, { "epoch": 22.3590333716916, "grad_norm": 0.91652911901474, "learning_rate": 0.0015528193325661682, "loss": 0.7031, "step": 77720 }, { "epoch": 22.36191024165708, "grad_norm": 0.8941243886947632, "learning_rate": 0.0015527617951668585, "loss": 0.6596, "step": 77730 }, { "epoch": 22.364787111622555, "grad_norm": 1.1665152311325073, "learning_rate": 0.001552704257767549, "loss": 0.6497, "step": 77740 }, { "epoch": 22.36766398158803, "grad_norm": 0.9275276064872742, "learning_rate": 0.0015526467203682392, "loss": 0.7095, "step": 77750 }, { "epoch": 22.37054085155351, "grad_norm": 1.410881519317627, "learning_rate": 0.0015525891829689298, "loss": 0.7994, "step": 77760 }, { "epoch": 22.373417721518987, "grad_norm": 0.9932916760444641, "learning_rate": 0.0015525316455696203, "loss": 0.6347, "step": 77770 }, { "epoch": 22.376294591484466, "grad_norm": 0.9947594404220581, "learning_rate": 0.0015524741081703107, "loss": 0.8096, "step": 77780 }, { "epoch": 22.379171461449943, "grad_norm": 3.098374128341675, "learning_rate": 0.0015524165707710012, "loss": 0.7509, "step": 77790 }, { "epoch": 22.38204833141542, "grad_norm": 1.6805930137634277, "learning_rate": 0.0015523590333716918, "loss": 0.5803, "step": 77800 }, { "epoch": 22.3849252013809, "grad_norm": 1.337093472480774, "learning_rate": 0.001552301495972382, "loss": 0.7114, "step": 77810 }, { "epoch": 22.387802071346375, "grad_norm": 0.6869626045227051, "learning_rate": 0.0015522439585730725, "loss": 0.6344, "step": 77820 }, { "epoch": 22.39067894131185, "grad_norm": 1.4309523105621338, "learning_rate": 0.001552186421173763, "loss": 0.5907, "step": 77830 }, { "epoch": 22.39355581127733, "grad_norm": 1.5918134450912476, "learning_rate": 0.0015521288837744534, "loss": 0.5575, "step": 77840 }, { "epoch": 22.396432681242807, "grad_norm": 2.1140575408935547, "learning_rate": 0.001552071346375144, "loss": 0.7855, "step": 77850 }, { "epoch": 22.399309551208287, "grad_norm": 1.0030266046524048, "learning_rate": 0.0015520138089758341, "loss": 0.8162, "step": 77860 }, { "epoch": 22.402186421173763, "grad_norm": 2.8045387268066406, "learning_rate": 0.0015519562715765247, "loss": 0.8328, "step": 77870 }, { "epoch": 22.40506329113924, "grad_norm": 1.0198036432266235, "learning_rate": 0.0015518987341772152, "loss": 0.8057, "step": 77880 }, { "epoch": 22.40794016110472, "grad_norm": 1.4324110746383667, "learning_rate": 0.0015518411967779056, "loss": 0.4971, "step": 77890 }, { "epoch": 22.410817031070195, "grad_norm": 2.0752880573272705, "learning_rate": 0.0015517836593785962, "loss": 0.689, "step": 77900 }, { "epoch": 22.413693901035675, "grad_norm": 2.2761549949645996, "learning_rate": 0.0015517261219792867, "loss": 0.7681, "step": 77910 }, { "epoch": 22.41657077100115, "grad_norm": 1.7141004800796509, "learning_rate": 0.0015516685845799768, "loss": 0.69, "step": 77920 }, { "epoch": 22.419447640966627, "grad_norm": 1.4218106269836426, "learning_rate": 0.0015516110471806674, "loss": 0.6181, "step": 77930 }, { "epoch": 22.422324510932107, "grad_norm": 0.8162327408790588, "learning_rate": 0.001551553509781358, "loss": 0.5835, "step": 77940 }, { "epoch": 22.425201380897583, "grad_norm": 2.13080096244812, "learning_rate": 0.0015514959723820483, "loss": 0.7455, "step": 77950 }, { "epoch": 22.42807825086306, "grad_norm": 0.9446977376937866, "learning_rate": 0.0015514384349827389, "loss": 0.69, "step": 77960 }, { "epoch": 22.43095512082854, "grad_norm": 1.0634182691574097, "learning_rate": 0.0015513808975834292, "loss": 0.7123, "step": 77970 }, { "epoch": 22.433831990794015, "grad_norm": 1.163977861404419, "learning_rate": 0.0015513233601841196, "loss": 0.6157, "step": 77980 }, { "epoch": 22.436708860759495, "grad_norm": 1.553653359413147, "learning_rate": 0.0015512658227848101, "loss": 0.7128, "step": 77990 }, { "epoch": 22.43958573072497, "grad_norm": 1.7804204225540161, "learning_rate": 0.0015512082853855005, "loss": 0.9371, "step": 78000 }, { "epoch": 22.442462600690448, "grad_norm": 0.8185824155807495, "learning_rate": 0.001551150747986191, "loss": 0.6425, "step": 78010 }, { "epoch": 22.445339470655927, "grad_norm": 1.3435999155044556, "learning_rate": 0.0015510932105868816, "loss": 0.7557, "step": 78020 }, { "epoch": 22.448216340621403, "grad_norm": 0.9829174280166626, "learning_rate": 0.001551035673187572, "loss": 0.66, "step": 78030 }, { "epoch": 22.451093210586883, "grad_norm": 1.2837727069854736, "learning_rate": 0.0015509781357882623, "loss": 0.6715, "step": 78040 }, { "epoch": 22.45397008055236, "grad_norm": 1.0172085762023926, "learning_rate": 0.0015509205983889529, "loss": 0.7361, "step": 78050 }, { "epoch": 22.456846950517836, "grad_norm": 1.4068464040756226, "learning_rate": 0.0015508630609896432, "loss": 0.7621, "step": 78060 }, { "epoch": 22.459723820483315, "grad_norm": 1.6094343662261963, "learning_rate": 0.0015508055235903338, "loss": 0.8289, "step": 78070 }, { "epoch": 22.46260069044879, "grad_norm": 2.8125219345092773, "learning_rate": 0.0015507479861910243, "loss": 0.9265, "step": 78080 }, { "epoch": 22.465477560414268, "grad_norm": 0.8077828884124756, "learning_rate": 0.0015506904487917147, "loss": 0.8237, "step": 78090 }, { "epoch": 22.468354430379748, "grad_norm": 1.1616326570510864, "learning_rate": 0.001550632911392405, "loss": 0.686, "step": 78100 }, { "epoch": 22.471231300345224, "grad_norm": 1.5916714668273926, "learning_rate": 0.0015505753739930954, "loss": 0.7453, "step": 78110 }, { "epoch": 22.474108170310704, "grad_norm": 2.2950093746185303, "learning_rate": 0.001550517836593786, "loss": 0.7432, "step": 78120 }, { "epoch": 22.47698504027618, "grad_norm": 2.2084712982177734, "learning_rate": 0.0015504602991944765, "loss": 0.6604, "step": 78130 }, { "epoch": 22.479861910241656, "grad_norm": 0.8573158383369446, "learning_rate": 0.0015504027617951669, "loss": 0.6103, "step": 78140 }, { "epoch": 22.482738780207136, "grad_norm": 1.422505497932434, "learning_rate": 0.0015503452243958574, "loss": 0.8424, "step": 78150 }, { "epoch": 22.485615650172612, "grad_norm": 1.0499367713928223, "learning_rate": 0.0015502876869965478, "loss": 0.6631, "step": 78160 }, { "epoch": 22.488492520138088, "grad_norm": 0.9534755945205688, "learning_rate": 0.0015502301495972381, "loss": 0.5751, "step": 78170 }, { "epoch": 22.491369390103568, "grad_norm": 1.7648391723632812, "learning_rate": 0.0015501726121979287, "loss": 0.6335, "step": 78180 }, { "epoch": 22.494246260069044, "grad_norm": 0.7176330089569092, "learning_rate": 0.0015501150747986193, "loss": 0.6191, "step": 78190 }, { "epoch": 22.497123130034524, "grad_norm": 1.6023601293563843, "learning_rate": 0.0015500575373993096, "loss": 0.711, "step": 78200 }, { "epoch": 22.5, "grad_norm": 1.560456395149231, "learning_rate": 0.0015500000000000002, "loss": 0.6964, "step": 78210 }, { "epoch": 22.502876869965476, "grad_norm": 1.1841154098510742, "learning_rate": 0.0015499424626006903, "loss": 0.6446, "step": 78220 }, { "epoch": 22.505753739930956, "grad_norm": 1.1760839223861694, "learning_rate": 0.0015498849252013809, "loss": 0.9326, "step": 78230 }, { "epoch": 22.508630609896432, "grad_norm": 1.252441167831421, "learning_rate": 0.0015498273878020714, "loss": 0.8151, "step": 78240 }, { "epoch": 22.511507479861912, "grad_norm": 1.3017767667770386, "learning_rate": 0.0015497698504027618, "loss": 0.8497, "step": 78250 }, { "epoch": 22.514384349827388, "grad_norm": 1.2166752815246582, "learning_rate": 0.0015497123130034523, "loss": 0.8594, "step": 78260 }, { "epoch": 22.517261219792864, "grad_norm": 4.0303544998168945, "learning_rate": 0.001549654775604143, "loss": 0.5909, "step": 78270 }, { "epoch": 22.520138089758344, "grad_norm": 1.3232324123382568, "learning_rate": 0.001549597238204833, "loss": 0.6156, "step": 78280 }, { "epoch": 22.52301495972382, "grad_norm": 1.825039029121399, "learning_rate": 0.0015495397008055236, "loss": 0.748, "step": 78290 }, { "epoch": 22.525891829689296, "grad_norm": 1.7223286628723145, "learning_rate": 0.0015494821634062142, "loss": 0.8664, "step": 78300 }, { "epoch": 22.528768699654776, "grad_norm": 1.435100793838501, "learning_rate": 0.0015494246260069045, "loss": 0.6739, "step": 78310 }, { "epoch": 22.531645569620252, "grad_norm": 2.377264976501465, "learning_rate": 0.001549367088607595, "loss": 0.7578, "step": 78320 }, { "epoch": 22.534522439585732, "grad_norm": 1.5274031162261963, "learning_rate": 0.0015493095512082854, "loss": 0.8412, "step": 78330 }, { "epoch": 22.53739930955121, "grad_norm": 2.162350654602051, "learning_rate": 0.0015492520138089758, "loss": 0.6898, "step": 78340 }, { "epoch": 22.540276179516685, "grad_norm": 1.0965279340744019, "learning_rate": 0.0015491944764096663, "loss": 0.6119, "step": 78350 }, { "epoch": 22.543153049482164, "grad_norm": 1.5379395484924316, "learning_rate": 0.0015491369390103567, "loss": 0.8607, "step": 78360 }, { "epoch": 22.54602991944764, "grad_norm": 1.5277785062789917, "learning_rate": 0.0015490794016110472, "loss": 0.6935, "step": 78370 }, { "epoch": 22.548906789413117, "grad_norm": 2.0377981662750244, "learning_rate": 0.0015490218642117378, "loss": 0.6999, "step": 78380 }, { "epoch": 22.551783659378597, "grad_norm": 1.7557706832885742, "learning_rate": 0.0015489643268124281, "loss": 0.6205, "step": 78390 }, { "epoch": 22.554660529344073, "grad_norm": 1.5421559810638428, "learning_rate": 0.0015489067894131185, "loss": 0.7086, "step": 78400 }, { "epoch": 22.557537399309552, "grad_norm": 1.661393165588379, "learning_rate": 0.001548849252013809, "loss": 0.7462, "step": 78410 }, { "epoch": 22.56041426927503, "grad_norm": 1.3516793251037598, "learning_rate": 0.0015487917146144994, "loss": 0.7161, "step": 78420 }, { "epoch": 22.563291139240505, "grad_norm": 1.44784677028656, "learning_rate": 0.00154873417721519, "loss": 0.5661, "step": 78430 }, { "epoch": 22.566168009205985, "grad_norm": 1.3980118036270142, "learning_rate": 0.0015486766398158803, "loss": 0.5887, "step": 78440 }, { "epoch": 22.56904487917146, "grad_norm": 0.8620604872703552, "learning_rate": 0.0015486191024165709, "loss": 0.8286, "step": 78450 }, { "epoch": 22.57192174913694, "grad_norm": 1.3440412282943726, "learning_rate": 0.0015485615650172612, "loss": 0.5785, "step": 78460 }, { "epoch": 22.574798619102417, "grad_norm": 1.4446560144424438, "learning_rate": 0.0015485040276179516, "loss": 0.8261, "step": 78470 }, { "epoch": 22.577675489067893, "grad_norm": 1.1524158716201782, "learning_rate": 0.0015484464902186421, "loss": 0.7802, "step": 78480 }, { "epoch": 22.580552359033373, "grad_norm": 1.2377492189407349, "learning_rate": 0.0015483889528193327, "loss": 0.7057, "step": 78490 }, { "epoch": 22.58342922899885, "grad_norm": 4.169656753540039, "learning_rate": 0.001548331415420023, "loss": 0.6722, "step": 78500 }, { "epoch": 22.586306098964325, "grad_norm": 0.9419495463371277, "learning_rate": 0.0015482738780207136, "loss": 0.7162, "step": 78510 }, { "epoch": 22.589182968929805, "grad_norm": 1.2682026624679565, "learning_rate": 0.001548216340621404, "loss": 0.7408, "step": 78520 }, { "epoch": 22.59205983889528, "grad_norm": 1.8444244861602783, "learning_rate": 0.0015481588032220943, "loss": 0.7829, "step": 78530 }, { "epoch": 22.59493670886076, "grad_norm": 1.6894261837005615, "learning_rate": 0.0015481012658227849, "loss": 0.7305, "step": 78540 }, { "epoch": 22.597813578826237, "grad_norm": 2.0683884620666504, "learning_rate": 0.0015480437284234752, "loss": 0.7, "step": 78550 }, { "epoch": 22.600690448791713, "grad_norm": 1.5863146781921387, "learning_rate": 0.0015479861910241658, "loss": 0.7545, "step": 78560 }, { "epoch": 22.603567318757193, "grad_norm": 1.551046371459961, "learning_rate": 0.0015479286536248563, "loss": 0.7102, "step": 78570 }, { "epoch": 22.60644418872267, "grad_norm": 0.9495275616645813, "learning_rate": 0.0015478711162255465, "loss": 0.705, "step": 78580 }, { "epoch": 22.60932105868815, "grad_norm": 2.2772979736328125, "learning_rate": 0.001547813578826237, "loss": 0.733, "step": 78590 }, { "epoch": 22.612197928653625, "grad_norm": 1.3252793550491333, "learning_rate": 0.0015477560414269276, "loss": 0.6221, "step": 78600 }, { "epoch": 22.6150747986191, "grad_norm": 1.6094026565551758, "learning_rate": 0.001547698504027618, "loss": 0.8595, "step": 78610 }, { "epoch": 22.61795166858458, "grad_norm": 1.3105685710906982, "learning_rate": 0.0015476409666283085, "loss": 0.7034, "step": 78620 }, { "epoch": 22.620828538550057, "grad_norm": 0.8795315027236938, "learning_rate": 0.001547583429228999, "loss": 0.7714, "step": 78630 }, { "epoch": 22.623705408515534, "grad_norm": 1.5634326934814453, "learning_rate": 0.0015475258918296892, "loss": 0.6743, "step": 78640 }, { "epoch": 22.626582278481013, "grad_norm": 1.8517704010009766, "learning_rate": 0.0015474683544303798, "loss": 0.6277, "step": 78650 }, { "epoch": 22.62945914844649, "grad_norm": 1.0145940780639648, "learning_rate": 0.0015474108170310701, "loss": 0.801, "step": 78660 }, { "epoch": 22.63233601841197, "grad_norm": 1.410730004310608, "learning_rate": 0.0015473532796317607, "loss": 0.6351, "step": 78670 }, { "epoch": 22.635212888377445, "grad_norm": 0.9539754986763, "learning_rate": 0.0015472957422324512, "loss": 0.729, "step": 78680 }, { "epoch": 22.63808975834292, "grad_norm": 1.211690902709961, "learning_rate": 0.0015472382048331414, "loss": 0.6192, "step": 78690 }, { "epoch": 22.6409666283084, "grad_norm": 1.7397844791412354, "learning_rate": 0.001547180667433832, "loss": 0.8109, "step": 78700 }, { "epoch": 22.643843498273878, "grad_norm": 1.5959950685501099, "learning_rate": 0.0015471231300345225, "loss": 0.8939, "step": 78710 }, { "epoch": 22.646720368239357, "grad_norm": 3.973269462585449, "learning_rate": 0.0015470655926352129, "loss": 0.905, "step": 78720 }, { "epoch": 22.649597238204834, "grad_norm": 1.1230109930038452, "learning_rate": 0.0015470080552359034, "loss": 0.7594, "step": 78730 }, { "epoch": 22.65247410817031, "grad_norm": 0.9126788973808289, "learning_rate": 0.001546950517836594, "loss": 0.6652, "step": 78740 }, { "epoch": 22.65535097813579, "grad_norm": 0.8982163667678833, "learning_rate": 0.0015468929804372841, "loss": 0.5495, "step": 78750 }, { "epoch": 22.658227848101266, "grad_norm": 1.0288734436035156, "learning_rate": 0.0015468354430379747, "loss": 0.6768, "step": 78760 }, { "epoch": 22.661104718066742, "grad_norm": 1.1071182489395142, "learning_rate": 0.0015467779056386652, "loss": 0.7508, "step": 78770 }, { "epoch": 22.66398158803222, "grad_norm": 2.2082486152648926, "learning_rate": 0.0015467203682393556, "loss": 0.6629, "step": 78780 }, { "epoch": 22.666858457997698, "grad_norm": 1.9764482975006104, "learning_rate": 0.0015466628308400461, "loss": 0.8626, "step": 78790 }, { "epoch": 22.669735327963178, "grad_norm": 1.138241171836853, "learning_rate": 0.0015466052934407365, "loss": 0.8144, "step": 78800 }, { "epoch": 22.672612197928654, "grad_norm": 1.619640588760376, "learning_rate": 0.0015465477560414268, "loss": 0.7675, "step": 78810 }, { "epoch": 22.67548906789413, "grad_norm": 1.2621002197265625, "learning_rate": 0.0015464902186421174, "loss": 0.6838, "step": 78820 }, { "epoch": 22.67836593785961, "grad_norm": 0.7570352554321289, "learning_rate": 0.0015464326812428078, "loss": 0.6159, "step": 78830 }, { "epoch": 22.681242807825086, "grad_norm": 3.2364094257354736, "learning_rate": 0.0015463751438434983, "loss": 0.8281, "step": 78840 }, { "epoch": 22.684119677790562, "grad_norm": 1.61232328414917, "learning_rate": 0.0015463176064441889, "loss": 0.7945, "step": 78850 }, { "epoch": 22.686996547756042, "grad_norm": 1.438568353652954, "learning_rate": 0.0015462600690448792, "loss": 0.6427, "step": 78860 }, { "epoch": 22.689873417721518, "grad_norm": 1.1805922985076904, "learning_rate": 0.0015462025316455696, "loss": 0.6391, "step": 78870 }, { "epoch": 22.692750287686998, "grad_norm": 0.9472993612289429, "learning_rate": 0.0015461449942462601, "loss": 0.6253, "step": 78880 }, { "epoch": 22.695627157652474, "grad_norm": 1.0175050497055054, "learning_rate": 0.0015460874568469505, "loss": 0.7402, "step": 78890 }, { "epoch": 22.69850402761795, "grad_norm": 0.929078221321106, "learning_rate": 0.001546029919447641, "loss": 0.7737, "step": 78900 }, { "epoch": 22.70138089758343, "grad_norm": 1.628734827041626, "learning_rate": 0.0015459723820483314, "loss": 0.6213, "step": 78910 }, { "epoch": 22.704257767548906, "grad_norm": 1.093766212463379, "learning_rate": 0.001545914844649022, "loss": 0.764, "step": 78920 }, { "epoch": 22.707134637514386, "grad_norm": 1.2010620832443237, "learning_rate": 0.0015458573072497123, "loss": 0.6476, "step": 78930 }, { "epoch": 22.710011507479862, "grad_norm": 1.2595601081848145, "learning_rate": 0.0015457997698504027, "loss": 0.7546, "step": 78940 }, { "epoch": 22.71288837744534, "grad_norm": 1.9487361907958984, "learning_rate": 0.0015457422324510932, "loss": 0.696, "step": 78950 }, { "epoch": 22.71576524741082, "grad_norm": 1.076504111289978, "learning_rate": 0.0015456846950517838, "loss": 0.6411, "step": 78960 }, { "epoch": 22.718642117376294, "grad_norm": 1.1198012828826904, "learning_rate": 0.0015456271576524741, "loss": 0.7458, "step": 78970 }, { "epoch": 22.72151898734177, "grad_norm": 1.7070125341415405, "learning_rate": 0.0015455696202531647, "loss": 0.7694, "step": 78980 }, { "epoch": 22.72439585730725, "grad_norm": 0.6448691487312317, "learning_rate": 0.001545512082853855, "loss": 0.8244, "step": 78990 }, { "epoch": 22.727272727272727, "grad_norm": 1.4147590398788452, "learning_rate": 0.0015454545454545454, "loss": 0.7969, "step": 79000 }, { "epoch": 22.730149597238206, "grad_norm": 0.920799195766449, "learning_rate": 0.001545397008055236, "loss": 0.5935, "step": 79010 }, { "epoch": 22.733026467203683, "grad_norm": 2.7676103115081787, "learning_rate": 0.0015453394706559263, "loss": 0.8104, "step": 79020 }, { "epoch": 22.73590333716916, "grad_norm": 0.9141708612442017, "learning_rate": 0.0015452819332566169, "loss": 0.6608, "step": 79030 }, { "epoch": 22.73878020713464, "grad_norm": 1.2930469512939453, "learning_rate": 0.0015452243958573074, "loss": 0.6518, "step": 79040 }, { "epoch": 22.741657077100115, "grad_norm": 2.0557310581207275, "learning_rate": 0.0015451668584579976, "loss": 0.6689, "step": 79050 }, { "epoch": 22.74453394706559, "grad_norm": 0.705346941947937, "learning_rate": 0.0015451093210586881, "loss": 0.6867, "step": 79060 }, { "epoch": 22.74741081703107, "grad_norm": 0.5241779088973999, "learning_rate": 0.0015450517836593787, "loss": 0.6665, "step": 79070 }, { "epoch": 22.750287686996547, "grad_norm": 1.376611590385437, "learning_rate": 0.001544994246260069, "loss": 0.6342, "step": 79080 }, { "epoch": 22.753164556962027, "grad_norm": 1.5529561042785645, "learning_rate": 0.0015449367088607596, "loss": 0.7954, "step": 79090 }, { "epoch": 22.756041426927503, "grad_norm": 1.465307354927063, "learning_rate": 0.0015448791714614502, "loss": 0.6786, "step": 79100 }, { "epoch": 22.75891829689298, "grad_norm": 1.207040786743164, "learning_rate": 0.0015448216340621403, "loss": 0.6189, "step": 79110 }, { "epoch": 22.76179516685846, "grad_norm": 1.5362430810928345, "learning_rate": 0.0015447640966628309, "loss": 0.7687, "step": 79120 }, { "epoch": 22.764672036823935, "grad_norm": 1.392293930053711, "learning_rate": 0.0015447065592635212, "loss": 0.7037, "step": 79130 }, { "epoch": 22.767548906789415, "grad_norm": 1.0641602277755737, "learning_rate": 0.0015446490218642118, "loss": 0.7046, "step": 79140 }, { "epoch": 22.77042577675489, "grad_norm": 1.2531520128250122, "learning_rate": 0.0015445914844649023, "loss": 0.7653, "step": 79150 }, { "epoch": 22.773302646720367, "grad_norm": 1.224913477897644, "learning_rate": 0.0015445339470655927, "loss": 0.7624, "step": 79160 }, { "epoch": 22.776179516685847, "grad_norm": 0.9214589595794678, "learning_rate": 0.001544476409666283, "loss": 0.802, "step": 79170 }, { "epoch": 22.779056386651323, "grad_norm": 1.0021783113479614, "learning_rate": 0.0015444188722669736, "loss": 0.7696, "step": 79180 }, { "epoch": 22.7819332566168, "grad_norm": 1.1644723415374756, "learning_rate": 0.001544361334867664, "loss": 0.7187, "step": 79190 }, { "epoch": 22.78481012658228, "grad_norm": 0.9535892009735107, "learning_rate": 0.0015443037974683545, "loss": 0.7632, "step": 79200 }, { "epoch": 22.787686996547755, "grad_norm": 1.162147879600525, "learning_rate": 0.001544246260069045, "loss": 0.679, "step": 79210 }, { "epoch": 22.790563866513235, "grad_norm": 1.0881069898605347, "learning_rate": 0.0015441887226697354, "loss": 0.7651, "step": 79220 }, { "epoch": 22.79344073647871, "grad_norm": 1.8759063482284546, "learning_rate": 0.0015441311852704258, "loss": 0.8153, "step": 79230 }, { "epoch": 22.796317606444187, "grad_norm": 1.2042491436004639, "learning_rate": 0.001544073647871116, "loss": 0.66, "step": 79240 }, { "epoch": 22.799194476409667, "grad_norm": 0.9802552461624146, "learning_rate": 0.0015440161104718067, "loss": 0.6517, "step": 79250 }, { "epoch": 22.802071346375143, "grad_norm": 1.3281992673873901, "learning_rate": 0.0015439585730724972, "loss": 0.6585, "step": 79260 }, { "epoch": 22.80494821634062, "grad_norm": 1.2160396575927734, "learning_rate": 0.0015439010356731876, "loss": 0.5887, "step": 79270 }, { "epoch": 22.8078250863061, "grad_norm": 1.841870665550232, "learning_rate": 0.0015438434982738781, "loss": 0.6103, "step": 79280 }, { "epoch": 22.810701956271576, "grad_norm": 1.6598323583602905, "learning_rate": 0.0015437859608745685, "loss": 0.8536, "step": 79290 }, { "epoch": 22.813578826237055, "grad_norm": 1.6144566535949707, "learning_rate": 0.0015437284234752588, "loss": 0.7359, "step": 79300 }, { "epoch": 22.81645569620253, "grad_norm": 1.0260487794876099, "learning_rate": 0.0015436708860759494, "loss": 0.6274, "step": 79310 }, { "epoch": 22.819332566168008, "grad_norm": 3.6414923667907715, "learning_rate": 0.00154361334867664, "loss": 0.7525, "step": 79320 }, { "epoch": 22.822209436133488, "grad_norm": 1.8767032623291016, "learning_rate": 0.0015435558112773303, "loss": 0.7126, "step": 79330 }, { "epoch": 22.825086306098964, "grad_norm": 1.609101414680481, "learning_rate": 0.0015434982738780209, "loss": 0.623, "step": 79340 }, { "epoch": 22.827963176064443, "grad_norm": 0.8807953596115112, "learning_rate": 0.0015434407364787112, "loss": 0.7117, "step": 79350 }, { "epoch": 22.83084004602992, "grad_norm": 0.9755801558494568, "learning_rate": 0.0015433831990794016, "loss": 0.6196, "step": 79360 }, { "epoch": 22.833716915995396, "grad_norm": 1.8092268705368042, "learning_rate": 0.0015433256616800921, "loss": 0.7493, "step": 79370 }, { "epoch": 22.836593785960876, "grad_norm": 1.212978482246399, "learning_rate": 0.0015432681242807825, "loss": 0.7701, "step": 79380 }, { "epoch": 22.839470655926352, "grad_norm": 1.575276494026184, "learning_rate": 0.001543210586881473, "loss": 0.7615, "step": 79390 }, { "epoch": 22.842347525891828, "grad_norm": 1.2353928089141846, "learning_rate": 0.0015431530494821636, "loss": 0.7306, "step": 79400 }, { "epoch": 22.845224395857308, "grad_norm": 2.34096097946167, "learning_rate": 0.0015430955120828537, "loss": 0.8586, "step": 79410 }, { "epoch": 22.848101265822784, "grad_norm": 1.7258135080337524, "learning_rate": 0.0015430379746835443, "loss": 0.6342, "step": 79420 }, { "epoch": 22.850978135788264, "grad_norm": 1.7365007400512695, "learning_rate": 0.0015429804372842349, "loss": 0.7433, "step": 79430 }, { "epoch": 22.85385500575374, "grad_norm": 2.459602117538452, "learning_rate": 0.0015429228998849252, "loss": 1.032, "step": 79440 }, { "epoch": 22.856731875719216, "grad_norm": 1.4493708610534668, "learning_rate": 0.0015428653624856158, "loss": 0.7714, "step": 79450 }, { "epoch": 22.859608745684696, "grad_norm": 1.3994622230529785, "learning_rate": 0.0015428078250863063, "loss": 0.7799, "step": 79460 }, { "epoch": 22.862485615650172, "grad_norm": 0.9458714723587036, "learning_rate": 0.0015427502876869965, "loss": 0.7179, "step": 79470 }, { "epoch": 22.865362485615652, "grad_norm": 1.1495229005813599, "learning_rate": 0.001542692750287687, "loss": 0.7108, "step": 79480 }, { "epoch": 22.868239355581128, "grad_norm": 1.5540920495986938, "learning_rate": 0.0015426352128883774, "loss": 0.6654, "step": 79490 }, { "epoch": 22.871116225546604, "grad_norm": 1.729505181312561, "learning_rate": 0.001542577675489068, "loss": 0.6499, "step": 79500 }, { "epoch": 22.873993095512084, "grad_norm": 1.8486621379852295, "learning_rate": 0.0015425201380897585, "loss": 0.6824, "step": 79510 }, { "epoch": 22.87686996547756, "grad_norm": 2.2027587890625, "learning_rate": 0.0015424626006904486, "loss": 0.6614, "step": 79520 }, { "epoch": 22.879746835443036, "grad_norm": 1.0428177118301392, "learning_rate": 0.0015424050632911392, "loss": 0.6812, "step": 79530 }, { "epoch": 22.882623705408516, "grad_norm": 0.942384660243988, "learning_rate": 0.0015423475258918298, "loss": 0.6671, "step": 79540 }, { "epoch": 22.885500575373992, "grad_norm": 1.6222354173660278, "learning_rate": 0.0015422899884925201, "loss": 0.7231, "step": 79550 }, { "epoch": 22.888377445339472, "grad_norm": 0.9665540456771851, "learning_rate": 0.0015422324510932107, "loss": 0.6676, "step": 79560 }, { "epoch": 22.89125431530495, "grad_norm": 1.327761173248291, "learning_rate": 0.0015421749136939012, "loss": 0.7381, "step": 79570 }, { "epoch": 22.894131185270425, "grad_norm": 1.1037335395812988, "learning_rate": 0.0015421173762945914, "loss": 0.7096, "step": 79580 }, { "epoch": 22.897008055235904, "grad_norm": 0.70468670129776, "learning_rate": 0.001542059838895282, "loss": 0.623, "step": 79590 }, { "epoch": 22.89988492520138, "grad_norm": 1.2260606288909912, "learning_rate": 0.0015420023014959723, "loss": 0.7351, "step": 79600 }, { "epoch": 22.90276179516686, "grad_norm": 1.4985949993133545, "learning_rate": 0.0015419447640966628, "loss": 0.7402, "step": 79610 }, { "epoch": 22.905638665132336, "grad_norm": 1.5177024602890015, "learning_rate": 0.0015418872266973534, "loss": 0.7814, "step": 79620 }, { "epoch": 22.908515535097813, "grad_norm": 1.5169589519500732, "learning_rate": 0.0015418296892980438, "loss": 0.6691, "step": 79630 }, { "epoch": 22.911392405063292, "grad_norm": 1.0153645277023315, "learning_rate": 0.001541772151898734, "loss": 0.7268, "step": 79640 }, { "epoch": 22.91426927502877, "grad_norm": 1.4591845273971558, "learning_rate": 0.0015417146144994247, "loss": 0.7919, "step": 79650 }, { "epoch": 22.917146144994245, "grad_norm": 0.8842734098434448, "learning_rate": 0.001541657077100115, "loss": 0.7463, "step": 79660 }, { "epoch": 22.920023014959725, "grad_norm": 1.1087017059326172, "learning_rate": 0.0015415995397008056, "loss": 0.7097, "step": 79670 }, { "epoch": 22.9228998849252, "grad_norm": 1.5794826745986938, "learning_rate": 0.0015415420023014961, "loss": 0.7123, "step": 79680 }, { "epoch": 22.92577675489068, "grad_norm": 1.2367334365844727, "learning_rate": 0.0015414844649021865, "loss": 0.7796, "step": 79690 }, { "epoch": 22.928653624856157, "grad_norm": 1.1662994623184204, "learning_rate": 0.0015414269275028768, "loss": 0.7782, "step": 79700 }, { "epoch": 22.931530494821633, "grad_norm": 1.538007378578186, "learning_rate": 0.0015413693901035672, "loss": 0.7678, "step": 79710 }, { "epoch": 22.934407364787113, "grad_norm": 1.6023340225219727, "learning_rate": 0.0015413118527042578, "loss": 0.6277, "step": 79720 }, { "epoch": 22.93728423475259, "grad_norm": 1.117121934890747, "learning_rate": 0.0015412543153049483, "loss": 0.5269, "step": 79730 }, { "epoch": 22.940161104718065, "grad_norm": 1.3095837831497192, "learning_rate": 0.0015411967779056387, "loss": 0.7056, "step": 79740 }, { "epoch": 22.943037974683545, "grad_norm": 1.2545197010040283, "learning_rate": 0.0015411392405063292, "loss": 0.8383, "step": 79750 }, { "epoch": 22.94591484464902, "grad_norm": 1.0094208717346191, "learning_rate": 0.0015410817031070196, "loss": 0.5579, "step": 79760 }, { "epoch": 22.9487917146145, "grad_norm": 1.298701286315918, "learning_rate": 0.00154102416570771, "loss": 0.8202, "step": 79770 }, { "epoch": 22.951668584579977, "grad_norm": 1.5597593784332275, "learning_rate": 0.0015409666283084005, "loss": 0.8558, "step": 79780 }, { "epoch": 22.954545454545453, "grad_norm": 0.9970853328704834, "learning_rate": 0.001540909090909091, "loss": 0.6901, "step": 79790 }, { "epoch": 22.957422324510933, "grad_norm": 0.6282816529273987, "learning_rate": 0.0015408515535097814, "loss": 0.7863, "step": 79800 }, { "epoch": 22.96029919447641, "grad_norm": 2.0633342266082764, "learning_rate": 0.001540794016110472, "loss": 0.6525, "step": 79810 }, { "epoch": 22.96317606444189, "grad_norm": 1.551346778869629, "learning_rate": 0.001540736478711162, "loss": 0.7362, "step": 79820 }, { "epoch": 22.966052934407365, "grad_norm": 0.7832872867584229, "learning_rate": 0.0015406789413118527, "loss": 0.8009, "step": 79830 }, { "epoch": 22.96892980437284, "grad_norm": 1.6488898992538452, "learning_rate": 0.0015406214039125432, "loss": 0.6686, "step": 79840 }, { "epoch": 22.97180667433832, "grad_norm": 1.373261570930481, "learning_rate": 0.0015405638665132336, "loss": 0.72, "step": 79850 }, { "epoch": 22.974683544303797, "grad_norm": 1.566951870918274, "learning_rate": 0.0015405063291139241, "loss": 0.6577, "step": 79860 }, { "epoch": 22.977560414269274, "grad_norm": 1.1712743043899536, "learning_rate": 0.0015404487917146147, "loss": 0.719, "step": 79870 }, { "epoch": 22.980437284234753, "grad_norm": 1.3955215215682983, "learning_rate": 0.0015403912543153048, "loss": 0.6912, "step": 79880 }, { "epoch": 22.98331415420023, "grad_norm": 1.3577214479446411, "learning_rate": 0.0015403337169159954, "loss": 0.765, "step": 79890 }, { "epoch": 22.98619102416571, "grad_norm": 1.5855661630630493, "learning_rate": 0.001540276179516686, "loss": 0.7889, "step": 79900 }, { "epoch": 22.989067894131185, "grad_norm": 2.208838701248169, "learning_rate": 0.0015402186421173763, "loss": 0.804, "step": 79910 }, { "epoch": 22.99194476409666, "grad_norm": 1.395614743232727, "learning_rate": 0.0015401611047180669, "loss": 0.6652, "step": 79920 }, { "epoch": 22.99482163406214, "grad_norm": 1.075221300125122, "learning_rate": 0.0015401035673187572, "loss": 0.7731, "step": 79930 }, { "epoch": 22.997698504027618, "grad_norm": 1.1221412420272827, "learning_rate": 0.0015400460299194476, "loss": 0.52, "step": 79940 }, { "epoch": 23.000575373993094, "grad_norm": 1.0569275617599487, "learning_rate": 0.0015399884925201381, "loss": 0.6528, "step": 79950 }, { "epoch": 23.003452243958574, "grad_norm": 1.2325544357299805, "learning_rate": 0.0015399309551208285, "loss": 0.6583, "step": 79960 }, { "epoch": 23.00632911392405, "grad_norm": 2.005892038345337, "learning_rate": 0.001539873417721519, "loss": 0.6191, "step": 79970 }, { "epoch": 23.00920598388953, "grad_norm": 0.9035722613334656, "learning_rate": 0.0015398158803222096, "loss": 0.6876, "step": 79980 }, { "epoch": 23.012082853855006, "grad_norm": 0.7694904804229736, "learning_rate": 0.0015397583429229, "loss": 0.5223, "step": 79990 }, { "epoch": 23.014959723820482, "grad_norm": 1.070184350013733, "learning_rate": 0.0015397008055235903, "loss": 0.6364, "step": 80000 }, { "epoch": 23.01783659378596, "grad_norm": 1.2574833631515503, "learning_rate": 0.0015396432681242809, "loss": 0.803, "step": 80010 }, { "epoch": 23.020713463751438, "grad_norm": 1.768883466720581, "learning_rate": 0.0015395857307249712, "loss": 0.8152, "step": 80020 }, { "epoch": 23.023590333716918, "grad_norm": 1.2362653017044067, "learning_rate": 0.0015395281933256618, "loss": 0.6853, "step": 80030 }, { "epoch": 23.026467203682394, "grad_norm": 0.945426344871521, "learning_rate": 0.0015394706559263523, "loss": 0.5247, "step": 80040 }, { "epoch": 23.02934407364787, "grad_norm": 1.2208131551742554, "learning_rate": 0.0015394131185270427, "loss": 0.6812, "step": 80050 }, { "epoch": 23.03222094361335, "grad_norm": 1.858604907989502, "learning_rate": 0.001539355581127733, "loss": 0.6497, "step": 80060 }, { "epoch": 23.035097813578826, "grad_norm": 1.2447878122329712, "learning_rate": 0.0015392980437284234, "loss": 0.6569, "step": 80070 }, { "epoch": 23.037974683544302, "grad_norm": 1.2718391418457031, "learning_rate": 0.001539240506329114, "loss": 0.5622, "step": 80080 }, { "epoch": 23.040851553509782, "grad_norm": 2.9089863300323486, "learning_rate": 0.0015391829689298045, "loss": 0.8052, "step": 80090 }, { "epoch": 23.043728423475258, "grad_norm": 1.4883692264556885, "learning_rate": 0.0015391254315304948, "loss": 0.8787, "step": 80100 }, { "epoch": 23.046605293440738, "grad_norm": 0.9159283638000488, "learning_rate": 0.0015390678941311854, "loss": 0.5731, "step": 80110 }, { "epoch": 23.049482163406214, "grad_norm": 1.0685962438583374, "learning_rate": 0.0015390103567318758, "loss": 0.6588, "step": 80120 }, { "epoch": 23.05235903337169, "grad_norm": 0.8689138889312744, "learning_rate": 0.001538952819332566, "loss": 0.5396, "step": 80130 }, { "epoch": 23.05523590333717, "grad_norm": 1.042372703552246, "learning_rate": 0.0015388952819332567, "loss": 0.7822, "step": 80140 }, { "epoch": 23.058112773302646, "grad_norm": 1.0671546459197998, "learning_rate": 0.0015388377445339472, "loss": 0.7395, "step": 80150 }, { "epoch": 23.060989643268123, "grad_norm": 1.1422396898269653, "learning_rate": 0.0015387802071346376, "loss": 0.7911, "step": 80160 }, { "epoch": 23.063866513233602, "grad_norm": 1.3053839206695557, "learning_rate": 0.0015387226697353281, "loss": 0.7087, "step": 80170 }, { "epoch": 23.06674338319908, "grad_norm": 1.3343291282653809, "learning_rate": 0.0015386651323360183, "loss": 0.6613, "step": 80180 }, { "epoch": 23.069620253164558, "grad_norm": 1.5230975151062012, "learning_rate": 0.0015386075949367088, "loss": 0.7162, "step": 80190 }, { "epoch": 23.072497123130034, "grad_norm": 1.4141632318496704, "learning_rate": 0.0015385500575373994, "loss": 0.7912, "step": 80200 }, { "epoch": 23.07537399309551, "grad_norm": 1.3296293020248413, "learning_rate": 0.0015384925201380897, "loss": 0.8002, "step": 80210 }, { "epoch": 23.07825086306099, "grad_norm": 1.5750702619552612, "learning_rate": 0.0015384349827387803, "loss": 0.6637, "step": 80220 }, { "epoch": 23.081127733026467, "grad_norm": 1.3741905689239502, "learning_rate": 0.0015383774453394709, "loss": 0.6481, "step": 80230 }, { "epoch": 23.084004602991946, "grad_norm": 0.9138904809951782, "learning_rate": 0.001538319907940161, "loss": 0.8089, "step": 80240 }, { "epoch": 23.086881472957423, "grad_norm": 1.3199717998504639, "learning_rate": 0.0015382623705408516, "loss": 0.9396, "step": 80250 }, { "epoch": 23.0897583429229, "grad_norm": 1.0039197206497192, "learning_rate": 0.0015382048331415421, "loss": 0.7043, "step": 80260 }, { "epoch": 23.09263521288838, "grad_norm": 2.581183433532715, "learning_rate": 0.0015381472957422325, "loss": 0.6314, "step": 80270 }, { "epoch": 23.095512082853855, "grad_norm": 1.4762533903121948, "learning_rate": 0.001538089758342923, "loss": 0.7351, "step": 80280 }, { "epoch": 23.09838895281933, "grad_norm": 0.9666973352432251, "learning_rate": 0.0015380322209436132, "loss": 0.6715, "step": 80290 }, { "epoch": 23.10126582278481, "grad_norm": 1.2061210870742798, "learning_rate": 0.0015379746835443037, "loss": 0.7053, "step": 80300 }, { "epoch": 23.104142692750287, "grad_norm": 1.7998459339141846, "learning_rate": 0.0015379171461449943, "loss": 0.6743, "step": 80310 }, { "epoch": 23.107019562715767, "grad_norm": 1.457619071006775, "learning_rate": 0.0015378596087456846, "loss": 0.6157, "step": 80320 }, { "epoch": 23.109896432681243, "grad_norm": 0.6886939406394958, "learning_rate": 0.0015378020713463752, "loss": 0.696, "step": 80330 }, { "epoch": 23.11277330264672, "grad_norm": 0.9198319911956787, "learning_rate": 0.0015377445339470658, "loss": 0.5407, "step": 80340 }, { "epoch": 23.1156501726122, "grad_norm": 1.3345730304718018, "learning_rate": 0.001537686996547756, "loss": 0.663, "step": 80350 }, { "epoch": 23.118527042577675, "grad_norm": 1.2409446239471436, "learning_rate": 0.0015376294591484465, "loss": 0.7592, "step": 80360 }, { "epoch": 23.121403912543155, "grad_norm": 1.5379317998886108, "learning_rate": 0.001537571921749137, "loss": 0.6608, "step": 80370 }, { "epoch": 23.12428078250863, "grad_norm": 1.1226226091384888, "learning_rate": 0.0015375143843498274, "loss": 0.5679, "step": 80380 }, { "epoch": 23.127157652474107, "grad_norm": 0.9358395338058472, "learning_rate": 0.001537456846950518, "loss": 0.6383, "step": 80390 }, { "epoch": 23.130034522439587, "grad_norm": 1.0120843648910522, "learning_rate": 0.0015373993095512083, "loss": 0.7341, "step": 80400 }, { "epoch": 23.132911392405063, "grad_norm": 0.9141308069229126, "learning_rate": 0.0015373417721518986, "loss": 0.6923, "step": 80410 }, { "epoch": 23.13578826237054, "grad_norm": 1.5552617311477661, "learning_rate": 0.0015372842347525892, "loss": 0.7328, "step": 80420 }, { "epoch": 23.13866513233602, "grad_norm": 0.9827109575271606, "learning_rate": 0.0015372266973532795, "loss": 0.7428, "step": 80430 }, { "epoch": 23.141542002301495, "grad_norm": 1.8775041103363037, "learning_rate": 0.0015371691599539701, "loss": 0.6353, "step": 80440 }, { "epoch": 23.144418872266975, "grad_norm": 0.9939984083175659, "learning_rate": 0.0015371116225546607, "loss": 0.7943, "step": 80450 }, { "epoch": 23.14729574223245, "grad_norm": 1.2945727109909058, "learning_rate": 0.001537054085155351, "loss": 0.6213, "step": 80460 }, { "epoch": 23.150172612197927, "grad_norm": 1.281424641609192, "learning_rate": 0.0015369965477560414, "loss": 0.7926, "step": 80470 }, { "epoch": 23.153049482163407, "grad_norm": 1.0674432516098022, "learning_rate": 0.001536939010356732, "loss": 0.715, "step": 80480 }, { "epoch": 23.155926352128883, "grad_norm": 1.6685518026351929, "learning_rate": 0.0015368814729574223, "loss": 0.6163, "step": 80490 }, { "epoch": 23.15880322209436, "grad_norm": 0.8591830134391785, "learning_rate": 0.0015368239355581128, "loss": 0.6575, "step": 80500 }, { "epoch": 23.16168009205984, "grad_norm": 0.9696584343910217, "learning_rate": 0.0015367663981588032, "loss": 0.6889, "step": 80510 }, { "epoch": 23.164556962025316, "grad_norm": 1.8096710443496704, "learning_rate": 0.0015367088607594938, "loss": 0.6482, "step": 80520 }, { "epoch": 23.167433831990795, "grad_norm": 1.1356862783432007, "learning_rate": 0.001536651323360184, "loss": 0.7486, "step": 80530 }, { "epoch": 23.17031070195627, "grad_norm": 1.5737121105194092, "learning_rate": 0.0015365937859608745, "loss": 0.7715, "step": 80540 }, { "epoch": 23.173187571921748, "grad_norm": 1.2692512273788452, "learning_rate": 0.001536536248561565, "loss": 0.7836, "step": 80550 }, { "epoch": 23.176064441887227, "grad_norm": 2.0391416549682617, "learning_rate": 0.0015364787111622556, "loss": 0.7712, "step": 80560 }, { "epoch": 23.178941311852704, "grad_norm": 1.4684480428695679, "learning_rate": 0.001536421173762946, "loss": 0.7806, "step": 80570 }, { "epoch": 23.181818181818183, "grad_norm": 2.2405824661254883, "learning_rate": 0.0015363636363636365, "loss": 0.733, "step": 80580 }, { "epoch": 23.18469505178366, "grad_norm": 1.0911310911178589, "learning_rate": 0.0015363060989643268, "loss": 0.7683, "step": 80590 }, { "epoch": 23.187571921749136, "grad_norm": 1.2287862300872803, "learning_rate": 0.0015362485615650172, "loss": 0.7274, "step": 80600 }, { "epoch": 23.190448791714616, "grad_norm": 1.177945613861084, "learning_rate": 0.0015361910241657077, "loss": 0.8072, "step": 80610 }, { "epoch": 23.193325661680092, "grad_norm": 1.7780468463897705, "learning_rate": 0.001536133486766398, "loss": 0.7397, "step": 80620 }, { "epoch": 23.196202531645568, "grad_norm": 1.8716574907302856, "learning_rate": 0.0015360759493670887, "loss": 0.7257, "step": 80630 }, { "epoch": 23.199079401611048, "grad_norm": 1.4187548160552979, "learning_rate": 0.0015360184119677792, "loss": 0.7409, "step": 80640 }, { "epoch": 23.201956271576524, "grad_norm": 1.0815954208374023, "learning_rate": 0.0015359608745684694, "loss": 0.588, "step": 80650 }, { "epoch": 23.204833141542004, "grad_norm": 1.3769712448120117, "learning_rate": 0.00153590333716916, "loss": 0.6087, "step": 80660 }, { "epoch": 23.20771001150748, "grad_norm": 0.9686635136604309, "learning_rate": 0.0015358457997698505, "loss": 0.6369, "step": 80670 }, { "epoch": 23.210586881472956, "grad_norm": 1.252530574798584, "learning_rate": 0.0015357882623705408, "loss": 0.84, "step": 80680 }, { "epoch": 23.213463751438436, "grad_norm": 1.9451757669448853, "learning_rate": 0.0015357307249712314, "loss": 0.7558, "step": 80690 }, { "epoch": 23.216340621403912, "grad_norm": 1.0049047470092773, "learning_rate": 0.001535673187571922, "loss": 0.5405, "step": 80700 }, { "epoch": 23.219217491369392, "grad_norm": 1.0351824760437012, "learning_rate": 0.001535615650172612, "loss": 0.7914, "step": 80710 }, { "epoch": 23.222094361334868, "grad_norm": 1.0358662605285645, "learning_rate": 0.0015355581127733027, "loss": 0.6059, "step": 80720 }, { "epoch": 23.224971231300344, "grad_norm": 0.7562389373779297, "learning_rate": 0.0015355005753739932, "loss": 0.8039, "step": 80730 }, { "epoch": 23.227848101265824, "grad_norm": 0.9887340664863586, "learning_rate": 0.0015354430379746836, "loss": 0.6334, "step": 80740 }, { "epoch": 23.2307249712313, "grad_norm": 0.9691938161849976, "learning_rate": 0.0015353855005753741, "loss": 0.7919, "step": 80750 }, { "epoch": 23.233601841196776, "grad_norm": 1.3763395547866821, "learning_rate": 0.0015353279631760645, "loss": 0.6876, "step": 80760 }, { "epoch": 23.236478711162256, "grad_norm": 1.0189378261566162, "learning_rate": 0.0015352704257767548, "loss": 0.6455, "step": 80770 }, { "epoch": 23.239355581127732, "grad_norm": 1.7202492952346802, "learning_rate": 0.0015352128883774454, "loss": 0.6469, "step": 80780 }, { "epoch": 23.242232451093212, "grad_norm": 0.6646707653999329, "learning_rate": 0.0015351553509781357, "loss": 0.6227, "step": 80790 }, { "epoch": 23.24510932105869, "grad_norm": 1.1841025352478027, "learning_rate": 0.0015350978135788263, "loss": 0.6779, "step": 80800 }, { "epoch": 23.247986191024165, "grad_norm": 1.1011955738067627, "learning_rate": 0.0015350402761795169, "loss": 0.8049, "step": 80810 }, { "epoch": 23.250863060989644, "grad_norm": 1.1152185201644897, "learning_rate": 0.0015349827387802072, "loss": 0.7274, "step": 80820 }, { "epoch": 23.25373993095512, "grad_norm": 1.972466230392456, "learning_rate": 0.0015349252013808976, "loss": 0.7895, "step": 80830 }, { "epoch": 23.256616800920597, "grad_norm": 1.8366683721542358, "learning_rate": 0.0015348676639815881, "loss": 0.7243, "step": 80840 }, { "epoch": 23.259493670886076, "grad_norm": 1.0477104187011719, "learning_rate": 0.0015348101265822785, "loss": 0.6369, "step": 80850 }, { "epoch": 23.262370540851553, "grad_norm": 0.9297381043434143, "learning_rate": 0.001534752589182969, "loss": 0.7134, "step": 80860 }, { "epoch": 23.265247410817032, "grad_norm": 0.8131892681121826, "learning_rate": 0.0015346950517836594, "loss": 0.455, "step": 80870 }, { "epoch": 23.26812428078251, "grad_norm": 2.066472291946411, "learning_rate": 0.00153463751438435, "loss": 0.8155, "step": 80880 }, { "epoch": 23.271001150747985, "grad_norm": 0.7041469216346741, "learning_rate": 0.0015345799769850403, "loss": 0.6938, "step": 80890 }, { "epoch": 23.273878020713465, "grad_norm": 0.7913967967033386, "learning_rate": 0.0015345224395857306, "loss": 0.5597, "step": 80900 }, { "epoch": 23.27675489067894, "grad_norm": 1.1461389064788818, "learning_rate": 0.0015344649021864212, "loss": 0.6211, "step": 80910 }, { "epoch": 23.27963176064442, "grad_norm": 1.8035986423492432, "learning_rate": 0.0015344073647871118, "loss": 0.8005, "step": 80920 }, { "epoch": 23.282508630609897, "grad_norm": 2.2003324031829834, "learning_rate": 0.001534349827387802, "loss": 0.7931, "step": 80930 }, { "epoch": 23.285385500575373, "grad_norm": 1.5896241664886475, "learning_rate": 0.0015342922899884927, "loss": 0.7757, "step": 80940 }, { "epoch": 23.288262370540853, "grad_norm": 4.052680015563965, "learning_rate": 0.001534234752589183, "loss": 0.7431, "step": 80950 }, { "epoch": 23.29113924050633, "grad_norm": 0.9723304510116577, "learning_rate": 0.0015341772151898734, "loss": 0.7141, "step": 80960 }, { "epoch": 23.294016110471805, "grad_norm": 0.8381668925285339, "learning_rate": 0.001534119677790564, "loss": 0.6258, "step": 80970 }, { "epoch": 23.296892980437285, "grad_norm": 1.7431436777114868, "learning_rate": 0.0015340621403912543, "loss": 0.7933, "step": 80980 }, { "epoch": 23.29976985040276, "grad_norm": 2.0521719455718994, "learning_rate": 0.0015340046029919448, "loss": 0.7071, "step": 80990 }, { "epoch": 23.30264672036824, "grad_norm": 1.5521820783615112, "learning_rate": 0.0015339470655926354, "loss": 0.7275, "step": 81000 }, { "epoch": 23.305523590333717, "grad_norm": 3.5656473636627197, "learning_rate": 0.0015338895281933255, "loss": 0.7972, "step": 81010 }, { "epoch": 23.308400460299193, "grad_norm": 1.531803011894226, "learning_rate": 0.001533831990794016, "loss": 0.7037, "step": 81020 }, { "epoch": 23.311277330264673, "grad_norm": 2.3653178215026855, "learning_rate": 0.0015337744533947067, "loss": 0.7487, "step": 81030 }, { "epoch": 23.31415420023015, "grad_norm": 1.5350080728530884, "learning_rate": 0.001533716915995397, "loss": 0.5546, "step": 81040 }, { "epoch": 23.317031070195625, "grad_norm": 1.3826522827148438, "learning_rate": 0.0015336593785960876, "loss": 0.8255, "step": 81050 }, { "epoch": 23.319907940161105, "grad_norm": 1.546461820602417, "learning_rate": 0.0015336018411967781, "loss": 0.585, "step": 81060 }, { "epoch": 23.32278481012658, "grad_norm": 1.8821184635162354, "learning_rate": 0.0015335443037974683, "loss": 0.7134, "step": 81070 }, { "epoch": 23.32566168009206, "grad_norm": 1.4255980253219604, "learning_rate": 0.0015334867663981588, "loss": 0.7379, "step": 81080 }, { "epoch": 23.328538550057537, "grad_norm": 1.1022018194198608, "learning_rate": 0.0015334292289988492, "loss": 0.6735, "step": 81090 }, { "epoch": 23.331415420023013, "grad_norm": 1.9737327098846436, "learning_rate": 0.0015333716915995397, "loss": 0.8287, "step": 81100 }, { "epoch": 23.334292289988493, "grad_norm": 1.3131122589111328, "learning_rate": 0.0015333141542002303, "loss": 0.6663, "step": 81110 }, { "epoch": 23.33716915995397, "grad_norm": 1.3988693952560425, "learning_rate": 0.0015332566168009204, "loss": 0.6322, "step": 81120 }, { "epoch": 23.34004602991945, "grad_norm": 1.3924816846847534, "learning_rate": 0.001533199079401611, "loss": 0.622, "step": 81130 }, { "epoch": 23.342922899884925, "grad_norm": 0.8495533466339111, "learning_rate": 0.0015331415420023016, "loss": 0.7328, "step": 81140 }, { "epoch": 23.3457997698504, "grad_norm": 1.59340500831604, "learning_rate": 0.001533084004602992, "loss": 0.7588, "step": 81150 }, { "epoch": 23.34867663981588, "grad_norm": 1.0710097551345825, "learning_rate": 0.0015330264672036825, "loss": 0.7287, "step": 81160 }, { "epoch": 23.351553509781358, "grad_norm": 1.4147579669952393, "learning_rate": 0.001532968929804373, "loss": 0.6665, "step": 81170 }, { "epoch": 23.354430379746834, "grad_norm": 1.8388371467590332, "learning_rate": 0.0015329113924050632, "loss": 0.6793, "step": 81180 }, { "epoch": 23.357307249712314, "grad_norm": 1.601165771484375, "learning_rate": 0.0015328538550057537, "loss": 0.8886, "step": 81190 }, { "epoch": 23.36018411967779, "grad_norm": 0.788641095161438, "learning_rate": 0.001532796317606444, "loss": 0.5402, "step": 81200 }, { "epoch": 23.36306098964327, "grad_norm": 2.478137731552124, "learning_rate": 0.0015327387802071346, "loss": 0.706, "step": 81210 }, { "epoch": 23.365937859608746, "grad_norm": 0.8897647857666016, "learning_rate": 0.0015326812428078252, "loss": 0.8138, "step": 81220 }, { "epoch": 23.368814729574222, "grad_norm": 1.2161495685577393, "learning_rate": 0.0015326237054085156, "loss": 0.5622, "step": 81230 }, { "epoch": 23.3716915995397, "grad_norm": 1.4116003513336182, "learning_rate": 0.001532566168009206, "loss": 0.9123, "step": 81240 }, { "epoch": 23.374568469505178, "grad_norm": 1.9547052383422852, "learning_rate": 0.0015325086306098965, "loss": 0.6319, "step": 81250 }, { "epoch": 23.377445339470658, "grad_norm": 1.5435278415679932, "learning_rate": 0.0015324510932105868, "loss": 0.5889, "step": 81260 }, { "epoch": 23.380322209436134, "grad_norm": 1.2339794635772705, "learning_rate": 0.0015323935558112774, "loss": 0.7605, "step": 81270 }, { "epoch": 23.38319907940161, "grad_norm": 1.765716791152954, "learning_rate": 0.001532336018411968, "loss": 0.6672, "step": 81280 }, { "epoch": 23.38607594936709, "grad_norm": 1.3832457065582275, "learning_rate": 0.0015322784810126583, "loss": 0.6892, "step": 81290 }, { "epoch": 23.388952819332566, "grad_norm": 2.02871036529541, "learning_rate": 0.0015322209436133486, "loss": 0.6888, "step": 81300 }, { "epoch": 23.391829689298042, "grad_norm": 1.7442461252212524, "learning_rate": 0.0015321634062140392, "loss": 0.7963, "step": 81310 }, { "epoch": 23.394706559263522, "grad_norm": 2.1110823154449463, "learning_rate": 0.0015321058688147295, "loss": 0.728, "step": 81320 }, { "epoch": 23.397583429228998, "grad_norm": 2.610762357711792, "learning_rate": 0.0015320483314154201, "loss": 0.6772, "step": 81330 }, { "epoch": 23.400460299194478, "grad_norm": 1.674232006072998, "learning_rate": 0.0015319907940161105, "loss": 0.6632, "step": 81340 }, { "epoch": 23.403337169159954, "grad_norm": 1.9228003025054932, "learning_rate": 0.001531933256616801, "loss": 0.5788, "step": 81350 }, { "epoch": 23.40621403912543, "grad_norm": 1.1010688543319702, "learning_rate": 0.0015318757192174914, "loss": 0.5447, "step": 81360 }, { "epoch": 23.40909090909091, "grad_norm": 2.0957093238830566, "learning_rate": 0.0015318181818181817, "loss": 0.8794, "step": 81370 }, { "epoch": 23.411967779056386, "grad_norm": 1.4889872074127197, "learning_rate": 0.0015317606444188723, "loss": 0.781, "step": 81380 }, { "epoch": 23.414844649021862, "grad_norm": 2.228808641433716, "learning_rate": 0.0015317031070195628, "loss": 0.5545, "step": 81390 }, { "epoch": 23.417721518987342, "grad_norm": 1.4717738628387451, "learning_rate": 0.0015316455696202532, "loss": 0.7007, "step": 81400 }, { "epoch": 23.42059838895282, "grad_norm": 1.4662989377975464, "learning_rate": 0.0015315880322209438, "loss": 0.7257, "step": 81410 }, { "epoch": 23.423475258918298, "grad_norm": 1.2174702882766724, "learning_rate": 0.001531530494821634, "loss": 0.7124, "step": 81420 }, { "epoch": 23.426352128883774, "grad_norm": 1.637555718421936, "learning_rate": 0.0015314729574223244, "loss": 0.5316, "step": 81430 }, { "epoch": 23.42922899884925, "grad_norm": 1.4691334962844849, "learning_rate": 0.001531415420023015, "loss": 0.6981, "step": 81440 }, { "epoch": 23.43210586881473, "grad_norm": 1.3837453126907349, "learning_rate": 0.0015313578826237054, "loss": 0.6337, "step": 81450 }, { "epoch": 23.434982738780207, "grad_norm": 1.0858428478240967, "learning_rate": 0.001531300345224396, "loss": 0.5941, "step": 81460 }, { "epoch": 23.437859608745686, "grad_norm": 2.6264266967773438, "learning_rate": 0.0015312428078250865, "loss": 0.7365, "step": 81470 }, { "epoch": 23.440736478711163, "grad_norm": 2.479167938232422, "learning_rate": 0.0015311852704257766, "loss": 0.7534, "step": 81480 }, { "epoch": 23.44361334867664, "grad_norm": 1.0992863178253174, "learning_rate": 0.0015311277330264672, "loss": 0.6262, "step": 81490 }, { "epoch": 23.44649021864212, "grad_norm": 1.2564232349395752, "learning_rate": 0.0015310701956271577, "loss": 0.5648, "step": 81500 }, { "epoch": 23.449367088607595, "grad_norm": 1.4686211347579956, "learning_rate": 0.001531012658227848, "loss": 0.8116, "step": 81510 }, { "epoch": 23.45224395857307, "grad_norm": 0.769144594669342, "learning_rate": 0.0015309551208285387, "loss": 0.8051, "step": 81520 }, { "epoch": 23.45512082853855, "grad_norm": 0.8223468661308289, "learning_rate": 0.0015308975834292292, "loss": 0.5916, "step": 81530 }, { "epoch": 23.457997698504027, "grad_norm": 1.2815594673156738, "learning_rate": 0.0015308400460299194, "loss": 0.6879, "step": 81540 }, { "epoch": 23.460874568469507, "grad_norm": 1.3708511590957642, "learning_rate": 0.00153078250863061, "loss": 0.6405, "step": 81550 }, { "epoch": 23.463751438434983, "grad_norm": 1.379284381866455, "learning_rate": 0.0015307249712313003, "loss": 0.7737, "step": 81560 }, { "epoch": 23.46662830840046, "grad_norm": 1.0969129800796509, "learning_rate": 0.0015306674338319908, "loss": 0.6412, "step": 81570 }, { "epoch": 23.46950517836594, "grad_norm": 2.8227319717407227, "learning_rate": 0.0015306098964326814, "loss": 0.7426, "step": 81580 }, { "epoch": 23.472382048331415, "grad_norm": 1.3665484189987183, "learning_rate": 0.0015305523590333717, "loss": 0.6292, "step": 81590 }, { "epoch": 23.475258918296895, "grad_norm": 1.135852575302124, "learning_rate": 0.001530494821634062, "loss": 0.7359, "step": 81600 }, { "epoch": 23.47813578826237, "grad_norm": 0.9712961316108704, "learning_rate": 0.0015304372842347526, "loss": 0.7271, "step": 81610 }, { "epoch": 23.481012658227847, "grad_norm": 1.3060173988342285, "learning_rate": 0.001530379746835443, "loss": 0.6285, "step": 81620 }, { "epoch": 23.483889528193327, "grad_norm": 1.0128881931304932, "learning_rate": 0.0015303222094361336, "loss": 0.6669, "step": 81630 }, { "epoch": 23.486766398158803, "grad_norm": 1.4923123121261597, "learning_rate": 0.0015302646720368241, "loss": 0.8513, "step": 81640 }, { "epoch": 23.48964326812428, "grad_norm": 0.7886641025543213, "learning_rate": 0.0015302071346375145, "loss": 0.5821, "step": 81650 }, { "epoch": 23.49252013808976, "grad_norm": 1.6970535516738892, "learning_rate": 0.0015301495972382048, "loss": 0.798, "step": 81660 }, { "epoch": 23.495397008055235, "grad_norm": 1.211618185043335, "learning_rate": 0.0015300920598388952, "loss": 0.7399, "step": 81670 }, { "epoch": 23.498273878020715, "grad_norm": 0.969528317451477, "learning_rate": 0.0015300345224395857, "loss": 0.6045, "step": 81680 }, { "epoch": 23.50115074798619, "grad_norm": 1.3101516962051392, "learning_rate": 0.0015299769850402763, "loss": 0.6181, "step": 81690 }, { "epoch": 23.504027617951667, "grad_norm": 2.2349777221679688, "learning_rate": 0.0015299194476409666, "loss": 0.9033, "step": 81700 }, { "epoch": 23.506904487917147, "grad_norm": 2.1584620475769043, "learning_rate": 0.0015298619102416572, "loss": 0.7825, "step": 81710 }, { "epoch": 23.509781357882623, "grad_norm": 2.0333495140075684, "learning_rate": 0.0015298043728423476, "loss": 0.8474, "step": 81720 }, { "epoch": 23.5126582278481, "grad_norm": 1.7983300685882568, "learning_rate": 0.001529746835443038, "loss": 0.7761, "step": 81730 }, { "epoch": 23.51553509781358, "grad_norm": 1.2238614559173584, "learning_rate": 0.0015296892980437285, "loss": 0.9293, "step": 81740 }, { "epoch": 23.518411967779056, "grad_norm": 1.2352540493011475, "learning_rate": 0.001529631760644419, "loss": 0.7907, "step": 81750 }, { "epoch": 23.521288837744535, "grad_norm": 1.122470498085022, "learning_rate": 0.0015295742232451094, "loss": 0.6881, "step": 81760 }, { "epoch": 23.52416570771001, "grad_norm": 1.7308704853057861, "learning_rate": 0.0015295166858458, "loss": 0.7492, "step": 81770 }, { "epoch": 23.527042577675488, "grad_norm": 0.8956601023674011, "learning_rate": 0.00152945914844649, "loss": 0.8832, "step": 81780 }, { "epoch": 23.529919447640967, "grad_norm": 0.9821020364761353, "learning_rate": 0.0015294016110471806, "loss": 0.7666, "step": 81790 }, { "epoch": 23.532796317606444, "grad_norm": 2.3211069107055664, "learning_rate": 0.0015293440736478712, "loss": 0.7709, "step": 81800 }, { "epoch": 23.535673187571923, "grad_norm": 0.7675131559371948, "learning_rate": 0.0015292865362485615, "loss": 0.6677, "step": 81810 }, { "epoch": 23.5385500575374, "grad_norm": 1.6827523708343506, "learning_rate": 0.001529228998849252, "loss": 0.8551, "step": 81820 }, { "epoch": 23.541426927502876, "grad_norm": 0.9624159932136536, "learning_rate": 0.0015291714614499427, "loss": 0.7477, "step": 81830 }, { "epoch": 23.544303797468356, "grad_norm": 1.7064871788024902, "learning_rate": 0.0015291139240506328, "loss": 0.7373, "step": 81840 }, { "epoch": 23.54718066743383, "grad_norm": 1.825319528579712, "learning_rate": 0.0015290563866513234, "loss": 0.772, "step": 81850 }, { "epoch": 23.550057537399308, "grad_norm": 2.655282735824585, "learning_rate": 0.001528998849252014, "loss": 0.6504, "step": 81860 }, { "epoch": 23.552934407364788, "grad_norm": 1.4363707304000854, "learning_rate": 0.0015289413118527043, "loss": 0.7849, "step": 81870 }, { "epoch": 23.555811277330264, "grad_norm": 0.894109308719635, "learning_rate": 0.0015288837744533948, "loss": 0.5775, "step": 81880 }, { "epoch": 23.558688147295744, "grad_norm": 1.9979506731033325, "learning_rate": 0.0015288262370540852, "loss": 0.6397, "step": 81890 }, { "epoch": 23.56156501726122, "grad_norm": 1.4722412824630737, "learning_rate": 0.0015287686996547755, "loss": 0.7483, "step": 81900 }, { "epoch": 23.564441887226696, "grad_norm": 1.0461724996566772, "learning_rate": 0.001528711162255466, "loss": 0.5689, "step": 81910 }, { "epoch": 23.567318757192176, "grad_norm": 2.060079574584961, "learning_rate": 0.0015286536248561564, "loss": 0.6714, "step": 81920 }, { "epoch": 23.570195627157652, "grad_norm": 1.3345108032226562, "learning_rate": 0.001528596087456847, "loss": 0.6969, "step": 81930 }, { "epoch": 23.57307249712313, "grad_norm": 1.2439607381820679, "learning_rate": 0.0015285385500575376, "loss": 0.7312, "step": 81940 }, { "epoch": 23.575949367088608, "grad_norm": 2.3694286346435547, "learning_rate": 0.0015284810126582277, "loss": 0.7863, "step": 81950 }, { "epoch": 23.578826237054084, "grad_norm": 1.5082223415374756, "learning_rate": 0.0015284234752589183, "loss": 0.5958, "step": 81960 }, { "epoch": 23.581703107019564, "grad_norm": 1.8002856969833374, "learning_rate": 0.0015283659378596088, "loss": 0.8285, "step": 81970 }, { "epoch": 23.58457997698504, "grad_norm": 1.2779884338378906, "learning_rate": 0.0015283084004602992, "loss": 0.5712, "step": 81980 }, { "epoch": 23.587456846950516, "grad_norm": 1.498064398765564, "learning_rate": 0.0015282508630609897, "loss": 0.6026, "step": 81990 }, { "epoch": 23.590333716915996, "grad_norm": 0.9582902193069458, "learning_rate": 0.0015281933256616803, "loss": 0.613, "step": 82000 }, { "epoch": 23.593210586881472, "grad_norm": 0.8263890147209167, "learning_rate": 0.0015281357882623704, "loss": 0.7596, "step": 82010 }, { "epoch": 23.596087456846952, "grad_norm": 3.311807632446289, "learning_rate": 0.001528078250863061, "loss": 1.0367, "step": 82020 }, { "epoch": 23.59896432681243, "grad_norm": 2.051025867462158, "learning_rate": 0.0015280207134637513, "loss": 0.9733, "step": 82030 }, { "epoch": 23.601841196777904, "grad_norm": 2.3653852939605713, "learning_rate": 0.001527963176064442, "loss": 0.7535, "step": 82040 }, { "epoch": 23.604718066743384, "grad_norm": 1.3328937292099, "learning_rate": 0.0015279056386651325, "loss": 0.7905, "step": 82050 }, { "epoch": 23.60759493670886, "grad_norm": 1.1405527591705322, "learning_rate": 0.0015278481012658228, "loss": 0.6393, "step": 82060 }, { "epoch": 23.610471806674337, "grad_norm": 1.2852492332458496, "learning_rate": 0.0015277905638665132, "loss": 0.6447, "step": 82070 }, { "epoch": 23.613348676639816, "grad_norm": 1.1842422485351562, "learning_rate": 0.0015277330264672037, "loss": 0.7272, "step": 82080 }, { "epoch": 23.616225546605293, "grad_norm": 1.4547172784805298, "learning_rate": 0.001527675489067894, "loss": 0.6815, "step": 82090 }, { "epoch": 23.619102416570772, "grad_norm": 1.842265248298645, "learning_rate": 0.0015276179516685846, "loss": 0.7666, "step": 82100 }, { "epoch": 23.62197928653625, "grad_norm": 0.9406402111053467, "learning_rate": 0.0015275604142692752, "loss": 0.5435, "step": 82110 }, { "epoch": 23.624856156501725, "grad_norm": 1.6089500188827515, "learning_rate": 0.0015275028768699656, "loss": 0.6436, "step": 82120 }, { "epoch": 23.627733026467205, "grad_norm": 1.217362403869629, "learning_rate": 0.001527445339470656, "loss": 0.6315, "step": 82130 }, { "epoch": 23.63060989643268, "grad_norm": 1.4331070184707642, "learning_rate": 0.0015273878020713462, "loss": 0.6793, "step": 82140 }, { "epoch": 23.63348676639816, "grad_norm": 1.366286277770996, "learning_rate": 0.0015273302646720368, "loss": 0.803, "step": 82150 }, { "epoch": 23.636363636363637, "grad_norm": 1.3172197341918945, "learning_rate": 0.0015272727272727274, "loss": 0.7523, "step": 82160 }, { "epoch": 23.639240506329113, "grad_norm": 1.5016335248947144, "learning_rate": 0.0015272151898734177, "loss": 0.7578, "step": 82170 }, { "epoch": 23.642117376294593, "grad_norm": 0.8525261282920837, "learning_rate": 0.0015271576524741083, "loss": 0.6033, "step": 82180 }, { "epoch": 23.64499424626007, "grad_norm": 2.8412606716156006, "learning_rate": 0.0015271001150747986, "loss": 0.6947, "step": 82190 }, { "epoch": 23.647871116225545, "grad_norm": 1.217163324356079, "learning_rate": 0.001527042577675489, "loss": 0.8545, "step": 82200 }, { "epoch": 23.650747986191025, "grad_norm": 2.0895700454711914, "learning_rate": 0.0015269850402761795, "loss": 0.8389, "step": 82210 }, { "epoch": 23.6536248561565, "grad_norm": 1.2303597927093506, "learning_rate": 0.0015269275028768701, "loss": 0.8407, "step": 82220 }, { "epoch": 23.65650172612198, "grad_norm": 1.0152578353881836, "learning_rate": 0.0015268699654775605, "loss": 0.8795, "step": 82230 }, { "epoch": 23.659378596087457, "grad_norm": 1.799665927886963, "learning_rate": 0.001526812428078251, "loss": 0.6834, "step": 82240 }, { "epoch": 23.662255466052933, "grad_norm": 1.2265268564224243, "learning_rate": 0.0015267548906789411, "loss": 0.7346, "step": 82250 }, { "epoch": 23.665132336018413, "grad_norm": 1.2776200771331787, "learning_rate": 0.0015266973532796317, "loss": 0.8014, "step": 82260 }, { "epoch": 23.66800920598389, "grad_norm": 0.8670624494552612, "learning_rate": 0.0015266398158803223, "loss": 0.7765, "step": 82270 }, { "epoch": 23.67088607594937, "grad_norm": 0.9547697305679321, "learning_rate": 0.0015265822784810126, "loss": 0.7534, "step": 82280 }, { "epoch": 23.673762945914845, "grad_norm": 1.5974887609481812, "learning_rate": 0.0015265247410817032, "loss": 0.7827, "step": 82290 }, { "epoch": 23.67663981588032, "grad_norm": 1.1755746603012085, "learning_rate": 0.0015264672036823938, "loss": 0.7383, "step": 82300 }, { "epoch": 23.6795166858458, "grad_norm": 1.5249954462051392, "learning_rate": 0.0015264096662830839, "loss": 0.8407, "step": 82310 }, { "epoch": 23.682393555811277, "grad_norm": 1.7133396863937378, "learning_rate": 0.0015263521288837744, "loss": 0.7943, "step": 82320 }, { "epoch": 23.685270425776753, "grad_norm": 0.7208604216575623, "learning_rate": 0.001526294591484465, "loss": 0.6816, "step": 82330 }, { "epoch": 23.688147295742233, "grad_norm": 1.3913438320159912, "learning_rate": 0.0015262370540851554, "loss": 0.9772, "step": 82340 }, { "epoch": 23.69102416570771, "grad_norm": 1.276513695716858, "learning_rate": 0.001526179516685846, "loss": 0.7573, "step": 82350 }, { "epoch": 23.69390103567319, "grad_norm": 1.1073864698410034, "learning_rate": 0.0015261219792865363, "loss": 0.7007, "step": 82360 }, { "epoch": 23.696777905638665, "grad_norm": 1.7076282501220703, "learning_rate": 0.0015260644418872266, "loss": 0.6656, "step": 82370 }, { "epoch": 23.69965477560414, "grad_norm": 1.6454743146896362, "learning_rate": 0.0015260069044879172, "loss": 0.7122, "step": 82380 }, { "epoch": 23.70253164556962, "grad_norm": 1.002052903175354, "learning_rate": 0.0015259493670886075, "loss": 0.7595, "step": 82390 }, { "epoch": 23.705408515535098, "grad_norm": 1.2751898765563965, "learning_rate": 0.001525891829689298, "loss": 0.6565, "step": 82400 }, { "epoch": 23.708285385500574, "grad_norm": 0.7429460287094116, "learning_rate": 0.0015258342922899887, "loss": 0.6433, "step": 82410 }, { "epoch": 23.711162255466053, "grad_norm": 1.0110629796981812, "learning_rate": 0.001525776754890679, "loss": 0.6279, "step": 82420 }, { "epoch": 23.71403912543153, "grad_norm": 2.6184229850769043, "learning_rate": 0.0015257192174913693, "loss": 0.7782, "step": 82430 }, { "epoch": 23.71691599539701, "grad_norm": 1.2610362768173218, "learning_rate": 0.00152566168009206, "loss": 0.6022, "step": 82440 }, { "epoch": 23.719792865362486, "grad_norm": 1.7422847747802734, "learning_rate": 0.0015256041426927503, "loss": 0.806, "step": 82450 }, { "epoch": 23.722669735327962, "grad_norm": 1.0357037782669067, "learning_rate": 0.0015255466052934408, "loss": 0.7558, "step": 82460 }, { "epoch": 23.72554660529344, "grad_norm": 1.0031365156173706, "learning_rate": 0.0015254890678941312, "loss": 0.724, "step": 82470 }, { "epoch": 23.728423475258918, "grad_norm": 1.2315711975097656, "learning_rate": 0.0015254315304948217, "loss": 0.6207, "step": 82480 }, { "epoch": 23.731300345224398, "grad_norm": 0.8384292125701904, "learning_rate": 0.001525373993095512, "loss": 0.6472, "step": 82490 }, { "epoch": 23.734177215189874, "grad_norm": 0.8605902791023254, "learning_rate": 0.0015253164556962024, "loss": 0.6203, "step": 82500 }, { "epoch": 23.73705408515535, "grad_norm": 2.2927801609039307, "learning_rate": 0.001525258918296893, "loss": 0.6724, "step": 82510 }, { "epoch": 23.73993095512083, "grad_norm": 0.900661051273346, "learning_rate": 0.0015252013808975836, "loss": 0.6897, "step": 82520 }, { "epoch": 23.742807825086306, "grad_norm": 1.0278805494308472, "learning_rate": 0.001525143843498274, "loss": 0.7888, "step": 82530 }, { "epoch": 23.745684695051782, "grad_norm": 2.0290167331695557, "learning_rate": 0.0015250863060989645, "loss": 0.7013, "step": 82540 }, { "epoch": 23.748561565017262, "grad_norm": 1.4351024627685547, "learning_rate": 0.0015250287686996548, "loss": 0.6559, "step": 82550 }, { "epoch": 23.751438434982738, "grad_norm": 0.8528622984886169, "learning_rate": 0.0015249712313003452, "loss": 0.7792, "step": 82560 }, { "epoch": 23.754315304948218, "grad_norm": 1.0709940195083618, "learning_rate": 0.0015249136939010357, "loss": 0.6617, "step": 82570 }, { "epoch": 23.757192174913694, "grad_norm": 1.1448649168014526, "learning_rate": 0.001524856156501726, "loss": 0.7127, "step": 82580 }, { "epoch": 23.76006904487917, "grad_norm": 1.2022379636764526, "learning_rate": 0.0015247986191024166, "loss": 0.859, "step": 82590 }, { "epoch": 23.76294591484465, "grad_norm": 2.0736398696899414, "learning_rate": 0.0015247410817031072, "loss": 0.8237, "step": 82600 }, { "epoch": 23.765822784810126, "grad_norm": 0.9858195781707764, "learning_rate": 0.0015246835443037973, "loss": 0.5886, "step": 82610 }, { "epoch": 23.768699654775602, "grad_norm": 3.144432306289673, "learning_rate": 0.001524626006904488, "loss": 0.7, "step": 82620 }, { "epoch": 23.771576524741082, "grad_norm": 1.692995548248291, "learning_rate": 0.0015245684695051785, "loss": 0.6444, "step": 82630 }, { "epoch": 23.77445339470656, "grad_norm": 0.7091510891914368, "learning_rate": 0.0015245109321058688, "loss": 0.6384, "step": 82640 }, { "epoch": 23.777330264672038, "grad_norm": 2.1330835819244385, "learning_rate": 0.0015244533947065594, "loss": 0.6659, "step": 82650 }, { "epoch": 23.780207134637514, "grad_norm": 1.4780278205871582, "learning_rate": 0.00152439585730725, "loss": 0.7455, "step": 82660 }, { "epoch": 23.78308400460299, "grad_norm": 1.1351038217544556, "learning_rate": 0.00152433831990794, "loss": 0.7937, "step": 82670 }, { "epoch": 23.78596087456847, "grad_norm": 0.7390393018722534, "learning_rate": 0.0015242807825086306, "loss": 0.8692, "step": 82680 }, { "epoch": 23.788837744533947, "grad_norm": 1.6664822101593018, "learning_rate": 0.0015242232451093212, "loss": 0.7762, "step": 82690 }, { "epoch": 23.791714614499426, "grad_norm": 1.5961543321609497, "learning_rate": 0.0015241657077100115, "loss": 0.6852, "step": 82700 }, { "epoch": 23.794591484464902, "grad_norm": 1.7357927560806274, "learning_rate": 0.001524108170310702, "loss": 0.6163, "step": 82710 }, { "epoch": 23.79746835443038, "grad_norm": 1.1902471780776978, "learning_rate": 0.0015240506329113924, "loss": 0.7357, "step": 82720 }, { "epoch": 23.80034522439586, "grad_norm": 2.0590498447418213, "learning_rate": 0.0015239930955120828, "loss": 0.9504, "step": 82730 }, { "epoch": 23.803222094361335, "grad_norm": 0.9595195055007935, "learning_rate": 0.0015239355581127734, "loss": 0.5357, "step": 82740 }, { "epoch": 23.80609896432681, "grad_norm": 1.2977508306503296, "learning_rate": 0.0015238780207134637, "loss": 0.6287, "step": 82750 }, { "epoch": 23.80897583429229, "grad_norm": 1.4279136657714844, "learning_rate": 0.0015238204833141543, "loss": 0.7477, "step": 82760 }, { "epoch": 23.811852704257767, "grad_norm": 1.5279186964035034, "learning_rate": 0.0015237629459148448, "loss": 0.5759, "step": 82770 }, { "epoch": 23.814729574223247, "grad_norm": 1.1295809745788574, "learning_rate": 0.001523705408515535, "loss": 0.6533, "step": 82780 }, { "epoch": 23.817606444188723, "grad_norm": 1.6000418663024902, "learning_rate": 0.0015236478711162255, "loss": 0.7919, "step": 82790 }, { "epoch": 23.8204833141542, "grad_norm": 1.0791438817977905, "learning_rate": 0.001523590333716916, "loss": 0.7286, "step": 82800 }, { "epoch": 23.82336018411968, "grad_norm": 1.186156153678894, "learning_rate": 0.0015235327963176064, "loss": 0.7675, "step": 82810 }, { "epoch": 23.826237054085155, "grad_norm": 0.8183093070983887, "learning_rate": 0.001523475258918297, "loss": 0.8019, "step": 82820 }, { "epoch": 23.82911392405063, "grad_norm": 1.2054364681243896, "learning_rate": 0.0015234177215189874, "loss": 0.7368, "step": 82830 }, { "epoch": 23.83199079401611, "grad_norm": 1.3624982833862305, "learning_rate": 0.0015233601841196777, "loss": 0.7384, "step": 82840 }, { "epoch": 23.834867663981587, "grad_norm": 1.0415860414505005, "learning_rate": 0.0015233026467203683, "loss": 0.672, "step": 82850 }, { "epoch": 23.837744533947067, "grad_norm": 1.7882649898529053, "learning_rate": 0.0015232451093210586, "loss": 0.6281, "step": 82860 }, { "epoch": 23.840621403912543, "grad_norm": 1.0747716426849365, "learning_rate": 0.0015231875719217492, "loss": 0.6872, "step": 82870 }, { "epoch": 23.84349827387802, "grad_norm": 2.1281840801239014, "learning_rate": 0.0015231300345224397, "loss": 0.8465, "step": 82880 }, { "epoch": 23.8463751438435, "grad_norm": 1.3077489137649536, "learning_rate": 0.00152307249712313, "loss": 0.6604, "step": 82890 }, { "epoch": 23.849252013808975, "grad_norm": 1.3445254564285278, "learning_rate": 0.0015230149597238204, "loss": 0.6121, "step": 82900 }, { "epoch": 23.852128883774455, "grad_norm": 0.8771604299545288, "learning_rate": 0.001522957422324511, "loss": 0.527, "step": 82910 }, { "epoch": 23.85500575373993, "grad_norm": 1.3878251314163208, "learning_rate": 0.0015228998849252013, "loss": 0.7151, "step": 82920 }, { "epoch": 23.857882623705407, "grad_norm": 1.5724166631698608, "learning_rate": 0.001522842347525892, "loss": 0.6025, "step": 82930 }, { "epoch": 23.860759493670887, "grad_norm": 0.9191491603851318, "learning_rate": 0.0015227848101265823, "loss": 0.6727, "step": 82940 }, { "epoch": 23.863636363636363, "grad_norm": 1.1213046312332153, "learning_rate": 0.0015227272727272728, "loss": 0.5648, "step": 82950 }, { "epoch": 23.86651323360184, "grad_norm": 0.6794015169143677, "learning_rate": 0.0015226697353279632, "loss": 0.6217, "step": 82960 }, { "epoch": 23.86939010356732, "grad_norm": 0.7178620100021362, "learning_rate": 0.0015226121979286535, "loss": 0.7875, "step": 82970 }, { "epoch": 23.872266973532795, "grad_norm": 1.7582634687423706, "learning_rate": 0.001522554660529344, "loss": 0.6999, "step": 82980 }, { "epoch": 23.875143843498275, "grad_norm": 1.0260913372039795, "learning_rate": 0.0015224971231300346, "loss": 0.6728, "step": 82990 }, { "epoch": 23.87802071346375, "grad_norm": 2.1194007396698, "learning_rate": 0.001522439585730725, "loss": 0.5413, "step": 83000 }, { "epoch": 23.880897583429228, "grad_norm": 0.8638573288917542, "learning_rate": 0.0015223820483314156, "loss": 0.6131, "step": 83010 }, { "epoch": 23.883774453394707, "grad_norm": 1.778604507446289, "learning_rate": 0.001522324510932106, "loss": 0.6271, "step": 83020 }, { "epoch": 23.886651323360184, "grad_norm": 1.2375272512435913, "learning_rate": 0.0015222669735327962, "loss": 0.8492, "step": 83030 }, { "epoch": 23.889528193325663, "grad_norm": 1.0328292846679688, "learning_rate": 0.0015222094361334868, "loss": 0.6822, "step": 83040 }, { "epoch": 23.89240506329114, "grad_norm": 1.703104019165039, "learning_rate": 0.0015221518987341772, "loss": 0.7428, "step": 83050 }, { "epoch": 23.895281933256616, "grad_norm": 1.0264782905578613, "learning_rate": 0.0015220943613348677, "loss": 0.6817, "step": 83060 }, { "epoch": 23.898158803222096, "grad_norm": 1.4915865659713745, "learning_rate": 0.0015220368239355583, "loss": 0.7565, "step": 83070 }, { "epoch": 23.90103567318757, "grad_norm": 1.5864787101745605, "learning_rate": 0.0015219792865362484, "loss": 0.6341, "step": 83080 }, { "epoch": 23.903912543153048, "grad_norm": 1.5998568534851074, "learning_rate": 0.001521921749136939, "loss": 1.1396, "step": 83090 }, { "epoch": 23.906789413118528, "grad_norm": 1.4405124187469482, "learning_rate": 0.0015218642117376295, "loss": 0.7121, "step": 83100 }, { "epoch": 23.909666283084004, "grad_norm": 1.043694019317627, "learning_rate": 0.0015218066743383199, "loss": 0.8166, "step": 83110 }, { "epoch": 23.912543153049484, "grad_norm": 0.8528371453285217, "learning_rate": 0.0015217491369390105, "loss": 0.5563, "step": 83120 }, { "epoch": 23.91542002301496, "grad_norm": 1.0839425325393677, "learning_rate": 0.001521691599539701, "loss": 0.7343, "step": 83130 }, { "epoch": 23.918296892980436, "grad_norm": 1.6060067415237427, "learning_rate": 0.0015216340621403911, "loss": 0.5919, "step": 83140 }, { "epoch": 23.921173762945916, "grad_norm": 0.8453500270843506, "learning_rate": 0.0015215765247410817, "loss": 0.8011, "step": 83150 }, { "epoch": 23.924050632911392, "grad_norm": 1.5519012212753296, "learning_rate": 0.001521518987341772, "loss": 0.6906, "step": 83160 }, { "epoch": 23.92692750287687, "grad_norm": 1.3533624410629272, "learning_rate": 0.0015214614499424626, "loss": 0.8029, "step": 83170 }, { "epoch": 23.929804372842348, "grad_norm": 1.3303875923156738, "learning_rate": 0.0015214039125431532, "loss": 0.7043, "step": 83180 }, { "epoch": 23.932681242807824, "grad_norm": 0.8354052901268005, "learning_rate": 0.0015213463751438435, "loss": 0.7206, "step": 83190 }, { "epoch": 23.935558112773304, "grad_norm": 1.4032704830169678, "learning_rate": 0.0015212888377445339, "loss": 0.6827, "step": 83200 }, { "epoch": 23.93843498273878, "grad_norm": 1.3944278955459595, "learning_rate": 0.0015212313003452244, "loss": 0.6361, "step": 83210 }, { "epoch": 23.941311852704256, "grad_norm": 0.9797005653381348, "learning_rate": 0.0015211737629459148, "loss": 0.7792, "step": 83220 }, { "epoch": 23.944188722669736, "grad_norm": 0.5348674654960632, "learning_rate": 0.0015211162255466054, "loss": 0.8066, "step": 83230 }, { "epoch": 23.947065592635212, "grad_norm": 2.1588199138641357, "learning_rate": 0.001521058688147296, "loss": 0.7895, "step": 83240 }, { "epoch": 23.949942462600692, "grad_norm": 0.946255624294281, "learning_rate": 0.0015210011507479863, "loss": 0.6362, "step": 83250 }, { "epoch": 23.95281933256617, "grad_norm": 1.4408501386642456, "learning_rate": 0.0015209436133486766, "loss": 0.8294, "step": 83260 }, { "epoch": 23.955696202531644, "grad_norm": 2.102834701538086, "learning_rate": 0.001520886075949367, "loss": 0.8544, "step": 83270 }, { "epoch": 23.958573072497124, "grad_norm": 1.476072072982788, "learning_rate": 0.0015208285385500575, "loss": 0.6853, "step": 83280 }, { "epoch": 23.9614499424626, "grad_norm": 0.99991375207901, "learning_rate": 0.001520771001150748, "loss": 0.7221, "step": 83290 }, { "epoch": 23.964326812428077, "grad_norm": 1.0737957954406738, "learning_rate": 0.0015207134637514384, "loss": 0.7237, "step": 83300 }, { "epoch": 23.967203682393556, "grad_norm": 1.217352271080017, "learning_rate": 0.001520655926352129, "loss": 0.7625, "step": 83310 }, { "epoch": 23.970080552359033, "grad_norm": 1.2148702144622803, "learning_rate": 0.0015205983889528193, "loss": 0.7962, "step": 83320 }, { "epoch": 23.972957422324512, "grad_norm": 1.7759219408035278, "learning_rate": 0.0015205408515535097, "loss": 0.7162, "step": 83330 }, { "epoch": 23.97583429228999, "grad_norm": 1.7371748685836792, "learning_rate": 0.0015204833141542003, "loss": 0.8784, "step": 83340 }, { "epoch": 23.978711162255465, "grad_norm": 1.201509714126587, "learning_rate": 0.0015204257767548908, "loss": 0.8079, "step": 83350 }, { "epoch": 23.981588032220944, "grad_norm": 1.4104442596435547, "learning_rate": 0.0015203682393555812, "loss": 0.7237, "step": 83360 }, { "epoch": 23.98446490218642, "grad_norm": 0.9725779294967651, "learning_rate": 0.0015203107019562717, "loss": 0.6134, "step": 83370 }, { "epoch": 23.9873417721519, "grad_norm": 1.0290393829345703, "learning_rate": 0.001520253164556962, "loss": 0.7895, "step": 83380 }, { "epoch": 23.990218642117377, "grad_norm": 1.1949125528335571, "learning_rate": 0.0015201956271576524, "loss": 0.6471, "step": 83390 }, { "epoch": 23.993095512082853, "grad_norm": 0.5867437124252319, "learning_rate": 0.001520138089758343, "loss": 0.586, "step": 83400 }, { "epoch": 23.995972382048333, "grad_norm": 0.9530180096626282, "learning_rate": 0.0015200805523590333, "loss": 0.8405, "step": 83410 }, { "epoch": 23.99884925201381, "grad_norm": 2.066445827484131, "learning_rate": 0.001520023014959724, "loss": 0.7542, "step": 83420 }, { "epoch": 24.001726121979285, "grad_norm": 1.1683851480484009, "learning_rate": 0.0015199654775604145, "loss": 0.8142, "step": 83430 }, { "epoch": 24.004602991944765, "grad_norm": 1.4938337802886963, "learning_rate": 0.0015199079401611046, "loss": 0.5953, "step": 83440 }, { "epoch": 24.00747986191024, "grad_norm": 1.0614323616027832, "learning_rate": 0.0015198504027617952, "loss": 0.5186, "step": 83450 }, { "epoch": 24.01035673187572, "grad_norm": 1.040298342704773, "learning_rate": 0.0015197928653624857, "loss": 0.5882, "step": 83460 }, { "epoch": 24.013233601841197, "grad_norm": 2.182626724243164, "learning_rate": 0.001519735327963176, "loss": 0.6051, "step": 83470 }, { "epoch": 24.016110471806673, "grad_norm": 1.1510368585586548, "learning_rate": 0.0015196777905638666, "loss": 0.6016, "step": 83480 }, { "epoch": 24.018987341772153, "grad_norm": 0.7821174263954163, "learning_rate": 0.0015196202531645572, "loss": 0.6085, "step": 83490 }, { "epoch": 24.02186421173763, "grad_norm": 2.504209041595459, "learning_rate": 0.0015195627157652473, "loss": 0.7566, "step": 83500 }, { "epoch": 24.024741081703105, "grad_norm": 1.6851868629455566, "learning_rate": 0.001519505178365938, "loss": 0.6457, "step": 83510 }, { "epoch": 24.027617951668585, "grad_norm": 0.9692181944847107, "learning_rate": 0.0015194476409666282, "loss": 0.6803, "step": 83520 }, { "epoch": 24.03049482163406, "grad_norm": 1.7230966091156006, "learning_rate": 0.0015193901035673188, "loss": 0.8721, "step": 83530 }, { "epoch": 24.03337169159954, "grad_norm": 1.0187456607818604, "learning_rate": 0.0015193325661680094, "loss": 0.5411, "step": 83540 }, { "epoch": 24.036248561565017, "grad_norm": 0.9888328313827515, "learning_rate": 0.0015192750287686997, "loss": 0.6596, "step": 83550 }, { "epoch": 24.039125431530493, "grad_norm": 1.391761064529419, "learning_rate": 0.00151921749136939, "loss": 0.6855, "step": 83560 }, { "epoch": 24.042002301495973, "grad_norm": 1.4813928604125977, "learning_rate": 0.0015191599539700806, "loss": 0.735, "step": 83570 }, { "epoch": 24.04487917146145, "grad_norm": 2.0374624729156494, "learning_rate": 0.001519102416570771, "loss": 0.6338, "step": 83580 }, { "epoch": 24.04775604142693, "grad_norm": 0.9590105414390564, "learning_rate": 0.0015190448791714615, "loss": 0.7842, "step": 83590 }, { "epoch": 24.050632911392405, "grad_norm": 1.1642756462097168, "learning_rate": 0.001518987341772152, "loss": 0.6356, "step": 83600 }, { "epoch": 24.05350978135788, "grad_norm": 1.2298295497894287, "learning_rate": 0.0015189298043728422, "loss": 0.7629, "step": 83610 }, { "epoch": 24.05638665132336, "grad_norm": 1.5107606649398804, "learning_rate": 0.0015188722669735328, "loss": 0.7074, "step": 83620 }, { "epoch": 24.059263521288837, "grad_norm": 0.9913280606269836, "learning_rate": 0.0015188147295742231, "loss": 0.683, "step": 83630 }, { "epoch": 24.062140391254314, "grad_norm": 1.1431547403335571, "learning_rate": 0.0015187571921749137, "loss": 0.5291, "step": 83640 }, { "epoch": 24.065017261219793, "grad_norm": 1.066282033920288, "learning_rate": 0.0015186996547756043, "loss": 0.7815, "step": 83650 }, { "epoch": 24.06789413118527, "grad_norm": 0.8369176387786865, "learning_rate": 0.0015186421173762946, "loss": 0.6229, "step": 83660 }, { "epoch": 24.07077100115075, "grad_norm": 1.0165858268737793, "learning_rate": 0.001518584579976985, "loss": 0.6373, "step": 83670 }, { "epoch": 24.073647871116226, "grad_norm": 2.5954437255859375, "learning_rate": 0.0015185270425776755, "loss": 0.6532, "step": 83680 }, { "epoch": 24.076524741081702, "grad_norm": 1.4188721179962158, "learning_rate": 0.0015184695051783659, "loss": 0.5829, "step": 83690 }, { "epoch": 24.07940161104718, "grad_norm": 0.8828977942466736, "learning_rate": 0.0015184119677790564, "loss": 0.7578, "step": 83700 }, { "epoch": 24.082278481012658, "grad_norm": 0.758429229259491, "learning_rate": 0.001518354430379747, "loss": 0.6323, "step": 83710 }, { "epoch": 24.085155350978138, "grad_norm": 2.1209352016448975, "learning_rate": 0.0015182968929804373, "loss": 0.6129, "step": 83720 }, { "epoch": 24.088032220943614, "grad_norm": 0.7284680604934692, "learning_rate": 0.0015182393555811277, "loss": 0.7308, "step": 83730 }, { "epoch": 24.09090909090909, "grad_norm": 0.7022935748100281, "learning_rate": 0.001518181818181818, "loss": 0.561, "step": 83740 }, { "epoch": 24.09378596087457, "grad_norm": 0.8037087917327881, "learning_rate": 0.0015181242807825086, "loss": 0.6516, "step": 83750 }, { "epoch": 24.096662830840046, "grad_norm": 1.6602059602737427, "learning_rate": 0.0015180667433831992, "loss": 0.7044, "step": 83760 }, { "epoch": 24.099539700805522, "grad_norm": 1.9341317415237427, "learning_rate": 0.0015180092059838895, "loss": 0.709, "step": 83770 }, { "epoch": 24.102416570771002, "grad_norm": 1.352462887763977, "learning_rate": 0.00151795166858458, "loss": 0.7233, "step": 83780 }, { "epoch": 24.105293440736478, "grad_norm": 1.4731312990188599, "learning_rate": 0.0015178941311852704, "loss": 0.6784, "step": 83790 }, { "epoch": 24.108170310701958, "grad_norm": 1.3639048337936401, "learning_rate": 0.0015178365937859608, "loss": 0.6337, "step": 83800 }, { "epoch": 24.111047180667434, "grad_norm": 20.712352752685547, "learning_rate": 0.0015177790563866513, "loss": 0.636, "step": 83810 }, { "epoch": 24.11392405063291, "grad_norm": 1.5147452354431152, "learning_rate": 0.001517721518987342, "loss": 0.5583, "step": 83820 }, { "epoch": 24.11680092059839, "grad_norm": 1.4734495878219604, "learning_rate": 0.0015176639815880323, "loss": 0.7482, "step": 83830 }, { "epoch": 24.119677790563866, "grad_norm": 1.0156067609786987, "learning_rate": 0.0015176064441887228, "loss": 0.7365, "step": 83840 }, { "epoch": 24.122554660529342, "grad_norm": 1.596480131149292, "learning_rate": 0.001517548906789413, "loss": 0.7814, "step": 83850 }, { "epoch": 24.125431530494822, "grad_norm": 1.1558558940887451, "learning_rate": 0.0015174913693901035, "loss": 0.5908, "step": 83860 }, { "epoch": 24.1283084004603, "grad_norm": 1.3460338115692139, "learning_rate": 0.001517433831990794, "loss": 0.7727, "step": 83870 }, { "epoch": 24.131185270425778, "grad_norm": 1.2330607175827026, "learning_rate": 0.0015173762945914844, "loss": 0.893, "step": 83880 }, { "epoch": 24.134062140391254, "grad_norm": 0.924056351184845, "learning_rate": 0.001517318757192175, "loss": 0.6743, "step": 83890 }, { "epoch": 24.13693901035673, "grad_norm": 0.8921123147010803, "learning_rate": 0.0015172612197928655, "loss": 0.7304, "step": 83900 }, { "epoch": 24.13981588032221, "grad_norm": 1.468334674835205, "learning_rate": 0.0015172036823935557, "loss": 0.7128, "step": 83910 }, { "epoch": 24.142692750287686, "grad_norm": 1.1953729391098022, "learning_rate": 0.0015171461449942462, "loss": 0.7363, "step": 83920 }, { "epoch": 24.145569620253166, "grad_norm": 2.6889781951904297, "learning_rate": 0.0015170886075949368, "loss": 0.6585, "step": 83930 }, { "epoch": 24.148446490218642, "grad_norm": 1.5811797380447388, "learning_rate": 0.0015170310701956272, "loss": 0.7124, "step": 83940 }, { "epoch": 24.15132336018412, "grad_norm": 1.4554791450500488, "learning_rate": 0.0015169735327963177, "loss": 0.824, "step": 83950 }, { "epoch": 24.1542002301496, "grad_norm": 1.6236764192581177, "learning_rate": 0.0015169159953970083, "loss": 0.5935, "step": 83960 }, { "epoch": 24.157077100115075, "grad_norm": 1.1050413846969604, "learning_rate": 0.0015168584579976984, "loss": 0.7606, "step": 83970 }, { "epoch": 24.15995397008055, "grad_norm": 0.8110806345939636, "learning_rate": 0.001516800920598389, "loss": 0.5527, "step": 83980 }, { "epoch": 24.16283084004603, "grad_norm": 1.3517745733261108, "learning_rate": 0.0015167433831990793, "loss": 0.6442, "step": 83990 }, { "epoch": 24.165707710011507, "grad_norm": 1.6290770769119263, "learning_rate": 0.0015166858457997699, "loss": 0.706, "step": 84000 }, { "epoch": 24.168584579976987, "grad_norm": 1.7171550989151, "learning_rate": 0.0015166283084004605, "loss": 0.5839, "step": 84010 }, { "epoch": 24.171461449942463, "grad_norm": 2.099250555038452, "learning_rate": 0.0015165707710011508, "loss": 0.8163, "step": 84020 }, { "epoch": 24.17433831990794, "grad_norm": 1.818634033203125, "learning_rate": 0.0015165132336018411, "loss": 0.6746, "step": 84030 }, { "epoch": 24.17721518987342, "grad_norm": 1.4745972156524658, "learning_rate": 0.0015164556962025317, "loss": 0.6226, "step": 84040 }, { "epoch": 24.180092059838895, "grad_norm": 2.3589043617248535, "learning_rate": 0.001516398158803222, "loss": 0.7556, "step": 84050 }, { "epoch": 24.182968929804375, "grad_norm": 0.876454770565033, "learning_rate": 0.0015163406214039126, "loss": 0.6989, "step": 84060 }, { "epoch": 24.18584579976985, "grad_norm": 0.9950952529907227, "learning_rate": 0.0015162830840046032, "loss": 0.862, "step": 84070 }, { "epoch": 24.188722669735327, "grad_norm": 1.2540745735168457, "learning_rate": 0.0015162255466052935, "loss": 0.6269, "step": 84080 }, { "epoch": 24.191599539700807, "grad_norm": 0.9670194983482361, "learning_rate": 0.0015161680092059839, "loss": 0.6503, "step": 84090 }, { "epoch": 24.194476409666283, "grad_norm": 0.833499014377594, "learning_rate": 0.0015161104718066742, "loss": 0.7736, "step": 84100 }, { "epoch": 24.19735327963176, "grad_norm": 1.0832931995391846, "learning_rate": 0.0015160529344073648, "loss": 0.8253, "step": 84110 }, { "epoch": 24.20023014959724, "grad_norm": 2.198054313659668, "learning_rate": 0.0015159953970080554, "loss": 0.7608, "step": 84120 }, { "epoch": 24.203107019562715, "grad_norm": 1.2941739559173584, "learning_rate": 0.0015159378596087457, "loss": 0.6961, "step": 84130 }, { "epoch": 24.205983889528195, "grad_norm": 1.1722444295883179, "learning_rate": 0.0015158803222094363, "loss": 0.7273, "step": 84140 }, { "epoch": 24.20886075949367, "grad_norm": 1.5599147081375122, "learning_rate": 0.0015158227848101266, "loss": 0.725, "step": 84150 }, { "epoch": 24.211737629459147, "grad_norm": 1.5336166620254517, "learning_rate": 0.001515765247410817, "loss": 0.6603, "step": 84160 }, { "epoch": 24.214614499424627, "grad_norm": 1.295290470123291, "learning_rate": 0.0015157077100115075, "loss": 0.5209, "step": 84170 }, { "epoch": 24.217491369390103, "grad_norm": 1.7664735317230225, "learning_rate": 0.001515650172612198, "loss": 0.6554, "step": 84180 }, { "epoch": 24.22036823935558, "grad_norm": 1.7792978286743164, "learning_rate": 0.0015155926352128884, "loss": 0.7326, "step": 84190 }, { "epoch": 24.22324510932106, "grad_norm": 0.819140613079071, "learning_rate": 0.001515535097813579, "loss": 0.5336, "step": 84200 }, { "epoch": 24.226121979286535, "grad_norm": 1.1768718957901, "learning_rate": 0.0015154775604142691, "loss": 0.7056, "step": 84210 }, { "epoch": 24.228998849252015, "grad_norm": 1.9159824848175049, "learning_rate": 0.0015154200230149597, "loss": 0.9844, "step": 84220 }, { "epoch": 24.23187571921749, "grad_norm": 1.0036298036575317, "learning_rate": 0.0015153624856156503, "loss": 0.6061, "step": 84230 }, { "epoch": 24.234752589182968, "grad_norm": 1.5606815814971924, "learning_rate": 0.0015153049482163406, "loss": 0.7028, "step": 84240 }, { "epoch": 24.237629459148447, "grad_norm": 1.6522494554519653, "learning_rate": 0.0015152474108170312, "loss": 0.6668, "step": 84250 }, { "epoch": 24.240506329113924, "grad_norm": 1.0132153034210205, "learning_rate": 0.0015151898734177217, "loss": 0.5365, "step": 84260 }, { "epoch": 24.243383199079403, "grad_norm": 1.9662469625473022, "learning_rate": 0.0015151323360184119, "loss": 0.8089, "step": 84270 }, { "epoch": 24.24626006904488, "grad_norm": 2.0830862522125244, "learning_rate": 0.0015150747986191024, "loss": 0.6977, "step": 84280 }, { "epoch": 24.249136939010356, "grad_norm": 1.2259480953216553, "learning_rate": 0.001515017261219793, "loss": 0.6303, "step": 84290 }, { "epoch": 24.252013808975835, "grad_norm": 1.2857586145401, "learning_rate": 0.0015149597238204833, "loss": 0.6524, "step": 84300 }, { "epoch": 24.25489067894131, "grad_norm": 1.2414395809173584, "learning_rate": 0.001514902186421174, "loss": 0.6832, "step": 84310 }, { "epoch": 24.257767548906788, "grad_norm": 1.0644780397415161, "learning_rate": 0.0015148446490218642, "loss": 0.749, "step": 84320 }, { "epoch": 24.260644418872268, "grad_norm": 0.8664261698722839, "learning_rate": 0.0015147871116225546, "loss": 0.6208, "step": 84330 }, { "epoch": 24.263521288837744, "grad_norm": 1.2856626510620117, "learning_rate": 0.0015147295742232452, "loss": 0.6957, "step": 84340 }, { "epoch": 24.266398158803224, "grad_norm": 0.8938957452774048, "learning_rate": 0.0015146720368239355, "loss": 0.6555, "step": 84350 }, { "epoch": 24.2692750287687, "grad_norm": 1.7037941217422485, "learning_rate": 0.001514614499424626, "loss": 0.7709, "step": 84360 }, { "epoch": 24.272151898734176, "grad_norm": 1.3783413171768188, "learning_rate": 0.0015145569620253166, "loss": 0.7176, "step": 84370 }, { "epoch": 24.275028768699656, "grad_norm": 1.0042051076889038, "learning_rate": 0.001514499424626007, "loss": 0.7421, "step": 84380 }, { "epoch": 24.277905638665132, "grad_norm": 1.4135040044784546, "learning_rate": 0.0015144418872266973, "loss": 0.6485, "step": 84390 }, { "epoch": 24.280782508630608, "grad_norm": 2.747950315475464, "learning_rate": 0.0015143843498273879, "loss": 0.62, "step": 84400 }, { "epoch": 24.283659378596088, "grad_norm": 2.2575762271881104, "learning_rate": 0.0015143268124280782, "loss": 0.6718, "step": 84410 }, { "epoch": 24.286536248561564, "grad_norm": 1.249650239944458, "learning_rate": 0.0015142692750287688, "loss": 0.5605, "step": 84420 }, { "epoch": 24.289413118527044, "grad_norm": 0.9950286149978638, "learning_rate": 0.0015142117376294591, "loss": 0.6273, "step": 84430 }, { "epoch": 24.29228998849252, "grad_norm": 2.6964690685272217, "learning_rate": 0.0015141542002301495, "loss": 0.6093, "step": 84440 }, { "epoch": 24.295166858457996, "grad_norm": 1.9262768030166626, "learning_rate": 0.00151409666283084, "loss": 0.8146, "step": 84450 }, { "epoch": 24.298043728423476, "grad_norm": 1.7751222848892212, "learning_rate": 0.0015140391254315304, "loss": 0.536, "step": 84460 }, { "epoch": 24.300920598388952, "grad_norm": 0.9808117747306824, "learning_rate": 0.001513981588032221, "loss": 0.6863, "step": 84470 }, { "epoch": 24.303797468354432, "grad_norm": 1.2149823904037476, "learning_rate": 0.0015139240506329115, "loss": 0.6559, "step": 84480 }, { "epoch": 24.306674338319908, "grad_norm": 1.1546036005020142, "learning_rate": 0.0015138665132336019, "loss": 0.6713, "step": 84490 }, { "epoch": 24.309551208285384, "grad_norm": 1.0868107080459595, "learning_rate": 0.0015138089758342922, "loss": 0.4951, "step": 84500 }, { "epoch": 24.312428078250864, "grad_norm": 2.261761426925659, "learning_rate": 0.0015137514384349828, "loss": 0.6779, "step": 84510 }, { "epoch": 24.31530494821634, "grad_norm": 1.2751531600952148, "learning_rate": 0.0015136939010356731, "loss": 0.6461, "step": 84520 }, { "epoch": 24.318181818181817, "grad_norm": 1.0741177797317505, "learning_rate": 0.0015136363636363637, "loss": 0.6324, "step": 84530 }, { "epoch": 24.321058688147296, "grad_norm": 1.085227608680725, "learning_rate": 0.001513578826237054, "loss": 0.5215, "step": 84540 }, { "epoch": 24.323935558112773, "grad_norm": 0.8489233255386353, "learning_rate": 0.0015135212888377446, "loss": 0.7324, "step": 84550 }, { "epoch": 24.326812428078252, "grad_norm": 1.8640624284744263, "learning_rate": 0.001513463751438435, "loss": 0.6105, "step": 84560 }, { "epoch": 24.32968929804373, "grad_norm": 1.3187906742095947, "learning_rate": 0.0015134062140391253, "loss": 0.7367, "step": 84570 }, { "epoch": 24.332566168009205, "grad_norm": 1.4952313899993896, "learning_rate": 0.0015133486766398159, "loss": 0.736, "step": 84580 }, { "epoch": 24.335443037974684, "grad_norm": 0.9090831279754639, "learning_rate": 0.0015132911392405064, "loss": 0.5519, "step": 84590 }, { "epoch": 24.33831990794016, "grad_norm": 1.9656014442443848, "learning_rate": 0.0015132336018411968, "loss": 0.7191, "step": 84600 }, { "epoch": 24.34119677790564, "grad_norm": 1.3135290145874023, "learning_rate": 0.0015131760644418873, "loss": 0.7278, "step": 84610 }, { "epoch": 24.344073647871117, "grad_norm": 1.5900667905807495, "learning_rate": 0.0015131185270425777, "loss": 0.9334, "step": 84620 }, { "epoch": 24.346950517836593, "grad_norm": 0.9029240608215332, "learning_rate": 0.001513060989643268, "loss": 0.6358, "step": 84630 }, { "epoch": 24.349827387802073, "grad_norm": 1.8302215337753296, "learning_rate": 0.0015130034522439586, "loss": 0.6801, "step": 84640 }, { "epoch": 24.35270425776755, "grad_norm": 0.9537377953529358, "learning_rate": 0.0015129459148446492, "loss": 0.5891, "step": 84650 }, { "epoch": 24.355581127733025, "grad_norm": 1.3593776226043701, "learning_rate": 0.0015128883774453395, "loss": 0.6588, "step": 84660 }, { "epoch": 24.358457997698505, "grad_norm": 1.3840159177780151, "learning_rate": 0.00151283084004603, "loss": 0.5845, "step": 84670 }, { "epoch": 24.36133486766398, "grad_norm": 0.9389737844467163, "learning_rate": 0.0015127733026467202, "loss": 0.5034, "step": 84680 }, { "epoch": 24.36421173762946, "grad_norm": 1.2403827905654907, "learning_rate": 0.0015127157652474108, "loss": 0.7379, "step": 84690 }, { "epoch": 24.367088607594937, "grad_norm": 1.8619213104248047, "learning_rate": 0.0015126582278481013, "loss": 0.7346, "step": 84700 }, { "epoch": 24.369965477560413, "grad_norm": 1.5380878448486328, "learning_rate": 0.0015126006904487917, "loss": 0.8388, "step": 84710 }, { "epoch": 24.372842347525893, "grad_norm": 2.1145036220550537, "learning_rate": 0.0015125431530494822, "loss": 0.6376, "step": 84720 }, { "epoch": 24.37571921749137, "grad_norm": 1.1676403284072876, "learning_rate": 0.0015124856156501728, "loss": 0.788, "step": 84730 }, { "epoch": 24.378596087456845, "grad_norm": 1.1195875406265259, "learning_rate": 0.001512428078250863, "loss": 0.6321, "step": 84740 }, { "epoch": 24.381472957422325, "grad_norm": 1.552262783050537, "learning_rate": 0.0015123705408515535, "loss": 0.6219, "step": 84750 }, { "epoch": 24.3843498273878, "grad_norm": 1.144982933998108, "learning_rate": 0.001512313003452244, "loss": 0.7257, "step": 84760 }, { "epoch": 24.38722669735328, "grad_norm": 1.6495294570922852, "learning_rate": 0.0015122554660529344, "loss": 0.7445, "step": 84770 }, { "epoch": 24.390103567318757, "grad_norm": 2.138165235519409, "learning_rate": 0.001512197928653625, "loss": 0.7383, "step": 84780 }, { "epoch": 24.392980437284233, "grad_norm": 1.585087776184082, "learning_rate": 0.0015121403912543153, "loss": 0.719, "step": 84790 }, { "epoch": 24.395857307249713, "grad_norm": 0.8750008940696716, "learning_rate": 0.0015120828538550057, "loss": 0.6555, "step": 84800 }, { "epoch": 24.39873417721519, "grad_norm": 1.2739108800888062, "learning_rate": 0.0015120253164556962, "loss": 0.7337, "step": 84810 }, { "epoch": 24.40161104718067, "grad_norm": 2.085221529006958, "learning_rate": 0.0015119677790563866, "loss": 0.7088, "step": 84820 }, { "epoch": 24.404487917146145, "grad_norm": 1.160526156425476, "learning_rate": 0.0015119102416570772, "loss": 0.6848, "step": 84830 }, { "epoch": 24.40736478711162, "grad_norm": 1.395369052886963, "learning_rate": 0.0015118527042577677, "loss": 0.6116, "step": 84840 }, { "epoch": 24.4102416570771, "grad_norm": 0.9592708349227905, "learning_rate": 0.001511795166858458, "loss": 0.6619, "step": 84850 }, { "epoch": 24.413118527042577, "grad_norm": 1.7113293409347534, "learning_rate": 0.0015117376294591484, "loss": 0.8627, "step": 84860 }, { "epoch": 24.415995397008054, "grad_norm": 0.7967115640640259, "learning_rate": 0.001511680092059839, "loss": 0.7624, "step": 84870 }, { "epoch": 24.418872266973533, "grad_norm": 1.1593619585037231, "learning_rate": 0.0015116225546605293, "loss": 0.5197, "step": 84880 }, { "epoch": 24.42174913693901, "grad_norm": 1.2417157888412476, "learning_rate": 0.0015115650172612199, "loss": 0.8058, "step": 84890 }, { "epoch": 24.42462600690449, "grad_norm": 0.9572463035583496, "learning_rate": 0.0015115074798619102, "loss": 0.9055, "step": 84900 }, { "epoch": 24.427502876869966, "grad_norm": 1.1999644041061401, "learning_rate": 0.0015114499424626008, "loss": 0.6704, "step": 84910 }, { "epoch": 24.430379746835442, "grad_norm": 1.1510341167449951, "learning_rate": 0.0015113924050632911, "loss": 0.6564, "step": 84920 }, { "epoch": 24.43325661680092, "grad_norm": 1.5873477458953857, "learning_rate": 0.0015113348676639815, "loss": 0.812, "step": 84930 }, { "epoch": 24.436133486766398, "grad_norm": 1.245166540145874, "learning_rate": 0.001511277330264672, "loss": 0.6333, "step": 84940 }, { "epoch": 24.439010356731877, "grad_norm": 0.716423749923706, "learning_rate": 0.0015112197928653626, "loss": 0.5668, "step": 84950 }, { "epoch": 24.441887226697354, "grad_norm": 0.7087412476539612, "learning_rate": 0.001511162255466053, "loss": 0.6862, "step": 84960 }, { "epoch": 24.44476409666283, "grad_norm": 1.0718473196029663, "learning_rate": 0.0015111047180667435, "loss": 0.6442, "step": 84970 }, { "epoch": 24.44764096662831, "grad_norm": 2.4893083572387695, "learning_rate": 0.0015110471806674339, "loss": 0.6392, "step": 84980 }, { "epoch": 24.450517836593786, "grad_norm": 1.5924797058105469, "learning_rate": 0.0015109896432681242, "loss": 0.7707, "step": 84990 }, { "epoch": 24.453394706559262, "grad_norm": 0.9987913370132446, "learning_rate": 0.0015109321058688148, "loss": 0.6832, "step": 85000 }, { "epoch": 24.456271576524742, "grad_norm": 1.1606149673461914, "learning_rate": 0.0015108745684695051, "loss": 0.7068, "step": 85010 }, { "epoch": 24.459148446490218, "grad_norm": 1.552699327468872, "learning_rate": 0.0015108170310701957, "loss": 0.7554, "step": 85020 }, { "epoch": 24.462025316455698, "grad_norm": 2.208289384841919, "learning_rate": 0.0015107594936708863, "loss": 0.9076, "step": 85030 }, { "epoch": 24.464902186421174, "grad_norm": 1.084433674812317, "learning_rate": 0.0015107019562715764, "loss": 0.6274, "step": 85040 }, { "epoch": 24.46777905638665, "grad_norm": 1.1839033365249634, "learning_rate": 0.001510644418872267, "loss": 0.8277, "step": 85050 }, { "epoch": 24.47065592635213, "grad_norm": 1.7511307001113892, "learning_rate": 0.0015105868814729575, "loss": 0.7287, "step": 85060 }, { "epoch": 24.473532796317606, "grad_norm": 1.5444625616073608, "learning_rate": 0.0015105293440736479, "loss": 0.7831, "step": 85070 }, { "epoch": 24.476409666283082, "grad_norm": 1.2283415794372559, "learning_rate": 0.0015104718066743384, "loss": 0.6633, "step": 85080 }, { "epoch": 24.479286536248562, "grad_norm": 1.5663096904754639, "learning_rate": 0.001510414269275029, "loss": 0.9193, "step": 85090 }, { "epoch": 24.48216340621404, "grad_norm": 1.4452788829803467, "learning_rate": 0.0015103567318757191, "loss": 0.7499, "step": 85100 }, { "epoch": 24.485040276179518, "grad_norm": 1.493073582649231, "learning_rate": 0.0015102991944764097, "loss": 0.6699, "step": 85110 }, { "epoch": 24.487917146144994, "grad_norm": 1.5689886808395386, "learning_rate": 0.0015102416570771, "loss": 0.8757, "step": 85120 }, { "epoch": 24.49079401611047, "grad_norm": 1.5441365242004395, "learning_rate": 0.0015101841196777906, "loss": 0.7093, "step": 85130 }, { "epoch": 24.49367088607595, "grad_norm": 1.407392978668213, "learning_rate": 0.0015101265822784812, "loss": 0.6122, "step": 85140 }, { "epoch": 24.496547756041426, "grad_norm": 0.83104407787323, "learning_rate": 0.0015100690448791715, "loss": 0.6635, "step": 85150 }, { "epoch": 24.499424626006906, "grad_norm": 1.1667168140411377, "learning_rate": 0.0015100115074798619, "loss": 0.6871, "step": 85160 }, { "epoch": 24.502301495972382, "grad_norm": 1.5404030084609985, "learning_rate": 0.0015099539700805524, "loss": 0.5743, "step": 85170 }, { "epoch": 24.50517836593786, "grad_norm": 1.3607124090194702, "learning_rate": 0.0015098964326812428, "loss": 0.6073, "step": 85180 }, { "epoch": 24.50805523590334, "grad_norm": 1.182559609413147, "learning_rate": 0.0015098388952819333, "loss": 0.8006, "step": 85190 }, { "epoch": 24.510932105868815, "grad_norm": 1.7727919816970825, "learning_rate": 0.001509781357882624, "loss": 0.7982, "step": 85200 }, { "epoch": 24.51380897583429, "grad_norm": 2.394850015640259, "learning_rate": 0.0015097238204833142, "loss": 0.7671, "step": 85210 }, { "epoch": 24.51668584579977, "grad_norm": 0.8881667256355286, "learning_rate": 0.0015096662830840046, "loss": 0.7078, "step": 85220 }, { "epoch": 24.519562715765247, "grad_norm": 0.887798011302948, "learning_rate": 0.001509608745684695, "loss": 0.6669, "step": 85230 }, { "epoch": 24.522439585730726, "grad_norm": 1.200229525566101, "learning_rate": 0.0015095512082853855, "loss": 0.829, "step": 85240 }, { "epoch": 24.525316455696203, "grad_norm": 1.5750855207443237, "learning_rate": 0.001509493670886076, "loss": 0.6754, "step": 85250 }, { "epoch": 24.52819332566168, "grad_norm": 1.3419650793075562, "learning_rate": 0.0015094361334867664, "loss": 0.7361, "step": 85260 }, { "epoch": 24.53107019562716, "grad_norm": 0.9296777248382568, "learning_rate": 0.0015093785960874568, "loss": 0.6945, "step": 85270 }, { "epoch": 24.533947065592635, "grad_norm": 1.7246230840682983, "learning_rate": 0.0015093210586881473, "loss": 0.5789, "step": 85280 }, { "epoch": 24.53682393555811, "grad_norm": 1.1923332214355469, "learning_rate": 0.0015092635212888377, "loss": 0.7554, "step": 85290 }, { "epoch": 24.53970080552359, "grad_norm": 1.340865969657898, "learning_rate": 0.0015092059838895282, "loss": 0.8205, "step": 85300 }, { "epoch": 24.542577675489067, "grad_norm": 1.735944390296936, "learning_rate": 0.0015091484464902188, "loss": 0.6909, "step": 85310 }, { "epoch": 24.545454545454547, "grad_norm": 1.5959036350250244, "learning_rate": 0.0015090909090909091, "loss": 0.8018, "step": 85320 }, { "epoch": 24.548331415420023, "grad_norm": 4.1914215087890625, "learning_rate": 0.0015090333716915995, "loss": 0.6371, "step": 85330 }, { "epoch": 24.5512082853855, "grad_norm": 1.1738941669464111, "learning_rate": 0.00150897583429229, "loss": 0.6337, "step": 85340 }, { "epoch": 24.55408515535098, "grad_norm": 0.864419162273407, "learning_rate": 0.0015089182968929804, "loss": 0.9503, "step": 85350 }, { "epoch": 24.556962025316455, "grad_norm": 0.7409385442733765, "learning_rate": 0.001508860759493671, "loss": 0.7703, "step": 85360 }, { "epoch": 24.559838895281935, "grad_norm": 1.444299578666687, "learning_rate": 0.0015088032220943613, "loss": 0.6737, "step": 85370 }, { "epoch": 24.56271576524741, "grad_norm": 1.5811911821365356, "learning_rate": 0.0015087456846950519, "loss": 0.5679, "step": 85380 }, { "epoch": 24.565592635212887, "grad_norm": 1.1554993391036987, "learning_rate": 0.0015086881472957422, "loss": 0.6582, "step": 85390 }, { "epoch": 24.568469505178367, "grad_norm": 0.8608315587043762, "learning_rate": 0.0015086306098964326, "loss": 0.5881, "step": 85400 }, { "epoch": 24.571346375143843, "grad_norm": 1.0186512470245361, "learning_rate": 0.0015085730724971231, "loss": 0.7373, "step": 85410 }, { "epoch": 24.57422324510932, "grad_norm": 1.3876087665557861, "learning_rate": 0.0015085155350978137, "loss": 0.9511, "step": 85420 }, { "epoch": 24.5771001150748, "grad_norm": 0.6513647437095642, "learning_rate": 0.001508457997698504, "loss": 0.719, "step": 85430 }, { "epoch": 24.579976985040275, "grad_norm": 3.177314281463623, "learning_rate": 0.0015084004602991946, "loss": 0.5838, "step": 85440 }, { "epoch": 24.582853855005755, "grad_norm": 0.6876992583274841, "learning_rate": 0.001508342922899885, "loss": 0.7494, "step": 85450 }, { "epoch": 24.58573072497123, "grad_norm": 1.665428876876831, "learning_rate": 0.0015082853855005753, "loss": 0.7762, "step": 85460 }, { "epoch": 24.588607594936708, "grad_norm": 1.4140759706497192, "learning_rate": 0.0015082278481012659, "loss": 0.6789, "step": 85470 }, { "epoch": 24.591484464902187, "grad_norm": 2.9001810550689697, "learning_rate": 0.0015081703107019562, "loss": 0.6265, "step": 85480 }, { "epoch": 24.594361334867664, "grad_norm": 1.8576303720474243, "learning_rate": 0.0015081127733026468, "loss": 0.7045, "step": 85490 }, { "epoch": 24.59723820483314, "grad_norm": 1.3553208112716675, "learning_rate": 0.0015080552359033373, "loss": 0.7599, "step": 85500 }, { "epoch": 24.60011507479862, "grad_norm": 1.653753638267517, "learning_rate": 0.0015079976985040275, "loss": 0.7481, "step": 85510 }, { "epoch": 24.602991944764096, "grad_norm": 1.207226276397705, "learning_rate": 0.001507940161104718, "loss": 0.673, "step": 85520 }, { "epoch": 24.605868814729575, "grad_norm": 1.363173484802246, "learning_rate": 0.0015078826237054086, "loss": 0.6865, "step": 85530 }, { "epoch": 24.60874568469505, "grad_norm": 3.4645040035247803, "learning_rate": 0.001507825086306099, "loss": 0.7193, "step": 85540 }, { "epoch": 24.611622554660528, "grad_norm": 2.263402223587036, "learning_rate": 0.0015077675489067895, "loss": 0.7985, "step": 85550 }, { "epoch": 24.614499424626008, "grad_norm": 0.6777218580245972, "learning_rate": 0.00150771001150748, "loss": 0.6365, "step": 85560 }, { "epoch": 24.617376294591484, "grad_norm": 1.3705190420150757, "learning_rate": 0.0015076524741081702, "loss": 0.7023, "step": 85570 }, { "epoch": 24.620253164556964, "grad_norm": 1.5856794118881226, "learning_rate": 0.0015075949367088608, "loss": 0.9872, "step": 85580 }, { "epoch": 24.62313003452244, "grad_norm": 1.7868202924728394, "learning_rate": 0.0015075373993095511, "loss": 0.8229, "step": 85590 }, { "epoch": 24.626006904487916, "grad_norm": 0.9765819907188416, "learning_rate": 0.0015074798619102417, "loss": 0.6014, "step": 85600 }, { "epoch": 24.628883774453396, "grad_norm": 1.3104908466339111, "learning_rate": 0.0015074223245109322, "loss": 0.6291, "step": 85610 }, { "epoch": 24.631760644418872, "grad_norm": 1.3788903951644897, "learning_rate": 0.0015073647871116226, "loss": 0.6555, "step": 85620 }, { "epoch": 24.634637514384348, "grad_norm": 0.9264461398124695, "learning_rate": 0.001507307249712313, "loss": 0.7643, "step": 85630 }, { "epoch": 24.637514384349828, "grad_norm": 1.3681410551071167, "learning_rate": 0.0015072497123130035, "loss": 0.9757, "step": 85640 }, { "epoch": 24.640391254315304, "grad_norm": 1.111812710762024, "learning_rate": 0.0015071921749136939, "loss": 0.7379, "step": 85650 }, { "epoch": 24.643268124280784, "grad_norm": 1.1961885690689087, "learning_rate": 0.0015071346375143844, "loss": 0.6042, "step": 85660 }, { "epoch": 24.64614499424626, "grad_norm": 1.189452886581421, "learning_rate": 0.001507077100115075, "loss": 0.7651, "step": 85670 }, { "epoch": 24.649021864211736, "grad_norm": 1.0574573278427124, "learning_rate": 0.0015070195627157653, "loss": 0.67, "step": 85680 }, { "epoch": 24.651898734177216, "grad_norm": 1.4638868570327759, "learning_rate": 0.0015069620253164557, "loss": 0.6544, "step": 85690 }, { "epoch": 24.654775604142692, "grad_norm": 0.785585880279541, "learning_rate": 0.001506904487917146, "loss": 0.6217, "step": 85700 }, { "epoch": 24.657652474108172, "grad_norm": 1.35767662525177, "learning_rate": 0.0015068469505178366, "loss": 0.6269, "step": 85710 }, { "epoch": 24.660529344073648, "grad_norm": 0.8774781823158264, "learning_rate": 0.0015067894131185271, "loss": 0.8051, "step": 85720 }, { "epoch": 24.663406214039124, "grad_norm": 1.2183475494384766, "learning_rate": 0.0015067318757192175, "loss": 0.5523, "step": 85730 }, { "epoch": 24.666283084004604, "grad_norm": 2.9619038105010986, "learning_rate": 0.001506674338319908, "loss": 0.8864, "step": 85740 }, { "epoch": 24.66915995397008, "grad_norm": 1.1318784952163696, "learning_rate": 0.0015066168009205984, "loss": 0.7299, "step": 85750 }, { "epoch": 24.672036823935557, "grad_norm": 0.9323392510414124, "learning_rate": 0.0015065592635212888, "loss": 0.5414, "step": 85760 }, { "epoch": 24.674913693901036, "grad_norm": 1.438460111618042, "learning_rate": 0.0015065017261219793, "loss": 0.8462, "step": 85770 }, { "epoch": 24.677790563866512, "grad_norm": 1.2867767810821533, "learning_rate": 0.0015064441887226699, "loss": 0.8107, "step": 85780 }, { "epoch": 24.680667433831992, "grad_norm": 0.9568605422973633, "learning_rate": 0.0015063866513233602, "loss": 0.7284, "step": 85790 }, { "epoch": 24.68354430379747, "grad_norm": 1.8775781393051147, "learning_rate": 0.0015063291139240508, "loss": 0.7384, "step": 85800 }, { "epoch": 24.686421173762945, "grad_norm": 0.7901204824447632, "learning_rate": 0.001506271576524741, "loss": 0.6505, "step": 85810 }, { "epoch": 24.689298043728424, "grad_norm": 1.9299366474151611, "learning_rate": 0.0015062140391254315, "loss": 0.6479, "step": 85820 }, { "epoch": 24.6921749136939, "grad_norm": 1.2696412801742554, "learning_rate": 0.001506156501726122, "loss": 0.6823, "step": 85830 }, { "epoch": 24.69505178365938, "grad_norm": 0.9588375687599182, "learning_rate": 0.0015060989643268124, "loss": 0.5883, "step": 85840 }, { "epoch": 24.697928653624857, "grad_norm": 1.7083483934402466, "learning_rate": 0.001506041426927503, "loss": 0.7818, "step": 85850 }, { "epoch": 24.700805523590333, "grad_norm": 0.9981765151023865, "learning_rate": 0.0015059838895281935, "loss": 0.7606, "step": 85860 }, { "epoch": 24.703682393555813, "grad_norm": 0.8615797162055969, "learning_rate": 0.0015059263521288837, "loss": 0.7223, "step": 85870 }, { "epoch": 24.70655926352129, "grad_norm": 1.190508484840393, "learning_rate": 0.0015058688147295742, "loss": 0.7717, "step": 85880 }, { "epoch": 24.709436133486765, "grad_norm": 0.9082763195037842, "learning_rate": 0.0015058112773302648, "loss": 0.8673, "step": 85890 }, { "epoch": 24.712313003452245, "grad_norm": 0.7968422770500183, "learning_rate": 0.0015057537399309551, "loss": 0.6801, "step": 85900 }, { "epoch": 24.71518987341772, "grad_norm": 1.4120320081710815, "learning_rate": 0.0015056962025316457, "loss": 0.8114, "step": 85910 }, { "epoch": 24.7180667433832, "grad_norm": 1.4544111490249634, "learning_rate": 0.0015056386651323363, "loss": 0.6757, "step": 85920 }, { "epoch": 24.720943613348677, "grad_norm": 0.7635196447372437, "learning_rate": 0.0015055811277330264, "loss": 0.7431, "step": 85930 }, { "epoch": 24.723820483314153, "grad_norm": 2.0742413997650146, "learning_rate": 0.001505523590333717, "loss": 0.7375, "step": 85940 }, { "epoch": 24.726697353279633, "grad_norm": 2.8045592308044434, "learning_rate": 0.0015054660529344073, "loss": 0.5536, "step": 85950 }, { "epoch": 24.72957422324511, "grad_norm": 1.1052277088165283, "learning_rate": 0.0015054085155350979, "loss": 0.6979, "step": 85960 }, { "epoch": 24.732451093210585, "grad_norm": 1.844759225845337, "learning_rate": 0.0015053509781357884, "loss": 0.7734, "step": 85970 }, { "epoch": 24.735327963176065, "grad_norm": 1.4919992685317993, "learning_rate": 0.0015052934407364788, "loss": 0.7839, "step": 85980 }, { "epoch": 24.73820483314154, "grad_norm": 1.0439869165420532, "learning_rate": 0.0015052359033371691, "loss": 0.7394, "step": 85990 }, { "epoch": 24.74108170310702, "grad_norm": 1.5247784852981567, "learning_rate": 0.0015051783659378597, "loss": 0.6837, "step": 86000 }, { "epoch": 24.743958573072497, "grad_norm": 1.8375598192214966, "learning_rate": 0.00150512082853855, "loss": 0.8218, "step": 86010 }, { "epoch": 24.746835443037973, "grad_norm": 0.9682074189186096, "learning_rate": 0.0015050632911392406, "loss": 0.6451, "step": 86020 }, { "epoch": 24.749712313003453, "grad_norm": 1.9368418455123901, "learning_rate": 0.0015050057537399312, "loss": 0.9048, "step": 86030 }, { "epoch": 24.75258918296893, "grad_norm": 0.9500947594642639, "learning_rate": 0.0015049482163406215, "loss": 0.6243, "step": 86040 }, { "epoch": 24.75546605293441, "grad_norm": 1.2423516511917114, "learning_rate": 0.0015048906789413119, "loss": 0.5577, "step": 86050 }, { "epoch": 24.758342922899885, "grad_norm": 1.8307971954345703, "learning_rate": 0.0015048331415420022, "loss": 0.7365, "step": 86060 }, { "epoch": 24.76121979286536, "grad_norm": 0.7358911037445068, "learning_rate": 0.0015047756041426928, "loss": 0.7782, "step": 86070 }, { "epoch": 24.76409666283084, "grad_norm": 1.5117994546890259, "learning_rate": 0.0015047180667433833, "loss": 0.8661, "step": 86080 }, { "epoch": 24.766973532796317, "grad_norm": 1.4029241800308228, "learning_rate": 0.0015046605293440737, "loss": 0.5463, "step": 86090 }, { "epoch": 24.769850402761794, "grad_norm": 1.1431902647018433, "learning_rate": 0.001504602991944764, "loss": 0.6272, "step": 86100 }, { "epoch": 24.772727272727273, "grad_norm": 2.8290953636169434, "learning_rate": 0.0015045454545454546, "loss": 0.8583, "step": 86110 }, { "epoch": 24.77560414269275, "grad_norm": 1.0581226348876953, "learning_rate": 0.001504487917146145, "loss": 0.8243, "step": 86120 }, { "epoch": 24.77848101265823, "grad_norm": 2.012559413909912, "learning_rate": 0.0015044303797468355, "loss": 0.6036, "step": 86130 }, { "epoch": 24.781357882623706, "grad_norm": 1.2372267246246338, "learning_rate": 0.001504372842347526, "loss": 0.5427, "step": 86140 }, { "epoch": 24.78423475258918, "grad_norm": 2.1819658279418945, "learning_rate": 0.0015043153049482164, "loss": 0.7204, "step": 86150 }, { "epoch": 24.78711162255466, "grad_norm": 0.9324473142623901, "learning_rate": 0.0015042577675489068, "loss": 0.7814, "step": 86160 }, { "epoch": 24.789988492520138, "grad_norm": 1.6220108270645142, "learning_rate": 0.001504200230149597, "loss": 0.7066, "step": 86170 }, { "epoch": 24.792865362485614, "grad_norm": 0.9529169797897339, "learning_rate": 0.0015041426927502877, "loss": 0.594, "step": 86180 }, { "epoch": 24.795742232451094, "grad_norm": 1.2640892267227173, "learning_rate": 0.0015040851553509782, "loss": 0.7545, "step": 86190 }, { "epoch": 24.79861910241657, "grad_norm": 1.3693573474884033, "learning_rate": 0.0015040276179516686, "loss": 0.7262, "step": 86200 }, { "epoch": 24.80149597238205, "grad_norm": 1.9558217525482178, "learning_rate": 0.0015039700805523591, "loss": 0.7463, "step": 86210 }, { "epoch": 24.804372842347526, "grad_norm": 1.188460111618042, "learning_rate": 0.0015039125431530495, "loss": 0.6526, "step": 86220 }, { "epoch": 24.807249712313002, "grad_norm": 1.8266388177871704, "learning_rate": 0.0015038550057537398, "loss": 0.7398, "step": 86230 }, { "epoch": 24.810126582278482, "grad_norm": 1.1003092527389526, "learning_rate": 0.0015037974683544304, "loss": 0.8386, "step": 86240 }, { "epoch": 24.813003452243958, "grad_norm": 1.5725889205932617, "learning_rate": 0.001503739930955121, "loss": 0.7178, "step": 86250 }, { "epoch": 24.815880322209438, "grad_norm": 1.1200350522994995, "learning_rate": 0.0015036823935558113, "loss": 0.628, "step": 86260 }, { "epoch": 24.818757192174914, "grad_norm": 1.0990996360778809, "learning_rate": 0.0015036248561565019, "loss": 0.8958, "step": 86270 }, { "epoch": 24.82163406214039, "grad_norm": 0.8883408308029175, "learning_rate": 0.001503567318757192, "loss": 0.5137, "step": 86280 }, { "epoch": 24.82451093210587, "grad_norm": 1.148152232170105, "learning_rate": 0.0015035097813578826, "loss": 0.869, "step": 86290 }, { "epoch": 24.827387802071346, "grad_norm": 0.8401930928230286, "learning_rate": 0.0015034522439585731, "loss": 0.549, "step": 86300 }, { "epoch": 24.830264672036822, "grad_norm": 0.8937065601348877, "learning_rate": 0.0015033947065592635, "loss": 0.747, "step": 86310 }, { "epoch": 24.833141542002302, "grad_norm": 1.6749486923217773, "learning_rate": 0.001503337169159954, "loss": 0.6927, "step": 86320 }, { "epoch": 24.83601841196778, "grad_norm": 2.120197057723999, "learning_rate": 0.0015032796317606446, "loss": 0.7978, "step": 86330 }, { "epoch": 24.838895281933258, "grad_norm": 0.9733365178108215, "learning_rate": 0.0015032220943613347, "loss": 0.546, "step": 86340 }, { "epoch": 24.841772151898734, "grad_norm": 2.2587313652038574, "learning_rate": 0.0015031645569620253, "loss": 0.8129, "step": 86350 }, { "epoch": 24.84464902186421, "grad_norm": 1.8289602994918823, "learning_rate": 0.0015031070195627159, "loss": 0.6604, "step": 86360 }, { "epoch": 24.84752589182969, "grad_norm": 1.4415392875671387, "learning_rate": 0.0015030494821634062, "loss": 0.7629, "step": 86370 }, { "epoch": 24.850402761795166, "grad_norm": 1.8813825845718384, "learning_rate": 0.0015029919447640968, "loss": 0.687, "step": 86380 }, { "epoch": 24.853279631760643, "grad_norm": 1.9810010194778442, "learning_rate": 0.0015029344073647871, "loss": 0.6696, "step": 86390 }, { "epoch": 24.856156501726122, "grad_norm": 0.9009311199188232, "learning_rate": 0.0015028768699654775, "loss": 0.4866, "step": 86400 }, { "epoch": 24.8590333716916, "grad_norm": 1.5619559288024902, "learning_rate": 0.001502819332566168, "loss": 0.7012, "step": 86410 }, { "epoch": 24.86191024165708, "grad_norm": 1.2702628374099731, "learning_rate": 0.0015027617951668584, "loss": 0.6596, "step": 86420 }, { "epoch": 24.864787111622555, "grad_norm": 1.6007803678512573, "learning_rate": 0.001502704257767549, "loss": 0.5549, "step": 86430 }, { "epoch": 24.86766398158803, "grad_norm": 1.1023643016815186, "learning_rate": 0.0015026467203682395, "loss": 0.8835, "step": 86440 }, { "epoch": 24.87054085155351, "grad_norm": 1.5052129030227661, "learning_rate": 0.0015025891829689299, "loss": 0.7407, "step": 86450 }, { "epoch": 24.873417721518987, "grad_norm": 1.9011425971984863, "learning_rate": 0.0015025316455696202, "loss": 0.5319, "step": 86460 }, { "epoch": 24.876294591484466, "grad_norm": 1.233192801475525, "learning_rate": 0.0015024741081703108, "loss": 0.5471, "step": 86470 }, { "epoch": 24.879171461449943, "grad_norm": 1.1839603185653687, "learning_rate": 0.0015024165707710011, "loss": 0.7664, "step": 86480 }, { "epoch": 24.88204833141542, "grad_norm": 2.258273124694824, "learning_rate": 0.0015023590333716917, "loss": 0.6946, "step": 86490 }, { "epoch": 24.8849252013809, "grad_norm": 2.412421464920044, "learning_rate": 0.001502301495972382, "loss": 0.791, "step": 86500 }, { "epoch": 24.887802071346375, "grad_norm": 0.9979481101036072, "learning_rate": 0.0015022439585730726, "loss": 0.6491, "step": 86510 }, { "epoch": 24.89067894131185, "grad_norm": 0.9354711771011353, "learning_rate": 0.001502186421173763, "loss": 0.8633, "step": 86520 }, { "epoch": 24.89355581127733, "grad_norm": 0.9146214127540588, "learning_rate": 0.0015021288837744533, "loss": 0.6629, "step": 86530 }, { "epoch": 24.896432681242807, "grad_norm": 1.884853482246399, "learning_rate": 0.0015020713463751438, "loss": 0.654, "step": 86540 }, { "epoch": 24.899309551208287, "grad_norm": 0.8550000786781311, "learning_rate": 0.0015020138089758344, "loss": 0.6374, "step": 86550 }, { "epoch": 24.902186421173763, "grad_norm": 1.7517751455307007, "learning_rate": 0.0015019562715765248, "loss": 0.7968, "step": 86560 }, { "epoch": 24.90506329113924, "grad_norm": 1.5241543054580688, "learning_rate": 0.0015018987341772153, "loss": 0.6825, "step": 86570 }, { "epoch": 24.90794016110472, "grad_norm": 1.3286385536193848, "learning_rate": 0.0015018411967779057, "loss": 0.7583, "step": 86580 }, { "epoch": 24.910817031070195, "grad_norm": 2.34102201461792, "learning_rate": 0.001501783659378596, "loss": 0.6877, "step": 86590 }, { "epoch": 24.913693901035675, "grad_norm": 2.1577441692352295, "learning_rate": 0.0015017261219792866, "loss": 0.8048, "step": 86600 }, { "epoch": 24.91657077100115, "grad_norm": 1.6933315992355347, "learning_rate": 0.0015016685845799771, "loss": 0.652, "step": 86610 }, { "epoch": 24.919447640966627, "grad_norm": 1.3034412860870361, "learning_rate": 0.0015016110471806675, "loss": 0.8678, "step": 86620 }, { "epoch": 24.922324510932107, "grad_norm": 2.132545232772827, "learning_rate": 0.001501553509781358, "loss": 0.8088, "step": 86630 }, { "epoch": 24.925201380897583, "grad_norm": 2.1585915088653564, "learning_rate": 0.0015014959723820482, "loss": 0.7992, "step": 86640 }, { "epoch": 24.92807825086306, "grad_norm": 0.827928364276886, "learning_rate": 0.0015014384349827388, "loss": 0.6406, "step": 86650 }, { "epoch": 24.93095512082854, "grad_norm": 1.0413248538970947, "learning_rate": 0.0015013808975834293, "loss": 0.5348, "step": 86660 }, { "epoch": 24.933831990794015, "grad_norm": 1.6179391145706177, "learning_rate": 0.0015013233601841197, "loss": 0.6476, "step": 86670 }, { "epoch": 24.936708860759495, "grad_norm": 1.1228901147842407, "learning_rate": 0.0015012658227848102, "loss": 0.7092, "step": 86680 }, { "epoch": 24.93958573072497, "grad_norm": 1.0067682266235352, "learning_rate": 0.0015012082853855008, "loss": 0.7734, "step": 86690 }, { "epoch": 24.942462600690448, "grad_norm": 1.3122292757034302, "learning_rate": 0.001501150747986191, "loss": 0.7275, "step": 86700 }, { "epoch": 24.945339470655927, "grad_norm": 1.3229414224624634, "learning_rate": 0.0015010932105868815, "loss": 0.5806, "step": 86710 }, { "epoch": 24.948216340621403, "grad_norm": 1.2145591974258423, "learning_rate": 0.001501035673187572, "loss": 0.7599, "step": 86720 }, { "epoch": 24.951093210586883, "grad_norm": 1.2091152667999268, "learning_rate": 0.0015009781357882624, "loss": 0.7534, "step": 86730 }, { "epoch": 24.95397008055236, "grad_norm": 1.3453588485717773, "learning_rate": 0.001500920598388953, "loss": 0.84, "step": 86740 }, { "epoch": 24.956846950517836, "grad_norm": 0.8912616968154907, "learning_rate": 0.0015008630609896433, "loss": 0.7639, "step": 86750 }, { "epoch": 24.959723820483315, "grad_norm": 1.9601233005523682, "learning_rate": 0.0015008055235903337, "loss": 0.8203, "step": 86760 }, { "epoch": 24.96260069044879, "grad_norm": 1.3554695844650269, "learning_rate": 0.0015007479861910242, "loss": 0.7568, "step": 86770 }, { "epoch": 24.965477560414268, "grad_norm": 1.1972289085388184, "learning_rate": 0.0015006904487917146, "loss": 0.6398, "step": 86780 }, { "epoch": 24.968354430379748, "grad_norm": 1.349858045578003, "learning_rate": 0.0015006329113924051, "loss": 0.7424, "step": 86790 }, { "epoch": 24.971231300345224, "grad_norm": 1.973125696182251, "learning_rate": 0.0015005753739930957, "loss": 0.9387, "step": 86800 }, { "epoch": 24.974108170310704, "grad_norm": 2.305586338043213, "learning_rate": 0.001500517836593786, "loss": 0.7614, "step": 86810 }, { "epoch": 24.97698504027618, "grad_norm": 1.6485689878463745, "learning_rate": 0.0015004602991944764, "loss": 0.8612, "step": 86820 }, { "epoch": 24.979861910241656, "grad_norm": 1.161607265472412, "learning_rate": 0.001500402761795167, "loss": 0.6839, "step": 86830 }, { "epoch": 24.982738780207136, "grad_norm": 1.1000608205795288, "learning_rate": 0.0015003452243958573, "loss": 0.6779, "step": 86840 }, { "epoch": 24.985615650172612, "grad_norm": 1.0686044692993164, "learning_rate": 0.0015002876869965479, "loss": 0.7278, "step": 86850 }, { "epoch": 24.988492520138088, "grad_norm": 1.3125035762786865, "learning_rate": 0.0015002301495972382, "loss": 0.8202, "step": 86860 }, { "epoch": 24.991369390103568, "grad_norm": 1.76900053024292, "learning_rate": 0.0015001726121979288, "loss": 0.7148, "step": 86870 }, { "epoch": 24.994246260069044, "grad_norm": 1.2613743543624878, "learning_rate": 0.0015001150747986191, "loss": 0.6982, "step": 86880 }, { "epoch": 24.997123130034524, "grad_norm": 2.073660135269165, "learning_rate": 0.0015000575373993095, "loss": 0.7634, "step": 86890 }, { "epoch": 25.0, "grad_norm": 1.5912505388259888, "learning_rate": 0.0015, "loss": 0.8195, "step": 86900 }, { "epoch": 25.002876869965476, "grad_norm": 0.7102965712547302, "learning_rate": 0.0014999424626006906, "loss": 0.5374, "step": 86910 }, { "epoch": 25.005753739930956, "grad_norm": 1.5603890419006348, "learning_rate": 0.001499884925201381, "loss": 0.7284, "step": 86920 }, { "epoch": 25.008630609896432, "grad_norm": 1.5706101655960083, "learning_rate": 0.0014998273878020713, "loss": 0.6328, "step": 86930 }, { "epoch": 25.011507479861912, "grad_norm": 1.2425705194473267, "learning_rate": 0.0014997698504027619, "loss": 0.5784, "step": 86940 }, { "epoch": 25.014384349827388, "grad_norm": 1.4139559268951416, "learning_rate": 0.0014997123130034522, "loss": 0.7336, "step": 86950 }, { "epoch": 25.017261219792864, "grad_norm": 1.1713147163391113, "learning_rate": 0.0014996547756041428, "loss": 0.6881, "step": 86960 }, { "epoch": 25.020138089758344, "grad_norm": 0.7812169194221497, "learning_rate": 0.0014995972382048331, "loss": 0.6407, "step": 86970 }, { "epoch": 25.02301495972382, "grad_norm": 1.1694589853286743, "learning_rate": 0.0014995397008055237, "loss": 0.6613, "step": 86980 }, { "epoch": 25.025891829689296, "grad_norm": 1.801372766494751, "learning_rate": 0.001499482163406214, "loss": 0.8302, "step": 86990 }, { "epoch": 25.028768699654776, "grad_norm": 2.113063335418701, "learning_rate": 0.0014994246260069044, "loss": 0.6705, "step": 87000 }, { "epoch": 25.031645569620252, "grad_norm": 2.17025089263916, "learning_rate": 0.001499367088607595, "loss": 0.6362, "step": 87010 }, { "epoch": 25.034522439585732, "grad_norm": 1.5929319858551025, "learning_rate": 0.0014993095512082855, "loss": 0.7202, "step": 87020 }, { "epoch": 25.03739930955121, "grad_norm": 0.9443532228469849, "learning_rate": 0.0014992520138089758, "loss": 0.6342, "step": 87030 }, { "epoch": 25.040276179516685, "grad_norm": 1.515275239944458, "learning_rate": 0.0014991944764096664, "loss": 0.8555, "step": 87040 }, { "epoch": 25.043153049482164, "grad_norm": 2.609706401824951, "learning_rate": 0.0014991369390103568, "loss": 0.7708, "step": 87050 }, { "epoch": 25.04602991944764, "grad_norm": 1.7567092180252075, "learning_rate": 0.001499079401611047, "loss": 0.6543, "step": 87060 }, { "epoch": 25.048906789413117, "grad_norm": 1.269525170326233, "learning_rate": 0.0014990218642117377, "loss": 0.8158, "step": 87070 }, { "epoch": 25.051783659378597, "grad_norm": 1.0073323249816895, "learning_rate": 0.001498964326812428, "loss": 0.641, "step": 87080 }, { "epoch": 25.054660529344073, "grad_norm": 1.0387791395187378, "learning_rate": 0.0014989067894131186, "loss": 0.6171, "step": 87090 }, { "epoch": 25.057537399309552, "grad_norm": 1.5090203285217285, "learning_rate": 0.0014988492520138091, "loss": 0.7577, "step": 87100 }, { "epoch": 25.06041426927503, "grad_norm": 0.9823054075241089, "learning_rate": 0.0014987917146144993, "loss": 0.7179, "step": 87110 }, { "epoch": 25.063291139240505, "grad_norm": 1.8454045057296753, "learning_rate": 0.0014987341772151898, "loss": 0.8038, "step": 87120 }, { "epoch": 25.066168009205985, "grad_norm": 0.8796719908714294, "learning_rate": 0.0014986766398158804, "loss": 0.5698, "step": 87130 }, { "epoch": 25.06904487917146, "grad_norm": 0.948121190071106, "learning_rate": 0.0014986191024165707, "loss": 0.6802, "step": 87140 }, { "epoch": 25.07192174913694, "grad_norm": 1.068509817123413, "learning_rate": 0.0014985615650172613, "loss": 0.6103, "step": 87150 }, { "epoch": 25.074798619102417, "grad_norm": 1.8913369178771973, "learning_rate": 0.0014985040276179519, "loss": 0.6503, "step": 87160 }, { "epoch": 25.077675489067893, "grad_norm": 1.7165706157684326, "learning_rate": 0.001498446490218642, "loss": 0.658, "step": 87170 }, { "epoch": 25.080552359033373, "grad_norm": 1.1247905492782593, "learning_rate": 0.0014983889528193326, "loss": 0.6146, "step": 87180 }, { "epoch": 25.08342922899885, "grad_norm": 0.5893567800521851, "learning_rate": 0.001498331415420023, "loss": 0.5547, "step": 87190 }, { "epoch": 25.086306098964325, "grad_norm": 1.6856935024261475, "learning_rate": 0.0014982738780207135, "loss": 0.6852, "step": 87200 }, { "epoch": 25.089182968929805, "grad_norm": 1.531022310256958, "learning_rate": 0.001498216340621404, "loss": 0.6824, "step": 87210 }, { "epoch": 25.09205983889528, "grad_norm": 1.6937721967697144, "learning_rate": 0.0014981588032220944, "loss": 0.7813, "step": 87220 }, { "epoch": 25.09493670886076, "grad_norm": 1.4081145524978638, "learning_rate": 0.0014981012658227847, "loss": 0.6177, "step": 87230 }, { "epoch": 25.097813578826237, "grad_norm": 1.215345859527588, "learning_rate": 0.0014980437284234753, "loss": 0.7199, "step": 87240 }, { "epoch": 25.100690448791713, "grad_norm": 1.940945029258728, "learning_rate": 0.0014979861910241656, "loss": 0.7074, "step": 87250 }, { "epoch": 25.103567318757193, "grad_norm": 1.1236056089401245, "learning_rate": 0.0014979286536248562, "loss": 0.6024, "step": 87260 }, { "epoch": 25.10644418872267, "grad_norm": 1.7725602388381958, "learning_rate": 0.0014978711162255468, "loss": 0.5743, "step": 87270 }, { "epoch": 25.10932105868815, "grad_norm": 1.7802234888076782, "learning_rate": 0.0014978135788262371, "loss": 0.6128, "step": 87280 }, { "epoch": 25.112197928653625, "grad_norm": 1.1128214597702026, "learning_rate": 0.0014977560414269275, "loss": 0.7303, "step": 87290 }, { "epoch": 25.1150747986191, "grad_norm": 1.7265055179595947, "learning_rate": 0.001497698504027618, "loss": 0.7645, "step": 87300 }, { "epoch": 25.11795166858458, "grad_norm": 1.1386017799377441, "learning_rate": 0.0014976409666283084, "loss": 0.6364, "step": 87310 }, { "epoch": 25.120828538550057, "grad_norm": 1.1429173946380615, "learning_rate": 0.001497583429228999, "loss": 0.7287, "step": 87320 }, { "epoch": 25.123705408515534, "grad_norm": 1.0780190229415894, "learning_rate": 0.0014975258918296893, "loss": 0.6358, "step": 87330 }, { "epoch": 25.126582278481013, "grad_norm": 1.2138824462890625, "learning_rate": 0.0014974683544303799, "loss": 0.6883, "step": 87340 }, { "epoch": 25.12945914844649, "grad_norm": 1.0254205465316772, "learning_rate": 0.0014974108170310702, "loss": 0.6519, "step": 87350 }, { "epoch": 25.13233601841197, "grad_norm": 1.6761884689331055, "learning_rate": 0.0014973532796317606, "loss": 0.7634, "step": 87360 }, { "epoch": 25.135212888377445, "grad_norm": 1.180301547050476, "learning_rate": 0.0014972957422324511, "loss": 0.6562, "step": 87370 }, { "epoch": 25.13808975834292, "grad_norm": 1.2499140501022339, "learning_rate": 0.0014972382048331417, "loss": 0.605, "step": 87380 }, { "epoch": 25.1409666283084, "grad_norm": 1.5407366752624512, "learning_rate": 0.001497180667433832, "loss": 0.7645, "step": 87390 }, { "epoch": 25.143843498273878, "grad_norm": 2.2749996185302734, "learning_rate": 0.0014971231300345226, "loss": 0.5093, "step": 87400 }, { "epoch": 25.146720368239354, "grad_norm": 1.2173024415969849, "learning_rate": 0.001497065592635213, "loss": 0.7962, "step": 87410 }, { "epoch": 25.149597238204834, "grad_norm": 1.5984816551208496, "learning_rate": 0.0014970080552359033, "loss": 0.6748, "step": 87420 }, { "epoch": 25.15247410817031, "grad_norm": 1.3454941511154175, "learning_rate": 0.0014969505178365938, "loss": 0.615, "step": 87430 }, { "epoch": 25.15535097813579, "grad_norm": 1.2585465908050537, "learning_rate": 0.0014968929804372842, "loss": 0.6776, "step": 87440 }, { "epoch": 25.158227848101266, "grad_norm": 1.4333500862121582, "learning_rate": 0.0014968354430379748, "loss": 0.6712, "step": 87450 }, { "epoch": 25.161104718066742, "grad_norm": 1.194075107574463, "learning_rate": 0.0014967779056386653, "loss": 0.5443, "step": 87460 }, { "epoch": 25.16398158803222, "grad_norm": 2.929234266281128, "learning_rate": 0.0014967203682393555, "loss": 0.7868, "step": 87470 }, { "epoch": 25.166858457997698, "grad_norm": 1.0252718925476074, "learning_rate": 0.001496662830840046, "loss": 0.5992, "step": 87480 }, { "epoch": 25.169735327963178, "grad_norm": 0.8664005994796753, "learning_rate": 0.0014966052934407366, "loss": 0.6911, "step": 87490 }, { "epoch": 25.172612197928654, "grad_norm": 1.9457594156265259, "learning_rate": 0.001496547756041427, "loss": 0.7729, "step": 87500 }, { "epoch": 25.17548906789413, "grad_norm": 1.290878176689148, "learning_rate": 0.0014964902186421175, "loss": 0.5952, "step": 87510 }, { "epoch": 25.17836593785961, "grad_norm": 1.033345341682434, "learning_rate": 0.001496432681242808, "loss": 0.6092, "step": 87520 }, { "epoch": 25.181242807825086, "grad_norm": 2.352294445037842, "learning_rate": 0.0014963751438434982, "loss": 0.7229, "step": 87530 }, { "epoch": 25.184119677790562, "grad_norm": 1.4576793909072876, "learning_rate": 0.0014963176064441887, "loss": 0.7384, "step": 87540 }, { "epoch": 25.186996547756042, "grad_norm": 0.9922136664390564, "learning_rate": 0.001496260069044879, "loss": 0.6248, "step": 87550 }, { "epoch": 25.189873417721518, "grad_norm": 1.4824773073196411, "learning_rate": 0.0014962025316455697, "loss": 0.6886, "step": 87560 }, { "epoch": 25.192750287686998, "grad_norm": 1.368308186531067, "learning_rate": 0.0014961449942462602, "loss": 0.5566, "step": 87570 }, { "epoch": 25.195627157652474, "grad_norm": 1.546181321144104, "learning_rate": 0.0014960874568469506, "loss": 0.8732, "step": 87580 }, { "epoch": 25.19850402761795, "grad_norm": 1.7588202953338623, "learning_rate": 0.001496029919447641, "loss": 0.7179, "step": 87590 }, { "epoch": 25.20138089758343, "grad_norm": 1.5109658241271973, "learning_rate": 0.0014959723820483315, "loss": 0.541, "step": 87600 }, { "epoch": 25.204257767548906, "grad_norm": 0.7220449447631836, "learning_rate": 0.0014959148446490218, "loss": 0.6062, "step": 87610 }, { "epoch": 25.207134637514386, "grad_norm": 0.8387991189956665, "learning_rate": 0.0014958573072497124, "loss": 0.4851, "step": 87620 }, { "epoch": 25.210011507479862, "grad_norm": 0.847510576248169, "learning_rate": 0.001495799769850403, "loss": 0.4992, "step": 87630 }, { "epoch": 25.21288837744534, "grad_norm": 2.177647829055786, "learning_rate": 0.0014957422324510933, "loss": 0.8993, "step": 87640 }, { "epoch": 25.21576524741082, "grad_norm": 1.293600082397461, "learning_rate": 0.0014956846950517837, "loss": 0.6196, "step": 87650 }, { "epoch": 25.218642117376294, "grad_norm": 2.1495580673217773, "learning_rate": 0.001495627157652474, "loss": 0.6551, "step": 87660 }, { "epoch": 25.22151898734177, "grad_norm": 1.6838070154190063, "learning_rate": 0.0014955696202531646, "loss": 0.6687, "step": 87670 }, { "epoch": 25.22439585730725, "grad_norm": 2.4939048290252686, "learning_rate": 0.0014955120828538551, "loss": 0.7049, "step": 87680 }, { "epoch": 25.227272727272727, "grad_norm": 1.9743307828903198, "learning_rate": 0.0014954545454545455, "loss": 0.6311, "step": 87690 }, { "epoch": 25.230149597238206, "grad_norm": 2.124871015548706, "learning_rate": 0.001495397008055236, "loss": 0.6416, "step": 87700 }, { "epoch": 25.233026467203683, "grad_norm": 1.9277621507644653, "learning_rate": 0.0014953394706559264, "loss": 0.789, "step": 87710 }, { "epoch": 25.23590333716916, "grad_norm": 0.9526989459991455, "learning_rate": 0.0014952819332566167, "loss": 0.5779, "step": 87720 }, { "epoch": 25.23878020713464, "grad_norm": 1.6035089492797852, "learning_rate": 0.0014952243958573073, "loss": 0.8363, "step": 87730 }, { "epoch": 25.241657077100115, "grad_norm": 1.1734750270843506, "learning_rate": 0.0014951668584579979, "loss": 0.6171, "step": 87740 }, { "epoch": 25.24453394706559, "grad_norm": 1.8307859897613525, "learning_rate": 0.0014951093210586882, "loss": 0.666, "step": 87750 }, { "epoch": 25.24741081703107, "grad_norm": 1.3075894117355347, "learning_rate": 0.0014950517836593786, "loss": 0.6441, "step": 87760 }, { "epoch": 25.250287686996547, "grad_norm": 1.0859688520431519, "learning_rate": 0.001494994246260069, "loss": 0.6034, "step": 87770 }, { "epoch": 25.253164556962027, "grad_norm": 0.8675361275672913, "learning_rate": 0.0014949367088607595, "loss": 0.744, "step": 87780 }, { "epoch": 25.256041426927503, "grad_norm": 0.9292313456535339, "learning_rate": 0.00149487917146145, "loss": 0.603, "step": 87790 }, { "epoch": 25.25891829689298, "grad_norm": 2.3239450454711914, "learning_rate": 0.0014948216340621404, "loss": 0.6602, "step": 87800 }, { "epoch": 25.26179516685846, "grad_norm": 0.9113852977752686, "learning_rate": 0.001494764096662831, "loss": 0.5585, "step": 87810 }, { "epoch": 25.264672036823935, "grad_norm": 0.9498842358589172, "learning_rate": 0.0014947065592635213, "loss": 0.6553, "step": 87820 }, { "epoch": 25.267548906789415, "grad_norm": 0.9893893003463745, "learning_rate": 0.0014946490218642116, "loss": 0.8217, "step": 87830 }, { "epoch": 25.27042577675489, "grad_norm": 1.2718439102172852, "learning_rate": 0.0014945914844649022, "loss": 0.7185, "step": 87840 }, { "epoch": 25.273302646720367, "grad_norm": 2.1867542266845703, "learning_rate": 0.0014945339470655928, "loss": 0.6277, "step": 87850 }, { "epoch": 25.276179516685847, "grad_norm": 2.8190438747406006, "learning_rate": 0.0014944764096662831, "loss": 0.6315, "step": 87860 }, { "epoch": 25.279056386651323, "grad_norm": 1.6847716569900513, "learning_rate": 0.0014944188722669737, "loss": 0.8486, "step": 87870 }, { "epoch": 25.2819332566168, "grad_norm": 3.938183546066284, "learning_rate": 0.0014943613348676638, "loss": 0.6551, "step": 87880 }, { "epoch": 25.28481012658228, "grad_norm": 1.583127737045288, "learning_rate": 0.0014943037974683544, "loss": 0.6507, "step": 87890 }, { "epoch": 25.287686996547755, "grad_norm": 0.8805901408195496, "learning_rate": 0.001494246260069045, "loss": 0.5874, "step": 87900 }, { "epoch": 25.290563866513235, "grad_norm": 1.6953206062316895, "learning_rate": 0.0014941887226697353, "loss": 0.7079, "step": 87910 }, { "epoch": 25.29344073647871, "grad_norm": 0.8425344824790955, "learning_rate": 0.0014941311852704258, "loss": 0.5283, "step": 87920 }, { "epoch": 25.296317606444187, "grad_norm": 0.8684890270233154, "learning_rate": 0.0014940736478711164, "loss": 0.6602, "step": 87930 }, { "epoch": 25.299194476409667, "grad_norm": 1.968869924545288, "learning_rate": 0.0014940161104718065, "loss": 0.8265, "step": 87940 }, { "epoch": 25.302071346375143, "grad_norm": 0.9527024626731873, "learning_rate": 0.001493958573072497, "loss": 0.6884, "step": 87950 }, { "epoch": 25.30494821634062, "grad_norm": 2.098501443862915, "learning_rate": 0.0014939010356731877, "loss": 0.7788, "step": 87960 }, { "epoch": 25.3078250863061, "grad_norm": 1.791077971458435, "learning_rate": 0.001493843498273878, "loss": 0.7376, "step": 87970 }, { "epoch": 25.310701956271576, "grad_norm": 1.5138466358184814, "learning_rate": 0.0014937859608745686, "loss": 0.7001, "step": 87980 }, { "epoch": 25.313578826237055, "grad_norm": 1.6441121101379395, "learning_rate": 0.0014937284234752591, "loss": 0.6104, "step": 87990 }, { "epoch": 25.31645569620253, "grad_norm": 1.6559317111968994, "learning_rate": 0.0014936708860759493, "loss": 0.6276, "step": 88000 }, { "epoch": 25.319332566168008, "grad_norm": 2.8596255779266357, "learning_rate": 0.0014936133486766398, "loss": 0.696, "step": 88010 }, { "epoch": 25.322209436133488, "grad_norm": 0.8686290383338928, "learning_rate": 0.0014935558112773302, "loss": 0.5529, "step": 88020 }, { "epoch": 25.325086306098964, "grad_norm": 1.3734341859817505, "learning_rate": 0.0014934982738780207, "loss": 0.7012, "step": 88030 }, { "epoch": 25.327963176064443, "grad_norm": 1.4130373001098633, "learning_rate": 0.0014934407364787113, "loss": 0.4862, "step": 88040 }, { "epoch": 25.33084004602992, "grad_norm": 1.1641032695770264, "learning_rate": 0.0014933831990794017, "loss": 0.8022, "step": 88050 }, { "epoch": 25.333716915995396, "grad_norm": 1.5013667345046997, "learning_rate": 0.001493325661680092, "loss": 0.8679, "step": 88060 }, { "epoch": 25.336593785960876, "grad_norm": 1.5818103551864624, "learning_rate": 0.0014932681242807826, "loss": 0.9289, "step": 88070 }, { "epoch": 25.339470655926352, "grad_norm": 1.6819026470184326, "learning_rate": 0.001493210586881473, "loss": 0.7131, "step": 88080 }, { "epoch": 25.342347525891828, "grad_norm": 2.1350226402282715, "learning_rate": 0.0014931530494821635, "loss": 0.6616, "step": 88090 }, { "epoch": 25.345224395857308, "grad_norm": 0.9456983208656311, "learning_rate": 0.001493095512082854, "loss": 0.653, "step": 88100 }, { "epoch": 25.348101265822784, "grad_norm": 1.0517884492874146, "learning_rate": 0.0014930379746835444, "loss": 0.7135, "step": 88110 }, { "epoch": 25.350978135788264, "grad_norm": 1.2995747327804565, "learning_rate": 0.0014929804372842347, "loss": 0.9701, "step": 88120 }, { "epoch": 25.35385500575374, "grad_norm": 1.353987216949463, "learning_rate": 0.001492922899884925, "loss": 0.6331, "step": 88130 }, { "epoch": 25.356731875719216, "grad_norm": 0.561148464679718, "learning_rate": 0.0014928653624856156, "loss": 0.7769, "step": 88140 }, { "epoch": 25.359608745684696, "grad_norm": 1.3894520998001099, "learning_rate": 0.0014928078250863062, "loss": 0.6589, "step": 88150 }, { "epoch": 25.362485615650172, "grad_norm": 1.3920223712921143, "learning_rate": 0.0014927502876869966, "loss": 0.5402, "step": 88160 }, { "epoch": 25.365362485615652, "grad_norm": 0.9576565027236938, "learning_rate": 0.0014926927502876871, "loss": 0.8055, "step": 88170 }, { "epoch": 25.368239355581128, "grad_norm": 1.3750247955322266, "learning_rate": 0.0014926352128883775, "loss": 0.707, "step": 88180 }, { "epoch": 25.371116225546604, "grad_norm": 0.9676578044891357, "learning_rate": 0.0014925776754890678, "loss": 0.5968, "step": 88190 }, { "epoch": 25.373993095512084, "grad_norm": 1.2677055597305298, "learning_rate": 0.0014925201380897584, "loss": 0.9376, "step": 88200 }, { "epoch": 25.37686996547756, "grad_norm": 1.6423933506011963, "learning_rate": 0.001492462600690449, "loss": 0.8049, "step": 88210 }, { "epoch": 25.379746835443036, "grad_norm": 1.1343671083450317, "learning_rate": 0.0014924050632911393, "loss": 0.7018, "step": 88220 }, { "epoch": 25.382623705408516, "grad_norm": 1.082202672958374, "learning_rate": 0.0014923475258918299, "loss": 0.7103, "step": 88230 }, { "epoch": 25.385500575373992, "grad_norm": 1.1793540716171265, "learning_rate": 0.00149228998849252, "loss": 0.7384, "step": 88240 }, { "epoch": 25.388377445339472, "grad_norm": 0.8503870964050293, "learning_rate": 0.0014922324510932105, "loss": 0.6099, "step": 88250 }, { "epoch": 25.39125431530495, "grad_norm": 1.3753714561462402, "learning_rate": 0.0014921749136939011, "loss": 0.7699, "step": 88260 }, { "epoch": 25.394131185270425, "grad_norm": 1.1373121738433838, "learning_rate": 0.0014921173762945915, "loss": 0.6624, "step": 88270 }, { "epoch": 25.397008055235904, "grad_norm": 0.9671270847320557, "learning_rate": 0.001492059838895282, "loss": 0.6555, "step": 88280 }, { "epoch": 25.39988492520138, "grad_norm": 1.1450692415237427, "learning_rate": 0.0014920023014959726, "loss": 0.809, "step": 88290 }, { "epoch": 25.402761795166857, "grad_norm": 1.2225172519683838, "learning_rate": 0.0014919447640966627, "loss": 0.8127, "step": 88300 }, { "epoch": 25.405638665132336, "grad_norm": 0.8401767015457153, "learning_rate": 0.0014918872266973533, "loss": 0.7047, "step": 88310 }, { "epoch": 25.408515535097813, "grad_norm": 0.6681100130081177, "learning_rate": 0.0014918296892980438, "loss": 0.5275, "step": 88320 }, { "epoch": 25.411392405063292, "grad_norm": 1.467795729637146, "learning_rate": 0.0014917721518987342, "loss": 0.7924, "step": 88330 }, { "epoch": 25.41426927502877, "grad_norm": 2.019819736480713, "learning_rate": 0.0014917146144994248, "loss": 0.5854, "step": 88340 }, { "epoch": 25.417146144994245, "grad_norm": 1.7138252258300781, "learning_rate": 0.001491657077100115, "loss": 0.7054, "step": 88350 }, { "epoch": 25.420023014959725, "grad_norm": 1.4154675006866455, "learning_rate": 0.0014915995397008055, "loss": 0.7929, "step": 88360 }, { "epoch": 25.4228998849252, "grad_norm": 2.287327527999878, "learning_rate": 0.001491542002301496, "loss": 0.8715, "step": 88370 }, { "epoch": 25.42577675489068, "grad_norm": 1.3157556056976318, "learning_rate": 0.0014914844649021864, "loss": 0.7002, "step": 88380 }, { "epoch": 25.428653624856157, "grad_norm": 1.6668342351913452, "learning_rate": 0.001491426927502877, "loss": 0.6447, "step": 88390 }, { "epoch": 25.431530494821633, "grad_norm": 1.088220238685608, "learning_rate": 0.0014913693901035675, "loss": 0.616, "step": 88400 }, { "epoch": 25.434407364787113, "grad_norm": 1.1275886297225952, "learning_rate": 0.0014913118527042578, "loss": 0.5579, "step": 88410 }, { "epoch": 25.43728423475259, "grad_norm": 1.094364047050476, "learning_rate": 0.0014912543153049482, "loss": 0.5706, "step": 88420 }, { "epoch": 25.440161104718065, "grad_norm": 1.6393296718597412, "learning_rate": 0.0014911967779056387, "loss": 0.5825, "step": 88430 }, { "epoch": 25.443037974683545, "grad_norm": 1.1436792612075806, "learning_rate": 0.001491139240506329, "loss": 0.7788, "step": 88440 }, { "epoch": 25.44591484464902, "grad_norm": 0.9975494146347046, "learning_rate": 0.0014910817031070197, "loss": 0.5996, "step": 88450 }, { "epoch": 25.4487917146145, "grad_norm": 2.140057325363159, "learning_rate": 0.00149102416570771, "loss": 0.9279, "step": 88460 }, { "epoch": 25.451668584579977, "grad_norm": 2.528940200805664, "learning_rate": 0.0014909666283084006, "loss": 0.6818, "step": 88470 }, { "epoch": 25.454545454545453, "grad_norm": 1.135968565940857, "learning_rate": 0.001490909090909091, "loss": 0.6934, "step": 88480 }, { "epoch": 25.457422324510933, "grad_norm": 1.023979902267456, "learning_rate": 0.0014908515535097813, "loss": 0.6425, "step": 88490 }, { "epoch": 25.46029919447641, "grad_norm": 0.815922200679779, "learning_rate": 0.0014907940161104718, "loss": 0.6141, "step": 88500 }, { "epoch": 25.46317606444189, "grad_norm": 1.3639569282531738, "learning_rate": 0.0014907364787111624, "loss": 0.6592, "step": 88510 }, { "epoch": 25.466052934407365, "grad_norm": 2.0855844020843506, "learning_rate": 0.0014906789413118527, "loss": 0.7495, "step": 88520 }, { "epoch": 25.46892980437284, "grad_norm": 1.2654643058776855, "learning_rate": 0.0014906214039125433, "loss": 0.6618, "step": 88530 }, { "epoch": 25.47180667433832, "grad_norm": 2.1473727226257324, "learning_rate": 0.0014905638665132336, "loss": 0.6554, "step": 88540 }, { "epoch": 25.474683544303797, "grad_norm": 1.4771116971969604, "learning_rate": 0.001490506329113924, "loss": 0.7478, "step": 88550 }, { "epoch": 25.477560414269274, "grad_norm": 1.0422343015670776, "learning_rate": 0.0014904487917146146, "loss": 0.6829, "step": 88560 }, { "epoch": 25.480437284234753, "grad_norm": 1.023240089416504, "learning_rate": 0.0014903912543153051, "loss": 0.642, "step": 88570 }, { "epoch": 25.48331415420023, "grad_norm": 1.3438063859939575, "learning_rate": 0.0014903337169159955, "loss": 0.6051, "step": 88580 }, { "epoch": 25.48619102416571, "grad_norm": 3.161058187484741, "learning_rate": 0.0014902761795166858, "loss": 0.7967, "step": 88590 }, { "epoch": 25.489067894131185, "grad_norm": 1.8351713418960571, "learning_rate": 0.0014902186421173762, "loss": 0.6501, "step": 88600 }, { "epoch": 25.49194476409666, "grad_norm": 1.4280712604522705, "learning_rate": 0.0014901611047180667, "loss": 0.5498, "step": 88610 }, { "epoch": 25.49482163406214, "grad_norm": 1.1013630628585815, "learning_rate": 0.0014901035673187573, "loss": 0.6312, "step": 88620 }, { "epoch": 25.497698504027618, "grad_norm": 2.624863862991333, "learning_rate": 0.0014900460299194476, "loss": 0.825, "step": 88630 }, { "epoch": 25.500575373993094, "grad_norm": 1.1285679340362549, "learning_rate": 0.0014899884925201382, "loss": 0.6154, "step": 88640 }, { "epoch": 25.503452243958574, "grad_norm": 1.0052671432495117, "learning_rate": 0.0014899309551208286, "loss": 0.7489, "step": 88650 }, { "epoch": 25.50632911392405, "grad_norm": 1.6525062322616577, "learning_rate": 0.001489873417721519, "loss": 0.7075, "step": 88660 }, { "epoch": 25.50920598388953, "grad_norm": 1.1158113479614258, "learning_rate": 0.0014898158803222095, "loss": 0.6835, "step": 88670 }, { "epoch": 25.512082853855006, "grad_norm": 1.0008760690689087, "learning_rate": 0.0014897583429229, "loss": 0.5276, "step": 88680 }, { "epoch": 25.514959723820482, "grad_norm": 0.8409179449081421, "learning_rate": 0.0014897008055235904, "loss": 0.8602, "step": 88690 }, { "epoch": 25.51783659378596, "grad_norm": 1.5163218975067139, "learning_rate": 0.001489643268124281, "loss": 0.8583, "step": 88700 }, { "epoch": 25.520713463751438, "grad_norm": 1.707757830619812, "learning_rate": 0.001489585730724971, "loss": 0.6176, "step": 88710 }, { "epoch": 25.523590333716918, "grad_norm": 1.9609742164611816, "learning_rate": 0.0014895281933256616, "loss": 0.598, "step": 88720 }, { "epoch": 25.526467203682394, "grad_norm": 1.5249208211898804, "learning_rate": 0.0014894706559263522, "loss": 0.7386, "step": 88730 }, { "epoch": 25.52934407364787, "grad_norm": 1.108787178993225, "learning_rate": 0.0014894131185270425, "loss": 0.7839, "step": 88740 }, { "epoch": 25.53222094361335, "grad_norm": 1.3857972621917725, "learning_rate": 0.001489355581127733, "loss": 0.8206, "step": 88750 }, { "epoch": 25.535097813578826, "grad_norm": 1.219698190689087, "learning_rate": 0.0014892980437284237, "loss": 0.6215, "step": 88760 }, { "epoch": 25.537974683544302, "grad_norm": 1.7960578203201294, "learning_rate": 0.0014892405063291138, "loss": 0.6444, "step": 88770 }, { "epoch": 25.540851553509782, "grad_norm": 0.8976735472679138, "learning_rate": 0.0014891829689298044, "loss": 0.7413, "step": 88780 }, { "epoch": 25.543728423475258, "grad_norm": 1.5416613817214966, "learning_rate": 0.001489125431530495, "loss": 0.7976, "step": 88790 }, { "epoch": 25.546605293440738, "grad_norm": 1.4022873640060425, "learning_rate": 0.0014890678941311853, "loss": 0.6819, "step": 88800 }, { "epoch": 25.549482163406214, "grad_norm": 1.6674484014511108, "learning_rate": 0.0014890103567318758, "loss": 0.8578, "step": 88810 }, { "epoch": 25.55235903337169, "grad_norm": 1.2974660396575928, "learning_rate": 0.0014889528193325662, "loss": 0.8171, "step": 88820 }, { "epoch": 25.55523590333717, "grad_norm": 1.52867591381073, "learning_rate": 0.0014888952819332565, "loss": 0.7255, "step": 88830 }, { "epoch": 25.558112773302646, "grad_norm": 0.8916042447090149, "learning_rate": 0.001488837744533947, "loss": 0.631, "step": 88840 }, { "epoch": 25.560989643268123, "grad_norm": 1.5053802728652954, "learning_rate": 0.0014887802071346374, "loss": 0.5976, "step": 88850 }, { "epoch": 25.563866513233602, "grad_norm": 2.641510248184204, "learning_rate": 0.001488722669735328, "loss": 0.7803, "step": 88860 }, { "epoch": 25.56674338319908, "grad_norm": 1.5081486701965332, "learning_rate": 0.0014886651323360186, "loss": 0.6389, "step": 88870 }, { "epoch": 25.569620253164558, "grad_norm": 1.198154091835022, "learning_rate": 0.001488607594936709, "loss": 0.6218, "step": 88880 }, { "epoch": 25.572497123130034, "grad_norm": 1.1318689584732056, "learning_rate": 0.0014885500575373993, "loss": 0.6575, "step": 88890 }, { "epoch": 25.57537399309551, "grad_norm": 1.1324975490570068, "learning_rate": 0.0014884925201380898, "loss": 0.6378, "step": 88900 }, { "epoch": 25.57825086306099, "grad_norm": 2.07602596282959, "learning_rate": 0.0014884349827387802, "loss": 0.7651, "step": 88910 }, { "epoch": 25.581127733026467, "grad_norm": 1.2526623010635376, "learning_rate": 0.0014883774453394707, "loss": 0.6439, "step": 88920 }, { "epoch": 25.584004602991946, "grad_norm": 1.9068939685821533, "learning_rate": 0.001488319907940161, "loss": 0.8401, "step": 88930 }, { "epoch": 25.586881472957423, "grad_norm": 0.9041916131973267, "learning_rate": 0.0014882623705408517, "loss": 0.6144, "step": 88940 }, { "epoch": 25.5897583429229, "grad_norm": 0.8012030124664307, "learning_rate": 0.001488204833141542, "loss": 0.5445, "step": 88950 }, { "epoch": 25.59263521288838, "grad_norm": 0.9250531196594238, "learning_rate": 0.0014881472957422323, "loss": 0.7261, "step": 88960 }, { "epoch": 25.595512082853855, "grad_norm": 1.2510504722595215, "learning_rate": 0.001488089758342923, "loss": 0.6534, "step": 88970 }, { "epoch": 25.59838895281933, "grad_norm": 1.7960662841796875, "learning_rate": 0.0014880322209436135, "loss": 0.7851, "step": 88980 }, { "epoch": 25.60126582278481, "grad_norm": 1.332482099533081, "learning_rate": 0.0014879746835443038, "loss": 0.6642, "step": 88990 }, { "epoch": 25.604142692750287, "grad_norm": 1.9106048345565796, "learning_rate": 0.0014879171461449944, "loss": 0.8453, "step": 89000 }, { "epoch": 25.607019562715767, "grad_norm": 1.1754082441329956, "learning_rate": 0.0014878596087456847, "loss": 0.7787, "step": 89010 }, { "epoch": 25.609896432681243, "grad_norm": 1.8005647659301758, "learning_rate": 0.001487802071346375, "loss": 0.6768, "step": 89020 }, { "epoch": 25.61277330264672, "grad_norm": 0.7543901205062866, "learning_rate": 0.0014877445339470656, "loss": 0.7278, "step": 89030 }, { "epoch": 25.6156501726122, "grad_norm": 1.6649024486541748, "learning_rate": 0.001487686996547756, "loss": 0.8396, "step": 89040 }, { "epoch": 25.618527042577675, "grad_norm": 1.193817377090454, "learning_rate": 0.0014876294591484466, "loss": 0.7888, "step": 89050 }, { "epoch": 25.621403912543155, "grad_norm": 1.1234514713287354, "learning_rate": 0.0014875719217491371, "loss": 0.6266, "step": 89060 }, { "epoch": 25.62428078250863, "grad_norm": 1.2001101970672607, "learning_rate": 0.0014875143843498272, "loss": 0.6723, "step": 89070 }, { "epoch": 25.627157652474107, "grad_norm": 1.2777589559555054, "learning_rate": 0.0014874568469505178, "loss": 0.8159, "step": 89080 }, { "epoch": 25.630034522439587, "grad_norm": 2.564513921737671, "learning_rate": 0.0014873993095512084, "loss": 0.7326, "step": 89090 }, { "epoch": 25.632911392405063, "grad_norm": 1.6523573398590088, "learning_rate": 0.0014873417721518987, "loss": 0.7573, "step": 89100 }, { "epoch": 25.63578826237054, "grad_norm": 1.709295630455017, "learning_rate": 0.0014872842347525893, "loss": 0.8197, "step": 89110 }, { "epoch": 25.63866513233602, "grad_norm": 1.3924715518951416, "learning_rate": 0.0014872266973532799, "loss": 0.6065, "step": 89120 }, { "epoch": 25.641542002301495, "grad_norm": 2.4209790229797363, "learning_rate": 0.00148716915995397, "loss": 0.7343, "step": 89130 }, { "epoch": 25.644418872266975, "grad_norm": 1.3449841737747192, "learning_rate": 0.0014871116225546605, "loss": 0.7102, "step": 89140 }, { "epoch": 25.64729574223245, "grad_norm": 1.9695461988449097, "learning_rate": 0.001487054085155351, "loss": 0.852, "step": 89150 }, { "epoch": 25.650172612197927, "grad_norm": 0.7501454949378967, "learning_rate": 0.0014869965477560415, "loss": 0.8314, "step": 89160 }, { "epoch": 25.653049482163407, "grad_norm": 0.7650795578956604, "learning_rate": 0.001486939010356732, "loss": 0.6566, "step": 89170 }, { "epoch": 25.655926352128883, "grad_norm": 1.8476308584213257, "learning_rate": 0.0014868814729574224, "loss": 0.6814, "step": 89180 }, { "epoch": 25.658803222094363, "grad_norm": 1.497403860092163, "learning_rate": 0.0014868239355581127, "loss": 0.7979, "step": 89190 }, { "epoch": 25.66168009205984, "grad_norm": 1.5722594261169434, "learning_rate": 0.0014867663981588033, "loss": 0.7561, "step": 89200 }, { "epoch": 25.664556962025316, "grad_norm": 0.8854784369468689, "learning_rate": 0.0014867088607594936, "loss": 0.7108, "step": 89210 }, { "epoch": 25.667433831990795, "grad_norm": 1.2615500688552856, "learning_rate": 0.0014866513233601842, "loss": 0.638, "step": 89220 }, { "epoch": 25.67031070195627, "grad_norm": 1.2537606954574585, "learning_rate": 0.0014865937859608748, "loss": 0.6493, "step": 89230 }, { "epoch": 25.673187571921748, "grad_norm": 1.0610911846160889, "learning_rate": 0.001486536248561565, "loss": 0.667, "step": 89240 }, { "epoch": 25.676064441887227, "grad_norm": 1.2777961492538452, "learning_rate": 0.0014864787111622554, "loss": 0.7981, "step": 89250 }, { "epoch": 25.678941311852704, "grad_norm": 1.5794663429260254, "learning_rate": 0.001486421173762946, "loss": 0.5406, "step": 89260 }, { "epoch": 25.681818181818183, "grad_norm": 1.219800591468811, "learning_rate": 0.0014863636363636364, "loss": 0.6581, "step": 89270 }, { "epoch": 25.68469505178366, "grad_norm": 1.4926446676254272, "learning_rate": 0.001486306098964327, "loss": 0.743, "step": 89280 }, { "epoch": 25.687571921749136, "grad_norm": 2.4216573238372803, "learning_rate": 0.0014862485615650173, "loss": 0.8229, "step": 89290 }, { "epoch": 25.690448791714616, "grad_norm": 2.2055959701538086, "learning_rate": 0.0014861910241657078, "loss": 0.7671, "step": 89300 }, { "epoch": 25.693325661680092, "grad_norm": 1.1766585111618042, "learning_rate": 0.0014861334867663982, "loss": 0.753, "step": 89310 }, { "epoch": 25.696202531645568, "grad_norm": 0.7379855513572693, "learning_rate": 0.0014860759493670885, "loss": 0.6438, "step": 89320 }, { "epoch": 25.699079401611048, "grad_norm": 1.9117789268493652, "learning_rate": 0.001486018411967779, "loss": 0.9639, "step": 89330 }, { "epoch": 25.701956271576524, "grad_norm": 1.5861847400665283, "learning_rate": 0.0014859608745684697, "loss": 0.746, "step": 89340 }, { "epoch": 25.704833141542004, "grad_norm": 0.9845160245895386, "learning_rate": 0.00148590333716916, "loss": 0.6215, "step": 89350 }, { "epoch": 25.70771001150748, "grad_norm": 1.076385736465454, "learning_rate": 0.0014858457997698506, "loss": 0.7627, "step": 89360 }, { "epoch": 25.710586881472956, "grad_norm": 1.3077383041381836, "learning_rate": 0.001485788262370541, "loss": 0.7455, "step": 89370 }, { "epoch": 25.713463751438436, "grad_norm": 1.1529955863952637, "learning_rate": 0.0014857307249712313, "loss": 0.7544, "step": 89380 }, { "epoch": 25.716340621403912, "grad_norm": 1.8173835277557373, "learning_rate": 0.0014856731875719218, "loss": 0.9319, "step": 89390 }, { "epoch": 25.719217491369392, "grad_norm": 0.8569238781929016, "learning_rate": 0.0014856156501726122, "loss": 0.663, "step": 89400 }, { "epoch": 25.722094361334868, "grad_norm": 2.094773769378662, "learning_rate": 0.0014855581127733027, "loss": 0.7164, "step": 89410 }, { "epoch": 25.724971231300344, "grad_norm": 1.4461629390716553, "learning_rate": 0.001485500575373993, "loss": 0.6897, "step": 89420 }, { "epoch": 25.727848101265824, "grad_norm": 0.8657286167144775, "learning_rate": 0.0014854430379746834, "loss": 0.5869, "step": 89430 }, { "epoch": 25.7307249712313, "grad_norm": 2.0026328563690186, "learning_rate": 0.001485385500575374, "loss": 0.5532, "step": 89440 }, { "epoch": 25.733601841196776, "grad_norm": 1.4251059293746948, "learning_rate": 0.0014853279631760646, "loss": 0.803, "step": 89450 }, { "epoch": 25.736478711162256, "grad_norm": 2.9124677181243896, "learning_rate": 0.001485270425776755, "loss": 0.8381, "step": 89460 }, { "epoch": 25.739355581127732, "grad_norm": 2.0792369842529297, "learning_rate": 0.0014852128883774455, "loss": 0.6676, "step": 89470 }, { "epoch": 25.742232451093212, "grad_norm": 0.8541513085365295, "learning_rate": 0.0014851553509781358, "loss": 0.7186, "step": 89480 }, { "epoch": 25.74510932105869, "grad_norm": 1.2344822883605957, "learning_rate": 0.0014850978135788262, "loss": 0.6756, "step": 89490 }, { "epoch": 25.747986191024165, "grad_norm": 1.0078915357589722, "learning_rate": 0.0014850402761795167, "loss": 0.6397, "step": 89500 }, { "epoch": 25.750863060989644, "grad_norm": 1.1507264375686646, "learning_rate": 0.001484982738780207, "loss": 0.6062, "step": 89510 }, { "epoch": 25.75373993095512, "grad_norm": 1.5597925186157227, "learning_rate": 0.0014849252013808976, "loss": 0.7333, "step": 89520 }, { "epoch": 25.756616800920597, "grad_norm": 1.378710389137268, "learning_rate": 0.0014848676639815882, "loss": 0.6045, "step": 89530 }, { "epoch": 25.759493670886076, "grad_norm": 1.3984278440475464, "learning_rate": 0.0014848101265822783, "loss": 0.6137, "step": 89540 }, { "epoch": 25.762370540851553, "grad_norm": 2.002176523208618, "learning_rate": 0.001484752589182969, "loss": 0.6911, "step": 89550 }, { "epoch": 25.765247410817032, "grad_norm": 1.356870412826538, "learning_rate": 0.0014846950517836595, "loss": 0.7193, "step": 89560 }, { "epoch": 25.76812428078251, "grad_norm": 0.9522294402122498, "learning_rate": 0.0014846375143843498, "loss": 0.756, "step": 89570 }, { "epoch": 25.771001150747985, "grad_norm": 2.881117820739746, "learning_rate": 0.0014845799769850404, "loss": 0.6749, "step": 89580 }, { "epoch": 25.773878020713465, "grad_norm": 1.974632978439331, "learning_rate": 0.001484522439585731, "loss": 0.8433, "step": 89590 }, { "epoch": 25.77675489067894, "grad_norm": 1.2393851280212402, "learning_rate": 0.001484464902186421, "loss": 0.737, "step": 89600 }, { "epoch": 25.77963176064442, "grad_norm": 0.9006217122077942, "learning_rate": 0.0014844073647871116, "loss": 0.6266, "step": 89610 }, { "epoch": 25.782508630609897, "grad_norm": 1.2459555864334106, "learning_rate": 0.001484349827387802, "loss": 0.7283, "step": 89620 }, { "epoch": 25.785385500575373, "grad_norm": 1.4872136116027832, "learning_rate": 0.0014842922899884925, "loss": 0.8456, "step": 89630 }, { "epoch": 25.788262370540853, "grad_norm": 1.275701880455017, "learning_rate": 0.001484234752589183, "loss": 0.6363, "step": 89640 }, { "epoch": 25.79113924050633, "grad_norm": 2.367741823196411, "learning_rate": 0.0014841772151898735, "loss": 0.7374, "step": 89650 }, { "epoch": 25.794016110471805, "grad_norm": 1.3608884811401367, "learning_rate": 0.0014841196777905638, "loss": 0.7452, "step": 89660 }, { "epoch": 25.796892980437285, "grad_norm": 1.2585424184799194, "learning_rate": 0.0014840621403912544, "loss": 0.6696, "step": 89670 }, { "epoch": 25.79976985040276, "grad_norm": 1.707909107208252, "learning_rate": 0.0014840046029919447, "loss": 0.5967, "step": 89680 }, { "epoch": 25.80264672036824, "grad_norm": 1.681815266609192, "learning_rate": 0.0014839470655926353, "loss": 0.8056, "step": 89690 }, { "epoch": 25.805523590333717, "grad_norm": 0.8322755694389343, "learning_rate": 0.0014838895281933258, "loss": 0.5795, "step": 89700 }, { "epoch": 25.808400460299193, "grad_norm": 1.4398555755615234, "learning_rate": 0.0014838319907940162, "loss": 0.6676, "step": 89710 }, { "epoch": 25.811277330264673, "grad_norm": 1.2160248756408691, "learning_rate": 0.0014837744533947065, "loss": 0.7364, "step": 89720 }, { "epoch": 25.81415420023015, "grad_norm": 1.9247084856033325, "learning_rate": 0.0014837169159953969, "loss": 0.6789, "step": 89730 }, { "epoch": 25.817031070195625, "grad_norm": 1.1902225017547607, "learning_rate": 0.0014836593785960874, "loss": 0.6926, "step": 89740 }, { "epoch": 25.819907940161105, "grad_norm": 1.9515652656555176, "learning_rate": 0.001483601841196778, "loss": 0.6171, "step": 89750 }, { "epoch": 25.82278481012658, "grad_norm": 4.56503963470459, "learning_rate": 0.0014835443037974684, "loss": 0.6943, "step": 89760 }, { "epoch": 25.82566168009206, "grad_norm": 0.9068484902381897, "learning_rate": 0.001483486766398159, "loss": 0.7495, "step": 89770 }, { "epoch": 25.828538550057537, "grad_norm": 1.4529671669006348, "learning_rate": 0.0014834292289988493, "loss": 0.7812, "step": 89780 }, { "epoch": 25.831415420023013, "grad_norm": 1.748602032661438, "learning_rate": 0.0014833716915995396, "loss": 0.7789, "step": 89790 }, { "epoch": 25.834292289988493, "grad_norm": 0.7143338322639465, "learning_rate": 0.0014833141542002302, "loss": 0.7107, "step": 89800 }, { "epoch": 25.83716915995397, "grad_norm": 1.2023861408233643, "learning_rate": 0.0014832566168009207, "loss": 0.7628, "step": 89810 }, { "epoch": 25.84004602991945, "grad_norm": 1.1646218299865723, "learning_rate": 0.001483199079401611, "loss": 0.6216, "step": 89820 }, { "epoch": 25.842922899884925, "grad_norm": 1.4896471500396729, "learning_rate": 0.0014831415420023017, "loss": 0.8862, "step": 89830 }, { "epoch": 25.8457997698504, "grad_norm": 2.2412312030792236, "learning_rate": 0.0014830840046029918, "loss": 0.7384, "step": 89840 }, { "epoch": 25.84867663981588, "grad_norm": 0.7455440759658813, "learning_rate": 0.0014830264672036823, "loss": 0.5555, "step": 89850 }, { "epoch": 25.851553509781358, "grad_norm": 1.2953071594238281, "learning_rate": 0.001482968929804373, "loss": 0.7287, "step": 89860 }, { "epoch": 25.854430379746834, "grad_norm": 1.4682257175445557, "learning_rate": 0.0014829113924050633, "loss": 0.6493, "step": 89870 }, { "epoch": 25.857307249712314, "grad_norm": 1.2934315204620361, "learning_rate": 0.0014828538550057538, "loss": 0.7531, "step": 89880 }, { "epoch": 25.86018411967779, "grad_norm": 1.082344651222229, "learning_rate": 0.0014827963176064444, "loss": 0.725, "step": 89890 }, { "epoch": 25.86306098964327, "grad_norm": 0.8227112889289856, "learning_rate": 0.0014827387802071345, "loss": 0.9861, "step": 89900 }, { "epoch": 25.865937859608746, "grad_norm": 2.2358977794647217, "learning_rate": 0.001482681242807825, "loss": 0.7883, "step": 89910 }, { "epoch": 25.868814729574222, "grad_norm": 2.11013126373291, "learning_rate": 0.0014826237054085156, "loss": 0.9841, "step": 89920 }, { "epoch": 25.8716915995397, "grad_norm": 1.3550939559936523, "learning_rate": 0.001482566168009206, "loss": 0.7483, "step": 89930 }, { "epoch": 25.874568469505178, "grad_norm": 1.5339878797531128, "learning_rate": 0.0014825086306098966, "loss": 0.6089, "step": 89940 }, { "epoch": 25.877445339470658, "grad_norm": 1.2378238439559937, "learning_rate": 0.0014824510932105871, "loss": 0.7363, "step": 89950 }, { "epoch": 25.880322209436134, "grad_norm": 1.7968512773513794, "learning_rate": 0.0014823935558112772, "loss": 0.6464, "step": 89960 }, { "epoch": 25.88319907940161, "grad_norm": 1.3399990797042847, "learning_rate": 0.0014823360184119678, "loss": 0.7195, "step": 89970 }, { "epoch": 25.88607594936709, "grad_norm": 0.6888872981071472, "learning_rate": 0.0014822784810126582, "loss": 0.7004, "step": 89980 }, { "epoch": 25.888952819332566, "grad_norm": 1.7237684726715088, "learning_rate": 0.0014822209436133487, "loss": 0.6576, "step": 89990 }, { "epoch": 25.891829689298042, "grad_norm": 1.2381092309951782, "learning_rate": 0.0014821634062140393, "loss": 0.7189, "step": 90000 }, { "epoch": 25.894706559263522, "grad_norm": 0.724648654460907, "learning_rate": 0.0014821058688147296, "loss": 0.6064, "step": 90010 }, { "epoch": 25.897583429228998, "grad_norm": 2.0716190338134766, "learning_rate": 0.00148204833141542, "loss": 0.7008, "step": 90020 }, { "epoch": 25.900460299194478, "grad_norm": 0.8050498962402344, "learning_rate": 0.0014819907940161105, "loss": 0.5915, "step": 90030 }, { "epoch": 25.903337169159954, "grad_norm": 0.6403930187225342, "learning_rate": 0.0014819332566168009, "loss": 0.6981, "step": 90040 }, { "epoch": 25.90621403912543, "grad_norm": 0.8841162919998169, "learning_rate": 0.0014818757192174915, "loss": 0.6556, "step": 90050 }, { "epoch": 25.90909090909091, "grad_norm": 1.2871813774108887, "learning_rate": 0.001481818181818182, "loss": 0.7299, "step": 90060 }, { "epoch": 25.911967779056386, "grad_norm": 0.8081269264221191, "learning_rate": 0.0014817606444188724, "loss": 0.6655, "step": 90070 }, { "epoch": 25.914844649021866, "grad_norm": 1.152162790298462, "learning_rate": 0.0014817031070195627, "loss": 1.0015, "step": 90080 }, { "epoch": 25.917721518987342, "grad_norm": 0.8396894931793213, "learning_rate": 0.001481645569620253, "loss": 0.6646, "step": 90090 }, { "epoch": 25.92059838895282, "grad_norm": 1.264359712600708, "learning_rate": 0.0014815880322209436, "loss": 0.619, "step": 90100 }, { "epoch": 25.923475258918298, "grad_norm": 1.8256183862686157, "learning_rate": 0.0014815304948216342, "loss": 0.7459, "step": 90110 }, { "epoch": 25.926352128883774, "grad_norm": 0.7710301876068115, "learning_rate": 0.0014814729574223245, "loss": 0.6851, "step": 90120 }, { "epoch": 25.92922899884925, "grad_norm": 1.2994980812072754, "learning_rate": 0.001481415420023015, "loss": 0.589, "step": 90130 }, { "epoch": 25.93210586881473, "grad_norm": 2.1842405796051025, "learning_rate": 0.0014813578826237054, "loss": 0.8083, "step": 90140 }, { "epoch": 25.934982738780207, "grad_norm": 1.2408027648925781, "learning_rate": 0.0014813003452243958, "loss": 0.5953, "step": 90150 }, { "epoch": 25.937859608745686, "grad_norm": 1.5635006427764893, "learning_rate": 0.0014812428078250864, "loss": 0.7011, "step": 90160 }, { "epoch": 25.940736478711163, "grad_norm": 1.7484004497528076, "learning_rate": 0.001481185270425777, "loss": 0.8095, "step": 90170 }, { "epoch": 25.94361334867664, "grad_norm": 2.111470937728882, "learning_rate": 0.0014811277330264673, "loss": 0.6906, "step": 90180 }, { "epoch": 25.94649021864212, "grad_norm": 1.0348485708236694, "learning_rate": 0.0014810701956271578, "loss": 0.9863, "step": 90190 }, { "epoch": 25.949367088607595, "grad_norm": 1.5538407564163208, "learning_rate": 0.001481012658227848, "loss": 0.7154, "step": 90200 }, { "epoch": 25.95224395857307, "grad_norm": 1.5771820545196533, "learning_rate": 0.0014809551208285385, "loss": 0.6201, "step": 90210 }, { "epoch": 25.95512082853855, "grad_norm": 0.9078784584999084, "learning_rate": 0.001480897583429229, "loss": 0.6821, "step": 90220 }, { "epoch": 25.957997698504027, "grad_norm": 1.2827903032302856, "learning_rate": 0.0014808400460299194, "loss": 0.6948, "step": 90230 }, { "epoch": 25.960874568469507, "grad_norm": 0.9219550490379333, "learning_rate": 0.00148078250863061, "loss": 0.6684, "step": 90240 }, { "epoch": 25.963751438434983, "grad_norm": 1.3624091148376465, "learning_rate": 0.0014807249712313003, "loss": 0.593, "step": 90250 }, { "epoch": 25.96662830840046, "grad_norm": 1.6782621145248413, "learning_rate": 0.0014806674338319907, "loss": 0.6988, "step": 90260 }, { "epoch": 25.96950517836594, "grad_norm": 1.391120195388794, "learning_rate": 0.0014806098964326813, "loss": 0.6875, "step": 90270 }, { "epoch": 25.972382048331415, "grad_norm": 2.054089307785034, "learning_rate": 0.0014805523590333718, "loss": 0.7742, "step": 90280 }, { "epoch": 25.975258918296895, "grad_norm": 2.5878162384033203, "learning_rate": 0.0014804948216340622, "loss": 0.7698, "step": 90290 }, { "epoch": 25.97813578826237, "grad_norm": 1.2360635995864868, "learning_rate": 0.0014804372842347527, "loss": 0.6772, "step": 90300 }, { "epoch": 25.981012658227847, "grad_norm": 2.1255104541778564, "learning_rate": 0.0014803797468354429, "loss": 0.8198, "step": 90310 }, { "epoch": 25.983889528193327, "grad_norm": 0.9826225638389587, "learning_rate": 0.0014803222094361334, "loss": 0.6899, "step": 90320 }, { "epoch": 25.986766398158803, "grad_norm": 1.6835753917694092, "learning_rate": 0.001480264672036824, "loss": 0.582, "step": 90330 }, { "epoch": 25.98964326812428, "grad_norm": 2.0962107181549072, "learning_rate": 0.0014802071346375143, "loss": 0.6005, "step": 90340 }, { "epoch": 25.99252013808976, "grad_norm": 2.6968228816986084, "learning_rate": 0.001480149597238205, "loss": 0.7574, "step": 90350 }, { "epoch": 25.995397008055235, "grad_norm": 1.1149630546569824, "learning_rate": 0.0014800920598388955, "loss": 0.7684, "step": 90360 }, { "epoch": 25.998273878020715, "grad_norm": 1.8573466539382935, "learning_rate": 0.0014800345224395856, "loss": 0.7194, "step": 90370 }, { "epoch": 26.00115074798619, "grad_norm": 0.6885170936584473, "learning_rate": 0.0014799769850402762, "loss": 0.7325, "step": 90380 }, { "epoch": 26.004027617951667, "grad_norm": 1.0796196460723877, "learning_rate": 0.0014799194476409667, "loss": 0.7292, "step": 90390 }, { "epoch": 26.006904487917147, "grad_norm": 1.7973506450653076, "learning_rate": 0.001479861910241657, "loss": 0.819, "step": 90400 }, { "epoch": 26.009781357882623, "grad_norm": 0.9757175445556641, "learning_rate": 0.0014798043728423476, "loss": 0.6974, "step": 90410 }, { "epoch": 26.0126582278481, "grad_norm": 1.0629079341888428, "learning_rate": 0.001479746835443038, "loss": 0.5173, "step": 90420 }, { "epoch": 26.01553509781358, "grad_norm": 2.2612831592559814, "learning_rate": 0.0014796892980437283, "loss": 0.7055, "step": 90430 }, { "epoch": 26.018411967779056, "grad_norm": 1.103598952293396, "learning_rate": 0.001479631760644419, "loss": 0.6882, "step": 90440 }, { "epoch": 26.021288837744535, "grad_norm": 1.1304200887680054, "learning_rate": 0.0014795742232451092, "loss": 0.6599, "step": 90450 }, { "epoch": 26.02416570771001, "grad_norm": 0.8403756618499756, "learning_rate": 0.0014795166858457998, "loss": 0.6396, "step": 90460 }, { "epoch": 26.027042577675488, "grad_norm": 2.019291639328003, "learning_rate": 0.0014794591484464904, "loss": 0.6087, "step": 90470 }, { "epoch": 26.029919447640967, "grad_norm": 1.0895556211471558, "learning_rate": 0.0014794016110471807, "loss": 0.5963, "step": 90480 }, { "epoch": 26.032796317606444, "grad_norm": 1.440704345703125, "learning_rate": 0.001479344073647871, "loss": 0.705, "step": 90490 }, { "epoch": 26.035673187571923, "grad_norm": 1.4272246360778809, "learning_rate": 0.0014792865362485616, "loss": 0.6859, "step": 90500 }, { "epoch": 26.0385500575374, "grad_norm": 1.1416791677474976, "learning_rate": 0.001479228998849252, "loss": 0.6211, "step": 90510 }, { "epoch": 26.041426927502876, "grad_norm": 0.8819931149482727, "learning_rate": 0.0014791714614499425, "loss": 0.7091, "step": 90520 }, { "epoch": 26.044303797468356, "grad_norm": 0.7032570838928223, "learning_rate": 0.001479113924050633, "loss": 0.5677, "step": 90530 }, { "epoch": 26.04718066743383, "grad_norm": 1.0572789907455444, "learning_rate": 0.0014790563866513234, "loss": 0.556, "step": 90540 }, { "epoch": 26.050057537399308, "grad_norm": 1.346640706062317, "learning_rate": 0.0014789988492520138, "loss": 0.7081, "step": 90550 }, { "epoch": 26.052934407364788, "grad_norm": 2.161802291870117, "learning_rate": 0.0014789413118527041, "loss": 0.6864, "step": 90560 }, { "epoch": 26.055811277330264, "grad_norm": 1.549265742301941, "learning_rate": 0.0014788837744533947, "loss": 0.7491, "step": 90570 }, { "epoch": 26.058688147295744, "grad_norm": 1.8592705726623535, "learning_rate": 0.0014788262370540853, "loss": 0.76, "step": 90580 }, { "epoch": 26.06156501726122, "grad_norm": 1.3324402570724487, "learning_rate": 0.0014787686996547756, "loss": 0.6731, "step": 90590 }, { "epoch": 26.064441887226696, "grad_norm": 0.8689659237861633, "learning_rate": 0.0014787111622554662, "loss": 0.6506, "step": 90600 }, { "epoch": 26.067318757192176, "grad_norm": 0.9740495085716248, "learning_rate": 0.0014786536248561565, "loss": 0.665, "step": 90610 }, { "epoch": 26.070195627157652, "grad_norm": 1.1536211967468262, "learning_rate": 0.0014785960874568469, "loss": 0.5828, "step": 90620 }, { "epoch": 26.073072497123132, "grad_norm": 1.664230465888977, "learning_rate": 0.0014785385500575374, "loss": 0.7446, "step": 90630 }, { "epoch": 26.075949367088608, "grad_norm": 1.5866539478302002, "learning_rate": 0.001478481012658228, "loss": 0.6064, "step": 90640 }, { "epoch": 26.078826237054084, "grad_norm": 0.8911318778991699, "learning_rate": 0.0014784234752589184, "loss": 0.6506, "step": 90650 }, { "epoch": 26.081703107019564, "grad_norm": 0.9452614784240723, "learning_rate": 0.001478365937859609, "loss": 0.7344, "step": 90660 }, { "epoch": 26.08457997698504, "grad_norm": 1.5124765634536743, "learning_rate": 0.001478308400460299, "loss": 0.7131, "step": 90670 }, { "epoch": 26.087456846950516, "grad_norm": 1.2089018821716309, "learning_rate": 0.0014782508630609896, "loss": 0.6371, "step": 90680 }, { "epoch": 26.090333716915996, "grad_norm": 0.824756383895874, "learning_rate": 0.0014781933256616802, "loss": 0.6405, "step": 90690 }, { "epoch": 26.093210586881472, "grad_norm": 1.36720609664917, "learning_rate": 0.0014781357882623705, "loss": 0.6222, "step": 90700 }, { "epoch": 26.096087456846952, "grad_norm": 2.0585665702819824, "learning_rate": 0.001478078250863061, "loss": 0.6649, "step": 90710 }, { "epoch": 26.09896432681243, "grad_norm": 1.6025452613830566, "learning_rate": 0.0014780207134637516, "loss": 0.8514, "step": 90720 }, { "epoch": 26.101841196777904, "grad_norm": 1.5981868505477905, "learning_rate": 0.0014779631760644418, "loss": 0.6678, "step": 90730 }, { "epoch": 26.104718066743384, "grad_norm": 0.6754271388053894, "learning_rate": 0.0014779056386651323, "loss": 0.655, "step": 90740 }, { "epoch": 26.10759493670886, "grad_norm": 0.8266879320144653, "learning_rate": 0.001477848101265823, "loss": 0.5998, "step": 90750 }, { "epoch": 26.110471806674337, "grad_norm": 1.1591767072677612, "learning_rate": 0.0014777905638665133, "loss": 0.4815, "step": 90760 }, { "epoch": 26.113348676639816, "grad_norm": 1.668770432472229, "learning_rate": 0.0014777330264672038, "loss": 0.6226, "step": 90770 }, { "epoch": 26.116225546605293, "grad_norm": 1.0870972871780396, "learning_rate": 0.0014776754890678942, "loss": 0.5961, "step": 90780 }, { "epoch": 26.119102416570772, "grad_norm": 3.278297185897827, "learning_rate": 0.0014776179516685845, "loss": 0.582, "step": 90790 }, { "epoch": 26.12197928653625, "grad_norm": 0.9907582402229309, "learning_rate": 0.001477560414269275, "loss": 0.56, "step": 90800 }, { "epoch": 26.124856156501725, "grad_norm": 3.3261120319366455, "learning_rate": 0.0014775028768699654, "loss": 0.7786, "step": 90810 }, { "epoch": 26.127733026467205, "grad_norm": 0.8164195418357849, "learning_rate": 0.001477445339470656, "loss": 0.6218, "step": 90820 }, { "epoch": 26.13060989643268, "grad_norm": 1.39687979221344, "learning_rate": 0.0014773878020713466, "loss": 0.8162, "step": 90830 }, { "epoch": 26.13348676639816, "grad_norm": 1.0114073753356934, "learning_rate": 0.001477330264672037, "loss": 0.6632, "step": 90840 }, { "epoch": 26.136363636363637, "grad_norm": 1.977674126625061, "learning_rate": 0.0014772727272727272, "loss": 0.7262, "step": 90850 }, { "epoch": 26.139240506329113, "grad_norm": 1.1992883682250977, "learning_rate": 0.0014772151898734178, "loss": 0.5395, "step": 90860 }, { "epoch": 26.142117376294593, "grad_norm": 1.3306697607040405, "learning_rate": 0.0014771576524741082, "loss": 0.6675, "step": 90870 }, { "epoch": 26.14499424626007, "grad_norm": 0.8717645406723022, "learning_rate": 0.0014771001150747987, "loss": 0.5479, "step": 90880 }, { "epoch": 26.147871116225545, "grad_norm": 0.7346360683441162, "learning_rate": 0.001477042577675489, "loss": 0.522, "step": 90890 }, { "epoch": 26.150747986191025, "grad_norm": 1.6436102390289307, "learning_rate": 0.0014769850402761796, "loss": 0.6885, "step": 90900 }, { "epoch": 26.1536248561565, "grad_norm": 1.5437612533569336, "learning_rate": 0.00147692750287687, "loss": 0.6653, "step": 90910 }, { "epoch": 26.15650172612198, "grad_norm": 1.0371294021606445, "learning_rate": 0.0014768699654775603, "loss": 0.6103, "step": 90920 }, { "epoch": 26.159378596087457, "grad_norm": 1.9819947481155396, "learning_rate": 0.0014768124280782509, "loss": 0.6269, "step": 90930 }, { "epoch": 26.162255466052933, "grad_norm": 1.337193250656128, "learning_rate": 0.0014767548906789415, "loss": 0.826, "step": 90940 }, { "epoch": 26.165132336018413, "grad_norm": 1.513700246810913, "learning_rate": 0.0014766973532796318, "loss": 0.8304, "step": 90950 }, { "epoch": 26.16800920598389, "grad_norm": 1.1833926439285278, "learning_rate": 0.0014766398158803224, "loss": 0.9087, "step": 90960 }, { "epoch": 26.170886075949365, "grad_norm": 0.7506342530250549, "learning_rate": 0.0014765822784810127, "loss": 0.5397, "step": 90970 }, { "epoch": 26.173762945914845, "grad_norm": 0.9115679264068604, "learning_rate": 0.001476524741081703, "loss": 0.6513, "step": 90980 }, { "epoch": 26.17663981588032, "grad_norm": 1.2695643901824951, "learning_rate": 0.0014764672036823936, "loss": 0.5967, "step": 90990 }, { "epoch": 26.1795166858458, "grad_norm": 1.2862671613693237, "learning_rate": 0.001476409666283084, "loss": 0.6134, "step": 91000 }, { "epoch": 26.182393555811277, "grad_norm": 1.2985624074935913, "learning_rate": 0.0014763521288837745, "loss": 0.7365, "step": 91010 }, { "epoch": 26.185270425776753, "grad_norm": 1.4088326692581177, "learning_rate": 0.001476294591484465, "loss": 0.7893, "step": 91020 }, { "epoch": 26.188147295742233, "grad_norm": 0.7311326265335083, "learning_rate": 0.0014762370540851552, "loss": 0.6486, "step": 91030 }, { "epoch": 26.19102416570771, "grad_norm": 0.741655170917511, "learning_rate": 0.0014761795166858458, "loss": 0.5241, "step": 91040 }, { "epoch": 26.19390103567319, "grad_norm": 0.8555836081504822, "learning_rate": 0.0014761219792865364, "loss": 0.5462, "step": 91050 }, { "epoch": 26.196777905638665, "grad_norm": 1.4390089511871338, "learning_rate": 0.0014760644418872267, "loss": 0.7159, "step": 91060 }, { "epoch": 26.19965477560414, "grad_norm": 1.209151029586792, "learning_rate": 0.0014760069044879173, "loss": 0.7287, "step": 91070 }, { "epoch": 26.20253164556962, "grad_norm": 1.0092315673828125, "learning_rate": 0.0014759493670886076, "loss": 0.7243, "step": 91080 }, { "epoch": 26.205408515535098, "grad_norm": 1.336525559425354, "learning_rate": 0.001475891829689298, "loss": 0.7269, "step": 91090 }, { "epoch": 26.208285385500574, "grad_norm": 2.351243019104004, "learning_rate": 0.0014758342922899885, "loss": 0.6897, "step": 91100 }, { "epoch": 26.211162255466053, "grad_norm": 1.9583643674850464, "learning_rate": 0.0014757767548906789, "loss": 0.8391, "step": 91110 }, { "epoch": 26.21403912543153, "grad_norm": 0.8492492437362671, "learning_rate": 0.0014757192174913694, "loss": 0.6766, "step": 91120 }, { "epoch": 26.21691599539701, "grad_norm": 1.0086102485656738, "learning_rate": 0.00147566168009206, "loss": 0.7613, "step": 91130 }, { "epoch": 26.219792865362486, "grad_norm": 1.0566306114196777, "learning_rate": 0.0014756041426927501, "loss": 0.5699, "step": 91140 }, { "epoch": 26.222669735327962, "grad_norm": 1.0435587167739868, "learning_rate": 0.0014755466052934407, "loss": 0.7786, "step": 91150 }, { "epoch": 26.22554660529344, "grad_norm": 1.3297669887542725, "learning_rate": 0.0014754890678941313, "loss": 0.7651, "step": 91160 }, { "epoch": 26.228423475258918, "grad_norm": 1.7520087957382202, "learning_rate": 0.0014754315304948216, "loss": 0.6158, "step": 91170 }, { "epoch": 26.231300345224398, "grad_norm": 1.2406760454177856, "learning_rate": 0.0014753739930955122, "loss": 0.691, "step": 91180 }, { "epoch": 26.234177215189874, "grad_norm": 1.0934245586395264, "learning_rate": 0.0014753164556962027, "loss": 0.7625, "step": 91190 }, { "epoch": 26.23705408515535, "grad_norm": 1.02867591381073, "learning_rate": 0.0014752589182968929, "loss": 0.7493, "step": 91200 }, { "epoch": 26.23993095512083, "grad_norm": 1.6359783411026, "learning_rate": 0.0014752013808975834, "loss": 0.8004, "step": 91210 }, { "epoch": 26.242807825086306, "grad_norm": 2.6727163791656494, "learning_rate": 0.001475143843498274, "loss": 0.8601, "step": 91220 }, { "epoch": 26.245684695051782, "grad_norm": 1.7073637247085571, "learning_rate": 0.0014750863060989643, "loss": 0.6384, "step": 91230 }, { "epoch": 26.248561565017262, "grad_norm": 0.9436203837394714, "learning_rate": 0.001475028768699655, "loss": 0.4596, "step": 91240 }, { "epoch": 26.251438434982738, "grad_norm": 2.2234973907470703, "learning_rate": 0.0014749712313003452, "loss": 0.482, "step": 91250 }, { "epoch": 26.254315304948218, "grad_norm": 0.9480252861976624, "learning_rate": 0.0014749136939010356, "loss": 0.5518, "step": 91260 }, { "epoch": 26.257192174913694, "grad_norm": 1.3345180749893188, "learning_rate": 0.0014748561565017262, "loss": 0.682, "step": 91270 }, { "epoch": 26.26006904487917, "grad_norm": 1.656172752380371, "learning_rate": 0.0014747986191024165, "loss": 0.8449, "step": 91280 }, { "epoch": 26.26294591484465, "grad_norm": 1.6403807401657104, "learning_rate": 0.001474741081703107, "loss": 0.7344, "step": 91290 }, { "epoch": 26.265822784810126, "grad_norm": 1.6178038120269775, "learning_rate": 0.0014746835443037976, "loss": 0.7784, "step": 91300 }, { "epoch": 26.268699654775602, "grad_norm": 1.3044884204864502, "learning_rate": 0.001474626006904488, "loss": 0.7079, "step": 91310 }, { "epoch": 26.271576524741082, "grad_norm": 1.7441198825836182, "learning_rate": 0.0014745684695051783, "loss": 0.7139, "step": 91320 }, { "epoch": 26.27445339470656, "grad_norm": 1.8768506050109863, "learning_rate": 0.001474510932105869, "loss": 0.7412, "step": 91330 }, { "epoch": 26.277330264672038, "grad_norm": 1.03408682346344, "learning_rate": 0.0014744533947065592, "loss": 0.6404, "step": 91340 }, { "epoch": 26.280207134637514, "grad_norm": 0.6484766006469727, "learning_rate": 0.0014743958573072498, "loss": 0.6211, "step": 91350 }, { "epoch": 26.28308400460299, "grad_norm": 1.0941697359085083, "learning_rate": 0.0014743383199079401, "loss": 0.7724, "step": 91360 }, { "epoch": 26.28596087456847, "grad_norm": 1.3634063005447388, "learning_rate": 0.0014742807825086307, "loss": 0.6473, "step": 91370 }, { "epoch": 26.288837744533947, "grad_norm": 0.8726528286933899, "learning_rate": 0.001474223245109321, "loss": 0.5902, "step": 91380 }, { "epoch": 26.291714614499426, "grad_norm": 1.7540613412857056, "learning_rate": 0.0014741657077100114, "loss": 0.6763, "step": 91390 }, { "epoch": 26.294591484464902, "grad_norm": 1.208851933479309, "learning_rate": 0.001474108170310702, "loss": 0.8352, "step": 91400 }, { "epoch": 26.29746835443038, "grad_norm": 1.2941802740097046, "learning_rate": 0.0014740506329113925, "loss": 0.7597, "step": 91410 }, { "epoch": 26.30034522439586, "grad_norm": 1.0547574758529663, "learning_rate": 0.0014739930955120829, "loss": 0.5995, "step": 91420 }, { "epoch": 26.303222094361335, "grad_norm": 2.0996809005737305, "learning_rate": 0.0014739355581127734, "loss": 0.8315, "step": 91430 }, { "epoch": 26.30609896432681, "grad_norm": 1.2339684963226318, "learning_rate": 0.0014738780207134638, "loss": 0.5511, "step": 91440 }, { "epoch": 26.30897583429229, "grad_norm": 1.2292988300323486, "learning_rate": 0.0014738204833141541, "loss": 0.7572, "step": 91450 }, { "epoch": 26.311852704257767, "grad_norm": 1.2685651779174805, "learning_rate": 0.0014737629459148447, "loss": 0.6835, "step": 91460 }, { "epoch": 26.314729574223247, "grad_norm": 0.8267917633056641, "learning_rate": 0.001473705408515535, "loss": 0.6514, "step": 91470 }, { "epoch": 26.317606444188723, "grad_norm": 1.8777753114700317, "learning_rate": 0.0014736478711162256, "loss": 0.7442, "step": 91480 }, { "epoch": 26.3204833141542, "grad_norm": 1.4576767683029175, "learning_rate": 0.0014735903337169162, "loss": 0.6271, "step": 91490 }, { "epoch": 26.32336018411968, "grad_norm": 1.725435495376587, "learning_rate": 0.0014735327963176063, "loss": 0.6594, "step": 91500 }, { "epoch": 26.326237054085155, "grad_norm": 1.0646637678146362, "learning_rate": 0.0014734752589182969, "loss": 0.6572, "step": 91510 }, { "epoch": 26.32911392405063, "grad_norm": 1.504990816116333, "learning_rate": 0.0014734177215189874, "loss": 0.5897, "step": 91520 }, { "epoch": 26.33199079401611, "grad_norm": 1.1271687746047974, "learning_rate": 0.0014733601841196778, "loss": 0.5935, "step": 91530 }, { "epoch": 26.334867663981587, "grad_norm": 1.0958796739578247, "learning_rate": 0.0014733026467203683, "loss": 0.7828, "step": 91540 }, { "epoch": 26.337744533947067, "grad_norm": 1.7035571336746216, "learning_rate": 0.001473245109321059, "loss": 0.7559, "step": 91550 }, { "epoch": 26.340621403912543, "grad_norm": 1.5038211345672607, "learning_rate": 0.001473187571921749, "loss": 0.6505, "step": 91560 }, { "epoch": 26.34349827387802, "grad_norm": 1.5292268991470337, "learning_rate": 0.0014731300345224396, "loss": 0.666, "step": 91570 }, { "epoch": 26.3463751438435, "grad_norm": 1.56719172000885, "learning_rate": 0.00147307249712313, "loss": 0.9068, "step": 91580 }, { "epoch": 26.349252013808975, "grad_norm": 0.9989093542098999, "learning_rate": 0.0014730149597238205, "loss": 0.7949, "step": 91590 }, { "epoch": 26.352128883774455, "grad_norm": 0.7253686189651489, "learning_rate": 0.001472957422324511, "loss": 0.5167, "step": 91600 }, { "epoch": 26.35500575373993, "grad_norm": 0.8479141592979431, "learning_rate": 0.0014728998849252014, "loss": 0.6405, "step": 91610 }, { "epoch": 26.357882623705407, "grad_norm": 0.8457096219062805, "learning_rate": 0.0014728423475258918, "loss": 0.6299, "step": 91620 }, { "epoch": 26.360759493670887, "grad_norm": 1.6824772357940674, "learning_rate": 0.0014727848101265823, "loss": 0.6692, "step": 91630 }, { "epoch": 26.363636363636363, "grad_norm": 0.754021406173706, "learning_rate": 0.0014727272727272727, "loss": 0.8323, "step": 91640 }, { "epoch": 26.36651323360184, "grad_norm": 1.4269145727157593, "learning_rate": 0.0014726697353279633, "loss": 0.7373, "step": 91650 }, { "epoch": 26.36939010356732, "grad_norm": 1.0759443044662476, "learning_rate": 0.0014726121979286538, "loss": 0.706, "step": 91660 }, { "epoch": 26.372266973532795, "grad_norm": 1.5159565210342407, "learning_rate": 0.0014725546605293442, "loss": 0.6272, "step": 91670 }, { "epoch": 26.375143843498275, "grad_norm": 0.7205740213394165, "learning_rate": 0.0014724971231300345, "loss": 0.5412, "step": 91680 }, { "epoch": 26.37802071346375, "grad_norm": 0.8951582312583923, "learning_rate": 0.0014724395857307249, "loss": 0.7493, "step": 91690 }, { "epoch": 26.380897583429228, "grad_norm": 0.9819085597991943, "learning_rate": 0.0014723820483314154, "loss": 0.8725, "step": 91700 }, { "epoch": 26.383774453394707, "grad_norm": 1.476949691772461, "learning_rate": 0.001472324510932106, "loss": 0.809, "step": 91710 }, { "epoch": 26.386651323360184, "grad_norm": 0.9697636365890503, "learning_rate": 0.0014722669735327963, "loss": 0.8325, "step": 91720 }, { "epoch": 26.389528193325663, "grad_norm": 1.0302194356918335, "learning_rate": 0.001472209436133487, "loss": 0.6339, "step": 91730 }, { "epoch": 26.39240506329114, "grad_norm": 1.2058722972869873, "learning_rate": 0.0014721518987341772, "loss": 0.5906, "step": 91740 }, { "epoch": 26.395281933256616, "grad_norm": 1.3749068975448608, "learning_rate": 0.0014720943613348676, "loss": 0.7061, "step": 91750 }, { "epoch": 26.398158803222096, "grad_norm": 1.828069806098938, "learning_rate": 0.0014720368239355582, "loss": 0.8547, "step": 91760 }, { "epoch": 26.40103567318757, "grad_norm": 1.2672890424728394, "learning_rate": 0.0014719792865362487, "loss": 0.9411, "step": 91770 }, { "epoch": 26.403912543153048, "grad_norm": 1.5417699813842773, "learning_rate": 0.001471921749136939, "loss": 0.9188, "step": 91780 }, { "epoch": 26.406789413118528, "grad_norm": 2.696767568588257, "learning_rate": 0.0014718642117376296, "loss": 0.7068, "step": 91790 }, { "epoch": 26.409666283084004, "grad_norm": 1.208600640296936, "learning_rate": 0.0014718066743383198, "loss": 0.9144, "step": 91800 }, { "epoch": 26.412543153049484, "grad_norm": 1.185305118560791, "learning_rate": 0.0014717491369390103, "loss": 0.5898, "step": 91810 }, { "epoch": 26.41542002301496, "grad_norm": 1.7118382453918457, "learning_rate": 0.0014716915995397009, "loss": 0.5881, "step": 91820 }, { "epoch": 26.418296892980436, "grad_norm": 0.9939917325973511, "learning_rate": 0.0014716340621403912, "loss": 0.6912, "step": 91830 }, { "epoch": 26.421173762945916, "grad_norm": 1.3661658763885498, "learning_rate": 0.0014715765247410818, "loss": 0.597, "step": 91840 }, { "epoch": 26.424050632911392, "grad_norm": 1.3298221826553345, "learning_rate": 0.0014715189873417724, "loss": 0.5809, "step": 91850 }, { "epoch": 26.42692750287687, "grad_norm": 1.3984224796295166, "learning_rate": 0.0014714614499424625, "loss": 0.713, "step": 91860 }, { "epoch": 26.429804372842348, "grad_norm": 1.6899482011795044, "learning_rate": 0.001471403912543153, "loss": 0.6395, "step": 91870 }, { "epoch": 26.432681242807824, "grad_norm": 0.9123204350471497, "learning_rate": 0.0014713463751438436, "loss": 0.7491, "step": 91880 }, { "epoch": 26.435558112773304, "grad_norm": 1.6279621124267578, "learning_rate": 0.001471288837744534, "loss": 0.817, "step": 91890 }, { "epoch": 26.43843498273878, "grad_norm": 0.7939938902854919, "learning_rate": 0.0014712313003452245, "loss": 0.6587, "step": 91900 }, { "epoch": 26.441311852704256, "grad_norm": 0.9726763367652893, "learning_rate": 0.0014711737629459149, "loss": 0.6147, "step": 91910 }, { "epoch": 26.444188722669736, "grad_norm": 0.8719043731689453, "learning_rate": 0.0014711162255466052, "loss": 0.6478, "step": 91920 }, { "epoch": 26.447065592635212, "grad_norm": 0.9905012845993042, "learning_rate": 0.0014710586881472958, "loss": 0.5171, "step": 91930 }, { "epoch": 26.449942462600692, "grad_norm": 1.074767827987671, "learning_rate": 0.0014710011507479861, "loss": 0.7048, "step": 91940 }, { "epoch": 26.45281933256617, "grad_norm": 2.5738842487335205, "learning_rate": 0.0014709436133486767, "loss": 0.765, "step": 91950 }, { "epoch": 26.455696202531644, "grad_norm": 1.3753025531768799, "learning_rate": 0.0014708860759493673, "loss": 0.8103, "step": 91960 }, { "epoch": 26.458573072497124, "grad_norm": 1.1332123279571533, "learning_rate": 0.0014708285385500574, "loss": 0.8428, "step": 91970 }, { "epoch": 26.4614499424626, "grad_norm": 0.8207932114601135, "learning_rate": 0.001470771001150748, "loss": 0.7202, "step": 91980 }, { "epoch": 26.464326812428077, "grad_norm": 1.1007697582244873, "learning_rate": 0.0014707134637514385, "loss": 0.5807, "step": 91990 }, { "epoch": 26.467203682393556, "grad_norm": 1.0842350721359253, "learning_rate": 0.0014706559263521289, "loss": 0.5768, "step": 92000 }, { "epoch": 26.470080552359033, "grad_norm": 1.848252773284912, "learning_rate": 0.0014705983889528194, "loss": 0.6005, "step": 92010 }, { "epoch": 26.472957422324512, "grad_norm": 1.6917693614959717, "learning_rate": 0.00147054085155351, "loss": 0.8646, "step": 92020 }, { "epoch": 26.47583429228999, "grad_norm": 0.8893348574638367, "learning_rate": 0.0014704833141542001, "loss": 0.7478, "step": 92030 }, { "epoch": 26.478711162255465, "grad_norm": 0.958080530166626, "learning_rate": 0.0014704257767548907, "loss": 0.6702, "step": 92040 }, { "epoch": 26.481588032220944, "grad_norm": 0.8342671394348145, "learning_rate": 0.001470368239355581, "loss": 0.8083, "step": 92050 }, { "epoch": 26.48446490218642, "grad_norm": 1.6176254749298096, "learning_rate": 0.0014703107019562716, "loss": 0.734, "step": 92060 }, { "epoch": 26.4873417721519, "grad_norm": 1.422114372253418, "learning_rate": 0.0014702531645569622, "loss": 0.6921, "step": 92070 }, { "epoch": 26.490218642117377, "grad_norm": 1.2217546701431274, "learning_rate": 0.0014701956271576525, "loss": 0.5606, "step": 92080 }, { "epoch": 26.493095512082853, "grad_norm": 1.6021651029586792, "learning_rate": 0.0014701380897583429, "loss": 0.7355, "step": 92090 }, { "epoch": 26.495972382048333, "grad_norm": 1.2897485494613647, "learning_rate": 0.0014700805523590334, "loss": 0.6482, "step": 92100 }, { "epoch": 26.49884925201381, "grad_norm": 1.7280080318450928, "learning_rate": 0.0014700230149597238, "loss": 0.6785, "step": 92110 }, { "epoch": 26.501726121979285, "grad_norm": 0.6833740472793579, "learning_rate": 0.0014699654775604143, "loss": 0.6881, "step": 92120 }, { "epoch": 26.504602991944765, "grad_norm": 1.5898345708847046, "learning_rate": 0.001469907940161105, "loss": 0.6701, "step": 92130 }, { "epoch": 26.50747986191024, "grad_norm": 1.1860512495040894, "learning_rate": 0.0014698504027617952, "loss": 0.8128, "step": 92140 }, { "epoch": 26.51035673187572, "grad_norm": 1.0005966424942017, "learning_rate": 0.0014697928653624856, "loss": 0.5577, "step": 92150 }, { "epoch": 26.513233601841197, "grad_norm": 0.9621253609657288, "learning_rate": 0.001469735327963176, "loss": 0.6265, "step": 92160 }, { "epoch": 26.516110471806673, "grad_norm": 1.7549760341644287, "learning_rate": 0.0014696777905638665, "loss": 0.8177, "step": 92170 }, { "epoch": 26.518987341772153, "grad_norm": 1.2238733768463135, "learning_rate": 0.001469620253164557, "loss": 0.7018, "step": 92180 }, { "epoch": 26.52186421173763, "grad_norm": 1.0709497928619385, "learning_rate": 0.0014695627157652474, "loss": 0.6596, "step": 92190 }, { "epoch": 26.524741081703105, "grad_norm": 1.004793405532837, "learning_rate": 0.001469505178365938, "loss": 0.8016, "step": 92200 }, { "epoch": 26.527617951668585, "grad_norm": 1.8777976036071777, "learning_rate": 0.0014694476409666283, "loss": 0.5497, "step": 92210 }, { "epoch": 26.53049482163406, "grad_norm": 1.2516919374465942, "learning_rate": 0.0014693901035673187, "loss": 0.8134, "step": 92220 }, { "epoch": 26.53337169159954, "grad_norm": 1.7833360433578491, "learning_rate": 0.0014693325661680092, "loss": 0.7681, "step": 92230 }, { "epoch": 26.536248561565017, "grad_norm": 1.2868765592575073, "learning_rate": 0.0014692750287686998, "loss": 0.6636, "step": 92240 }, { "epoch": 26.539125431530493, "grad_norm": 1.2088409662246704, "learning_rate": 0.0014692174913693901, "loss": 0.639, "step": 92250 }, { "epoch": 26.542002301495973, "grad_norm": 1.046557068824768, "learning_rate": 0.0014691599539700807, "loss": 0.7452, "step": 92260 }, { "epoch": 26.54487917146145, "grad_norm": 0.7940595149993896, "learning_rate": 0.0014691024165707708, "loss": 0.6196, "step": 92270 }, { "epoch": 26.54775604142693, "grad_norm": 1.6006486415863037, "learning_rate": 0.0014690448791714614, "loss": 0.6099, "step": 92280 }, { "epoch": 26.550632911392405, "grad_norm": 1.0830352306365967, "learning_rate": 0.001468987341772152, "loss": 0.8227, "step": 92290 }, { "epoch": 26.55350978135788, "grad_norm": 1.2585548162460327, "learning_rate": 0.0014689298043728423, "loss": 0.7068, "step": 92300 }, { "epoch": 26.55638665132336, "grad_norm": 1.3457598686218262, "learning_rate": 0.0014688722669735329, "loss": 0.6886, "step": 92310 }, { "epoch": 26.559263521288837, "grad_norm": 1.4433156251907349, "learning_rate": 0.0014688147295742234, "loss": 0.7038, "step": 92320 }, { "epoch": 26.562140391254314, "grad_norm": 1.9733614921569824, "learning_rate": 0.0014687571921749136, "loss": 1.022, "step": 92330 }, { "epoch": 26.565017261219793, "grad_norm": 1.005523920059204, "learning_rate": 0.0014686996547756041, "loss": 0.7501, "step": 92340 }, { "epoch": 26.56789413118527, "grad_norm": 1.8308807611465454, "learning_rate": 0.0014686421173762947, "loss": 0.7908, "step": 92350 }, { "epoch": 26.57077100115075, "grad_norm": 1.5818824768066406, "learning_rate": 0.001468584579976985, "loss": 0.77, "step": 92360 }, { "epoch": 26.573647871116226, "grad_norm": 1.6100568771362305, "learning_rate": 0.0014685270425776756, "loss": 0.6518, "step": 92370 }, { "epoch": 26.576524741081702, "grad_norm": 0.7610489726066589, "learning_rate": 0.001468469505178366, "loss": 0.6591, "step": 92380 }, { "epoch": 26.57940161104718, "grad_norm": 1.1294918060302734, "learning_rate": 0.0014684119677790563, "loss": 0.7807, "step": 92390 }, { "epoch": 26.582278481012658, "grad_norm": 1.1935386657714844, "learning_rate": 0.0014683544303797469, "loss": 0.6882, "step": 92400 }, { "epoch": 26.585155350978134, "grad_norm": 0.9622103571891785, "learning_rate": 0.0014682968929804372, "loss": 0.6688, "step": 92410 }, { "epoch": 26.588032220943614, "grad_norm": 1.3888769149780273, "learning_rate": 0.0014682393555811278, "loss": 0.5774, "step": 92420 }, { "epoch": 26.59090909090909, "grad_norm": 2.0177974700927734, "learning_rate": 0.0014681818181818183, "loss": 0.8248, "step": 92430 }, { "epoch": 26.59378596087457, "grad_norm": 2.447477102279663, "learning_rate": 0.0014681242807825087, "loss": 0.759, "step": 92440 }, { "epoch": 26.596662830840046, "grad_norm": 1.0054469108581543, "learning_rate": 0.001468066743383199, "loss": 0.7005, "step": 92450 }, { "epoch": 26.599539700805522, "grad_norm": 1.0078495740890503, "learning_rate": 0.0014680092059838896, "loss": 0.589, "step": 92460 }, { "epoch": 26.602416570771002, "grad_norm": 1.1030811071395874, "learning_rate": 0.00146795166858458, "loss": 0.7751, "step": 92470 }, { "epoch": 26.605293440736478, "grad_norm": 1.1259589195251465, "learning_rate": 0.0014678941311852705, "loss": 0.5383, "step": 92480 }, { "epoch": 26.608170310701958, "grad_norm": 0.9695020914077759, "learning_rate": 0.0014678365937859609, "loss": 0.6149, "step": 92490 }, { "epoch": 26.611047180667434, "grad_norm": 1.7375251054763794, "learning_rate": 0.0014677790563866514, "loss": 0.6762, "step": 92500 }, { "epoch": 26.61392405063291, "grad_norm": 1.7425775527954102, "learning_rate": 0.0014677215189873418, "loss": 0.7216, "step": 92510 }, { "epoch": 26.61680092059839, "grad_norm": 1.716187596321106, "learning_rate": 0.0014676639815880321, "loss": 0.6836, "step": 92520 }, { "epoch": 26.619677790563866, "grad_norm": 0.8043881058692932, "learning_rate": 0.0014676064441887227, "loss": 0.5822, "step": 92530 }, { "epoch": 26.622554660529342, "grad_norm": 1.2609838247299194, "learning_rate": 0.0014675489067894132, "loss": 0.6836, "step": 92540 }, { "epoch": 26.625431530494822, "grad_norm": 0.9519590735435486, "learning_rate": 0.0014674913693901036, "loss": 0.7567, "step": 92550 }, { "epoch": 26.6283084004603, "grad_norm": 1.2326881885528564, "learning_rate": 0.0014674338319907942, "loss": 0.6595, "step": 92560 }, { "epoch": 26.631185270425778, "grad_norm": 1.2785651683807373, "learning_rate": 0.0014673762945914845, "loss": 0.6429, "step": 92570 }, { "epoch": 26.634062140391254, "grad_norm": 0.8775125741958618, "learning_rate": 0.0014673187571921749, "loss": 0.7289, "step": 92580 }, { "epoch": 26.63693901035673, "grad_norm": 0.9585853219032288, "learning_rate": 0.0014672612197928654, "loss": 0.602, "step": 92590 }, { "epoch": 26.63981588032221, "grad_norm": 1.430497169494629, "learning_rate": 0.001467203682393556, "loss": 0.5751, "step": 92600 }, { "epoch": 26.642692750287686, "grad_norm": 1.407586693763733, "learning_rate": 0.0014671461449942463, "loss": 0.79, "step": 92610 }, { "epoch": 26.645569620253166, "grad_norm": 1.1413910388946533, "learning_rate": 0.001467088607594937, "loss": 0.7715, "step": 92620 }, { "epoch": 26.648446490218642, "grad_norm": 1.9324613809585571, "learning_rate": 0.001467031070195627, "loss": 0.6677, "step": 92630 }, { "epoch": 26.65132336018412, "grad_norm": 1.408380389213562, "learning_rate": 0.0014669735327963176, "loss": 0.6182, "step": 92640 }, { "epoch": 26.6542002301496, "grad_norm": 0.9645879864692688, "learning_rate": 0.0014669159953970082, "loss": 0.7394, "step": 92650 }, { "epoch": 26.657077100115075, "grad_norm": 1.1898113489151, "learning_rate": 0.0014668584579976985, "loss": 0.7402, "step": 92660 }, { "epoch": 26.65995397008055, "grad_norm": 1.3404314517974854, "learning_rate": 0.001466800920598389, "loss": 0.7607, "step": 92670 }, { "epoch": 26.66283084004603, "grad_norm": 0.7055138945579529, "learning_rate": 0.0014667433831990796, "loss": 0.8348, "step": 92680 }, { "epoch": 26.665707710011507, "grad_norm": 1.5355961322784424, "learning_rate": 0.0014666858457997698, "loss": 0.5126, "step": 92690 }, { "epoch": 26.668584579976987, "grad_norm": 0.9882781505584717, "learning_rate": 0.0014666283084004603, "loss": 0.6563, "step": 92700 }, { "epoch": 26.671461449942463, "grad_norm": 1.9664100408554077, "learning_rate": 0.0014665707710011509, "loss": 0.8953, "step": 92710 }, { "epoch": 26.67433831990794, "grad_norm": 0.667680025100708, "learning_rate": 0.0014665132336018412, "loss": 0.6105, "step": 92720 }, { "epoch": 26.67721518987342, "grad_norm": 0.8699446320533752, "learning_rate": 0.0014664556962025318, "loss": 0.7315, "step": 92730 }, { "epoch": 26.680092059838895, "grad_norm": 1.7190686464309692, "learning_rate": 0.001466398158803222, "loss": 0.6352, "step": 92740 }, { "epoch": 26.682968929804375, "grad_norm": 1.2691845893859863, "learning_rate": 0.0014663406214039125, "loss": 0.9141, "step": 92750 }, { "epoch": 26.68584579976985, "grad_norm": 0.8052981495857239, "learning_rate": 0.001466283084004603, "loss": 0.6158, "step": 92760 }, { "epoch": 26.688722669735327, "grad_norm": 1.5477068424224854, "learning_rate": 0.0014662255466052934, "loss": 0.5708, "step": 92770 }, { "epoch": 26.691599539700807, "grad_norm": 0.9274078011512756, "learning_rate": 0.001466168009205984, "loss": 0.656, "step": 92780 }, { "epoch": 26.694476409666283, "grad_norm": 1.2279744148254395, "learning_rate": 0.0014661104718066745, "loss": 0.6737, "step": 92790 }, { "epoch": 26.69735327963176, "grad_norm": 0.9560614824295044, "learning_rate": 0.0014660529344073647, "loss": 0.6308, "step": 92800 }, { "epoch": 26.70023014959724, "grad_norm": 0.736638069152832, "learning_rate": 0.0014659953970080552, "loss": 0.6639, "step": 92810 }, { "epoch": 26.703107019562715, "grad_norm": 1.0270169973373413, "learning_rate": 0.0014659378596087458, "loss": 0.5966, "step": 92820 }, { "epoch": 26.705983889528195, "grad_norm": 1.1791232824325562, "learning_rate": 0.0014658803222094361, "loss": 0.8666, "step": 92830 }, { "epoch": 26.70886075949367, "grad_norm": 0.9947037696838379, "learning_rate": 0.0014658227848101267, "loss": 0.8231, "step": 92840 }, { "epoch": 26.711737629459147, "grad_norm": 0.902490496635437, "learning_rate": 0.001465765247410817, "loss": 0.6794, "step": 92850 }, { "epoch": 26.714614499424627, "grad_norm": 1.0838372707366943, "learning_rate": 0.0014657077100115074, "loss": 0.7459, "step": 92860 }, { "epoch": 26.717491369390103, "grad_norm": 1.5233651399612427, "learning_rate": 0.001465650172612198, "loss": 0.7117, "step": 92870 }, { "epoch": 26.72036823935558, "grad_norm": 1.0427874326705933, "learning_rate": 0.0014655926352128883, "loss": 0.6484, "step": 92880 }, { "epoch": 26.72324510932106, "grad_norm": 2.4829916954040527, "learning_rate": 0.0014655350978135789, "loss": 0.9266, "step": 92890 }, { "epoch": 26.726121979286535, "grad_norm": 2.3181166648864746, "learning_rate": 0.0014654775604142694, "loss": 0.6702, "step": 92900 }, { "epoch": 26.728998849252015, "grad_norm": 1.0838996171951294, "learning_rate": 0.0014654200230149598, "loss": 0.7572, "step": 92910 }, { "epoch": 26.73187571921749, "grad_norm": 1.3432672023773193, "learning_rate": 0.0014653624856156501, "loss": 0.6574, "step": 92920 }, { "epoch": 26.734752589182968, "grad_norm": 1.3911834955215454, "learning_rate": 0.0014653049482163407, "loss": 0.6357, "step": 92930 }, { "epoch": 26.737629459148447, "grad_norm": 0.8166238069534302, "learning_rate": 0.001465247410817031, "loss": 0.6547, "step": 92940 }, { "epoch": 26.740506329113924, "grad_norm": 1.1860226392745972, "learning_rate": 0.0014651898734177216, "loss": 0.6678, "step": 92950 }, { "epoch": 26.743383199079403, "grad_norm": 1.5310728549957275, "learning_rate": 0.001465132336018412, "loss": 0.6816, "step": 92960 }, { "epoch": 26.74626006904488, "grad_norm": 1.5261093378067017, "learning_rate": 0.0014650747986191025, "loss": 0.7898, "step": 92970 }, { "epoch": 26.749136939010356, "grad_norm": 1.4097263813018799, "learning_rate": 0.0014650172612197929, "loss": 0.7954, "step": 92980 }, { "epoch": 26.752013808975835, "grad_norm": 0.7086218595504761, "learning_rate": 0.0014649597238204832, "loss": 0.6044, "step": 92990 }, { "epoch": 26.75489067894131, "grad_norm": 1.4677832126617432, "learning_rate": 0.0014649021864211738, "loss": 0.8666, "step": 93000 }, { "epoch": 26.757767548906788, "grad_norm": 0.8964833617210388, "learning_rate": 0.0014648446490218643, "loss": 0.6216, "step": 93010 }, { "epoch": 26.760644418872268, "grad_norm": 0.6914772391319275, "learning_rate": 0.0014647871116225547, "loss": 0.7446, "step": 93020 }, { "epoch": 26.763521288837744, "grad_norm": 1.2107696533203125, "learning_rate": 0.0014647295742232452, "loss": 0.7627, "step": 93030 }, { "epoch": 26.766398158803224, "grad_norm": 1.1762031316757202, "learning_rate": 0.0014646720368239356, "loss": 0.6194, "step": 93040 }, { "epoch": 26.7692750287687, "grad_norm": 1.7218683958053589, "learning_rate": 0.001464614499424626, "loss": 0.7476, "step": 93050 }, { "epoch": 26.772151898734176, "grad_norm": 1.211356520652771, "learning_rate": 0.0014645569620253165, "loss": 0.6679, "step": 93060 }, { "epoch": 26.775028768699656, "grad_norm": 1.5847060680389404, "learning_rate": 0.0014644994246260068, "loss": 0.7834, "step": 93070 }, { "epoch": 26.777905638665132, "grad_norm": 0.9138326644897461, "learning_rate": 0.0014644418872266974, "loss": 0.6552, "step": 93080 }, { "epoch": 26.780782508630608, "grad_norm": 1.5429638624191284, "learning_rate": 0.001464384349827388, "loss": 0.6117, "step": 93090 }, { "epoch": 26.783659378596088, "grad_norm": 2.1176493167877197, "learning_rate": 0.001464326812428078, "loss": 0.7629, "step": 93100 }, { "epoch": 26.786536248561564, "grad_norm": 2.2391767501831055, "learning_rate": 0.0014642692750287687, "loss": 0.7178, "step": 93110 }, { "epoch": 26.789413118527044, "grad_norm": 1.4477498531341553, "learning_rate": 0.0014642117376294592, "loss": 0.7939, "step": 93120 }, { "epoch": 26.79228998849252, "grad_norm": 0.7170533537864685, "learning_rate": 0.0014641542002301496, "loss": 0.7059, "step": 93130 }, { "epoch": 26.795166858457996, "grad_norm": 1.4878573417663574, "learning_rate": 0.0014640966628308401, "loss": 0.5129, "step": 93140 }, { "epoch": 26.798043728423476, "grad_norm": 1.4373359680175781, "learning_rate": 0.0014640391254315307, "loss": 0.7052, "step": 93150 }, { "epoch": 26.800920598388952, "grad_norm": 1.0061227083206177, "learning_rate": 0.0014639815880322208, "loss": 0.5606, "step": 93160 }, { "epoch": 26.803797468354432, "grad_norm": 1.3892955780029297, "learning_rate": 0.0014639240506329114, "loss": 0.7448, "step": 93170 }, { "epoch": 26.806674338319908, "grad_norm": 1.2765928506851196, "learning_rate": 0.001463866513233602, "loss": 0.6709, "step": 93180 }, { "epoch": 26.809551208285384, "grad_norm": 1.6032975912094116, "learning_rate": 0.0014638089758342923, "loss": 0.7534, "step": 93190 }, { "epoch": 26.812428078250864, "grad_norm": 1.2677600383758545, "learning_rate": 0.0014637514384349829, "loss": 0.594, "step": 93200 }, { "epoch": 26.81530494821634, "grad_norm": 0.9185822010040283, "learning_rate": 0.0014636939010356732, "loss": 0.6284, "step": 93210 }, { "epoch": 26.818181818181817, "grad_norm": 1.977096676826477, "learning_rate": 0.0014636363636363636, "loss": 0.5937, "step": 93220 }, { "epoch": 26.821058688147296, "grad_norm": 1.1857784986495972, "learning_rate": 0.0014635788262370541, "loss": 0.7974, "step": 93230 }, { "epoch": 26.823935558112773, "grad_norm": 1.6550109386444092, "learning_rate": 0.0014635212888377445, "loss": 0.6992, "step": 93240 }, { "epoch": 26.826812428078252, "grad_norm": 2.012336254119873, "learning_rate": 0.001463463751438435, "loss": 0.7129, "step": 93250 }, { "epoch": 26.82968929804373, "grad_norm": 0.6528522372245789, "learning_rate": 0.0014634062140391256, "loss": 0.6668, "step": 93260 }, { "epoch": 26.832566168009205, "grad_norm": 2.0681257247924805, "learning_rate": 0.001463348676639816, "loss": 0.8052, "step": 93270 }, { "epoch": 26.835443037974684, "grad_norm": 0.9483610987663269, "learning_rate": 0.0014632911392405063, "loss": 0.7748, "step": 93280 }, { "epoch": 26.83831990794016, "grad_norm": 1.1309889554977417, "learning_rate": 0.0014632336018411969, "loss": 0.6981, "step": 93290 }, { "epoch": 26.841196777905637, "grad_norm": 0.7832451462745667, "learning_rate": 0.0014631760644418872, "loss": 0.6793, "step": 93300 }, { "epoch": 26.844073647871117, "grad_norm": 1.2003835439682007, "learning_rate": 0.0014631185270425778, "loss": 0.7657, "step": 93310 }, { "epoch": 26.846950517836593, "grad_norm": 1.1687471866607666, "learning_rate": 0.0014630609896432681, "loss": 0.8468, "step": 93320 }, { "epoch": 26.849827387802073, "grad_norm": 1.780623435974121, "learning_rate": 0.0014630034522439587, "loss": 0.6465, "step": 93330 }, { "epoch": 26.85270425776755, "grad_norm": 1.0264321565628052, "learning_rate": 0.001462945914844649, "loss": 0.6106, "step": 93340 }, { "epoch": 26.855581127733025, "grad_norm": 1.650213360786438, "learning_rate": 0.0014628883774453394, "loss": 0.7004, "step": 93350 }, { "epoch": 26.858457997698505, "grad_norm": 1.6744002103805542, "learning_rate": 0.00146283084004603, "loss": 0.6719, "step": 93360 }, { "epoch": 26.86133486766398, "grad_norm": 1.0047634840011597, "learning_rate": 0.0014627733026467205, "loss": 0.8056, "step": 93370 }, { "epoch": 26.86421173762946, "grad_norm": 1.7686522006988525, "learning_rate": 0.0014627157652474109, "loss": 0.6503, "step": 93380 }, { "epoch": 26.867088607594937, "grad_norm": 1.5482076406478882, "learning_rate": 0.0014626582278481014, "loss": 0.6489, "step": 93390 }, { "epoch": 26.869965477560413, "grad_norm": 1.6867092847824097, "learning_rate": 0.0014626006904487918, "loss": 0.8353, "step": 93400 }, { "epoch": 26.872842347525893, "grad_norm": 1.406309962272644, "learning_rate": 0.0014625431530494821, "loss": 0.9528, "step": 93410 }, { "epoch": 26.87571921749137, "grad_norm": 1.0855560302734375, "learning_rate": 0.0014624856156501727, "loss": 0.579, "step": 93420 }, { "epoch": 26.878596087456845, "grad_norm": 0.9676840901374817, "learning_rate": 0.001462428078250863, "loss": 0.6639, "step": 93430 }, { "epoch": 26.881472957422325, "grad_norm": 1.3293501138687134, "learning_rate": 0.0014623705408515536, "loss": 0.6538, "step": 93440 }, { "epoch": 26.8843498273878, "grad_norm": 1.3251330852508545, "learning_rate": 0.0014623130034522442, "loss": 0.5742, "step": 93450 }, { "epoch": 26.88722669735328, "grad_norm": 1.4989033937454224, "learning_rate": 0.0014622554660529343, "loss": 0.6355, "step": 93460 }, { "epoch": 26.890103567318757, "grad_norm": 1.9303531646728516, "learning_rate": 0.0014621979286536249, "loss": 0.6167, "step": 93470 }, { "epoch": 26.892980437284233, "grad_norm": 1.7940821647644043, "learning_rate": 0.0014621403912543154, "loss": 0.779, "step": 93480 }, { "epoch": 26.895857307249713, "grad_norm": 0.7790858745574951, "learning_rate": 0.0014620828538550058, "loss": 0.5667, "step": 93490 }, { "epoch": 26.89873417721519, "grad_norm": 1.4159119129180908, "learning_rate": 0.0014620253164556963, "loss": 0.7607, "step": 93500 }, { "epoch": 26.90161104718067, "grad_norm": 0.8653009533882141, "learning_rate": 0.0014619677790563869, "loss": 0.6702, "step": 93510 }, { "epoch": 26.904487917146145, "grad_norm": 1.532922625541687, "learning_rate": 0.001461910241657077, "loss": 0.5811, "step": 93520 }, { "epoch": 26.90736478711162, "grad_norm": 1.32847261428833, "learning_rate": 0.0014618527042577676, "loss": 0.6351, "step": 93530 }, { "epoch": 26.9102416570771, "grad_norm": 1.671256184577942, "learning_rate": 0.001461795166858458, "loss": 0.6481, "step": 93540 }, { "epoch": 26.913118527042577, "grad_norm": 1.2115808725357056, "learning_rate": 0.0014617376294591485, "loss": 0.7216, "step": 93550 }, { "epoch": 26.915995397008054, "grad_norm": 1.6762174367904663, "learning_rate": 0.001461680092059839, "loss": 0.6713, "step": 93560 }, { "epoch": 26.918872266973533, "grad_norm": 0.9381908178329468, "learning_rate": 0.0014616225546605292, "loss": 0.5687, "step": 93570 }, { "epoch": 26.92174913693901, "grad_norm": 0.9403814077377319, "learning_rate": 0.0014615650172612198, "loss": 0.7475, "step": 93580 }, { "epoch": 26.92462600690449, "grad_norm": 1.4976633787155151, "learning_rate": 0.0014615074798619103, "loss": 0.7077, "step": 93590 }, { "epoch": 26.927502876869966, "grad_norm": 2.2121636867523193, "learning_rate": 0.0014614499424626007, "loss": 0.7848, "step": 93600 }, { "epoch": 26.930379746835442, "grad_norm": 3.7145755290985107, "learning_rate": 0.0014613924050632912, "loss": 0.7206, "step": 93610 }, { "epoch": 26.93325661680092, "grad_norm": 0.8738667368888855, "learning_rate": 0.0014613348676639818, "loss": 0.6611, "step": 93620 }, { "epoch": 26.936133486766398, "grad_norm": 1.0923491716384888, "learning_rate": 0.001461277330264672, "loss": 0.5974, "step": 93630 }, { "epoch": 26.939010356731877, "grad_norm": 1.2404299974441528, "learning_rate": 0.0014612197928653625, "loss": 0.8436, "step": 93640 }, { "epoch": 26.941887226697354, "grad_norm": 1.3434995412826538, "learning_rate": 0.0014611622554660528, "loss": 0.8342, "step": 93650 }, { "epoch": 26.94476409666283, "grad_norm": 1.2917450666427612, "learning_rate": 0.0014611047180667434, "loss": 0.5738, "step": 93660 }, { "epoch": 26.94764096662831, "grad_norm": 1.4179332256317139, "learning_rate": 0.001461047180667434, "loss": 0.8168, "step": 93670 }, { "epoch": 26.950517836593786, "grad_norm": 1.6170200109481812, "learning_rate": 0.0014609896432681243, "loss": 0.6824, "step": 93680 }, { "epoch": 26.953394706559262, "grad_norm": 1.466744303703308, "learning_rate": 0.0014609321058688147, "loss": 0.7926, "step": 93690 }, { "epoch": 26.956271576524742, "grad_norm": 1.2276369333267212, "learning_rate": 0.0014608745684695052, "loss": 0.6529, "step": 93700 }, { "epoch": 26.959148446490218, "grad_norm": 1.1368056535720825, "learning_rate": 0.0014608170310701956, "loss": 0.5645, "step": 93710 }, { "epoch": 26.962025316455698, "grad_norm": 1.5638747215270996, "learning_rate": 0.0014607594936708861, "loss": 0.717, "step": 93720 }, { "epoch": 26.964902186421174, "grad_norm": 1.6871501207351685, "learning_rate": 0.0014607019562715767, "loss": 0.5595, "step": 93730 }, { "epoch": 26.96777905638665, "grad_norm": 0.8696613907814026, "learning_rate": 0.001460644418872267, "loss": 0.6733, "step": 93740 }, { "epoch": 26.97065592635213, "grad_norm": 1.2795871496200562, "learning_rate": 0.0014605868814729574, "loss": 0.7472, "step": 93750 }, { "epoch": 26.973532796317606, "grad_norm": 1.763917326927185, "learning_rate": 0.0014605293440736477, "loss": 0.6505, "step": 93760 }, { "epoch": 26.976409666283082, "grad_norm": 1.1728003025054932, "learning_rate": 0.0014604718066743383, "loss": 0.7963, "step": 93770 }, { "epoch": 26.979286536248562, "grad_norm": 1.4433701038360596, "learning_rate": 0.0014604142692750289, "loss": 0.6006, "step": 93780 }, { "epoch": 26.98216340621404, "grad_norm": 1.8831532001495361, "learning_rate": 0.0014603567318757192, "loss": 0.735, "step": 93790 }, { "epoch": 26.985040276179518, "grad_norm": 1.1921340227127075, "learning_rate": 0.0014602991944764098, "loss": 0.6253, "step": 93800 }, { "epoch": 26.987917146144994, "grad_norm": 0.9539740085601807, "learning_rate": 0.0014602416570771001, "loss": 0.6967, "step": 93810 }, { "epoch": 26.99079401611047, "grad_norm": 1.3933846950531006, "learning_rate": 0.0014601841196777905, "loss": 0.7271, "step": 93820 }, { "epoch": 26.99367088607595, "grad_norm": 2.307525396347046, "learning_rate": 0.001460126582278481, "loss": 0.6663, "step": 93830 }, { "epoch": 26.996547756041426, "grad_norm": 1.6139459609985352, "learning_rate": 0.0014600690448791716, "loss": 0.7954, "step": 93840 }, { "epoch": 26.999424626006906, "grad_norm": 0.9924572706222534, "learning_rate": 0.001460011507479862, "loss": 0.5853, "step": 93850 }, { "epoch": 27.002301495972382, "grad_norm": 1.9841779470443726, "learning_rate": 0.0014599539700805525, "loss": 0.7151, "step": 93860 }, { "epoch": 27.00517836593786, "grad_norm": 1.0803196430206299, "learning_rate": 0.0014598964326812429, "loss": 0.6417, "step": 93870 }, { "epoch": 27.00805523590334, "grad_norm": 1.0205655097961426, "learning_rate": 0.0014598388952819332, "loss": 0.6516, "step": 93880 }, { "epoch": 27.010932105868815, "grad_norm": 1.3391655683517456, "learning_rate": 0.0014597813578826238, "loss": 0.6308, "step": 93890 }, { "epoch": 27.01380897583429, "grad_norm": 1.830166220664978, "learning_rate": 0.0014597238204833141, "loss": 0.6889, "step": 93900 }, { "epoch": 27.01668584579977, "grad_norm": 0.8148573040962219, "learning_rate": 0.0014596662830840047, "loss": 0.6133, "step": 93910 }, { "epoch": 27.019562715765247, "grad_norm": 1.3389922380447388, "learning_rate": 0.0014596087456846952, "loss": 0.7448, "step": 93920 }, { "epoch": 27.022439585730726, "grad_norm": 1.071588158607483, "learning_rate": 0.0014595512082853854, "loss": 0.6815, "step": 93930 }, { "epoch": 27.025316455696203, "grad_norm": 1.2114404439926147, "learning_rate": 0.001459493670886076, "loss": 0.5618, "step": 93940 }, { "epoch": 27.02819332566168, "grad_norm": 1.656601071357727, "learning_rate": 0.0014594361334867665, "loss": 0.4957, "step": 93950 }, { "epoch": 27.03107019562716, "grad_norm": 1.32853102684021, "learning_rate": 0.0014593785960874568, "loss": 0.6805, "step": 93960 }, { "epoch": 27.033947065592635, "grad_norm": 1.560258150100708, "learning_rate": 0.0014593210586881474, "loss": 0.7294, "step": 93970 }, { "epoch": 27.03682393555811, "grad_norm": 1.1534825563430786, "learning_rate": 0.001459263521288838, "loss": 0.6068, "step": 93980 }, { "epoch": 27.03970080552359, "grad_norm": 1.2916197776794434, "learning_rate": 0.001459205983889528, "loss": 0.5207, "step": 93990 }, { "epoch": 27.042577675489067, "grad_norm": 1.23114013671875, "learning_rate": 0.0014591484464902187, "loss": 0.7223, "step": 94000 }, { "epoch": 27.045454545454547, "grad_norm": 1.0603506565093994, "learning_rate": 0.001459090909090909, "loss": 0.6246, "step": 94010 }, { "epoch": 27.048331415420023, "grad_norm": 0.7204486727714539, "learning_rate": 0.0014590333716915996, "loss": 0.564, "step": 94020 }, { "epoch": 27.0512082853855, "grad_norm": 1.0224690437316895, "learning_rate": 0.0014589758342922901, "loss": 0.6063, "step": 94030 }, { "epoch": 27.05408515535098, "grad_norm": 1.7041022777557373, "learning_rate": 0.0014589182968929805, "loss": 0.7157, "step": 94040 }, { "epoch": 27.056962025316455, "grad_norm": 1.5317213535308838, "learning_rate": 0.0014588607594936708, "loss": 0.8002, "step": 94050 }, { "epoch": 27.059838895281935, "grad_norm": 0.926189661026001, "learning_rate": 0.0014588032220943614, "loss": 0.5947, "step": 94060 }, { "epoch": 27.06271576524741, "grad_norm": 2.1498191356658936, "learning_rate": 0.0014587456846950517, "loss": 0.658, "step": 94070 }, { "epoch": 27.065592635212887, "grad_norm": 1.8594740629196167, "learning_rate": 0.0014586881472957423, "loss": 0.4933, "step": 94080 }, { "epoch": 27.068469505178367, "grad_norm": 1.1612863540649414, "learning_rate": 0.0014586306098964329, "loss": 0.6712, "step": 94090 }, { "epoch": 27.071346375143843, "grad_norm": 1.6780767440795898, "learning_rate": 0.0014585730724971232, "loss": 0.6274, "step": 94100 }, { "epoch": 27.07422324510932, "grad_norm": 2.2434685230255127, "learning_rate": 0.0014585155350978136, "loss": 0.6419, "step": 94110 }, { "epoch": 27.0771001150748, "grad_norm": 1.2331980466842651, "learning_rate": 0.001458457997698504, "loss": 0.7348, "step": 94120 }, { "epoch": 27.079976985040275, "grad_norm": 0.9919854998588562, "learning_rate": 0.0014584004602991945, "loss": 0.611, "step": 94130 }, { "epoch": 27.082853855005755, "grad_norm": 1.236801266670227, "learning_rate": 0.001458342922899885, "loss": 0.5412, "step": 94140 }, { "epoch": 27.08573072497123, "grad_norm": 1.1194616556167603, "learning_rate": 0.0014582853855005754, "loss": 0.8284, "step": 94150 }, { "epoch": 27.088607594936708, "grad_norm": 0.8957392573356628, "learning_rate": 0.001458227848101266, "loss": 0.6619, "step": 94160 }, { "epoch": 27.091484464902187, "grad_norm": 1.1680898666381836, "learning_rate": 0.0014581703107019563, "loss": 0.6492, "step": 94170 }, { "epoch": 27.094361334867664, "grad_norm": 0.9635521769523621, "learning_rate": 0.0014581127733026466, "loss": 0.5963, "step": 94180 }, { "epoch": 27.097238204833143, "grad_norm": 1.6468690633773804, "learning_rate": 0.0014580552359033372, "loss": 0.6177, "step": 94190 }, { "epoch": 27.10011507479862, "grad_norm": 1.087067723274231, "learning_rate": 0.0014579976985040278, "loss": 0.7338, "step": 94200 }, { "epoch": 27.102991944764096, "grad_norm": 1.9906402826309204, "learning_rate": 0.0014579401611047181, "loss": 0.6346, "step": 94210 }, { "epoch": 27.105868814729575, "grad_norm": 2.0195260047912598, "learning_rate": 0.0014578826237054087, "loss": 0.7136, "step": 94220 }, { "epoch": 27.10874568469505, "grad_norm": 1.2239302396774292, "learning_rate": 0.0014578250863060988, "loss": 0.4875, "step": 94230 }, { "epoch": 27.111622554660528, "grad_norm": 0.9951456189155579, "learning_rate": 0.0014577675489067894, "loss": 0.6981, "step": 94240 }, { "epoch": 27.114499424626008, "grad_norm": 1.3571271896362305, "learning_rate": 0.00145771001150748, "loss": 0.8311, "step": 94250 }, { "epoch": 27.117376294591484, "grad_norm": 0.9790176153182983, "learning_rate": 0.0014576524741081703, "loss": 0.8109, "step": 94260 }, { "epoch": 27.120253164556964, "grad_norm": 1.9687408208847046, "learning_rate": 0.0014575949367088609, "loss": 0.6671, "step": 94270 }, { "epoch": 27.12313003452244, "grad_norm": 1.4820300340652466, "learning_rate": 0.0014575373993095514, "loss": 0.76, "step": 94280 }, { "epoch": 27.126006904487916, "grad_norm": 0.9481080174446106, "learning_rate": 0.0014574798619102416, "loss": 0.6173, "step": 94290 }, { "epoch": 27.128883774453396, "grad_norm": 1.3882306814193726, "learning_rate": 0.0014574223245109321, "loss": 0.5832, "step": 94300 }, { "epoch": 27.131760644418872, "grad_norm": 0.7927123308181763, "learning_rate": 0.0014573647871116227, "loss": 0.5926, "step": 94310 }, { "epoch": 27.134637514384348, "grad_norm": 1.7726409435272217, "learning_rate": 0.001457307249712313, "loss": 0.7278, "step": 94320 }, { "epoch": 27.137514384349828, "grad_norm": 0.8971561789512634, "learning_rate": 0.0014572497123130036, "loss": 0.6366, "step": 94330 }, { "epoch": 27.140391254315304, "grad_norm": 0.9781902432441711, "learning_rate": 0.0014571921749136937, "loss": 0.7012, "step": 94340 }, { "epoch": 27.143268124280784, "grad_norm": 0.873957633972168, "learning_rate": 0.0014571346375143843, "loss": 0.7083, "step": 94350 }, { "epoch": 27.14614499424626, "grad_norm": 1.7808116674423218, "learning_rate": 0.0014570771001150748, "loss": 0.8337, "step": 94360 }, { "epoch": 27.149021864211736, "grad_norm": 1.1229344606399536, "learning_rate": 0.0014570195627157652, "loss": 0.8168, "step": 94370 }, { "epoch": 27.151898734177216, "grad_norm": 0.997273325920105, "learning_rate": 0.0014569620253164558, "loss": 0.547, "step": 94380 }, { "epoch": 27.154775604142692, "grad_norm": 1.2117185592651367, "learning_rate": 0.0014569044879171463, "loss": 0.6776, "step": 94390 }, { "epoch": 27.157652474108172, "grad_norm": 1.44802725315094, "learning_rate": 0.0014568469505178365, "loss": 0.7386, "step": 94400 }, { "epoch": 27.160529344073648, "grad_norm": 1.177836537361145, "learning_rate": 0.001456789413118527, "loss": 0.5089, "step": 94410 }, { "epoch": 27.163406214039124, "grad_norm": 1.7384669780731201, "learning_rate": 0.0014567318757192176, "loss": 0.6915, "step": 94420 }, { "epoch": 27.166283084004604, "grad_norm": 0.9111249446868896, "learning_rate": 0.001456674338319908, "loss": 0.6333, "step": 94430 }, { "epoch": 27.16915995397008, "grad_norm": 1.5561597347259521, "learning_rate": 0.0014566168009205985, "loss": 0.778, "step": 94440 }, { "epoch": 27.172036823935557, "grad_norm": 0.8948779106140137, "learning_rate": 0.0014565592635212888, "loss": 0.6372, "step": 94450 }, { "epoch": 27.174913693901036, "grad_norm": 1.2244771718978882, "learning_rate": 0.0014565017261219792, "loss": 0.8154, "step": 94460 }, { "epoch": 27.177790563866512, "grad_norm": 1.1705677509307861, "learning_rate": 0.0014564441887226698, "loss": 0.6581, "step": 94470 }, { "epoch": 27.180667433831992, "grad_norm": 1.6992077827453613, "learning_rate": 0.00145638665132336, "loss": 0.7847, "step": 94480 }, { "epoch": 27.18354430379747, "grad_norm": 1.1030033826828003, "learning_rate": 0.0014563291139240507, "loss": 0.7325, "step": 94490 }, { "epoch": 27.186421173762945, "grad_norm": 0.8852542042732239, "learning_rate": 0.0014562715765247412, "loss": 0.6486, "step": 94500 }, { "epoch": 27.189298043728424, "grad_norm": 1.1157766580581665, "learning_rate": 0.0014562140391254316, "loss": 0.4999, "step": 94510 }, { "epoch": 27.1921749136939, "grad_norm": 1.3337827920913696, "learning_rate": 0.001456156501726122, "loss": 0.6724, "step": 94520 }, { "epoch": 27.19505178365938, "grad_norm": 1.6587798595428467, "learning_rate": 0.0014560989643268125, "loss": 0.557, "step": 94530 }, { "epoch": 27.197928653624857, "grad_norm": 1.1652332544326782, "learning_rate": 0.0014560414269275028, "loss": 0.5929, "step": 94540 }, { "epoch": 27.200805523590333, "grad_norm": 1.4712399244308472, "learning_rate": 0.0014559838895281934, "loss": 0.6726, "step": 94550 }, { "epoch": 27.203682393555813, "grad_norm": 1.0006377696990967, "learning_rate": 0.001455926352128884, "loss": 0.557, "step": 94560 }, { "epoch": 27.20655926352129, "grad_norm": 1.0266999006271362, "learning_rate": 0.0014558688147295743, "loss": 0.6422, "step": 94570 }, { "epoch": 27.209436133486765, "grad_norm": 1.8318233489990234, "learning_rate": 0.0014558112773302647, "loss": 0.6736, "step": 94580 }, { "epoch": 27.212313003452245, "grad_norm": 0.7690281867980957, "learning_rate": 0.001455753739930955, "loss": 0.5376, "step": 94590 }, { "epoch": 27.21518987341772, "grad_norm": 1.6479541063308716, "learning_rate": 0.0014556962025316456, "loss": 0.6288, "step": 94600 }, { "epoch": 27.2180667433832, "grad_norm": 1.5242916345596313, "learning_rate": 0.0014556386651323361, "loss": 0.7825, "step": 94610 }, { "epoch": 27.220943613348677, "grad_norm": 1.547337532043457, "learning_rate": 0.0014555811277330265, "loss": 0.6269, "step": 94620 }, { "epoch": 27.223820483314153, "grad_norm": 0.9405324459075928, "learning_rate": 0.001455523590333717, "loss": 0.8631, "step": 94630 }, { "epoch": 27.226697353279633, "grad_norm": 1.3364520072937012, "learning_rate": 0.0014554660529344074, "loss": 0.6929, "step": 94640 }, { "epoch": 27.22957422324511, "grad_norm": 1.0328575372695923, "learning_rate": 0.0014554085155350977, "loss": 0.6858, "step": 94650 }, { "epoch": 27.232451093210585, "grad_norm": 1.5440860986709595, "learning_rate": 0.0014553509781357883, "loss": 0.6221, "step": 94660 }, { "epoch": 27.235327963176065, "grad_norm": 1.3669166564941406, "learning_rate": 0.0014552934407364789, "loss": 0.6788, "step": 94670 }, { "epoch": 27.23820483314154, "grad_norm": 1.5557044744491577, "learning_rate": 0.0014552359033371692, "loss": 0.542, "step": 94680 }, { "epoch": 27.24108170310702, "grad_norm": 0.9281784892082214, "learning_rate": 0.0014551783659378598, "loss": 0.6996, "step": 94690 }, { "epoch": 27.243958573072497, "grad_norm": 1.165428638458252, "learning_rate": 0.00145512082853855, "loss": 0.7776, "step": 94700 }, { "epoch": 27.246835443037973, "grad_norm": 2.602142810821533, "learning_rate": 0.0014550632911392405, "loss": 0.8557, "step": 94710 }, { "epoch": 27.249712313003453, "grad_norm": 1.040327548980713, "learning_rate": 0.001455005753739931, "loss": 0.5326, "step": 94720 }, { "epoch": 27.25258918296893, "grad_norm": 2.4359984397888184, "learning_rate": 0.0014549482163406214, "loss": 0.6714, "step": 94730 }, { "epoch": 27.25546605293441, "grad_norm": 1.0946043729782104, "learning_rate": 0.001454890678941312, "loss": 0.6652, "step": 94740 }, { "epoch": 27.258342922899885, "grad_norm": 0.6927813291549683, "learning_rate": 0.0014548331415420025, "loss": 0.8256, "step": 94750 }, { "epoch": 27.26121979286536, "grad_norm": 1.886931300163269, "learning_rate": 0.0014547756041426926, "loss": 0.774, "step": 94760 }, { "epoch": 27.26409666283084, "grad_norm": 1.4277187585830688, "learning_rate": 0.0014547180667433832, "loss": 0.7919, "step": 94770 }, { "epoch": 27.266973532796317, "grad_norm": 1.9789040088653564, "learning_rate": 0.0014546605293440738, "loss": 0.6931, "step": 94780 }, { "epoch": 27.269850402761794, "grad_norm": 0.7479328513145447, "learning_rate": 0.0014546029919447641, "loss": 0.6037, "step": 94790 }, { "epoch": 27.272727272727273, "grad_norm": 1.172606348991394, "learning_rate": 0.0014545454545454547, "loss": 0.7164, "step": 94800 }, { "epoch": 27.27560414269275, "grad_norm": 1.6035271883010864, "learning_rate": 0.001454487917146145, "loss": 0.6092, "step": 94810 }, { "epoch": 27.27848101265823, "grad_norm": 1.9224148988723755, "learning_rate": 0.0014544303797468354, "loss": 0.8615, "step": 94820 }, { "epoch": 27.281357882623706, "grad_norm": 1.2889034748077393, "learning_rate": 0.001454372842347526, "loss": 0.5339, "step": 94830 }, { "epoch": 27.28423475258918, "grad_norm": 1.054936170578003, "learning_rate": 0.0014543153049482163, "loss": 0.6654, "step": 94840 }, { "epoch": 27.28711162255466, "grad_norm": 0.9638950228691101, "learning_rate": 0.0014542577675489068, "loss": 0.6366, "step": 94850 }, { "epoch": 27.289988492520138, "grad_norm": 1.650258183479309, "learning_rate": 0.0014542002301495974, "loss": 0.5992, "step": 94860 }, { "epoch": 27.292865362485614, "grad_norm": 1.5750961303710938, "learning_rate": 0.0014541426927502878, "loss": 0.6835, "step": 94870 }, { "epoch": 27.295742232451094, "grad_norm": 0.970781147480011, "learning_rate": 0.001454085155350978, "loss": 0.6659, "step": 94880 }, { "epoch": 27.29861910241657, "grad_norm": 1.192095398902893, "learning_rate": 0.0014540276179516687, "loss": 0.6614, "step": 94890 }, { "epoch": 27.30149597238205, "grad_norm": 2.13655948638916, "learning_rate": 0.001453970080552359, "loss": 0.7007, "step": 94900 }, { "epoch": 27.304372842347526, "grad_norm": 0.8834494352340698, "learning_rate": 0.0014539125431530496, "loss": 0.574, "step": 94910 }, { "epoch": 27.307249712313002, "grad_norm": 0.8252514004707336, "learning_rate": 0.00145385500575374, "loss": 0.7113, "step": 94920 }, { "epoch": 27.310126582278482, "grad_norm": 1.5385302305221558, "learning_rate": 0.0014537974683544305, "loss": 0.8174, "step": 94930 }, { "epoch": 27.313003452243958, "grad_norm": 1.0569857358932495, "learning_rate": 0.0014537399309551208, "loss": 0.6721, "step": 94940 }, { "epoch": 27.315880322209438, "grad_norm": 1.914790153503418, "learning_rate": 0.0014536823935558112, "loss": 0.6166, "step": 94950 }, { "epoch": 27.318757192174914, "grad_norm": 0.810127317905426, "learning_rate": 0.0014536248561565017, "loss": 0.6559, "step": 94960 }, { "epoch": 27.32163406214039, "grad_norm": 1.5224683284759521, "learning_rate": 0.0014535673187571923, "loss": 0.6337, "step": 94970 }, { "epoch": 27.32451093210587, "grad_norm": 1.0209791660308838, "learning_rate": 0.0014535097813578827, "loss": 0.5777, "step": 94980 }, { "epoch": 27.327387802071346, "grad_norm": 1.2561701536178589, "learning_rate": 0.0014534522439585732, "loss": 0.7585, "step": 94990 }, { "epoch": 27.330264672036822, "grad_norm": 1.0627450942993164, "learning_rate": 0.0014533947065592636, "loss": 0.6145, "step": 95000 }, { "epoch": 27.333141542002302, "grad_norm": 0.7884569764137268, "learning_rate": 0.001453337169159954, "loss": 0.7105, "step": 95010 }, { "epoch": 27.33601841196778, "grad_norm": 1.5534745454788208, "learning_rate": 0.0014532796317606445, "loss": 0.749, "step": 95020 }, { "epoch": 27.338895281933258, "grad_norm": 0.826554536819458, "learning_rate": 0.0014532220943613348, "loss": 0.5301, "step": 95030 }, { "epoch": 27.341772151898734, "grad_norm": 0.8497284650802612, "learning_rate": 0.0014531645569620254, "loss": 0.6441, "step": 95040 }, { "epoch": 27.34464902186421, "grad_norm": 1.1569321155548096, "learning_rate": 0.001453107019562716, "loss": 0.7078, "step": 95050 }, { "epoch": 27.34752589182969, "grad_norm": 1.7154905796051025, "learning_rate": 0.001453049482163406, "loss": 0.7585, "step": 95060 }, { "epoch": 27.350402761795166, "grad_norm": 0.9944276213645935, "learning_rate": 0.0014529919447640966, "loss": 0.5952, "step": 95070 }, { "epoch": 27.353279631760646, "grad_norm": 3.343593120574951, "learning_rate": 0.0014529344073647872, "loss": 0.704, "step": 95080 }, { "epoch": 27.356156501726122, "grad_norm": 1.8795998096466064, "learning_rate": 0.0014528768699654776, "loss": 0.6153, "step": 95090 }, { "epoch": 27.3590333716916, "grad_norm": 1.3465451002120972, "learning_rate": 0.0014528193325661681, "loss": 0.7901, "step": 95100 }, { "epoch": 27.36191024165708, "grad_norm": 0.7306562662124634, "learning_rate": 0.0014527617951668587, "loss": 0.6549, "step": 95110 }, { "epoch": 27.364787111622555, "grad_norm": 1.2070585489273071, "learning_rate": 0.0014527042577675488, "loss": 0.6724, "step": 95120 }, { "epoch": 27.36766398158803, "grad_norm": 1.8575154542922974, "learning_rate": 0.0014526467203682394, "loss": 0.7665, "step": 95130 }, { "epoch": 27.37054085155351, "grad_norm": 0.8711831569671631, "learning_rate": 0.00145258918296893, "loss": 0.844, "step": 95140 }, { "epoch": 27.373417721518987, "grad_norm": 1.5013189315795898, "learning_rate": 0.0014525316455696203, "loss": 0.5379, "step": 95150 }, { "epoch": 27.376294591484466, "grad_norm": 1.2686214447021484, "learning_rate": 0.0014524741081703109, "loss": 0.7447, "step": 95160 }, { "epoch": 27.379171461449943, "grad_norm": 1.204768180847168, "learning_rate": 0.001452416570771001, "loss": 0.6728, "step": 95170 }, { "epoch": 27.38204833141542, "grad_norm": 1.5640109777450562, "learning_rate": 0.0014523590333716915, "loss": 0.6749, "step": 95180 }, { "epoch": 27.3849252013809, "grad_norm": 1.1043362617492676, "learning_rate": 0.0014523014959723821, "loss": 0.6959, "step": 95190 }, { "epoch": 27.387802071346375, "grad_norm": 1.0182018280029297, "learning_rate": 0.0014522439585730725, "loss": 0.6689, "step": 95200 }, { "epoch": 27.39067894131185, "grad_norm": 1.1123936176300049, "learning_rate": 0.001452186421173763, "loss": 0.8849, "step": 95210 }, { "epoch": 27.39355581127733, "grad_norm": 0.9856337904930115, "learning_rate": 0.0014521288837744536, "loss": 0.8745, "step": 95220 }, { "epoch": 27.396432681242807, "grad_norm": 1.6465389728546143, "learning_rate": 0.0014520713463751437, "loss": 0.6968, "step": 95230 }, { "epoch": 27.399309551208287, "grad_norm": 1.1991748809814453, "learning_rate": 0.0014520138089758343, "loss": 0.8839, "step": 95240 }, { "epoch": 27.402186421173763, "grad_norm": 1.391542673110962, "learning_rate": 0.0014519562715765248, "loss": 0.5731, "step": 95250 }, { "epoch": 27.40506329113924, "grad_norm": 0.8547037839889526, "learning_rate": 0.0014518987341772152, "loss": 0.8294, "step": 95260 }, { "epoch": 27.40794016110472, "grad_norm": 1.718855619430542, "learning_rate": 0.0014518411967779058, "loss": 0.6658, "step": 95270 }, { "epoch": 27.410817031070195, "grad_norm": 1.0419191122055054, "learning_rate": 0.001451783659378596, "loss": 0.7182, "step": 95280 }, { "epoch": 27.413693901035675, "grad_norm": 1.890380859375, "learning_rate": 0.0014517261219792865, "loss": 0.6999, "step": 95290 }, { "epoch": 27.41657077100115, "grad_norm": 0.9910386800765991, "learning_rate": 0.001451668584579977, "loss": 0.6838, "step": 95300 }, { "epoch": 27.419447640966627, "grad_norm": 1.0799627304077148, "learning_rate": 0.0014516110471806674, "loss": 0.7144, "step": 95310 }, { "epoch": 27.422324510932107, "grad_norm": 1.2081729173660278, "learning_rate": 0.001451553509781358, "loss": 0.7471, "step": 95320 }, { "epoch": 27.425201380897583, "grad_norm": 1.2875593900680542, "learning_rate": 0.0014514959723820485, "loss": 0.6099, "step": 95330 }, { "epoch": 27.42807825086306, "grad_norm": 1.2748756408691406, "learning_rate": 0.0014514384349827388, "loss": 0.7149, "step": 95340 }, { "epoch": 27.43095512082854, "grad_norm": 2.155491352081299, "learning_rate": 0.0014513808975834292, "loss": 0.7005, "step": 95350 }, { "epoch": 27.433831990794015, "grad_norm": 0.9174428582191467, "learning_rate": 0.0014513233601841197, "loss": 0.6636, "step": 95360 }, { "epoch": 27.436708860759495, "grad_norm": 0.7302303910255432, "learning_rate": 0.00145126582278481, "loss": 0.4946, "step": 95370 }, { "epoch": 27.43958573072497, "grad_norm": 0.6647293567657471, "learning_rate": 0.0014512082853855007, "loss": 0.5085, "step": 95380 }, { "epoch": 27.442462600690448, "grad_norm": 1.2823283672332764, "learning_rate": 0.001451150747986191, "loss": 0.709, "step": 95390 }, { "epoch": 27.445339470655927, "grad_norm": 1.323307752609253, "learning_rate": 0.0014510932105868816, "loss": 0.8201, "step": 95400 }, { "epoch": 27.448216340621403, "grad_norm": 3.647366523742676, "learning_rate": 0.001451035673187572, "loss": 0.8278, "step": 95410 }, { "epoch": 27.451093210586883, "grad_norm": 0.9928665161132812, "learning_rate": 0.0014509781357882623, "loss": 0.6484, "step": 95420 }, { "epoch": 27.45397008055236, "grad_norm": 1.6040185689926147, "learning_rate": 0.0014509205983889528, "loss": 0.6245, "step": 95430 }, { "epoch": 27.456846950517836, "grad_norm": 1.1715806722640991, "learning_rate": 0.0014508630609896434, "loss": 0.5925, "step": 95440 }, { "epoch": 27.459723820483315, "grad_norm": 1.2437198162078857, "learning_rate": 0.0014508055235903337, "loss": 0.7591, "step": 95450 }, { "epoch": 27.46260069044879, "grad_norm": 1.6893543004989624, "learning_rate": 0.0014507479861910243, "loss": 0.7184, "step": 95460 }, { "epoch": 27.465477560414268, "grad_norm": 0.9821218252182007, "learning_rate": 0.0014506904487917147, "loss": 0.6542, "step": 95470 }, { "epoch": 27.468354430379748, "grad_norm": 1.8667643070220947, "learning_rate": 0.001450632911392405, "loss": 0.6057, "step": 95480 }, { "epoch": 27.471231300345224, "grad_norm": 1.5854064226150513, "learning_rate": 0.0014505753739930956, "loss": 0.5125, "step": 95490 }, { "epoch": 27.474108170310704, "grad_norm": 0.9111550450325012, "learning_rate": 0.001450517836593786, "loss": 0.6154, "step": 95500 }, { "epoch": 27.47698504027618, "grad_norm": 0.9408077001571655, "learning_rate": 0.0014504602991944765, "loss": 0.5567, "step": 95510 }, { "epoch": 27.479861910241656, "grad_norm": 1.0419973134994507, "learning_rate": 0.001450402761795167, "loss": 0.8972, "step": 95520 }, { "epoch": 27.482738780207136, "grad_norm": 1.7308313846588135, "learning_rate": 0.0014503452243958572, "loss": 0.6811, "step": 95530 }, { "epoch": 27.485615650172612, "grad_norm": 1.9619758129119873, "learning_rate": 0.0014502876869965477, "loss": 0.7777, "step": 95540 }, { "epoch": 27.488492520138088, "grad_norm": 1.5955973863601685, "learning_rate": 0.0014502301495972383, "loss": 0.6943, "step": 95550 }, { "epoch": 27.491369390103568, "grad_norm": 0.6428900957107544, "learning_rate": 0.0014501726121979286, "loss": 0.5104, "step": 95560 }, { "epoch": 27.494246260069044, "grad_norm": 1.1974704265594482, "learning_rate": 0.0014501150747986192, "loss": 0.5586, "step": 95570 }, { "epoch": 27.497123130034524, "grad_norm": 0.716313898563385, "learning_rate": 0.0014500575373993098, "loss": 0.5114, "step": 95580 }, { "epoch": 27.5, "grad_norm": 1.6206694841384888, "learning_rate": 0.00145, "loss": 0.7189, "step": 95590 }, { "epoch": 27.502876869965476, "grad_norm": 1.7535516023635864, "learning_rate": 0.0014499424626006905, "loss": 0.7183, "step": 95600 }, { "epoch": 27.505753739930956, "grad_norm": 1.0888582468032837, "learning_rate": 0.0014498849252013808, "loss": 0.6457, "step": 95610 }, { "epoch": 27.508630609896432, "grad_norm": 1.6906182765960693, "learning_rate": 0.0014498273878020714, "loss": 0.5834, "step": 95620 }, { "epoch": 27.511507479861912, "grad_norm": 0.6944978833198547, "learning_rate": 0.001449769850402762, "loss": 0.8006, "step": 95630 }, { "epoch": 27.514384349827388, "grad_norm": 1.181705117225647, "learning_rate": 0.0014497123130034523, "loss": 0.7215, "step": 95640 }, { "epoch": 27.517261219792864, "grad_norm": 1.1403110027313232, "learning_rate": 0.0014496547756041426, "loss": 0.7235, "step": 95650 }, { "epoch": 27.520138089758344, "grad_norm": 1.0192129611968994, "learning_rate": 0.0014495972382048332, "loss": 0.714, "step": 95660 }, { "epoch": 27.52301495972382, "grad_norm": 1.0410720109939575, "learning_rate": 0.0014495397008055235, "loss": 0.7919, "step": 95670 }, { "epoch": 27.525891829689296, "grad_norm": 1.7011340856552124, "learning_rate": 0.001449482163406214, "loss": 0.8763, "step": 95680 }, { "epoch": 27.528768699654776, "grad_norm": 1.1861457824707031, "learning_rate": 0.0014494246260069047, "loss": 0.6394, "step": 95690 }, { "epoch": 27.531645569620252, "grad_norm": 1.7864596843719482, "learning_rate": 0.001449367088607595, "loss": 0.5425, "step": 95700 }, { "epoch": 27.534522439585732, "grad_norm": 1.1965975761413574, "learning_rate": 0.0014493095512082854, "loss": 0.525, "step": 95710 }, { "epoch": 27.53739930955121, "grad_norm": 1.223996877670288, "learning_rate": 0.0014492520138089757, "loss": 0.7315, "step": 95720 }, { "epoch": 27.540276179516685, "grad_norm": 1.461185336112976, "learning_rate": 0.0014491944764096663, "loss": 0.567, "step": 95730 }, { "epoch": 27.543153049482164, "grad_norm": 1.326875925064087, "learning_rate": 0.0014491369390103568, "loss": 0.5322, "step": 95740 }, { "epoch": 27.54602991944764, "grad_norm": 0.9277008771896362, "learning_rate": 0.0014490794016110472, "loss": 0.5827, "step": 95750 }, { "epoch": 27.548906789413117, "grad_norm": 1.615224838256836, "learning_rate": 0.0014490218642117378, "loss": 0.6295, "step": 95760 }, { "epoch": 27.551783659378597, "grad_norm": 3.602755546569824, "learning_rate": 0.001448964326812428, "loss": 0.7912, "step": 95770 }, { "epoch": 27.554660529344073, "grad_norm": 1.1395905017852783, "learning_rate": 0.0014489067894131184, "loss": 0.5815, "step": 95780 }, { "epoch": 27.557537399309552, "grad_norm": 1.059303641319275, "learning_rate": 0.001448849252013809, "loss": 0.5201, "step": 95790 }, { "epoch": 27.56041426927503, "grad_norm": 1.111070990562439, "learning_rate": 0.0014487917146144996, "loss": 0.6231, "step": 95800 }, { "epoch": 27.563291139240505, "grad_norm": 0.4922904968261719, "learning_rate": 0.00144873417721519, "loss": 0.6614, "step": 95810 }, { "epoch": 27.566168009205985, "grad_norm": 1.4324800968170166, "learning_rate": 0.0014486766398158805, "loss": 0.8392, "step": 95820 }, { "epoch": 27.56904487917146, "grad_norm": 2.2808165550231934, "learning_rate": 0.0014486191024165708, "loss": 0.9966, "step": 95830 }, { "epoch": 27.57192174913694, "grad_norm": 1.3295252323150635, "learning_rate": 0.0014485615650172612, "loss": 0.8673, "step": 95840 }, { "epoch": 27.574798619102417, "grad_norm": 1.186360478401184, "learning_rate": 0.0014485040276179517, "loss": 0.6061, "step": 95850 }, { "epoch": 27.577675489067893, "grad_norm": 0.8526051044464111, "learning_rate": 0.001448446490218642, "loss": 0.7145, "step": 95860 }, { "epoch": 27.580552359033373, "grad_norm": 2.110058307647705, "learning_rate": 0.0014483889528193327, "loss": 0.6316, "step": 95870 }, { "epoch": 27.58342922899885, "grad_norm": 1.7547425031661987, "learning_rate": 0.0014483314154200232, "loss": 0.6814, "step": 95880 }, { "epoch": 27.586306098964325, "grad_norm": 1.2521367073059082, "learning_rate": 0.0014482738780207133, "loss": 0.7027, "step": 95890 }, { "epoch": 27.589182968929805, "grad_norm": 1.307438611984253, "learning_rate": 0.001448216340621404, "loss": 0.5517, "step": 95900 }, { "epoch": 27.59205983889528, "grad_norm": 1.959604024887085, "learning_rate": 0.0014481588032220945, "loss": 0.7047, "step": 95910 }, { "epoch": 27.59493670886076, "grad_norm": 1.6325788497924805, "learning_rate": 0.0014481012658227848, "loss": 0.8344, "step": 95920 }, { "epoch": 27.597813578826237, "grad_norm": 0.6496118307113647, "learning_rate": 0.0014480437284234754, "loss": 0.6505, "step": 95930 }, { "epoch": 27.600690448791713, "grad_norm": 1.3318487405776978, "learning_rate": 0.001447986191024166, "loss": 0.7189, "step": 95940 }, { "epoch": 27.603567318757193, "grad_norm": 1.4753657579421997, "learning_rate": 0.001447928653624856, "loss": 0.7307, "step": 95950 }, { "epoch": 27.60644418872267, "grad_norm": 0.782086193561554, "learning_rate": 0.0014478711162255466, "loss": 0.6467, "step": 95960 }, { "epoch": 27.60932105868815, "grad_norm": 1.1242473125457764, "learning_rate": 0.001447813578826237, "loss": 0.5783, "step": 95970 }, { "epoch": 27.612197928653625, "grad_norm": 1.4599204063415527, "learning_rate": 0.0014477560414269276, "loss": 0.7051, "step": 95980 }, { "epoch": 27.6150747986191, "grad_norm": 1.2226790189743042, "learning_rate": 0.0014476985040276181, "loss": 0.7206, "step": 95990 }, { "epoch": 27.61795166858458, "grad_norm": 0.8639785051345825, "learning_rate": 0.0014476409666283083, "loss": 0.6493, "step": 96000 }, { "epoch": 27.620828538550057, "grad_norm": 1.370250940322876, "learning_rate": 0.0014475834292289988, "loss": 0.7216, "step": 96010 }, { "epoch": 27.623705408515534, "grad_norm": 1.114710807800293, "learning_rate": 0.0014475258918296894, "loss": 0.609, "step": 96020 }, { "epoch": 27.626582278481013, "grad_norm": 2.0731136798858643, "learning_rate": 0.0014474683544303797, "loss": 0.7219, "step": 96030 }, { "epoch": 27.62945914844649, "grad_norm": 1.8129172325134277, "learning_rate": 0.0014474108170310703, "loss": 0.8444, "step": 96040 }, { "epoch": 27.63233601841197, "grad_norm": 1.2060985565185547, "learning_rate": 0.0014473532796317609, "loss": 0.7706, "step": 96050 }, { "epoch": 27.635212888377445, "grad_norm": 1.2483210563659668, "learning_rate": 0.001447295742232451, "loss": 0.6968, "step": 96060 }, { "epoch": 27.63808975834292, "grad_norm": 1.1809399127960205, "learning_rate": 0.0014472382048331415, "loss": 0.6174, "step": 96070 }, { "epoch": 27.6409666283084, "grad_norm": 1.5387221574783325, "learning_rate": 0.001447180667433832, "loss": 0.6188, "step": 96080 }, { "epoch": 27.643843498273878, "grad_norm": 1.3097314834594727, "learning_rate": 0.0014471231300345225, "loss": 0.7534, "step": 96090 }, { "epoch": 27.646720368239357, "grad_norm": 2.0895838737487793, "learning_rate": 0.001447065592635213, "loss": 0.5997, "step": 96100 }, { "epoch": 27.649597238204834, "grad_norm": 2.5592336654663086, "learning_rate": 0.0014470080552359034, "loss": 0.8228, "step": 96110 }, { "epoch": 27.65247410817031, "grad_norm": 1.086512804031372, "learning_rate": 0.0014469505178365937, "loss": 0.7354, "step": 96120 }, { "epoch": 27.65535097813579, "grad_norm": 1.9733734130859375, "learning_rate": 0.0014468929804372843, "loss": 0.7609, "step": 96130 }, { "epoch": 27.658227848101266, "grad_norm": 1.0401601791381836, "learning_rate": 0.0014468354430379746, "loss": 0.7125, "step": 96140 }, { "epoch": 27.661104718066742, "grad_norm": 1.116300106048584, "learning_rate": 0.0014467779056386652, "loss": 0.6843, "step": 96150 }, { "epoch": 27.66398158803222, "grad_norm": 1.0188889503479004, "learning_rate": 0.0014467203682393558, "loss": 0.6685, "step": 96160 }, { "epoch": 27.666858457997698, "grad_norm": 2.5497183799743652, "learning_rate": 0.001446662830840046, "loss": 0.6318, "step": 96170 }, { "epoch": 27.669735327963178, "grad_norm": 2.593613862991333, "learning_rate": 0.0014466052934407364, "loss": 0.7175, "step": 96180 }, { "epoch": 27.672612197928654, "grad_norm": 1.1955018043518066, "learning_rate": 0.0014465477560414268, "loss": 0.6215, "step": 96190 }, { "epoch": 27.67548906789413, "grad_norm": 2.1091461181640625, "learning_rate": 0.0014464902186421174, "loss": 0.766, "step": 96200 }, { "epoch": 27.67836593785961, "grad_norm": 1.0003278255462646, "learning_rate": 0.001446432681242808, "loss": 0.6697, "step": 96210 }, { "epoch": 27.681242807825086, "grad_norm": 0.7421731352806091, "learning_rate": 0.0014463751438434983, "loss": 0.6731, "step": 96220 }, { "epoch": 27.684119677790562, "grad_norm": 0.8544963002204895, "learning_rate": 0.0014463176064441888, "loss": 0.6269, "step": 96230 }, { "epoch": 27.686996547756042, "grad_norm": 1.304474949836731, "learning_rate": 0.0014462600690448792, "loss": 0.8492, "step": 96240 }, { "epoch": 27.689873417721518, "grad_norm": 0.7553572654724121, "learning_rate": 0.0014462025316455695, "loss": 0.5612, "step": 96250 }, { "epoch": 27.692750287686998, "grad_norm": 1.06884765625, "learning_rate": 0.00144614499424626, "loss": 0.7234, "step": 96260 }, { "epoch": 27.695627157652474, "grad_norm": 1.1634546518325806, "learning_rate": 0.0014460874568469507, "loss": 0.7061, "step": 96270 }, { "epoch": 27.69850402761795, "grad_norm": 1.1014761924743652, "learning_rate": 0.001446029919447641, "loss": 0.7327, "step": 96280 }, { "epoch": 27.70138089758343, "grad_norm": 0.9198920130729675, "learning_rate": 0.0014459723820483316, "loss": 0.644, "step": 96290 }, { "epoch": 27.704257767548906, "grad_norm": 1.6123223304748535, "learning_rate": 0.0014459148446490217, "loss": 0.867, "step": 96300 }, { "epoch": 27.707134637514386, "grad_norm": 0.8733587265014648, "learning_rate": 0.0014458573072497123, "loss": 0.6835, "step": 96310 }, { "epoch": 27.710011507479862, "grad_norm": 1.13865065574646, "learning_rate": 0.0014457997698504028, "loss": 0.6718, "step": 96320 }, { "epoch": 27.71288837744534, "grad_norm": 1.4352384805679321, "learning_rate": 0.0014457422324510932, "loss": 0.7389, "step": 96330 }, { "epoch": 27.71576524741082, "grad_norm": 1.8784735202789307, "learning_rate": 0.0014456846950517837, "loss": 0.7331, "step": 96340 }, { "epoch": 27.718642117376294, "grad_norm": 2.6760804653167725, "learning_rate": 0.0014456271576524743, "loss": 0.8725, "step": 96350 }, { "epoch": 27.72151898734177, "grad_norm": 1.453492522239685, "learning_rate": 0.0014455696202531644, "loss": 0.7848, "step": 96360 }, { "epoch": 27.72439585730725, "grad_norm": 1.5492421388626099, "learning_rate": 0.001445512082853855, "loss": 0.673, "step": 96370 }, { "epoch": 27.727272727272727, "grad_norm": 1.5578283071517944, "learning_rate": 0.0014454545454545456, "loss": 0.7787, "step": 96380 }, { "epoch": 27.730149597238206, "grad_norm": 0.8549133539199829, "learning_rate": 0.001445397008055236, "loss": 0.8307, "step": 96390 }, { "epoch": 27.733026467203683, "grad_norm": 1.1049787998199463, "learning_rate": 0.0014453394706559265, "loss": 0.717, "step": 96400 }, { "epoch": 27.73590333716916, "grad_norm": 1.270400047302246, "learning_rate": 0.0014452819332566168, "loss": 0.7052, "step": 96410 }, { "epoch": 27.73878020713464, "grad_norm": 2.3203976154327393, "learning_rate": 0.0014452243958573072, "loss": 0.6187, "step": 96420 }, { "epoch": 27.741657077100115, "grad_norm": 0.7860099077224731, "learning_rate": 0.0014451668584579977, "loss": 0.6511, "step": 96430 }, { "epoch": 27.74453394706559, "grad_norm": 1.0354557037353516, "learning_rate": 0.001445109321058688, "loss": 0.6747, "step": 96440 }, { "epoch": 27.74741081703107, "grad_norm": 3.064876079559326, "learning_rate": 0.0014450517836593786, "loss": 0.7287, "step": 96450 }, { "epoch": 27.750287686996547, "grad_norm": 0.740741491317749, "learning_rate": 0.0014449942462600692, "loss": 0.4755, "step": 96460 }, { "epoch": 27.753164556962027, "grad_norm": 1.0417773723602295, "learning_rate": 0.0014449367088607596, "loss": 0.7109, "step": 96470 }, { "epoch": 27.756041426927503, "grad_norm": 2.4960107803344727, "learning_rate": 0.00144487917146145, "loss": 0.8601, "step": 96480 }, { "epoch": 27.75891829689298, "grad_norm": 1.6876517534255981, "learning_rate": 0.0014448216340621405, "loss": 0.7589, "step": 96490 }, { "epoch": 27.76179516685846, "grad_norm": 1.1424680948257446, "learning_rate": 0.0014447640966628308, "loss": 0.5877, "step": 96500 }, { "epoch": 27.764672036823935, "grad_norm": 1.1163771152496338, "learning_rate": 0.0014447065592635214, "loss": 0.7308, "step": 96510 }, { "epoch": 27.767548906789415, "grad_norm": 1.9586247205734253, "learning_rate": 0.001444649021864212, "loss": 0.716, "step": 96520 }, { "epoch": 27.77042577675489, "grad_norm": 0.8138300180435181, "learning_rate": 0.0014445914844649023, "loss": 0.7805, "step": 96530 }, { "epoch": 27.773302646720367, "grad_norm": 1.3783929347991943, "learning_rate": 0.0014445339470655926, "loss": 0.7406, "step": 96540 }, { "epoch": 27.776179516685847, "grad_norm": 1.4725453853607178, "learning_rate": 0.001444476409666283, "loss": 0.6246, "step": 96550 }, { "epoch": 27.779056386651323, "grad_norm": 1.422518253326416, "learning_rate": 0.0014444188722669735, "loss": 0.6941, "step": 96560 }, { "epoch": 27.7819332566168, "grad_norm": 2.2797656059265137, "learning_rate": 0.001444361334867664, "loss": 0.7458, "step": 96570 }, { "epoch": 27.78481012658228, "grad_norm": 1.4273895025253296, "learning_rate": 0.0014443037974683545, "loss": 0.733, "step": 96580 }, { "epoch": 27.787686996547755, "grad_norm": 3.597693920135498, "learning_rate": 0.001444246260069045, "loss": 0.7469, "step": 96590 }, { "epoch": 27.790563866513235, "grad_norm": 1.9985769987106323, "learning_rate": 0.0014441887226697354, "loss": 0.6774, "step": 96600 }, { "epoch": 27.79344073647871, "grad_norm": 1.9126230478286743, "learning_rate": 0.0014441311852704257, "loss": 0.8766, "step": 96610 }, { "epoch": 27.796317606444187, "grad_norm": 1.2396258115768433, "learning_rate": 0.0014440736478711163, "loss": 0.7306, "step": 96620 }, { "epoch": 27.799194476409667, "grad_norm": 1.565501093864441, "learning_rate": 0.0014440161104718068, "loss": 0.8123, "step": 96630 }, { "epoch": 27.802071346375143, "grad_norm": 0.840950071811676, "learning_rate": 0.0014439585730724972, "loss": 0.6997, "step": 96640 }, { "epoch": 27.80494821634062, "grad_norm": 1.0023558139801025, "learning_rate": 0.0014439010356731877, "loss": 0.5904, "step": 96650 }, { "epoch": 27.8078250863061, "grad_norm": 2.173497200012207, "learning_rate": 0.0014438434982738779, "loss": 0.5738, "step": 96660 }, { "epoch": 27.810701956271576, "grad_norm": 1.1939634084701538, "learning_rate": 0.0014437859608745684, "loss": 0.5173, "step": 96670 }, { "epoch": 27.813578826237055, "grad_norm": 2.200847864151001, "learning_rate": 0.001443728423475259, "loss": 0.6329, "step": 96680 }, { "epoch": 27.81645569620253, "grad_norm": 1.2204957008361816, "learning_rate": 0.0014436708860759494, "loss": 0.6038, "step": 96690 }, { "epoch": 27.819332566168008, "grad_norm": 0.8542926907539368, "learning_rate": 0.00144361334867664, "loss": 0.7419, "step": 96700 }, { "epoch": 27.822209436133488, "grad_norm": 1.0109127759933472, "learning_rate": 0.0014435558112773305, "loss": 0.6857, "step": 96710 }, { "epoch": 27.825086306098964, "grad_norm": 0.924102246761322, "learning_rate": 0.0014434982738780206, "loss": 0.695, "step": 96720 }, { "epoch": 27.827963176064443, "grad_norm": 2.2320706844329834, "learning_rate": 0.0014434407364787112, "loss": 0.7809, "step": 96730 }, { "epoch": 27.83084004602992, "grad_norm": 1.7057737112045288, "learning_rate": 0.0014433831990794017, "loss": 0.5732, "step": 96740 }, { "epoch": 27.833716915995396, "grad_norm": 1.026647686958313, "learning_rate": 0.001443325661680092, "loss": 0.8486, "step": 96750 }, { "epoch": 27.836593785960876, "grad_norm": 1.9797977209091187, "learning_rate": 0.0014432681242807827, "loss": 0.6529, "step": 96760 }, { "epoch": 27.839470655926352, "grad_norm": 1.8118788003921509, "learning_rate": 0.0014432105868814728, "loss": 0.654, "step": 96770 }, { "epoch": 27.842347525891828, "grad_norm": 2.616838216781616, "learning_rate": 0.0014431530494821633, "loss": 0.7649, "step": 96780 }, { "epoch": 27.845224395857308, "grad_norm": 1.5769442319869995, "learning_rate": 0.001443095512082854, "loss": 0.7692, "step": 96790 }, { "epoch": 27.848101265822784, "grad_norm": 2.150367498397827, "learning_rate": 0.0014430379746835443, "loss": 0.8271, "step": 96800 }, { "epoch": 27.850978135788264, "grad_norm": 0.634518563747406, "learning_rate": 0.0014429804372842348, "loss": 0.452, "step": 96810 }, { "epoch": 27.85385500575374, "grad_norm": 1.3104467391967773, "learning_rate": 0.0014429228998849254, "loss": 0.5609, "step": 96820 }, { "epoch": 27.856731875719216, "grad_norm": 1.4565532207489014, "learning_rate": 0.0014428653624856155, "loss": 0.4916, "step": 96830 }, { "epoch": 27.859608745684696, "grad_norm": 1.4469138383865356, "learning_rate": 0.001442807825086306, "loss": 0.8066, "step": 96840 }, { "epoch": 27.862485615650172, "grad_norm": 1.4676063060760498, "learning_rate": 0.0014427502876869966, "loss": 0.8119, "step": 96850 }, { "epoch": 27.865362485615652, "grad_norm": 0.9995054602622986, "learning_rate": 0.001442692750287687, "loss": 0.6253, "step": 96860 }, { "epoch": 27.868239355581128, "grad_norm": 1.1174333095550537, "learning_rate": 0.0014426352128883776, "loss": 0.6085, "step": 96870 }, { "epoch": 27.871116225546604, "grad_norm": 1.2660268545150757, "learning_rate": 0.001442577675489068, "loss": 0.6077, "step": 96880 }, { "epoch": 27.873993095512084, "grad_norm": 0.990644097328186, "learning_rate": 0.0014425201380897582, "loss": 0.7002, "step": 96890 }, { "epoch": 27.87686996547756, "grad_norm": 1.1996442079544067, "learning_rate": 0.0014424626006904488, "loss": 0.6411, "step": 96900 }, { "epoch": 27.879746835443036, "grad_norm": 1.6863807439804077, "learning_rate": 0.0014424050632911392, "loss": 0.5627, "step": 96910 }, { "epoch": 27.882623705408516, "grad_norm": 3.3692421913146973, "learning_rate": 0.0014423475258918297, "loss": 0.7451, "step": 96920 }, { "epoch": 27.885500575373992, "grad_norm": 0.9527097344398499, "learning_rate": 0.0014422899884925203, "loss": 0.6145, "step": 96930 }, { "epoch": 27.888377445339472, "grad_norm": 1.5514044761657715, "learning_rate": 0.0014422324510932106, "loss": 0.6106, "step": 96940 }, { "epoch": 27.89125431530495, "grad_norm": 1.56913423538208, "learning_rate": 0.001442174913693901, "loss": 0.812, "step": 96950 }, { "epoch": 27.894131185270425, "grad_norm": 1.9430761337280273, "learning_rate": 0.0014421173762945915, "loss": 0.7291, "step": 96960 }, { "epoch": 27.897008055235904, "grad_norm": 0.9690058827400208, "learning_rate": 0.001442059838895282, "loss": 0.6443, "step": 96970 }, { "epoch": 27.89988492520138, "grad_norm": 1.3823997974395752, "learning_rate": 0.0014420023014959725, "loss": 0.6535, "step": 96980 }, { "epoch": 27.90276179516686, "grad_norm": 1.437643051147461, "learning_rate": 0.0014419447640966628, "loss": 0.6941, "step": 96990 }, { "epoch": 27.905638665132336, "grad_norm": 1.3559435606002808, "learning_rate": 0.0014418872266973534, "loss": 0.6245, "step": 97000 }, { "epoch": 27.908515535097813, "grad_norm": 1.4793291091918945, "learning_rate": 0.0014418296892980437, "loss": 0.8406, "step": 97010 }, { "epoch": 27.911392405063292, "grad_norm": 0.7732558250427246, "learning_rate": 0.001441772151898734, "loss": 0.7857, "step": 97020 }, { "epoch": 27.91426927502877, "grad_norm": 1.3482942581176758, "learning_rate": 0.0014417146144994246, "loss": 0.5988, "step": 97030 }, { "epoch": 27.917146144994245, "grad_norm": 1.0565927028656006, "learning_rate": 0.0014416570771001152, "loss": 0.7144, "step": 97040 }, { "epoch": 27.920023014959725, "grad_norm": 0.9478973746299744, "learning_rate": 0.0014415995397008055, "loss": 0.7129, "step": 97050 }, { "epoch": 27.9228998849252, "grad_norm": 1.5035786628723145, "learning_rate": 0.001441542002301496, "loss": 0.6398, "step": 97060 }, { "epoch": 27.92577675489068, "grad_norm": 2.0482046604156494, "learning_rate": 0.0014414844649021864, "loss": 0.7204, "step": 97070 }, { "epoch": 27.928653624856157, "grad_norm": 1.7349302768707275, "learning_rate": 0.0014414269275028768, "loss": 0.717, "step": 97080 }, { "epoch": 27.931530494821633, "grad_norm": 0.9734586477279663, "learning_rate": 0.0014413693901035674, "loss": 1.0253, "step": 97090 }, { "epoch": 27.934407364787113, "grad_norm": 1.0016969442367554, "learning_rate": 0.0014413118527042577, "loss": 0.5938, "step": 97100 }, { "epoch": 27.93728423475259, "grad_norm": 1.834415316581726, "learning_rate": 0.0014412543153049483, "loss": 0.7248, "step": 97110 }, { "epoch": 27.940161104718065, "grad_norm": 2.016735792160034, "learning_rate": 0.0014411967779056388, "loss": 0.7405, "step": 97120 }, { "epoch": 27.943037974683545, "grad_norm": 2.3032748699188232, "learning_rate": 0.001441139240506329, "loss": 0.7346, "step": 97130 }, { "epoch": 27.94591484464902, "grad_norm": 1.607072114944458, "learning_rate": 0.0014410817031070195, "loss": 0.8153, "step": 97140 }, { "epoch": 27.9487917146145, "grad_norm": 1.7024438381195068, "learning_rate": 0.00144102416570771, "loss": 0.7368, "step": 97150 }, { "epoch": 27.951668584579977, "grad_norm": 0.797687292098999, "learning_rate": 0.0014409666283084004, "loss": 0.6292, "step": 97160 }, { "epoch": 27.954545454545453, "grad_norm": 1.5145604610443115, "learning_rate": 0.001440909090909091, "loss": 0.7818, "step": 97170 }, { "epoch": 27.957422324510933, "grad_norm": 1.5744107961654663, "learning_rate": 0.0014408515535097816, "loss": 0.7438, "step": 97180 }, { "epoch": 27.96029919447641, "grad_norm": 2.523552179336548, "learning_rate": 0.0014407940161104717, "loss": 0.6851, "step": 97190 }, { "epoch": 27.96317606444189, "grad_norm": 1.9552079439163208, "learning_rate": 0.0014407364787111623, "loss": 0.7838, "step": 97200 }, { "epoch": 27.966052934407365, "grad_norm": 2.3090574741363525, "learning_rate": 0.0014406789413118528, "loss": 0.9225, "step": 97210 }, { "epoch": 27.96892980437284, "grad_norm": 2.1354782581329346, "learning_rate": 0.0014406214039125432, "loss": 0.8005, "step": 97220 }, { "epoch": 27.97180667433832, "grad_norm": 1.8582384586334229, "learning_rate": 0.0014405638665132337, "loss": 0.7497, "step": 97230 }, { "epoch": 27.974683544303797, "grad_norm": 1.5105515718460083, "learning_rate": 0.001440506329113924, "loss": 0.5978, "step": 97240 }, { "epoch": 27.977560414269274, "grad_norm": 1.1895561218261719, "learning_rate": 0.0014404487917146144, "loss": 0.7759, "step": 97250 }, { "epoch": 27.980437284234753, "grad_norm": 0.7734619975090027, "learning_rate": 0.001440391254315305, "loss": 0.6249, "step": 97260 }, { "epoch": 27.98331415420023, "grad_norm": 1.767866849899292, "learning_rate": 0.0014403337169159953, "loss": 0.7495, "step": 97270 }, { "epoch": 27.98619102416571, "grad_norm": 0.8517055511474609, "learning_rate": 0.001440276179516686, "loss": 0.7416, "step": 97280 }, { "epoch": 27.989067894131185, "grad_norm": 1.0940243005752563, "learning_rate": 0.0014402186421173765, "loss": 0.7453, "step": 97290 }, { "epoch": 27.99194476409666, "grad_norm": 2.2657744884490967, "learning_rate": 0.0014401611047180668, "loss": 0.9389, "step": 97300 }, { "epoch": 27.99482163406214, "grad_norm": 1.6365020275115967, "learning_rate": 0.0014401035673187572, "loss": 0.7259, "step": 97310 }, { "epoch": 27.997698504027618, "grad_norm": 1.291898250579834, "learning_rate": 0.0014400460299194477, "loss": 0.7236, "step": 97320 }, { "epoch": 28.000575373993094, "grad_norm": 1.511910319328308, "learning_rate": 0.001439988492520138, "loss": 0.8835, "step": 97330 }, { "epoch": 28.003452243958574, "grad_norm": 1.4294387102127075, "learning_rate": 0.0014399309551208286, "loss": 0.7027, "step": 97340 }, { "epoch": 28.00632911392405, "grad_norm": 1.2089269161224365, "learning_rate": 0.001439873417721519, "loss": 0.7069, "step": 97350 }, { "epoch": 28.00920598388953, "grad_norm": 2.1111855506896973, "learning_rate": 0.0014398158803222095, "loss": 0.6501, "step": 97360 }, { "epoch": 28.012082853855006, "grad_norm": 1.6064276695251465, "learning_rate": 0.0014397583429229, "loss": 0.7118, "step": 97370 }, { "epoch": 28.014959723820482, "grad_norm": 1.3805758953094482, "learning_rate": 0.0014397008055235902, "loss": 0.6132, "step": 97380 }, { "epoch": 28.01783659378596, "grad_norm": 1.1136834621429443, "learning_rate": 0.0014396432681242808, "loss": 0.557, "step": 97390 }, { "epoch": 28.020713463751438, "grad_norm": 1.0009377002716064, "learning_rate": 0.0014395857307249714, "loss": 0.6865, "step": 97400 }, { "epoch": 28.023590333716918, "grad_norm": 1.4698728322982788, "learning_rate": 0.0014395281933256617, "loss": 0.7932, "step": 97410 }, { "epoch": 28.026467203682394, "grad_norm": 1.0051692724227905, "learning_rate": 0.0014394706559263523, "loss": 0.6909, "step": 97420 }, { "epoch": 28.02934407364787, "grad_norm": 1.873909592628479, "learning_rate": 0.0014394131185270426, "loss": 0.5997, "step": 97430 }, { "epoch": 28.03222094361335, "grad_norm": 1.8142043352127075, "learning_rate": 0.001439355581127733, "loss": 0.8037, "step": 97440 }, { "epoch": 28.035097813578826, "grad_norm": 1.3153867721557617, "learning_rate": 0.0014392980437284235, "loss": 0.7449, "step": 97450 }, { "epoch": 28.037974683544302, "grad_norm": 1.5305043458938599, "learning_rate": 0.0014392405063291139, "loss": 0.7427, "step": 97460 }, { "epoch": 28.040851553509782, "grad_norm": 1.0144333839416504, "learning_rate": 0.0014391829689298045, "loss": 0.6517, "step": 97470 }, { "epoch": 28.043728423475258, "grad_norm": 1.2833282947540283, "learning_rate": 0.001439125431530495, "loss": 0.6753, "step": 97480 }, { "epoch": 28.046605293440738, "grad_norm": 0.9613077044487, "learning_rate": 0.0014390678941311851, "loss": 0.5328, "step": 97490 }, { "epoch": 28.049482163406214, "grad_norm": 2.3539037704467773, "learning_rate": 0.0014390103567318757, "loss": 0.701, "step": 97500 }, { "epoch": 28.05235903337169, "grad_norm": 1.3868154287338257, "learning_rate": 0.0014389528193325663, "loss": 0.7744, "step": 97510 }, { "epoch": 28.05523590333717, "grad_norm": 1.5554585456848145, "learning_rate": 0.0014388952819332566, "loss": 0.5734, "step": 97520 }, { "epoch": 28.058112773302646, "grad_norm": 1.199549913406372, "learning_rate": 0.0014388377445339472, "loss": 0.6953, "step": 97530 }, { "epoch": 28.060989643268123, "grad_norm": 1.0151574611663818, "learning_rate": 0.0014387802071346377, "loss": 0.768, "step": 97540 }, { "epoch": 28.063866513233602, "grad_norm": 1.1557430028915405, "learning_rate": 0.0014387226697353279, "loss": 0.6664, "step": 97550 }, { "epoch": 28.06674338319908, "grad_norm": 1.5405027866363525, "learning_rate": 0.0014386651323360184, "loss": 0.6775, "step": 97560 }, { "epoch": 28.069620253164558, "grad_norm": 2.1850757598876953, "learning_rate": 0.0014386075949367088, "loss": 0.7737, "step": 97570 }, { "epoch": 28.072497123130034, "grad_norm": 1.1087802648544312, "learning_rate": 0.0014385500575373994, "loss": 0.7269, "step": 97580 }, { "epoch": 28.07537399309551, "grad_norm": 1.5149885416030884, "learning_rate": 0.00143849252013809, "loss": 0.6209, "step": 97590 }, { "epoch": 28.07825086306099, "grad_norm": 0.9795422554016113, "learning_rate": 0.00143843498273878, "loss": 0.7852, "step": 97600 }, { "epoch": 28.081127733026467, "grad_norm": 1.5914870500564575, "learning_rate": 0.0014383774453394706, "loss": 0.6986, "step": 97610 }, { "epoch": 28.084004602991946, "grad_norm": 2.0038046836853027, "learning_rate": 0.0014383199079401612, "loss": 0.8212, "step": 97620 }, { "epoch": 28.086881472957423, "grad_norm": 1.1892560720443726, "learning_rate": 0.0014382623705408515, "loss": 0.6439, "step": 97630 }, { "epoch": 28.0897583429229, "grad_norm": 1.0149086713790894, "learning_rate": 0.001438204833141542, "loss": 0.6667, "step": 97640 }, { "epoch": 28.09263521288838, "grad_norm": 0.8279122710227966, "learning_rate": 0.0014381472957422326, "loss": 0.6575, "step": 97650 }, { "epoch": 28.095512082853855, "grad_norm": 0.7821322679519653, "learning_rate": 0.0014380897583429228, "loss": 0.7929, "step": 97660 }, { "epoch": 28.09838895281933, "grad_norm": 1.0301743745803833, "learning_rate": 0.0014380322209436133, "loss": 0.6084, "step": 97670 }, { "epoch": 28.10126582278481, "grad_norm": 1.2463939189910889, "learning_rate": 0.0014379746835443037, "loss": 0.6333, "step": 97680 }, { "epoch": 28.104142692750287, "grad_norm": 0.9776997566223145, "learning_rate": 0.0014379171461449943, "loss": 0.6307, "step": 97690 }, { "epoch": 28.107019562715767, "grad_norm": 0.7751217484474182, "learning_rate": 0.0014378596087456848, "loss": 0.6366, "step": 97700 }, { "epoch": 28.109896432681243, "grad_norm": 2.211493968963623, "learning_rate": 0.0014378020713463752, "loss": 0.5605, "step": 97710 }, { "epoch": 28.11277330264672, "grad_norm": 1.4785486459732056, "learning_rate": 0.0014377445339470655, "loss": 0.596, "step": 97720 }, { "epoch": 28.1156501726122, "grad_norm": 1.3834582567214966, "learning_rate": 0.001437686996547756, "loss": 0.7262, "step": 97730 }, { "epoch": 28.118527042577675, "grad_norm": 1.1177774667739868, "learning_rate": 0.0014376294591484464, "loss": 0.6424, "step": 97740 }, { "epoch": 28.121403912543155, "grad_norm": 0.9352301955223083, "learning_rate": 0.001437571921749137, "loss": 0.6368, "step": 97750 }, { "epoch": 28.12428078250863, "grad_norm": 1.402906894683838, "learning_rate": 0.0014375143843498276, "loss": 0.5331, "step": 97760 }, { "epoch": 28.127157652474107, "grad_norm": 2.3335120677948, "learning_rate": 0.001437456846950518, "loss": 0.5854, "step": 97770 }, { "epoch": 28.130034522439587, "grad_norm": 1.5137008428573608, "learning_rate": 0.0014373993095512082, "loss": 0.5926, "step": 97780 }, { "epoch": 28.132911392405063, "grad_norm": 1.0502378940582275, "learning_rate": 0.0014373417721518988, "loss": 0.6812, "step": 97790 }, { "epoch": 28.13578826237054, "grad_norm": 1.896369457244873, "learning_rate": 0.0014372842347525892, "loss": 0.8535, "step": 97800 }, { "epoch": 28.13866513233602, "grad_norm": 1.709564208984375, "learning_rate": 0.0014372266973532797, "loss": 0.5901, "step": 97810 }, { "epoch": 28.141542002301495, "grad_norm": 1.395209550857544, "learning_rate": 0.00143716915995397, "loss": 0.6188, "step": 97820 }, { "epoch": 28.144418872266975, "grad_norm": 1.0691895484924316, "learning_rate": 0.0014371116225546606, "loss": 0.7208, "step": 97830 }, { "epoch": 28.14729574223245, "grad_norm": 1.306748390197754, "learning_rate": 0.001437054085155351, "loss": 0.7367, "step": 97840 }, { "epoch": 28.150172612197927, "grad_norm": 1.6381433010101318, "learning_rate": 0.0014369965477560413, "loss": 0.5788, "step": 97850 }, { "epoch": 28.153049482163407, "grad_norm": 1.8179728984832764, "learning_rate": 0.0014369390103567319, "loss": 0.648, "step": 97860 }, { "epoch": 28.155926352128883, "grad_norm": 1.6217199563980103, "learning_rate": 0.0014368814729574225, "loss": 0.854, "step": 97870 }, { "epoch": 28.15880322209436, "grad_norm": 0.7877699136734009, "learning_rate": 0.0014368239355581128, "loss": 0.6139, "step": 97880 }, { "epoch": 28.16168009205984, "grad_norm": 2.4069056510925293, "learning_rate": 0.0014367663981588034, "loss": 0.7513, "step": 97890 }, { "epoch": 28.164556962025316, "grad_norm": 0.6780787110328674, "learning_rate": 0.0014367088607594937, "loss": 0.554, "step": 97900 }, { "epoch": 28.167433831990795, "grad_norm": 0.8110470771789551, "learning_rate": 0.001436651323360184, "loss": 0.5614, "step": 97910 }, { "epoch": 28.17031070195627, "grad_norm": 1.2654101848602295, "learning_rate": 0.0014365937859608746, "loss": 0.7666, "step": 97920 }, { "epoch": 28.173187571921748, "grad_norm": 1.0159170627593994, "learning_rate": 0.001436536248561565, "loss": 0.5758, "step": 97930 }, { "epoch": 28.176064441887227, "grad_norm": 1.3628568649291992, "learning_rate": 0.0014364787111622555, "loss": 0.6937, "step": 97940 }, { "epoch": 28.178941311852704, "grad_norm": 2.330888271331787, "learning_rate": 0.001436421173762946, "loss": 0.6948, "step": 97950 }, { "epoch": 28.181818181818183, "grad_norm": 1.5139297246932983, "learning_rate": 0.0014363636363636362, "loss": 0.5609, "step": 97960 }, { "epoch": 28.18469505178366, "grad_norm": 1.2745767831802368, "learning_rate": 0.0014363060989643268, "loss": 0.7562, "step": 97970 }, { "epoch": 28.187571921749136, "grad_norm": 1.0337353944778442, "learning_rate": 0.0014362485615650174, "loss": 0.5474, "step": 97980 }, { "epoch": 28.190448791714616, "grad_norm": 0.8301578164100647, "learning_rate": 0.0014361910241657077, "loss": 0.6329, "step": 97990 }, { "epoch": 28.193325661680092, "grad_norm": 1.3586124181747437, "learning_rate": 0.0014361334867663983, "loss": 0.5912, "step": 98000 }, { "epoch": 28.196202531645568, "grad_norm": 0.7789419889450073, "learning_rate": 0.0014360759493670888, "loss": 0.8012, "step": 98010 }, { "epoch": 28.199079401611048, "grad_norm": 1.2240239381790161, "learning_rate": 0.001436018411967779, "loss": 0.5934, "step": 98020 }, { "epoch": 28.201956271576524, "grad_norm": 1.0114754438400269, "learning_rate": 0.0014359608745684695, "loss": 0.7528, "step": 98030 }, { "epoch": 28.204833141542004, "grad_norm": 1.2338272333145142, "learning_rate": 0.0014359033371691599, "loss": 0.5859, "step": 98040 }, { "epoch": 28.20771001150748, "grad_norm": 1.3190586566925049, "learning_rate": 0.0014358457997698504, "loss": 0.5737, "step": 98050 }, { "epoch": 28.210586881472956, "grad_norm": 1.847608208656311, "learning_rate": 0.001435788262370541, "loss": 0.659, "step": 98060 }, { "epoch": 28.213463751438436, "grad_norm": 2.1495094299316406, "learning_rate": 0.0014357307249712313, "loss": 0.6513, "step": 98070 }, { "epoch": 28.216340621403912, "grad_norm": 1.8112293481826782, "learning_rate": 0.0014356731875719217, "loss": 0.7039, "step": 98080 }, { "epoch": 28.219217491369392, "grad_norm": 1.3995788097381592, "learning_rate": 0.0014356156501726123, "loss": 0.6627, "step": 98090 }, { "epoch": 28.222094361334868, "grad_norm": 1.0831018686294556, "learning_rate": 0.0014355581127733026, "loss": 0.6192, "step": 98100 }, { "epoch": 28.224971231300344, "grad_norm": 1.9039380550384521, "learning_rate": 0.0014355005753739932, "loss": 0.787, "step": 98110 }, { "epoch": 28.227848101265824, "grad_norm": 1.037024974822998, "learning_rate": 0.0014354430379746837, "loss": 0.6617, "step": 98120 }, { "epoch": 28.2307249712313, "grad_norm": 0.9483423829078674, "learning_rate": 0.001435385500575374, "loss": 0.7267, "step": 98130 }, { "epoch": 28.233601841196776, "grad_norm": 1.0828449726104736, "learning_rate": 0.0014353279631760644, "loss": 0.6199, "step": 98140 }, { "epoch": 28.236478711162256, "grad_norm": 1.323148488998413, "learning_rate": 0.0014352704257767548, "loss": 0.638, "step": 98150 }, { "epoch": 28.239355581127732, "grad_norm": 1.975509762763977, "learning_rate": 0.0014352128883774453, "loss": 0.7615, "step": 98160 }, { "epoch": 28.242232451093212, "grad_norm": 0.6430041193962097, "learning_rate": 0.001435155350978136, "loss": 0.5903, "step": 98170 }, { "epoch": 28.24510932105869, "grad_norm": 1.094052791595459, "learning_rate": 0.0014350978135788262, "loss": 0.76, "step": 98180 }, { "epoch": 28.247986191024165, "grad_norm": 2.0932250022888184, "learning_rate": 0.0014350402761795168, "loss": 0.7166, "step": 98190 }, { "epoch": 28.250863060989644, "grad_norm": 0.9113301634788513, "learning_rate": 0.0014349827387802072, "loss": 0.4544, "step": 98200 }, { "epoch": 28.25373993095512, "grad_norm": 1.8334654569625854, "learning_rate": 0.0014349252013808975, "loss": 0.7276, "step": 98210 }, { "epoch": 28.256616800920597, "grad_norm": 0.8809409141540527, "learning_rate": 0.001434867663981588, "loss": 0.6147, "step": 98220 }, { "epoch": 28.259493670886076, "grad_norm": 0.5532363653182983, "learning_rate": 0.0014348101265822786, "loss": 0.7765, "step": 98230 }, { "epoch": 28.262370540851553, "grad_norm": 2.03109073638916, "learning_rate": 0.001434752589182969, "loss": 0.6232, "step": 98240 }, { "epoch": 28.265247410817032, "grad_norm": 1.3214462995529175, "learning_rate": 0.0014346950517836595, "loss": 0.5821, "step": 98250 }, { "epoch": 28.26812428078251, "grad_norm": 1.2234344482421875, "learning_rate": 0.0014346375143843497, "loss": 0.7414, "step": 98260 }, { "epoch": 28.271001150747985, "grad_norm": 2.4651424884796143, "learning_rate": 0.0014345799769850402, "loss": 0.9222, "step": 98270 }, { "epoch": 28.273878020713465, "grad_norm": 1.4422968626022339, "learning_rate": 0.0014345224395857308, "loss": 0.5217, "step": 98280 }, { "epoch": 28.27675489067894, "grad_norm": 0.6502865552902222, "learning_rate": 0.0014344649021864212, "loss": 0.5305, "step": 98290 }, { "epoch": 28.27963176064442, "grad_norm": 2.130000591278076, "learning_rate": 0.0014344073647871117, "loss": 0.8475, "step": 98300 }, { "epoch": 28.282508630609897, "grad_norm": 1.308244228363037, "learning_rate": 0.0014343498273878023, "loss": 0.533, "step": 98310 }, { "epoch": 28.285385500575373, "grad_norm": 2.591731309890747, "learning_rate": 0.0014342922899884924, "loss": 0.7228, "step": 98320 }, { "epoch": 28.288262370540853, "grad_norm": 1.3767067193984985, "learning_rate": 0.001434234752589183, "loss": 0.7783, "step": 98330 }, { "epoch": 28.29113924050633, "grad_norm": 1.0525318384170532, "learning_rate": 0.0014341772151898735, "loss": 0.555, "step": 98340 }, { "epoch": 28.294016110471805, "grad_norm": 1.451111078262329, "learning_rate": 0.0014341196777905639, "loss": 0.7483, "step": 98350 }, { "epoch": 28.296892980437285, "grad_norm": 1.2686001062393188, "learning_rate": 0.0014340621403912544, "loss": 0.9404, "step": 98360 }, { "epoch": 28.29976985040276, "grad_norm": 1.4169892072677612, "learning_rate": 0.0014340046029919446, "loss": 0.8037, "step": 98370 }, { "epoch": 28.30264672036824, "grad_norm": 1.1616029739379883, "learning_rate": 0.0014339470655926351, "loss": 0.6856, "step": 98380 }, { "epoch": 28.305523590333717, "grad_norm": 1.382926344871521, "learning_rate": 0.0014338895281933257, "loss": 0.5391, "step": 98390 }, { "epoch": 28.308400460299193, "grad_norm": 1.4553271532058716, "learning_rate": 0.001433831990794016, "loss": 0.6524, "step": 98400 }, { "epoch": 28.311277330264673, "grad_norm": 1.3226507902145386, "learning_rate": 0.0014337744533947066, "loss": 0.6881, "step": 98410 }, { "epoch": 28.31415420023015, "grad_norm": 2.1186676025390625, "learning_rate": 0.0014337169159953972, "loss": 0.755, "step": 98420 }, { "epoch": 28.317031070195625, "grad_norm": 1.213297963142395, "learning_rate": 0.0014336593785960873, "loss": 0.6694, "step": 98430 }, { "epoch": 28.319907940161105, "grad_norm": 1.1289851665496826, "learning_rate": 0.0014336018411967779, "loss": 0.5801, "step": 98440 }, { "epoch": 28.32278481012658, "grad_norm": 1.1488919258117676, "learning_rate": 0.0014335443037974684, "loss": 0.6662, "step": 98450 }, { "epoch": 28.32566168009206, "grad_norm": 1.2323946952819824, "learning_rate": 0.0014334867663981588, "loss": 0.7106, "step": 98460 }, { "epoch": 28.328538550057537, "grad_norm": 1.6748040914535522, "learning_rate": 0.0014334292289988494, "loss": 0.7852, "step": 98470 }, { "epoch": 28.331415420023013, "grad_norm": 1.1398882865905762, "learning_rate": 0.00143337169159954, "loss": 0.6791, "step": 98480 }, { "epoch": 28.334292289988493, "grad_norm": 1.5353070497512817, "learning_rate": 0.00143331415420023, "loss": 0.7021, "step": 98490 }, { "epoch": 28.33716915995397, "grad_norm": 0.8533285856246948, "learning_rate": 0.0014332566168009206, "loss": 0.6009, "step": 98500 }, { "epoch": 28.34004602991945, "grad_norm": 1.8650572299957275, "learning_rate": 0.001433199079401611, "loss": 0.7669, "step": 98510 }, { "epoch": 28.342922899884925, "grad_norm": 1.1858363151550293, "learning_rate": 0.0014331415420023015, "loss": 0.6153, "step": 98520 }, { "epoch": 28.3457997698504, "grad_norm": 1.4572243690490723, "learning_rate": 0.001433084004602992, "loss": 0.4324, "step": 98530 }, { "epoch": 28.34867663981588, "grad_norm": 0.6846284866333008, "learning_rate": 0.0014330264672036824, "loss": 0.6697, "step": 98540 }, { "epoch": 28.351553509781358, "grad_norm": 1.4328490495681763, "learning_rate": 0.0014329689298043728, "loss": 0.8168, "step": 98550 }, { "epoch": 28.354430379746834, "grad_norm": 1.369495153427124, "learning_rate": 0.0014329113924050633, "loss": 0.7532, "step": 98560 }, { "epoch": 28.357307249712314, "grad_norm": 0.9216927886009216, "learning_rate": 0.0014328538550057537, "loss": 0.696, "step": 98570 }, { "epoch": 28.36018411967779, "grad_norm": 1.640745997428894, "learning_rate": 0.0014327963176064443, "loss": 0.6421, "step": 98580 }, { "epoch": 28.36306098964327, "grad_norm": 1.0160192251205444, "learning_rate": 0.0014327387802071348, "loss": 0.5099, "step": 98590 }, { "epoch": 28.365937859608746, "grad_norm": 1.0819953680038452, "learning_rate": 0.0014326812428078252, "loss": 0.5913, "step": 98600 }, { "epoch": 28.368814729574222, "grad_norm": 2.327833890914917, "learning_rate": 0.0014326237054085155, "loss": 0.6865, "step": 98610 }, { "epoch": 28.3716915995397, "grad_norm": 1.5297539234161377, "learning_rate": 0.0014325661680092059, "loss": 0.6155, "step": 98620 }, { "epoch": 28.374568469505178, "grad_norm": 0.9148325324058533, "learning_rate": 0.0014325086306098964, "loss": 0.7137, "step": 98630 }, { "epoch": 28.377445339470658, "grad_norm": 1.0345656871795654, "learning_rate": 0.001432451093210587, "loss": 0.6861, "step": 98640 }, { "epoch": 28.380322209436134, "grad_norm": 1.71079683303833, "learning_rate": 0.0014323935558112773, "loss": 0.5167, "step": 98650 }, { "epoch": 28.38319907940161, "grad_norm": 1.6320323944091797, "learning_rate": 0.001432336018411968, "loss": 0.6928, "step": 98660 }, { "epoch": 28.38607594936709, "grad_norm": 1.385824203491211, "learning_rate": 0.0014322784810126582, "loss": 0.5328, "step": 98670 }, { "epoch": 28.388952819332566, "grad_norm": 1.3141168355941772, "learning_rate": 0.0014322209436133486, "loss": 0.6227, "step": 98680 }, { "epoch": 28.391829689298042, "grad_norm": 1.7949844598770142, "learning_rate": 0.0014321634062140392, "loss": 0.6253, "step": 98690 }, { "epoch": 28.394706559263522, "grad_norm": 1.145257830619812, "learning_rate": 0.0014321058688147297, "loss": 0.6529, "step": 98700 }, { "epoch": 28.397583429228998, "grad_norm": 0.9867374897003174, "learning_rate": 0.00143204833141542, "loss": 0.9288, "step": 98710 }, { "epoch": 28.400460299194478, "grad_norm": 1.7046442031860352, "learning_rate": 0.0014319907940161106, "loss": 0.6754, "step": 98720 }, { "epoch": 28.403337169159954, "grad_norm": 1.1510517597198486, "learning_rate": 0.0014319332566168008, "loss": 0.8235, "step": 98730 }, { "epoch": 28.40621403912543, "grad_norm": 2.3206946849823, "learning_rate": 0.0014318757192174913, "loss": 0.845, "step": 98740 }, { "epoch": 28.40909090909091, "grad_norm": 0.9535107612609863, "learning_rate": 0.0014318181818181819, "loss": 0.6225, "step": 98750 }, { "epoch": 28.411967779056386, "grad_norm": 1.3100228309631348, "learning_rate": 0.0014317606444188722, "loss": 0.6647, "step": 98760 }, { "epoch": 28.414844649021862, "grad_norm": 1.359700083732605, "learning_rate": 0.0014317031070195628, "loss": 0.7119, "step": 98770 }, { "epoch": 28.417721518987342, "grad_norm": 1.0778745412826538, "learning_rate": 0.0014316455696202534, "loss": 0.5935, "step": 98780 }, { "epoch": 28.42059838895282, "grad_norm": 0.9209002256393433, "learning_rate": 0.0014315880322209435, "loss": 0.6134, "step": 98790 }, { "epoch": 28.423475258918298, "grad_norm": 1.2857105731964111, "learning_rate": 0.001431530494821634, "loss": 0.7455, "step": 98800 }, { "epoch": 28.426352128883774, "grad_norm": 0.7684097290039062, "learning_rate": 0.0014314729574223246, "loss": 0.7903, "step": 98810 }, { "epoch": 28.42922899884925, "grad_norm": 0.7647650241851807, "learning_rate": 0.001431415420023015, "loss": 0.6042, "step": 98820 }, { "epoch": 28.43210586881473, "grad_norm": 0.8957035541534424, "learning_rate": 0.0014313578826237055, "loss": 0.6381, "step": 98830 }, { "epoch": 28.434982738780207, "grad_norm": 1.2778445482254028, "learning_rate": 0.0014313003452243959, "loss": 0.7818, "step": 98840 }, { "epoch": 28.437859608745686, "grad_norm": 2.1395905017852783, "learning_rate": 0.0014312428078250862, "loss": 0.7305, "step": 98850 }, { "epoch": 28.440736478711163, "grad_norm": 1.3298672437667847, "learning_rate": 0.0014311852704257768, "loss": 0.6785, "step": 98860 }, { "epoch": 28.44361334867664, "grad_norm": 1.5304263830184937, "learning_rate": 0.0014311277330264671, "loss": 0.5877, "step": 98870 }, { "epoch": 28.44649021864212, "grad_norm": 1.0572552680969238, "learning_rate": 0.0014310701956271577, "loss": 0.6015, "step": 98880 }, { "epoch": 28.449367088607595, "grad_norm": 1.2031645774841309, "learning_rate": 0.0014310126582278483, "loss": 0.6293, "step": 98890 }, { "epoch": 28.45224395857307, "grad_norm": 1.2519503831863403, "learning_rate": 0.0014309551208285386, "loss": 0.5962, "step": 98900 }, { "epoch": 28.45512082853855, "grad_norm": 1.6100794076919556, "learning_rate": 0.001430897583429229, "loss": 0.7012, "step": 98910 }, { "epoch": 28.457997698504027, "grad_norm": 1.9431267976760864, "learning_rate": 0.0014308400460299195, "loss": 0.6841, "step": 98920 }, { "epoch": 28.460874568469507, "grad_norm": 1.138036847114563, "learning_rate": 0.0014307825086306099, "loss": 0.7092, "step": 98930 }, { "epoch": 28.463751438434983, "grad_norm": 2.501084566116333, "learning_rate": 0.0014307249712313004, "loss": 0.8942, "step": 98940 }, { "epoch": 28.46662830840046, "grad_norm": 1.3244998455047607, "learning_rate": 0.0014306674338319908, "loss": 0.7106, "step": 98950 }, { "epoch": 28.46950517836594, "grad_norm": 1.8157305717468262, "learning_rate": 0.0014306098964326813, "loss": 0.7378, "step": 98960 }, { "epoch": 28.472382048331415, "grad_norm": 1.8400204181671143, "learning_rate": 0.0014305523590333717, "loss": 0.6164, "step": 98970 }, { "epoch": 28.475258918296895, "grad_norm": 0.9884552955627441, "learning_rate": 0.001430494821634062, "loss": 0.7962, "step": 98980 }, { "epoch": 28.47813578826237, "grad_norm": 0.8592895865440369, "learning_rate": 0.0014304372842347526, "loss": 0.5543, "step": 98990 }, { "epoch": 28.481012658227847, "grad_norm": 2.699090003967285, "learning_rate": 0.0014303797468354432, "loss": 0.7248, "step": 99000 }, { "epoch": 28.483889528193327, "grad_norm": 1.1632367372512817, "learning_rate": 0.0014303222094361335, "loss": 0.7216, "step": 99010 }, { "epoch": 28.486766398158803, "grad_norm": 2.0491180419921875, "learning_rate": 0.001430264672036824, "loss": 0.6978, "step": 99020 }, { "epoch": 28.48964326812428, "grad_norm": 1.5351592302322388, "learning_rate": 0.0014302071346375144, "loss": 0.6272, "step": 99030 }, { "epoch": 28.49252013808976, "grad_norm": 1.2814348936080933, "learning_rate": 0.0014301495972382048, "loss": 0.7046, "step": 99040 }, { "epoch": 28.495397008055235, "grad_norm": 1.7829711437225342, "learning_rate": 0.0014300920598388953, "loss": 0.6775, "step": 99050 }, { "epoch": 28.498273878020715, "grad_norm": 1.3890475034713745, "learning_rate": 0.0014300345224395857, "loss": 0.6394, "step": 99060 }, { "epoch": 28.50115074798619, "grad_norm": 1.055866003036499, "learning_rate": 0.0014299769850402762, "loss": 0.57, "step": 99070 }, { "epoch": 28.504027617951667, "grad_norm": 1.3375838994979858, "learning_rate": 0.0014299194476409668, "loss": 0.669, "step": 99080 }, { "epoch": 28.506904487917147, "grad_norm": 0.9185505509376526, "learning_rate": 0.001429861910241657, "loss": 0.5788, "step": 99090 }, { "epoch": 28.509781357882623, "grad_norm": 1.371253252029419, "learning_rate": 0.0014298043728423475, "loss": 0.6062, "step": 99100 }, { "epoch": 28.5126582278481, "grad_norm": 2.13139009475708, "learning_rate": 0.001429746835443038, "loss": 0.7237, "step": 99110 }, { "epoch": 28.51553509781358, "grad_norm": 1.0249367952346802, "learning_rate": 0.0014296892980437284, "loss": 0.6656, "step": 99120 }, { "epoch": 28.518411967779056, "grad_norm": 1.4396257400512695, "learning_rate": 0.001429631760644419, "loss": 0.5953, "step": 99130 }, { "epoch": 28.521288837744535, "grad_norm": 1.0447410345077515, "learning_rate": 0.0014295742232451095, "loss": 0.6941, "step": 99140 }, { "epoch": 28.52416570771001, "grad_norm": 1.7064367532730103, "learning_rate": 0.0014295166858457997, "loss": 0.7033, "step": 99150 }, { "epoch": 28.527042577675488, "grad_norm": 1.6867141723632812, "learning_rate": 0.0014294591484464902, "loss": 0.722, "step": 99160 }, { "epoch": 28.529919447640967, "grad_norm": 0.8285651206970215, "learning_rate": 0.0014294016110471808, "loss": 0.7272, "step": 99170 }, { "epoch": 28.532796317606444, "grad_norm": 1.8853546380996704, "learning_rate": 0.0014293440736478711, "loss": 0.6344, "step": 99180 }, { "epoch": 28.535673187571923, "grad_norm": 1.6656577587127686, "learning_rate": 0.0014292865362485617, "loss": 0.8461, "step": 99190 }, { "epoch": 28.5385500575374, "grad_norm": 1.2320786714553833, "learning_rate": 0.0014292289988492518, "loss": 0.6896, "step": 99200 }, { "epoch": 28.541426927502876, "grad_norm": 1.7326442003250122, "learning_rate": 0.0014291714614499424, "loss": 0.7656, "step": 99210 }, { "epoch": 28.544303797468356, "grad_norm": 1.2264807224273682, "learning_rate": 0.001429113924050633, "loss": 0.79, "step": 99220 }, { "epoch": 28.54718066743383, "grad_norm": 1.0391507148742676, "learning_rate": 0.0014290563866513233, "loss": 0.5373, "step": 99230 }, { "epoch": 28.550057537399308, "grad_norm": 0.6191099882125854, "learning_rate": 0.0014289988492520139, "loss": 0.7642, "step": 99240 }, { "epoch": 28.552934407364788, "grad_norm": 1.7470569610595703, "learning_rate": 0.0014289413118527044, "loss": 0.7452, "step": 99250 }, { "epoch": 28.555811277330264, "grad_norm": 1.308512806892395, "learning_rate": 0.0014288837744533946, "loss": 0.7364, "step": 99260 }, { "epoch": 28.558688147295744, "grad_norm": 1.3094290494918823, "learning_rate": 0.0014288262370540851, "loss": 0.7749, "step": 99270 }, { "epoch": 28.56156501726122, "grad_norm": 0.8627744317054749, "learning_rate": 0.0014287686996547757, "loss": 0.6296, "step": 99280 }, { "epoch": 28.564441887226696, "grad_norm": 1.269110083580017, "learning_rate": 0.001428711162255466, "loss": 0.6494, "step": 99290 }, { "epoch": 28.567318757192176, "grad_norm": 1.2725911140441895, "learning_rate": 0.0014286536248561566, "loss": 0.6529, "step": 99300 }, { "epoch": 28.570195627157652, "grad_norm": 0.8493912816047668, "learning_rate": 0.001428596087456847, "loss": 0.7652, "step": 99310 }, { "epoch": 28.57307249712313, "grad_norm": 1.44217050075531, "learning_rate": 0.0014285385500575373, "loss": 0.6829, "step": 99320 }, { "epoch": 28.575949367088608, "grad_norm": 0.9450591206550598, "learning_rate": 0.0014284810126582279, "loss": 0.7894, "step": 99330 }, { "epoch": 28.578826237054084, "grad_norm": 1.8039335012435913, "learning_rate": 0.0014284234752589182, "loss": 0.6511, "step": 99340 }, { "epoch": 28.581703107019564, "grad_norm": 2.3075954914093018, "learning_rate": 0.0014283659378596088, "loss": 0.8356, "step": 99350 }, { "epoch": 28.58457997698504, "grad_norm": 1.3764457702636719, "learning_rate": 0.0014283084004602993, "loss": 0.6403, "step": 99360 }, { "epoch": 28.587456846950516, "grad_norm": 0.7210388779640198, "learning_rate": 0.0014282508630609897, "loss": 0.6174, "step": 99370 }, { "epoch": 28.590333716915996, "grad_norm": 1.0732489824295044, "learning_rate": 0.00142819332566168, "loss": 0.6957, "step": 99380 }, { "epoch": 28.593210586881472, "grad_norm": 3.549065589904785, "learning_rate": 0.0014281357882623706, "loss": 0.6874, "step": 99390 }, { "epoch": 28.596087456846952, "grad_norm": 0.9555190205574036, "learning_rate": 0.001428078250863061, "loss": 0.6994, "step": 99400 }, { "epoch": 28.59896432681243, "grad_norm": 1.7270307540893555, "learning_rate": 0.0014280207134637515, "loss": 0.5982, "step": 99410 }, { "epoch": 28.601841196777904, "grad_norm": 1.7267930507659912, "learning_rate": 0.0014279631760644419, "loss": 0.6385, "step": 99420 }, { "epoch": 28.604718066743384, "grad_norm": 2.2212321758270264, "learning_rate": 0.0014279056386651324, "loss": 0.6725, "step": 99430 }, { "epoch": 28.60759493670886, "grad_norm": 2.210758686065674, "learning_rate": 0.0014278481012658228, "loss": 0.739, "step": 99440 }, { "epoch": 28.610471806674337, "grad_norm": 2.202075481414795, "learning_rate": 0.0014277905638665131, "loss": 0.6953, "step": 99450 }, { "epoch": 28.613348676639816, "grad_norm": 0.9929065704345703, "learning_rate": 0.0014277330264672037, "loss": 0.6586, "step": 99460 }, { "epoch": 28.616225546605293, "grad_norm": 2.0093114376068115, "learning_rate": 0.0014276754890678942, "loss": 0.647, "step": 99470 }, { "epoch": 28.619102416570772, "grad_norm": 0.7824351191520691, "learning_rate": 0.0014276179516685846, "loss": 0.7532, "step": 99480 }, { "epoch": 28.62197928653625, "grad_norm": 2.1754839420318604, "learning_rate": 0.0014275604142692752, "loss": 0.5969, "step": 99490 }, { "epoch": 28.624856156501725, "grad_norm": 1.2212529182434082, "learning_rate": 0.0014275028768699655, "loss": 0.7253, "step": 99500 }, { "epoch": 28.627733026467205, "grad_norm": 0.6635967493057251, "learning_rate": 0.0014274453394706559, "loss": 0.6024, "step": 99510 }, { "epoch": 28.63060989643268, "grad_norm": 0.9594953060150146, "learning_rate": 0.0014273878020713464, "loss": 0.6715, "step": 99520 }, { "epoch": 28.63348676639816, "grad_norm": 2.01204252243042, "learning_rate": 0.0014273302646720368, "loss": 0.7467, "step": 99530 }, { "epoch": 28.636363636363637, "grad_norm": 1.7073643207550049, "learning_rate": 0.0014272727272727273, "loss": 0.7449, "step": 99540 }, { "epoch": 28.639240506329113, "grad_norm": 1.6420520544052124, "learning_rate": 0.001427215189873418, "loss": 0.8711, "step": 99550 }, { "epoch": 28.642117376294593, "grad_norm": 2.0470943450927734, "learning_rate": 0.001427157652474108, "loss": 0.7537, "step": 99560 }, { "epoch": 28.64499424626007, "grad_norm": 2.4208407402038574, "learning_rate": 0.0014271001150747986, "loss": 0.7161, "step": 99570 }, { "epoch": 28.647871116225545, "grad_norm": 0.7606947422027588, "learning_rate": 0.0014270425776754892, "loss": 0.7019, "step": 99580 }, { "epoch": 28.650747986191025, "grad_norm": 1.4744502305984497, "learning_rate": 0.0014269850402761795, "loss": 0.5916, "step": 99590 }, { "epoch": 28.6536248561565, "grad_norm": 1.243086576461792, "learning_rate": 0.00142692750287687, "loss": 0.5616, "step": 99600 }, { "epoch": 28.65650172612198, "grad_norm": 1.3934212923049927, "learning_rate": 0.0014268699654775606, "loss": 0.6966, "step": 99610 }, { "epoch": 28.659378596087457, "grad_norm": 0.9705784320831299, "learning_rate": 0.0014268124280782508, "loss": 0.5952, "step": 99620 }, { "epoch": 28.662255466052933, "grad_norm": 1.2310642004013062, "learning_rate": 0.0014267548906789413, "loss": 0.6498, "step": 99630 }, { "epoch": 28.665132336018413, "grad_norm": 1.4567198753356934, "learning_rate": 0.0014266973532796317, "loss": 0.7289, "step": 99640 }, { "epoch": 28.66800920598389, "grad_norm": 1.4031249284744263, "learning_rate": 0.0014266398158803222, "loss": 0.7861, "step": 99650 }, { "epoch": 28.67088607594937, "grad_norm": 0.6420433521270752, "learning_rate": 0.0014265822784810128, "loss": 0.6567, "step": 99660 }, { "epoch": 28.673762945914845, "grad_norm": 1.0651925802230835, "learning_rate": 0.0014265247410817031, "loss": 0.6817, "step": 99670 }, { "epoch": 28.67663981588032, "grad_norm": 1.7716073989868164, "learning_rate": 0.0014264672036823935, "loss": 0.5623, "step": 99680 }, { "epoch": 28.6795166858458, "grad_norm": 1.2701252698898315, "learning_rate": 0.001426409666283084, "loss": 0.639, "step": 99690 }, { "epoch": 28.682393555811277, "grad_norm": 0.9520374536514282, "learning_rate": 0.0014263521288837744, "loss": 0.8001, "step": 99700 }, { "epoch": 28.685270425776753, "grad_norm": 0.99726802110672, "learning_rate": 0.001426294591484465, "loss": 0.6525, "step": 99710 }, { "epoch": 28.688147295742233, "grad_norm": 1.3792122602462769, "learning_rate": 0.0014262370540851555, "loss": 0.6734, "step": 99720 }, { "epoch": 28.69102416570771, "grad_norm": 1.7227833271026611, "learning_rate": 0.0014261795166858459, "loss": 0.7163, "step": 99730 }, { "epoch": 28.69390103567319, "grad_norm": 1.2553168535232544, "learning_rate": 0.0014261219792865362, "loss": 0.7257, "step": 99740 }, { "epoch": 28.696777905638665, "grad_norm": 1.449950933456421, "learning_rate": 0.0014260644418872268, "loss": 0.5978, "step": 99750 }, { "epoch": 28.69965477560414, "grad_norm": 1.1350690126419067, "learning_rate": 0.0014260069044879171, "loss": 0.6616, "step": 99760 }, { "epoch": 28.70253164556962, "grad_norm": 1.5425450801849365, "learning_rate": 0.0014259493670886077, "loss": 0.688, "step": 99770 }, { "epoch": 28.705408515535098, "grad_norm": 1.065254807472229, "learning_rate": 0.001425891829689298, "loss": 0.8727, "step": 99780 }, { "epoch": 28.708285385500574, "grad_norm": 0.6773955225944519, "learning_rate": 0.0014258342922899886, "loss": 0.5191, "step": 99790 }, { "epoch": 28.711162255466053, "grad_norm": 1.0951775312423706, "learning_rate": 0.001425776754890679, "loss": 0.5979, "step": 99800 }, { "epoch": 28.71403912543153, "grad_norm": 0.5733194351196289, "learning_rate": 0.0014257192174913693, "loss": 0.6137, "step": 99810 }, { "epoch": 28.71691599539701, "grad_norm": 0.8540785312652588, "learning_rate": 0.0014256616800920599, "loss": 0.9005, "step": 99820 }, { "epoch": 28.719792865362486, "grad_norm": 0.8305344581604004, "learning_rate": 0.0014256041426927504, "loss": 0.61, "step": 99830 }, { "epoch": 28.722669735327962, "grad_norm": 1.481332540512085, "learning_rate": 0.0014255466052934408, "loss": 0.6109, "step": 99840 }, { "epoch": 28.72554660529344, "grad_norm": 0.9147358536720276, "learning_rate": 0.0014254890678941313, "loss": 0.848, "step": 99850 }, { "epoch": 28.728423475258918, "grad_norm": 1.1315455436706543, "learning_rate": 0.0014254315304948217, "loss": 0.6254, "step": 99860 }, { "epoch": 28.731300345224398, "grad_norm": 1.7606446743011475, "learning_rate": 0.001425373993095512, "loss": 0.8, "step": 99870 }, { "epoch": 28.734177215189874, "grad_norm": 0.9691432118415833, "learning_rate": 0.0014253164556962026, "loss": 0.7229, "step": 99880 }, { "epoch": 28.73705408515535, "grad_norm": 1.7268575429916382, "learning_rate": 0.001425258918296893, "loss": 0.7835, "step": 99890 }, { "epoch": 28.73993095512083, "grad_norm": 1.2962415218353271, "learning_rate": 0.0014252013808975835, "loss": 0.6277, "step": 99900 }, { "epoch": 28.742807825086306, "grad_norm": 1.234806776046753, "learning_rate": 0.001425143843498274, "loss": 0.6151, "step": 99910 }, { "epoch": 28.745684695051782, "grad_norm": 1.3363878726959229, "learning_rate": 0.0014250863060989642, "loss": 0.5945, "step": 99920 }, { "epoch": 28.748561565017262, "grad_norm": 0.9980052709579468, "learning_rate": 0.0014250287686996548, "loss": 0.7816, "step": 99930 }, { "epoch": 28.751438434982738, "grad_norm": 0.973694384098053, "learning_rate": 0.0014249712313003453, "loss": 0.7701, "step": 99940 }, { "epoch": 28.754315304948218, "grad_norm": 1.1978965997695923, "learning_rate": 0.0014249136939010357, "loss": 0.6773, "step": 99950 }, { "epoch": 28.757192174913694, "grad_norm": 0.9392955899238586, "learning_rate": 0.0014248561565017262, "loss": 0.7602, "step": 99960 }, { "epoch": 28.76006904487917, "grad_norm": 0.7919186353683472, "learning_rate": 0.0014247986191024168, "loss": 0.6457, "step": 99970 }, { "epoch": 28.76294591484465, "grad_norm": 1.0963038206100464, "learning_rate": 0.001424741081703107, "loss": 0.7841, "step": 99980 }, { "epoch": 28.765822784810126, "grad_norm": 1.1941066980361938, "learning_rate": 0.0014246835443037975, "loss": 0.669, "step": 99990 }, { "epoch": 28.768699654775602, "grad_norm": 1.3376426696777344, "learning_rate": 0.0014246260069044878, "loss": 0.6314, "step": 100000 }, { "epoch": 28.771576524741082, "grad_norm": 1.0713906288146973, "learning_rate": 0.0014245684695051784, "loss": 0.6966, "step": 100010 }, { "epoch": 28.77445339470656, "grad_norm": 2.3070688247680664, "learning_rate": 0.001424510932105869, "loss": 0.5779, "step": 100020 }, { "epoch": 28.777330264672038, "grad_norm": 1.1645559072494507, "learning_rate": 0.001424453394706559, "loss": 0.6505, "step": 100030 }, { "epoch": 28.780207134637514, "grad_norm": 1.569300889968872, "learning_rate": 0.0014243958573072497, "loss": 0.7573, "step": 100040 }, { "epoch": 28.78308400460299, "grad_norm": 1.6149574518203735, "learning_rate": 0.0014243383199079402, "loss": 0.7108, "step": 100050 }, { "epoch": 28.78596087456847, "grad_norm": 2.2455294132232666, "learning_rate": 0.0014242807825086306, "loss": 0.7073, "step": 100060 }, { "epoch": 28.788837744533947, "grad_norm": 2.4858481884002686, "learning_rate": 0.0014242232451093211, "loss": 0.8097, "step": 100070 }, { "epoch": 28.791714614499426, "grad_norm": 0.9411770105361938, "learning_rate": 0.0014241657077100117, "loss": 0.7238, "step": 100080 }, { "epoch": 28.794591484464902, "grad_norm": 1.1248048543930054, "learning_rate": 0.0014241081703107018, "loss": 0.6377, "step": 100090 }, { "epoch": 28.79746835443038, "grad_norm": 1.2690339088439941, "learning_rate": 0.0014240506329113924, "loss": 0.6209, "step": 100100 }, { "epoch": 28.80034522439586, "grad_norm": 1.400866985321045, "learning_rate": 0.0014239930955120828, "loss": 0.7936, "step": 100110 }, { "epoch": 28.803222094361335, "grad_norm": 0.8230043649673462, "learning_rate": 0.0014239355581127733, "loss": 0.686, "step": 100120 }, { "epoch": 28.80609896432681, "grad_norm": 2.1463260650634766, "learning_rate": 0.0014238780207134639, "loss": 0.7375, "step": 100130 }, { "epoch": 28.80897583429229, "grad_norm": 1.117856502532959, "learning_rate": 0.0014238204833141542, "loss": 0.7355, "step": 100140 }, { "epoch": 28.811852704257767, "grad_norm": 1.0617092847824097, "learning_rate": 0.0014237629459148446, "loss": 0.767, "step": 100150 }, { "epoch": 28.814729574223247, "grad_norm": 0.8424966335296631, "learning_rate": 0.0014237054085155351, "loss": 0.7359, "step": 100160 }, { "epoch": 28.817606444188723, "grad_norm": 1.5688481330871582, "learning_rate": 0.0014236478711162255, "loss": 0.6372, "step": 100170 }, { "epoch": 28.8204833141542, "grad_norm": 0.9874265789985657, "learning_rate": 0.001423590333716916, "loss": 0.5237, "step": 100180 }, { "epoch": 28.82336018411968, "grad_norm": 1.0820988416671753, "learning_rate": 0.0014235327963176066, "loss": 0.7115, "step": 100190 }, { "epoch": 28.826237054085155, "grad_norm": 1.145213007926941, "learning_rate": 0.001423475258918297, "loss": 0.6335, "step": 100200 }, { "epoch": 28.82911392405063, "grad_norm": 1.0838418006896973, "learning_rate": 0.0014234177215189873, "loss": 0.6721, "step": 100210 }, { "epoch": 28.83199079401611, "grad_norm": 2.2331888675689697, "learning_rate": 0.0014233601841196777, "loss": 0.8023, "step": 100220 }, { "epoch": 28.834867663981587, "grad_norm": 1.1046406030654907, "learning_rate": 0.0014233026467203682, "loss": 0.711, "step": 100230 }, { "epoch": 28.837744533947067, "grad_norm": 1.3834856748580933, "learning_rate": 0.0014232451093210588, "loss": 0.6283, "step": 100240 }, { "epoch": 28.840621403912543, "grad_norm": 1.2316505908966064, "learning_rate": 0.0014231875719217491, "loss": 0.7483, "step": 100250 }, { "epoch": 28.84349827387802, "grad_norm": 1.9133254289627075, "learning_rate": 0.0014231300345224397, "loss": 0.6835, "step": 100260 }, { "epoch": 28.8463751438435, "grad_norm": 1.4881824254989624, "learning_rate": 0.00142307249712313, "loss": 0.7096, "step": 100270 }, { "epoch": 28.849252013808975, "grad_norm": 1.4637179374694824, "learning_rate": 0.0014230149597238204, "loss": 0.7924, "step": 100280 }, { "epoch": 28.852128883774455, "grad_norm": 1.4910943508148193, "learning_rate": 0.001422957422324511, "loss": 0.8109, "step": 100290 }, { "epoch": 28.85500575373993, "grad_norm": 1.3218538761138916, "learning_rate": 0.0014228998849252015, "loss": 0.6263, "step": 100300 }, { "epoch": 28.857882623705407, "grad_norm": 0.6817387938499451, "learning_rate": 0.0014228423475258919, "loss": 0.6715, "step": 100310 }, { "epoch": 28.860759493670887, "grad_norm": 0.6312575936317444, "learning_rate": 0.0014227848101265824, "loss": 0.5441, "step": 100320 }, { "epoch": 28.863636363636363, "grad_norm": 0.9220953583717346, "learning_rate": 0.0014227272727272726, "loss": 0.5454, "step": 100330 }, { "epoch": 28.86651323360184, "grad_norm": 0.8929261565208435, "learning_rate": 0.0014226697353279631, "loss": 0.528, "step": 100340 }, { "epoch": 28.86939010356732, "grad_norm": 0.6600839495658875, "learning_rate": 0.0014226121979286537, "loss": 0.6335, "step": 100350 }, { "epoch": 28.872266973532795, "grad_norm": 2.349024534225464, "learning_rate": 0.001422554660529344, "loss": 0.8427, "step": 100360 }, { "epoch": 28.875143843498275, "grad_norm": 2.0293235778808594, "learning_rate": 0.0014224971231300346, "loss": 0.7769, "step": 100370 }, { "epoch": 28.87802071346375, "grad_norm": 0.9582504034042358, "learning_rate": 0.0014224395857307252, "loss": 0.6933, "step": 100380 }, { "epoch": 28.880897583429228, "grad_norm": 1.068291187286377, "learning_rate": 0.0014223820483314153, "loss": 0.6977, "step": 100390 }, { "epoch": 28.883774453394707, "grad_norm": 0.6726018190383911, "learning_rate": 0.0014223245109321059, "loss": 0.5247, "step": 100400 }, { "epoch": 28.886651323360184, "grad_norm": 0.9231269955635071, "learning_rate": 0.0014222669735327964, "loss": 0.7436, "step": 100410 }, { "epoch": 28.889528193325663, "grad_norm": 1.1157174110412598, "learning_rate": 0.0014222094361334868, "loss": 0.6602, "step": 100420 }, { "epoch": 28.89240506329114, "grad_norm": 0.799759566783905, "learning_rate": 0.0014221518987341773, "loss": 0.7625, "step": 100430 }, { "epoch": 28.895281933256616, "grad_norm": 0.9290706515312195, "learning_rate": 0.001422094361334868, "loss": 0.6027, "step": 100440 }, { "epoch": 28.898158803222096, "grad_norm": 2.5828261375427246, "learning_rate": 0.001422036823935558, "loss": 0.738, "step": 100450 }, { "epoch": 28.90103567318757, "grad_norm": 1.473423719406128, "learning_rate": 0.0014219792865362486, "loss": 0.6243, "step": 100460 }, { "epoch": 28.903912543153048, "grad_norm": 0.7681978940963745, "learning_rate": 0.001421921749136939, "loss": 0.6193, "step": 100470 }, { "epoch": 28.906789413118528, "grad_norm": 1.0157004594802856, "learning_rate": 0.0014218642117376295, "loss": 0.7747, "step": 100480 }, { "epoch": 28.909666283084004, "grad_norm": 0.739517331123352, "learning_rate": 0.00142180667433832, "loss": 0.6829, "step": 100490 }, { "epoch": 28.912543153049484, "grad_norm": 1.6211555004119873, "learning_rate": 0.0014217491369390104, "loss": 0.5981, "step": 100500 }, { "epoch": 28.91542002301496, "grad_norm": 2.3287057876586914, "learning_rate": 0.0014216915995397008, "loss": 0.6648, "step": 100510 }, { "epoch": 28.918296892980436, "grad_norm": 1.7312654256820679, "learning_rate": 0.0014216340621403913, "loss": 0.7469, "step": 100520 }, { "epoch": 28.921173762945916, "grad_norm": 1.1550204753875732, "learning_rate": 0.0014215765247410817, "loss": 0.7424, "step": 100530 }, { "epoch": 28.924050632911392, "grad_norm": 1.1932929754257202, "learning_rate": 0.0014215189873417722, "loss": 0.5513, "step": 100540 }, { "epoch": 28.92692750287687, "grad_norm": 1.0139368772506714, "learning_rate": 0.0014214614499424628, "loss": 0.6044, "step": 100550 }, { "epoch": 28.929804372842348, "grad_norm": 0.8528698086738586, "learning_rate": 0.0014214039125431531, "loss": 0.5518, "step": 100560 }, { "epoch": 28.932681242807824, "grad_norm": 1.4596370458602905, "learning_rate": 0.0014213463751438435, "loss": 0.7658, "step": 100570 }, { "epoch": 28.935558112773304, "grad_norm": 1.3024972677230835, "learning_rate": 0.0014212888377445338, "loss": 0.6679, "step": 100580 }, { "epoch": 28.93843498273878, "grad_norm": 2.5162737369537354, "learning_rate": 0.0014212313003452244, "loss": 0.6392, "step": 100590 }, { "epoch": 28.941311852704256, "grad_norm": 1.0870682001113892, "learning_rate": 0.001421173762945915, "loss": 0.5548, "step": 100600 }, { "epoch": 28.944188722669736, "grad_norm": 0.8584506511688232, "learning_rate": 0.0014211162255466053, "loss": 0.6649, "step": 100610 }, { "epoch": 28.947065592635212, "grad_norm": 1.1801596879959106, "learning_rate": 0.0014210586881472959, "loss": 0.7884, "step": 100620 }, { "epoch": 28.949942462600692, "grad_norm": 1.5724095106124878, "learning_rate": 0.0014210011507479862, "loss": 0.6223, "step": 100630 }, { "epoch": 28.95281933256617, "grad_norm": 1.0061440467834473, "learning_rate": 0.0014209436133486766, "loss": 0.7232, "step": 100640 }, { "epoch": 28.955696202531644, "grad_norm": 1.0773168802261353, "learning_rate": 0.0014208860759493671, "loss": 0.6284, "step": 100650 }, { "epoch": 28.958573072497124, "grad_norm": 0.8839488625526428, "learning_rate": 0.0014208285385500577, "loss": 0.6328, "step": 100660 }, { "epoch": 28.9614499424626, "grad_norm": 0.9823446869850159, "learning_rate": 0.001420771001150748, "loss": 0.7573, "step": 100670 }, { "epoch": 28.964326812428077, "grad_norm": 1.479917049407959, "learning_rate": 0.0014207134637514386, "loss": 0.7322, "step": 100680 }, { "epoch": 28.967203682393556, "grad_norm": 1.9751416444778442, "learning_rate": 0.0014206559263521287, "loss": 0.5654, "step": 100690 }, { "epoch": 28.970080552359033, "grad_norm": 1.2952382564544678, "learning_rate": 0.0014205983889528193, "loss": 0.6421, "step": 100700 }, { "epoch": 28.972957422324512, "grad_norm": 1.7057143449783325, "learning_rate": 0.0014205408515535099, "loss": 0.7824, "step": 100710 }, { "epoch": 28.97583429228999, "grad_norm": 1.4684679508209229, "learning_rate": 0.0014204833141542002, "loss": 0.6608, "step": 100720 }, { "epoch": 28.978711162255465, "grad_norm": 0.9801089763641357, "learning_rate": 0.0014204257767548908, "loss": 0.5392, "step": 100730 }, { "epoch": 28.981588032220944, "grad_norm": 1.3594344854354858, "learning_rate": 0.0014203682393555813, "loss": 0.6698, "step": 100740 }, { "epoch": 28.98446490218642, "grad_norm": 1.5895051956176758, "learning_rate": 0.0014203107019562715, "loss": 0.694, "step": 100750 }, { "epoch": 28.9873417721519, "grad_norm": 1.1961102485656738, "learning_rate": 0.001420253164556962, "loss": 0.7531, "step": 100760 }, { "epoch": 28.990218642117377, "grad_norm": 1.1891292333602905, "learning_rate": 0.0014201956271576526, "loss": 0.612, "step": 100770 }, { "epoch": 28.993095512082853, "grad_norm": 1.3841055631637573, "learning_rate": 0.001420138089758343, "loss": 0.8884, "step": 100780 }, { "epoch": 28.995972382048333, "grad_norm": 0.783710777759552, "learning_rate": 0.0014200805523590335, "loss": 0.6804, "step": 100790 }, { "epoch": 28.99884925201381, "grad_norm": 1.1524403095245361, "learning_rate": 0.0014200230149597236, "loss": 0.6482, "step": 100800 }, { "epoch": 29.001726121979285, "grad_norm": 1.1626842021942139, "learning_rate": 0.0014199654775604142, "loss": 0.6939, "step": 100810 }, { "epoch": 29.004602991944765, "grad_norm": 2.331996440887451, "learning_rate": 0.0014199079401611048, "loss": 0.4808, "step": 100820 }, { "epoch": 29.00747986191024, "grad_norm": 0.9125064015388489, "learning_rate": 0.0014198504027617951, "loss": 0.7206, "step": 100830 }, { "epoch": 29.01035673187572, "grad_norm": 1.953494668006897, "learning_rate": 0.0014197928653624857, "loss": 0.7304, "step": 100840 }, { "epoch": 29.013233601841197, "grad_norm": 1.0321799516677856, "learning_rate": 0.0014197353279631762, "loss": 0.4547, "step": 100850 }, { "epoch": 29.016110471806673, "grad_norm": 2.5732171535491943, "learning_rate": 0.0014196777905638664, "loss": 0.7091, "step": 100860 }, { "epoch": 29.018987341772153, "grad_norm": 1.2469104528427124, "learning_rate": 0.001419620253164557, "loss": 0.5959, "step": 100870 }, { "epoch": 29.02186421173763, "grad_norm": 1.4529684782028198, "learning_rate": 0.0014195627157652475, "loss": 0.5726, "step": 100880 }, { "epoch": 29.024741081703105, "grad_norm": 0.9036800861358643, "learning_rate": 0.0014195051783659378, "loss": 0.7079, "step": 100890 }, { "epoch": 29.027617951668585, "grad_norm": 1.588539719581604, "learning_rate": 0.0014194476409666284, "loss": 0.747, "step": 100900 }, { "epoch": 29.03049482163406, "grad_norm": 0.7216766476631165, "learning_rate": 0.0014193901035673188, "loss": 0.5593, "step": 100910 }, { "epoch": 29.03337169159954, "grad_norm": 0.6224176287651062, "learning_rate": 0.001419332566168009, "loss": 0.5922, "step": 100920 }, { "epoch": 29.036248561565017, "grad_norm": 1.6216739416122437, "learning_rate": 0.0014192750287686997, "loss": 0.6305, "step": 100930 }, { "epoch": 29.039125431530493, "grad_norm": 1.072546362876892, "learning_rate": 0.00141921749136939, "loss": 0.7312, "step": 100940 }, { "epoch": 29.042002301495973, "grad_norm": 1.1452982425689697, "learning_rate": 0.0014191599539700806, "loss": 0.6245, "step": 100950 }, { "epoch": 29.04487917146145, "grad_norm": 0.6817319393157959, "learning_rate": 0.0014191024165707711, "loss": 0.5668, "step": 100960 }, { "epoch": 29.04775604142693, "grad_norm": 0.9689522981643677, "learning_rate": 0.0014190448791714615, "loss": 0.7355, "step": 100970 }, { "epoch": 29.050632911392405, "grad_norm": 1.5866055488586426, "learning_rate": 0.0014189873417721518, "loss": 0.7018, "step": 100980 }, { "epoch": 29.05350978135788, "grad_norm": 1.6301888227462769, "learning_rate": 0.0014189298043728424, "loss": 0.583, "step": 100990 }, { "epoch": 29.05638665132336, "grad_norm": 1.2927894592285156, "learning_rate": 0.0014188722669735327, "loss": 0.564, "step": 101000 }, { "epoch": 29.059263521288837, "grad_norm": 0.8501038551330566, "learning_rate": 0.0014188147295742233, "loss": 0.6513, "step": 101010 }, { "epoch": 29.062140391254314, "grad_norm": 0.8162983655929565, "learning_rate": 0.0014187571921749137, "loss": 0.569, "step": 101020 }, { "epoch": 29.065017261219793, "grad_norm": 1.043503761291504, "learning_rate": 0.0014186996547756042, "loss": 0.5638, "step": 101030 }, { "epoch": 29.06789413118527, "grad_norm": 2.0083181858062744, "learning_rate": 0.0014186421173762946, "loss": 0.7473, "step": 101040 }, { "epoch": 29.07077100115075, "grad_norm": 2.592756748199463, "learning_rate": 0.001418584579976985, "loss": 0.7889, "step": 101050 }, { "epoch": 29.073647871116226, "grad_norm": 0.8257704377174377, "learning_rate": 0.0014185270425776755, "loss": 0.6217, "step": 101060 }, { "epoch": 29.076524741081702, "grad_norm": 0.7968425154685974, "learning_rate": 0.001418469505178366, "loss": 0.6106, "step": 101070 }, { "epoch": 29.07940161104718, "grad_norm": 1.2833977937698364, "learning_rate": 0.0014184119677790564, "loss": 0.6039, "step": 101080 }, { "epoch": 29.082278481012658, "grad_norm": 1.290459156036377, "learning_rate": 0.001418354430379747, "loss": 0.4624, "step": 101090 }, { "epoch": 29.085155350978138, "grad_norm": 1.0824782848358154, "learning_rate": 0.0014182968929804373, "loss": 0.7188, "step": 101100 }, { "epoch": 29.088032220943614, "grad_norm": 1.894992470741272, "learning_rate": 0.0014182393555811277, "loss": 0.8832, "step": 101110 }, { "epoch": 29.09090909090909, "grad_norm": 0.8163963556289673, "learning_rate": 0.0014181818181818182, "loss": 0.492, "step": 101120 }, { "epoch": 29.09378596087457, "grad_norm": 2.154810905456543, "learning_rate": 0.0014181242807825088, "loss": 0.8425, "step": 101130 }, { "epoch": 29.096662830840046, "grad_norm": 0.9478538632392883, "learning_rate": 0.0014180667433831991, "loss": 0.588, "step": 101140 }, { "epoch": 29.099539700805522, "grad_norm": 1.3545188903808594, "learning_rate": 0.0014180092059838897, "loss": 0.6819, "step": 101150 }, { "epoch": 29.102416570771002, "grad_norm": 2.248908281326294, "learning_rate": 0.0014179516685845798, "loss": 0.5319, "step": 101160 }, { "epoch": 29.105293440736478, "grad_norm": 1.4049553871154785, "learning_rate": 0.0014178941311852704, "loss": 0.7814, "step": 101170 }, { "epoch": 29.108170310701958, "grad_norm": 1.4767570495605469, "learning_rate": 0.001417836593785961, "loss": 0.7678, "step": 101180 }, { "epoch": 29.111047180667434, "grad_norm": 1.5789965391159058, "learning_rate": 0.0014177790563866513, "loss": 0.841, "step": 101190 }, { "epoch": 29.11392405063291, "grad_norm": 0.857011079788208, "learning_rate": 0.0014177215189873419, "loss": 0.6131, "step": 101200 }, { "epoch": 29.11680092059839, "grad_norm": 1.7691478729248047, "learning_rate": 0.0014176639815880324, "loss": 0.7015, "step": 101210 }, { "epoch": 29.119677790563866, "grad_norm": 1.2626599073410034, "learning_rate": 0.0014176064441887226, "loss": 0.7741, "step": 101220 }, { "epoch": 29.122554660529342, "grad_norm": 0.8380058407783508, "learning_rate": 0.0014175489067894131, "loss": 0.6834, "step": 101230 }, { "epoch": 29.125431530494822, "grad_norm": 1.2303208112716675, "learning_rate": 0.0014174913693901037, "loss": 0.6929, "step": 101240 }, { "epoch": 29.1283084004603, "grad_norm": 1.9068652391433716, "learning_rate": 0.001417433831990794, "loss": 0.7587, "step": 101250 }, { "epoch": 29.131185270425778, "grad_norm": 1.776503086090088, "learning_rate": 0.0014173762945914846, "loss": 0.7834, "step": 101260 }, { "epoch": 29.134062140391254, "grad_norm": 1.2366762161254883, "learning_rate": 0.001417318757192175, "loss": 0.7989, "step": 101270 }, { "epoch": 29.13693901035673, "grad_norm": 0.868712842464447, "learning_rate": 0.0014172612197928653, "loss": 0.7483, "step": 101280 }, { "epoch": 29.13981588032221, "grad_norm": 1.8653737306594849, "learning_rate": 0.0014172036823935559, "loss": 0.6645, "step": 101290 }, { "epoch": 29.142692750287686, "grad_norm": 1.4989147186279297, "learning_rate": 0.0014171461449942462, "loss": 0.7087, "step": 101300 }, { "epoch": 29.145569620253166, "grad_norm": 1.0595206022262573, "learning_rate": 0.0014170886075949368, "loss": 0.6122, "step": 101310 }, { "epoch": 29.148446490218642, "grad_norm": 1.1422078609466553, "learning_rate": 0.0014170310701956273, "loss": 0.8469, "step": 101320 }, { "epoch": 29.15132336018412, "grad_norm": 1.1908315420150757, "learning_rate": 0.0014169735327963177, "loss": 0.6357, "step": 101330 }, { "epoch": 29.1542002301496, "grad_norm": 0.8666046261787415, "learning_rate": 0.001416915995397008, "loss": 0.5386, "step": 101340 }, { "epoch": 29.157077100115075, "grad_norm": 1.6734094619750977, "learning_rate": 0.0014168584579976986, "loss": 0.7231, "step": 101350 }, { "epoch": 29.15995397008055, "grad_norm": 1.2685900926589966, "learning_rate": 0.001416800920598389, "loss": 0.8037, "step": 101360 }, { "epoch": 29.16283084004603, "grad_norm": 0.7938339710235596, "learning_rate": 0.0014167433831990795, "loss": 0.5136, "step": 101370 }, { "epoch": 29.165707710011507, "grad_norm": 0.7514458298683167, "learning_rate": 0.0014166858457997698, "loss": 0.5914, "step": 101380 }, { "epoch": 29.168584579976987, "grad_norm": 1.9993963241577148, "learning_rate": 0.0014166283084004604, "loss": 0.7751, "step": 101390 }, { "epoch": 29.171461449942463, "grad_norm": 1.435086965560913, "learning_rate": 0.0014165707710011508, "loss": 0.7164, "step": 101400 }, { "epoch": 29.17433831990794, "grad_norm": 1.5192283391952515, "learning_rate": 0.001416513233601841, "loss": 0.6486, "step": 101410 }, { "epoch": 29.17721518987342, "grad_norm": 1.8748127222061157, "learning_rate": 0.0014164556962025317, "loss": 0.8715, "step": 101420 }, { "epoch": 29.180092059838895, "grad_norm": 6.130177974700928, "learning_rate": 0.0014163981588032222, "loss": 0.5714, "step": 101430 }, { "epoch": 29.182968929804375, "grad_norm": 1.7997313737869263, "learning_rate": 0.0014163406214039126, "loss": 0.6262, "step": 101440 }, { "epoch": 29.18584579976985, "grad_norm": 1.734119176864624, "learning_rate": 0.0014162830840046031, "loss": 0.6461, "step": 101450 }, { "epoch": 29.188722669735327, "grad_norm": 2.1591365337371826, "learning_rate": 0.0014162255466052935, "loss": 0.7249, "step": 101460 }, { "epoch": 29.191599539700807, "grad_norm": 0.8381248712539673, "learning_rate": 0.0014161680092059838, "loss": 0.6164, "step": 101470 }, { "epoch": 29.194476409666283, "grad_norm": 0.987406313419342, "learning_rate": 0.0014161104718066744, "loss": 0.586, "step": 101480 }, { "epoch": 29.19735327963176, "grad_norm": 1.5774867534637451, "learning_rate": 0.0014160529344073647, "loss": 0.7744, "step": 101490 }, { "epoch": 29.20023014959724, "grad_norm": 0.9673576951026917, "learning_rate": 0.0014159953970080553, "loss": 0.6131, "step": 101500 }, { "epoch": 29.203107019562715, "grad_norm": 1.683430552482605, "learning_rate": 0.0014159378596087459, "loss": 0.5486, "step": 101510 }, { "epoch": 29.205983889528195, "grad_norm": 1.8609373569488525, "learning_rate": 0.001415880322209436, "loss": 0.6296, "step": 101520 }, { "epoch": 29.20886075949367, "grad_norm": 0.9143011569976807, "learning_rate": 0.0014158227848101266, "loss": 0.6246, "step": 101530 }, { "epoch": 29.211737629459147, "grad_norm": 1.4148997068405151, "learning_rate": 0.0014157652474108171, "loss": 0.6338, "step": 101540 }, { "epoch": 29.214614499424627, "grad_norm": 1.5749601125717163, "learning_rate": 0.0014157077100115075, "loss": 0.7324, "step": 101550 }, { "epoch": 29.217491369390103, "grad_norm": 1.0763589143753052, "learning_rate": 0.001415650172612198, "loss": 0.8866, "step": 101560 }, { "epoch": 29.22036823935558, "grad_norm": 1.8266140222549438, "learning_rate": 0.0014155926352128886, "loss": 0.6047, "step": 101570 }, { "epoch": 29.22324510932106, "grad_norm": 1.4779701232910156, "learning_rate": 0.0014155350978135787, "loss": 0.6358, "step": 101580 }, { "epoch": 29.226121979286535, "grad_norm": 1.912597894668579, "learning_rate": 0.0014154775604142693, "loss": 0.6205, "step": 101590 }, { "epoch": 29.228998849252015, "grad_norm": 1.4075040817260742, "learning_rate": 0.0014154200230149596, "loss": 0.7773, "step": 101600 }, { "epoch": 29.23187571921749, "grad_norm": 1.3911077976226807, "learning_rate": 0.0014153624856156502, "loss": 0.5994, "step": 101610 }, { "epoch": 29.234752589182968, "grad_norm": 1.645186185836792, "learning_rate": 0.0014153049482163408, "loss": 0.6552, "step": 101620 }, { "epoch": 29.237629459148447, "grad_norm": 1.5904442071914673, "learning_rate": 0.001415247410817031, "loss": 0.8016, "step": 101630 }, { "epoch": 29.240506329113924, "grad_norm": 1.1780363321304321, "learning_rate": 0.0014151898734177215, "loss": 0.5887, "step": 101640 }, { "epoch": 29.243383199079403, "grad_norm": 1.4327811002731323, "learning_rate": 0.001415132336018412, "loss": 0.6846, "step": 101650 }, { "epoch": 29.24626006904488, "grad_norm": 3.2582037448883057, "learning_rate": 0.0014150747986191024, "loss": 0.704, "step": 101660 }, { "epoch": 29.249136939010356, "grad_norm": 0.6826105117797852, "learning_rate": 0.001415017261219793, "loss": 0.6005, "step": 101670 }, { "epoch": 29.252013808975835, "grad_norm": 1.1391140222549438, "learning_rate": 0.0014149597238204835, "loss": 0.6045, "step": 101680 }, { "epoch": 29.25489067894131, "grad_norm": 1.3349610567092896, "learning_rate": 0.0014149021864211736, "loss": 0.6662, "step": 101690 }, { "epoch": 29.257767548906788, "grad_norm": 1.706800103187561, "learning_rate": 0.0014148446490218642, "loss": 0.7468, "step": 101700 }, { "epoch": 29.260644418872268, "grad_norm": 1.2601840496063232, "learning_rate": 0.0014147871116225545, "loss": 0.7014, "step": 101710 }, { "epoch": 29.263521288837744, "grad_norm": 1.468130350112915, "learning_rate": 0.0014147295742232451, "loss": 0.6107, "step": 101720 }, { "epoch": 29.266398158803224, "grad_norm": 1.1267656087875366, "learning_rate": 0.0014146720368239357, "loss": 0.5301, "step": 101730 }, { "epoch": 29.2692750287687, "grad_norm": 1.7980270385742188, "learning_rate": 0.001414614499424626, "loss": 0.6949, "step": 101740 }, { "epoch": 29.272151898734176, "grad_norm": 1.9951003789901733, "learning_rate": 0.0014145569620253164, "loss": 0.5743, "step": 101750 }, { "epoch": 29.275028768699656, "grad_norm": 1.3654179573059082, "learning_rate": 0.001414499424626007, "loss": 0.5912, "step": 101760 }, { "epoch": 29.277905638665132, "grad_norm": 0.9545527100563049, "learning_rate": 0.0014144418872266973, "loss": 0.6195, "step": 101770 }, { "epoch": 29.280782508630608, "grad_norm": 0.9498183727264404, "learning_rate": 0.0014143843498273878, "loss": 0.7273, "step": 101780 }, { "epoch": 29.283659378596088, "grad_norm": 1.039706826210022, "learning_rate": 0.0014143268124280784, "loss": 0.6916, "step": 101790 }, { "epoch": 29.286536248561564, "grad_norm": 1.4936597347259521, "learning_rate": 0.0014142692750287688, "loss": 0.8589, "step": 101800 }, { "epoch": 29.289413118527044, "grad_norm": 0.8871173858642578, "learning_rate": 0.001414211737629459, "loss": 0.6359, "step": 101810 }, { "epoch": 29.29228998849252, "grad_norm": 1.1176822185516357, "learning_rate": 0.0014141542002301497, "loss": 0.548, "step": 101820 }, { "epoch": 29.295166858457996, "grad_norm": 0.82255619764328, "learning_rate": 0.00141409666283084, "loss": 0.5842, "step": 101830 }, { "epoch": 29.298043728423476, "grad_norm": 1.2415317296981812, "learning_rate": 0.0014140391254315306, "loss": 0.5401, "step": 101840 }, { "epoch": 29.300920598388952, "grad_norm": 1.1338567733764648, "learning_rate": 0.001413981588032221, "loss": 0.554, "step": 101850 }, { "epoch": 29.303797468354432, "grad_norm": 1.029028058052063, "learning_rate": 0.0014139240506329115, "loss": 0.6219, "step": 101860 }, { "epoch": 29.306674338319908, "grad_norm": 0.7535187005996704, "learning_rate": 0.0014138665132336018, "loss": 0.7048, "step": 101870 }, { "epoch": 29.309551208285384, "grad_norm": 0.9894916415214539, "learning_rate": 0.0014138089758342922, "loss": 0.5873, "step": 101880 }, { "epoch": 29.312428078250864, "grad_norm": 1.3843494653701782, "learning_rate": 0.0014137514384349827, "loss": 0.6366, "step": 101890 }, { "epoch": 29.31530494821634, "grad_norm": 0.5728133320808411, "learning_rate": 0.0014136939010356733, "loss": 0.5083, "step": 101900 }, { "epoch": 29.318181818181817, "grad_norm": 1.7989261150360107, "learning_rate": 0.0014136363636363637, "loss": 0.6069, "step": 101910 }, { "epoch": 29.321058688147296, "grad_norm": 1.4440544843673706, "learning_rate": 0.0014135788262370542, "loss": 0.8898, "step": 101920 }, { "epoch": 29.323935558112773, "grad_norm": 0.7269273996353149, "learning_rate": 0.0014135212888377446, "loss": 0.6706, "step": 101930 }, { "epoch": 29.326812428078252, "grad_norm": 1.4041738510131836, "learning_rate": 0.001413463751438435, "loss": 0.792, "step": 101940 }, { "epoch": 29.32968929804373, "grad_norm": 1.7334424257278442, "learning_rate": 0.0014134062140391255, "loss": 0.7541, "step": 101950 }, { "epoch": 29.332566168009205, "grad_norm": 1.1866968870162964, "learning_rate": 0.0014133486766398158, "loss": 0.7785, "step": 101960 }, { "epoch": 29.335443037974684, "grad_norm": 1.673905372619629, "learning_rate": 0.0014132911392405064, "loss": 0.6266, "step": 101970 }, { "epoch": 29.33831990794016, "grad_norm": 1.3657995462417603, "learning_rate": 0.001413233601841197, "loss": 0.7649, "step": 101980 }, { "epoch": 29.34119677790564, "grad_norm": 0.9763387441635132, "learning_rate": 0.001413176064441887, "loss": 0.6437, "step": 101990 }, { "epoch": 29.344073647871117, "grad_norm": 0.9544954895973206, "learning_rate": 0.0014131185270425776, "loss": 0.6209, "step": 102000 }, { "epoch": 29.346950517836593, "grad_norm": 1.0781712532043457, "learning_rate": 0.0014130609896432682, "loss": 0.688, "step": 102010 }, { "epoch": 29.349827387802073, "grad_norm": 0.8140134811401367, "learning_rate": 0.0014130034522439586, "loss": 0.8437, "step": 102020 }, { "epoch": 29.35270425776755, "grad_norm": 0.9485437273979187, "learning_rate": 0.0014129459148446491, "loss": 0.6637, "step": 102030 }, { "epoch": 29.355581127733025, "grad_norm": 0.6843702793121338, "learning_rate": 0.0014128883774453397, "loss": 0.5278, "step": 102040 }, { "epoch": 29.358457997698505, "grad_norm": 1.2660363912582397, "learning_rate": 0.0014128308400460298, "loss": 0.5775, "step": 102050 }, { "epoch": 29.36133486766398, "grad_norm": 2.0864944458007812, "learning_rate": 0.0014127733026467204, "loss": 0.9703, "step": 102060 }, { "epoch": 29.36421173762946, "grad_norm": 1.9193015098571777, "learning_rate": 0.0014127157652474107, "loss": 0.5312, "step": 102070 }, { "epoch": 29.367088607594937, "grad_norm": 1.0823454856872559, "learning_rate": 0.0014126582278481013, "loss": 0.7724, "step": 102080 }, { "epoch": 29.369965477560413, "grad_norm": 1.7341408729553223, "learning_rate": 0.0014126006904487919, "loss": 0.8056, "step": 102090 }, { "epoch": 29.372842347525893, "grad_norm": 1.1073747873306274, "learning_rate": 0.0014125431530494822, "loss": 0.6791, "step": 102100 }, { "epoch": 29.37571921749137, "grad_norm": 1.3919519186019897, "learning_rate": 0.0014124856156501726, "loss": 0.7157, "step": 102110 }, { "epoch": 29.378596087456845, "grad_norm": 1.123836874961853, "learning_rate": 0.0014124280782508631, "loss": 0.522, "step": 102120 }, { "epoch": 29.381472957422325, "grad_norm": 0.9443336129188538, "learning_rate": 0.0014123705408515535, "loss": 0.4815, "step": 102130 }, { "epoch": 29.3843498273878, "grad_norm": 0.9623945951461792, "learning_rate": 0.001412313003452244, "loss": 0.5589, "step": 102140 }, { "epoch": 29.38722669735328, "grad_norm": 1.2326581478118896, "learning_rate": 0.0014122554660529346, "loss": 0.6221, "step": 102150 }, { "epoch": 29.390103567318757, "grad_norm": 1.4379703998565674, "learning_rate": 0.001412197928653625, "loss": 0.6157, "step": 102160 }, { "epoch": 29.392980437284233, "grad_norm": 1.0937573909759521, "learning_rate": 0.0014121403912543153, "loss": 0.6258, "step": 102170 }, { "epoch": 29.395857307249713, "grad_norm": 1.1370638608932495, "learning_rate": 0.0014120828538550056, "loss": 0.6437, "step": 102180 }, { "epoch": 29.39873417721519, "grad_norm": 0.5683709383010864, "learning_rate": 0.0014120253164556962, "loss": 0.5244, "step": 102190 }, { "epoch": 29.40161104718067, "grad_norm": 1.1824930906295776, "learning_rate": 0.0014119677790563868, "loss": 0.6317, "step": 102200 }, { "epoch": 29.404487917146145, "grad_norm": 0.894550085067749, "learning_rate": 0.001411910241657077, "loss": 0.7362, "step": 102210 }, { "epoch": 29.40736478711162, "grad_norm": 0.7107440233230591, "learning_rate": 0.0014118527042577677, "loss": 0.5992, "step": 102220 }, { "epoch": 29.4102416570771, "grad_norm": 1.5017861127853394, "learning_rate": 0.001411795166858458, "loss": 0.6745, "step": 102230 }, { "epoch": 29.413118527042577, "grad_norm": 3.329719066619873, "learning_rate": 0.0014117376294591484, "loss": 0.6774, "step": 102240 }, { "epoch": 29.415995397008054, "grad_norm": 1.3242161273956299, "learning_rate": 0.001411680092059839, "loss": 0.6268, "step": 102250 }, { "epoch": 29.418872266973533, "grad_norm": 0.6188547015190125, "learning_rate": 0.0014116225546605295, "loss": 0.6666, "step": 102260 }, { "epoch": 29.42174913693901, "grad_norm": 1.4330087900161743, "learning_rate": 0.0014115650172612198, "loss": 0.7044, "step": 102270 }, { "epoch": 29.42462600690449, "grad_norm": 1.8098827600479126, "learning_rate": 0.0014115074798619104, "loss": 0.7057, "step": 102280 }, { "epoch": 29.427502876869966, "grad_norm": 1.0129979848861694, "learning_rate": 0.0014114499424626005, "loss": 0.7346, "step": 102290 }, { "epoch": 29.430379746835442, "grad_norm": 1.3757679462432861, "learning_rate": 0.001411392405063291, "loss": 0.6949, "step": 102300 }, { "epoch": 29.43325661680092, "grad_norm": 1.4870319366455078, "learning_rate": 0.0014113348676639817, "loss": 0.6868, "step": 102310 }, { "epoch": 29.436133486766398, "grad_norm": 0.9549956321716309, "learning_rate": 0.001411277330264672, "loss": 0.6154, "step": 102320 }, { "epoch": 29.439010356731877, "grad_norm": 1.958848237991333, "learning_rate": 0.0014112197928653626, "loss": 0.7252, "step": 102330 }, { "epoch": 29.441887226697354, "grad_norm": 2.1794376373291016, "learning_rate": 0.0014111622554660531, "loss": 0.7431, "step": 102340 }, { "epoch": 29.44476409666283, "grad_norm": 0.829032301902771, "learning_rate": 0.0014111047180667433, "loss": 0.5858, "step": 102350 }, { "epoch": 29.44764096662831, "grad_norm": 2.2501444816589355, "learning_rate": 0.0014110471806674338, "loss": 0.8691, "step": 102360 }, { "epoch": 29.450517836593786, "grad_norm": 1.858372688293457, "learning_rate": 0.0014109896432681244, "loss": 0.6287, "step": 102370 }, { "epoch": 29.453394706559262, "grad_norm": 1.0011438131332397, "learning_rate": 0.0014109321058688147, "loss": 0.6427, "step": 102380 }, { "epoch": 29.456271576524742, "grad_norm": 1.774553656578064, "learning_rate": 0.0014108745684695053, "loss": 0.7879, "step": 102390 }, { "epoch": 29.459148446490218, "grad_norm": 1.2089511156082153, "learning_rate": 0.0014108170310701959, "loss": 0.6721, "step": 102400 }, { "epoch": 29.462025316455698, "grad_norm": 0.9296855926513672, "learning_rate": 0.001410759493670886, "loss": 0.593, "step": 102410 }, { "epoch": 29.464902186421174, "grad_norm": 1.6915158033370972, "learning_rate": 0.0014107019562715766, "loss": 0.5347, "step": 102420 }, { "epoch": 29.46777905638665, "grad_norm": 0.8972437381744385, "learning_rate": 0.001410644418872267, "loss": 0.6334, "step": 102430 }, { "epoch": 29.47065592635213, "grad_norm": 1.554282784461975, "learning_rate": 0.0014105868814729575, "loss": 0.7178, "step": 102440 }, { "epoch": 29.473532796317606, "grad_norm": 1.2705851793289185, "learning_rate": 0.001410529344073648, "loss": 0.5868, "step": 102450 }, { "epoch": 29.476409666283082, "grad_norm": 2.472536563873291, "learning_rate": 0.0014104718066743382, "loss": 0.6682, "step": 102460 }, { "epoch": 29.479286536248562, "grad_norm": 0.689852237701416, "learning_rate": 0.0014104142692750287, "loss": 0.6512, "step": 102470 }, { "epoch": 29.48216340621404, "grad_norm": 0.9764667749404907, "learning_rate": 0.0014103567318757193, "loss": 0.6849, "step": 102480 }, { "epoch": 29.485040276179518, "grad_norm": 1.3013029098510742, "learning_rate": 0.0014102991944764096, "loss": 0.7354, "step": 102490 }, { "epoch": 29.487917146144994, "grad_norm": 1.2575443983078003, "learning_rate": 0.0014102416570771002, "loss": 0.7614, "step": 102500 }, { "epoch": 29.49079401611047, "grad_norm": 1.5587862730026245, "learning_rate": 0.0014101841196777908, "loss": 0.8031, "step": 102510 }, { "epoch": 29.49367088607595, "grad_norm": 0.9335415363311768, "learning_rate": 0.001410126582278481, "loss": 0.6264, "step": 102520 }, { "epoch": 29.496547756041426, "grad_norm": 1.2616456747055054, "learning_rate": 0.0014100690448791715, "loss": 0.8137, "step": 102530 }, { "epoch": 29.499424626006906, "grad_norm": 1.0847829580307007, "learning_rate": 0.0014100115074798618, "loss": 0.6598, "step": 102540 }, { "epoch": 29.502301495972382, "grad_norm": 2.34565806388855, "learning_rate": 0.0014099539700805524, "loss": 0.605, "step": 102550 }, { "epoch": 29.50517836593786, "grad_norm": 1.0785682201385498, "learning_rate": 0.001409896432681243, "loss": 0.6511, "step": 102560 }, { "epoch": 29.50805523590334, "grad_norm": 1.2205275297164917, "learning_rate": 0.0014098388952819333, "loss": 0.7109, "step": 102570 }, { "epoch": 29.510932105868815, "grad_norm": 0.694186270236969, "learning_rate": 0.0014097813578826236, "loss": 0.7234, "step": 102580 }, { "epoch": 29.51380897583429, "grad_norm": 1.8232518434524536, "learning_rate": 0.0014097238204833142, "loss": 0.6826, "step": 102590 }, { "epoch": 29.51668584579977, "grad_norm": 0.7252364158630371, "learning_rate": 0.0014096662830840045, "loss": 0.6809, "step": 102600 }, { "epoch": 29.519562715765247, "grad_norm": 2.062366247177124, "learning_rate": 0.0014096087456846951, "loss": 0.594, "step": 102610 }, { "epoch": 29.522439585730726, "grad_norm": 0.9549062252044678, "learning_rate": 0.0014095512082853857, "loss": 0.5929, "step": 102620 }, { "epoch": 29.525316455696203, "grad_norm": 1.2843080759048462, "learning_rate": 0.001409493670886076, "loss": 0.7271, "step": 102630 }, { "epoch": 29.52819332566168, "grad_norm": 1.650425672531128, "learning_rate": 0.0014094361334867664, "loss": 0.7634, "step": 102640 }, { "epoch": 29.53107019562716, "grad_norm": 1.4066206216812134, "learning_rate": 0.0014093785960874567, "loss": 0.6436, "step": 102650 }, { "epoch": 29.533947065592635, "grad_norm": 1.3518972396850586, "learning_rate": 0.0014093210586881473, "loss": 0.6586, "step": 102660 }, { "epoch": 29.53682393555811, "grad_norm": 1.9540150165557861, "learning_rate": 0.0014092635212888378, "loss": 0.638, "step": 102670 }, { "epoch": 29.53970080552359, "grad_norm": 1.242581844329834, "learning_rate": 0.0014092059838895282, "loss": 0.6926, "step": 102680 }, { "epoch": 29.542577675489067, "grad_norm": 0.992975652217865, "learning_rate": 0.0014091484464902188, "loss": 0.5859, "step": 102690 }, { "epoch": 29.545454545454547, "grad_norm": 0.7278484106063843, "learning_rate": 0.001409090909090909, "loss": 0.6435, "step": 102700 }, { "epoch": 29.548331415420023, "grad_norm": 1.3602235317230225, "learning_rate": 0.0014090333716915994, "loss": 0.6608, "step": 102710 }, { "epoch": 29.5512082853855, "grad_norm": 1.2170883417129517, "learning_rate": 0.00140897583429229, "loss": 0.592, "step": 102720 }, { "epoch": 29.55408515535098, "grad_norm": 1.141194224357605, "learning_rate": 0.0014089182968929806, "loss": 0.6585, "step": 102730 }, { "epoch": 29.556962025316455, "grad_norm": 1.8446213006973267, "learning_rate": 0.001408860759493671, "loss": 0.6788, "step": 102740 }, { "epoch": 29.559838895281935, "grad_norm": 0.8711302280426025, "learning_rate": 0.0014088032220943615, "loss": 0.5284, "step": 102750 }, { "epoch": 29.56271576524741, "grad_norm": 0.9486345052719116, "learning_rate": 0.0014087456846950516, "loss": 0.5616, "step": 102760 }, { "epoch": 29.565592635212887, "grad_norm": 1.6049939393997192, "learning_rate": 0.0014086881472957422, "loss": 0.6472, "step": 102770 }, { "epoch": 29.568469505178367, "grad_norm": 1.3511027097702026, "learning_rate": 0.0014086306098964327, "loss": 0.7166, "step": 102780 }, { "epoch": 29.571346375143843, "grad_norm": 2.1923134326934814, "learning_rate": 0.001408573072497123, "loss": 0.681, "step": 102790 }, { "epoch": 29.57422324510932, "grad_norm": 2.0399508476257324, "learning_rate": 0.0014085155350978137, "loss": 0.8012, "step": 102800 }, { "epoch": 29.5771001150748, "grad_norm": 3.329493761062622, "learning_rate": 0.0014084579976985042, "loss": 0.7062, "step": 102810 }, { "epoch": 29.579976985040275, "grad_norm": 4.1685357093811035, "learning_rate": 0.0014084004602991943, "loss": 0.6767, "step": 102820 }, { "epoch": 29.582853855005755, "grad_norm": 1.0117976665496826, "learning_rate": 0.001408342922899885, "loss": 0.6338, "step": 102830 }, { "epoch": 29.58573072497123, "grad_norm": 1.00786554813385, "learning_rate": 0.0014082853855005755, "loss": 0.7586, "step": 102840 }, { "epoch": 29.588607594936708, "grad_norm": 1.0550901889801025, "learning_rate": 0.0014082278481012658, "loss": 0.8471, "step": 102850 }, { "epoch": 29.591484464902187, "grad_norm": 1.0196149349212646, "learning_rate": 0.0014081703107019564, "loss": 0.7127, "step": 102860 }, { "epoch": 29.594361334867664, "grad_norm": 1.0245015621185303, "learning_rate": 0.0014081127733026467, "loss": 0.5256, "step": 102870 }, { "epoch": 29.59723820483314, "grad_norm": 1.5059170722961426, "learning_rate": 0.001408055235903337, "loss": 0.7686, "step": 102880 }, { "epoch": 29.60011507479862, "grad_norm": 1.4040333032608032, "learning_rate": 0.0014079976985040276, "loss": 0.7169, "step": 102890 }, { "epoch": 29.602991944764096, "grad_norm": 1.7458544969558716, "learning_rate": 0.001407940161104718, "loss": 0.7262, "step": 102900 }, { "epoch": 29.605868814729575, "grad_norm": 0.9836497902870178, "learning_rate": 0.0014078826237054086, "loss": 0.575, "step": 102910 }, { "epoch": 29.60874568469505, "grad_norm": 1.2542535066604614, "learning_rate": 0.0014078250863060991, "loss": 0.7543, "step": 102920 }, { "epoch": 29.611622554660528, "grad_norm": 1.280691146850586, "learning_rate": 0.0014077675489067895, "loss": 0.6647, "step": 102930 }, { "epoch": 29.614499424626008, "grad_norm": 1.3115777969360352, "learning_rate": 0.0014077100115074798, "loss": 0.5671, "step": 102940 }, { "epoch": 29.617376294591484, "grad_norm": 1.9401164054870605, "learning_rate": 0.0014076524741081704, "loss": 0.8155, "step": 102950 }, { "epoch": 29.620253164556964, "grad_norm": 1.2421207427978516, "learning_rate": 0.0014075949367088607, "loss": 0.6426, "step": 102960 }, { "epoch": 29.62313003452244, "grad_norm": 1.9226263761520386, "learning_rate": 0.0014075373993095513, "loss": 0.6422, "step": 102970 }, { "epoch": 29.626006904487916, "grad_norm": 0.7684628367424011, "learning_rate": 0.0014074798619102416, "loss": 0.633, "step": 102980 }, { "epoch": 29.628883774453396, "grad_norm": 2.040092945098877, "learning_rate": 0.0014074223245109322, "loss": 0.6771, "step": 102990 }, { "epoch": 29.631760644418872, "grad_norm": 1.0682337284088135, "learning_rate": 0.0014073647871116225, "loss": 0.5785, "step": 103000 }, { "epoch": 29.634637514384348, "grad_norm": 2.1568963527679443, "learning_rate": 0.001407307249712313, "loss": 0.7779, "step": 103010 }, { "epoch": 29.637514384349828, "grad_norm": 0.5126205682754517, "learning_rate": 0.0014072497123130035, "loss": 0.6401, "step": 103020 }, { "epoch": 29.640391254315304, "grad_norm": 1.450819969177246, "learning_rate": 0.001407192174913694, "loss": 0.5641, "step": 103030 }, { "epoch": 29.643268124280784, "grad_norm": 1.6318248510360718, "learning_rate": 0.0014071346375143844, "loss": 0.7032, "step": 103040 }, { "epoch": 29.64614499424626, "grad_norm": 2.190868616104126, "learning_rate": 0.001407077100115075, "loss": 0.7062, "step": 103050 }, { "epoch": 29.649021864211736, "grad_norm": 2.2027101516723633, "learning_rate": 0.0014070195627157653, "loss": 0.6511, "step": 103060 }, { "epoch": 29.651898734177216, "grad_norm": 1.1428941488265991, "learning_rate": 0.0014069620253164556, "loss": 0.7565, "step": 103070 }, { "epoch": 29.654775604142692, "grad_norm": 1.6729226112365723, "learning_rate": 0.0014069044879171462, "loss": 0.7599, "step": 103080 }, { "epoch": 29.657652474108172, "grad_norm": 2.34889554977417, "learning_rate": 0.0014068469505178368, "loss": 0.7532, "step": 103090 }, { "epoch": 29.660529344073648, "grad_norm": 1.0092406272888184, "learning_rate": 0.001406789413118527, "loss": 0.7964, "step": 103100 }, { "epoch": 29.663406214039124, "grad_norm": 0.9693057537078857, "learning_rate": 0.0014067318757192177, "loss": 0.5366, "step": 103110 }, { "epoch": 29.666283084004604, "grad_norm": 0.7252638339996338, "learning_rate": 0.0014066743383199078, "loss": 0.7212, "step": 103120 }, { "epoch": 29.66915995397008, "grad_norm": 0.8751195669174194, "learning_rate": 0.0014066168009205984, "loss": 0.5317, "step": 103130 }, { "epoch": 29.672036823935557, "grad_norm": 0.9017048478126526, "learning_rate": 0.001406559263521289, "loss": 0.6906, "step": 103140 }, { "epoch": 29.674913693901036, "grad_norm": 2.8088393211364746, "learning_rate": 0.0014065017261219793, "loss": 0.6224, "step": 103150 }, { "epoch": 29.677790563866512, "grad_norm": 0.839067816734314, "learning_rate": 0.0014064441887226698, "loss": 0.7191, "step": 103160 }, { "epoch": 29.680667433831992, "grad_norm": 1.1216331720352173, "learning_rate": 0.0014063866513233604, "loss": 0.5945, "step": 103170 }, { "epoch": 29.68354430379747, "grad_norm": 2.6986496448516846, "learning_rate": 0.0014063291139240505, "loss": 0.667, "step": 103180 }, { "epoch": 29.686421173762945, "grad_norm": 1.3928067684173584, "learning_rate": 0.001406271576524741, "loss": 0.6726, "step": 103190 }, { "epoch": 29.689298043728424, "grad_norm": 0.7573829889297485, "learning_rate": 0.0014062140391254317, "loss": 0.6905, "step": 103200 }, { "epoch": 29.6921749136939, "grad_norm": 1.351508378982544, "learning_rate": 0.001406156501726122, "loss": 0.6448, "step": 103210 }, { "epoch": 29.69505178365938, "grad_norm": 0.6078707575798035, "learning_rate": 0.0014060989643268126, "loss": 0.5886, "step": 103220 }, { "epoch": 29.697928653624857, "grad_norm": 1.1279730796813965, "learning_rate": 0.001406041426927503, "loss": 0.7491, "step": 103230 }, { "epoch": 29.700805523590333, "grad_norm": 0.8248838186264038, "learning_rate": 0.0014059838895281933, "loss": 0.6811, "step": 103240 }, { "epoch": 29.703682393555813, "grad_norm": 3.2106475830078125, "learning_rate": 0.0014059263521288838, "loss": 0.6275, "step": 103250 }, { "epoch": 29.70655926352129, "grad_norm": 2.1951141357421875, "learning_rate": 0.0014058688147295742, "loss": 0.6098, "step": 103260 }, { "epoch": 29.709436133486765, "grad_norm": 1.8759572505950928, "learning_rate": 0.0014058112773302647, "loss": 0.7028, "step": 103270 }, { "epoch": 29.712313003452245, "grad_norm": 1.4884990453720093, "learning_rate": 0.0014057537399309553, "loss": 0.7055, "step": 103280 }, { "epoch": 29.71518987341772, "grad_norm": 1.3062108755111694, "learning_rate": 0.0014056962025316454, "loss": 0.8374, "step": 103290 }, { "epoch": 29.7180667433832, "grad_norm": 0.9498335123062134, "learning_rate": 0.001405638665132336, "loss": 0.4706, "step": 103300 }, { "epoch": 29.720943613348677, "grad_norm": 1.0970065593719482, "learning_rate": 0.0014055811277330266, "loss": 0.7771, "step": 103310 }, { "epoch": 29.723820483314153, "grad_norm": 1.1621938943862915, "learning_rate": 0.001405523590333717, "loss": 0.746, "step": 103320 }, { "epoch": 29.726697353279633, "grad_norm": 1.0187513828277588, "learning_rate": 0.0014054660529344075, "loss": 0.5976, "step": 103330 }, { "epoch": 29.72957422324511, "grad_norm": 1.1839994192123413, "learning_rate": 0.0014054085155350978, "loss": 0.6144, "step": 103340 }, { "epoch": 29.732451093210585, "grad_norm": 1.2777172327041626, "learning_rate": 0.0014053509781357882, "loss": 0.5745, "step": 103350 }, { "epoch": 29.735327963176065, "grad_norm": 2.0766618251800537, "learning_rate": 0.0014052934407364787, "loss": 0.7003, "step": 103360 }, { "epoch": 29.73820483314154, "grad_norm": 1.2374342679977417, "learning_rate": 0.001405235903337169, "loss": 0.7063, "step": 103370 }, { "epoch": 29.74108170310702, "grad_norm": 2.308096170425415, "learning_rate": 0.0014051783659378596, "loss": 0.7347, "step": 103380 }, { "epoch": 29.743958573072497, "grad_norm": 1.673753023147583, "learning_rate": 0.0014051208285385502, "loss": 0.6349, "step": 103390 }, { "epoch": 29.746835443037973, "grad_norm": 0.6784268617630005, "learning_rate": 0.0014050632911392406, "loss": 0.5917, "step": 103400 }, { "epoch": 29.749712313003453, "grad_norm": 0.887353777885437, "learning_rate": 0.001405005753739931, "loss": 0.6036, "step": 103410 }, { "epoch": 29.75258918296893, "grad_norm": 1.7355868816375732, "learning_rate": 0.0014049482163406215, "loss": 0.7573, "step": 103420 }, { "epoch": 29.75546605293441, "grad_norm": 0.9217425584793091, "learning_rate": 0.0014048906789413118, "loss": 0.715, "step": 103430 }, { "epoch": 29.758342922899885, "grad_norm": 1.207849144935608, "learning_rate": 0.0014048331415420024, "loss": 0.7019, "step": 103440 }, { "epoch": 29.76121979286536, "grad_norm": 1.386540412902832, "learning_rate": 0.0014047756041426927, "loss": 0.7165, "step": 103450 }, { "epoch": 29.76409666283084, "grad_norm": 0.8765902519226074, "learning_rate": 0.0014047180667433833, "loss": 0.6529, "step": 103460 }, { "epoch": 29.766973532796317, "grad_norm": 1.4071589708328247, "learning_rate": 0.0014046605293440736, "loss": 0.7676, "step": 103470 }, { "epoch": 29.769850402761794, "grad_norm": 0.9965376257896423, "learning_rate": 0.001404602991944764, "loss": 0.7716, "step": 103480 }, { "epoch": 29.772727272727273, "grad_norm": 0.9863953590393066, "learning_rate": 0.0014045454545454545, "loss": 0.6661, "step": 103490 }, { "epoch": 29.77560414269275, "grad_norm": 0.9610451459884644, "learning_rate": 0.001404487917146145, "loss": 0.5524, "step": 103500 }, { "epoch": 29.77848101265823, "grad_norm": 1.314001441001892, "learning_rate": 0.0014044303797468355, "loss": 0.846, "step": 103510 }, { "epoch": 29.781357882623706, "grad_norm": 1.1892303228378296, "learning_rate": 0.001404372842347526, "loss": 0.6925, "step": 103520 }, { "epoch": 29.78423475258918, "grad_norm": 1.7055566310882568, "learning_rate": 0.0014043153049482164, "loss": 0.5578, "step": 103530 }, { "epoch": 29.78711162255466, "grad_norm": 1.5727341175079346, "learning_rate": 0.0014042577675489067, "loss": 0.8241, "step": 103540 }, { "epoch": 29.789988492520138, "grad_norm": 1.404987096786499, "learning_rate": 0.0014042002301495973, "loss": 0.5722, "step": 103550 }, { "epoch": 29.792865362485614, "grad_norm": 1.7869970798492432, "learning_rate": 0.0014041426927502876, "loss": 0.732, "step": 103560 }, { "epoch": 29.795742232451094, "grad_norm": 0.6791825294494629, "learning_rate": 0.0014040851553509782, "loss": 0.6961, "step": 103570 }, { "epoch": 29.79861910241657, "grad_norm": 1.3513449430465698, "learning_rate": 0.0014040276179516688, "loss": 0.861, "step": 103580 }, { "epoch": 29.80149597238205, "grad_norm": 1.2506356239318848, "learning_rate": 0.0014039700805523589, "loss": 0.7987, "step": 103590 }, { "epoch": 29.804372842347526, "grad_norm": 1.0892082452774048, "learning_rate": 0.0014039125431530494, "loss": 0.5554, "step": 103600 }, { "epoch": 29.807249712313002, "grad_norm": 1.439324975013733, "learning_rate": 0.00140385500575374, "loss": 0.5957, "step": 103610 }, { "epoch": 29.810126582278482, "grad_norm": 0.8582077622413635, "learning_rate": 0.0014037974683544304, "loss": 0.6117, "step": 103620 }, { "epoch": 29.813003452243958, "grad_norm": 0.5965712666511536, "learning_rate": 0.001403739930955121, "loss": 0.6463, "step": 103630 }, { "epoch": 29.815880322209438, "grad_norm": 1.4155722856521606, "learning_rate": 0.0014036823935558115, "loss": 0.6861, "step": 103640 }, { "epoch": 29.818757192174914, "grad_norm": 1.3571574687957764, "learning_rate": 0.0014036248561565016, "loss": 0.6965, "step": 103650 }, { "epoch": 29.82163406214039, "grad_norm": 0.8144711852073669, "learning_rate": 0.0014035673187571922, "loss": 0.588, "step": 103660 }, { "epoch": 29.82451093210587, "grad_norm": 1.469348430633545, "learning_rate": 0.0014035097813578825, "loss": 0.6951, "step": 103670 }, { "epoch": 29.827387802071346, "grad_norm": 1.6614344120025635, "learning_rate": 0.001403452243958573, "loss": 0.8456, "step": 103680 }, { "epoch": 29.830264672036822, "grad_norm": 2.1955959796905518, "learning_rate": 0.0014033947065592637, "loss": 0.6979, "step": 103690 }, { "epoch": 29.833141542002302, "grad_norm": 0.9049367308616638, "learning_rate": 0.001403337169159954, "loss": 0.6779, "step": 103700 }, { "epoch": 29.83601841196778, "grad_norm": 1.9127146005630493, "learning_rate": 0.0014032796317606443, "loss": 1.0781, "step": 103710 }, { "epoch": 29.838895281933258, "grad_norm": 1.1023449897766113, "learning_rate": 0.001403222094361335, "loss": 0.5668, "step": 103720 }, { "epoch": 29.841772151898734, "grad_norm": 1.147041916847229, "learning_rate": 0.0014031645569620253, "loss": 0.6803, "step": 103730 }, { "epoch": 29.84464902186421, "grad_norm": 0.9886897206306458, "learning_rate": 0.0014031070195627158, "loss": 0.603, "step": 103740 }, { "epoch": 29.84752589182969, "grad_norm": 0.713692843914032, "learning_rate": 0.0014030494821634064, "loss": 0.6932, "step": 103750 }, { "epoch": 29.850402761795166, "grad_norm": 1.1295673847198486, "learning_rate": 0.0014029919447640967, "loss": 0.7665, "step": 103760 }, { "epoch": 29.853279631760643, "grad_norm": 1.1276624202728271, "learning_rate": 0.001402934407364787, "loss": 0.6704, "step": 103770 }, { "epoch": 29.856156501726122, "grad_norm": 1.0284632444381714, "learning_rate": 0.0014028768699654776, "loss": 0.6703, "step": 103780 }, { "epoch": 29.8590333716916, "grad_norm": 0.7449131608009338, "learning_rate": 0.001402819332566168, "loss": 0.5704, "step": 103790 }, { "epoch": 29.86191024165708, "grad_norm": 1.2006138563156128, "learning_rate": 0.0014027617951668586, "loss": 0.6861, "step": 103800 }, { "epoch": 29.864787111622555, "grad_norm": 0.8980486392974854, "learning_rate": 0.001402704257767549, "loss": 0.8438, "step": 103810 }, { "epoch": 29.86766398158803, "grad_norm": 1.4875537157058716, "learning_rate": 0.0014026467203682395, "loss": 0.7255, "step": 103820 }, { "epoch": 29.87054085155351, "grad_norm": 1.0727876424789429, "learning_rate": 0.0014025891829689298, "loss": 0.6749, "step": 103830 }, { "epoch": 29.873417721518987, "grad_norm": 0.8413900136947632, "learning_rate": 0.0014025316455696202, "loss": 0.6255, "step": 103840 }, { "epoch": 29.876294591484466, "grad_norm": 1.2961784601211548, "learning_rate": 0.0014024741081703107, "loss": 0.708, "step": 103850 }, { "epoch": 29.879171461449943, "grad_norm": 3.418252468109131, "learning_rate": 0.0014024165707710013, "loss": 0.7769, "step": 103860 }, { "epoch": 29.88204833141542, "grad_norm": 1.5740528106689453, "learning_rate": 0.0014023590333716916, "loss": 0.7272, "step": 103870 }, { "epoch": 29.8849252013809, "grad_norm": 1.6542108058929443, "learning_rate": 0.0014023014959723822, "loss": 0.6599, "step": 103880 }, { "epoch": 29.887802071346375, "grad_norm": 2.0476911067962646, "learning_rate": 0.0014022439585730725, "loss": 0.634, "step": 103890 }, { "epoch": 29.89067894131185, "grad_norm": 2.7061767578125, "learning_rate": 0.001402186421173763, "loss": 0.7298, "step": 103900 }, { "epoch": 29.89355581127733, "grad_norm": 0.9933760166168213, "learning_rate": 0.0014021288837744535, "loss": 0.6083, "step": 103910 }, { "epoch": 29.896432681242807, "grad_norm": 1.061134696006775, "learning_rate": 0.0014020713463751438, "loss": 0.583, "step": 103920 }, { "epoch": 29.899309551208287, "grad_norm": 1.2708122730255127, "learning_rate": 0.0014020138089758344, "loss": 0.742, "step": 103930 }, { "epoch": 29.902186421173763, "grad_norm": 1.1070663928985596, "learning_rate": 0.001401956271576525, "loss": 0.6623, "step": 103940 }, { "epoch": 29.90506329113924, "grad_norm": 1.0425273180007935, "learning_rate": 0.001401898734177215, "loss": 0.7779, "step": 103950 }, { "epoch": 29.90794016110472, "grad_norm": 1.533319115638733, "learning_rate": 0.0014018411967779056, "loss": 0.658, "step": 103960 }, { "epoch": 29.910817031070195, "grad_norm": 1.3798561096191406, "learning_rate": 0.0014017836593785962, "loss": 0.7848, "step": 103970 }, { "epoch": 29.913693901035675, "grad_norm": 1.9489773511886597, "learning_rate": 0.0014017261219792865, "loss": 0.7643, "step": 103980 }, { "epoch": 29.91657077100115, "grad_norm": 1.2546488046646118, "learning_rate": 0.001401668584579977, "loss": 0.722, "step": 103990 }, { "epoch": 29.919447640966627, "grad_norm": 1.7973389625549316, "learning_rate": 0.0014016110471806677, "loss": 0.6756, "step": 104000 }, { "epoch": 29.922324510932107, "grad_norm": 1.4741578102111816, "learning_rate": 0.0014015535097813578, "loss": 0.7667, "step": 104010 }, { "epoch": 29.925201380897583, "grad_norm": 1.5812664031982422, "learning_rate": 0.0014014959723820484, "loss": 0.8625, "step": 104020 }, { "epoch": 29.92807825086306, "grad_norm": 4.012279510498047, "learning_rate": 0.0014014384349827387, "loss": 0.815, "step": 104030 }, { "epoch": 29.93095512082854, "grad_norm": 2.1213326454162598, "learning_rate": 0.0014013808975834293, "loss": 0.6652, "step": 104040 }, { "epoch": 29.933831990794015, "grad_norm": 1.6420308351516724, "learning_rate": 0.0014013233601841198, "loss": 0.7179, "step": 104050 }, { "epoch": 29.936708860759495, "grad_norm": 1.520243525505066, "learning_rate": 0.0014012658227848102, "loss": 0.6979, "step": 104060 }, { "epoch": 29.93958573072497, "grad_norm": 1.5074349641799927, "learning_rate": 0.0014012082853855005, "loss": 0.8723, "step": 104070 }, { "epoch": 29.942462600690448, "grad_norm": 1.0100977420806885, "learning_rate": 0.001401150747986191, "loss": 0.5984, "step": 104080 }, { "epoch": 29.945339470655927, "grad_norm": 0.7665450572967529, "learning_rate": 0.0014010932105868814, "loss": 0.6391, "step": 104090 }, { "epoch": 29.948216340621403, "grad_norm": 1.1759274005889893, "learning_rate": 0.001401035673187572, "loss": 0.6248, "step": 104100 }, { "epoch": 29.951093210586883, "grad_norm": 2.704063653945923, "learning_rate": 0.0014009781357882626, "loss": 0.665, "step": 104110 }, { "epoch": 29.95397008055236, "grad_norm": 1.1445673704147339, "learning_rate": 0.0014009205983889527, "loss": 0.7507, "step": 104120 }, { "epoch": 29.956846950517836, "grad_norm": 1.6145042181015015, "learning_rate": 0.0014008630609896433, "loss": 0.57, "step": 104130 }, { "epoch": 29.959723820483315, "grad_norm": 1.5113459825515747, "learning_rate": 0.0014008055235903336, "loss": 0.7435, "step": 104140 }, { "epoch": 29.96260069044879, "grad_norm": 1.0925884246826172, "learning_rate": 0.0014007479861910242, "loss": 0.632, "step": 104150 }, { "epoch": 29.965477560414268, "grad_norm": 1.140398621559143, "learning_rate": 0.0014006904487917147, "loss": 0.5266, "step": 104160 }, { "epoch": 29.968354430379748, "grad_norm": 1.0053884983062744, "learning_rate": 0.001400632911392405, "loss": 0.7638, "step": 104170 }, { "epoch": 29.971231300345224, "grad_norm": 1.2955517768859863, "learning_rate": 0.0014005753739930954, "loss": 0.5675, "step": 104180 }, { "epoch": 29.974108170310704, "grad_norm": 1.1905189752578735, "learning_rate": 0.001400517836593786, "loss": 0.6833, "step": 104190 }, { "epoch": 29.97698504027618, "grad_norm": 1.4586689472198486, "learning_rate": 0.0014004602991944763, "loss": 0.6024, "step": 104200 }, { "epoch": 29.979861910241656, "grad_norm": 1.9902534484863281, "learning_rate": 0.001400402761795167, "loss": 0.9053, "step": 104210 }, { "epoch": 29.982738780207136, "grad_norm": 1.2156237363815308, "learning_rate": 0.0014003452243958575, "loss": 0.6836, "step": 104220 }, { "epoch": 29.985615650172612, "grad_norm": 1.3584387302398682, "learning_rate": 0.0014002876869965478, "loss": 0.6222, "step": 104230 }, { "epoch": 29.988492520138088, "grad_norm": 0.8358542919158936, "learning_rate": 0.0014002301495972382, "loss": 0.7187, "step": 104240 }, { "epoch": 29.991369390103568, "grad_norm": 1.6801257133483887, "learning_rate": 0.0014001726121979285, "loss": 0.6835, "step": 104250 }, { "epoch": 29.994246260069044, "grad_norm": 1.4218419790267944, "learning_rate": 0.001400115074798619, "loss": 0.8009, "step": 104260 }, { "epoch": 29.997123130034524, "grad_norm": 1.648660659790039, "learning_rate": 0.0014000575373993096, "loss": 0.6036, "step": 104270 }, { "epoch": 30.0, "grad_norm": 1.3104642629623413, "learning_rate": 0.0014, "loss": 0.5979, "step": 104280 }, { "epoch": 30.002876869965476, "grad_norm": 1.3065669536590576, "learning_rate": 0.0013999424626006905, "loss": 0.7157, "step": 104290 }, { "epoch": 30.005753739930956, "grad_norm": 1.797942042350769, "learning_rate": 0.001399884925201381, "loss": 0.7445, "step": 104300 }, { "epoch": 30.008630609896432, "grad_norm": 1.6232579946517944, "learning_rate": 0.0013998273878020712, "loss": 0.7403, "step": 104310 }, { "epoch": 30.011507479861912, "grad_norm": 1.3107235431671143, "learning_rate": 0.0013997698504027618, "loss": 0.5205, "step": 104320 }, { "epoch": 30.014384349827388, "grad_norm": 1.7717052698135376, "learning_rate": 0.0013997123130034524, "loss": 0.5746, "step": 104330 }, { "epoch": 30.017261219792864, "grad_norm": 1.837931513786316, "learning_rate": 0.0013996547756041427, "loss": 0.7275, "step": 104340 }, { "epoch": 30.020138089758344, "grad_norm": 1.977535367012024, "learning_rate": 0.0013995972382048333, "loss": 0.7161, "step": 104350 }, { "epoch": 30.02301495972382, "grad_norm": 0.7147780656814575, "learning_rate": 0.0013995397008055236, "loss": 0.611, "step": 104360 }, { "epoch": 30.025891829689296, "grad_norm": 1.3018670082092285, "learning_rate": 0.001399482163406214, "loss": 0.4784, "step": 104370 }, { "epoch": 30.028768699654776, "grad_norm": 1.4512741565704346, "learning_rate": 0.0013994246260069045, "loss": 0.8759, "step": 104380 }, { "epoch": 30.031645569620252, "grad_norm": 0.6653361916542053, "learning_rate": 0.0013993670886075949, "loss": 0.4596, "step": 104390 }, { "epoch": 30.034522439585732, "grad_norm": 0.7998755574226379, "learning_rate": 0.0013993095512082855, "loss": 0.827, "step": 104400 }, { "epoch": 30.03739930955121, "grad_norm": 1.2847450971603394, "learning_rate": 0.001399252013808976, "loss": 0.7283, "step": 104410 }, { "epoch": 30.040276179516685, "grad_norm": 1.2752302885055542, "learning_rate": 0.0013991944764096661, "loss": 0.7521, "step": 104420 }, { "epoch": 30.043153049482164, "grad_norm": 1.6622874736785889, "learning_rate": 0.0013991369390103567, "loss": 0.6866, "step": 104430 }, { "epoch": 30.04602991944764, "grad_norm": 1.343758463859558, "learning_rate": 0.0013990794016110473, "loss": 0.663, "step": 104440 }, { "epoch": 30.048906789413117, "grad_norm": 1.5008567571640015, "learning_rate": 0.0013990218642117376, "loss": 0.7355, "step": 104450 }, { "epoch": 30.051783659378597, "grad_norm": 1.2644188404083252, "learning_rate": 0.0013989643268124282, "loss": 0.6431, "step": 104460 }, { "epoch": 30.054660529344073, "grad_norm": 1.0382962226867676, "learning_rate": 0.0013989067894131187, "loss": 0.8144, "step": 104470 }, { "epoch": 30.057537399309552, "grad_norm": 0.7797491550445557, "learning_rate": 0.0013988492520138089, "loss": 0.5986, "step": 104480 }, { "epoch": 30.06041426927503, "grad_norm": 1.3279812335968018, "learning_rate": 0.0013987917146144994, "loss": 0.4681, "step": 104490 }, { "epoch": 30.063291139240505, "grad_norm": 0.8413770794868469, "learning_rate": 0.0013987341772151898, "loss": 0.5435, "step": 104500 }, { "epoch": 30.066168009205985, "grad_norm": 1.898857831954956, "learning_rate": 0.0013986766398158804, "loss": 0.6882, "step": 104510 }, { "epoch": 30.06904487917146, "grad_norm": 1.2674371004104614, "learning_rate": 0.001398619102416571, "loss": 0.6121, "step": 104520 }, { "epoch": 30.07192174913694, "grad_norm": 1.1098475456237793, "learning_rate": 0.0013985615650172613, "loss": 0.5054, "step": 104530 }, { "epoch": 30.074798619102417, "grad_norm": 2.0368003845214844, "learning_rate": 0.0013985040276179516, "loss": 0.6372, "step": 104540 }, { "epoch": 30.077675489067893, "grad_norm": 2.3172929286956787, "learning_rate": 0.0013984464902186422, "loss": 0.6925, "step": 104550 }, { "epoch": 30.080552359033373, "grad_norm": 1.3763405084609985, "learning_rate": 0.0013983889528193325, "loss": 0.6779, "step": 104560 }, { "epoch": 30.08342922899885, "grad_norm": 0.9273676872253418, "learning_rate": 0.001398331415420023, "loss": 0.542, "step": 104570 }, { "epoch": 30.086306098964325, "grad_norm": 1.0988785028457642, "learning_rate": 0.0013982738780207137, "loss": 0.5924, "step": 104580 }, { "epoch": 30.089182968929805, "grad_norm": 1.1541774272918701, "learning_rate": 0.001398216340621404, "loss": 0.7993, "step": 104590 }, { "epoch": 30.09205983889528, "grad_norm": 1.8315377235412598, "learning_rate": 0.0013981588032220943, "loss": 0.704, "step": 104600 }, { "epoch": 30.09493670886076, "grad_norm": 1.1518664360046387, "learning_rate": 0.0013981012658227847, "loss": 0.5907, "step": 104610 }, { "epoch": 30.097813578826237, "grad_norm": 1.186131238937378, "learning_rate": 0.0013980437284234753, "loss": 0.5789, "step": 104620 }, { "epoch": 30.100690448791713, "grad_norm": 1.3400652408599854, "learning_rate": 0.0013979861910241658, "loss": 0.6151, "step": 104630 }, { "epoch": 30.103567318757193, "grad_norm": 1.4733195304870605, "learning_rate": 0.0013979286536248562, "loss": 0.6357, "step": 104640 }, { "epoch": 30.10644418872267, "grad_norm": 1.6702985763549805, "learning_rate": 0.0013978711162255467, "loss": 0.57, "step": 104650 }, { "epoch": 30.10932105868815, "grad_norm": 1.4201939105987549, "learning_rate": 0.001397813578826237, "loss": 0.6145, "step": 104660 }, { "epoch": 30.112197928653625, "grad_norm": 1.6971964836120605, "learning_rate": 0.0013977560414269274, "loss": 0.7246, "step": 104670 }, { "epoch": 30.1150747986191, "grad_norm": 1.2110055685043335, "learning_rate": 0.001397698504027618, "loss": 0.635, "step": 104680 }, { "epoch": 30.11795166858458, "grad_norm": 1.2643473148345947, "learning_rate": 0.0013976409666283086, "loss": 0.6172, "step": 104690 }, { "epoch": 30.120828538550057, "grad_norm": 1.5473265647888184, "learning_rate": 0.001397583429228999, "loss": 0.6423, "step": 104700 }, { "epoch": 30.123705408515534, "grad_norm": 0.9263600707054138, "learning_rate": 0.0013975258918296895, "loss": 0.612, "step": 104710 }, { "epoch": 30.126582278481013, "grad_norm": 1.3111982345581055, "learning_rate": 0.0013974683544303796, "loss": 0.5258, "step": 104720 }, { "epoch": 30.12945914844649, "grad_norm": 1.6801505088806152, "learning_rate": 0.0013974108170310702, "loss": 0.7382, "step": 104730 }, { "epoch": 30.13233601841197, "grad_norm": 0.9458281397819519, "learning_rate": 0.0013973532796317607, "loss": 0.5652, "step": 104740 }, { "epoch": 30.135212888377445, "grad_norm": 0.8105847239494324, "learning_rate": 0.001397295742232451, "loss": 0.6007, "step": 104750 }, { "epoch": 30.13808975834292, "grad_norm": 1.4327696561813354, "learning_rate": 0.0013972382048331416, "loss": 0.8644, "step": 104760 }, { "epoch": 30.1409666283084, "grad_norm": 1.090075135231018, "learning_rate": 0.0013971806674338322, "loss": 0.7029, "step": 104770 }, { "epoch": 30.143843498273878, "grad_norm": 2.2497732639312744, "learning_rate": 0.0013971231300345223, "loss": 0.5443, "step": 104780 }, { "epoch": 30.146720368239354, "grad_norm": 2.221158504486084, "learning_rate": 0.0013970655926352129, "loss": 0.7725, "step": 104790 }, { "epoch": 30.149597238204834, "grad_norm": 1.1006999015808105, "learning_rate": 0.0013970080552359035, "loss": 0.5343, "step": 104800 }, { "epoch": 30.15247410817031, "grad_norm": 1.352198839187622, "learning_rate": 0.0013969505178365938, "loss": 0.6303, "step": 104810 }, { "epoch": 30.15535097813579, "grad_norm": 1.0616602897644043, "learning_rate": 0.0013968929804372844, "loss": 0.5221, "step": 104820 }, { "epoch": 30.158227848101266, "grad_norm": 1.81692373752594, "learning_rate": 0.0013968354430379747, "loss": 0.6829, "step": 104830 }, { "epoch": 30.161104718066742, "grad_norm": 1.4604791402816772, "learning_rate": 0.001396777905638665, "loss": 0.5251, "step": 104840 }, { "epoch": 30.16398158803222, "grad_norm": 1.5867186784744263, "learning_rate": 0.0013967203682393556, "loss": 0.6656, "step": 104850 }, { "epoch": 30.166858457997698, "grad_norm": 2.759119749069214, "learning_rate": 0.001396662830840046, "loss": 0.5729, "step": 104860 }, { "epoch": 30.169735327963178, "grad_norm": 0.8051636219024658, "learning_rate": 0.0013966052934407365, "loss": 0.6849, "step": 104870 }, { "epoch": 30.172612197928654, "grad_norm": 1.946343183517456, "learning_rate": 0.001396547756041427, "loss": 0.8556, "step": 104880 }, { "epoch": 30.17548906789413, "grad_norm": 0.9605329036712646, "learning_rate": 0.0013964902186421174, "loss": 0.571, "step": 104890 }, { "epoch": 30.17836593785961, "grad_norm": 1.5942161083221436, "learning_rate": 0.0013964326812428078, "loss": 0.6842, "step": 104900 }, { "epoch": 30.181242807825086, "grad_norm": 1.6648670434951782, "learning_rate": 0.0013963751438434984, "loss": 0.6803, "step": 104910 }, { "epoch": 30.184119677790562, "grad_norm": 1.3110443353652954, "learning_rate": 0.0013963176064441887, "loss": 0.5809, "step": 104920 }, { "epoch": 30.186996547756042, "grad_norm": 1.0526546239852905, "learning_rate": 0.0013962600690448793, "loss": 0.6626, "step": 104930 }, { "epoch": 30.189873417721518, "grad_norm": 1.7337892055511475, "learning_rate": 0.0013962025316455696, "loss": 0.5135, "step": 104940 }, { "epoch": 30.192750287686998, "grad_norm": 3.3003039360046387, "learning_rate": 0.00139614499424626, "loss": 0.6945, "step": 104950 }, { "epoch": 30.195627157652474, "grad_norm": 1.2919235229492188, "learning_rate": 0.0013960874568469505, "loss": 0.6604, "step": 104960 }, { "epoch": 30.19850402761795, "grad_norm": 1.0557109117507935, "learning_rate": 0.0013960299194476409, "loss": 0.5671, "step": 104970 }, { "epoch": 30.20138089758343, "grad_norm": 2.2252402305603027, "learning_rate": 0.0013959723820483314, "loss": 0.7316, "step": 104980 }, { "epoch": 30.204257767548906, "grad_norm": 1.1863723993301392, "learning_rate": 0.001395914844649022, "loss": 0.6665, "step": 104990 }, { "epoch": 30.207134637514386, "grad_norm": 0.7034436464309692, "learning_rate": 0.0013958573072497123, "loss": 0.5759, "step": 105000 }, { "epoch": 30.210011507479862, "grad_norm": 1.5082112550735474, "learning_rate": 0.0013957997698504027, "loss": 0.6278, "step": 105010 }, { "epoch": 30.21288837744534, "grad_norm": 1.6765509843826294, "learning_rate": 0.0013957422324510933, "loss": 0.6696, "step": 105020 }, { "epoch": 30.21576524741082, "grad_norm": 1.5639442205429077, "learning_rate": 0.0013956846950517836, "loss": 0.5513, "step": 105030 }, { "epoch": 30.218642117376294, "grad_norm": 0.8949357271194458, "learning_rate": 0.0013956271576524742, "loss": 0.5087, "step": 105040 }, { "epoch": 30.22151898734177, "grad_norm": 1.191649317741394, "learning_rate": 0.0013955696202531647, "loss": 0.5926, "step": 105050 }, { "epoch": 30.22439585730725, "grad_norm": 1.7569494247436523, "learning_rate": 0.001395512082853855, "loss": 0.6926, "step": 105060 }, { "epoch": 30.227272727272727, "grad_norm": 1.2958487272262573, "learning_rate": 0.0013954545454545454, "loss": 0.5959, "step": 105070 }, { "epoch": 30.230149597238206, "grad_norm": 1.2874773740768433, "learning_rate": 0.0013953970080552358, "loss": 0.6019, "step": 105080 }, { "epoch": 30.233026467203683, "grad_norm": 0.8973435163497925, "learning_rate": 0.0013953394706559263, "loss": 0.6617, "step": 105090 }, { "epoch": 30.23590333716916, "grad_norm": 1.138462781906128, "learning_rate": 0.001395281933256617, "loss": 0.7338, "step": 105100 }, { "epoch": 30.23878020713464, "grad_norm": 0.9819810390472412, "learning_rate": 0.0013952243958573073, "loss": 0.5797, "step": 105110 }, { "epoch": 30.241657077100115, "grad_norm": 1.1997917890548706, "learning_rate": 0.0013951668584579978, "loss": 0.7942, "step": 105120 }, { "epoch": 30.24453394706559, "grad_norm": 1.5220861434936523, "learning_rate": 0.0013951093210586882, "loss": 0.4898, "step": 105130 }, { "epoch": 30.24741081703107, "grad_norm": 1.0471867322921753, "learning_rate": 0.0013950517836593785, "loss": 0.7766, "step": 105140 }, { "epoch": 30.250287686996547, "grad_norm": 1.5533015727996826, "learning_rate": 0.001394994246260069, "loss": 0.548, "step": 105150 }, { "epoch": 30.253164556962027, "grad_norm": 0.881529688835144, "learning_rate": 0.0013949367088607596, "loss": 0.6214, "step": 105160 }, { "epoch": 30.256041426927503, "grad_norm": 1.9113903045654297, "learning_rate": 0.00139487917146145, "loss": 0.6445, "step": 105170 }, { "epoch": 30.25891829689298, "grad_norm": 1.2900402545928955, "learning_rate": 0.0013948216340621405, "loss": 0.6375, "step": 105180 }, { "epoch": 30.26179516685846, "grad_norm": 1.101955533027649, "learning_rate": 0.0013947640966628307, "loss": 0.5958, "step": 105190 }, { "epoch": 30.264672036823935, "grad_norm": 0.9530765414237976, "learning_rate": 0.0013947065592635212, "loss": 0.7003, "step": 105200 }, { "epoch": 30.267548906789415, "grad_norm": 0.973207950592041, "learning_rate": 0.0013946490218642118, "loss": 0.6674, "step": 105210 }, { "epoch": 30.27042577675489, "grad_norm": 0.7733547687530518, "learning_rate": 0.0013945914844649022, "loss": 0.516, "step": 105220 }, { "epoch": 30.273302646720367, "grad_norm": 1.3835183382034302, "learning_rate": 0.0013945339470655927, "loss": 0.5686, "step": 105230 }, { "epoch": 30.276179516685847, "grad_norm": 1.2130062580108643, "learning_rate": 0.0013944764096662833, "loss": 0.7388, "step": 105240 }, { "epoch": 30.279056386651323, "grad_norm": 1.0485880374908447, "learning_rate": 0.0013944188722669734, "loss": 0.6375, "step": 105250 }, { "epoch": 30.2819332566168, "grad_norm": 0.991782546043396, "learning_rate": 0.001394361334867664, "loss": 0.6838, "step": 105260 }, { "epoch": 30.28481012658228, "grad_norm": 0.7421627640724182, "learning_rate": 0.0013943037974683545, "loss": 0.6423, "step": 105270 }, { "epoch": 30.287686996547755, "grad_norm": 0.8612406253814697, "learning_rate": 0.0013942462600690449, "loss": 0.5745, "step": 105280 }, { "epoch": 30.290563866513235, "grad_norm": 1.0469639301300049, "learning_rate": 0.0013941887226697354, "loss": 0.7585, "step": 105290 }, { "epoch": 30.29344073647871, "grad_norm": 1.3431322574615479, "learning_rate": 0.0013941311852704258, "loss": 0.6597, "step": 105300 }, { "epoch": 30.296317606444187, "grad_norm": 1.1106699705123901, "learning_rate": 0.0013940736478711161, "loss": 0.7514, "step": 105310 }, { "epoch": 30.299194476409667, "grad_norm": 1.0484291315078735, "learning_rate": 0.0013940161104718067, "loss": 0.8314, "step": 105320 }, { "epoch": 30.302071346375143, "grad_norm": 0.7216423749923706, "learning_rate": 0.001393958573072497, "loss": 0.7575, "step": 105330 }, { "epoch": 30.30494821634062, "grad_norm": 0.9299666285514832, "learning_rate": 0.0013939010356731876, "loss": 0.6138, "step": 105340 }, { "epoch": 30.3078250863061, "grad_norm": 1.4869221448898315, "learning_rate": 0.0013938434982738782, "loss": 0.6562, "step": 105350 }, { "epoch": 30.310701956271576, "grad_norm": 3.621121644973755, "learning_rate": 0.0013937859608745685, "loss": 0.6273, "step": 105360 }, { "epoch": 30.313578826237055, "grad_norm": 1.533328890800476, "learning_rate": 0.0013937284234752589, "loss": 0.6874, "step": 105370 }, { "epoch": 30.31645569620253, "grad_norm": 2.0127551555633545, "learning_rate": 0.0013936708860759494, "loss": 0.5931, "step": 105380 }, { "epoch": 30.319332566168008, "grad_norm": 1.1860475540161133, "learning_rate": 0.0013936133486766398, "loss": 0.7176, "step": 105390 }, { "epoch": 30.322209436133488, "grad_norm": 1.3446582555770874, "learning_rate": 0.0013935558112773304, "loss": 0.5776, "step": 105400 }, { "epoch": 30.325086306098964, "grad_norm": 2.097043514251709, "learning_rate": 0.0013934982738780207, "loss": 0.5965, "step": 105410 }, { "epoch": 30.327963176064443, "grad_norm": 1.6736197471618652, "learning_rate": 0.0013934407364787113, "loss": 0.6143, "step": 105420 }, { "epoch": 30.33084004602992, "grad_norm": 1.5387604236602783, "learning_rate": 0.0013933831990794016, "loss": 0.6632, "step": 105430 }, { "epoch": 30.333716915995396, "grad_norm": 1.2226879596710205, "learning_rate": 0.001393325661680092, "loss": 0.5519, "step": 105440 }, { "epoch": 30.336593785960876, "grad_norm": 1.208003282546997, "learning_rate": 0.0013932681242807825, "loss": 0.591, "step": 105450 }, { "epoch": 30.339470655926352, "grad_norm": 1.148518681526184, "learning_rate": 0.001393210586881473, "loss": 0.6044, "step": 105460 }, { "epoch": 30.342347525891828, "grad_norm": 1.3816732168197632, "learning_rate": 0.0013931530494821634, "loss": 0.7422, "step": 105470 }, { "epoch": 30.345224395857308, "grad_norm": 1.055405855178833, "learning_rate": 0.001393095512082854, "loss": 0.6057, "step": 105480 }, { "epoch": 30.348101265822784, "grad_norm": 0.9307126402854919, "learning_rate": 0.0013930379746835443, "loss": 0.714, "step": 105490 }, { "epoch": 30.350978135788264, "grad_norm": 1.6382793188095093, "learning_rate": 0.0013929804372842347, "loss": 0.8799, "step": 105500 }, { "epoch": 30.35385500575374, "grad_norm": 1.5112446546554565, "learning_rate": 0.0013929228998849253, "loss": 0.6358, "step": 105510 }, { "epoch": 30.356731875719216, "grad_norm": 2.249098777770996, "learning_rate": 0.0013928653624856156, "loss": 0.7055, "step": 105520 }, { "epoch": 30.359608745684696, "grad_norm": 1.8258413076400757, "learning_rate": 0.0013928078250863062, "loss": 0.6367, "step": 105530 }, { "epoch": 30.362485615650172, "grad_norm": 1.6214323043823242, "learning_rate": 0.0013927502876869967, "loss": 0.8999, "step": 105540 }, { "epoch": 30.365362485615652, "grad_norm": 1.1465706825256348, "learning_rate": 0.0013926927502876869, "loss": 0.8933, "step": 105550 }, { "epoch": 30.368239355581128, "grad_norm": 2.482419490814209, "learning_rate": 0.0013926352128883774, "loss": 0.659, "step": 105560 }, { "epoch": 30.371116225546604, "grad_norm": 0.9453302621841431, "learning_rate": 0.001392577675489068, "loss": 0.7633, "step": 105570 }, { "epoch": 30.373993095512084, "grad_norm": 1.0135730504989624, "learning_rate": 0.0013925201380897583, "loss": 0.5831, "step": 105580 }, { "epoch": 30.37686996547756, "grad_norm": 1.161895513534546, "learning_rate": 0.001392462600690449, "loss": 0.7041, "step": 105590 }, { "epoch": 30.379746835443036, "grad_norm": 1.0108574628829956, "learning_rate": 0.0013924050632911395, "loss": 0.7523, "step": 105600 }, { "epoch": 30.382623705408516, "grad_norm": 1.4613418579101562, "learning_rate": 0.0013923475258918296, "loss": 0.6207, "step": 105610 }, { "epoch": 30.385500575373992, "grad_norm": 2.4407522678375244, "learning_rate": 0.0013922899884925202, "loss": 0.826, "step": 105620 }, { "epoch": 30.388377445339472, "grad_norm": 1.2720263004302979, "learning_rate": 0.0013922324510932105, "loss": 0.7371, "step": 105630 }, { "epoch": 30.39125431530495, "grad_norm": 1.1181831359863281, "learning_rate": 0.001392174913693901, "loss": 0.7268, "step": 105640 }, { "epoch": 30.394131185270425, "grad_norm": 1.7919931411743164, "learning_rate": 0.0013921173762945916, "loss": 0.6697, "step": 105650 }, { "epoch": 30.397008055235904, "grad_norm": 1.4110910892486572, "learning_rate": 0.001392059838895282, "loss": 0.6581, "step": 105660 }, { "epoch": 30.39988492520138, "grad_norm": 2.071721315383911, "learning_rate": 0.0013920023014959723, "loss": 0.7023, "step": 105670 }, { "epoch": 30.402761795166857, "grad_norm": 0.7942394614219666, "learning_rate": 0.0013919447640966629, "loss": 0.9308, "step": 105680 }, { "epoch": 30.405638665132336, "grad_norm": 2.0849857330322266, "learning_rate": 0.0013918872266973532, "loss": 0.5773, "step": 105690 }, { "epoch": 30.408515535097813, "grad_norm": 1.71795654296875, "learning_rate": 0.0013918296892980438, "loss": 0.7522, "step": 105700 }, { "epoch": 30.411392405063292, "grad_norm": 1.4158935546875, "learning_rate": 0.0013917721518987344, "loss": 0.6798, "step": 105710 }, { "epoch": 30.41426927502877, "grad_norm": 1.1741951704025269, "learning_rate": 0.0013917146144994247, "loss": 0.6991, "step": 105720 }, { "epoch": 30.417146144994245, "grad_norm": 2.546647548675537, "learning_rate": 0.001391657077100115, "loss": 0.6525, "step": 105730 }, { "epoch": 30.420023014959725, "grad_norm": 0.9865665435791016, "learning_rate": 0.0013915995397008056, "loss": 0.6548, "step": 105740 }, { "epoch": 30.4228998849252, "grad_norm": 1.2941722869873047, "learning_rate": 0.001391542002301496, "loss": 0.6291, "step": 105750 }, { "epoch": 30.42577675489068, "grad_norm": 1.951656460762024, "learning_rate": 0.0013914844649021865, "loss": 0.6876, "step": 105760 }, { "epoch": 30.428653624856157, "grad_norm": 1.5284440517425537, "learning_rate": 0.0013914269275028769, "loss": 0.6147, "step": 105770 }, { "epoch": 30.431530494821633, "grad_norm": 0.8630654215812683, "learning_rate": 0.0013913693901035672, "loss": 0.6625, "step": 105780 }, { "epoch": 30.434407364787113, "grad_norm": 0.9099169373512268, "learning_rate": 0.0013913118527042578, "loss": 0.6967, "step": 105790 }, { "epoch": 30.43728423475259, "grad_norm": 1.0790473222732544, "learning_rate": 0.0013912543153049481, "loss": 0.5451, "step": 105800 }, { "epoch": 30.440161104718065, "grad_norm": 0.9956268072128296, "learning_rate": 0.0013911967779056387, "loss": 0.5026, "step": 105810 }, { "epoch": 30.443037974683545, "grad_norm": 0.7601430416107178, "learning_rate": 0.0013911392405063293, "loss": 0.6209, "step": 105820 }, { "epoch": 30.44591484464902, "grad_norm": 1.1296964883804321, "learning_rate": 0.0013910817031070196, "loss": 0.738, "step": 105830 }, { "epoch": 30.4487917146145, "grad_norm": 1.881531834602356, "learning_rate": 0.00139102416570771, "loss": 0.8347, "step": 105840 }, { "epoch": 30.451668584579977, "grad_norm": 1.046085238456726, "learning_rate": 0.0013909666283084005, "loss": 0.586, "step": 105850 }, { "epoch": 30.454545454545453, "grad_norm": 1.7124793529510498, "learning_rate": 0.0013909090909090909, "loss": 0.7106, "step": 105860 }, { "epoch": 30.457422324510933, "grad_norm": 1.275933027267456, "learning_rate": 0.0013908515535097814, "loss": 0.7406, "step": 105870 }, { "epoch": 30.46029919447641, "grad_norm": 1.1543084383010864, "learning_rate": 0.0013907940161104718, "loss": 0.5391, "step": 105880 }, { "epoch": 30.46317606444189, "grad_norm": 1.3989800214767456, "learning_rate": 0.0013907364787111623, "loss": 0.5779, "step": 105890 }, { "epoch": 30.466052934407365, "grad_norm": 0.9827699065208435, "learning_rate": 0.0013906789413118527, "loss": 0.5882, "step": 105900 }, { "epoch": 30.46892980437284, "grad_norm": 1.536669135093689, "learning_rate": 0.001390621403912543, "loss": 0.656, "step": 105910 }, { "epoch": 30.47180667433832, "grad_norm": 1.125410795211792, "learning_rate": 0.0013905638665132336, "loss": 0.6864, "step": 105920 }, { "epoch": 30.474683544303797, "grad_norm": 1.5010699033737183, "learning_rate": 0.0013905063291139242, "loss": 0.7365, "step": 105930 }, { "epoch": 30.477560414269274, "grad_norm": 2.127913236618042, "learning_rate": 0.0013904487917146145, "loss": 0.5575, "step": 105940 }, { "epoch": 30.480437284234753, "grad_norm": 2.164496421813965, "learning_rate": 0.001390391254315305, "loss": 0.7734, "step": 105950 }, { "epoch": 30.48331415420023, "grad_norm": 1.3800204992294312, "learning_rate": 0.0013903337169159954, "loss": 0.664, "step": 105960 }, { "epoch": 30.48619102416571, "grad_norm": 1.445682168006897, "learning_rate": 0.0013902761795166858, "loss": 0.7447, "step": 105970 }, { "epoch": 30.489067894131185, "grad_norm": 1.092777132987976, "learning_rate": 0.0013902186421173763, "loss": 0.6806, "step": 105980 }, { "epoch": 30.49194476409666, "grad_norm": 1.1021273136138916, "learning_rate": 0.0013901611047180667, "loss": 0.7264, "step": 105990 }, { "epoch": 30.49482163406214, "grad_norm": 1.2590197324752808, "learning_rate": 0.0013901035673187572, "loss": 0.6725, "step": 106000 }, { "epoch": 30.497698504027618, "grad_norm": 1.993193507194519, "learning_rate": 0.0013900460299194478, "loss": 0.5823, "step": 106010 }, { "epoch": 30.500575373993094, "grad_norm": 0.8751702308654785, "learning_rate": 0.001389988492520138, "loss": 0.6126, "step": 106020 }, { "epoch": 30.503452243958574, "grad_norm": 2.9627110958099365, "learning_rate": 0.0013899309551208285, "loss": 0.6835, "step": 106030 }, { "epoch": 30.50632911392405, "grad_norm": 0.8067577481269836, "learning_rate": 0.001389873417721519, "loss": 0.6556, "step": 106040 }, { "epoch": 30.50920598388953, "grad_norm": 1.2804973125457764, "learning_rate": 0.0013898158803222094, "loss": 0.822, "step": 106050 }, { "epoch": 30.512082853855006, "grad_norm": 1.780505895614624, "learning_rate": 0.0013897583429229, "loss": 0.8299, "step": 106060 }, { "epoch": 30.514959723820482, "grad_norm": 2.6858184337615967, "learning_rate": 0.0013897008055235905, "loss": 0.7167, "step": 106070 }, { "epoch": 30.51783659378596, "grad_norm": 1.1272372007369995, "learning_rate": 0.0013896432681242807, "loss": 0.7648, "step": 106080 }, { "epoch": 30.520713463751438, "grad_norm": 0.8730027675628662, "learning_rate": 0.0013895857307249712, "loss": 0.4969, "step": 106090 }, { "epoch": 30.523590333716918, "grad_norm": 0.8973532915115356, "learning_rate": 0.0013895281933256616, "loss": 0.6267, "step": 106100 }, { "epoch": 30.526467203682394, "grad_norm": 1.557898998260498, "learning_rate": 0.0013894706559263522, "loss": 0.6497, "step": 106110 }, { "epoch": 30.52934407364787, "grad_norm": 1.1329505443572998, "learning_rate": 0.0013894131185270427, "loss": 0.7789, "step": 106120 }, { "epoch": 30.53222094361335, "grad_norm": 1.308624505996704, "learning_rate": 0.001389355581127733, "loss": 0.5924, "step": 106130 }, { "epoch": 30.535097813578826, "grad_norm": 2.515451431274414, "learning_rate": 0.0013892980437284234, "loss": 0.8475, "step": 106140 }, { "epoch": 30.537974683544302, "grad_norm": 1.3374472856521606, "learning_rate": 0.001389240506329114, "loss": 0.7717, "step": 106150 }, { "epoch": 30.540851553509782, "grad_norm": 0.8591842651367188, "learning_rate": 0.0013891829689298043, "loss": 0.6037, "step": 106160 }, { "epoch": 30.543728423475258, "grad_norm": 1.9840401411056519, "learning_rate": 0.0013891254315304949, "loss": 0.81, "step": 106170 }, { "epoch": 30.546605293440738, "grad_norm": 1.0413881540298462, "learning_rate": 0.0013890678941311854, "loss": 0.643, "step": 106180 }, { "epoch": 30.549482163406214, "grad_norm": 1.6767383813858032, "learning_rate": 0.0013890103567318758, "loss": 0.6361, "step": 106190 }, { "epoch": 30.55235903337169, "grad_norm": 1.53343665599823, "learning_rate": 0.0013889528193325661, "loss": 0.5718, "step": 106200 }, { "epoch": 30.55523590333717, "grad_norm": 0.8039007186889648, "learning_rate": 0.0013888952819332565, "loss": 0.7673, "step": 106210 }, { "epoch": 30.558112773302646, "grad_norm": 1.4626601934432983, "learning_rate": 0.001388837744533947, "loss": 0.6658, "step": 106220 }, { "epoch": 30.560989643268123, "grad_norm": 0.8606476187705994, "learning_rate": 0.0013887802071346376, "loss": 0.7028, "step": 106230 }, { "epoch": 30.563866513233602, "grad_norm": 1.622833013534546, "learning_rate": 0.001388722669735328, "loss": 0.7923, "step": 106240 }, { "epoch": 30.56674338319908, "grad_norm": 1.3740557432174683, "learning_rate": 0.0013886651323360185, "loss": 0.8308, "step": 106250 }, { "epoch": 30.569620253164558, "grad_norm": 1.9411641359329224, "learning_rate": 0.0013886075949367089, "loss": 0.6697, "step": 106260 }, { "epoch": 30.572497123130034, "grad_norm": 1.037056565284729, "learning_rate": 0.0013885500575373992, "loss": 0.5206, "step": 106270 }, { "epoch": 30.57537399309551, "grad_norm": 0.7339346408843994, "learning_rate": 0.0013884925201380898, "loss": 0.552, "step": 106280 }, { "epoch": 30.57825086306099, "grad_norm": 1.0379291772842407, "learning_rate": 0.0013884349827387803, "loss": 0.6579, "step": 106290 }, { "epoch": 30.581127733026467, "grad_norm": 1.1176989078521729, "learning_rate": 0.0013883774453394707, "loss": 0.642, "step": 106300 }, { "epoch": 30.584004602991946, "grad_norm": 1.1000981330871582, "learning_rate": 0.0013883199079401613, "loss": 0.6758, "step": 106310 }, { "epoch": 30.586881472957423, "grad_norm": 1.0016156435012817, "learning_rate": 0.0013882623705408514, "loss": 0.6849, "step": 106320 }, { "epoch": 30.5897583429229, "grad_norm": 1.1145392656326294, "learning_rate": 0.001388204833141542, "loss": 0.6474, "step": 106330 }, { "epoch": 30.59263521288838, "grad_norm": 1.1298632621765137, "learning_rate": 0.0013881472957422325, "loss": 0.7043, "step": 106340 }, { "epoch": 30.595512082853855, "grad_norm": 1.0272690057754517, "learning_rate": 0.0013880897583429229, "loss": 0.6822, "step": 106350 }, { "epoch": 30.59838895281933, "grad_norm": 1.7549792528152466, "learning_rate": 0.0013880322209436134, "loss": 0.6837, "step": 106360 }, { "epoch": 30.60126582278481, "grad_norm": 2.4770302772521973, "learning_rate": 0.001387974683544304, "loss": 0.7384, "step": 106370 }, { "epoch": 30.604142692750287, "grad_norm": 1.280956506729126, "learning_rate": 0.0013879171461449941, "loss": 0.5837, "step": 106380 }, { "epoch": 30.607019562715767, "grad_norm": 1.350319743156433, "learning_rate": 0.0013878596087456847, "loss": 0.5425, "step": 106390 }, { "epoch": 30.609896432681243, "grad_norm": 1.1521515846252441, "learning_rate": 0.0013878020713463753, "loss": 0.6888, "step": 106400 }, { "epoch": 30.61277330264672, "grad_norm": 1.5212256908416748, "learning_rate": 0.0013877445339470656, "loss": 0.6868, "step": 106410 }, { "epoch": 30.6156501726122, "grad_norm": 1.0932899713516235, "learning_rate": 0.0013876869965477562, "loss": 0.777, "step": 106420 }, { "epoch": 30.618527042577675, "grad_norm": 1.5478971004486084, "learning_rate": 0.0013876294591484467, "loss": 0.6522, "step": 106430 }, { "epoch": 30.621403912543155, "grad_norm": 1.1193535327911377, "learning_rate": 0.0013875719217491369, "loss": 0.7034, "step": 106440 }, { "epoch": 30.62428078250863, "grad_norm": 0.8206389546394348, "learning_rate": 0.0013875143843498274, "loss": 0.5926, "step": 106450 }, { "epoch": 30.627157652474107, "grad_norm": 1.3738809823989868, "learning_rate": 0.0013874568469505178, "loss": 0.6788, "step": 106460 }, { "epoch": 30.630034522439587, "grad_norm": 1.4511706829071045, "learning_rate": 0.0013873993095512083, "loss": 0.6705, "step": 106470 }, { "epoch": 30.632911392405063, "grad_norm": 2.231149911880493, "learning_rate": 0.001387341772151899, "loss": 0.7076, "step": 106480 }, { "epoch": 30.63578826237054, "grad_norm": 0.6617242097854614, "learning_rate": 0.0013872842347525892, "loss": 0.4654, "step": 106490 }, { "epoch": 30.63866513233602, "grad_norm": 1.8162347078323364, "learning_rate": 0.0013872266973532796, "loss": 0.6829, "step": 106500 }, { "epoch": 30.641542002301495, "grad_norm": 0.6343122720718384, "learning_rate": 0.0013871691599539702, "loss": 0.6053, "step": 106510 }, { "epoch": 30.644418872266975, "grad_norm": 1.080660343170166, "learning_rate": 0.0013871116225546605, "loss": 0.7107, "step": 106520 }, { "epoch": 30.64729574223245, "grad_norm": 1.4881681203842163, "learning_rate": 0.001387054085155351, "loss": 0.6792, "step": 106530 }, { "epoch": 30.650172612197927, "grad_norm": 1.3326114416122437, "learning_rate": 0.0013869965477560416, "loss": 0.6557, "step": 106540 }, { "epoch": 30.653049482163407, "grad_norm": 2.265233278274536, "learning_rate": 0.001386939010356732, "loss": 0.5068, "step": 106550 }, { "epoch": 30.655926352128883, "grad_norm": 1.0788573026657104, "learning_rate": 0.0013868814729574223, "loss": 0.6237, "step": 106560 }, { "epoch": 30.658803222094363, "grad_norm": 1.6245537996292114, "learning_rate": 0.0013868239355581127, "loss": 0.7363, "step": 106570 }, { "epoch": 30.66168009205984, "grad_norm": 1.1085644960403442, "learning_rate": 0.0013867663981588032, "loss": 0.819, "step": 106580 }, { "epoch": 30.664556962025316, "grad_norm": 1.2537710666656494, "learning_rate": 0.0013867088607594938, "loss": 0.626, "step": 106590 }, { "epoch": 30.667433831990795, "grad_norm": 2.4343650341033936, "learning_rate": 0.0013866513233601841, "loss": 0.7839, "step": 106600 }, { "epoch": 30.67031070195627, "grad_norm": 1.8394579887390137, "learning_rate": 0.0013865937859608745, "loss": 0.5742, "step": 106610 }, { "epoch": 30.673187571921748, "grad_norm": 0.8421008586883545, "learning_rate": 0.001386536248561565, "loss": 0.6156, "step": 106620 }, { "epoch": 30.676064441887227, "grad_norm": 1.9874060153961182, "learning_rate": 0.0013864787111622554, "loss": 0.7074, "step": 106630 }, { "epoch": 30.678941311852704, "grad_norm": 1.4622201919555664, "learning_rate": 0.001386421173762946, "loss": 0.6312, "step": 106640 }, { "epoch": 30.681818181818183, "grad_norm": 1.2988591194152832, "learning_rate": 0.0013863636363636365, "loss": 0.8867, "step": 106650 }, { "epoch": 30.68469505178366, "grad_norm": 0.8227753043174744, "learning_rate": 0.0013863060989643269, "loss": 0.4918, "step": 106660 }, { "epoch": 30.687571921749136, "grad_norm": 0.8809624910354614, "learning_rate": 0.0013862485615650172, "loss": 0.5347, "step": 106670 }, { "epoch": 30.690448791714616, "grad_norm": 1.235876441001892, "learning_rate": 0.0013861910241657076, "loss": 0.6646, "step": 106680 }, { "epoch": 30.693325661680092, "grad_norm": 1.7084324359893799, "learning_rate": 0.0013861334867663981, "loss": 0.7169, "step": 106690 }, { "epoch": 30.696202531645568, "grad_norm": 1.0427643060684204, "learning_rate": 0.0013860759493670887, "loss": 0.5418, "step": 106700 }, { "epoch": 30.699079401611048, "grad_norm": 0.816898763179779, "learning_rate": 0.001386018411967779, "loss": 0.7235, "step": 106710 }, { "epoch": 30.701956271576524, "grad_norm": 2.2202847003936768, "learning_rate": 0.0013859608745684696, "loss": 0.65, "step": 106720 }, { "epoch": 30.704833141542004, "grad_norm": 0.9031729698181152, "learning_rate": 0.00138590333716916, "loss": 0.5475, "step": 106730 }, { "epoch": 30.70771001150748, "grad_norm": 1.368277668952942, "learning_rate": 0.0013858457997698503, "loss": 0.606, "step": 106740 }, { "epoch": 30.710586881472956, "grad_norm": 1.420758843421936, "learning_rate": 0.0013857882623705409, "loss": 0.5344, "step": 106750 }, { "epoch": 30.713463751438436, "grad_norm": 1.071347951889038, "learning_rate": 0.0013857307249712314, "loss": 0.6992, "step": 106760 }, { "epoch": 30.716340621403912, "grad_norm": 1.2041618824005127, "learning_rate": 0.0013856731875719218, "loss": 0.6013, "step": 106770 }, { "epoch": 30.719217491369392, "grad_norm": 0.9413031339645386, "learning_rate": 0.0013856156501726123, "loss": 0.787, "step": 106780 }, { "epoch": 30.722094361334868, "grad_norm": 0.7473911643028259, "learning_rate": 0.0013855581127733025, "loss": 0.7301, "step": 106790 }, { "epoch": 30.724971231300344, "grad_norm": 1.0288488864898682, "learning_rate": 0.001385500575373993, "loss": 0.7705, "step": 106800 }, { "epoch": 30.727848101265824, "grad_norm": 0.9467809796333313, "learning_rate": 0.0013854430379746836, "loss": 0.5569, "step": 106810 }, { "epoch": 30.7307249712313, "grad_norm": 1.6515668630599976, "learning_rate": 0.001385385500575374, "loss": 0.62, "step": 106820 }, { "epoch": 30.733601841196776, "grad_norm": 2.6613214015960693, "learning_rate": 0.0013853279631760645, "loss": 0.596, "step": 106830 }, { "epoch": 30.736478711162256, "grad_norm": 3.3165082931518555, "learning_rate": 0.001385270425776755, "loss": 0.8299, "step": 106840 }, { "epoch": 30.739355581127732, "grad_norm": 1.2757539749145508, "learning_rate": 0.0013852128883774452, "loss": 0.624, "step": 106850 }, { "epoch": 30.742232451093212, "grad_norm": 1.6426451206207275, "learning_rate": 0.0013851553509781358, "loss": 0.5405, "step": 106860 }, { "epoch": 30.74510932105869, "grad_norm": 1.4437209367752075, "learning_rate": 0.0013850978135788263, "loss": 0.5539, "step": 106870 }, { "epoch": 30.747986191024165, "grad_norm": 1.013473629951477, "learning_rate": 0.0013850402761795167, "loss": 0.6001, "step": 106880 }, { "epoch": 30.750863060989644, "grad_norm": 0.9672659635543823, "learning_rate": 0.0013849827387802072, "loss": 0.6208, "step": 106890 }, { "epoch": 30.75373993095512, "grad_norm": 1.1688199043273926, "learning_rate": 0.0013849252013808976, "loss": 0.5865, "step": 106900 }, { "epoch": 30.756616800920597, "grad_norm": 1.4119783639907837, "learning_rate": 0.001384867663981588, "loss": 0.623, "step": 106910 }, { "epoch": 30.759493670886076, "grad_norm": 1.400153398513794, "learning_rate": 0.0013848101265822785, "loss": 0.5459, "step": 106920 }, { "epoch": 30.762370540851553, "grad_norm": 1.7930985689163208, "learning_rate": 0.0013847525891829689, "loss": 0.7543, "step": 106930 }, { "epoch": 30.765247410817032, "grad_norm": 2.0559937953948975, "learning_rate": 0.0013846950517836594, "loss": 0.7028, "step": 106940 }, { "epoch": 30.76812428078251, "grad_norm": 1.2340728044509888, "learning_rate": 0.00138463751438435, "loss": 0.6733, "step": 106950 }, { "epoch": 30.771001150747985, "grad_norm": 1.8489183187484741, "learning_rate": 0.0013845799769850403, "loss": 0.6299, "step": 106960 }, { "epoch": 30.773878020713465, "grad_norm": 1.1523751020431519, "learning_rate": 0.0013845224395857307, "loss": 0.6441, "step": 106970 }, { "epoch": 30.77675489067894, "grad_norm": 1.1275339126586914, "learning_rate": 0.0013844649021864212, "loss": 0.5689, "step": 106980 }, { "epoch": 30.77963176064442, "grad_norm": 0.8523368239402771, "learning_rate": 0.0013844073647871116, "loss": 0.5262, "step": 106990 }, { "epoch": 30.782508630609897, "grad_norm": 1.2231327295303345, "learning_rate": 0.0013843498273878021, "loss": 0.6698, "step": 107000 }, { "epoch": 30.785385500575373, "grad_norm": 1.5822011232376099, "learning_rate": 0.0013842922899884927, "loss": 0.7592, "step": 107010 }, { "epoch": 30.788262370540853, "grad_norm": 1.9109340906143188, "learning_rate": 0.001384234752589183, "loss": 0.6236, "step": 107020 }, { "epoch": 30.79113924050633, "grad_norm": 1.4392461776733398, "learning_rate": 0.0013841772151898734, "loss": 0.6019, "step": 107030 }, { "epoch": 30.794016110471805, "grad_norm": 2.1748569011688232, "learning_rate": 0.0013841196777905638, "loss": 0.7979, "step": 107040 }, { "epoch": 30.796892980437285, "grad_norm": 1.6168955564498901, "learning_rate": 0.0013840621403912543, "loss": 0.6421, "step": 107050 }, { "epoch": 30.79976985040276, "grad_norm": 1.9893373250961304, "learning_rate": 0.0013840046029919449, "loss": 0.747, "step": 107060 }, { "epoch": 30.80264672036824, "grad_norm": 1.1763038635253906, "learning_rate": 0.0013839470655926352, "loss": 0.7942, "step": 107070 }, { "epoch": 30.805523590333717, "grad_norm": 2.3987014293670654, "learning_rate": 0.0013838895281933258, "loss": 0.6279, "step": 107080 }, { "epoch": 30.808400460299193, "grad_norm": 0.9288198947906494, "learning_rate": 0.0013838319907940161, "loss": 0.7652, "step": 107090 }, { "epoch": 30.811277330264673, "grad_norm": 0.6753720045089722, "learning_rate": 0.0013837744533947065, "loss": 0.5523, "step": 107100 }, { "epoch": 30.81415420023015, "grad_norm": 1.1352499723434448, "learning_rate": 0.001383716915995397, "loss": 0.7308, "step": 107110 }, { "epoch": 30.817031070195625, "grad_norm": 1.118986964225769, "learning_rate": 0.0013836593785960876, "loss": 0.5295, "step": 107120 }, { "epoch": 30.819907940161105, "grad_norm": 1.3100488185882568, "learning_rate": 0.001383601841196778, "loss": 0.5385, "step": 107130 }, { "epoch": 30.82278481012658, "grad_norm": 1.0640332698822021, "learning_rate": 0.0013835443037974685, "loss": 0.7685, "step": 107140 }, { "epoch": 30.82566168009206, "grad_norm": 0.9207266569137573, "learning_rate": 0.0013834867663981587, "loss": 0.6746, "step": 107150 }, { "epoch": 30.828538550057537, "grad_norm": 0.6854994893074036, "learning_rate": 0.0013834292289988492, "loss": 0.7143, "step": 107160 }, { "epoch": 30.831415420023013, "grad_norm": 0.7711651921272278, "learning_rate": 0.0013833716915995398, "loss": 0.7536, "step": 107170 }, { "epoch": 30.834292289988493, "grad_norm": 0.818152666091919, "learning_rate": 0.0013833141542002301, "loss": 0.6763, "step": 107180 }, { "epoch": 30.83716915995397, "grad_norm": 2.252819061279297, "learning_rate": 0.0013832566168009207, "loss": 0.6703, "step": 107190 }, { "epoch": 30.84004602991945, "grad_norm": 1.6011383533477783, "learning_rate": 0.0013831990794016113, "loss": 0.6063, "step": 107200 }, { "epoch": 30.842922899884925, "grad_norm": 2.3263587951660156, "learning_rate": 0.0013831415420023014, "loss": 0.6603, "step": 107210 }, { "epoch": 30.8457997698504, "grad_norm": 2.900059938430786, "learning_rate": 0.001383084004602992, "loss": 0.7347, "step": 107220 }, { "epoch": 30.84867663981588, "grad_norm": 2.241285800933838, "learning_rate": 0.0013830264672036825, "loss": 0.756, "step": 107230 }, { "epoch": 30.851553509781358, "grad_norm": 1.5670946836471558, "learning_rate": 0.0013829689298043729, "loss": 0.7558, "step": 107240 }, { "epoch": 30.854430379746834, "grad_norm": 0.9635230302810669, "learning_rate": 0.0013829113924050634, "loss": 0.5809, "step": 107250 }, { "epoch": 30.857307249712314, "grad_norm": 2.481774091720581, "learning_rate": 0.0013828538550057538, "loss": 0.6582, "step": 107260 }, { "epoch": 30.86018411967779, "grad_norm": 1.9857726097106934, "learning_rate": 0.0013827963176064441, "loss": 0.6094, "step": 107270 }, { "epoch": 30.86306098964327, "grad_norm": 1.083902359008789, "learning_rate": 0.0013827387802071347, "loss": 0.5392, "step": 107280 }, { "epoch": 30.865937859608746, "grad_norm": 1.581013798713684, "learning_rate": 0.001382681242807825, "loss": 0.6656, "step": 107290 }, { "epoch": 30.868814729574222, "grad_norm": 0.9798242449760437, "learning_rate": 0.0013826237054085156, "loss": 0.8292, "step": 107300 }, { "epoch": 30.8716915995397, "grad_norm": 1.5349277257919312, "learning_rate": 0.0013825661680092062, "loss": 0.7194, "step": 107310 }, { "epoch": 30.874568469505178, "grad_norm": 2.329008102416992, "learning_rate": 0.0013825086306098965, "loss": 0.7108, "step": 107320 }, { "epoch": 30.877445339470658, "grad_norm": 1.379442811012268, "learning_rate": 0.0013824510932105869, "loss": 0.6759, "step": 107330 }, { "epoch": 30.880322209436134, "grad_norm": 0.8405389189720154, "learning_rate": 0.0013823935558112774, "loss": 0.5645, "step": 107340 }, { "epoch": 30.88319907940161, "grad_norm": 0.6599010229110718, "learning_rate": 0.0013823360184119678, "loss": 0.6539, "step": 107350 }, { "epoch": 30.88607594936709, "grad_norm": 1.5134236812591553, "learning_rate": 0.0013822784810126583, "loss": 0.7617, "step": 107360 }, { "epoch": 30.888952819332566, "grad_norm": 1.631925344467163, "learning_rate": 0.0013822209436133487, "loss": 0.6557, "step": 107370 }, { "epoch": 30.891829689298042, "grad_norm": 1.1973706483840942, "learning_rate": 0.0013821634062140392, "loss": 0.7457, "step": 107380 }, { "epoch": 30.894706559263522, "grad_norm": 1.3673906326293945, "learning_rate": 0.0013821058688147296, "loss": 0.7815, "step": 107390 }, { "epoch": 30.897583429228998, "grad_norm": 0.9135557413101196, "learning_rate": 0.00138204833141542, "loss": 0.5274, "step": 107400 }, { "epoch": 30.900460299194478, "grad_norm": 1.5464599132537842, "learning_rate": 0.0013819907940161105, "loss": 0.6767, "step": 107410 }, { "epoch": 30.903337169159954, "grad_norm": 1.8730624914169312, "learning_rate": 0.001381933256616801, "loss": 0.8659, "step": 107420 }, { "epoch": 30.90621403912543, "grad_norm": 1.0047993659973145, "learning_rate": 0.0013818757192174914, "loss": 0.696, "step": 107430 }, { "epoch": 30.90909090909091, "grad_norm": 0.7766068577766418, "learning_rate": 0.0013818181818181818, "loss": 0.8444, "step": 107440 }, { "epoch": 30.911967779056386, "grad_norm": 1.0065549612045288, "learning_rate": 0.0013817606444188723, "loss": 1.075, "step": 107450 }, { "epoch": 30.914844649021866, "grad_norm": 0.7799211144447327, "learning_rate": 0.0013817031070195627, "loss": 0.6116, "step": 107460 }, { "epoch": 30.917721518987342, "grad_norm": 1.1038469076156616, "learning_rate": 0.0013816455696202532, "loss": 0.7046, "step": 107470 }, { "epoch": 30.92059838895282, "grad_norm": 1.3395941257476807, "learning_rate": 0.0013815880322209436, "loss": 0.6655, "step": 107480 }, { "epoch": 30.923475258918298, "grad_norm": 0.7669755816459656, "learning_rate": 0.0013815304948216341, "loss": 0.5635, "step": 107490 }, { "epoch": 30.926352128883774, "grad_norm": 1.0196411609649658, "learning_rate": 0.0013814729574223245, "loss": 0.7747, "step": 107500 }, { "epoch": 30.92922899884925, "grad_norm": 0.5793113708496094, "learning_rate": 0.0013814154200230148, "loss": 0.7107, "step": 107510 }, { "epoch": 30.93210586881473, "grad_norm": 2.255796194076538, "learning_rate": 0.0013813578826237054, "loss": 0.8157, "step": 107520 }, { "epoch": 30.934982738780207, "grad_norm": 1.3324811458587646, "learning_rate": 0.001381300345224396, "loss": 0.6426, "step": 107530 }, { "epoch": 30.937859608745686, "grad_norm": 1.2050704956054688, "learning_rate": 0.0013812428078250863, "loss": 0.6924, "step": 107540 }, { "epoch": 30.940736478711163, "grad_norm": 1.3044456243515015, "learning_rate": 0.0013811852704257769, "loss": 0.6231, "step": 107550 }, { "epoch": 30.94361334867664, "grad_norm": 2.406710386276245, "learning_rate": 0.0013811277330264672, "loss": 0.9373, "step": 107560 }, { "epoch": 30.94649021864212, "grad_norm": 2.583804130554199, "learning_rate": 0.0013810701956271576, "loss": 0.7587, "step": 107570 }, { "epoch": 30.949367088607595, "grad_norm": 0.5293459296226501, "learning_rate": 0.0013810126582278481, "loss": 0.6174, "step": 107580 }, { "epoch": 30.95224395857307, "grad_norm": 1.7222553491592407, "learning_rate": 0.0013809551208285385, "loss": 0.7432, "step": 107590 }, { "epoch": 30.95512082853855, "grad_norm": 1.9157105684280396, "learning_rate": 0.001380897583429229, "loss": 0.6982, "step": 107600 }, { "epoch": 30.957997698504027, "grad_norm": 1.6220098733901978, "learning_rate": 0.0013808400460299196, "loss": 0.6945, "step": 107610 }, { "epoch": 30.960874568469507, "grad_norm": 0.674050509929657, "learning_rate": 0.0013807825086306097, "loss": 0.672, "step": 107620 }, { "epoch": 30.963751438434983, "grad_norm": 1.2057923078536987, "learning_rate": 0.0013807249712313003, "loss": 0.7706, "step": 107630 }, { "epoch": 30.96662830840046, "grad_norm": 2.8081045150756836, "learning_rate": 0.0013806674338319909, "loss": 0.7643, "step": 107640 }, { "epoch": 30.96950517836594, "grad_norm": 1.4198745489120483, "learning_rate": 0.0013806098964326812, "loss": 0.6675, "step": 107650 }, { "epoch": 30.972382048331415, "grad_norm": 0.7689831852912903, "learning_rate": 0.0013805523590333718, "loss": 0.6317, "step": 107660 }, { "epoch": 30.975258918296895, "grad_norm": 1.1972978115081787, "learning_rate": 0.0013804948216340623, "loss": 0.5658, "step": 107670 }, { "epoch": 30.97813578826237, "grad_norm": 0.7970529794692993, "learning_rate": 0.0013804372842347525, "loss": 0.6913, "step": 107680 }, { "epoch": 30.981012658227847, "grad_norm": 1.1090489625930786, "learning_rate": 0.001380379746835443, "loss": 0.7275, "step": 107690 }, { "epoch": 30.983889528193327, "grad_norm": 1.7006614208221436, "learning_rate": 0.0013803222094361336, "loss": 0.7061, "step": 107700 }, { "epoch": 30.986766398158803, "grad_norm": 1.8762059211730957, "learning_rate": 0.001380264672036824, "loss": 0.8041, "step": 107710 }, { "epoch": 30.98964326812428, "grad_norm": 0.9122306108474731, "learning_rate": 0.0013802071346375145, "loss": 0.7026, "step": 107720 }, { "epoch": 30.99252013808976, "grad_norm": 0.9857863783836365, "learning_rate": 0.0013801495972382049, "loss": 0.7036, "step": 107730 }, { "epoch": 30.995397008055235, "grad_norm": 1.1994589567184448, "learning_rate": 0.0013800920598388952, "loss": 0.5596, "step": 107740 }, { "epoch": 30.998273878020715, "grad_norm": 2.1180384159088135, "learning_rate": 0.0013800345224395858, "loss": 0.7401, "step": 107750 }, { "epoch": 31.00115074798619, "grad_norm": 0.9407117366790771, "learning_rate": 0.0013799769850402761, "loss": 0.501, "step": 107760 }, { "epoch": 31.004027617951667, "grad_norm": 2.121230363845825, "learning_rate": 0.0013799194476409667, "loss": 0.6937, "step": 107770 }, { "epoch": 31.006904487917147, "grad_norm": 1.7091888189315796, "learning_rate": 0.0013798619102416572, "loss": 0.5848, "step": 107780 }, { "epoch": 31.009781357882623, "grad_norm": 1.5657538175582886, "learning_rate": 0.0013798043728423476, "loss": 0.6083, "step": 107790 }, { "epoch": 31.0126582278481, "grad_norm": 1.554988145828247, "learning_rate": 0.001379746835443038, "loss": 0.7268, "step": 107800 }, { "epoch": 31.01553509781358, "grad_norm": 1.3661214113235474, "learning_rate": 0.0013796892980437285, "loss": 0.6014, "step": 107810 }, { "epoch": 31.018411967779056, "grad_norm": 1.5720545053482056, "learning_rate": 0.0013796317606444188, "loss": 0.5494, "step": 107820 }, { "epoch": 31.021288837744535, "grad_norm": 0.9287423491477966, "learning_rate": 0.0013795742232451094, "loss": 0.6963, "step": 107830 }, { "epoch": 31.02416570771001, "grad_norm": 2.2460546493530273, "learning_rate": 0.0013795166858457998, "loss": 0.5676, "step": 107840 }, { "epoch": 31.027042577675488, "grad_norm": 1.764967441558838, "learning_rate": 0.0013794591484464903, "loss": 0.578, "step": 107850 }, { "epoch": 31.029919447640967, "grad_norm": 3.5633950233459473, "learning_rate": 0.0013794016110471807, "loss": 0.6223, "step": 107860 }, { "epoch": 31.032796317606444, "grad_norm": 1.5063247680664062, "learning_rate": 0.001379344073647871, "loss": 0.6972, "step": 107870 }, { "epoch": 31.035673187571923, "grad_norm": 0.8336902856826782, "learning_rate": 0.0013792865362485616, "loss": 0.7943, "step": 107880 }, { "epoch": 31.0385500575374, "grad_norm": 1.5014305114746094, "learning_rate": 0.0013792289988492521, "loss": 0.6125, "step": 107890 }, { "epoch": 31.041426927502876, "grad_norm": 1.122821569442749, "learning_rate": 0.0013791714614499425, "loss": 0.7649, "step": 107900 }, { "epoch": 31.044303797468356, "grad_norm": 1.4431846141815186, "learning_rate": 0.001379113924050633, "loss": 0.5715, "step": 107910 }, { "epoch": 31.04718066743383, "grad_norm": 1.2931104898452759, "learning_rate": 0.0013790563866513234, "loss": 0.7292, "step": 107920 }, { "epoch": 31.050057537399308, "grad_norm": 0.8941500186920166, "learning_rate": 0.0013789988492520138, "loss": 0.5862, "step": 107930 }, { "epoch": 31.052934407364788, "grad_norm": 1.8299816846847534, "learning_rate": 0.0013789413118527043, "loss": 0.7484, "step": 107940 }, { "epoch": 31.055811277330264, "grad_norm": 1.668749213218689, "learning_rate": 0.0013788837744533947, "loss": 0.6143, "step": 107950 }, { "epoch": 31.058688147295744, "grad_norm": 0.8316138982772827, "learning_rate": 0.0013788262370540852, "loss": 0.7735, "step": 107960 }, { "epoch": 31.06156501726122, "grad_norm": 1.7470288276672363, "learning_rate": 0.0013787686996547758, "loss": 0.548, "step": 107970 }, { "epoch": 31.064441887226696, "grad_norm": 0.8182955384254456, "learning_rate": 0.001378711162255466, "loss": 0.612, "step": 107980 }, { "epoch": 31.067318757192176, "grad_norm": 1.126882791519165, "learning_rate": 0.0013786536248561565, "loss": 0.517, "step": 107990 }, { "epoch": 31.070195627157652, "grad_norm": 0.7985445261001587, "learning_rate": 0.001378596087456847, "loss": 0.5611, "step": 108000 }, { "epoch": 31.073072497123132, "grad_norm": 1.4482307434082031, "learning_rate": 0.0013785385500575374, "loss": 0.8297, "step": 108010 }, { "epoch": 31.075949367088608, "grad_norm": 0.7494382858276367, "learning_rate": 0.001378481012658228, "loss": 0.5772, "step": 108020 }, { "epoch": 31.078826237054084, "grad_norm": 0.8102985620498657, "learning_rate": 0.0013784234752589185, "loss": 0.5172, "step": 108030 }, { "epoch": 31.081703107019564, "grad_norm": 0.9940605759620667, "learning_rate": 0.0013783659378596087, "loss": 0.4675, "step": 108040 }, { "epoch": 31.08457997698504, "grad_norm": 1.4964426755905151, "learning_rate": 0.0013783084004602992, "loss": 0.554, "step": 108050 }, { "epoch": 31.087456846950516, "grad_norm": 1.6898905038833618, "learning_rate": 0.0013782508630609896, "loss": 0.8013, "step": 108060 }, { "epoch": 31.090333716915996, "grad_norm": 3.1498966217041016, "learning_rate": 0.0013781933256616801, "loss": 0.6502, "step": 108070 }, { "epoch": 31.093210586881472, "grad_norm": 1.2688639163970947, "learning_rate": 0.0013781357882623707, "loss": 0.5534, "step": 108080 }, { "epoch": 31.096087456846952, "grad_norm": 1.0246998071670532, "learning_rate": 0.001378078250863061, "loss": 0.6527, "step": 108090 }, { "epoch": 31.09896432681243, "grad_norm": 1.2805486917495728, "learning_rate": 0.0013780207134637514, "loss": 0.6679, "step": 108100 }, { "epoch": 31.101841196777904, "grad_norm": 0.9815778136253357, "learning_rate": 0.001377963176064442, "loss": 0.6027, "step": 108110 }, { "epoch": 31.104718066743384, "grad_norm": 0.684607744216919, "learning_rate": 0.0013779056386651323, "loss": 0.559, "step": 108120 }, { "epoch": 31.10759493670886, "grad_norm": 1.4454708099365234, "learning_rate": 0.0013778481012658229, "loss": 0.729, "step": 108130 }, { "epoch": 31.110471806674337, "grad_norm": 1.0730712413787842, "learning_rate": 0.0013777905638665134, "loss": 0.6744, "step": 108140 }, { "epoch": 31.113348676639816, "grad_norm": 0.6758909225463867, "learning_rate": 0.0013777330264672038, "loss": 0.5161, "step": 108150 }, { "epoch": 31.116225546605293, "grad_norm": 2.3868582248687744, "learning_rate": 0.0013776754890678941, "loss": 0.5472, "step": 108160 }, { "epoch": 31.119102416570772, "grad_norm": 1.621932864189148, "learning_rate": 0.0013776179516685845, "loss": 0.9462, "step": 108170 }, { "epoch": 31.12197928653625, "grad_norm": 0.7641804218292236, "learning_rate": 0.001377560414269275, "loss": 0.6807, "step": 108180 }, { "epoch": 31.124856156501725, "grad_norm": 1.5932756662368774, "learning_rate": 0.0013775028768699656, "loss": 0.5716, "step": 108190 }, { "epoch": 31.127733026467205, "grad_norm": 1.421065330505371, "learning_rate": 0.001377445339470656, "loss": 0.5931, "step": 108200 }, { "epoch": 31.13060989643268, "grad_norm": 0.9241177439689636, "learning_rate": 0.0013773878020713465, "loss": 0.6081, "step": 108210 }, { "epoch": 31.13348676639816, "grad_norm": 2.6261885166168213, "learning_rate": 0.0013773302646720369, "loss": 0.7033, "step": 108220 }, { "epoch": 31.136363636363637, "grad_norm": 0.9667146801948547, "learning_rate": 0.0013772727272727272, "loss": 0.5179, "step": 108230 }, { "epoch": 31.139240506329113, "grad_norm": 0.7997017502784729, "learning_rate": 0.0013772151898734178, "loss": 0.5312, "step": 108240 }, { "epoch": 31.142117376294593, "grad_norm": 0.8021880388259888, "learning_rate": 0.0013771576524741083, "loss": 0.5997, "step": 108250 }, { "epoch": 31.14499424626007, "grad_norm": 1.1432090997695923, "learning_rate": 0.0013771001150747987, "loss": 0.7262, "step": 108260 }, { "epoch": 31.147871116225545, "grad_norm": 1.7634375095367432, "learning_rate": 0.001377042577675489, "loss": 0.5873, "step": 108270 }, { "epoch": 31.150747986191025, "grad_norm": 0.8734719157218933, "learning_rate": 0.0013769850402761794, "loss": 0.582, "step": 108280 }, { "epoch": 31.1536248561565, "grad_norm": 1.1514166593551636, "learning_rate": 0.00137692750287687, "loss": 0.5512, "step": 108290 }, { "epoch": 31.15650172612198, "grad_norm": 1.4594699144363403, "learning_rate": 0.0013768699654775605, "loss": 0.6714, "step": 108300 }, { "epoch": 31.159378596087457, "grad_norm": 3.110262393951416, "learning_rate": 0.0013768124280782508, "loss": 0.7465, "step": 108310 }, { "epoch": 31.162255466052933, "grad_norm": 2.5481035709381104, "learning_rate": 0.0013767548906789414, "loss": 0.7197, "step": 108320 }, { "epoch": 31.165132336018413, "grad_norm": 1.2108689546585083, "learning_rate": 0.0013766973532796318, "loss": 0.8, "step": 108330 }, { "epoch": 31.16800920598389, "grad_norm": 2.010763168334961, "learning_rate": 0.001376639815880322, "loss": 0.5725, "step": 108340 }, { "epoch": 31.170886075949365, "grad_norm": 1.1375724077224731, "learning_rate": 0.0013765822784810127, "loss": 0.513, "step": 108350 }, { "epoch": 31.173762945914845, "grad_norm": 1.354981541633606, "learning_rate": 0.0013765247410817032, "loss": 0.5981, "step": 108360 }, { "epoch": 31.17663981588032, "grad_norm": 1.209359049797058, "learning_rate": 0.0013764672036823936, "loss": 0.5297, "step": 108370 }, { "epoch": 31.1795166858458, "grad_norm": 1.0583094358444214, "learning_rate": 0.0013764096662830841, "loss": 0.6824, "step": 108380 }, { "epoch": 31.182393555811277, "grad_norm": 1.8147988319396973, "learning_rate": 0.0013763521288837745, "loss": 0.6643, "step": 108390 }, { "epoch": 31.185270425776753, "grad_norm": 1.008662462234497, "learning_rate": 0.0013762945914844648, "loss": 0.7315, "step": 108400 }, { "epoch": 31.188147295742233, "grad_norm": 1.2687798738479614, "learning_rate": 0.0013762370540851554, "loss": 0.5975, "step": 108410 }, { "epoch": 31.19102416570771, "grad_norm": 2.4745612144470215, "learning_rate": 0.0013761795166858457, "loss": 0.6674, "step": 108420 }, { "epoch": 31.19390103567319, "grad_norm": 1.1255449056625366, "learning_rate": 0.0013761219792865363, "loss": 0.7056, "step": 108430 }, { "epoch": 31.196777905638665, "grad_norm": 1.3076822757720947, "learning_rate": 0.0013760644418872269, "loss": 0.7069, "step": 108440 }, { "epoch": 31.19965477560414, "grad_norm": 1.1276867389678955, "learning_rate": 0.001376006904487917, "loss": 0.6925, "step": 108450 }, { "epoch": 31.20253164556962, "grad_norm": 1.3196932077407837, "learning_rate": 0.0013759493670886076, "loss": 0.615, "step": 108460 }, { "epoch": 31.205408515535098, "grad_norm": 0.8408050537109375, "learning_rate": 0.0013758918296892981, "loss": 0.7874, "step": 108470 }, { "epoch": 31.208285385500574, "grad_norm": 1.4164212942123413, "learning_rate": 0.0013758342922899885, "loss": 0.7352, "step": 108480 }, { "epoch": 31.211162255466053, "grad_norm": 0.8965266942977905, "learning_rate": 0.001375776754890679, "loss": 0.5705, "step": 108490 }, { "epoch": 31.21403912543153, "grad_norm": 1.712427020072937, "learning_rate": 0.0013757192174913696, "loss": 0.4937, "step": 108500 }, { "epoch": 31.21691599539701, "grad_norm": 1.0547142028808594, "learning_rate": 0.0013756616800920597, "loss": 0.6137, "step": 108510 }, { "epoch": 31.219792865362486, "grad_norm": 1.4636318683624268, "learning_rate": 0.0013756041426927503, "loss": 0.5832, "step": 108520 }, { "epoch": 31.222669735327962, "grad_norm": 0.6604181528091431, "learning_rate": 0.0013755466052934406, "loss": 0.7253, "step": 108530 }, { "epoch": 31.22554660529344, "grad_norm": 1.1136467456817627, "learning_rate": 0.0013754890678941312, "loss": 0.6796, "step": 108540 }, { "epoch": 31.228423475258918, "grad_norm": 1.7541258335113525, "learning_rate": 0.0013754315304948218, "loss": 0.7131, "step": 108550 }, { "epoch": 31.231300345224398, "grad_norm": 1.0466344356536865, "learning_rate": 0.0013753739930955121, "loss": 0.6887, "step": 108560 }, { "epoch": 31.234177215189874, "grad_norm": 1.4654430150985718, "learning_rate": 0.0013753164556962025, "loss": 0.7384, "step": 108570 }, { "epoch": 31.23705408515535, "grad_norm": 0.9331908822059631, "learning_rate": 0.001375258918296893, "loss": 0.6259, "step": 108580 }, { "epoch": 31.23993095512083, "grad_norm": 1.695066213607788, "learning_rate": 0.0013752013808975834, "loss": 0.7804, "step": 108590 }, { "epoch": 31.242807825086306, "grad_norm": 0.9690415859222412, "learning_rate": 0.001375143843498274, "loss": 0.6449, "step": 108600 }, { "epoch": 31.245684695051782, "grad_norm": 1.403673529624939, "learning_rate": 0.0013750863060989645, "loss": 0.6718, "step": 108610 }, { "epoch": 31.248561565017262, "grad_norm": 1.1219143867492676, "learning_rate": 0.0013750287686996549, "loss": 0.7461, "step": 108620 }, { "epoch": 31.251438434982738, "grad_norm": 1.1295565366744995, "learning_rate": 0.0013749712313003452, "loss": 0.4991, "step": 108630 }, { "epoch": 31.254315304948218, "grad_norm": 1.403316855430603, "learning_rate": 0.0013749136939010355, "loss": 0.5198, "step": 108640 }, { "epoch": 31.257192174913694, "grad_norm": 0.8520101308822632, "learning_rate": 0.0013748561565017261, "loss": 0.6134, "step": 108650 }, { "epoch": 31.26006904487917, "grad_norm": 1.043370246887207, "learning_rate": 0.0013747986191024167, "loss": 0.7098, "step": 108660 }, { "epoch": 31.26294591484465, "grad_norm": 1.5922753810882568, "learning_rate": 0.001374741081703107, "loss": 0.6013, "step": 108670 }, { "epoch": 31.265822784810126, "grad_norm": 1.3271721601486206, "learning_rate": 0.0013746835443037976, "loss": 0.619, "step": 108680 }, { "epoch": 31.268699654775602, "grad_norm": 0.8237621784210205, "learning_rate": 0.001374626006904488, "loss": 0.7199, "step": 108690 }, { "epoch": 31.271576524741082, "grad_norm": 1.2668383121490479, "learning_rate": 0.0013745684695051783, "loss": 0.709, "step": 108700 }, { "epoch": 31.27445339470656, "grad_norm": 1.721039891242981, "learning_rate": 0.0013745109321058688, "loss": 0.6194, "step": 108710 }, { "epoch": 31.277330264672038, "grad_norm": 2.4138033390045166, "learning_rate": 0.0013744533947065594, "loss": 0.72, "step": 108720 }, { "epoch": 31.280207134637514, "grad_norm": 1.599666953086853, "learning_rate": 0.0013743958573072498, "loss": 0.7968, "step": 108730 }, { "epoch": 31.28308400460299, "grad_norm": 2.3591606616973877, "learning_rate": 0.0013743383199079403, "loss": 0.6297, "step": 108740 }, { "epoch": 31.28596087456847, "grad_norm": 0.9848114252090454, "learning_rate": 0.0013742807825086305, "loss": 0.6528, "step": 108750 }, { "epoch": 31.288837744533947, "grad_norm": 1.4215599298477173, "learning_rate": 0.001374223245109321, "loss": 0.7004, "step": 108760 }, { "epoch": 31.291714614499426, "grad_norm": 1.4053759574890137, "learning_rate": 0.0013741657077100116, "loss": 0.6468, "step": 108770 }, { "epoch": 31.294591484464902, "grad_norm": 2.0698940753936768, "learning_rate": 0.001374108170310702, "loss": 0.8028, "step": 108780 }, { "epoch": 31.29746835443038, "grad_norm": 0.7315686941146851, "learning_rate": 0.0013740506329113925, "loss": 0.6854, "step": 108790 }, { "epoch": 31.30034522439586, "grad_norm": 1.3940762281417847, "learning_rate": 0.001373993095512083, "loss": 0.5827, "step": 108800 }, { "epoch": 31.303222094361335, "grad_norm": 1.582711100578308, "learning_rate": 0.0013739355581127732, "loss": 0.6389, "step": 108810 }, { "epoch": 31.30609896432681, "grad_norm": 1.0562400817871094, "learning_rate": 0.0013738780207134637, "loss": 0.5091, "step": 108820 }, { "epoch": 31.30897583429229, "grad_norm": 1.6952131986618042, "learning_rate": 0.0013738204833141543, "loss": 0.8451, "step": 108830 }, { "epoch": 31.311852704257767, "grad_norm": 1.042309045791626, "learning_rate": 0.0013737629459148447, "loss": 0.5852, "step": 108840 }, { "epoch": 31.314729574223247, "grad_norm": 1.2537248134613037, "learning_rate": 0.0013737054085155352, "loss": 0.5818, "step": 108850 }, { "epoch": 31.317606444188723, "grad_norm": 1.4034072160720825, "learning_rate": 0.0013736478711162256, "loss": 0.6256, "step": 108860 }, { "epoch": 31.3204833141542, "grad_norm": 1.837801218032837, "learning_rate": 0.001373590333716916, "loss": 0.5957, "step": 108870 }, { "epoch": 31.32336018411968, "grad_norm": 0.8130699396133423, "learning_rate": 0.0013735327963176065, "loss": 0.6472, "step": 108880 }, { "epoch": 31.326237054085155, "grad_norm": 1.4326112270355225, "learning_rate": 0.0013734752589182968, "loss": 0.9235, "step": 108890 }, { "epoch": 31.32911392405063, "grad_norm": 1.1682322025299072, "learning_rate": 0.0013734177215189874, "loss": 0.8779, "step": 108900 }, { "epoch": 31.33199079401611, "grad_norm": 0.8823994994163513, "learning_rate": 0.001373360184119678, "loss": 0.6168, "step": 108910 }, { "epoch": 31.334867663981587, "grad_norm": 1.500533103942871, "learning_rate": 0.0013733026467203683, "loss": 0.6746, "step": 108920 }, { "epoch": 31.337744533947067, "grad_norm": 0.8321349024772644, "learning_rate": 0.0013732451093210587, "loss": 0.7248, "step": 108930 }, { "epoch": 31.340621403912543, "grad_norm": 1.3206173181533813, "learning_rate": 0.0013731875719217492, "loss": 0.5883, "step": 108940 }, { "epoch": 31.34349827387802, "grad_norm": 1.0759367942810059, "learning_rate": 0.0013731300345224396, "loss": 0.6691, "step": 108950 }, { "epoch": 31.3463751438435, "grad_norm": 1.0234822034835815, "learning_rate": 0.0013730724971231301, "loss": 0.6823, "step": 108960 }, { "epoch": 31.349252013808975, "grad_norm": 1.7131837606430054, "learning_rate": 0.0013730149597238207, "loss": 0.8338, "step": 108970 }, { "epoch": 31.352128883774455, "grad_norm": 0.8873069882392883, "learning_rate": 0.001372957422324511, "loss": 0.5856, "step": 108980 }, { "epoch": 31.35500575373993, "grad_norm": 1.1754790544509888, "learning_rate": 0.0013728998849252014, "loss": 0.6292, "step": 108990 }, { "epoch": 31.357882623705407, "grad_norm": 1.3978341817855835, "learning_rate": 0.0013728423475258917, "loss": 0.7441, "step": 109000 }, { "epoch": 31.360759493670887, "grad_norm": 1.8034427165985107, "learning_rate": 0.0013727848101265823, "loss": 0.7111, "step": 109010 }, { "epoch": 31.363636363636363, "grad_norm": 1.5467031002044678, "learning_rate": 0.0013727272727272729, "loss": 0.7748, "step": 109020 }, { "epoch": 31.36651323360184, "grad_norm": 1.1333825588226318, "learning_rate": 0.0013726697353279632, "loss": 0.6203, "step": 109030 }, { "epoch": 31.36939010356732, "grad_norm": 1.8954933881759644, "learning_rate": 0.0013726121979286536, "loss": 0.5482, "step": 109040 }, { "epoch": 31.372266973532795, "grad_norm": 1.494203805923462, "learning_rate": 0.0013725546605293441, "loss": 0.5539, "step": 109050 }, { "epoch": 31.375143843498275, "grad_norm": 0.8515860438346863, "learning_rate": 0.0013724971231300345, "loss": 0.722, "step": 109060 }, { "epoch": 31.37802071346375, "grad_norm": 2.2679200172424316, "learning_rate": 0.001372439585730725, "loss": 0.6011, "step": 109070 }, { "epoch": 31.380897583429228, "grad_norm": 0.8476516604423523, "learning_rate": 0.0013723820483314156, "loss": 0.6135, "step": 109080 }, { "epoch": 31.383774453394707, "grad_norm": 0.7176491618156433, "learning_rate": 0.001372324510932106, "loss": 0.6536, "step": 109090 }, { "epoch": 31.386651323360184, "grad_norm": 1.918856143951416, "learning_rate": 0.0013722669735327963, "loss": 0.6116, "step": 109100 }, { "epoch": 31.389528193325663, "grad_norm": 1.0490163564682007, "learning_rate": 0.0013722094361334866, "loss": 0.6122, "step": 109110 }, { "epoch": 31.39240506329114, "grad_norm": 1.0244183540344238, "learning_rate": 0.0013721518987341772, "loss": 0.6448, "step": 109120 }, { "epoch": 31.395281933256616, "grad_norm": 0.9258647561073303, "learning_rate": 0.0013720943613348678, "loss": 0.4839, "step": 109130 }, { "epoch": 31.398158803222096, "grad_norm": 1.0003886222839355, "learning_rate": 0.001372036823935558, "loss": 0.5518, "step": 109140 }, { "epoch": 31.40103567318757, "grad_norm": 1.3123914003372192, "learning_rate": 0.0013719792865362487, "loss": 0.6473, "step": 109150 }, { "epoch": 31.403912543153048, "grad_norm": 1.9656320810317993, "learning_rate": 0.001371921749136939, "loss": 0.8302, "step": 109160 }, { "epoch": 31.406789413118528, "grad_norm": 1.3682801723480225, "learning_rate": 0.0013718642117376294, "loss": 0.7128, "step": 109170 }, { "epoch": 31.409666283084004, "grad_norm": 1.4111045598983765, "learning_rate": 0.00137180667433832, "loss": 0.6373, "step": 109180 }, { "epoch": 31.412543153049484, "grad_norm": 1.0540529489517212, "learning_rate": 0.0013717491369390105, "loss": 0.7308, "step": 109190 }, { "epoch": 31.41542002301496, "grad_norm": 0.7969087958335876, "learning_rate": 0.0013716915995397008, "loss": 0.7179, "step": 109200 }, { "epoch": 31.418296892980436, "grad_norm": 1.14924955368042, "learning_rate": 0.0013716340621403914, "loss": 0.6414, "step": 109210 }, { "epoch": 31.421173762945916, "grad_norm": 1.2357527017593384, "learning_rate": 0.0013715765247410815, "loss": 0.5894, "step": 109220 }, { "epoch": 31.424050632911392, "grad_norm": 0.9877561330795288, "learning_rate": 0.001371518987341772, "loss": 0.7158, "step": 109230 }, { "epoch": 31.42692750287687, "grad_norm": 1.2739794254302979, "learning_rate": 0.0013714614499424627, "loss": 0.6471, "step": 109240 }, { "epoch": 31.429804372842348, "grad_norm": 1.7093563079833984, "learning_rate": 0.001371403912543153, "loss": 0.6509, "step": 109250 }, { "epoch": 31.432681242807824, "grad_norm": 1.5610496997833252, "learning_rate": 0.0013713463751438436, "loss": 0.5823, "step": 109260 }, { "epoch": 31.435558112773304, "grad_norm": 2.80141282081604, "learning_rate": 0.0013712888377445341, "loss": 0.6924, "step": 109270 }, { "epoch": 31.43843498273878, "grad_norm": 1.4736669063568115, "learning_rate": 0.0013712313003452243, "loss": 0.6343, "step": 109280 }, { "epoch": 31.441311852704256, "grad_norm": 1.8389579057693481, "learning_rate": 0.0013711737629459148, "loss": 0.5689, "step": 109290 }, { "epoch": 31.444188722669736, "grad_norm": 1.65763258934021, "learning_rate": 0.0013711162255466054, "loss": 0.7194, "step": 109300 }, { "epoch": 31.447065592635212, "grad_norm": 1.4640114307403564, "learning_rate": 0.0013710586881472957, "loss": 0.795, "step": 109310 }, { "epoch": 31.449942462600692, "grad_norm": 1.2396842241287231, "learning_rate": 0.0013710011507479863, "loss": 0.5092, "step": 109320 }, { "epoch": 31.45281933256617, "grad_norm": 2.155938148498535, "learning_rate": 0.0013709436133486767, "loss": 0.5499, "step": 109330 }, { "epoch": 31.455696202531644, "grad_norm": 1.306262731552124, "learning_rate": 0.001370886075949367, "loss": 0.6644, "step": 109340 }, { "epoch": 31.458573072497124, "grad_norm": 0.8568639755249023, "learning_rate": 0.0013708285385500576, "loss": 0.6268, "step": 109350 }, { "epoch": 31.4614499424626, "grad_norm": 0.887698233127594, "learning_rate": 0.001370771001150748, "loss": 0.6687, "step": 109360 }, { "epoch": 31.464326812428077, "grad_norm": 1.6261228322982788, "learning_rate": 0.0013707134637514385, "loss": 0.6238, "step": 109370 }, { "epoch": 31.467203682393556, "grad_norm": 1.0341291427612305, "learning_rate": 0.001370655926352129, "loss": 0.5206, "step": 109380 }, { "epoch": 31.470080552359033, "grad_norm": 0.7123714089393616, "learning_rate": 0.0013705983889528194, "loss": 0.7602, "step": 109390 }, { "epoch": 31.472957422324512, "grad_norm": 2.130983591079712, "learning_rate": 0.0013705408515535097, "loss": 0.5701, "step": 109400 }, { "epoch": 31.47583429228999, "grad_norm": 1.1814749240875244, "learning_rate": 0.0013704833141542003, "loss": 0.6712, "step": 109410 }, { "epoch": 31.478711162255465, "grad_norm": 1.172368049621582, "learning_rate": 0.0013704257767548906, "loss": 0.6273, "step": 109420 }, { "epoch": 31.481588032220944, "grad_norm": 0.834121584892273, "learning_rate": 0.0013703682393555812, "loss": 0.602, "step": 109430 }, { "epoch": 31.48446490218642, "grad_norm": 1.3375381231307983, "learning_rate": 0.0013703107019562716, "loss": 0.5709, "step": 109440 }, { "epoch": 31.4873417721519, "grad_norm": 1.524808406829834, "learning_rate": 0.0013702531645569621, "loss": 0.5303, "step": 109450 }, { "epoch": 31.490218642117377, "grad_norm": 0.9547202587127686, "learning_rate": 0.0013701956271576525, "loss": 0.6196, "step": 109460 }, { "epoch": 31.493095512082853, "grad_norm": 1.382034182548523, "learning_rate": 0.0013701380897583428, "loss": 0.6541, "step": 109470 }, { "epoch": 31.495972382048333, "grad_norm": 1.8124322891235352, "learning_rate": 0.0013700805523590334, "loss": 0.5863, "step": 109480 }, { "epoch": 31.49884925201381, "grad_norm": 1.7749544382095337, "learning_rate": 0.001370023014959724, "loss": 0.7687, "step": 109490 }, { "epoch": 31.501726121979285, "grad_norm": 2.009843111038208, "learning_rate": 0.0013699654775604143, "loss": 0.6761, "step": 109500 }, { "epoch": 31.504602991944765, "grad_norm": 1.0234378576278687, "learning_rate": 0.0013699079401611049, "loss": 0.6473, "step": 109510 }, { "epoch": 31.50747986191024, "grad_norm": 1.394050121307373, "learning_rate": 0.0013698504027617952, "loss": 0.7504, "step": 109520 }, { "epoch": 31.51035673187572, "grad_norm": 2.2492079734802246, "learning_rate": 0.0013697928653624855, "loss": 0.6825, "step": 109530 }, { "epoch": 31.513233601841197, "grad_norm": 1.0058774948120117, "learning_rate": 0.0013697353279631761, "loss": 0.5769, "step": 109540 }, { "epoch": 31.516110471806673, "grad_norm": 1.0260984897613525, "learning_rate": 0.0013696777905638665, "loss": 0.5219, "step": 109550 }, { "epoch": 31.518987341772153, "grad_norm": 1.3468401432037354, "learning_rate": 0.001369620253164557, "loss": 0.5612, "step": 109560 }, { "epoch": 31.52186421173763, "grad_norm": 1.3935859203338623, "learning_rate": 0.0013695627157652476, "loss": 0.7261, "step": 109570 }, { "epoch": 31.524741081703105, "grad_norm": 1.8210761547088623, "learning_rate": 0.0013695051783659377, "loss": 0.6931, "step": 109580 }, { "epoch": 31.527617951668585, "grad_norm": 1.7599513530731201, "learning_rate": 0.0013694476409666283, "loss": 0.832, "step": 109590 }, { "epoch": 31.53049482163406, "grad_norm": 1.2567801475524902, "learning_rate": 0.0013693901035673188, "loss": 0.6261, "step": 109600 }, { "epoch": 31.53337169159954, "grad_norm": 0.8456266522407532, "learning_rate": 0.0013693325661680092, "loss": 0.5806, "step": 109610 }, { "epoch": 31.536248561565017, "grad_norm": 1.5018303394317627, "learning_rate": 0.0013692750287686998, "loss": 0.6397, "step": 109620 }, { "epoch": 31.539125431530493, "grad_norm": 2.577068567276001, "learning_rate": 0.0013692174913693903, "loss": 0.7491, "step": 109630 }, { "epoch": 31.542002301495973, "grad_norm": 1.3059496879577637, "learning_rate": 0.0013691599539700804, "loss": 0.5764, "step": 109640 }, { "epoch": 31.54487917146145, "grad_norm": 1.4304088354110718, "learning_rate": 0.001369102416570771, "loss": 0.639, "step": 109650 }, { "epoch": 31.54775604142693, "grad_norm": 2.4148037433624268, "learning_rate": 0.0013690448791714616, "loss": 0.7742, "step": 109660 }, { "epoch": 31.550632911392405, "grad_norm": 1.1932655572891235, "learning_rate": 0.001368987341772152, "loss": 0.639, "step": 109670 }, { "epoch": 31.55350978135788, "grad_norm": 4.561163425445557, "learning_rate": 0.0013689298043728425, "loss": 0.8631, "step": 109680 }, { "epoch": 31.55638665132336, "grad_norm": 1.0972188711166382, "learning_rate": 0.0013688722669735328, "loss": 0.6255, "step": 109690 }, { "epoch": 31.559263521288837, "grad_norm": 1.357054352760315, "learning_rate": 0.0013688147295742232, "loss": 0.6028, "step": 109700 }, { "epoch": 31.562140391254314, "grad_norm": 2.277555227279663, "learning_rate": 0.0013687571921749137, "loss": 0.7497, "step": 109710 }, { "epoch": 31.565017261219793, "grad_norm": 0.6891695261001587, "learning_rate": 0.001368699654775604, "loss": 0.5029, "step": 109720 }, { "epoch": 31.56789413118527, "grad_norm": 1.8724300861358643, "learning_rate": 0.0013686421173762947, "loss": 0.8093, "step": 109730 }, { "epoch": 31.57077100115075, "grad_norm": 1.5627331733703613, "learning_rate": 0.0013685845799769852, "loss": 0.6992, "step": 109740 }, { "epoch": 31.573647871116226, "grad_norm": 3.073394775390625, "learning_rate": 0.0013685270425776756, "loss": 0.6683, "step": 109750 }, { "epoch": 31.576524741081702, "grad_norm": 0.8436806797981262, "learning_rate": 0.001368469505178366, "loss": 0.6755, "step": 109760 }, { "epoch": 31.57940161104718, "grad_norm": 1.41615891456604, "learning_rate": 0.0013684119677790565, "loss": 0.6401, "step": 109770 }, { "epoch": 31.582278481012658, "grad_norm": 0.8099513649940491, "learning_rate": 0.0013683544303797468, "loss": 0.5107, "step": 109780 }, { "epoch": 31.585155350978134, "grad_norm": 2.1145341396331787, "learning_rate": 0.0013682968929804374, "loss": 0.5798, "step": 109790 }, { "epoch": 31.588032220943614, "grad_norm": 1.4401854276657104, "learning_rate": 0.0013682393555811277, "loss": 0.7403, "step": 109800 }, { "epoch": 31.59090909090909, "grad_norm": 1.4823248386383057, "learning_rate": 0.0013681818181818183, "loss": 0.814, "step": 109810 }, { "epoch": 31.59378596087457, "grad_norm": 2.0761773586273193, "learning_rate": 0.0013681242807825086, "loss": 0.7822, "step": 109820 }, { "epoch": 31.596662830840046, "grad_norm": 1.2588999271392822, "learning_rate": 0.001368066743383199, "loss": 0.6448, "step": 109830 }, { "epoch": 31.599539700805522, "grad_norm": 1.258463978767395, "learning_rate": 0.0013680092059838896, "loss": 0.8101, "step": 109840 }, { "epoch": 31.602416570771002, "grad_norm": 1.085362434387207, "learning_rate": 0.0013679516685845801, "loss": 0.7825, "step": 109850 }, { "epoch": 31.605293440736478, "grad_norm": 0.7857301235198975, "learning_rate": 0.0013678941311852705, "loss": 0.7015, "step": 109860 }, { "epoch": 31.608170310701958, "grad_norm": 1.5346442461013794, "learning_rate": 0.0013678365937859608, "loss": 0.8324, "step": 109870 }, { "epoch": 31.611047180667434, "grad_norm": 2.193296194076538, "learning_rate": 0.0013677790563866514, "loss": 0.7241, "step": 109880 }, { "epoch": 31.61392405063291, "grad_norm": 1.734203815460205, "learning_rate": 0.0013677215189873417, "loss": 0.597, "step": 109890 }, { "epoch": 31.61680092059839, "grad_norm": 1.4406569004058838, "learning_rate": 0.0013676639815880323, "loss": 0.7372, "step": 109900 }, { "epoch": 31.619677790563866, "grad_norm": 1.8938543796539307, "learning_rate": 0.0013676064441887226, "loss": 0.6377, "step": 109910 }, { "epoch": 31.622554660529342, "grad_norm": 1.4920507669448853, "learning_rate": 0.0013675489067894132, "loss": 0.7334, "step": 109920 }, { "epoch": 31.625431530494822, "grad_norm": 1.2961061000823975, "learning_rate": 0.0013674913693901036, "loss": 0.6106, "step": 109930 }, { "epoch": 31.6283084004603, "grad_norm": 0.9495383501052856, "learning_rate": 0.001367433831990794, "loss": 0.6725, "step": 109940 }, { "epoch": 31.631185270425778, "grad_norm": 2.3693482875823975, "learning_rate": 0.0013673762945914845, "loss": 0.6634, "step": 109950 }, { "epoch": 31.634062140391254, "grad_norm": 1.6616532802581787, "learning_rate": 0.001367318757192175, "loss": 0.7175, "step": 109960 }, { "epoch": 31.63693901035673, "grad_norm": 0.8173543214797974, "learning_rate": 0.0013672612197928654, "loss": 0.6244, "step": 109970 }, { "epoch": 31.63981588032221, "grad_norm": 0.775861382484436, "learning_rate": 0.001367203682393556, "loss": 0.4912, "step": 109980 }, { "epoch": 31.642692750287686, "grad_norm": 1.6108944416046143, "learning_rate": 0.0013671461449942463, "loss": 0.5986, "step": 109990 }, { "epoch": 31.645569620253166, "grad_norm": 2.5138275623321533, "learning_rate": 0.0013670886075949366, "loss": 0.7494, "step": 110000 }, { "epoch": 31.648446490218642, "grad_norm": 0.994295597076416, "learning_rate": 0.0013670310701956272, "loss": 0.8089, "step": 110010 }, { "epoch": 31.65132336018412, "grad_norm": 0.7477415800094604, "learning_rate": 0.0013669735327963175, "loss": 0.7123, "step": 110020 }, { "epoch": 31.6542002301496, "grad_norm": 1.0977717638015747, "learning_rate": 0.001366915995397008, "loss": 0.8051, "step": 110030 }, { "epoch": 31.657077100115075, "grad_norm": 1.2828341722488403, "learning_rate": 0.0013668584579976987, "loss": 0.5811, "step": 110040 }, { "epoch": 31.65995397008055, "grad_norm": 1.3700108528137207, "learning_rate": 0.0013668009205983888, "loss": 0.6424, "step": 110050 }, { "epoch": 31.66283084004603, "grad_norm": 0.5653286576271057, "learning_rate": 0.0013667433831990794, "loss": 0.6632, "step": 110060 }, { "epoch": 31.665707710011507, "grad_norm": 3.3031904697418213, "learning_rate": 0.00136668584579977, "loss": 0.578, "step": 110070 }, { "epoch": 31.668584579976987, "grad_norm": 1.2773696184158325, "learning_rate": 0.0013666283084004603, "loss": 0.6246, "step": 110080 }, { "epoch": 31.671461449942463, "grad_norm": 1.4964481592178345, "learning_rate": 0.0013665707710011508, "loss": 0.7685, "step": 110090 }, { "epoch": 31.67433831990794, "grad_norm": 1.3908573389053345, "learning_rate": 0.0013665132336018414, "loss": 0.631, "step": 110100 }, { "epoch": 31.67721518987342, "grad_norm": 1.0322099924087524, "learning_rate": 0.0013664556962025315, "loss": 0.5749, "step": 110110 }, { "epoch": 31.680092059838895, "grad_norm": 2.836230993270874, "learning_rate": 0.001366398158803222, "loss": 0.7377, "step": 110120 }, { "epoch": 31.682968929804375, "grad_norm": 2.1151387691497803, "learning_rate": 0.0013663406214039124, "loss": 0.8371, "step": 110130 }, { "epoch": 31.68584579976985, "grad_norm": 1.7918673753738403, "learning_rate": 0.001366283084004603, "loss": 0.7229, "step": 110140 }, { "epoch": 31.688722669735327, "grad_norm": 1.4359357357025146, "learning_rate": 0.0013662255466052936, "loss": 0.6393, "step": 110150 }, { "epoch": 31.691599539700807, "grad_norm": 2.112744092941284, "learning_rate": 0.001366168009205984, "loss": 0.5314, "step": 110160 }, { "epoch": 31.694476409666283, "grad_norm": 1.5407673120498657, "learning_rate": 0.0013661104718066743, "loss": 0.6741, "step": 110170 }, { "epoch": 31.69735327963176, "grad_norm": 1.2636911869049072, "learning_rate": 0.0013660529344073648, "loss": 0.4955, "step": 110180 }, { "epoch": 31.70023014959724, "grad_norm": 1.1781989336013794, "learning_rate": 0.0013659953970080552, "loss": 0.5636, "step": 110190 }, { "epoch": 31.703107019562715, "grad_norm": 1.0788657665252686, "learning_rate": 0.0013659378596087457, "loss": 0.5692, "step": 110200 }, { "epoch": 31.705983889528195, "grad_norm": 1.7675271034240723, "learning_rate": 0.0013658803222094363, "loss": 0.5047, "step": 110210 }, { "epoch": 31.70886075949367, "grad_norm": 1.2975817918777466, "learning_rate": 0.0013658227848101267, "loss": 0.6309, "step": 110220 }, { "epoch": 31.711737629459147, "grad_norm": 1.967789888381958, "learning_rate": 0.001365765247410817, "loss": 0.7186, "step": 110230 }, { "epoch": 31.714614499424627, "grad_norm": 0.989602267742157, "learning_rate": 0.0013657077100115073, "loss": 0.608, "step": 110240 }, { "epoch": 31.717491369390103, "grad_norm": 1.712247610092163, "learning_rate": 0.001365650172612198, "loss": 0.6224, "step": 110250 }, { "epoch": 31.72036823935558, "grad_norm": 1.8212246894836426, "learning_rate": 0.0013655926352128885, "loss": 0.8437, "step": 110260 }, { "epoch": 31.72324510932106, "grad_norm": 0.9427002668380737, "learning_rate": 0.0013655350978135788, "loss": 0.6039, "step": 110270 }, { "epoch": 31.726121979286535, "grad_norm": 1.6983726024627686, "learning_rate": 0.0013654775604142694, "loss": 0.7154, "step": 110280 }, { "epoch": 31.728998849252015, "grad_norm": 1.674142837524414, "learning_rate": 0.0013654200230149597, "loss": 0.8116, "step": 110290 }, { "epoch": 31.73187571921749, "grad_norm": 2.074073076248169, "learning_rate": 0.00136536248561565, "loss": 0.6615, "step": 110300 }, { "epoch": 31.734752589182968, "grad_norm": 1.466530442237854, "learning_rate": 0.0013653049482163406, "loss": 0.7178, "step": 110310 }, { "epoch": 31.737629459148447, "grad_norm": 0.6884454488754272, "learning_rate": 0.0013652474108170312, "loss": 0.7528, "step": 110320 }, { "epoch": 31.740506329113924, "grad_norm": 1.9504928588867188, "learning_rate": 0.0013651898734177216, "loss": 0.6343, "step": 110330 }, { "epoch": 31.743383199079403, "grad_norm": 1.0073416233062744, "learning_rate": 0.0013651323360184121, "loss": 0.653, "step": 110340 }, { "epoch": 31.74626006904488, "grad_norm": 1.0530928373336792, "learning_rate": 0.0013650747986191025, "loss": 0.6462, "step": 110350 }, { "epoch": 31.749136939010356, "grad_norm": 1.4019217491149902, "learning_rate": 0.0013650172612197928, "loss": 0.7185, "step": 110360 }, { "epoch": 31.752013808975835, "grad_norm": 1.0198827981948853, "learning_rate": 0.0013649597238204834, "loss": 0.6261, "step": 110370 }, { "epoch": 31.75489067894131, "grad_norm": 2.0615053176879883, "learning_rate": 0.0013649021864211737, "loss": 0.7863, "step": 110380 }, { "epoch": 31.757767548906788, "grad_norm": 0.8584215641021729, "learning_rate": 0.0013648446490218643, "loss": 0.64, "step": 110390 }, { "epoch": 31.760644418872268, "grad_norm": 0.9665014147758484, "learning_rate": 0.0013647871116225549, "loss": 0.6708, "step": 110400 }, { "epoch": 31.763521288837744, "grad_norm": 1.0456085205078125, "learning_rate": 0.001364729574223245, "loss": 0.5949, "step": 110410 }, { "epoch": 31.766398158803224, "grad_norm": 1.3728017807006836, "learning_rate": 0.0013646720368239355, "loss": 0.5934, "step": 110420 }, { "epoch": 31.7692750287687, "grad_norm": 1.3528183698654175, "learning_rate": 0.0013646144994246261, "loss": 0.9384, "step": 110430 }, { "epoch": 31.772151898734176, "grad_norm": 1.1656297445297241, "learning_rate": 0.0013645569620253165, "loss": 0.6416, "step": 110440 }, { "epoch": 31.775028768699656, "grad_norm": 2.388695478439331, "learning_rate": 0.001364499424626007, "loss": 0.6529, "step": 110450 }, { "epoch": 31.777905638665132, "grad_norm": 1.5868020057678223, "learning_rate": 0.0013644418872266976, "loss": 0.7076, "step": 110460 }, { "epoch": 31.780782508630608, "grad_norm": 1.8770530223846436, "learning_rate": 0.0013643843498273877, "loss": 0.6794, "step": 110470 }, { "epoch": 31.783659378596088, "grad_norm": 1.049485683441162, "learning_rate": 0.0013643268124280783, "loss": 0.7525, "step": 110480 }, { "epoch": 31.786536248561564, "grad_norm": 0.6358066201210022, "learning_rate": 0.0013642692750287686, "loss": 0.6235, "step": 110490 }, { "epoch": 31.789413118527044, "grad_norm": 1.0832990407943726, "learning_rate": 0.0013642117376294592, "loss": 0.651, "step": 110500 }, { "epoch": 31.79228998849252, "grad_norm": 1.2036633491516113, "learning_rate": 0.0013641542002301498, "loss": 0.709, "step": 110510 }, { "epoch": 31.795166858457996, "grad_norm": 1.5138202905654907, "learning_rate": 0.00136409666283084, "loss": 0.5874, "step": 110520 }, { "epoch": 31.798043728423476, "grad_norm": 1.5282577276229858, "learning_rate": 0.0013640391254315304, "loss": 0.6606, "step": 110530 }, { "epoch": 31.800920598388952, "grad_norm": 1.6985743045806885, "learning_rate": 0.001363981588032221, "loss": 0.854, "step": 110540 }, { "epoch": 31.803797468354432, "grad_norm": 1.487125039100647, "learning_rate": 0.0013639240506329114, "loss": 0.7416, "step": 110550 }, { "epoch": 31.806674338319908, "grad_norm": 1.9872231483459473, "learning_rate": 0.001363866513233602, "loss": 0.7271, "step": 110560 }, { "epoch": 31.809551208285384, "grad_norm": 1.3339921236038208, "learning_rate": 0.0013638089758342925, "loss": 0.667, "step": 110570 }, { "epoch": 31.812428078250864, "grad_norm": 1.0092581510543823, "learning_rate": 0.0013637514384349828, "loss": 0.5945, "step": 110580 }, { "epoch": 31.81530494821634, "grad_norm": 0.9411472082138062, "learning_rate": 0.0013636939010356732, "loss": 0.6717, "step": 110590 }, { "epoch": 31.818181818181817, "grad_norm": 1.2319512367248535, "learning_rate": 0.0013636363636363635, "loss": 0.6172, "step": 110600 }, { "epoch": 31.821058688147296, "grad_norm": 1.662850022315979, "learning_rate": 0.001363578826237054, "loss": 0.8152, "step": 110610 }, { "epoch": 31.823935558112773, "grad_norm": 1.0345088243484497, "learning_rate": 0.0013635212888377447, "loss": 0.717, "step": 110620 }, { "epoch": 31.826812428078252, "grad_norm": 1.2256121635437012, "learning_rate": 0.001363463751438435, "loss": 0.7534, "step": 110630 }, { "epoch": 31.82968929804373, "grad_norm": 1.3092634677886963, "learning_rate": 0.0013634062140391256, "loss": 0.7967, "step": 110640 }, { "epoch": 31.832566168009205, "grad_norm": 1.1662293672561646, "learning_rate": 0.001363348676639816, "loss": 0.6719, "step": 110650 }, { "epoch": 31.835443037974684, "grad_norm": 1.5000746250152588, "learning_rate": 0.0013632911392405063, "loss": 0.6449, "step": 110660 }, { "epoch": 31.83831990794016, "grad_norm": 0.6096943020820618, "learning_rate": 0.0013632336018411968, "loss": 0.5949, "step": 110670 }, { "epoch": 31.841196777905637, "grad_norm": 0.9128050804138184, "learning_rate": 0.0013631760644418874, "loss": 0.6373, "step": 110680 }, { "epoch": 31.844073647871117, "grad_norm": 1.7365436553955078, "learning_rate": 0.0013631185270425777, "loss": 0.7668, "step": 110690 }, { "epoch": 31.846950517836593, "grad_norm": 1.9121496677398682, "learning_rate": 0.001363060989643268, "loss": 0.6736, "step": 110700 }, { "epoch": 31.849827387802073, "grad_norm": 1.1457942724227905, "learning_rate": 0.0013630034522439584, "loss": 0.551, "step": 110710 }, { "epoch": 31.85270425776755, "grad_norm": 0.9729867577552795, "learning_rate": 0.001362945914844649, "loss": 0.6405, "step": 110720 }, { "epoch": 31.855581127733025, "grad_norm": 1.0411407947540283, "learning_rate": 0.0013628883774453396, "loss": 0.5913, "step": 110730 }, { "epoch": 31.858457997698505, "grad_norm": 1.8815680742263794, "learning_rate": 0.00136283084004603, "loss": 0.6964, "step": 110740 }, { "epoch": 31.86133486766398, "grad_norm": 1.3912745714187622, "learning_rate": 0.0013627733026467205, "loss": 0.677, "step": 110750 }, { "epoch": 31.86421173762946, "grad_norm": 1.8411574363708496, "learning_rate": 0.0013627157652474108, "loss": 0.7373, "step": 110760 }, { "epoch": 31.867088607594937, "grad_norm": 2.4996676445007324, "learning_rate": 0.0013626582278481012, "loss": 0.7627, "step": 110770 }, { "epoch": 31.869965477560413, "grad_norm": 0.9638175368309021, "learning_rate": 0.0013626006904487917, "loss": 0.717, "step": 110780 }, { "epoch": 31.872842347525893, "grad_norm": 1.0785269737243652, "learning_rate": 0.0013625431530494823, "loss": 0.6237, "step": 110790 }, { "epoch": 31.87571921749137, "grad_norm": 1.4707365036010742, "learning_rate": 0.0013624856156501726, "loss": 0.6558, "step": 110800 }, { "epoch": 31.878596087456845, "grad_norm": 1.5363918542861938, "learning_rate": 0.0013624280782508632, "loss": 0.6288, "step": 110810 }, { "epoch": 31.881472957422325, "grad_norm": 1.0375458002090454, "learning_rate": 0.0013623705408515533, "loss": 0.5882, "step": 110820 }, { "epoch": 31.8843498273878, "grad_norm": 1.2357861995697021, "learning_rate": 0.001362313003452244, "loss": 0.6421, "step": 110830 }, { "epoch": 31.88722669735328, "grad_norm": 1.1327388286590576, "learning_rate": 0.0013622554660529345, "loss": 0.6066, "step": 110840 }, { "epoch": 31.890103567318757, "grad_norm": 1.7826189994812012, "learning_rate": 0.0013621979286536248, "loss": 0.8773, "step": 110850 }, { "epoch": 31.892980437284233, "grad_norm": 1.7715332508087158, "learning_rate": 0.0013621403912543154, "loss": 0.6682, "step": 110860 }, { "epoch": 31.895857307249713, "grad_norm": 1.63881516456604, "learning_rate": 0.001362082853855006, "loss": 0.7207, "step": 110870 }, { "epoch": 31.89873417721519, "grad_norm": 1.1262681484222412, "learning_rate": 0.001362025316455696, "loss": 0.6696, "step": 110880 }, { "epoch": 31.90161104718067, "grad_norm": 2.140347957611084, "learning_rate": 0.0013619677790563866, "loss": 0.6156, "step": 110890 }, { "epoch": 31.904487917146145, "grad_norm": 0.8401181697845459, "learning_rate": 0.0013619102416570772, "loss": 0.6891, "step": 110900 }, { "epoch": 31.90736478711162, "grad_norm": 1.2279859781265259, "learning_rate": 0.0013618527042577675, "loss": 0.7587, "step": 110910 }, { "epoch": 31.9102416570771, "grad_norm": 0.9205774068832397, "learning_rate": 0.001361795166858458, "loss": 0.9368, "step": 110920 }, { "epoch": 31.913118527042577, "grad_norm": 0.9716803431510925, "learning_rate": 0.0013617376294591487, "loss": 0.5343, "step": 110930 }, { "epoch": 31.915995397008054, "grad_norm": 1.3809226751327515, "learning_rate": 0.0013616800920598388, "loss": 0.6527, "step": 110940 }, { "epoch": 31.918872266973533, "grad_norm": 1.5284634828567505, "learning_rate": 0.0013616225546605294, "loss": 0.6874, "step": 110950 }, { "epoch": 31.92174913693901, "grad_norm": 1.484597086906433, "learning_rate": 0.0013615650172612197, "loss": 0.7208, "step": 110960 }, { "epoch": 31.92462600690449, "grad_norm": 1.6530508995056152, "learning_rate": 0.0013615074798619103, "loss": 0.7326, "step": 110970 }, { "epoch": 31.927502876869966, "grad_norm": 1.0661309957504272, "learning_rate": 0.0013614499424626008, "loss": 0.6166, "step": 110980 }, { "epoch": 31.930379746835442, "grad_norm": 2.206098794937134, "learning_rate": 0.0013613924050632912, "loss": 0.6347, "step": 110990 }, { "epoch": 31.93325661680092, "grad_norm": 1.759775996208191, "learning_rate": 0.0013613348676639815, "loss": 0.7324, "step": 111000 }, { "epoch": 31.936133486766398, "grad_norm": 3.2139289379119873, "learning_rate": 0.001361277330264672, "loss": 0.7055, "step": 111010 }, { "epoch": 31.939010356731877, "grad_norm": 1.0204603672027588, "learning_rate": 0.0013612197928653624, "loss": 0.633, "step": 111020 }, { "epoch": 31.941887226697354, "grad_norm": 1.1022168397903442, "learning_rate": 0.001361162255466053, "loss": 0.7019, "step": 111030 }, { "epoch": 31.94476409666283, "grad_norm": 1.3258377313613892, "learning_rate": 0.0013611047180667436, "loss": 0.5837, "step": 111040 }, { "epoch": 31.94764096662831, "grad_norm": 1.0271203517913818, "learning_rate": 0.001361047180667434, "loss": 0.7699, "step": 111050 }, { "epoch": 31.950517836593786, "grad_norm": 1.2053395509719849, "learning_rate": 0.0013609896432681243, "loss": 0.5505, "step": 111060 }, { "epoch": 31.953394706559262, "grad_norm": 1.4328844547271729, "learning_rate": 0.0013609321058688146, "loss": 0.5902, "step": 111070 }, { "epoch": 31.956271576524742, "grad_norm": 1.4170689582824707, "learning_rate": 0.0013608745684695052, "loss": 0.6475, "step": 111080 }, { "epoch": 31.959148446490218, "grad_norm": 1.062450647354126, "learning_rate": 0.0013608170310701957, "loss": 0.6993, "step": 111090 }, { "epoch": 31.962025316455698, "grad_norm": 1.5633825063705444, "learning_rate": 0.001360759493670886, "loss": 0.7917, "step": 111100 }, { "epoch": 31.964902186421174, "grad_norm": 1.2463504076004028, "learning_rate": 0.0013607019562715766, "loss": 0.5961, "step": 111110 }, { "epoch": 31.96777905638665, "grad_norm": 1.4644980430603027, "learning_rate": 0.001360644418872267, "loss": 0.5867, "step": 111120 }, { "epoch": 31.97065592635213, "grad_norm": 1.2068281173706055, "learning_rate": 0.0013605868814729573, "loss": 0.736, "step": 111130 }, { "epoch": 31.973532796317606, "grad_norm": 1.1553562879562378, "learning_rate": 0.001360529344073648, "loss": 0.7714, "step": 111140 }, { "epoch": 31.976409666283082, "grad_norm": 0.6519283652305603, "learning_rate": 0.0013604718066743385, "loss": 0.7514, "step": 111150 }, { "epoch": 31.979286536248562, "grad_norm": 1.2200658321380615, "learning_rate": 0.0013604142692750288, "loss": 0.5931, "step": 111160 }, { "epoch": 31.98216340621404, "grad_norm": 1.1480122804641724, "learning_rate": 0.0013603567318757194, "loss": 0.7644, "step": 111170 }, { "epoch": 31.985040276179518, "grad_norm": 1.2374906539916992, "learning_rate": 0.0013602991944764095, "loss": 0.6491, "step": 111180 }, { "epoch": 31.987917146144994, "grad_norm": 1.1161009073257446, "learning_rate": 0.0013602416570771, "loss": 0.6045, "step": 111190 }, { "epoch": 31.99079401611047, "grad_norm": 1.5318082571029663, "learning_rate": 0.0013601841196777906, "loss": 0.7626, "step": 111200 }, { "epoch": 31.99367088607595, "grad_norm": 2.528585195541382, "learning_rate": 0.001360126582278481, "loss": 0.6647, "step": 111210 }, { "epoch": 31.996547756041426, "grad_norm": 0.8009740710258484, "learning_rate": 0.0013600690448791716, "loss": 0.4988, "step": 111220 }, { "epoch": 31.999424626006906, "grad_norm": 1.1493070125579834, "learning_rate": 0.0013600115074798621, "loss": 0.6343, "step": 111230 }, { "epoch": 32.00230149597238, "grad_norm": 1.3500490188598633, "learning_rate": 0.0013599539700805522, "loss": 0.6212, "step": 111240 }, { "epoch": 32.00517836593786, "grad_norm": 0.870020866394043, "learning_rate": 0.0013598964326812428, "loss": 0.6122, "step": 111250 }, { "epoch": 32.008055235903335, "grad_norm": 0.870665967464447, "learning_rate": 0.0013598388952819334, "loss": 0.5855, "step": 111260 }, { "epoch": 32.01093210586882, "grad_norm": 0.714802622795105, "learning_rate": 0.0013597813578826237, "loss": 0.6988, "step": 111270 }, { "epoch": 32.013808975834294, "grad_norm": 1.8812755346298218, "learning_rate": 0.0013597238204833143, "loss": 0.6588, "step": 111280 }, { "epoch": 32.01668584579977, "grad_norm": 0.8984551429748535, "learning_rate": 0.0013596662830840046, "loss": 0.5999, "step": 111290 }, { "epoch": 32.01956271576525, "grad_norm": 1.424437165260315, "learning_rate": 0.001359608745684695, "loss": 0.6396, "step": 111300 }, { "epoch": 32.02243958573072, "grad_norm": 1.281581163406372, "learning_rate": 0.0013595512082853855, "loss": 0.5945, "step": 111310 }, { "epoch": 32.0253164556962, "grad_norm": 3.2661032676696777, "learning_rate": 0.0013594936708860759, "loss": 0.7194, "step": 111320 }, { "epoch": 32.02819332566168, "grad_norm": 1.1270982027053833, "learning_rate": 0.0013594361334867665, "loss": 0.5883, "step": 111330 }, { "epoch": 32.03107019562716, "grad_norm": 1.5757187604904175, "learning_rate": 0.001359378596087457, "loss": 0.7395, "step": 111340 }, { "epoch": 32.033947065592635, "grad_norm": 0.714415431022644, "learning_rate": 0.0013593210586881474, "loss": 0.5778, "step": 111350 }, { "epoch": 32.03682393555811, "grad_norm": 1.3934162855148315, "learning_rate": 0.0013592635212888377, "loss": 0.5906, "step": 111360 }, { "epoch": 32.03970080552359, "grad_norm": 1.0554792881011963, "learning_rate": 0.0013592059838895283, "loss": 0.5155, "step": 111370 }, { "epoch": 32.04257767548907, "grad_norm": 1.075174331665039, "learning_rate": 0.0013591484464902186, "loss": 0.687, "step": 111380 }, { "epoch": 32.04545454545455, "grad_norm": 0.9178965091705322, "learning_rate": 0.0013590909090909092, "loss": 0.7273, "step": 111390 }, { "epoch": 32.04833141542002, "grad_norm": 1.3003984689712524, "learning_rate": 0.0013590333716915995, "loss": 0.573, "step": 111400 }, { "epoch": 32.0512082853855, "grad_norm": 1.06514310836792, "learning_rate": 0.00135897583429229, "loss": 0.5496, "step": 111410 }, { "epoch": 32.054085155350975, "grad_norm": 2.1991519927978516, "learning_rate": 0.0013589182968929804, "loss": 0.645, "step": 111420 }, { "epoch": 32.05696202531646, "grad_norm": 1.3451870679855347, "learning_rate": 0.0013588607594936708, "loss": 0.6436, "step": 111430 }, { "epoch": 32.059838895281935, "grad_norm": 1.6670950651168823, "learning_rate": 0.0013588032220943614, "loss": 0.6067, "step": 111440 }, { "epoch": 32.06271576524741, "grad_norm": 1.3959784507751465, "learning_rate": 0.001358745684695052, "loss": 0.6021, "step": 111450 }, { "epoch": 32.06559263521289, "grad_norm": 0.9548773765563965, "learning_rate": 0.0013586881472957423, "loss": 0.5257, "step": 111460 }, { "epoch": 32.06846950517836, "grad_norm": 0.771824300289154, "learning_rate": 0.0013586306098964328, "loss": 0.6775, "step": 111470 }, { "epoch": 32.07134637514385, "grad_norm": 1.2989546060562134, "learning_rate": 0.0013585730724971232, "loss": 0.6993, "step": 111480 }, { "epoch": 32.07422324510932, "grad_norm": 0.7333385944366455, "learning_rate": 0.0013585155350978135, "loss": 0.5163, "step": 111490 }, { "epoch": 32.0771001150748, "grad_norm": 1.4210354089736938, "learning_rate": 0.001358457997698504, "loss": 0.6116, "step": 111500 }, { "epoch": 32.079976985040275, "grad_norm": 0.845403254032135, "learning_rate": 0.0013584004602991944, "loss": 0.5842, "step": 111510 }, { "epoch": 32.08285385500575, "grad_norm": 0.9622634053230286, "learning_rate": 0.001358342922899885, "loss": 0.6348, "step": 111520 }, { "epoch": 32.08573072497123, "grad_norm": 1.0887930393218994, "learning_rate": 0.0013582853855005753, "loss": 0.6315, "step": 111530 }, { "epoch": 32.08860759493671, "grad_norm": 0.8548122644424438, "learning_rate": 0.0013582278481012657, "loss": 0.6868, "step": 111540 }, { "epoch": 32.09148446490219, "grad_norm": 1.5828243494033813, "learning_rate": 0.0013581703107019563, "loss": 0.5483, "step": 111550 }, { "epoch": 32.09436133486766, "grad_norm": 1.4857691526412964, "learning_rate": 0.0013581127733026468, "loss": 0.6973, "step": 111560 }, { "epoch": 32.09723820483314, "grad_norm": 1.0529274940490723, "learning_rate": 0.0013580552359033372, "loss": 0.57, "step": 111570 }, { "epoch": 32.100115074798616, "grad_norm": 1.2782636880874634, "learning_rate": 0.0013579976985040277, "loss": 0.6738, "step": 111580 }, { "epoch": 32.1029919447641, "grad_norm": 1.8008170127868652, "learning_rate": 0.001357940161104718, "loss": 0.8569, "step": 111590 }, { "epoch": 32.105868814729575, "grad_norm": 1.3900196552276611, "learning_rate": 0.0013578826237054084, "loss": 0.6972, "step": 111600 }, { "epoch": 32.10874568469505, "grad_norm": 1.0878294706344604, "learning_rate": 0.001357825086306099, "loss": 0.5723, "step": 111610 }, { "epoch": 32.11162255466053, "grad_norm": 1.373019814491272, "learning_rate": 0.0013577675489067896, "loss": 0.6452, "step": 111620 }, { "epoch": 32.114499424626004, "grad_norm": 2.290553569793701, "learning_rate": 0.00135771001150748, "loss": 0.7963, "step": 111630 }, { "epoch": 32.11737629459149, "grad_norm": 1.4271929264068604, "learning_rate": 0.0013576524741081705, "loss": 0.5748, "step": 111640 }, { "epoch": 32.120253164556964, "grad_norm": 1.669589638710022, "learning_rate": 0.0013575949367088606, "loss": 0.6253, "step": 111650 }, { "epoch": 32.12313003452244, "grad_norm": 2.201998710632324, "learning_rate": 0.0013575373993095512, "loss": 0.4681, "step": 111660 }, { "epoch": 32.126006904487916, "grad_norm": 1.1644253730773926, "learning_rate": 0.0013574798619102417, "loss": 0.575, "step": 111670 }, { "epoch": 32.12888377445339, "grad_norm": 1.3510847091674805, "learning_rate": 0.001357422324510932, "loss": 0.6679, "step": 111680 }, { "epoch": 32.131760644418875, "grad_norm": 0.8705337047576904, "learning_rate": 0.0013573647871116226, "loss": 0.7793, "step": 111690 }, { "epoch": 32.13463751438435, "grad_norm": 1.4308340549468994, "learning_rate": 0.0013573072497123132, "loss": 0.5673, "step": 111700 }, { "epoch": 32.13751438434983, "grad_norm": 1.0292482376098633, "learning_rate": 0.0013572497123130033, "loss": 0.6975, "step": 111710 }, { "epoch": 32.140391254315304, "grad_norm": 2.143857002258301, "learning_rate": 0.001357192174913694, "loss": 0.6551, "step": 111720 }, { "epoch": 32.14326812428078, "grad_norm": 0.575312614440918, "learning_rate": 0.0013571346375143845, "loss": 0.5801, "step": 111730 }, { "epoch": 32.146144994246264, "grad_norm": 1.3646273612976074, "learning_rate": 0.0013570771001150748, "loss": 0.5542, "step": 111740 }, { "epoch": 32.14902186421174, "grad_norm": 0.8520287871360779, "learning_rate": 0.0013570195627157654, "loss": 0.6177, "step": 111750 }, { "epoch": 32.151898734177216, "grad_norm": 1.9297456741333008, "learning_rate": 0.0013569620253164557, "loss": 0.5326, "step": 111760 }, { "epoch": 32.15477560414269, "grad_norm": 0.7452853918075562, "learning_rate": 0.001356904487917146, "loss": 0.5186, "step": 111770 }, { "epoch": 32.15765247410817, "grad_norm": 1.6608643531799316, "learning_rate": 0.0013568469505178366, "loss": 0.6108, "step": 111780 }, { "epoch": 32.160529344073645, "grad_norm": 1.3859163522720337, "learning_rate": 0.001356789413118527, "loss": 0.6137, "step": 111790 }, { "epoch": 32.16340621403913, "grad_norm": 1.0675503015518188, "learning_rate": 0.0013567318757192175, "loss": 0.6274, "step": 111800 }, { "epoch": 32.166283084004604, "grad_norm": 0.7658320665359497, "learning_rate": 0.001356674338319908, "loss": 0.6187, "step": 111810 }, { "epoch": 32.16915995397008, "grad_norm": 1.2175624370574951, "learning_rate": 0.0013566168009205984, "loss": 0.7219, "step": 111820 }, { "epoch": 32.17203682393556, "grad_norm": 1.6424325704574585, "learning_rate": 0.0013565592635212888, "loss": 0.5544, "step": 111830 }, { "epoch": 32.17491369390103, "grad_norm": 0.5966711044311523, "learning_rate": 0.0013565017261219794, "loss": 0.4401, "step": 111840 }, { "epoch": 32.177790563866516, "grad_norm": 1.0649250745773315, "learning_rate": 0.0013564441887226697, "loss": 0.6155, "step": 111850 }, { "epoch": 32.18066743383199, "grad_norm": 1.4546526670455933, "learning_rate": 0.0013563866513233603, "loss": 0.6823, "step": 111860 }, { "epoch": 32.18354430379747, "grad_norm": 1.043031096458435, "learning_rate": 0.0013563291139240506, "loss": 0.6911, "step": 111870 }, { "epoch": 32.186421173762945, "grad_norm": 1.6645711660385132, "learning_rate": 0.0013562715765247412, "loss": 0.6593, "step": 111880 }, { "epoch": 32.18929804372842, "grad_norm": 1.3738666772842407, "learning_rate": 0.0013562140391254315, "loss": 0.6785, "step": 111890 }, { "epoch": 32.192174913693904, "grad_norm": 1.249651551246643, "learning_rate": 0.0013561565017261219, "loss": 0.7723, "step": 111900 }, { "epoch": 32.19505178365938, "grad_norm": 0.8381174206733704, "learning_rate": 0.0013560989643268124, "loss": 0.5362, "step": 111910 }, { "epoch": 32.19792865362486, "grad_norm": 0.9019412994384766, "learning_rate": 0.001356041426927503, "loss": 0.5074, "step": 111920 }, { "epoch": 32.20080552359033, "grad_norm": 1.3803341388702393, "learning_rate": 0.0013559838895281933, "loss": 0.7306, "step": 111930 }, { "epoch": 32.20368239355581, "grad_norm": 1.2683180570602417, "learning_rate": 0.001355926352128884, "loss": 0.5941, "step": 111940 }, { "epoch": 32.20655926352129, "grad_norm": 2.171797513961792, "learning_rate": 0.0013558688147295743, "loss": 0.6204, "step": 111950 }, { "epoch": 32.20943613348677, "grad_norm": 1.6628990173339844, "learning_rate": 0.0013558112773302646, "loss": 0.6851, "step": 111960 }, { "epoch": 32.212313003452245, "grad_norm": 0.8522893786430359, "learning_rate": 0.0013557537399309552, "loss": 0.6307, "step": 111970 }, { "epoch": 32.21518987341772, "grad_norm": 0.874367892742157, "learning_rate": 0.0013556962025316455, "loss": 0.8243, "step": 111980 }, { "epoch": 32.2180667433832, "grad_norm": 2.6627843379974365, "learning_rate": 0.001355638665132336, "loss": 0.6815, "step": 111990 }, { "epoch": 32.22094361334867, "grad_norm": 1.2676883935928345, "learning_rate": 0.0013555811277330266, "loss": 0.7164, "step": 112000 }, { "epoch": 32.22382048331416, "grad_norm": 1.8983994722366333, "learning_rate": 0.0013555235903337168, "loss": 0.5956, "step": 112010 }, { "epoch": 32.22669735327963, "grad_norm": 1.2768489122390747, "learning_rate": 0.0013554660529344073, "loss": 0.6501, "step": 112020 }, { "epoch": 32.22957422324511, "grad_norm": 1.6005762815475464, "learning_rate": 0.001355408515535098, "loss": 0.5053, "step": 112030 }, { "epoch": 32.232451093210585, "grad_norm": 1.9799449443817139, "learning_rate": 0.0013553509781357883, "loss": 0.7222, "step": 112040 }, { "epoch": 32.23532796317606, "grad_norm": 1.085402488708496, "learning_rate": 0.0013552934407364788, "loss": 0.8327, "step": 112050 }, { "epoch": 32.238204833141545, "grad_norm": 0.8394299745559692, "learning_rate": 0.0013552359033371694, "loss": 0.7982, "step": 112060 }, { "epoch": 32.24108170310702, "grad_norm": 0.8966269493103027, "learning_rate": 0.0013551783659378595, "loss": 0.7375, "step": 112070 }, { "epoch": 32.2439585730725, "grad_norm": 0.909720242023468, "learning_rate": 0.00135512082853855, "loss": 0.6398, "step": 112080 }, { "epoch": 32.24683544303797, "grad_norm": 1.9658435583114624, "learning_rate": 0.0013550632911392404, "loss": 0.6329, "step": 112090 }, { "epoch": 32.24971231300345, "grad_norm": 1.3213777542114258, "learning_rate": 0.001355005753739931, "loss": 0.6118, "step": 112100 }, { "epoch": 32.25258918296893, "grad_norm": 1.2182732820510864, "learning_rate": 0.0013549482163406215, "loss": 0.5642, "step": 112110 }, { "epoch": 32.25546605293441, "grad_norm": 1.9393541812896729, "learning_rate": 0.001354890678941312, "loss": 0.5605, "step": 112120 }, { "epoch": 32.258342922899885, "grad_norm": 1.0096102952957153, "learning_rate": 0.0013548331415420022, "loss": 0.6814, "step": 112130 }, { "epoch": 32.26121979286536, "grad_norm": 1.7059611082077026, "learning_rate": 0.0013547756041426928, "loss": 0.6735, "step": 112140 }, { "epoch": 32.26409666283084, "grad_norm": 1.4876588582992554, "learning_rate": 0.0013547180667433832, "loss": 0.6898, "step": 112150 }, { "epoch": 32.26697353279632, "grad_norm": 1.2464535236358643, "learning_rate": 0.0013546605293440737, "loss": 0.5705, "step": 112160 }, { "epoch": 32.2698504027618, "grad_norm": 1.871908187866211, "learning_rate": 0.0013546029919447643, "loss": 0.6839, "step": 112170 }, { "epoch": 32.27272727272727, "grad_norm": 1.1645492315292358, "learning_rate": 0.0013545454545454546, "loss": 0.6556, "step": 112180 }, { "epoch": 32.27560414269275, "grad_norm": 1.0328197479248047, "learning_rate": 0.001354487917146145, "loss": 0.6237, "step": 112190 }, { "epoch": 32.278481012658226, "grad_norm": 1.1974542140960693, "learning_rate": 0.0013544303797468353, "loss": 0.6658, "step": 112200 }, { "epoch": 32.2813578826237, "grad_norm": 1.7868313789367676, "learning_rate": 0.0013543728423475259, "loss": 0.6945, "step": 112210 }, { "epoch": 32.284234752589185, "grad_norm": 1.0878478288650513, "learning_rate": 0.0013543153049482165, "loss": 0.6894, "step": 112220 }, { "epoch": 32.28711162255466, "grad_norm": 1.439982533454895, "learning_rate": 0.0013542577675489068, "loss": 0.7471, "step": 112230 }, { "epoch": 32.28998849252014, "grad_norm": 0.8501585721969604, "learning_rate": 0.0013542002301495974, "loss": 0.5661, "step": 112240 }, { "epoch": 32.292865362485614, "grad_norm": 2.1915135383605957, "learning_rate": 0.0013541426927502877, "loss": 0.6261, "step": 112250 }, { "epoch": 32.29574223245109, "grad_norm": 0.8683331608772278, "learning_rate": 0.001354085155350978, "loss": 0.527, "step": 112260 }, { "epoch": 32.29861910241657, "grad_norm": 1.1639373302459717, "learning_rate": 0.0013540276179516686, "loss": 0.7846, "step": 112270 }, { "epoch": 32.30149597238205, "grad_norm": 1.3647959232330322, "learning_rate": 0.0013539700805523592, "loss": 0.5909, "step": 112280 }, { "epoch": 32.304372842347526, "grad_norm": 1.1014050245285034, "learning_rate": 0.0013539125431530495, "loss": 0.6223, "step": 112290 }, { "epoch": 32.307249712313, "grad_norm": 1.6498827934265137, "learning_rate": 0.00135385500575374, "loss": 0.7197, "step": 112300 }, { "epoch": 32.31012658227848, "grad_norm": 0.9567673802375793, "learning_rate": 0.0013537974683544304, "loss": 0.5766, "step": 112310 }, { "epoch": 32.31300345224396, "grad_norm": 1.1515138149261475, "learning_rate": 0.0013537399309551208, "loss": 0.7883, "step": 112320 }, { "epoch": 32.31588032220944, "grad_norm": 0.922888994216919, "learning_rate": 0.0013536823935558114, "loss": 0.6843, "step": 112330 }, { "epoch": 32.318757192174914, "grad_norm": 0.8889279365539551, "learning_rate": 0.0013536248561565017, "loss": 0.5628, "step": 112340 }, { "epoch": 32.32163406214039, "grad_norm": 1.4017674922943115, "learning_rate": 0.0013535673187571923, "loss": 0.6648, "step": 112350 }, { "epoch": 32.324510932105866, "grad_norm": 0.782816469669342, "learning_rate": 0.0013535097813578826, "loss": 0.688, "step": 112360 }, { "epoch": 32.32738780207135, "grad_norm": 1.2286072969436646, "learning_rate": 0.001353452243958573, "loss": 0.5581, "step": 112370 }, { "epoch": 32.330264672036826, "grad_norm": 1.0524744987487793, "learning_rate": 0.0013533947065592635, "loss": 0.6683, "step": 112380 }, { "epoch": 32.3331415420023, "grad_norm": 0.872179388999939, "learning_rate": 0.001353337169159954, "loss": 0.6554, "step": 112390 }, { "epoch": 32.33601841196778, "grad_norm": 1.6239858865737915, "learning_rate": 0.0013532796317606444, "loss": 0.6732, "step": 112400 }, { "epoch": 32.338895281933254, "grad_norm": 1.9442687034606934, "learning_rate": 0.001353222094361335, "loss": 0.6391, "step": 112410 }, { "epoch": 32.34177215189873, "grad_norm": 1.4495998620986938, "learning_rate": 0.0013531645569620253, "loss": 0.6865, "step": 112420 }, { "epoch": 32.344649021864214, "grad_norm": 0.9779070615768433, "learning_rate": 0.0013531070195627157, "loss": 0.7168, "step": 112430 }, { "epoch": 32.34752589182969, "grad_norm": 0.9603230357170105, "learning_rate": 0.0013530494821634063, "loss": 0.5609, "step": 112440 }, { "epoch": 32.350402761795166, "grad_norm": 1.0745306015014648, "learning_rate": 0.0013529919447640966, "loss": 0.6228, "step": 112450 }, { "epoch": 32.35327963176064, "grad_norm": 0.7446808218955994, "learning_rate": 0.0013529344073647872, "loss": 0.6192, "step": 112460 }, { "epoch": 32.35615650172612, "grad_norm": 2.373727560043335, "learning_rate": 0.0013528768699654777, "loss": 0.5705, "step": 112470 }, { "epoch": 32.3590333716916, "grad_norm": 1.5456408262252808, "learning_rate": 0.0013528193325661679, "loss": 0.5657, "step": 112480 }, { "epoch": 32.36191024165708, "grad_norm": 2.13202166557312, "learning_rate": 0.0013527617951668584, "loss": 0.5831, "step": 112490 }, { "epoch": 32.364787111622555, "grad_norm": 1.8747367858886719, "learning_rate": 0.001352704257767549, "loss": 0.7627, "step": 112500 }, { "epoch": 32.36766398158803, "grad_norm": 0.7473626136779785, "learning_rate": 0.0013526467203682393, "loss": 0.7316, "step": 112510 }, { "epoch": 32.37054085155351, "grad_norm": 1.0292595624923706, "learning_rate": 0.00135258918296893, "loss": 0.72, "step": 112520 }, { "epoch": 32.37341772151899, "grad_norm": 1.1342127323150635, "learning_rate": 0.0013525316455696205, "loss": 0.6133, "step": 112530 }, { "epoch": 32.376294591484466, "grad_norm": 1.6910693645477295, "learning_rate": 0.0013524741081703106, "loss": 0.6118, "step": 112540 }, { "epoch": 32.37917146144994, "grad_norm": 1.4589587450027466, "learning_rate": 0.0013524165707710012, "loss": 0.6799, "step": 112550 }, { "epoch": 32.38204833141542, "grad_norm": 1.6590015888214111, "learning_rate": 0.0013523590333716915, "loss": 0.6264, "step": 112560 }, { "epoch": 32.384925201380895, "grad_norm": 1.2269126176834106, "learning_rate": 0.001352301495972382, "loss": 0.5603, "step": 112570 }, { "epoch": 32.38780207134638, "grad_norm": 0.7740179896354675, "learning_rate": 0.0013522439585730726, "loss": 0.4829, "step": 112580 }, { "epoch": 32.390678941311855, "grad_norm": 1.208936095237732, "learning_rate": 0.001352186421173763, "loss": 0.5582, "step": 112590 }, { "epoch": 32.39355581127733, "grad_norm": 1.0102579593658447, "learning_rate": 0.0013521288837744533, "loss": 0.6134, "step": 112600 }, { "epoch": 32.39643268124281, "grad_norm": 0.7822890877723694, "learning_rate": 0.0013520713463751439, "loss": 0.5319, "step": 112610 }, { "epoch": 32.39930955120828, "grad_norm": 0.8713284730911255, "learning_rate": 0.0013520138089758342, "loss": 0.7529, "step": 112620 }, { "epoch": 32.40218642117377, "grad_norm": 0.8689504265785217, "learning_rate": 0.0013519562715765248, "loss": 0.6962, "step": 112630 }, { "epoch": 32.40506329113924, "grad_norm": 1.0274735689163208, "learning_rate": 0.0013518987341772154, "loss": 0.6807, "step": 112640 }, { "epoch": 32.40794016110472, "grad_norm": 0.7330124378204346, "learning_rate": 0.0013518411967779057, "loss": 0.6779, "step": 112650 }, { "epoch": 32.410817031070195, "grad_norm": 0.8658022284507751, "learning_rate": 0.001351783659378596, "loss": 0.6796, "step": 112660 }, { "epoch": 32.41369390103567, "grad_norm": 1.0180284976959229, "learning_rate": 0.0013517261219792864, "loss": 0.5394, "step": 112670 }, { "epoch": 32.41657077100115, "grad_norm": 2.0905539989471436, "learning_rate": 0.001351668584579977, "loss": 0.7625, "step": 112680 }, { "epoch": 32.41944764096663, "grad_norm": 1.5758981704711914, "learning_rate": 0.0013516110471806675, "loss": 0.8307, "step": 112690 }, { "epoch": 32.42232451093211, "grad_norm": 1.1610926389694214, "learning_rate": 0.0013515535097813579, "loss": 0.681, "step": 112700 }, { "epoch": 32.42520138089758, "grad_norm": 1.1668226718902588, "learning_rate": 0.0013514959723820484, "loss": 0.6809, "step": 112710 }, { "epoch": 32.42807825086306, "grad_norm": 1.9094418287277222, "learning_rate": 0.0013514384349827388, "loss": 0.6504, "step": 112720 }, { "epoch": 32.430955120828536, "grad_norm": 1.6623612642288208, "learning_rate": 0.0013513808975834291, "loss": 0.6617, "step": 112730 }, { "epoch": 32.43383199079402, "grad_norm": 0.6746872067451477, "learning_rate": 0.0013513233601841197, "loss": 0.6483, "step": 112740 }, { "epoch": 32.436708860759495, "grad_norm": 1.2749963998794556, "learning_rate": 0.0013512658227848103, "loss": 0.6403, "step": 112750 }, { "epoch": 32.43958573072497, "grad_norm": 0.7541317939758301, "learning_rate": 0.0013512082853855006, "loss": 0.6887, "step": 112760 }, { "epoch": 32.44246260069045, "grad_norm": 1.0786505937576294, "learning_rate": 0.0013511507479861912, "loss": 0.7906, "step": 112770 }, { "epoch": 32.445339470655924, "grad_norm": 1.1140042543411255, "learning_rate": 0.0013510932105868813, "loss": 0.6144, "step": 112780 }, { "epoch": 32.44821634062141, "grad_norm": 1.2100273370742798, "learning_rate": 0.0013510356731875719, "loss": 0.6485, "step": 112790 }, { "epoch": 32.45109321058688, "grad_norm": 1.7919628620147705, "learning_rate": 0.0013509781357882624, "loss": 0.5675, "step": 112800 }, { "epoch": 32.45397008055236, "grad_norm": 1.172179937362671, "learning_rate": 0.0013509205983889528, "loss": 0.6306, "step": 112810 }, { "epoch": 32.456846950517836, "grad_norm": 1.7872803211212158, "learning_rate": 0.0013508630609896433, "loss": 0.7094, "step": 112820 }, { "epoch": 32.45972382048331, "grad_norm": 1.5688481330871582, "learning_rate": 0.001350805523590334, "loss": 0.8473, "step": 112830 }, { "epoch": 32.462600690448795, "grad_norm": 1.0610876083374023, "learning_rate": 0.001350747986191024, "loss": 0.5231, "step": 112840 }, { "epoch": 32.46547756041427, "grad_norm": 1.7160109281539917, "learning_rate": 0.0013506904487917146, "loss": 0.5464, "step": 112850 }, { "epoch": 32.46835443037975, "grad_norm": 1.3049763441085815, "learning_rate": 0.0013506329113924052, "loss": 0.8023, "step": 112860 }, { "epoch": 32.471231300345224, "grad_norm": 1.570935606956482, "learning_rate": 0.0013505753739930955, "loss": 0.6063, "step": 112870 }, { "epoch": 32.4741081703107, "grad_norm": 1.9441108703613281, "learning_rate": 0.001350517836593786, "loss": 0.6614, "step": 112880 }, { "epoch": 32.476985040276176, "grad_norm": 1.811238169670105, "learning_rate": 0.0013504602991944764, "loss": 0.7565, "step": 112890 }, { "epoch": 32.47986191024166, "grad_norm": 1.0199979543685913, "learning_rate": 0.0013504027617951668, "loss": 0.6975, "step": 112900 }, { "epoch": 32.482738780207136, "grad_norm": 1.4697425365447998, "learning_rate": 0.0013503452243958573, "loss": 0.5898, "step": 112910 }, { "epoch": 32.48561565017261, "grad_norm": 1.9566292762756348, "learning_rate": 0.0013502876869965477, "loss": 0.7353, "step": 112920 }, { "epoch": 32.48849252013809, "grad_norm": 0.7542455196380615, "learning_rate": 0.0013502301495972382, "loss": 0.5927, "step": 112930 }, { "epoch": 32.491369390103564, "grad_norm": 1.1928691864013672, "learning_rate": 0.0013501726121979288, "loss": 0.6779, "step": 112940 }, { "epoch": 32.49424626006905, "grad_norm": 2.599864959716797, "learning_rate": 0.0013501150747986192, "loss": 0.5612, "step": 112950 }, { "epoch": 32.497123130034524, "grad_norm": 1.4349632263183594, "learning_rate": 0.0013500575373993095, "loss": 0.6338, "step": 112960 }, { "epoch": 32.5, "grad_norm": 1.4023847579956055, "learning_rate": 0.00135, "loss": 0.6853, "step": 112970 }, { "epoch": 32.502876869965476, "grad_norm": 1.3516830205917358, "learning_rate": 0.0013499424626006904, "loss": 0.5292, "step": 112980 }, { "epoch": 32.50575373993095, "grad_norm": 1.7590454816818237, "learning_rate": 0.001349884925201381, "loss": 0.7287, "step": 112990 }, { "epoch": 32.508630609896436, "grad_norm": 1.2307108640670776, "learning_rate": 0.0013498273878020715, "loss": 0.7602, "step": 113000 }, { "epoch": 32.51150747986191, "grad_norm": 0.8045552968978882, "learning_rate": 0.001349769850402762, "loss": 0.6208, "step": 113010 }, { "epoch": 32.51438434982739, "grad_norm": 1.6682976484298706, "learning_rate": 0.0013497123130034522, "loss": 0.5929, "step": 113020 }, { "epoch": 32.517261219792864, "grad_norm": 0.8348795771598816, "learning_rate": 0.0013496547756041426, "loss": 0.6391, "step": 113030 }, { "epoch": 32.52013808975834, "grad_norm": 1.2452791929244995, "learning_rate": 0.0013495972382048332, "loss": 0.6314, "step": 113040 }, { "epoch": 32.523014959723824, "grad_norm": 1.0087194442749023, "learning_rate": 0.0013495397008055237, "loss": 0.638, "step": 113050 }, { "epoch": 32.5258918296893, "grad_norm": 1.7342870235443115, "learning_rate": 0.001349482163406214, "loss": 0.6695, "step": 113060 }, { "epoch": 32.528768699654776, "grad_norm": 2.8277876377105713, "learning_rate": 0.0013494246260069046, "loss": 0.6843, "step": 113070 }, { "epoch": 32.53164556962025, "grad_norm": 1.868717074394226, "learning_rate": 0.001349367088607595, "loss": 0.6521, "step": 113080 }, { "epoch": 32.53452243958573, "grad_norm": 1.1549994945526123, "learning_rate": 0.0013493095512082853, "loss": 0.788, "step": 113090 }, { "epoch": 32.537399309551205, "grad_norm": 1.0850975513458252, "learning_rate": 0.0013492520138089759, "loss": 0.7332, "step": 113100 }, { "epoch": 32.54027617951669, "grad_norm": 1.245389461517334, "learning_rate": 0.0013491944764096664, "loss": 0.5644, "step": 113110 }, { "epoch": 32.543153049482164, "grad_norm": 1.2887033224105835, "learning_rate": 0.0013491369390103568, "loss": 0.6216, "step": 113120 }, { "epoch": 32.54602991944764, "grad_norm": 1.5750610828399658, "learning_rate": 0.0013490794016110474, "loss": 0.7511, "step": 113130 }, { "epoch": 32.54890678941312, "grad_norm": 1.3650310039520264, "learning_rate": 0.0013490218642117375, "loss": 0.5533, "step": 113140 }, { "epoch": 32.55178365937859, "grad_norm": 0.8145451545715332, "learning_rate": 0.001348964326812428, "loss": 0.5604, "step": 113150 }, { "epoch": 32.554660529344076, "grad_norm": 0.9425777792930603, "learning_rate": 0.0013489067894131186, "loss": 0.8064, "step": 113160 }, { "epoch": 32.55753739930955, "grad_norm": 0.8732068538665771, "learning_rate": 0.001348849252013809, "loss": 0.6916, "step": 113170 }, { "epoch": 32.56041426927503, "grad_norm": 0.8031399250030518, "learning_rate": 0.0013487917146144995, "loss": 0.5656, "step": 113180 }, { "epoch": 32.563291139240505, "grad_norm": 2.1528706550598145, "learning_rate": 0.0013487341772151899, "loss": 0.6368, "step": 113190 }, { "epoch": 32.56616800920598, "grad_norm": 1.8130879402160645, "learning_rate": 0.0013486766398158802, "loss": 0.7086, "step": 113200 }, { "epoch": 32.569044879171464, "grad_norm": 1.3495854139328003, "learning_rate": 0.0013486191024165708, "loss": 0.6157, "step": 113210 }, { "epoch": 32.57192174913694, "grad_norm": 1.1693780422210693, "learning_rate": 0.0013485615650172614, "loss": 0.6141, "step": 113220 }, { "epoch": 32.57479861910242, "grad_norm": 0.7001094818115234, "learning_rate": 0.0013485040276179517, "loss": 0.5784, "step": 113230 }, { "epoch": 32.57767548906789, "grad_norm": 1.234342098236084, "learning_rate": 0.0013484464902186423, "loss": 0.4847, "step": 113240 }, { "epoch": 32.58055235903337, "grad_norm": 1.1022531986236572, "learning_rate": 0.0013483889528193324, "loss": 0.7226, "step": 113250 }, { "epoch": 32.58342922899885, "grad_norm": 1.4165185689926147, "learning_rate": 0.001348331415420023, "loss": 0.5946, "step": 113260 }, { "epoch": 32.58630609896433, "grad_norm": 2.066922426223755, "learning_rate": 0.0013482738780207135, "loss": 0.7919, "step": 113270 }, { "epoch": 32.589182968929805, "grad_norm": 0.8779420852661133, "learning_rate": 0.0013482163406214039, "loss": 0.6497, "step": 113280 }, { "epoch": 32.59205983889528, "grad_norm": 0.6572971343994141, "learning_rate": 0.0013481588032220944, "loss": 0.6059, "step": 113290 }, { "epoch": 32.59493670886076, "grad_norm": 0.9442328214645386, "learning_rate": 0.001348101265822785, "loss": 0.4658, "step": 113300 }, { "epoch": 32.59781357882623, "grad_norm": 1.2553797960281372, "learning_rate": 0.0013480437284234751, "loss": 0.6534, "step": 113310 }, { "epoch": 32.60069044879172, "grad_norm": 0.8523808121681213, "learning_rate": 0.0013479861910241657, "loss": 0.6361, "step": 113320 }, { "epoch": 32.60356731875719, "grad_norm": 1.1336809396743774, "learning_rate": 0.0013479286536248563, "loss": 0.6723, "step": 113330 }, { "epoch": 32.60644418872267, "grad_norm": 2.042717456817627, "learning_rate": 0.0013478711162255466, "loss": 0.6719, "step": 113340 }, { "epoch": 32.609321058688145, "grad_norm": 1.177140712738037, "learning_rate": 0.0013478135788262372, "loss": 0.648, "step": 113350 }, { "epoch": 32.61219792865362, "grad_norm": 1.4853057861328125, "learning_rate": 0.0013477560414269275, "loss": 0.6785, "step": 113360 }, { "epoch": 32.615074798619105, "grad_norm": 0.8318729400634766, "learning_rate": 0.0013476985040276179, "loss": 0.6004, "step": 113370 }, { "epoch": 32.61795166858458, "grad_norm": 1.2126740217208862, "learning_rate": 0.0013476409666283084, "loss": 0.6067, "step": 113380 }, { "epoch": 32.62082853855006, "grad_norm": 1.0326595306396484, "learning_rate": 0.0013475834292289988, "loss": 0.6926, "step": 113390 }, { "epoch": 32.623705408515534, "grad_norm": 1.293726921081543, "learning_rate": 0.0013475258918296893, "loss": 0.8501, "step": 113400 }, { "epoch": 32.62658227848101, "grad_norm": 0.9560134410858154, "learning_rate": 0.00134746835443038, "loss": 0.6384, "step": 113410 }, { "epoch": 32.62945914844649, "grad_norm": 0.7792637348175049, "learning_rate": 0.0013474108170310702, "loss": 0.6018, "step": 113420 }, { "epoch": 32.63233601841197, "grad_norm": 1.6028937101364136, "learning_rate": 0.0013473532796317606, "loss": 0.7363, "step": 113430 }, { "epoch": 32.635212888377445, "grad_norm": 1.5715960264205933, "learning_rate": 0.0013472957422324512, "loss": 0.7073, "step": 113440 }, { "epoch": 32.63808975834292, "grad_norm": 1.1386090517044067, "learning_rate": 0.0013472382048331415, "loss": 0.8087, "step": 113450 }, { "epoch": 32.6409666283084, "grad_norm": 0.8785519599914551, "learning_rate": 0.001347180667433832, "loss": 0.6413, "step": 113460 }, { "epoch": 32.64384349827388, "grad_norm": 1.7689440250396729, "learning_rate": 0.0013471231300345224, "loss": 0.8279, "step": 113470 }, { "epoch": 32.64672036823936, "grad_norm": 0.9397186040878296, "learning_rate": 0.001347065592635213, "loss": 0.7203, "step": 113480 }, { "epoch": 32.649597238204834, "grad_norm": 1.6934350728988647, "learning_rate": 0.0013470080552359033, "loss": 0.5978, "step": 113490 }, { "epoch": 32.65247410817031, "grad_norm": 1.438231110572815, "learning_rate": 0.0013469505178365937, "loss": 0.8187, "step": 113500 }, { "epoch": 32.655350978135786, "grad_norm": 1.5676729679107666, "learning_rate": 0.0013468929804372842, "loss": 0.6272, "step": 113510 }, { "epoch": 32.65822784810126, "grad_norm": 1.0376039743423462, "learning_rate": 0.0013468354430379748, "loss": 0.6537, "step": 113520 }, { "epoch": 32.661104718066746, "grad_norm": 0.9923722743988037, "learning_rate": 0.0013467779056386651, "loss": 0.5893, "step": 113530 }, { "epoch": 32.66398158803222, "grad_norm": 1.9778144359588623, "learning_rate": 0.0013467203682393557, "loss": 0.7722, "step": 113540 }, { "epoch": 32.6668584579977, "grad_norm": 1.0821611881256104, "learning_rate": 0.001346662830840046, "loss": 0.7445, "step": 113550 }, { "epoch": 32.669735327963174, "grad_norm": 0.986308753490448, "learning_rate": 0.0013466052934407364, "loss": 0.6195, "step": 113560 }, { "epoch": 32.67261219792865, "grad_norm": 2.171135187149048, "learning_rate": 0.001346547756041427, "loss": 0.6203, "step": 113570 }, { "epoch": 32.675489067894134, "grad_norm": 1.1353824138641357, "learning_rate": 0.0013464902186421175, "loss": 0.6291, "step": 113580 }, { "epoch": 32.67836593785961, "grad_norm": 2.0609052181243896, "learning_rate": 0.0013464326812428079, "loss": 0.6262, "step": 113590 }, { "epoch": 32.681242807825086, "grad_norm": 0.951546311378479, "learning_rate": 0.0013463751438434984, "loss": 0.7248, "step": 113600 }, { "epoch": 32.68411967779056, "grad_norm": 1.0362857580184937, "learning_rate": 0.0013463176064441886, "loss": 0.7302, "step": 113610 }, { "epoch": 32.68699654775604, "grad_norm": 1.96060049533844, "learning_rate": 0.0013462600690448791, "loss": 0.706, "step": 113620 }, { "epoch": 32.68987341772152, "grad_norm": 0.9629406929016113, "learning_rate": 0.0013462025316455697, "loss": 0.5519, "step": 113630 }, { "epoch": 32.692750287687, "grad_norm": 2.7292487621307373, "learning_rate": 0.00134614499424626, "loss": 0.7383, "step": 113640 }, { "epoch": 32.695627157652474, "grad_norm": 1.666227102279663, "learning_rate": 0.0013460874568469506, "loss": 0.6649, "step": 113650 }, { "epoch": 32.69850402761795, "grad_norm": 1.135594367980957, "learning_rate": 0.0013460299194476412, "loss": 0.5674, "step": 113660 }, { "epoch": 32.70138089758343, "grad_norm": 1.0126912593841553, "learning_rate": 0.0013459723820483313, "loss": 0.7168, "step": 113670 }, { "epoch": 32.70425776754891, "grad_norm": 0.7621949315071106, "learning_rate": 0.0013459148446490219, "loss": 0.5174, "step": 113680 }, { "epoch": 32.707134637514386, "grad_norm": 1.179098129272461, "learning_rate": 0.0013458573072497124, "loss": 0.5846, "step": 113690 }, { "epoch": 32.71001150747986, "grad_norm": 1.5024415254592896, "learning_rate": 0.0013457997698504028, "loss": 0.628, "step": 113700 }, { "epoch": 32.71288837744534, "grad_norm": 1.3852187395095825, "learning_rate": 0.0013457422324510933, "loss": 0.5741, "step": 113710 }, { "epoch": 32.715765247410815, "grad_norm": 1.1439647674560547, "learning_rate": 0.0013456846950517837, "loss": 0.5817, "step": 113720 }, { "epoch": 32.7186421173763, "grad_norm": 2.4725441932678223, "learning_rate": 0.001345627157652474, "loss": 0.5864, "step": 113730 }, { "epoch": 32.721518987341774, "grad_norm": 1.644547462463379, "learning_rate": 0.0013455696202531646, "loss": 0.861, "step": 113740 }, { "epoch": 32.72439585730725, "grad_norm": 1.3313100337982178, "learning_rate": 0.001345512082853855, "loss": 0.6291, "step": 113750 }, { "epoch": 32.72727272727273, "grad_norm": 1.0322011709213257, "learning_rate": 0.0013454545454545455, "loss": 0.6926, "step": 113760 }, { "epoch": 32.7301495972382, "grad_norm": 1.759541392326355, "learning_rate": 0.001345397008055236, "loss": 0.5998, "step": 113770 }, { "epoch": 32.73302646720368, "grad_norm": 2.543868064880371, "learning_rate": 0.0013453394706559264, "loss": 0.6686, "step": 113780 }, { "epoch": 32.73590333716916, "grad_norm": 0.8311344981193542, "learning_rate": 0.0013452819332566168, "loss": 0.7008, "step": 113790 }, { "epoch": 32.73878020713464, "grad_norm": 0.7307848930358887, "learning_rate": 0.0013452243958573073, "loss": 0.5833, "step": 113800 }, { "epoch": 32.741657077100115, "grad_norm": 1.3035149574279785, "learning_rate": 0.0013451668584579977, "loss": 0.5373, "step": 113810 }, { "epoch": 32.74453394706559, "grad_norm": 1.648824691772461, "learning_rate": 0.0013451093210586882, "loss": 0.6335, "step": 113820 }, { "epoch": 32.74741081703107, "grad_norm": 1.7676873207092285, "learning_rate": 0.0013450517836593786, "loss": 0.8308, "step": 113830 }, { "epoch": 32.75028768699655, "grad_norm": 1.163859248161316, "learning_rate": 0.0013449942462600692, "loss": 0.5937, "step": 113840 }, { "epoch": 32.75316455696203, "grad_norm": 0.9183908700942993, "learning_rate": 0.0013449367088607595, "loss": 0.6201, "step": 113850 }, { "epoch": 32.7560414269275, "grad_norm": 1.6940364837646484, "learning_rate": 0.0013448791714614499, "loss": 0.6279, "step": 113860 }, { "epoch": 32.75891829689298, "grad_norm": 1.8509026765823364, "learning_rate": 0.0013448216340621404, "loss": 0.6999, "step": 113870 }, { "epoch": 32.761795166858455, "grad_norm": 1.078341007232666, "learning_rate": 0.001344764096662831, "loss": 0.7658, "step": 113880 }, { "epoch": 32.76467203682394, "grad_norm": 1.6670950651168823, "learning_rate": 0.0013447065592635213, "loss": 0.6078, "step": 113890 }, { "epoch": 32.767548906789415, "grad_norm": 1.400792121887207, "learning_rate": 0.0013446490218642119, "loss": 0.6415, "step": 113900 }, { "epoch": 32.77042577675489, "grad_norm": 0.9690033793449402, "learning_rate": 0.0013445914844649022, "loss": 0.7124, "step": 113910 }, { "epoch": 32.77330264672037, "grad_norm": 0.8863778710365295, "learning_rate": 0.0013445339470655926, "loss": 0.6092, "step": 113920 }, { "epoch": 32.77617951668584, "grad_norm": 1.7402236461639404, "learning_rate": 0.0013444764096662831, "loss": 0.8091, "step": 113930 }, { "epoch": 32.77905638665133, "grad_norm": 0.6271328926086426, "learning_rate": 0.0013444188722669735, "loss": 0.6075, "step": 113940 }, { "epoch": 32.7819332566168, "grad_norm": 1.2061045169830322, "learning_rate": 0.001344361334867664, "loss": 0.6245, "step": 113950 }, { "epoch": 32.78481012658228, "grad_norm": 2.3793399333953857, "learning_rate": 0.0013443037974683546, "loss": 0.5684, "step": 113960 }, { "epoch": 32.787686996547755, "grad_norm": 0.7163549661636353, "learning_rate": 0.0013442462600690448, "loss": 0.7169, "step": 113970 }, { "epoch": 32.79056386651323, "grad_norm": 1.1131073236465454, "learning_rate": 0.0013441887226697353, "loss": 0.791, "step": 113980 }, { "epoch": 32.79344073647871, "grad_norm": 1.584733486175537, "learning_rate": 0.0013441311852704259, "loss": 0.6346, "step": 113990 }, { "epoch": 32.79631760644419, "grad_norm": 1.9491347074508667, "learning_rate": 0.0013440736478711162, "loss": 0.5453, "step": 114000 }, { "epoch": 32.79919447640967, "grad_norm": 1.6340774297714233, "learning_rate": 0.0013440161104718068, "loss": 0.5738, "step": 114010 }, { "epoch": 32.80207134637514, "grad_norm": 0.5273883938789368, "learning_rate": 0.0013439585730724971, "loss": 0.8723, "step": 114020 }, { "epoch": 32.80494821634062, "grad_norm": 1.160089373588562, "learning_rate": 0.0013439010356731875, "loss": 0.7353, "step": 114030 }, { "epoch": 32.807825086306096, "grad_norm": 1.2466069459915161, "learning_rate": 0.001343843498273878, "loss": 0.7119, "step": 114040 }, { "epoch": 32.81070195627158, "grad_norm": 1.7026714086532593, "learning_rate": 0.0013437859608745684, "loss": 0.6377, "step": 114050 }, { "epoch": 32.813578826237055, "grad_norm": 1.3616385459899902, "learning_rate": 0.001343728423475259, "loss": 0.6508, "step": 114060 }, { "epoch": 32.81645569620253, "grad_norm": 1.4462493658065796, "learning_rate": 0.0013436708860759495, "loss": 0.6663, "step": 114070 }, { "epoch": 32.81933256616801, "grad_norm": 1.4296903610229492, "learning_rate": 0.0013436133486766397, "loss": 0.717, "step": 114080 }, { "epoch": 32.822209436133484, "grad_norm": 1.3245106935501099, "learning_rate": 0.0013435558112773302, "loss": 0.7529, "step": 114090 }, { "epoch": 32.82508630609897, "grad_norm": 1.8843104839324951, "learning_rate": 0.0013434982738780208, "loss": 0.7372, "step": 114100 }, { "epoch": 32.82796317606444, "grad_norm": 1.2994873523712158, "learning_rate": 0.0013434407364787111, "loss": 0.6047, "step": 114110 }, { "epoch": 32.83084004602992, "grad_norm": 1.4682506322860718, "learning_rate": 0.0013433831990794017, "loss": 0.7459, "step": 114120 }, { "epoch": 32.833716915995396, "grad_norm": 1.7896417379379272, "learning_rate": 0.0013433256616800923, "loss": 0.6498, "step": 114130 }, { "epoch": 32.83659378596087, "grad_norm": 1.8644195795059204, "learning_rate": 0.0013432681242807824, "loss": 0.6895, "step": 114140 }, { "epoch": 32.839470655926355, "grad_norm": 1.5743430852890015, "learning_rate": 0.001343210586881473, "loss": 0.7418, "step": 114150 }, { "epoch": 32.84234752589183, "grad_norm": 0.7805876731872559, "learning_rate": 0.0013431530494821633, "loss": 0.6999, "step": 114160 }, { "epoch": 32.84522439585731, "grad_norm": 1.833783745765686, "learning_rate": 0.0013430955120828539, "loss": 0.6604, "step": 114170 }, { "epoch": 32.848101265822784, "grad_norm": 1.0062365531921387, "learning_rate": 0.0013430379746835444, "loss": 0.7603, "step": 114180 }, { "epoch": 32.85097813578826, "grad_norm": 4.317041873931885, "learning_rate": 0.0013429804372842348, "loss": 0.9152, "step": 114190 }, { "epoch": 32.85385500575374, "grad_norm": 1.3302392959594727, "learning_rate": 0.0013429228998849251, "loss": 0.7536, "step": 114200 }, { "epoch": 32.85673187571922, "grad_norm": 1.8543319702148438, "learning_rate": 0.0013428653624856157, "loss": 0.6575, "step": 114210 }, { "epoch": 32.859608745684696, "grad_norm": 1.0824651718139648, "learning_rate": 0.001342807825086306, "loss": 0.7991, "step": 114220 }, { "epoch": 32.86248561565017, "grad_norm": 0.931502640247345, "learning_rate": 0.0013427502876869966, "loss": 0.5789, "step": 114230 }, { "epoch": 32.86536248561565, "grad_norm": 2.0271270275115967, "learning_rate": 0.0013426927502876872, "loss": 0.7409, "step": 114240 }, { "epoch": 32.868239355581125, "grad_norm": 0.7270491123199463, "learning_rate": 0.0013426352128883775, "loss": 0.764, "step": 114250 }, { "epoch": 32.87111622554661, "grad_norm": 1.3499984741210938, "learning_rate": 0.0013425776754890679, "loss": 0.6385, "step": 114260 }, { "epoch": 32.873993095512084, "grad_norm": 2.0587058067321777, "learning_rate": 0.0013425201380897584, "loss": 0.5905, "step": 114270 }, { "epoch": 32.87686996547756, "grad_norm": 1.1571495532989502, "learning_rate": 0.0013424626006904488, "loss": 0.5631, "step": 114280 }, { "epoch": 32.879746835443036, "grad_norm": 2.2092015743255615, "learning_rate": 0.0013424050632911393, "loss": 0.7191, "step": 114290 }, { "epoch": 32.88262370540851, "grad_norm": 1.2362253665924072, "learning_rate": 0.0013423475258918297, "loss": 0.7104, "step": 114300 }, { "epoch": 32.885500575373996, "grad_norm": 1.35275399684906, "learning_rate": 0.0013422899884925202, "loss": 0.7387, "step": 114310 }, { "epoch": 32.88837744533947, "grad_norm": 1.4790290594100952, "learning_rate": 0.0013422324510932106, "loss": 0.8178, "step": 114320 }, { "epoch": 32.89125431530495, "grad_norm": 2.9293978214263916, "learning_rate": 0.001342174913693901, "loss": 1.0704, "step": 114330 }, { "epoch": 32.894131185270425, "grad_norm": 1.071468710899353, "learning_rate": 0.0013421173762945915, "loss": 0.6359, "step": 114340 }, { "epoch": 32.8970080552359, "grad_norm": 0.5696933269500732, "learning_rate": 0.001342059838895282, "loss": 0.6865, "step": 114350 }, { "epoch": 32.899884925201384, "grad_norm": 1.7751214504241943, "learning_rate": 0.0013420023014959724, "loss": 0.6136, "step": 114360 }, { "epoch": 32.90276179516686, "grad_norm": 1.0712372064590454, "learning_rate": 0.001341944764096663, "loss": 0.6049, "step": 114370 }, { "epoch": 32.90563866513234, "grad_norm": 2.010089635848999, "learning_rate": 0.0013418872266973533, "loss": 0.7226, "step": 114380 }, { "epoch": 32.90851553509781, "grad_norm": 2.409224510192871, "learning_rate": 0.0013418296892980437, "loss": 0.6743, "step": 114390 }, { "epoch": 32.91139240506329, "grad_norm": 1.4511278867721558, "learning_rate": 0.0013417721518987342, "loss": 0.6469, "step": 114400 }, { "epoch": 32.91426927502877, "grad_norm": 1.2986356019973755, "learning_rate": 0.0013417146144994246, "loss": 0.6376, "step": 114410 }, { "epoch": 32.91714614499425, "grad_norm": 1.100347876548767, "learning_rate": 0.0013416570771001151, "loss": 0.633, "step": 114420 }, { "epoch": 32.920023014959725, "grad_norm": 3.0440502166748047, "learning_rate": 0.0013415995397008057, "loss": 0.7336, "step": 114430 }, { "epoch": 32.9228998849252, "grad_norm": 1.4592487812042236, "learning_rate": 0.0013415420023014958, "loss": 0.7157, "step": 114440 }, { "epoch": 32.92577675489068, "grad_norm": 1.4368700981140137, "learning_rate": 0.0013414844649021864, "loss": 0.7362, "step": 114450 }, { "epoch": 32.92865362485615, "grad_norm": 1.78180730342865, "learning_rate": 0.001341426927502877, "loss": 0.6836, "step": 114460 }, { "epoch": 32.93153049482164, "grad_norm": 0.9149044156074524, "learning_rate": 0.0013413693901035673, "loss": 0.7121, "step": 114470 }, { "epoch": 32.93440736478711, "grad_norm": 1.2946970462799072, "learning_rate": 0.0013413118527042579, "loss": 0.8052, "step": 114480 }, { "epoch": 32.93728423475259, "grad_norm": 1.3163610696792603, "learning_rate": 0.0013412543153049484, "loss": 0.7568, "step": 114490 }, { "epoch": 32.940161104718065, "grad_norm": 2.0836994647979736, "learning_rate": 0.0013411967779056386, "loss": 0.7707, "step": 114500 }, { "epoch": 32.94303797468354, "grad_norm": 1.2556610107421875, "learning_rate": 0.0013411392405063291, "loss": 0.5089, "step": 114510 }, { "epoch": 32.945914844649025, "grad_norm": 1.5218113660812378, "learning_rate": 0.0013410817031070195, "loss": 0.7283, "step": 114520 }, { "epoch": 32.9487917146145, "grad_norm": 1.2076789140701294, "learning_rate": 0.00134102416570771, "loss": 0.5534, "step": 114530 }, { "epoch": 32.95166858457998, "grad_norm": 0.9798129200935364, "learning_rate": 0.0013409666283084006, "loss": 0.4781, "step": 114540 }, { "epoch": 32.95454545454545, "grad_norm": 1.1303845643997192, "learning_rate": 0.001340909090909091, "loss": 0.5974, "step": 114550 }, { "epoch": 32.95742232451093, "grad_norm": 2.083650588989258, "learning_rate": 0.0013408515535097813, "loss": 0.8491, "step": 114560 }, { "epoch": 32.96029919447641, "grad_norm": 0.992199718952179, "learning_rate": 0.0013407940161104719, "loss": 0.6278, "step": 114570 }, { "epoch": 32.96317606444189, "grad_norm": 1.7466154098510742, "learning_rate": 0.0013407364787111622, "loss": 0.5975, "step": 114580 }, { "epoch": 32.966052934407365, "grad_norm": 0.6399038434028625, "learning_rate": 0.0013406789413118528, "loss": 0.5872, "step": 114590 }, { "epoch": 32.96892980437284, "grad_norm": 1.3589303493499756, "learning_rate": 0.0013406214039125433, "loss": 0.7138, "step": 114600 }, { "epoch": 32.97180667433832, "grad_norm": 1.68218195438385, "learning_rate": 0.0013405638665132337, "loss": 0.6753, "step": 114610 }, { "epoch": 32.9746835443038, "grad_norm": 1.4808708429336548, "learning_rate": 0.001340506329113924, "loss": 0.5679, "step": 114620 }, { "epoch": 32.97756041426928, "grad_norm": 0.9641446471214294, "learning_rate": 0.0013404487917146144, "loss": 0.5851, "step": 114630 }, { "epoch": 32.98043728423475, "grad_norm": 1.445610761642456, "learning_rate": 0.001340391254315305, "loss": 0.7387, "step": 114640 }, { "epoch": 32.98331415420023, "grad_norm": 0.84487384557724, "learning_rate": 0.0013403337169159955, "loss": 0.6674, "step": 114650 }, { "epoch": 32.986191024165706, "grad_norm": 1.021690011024475, "learning_rate": 0.0013402761795166859, "loss": 0.7738, "step": 114660 }, { "epoch": 32.98906789413118, "grad_norm": 3.0019264221191406, "learning_rate": 0.0013402186421173764, "loss": 0.6644, "step": 114670 }, { "epoch": 32.991944764096665, "grad_norm": 0.5765823125839233, "learning_rate": 0.0013401611047180668, "loss": 0.5325, "step": 114680 }, { "epoch": 32.99482163406214, "grad_norm": 1.3937616348266602, "learning_rate": 0.0013401035673187571, "loss": 0.612, "step": 114690 }, { "epoch": 32.99769850402762, "grad_norm": 2.058537006378174, "learning_rate": 0.0013400460299194477, "loss": 0.5949, "step": 114700 }, { "epoch": 33.000575373993094, "grad_norm": 1.5059118270874023, "learning_rate": 0.0013399884925201382, "loss": 0.6346, "step": 114710 }, { "epoch": 33.00345224395857, "grad_norm": 0.7354339361190796, "learning_rate": 0.0013399309551208286, "loss": 0.4861, "step": 114720 }, { "epoch": 33.00632911392405, "grad_norm": 1.4803569316864014, "learning_rate": 0.0013398734177215192, "loss": 0.7053, "step": 114730 }, { "epoch": 33.00920598388953, "grad_norm": 0.959018349647522, "learning_rate": 0.0013398158803222093, "loss": 0.5592, "step": 114740 }, { "epoch": 33.012082853855006, "grad_norm": 1.036569356918335, "learning_rate": 0.0013397583429228998, "loss": 0.5422, "step": 114750 }, { "epoch": 33.01495972382048, "grad_norm": 1.2019160985946655, "learning_rate": 0.0013397008055235904, "loss": 0.5696, "step": 114760 }, { "epoch": 33.01783659378596, "grad_norm": 0.8364092707633972, "learning_rate": 0.0013396432681242808, "loss": 0.7164, "step": 114770 }, { "epoch": 33.02071346375144, "grad_norm": 0.912051796913147, "learning_rate": 0.0013395857307249713, "loss": 0.5384, "step": 114780 }, { "epoch": 33.02359033371692, "grad_norm": 1.4295762777328491, "learning_rate": 0.0013395281933256619, "loss": 0.6914, "step": 114790 }, { "epoch": 33.026467203682394, "grad_norm": 0.9960798621177673, "learning_rate": 0.001339470655926352, "loss": 0.6072, "step": 114800 }, { "epoch": 33.02934407364787, "grad_norm": 1.0091490745544434, "learning_rate": 0.0013394131185270426, "loss": 0.5646, "step": 114810 }, { "epoch": 33.032220943613346, "grad_norm": 2.4700775146484375, "learning_rate": 0.0013393555811277331, "loss": 0.6422, "step": 114820 }, { "epoch": 33.03509781357883, "grad_norm": 1.2523584365844727, "learning_rate": 0.0013392980437284235, "loss": 0.6109, "step": 114830 }, { "epoch": 33.037974683544306, "grad_norm": 0.9553319215774536, "learning_rate": 0.001339240506329114, "loss": 0.7304, "step": 114840 }, { "epoch": 33.04085155350978, "grad_norm": 1.5307631492614746, "learning_rate": 0.0013391829689298042, "loss": 0.5188, "step": 114850 }, { "epoch": 33.04372842347526, "grad_norm": 1.3464826345443726, "learning_rate": 0.0013391254315304948, "loss": 0.8112, "step": 114860 }, { "epoch": 33.046605293440734, "grad_norm": 0.6957850456237793, "learning_rate": 0.0013390678941311853, "loss": 0.4227, "step": 114870 }, { "epoch": 33.04948216340621, "grad_norm": 1.217566967010498, "learning_rate": 0.0013390103567318757, "loss": 0.623, "step": 114880 }, { "epoch": 33.052359033371694, "grad_norm": 1.5687142610549927, "learning_rate": 0.0013389528193325662, "loss": 0.5156, "step": 114890 }, { "epoch": 33.05523590333717, "grad_norm": 0.8062666654586792, "learning_rate": 0.0013388952819332568, "loss": 0.6428, "step": 114900 }, { "epoch": 33.058112773302646, "grad_norm": 2.5392444133758545, "learning_rate": 0.001338837744533947, "loss": 0.5808, "step": 114910 }, { "epoch": 33.06098964326812, "grad_norm": 1.373342752456665, "learning_rate": 0.0013387802071346375, "loss": 0.5553, "step": 114920 }, { "epoch": 33.0638665132336, "grad_norm": 0.7803667187690735, "learning_rate": 0.001338722669735328, "loss": 0.7844, "step": 114930 }, { "epoch": 33.06674338319908, "grad_norm": 0.8927552103996277, "learning_rate": 0.0013386651323360184, "loss": 0.5185, "step": 114940 }, { "epoch": 33.06962025316456, "grad_norm": 1.088205099105835, "learning_rate": 0.001338607594936709, "loss": 0.7486, "step": 114950 }, { "epoch": 33.072497123130034, "grad_norm": 1.0661276578903198, "learning_rate": 0.0013385500575373995, "loss": 0.5842, "step": 114960 }, { "epoch": 33.07537399309551, "grad_norm": 0.9813355803489685, "learning_rate": 0.0013384925201380897, "loss": 0.5737, "step": 114970 }, { "epoch": 33.07825086306099, "grad_norm": 0.8283749222755432, "learning_rate": 0.0013384349827387802, "loss": 0.7626, "step": 114980 }, { "epoch": 33.08112773302647, "grad_norm": 0.7498893141746521, "learning_rate": 0.0013383774453394706, "loss": 0.5819, "step": 114990 }, { "epoch": 33.084004602991946, "grad_norm": 1.1276849508285522, "learning_rate": 0.0013383199079401611, "loss": 0.5281, "step": 115000 }, { "epoch": 33.08688147295742, "grad_norm": 1.179673433303833, "learning_rate": 0.0013382623705408517, "loss": 0.6116, "step": 115010 }, { "epoch": 33.0897583429229, "grad_norm": 1.1456756591796875, "learning_rate": 0.001338204833141542, "loss": 0.7897, "step": 115020 }, { "epoch": 33.092635212888375, "grad_norm": 0.842644453048706, "learning_rate": 0.0013381472957422324, "loss": 0.6285, "step": 115030 }, { "epoch": 33.09551208285386, "grad_norm": 1.2350162267684937, "learning_rate": 0.001338089758342923, "loss": 0.5617, "step": 115040 }, { "epoch": 33.098388952819334, "grad_norm": 1.2773780822753906, "learning_rate": 0.0013380322209436133, "loss": 0.7055, "step": 115050 }, { "epoch": 33.10126582278481, "grad_norm": 0.7734719514846802, "learning_rate": 0.0013379746835443039, "loss": 0.7135, "step": 115060 }, { "epoch": 33.10414269275029, "grad_norm": 2.1778817176818848, "learning_rate": 0.0013379171461449944, "loss": 0.532, "step": 115070 }, { "epoch": 33.10701956271576, "grad_norm": 1.021040916442871, "learning_rate": 0.0013378596087456848, "loss": 0.691, "step": 115080 }, { "epoch": 33.10989643268124, "grad_norm": 0.9535300731658936, "learning_rate": 0.0013378020713463751, "loss": 0.579, "step": 115090 }, { "epoch": 33.11277330264672, "grad_norm": 0.6831159591674805, "learning_rate": 0.0013377445339470655, "loss": 0.6979, "step": 115100 }, { "epoch": 33.1156501726122, "grad_norm": 1.499103307723999, "learning_rate": 0.001337686996547756, "loss": 0.6487, "step": 115110 }, { "epoch": 33.118527042577675, "grad_norm": 1.2901815176010132, "learning_rate": 0.0013376294591484466, "loss": 0.5672, "step": 115120 }, { "epoch": 33.12140391254315, "grad_norm": 0.7673388719558716, "learning_rate": 0.001337571921749137, "loss": 0.5525, "step": 115130 }, { "epoch": 33.12428078250863, "grad_norm": 1.19014310836792, "learning_rate": 0.0013375143843498275, "loss": 0.6633, "step": 115140 }, { "epoch": 33.12715765247411, "grad_norm": 0.8877944946289062, "learning_rate": 0.0013374568469505179, "loss": 0.557, "step": 115150 }, { "epoch": 33.13003452243959, "grad_norm": 1.08236563205719, "learning_rate": 0.0013373993095512082, "loss": 0.5499, "step": 115160 }, { "epoch": 33.13291139240506, "grad_norm": 2.20733904838562, "learning_rate": 0.0013373417721518988, "loss": 0.7544, "step": 115170 }, { "epoch": 33.13578826237054, "grad_norm": 0.7080804705619812, "learning_rate": 0.0013372842347525893, "loss": 0.5064, "step": 115180 }, { "epoch": 33.138665132336016, "grad_norm": 1.4285001754760742, "learning_rate": 0.0013372266973532797, "loss": 0.749, "step": 115190 }, { "epoch": 33.1415420023015, "grad_norm": 2.0752079486846924, "learning_rate": 0.0013371691599539702, "loss": 0.6356, "step": 115200 }, { "epoch": 33.144418872266975, "grad_norm": 1.4273871183395386, "learning_rate": 0.0013371116225546604, "loss": 0.6786, "step": 115210 }, { "epoch": 33.14729574223245, "grad_norm": 1.4815328121185303, "learning_rate": 0.001337054085155351, "loss": 0.5452, "step": 115220 }, { "epoch": 33.15017261219793, "grad_norm": 0.8683795928955078, "learning_rate": 0.0013369965477560415, "loss": 0.7565, "step": 115230 }, { "epoch": 33.153049482163404, "grad_norm": 1.126369595527649, "learning_rate": 0.0013369390103567318, "loss": 0.7263, "step": 115240 }, { "epoch": 33.15592635212889, "grad_norm": 1.7136037349700928, "learning_rate": 0.0013368814729574224, "loss": 0.6533, "step": 115250 }, { "epoch": 33.15880322209436, "grad_norm": 2.3305294513702393, "learning_rate": 0.001336823935558113, "loss": 0.6681, "step": 115260 }, { "epoch": 33.16168009205984, "grad_norm": 1.1582428216934204, "learning_rate": 0.001336766398158803, "loss": 0.6013, "step": 115270 }, { "epoch": 33.164556962025316, "grad_norm": 1.2116142511367798, "learning_rate": 0.0013367088607594937, "loss": 0.5105, "step": 115280 }, { "epoch": 33.16743383199079, "grad_norm": 1.5937857627868652, "learning_rate": 0.0013366513233601842, "loss": 0.5274, "step": 115290 }, { "epoch": 33.170310701956275, "grad_norm": 1.7352838516235352, "learning_rate": 0.0013365937859608746, "loss": 0.6558, "step": 115300 }, { "epoch": 33.17318757192175, "grad_norm": 1.070910096168518, "learning_rate": 0.0013365362485615651, "loss": 0.7571, "step": 115310 }, { "epoch": 33.17606444188723, "grad_norm": 1.3139575719833374, "learning_rate": 0.0013364787111622555, "loss": 0.5778, "step": 115320 }, { "epoch": 33.178941311852704, "grad_norm": 0.9180240035057068, "learning_rate": 0.0013364211737629458, "loss": 0.5397, "step": 115330 }, { "epoch": 33.18181818181818, "grad_norm": 1.0023112297058105, "learning_rate": 0.0013363636363636364, "loss": 0.5583, "step": 115340 }, { "epoch": 33.184695051783656, "grad_norm": 0.8176108598709106, "learning_rate": 0.0013363060989643267, "loss": 0.5321, "step": 115350 }, { "epoch": 33.18757192174914, "grad_norm": 1.1538888216018677, "learning_rate": 0.0013362485615650173, "loss": 0.5826, "step": 115360 }, { "epoch": 33.190448791714616, "grad_norm": 1.09234619140625, "learning_rate": 0.0013361910241657079, "loss": 0.6073, "step": 115370 }, { "epoch": 33.19332566168009, "grad_norm": 1.5883678197860718, "learning_rate": 0.0013361334867663982, "loss": 0.682, "step": 115380 }, { "epoch": 33.19620253164557, "grad_norm": 1.837958812713623, "learning_rate": 0.0013360759493670886, "loss": 0.6422, "step": 115390 }, { "epoch": 33.199079401611044, "grad_norm": 3.505406379699707, "learning_rate": 0.0013360184119677791, "loss": 0.6864, "step": 115400 }, { "epoch": 33.20195627157653, "grad_norm": 0.9884960055351257, "learning_rate": 0.0013359608745684695, "loss": 0.6717, "step": 115410 }, { "epoch": 33.204833141542004, "grad_norm": 1.4795063734054565, "learning_rate": 0.00133590333716916, "loss": 0.648, "step": 115420 }, { "epoch": 33.20771001150748, "grad_norm": 0.7700596451759338, "learning_rate": 0.0013358457997698504, "loss": 0.5303, "step": 115430 }, { "epoch": 33.210586881472956, "grad_norm": 1.115572452545166, "learning_rate": 0.001335788262370541, "loss": 0.823, "step": 115440 }, { "epoch": 33.21346375143843, "grad_norm": 0.702832818031311, "learning_rate": 0.0013357307249712313, "loss": 0.4345, "step": 115450 }, { "epoch": 33.216340621403916, "grad_norm": 1.1545387506484985, "learning_rate": 0.0013356731875719216, "loss": 0.4683, "step": 115460 }, { "epoch": 33.21921749136939, "grad_norm": 0.7187862396240234, "learning_rate": 0.0013356156501726122, "loss": 0.7205, "step": 115470 }, { "epoch": 33.22209436133487, "grad_norm": 1.9543887376785278, "learning_rate": 0.0013355581127733028, "loss": 0.68, "step": 115480 }, { "epoch": 33.224971231300344, "grad_norm": 1.8116388320922852, "learning_rate": 0.0013355005753739931, "loss": 0.6428, "step": 115490 }, { "epoch": 33.22784810126582, "grad_norm": 0.7478896379470825, "learning_rate": 0.0013354430379746837, "loss": 0.616, "step": 115500 }, { "epoch": 33.230724971231304, "grad_norm": 1.0711463689804077, "learning_rate": 0.001335385500575374, "loss": 0.7403, "step": 115510 }, { "epoch": 33.23360184119678, "grad_norm": 1.606223702430725, "learning_rate": 0.0013353279631760644, "loss": 0.7237, "step": 115520 }, { "epoch": 33.236478711162256, "grad_norm": 0.6629437804222107, "learning_rate": 0.001335270425776755, "loss": 0.6253, "step": 115530 }, { "epoch": 33.23935558112773, "grad_norm": 0.9975525140762329, "learning_rate": 0.0013352128883774455, "loss": 0.6853, "step": 115540 }, { "epoch": 33.24223245109321, "grad_norm": 1.1473960876464844, "learning_rate": 0.0013351553509781359, "loss": 0.5657, "step": 115550 }, { "epoch": 33.245109321058685, "grad_norm": 1.5854521989822388, "learning_rate": 0.0013350978135788264, "loss": 0.5645, "step": 115560 }, { "epoch": 33.24798619102417, "grad_norm": 1.523120403289795, "learning_rate": 0.0013350402761795166, "loss": 0.6175, "step": 115570 }, { "epoch": 33.250863060989644, "grad_norm": 1.3959811925888062, "learning_rate": 0.0013349827387802071, "loss": 0.5965, "step": 115580 }, { "epoch": 33.25373993095512, "grad_norm": 1.3126329183578491, "learning_rate": 0.0013349252013808977, "loss": 0.5594, "step": 115590 }, { "epoch": 33.2566168009206, "grad_norm": 1.6764878034591675, "learning_rate": 0.001334867663981588, "loss": 0.62, "step": 115600 }, { "epoch": 33.25949367088607, "grad_norm": 0.8626076579093933, "learning_rate": 0.0013348101265822786, "loss": 0.5514, "step": 115610 }, { "epoch": 33.262370540851556, "grad_norm": 0.692494809627533, "learning_rate": 0.0013347525891829692, "loss": 0.6689, "step": 115620 }, { "epoch": 33.26524741081703, "grad_norm": 1.4356409311294556, "learning_rate": 0.0013346950517836593, "loss": 0.7111, "step": 115630 }, { "epoch": 33.26812428078251, "grad_norm": 1.7897498607635498, "learning_rate": 0.0013346375143843498, "loss": 0.6891, "step": 115640 }, { "epoch": 33.271001150747985, "grad_norm": 1.200249433517456, "learning_rate": 0.0013345799769850404, "loss": 0.8165, "step": 115650 }, { "epoch": 33.27387802071346, "grad_norm": 1.0396450757980347, "learning_rate": 0.0013345224395857308, "loss": 0.6952, "step": 115660 }, { "epoch": 33.276754890678944, "grad_norm": 1.3619378805160522, "learning_rate": 0.0013344649021864213, "loss": 0.7483, "step": 115670 }, { "epoch": 33.27963176064442, "grad_norm": 0.9593172073364258, "learning_rate": 0.0013344073647871115, "loss": 0.5043, "step": 115680 }, { "epoch": 33.2825086306099, "grad_norm": 1.4262986183166504, "learning_rate": 0.001334349827387802, "loss": 0.5317, "step": 115690 }, { "epoch": 33.28538550057537, "grad_norm": 0.8839027881622314, "learning_rate": 0.0013342922899884926, "loss": 0.7158, "step": 115700 }, { "epoch": 33.28826237054085, "grad_norm": 1.7540024518966675, "learning_rate": 0.001334234752589183, "loss": 0.5682, "step": 115710 }, { "epoch": 33.29113924050633, "grad_norm": 1.1790767908096313, "learning_rate": 0.0013341772151898735, "loss": 0.7737, "step": 115720 }, { "epoch": 33.29401611047181, "grad_norm": 1.0844841003417969, "learning_rate": 0.001334119677790564, "loss": 0.4775, "step": 115730 }, { "epoch": 33.296892980437285, "grad_norm": 1.2498009204864502, "learning_rate": 0.0013340621403912542, "loss": 0.5882, "step": 115740 }, { "epoch": 33.29976985040276, "grad_norm": 0.7023822665214539, "learning_rate": 0.0013340046029919447, "loss": 0.6888, "step": 115750 }, { "epoch": 33.30264672036824, "grad_norm": 1.605074167251587, "learning_rate": 0.0013339470655926353, "loss": 0.6918, "step": 115760 }, { "epoch": 33.30552359033371, "grad_norm": 1.4384312629699707, "learning_rate": 0.0013338895281933257, "loss": 0.5919, "step": 115770 }, { "epoch": 33.3084004602992, "grad_norm": 1.1231229305267334, "learning_rate": 0.0013338319907940162, "loss": 0.7644, "step": 115780 }, { "epoch": 33.31127733026467, "grad_norm": 3.419996976852417, "learning_rate": 0.0013337744533947066, "loss": 0.771, "step": 115790 }, { "epoch": 33.31415420023015, "grad_norm": 2.1923668384552, "learning_rate": 0.001333716915995397, "loss": 0.6238, "step": 115800 }, { "epoch": 33.317031070195625, "grad_norm": 3.2667102813720703, "learning_rate": 0.0013336593785960875, "loss": 0.5952, "step": 115810 }, { "epoch": 33.3199079401611, "grad_norm": 1.2264646291732788, "learning_rate": 0.0013336018411967778, "loss": 0.7857, "step": 115820 }, { "epoch": 33.322784810126585, "grad_norm": 1.3346339464187622, "learning_rate": 0.0013335443037974684, "loss": 0.7951, "step": 115830 }, { "epoch": 33.32566168009206, "grad_norm": 2.541954278945923, "learning_rate": 0.001333486766398159, "loss": 0.6504, "step": 115840 }, { "epoch": 33.32853855005754, "grad_norm": 1.4014259576797485, "learning_rate": 0.0013334292289988493, "loss": 0.6512, "step": 115850 }, { "epoch": 33.33141542002301, "grad_norm": 1.520072340965271, "learning_rate": 0.0013333716915995397, "loss": 0.7001, "step": 115860 }, { "epoch": 33.33429228998849, "grad_norm": 0.834792971611023, "learning_rate": 0.0013333141542002302, "loss": 0.5836, "step": 115870 }, { "epoch": 33.33716915995397, "grad_norm": 0.9132148027420044, "learning_rate": 0.0013332566168009206, "loss": 0.6544, "step": 115880 }, { "epoch": 33.34004602991945, "grad_norm": 1.0525461435317993, "learning_rate": 0.0013331990794016111, "loss": 0.6302, "step": 115890 }, { "epoch": 33.342922899884925, "grad_norm": 2.7696080207824707, "learning_rate": 0.0013331415420023015, "loss": 0.6548, "step": 115900 }, { "epoch": 33.3457997698504, "grad_norm": 1.737508773803711, "learning_rate": 0.001333084004602992, "loss": 0.6534, "step": 115910 }, { "epoch": 33.34867663981588, "grad_norm": 1.0678142309188843, "learning_rate": 0.0013330264672036824, "loss": 0.7432, "step": 115920 }, { "epoch": 33.35155350978136, "grad_norm": 1.1288999319076538, "learning_rate": 0.0013329689298043727, "loss": 0.6853, "step": 115930 }, { "epoch": 33.35443037974684, "grad_norm": 1.143009066581726, "learning_rate": 0.0013329113924050633, "loss": 0.6654, "step": 115940 }, { "epoch": 33.35730724971231, "grad_norm": 1.0370678901672363, "learning_rate": 0.0013328538550057539, "loss": 0.5923, "step": 115950 }, { "epoch": 33.36018411967779, "grad_norm": 1.7867302894592285, "learning_rate": 0.0013327963176064442, "loss": 0.686, "step": 115960 }, { "epoch": 33.363060989643266, "grad_norm": 1.87152898311615, "learning_rate": 0.0013327387802071348, "loss": 0.7258, "step": 115970 }, { "epoch": 33.36593785960875, "grad_norm": 1.1244240999221802, "learning_rate": 0.0013326812428078251, "loss": 0.6787, "step": 115980 }, { "epoch": 33.368814729574225, "grad_norm": 1.7234182357788086, "learning_rate": 0.0013326237054085155, "loss": 0.6543, "step": 115990 }, { "epoch": 33.3716915995397, "grad_norm": 1.1235969066619873, "learning_rate": 0.001332566168009206, "loss": 0.5414, "step": 116000 }, { "epoch": 33.37456846950518, "grad_norm": 1.4199696779251099, "learning_rate": 0.0013325086306098964, "loss": 0.6582, "step": 116010 }, { "epoch": 33.377445339470654, "grad_norm": 0.9941786527633667, "learning_rate": 0.001332451093210587, "loss": 0.6132, "step": 116020 }, { "epoch": 33.38032220943613, "grad_norm": 0.8279240131378174, "learning_rate": 0.0013323935558112775, "loss": 0.5734, "step": 116030 }, { "epoch": 33.383199079401614, "grad_norm": 1.2188751697540283, "learning_rate": 0.0013323360184119676, "loss": 0.7944, "step": 116040 }, { "epoch": 33.38607594936709, "grad_norm": 1.4231637716293335, "learning_rate": 0.0013322784810126582, "loss": 0.8396, "step": 116050 }, { "epoch": 33.388952819332566, "grad_norm": 1.8030239343643188, "learning_rate": 0.0013322209436133488, "loss": 0.5716, "step": 116060 }, { "epoch": 33.39182968929804, "grad_norm": 1.0032238960266113, "learning_rate": 0.0013321634062140391, "loss": 0.5738, "step": 116070 }, { "epoch": 33.39470655926352, "grad_norm": 0.9038340449333191, "learning_rate": 0.0013321058688147297, "loss": 0.6913, "step": 116080 }, { "epoch": 33.397583429229, "grad_norm": 1.271604061126709, "learning_rate": 0.0013320483314154202, "loss": 0.6812, "step": 116090 }, { "epoch": 33.40046029919448, "grad_norm": 0.8160106539726257, "learning_rate": 0.0013319907940161104, "loss": 0.676, "step": 116100 }, { "epoch": 33.403337169159954, "grad_norm": 1.4518429040908813, "learning_rate": 0.001331933256616801, "loss": 0.6652, "step": 116110 }, { "epoch": 33.40621403912543, "grad_norm": 0.8572640419006348, "learning_rate": 0.0013318757192174913, "loss": 0.5361, "step": 116120 }, { "epoch": 33.40909090909091, "grad_norm": 1.2351192235946655, "learning_rate": 0.0013318181818181818, "loss": 0.6585, "step": 116130 }, { "epoch": 33.41196777905639, "grad_norm": 0.9853208065032959, "learning_rate": 0.0013317606444188724, "loss": 0.7107, "step": 116140 }, { "epoch": 33.414844649021866, "grad_norm": 1.6174395084381104, "learning_rate": 0.0013317031070195628, "loss": 0.5914, "step": 116150 }, { "epoch": 33.41772151898734, "grad_norm": 1.1563903093338013, "learning_rate": 0.001331645569620253, "loss": 0.7372, "step": 116160 }, { "epoch": 33.42059838895282, "grad_norm": 1.9164000749588013, "learning_rate": 0.0013315880322209437, "loss": 0.7451, "step": 116170 }, { "epoch": 33.423475258918295, "grad_norm": 1.1554296016693115, "learning_rate": 0.001331530494821634, "loss": 0.531, "step": 116180 }, { "epoch": 33.42635212888378, "grad_norm": 0.8773396611213684, "learning_rate": 0.0013314729574223246, "loss": 0.6451, "step": 116190 }, { "epoch": 33.429228998849254, "grad_norm": 1.48817777633667, "learning_rate": 0.0013314154200230151, "loss": 0.7778, "step": 116200 }, { "epoch": 33.43210586881473, "grad_norm": 1.8742146492004395, "learning_rate": 0.0013313578826237055, "loss": 0.6402, "step": 116210 }, { "epoch": 33.43498273878021, "grad_norm": 1.3334616422653198, "learning_rate": 0.0013313003452243958, "loss": 0.4979, "step": 116220 }, { "epoch": 33.43785960874568, "grad_norm": 2.013611078262329, "learning_rate": 0.0013312428078250864, "loss": 0.7845, "step": 116230 }, { "epoch": 33.44073647871116, "grad_norm": 1.7229902744293213, "learning_rate": 0.0013311852704257767, "loss": 0.5691, "step": 116240 }, { "epoch": 33.44361334867664, "grad_norm": 1.1097890138626099, "learning_rate": 0.0013311277330264673, "loss": 0.5312, "step": 116250 }, { "epoch": 33.44649021864212, "grad_norm": 1.6640719175338745, "learning_rate": 0.0013310701956271577, "loss": 0.724, "step": 116260 }, { "epoch": 33.449367088607595, "grad_norm": 1.394674301147461, "learning_rate": 0.0013310126582278482, "loss": 0.6173, "step": 116270 }, { "epoch": 33.45224395857307, "grad_norm": 1.1585439443588257, "learning_rate": 0.0013309551208285386, "loss": 0.6429, "step": 116280 }, { "epoch": 33.45512082853855, "grad_norm": 1.936398983001709, "learning_rate": 0.001330897583429229, "loss": 0.7296, "step": 116290 }, { "epoch": 33.45799769850403, "grad_norm": 1.0565292835235596, "learning_rate": 0.0013308400460299195, "loss": 0.5022, "step": 116300 }, { "epoch": 33.46087456846951, "grad_norm": 1.0678128004074097, "learning_rate": 0.00133078250863061, "loss": 0.6685, "step": 116310 }, { "epoch": 33.46375143843498, "grad_norm": 4.2384724617004395, "learning_rate": 0.0013307249712313004, "loss": 0.6586, "step": 116320 }, { "epoch": 33.46662830840046, "grad_norm": 1.3489426374435425, "learning_rate": 0.001330667433831991, "loss": 0.6362, "step": 116330 }, { "epoch": 33.469505178365935, "grad_norm": 1.735697627067566, "learning_rate": 0.0013306098964326813, "loss": 0.6841, "step": 116340 }, { "epoch": 33.47238204833142, "grad_norm": 1.8185498714447021, "learning_rate": 0.0013305523590333716, "loss": 0.6022, "step": 116350 }, { "epoch": 33.475258918296895, "grad_norm": 1.662055492401123, "learning_rate": 0.0013304948216340622, "loss": 0.661, "step": 116360 }, { "epoch": 33.47813578826237, "grad_norm": 1.1117318868637085, "learning_rate": 0.0013304372842347526, "loss": 0.6479, "step": 116370 }, { "epoch": 33.48101265822785, "grad_norm": 2.6002535820007324, "learning_rate": 0.0013303797468354431, "loss": 0.7281, "step": 116380 }, { "epoch": 33.48388952819332, "grad_norm": 1.1692712306976318, "learning_rate": 0.0013303222094361337, "loss": 0.8119, "step": 116390 }, { "epoch": 33.48676639815881, "grad_norm": 0.9300438761711121, "learning_rate": 0.0013302646720368238, "loss": 0.6531, "step": 116400 }, { "epoch": 33.48964326812428, "grad_norm": 1.2250406742095947, "learning_rate": 0.0013302071346375144, "loss": 0.6074, "step": 116410 }, { "epoch": 33.49252013808976, "grad_norm": 1.0278998613357544, "learning_rate": 0.001330149597238205, "loss": 0.52, "step": 116420 }, { "epoch": 33.495397008055235, "grad_norm": 1.0181576013565063, "learning_rate": 0.0013300920598388953, "loss": 0.6149, "step": 116430 }, { "epoch": 33.49827387802071, "grad_norm": 1.4449883699417114, "learning_rate": 0.0013300345224395859, "loss": 0.5723, "step": 116440 }, { "epoch": 33.50115074798619, "grad_norm": 2.412576198577881, "learning_rate": 0.0013299769850402764, "loss": 0.7736, "step": 116450 }, { "epoch": 33.50402761795167, "grad_norm": 1.5900846719741821, "learning_rate": 0.0013299194476409665, "loss": 0.6058, "step": 116460 }, { "epoch": 33.50690448791715, "grad_norm": 1.284406065940857, "learning_rate": 0.0013298619102416571, "loss": 0.6395, "step": 116470 }, { "epoch": 33.50978135788262, "grad_norm": 1.6350215673446655, "learning_rate": 0.0013298043728423475, "loss": 0.5677, "step": 116480 }, { "epoch": 33.5126582278481, "grad_norm": 0.75470370054245, "learning_rate": 0.001329746835443038, "loss": 0.5903, "step": 116490 }, { "epoch": 33.515535097813576, "grad_norm": 0.9840258359909058, "learning_rate": 0.0013296892980437286, "loss": 0.6299, "step": 116500 }, { "epoch": 33.51841196777906, "grad_norm": 0.9172046184539795, "learning_rate": 0.0013296317606444187, "loss": 0.6543, "step": 116510 }, { "epoch": 33.521288837744535, "grad_norm": 1.1130521297454834, "learning_rate": 0.0013295742232451093, "loss": 0.5281, "step": 116520 }, { "epoch": 33.52416570771001, "grad_norm": 0.6968500018119812, "learning_rate": 0.0013295166858457998, "loss": 0.6187, "step": 116530 }, { "epoch": 33.52704257767549, "grad_norm": 2.0031187534332275, "learning_rate": 0.0013294591484464902, "loss": 0.6154, "step": 116540 }, { "epoch": 33.529919447640964, "grad_norm": 1.9052194356918335, "learning_rate": 0.0013294016110471808, "loss": 0.636, "step": 116550 }, { "epoch": 33.53279631760645, "grad_norm": 1.3291234970092773, "learning_rate": 0.0013293440736478713, "loss": 0.6161, "step": 116560 }, { "epoch": 33.53567318757192, "grad_norm": 1.0690330266952515, "learning_rate": 0.0013292865362485615, "loss": 0.7302, "step": 116570 }, { "epoch": 33.5385500575374, "grad_norm": 1.160218358039856, "learning_rate": 0.001329228998849252, "loss": 0.7953, "step": 116580 }, { "epoch": 33.541426927502876, "grad_norm": 0.9489644765853882, "learning_rate": 0.0013291714614499424, "loss": 0.709, "step": 116590 }, { "epoch": 33.54430379746835, "grad_norm": 1.0222523212432861, "learning_rate": 0.001329113924050633, "loss": 0.6358, "step": 116600 }, { "epoch": 33.547180667433835, "grad_norm": 1.7134648561477661, "learning_rate": 0.0013290563866513235, "loss": 0.5807, "step": 116610 }, { "epoch": 33.55005753739931, "grad_norm": 1.1899218559265137, "learning_rate": 0.0013289988492520138, "loss": 0.6461, "step": 116620 }, { "epoch": 33.55293440736479, "grad_norm": 1.2240952253341675, "learning_rate": 0.0013289413118527042, "loss": 0.6662, "step": 116630 }, { "epoch": 33.555811277330264, "grad_norm": 1.2279646396636963, "learning_rate": 0.0013288837744533947, "loss": 0.7674, "step": 116640 }, { "epoch": 33.55868814729574, "grad_norm": 1.6186957359313965, "learning_rate": 0.001328826237054085, "loss": 0.639, "step": 116650 }, { "epoch": 33.561565017261216, "grad_norm": 2.244112014770508, "learning_rate": 0.0013287686996547757, "loss": 0.7365, "step": 116660 }, { "epoch": 33.5644418872267, "grad_norm": 1.0151609182357788, "learning_rate": 0.0013287111622554662, "loss": 0.5232, "step": 116670 }, { "epoch": 33.567318757192176, "grad_norm": 1.2329292297363281, "learning_rate": 0.0013286536248561566, "loss": 0.5512, "step": 116680 }, { "epoch": 33.57019562715765, "grad_norm": 1.0460317134857178, "learning_rate": 0.001328596087456847, "loss": 0.6933, "step": 116690 }, { "epoch": 33.57307249712313, "grad_norm": 1.6939033269882202, "learning_rate": 0.0013285385500575373, "loss": 0.8282, "step": 116700 }, { "epoch": 33.575949367088604, "grad_norm": 1.394700288772583, "learning_rate": 0.0013284810126582278, "loss": 0.6075, "step": 116710 }, { "epoch": 33.57882623705409, "grad_norm": 1.892101764678955, "learning_rate": 0.0013284234752589184, "loss": 0.6642, "step": 116720 }, { "epoch": 33.581703107019564, "grad_norm": 1.0104256868362427, "learning_rate": 0.0013283659378596087, "loss": 0.574, "step": 116730 }, { "epoch": 33.58457997698504, "grad_norm": 1.2665514945983887, "learning_rate": 0.0013283084004602993, "loss": 0.6158, "step": 116740 }, { "epoch": 33.587456846950516, "grad_norm": 1.1095305681228638, "learning_rate": 0.0013282508630609896, "loss": 0.5671, "step": 116750 }, { "epoch": 33.59033371691599, "grad_norm": 0.6740999221801758, "learning_rate": 0.00132819332566168, "loss": 0.6022, "step": 116760 }, { "epoch": 33.593210586881476, "grad_norm": 1.441866397857666, "learning_rate": 0.0013281357882623706, "loss": 0.6578, "step": 116770 }, { "epoch": 33.59608745684695, "grad_norm": 1.8646209239959717, "learning_rate": 0.0013280782508630611, "loss": 0.81, "step": 116780 }, { "epoch": 33.59896432681243, "grad_norm": 1.3661696910858154, "learning_rate": 0.0013280207134637515, "loss": 0.7021, "step": 116790 }, { "epoch": 33.601841196777904, "grad_norm": 1.6508294343948364, "learning_rate": 0.001327963176064442, "loss": 0.7436, "step": 116800 }, { "epoch": 33.60471806674338, "grad_norm": 1.0512131452560425, "learning_rate": 0.0013279056386651322, "loss": 0.6586, "step": 116810 }, { "epoch": 33.607594936708864, "grad_norm": 1.4910576343536377, "learning_rate": 0.0013278481012658227, "loss": 0.6808, "step": 116820 }, { "epoch": 33.61047180667434, "grad_norm": 0.8124220371246338, "learning_rate": 0.0013277905638665133, "loss": 0.7445, "step": 116830 }, { "epoch": 33.613348676639816, "grad_norm": 1.3926920890808105, "learning_rate": 0.0013277330264672036, "loss": 0.7161, "step": 116840 }, { "epoch": 33.61622554660529, "grad_norm": 0.8710263967514038, "learning_rate": 0.0013276754890678942, "loss": 0.5805, "step": 116850 }, { "epoch": 33.61910241657077, "grad_norm": 1.5101813077926636, "learning_rate": 0.0013276179516685848, "loss": 0.7829, "step": 116860 }, { "epoch": 33.621979286536245, "grad_norm": 1.0747013092041016, "learning_rate": 0.001327560414269275, "loss": 0.7159, "step": 116870 }, { "epoch": 33.62485615650173, "grad_norm": 1.2653146982192993, "learning_rate": 0.0013275028768699655, "loss": 0.6162, "step": 116880 }, { "epoch": 33.627733026467205, "grad_norm": 2.1122353076934814, "learning_rate": 0.001327445339470656, "loss": 0.7356, "step": 116890 }, { "epoch": 33.63060989643268, "grad_norm": 1.2913272380828857, "learning_rate": 0.0013273878020713464, "loss": 0.6551, "step": 116900 }, { "epoch": 33.63348676639816, "grad_norm": 0.9237548112869263, "learning_rate": 0.001327330264672037, "loss": 0.6389, "step": 116910 }, { "epoch": 33.63636363636363, "grad_norm": 1.3731625080108643, "learning_rate": 0.0013272727272727275, "loss": 0.5534, "step": 116920 }, { "epoch": 33.639240506329116, "grad_norm": 1.1898193359375, "learning_rate": 0.0013272151898734176, "loss": 0.5609, "step": 116930 }, { "epoch": 33.64211737629459, "grad_norm": 1.8794445991516113, "learning_rate": 0.0013271576524741082, "loss": 0.5405, "step": 116940 }, { "epoch": 33.64499424626007, "grad_norm": 1.312808632850647, "learning_rate": 0.0013271001150747985, "loss": 0.6407, "step": 116950 }, { "epoch": 33.647871116225545, "grad_norm": 1.759677529335022, "learning_rate": 0.001327042577675489, "loss": 0.7411, "step": 116960 }, { "epoch": 33.65074798619102, "grad_norm": 1.1134241819381714, "learning_rate": 0.0013269850402761797, "loss": 0.6843, "step": 116970 }, { "epoch": 33.653624856156505, "grad_norm": 1.506299376487732, "learning_rate": 0.00132692750287687, "loss": 0.7169, "step": 116980 }, { "epoch": 33.65650172612198, "grad_norm": 2.344444751739502, "learning_rate": 0.0013268699654775604, "loss": 0.8151, "step": 116990 }, { "epoch": 33.65937859608746, "grad_norm": 1.0532366037368774, "learning_rate": 0.001326812428078251, "loss": 0.6258, "step": 117000 }, { "epoch": 33.66225546605293, "grad_norm": 1.2129108905792236, "learning_rate": 0.0013267548906789413, "loss": 0.6309, "step": 117010 }, { "epoch": 33.66513233601841, "grad_norm": 0.6259572505950928, "learning_rate": 0.0013266973532796318, "loss": 0.6711, "step": 117020 }, { "epoch": 33.66800920598389, "grad_norm": 1.7927780151367188, "learning_rate": 0.0013266398158803224, "loss": 0.7445, "step": 117030 }, { "epoch": 33.67088607594937, "grad_norm": 0.8307962417602539, "learning_rate": 0.0013265822784810128, "loss": 0.5714, "step": 117040 }, { "epoch": 33.673762945914845, "grad_norm": 1.5762903690338135, "learning_rate": 0.001326524741081703, "loss": 0.6479, "step": 117050 }, { "epoch": 33.67663981588032, "grad_norm": 1.1376217603683472, "learning_rate": 0.0013264672036823934, "loss": 0.7147, "step": 117060 }, { "epoch": 33.6795166858458, "grad_norm": 1.1558854579925537, "learning_rate": 0.001326409666283084, "loss": 0.7195, "step": 117070 }, { "epoch": 33.68239355581128, "grad_norm": 1.3318605422973633, "learning_rate": 0.0013263521288837746, "loss": 0.6115, "step": 117080 }, { "epoch": 33.68527042577676, "grad_norm": 1.491297960281372, "learning_rate": 0.001326294591484465, "loss": 0.641, "step": 117090 }, { "epoch": 33.68814729574223, "grad_norm": 1.4981579780578613, "learning_rate": 0.0013262370540851555, "loss": 0.6499, "step": 117100 }, { "epoch": 33.69102416570771, "grad_norm": 1.7018721103668213, "learning_rate": 0.0013261795166858458, "loss": 0.7538, "step": 117110 }, { "epoch": 33.693901035673186, "grad_norm": 0.7963976263999939, "learning_rate": 0.0013261219792865362, "loss": 0.6337, "step": 117120 }, { "epoch": 33.69677790563866, "grad_norm": 1.6492465734481812, "learning_rate": 0.0013260644418872267, "loss": 0.6005, "step": 117130 }, { "epoch": 33.699654775604145, "grad_norm": 0.857548177242279, "learning_rate": 0.0013260069044879173, "loss": 0.6542, "step": 117140 }, { "epoch": 33.70253164556962, "grad_norm": 0.7791717052459717, "learning_rate": 0.0013259493670886077, "loss": 0.6641, "step": 117150 }, { "epoch": 33.7054085155351, "grad_norm": 1.3090336322784424, "learning_rate": 0.0013258918296892982, "loss": 0.7411, "step": 117160 }, { "epoch": 33.708285385500574, "grad_norm": 1.0600084066390991, "learning_rate": 0.0013258342922899883, "loss": 0.628, "step": 117170 }, { "epoch": 33.71116225546605, "grad_norm": 0.9340768456459045, "learning_rate": 0.001325776754890679, "loss": 0.6647, "step": 117180 }, { "epoch": 33.71403912543153, "grad_norm": 1.0534284114837646, "learning_rate": 0.0013257192174913695, "loss": 0.6811, "step": 117190 }, { "epoch": 33.71691599539701, "grad_norm": 0.9104546904563904, "learning_rate": 0.0013256616800920598, "loss": 0.594, "step": 117200 }, { "epoch": 33.719792865362486, "grad_norm": 1.5096325874328613, "learning_rate": 0.0013256041426927504, "loss": 0.5315, "step": 117210 }, { "epoch": 33.72266973532796, "grad_norm": 0.8031327128410339, "learning_rate": 0.001325546605293441, "loss": 0.5668, "step": 117220 }, { "epoch": 33.72554660529344, "grad_norm": 0.9741936922073364, "learning_rate": 0.001325489067894131, "loss": 0.7665, "step": 117230 }, { "epoch": 33.72842347525892, "grad_norm": 0.816593587398529, "learning_rate": 0.0013254315304948216, "loss": 0.6981, "step": 117240 }, { "epoch": 33.7313003452244, "grad_norm": 0.6815898418426514, "learning_rate": 0.0013253739930955122, "loss": 0.5678, "step": 117250 }, { "epoch": 33.734177215189874, "grad_norm": 0.8689858913421631, "learning_rate": 0.0013253164556962026, "loss": 0.5963, "step": 117260 }, { "epoch": 33.73705408515535, "grad_norm": 1.5658538341522217, "learning_rate": 0.0013252589182968931, "loss": 0.5497, "step": 117270 }, { "epoch": 33.739930955120826, "grad_norm": 2.0221774578094482, "learning_rate": 0.0013252013808975832, "loss": 0.5813, "step": 117280 }, { "epoch": 33.74280782508631, "grad_norm": 1.2807207107543945, "learning_rate": 0.0013251438434982738, "loss": 0.7494, "step": 117290 }, { "epoch": 33.745684695051786, "grad_norm": 1.7550909519195557, "learning_rate": 0.0013250863060989644, "loss": 0.659, "step": 117300 }, { "epoch": 33.74856156501726, "grad_norm": 1.8475691080093384, "learning_rate": 0.0013250287686996547, "loss": 0.9359, "step": 117310 }, { "epoch": 33.75143843498274, "grad_norm": 1.150002121925354, "learning_rate": 0.0013249712313003453, "loss": 0.6765, "step": 117320 }, { "epoch": 33.754315304948214, "grad_norm": 0.8481449484825134, "learning_rate": 0.0013249136939010359, "loss": 0.6776, "step": 117330 }, { "epoch": 33.75719217491369, "grad_norm": 0.8357058763504028, "learning_rate": 0.001324856156501726, "loss": 0.7846, "step": 117340 }, { "epoch": 33.760069044879174, "grad_norm": 2.0789196491241455, "learning_rate": 0.0013247986191024165, "loss": 0.6803, "step": 117350 }, { "epoch": 33.76294591484465, "grad_norm": 0.7180538773536682, "learning_rate": 0.0013247410817031071, "loss": 0.6333, "step": 117360 }, { "epoch": 33.765822784810126, "grad_norm": 1.3989835977554321, "learning_rate": 0.0013246835443037975, "loss": 0.679, "step": 117370 }, { "epoch": 33.7686996547756, "grad_norm": 1.1150387525558472, "learning_rate": 0.001324626006904488, "loss": 0.6914, "step": 117380 }, { "epoch": 33.77157652474108, "grad_norm": 0.7113204598426819, "learning_rate": 0.0013245684695051784, "loss": 0.5198, "step": 117390 }, { "epoch": 33.77445339470656, "grad_norm": 1.1028962135314941, "learning_rate": 0.0013245109321058687, "loss": 0.5639, "step": 117400 }, { "epoch": 33.77733026467204, "grad_norm": 1.2070480585098267, "learning_rate": 0.0013244533947065593, "loss": 0.669, "step": 117410 }, { "epoch": 33.780207134637514, "grad_norm": 1.105207920074463, "learning_rate": 0.0013243958573072496, "loss": 0.6697, "step": 117420 }, { "epoch": 33.78308400460299, "grad_norm": 1.5117268562316895, "learning_rate": 0.0013243383199079402, "loss": 0.7818, "step": 117430 }, { "epoch": 33.78596087456847, "grad_norm": 0.9914703369140625, "learning_rate": 0.0013242807825086308, "loss": 0.6786, "step": 117440 }, { "epoch": 33.78883774453395, "grad_norm": 1.3251372575759888, "learning_rate": 0.001324223245109321, "loss": 0.8506, "step": 117450 }, { "epoch": 33.791714614499426, "grad_norm": 2.082612991333008, "learning_rate": 0.0013241657077100114, "loss": 0.9264, "step": 117460 }, { "epoch": 33.7945914844649, "grad_norm": 2.162004232406616, "learning_rate": 0.001324108170310702, "loss": 0.6556, "step": 117470 }, { "epoch": 33.79746835443038, "grad_norm": 1.4515656232833862, "learning_rate": 0.0013240506329113924, "loss": 0.6826, "step": 117480 }, { "epoch": 33.800345224395855, "grad_norm": 1.4029338359832764, "learning_rate": 0.001323993095512083, "loss": 0.7709, "step": 117490 }, { "epoch": 33.80322209436134, "grad_norm": 1.0406742095947266, "learning_rate": 0.0013239355581127733, "loss": 0.638, "step": 117500 }, { "epoch": 33.806098964326814, "grad_norm": 2.1548993587493896, "learning_rate": 0.0013238780207134638, "loss": 0.661, "step": 117510 }, { "epoch": 33.80897583429229, "grad_norm": 1.451284408569336, "learning_rate": 0.0013238204833141542, "loss": 0.6948, "step": 117520 }, { "epoch": 33.81185270425777, "grad_norm": 1.7602643966674805, "learning_rate": 0.0013237629459148445, "loss": 0.6003, "step": 117530 }, { "epoch": 33.81472957422324, "grad_norm": 0.8867735266685486, "learning_rate": 0.001323705408515535, "loss": 0.7375, "step": 117540 }, { "epoch": 33.81760644418872, "grad_norm": 1.493527889251709, "learning_rate": 0.0013236478711162257, "loss": 0.6458, "step": 117550 }, { "epoch": 33.8204833141542, "grad_norm": 1.0331352949142456, "learning_rate": 0.001323590333716916, "loss": 0.7157, "step": 117560 }, { "epoch": 33.82336018411968, "grad_norm": 1.1558518409729004, "learning_rate": 0.0013235327963176066, "loss": 0.6113, "step": 117570 }, { "epoch": 33.826237054085155, "grad_norm": 0.8675569295883179, "learning_rate": 0.001323475258918297, "loss": 0.6296, "step": 117580 }, { "epoch": 33.82911392405063, "grad_norm": 2.4278950691223145, "learning_rate": 0.0013234177215189873, "loss": 0.5489, "step": 117590 }, { "epoch": 33.83199079401611, "grad_norm": 1.2159992456436157, "learning_rate": 0.0013233601841196778, "loss": 0.7314, "step": 117600 }, { "epoch": 33.83486766398159, "grad_norm": 1.370303988456726, "learning_rate": 0.0013233026467203684, "loss": 0.6924, "step": 117610 }, { "epoch": 33.83774453394707, "grad_norm": 0.7063048481941223, "learning_rate": 0.0013232451093210587, "loss": 0.5777, "step": 117620 }, { "epoch": 33.84062140391254, "grad_norm": 1.6015150547027588, "learning_rate": 0.0013231875719217493, "loss": 0.5154, "step": 117630 }, { "epoch": 33.84349827387802, "grad_norm": 1.269412875175476, "learning_rate": 0.0013231300345224394, "loss": 0.5711, "step": 117640 }, { "epoch": 33.846375143843495, "grad_norm": 1.3810545206069946, "learning_rate": 0.00132307249712313, "loss": 0.6372, "step": 117650 }, { "epoch": 33.84925201380898, "grad_norm": 1.9813321828842163, "learning_rate": 0.0013230149597238206, "loss": 0.6525, "step": 117660 }, { "epoch": 33.852128883774455, "grad_norm": 1.3158966302871704, "learning_rate": 0.001322957422324511, "loss": 0.5883, "step": 117670 }, { "epoch": 33.85500575373993, "grad_norm": 0.900800347328186, "learning_rate": 0.0013228998849252015, "loss": 0.6674, "step": 117680 }, { "epoch": 33.85788262370541, "grad_norm": 0.943449079990387, "learning_rate": 0.001322842347525892, "loss": 0.4511, "step": 117690 }, { "epoch": 33.860759493670884, "grad_norm": 1.4194084405899048, "learning_rate": 0.0013227848101265822, "loss": 0.7124, "step": 117700 }, { "epoch": 33.86363636363637, "grad_norm": 0.5764732360839844, "learning_rate": 0.0013227272727272727, "loss": 0.5988, "step": 117710 }, { "epoch": 33.86651323360184, "grad_norm": 1.0818229913711548, "learning_rate": 0.0013226697353279633, "loss": 0.64, "step": 117720 }, { "epoch": 33.86939010356732, "grad_norm": 1.0082451105117798, "learning_rate": 0.0013226121979286536, "loss": 0.5572, "step": 117730 }, { "epoch": 33.872266973532795, "grad_norm": 1.1168978214263916, "learning_rate": 0.0013225546605293442, "loss": 0.7079, "step": 117740 }, { "epoch": 33.87514384349827, "grad_norm": 1.460389256477356, "learning_rate": 0.0013224971231300345, "loss": 0.699, "step": 117750 }, { "epoch": 33.878020713463755, "grad_norm": 1.7004806995391846, "learning_rate": 0.001322439585730725, "loss": 0.6158, "step": 117760 }, { "epoch": 33.88089758342923, "grad_norm": 2.3665215969085693, "learning_rate": 0.0013223820483314155, "loss": 0.6272, "step": 117770 }, { "epoch": 33.88377445339471, "grad_norm": 0.8419874906539917, "learning_rate": 0.0013223245109321058, "loss": 0.668, "step": 117780 }, { "epoch": 33.886651323360184, "grad_norm": 0.9352025389671326, "learning_rate": 0.0013222669735327964, "loss": 0.7626, "step": 117790 }, { "epoch": 33.88952819332566, "grad_norm": 1.355834722518921, "learning_rate": 0.001322209436133487, "loss": 0.6081, "step": 117800 }, { "epoch": 33.892405063291136, "grad_norm": 0.8985539078712463, "learning_rate": 0.0013221518987341773, "loss": 0.6636, "step": 117810 }, { "epoch": 33.89528193325662, "grad_norm": 1.251177191734314, "learning_rate": 0.0013220943613348676, "loss": 0.738, "step": 117820 }, { "epoch": 33.898158803222096, "grad_norm": 1.9891947507858276, "learning_rate": 0.0013220368239355582, "loss": 0.7787, "step": 117830 }, { "epoch": 33.90103567318757, "grad_norm": 1.1078256368637085, "learning_rate": 0.0013219792865362485, "loss": 0.6478, "step": 117840 }, { "epoch": 33.90391254315305, "grad_norm": 1.7073925733566284, "learning_rate": 0.001321921749136939, "loss": 0.6337, "step": 117850 }, { "epoch": 33.906789413118524, "grad_norm": 0.8837651014328003, "learning_rate": 0.0013218642117376295, "loss": 0.6899, "step": 117860 }, { "epoch": 33.90966628308401, "grad_norm": 1.363842487335205, "learning_rate": 0.00132180667433832, "loss": 0.8342, "step": 117870 }, { "epoch": 33.912543153049484, "grad_norm": 0.8890332579612732, "learning_rate": 0.0013217491369390104, "loss": 0.7065, "step": 117880 }, { "epoch": 33.91542002301496, "grad_norm": 0.8086684346199036, "learning_rate": 0.0013216915995397007, "loss": 0.7056, "step": 117890 }, { "epoch": 33.918296892980436, "grad_norm": 0.9469595551490784, "learning_rate": 0.0013216340621403913, "loss": 0.5978, "step": 117900 }, { "epoch": 33.92117376294591, "grad_norm": 1.3654305934906006, "learning_rate": 0.0013215765247410818, "loss": 0.6341, "step": 117910 }, { "epoch": 33.924050632911396, "grad_norm": 1.1237382888793945, "learning_rate": 0.0013215189873417722, "loss": 0.7265, "step": 117920 }, { "epoch": 33.92692750287687, "grad_norm": 1.0564417839050293, "learning_rate": 0.0013214614499424627, "loss": 0.6314, "step": 117930 }, { "epoch": 33.92980437284235, "grad_norm": 1.2956128120422363, "learning_rate": 0.001321403912543153, "loss": 0.7303, "step": 117940 }, { "epoch": 33.932681242807824, "grad_norm": 1.4492313861846924, "learning_rate": 0.0013213463751438434, "loss": 0.5728, "step": 117950 }, { "epoch": 33.9355581127733, "grad_norm": 1.2867684364318848, "learning_rate": 0.001321288837744534, "loss": 0.6716, "step": 117960 }, { "epoch": 33.938434982738784, "grad_norm": 1.1729705333709717, "learning_rate": 0.0013212313003452244, "loss": 0.5179, "step": 117970 }, { "epoch": 33.94131185270426, "grad_norm": 0.9591587781906128, "learning_rate": 0.001321173762945915, "loss": 0.6504, "step": 117980 }, { "epoch": 33.944188722669736, "grad_norm": 1.312394618988037, "learning_rate": 0.0013211162255466055, "loss": 0.6387, "step": 117990 }, { "epoch": 33.94706559263521, "grad_norm": 0.9151277542114258, "learning_rate": 0.0013210586881472956, "loss": 0.7345, "step": 118000 }, { "epoch": 33.94994246260069, "grad_norm": 1.5211204290390015, "learning_rate": 0.0013210011507479862, "loss": 0.4979, "step": 118010 }, { "epoch": 33.952819332566165, "grad_norm": 1.3607569932937622, "learning_rate": 0.0013209436133486767, "loss": 0.743, "step": 118020 }, { "epoch": 33.95569620253165, "grad_norm": 1.3509082794189453, "learning_rate": 0.001320886075949367, "loss": 0.5871, "step": 118030 }, { "epoch": 33.958573072497124, "grad_norm": 1.285957932472229, "learning_rate": 0.0013208285385500577, "loss": 0.5493, "step": 118040 }, { "epoch": 33.9614499424626, "grad_norm": 1.5580934286117554, "learning_rate": 0.0013207710011507482, "loss": 0.7104, "step": 118050 }, { "epoch": 33.96432681242808, "grad_norm": 1.230797290802002, "learning_rate": 0.0013207134637514383, "loss": 0.6722, "step": 118060 }, { "epoch": 33.96720368239355, "grad_norm": 1.6528400182724, "learning_rate": 0.001320655926352129, "loss": 0.5713, "step": 118070 }, { "epoch": 33.970080552359036, "grad_norm": 3.721674919128418, "learning_rate": 0.0013205983889528193, "loss": 0.7925, "step": 118080 }, { "epoch": 33.97295742232451, "grad_norm": 1.3230549097061157, "learning_rate": 0.0013205408515535098, "loss": 0.639, "step": 118090 }, { "epoch": 33.97583429228999, "grad_norm": 1.5791749954223633, "learning_rate": 0.0013204833141542004, "loss": 0.6207, "step": 118100 }, { "epoch": 33.978711162255465, "grad_norm": 1.072664499282837, "learning_rate": 0.0013204257767548905, "loss": 0.7541, "step": 118110 }, { "epoch": 33.98158803222094, "grad_norm": 2.4892892837524414, "learning_rate": 0.001320368239355581, "loss": 0.7523, "step": 118120 }, { "epoch": 33.984464902186424, "grad_norm": 1.4327021837234497, "learning_rate": 0.0013203107019562716, "loss": 0.6919, "step": 118130 }, { "epoch": 33.9873417721519, "grad_norm": 1.0834312438964844, "learning_rate": 0.001320253164556962, "loss": 0.5304, "step": 118140 }, { "epoch": 33.99021864211738, "grad_norm": 1.2642377614974976, "learning_rate": 0.0013201956271576526, "loss": 0.7387, "step": 118150 }, { "epoch": 33.99309551208285, "grad_norm": 0.765182375907898, "learning_rate": 0.0013201380897583431, "loss": 0.6645, "step": 118160 }, { "epoch": 33.99597238204833, "grad_norm": 1.3816149234771729, "learning_rate": 0.0013200805523590332, "loss": 0.8037, "step": 118170 }, { "epoch": 33.99884925201381, "grad_norm": 1.2270665168762207, "learning_rate": 0.0013200230149597238, "loss": 0.6168, "step": 118180 }, { "epoch": 34.00172612197929, "grad_norm": 1.1031004190444946, "learning_rate": 0.0013199654775604144, "loss": 0.5102, "step": 118190 }, { "epoch": 34.004602991944765, "grad_norm": 1.1167575120925903, "learning_rate": 0.0013199079401611047, "loss": 0.6777, "step": 118200 }, { "epoch": 34.00747986191024, "grad_norm": 0.8145797252655029, "learning_rate": 0.0013198504027617953, "loss": 0.6544, "step": 118210 }, { "epoch": 34.01035673187572, "grad_norm": 0.820701003074646, "learning_rate": 0.0013197928653624856, "loss": 0.4881, "step": 118220 }, { "epoch": 34.01323360184119, "grad_norm": 1.3373873233795166, "learning_rate": 0.001319735327963176, "loss": 0.5397, "step": 118230 }, { "epoch": 34.01611047180668, "grad_norm": 1.6535227298736572, "learning_rate": 0.0013196777905638665, "loss": 0.7695, "step": 118240 }, { "epoch": 34.01898734177215, "grad_norm": 1.6154282093048096, "learning_rate": 0.0013196202531645569, "loss": 0.6266, "step": 118250 }, { "epoch": 34.02186421173763, "grad_norm": 1.0712703466415405, "learning_rate": 0.0013195627157652475, "loss": 0.6312, "step": 118260 }, { "epoch": 34.024741081703105, "grad_norm": 0.9988464117050171, "learning_rate": 0.001319505178365938, "loss": 0.7398, "step": 118270 }, { "epoch": 34.02761795166858, "grad_norm": 1.8052833080291748, "learning_rate": 0.0013194476409666284, "loss": 0.6044, "step": 118280 }, { "epoch": 34.030494821634065, "grad_norm": 1.019490122795105, "learning_rate": 0.0013193901035673187, "loss": 0.5519, "step": 118290 }, { "epoch": 34.03337169159954, "grad_norm": 1.704163670539856, "learning_rate": 0.0013193325661680093, "loss": 0.606, "step": 118300 }, { "epoch": 34.03624856156502, "grad_norm": 1.9435889720916748, "learning_rate": 0.0013192750287686996, "loss": 0.6209, "step": 118310 }, { "epoch": 34.03912543153049, "grad_norm": 1.3242394924163818, "learning_rate": 0.0013192174913693902, "loss": 0.5885, "step": 118320 }, { "epoch": 34.04200230149597, "grad_norm": 0.9119540452957153, "learning_rate": 0.0013191599539700805, "loss": 0.6474, "step": 118330 }, { "epoch": 34.04487917146145, "grad_norm": 0.8943156599998474, "learning_rate": 0.001319102416570771, "loss": 0.7743, "step": 118340 }, { "epoch": 34.04775604142693, "grad_norm": 1.5775671005249023, "learning_rate": 0.0013190448791714614, "loss": 0.6295, "step": 118350 }, { "epoch": 34.050632911392405, "grad_norm": 1.24107825756073, "learning_rate": 0.0013189873417721518, "loss": 0.7235, "step": 118360 }, { "epoch": 34.05350978135788, "grad_norm": 1.6026180982589722, "learning_rate": 0.0013189298043728424, "loss": 0.8436, "step": 118370 }, { "epoch": 34.05638665132336, "grad_norm": 1.5517007112503052, "learning_rate": 0.001318872266973533, "loss": 0.6565, "step": 118380 }, { "epoch": 34.05926352128884, "grad_norm": 1.325469732284546, "learning_rate": 0.0013188147295742233, "loss": 0.5233, "step": 118390 }, { "epoch": 34.06214039125432, "grad_norm": 1.4431833028793335, "learning_rate": 0.0013187571921749138, "loss": 0.756, "step": 118400 }, { "epoch": 34.06501726121979, "grad_norm": 2.217402219772339, "learning_rate": 0.0013186996547756042, "loss": 0.7824, "step": 118410 }, { "epoch": 34.06789413118527, "grad_norm": 1.2265576124191284, "learning_rate": 0.0013186421173762945, "loss": 0.5437, "step": 118420 }, { "epoch": 34.070771001150746, "grad_norm": 2.0347542762756348, "learning_rate": 0.001318584579976985, "loss": 0.6005, "step": 118430 }, { "epoch": 34.07364787111622, "grad_norm": 1.4230722188949585, "learning_rate": 0.0013185270425776754, "loss": 0.5484, "step": 118440 }, { "epoch": 34.076524741081705, "grad_norm": 0.8283043503761292, "learning_rate": 0.001318469505178366, "loss": 0.6285, "step": 118450 }, { "epoch": 34.07940161104718, "grad_norm": 1.0843772888183594, "learning_rate": 0.0013184119677790566, "loss": 0.7211, "step": 118460 }, { "epoch": 34.08227848101266, "grad_norm": 0.7480962872505188, "learning_rate": 0.0013183544303797467, "loss": 0.6074, "step": 118470 }, { "epoch": 34.085155350978134, "grad_norm": 1.1447069644927979, "learning_rate": 0.0013182968929804373, "loss": 0.6105, "step": 118480 }, { "epoch": 34.08803222094361, "grad_norm": 1.0693646669387817, "learning_rate": 0.0013182393555811278, "loss": 0.5088, "step": 118490 }, { "epoch": 34.09090909090909, "grad_norm": 1.8005000352859497, "learning_rate": 0.0013181818181818182, "loss": 0.5857, "step": 118500 }, { "epoch": 34.09378596087457, "grad_norm": 1.5468240976333618, "learning_rate": 0.0013181242807825087, "loss": 0.5725, "step": 118510 }, { "epoch": 34.096662830840046, "grad_norm": 1.3415836095809937, "learning_rate": 0.0013180667433831993, "loss": 0.7328, "step": 118520 }, { "epoch": 34.09953970080552, "grad_norm": 1.5227705240249634, "learning_rate": 0.0013180092059838894, "loss": 0.6998, "step": 118530 }, { "epoch": 34.102416570771, "grad_norm": 1.9863808155059814, "learning_rate": 0.00131795166858458, "loss": 0.5039, "step": 118540 }, { "epoch": 34.10529344073648, "grad_norm": 1.152291178703308, "learning_rate": 0.0013178941311852703, "loss": 0.607, "step": 118550 }, { "epoch": 34.10817031070196, "grad_norm": 2.136603832244873, "learning_rate": 0.001317836593785961, "loss": 0.6584, "step": 118560 }, { "epoch": 34.111047180667434, "grad_norm": 1.4038770198822021, "learning_rate": 0.0013177790563866515, "loss": 0.6809, "step": 118570 }, { "epoch": 34.11392405063291, "grad_norm": 1.1591620445251465, "learning_rate": 0.0013177215189873418, "loss": 0.8019, "step": 118580 }, { "epoch": 34.116800920598386, "grad_norm": 1.082639217376709, "learning_rate": 0.0013176639815880322, "loss": 0.5559, "step": 118590 }, { "epoch": 34.11967779056387, "grad_norm": 0.7588828206062317, "learning_rate": 0.0013176064441887227, "loss": 0.545, "step": 118600 }, { "epoch": 34.122554660529346, "grad_norm": 1.4072556495666504, "learning_rate": 0.001317548906789413, "loss": 0.6105, "step": 118610 }, { "epoch": 34.12543153049482, "grad_norm": 1.5272341966629028, "learning_rate": 0.0013174913693901036, "loss": 0.6734, "step": 118620 }, { "epoch": 34.1283084004603, "grad_norm": 1.32057785987854, "learning_rate": 0.0013174338319907942, "loss": 0.6616, "step": 118630 }, { "epoch": 34.131185270425775, "grad_norm": 1.3222784996032715, "learning_rate": 0.0013173762945914845, "loss": 0.5547, "step": 118640 }, { "epoch": 34.13406214039125, "grad_norm": 1.453261137008667, "learning_rate": 0.001317318757192175, "loss": 0.5523, "step": 118650 }, { "epoch": 34.136939010356734, "grad_norm": 1.4032553434371948, "learning_rate": 0.0013172612197928652, "loss": 0.6961, "step": 118660 }, { "epoch": 34.13981588032221, "grad_norm": 1.0512884855270386, "learning_rate": 0.0013172036823935558, "loss": 0.7996, "step": 118670 }, { "epoch": 34.14269275028769, "grad_norm": 1.2561695575714111, "learning_rate": 0.0013171461449942464, "loss": 0.673, "step": 118680 }, { "epoch": 34.14556962025316, "grad_norm": 1.427371621131897, "learning_rate": 0.0013170886075949367, "loss": 0.6089, "step": 118690 }, { "epoch": 34.14844649021864, "grad_norm": 1.9697799682617188, "learning_rate": 0.0013170310701956273, "loss": 0.5701, "step": 118700 }, { "epoch": 34.15132336018412, "grad_norm": 1.3958301544189453, "learning_rate": 0.0013169735327963176, "loss": 0.5873, "step": 118710 }, { "epoch": 34.1542002301496, "grad_norm": 1.9504766464233398, "learning_rate": 0.001316915995397008, "loss": 0.5858, "step": 118720 }, { "epoch": 34.157077100115075, "grad_norm": 1.9571001529693604, "learning_rate": 0.0013168584579976985, "loss": 0.7516, "step": 118730 }, { "epoch": 34.15995397008055, "grad_norm": 0.8779451251029968, "learning_rate": 0.001316800920598389, "loss": 0.5907, "step": 118740 }, { "epoch": 34.16283084004603, "grad_norm": 1.8534880876541138, "learning_rate": 0.0013167433831990794, "loss": 0.675, "step": 118750 }, { "epoch": 34.16570771001151, "grad_norm": 2.2119081020355225, "learning_rate": 0.00131668584579977, "loss": 0.6368, "step": 118760 }, { "epoch": 34.16858457997699, "grad_norm": 1.3502957820892334, "learning_rate": 0.0013166283084004601, "loss": 0.4725, "step": 118770 }, { "epoch": 34.17146144994246, "grad_norm": 1.6276984214782715, "learning_rate": 0.0013165707710011507, "loss": 0.6393, "step": 118780 }, { "epoch": 34.17433831990794, "grad_norm": 1.4418977499008179, "learning_rate": 0.0013165132336018413, "loss": 0.7349, "step": 118790 }, { "epoch": 34.177215189873415, "grad_norm": 1.5139756202697754, "learning_rate": 0.0013164556962025316, "loss": 0.7327, "step": 118800 }, { "epoch": 34.1800920598389, "grad_norm": 0.9616110920906067, "learning_rate": 0.0013163981588032222, "loss": 0.5847, "step": 118810 }, { "epoch": 34.182968929804375, "grad_norm": 1.1030465364456177, "learning_rate": 0.0013163406214039127, "loss": 0.5574, "step": 118820 }, { "epoch": 34.18584579976985, "grad_norm": 1.6837413311004639, "learning_rate": 0.0013162830840046029, "loss": 0.6312, "step": 118830 }, { "epoch": 34.18872266973533, "grad_norm": 1.5722877979278564, "learning_rate": 0.0013162255466052934, "loss": 0.5484, "step": 118840 }, { "epoch": 34.1915995397008, "grad_norm": 0.9872090220451355, "learning_rate": 0.001316168009205984, "loss": 0.6446, "step": 118850 }, { "epoch": 34.19447640966629, "grad_norm": 0.9166339635848999, "learning_rate": 0.0013161104718066744, "loss": 0.533, "step": 118860 }, { "epoch": 34.19735327963176, "grad_norm": 2.3194525241851807, "learning_rate": 0.001316052934407365, "loss": 0.5683, "step": 118870 }, { "epoch": 34.20023014959724, "grad_norm": 0.9684630632400513, "learning_rate": 0.0013159953970080555, "loss": 0.7539, "step": 118880 }, { "epoch": 34.203107019562715, "grad_norm": 0.9969468712806702, "learning_rate": 0.0013159378596087456, "loss": 0.5636, "step": 118890 }, { "epoch": 34.20598388952819, "grad_norm": 1.3185361623764038, "learning_rate": 0.0013158803222094362, "loss": 0.58, "step": 118900 }, { "epoch": 34.20886075949367, "grad_norm": 1.628606915473938, "learning_rate": 0.0013158227848101265, "loss": 0.5942, "step": 118910 }, { "epoch": 34.21173762945915, "grad_norm": 1.0445587635040283, "learning_rate": 0.001315765247410817, "loss": 0.5642, "step": 118920 }, { "epoch": 34.21461449942463, "grad_norm": 1.0806082487106323, "learning_rate": 0.0013157077100115076, "loss": 0.6682, "step": 118930 }, { "epoch": 34.2174913693901, "grad_norm": 2.118865489959717, "learning_rate": 0.0013156501726121978, "loss": 0.5473, "step": 118940 }, { "epoch": 34.22036823935558, "grad_norm": 1.2152979373931885, "learning_rate": 0.0013155926352128883, "loss": 0.6439, "step": 118950 }, { "epoch": 34.223245109321056, "grad_norm": 1.761177897453308, "learning_rate": 0.001315535097813579, "loss": 0.6367, "step": 118960 }, { "epoch": 34.22612197928654, "grad_norm": 0.958247184753418, "learning_rate": 0.0013154775604142693, "loss": 0.4814, "step": 118970 }, { "epoch": 34.228998849252015, "grad_norm": 2.025035858154297, "learning_rate": 0.0013154200230149598, "loss": 0.5619, "step": 118980 }, { "epoch": 34.23187571921749, "grad_norm": 1.0385502576828003, "learning_rate": 0.0013153624856156504, "loss": 0.6579, "step": 118990 }, { "epoch": 34.23475258918297, "grad_norm": 1.1388514041900635, "learning_rate": 0.0013153049482163405, "loss": 0.6296, "step": 119000 }, { "epoch": 34.237629459148444, "grad_norm": 1.7091223001480103, "learning_rate": 0.001315247410817031, "loss": 0.7369, "step": 119010 }, { "epoch": 34.24050632911393, "grad_norm": 1.4640034437179565, "learning_rate": 0.0013151898734177214, "loss": 0.6013, "step": 119020 }, { "epoch": 34.2433831990794, "grad_norm": 0.9093284010887146, "learning_rate": 0.001315132336018412, "loss": 0.5899, "step": 119030 }, { "epoch": 34.24626006904488, "grad_norm": 1.932257890701294, "learning_rate": 0.0013150747986191026, "loss": 0.65, "step": 119040 }, { "epoch": 34.249136939010356, "grad_norm": 0.7926138639450073, "learning_rate": 0.001315017261219793, "loss": 0.5144, "step": 119050 }, { "epoch": 34.25201380897583, "grad_norm": 2.800631523132324, "learning_rate": 0.0013149597238204832, "loss": 0.7687, "step": 119060 }, { "epoch": 34.254890678941315, "grad_norm": 1.5199060440063477, "learning_rate": 0.0013149021864211738, "loss": 0.6466, "step": 119070 }, { "epoch": 34.25776754890679, "grad_norm": 1.3375672101974487, "learning_rate": 0.0013148446490218642, "loss": 0.4924, "step": 119080 }, { "epoch": 34.26064441887227, "grad_norm": 1.6296230554580688, "learning_rate": 0.0013147871116225547, "loss": 0.6948, "step": 119090 }, { "epoch": 34.263521288837744, "grad_norm": 0.8658024668693542, "learning_rate": 0.0013147295742232453, "loss": 0.6102, "step": 119100 }, { "epoch": 34.26639815880322, "grad_norm": 2.053048849105835, "learning_rate": 0.0013146720368239356, "loss": 0.7564, "step": 119110 }, { "epoch": 34.269275028768696, "grad_norm": 0.8367325663566589, "learning_rate": 0.001314614499424626, "loss": 0.6978, "step": 119120 }, { "epoch": 34.27215189873418, "grad_norm": 1.2205530405044556, "learning_rate": 0.0013145569620253163, "loss": 0.6617, "step": 119130 }, { "epoch": 34.275028768699656, "grad_norm": 1.2755755186080933, "learning_rate": 0.0013144994246260069, "loss": 0.7322, "step": 119140 }, { "epoch": 34.27790563866513, "grad_norm": 2.255525588989258, "learning_rate": 0.0013144418872266975, "loss": 0.801, "step": 119150 }, { "epoch": 34.28078250863061, "grad_norm": 1.2700616121292114, "learning_rate": 0.0013143843498273878, "loss": 0.6404, "step": 119160 }, { "epoch": 34.283659378596084, "grad_norm": 1.1312397718429565, "learning_rate": 0.0013143268124280784, "loss": 0.5875, "step": 119170 }, { "epoch": 34.28653624856157, "grad_norm": 1.6852163076400757, "learning_rate": 0.0013142692750287687, "loss": 0.6815, "step": 119180 }, { "epoch": 34.289413118527044, "grad_norm": 1.7908536195755005, "learning_rate": 0.001314211737629459, "loss": 0.5824, "step": 119190 }, { "epoch": 34.29228998849252, "grad_norm": 1.1959314346313477, "learning_rate": 0.0013141542002301496, "loss": 0.6419, "step": 119200 }, { "epoch": 34.295166858457996, "grad_norm": 0.8224475383758545, "learning_rate": 0.0013140966628308402, "loss": 0.5915, "step": 119210 }, { "epoch": 34.29804372842347, "grad_norm": 2.181823492050171, "learning_rate": 0.0013140391254315305, "loss": 0.6444, "step": 119220 }, { "epoch": 34.300920598388956, "grad_norm": 1.4722033739089966, "learning_rate": 0.001313981588032221, "loss": 0.5017, "step": 119230 }, { "epoch": 34.30379746835443, "grad_norm": 1.7575942277908325, "learning_rate": 0.0013139240506329112, "loss": 0.704, "step": 119240 }, { "epoch": 34.30667433831991, "grad_norm": 0.7497271299362183, "learning_rate": 0.0013138665132336018, "loss": 0.6803, "step": 119250 }, { "epoch": 34.309551208285384, "grad_norm": 1.2475109100341797, "learning_rate": 0.0013138089758342924, "loss": 0.7015, "step": 119260 }, { "epoch": 34.31242807825086, "grad_norm": 2.106342315673828, "learning_rate": 0.0013137514384349827, "loss": 0.6162, "step": 119270 }, { "epoch": 34.315304948216344, "grad_norm": 1.9447485208511353, "learning_rate": 0.0013136939010356733, "loss": 0.6714, "step": 119280 }, { "epoch": 34.31818181818182, "grad_norm": 1.0816185474395752, "learning_rate": 0.0013136363636363638, "loss": 0.5564, "step": 119290 }, { "epoch": 34.321058688147296, "grad_norm": 0.9316880106925964, "learning_rate": 0.001313578826237054, "loss": 0.6326, "step": 119300 }, { "epoch": 34.32393555811277, "grad_norm": 1.243416666984558, "learning_rate": 0.0013135212888377445, "loss": 0.7168, "step": 119310 }, { "epoch": 34.32681242807825, "grad_norm": 1.1551870107650757, "learning_rate": 0.001313463751438435, "loss": 0.5897, "step": 119320 }, { "epoch": 34.329689298043725, "grad_norm": 0.8012694120407104, "learning_rate": 0.0013134062140391254, "loss": 0.5231, "step": 119330 }, { "epoch": 34.33256616800921, "grad_norm": 2.2038285732269287, "learning_rate": 0.001313348676639816, "loss": 0.7802, "step": 119340 }, { "epoch": 34.335443037974684, "grad_norm": 1.042300820350647, "learning_rate": 0.0013132911392405063, "loss": 0.6101, "step": 119350 }, { "epoch": 34.33831990794016, "grad_norm": 2.2298402786254883, "learning_rate": 0.0013132336018411967, "loss": 0.7255, "step": 119360 }, { "epoch": 34.34119677790564, "grad_norm": 1.1358240842819214, "learning_rate": 0.0013131760644418873, "loss": 0.6876, "step": 119370 }, { "epoch": 34.34407364787111, "grad_norm": 1.679740309715271, "learning_rate": 0.0013131185270425776, "loss": 0.8511, "step": 119380 }, { "epoch": 34.346950517836596, "grad_norm": 1.4649937152862549, "learning_rate": 0.0013130609896432682, "loss": 0.6286, "step": 119390 }, { "epoch": 34.34982738780207, "grad_norm": 1.503507375717163, "learning_rate": 0.0013130034522439587, "loss": 0.6451, "step": 119400 }, { "epoch": 34.35270425776755, "grad_norm": 2.007131576538086, "learning_rate": 0.001312945914844649, "loss": 0.6208, "step": 119410 }, { "epoch": 34.355581127733025, "grad_norm": 1.7938014268875122, "learning_rate": 0.0013128883774453394, "loss": 0.551, "step": 119420 }, { "epoch": 34.3584579976985, "grad_norm": 1.047701120376587, "learning_rate": 0.00131283084004603, "loss": 0.7665, "step": 119430 }, { "epoch": 34.361334867663984, "grad_norm": 1.8308571577072144, "learning_rate": 0.0013127733026467203, "loss": 0.5562, "step": 119440 }, { "epoch": 34.36421173762946, "grad_norm": 0.8811420798301697, "learning_rate": 0.001312715765247411, "loss": 0.5418, "step": 119450 }, { "epoch": 34.36708860759494, "grad_norm": 0.8237721920013428, "learning_rate": 0.0013126582278481012, "loss": 0.5492, "step": 119460 }, { "epoch": 34.36996547756041, "grad_norm": 1.8103207349777222, "learning_rate": 0.0013126006904487918, "loss": 0.6479, "step": 119470 }, { "epoch": 34.37284234752589, "grad_norm": 1.2929493188858032, "learning_rate": 0.0013125431530494822, "loss": 0.7489, "step": 119480 }, { "epoch": 34.37571921749137, "grad_norm": 0.9502235651016235, "learning_rate": 0.0013124856156501725, "loss": 0.69, "step": 119490 }, { "epoch": 34.37859608745685, "grad_norm": 1.2738933563232422, "learning_rate": 0.001312428078250863, "loss": 0.4755, "step": 119500 }, { "epoch": 34.381472957422325, "grad_norm": 1.3407479524612427, "learning_rate": 0.0013123705408515536, "loss": 0.7121, "step": 119510 }, { "epoch": 34.3843498273878, "grad_norm": 1.8662337064743042, "learning_rate": 0.001312313003452244, "loss": 0.6508, "step": 119520 }, { "epoch": 34.38722669735328, "grad_norm": 1.4116324186325073, "learning_rate": 0.0013122554660529345, "loss": 0.6243, "step": 119530 }, { "epoch": 34.39010356731876, "grad_norm": 0.989515483379364, "learning_rate": 0.001312197928653625, "loss": 0.6577, "step": 119540 }, { "epoch": 34.39298043728424, "grad_norm": 1.3774064779281616, "learning_rate": 0.0013121403912543152, "loss": 0.5516, "step": 119550 }, { "epoch": 34.39585730724971, "grad_norm": 1.7245798110961914, "learning_rate": 0.0013120828538550058, "loss": 0.7594, "step": 119560 }, { "epoch": 34.39873417721519, "grad_norm": 2.4769434928894043, "learning_rate": 0.0013120253164556964, "loss": 0.8053, "step": 119570 }, { "epoch": 34.401611047180666, "grad_norm": 1.379536509513855, "learning_rate": 0.0013119677790563867, "loss": 0.7004, "step": 119580 }, { "epoch": 34.40448791714614, "grad_norm": 0.8438026309013367, "learning_rate": 0.0013119102416570773, "loss": 0.4997, "step": 119590 }, { "epoch": 34.407364787111625, "grad_norm": 1.1767252683639526, "learning_rate": 0.0013118527042577674, "loss": 0.5808, "step": 119600 }, { "epoch": 34.4102416570771, "grad_norm": 0.8889065384864807, "learning_rate": 0.001311795166858458, "loss": 0.5606, "step": 119610 }, { "epoch": 34.41311852704258, "grad_norm": 2.298675537109375, "learning_rate": 0.0013117376294591485, "loss": 0.6646, "step": 119620 }, { "epoch": 34.415995397008054, "grad_norm": 1.1356053352355957, "learning_rate": 0.0013116800920598389, "loss": 0.647, "step": 119630 }, { "epoch": 34.41887226697353, "grad_norm": 2.411416530609131, "learning_rate": 0.0013116225546605294, "loss": 0.5894, "step": 119640 }, { "epoch": 34.42174913693901, "grad_norm": 0.9432646036148071, "learning_rate": 0.00131156501726122, "loss": 0.5424, "step": 119650 }, { "epoch": 34.42462600690449, "grad_norm": 1.7890568971633911, "learning_rate": 0.0013115074798619101, "loss": 0.6452, "step": 119660 }, { "epoch": 34.427502876869966, "grad_norm": 1.3745685815811157, "learning_rate": 0.0013114499424626007, "loss": 0.6933, "step": 119670 }, { "epoch": 34.43037974683544, "grad_norm": 1.1230534315109253, "learning_rate": 0.0013113924050632913, "loss": 0.6386, "step": 119680 }, { "epoch": 34.43325661680092, "grad_norm": 1.2924636602401733, "learning_rate": 0.0013113348676639816, "loss": 0.5065, "step": 119690 }, { "epoch": 34.4361334867664, "grad_norm": 0.8913500308990479, "learning_rate": 0.0013112773302646722, "loss": 0.6397, "step": 119700 }, { "epoch": 34.43901035673188, "grad_norm": 0.784794270992279, "learning_rate": 0.0013112197928653623, "loss": 0.4877, "step": 119710 }, { "epoch": 34.441887226697354, "grad_norm": 1.471961259841919, "learning_rate": 0.0013111622554660529, "loss": 0.5546, "step": 119720 }, { "epoch": 34.44476409666283, "grad_norm": 1.2882647514343262, "learning_rate": 0.0013111047180667434, "loss": 0.6189, "step": 119730 }, { "epoch": 34.447640966628306, "grad_norm": 1.6093982458114624, "learning_rate": 0.0013110471806674338, "loss": 0.6571, "step": 119740 }, { "epoch": 34.45051783659379, "grad_norm": 1.878657579421997, "learning_rate": 0.0013109896432681243, "loss": 0.6263, "step": 119750 }, { "epoch": 34.453394706559266, "grad_norm": 1.3420480489730835, "learning_rate": 0.001310932105868815, "loss": 0.6698, "step": 119760 }, { "epoch": 34.45627157652474, "grad_norm": 1.3304463624954224, "learning_rate": 0.001310874568469505, "loss": 0.7585, "step": 119770 }, { "epoch": 34.45914844649022, "grad_norm": 0.8632087111473083, "learning_rate": 0.0013108170310701956, "loss": 0.6781, "step": 119780 }, { "epoch": 34.462025316455694, "grad_norm": 1.212212324142456, "learning_rate": 0.0013107594936708862, "loss": 0.6705, "step": 119790 }, { "epoch": 34.46490218642117, "grad_norm": 1.0865535736083984, "learning_rate": 0.0013107019562715765, "loss": 0.6902, "step": 119800 }, { "epoch": 34.467779056386654, "grad_norm": 2.687941312789917, "learning_rate": 0.001310644418872267, "loss": 0.576, "step": 119810 }, { "epoch": 34.47065592635213, "grad_norm": 1.1695666313171387, "learning_rate": 0.0013105868814729574, "loss": 0.6372, "step": 119820 }, { "epoch": 34.473532796317606, "grad_norm": 2.2298154830932617, "learning_rate": 0.0013105293440736478, "loss": 0.5873, "step": 119830 }, { "epoch": 34.47640966628308, "grad_norm": 1.2353873252868652, "learning_rate": 0.0013104718066743383, "loss": 0.623, "step": 119840 }, { "epoch": 34.47928653624856, "grad_norm": 1.9779001474380493, "learning_rate": 0.0013104142692750287, "loss": 0.7515, "step": 119850 }, { "epoch": 34.48216340621404, "grad_norm": 0.9890196919441223, "learning_rate": 0.0013103567318757193, "loss": 0.5726, "step": 119860 }, { "epoch": 34.48504027617952, "grad_norm": 0.9090538024902344, "learning_rate": 0.0013102991944764098, "loss": 0.6425, "step": 119870 }, { "epoch": 34.487917146144994, "grad_norm": 0.6471905708312988, "learning_rate": 0.0013102416570771002, "loss": 0.5824, "step": 119880 }, { "epoch": 34.49079401611047, "grad_norm": 1.1503241062164307, "learning_rate": 0.0013101841196777905, "loss": 0.6379, "step": 119890 }, { "epoch": 34.49367088607595, "grad_norm": 1.814849853515625, "learning_rate": 0.001310126582278481, "loss": 0.6683, "step": 119900 }, { "epoch": 34.49654775604143, "grad_norm": 1.08064603805542, "learning_rate": 0.0013100690448791714, "loss": 0.7158, "step": 119910 }, { "epoch": 34.499424626006906, "grad_norm": 1.7857078313827515, "learning_rate": 0.001310011507479862, "loss": 0.5716, "step": 119920 }, { "epoch": 34.50230149597238, "grad_norm": 1.162184238433838, "learning_rate": 0.0013099539700805523, "loss": 0.6035, "step": 119930 }, { "epoch": 34.50517836593786, "grad_norm": 1.2588322162628174, "learning_rate": 0.001309896432681243, "loss": 0.5477, "step": 119940 }, { "epoch": 34.508055235903335, "grad_norm": 1.0362082719802856, "learning_rate": 0.0013098388952819332, "loss": 0.5832, "step": 119950 }, { "epoch": 34.51093210586882, "grad_norm": 0.8838446736335754, "learning_rate": 0.0013097813578826236, "loss": 0.6522, "step": 119960 }, { "epoch": 34.513808975834294, "grad_norm": 0.7944849133491516, "learning_rate": 0.0013097238204833142, "loss": 1.0275, "step": 119970 }, { "epoch": 34.51668584579977, "grad_norm": 0.9329739809036255, "learning_rate": 0.0013096662830840047, "loss": 0.5682, "step": 119980 }, { "epoch": 34.51956271576525, "grad_norm": 0.9135277271270752, "learning_rate": 0.001309608745684695, "loss": 0.6252, "step": 119990 }, { "epoch": 34.52243958573072, "grad_norm": 1.1100965738296509, "learning_rate": 0.0013095512082853856, "loss": 0.5136, "step": 120000 }, { "epoch": 34.5253164556962, "grad_norm": 1.5812569856643677, "learning_rate": 0.001309493670886076, "loss": 0.6395, "step": 120010 }, { "epoch": 34.52819332566168, "grad_norm": 0.7508601546287537, "learning_rate": 0.0013094361334867663, "loss": 0.6199, "step": 120020 }, { "epoch": 34.53107019562716, "grad_norm": 1.387820839881897, "learning_rate": 0.0013093785960874569, "loss": 0.6349, "step": 120030 }, { "epoch": 34.533947065592635, "grad_norm": 0.9581232070922852, "learning_rate": 0.0013093210586881472, "loss": 0.609, "step": 120040 }, { "epoch": 34.53682393555811, "grad_norm": 1.2524017095565796, "learning_rate": 0.0013092635212888378, "loss": 0.7317, "step": 120050 }, { "epoch": 34.53970080552359, "grad_norm": 0.9681649208068848, "learning_rate": 0.0013092059838895284, "loss": 0.5726, "step": 120060 }, { "epoch": 34.54257767548907, "grad_norm": 1.6770265102386475, "learning_rate": 0.0013091484464902185, "loss": 0.5917, "step": 120070 }, { "epoch": 34.54545454545455, "grad_norm": 1.5520381927490234, "learning_rate": 0.001309090909090909, "loss": 0.6175, "step": 120080 }, { "epoch": 34.54833141542002, "grad_norm": 0.9467349648475647, "learning_rate": 0.0013090333716915996, "loss": 0.5327, "step": 120090 }, { "epoch": 34.5512082853855, "grad_norm": 1.3829381465911865, "learning_rate": 0.00130897583429229, "loss": 0.6369, "step": 120100 }, { "epoch": 34.554085155350975, "grad_norm": 1.2018705606460571, "learning_rate": 0.0013089182968929805, "loss": 0.6571, "step": 120110 }, { "epoch": 34.55696202531646, "grad_norm": 1.6750887632369995, "learning_rate": 0.001308860759493671, "loss": 0.6363, "step": 120120 }, { "epoch": 34.559838895281935, "grad_norm": 1.1273303031921387, "learning_rate": 0.0013088032220943612, "loss": 0.6349, "step": 120130 }, { "epoch": 34.56271576524741, "grad_norm": 1.275882601737976, "learning_rate": 0.0013087456846950518, "loss": 0.6182, "step": 120140 }, { "epoch": 34.56559263521289, "grad_norm": 2.1492559909820557, "learning_rate": 0.0013086881472957424, "loss": 0.8323, "step": 120150 }, { "epoch": 34.56846950517836, "grad_norm": 1.0366145372390747, "learning_rate": 0.0013086306098964327, "loss": 0.6502, "step": 120160 }, { "epoch": 34.57134637514385, "grad_norm": 1.896626353263855, "learning_rate": 0.0013085730724971233, "loss": 0.6353, "step": 120170 }, { "epoch": 34.57422324510932, "grad_norm": 1.1479376554489136, "learning_rate": 0.0013085155350978136, "loss": 0.7734, "step": 120180 }, { "epoch": 34.5771001150748, "grad_norm": 0.9747574329376221, "learning_rate": 0.001308457997698504, "loss": 0.6311, "step": 120190 }, { "epoch": 34.579976985040275, "grad_norm": 1.2012290954589844, "learning_rate": 0.0013084004602991945, "loss": 0.5723, "step": 120200 }, { "epoch": 34.58285385500575, "grad_norm": 1.4153754711151123, "learning_rate": 0.0013083429228998849, "loss": 0.9132, "step": 120210 }, { "epoch": 34.58573072497123, "grad_norm": 1.2519232034683228, "learning_rate": 0.0013082853855005754, "loss": 0.8846, "step": 120220 }, { "epoch": 34.58860759493671, "grad_norm": 1.636932611465454, "learning_rate": 0.001308227848101266, "loss": 0.6749, "step": 120230 }, { "epoch": 34.59148446490219, "grad_norm": 1.3962974548339844, "learning_rate": 0.0013081703107019563, "loss": 0.6084, "step": 120240 }, { "epoch": 34.59436133486766, "grad_norm": 0.9421695470809937, "learning_rate": 0.0013081127733026467, "loss": 0.6151, "step": 120250 }, { "epoch": 34.59723820483314, "grad_norm": 2.096024751663208, "learning_rate": 0.0013080552359033373, "loss": 0.5335, "step": 120260 }, { "epoch": 34.600115074798616, "grad_norm": 1.525837779045105, "learning_rate": 0.0013079976985040276, "loss": 0.7293, "step": 120270 }, { "epoch": 34.6029919447641, "grad_norm": 1.9133460521697998, "learning_rate": 0.0013079401611047182, "loss": 0.6678, "step": 120280 }, { "epoch": 34.605868814729575, "grad_norm": 1.1212592124938965, "learning_rate": 0.0013078826237054085, "loss": 0.6853, "step": 120290 }, { "epoch": 34.60874568469505, "grad_norm": 1.2844656705856323, "learning_rate": 0.001307825086306099, "loss": 0.7255, "step": 120300 }, { "epoch": 34.61162255466053, "grad_norm": 1.1268326044082642, "learning_rate": 0.0013077675489067894, "loss": 0.4988, "step": 120310 }, { "epoch": 34.614499424626004, "grad_norm": 0.7089065909385681, "learning_rate": 0.0013077100115074798, "loss": 0.7015, "step": 120320 }, { "epoch": 34.61737629459149, "grad_norm": 1.116255760192871, "learning_rate": 0.0013076524741081703, "loss": 0.6312, "step": 120330 }, { "epoch": 34.620253164556964, "grad_norm": 0.8240270614624023, "learning_rate": 0.001307594936708861, "loss": 0.6428, "step": 120340 }, { "epoch": 34.62313003452244, "grad_norm": 0.8762728571891785, "learning_rate": 0.0013075373993095512, "loss": 0.6344, "step": 120350 }, { "epoch": 34.626006904487916, "grad_norm": 0.7794961929321289, "learning_rate": 0.0013074798619102418, "loss": 0.6825, "step": 120360 }, { "epoch": 34.62888377445339, "grad_norm": 1.9668821096420288, "learning_rate": 0.0013074223245109322, "loss": 0.7195, "step": 120370 }, { "epoch": 34.631760644418875, "grad_norm": 0.7377554178237915, "learning_rate": 0.0013073647871116225, "loss": 0.5014, "step": 120380 }, { "epoch": 34.63463751438435, "grad_norm": 1.283095121383667, "learning_rate": 0.001307307249712313, "loss": 0.6691, "step": 120390 }, { "epoch": 34.63751438434983, "grad_norm": 1.5880146026611328, "learning_rate": 0.0013072497123130034, "loss": 0.8385, "step": 120400 }, { "epoch": 34.640391254315304, "grad_norm": 1.1645375490188599, "learning_rate": 0.001307192174913694, "loss": 0.6185, "step": 120410 }, { "epoch": 34.64326812428078, "grad_norm": 1.0692768096923828, "learning_rate": 0.0013071346375143845, "loss": 0.5598, "step": 120420 }, { "epoch": 34.64614499424626, "grad_norm": 1.0990360975265503, "learning_rate": 0.0013070771001150747, "loss": 0.7045, "step": 120430 }, { "epoch": 34.64902186421174, "grad_norm": 1.44371497631073, "learning_rate": 0.0013070195627157652, "loss": 0.7197, "step": 120440 }, { "epoch": 34.651898734177216, "grad_norm": 1.205014705657959, "learning_rate": 0.0013069620253164558, "loss": 0.6268, "step": 120450 }, { "epoch": 34.65477560414269, "grad_norm": 1.347331166267395, "learning_rate": 0.0013069044879171461, "loss": 0.5841, "step": 120460 }, { "epoch": 34.65765247410817, "grad_norm": 1.484697699546814, "learning_rate": 0.0013068469505178367, "loss": 0.626, "step": 120470 }, { "epoch": 34.660529344073645, "grad_norm": 1.6221894025802612, "learning_rate": 0.0013067894131185273, "loss": 0.6241, "step": 120480 }, { "epoch": 34.66340621403913, "grad_norm": 1.4751126766204834, "learning_rate": 0.0013067318757192174, "loss": 0.5608, "step": 120490 }, { "epoch": 34.666283084004604, "grad_norm": 0.9074457287788391, "learning_rate": 0.001306674338319908, "loss": 0.5918, "step": 120500 }, { "epoch": 34.66915995397008, "grad_norm": 1.1487361192703247, "learning_rate": 0.0013066168009205983, "loss": 0.7108, "step": 120510 }, { "epoch": 34.67203682393556, "grad_norm": 1.6856698989868164, "learning_rate": 0.0013065592635212889, "loss": 0.5277, "step": 120520 }, { "epoch": 34.67491369390103, "grad_norm": 1.15231192111969, "learning_rate": 0.0013065017261219794, "loss": 0.6889, "step": 120530 }, { "epoch": 34.677790563866516, "grad_norm": 0.8291967511177063, "learning_rate": 0.0013064441887226696, "loss": 0.5882, "step": 120540 }, { "epoch": 34.68066743383199, "grad_norm": 0.8466646671295166, "learning_rate": 0.0013063866513233601, "loss": 0.5777, "step": 120550 }, { "epoch": 34.68354430379747, "grad_norm": 0.9035428762435913, "learning_rate": 0.0013063291139240507, "loss": 0.5882, "step": 120560 }, { "epoch": 34.686421173762945, "grad_norm": 1.5202336311340332, "learning_rate": 0.001306271576524741, "loss": 0.675, "step": 120570 }, { "epoch": 34.68929804372842, "grad_norm": 1.0596258640289307, "learning_rate": 0.0013062140391254316, "loss": 0.5708, "step": 120580 }, { "epoch": 34.692174913693904, "grad_norm": 1.7150977849960327, "learning_rate": 0.0013061565017261222, "loss": 0.5973, "step": 120590 }, { "epoch": 34.69505178365938, "grad_norm": 1.7675055265426636, "learning_rate": 0.0013060989643268123, "loss": 0.771, "step": 120600 }, { "epoch": 34.69792865362486, "grad_norm": 1.5544403791427612, "learning_rate": 0.0013060414269275029, "loss": 0.7552, "step": 120610 }, { "epoch": 34.70080552359033, "grad_norm": 1.0310343503952026, "learning_rate": 0.0013059838895281932, "loss": 0.6384, "step": 120620 }, { "epoch": 34.70368239355581, "grad_norm": 1.1989802122116089, "learning_rate": 0.0013059263521288838, "loss": 0.6309, "step": 120630 }, { "epoch": 34.70655926352129, "grad_norm": 0.7760665416717529, "learning_rate": 0.0013058688147295743, "loss": 0.5355, "step": 120640 }, { "epoch": 34.70943613348677, "grad_norm": 1.3207319974899292, "learning_rate": 0.0013058112773302647, "loss": 0.5576, "step": 120650 }, { "epoch": 34.712313003452245, "grad_norm": 1.7724342346191406, "learning_rate": 0.001305753739930955, "loss": 0.5186, "step": 120660 }, { "epoch": 34.71518987341772, "grad_norm": 0.9870404005050659, "learning_rate": 0.0013056962025316456, "loss": 0.682, "step": 120670 }, { "epoch": 34.7180667433832, "grad_norm": 1.500105381011963, "learning_rate": 0.001305638665132336, "loss": 0.7751, "step": 120680 }, { "epoch": 34.72094361334867, "grad_norm": 1.8930325508117676, "learning_rate": 0.0013055811277330265, "loss": 0.6582, "step": 120690 }, { "epoch": 34.72382048331416, "grad_norm": 1.3060969114303589, "learning_rate": 0.001305523590333717, "loss": 0.6464, "step": 120700 }, { "epoch": 34.72669735327963, "grad_norm": 1.407688021659851, "learning_rate": 0.0013054660529344074, "loss": 0.6479, "step": 120710 }, { "epoch": 34.72957422324511, "grad_norm": 1.1762230396270752, "learning_rate": 0.0013054085155350978, "loss": 0.645, "step": 120720 }, { "epoch": 34.732451093210585, "grad_norm": 1.2734731435775757, "learning_rate": 0.0013053509781357881, "loss": 0.6352, "step": 120730 }, { "epoch": 34.73532796317606, "grad_norm": 1.4324818849563599, "learning_rate": 0.0013052934407364787, "loss": 0.5972, "step": 120740 }, { "epoch": 34.738204833141545, "grad_norm": 0.994727611541748, "learning_rate": 0.0013052359033371692, "loss": 0.6614, "step": 120750 }, { "epoch": 34.74108170310702, "grad_norm": 0.8106046319007874, "learning_rate": 0.0013051783659378596, "loss": 0.5985, "step": 120760 }, { "epoch": 34.7439585730725, "grad_norm": 2.2967286109924316, "learning_rate": 0.0013051208285385502, "loss": 0.4755, "step": 120770 }, { "epoch": 34.74683544303797, "grad_norm": 1.6038169860839844, "learning_rate": 0.0013050632911392405, "loss": 0.7801, "step": 120780 }, { "epoch": 34.74971231300345, "grad_norm": 0.8913998603820801, "learning_rate": 0.0013050057537399309, "loss": 0.529, "step": 120790 }, { "epoch": 34.75258918296893, "grad_norm": 1.740846872329712, "learning_rate": 0.0013049482163406214, "loss": 0.7388, "step": 120800 }, { "epoch": 34.75546605293441, "grad_norm": 1.646622896194458, "learning_rate": 0.001304890678941312, "loss": 0.5805, "step": 120810 }, { "epoch": 34.758342922899885, "grad_norm": 1.8985882997512817, "learning_rate": 0.0013048331415420023, "loss": 0.7175, "step": 120820 }, { "epoch": 34.76121979286536, "grad_norm": 2.0361790657043457, "learning_rate": 0.001304775604142693, "loss": 0.6029, "step": 120830 }, { "epoch": 34.76409666283084, "grad_norm": 1.449681043624878, "learning_rate": 0.0013047180667433832, "loss": 0.7275, "step": 120840 }, { "epoch": 34.76697353279632, "grad_norm": 0.9295435547828674, "learning_rate": 0.0013046605293440736, "loss": 0.7014, "step": 120850 }, { "epoch": 34.7698504027618, "grad_norm": 1.1145819425582886, "learning_rate": 0.0013046029919447642, "loss": 0.6488, "step": 120860 }, { "epoch": 34.77272727272727, "grad_norm": 1.1456811428070068, "learning_rate": 0.0013045454545454545, "loss": 0.6716, "step": 120870 }, { "epoch": 34.77560414269275, "grad_norm": 1.308536410331726, "learning_rate": 0.001304487917146145, "loss": 0.6779, "step": 120880 }, { "epoch": 34.778481012658226, "grad_norm": 1.3001540899276733, "learning_rate": 0.0013044303797468356, "loss": 0.8336, "step": 120890 }, { "epoch": 34.7813578826237, "grad_norm": 0.9710904955863953, "learning_rate": 0.0013043728423475258, "loss": 0.6448, "step": 120900 }, { "epoch": 34.784234752589185, "grad_norm": 0.9101234674453735, "learning_rate": 0.0013043153049482163, "loss": 0.5848, "step": 120910 }, { "epoch": 34.78711162255466, "grad_norm": 0.9357549548149109, "learning_rate": 0.0013042577675489069, "loss": 0.5656, "step": 120920 }, { "epoch": 34.78998849252014, "grad_norm": 1.1337929964065552, "learning_rate": 0.0013042002301495972, "loss": 0.8104, "step": 120930 }, { "epoch": 34.792865362485614, "grad_norm": 1.2341238260269165, "learning_rate": 0.0013041426927502878, "loss": 0.6102, "step": 120940 }, { "epoch": 34.79574223245109, "grad_norm": 1.7581268548965454, "learning_rate": 0.0013040851553509784, "loss": 0.7333, "step": 120950 }, { "epoch": 34.79861910241657, "grad_norm": 1.5039710998535156, "learning_rate": 0.0013040276179516685, "loss": 0.75, "step": 120960 }, { "epoch": 34.80149597238205, "grad_norm": 1.5605716705322266, "learning_rate": 0.001303970080552359, "loss": 0.6441, "step": 120970 }, { "epoch": 34.804372842347526, "grad_norm": 1.8431965112686157, "learning_rate": 0.0013039125431530494, "loss": 0.6136, "step": 120980 }, { "epoch": 34.807249712313, "grad_norm": 2.5670711994171143, "learning_rate": 0.00130385500575374, "loss": 0.731, "step": 120990 }, { "epoch": 34.81012658227848, "grad_norm": 2.8253989219665527, "learning_rate": 0.0013037974683544305, "loss": 0.6834, "step": 121000 }, { "epoch": 34.81300345224396, "grad_norm": 1.2853540182113647, "learning_rate": 0.0013037399309551209, "loss": 0.712, "step": 121010 }, { "epoch": 34.81588032220944, "grad_norm": 0.7102975845336914, "learning_rate": 0.0013036823935558112, "loss": 0.5965, "step": 121020 }, { "epoch": 34.818757192174914, "grad_norm": 1.56624174118042, "learning_rate": 0.0013036248561565018, "loss": 0.715, "step": 121030 }, { "epoch": 34.82163406214039, "grad_norm": 1.2366145849227905, "learning_rate": 0.0013035673187571921, "loss": 0.7378, "step": 121040 }, { "epoch": 34.824510932105866, "grad_norm": 1.5941758155822754, "learning_rate": 0.0013035097813578827, "loss": 0.7635, "step": 121050 }, { "epoch": 34.82738780207135, "grad_norm": 1.000996470451355, "learning_rate": 0.0013034522439585733, "loss": 0.6343, "step": 121060 }, { "epoch": 34.830264672036826, "grad_norm": 0.9014743566513062, "learning_rate": 0.0013033947065592636, "loss": 0.6555, "step": 121070 }, { "epoch": 34.8331415420023, "grad_norm": 1.1188334226608276, "learning_rate": 0.001303337169159954, "loss": 0.6433, "step": 121080 }, { "epoch": 34.83601841196778, "grad_norm": 1.1714622974395752, "learning_rate": 0.0013032796317606443, "loss": 0.6939, "step": 121090 }, { "epoch": 34.838895281933254, "grad_norm": 0.966219425201416, "learning_rate": 0.0013032220943613349, "loss": 0.4914, "step": 121100 }, { "epoch": 34.84177215189874, "grad_norm": 2.047881603240967, "learning_rate": 0.0013031645569620254, "loss": 0.665, "step": 121110 }, { "epoch": 34.844649021864214, "grad_norm": 2.1099965572357178, "learning_rate": 0.0013031070195627158, "loss": 0.6019, "step": 121120 }, { "epoch": 34.84752589182969, "grad_norm": 1.2196389436721802, "learning_rate": 0.0013030494821634063, "loss": 0.6748, "step": 121130 }, { "epoch": 34.850402761795166, "grad_norm": 1.2692257165908813, "learning_rate": 0.0013029919447640967, "loss": 0.6931, "step": 121140 }, { "epoch": 34.85327963176064, "grad_norm": 2.065145254135132, "learning_rate": 0.001302934407364787, "loss": 0.6712, "step": 121150 }, { "epoch": 34.85615650172612, "grad_norm": 1.3237957954406738, "learning_rate": 0.0013028768699654776, "loss": 0.554, "step": 121160 }, { "epoch": 34.8590333716916, "grad_norm": 1.1715950965881348, "learning_rate": 0.0013028193325661682, "loss": 0.6756, "step": 121170 }, { "epoch": 34.86191024165708, "grad_norm": 1.7932109832763672, "learning_rate": 0.0013027617951668585, "loss": 0.8364, "step": 121180 }, { "epoch": 34.864787111622555, "grad_norm": 1.8271172046661377, "learning_rate": 0.001302704257767549, "loss": 0.5843, "step": 121190 }, { "epoch": 34.86766398158803, "grad_norm": 1.8323242664337158, "learning_rate": 0.0013026467203682392, "loss": 0.8499, "step": 121200 }, { "epoch": 34.87054085155351, "grad_norm": 1.1916528940200806, "learning_rate": 0.0013025891829689298, "loss": 0.5891, "step": 121210 }, { "epoch": 34.87341772151899, "grad_norm": 1.229735255241394, "learning_rate": 0.0013025316455696203, "loss": 0.6915, "step": 121220 }, { "epoch": 34.876294591484466, "grad_norm": 0.9889194965362549, "learning_rate": 0.0013024741081703107, "loss": 0.5919, "step": 121230 }, { "epoch": 34.87917146144994, "grad_norm": 2.1639113426208496, "learning_rate": 0.0013024165707710012, "loss": 0.634, "step": 121240 }, { "epoch": 34.88204833141542, "grad_norm": 1.9243818521499634, "learning_rate": 0.0013023590333716918, "loss": 0.7454, "step": 121250 }, { "epoch": 34.884925201380895, "grad_norm": 1.2434022426605225, "learning_rate": 0.001302301495972382, "loss": 0.6566, "step": 121260 }, { "epoch": 34.88780207134638, "grad_norm": 1.9427638053894043, "learning_rate": 0.0013022439585730725, "loss": 0.5496, "step": 121270 }, { "epoch": 34.890678941311855, "grad_norm": 1.3665841817855835, "learning_rate": 0.001302186421173763, "loss": 0.7471, "step": 121280 }, { "epoch": 34.89355581127733, "grad_norm": 2.054069995880127, "learning_rate": 0.0013021288837744534, "loss": 0.7349, "step": 121290 }, { "epoch": 34.89643268124281, "grad_norm": 1.721956491470337, "learning_rate": 0.001302071346375144, "loss": 0.6725, "step": 121300 }, { "epoch": 34.89930955120828, "grad_norm": 1.3368446826934814, "learning_rate": 0.001302013808975834, "loss": 0.5731, "step": 121310 }, { "epoch": 34.90218642117377, "grad_norm": 2.7611300945281982, "learning_rate": 0.0013019562715765247, "loss": 0.7841, "step": 121320 }, { "epoch": 34.90506329113924, "grad_norm": 0.8362019062042236, "learning_rate": 0.0013018987341772152, "loss": 0.5805, "step": 121330 }, { "epoch": 34.90794016110472, "grad_norm": 0.97116619348526, "learning_rate": 0.0013018411967779056, "loss": 0.5792, "step": 121340 }, { "epoch": 34.910817031070195, "grad_norm": 1.2609399557113647, "learning_rate": 0.0013017836593785961, "loss": 0.7301, "step": 121350 }, { "epoch": 34.91369390103567, "grad_norm": 1.8114635944366455, "learning_rate": 0.0013017261219792867, "loss": 0.7344, "step": 121360 }, { "epoch": 34.91657077100115, "grad_norm": 1.7492631673812866, "learning_rate": 0.0013016685845799768, "loss": 0.6064, "step": 121370 }, { "epoch": 34.91944764096663, "grad_norm": 1.969617486000061, "learning_rate": 0.0013016110471806674, "loss": 0.6069, "step": 121380 }, { "epoch": 34.92232451093211, "grad_norm": 0.8180137276649475, "learning_rate": 0.001301553509781358, "loss": 0.5624, "step": 121390 }, { "epoch": 34.92520138089758, "grad_norm": 1.5236002206802368, "learning_rate": 0.0013014959723820483, "loss": 0.6406, "step": 121400 }, { "epoch": 34.92807825086306, "grad_norm": 1.8252251148223877, "learning_rate": 0.0013014384349827389, "loss": 0.5888, "step": 121410 }, { "epoch": 34.930955120828536, "grad_norm": 1.1817638874053955, "learning_rate": 0.0013013808975834292, "loss": 0.588, "step": 121420 }, { "epoch": 34.93383199079402, "grad_norm": 1.277163028717041, "learning_rate": 0.0013013233601841196, "loss": 0.6746, "step": 121430 }, { "epoch": 34.936708860759495, "grad_norm": 1.1609927415847778, "learning_rate": 0.0013012658227848101, "loss": 0.7052, "step": 121440 }, { "epoch": 34.93958573072497, "grad_norm": 1.034863829612732, "learning_rate": 0.0013012082853855005, "loss": 0.5588, "step": 121450 }, { "epoch": 34.94246260069045, "grad_norm": 2.1063480377197266, "learning_rate": 0.001301150747986191, "loss": 0.6118, "step": 121460 }, { "epoch": 34.945339470655924, "grad_norm": 2.557284355163574, "learning_rate": 0.0013010932105868816, "loss": 0.7053, "step": 121470 }, { "epoch": 34.94821634062141, "grad_norm": 1.9057430028915405, "learning_rate": 0.001301035673187572, "loss": 0.6333, "step": 121480 }, { "epoch": 34.95109321058688, "grad_norm": 1.1579536199569702, "learning_rate": 0.0013009781357882623, "loss": 0.7226, "step": 121490 }, { "epoch": 34.95397008055236, "grad_norm": 1.335109829902649, "learning_rate": 0.0013009205983889529, "loss": 0.677, "step": 121500 }, { "epoch": 34.956846950517836, "grad_norm": 1.3226318359375, "learning_rate": 0.0013008630609896432, "loss": 0.5867, "step": 121510 }, { "epoch": 34.95972382048331, "grad_norm": 1.79436194896698, "learning_rate": 0.0013008055235903338, "loss": 0.6775, "step": 121520 }, { "epoch": 34.962600690448795, "grad_norm": 1.277288794517517, "learning_rate": 0.0013007479861910243, "loss": 0.6864, "step": 121530 }, { "epoch": 34.96547756041427, "grad_norm": 0.8228813409805298, "learning_rate": 0.0013006904487917147, "loss": 0.7707, "step": 121540 }, { "epoch": 34.96835443037975, "grad_norm": 1.2337430715560913, "learning_rate": 0.001300632911392405, "loss": 0.5959, "step": 121550 }, { "epoch": 34.971231300345224, "grad_norm": 0.995926558971405, "learning_rate": 0.0013005753739930954, "loss": 0.5553, "step": 121560 }, { "epoch": 34.9741081703107, "grad_norm": 0.9422650933265686, "learning_rate": 0.001300517836593786, "loss": 0.6534, "step": 121570 }, { "epoch": 34.976985040276176, "grad_norm": 1.2565395832061768, "learning_rate": 0.0013004602991944765, "loss": 0.6008, "step": 121580 }, { "epoch": 34.97986191024166, "grad_norm": 1.0010828971862793, "learning_rate": 0.0013004027617951669, "loss": 0.6431, "step": 121590 }, { "epoch": 34.982738780207136, "grad_norm": 2.7721872329711914, "learning_rate": 0.0013003452243958574, "loss": 0.6985, "step": 121600 }, { "epoch": 34.98561565017261, "grad_norm": 1.5293853282928467, "learning_rate": 0.0013002876869965478, "loss": 0.5656, "step": 121610 }, { "epoch": 34.98849252013809, "grad_norm": 0.9336037039756775, "learning_rate": 0.0013002301495972381, "loss": 0.5402, "step": 121620 }, { "epoch": 34.991369390103564, "grad_norm": 1.6637194156646729, "learning_rate": 0.0013001726121979287, "loss": 0.631, "step": 121630 }, { "epoch": 34.99424626006905, "grad_norm": 1.9421420097351074, "learning_rate": 0.0013001150747986192, "loss": 0.5243, "step": 121640 }, { "epoch": 34.997123130034524, "grad_norm": 1.8298429250717163, "learning_rate": 0.0013000575373993096, "loss": 0.705, "step": 121650 }, { "epoch": 35.0, "grad_norm": 0.9582569599151611, "learning_rate": 0.0013000000000000002, "loss": 0.6899, "step": 121660 }, { "epoch": 35.002876869965476, "grad_norm": 1.1130037307739258, "learning_rate": 0.0012999424626006903, "loss": 0.6451, "step": 121670 }, { "epoch": 35.00575373993095, "grad_norm": 0.8486852049827576, "learning_rate": 0.0012998849252013809, "loss": 0.5811, "step": 121680 }, { "epoch": 35.008630609896436, "grad_norm": 1.1414252519607544, "learning_rate": 0.0012998273878020714, "loss": 0.582, "step": 121690 }, { "epoch": 35.01150747986191, "grad_norm": 0.7246752977371216, "learning_rate": 0.0012997698504027618, "loss": 0.6627, "step": 121700 }, { "epoch": 35.01438434982739, "grad_norm": 0.938604474067688, "learning_rate": 0.0012997123130034523, "loss": 0.5799, "step": 121710 }, { "epoch": 35.017261219792864, "grad_norm": 1.2110202312469482, "learning_rate": 0.0012996547756041429, "loss": 0.6352, "step": 121720 }, { "epoch": 35.02013808975834, "grad_norm": 1.9215483665466309, "learning_rate": 0.001299597238204833, "loss": 0.5091, "step": 121730 }, { "epoch": 35.023014959723824, "grad_norm": 1.2717543840408325, "learning_rate": 0.0012995397008055236, "loss": 0.7834, "step": 121740 }, { "epoch": 35.0258918296893, "grad_norm": 1.3050732612609863, "learning_rate": 0.0012994821634062141, "loss": 0.589, "step": 121750 }, { "epoch": 35.028768699654776, "grad_norm": 1.4088234901428223, "learning_rate": 0.0012994246260069045, "loss": 0.6382, "step": 121760 }, { "epoch": 35.03164556962025, "grad_norm": 1.0058587789535522, "learning_rate": 0.001299367088607595, "loss": 0.6436, "step": 121770 }, { "epoch": 35.03452243958573, "grad_norm": 1.2790679931640625, "learning_rate": 0.0012993095512082854, "loss": 0.7103, "step": 121780 }, { "epoch": 35.037399309551205, "grad_norm": 1.6372981071472168, "learning_rate": 0.0012992520138089758, "loss": 0.5158, "step": 121790 }, { "epoch": 35.04027617951669, "grad_norm": 2.8183343410491943, "learning_rate": 0.0012991944764096663, "loss": 0.7032, "step": 121800 }, { "epoch": 35.043153049482164, "grad_norm": 2.33980131149292, "learning_rate": 0.0012991369390103567, "loss": 0.6672, "step": 121810 }, { "epoch": 35.04602991944764, "grad_norm": 0.9811382293701172, "learning_rate": 0.0012990794016110472, "loss": 0.516, "step": 121820 }, { "epoch": 35.04890678941312, "grad_norm": 2.028693675994873, "learning_rate": 0.0012990218642117378, "loss": 0.5844, "step": 121830 }, { "epoch": 35.05178365937859, "grad_norm": 1.4033994674682617, "learning_rate": 0.0012989643268124281, "loss": 0.6459, "step": 121840 }, { "epoch": 35.054660529344076, "grad_norm": 0.9488550424575806, "learning_rate": 0.0012989067894131185, "loss": 0.6145, "step": 121850 }, { "epoch": 35.05753739930955, "grad_norm": 0.9210557341575623, "learning_rate": 0.001298849252013809, "loss": 0.486, "step": 121860 }, { "epoch": 35.06041426927503, "grad_norm": 1.129925012588501, "learning_rate": 0.0012987917146144994, "loss": 0.6544, "step": 121870 }, { "epoch": 35.063291139240505, "grad_norm": 0.8556455373764038, "learning_rate": 0.00129873417721519, "loss": 0.5477, "step": 121880 }, { "epoch": 35.06616800920598, "grad_norm": 0.8197180032730103, "learning_rate": 0.0012986766398158803, "loss": 0.6122, "step": 121890 }, { "epoch": 35.069044879171464, "grad_norm": 1.0541746616363525, "learning_rate": 0.0012986191024165709, "loss": 0.675, "step": 121900 }, { "epoch": 35.07192174913694, "grad_norm": 1.055965781211853, "learning_rate": 0.0012985615650172612, "loss": 0.5733, "step": 121910 }, { "epoch": 35.07479861910242, "grad_norm": 1.7409114837646484, "learning_rate": 0.0012985040276179516, "loss": 0.5335, "step": 121920 }, { "epoch": 35.07767548906789, "grad_norm": 1.8117976188659668, "learning_rate": 0.0012984464902186421, "loss": 0.5987, "step": 121930 }, { "epoch": 35.08055235903337, "grad_norm": 1.0815894603729248, "learning_rate": 0.0012983889528193327, "loss": 0.6023, "step": 121940 }, { "epoch": 35.08342922899885, "grad_norm": 1.5715008974075317, "learning_rate": 0.001298331415420023, "loss": 0.6893, "step": 121950 }, { "epoch": 35.08630609896433, "grad_norm": 1.016541838645935, "learning_rate": 0.0012982738780207136, "loss": 0.6463, "step": 121960 }, { "epoch": 35.089182968929805, "grad_norm": 1.3769406080245972, "learning_rate": 0.001298216340621404, "loss": 0.64, "step": 121970 }, { "epoch": 35.09205983889528, "grad_norm": 2.8884639739990234, "learning_rate": 0.0012981588032220943, "loss": 0.6354, "step": 121980 }, { "epoch": 35.09493670886076, "grad_norm": 1.9884024858474731, "learning_rate": 0.0012981012658227849, "loss": 0.6536, "step": 121990 }, { "epoch": 35.09781357882623, "grad_norm": 0.7022340297698975, "learning_rate": 0.0012980437284234752, "loss": 0.4892, "step": 122000 }, { "epoch": 35.10069044879172, "grad_norm": 2.8582704067230225, "learning_rate": 0.0012979861910241658, "loss": 0.6295, "step": 122010 }, { "epoch": 35.10356731875719, "grad_norm": 1.8430525064468384, "learning_rate": 0.0012979286536248563, "loss": 0.7919, "step": 122020 }, { "epoch": 35.10644418872267, "grad_norm": 1.064498782157898, "learning_rate": 0.0012978711162255465, "loss": 0.6996, "step": 122030 }, { "epoch": 35.109321058688145, "grad_norm": 0.8046554923057556, "learning_rate": 0.001297813578826237, "loss": 0.5751, "step": 122040 }, { "epoch": 35.11219792865362, "grad_norm": 0.8820590376853943, "learning_rate": 0.0012977560414269276, "loss": 0.5853, "step": 122050 }, { "epoch": 35.115074798619105, "grad_norm": 0.9241273403167725, "learning_rate": 0.001297698504027618, "loss": 0.6438, "step": 122060 }, { "epoch": 35.11795166858458, "grad_norm": 1.4218823909759521, "learning_rate": 0.0012976409666283085, "loss": 0.5961, "step": 122070 }, { "epoch": 35.12082853855006, "grad_norm": 1.0548174381256104, "learning_rate": 0.001297583429228999, "loss": 0.6539, "step": 122080 }, { "epoch": 35.123705408515534, "grad_norm": 1.446840524673462, "learning_rate": 0.0012975258918296892, "loss": 0.7775, "step": 122090 }, { "epoch": 35.12658227848101, "grad_norm": 1.7569552659988403, "learning_rate": 0.0012974683544303798, "loss": 0.6198, "step": 122100 }, { "epoch": 35.12945914844649, "grad_norm": 1.4630908966064453, "learning_rate": 0.0012974108170310701, "loss": 0.4365, "step": 122110 }, { "epoch": 35.13233601841197, "grad_norm": 1.3255215883255005, "learning_rate": 0.0012973532796317607, "loss": 0.579, "step": 122120 }, { "epoch": 35.135212888377445, "grad_norm": 1.3204537630081177, "learning_rate": 0.0012972957422324512, "loss": 0.6275, "step": 122130 }, { "epoch": 35.13808975834292, "grad_norm": 1.258529782295227, "learning_rate": 0.0012972382048331414, "loss": 0.8013, "step": 122140 }, { "epoch": 35.1409666283084, "grad_norm": 1.6989294290542603, "learning_rate": 0.001297180667433832, "loss": 0.6102, "step": 122150 }, { "epoch": 35.14384349827388, "grad_norm": 1.2874289751052856, "learning_rate": 0.0012971231300345225, "loss": 0.7376, "step": 122160 }, { "epoch": 35.14672036823936, "grad_norm": 1.325655221939087, "learning_rate": 0.0012970655926352128, "loss": 0.6775, "step": 122170 }, { "epoch": 35.149597238204834, "grad_norm": 1.0928518772125244, "learning_rate": 0.0012970080552359034, "loss": 0.6275, "step": 122180 }, { "epoch": 35.15247410817031, "grad_norm": 1.5320405960083008, "learning_rate": 0.001296950517836594, "loss": 0.5278, "step": 122190 }, { "epoch": 35.155350978135786, "grad_norm": 1.80951726436615, "learning_rate": 0.001296892980437284, "loss": 0.5562, "step": 122200 }, { "epoch": 35.15822784810127, "grad_norm": 1.6553435325622559, "learning_rate": 0.0012968354430379747, "loss": 0.679, "step": 122210 }, { "epoch": 35.161104718066746, "grad_norm": 0.7890986204147339, "learning_rate": 0.0012967779056386652, "loss": 0.8052, "step": 122220 }, { "epoch": 35.16398158803222, "grad_norm": 1.57562255859375, "learning_rate": 0.0012967203682393556, "loss": 0.4676, "step": 122230 }, { "epoch": 35.1668584579977, "grad_norm": 1.9046131372451782, "learning_rate": 0.0012966628308400461, "loss": 0.6257, "step": 122240 }, { "epoch": 35.169735327963174, "grad_norm": 2.0021629333496094, "learning_rate": 0.0012966052934407365, "loss": 0.6262, "step": 122250 }, { "epoch": 35.17261219792865, "grad_norm": 1.0232189893722534, "learning_rate": 0.0012965477560414268, "loss": 0.6543, "step": 122260 }, { "epoch": 35.175489067894134, "grad_norm": 1.40536630153656, "learning_rate": 0.0012964902186421174, "loss": 0.5429, "step": 122270 }, { "epoch": 35.17836593785961, "grad_norm": 1.393646001815796, "learning_rate": 0.0012964326812428077, "loss": 0.6514, "step": 122280 }, { "epoch": 35.181242807825086, "grad_norm": 3.8474981784820557, "learning_rate": 0.0012963751438434983, "loss": 0.6051, "step": 122290 }, { "epoch": 35.18411967779056, "grad_norm": 0.6948934197425842, "learning_rate": 0.0012963176064441889, "loss": 0.6202, "step": 122300 }, { "epoch": 35.18699654775604, "grad_norm": 1.7925440073013306, "learning_rate": 0.0012962600690448792, "loss": 0.5795, "step": 122310 }, { "epoch": 35.18987341772152, "grad_norm": 1.328122854232788, "learning_rate": 0.0012962025316455696, "loss": 0.5265, "step": 122320 }, { "epoch": 35.192750287687, "grad_norm": 1.6612646579742432, "learning_rate": 0.0012961449942462601, "loss": 0.7356, "step": 122330 }, { "epoch": 35.195627157652474, "grad_norm": 2.094283103942871, "learning_rate": 0.0012960874568469505, "loss": 0.6724, "step": 122340 }, { "epoch": 35.19850402761795, "grad_norm": 1.020207405090332, "learning_rate": 0.001296029919447641, "loss": 0.5779, "step": 122350 }, { "epoch": 35.20138089758343, "grad_norm": 1.6502635478973389, "learning_rate": 0.0012959723820483314, "loss": 0.6827, "step": 122360 }, { "epoch": 35.20425776754891, "grad_norm": 1.3087120056152344, "learning_rate": 0.001295914844649022, "loss": 0.5891, "step": 122370 }, { "epoch": 35.207134637514386, "grad_norm": 1.1609187126159668, "learning_rate": 0.0012958573072497123, "loss": 0.6152, "step": 122380 }, { "epoch": 35.21001150747986, "grad_norm": 1.1880351305007935, "learning_rate": 0.0012957997698504026, "loss": 0.6065, "step": 122390 }, { "epoch": 35.21288837744534, "grad_norm": 1.8155354261398315, "learning_rate": 0.0012957422324510932, "loss": 0.6275, "step": 122400 }, { "epoch": 35.215765247410815, "grad_norm": 1.1488854885101318, "learning_rate": 0.0012956846950517838, "loss": 0.6164, "step": 122410 }, { "epoch": 35.2186421173763, "grad_norm": 0.7840193510055542, "learning_rate": 0.0012956271576524741, "loss": 0.6349, "step": 122420 }, { "epoch": 35.221518987341774, "grad_norm": 0.7066742777824402, "learning_rate": 0.0012955696202531647, "loss": 0.5122, "step": 122430 }, { "epoch": 35.22439585730725, "grad_norm": 1.147070050239563, "learning_rate": 0.001295512082853855, "loss": 0.6028, "step": 122440 }, { "epoch": 35.22727272727273, "grad_norm": 1.432902455329895, "learning_rate": 0.0012954545454545454, "loss": 0.6899, "step": 122450 }, { "epoch": 35.2301495972382, "grad_norm": 1.0022832155227661, "learning_rate": 0.001295397008055236, "loss": 0.6202, "step": 122460 }, { "epoch": 35.23302646720368, "grad_norm": 1.4978725910186768, "learning_rate": 0.0012953394706559263, "loss": 0.6328, "step": 122470 }, { "epoch": 35.23590333716916, "grad_norm": 1.2430126667022705, "learning_rate": 0.0012952819332566169, "loss": 0.6305, "step": 122480 }, { "epoch": 35.23878020713464, "grad_norm": 1.0584145784378052, "learning_rate": 0.0012952243958573074, "loss": 0.5297, "step": 122490 }, { "epoch": 35.241657077100115, "grad_norm": 1.4737824201583862, "learning_rate": 0.0012951668584579976, "loss": 0.6465, "step": 122500 }, { "epoch": 35.24453394706559, "grad_norm": 2.0606160163879395, "learning_rate": 0.0012951093210586881, "loss": 0.6096, "step": 122510 }, { "epoch": 35.24741081703107, "grad_norm": 1.183349370956421, "learning_rate": 0.0012950517836593787, "loss": 0.7673, "step": 122520 }, { "epoch": 35.25028768699655, "grad_norm": 1.641047477722168, "learning_rate": 0.001294994246260069, "loss": 0.5135, "step": 122530 }, { "epoch": 35.25316455696203, "grad_norm": 1.055256962776184, "learning_rate": 0.0012949367088607596, "loss": 0.7467, "step": 122540 }, { "epoch": 35.2560414269275, "grad_norm": 0.8519513607025146, "learning_rate": 0.0012948791714614502, "loss": 0.5732, "step": 122550 }, { "epoch": 35.25891829689298, "grad_norm": 2.2363126277923584, "learning_rate": 0.0012948216340621403, "loss": 0.5564, "step": 122560 }, { "epoch": 35.261795166858455, "grad_norm": 0.8527117967605591, "learning_rate": 0.0012947640966628308, "loss": 0.5949, "step": 122570 }, { "epoch": 35.26467203682394, "grad_norm": 1.0033376216888428, "learning_rate": 0.0012947065592635212, "loss": 0.6081, "step": 122580 }, { "epoch": 35.267548906789415, "grad_norm": 3.297987937927246, "learning_rate": 0.0012946490218642118, "loss": 0.6654, "step": 122590 }, { "epoch": 35.27042577675489, "grad_norm": 0.8274513483047485, "learning_rate": 0.0012945914844649023, "loss": 0.6706, "step": 122600 }, { "epoch": 35.27330264672037, "grad_norm": 2.2128331661224365, "learning_rate": 0.0012945339470655927, "loss": 0.8768, "step": 122610 }, { "epoch": 35.27617951668584, "grad_norm": 1.7958418130874634, "learning_rate": 0.001294476409666283, "loss": 0.6393, "step": 122620 }, { "epoch": 35.27905638665133, "grad_norm": 1.157252311706543, "learning_rate": 0.0012944188722669736, "loss": 0.5755, "step": 122630 }, { "epoch": 35.2819332566168, "grad_norm": 1.1315839290618896, "learning_rate": 0.001294361334867664, "loss": 0.6165, "step": 122640 }, { "epoch": 35.28481012658228, "grad_norm": 1.6254483461380005, "learning_rate": 0.0012943037974683545, "loss": 0.5388, "step": 122650 }, { "epoch": 35.287686996547755, "grad_norm": 2.02685284614563, "learning_rate": 0.001294246260069045, "loss": 0.6417, "step": 122660 }, { "epoch": 35.29056386651323, "grad_norm": 1.186818242073059, "learning_rate": 0.0012941887226697354, "loss": 0.7148, "step": 122670 }, { "epoch": 35.29344073647871, "grad_norm": 1.0991997718811035, "learning_rate": 0.0012941311852704258, "loss": 0.6173, "step": 122680 }, { "epoch": 35.29631760644419, "grad_norm": 1.0623451471328735, "learning_rate": 0.001294073647871116, "loss": 0.5567, "step": 122690 }, { "epoch": 35.29919447640967, "grad_norm": 2.0326333045959473, "learning_rate": 0.0012940161104718067, "loss": 0.6098, "step": 122700 }, { "epoch": 35.30207134637514, "grad_norm": 1.491912841796875, "learning_rate": 0.0012939585730724972, "loss": 0.6844, "step": 122710 }, { "epoch": 35.30494821634062, "grad_norm": 1.341660499572754, "learning_rate": 0.0012939010356731876, "loss": 0.7018, "step": 122720 }, { "epoch": 35.307825086306096, "grad_norm": 1.19025456905365, "learning_rate": 0.0012938434982738781, "loss": 0.5356, "step": 122730 }, { "epoch": 35.31070195627158, "grad_norm": 0.9409652948379517, "learning_rate": 0.0012937859608745685, "loss": 0.5602, "step": 122740 }, { "epoch": 35.313578826237055, "grad_norm": 1.468011736869812, "learning_rate": 0.0012937284234752588, "loss": 0.5785, "step": 122750 }, { "epoch": 35.31645569620253, "grad_norm": 1.9243134260177612, "learning_rate": 0.0012936708860759494, "loss": 0.6033, "step": 122760 }, { "epoch": 35.31933256616801, "grad_norm": 1.6417948007583618, "learning_rate": 0.00129361334867664, "loss": 0.5673, "step": 122770 }, { "epoch": 35.322209436133484, "grad_norm": 1.054665207862854, "learning_rate": 0.0012935558112773303, "loss": 0.6611, "step": 122780 }, { "epoch": 35.32508630609897, "grad_norm": 1.2426743507385254, "learning_rate": 0.0012934982738780209, "loss": 0.7031, "step": 122790 }, { "epoch": 35.32796317606444, "grad_norm": 0.9558032751083374, "learning_rate": 0.0012934407364787112, "loss": 0.6585, "step": 122800 }, { "epoch": 35.33084004602992, "grad_norm": 1.4124691486358643, "learning_rate": 0.0012933831990794016, "loss": 0.6634, "step": 122810 }, { "epoch": 35.333716915995396, "grad_norm": 1.307566523551941, "learning_rate": 0.0012933256616800921, "loss": 0.803, "step": 122820 }, { "epoch": 35.33659378596087, "grad_norm": 1.8988864421844482, "learning_rate": 0.0012932681242807825, "loss": 0.5679, "step": 122830 }, { "epoch": 35.339470655926355, "grad_norm": 1.5030022859573364, "learning_rate": 0.001293210586881473, "loss": 0.6832, "step": 122840 }, { "epoch": 35.34234752589183, "grad_norm": 1.2074079513549805, "learning_rate": 0.0012931530494821636, "loss": 0.5453, "step": 122850 }, { "epoch": 35.34522439585731, "grad_norm": 1.336946725845337, "learning_rate": 0.0012930955120828537, "loss": 0.8248, "step": 122860 }, { "epoch": 35.348101265822784, "grad_norm": 0.6456385850906372, "learning_rate": 0.0012930379746835443, "loss": 0.5751, "step": 122870 }, { "epoch": 35.35097813578826, "grad_norm": 1.0474269390106201, "learning_rate": 0.0012929804372842349, "loss": 0.4717, "step": 122880 }, { "epoch": 35.353855005753736, "grad_norm": 1.3933500051498413, "learning_rate": 0.0012929228998849252, "loss": 0.6845, "step": 122890 }, { "epoch": 35.35673187571922, "grad_norm": 1.2623417377471924, "learning_rate": 0.0012928653624856158, "loss": 0.714, "step": 122900 }, { "epoch": 35.359608745684696, "grad_norm": 3.6041829586029053, "learning_rate": 0.0012928078250863063, "loss": 0.7222, "step": 122910 }, { "epoch": 35.36248561565017, "grad_norm": 1.9292728900909424, "learning_rate": 0.0012927502876869965, "loss": 0.6346, "step": 122920 }, { "epoch": 35.36536248561565, "grad_norm": 0.7779126167297363, "learning_rate": 0.001292692750287687, "loss": 0.6282, "step": 122930 }, { "epoch": 35.368239355581125, "grad_norm": 2.489612579345703, "learning_rate": 0.0012926352128883774, "loss": 0.633, "step": 122940 }, { "epoch": 35.37111622554661, "grad_norm": 1.016923189163208, "learning_rate": 0.001292577675489068, "loss": 0.5487, "step": 122950 }, { "epoch": 35.373993095512084, "grad_norm": 1.0411280393600464, "learning_rate": 0.0012925201380897585, "loss": 0.5887, "step": 122960 }, { "epoch": 35.37686996547756, "grad_norm": 0.998267412185669, "learning_rate": 0.0012924626006904486, "loss": 0.6151, "step": 122970 }, { "epoch": 35.379746835443036, "grad_norm": 1.2822493314743042, "learning_rate": 0.0012924050632911392, "loss": 0.5676, "step": 122980 }, { "epoch": 35.38262370540851, "grad_norm": 0.8974433541297913, "learning_rate": 0.0012923475258918298, "loss": 0.5942, "step": 122990 }, { "epoch": 35.385500575373996, "grad_norm": 2.743154525756836, "learning_rate": 0.0012922899884925201, "loss": 0.7008, "step": 123000 }, { "epoch": 35.38837744533947, "grad_norm": 1.8362069129943848, "learning_rate": 0.0012922324510932107, "loss": 0.6417, "step": 123010 }, { "epoch": 35.39125431530495, "grad_norm": 1.245694637298584, "learning_rate": 0.0012921749136939012, "loss": 0.6186, "step": 123020 }, { "epoch": 35.394131185270425, "grad_norm": 1.185023307800293, "learning_rate": 0.0012921173762945914, "loss": 0.715, "step": 123030 }, { "epoch": 35.3970080552359, "grad_norm": 0.807199239730835, "learning_rate": 0.001292059838895282, "loss": 0.5705, "step": 123040 }, { "epoch": 35.399884925201384, "grad_norm": 1.71426522731781, "learning_rate": 0.0012920023014959723, "loss": 0.713, "step": 123050 }, { "epoch": 35.40276179516686, "grad_norm": 1.4241142272949219, "learning_rate": 0.0012919447640966628, "loss": 0.5407, "step": 123060 }, { "epoch": 35.40563866513234, "grad_norm": 0.9851242899894714, "learning_rate": 0.0012918872266973534, "loss": 0.5576, "step": 123070 }, { "epoch": 35.40851553509781, "grad_norm": 1.2914674282073975, "learning_rate": 0.0012918296892980438, "loss": 0.6627, "step": 123080 }, { "epoch": 35.41139240506329, "grad_norm": 1.0581759214401245, "learning_rate": 0.001291772151898734, "loss": 0.5643, "step": 123090 }, { "epoch": 35.41426927502877, "grad_norm": 1.847334623336792, "learning_rate": 0.0012917146144994247, "loss": 0.7252, "step": 123100 }, { "epoch": 35.41714614499425, "grad_norm": 1.1608525514602661, "learning_rate": 0.001291657077100115, "loss": 0.5386, "step": 123110 }, { "epoch": 35.420023014959725, "grad_norm": 1.3721705675125122, "learning_rate": 0.0012915995397008056, "loss": 0.5785, "step": 123120 }, { "epoch": 35.4228998849252, "grad_norm": 1.5900654792785645, "learning_rate": 0.0012915420023014961, "loss": 0.5454, "step": 123130 }, { "epoch": 35.42577675489068, "grad_norm": 0.910337507724762, "learning_rate": 0.0012914844649021865, "loss": 0.5445, "step": 123140 }, { "epoch": 35.42865362485615, "grad_norm": 1.5629428625106812, "learning_rate": 0.0012914269275028768, "loss": 0.6941, "step": 123150 }, { "epoch": 35.43153049482164, "grad_norm": 2.314253330230713, "learning_rate": 0.0012913693901035672, "loss": 0.7257, "step": 123160 }, { "epoch": 35.43440736478711, "grad_norm": 1.4838931560516357, "learning_rate": 0.0012913118527042577, "loss": 0.7165, "step": 123170 }, { "epoch": 35.43728423475259, "grad_norm": 1.05739426612854, "learning_rate": 0.0012912543153049483, "loss": 0.5527, "step": 123180 }, { "epoch": 35.440161104718065, "grad_norm": 2.2698423862457275, "learning_rate": 0.0012911967779056387, "loss": 0.7121, "step": 123190 }, { "epoch": 35.44303797468354, "grad_norm": 2.3061981201171875, "learning_rate": 0.0012911392405063292, "loss": 0.6112, "step": 123200 }, { "epoch": 35.445914844649025, "grad_norm": 1.703336238861084, "learning_rate": 0.0012910817031070196, "loss": 0.5902, "step": 123210 }, { "epoch": 35.4487917146145, "grad_norm": 0.9658749103546143, "learning_rate": 0.00129102416570771, "loss": 0.7193, "step": 123220 }, { "epoch": 35.45166858457998, "grad_norm": 2.044182777404785, "learning_rate": 0.0012909666283084005, "loss": 0.6552, "step": 123230 }, { "epoch": 35.45454545454545, "grad_norm": 1.087488055229187, "learning_rate": 0.001290909090909091, "loss": 0.6489, "step": 123240 }, { "epoch": 35.45742232451093, "grad_norm": 1.1705533266067505, "learning_rate": 0.0012908515535097814, "loss": 0.6284, "step": 123250 }, { "epoch": 35.46029919447641, "grad_norm": 1.7874019145965576, "learning_rate": 0.001290794016110472, "loss": 0.6891, "step": 123260 }, { "epoch": 35.46317606444189, "grad_norm": 1.4549732208251953, "learning_rate": 0.001290736478711162, "loss": 0.6956, "step": 123270 }, { "epoch": 35.466052934407365, "grad_norm": 1.735845923423767, "learning_rate": 0.0012906789413118526, "loss": 0.5381, "step": 123280 }, { "epoch": 35.46892980437284, "grad_norm": 1.6387858390808105, "learning_rate": 0.0012906214039125432, "loss": 0.6514, "step": 123290 }, { "epoch": 35.47180667433832, "grad_norm": 1.320443868637085, "learning_rate": 0.0012905638665132336, "loss": 0.5224, "step": 123300 }, { "epoch": 35.4746835443038, "grad_norm": 0.7709893584251404, "learning_rate": 0.0012905063291139241, "loss": 0.612, "step": 123310 }, { "epoch": 35.47756041426928, "grad_norm": 0.8712248206138611, "learning_rate": 0.0012904487917146147, "loss": 0.6138, "step": 123320 }, { "epoch": 35.48043728423475, "grad_norm": 1.3029719591140747, "learning_rate": 0.0012903912543153048, "loss": 0.5777, "step": 123330 }, { "epoch": 35.48331415420023, "grad_norm": 1.359474778175354, "learning_rate": 0.0012903337169159954, "loss": 0.6796, "step": 123340 }, { "epoch": 35.486191024165706, "grad_norm": 1.107001543045044, "learning_rate": 0.001290276179516686, "loss": 0.6012, "step": 123350 }, { "epoch": 35.48906789413118, "grad_norm": 2.0563597679138184, "learning_rate": 0.0012902186421173763, "loss": 0.7773, "step": 123360 }, { "epoch": 35.491944764096665, "grad_norm": 1.1818954944610596, "learning_rate": 0.0012901611047180669, "loss": 0.5304, "step": 123370 }, { "epoch": 35.49482163406214, "grad_norm": 2.939296007156372, "learning_rate": 0.0012901035673187572, "loss": 0.6081, "step": 123380 }, { "epoch": 35.49769850402762, "grad_norm": 0.8795904517173767, "learning_rate": 0.0012900460299194475, "loss": 0.6234, "step": 123390 }, { "epoch": 35.500575373993094, "grad_norm": 1.129762887954712, "learning_rate": 0.0012899884925201381, "loss": 0.8715, "step": 123400 }, { "epoch": 35.50345224395857, "grad_norm": 1.6291910409927368, "learning_rate": 0.0012899309551208285, "loss": 0.6559, "step": 123410 }, { "epoch": 35.50632911392405, "grad_norm": 1.4516305923461914, "learning_rate": 0.001289873417721519, "loss": 0.6564, "step": 123420 }, { "epoch": 35.50920598388953, "grad_norm": 0.830474853515625, "learning_rate": 0.0012898158803222096, "loss": 0.5532, "step": 123430 }, { "epoch": 35.512082853855006, "grad_norm": 1.2737782001495361, "learning_rate": 0.0012897583429229, "loss": 0.6668, "step": 123440 }, { "epoch": 35.51495972382048, "grad_norm": 1.4677177667617798, "learning_rate": 0.0012897008055235903, "loss": 0.4946, "step": 123450 }, { "epoch": 35.51783659378596, "grad_norm": 0.7628716230392456, "learning_rate": 0.0012896432681242808, "loss": 0.6098, "step": 123460 }, { "epoch": 35.52071346375144, "grad_norm": 1.084187388420105, "learning_rate": 0.0012895857307249712, "loss": 0.6454, "step": 123470 }, { "epoch": 35.52359033371692, "grad_norm": 1.1783604621887207, "learning_rate": 0.0012895281933256618, "loss": 0.5528, "step": 123480 }, { "epoch": 35.526467203682394, "grad_norm": 0.7353448271751404, "learning_rate": 0.0012894706559263523, "loss": 0.7158, "step": 123490 }, { "epoch": 35.52934407364787, "grad_norm": 1.0227426290512085, "learning_rate": 0.0012894131185270427, "loss": 0.6221, "step": 123500 }, { "epoch": 35.532220943613346, "grad_norm": 0.8487083911895752, "learning_rate": 0.001289355581127733, "loss": 0.5165, "step": 123510 }, { "epoch": 35.53509781357883, "grad_norm": 0.6559293270111084, "learning_rate": 0.0012892980437284234, "loss": 0.5505, "step": 123520 }, { "epoch": 35.537974683544306, "grad_norm": 1.7563698291778564, "learning_rate": 0.001289240506329114, "loss": 0.5223, "step": 123530 }, { "epoch": 35.54085155350978, "grad_norm": 0.9744412302970886, "learning_rate": 0.0012891829689298045, "loss": 0.5013, "step": 123540 }, { "epoch": 35.54372842347526, "grad_norm": 1.5707088708877563, "learning_rate": 0.0012891254315304948, "loss": 0.6007, "step": 123550 }, { "epoch": 35.546605293440734, "grad_norm": 1.0205974578857422, "learning_rate": 0.0012890678941311854, "loss": 0.5911, "step": 123560 }, { "epoch": 35.54948216340621, "grad_norm": 1.4088166952133179, "learning_rate": 0.0012890103567318757, "loss": 0.6591, "step": 123570 }, { "epoch": 35.552359033371694, "grad_norm": 0.9963101148605347, "learning_rate": 0.001288952819332566, "loss": 0.8409, "step": 123580 }, { "epoch": 35.55523590333717, "grad_norm": 2.458263635635376, "learning_rate": 0.0012888952819332567, "loss": 0.6274, "step": 123590 }, { "epoch": 35.558112773302646, "grad_norm": 1.058610200881958, "learning_rate": 0.0012888377445339472, "loss": 0.715, "step": 123600 }, { "epoch": 35.56098964326812, "grad_norm": 0.8356097936630249, "learning_rate": 0.0012887802071346376, "loss": 0.8223, "step": 123610 }, { "epoch": 35.5638665132336, "grad_norm": 0.8422795534133911, "learning_rate": 0.0012887226697353281, "loss": 0.5717, "step": 123620 }, { "epoch": 35.56674338319908, "grad_norm": 1.7494701147079468, "learning_rate": 0.0012886651323360183, "loss": 0.7371, "step": 123630 }, { "epoch": 35.56962025316456, "grad_norm": 1.6662777662277222, "learning_rate": 0.0012886075949367088, "loss": 0.7844, "step": 123640 }, { "epoch": 35.572497123130034, "grad_norm": 0.6761674880981445, "learning_rate": 0.0012885500575373994, "loss": 0.6454, "step": 123650 }, { "epoch": 35.57537399309551, "grad_norm": 1.1722912788391113, "learning_rate": 0.0012884925201380897, "loss": 0.7377, "step": 123660 }, { "epoch": 35.57825086306099, "grad_norm": 0.807662308216095, "learning_rate": 0.0012884349827387803, "loss": 0.6505, "step": 123670 }, { "epoch": 35.58112773302647, "grad_norm": 1.5406502485275269, "learning_rate": 0.0012883774453394709, "loss": 0.5974, "step": 123680 }, { "epoch": 35.584004602991946, "grad_norm": 1.5094873905181885, "learning_rate": 0.001288319907940161, "loss": 0.6551, "step": 123690 }, { "epoch": 35.58688147295742, "grad_norm": 1.4753810167312622, "learning_rate": 0.0012882623705408516, "loss": 0.6896, "step": 123700 }, { "epoch": 35.5897583429229, "grad_norm": 1.0770535469055176, "learning_rate": 0.0012882048331415421, "loss": 0.633, "step": 123710 }, { "epoch": 35.592635212888375, "grad_norm": 1.4485729932785034, "learning_rate": 0.0012881472957422325, "loss": 0.613, "step": 123720 }, { "epoch": 35.59551208285386, "grad_norm": 1.2320690155029297, "learning_rate": 0.001288089758342923, "loss": 0.5078, "step": 123730 }, { "epoch": 35.598388952819334, "grad_norm": 1.4054559469223022, "learning_rate": 0.0012880322209436132, "loss": 0.7572, "step": 123740 }, { "epoch": 35.60126582278481, "grad_norm": 1.6448475122451782, "learning_rate": 0.0012879746835443037, "loss": 0.8634, "step": 123750 }, { "epoch": 35.60414269275029, "grad_norm": 1.3124003410339355, "learning_rate": 0.0012879171461449943, "loss": 0.5369, "step": 123760 }, { "epoch": 35.60701956271576, "grad_norm": 1.5279711484909058, "learning_rate": 0.0012878596087456846, "loss": 0.8482, "step": 123770 }, { "epoch": 35.60989643268124, "grad_norm": 1.4315645694732666, "learning_rate": 0.0012878020713463752, "loss": 0.5792, "step": 123780 }, { "epoch": 35.61277330264672, "grad_norm": 1.7375134229660034, "learning_rate": 0.0012877445339470658, "loss": 0.5942, "step": 123790 }, { "epoch": 35.6156501726122, "grad_norm": 1.294356107711792, "learning_rate": 0.001287686996547756, "loss": 0.6205, "step": 123800 }, { "epoch": 35.618527042577675, "grad_norm": 1.2713398933410645, "learning_rate": 0.0012876294591484465, "loss": 0.6439, "step": 123810 }, { "epoch": 35.62140391254315, "grad_norm": 3.346931219100952, "learning_rate": 0.001287571921749137, "loss": 0.673, "step": 123820 }, { "epoch": 35.62428078250863, "grad_norm": 1.6826772689819336, "learning_rate": 0.0012875143843498274, "loss": 0.5773, "step": 123830 }, { "epoch": 35.62715765247411, "grad_norm": 1.1715949773788452, "learning_rate": 0.001287456846950518, "loss": 0.8053, "step": 123840 }, { "epoch": 35.63003452243959, "grad_norm": 0.8749632239341736, "learning_rate": 0.0012873993095512083, "loss": 0.602, "step": 123850 }, { "epoch": 35.63291139240506, "grad_norm": 1.1582719087600708, "learning_rate": 0.0012873417721518986, "loss": 0.5407, "step": 123860 }, { "epoch": 35.63578826237054, "grad_norm": 2.5926754474639893, "learning_rate": 0.0012872842347525892, "loss": 0.7192, "step": 123870 }, { "epoch": 35.638665132336016, "grad_norm": 1.364395022392273, "learning_rate": 0.0012872266973532795, "loss": 0.6239, "step": 123880 }, { "epoch": 35.6415420023015, "grad_norm": 1.0651613473892212, "learning_rate": 0.00128716915995397, "loss": 0.5325, "step": 123890 }, { "epoch": 35.644418872266975, "grad_norm": 0.8421018123626709, "learning_rate": 0.0012871116225546607, "loss": 0.5008, "step": 123900 }, { "epoch": 35.64729574223245, "grad_norm": 0.9652060270309448, "learning_rate": 0.001287054085155351, "loss": 0.6497, "step": 123910 }, { "epoch": 35.65017261219793, "grad_norm": 1.2366807460784912, "learning_rate": 0.0012869965477560414, "loss": 0.6847, "step": 123920 }, { "epoch": 35.653049482163404, "grad_norm": 1.179631233215332, "learning_rate": 0.001286939010356732, "loss": 0.4958, "step": 123930 }, { "epoch": 35.65592635212889, "grad_norm": 2.1420037746429443, "learning_rate": 0.0012868814729574223, "loss": 0.7158, "step": 123940 }, { "epoch": 35.65880322209436, "grad_norm": 0.9532420635223389, "learning_rate": 0.0012868239355581128, "loss": 0.5239, "step": 123950 }, { "epoch": 35.66168009205984, "grad_norm": 0.9594547152519226, "learning_rate": 0.0012867663981588032, "loss": 0.6299, "step": 123960 }, { "epoch": 35.664556962025316, "grad_norm": 0.8025074005126953, "learning_rate": 0.0012867088607594938, "loss": 0.5361, "step": 123970 }, { "epoch": 35.66743383199079, "grad_norm": 0.8099360466003418, "learning_rate": 0.001286651323360184, "loss": 0.5346, "step": 123980 }, { "epoch": 35.670310701956275, "grad_norm": 1.1640440225601196, "learning_rate": 0.0012865937859608744, "loss": 0.6047, "step": 123990 }, { "epoch": 35.67318757192175, "grad_norm": 1.5157239437103271, "learning_rate": 0.001286536248561565, "loss": 0.6143, "step": 124000 }, { "epoch": 35.67606444188723, "grad_norm": 1.2759312391281128, "learning_rate": 0.0012864787111622556, "loss": 0.6355, "step": 124010 }, { "epoch": 35.678941311852704, "grad_norm": 1.401016116142273, "learning_rate": 0.001286421173762946, "loss": 0.5295, "step": 124020 }, { "epoch": 35.68181818181818, "grad_norm": 1.5162822008132935, "learning_rate": 0.0012863636363636365, "loss": 0.4983, "step": 124030 }, { "epoch": 35.684695051783656, "grad_norm": 1.1821677684783936, "learning_rate": 0.0012863060989643268, "loss": 0.6061, "step": 124040 }, { "epoch": 35.68757192174914, "grad_norm": 0.7448542714118958, "learning_rate": 0.0012862485615650172, "loss": 0.5766, "step": 124050 }, { "epoch": 35.690448791714616, "grad_norm": 1.3205561637878418, "learning_rate": 0.0012861910241657077, "loss": 0.6956, "step": 124060 }, { "epoch": 35.69332566168009, "grad_norm": 1.2752009630203247, "learning_rate": 0.001286133486766398, "loss": 0.5139, "step": 124070 }, { "epoch": 35.69620253164557, "grad_norm": 0.8897020816802979, "learning_rate": 0.0012860759493670887, "loss": 0.539, "step": 124080 }, { "epoch": 35.699079401611044, "grad_norm": 0.9756476283073425, "learning_rate": 0.0012860184119677792, "loss": 0.6459, "step": 124090 }, { "epoch": 35.70195627157653, "grad_norm": 1.9096250534057617, "learning_rate": 0.0012859608745684693, "loss": 0.6003, "step": 124100 }, { "epoch": 35.704833141542004, "grad_norm": 0.7296656966209412, "learning_rate": 0.00128590333716916, "loss": 0.5624, "step": 124110 }, { "epoch": 35.70771001150748, "grad_norm": 1.108971118927002, "learning_rate": 0.0012858457997698505, "loss": 0.5647, "step": 124120 }, { "epoch": 35.710586881472956, "grad_norm": 1.3100968599319458, "learning_rate": 0.0012857882623705408, "loss": 0.7578, "step": 124130 }, { "epoch": 35.71346375143843, "grad_norm": 3.3522489070892334, "learning_rate": 0.0012857307249712314, "loss": 0.7425, "step": 124140 }, { "epoch": 35.716340621403916, "grad_norm": 1.5345911979675293, "learning_rate": 0.001285673187571922, "loss": 0.6664, "step": 124150 }, { "epoch": 35.71921749136939, "grad_norm": 1.036337971687317, "learning_rate": 0.001285615650172612, "loss": 0.7349, "step": 124160 }, { "epoch": 35.72209436133487, "grad_norm": 1.820525884628296, "learning_rate": 0.0012855581127733026, "loss": 0.678, "step": 124170 }, { "epoch": 35.724971231300344, "grad_norm": 1.761610507965088, "learning_rate": 0.0012855005753739932, "loss": 0.8309, "step": 124180 }, { "epoch": 35.72784810126582, "grad_norm": 1.758474588394165, "learning_rate": 0.0012854430379746836, "loss": 0.6646, "step": 124190 }, { "epoch": 35.730724971231304, "grad_norm": 1.9247244596481323, "learning_rate": 0.0012853855005753741, "loss": 0.6308, "step": 124200 }, { "epoch": 35.73360184119678, "grad_norm": 2.3243143558502197, "learning_rate": 0.0012853279631760645, "loss": 0.6274, "step": 124210 }, { "epoch": 35.736478711162256, "grad_norm": 1.5355489253997803, "learning_rate": 0.0012852704257767548, "loss": 0.6356, "step": 124220 }, { "epoch": 35.73935558112773, "grad_norm": 0.8322862982749939, "learning_rate": 0.0012852128883774454, "loss": 0.4486, "step": 124230 }, { "epoch": 35.74223245109321, "grad_norm": 1.6754647493362427, "learning_rate": 0.0012851553509781357, "loss": 0.6339, "step": 124240 }, { "epoch": 35.745109321058685, "grad_norm": 1.0830193758010864, "learning_rate": 0.0012850978135788263, "loss": 0.8169, "step": 124250 }, { "epoch": 35.74798619102417, "grad_norm": 2.669123888015747, "learning_rate": 0.0012850402761795169, "loss": 0.6794, "step": 124260 }, { "epoch": 35.750863060989644, "grad_norm": 2.011288642883301, "learning_rate": 0.0012849827387802072, "loss": 0.7108, "step": 124270 }, { "epoch": 35.75373993095512, "grad_norm": 1.415548324584961, "learning_rate": 0.0012849252013808975, "loss": 0.6603, "step": 124280 }, { "epoch": 35.7566168009206, "grad_norm": 0.9853147864341736, "learning_rate": 0.0012848676639815881, "loss": 0.6247, "step": 124290 }, { "epoch": 35.75949367088607, "grad_norm": 1.944821834564209, "learning_rate": 0.0012848101265822785, "loss": 0.617, "step": 124300 }, { "epoch": 35.762370540851556, "grad_norm": 1.5347989797592163, "learning_rate": 0.001284752589182969, "loss": 0.6795, "step": 124310 }, { "epoch": 35.76524741081703, "grad_norm": 2.7568485736846924, "learning_rate": 0.0012846950517836594, "loss": 0.6361, "step": 124320 }, { "epoch": 35.76812428078251, "grad_norm": 0.9566839933395386, "learning_rate": 0.00128463751438435, "loss": 0.6536, "step": 124330 }, { "epoch": 35.771001150747985, "grad_norm": 1.3133901357650757, "learning_rate": 0.0012845799769850403, "loss": 0.7101, "step": 124340 }, { "epoch": 35.77387802071346, "grad_norm": 1.2379515171051025, "learning_rate": 0.0012845224395857306, "loss": 0.7284, "step": 124350 }, { "epoch": 35.776754890678944, "grad_norm": 1.1684176921844482, "learning_rate": 0.0012844649021864212, "loss": 0.5747, "step": 124360 }, { "epoch": 35.77963176064442, "grad_norm": 1.3586912155151367, "learning_rate": 0.0012844073647871118, "loss": 0.7028, "step": 124370 }, { "epoch": 35.7825086306099, "grad_norm": 1.8347408771514893, "learning_rate": 0.001284349827387802, "loss": 0.5565, "step": 124380 }, { "epoch": 35.78538550057537, "grad_norm": 1.6354329586029053, "learning_rate": 0.0012842922899884927, "loss": 0.6497, "step": 124390 }, { "epoch": 35.78826237054085, "grad_norm": 1.6100828647613525, "learning_rate": 0.001284234752589183, "loss": 0.5388, "step": 124400 }, { "epoch": 35.79113924050633, "grad_norm": 1.1990066766738892, "learning_rate": 0.0012841772151898734, "loss": 0.5898, "step": 124410 }, { "epoch": 35.79401611047181, "grad_norm": 1.8730906248092651, "learning_rate": 0.001284119677790564, "loss": 0.86, "step": 124420 }, { "epoch": 35.796892980437285, "grad_norm": 1.0700129270553589, "learning_rate": 0.0012840621403912543, "loss": 0.6493, "step": 124430 }, { "epoch": 35.79976985040276, "grad_norm": 0.9266183376312256, "learning_rate": 0.0012840046029919448, "loss": 0.5861, "step": 124440 }, { "epoch": 35.80264672036824, "grad_norm": 1.5207939147949219, "learning_rate": 0.0012839470655926354, "loss": 0.8581, "step": 124450 }, { "epoch": 35.80552359033371, "grad_norm": 1.8710286617279053, "learning_rate": 0.0012838895281933255, "loss": 0.8176, "step": 124460 }, { "epoch": 35.8084004602992, "grad_norm": 1.2688649892807007, "learning_rate": 0.001283831990794016, "loss": 0.674, "step": 124470 }, { "epoch": 35.81127733026467, "grad_norm": 0.938479483127594, "learning_rate": 0.0012837744533947067, "loss": 0.5698, "step": 124480 }, { "epoch": 35.81415420023015, "grad_norm": 1.7335247993469238, "learning_rate": 0.001283716915995397, "loss": 0.6587, "step": 124490 }, { "epoch": 35.817031070195625, "grad_norm": 1.1325161457061768, "learning_rate": 0.0012836593785960876, "loss": 0.8443, "step": 124500 }, { "epoch": 35.8199079401611, "grad_norm": 1.063185453414917, "learning_rate": 0.0012836018411967781, "loss": 0.5549, "step": 124510 }, { "epoch": 35.822784810126585, "grad_norm": 1.7391709089279175, "learning_rate": 0.0012835443037974683, "loss": 0.5667, "step": 124520 }, { "epoch": 35.82566168009206, "grad_norm": 1.052909016609192, "learning_rate": 0.0012834867663981588, "loss": 0.776, "step": 124530 }, { "epoch": 35.82853855005754, "grad_norm": 0.9837585687637329, "learning_rate": 0.0012834292289988492, "loss": 0.6216, "step": 124540 }, { "epoch": 35.83141542002301, "grad_norm": 0.9192885756492615, "learning_rate": 0.0012833716915995397, "loss": 0.6953, "step": 124550 }, { "epoch": 35.83429228998849, "grad_norm": 0.7310075163841248, "learning_rate": 0.0012833141542002303, "loss": 0.6646, "step": 124560 }, { "epoch": 35.83716915995397, "grad_norm": 2.3780174255371094, "learning_rate": 0.0012832566168009204, "loss": 0.6058, "step": 124570 }, { "epoch": 35.84004602991945, "grad_norm": 1.6889231204986572, "learning_rate": 0.001283199079401611, "loss": 0.6913, "step": 124580 }, { "epoch": 35.842922899884925, "grad_norm": 1.8016072511672974, "learning_rate": 0.0012831415420023016, "loss": 0.5342, "step": 124590 }, { "epoch": 35.8457997698504, "grad_norm": 1.0800338983535767, "learning_rate": 0.001283084004602992, "loss": 0.5716, "step": 124600 }, { "epoch": 35.84867663981588, "grad_norm": 1.0516321659088135, "learning_rate": 0.0012830264672036825, "loss": 0.6124, "step": 124610 }, { "epoch": 35.85155350978136, "grad_norm": 1.200092077255249, "learning_rate": 0.001282968929804373, "loss": 0.7118, "step": 124620 }, { "epoch": 35.85443037974684, "grad_norm": 1.6709550619125366, "learning_rate": 0.0012829113924050632, "loss": 0.628, "step": 124630 }, { "epoch": 35.85730724971231, "grad_norm": 1.1875859498977661, "learning_rate": 0.0012828538550057537, "loss": 0.6973, "step": 124640 }, { "epoch": 35.86018411967779, "grad_norm": 0.7295214533805847, "learning_rate": 0.001282796317606444, "loss": 0.4989, "step": 124650 }, { "epoch": 35.863060989643266, "grad_norm": 1.783467411994934, "learning_rate": 0.0012827387802071346, "loss": 0.5359, "step": 124660 }, { "epoch": 35.86593785960875, "grad_norm": 1.7632288932800293, "learning_rate": 0.0012826812428078252, "loss": 0.5143, "step": 124670 }, { "epoch": 35.868814729574225, "grad_norm": 1.034305214881897, "learning_rate": 0.0012826237054085156, "loss": 0.7398, "step": 124680 }, { "epoch": 35.8716915995397, "grad_norm": 0.942518413066864, "learning_rate": 0.001282566168009206, "loss": 0.4881, "step": 124690 }, { "epoch": 35.87456846950518, "grad_norm": 1.8483917713165283, "learning_rate": 0.0012825086306098965, "loss": 0.6966, "step": 124700 }, { "epoch": 35.877445339470654, "grad_norm": 1.618803858757019, "learning_rate": 0.0012824510932105868, "loss": 0.5941, "step": 124710 }, { "epoch": 35.88032220943613, "grad_norm": 1.1516211032867432, "learning_rate": 0.0012823935558112774, "loss": 0.8476, "step": 124720 }, { "epoch": 35.883199079401614, "grad_norm": 1.3217881917953491, "learning_rate": 0.001282336018411968, "loss": 0.6775, "step": 124730 }, { "epoch": 35.88607594936709, "grad_norm": 1.1465873718261719, "learning_rate": 0.0012822784810126583, "loss": 0.6223, "step": 124740 }, { "epoch": 35.888952819332566, "grad_norm": 1.2133047580718994, "learning_rate": 0.0012822209436133486, "loss": 0.6775, "step": 124750 }, { "epoch": 35.89182968929804, "grad_norm": 1.222594141960144, "learning_rate": 0.0012821634062140392, "loss": 0.485, "step": 124760 }, { "epoch": 35.89470655926352, "grad_norm": 2.0505967140197754, "learning_rate": 0.0012821058688147295, "loss": 0.6641, "step": 124770 }, { "epoch": 35.897583429229, "grad_norm": 1.4745187759399414, "learning_rate": 0.00128204833141542, "loss": 0.695, "step": 124780 }, { "epoch": 35.90046029919448, "grad_norm": 2.6381893157958984, "learning_rate": 0.0012819907940161105, "loss": 0.6776, "step": 124790 }, { "epoch": 35.903337169159954, "grad_norm": 1.2322280406951904, "learning_rate": 0.001281933256616801, "loss": 0.632, "step": 124800 }, { "epoch": 35.90621403912543, "grad_norm": 1.5049270391464233, "learning_rate": 0.0012818757192174914, "loss": 0.6735, "step": 124810 }, { "epoch": 35.90909090909091, "grad_norm": 2.4358832836151123, "learning_rate": 0.0012818181818181817, "loss": 0.8219, "step": 124820 }, { "epoch": 35.91196777905639, "grad_norm": 1.8318891525268555, "learning_rate": 0.0012817606444188723, "loss": 0.5347, "step": 124830 }, { "epoch": 35.914844649021866, "grad_norm": 1.4071673154830933, "learning_rate": 0.0012817031070195628, "loss": 0.6761, "step": 124840 }, { "epoch": 35.91772151898734, "grad_norm": 0.9968730211257935, "learning_rate": 0.0012816455696202532, "loss": 0.5323, "step": 124850 }, { "epoch": 35.92059838895282, "grad_norm": 2.299586534500122, "learning_rate": 0.0012815880322209437, "loss": 0.7397, "step": 124860 }, { "epoch": 35.923475258918295, "grad_norm": 1.587943434715271, "learning_rate": 0.001281530494821634, "loss": 0.6118, "step": 124870 }, { "epoch": 35.92635212888378, "grad_norm": 1.8655641078948975, "learning_rate": 0.0012814729574223244, "loss": 0.6788, "step": 124880 }, { "epoch": 35.929228998849254, "grad_norm": 1.6708786487579346, "learning_rate": 0.001281415420023015, "loss": 0.6933, "step": 124890 }, { "epoch": 35.93210586881473, "grad_norm": 1.4949685335159302, "learning_rate": 0.0012813578826237054, "loss": 0.6308, "step": 124900 }, { "epoch": 35.93498273878021, "grad_norm": 1.2385412454605103, "learning_rate": 0.001281300345224396, "loss": 0.5894, "step": 124910 }, { "epoch": 35.93785960874568, "grad_norm": 1.1770035028457642, "learning_rate": 0.0012812428078250865, "loss": 0.6558, "step": 124920 }, { "epoch": 35.94073647871116, "grad_norm": 1.4187285900115967, "learning_rate": 0.0012811852704257766, "loss": 0.5907, "step": 124930 }, { "epoch": 35.94361334867664, "grad_norm": 1.0803720951080322, "learning_rate": 0.0012811277330264672, "loss": 0.6165, "step": 124940 }, { "epoch": 35.94649021864212, "grad_norm": 1.9012876749038696, "learning_rate": 0.0012810701956271577, "loss": 0.775, "step": 124950 }, { "epoch": 35.949367088607595, "grad_norm": 0.8318964838981628, "learning_rate": 0.001281012658227848, "loss": 0.6121, "step": 124960 }, { "epoch": 35.95224395857307, "grad_norm": 1.810349941253662, "learning_rate": 0.0012809551208285387, "loss": 0.5684, "step": 124970 }, { "epoch": 35.95512082853855, "grad_norm": 1.0170782804489136, "learning_rate": 0.0012808975834292292, "loss": 0.5472, "step": 124980 }, { "epoch": 35.95799769850403, "grad_norm": 1.9099602699279785, "learning_rate": 0.0012808400460299193, "loss": 0.4605, "step": 124990 }, { "epoch": 35.96087456846951, "grad_norm": 0.8990292549133301, "learning_rate": 0.00128078250863061, "loss": 0.867, "step": 125000 }, { "epoch": 35.96375143843498, "grad_norm": 2.9105112552642822, "learning_rate": 0.0012807249712313003, "loss": 0.7128, "step": 125010 }, { "epoch": 35.96662830840046, "grad_norm": 2.1397836208343506, "learning_rate": 0.0012806674338319908, "loss": 0.7781, "step": 125020 }, { "epoch": 35.969505178365935, "grad_norm": 0.930709958076477, "learning_rate": 0.0012806098964326814, "loss": 0.5256, "step": 125030 }, { "epoch": 35.97238204833142, "grad_norm": 1.4340883493423462, "learning_rate": 0.0012805523590333717, "loss": 0.7443, "step": 125040 }, { "epoch": 35.975258918296895, "grad_norm": 0.849265456199646, "learning_rate": 0.001280494821634062, "loss": 0.5723, "step": 125050 }, { "epoch": 35.97813578826237, "grad_norm": 1.6201674938201904, "learning_rate": 0.0012804372842347526, "loss": 0.6832, "step": 125060 }, { "epoch": 35.98101265822785, "grad_norm": 1.3277795314788818, "learning_rate": 0.001280379746835443, "loss": 0.7748, "step": 125070 }, { "epoch": 35.98388952819332, "grad_norm": 2.5256166458129883, "learning_rate": 0.0012803222094361336, "loss": 0.6049, "step": 125080 }, { "epoch": 35.98676639815881, "grad_norm": 2.5611014366149902, "learning_rate": 0.0012802646720368241, "loss": 0.5205, "step": 125090 }, { "epoch": 35.98964326812428, "grad_norm": 0.8890759348869324, "learning_rate": 0.0012802071346375145, "loss": 0.625, "step": 125100 }, { "epoch": 35.99252013808976, "grad_norm": 1.0638201236724854, "learning_rate": 0.0012801495972382048, "loss": 0.698, "step": 125110 }, { "epoch": 35.995397008055235, "grad_norm": 1.8096027374267578, "learning_rate": 0.0012800920598388952, "loss": 0.6547, "step": 125120 }, { "epoch": 35.99827387802071, "grad_norm": 0.6349412798881531, "learning_rate": 0.0012800345224395857, "loss": 0.5399, "step": 125130 }, { "epoch": 36.00115074798619, "grad_norm": 0.9811376333236694, "learning_rate": 0.0012799769850402763, "loss": 0.687, "step": 125140 }, { "epoch": 36.00402761795167, "grad_norm": 1.9601397514343262, "learning_rate": 0.0012799194476409666, "loss": 0.5833, "step": 125150 }, { "epoch": 36.00690448791715, "grad_norm": 1.01530122756958, "learning_rate": 0.0012798619102416572, "loss": 0.6602, "step": 125160 }, { "epoch": 36.00978135788262, "grad_norm": 1.2366962432861328, "learning_rate": 0.0012798043728423475, "loss": 0.6603, "step": 125170 }, { "epoch": 36.0126582278481, "grad_norm": 0.9117022156715393, "learning_rate": 0.001279746835443038, "loss": 0.5199, "step": 125180 }, { "epoch": 36.015535097813576, "grad_norm": 1.4481284618377686, "learning_rate": 0.0012796892980437285, "loss": 0.4599, "step": 125190 }, { "epoch": 36.01841196777906, "grad_norm": 0.9646732211112976, "learning_rate": 0.001279631760644419, "loss": 0.6616, "step": 125200 }, { "epoch": 36.021288837744535, "grad_norm": 1.045168161392212, "learning_rate": 0.0012795742232451094, "loss": 0.5823, "step": 125210 }, { "epoch": 36.02416570771001, "grad_norm": 1.106612205505371, "learning_rate": 0.0012795166858458, "loss": 0.5068, "step": 125220 }, { "epoch": 36.02704257767549, "grad_norm": 1.8099522590637207, "learning_rate": 0.00127945914844649, "loss": 0.6559, "step": 125230 }, { "epoch": 36.029919447640964, "grad_norm": 1.4153262376785278, "learning_rate": 0.0012794016110471806, "loss": 0.4491, "step": 125240 }, { "epoch": 36.03279631760645, "grad_norm": 1.083006739616394, "learning_rate": 0.0012793440736478712, "loss": 0.428, "step": 125250 }, { "epoch": 36.03567318757192, "grad_norm": 1.3604480028152466, "learning_rate": 0.0012792865362485615, "loss": 0.5292, "step": 125260 }, { "epoch": 36.0385500575374, "grad_norm": 1.0308066606521606, "learning_rate": 0.001279228998849252, "loss": 0.4683, "step": 125270 }, { "epoch": 36.041426927502876, "grad_norm": 0.833781898021698, "learning_rate": 0.0012791714614499427, "loss": 0.758, "step": 125280 }, { "epoch": 36.04430379746835, "grad_norm": 1.9929447174072266, "learning_rate": 0.0012791139240506328, "loss": 0.6454, "step": 125290 }, { "epoch": 36.047180667433835, "grad_norm": 1.4372752904891968, "learning_rate": 0.0012790563866513234, "loss": 0.6239, "step": 125300 }, { "epoch": 36.05005753739931, "grad_norm": 0.8725244998931885, "learning_rate": 0.001278998849252014, "loss": 0.6616, "step": 125310 }, { "epoch": 36.05293440736479, "grad_norm": 0.8862009048461914, "learning_rate": 0.0012789413118527043, "loss": 0.4501, "step": 125320 }, { "epoch": 36.055811277330264, "grad_norm": 1.3504973649978638, "learning_rate": 0.0012788837744533948, "loss": 0.7127, "step": 125330 }, { "epoch": 36.05868814729574, "grad_norm": 1.7204231023788452, "learning_rate": 0.0012788262370540852, "loss": 0.6183, "step": 125340 }, { "epoch": 36.061565017261216, "grad_norm": 1.4574151039123535, "learning_rate": 0.0012787686996547755, "loss": 0.5667, "step": 125350 }, { "epoch": 36.0644418872267, "grad_norm": 1.45218825340271, "learning_rate": 0.001278711162255466, "loss": 0.6065, "step": 125360 }, { "epoch": 36.067318757192176, "grad_norm": 2.1712288856506348, "learning_rate": 0.0012786536248561564, "loss": 0.5243, "step": 125370 }, { "epoch": 36.07019562715765, "grad_norm": 0.9201226830482483, "learning_rate": 0.001278596087456847, "loss": 0.659, "step": 125380 }, { "epoch": 36.07307249712313, "grad_norm": 0.7710680365562439, "learning_rate": 0.0012785385500575376, "loss": 0.5495, "step": 125390 }, { "epoch": 36.075949367088604, "grad_norm": 1.6151313781738281, "learning_rate": 0.0012784810126582277, "loss": 0.6375, "step": 125400 }, { "epoch": 36.07882623705409, "grad_norm": 1.7367440462112427, "learning_rate": 0.0012784234752589183, "loss": 0.6989, "step": 125410 }, { "epoch": 36.081703107019564, "grad_norm": 1.1299808025360107, "learning_rate": 0.0012783659378596088, "loss": 0.6443, "step": 125420 }, { "epoch": 36.08457997698504, "grad_norm": 1.6511026620864868, "learning_rate": 0.0012783084004602992, "loss": 0.6673, "step": 125430 }, { "epoch": 36.087456846950516, "grad_norm": 1.0179885625839233, "learning_rate": 0.0012782508630609897, "loss": 0.4771, "step": 125440 }, { "epoch": 36.09033371691599, "grad_norm": 1.3161327838897705, "learning_rate": 0.0012781933256616803, "loss": 0.5352, "step": 125450 }, { "epoch": 36.093210586881476, "grad_norm": 1.0246878862380981, "learning_rate": 0.0012781357882623704, "loss": 0.6586, "step": 125460 }, { "epoch": 36.09608745684695, "grad_norm": 0.9641363024711609, "learning_rate": 0.001278078250863061, "loss": 0.4527, "step": 125470 }, { "epoch": 36.09896432681243, "grad_norm": 0.8137925267219543, "learning_rate": 0.0012780207134637513, "loss": 0.6055, "step": 125480 }, { "epoch": 36.101841196777904, "grad_norm": 0.9386588931083679, "learning_rate": 0.001277963176064442, "loss": 0.6865, "step": 125490 }, { "epoch": 36.10471806674338, "grad_norm": 1.3210548162460327, "learning_rate": 0.0012779056386651325, "loss": 0.7017, "step": 125500 }, { "epoch": 36.107594936708864, "grad_norm": 0.7799622416496277, "learning_rate": 0.0012778481012658228, "loss": 0.5385, "step": 125510 }, { "epoch": 36.11047180667434, "grad_norm": 1.5695915222167969, "learning_rate": 0.0012777905638665132, "loss": 0.6442, "step": 125520 }, { "epoch": 36.113348676639816, "grad_norm": 1.1992840766906738, "learning_rate": 0.0012777330264672037, "loss": 0.4985, "step": 125530 }, { "epoch": 36.11622554660529, "grad_norm": 1.2960816621780396, "learning_rate": 0.001277675489067894, "loss": 0.5266, "step": 125540 }, { "epoch": 36.11910241657077, "grad_norm": 1.018091082572937, "learning_rate": 0.0012776179516685846, "loss": 0.5182, "step": 125550 }, { "epoch": 36.121979286536245, "grad_norm": 1.403912901878357, "learning_rate": 0.0012775604142692752, "loss": 0.6317, "step": 125560 }, { "epoch": 36.12485615650173, "grad_norm": 1.1734809875488281, "learning_rate": 0.0012775028768699655, "loss": 0.5751, "step": 125570 }, { "epoch": 36.127733026467205, "grad_norm": 2.1224141120910645, "learning_rate": 0.001277445339470656, "loss": 0.5801, "step": 125580 }, { "epoch": 36.13060989643268, "grad_norm": 0.8641849756240845, "learning_rate": 0.0012773878020713462, "loss": 0.588, "step": 125590 }, { "epoch": 36.13348676639816, "grad_norm": 2.9970664978027344, "learning_rate": 0.0012773302646720368, "loss": 0.623, "step": 125600 }, { "epoch": 36.13636363636363, "grad_norm": 2.1202073097229004, "learning_rate": 0.0012772727272727274, "loss": 0.6727, "step": 125610 }, { "epoch": 36.139240506329116, "grad_norm": 2.2425761222839355, "learning_rate": 0.0012772151898734177, "loss": 0.5677, "step": 125620 }, { "epoch": 36.14211737629459, "grad_norm": 1.7746434211730957, "learning_rate": 0.0012771576524741083, "loss": 0.7452, "step": 125630 }, { "epoch": 36.14499424626007, "grad_norm": 1.0837539434432983, "learning_rate": 0.0012771001150747986, "loss": 0.4698, "step": 125640 }, { "epoch": 36.147871116225545, "grad_norm": 1.6636810302734375, "learning_rate": 0.001277042577675489, "loss": 0.6282, "step": 125650 }, { "epoch": 36.15074798619102, "grad_norm": 1.4025158882141113, "learning_rate": 0.0012769850402761795, "loss": 0.7347, "step": 125660 }, { "epoch": 36.153624856156505, "grad_norm": 0.9421229362487793, "learning_rate": 0.00127692750287687, "loss": 0.5243, "step": 125670 }, { "epoch": 36.15650172612198, "grad_norm": 1.0422391891479492, "learning_rate": 0.0012768699654775605, "loss": 0.5954, "step": 125680 }, { "epoch": 36.15937859608746, "grad_norm": 1.0949519872665405, "learning_rate": 0.001276812428078251, "loss": 0.6544, "step": 125690 }, { "epoch": 36.16225546605293, "grad_norm": 1.1732068061828613, "learning_rate": 0.0012767548906789411, "loss": 0.4739, "step": 125700 }, { "epoch": 36.16513233601841, "grad_norm": 1.390479326248169, "learning_rate": 0.0012766973532796317, "loss": 0.5698, "step": 125710 }, { "epoch": 36.16800920598389, "grad_norm": 0.7839459180831909, "learning_rate": 0.0012766398158803223, "loss": 0.5328, "step": 125720 }, { "epoch": 36.17088607594937, "grad_norm": 0.554320752620697, "learning_rate": 0.0012765822784810126, "loss": 0.5921, "step": 125730 }, { "epoch": 36.173762945914845, "grad_norm": 1.1301158666610718, "learning_rate": 0.0012765247410817032, "loss": 0.561, "step": 125740 }, { "epoch": 36.17663981588032, "grad_norm": 3.3849313259124756, "learning_rate": 0.0012764672036823937, "loss": 0.7693, "step": 125750 }, { "epoch": 36.1795166858458, "grad_norm": 1.1222680807113647, "learning_rate": 0.0012764096662830839, "loss": 0.5609, "step": 125760 }, { "epoch": 36.18239355581128, "grad_norm": 1.3803116083145142, "learning_rate": 0.0012763521288837744, "loss": 0.6168, "step": 125770 }, { "epoch": 36.18527042577676, "grad_norm": 0.9681711196899414, "learning_rate": 0.001276294591484465, "loss": 0.6337, "step": 125780 }, { "epoch": 36.18814729574223, "grad_norm": 1.9465276002883911, "learning_rate": 0.0012762370540851554, "loss": 0.6249, "step": 125790 }, { "epoch": 36.19102416570771, "grad_norm": 1.891054630279541, "learning_rate": 0.001276179516685846, "loss": 0.5203, "step": 125800 }, { "epoch": 36.193901035673186, "grad_norm": 1.181028962135315, "learning_rate": 0.0012761219792865363, "loss": 0.7032, "step": 125810 }, { "epoch": 36.19677790563866, "grad_norm": 1.124604344367981, "learning_rate": 0.0012760644418872266, "loss": 0.6078, "step": 125820 }, { "epoch": 36.199654775604145, "grad_norm": 1.8248223066329956, "learning_rate": 0.0012760069044879172, "loss": 0.6246, "step": 125830 }, { "epoch": 36.20253164556962, "grad_norm": 1.0190536975860596, "learning_rate": 0.0012759493670886075, "loss": 0.5241, "step": 125840 }, { "epoch": 36.2054085155351, "grad_norm": 1.0428959131240845, "learning_rate": 0.001275891829689298, "loss": 0.606, "step": 125850 }, { "epoch": 36.208285385500574, "grad_norm": 1.6937922239303589, "learning_rate": 0.0012758342922899886, "loss": 0.7341, "step": 125860 }, { "epoch": 36.21116225546605, "grad_norm": 2.0755674839019775, "learning_rate": 0.001275776754890679, "loss": 0.6791, "step": 125870 }, { "epoch": 36.21403912543153, "grad_norm": 1.2913025617599487, "learning_rate": 0.0012757192174913693, "loss": 0.5375, "step": 125880 }, { "epoch": 36.21691599539701, "grad_norm": 1.4579832553863525, "learning_rate": 0.00127566168009206, "loss": 0.6055, "step": 125890 }, { "epoch": 36.219792865362486, "grad_norm": 1.632143259048462, "learning_rate": 0.0012756041426927503, "loss": 0.5651, "step": 125900 }, { "epoch": 36.22266973532796, "grad_norm": 1.9454458951950073, "learning_rate": 0.0012755466052934408, "loss": 0.5369, "step": 125910 }, { "epoch": 36.22554660529344, "grad_norm": 1.0485267639160156, "learning_rate": 0.0012754890678941312, "loss": 0.6409, "step": 125920 }, { "epoch": 36.22842347525892, "grad_norm": 1.0590628385543823, "learning_rate": 0.0012754315304948217, "loss": 0.7266, "step": 125930 }, { "epoch": 36.2313003452244, "grad_norm": 1.417396903038025, "learning_rate": 0.001275373993095512, "loss": 0.5844, "step": 125940 }, { "epoch": 36.234177215189874, "grad_norm": 1.1818190813064575, "learning_rate": 0.0012753164556962024, "loss": 0.6449, "step": 125950 }, { "epoch": 36.23705408515535, "grad_norm": 2.203362464904785, "learning_rate": 0.001275258918296893, "loss": 0.5438, "step": 125960 }, { "epoch": 36.239930955120826, "grad_norm": 0.9078600406646729, "learning_rate": 0.0012752013808975836, "loss": 0.653, "step": 125970 }, { "epoch": 36.24280782508631, "grad_norm": 1.1010148525238037, "learning_rate": 0.001275143843498274, "loss": 0.5642, "step": 125980 }, { "epoch": 36.245684695051786, "grad_norm": 1.3464767932891846, "learning_rate": 0.0012750863060989645, "loss": 0.5552, "step": 125990 }, { "epoch": 36.24856156501726, "grad_norm": 1.049705982208252, "learning_rate": 0.0012750287686996548, "loss": 0.5129, "step": 126000 }, { "epoch": 36.25143843498274, "grad_norm": 1.6006780862808228, "learning_rate": 0.0012749712313003452, "loss": 0.5741, "step": 126010 }, { "epoch": 36.254315304948214, "grad_norm": 1.2136231660842896, "learning_rate": 0.0012749136939010357, "loss": 0.5129, "step": 126020 }, { "epoch": 36.25719217491369, "grad_norm": 0.8380433320999146, "learning_rate": 0.001274856156501726, "loss": 0.6497, "step": 126030 }, { "epoch": 36.260069044879174, "grad_norm": 1.1730093955993652, "learning_rate": 0.0012747986191024166, "loss": 0.5749, "step": 126040 }, { "epoch": 36.26294591484465, "grad_norm": 1.0226894617080688, "learning_rate": 0.0012747410817031072, "loss": 0.77, "step": 126050 }, { "epoch": 36.265822784810126, "grad_norm": 1.744449496269226, "learning_rate": 0.0012746835443037973, "loss": 0.5995, "step": 126060 }, { "epoch": 36.2686996547756, "grad_norm": 1.4133108854293823, "learning_rate": 0.0012746260069044879, "loss": 0.6703, "step": 126070 }, { "epoch": 36.27157652474108, "grad_norm": 1.2260280847549438, "learning_rate": 0.0012745684695051785, "loss": 0.5527, "step": 126080 }, { "epoch": 36.27445339470656, "grad_norm": 2.2193970680236816, "learning_rate": 0.0012745109321058688, "loss": 0.6395, "step": 126090 }, { "epoch": 36.27733026467204, "grad_norm": 1.5943964719772339, "learning_rate": 0.0012744533947065594, "loss": 0.5453, "step": 126100 }, { "epoch": 36.280207134637514, "grad_norm": 1.1735811233520508, "learning_rate": 0.00127439585730725, "loss": 0.706, "step": 126110 }, { "epoch": 36.28308400460299, "grad_norm": 1.9952794313430786, "learning_rate": 0.00127433831990794, "loss": 0.8062, "step": 126120 }, { "epoch": 36.28596087456847, "grad_norm": 1.0265097618103027, "learning_rate": 0.0012742807825086306, "loss": 0.6258, "step": 126130 }, { "epoch": 36.28883774453395, "grad_norm": 1.6225210428237915, "learning_rate": 0.0012742232451093212, "loss": 0.6776, "step": 126140 }, { "epoch": 36.291714614499426, "grad_norm": 1.0214929580688477, "learning_rate": 0.0012741657077100115, "loss": 0.5966, "step": 126150 }, { "epoch": 36.2945914844649, "grad_norm": 0.887089192867279, "learning_rate": 0.001274108170310702, "loss": 0.5161, "step": 126160 }, { "epoch": 36.29746835443038, "grad_norm": 0.9141277074813843, "learning_rate": 0.0012740506329113924, "loss": 0.6687, "step": 126170 }, { "epoch": 36.300345224395855, "grad_norm": 2.4808051586151123, "learning_rate": 0.0012739930955120828, "loss": 0.6621, "step": 126180 }, { "epoch": 36.30322209436134, "grad_norm": 1.608841896057129, "learning_rate": 0.0012739355581127734, "loss": 0.6619, "step": 126190 }, { "epoch": 36.306098964326814, "grad_norm": 0.8791751861572266, "learning_rate": 0.0012738780207134637, "loss": 0.571, "step": 126200 }, { "epoch": 36.30897583429229, "grad_norm": 1.1883338689804077, "learning_rate": 0.0012738204833141543, "loss": 0.5359, "step": 126210 }, { "epoch": 36.31185270425777, "grad_norm": 1.045730471611023, "learning_rate": 0.0012737629459148448, "loss": 0.6374, "step": 126220 }, { "epoch": 36.31472957422324, "grad_norm": 0.9412585496902466, "learning_rate": 0.001273705408515535, "loss": 0.5099, "step": 126230 }, { "epoch": 36.31760644418872, "grad_norm": 1.0768502950668335, "learning_rate": 0.0012736478711162255, "loss": 0.668, "step": 126240 }, { "epoch": 36.3204833141542, "grad_norm": 1.1230683326721191, "learning_rate": 0.001273590333716916, "loss": 0.504, "step": 126250 }, { "epoch": 36.32336018411968, "grad_norm": 1.0494372844696045, "learning_rate": 0.0012735327963176064, "loss": 0.4987, "step": 126260 }, { "epoch": 36.326237054085155, "grad_norm": 0.6392415165901184, "learning_rate": 0.001273475258918297, "loss": 0.474, "step": 126270 }, { "epoch": 36.32911392405063, "grad_norm": 1.317927598953247, "learning_rate": 0.0012734177215189873, "loss": 0.528, "step": 126280 }, { "epoch": 36.33199079401611, "grad_norm": 1.0872111320495605, "learning_rate": 0.0012733601841196777, "loss": 0.4581, "step": 126290 }, { "epoch": 36.33486766398159, "grad_norm": 1.2727601528167725, "learning_rate": 0.0012733026467203683, "loss": 0.7206, "step": 126300 }, { "epoch": 36.33774453394707, "grad_norm": 1.0725678205490112, "learning_rate": 0.0012732451093210586, "loss": 0.5135, "step": 126310 }, { "epoch": 36.34062140391254, "grad_norm": 1.071722149848938, "learning_rate": 0.0012731875719217492, "loss": 0.6458, "step": 126320 }, { "epoch": 36.34349827387802, "grad_norm": 1.4295933246612549, "learning_rate": 0.0012731300345224397, "loss": 0.5535, "step": 126330 }, { "epoch": 36.346375143843495, "grad_norm": 0.8820585012435913, "learning_rate": 0.00127307249712313, "loss": 0.7007, "step": 126340 }, { "epoch": 36.34925201380898, "grad_norm": 1.57330322265625, "learning_rate": 0.0012730149597238204, "loss": 0.6274, "step": 126350 }, { "epoch": 36.352128883774455, "grad_norm": 2.7876880168914795, "learning_rate": 0.001272957422324511, "loss": 0.594, "step": 126360 }, { "epoch": 36.35500575373993, "grad_norm": 1.0575494766235352, "learning_rate": 0.0012728998849252013, "loss": 0.5039, "step": 126370 }, { "epoch": 36.35788262370541, "grad_norm": 1.3250043392181396, "learning_rate": 0.001272842347525892, "loss": 0.5886, "step": 126380 }, { "epoch": 36.360759493670884, "grad_norm": 0.7119548916816711, "learning_rate": 0.0012727848101265822, "loss": 0.7318, "step": 126390 }, { "epoch": 36.36363636363637, "grad_norm": 1.1005758047103882, "learning_rate": 0.0012727272727272728, "loss": 0.6032, "step": 126400 }, { "epoch": 36.36651323360184, "grad_norm": 1.3117774724960327, "learning_rate": 0.0012726697353279632, "loss": 0.5173, "step": 126410 }, { "epoch": 36.36939010356732, "grad_norm": 1.5732495784759521, "learning_rate": 0.0012726121979286535, "loss": 0.6484, "step": 126420 }, { "epoch": 36.372266973532795, "grad_norm": 1.3958964347839355, "learning_rate": 0.001272554660529344, "loss": 0.5797, "step": 126430 }, { "epoch": 36.37514384349827, "grad_norm": 0.8793711066246033, "learning_rate": 0.0012724971231300346, "loss": 0.5873, "step": 126440 }, { "epoch": 36.378020713463755, "grad_norm": 0.8417361378669739, "learning_rate": 0.001272439585730725, "loss": 0.6235, "step": 126450 }, { "epoch": 36.38089758342923, "grad_norm": 1.669600248336792, "learning_rate": 0.0012723820483314155, "loss": 0.6716, "step": 126460 }, { "epoch": 36.38377445339471, "grad_norm": 1.1855560541152954, "learning_rate": 0.001272324510932106, "loss": 0.4948, "step": 126470 }, { "epoch": 36.386651323360184, "grad_norm": 1.6066057682037354, "learning_rate": 0.0012722669735327962, "loss": 0.7429, "step": 126480 }, { "epoch": 36.38952819332566, "grad_norm": 0.839127242565155, "learning_rate": 0.0012722094361334868, "loss": 0.6191, "step": 126490 }, { "epoch": 36.392405063291136, "grad_norm": 0.9235110878944397, "learning_rate": 0.0012721518987341772, "loss": 0.6137, "step": 126500 }, { "epoch": 36.39528193325662, "grad_norm": 1.506502389907837, "learning_rate": 0.0012720943613348677, "loss": 0.7573, "step": 126510 }, { "epoch": 36.398158803222096, "grad_norm": 1.8996909856796265, "learning_rate": 0.0012720368239355583, "loss": 0.6466, "step": 126520 }, { "epoch": 36.40103567318757, "grad_norm": 0.9732187986373901, "learning_rate": 0.0012719792865362484, "loss": 0.6434, "step": 126530 }, { "epoch": 36.40391254315305, "grad_norm": 1.010300874710083, "learning_rate": 0.001271921749136939, "loss": 0.6913, "step": 126540 }, { "epoch": 36.406789413118524, "grad_norm": 0.8756526708602905, "learning_rate": 0.0012718642117376295, "loss": 0.5402, "step": 126550 }, { "epoch": 36.40966628308401, "grad_norm": 0.9533424973487854, "learning_rate": 0.0012718066743383199, "loss": 0.484, "step": 126560 }, { "epoch": 36.412543153049484, "grad_norm": 1.9778820276260376, "learning_rate": 0.0012717491369390104, "loss": 0.715, "step": 126570 }, { "epoch": 36.41542002301496, "grad_norm": 1.1374964714050293, "learning_rate": 0.001271691599539701, "loss": 0.5907, "step": 126580 }, { "epoch": 36.418296892980436, "grad_norm": 1.7552380561828613, "learning_rate": 0.0012716340621403911, "loss": 0.7193, "step": 126590 }, { "epoch": 36.42117376294591, "grad_norm": 1.2666294574737549, "learning_rate": 0.0012715765247410817, "loss": 0.6159, "step": 126600 }, { "epoch": 36.424050632911396, "grad_norm": 1.03568696975708, "learning_rate": 0.001271518987341772, "loss": 0.7848, "step": 126610 }, { "epoch": 36.42692750287687, "grad_norm": 1.900466799736023, "learning_rate": 0.0012714614499424626, "loss": 0.6726, "step": 126620 }, { "epoch": 36.42980437284235, "grad_norm": 1.4830129146575928, "learning_rate": 0.0012714039125431532, "loss": 0.7322, "step": 126630 }, { "epoch": 36.432681242807824, "grad_norm": 1.1063501834869385, "learning_rate": 0.0012713463751438435, "loss": 0.6031, "step": 126640 }, { "epoch": 36.4355581127733, "grad_norm": 1.879227638244629, "learning_rate": 0.0012712888377445339, "loss": 0.7505, "step": 126650 }, { "epoch": 36.438434982738784, "grad_norm": 2.1179964542388916, "learning_rate": 0.0012712313003452244, "loss": 0.66, "step": 126660 }, { "epoch": 36.44131185270426, "grad_norm": 0.9219690561294556, "learning_rate": 0.0012711737629459148, "loss": 0.8049, "step": 126670 }, { "epoch": 36.444188722669736, "grad_norm": 1.4310929775238037, "learning_rate": 0.0012711162255466054, "loss": 0.5734, "step": 126680 }, { "epoch": 36.44706559263521, "grad_norm": 1.4033942222595215, "learning_rate": 0.001271058688147296, "loss": 0.7154, "step": 126690 }, { "epoch": 36.44994246260069, "grad_norm": 2.0250144004821777, "learning_rate": 0.0012710011507479863, "loss": 0.6683, "step": 126700 }, { "epoch": 36.452819332566165, "grad_norm": 0.975035548210144, "learning_rate": 0.0012709436133486766, "loss": 0.4997, "step": 126710 }, { "epoch": 36.45569620253165, "grad_norm": 1.9074701070785522, "learning_rate": 0.001270886075949367, "loss": 0.556, "step": 126720 }, { "epoch": 36.458573072497124, "grad_norm": 1.1450573205947876, "learning_rate": 0.0012708285385500575, "loss": 0.6017, "step": 126730 }, { "epoch": 36.4614499424626, "grad_norm": 0.7386209964752197, "learning_rate": 0.001270771001150748, "loss": 0.7384, "step": 126740 }, { "epoch": 36.46432681242808, "grad_norm": 0.7982977628707886, "learning_rate": 0.0012707134637514384, "loss": 0.6282, "step": 126750 }, { "epoch": 36.46720368239355, "grad_norm": 1.3045258522033691, "learning_rate": 0.001270655926352129, "loss": 0.6219, "step": 126760 }, { "epoch": 36.470080552359036, "grad_norm": 1.6811596155166626, "learning_rate": 0.0012705983889528193, "loss": 0.6672, "step": 126770 }, { "epoch": 36.47295742232451, "grad_norm": 1.8149099349975586, "learning_rate": 0.0012705408515535097, "loss": 0.6495, "step": 126780 }, { "epoch": 36.47583429228999, "grad_norm": 1.027329921722412, "learning_rate": 0.0012704833141542003, "loss": 0.5134, "step": 126790 }, { "epoch": 36.478711162255465, "grad_norm": 2.36164927482605, "learning_rate": 0.0012704257767548908, "loss": 0.7921, "step": 126800 }, { "epoch": 36.48158803222094, "grad_norm": 2.4409990310668945, "learning_rate": 0.0012703682393555812, "loss": 0.72, "step": 126810 }, { "epoch": 36.484464902186424, "grad_norm": 1.325250506401062, "learning_rate": 0.0012703107019562717, "loss": 0.542, "step": 126820 }, { "epoch": 36.4873417721519, "grad_norm": 1.1047439575195312, "learning_rate": 0.001270253164556962, "loss": 0.5819, "step": 126830 }, { "epoch": 36.49021864211738, "grad_norm": 1.3176897764205933, "learning_rate": 0.0012701956271576524, "loss": 0.4883, "step": 126840 }, { "epoch": 36.49309551208285, "grad_norm": 1.8605244159698486, "learning_rate": 0.001270138089758343, "loss": 0.5419, "step": 126850 }, { "epoch": 36.49597238204833, "grad_norm": 1.8240272998809814, "learning_rate": 0.0012700805523590333, "loss": 0.627, "step": 126860 }, { "epoch": 36.49884925201381, "grad_norm": 0.9040969014167786, "learning_rate": 0.001270023014959724, "loss": 0.621, "step": 126870 }, { "epoch": 36.50172612197929, "grad_norm": 2.184452533721924, "learning_rate": 0.0012699654775604145, "loss": 0.6347, "step": 126880 }, { "epoch": 36.504602991944765, "grad_norm": 1.1672760248184204, "learning_rate": 0.0012699079401611046, "loss": 0.5765, "step": 126890 }, { "epoch": 36.50747986191024, "grad_norm": 1.556363821029663, "learning_rate": 0.0012698504027617952, "loss": 0.5346, "step": 126900 }, { "epoch": 36.51035673187572, "grad_norm": 1.0398895740509033, "learning_rate": 0.0012697928653624857, "loss": 0.6673, "step": 126910 }, { "epoch": 36.51323360184119, "grad_norm": 1.0696253776550293, "learning_rate": 0.001269735327963176, "loss": 0.6266, "step": 126920 }, { "epoch": 36.51611047180668, "grad_norm": 1.0411491394042969, "learning_rate": 0.0012696777905638666, "loss": 0.6666, "step": 126930 }, { "epoch": 36.51898734177215, "grad_norm": 1.1682348251342773, "learning_rate": 0.0012696202531645572, "loss": 0.6297, "step": 126940 }, { "epoch": 36.52186421173763, "grad_norm": 1.8775501251220703, "learning_rate": 0.0012695627157652473, "loss": 0.7339, "step": 126950 }, { "epoch": 36.524741081703105, "grad_norm": 1.0264955759048462, "learning_rate": 0.0012695051783659379, "loss": 0.6272, "step": 126960 }, { "epoch": 36.52761795166858, "grad_norm": 1.0813900232315063, "learning_rate": 0.0012694476409666282, "loss": 0.6083, "step": 126970 }, { "epoch": 36.530494821634065, "grad_norm": 0.8165217041969299, "learning_rate": 0.0012693901035673188, "loss": 0.5567, "step": 126980 }, { "epoch": 36.53337169159954, "grad_norm": 1.99833345413208, "learning_rate": 0.0012693325661680094, "loss": 0.5581, "step": 126990 }, { "epoch": 36.53624856156502, "grad_norm": 1.1657344102859497, "learning_rate": 0.0012692750287686997, "loss": 0.6398, "step": 127000 }, { "epoch": 36.53912543153049, "grad_norm": 2.280395269393921, "learning_rate": 0.00126921749136939, "loss": 0.6561, "step": 127010 }, { "epoch": 36.54200230149597, "grad_norm": 1.1086143255233765, "learning_rate": 0.0012691599539700806, "loss": 0.6355, "step": 127020 }, { "epoch": 36.54487917146145, "grad_norm": 0.9574507474899292, "learning_rate": 0.001269102416570771, "loss": 0.6297, "step": 127030 }, { "epoch": 36.54775604142693, "grad_norm": 2.0922813415527344, "learning_rate": 0.0012690448791714615, "loss": 0.6509, "step": 127040 }, { "epoch": 36.550632911392405, "grad_norm": 1.2919601202011108, "learning_rate": 0.001268987341772152, "loss": 0.7622, "step": 127050 }, { "epoch": 36.55350978135788, "grad_norm": 1.9782531261444092, "learning_rate": 0.0012689298043728422, "loss": 0.6316, "step": 127060 }, { "epoch": 36.55638665132336, "grad_norm": 1.142635703086853, "learning_rate": 0.0012688722669735328, "loss": 0.7406, "step": 127070 }, { "epoch": 36.55926352128884, "grad_norm": 0.8783445358276367, "learning_rate": 0.0012688147295742231, "loss": 0.5773, "step": 127080 }, { "epoch": 36.56214039125432, "grad_norm": 1.696244239807129, "learning_rate": 0.0012687571921749137, "loss": 0.7306, "step": 127090 }, { "epoch": 36.56501726121979, "grad_norm": 1.7931251525878906, "learning_rate": 0.0012686996547756043, "loss": 0.8466, "step": 127100 }, { "epoch": 36.56789413118527, "grad_norm": 1.13631010055542, "learning_rate": 0.0012686421173762946, "loss": 0.5073, "step": 127110 }, { "epoch": 36.570771001150746, "grad_norm": 1.0675971508026123, "learning_rate": 0.001268584579976985, "loss": 0.5733, "step": 127120 }, { "epoch": 36.57364787111622, "grad_norm": 0.9454900622367859, "learning_rate": 0.0012685270425776755, "loss": 0.5078, "step": 127130 }, { "epoch": 36.576524741081705, "grad_norm": 1.0112086534500122, "learning_rate": 0.0012684695051783659, "loss": 0.7655, "step": 127140 }, { "epoch": 36.57940161104718, "grad_norm": 0.9128594994544983, "learning_rate": 0.0012684119677790564, "loss": 0.6119, "step": 127150 }, { "epoch": 36.58227848101266, "grad_norm": 1.1648833751678467, "learning_rate": 0.001268354430379747, "loss": 0.5606, "step": 127160 }, { "epoch": 36.585155350978134, "grad_norm": 1.9042155742645264, "learning_rate": 0.0012682968929804373, "loss": 0.593, "step": 127170 }, { "epoch": 36.58803222094361, "grad_norm": 1.1101655960083008, "learning_rate": 0.0012682393555811277, "loss": 0.5694, "step": 127180 }, { "epoch": 36.59090909090909, "grad_norm": 1.2990901470184326, "learning_rate": 0.001268181818181818, "loss": 0.6033, "step": 127190 }, { "epoch": 36.59378596087457, "grad_norm": 1.3757354021072388, "learning_rate": 0.0012681242807825086, "loss": 0.6977, "step": 127200 }, { "epoch": 36.596662830840046, "grad_norm": 0.6497424244880676, "learning_rate": 0.0012680667433831992, "loss": 0.6028, "step": 127210 }, { "epoch": 36.59953970080552, "grad_norm": 1.7281279563903809, "learning_rate": 0.0012680092059838895, "loss": 0.7951, "step": 127220 }, { "epoch": 36.602416570771, "grad_norm": 1.7125178575515747, "learning_rate": 0.00126795166858458, "loss": 0.7167, "step": 127230 }, { "epoch": 36.60529344073648, "grad_norm": 0.8191421031951904, "learning_rate": 0.0012678941311852704, "loss": 0.5608, "step": 127240 }, { "epoch": 36.60817031070196, "grad_norm": 1.6195545196533203, "learning_rate": 0.0012678365937859608, "loss": 0.7214, "step": 127250 }, { "epoch": 36.611047180667434, "grad_norm": 0.9329208135604858, "learning_rate": 0.0012677790563866513, "loss": 0.5834, "step": 127260 }, { "epoch": 36.61392405063291, "grad_norm": 2.0920326709747314, "learning_rate": 0.001267721518987342, "loss": 0.5417, "step": 127270 }, { "epoch": 36.616800920598386, "grad_norm": 0.5506436228752136, "learning_rate": 0.0012676639815880322, "loss": 0.6916, "step": 127280 }, { "epoch": 36.61967779056387, "grad_norm": 1.8843176364898682, "learning_rate": 0.0012676064441887228, "loss": 0.5837, "step": 127290 }, { "epoch": 36.622554660529346, "grad_norm": 1.2502808570861816, "learning_rate": 0.001267548906789413, "loss": 0.8061, "step": 127300 }, { "epoch": 36.62543153049482, "grad_norm": 1.4614540338516235, "learning_rate": 0.0012674913693901035, "loss": 0.6675, "step": 127310 }, { "epoch": 36.6283084004603, "grad_norm": 1.9326924085617065, "learning_rate": 0.001267433831990794, "loss": 0.5362, "step": 127320 }, { "epoch": 36.631185270425775, "grad_norm": 0.8571163415908813, "learning_rate": 0.0012673762945914844, "loss": 0.7267, "step": 127330 }, { "epoch": 36.63406214039125, "grad_norm": 1.4834768772125244, "learning_rate": 0.001267318757192175, "loss": 0.669, "step": 127340 }, { "epoch": 36.636939010356734, "grad_norm": 1.0728676319122314, "learning_rate": 0.0012672612197928655, "loss": 0.5238, "step": 127350 }, { "epoch": 36.63981588032221, "grad_norm": 2.099208354949951, "learning_rate": 0.0012672036823935557, "loss": 0.8094, "step": 127360 }, { "epoch": 36.64269275028769, "grad_norm": 1.3210004568099976, "learning_rate": 0.0012671461449942462, "loss": 0.8849, "step": 127370 }, { "epoch": 36.64556962025316, "grad_norm": 1.4554837942123413, "learning_rate": 0.0012670886075949368, "loss": 0.5496, "step": 127380 }, { "epoch": 36.64844649021864, "grad_norm": 1.382813572883606, "learning_rate": 0.0012670310701956271, "loss": 0.7856, "step": 127390 }, { "epoch": 36.65132336018412, "grad_norm": 0.5839675664901733, "learning_rate": 0.0012669735327963177, "loss": 0.6921, "step": 127400 }, { "epoch": 36.6542002301496, "grad_norm": 1.3241089582443237, "learning_rate": 0.0012669159953970083, "loss": 0.5194, "step": 127410 }, { "epoch": 36.657077100115075, "grad_norm": 1.3641377687454224, "learning_rate": 0.0012668584579976984, "loss": 0.5835, "step": 127420 }, { "epoch": 36.65995397008055, "grad_norm": 2.259657859802246, "learning_rate": 0.001266800920598389, "loss": 0.7828, "step": 127430 }, { "epoch": 36.66283084004603, "grad_norm": 0.638289749622345, "learning_rate": 0.0012667433831990793, "loss": 0.6053, "step": 127440 }, { "epoch": 36.66570771001151, "grad_norm": 0.7276908755302429, "learning_rate": 0.0012666858457997699, "loss": 0.6085, "step": 127450 }, { "epoch": 36.66858457997699, "grad_norm": 0.8443387150764465, "learning_rate": 0.0012666283084004604, "loss": 0.7325, "step": 127460 }, { "epoch": 36.67146144994246, "grad_norm": 1.3691662549972534, "learning_rate": 0.0012665707710011508, "loss": 0.6511, "step": 127470 }, { "epoch": 36.67433831990794, "grad_norm": 1.1603411436080933, "learning_rate": 0.0012665132336018411, "loss": 0.5977, "step": 127480 }, { "epoch": 36.677215189873415, "grad_norm": 1.3308758735656738, "learning_rate": 0.0012664556962025317, "loss": 0.465, "step": 127490 }, { "epoch": 36.6800920598389, "grad_norm": 1.8382554054260254, "learning_rate": 0.001266398158803222, "loss": 0.637, "step": 127500 }, { "epoch": 36.682968929804375, "grad_norm": 1.087557315826416, "learning_rate": 0.0012663406214039126, "loss": 0.6904, "step": 127510 }, { "epoch": 36.68584579976985, "grad_norm": 3.9717183113098145, "learning_rate": 0.0012662830840046032, "loss": 0.7925, "step": 127520 }, { "epoch": 36.68872266973533, "grad_norm": 1.0185438394546509, "learning_rate": 0.0012662255466052935, "loss": 0.6295, "step": 127530 }, { "epoch": 36.6915995397008, "grad_norm": 1.0864461660385132, "learning_rate": 0.0012661680092059839, "loss": 0.53, "step": 127540 }, { "epoch": 36.69447640966629, "grad_norm": 1.3081015348434448, "learning_rate": 0.0012661104718066742, "loss": 0.5151, "step": 127550 }, { "epoch": 36.69735327963176, "grad_norm": 1.2692400217056274, "learning_rate": 0.0012660529344073648, "loss": 0.7167, "step": 127560 }, { "epoch": 36.70023014959724, "grad_norm": 2.3792831897735596, "learning_rate": 0.0012659953970080553, "loss": 0.6504, "step": 127570 }, { "epoch": 36.703107019562715, "grad_norm": 1.7127224206924438, "learning_rate": 0.0012659378596087457, "loss": 0.7337, "step": 127580 }, { "epoch": 36.70598388952819, "grad_norm": 1.0763624906539917, "learning_rate": 0.0012658803222094363, "loss": 0.6623, "step": 127590 }, { "epoch": 36.70886075949367, "grad_norm": 0.8359348177909851, "learning_rate": 0.0012658227848101266, "loss": 0.5516, "step": 127600 }, { "epoch": 36.71173762945915, "grad_norm": 1.1462846994400024, "learning_rate": 0.001265765247410817, "loss": 0.6318, "step": 127610 }, { "epoch": 36.71461449942463, "grad_norm": 0.7598323822021484, "learning_rate": 0.0012657077100115075, "loss": 0.5564, "step": 127620 }, { "epoch": 36.7174913693901, "grad_norm": 1.5113729238510132, "learning_rate": 0.001265650172612198, "loss": 0.5608, "step": 127630 }, { "epoch": 36.72036823935558, "grad_norm": 0.907637894153595, "learning_rate": 0.0012655926352128884, "loss": 0.7083, "step": 127640 }, { "epoch": 36.723245109321056, "grad_norm": 0.6223429441452026, "learning_rate": 0.001265535097813579, "loss": 0.5143, "step": 127650 }, { "epoch": 36.72612197928654, "grad_norm": 0.9525299072265625, "learning_rate": 0.0012654775604142691, "loss": 0.5673, "step": 127660 }, { "epoch": 36.728998849252015, "grad_norm": 0.6829152703285217, "learning_rate": 0.0012654200230149597, "loss": 0.7265, "step": 127670 }, { "epoch": 36.73187571921749, "grad_norm": 1.0926063060760498, "learning_rate": 0.0012653624856156502, "loss": 0.6399, "step": 127680 }, { "epoch": 36.73475258918297, "grad_norm": 1.4691461324691772, "learning_rate": 0.0012653049482163406, "loss": 0.7174, "step": 127690 }, { "epoch": 36.737629459148444, "grad_norm": 2.0420382022857666, "learning_rate": 0.0012652474108170312, "loss": 0.7015, "step": 127700 }, { "epoch": 36.74050632911393, "grad_norm": 0.7931090593338013, "learning_rate": 0.0012651898734177217, "loss": 0.6312, "step": 127710 }, { "epoch": 36.7433831990794, "grad_norm": 1.2702046632766724, "learning_rate": 0.0012651323360184119, "loss": 0.8356, "step": 127720 }, { "epoch": 36.74626006904488, "grad_norm": 2.1431467533111572, "learning_rate": 0.0012650747986191024, "loss": 0.7194, "step": 127730 }, { "epoch": 36.749136939010356, "grad_norm": 2.0835018157958984, "learning_rate": 0.001265017261219793, "loss": 0.5734, "step": 127740 }, { "epoch": 36.75201380897583, "grad_norm": 1.0369479656219482, "learning_rate": 0.0012649597238204833, "loss": 0.6652, "step": 127750 }, { "epoch": 36.754890678941315, "grad_norm": 2.073699951171875, "learning_rate": 0.001264902186421174, "loss": 0.6875, "step": 127760 }, { "epoch": 36.75776754890679, "grad_norm": 1.0324898958206177, "learning_rate": 0.0012648446490218642, "loss": 0.6918, "step": 127770 }, { "epoch": 36.76064441887227, "grad_norm": 1.1846942901611328, "learning_rate": 0.0012647871116225546, "loss": 0.7768, "step": 127780 }, { "epoch": 36.763521288837744, "grad_norm": 1.9322503805160522, "learning_rate": 0.0012647295742232452, "loss": 0.6262, "step": 127790 }, { "epoch": 36.76639815880322, "grad_norm": 1.1405991315841675, "learning_rate": 0.0012646720368239355, "loss": 0.6213, "step": 127800 }, { "epoch": 36.769275028768696, "grad_norm": 0.742459237575531, "learning_rate": 0.001264614499424626, "loss": 0.5743, "step": 127810 }, { "epoch": 36.77215189873418, "grad_norm": 2.4619741439819336, "learning_rate": 0.0012645569620253166, "loss": 0.7258, "step": 127820 }, { "epoch": 36.775028768699656, "grad_norm": 1.2507786750793457, "learning_rate": 0.001264499424626007, "loss": 0.5461, "step": 127830 }, { "epoch": 36.77790563866513, "grad_norm": 1.2619808912277222, "learning_rate": 0.0012644418872266973, "loss": 0.5804, "step": 127840 }, { "epoch": 36.78078250863061, "grad_norm": 1.4777978658676147, "learning_rate": 0.0012643843498273879, "loss": 0.6603, "step": 127850 }, { "epoch": 36.783659378596084, "grad_norm": 2.0277342796325684, "learning_rate": 0.0012643268124280782, "loss": 0.7324, "step": 127860 }, { "epoch": 36.78653624856157, "grad_norm": 1.0836637020111084, "learning_rate": 0.0012642692750287688, "loss": 0.6468, "step": 127870 }, { "epoch": 36.789413118527044, "grad_norm": 1.3189220428466797, "learning_rate": 0.0012642117376294591, "loss": 0.7329, "step": 127880 }, { "epoch": 36.79228998849252, "grad_norm": 0.8206143975257874, "learning_rate": 0.0012641542002301495, "loss": 0.6142, "step": 127890 }, { "epoch": 36.795166858457996, "grad_norm": 1.575449824333191, "learning_rate": 0.00126409666283084, "loss": 0.6284, "step": 127900 }, { "epoch": 36.79804372842347, "grad_norm": 1.5172853469848633, "learning_rate": 0.0012640391254315304, "loss": 0.6183, "step": 127910 }, { "epoch": 36.800920598388956, "grad_norm": 1.142655611038208, "learning_rate": 0.001263981588032221, "loss": 0.6369, "step": 127920 }, { "epoch": 36.80379746835443, "grad_norm": 1.6250145435333252, "learning_rate": 0.0012639240506329115, "loss": 0.8863, "step": 127930 }, { "epoch": 36.80667433831991, "grad_norm": 1.848667860031128, "learning_rate": 0.0012638665132336019, "loss": 0.7807, "step": 127940 }, { "epoch": 36.809551208285384, "grad_norm": 1.2651079893112183, "learning_rate": 0.0012638089758342922, "loss": 0.7016, "step": 127950 }, { "epoch": 36.81242807825086, "grad_norm": 1.459818720817566, "learning_rate": 0.0012637514384349828, "loss": 0.6648, "step": 127960 }, { "epoch": 36.815304948216344, "grad_norm": 1.1152372360229492, "learning_rate": 0.0012636939010356731, "loss": 0.5803, "step": 127970 }, { "epoch": 36.81818181818182, "grad_norm": 1.421373724937439, "learning_rate": 0.0012636363636363637, "loss": 0.7165, "step": 127980 }, { "epoch": 36.821058688147296, "grad_norm": 2.4324216842651367, "learning_rate": 0.001263578826237054, "loss": 0.6004, "step": 127990 }, { "epoch": 36.82393555811277, "grad_norm": 2.5857136249542236, "learning_rate": 0.0012635212888377446, "loss": 0.6328, "step": 128000 }, { "epoch": 36.82681242807825, "grad_norm": 1.4060817956924438, "learning_rate": 0.001263463751438435, "loss": 0.6444, "step": 128010 }, { "epoch": 36.829689298043725, "grad_norm": 0.5088091492652893, "learning_rate": 0.0012634062140391253, "loss": 0.7768, "step": 128020 }, { "epoch": 36.83256616800921, "grad_norm": 1.9520972967147827, "learning_rate": 0.0012633486766398159, "loss": 0.8209, "step": 128030 }, { "epoch": 36.835443037974684, "grad_norm": 1.5712286233901978, "learning_rate": 0.0012632911392405064, "loss": 0.558, "step": 128040 }, { "epoch": 36.83831990794016, "grad_norm": 0.9078832864761353, "learning_rate": 0.0012632336018411968, "loss": 0.722, "step": 128050 }, { "epoch": 36.84119677790564, "grad_norm": 1.1536072492599487, "learning_rate": 0.0012631760644418873, "loss": 0.5707, "step": 128060 }, { "epoch": 36.84407364787111, "grad_norm": 1.667487621307373, "learning_rate": 0.0012631185270425777, "loss": 0.6864, "step": 128070 }, { "epoch": 36.846950517836596, "grad_norm": 1.3547097444534302, "learning_rate": 0.001263060989643268, "loss": 0.6348, "step": 128080 }, { "epoch": 36.84982738780207, "grad_norm": 0.8408635258674622, "learning_rate": 0.0012630034522439586, "loss": 0.7426, "step": 128090 }, { "epoch": 36.85270425776755, "grad_norm": 1.310015320777893, "learning_rate": 0.0012629459148446492, "loss": 0.566, "step": 128100 }, { "epoch": 36.855581127733025, "grad_norm": 1.0369030237197876, "learning_rate": 0.0012628883774453395, "loss": 0.5109, "step": 128110 }, { "epoch": 36.8584579976985, "grad_norm": 1.9929766654968262, "learning_rate": 0.00126283084004603, "loss": 0.6426, "step": 128120 }, { "epoch": 36.861334867663984, "grad_norm": 2.061868906021118, "learning_rate": 0.0012627733026467202, "loss": 0.6978, "step": 128130 }, { "epoch": 36.86421173762946, "grad_norm": 1.0895980596542358, "learning_rate": 0.0012627157652474108, "loss": 0.579, "step": 128140 }, { "epoch": 36.86708860759494, "grad_norm": 1.092370867729187, "learning_rate": 0.0012626582278481013, "loss": 0.6251, "step": 128150 }, { "epoch": 36.86996547756041, "grad_norm": 0.7343124151229858, "learning_rate": 0.0012626006904487917, "loss": 0.4697, "step": 128160 }, { "epoch": 36.87284234752589, "grad_norm": 1.5512491464614868, "learning_rate": 0.0012625431530494822, "loss": 0.6506, "step": 128170 }, { "epoch": 36.87571921749137, "grad_norm": 1.2621349096298218, "learning_rate": 0.0012624856156501728, "loss": 0.6212, "step": 128180 }, { "epoch": 36.87859608745685, "grad_norm": 0.9078325629234314, "learning_rate": 0.001262428078250863, "loss": 0.5987, "step": 128190 }, { "epoch": 36.881472957422325, "grad_norm": 1.1625697612762451, "learning_rate": 0.0012623705408515535, "loss": 0.7853, "step": 128200 }, { "epoch": 36.8843498273878, "grad_norm": 1.526418685913086, "learning_rate": 0.001262313003452244, "loss": 0.6402, "step": 128210 }, { "epoch": 36.88722669735328, "grad_norm": 0.8077722191810608, "learning_rate": 0.0012622554660529344, "loss": 0.8377, "step": 128220 }, { "epoch": 36.89010356731876, "grad_norm": 1.464866280555725, "learning_rate": 0.001262197928653625, "loss": 0.6367, "step": 128230 }, { "epoch": 36.89298043728424, "grad_norm": 1.371598243713379, "learning_rate": 0.0012621403912543153, "loss": 0.6111, "step": 128240 }, { "epoch": 36.89585730724971, "grad_norm": 2.007202386856079, "learning_rate": 0.0012620828538550057, "loss": 0.7331, "step": 128250 }, { "epoch": 36.89873417721519, "grad_norm": 1.3994864225387573, "learning_rate": 0.0012620253164556962, "loss": 0.5183, "step": 128260 }, { "epoch": 36.901611047180666, "grad_norm": 1.2459489107131958, "learning_rate": 0.0012619677790563866, "loss": 0.5612, "step": 128270 }, { "epoch": 36.90448791714614, "grad_norm": 0.8164500594139099, "learning_rate": 0.0012619102416570771, "loss": 0.6517, "step": 128280 }, { "epoch": 36.907364787111625, "grad_norm": 1.0715750455856323, "learning_rate": 0.0012618527042577677, "loss": 0.6287, "step": 128290 }, { "epoch": 36.9102416570771, "grad_norm": 2.1415531635284424, "learning_rate": 0.001261795166858458, "loss": 0.8571, "step": 128300 }, { "epoch": 36.91311852704258, "grad_norm": 1.1168067455291748, "learning_rate": 0.0012617376294591484, "loss": 0.4636, "step": 128310 }, { "epoch": 36.915995397008054, "grad_norm": 1.0834888219833374, "learning_rate": 0.001261680092059839, "loss": 0.6356, "step": 128320 }, { "epoch": 36.91887226697353, "grad_norm": 0.9716507792472839, "learning_rate": 0.0012616225546605293, "loss": 0.4918, "step": 128330 }, { "epoch": 36.92174913693901, "grad_norm": 0.9352133870124817, "learning_rate": 0.0012615650172612199, "loss": 0.7478, "step": 128340 }, { "epoch": 36.92462600690449, "grad_norm": 2.169790267944336, "learning_rate": 0.0012615074798619102, "loss": 0.6631, "step": 128350 }, { "epoch": 36.927502876869966, "grad_norm": 1.1130183935165405, "learning_rate": 0.0012614499424626008, "loss": 0.726, "step": 128360 }, { "epoch": 36.93037974683544, "grad_norm": 0.9316884875297546, "learning_rate": 0.0012613924050632911, "loss": 0.7816, "step": 128370 }, { "epoch": 36.93325661680092, "grad_norm": 1.0796068906784058, "learning_rate": 0.0012613348676639815, "loss": 0.5823, "step": 128380 }, { "epoch": 36.9361334867664, "grad_norm": 0.7896091938018799, "learning_rate": 0.001261277330264672, "loss": 0.7062, "step": 128390 }, { "epoch": 36.93901035673188, "grad_norm": 1.066789150238037, "learning_rate": 0.0012612197928653626, "loss": 0.6492, "step": 128400 }, { "epoch": 36.941887226697354, "grad_norm": 1.6486248970031738, "learning_rate": 0.001261162255466053, "loss": 0.6957, "step": 128410 }, { "epoch": 36.94476409666283, "grad_norm": 0.9558826088905334, "learning_rate": 0.0012611047180667435, "loss": 0.5028, "step": 128420 }, { "epoch": 36.947640966628306, "grad_norm": 1.67715322971344, "learning_rate": 0.0012610471806674339, "loss": 0.6913, "step": 128430 }, { "epoch": 36.95051783659379, "grad_norm": 1.2316062450408936, "learning_rate": 0.0012609896432681242, "loss": 0.562, "step": 128440 }, { "epoch": 36.953394706559266, "grad_norm": 0.8590080738067627, "learning_rate": 0.0012609321058688148, "loss": 0.7457, "step": 128450 }, { "epoch": 36.95627157652474, "grad_norm": 1.3040884733200073, "learning_rate": 0.0012608745684695051, "loss": 0.59, "step": 128460 }, { "epoch": 36.95914844649022, "grad_norm": 1.6328054666519165, "learning_rate": 0.0012608170310701957, "loss": 0.7024, "step": 128470 }, { "epoch": 36.962025316455694, "grad_norm": 1.488634467124939, "learning_rate": 0.0012607594936708863, "loss": 0.6317, "step": 128480 }, { "epoch": 36.96490218642117, "grad_norm": 1.5075939893722534, "learning_rate": 0.0012607019562715764, "loss": 0.647, "step": 128490 }, { "epoch": 36.967779056386654, "grad_norm": 1.393699288368225, "learning_rate": 0.001260644418872267, "loss": 0.6995, "step": 128500 }, { "epoch": 36.97065592635213, "grad_norm": 1.1738756895065308, "learning_rate": 0.0012605868814729575, "loss": 0.5707, "step": 128510 }, { "epoch": 36.973532796317606, "grad_norm": 1.1338598728179932, "learning_rate": 0.0012605293440736479, "loss": 0.7275, "step": 128520 }, { "epoch": 36.97640966628308, "grad_norm": 1.011391282081604, "learning_rate": 0.0012604718066743384, "loss": 0.6299, "step": 128530 }, { "epoch": 36.97928653624856, "grad_norm": 0.9581730961799622, "learning_rate": 0.001260414269275029, "loss": 0.6197, "step": 128540 }, { "epoch": 36.98216340621404, "grad_norm": 1.2789883613586426, "learning_rate": 0.0012603567318757191, "loss": 0.6661, "step": 128550 }, { "epoch": 36.98504027617952, "grad_norm": 1.387355089187622, "learning_rate": 0.0012602991944764097, "loss": 0.5511, "step": 128560 }, { "epoch": 36.987917146144994, "grad_norm": 1.1801562309265137, "learning_rate": 0.0012602416570771, "loss": 0.7617, "step": 128570 }, { "epoch": 36.99079401611047, "grad_norm": 1.520608901977539, "learning_rate": 0.0012601841196777906, "loss": 0.7674, "step": 128580 }, { "epoch": 36.99367088607595, "grad_norm": 0.6949547529220581, "learning_rate": 0.0012601265822784812, "loss": 0.6686, "step": 128590 }, { "epoch": 36.99654775604143, "grad_norm": 1.417223572731018, "learning_rate": 0.0012600690448791715, "loss": 0.6012, "step": 128600 }, { "epoch": 36.999424626006906, "grad_norm": 0.9081017374992371, "learning_rate": 0.0012600115074798619, "loss": 0.5894, "step": 128610 }, { "epoch": 37.00230149597238, "grad_norm": 0.5636569857597351, "learning_rate": 0.0012599539700805524, "loss": 0.4193, "step": 128620 }, { "epoch": 37.00517836593786, "grad_norm": 1.5236592292785645, "learning_rate": 0.0012598964326812428, "loss": 0.5968, "step": 128630 }, { "epoch": 37.008055235903335, "grad_norm": 1.6343876123428345, "learning_rate": 0.0012598388952819333, "loss": 0.6407, "step": 128640 }, { "epoch": 37.01093210586882, "grad_norm": 1.850114107131958, "learning_rate": 0.001259781357882624, "loss": 0.5335, "step": 128650 }, { "epoch": 37.013808975834294, "grad_norm": 1.4339557886123657, "learning_rate": 0.0012597238204833142, "loss": 0.7547, "step": 128660 }, { "epoch": 37.01668584579977, "grad_norm": 2.155024528503418, "learning_rate": 0.0012596662830840046, "loss": 0.4959, "step": 128670 }, { "epoch": 37.01956271576525, "grad_norm": 1.2518736124038696, "learning_rate": 0.001259608745684695, "loss": 0.4369, "step": 128680 }, { "epoch": 37.02243958573072, "grad_norm": 1.4253766536712646, "learning_rate": 0.0012595512082853855, "loss": 0.4281, "step": 128690 }, { "epoch": 37.0253164556962, "grad_norm": 2.1200428009033203, "learning_rate": 0.001259493670886076, "loss": 0.6846, "step": 128700 }, { "epoch": 37.02819332566168, "grad_norm": 2.2408268451690674, "learning_rate": 0.0012594361334867664, "loss": 0.6865, "step": 128710 }, { "epoch": 37.03107019562716, "grad_norm": 0.49352186918258667, "learning_rate": 0.0012593785960874568, "loss": 0.4889, "step": 128720 }, { "epoch": 37.033947065592635, "grad_norm": 1.2610218524932861, "learning_rate": 0.0012593210586881473, "loss": 0.4771, "step": 128730 }, { "epoch": 37.03682393555811, "grad_norm": 1.4603346586227417, "learning_rate": 0.0012592635212888377, "loss": 0.6772, "step": 128740 }, { "epoch": 37.03970080552359, "grad_norm": 1.5577552318572998, "learning_rate": 0.0012592059838895282, "loss": 0.6015, "step": 128750 }, { "epoch": 37.04257767548907, "grad_norm": 1.174912691116333, "learning_rate": 0.0012591484464902188, "loss": 0.5189, "step": 128760 }, { "epoch": 37.04545454545455, "grad_norm": 1.579185962677002, "learning_rate": 0.0012590909090909091, "loss": 0.6088, "step": 128770 }, { "epoch": 37.04833141542002, "grad_norm": 2.2996666431427, "learning_rate": 0.0012590333716915995, "loss": 0.7941, "step": 128780 }, { "epoch": 37.0512082853855, "grad_norm": 0.6721940636634827, "learning_rate": 0.00125897583429229, "loss": 0.6096, "step": 128790 }, { "epoch": 37.054085155350975, "grad_norm": 1.683647632598877, "learning_rate": 0.0012589182968929804, "loss": 0.5708, "step": 128800 }, { "epoch": 37.05696202531646, "grad_norm": 0.8166724443435669, "learning_rate": 0.001258860759493671, "loss": 0.5345, "step": 128810 }, { "epoch": 37.059838895281935, "grad_norm": 1.412353754043579, "learning_rate": 0.0012588032220943613, "loss": 0.6501, "step": 128820 }, { "epoch": 37.06271576524741, "grad_norm": 1.5825763940811157, "learning_rate": 0.0012587456846950519, "loss": 0.6028, "step": 128830 }, { "epoch": 37.06559263521289, "grad_norm": 1.3585841655731201, "learning_rate": 0.0012586881472957422, "loss": 0.5193, "step": 128840 }, { "epoch": 37.06846950517836, "grad_norm": 1.1533737182617188, "learning_rate": 0.0012586306098964326, "loss": 0.5688, "step": 128850 }, { "epoch": 37.07134637514385, "grad_norm": 1.040914535522461, "learning_rate": 0.0012585730724971231, "loss": 0.5908, "step": 128860 }, { "epoch": 37.07422324510932, "grad_norm": 1.6286251544952393, "learning_rate": 0.0012585155350978137, "loss": 0.6065, "step": 128870 }, { "epoch": 37.0771001150748, "grad_norm": 2.1478559970855713, "learning_rate": 0.001258457997698504, "loss": 0.5651, "step": 128880 }, { "epoch": 37.079976985040275, "grad_norm": 0.9147481322288513, "learning_rate": 0.0012584004602991946, "loss": 0.4933, "step": 128890 }, { "epoch": 37.08285385500575, "grad_norm": 1.1540318727493286, "learning_rate": 0.001258342922899885, "loss": 0.6694, "step": 128900 }, { "epoch": 37.08573072497123, "grad_norm": 1.1260430812835693, "learning_rate": 0.0012582853855005753, "loss": 0.768, "step": 128910 }, { "epoch": 37.08860759493671, "grad_norm": 0.9582189917564392, "learning_rate": 0.0012582278481012659, "loss": 0.7658, "step": 128920 }, { "epoch": 37.09148446490219, "grad_norm": 1.3428642749786377, "learning_rate": 0.0012581703107019562, "loss": 0.5801, "step": 128930 }, { "epoch": 37.09436133486766, "grad_norm": 0.8901224136352539, "learning_rate": 0.0012581127733026468, "loss": 0.5786, "step": 128940 }, { "epoch": 37.09723820483314, "grad_norm": 1.1040058135986328, "learning_rate": 0.0012580552359033373, "loss": 0.6237, "step": 128950 }, { "epoch": 37.100115074798616, "grad_norm": 1.9250742197036743, "learning_rate": 0.0012579976985040275, "loss": 0.5532, "step": 128960 }, { "epoch": 37.1029919447641, "grad_norm": 0.9793703556060791, "learning_rate": 0.001257940161104718, "loss": 0.5727, "step": 128970 }, { "epoch": 37.105868814729575, "grad_norm": 1.2964624166488647, "learning_rate": 0.0012578826237054086, "loss": 0.6777, "step": 128980 }, { "epoch": 37.10874568469505, "grad_norm": 0.7439241409301758, "learning_rate": 0.001257825086306099, "loss": 0.534, "step": 128990 }, { "epoch": 37.11162255466053, "grad_norm": 0.9059264659881592, "learning_rate": 0.0012577675489067895, "loss": 0.4898, "step": 129000 }, { "epoch": 37.114499424626004, "grad_norm": 1.2150644063949585, "learning_rate": 0.00125771001150748, "loss": 0.5232, "step": 129010 }, { "epoch": 37.11737629459149, "grad_norm": 1.044272541999817, "learning_rate": 0.0012576524741081702, "loss": 0.5522, "step": 129020 }, { "epoch": 37.120253164556964, "grad_norm": 1.6632506847381592, "learning_rate": 0.0012575949367088608, "loss": 0.5954, "step": 129030 }, { "epoch": 37.12313003452244, "grad_norm": 0.9249413013458252, "learning_rate": 0.0012575373993095511, "loss": 0.5902, "step": 129040 }, { "epoch": 37.126006904487916, "grad_norm": 2.1340839862823486, "learning_rate": 0.0012574798619102417, "loss": 0.6211, "step": 129050 }, { "epoch": 37.12888377445339, "grad_norm": 0.7289341688156128, "learning_rate": 0.0012574223245109322, "loss": 0.5287, "step": 129060 }, { "epoch": 37.131760644418875, "grad_norm": 0.6588833332061768, "learning_rate": 0.0012573647871116226, "loss": 0.5161, "step": 129070 }, { "epoch": 37.13463751438435, "grad_norm": 1.5028444528579712, "learning_rate": 0.001257307249712313, "loss": 0.5731, "step": 129080 }, { "epoch": 37.13751438434983, "grad_norm": 1.5127208232879639, "learning_rate": 0.0012572497123130035, "loss": 0.7008, "step": 129090 }, { "epoch": 37.140391254315304, "grad_norm": 1.0513650178909302, "learning_rate": 0.0012571921749136938, "loss": 0.5541, "step": 129100 }, { "epoch": 37.14326812428078, "grad_norm": 0.5772268176078796, "learning_rate": 0.0012571346375143844, "loss": 0.6169, "step": 129110 }, { "epoch": 37.146144994246264, "grad_norm": 0.894356369972229, "learning_rate": 0.001257077100115075, "loss": 0.6044, "step": 129120 }, { "epoch": 37.14902186421174, "grad_norm": 2.1823418140411377, "learning_rate": 0.0012570195627157653, "loss": 0.6484, "step": 129130 }, { "epoch": 37.151898734177216, "grad_norm": 2.097323179244995, "learning_rate": 0.0012569620253164557, "loss": 0.674, "step": 129140 }, { "epoch": 37.15477560414269, "grad_norm": 1.7067549228668213, "learning_rate": 0.001256904487917146, "loss": 0.5411, "step": 129150 }, { "epoch": 37.15765247410817, "grad_norm": 1.7653610706329346, "learning_rate": 0.0012568469505178366, "loss": 0.6615, "step": 129160 }, { "epoch": 37.160529344073645, "grad_norm": 1.728064775466919, "learning_rate": 0.0012567894131185271, "loss": 0.5672, "step": 129170 }, { "epoch": 37.16340621403913, "grad_norm": 1.442626953125, "learning_rate": 0.0012567318757192175, "loss": 0.7642, "step": 129180 }, { "epoch": 37.166283084004604, "grad_norm": 1.130104422569275, "learning_rate": 0.001256674338319908, "loss": 0.5938, "step": 129190 }, { "epoch": 37.16915995397008, "grad_norm": 1.2775453329086304, "learning_rate": 0.0012566168009205984, "loss": 0.674, "step": 129200 }, { "epoch": 37.17203682393556, "grad_norm": 0.8733412623405457, "learning_rate": 0.0012565592635212887, "loss": 0.5182, "step": 129210 }, { "epoch": 37.17491369390103, "grad_norm": 0.8426233530044556, "learning_rate": 0.0012565017261219793, "loss": 0.6149, "step": 129220 }, { "epoch": 37.177790563866516, "grad_norm": 1.173690676689148, "learning_rate": 0.0012564441887226699, "loss": 0.583, "step": 129230 }, { "epoch": 37.18066743383199, "grad_norm": 2.5807204246520996, "learning_rate": 0.0012563866513233602, "loss": 0.7134, "step": 129240 }, { "epoch": 37.18354430379747, "grad_norm": 1.8250676393508911, "learning_rate": 0.0012563291139240508, "loss": 0.7139, "step": 129250 }, { "epoch": 37.186421173762945, "grad_norm": 1.316196322441101, "learning_rate": 0.001256271576524741, "loss": 0.7967, "step": 129260 }, { "epoch": 37.18929804372842, "grad_norm": 1.259824275970459, "learning_rate": 0.0012562140391254315, "loss": 0.6199, "step": 129270 }, { "epoch": 37.192174913693904, "grad_norm": 1.7482601404190063, "learning_rate": 0.001256156501726122, "loss": 0.6044, "step": 129280 }, { "epoch": 37.19505178365938, "grad_norm": 0.8920528888702393, "learning_rate": 0.0012560989643268124, "loss": 0.5987, "step": 129290 }, { "epoch": 37.19792865362486, "grad_norm": 1.7574714422225952, "learning_rate": 0.001256041426927503, "loss": 0.5607, "step": 129300 }, { "epoch": 37.20080552359033, "grad_norm": 1.920295000076294, "learning_rate": 0.0012559838895281935, "loss": 0.5657, "step": 129310 }, { "epoch": 37.20368239355581, "grad_norm": 1.175888180732727, "learning_rate": 0.0012559263521288837, "loss": 0.5935, "step": 129320 }, { "epoch": 37.20655926352129, "grad_norm": 1.0194436311721802, "learning_rate": 0.0012558688147295742, "loss": 0.6501, "step": 129330 }, { "epoch": 37.20943613348677, "grad_norm": 1.2678930759429932, "learning_rate": 0.0012558112773302648, "loss": 0.8436, "step": 129340 }, { "epoch": 37.212313003452245, "grad_norm": 1.1096349954605103, "learning_rate": 0.0012557537399309551, "loss": 0.4259, "step": 129350 }, { "epoch": 37.21518987341772, "grad_norm": 2.125455856323242, "learning_rate": 0.0012556962025316457, "loss": 0.6938, "step": 129360 }, { "epoch": 37.2180667433832, "grad_norm": 1.078570008277893, "learning_rate": 0.0012556386651323363, "loss": 0.5735, "step": 129370 }, { "epoch": 37.22094361334867, "grad_norm": 1.3264158964157104, "learning_rate": 0.0012555811277330264, "loss": 0.5235, "step": 129380 }, { "epoch": 37.22382048331416, "grad_norm": 1.503660798072815, "learning_rate": 0.001255523590333717, "loss": 0.6254, "step": 129390 }, { "epoch": 37.22669735327963, "grad_norm": 1.2263163328170776, "learning_rate": 0.0012554660529344073, "loss": 0.6316, "step": 129400 }, { "epoch": 37.22957422324511, "grad_norm": 1.466821312904358, "learning_rate": 0.0012554085155350979, "loss": 0.5977, "step": 129410 }, { "epoch": 37.232451093210585, "grad_norm": 1.9031715393066406, "learning_rate": 0.0012553509781357884, "loss": 0.6684, "step": 129420 }, { "epoch": 37.23532796317606, "grad_norm": 1.0425453186035156, "learning_rate": 0.0012552934407364788, "loss": 0.5404, "step": 129430 }, { "epoch": 37.238204833141545, "grad_norm": 1.1203984022140503, "learning_rate": 0.0012552359033371691, "loss": 0.5842, "step": 129440 }, { "epoch": 37.24108170310702, "grad_norm": 1.93630850315094, "learning_rate": 0.0012551783659378597, "loss": 0.6566, "step": 129450 }, { "epoch": 37.2439585730725, "grad_norm": 1.0481162071228027, "learning_rate": 0.00125512082853855, "loss": 0.5681, "step": 129460 }, { "epoch": 37.24683544303797, "grad_norm": 1.54935884475708, "learning_rate": 0.0012550632911392406, "loss": 0.7029, "step": 129470 }, { "epoch": 37.24971231300345, "grad_norm": 1.1037907600402832, "learning_rate": 0.0012550057537399312, "loss": 0.5113, "step": 129480 }, { "epoch": 37.25258918296893, "grad_norm": 2.1682941913604736, "learning_rate": 0.0012549482163406215, "loss": 0.7138, "step": 129490 }, { "epoch": 37.25546605293441, "grad_norm": 1.4087539911270142, "learning_rate": 0.0012548906789413119, "loss": 0.5851, "step": 129500 }, { "epoch": 37.258342922899885, "grad_norm": 2.506477117538452, "learning_rate": 0.0012548331415420022, "loss": 0.5804, "step": 129510 }, { "epoch": 37.26121979286536, "grad_norm": 1.8241032361984253, "learning_rate": 0.0012547756041426928, "loss": 0.6441, "step": 129520 }, { "epoch": 37.26409666283084, "grad_norm": 1.967148780822754, "learning_rate": 0.0012547180667433833, "loss": 0.5779, "step": 129530 }, { "epoch": 37.26697353279632, "grad_norm": 2.077712059020996, "learning_rate": 0.0012546605293440737, "loss": 0.8579, "step": 129540 }, { "epoch": 37.2698504027618, "grad_norm": 0.8679627180099487, "learning_rate": 0.001254602991944764, "loss": 0.4959, "step": 129550 }, { "epoch": 37.27272727272727, "grad_norm": 2.1620638370513916, "learning_rate": 0.0012545454545454546, "loss": 0.6557, "step": 129560 }, { "epoch": 37.27560414269275, "grad_norm": 1.660902976989746, "learning_rate": 0.001254487917146145, "loss": 0.536, "step": 129570 }, { "epoch": 37.278481012658226, "grad_norm": 1.7735639810562134, "learning_rate": 0.0012544303797468355, "loss": 0.5649, "step": 129580 }, { "epoch": 37.2813578826237, "grad_norm": 2.4174160957336426, "learning_rate": 0.001254372842347526, "loss": 0.5678, "step": 129590 }, { "epoch": 37.284234752589185, "grad_norm": 0.8283683657646179, "learning_rate": 0.0012543153049482164, "loss": 0.6104, "step": 129600 }, { "epoch": 37.28711162255466, "grad_norm": 1.181068778038025, "learning_rate": 0.0012542577675489068, "loss": 0.6263, "step": 129610 }, { "epoch": 37.28998849252014, "grad_norm": 1.6851005554199219, "learning_rate": 0.001254200230149597, "loss": 0.6613, "step": 129620 }, { "epoch": 37.292865362485614, "grad_norm": 1.469440221786499, "learning_rate": 0.0012541426927502877, "loss": 0.4611, "step": 129630 }, { "epoch": 37.29574223245109, "grad_norm": 0.804894208908081, "learning_rate": 0.0012540851553509782, "loss": 0.6013, "step": 129640 }, { "epoch": 37.29861910241657, "grad_norm": 1.0811591148376465, "learning_rate": 0.0012540276179516686, "loss": 0.5518, "step": 129650 }, { "epoch": 37.30149597238205, "grad_norm": 1.389285922050476, "learning_rate": 0.0012539700805523591, "loss": 0.7134, "step": 129660 }, { "epoch": 37.304372842347526, "grad_norm": 1.399716854095459, "learning_rate": 0.0012539125431530495, "loss": 0.5604, "step": 129670 }, { "epoch": 37.307249712313, "grad_norm": 1.235508918762207, "learning_rate": 0.0012538550057537398, "loss": 0.6529, "step": 129680 }, { "epoch": 37.31012658227848, "grad_norm": 1.1467458009719849, "learning_rate": 0.0012537974683544304, "loss": 0.6261, "step": 129690 }, { "epoch": 37.31300345224396, "grad_norm": 1.143766164779663, "learning_rate": 0.001253739930955121, "loss": 0.5315, "step": 129700 }, { "epoch": 37.31588032220944, "grad_norm": 1.1925886869430542, "learning_rate": 0.0012536823935558113, "loss": 0.609, "step": 129710 }, { "epoch": 37.318757192174914, "grad_norm": 2.035757303237915, "learning_rate": 0.0012536248561565019, "loss": 0.5423, "step": 129720 }, { "epoch": 37.32163406214039, "grad_norm": 1.8947376012802124, "learning_rate": 0.001253567318757192, "loss": 0.5387, "step": 129730 }, { "epoch": 37.324510932105866, "grad_norm": 1.459705114364624, "learning_rate": 0.0012535097813578826, "loss": 0.5744, "step": 129740 }, { "epoch": 37.32738780207135, "grad_norm": 1.4013495445251465, "learning_rate": 0.0012534522439585731, "loss": 0.6777, "step": 129750 }, { "epoch": 37.330264672036826, "grad_norm": 1.6509757041931152, "learning_rate": 0.0012533947065592635, "loss": 0.5075, "step": 129760 }, { "epoch": 37.3331415420023, "grad_norm": 1.6741282939910889, "learning_rate": 0.001253337169159954, "loss": 0.7848, "step": 129770 }, { "epoch": 37.33601841196778, "grad_norm": 1.2280595302581787, "learning_rate": 0.0012532796317606446, "loss": 0.8166, "step": 129780 }, { "epoch": 37.338895281933254, "grad_norm": 1.4525402784347534, "learning_rate": 0.0012532220943613347, "loss": 0.5354, "step": 129790 }, { "epoch": 37.34177215189873, "grad_norm": 1.3505887985229492, "learning_rate": 0.0012531645569620253, "loss": 0.559, "step": 129800 }, { "epoch": 37.344649021864214, "grad_norm": 1.1588504314422607, "learning_rate": 0.0012531070195627159, "loss": 0.6002, "step": 129810 }, { "epoch": 37.34752589182969, "grad_norm": 1.0835667848587036, "learning_rate": 0.0012530494821634062, "loss": 0.6385, "step": 129820 }, { "epoch": 37.350402761795166, "grad_norm": 0.9866071343421936, "learning_rate": 0.0012529919447640968, "loss": 0.6588, "step": 129830 }, { "epoch": 37.35327963176064, "grad_norm": 1.232414960861206, "learning_rate": 0.0012529344073647871, "loss": 0.6782, "step": 129840 }, { "epoch": 37.35615650172612, "grad_norm": 1.8245842456817627, "learning_rate": 0.0012528768699654775, "loss": 0.5616, "step": 129850 }, { "epoch": 37.3590333716916, "grad_norm": 1.0619078874588013, "learning_rate": 0.001252819332566168, "loss": 0.5193, "step": 129860 }, { "epoch": 37.36191024165708, "grad_norm": 1.4683929681777954, "learning_rate": 0.0012527617951668584, "loss": 0.6378, "step": 129870 }, { "epoch": 37.364787111622555, "grad_norm": 0.7987720966339111, "learning_rate": 0.001252704257767549, "loss": 0.6486, "step": 129880 }, { "epoch": 37.36766398158803, "grad_norm": 0.8768426179885864, "learning_rate": 0.0012526467203682395, "loss": 0.6448, "step": 129890 }, { "epoch": 37.37054085155351, "grad_norm": 1.1863105297088623, "learning_rate": 0.0012525891829689299, "loss": 0.5548, "step": 129900 }, { "epoch": 37.37341772151899, "grad_norm": 1.71285080909729, "learning_rate": 0.0012525316455696202, "loss": 0.592, "step": 129910 }, { "epoch": 37.376294591484466, "grad_norm": 1.479129672050476, "learning_rate": 0.0012524741081703108, "loss": 0.6416, "step": 129920 }, { "epoch": 37.37917146144994, "grad_norm": 1.2120648622512817, "learning_rate": 0.0012524165707710011, "loss": 0.6808, "step": 129930 }, { "epoch": 37.38204833141542, "grad_norm": 1.0988574028015137, "learning_rate": 0.0012523590333716917, "loss": 0.5197, "step": 129940 }, { "epoch": 37.384925201380895, "grad_norm": 0.8657506108283997, "learning_rate": 0.001252301495972382, "loss": 0.473, "step": 129950 }, { "epoch": 37.38780207134638, "grad_norm": 0.8376123309135437, "learning_rate": 0.0012522439585730726, "loss": 0.5346, "step": 129960 }, { "epoch": 37.390678941311855, "grad_norm": 1.5291439294815063, "learning_rate": 0.001252186421173763, "loss": 0.536, "step": 129970 }, { "epoch": 37.39355581127733, "grad_norm": 2.5700337886810303, "learning_rate": 0.0012521288837744533, "loss": 0.6231, "step": 129980 }, { "epoch": 37.39643268124281, "grad_norm": 1.7038888931274414, "learning_rate": 0.0012520713463751438, "loss": 0.6566, "step": 129990 }, { "epoch": 37.39930955120828, "grad_norm": 1.3554655313491821, "learning_rate": 0.0012520138089758344, "loss": 0.736, "step": 130000 }, { "epoch": 37.40218642117377, "grad_norm": 0.8847460746765137, "learning_rate": 0.0012519562715765248, "loss": 0.5965, "step": 130010 }, { "epoch": 37.40506329113924, "grad_norm": 1.2118418216705322, "learning_rate": 0.0012518987341772153, "loss": 0.7008, "step": 130020 }, { "epoch": 37.40794016110472, "grad_norm": 2.367548942565918, "learning_rate": 0.0012518411967779057, "loss": 0.5481, "step": 130030 }, { "epoch": 37.410817031070195, "grad_norm": 1.0334694385528564, "learning_rate": 0.001251783659378596, "loss": 0.5739, "step": 130040 }, { "epoch": 37.41369390103567, "grad_norm": 1.3359273672103882, "learning_rate": 0.0012517261219792866, "loss": 0.5286, "step": 130050 }, { "epoch": 37.41657077100115, "grad_norm": 1.78816819190979, "learning_rate": 0.0012516685845799771, "loss": 0.7074, "step": 130060 }, { "epoch": 37.41944764096663, "grad_norm": 1.2124329805374146, "learning_rate": 0.0012516110471806675, "loss": 0.6787, "step": 130070 }, { "epoch": 37.42232451093211, "grad_norm": 1.9721717834472656, "learning_rate": 0.001251553509781358, "loss": 0.6327, "step": 130080 }, { "epoch": 37.42520138089758, "grad_norm": 1.8691611289978027, "learning_rate": 0.0012514959723820482, "loss": 0.621, "step": 130090 }, { "epoch": 37.42807825086306, "grad_norm": 0.7053963541984558, "learning_rate": 0.0012514384349827387, "loss": 0.5435, "step": 130100 }, { "epoch": 37.430955120828536, "grad_norm": 1.6770323514938354, "learning_rate": 0.0012513808975834293, "loss": 0.7342, "step": 130110 }, { "epoch": 37.43383199079402, "grad_norm": 1.5699368715286255, "learning_rate": 0.0012513233601841197, "loss": 0.781, "step": 130120 }, { "epoch": 37.436708860759495, "grad_norm": 0.8112168908119202, "learning_rate": 0.0012512658227848102, "loss": 0.6761, "step": 130130 }, { "epoch": 37.43958573072497, "grad_norm": 1.029510736465454, "learning_rate": 0.0012512082853855008, "loss": 0.5542, "step": 130140 }, { "epoch": 37.44246260069045, "grad_norm": 1.75215482711792, "learning_rate": 0.001251150747986191, "loss": 0.6536, "step": 130150 }, { "epoch": 37.445339470655924, "grad_norm": 1.3425177335739136, "learning_rate": 0.0012510932105868815, "loss": 0.599, "step": 130160 }, { "epoch": 37.44821634062141, "grad_norm": 1.0321052074432373, "learning_rate": 0.001251035673187572, "loss": 0.5396, "step": 130170 }, { "epoch": 37.45109321058688, "grad_norm": 1.355316400527954, "learning_rate": 0.0012509781357882624, "loss": 0.5492, "step": 130180 }, { "epoch": 37.45397008055236, "grad_norm": 0.8048127293586731, "learning_rate": 0.001250920598388953, "loss": 0.5767, "step": 130190 }, { "epoch": 37.456846950517836, "grad_norm": 0.8806231021881104, "learning_rate": 0.0012508630609896433, "loss": 0.6665, "step": 130200 }, { "epoch": 37.45972382048331, "grad_norm": 1.3129650354385376, "learning_rate": 0.0012508055235903336, "loss": 0.8185, "step": 130210 }, { "epoch": 37.462600690448795, "grad_norm": 1.19904625415802, "learning_rate": 0.0012507479861910242, "loss": 0.5177, "step": 130220 }, { "epoch": 37.46547756041427, "grad_norm": 1.3368319272994995, "learning_rate": 0.0012506904487917146, "loss": 0.7056, "step": 130230 }, { "epoch": 37.46835443037975, "grad_norm": 0.9033453464508057, "learning_rate": 0.0012506329113924051, "loss": 0.5286, "step": 130240 }, { "epoch": 37.471231300345224, "grad_norm": 1.8097922801971436, "learning_rate": 0.0012505753739930957, "loss": 0.7431, "step": 130250 }, { "epoch": 37.4741081703107, "grad_norm": 0.8449621796607971, "learning_rate": 0.001250517836593786, "loss": 0.6091, "step": 130260 }, { "epoch": 37.476985040276176, "grad_norm": 1.4743175506591797, "learning_rate": 0.0012504602991944764, "loss": 0.5127, "step": 130270 }, { "epoch": 37.47986191024166, "grad_norm": 0.7222030162811279, "learning_rate": 0.001250402761795167, "loss": 0.6049, "step": 130280 }, { "epoch": 37.482738780207136, "grad_norm": 1.6391311883926392, "learning_rate": 0.0012503452243958573, "loss": 0.7724, "step": 130290 }, { "epoch": 37.48561565017261, "grad_norm": 2.3456830978393555, "learning_rate": 0.0012502876869965479, "loss": 0.6817, "step": 130300 }, { "epoch": 37.48849252013809, "grad_norm": 0.8399429321289062, "learning_rate": 0.0012502301495972382, "loss": 0.5876, "step": 130310 }, { "epoch": 37.491369390103564, "grad_norm": 3.7117743492126465, "learning_rate": 0.0012501726121979288, "loss": 0.6987, "step": 130320 }, { "epoch": 37.49424626006905, "grad_norm": 1.3134657144546509, "learning_rate": 0.0012501150747986191, "loss": 0.6003, "step": 130330 }, { "epoch": 37.497123130034524, "grad_norm": 2.1297576427459717, "learning_rate": 0.0012500575373993095, "loss": 0.6454, "step": 130340 }, { "epoch": 37.5, "grad_norm": 1.513309121131897, "learning_rate": 0.00125, "loss": 0.6287, "step": 130350 }, { "epoch": 37.502876869965476, "grad_norm": 1.527850866317749, "learning_rate": 0.0012499424626006906, "loss": 0.5709, "step": 130360 }, { "epoch": 37.50575373993095, "grad_norm": 1.3099589347839355, "learning_rate": 0.001249884925201381, "loss": 0.7136, "step": 130370 }, { "epoch": 37.508630609896436, "grad_norm": 1.2906780242919922, "learning_rate": 0.0012498273878020713, "loss": 0.7162, "step": 130380 }, { "epoch": 37.51150747986191, "grad_norm": 1.4266022443771362, "learning_rate": 0.0012497698504027618, "loss": 0.5693, "step": 130390 }, { "epoch": 37.51438434982739, "grad_norm": 0.9628028273582458, "learning_rate": 0.0012497123130034522, "loss": 0.5866, "step": 130400 }, { "epoch": 37.517261219792864, "grad_norm": 0.9565821886062622, "learning_rate": 0.0012496547756041428, "loss": 0.677, "step": 130410 }, { "epoch": 37.52013808975834, "grad_norm": 1.0143877267837524, "learning_rate": 0.001249597238204833, "loss": 0.64, "step": 130420 }, { "epoch": 37.523014959723824, "grad_norm": 1.3715416193008423, "learning_rate": 0.0012495397008055237, "loss": 0.6671, "step": 130430 }, { "epoch": 37.5258918296893, "grad_norm": 1.2689915895462036, "learning_rate": 0.001249482163406214, "loss": 0.6984, "step": 130440 }, { "epoch": 37.528768699654776, "grad_norm": 1.0105533599853516, "learning_rate": 0.0012494246260069044, "loss": 0.5959, "step": 130450 }, { "epoch": 37.53164556962025, "grad_norm": 1.1978199481964111, "learning_rate": 0.001249367088607595, "loss": 0.5726, "step": 130460 }, { "epoch": 37.53452243958573, "grad_norm": 0.7836152911186218, "learning_rate": 0.0012493095512082855, "loss": 0.5017, "step": 130470 }, { "epoch": 37.537399309551205, "grad_norm": 1.4307188987731934, "learning_rate": 0.0012492520138089758, "loss": 0.7248, "step": 130480 }, { "epoch": 37.54027617951669, "grad_norm": 1.0942102670669556, "learning_rate": 0.0012491944764096664, "loss": 0.5236, "step": 130490 }, { "epoch": 37.543153049482164, "grad_norm": 1.3009364604949951, "learning_rate": 0.0012491369390103568, "loss": 0.5942, "step": 130500 }, { "epoch": 37.54602991944764, "grad_norm": 2.1046361923217773, "learning_rate": 0.001249079401611047, "loss": 0.7169, "step": 130510 }, { "epoch": 37.54890678941312, "grad_norm": 2.0824966430664062, "learning_rate": 0.0012490218642117377, "loss": 0.6508, "step": 130520 }, { "epoch": 37.55178365937859, "grad_norm": 1.7400261163711548, "learning_rate": 0.001248964326812428, "loss": 0.7514, "step": 130530 }, { "epoch": 37.554660529344076, "grad_norm": 1.0608677864074707, "learning_rate": 0.0012489067894131186, "loss": 0.6055, "step": 130540 }, { "epoch": 37.55753739930955, "grad_norm": 1.9051774740219116, "learning_rate": 0.0012488492520138091, "loss": 0.6623, "step": 130550 }, { "epoch": 37.56041426927503, "grad_norm": 0.9668086767196655, "learning_rate": 0.0012487917146144993, "loss": 0.7242, "step": 130560 }, { "epoch": 37.563291139240505, "grad_norm": 0.9814884662628174, "learning_rate": 0.0012487341772151898, "loss": 0.522, "step": 130570 }, { "epoch": 37.56616800920598, "grad_norm": 0.6663951873779297, "learning_rate": 0.0012486766398158804, "loss": 0.5445, "step": 130580 }, { "epoch": 37.569044879171464, "grad_norm": 1.2071441411972046, "learning_rate": 0.0012486191024165707, "loss": 0.6464, "step": 130590 }, { "epoch": 37.57192174913694, "grad_norm": 0.8853676915168762, "learning_rate": 0.0012485615650172613, "loss": 0.6702, "step": 130600 }, { "epoch": 37.57479861910242, "grad_norm": 1.0170576572418213, "learning_rate": 0.0012485040276179519, "loss": 0.5927, "step": 130610 }, { "epoch": 37.57767548906789, "grad_norm": 1.1319859027862549, "learning_rate": 0.001248446490218642, "loss": 0.514, "step": 130620 }, { "epoch": 37.58055235903337, "grad_norm": 1.4953428506851196, "learning_rate": 0.0012483889528193326, "loss": 0.6482, "step": 130630 }, { "epoch": 37.58342922899885, "grad_norm": 0.7095493674278259, "learning_rate": 0.001248331415420023, "loss": 0.6626, "step": 130640 }, { "epoch": 37.58630609896433, "grad_norm": 1.6998294591903687, "learning_rate": 0.0012482738780207135, "loss": 0.6694, "step": 130650 }, { "epoch": 37.589182968929805, "grad_norm": 1.6803078651428223, "learning_rate": 0.001248216340621404, "loss": 0.5434, "step": 130660 }, { "epoch": 37.59205983889528, "grad_norm": 1.6614868640899658, "learning_rate": 0.0012481588032220944, "loss": 0.666, "step": 130670 }, { "epoch": 37.59493670886076, "grad_norm": 1.5007169246673584, "learning_rate": 0.0012481012658227847, "loss": 0.5434, "step": 130680 }, { "epoch": 37.59781357882623, "grad_norm": 1.8083016872406006, "learning_rate": 0.0012480437284234753, "loss": 0.6598, "step": 130690 }, { "epoch": 37.60069044879172, "grad_norm": 2.141719341278076, "learning_rate": 0.0012479861910241656, "loss": 0.7128, "step": 130700 }, { "epoch": 37.60356731875719, "grad_norm": 1.7410340309143066, "learning_rate": 0.0012479286536248562, "loss": 0.5808, "step": 130710 }, { "epoch": 37.60644418872267, "grad_norm": 1.6621819734573364, "learning_rate": 0.0012478711162255468, "loss": 0.8012, "step": 130720 }, { "epoch": 37.609321058688145, "grad_norm": 2.823331117630005, "learning_rate": 0.0012478135788262371, "loss": 0.6764, "step": 130730 }, { "epoch": 37.61219792865362, "grad_norm": 0.7661779522895813, "learning_rate": 0.0012477560414269275, "loss": 0.6196, "step": 130740 }, { "epoch": 37.615074798619105, "grad_norm": 0.8432340621948242, "learning_rate": 0.001247698504027618, "loss": 0.5396, "step": 130750 }, { "epoch": 37.61795166858458, "grad_norm": 2.877277374267578, "learning_rate": 0.0012476409666283084, "loss": 0.7043, "step": 130760 }, { "epoch": 37.62082853855006, "grad_norm": 0.9352349042892456, "learning_rate": 0.001247583429228999, "loss": 0.4819, "step": 130770 }, { "epoch": 37.623705408515534, "grad_norm": 1.112389087677002, "learning_rate": 0.0012475258918296893, "loss": 0.6523, "step": 130780 }, { "epoch": 37.62658227848101, "grad_norm": 2.732966661453247, "learning_rate": 0.0012474683544303799, "loss": 0.6884, "step": 130790 }, { "epoch": 37.62945914844649, "grad_norm": 0.9444367289543152, "learning_rate": 0.0012474108170310702, "loss": 0.627, "step": 130800 }, { "epoch": 37.63233601841197, "grad_norm": 0.8618001341819763, "learning_rate": 0.0012473532796317605, "loss": 0.6403, "step": 130810 }, { "epoch": 37.635212888377445, "grad_norm": 1.1944912672042847, "learning_rate": 0.0012472957422324511, "loss": 0.5123, "step": 130820 }, { "epoch": 37.63808975834292, "grad_norm": 1.137013554573059, "learning_rate": 0.0012472382048331417, "loss": 0.6781, "step": 130830 }, { "epoch": 37.6409666283084, "grad_norm": 1.3163939714431763, "learning_rate": 0.001247180667433832, "loss": 0.5765, "step": 130840 }, { "epoch": 37.64384349827388, "grad_norm": 1.1130778789520264, "learning_rate": 0.0012471231300345226, "loss": 0.6511, "step": 130850 }, { "epoch": 37.64672036823936, "grad_norm": 1.0517899990081787, "learning_rate": 0.001247065592635213, "loss": 0.6491, "step": 130860 }, { "epoch": 37.649597238204834, "grad_norm": 1.296968936920166, "learning_rate": 0.0012470080552359033, "loss": 0.6562, "step": 130870 }, { "epoch": 37.65247410817031, "grad_norm": 1.4220972061157227, "learning_rate": 0.0012469505178365938, "loss": 0.55, "step": 130880 }, { "epoch": 37.655350978135786, "grad_norm": 0.7192716598510742, "learning_rate": 0.0012468929804372842, "loss": 0.6236, "step": 130890 }, { "epoch": 37.65822784810126, "grad_norm": 0.977310061454773, "learning_rate": 0.0012468354430379748, "loss": 0.7236, "step": 130900 }, { "epoch": 37.661104718066746, "grad_norm": 1.1260230541229248, "learning_rate": 0.0012467779056386653, "loss": 0.6397, "step": 130910 }, { "epoch": 37.66398158803222, "grad_norm": 1.483599305152893, "learning_rate": 0.0012467203682393554, "loss": 0.5733, "step": 130920 }, { "epoch": 37.6668584579977, "grad_norm": 1.5654171705245972, "learning_rate": 0.001246662830840046, "loss": 0.6712, "step": 130930 }, { "epoch": 37.669735327963174, "grad_norm": 2.062774896621704, "learning_rate": 0.0012466052934407366, "loss": 0.8519, "step": 130940 }, { "epoch": 37.67261219792865, "grad_norm": 0.641200840473175, "learning_rate": 0.001246547756041427, "loss": 0.6388, "step": 130950 }, { "epoch": 37.675489067894134, "grad_norm": 1.6230173110961914, "learning_rate": 0.0012464902186421175, "loss": 0.774, "step": 130960 }, { "epoch": 37.67836593785961, "grad_norm": 1.65764582157135, "learning_rate": 0.001246432681242808, "loss": 0.6853, "step": 130970 }, { "epoch": 37.681242807825086, "grad_norm": 1.0886825323104858, "learning_rate": 0.0012463751438434982, "loss": 0.6941, "step": 130980 }, { "epoch": 37.68411967779056, "grad_norm": 2.478935480117798, "learning_rate": 0.0012463176064441887, "loss": 0.6145, "step": 130990 }, { "epoch": 37.68699654775604, "grad_norm": 0.9172120094299316, "learning_rate": 0.001246260069044879, "loss": 0.7257, "step": 131000 }, { "epoch": 37.68987341772152, "grad_norm": 0.9217063188552856, "learning_rate": 0.0012462025316455697, "loss": 0.6196, "step": 131010 }, { "epoch": 37.692750287687, "grad_norm": 1.425835371017456, "learning_rate": 0.0012461449942462602, "loss": 0.606, "step": 131020 }, { "epoch": 37.695627157652474, "grad_norm": 2.0247445106506348, "learning_rate": 0.0012460874568469506, "loss": 0.6104, "step": 131030 }, { "epoch": 37.69850402761795, "grad_norm": 1.1680371761322021, "learning_rate": 0.001246029919447641, "loss": 0.584, "step": 131040 }, { "epoch": 37.70138089758343, "grad_norm": 1.203467845916748, "learning_rate": 0.0012459723820483315, "loss": 0.658, "step": 131050 }, { "epoch": 37.70425776754891, "grad_norm": 1.071076512336731, "learning_rate": 0.0012459148446490218, "loss": 0.6227, "step": 131060 }, { "epoch": 37.707134637514386, "grad_norm": 1.4367464780807495, "learning_rate": 0.0012458573072497124, "loss": 0.6859, "step": 131070 }, { "epoch": 37.71001150747986, "grad_norm": 1.1424723863601685, "learning_rate": 0.001245799769850403, "loss": 0.5548, "step": 131080 }, { "epoch": 37.71288837744534, "grad_norm": 1.6587756872177124, "learning_rate": 0.0012457422324510933, "loss": 0.6955, "step": 131090 }, { "epoch": 37.715765247410815, "grad_norm": 1.8755402565002441, "learning_rate": 0.0012456846950517836, "loss": 0.9009, "step": 131100 }, { "epoch": 37.7186421173763, "grad_norm": 1.565402626991272, "learning_rate": 0.001245627157652474, "loss": 0.6361, "step": 131110 }, { "epoch": 37.721518987341774, "grad_norm": 0.7267645001411438, "learning_rate": 0.0012455696202531646, "loss": 0.6779, "step": 131120 }, { "epoch": 37.72439585730725, "grad_norm": 1.3018534183502197, "learning_rate": 0.0012455120828538551, "loss": 0.6651, "step": 131130 }, { "epoch": 37.72727272727273, "grad_norm": 0.9403828978538513, "learning_rate": 0.0012454545454545455, "loss": 0.9407, "step": 131140 }, { "epoch": 37.7301495972382, "grad_norm": 1.184683084487915, "learning_rate": 0.001245397008055236, "loss": 0.7057, "step": 131150 }, { "epoch": 37.73302646720368, "grad_norm": 1.8051179647445679, "learning_rate": 0.0012453394706559264, "loss": 0.5649, "step": 131160 }, { "epoch": 37.73590333716916, "grad_norm": 2.102236747741699, "learning_rate": 0.0012452819332566167, "loss": 0.6924, "step": 131170 }, { "epoch": 37.73878020713464, "grad_norm": 1.0852837562561035, "learning_rate": 0.0012452243958573073, "loss": 0.6205, "step": 131180 }, { "epoch": 37.741657077100115, "grad_norm": 0.6217397451400757, "learning_rate": 0.0012451668584579979, "loss": 0.594, "step": 131190 }, { "epoch": 37.74453394706559, "grad_norm": 1.280706524848938, "learning_rate": 0.0012451093210586882, "loss": 0.5977, "step": 131200 }, { "epoch": 37.74741081703107, "grad_norm": 1.6371773481369019, "learning_rate": 0.0012450517836593785, "loss": 0.5888, "step": 131210 }, { "epoch": 37.75028768699655, "grad_norm": 1.8770304918289185, "learning_rate": 0.001244994246260069, "loss": 0.7535, "step": 131220 }, { "epoch": 37.75316455696203, "grad_norm": 1.81844162940979, "learning_rate": 0.0012449367088607595, "loss": 0.7585, "step": 131230 }, { "epoch": 37.7560414269275, "grad_norm": 1.3967968225479126, "learning_rate": 0.00124487917146145, "loss": 0.6085, "step": 131240 }, { "epoch": 37.75891829689298, "grad_norm": 1.566861629486084, "learning_rate": 0.0012448216340621404, "loss": 0.5794, "step": 131250 }, { "epoch": 37.761795166858455, "grad_norm": 0.8116239309310913, "learning_rate": 0.001244764096662831, "loss": 0.6064, "step": 131260 }, { "epoch": 37.76467203682394, "grad_norm": 1.1938635110855103, "learning_rate": 0.0012447065592635213, "loss": 0.5667, "step": 131270 }, { "epoch": 37.767548906789415, "grad_norm": 0.7612485289573669, "learning_rate": 0.0012446490218642116, "loss": 0.6966, "step": 131280 }, { "epoch": 37.77042577675489, "grad_norm": 1.4515706300735474, "learning_rate": 0.0012445914844649022, "loss": 0.5478, "step": 131290 }, { "epoch": 37.77330264672037, "grad_norm": 1.3738635778427124, "learning_rate": 0.0012445339470655928, "loss": 0.5268, "step": 131300 }, { "epoch": 37.77617951668584, "grad_norm": 1.4193865060806274, "learning_rate": 0.001244476409666283, "loss": 0.6082, "step": 131310 }, { "epoch": 37.77905638665133, "grad_norm": 1.3304572105407715, "learning_rate": 0.0012444188722669737, "loss": 0.7106, "step": 131320 }, { "epoch": 37.7819332566168, "grad_norm": 1.976941466331482, "learning_rate": 0.0012443613348676638, "loss": 0.6407, "step": 131330 }, { "epoch": 37.78481012658228, "grad_norm": 1.3195197582244873, "learning_rate": 0.0012443037974683544, "loss": 0.5902, "step": 131340 }, { "epoch": 37.787686996547755, "grad_norm": 1.251630425453186, "learning_rate": 0.001244246260069045, "loss": 0.6078, "step": 131350 }, { "epoch": 37.79056386651323, "grad_norm": 1.245786190032959, "learning_rate": 0.0012441887226697353, "loss": 0.6059, "step": 131360 }, { "epoch": 37.79344073647871, "grad_norm": 1.2381690740585327, "learning_rate": 0.0012441311852704258, "loss": 0.5874, "step": 131370 }, { "epoch": 37.79631760644419, "grad_norm": 1.3498355150222778, "learning_rate": 0.0012440736478711164, "loss": 0.6432, "step": 131380 }, { "epoch": 37.79919447640967, "grad_norm": 1.3339334726333618, "learning_rate": 0.0012440161104718065, "loss": 0.7288, "step": 131390 }, { "epoch": 37.80207134637514, "grad_norm": 1.5856976509094238, "learning_rate": 0.001243958573072497, "loss": 0.5504, "step": 131400 }, { "epoch": 37.80494821634062, "grad_norm": 1.1092852354049683, "learning_rate": 0.0012439010356731877, "loss": 0.6353, "step": 131410 }, { "epoch": 37.807825086306096, "grad_norm": 0.6454669237136841, "learning_rate": 0.001243843498273878, "loss": 0.6281, "step": 131420 }, { "epoch": 37.81070195627158, "grad_norm": 1.1998604536056519, "learning_rate": 0.0012437859608745686, "loss": 0.7103, "step": 131430 }, { "epoch": 37.813578826237055, "grad_norm": 1.1242414712905884, "learning_rate": 0.0012437284234752591, "loss": 0.4886, "step": 131440 }, { "epoch": 37.81645569620253, "grad_norm": 1.1468091011047363, "learning_rate": 0.0012436708860759493, "loss": 0.6839, "step": 131450 }, { "epoch": 37.81933256616801, "grad_norm": 1.6061193943023682, "learning_rate": 0.0012436133486766398, "loss": 0.7087, "step": 131460 }, { "epoch": 37.822209436133484, "grad_norm": 0.9147240519523621, "learning_rate": 0.0012435558112773302, "loss": 0.7002, "step": 131470 }, { "epoch": 37.82508630609897, "grad_norm": 1.7573357820510864, "learning_rate": 0.0012434982738780207, "loss": 0.5241, "step": 131480 }, { "epoch": 37.82796317606444, "grad_norm": 1.039199948310852, "learning_rate": 0.0012434407364787113, "loss": 0.5148, "step": 131490 }, { "epoch": 37.83084004602992, "grad_norm": 0.8595248460769653, "learning_rate": 0.0012433831990794016, "loss": 0.5006, "step": 131500 }, { "epoch": 37.833716915995396, "grad_norm": 1.8410077095031738, "learning_rate": 0.001243325661680092, "loss": 0.586, "step": 131510 }, { "epoch": 37.83659378596087, "grad_norm": 1.4612981081008911, "learning_rate": 0.0012432681242807826, "loss": 0.6595, "step": 131520 }, { "epoch": 37.839470655926355, "grad_norm": 1.4139204025268555, "learning_rate": 0.001243210586881473, "loss": 0.5232, "step": 131530 }, { "epoch": 37.84234752589183, "grad_norm": 0.8161247372627258, "learning_rate": 0.0012431530494821635, "loss": 0.6365, "step": 131540 }, { "epoch": 37.84522439585731, "grad_norm": 1.6266766786575317, "learning_rate": 0.001243095512082854, "loss": 0.7798, "step": 131550 }, { "epoch": 37.848101265822784, "grad_norm": 1.3114596605300903, "learning_rate": 0.0012430379746835444, "loss": 0.603, "step": 131560 }, { "epoch": 37.85097813578826, "grad_norm": 1.0651031732559204, "learning_rate": 0.0012429804372842347, "loss": 0.7118, "step": 131570 }, { "epoch": 37.85385500575374, "grad_norm": 1.8349238634109497, "learning_rate": 0.001242922899884925, "loss": 0.6523, "step": 131580 }, { "epoch": 37.85673187571922, "grad_norm": 1.4501088857650757, "learning_rate": 0.0012428653624856156, "loss": 0.6664, "step": 131590 }, { "epoch": 37.859608745684696, "grad_norm": 1.914193034172058, "learning_rate": 0.0012428078250863062, "loss": 0.7964, "step": 131600 }, { "epoch": 37.86248561565017, "grad_norm": 0.9336039423942566, "learning_rate": 0.0012427502876869966, "loss": 0.6536, "step": 131610 }, { "epoch": 37.86536248561565, "grad_norm": 1.143997073173523, "learning_rate": 0.0012426927502876871, "loss": 0.6424, "step": 131620 }, { "epoch": 37.868239355581125, "grad_norm": 0.988726019859314, "learning_rate": 0.0012426352128883775, "loss": 0.5479, "step": 131630 }, { "epoch": 37.87111622554661, "grad_norm": 0.9765114188194275, "learning_rate": 0.0012425776754890678, "loss": 0.6442, "step": 131640 }, { "epoch": 37.873993095512084, "grad_norm": 1.0186941623687744, "learning_rate": 0.0012425201380897584, "loss": 0.5857, "step": 131650 }, { "epoch": 37.87686996547756, "grad_norm": 1.3189826011657715, "learning_rate": 0.001242462600690449, "loss": 0.5106, "step": 131660 }, { "epoch": 37.879746835443036, "grad_norm": 0.9396400451660156, "learning_rate": 0.0012424050632911393, "loss": 0.5709, "step": 131670 }, { "epoch": 37.88262370540851, "grad_norm": 1.1411018371582031, "learning_rate": 0.0012423475258918298, "loss": 0.882, "step": 131680 }, { "epoch": 37.885500575373996, "grad_norm": 1.501504898071289, "learning_rate": 0.00124228998849252, "loss": 0.5306, "step": 131690 }, { "epoch": 37.88837744533947, "grad_norm": 1.0810301303863525, "learning_rate": 0.0012422324510932105, "loss": 0.474, "step": 131700 }, { "epoch": 37.89125431530495, "grad_norm": 2.122920274734497, "learning_rate": 0.001242174913693901, "loss": 0.6051, "step": 131710 }, { "epoch": 37.894131185270425, "grad_norm": 1.4467482566833496, "learning_rate": 0.0012421173762945915, "loss": 0.6542, "step": 131720 }, { "epoch": 37.8970080552359, "grad_norm": 1.6636244058609009, "learning_rate": 0.001242059838895282, "loss": 0.5888, "step": 131730 }, { "epoch": 37.899884925201384, "grad_norm": 0.8497523069381714, "learning_rate": 0.0012420023014959726, "loss": 0.6165, "step": 131740 }, { "epoch": 37.90276179516686, "grad_norm": 1.7059051990509033, "learning_rate": 0.0012419447640966627, "loss": 0.6477, "step": 131750 }, { "epoch": 37.90563866513234, "grad_norm": 0.828590452671051, "learning_rate": 0.0012418872266973533, "loss": 0.4851, "step": 131760 }, { "epoch": 37.90851553509781, "grad_norm": 0.699934184551239, "learning_rate": 0.0012418296892980438, "loss": 0.6776, "step": 131770 }, { "epoch": 37.91139240506329, "grad_norm": 0.8997734189033508, "learning_rate": 0.0012417721518987342, "loss": 0.576, "step": 131780 }, { "epoch": 37.91426927502877, "grad_norm": 1.515645980834961, "learning_rate": 0.0012417146144994248, "loss": 0.6414, "step": 131790 }, { "epoch": 37.91714614499425, "grad_norm": 2.0334434509277344, "learning_rate": 0.001241657077100115, "loss": 0.7353, "step": 131800 }, { "epoch": 37.920023014959725, "grad_norm": 1.0778108835220337, "learning_rate": 0.0012415995397008054, "loss": 0.684, "step": 131810 }, { "epoch": 37.9228998849252, "grad_norm": 1.2381200790405273, "learning_rate": 0.001241542002301496, "loss": 0.655, "step": 131820 }, { "epoch": 37.92577675489068, "grad_norm": 4.212337493896484, "learning_rate": 0.0012414844649021864, "loss": 0.5733, "step": 131830 }, { "epoch": 37.92865362485615, "grad_norm": 0.8174734115600586, "learning_rate": 0.001241426927502877, "loss": 0.4729, "step": 131840 }, { "epoch": 37.93153049482164, "grad_norm": 1.78788161277771, "learning_rate": 0.0012413693901035675, "loss": 0.5949, "step": 131850 }, { "epoch": 37.93440736478711, "grad_norm": 1.3911744356155396, "learning_rate": 0.0012413118527042578, "loss": 0.6614, "step": 131860 }, { "epoch": 37.93728423475259, "grad_norm": 2.541048049926758, "learning_rate": 0.0012412543153049482, "loss": 0.5723, "step": 131870 }, { "epoch": 37.940161104718065, "grad_norm": 0.9249476790428162, "learning_rate": 0.0012411967779056387, "loss": 0.5327, "step": 131880 }, { "epoch": 37.94303797468354, "grad_norm": 1.1008187532424927, "learning_rate": 0.001241139240506329, "loss": 0.726, "step": 131890 }, { "epoch": 37.945914844649025, "grad_norm": 1.2045613527297974, "learning_rate": 0.0012410817031070197, "loss": 0.6558, "step": 131900 }, { "epoch": 37.9487917146145, "grad_norm": 1.3775163888931274, "learning_rate": 0.00124102416570771, "loss": 0.5633, "step": 131910 }, { "epoch": 37.95166858457998, "grad_norm": 2.315652847290039, "learning_rate": 0.0012409666283084006, "loss": 0.7268, "step": 131920 }, { "epoch": 37.95454545454545, "grad_norm": 1.2455240488052368, "learning_rate": 0.001240909090909091, "loss": 0.5884, "step": 131930 }, { "epoch": 37.95742232451093, "grad_norm": 0.6932968497276306, "learning_rate": 0.0012408515535097813, "loss": 0.4883, "step": 131940 }, { "epoch": 37.96029919447641, "grad_norm": 1.5734943151474, "learning_rate": 0.0012407940161104718, "loss": 0.6956, "step": 131950 }, { "epoch": 37.96317606444189, "grad_norm": 1.8362253904342651, "learning_rate": 0.0012407364787111624, "loss": 0.679, "step": 131960 }, { "epoch": 37.966052934407365, "grad_norm": 0.9161854982376099, "learning_rate": 0.0012406789413118527, "loss": 0.7329, "step": 131970 }, { "epoch": 37.96892980437284, "grad_norm": 1.20054030418396, "learning_rate": 0.0012406214039125433, "loss": 0.5734, "step": 131980 }, { "epoch": 37.97180667433832, "grad_norm": 0.8415230512619019, "learning_rate": 0.0012405638665132336, "loss": 0.6551, "step": 131990 }, { "epoch": 37.9746835443038, "grad_norm": 1.394234538078308, "learning_rate": 0.001240506329113924, "loss": 0.6199, "step": 132000 }, { "epoch": 37.97756041426928, "grad_norm": 0.8269740343093872, "learning_rate": 0.0012404487917146146, "loss": 0.7539, "step": 132010 }, { "epoch": 37.98043728423475, "grad_norm": 1.2679038047790527, "learning_rate": 0.0012403912543153051, "loss": 0.5766, "step": 132020 }, { "epoch": 37.98331415420023, "grad_norm": 0.7756711840629578, "learning_rate": 0.0012403337169159955, "loss": 0.4868, "step": 132030 }, { "epoch": 37.986191024165706, "grad_norm": 1.398193120956421, "learning_rate": 0.0012402761795166858, "loss": 0.7109, "step": 132040 }, { "epoch": 37.98906789413118, "grad_norm": 0.7917036414146423, "learning_rate": 0.0012402186421173762, "loss": 0.7223, "step": 132050 }, { "epoch": 37.991944764096665, "grad_norm": 0.9821282625198364, "learning_rate": 0.0012401611047180667, "loss": 0.6696, "step": 132060 }, { "epoch": 37.99482163406214, "grad_norm": 3.2233998775482178, "learning_rate": 0.0012401035673187573, "loss": 0.7458, "step": 132070 }, { "epoch": 37.99769850402762, "grad_norm": 1.8605875968933105, "learning_rate": 0.0012400460299194476, "loss": 0.6841, "step": 132080 }, { "epoch": 38.000575373993094, "grad_norm": 0.8025923371315002, "learning_rate": 0.0012399884925201382, "loss": 0.573, "step": 132090 }, { "epoch": 38.00345224395857, "grad_norm": 1.4974749088287354, "learning_rate": 0.0012399309551208285, "loss": 0.5657, "step": 132100 }, { "epoch": 38.00632911392405, "grad_norm": 1.8811460733413696, "learning_rate": 0.001239873417721519, "loss": 0.5702, "step": 132110 }, { "epoch": 38.00920598388953, "grad_norm": 1.2524263858795166, "learning_rate": 0.0012398158803222095, "loss": 0.6801, "step": 132120 }, { "epoch": 38.012082853855006, "grad_norm": 1.049189567565918, "learning_rate": 0.0012397583429229, "loss": 0.4778, "step": 132130 }, { "epoch": 38.01495972382048, "grad_norm": 0.9358544945716858, "learning_rate": 0.0012397008055235904, "loss": 0.6699, "step": 132140 }, { "epoch": 38.01783659378596, "grad_norm": 0.784034788608551, "learning_rate": 0.001239643268124281, "loss": 0.5418, "step": 132150 }, { "epoch": 38.02071346375144, "grad_norm": 1.1741451025009155, "learning_rate": 0.001239585730724971, "loss": 0.5909, "step": 132160 }, { "epoch": 38.02359033371692, "grad_norm": 0.983523428440094, "learning_rate": 0.0012395281933256616, "loss": 0.4407, "step": 132170 }, { "epoch": 38.026467203682394, "grad_norm": 1.657235026359558, "learning_rate": 0.0012394706559263522, "loss": 0.6712, "step": 132180 }, { "epoch": 38.02934407364787, "grad_norm": 1.0230332612991333, "learning_rate": 0.0012394131185270425, "loss": 0.4913, "step": 132190 }, { "epoch": 38.032220943613346, "grad_norm": 1.1260586977005005, "learning_rate": 0.001239355581127733, "loss": 0.5371, "step": 132200 }, { "epoch": 38.03509781357883, "grad_norm": 0.8316954374313354, "learning_rate": 0.0012392980437284237, "loss": 0.5964, "step": 132210 }, { "epoch": 38.037974683544306, "grad_norm": 0.4836975038051605, "learning_rate": 0.0012392405063291138, "loss": 0.51, "step": 132220 }, { "epoch": 38.04085155350978, "grad_norm": 0.8071117997169495, "learning_rate": 0.0012391829689298044, "loss": 0.5579, "step": 132230 }, { "epoch": 38.04372842347526, "grad_norm": 0.9849600195884705, "learning_rate": 0.001239125431530495, "loss": 0.647, "step": 132240 }, { "epoch": 38.046605293440734, "grad_norm": 1.1982377767562866, "learning_rate": 0.0012390678941311853, "loss": 0.5955, "step": 132250 }, { "epoch": 38.04948216340621, "grad_norm": 1.5779809951782227, "learning_rate": 0.0012390103567318758, "loss": 0.6125, "step": 132260 }, { "epoch": 38.052359033371694, "grad_norm": 0.6238377690315247, "learning_rate": 0.0012389528193325662, "loss": 0.5222, "step": 132270 }, { "epoch": 38.05523590333717, "grad_norm": 1.3130406141281128, "learning_rate": 0.0012388952819332565, "loss": 0.7137, "step": 132280 }, { "epoch": 38.058112773302646, "grad_norm": 1.2333828210830688, "learning_rate": 0.001238837744533947, "loss": 0.5554, "step": 132290 }, { "epoch": 38.06098964326812, "grad_norm": 0.7976217865943909, "learning_rate": 0.0012387802071346374, "loss": 0.6248, "step": 132300 }, { "epoch": 38.0638665132336, "grad_norm": 1.3080419301986694, "learning_rate": 0.001238722669735328, "loss": 0.5709, "step": 132310 }, { "epoch": 38.06674338319908, "grad_norm": 1.4633510112762451, "learning_rate": 0.0012386651323360186, "loss": 0.5394, "step": 132320 }, { "epoch": 38.06962025316456, "grad_norm": 0.966442346572876, "learning_rate": 0.001238607594936709, "loss": 0.6397, "step": 132330 }, { "epoch": 38.072497123130034, "grad_norm": 1.048430323600769, "learning_rate": 0.0012385500575373993, "loss": 0.6378, "step": 132340 }, { "epoch": 38.07537399309551, "grad_norm": 1.0063841342926025, "learning_rate": 0.0012384925201380898, "loss": 0.6601, "step": 132350 }, { "epoch": 38.07825086306099, "grad_norm": 1.0578863620758057, "learning_rate": 0.0012384349827387802, "loss": 0.6097, "step": 132360 }, { "epoch": 38.08112773302647, "grad_norm": 1.1930075883865356, "learning_rate": 0.0012383774453394707, "loss": 0.4415, "step": 132370 }, { "epoch": 38.084004602991946, "grad_norm": 1.006708025932312, "learning_rate": 0.001238319907940161, "loss": 0.7529, "step": 132380 }, { "epoch": 38.08688147295742, "grad_norm": 1.1744308471679688, "learning_rate": 0.0012382623705408516, "loss": 0.5924, "step": 132390 }, { "epoch": 38.0897583429229, "grad_norm": 1.3309987783432007, "learning_rate": 0.001238204833141542, "loss": 0.5954, "step": 132400 }, { "epoch": 38.092635212888375, "grad_norm": 2.037987470626831, "learning_rate": 0.0012381472957422323, "loss": 0.5678, "step": 132410 }, { "epoch": 38.09551208285386, "grad_norm": 1.781354308128357, "learning_rate": 0.001238089758342923, "loss": 0.5393, "step": 132420 }, { "epoch": 38.098388952819334, "grad_norm": 0.8655934929847717, "learning_rate": 0.0012380322209436135, "loss": 0.6604, "step": 132430 }, { "epoch": 38.10126582278481, "grad_norm": 1.211317777633667, "learning_rate": 0.0012379746835443038, "loss": 0.6037, "step": 132440 }, { "epoch": 38.10414269275029, "grad_norm": 3.7030465602874756, "learning_rate": 0.0012379171461449944, "loss": 0.5351, "step": 132450 }, { "epoch": 38.10701956271576, "grad_norm": 1.1300525665283203, "learning_rate": 0.0012378596087456847, "loss": 0.5115, "step": 132460 }, { "epoch": 38.10989643268124, "grad_norm": 1.3094666004180908, "learning_rate": 0.001237802071346375, "loss": 0.6026, "step": 132470 }, { "epoch": 38.11277330264672, "grad_norm": 1.0488979816436768, "learning_rate": 0.0012377445339470656, "loss": 0.5678, "step": 132480 }, { "epoch": 38.1156501726122, "grad_norm": 1.8056756258010864, "learning_rate": 0.001237686996547756, "loss": 0.6597, "step": 132490 }, { "epoch": 38.118527042577675, "grad_norm": 1.8488965034484863, "learning_rate": 0.0012376294591484465, "loss": 0.5976, "step": 132500 }, { "epoch": 38.12140391254315, "grad_norm": 1.4341037273406982, "learning_rate": 0.0012375719217491371, "loss": 0.549, "step": 132510 }, { "epoch": 38.12428078250863, "grad_norm": 1.106615424156189, "learning_rate": 0.0012375143843498272, "loss": 0.5251, "step": 132520 }, { "epoch": 38.12715765247411, "grad_norm": 1.3512393236160278, "learning_rate": 0.0012374568469505178, "loss": 0.7392, "step": 132530 }, { "epoch": 38.13003452243959, "grad_norm": 1.9344843626022339, "learning_rate": 0.0012373993095512084, "loss": 0.6309, "step": 132540 }, { "epoch": 38.13291139240506, "grad_norm": 0.7179101705551147, "learning_rate": 0.0012373417721518987, "loss": 0.5825, "step": 132550 }, { "epoch": 38.13578826237054, "grad_norm": 1.040221095085144, "learning_rate": 0.0012372842347525893, "loss": 0.5054, "step": 132560 }, { "epoch": 38.138665132336016, "grad_norm": 2.1019983291625977, "learning_rate": 0.0012372266973532798, "loss": 0.7187, "step": 132570 }, { "epoch": 38.1415420023015, "grad_norm": 1.5887856483459473, "learning_rate": 0.00123716915995397, "loss": 0.6442, "step": 132580 }, { "epoch": 38.144418872266975, "grad_norm": 0.510649561882019, "learning_rate": 0.0012371116225546605, "loss": 0.594, "step": 132590 }, { "epoch": 38.14729574223245, "grad_norm": 3.0478079319000244, "learning_rate": 0.0012370540851553509, "loss": 0.8238, "step": 132600 }, { "epoch": 38.15017261219793, "grad_norm": 3.591064453125, "learning_rate": 0.0012369965477560415, "loss": 0.5757, "step": 132610 }, { "epoch": 38.153049482163404, "grad_norm": 0.7260814309120178, "learning_rate": 0.001236939010356732, "loss": 0.5553, "step": 132620 }, { "epoch": 38.15592635212889, "grad_norm": 1.3846741914749146, "learning_rate": 0.0012368814729574224, "loss": 0.7782, "step": 132630 }, { "epoch": 38.15880322209436, "grad_norm": 1.5358296632766724, "learning_rate": 0.0012368239355581127, "loss": 0.492, "step": 132640 }, { "epoch": 38.16168009205984, "grad_norm": 1.1677360534667969, "learning_rate": 0.0012367663981588033, "loss": 0.5988, "step": 132650 }, { "epoch": 38.164556962025316, "grad_norm": 1.8596811294555664, "learning_rate": 0.0012367088607594936, "loss": 0.5524, "step": 132660 }, { "epoch": 38.16743383199079, "grad_norm": 1.186894416809082, "learning_rate": 0.0012366513233601842, "loss": 0.7038, "step": 132670 }, { "epoch": 38.170310701956275, "grad_norm": 0.9824629426002502, "learning_rate": 0.0012365937859608747, "loss": 0.5871, "step": 132680 }, { "epoch": 38.17318757192175, "grad_norm": 1.0085179805755615, "learning_rate": 0.001236536248561565, "loss": 0.5063, "step": 132690 }, { "epoch": 38.17606444188723, "grad_norm": 1.3567475080490112, "learning_rate": 0.0012364787111622554, "loss": 0.5187, "step": 132700 }, { "epoch": 38.178941311852704, "grad_norm": 1.0149626731872559, "learning_rate": 0.001236421173762946, "loss": 0.5626, "step": 132710 }, { "epoch": 38.18181818181818, "grad_norm": 0.695844829082489, "learning_rate": 0.0012363636363636364, "loss": 0.5326, "step": 132720 }, { "epoch": 38.184695051783656, "grad_norm": 2.3106305599212646, "learning_rate": 0.001236306098964327, "loss": 0.5668, "step": 132730 }, { "epoch": 38.18757192174914, "grad_norm": 0.6717910766601562, "learning_rate": 0.0012362485615650173, "loss": 0.5956, "step": 132740 }, { "epoch": 38.190448791714616, "grad_norm": 3.082894802093506, "learning_rate": 0.0012361910241657078, "loss": 0.5766, "step": 132750 }, { "epoch": 38.19332566168009, "grad_norm": 1.4697556495666504, "learning_rate": 0.0012361334867663982, "loss": 0.68, "step": 132760 }, { "epoch": 38.19620253164557, "grad_norm": 0.9854005575180054, "learning_rate": 0.0012360759493670885, "loss": 0.652, "step": 132770 }, { "epoch": 38.199079401611044, "grad_norm": 1.1878787279129028, "learning_rate": 0.001236018411967779, "loss": 0.6073, "step": 132780 }, { "epoch": 38.20195627157653, "grad_norm": 1.5987125635147095, "learning_rate": 0.0012359608745684697, "loss": 0.751, "step": 132790 }, { "epoch": 38.204833141542004, "grad_norm": 1.5619025230407715, "learning_rate": 0.00123590333716916, "loss": 0.5765, "step": 132800 }, { "epoch": 38.20771001150748, "grad_norm": 1.4606014490127563, "learning_rate": 0.0012358457997698506, "loss": 0.5107, "step": 132810 }, { "epoch": 38.210586881472956, "grad_norm": 1.2637006044387817, "learning_rate": 0.001235788262370541, "loss": 0.4729, "step": 132820 }, { "epoch": 38.21346375143843, "grad_norm": 1.0056898593902588, "learning_rate": 0.0012357307249712313, "loss": 0.5174, "step": 132830 }, { "epoch": 38.216340621403916, "grad_norm": 1.3803856372833252, "learning_rate": 0.0012356731875719218, "loss": 0.4996, "step": 132840 }, { "epoch": 38.21921749136939, "grad_norm": 1.5656436681747437, "learning_rate": 0.0012356156501726122, "loss": 0.646, "step": 132850 }, { "epoch": 38.22209436133487, "grad_norm": 1.8405358791351318, "learning_rate": 0.0012355581127733027, "loss": 0.7473, "step": 132860 }, { "epoch": 38.224971231300344, "grad_norm": 1.3105531930923462, "learning_rate": 0.001235500575373993, "loss": 0.5154, "step": 132870 }, { "epoch": 38.22784810126582, "grad_norm": 1.4645358324050903, "learning_rate": 0.0012354430379746834, "loss": 0.6031, "step": 132880 }, { "epoch": 38.230724971231304, "grad_norm": 0.8027086853981018, "learning_rate": 0.001235385500575374, "loss": 0.6324, "step": 132890 }, { "epoch": 38.23360184119678, "grad_norm": 1.3402018547058105, "learning_rate": 0.0012353279631760646, "loss": 0.749, "step": 132900 }, { "epoch": 38.236478711162256, "grad_norm": 2.7731051445007324, "learning_rate": 0.001235270425776755, "loss": 0.5248, "step": 132910 }, { "epoch": 38.23935558112773, "grad_norm": 1.685225248336792, "learning_rate": 0.0012352128883774455, "loss": 0.716, "step": 132920 }, { "epoch": 38.24223245109321, "grad_norm": 1.0952332019805908, "learning_rate": 0.0012351553509781358, "loss": 0.6736, "step": 132930 }, { "epoch": 38.245109321058685, "grad_norm": 0.8370102643966675, "learning_rate": 0.0012350978135788262, "loss": 0.6918, "step": 132940 }, { "epoch": 38.24798619102417, "grad_norm": 1.5123059749603271, "learning_rate": 0.0012350402761795167, "loss": 0.4837, "step": 132950 }, { "epoch": 38.250863060989644, "grad_norm": 1.647943377494812, "learning_rate": 0.001234982738780207, "loss": 0.5784, "step": 132960 }, { "epoch": 38.25373993095512, "grad_norm": 1.5246868133544922, "learning_rate": 0.0012349252013808976, "loss": 0.7405, "step": 132970 }, { "epoch": 38.2566168009206, "grad_norm": 0.993175208568573, "learning_rate": 0.0012348676639815882, "loss": 0.5153, "step": 132980 }, { "epoch": 38.25949367088607, "grad_norm": 1.4523802995681763, "learning_rate": 0.0012348101265822783, "loss": 0.5595, "step": 132990 }, { "epoch": 38.262370540851556, "grad_norm": 2.0486392974853516, "learning_rate": 0.0012347525891829689, "loss": 0.5696, "step": 133000 }, { "epoch": 38.26524741081703, "grad_norm": 0.844657838344574, "learning_rate": 0.0012346950517836595, "loss": 0.5499, "step": 133010 }, { "epoch": 38.26812428078251, "grad_norm": 0.8529022336006165, "learning_rate": 0.0012346375143843498, "loss": 0.5848, "step": 133020 }, { "epoch": 38.271001150747985, "grad_norm": 1.6870328187942505, "learning_rate": 0.0012345799769850404, "loss": 0.738, "step": 133030 }, { "epoch": 38.27387802071346, "grad_norm": 1.5348783731460571, "learning_rate": 0.001234522439585731, "loss": 0.6841, "step": 133040 }, { "epoch": 38.276754890678944, "grad_norm": 1.4797903299331665, "learning_rate": 0.001234464902186421, "loss": 0.687, "step": 133050 }, { "epoch": 38.27963176064442, "grad_norm": 0.7240067720413208, "learning_rate": 0.0012344073647871116, "loss": 0.6881, "step": 133060 }, { "epoch": 38.2825086306099, "grad_norm": 1.042326807975769, "learning_rate": 0.001234349827387802, "loss": 0.6737, "step": 133070 }, { "epoch": 38.28538550057537, "grad_norm": 1.0076996088027954, "learning_rate": 0.0012342922899884925, "loss": 0.5889, "step": 133080 }, { "epoch": 38.28826237054085, "grad_norm": 1.3625144958496094, "learning_rate": 0.001234234752589183, "loss": 0.5754, "step": 133090 }, { "epoch": 38.29113924050633, "grad_norm": 2.1912364959716797, "learning_rate": 0.0012341772151898734, "loss": 0.6799, "step": 133100 }, { "epoch": 38.29401611047181, "grad_norm": 1.9305275678634644, "learning_rate": 0.0012341196777905638, "loss": 0.6578, "step": 133110 }, { "epoch": 38.296892980437285, "grad_norm": 0.6557206511497498, "learning_rate": 0.0012340621403912544, "loss": 0.4967, "step": 133120 }, { "epoch": 38.29976985040276, "grad_norm": 1.4963761568069458, "learning_rate": 0.0012340046029919447, "loss": 0.6751, "step": 133130 }, { "epoch": 38.30264672036824, "grad_norm": 0.5347162485122681, "learning_rate": 0.0012339470655926353, "loss": 0.5458, "step": 133140 }, { "epoch": 38.30552359033371, "grad_norm": 1.1696925163269043, "learning_rate": 0.0012338895281933258, "loss": 0.5554, "step": 133150 }, { "epoch": 38.3084004602992, "grad_norm": 1.255153775215149, "learning_rate": 0.0012338319907940162, "loss": 0.5997, "step": 133160 }, { "epoch": 38.31127733026467, "grad_norm": 1.276868462562561, "learning_rate": 0.0012337744533947065, "loss": 0.5341, "step": 133170 }, { "epoch": 38.31415420023015, "grad_norm": 1.1900755167007446, "learning_rate": 0.0012337169159953969, "loss": 0.6657, "step": 133180 }, { "epoch": 38.317031070195625, "grad_norm": 1.272753357887268, "learning_rate": 0.0012336593785960874, "loss": 0.6213, "step": 133190 }, { "epoch": 38.3199079401611, "grad_norm": 1.2556835412979126, "learning_rate": 0.001233601841196778, "loss": 0.5612, "step": 133200 }, { "epoch": 38.322784810126585, "grad_norm": 1.4779196977615356, "learning_rate": 0.0012335443037974683, "loss": 0.7319, "step": 133210 }, { "epoch": 38.32566168009206, "grad_norm": 0.9553693532943726, "learning_rate": 0.001233486766398159, "loss": 0.6401, "step": 133220 }, { "epoch": 38.32853855005754, "grad_norm": 1.4257975816726685, "learning_rate": 0.0012334292289988493, "loss": 0.6277, "step": 133230 }, { "epoch": 38.33141542002301, "grad_norm": 1.3260581493377686, "learning_rate": 0.0012333716915995396, "loss": 0.486, "step": 133240 }, { "epoch": 38.33429228998849, "grad_norm": 0.7727785706520081, "learning_rate": 0.0012333141542002302, "loss": 0.5932, "step": 133250 }, { "epoch": 38.33716915995397, "grad_norm": 0.8448812365531921, "learning_rate": 0.0012332566168009207, "loss": 0.5816, "step": 133260 }, { "epoch": 38.34004602991945, "grad_norm": 1.2498738765716553, "learning_rate": 0.001233199079401611, "loss": 0.6529, "step": 133270 }, { "epoch": 38.342922899884925, "grad_norm": 1.2137175798416138, "learning_rate": 0.0012331415420023016, "loss": 0.6695, "step": 133280 }, { "epoch": 38.3457997698504, "grad_norm": 1.1170567274093628, "learning_rate": 0.0012330840046029918, "loss": 0.5807, "step": 133290 }, { "epoch": 38.34867663981588, "grad_norm": 1.30861234664917, "learning_rate": 0.0012330264672036823, "loss": 0.5589, "step": 133300 }, { "epoch": 38.35155350978136, "grad_norm": 1.1602540016174316, "learning_rate": 0.001232968929804373, "loss": 0.7161, "step": 133310 }, { "epoch": 38.35443037974684, "grad_norm": 0.8998003602027893, "learning_rate": 0.0012329113924050633, "loss": 0.6301, "step": 133320 }, { "epoch": 38.35730724971231, "grad_norm": 1.37026047706604, "learning_rate": 0.0012328538550057538, "loss": 0.6087, "step": 133330 }, { "epoch": 38.36018411967779, "grad_norm": 1.6622134447097778, "learning_rate": 0.0012327963176064444, "loss": 0.5575, "step": 133340 }, { "epoch": 38.363060989643266, "grad_norm": 1.9758449792861938, "learning_rate": 0.0012327387802071345, "loss": 0.607, "step": 133350 }, { "epoch": 38.36593785960875, "grad_norm": 1.2956840991973877, "learning_rate": 0.001232681242807825, "loss": 0.5868, "step": 133360 }, { "epoch": 38.368814729574225, "grad_norm": 1.9339905977249146, "learning_rate": 0.0012326237054085156, "loss": 0.5917, "step": 133370 }, { "epoch": 38.3716915995397, "grad_norm": 3.4469540119171143, "learning_rate": 0.001232566168009206, "loss": 0.5789, "step": 133380 }, { "epoch": 38.37456846950518, "grad_norm": 0.7272837162017822, "learning_rate": 0.0012325086306098965, "loss": 0.6528, "step": 133390 }, { "epoch": 38.377445339470654, "grad_norm": 1.8793522119522095, "learning_rate": 0.0012324510932105871, "loss": 0.6516, "step": 133400 }, { "epoch": 38.38032220943613, "grad_norm": 2.0462465286254883, "learning_rate": 0.0012323935558112772, "loss": 0.6192, "step": 133410 }, { "epoch": 38.383199079401614, "grad_norm": 1.2897824048995972, "learning_rate": 0.0012323360184119678, "loss": 0.6036, "step": 133420 }, { "epoch": 38.38607594936709, "grad_norm": 0.7173210978507996, "learning_rate": 0.0012322784810126582, "loss": 0.5598, "step": 133430 }, { "epoch": 38.388952819332566, "grad_norm": 1.1728123426437378, "learning_rate": 0.0012322209436133487, "loss": 0.5817, "step": 133440 }, { "epoch": 38.39182968929804, "grad_norm": 1.7572901248931885, "learning_rate": 0.0012321634062140393, "loss": 0.6581, "step": 133450 }, { "epoch": 38.39470655926352, "grad_norm": 1.3451052904129028, "learning_rate": 0.0012321058688147296, "loss": 0.6763, "step": 133460 }, { "epoch": 38.397583429229, "grad_norm": 1.0185322761535645, "learning_rate": 0.00123204833141542, "loss": 0.5148, "step": 133470 }, { "epoch": 38.40046029919448, "grad_norm": 1.486271858215332, "learning_rate": 0.0012319907940161105, "loss": 0.5637, "step": 133480 }, { "epoch": 38.403337169159954, "grad_norm": 0.8550745248794556, "learning_rate": 0.0012319332566168009, "loss": 0.5611, "step": 133490 }, { "epoch": 38.40621403912543, "grad_norm": 2.610830307006836, "learning_rate": 0.0012318757192174914, "loss": 0.5984, "step": 133500 }, { "epoch": 38.40909090909091, "grad_norm": 1.8552309274673462, "learning_rate": 0.001231818181818182, "loss": 0.6538, "step": 133510 }, { "epoch": 38.41196777905639, "grad_norm": 1.9630895853042603, "learning_rate": 0.0012317606444188724, "loss": 0.5923, "step": 133520 }, { "epoch": 38.414844649021866, "grad_norm": 1.2116204500198364, "learning_rate": 0.0012317031070195627, "loss": 0.6664, "step": 133530 }, { "epoch": 38.41772151898734, "grad_norm": 1.129745602607727, "learning_rate": 0.001231645569620253, "loss": 0.6279, "step": 133540 }, { "epoch": 38.42059838895282, "grad_norm": 1.2062726020812988, "learning_rate": 0.0012315880322209436, "loss": 0.49, "step": 133550 }, { "epoch": 38.423475258918295, "grad_norm": 0.9983264803886414, "learning_rate": 0.0012315304948216342, "loss": 0.577, "step": 133560 }, { "epoch": 38.42635212888378, "grad_norm": 0.6293587684631348, "learning_rate": 0.0012314729574223245, "loss": 0.6034, "step": 133570 }, { "epoch": 38.429228998849254, "grad_norm": 1.0586649179458618, "learning_rate": 0.001231415420023015, "loss": 0.5387, "step": 133580 }, { "epoch": 38.43210586881473, "grad_norm": 0.6602687835693359, "learning_rate": 0.0012313578826237054, "loss": 0.4645, "step": 133590 }, { "epoch": 38.43498273878021, "grad_norm": 0.9752905964851379, "learning_rate": 0.0012313003452243958, "loss": 0.7504, "step": 133600 }, { "epoch": 38.43785960874568, "grad_norm": 1.8202476501464844, "learning_rate": 0.0012312428078250864, "loss": 0.5812, "step": 133610 }, { "epoch": 38.44073647871116, "grad_norm": 1.556415319442749, "learning_rate": 0.001231185270425777, "loss": 0.6114, "step": 133620 }, { "epoch": 38.44361334867664, "grad_norm": 3.3483002185821533, "learning_rate": 0.0012311277330264673, "loss": 0.5367, "step": 133630 }, { "epoch": 38.44649021864212, "grad_norm": 1.046976923942566, "learning_rate": 0.0012310701956271578, "loss": 0.5807, "step": 133640 }, { "epoch": 38.449367088607595, "grad_norm": 0.946054220199585, "learning_rate": 0.001231012658227848, "loss": 0.5683, "step": 133650 }, { "epoch": 38.45224395857307, "grad_norm": 0.9873062372207642, "learning_rate": 0.0012309551208285385, "loss": 0.5193, "step": 133660 }, { "epoch": 38.45512082853855, "grad_norm": 0.6971515417098999, "learning_rate": 0.001230897583429229, "loss": 0.6525, "step": 133670 }, { "epoch": 38.45799769850403, "grad_norm": 1.610739827156067, "learning_rate": 0.0012308400460299194, "loss": 0.4977, "step": 133680 }, { "epoch": 38.46087456846951, "grad_norm": 1.1608734130859375, "learning_rate": 0.00123078250863061, "loss": 0.6508, "step": 133690 }, { "epoch": 38.46375143843498, "grad_norm": 1.0435413122177124, "learning_rate": 0.0012307249712313003, "loss": 0.6226, "step": 133700 }, { "epoch": 38.46662830840046, "grad_norm": 1.213723063468933, "learning_rate": 0.0012306674338319907, "loss": 0.6744, "step": 133710 }, { "epoch": 38.469505178365935, "grad_norm": 0.8709338903427124, "learning_rate": 0.0012306098964326813, "loss": 0.6831, "step": 133720 }, { "epoch": 38.47238204833142, "grad_norm": 2.143791913986206, "learning_rate": 0.0012305523590333718, "loss": 0.6013, "step": 133730 }, { "epoch": 38.475258918296895, "grad_norm": 0.7572073340415955, "learning_rate": 0.0012304948216340622, "loss": 0.5704, "step": 133740 }, { "epoch": 38.47813578826237, "grad_norm": 0.7985982894897461, "learning_rate": 0.0012304372842347527, "loss": 0.7257, "step": 133750 }, { "epoch": 38.48101265822785, "grad_norm": 0.6342219710350037, "learning_rate": 0.0012303797468354429, "loss": 0.5052, "step": 133760 }, { "epoch": 38.48388952819332, "grad_norm": 1.145219087600708, "learning_rate": 0.0012303222094361334, "loss": 0.6122, "step": 133770 }, { "epoch": 38.48676639815881, "grad_norm": 1.173548936843872, "learning_rate": 0.001230264672036824, "loss": 0.584, "step": 133780 }, { "epoch": 38.48964326812428, "grad_norm": 0.8687019348144531, "learning_rate": 0.0012302071346375143, "loss": 0.7291, "step": 133790 }, { "epoch": 38.49252013808976, "grad_norm": 1.027146816253662, "learning_rate": 0.001230149597238205, "loss": 0.7596, "step": 133800 }, { "epoch": 38.495397008055235, "grad_norm": 1.134977102279663, "learning_rate": 0.0012300920598388955, "loss": 0.6259, "step": 133810 }, { "epoch": 38.49827387802071, "grad_norm": 1.329917550086975, "learning_rate": 0.0012300345224395856, "loss": 0.5077, "step": 133820 }, { "epoch": 38.50115074798619, "grad_norm": 0.9007286429405212, "learning_rate": 0.0012299769850402762, "loss": 0.4241, "step": 133830 }, { "epoch": 38.50402761795167, "grad_norm": 0.8134470582008362, "learning_rate": 0.0012299194476409667, "loss": 0.7467, "step": 133840 }, { "epoch": 38.50690448791715, "grad_norm": 0.9409294128417969, "learning_rate": 0.001229861910241657, "loss": 0.5675, "step": 133850 }, { "epoch": 38.50978135788262, "grad_norm": 1.8507287502288818, "learning_rate": 0.0012298043728423476, "loss": 0.6074, "step": 133860 }, { "epoch": 38.5126582278481, "grad_norm": 1.9415868520736694, "learning_rate": 0.001229746835443038, "loss": 0.6296, "step": 133870 }, { "epoch": 38.515535097813576, "grad_norm": 0.9612424969673157, "learning_rate": 0.0012296892980437283, "loss": 0.7917, "step": 133880 }, { "epoch": 38.51841196777906, "grad_norm": 0.7644562125205994, "learning_rate": 0.0012296317606444189, "loss": 0.6036, "step": 133890 }, { "epoch": 38.521288837744535, "grad_norm": 0.9084702134132385, "learning_rate": 0.0012295742232451092, "loss": 0.6446, "step": 133900 }, { "epoch": 38.52416570771001, "grad_norm": 3.433183431625366, "learning_rate": 0.0012295166858457998, "loss": 0.6879, "step": 133910 }, { "epoch": 38.52704257767549, "grad_norm": 1.2531462907791138, "learning_rate": 0.0012294591484464904, "loss": 0.6659, "step": 133920 }, { "epoch": 38.529919447640964, "grad_norm": 1.839430809020996, "learning_rate": 0.0012294016110471807, "loss": 0.5674, "step": 133930 }, { "epoch": 38.53279631760645, "grad_norm": 0.8037095069885254, "learning_rate": 0.001229344073647871, "loss": 0.6692, "step": 133940 }, { "epoch": 38.53567318757192, "grad_norm": 1.0715060234069824, "learning_rate": 0.0012292865362485616, "loss": 0.6136, "step": 133950 }, { "epoch": 38.5385500575374, "grad_norm": 2.0912773609161377, "learning_rate": 0.001229228998849252, "loss": 0.6596, "step": 133960 }, { "epoch": 38.541426927502876, "grad_norm": 1.3844530582427979, "learning_rate": 0.0012291714614499425, "loss": 0.739, "step": 133970 }, { "epoch": 38.54430379746835, "grad_norm": 1.3902508020401, "learning_rate": 0.001229113924050633, "loss": 0.5568, "step": 133980 }, { "epoch": 38.547180667433835, "grad_norm": 1.0019570589065552, "learning_rate": 0.0012290563866513234, "loss": 0.5806, "step": 133990 }, { "epoch": 38.55005753739931, "grad_norm": 1.5839662551879883, "learning_rate": 0.0012289988492520138, "loss": 0.5882, "step": 134000 }, { "epoch": 38.55293440736479, "grad_norm": 1.5487682819366455, "learning_rate": 0.0012289413118527041, "loss": 0.6118, "step": 134010 }, { "epoch": 38.555811277330264, "grad_norm": 1.865804672241211, "learning_rate": 0.0012288837744533947, "loss": 0.6503, "step": 134020 }, { "epoch": 38.55868814729574, "grad_norm": 1.4693562984466553, "learning_rate": 0.0012288262370540853, "loss": 0.6027, "step": 134030 }, { "epoch": 38.561565017261216, "grad_norm": 2.394793748855591, "learning_rate": 0.0012287686996547756, "loss": 0.5439, "step": 134040 }, { "epoch": 38.5644418872267, "grad_norm": 0.8702265024185181, "learning_rate": 0.0012287111622554662, "loss": 0.6104, "step": 134050 }, { "epoch": 38.567318757192176, "grad_norm": 0.8777639865875244, "learning_rate": 0.0012286536248561565, "loss": 0.6343, "step": 134060 }, { "epoch": 38.57019562715765, "grad_norm": 1.1775953769683838, "learning_rate": 0.0012285960874568469, "loss": 0.5819, "step": 134070 }, { "epoch": 38.57307249712313, "grad_norm": 1.5000659227371216, "learning_rate": 0.0012285385500575374, "loss": 0.7142, "step": 134080 }, { "epoch": 38.575949367088604, "grad_norm": 1.9106791019439697, "learning_rate": 0.001228481012658228, "loss": 0.6978, "step": 134090 }, { "epoch": 38.57882623705409, "grad_norm": 1.5800025463104248, "learning_rate": 0.0012284234752589183, "loss": 0.4473, "step": 134100 }, { "epoch": 38.581703107019564, "grad_norm": 0.8732807636260986, "learning_rate": 0.001228365937859609, "loss": 0.6302, "step": 134110 }, { "epoch": 38.58457997698504, "grad_norm": 1.1349979639053345, "learning_rate": 0.001228308400460299, "loss": 0.6918, "step": 134120 }, { "epoch": 38.587456846950516, "grad_norm": 1.3548243045806885, "learning_rate": 0.0012282508630609896, "loss": 0.5532, "step": 134130 }, { "epoch": 38.59033371691599, "grad_norm": 1.5461825132369995, "learning_rate": 0.0012281933256616802, "loss": 0.6673, "step": 134140 }, { "epoch": 38.593210586881476, "grad_norm": 1.1243493556976318, "learning_rate": 0.0012281357882623705, "loss": 0.6001, "step": 134150 }, { "epoch": 38.59608745684695, "grad_norm": 0.9237939715385437, "learning_rate": 0.001228078250863061, "loss": 0.5842, "step": 134160 }, { "epoch": 38.59896432681243, "grad_norm": 3.361201763153076, "learning_rate": 0.0012280207134637516, "loss": 0.7875, "step": 134170 }, { "epoch": 38.601841196777904, "grad_norm": 1.9701899290084839, "learning_rate": 0.0012279631760644418, "loss": 0.6978, "step": 134180 }, { "epoch": 38.60471806674338, "grad_norm": 0.9994022250175476, "learning_rate": 0.0012279056386651323, "loss": 0.5747, "step": 134190 }, { "epoch": 38.607594936708864, "grad_norm": 2.1689603328704834, "learning_rate": 0.001227848101265823, "loss": 0.758, "step": 134200 }, { "epoch": 38.61047180667434, "grad_norm": 1.7986520528793335, "learning_rate": 0.0012277905638665132, "loss": 0.5416, "step": 134210 }, { "epoch": 38.613348676639816, "grad_norm": 1.3142545223236084, "learning_rate": 0.0012277330264672038, "loss": 0.6965, "step": 134220 }, { "epoch": 38.61622554660529, "grad_norm": 1.373721957206726, "learning_rate": 0.0012276754890678942, "loss": 0.7171, "step": 134230 }, { "epoch": 38.61910241657077, "grad_norm": 1.5379250049591064, "learning_rate": 0.0012276179516685845, "loss": 0.7143, "step": 134240 }, { "epoch": 38.621979286536245, "grad_norm": 1.862517237663269, "learning_rate": 0.001227560414269275, "loss": 0.6583, "step": 134250 }, { "epoch": 38.62485615650173, "grad_norm": 1.06504225730896, "learning_rate": 0.0012275028768699654, "loss": 0.6047, "step": 134260 }, { "epoch": 38.627733026467205, "grad_norm": 2.9092702865600586, "learning_rate": 0.001227445339470656, "loss": 0.8426, "step": 134270 }, { "epoch": 38.63060989643268, "grad_norm": 1.5706006288528442, "learning_rate": 0.0012273878020713465, "loss": 0.8058, "step": 134280 }, { "epoch": 38.63348676639816, "grad_norm": 0.8340823650360107, "learning_rate": 0.001227330264672037, "loss": 0.5183, "step": 134290 }, { "epoch": 38.63636363636363, "grad_norm": 1.396660327911377, "learning_rate": 0.0012272727272727272, "loss": 0.6553, "step": 134300 }, { "epoch": 38.639240506329116, "grad_norm": 1.3502507209777832, "learning_rate": 0.0012272151898734178, "loss": 0.7602, "step": 134310 }, { "epoch": 38.64211737629459, "grad_norm": 2.2982430458068848, "learning_rate": 0.0012271576524741082, "loss": 0.4996, "step": 134320 }, { "epoch": 38.64499424626007, "grad_norm": 0.8192237615585327, "learning_rate": 0.0012271001150747987, "loss": 0.7549, "step": 134330 }, { "epoch": 38.647871116225545, "grad_norm": 1.0968835353851318, "learning_rate": 0.001227042577675489, "loss": 0.5681, "step": 134340 }, { "epoch": 38.65074798619102, "grad_norm": 1.7767431735992432, "learning_rate": 0.0012269850402761796, "loss": 0.6573, "step": 134350 }, { "epoch": 38.653624856156505, "grad_norm": 1.633641242980957, "learning_rate": 0.00122692750287687, "loss": 0.6354, "step": 134360 }, { "epoch": 38.65650172612198, "grad_norm": 0.6496639847755432, "learning_rate": 0.0012268699654775603, "loss": 0.6243, "step": 134370 }, { "epoch": 38.65937859608746, "grad_norm": 0.7824620008468628, "learning_rate": 0.0012268124280782509, "loss": 0.5607, "step": 134380 }, { "epoch": 38.66225546605293, "grad_norm": 1.2854362726211548, "learning_rate": 0.0012267548906789414, "loss": 0.6231, "step": 134390 }, { "epoch": 38.66513233601841, "grad_norm": 0.5477733612060547, "learning_rate": 0.0012266973532796318, "loss": 0.5092, "step": 134400 }, { "epoch": 38.66800920598389, "grad_norm": 1.374019980430603, "learning_rate": 0.0012266398158803224, "loss": 0.6595, "step": 134410 }, { "epoch": 38.67088607594937, "grad_norm": 1.1918295621871948, "learning_rate": 0.0012265822784810127, "loss": 0.6954, "step": 134420 }, { "epoch": 38.673762945914845, "grad_norm": 1.1529971361160278, "learning_rate": 0.001226524741081703, "loss": 0.7031, "step": 134430 }, { "epoch": 38.67663981588032, "grad_norm": 2.057079315185547, "learning_rate": 0.0012264672036823936, "loss": 0.6979, "step": 134440 }, { "epoch": 38.6795166858458, "grad_norm": 1.1394052505493164, "learning_rate": 0.001226409666283084, "loss": 0.6295, "step": 134450 }, { "epoch": 38.68239355581128, "grad_norm": 1.361406922340393, "learning_rate": 0.0012263521288837745, "loss": 0.5445, "step": 134460 }, { "epoch": 38.68527042577676, "grad_norm": 1.7379945516586304, "learning_rate": 0.001226294591484465, "loss": 0.7207, "step": 134470 }, { "epoch": 38.68814729574223, "grad_norm": 0.7734658122062683, "learning_rate": 0.0012262370540851552, "loss": 0.5062, "step": 134480 }, { "epoch": 38.69102416570771, "grad_norm": 1.0998436212539673, "learning_rate": 0.0012261795166858458, "loss": 0.6641, "step": 134490 }, { "epoch": 38.693901035673186, "grad_norm": 1.0210763216018677, "learning_rate": 0.0012261219792865363, "loss": 0.7588, "step": 134500 }, { "epoch": 38.69677790563866, "grad_norm": 1.6209105253219604, "learning_rate": 0.0012260644418872267, "loss": 0.6021, "step": 134510 }, { "epoch": 38.699654775604145, "grad_norm": 0.8702322840690613, "learning_rate": 0.0012260069044879173, "loss": 0.5369, "step": 134520 }, { "epoch": 38.70253164556962, "grad_norm": 1.8293706178665161, "learning_rate": 0.0012259493670886076, "loss": 0.678, "step": 134530 }, { "epoch": 38.7054085155351, "grad_norm": 1.0137403011322021, "learning_rate": 0.001225891829689298, "loss": 0.5684, "step": 134540 }, { "epoch": 38.708285385500574, "grad_norm": 1.187330961227417, "learning_rate": 0.0012258342922899885, "loss": 0.5611, "step": 134550 }, { "epoch": 38.71116225546605, "grad_norm": 2.4022719860076904, "learning_rate": 0.0012257767548906789, "loss": 0.6088, "step": 134560 }, { "epoch": 38.71403912543153, "grad_norm": 1.3363478183746338, "learning_rate": 0.0012257192174913694, "loss": 0.4246, "step": 134570 }, { "epoch": 38.71691599539701, "grad_norm": 0.9785165190696716, "learning_rate": 0.00122566168009206, "loss": 0.8108, "step": 134580 }, { "epoch": 38.719792865362486, "grad_norm": 1.4219388961791992, "learning_rate": 0.0012256041426927501, "loss": 0.63, "step": 134590 }, { "epoch": 38.72266973532796, "grad_norm": 1.3184560537338257, "learning_rate": 0.0012255466052934407, "loss": 0.8154, "step": 134600 }, { "epoch": 38.72554660529344, "grad_norm": 1.439013123512268, "learning_rate": 0.0012254890678941313, "loss": 0.6106, "step": 134610 }, { "epoch": 38.72842347525892, "grad_norm": 1.6526929140090942, "learning_rate": 0.0012254315304948216, "loss": 0.7268, "step": 134620 }, { "epoch": 38.7313003452244, "grad_norm": 1.8721530437469482, "learning_rate": 0.0012253739930955122, "loss": 0.65, "step": 134630 }, { "epoch": 38.734177215189874, "grad_norm": 1.263715386390686, "learning_rate": 0.0012253164556962027, "loss": 0.6519, "step": 134640 }, { "epoch": 38.73705408515535, "grad_norm": 1.102567434310913, "learning_rate": 0.0012252589182968929, "loss": 0.5384, "step": 134650 }, { "epoch": 38.739930955120826, "grad_norm": 1.1207752227783203, "learning_rate": 0.0012252013808975834, "loss": 0.6378, "step": 134660 }, { "epoch": 38.74280782508631, "grad_norm": 2.1348612308502197, "learning_rate": 0.001225143843498274, "loss": 0.7594, "step": 134670 }, { "epoch": 38.745684695051786, "grad_norm": 1.535105586051941, "learning_rate": 0.0012250863060989643, "loss": 0.5714, "step": 134680 }, { "epoch": 38.74856156501726, "grad_norm": 0.9707946181297302, "learning_rate": 0.001225028768699655, "loss": 0.7485, "step": 134690 }, { "epoch": 38.75143843498274, "grad_norm": 1.24343740940094, "learning_rate": 0.0012249712313003452, "loss": 0.5716, "step": 134700 }, { "epoch": 38.754315304948214, "grad_norm": 1.622003197669983, "learning_rate": 0.0012249136939010356, "loss": 0.6008, "step": 134710 }, { "epoch": 38.75719217491369, "grad_norm": 1.3383917808532715, "learning_rate": 0.0012248561565017262, "loss": 0.5986, "step": 134720 }, { "epoch": 38.760069044879174, "grad_norm": 1.12409508228302, "learning_rate": 0.0012247986191024165, "loss": 0.6823, "step": 134730 }, { "epoch": 38.76294591484465, "grad_norm": 0.7798771262168884, "learning_rate": 0.001224741081703107, "loss": 0.5737, "step": 134740 }, { "epoch": 38.765822784810126, "grad_norm": 2.0525193214416504, "learning_rate": 0.0012246835443037976, "loss": 0.6892, "step": 134750 }, { "epoch": 38.7686996547756, "grad_norm": 1.8671754598617554, "learning_rate": 0.001224626006904488, "loss": 0.56, "step": 134760 }, { "epoch": 38.77157652474108, "grad_norm": 3.0512583255767822, "learning_rate": 0.0012245684695051783, "loss": 0.6371, "step": 134770 }, { "epoch": 38.77445339470656, "grad_norm": 1.6246780157089233, "learning_rate": 0.0012245109321058689, "loss": 0.6784, "step": 134780 }, { "epoch": 38.77733026467204, "grad_norm": 1.3732513189315796, "learning_rate": 0.0012244533947065592, "loss": 0.6271, "step": 134790 }, { "epoch": 38.780207134637514, "grad_norm": 1.9371730089187622, "learning_rate": 0.0012243958573072498, "loss": 0.5832, "step": 134800 }, { "epoch": 38.78308400460299, "grad_norm": 2.405086040496826, "learning_rate": 0.0012243383199079401, "loss": 0.645, "step": 134810 }, { "epoch": 38.78596087456847, "grad_norm": 0.5637243986129761, "learning_rate": 0.0012242807825086307, "loss": 0.6048, "step": 134820 }, { "epoch": 38.78883774453395, "grad_norm": 2.173957586288452, "learning_rate": 0.001224223245109321, "loss": 0.5763, "step": 134830 }, { "epoch": 38.791714614499426, "grad_norm": 0.8004460334777832, "learning_rate": 0.0012241657077100114, "loss": 0.5189, "step": 134840 }, { "epoch": 38.7945914844649, "grad_norm": 1.6665560007095337, "learning_rate": 0.001224108170310702, "loss": 0.6071, "step": 134850 }, { "epoch": 38.79746835443038, "grad_norm": 1.3583487272262573, "learning_rate": 0.0012240506329113925, "loss": 0.5851, "step": 134860 }, { "epoch": 38.800345224395855, "grad_norm": 0.9201472401618958, "learning_rate": 0.0012239930955120829, "loss": 0.7276, "step": 134870 }, { "epoch": 38.80322209436134, "grad_norm": 1.9808578491210938, "learning_rate": 0.0012239355581127734, "loss": 0.7282, "step": 134880 }, { "epoch": 38.806098964326814, "grad_norm": 1.265785813331604, "learning_rate": 0.0012238780207134638, "loss": 0.615, "step": 134890 }, { "epoch": 38.80897583429229, "grad_norm": 2.519041061401367, "learning_rate": 0.0012238204833141541, "loss": 0.6986, "step": 134900 }, { "epoch": 38.81185270425777, "grad_norm": 0.741981565952301, "learning_rate": 0.0012237629459148447, "loss": 0.783, "step": 134910 }, { "epoch": 38.81472957422324, "grad_norm": 0.7398860454559326, "learning_rate": 0.001223705408515535, "loss": 0.7395, "step": 134920 }, { "epoch": 38.81760644418872, "grad_norm": 1.649330973625183, "learning_rate": 0.0012236478711162256, "loss": 0.6214, "step": 134930 }, { "epoch": 38.8204833141542, "grad_norm": 0.9271859526634216, "learning_rate": 0.0012235903337169162, "loss": 0.4768, "step": 134940 }, { "epoch": 38.82336018411968, "grad_norm": 1.1645371913909912, "learning_rate": 0.0012235327963176063, "loss": 0.5528, "step": 134950 }, { "epoch": 38.826237054085155, "grad_norm": 0.8603237867355347, "learning_rate": 0.0012234752589182969, "loss": 0.4423, "step": 134960 }, { "epoch": 38.82911392405063, "grad_norm": 1.0891003608703613, "learning_rate": 0.0012234177215189874, "loss": 0.699, "step": 134970 }, { "epoch": 38.83199079401611, "grad_norm": 1.388288140296936, "learning_rate": 0.0012233601841196778, "loss": 0.5526, "step": 134980 }, { "epoch": 38.83486766398159, "grad_norm": 1.020821452140808, "learning_rate": 0.0012233026467203683, "loss": 0.6448, "step": 134990 }, { "epoch": 38.83774453394707, "grad_norm": 2.769754648208618, "learning_rate": 0.001223245109321059, "loss": 0.7692, "step": 135000 }, { "epoch": 38.84062140391254, "grad_norm": 1.7130990028381348, "learning_rate": 0.001223187571921749, "loss": 0.6156, "step": 135010 }, { "epoch": 38.84349827387802, "grad_norm": 1.1054092645645142, "learning_rate": 0.0012231300345224396, "loss": 0.6647, "step": 135020 }, { "epoch": 38.846375143843495, "grad_norm": 1.117314100265503, "learning_rate": 0.00122307249712313, "loss": 0.7402, "step": 135030 }, { "epoch": 38.84925201380898, "grad_norm": 0.9191248416900635, "learning_rate": 0.0012230149597238205, "loss": 0.6051, "step": 135040 }, { "epoch": 38.852128883774455, "grad_norm": 0.9727329015731812, "learning_rate": 0.001222957422324511, "loss": 0.6136, "step": 135050 }, { "epoch": 38.85500575373993, "grad_norm": 1.8107982873916626, "learning_rate": 0.0012228998849252014, "loss": 0.5448, "step": 135060 }, { "epoch": 38.85788262370541, "grad_norm": 1.7977015972137451, "learning_rate": 0.0012228423475258918, "loss": 0.8397, "step": 135070 }, { "epoch": 38.860759493670884, "grad_norm": 1.2483278512954712, "learning_rate": 0.0012227848101265823, "loss": 0.5643, "step": 135080 }, { "epoch": 38.86363636363637, "grad_norm": 1.123596429824829, "learning_rate": 0.0012227272727272727, "loss": 0.6689, "step": 135090 }, { "epoch": 38.86651323360184, "grad_norm": 0.8028199076652527, "learning_rate": 0.0012226697353279632, "loss": 0.4882, "step": 135100 }, { "epoch": 38.86939010356732, "grad_norm": 1.1254887580871582, "learning_rate": 0.0012226121979286538, "loss": 0.6365, "step": 135110 }, { "epoch": 38.872266973532795, "grad_norm": 0.7200288772583008, "learning_rate": 0.0012225546605293442, "loss": 0.7349, "step": 135120 }, { "epoch": 38.87514384349827, "grad_norm": 1.079779863357544, "learning_rate": 0.0012224971231300345, "loss": 0.6079, "step": 135130 }, { "epoch": 38.878020713463755, "grad_norm": 1.6653759479522705, "learning_rate": 0.0012224395857307249, "loss": 0.4964, "step": 135140 }, { "epoch": 38.88089758342923, "grad_norm": 0.9286338686943054, "learning_rate": 0.0012223820483314154, "loss": 0.5981, "step": 135150 }, { "epoch": 38.88377445339471, "grad_norm": 2.116875171661377, "learning_rate": 0.001222324510932106, "loss": 0.6622, "step": 135160 }, { "epoch": 38.886651323360184, "grad_norm": 1.5225259065628052, "learning_rate": 0.0012222669735327963, "loss": 0.6049, "step": 135170 }, { "epoch": 38.88952819332566, "grad_norm": 0.8139018416404724, "learning_rate": 0.0012222094361334869, "loss": 0.6687, "step": 135180 }, { "epoch": 38.892405063291136, "grad_norm": 1.4654632806777954, "learning_rate": 0.0012221518987341772, "loss": 0.9194, "step": 135190 }, { "epoch": 38.89528193325662, "grad_norm": 0.7388535141944885, "learning_rate": 0.0012220943613348676, "loss": 0.5799, "step": 135200 }, { "epoch": 38.898158803222096, "grad_norm": 2.22902512550354, "learning_rate": 0.0012220368239355581, "loss": 0.7154, "step": 135210 }, { "epoch": 38.90103567318757, "grad_norm": 0.8440847992897034, "learning_rate": 0.0012219792865362487, "loss": 0.6071, "step": 135220 }, { "epoch": 38.90391254315305, "grad_norm": 1.6943475008010864, "learning_rate": 0.001221921749136939, "loss": 0.58, "step": 135230 }, { "epoch": 38.906789413118524, "grad_norm": 0.9315806031227112, "learning_rate": 0.0012218642117376296, "loss": 0.6355, "step": 135240 }, { "epoch": 38.90966628308401, "grad_norm": 1.0329314470291138, "learning_rate": 0.0012218066743383198, "loss": 0.507, "step": 135250 }, { "epoch": 38.912543153049484, "grad_norm": 1.1688392162322998, "learning_rate": 0.0012217491369390103, "loss": 0.586, "step": 135260 }, { "epoch": 38.91542002301496, "grad_norm": 1.9084495306015015, "learning_rate": 0.0012216915995397009, "loss": 0.7832, "step": 135270 }, { "epoch": 38.918296892980436, "grad_norm": 0.831516683101654, "learning_rate": 0.0012216340621403912, "loss": 0.6247, "step": 135280 }, { "epoch": 38.92117376294591, "grad_norm": 2.1510720252990723, "learning_rate": 0.0012215765247410818, "loss": 0.8428, "step": 135290 }, { "epoch": 38.924050632911396, "grad_norm": 1.1522619724273682, "learning_rate": 0.0012215189873417724, "loss": 0.6233, "step": 135300 }, { "epoch": 38.92692750287687, "grad_norm": 0.9792157411575317, "learning_rate": 0.0012214614499424625, "loss": 0.5955, "step": 135310 }, { "epoch": 38.92980437284235, "grad_norm": 1.6152478456497192, "learning_rate": 0.001221403912543153, "loss": 0.4976, "step": 135320 }, { "epoch": 38.932681242807824, "grad_norm": 1.134021520614624, "learning_rate": 0.0012213463751438436, "loss": 0.5842, "step": 135330 }, { "epoch": 38.9355581127733, "grad_norm": 0.6807675957679749, "learning_rate": 0.001221288837744534, "loss": 0.6168, "step": 135340 }, { "epoch": 38.938434982738784, "grad_norm": 1.018829107284546, "learning_rate": 0.0012212313003452245, "loss": 0.5892, "step": 135350 }, { "epoch": 38.94131185270426, "grad_norm": 1.6985194683074951, "learning_rate": 0.0012211737629459149, "loss": 0.5992, "step": 135360 }, { "epoch": 38.944188722669736, "grad_norm": 0.7526320815086365, "learning_rate": 0.0012211162255466052, "loss": 0.6635, "step": 135370 }, { "epoch": 38.94706559263521, "grad_norm": 1.9637551307678223, "learning_rate": 0.0012210586881472958, "loss": 0.5768, "step": 135380 }, { "epoch": 38.94994246260069, "grad_norm": 0.9546074271202087, "learning_rate": 0.0012210011507479861, "loss": 0.5828, "step": 135390 }, { "epoch": 38.952819332566165, "grad_norm": 1.252132534980774, "learning_rate": 0.0012209436133486767, "loss": 0.601, "step": 135400 }, { "epoch": 38.95569620253165, "grad_norm": 1.502431035041809, "learning_rate": 0.0012208860759493673, "loss": 0.6111, "step": 135410 }, { "epoch": 38.958573072497124, "grad_norm": 0.761772871017456, "learning_rate": 0.0012208285385500574, "loss": 0.7508, "step": 135420 }, { "epoch": 38.9614499424626, "grad_norm": 0.6871891617774963, "learning_rate": 0.001220771001150748, "loss": 0.5972, "step": 135430 }, { "epoch": 38.96432681242808, "grad_norm": 1.197084665298462, "learning_rate": 0.0012207134637514385, "loss": 0.623, "step": 135440 }, { "epoch": 38.96720368239355, "grad_norm": 1.1155858039855957, "learning_rate": 0.0012206559263521289, "loss": 0.6548, "step": 135450 }, { "epoch": 38.970080552359036, "grad_norm": 0.9458328485488892, "learning_rate": 0.0012205983889528194, "loss": 0.5072, "step": 135460 }, { "epoch": 38.97295742232451, "grad_norm": 1.7603330612182617, "learning_rate": 0.00122054085155351, "loss": 0.699, "step": 135470 }, { "epoch": 38.97583429228999, "grad_norm": 0.9957621693611145, "learning_rate": 0.0012204833141542001, "loss": 0.5755, "step": 135480 }, { "epoch": 38.978711162255465, "grad_norm": 1.4614057540893555, "learning_rate": 0.0012204257767548907, "loss": 0.6336, "step": 135490 }, { "epoch": 38.98158803222094, "grad_norm": 0.8816256523132324, "learning_rate": 0.001220368239355581, "loss": 0.5357, "step": 135500 }, { "epoch": 38.984464902186424, "grad_norm": 1.272365927696228, "learning_rate": 0.0012203107019562716, "loss": 0.5557, "step": 135510 }, { "epoch": 38.9873417721519, "grad_norm": 1.9417238235473633, "learning_rate": 0.0012202531645569622, "loss": 0.642, "step": 135520 }, { "epoch": 38.99021864211738, "grad_norm": 0.7972007989883423, "learning_rate": 0.0012201956271576525, "loss": 0.4734, "step": 135530 }, { "epoch": 38.99309551208285, "grad_norm": 1.3872472047805786, "learning_rate": 0.0012201380897583429, "loss": 0.5536, "step": 135540 }, { "epoch": 38.99597238204833, "grad_norm": 1.2384475469589233, "learning_rate": 0.0012200805523590334, "loss": 0.7493, "step": 135550 }, { "epoch": 38.99884925201381, "grad_norm": 0.8999079465866089, "learning_rate": 0.0012200230149597238, "loss": 0.7368, "step": 135560 }, { "epoch": 39.00172612197929, "grad_norm": 1.3693032264709473, "learning_rate": 0.0012199654775604143, "loss": 0.5717, "step": 135570 }, { "epoch": 39.004602991944765, "grad_norm": 1.1782702207565308, "learning_rate": 0.001219907940161105, "loss": 0.509, "step": 135580 }, { "epoch": 39.00747986191024, "grad_norm": 1.3544889688491821, "learning_rate": 0.0012198504027617952, "loss": 0.532, "step": 135590 }, { "epoch": 39.01035673187572, "grad_norm": 0.7729141116142273, "learning_rate": 0.0012197928653624856, "loss": 0.7416, "step": 135600 }, { "epoch": 39.01323360184119, "grad_norm": 2.2834277153015137, "learning_rate": 0.001219735327963176, "loss": 0.6257, "step": 135610 }, { "epoch": 39.01611047180668, "grad_norm": 1.7197054624557495, "learning_rate": 0.0012196777905638665, "loss": 0.6544, "step": 135620 }, { "epoch": 39.01898734177215, "grad_norm": 0.9396806359291077, "learning_rate": 0.001219620253164557, "loss": 0.633, "step": 135630 }, { "epoch": 39.02186421173763, "grad_norm": 1.2951925992965698, "learning_rate": 0.0012195627157652474, "loss": 0.5758, "step": 135640 }, { "epoch": 39.024741081703105, "grad_norm": 1.6650960445404053, "learning_rate": 0.001219505178365938, "loss": 0.6255, "step": 135650 }, { "epoch": 39.02761795166858, "grad_norm": 0.7432948350906372, "learning_rate": 0.0012194476409666283, "loss": 0.4784, "step": 135660 }, { "epoch": 39.030494821634065, "grad_norm": 2.0694100856781006, "learning_rate": 0.0012193901035673187, "loss": 0.5487, "step": 135670 }, { "epoch": 39.03337169159954, "grad_norm": 1.5520082712173462, "learning_rate": 0.0012193325661680092, "loss": 0.4666, "step": 135680 }, { "epoch": 39.03624856156502, "grad_norm": 0.676545262336731, "learning_rate": 0.0012192750287686998, "loss": 0.4892, "step": 135690 }, { "epoch": 39.03912543153049, "grad_norm": 0.7528459429740906, "learning_rate": 0.0012192174913693901, "loss": 0.5542, "step": 135700 }, { "epoch": 39.04200230149597, "grad_norm": 1.7550418376922607, "learning_rate": 0.0012191599539700807, "loss": 0.5323, "step": 135710 }, { "epoch": 39.04487917146145, "grad_norm": 1.1232266426086426, "learning_rate": 0.0012191024165707708, "loss": 0.56, "step": 135720 }, { "epoch": 39.04775604142693, "grad_norm": 1.5103968381881714, "learning_rate": 0.0012190448791714614, "loss": 0.4921, "step": 135730 }, { "epoch": 39.050632911392405, "grad_norm": 1.4475547075271606, "learning_rate": 0.001218987341772152, "loss": 0.5401, "step": 135740 }, { "epoch": 39.05350978135788, "grad_norm": 0.7947186231613159, "learning_rate": 0.0012189298043728423, "loss": 0.467, "step": 135750 }, { "epoch": 39.05638665132336, "grad_norm": 1.211312174797058, "learning_rate": 0.0012188722669735329, "loss": 0.503, "step": 135760 }, { "epoch": 39.05926352128884, "grad_norm": 1.2908214330673218, "learning_rate": 0.0012188147295742234, "loss": 0.6623, "step": 135770 }, { "epoch": 39.06214039125432, "grad_norm": 0.9396178722381592, "learning_rate": 0.0012187571921749136, "loss": 0.5974, "step": 135780 }, { "epoch": 39.06501726121979, "grad_norm": 1.2043362855911255, "learning_rate": 0.0012186996547756041, "loss": 0.5356, "step": 135790 }, { "epoch": 39.06789413118527, "grad_norm": 1.6564912796020508, "learning_rate": 0.0012186421173762947, "loss": 0.6058, "step": 135800 }, { "epoch": 39.070771001150746, "grad_norm": 2.2359611988067627, "learning_rate": 0.001218584579976985, "loss": 0.6194, "step": 135810 }, { "epoch": 39.07364787111622, "grad_norm": 1.6683119535446167, "learning_rate": 0.0012185270425776756, "loss": 0.5711, "step": 135820 }, { "epoch": 39.076524741081705, "grad_norm": 0.9284206628799438, "learning_rate": 0.001218469505178366, "loss": 0.7532, "step": 135830 }, { "epoch": 39.07940161104718, "grad_norm": 0.9522349238395691, "learning_rate": 0.0012184119677790563, "loss": 0.5304, "step": 135840 }, { "epoch": 39.08227848101266, "grad_norm": 1.2512552738189697, "learning_rate": 0.0012183544303797469, "loss": 0.7212, "step": 135850 }, { "epoch": 39.085155350978134, "grad_norm": 0.8088015913963318, "learning_rate": 0.0012182968929804372, "loss": 0.5275, "step": 135860 }, { "epoch": 39.08803222094361, "grad_norm": 2.022111177444458, "learning_rate": 0.0012182393555811278, "loss": 0.7023, "step": 135870 }, { "epoch": 39.09090909090909, "grad_norm": 1.6479361057281494, "learning_rate": 0.0012181818181818183, "loss": 0.7708, "step": 135880 }, { "epoch": 39.09378596087457, "grad_norm": 1.9220848083496094, "learning_rate": 0.0012181242807825087, "loss": 0.5314, "step": 135890 }, { "epoch": 39.096662830840046, "grad_norm": 1.7932246923446655, "learning_rate": 0.001218066743383199, "loss": 0.7849, "step": 135900 }, { "epoch": 39.09953970080552, "grad_norm": 1.3109092712402344, "learning_rate": 0.0012180092059838896, "loss": 0.5523, "step": 135910 }, { "epoch": 39.102416570771, "grad_norm": 1.021805763244629, "learning_rate": 0.00121795166858458, "loss": 0.4837, "step": 135920 }, { "epoch": 39.10529344073648, "grad_norm": 1.286332607269287, "learning_rate": 0.0012178941311852705, "loss": 0.6474, "step": 135930 }, { "epoch": 39.10817031070196, "grad_norm": 1.7019037008285522, "learning_rate": 0.0012178365937859609, "loss": 0.5746, "step": 135940 }, { "epoch": 39.111047180667434, "grad_norm": 1.2678178548812866, "learning_rate": 0.0012177790563866514, "loss": 0.4666, "step": 135950 }, { "epoch": 39.11392405063291, "grad_norm": 1.7997716665267944, "learning_rate": 0.0012177215189873418, "loss": 0.5553, "step": 135960 }, { "epoch": 39.116800920598386, "grad_norm": 1.2777953147888184, "learning_rate": 0.0012176639815880321, "loss": 0.6463, "step": 135970 }, { "epoch": 39.11967779056387, "grad_norm": 1.5778173208236694, "learning_rate": 0.0012176064441887227, "loss": 0.477, "step": 135980 }, { "epoch": 39.122554660529346, "grad_norm": 1.085201382637024, "learning_rate": 0.0012175489067894132, "loss": 0.5396, "step": 135990 }, { "epoch": 39.12543153049482, "grad_norm": 2.635317802429199, "learning_rate": 0.0012174913693901036, "loss": 0.8904, "step": 136000 }, { "epoch": 39.1283084004603, "grad_norm": 0.9191156625747681, "learning_rate": 0.0012174338319907942, "loss": 0.6422, "step": 136010 }, { "epoch": 39.131185270425775, "grad_norm": 0.9425532817840576, "learning_rate": 0.0012173762945914845, "loss": 0.5976, "step": 136020 }, { "epoch": 39.13406214039125, "grad_norm": 1.9683564901351929, "learning_rate": 0.0012173187571921748, "loss": 0.5682, "step": 136030 }, { "epoch": 39.136939010356734, "grad_norm": 1.786605715751648, "learning_rate": 0.0012172612197928654, "loss": 0.7825, "step": 136040 }, { "epoch": 39.13981588032221, "grad_norm": 1.4177021980285645, "learning_rate": 0.001217203682393556, "loss": 0.6002, "step": 136050 }, { "epoch": 39.14269275028769, "grad_norm": 1.9535506963729858, "learning_rate": 0.0012171461449942463, "loss": 0.5466, "step": 136060 }, { "epoch": 39.14556962025316, "grad_norm": 0.9824904203414917, "learning_rate": 0.0012170886075949369, "loss": 0.4987, "step": 136070 }, { "epoch": 39.14844649021864, "grad_norm": 1.475623607635498, "learning_rate": 0.001217031070195627, "loss": 0.5695, "step": 136080 }, { "epoch": 39.15132336018412, "grad_norm": 1.5364902019500732, "learning_rate": 0.0012169735327963176, "loss": 0.5252, "step": 136090 }, { "epoch": 39.1542002301496, "grad_norm": 1.1418155431747437, "learning_rate": 0.0012169159953970081, "loss": 0.4645, "step": 136100 }, { "epoch": 39.157077100115075, "grad_norm": 0.9917524456977844, "learning_rate": 0.0012168584579976985, "loss": 0.5423, "step": 136110 }, { "epoch": 39.15995397008055, "grad_norm": 1.048311471939087, "learning_rate": 0.001216800920598389, "loss": 0.6416, "step": 136120 }, { "epoch": 39.16283084004603, "grad_norm": 1.545204997062683, "learning_rate": 0.0012167433831990796, "loss": 0.6152, "step": 136130 }, { "epoch": 39.16570771001151, "grad_norm": 1.118815302848816, "learning_rate": 0.0012166858457997698, "loss": 0.5111, "step": 136140 }, { "epoch": 39.16858457997699, "grad_norm": 1.7009806632995605, "learning_rate": 0.0012166283084004603, "loss": 0.6592, "step": 136150 }, { "epoch": 39.17146144994246, "grad_norm": 1.221247911453247, "learning_rate": 0.0012165707710011509, "loss": 0.5786, "step": 136160 }, { "epoch": 39.17433831990794, "grad_norm": 1.1074049472808838, "learning_rate": 0.0012165132336018412, "loss": 0.558, "step": 136170 }, { "epoch": 39.177215189873415, "grad_norm": 0.8631056547164917, "learning_rate": 0.0012164556962025318, "loss": 0.5361, "step": 136180 }, { "epoch": 39.1800920598389, "grad_norm": 0.8126227855682373, "learning_rate": 0.001216398158803222, "loss": 0.6183, "step": 136190 }, { "epoch": 39.182968929804375, "grad_norm": 0.9618827700614929, "learning_rate": 0.0012163406214039125, "loss": 0.6893, "step": 136200 }, { "epoch": 39.18584579976985, "grad_norm": 0.8886460661888123, "learning_rate": 0.001216283084004603, "loss": 0.5333, "step": 136210 }, { "epoch": 39.18872266973533, "grad_norm": 1.5197850465774536, "learning_rate": 0.0012162255466052934, "loss": 0.7349, "step": 136220 }, { "epoch": 39.1915995397008, "grad_norm": 2.263212203979492, "learning_rate": 0.001216168009205984, "loss": 0.5464, "step": 136230 }, { "epoch": 39.19447640966629, "grad_norm": 1.9954900741577148, "learning_rate": 0.0012161104718066745, "loss": 0.604, "step": 136240 }, { "epoch": 39.19735327963176, "grad_norm": 1.0920696258544922, "learning_rate": 0.0012160529344073647, "loss": 0.616, "step": 136250 }, { "epoch": 39.20023014959724, "grad_norm": 1.1515828371047974, "learning_rate": 0.0012159953970080552, "loss": 0.6078, "step": 136260 }, { "epoch": 39.203107019562715, "grad_norm": 0.7474911212921143, "learning_rate": 0.0012159378596087458, "loss": 0.5089, "step": 136270 }, { "epoch": 39.20598388952819, "grad_norm": 0.9668832421302795, "learning_rate": 0.0012158803222094361, "loss": 0.61, "step": 136280 }, { "epoch": 39.20886075949367, "grad_norm": 0.8272156715393066, "learning_rate": 0.0012158227848101267, "loss": 0.4639, "step": 136290 }, { "epoch": 39.21173762945915, "grad_norm": 1.6299474239349365, "learning_rate": 0.001215765247410817, "loss": 0.7005, "step": 136300 }, { "epoch": 39.21461449942463, "grad_norm": 0.9058082699775696, "learning_rate": 0.0012157077100115074, "loss": 0.5931, "step": 136310 }, { "epoch": 39.2174913693901, "grad_norm": 0.7973120212554932, "learning_rate": 0.001215650172612198, "loss": 0.6762, "step": 136320 }, { "epoch": 39.22036823935558, "grad_norm": 0.5344700217247009, "learning_rate": 0.0012155926352128883, "loss": 0.5572, "step": 136330 }, { "epoch": 39.223245109321056, "grad_norm": 1.1494964361190796, "learning_rate": 0.0012155350978135789, "loss": 0.5455, "step": 136340 }, { "epoch": 39.22612197928654, "grad_norm": 0.8288915157318115, "learning_rate": 0.0012154775604142694, "loss": 0.5852, "step": 136350 }, { "epoch": 39.228998849252015, "grad_norm": 2.417088270187378, "learning_rate": 0.0012154200230149598, "loss": 0.7073, "step": 136360 }, { "epoch": 39.23187571921749, "grad_norm": 0.8908731341362, "learning_rate": 0.0012153624856156501, "loss": 0.6655, "step": 136370 }, { "epoch": 39.23475258918297, "grad_norm": 1.0181094408035278, "learning_rate": 0.0012153049482163407, "loss": 0.639, "step": 136380 }, { "epoch": 39.237629459148444, "grad_norm": 1.365851640701294, "learning_rate": 0.001215247410817031, "loss": 0.5606, "step": 136390 }, { "epoch": 39.24050632911393, "grad_norm": 1.8582453727722168, "learning_rate": 0.0012151898734177216, "loss": 0.5099, "step": 136400 }, { "epoch": 39.2433831990794, "grad_norm": 0.7662612199783325, "learning_rate": 0.001215132336018412, "loss": 0.5015, "step": 136410 }, { "epoch": 39.24626006904488, "grad_norm": 1.2712675333023071, "learning_rate": 0.0012150747986191025, "loss": 0.6265, "step": 136420 }, { "epoch": 39.249136939010356, "grad_norm": 0.78822261095047, "learning_rate": 0.0012150172612197929, "loss": 0.6973, "step": 136430 }, { "epoch": 39.25201380897583, "grad_norm": 0.946713924407959, "learning_rate": 0.0012149597238204832, "loss": 0.5636, "step": 136440 }, { "epoch": 39.254890678941315, "grad_norm": 1.3380546569824219, "learning_rate": 0.0012149021864211738, "loss": 0.7241, "step": 136450 }, { "epoch": 39.25776754890679, "grad_norm": 1.0805134773254395, "learning_rate": 0.0012148446490218643, "loss": 0.5392, "step": 136460 }, { "epoch": 39.26064441887227, "grad_norm": 2.128526449203491, "learning_rate": 0.0012147871116225547, "loss": 0.6379, "step": 136470 }, { "epoch": 39.263521288837744, "grad_norm": 0.8214801549911499, "learning_rate": 0.0012147295742232452, "loss": 0.5122, "step": 136480 }, { "epoch": 39.26639815880322, "grad_norm": 1.7541933059692383, "learning_rate": 0.0012146720368239356, "loss": 0.5481, "step": 136490 }, { "epoch": 39.269275028768696, "grad_norm": 1.927580714225769, "learning_rate": 0.001214614499424626, "loss": 0.8718, "step": 136500 }, { "epoch": 39.27215189873418, "grad_norm": 1.340912938117981, "learning_rate": 0.0012145569620253165, "loss": 0.5859, "step": 136510 }, { "epoch": 39.275028768699656, "grad_norm": 1.0493680238723755, "learning_rate": 0.0012144994246260068, "loss": 0.6824, "step": 136520 }, { "epoch": 39.27790563866513, "grad_norm": 0.9162917137145996, "learning_rate": 0.0012144418872266974, "loss": 0.4386, "step": 136530 }, { "epoch": 39.28078250863061, "grad_norm": 1.3602622747421265, "learning_rate": 0.001214384349827388, "loss": 0.6021, "step": 136540 }, { "epoch": 39.283659378596084, "grad_norm": 1.1708060503005981, "learning_rate": 0.001214326812428078, "loss": 0.5473, "step": 136550 }, { "epoch": 39.28653624856157, "grad_norm": 2.4045398235321045, "learning_rate": 0.0012142692750287687, "loss": 0.5488, "step": 136560 }, { "epoch": 39.289413118527044, "grad_norm": 1.1398849487304688, "learning_rate": 0.0012142117376294592, "loss": 0.581, "step": 136570 }, { "epoch": 39.29228998849252, "grad_norm": 2.9227352142333984, "learning_rate": 0.0012141542002301496, "loss": 0.6276, "step": 136580 }, { "epoch": 39.295166858457996, "grad_norm": 3.5136778354644775, "learning_rate": 0.0012140966628308401, "loss": 0.5661, "step": 136590 }, { "epoch": 39.29804372842347, "grad_norm": 0.9269981384277344, "learning_rate": 0.0012140391254315307, "loss": 0.6238, "step": 136600 }, { "epoch": 39.300920598388956, "grad_norm": 1.2296663522720337, "learning_rate": 0.0012139815880322208, "loss": 0.732, "step": 136610 }, { "epoch": 39.30379746835443, "grad_norm": 1.102718472480774, "learning_rate": 0.0012139240506329114, "loss": 0.6532, "step": 136620 }, { "epoch": 39.30667433831991, "grad_norm": 1.0149980783462524, "learning_rate": 0.001213866513233602, "loss": 0.742, "step": 136630 }, { "epoch": 39.309551208285384, "grad_norm": 1.3105907440185547, "learning_rate": 0.0012138089758342923, "loss": 0.5627, "step": 136640 }, { "epoch": 39.31242807825086, "grad_norm": 1.3495959043502808, "learning_rate": 0.0012137514384349829, "loss": 0.5683, "step": 136650 }, { "epoch": 39.315304948216344, "grad_norm": 1.2506859302520752, "learning_rate": 0.0012136939010356732, "loss": 0.555, "step": 136660 }, { "epoch": 39.31818181818182, "grad_norm": 1.5024739503860474, "learning_rate": 0.0012136363636363636, "loss": 0.7139, "step": 136670 }, { "epoch": 39.321058688147296, "grad_norm": 1.12998366355896, "learning_rate": 0.0012135788262370541, "loss": 0.5495, "step": 136680 }, { "epoch": 39.32393555811277, "grad_norm": 0.9537767767906189, "learning_rate": 0.0012135212888377445, "loss": 0.5687, "step": 136690 }, { "epoch": 39.32681242807825, "grad_norm": 0.9531059861183167, "learning_rate": 0.001213463751438435, "loss": 0.5837, "step": 136700 }, { "epoch": 39.329689298043725, "grad_norm": 1.7899855375289917, "learning_rate": 0.0012134062140391256, "loss": 0.7735, "step": 136710 }, { "epoch": 39.33256616800921, "grad_norm": 1.537711501121521, "learning_rate": 0.001213348676639816, "loss": 0.7026, "step": 136720 }, { "epoch": 39.335443037974684, "grad_norm": 1.0559532642364502, "learning_rate": 0.0012132911392405063, "loss": 0.72, "step": 136730 }, { "epoch": 39.33831990794016, "grad_norm": 1.242639422416687, "learning_rate": 0.0012132336018411969, "loss": 0.6193, "step": 136740 }, { "epoch": 39.34119677790564, "grad_norm": 1.0795453786849976, "learning_rate": 0.0012131760644418872, "loss": 0.6325, "step": 136750 }, { "epoch": 39.34407364787111, "grad_norm": 2.130845069885254, "learning_rate": 0.0012131185270425778, "loss": 0.683, "step": 136760 }, { "epoch": 39.346950517836596, "grad_norm": 1.176383137702942, "learning_rate": 0.0012130609896432681, "loss": 0.6528, "step": 136770 }, { "epoch": 39.34982738780207, "grad_norm": 0.9235899448394775, "learning_rate": 0.0012130034522439587, "loss": 0.4996, "step": 136780 }, { "epoch": 39.35270425776755, "grad_norm": 2.310638666152954, "learning_rate": 0.001212945914844649, "loss": 0.6808, "step": 136790 }, { "epoch": 39.355581127733025, "grad_norm": 0.7209078669548035, "learning_rate": 0.0012128883774453394, "loss": 0.4483, "step": 136800 }, { "epoch": 39.3584579976985, "grad_norm": 1.0049225091934204, "learning_rate": 0.00121283084004603, "loss": 0.4198, "step": 136810 }, { "epoch": 39.361334867663984, "grad_norm": 1.73427414894104, "learning_rate": 0.0012127733026467205, "loss": 0.4886, "step": 136820 }, { "epoch": 39.36421173762946, "grad_norm": 1.8962221145629883, "learning_rate": 0.0012127157652474109, "loss": 0.8068, "step": 136830 }, { "epoch": 39.36708860759494, "grad_norm": 1.1196398735046387, "learning_rate": 0.0012126582278481014, "loss": 0.6248, "step": 136840 }, { "epoch": 39.36996547756041, "grad_norm": 0.9200382828712463, "learning_rate": 0.0012126006904487918, "loss": 0.5677, "step": 136850 }, { "epoch": 39.37284234752589, "grad_norm": 0.9587675929069519, "learning_rate": 0.0012125431530494821, "loss": 0.4879, "step": 136860 }, { "epoch": 39.37571921749137, "grad_norm": 0.8604655265808105, "learning_rate": 0.0012124856156501727, "loss": 0.6377, "step": 136870 }, { "epoch": 39.37859608745685, "grad_norm": 1.3424581289291382, "learning_rate": 0.001212428078250863, "loss": 0.7208, "step": 136880 }, { "epoch": 39.381472957422325, "grad_norm": 1.311464786529541, "learning_rate": 0.0012123705408515536, "loss": 0.5349, "step": 136890 }, { "epoch": 39.3843498273878, "grad_norm": 0.8342934846878052, "learning_rate": 0.0012123130034522442, "loss": 0.4935, "step": 136900 }, { "epoch": 39.38722669735328, "grad_norm": 0.8397471308708191, "learning_rate": 0.0012122554660529343, "loss": 0.9457, "step": 136910 }, { "epoch": 39.39010356731876, "grad_norm": 1.856793999671936, "learning_rate": 0.0012121979286536248, "loss": 0.7436, "step": 136920 }, { "epoch": 39.39298043728424, "grad_norm": 1.440369963645935, "learning_rate": 0.0012121403912543154, "loss": 0.663, "step": 136930 }, { "epoch": 39.39585730724971, "grad_norm": 1.7838553190231323, "learning_rate": 0.0012120828538550058, "loss": 0.5182, "step": 136940 }, { "epoch": 39.39873417721519, "grad_norm": 0.8548162579536438, "learning_rate": 0.0012120253164556963, "loss": 0.6495, "step": 136950 }, { "epoch": 39.401611047180666, "grad_norm": 1.3137685060501099, "learning_rate": 0.0012119677790563869, "loss": 0.5943, "step": 136960 }, { "epoch": 39.40448791714614, "grad_norm": 2.620882987976074, "learning_rate": 0.001211910241657077, "loss": 0.5182, "step": 136970 }, { "epoch": 39.407364787111625, "grad_norm": 1.0315243005752563, "learning_rate": 0.0012118527042577676, "loss": 0.6026, "step": 136980 }, { "epoch": 39.4102416570771, "grad_norm": 1.743924856185913, "learning_rate": 0.001211795166858458, "loss": 0.6644, "step": 136990 }, { "epoch": 39.41311852704258, "grad_norm": 1.2037838697433472, "learning_rate": 0.0012117376294591485, "loss": 0.6158, "step": 137000 }, { "epoch": 39.415995397008054, "grad_norm": 1.811518669128418, "learning_rate": 0.001211680092059839, "loss": 0.5535, "step": 137010 }, { "epoch": 39.41887226697353, "grad_norm": 1.4318536520004272, "learning_rate": 0.0012116225546605292, "loss": 0.6113, "step": 137020 }, { "epoch": 39.42174913693901, "grad_norm": 1.2812782526016235, "learning_rate": 0.0012115650172612197, "loss": 0.686, "step": 137030 }, { "epoch": 39.42462600690449, "grad_norm": 0.7078317999839783, "learning_rate": 0.0012115074798619103, "loss": 0.6147, "step": 137040 }, { "epoch": 39.427502876869966, "grad_norm": 1.1782156229019165, "learning_rate": 0.0012114499424626007, "loss": 0.6212, "step": 137050 }, { "epoch": 39.43037974683544, "grad_norm": 1.1879210472106934, "learning_rate": 0.0012113924050632912, "loss": 0.6786, "step": 137060 }, { "epoch": 39.43325661680092, "grad_norm": 1.0200729370117188, "learning_rate": 0.0012113348676639818, "loss": 0.6076, "step": 137070 }, { "epoch": 39.4361334867664, "grad_norm": 1.4870390892028809, "learning_rate": 0.001211277330264672, "loss": 0.6033, "step": 137080 }, { "epoch": 39.43901035673188, "grad_norm": 0.8438193202018738, "learning_rate": 0.0012112197928653625, "loss": 0.5908, "step": 137090 }, { "epoch": 39.441887226697354, "grad_norm": 1.5095205307006836, "learning_rate": 0.0012111622554660528, "loss": 0.5423, "step": 137100 }, { "epoch": 39.44476409666283, "grad_norm": 1.177027940750122, "learning_rate": 0.0012111047180667434, "loss": 0.5963, "step": 137110 }, { "epoch": 39.447640966628306, "grad_norm": 1.8576407432556152, "learning_rate": 0.001211047180667434, "loss": 0.6195, "step": 137120 }, { "epoch": 39.45051783659379, "grad_norm": 1.5597726106643677, "learning_rate": 0.0012109896432681243, "loss": 0.5928, "step": 137130 }, { "epoch": 39.453394706559266, "grad_norm": 1.09966242313385, "learning_rate": 0.0012109321058688147, "loss": 0.7819, "step": 137140 }, { "epoch": 39.45627157652474, "grad_norm": 0.985576331615448, "learning_rate": 0.0012108745684695052, "loss": 0.6817, "step": 137150 }, { "epoch": 39.45914844649022, "grad_norm": 0.6220554709434509, "learning_rate": 0.0012108170310701956, "loss": 0.6286, "step": 137160 }, { "epoch": 39.462025316455694, "grad_norm": 1.2767677307128906, "learning_rate": 0.0012107594936708861, "loss": 0.6009, "step": 137170 }, { "epoch": 39.46490218642117, "grad_norm": 0.8988207578659058, "learning_rate": 0.0012107019562715767, "loss": 0.6049, "step": 137180 }, { "epoch": 39.467779056386654, "grad_norm": 1.3518797159194946, "learning_rate": 0.001210644418872267, "loss": 0.604, "step": 137190 }, { "epoch": 39.47065592635213, "grad_norm": 1.1893696784973145, "learning_rate": 0.0012105868814729574, "loss": 0.6191, "step": 137200 }, { "epoch": 39.473532796317606, "grad_norm": 1.5035401582717896, "learning_rate": 0.0012105293440736477, "loss": 0.5764, "step": 137210 }, { "epoch": 39.47640966628308, "grad_norm": 1.2301526069641113, "learning_rate": 0.0012104718066743383, "loss": 0.5849, "step": 137220 }, { "epoch": 39.47928653624856, "grad_norm": 1.0705146789550781, "learning_rate": 0.0012104142692750289, "loss": 0.5356, "step": 137230 }, { "epoch": 39.48216340621404, "grad_norm": 0.8893834948539734, "learning_rate": 0.0012103567318757192, "loss": 0.6797, "step": 137240 }, { "epoch": 39.48504027617952, "grad_norm": 0.7155129313468933, "learning_rate": 0.0012102991944764098, "loss": 0.5091, "step": 137250 }, { "epoch": 39.487917146144994, "grad_norm": 1.3361395597457886, "learning_rate": 0.0012102416570771001, "loss": 0.6785, "step": 137260 }, { "epoch": 39.49079401611047, "grad_norm": 1.0814123153686523, "learning_rate": 0.0012101841196777905, "loss": 0.5064, "step": 137270 }, { "epoch": 39.49367088607595, "grad_norm": 1.1134850978851318, "learning_rate": 0.001210126582278481, "loss": 0.5845, "step": 137280 }, { "epoch": 39.49654775604143, "grad_norm": 1.2820277214050293, "learning_rate": 0.0012100690448791716, "loss": 0.632, "step": 137290 }, { "epoch": 39.499424626006906, "grad_norm": 1.0249478816986084, "learning_rate": 0.001210011507479862, "loss": 0.6661, "step": 137300 }, { "epoch": 39.50230149597238, "grad_norm": 1.4785691499710083, "learning_rate": 0.0012099539700805525, "loss": 0.5307, "step": 137310 }, { "epoch": 39.50517836593786, "grad_norm": 1.1163723468780518, "learning_rate": 0.0012098964326812428, "loss": 0.5688, "step": 137320 }, { "epoch": 39.508055235903335, "grad_norm": 1.3926606178283691, "learning_rate": 0.0012098388952819332, "loss": 0.622, "step": 137330 }, { "epoch": 39.51093210586882, "grad_norm": 1.4928584098815918, "learning_rate": 0.0012097813578826238, "loss": 0.5571, "step": 137340 }, { "epoch": 39.513808975834294, "grad_norm": 1.1796298027038574, "learning_rate": 0.001209723820483314, "loss": 0.6156, "step": 137350 }, { "epoch": 39.51668584579977, "grad_norm": 2.1861929893493652, "learning_rate": 0.0012096662830840047, "loss": 0.5549, "step": 137360 }, { "epoch": 39.51956271576525, "grad_norm": 1.2030709981918335, "learning_rate": 0.0012096087456846952, "loss": 0.7786, "step": 137370 }, { "epoch": 39.52243958573072, "grad_norm": 1.554720401763916, "learning_rate": 0.0012095512082853854, "loss": 0.5911, "step": 137380 }, { "epoch": 39.5253164556962, "grad_norm": 1.5788041353225708, "learning_rate": 0.001209493670886076, "loss": 0.6076, "step": 137390 }, { "epoch": 39.52819332566168, "grad_norm": 0.9201759099960327, "learning_rate": 0.0012094361334867665, "loss": 0.6699, "step": 137400 }, { "epoch": 39.53107019562716, "grad_norm": 0.6300579905509949, "learning_rate": 0.0012093785960874568, "loss": 0.6722, "step": 137410 }, { "epoch": 39.533947065592635, "grad_norm": 1.9024477005004883, "learning_rate": 0.0012093210586881474, "loss": 0.5422, "step": 137420 }, { "epoch": 39.53682393555811, "grad_norm": 1.3675357103347778, "learning_rate": 0.001209263521288838, "loss": 0.6209, "step": 137430 }, { "epoch": 39.53970080552359, "grad_norm": 0.6354597210884094, "learning_rate": 0.001209205983889528, "loss": 0.5322, "step": 137440 }, { "epoch": 39.54257767548907, "grad_norm": 1.577441692352295, "learning_rate": 0.0012091484464902187, "loss": 0.5948, "step": 137450 }, { "epoch": 39.54545454545455, "grad_norm": 1.9125232696533203, "learning_rate": 0.001209090909090909, "loss": 0.6285, "step": 137460 }, { "epoch": 39.54833141542002, "grad_norm": 2.045632839202881, "learning_rate": 0.0012090333716915996, "loss": 0.6762, "step": 137470 }, { "epoch": 39.5512082853855, "grad_norm": 1.7913352251052856, "learning_rate": 0.0012089758342922901, "loss": 0.5824, "step": 137480 }, { "epoch": 39.554085155350975, "grad_norm": 0.9437160491943359, "learning_rate": 0.0012089182968929805, "loss": 0.6567, "step": 137490 }, { "epoch": 39.55696202531646, "grad_norm": 1.1401029825210571, "learning_rate": 0.0012088607594936708, "loss": 0.7096, "step": 137500 }, { "epoch": 39.559838895281935, "grad_norm": 1.1252251863479614, "learning_rate": 0.0012088032220943614, "loss": 0.5344, "step": 137510 }, { "epoch": 39.56271576524741, "grad_norm": 2.104050397872925, "learning_rate": 0.0012087456846950517, "loss": 0.6587, "step": 137520 }, { "epoch": 39.56559263521289, "grad_norm": 0.9246380925178528, "learning_rate": 0.0012086881472957423, "loss": 0.5779, "step": 137530 }, { "epoch": 39.56846950517836, "grad_norm": 0.8800190687179565, "learning_rate": 0.0012086306098964329, "loss": 0.5763, "step": 137540 }, { "epoch": 39.57134637514385, "grad_norm": 2.320680856704712, "learning_rate": 0.0012085730724971232, "loss": 0.6849, "step": 137550 }, { "epoch": 39.57422324510932, "grad_norm": 0.9954655766487122, "learning_rate": 0.0012085155350978136, "loss": 0.5673, "step": 137560 }, { "epoch": 39.5771001150748, "grad_norm": 0.900767982006073, "learning_rate": 0.001208457997698504, "loss": 0.5866, "step": 137570 }, { "epoch": 39.579976985040275, "grad_norm": 1.8260111808776855, "learning_rate": 0.0012084004602991945, "loss": 0.5979, "step": 137580 }, { "epoch": 39.58285385500575, "grad_norm": 0.9858190417289734, "learning_rate": 0.001208342922899885, "loss": 0.548, "step": 137590 }, { "epoch": 39.58573072497123, "grad_norm": 0.8375604748725891, "learning_rate": 0.0012082853855005754, "loss": 0.6348, "step": 137600 }, { "epoch": 39.58860759493671, "grad_norm": 2.138916492462158, "learning_rate": 0.001208227848101266, "loss": 0.5957, "step": 137610 }, { "epoch": 39.59148446490219, "grad_norm": 1.0022649765014648, "learning_rate": 0.0012081703107019563, "loss": 0.7216, "step": 137620 }, { "epoch": 39.59436133486766, "grad_norm": 2.3851754665374756, "learning_rate": 0.0012081127733026466, "loss": 0.6978, "step": 137630 }, { "epoch": 39.59723820483314, "grad_norm": 1.4655903577804565, "learning_rate": 0.0012080552359033372, "loss": 0.5064, "step": 137640 }, { "epoch": 39.600115074798616, "grad_norm": 1.086984634399414, "learning_rate": 0.0012079976985040278, "loss": 0.434, "step": 137650 }, { "epoch": 39.6029919447641, "grad_norm": 0.8369525074958801, "learning_rate": 0.0012079401611047181, "loss": 0.5671, "step": 137660 }, { "epoch": 39.605868814729575, "grad_norm": 1.0354313850402832, "learning_rate": 0.0012078826237054087, "loss": 0.631, "step": 137670 }, { "epoch": 39.60874568469505, "grad_norm": 1.386082410812378, "learning_rate": 0.0012078250863060988, "loss": 0.7184, "step": 137680 }, { "epoch": 39.61162255466053, "grad_norm": 1.013493537902832, "learning_rate": 0.0012077675489067894, "loss": 0.5105, "step": 137690 }, { "epoch": 39.614499424626004, "grad_norm": 1.2318446636199951, "learning_rate": 0.00120771001150748, "loss": 0.5322, "step": 137700 }, { "epoch": 39.61737629459149, "grad_norm": 1.7099858522415161, "learning_rate": 0.0012076524741081703, "loss": 0.7127, "step": 137710 }, { "epoch": 39.620253164556964, "grad_norm": 1.7572150230407715, "learning_rate": 0.0012075949367088609, "loss": 0.6474, "step": 137720 }, { "epoch": 39.62313003452244, "grad_norm": 1.0422035455703735, "learning_rate": 0.0012075373993095514, "loss": 0.5061, "step": 137730 }, { "epoch": 39.626006904487916, "grad_norm": 1.5227696895599365, "learning_rate": 0.0012074798619102415, "loss": 0.7137, "step": 137740 }, { "epoch": 39.62888377445339, "grad_norm": 1.0709625482559204, "learning_rate": 0.0012074223245109321, "loss": 0.6759, "step": 137750 }, { "epoch": 39.631760644418875, "grad_norm": 1.5318516492843628, "learning_rate": 0.0012073647871116227, "loss": 0.8168, "step": 137760 }, { "epoch": 39.63463751438435, "grad_norm": 1.2883604764938354, "learning_rate": 0.001207307249712313, "loss": 0.5606, "step": 137770 }, { "epoch": 39.63751438434983, "grad_norm": 2.058406114578247, "learning_rate": 0.0012072497123130036, "loss": 0.726, "step": 137780 }, { "epoch": 39.640391254315304, "grad_norm": 0.9344834089279175, "learning_rate": 0.0012071921749136937, "loss": 0.6473, "step": 137790 }, { "epoch": 39.64326812428078, "grad_norm": 1.3154913187026978, "learning_rate": 0.0012071346375143843, "loss": 0.6177, "step": 137800 }, { "epoch": 39.64614499424626, "grad_norm": 1.1395909786224365, "learning_rate": 0.0012070771001150748, "loss": 0.7456, "step": 137810 }, { "epoch": 39.64902186421174, "grad_norm": 1.5425777435302734, "learning_rate": 0.0012070195627157652, "loss": 0.6139, "step": 137820 }, { "epoch": 39.651898734177216, "grad_norm": 0.9315268397331238, "learning_rate": 0.0012069620253164558, "loss": 0.5454, "step": 137830 }, { "epoch": 39.65477560414269, "grad_norm": 1.476260781288147, "learning_rate": 0.0012069044879171463, "loss": 0.5739, "step": 137840 }, { "epoch": 39.65765247410817, "grad_norm": 1.3214104175567627, "learning_rate": 0.0012068469505178364, "loss": 0.6513, "step": 137850 }, { "epoch": 39.660529344073645, "grad_norm": 0.9333321452140808, "learning_rate": 0.001206789413118527, "loss": 0.6286, "step": 137860 }, { "epoch": 39.66340621403913, "grad_norm": 1.056483507156372, "learning_rate": 0.0012067318757192176, "loss": 0.5632, "step": 137870 }, { "epoch": 39.666283084004604, "grad_norm": 0.8803721070289612, "learning_rate": 0.001206674338319908, "loss": 0.4564, "step": 137880 }, { "epoch": 39.66915995397008, "grad_norm": 1.2989113330841064, "learning_rate": 0.0012066168009205985, "loss": 0.5861, "step": 137890 }, { "epoch": 39.67203682393556, "grad_norm": 1.0872650146484375, "learning_rate": 0.0012065592635212888, "loss": 0.6329, "step": 137900 }, { "epoch": 39.67491369390103, "grad_norm": 1.0971870422363281, "learning_rate": 0.0012065017261219792, "loss": 0.6912, "step": 137910 }, { "epoch": 39.677790563866516, "grad_norm": 1.527197241783142, "learning_rate": 0.0012064441887226697, "loss": 0.6795, "step": 137920 }, { "epoch": 39.68066743383199, "grad_norm": 0.9430323839187622, "learning_rate": 0.00120638665132336, "loss": 0.6771, "step": 137930 }, { "epoch": 39.68354430379747, "grad_norm": 1.6013140678405762, "learning_rate": 0.0012063291139240507, "loss": 0.6492, "step": 137940 }, { "epoch": 39.686421173762945, "grad_norm": 0.7682034373283386, "learning_rate": 0.0012062715765247412, "loss": 0.7419, "step": 137950 }, { "epoch": 39.68929804372842, "grad_norm": 1.3912097215652466, "learning_rate": 0.0012062140391254316, "loss": 0.6093, "step": 137960 }, { "epoch": 39.692174913693904, "grad_norm": 1.4113811254501343, "learning_rate": 0.001206156501726122, "loss": 0.6453, "step": 137970 }, { "epoch": 39.69505178365938, "grad_norm": 0.6303613781929016, "learning_rate": 0.0012060989643268125, "loss": 0.4949, "step": 137980 }, { "epoch": 39.69792865362486, "grad_norm": 1.1950163841247559, "learning_rate": 0.0012060414269275028, "loss": 0.7208, "step": 137990 }, { "epoch": 39.70080552359033, "grad_norm": 1.3677889108657837, "learning_rate": 0.0012059838895281934, "loss": 0.5874, "step": 138000 }, { "epoch": 39.70368239355581, "grad_norm": 1.3828015327453613, "learning_rate": 0.001205926352128884, "loss": 0.6654, "step": 138010 }, { "epoch": 39.70655926352129, "grad_norm": 1.0305224657058716, "learning_rate": 0.0012058688147295743, "loss": 0.7441, "step": 138020 }, { "epoch": 39.70943613348677, "grad_norm": 1.6724101305007935, "learning_rate": 0.0012058112773302646, "loss": 0.6725, "step": 138030 }, { "epoch": 39.712313003452245, "grad_norm": 2.87502121925354, "learning_rate": 0.001205753739930955, "loss": 0.6885, "step": 138040 }, { "epoch": 39.71518987341772, "grad_norm": 2.3071036338806152, "learning_rate": 0.0012056962025316456, "loss": 0.6745, "step": 138050 }, { "epoch": 39.7180667433832, "grad_norm": 1.230228304862976, "learning_rate": 0.0012056386651323361, "loss": 0.6028, "step": 138060 }, { "epoch": 39.72094361334867, "grad_norm": 1.4335869550704956, "learning_rate": 0.0012055811277330265, "loss": 0.6129, "step": 138070 }, { "epoch": 39.72382048331416, "grad_norm": 1.5359313488006592, "learning_rate": 0.001205523590333717, "loss": 0.6828, "step": 138080 }, { "epoch": 39.72669735327963, "grad_norm": 0.7454546093940735, "learning_rate": 0.0012054660529344074, "loss": 0.4884, "step": 138090 }, { "epoch": 39.72957422324511, "grad_norm": 1.7741856575012207, "learning_rate": 0.0012054085155350977, "loss": 0.6551, "step": 138100 }, { "epoch": 39.732451093210585, "grad_norm": 1.65682852268219, "learning_rate": 0.0012053509781357883, "loss": 0.5273, "step": 138110 }, { "epoch": 39.73532796317606, "grad_norm": 0.982516348361969, "learning_rate": 0.0012052934407364789, "loss": 0.4877, "step": 138120 }, { "epoch": 39.738204833141545, "grad_norm": 1.2396701574325562, "learning_rate": 0.0012052359033371692, "loss": 0.6838, "step": 138130 }, { "epoch": 39.74108170310702, "grad_norm": 1.4281727075576782, "learning_rate": 0.0012051783659378598, "loss": 0.5684, "step": 138140 }, { "epoch": 39.7439585730725, "grad_norm": 1.9349554777145386, "learning_rate": 0.00120512082853855, "loss": 0.8422, "step": 138150 }, { "epoch": 39.74683544303797, "grad_norm": 0.8557795882225037, "learning_rate": 0.0012050632911392405, "loss": 0.6697, "step": 138160 }, { "epoch": 39.74971231300345, "grad_norm": 0.7552885413169861, "learning_rate": 0.001205005753739931, "loss": 0.5949, "step": 138170 }, { "epoch": 39.75258918296893, "grad_norm": 1.3118057250976562, "learning_rate": 0.0012049482163406214, "loss": 0.6276, "step": 138180 }, { "epoch": 39.75546605293441, "grad_norm": 0.9911910891532898, "learning_rate": 0.001204890678941312, "loss": 0.6615, "step": 138190 }, { "epoch": 39.758342922899885, "grad_norm": 1.2115044593811035, "learning_rate": 0.0012048331415420025, "loss": 0.4888, "step": 138200 }, { "epoch": 39.76121979286536, "grad_norm": 1.360889196395874, "learning_rate": 0.0012047756041426926, "loss": 0.4825, "step": 138210 }, { "epoch": 39.76409666283084, "grad_norm": 1.3889126777648926, "learning_rate": 0.0012047180667433832, "loss": 0.6255, "step": 138220 }, { "epoch": 39.76697353279632, "grad_norm": 1.7331268787384033, "learning_rate": 0.0012046605293440738, "loss": 0.5848, "step": 138230 }, { "epoch": 39.7698504027618, "grad_norm": 1.329722285270691, "learning_rate": 0.001204602991944764, "loss": 0.6148, "step": 138240 }, { "epoch": 39.77272727272727, "grad_norm": 1.4783976078033447, "learning_rate": 0.0012045454545454547, "loss": 0.6251, "step": 138250 }, { "epoch": 39.77560414269275, "grad_norm": 1.4742753505706787, "learning_rate": 0.001204487917146145, "loss": 0.676, "step": 138260 }, { "epoch": 39.778481012658226, "grad_norm": 1.0148272514343262, "learning_rate": 0.0012044303797468354, "loss": 0.6789, "step": 138270 }, { "epoch": 39.7813578826237, "grad_norm": 0.9563295245170593, "learning_rate": 0.001204372842347526, "loss": 0.5365, "step": 138280 }, { "epoch": 39.784234752589185, "grad_norm": 1.678506851196289, "learning_rate": 0.0012043153049482163, "loss": 0.7358, "step": 138290 }, { "epoch": 39.78711162255466, "grad_norm": 0.8797421455383301, "learning_rate": 0.0012042577675489068, "loss": 0.5233, "step": 138300 }, { "epoch": 39.78998849252014, "grad_norm": 0.7761005163192749, "learning_rate": 0.0012042002301495974, "loss": 0.6899, "step": 138310 }, { "epoch": 39.792865362485614, "grad_norm": 1.9451583623886108, "learning_rate": 0.0012041426927502877, "loss": 0.7612, "step": 138320 }, { "epoch": 39.79574223245109, "grad_norm": 1.0894358158111572, "learning_rate": 0.001204085155350978, "loss": 0.642, "step": 138330 }, { "epoch": 39.79861910241657, "grad_norm": 1.1949210166931152, "learning_rate": 0.0012040276179516687, "loss": 0.5933, "step": 138340 }, { "epoch": 39.80149597238205, "grad_norm": 1.3470141887664795, "learning_rate": 0.001203970080552359, "loss": 0.4856, "step": 138350 }, { "epoch": 39.804372842347526, "grad_norm": 1.6488560438156128, "learning_rate": 0.0012039125431530496, "loss": 0.6524, "step": 138360 }, { "epoch": 39.807249712313, "grad_norm": 1.1562495231628418, "learning_rate": 0.00120385500575374, "loss": 0.6197, "step": 138370 }, { "epoch": 39.81012658227848, "grad_norm": 0.8831611275672913, "learning_rate": 0.0012037974683544305, "loss": 0.589, "step": 138380 }, { "epoch": 39.81300345224396, "grad_norm": 0.8979684710502625, "learning_rate": 0.0012037399309551208, "loss": 0.4896, "step": 138390 }, { "epoch": 39.81588032220944, "grad_norm": 2.0026183128356934, "learning_rate": 0.0012036823935558112, "loss": 0.9152, "step": 138400 }, { "epoch": 39.818757192174914, "grad_norm": 0.9175696969032288, "learning_rate": 0.0012036248561565017, "loss": 0.5276, "step": 138410 }, { "epoch": 39.82163406214039, "grad_norm": 1.1478673219680786, "learning_rate": 0.0012035673187571923, "loss": 0.6688, "step": 138420 }, { "epoch": 39.824510932105866, "grad_norm": 1.034658670425415, "learning_rate": 0.0012035097813578827, "loss": 0.679, "step": 138430 }, { "epoch": 39.82738780207135, "grad_norm": 0.9632428884506226, "learning_rate": 0.0012034522439585732, "loss": 0.6111, "step": 138440 }, { "epoch": 39.830264672036826, "grad_norm": 1.4320741891860962, "learning_rate": 0.0012033947065592636, "loss": 0.6404, "step": 138450 }, { "epoch": 39.8331415420023, "grad_norm": 1.8772777318954468, "learning_rate": 0.001203337169159954, "loss": 0.8595, "step": 138460 }, { "epoch": 39.83601841196778, "grad_norm": 0.9285217523574829, "learning_rate": 0.0012032796317606445, "loss": 0.5185, "step": 138470 }, { "epoch": 39.838895281933254, "grad_norm": 1.01333487033844, "learning_rate": 0.0012032220943613348, "loss": 0.6793, "step": 138480 }, { "epoch": 39.84177215189874, "grad_norm": 1.3122899532318115, "learning_rate": 0.0012031645569620254, "loss": 0.5118, "step": 138490 }, { "epoch": 39.844649021864214, "grad_norm": 1.6611912250518799, "learning_rate": 0.001203107019562716, "loss": 0.6293, "step": 138500 }, { "epoch": 39.84752589182969, "grad_norm": 1.241681456565857, "learning_rate": 0.001203049482163406, "loss": 0.6178, "step": 138510 }, { "epoch": 39.850402761795166, "grad_norm": 0.7698656916618347, "learning_rate": 0.0012029919447640966, "loss": 0.6126, "step": 138520 }, { "epoch": 39.85327963176064, "grad_norm": 1.4599313735961914, "learning_rate": 0.0012029344073647872, "loss": 0.6635, "step": 138530 }, { "epoch": 39.85615650172612, "grad_norm": 0.8793261647224426, "learning_rate": 0.0012028768699654776, "loss": 0.8127, "step": 138540 }, { "epoch": 39.8590333716916, "grad_norm": 1.2335284948349, "learning_rate": 0.0012028193325661681, "loss": 0.5751, "step": 138550 }, { "epoch": 39.86191024165708, "grad_norm": 2.492412567138672, "learning_rate": 0.0012027617951668587, "loss": 0.58, "step": 138560 }, { "epoch": 39.864787111622555, "grad_norm": 1.1013520956039429, "learning_rate": 0.0012027042577675488, "loss": 0.6209, "step": 138570 }, { "epoch": 39.86766398158803, "grad_norm": 1.049130916595459, "learning_rate": 0.0012026467203682394, "loss": 0.518, "step": 138580 }, { "epoch": 39.87054085155351, "grad_norm": 0.9742609262466431, "learning_rate": 0.00120258918296893, "loss": 0.6403, "step": 138590 }, { "epoch": 39.87341772151899, "grad_norm": 1.9763615131378174, "learning_rate": 0.0012025316455696203, "loss": 0.6958, "step": 138600 }, { "epoch": 39.876294591484466, "grad_norm": 1.47320556640625, "learning_rate": 0.0012024741081703109, "loss": 0.7532, "step": 138610 }, { "epoch": 39.87917146144994, "grad_norm": 1.0210421085357666, "learning_rate": 0.001202416570771001, "loss": 0.5445, "step": 138620 }, { "epoch": 39.88204833141542, "grad_norm": 1.686331033706665, "learning_rate": 0.0012023590333716915, "loss": 0.6984, "step": 138630 }, { "epoch": 39.884925201380895, "grad_norm": 1.198426365852356, "learning_rate": 0.0012023014959723821, "loss": 0.6768, "step": 138640 }, { "epoch": 39.88780207134638, "grad_norm": 0.5801644325256348, "learning_rate": 0.0012022439585730725, "loss": 0.5097, "step": 138650 }, { "epoch": 39.890678941311855, "grad_norm": 1.418565034866333, "learning_rate": 0.001202186421173763, "loss": 0.5361, "step": 138660 }, { "epoch": 39.89355581127733, "grad_norm": 1.9281110763549805, "learning_rate": 0.0012021288837744536, "loss": 0.7177, "step": 138670 }, { "epoch": 39.89643268124281, "grad_norm": 1.208965539932251, "learning_rate": 0.0012020713463751437, "loss": 0.4871, "step": 138680 }, { "epoch": 39.89930955120828, "grad_norm": 0.805628776550293, "learning_rate": 0.0012020138089758343, "loss": 0.5967, "step": 138690 }, { "epoch": 39.90218642117377, "grad_norm": 1.0986732244491577, "learning_rate": 0.0012019562715765248, "loss": 0.5464, "step": 138700 }, { "epoch": 39.90506329113924, "grad_norm": 1.3653455972671509, "learning_rate": 0.0012018987341772152, "loss": 0.5159, "step": 138710 }, { "epoch": 39.90794016110472, "grad_norm": 2.4023795127868652, "learning_rate": 0.0012018411967779058, "loss": 0.7822, "step": 138720 }, { "epoch": 39.910817031070195, "grad_norm": 1.6028724908828735, "learning_rate": 0.001201783659378596, "loss": 0.6729, "step": 138730 }, { "epoch": 39.91369390103567, "grad_norm": 0.7596341371536255, "learning_rate": 0.0012017261219792864, "loss": 0.6964, "step": 138740 }, { "epoch": 39.91657077100115, "grad_norm": 1.1168715953826904, "learning_rate": 0.001201668584579977, "loss": 0.4741, "step": 138750 }, { "epoch": 39.91944764096663, "grad_norm": 1.267073154449463, "learning_rate": 0.0012016110471806674, "loss": 0.5456, "step": 138760 }, { "epoch": 39.92232451093211, "grad_norm": 1.1473946571350098, "learning_rate": 0.001201553509781358, "loss": 0.6206, "step": 138770 }, { "epoch": 39.92520138089758, "grad_norm": 1.2588355541229248, "learning_rate": 0.0012014959723820485, "loss": 0.6924, "step": 138780 }, { "epoch": 39.92807825086306, "grad_norm": 2.861368179321289, "learning_rate": 0.0012014384349827388, "loss": 0.5673, "step": 138790 }, { "epoch": 39.930955120828536, "grad_norm": 1.0766955614089966, "learning_rate": 0.0012013808975834292, "loss": 0.6422, "step": 138800 }, { "epoch": 39.93383199079402, "grad_norm": 1.594594120979309, "learning_rate": 0.0012013233601841197, "loss": 0.6113, "step": 138810 }, { "epoch": 39.936708860759495, "grad_norm": 0.7892242074012756, "learning_rate": 0.00120126582278481, "loss": 0.6078, "step": 138820 }, { "epoch": 39.93958573072497, "grad_norm": 1.1311461925506592, "learning_rate": 0.0012012082853855007, "loss": 0.6251, "step": 138830 }, { "epoch": 39.94246260069045, "grad_norm": 2.5175132751464844, "learning_rate": 0.001201150747986191, "loss": 0.5993, "step": 138840 }, { "epoch": 39.945339470655924, "grad_norm": 1.2862271070480347, "learning_rate": 0.0012010932105868816, "loss": 0.5213, "step": 138850 }, { "epoch": 39.94821634062141, "grad_norm": 1.5520083904266357, "learning_rate": 0.001201035673187572, "loss": 0.4883, "step": 138860 }, { "epoch": 39.95109321058688, "grad_norm": 1.8711613416671753, "learning_rate": 0.0012009781357882623, "loss": 0.5722, "step": 138870 }, { "epoch": 39.95397008055236, "grad_norm": 1.6181915998458862, "learning_rate": 0.0012009205983889528, "loss": 0.6814, "step": 138880 }, { "epoch": 39.956846950517836, "grad_norm": 1.12531578540802, "learning_rate": 0.0012008630609896434, "loss": 0.5807, "step": 138890 }, { "epoch": 39.95972382048331, "grad_norm": 1.501779556274414, "learning_rate": 0.0012008055235903337, "loss": 0.7112, "step": 138900 }, { "epoch": 39.962600690448795, "grad_norm": 0.9372612237930298, "learning_rate": 0.0012007479861910243, "loss": 0.5965, "step": 138910 }, { "epoch": 39.96547756041427, "grad_norm": 0.9205166101455688, "learning_rate": 0.0012006904487917146, "loss": 0.6438, "step": 138920 }, { "epoch": 39.96835443037975, "grad_norm": 1.9613630771636963, "learning_rate": 0.001200632911392405, "loss": 0.7825, "step": 138930 }, { "epoch": 39.971231300345224, "grad_norm": 0.867321789264679, "learning_rate": 0.0012005753739930956, "loss": 0.6677, "step": 138940 }, { "epoch": 39.9741081703107, "grad_norm": 0.873665452003479, "learning_rate": 0.001200517836593786, "loss": 0.6965, "step": 138950 }, { "epoch": 39.976985040276176, "grad_norm": 1.0648164749145508, "learning_rate": 0.0012004602991944765, "loss": 0.6304, "step": 138960 }, { "epoch": 39.97986191024166, "grad_norm": 2.1960036754608154, "learning_rate": 0.001200402761795167, "loss": 0.653, "step": 138970 }, { "epoch": 39.982738780207136, "grad_norm": 1.4335684776306152, "learning_rate": 0.0012003452243958572, "loss": 0.6121, "step": 138980 }, { "epoch": 39.98561565017261, "grad_norm": 1.3206677436828613, "learning_rate": 0.0012002876869965477, "loss": 0.4961, "step": 138990 }, { "epoch": 39.98849252013809, "grad_norm": 1.3940132856369019, "learning_rate": 0.0012002301495972383, "loss": 0.5703, "step": 139000 }, { "epoch": 39.991369390103564, "grad_norm": 0.9775663614273071, "learning_rate": 0.0012001726121979286, "loss": 0.4823, "step": 139010 }, { "epoch": 39.99424626006905, "grad_norm": 1.8278807401657104, "learning_rate": 0.0012001150747986192, "loss": 0.7039, "step": 139020 }, { "epoch": 39.997123130034524, "grad_norm": 0.6657835841178894, "learning_rate": 0.0012000575373993098, "loss": 0.7128, "step": 139030 }, { "epoch": 40.0, "grad_norm": 0.9849488735198975, "learning_rate": 0.0012, "loss": 0.5628, "step": 139040 }, { "epoch": 40.002876869965476, "grad_norm": 2.1875338554382324, "learning_rate": 0.0011999424626006905, "loss": 0.5454, "step": 139050 }, { "epoch": 40.00575373993095, "grad_norm": 1.4411808252334595, "learning_rate": 0.0011998849252013808, "loss": 0.57, "step": 139060 }, { "epoch": 40.008630609896436, "grad_norm": 1.5011448860168457, "learning_rate": 0.0011998273878020714, "loss": 0.5681, "step": 139070 }, { "epoch": 40.01150747986191, "grad_norm": 1.4096418619155884, "learning_rate": 0.001199769850402762, "loss": 0.5309, "step": 139080 }, { "epoch": 40.01438434982739, "grad_norm": 0.9371535778045654, "learning_rate": 0.0011997123130034523, "loss": 0.5697, "step": 139090 }, { "epoch": 40.017261219792864, "grad_norm": 0.8063069581985474, "learning_rate": 0.0011996547756041426, "loss": 0.6043, "step": 139100 }, { "epoch": 40.02013808975834, "grad_norm": 1.475304365158081, "learning_rate": 0.0011995972382048332, "loss": 0.7516, "step": 139110 }, { "epoch": 40.023014959723824, "grad_norm": 1.9351292848587036, "learning_rate": 0.0011995397008055235, "loss": 0.5692, "step": 139120 }, { "epoch": 40.0258918296893, "grad_norm": 1.2686837911605835, "learning_rate": 0.001199482163406214, "loss": 0.5468, "step": 139130 }, { "epoch": 40.028768699654776, "grad_norm": 1.3393553495407104, "learning_rate": 0.0011994246260069047, "loss": 0.5866, "step": 139140 }, { "epoch": 40.03164556962025, "grad_norm": 1.5401599407196045, "learning_rate": 0.001199367088607595, "loss": 0.6804, "step": 139150 }, { "epoch": 40.03452243958573, "grad_norm": 1.8443987369537354, "learning_rate": 0.0011993095512082854, "loss": 0.5764, "step": 139160 }, { "epoch": 40.037399309551205, "grad_norm": 1.0240682363510132, "learning_rate": 0.0011992520138089757, "loss": 0.6567, "step": 139170 }, { "epoch": 40.04027617951669, "grad_norm": 1.1527999639511108, "learning_rate": 0.0011991944764096663, "loss": 0.5946, "step": 139180 }, { "epoch": 40.043153049482164, "grad_norm": 0.8864424824714661, "learning_rate": 0.0011991369390103568, "loss": 0.5092, "step": 139190 }, { "epoch": 40.04602991944764, "grad_norm": 1.8814488649368286, "learning_rate": 0.0011990794016110472, "loss": 0.6593, "step": 139200 }, { "epoch": 40.04890678941312, "grad_norm": 1.0552561283111572, "learning_rate": 0.0011990218642117377, "loss": 0.5455, "step": 139210 }, { "epoch": 40.05178365937859, "grad_norm": 1.710195541381836, "learning_rate": 0.001198964326812428, "loss": 0.4467, "step": 139220 }, { "epoch": 40.054660529344076, "grad_norm": 1.0014113187789917, "learning_rate": 0.0011989067894131184, "loss": 0.4457, "step": 139230 }, { "epoch": 40.05753739930955, "grad_norm": 1.2903802394866943, "learning_rate": 0.001198849252013809, "loss": 0.5017, "step": 139240 }, { "epoch": 40.06041426927503, "grad_norm": 1.5483042001724243, "learning_rate": 0.0011987917146144996, "loss": 0.5098, "step": 139250 }, { "epoch": 40.063291139240505, "grad_norm": 1.49233877658844, "learning_rate": 0.00119873417721519, "loss": 0.5009, "step": 139260 }, { "epoch": 40.06616800920598, "grad_norm": 0.9814323782920837, "learning_rate": 0.0011986766398158805, "loss": 0.5398, "step": 139270 }, { "epoch": 40.069044879171464, "grad_norm": 3.066525459289551, "learning_rate": 0.0011986191024165708, "loss": 0.7279, "step": 139280 }, { "epoch": 40.07192174913694, "grad_norm": 1.2993427515029907, "learning_rate": 0.0011985615650172612, "loss": 0.4612, "step": 139290 }, { "epoch": 40.07479861910242, "grad_norm": 0.8700226545333862, "learning_rate": 0.0011985040276179517, "loss": 0.7024, "step": 139300 }, { "epoch": 40.07767548906789, "grad_norm": 0.8710877299308777, "learning_rate": 0.001198446490218642, "loss": 0.4845, "step": 139310 }, { "epoch": 40.08055235903337, "grad_norm": 0.7798520922660828, "learning_rate": 0.0011983889528193326, "loss": 0.5748, "step": 139320 }, { "epoch": 40.08342922899885, "grad_norm": 1.3732234239578247, "learning_rate": 0.0011983314154200232, "loss": 0.5037, "step": 139330 }, { "epoch": 40.08630609896433, "grad_norm": 0.8158047199249268, "learning_rate": 0.0011982738780207133, "loss": 0.5573, "step": 139340 }, { "epoch": 40.089182968929805, "grad_norm": 2.0127289295196533, "learning_rate": 0.001198216340621404, "loss": 0.4931, "step": 139350 }, { "epoch": 40.09205983889528, "grad_norm": 0.7458972930908203, "learning_rate": 0.0011981588032220945, "loss": 0.5439, "step": 139360 }, { "epoch": 40.09493670886076, "grad_norm": 1.362380862236023, "learning_rate": 0.0011981012658227848, "loss": 0.5139, "step": 139370 }, { "epoch": 40.09781357882623, "grad_norm": 1.7357879877090454, "learning_rate": 0.0011980437284234754, "loss": 0.5701, "step": 139380 }, { "epoch": 40.10069044879172, "grad_norm": 1.3217767477035522, "learning_rate": 0.001197986191024166, "loss": 0.5334, "step": 139390 }, { "epoch": 40.10356731875719, "grad_norm": 1.1948347091674805, "learning_rate": 0.001197928653624856, "loss": 0.5797, "step": 139400 }, { "epoch": 40.10644418872267, "grad_norm": 0.7679895758628845, "learning_rate": 0.0011978711162255466, "loss": 0.5904, "step": 139410 }, { "epoch": 40.109321058688145, "grad_norm": 1.4937959909439087, "learning_rate": 0.001197813578826237, "loss": 0.5171, "step": 139420 }, { "epoch": 40.11219792865362, "grad_norm": 1.9007267951965332, "learning_rate": 0.0011977560414269276, "loss": 0.6244, "step": 139430 }, { "epoch": 40.115074798619105, "grad_norm": 1.187967300415039, "learning_rate": 0.0011976985040276181, "loss": 0.5951, "step": 139440 }, { "epoch": 40.11795166858458, "grad_norm": 1.0579043626785278, "learning_rate": 0.0011976409666283082, "loss": 0.4858, "step": 139450 }, { "epoch": 40.12082853855006, "grad_norm": 1.4420207738876343, "learning_rate": 0.0011975834292289988, "loss": 0.5963, "step": 139460 }, { "epoch": 40.123705408515534, "grad_norm": 1.6048129796981812, "learning_rate": 0.0011975258918296894, "loss": 0.5898, "step": 139470 }, { "epoch": 40.12658227848101, "grad_norm": 2.5269999504089355, "learning_rate": 0.0011974683544303797, "loss": 0.5812, "step": 139480 }, { "epoch": 40.12945914844649, "grad_norm": 1.2779210805892944, "learning_rate": 0.0011974108170310703, "loss": 0.6194, "step": 139490 }, { "epoch": 40.13233601841197, "grad_norm": 0.7346370220184326, "learning_rate": 0.0011973532796317608, "loss": 0.6073, "step": 139500 }, { "epoch": 40.135212888377445, "grad_norm": 0.7016090750694275, "learning_rate": 0.001197295742232451, "loss": 0.6015, "step": 139510 }, { "epoch": 40.13808975834292, "grad_norm": 1.5047460794448853, "learning_rate": 0.0011972382048331415, "loss": 0.6249, "step": 139520 }, { "epoch": 40.1409666283084, "grad_norm": 1.3866305351257324, "learning_rate": 0.0011971806674338319, "loss": 0.6304, "step": 139530 }, { "epoch": 40.14384349827388, "grad_norm": 0.6418248414993286, "learning_rate": 0.0011971231300345225, "loss": 0.5534, "step": 139540 }, { "epoch": 40.14672036823936, "grad_norm": 0.9458606839179993, "learning_rate": 0.001197065592635213, "loss": 0.5501, "step": 139550 }, { "epoch": 40.149597238204834, "grad_norm": 1.3510853052139282, "learning_rate": 0.0011970080552359034, "loss": 0.5685, "step": 139560 }, { "epoch": 40.15247410817031, "grad_norm": 0.9162773489952087, "learning_rate": 0.0011969505178365937, "loss": 0.5901, "step": 139570 }, { "epoch": 40.155350978135786, "grad_norm": 0.9969032406806946, "learning_rate": 0.0011968929804372843, "loss": 0.6194, "step": 139580 }, { "epoch": 40.15822784810127, "grad_norm": 1.284252405166626, "learning_rate": 0.0011968354430379746, "loss": 0.704, "step": 139590 }, { "epoch": 40.161104718066746, "grad_norm": 1.2624170780181885, "learning_rate": 0.0011967779056386652, "loss": 0.6568, "step": 139600 }, { "epoch": 40.16398158803222, "grad_norm": 1.485034704208374, "learning_rate": 0.0011967203682393558, "loss": 0.515, "step": 139610 }, { "epoch": 40.1668584579977, "grad_norm": 0.7822240591049194, "learning_rate": 0.001196662830840046, "loss": 0.4403, "step": 139620 }, { "epoch": 40.169735327963174, "grad_norm": 0.8499975204467773, "learning_rate": 0.0011966052934407364, "loss": 0.6238, "step": 139630 }, { "epoch": 40.17261219792865, "grad_norm": 1.6105928421020508, "learning_rate": 0.0011965477560414268, "loss": 0.6395, "step": 139640 }, { "epoch": 40.175489067894134, "grad_norm": 0.7415489554405212, "learning_rate": 0.0011964902186421174, "loss": 0.6757, "step": 139650 }, { "epoch": 40.17836593785961, "grad_norm": 1.9182348251342773, "learning_rate": 0.001196432681242808, "loss": 0.6625, "step": 139660 }, { "epoch": 40.181242807825086, "grad_norm": 1.3813762664794922, "learning_rate": 0.0011963751438434983, "loss": 0.5015, "step": 139670 }, { "epoch": 40.18411967779056, "grad_norm": 1.4766651391983032, "learning_rate": 0.0011963176064441888, "loss": 0.5526, "step": 139680 }, { "epoch": 40.18699654775604, "grad_norm": 1.4001215696334839, "learning_rate": 0.0011962600690448792, "loss": 0.6188, "step": 139690 }, { "epoch": 40.18987341772152, "grad_norm": 0.6989181637763977, "learning_rate": 0.0011962025316455695, "loss": 0.7188, "step": 139700 }, { "epoch": 40.192750287687, "grad_norm": 0.8052700757980347, "learning_rate": 0.00119614499424626, "loss": 0.5177, "step": 139710 }, { "epoch": 40.195627157652474, "grad_norm": 0.9608855247497559, "learning_rate": 0.0011960874568469507, "loss": 0.5244, "step": 139720 }, { "epoch": 40.19850402761795, "grad_norm": 1.0172334909439087, "learning_rate": 0.001196029919447641, "loss": 0.5702, "step": 139730 }, { "epoch": 40.20138089758343, "grad_norm": 1.7115603685379028, "learning_rate": 0.0011959723820483316, "loss": 0.6798, "step": 139740 }, { "epoch": 40.20425776754891, "grad_norm": 1.3289963006973267, "learning_rate": 0.0011959148446490217, "loss": 0.6351, "step": 139750 }, { "epoch": 40.207134637514386, "grad_norm": 1.7651677131652832, "learning_rate": 0.0011958573072497123, "loss": 0.6221, "step": 139760 }, { "epoch": 40.21001150747986, "grad_norm": 0.6273890733718872, "learning_rate": 0.0011957997698504028, "loss": 0.5166, "step": 139770 }, { "epoch": 40.21288837744534, "grad_norm": 0.8848966360092163, "learning_rate": 0.0011957422324510932, "loss": 0.6175, "step": 139780 }, { "epoch": 40.215765247410815, "grad_norm": 1.2332686185836792, "learning_rate": 0.0011956846950517837, "loss": 0.6028, "step": 139790 }, { "epoch": 40.2186421173763, "grad_norm": 1.121983528137207, "learning_rate": 0.0011956271576524743, "loss": 0.5233, "step": 139800 }, { "epoch": 40.221518987341774, "grad_norm": 1.8256975412368774, "learning_rate": 0.0011955696202531644, "loss": 0.639, "step": 139810 }, { "epoch": 40.22439585730725, "grad_norm": 0.865436315536499, "learning_rate": 0.001195512082853855, "loss": 0.5882, "step": 139820 }, { "epoch": 40.22727272727273, "grad_norm": 0.9732932448387146, "learning_rate": 0.0011954545454545456, "loss": 0.5978, "step": 139830 }, { "epoch": 40.2301495972382, "grad_norm": 1.6272740364074707, "learning_rate": 0.001195397008055236, "loss": 0.6658, "step": 139840 }, { "epoch": 40.23302646720368, "grad_norm": 1.9780646562576294, "learning_rate": 0.0011953394706559265, "loss": 0.6599, "step": 139850 }, { "epoch": 40.23590333716916, "grad_norm": 1.0805087089538574, "learning_rate": 0.0011952819332566168, "loss": 0.6444, "step": 139860 }, { "epoch": 40.23878020713464, "grad_norm": 1.551204800605774, "learning_rate": 0.0011952243958573072, "loss": 0.5523, "step": 139870 }, { "epoch": 40.241657077100115, "grad_norm": 0.8004522323608398, "learning_rate": 0.0011951668584579977, "loss": 0.5895, "step": 139880 }, { "epoch": 40.24453394706559, "grad_norm": 1.0463366508483887, "learning_rate": 0.001195109321058688, "loss": 0.5831, "step": 139890 }, { "epoch": 40.24741081703107, "grad_norm": 1.1409295797348022, "learning_rate": 0.0011950517836593786, "loss": 0.7195, "step": 139900 }, { "epoch": 40.25028768699655, "grad_norm": 1.527730107307434, "learning_rate": 0.0011949942462600692, "loss": 0.5853, "step": 139910 }, { "epoch": 40.25316455696203, "grad_norm": 1.4027053117752075, "learning_rate": 0.0011949367088607595, "loss": 0.6593, "step": 139920 }, { "epoch": 40.2560414269275, "grad_norm": 1.560994267463684, "learning_rate": 0.00119487917146145, "loss": 0.5837, "step": 139930 }, { "epoch": 40.25891829689298, "grad_norm": 1.132472038269043, "learning_rate": 0.0011948216340621405, "loss": 0.6294, "step": 139940 }, { "epoch": 40.261795166858455, "grad_norm": 0.8141928911209106, "learning_rate": 0.0011947640966628308, "loss": 0.5313, "step": 139950 }, { "epoch": 40.26467203682394, "grad_norm": 1.1970276832580566, "learning_rate": 0.0011947065592635214, "loss": 0.6878, "step": 139960 }, { "epoch": 40.267548906789415, "grad_norm": 1.6445860862731934, "learning_rate": 0.001194649021864212, "loss": 0.6456, "step": 139970 }, { "epoch": 40.27042577675489, "grad_norm": 2.230426549911499, "learning_rate": 0.0011945914844649023, "loss": 0.6497, "step": 139980 }, { "epoch": 40.27330264672037, "grad_norm": 2.9231765270233154, "learning_rate": 0.0011945339470655926, "loss": 0.4378, "step": 139990 }, { "epoch": 40.27617951668584, "grad_norm": 1.0570701360702515, "learning_rate": 0.001194476409666283, "loss": 0.598, "step": 140000 }, { "epoch": 40.27905638665133, "grad_norm": 1.2995104789733887, "learning_rate": 0.0011944188722669735, "loss": 0.6875, "step": 140010 }, { "epoch": 40.2819332566168, "grad_norm": 0.9498854279518127, "learning_rate": 0.001194361334867664, "loss": 0.6307, "step": 140020 }, { "epoch": 40.28481012658228, "grad_norm": 0.9968244433403015, "learning_rate": 0.0011943037974683544, "loss": 0.6373, "step": 140030 }, { "epoch": 40.287686996547755, "grad_norm": 2.119365692138672, "learning_rate": 0.001194246260069045, "loss": 0.6661, "step": 140040 }, { "epoch": 40.29056386651323, "grad_norm": 1.6030187606811523, "learning_rate": 0.0011941887226697354, "loss": 0.598, "step": 140050 }, { "epoch": 40.29344073647871, "grad_norm": 1.6666151285171509, "learning_rate": 0.0011941311852704257, "loss": 0.4919, "step": 140060 }, { "epoch": 40.29631760644419, "grad_norm": 0.9952103495597839, "learning_rate": 0.0011940736478711163, "loss": 0.5548, "step": 140070 }, { "epoch": 40.29919447640967, "grad_norm": 0.9899098873138428, "learning_rate": 0.0011940161104718068, "loss": 0.68, "step": 140080 }, { "epoch": 40.30207134637514, "grad_norm": 1.5696454048156738, "learning_rate": 0.0011939585730724972, "loss": 0.5645, "step": 140090 }, { "epoch": 40.30494821634062, "grad_norm": 1.254586935043335, "learning_rate": 0.0011939010356731877, "loss": 0.6342, "step": 140100 }, { "epoch": 40.307825086306096, "grad_norm": 1.176574468612671, "learning_rate": 0.0011938434982738779, "loss": 0.6255, "step": 140110 }, { "epoch": 40.31070195627158, "grad_norm": 1.239898920059204, "learning_rate": 0.0011937859608745684, "loss": 0.5519, "step": 140120 }, { "epoch": 40.313578826237055, "grad_norm": 1.695677399635315, "learning_rate": 0.001193728423475259, "loss": 0.6435, "step": 140130 }, { "epoch": 40.31645569620253, "grad_norm": 1.4909018278121948, "learning_rate": 0.0011936708860759493, "loss": 0.3943, "step": 140140 }, { "epoch": 40.31933256616801, "grad_norm": 1.1968162059783936, "learning_rate": 0.00119361334867664, "loss": 0.398, "step": 140150 }, { "epoch": 40.322209436133484, "grad_norm": 0.881652295589447, "learning_rate": 0.0011935558112773305, "loss": 0.7621, "step": 140160 }, { "epoch": 40.32508630609897, "grad_norm": 0.885290265083313, "learning_rate": 0.0011934982738780206, "loss": 0.5437, "step": 140170 }, { "epoch": 40.32796317606444, "grad_norm": 1.900551199913025, "learning_rate": 0.0011934407364787112, "loss": 0.568, "step": 140180 }, { "epoch": 40.33084004602992, "grad_norm": 1.8994883298873901, "learning_rate": 0.0011933831990794017, "loss": 0.6672, "step": 140190 }, { "epoch": 40.333716915995396, "grad_norm": 0.9167900085449219, "learning_rate": 0.001193325661680092, "loss": 0.5424, "step": 140200 }, { "epoch": 40.33659378596087, "grad_norm": 1.5363231897354126, "learning_rate": 0.0011932681242807826, "loss": 0.4947, "step": 140210 }, { "epoch": 40.339470655926355, "grad_norm": 1.2490695714950562, "learning_rate": 0.0011932105868814728, "loss": 0.6659, "step": 140220 }, { "epoch": 40.34234752589183, "grad_norm": 1.8188986778259277, "learning_rate": 0.0011931530494821633, "loss": 0.5796, "step": 140230 }, { "epoch": 40.34522439585731, "grad_norm": 1.4554975032806396, "learning_rate": 0.001193095512082854, "loss": 0.5961, "step": 140240 }, { "epoch": 40.348101265822784, "grad_norm": 1.6214845180511475, "learning_rate": 0.0011930379746835443, "loss": 0.5133, "step": 140250 }, { "epoch": 40.35097813578826, "grad_norm": 1.1152945756912231, "learning_rate": 0.0011929804372842348, "loss": 0.5995, "step": 140260 }, { "epoch": 40.353855005753736, "grad_norm": 1.0516706705093384, "learning_rate": 0.0011929228998849254, "loss": 0.6102, "step": 140270 }, { "epoch": 40.35673187571922, "grad_norm": 1.421968698501587, "learning_rate": 0.0011928653624856155, "loss": 0.5548, "step": 140280 }, { "epoch": 40.359608745684696, "grad_norm": 0.8439323902130127, "learning_rate": 0.001192807825086306, "loss": 0.5445, "step": 140290 }, { "epoch": 40.36248561565017, "grad_norm": 1.5421695709228516, "learning_rate": 0.0011927502876869966, "loss": 0.567, "step": 140300 }, { "epoch": 40.36536248561565, "grad_norm": 0.707302451133728, "learning_rate": 0.001192692750287687, "loss": 0.5422, "step": 140310 }, { "epoch": 40.368239355581125, "grad_norm": 1.508608341217041, "learning_rate": 0.0011926352128883775, "loss": 0.5346, "step": 140320 }, { "epoch": 40.37111622554661, "grad_norm": 0.8616765737533569, "learning_rate": 0.001192577675489068, "loss": 0.5058, "step": 140330 }, { "epoch": 40.373993095512084, "grad_norm": 1.5363539457321167, "learning_rate": 0.0011925201380897582, "loss": 0.4848, "step": 140340 }, { "epoch": 40.37686996547756, "grad_norm": 2.241422176361084, "learning_rate": 0.0011924626006904488, "loss": 0.677, "step": 140350 }, { "epoch": 40.379746835443036, "grad_norm": 1.3994455337524414, "learning_rate": 0.0011924050632911392, "loss": 0.6632, "step": 140360 }, { "epoch": 40.38262370540851, "grad_norm": 1.7653224468231201, "learning_rate": 0.0011923475258918297, "loss": 0.5132, "step": 140370 }, { "epoch": 40.385500575373996, "grad_norm": 0.7810588479042053, "learning_rate": 0.0011922899884925203, "loss": 0.5317, "step": 140380 }, { "epoch": 40.38837744533947, "grad_norm": 0.956531822681427, "learning_rate": 0.0011922324510932106, "loss": 0.657, "step": 140390 }, { "epoch": 40.39125431530495, "grad_norm": 1.20218825340271, "learning_rate": 0.001192174913693901, "loss": 0.6207, "step": 140400 }, { "epoch": 40.394131185270425, "grad_norm": 1.322658658027649, "learning_rate": 0.0011921173762945915, "loss": 0.5792, "step": 140410 }, { "epoch": 40.3970080552359, "grad_norm": 1.3755112886428833, "learning_rate": 0.0011920598388952819, "loss": 0.5574, "step": 140420 }, { "epoch": 40.399884925201384, "grad_norm": 1.85406494140625, "learning_rate": 0.0011920023014959725, "loss": 0.5128, "step": 140430 }, { "epoch": 40.40276179516686, "grad_norm": 0.8405399322509766, "learning_rate": 0.0011919447640966628, "loss": 0.4748, "step": 140440 }, { "epoch": 40.40563866513234, "grad_norm": 1.1863211393356323, "learning_rate": 0.0011918872266973534, "loss": 0.5433, "step": 140450 }, { "epoch": 40.40851553509781, "grad_norm": 1.4174585342407227, "learning_rate": 0.0011918296892980437, "loss": 0.6654, "step": 140460 }, { "epoch": 40.41139240506329, "grad_norm": 1.8240617513656616, "learning_rate": 0.001191772151898734, "loss": 0.6363, "step": 140470 }, { "epoch": 40.41426927502877, "grad_norm": 1.1583821773529053, "learning_rate": 0.0011917146144994246, "loss": 0.6275, "step": 140480 }, { "epoch": 40.41714614499425, "grad_norm": 1.9769841432571411, "learning_rate": 0.0011916570771001152, "loss": 0.6496, "step": 140490 }, { "epoch": 40.420023014959725, "grad_norm": 1.6187382936477661, "learning_rate": 0.0011915995397008055, "loss": 0.5304, "step": 140500 }, { "epoch": 40.4228998849252, "grad_norm": 1.4580950736999512, "learning_rate": 0.001191542002301496, "loss": 0.5771, "step": 140510 }, { "epoch": 40.42577675489068, "grad_norm": 1.2549272775650024, "learning_rate": 0.0011914844649021864, "loss": 0.5545, "step": 140520 }, { "epoch": 40.42865362485615, "grad_norm": 1.204360008239746, "learning_rate": 0.0011914269275028768, "loss": 0.6214, "step": 140530 }, { "epoch": 40.43153049482164, "grad_norm": 1.0718052387237549, "learning_rate": 0.0011913693901035674, "loss": 0.4973, "step": 140540 }, { "epoch": 40.43440736478711, "grad_norm": 1.7012519836425781, "learning_rate": 0.0011913118527042577, "loss": 0.7781, "step": 140550 }, { "epoch": 40.43728423475259, "grad_norm": 1.5785719156265259, "learning_rate": 0.0011912543153049483, "loss": 0.6976, "step": 140560 }, { "epoch": 40.440161104718065, "grad_norm": 0.9427841901779175, "learning_rate": 0.0011911967779056388, "loss": 0.4673, "step": 140570 }, { "epoch": 40.44303797468354, "grad_norm": 1.3432406187057495, "learning_rate": 0.001191139240506329, "loss": 0.5789, "step": 140580 }, { "epoch": 40.445914844649025, "grad_norm": 1.2510124444961548, "learning_rate": 0.0011910817031070195, "loss": 0.646, "step": 140590 }, { "epoch": 40.4487917146145, "grad_norm": 1.3548994064331055, "learning_rate": 0.00119102416570771, "loss": 0.5618, "step": 140600 }, { "epoch": 40.45166858457998, "grad_norm": 1.1787760257720947, "learning_rate": 0.0011909666283084004, "loss": 0.5428, "step": 140610 }, { "epoch": 40.45454545454545, "grad_norm": 0.8021193146705627, "learning_rate": 0.001190909090909091, "loss": 0.5615, "step": 140620 }, { "epoch": 40.45742232451093, "grad_norm": 1.0175443887710571, "learning_rate": 0.0011908515535097816, "loss": 0.5437, "step": 140630 }, { "epoch": 40.46029919447641, "grad_norm": 1.1165480613708496, "learning_rate": 0.0011907940161104717, "loss": 0.6034, "step": 140640 }, { "epoch": 40.46317606444189, "grad_norm": 0.7386147975921631, "learning_rate": 0.0011907364787111623, "loss": 0.579, "step": 140650 }, { "epoch": 40.466052934407365, "grad_norm": 0.7341903448104858, "learning_rate": 0.0011906789413118528, "loss": 0.5189, "step": 140660 }, { "epoch": 40.46892980437284, "grad_norm": 1.684162974357605, "learning_rate": 0.0011906214039125432, "loss": 0.7512, "step": 140670 }, { "epoch": 40.47180667433832, "grad_norm": 1.4519342184066772, "learning_rate": 0.0011905638665132337, "loss": 0.6255, "step": 140680 }, { "epoch": 40.4746835443038, "grad_norm": 1.1217608451843262, "learning_rate": 0.001190506329113924, "loss": 0.5532, "step": 140690 }, { "epoch": 40.47756041426928, "grad_norm": 0.9818419218063354, "learning_rate": 0.0011904487917146144, "loss": 0.6198, "step": 140700 }, { "epoch": 40.48043728423475, "grad_norm": 1.5857164859771729, "learning_rate": 0.001190391254315305, "loss": 0.621, "step": 140710 }, { "epoch": 40.48331415420023, "grad_norm": 1.8297797441482544, "learning_rate": 0.0011903337169159953, "loss": 0.6709, "step": 140720 }, { "epoch": 40.486191024165706, "grad_norm": 0.8260520100593567, "learning_rate": 0.001190276179516686, "loss": 0.4385, "step": 140730 }, { "epoch": 40.48906789413118, "grad_norm": 1.075793981552124, "learning_rate": 0.0011902186421173765, "loss": 0.6765, "step": 140740 }, { "epoch": 40.491944764096665, "grad_norm": 1.6370407342910767, "learning_rate": 0.0011901611047180668, "loss": 0.4382, "step": 140750 }, { "epoch": 40.49482163406214, "grad_norm": 2.3548386096954346, "learning_rate": 0.0011901035673187572, "loss": 0.5942, "step": 140760 }, { "epoch": 40.49769850402762, "grad_norm": 1.0101778507232666, "learning_rate": 0.0011900460299194477, "loss": 0.7185, "step": 140770 }, { "epoch": 40.500575373993094, "grad_norm": 1.587553858757019, "learning_rate": 0.001189988492520138, "loss": 0.5782, "step": 140780 }, { "epoch": 40.50345224395857, "grad_norm": 1.400429129600525, "learning_rate": 0.0011899309551208286, "loss": 0.8611, "step": 140790 }, { "epoch": 40.50632911392405, "grad_norm": 1.4044115543365479, "learning_rate": 0.001189873417721519, "loss": 0.545, "step": 140800 }, { "epoch": 40.50920598388953, "grad_norm": 1.7500299215316772, "learning_rate": 0.0011898158803222095, "loss": 0.7269, "step": 140810 }, { "epoch": 40.512082853855006, "grad_norm": 1.8587167263031006, "learning_rate": 0.0011897583429228999, "loss": 0.952, "step": 140820 }, { "epoch": 40.51495972382048, "grad_norm": 1.4160314798355103, "learning_rate": 0.0011897008055235902, "loss": 0.6683, "step": 140830 }, { "epoch": 40.51783659378596, "grad_norm": 1.1933528184890747, "learning_rate": 0.0011896432681242808, "loss": 0.6612, "step": 140840 }, { "epoch": 40.52071346375144, "grad_norm": 0.9024684429168701, "learning_rate": 0.0011895857307249714, "loss": 0.5095, "step": 140850 }, { "epoch": 40.52359033371692, "grad_norm": 1.4419928789138794, "learning_rate": 0.0011895281933256617, "loss": 0.7467, "step": 140860 }, { "epoch": 40.526467203682394, "grad_norm": 1.7358530759811401, "learning_rate": 0.0011894706559263523, "loss": 0.6497, "step": 140870 }, { "epoch": 40.52934407364787, "grad_norm": 0.7702597379684448, "learning_rate": 0.0011894131185270426, "loss": 0.5327, "step": 140880 }, { "epoch": 40.532220943613346, "grad_norm": 1.568326711654663, "learning_rate": 0.001189355581127733, "loss": 0.5313, "step": 140890 }, { "epoch": 40.53509781357883, "grad_norm": 1.6501580476760864, "learning_rate": 0.0011892980437284235, "loss": 0.6259, "step": 140900 }, { "epoch": 40.537974683544306, "grad_norm": 1.3778023719787598, "learning_rate": 0.0011892405063291139, "loss": 0.7651, "step": 140910 }, { "epoch": 40.54085155350978, "grad_norm": 1.7534003257751465, "learning_rate": 0.0011891829689298044, "loss": 0.6464, "step": 140920 }, { "epoch": 40.54372842347526, "grad_norm": 1.0066322088241577, "learning_rate": 0.001189125431530495, "loss": 0.6123, "step": 140930 }, { "epoch": 40.546605293440734, "grad_norm": 1.496830940246582, "learning_rate": 0.0011890678941311851, "loss": 0.6263, "step": 140940 }, { "epoch": 40.54948216340621, "grad_norm": 1.1582417488098145, "learning_rate": 0.0011890103567318757, "loss": 0.7344, "step": 140950 }, { "epoch": 40.552359033371694, "grad_norm": 0.8531109094619751, "learning_rate": 0.0011889528193325663, "loss": 0.5763, "step": 140960 }, { "epoch": 40.55523590333717, "grad_norm": 1.3181626796722412, "learning_rate": 0.0011888952819332566, "loss": 0.6135, "step": 140970 }, { "epoch": 40.558112773302646, "grad_norm": 1.3971165418624878, "learning_rate": 0.0011888377445339472, "loss": 0.5218, "step": 140980 }, { "epoch": 40.56098964326812, "grad_norm": 1.3020565509796143, "learning_rate": 0.0011887802071346377, "loss": 0.4908, "step": 140990 }, { "epoch": 40.5638665132336, "grad_norm": 0.9746589064598083, "learning_rate": 0.0011887226697353279, "loss": 0.5785, "step": 141000 }, { "epoch": 40.56674338319908, "grad_norm": 1.1061201095581055, "learning_rate": 0.0011886651323360184, "loss": 0.6685, "step": 141010 }, { "epoch": 40.56962025316456, "grad_norm": 0.970228374004364, "learning_rate": 0.0011886075949367088, "loss": 0.6925, "step": 141020 }, { "epoch": 40.572497123130034, "grad_norm": 1.8335281610488892, "learning_rate": 0.0011885500575373993, "loss": 0.5739, "step": 141030 }, { "epoch": 40.57537399309551, "grad_norm": 1.6812530755996704, "learning_rate": 0.00118849252013809, "loss": 0.5936, "step": 141040 }, { "epoch": 40.57825086306099, "grad_norm": 2.010467290878296, "learning_rate": 0.00118843498273878, "loss": 0.544, "step": 141050 }, { "epoch": 40.58112773302647, "grad_norm": 2.55256724357605, "learning_rate": 0.0011883774453394706, "loss": 0.7338, "step": 141060 }, { "epoch": 40.584004602991946, "grad_norm": 1.6774667501449585, "learning_rate": 0.0011883199079401612, "loss": 0.6256, "step": 141070 }, { "epoch": 40.58688147295742, "grad_norm": 0.80806964635849, "learning_rate": 0.0011882623705408515, "loss": 0.564, "step": 141080 }, { "epoch": 40.5897583429229, "grad_norm": 2.15010142326355, "learning_rate": 0.001188204833141542, "loss": 0.573, "step": 141090 }, { "epoch": 40.592635212888375, "grad_norm": 1.0122604370117188, "learning_rate": 0.0011881472957422326, "loss": 0.8135, "step": 141100 }, { "epoch": 40.59551208285386, "grad_norm": 0.8812713623046875, "learning_rate": 0.0011880897583429228, "loss": 0.645, "step": 141110 }, { "epoch": 40.598388952819334, "grad_norm": 0.8785222768783569, "learning_rate": 0.0011880322209436133, "loss": 0.5321, "step": 141120 }, { "epoch": 40.60126582278481, "grad_norm": 1.5968728065490723, "learning_rate": 0.0011879746835443037, "loss": 0.6323, "step": 141130 }, { "epoch": 40.60414269275029, "grad_norm": 1.2150179147720337, "learning_rate": 0.0011879171461449942, "loss": 0.4985, "step": 141140 }, { "epoch": 40.60701956271576, "grad_norm": 1.3301620483398438, "learning_rate": 0.0011878596087456848, "loss": 0.5983, "step": 141150 }, { "epoch": 40.60989643268124, "grad_norm": 0.8468219637870789, "learning_rate": 0.0011878020713463752, "loss": 0.7376, "step": 141160 }, { "epoch": 40.61277330264672, "grad_norm": 1.0249767303466797, "learning_rate": 0.0011877445339470655, "loss": 0.569, "step": 141170 }, { "epoch": 40.6156501726122, "grad_norm": 2.1660027503967285, "learning_rate": 0.001187686996547756, "loss": 0.6616, "step": 141180 }, { "epoch": 40.618527042577675, "grad_norm": 1.5488460063934326, "learning_rate": 0.0011876294591484464, "loss": 0.5426, "step": 141190 }, { "epoch": 40.62140391254315, "grad_norm": 0.8857126832008362, "learning_rate": 0.001187571921749137, "loss": 0.519, "step": 141200 }, { "epoch": 40.62428078250863, "grad_norm": 2.510746479034424, "learning_rate": 0.0011875143843498275, "loss": 0.8009, "step": 141210 }, { "epoch": 40.62715765247411, "grad_norm": 0.8261073231697083, "learning_rate": 0.001187456846950518, "loss": 0.6202, "step": 141220 }, { "epoch": 40.63003452243959, "grad_norm": 1.0169793367385864, "learning_rate": 0.0011873993095512082, "loss": 0.5989, "step": 141230 }, { "epoch": 40.63291139240506, "grad_norm": 1.4953418970108032, "learning_rate": 0.0011873417721518988, "loss": 0.5829, "step": 141240 }, { "epoch": 40.63578826237054, "grad_norm": 1.720964789390564, "learning_rate": 0.0011872842347525892, "loss": 0.6692, "step": 141250 }, { "epoch": 40.638665132336016, "grad_norm": 0.9970053434371948, "learning_rate": 0.0011872266973532797, "loss": 0.6589, "step": 141260 }, { "epoch": 40.6415420023015, "grad_norm": 1.8262416124343872, "learning_rate": 0.00118716915995397, "loss": 0.6072, "step": 141270 }, { "epoch": 40.644418872266975, "grad_norm": 1.502020239830017, "learning_rate": 0.0011871116225546606, "loss": 0.6571, "step": 141280 }, { "epoch": 40.64729574223245, "grad_norm": 1.6341495513916016, "learning_rate": 0.001187054085155351, "loss": 0.5642, "step": 141290 }, { "epoch": 40.65017261219793, "grad_norm": 0.885936975479126, "learning_rate": 0.0011869965477560413, "loss": 0.5746, "step": 141300 }, { "epoch": 40.653049482163404, "grad_norm": 1.1023695468902588, "learning_rate": 0.0011869390103567319, "loss": 0.4921, "step": 141310 }, { "epoch": 40.65592635212889, "grad_norm": 2.050039768218994, "learning_rate": 0.0011868814729574224, "loss": 0.6066, "step": 141320 }, { "epoch": 40.65880322209436, "grad_norm": 1.1120128631591797, "learning_rate": 0.0011868239355581128, "loss": 0.5183, "step": 141330 }, { "epoch": 40.66168009205984, "grad_norm": 1.5291141271591187, "learning_rate": 0.0011867663981588034, "loss": 0.6466, "step": 141340 }, { "epoch": 40.664556962025316, "grad_norm": 1.1227378845214844, "learning_rate": 0.0011867088607594937, "loss": 0.7658, "step": 141350 }, { "epoch": 40.66743383199079, "grad_norm": 0.7946558594703674, "learning_rate": 0.001186651323360184, "loss": 0.5951, "step": 141360 }, { "epoch": 40.670310701956275, "grad_norm": 1.0857510566711426, "learning_rate": 0.0011865937859608746, "loss": 0.646, "step": 141370 }, { "epoch": 40.67318757192175, "grad_norm": 1.6074706315994263, "learning_rate": 0.001186536248561565, "loss": 0.6717, "step": 141380 }, { "epoch": 40.67606444188723, "grad_norm": 0.8218780755996704, "learning_rate": 0.0011864787111622555, "loss": 0.6506, "step": 141390 }, { "epoch": 40.678941311852704, "grad_norm": 1.7313790321350098, "learning_rate": 0.001186421173762946, "loss": 0.5322, "step": 141400 }, { "epoch": 40.68181818181818, "grad_norm": 1.7246770858764648, "learning_rate": 0.0011863636363636362, "loss": 0.5919, "step": 141410 }, { "epoch": 40.684695051783656, "grad_norm": 1.2320523262023926, "learning_rate": 0.0011863060989643268, "loss": 0.6553, "step": 141420 }, { "epoch": 40.68757192174914, "grad_norm": 1.1018861532211304, "learning_rate": 0.0011862485615650174, "loss": 0.6146, "step": 141430 }, { "epoch": 40.690448791714616, "grad_norm": 1.6662770509719849, "learning_rate": 0.0011861910241657077, "loss": 0.8311, "step": 141440 }, { "epoch": 40.69332566168009, "grad_norm": 0.9637722969055176, "learning_rate": 0.0011861334867663983, "loss": 0.6764, "step": 141450 }, { "epoch": 40.69620253164557, "grad_norm": 0.9427024722099304, "learning_rate": 0.0011860759493670888, "loss": 0.5063, "step": 141460 }, { "epoch": 40.699079401611044, "grad_norm": 1.201014518737793, "learning_rate": 0.001186018411967779, "loss": 0.5828, "step": 141470 }, { "epoch": 40.70195627157653, "grad_norm": 2.1345481872558594, "learning_rate": 0.0011859608745684695, "loss": 0.8086, "step": 141480 }, { "epoch": 40.704833141542004, "grad_norm": 0.6477738618850708, "learning_rate": 0.0011859033371691599, "loss": 0.6313, "step": 141490 }, { "epoch": 40.70771001150748, "grad_norm": 1.4606451988220215, "learning_rate": 0.0011858457997698504, "loss": 0.6835, "step": 141500 }, { "epoch": 40.710586881472956, "grad_norm": 1.4357163906097412, "learning_rate": 0.001185788262370541, "loss": 0.6766, "step": 141510 }, { "epoch": 40.71346375143843, "grad_norm": 1.1975040435791016, "learning_rate": 0.0011857307249712313, "loss": 0.7177, "step": 141520 }, { "epoch": 40.716340621403916, "grad_norm": 0.9100948572158813, "learning_rate": 0.0011856731875719217, "loss": 0.5325, "step": 141530 }, { "epoch": 40.71921749136939, "grad_norm": 2.4074466228485107, "learning_rate": 0.0011856156501726123, "loss": 0.8635, "step": 141540 }, { "epoch": 40.72209436133487, "grad_norm": 2.1666600704193115, "learning_rate": 0.0011855581127733026, "loss": 0.7382, "step": 141550 }, { "epoch": 40.724971231300344, "grad_norm": 1.8985130786895752, "learning_rate": 0.0011855005753739932, "loss": 0.825, "step": 141560 }, { "epoch": 40.72784810126582, "grad_norm": 0.972583532333374, "learning_rate": 0.0011854430379746837, "loss": 0.6171, "step": 141570 }, { "epoch": 40.730724971231304, "grad_norm": 1.5917041301727295, "learning_rate": 0.001185385500575374, "loss": 0.8197, "step": 141580 }, { "epoch": 40.73360184119678, "grad_norm": 1.9039889574050903, "learning_rate": 0.0011853279631760644, "loss": 0.5557, "step": 141590 }, { "epoch": 40.736478711162256, "grad_norm": 1.0842005014419556, "learning_rate": 0.0011852704257767548, "loss": 0.5388, "step": 141600 }, { "epoch": 40.73935558112773, "grad_norm": 1.9500612020492554, "learning_rate": 0.0011852128883774453, "loss": 0.6131, "step": 141610 }, { "epoch": 40.74223245109321, "grad_norm": 2.299234628677368, "learning_rate": 0.001185155350978136, "loss": 0.492, "step": 141620 }, { "epoch": 40.745109321058685, "grad_norm": 1.8415626287460327, "learning_rate": 0.0011850978135788262, "loss": 0.6953, "step": 141630 }, { "epoch": 40.74798619102417, "grad_norm": 1.0382028818130493, "learning_rate": 0.0011850402761795168, "loss": 0.6811, "step": 141640 }, { "epoch": 40.750863060989644, "grad_norm": 1.05538809299469, "learning_rate": 0.0011849827387802072, "loss": 0.6835, "step": 141650 }, { "epoch": 40.75373993095512, "grad_norm": 1.5951075553894043, "learning_rate": 0.0011849252013808975, "loss": 0.6451, "step": 141660 }, { "epoch": 40.7566168009206, "grad_norm": 0.8640345335006714, "learning_rate": 0.001184867663981588, "loss": 0.6904, "step": 141670 }, { "epoch": 40.75949367088607, "grad_norm": 1.3601536750793457, "learning_rate": 0.0011848101265822786, "loss": 0.5969, "step": 141680 }, { "epoch": 40.762370540851556, "grad_norm": 1.5087732076644897, "learning_rate": 0.001184752589182969, "loss": 0.489, "step": 141690 }, { "epoch": 40.76524741081703, "grad_norm": 0.9563970565795898, "learning_rate": 0.0011846950517836595, "loss": 0.6506, "step": 141700 }, { "epoch": 40.76812428078251, "grad_norm": 1.410158395767212, "learning_rate": 0.0011846375143843497, "loss": 0.5809, "step": 141710 }, { "epoch": 40.771001150747985, "grad_norm": 0.9412717819213867, "learning_rate": 0.0011845799769850402, "loss": 0.6696, "step": 141720 }, { "epoch": 40.77387802071346, "grad_norm": 0.9140216112136841, "learning_rate": 0.0011845224395857308, "loss": 0.6209, "step": 141730 }, { "epoch": 40.776754890678944, "grad_norm": 1.6761621236801147, "learning_rate": 0.0011844649021864211, "loss": 0.5833, "step": 141740 }, { "epoch": 40.77963176064442, "grad_norm": 1.4742321968078613, "learning_rate": 0.0011844073647871117, "loss": 0.7003, "step": 141750 }, { "epoch": 40.7825086306099, "grad_norm": 1.0704455375671387, "learning_rate": 0.0011843498273878023, "loss": 0.673, "step": 141760 }, { "epoch": 40.78538550057537, "grad_norm": 0.758004367351532, "learning_rate": 0.0011842922899884924, "loss": 0.6564, "step": 141770 }, { "epoch": 40.78826237054085, "grad_norm": 1.151462197303772, "learning_rate": 0.001184234752589183, "loss": 0.6329, "step": 141780 }, { "epoch": 40.79113924050633, "grad_norm": 1.3067060708999634, "learning_rate": 0.0011841772151898735, "loss": 0.5383, "step": 141790 }, { "epoch": 40.79401611047181, "grad_norm": 2.780923843383789, "learning_rate": 0.0011841196777905639, "loss": 0.6899, "step": 141800 }, { "epoch": 40.796892980437285, "grad_norm": 1.426658272743225, "learning_rate": 0.0011840621403912544, "loss": 0.7412, "step": 141810 }, { "epoch": 40.79976985040276, "grad_norm": 1.0244207382202148, "learning_rate": 0.0011840046029919446, "loss": 0.6398, "step": 141820 }, { "epoch": 40.80264672036824, "grad_norm": 0.8279811143875122, "learning_rate": 0.0011839470655926351, "loss": 0.5224, "step": 141830 }, { "epoch": 40.80552359033371, "grad_norm": 1.2936545610427856, "learning_rate": 0.0011838895281933257, "loss": 0.5088, "step": 141840 }, { "epoch": 40.8084004602992, "grad_norm": 1.4686108827590942, "learning_rate": 0.001183831990794016, "loss": 0.6021, "step": 141850 }, { "epoch": 40.81127733026467, "grad_norm": 1.561507225036621, "learning_rate": 0.0011837744533947066, "loss": 0.511, "step": 141860 }, { "epoch": 40.81415420023015, "grad_norm": 4.190846920013428, "learning_rate": 0.0011837169159953972, "loss": 0.5501, "step": 141870 }, { "epoch": 40.817031070195625, "grad_norm": 2.3058907985687256, "learning_rate": 0.0011836593785960873, "loss": 0.7661, "step": 141880 }, { "epoch": 40.8199079401611, "grad_norm": 1.3270909786224365, "learning_rate": 0.0011836018411967779, "loss": 0.6359, "step": 141890 }, { "epoch": 40.822784810126585, "grad_norm": 1.5054638385772705, "learning_rate": 0.0011835443037974684, "loss": 0.7125, "step": 141900 }, { "epoch": 40.82566168009206, "grad_norm": 0.7298152446746826, "learning_rate": 0.0011834867663981588, "loss": 0.6162, "step": 141910 }, { "epoch": 40.82853855005754, "grad_norm": 1.054345726966858, "learning_rate": 0.0011834292289988493, "loss": 0.5277, "step": 141920 }, { "epoch": 40.83141542002301, "grad_norm": 1.5956225395202637, "learning_rate": 0.00118337169159954, "loss": 0.6386, "step": 141930 }, { "epoch": 40.83429228998849, "grad_norm": 1.1763476133346558, "learning_rate": 0.00118331415420023, "loss": 0.5874, "step": 141940 }, { "epoch": 40.83716915995397, "grad_norm": 1.125558853149414, "learning_rate": 0.0011832566168009206, "loss": 0.589, "step": 141950 }, { "epoch": 40.84004602991945, "grad_norm": 1.078456163406372, "learning_rate": 0.001183199079401611, "loss": 0.5952, "step": 141960 }, { "epoch": 40.842922899884925, "grad_norm": 1.242203712463379, "learning_rate": 0.0011831415420023015, "loss": 0.6873, "step": 141970 }, { "epoch": 40.8457997698504, "grad_norm": 0.9344821572303772, "learning_rate": 0.001183084004602992, "loss": 0.6211, "step": 141980 }, { "epoch": 40.84867663981588, "grad_norm": 1.4281857013702393, "learning_rate": 0.0011830264672036824, "loss": 0.6851, "step": 141990 }, { "epoch": 40.85155350978136, "grad_norm": 0.9164716005325317, "learning_rate": 0.0011829689298043728, "loss": 0.4806, "step": 142000 }, { "epoch": 40.85443037974684, "grad_norm": 1.099633812904358, "learning_rate": 0.0011829113924050633, "loss": 0.56, "step": 142010 }, { "epoch": 40.85730724971231, "grad_norm": 1.1545346975326538, "learning_rate": 0.0011828538550057537, "loss": 0.555, "step": 142020 }, { "epoch": 40.86018411967779, "grad_norm": 0.7397092580795288, "learning_rate": 0.0011827963176064442, "loss": 0.665, "step": 142030 }, { "epoch": 40.863060989643266, "grad_norm": 1.229495882987976, "learning_rate": 0.0011827387802071348, "loss": 0.5616, "step": 142040 }, { "epoch": 40.86593785960875, "grad_norm": 0.9535021185874939, "learning_rate": 0.0011826812428078252, "loss": 0.6185, "step": 142050 }, { "epoch": 40.868814729574225, "grad_norm": 1.323967456817627, "learning_rate": 0.0011826237054085155, "loss": 0.6533, "step": 142060 }, { "epoch": 40.8716915995397, "grad_norm": 0.8920976519584656, "learning_rate": 0.0011825661680092059, "loss": 0.575, "step": 142070 }, { "epoch": 40.87456846950518, "grad_norm": 0.7294211387634277, "learning_rate": 0.0011825086306098964, "loss": 0.6486, "step": 142080 }, { "epoch": 40.877445339470654, "grad_norm": 0.7734864354133606, "learning_rate": 0.001182451093210587, "loss": 0.5016, "step": 142090 }, { "epoch": 40.88032220943613, "grad_norm": 1.624556303024292, "learning_rate": 0.0011823935558112773, "loss": 0.6974, "step": 142100 }, { "epoch": 40.883199079401614, "grad_norm": 1.2239338159561157, "learning_rate": 0.0011823360184119679, "loss": 0.556, "step": 142110 }, { "epoch": 40.88607594936709, "grad_norm": 1.469558835029602, "learning_rate": 0.0011822784810126582, "loss": 0.6156, "step": 142120 }, { "epoch": 40.888952819332566, "grad_norm": 3.889333724975586, "learning_rate": 0.0011822209436133486, "loss": 0.598, "step": 142130 }, { "epoch": 40.89182968929804, "grad_norm": 0.8777658343315125, "learning_rate": 0.0011821634062140391, "loss": 0.6471, "step": 142140 }, { "epoch": 40.89470655926352, "grad_norm": 1.1912164688110352, "learning_rate": 0.0011821058688147297, "loss": 0.5295, "step": 142150 }, { "epoch": 40.897583429229, "grad_norm": 1.2865732908248901, "learning_rate": 0.00118204833141542, "loss": 0.5554, "step": 142160 }, { "epoch": 40.90046029919448, "grad_norm": 1.1921578645706177, "learning_rate": 0.0011819907940161106, "loss": 0.6603, "step": 142170 }, { "epoch": 40.903337169159954, "grad_norm": 0.9349653720855713, "learning_rate": 0.0011819332566168008, "loss": 0.5057, "step": 142180 }, { "epoch": 40.90621403912543, "grad_norm": 1.1573907136917114, "learning_rate": 0.0011818757192174913, "loss": 0.5082, "step": 142190 }, { "epoch": 40.90909090909091, "grad_norm": 2.1117959022521973, "learning_rate": 0.0011818181818181819, "loss": 0.6378, "step": 142200 }, { "epoch": 40.91196777905639, "grad_norm": 0.962717592716217, "learning_rate": 0.0011817606444188722, "loss": 0.7749, "step": 142210 }, { "epoch": 40.914844649021866, "grad_norm": 1.1821224689483643, "learning_rate": 0.0011817031070195628, "loss": 0.59, "step": 142220 }, { "epoch": 40.91772151898734, "grad_norm": 2.9380791187286377, "learning_rate": 0.0011816455696202534, "loss": 0.6331, "step": 142230 }, { "epoch": 40.92059838895282, "grad_norm": 1.2057148218154907, "learning_rate": 0.0011815880322209435, "loss": 0.5284, "step": 142240 }, { "epoch": 40.923475258918295, "grad_norm": 1.5358128547668457, "learning_rate": 0.001181530494821634, "loss": 0.5267, "step": 142250 }, { "epoch": 40.92635212888378, "grad_norm": 1.1787523031234741, "learning_rate": 0.0011814729574223246, "loss": 0.5319, "step": 142260 }, { "epoch": 40.929228998849254, "grad_norm": 1.8015204668045044, "learning_rate": 0.001181415420023015, "loss": 0.5508, "step": 142270 }, { "epoch": 40.93210586881473, "grad_norm": 0.8230338096618652, "learning_rate": 0.0011813578826237055, "loss": 0.717, "step": 142280 }, { "epoch": 40.93498273878021, "grad_norm": 1.2806034088134766, "learning_rate": 0.0011813003452243959, "loss": 0.6658, "step": 142290 }, { "epoch": 40.93785960874568, "grad_norm": 1.102919101715088, "learning_rate": 0.0011812428078250862, "loss": 0.618, "step": 142300 }, { "epoch": 40.94073647871116, "grad_norm": 1.7148175239562988, "learning_rate": 0.0011811852704257768, "loss": 0.5908, "step": 142310 }, { "epoch": 40.94361334867664, "grad_norm": 1.9049293994903564, "learning_rate": 0.0011811277330264671, "loss": 0.7281, "step": 142320 }, { "epoch": 40.94649021864212, "grad_norm": 1.2213175296783447, "learning_rate": 0.0011810701956271577, "loss": 0.7729, "step": 142330 }, { "epoch": 40.949367088607595, "grad_norm": 1.0535130500793457, "learning_rate": 0.0011810126582278483, "loss": 0.5607, "step": 142340 }, { "epoch": 40.95224395857307, "grad_norm": 2.5387542247772217, "learning_rate": 0.0011809551208285386, "loss": 0.6145, "step": 142350 }, { "epoch": 40.95512082853855, "grad_norm": 1.3432867527008057, "learning_rate": 0.001180897583429229, "loss": 0.5712, "step": 142360 }, { "epoch": 40.95799769850403, "grad_norm": 1.7165979146957397, "learning_rate": 0.0011808400460299195, "loss": 0.6146, "step": 142370 }, { "epoch": 40.96087456846951, "grad_norm": 2.667032241821289, "learning_rate": 0.0011807825086306099, "loss": 0.6295, "step": 142380 }, { "epoch": 40.96375143843498, "grad_norm": 1.3149387836456299, "learning_rate": 0.0011807249712313004, "loss": 0.734, "step": 142390 }, { "epoch": 40.96662830840046, "grad_norm": 0.8856929540634155, "learning_rate": 0.0011806674338319908, "loss": 0.5926, "step": 142400 }, { "epoch": 40.969505178365935, "grad_norm": 1.5899884700775146, "learning_rate": 0.0011806098964326813, "loss": 0.5528, "step": 142410 }, { "epoch": 40.97238204833142, "grad_norm": 0.9829550385475159, "learning_rate": 0.0011805523590333717, "loss": 0.6071, "step": 142420 }, { "epoch": 40.975258918296895, "grad_norm": 0.8554349541664124, "learning_rate": 0.001180494821634062, "loss": 0.5404, "step": 142430 }, { "epoch": 40.97813578826237, "grad_norm": 1.303248405456543, "learning_rate": 0.0011804372842347526, "loss": 0.6431, "step": 142440 }, { "epoch": 40.98101265822785, "grad_norm": 1.5951251983642578, "learning_rate": 0.0011803797468354432, "loss": 0.7651, "step": 142450 }, { "epoch": 40.98388952819332, "grad_norm": 0.9901815056800842, "learning_rate": 0.0011803222094361335, "loss": 0.4571, "step": 142460 }, { "epoch": 40.98676639815881, "grad_norm": 1.115095853805542, "learning_rate": 0.001180264672036824, "loss": 0.7395, "step": 142470 }, { "epoch": 40.98964326812428, "grad_norm": 0.9498860239982605, "learning_rate": 0.0011802071346375144, "loss": 0.7339, "step": 142480 }, { "epoch": 40.99252013808976, "grad_norm": 0.8432764410972595, "learning_rate": 0.0011801495972382048, "loss": 0.6182, "step": 142490 }, { "epoch": 40.995397008055235, "grad_norm": 1.8315376043319702, "learning_rate": 0.0011800920598388953, "loss": 0.7801, "step": 142500 }, { "epoch": 40.99827387802071, "grad_norm": 0.9472142457962036, "learning_rate": 0.0011800345224395857, "loss": 0.6831, "step": 142510 }, { "epoch": 41.00115074798619, "grad_norm": 1.703884243965149, "learning_rate": 0.0011799769850402762, "loss": 0.6816, "step": 142520 }, { "epoch": 41.00402761795167, "grad_norm": 2.3111395835876465, "learning_rate": 0.0011799194476409668, "loss": 0.6517, "step": 142530 }, { "epoch": 41.00690448791715, "grad_norm": 1.0273722410202026, "learning_rate": 0.001179861910241657, "loss": 0.6165, "step": 142540 }, { "epoch": 41.00978135788262, "grad_norm": 0.9900680780410767, "learning_rate": 0.0011798043728423475, "loss": 0.6362, "step": 142550 }, { "epoch": 41.0126582278481, "grad_norm": 1.0782606601715088, "learning_rate": 0.001179746835443038, "loss": 0.497, "step": 142560 }, { "epoch": 41.015535097813576, "grad_norm": 1.5207271575927734, "learning_rate": 0.0011796892980437284, "loss": 0.5371, "step": 142570 }, { "epoch": 41.01841196777906, "grad_norm": 1.0371495485305786, "learning_rate": 0.001179631760644419, "loss": 0.6509, "step": 142580 }, { "epoch": 41.021288837744535, "grad_norm": 1.6763871908187866, "learning_rate": 0.0011795742232451095, "loss": 0.6645, "step": 142590 }, { "epoch": 41.02416570771001, "grad_norm": 1.1302162408828735, "learning_rate": 0.0011795166858457997, "loss": 0.5806, "step": 142600 }, { "epoch": 41.02704257767549, "grad_norm": 1.3619084358215332, "learning_rate": 0.0011794591484464902, "loss": 0.6405, "step": 142610 }, { "epoch": 41.029919447640964, "grad_norm": 1.9527040719985962, "learning_rate": 0.0011794016110471808, "loss": 0.5614, "step": 142620 }, { "epoch": 41.03279631760645, "grad_norm": 1.2753186225891113, "learning_rate": 0.0011793440736478711, "loss": 0.5107, "step": 142630 }, { "epoch": 41.03567318757192, "grad_norm": 1.7375190258026123, "learning_rate": 0.0011792865362485617, "loss": 0.6115, "step": 142640 }, { "epoch": 41.0385500575374, "grad_norm": 0.8076035976409912, "learning_rate": 0.0011792289988492518, "loss": 0.6663, "step": 142650 }, { "epoch": 41.041426927502876, "grad_norm": 1.8543742895126343, "learning_rate": 0.0011791714614499424, "loss": 0.5881, "step": 142660 }, { "epoch": 41.04430379746835, "grad_norm": 1.7931935787200928, "learning_rate": 0.001179113924050633, "loss": 0.6211, "step": 142670 }, { "epoch": 41.047180667433835, "grad_norm": 1.37186598777771, "learning_rate": 0.0011790563866513233, "loss": 0.5098, "step": 142680 }, { "epoch": 41.05005753739931, "grad_norm": 1.0128711462020874, "learning_rate": 0.0011789988492520139, "loss": 0.5881, "step": 142690 }, { "epoch": 41.05293440736479, "grad_norm": 1.1137480735778809, "learning_rate": 0.0011789413118527044, "loss": 0.4946, "step": 142700 }, { "epoch": 41.055811277330264, "grad_norm": 0.8475337624549866, "learning_rate": 0.0011788837744533946, "loss": 0.5081, "step": 142710 }, { "epoch": 41.05868814729574, "grad_norm": 0.5369262099266052, "learning_rate": 0.0011788262370540851, "loss": 0.6104, "step": 142720 }, { "epoch": 41.061565017261216, "grad_norm": 2.500073194503784, "learning_rate": 0.0011787686996547757, "loss": 0.4861, "step": 142730 }, { "epoch": 41.0644418872267, "grad_norm": 0.8169375658035278, "learning_rate": 0.001178711162255466, "loss": 0.4834, "step": 142740 }, { "epoch": 41.067318757192176, "grad_norm": 1.4308147430419922, "learning_rate": 0.0011786536248561566, "loss": 0.6303, "step": 142750 }, { "epoch": 41.07019562715765, "grad_norm": 1.7420891523361206, "learning_rate": 0.001178596087456847, "loss": 0.6999, "step": 142760 }, { "epoch": 41.07307249712313, "grad_norm": 0.9650272130966187, "learning_rate": 0.0011785385500575373, "loss": 0.5997, "step": 142770 }, { "epoch": 41.075949367088604, "grad_norm": 0.7744970321655273, "learning_rate": 0.0011784810126582279, "loss": 0.4398, "step": 142780 }, { "epoch": 41.07882623705409, "grad_norm": 1.375100016593933, "learning_rate": 0.0011784234752589182, "loss": 0.6379, "step": 142790 }, { "epoch": 41.081703107019564, "grad_norm": 1.2990739345550537, "learning_rate": 0.0011783659378596088, "loss": 0.5899, "step": 142800 }, { "epoch": 41.08457997698504, "grad_norm": 1.1591813564300537, "learning_rate": 0.0011783084004602993, "loss": 0.5905, "step": 142810 }, { "epoch": 41.087456846950516, "grad_norm": 1.2462016344070435, "learning_rate": 0.0011782508630609897, "loss": 0.5717, "step": 142820 }, { "epoch": 41.09033371691599, "grad_norm": 1.328986644744873, "learning_rate": 0.00117819332566168, "loss": 0.5712, "step": 142830 }, { "epoch": 41.093210586881476, "grad_norm": 0.9784356951713562, "learning_rate": 0.0011781357882623706, "loss": 0.5472, "step": 142840 }, { "epoch": 41.09608745684695, "grad_norm": 1.7072151899337769, "learning_rate": 0.001178078250863061, "loss": 0.5326, "step": 142850 }, { "epoch": 41.09896432681243, "grad_norm": 1.4135786294937134, "learning_rate": 0.0011780207134637515, "loss": 0.5634, "step": 142860 }, { "epoch": 41.101841196777904, "grad_norm": 2.2513301372528076, "learning_rate": 0.0011779631760644419, "loss": 0.6824, "step": 142870 }, { "epoch": 41.10471806674338, "grad_norm": 0.9176710247993469, "learning_rate": 0.0011779056386651324, "loss": 0.4509, "step": 142880 }, { "epoch": 41.107594936708864, "grad_norm": 1.3286306858062744, "learning_rate": 0.0011778481012658228, "loss": 0.6095, "step": 142890 }, { "epoch": 41.11047180667434, "grad_norm": 1.3349922895431519, "learning_rate": 0.0011777905638665131, "loss": 0.5656, "step": 142900 }, { "epoch": 41.113348676639816, "grad_norm": 0.9969598054885864, "learning_rate": 0.0011777330264672037, "loss": 0.6322, "step": 142910 }, { "epoch": 41.11622554660529, "grad_norm": 0.798020601272583, "learning_rate": 0.0011776754890678942, "loss": 0.5763, "step": 142920 }, { "epoch": 41.11910241657077, "grad_norm": 1.5784298181533813, "learning_rate": 0.0011776179516685846, "loss": 0.6955, "step": 142930 }, { "epoch": 41.121979286536245, "grad_norm": 1.2546387910842896, "learning_rate": 0.0011775604142692752, "loss": 0.5072, "step": 142940 }, { "epoch": 41.12485615650173, "grad_norm": 1.744240164756775, "learning_rate": 0.0011775028768699655, "loss": 0.5633, "step": 142950 }, { "epoch": 41.127733026467205, "grad_norm": 1.0297104120254517, "learning_rate": 0.0011774453394706558, "loss": 0.4803, "step": 142960 }, { "epoch": 41.13060989643268, "grad_norm": 1.3798465728759766, "learning_rate": 0.0011773878020713464, "loss": 0.5973, "step": 142970 }, { "epoch": 41.13348676639816, "grad_norm": 0.9800519943237305, "learning_rate": 0.0011773302646720368, "loss": 0.5727, "step": 142980 }, { "epoch": 41.13636363636363, "grad_norm": 2.2956435680389404, "learning_rate": 0.0011772727272727273, "loss": 0.5169, "step": 142990 }, { "epoch": 41.139240506329116, "grad_norm": 1.4372186660766602, "learning_rate": 0.0011772151898734179, "loss": 0.5285, "step": 143000 }, { "epoch": 41.14211737629459, "grad_norm": 0.9951793551445007, "learning_rate": 0.001177157652474108, "loss": 0.5221, "step": 143010 }, { "epoch": 41.14499424626007, "grad_norm": 1.6204837560653687, "learning_rate": 0.0011771001150747986, "loss": 0.5024, "step": 143020 }, { "epoch": 41.147871116225545, "grad_norm": 1.8348495960235596, "learning_rate": 0.0011770425776754891, "loss": 0.5293, "step": 143030 }, { "epoch": 41.15074798619102, "grad_norm": 0.9774318933486938, "learning_rate": 0.0011769850402761795, "loss": 0.6126, "step": 143040 }, { "epoch": 41.153624856156505, "grad_norm": 1.324263334274292, "learning_rate": 0.00117692750287687, "loss": 0.5198, "step": 143050 }, { "epoch": 41.15650172612198, "grad_norm": 1.5073236227035522, "learning_rate": 0.0011768699654775606, "loss": 0.6065, "step": 143060 }, { "epoch": 41.15937859608746, "grad_norm": 1.1972723007202148, "learning_rate": 0.0011768124280782508, "loss": 0.5374, "step": 143070 }, { "epoch": 41.16225546605293, "grad_norm": 0.9806527495384216, "learning_rate": 0.0011767548906789413, "loss": 0.5285, "step": 143080 }, { "epoch": 41.16513233601841, "grad_norm": 1.2610268592834473, "learning_rate": 0.0011766973532796317, "loss": 0.6828, "step": 143090 }, { "epoch": 41.16800920598389, "grad_norm": 1.023797631263733, "learning_rate": 0.0011766398158803222, "loss": 0.5975, "step": 143100 }, { "epoch": 41.17088607594937, "grad_norm": 1.0811671018600464, "learning_rate": 0.0011765822784810128, "loss": 0.5325, "step": 143110 }, { "epoch": 41.173762945914845, "grad_norm": 1.3791346549987793, "learning_rate": 0.0011765247410817031, "loss": 0.6535, "step": 143120 }, { "epoch": 41.17663981588032, "grad_norm": 0.6792453527450562, "learning_rate": 0.0011764672036823935, "loss": 0.519, "step": 143130 }, { "epoch": 41.1795166858458, "grad_norm": 1.37250816822052, "learning_rate": 0.001176409666283084, "loss": 0.5824, "step": 143140 }, { "epoch": 41.18239355581128, "grad_norm": 1.8136979341506958, "learning_rate": 0.0011763521288837744, "loss": 0.6677, "step": 143150 }, { "epoch": 41.18527042577676, "grad_norm": 1.2535353899002075, "learning_rate": 0.001176294591484465, "loss": 0.6015, "step": 143160 }, { "epoch": 41.18814729574223, "grad_norm": 0.7138901352882385, "learning_rate": 0.0011762370540851555, "loss": 0.4422, "step": 143170 }, { "epoch": 41.19102416570771, "grad_norm": 1.0772323608398438, "learning_rate": 0.0011761795166858459, "loss": 0.6839, "step": 143180 }, { "epoch": 41.193901035673186, "grad_norm": 1.0583969354629517, "learning_rate": 0.0011761219792865362, "loss": 0.5353, "step": 143190 }, { "epoch": 41.19677790563866, "grad_norm": 1.8609418869018555, "learning_rate": 0.0011760644418872268, "loss": 0.5354, "step": 143200 }, { "epoch": 41.199654775604145, "grad_norm": 1.799031138420105, "learning_rate": 0.0011760069044879171, "loss": 0.5704, "step": 143210 }, { "epoch": 41.20253164556962, "grad_norm": 1.1947170495986938, "learning_rate": 0.0011759493670886077, "loss": 0.7287, "step": 143220 }, { "epoch": 41.2054085155351, "grad_norm": 1.8121860027313232, "learning_rate": 0.001175891829689298, "loss": 0.5606, "step": 143230 }, { "epoch": 41.208285385500574, "grad_norm": 1.0164031982421875, "learning_rate": 0.0011758342922899886, "loss": 0.5979, "step": 143240 }, { "epoch": 41.21116225546605, "grad_norm": 1.3534218072891235, "learning_rate": 0.001175776754890679, "loss": 0.7486, "step": 143250 }, { "epoch": 41.21403912543153, "grad_norm": 1.7241942882537842, "learning_rate": 0.0011757192174913693, "loss": 0.5687, "step": 143260 }, { "epoch": 41.21691599539701, "grad_norm": 1.6559230089187622, "learning_rate": 0.0011756616800920599, "loss": 0.6283, "step": 143270 }, { "epoch": 41.219792865362486, "grad_norm": 0.9319953322410583, "learning_rate": 0.0011756041426927504, "loss": 0.6223, "step": 143280 }, { "epoch": 41.22266973532796, "grad_norm": 1.2925994396209717, "learning_rate": 0.0011755466052934408, "loss": 0.5856, "step": 143290 }, { "epoch": 41.22554660529344, "grad_norm": 1.6474109888076782, "learning_rate": 0.0011754890678941313, "loss": 0.6071, "step": 143300 }, { "epoch": 41.22842347525892, "grad_norm": 1.8012553453445435, "learning_rate": 0.0011754315304948217, "loss": 0.5825, "step": 143310 }, { "epoch": 41.2313003452244, "grad_norm": 1.045581579208374, "learning_rate": 0.001175373993095512, "loss": 0.5585, "step": 143320 }, { "epoch": 41.234177215189874, "grad_norm": 1.3409538269042969, "learning_rate": 0.0011753164556962026, "loss": 0.5491, "step": 143330 }, { "epoch": 41.23705408515535, "grad_norm": 1.634412169456482, "learning_rate": 0.001175258918296893, "loss": 0.664, "step": 143340 }, { "epoch": 41.239930955120826, "grad_norm": 1.0458064079284668, "learning_rate": 0.0011752013808975835, "loss": 0.58, "step": 143350 }, { "epoch": 41.24280782508631, "grad_norm": 1.7681132555007935, "learning_rate": 0.001175143843498274, "loss": 0.6316, "step": 143360 }, { "epoch": 41.245684695051786, "grad_norm": 1.1994129419326782, "learning_rate": 0.0011750863060989642, "loss": 0.5701, "step": 143370 }, { "epoch": 41.24856156501726, "grad_norm": 1.5719660520553589, "learning_rate": 0.0011750287686996548, "loss": 0.529, "step": 143380 }, { "epoch": 41.25143843498274, "grad_norm": 1.4474952220916748, "learning_rate": 0.0011749712313003453, "loss": 0.6163, "step": 143390 }, { "epoch": 41.254315304948214, "grad_norm": 1.0265865325927734, "learning_rate": 0.0011749136939010357, "loss": 0.4964, "step": 143400 }, { "epoch": 41.25719217491369, "grad_norm": 0.720953106880188, "learning_rate": 0.0011748561565017262, "loss": 0.6366, "step": 143410 }, { "epoch": 41.260069044879174, "grad_norm": 1.2657302618026733, "learning_rate": 0.0011747986191024168, "loss": 0.6644, "step": 143420 }, { "epoch": 41.26294591484465, "grad_norm": 2.1984283924102783, "learning_rate": 0.001174741081703107, "loss": 0.7463, "step": 143430 }, { "epoch": 41.265822784810126, "grad_norm": 0.8443483710289001, "learning_rate": 0.0011746835443037975, "loss": 0.5921, "step": 143440 }, { "epoch": 41.2686996547756, "grad_norm": 1.1982325315475464, "learning_rate": 0.0011746260069044878, "loss": 0.5975, "step": 143450 }, { "epoch": 41.27157652474108, "grad_norm": 1.233773112297058, "learning_rate": 0.0011745684695051784, "loss": 0.624, "step": 143460 }, { "epoch": 41.27445339470656, "grad_norm": 0.9134758114814758, "learning_rate": 0.001174510932105869, "loss": 0.5229, "step": 143470 }, { "epoch": 41.27733026467204, "grad_norm": 1.6372817754745483, "learning_rate": 0.001174453394706559, "loss": 0.5148, "step": 143480 }, { "epoch": 41.280207134637514, "grad_norm": 0.9220947623252869, "learning_rate": 0.0011743958573072497, "loss": 0.5236, "step": 143490 }, { "epoch": 41.28308400460299, "grad_norm": 0.9820501208305359, "learning_rate": 0.0011743383199079402, "loss": 0.6107, "step": 143500 }, { "epoch": 41.28596087456847, "grad_norm": 0.714604914188385, "learning_rate": 0.0011742807825086306, "loss": 0.5881, "step": 143510 }, { "epoch": 41.28883774453395, "grad_norm": 1.2672522068023682, "learning_rate": 0.0011742232451093211, "loss": 0.5927, "step": 143520 }, { "epoch": 41.291714614499426, "grad_norm": 1.217492699623108, "learning_rate": 0.0011741657077100117, "loss": 0.4603, "step": 143530 }, { "epoch": 41.2945914844649, "grad_norm": 1.1238540410995483, "learning_rate": 0.0011741081703107018, "loss": 0.6264, "step": 143540 }, { "epoch": 41.29746835443038, "grad_norm": 1.6833925247192383, "learning_rate": 0.0011740506329113924, "loss": 0.8069, "step": 143550 }, { "epoch": 41.300345224395855, "grad_norm": 1.1749461889266968, "learning_rate": 0.0011739930955120827, "loss": 0.4733, "step": 143560 }, { "epoch": 41.30322209436134, "grad_norm": 0.759903609752655, "learning_rate": 0.0011739355581127733, "loss": 0.527, "step": 143570 }, { "epoch": 41.306098964326814, "grad_norm": 1.2551273107528687, "learning_rate": 0.0011738780207134639, "loss": 0.4827, "step": 143580 }, { "epoch": 41.30897583429229, "grad_norm": 1.4079409837722778, "learning_rate": 0.0011738204833141542, "loss": 0.5748, "step": 143590 }, { "epoch": 41.31185270425777, "grad_norm": 0.9967993497848511, "learning_rate": 0.0011737629459148446, "loss": 0.4825, "step": 143600 }, { "epoch": 41.31472957422324, "grad_norm": 1.6297979354858398, "learning_rate": 0.0011737054085155351, "loss": 0.5897, "step": 143610 }, { "epoch": 41.31760644418872, "grad_norm": 0.6900091767311096, "learning_rate": 0.0011736478711162255, "loss": 0.4649, "step": 143620 }, { "epoch": 41.3204833141542, "grad_norm": 0.9339842200279236, "learning_rate": 0.001173590333716916, "loss": 0.6212, "step": 143630 }, { "epoch": 41.32336018411968, "grad_norm": 1.0604727268218994, "learning_rate": 0.0011735327963176066, "loss": 0.5111, "step": 143640 }, { "epoch": 41.326237054085155, "grad_norm": 2.265791416168213, "learning_rate": 0.001173475258918297, "loss": 0.711, "step": 143650 }, { "epoch": 41.32911392405063, "grad_norm": 0.9419971108436584, "learning_rate": 0.0011734177215189873, "loss": 0.4888, "step": 143660 }, { "epoch": 41.33199079401611, "grad_norm": 1.607042908668518, "learning_rate": 0.0011733601841196776, "loss": 0.6187, "step": 143670 }, { "epoch": 41.33486766398159, "grad_norm": 1.9465068578720093, "learning_rate": 0.0011733026467203682, "loss": 0.6586, "step": 143680 }, { "epoch": 41.33774453394707, "grad_norm": 0.9920545220375061, "learning_rate": 0.0011732451093210588, "loss": 0.6333, "step": 143690 }, { "epoch": 41.34062140391254, "grad_norm": 1.270851731300354, "learning_rate": 0.0011731875719217491, "loss": 0.6407, "step": 143700 }, { "epoch": 41.34349827387802, "grad_norm": 1.1746917963027954, "learning_rate": 0.0011731300345224397, "loss": 0.7687, "step": 143710 }, { "epoch": 41.346375143843495, "grad_norm": 2.8214364051818848, "learning_rate": 0.00117307249712313, "loss": 0.6222, "step": 143720 }, { "epoch": 41.34925201380898, "grad_norm": 1.1364623308181763, "learning_rate": 0.0011730149597238204, "loss": 0.6065, "step": 143730 }, { "epoch": 41.352128883774455, "grad_norm": 2.4638278484344482, "learning_rate": 0.001172957422324511, "loss": 0.568, "step": 143740 }, { "epoch": 41.35500575373993, "grad_norm": 1.3675423860549927, "learning_rate": 0.0011728998849252015, "loss": 0.7327, "step": 143750 }, { "epoch": 41.35788262370541, "grad_norm": 1.1269252300262451, "learning_rate": 0.0011728423475258919, "loss": 0.5787, "step": 143760 }, { "epoch": 41.360759493670884, "grad_norm": 2.119899034500122, "learning_rate": 0.0011727848101265824, "loss": 0.6736, "step": 143770 }, { "epoch": 41.36363636363637, "grad_norm": 1.9040279388427734, "learning_rate": 0.0011727272727272726, "loss": 0.7331, "step": 143780 }, { "epoch": 41.36651323360184, "grad_norm": 0.7850164175033569, "learning_rate": 0.0011726697353279631, "loss": 0.5602, "step": 143790 }, { "epoch": 41.36939010356732, "grad_norm": 2.792510509490967, "learning_rate": 0.0011726121979286537, "loss": 0.4883, "step": 143800 }, { "epoch": 41.372266973532795, "grad_norm": 0.9111494421958923, "learning_rate": 0.001172554660529344, "loss": 0.531, "step": 143810 }, { "epoch": 41.37514384349827, "grad_norm": 2.6016528606414795, "learning_rate": 0.0011724971231300346, "loss": 0.581, "step": 143820 }, { "epoch": 41.378020713463755, "grad_norm": 1.0293856859207153, "learning_rate": 0.0011724395857307252, "loss": 0.6022, "step": 143830 }, { "epoch": 41.38089758342923, "grad_norm": 1.0655897855758667, "learning_rate": 0.0011723820483314153, "loss": 0.7194, "step": 143840 }, { "epoch": 41.38377445339471, "grad_norm": 1.5890885591506958, "learning_rate": 0.0011723245109321058, "loss": 0.5394, "step": 143850 }, { "epoch": 41.386651323360184, "grad_norm": 1.1529059410095215, "learning_rate": 0.0011722669735327964, "loss": 0.4442, "step": 143860 }, { "epoch": 41.38952819332566, "grad_norm": 2.1954972743988037, "learning_rate": 0.0011722094361334868, "loss": 0.5821, "step": 143870 }, { "epoch": 41.392405063291136, "grad_norm": 1.758060336112976, "learning_rate": 0.0011721518987341773, "loss": 0.7438, "step": 143880 }, { "epoch": 41.39528193325662, "grad_norm": 1.276017665863037, "learning_rate": 0.0011720943613348679, "loss": 0.5647, "step": 143890 }, { "epoch": 41.398158803222096, "grad_norm": 1.1514612436294556, "learning_rate": 0.001172036823935558, "loss": 0.7865, "step": 143900 }, { "epoch": 41.40103567318757, "grad_norm": 1.3151417970657349, "learning_rate": 0.0011719792865362486, "loss": 0.4866, "step": 143910 }, { "epoch": 41.40391254315305, "grad_norm": 1.1212527751922607, "learning_rate": 0.001171921749136939, "loss": 0.4844, "step": 143920 }, { "epoch": 41.406789413118524, "grad_norm": 2.0140328407287598, "learning_rate": 0.0011718642117376295, "loss": 0.6731, "step": 143930 }, { "epoch": 41.40966628308401, "grad_norm": 1.0408419370651245, "learning_rate": 0.00117180667433832, "loss": 0.5774, "step": 143940 }, { "epoch": 41.412543153049484, "grad_norm": 0.9099985957145691, "learning_rate": 0.0011717491369390104, "loss": 0.6572, "step": 143950 }, { "epoch": 41.41542002301496, "grad_norm": 1.1328355073928833, "learning_rate": 0.0011716915995397007, "loss": 0.5483, "step": 143960 }, { "epoch": 41.418296892980436, "grad_norm": 0.6504455804824829, "learning_rate": 0.0011716340621403913, "loss": 0.6468, "step": 143970 }, { "epoch": 41.42117376294591, "grad_norm": 1.209729790687561, "learning_rate": 0.0011715765247410817, "loss": 0.6144, "step": 143980 }, { "epoch": 41.424050632911396, "grad_norm": 2.053377389907837, "learning_rate": 0.0011715189873417722, "loss": 0.7408, "step": 143990 }, { "epoch": 41.42692750287687, "grad_norm": 1.7971291542053223, "learning_rate": 0.0011714614499424628, "loss": 0.5628, "step": 144000 }, { "epoch": 41.42980437284235, "grad_norm": 0.8942458629608154, "learning_rate": 0.0011714039125431531, "loss": 0.5918, "step": 144010 }, { "epoch": 41.432681242807824, "grad_norm": 1.1173797845840454, "learning_rate": 0.0011713463751438435, "loss": 0.6947, "step": 144020 }, { "epoch": 41.4355581127733, "grad_norm": 1.858144760131836, "learning_rate": 0.0011712888377445338, "loss": 0.7416, "step": 144030 }, { "epoch": 41.438434982738784, "grad_norm": 0.5963137149810791, "learning_rate": 0.0011712313003452244, "loss": 0.6272, "step": 144040 }, { "epoch": 41.44131185270426, "grad_norm": 0.7294614911079407, "learning_rate": 0.001171173762945915, "loss": 0.5499, "step": 144050 }, { "epoch": 41.444188722669736, "grad_norm": 2.5596301555633545, "learning_rate": 0.0011711162255466053, "loss": 0.6062, "step": 144060 }, { "epoch": 41.44706559263521, "grad_norm": 1.6793551445007324, "learning_rate": 0.0011710586881472959, "loss": 0.552, "step": 144070 }, { "epoch": 41.44994246260069, "grad_norm": 1.8081512451171875, "learning_rate": 0.0011710011507479862, "loss": 0.7153, "step": 144080 }, { "epoch": 41.452819332566165, "grad_norm": 1.9515873193740845, "learning_rate": 0.0011709436133486766, "loss": 0.5686, "step": 144090 }, { "epoch": 41.45569620253165, "grad_norm": 1.64484703540802, "learning_rate": 0.0011708860759493671, "loss": 0.5576, "step": 144100 }, { "epoch": 41.458573072497124, "grad_norm": 1.2413060665130615, "learning_rate": 0.0011708285385500577, "loss": 0.5025, "step": 144110 }, { "epoch": 41.4614499424626, "grad_norm": 1.1679595708847046, "learning_rate": 0.001170771001150748, "loss": 0.4142, "step": 144120 }, { "epoch": 41.46432681242808, "grad_norm": 1.4311972856521606, "learning_rate": 0.0011707134637514386, "loss": 0.5753, "step": 144130 }, { "epoch": 41.46720368239355, "grad_norm": 1.0096662044525146, "learning_rate": 0.0011706559263521287, "loss": 0.4921, "step": 144140 }, { "epoch": 41.470080552359036, "grad_norm": 0.8047205805778503, "learning_rate": 0.0011705983889528193, "loss": 0.6127, "step": 144150 }, { "epoch": 41.47295742232451, "grad_norm": 2.43644642829895, "learning_rate": 0.0011705408515535099, "loss": 0.6193, "step": 144160 }, { "epoch": 41.47583429228999, "grad_norm": 1.1656088829040527, "learning_rate": 0.0011704833141542002, "loss": 0.5297, "step": 144170 }, { "epoch": 41.478711162255465, "grad_norm": 1.6009515523910522, "learning_rate": 0.0011704257767548908, "loss": 0.5273, "step": 144180 }, { "epoch": 41.48158803222094, "grad_norm": 1.008497714996338, "learning_rate": 0.0011703682393555813, "loss": 0.6192, "step": 144190 }, { "epoch": 41.484464902186424, "grad_norm": 0.7374574542045593, "learning_rate": 0.0011703107019562715, "loss": 0.4448, "step": 144200 }, { "epoch": 41.4873417721519, "grad_norm": 0.9553016424179077, "learning_rate": 0.001170253164556962, "loss": 0.5808, "step": 144210 }, { "epoch": 41.49021864211738, "grad_norm": 1.5109786987304688, "learning_rate": 0.0011701956271576526, "loss": 0.5852, "step": 144220 }, { "epoch": 41.49309551208285, "grad_norm": 1.7994455099105835, "learning_rate": 0.001170138089758343, "loss": 0.4607, "step": 144230 }, { "epoch": 41.49597238204833, "grad_norm": 1.324204444885254, "learning_rate": 0.0011700805523590335, "loss": 0.6406, "step": 144240 }, { "epoch": 41.49884925201381, "grad_norm": 0.9274610877037048, "learning_rate": 0.0011700230149597236, "loss": 0.6488, "step": 144250 }, { "epoch": 41.50172612197929, "grad_norm": 1.5118688344955444, "learning_rate": 0.0011699654775604142, "loss": 0.5307, "step": 144260 }, { "epoch": 41.504602991944765, "grad_norm": 0.6773768663406372, "learning_rate": 0.0011699079401611048, "loss": 0.7115, "step": 144270 }, { "epoch": 41.50747986191024, "grad_norm": 0.6606385707855225, "learning_rate": 0.0011698504027617951, "loss": 0.5487, "step": 144280 }, { "epoch": 41.51035673187572, "grad_norm": 2.3314597606658936, "learning_rate": 0.0011697928653624857, "loss": 0.4645, "step": 144290 }, { "epoch": 41.51323360184119, "grad_norm": 1.08059823513031, "learning_rate": 0.0011697353279631762, "loss": 0.6904, "step": 144300 }, { "epoch": 41.51611047180668, "grad_norm": 1.0085915327072144, "learning_rate": 0.0011696777905638664, "loss": 0.5531, "step": 144310 }, { "epoch": 41.51898734177215, "grad_norm": 1.3385951519012451, "learning_rate": 0.001169620253164557, "loss": 0.5609, "step": 144320 }, { "epoch": 41.52186421173763, "grad_norm": 1.2484681606292725, "learning_rate": 0.0011695627157652475, "loss": 0.6467, "step": 144330 }, { "epoch": 41.524741081703105, "grad_norm": 1.055221438407898, "learning_rate": 0.0011695051783659378, "loss": 0.566, "step": 144340 }, { "epoch": 41.52761795166858, "grad_norm": 0.9590489268302917, "learning_rate": 0.0011694476409666284, "loss": 0.6169, "step": 144350 }, { "epoch": 41.530494821634065, "grad_norm": 2.4645943641662598, "learning_rate": 0.0011693901035673188, "loss": 0.8183, "step": 144360 }, { "epoch": 41.53337169159954, "grad_norm": 1.183240532875061, "learning_rate": 0.001169332566168009, "loss": 0.4797, "step": 144370 }, { "epoch": 41.53624856156502, "grad_norm": 1.0939151048660278, "learning_rate": 0.0011692750287686997, "loss": 0.4453, "step": 144380 }, { "epoch": 41.53912543153049, "grad_norm": 1.0494012832641602, "learning_rate": 0.00116921749136939, "loss": 0.6347, "step": 144390 }, { "epoch": 41.54200230149597, "grad_norm": 1.5833643674850464, "learning_rate": 0.0011691599539700806, "loss": 0.4808, "step": 144400 }, { "epoch": 41.54487917146145, "grad_norm": 1.2799718379974365, "learning_rate": 0.0011691024165707711, "loss": 0.6332, "step": 144410 }, { "epoch": 41.54775604142693, "grad_norm": 1.9188897609710693, "learning_rate": 0.0011690448791714615, "loss": 0.5465, "step": 144420 }, { "epoch": 41.550632911392405, "grad_norm": 1.9364436864852905, "learning_rate": 0.0011689873417721518, "loss": 0.6508, "step": 144430 }, { "epoch": 41.55350978135788, "grad_norm": 2.256390333175659, "learning_rate": 0.0011689298043728424, "loss": 0.6553, "step": 144440 }, { "epoch": 41.55638665132336, "grad_norm": 1.6732605695724487, "learning_rate": 0.0011688722669735327, "loss": 0.7058, "step": 144450 }, { "epoch": 41.55926352128884, "grad_norm": 1.188122034072876, "learning_rate": 0.0011688147295742233, "loss": 0.6151, "step": 144460 }, { "epoch": 41.56214039125432, "grad_norm": 0.7639703154563904, "learning_rate": 0.0011687571921749137, "loss": 0.5063, "step": 144470 }, { "epoch": 41.56501726121979, "grad_norm": 1.0478155612945557, "learning_rate": 0.0011686996547756042, "loss": 0.496, "step": 144480 }, { "epoch": 41.56789413118527, "grad_norm": 1.4401276111602783, "learning_rate": 0.0011686421173762946, "loss": 0.5026, "step": 144490 }, { "epoch": 41.570771001150746, "grad_norm": 1.1735155582427979, "learning_rate": 0.001168584579976985, "loss": 0.5328, "step": 144500 }, { "epoch": 41.57364787111622, "grad_norm": 1.5038132667541504, "learning_rate": 0.0011685270425776755, "loss": 0.6438, "step": 144510 }, { "epoch": 41.576524741081705, "grad_norm": 1.1557880640029907, "learning_rate": 0.001168469505178366, "loss": 0.5175, "step": 144520 }, { "epoch": 41.57940161104718, "grad_norm": 1.6125168800354004, "learning_rate": 0.0011684119677790564, "loss": 0.5498, "step": 144530 }, { "epoch": 41.58227848101266, "grad_norm": 1.0829389095306396, "learning_rate": 0.001168354430379747, "loss": 0.5896, "step": 144540 }, { "epoch": 41.585155350978134, "grad_norm": 1.2728074789047241, "learning_rate": 0.0011682968929804373, "loss": 0.4987, "step": 144550 }, { "epoch": 41.58803222094361, "grad_norm": 1.4190142154693604, "learning_rate": 0.0011682393555811276, "loss": 0.5258, "step": 144560 }, { "epoch": 41.59090909090909, "grad_norm": 1.695252537727356, "learning_rate": 0.0011681818181818182, "loss": 0.5995, "step": 144570 }, { "epoch": 41.59378596087457, "grad_norm": 1.3122036457061768, "learning_rate": 0.0011681242807825088, "loss": 0.6883, "step": 144580 }, { "epoch": 41.596662830840046, "grad_norm": 2.1324262619018555, "learning_rate": 0.0011680667433831991, "loss": 0.6272, "step": 144590 }, { "epoch": 41.59953970080552, "grad_norm": 1.5259385108947754, "learning_rate": 0.0011680092059838897, "loss": 0.6248, "step": 144600 }, { "epoch": 41.602416570771, "grad_norm": 2.2319536209106445, "learning_rate": 0.0011679516685845798, "loss": 0.7158, "step": 144610 }, { "epoch": 41.60529344073648, "grad_norm": 0.9118198752403259, "learning_rate": 0.0011678941311852704, "loss": 0.6976, "step": 144620 }, { "epoch": 41.60817031070196, "grad_norm": 1.0972561836242676, "learning_rate": 0.001167836593785961, "loss": 0.4674, "step": 144630 }, { "epoch": 41.611047180667434, "grad_norm": 2.2654812335968018, "learning_rate": 0.0011677790563866513, "loss": 0.59, "step": 144640 }, { "epoch": 41.61392405063291, "grad_norm": 0.7844156622886658, "learning_rate": 0.0011677215189873419, "loss": 0.5853, "step": 144650 }, { "epoch": 41.616800920598386, "grad_norm": 2.120373010635376, "learning_rate": 0.0011676639815880324, "loss": 0.7005, "step": 144660 }, { "epoch": 41.61967779056387, "grad_norm": 0.8782020807266235, "learning_rate": 0.0011676064441887225, "loss": 0.6156, "step": 144670 }, { "epoch": 41.622554660529346, "grad_norm": 1.8088109493255615, "learning_rate": 0.0011675489067894131, "loss": 0.505, "step": 144680 }, { "epoch": 41.62543153049482, "grad_norm": 1.6273348331451416, "learning_rate": 0.0011674913693901037, "loss": 0.6834, "step": 144690 }, { "epoch": 41.6283084004603, "grad_norm": 1.145183801651001, "learning_rate": 0.001167433831990794, "loss": 0.6232, "step": 144700 }, { "epoch": 41.631185270425775, "grad_norm": 0.8528972268104553, "learning_rate": 0.0011673762945914846, "loss": 0.5161, "step": 144710 }, { "epoch": 41.63406214039125, "grad_norm": 2.606137752532959, "learning_rate": 0.001167318757192175, "loss": 0.8236, "step": 144720 }, { "epoch": 41.636939010356734, "grad_norm": 1.4405438899993896, "learning_rate": 0.0011672612197928653, "loss": 0.7044, "step": 144730 }, { "epoch": 41.63981588032221, "grad_norm": 1.2218036651611328, "learning_rate": 0.0011672036823935558, "loss": 0.5935, "step": 144740 }, { "epoch": 41.64269275028769, "grad_norm": 1.5289044380187988, "learning_rate": 0.0011671461449942462, "loss": 0.5605, "step": 144750 }, { "epoch": 41.64556962025316, "grad_norm": 2.792355537414551, "learning_rate": 0.0011670886075949368, "loss": 0.6618, "step": 144760 }, { "epoch": 41.64844649021864, "grad_norm": 1.1613359451293945, "learning_rate": 0.0011670310701956273, "loss": 0.4846, "step": 144770 }, { "epoch": 41.65132336018412, "grad_norm": 1.1527020931243896, "learning_rate": 0.0011669735327963177, "loss": 0.5209, "step": 144780 }, { "epoch": 41.6542002301496, "grad_norm": 0.8482678532600403, "learning_rate": 0.001166915995397008, "loss": 0.7416, "step": 144790 }, { "epoch": 41.657077100115075, "grad_norm": 1.5571852922439575, "learning_rate": 0.0011668584579976986, "loss": 0.6845, "step": 144800 }, { "epoch": 41.65995397008055, "grad_norm": 0.5713979601860046, "learning_rate": 0.001166800920598389, "loss": 0.615, "step": 144810 }, { "epoch": 41.66283084004603, "grad_norm": 4.146972179412842, "learning_rate": 0.0011667433831990795, "loss": 0.6348, "step": 144820 }, { "epoch": 41.66570771001151, "grad_norm": 1.0002487897872925, "learning_rate": 0.0011666858457997698, "loss": 0.5414, "step": 144830 }, { "epoch": 41.66858457997699, "grad_norm": 0.7338613867759705, "learning_rate": 0.0011666283084004604, "loss": 0.4212, "step": 144840 }, { "epoch": 41.67146144994246, "grad_norm": 1.0231034755706787, "learning_rate": 0.0011665707710011507, "loss": 0.5912, "step": 144850 }, { "epoch": 41.67433831990794, "grad_norm": 2.012164831161499, "learning_rate": 0.001166513233601841, "loss": 0.6386, "step": 144860 }, { "epoch": 41.677215189873415, "grad_norm": 1.866409182548523, "learning_rate": 0.0011664556962025317, "loss": 0.7597, "step": 144870 }, { "epoch": 41.6800920598389, "grad_norm": 1.1412395238876343, "learning_rate": 0.0011663981588032222, "loss": 0.5267, "step": 144880 }, { "epoch": 41.682968929804375, "grad_norm": 4.562682151794434, "learning_rate": 0.0011663406214039126, "loss": 0.541, "step": 144890 }, { "epoch": 41.68584579976985, "grad_norm": 1.908416748046875, "learning_rate": 0.0011662830840046031, "loss": 0.6932, "step": 144900 }, { "epoch": 41.68872266973533, "grad_norm": 1.325770378112793, "learning_rate": 0.0011662255466052935, "loss": 0.5646, "step": 144910 }, { "epoch": 41.6915995397008, "grad_norm": 1.281893253326416, "learning_rate": 0.0011661680092059838, "loss": 0.6632, "step": 144920 }, { "epoch": 41.69447640966629, "grad_norm": 1.2177501916885376, "learning_rate": 0.0011661104718066744, "loss": 0.659, "step": 144930 }, { "epoch": 41.69735327963176, "grad_norm": 0.7796083092689514, "learning_rate": 0.0011660529344073647, "loss": 0.6864, "step": 144940 }, { "epoch": 41.70023014959724, "grad_norm": 1.2289091348648071, "learning_rate": 0.0011659953970080553, "loss": 0.7567, "step": 144950 }, { "epoch": 41.703107019562715, "grad_norm": 1.1518659591674805, "learning_rate": 0.0011659378596087459, "loss": 0.5847, "step": 144960 }, { "epoch": 41.70598388952819, "grad_norm": 1.1273020505905151, "learning_rate": 0.001165880322209436, "loss": 0.5451, "step": 144970 }, { "epoch": 41.70886075949367, "grad_norm": 1.2509537935256958, "learning_rate": 0.0011658227848101266, "loss": 0.6109, "step": 144980 }, { "epoch": 41.71173762945915, "grad_norm": 1.2924079895019531, "learning_rate": 0.0011657652474108171, "loss": 0.5664, "step": 144990 }, { "epoch": 41.71461449942463, "grad_norm": 1.4738050699234009, "learning_rate": 0.0011657077100115075, "loss": 0.5616, "step": 145000 }, { "epoch": 41.7174913693901, "grad_norm": 1.5369679927825928, "learning_rate": 0.001165650172612198, "loss": 0.638, "step": 145010 }, { "epoch": 41.72036823935558, "grad_norm": 0.6775330901145935, "learning_rate": 0.0011655926352128886, "loss": 0.4746, "step": 145020 }, { "epoch": 41.723245109321056, "grad_norm": 0.872043788433075, "learning_rate": 0.0011655350978135787, "loss": 0.5583, "step": 145030 }, { "epoch": 41.72612197928654, "grad_norm": 0.6489368677139282, "learning_rate": 0.0011654775604142693, "loss": 0.5608, "step": 145040 }, { "epoch": 41.728998849252015, "grad_norm": 0.7254220843315125, "learning_rate": 0.0011654200230149596, "loss": 0.4725, "step": 145050 }, { "epoch": 41.73187571921749, "grad_norm": 0.9491965770721436, "learning_rate": 0.0011653624856156502, "loss": 0.647, "step": 145060 }, { "epoch": 41.73475258918297, "grad_norm": 0.994675874710083, "learning_rate": 0.0011653049482163408, "loss": 0.4926, "step": 145070 }, { "epoch": 41.737629459148444, "grad_norm": 1.4676127433776855, "learning_rate": 0.001165247410817031, "loss": 0.6332, "step": 145080 }, { "epoch": 41.74050632911393, "grad_norm": 0.9460590481758118, "learning_rate": 0.0011651898734177215, "loss": 0.643, "step": 145090 }, { "epoch": 41.7433831990794, "grad_norm": 2.530134677886963, "learning_rate": 0.001165132336018412, "loss": 0.6447, "step": 145100 }, { "epoch": 41.74626006904488, "grad_norm": 0.9114454388618469, "learning_rate": 0.0011650747986191024, "loss": 0.6703, "step": 145110 }, { "epoch": 41.749136939010356, "grad_norm": 1.421040415763855, "learning_rate": 0.001165017261219793, "loss": 0.6039, "step": 145120 }, { "epoch": 41.75201380897583, "grad_norm": 2.613199234008789, "learning_rate": 0.0011649597238204835, "loss": 0.7767, "step": 145130 }, { "epoch": 41.754890678941315, "grad_norm": 1.5861585140228271, "learning_rate": 0.0011649021864211736, "loss": 0.5964, "step": 145140 }, { "epoch": 41.75776754890679, "grad_norm": 1.5608893632888794, "learning_rate": 0.0011648446490218642, "loss": 0.5406, "step": 145150 }, { "epoch": 41.76064441887227, "grad_norm": 1.400209903717041, "learning_rate": 0.0011647871116225545, "loss": 0.5559, "step": 145160 }, { "epoch": 41.763521288837744, "grad_norm": 2.0104949474334717, "learning_rate": 0.001164729574223245, "loss": 0.7565, "step": 145170 }, { "epoch": 41.76639815880322, "grad_norm": 0.732845664024353, "learning_rate": 0.0011646720368239357, "loss": 0.6031, "step": 145180 }, { "epoch": 41.769275028768696, "grad_norm": 1.0606681108474731, "learning_rate": 0.001164614499424626, "loss": 0.6263, "step": 145190 }, { "epoch": 41.77215189873418, "grad_norm": 1.2117419242858887, "learning_rate": 0.0011645569620253164, "loss": 0.5597, "step": 145200 }, { "epoch": 41.775028768699656, "grad_norm": 1.2193522453308105, "learning_rate": 0.001164499424626007, "loss": 0.5517, "step": 145210 }, { "epoch": 41.77790563866513, "grad_norm": 1.980408787727356, "learning_rate": 0.0011644418872266973, "loss": 0.7324, "step": 145220 }, { "epoch": 41.78078250863061, "grad_norm": 2.009248971939087, "learning_rate": 0.0011643843498273878, "loss": 0.6142, "step": 145230 }, { "epoch": 41.783659378596084, "grad_norm": 1.7452887296676636, "learning_rate": 0.0011643268124280784, "loss": 0.7082, "step": 145240 }, { "epoch": 41.78653624856157, "grad_norm": 0.7066949009895325, "learning_rate": 0.0011642692750287688, "loss": 0.613, "step": 145250 }, { "epoch": 41.789413118527044, "grad_norm": 1.8296934366226196, "learning_rate": 0.001164211737629459, "loss": 0.6977, "step": 145260 }, { "epoch": 41.79228998849252, "grad_norm": 1.3125149011611938, "learning_rate": 0.0011641542002301497, "loss": 0.6252, "step": 145270 }, { "epoch": 41.795166858457996, "grad_norm": 0.8481420874595642, "learning_rate": 0.00116409666283084, "loss": 0.5812, "step": 145280 }, { "epoch": 41.79804372842347, "grad_norm": 1.1141870021820068, "learning_rate": 0.0011640391254315306, "loss": 0.4968, "step": 145290 }, { "epoch": 41.800920598388956, "grad_norm": 1.4630279541015625, "learning_rate": 0.001163981588032221, "loss": 0.7101, "step": 145300 }, { "epoch": 41.80379746835443, "grad_norm": 1.4712971448898315, "learning_rate": 0.0011639240506329115, "loss": 0.6802, "step": 145310 }, { "epoch": 41.80667433831991, "grad_norm": 2.206434965133667, "learning_rate": 0.0011638665132336018, "loss": 0.596, "step": 145320 }, { "epoch": 41.809551208285384, "grad_norm": 1.3881291151046753, "learning_rate": 0.0011638089758342922, "loss": 0.6056, "step": 145330 }, { "epoch": 41.81242807825086, "grad_norm": 8.591246604919434, "learning_rate": 0.0011637514384349827, "loss": 0.555, "step": 145340 }, { "epoch": 41.815304948216344, "grad_norm": 1.6134668588638306, "learning_rate": 0.0011636939010356733, "loss": 0.6002, "step": 145350 }, { "epoch": 41.81818181818182, "grad_norm": 1.3414270877838135, "learning_rate": 0.0011636363636363637, "loss": 0.5374, "step": 145360 }, { "epoch": 41.821058688147296, "grad_norm": 1.06337571144104, "learning_rate": 0.0011635788262370542, "loss": 0.6231, "step": 145370 }, { "epoch": 41.82393555811277, "grad_norm": 1.0287015438079834, "learning_rate": 0.0011635212888377446, "loss": 0.7381, "step": 145380 }, { "epoch": 41.82681242807825, "grad_norm": 2.7930586338043213, "learning_rate": 0.001163463751438435, "loss": 0.5395, "step": 145390 }, { "epoch": 41.829689298043725, "grad_norm": 1.5279008150100708, "learning_rate": 0.0011634062140391255, "loss": 0.6847, "step": 145400 }, { "epoch": 41.83256616800921, "grad_norm": 1.6951326131820679, "learning_rate": 0.0011633486766398158, "loss": 0.5866, "step": 145410 }, { "epoch": 41.835443037974684, "grad_norm": 1.674659252166748, "learning_rate": 0.0011632911392405064, "loss": 0.6929, "step": 145420 }, { "epoch": 41.83831990794016, "grad_norm": 1.2914718389511108, "learning_rate": 0.001163233601841197, "loss": 0.6024, "step": 145430 }, { "epoch": 41.84119677790564, "grad_norm": 1.0882174968719482, "learning_rate": 0.001163176064441887, "loss": 0.4964, "step": 145440 }, { "epoch": 41.84407364787111, "grad_norm": 1.09990394115448, "learning_rate": 0.0011631185270425776, "loss": 0.4762, "step": 145450 }, { "epoch": 41.846950517836596, "grad_norm": 1.6378079652786255, "learning_rate": 0.0011630609896432682, "loss": 0.472, "step": 145460 }, { "epoch": 41.84982738780207, "grad_norm": 1.3419396877288818, "learning_rate": 0.0011630034522439586, "loss": 0.6377, "step": 145470 }, { "epoch": 41.85270425776755, "grad_norm": 0.8472691178321838, "learning_rate": 0.0011629459148446491, "loss": 0.7274, "step": 145480 }, { "epoch": 41.855581127733025, "grad_norm": 0.7690522074699402, "learning_rate": 0.0011628883774453397, "loss": 0.8002, "step": 145490 }, { "epoch": 41.8584579976985, "grad_norm": 1.8882557153701782, "learning_rate": 0.0011628308400460298, "loss": 0.5967, "step": 145500 }, { "epoch": 41.861334867663984, "grad_norm": 1.2612897157669067, "learning_rate": 0.0011627733026467204, "loss": 0.5486, "step": 145510 }, { "epoch": 41.86421173762946, "grad_norm": 1.3655295372009277, "learning_rate": 0.0011627157652474107, "loss": 0.8083, "step": 145520 }, { "epoch": 41.86708860759494, "grad_norm": 2.33687424659729, "learning_rate": 0.0011626582278481013, "loss": 0.6806, "step": 145530 }, { "epoch": 41.86996547756041, "grad_norm": 1.1488028764724731, "learning_rate": 0.0011626006904487919, "loss": 0.5148, "step": 145540 }, { "epoch": 41.87284234752589, "grad_norm": 1.268574833869934, "learning_rate": 0.0011625431530494822, "loss": 0.6467, "step": 145550 }, { "epoch": 41.87571921749137, "grad_norm": 2.1533172130584717, "learning_rate": 0.0011624856156501725, "loss": 0.6774, "step": 145560 }, { "epoch": 41.87859608745685, "grad_norm": 0.5332282185554504, "learning_rate": 0.0011624280782508631, "loss": 0.619, "step": 145570 }, { "epoch": 41.881472957422325, "grad_norm": 0.6636444330215454, "learning_rate": 0.0011623705408515535, "loss": 0.737, "step": 145580 }, { "epoch": 41.8843498273878, "grad_norm": 1.2569063901901245, "learning_rate": 0.001162313003452244, "loss": 0.6877, "step": 145590 }, { "epoch": 41.88722669735328, "grad_norm": 2.1195337772369385, "learning_rate": 0.0011622554660529346, "loss": 0.5706, "step": 145600 }, { "epoch": 41.89010356731876, "grad_norm": 1.7337920665740967, "learning_rate": 0.001162197928653625, "loss": 0.6727, "step": 145610 }, { "epoch": 41.89298043728424, "grad_norm": 0.9227240681648254, "learning_rate": 0.0011621403912543153, "loss": 0.6695, "step": 145620 }, { "epoch": 41.89585730724971, "grad_norm": 0.7178308963775635, "learning_rate": 0.0011620828538550056, "loss": 0.5036, "step": 145630 }, { "epoch": 41.89873417721519, "grad_norm": 0.9686480164527893, "learning_rate": 0.0011620253164556962, "loss": 0.5678, "step": 145640 }, { "epoch": 41.901611047180666, "grad_norm": 1.4272555112838745, "learning_rate": 0.0011619677790563868, "loss": 0.6373, "step": 145650 }, { "epoch": 41.90448791714614, "grad_norm": 2.5517287254333496, "learning_rate": 0.001161910241657077, "loss": 0.5553, "step": 145660 }, { "epoch": 41.907364787111625, "grad_norm": 1.074585199356079, "learning_rate": 0.0011618527042577677, "loss": 0.5117, "step": 145670 }, { "epoch": 41.9102416570771, "grad_norm": 1.0799767971038818, "learning_rate": 0.001161795166858458, "loss": 0.7054, "step": 145680 }, { "epoch": 41.91311852704258, "grad_norm": 0.8855361938476562, "learning_rate": 0.0011617376294591484, "loss": 0.5885, "step": 145690 }, { "epoch": 41.915995397008054, "grad_norm": 0.8480935096740723, "learning_rate": 0.001161680092059839, "loss": 0.5661, "step": 145700 }, { "epoch": 41.91887226697353, "grad_norm": 2.0224661827087402, "learning_rate": 0.0011616225546605295, "loss": 0.7545, "step": 145710 }, { "epoch": 41.92174913693901, "grad_norm": 2.6768527030944824, "learning_rate": 0.0011615650172612198, "loss": 0.7667, "step": 145720 }, { "epoch": 41.92462600690449, "grad_norm": 1.1398937702178955, "learning_rate": 0.0011615074798619104, "loss": 0.5585, "step": 145730 }, { "epoch": 41.927502876869966, "grad_norm": 1.5075767040252686, "learning_rate": 0.0011614499424626005, "loss": 0.6452, "step": 145740 }, { "epoch": 41.93037974683544, "grad_norm": 1.0007151365280151, "learning_rate": 0.001161392405063291, "loss": 0.6204, "step": 145750 }, { "epoch": 41.93325661680092, "grad_norm": 1.1384437084197998, "learning_rate": 0.0011613348676639817, "loss": 0.5806, "step": 145760 }, { "epoch": 41.9361334867664, "grad_norm": 0.8507449626922607, "learning_rate": 0.001161277330264672, "loss": 0.5186, "step": 145770 }, { "epoch": 41.93901035673188, "grad_norm": 1.07515287399292, "learning_rate": 0.0011612197928653626, "loss": 0.5504, "step": 145780 }, { "epoch": 41.941887226697354, "grad_norm": 1.2571405172348022, "learning_rate": 0.0011611622554660531, "loss": 0.6405, "step": 145790 }, { "epoch": 41.94476409666283, "grad_norm": 1.025526523590088, "learning_rate": 0.0011611047180667433, "loss": 0.5559, "step": 145800 }, { "epoch": 41.947640966628306, "grad_norm": 1.521492838859558, "learning_rate": 0.0011610471806674338, "loss": 0.6342, "step": 145810 }, { "epoch": 41.95051783659379, "grad_norm": 1.223250389099121, "learning_rate": 0.0011609896432681244, "loss": 0.6075, "step": 145820 }, { "epoch": 41.953394706559266, "grad_norm": 0.8479483723640442, "learning_rate": 0.0011609321058688147, "loss": 0.6998, "step": 145830 }, { "epoch": 41.95627157652474, "grad_norm": 1.3937747478485107, "learning_rate": 0.0011608745684695053, "loss": 0.8062, "step": 145840 }, { "epoch": 41.95914844649022, "grad_norm": 1.2359896898269653, "learning_rate": 0.0011608170310701959, "loss": 0.6033, "step": 145850 }, { "epoch": 41.962025316455694, "grad_norm": 0.9634897708892822, "learning_rate": 0.001160759493670886, "loss": 0.6142, "step": 145860 }, { "epoch": 41.96490218642117, "grad_norm": 1.1758768558502197, "learning_rate": 0.0011607019562715766, "loss": 0.5434, "step": 145870 }, { "epoch": 41.967779056386654, "grad_norm": 1.9703359603881836, "learning_rate": 0.001160644418872267, "loss": 0.6578, "step": 145880 }, { "epoch": 41.97065592635213, "grad_norm": 2.268798828125, "learning_rate": 0.0011605868814729575, "loss": 0.5076, "step": 145890 }, { "epoch": 41.973532796317606, "grad_norm": 1.7632627487182617, "learning_rate": 0.001160529344073648, "loss": 0.6126, "step": 145900 }, { "epoch": 41.97640966628308, "grad_norm": 0.7682411670684814, "learning_rate": 0.0011604718066743382, "loss": 0.5922, "step": 145910 }, { "epoch": 41.97928653624856, "grad_norm": 1.3083404302597046, "learning_rate": 0.0011604142692750287, "loss": 0.6354, "step": 145920 }, { "epoch": 41.98216340621404, "grad_norm": 1.1494004726409912, "learning_rate": 0.0011603567318757193, "loss": 0.5249, "step": 145930 }, { "epoch": 41.98504027617952, "grad_norm": 2.083163261413574, "learning_rate": 0.0011602991944764096, "loss": 0.6493, "step": 145940 }, { "epoch": 41.987917146144994, "grad_norm": 0.9143192768096924, "learning_rate": 0.0011602416570771002, "loss": 0.5635, "step": 145950 }, { "epoch": 41.99079401611047, "grad_norm": 0.957199215888977, "learning_rate": 0.0011601841196777908, "loss": 0.6209, "step": 145960 }, { "epoch": 41.99367088607595, "grad_norm": 1.2499306201934814, "learning_rate": 0.001160126582278481, "loss": 0.6196, "step": 145970 }, { "epoch": 41.99654775604143, "grad_norm": 0.9298665523529053, "learning_rate": 0.0011600690448791715, "loss": 0.6, "step": 145980 }, { "epoch": 41.999424626006906, "grad_norm": 2.258856773376465, "learning_rate": 0.0011600115074798618, "loss": 0.8349, "step": 145990 }, { "epoch": 42.00230149597238, "grad_norm": 1.2908744812011719, "learning_rate": 0.0011599539700805524, "loss": 0.4515, "step": 146000 }, { "epoch": 42.00517836593786, "grad_norm": 1.7486188411712646, "learning_rate": 0.001159896432681243, "loss": 0.4681, "step": 146010 }, { "epoch": 42.008055235903335, "grad_norm": 1.1220521926879883, "learning_rate": 0.0011598388952819333, "loss": 0.45, "step": 146020 }, { "epoch": 42.01093210586882, "grad_norm": 0.6449015736579895, "learning_rate": 0.0011597813578826236, "loss": 0.6215, "step": 146030 }, { "epoch": 42.013808975834294, "grad_norm": 1.3376268148422241, "learning_rate": 0.0011597238204833142, "loss": 0.7086, "step": 146040 }, { "epoch": 42.01668584579977, "grad_norm": 1.8594609498977661, "learning_rate": 0.0011596662830840045, "loss": 0.5776, "step": 146050 }, { "epoch": 42.01956271576525, "grad_norm": 0.9562433362007141, "learning_rate": 0.001159608745684695, "loss": 0.4927, "step": 146060 }, { "epoch": 42.02243958573072, "grad_norm": 1.3887786865234375, "learning_rate": 0.0011595512082853857, "loss": 0.5138, "step": 146070 }, { "epoch": 42.0253164556962, "grad_norm": 1.2426482439041138, "learning_rate": 0.001159493670886076, "loss": 0.5579, "step": 146080 }, { "epoch": 42.02819332566168, "grad_norm": 2.890864849090576, "learning_rate": 0.0011594361334867664, "loss": 0.6438, "step": 146090 }, { "epoch": 42.03107019562716, "grad_norm": 1.7685197591781616, "learning_rate": 0.0011593785960874567, "loss": 0.5806, "step": 146100 }, { "epoch": 42.033947065592635, "grad_norm": 2.3765366077423096, "learning_rate": 0.0011593210586881473, "loss": 0.6058, "step": 146110 }, { "epoch": 42.03682393555811, "grad_norm": 1.9906879663467407, "learning_rate": 0.0011592635212888378, "loss": 0.5482, "step": 146120 }, { "epoch": 42.03970080552359, "grad_norm": 1.749038577079773, "learning_rate": 0.0011592059838895282, "loss": 0.5798, "step": 146130 }, { "epoch": 42.04257767548907, "grad_norm": 1.0823811292648315, "learning_rate": 0.0011591484464902187, "loss": 0.5943, "step": 146140 }, { "epoch": 42.04545454545455, "grad_norm": 1.3632017374038696, "learning_rate": 0.001159090909090909, "loss": 0.5581, "step": 146150 }, { "epoch": 42.04833141542002, "grad_norm": 2.191030740737915, "learning_rate": 0.0011590333716915994, "loss": 0.5729, "step": 146160 }, { "epoch": 42.0512082853855, "grad_norm": 1.679391622543335, "learning_rate": 0.00115897583429229, "loss": 0.4702, "step": 146170 }, { "epoch": 42.054085155350975, "grad_norm": 1.636789321899414, "learning_rate": 0.0011589182968929806, "loss": 0.6113, "step": 146180 }, { "epoch": 42.05696202531646, "grad_norm": 1.9786041975021362, "learning_rate": 0.001158860759493671, "loss": 0.5116, "step": 146190 }, { "epoch": 42.059838895281935, "grad_norm": 1.4540828466415405, "learning_rate": 0.0011588032220943615, "loss": 0.5823, "step": 146200 }, { "epoch": 42.06271576524741, "grad_norm": 1.8358103036880493, "learning_rate": 0.0011587456846950516, "loss": 0.5942, "step": 146210 }, { "epoch": 42.06559263521289, "grad_norm": 1.3776490688323975, "learning_rate": 0.0011586881472957422, "loss": 0.7307, "step": 146220 }, { "epoch": 42.06846950517836, "grad_norm": 1.3807519674301147, "learning_rate": 0.0011586306098964327, "loss": 0.5026, "step": 146230 }, { "epoch": 42.07134637514385, "grad_norm": 0.8503516912460327, "learning_rate": 0.001158573072497123, "loss": 0.5211, "step": 146240 }, { "epoch": 42.07422324510932, "grad_norm": 0.7464879155158997, "learning_rate": 0.0011585155350978137, "loss": 0.4946, "step": 146250 }, { "epoch": 42.0771001150748, "grad_norm": 1.18947172164917, "learning_rate": 0.0011584579976985042, "loss": 0.588, "step": 146260 }, { "epoch": 42.079976985040275, "grad_norm": 1.171506404876709, "learning_rate": 0.0011584004602991943, "loss": 0.5039, "step": 146270 }, { "epoch": 42.08285385500575, "grad_norm": 1.572199821472168, "learning_rate": 0.001158342922899885, "loss": 0.5164, "step": 146280 }, { "epoch": 42.08573072497123, "grad_norm": 1.3228082656860352, "learning_rate": 0.0011582853855005755, "loss": 0.5502, "step": 146290 }, { "epoch": 42.08860759493671, "grad_norm": 1.825429916381836, "learning_rate": 0.0011582278481012658, "loss": 0.5277, "step": 146300 }, { "epoch": 42.09148446490219, "grad_norm": 0.8249419927597046, "learning_rate": 0.0011581703107019564, "loss": 0.635, "step": 146310 }, { "epoch": 42.09436133486766, "grad_norm": 1.7872505187988281, "learning_rate": 0.0011581127733026467, "loss": 0.6014, "step": 146320 }, { "epoch": 42.09723820483314, "grad_norm": 1.4543976783752441, "learning_rate": 0.001158055235903337, "loss": 0.7315, "step": 146330 }, { "epoch": 42.100115074798616, "grad_norm": 1.7423591613769531, "learning_rate": 0.0011579976985040276, "loss": 0.5866, "step": 146340 }, { "epoch": 42.1029919447641, "grad_norm": 2.5289218425750732, "learning_rate": 0.001157940161104718, "loss": 0.5478, "step": 146350 }, { "epoch": 42.105868814729575, "grad_norm": 1.110267162322998, "learning_rate": 0.0011578826237054086, "loss": 0.69, "step": 146360 }, { "epoch": 42.10874568469505, "grad_norm": 1.4629169702529907, "learning_rate": 0.0011578250863060991, "loss": 0.5769, "step": 146370 }, { "epoch": 42.11162255466053, "grad_norm": 1.1041427850723267, "learning_rate": 0.0011577675489067895, "loss": 0.5889, "step": 146380 }, { "epoch": 42.114499424626004, "grad_norm": 1.0381354093551636, "learning_rate": 0.0011577100115074798, "loss": 0.6159, "step": 146390 }, { "epoch": 42.11737629459149, "grad_norm": 0.8638337850570679, "learning_rate": 0.0011576524741081704, "loss": 0.5535, "step": 146400 }, { "epoch": 42.120253164556964, "grad_norm": 0.6730237603187561, "learning_rate": 0.0011575949367088607, "loss": 0.6352, "step": 146410 }, { "epoch": 42.12313003452244, "grad_norm": 1.2357943058013916, "learning_rate": 0.0011575373993095513, "loss": 0.6277, "step": 146420 }, { "epoch": 42.126006904487916, "grad_norm": 1.8685511350631714, "learning_rate": 0.0011574798619102416, "loss": 0.6284, "step": 146430 }, { "epoch": 42.12888377445339, "grad_norm": 1.519490122795105, "learning_rate": 0.0011574223245109322, "loss": 0.6809, "step": 146440 }, { "epoch": 42.131760644418875, "grad_norm": 1.0410637855529785, "learning_rate": 0.0011573647871116225, "loss": 0.7061, "step": 146450 }, { "epoch": 42.13463751438435, "grad_norm": 0.7432610392570496, "learning_rate": 0.0011573072497123129, "loss": 0.5768, "step": 146460 }, { "epoch": 42.13751438434983, "grad_norm": 0.8164476752281189, "learning_rate": 0.0011572497123130035, "loss": 0.5263, "step": 146470 }, { "epoch": 42.140391254315304, "grad_norm": 0.9142445921897888, "learning_rate": 0.001157192174913694, "loss": 0.6701, "step": 146480 }, { "epoch": 42.14326812428078, "grad_norm": 2.0785837173461914, "learning_rate": 0.0011571346375143844, "loss": 0.64, "step": 146490 }, { "epoch": 42.146144994246264, "grad_norm": 0.7484948635101318, "learning_rate": 0.001157077100115075, "loss": 0.6562, "step": 146500 }, { "epoch": 42.14902186421174, "grad_norm": 1.4166532754898071, "learning_rate": 0.0011570195627157653, "loss": 0.4717, "step": 146510 }, { "epoch": 42.151898734177216, "grad_norm": 1.2056893110275269, "learning_rate": 0.0011569620253164556, "loss": 0.5189, "step": 146520 }, { "epoch": 42.15477560414269, "grad_norm": 1.1941474676132202, "learning_rate": 0.0011569044879171462, "loss": 0.5649, "step": 146530 }, { "epoch": 42.15765247410817, "grad_norm": 0.9235044121742249, "learning_rate": 0.0011568469505178368, "loss": 0.5075, "step": 146540 }, { "epoch": 42.160529344073645, "grad_norm": 1.467197299003601, "learning_rate": 0.001156789413118527, "loss": 0.5687, "step": 146550 }, { "epoch": 42.16340621403913, "grad_norm": 1.336414098739624, "learning_rate": 0.0011567318757192177, "loss": 0.4464, "step": 146560 }, { "epoch": 42.166283084004604, "grad_norm": 0.7745324373245239, "learning_rate": 0.0011566743383199078, "loss": 0.4617, "step": 146570 }, { "epoch": 42.16915995397008, "grad_norm": 1.3255349397659302, "learning_rate": 0.0011566168009205984, "loss": 0.6854, "step": 146580 }, { "epoch": 42.17203682393556, "grad_norm": 1.8770674467086792, "learning_rate": 0.001156559263521289, "loss": 0.6364, "step": 146590 }, { "epoch": 42.17491369390103, "grad_norm": 1.0986065864562988, "learning_rate": 0.0011565017261219793, "loss": 0.5837, "step": 146600 }, { "epoch": 42.177790563866516, "grad_norm": 2.2684056758880615, "learning_rate": 0.0011564441887226698, "loss": 0.5277, "step": 146610 }, { "epoch": 42.18066743383199, "grad_norm": 1.0405571460723877, "learning_rate": 0.0011563866513233604, "loss": 0.4899, "step": 146620 }, { "epoch": 42.18354430379747, "grad_norm": 0.7779875993728638, "learning_rate": 0.0011563291139240505, "loss": 0.6194, "step": 146630 }, { "epoch": 42.186421173762945, "grad_norm": 1.4450002908706665, "learning_rate": 0.001156271576524741, "loss": 0.5302, "step": 146640 }, { "epoch": 42.18929804372842, "grad_norm": 0.7142106294631958, "learning_rate": 0.0011562140391254317, "loss": 0.5037, "step": 146650 }, { "epoch": 42.192174913693904, "grad_norm": 1.0324572324752808, "learning_rate": 0.001156156501726122, "loss": 0.4805, "step": 146660 }, { "epoch": 42.19505178365938, "grad_norm": 0.8021841049194336, "learning_rate": 0.0011560989643268126, "loss": 0.7241, "step": 146670 }, { "epoch": 42.19792865362486, "grad_norm": 1.1459280252456665, "learning_rate": 0.001156041426927503, "loss": 0.4306, "step": 146680 }, { "epoch": 42.20080552359033, "grad_norm": 1.4661064147949219, "learning_rate": 0.0011559838895281933, "loss": 0.6802, "step": 146690 }, { "epoch": 42.20368239355581, "grad_norm": 0.8674758672714233, "learning_rate": 0.0011559263521288838, "loss": 0.481, "step": 146700 }, { "epoch": 42.20655926352129, "grad_norm": 1.669130802154541, "learning_rate": 0.0011558688147295742, "loss": 0.4845, "step": 146710 }, { "epoch": 42.20943613348677, "grad_norm": 2.131192207336426, "learning_rate": 0.0011558112773302647, "loss": 0.603, "step": 146720 }, { "epoch": 42.212313003452245, "grad_norm": 1.0698301792144775, "learning_rate": 0.0011557537399309553, "loss": 0.5895, "step": 146730 }, { "epoch": 42.21518987341772, "grad_norm": 0.9778000712394714, "learning_rate": 0.0011556962025316454, "loss": 0.4952, "step": 146740 }, { "epoch": 42.2180667433832, "grad_norm": 0.6342841982841492, "learning_rate": 0.001155638665132336, "loss": 0.4991, "step": 146750 }, { "epoch": 42.22094361334867, "grad_norm": 0.7543377876281738, "learning_rate": 0.0011555811277330266, "loss": 0.4748, "step": 146760 }, { "epoch": 42.22382048331416, "grad_norm": 1.447847843170166, "learning_rate": 0.001155523590333717, "loss": 0.6009, "step": 146770 }, { "epoch": 42.22669735327963, "grad_norm": 0.643233597278595, "learning_rate": 0.0011554660529344075, "loss": 0.5361, "step": 146780 }, { "epoch": 42.22957422324511, "grad_norm": 0.949222207069397, "learning_rate": 0.0011554085155350978, "loss": 0.5665, "step": 146790 }, { "epoch": 42.232451093210585, "grad_norm": 1.7546718120574951, "learning_rate": 0.0011553509781357882, "loss": 0.5429, "step": 146800 }, { "epoch": 42.23532796317606, "grad_norm": 1.314827799797058, "learning_rate": 0.0011552934407364787, "loss": 0.5299, "step": 146810 }, { "epoch": 42.238204833141545, "grad_norm": 3.189862012863159, "learning_rate": 0.001155235903337169, "loss": 0.5967, "step": 146820 }, { "epoch": 42.24108170310702, "grad_norm": 0.931583046913147, "learning_rate": 0.0011551783659378596, "loss": 0.572, "step": 146830 }, { "epoch": 42.2439585730725, "grad_norm": 0.7654252648353577, "learning_rate": 0.0011551208285385502, "loss": 0.5273, "step": 146840 }, { "epoch": 42.24683544303797, "grad_norm": 0.9165040254592896, "learning_rate": 0.0011550632911392405, "loss": 0.6529, "step": 146850 }, { "epoch": 42.24971231300345, "grad_norm": 1.3097094297409058, "learning_rate": 0.001155005753739931, "loss": 0.5185, "step": 146860 }, { "epoch": 42.25258918296893, "grad_norm": 0.697655200958252, "learning_rate": 0.0011549482163406215, "loss": 0.4868, "step": 146870 }, { "epoch": 42.25546605293441, "grad_norm": 1.2033777236938477, "learning_rate": 0.0011548906789413118, "loss": 0.6362, "step": 146880 }, { "epoch": 42.258342922899885, "grad_norm": 2.5299792289733887, "learning_rate": 0.0011548331415420024, "loss": 0.6429, "step": 146890 }, { "epoch": 42.26121979286536, "grad_norm": 0.8494623303413391, "learning_rate": 0.0011547756041426927, "loss": 0.6518, "step": 146900 }, { "epoch": 42.26409666283084, "grad_norm": 1.0650568008422852, "learning_rate": 0.0011547180667433833, "loss": 0.5623, "step": 146910 }, { "epoch": 42.26697353279632, "grad_norm": 1.1751224994659424, "learning_rate": 0.0011546605293440736, "loss": 0.5139, "step": 146920 }, { "epoch": 42.2698504027618, "grad_norm": 0.9486983418464661, "learning_rate": 0.001154602991944764, "loss": 0.5326, "step": 146930 }, { "epoch": 42.27272727272727, "grad_norm": 1.2894929647445679, "learning_rate": 0.0011545454545454545, "loss": 0.5641, "step": 146940 }, { "epoch": 42.27560414269275, "grad_norm": 2.113497018814087, "learning_rate": 0.001154487917146145, "loss": 0.5791, "step": 146950 }, { "epoch": 42.278481012658226, "grad_norm": 1.8686834573745728, "learning_rate": 0.0011544303797468354, "loss": 0.5675, "step": 146960 }, { "epoch": 42.2813578826237, "grad_norm": 1.041269063949585, "learning_rate": 0.001154372842347526, "loss": 0.5743, "step": 146970 }, { "epoch": 42.284234752589185, "grad_norm": 0.7714723944664001, "learning_rate": 0.0011543153049482164, "loss": 0.4381, "step": 146980 }, { "epoch": 42.28711162255466, "grad_norm": 1.9132708311080933, "learning_rate": 0.0011542577675489067, "loss": 0.7032, "step": 146990 }, { "epoch": 42.28998849252014, "grad_norm": 1.2990050315856934, "learning_rate": 0.0011542002301495973, "loss": 0.5294, "step": 147000 }, { "epoch": 42.292865362485614, "grad_norm": 1.1694520711898804, "learning_rate": 0.0011541426927502876, "loss": 0.7199, "step": 147010 }, { "epoch": 42.29574223245109, "grad_norm": 1.258109450340271, "learning_rate": 0.0011540851553509782, "loss": 0.5103, "step": 147020 }, { "epoch": 42.29861910241657, "grad_norm": 1.175632357597351, "learning_rate": 0.0011540276179516687, "loss": 0.549, "step": 147030 }, { "epoch": 42.30149597238205, "grad_norm": 2.264404773712158, "learning_rate": 0.0011539700805523589, "loss": 0.5284, "step": 147040 }, { "epoch": 42.304372842347526, "grad_norm": 1.6454319953918457, "learning_rate": 0.0011539125431530494, "loss": 0.6611, "step": 147050 }, { "epoch": 42.307249712313, "grad_norm": 1.76193368434906, "learning_rate": 0.00115385500575374, "loss": 0.5477, "step": 147060 }, { "epoch": 42.31012658227848, "grad_norm": 0.9807255864143372, "learning_rate": 0.0011537974683544304, "loss": 0.4595, "step": 147070 }, { "epoch": 42.31300345224396, "grad_norm": 0.768363893032074, "learning_rate": 0.001153739930955121, "loss": 0.6207, "step": 147080 }, { "epoch": 42.31588032220944, "grad_norm": 0.8625784516334534, "learning_rate": 0.0011536823935558115, "loss": 0.5243, "step": 147090 }, { "epoch": 42.318757192174914, "grad_norm": 1.621441125869751, "learning_rate": 0.0011536248561565016, "loss": 0.488, "step": 147100 }, { "epoch": 42.32163406214039, "grad_norm": 0.7038310170173645, "learning_rate": 0.0011535673187571922, "loss": 0.6826, "step": 147110 }, { "epoch": 42.324510932105866, "grad_norm": 1.3937749862670898, "learning_rate": 0.0011535097813578825, "loss": 0.5199, "step": 147120 }, { "epoch": 42.32738780207135, "grad_norm": 1.0160273313522339, "learning_rate": 0.001153452243958573, "loss": 0.5145, "step": 147130 }, { "epoch": 42.330264672036826, "grad_norm": 0.9609178304672241, "learning_rate": 0.0011533947065592636, "loss": 0.544, "step": 147140 }, { "epoch": 42.3331415420023, "grad_norm": 1.6973050832748413, "learning_rate": 0.001153337169159954, "loss": 0.4877, "step": 147150 }, { "epoch": 42.33601841196778, "grad_norm": 1.5274778604507446, "learning_rate": 0.0011532796317606443, "loss": 0.7017, "step": 147160 }, { "epoch": 42.338895281933254, "grad_norm": 0.9787659049034119, "learning_rate": 0.001153222094361335, "loss": 0.656, "step": 147170 }, { "epoch": 42.34177215189873, "grad_norm": 1.4144896268844604, "learning_rate": 0.0011531645569620253, "loss": 0.6174, "step": 147180 }, { "epoch": 42.344649021864214, "grad_norm": 1.6213617324829102, "learning_rate": 0.0011531070195627158, "loss": 0.6002, "step": 147190 }, { "epoch": 42.34752589182969, "grad_norm": 1.0583772659301758, "learning_rate": 0.0011530494821634064, "loss": 0.4473, "step": 147200 }, { "epoch": 42.350402761795166, "grad_norm": 2.225604772567749, "learning_rate": 0.0011529919447640967, "loss": 0.5677, "step": 147210 }, { "epoch": 42.35327963176064, "grad_norm": 0.9842342138290405, "learning_rate": 0.001152934407364787, "loss": 0.5152, "step": 147220 }, { "epoch": 42.35615650172612, "grad_norm": 0.9479960799217224, "learning_rate": 0.0011528768699654776, "loss": 0.5077, "step": 147230 }, { "epoch": 42.3590333716916, "grad_norm": 1.5247050523757935, "learning_rate": 0.001152819332566168, "loss": 0.5118, "step": 147240 }, { "epoch": 42.36191024165708, "grad_norm": 1.4337689876556396, "learning_rate": 0.0011527617951668586, "loss": 0.5741, "step": 147250 }, { "epoch": 42.364787111622555, "grad_norm": 1.3480032682418823, "learning_rate": 0.001152704257767549, "loss": 0.5743, "step": 147260 }, { "epoch": 42.36766398158803, "grad_norm": 1.0718395709991455, "learning_rate": 0.0011526467203682395, "loss": 0.6885, "step": 147270 }, { "epoch": 42.37054085155351, "grad_norm": 1.0048035383224487, "learning_rate": 0.0011525891829689298, "loss": 0.553, "step": 147280 }, { "epoch": 42.37341772151899, "grad_norm": 2.065673351287842, "learning_rate": 0.0011525316455696202, "loss": 0.5407, "step": 147290 }, { "epoch": 42.376294591484466, "grad_norm": 1.979550838470459, "learning_rate": 0.0011524741081703107, "loss": 0.6032, "step": 147300 }, { "epoch": 42.37917146144994, "grad_norm": 1.2616699934005737, "learning_rate": 0.0011524165707710013, "loss": 0.5563, "step": 147310 }, { "epoch": 42.38204833141542, "grad_norm": 1.0724953413009644, "learning_rate": 0.0011523590333716916, "loss": 0.5557, "step": 147320 }, { "epoch": 42.384925201380895, "grad_norm": 0.7934211492538452, "learning_rate": 0.0011523014959723822, "loss": 0.4926, "step": 147330 }, { "epoch": 42.38780207134638, "grad_norm": 1.1156344413757324, "learning_rate": 0.0011522439585730725, "loss": 0.631, "step": 147340 }, { "epoch": 42.390678941311855, "grad_norm": 1.0187749862670898, "learning_rate": 0.0011521864211737629, "loss": 0.4969, "step": 147350 }, { "epoch": 42.39355581127733, "grad_norm": 2.052292585372925, "learning_rate": 0.0011521288837744535, "loss": 0.7184, "step": 147360 }, { "epoch": 42.39643268124281, "grad_norm": 1.5232326984405518, "learning_rate": 0.0011520713463751438, "loss": 0.6157, "step": 147370 }, { "epoch": 42.39930955120828, "grad_norm": 2.1071763038635254, "learning_rate": 0.0011520138089758344, "loss": 0.7691, "step": 147380 }, { "epoch": 42.40218642117377, "grad_norm": 1.8843320608139038, "learning_rate": 0.001151956271576525, "loss": 0.7616, "step": 147390 }, { "epoch": 42.40506329113924, "grad_norm": 1.1509755849838257, "learning_rate": 0.001151898734177215, "loss": 0.561, "step": 147400 }, { "epoch": 42.40794016110472, "grad_norm": 1.0997761487960815, "learning_rate": 0.0011518411967779056, "loss": 0.5554, "step": 147410 }, { "epoch": 42.410817031070195, "grad_norm": 1.0641623735427856, "learning_rate": 0.0011517836593785962, "loss": 0.5671, "step": 147420 }, { "epoch": 42.41369390103567, "grad_norm": 1.5955983400344849, "learning_rate": 0.0011517261219792865, "loss": 0.5228, "step": 147430 }, { "epoch": 42.41657077100115, "grad_norm": 1.0981403589248657, "learning_rate": 0.001151668584579977, "loss": 0.5632, "step": 147440 }, { "epoch": 42.41944764096663, "grad_norm": 1.1055872440338135, "learning_rate": 0.0011516110471806677, "loss": 0.601, "step": 147450 }, { "epoch": 42.42232451093211, "grad_norm": 0.8581045269966125, "learning_rate": 0.0011515535097813578, "loss": 0.6374, "step": 147460 }, { "epoch": 42.42520138089758, "grad_norm": 3.0006003379821777, "learning_rate": 0.0011514959723820484, "loss": 0.7595, "step": 147470 }, { "epoch": 42.42807825086306, "grad_norm": 1.6593842506408691, "learning_rate": 0.0011514384349827387, "loss": 0.6863, "step": 147480 }, { "epoch": 42.430955120828536, "grad_norm": 0.6677425503730774, "learning_rate": 0.0011513808975834293, "loss": 0.5933, "step": 147490 }, { "epoch": 42.43383199079402, "grad_norm": 1.3112082481384277, "learning_rate": 0.0011513233601841198, "loss": 0.5614, "step": 147500 }, { "epoch": 42.436708860759495, "grad_norm": 1.6219853162765503, "learning_rate": 0.0011512658227848102, "loss": 0.5161, "step": 147510 }, { "epoch": 42.43958573072497, "grad_norm": 1.3116739988327026, "learning_rate": 0.0011512082853855005, "loss": 0.4569, "step": 147520 }, { "epoch": 42.44246260069045, "grad_norm": 1.120166540145874, "learning_rate": 0.001151150747986191, "loss": 0.4339, "step": 147530 }, { "epoch": 42.445339470655924, "grad_norm": 1.828979253768921, "learning_rate": 0.0011510932105868814, "loss": 0.6327, "step": 147540 }, { "epoch": 42.44821634062141, "grad_norm": 1.6132917404174805, "learning_rate": 0.001151035673187572, "loss": 0.6967, "step": 147550 }, { "epoch": 42.45109321058688, "grad_norm": 1.0114153623580933, "learning_rate": 0.0011509781357882626, "loss": 0.6029, "step": 147560 }, { "epoch": 42.45397008055236, "grad_norm": 1.7592514753341675, "learning_rate": 0.0011509205983889527, "loss": 0.7247, "step": 147570 }, { "epoch": 42.456846950517836, "grad_norm": 0.8028454184532166, "learning_rate": 0.0011508630609896433, "loss": 0.6277, "step": 147580 }, { "epoch": 42.45972382048331, "grad_norm": 0.8815896511077881, "learning_rate": 0.0011508055235903336, "loss": 0.56, "step": 147590 }, { "epoch": 42.462600690448795, "grad_norm": 1.4746326208114624, "learning_rate": 0.0011507479861910242, "loss": 0.5835, "step": 147600 }, { "epoch": 42.46547756041427, "grad_norm": 1.410830020904541, "learning_rate": 0.0011506904487917147, "loss": 0.598, "step": 147610 }, { "epoch": 42.46835443037975, "grad_norm": 1.1233583688735962, "learning_rate": 0.001150632911392405, "loss": 0.587, "step": 147620 }, { "epoch": 42.471231300345224, "grad_norm": 0.9878201484680176, "learning_rate": 0.0011505753739930954, "loss": 0.5118, "step": 147630 }, { "epoch": 42.4741081703107, "grad_norm": 0.8807927370071411, "learning_rate": 0.001150517836593786, "loss": 0.534, "step": 147640 }, { "epoch": 42.476985040276176, "grad_norm": 0.8692641258239746, "learning_rate": 0.0011504602991944763, "loss": 0.5424, "step": 147650 }, { "epoch": 42.47986191024166, "grad_norm": 1.452514886856079, "learning_rate": 0.001150402761795167, "loss": 0.6873, "step": 147660 }, { "epoch": 42.482738780207136, "grad_norm": 1.9024838209152222, "learning_rate": 0.0011503452243958575, "loss": 0.6336, "step": 147670 }, { "epoch": 42.48561565017261, "grad_norm": 1.8639856576919556, "learning_rate": 0.0011502876869965478, "loss": 0.8348, "step": 147680 }, { "epoch": 42.48849252013809, "grad_norm": 2.0905401706695557, "learning_rate": 0.0011502301495972382, "loss": 0.6186, "step": 147690 }, { "epoch": 42.491369390103564, "grad_norm": 1.5635805130004883, "learning_rate": 0.0011501726121979285, "loss": 0.6781, "step": 147700 }, { "epoch": 42.49424626006905, "grad_norm": 1.4015642404556274, "learning_rate": 0.001150115074798619, "loss": 0.5776, "step": 147710 }, { "epoch": 42.497123130034524, "grad_norm": 0.8371996879577637, "learning_rate": 0.0011500575373993096, "loss": 0.7987, "step": 147720 }, { "epoch": 42.5, "grad_norm": 0.5066792964935303, "learning_rate": 0.00115, "loss": 0.5761, "step": 147730 }, { "epoch": 42.502876869965476, "grad_norm": 0.7752453684806824, "learning_rate": 0.0011499424626006905, "loss": 0.6609, "step": 147740 }, { "epoch": 42.50575373993095, "grad_norm": 1.5334738492965698, "learning_rate": 0.0011498849252013809, "loss": 0.5192, "step": 147750 }, { "epoch": 42.508630609896436, "grad_norm": 1.209053635597229, "learning_rate": 0.0011498273878020712, "loss": 0.5263, "step": 147760 }, { "epoch": 42.51150747986191, "grad_norm": 1.6921446323394775, "learning_rate": 0.0011497698504027618, "loss": 0.6063, "step": 147770 }, { "epoch": 42.51438434982739, "grad_norm": 2.1117660999298096, "learning_rate": 0.0011497123130034524, "loss": 0.6893, "step": 147780 }, { "epoch": 42.517261219792864, "grad_norm": 1.6822420358657837, "learning_rate": 0.0011496547756041427, "loss": 0.5794, "step": 147790 }, { "epoch": 42.52013808975834, "grad_norm": 2.6667778491973877, "learning_rate": 0.0011495972382048333, "loss": 0.6792, "step": 147800 }, { "epoch": 42.523014959723824, "grad_norm": 1.0580195188522339, "learning_rate": 0.0011495397008055236, "loss": 0.5672, "step": 147810 }, { "epoch": 42.5258918296893, "grad_norm": 1.2960795164108276, "learning_rate": 0.001149482163406214, "loss": 0.7063, "step": 147820 }, { "epoch": 42.528768699654776, "grad_norm": 0.9010213613510132, "learning_rate": 0.0011494246260069045, "loss": 0.4754, "step": 147830 }, { "epoch": 42.53164556962025, "grad_norm": 1.3519256114959717, "learning_rate": 0.0011493670886075949, "loss": 0.5442, "step": 147840 }, { "epoch": 42.53452243958573, "grad_norm": 1.3909730911254883, "learning_rate": 0.0011493095512082854, "loss": 0.7433, "step": 147850 }, { "epoch": 42.537399309551205, "grad_norm": 2.5334396362304688, "learning_rate": 0.001149252013808976, "loss": 0.6267, "step": 147860 }, { "epoch": 42.54027617951669, "grad_norm": 1.5620996952056885, "learning_rate": 0.0011491944764096661, "loss": 0.601, "step": 147870 }, { "epoch": 42.543153049482164, "grad_norm": 0.9148750901222229, "learning_rate": 0.0011491369390103567, "loss": 0.4457, "step": 147880 }, { "epoch": 42.54602991944764, "grad_norm": 2.0857231616973877, "learning_rate": 0.0011490794016110473, "loss": 0.7106, "step": 147890 }, { "epoch": 42.54890678941312, "grad_norm": 3.0948894023895264, "learning_rate": 0.0011490218642117376, "loss": 0.7041, "step": 147900 }, { "epoch": 42.55178365937859, "grad_norm": 1.5602957010269165, "learning_rate": 0.0011489643268124282, "loss": 0.6566, "step": 147910 }, { "epoch": 42.554660529344076, "grad_norm": 0.9224087595939636, "learning_rate": 0.0011489067894131187, "loss": 0.6022, "step": 147920 }, { "epoch": 42.55753739930955, "grad_norm": 1.1487618684768677, "learning_rate": 0.0011488492520138089, "loss": 0.5758, "step": 147930 }, { "epoch": 42.56041426927503, "grad_norm": 1.2363892793655396, "learning_rate": 0.0011487917146144994, "loss": 0.5504, "step": 147940 }, { "epoch": 42.563291139240505, "grad_norm": 1.1292766332626343, "learning_rate": 0.0011487341772151898, "loss": 0.6547, "step": 147950 }, { "epoch": 42.56616800920598, "grad_norm": 1.3286733627319336, "learning_rate": 0.0011486766398158803, "loss": 0.676, "step": 147960 }, { "epoch": 42.569044879171464, "grad_norm": 0.7576092481613159, "learning_rate": 0.001148619102416571, "loss": 0.461, "step": 147970 }, { "epoch": 42.57192174913694, "grad_norm": 0.7447943687438965, "learning_rate": 0.0011485615650172613, "loss": 0.6594, "step": 147980 }, { "epoch": 42.57479861910242, "grad_norm": 1.9377816915512085, "learning_rate": 0.0011485040276179516, "loss": 0.6766, "step": 147990 }, { "epoch": 42.57767548906789, "grad_norm": 1.448338270187378, "learning_rate": 0.0011484464902186422, "loss": 0.7721, "step": 148000 }, { "epoch": 42.58055235903337, "grad_norm": 0.8758957386016846, "learning_rate": 0.0011483889528193325, "loss": 0.536, "step": 148010 }, { "epoch": 42.58342922899885, "grad_norm": 0.9248679876327515, "learning_rate": 0.001148331415420023, "loss": 0.6134, "step": 148020 }, { "epoch": 42.58630609896433, "grad_norm": 0.8351538181304932, "learning_rate": 0.0011482738780207136, "loss": 0.5017, "step": 148030 }, { "epoch": 42.589182968929805, "grad_norm": 2.008467674255371, "learning_rate": 0.001148216340621404, "loss": 0.6068, "step": 148040 }, { "epoch": 42.59205983889528, "grad_norm": 1.7696534395217896, "learning_rate": 0.0011481588032220943, "loss": 0.6947, "step": 148050 }, { "epoch": 42.59493670886076, "grad_norm": 0.9478296041488647, "learning_rate": 0.0011481012658227847, "loss": 0.586, "step": 148060 }, { "epoch": 42.59781357882623, "grad_norm": 0.7573837637901306, "learning_rate": 0.0011480437284234753, "loss": 0.6731, "step": 148070 }, { "epoch": 42.60069044879172, "grad_norm": 0.8981243968009949, "learning_rate": 0.0011479861910241658, "loss": 0.5119, "step": 148080 }, { "epoch": 42.60356731875719, "grad_norm": 1.114439606666565, "learning_rate": 0.0011479286536248562, "loss": 0.5885, "step": 148090 }, { "epoch": 42.60644418872267, "grad_norm": 1.930594563484192, "learning_rate": 0.0011478711162255467, "loss": 0.6082, "step": 148100 }, { "epoch": 42.609321058688145, "grad_norm": 0.7968553900718689, "learning_rate": 0.001147813578826237, "loss": 0.6157, "step": 148110 }, { "epoch": 42.61219792865362, "grad_norm": 2.1926896572113037, "learning_rate": 0.0011477560414269274, "loss": 0.5184, "step": 148120 }, { "epoch": 42.615074798619105, "grad_norm": 1.4011040925979614, "learning_rate": 0.001147698504027618, "loss": 0.656, "step": 148130 }, { "epoch": 42.61795166858458, "grad_norm": 1.567828893661499, "learning_rate": 0.0011476409666283085, "loss": 0.6422, "step": 148140 }, { "epoch": 42.62082853855006, "grad_norm": 1.6246670484542847, "learning_rate": 0.001147583429228999, "loss": 0.6074, "step": 148150 }, { "epoch": 42.623705408515534, "grad_norm": 0.8082448840141296, "learning_rate": 0.0011475258918296895, "loss": 0.5494, "step": 148160 }, { "epoch": 42.62658227848101, "grad_norm": 1.8002573251724243, "learning_rate": 0.0011474683544303796, "loss": 0.5642, "step": 148170 }, { "epoch": 42.62945914844649, "grad_norm": 1.1985182762145996, "learning_rate": 0.0011474108170310702, "loss": 0.5902, "step": 148180 }, { "epoch": 42.63233601841197, "grad_norm": 0.7618874311447144, "learning_rate": 0.0011473532796317607, "loss": 0.6309, "step": 148190 }, { "epoch": 42.635212888377445, "grad_norm": 1.1596535444259644, "learning_rate": 0.001147295742232451, "loss": 0.5972, "step": 148200 }, { "epoch": 42.63808975834292, "grad_norm": 1.280030608177185, "learning_rate": 0.0011472382048331416, "loss": 0.6409, "step": 148210 }, { "epoch": 42.6409666283084, "grad_norm": 1.3377681970596313, "learning_rate": 0.0011471806674338322, "loss": 0.5932, "step": 148220 }, { "epoch": 42.64384349827388, "grad_norm": 2.2309908866882324, "learning_rate": 0.0011471231300345223, "loss": 0.6215, "step": 148230 }, { "epoch": 42.64672036823936, "grad_norm": 1.5585073232650757, "learning_rate": 0.0011470655926352129, "loss": 0.5091, "step": 148240 }, { "epoch": 42.649597238204834, "grad_norm": 1.3008921146392822, "learning_rate": 0.0011470080552359034, "loss": 0.6536, "step": 148250 }, { "epoch": 42.65247410817031, "grad_norm": 1.8254146575927734, "learning_rate": 0.0011469505178365938, "loss": 0.6469, "step": 148260 }, { "epoch": 42.655350978135786, "grad_norm": 0.6666128039360046, "learning_rate": 0.0011468929804372844, "loss": 0.6349, "step": 148270 }, { "epoch": 42.65822784810126, "grad_norm": 1.5220593214035034, "learning_rate": 0.0011468354430379747, "loss": 0.6235, "step": 148280 }, { "epoch": 42.661104718066746, "grad_norm": 1.1797008514404297, "learning_rate": 0.001146777905638665, "loss": 0.5912, "step": 148290 }, { "epoch": 42.66398158803222, "grad_norm": 1.448172688484192, "learning_rate": 0.0011467203682393556, "loss": 0.7554, "step": 148300 }, { "epoch": 42.6668584579977, "grad_norm": 0.9205918312072754, "learning_rate": 0.001146662830840046, "loss": 0.6769, "step": 148310 }, { "epoch": 42.669735327963174, "grad_norm": 0.8304944038391113, "learning_rate": 0.0011466052934407365, "loss": 0.5979, "step": 148320 }, { "epoch": 42.67261219792865, "grad_norm": 1.1495524644851685, "learning_rate": 0.001146547756041427, "loss": 0.7521, "step": 148330 }, { "epoch": 42.675489067894134, "grad_norm": 1.3847382068634033, "learning_rate": 0.0011464902186421174, "loss": 0.5456, "step": 148340 }, { "epoch": 42.67836593785961, "grad_norm": 1.6062763929367065, "learning_rate": 0.0011464326812428078, "loss": 0.7135, "step": 148350 }, { "epoch": 42.681242807825086, "grad_norm": 0.7216130495071411, "learning_rate": 0.0011463751438434984, "loss": 0.5053, "step": 148360 }, { "epoch": 42.68411967779056, "grad_norm": 1.597683310508728, "learning_rate": 0.0011463176064441887, "loss": 0.6206, "step": 148370 }, { "epoch": 42.68699654775604, "grad_norm": 1.7555615901947021, "learning_rate": 0.0011462600690448793, "loss": 0.737, "step": 148380 }, { "epoch": 42.68987341772152, "grad_norm": 1.7827377319335938, "learning_rate": 0.0011462025316455696, "loss": 0.6017, "step": 148390 }, { "epoch": 42.692750287687, "grad_norm": 1.6715532541275024, "learning_rate": 0.00114614499424626, "loss": 0.5551, "step": 148400 }, { "epoch": 42.695627157652474, "grad_norm": 1.6825690269470215, "learning_rate": 0.0011460874568469505, "loss": 0.5319, "step": 148410 }, { "epoch": 42.69850402761795, "grad_norm": 0.9994091391563416, "learning_rate": 0.0011460299194476409, "loss": 0.4246, "step": 148420 }, { "epoch": 42.70138089758343, "grad_norm": 1.0483464002609253, "learning_rate": 0.0011459723820483314, "loss": 0.7215, "step": 148430 }, { "epoch": 42.70425776754891, "grad_norm": 1.2878308296203613, "learning_rate": 0.001145914844649022, "loss": 0.7746, "step": 148440 }, { "epoch": 42.707134637514386, "grad_norm": 3.0045342445373535, "learning_rate": 0.0011458573072497123, "loss": 0.5624, "step": 148450 }, { "epoch": 42.71001150747986, "grad_norm": 1.9284528493881226, "learning_rate": 0.0011457997698504027, "loss": 0.602, "step": 148460 }, { "epoch": 42.71288837744534, "grad_norm": 0.6166881322860718, "learning_rate": 0.0011457422324510933, "loss": 0.6287, "step": 148470 }, { "epoch": 42.715765247410815, "grad_norm": 2.019540309906006, "learning_rate": 0.0011456846950517836, "loss": 0.5405, "step": 148480 }, { "epoch": 42.7186421173763, "grad_norm": 1.868080496788025, "learning_rate": 0.0011456271576524742, "loss": 0.6578, "step": 148490 }, { "epoch": 42.721518987341774, "grad_norm": 0.7655522227287292, "learning_rate": 0.0011455696202531647, "loss": 0.5697, "step": 148500 }, { "epoch": 42.72439585730725, "grad_norm": 1.208164930343628, "learning_rate": 0.001145512082853855, "loss": 0.7477, "step": 148510 }, { "epoch": 42.72727272727273, "grad_norm": 0.6655400395393372, "learning_rate": 0.0011454545454545454, "loss": 0.5548, "step": 148520 }, { "epoch": 42.7301495972382, "grad_norm": 1.6672853231430054, "learning_rate": 0.0011453970080552358, "loss": 0.6681, "step": 148530 }, { "epoch": 42.73302646720368, "grad_norm": 1.8016395568847656, "learning_rate": 0.0011453394706559263, "loss": 0.692, "step": 148540 }, { "epoch": 42.73590333716916, "grad_norm": 1.3057150840759277, "learning_rate": 0.001145281933256617, "loss": 0.5073, "step": 148550 }, { "epoch": 42.73878020713464, "grad_norm": 1.3913562297821045, "learning_rate": 0.0011452243958573072, "loss": 0.7027, "step": 148560 }, { "epoch": 42.741657077100115, "grad_norm": 1.6244745254516602, "learning_rate": 0.0011451668584579978, "loss": 0.5548, "step": 148570 }, { "epoch": 42.74453394706559, "grad_norm": 2.0637073516845703, "learning_rate": 0.0011451093210586882, "loss": 0.688, "step": 148580 }, { "epoch": 42.74741081703107, "grad_norm": 2.3189871311187744, "learning_rate": 0.0011450517836593785, "loss": 0.6509, "step": 148590 }, { "epoch": 42.75028768699655, "grad_norm": 1.3449078798294067, "learning_rate": 0.001144994246260069, "loss": 0.5451, "step": 148600 }, { "epoch": 42.75316455696203, "grad_norm": 1.589569091796875, "learning_rate": 0.0011449367088607596, "loss": 0.6783, "step": 148610 }, { "epoch": 42.7560414269275, "grad_norm": 0.52900230884552, "learning_rate": 0.00114487917146145, "loss": 0.5432, "step": 148620 }, { "epoch": 42.75891829689298, "grad_norm": 1.7605410814285278, "learning_rate": 0.0011448216340621405, "loss": 0.8581, "step": 148630 }, { "epoch": 42.761795166858455, "grad_norm": 0.6366732716560364, "learning_rate": 0.0011447640966628307, "loss": 0.7446, "step": 148640 }, { "epoch": 42.76467203682394, "grad_norm": 0.6956754922866821, "learning_rate": 0.0011447065592635212, "loss": 0.4985, "step": 148650 }, { "epoch": 42.767548906789415, "grad_norm": 1.4546395540237427, "learning_rate": 0.0011446490218642118, "loss": 0.5903, "step": 148660 }, { "epoch": 42.77042577675489, "grad_norm": 1.697417140007019, "learning_rate": 0.0011445914844649021, "loss": 0.8558, "step": 148670 }, { "epoch": 42.77330264672037, "grad_norm": 2.21299409866333, "learning_rate": 0.0011445339470655927, "loss": 0.5416, "step": 148680 }, { "epoch": 42.77617951668584, "grad_norm": 0.5562913417816162, "learning_rate": 0.0011444764096662833, "loss": 0.4245, "step": 148690 }, { "epoch": 42.77905638665133, "grad_norm": 1.247200846672058, "learning_rate": 0.0011444188722669734, "loss": 0.5055, "step": 148700 }, { "epoch": 42.7819332566168, "grad_norm": 0.9716946482658386, "learning_rate": 0.001144361334867664, "loss": 0.5292, "step": 148710 }, { "epoch": 42.78481012658228, "grad_norm": 0.7329412698745728, "learning_rate": 0.0011443037974683545, "loss": 0.5883, "step": 148720 }, { "epoch": 42.787686996547755, "grad_norm": 0.9232075810432434, "learning_rate": 0.0011442462600690449, "loss": 0.6101, "step": 148730 }, { "epoch": 42.79056386651323, "grad_norm": 1.004019856452942, "learning_rate": 0.0011441887226697354, "loss": 0.5106, "step": 148740 }, { "epoch": 42.79344073647871, "grad_norm": 0.7950924634933472, "learning_rate": 0.0011441311852704258, "loss": 0.686, "step": 148750 }, { "epoch": 42.79631760644419, "grad_norm": 0.8804298639297485, "learning_rate": 0.0011440736478711161, "loss": 0.5512, "step": 148760 }, { "epoch": 42.79919447640967, "grad_norm": 1.7751930952072144, "learning_rate": 0.0011440161104718067, "loss": 0.5756, "step": 148770 }, { "epoch": 42.80207134637514, "grad_norm": 1.6051541566848755, "learning_rate": 0.001143958573072497, "loss": 0.6339, "step": 148780 }, { "epoch": 42.80494821634062, "grad_norm": 0.6845955848693848, "learning_rate": 0.0011439010356731876, "loss": 0.5744, "step": 148790 }, { "epoch": 42.807825086306096, "grad_norm": 0.8630478978157043, "learning_rate": 0.0011438434982738782, "loss": 0.6173, "step": 148800 }, { "epoch": 42.81070195627158, "grad_norm": 0.9706284403800964, "learning_rate": 0.0011437859608745685, "loss": 0.718, "step": 148810 }, { "epoch": 42.813578826237055, "grad_norm": 2.6556947231292725, "learning_rate": 0.0011437284234752589, "loss": 0.5864, "step": 148820 }, { "epoch": 42.81645569620253, "grad_norm": 1.6103228330612183, "learning_rate": 0.0011436708860759494, "loss": 0.6834, "step": 148830 }, { "epoch": 42.81933256616801, "grad_norm": 0.6356338858604431, "learning_rate": 0.0011436133486766398, "loss": 0.5698, "step": 148840 }, { "epoch": 42.822209436133484, "grad_norm": 1.2885046005249023, "learning_rate": 0.0011435558112773303, "loss": 0.623, "step": 148850 }, { "epoch": 42.82508630609897, "grad_norm": 1.5992878675460815, "learning_rate": 0.0011434982738780207, "loss": 0.6626, "step": 148860 }, { "epoch": 42.82796317606444, "grad_norm": 0.7567806839942932, "learning_rate": 0.0011434407364787113, "loss": 0.6627, "step": 148870 }, { "epoch": 42.83084004602992, "grad_norm": 1.3312031030654907, "learning_rate": 0.0011433831990794016, "loss": 0.4965, "step": 148880 }, { "epoch": 42.833716915995396, "grad_norm": 1.3491744995117188, "learning_rate": 0.001143325661680092, "loss": 0.5615, "step": 148890 }, { "epoch": 42.83659378596087, "grad_norm": 1.1099905967712402, "learning_rate": 0.0011432681242807825, "loss": 0.6011, "step": 148900 }, { "epoch": 42.839470655926355, "grad_norm": 1.1613353490829468, "learning_rate": 0.001143210586881473, "loss": 0.5217, "step": 148910 }, { "epoch": 42.84234752589183, "grad_norm": 0.9559850692749023, "learning_rate": 0.0011431530494821634, "loss": 0.7478, "step": 148920 }, { "epoch": 42.84522439585731, "grad_norm": 2.3885345458984375, "learning_rate": 0.001143095512082854, "loss": 0.652, "step": 148930 }, { "epoch": 42.848101265822784, "grad_norm": 1.4763847589492798, "learning_rate": 0.0011430379746835443, "loss": 0.6539, "step": 148940 }, { "epoch": 42.85097813578826, "grad_norm": 1.0366849899291992, "learning_rate": 0.0011429804372842347, "loss": 0.5571, "step": 148950 }, { "epoch": 42.85385500575374, "grad_norm": 1.599227786064148, "learning_rate": 0.0011429228998849252, "loss": 0.6251, "step": 148960 }, { "epoch": 42.85673187571922, "grad_norm": 2.119070291519165, "learning_rate": 0.0011428653624856156, "loss": 0.5628, "step": 148970 }, { "epoch": 42.859608745684696, "grad_norm": 1.1557910442352295, "learning_rate": 0.0011428078250863062, "loss": 0.7212, "step": 148980 }, { "epoch": 42.86248561565017, "grad_norm": 1.3163789510726929, "learning_rate": 0.0011427502876869967, "loss": 0.5988, "step": 148990 }, { "epoch": 42.86536248561565, "grad_norm": 1.3667690753936768, "learning_rate": 0.0011426927502876869, "loss": 0.5593, "step": 149000 }, { "epoch": 42.868239355581125, "grad_norm": 1.9273959398269653, "learning_rate": 0.0011426352128883774, "loss": 0.656, "step": 149010 }, { "epoch": 42.87111622554661, "grad_norm": 1.4674168825149536, "learning_rate": 0.001142577675489068, "loss": 0.6047, "step": 149020 }, { "epoch": 42.873993095512084, "grad_norm": 0.6945445537567139, "learning_rate": 0.0011425201380897583, "loss": 0.5356, "step": 149030 }, { "epoch": 42.87686996547756, "grad_norm": 1.133489727973938, "learning_rate": 0.001142462600690449, "loss": 0.6289, "step": 149040 }, { "epoch": 42.879746835443036, "grad_norm": 2.084089517593384, "learning_rate": 0.0011424050632911395, "loss": 0.4749, "step": 149050 }, { "epoch": 42.88262370540851, "grad_norm": 1.1049916744232178, "learning_rate": 0.0011423475258918296, "loss": 0.5545, "step": 149060 }, { "epoch": 42.885500575373996, "grad_norm": 0.9543839693069458, "learning_rate": 0.0011422899884925202, "loss": 0.6419, "step": 149070 }, { "epoch": 42.88837744533947, "grad_norm": 1.4126136302947998, "learning_rate": 0.0011422324510932105, "loss": 0.5638, "step": 149080 }, { "epoch": 42.89125431530495, "grad_norm": 1.4898977279663086, "learning_rate": 0.001142174913693901, "loss": 0.5859, "step": 149090 }, { "epoch": 42.894131185270425, "grad_norm": 1.0991699695587158, "learning_rate": 0.0011421173762945916, "loss": 0.5196, "step": 149100 }, { "epoch": 42.8970080552359, "grad_norm": 1.0083822011947632, "learning_rate": 0.001142059838895282, "loss": 0.4839, "step": 149110 }, { "epoch": 42.899884925201384, "grad_norm": 1.6004881858825684, "learning_rate": 0.0011420023014959723, "loss": 0.5144, "step": 149120 }, { "epoch": 42.90276179516686, "grad_norm": 0.8322696685791016, "learning_rate": 0.0011419447640966629, "loss": 0.6059, "step": 149130 }, { "epoch": 42.90563866513234, "grad_norm": 1.2497060298919678, "learning_rate": 0.0011418872266973532, "loss": 0.6371, "step": 149140 }, { "epoch": 42.90851553509781, "grad_norm": 0.9703688621520996, "learning_rate": 0.0011418296892980438, "loss": 0.5349, "step": 149150 }, { "epoch": 42.91139240506329, "grad_norm": 0.73880535364151, "learning_rate": 0.0011417721518987344, "loss": 0.6598, "step": 149160 }, { "epoch": 42.91426927502877, "grad_norm": 1.003406047821045, "learning_rate": 0.0011417146144994247, "loss": 0.5075, "step": 149170 }, { "epoch": 42.91714614499425, "grad_norm": 2.6954360008239746, "learning_rate": 0.001141657077100115, "loss": 0.6616, "step": 149180 }, { "epoch": 42.920023014959725, "grad_norm": 1.1844353675842285, "learning_rate": 0.0011415995397008056, "loss": 0.635, "step": 149190 }, { "epoch": 42.9228998849252, "grad_norm": 1.6879477500915527, "learning_rate": 0.001141542002301496, "loss": 0.7712, "step": 149200 }, { "epoch": 42.92577675489068, "grad_norm": 0.8938877582550049, "learning_rate": 0.0011414844649021865, "loss": 0.6064, "step": 149210 }, { "epoch": 42.92865362485615, "grad_norm": 0.8654124736785889, "learning_rate": 0.0011414269275028769, "loss": 0.5353, "step": 149220 }, { "epoch": 42.93153049482164, "grad_norm": 0.9258270263671875, "learning_rate": 0.0011413693901035672, "loss": 0.543, "step": 149230 }, { "epoch": 42.93440736478711, "grad_norm": 1.4326214790344238, "learning_rate": 0.0011413118527042578, "loss": 0.6354, "step": 149240 }, { "epoch": 42.93728423475259, "grad_norm": 0.8815916776657104, "learning_rate": 0.0011412543153049481, "loss": 0.7281, "step": 149250 }, { "epoch": 42.940161104718065, "grad_norm": 1.010915994644165, "learning_rate": 0.0011411967779056387, "loss": 0.6506, "step": 149260 }, { "epoch": 42.94303797468354, "grad_norm": 0.9294273853302002, "learning_rate": 0.0011411392405063293, "loss": 0.5528, "step": 149270 }, { "epoch": 42.945914844649025, "grad_norm": 1.967900037765503, "learning_rate": 0.0011410817031070196, "loss": 0.5872, "step": 149280 }, { "epoch": 42.9487917146145, "grad_norm": 1.7148141860961914, "learning_rate": 0.00114102416570771, "loss": 0.4904, "step": 149290 }, { "epoch": 42.95166858457998, "grad_norm": 2.255366086959839, "learning_rate": 0.0011409666283084005, "loss": 0.7789, "step": 149300 }, { "epoch": 42.95454545454545, "grad_norm": 1.4478230476379395, "learning_rate": 0.0011409090909090909, "loss": 0.4886, "step": 149310 }, { "epoch": 42.95742232451093, "grad_norm": 1.2769991159439087, "learning_rate": 0.0011408515535097814, "loss": 0.7604, "step": 149320 }, { "epoch": 42.96029919447641, "grad_norm": 0.9462365508079529, "learning_rate": 0.0011407940161104718, "loss": 0.7133, "step": 149330 }, { "epoch": 42.96317606444189, "grad_norm": 2.0061779022216797, "learning_rate": 0.0011407364787111623, "loss": 0.6629, "step": 149340 }, { "epoch": 42.966052934407365, "grad_norm": 0.9885314106941223, "learning_rate": 0.0011406789413118527, "loss": 0.6152, "step": 149350 }, { "epoch": 42.96892980437284, "grad_norm": 1.227712631225586, "learning_rate": 0.001140621403912543, "loss": 0.7099, "step": 149360 }, { "epoch": 42.97180667433832, "grad_norm": 0.6396909356117249, "learning_rate": 0.0011405638665132336, "loss": 0.5265, "step": 149370 }, { "epoch": 42.9746835443038, "grad_norm": 1.8109666109085083, "learning_rate": 0.0011405063291139242, "loss": 0.6032, "step": 149380 }, { "epoch": 42.97756041426928, "grad_norm": 1.6192247867584229, "learning_rate": 0.0011404487917146145, "loss": 0.54, "step": 149390 }, { "epoch": 42.98043728423475, "grad_norm": 0.928032398223877, "learning_rate": 0.001140391254315305, "loss": 0.5774, "step": 149400 }, { "epoch": 42.98331415420023, "grad_norm": 2.2102909088134766, "learning_rate": 0.0011403337169159954, "loss": 0.6477, "step": 149410 }, { "epoch": 42.986191024165706, "grad_norm": 0.9183003306388855, "learning_rate": 0.0011402761795166858, "loss": 0.5925, "step": 149420 }, { "epoch": 42.98906789413118, "grad_norm": 1.6477649211883545, "learning_rate": 0.0011402186421173763, "loss": 0.5667, "step": 149430 }, { "epoch": 42.991944764096665, "grad_norm": 0.6612658500671387, "learning_rate": 0.0011401611047180667, "loss": 0.5819, "step": 149440 }, { "epoch": 42.99482163406214, "grad_norm": 1.7936112880706787, "learning_rate": 0.0011401035673187572, "loss": 0.6618, "step": 149450 }, { "epoch": 42.99769850402762, "grad_norm": 1.3010181188583374, "learning_rate": 0.0011400460299194478, "loss": 0.6304, "step": 149460 }, { "epoch": 43.000575373993094, "grad_norm": 1.528557300567627, "learning_rate": 0.001139988492520138, "loss": 0.5127, "step": 149470 }, { "epoch": 43.00345224395857, "grad_norm": 1.3744611740112305, "learning_rate": 0.0011399309551208285, "loss": 0.4787, "step": 149480 }, { "epoch": 43.00632911392405, "grad_norm": 1.1959748268127441, "learning_rate": 0.001139873417721519, "loss": 0.5611, "step": 149490 }, { "epoch": 43.00920598388953, "grad_norm": 0.8861094117164612, "learning_rate": 0.0011398158803222094, "loss": 0.478, "step": 149500 }, { "epoch": 43.012082853855006, "grad_norm": 0.9505797028541565, "learning_rate": 0.0011397583429229, "loss": 0.5146, "step": 149510 }, { "epoch": 43.01495972382048, "grad_norm": 2.142923355102539, "learning_rate": 0.0011397008055235905, "loss": 0.5105, "step": 149520 }, { "epoch": 43.01783659378596, "grad_norm": 0.8091637492179871, "learning_rate": 0.0011396432681242807, "loss": 0.357, "step": 149530 }, { "epoch": 43.02071346375144, "grad_norm": 1.028327465057373, "learning_rate": 0.0011395857307249712, "loss": 0.5266, "step": 149540 }, { "epoch": 43.02359033371692, "grad_norm": 1.1365580558776855, "learning_rate": 0.0011395281933256616, "loss": 0.5364, "step": 149550 }, { "epoch": 43.026467203682394, "grad_norm": 0.9707778096199036, "learning_rate": 0.0011394706559263521, "loss": 0.5427, "step": 149560 }, { "epoch": 43.02934407364787, "grad_norm": 1.1177051067352295, "learning_rate": 0.0011394131185270427, "loss": 0.7663, "step": 149570 }, { "epoch": 43.032220943613346, "grad_norm": 1.6278420686721802, "learning_rate": 0.001139355581127733, "loss": 0.5704, "step": 149580 }, { "epoch": 43.03509781357883, "grad_norm": 0.9436597228050232, "learning_rate": 0.0011392980437284234, "loss": 0.4586, "step": 149590 }, { "epoch": 43.037974683544306, "grad_norm": 1.5625677108764648, "learning_rate": 0.001139240506329114, "loss": 0.6053, "step": 149600 }, { "epoch": 43.04085155350978, "grad_norm": 1.861481785774231, "learning_rate": 0.0011391829689298043, "loss": 0.6127, "step": 149610 }, { "epoch": 43.04372842347526, "grad_norm": 1.4779622554779053, "learning_rate": 0.0011391254315304949, "loss": 0.5755, "step": 149620 }, { "epoch": 43.046605293440734, "grad_norm": 2.023521900177002, "learning_rate": 0.0011390678941311854, "loss": 0.5318, "step": 149630 }, { "epoch": 43.04948216340621, "grad_norm": 0.9913136959075928, "learning_rate": 0.0011390103567318758, "loss": 0.4605, "step": 149640 }, { "epoch": 43.052359033371694, "grad_norm": 1.5370423793792725, "learning_rate": 0.0011389528193325661, "loss": 0.4203, "step": 149650 }, { "epoch": 43.05523590333717, "grad_norm": 1.4795774221420288, "learning_rate": 0.0011388952819332565, "loss": 0.485, "step": 149660 }, { "epoch": 43.058112773302646, "grad_norm": 0.8566302061080933, "learning_rate": 0.001138837744533947, "loss": 0.6095, "step": 149670 }, { "epoch": 43.06098964326812, "grad_norm": 0.7220605611801147, "learning_rate": 0.0011387802071346376, "loss": 0.527, "step": 149680 }, { "epoch": 43.0638665132336, "grad_norm": 1.156201720237732, "learning_rate": 0.001138722669735328, "loss": 0.5313, "step": 149690 }, { "epoch": 43.06674338319908, "grad_norm": 0.8377506732940674, "learning_rate": 0.0011386651323360185, "loss": 0.5076, "step": 149700 }, { "epoch": 43.06962025316456, "grad_norm": 1.2186956405639648, "learning_rate": 0.0011386075949367089, "loss": 0.4974, "step": 149710 }, { "epoch": 43.072497123130034, "grad_norm": 2.2840354442596436, "learning_rate": 0.0011385500575373992, "loss": 0.5099, "step": 149720 }, { "epoch": 43.07537399309551, "grad_norm": 1.189957618713379, "learning_rate": 0.0011384925201380898, "loss": 0.662, "step": 149730 }, { "epoch": 43.07825086306099, "grad_norm": 1.3749589920043945, "learning_rate": 0.0011384349827387803, "loss": 0.7008, "step": 149740 }, { "epoch": 43.08112773302647, "grad_norm": 2.145465135574341, "learning_rate": 0.0011383774453394707, "loss": 0.5435, "step": 149750 }, { "epoch": 43.084004602991946, "grad_norm": 1.1689163446426392, "learning_rate": 0.0011383199079401613, "loss": 0.5424, "step": 149760 }, { "epoch": 43.08688147295742, "grad_norm": 1.0048032999038696, "learning_rate": 0.0011382623705408514, "loss": 0.6013, "step": 149770 }, { "epoch": 43.0897583429229, "grad_norm": 1.529023289680481, "learning_rate": 0.001138204833141542, "loss": 0.5919, "step": 149780 }, { "epoch": 43.092635212888375, "grad_norm": 1.4046090841293335, "learning_rate": 0.0011381472957422325, "loss": 0.5647, "step": 149790 }, { "epoch": 43.09551208285386, "grad_norm": 1.8447391986846924, "learning_rate": 0.0011380897583429229, "loss": 0.8254, "step": 149800 }, { "epoch": 43.098388952819334, "grad_norm": 1.4220709800720215, "learning_rate": 0.0011380322209436134, "loss": 0.5205, "step": 149810 }, { "epoch": 43.10126582278481, "grad_norm": 1.843914270401001, "learning_rate": 0.001137974683544304, "loss": 0.5806, "step": 149820 }, { "epoch": 43.10414269275029, "grad_norm": 1.2245798110961914, "learning_rate": 0.0011379171461449941, "loss": 0.6851, "step": 149830 }, { "epoch": 43.10701956271576, "grad_norm": 0.9484842419624329, "learning_rate": 0.0011378596087456847, "loss": 0.5818, "step": 149840 }, { "epoch": 43.10989643268124, "grad_norm": 0.8424268960952759, "learning_rate": 0.0011378020713463752, "loss": 0.657, "step": 149850 }, { "epoch": 43.11277330264672, "grad_norm": 1.4561090469360352, "learning_rate": 0.0011377445339470656, "loss": 0.5468, "step": 149860 }, { "epoch": 43.1156501726122, "grad_norm": 1.850246548652649, "learning_rate": 0.0011376869965477562, "loss": 0.642, "step": 149870 }, { "epoch": 43.118527042577675, "grad_norm": 0.8557595014572144, "learning_rate": 0.0011376294591484467, "loss": 0.4966, "step": 149880 }, { "epoch": 43.12140391254315, "grad_norm": 1.5812174081802368, "learning_rate": 0.0011375719217491369, "loss": 0.4974, "step": 149890 }, { "epoch": 43.12428078250863, "grad_norm": 1.4403913021087646, "learning_rate": 0.0011375143843498274, "loss": 0.5677, "step": 149900 }, { "epoch": 43.12715765247411, "grad_norm": 0.9283823370933533, "learning_rate": 0.0011374568469505178, "loss": 0.4749, "step": 149910 }, { "epoch": 43.13003452243959, "grad_norm": 1.651453971862793, "learning_rate": 0.0011373993095512083, "loss": 0.6272, "step": 149920 }, { "epoch": 43.13291139240506, "grad_norm": 2.4133732318878174, "learning_rate": 0.0011373417721518989, "loss": 0.5638, "step": 149930 }, { "epoch": 43.13578826237054, "grad_norm": 1.163543939590454, "learning_rate": 0.0011372842347525892, "loss": 0.4811, "step": 149940 }, { "epoch": 43.138665132336016, "grad_norm": 1.464872121810913, "learning_rate": 0.0011372266973532796, "loss": 0.6075, "step": 149950 }, { "epoch": 43.1415420023015, "grad_norm": 1.0930736064910889, "learning_rate": 0.0011371691599539701, "loss": 0.5204, "step": 149960 }, { "epoch": 43.144418872266975, "grad_norm": 2.0450851917266846, "learning_rate": 0.0011371116225546605, "loss": 0.6701, "step": 149970 }, { "epoch": 43.14729574223245, "grad_norm": 0.7408958077430725, "learning_rate": 0.001137054085155351, "loss": 0.453, "step": 149980 }, { "epoch": 43.15017261219793, "grad_norm": 1.6567538976669312, "learning_rate": 0.0011369965477560416, "loss": 0.5097, "step": 149990 }, { "epoch": 43.153049482163404, "grad_norm": 2.085573673248291, "learning_rate": 0.001136939010356732, "loss": 0.6894, "step": 150000 }, { "epoch": 43.15592635212889, "grad_norm": 2.020494222640991, "learning_rate": 0.0011368814729574223, "loss": 0.5399, "step": 150010 }, { "epoch": 43.15880322209436, "grad_norm": 1.8052241802215576, "learning_rate": 0.0011368239355581127, "loss": 0.5262, "step": 150020 }, { "epoch": 43.16168009205984, "grad_norm": 1.3414500951766968, "learning_rate": 0.0011367663981588032, "loss": 0.5362, "step": 150030 }, { "epoch": 43.164556962025316, "grad_norm": 0.6259375810623169, "learning_rate": 0.0011367088607594938, "loss": 0.6155, "step": 150040 }, { "epoch": 43.16743383199079, "grad_norm": 1.1697238683700562, "learning_rate": 0.0011366513233601841, "loss": 0.5907, "step": 150050 }, { "epoch": 43.170310701956275, "grad_norm": 1.9961559772491455, "learning_rate": 0.0011365937859608745, "loss": 0.677, "step": 150060 }, { "epoch": 43.17318757192175, "grad_norm": 0.9414137601852417, "learning_rate": 0.001136536248561565, "loss": 0.5287, "step": 150070 }, { "epoch": 43.17606444188723, "grad_norm": 1.0986756086349487, "learning_rate": 0.0011364787111622554, "loss": 0.4613, "step": 150080 }, { "epoch": 43.178941311852704, "grad_norm": 1.177215814590454, "learning_rate": 0.001136421173762946, "loss": 0.7493, "step": 150090 }, { "epoch": 43.18181818181818, "grad_norm": 0.8150240778923035, "learning_rate": 0.0011363636363636365, "loss": 0.6008, "step": 150100 }, { "epoch": 43.184695051783656, "grad_norm": 1.4703569412231445, "learning_rate": 0.0011363060989643269, "loss": 0.4757, "step": 150110 }, { "epoch": 43.18757192174914, "grad_norm": 1.087619662284851, "learning_rate": 0.0011362485615650172, "loss": 0.6382, "step": 150120 }, { "epoch": 43.190448791714616, "grad_norm": 1.3601446151733398, "learning_rate": 0.0011361910241657076, "loss": 0.4889, "step": 150130 }, { "epoch": 43.19332566168009, "grad_norm": 0.9133602380752563, "learning_rate": 0.0011361334867663981, "loss": 0.5408, "step": 150140 }, { "epoch": 43.19620253164557, "grad_norm": 0.6995880603790283, "learning_rate": 0.0011360759493670887, "loss": 0.5687, "step": 150150 }, { "epoch": 43.199079401611044, "grad_norm": 1.0406588315963745, "learning_rate": 0.001136018411967779, "loss": 0.4571, "step": 150160 }, { "epoch": 43.20195627157653, "grad_norm": 1.9611799716949463, "learning_rate": 0.0011359608745684696, "loss": 0.6422, "step": 150170 }, { "epoch": 43.204833141542004, "grad_norm": 1.971500039100647, "learning_rate": 0.00113590333716916, "loss": 0.6345, "step": 150180 }, { "epoch": 43.20771001150748, "grad_norm": 1.96961510181427, "learning_rate": 0.0011358457997698503, "loss": 0.6373, "step": 150190 }, { "epoch": 43.210586881472956, "grad_norm": 1.1597838401794434, "learning_rate": 0.0011357882623705409, "loss": 0.6219, "step": 150200 }, { "epoch": 43.21346375143843, "grad_norm": 1.297189712524414, "learning_rate": 0.0011357307249712314, "loss": 0.5079, "step": 150210 }, { "epoch": 43.216340621403916, "grad_norm": 1.2859901189804077, "learning_rate": 0.0011356731875719218, "loss": 0.6381, "step": 150220 }, { "epoch": 43.21921749136939, "grad_norm": 0.963326632976532, "learning_rate": 0.0011356156501726123, "loss": 0.5211, "step": 150230 }, { "epoch": 43.22209436133487, "grad_norm": 2.9291629791259766, "learning_rate": 0.0011355581127733025, "loss": 0.676, "step": 150240 }, { "epoch": 43.224971231300344, "grad_norm": 1.4971524477005005, "learning_rate": 0.001135500575373993, "loss": 0.585, "step": 150250 }, { "epoch": 43.22784810126582, "grad_norm": 1.8244024515151978, "learning_rate": 0.0011354430379746836, "loss": 0.5284, "step": 150260 }, { "epoch": 43.230724971231304, "grad_norm": 2.1288256645202637, "learning_rate": 0.001135385500575374, "loss": 0.6498, "step": 150270 }, { "epoch": 43.23360184119678, "grad_norm": 1.1154882907867432, "learning_rate": 0.0011353279631760645, "loss": 0.5692, "step": 150280 }, { "epoch": 43.236478711162256, "grad_norm": 1.3137820959091187, "learning_rate": 0.001135270425776755, "loss": 0.5259, "step": 150290 }, { "epoch": 43.23935558112773, "grad_norm": 0.8064895868301392, "learning_rate": 0.0011352128883774452, "loss": 0.4962, "step": 150300 }, { "epoch": 43.24223245109321, "grad_norm": 1.0585018396377563, "learning_rate": 0.0011351553509781358, "loss": 0.5844, "step": 150310 }, { "epoch": 43.245109321058685, "grad_norm": 1.2752132415771484, "learning_rate": 0.0011350978135788263, "loss": 0.5063, "step": 150320 }, { "epoch": 43.24798619102417, "grad_norm": 1.2874329090118408, "learning_rate": 0.0011350402761795167, "loss": 0.6901, "step": 150330 }, { "epoch": 43.250863060989644, "grad_norm": 1.0748392343521118, "learning_rate": 0.0011349827387802072, "loss": 0.5358, "step": 150340 }, { "epoch": 43.25373993095512, "grad_norm": 1.76883065700531, "learning_rate": 0.0011349252013808976, "loss": 0.6592, "step": 150350 }, { "epoch": 43.2566168009206, "grad_norm": 0.896681547164917, "learning_rate": 0.001134867663981588, "loss": 0.5598, "step": 150360 }, { "epoch": 43.25949367088607, "grad_norm": 2.043303966522217, "learning_rate": 0.0011348101265822785, "loss": 0.7381, "step": 150370 }, { "epoch": 43.262370540851556, "grad_norm": 1.729781150817871, "learning_rate": 0.0011347525891829688, "loss": 0.5002, "step": 150380 }, { "epoch": 43.26524741081703, "grad_norm": 1.3782883882522583, "learning_rate": 0.0011346950517836594, "loss": 0.6402, "step": 150390 }, { "epoch": 43.26812428078251, "grad_norm": 1.2247562408447266, "learning_rate": 0.00113463751438435, "loss": 0.4148, "step": 150400 }, { "epoch": 43.271001150747985, "grad_norm": 0.7926607131958008, "learning_rate": 0.0011345799769850403, "loss": 0.4406, "step": 150410 }, { "epoch": 43.27387802071346, "grad_norm": 1.1987841129302979, "learning_rate": 0.0011345224395857307, "loss": 0.6029, "step": 150420 }, { "epoch": 43.276754890678944, "grad_norm": 1.0102498531341553, "learning_rate": 0.0011344649021864212, "loss": 0.5474, "step": 150430 }, { "epoch": 43.27963176064442, "grad_norm": 2.1439030170440674, "learning_rate": 0.0011344073647871116, "loss": 0.6195, "step": 150440 }, { "epoch": 43.2825086306099, "grad_norm": 1.2060185670852661, "learning_rate": 0.0011343498273878021, "loss": 0.536, "step": 150450 }, { "epoch": 43.28538550057537, "grad_norm": 1.204984188079834, "learning_rate": 0.0011342922899884927, "loss": 0.589, "step": 150460 }, { "epoch": 43.28826237054085, "grad_norm": 1.1763063669204712, "learning_rate": 0.001134234752589183, "loss": 0.5643, "step": 150470 }, { "epoch": 43.29113924050633, "grad_norm": 0.8725758194923401, "learning_rate": 0.0011341772151898734, "loss": 0.4974, "step": 150480 }, { "epoch": 43.29401611047181, "grad_norm": 0.8061342239379883, "learning_rate": 0.0011341196777905637, "loss": 0.5692, "step": 150490 }, { "epoch": 43.296892980437285, "grad_norm": 2.0805294513702393, "learning_rate": 0.0011340621403912543, "loss": 0.5978, "step": 150500 }, { "epoch": 43.29976985040276, "grad_norm": 0.9470042586326599, "learning_rate": 0.0011340046029919449, "loss": 0.5416, "step": 150510 }, { "epoch": 43.30264672036824, "grad_norm": 0.9311071038246155, "learning_rate": 0.0011339470655926352, "loss": 0.6462, "step": 150520 }, { "epoch": 43.30552359033371, "grad_norm": 1.137307047843933, "learning_rate": 0.0011338895281933258, "loss": 0.5793, "step": 150530 }, { "epoch": 43.3084004602992, "grad_norm": 1.728010892868042, "learning_rate": 0.0011338319907940161, "loss": 0.5991, "step": 150540 }, { "epoch": 43.31127733026467, "grad_norm": 2.092661142349243, "learning_rate": 0.0011337744533947065, "loss": 0.5774, "step": 150550 }, { "epoch": 43.31415420023015, "grad_norm": 0.88137286901474, "learning_rate": 0.001133716915995397, "loss": 0.4967, "step": 150560 }, { "epoch": 43.317031070195625, "grad_norm": 1.6055787801742554, "learning_rate": 0.0011336593785960876, "loss": 0.6717, "step": 150570 }, { "epoch": 43.3199079401611, "grad_norm": 1.9226982593536377, "learning_rate": 0.001133601841196778, "loss": 0.6338, "step": 150580 }, { "epoch": 43.322784810126585, "grad_norm": 2.1383252143859863, "learning_rate": 0.0011335443037974685, "loss": 0.5551, "step": 150590 }, { "epoch": 43.32566168009206, "grad_norm": 1.627279281616211, "learning_rate": 0.0011334867663981586, "loss": 0.5447, "step": 150600 }, { "epoch": 43.32853855005754, "grad_norm": 1.4463257789611816, "learning_rate": 0.0011334292289988492, "loss": 0.544, "step": 150610 }, { "epoch": 43.33141542002301, "grad_norm": 1.7414617538452148, "learning_rate": 0.0011333716915995398, "loss": 0.6229, "step": 150620 }, { "epoch": 43.33429228998849, "grad_norm": 1.1211024522781372, "learning_rate": 0.0011333141542002301, "loss": 0.7026, "step": 150630 }, { "epoch": 43.33716915995397, "grad_norm": 0.6803088784217834, "learning_rate": 0.0011332566168009207, "loss": 0.583, "step": 150640 }, { "epoch": 43.34004602991945, "grad_norm": 1.5408072471618652, "learning_rate": 0.0011331990794016113, "loss": 0.6046, "step": 150650 }, { "epoch": 43.342922899884925, "grad_norm": 1.0338772535324097, "learning_rate": 0.0011331415420023014, "loss": 0.7692, "step": 150660 }, { "epoch": 43.3457997698504, "grad_norm": 1.671456217765808, "learning_rate": 0.001133084004602992, "loss": 0.6111, "step": 150670 }, { "epoch": 43.34867663981588, "grad_norm": 2.392505645751953, "learning_rate": 0.0011330264672036825, "loss": 0.6241, "step": 150680 }, { "epoch": 43.35155350978136, "grad_norm": 0.9607836008071899, "learning_rate": 0.0011329689298043729, "loss": 0.6552, "step": 150690 }, { "epoch": 43.35443037974684, "grad_norm": 1.3503837585449219, "learning_rate": 0.0011329113924050634, "loss": 0.5928, "step": 150700 }, { "epoch": 43.35730724971231, "grad_norm": 2.5405309200286865, "learning_rate": 0.0011328538550057538, "loss": 0.5826, "step": 150710 }, { "epoch": 43.36018411967779, "grad_norm": 2.4969565868377686, "learning_rate": 0.0011327963176064441, "loss": 0.5869, "step": 150720 }, { "epoch": 43.363060989643266, "grad_norm": 1.1503328084945679, "learning_rate": 0.0011327387802071347, "loss": 0.5989, "step": 150730 }, { "epoch": 43.36593785960875, "grad_norm": 1.051801085472107, "learning_rate": 0.001132681242807825, "loss": 0.5739, "step": 150740 }, { "epoch": 43.368814729574225, "grad_norm": 0.9042471051216125, "learning_rate": 0.0011326237054085156, "loss": 0.5819, "step": 150750 }, { "epoch": 43.3716915995397, "grad_norm": 1.2947893142700195, "learning_rate": 0.0011325661680092062, "loss": 0.7582, "step": 150760 }, { "epoch": 43.37456846950518, "grad_norm": 1.3310590982437134, "learning_rate": 0.0011325086306098965, "loss": 0.5512, "step": 150770 }, { "epoch": 43.377445339470654, "grad_norm": 1.087477445602417, "learning_rate": 0.0011324510932105868, "loss": 0.5932, "step": 150780 }, { "epoch": 43.38032220943613, "grad_norm": 2.213256359100342, "learning_rate": 0.0011323935558112774, "loss": 0.6933, "step": 150790 }, { "epoch": 43.383199079401614, "grad_norm": 2.2331674098968506, "learning_rate": 0.0011323360184119678, "loss": 0.7249, "step": 150800 }, { "epoch": 43.38607594936709, "grad_norm": 1.3511343002319336, "learning_rate": 0.0011322784810126583, "loss": 0.4728, "step": 150810 }, { "epoch": 43.388952819332566, "grad_norm": 1.288111925125122, "learning_rate": 0.0011322209436133487, "loss": 0.4869, "step": 150820 }, { "epoch": 43.39182968929804, "grad_norm": 1.652740716934204, "learning_rate": 0.0011321634062140392, "loss": 0.6511, "step": 150830 }, { "epoch": 43.39470655926352, "grad_norm": 0.7490584254264832, "learning_rate": 0.0011321058688147296, "loss": 0.5808, "step": 150840 }, { "epoch": 43.397583429229, "grad_norm": 0.908640444278717, "learning_rate": 0.00113204833141542, "loss": 0.5767, "step": 150850 }, { "epoch": 43.40046029919448, "grad_norm": 2.107147455215454, "learning_rate": 0.0011319907940161105, "loss": 0.6386, "step": 150860 }, { "epoch": 43.403337169159954, "grad_norm": 0.8198642730712891, "learning_rate": 0.001131933256616801, "loss": 0.589, "step": 150870 }, { "epoch": 43.40621403912543, "grad_norm": 2.1333510875701904, "learning_rate": 0.0011318757192174914, "loss": 0.709, "step": 150880 }, { "epoch": 43.40909090909091, "grad_norm": 1.106026291847229, "learning_rate": 0.0011318181818181818, "loss": 0.5505, "step": 150890 }, { "epoch": 43.41196777905639, "grad_norm": 0.5558889508247375, "learning_rate": 0.0011317606444188723, "loss": 0.6103, "step": 150900 }, { "epoch": 43.414844649021866, "grad_norm": 1.4791285991668701, "learning_rate": 0.0011317031070195627, "loss": 0.4594, "step": 150910 }, { "epoch": 43.41772151898734, "grad_norm": 5.261424541473389, "learning_rate": 0.0011316455696202532, "loss": 0.5472, "step": 150920 }, { "epoch": 43.42059838895282, "grad_norm": 0.6068573594093323, "learning_rate": 0.0011315880322209436, "loss": 0.5606, "step": 150930 }, { "epoch": 43.423475258918295, "grad_norm": 1.686147928237915, "learning_rate": 0.0011315304948216341, "loss": 0.5841, "step": 150940 }, { "epoch": 43.42635212888378, "grad_norm": 1.3159235715866089, "learning_rate": 0.0011314729574223245, "loss": 0.5627, "step": 150950 }, { "epoch": 43.429228998849254, "grad_norm": 2.2294583320617676, "learning_rate": 0.0011314154200230148, "loss": 0.5418, "step": 150960 }, { "epoch": 43.43210586881473, "grad_norm": 4.702817440032959, "learning_rate": 0.0011313578826237054, "loss": 0.6783, "step": 150970 }, { "epoch": 43.43498273878021, "grad_norm": 1.5217705965042114, "learning_rate": 0.001131300345224396, "loss": 0.6466, "step": 150980 }, { "epoch": 43.43785960874568, "grad_norm": 2.746288299560547, "learning_rate": 0.0011312428078250863, "loss": 0.777, "step": 150990 }, { "epoch": 43.44073647871116, "grad_norm": 1.462040662765503, "learning_rate": 0.0011311852704257769, "loss": 0.6303, "step": 151000 }, { "epoch": 43.44361334867664, "grad_norm": 1.038284420967102, "learning_rate": 0.0011311277330264672, "loss": 0.5214, "step": 151010 }, { "epoch": 43.44649021864212, "grad_norm": 1.357619285583496, "learning_rate": 0.0011310701956271576, "loss": 0.5961, "step": 151020 }, { "epoch": 43.449367088607595, "grad_norm": 0.6139394044876099, "learning_rate": 0.0011310126582278481, "loss": 0.7552, "step": 151030 }, { "epoch": 43.45224395857307, "grad_norm": 1.4439924955368042, "learning_rate": 0.0011309551208285385, "loss": 0.5031, "step": 151040 }, { "epoch": 43.45512082853855, "grad_norm": 0.8119112849235535, "learning_rate": 0.001130897583429229, "loss": 0.5446, "step": 151050 }, { "epoch": 43.45799769850403, "grad_norm": 1.1841707229614258, "learning_rate": 0.0011308400460299196, "loss": 0.4563, "step": 151060 }, { "epoch": 43.46087456846951, "grad_norm": 1.5013889074325562, "learning_rate": 0.0011307825086306097, "loss": 0.5901, "step": 151070 }, { "epoch": 43.46375143843498, "grad_norm": 1.0521425008773804, "learning_rate": 0.0011307249712313003, "loss": 0.5862, "step": 151080 }, { "epoch": 43.46662830840046, "grad_norm": 2.2579352855682373, "learning_rate": 0.0011306674338319909, "loss": 0.5631, "step": 151090 }, { "epoch": 43.469505178365935, "grad_norm": 0.8671956062316895, "learning_rate": 0.0011306098964326812, "loss": 0.5214, "step": 151100 }, { "epoch": 43.47238204833142, "grad_norm": 1.007672905921936, "learning_rate": 0.0011305523590333718, "loss": 0.6697, "step": 151110 }, { "epoch": 43.475258918296895, "grad_norm": 2.229574203491211, "learning_rate": 0.0011304948216340623, "loss": 0.6766, "step": 151120 }, { "epoch": 43.47813578826237, "grad_norm": 1.4002786874771118, "learning_rate": 0.0011304372842347525, "loss": 0.5645, "step": 151130 }, { "epoch": 43.48101265822785, "grad_norm": 1.0903468132019043, "learning_rate": 0.001130379746835443, "loss": 0.6128, "step": 151140 }, { "epoch": 43.48388952819332, "grad_norm": 1.437510371208191, "learning_rate": 0.0011303222094361336, "loss": 0.5855, "step": 151150 }, { "epoch": 43.48676639815881, "grad_norm": 1.2020325660705566, "learning_rate": 0.001130264672036824, "loss": 0.6568, "step": 151160 }, { "epoch": 43.48964326812428, "grad_norm": 1.035928726196289, "learning_rate": 0.0011302071346375145, "loss": 0.5652, "step": 151170 }, { "epoch": 43.49252013808976, "grad_norm": 0.9057309627532959, "learning_rate": 0.0011301495972382049, "loss": 0.6529, "step": 151180 }, { "epoch": 43.495397008055235, "grad_norm": 1.2878727912902832, "learning_rate": 0.0011300920598388952, "loss": 0.5393, "step": 151190 }, { "epoch": 43.49827387802071, "grad_norm": 0.7185171842575073, "learning_rate": 0.0011300345224395858, "loss": 0.6759, "step": 151200 }, { "epoch": 43.50115074798619, "grad_norm": 1.0339505672454834, "learning_rate": 0.0011299769850402761, "loss": 0.5124, "step": 151210 }, { "epoch": 43.50402761795167, "grad_norm": 0.8861956000328064, "learning_rate": 0.0011299194476409667, "loss": 0.5758, "step": 151220 }, { "epoch": 43.50690448791715, "grad_norm": 1.4363309144973755, "learning_rate": 0.0011298619102416572, "loss": 0.5557, "step": 151230 }, { "epoch": 43.50978135788262, "grad_norm": 1.2386327981948853, "learning_rate": 0.0011298043728423476, "loss": 0.6157, "step": 151240 }, { "epoch": 43.5126582278481, "grad_norm": 0.8062719106674194, "learning_rate": 0.001129746835443038, "loss": 0.533, "step": 151250 }, { "epoch": 43.515535097813576, "grad_norm": 0.9153886437416077, "learning_rate": 0.0011296892980437285, "loss": 0.6436, "step": 151260 }, { "epoch": 43.51841196777906, "grad_norm": 1.6073975563049316, "learning_rate": 0.0011296317606444188, "loss": 0.7085, "step": 151270 }, { "epoch": 43.521288837744535, "grad_norm": 1.5296828746795654, "learning_rate": 0.0011295742232451094, "loss": 0.6812, "step": 151280 }, { "epoch": 43.52416570771001, "grad_norm": 1.311601161956787, "learning_rate": 0.0011295166858457998, "loss": 0.627, "step": 151290 }, { "epoch": 43.52704257767549, "grad_norm": 1.0978752374649048, "learning_rate": 0.0011294591484464903, "loss": 0.5483, "step": 151300 }, { "epoch": 43.529919447640964, "grad_norm": 2.556550979614258, "learning_rate": 0.0011294016110471807, "loss": 0.5666, "step": 151310 }, { "epoch": 43.53279631760645, "grad_norm": 1.246984601020813, "learning_rate": 0.001129344073647871, "loss": 0.6327, "step": 151320 }, { "epoch": 43.53567318757192, "grad_norm": 1.3204807043075562, "learning_rate": 0.0011292865362485616, "loss": 0.6104, "step": 151330 }, { "epoch": 43.5385500575374, "grad_norm": 1.2268526554107666, "learning_rate": 0.0011292289988492521, "loss": 0.5558, "step": 151340 }, { "epoch": 43.541426927502876, "grad_norm": 1.2563501596450806, "learning_rate": 0.0011291714614499425, "loss": 0.6459, "step": 151350 }, { "epoch": 43.54430379746835, "grad_norm": 1.6788992881774902, "learning_rate": 0.001129113924050633, "loss": 0.5545, "step": 151360 }, { "epoch": 43.547180667433835, "grad_norm": 0.9873321056365967, "learning_rate": 0.0011290563866513234, "loss": 0.5042, "step": 151370 }, { "epoch": 43.55005753739931, "grad_norm": 1.9267857074737549, "learning_rate": 0.0011289988492520137, "loss": 0.553, "step": 151380 }, { "epoch": 43.55293440736479, "grad_norm": 1.9204528331756592, "learning_rate": 0.0011289413118527043, "loss": 0.6983, "step": 151390 }, { "epoch": 43.555811277330264, "grad_norm": 1.4446728229522705, "learning_rate": 0.0011288837744533947, "loss": 0.5429, "step": 151400 }, { "epoch": 43.55868814729574, "grad_norm": 1.3301141262054443, "learning_rate": 0.0011288262370540852, "loss": 0.6066, "step": 151410 }, { "epoch": 43.561565017261216, "grad_norm": 2.1066887378692627, "learning_rate": 0.0011287686996547758, "loss": 0.5377, "step": 151420 }, { "epoch": 43.5644418872267, "grad_norm": 1.1746782064437866, "learning_rate": 0.001128711162255466, "loss": 0.7285, "step": 151430 }, { "epoch": 43.567318757192176, "grad_norm": 1.0201506614685059, "learning_rate": 0.0011286536248561565, "loss": 0.5563, "step": 151440 }, { "epoch": 43.57019562715765, "grad_norm": 1.5301743745803833, "learning_rate": 0.001128596087456847, "loss": 0.5688, "step": 151450 }, { "epoch": 43.57307249712313, "grad_norm": 0.8118834495544434, "learning_rate": 0.0011285385500575374, "loss": 0.4776, "step": 151460 }, { "epoch": 43.575949367088604, "grad_norm": 0.7624797821044922, "learning_rate": 0.001128481012658228, "loss": 0.5122, "step": 151470 }, { "epoch": 43.57882623705409, "grad_norm": 0.7751355171203613, "learning_rate": 0.0011284234752589185, "loss": 0.5003, "step": 151480 }, { "epoch": 43.581703107019564, "grad_norm": 1.019641637802124, "learning_rate": 0.0011283659378596086, "loss": 0.6776, "step": 151490 }, { "epoch": 43.58457997698504, "grad_norm": 1.1781225204467773, "learning_rate": 0.0011283084004602992, "loss": 0.6598, "step": 151500 }, { "epoch": 43.587456846950516, "grad_norm": 1.4593391418457031, "learning_rate": 0.0011282508630609896, "loss": 0.6677, "step": 151510 }, { "epoch": 43.59033371691599, "grad_norm": 1.3871310949325562, "learning_rate": 0.0011281933256616801, "loss": 0.5656, "step": 151520 }, { "epoch": 43.593210586881476, "grad_norm": 0.9578132033348083, "learning_rate": 0.0011281357882623707, "loss": 0.4956, "step": 151530 }, { "epoch": 43.59608745684695, "grad_norm": 0.5835226774215698, "learning_rate": 0.001128078250863061, "loss": 0.4998, "step": 151540 }, { "epoch": 43.59896432681243, "grad_norm": 0.8710330128669739, "learning_rate": 0.0011280207134637514, "loss": 0.5322, "step": 151550 }, { "epoch": 43.601841196777904, "grad_norm": 1.0869534015655518, "learning_rate": 0.001127963176064442, "loss": 0.6008, "step": 151560 }, { "epoch": 43.60471806674338, "grad_norm": 1.6356959342956543, "learning_rate": 0.0011279056386651323, "loss": 0.6549, "step": 151570 }, { "epoch": 43.607594936708864, "grad_norm": 1.2508150339126587, "learning_rate": 0.0011278481012658229, "loss": 0.7978, "step": 151580 }, { "epoch": 43.61047180667434, "grad_norm": 1.2314754724502563, "learning_rate": 0.0011277905638665134, "loss": 0.5781, "step": 151590 }, { "epoch": 43.613348676639816, "grad_norm": 1.7782646417617798, "learning_rate": 0.0011277330264672038, "loss": 0.5616, "step": 151600 }, { "epoch": 43.61622554660529, "grad_norm": 1.5833321809768677, "learning_rate": 0.0011276754890678941, "loss": 0.6193, "step": 151610 }, { "epoch": 43.61910241657077, "grad_norm": 0.8131325840950012, "learning_rate": 0.0011276179516685845, "loss": 0.4871, "step": 151620 }, { "epoch": 43.621979286536245, "grad_norm": 2.1632978916168213, "learning_rate": 0.001127560414269275, "loss": 0.5603, "step": 151630 }, { "epoch": 43.62485615650173, "grad_norm": 1.0292787551879883, "learning_rate": 0.0011275028768699656, "loss": 0.669, "step": 151640 }, { "epoch": 43.627733026467205, "grad_norm": 1.3123515844345093, "learning_rate": 0.001127445339470656, "loss": 0.6129, "step": 151650 }, { "epoch": 43.63060989643268, "grad_norm": 1.599539875984192, "learning_rate": 0.0011273878020713465, "loss": 0.5709, "step": 151660 }, { "epoch": 43.63348676639816, "grad_norm": 0.9657867550849915, "learning_rate": 0.0011273302646720368, "loss": 0.6377, "step": 151670 }, { "epoch": 43.63636363636363, "grad_norm": 1.5401036739349365, "learning_rate": 0.0011272727272727272, "loss": 0.5877, "step": 151680 }, { "epoch": 43.639240506329116, "grad_norm": 1.4271036386489868, "learning_rate": 0.0011272151898734178, "loss": 0.6819, "step": 151690 }, { "epoch": 43.64211737629459, "grad_norm": 1.7791290283203125, "learning_rate": 0.0011271576524741083, "loss": 0.6785, "step": 151700 }, { "epoch": 43.64499424626007, "grad_norm": 0.7980376482009888, "learning_rate": 0.0011271001150747987, "loss": 0.5263, "step": 151710 }, { "epoch": 43.647871116225545, "grad_norm": 1.4041638374328613, "learning_rate": 0.001127042577675489, "loss": 0.4986, "step": 151720 }, { "epoch": 43.65074798619102, "grad_norm": 0.7719420194625854, "learning_rate": 0.0011269850402761794, "loss": 0.491, "step": 151730 }, { "epoch": 43.653624856156505, "grad_norm": 1.3328174352645874, "learning_rate": 0.00112692750287687, "loss": 0.6743, "step": 151740 }, { "epoch": 43.65650172612198, "grad_norm": 1.5370850563049316, "learning_rate": 0.0011268699654775605, "loss": 0.4801, "step": 151750 }, { "epoch": 43.65937859608746, "grad_norm": 1.4110530614852905, "learning_rate": 0.0011268124280782508, "loss": 0.6513, "step": 151760 }, { "epoch": 43.66225546605293, "grad_norm": 0.9768955707550049, "learning_rate": 0.0011267548906789414, "loss": 0.4867, "step": 151770 }, { "epoch": 43.66513233601841, "grad_norm": 1.3773590326309204, "learning_rate": 0.0011266973532796317, "loss": 0.6725, "step": 151780 }, { "epoch": 43.66800920598389, "grad_norm": 1.1884572505950928, "learning_rate": 0.001126639815880322, "loss": 0.6212, "step": 151790 }, { "epoch": 43.67088607594937, "grad_norm": 1.3530943393707275, "learning_rate": 0.0011265822784810127, "loss": 0.6092, "step": 151800 }, { "epoch": 43.673762945914845, "grad_norm": 1.2233554124832153, "learning_rate": 0.0011265247410817032, "loss": 0.666, "step": 151810 }, { "epoch": 43.67663981588032, "grad_norm": 1.2232654094696045, "learning_rate": 0.0011264672036823936, "loss": 0.5369, "step": 151820 }, { "epoch": 43.6795166858458, "grad_norm": 2.3554117679595947, "learning_rate": 0.0011264096662830841, "loss": 0.521, "step": 151830 }, { "epoch": 43.68239355581128, "grad_norm": 1.92689847946167, "learning_rate": 0.0011263521288837745, "loss": 0.7752, "step": 151840 }, { "epoch": 43.68527042577676, "grad_norm": 1.626381278038025, "learning_rate": 0.0011262945914844648, "loss": 0.6545, "step": 151850 }, { "epoch": 43.68814729574223, "grad_norm": 1.2572546005249023, "learning_rate": 0.0011262370540851554, "loss": 0.6277, "step": 151860 }, { "epoch": 43.69102416570771, "grad_norm": 1.1983951330184937, "learning_rate": 0.0011261795166858457, "loss": 0.4308, "step": 151870 }, { "epoch": 43.693901035673186, "grad_norm": 1.0219528675079346, "learning_rate": 0.0011261219792865363, "loss": 0.4859, "step": 151880 }, { "epoch": 43.69677790563866, "grad_norm": 1.6642365455627441, "learning_rate": 0.0011260644418872269, "loss": 0.6108, "step": 151890 }, { "epoch": 43.699654775604145, "grad_norm": 1.5594404935836792, "learning_rate": 0.001126006904487917, "loss": 0.6291, "step": 151900 }, { "epoch": 43.70253164556962, "grad_norm": 1.3445217609405518, "learning_rate": 0.0011259493670886076, "loss": 0.4581, "step": 151910 }, { "epoch": 43.7054085155351, "grad_norm": 1.0945193767547607, "learning_rate": 0.0011258918296892981, "loss": 0.578, "step": 151920 }, { "epoch": 43.708285385500574, "grad_norm": 1.1947412490844727, "learning_rate": 0.0011258342922899885, "loss": 0.5007, "step": 151930 }, { "epoch": 43.71116225546605, "grad_norm": 0.9496126174926758, "learning_rate": 0.001125776754890679, "loss": 0.6067, "step": 151940 }, { "epoch": 43.71403912543153, "grad_norm": 2.31205415725708, "learning_rate": 0.0011257192174913696, "loss": 0.6658, "step": 151950 }, { "epoch": 43.71691599539701, "grad_norm": 1.0292800664901733, "learning_rate": 0.0011256616800920597, "loss": 0.54, "step": 151960 }, { "epoch": 43.719792865362486, "grad_norm": 5.329665660858154, "learning_rate": 0.0011256041426927503, "loss": 0.5592, "step": 151970 }, { "epoch": 43.72266973532796, "grad_norm": 1.1606237888336182, "learning_rate": 0.0011255466052934406, "loss": 0.5059, "step": 151980 }, { "epoch": 43.72554660529344, "grad_norm": 1.5569989681243896, "learning_rate": 0.0011254890678941312, "loss": 0.6285, "step": 151990 }, { "epoch": 43.72842347525892, "grad_norm": 0.7784891724586487, "learning_rate": 0.0011254315304948218, "loss": 0.5378, "step": 152000 }, { "epoch": 43.7313003452244, "grad_norm": 1.341620683670044, "learning_rate": 0.0011253739930955121, "loss": 0.5834, "step": 152010 }, { "epoch": 43.734177215189874, "grad_norm": 1.2199440002441406, "learning_rate": 0.0011253164556962025, "loss": 0.5577, "step": 152020 }, { "epoch": 43.73705408515535, "grad_norm": 0.9474427103996277, "learning_rate": 0.001125258918296893, "loss": 0.5951, "step": 152030 }, { "epoch": 43.739930955120826, "grad_norm": 0.9205393195152283, "learning_rate": 0.0011252013808975834, "loss": 0.5399, "step": 152040 }, { "epoch": 43.74280782508631, "grad_norm": 1.2631196975708008, "learning_rate": 0.001125143843498274, "loss": 0.7306, "step": 152050 }, { "epoch": 43.745684695051786, "grad_norm": 1.7628449201583862, "learning_rate": 0.0011250863060989645, "loss": 0.5735, "step": 152060 }, { "epoch": 43.74856156501726, "grad_norm": 1.7129247188568115, "learning_rate": 0.0011250287686996548, "loss": 0.582, "step": 152070 }, { "epoch": 43.75143843498274, "grad_norm": 2.187007188796997, "learning_rate": 0.0011249712313003452, "loss": 0.6607, "step": 152080 }, { "epoch": 43.754315304948214, "grad_norm": 1.8455674648284912, "learning_rate": 0.0011249136939010355, "loss": 0.6461, "step": 152090 }, { "epoch": 43.75719217491369, "grad_norm": 0.6423133611679077, "learning_rate": 0.001124856156501726, "loss": 0.5767, "step": 152100 }, { "epoch": 43.760069044879174, "grad_norm": 1.59645676612854, "learning_rate": 0.0011247986191024167, "loss": 0.5341, "step": 152110 }, { "epoch": 43.76294591484465, "grad_norm": 0.8472005724906921, "learning_rate": 0.001124741081703107, "loss": 0.5143, "step": 152120 }, { "epoch": 43.765822784810126, "grad_norm": 1.5029476881027222, "learning_rate": 0.0011246835443037976, "loss": 0.5908, "step": 152130 }, { "epoch": 43.7686996547756, "grad_norm": 1.5071492195129395, "learning_rate": 0.001124626006904488, "loss": 0.5289, "step": 152140 }, { "epoch": 43.77157652474108, "grad_norm": 1.281968593597412, "learning_rate": 0.0011245684695051783, "loss": 0.5041, "step": 152150 }, { "epoch": 43.77445339470656, "grad_norm": 0.8617805242538452, "learning_rate": 0.0011245109321058688, "loss": 0.4806, "step": 152160 }, { "epoch": 43.77733026467204, "grad_norm": 0.9508687257766724, "learning_rate": 0.0011244533947065594, "loss": 0.611, "step": 152170 }, { "epoch": 43.780207134637514, "grad_norm": 0.854314923286438, "learning_rate": 0.0011243958573072498, "loss": 0.69, "step": 152180 }, { "epoch": 43.78308400460299, "grad_norm": 0.7062690854072571, "learning_rate": 0.0011243383199079403, "loss": 0.6316, "step": 152190 }, { "epoch": 43.78596087456847, "grad_norm": 1.437523603439331, "learning_rate": 0.0011242807825086304, "loss": 0.5575, "step": 152200 }, { "epoch": 43.78883774453395, "grad_norm": 1.3218358755111694, "learning_rate": 0.001124223245109321, "loss": 0.6226, "step": 152210 }, { "epoch": 43.791714614499426, "grad_norm": 0.9247041940689087, "learning_rate": 0.0011241657077100116, "loss": 0.7518, "step": 152220 }, { "epoch": 43.7945914844649, "grad_norm": 0.8399156928062439, "learning_rate": 0.001124108170310702, "loss": 0.5585, "step": 152230 }, { "epoch": 43.79746835443038, "grad_norm": 1.5842177867889404, "learning_rate": 0.0011240506329113925, "loss": 0.8098, "step": 152240 }, { "epoch": 43.800345224395855, "grad_norm": 0.8972689509391785, "learning_rate": 0.001123993095512083, "loss": 0.6304, "step": 152250 }, { "epoch": 43.80322209436134, "grad_norm": 2.185403347015381, "learning_rate": 0.0011239355581127732, "loss": 0.5473, "step": 152260 }, { "epoch": 43.806098964326814, "grad_norm": 1.0577831268310547, "learning_rate": 0.0011238780207134637, "loss": 0.4958, "step": 152270 }, { "epoch": 43.80897583429229, "grad_norm": 1.8588643074035645, "learning_rate": 0.0011238204833141543, "loss": 0.6142, "step": 152280 }, { "epoch": 43.81185270425777, "grad_norm": 0.6699174642562866, "learning_rate": 0.0011237629459148447, "loss": 0.6448, "step": 152290 }, { "epoch": 43.81472957422324, "grad_norm": 1.058482050895691, "learning_rate": 0.0011237054085155352, "loss": 0.566, "step": 152300 }, { "epoch": 43.81760644418872, "grad_norm": 1.7927802801132202, "learning_rate": 0.0011236478711162256, "loss": 0.5997, "step": 152310 }, { "epoch": 43.8204833141542, "grad_norm": 0.8643743991851807, "learning_rate": 0.001123590333716916, "loss": 0.6495, "step": 152320 }, { "epoch": 43.82336018411968, "grad_norm": 0.7698912024497986, "learning_rate": 0.0011235327963176065, "loss": 0.5486, "step": 152330 }, { "epoch": 43.826237054085155, "grad_norm": 1.634254813194275, "learning_rate": 0.0011234752589182968, "loss": 0.5348, "step": 152340 }, { "epoch": 43.82911392405063, "grad_norm": 1.5098669528961182, "learning_rate": 0.0011234177215189874, "loss": 0.6303, "step": 152350 }, { "epoch": 43.83199079401611, "grad_norm": 1.6900854110717773, "learning_rate": 0.001123360184119678, "loss": 0.5529, "step": 152360 }, { "epoch": 43.83486766398159, "grad_norm": 0.8233761787414551, "learning_rate": 0.0011233026467203683, "loss": 0.6221, "step": 152370 }, { "epoch": 43.83774453394707, "grad_norm": 0.767987072467804, "learning_rate": 0.0011232451093210586, "loss": 0.5679, "step": 152380 }, { "epoch": 43.84062140391254, "grad_norm": 1.3130558729171753, "learning_rate": 0.0011231875719217492, "loss": 0.4987, "step": 152390 }, { "epoch": 43.84349827387802, "grad_norm": 1.1961464881896973, "learning_rate": 0.0011231300345224396, "loss": 0.606, "step": 152400 }, { "epoch": 43.846375143843495, "grad_norm": 1.884602427482605, "learning_rate": 0.0011230724971231301, "loss": 0.6645, "step": 152410 }, { "epoch": 43.84925201380898, "grad_norm": 0.9995684027671814, "learning_rate": 0.0011230149597238207, "loss": 0.7367, "step": 152420 }, { "epoch": 43.852128883774455, "grad_norm": 1.0591193437576294, "learning_rate": 0.001122957422324511, "loss": 0.4903, "step": 152430 }, { "epoch": 43.85500575373993, "grad_norm": 3.17506742477417, "learning_rate": 0.0011228998849252014, "loss": 0.7241, "step": 152440 }, { "epoch": 43.85788262370541, "grad_norm": 0.8513684272766113, "learning_rate": 0.0011228423475258917, "loss": 0.7524, "step": 152450 }, { "epoch": 43.860759493670884, "grad_norm": 1.7739002704620361, "learning_rate": 0.0011227848101265823, "loss": 0.6831, "step": 152460 }, { "epoch": 43.86363636363637, "grad_norm": 1.5844776630401611, "learning_rate": 0.0011227272727272729, "loss": 0.5295, "step": 152470 }, { "epoch": 43.86651323360184, "grad_norm": 1.5602037906646729, "learning_rate": 0.0011226697353279632, "loss": 0.5871, "step": 152480 }, { "epoch": 43.86939010356732, "grad_norm": 0.7754516005516052, "learning_rate": 0.0011226121979286535, "loss": 0.5505, "step": 152490 }, { "epoch": 43.872266973532795, "grad_norm": 1.3222514390945435, "learning_rate": 0.0011225546605293441, "loss": 0.4947, "step": 152500 }, { "epoch": 43.87514384349827, "grad_norm": 1.385822057723999, "learning_rate": 0.0011224971231300345, "loss": 0.5359, "step": 152510 }, { "epoch": 43.878020713463755, "grad_norm": 1.1530537605285645, "learning_rate": 0.001122439585730725, "loss": 0.6387, "step": 152520 }, { "epoch": 43.88089758342923, "grad_norm": 1.167090654373169, "learning_rate": 0.0011223820483314156, "loss": 0.5463, "step": 152530 }, { "epoch": 43.88377445339471, "grad_norm": 1.3066505193710327, "learning_rate": 0.001122324510932106, "loss": 0.514, "step": 152540 }, { "epoch": 43.886651323360184, "grad_norm": 2.883516311645508, "learning_rate": 0.0011222669735327963, "loss": 0.6132, "step": 152550 }, { "epoch": 43.88952819332566, "grad_norm": 1.0311012268066406, "learning_rate": 0.0011222094361334866, "loss": 0.5388, "step": 152560 }, { "epoch": 43.892405063291136, "grad_norm": 0.9220284223556519, "learning_rate": 0.0011221518987341772, "loss": 0.8019, "step": 152570 }, { "epoch": 43.89528193325662, "grad_norm": 0.8717339634895325, "learning_rate": 0.0011220943613348678, "loss": 0.5857, "step": 152580 }, { "epoch": 43.898158803222096, "grad_norm": 0.8650273680686951, "learning_rate": 0.001122036823935558, "loss": 0.5906, "step": 152590 }, { "epoch": 43.90103567318757, "grad_norm": 1.9445645809173584, "learning_rate": 0.0011219792865362487, "loss": 0.6616, "step": 152600 }, { "epoch": 43.90391254315305, "grad_norm": 0.6764745116233826, "learning_rate": 0.001121921749136939, "loss": 0.5733, "step": 152610 }, { "epoch": 43.906789413118524, "grad_norm": 1.1908576488494873, "learning_rate": 0.0011218642117376294, "loss": 0.5926, "step": 152620 }, { "epoch": 43.90966628308401, "grad_norm": 1.524361491203308, "learning_rate": 0.00112180667433832, "loss": 0.6898, "step": 152630 }, { "epoch": 43.912543153049484, "grad_norm": 1.2544735670089722, "learning_rate": 0.0011217491369390105, "loss": 0.7055, "step": 152640 }, { "epoch": 43.91542002301496, "grad_norm": 1.5121983289718628, "learning_rate": 0.0011216915995397008, "loss": 0.5621, "step": 152650 }, { "epoch": 43.918296892980436, "grad_norm": 0.9144952893257141, "learning_rate": 0.0011216340621403914, "loss": 0.6346, "step": 152660 }, { "epoch": 43.92117376294591, "grad_norm": 1.6807247400283813, "learning_rate": 0.0011215765247410815, "loss": 0.6927, "step": 152670 }, { "epoch": 43.924050632911396, "grad_norm": 1.2324626445770264, "learning_rate": 0.001121518987341772, "loss": 0.685, "step": 152680 }, { "epoch": 43.92692750287687, "grad_norm": 0.9637466669082642, "learning_rate": 0.0011214614499424627, "loss": 0.5968, "step": 152690 }, { "epoch": 43.92980437284235, "grad_norm": 1.2503774166107178, "learning_rate": 0.001121403912543153, "loss": 0.5952, "step": 152700 }, { "epoch": 43.932681242807824, "grad_norm": 1.2438080310821533, "learning_rate": 0.0011213463751438436, "loss": 0.8051, "step": 152710 }, { "epoch": 43.9355581127733, "grad_norm": 1.6613047122955322, "learning_rate": 0.0011212888377445341, "loss": 0.7193, "step": 152720 }, { "epoch": 43.938434982738784, "grad_norm": 1.3946549892425537, "learning_rate": 0.0011212313003452243, "loss": 0.712, "step": 152730 }, { "epoch": 43.94131185270426, "grad_norm": 1.494118094444275, "learning_rate": 0.0011211737629459148, "loss": 0.5763, "step": 152740 }, { "epoch": 43.944188722669736, "grad_norm": 0.9208534359931946, "learning_rate": 0.0011211162255466054, "loss": 0.4666, "step": 152750 }, { "epoch": 43.94706559263521, "grad_norm": 0.7992838025093079, "learning_rate": 0.0011210586881472957, "loss": 0.6589, "step": 152760 }, { "epoch": 43.94994246260069, "grad_norm": 0.9797748327255249, "learning_rate": 0.0011210011507479863, "loss": 0.5165, "step": 152770 }, { "epoch": 43.952819332566165, "grad_norm": 1.2040566205978394, "learning_rate": 0.0011209436133486766, "loss": 0.603, "step": 152780 }, { "epoch": 43.95569620253165, "grad_norm": 1.3020107746124268, "learning_rate": 0.001120886075949367, "loss": 0.6217, "step": 152790 }, { "epoch": 43.958573072497124, "grad_norm": 1.6692521572113037, "learning_rate": 0.0011208285385500576, "loss": 0.5586, "step": 152800 }, { "epoch": 43.9614499424626, "grad_norm": 1.5295289754867554, "learning_rate": 0.001120771001150748, "loss": 0.58, "step": 152810 }, { "epoch": 43.96432681242808, "grad_norm": 1.2898225784301758, "learning_rate": 0.0011207134637514385, "loss": 0.5145, "step": 152820 }, { "epoch": 43.96720368239355, "grad_norm": 2.426034927368164, "learning_rate": 0.001120655926352129, "loss": 0.7076, "step": 152830 }, { "epoch": 43.970080552359036, "grad_norm": 2.3185200691223145, "learning_rate": 0.0011205983889528194, "loss": 0.721, "step": 152840 }, { "epoch": 43.97295742232451, "grad_norm": 2.0686419010162354, "learning_rate": 0.0011205408515535097, "loss": 0.5808, "step": 152850 }, { "epoch": 43.97583429228999, "grad_norm": 1.0109971761703491, "learning_rate": 0.0011204833141542003, "loss": 0.6032, "step": 152860 }, { "epoch": 43.978711162255465, "grad_norm": 0.9755745530128479, "learning_rate": 0.0011204257767548906, "loss": 0.4879, "step": 152870 }, { "epoch": 43.98158803222094, "grad_norm": 1.6264382600784302, "learning_rate": 0.0011203682393555812, "loss": 0.5279, "step": 152880 }, { "epoch": 43.984464902186424, "grad_norm": 1.0232969522476196, "learning_rate": 0.0011203107019562716, "loss": 0.4938, "step": 152890 }, { "epoch": 43.9873417721519, "grad_norm": 1.250659465789795, "learning_rate": 0.0011202531645569621, "loss": 0.59, "step": 152900 }, { "epoch": 43.99021864211738, "grad_norm": 1.9817761182785034, "learning_rate": 0.0011201956271576525, "loss": 0.6764, "step": 152910 }, { "epoch": 43.99309551208285, "grad_norm": 1.2685531377792358, "learning_rate": 0.0011201380897583428, "loss": 0.5336, "step": 152920 }, { "epoch": 43.99597238204833, "grad_norm": 1.8537520170211792, "learning_rate": 0.0011200805523590334, "loss": 0.5341, "step": 152930 }, { "epoch": 43.99884925201381, "grad_norm": 1.5670257806777954, "learning_rate": 0.001120023014959724, "loss": 0.6854, "step": 152940 }, { "epoch": 44.00172612197929, "grad_norm": 1.1661185026168823, "learning_rate": 0.0011199654775604143, "loss": 0.4454, "step": 152950 }, { "epoch": 44.004602991944765, "grad_norm": 1.0111441612243652, "learning_rate": 0.0011199079401611048, "loss": 0.4408, "step": 152960 }, { "epoch": 44.00747986191024, "grad_norm": 0.9317083358764648, "learning_rate": 0.0011198504027617952, "loss": 0.4957, "step": 152970 }, { "epoch": 44.01035673187572, "grad_norm": 0.8230199813842773, "learning_rate": 0.0011197928653624855, "loss": 0.573, "step": 152980 }, { "epoch": 44.01323360184119, "grad_norm": 1.1687554121017456, "learning_rate": 0.001119735327963176, "loss": 0.6193, "step": 152990 }, { "epoch": 44.01611047180668, "grad_norm": 1.311229944229126, "learning_rate": 0.0011196777905638665, "loss": 0.4138, "step": 153000 }, { "epoch": 44.01898734177215, "grad_norm": 1.720177173614502, "learning_rate": 0.001119620253164557, "loss": 0.6711, "step": 153010 }, { "epoch": 44.02186421173763, "grad_norm": 0.8698323369026184, "learning_rate": 0.0011195627157652476, "loss": 0.5685, "step": 153020 }, { "epoch": 44.024741081703105, "grad_norm": 1.1818121671676636, "learning_rate": 0.0011195051783659377, "loss": 0.6519, "step": 153030 }, { "epoch": 44.02761795166858, "grad_norm": 1.3187291622161865, "learning_rate": 0.0011194476409666283, "loss": 0.5725, "step": 153040 }, { "epoch": 44.030494821634065, "grad_norm": 0.9786016941070557, "learning_rate": 0.0011193901035673188, "loss": 0.525, "step": 153050 }, { "epoch": 44.03337169159954, "grad_norm": 1.2261364459991455, "learning_rate": 0.0011193325661680092, "loss": 0.4521, "step": 153060 }, { "epoch": 44.03624856156502, "grad_norm": 2.021409749984741, "learning_rate": 0.0011192750287686997, "loss": 0.558, "step": 153070 }, { "epoch": 44.03912543153049, "grad_norm": 1.1808996200561523, "learning_rate": 0.0011192174913693903, "loss": 0.482, "step": 153080 }, { "epoch": 44.04200230149597, "grad_norm": 1.8835599422454834, "learning_rate": 0.0011191599539700804, "loss": 0.5809, "step": 153090 }, { "epoch": 44.04487917146145, "grad_norm": 1.8218640089035034, "learning_rate": 0.001119102416570771, "loss": 0.5995, "step": 153100 }, { "epoch": 44.04775604142693, "grad_norm": 1.4566354751586914, "learning_rate": 0.0011190448791714616, "loss": 0.5322, "step": 153110 }, { "epoch": 44.050632911392405, "grad_norm": 1.1025941371917725, "learning_rate": 0.001118987341772152, "loss": 0.5553, "step": 153120 }, { "epoch": 44.05350978135788, "grad_norm": 0.8787980675697327, "learning_rate": 0.0011189298043728425, "loss": 0.5146, "step": 153130 }, { "epoch": 44.05638665132336, "grad_norm": 1.0682464838027954, "learning_rate": 0.0011188722669735328, "loss": 0.4783, "step": 153140 }, { "epoch": 44.05926352128884, "grad_norm": 1.238560438156128, "learning_rate": 0.0011188147295742232, "loss": 0.3741, "step": 153150 }, { "epoch": 44.06214039125432, "grad_norm": 1.3154720067977905, "learning_rate": 0.0011187571921749137, "loss": 0.6629, "step": 153160 }, { "epoch": 44.06501726121979, "grad_norm": 1.377937912940979, "learning_rate": 0.001118699654775604, "loss": 0.653, "step": 153170 }, { "epoch": 44.06789413118527, "grad_norm": 1.8567105531692505, "learning_rate": 0.0011186421173762947, "loss": 0.683, "step": 153180 }, { "epoch": 44.070771001150746, "grad_norm": 1.631489634513855, "learning_rate": 0.0011185845799769852, "loss": 0.626, "step": 153190 }, { "epoch": 44.07364787111622, "grad_norm": 1.1721763610839844, "learning_rate": 0.0011185270425776756, "loss": 0.6124, "step": 153200 }, { "epoch": 44.076524741081705, "grad_norm": 0.9448806643486023, "learning_rate": 0.001118469505178366, "loss": 0.5627, "step": 153210 }, { "epoch": 44.07940161104718, "grad_norm": 1.0138659477233887, "learning_rate": 0.0011184119677790565, "loss": 0.5216, "step": 153220 }, { "epoch": 44.08227848101266, "grad_norm": 0.8594726920127869, "learning_rate": 0.0011183544303797468, "loss": 0.5881, "step": 153230 }, { "epoch": 44.085155350978134, "grad_norm": 1.051655888557434, "learning_rate": 0.0011182968929804374, "loss": 0.5843, "step": 153240 }, { "epoch": 44.08803222094361, "grad_norm": 1.3103913068771362, "learning_rate": 0.0011182393555811277, "loss": 0.5659, "step": 153250 }, { "epoch": 44.09090909090909, "grad_norm": 1.725239634513855, "learning_rate": 0.0011181818181818183, "loss": 0.6477, "step": 153260 }, { "epoch": 44.09378596087457, "grad_norm": 1.3258984088897705, "learning_rate": 0.0011181242807825086, "loss": 0.5591, "step": 153270 }, { "epoch": 44.096662830840046, "grad_norm": 0.8721164464950562, "learning_rate": 0.001118066743383199, "loss": 0.4468, "step": 153280 }, { "epoch": 44.09953970080552, "grad_norm": 1.4037634134292603, "learning_rate": 0.0011180092059838896, "loss": 0.5853, "step": 153290 }, { "epoch": 44.102416570771, "grad_norm": 1.6406118869781494, "learning_rate": 0.0011179516685845801, "loss": 0.5543, "step": 153300 }, { "epoch": 44.10529344073648, "grad_norm": 0.9583193063735962, "learning_rate": 0.0011178941311852705, "loss": 0.5339, "step": 153310 }, { "epoch": 44.10817031070196, "grad_norm": 1.0598207712173462, "learning_rate": 0.0011178365937859608, "loss": 0.4982, "step": 153320 }, { "epoch": 44.111047180667434, "grad_norm": 1.5715750455856323, "learning_rate": 0.0011177790563866514, "loss": 0.6351, "step": 153330 }, { "epoch": 44.11392405063291, "grad_norm": 1.124502420425415, "learning_rate": 0.0011177215189873417, "loss": 0.5205, "step": 153340 }, { "epoch": 44.116800920598386, "grad_norm": 0.7743774056434631, "learning_rate": 0.0011176639815880323, "loss": 0.5356, "step": 153350 }, { "epoch": 44.11967779056387, "grad_norm": 1.319114327430725, "learning_rate": 0.0011176064441887226, "loss": 0.5738, "step": 153360 }, { "epoch": 44.122554660529346, "grad_norm": 1.7543631792068481, "learning_rate": 0.0011175489067894132, "loss": 0.5293, "step": 153370 }, { "epoch": 44.12543153049482, "grad_norm": 0.9097146391868591, "learning_rate": 0.0011174913693901035, "loss": 0.5034, "step": 153380 }, { "epoch": 44.1283084004603, "grad_norm": 1.2536145448684692, "learning_rate": 0.001117433831990794, "loss": 0.5935, "step": 153390 }, { "epoch": 44.131185270425775, "grad_norm": 1.010408878326416, "learning_rate": 0.0011173762945914845, "loss": 0.4817, "step": 153400 }, { "epoch": 44.13406214039125, "grad_norm": 2.5419623851776123, "learning_rate": 0.001117318757192175, "loss": 0.4739, "step": 153410 }, { "epoch": 44.136939010356734, "grad_norm": 0.990667998790741, "learning_rate": 0.0011172612197928654, "loss": 0.5255, "step": 153420 }, { "epoch": 44.13981588032221, "grad_norm": 1.1413905620574951, "learning_rate": 0.001117203682393556, "loss": 0.5469, "step": 153430 }, { "epoch": 44.14269275028769, "grad_norm": 0.9621499180793762, "learning_rate": 0.0011171461449942463, "loss": 0.513, "step": 153440 }, { "epoch": 44.14556962025316, "grad_norm": 1.3258249759674072, "learning_rate": 0.0011170886075949366, "loss": 0.6507, "step": 153450 }, { "epoch": 44.14844649021864, "grad_norm": 1.5247546434402466, "learning_rate": 0.0011170310701956272, "loss": 0.5954, "step": 153460 }, { "epoch": 44.15132336018412, "grad_norm": 1.063601016998291, "learning_rate": 0.0011169735327963175, "loss": 0.4571, "step": 153470 }, { "epoch": 44.1542002301496, "grad_norm": 0.5856224894523621, "learning_rate": 0.001116915995397008, "loss": 0.5701, "step": 153480 }, { "epoch": 44.157077100115075, "grad_norm": 1.2226210832595825, "learning_rate": 0.0011168584579976987, "loss": 0.663, "step": 153490 }, { "epoch": 44.15995397008055, "grad_norm": 1.0736355781555176, "learning_rate": 0.0011168009205983888, "loss": 0.4589, "step": 153500 }, { "epoch": 44.16283084004603, "grad_norm": 1.279660940170288, "learning_rate": 0.0011167433831990794, "loss": 0.6141, "step": 153510 }, { "epoch": 44.16570771001151, "grad_norm": 1.660960078239441, "learning_rate": 0.00111668584579977, "loss": 0.6751, "step": 153520 }, { "epoch": 44.16858457997699, "grad_norm": 0.9494513273239136, "learning_rate": 0.0011166283084004603, "loss": 0.524, "step": 153530 }, { "epoch": 44.17146144994246, "grad_norm": 1.7450335025787354, "learning_rate": 0.0011165707710011508, "loss": 0.6146, "step": 153540 }, { "epoch": 44.17433831990794, "grad_norm": 1.1617306470870972, "learning_rate": 0.0011165132336018414, "loss": 0.7482, "step": 153550 }, { "epoch": 44.177215189873415, "grad_norm": 0.9570603370666504, "learning_rate": 0.0011164556962025315, "loss": 0.5615, "step": 153560 }, { "epoch": 44.1800920598389, "grad_norm": 1.1594748497009277, "learning_rate": 0.001116398158803222, "loss": 0.5128, "step": 153570 }, { "epoch": 44.182968929804375, "grad_norm": 1.0574835538864136, "learning_rate": 0.0011163406214039124, "loss": 0.5489, "step": 153580 }, { "epoch": 44.18584579976985, "grad_norm": 0.9368337988853455, "learning_rate": 0.001116283084004603, "loss": 0.4525, "step": 153590 }, { "epoch": 44.18872266973533, "grad_norm": 1.2604094743728638, "learning_rate": 0.0011162255466052936, "loss": 0.504, "step": 153600 }, { "epoch": 44.1915995397008, "grad_norm": 1.3534412384033203, "learning_rate": 0.001116168009205984, "loss": 0.5458, "step": 153610 }, { "epoch": 44.19447640966629, "grad_norm": 1.3978681564331055, "learning_rate": 0.0011161104718066743, "loss": 0.5072, "step": 153620 }, { "epoch": 44.19735327963176, "grad_norm": 1.9596291780471802, "learning_rate": 0.0011160529344073648, "loss": 0.6026, "step": 153630 }, { "epoch": 44.20023014959724, "grad_norm": 1.4330796003341675, "learning_rate": 0.0011159953970080552, "loss": 0.615, "step": 153640 }, { "epoch": 44.203107019562715, "grad_norm": 0.5585352182388306, "learning_rate": 0.0011159378596087457, "loss": 0.6125, "step": 153650 }, { "epoch": 44.20598388952819, "grad_norm": 1.1241577863693237, "learning_rate": 0.0011158803222094363, "loss": 0.7065, "step": 153660 }, { "epoch": 44.20886075949367, "grad_norm": 1.2172889709472656, "learning_rate": 0.0011158227848101266, "loss": 0.6454, "step": 153670 }, { "epoch": 44.21173762945915, "grad_norm": 1.1320109367370605, "learning_rate": 0.001115765247410817, "loss": 0.525, "step": 153680 }, { "epoch": 44.21461449942463, "grad_norm": 1.8591018915176392, "learning_rate": 0.0011157077100115073, "loss": 0.6889, "step": 153690 }, { "epoch": 44.2174913693901, "grad_norm": 1.5091508626937866, "learning_rate": 0.001115650172612198, "loss": 0.6994, "step": 153700 }, { "epoch": 44.22036823935558, "grad_norm": 1.4871032238006592, "learning_rate": 0.0011155926352128885, "loss": 0.481, "step": 153710 }, { "epoch": 44.223245109321056, "grad_norm": 0.5601220726966858, "learning_rate": 0.0011155350978135788, "loss": 0.5657, "step": 153720 }, { "epoch": 44.22612197928654, "grad_norm": 1.0887119770050049, "learning_rate": 0.0011154775604142694, "loss": 0.5925, "step": 153730 }, { "epoch": 44.228998849252015, "grad_norm": 1.4545642137527466, "learning_rate": 0.0011154200230149597, "loss": 0.705, "step": 153740 }, { "epoch": 44.23187571921749, "grad_norm": 0.621703565120697, "learning_rate": 0.00111536248561565, "loss": 0.5394, "step": 153750 }, { "epoch": 44.23475258918297, "grad_norm": 0.7966737747192383, "learning_rate": 0.0011153049482163406, "loss": 0.4085, "step": 153760 }, { "epoch": 44.237629459148444, "grad_norm": 1.7261558771133423, "learning_rate": 0.0011152474108170312, "loss": 0.5016, "step": 153770 }, { "epoch": 44.24050632911393, "grad_norm": 0.9653737545013428, "learning_rate": 0.0011151898734177215, "loss": 0.5035, "step": 153780 }, { "epoch": 44.2433831990794, "grad_norm": 1.596704363822937, "learning_rate": 0.0011151323360184121, "loss": 0.5212, "step": 153790 }, { "epoch": 44.24626006904488, "grad_norm": 1.7972733974456787, "learning_rate": 0.0011150747986191025, "loss": 0.5595, "step": 153800 }, { "epoch": 44.249136939010356, "grad_norm": 1.1222102642059326, "learning_rate": 0.0011150172612197928, "loss": 0.5852, "step": 153810 }, { "epoch": 44.25201380897583, "grad_norm": 1.5309200286865234, "learning_rate": 0.0011149597238204834, "loss": 0.564, "step": 153820 }, { "epoch": 44.254890678941315, "grad_norm": 0.8888220191001892, "learning_rate": 0.0011149021864211737, "loss": 0.567, "step": 153830 }, { "epoch": 44.25776754890679, "grad_norm": 1.2746574878692627, "learning_rate": 0.0011148446490218643, "loss": 0.4925, "step": 153840 }, { "epoch": 44.26064441887227, "grad_norm": 1.242037057876587, "learning_rate": 0.0011147871116225548, "loss": 0.5858, "step": 153850 }, { "epoch": 44.263521288837744, "grad_norm": 0.8925018310546875, "learning_rate": 0.001114729574223245, "loss": 0.6434, "step": 153860 }, { "epoch": 44.26639815880322, "grad_norm": 0.8125441074371338, "learning_rate": 0.0011146720368239355, "loss": 0.549, "step": 153870 }, { "epoch": 44.269275028768696, "grad_norm": 1.4999637603759766, "learning_rate": 0.001114614499424626, "loss": 0.8329, "step": 153880 }, { "epoch": 44.27215189873418, "grad_norm": 1.5442253351211548, "learning_rate": 0.0011145569620253165, "loss": 0.5281, "step": 153890 }, { "epoch": 44.275028768699656, "grad_norm": 1.3636547327041626, "learning_rate": 0.001114499424626007, "loss": 0.5575, "step": 153900 }, { "epoch": 44.27790563866513, "grad_norm": 1.4977000951766968, "learning_rate": 0.0011144418872266976, "loss": 0.6211, "step": 153910 }, { "epoch": 44.28078250863061, "grad_norm": 1.86936354637146, "learning_rate": 0.0011143843498273877, "loss": 0.596, "step": 153920 }, { "epoch": 44.283659378596084, "grad_norm": 1.065434217453003, "learning_rate": 0.0011143268124280783, "loss": 0.4631, "step": 153930 }, { "epoch": 44.28653624856157, "grad_norm": 0.8142687678337097, "learning_rate": 0.0011142692750287686, "loss": 0.4178, "step": 153940 }, { "epoch": 44.289413118527044, "grad_norm": 0.5836511850357056, "learning_rate": 0.0011142117376294592, "loss": 0.5886, "step": 153950 }, { "epoch": 44.29228998849252, "grad_norm": 0.7341201305389404, "learning_rate": 0.0011141542002301497, "loss": 0.4922, "step": 153960 }, { "epoch": 44.295166858457996, "grad_norm": 1.8305987119674683, "learning_rate": 0.00111409666283084, "loss": 0.513, "step": 153970 }, { "epoch": 44.29804372842347, "grad_norm": 1.832785725593567, "learning_rate": 0.0011140391254315304, "loss": 0.6425, "step": 153980 }, { "epoch": 44.300920598388956, "grad_norm": 0.8684152364730835, "learning_rate": 0.001113981588032221, "loss": 0.5321, "step": 153990 }, { "epoch": 44.30379746835443, "grad_norm": 1.5052223205566406, "learning_rate": 0.0011139240506329114, "loss": 0.5706, "step": 154000 }, { "epoch": 44.30667433831991, "grad_norm": 0.9401293992996216, "learning_rate": 0.001113866513233602, "loss": 0.5971, "step": 154010 }, { "epoch": 44.309551208285384, "grad_norm": 0.997841477394104, "learning_rate": 0.0011138089758342925, "loss": 0.5312, "step": 154020 }, { "epoch": 44.31242807825086, "grad_norm": 0.936613142490387, "learning_rate": 0.0011137514384349828, "loss": 0.5806, "step": 154030 }, { "epoch": 44.315304948216344, "grad_norm": 1.8061832189559937, "learning_rate": 0.0011136939010356732, "loss": 0.5401, "step": 154040 }, { "epoch": 44.31818181818182, "grad_norm": 1.045305609703064, "learning_rate": 0.0011136363636363635, "loss": 0.6259, "step": 154050 }, { "epoch": 44.321058688147296, "grad_norm": 0.8165888786315918, "learning_rate": 0.001113578826237054, "loss": 0.4951, "step": 154060 }, { "epoch": 44.32393555811277, "grad_norm": 0.9973496198654175, "learning_rate": 0.0011135212888377446, "loss": 0.5742, "step": 154070 }, { "epoch": 44.32681242807825, "grad_norm": 1.2484376430511475, "learning_rate": 0.001113463751438435, "loss": 0.6888, "step": 154080 }, { "epoch": 44.329689298043725, "grad_norm": 0.812646210193634, "learning_rate": 0.0011134062140391256, "loss": 0.5255, "step": 154090 }, { "epoch": 44.33256616800921, "grad_norm": 0.9382977485656738, "learning_rate": 0.001113348676639816, "loss": 0.4935, "step": 154100 }, { "epoch": 44.335443037974684, "grad_norm": 1.1409351825714111, "learning_rate": 0.0011132911392405063, "loss": 0.5026, "step": 154110 }, { "epoch": 44.33831990794016, "grad_norm": 1.394652247428894, "learning_rate": 0.0011132336018411968, "loss": 0.5387, "step": 154120 }, { "epoch": 44.34119677790564, "grad_norm": 1.3465468883514404, "learning_rate": 0.0011131760644418874, "loss": 0.4167, "step": 154130 }, { "epoch": 44.34407364787111, "grad_norm": 0.9132834672927856, "learning_rate": 0.0011131185270425777, "loss": 0.5049, "step": 154140 }, { "epoch": 44.346950517836596, "grad_norm": 1.620124340057373, "learning_rate": 0.001113060989643268, "loss": 0.5111, "step": 154150 }, { "epoch": 44.34982738780207, "grad_norm": 0.7072352170944214, "learning_rate": 0.0011130034522439584, "loss": 0.4542, "step": 154160 }, { "epoch": 44.35270425776755, "grad_norm": 0.903580904006958, "learning_rate": 0.001112945914844649, "loss": 0.5501, "step": 154170 }, { "epoch": 44.355581127733025, "grad_norm": 1.3980640172958374, "learning_rate": 0.0011128883774453396, "loss": 0.6076, "step": 154180 }, { "epoch": 44.3584579976985, "grad_norm": 2.0084872245788574, "learning_rate": 0.00111283084004603, "loss": 0.5607, "step": 154190 }, { "epoch": 44.361334867663984, "grad_norm": 1.6775906085968018, "learning_rate": 0.0011127733026467205, "loss": 0.6275, "step": 154200 }, { "epoch": 44.36421173762946, "grad_norm": 0.7707603573799133, "learning_rate": 0.0011127157652474108, "loss": 0.526, "step": 154210 }, { "epoch": 44.36708860759494, "grad_norm": 0.8445754647254944, "learning_rate": 0.0011126582278481012, "loss": 0.5852, "step": 154220 }, { "epoch": 44.36996547756041, "grad_norm": 1.3748533725738525, "learning_rate": 0.0011126006904487917, "loss": 0.7006, "step": 154230 }, { "epoch": 44.37284234752589, "grad_norm": 1.5594565868377686, "learning_rate": 0.0011125431530494823, "loss": 0.6719, "step": 154240 }, { "epoch": 44.37571921749137, "grad_norm": 1.0224847793579102, "learning_rate": 0.0011124856156501726, "loss": 0.5217, "step": 154250 }, { "epoch": 44.37859608745685, "grad_norm": 0.839407742023468, "learning_rate": 0.0011124280782508632, "loss": 0.6544, "step": 154260 }, { "epoch": 44.381472957422325, "grad_norm": 1.3445826768875122, "learning_rate": 0.0011123705408515533, "loss": 0.6, "step": 154270 }, { "epoch": 44.3843498273878, "grad_norm": 0.8715407252311707, "learning_rate": 0.0011123130034522439, "loss": 0.5309, "step": 154280 }, { "epoch": 44.38722669735328, "grad_norm": 1.4380991458892822, "learning_rate": 0.0011122554660529345, "loss": 0.5357, "step": 154290 }, { "epoch": 44.39010356731876, "grad_norm": 1.019059181213379, "learning_rate": 0.0011121979286536248, "loss": 0.6102, "step": 154300 }, { "epoch": 44.39298043728424, "grad_norm": 1.236751675605774, "learning_rate": 0.0011121403912543154, "loss": 0.5535, "step": 154310 }, { "epoch": 44.39585730724971, "grad_norm": 0.7121469378471375, "learning_rate": 0.001112082853855006, "loss": 0.5146, "step": 154320 }, { "epoch": 44.39873417721519, "grad_norm": 1.0273405313491821, "learning_rate": 0.001112025316455696, "loss": 0.5337, "step": 154330 }, { "epoch": 44.401611047180666, "grad_norm": 1.4250069856643677, "learning_rate": 0.0011119677790563866, "loss": 0.611, "step": 154340 }, { "epoch": 44.40448791714614, "grad_norm": 1.8813447952270508, "learning_rate": 0.0011119102416570772, "loss": 0.8145, "step": 154350 }, { "epoch": 44.407364787111625, "grad_norm": 0.8219639658927917, "learning_rate": 0.0011118527042577675, "loss": 0.5939, "step": 154360 }, { "epoch": 44.4102416570771, "grad_norm": 1.1319756507873535, "learning_rate": 0.001111795166858458, "loss": 0.5499, "step": 154370 }, { "epoch": 44.41311852704258, "grad_norm": 1.3698230981826782, "learning_rate": 0.0011117376294591487, "loss": 0.5131, "step": 154380 }, { "epoch": 44.415995397008054, "grad_norm": 1.4934558868408203, "learning_rate": 0.0011116800920598388, "loss": 0.5943, "step": 154390 }, { "epoch": 44.41887226697353, "grad_norm": 1.3433454036712646, "learning_rate": 0.0011116225546605294, "loss": 0.5713, "step": 154400 }, { "epoch": 44.42174913693901, "grad_norm": 1.1125231981277466, "learning_rate": 0.0011115650172612197, "loss": 0.6127, "step": 154410 }, { "epoch": 44.42462600690449, "grad_norm": 1.0254755020141602, "learning_rate": 0.0011115074798619103, "loss": 0.83, "step": 154420 }, { "epoch": 44.427502876869966, "grad_norm": 1.5435218811035156, "learning_rate": 0.0011114499424626008, "loss": 0.5196, "step": 154430 }, { "epoch": 44.43037974683544, "grad_norm": 1.5138288736343384, "learning_rate": 0.0011113924050632912, "loss": 0.5821, "step": 154440 }, { "epoch": 44.43325661680092, "grad_norm": 4.234665870666504, "learning_rate": 0.0011113348676639815, "loss": 0.6768, "step": 154450 }, { "epoch": 44.4361334867664, "grad_norm": 1.9423285722732544, "learning_rate": 0.001111277330264672, "loss": 0.6799, "step": 154460 }, { "epoch": 44.43901035673188, "grad_norm": 1.5564738512039185, "learning_rate": 0.0011112197928653624, "loss": 0.6458, "step": 154470 }, { "epoch": 44.441887226697354, "grad_norm": 1.1992090940475464, "learning_rate": 0.001111162255466053, "loss": 0.5635, "step": 154480 }, { "epoch": 44.44476409666283, "grad_norm": 1.2902723550796509, "learning_rate": 0.0011111047180667436, "loss": 0.5494, "step": 154490 }, { "epoch": 44.447640966628306, "grad_norm": 1.399914026260376, "learning_rate": 0.001111047180667434, "loss": 0.6415, "step": 154500 }, { "epoch": 44.45051783659379, "grad_norm": 0.875059962272644, "learning_rate": 0.0011109896432681243, "loss": 0.6412, "step": 154510 }, { "epoch": 44.453394706559266, "grad_norm": 1.053774356842041, "learning_rate": 0.0011109321058688146, "loss": 0.6058, "step": 154520 }, { "epoch": 44.45627157652474, "grad_norm": 1.3063019514083862, "learning_rate": 0.0011108745684695052, "loss": 0.6127, "step": 154530 }, { "epoch": 44.45914844649022, "grad_norm": 0.9963556528091431, "learning_rate": 0.0011108170310701957, "loss": 0.5625, "step": 154540 }, { "epoch": 44.462025316455694, "grad_norm": 1.043115258216858, "learning_rate": 0.001110759493670886, "loss": 0.5749, "step": 154550 }, { "epoch": 44.46490218642117, "grad_norm": 0.8466911911964417, "learning_rate": 0.0011107019562715766, "loss": 0.62, "step": 154560 }, { "epoch": 44.467779056386654, "grad_norm": 1.25593101978302, "learning_rate": 0.001110644418872267, "loss": 0.6285, "step": 154570 }, { "epoch": 44.47065592635213, "grad_norm": 0.7840921878814697, "learning_rate": 0.0011105868814729573, "loss": 0.5849, "step": 154580 }, { "epoch": 44.473532796317606, "grad_norm": 0.9613875150680542, "learning_rate": 0.001110529344073648, "loss": 0.5603, "step": 154590 }, { "epoch": 44.47640966628308, "grad_norm": 1.3970816135406494, "learning_rate": 0.0011104718066743385, "loss": 0.5324, "step": 154600 }, { "epoch": 44.47928653624856, "grad_norm": 1.3260068893432617, "learning_rate": 0.0011104142692750288, "loss": 0.6605, "step": 154610 }, { "epoch": 44.48216340621404, "grad_norm": 0.9879833459854126, "learning_rate": 0.0011103567318757194, "loss": 0.5808, "step": 154620 }, { "epoch": 44.48504027617952, "grad_norm": 1.5073986053466797, "learning_rate": 0.0011102991944764095, "loss": 0.5067, "step": 154630 }, { "epoch": 44.487917146144994, "grad_norm": 1.9329248666763306, "learning_rate": 0.0011102416570771, "loss": 0.5582, "step": 154640 }, { "epoch": 44.49079401611047, "grad_norm": 1.1691757440567017, "learning_rate": 0.0011101841196777906, "loss": 0.6547, "step": 154650 }, { "epoch": 44.49367088607595, "grad_norm": 0.9836063385009766, "learning_rate": 0.001110126582278481, "loss": 0.4973, "step": 154660 }, { "epoch": 44.49654775604143, "grad_norm": 0.7741673588752747, "learning_rate": 0.0011100690448791715, "loss": 0.518, "step": 154670 }, { "epoch": 44.499424626006906, "grad_norm": 1.1767253875732422, "learning_rate": 0.0011100115074798621, "loss": 0.5157, "step": 154680 }, { "epoch": 44.50230149597238, "grad_norm": 1.9817372560501099, "learning_rate": 0.0011099539700805522, "loss": 0.6279, "step": 154690 }, { "epoch": 44.50517836593786, "grad_norm": 1.0130800008773804, "learning_rate": 0.0011098964326812428, "loss": 0.7129, "step": 154700 }, { "epoch": 44.508055235903335, "grad_norm": 1.2252181768417358, "learning_rate": 0.0011098388952819334, "loss": 0.6754, "step": 154710 }, { "epoch": 44.51093210586882, "grad_norm": 1.6602087020874023, "learning_rate": 0.0011097813578826237, "loss": 0.6261, "step": 154720 }, { "epoch": 44.513808975834294, "grad_norm": 1.3937677145004272, "learning_rate": 0.0011097238204833143, "loss": 0.5768, "step": 154730 }, { "epoch": 44.51668584579977, "grad_norm": 0.7208621501922607, "learning_rate": 0.0011096662830840046, "loss": 0.4886, "step": 154740 }, { "epoch": 44.51956271576525, "grad_norm": 0.6176857948303223, "learning_rate": 0.001109608745684695, "loss": 0.6201, "step": 154750 }, { "epoch": 44.52243958573072, "grad_norm": 0.7625857591629028, "learning_rate": 0.0011095512082853855, "loss": 0.4238, "step": 154760 }, { "epoch": 44.5253164556962, "grad_norm": 2.18306827545166, "learning_rate": 0.0011094936708860759, "loss": 0.6984, "step": 154770 }, { "epoch": 44.52819332566168, "grad_norm": 1.3750907182693481, "learning_rate": 0.0011094361334867664, "loss": 0.4748, "step": 154780 }, { "epoch": 44.53107019562716, "grad_norm": 0.7987792491912842, "learning_rate": 0.001109378596087457, "loss": 0.5059, "step": 154790 }, { "epoch": 44.533947065592635, "grad_norm": 0.9478512406349182, "learning_rate": 0.0011093210586881474, "loss": 0.5029, "step": 154800 }, { "epoch": 44.53682393555811, "grad_norm": 1.3006747961044312, "learning_rate": 0.0011092635212888377, "loss": 0.6742, "step": 154810 }, { "epoch": 44.53970080552359, "grad_norm": 0.7990760207176208, "learning_rate": 0.0011092059838895283, "loss": 0.4654, "step": 154820 }, { "epoch": 44.54257767548907, "grad_norm": 0.8654599785804749, "learning_rate": 0.0011091484464902186, "loss": 0.6235, "step": 154830 }, { "epoch": 44.54545454545455, "grad_norm": 0.9836347103118896, "learning_rate": 0.0011090909090909092, "loss": 0.554, "step": 154840 }, { "epoch": 44.54833141542002, "grad_norm": 0.8128916025161743, "learning_rate": 0.0011090333716915995, "loss": 0.6319, "step": 154850 }, { "epoch": 44.5512082853855, "grad_norm": 1.127228021621704, "learning_rate": 0.00110897583429229, "loss": 0.507, "step": 154860 }, { "epoch": 44.554085155350975, "grad_norm": 1.04009211063385, "learning_rate": 0.0011089182968929804, "loss": 0.6659, "step": 154870 }, { "epoch": 44.55696202531646, "grad_norm": 1.669897198677063, "learning_rate": 0.0011088607594936708, "loss": 0.7104, "step": 154880 }, { "epoch": 44.559838895281935, "grad_norm": 1.7479817867279053, "learning_rate": 0.0011088032220943614, "loss": 0.5541, "step": 154890 }, { "epoch": 44.56271576524741, "grad_norm": 1.3437483310699463, "learning_rate": 0.001108745684695052, "loss": 0.5841, "step": 154900 }, { "epoch": 44.56559263521289, "grad_norm": 1.643496036529541, "learning_rate": 0.0011086881472957423, "loss": 0.5847, "step": 154910 }, { "epoch": 44.56846950517836, "grad_norm": 1.6385884284973145, "learning_rate": 0.0011086306098964328, "loss": 0.4856, "step": 154920 }, { "epoch": 44.57134637514385, "grad_norm": 2.275994300842285, "learning_rate": 0.0011085730724971232, "loss": 0.725, "step": 154930 }, { "epoch": 44.57422324510932, "grad_norm": 1.6000105142593384, "learning_rate": 0.0011085155350978135, "loss": 0.5373, "step": 154940 }, { "epoch": 44.5771001150748, "grad_norm": 0.9519878625869751, "learning_rate": 0.001108457997698504, "loss": 0.6155, "step": 154950 }, { "epoch": 44.579976985040275, "grad_norm": 1.5991450548171997, "learning_rate": 0.0011084004602991944, "loss": 0.559, "step": 154960 }, { "epoch": 44.58285385500575, "grad_norm": 1.5861588716506958, "learning_rate": 0.001108342922899885, "loss": 0.4682, "step": 154970 }, { "epoch": 44.58573072497123, "grad_norm": 0.9409889578819275, "learning_rate": 0.0011082853855005753, "loss": 0.6212, "step": 154980 }, { "epoch": 44.58860759493671, "grad_norm": 1.5628242492675781, "learning_rate": 0.0011082278481012657, "loss": 0.5137, "step": 154990 }, { "epoch": 44.59148446490219, "grad_norm": 1.0430964231491089, "learning_rate": 0.0011081703107019563, "loss": 0.5493, "step": 155000 }, { "epoch": 44.59436133486766, "grad_norm": 3.981281042098999, "learning_rate": 0.0011081127733026468, "loss": 0.5753, "step": 155010 }, { "epoch": 44.59723820483314, "grad_norm": 0.6151972413063049, "learning_rate": 0.0011080552359033372, "loss": 0.6261, "step": 155020 }, { "epoch": 44.600115074798616, "grad_norm": 2.1042263507843018, "learning_rate": 0.0011079976985040277, "loss": 0.6045, "step": 155030 }, { "epoch": 44.6029919447641, "grad_norm": 1.691383957862854, "learning_rate": 0.001107940161104718, "loss": 0.4467, "step": 155040 }, { "epoch": 44.605868814729575, "grad_norm": 1.1264050006866455, "learning_rate": 0.0011078826237054084, "loss": 0.6068, "step": 155050 }, { "epoch": 44.60874568469505, "grad_norm": 0.821373462677002, "learning_rate": 0.001107825086306099, "loss": 0.608, "step": 155060 }, { "epoch": 44.61162255466053, "grad_norm": 0.8213144540786743, "learning_rate": 0.0011077675489067895, "loss": 0.5013, "step": 155070 }, { "epoch": 44.614499424626004, "grad_norm": 1.715104103088379, "learning_rate": 0.00110771001150748, "loss": 0.5726, "step": 155080 }, { "epoch": 44.61737629459149, "grad_norm": 1.494614839553833, "learning_rate": 0.0011076524741081705, "loss": 0.4867, "step": 155090 }, { "epoch": 44.620253164556964, "grad_norm": 1.185076355934143, "learning_rate": 0.0011075949367088606, "loss": 0.6304, "step": 155100 }, { "epoch": 44.62313003452244, "grad_norm": 0.7615221738815308, "learning_rate": 0.0011075373993095512, "loss": 0.5071, "step": 155110 }, { "epoch": 44.626006904487916, "grad_norm": 3.0261988639831543, "learning_rate": 0.0011074798619102417, "loss": 0.6696, "step": 155120 }, { "epoch": 44.62888377445339, "grad_norm": 0.9317981600761414, "learning_rate": 0.001107422324510932, "loss": 0.477, "step": 155130 }, { "epoch": 44.631760644418875, "grad_norm": 0.952157199382782, "learning_rate": 0.0011073647871116226, "loss": 0.7179, "step": 155140 }, { "epoch": 44.63463751438435, "grad_norm": 1.0522758960723877, "learning_rate": 0.0011073072497123132, "loss": 0.6314, "step": 155150 }, { "epoch": 44.63751438434983, "grad_norm": 1.0413175821304321, "learning_rate": 0.0011072497123130033, "loss": 0.5615, "step": 155160 }, { "epoch": 44.640391254315304, "grad_norm": 1.249868631362915, "learning_rate": 0.0011071921749136939, "loss": 0.5988, "step": 155170 }, { "epoch": 44.64326812428078, "grad_norm": 2.425837993621826, "learning_rate": 0.0011071346375143845, "loss": 0.7121, "step": 155180 }, { "epoch": 44.64614499424626, "grad_norm": 1.3333561420440674, "learning_rate": 0.0011070771001150748, "loss": 0.5939, "step": 155190 }, { "epoch": 44.64902186421174, "grad_norm": 1.6052337884902954, "learning_rate": 0.0011070195627157654, "loss": 0.5867, "step": 155200 }, { "epoch": 44.651898734177216, "grad_norm": 2.576925039291382, "learning_rate": 0.0011069620253164557, "loss": 0.6303, "step": 155210 }, { "epoch": 44.65477560414269, "grad_norm": 2.068369150161743, "learning_rate": 0.001106904487917146, "loss": 0.5056, "step": 155220 }, { "epoch": 44.65765247410817, "grad_norm": 1.908273696899414, "learning_rate": 0.0011068469505178366, "loss": 0.5387, "step": 155230 }, { "epoch": 44.660529344073645, "grad_norm": 1.145410418510437, "learning_rate": 0.001106789413118527, "loss": 0.6043, "step": 155240 }, { "epoch": 44.66340621403913, "grad_norm": 0.7294275760650635, "learning_rate": 0.0011067318757192175, "loss": 0.5728, "step": 155250 }, { "epoch": 44.666283084004604, "grad_norm": 0.9119346141815186, "learning_rate": 0.001106674338319908, "loss": 0.6122, "step": 155260 }, { "epoch": 44.66915995397008, "grad_norm": 0.8910244703292847, "learning_rate": 0.0011066168009205984, "loss": 0.5929, "step": 155270 }, { "epoch": 44.67203682393556, "grad_norm": 2.5617480278015137, "learning_rate": 0.0011065592635212888, "loss": 0.6381, "step": 155280 }, { "epoch": 44.67491369390103, "grad_norm": 1.307166576385498, "learning_rate": 0.0011065017261219794, "loss": 0.5009, "step": 155290 }, { "epoch": 44.677790563866516, "grad_norm": 0.6485257744789124, "learning_rate": 0.0011064441887226697, "loss": 0.4978, "step": 155300 }, { "epoch": 44.68066743383199, "grad_norm": 1.6715693473815918, "learning_rate": 0.0011063866513233603, "loss": 0.5636, "step": 155310 }, { "epoch": 44.68354430379747, "grad_norm": 1.1703380346298218, "learning_rate": 0.0011063291139240506, "loss": 0.5237, "step": 155320 }, { "epoch": 44.686421173762945, "grad_norm": 1.4134368896484375, "learning_rate": 0.0011062715765247412, "loss": 0.6113, "step": 155330 }, { "epoch": 44.68929804372842, "grad_norm": 0.9737533330917358, "learning_rate": 0.0011062140391254315, "loss": 0.6064, "step": 155340 }, { "epoch": 44.692174913693904, "grad_norm": 1.528387427330017, "learning_rate": 0.0011061565017261219, "loss": 0.5676, "step": 155350 }, { "epoch": 44.69505178365938, "grad_norm": 1.11653470993042, "learning_rate": 0.0011060989643268124, "loss": 0.6646, "step": 155360 }, { "epoch": 44.69792865362486, "grad_norm": 1.7799673080444336, "learning_rate": 0.001106041426927503, "loss": 0.4957, "step": 155370 }, { "epoch": 44.70080552359033, "grad_norm": 1.1429117918014526, "learning_rate": 0.0011059838895281933, "loss": 0.6445, "step": 155380 }, { "epoch": 44.70368239355581, "grad_norm": 2.79363751411438, "learning_rate": 0.001105926352128884, "loss": 0.796, "step": 155390 }, { "epoch": 44.70655926352129, "grad_norm": 1.2173981666564941, "learning_rate": 0.0011058688147295743, "loss": 0.6692, "step": 155400 }, { "epoch": 44.70943613348677, "grad_norm": 1.2338510751724243, "learning_rate": 0.0011058112773302646, "loss": 0.597, "step": 155410 }, { "epoch": 44.712313003452245, "grad_norm": 1.0799360275268555, "learning_rate": 0.0011057537399309552, "loss": 0.6169, "step": 155420 }, { "epoch": 44.71518987341772, "grad_norm": 0.6742135286331177, "learning_rate": 0.0011056962025316455, "loss": 0.4807, "step": 155430 }, { "epoch": 44.7180667433832, "grad_norm": 1.1417304277420044, "learning_rate": 0.001105638665132336, "loss": 0.5699, "step": 155440 }, { "epoch": 44.72094361334867, "grad_norm": 1.2899823188781738, "learning_rate": 0.0011055811277330266, "loss": 0.6458, "step": 155450 }, { "epoch": 44.72382048331416, "grad_norm": 1.1316670179367065, "learning_rate": 0.0011055235903337168, "loss": 0.6096, "step": 155460 }, { "epoch": 44.72669735327963, "grad_norm": 1.7805941104888916, "learning_rate": 0.0011054660529344073, "loss": 0.6406, "step": 155470 }, { "epoch": 44.72957422324511, "grad_norm": 0.7411304116249084, "learning_rate": 0.001105408515535098, "loss": 0.6591, "step": 155480 }, { "epoch": 44.732451093210585, "grad_norm": 1.163561224937439, "learning_rate": 0.0011053509781357882, "loss": 0.5899, "step": 155490 }, { "epoch": 44.73532796317606, "grad_norm": 2.3373496532440186, "learning_rate": 0.0011052934407364788, "loss": 0.6599, "step": 155500 }, { "epoch": 44.738204833141545, "grad_norm": 0.9784538149833679, "learning_rate": 0.0011052359033371694, "loss": 0.5952, "step": 155510 }, { "epoch": 44.74108170310702, "grad_norm": 1.4820433855056763, "learning_rate": 0.0011051783659378595, "loss": 0.5486, "step": 155520 }, { "epoch": 44.7439585730725, "grad_norm": 1.2865052223205566, "learning_rate": 0.00110512082853855, "loss": 0.6343, "step": 155530 }, { "epoch": 44.74683544303797, "grad_norm": 0.6617619395256042, "learning_rate": 0.0011050632911392404, "loss": 0.6237, "step": 155540 }, { "epoch": 44.74971231300345, "grad_norm": 1.0523213148117065, "learning_rate": 0.001105005753739931, "loss": 0.5212, "step": 155550 }, { "epoch": 44.75258918296893, "grad_norm": 2.6722567081451416, "learning_rate": 0.0011049482163406215, "loss": 0.6207, "step": 155560 }, { "epoch": 44.75546605293441, "grad_norm": 0.8521923422813416, "learning_rate": 0.0011048906789413119, "loss": 0.6248, "step": 155570 }, { "epoch": 44.758342922899885, "grad_norm": 1.265159249305725, "learning_rate": 0.0011048331415420022, "loss": 0.6389, "step": 155580 }, { "epoch": 44.76121979286536, "grad_norm": 1.9092864990234375, "learning_rate": 0.0011047756041426928, "loss": 0.5331, "step": 155590 }, { "epoch": 44.76409666283084, "grad_norm": 1.6504031419754028, "learning_rate": 0.0011047180667433831, "loss": 0.5168, "step": 155600 }, { "epoch": 44.76697353279632, "grad_norm": 1.7633061408996582, "learning_rate": 0.0011046605293440737, "loss": 0.5792, "step": 155610 }, { "epoch": 44.7698504027618, "grad_norm": 1.350838541984558, "learning_rate": 0.0011046029919447643, "loss": 0.6046, "step": 155620 }, { "epoch": 44.77272727272727, "grad_norm": 0.9124739766120911, "learning_rate": 0.0011045454545454546, "loss": 0.7077, "step": 155630 }, { "epoch": 44.77560414269275, "grad_norm": 2.225987195968628, "learning_rate": 0.001104487917146145, "loss": 0.7421, "step": 155640 }, { "epoch": 44.778481012658226, "grad_norm": 1.0008397102355957, "learning_rate": 0.0011044303797468353, "loss": 0.7102, "step": 155650 }, { "epoch": 44.7813578826237, "grad_norm": 1.426232099533081, "learning_rate": 0.0011043728423475259, "loss": 0.6414, "step": 155660 }, { "epoch": 44.784234752589185, "grad_norm": 1.4855281114578247, "learning_rate": 0.0011043153049482164, "loss": 0.5607, "step": 155670 }, { "epoch": 44.78711162255466, "grad_norm": 2.4619665145874023, "learning_rate": 0.0011042577675489068, "loss": 0.7134, "step": 155680 }, { "epoch": 44.78998849252014, "grad_norm": 1.9515856504440308, "learning_rate": 0.0011042002301495974, "loss": 0.5956, "step": 155690 }, { "epoch": 44.792865362485614, "grad_norm": 2.4890005588531494, "learning_rate": 0.0011041426927502877, "loss": 0.5563, "step": 155700 }, { "epoch": 44.79574223245109, "grad_norm": 0.7906855344772339, "learning_rate": 0.001104085155350978, "loss": 0.6465, "step": 155710 }, { "epoch": 44.79861910241657, "grad_norm": 1.5232020616531372, "learning_rate": 0.0011040276179516686, "loss": 0.6787, "step": 155720 }, { "epoch": 44.80149597238205, "grad_norm": 1.1055350303649902, "learning_rate": 0.0011039700805523592, "loss": 0.5705, "step": 155730 }, { "epoch": 44.804372842347526, "grad_norm": 0.9279105067253113, "learning_rate": 0.0011039125431530495, "loss": 0.6134, "step": 155740 }, { "epoch": 44.807249712313, "grad_norm": 1.2797430753707886, "learning_rate": 0.00110385500575374, "loss": 0.6198, "step": 155750 }, { "epoch": 44.81012658227848, "grad_norm": 0.9051334857940674, "learning_rate": 0.0011037974683544304, "loss": 0.5369, "step": 155760 }, { "epoch": 44.81300345224396, "grad_norm": 1.225466012954712, "learning_rate": 0.0011037399309551208, "loss": 0.6049, "step": 155770 }, { "epoch": 44.81588032220944, "grad_norm": 3.9546079635620117, "learning_rate": 0.0011036823935558113, "loss": 0.5896, "step": 155780 }, { "epoch": 44.818757192174914, "grad_norm": 0.540328323841095, "learning_rate": 0.0011036248561565017, "loss": 0.594, "step": 155790 }, { "epoch": 44.82163406214039, "grad_norm": 1.225862741470337, "learning_rate": 0.0011035673187571923, "loss": 0.7, "step": 155800 }, { "epoch": 44.824510932105866, "grad_norm": 1.2587611675262451, "learning_rate": 0.0011035097813578826, "loss": 0.566, "step": 155810 }, { "epoch": 44.82738780207135, "grad_norm": 1.4242775440216064, "learning_rate": 0.001103452243958573, "loss": 0.6117, "step": 155820 }, { "epoch": 44.830264672036826, "grad_norm": 1.7203353643417358, "learning_rate": 0.0011033947065592635, "loss": 0.605, "step": 155830 }, { "epoch": 44.8331415420023, "grad_norm": 2.108616828918457, "learning_rate": 0.001103337169159954, "loss": 0.8089, "step": 155840 }, { "epoch": 44.83601841196778, "grad_norm": 1.1214333772659302, "learning_rate": 0.0011032796317606444, "loss": 0.7735, "step": 155850 }, { "epoch": 44.838895281933254, "grad_norm": 1.0675716400146484, "learning_rate": 0.001103222094361335, "loss": 0.4636, "step": 155860 }, { "epoch": 44.84177215189874, "grad_norm": 1.0970933437347412, "learning_rate": 0.0011031645569620253, "loss": 0.6362, "step": 155870 }, { "epoch": 44.844649021864214, "grad_norm": 1.2821564674377441, "learning_rate": 0.0011031070195627157, "loss": 0.5265, "step": 155880 }, { "epoch": 44.84752589182969, "grad_norm": 1.1312788724899292, "learning_rate": 0.0011030494821634062, "loss": 0.5786, "step": 155890 }, { "epoch": 44.850402761795166, "grad_norm": 1.7211241722106934, "learning_rate": 0.0011029919447640966, "loss": 0.5634, "step": 155900 }, { "epoch": 44.85327963176064, "grad_norm": 1.6847541332244873, "learning_rate": 0.0011029344073647872, "loss": 0.6106, "step": 155910 }, { "epoch": 44.85615650172612, "grad_norm": 1.061963677406311, "learning_rate": 0.0011028768699654777, "loss": 0.5262, "step": 155920 }, { "epoch": 44.8590333716916, "grad_norm": 0.8827042579650879, "learning_rate": 0.0011028193325661679, "loss": 0.558, "step": 155930 }, { "epoch": 44.86191024165708, "grad_norm": 1.7878957986831665, "learning_rate": 0.0011027617951668584, "loss": 0.5975, "step": 155940 }, { "epoch": 44.864787111622555, "grad_norm": 1.0225938558578491, "learning_rate": 0.001102704257767549, "loss": 0.5815, "step": 155950 }, { "epoch": 44.86766398158803, "grad_norm": 1.4805344343185425, "learning_rate": 0.0011026467203682393, "loss": 0.458, "step": 155960 }, { "epoch": 44.87054085155351, "grad_norm": 1.2256804704666138, "learning_rate": 0.00110258918296893, "loss": 0.5412, "step": 155970 }, { "epoch": 44.87341772151899, "grad_norm": 1.7223763465881348, "learning_rate": 0.0011025316455696205, "loss": 0.4692, "step": 155980 }, { "epoch": 44.876294591484466, "grad_norm": 2.1000161170959473, "learning_rate": 0.0011024741081703106, "loss": 0.4916, "step": 155990 }, { "epoch": 44.87917146144994, "grad_norm": 1.2852630615234375, "learning_rate": 0.0011024165707710012, "loss": 0.7428, "step": 156000 }, { "epoch": 44.88204833141542, "grad_norm": 1.1531798839569092, "learning_rate": 0.0011023590333716915, "loss": 0.6065, "step": 156010 }, { "epoch": 44.884925201380895, "grad_norm": 1.722399115562439, "learning_rate": 0.001102301495972382, "loss": 0.7967, "step": 156020 }, { "epoch": 44.88780207134638, "grad_norm": 0.6868346929550171, "learning_rate": 0.0011022439585730726, "loss": 0.5256, "step": 156030 }, { "epoch": 44.890678941311855, "grad_norm": 1.8034563064575195, "learning_rate": 0.001102186421173763, "loss": 0.5394, "step": 156040 }, { "epoch": 44.89355581127733, "grad_norm": 1.1515333652496338, "learning_rate": 0.0011021288837744533, "loss": 0.57, "step": 156050 }, { "epoch": 44.89643268124281, "grad_norm": 1.3928086757659912, "learning_rate": 0.0011020713463751439, "loss": 0.6144, "step": 156060 }, { "epoch": 44.89930955120828, "grad_norm": 0.791222870349884, "learning_rate": 0.0011020138089758342, "loss": 0.502, "step": 156070 }, { "epoch": 44.90218642117377, "grad_norm": 3.7241735458374023, "learning_rate": 0.0011019562715765248, "loss": 0.6872, "step": 156080 }, { "epoch": 44.90506329113924, "grad_norm": 1.350777506828308, "learning_rate": 0.0011018987341772154, "loss": 0.5855, "step": 156090 }, { "epoch": 44.90794016110472, "grad_norm": 1.9701956510543823, "learning_rate": 0.0011018411967779057, "loss": 0.6417, "step": 156100 }, { "epoch": 44.910817031070195, "grad_norm": 1.106729507446289, "learning_rate": 0.001101783659378596, "loss": 0.5134, "step": 156110 }, { "epoch": 44.91369390103567, "grad_norm": 1.5727200508117676, "learning_rate": 0.0011017261219792864, "loss": 0.606, "step": 156120 }, { "epoch": 44.91657077100115, "grad_norm": 0.7421380877494812, "learning_rate": 0.001101668584579977, "loss": 0.5556, "step": 156130 }, { "epoch": 44.91944764096663, "grad_norm": 1.0762900114059448, "learning_rate": 0.0011016110471806675, "loss": 0.6153, "step": 156140 }, { "epoch": 44.92232451093211, "grad_norm": 1.5315744876861572, "learning_rate": 0.0011015535097813579, "loss": 0.6628, "step": 156150 }, { "epoch": 44.92520138089758, "grad_norm": 0.9183085560798645, "learning_rate": 0.0011014959723820484, "loss": 0.5193, "step": 156160 }, { "epoch": 44.92807825086306, "grad_norm": 1.009008526802063, "learning_rate": 0.0011014384349827388, "loss": 0.6439, "step": 156170 }, { "epoch": 44.930955120828536, "grad_norm": 1.0021394491195679, "learning_rate": 0.0011013808975834291, "loss": 0.5097, "step": 156180 }, { "epoch": 44.93383199079402, "grad_norm": 1.8017759323120117, "learning_rate": 0.0011013233601841197, "loss": 0.5384, "step": 156190 }, { "epoch": 44.936708860759495, "grad_norm": 0.8992404937744141, "learning_rate": 0.0011012658227848103, "loss": 0.5797, "step": 156200 }, { "epoch": 44.93958573072497, "grad_norm": 2.4620895385742188, "learning_rate": 0.0011012082853855006, "loss": 0.7053, "step": 156210 }, { "epoch": 44.94246260069045, "grad_norm": 0.9383994936943054, "learning_rate": 0.0011011507479861912, "loss": 0.5265, "step": 156220 }, { "epoch": 44.945339470655924, "grad_norm": 1.863356590270996, "learning_rate": 0.0011010932105868813, "loss": 0.6804, "step": 156230 }, { "epoch": 44.94821634062141, "grad_norm": 1.1906468868255615, "learning_rate": 0.0011010356731875719, "loss": 0.5642, "step": 156240 }, { "epoch": 44.95109321058688, "grad_norm": 1.1584656238555908, "learning_rate": 0.0011009781357882624, "loss": 0.5692, "step": 156250 }, { "epoch": 44.95397008055236, "grad_norm": 1.5569666624069214, "learning_rate": 0.0011009205983889528, "loss": 0.5455, "step": 156260 }, { "epoch": 44.956846950517836, "grad_norm": 1.5495316982269287, "learning_rate": 0.0011008630609896433, "loss": 0.7632, "step": 156270 }, { "epoch": 44.95972382048331, "grad_norm": 0.8915393948554993, "learning_rate": 0.001100805523590334, "loss": 0.5537, "step": 156280 }, { "epoch": 44.962600690448795, "grad_norm": 2.1203339099884033, "learning_rate": 0.001100747986191024, "loss": 0.5667, "step": 156290 }, { "epoch": 44.96547756041427, "grad_norm": 0.5772513151168823, "learning_rate": 0.0011006904487917146, "loss": 0.6594, "step": 156300 }, { "epoch": 44.96835443037975, "grad_norm": 0.8580964207649231, "learning_rate": 0.0011006329113924052, "loss": 0.4901, "step": 156310 }, { "epoch": 44.971231300345224, "grad_norm": 1.414507269859314, "learning_rate": 0.0011005753739930955, "loss": 0.5831, "step": 156320 }, { "epoch": 44.9741081703107, "grad_norm": 1.5224229097366333, "learning_rate": 0.001100517836593786, "loss": 0.6165, "step": 156330 }, { "epoch": 44.976985040276176, "grad_norm": 1.0589295625686646, "learning_rate": 0.0011004602991944764, "loss": 0.6841, "step": 156340 }, { "epoch": 44.97986191024166, "grad_norm": 0.7753192782402039, "learning_rate": 0.0011004027617951668, "loss": 0.5753, "step": 156350 }, { "epoch": 44.982738780207136, "grad_norm": 0.500852108001709, "learning_rate": 0.0011003452243958573, "loss": 0.5339, "step": 156360 }, { "epoch": 44.98561565017261, "grad_norm": 1.4580378532409668, "learning_rate": 0.0011002876869965477, "loss": 0.6537, "step": 156370 }, { "epoch": 44.98849252013809, "grad_norm": 2.691053867340088, "learning_rate": 0.0011002301495972382, "loss": 0.6556, "step": 156380 }, { "epoch": 44.991369390103564, "grad_norm": 0.8721194267272949, "learning_rate": 0.0011001726121979288, "loss": 0.4238, "step": 156390 }, { "epoch": 44.99424626006905, "grad_norm": 0.887243390083313, "learning_rate": 0.0011001150747986192, "loss": 0.5294, "step": 156400 }, { "epoch": 44.997123130034524, "grad_norm": 1.3037817478179932, "learning_rate": 0.0011000575373993095, "loss": 0.6788, "step": 156410 }, { "epoch": 45.0, "grad_norm": 1.4970988035202026, "learning_rate": 0.0011, "loss": 0.6463, "step": 156420 }, { "epoch": 45.002876869965476, "grad_norm": 0.6809529662132263, "learning_rate": 0.0010999424626006904, "loss": 0.6161, "step": 156430 }, { "epoch": 45.00575373993095, "grad_norm": 1.6093099117279053, "learning_rate": 0.001099884925201381, "loss": 0.6257, "step": 156440 }, { "epoch": 45.008630609896436, "grad_norm": 1.8905620574951172, "learning_rate": 0.0010998273878020715, "loss": 0.5085, "step": 156450 }, { "epoch": 45.01150747986191, "grad_norm": 1.0268876552581787, "learning_rate": 0.0010997698504027619, "loss": 0.4984, "step": 156460 }, { "epoch": 45.01438434982739, "grad_norm": 1.6728688478469849, "learning_rate": 0.0010997123130034522, "loss": 0.4994, "step": 156470 }, { "epoch": 45.017261219792864, "grad_norm": 1.0824482440948486, "learning_rate": 0.0010996547756041426, "loss": 0.5753, "step": 156480 }, { "epoch": 45.02013808975834, "grad_norm": 1.165934443473816, "learning_rate": 0.0010995972382048331, "loss": 0.5498, "step": 156490 }, { "epoch": 45.023014959723824, "grad_norm": 1.1535158157348633, "learning_rate": 0.0010995397008055237, "loss": 0.6509, "step": 156500 }, { "epoch": 45.0258918296893, "grad_norm": 0.9402276873588562, "learning_rate": 0.001099482163406214, "loss": 0.5769, "step": 156510 }, { "epoch": 45.028768699654776, "grad_norm": 1.8738727569580078, "learning_rate": 0.0010994246260069046, "loss": 0.6433, "step": 156520 }, { "epoch": 45.03164556962025, "grad_norm": 1.6265658140182495, "learning_rate": 0.001099367088607595, "loss": 0.6254, "step": 156530 }, { "epoch": 45.03452243958573, "grad_norm": 1.3532723188400269, "learning_rate": 0.0010993095512082853, "loss": 0.4391, "step": 156540 }, { "epoch": 45.037399309551205, "grad_norm": 2.053679943084717, "learning_rate": 0.0010992520138089759, "loss": 0.5849, "step": 156550 }, { "epoch": 45.04027617951669, "grad_norm": 1.49441659450531, "learning_rate": 0.0010991944764096664, "loss": 0.5666, "step": 156560 }, { "epoch": 45.043153049482164, "grad_norm": 1.8538401126861572, "learning_rate": 0.0010991369390103568, "loss": 0.5077, "step": 156570 }, { "epoch": 45.04602991944764, "grad_norm": 1.902204155921936, "learning_rate": 0.0010990794016110474, "loss": 0.5991, "step": 156580 }, { "epoch": 45.04890678941312, "grad_norm": 1.1903154850006104, "learning_rate": 0.0010990218642117375, "loss": 0.535, "step": 156590 }, { "epoch": 45.05178365937859, "grad_norm": 1.4531399011611938, "learning_rate": 0.001098964326812428, "loss": 0.5004, "step": 156600 }, { "epoch": 45.054660529344076, "grad_norm": 0.8176640868186951, "learning_rate": 0.0010989067894131186, "loss": 0.4406, "step": 156610 }, { "epoch": 45.05753739930955, "grad_norm": 1.15321946144104, "learning_rate": 0.001098849252013809, "loss": 0.6039, "step": 156620 }, { "epoch": 45.06041426927503, "grad_norm": 1.6690231561660767, "learning_rate": 0.0010987917146144995, "loss": 0.4609, "step": 156630 }, { "epoch": 45.063291139240505, "grad_norm": 1.510160207748413, "learning_rate": 0.0010987341772151899, "loss": 0.5208, "step": 156640 }, { "epoch": 45.06616800920598, "grad_norm": 1.12086021900177, "learning_rate": 0.0010986766398158802, "loss": 0.695, "step": 156650 }, { "epoch": 45.069044879171464, "grad_norm": 1.1332814693450928, "learning_rate": 0.0010986191024165708, "loss": 0.7911, "step": 156660 }, { "epoch": 45.07192174913694, "grad_norm": 1.319129467010498, "learning_rate": 0.0010985615650172613, "loss": 0.4377, "step": 156670 }, { "epoch": 45.07479861910242, "grad_norm": 1.1068992614746094, "learning_rate": 0.0010985040276179517, "loss": 0.5015, "step": 156680 }, { "epoch": 45.07767548906789, "grad_norm": 2.3776817321777344, "learning_rate": 0.0010984464902186423, "loss": 0.5165, "step": 156690 }, { "epoch": 45.08055235903337, "grad_norm": 1.8851391077041626, "learning_rate": 0.0010983889528193324, "loss": 0.539, "step": 156700 }, { "epoch": 45.08342922899885, "grad_norm": 1.5321252346038818, "learning_rate": 0.001098331415420023, "loss": 0.4821, "step": 156710 }, { "epoch": 45.08630609896433, "grad_norm": 1.5535831451416016, "learning_rate": 0.0010982738780207135, "loss": 0.6837, "step": 156720 }, { "epoch": 45.089182968929805, "grad_norm": 1.0766949653625488, "learning_rate": 0.0010982163406214039, "loss": 0.6421, "step": 156730 }, { "epoch": 45.09205983889528, "grad_norm": 1.2453380823135376, "learning_rate": 0.0010981588032220944, "loss": 0.6127, "step": 156740 }, { "epoch": 45.09493670886076, "grad_norm": 0.5021914839744568, "learning_rate": 0.001098101265822785, "loss": 0.5245, "step": 156750 }, { "epoch": 45.09781357882623, "grad_norm": 1.4086265563964844, "learning_rate": 0.0010980437284234751, "loss": 0.5569, "step": 156760 }, { "epoch": 45.10069044879172, "grad_norm": 0.9775968194007874, "learning_rate": 0.0010979861910241657, "loss": 0.4603, "step": 156770 }, { "epoch": 45.10356731875719, "grad_norm": 1.4413789510726929, "learning_rate": 0.0010979286536248562, "loss": 0.6027, "step": 156780 }, { "epoch": 45.10644418872267, "grad_norm": 1.8493752479553223, "learning_rate": 0.0010978711162255466, "loss": 0.5631, "step": 156790 }, { "epoch": 45.109321058688145, "grad_norm": 0.8471063375473022, "learning_rate": 0.0010978135788262372, "loss": 0.4279, "step": 156800 }, { "epoch": 45.11219792865362, "grad_norm": 1.248145580291748, "learning_rate": 0.0010977560414269275, "loss": 0.4907, "step": 156810 }, { "epoch": 45.115074798619105, "grad_norm": 0.7450137734413147, "learning_rate": 0.0010976985040276179, "loss": 0.5294, "step": 156820 }, { "epoch": 45.11795166858458, "grad_norm": 2.3824121952056885, "learning_rate": 0.0010976409666283084, "loss": 0.5785, "step": 156830 }, { "epoch": 45.12082853855006, "grad_norm": 1.664893388748169, "learning_rate": 0.0010975834292289988, "loss": 0.4286, "step": 156840 }, { "epoch": 45.123705408515534, "grad_norm": 1.689658761024475, "learning_rate": 0.0010975258918296893, "loss": 0.5351, "step": 156850 }, { "epoch": 45.12658227848101, "grad_norm": 1.1609548330307007, "learning_rate": 0.0010974683544303799, "loss": 0.6451, "step": 156860 }, { "epoch": 45.12945914844649, "grad_norm": 1.5488898754119873, "learning_rate": 0.0010974108170310702, "loss": 0.5078, "step": 156870 }, { "epoch": 45.13233601841197, "grad_norm": 0.779960572719574, "learning_rate": 0.0010973532796317606, "loss": 0.5754, "step": 156880 }, { "epoch": 45.135212888377445, "grad_norm": 1.5351338386535645, "learning_rate": 0.0010972957422324511, "loss": 0.4258, "step": 156890 }, { "epoch": 45.13808975834292, "grad_norm": 1.3061436414718628, "learning_rate": 0.0010972382048331415, "loss": 0.488, "step": 156900 }, { "epoch": 45.1409666283084, "grad_norm": 1.075953483581543, "learning_rate": 0.001097180667433832, "loss": 0.542, "step": 156910 }, { "epoch": 45.14384349827388, "grad_norm": 2.26615047454834, "learning_rate": 0.0010971231300345224, "loss": 0.6842, "step": 156920 }, { "epoch": 45.14672036823936, "grad_norm": 2.0389585494995117, "learning_rate": 0.001097065592635213, "loss": 0.6346, "step": 156930 }, { "epoch": 45.149597238204834, "grad_norm": 1.415013074874878, "learning_rate": 0.0010970080552359033, "loss": 0.5329, "step": 156940 }, { "epoch": 45.15247410817031, "grad_norm": 1.0295356512069702, "learning_rate": 0.0010969505178365937, "loss": 0.5951, "step": 156950 }, { "epoch": 45.155350978135786, "grad_norm": 2.2707204818725586, "learning_rate": 0.0010968929804372842, "loss": 0.6288, "step": 156960 }, { "epoch": 45.15822784810127, "grad_norm": 1.3807742595672607, "learning_rate": 0.0010968354430379748, "loss": 0.4786, "step": 156970 }, { "epoch": 45.161104718066746, "grad_norm": 1.1282066106796265, "learning_rate": 0.0010967779056386651, "loss": 0.4474, "step": 156980 }, { "epoch": 45.16398158803222, "grad_norm": 1.5415385961532593, "learning_rate": 0.0010967203682393557, "loss": 0.5891, "step": 156990 }, { "epoch": 45.1668584579977, "grad_norm": 1.0294432640075684, "learning_rate": 0.001096662830840046, "loss": 0.472, "step": 157000 }, { "epoch": 45.169735327963174, "grad_norm": 1.15913987159729, "learning_rate": 0.0010966052934407364, "loss": 0.5805, "step": 157010 }, { "epoch": 45.17261219792865, "grad_norm": 1.3674461841583252, "learning_rate": 0.001096547756041427, "loss": 0.6599, "step": 157020 }, { "epoch": 45.175489067894134, "grad_norm": 1.8643503189086914, "learning_rate": 0.0010964902186421175, "loss": 0.6855, "step": 157030 }, { "epoch": 45.17836593785961, "grad_norm": 0.7017168402671814, "learning_rate": 0.0010964326812428079, "loss": 0.7068, "step": 157040 }, { "epoch": 45.181242807825086, "grad_norm": 1.6919752359390259, "learning_rate": 0.0010963751438434984, "loss": 0.538, "step": 157050 }, { "epoch": 45.18411967779056, "grad_norm": 1.766174077987671, "learning_rate": 0.0010963176064441886, "loss": 0.5198, "step": 157060 }, { "epoch": 45.18699654775604, "grad_norm": 1.4684089422225952, "learning_rate": 0.0010962600690448791, "loss": 0.6838, "step": 157070 }, { "epoch": 45.18987341772152, "grad_norm": 1.5566062927246094, "learning_rate": 0.0010962025316455697, "loss": 0.5326, "step": 157080 }, { "epoch": 45.192750287687, "grad_norm": 1.076975703239441, "learning_rate": 0.00109614499424626, "loss": 0.6998, "step": 157090 }, { "epoch": 45.195627157652474, "grad_norm": 1.0938093662261963, "learning_rate": 0.0010960874568469506, "loss": 0.5307, "step": 157100 }, { "epoch": 45.19850402761795, "grad_norm": 0.6617721319198608, "learning_rate": 0.0010960299194476412, "loss": 0.4484, "step": 157110 }, { "epoch": 45.20138089758343, "grad_norm": 1.780381202697754, "learning_rate": 0.0010959723820483313, "loss": 0.581, "step": 157120 }, { "epoch": 45.20425776754891, "grad_norm": 1.7947263717651367, "learning_rate": 0.0010959148446490219, "loss": 0.525, "step": 157130 }, { "epoch": 45.207134637514386, "grad_norm": 1.162933588027954, "learning_rate": 0.0010958573072497124, "loss": 0.4936, "step": 157140 }, { "epoch": 45.21001150747986, "grad_norm": 4.2409210205078125, "learning_rate": 0.0010957997698504028, "loss": 0.6853, "step": 157150 }, { "epoch": 45.21288837744534, "grad_norm": 1.0576879978179932, "learning_rate": 0.0010957422324510933, "loss": 0.5177, "step": 157160 }, { "epoch": 45.215765247410815, "grad_norm": 1.7668617963790894, "learning_rate": 0.0010956846950517837, "loss": 0.6002, "step": 157170 }, { "epoch": 45.2186421173763, "grad_norm": 1.7043755054473877, "learning_rate": 0.001095627157652474, "loss": 0.6213, "step": 157180 }, { "epoch": 45.221518987341774, "grad_norm": 1.2783366441726685, "learning_rate": 0.0010955696202531646, "loss": 0.6287, "step": 157190 }, { "epoch": 45.22439585730725, "grad_norm": 2.0377368927001953, "learning_rate": 0.001095512082853855, "loss": 0.6221, "step": 157200 }, { "epoch": 45.22727272727273, "grad_norm": 0.7672362327575684, "learning_rate": 0.0010954545454545455, "loss": 0.3852, "step": 157210 }, { "epoch": 45.2301495972382, "grad_norm": 1.1842882633209229, "learning_rate": 0.001095397008055236, "loss": 0.6577, "step": 157220 }, { "epoch": 45.23302646720368, "grad_norm": 0.9641808867454529, "learning_rate": 0.0010953394706559264, "loss": 0.4478, "step": 157230 }, { "epoch": 45.23590333716916, "grad_norm": 1.5333970785140991, "learning_rate": 0.0010952819332566168, "loss": 0.5792, "step": 157240 }, { "epoch": 45.23878020713464, "grad_norm": 0.9196968078613281, "learning_rate": 0.0010952243958573073, "loss": 0.5179, "step": 157250 }, { "epoch": 45.241657077100115, "grad_norm": 0.874958872795105, "learning_rate": 0.0010951668584579977, "loss": 0.6201, "step": 157260 }, { "epoch": 45.24453394706559, "grad_norm": 1.325085163116455, "learning_rate": 0.0010951093210586882, "loss": 0.6281, "step": 157270 }, { "epoch": 45.24741081703107, "grad_norm": 1.623045802116394, "learning_rate": 0.0010950517836593786, "loss": 0.6069, "step": 157280 }, { "epoch": 45.25028768699655, "grad_norm": 0.8711390495300293, "learning_rate": 0.0010949942462600692, "loss": 0.5339, "step": 157290 }, { "epoch": 45.25316455696203, "grad_norm": 1.1868749856948853, "learning_rate": 0.0010949367088607595, "loss": 0.5504, "step": 157300 }, { "epoch": 45.2560414269275, "grad_norm": 0.9415478706359863, "learning_rate": 0.0010948791714614498, "loss": 0.4775, "step": 157310 }, { "epoch": 45.25891829689298, "grad_norm": 0.9430540204048157, "learning_rate": 0.0010948216340621404, "loss": 0.5888, "step": 157320 }, { "epoch": 45.261795166858455, "grad_norm": 0.9291953444480896, "learning_rate": 0.001094764096662831, "loss": 0.5564, "step": 157330 }, { "epoch": 45.26467203682394, "grad_norm": 1.017980933189392, "learning_rate": 0.0010947065592635213, "loss": 0.4911, "step": 157340 }, { "epoch": 45.267548906789415, "grad_norm": 1.696380615234375, "learning_rate": 0.0010946490218642119, "loss": 0.5349, "step": 157350 }, { "epoch": 45.27042577675489, "grad_norm": 1.8956034183502197, "learning_rate": 0.0010945914844649022, "loss": 0.6894, "step": 157360 }, { "epoch": 45.27330264672037, "grad_norm": 1.1575963497161865, "learning_rate": 0.0010945339470655926, "loss": 0.5418, "step": 157370 }, { "epoch": 45.27617951668584, "grad_norm": 1.1810617446899414, "learning_rate": 0.0010944764096662831, "loss": 0.598, "step": 157380 }, { "epoch": 45.27905638665133, "grad_norm": 1.3220067024230957, "learning_rate": 0.0010944188722669735, "loss": 0.7591, "step": 157390 }, { "epoch": 45.2819332566168, "grad_norm": 1.045798897743225, "learning_rate": 0.001094361334867664, "loss": 0.5093, "step": 157400 }, { "epoch": 45.28481012658228, "grad_norm": 1.0589693784713745, "learning_rate": 0.0010943037974683546, "loss": 0.5208, "step": 157410 }, { "epoch": 45.287686996547755, "grad_norm": 1.44037663936615, "learning_rate": 0.0010942462600690447, "loss": 0.5633, "step": 157420 }, { "epoch": 45.29056386651323, "grad_norm": 1.2516300678253174, "learning_rate": 0.0010941887226697353, "loss": 0.658, "step": 157430 }, { "epoch": 45.29344073647871, "grad_norm": 0.7545686364173889, "learning_rate": 0.0010941311852704259, "loss": 0.4813, "step": 157440 }, { "epoch": 45.29631760644419, "grad_norm": 0.9399777054786682, "learning_rate": 0.0010940736478711162, "loss": 0.5082, "step": 157450 }, { "epoch": 45.29919447640967, "grad_norm": 1.5505287647247314, "learning_rate": 0.0010940161104718068, "loss": 0.6839, "step": 157460 }, { "epoch": 45.30207134637514, "grad_norm": 1.3034300804138184, "learning_rate": 0.0010939585730724971, "loss": 0.6538, "step": 157470 }, { "epoch": 45.30494821634062, "grad_norm": 1.5044435262680054, "learning_rate": 0.0010939010356731875, "loss": 0.5666, "step": 157480 }, { "epoch": 45.307825086306096, "grad_norm": 0.9123226404190063, "learning_rate": 0.001093843498273878, "loss": 0.4838, "step": 157490 }, { "epoch": 45.31070195627158, "grad_norm": 2.018357753753662, "learning_rate": 0.0010937859608745684, "loss": 0.5413, "step": 157500 }, { "epoch": 45.313578826237055, "grad_norm": 1.3229776620864868, "learning_rate": 0.001093728423475259, "loss": 0.6339, "step": 157510 }, { "epoch": 45.31645569620253, "grad_norm": 0.7483320236206055, "learning_rate": 0.0010936708860759495, "loss": 0.5948, "step": 157520 }, { "epoch": 45.31933256616801, "grad_norm": 1.4133377075195312, "learning_rate": 0.0010936133486766397, "loss": 0.5347, "step": 157530 }, { "epoch": 45.322209436133484, "grad_norm": 1.095980167388916, "learning_rate": 0.0010935558112773302, "loss": 0.4481, "step": 157540 }, { "epoch": 45.32508630609897, "grad_norm": 1.460664987564087, "learning_rate": 0.0010934982738780208, "loss": 0.6429, "step": 157550 }, { "epoch": 45.32796317606444, "grad_norm": 0.7892983555793762, "learning_rate": 0.0010934407364787111, "loss": 0.5537, "step": 157560 }, { "epoch": 45.33084004602992, "grad_norm": 1.7523311376571655, "learning_rate": 0.0010933831990794017, "loss": 0.6698, "step": 157570 }, { "epoch": 45.333716915995396, "grad_norm": 0.7833161354064941, "learning_rate": 0.0010933256616800923, "loss": 0.5717, "step": 157580 }, { "epoch": 45.33659378596087, "grad_norm": 0.8494453430175781, "learning_rate": 0.0010932681242807824, "loss": 0.4707, "step": 157590 }, { "epoch": 45.339470655926355, "grad_norm": 1.2551461458206177, "learning_rate": 0.001093210586881473, "loss": 0.5564, "step": 157600 }, { "epoch": 45.34234752589183, "grad_norm": 0.8182249665260315, "learning_rate": 0.0010931530494821633, "loss": 0.5342, "step": 157610 }, { "epoch": 45.34522439585731, "grad_norm": 1.0569947957992554, "learning_rate": 0.0010930955120828539, "loss": 0.4963, "step": 157620 }, { "epoch": 45.348101265822784, "grad_norm": 1.092234492301941, "learning_rate": 0.0010930379746835444, "loss": 0.6505, "step": 157630 }, { "epoch": 45.35097813578826, "grad_norm": 1.0970193147659302, "learning_rate": 0.0010929804372842348, "loss": 0.6983, "step": 157640 }, { "epoch": 45.353855005753736, "grad_norm": 0.9285383820533752, "learning_rate": 0.0010929228998849251, "loss": 0.5254, "step": 157650 }, { "epoch": 45.35673187571922, "grad_norm": 1.0163540840148926, "learning_rate": 0.0010928653624856157, "loss": 0.5652, "step": 157660 }, { "epoch": 45.359608745684696, "grad_norm": 0.6705255508422852, "learning_rate": 0.001092807825086306, "loss": 0.5204, "step": 157670 }, { "epoch": 45.36248561565017, "grad_norm": 0.7962871193885803, "learning_rate": 0.0010927502876869966, "loss": 0.5603, "step": 157680 }, { "epoch": 45.36536248561565, "grad_norm": 1.188016414642334, "learning_rate": 0.0010926927502876872, "loss": 0.6147, "step": 157690 }, { "epoch": 45.368239355581125, "grad_norm": 2.474534511566162, "learning_rate": 0.0010926352128883775, "loss": 0.619, "step": 157700 }, { "epoch": 45.37111622554661, "grad_norm": 1.3405476808547974, "learning_rate": 0.0010925776754890679, "loss": 0.6505, "step": 157710 }, { "epoch": 45.373993095512084, "grad_norm": 1.8080635070800781, "learning_rate": 0.0010925201380897584, "loss": 0.5653, "step": 157720 }, { "epoch": 45.37686996547756, "grad_norm": 1.7272306680679321, "learning_rate": 0.0010924626006904488, "loss": 0.5308, "step": 157730 }, { "epoch": 45.379746835443036, "grad_norm": 0.8736345767974854, "learning_rate": 0.0010924050632911393, "loss": 0.567, "step": 157740 }, { "epoch": 45.38262370540851, "grad_norm": 1.2082405090332031, "learning_rate": 0.0010923475258918297, "loss": 0.4822, "step": 157750 }, { "epoch": 45.385500575373996, "grad_norm": 1.2483359575271606, "learning_rate": 0.0010922899884925202, "loss": 0.6304, "step": 157760 }, { "epoch": 45.38837744533947, "grad_norm": 1.6653015613555908, "learning_rate": 0.0010922324510932106, "loss": 0.6747, "step": 157770 }, { "epoch": 45.39125431530495, "grad_norm": 2.0661635398864746, "learning_rate": 0.001092174913693901, "loss": 0.5047, "step": 157780 }, { "epoch": 45.394131185270425, "grad_norm": 0.7365753650665283, "learning_rate": 0.0010921173762945915, "loss": 0.5623, "step": 157790 }, { "epoch": 45.3970080552359, "grad_norm": 1.508962869644165, "learning_rate": 0.001092059838895282, "loss": 0.479, "step": 157800 }, { "epoch": 45.399884925201384, "grad_norm": 1.0541247129440308, "learning_rate": 0.0010920023014959724, "loss": 0.6125, "step": 157810 }, { "epoch": 45.40276179516686, "grad_norm": 1.4485254287719727, "learning_rate": 0.001091944764096663, "loss": 0.6741, "step": 157820 }, { "epoch": 45.40563866513234, "grad_norm": 1.9278669357299805, "learning_rate": 0.0010918872266973533, "loss": 0.5869, "step": 157830 }, { "epoch": 45.40851553509781, "grad_norm": 1.4240772724151611, "learning_rate": 0.0010918296892980437, "loss": 0.5191, "step": 157840 }, { "epoch": 45.41139240506329, "grad_norm": 1.6921091079711914, "learning_rate": 0.0010917721518987342, "loss": 0.5632, "step": 157850 }, { "epoch": 45.41426927502877, "grad_norm": 1.1739475727081299, "learning_rate": 0.0010917146144994246, "loss": 0.5295, "step": 157860 }, { "epoch": 45.41714614499425, "grad_norm": 1.468778133392334, "learning_rate": 0.0010916570771001151, "loss": 0.6566, "step": 157870 }, { "epoch": 45.420023014959725, "grad_norm": 0.8148202896118164, "learning_rate": 0.0010915995397008057, "loss": 0.5948, "step": 157880 }, { "epoch": 45.4228998849252, "grad_norm": 3.1187779903411865, "learning_rate": 0.0010915420023014958, "loss": 0.7141, "step": 157890 }, { "epoch": 45.42577675489068, "grad_norm": 1.024803876876831, "learning_rate": 0.0010914844649021864, "loss": 0.4918, "step": 157900 }, { "epoch": 45.42865362485615, "grad_norm": 1.2975127696990967, "learning_rate": 0.001091426927502877, "loss": 0.4754, "step": 157910 }, { "epoch": 45.43153049482164, "grad_norm": 1.3092272281646729, "learning_rate": 0.0010913693901035673, "loss": 0.4845, "step": 157920 }, { "epoch": 45.43440736478711, "grad_norm": 1.5052974224090576, "learning_rate": 0.0010913118527042579, "loss": 0.5393, "step": 157930 }, { "epoch": 45.43728423475259, "grad_norm": 1.8081929683685303, "learning_rate": 0.0010912543153049484, "loss": 0.5807, "step": 157940 }, { "epoch": 45.440161104718065, "grad_norm": 1.4538630247116089, "learning_rate": 0.0010911967779056386, "loss": 0.5795, "step": 157950 }, { "epoch": 45.44303797468354, "grad_norm": 1.2150551080703735, "learning_rate": 0.0010911392405063291, "loss": 0.5948, "step": 157960 }, { "epoch": 45.445914844649025, "grad_norm": 1.6802148818969727, "learning_rate": 0.0010910817031070195, "loss": 0.6164, "step": 157970 }, { "epoch": 45.4487917146145, "grad_norm": 2.534372568130493, "learning_rate": 0.00109102416570771, "loss": 0.5892, "step": 157980 }, { "epoch": 45.45166858457998, "grad_norm": 1.1861943006515503, "learning_rate": 0.0010909666283084006, "loss": 0.6185, "step": 157990 }, { "epoch": 45.45454545454545, "grad_norm": 0.9933858513832092, "learning_rate": 0.001090909090909091, "loss": 0.6531, "step": 158000 }, { "epoch": 45.45742232451093, "grad_norm": 1.482967734336853, "learning_rate": 0.0010908515535097813, "loss": 0.5345, "step": 158010 }, { "epoch": 45.46029919447641, "grad_norm": 0.9169593453407288, "learning_rate": 0.0010907940161104719, "loss": 0.6286, "step": 158020 }, { "epoch": 45.46317606444189, "grad_norm": 1.1617659330368042, "learning_rate": 0.0010907364787111622, "loss": 0.6464, "step": 158030 }, { "epoch": 45.466052934407365, "grad_norm": 1.7771437168121338, "learning_rate": 0.0010906789413118528, "loss": 0.5535, "step": 158040 }, { "epoch": 45.46892980437284, "grad_norm": 0.8526556491851807, "learning_rate": 0.0010906214039125433, "loss": 0.4614, "step": 158050 }, { "epoch": 45.47180667433832, "grad_norm": 1.9023611545562744, "learning_rate": 0.0010905638665132337, "loss": 0.4901, "step": 158060 }, { "epoch": 45.4746835443038, "grad_norm": 0.6698625087738037, "learning_rate": 0.001090506329113924, "loss": 0.4679, "step": 158070 }, { "epoch": 45.47756041426928, "grad_norm": 0.5699282288551331, "learning_rate": 0.0010904487917146144, "loss": 0.4558, "step": 158080 }, { "epoch": 45.48043728423475, "grad_norm": 1.468485713005066, "learning_rate": 0.001090391254315305, "loss": 0.6676, "step": 158090 }, { "epoch": 45.48331415420023, "grad_norm": 1.63393235206604, "learning_rate": 0.0010903337169159955, "loss": 0.5839, "step": 158100 }, { "epoch": 45.486191024165706, "grad_norm": 1.054572582244873, "learning_rate": 0.0010902761795166859, "loss": 0.5381, "step": 158110 }, { "epoch": 45.48906789413118, "grad_norm": 1.0675190687179565, "learning_rate": 0.0010902186421173764, "loss": 0.5207, "step": 158120 }, { "epoch": 45.491944764096665, "grad_norm": 0.614805281162262, "learning_rate": 0.0010901611047180668, "loss": 0.4495, "step": 158130 }, { "epoch": 45.49482163406214, "grad_norm": 1.703296184539795, "learning_rate": 0.0010901035673187571, "loss": 0.5304, "step": 158140 }, { "epoch": 45.49769850402762, "grad_norm": 1.2470413446426392, "learning_rate": 0.0010900460299194477, "loss": 0.4858, "step": 158150 }, { "epoch": 45.500575373993094, "grad_norm": 1.4965680837631226, "learning_rate": 0.0010899884925201382, "loss": 0.6533, "step": 158160 }, { "epoch": 45.50345224395857, "grad_norm": 1.400347352027893, "learning_rate": 0.0010899309551208286, "loss": 0.5943, "step": 158170 }, { "epoch": 45.50632911392405, "grad_norm": 1.2319103479385376, "learning_rate": 0.0010898734177215192, "loss": 0.4671, "step": 158180 }, { "epoch": 45.50920598388953, "grad_norm": 1.1612663269042969, "learning_rate": 0.0010898158803222093, "loss": 0.6985, "step": 158190 }, { "epoch": 45.512082853855006, "grad_norm": 1.341591238975525, "learning_rate": 0.0010897583429228998, "loss": 0.6395, "step": 158200 }, { "epoch": 45.51495972382048, "grad_norm": 1.4044309854507446, "learning_rate": 0.0010897008055235904, "loss": 0.47, "step": 158210 }, { "epoch": 45.51783659378596, "grad_norm": 1.3595837354660034, "learning_rate": 0.0010896432681242808, "loss": 0.4932, "step": 158220 }, { "epoch": 45.52071346375144, "grad_norm": 1.3194726705551147, "learning_rate": 0.0010895857307249713, "loss": 0.5457, "step": 158230 }, { "epoch": 45.52359033371692, "grad_norm": 1.0912121534347534, "learning_rate": 0.0010895281933256619, "loss": 0.5763, "step": 158240 }, { "epoch": 45.526467203682394, "grad_norm": 1.3998651504516602, "learning_rate": 0.001089470655926352, "loss": 0.6616, "step": 158250 }, { "epoch": 45.52934407364787, "grad_norm": 1.3439812660217285, "learning_rate": 0.0010894131185270426, "loss": 0.586, "step": 158260 }, { "epoch": 45.532220943613346, "grad_norm": 1.0343022346496582, "learning_rate": 0.0010893555811277331, "loss": 0.6252, "step": 158270 }, { "epoch": 45.53509781357883, "grad_norm": 1.0815632343292236, "learning_rate": 0.0010892980437284235, "loss": 0.6794, "step": 158280 }, { "epoch": 45.537974683544306, "grad_norm": 1.2854644060134888, "learning_rate": 0.001089240506329114, "loss": 0.6108, "step": 158290 }, { "epoch": 45.54085155350978, "grad_norm": 1.467008113861084, "learning_rate": 0.0010891829689298042, "loss": 0.5001, "step": 158300 }, { "epoch": 45.54372842347526, "grad_norm": 0.9481443166732788, "learning_rate": 0.0010891254315304947, "loss": 0.6021, "step": 158310 }, { "epoch": 45.546605293440734, "grad_norm": 1.653988242149353, "learning_rate": 0.0010890678941311853, "loss": 0.7584, "step": 158320 }, { "epoch": 45.54948216340621, "grad_norm": 1.1785808801651, "learning_rate": 0.0010890103567318757, "loss": 0.4733, "step": 158330 }, { "epoch": 45.552359033371694, "grad_norm": 1.3856922388076782, "learning_rate": 0.0010889528193325662, "loss": 0.6804, "step": 158340 }, { "epoch": 45.55523590333717, "grad_norm": 0.7099706530570984, "learning_rate": 0.0010888952819332568, "loss": 0.6043, "step": 158350 }, { "epoch": 45.558112773302646, "grad_norm": 1.5216885805130005, "learning_rate": 0.001088837744533947, "loss": 0.5283, "step": 158360 }, { "epoch": 45.56098964326812, "grad_norm": 1.6291500329971313, "learning_rate": 0.0010887802071346375, "loss": 0.6493, "step": 158370 }, { "epoch": 45.5638665132336, "grad_norm": 2.137838363647461, "learning_rate": 0.001088722669735328, "loss": 0.5918, "step": 158380 }, { "epoch": 45.56674338319908, "grad_norm": 1.6491894721984863, "learning_rate": 0.0010886651323360184, "loss": 0.588, "step": 158390 }, { "epoch": 45.56962025316456, "grad_norm": 1.0006029605865479, "learning_rate": 0.001088607594936709, "loss": 0.6336, "step": 158400 }, { "epoch": 45.572497123130034, "grad_norm": 3.0985541343688965, "learning_rate": 0.0010885500575373995, "loss": 0.6849, "step": 158410 }, { "epoch": 45.57537399309551, "grad_norm": 1.247040033340454, "learning_rate": 0.0010884925201380896, "loss": 0.5138, "step": 158420 }, { "epoch": 45.57825086306099, "grad_norm": 2.177504777908325, "learning_rate": 0.0010884349827387802, "loss": 0.5532, "step": 158430 }, { "epoch": 45.58112773302647, "grad_norm": 0.6531399488449097, "learning_rate": 0.0010883774453394706, "loss": 0.4996, "step": 158440 }, { "epoch": 45.584004602991946, "grad_norm": 1.4041601419448853, "learning_rate": 0.0010883199079401611, "loss": 0.4926, "step": 158450 }, { "epoch": 45.58688147295742, "grad_norm": 2.1136245727539062, "learning_rate": 0.0010882623705408517, "loss": 0.4952, "step": 158460 }, { "epoch": 45.5897583429229, "grad_norm": 1.2838715314865112, "learning_rate": 0.001088204833141542, "loss": 0.6174, "step": 158470 }, { "epoch": 45.592635212888375, "grad_norm": 0.7609138488769531, "learning_rate": 0.0010881472957422324, "loss": 0.6321, "step": 158480 }, { "epoch": 45.59551208285386, "grad_norm": 1.266786813735962, "learning_rate": 0.001088089758342923, "loss": 0.4496, "step": 158490 }, { "epoch": 45.598388952819334, "grad_norm": 0.6382750868797302, "learning_rate": 0.0010880322209436133, "loss": 0.6041, "step": 158500 }, { "epoch": 45.60126582278481, "grad_norm": 1.6596277952194214, "learning_rate": 0.0010879746835443039, "loss": 0.8565, "step": 158510 }, { "epoch": 45.60414269275029, "grad_norm": 0.7203642129898071, "learning_rate": 0.0010879171461449944, "loss": 0.5612, "step": 158520 }, { "epoch": 45.60701956271576, "grad_norm": 0.8655259013175964, "learning_rate": 0.0010878596087456848, "loss": 0.566, "step": 158530 }, { "epoch": 45.60989643268124, "grad_norm": 1.7176587581634521, "learning_rate": 0.0010878020713463751, "loss": 0.5147, "step": 158540 }, { "epoch": 45.61277330264672, "grad_norm": 1.6613034009933472, "learning_rate": 0.0010877445339470655, "loss": 0.5289, "step": 158550 }, { "epoch": 45.6156501726122, "grad_norm": 2.2319023609161377, "learning_rate": 0.001087686996547756, "loss": 0.6088, "step": 158560 }, { "epoch": 45.618527042577675, "grad_norm": 0.7286262512207031, "learning_rate": 0.0010876294591484466, "loss": 0.6067, "step": 158570 }, { "epoch": 45.62140391254315, "grad_norm": 1.482527256011963, "learning_rate": 0.001087571921749137, "loss": 0.6141, "step": 158580 }, { "epoch": 45.62428078250863, "grad_norm": 0.7673502564430237, "learning_rate": 0.0010875143843498275, "loss": 0.5817, "step": 158590 }, { "epoch": 45.62715765247411, "grad_norm": 1.445295810699463, "learning_rate": 0.0010874568469505178, "loss": 0.5656, "step": 158600 }, { "epoch": 45.63003452243959, "grad_norm": 1.114974021911621, "learning_rate": 0.0010873993095512082, "loss": 0.7046, "step": 158610 }, { "epoch": 45.63291139240506, "grad_norm": 1.0349643230438232, "learning_rate": 0.0010873417721518988, "loss": 0.6087, "step": 158620 }, { "epoch": 45.63578826237054, "grad_norm": 1.1277692317962646, "learning_rate": 0.0010872842347525893, "loss": 0.5307, "step": 158630 }, { "epoch": 45.638665132336016, "grad_norm": 1.1014115810394287, "learning_rate": 0.0010872266973532797, "loss": 0.7364, "step": 158640 }, { "epoch": 45.6415420023015, "grad_norm": 1.5572375059127808, "learning_rate": 0.0010871691599539702, "loss": 0.607, "step": 158650 }, { "epoch": 45.644418872266975, "grad_norm": 1.2388434410095215, "learning_rate": 0.0010871116225546604, "loss": 0.5404, "step": 158660 }, { "epoch": 45.64729574223245, "grad_norm": 1.002493143081665, "learning_rate": 0.001087054085155351, "loss": 0.6528, "step": 158670 }, { "epoch": 45.65017261219793, "grad_norm": 1.0562891960144043, "learning_rate": 0.0010869965477560415, "loss": 0.5044, "step": 158680 }, { "epoch": 45.653049482163404, "grad_norm": 1.0086673498153687, "learning_rate": 0.0010869390103567318, "loss": 0.4259, "step": 158690 }, { "epoch": 45.65592635212889, "grad_norm": 1.271813154220581, "learning_rate": 0.0010868814729574224, "loss": 0.7436, "step": 158700 }, { "epoch": 45.65880322209436, "grad_norm": 0.6397767663002014, "learning_rate": 0.001086823935558113, "loss": 0.6585, "step": 158710 }, { "epoch": 45.66168009205984, "grad_norm": 0.8764796257019043, "learning_rate": 0.001086766398158803, "loss": 0.6119, "step": 158720 }, { "epoch": 45.664556962025316, "grad_norm": 1.3861690759658813, "learning_rate": 0.0010867088607594937, "loss": 0.5066, "step": 158730 }, { "epoch": 45.66743383199079, "grad_norm": 2.4524126052856445, "learning_rate": 0.0010866513233601842, "loss": 0.6551, "step": 158740 }, { "epoch": 45.670310701956275, "grad_norm": 0.8656563758850098, "learning_rate": 0.0010865937859608746, "loss": 0.4824, "step": 158750 }, { "epoch": 45.67318757192175, "grad_norm": 1.2795265913009644, "learning_rate": 0.0010865362485615651, "loss": 0.529, "step": 158760 }, { "epoch": 45.67606444188723, "grad_norm": 1.6406875848770142, "learning_rate": 0.0010864787111622555, "loss": 0.5776, "step": 158770 }, { "epoch": 45.678941311852704, "grad_norm": 0.7911466360092163, "learning_rate": 0.0010864211737629458, "loss": 0.5907, "step": 158780 }, { "epoch": 45.68181818181818, "grad_norm": 0.7737602591514587, "learning_rate": 0.0010863636363636364, "loss": 0.4708, "step": 158790 }, { "epoch": 45.684695051783656, "grad_norm": 2.4795243740081787, "learning_rate": 0.0010863060989643267, "loss": 0.7232, "step": 158800 }, { "epoch": 45.68757192174914, "grad_norm": 2.8634324073791504, "learning_rate": 0.0010862485615650173, "loss": 0.7141, "step": 158810 }, { "epoch": 45.690448791714616, "grad_norm": 1.6715881824493408, "learning_rate": 0.0010861910241657079, "loss": 0.6298, "step": 158820 }, { "epoch": 45.69332566168009, "grad_norm": 1.5376083850860596, "learning_rate": 0.0010861334867663982, "loss": 0.6834, "step": 158830 }, { "epoch": 45.69620253164557, "grad_norm": 1.3928172588348389, "learning_rate": 0.0010860759493670886, "loss": 0.6042, "step": 158840 }, { "epoch": 45.699079401611044, "grad_norm": 1.4576095342636108, "learning_rate": 0.0010860184119677791, "loss": 0.6903, "step": 158850 }, { "epoch": 45.70195627157653, "grad_norm": 0.928460419178009, "learning_rate": 0.0010859608745684695, "loss": 0.5384, "step": 158860 }, { "epoch": 45.704833141542004, "grad_norm": 0.7469093799591064, "learning_rate": 0.00108590333716916, "loss": 0.5758, "step": 158870 }, { "epoch": 45.70771001150748, "grad_norm": 3.114151954650879, "learning_rate": 0.0010858457997698504, "loss": 0.5677, "step": 158880 }, { "epoch": 45.710586881472956, "grad_norm": 1.4162447452545166, "learning_rate": 0.001085788262370541, "loss": 0.5779, "step": 158890 }, { "epoch": 45.71346375143843, "grad_norm": 0.8620601892471313, "learning_rate": 0.0010857307249712313, "loss": 0.494, "step": 158900 }, { "epoch": 45.716340621403916, "grad_norm": 0.7167282700538635, "learning_rate": 0.0010856731875719216, "loss": 0.4465, "step": 158910 }, { "epoch": 45.71921749136939, "grad_norm": 1.6548420190811157, "learning_rate": 0.0010856156501726122, "loss": 0.5224, "step": 158920 }, { "epoch": 45.72209436133487, "grad_norm": 1.6740044355392456, "learning_rate": 0.0010855581127733028, "loss": 0.5693, "step": 158930 }, { "epoch": 45.724971231300344, "grad_norm": 2.094320058822632, "learning_rate": 0.0010855005753739931, "loss": 0.6283, "step": 158940 }, { "epoch": 45.72784810126582, "grad_norm": 2.2550041675567627, "learning_rate": 0.0010854430379746837, "loss": 0.6259, "step": 158950 }, { "epoch": 45.730724971231304, "grad_norm": 0.941828191280365, "learning_rate": 0.001085385500575374, "loss": 0.6149, "step": 158960 }, { "epoch": 45.73360184119678, "grad_norm": 1.1133619546890259, "learning_rate": 0.0010853279631760644, "loss": 0.5893, "step": 158970 }, { "epoch": 45.736478711162256, "grad_norm": 0.9120194315910339, "learning_rate": 0.001085270425776755, "loss": 0.4537, "step": 158980 }, { "epoch": 45.73935558112773, "grad_norm": 1.6605414152145386, "learning_rate": 0.0010852128883774455, "loss": 0.508, "step": 158990 }, { "epoch": 45.74223245109321, "grad_norm": 3.6918039321899414, "learning_rate": 0.0010851553509781359, "loss": 0.563, "step": 159000 }, { "epoch": 45.745109321058685, "grad_norm": 1.1619981527328491, "learning_rate": 0.0010850978135788264, "loss": 0.5952, "step": 159010 }, { "epoch": 45.74798619102417, "grad_norm": 0.8595175743103027, "learning_rate": 0.0010850402761795165, "loss": 0.667, "step": 159020 }, { "epoch": 45.750863060989644, "grad_norm": 1.0469622611999512, "learning_rate": 0.0010849827387802071, "loss": 0.6137, "step": 159030 }, { "epoch": 45.75373993095512, "grad_norm": 0.7989106178283691, "learning_rate": 0.0010849252013808977, "loss": 0.4682, "step": 159040 }, { "epoch": 45.7566168009206, "grad_norm": 1.5526615381240845, "learning_rate": 0.001084867663981588, "loss": 0.6104, "step": 159050 }, { "epoch": 45.75949367088607, "grad_norm": 0.8912620544433594, "learning_rate": 0.0010848101265822786, "loss": 0.5759, "step": 159060 }, { "epoch": 45.762370540851556, "grad_norm": 1.4496755599975586, "learning_rate": 0.0010847525891829691, "loss": 0.5371, "step": 159070 }, { "epoch": 45.76524741081703, "grad_norm": 0.6622799634933472, "learning_rate": 0.0010846950517836593, "loss": 0.5856, "step": 159080 }, { "epoch": 45.76812428078251, "grad_norm": 0.9037414193153381, "learning_rate": 0.0010846375143843498, "loss": 0.595, "step": 159090 }, { "epoch": 45.771001150747985, "grad_norm": 1.1574312448501587, "learning_rate": 0.0010845799769850404, "loss": 0.596, "step": 159100 }, { "epoch": 45.77387802071346, "grad_norm": 1.7402194738388062, "learning_rate": 0.0010845224395857308, "loss": 0.7948, "step": 159110 }, { "epoch": 45.776754890678944, "grad_norm": 0.9791214466094971, "learning_rate": 0.0010844649021864213, "loss": 0.5273, "step": 159120 }, { "epoch": 45.77963176064442, "grad_norm": 1.4130624532699585, "learning_rate": 0.0010844073647871114, "loss": 0.6177, "step": 159130 }, { "epoch": 45.7825086306099, "grad_norm": 1.6594188213348389, "learning_rate": 0.001084349827387802, "loss": 0.5933, "step": 159140 }, { "epoch": 45.78538550057537, "grad_norm": 1.4932159185409546, "learning_rate": 0.0010842922899884926, "loss": 0.6068, "step": 159150 }, { "epoch": 45.78826237054085, "grad_norm": 1.0911664962768555, "learning_rate": 0.001084234752589183, "loss": 0.4902, "step": 159160 }, { "epoch": 45.79113924050633, "grad_norm": 1.4005985260009766, "learning_rate": 0.0010841772151898735, "loss": 0.5781, "step": 159170 }, { "epoch": 45.79401611047181, "grad_norm": 2.00546932220459, "learning_rate": 0.001084119677790564, "loss": 0.5502, "step": 159180 }, { "epoch": 45.796892980437285, "grad_norm": 2.0817360877990723, "learning_rate": 0.0010840621403912542, "loss": 0.6343, "step": 159190 }, { "epoch": 45.79976985040276, "grad_norm": 0.7959161996841431, "learning_rate": 0.0010840046029919447, "loss": 0.4823, "step": 159200 }, { "epoch": 45.80264672036824, "grad_norm": 1.2657735347747803, "learning_rate": 0.0010839470655926353, "loss": 0.7111, "step": 159210 }, { "epoch": 45.80552359033371, "grad_norm": 3.290874719619751, "learning_rate": 0.0010838895281933257, "loss": 0.6049, "step": 159220 }, { "epoch": 45.8084004602992, "grad_norm": 0.7774231433868408, "learning_rate": 0.0010838319907940162, "loss": 0.4489, "step": 159230 }, { "epoch": 45.81127733026467, "grad_norm": 0.8326508402824402, "learning_rate": 0.0010837744533947066, "loss": 0.5817, "step": 159240 }, { "epoch": 45.81415420023015, "grad_norm": 1.7680598497390747, "learning_rate": 0.001083716915995397, "loss": 0.6847, "step": 159250 }, { "epoch": 45.817031070195625, "grad_norm": 0.9553441405296326, "learning_rate": 0.0010836593785960875, "loss": 0.5491, "step": 159260 }, { "epoch": 45.8199079401611, "grad_norm": 1.3426454067230225, "learning_rate": 0.0010836018411967778, "loss": 0.547, "step": 159270 }, { "epoch": 45.822784810126585, "grad_norm": 1.6651358604431152, "learning_rate": 0.0010835443037974684, "loss": 0.6563, "step": 159280 }, { "epoch": 45.82566168009206, "grad_norm": 1.063948154449463, "learning_rate": 0.001083486766398159, "loss": 0.6275, "step": 159290 }, { "epoch": 45.82853855005754, "grad_norm": 1.0064324140548706, "learning_rate": 0.0010834292289988493, "loss": 0.4332, "step": 159300 }, { "epoch": 45.83141542002301, "grad_norm": 1.012642741203308, "learning_rate": 0.0010833716915995396, "loss": 0.5101, "step": 159310 }, { "epoch": 45.83429228998849, "grad_norm": 1.3921077251434326, "learning_rate": 0.0010833141542002302, "loss": 0.6933, "step": 159320 }, { "epoch": 45.83716915995397, "grad_norm": 1.5429431200027466, "learning_rate": 0.0010832566168009206, "loss": 0.6007, "step": 159330 }, { "epoch": 45.84004602991945, "grad_norm": 0.809343159198761, "learning_rate": 0.0010831990794016111, "loss": 0.5741, "step": 159340 }, { "epoch": 45.842922899884925, "grad_norm": 1.3279474973678589, "learning_rate": 0.0010831415420023015, "loss": 0.6064, "step": 159350 }, { "epoch": 45.8457997698504, "grad_norm": 1.349725365638733, "learning_rate": 0.001083084004602992, "loss": 0.5775, "step": 159360 }, { "epoch": 45.84867663981588, "grad_norm": 1.7047910690307617, "learning_rate": 0.0010830264672036824, "loss": 0.539, "step": 159370 }, { "epoch": 45.85155350978136, "grad_norm": 1.8885585069656372, "learning_rate": 0.0010829689298043727, "loss": 0.6559, "step": 159380 }, { "epoch": 45.85443037974684, "grad_norm": 0.7708277702331543, "learning_rate": 0.0010829113924050633, "loss": 0.6394, "step": 159390 }, { "epoch": 45.85730724971231, "grad_norm": 1.3368958234786987, "learning_rate": 0.0010828538550057539, "loss": 0.592, "step": 159400 }, { "epoch": 45.86018411967779, "grad_norm": 1.7908706665039062, "learning_rate": 0.0010827963176064442, "loss": 0.5012, "step": 159410 }, { "epoch": 45.863060989643266, "grad_norm": 0.816711962223053, "learning_rate": 0.0010827387802071348, "loss": 0.4658, "step": 159420 }, { "epoch": 45.86593785960875, "grad_norm": 1.2779853343963623, "learning_rate": 0.0010826812428078251, "loss": 0.5459, "step": 159430 }, { "epoch": 45.868814729574225, "grad_norm": 0.9528493881225586, "learning_rate": 0.0010826237054085155, "loss": 0.4722, "step": 159440 }, { "epoch": 45.8716915995397, "grad_norm": 0.9584585428237915, "learning_rate": 0.001082566168009206, "loss": 0.5435, "step": 159450 }, { "epoch": 45.87456846950518, "grad_norm": 1.579312801361084, "learning_rate": 0.0010825086306098964, "loss": 0.4414, "step": 159460 }, { "epoch": 45.877445339470654, "grad_norm": 1.1618523597717285, "learning_rate": 0.001082451093210587, "loss": 0.602, "step": 159470 }, { "epoch": 45.88032220943613, "grad_norm": 1.0119742155075073, "learning_rate": 0.0010823935558112775, "loss": 0.5231, "step": 159480 }, { "epoch": 45.883199079401614, "grad_norm": 0.8890780806541443, "learning_rate": 0.0010823360184119676, "loss": 0.4168, "step": 159490 }, { "epoch": 45.88607594936709, "grad_norm": 0.73658287525177, "learning_rate": 0.0010822784810126582, "loss": 0.7039, "step": 159500 }, { "epoch": 45.888952819332566, "grad_norm": 1.3172399997711182, "learning_rate": 0.0010822209436133488, "loss": 0.7084, "step": 159510 }, { "epoch": 45.89182968929804, "grad_norm": 1.0670356750488281, "learning_rate": 0.001082163406214039, "loss": 0.5585, "step": 159520 }, { "epoch": 45.89470655926352, "grad_norm": 1.3071011304855347, "learning_rate": 0.0010821058688147297, "loss": 0.5632, "step": 159530 }, { "epoch": 45.897583429229, "grad_norm": 0.9893922209739685, "learning_rate": 0.0010820483314154202, "loss": 0.5856, "step": 159540 }, { "epoch": 45.90046029919448, "grad_norm": 2.319653272628784, "learning_rate": 0.0010819907940161104, "loss": 0.7525, "step": 159550 }, { "epoch": 45.903337169159954, "grad_norm": 2.020402669906616, "learning_rate": 0.001081933256616801, "loss": 0.6285, "step": 159560 }, { "epoch": 45.90621403912543, "grad_norm": 1.0575107336044312, "learning_rate": 0.0010818757192174913, "loss": 0.739, "step": 159570 }, { "epoch": 45.90909090909091, "grad_norm": 1.750935673713684, "learning_rate": 0.0010818181818181818, "loss": 0.5974, "step": 159580 }, { "epoch": 45.91196777905639, "grad_norm": 1.3125423192977905, "learning_rate": 0.0010817606444188724, "loss": 0.5713, "step": 159590 }, { "epoch": 45.914844649021866, "grad_norm": 0.9978044629096985, "learning_rate": 0.0010817031070195627, "loss": 0.5523, "step": 159600 }, { "epoch": 45.91772151898734, "grad_norm": 1.412602186203003, "learning_rate": 0.001081645569620253, "loss": 0.5113, "step": 159610 }, { "epoch": 45.92059838895282, "grad_norm": 1.756112813949585, "learning_rate": 0.0010815880322209437, "loss": 0.6423, "step": 159620 }, { "epoch": 45.923475258918295, "grad_norm": 1.740883231163025, "learning_rate": 0.001081530494821634, "loss": 0.5317, "step": 159630 }, { "epoch": 45.92635212888378, "grad_norm": 1.0815085172653198, "learning_rate": 0.0010814729574223246, "loss": 0.636, "step": 159640 }, { "epoch": 45.929228998849254, "grad_norm": 0.919498860836029, "learning_rate": 0.0010814154200230151, "loss": 0.5119, "step": 159650 }, { "epoch": 45.93210586881473, "grad_norm": 1.518491268157959, "learning_rate": 0.0010813578826237055, "loss": 0.4849, "step": 159660 }, { "epoch": 45.93498273878021, "grad_norm": 1.4792780876159668, "learning_rate": 0.0010813003452243958, "loss": 0.5862, "step": 159670 }, { "epoch": 45.93785960874568, "grad_norm": 1.3641971349716187, "learning_rate": 0.0010812428078250864, "loss": 0.8483, "step": 159680 }, { "epoch": 45.94073647871116, "grad_norm": 1.6103267669677734, "learning_rate": 0.0010811852704257767, "loss": 0.6126, "step": 159690 }, { "epoch": 45.94361334867664, "grad_norm": 1.1469330787658691, "learning_rate": 0.0010811277330264673, "loss": 0.5712, "step": 159700 }, { "epoch": 45.94649021864212, "grad_norm": 1.5391660928726196, "learning_rate": 0.0010810701956271576, "loss": 0.6753, "step": 159710 }, { "epoch": 45.949367088607595, "grad_norm": 1.1500803232192993, "learning_rate": 0.0010810126582278482, "loss": 0.5471, "step": 159720 }, { "epoch": 45.95224395857307, "grad_norm": 1.1057316064834595, "learning_rate": 0.0010809551208285386, "loss": 0.5995, "step": 159730 }, { "epoch": 45.95512082853855, "grad_norm": 0.8675986528396606, "learning_rate": 0.001080897583429229, "loss": 0.5085, "step": 159740 }, { "epoch": 45.95799769850403, "grad_norm": 2.091330051422119, "learning_rate": 0.0010808400460299195, "loss": 0.573, "step": 159750 }, { "epoch": 45.96087456846951, "grad_norm": 1.085584044456482, "learning_rate": 0.00108078250863061, "loss": 0.4101, "step": 159760 }, { "epoch": 45.96375143843498, "grad_norm": 0.9793149828910828, "learning_rate": 0.0010807249712313004, "loss": 0.4961, "step": 159770 }, { "epoch": 45.96662830840046, "grad_norm": 1.2077652215957642, "learning_rate": 0.001080667433831991, "loss": 0.6158, "step": 159780 }, { "epoch": 45.969505178365935, "grad_norm": 1.3551173210144043, "learning_rate": 0.0010806098964326813, "loss": 0.5651, "step": 159790 }, { "epoch": 45.97238204833142, "grad_norm": 0.9639198184013367, "learning_rate": 0.0010805523590333716, "loss": 0.4481, "step": 159800 }, { "epoch": 45.975258918296895, "grad_norm": 2.2182769775390625, "learning_rate": 0.0010804948216340622, "loss": 0.6335, "step": 159810 }, { "epoch": 45.97813578826237, "grad_norm": 1.3039460182189941, "learning_rate": 0.0010804372842347526, "loss": 0.5804, "step": 159820 }, { "epoch": 45.98101265822785, "grad_norm": 0.977796733379364, "learning_rate": 0.0010803797468354431, "loss": 0.5525, "step": 159830 }, { "epoch": 45.98388952819332, "grad_norm": 2.5391759872436523, "learning_rate": 0.0010803222094361337, "loss": 0.568, "step": 159840 }, { "epoch": 45.98676639815881, "grad_norm": 1.7252198457717896, "learning_rate": 0.0010802646720368238, "loss": 0.5595, "step": 159850 }, { "epoch": 45.98964326812428, "grad_norm": 1.2763910293579102, "learning_rate": 0.0010802071346375144, "loss": 0.5057, "step": 159860 }, { "epoch": 45.99252013808976, "grad_norm": 2.722154378890991, "learning_rate": 0.001080149597238205, "loss": 0.8026, "step": 159870 }, { "epoch": 45.995397008055235, "grad_norm": 1.2829285860061646, "learning_rate": 0.0010800920598388953, "loss": 0.6146, "step": 159880 }, { "epoch": 45.99827387802071, "grad_norm": 0.7403559684753418, "learning_rate": 0.0010800345224395858, "loss": 0.5477, "step": 159890 }, { "epoch": 46.00115074798619, "grad_norm": 1.485356092453003, "learning_rate": 0.0010799769850402764, "loss": 0.5031, "step": 159900 }, { "epoch": 46.00402761795167, "grad_norm": 1.2985763549804688, "learning_rate": 0.0010799194476409665, "loss": 0.5023, "step": 159910 }, { "epoch": 46.00690448791715, "grad_norm": 1.2653818130493164, "learning_rate": 0.001079861910241657, "loss": 0.5946, "step": 159920 }, { "epoch": 46.00978135788262, "grad_norm": 0.9345512986183167, "learning_rate": 0.0010798043728423475, "loss": 0.5138, "step": 159930 }, { "epoch": 46.0126582278481, "grad_norm": 1.1161009073257446, "learning_rate": 0.001079746835443038, "loss": 0.4683, "step": 159940 }, { "epoch": 46.015535097813576, "grad_norm": 0.8999732136726379, "learning_rate": 0.0010796892980437286, "loss": 0.6366, "step": 159950 }, { "epoch": 46.01841196777906, "grad_norm": 1.8787665367126465, "learning_rate": 0.0010796317606444187, "loss": 0.5029, "step": 159960 }, { "epoch": 46.021288837744535, "grad_norm": 1.447504997253418, "learning_rate": 0.0010795742232451093, "loss": 0.487, "step": 159970 }, { "epoch": 46.02416570771001, "grad_norm": 1.321048378944397, "learning_rate": 0.0010795166858457998, "loss": 0.5467, "step": 159980 }, { "epoch": 46.02704257767549, "grad_norm": 2.464169979095459, "learning_rate": 0.0010794591484464902, "loss": 0.6088, "step": 159990 }, { "epoch": 46.029919447640964, "grad_norm": 0.9400912523269653, "learning_rate": 0.0010794016110471808, "loss": 0.4809, "step": 160000 }, { "epoch": 46.03279631760645, "grad_norm": 0.9584947824478149, "learning_rate": 0.0010793440736478713, "loss": 0.5106, "step": 160010 }, { "epoch": 46.03567318757192, "grad_norm": 0.9290129542350769, "learning_rate": 0.0010792865362485614, "loss": 0.4316, "step": 160020 }, { "epoch": 46.0385500575374, "grad_norm": 0.8014642000198364, "learning_rate": 0.001079228998849252, "loss": 0.4578, "step": 160030 }, { "epoch": 46.041426927502876, "grad_norm": 1.419593334197998, "learning_rate": 0.0010791714614499424, "loss": 0.5616, "step": 160040 }, { "epoch": 46.04430379746835, "grad_norm": 0.6752297878265381, "learning_rate": 0.001079113924050633, "loss": 0.4646, "step": 160050 }, { "epoch": 46.047180667433835, "grad_norm": 0.7638952732086182, "learning_rate": 0.0010790563866513235, "loss": 0.5278, "step": 160060 }, { "epoch": 46.05005753739931, "grad_norm": 1.104178786277771, "learning_rate": 0.0010789988492520138, "loss": 0.5513, "step": 160070 }, { "epoch": 46.05293440736479, "grad_norm": 1.1635726690292358, "learning_rate": 0.0010789413118527042, "loss": 0.5106, "step": 160080 }, { "epoch": 46.055811277330264, "grad_norm": 0.9450583457946777, "learning_rate": 0.0010788837744533947, "loss": 0.552, "step": 160090 }, { "epoch": 46.05868814729574, "grad_norm": 1.3309530019760132, "learning_rate": 0.001078826237054085, "loss": 0.4911, "step": 160100 }, { "epoch": 46.061565017261216, "grad_norm": 1.9600194692611694, "learning_rate": 0.0010787686996547757, "loss": 0.4766, "step": 160110 }, { "epoch": 46.0644418872267, "grad_norm": 0.9724538326263428, "learning_rate": 0.0010787111622554662, "loss": 0.5777, "step": 160120 }, { "epoch": 46.067318757192176, "grad_norm": 2.092498779296875, "learning_rate": 0.0010786536248561566, "loss": 0.5527, "step": 160130 }, { "epoch": 46.07019562715765, "grad_norm": 0.7805997133255005, "learning_rate": 0.001078596087456847, "loss": 0.5112, "step": 160140 }, { "epoch": 46.07307249712313, "grad_norm": 1.0756312608718872, "learning_rate": 0.0010785385500575373, "loss": 0.7147, "step": 160150 }, { "epoch": 46.075949367088604, "grad_norm": 1.9298210144042969, "learning_rate": 0.0010784810126582278, "loss": 0.4939, "step": 160160 }, { "epoch": 46.07882623705409, "grad_norm": 0.9925779104232788, "learning_rate": 0.0010784234752589184, "loss": 0.4691, "step": 160170 }, { "epoch": 46.081703107019564, "grad_norm": 1.4234025478363037, "learning_rate": 0.0010783659378596087, "loss": 0.6054, "step": 160180 }, { "epoch": 46.08457997698504, "grad_norm": 1.6687108278274536, "learning_rate": 0.0010783084004602993, "loss": 0.6587, "step": 160190 }, { "epoch": 46.087456846950516, "grad_norm": 1.0662825107574463, "learning_rate": 0.0010782508630609896, "loss": 0.6024, "step": 160200 }, { "epoch": 46.09033371691599, "grad_norm": 1.3887470960617065, "learning_rate": 0.00107819332566168, "loss": 0.4436, "step": 160210 }, { "epoch": 46.093210586881476, "grad_norm": 0.9795328974723816, "learning_rate": 0.0010781357882623706, "loss": 0.4587, "step": 160220 }, { "epoch": 46.09608745684695, "grad_norm": 1.5549734830856323, "learning_rate": 0.0010780782508630611, "loss": 0.5097, "step": 160230 }, { "epoch": 46.09896432681243, "grad_norm": 1.1104710102081299, "learning_rate": 0.0010780207134637515, "loss": 0.6351, "step": 160240 }, { "epoch": 46.101841196777904, "grad_norm": 1.1977711915969849, "learning_rate": 0.001077963176064442, "loss": 0.4684, "step": 160250 }, { "epoch": 46.10471806674338, "grad_norm": 3.5136985778808594, "learning_rate": 0.0010779056386651322, "loss": 0.5704, "step": 160260 }, { "epoch": 46.107594936708864, "grad_norm": 0.7982392311096191, "learning_rate": 0.0010778481012658227, "loss": 0.5236, "step": 160270 }, { "epoch": 46.11047180667434, "grad_norm": 1.1517654657363892, "learning_rate": 0.0010777905638665133, "loss": 0.5423, "step": 160280 }, { "epoch": 46.113348676639816, "grad_norm": 1.9853061437606812, "learning_rate": 0.0010777330264672036, "loss": 0.5381, "step": 160290 }, { "epoch": 46.11622554660529, "grad_norm": 1.0593297481536865, "learning_rate": 0.0010776754890678942, "loss": 0.4626, "step": 160300 }, { "epoch": 46.11910241657077, "grad_norm": 1.2957427501678467, "learning_rate": 0.0010776179516685848, "loss": 0.4688, "step": 160310 }, { "epoch": 46.121979286536245, "grad_norm": 0.9745382070541382, "learning_rate": 0.001077560414269275, "loss": 0.4992, "step": 160320 }, { "epoch": 46.12485615650173, "grad_norm": 0.6217295527458191, "learning_rate": 0.0010775028768699655, "loss": 0.4529, "step": 160330 }, { "epoch": 46.127733026467205, "grad_norm": 1.56199312210083, "learning_rate": 0.001077445339470656, "loss": 0.5937, "step": 160340 }, { "epoch": 46.13060989643268, "grad_norm": 0.963514506816864, "learning_rate": 0.0010773878020713464, "loss": 0.4941, "step": 160350 }, { "epoch": 46.13348676639816, "grad_norm": 0.9092453718185425, "learning_rate": 0.001077330264672037, "loss": 0.5424, "step": 160360 }, { "epoch": 46.13636363636363, "grad_norm": 1.1132385730743408, "learning_rate": 0.0010772727272727275, "loss": 0.5457, "step": 160370 }, { "epoch": 46.139240506329116, "grad_norm": 1.1523833274841309, "learning_rate": 0.0010772151898734176, "loss": 0.5423, "step": 160380 }, { "epoch": 46.14211737629459, "grad_norm": 1.0656572580337524, "learning_rate": 0.0010771576524741082, "loss": 0.5553, "step": 160390 }, { "epoch": 46.14499424626007, "grad_norm": 0.9065995812416077, "learning_rate": 0.0010771001150747985, "loss": 0.6045, "step": 160400 }, { "epoch": 46.147871116225545, "grad_norm": 0.9877519011497498, "learning_rate": 0.001077042577675489, "loss": 0.4958, "step": 160410 }, { "epoch": 46.15074798619102, "grad_norm": 2.3552093505859375, "learning_rate": 0.0010769850402761797, "loss": 0.5992, "step": 160420 }, { "epoch": 46.153624856156505, "grad_norm": 1.0214329957962036, "learning_rate": 0.00107692750287687, "loss": 0.5433, "step": 160430 }, { "epoch": 46.15650172612198, "grad_norm": 1.4858826398849487, "learning_rate": 0.0010768699654775604, "loss": 0.3829, "step": 160440 }, { "epoch": 46.15937859608746, "grad_norm": 1.1084918975830078, "learning_rate": 0.001076812428078251, "loss": 0.5678, "step": 160450 }, { "epoch": 46.16225546605293, "grad_norm": 1.8041900396347046, "learning_rate": 0.0010767548906789413, "loss": 0.6137, "step": 160460 }, { "epoch": 46.16513233601841, "grad_norm": 1.1071927547454834, "learning_rate": 0.0010766973532796318, "loss": 0.5502, "step": 160470 }, { "epoch": 46.16800920598389, "grad_norm": 1.2311584949493408, "learning_rate": 0.0010766398158803224, "loss": 0.4662, "step": 160480 }, { "epoch": 46.17088607594937, "grad_norm": 1.4068670272827148, "learning_rate": 0.0010765822784810127, "loss": 0.5881, "step": 160490 }, { "epoch": 46.173762945914845, "grad_norm": 0.7669169902801514, "learning_rate": 0.001076524741081703, "loss": 0.4829, "step": 160500 }, { "epoch": 46.17663981588032, "grad_norm": 0.9665488004684448, "learning_rate": 0.0010764672036823934, "loss": 0.484, "step": 160510 }, { "epoch": 46.1795166858458, "grad_norm": 1.9804799556732178, "learning_rate": 0.001076409666283084, "loss": 0.5564, "step": 160520 }, { "epoch": 46.18239355581128, "grad_norm": 1.3994745016098022, "learning_rate": 0.0010763521288837746, "loss": 0.4679, "step": 160530 }, { "epoch": 46.18527042577676, "grad_norm": 1.653755784034729, "learning_rate": 0.001076294591484465, "loss": 0.5332, "step": 160540 }, { "epoch": 46.18814729574223, "grad_norm": 2.0501902103424072, "learning_rate": 0.0010762370540851555, "loss": 0.6656, "step": 160550 }, { "epoch": 46.19102416570771, "grad_norm": 1.9070720672607422, "learning_rate": 0.0010761795166858458, "loss": 0.603, "step": 160560 }, { "epoch": 46.193901035673186, "grad_norm": 0.9918104410171509, "learning_rate": 0.0010761219792865362, "loss": 0.4954, "step": 160570 }, { "epoch": 46.19677790563866, "grad_norm": 1.4008365869522095, "learning_rate": 0.0010760644418872267, "loss": 0.5403, "step": 160580 }, { "epoch": 46.199654775604145, "grad_norm": 0.9388597011566162, "learning_rate": 0.0010760069044879173, "loss": 0.5533, "step": 160590 }, { "epoch": 46.20253164556962, "grad_norm": 1.6825976371765137, "learning_rate": 0.0010759493670886076, "loss": 0.6401, "step": 160600 }, { "epoch": 46.2054085155351, "grad_norm": 0.950511634349823, "learning_rate": 0.0010758918296892982, "loss": 0.4116, "step": 160610 }, { "epoch": 46.208285385500574, "grad_norm": 1.529110074043274, "learning_rate": 0.0010758342922899883, "loss": 0.6647, "step": 160620 }, { "epoch": 46.21116225546605, "grad_norm": 1.6043615341186523, "learning_rate": 0.001075776754890679, "loss": 0.4635, "step": 160630 }, { "epoch": 46.21403912543153, "grad_norm": 1.3744053840637207, "learning_rate": 0.0010757192174913695, "loss": 0.4845, "step": 160640 }, { "epoch": 46.21691599539701, "grad_norm": 2.1238274574279785, "learning_rate": 0.0010756616800920598, "loss": 0.4543, "step": 160650 }, { "epoch": 46.219792865362486, "grad_norm": 1.0477877855300903, "learning_rate": 0.0010756041426927504, "loss": 0.5245, "step": 160660 }, { "epoch": 46.22266973532796, "grad_norm": 0.7725094556808472, "learning_rate": 0.001075546605293441, "loss": 0.642, "step": 160670 }, { "epoch": 46.22554660529344, "grad_norm": 1.1672054529190063, "learning_rate": 0.001075489067894131, "loss": 0.6609, "step": 160680 }, { "epoch": 46.22842347525892, "grad_norm": 1.2073129415512085, "learning_rate": 0.0010754315304948216, "loss": 0.5459, "step": 160690 }, { "epoch": 46.2313003452244, "grad_norm": 1.2707974910736084, "learning_rate": 0.0010753739930955122, "loss": 0.5493, "step": 160700 }, { "epoch": 46.234177215189874, "grad_norm": 0.7291651964187622, "learning_rate": 0.0010753164556962025, "loss": 0.5035, "step": 160710 }, { "epoch": 46.23705408515535, "grad_norm": 1.3888797760009766, "learning_rate": 0.0010752589182968931, "loss": 0.5746, "step": 160720 }, { "epoch": 46.239930955120826, "grad_norm": 1.0444196462631226, "learning_rate": 0.0010752013808975832, "loss": 0.497, "step": 160730 }, { "epoch": 46.24280782508631, "grad_norm": 1.5067929029464722, "learning_rate": 0.0010751438434982738, "loss": 0.5892, "step": 160740 }, { "epoch": 46.245684695051786, "grad_norm": 1.5692245960235596, "learning_rate": 0.0010750863060989644, "loss": 0.5457, "step": 160750 }, { "epoch": 46.24856156501726, "grad_norm": 0.8404988646507263, "learning_rate": 0.0010750287686996547, "loss": 0.526, "step": 160760 }, { "epoch": 46.25143843498274, "grad_norm": 1.1171797513961792, "learning_rate": 0.0010749712313003453, "loss": 0.4873, "step": 160770 }, { "epoch": 46.254315304948214, "grad_norm": 1.6649807691574097, "learning_rate": 0.0010749136939010358, "loss": 0.5561, "step": 160780 }, { "epoch": 46.25719217491369, "grad_norm": 0.8578873872756958, "learning_rate": 0.001074856156501726, "loss": 0.574, "step": 160790 }, { "epoch": 46.260069044879174, "grad_norm": 0.6502517461776733, "learning_rate": 0.0010747986191024165, "loss": 0.4906, "step": 160800 }, { "epoch": 46.26294591484465, "grad_norm": 1.1409865617752075, "learning_rate": 0.001074741081703107, "loss": 0.6984, "step": 160810 }, { "epoch": 46.265822784810126, "grad_norm": 1.1994825601577759, "learning_rate": 0.0010746835443037975, "loss": 0.5107, "step": 160820 }, { "epoch": 46.2686996547756, "grad_norm": 1.0930676460266113, "learning_rate": 0.001074626006904488, "loss": 0.5112, "step": 160830 }, { "epoch": 46.27157652474108, "grad_norm": 1.3013030290603638, "learning_rate": 0.0010745684695051784, "loss": 0.5008, "step": 160840 }, { "epoch": 46.27445339470656, "grad_norm": 1.149135708808899, "learning_rate": 0.0010745109321058687, "loss": 0.6585, "step": 160850 }, { "epoch": 46.27733026467204, "grad_norm": 0.7753260731697083, "learning_rate": 0.0010744533947065593, "loss": 0.4566, "step": 160860 }, { "epoch": 46.280207134637514, "grad_norm": 1.0083421468734741, "learning_rate": 0.0010743958573072496, "loss": 0.5893, "step": 160870 }, { "epoch": 46.28308400460299, "grad_norm": 1.0195823907852173, "learning_rate": 0.0010743383199079402, "loss": 0.5388, "step": 160880 }, { "epoch": 46.28596087456847, "grad_norm": 1.26505708694458, "learning_rate": 0.0010742807825086307, "loss": 0.6812, "step": 160890 }, { "epoch": 46.28883774453395, "grad_norm": 1.3560965061187744, "learning_rate": 0.001074223245109321, "loss": 0.679, "step": 160900 }, { "epoch": 46.291714614499426, "grad_norm": 1.1328890323638916, "learning_rate": 0.0010741657077100114, "loss": 0.4258, "step": 160910 }, { "epoch": 46.2945914844649, "grad_norm": 1.1642966270446777, "learning_rate": 0.001074108170310702, "loss": 0.4228, "step": 160920 }, { "epoch": 46.29746835443038, "grad_norm": 1.1523715257644653, "learning_rate": 0.0010740506329113924, "loss": 0.5608, "step": 160930 }, { "epoch": 46.300345224395855, "grad_norm": 1.429055094718933, "learning_rate": 0.001073993095512083, "loss": 0.636, "step": 160940 }, { "epoch": 46.30322209436134, "grad_norm": 1.5098443031311035, "learning_rate": 0.0010739355581127733, "loss": 0.4693, "step": 160950 }, { "epoch": 46.306098964326814, "grad_norm": 1.2928725481033325, "learning_rate": 0.0010738780207134638, "loss": 0.6114, "step": 160960 }, { "epoch": 46.30897583429229, "grad_norm": 1.0365707874298096, "learning_rate": 0.0010738204833141542, "loss": 0.5071, "step": 160970 }, { "epoch": 46.31185270425777, "grad_norm": 1.3725014925003052, "learning_rate": 0.0010737629459148445, "loss": 0.4774, "step": 160980 }, { "epoch": 46.31472957422324, "grad_norm": 2.2917537689208984, "learning_rate": 0.001073705408515535, "loss": 0.5926, "step": 160990 }, { "epoch": 46.31760644418872, "grad_norm": 1.2898361682891846, "learning_rate": 0.0010736478711162257, "loss": 0.5845, "step": 161000 }, { "epoch": 46.3204833141542, "grad_norm": 2.288161277770996, "learning_rate": 0.001073590333716916, "loss": 0.7242, "step": 161010 }, { "epoch": 46.32336018411968, "grad_norm": 0.705934464931488, "learning_rate": 0.0010735327963176066, "loss": 0.5615, "step": 161020 }, { "epoch": 46.326237054085155, "grad_norm": 1.6798336505889893, "learning_rate": 0.001073475258918297, "loss": 0.596, "step": 161030 }, { "epoch": 46.32911392405063, "grad_norm": 2.091447114944458, "learning_rate": 0.0010734177215189873, "loss": 0.6489, "step": 161040 }, { "epoch": 46.33199079401611, "grad_norm": 1.1204503774642944, "learning_rate": 0.0010733601841196778, "loss": 0.5447, "step": 161050 }, { "epoch": 46.33486766398159, "grad_norm": 1.4335119724273682, "learning_rate": 0.0010733026467203684, "loss": 0.4626, "step": 161060 }, { "epoch": 46.33774453394707, "grad_norm": 0.958497941493988, "learning_rate": 0.0010732451093210587, "loss": 0.5576, "step": 161070 }, { "epoch": 46.34062140391254, "grad_norm": 1.5120707750320435, "learning_rate": 0.0010731875719217493, "loss": 0.5914, "step": 161080 }, { "epoch": 46.34349827387802, "grad_norm": 0.8065426349639893, "learning_rate": 0.0010731300345224394, "loss": 0.4515, "step": 161090 }, { "epoch": 46.346375143843495, "grad_norm": 3.195997953414917, "learning_rate": 0.00107307249712313, "loss": 0.6007, "step": 161100 }, { "epoch": 46.34925201380898, "grad_norm": 2.312849998474121, "learning_rate": 0.0010730149597238206, "loss": 0.552, "step": 161110 }, { "epoch": 46.352128883774455, "grad_norm": 1.1869546175003052, "learning_rate": 0.001072957422324511, "loss": 0.483, "step": 161120 }, { "epoch": 46.35500575373993, "grad_norm": 1.6073418855667114, "learning_rate": 0.0010728998849252015, "loss": 0.5728, "step": 161130 }, { "epoch": 46.35788262370541, "grad_norm": 1.3051954507827759, "learning_rate": 0.001072842347525892, "loss": 0.6067, "step": 161140 }, { "epoch": 46.360759493670884, "grad_norm": 1.5228705406188965, "learning_rate": 0.0010727848101265822, "loss": 0.5469, "step": 161150 }, { "epoch": 46.36363636363637, "grad_norm": 1.411794900894165, "learning_rate": 0.0010727272727272727, "loss": 0.5854, "step": 161160 }, { "epoch": 46.36651323360184, "grad_norm": 1.808501958847046, "learning_rate": 0.0010726697353279633, "loss": 0.6441, "step": 161170 }, { "epoch": 46.36939010356732, "grad_norm": 1.054395318031311, "learning_rate": 0.0010726121979286536, "loss": 0.4939, "step": 161180 }, { "epoch": 46.372266973532795, "grad_norm": 1.1639070510864258, "learning_rate": 0.0010725546605293442, "loss": 0.5278, "step": 161190 }, { "epoch": 46.37514384349827, "grad_norm": 1.1715977191925049, "learning_rate": 0.0010724971231300345, "loss": 0.5336, "step": 161200 }, { "epoch": 46.378020713463755, "grad_norm": 0.9814412593841553, "learning_rate": 0.0010724395857307249, "loss": 0.6316, "step": 161210 }, { "epoch": 46.38089758342923, "grad_norm": 1.0699549913406372, "learning_rate": 0.0010723820483314155, "loss": 0.5229, "step": 161220 }, { "epoch": 46.38377445339471, "grad_norm": 0.8664377927780151, "learning_rate": 0.0010723245109321058, "loss": 0.6366, "step": 161230 }, { "epoch": 46.386651323360184, "grad_norm": 1.0818874835968018, "learning_rate": 0.0010722669735327964, "loss": 0.5212, "step": 161240 }, { "epoch": 46.38952819332566, "grad_norm": 1.7612255811691284, "learning_rate": 0.001072209436133487, "loss": 0.5884, "step": 161250 }, { "epoch": 46.392405063291136, "grad_norm": 1.8827340602874756, "learning_rate": 0.0010721518987341773, "loss": 0.6402, "step": 161260 }, { "epoch": 46.39528193325662, "grad_norm": 1.5256654024124146, "learning_rate": 0.0010720943613348676, "loss": 0.6055, "step": 161270 }, { "epoch": 46.398158803222096, "grad_norm": 1.0580726861953735, "learning_rate": 0.0010720368239355582, "loss": 0.6208, "step": 161280 }, { "epoch": 46.40103567318757, "grad_norm": 0.9757481217384338, "learning_rate": 0.0010719792865362485, "loss": 0.4935, "step": 161290 }, { "epoch": 46.40391254315305, "grad_norm": 1.4652953147888184, "learning_rate": 0.001071921749136939, "loss": 0.5572, "step": 161300 }, { "epoch": 46.406789413118524, "grad_norm": 2.744961738586426, "learning_rate": 0.0010718642117376294, "loss": 0.6649, "step": 161310 }, { "epoch": 46.40966628308401, "grad_norm": 1.6178938150405884, "learning_rate": 0.00107180667433832, "loss": 0.5757, "step": 161320 }, { "epoch": 46.412543153049484, "grad_norm": 0.9251542091369629, "learning_rate": 0.0010717491369390104, "loss": 0.5857, "step": 161330 }, { "epoch": 46.41542002301496, "grad_norm": 0.8097003102302551, "learning_rate": 0.0010716915995397007, "loss": 0.5026, "step": 161340 }, { "epoch": 46.418296892980436, "grad_norm": 2.099215269088745, "learning_rate": 0.0010716340621403913, "loss": 0.6836, "step": 161350 }, { "epoch": 46.42117376294591, "grad_norm": 0.9171457886695862, "learning_rate": 0.0010715765247410818, "loss": 0.4648, "step": 161360 }, { "epoch": 46.424050632911396, "grad_norm": 1.1540621519088745, "learning_rate": 0.0010715189873417722, "loss": 0.5326, "step": 161370 }, { "epoch": 46.42692750287687, "grad_norm": 1.7395225763320923, "learning_rate": 0.0010714614499424627, "loss": 0.6642, "step": 161380 }, { "epoch": 46.42980437284235, "grad_norm": 0.7327577471733093, "learning_rate": 0.001071403912543153, "loss": 0.4468, "step": 161390 }, { "epoch": 46.432681242807824, "grad_norm": 1.0519410371780396, "learning_rate": 0.0010713463751438434, "loss": 0.5753, "step": 161400 }, { "epoch": 46.4355581127733, "grad_norm": 2.2930214405059814, "learning_rate": 0.001071288837744534, "loss": 0.558, "step": 161410 }, { "epoch": 46.438434982738784, "grad_norm": 1.5741040706634521, "learning_rate": 0.0010712313003452243, "loss": 0.6539, "step": 161420 }, { "epoch": 46.44131185270426, "grad_norm": 1.107743263244629, "learning_rate": 0.001071173762945915, "loss": 0.6332, "step": 161430 }, { "epoch": 46.444188722669736, "grad_norm": 1.0362880229949951, "learning_rate": 0.0010711162255466055, "loss": 0.5853, "step": 161440 }, { "epoch": 46.44706559263521, "grad_norm": 1.3344674110412598, "learning_rate": 0.0010710586881472956, "loss": 0.7236, "step": 161450 }, { "epoch": 46.44994246260069, "grad_norm": 0.978645384311676, "learning_rate": 0.0010710011507479862, "loss": 0.5937, "step": 161460 }, { "epoch": 46.452819332566165, "grad_norm": 1.4221954345703125, "learning_rate": 0.0010709436133486767, "loss": 0.4586, "step": 161470 }, { "epoch": 46.45569620253165, "grad_norm": 1.981491208076477, "learning_rate": 0.001070886075949367, "loss": 0.7524, "step": 161480 }, { "epoch": 46.458573072497124, "grad_norm": 0.6264251470565796, "learning_rate": 0.0010708285385500576, "loss": 0.4242, "step": 161490 }, { "epoch": 46.4614499424626, "grad_norm": 1.7066627740859985, "learning_rate": 0.0010707710011507482, "loss": 0.5179, "step": 161500 }, { "epoch": 46.46432681242808, "grad_norm": 0.838563859462738, "learning_rate": 0.0010707134637514383, "loss": 0.6041, "step": 161510 }, { "epoch": 46.46720368239355, "grad_norm": 0.8464142680168152, "learning_rate": 0.001070655926352129, "loss": 0.4726, "step": 161520 }, { "epoch": 46.470080552359036, "grad_norm": 0.9235607385635376, "learning_rate": 0.0010705983889528193, "loss": 0.541, "step": 161530 }, { "epoch": 46.47295742232451, "grad_norm": 0.7780229449272156, "learning_rate": 0.0010705408515535098, "loss": 0.5705, "step": 161540 }, { "epoch": 46.47583429228999, "grad_norm": 1.1753463745117188, "learning_rate": 0.0010704833141542004, "loss": 0.5505, "step": 161550 }, { "epoch": 46.478711162255465, "grad_norm": 1.2520252466201782, "learning_rate": 0.0010704257767548905, "loss": 0.5761, "step": 161560 }, { "epoch": 46.48158803222094, "grad_norm": 0.807939350605011, "learning_rate": 0.001070368239355581, "loss": 0.556, "step": 161570 }, { "epoch": 46.484464902186424, "grad_norm": 0.9528747797012329, "learning_rate": 0.0010703107019562716, "loss": 0.6233, "step": 161580 }, { "epoch": 46.4873417721519, "grad_norm": 1.3640210628509521, "learning_rate": 0.001070253164556962, "loss": 0.5862, "step": 161590 }, { "epoch": 46.49021864211738, "grad_norm": 1.7939423322677612, "learning_rate": 0.0010701956271576525, "loss": 0.5383, "step": 161600 }, { "epoch": 46.49309551208285, "grad_norm": 1.2088011503219604, "learning_rate": 0.0010701380897583431, "loss": 0.6905, "step": 161610 }, { "epoch": 46.49597238204833, "grad_norm": 2.220808267593384, "learning_rate": 0.0010700805523590332, "loss": 0.5108, "step": 161620 }, { "epoch": 46.49884925201381, "grad_norm": 1.2779028415679932, "learning_rate": 0.0010700230149597238, "loss": 0.519, "step": 161630 }, { "epoch": 46.50172612197929, "grad_norm": 1.678018569946289, "learning_rate": 0.0010699654775604144, "loss": 0.6697, "step": 161640 }, { "epoch": 46.504602991944765, "grad_norm": 1.5940696001052856, "learning_rate": 0.0010699079401611047, "loss": 0.5484, "step": 161650 }, { "epoch": 46.50747986191024, "grad_norm": 0.8259779214859009, "learning_rate": 0.0010698504027617953, "loss": 0.5567, "step": 161660 }, { "epoch": 46.51035673187572, "grad_norm": 0.8455133438110352, "learning_rate": 0.0010697928653624856, "loss": 0.6424, "step": 161670 }, { "epoch": 46.51323360184119, "grad_norm": 2.860445022583008, "learning_rate": 0.001069735327963176, "loss": 0.6366, "step": 161680 }, { "epoch": 46.51611047180668, "grad_norm": 1.050301432609558, "learning_rate": 0.0010696777905638665, "loss": 0.5921, "step": 161690 }, { "epoch": 46.51898734177215, "grad_norm": 1.900654911994934, "learning_rate": 0.0010696202531645569, "loss": 0.6154, "step": 161700 }, { "epoch": 46.52186421173763, "grad_norm": 1.0108907222747803, "learning_rate": 0.0010695627157652474, "loss": 0.5897, "step": 161710 }, { "epoch": 46.524741081703105, "grad_norm": 2.1089189052581787, "learning_rate": 0.001069505178365938, "loss": 0.491, "step": 161720 }, { "epoch": 46.52761795166858, "grad_norm": 1.8729770183563232, "learning_rate": 0.0010694476409666284, "loss": 0.5316, "step": 161730 }, { "epoch": 46.530494821634065, "grad_norm": 0.9417877197265625, "learning_rate": 0.0010693901035673187, "loss": 0.4909, "step": 161740 }, { "epoch": 46.53337169159954, "grad_norm": 0.9075225591659546, "learning_rate": 0.0010693325661680093, "loss": 0.6026, "step": 161750 }, { "epoch": 46.53624856156502, "grad_norm": 2.3645825386047363, "learning_rate": 0.0010692750287686996, "loss": 0.5832, "step": 161760 }, { "epoch": 46.53912543153049, "grad_norm": 0.8928740620613098, "learning_rate": 0.0010692174913693902, "loss": 0.5882, "step": 161770 }, { "epoch": 46.54200230149597, "grad_norm": 0.75980144739151, "learning_rate": 0.0010691599539700805, "loss": 0.5764, "step": 161780 }, { "epoch": 46.54487917146145, "grad_norm": 3.2338461875915527, "learning_rate": 0.001069102416570771, "loss": 0.6904, "step": 161790 }, { "epoch": 46.54775604142693, "grad_norm": 1.6533769369125366, "learning_rate": 0.0010690448791714614, "loss": 0.6373, "step": 161800 }, { "epoch": 46.550632911392405, "grad_norm": 2.3529129028320312, "learning_rate": 0.0010689873417721518, "loss": 0.6136, "step": 161810 }, { "epoch": 46.55350978135788, "grad_norm": 1.2175869941711426, "learning_rate": 0.0010689298043728424, "loss": 0.5923, "step": 161820 }, { "epoch": 46.55638665132336, "grad_norm": 0.9564940929412842, "learning_rate": 0.001068872266973533, "loss": 0.5315, "step": 161830 }, { "epoch": 46.55926352128884, "grad_norm": 1.1845818758010864, "learning_rate": 0.0010688147295742233, "loss": 0.5872, "step": 161840 }, { "epoch": 46.56214039125432, "grad_norm": 1.061916470527649, "learning_rate": 0.0010687571921749138, "loss": 0.6683, "step": 161850 }, { "epoch": 46.56501726121979, "grad_norm": 1.7310712337493896, "learning_rate": 0.0010686996547756042, "loss": 0.5013, "step": 161860 }, { "epoch": 46.56789413118527, "grad_norm": 1.2229841947555542, "learning_rate": 0.0010686421173762945, "loss": 0.6063, "step": 161870 }, { "epoch": 46.570771001150746, "grad_norm": 1.8845787048339844, "learning_rate": 0.001068584579976985, "loss": 0.605, "step": 161880 }, { "epoch": 46.57364787111622, "grad_norm": 1.3205894231796265, "learning_rate": 0.0010685270425776754, "loss": 0.5772, "step": 161890 }, { "epoch": 46.576524741081705, "grad_norm": 1.0770881175994873, "learning_rate": 0.001068469505178366, "loss": 0.544, "step": 161900 }, { "epoch": 46.57940161104718, "grad_norm": 0.7665511965751648, "learning_rate": 0.0010684119677790566, "loss": 0.586, "step": 161910 }, { "epoch": 46.58227848101266, "grad_norm": 2.061758518218994, "learning_rate": 0.0010683544303797467, "loss": 0.6307, "step": 161920 }, { "epoch": 46.585155350978134, "grad_norm": 0.928084135055542, "learning_rate": 0.0010682968929804373, "loss": 0.5134, "step": 161930 }, { "epoch": 46.58803222094361, "grad_norm": 1.0988380908966064, "learning_rate": 0.0010682393555811278, "loss": 0.5897, "step": 161940 }, { "epoch": 46.59090909090909, "grad_norm": 2.2112622261047363, "learning_rate": 0.0010681818181818182, "loss": 0.5683, "step": 161950 }, { "epoch": 46.59378596087457, "grad_norm": 1.48213529586792, "learning_rate": 0.0010681242807825087, "loss": 0.525, "step": 161960 }, { "epoch": 46.596662830840046, "grad_norm": 0.753067135810852, "learning_rate": 0.0010680667433831993, "loss": 0.5275, "step": 161970 }, { "epoch": 46.59953970080552, "grad_norm": 1.4530199766159058, "learning_rate": 0.0010680092059838894, "loss": 0.5288, "step": 161980 }, { "epoch": 46.602416570771, "grad_norm": 1.280942678451538, "learning_rate": 0.00106795166858458, "loss": 0.6232, "step": 161990 }, { "epoch": 46.60529344073648, "grad_norm": 0.8807258009910583, "learning_rate": 0.0010678941311852703, "loss": 0.4541, "step": 162000 }, { "epoch": 46.60817031070196, "grad_norm": 3.3750336170196533, "learning_rate": 0.001067836593785961, "loss": 0.6642, "step": 162010 }, { "epoch": 46.611047180667434, "grad_norm": 1.2908015251159668, "learning_rate": 0.0010677790563866515, "loss": 0.5496, "step": 162020 }, { "epoch": 46.61392405063291, "grad_norm": 1.3145264387130737, "learning_rate": 0.0010677215189873418, "loss": 0.6797, "step": 162030 }, { "epoch": 46.616800920598386, "grad_norm": 1.1721729040145874, "learning_rate": 0.0010676639815880322, "loss": 0.4479, "step": 162040 }, { "epoch": 46.61967779056387, "grad_norm": 1.4328181743621826, "learning_rate": 0.0010676064441887227, "loss": 0.6571, "step": 162050 }, { "epoch": 46.622554660529346, "grad_norm": 1.764066457748413, "learning_rate": 0.001067548906789413, "loss": 0.731, "step": 162060 }, { "epoch": 46.62543153049482, "grad_norm": 0.9817997217178345, "learning_rate": 0.0010674913693901036, "loss": 0.4074, "step": 162070 }, { "epoch": 46.6283084004603, "grad_norm": 1.4243134260177612, "learning_rate": 0.0010674338319907942, "loss": 0.6602, "step": 162080 }, { "epoch": 46.631185270425775, "grad_norm": 1.961323618888855, "learning_rate": 0.0010673762945914845, "loss": 0.591, "step": 162090 }, { "epoch": 46.63406214039125, "grad_norm": 1.528525948524475, "learning_rate": 0.0010673187571921749, "loss": 0.5356, "step": 162100 }, { "epoch": 46.636939010356734, "grad_norm": 0.7198001742362976, "learning_rate": 0.0010672612197928652, "loss": 0.5258, "step": 162110 }, { "epoch": 46.63981588032221, "grad_norm": 1.473345160484314, "learning_rate": 0.0010672036823935558, "loss": 0.6507, "step": 162120 }, { "epoch": 46.64269275028769, "grad_norm": 1.1816902160644531, "learning_rate": 0.0010671461449942464, "loss": 0.5869, "step": 162130 }, { "epoch": 46.64556962025316, "grad_norm": 1.0590280294418335, "learning_rate": 0.0010670886075949367, "loss": 0.4795, "step": 162140 }, { "epoch": 46.64844649021864, "grad_norm": 1.9721698760986328, "learning_rate": 0.0010670310701956273, "loss": 0.7612, "step": 162150 }, { "epoch": 46.65132336018412, "grad_norm": 1.6805609464645386, "learning_rate": 0.0010669735327963176, "loss": 0.5915, "step": 162160 }, { "epoch": 46.6542002301496, "grad_norm": 1.790917158126831, "learning_rate": 0.001066915995397008, "loss": 0.6591, "step": 162170 }, { "epoch": 46.657077100115075, "grad_norm": 1.2266050577163696, "learning_rate": 0.0010668584579976985, "loss": 0.5629, "step": 162180 }, { "epoch": 46.65995397008055, "grad_norm": 1.097146987915039, "learning_rate": 0.001066800920598389, "loss": 0.5837, "step": 162190 }, { "epoch": 46.66283084004603, "grad_norm": 1.3279409408569336, "learning_rate": 0.0010667433831990794, "loss": 0.6418, "step": 162200 }, { "epoch": 46.66570771001151, "grad_norm": 1.813639521598816, "learning_rate": 0.00106668584579977, "loss": 0.5321, "step": 162210 }, { "epoch": 46.66858457997699, "grad_norm": 1.3272984027862549, "learning_rate": 0.0010666283084004601, "loss": 0.5177, "step": 162220 }, { "epoch": 46.67146144994246, "grad_norm": 1.5876398086547852, "learning_rate": 0.0010665707710011507, "loss": 0.5059, "step": 162230 }, { "epoch": 46.67433831990794, "grad_norm": 0.7049971222877502, "learning_rate": 0.0010665132336018413, "loss": 0.4755, "step": 162240 }, { "epoch": 46.677215189873415, "grad_norm": 0.8785813450813293, "learning_rate": 0.0010664556962025316, "loss": 0.4795, "step": 162250 }, { "epoch": 46.6800920598389, "grad_norm": 1.006697416305542, "learning_rate": 0.0010663981588032222, "loss": 0.6595, "step": 162260 }, { "epoch": 46.682968929804375, "grad_norm": 1.1703922748565674, "learning_rate": 0.0010663406214039127, "loss": 0.4064, "step": 162270 }, { "epoch": 46.68584579976985, "grad_norm": 1.3514519929885864, "learning_rate": 0.0010662830840046029, "loss": 0.6477, "step": 162280 }, { "epoch": 46.68872266973533, "grad_norm": 0.9431992769241333, "learning_rate": 0.0010662255466052934, "loss": 0.5809, "step": 162290 }, { "epoch": 46.6915995397008, "grad_norm": 2.3029611110687256, "learning_rate": 0.001066168009205984, "loss": 0.6847, "step": 162300 }, { "epoch": 46.69447640966629, "grad_norm": 1.985661268234253, "learning_rate": 0.0010661104718066743, "loss": 0.5502, "step": 162310 }, { "epoch": 46.69735327963176, "grad_norm": 1.4911669492721558, "learning_rate": 0.001066052934407365, "loss": 0.5464, "step": 162320 }, { "epoch": 46.70023014959724, "grad_norm": 1.3960462808609009, "learning_rate": 0.0010659953970080555, "loss": 0.5943, "step": 162330 }, { "epoch": 46.703107019562715, "grad_norm": 2.1156790256500244, "learning_rate": 0.0010659378596087456, "loss": 0.5876, "step": 162340 }, { "epoch": 46.70598388952819, "grad_norm": 0.9576559066772461, "learning_rate": 0.0010658803222094362, "loss": 0.5942, "step": 162350 }, { "epoch": 46.70886075949367, "grad_norm": 1.1179544925689697, "learning_rate": 0.0010658227848101265, "loss": 0.625, "step": 162360 }, { "epoch": 46.71173762945915, "grad_norm": 1.1921043395996094, "learning_rate": 0.001065765247410817, "loss": 0.532, "step": 162370 }, { "epoch": 46.71461449942463, "grad_norm": 1.327846646308899, "learning_rate": 0.0010657077100115076, "loss": 0.4797, "step": 162380 }, { "epoch": 46.7174913693901, "grad_norm": 1.6752113103866577, "learning_rate": 0.0010656501726121978, "loss": 0.5722, "step": 162390 }, { "epoch": 46.72036823935558, "grad_norm": 0.6222019791603088, "learning_rate": 0.0010655926352128883, "loss": 0.6121, "step": 162400 }, { "epoch": 46.723245109321056, "grad_norm": 1.1959350109100342, "learning_rate": 0.001065535097813579, "loss": 0.6724, "step": 162410 }, { "epoch": 46.72612197928654, "grad_norm": 0.9858449697494507, "learning_rate": 0.0010654775604142692, "loss": 0.521, "step": 162420 }, { "epoch": 46.728998849252015, "grad_norm": 1.2737035751342773, "learning_rate": 0.0010654200230149598, "loss": 0.6354, "step": 162430 }, { "epoch": 46.73187571921749, "grad_norm": 2.6975910663604736, "learning_rate": 0.0010653624856156504, "loss": 0.7353, "step": 162440 }, { "epoch": 46.73475258918297, "grad_norm": 0.6014644503593445, "learning_rate": 0.0010653049482163405, "loss": 0.5121, "step": 162450 }, { "epoch": 46.737629459148444, "grad_norm": 0.9479286074638367, "learning_rate": 0.001065247410817031, "loss": 0.4974, "step": 162460 }, { "epoch": 46.74050632911393, "grad_norm": 1.8074382543563843, "learning_rate": 0.0010651898734177214, "loss": 0.564, "step": 162470 }, { "epoch": 46.7433831990794, "grad_norm": 0.8209614157676697, "learning_rate": 0.001065132336018412, "loss": 0.5611, "step": 162480 }, { "epoch": 46.74626006904488, "grad_norm": 1.0657987594604492, "learning_rate": 0.0010650747986191025, "loss": 0.5702, "step": 162490 }, { "epoch": 46.749136939010356, "grad_norm": 1.6577184200286865, "learning_rate": 0.001065017261219793, "loss": 0.7007, "step": 162500 }, { "epoch": 46.75201380897583, "grad_norm": 1.6564221382141113, "learning_rate": 0.0010649597238204832, "loss": 0.5805, "step": 162510 }, { "epoch": 46.754890678941315, "grad_norm": 1.0488896369934082, "learning_rate": 0.0010649021864211738, "loss": 0.6508, "step": 162520 }, { "epoch": 46.75776754890679, "grad_norm": 1.108506679534912, "learning_rate": 0.0010648446490218642, "loss": 0.6323, "step": 162530 }, { "epoch": 46.76064441887227, "grad_norm": 1.371177077293396, "learning_rate": 0.0010647871116225547, "loss": 0.574, "step": 162540 }, { "epoch": 46.763521288837744, "grad_norm": 1.53667151927948, "learning_rate": 0.0010647295742232453, "loss": 0.6154, "step": 162550 }, { "epoch": 46.76639815880322, "grad_norm": 1.5897036790847778, "learning_rate": 0.0010646720368239356, "loss": 0.517, "step": 162560 }, { "epoch": 46.769275028768696, "grad_norm": 0.790421187877655, "learning_rate": 0.001064614499424626, "loss": 0.4043, "step": 162570 }, { "epoch": 46.77215189873418, "grad_norm": 0.9615301489830017, "learning_rate": 0.0010645569620253163, "loss": 0.6381, "step": 162580 }, { "epoch": 46.775028768699656, "grad_norm": 0.6642991304397583, "learning_rate": 0.0010644994246260069, "loss": 0.6417, "step": 162590 }, { "epoch": 46.77790563866513, "grad_norm": 1.313232421875, "learning_rate": 0.0010644418872266974, "loss": 0.4947, "step": 162600 }, { "epoch": 46.78078250863061, "grad_norm": 1.2112616300582886, "learning_rate": 0.0010643843498273878, "loss": 0.6965, "step": 162610 }, { "epoch": 46.783659378596084, "grad_norm": 1.1221387386322021, "learning_rate": 0.0010643268124280784, "loss": 0.597, "step": 162620 }, { "epoch": 46.78653624856157, "grad_norm": 1.418351173400879, "learning_rate": 0.0010642692750287687, "loss": 0.583, "step": 162630 }, { "epoch": 46.789413118527044, "grad_norm": 1.8647748231887817, "learning_rate": 0.001064211737629459, "loss": 0.6452, "step": 162640 }, { "epoch": 46.79228998849252, "grad_norm": 1.1833173036575317, "learning_rate": 0.0010641542002301496, "loss": 0.5541, "step": 162650 }, { "epoch": 46.795166858457996, "grad_norm": 1.0609227418899536, "learning_rate": 0.0010640966628308402, "loss": 0.5603, "step": 162660 }, { "epoch": 46.79804372842347, "grad_norm": 1.1513936519622803, "learning_rate": 0.0010640391254315305, "loss": 0.6303, "step": 162670 }, { "epoch": 46.800920598388956, "grad_norm": 2.052225351333618, "learning_rate": 0.001063981588032221, "loss": 0.5286, "step": 162680 }, { "epoch": 46.80379746835443, "grad_norm": 1.3728598356246948, "learning_rate": 0.0010639240506329112, "loss": 0.6381, "step": 162690 }, { "epoch": 46.80667433831991, "grad_norm": 1.0709112882614136, "learning_rate": 0.0010638665132336018, "loss": 0.5965, "step": 162700 }, { "epoch": 46.809551208285384, "grad_norm": 1.248375654220581, "learning_rate": 0.0010638089758342923, "loss": 0.5645, "step": 162710 }, { "epoch": 46.81242807825086, "grad_norm": 1.5460633039474487, "learning_rate": 0.0010637514384349827, "loss": 0.9999, "step": 162720 }, { "epoch": 46.815304948216344, "grad_norm": 0.9383688569068909, "learning_rate": 0.0010636939010356733, "loss": 0.5124, "step": 162730 }, { "epoch": 46.81818181818182, "grad_norm": 2.64127779006958, "learning_rate": 0.0010636363636363638, "loss": 0.7056, "step": 162740 }, { "epoch": 46.821058688147296, "grad_norm": 1.0819783210754395, "learning_rate": 0.001063578826237054, "loss": 0.5556, "step": 162750 }, { "epoch": 46.82393555811277, "grad_norm": 0.9864495992660522, "learning_rate": 0.0010635212888377445, "loss": 0.5056, "step": 162760 }, { "epoch": 46.82681242807825, "grad_norm": 1.276126503944397, "learning_rate": 0.001063463751438435, "loss": 0.6371, "step": 162770 }, { "epoch": 46.829689298043725, "grad_norm": 1.4670487642288208, "learning_rate": 0.0010634062140391254, "loss": 0.606, "step": 162780 }, { "epoch": 46.83256616800921, "grad_norm": 0.8784105181694031, "learning_rate": 0.001063348676639816, "loss": 0.5426, "step": 162790 }, { "epoch": 46.835443037974684, "grad_norm": 1.1820931434631348, "learning_rate": 0.0010632911392405063, "loss": 0.4803, "step": 162800 }, { "epoch": 46.83831990794016, "grad_norm": 1.1340148448944092, "learning_rate": 0.0010632336018411967, "loss": 0.6565, "step": 162810 }, { "epoch": 46.84119677790564, "grad_norm": 2.326140880584717, "learning_rate": 0.0010631760644418873, "loss": 0.58, "step": 162820 }, { "epoch": 46.84407364787111, "grad_norm": 0.8150396943092346, "learning_rate": 0.0010631185270425776, "loss": 0.4944, "step": 162830 }, { "epoch": 46.846950517836596, "grad_norm": 1.6031758785247803, "learning_rate": 0.0010630609896432682, "loss": 0.6366, "step": 162840 }, { "epoch": 46.84982738780207, "grad_norm": 1.8369619846343994, "learning_rate": 0.0010630034522439587, "loss": 0.8195, "step": 162850 }, { "epoch": 46.85270425776755, "grad_norm": 1.275000810623169, "learning_rate": 0.001062945914844649, "loss": 0.4554, "step": 162860 }, { "epoch": 46.855581127733025, "grad_norm": 0.996727705001831, "learning_rate": 0.0010628883774453394, "loss": 0.6173, "step": 162870 }, { "epoch": 46.8584579976985, "grad_norm": 1.1466212272644043, "learning_rate": 0.00106283084004603, "loss": 0.5731, "step": 162880 }, { "epoch": 46.861334867663984, "grad_norm": 0.9181458353996277, "learning_rate": 0.0010627733026467203, "loss": 0.4318, "step": 162890 }, { "epoch": 46.86421173762946, "grad_norm": 0.7054606676101685, "learning_rate": 0.001062715765247411, "loss": 0.5823, "step": 162900 }, { "epoch": 46.86708860759494, "grad_norm": 1.1290966272354126, "learning_rate": 0.0010626582278481012, "loss": 0.5858, "step": 162910 }, { "epoch": 46.86996547756041, "grad_norm": 1.916342854499817, "learning_rate": 0.0010626006904487918, "loss": 0.7139, "step": 162920 }, { "epoch": 46.87284234752589, "grad_norm": 0.7849993705749512, "learning_rate": 0.0010625431530494822, "loss": 0.5741, "step": 162930 }, { "epoch": 46.87571921749137, "grad_norm": 1.6968106031417847, "learning_rate": 0.0010624856156501725, "loss": 0.6365, "step": 162940 }, { "epoch": 46.87859608745685, "grad_norm": 2.1077003479003906, "learning_rate": 0.001062428078250863, "loss": 0.6862, "step": 162950 }, { "epoch": 46.881472957422325, "grad_norm": 1.4721888303756714, "learning_rate": 0.0010623705408515536, "loss": 0.5895, "step": 162960 }, { "epoch": 46.8843498273878, "grad_norm": 0.6455712914466858, "learning_rate": 0.001062313003452244, "loss": 0.5565, "step": 162970 }, { "epoch": 46.88722669735328, "grad_norm": 0.5854381322860718, "learning_rate": 0.0010622554660529345, "loss": 0.5188, "step": 162980 }, { "epoch": 46.89010356731876, "grad_norm": 1.334537148475647, "learning_rate": 0.0010621979286536249, "loss": 0.6756, "step": 162990 }, { "epoch": 46.89298043728424, "grad_norm": 0.7995830774307251, "learning_rate": 0.0010621403912543152, "loss": 0.7153, "step": 163000 }, { "epoch": 46.89585730724971, "grad_norm": 0.9824384450912476, "learning_rate": 0.0010620828538550058, "loss": 0.5984, "step": 163010 }, { "epoch": 46.89873417721519, "grad_norm": 1.5242300033569336, "learning_rate": 0.0010620253164556964, "loss": 0.6987, "step": 163020 }, { "epoch": 46.901611047180666, "grad_norm": 1.5269070863723755, "learning_rate": 0.0010619677790563867, "loss": 0.5441, "step": 163030 }, { "epoch": 46.90448791714614, "grad_norm": 1.011469841003418, "learning_rate": 0.0010619102416570773, "loss": 0.5553, "step": 163040 }, { "epoch": 46.907364787111625, "grad_norm": 1.5838234424591064, "learning_rate": 0.0010618527042577674, "loss": 0.6269, "step": 163050 }, { "epoch": 46.9102416570771, "grad_norm": 1.5455377101898193, "learning_rate": 0.001061795166858458, "loss": 0.6796, "step": 163060 }, { "epoch": 46.91311852704258, "grad_norm": 0.9873084425926208, "learning_rate": 0.0010617376294591485, "loss": 0.5169, "step": 163070 }, { "epoch": 46.915995397008054, "grad_norm": 1.0156570672988892, "learning_rate": 0.0010616800920598389, "loss": 0.6163, "step": 163080 }, { "epoch": 46.91887226697353, "grad_norm": 2.0931992530822754, "learning_rate": 0.0010616225546605294, "loss": 0.6249, "step": 163090 }, { "epoch": 46.92174913693901, "grad_norm": 2.7985808849334717, "learning_rate": 0.00106156501726122, "loss": 0.5209, "step": 163100 }, { "epoch": 46.92462600690449, "grad_norm": 1.0883264541625977, "learning_rate": 0.0010615074798619101, "loss": 0.5106, "step": 163110 }, { "epoch": 46.927502876869966, "grad_norm": 1.3113476037979126, "learning_rate": 0.0010614499424626007, "loss": 0.616, "step": 163120 }, { "epoch": 46.93037974683544, "grad_norm": 1.3621578216552734, "learning_rate": 0.0010613924050632913, "loss": 0.6483, "step": 163130 }, { "epoch": 46.93325661680092, "grad_norm": 1.3658902645111084, "learning_rate": 0.0010613348676639816, "loss": 0.5008, "step": 163140 }, { "epoch": 46.9361334867664, "grad_norm": 1.8478612899780273, "learning_rate": 0.0010612773302646722, "loss": 0.5593, "step": 163150 }, { "epoch": 46.93901035673188, "grad_norm": 2.2028615474700928, "learning_rate": 0.0010612197928653623, "loss": 0.5895, "step": 163160 }, { "epoch": 46.941887226697354, "grad_norm": 0.9897506833076477, "learning_rate": 0.0010611622554660529, "loss": 0.5585, "step": 163170 }, { "epoch": 46.94476409666283, "grad_norm": 1.040964961051941, "learning_rate": 0.0010611047180667434, "loss": 0.5187, "step": 163180 }, { "epoch": 46.947640966628306, "grad_norm": 1.1452903747558594, "learning_rate": 0.0010610471806674338, "loss": 0.6197, "step": 163190 }, { "epoch": 46.95051783659379, "grad_norm": 1.0919986963272095, "learning_rate": 0.0010609896432681243, "loss": 0.4993, "step": 163200 }, { "epoch": 46.953394706559266, "grad_norm": 1.4187610149383545, "learning_rate": 0.001060932105868815, "loss": 0.4739, "step": 163210 }, { "epoch": 46.95627157652474, "grad_norm": 0.7881262302398682, "learning_rate": 0.001060874568469505, "loss": 0.4923, "step": 163220 }, { "epoch": 46.95914844649022, "grad_norm": 1.0026674270629883, "learning_rate": 0.0010608170310701956, "loss": 0.5316, "step": 163230 }, { "epoch": 46.962025316455694, "grad_norm": 0.9192516803741455, "learning_rate": 0.0010607594936708862, "loss": 0.6705, "step": 163240 }, { "epoch": 46.96490218642117, "grad_norm": 1.9533722400665283, "learning_rate": 0.0010607019562715765, "loss": 0.5393, "step": 163250 }, { "epoch": 46.967779056386654, "grad_norm": 1.1422618627548218, "learning_rate": 0.001060644418872267, "loss": 0.6123, "step": 163260 }, { "epoch": 46.97065592635213, "grad_norm": 1.7078510522842407, "learning_rate": 0.0010605868814729574, "loss": 0.6874, "step": 163270 }, { "epoch": 46.973532796317606, "grad_norm": 2.0616323947906494, "learning_rate": 0.0010605293440736478, "loss": 0.5733, "step": 163280 }, { "epoch": 46.97640966628308, "grad_norm": 0.9069936275482178, "learning_rate": 0.0010604718066743383, "loss": 0.5349, "step": 163290 }, { "epoch": 46.97928653624856, "grad_norm": 1.5458462238311768, "learning_rate": 0.0010604142692750287, "loss": 0.6579, "step": 163300 }, { "epoch": 46.98216340621404, "grad_norm": 1.0144497156143188, "learning_rate": 0.0010603567318757192, "loss": 0.5944, "step": 163310 }, { "epoch": 46.98504027617952, "grad_norm": 1.778709053993225, "learning_rate": 0.0010602991944764098, "loss": 0.5133, "step": 163320 }, { "epoch": 46.987917146144994, "grad_norm": 0.9712766408920288, "learning_rate": 0.0010602416570771002, "loss": 0.4876, "step": 163330 }, { "epoch": 46.99079401611047, "grad_norm": 1.45807945728302, "learning_rate": 0.0010601841196777905, "loss": 0.571, "step": 163340 }, { "epoch": 46.99367088607595, "grad_norm": 1.237135410308838, "learning_rate": 0.001060126582278481, "loss": 0.6432, "step": 163350 }, { "epoch": 46.99654775604143, "grad_norm": 1.9789187908172607, "learning_rate": 0.0010600690448791714, "loss": 0.6461, "step": 163360 }, { "epoch": 46.999424626006906, "grad_norm": 1.1709095239639282, "learning_rate": 0.001060011507479862, "loss": 0.6349, "step": 163370 }, { "epoch": 47.00230149597238, "grad_norm": 1.1349369287490845, "learning_rate": 0.0010599539700805523, "loss": 0.5158, "step": 163380 }, { "epoch": 47.00517836593786, "grad_norm": 1.0212347507476807, "learning_rate": 0.0010598964326812429, "loss": 0.538, "step": 163390 }, { "epoch": 47.008055235903335, "grad_norm": 1.2157598733901978, "learning_rate": 0.0010598388952819332, "loss": 0.4898, "step": 163400 }, { "epoch": 47.01093210586882, "grad_norm": 1.308340311050415, "learning_rate": 0.0010597813578826236, "loss": 0.5537, "step": 163410 }, { "epoch": 47.013808975834294, "grad_norm": 0.7767809629440308, "learning_rate": 0.0010597238204833141, "loss": 0.5567, "step": 163420 }, { "epoch": 47.01668584579977, "grad_norm": 9.857559204101562, "learning_rate": 0.0010596662830840047, "loss": 0.4034, "step": 163430 }, { "epoch": 47.01956271576525, "grad_norm": 1.3652106523513794, "learning_rate": 0.001059608745684695, "loss": 0.587, "step": 163440 }, { "epoch": 47.02243958573072, "grad_norm": 1.2650361061096191, "learning_rate": 0.0010595512082853856, "loss": 0.5172, "step": 163450 }, { "epoch": 47.0253164556962, "grad_norm": 0.9910292029380798, "learning_rate": 0.001059493670886076, "loss": 0.4899, "step": 163460 }, { "epoch": 47.02819332566168, "grad_norm": 0.9913462400436401, "learning_rate": 0.0010594361334867663, "loss": 0.515, "step": 163470 }, { "epoch": 47.03107019562716, "grad_norm": 1.214974284172058, "learning_rate": 0.0010593785960874569, "loss": 0.5888, "step": 163480 }, { "epoch": 47.033947065592635, "grad_norm": 1.2479526996612549, "learning_rate": 0.0010593210586881472, "loss": 0.5606, "step": 163490 }, { "epoch": 47.03682393555811, "grad_norm": 0.911820650100708, "learning_rate": 0.0010592635212888378, "loss": 0.4898, "step": 163500 }, { "epoch": 47.03970080552359, "grad_norm": 1.1984249353408813, "learning_rate": 0.0010592059838895284, "loss": 0.5532, "step": 163510 }, { "epoch": 47.04257767548907, "grad_norm": 1.4360451698303223, "learning_rate": 0.0010591484464902185, "loss": 0.5391, "step": 163520 }, { "epoch": 47.04545454545455, "grad_norm": 0.7728720903396606, "learning_rate": 0.001059090909090909, "loss": 0.4319, "step": 163530 }, { "epoch": 47.04833141542002, "grad_norm": 0.9765911102294922, "learning_rate": 0.0010590333716915996, "loss": 0.5818, "step": 163540 }, { "epoch": 47.0512082853855, "grad_norm": 0.9331145286560059, "learning_rate": 0.00105897583429229, "loss": 0.4832, "step": 163550 }, { "epoch": 47.054085155350975, "grad_norm": 1.3830459117889404, "learning_rate": 0.0010589182968929805, "loss": 0.5468, "step": 163560 }, { "epoch": 47.05696202531646, "grad_norm": 0.7403780817985535, "learning_rate": 0.001058860759493671, "loss": 0.4858, "step": 163570 }, { "epoch": 47.059838895281935, "grad_norm": 1.4614731073379517, "learning_rate": 0.0010588032220943612, "loss": 0.4251, "step": 163580 }, { "epoch": 47.06271576524741, "grad_norm": 1.1484339237213135, "learning_rate": 0.0010587456846950518, "loss": 0.4829, "step": 163590 }, { "epoch": 47.06559263521289, "grad_norm": 0.8449071645736694, "learning_rate": 0.0010586881472957423, "loss": 0.4578, "step": 163600 }, { "epoch": 47.06846950517836, "grad_norm": 1.4499675035476685, "learning_rate": 0.0010586306098964327, "loss": 0.4458, "step": 163610 }, { "epoch": 47.07134637514385, "grad_norm": 1.3388288021087646, "learning_rate": 0.0010585730724971233, "loss": 0.4442, "step": 163620 }, { "epoch": 47.07422324510932, "grad_norm": 1.312165379524231, "learning_rate": 0.0010585155350978136, "loss": 0.4439, "step": 163630 }, { "epoch": 47.0771001150748, "grad_norm": 0.9839030504226685, "learning_rate": 0.001058457997698504, "loss": 0.5035, "step": 163640 }, { "epoch": 47.079976985040275, "grad_norm": 1.3998769521713257, "learning_rate": 0.0010584004602991945, "loss": 0.6564, "step": 163650 }, { "epoch": 47.08285385500575, "grad_norm": 1.4483225345611572, "learning_rate": 0.0010583429228998849, "loss": 0.5336, "step": 163660 }, { "epoch": 47.08573072497123, "grad_norm": 1.0208423137664795, "learning_rate": 0.0010582853855005754, "loss": 0.465, "step": 163670 }, { "epoch": 47.08860759493671, "grad_norm": 0.6467624306678772, "learning_rate": 0.001058227848101266, "loss": 0.5286, "step": 163680 }, { "epoch": 47.09148446490219, "grad_norm": 2.677922248840332, "learning_rate": 0.0010581703107019563, "loss": 0.5775, "step": 163690 }, { "epoch": 47.09436133486766, "grad_norm": 1.236204743385315, "learning_rate": 0.0010581127733026467, "loss": 0.5151, "step": 163700 }, { "epoch": 47.09723820483314, "grad_norm": 0.887215256690979, "learning_rate": 0.0010580552359033372, "loss": 0.4963, "step": 163710 }, { "epoch": 47.100115074798616, "grad_norm": 1.4231785535812378, "learning_rate": 0.0010579976985040276, "loss": 0.5311, "step": 163720 }, { "epoch": 47.1029919447641, "grad_norm": 1.0478121042251587, "learning_rate": 0.0010579401611047182, "loss": 0.5771, "step": 163730 }, { "epoch": 47.105868814729575, "grad_norm": 1.3140208721160889, "learning_rate": 0.0010578826237054085, "loss": 0.5693, "step": 163740 }, { "epoch": 47.10874568469505, "grad_norm": 1.7613706588745117, "learning_rate": 0.001057825086306099, "loss": 0.6173, "step": 163750 }, { "epoch": 47.11162255466053, "grad_norm": 1.4309080839157104, "learning_rate": 0.0010577675489067894, "loss": 0.566, "step": 163760 }, { "epoch": 47.114499424626004, "grad_norm": 2.880509853363037, "learning_rate": 0.0010577100115074798, "loss": 0.5051, "step": 163770 }, { "epoch": 47.11737629459149, "grad_norm": 1.5528658628463745, "learning_rate": 0.0010576524741081703, "loss": 0.425, "step": 163780 }, { "epoch": 47.120253164556964, "grad_norm": 1.1611511707305908, "learning_rate": 0.001057594936708861, "loss": 0.5288, "step": 163790 }, { "epoch": 47.12313003452244, "grad_norm": 1.7433960437774658, "learning_rate": 0.0010575373993095512, "loss": 0.6374, "step": 163800 }, { "epoch": 47.126006904487916, "grad_norm": 1.1644072532653809, "learning_rate": 0.0010574798619102418, "loss": 0.4636, "step": 163810 }, { "epoch": 47.12888377445339, "grad_norm": 1.029341697692871, "learning_rate": 0.0010574223245109322, "loss": 0.5125, "step": 163820 }, { "epoch": 47.131760644418875, "grad_norm": 1.181247591972351, "learning_rate": 0.0010573647871116225, "loss": 0.4463, "step": 163830 }, { "epoch": 47.13463751438435, "grad_norm": 1.3574796915054321, "learning_rate": 0.001057307249712313, "loss": 0.4891, "step": 163840 }, { "epoch": 47.13751438434983, "grad_norm": 1.2846225500106812, "learning_rate": 0.0010572497123130034, "loss": 0.5678, "step": 163850 }, { "epoch": 47.140391254315304, "grad_norm": 1.001835584640503, "learning_rate": 0.001057192174913694, "loss": 0.486, "step": 163860 }, { "epoch": 47.14326812428078, "grad_norm": 0.8794318437576294, "learning_rate": 0.0010571346375143845, "loss": 0.5667, "step": 163870 }, { "epoch": 47.146144994246264, "grad_norm": 0.6534110903739929, "learning_rate": 0.0010570771001150747, "loss": 0.4483, "step": 163880 }, { "epoch": 47.14902186421174, "grad_norm": 1.311582088470459, "learning_rate": 0.0010570195627157652, "loss": 0.5594, "step": 163890 }, { "epoch": 47.151898734177216, "grad_norm": 1.4227508306503296, "learning_rate": 0.0010569620253164558, "loss": 0.6068, "step": 163900 }, { "epoch": 47.15477560414269, "grad_norm": 2.0143041610717773, "learning_rate": 0.0010569044879171461, "loss": 0.5258, "step": 163910 }, { "epoch": 47.15765247410817, "grad_norm": 1.7554956674575806, "learning_rate": 0.0010568469505178367, "loss": 0.4865, "step": 163920 }, { "epoch": 47.160529344073645, "grad_norm": 1.513532042503357, "learning_rate": 0.0010567894131185273, "loss": 0.5822, "step": 163930 }, { "epoch": 47.16340621403913, "grad_norm": 0.8994565010070801, "learning_rate": 0.0010567318757192174, "loss": 0.5717, "step": 163940 }, { "epoch": 47.166283084004604, "grad_norm": 1.1615585088729858, "learning_rate": 0.001056674338319908, "loss": 0.654, "step": 163950 }, { "epoch": 47.16915995397008, "grad_norm": 2.1878113746643066, "learning_rate": 0.0010566168009205983, "loss": 0.7125, "step": 163960 }, { "epoch": 47.17203682393556, "grad_norm": 1.5101070404052734, "learning_rate": 0.0010565592635212889, "loss": 0.5125, "step": 163970 }, { "epoch": 47.17491369390103, "grad_norm": 1.0171608924865723, "learning_rate": 0.0010565017261219794, "loss": 0.4345, "step": 163980 }, { "epoch": 47.177790563866516, "grad_norm": 1.5702414512634277, "learning_rate": 0.0010564441887226696, "loss": 0.6207, "step": 163990 }, { "epoch": 47.18066743383199, "grad_norm": 1.1399999856948853, "learning_rate": 0.0010563866513233601, "loss": 0.5598, "step": 164000 }, { "epoch": 47.18354430379747, "grad_norm": 1.178849458694458, "learning_rate": 0.0010563291139240507, "loss": 0.5566, "step": 164010 }, { "epoch": 47.186421173762945, "grad_norm": 1.304891586303711, "learning_rate": 0.001056271576524741, "loss": 0.6653, "step": 164020 }, { "epoch": 47.18929804372842, "grad_norm": 2.092653751373291, "learning_rate": 0.0010562140391254316, "loss": 0.516, "step": 164030 }, { "epoch": 47.192174913693904, "grad_norm": 1.1969285011291504, "learning_rate": 0.0010561565017261222, "loss": 0.6248, "step": 164040 }, { "epoch": 47.19505178365938, "grad_norm": 1.5940313339233398, "learning_rate": 0.0010560989643268123, "loss": 0.483, "step": 164050 }, { "epoch": 47.19792865362486, "grad_norm": 0.8768670558929443, "learning_rate": 0.0010560414269275029, "loss": 0.5444, "step": 164060 }, { "epoch": 47.20080552359033, "grad_norm": 2.1177737712860107, "learning_rate": 0.0010559838895281932, "loss": 0.5335, "step": 164070 }, { "epoch": 47.20368239355581, "grad_norm": 1.1986371278762817, "learning_rate": 0.0010559263521288838, "loss": 0.5425, "step": 164080 }, { "epoch": 47.20655926352129, "grad_norm": 1.1117286682128906, "learning_rate": 0.0010558688147295743, "loss": 0.6583, "step": 164090 }, { "epoch": 47.20943613348677, "grad_norm": 0.7816566228866577, "learning_rate": 0.0010558112773302647, "loss": 0.4322, "step": 164100 }, { "epoch": 47.212313003452245, "grad_norm": 1.7916573286056519, "learning_rate": 0.001055753739930955, "loss": 0.4911, "step": 164110 }, { "epoch": 47.21518987341772, "grad_norm": 1.005212426185608, "learning_rate": 0.0010556962025316456, "loss": 0.6313, "step": 164120 }, { "epoch": 47.2180667433832, "grad_norm": 1.2774686813354492, "learning_rate": 0.001055638665132336, "loss": 0.6377, "step": 164130 }, { "epoch": 47.22094361334867, "grad_norm": 2.496410846710205, "learning_rate": 0.0010555811277330265, "loss": 0.568, "step": 164140 }, { "epoch": 47.22382048331416, "grad_norm": 1.501028060913086, "learning_rate": 0.001055523590333717, "loss": 0.4619, "step": 164150 }, { "epoch": 47.22669735327963, "grad_norm": 1.5429483652114868, "learning_rate": 0.0010554660529344074, "loss": 0.5215, "step": 164160 }, { "epoch": 47.22957422324511, "grad_norm": 1.7408391237258911, "learning_rate": 0.0010554085155350978, "loss": 0.5637, "step": 164170 }, { "epoch": 47.232451093210585, "grad_norm": 1.5576696395874023, "learning_rate": 0.0010553509781357881, "loss": 0.5848, "step": 164180 }, { "epoch": 47.23532796317606, "grad_norm": 1.1932200193405151, "learning_rate": 0.0010552934407364787, "loss": 0.5372, "step": 164190 }, { "epoch": 47.238204833141545, "grad_norm": 1.7546640634536743, "learning_rate": 0.0010552359033371692, "loss": 0.5985, "step": 164200 }, { "epoch": 47.24108170310702, "grad_norm": 0.7505480647087097, "learning_rate": 0.0010551783659378596, "loss": 0.5871, "step": 164210 }, { "epoch": 47.2439585730725, "grad_norm": 1.6113059520721436, "learning_rate": 0.0010551208285385502, "loss": 0.7339, "step": 164220 }, { "epoch": 47.24683544303797, "grad_norm": 1.323372721672058, "learning_rate": 0.0010550632911392405, "loss": 0.5142, "step": 164230 }, { "epoch": 47.24971231300345, "grad_norm": 1.7361674308776855, "learning_rate": 0.0010550057537399308, "loss": 0.6342, "step": 164240 }, { "epoch": 47.25258918296893, "grad_norm": 1.9775248765945435, "learning_rate": 0.0010549482163406214, "loss": 0.5607, "step": 164250 }, { "epoch": 47.25546605293441, "grad_norm": 1.6142746210098267, "learning_rate": 0.001054890678941312, "loss": 0.5197, "step": 164260 }, { "epoch": 47.258342922899885, "grad_norm": 0.7664133906364441, "learning_rate": 0.0010548331415420023, "loss": 0.5403, "step": 164270 }, { "epoch": 47.26121979286536, "grad_norm": 1.0425200462341309, "learning_rate": 0.0010547756041426929, "loss": 0.5747, "step": 164280 }, { "epoch": 47.26409666283084, "grad_norm": 1.005733847618103, "learning_rate": 0.0010547180667433832, "loss": 0.4769, "step": 164290 }, { "epoch": 47.26697353279632, "grad_norm": 0.9385311007499695, "learning_rate": 0.0010546605293440736, "loss": 0.4445, "step": 164300 }, { "epoch": 47.2698504027618, "grad_norm": 0.9419658184051514, "learning_rate": 0.0010546029919447641, "loss": 0.5007, "step": 164310 }, { "epoch": 47.27272727272727, "grad_norm": 1.3569285869598389, "learning_rate": 0.0010545454545454545, "loss": 0.5511, "step": 164320 }, { "epoch": 47.27560414269275, "grad_norm": 1.1522208452224731, "learning_rate": 0.001054487917146145, "loss": 0.5975, "step": 164330 }, { "epoch": 47.278481012658226, "grad_norm": 0.6940345168113708, "learning_rate": 0.0010544303797468356, "loss": 0.4428, "step": 164340 }, { "epoch": 47.2813578826237, "grad_norm": 1.879329800605774, "learning_rate": 0.0010543728423475258, "loss": 0.6121, "step": 164350 }, { "epoch": 47.284234752589185, "grad_norm": 2.182797431945801, "learning_rate": 0.0010543153049482163, "loss": 0.5346, "step": 164360 }, { "epoch": 47.28711162255466, "grad_norm": 2.324350595474243, "learning_rate": 0.0010542577675489069, "loss": 0.6313, "step": 164370 }, { "epoch": 47.28998849252014, "grad_norm": 0.8938006162643433, "learning_rate": 0.0010542002301495972, "loss": 0.5683, "step": 164380 }, { "epoch": 47.292865362485614, "grad_norm": 1.0695013999938965, "learning_rate": 0.0010541426927502878, "loss": 0.5493, "step": 164390 }, { "epoch": 47.29574223245109, "grad_norm": 1.3494850397109985, "learning_rate": 0.0010540851553509784, "loss": 0.5205, "step": 164400 }, { "epoch": 47.29861910241657, "grad_norm": 1.2204434871673584, "learning_rate": 0.0010540276179516685, "loss": 0.5003, "step": 164410 }, { "epoch": 47.30149597238205, "grad_norm": 1.5121856927871704, "learning_rate": 0.001053970080552359, "loss": 0.4864, "step": 164420 }, { "epoch": 47.304372842347526, "grad_norm": 1.121932029724121, "learning_rate": 0.0010539125431530494, "loss": 0.5718, "step": 164430 }, { "epoch": 47.307249712313, "grad_norm": 3.5899498462677, "learning_rate": 0.00105385500575374, "loss": 0.5941, "step": 164440 }, { "epoch": 47.31012658227848, "grad_norm": 1.0101288557052612, "learning_rate": 0.0010537974683544305, "loss": 0.6468, "step": 164450 }, { "epoch": 47.31300345224396, "grad_norm": 2.1898679733276367, "learning_rate": 0.0010537399309551209, "loss": 0.5663, "step": 164460 }, { "epoch": 47.31588032220944, "grad_norm": 0.7816730737686157, "learning_rate": 0.0010536823935558112, "loss": 0.4244, "step": 164470 }, { "epoch": 47.318757192174914, "grad_norm": 0.9162155985832214, "learning_rate": 0.0010536248561565018, "loss": 0.4396, "step": 164480 }, { "epoch": 47.32163406214039, "grad_norm": 1.5482113361358643, "learning_rate": 0.0010535673187571921, "loss": 0.5356, "step": 164490 }, { "epoch": 47.324510932105866, "grad_norm": 1.3721344470977783, "learning_rate": 0.0010535097813578827, "loss": 0.5544, "step": 164500 }, { "epoch": 47.32738780207135, "grad_norm": 0.638580858707428, "learning_rate": 0.0010534522439585733, "loss": 0.644, "step": 164510 }, { "epoch": 47.330264672036826, "grad_norm": 0.872546374797821, "learning_rate": 0.0010533947065592636, "loss": 0.4364, "step": 164520 }, { "epoch": 47.3331415420023, "grad_norm": 1.4780596494674683, "learning_rate": 0.001053337169159954, "loss": 0.5487, "step": 164530 }, { "epoch": 47.33601841196778, "grad_norm": 1.2378768920898438, "learning_rate": 0.0010532796317606443, "loss": 0.6183, "step": 164540 }, { "epoch": 47.338895281933254, "grad_norm": 1.7420910596847534, "learning_rate": 0.0010532220943613349, "loss": 0.5146, "step": 164550 }, { "epoch": 47.34177215189873, "grad_norm": 1.1529245376586914, "learning_rate": 0.0010531645569620254, "loss": 0.6881, "step": 164560 }, { "epoch": 47.344649021864214, "grad_norm": 0.6531471014022827, "learning_rate": 0.0010531070195627158, "loss": 0.5521, "step": 164570 }, { "epoch": 47.34752589182969, "grad_norm": 0.8667263388633728, "learning_rate": 0.0010530494821634063, "loss": 0.428, "step": 164580 }, { "epoch": 47.350402761795166, "grad_norm": 1.4463626146316528, "learning_rate": 0.0010529919447640967, "loss": 0.4693, "step": 164590 }, { "epoch": 47.35327963176064, "grad_norm": 1.0216143131256104, "learning_rate": 0.001052934407364787, "loss": 0.5457, "step": 164600 }, { "epoch": 47.35615650172612, "grad_norm": 1.6635942459106445, "learning_rate": 0.0010528768699654776, "loss": 0.5803, "step": 164610 }, { "epoch": 47.3590333716916, "grad_norm": 2.1688709259033203, "learning_rate": 0.0010528193325661682, "loss": 0.534, "step": 164620 }, { "epoch": 47.36191024165708, "grad_norm": 1.0662899017333984, "learning_rate": 0.0010527617951668585, "loss": 0.5908, "step": 164630 }, { "epoch": 47.364787111622555, "grad_norm": 0.7355162501335144, "learning_rate": 0.001052704257767549, "loss": 0.5581, "step": 164640 }, { "epoch": 47.36766398158803, "grad_norm": 1.0198723077774048, "learning_rate": 0.0010526467203682392, "loss": 0.763, "step": 164650 }, { "epoch": 47.37054085155351, "grad_norm": 0.9350776076316833, "learning_rate": 0.0010525891829689298, "loss": 0.471, "step": 164660 }, { "epoch": 47.37341772151899, "grad_norm": 1.5500578880310059, "learning_rate": 0.0010525316455696203, "loss": 0.5262, "step": 164670 }, { "epoch": 47.376294591484466, "grad_norm": 2.269599437713623, "learning_rate": 0.0010524741081703107, "loss": 0.6531, "step": 164680 }, { "epoch": 47.37917146144994, "grad_norm": 1.579509973526001, "learning_rate": 0.0010524165707710012, "loss": 0.5174, "step": 164690 }, { "epoch": 47.38204833141542, "grad_norm": 1.6687334775924683, "learning_rate": 0.0010523590333716918, "loss": 0.5088, "step": 164700 }, { "epoch": 47.384925201380895, "grad_norm": 0.7288058400154114, "learning_rate": 0.001052301495972382, "loss": 0.5815, "step": 164710 }, { "epoch": 47.38780207134638, "grad_norm": 1.0209779739379883, "learning_rate": 0.0010522439585730725, "loss": 0.6023, "step": 164720 }, { "epoch": 47.390678941311855, "grad_norm": 1.2549083232879639, "learning_rate": 0.001052186421173763, "loss": 0.5831, "step": 164730 }, { "epoch": 47.39355581127733, "grad_norm": 2.5663301944732666, "learning_rate": 0.0010521288837744534, "loss": 0.6767, "step": 164740 }, { "epoch": 47.39643268124281, "grad_norm": 2.1939761638641357, "learning_rate": 0.001052071346375144, "loss": 0.6025, "step": 164750 }, { "epoch": 47.39930955120828, "grad_norm": 1.557503342628479, "learning_rate": 0.001052013808975834, "loss": 0.6341, "step": 164760 }, { "epoch": 47.40218642117377, "grad_norm": 1.1520839929580688, "learning_rate": 0.0010519562715765247, "loss": 0.4622, "step": 164770 }, { "epoch": 47.40506329113924, "grad_norm": 1.1846516132354736, "learning_rate": 0.0010518987341772152, "loss": 0.6235, "step": 164780 }, { "epoch": 47.40794016110472, "grad_norm": 1.383567452430725, "learning_rate": 0.0010518411967779056, "loss": 0.5401, "step": 164790 }, { "epoch": 47.410817031070195, "grad_norm": 2.135303020477295, "learning_rate": 0.0010517836593785961, "loss": 0.6317, "step": 164800 }, { "epoch": 47.41369390103567, "grad_norm": 1.1616837978363037, "learning_rate": 0.0010517261219792867, "loss": 0.5665, "step": 164810 }, { "epoch": 47.41657077100115, "grad_norm": 1.1323529481887817, "learning_rate": 0.0010516685845799768, "loss": 0.4854, "step": 164820 }, { "epoch": 47.41944764096663, "grad_norm": 1.3870863914489746, "learning_rate": 0.0010516110471806674, "loss": 0.6419, "step": 164830 }, { "epoch": 47.42232451093211, "grad_norm": 0.9069197773933411, "learning_rate": 0.001051553509781358, "loss": 0.4467, "step": 164840 }, { "epoch": 47.42520138089758, "grad_norm": 0.9397703409194946, "learning_rate": 0.0010514959723820483, "loss": 0.483, "step": 164850 }, { "epoch": 47.42807825086306, "grad_norm": 1.0070652961730957, "learning_rate": 0.0010514384349827389, "loss": 0.5118, "step": 164860 }, { "epoch": 47.430955120828536, "grad_norm": 1.432822346687317, "learning_rate": 0.0010513808975834292, "loss": 0.5292, "step": 164870 }, { "epoch": 47.43383199079402, "grad_norm": 0.759949266910553, "learning_rate": 0.0010513233601841196, "loss": 0.4037, "step": 164880 }, { "epoch": 47.436708860759495, "grad_norm": 1.1822904348373413, "learning_rate": 0.0010512658227848101, "loss": 0.4954, "step": 164890 }, { "epoch": 47.43958573072497, "grad_norm": 1.7066357135772705, "learning_rate": 0.0010512082853855005, "loss": 0.5361, "step": 164900 }, { "epoch": 47.44246260069045, "grad_norm": 1.2263410091400146, "learning_rate": 0.001051150747986191, "loss": 0.6294, "step": 164910 }, { "epoch": 47.445339470655924, "grad_norm": 0.9635533094406128, "learning_rate": 0.0010510932105868816, "loss": 0.6411, "step": 164920 }, { "epoch": 47.44821634062141, "grad_norm": 1.3973755836486816, "learning_rate": 0.001051035673187572, "loss": 0.5688, "step": 164930 }, { "epoch": 47.45109321058688, "grad_norm": 1.5155954360961914, "learning_rate": 0.0010509781357882623, "loss": 0.5526, "step": 164940 }, { "epoch": 47.45397008055236, "grad_norm": 1.6661046743392944, "learning_rate": 0.0010509205983889529, "loss": 0.5138, "step": 164950 }, { "epoch": 47.456846950517836, "grad_norm": 1.1243274211883545, "learning_rate": 0.0010508630609896432, "loss": 0.5628, "step": 164960 }, { "epoch": 47.45972382048331, "grad_norm": 1.6993247270584106, "learning_rate": 0.0010508055235903338, "loss": 0.4398, "step": 164970 }, { "epoch": 47.462600690448795, "grad_norm": 1.3930268287658691, "learning_rate": 0.0010507479861910243, "loss": 0.5474, "step": 164980 }, { "epoch": 47.46547756041427, "grad_norm": 1.9507346153259277, "learning_rate": 0.0010506904487917147, "loss": 0.4683, "step": 164990 }, { "epoch": 47.46835443037975, "grad_norm": 0.8248023390769958, "learning_rate": 0.001050632911392405, "loss": 0.613, "step": 165000 }, { "epoch": 47.471231300345224, "grad_norm": 0.9290661811828613, "learning_rate": 0.0010505753739930954, "loss": 0.5577, "step": 165010 }, { "epoch": 47.4741081703107, "grad_norm": 1.5932869911193848, "learning_rate": 0.001050517836593786, "loss": 0.4857, "step": 165020 }, { "epoch": 47.476985040276176, "grad_norm": 1.4294525384902954, "learning_rate": 0.0010504602991944765, "loss": 0.5736, "step": 165030 }, { "epoch": 47.47986191024166, "grad_norm": 2.002537727355957, "learning_rate": 0.0010504027617951669, "loss": 0.6009, "step": 165040 }, { "epoch": 47.482738780207136, "grad_norm": 1.7449473142623901, "learning_rate": 0.0010503452243958574, "loss": 0.4624, "step": 165050 }, { "epoch": 47.48561565017261, "grad_norm": 1.1423639059066772, "learning_rate": 0.0010502876869965478, "loss": 0.5677, "step": 165060 }, { "epoch": 47.48849252013809, "grad_norm": 1.5674021244049072, "learning_rate": 0.0010502301495972381, "loss": 0.5318, "step": 165070 }, { "epoch": 47.491369390103564, "grad_norm": 1.0238161087036133, "learning_rate": 0.0010501726121979287, "loss": 0.6327, "step": 165080 }, { "epoch": 47.49424626006905, "grad_norm": 1.1686809062957764, "learning_rate": 0.0010501150747986192, "loss": 0.5303, "step": 165090 }, { "epoch": 47.497123130034524, "grad_norm": 1.8959059715270996, "learning_rate": 0.0010500575373993096, "loss": 0.6267, "step": 165100 }, { "epoch": 47.5, "grad_norm": 0.8675440549850464, "learning_rate": 0.0010500000000000002, "loss": 0.4882, "step": 165110 }, { "epoch": 47.502876869965476, "grad_norm": 0.7265903949737549, "learning_rate": 0.0010499424626006903, "loss": 0.6538, "step": 165120 }, { "epoch": 47.50575373993095, "grad_norm": 1.5719236135482788, "learning_rate": 0.0010498849252013808, "loss": 0.5083, "step": 165130 }, { "epoch": 47.508630609896436, "grad_norm": 2.1665329933166504, "learning_rate": 0.0010498273878020714, "loss": 0.6248, "step": 165140 }, { "epoch": 47.51150747986191, "grad_norm": 2.0413243770599365, "learning_rate": 0.0010497698504027618, "loss": 0.4946, "step": 165150 }, { "epoch": 47.51438434982739, "grad_norm": 1.041872501373291, "learning_rate": 0.0010497123130034523, "loss": 0.6686, "step": 165160 }, { "epoch": 47.517261219792864, "grad_norm": 1.1566195487976074, "learning_rate": 0.0010496547756041429, "loss": 0.5411, "step": 165170 }, { "epoch": 47.52013808975834, "grad_norm": 2.4268765449523926, "learning_rate": 0.001049597238204833, "loss": 0.4904, "step": 165180 }, { "epoch": 47.523014959723824, "grad_norm": 1.5747292041778564, "learning_rate": 0.0010495397008055236, "loss": 0.5018, "step": 165190 }, { "epoch": 47.5258918296893, "grad_norm": 1.350315809249878, "learning_rate": 0.0010494821634062141, "loss": 0.6485, "step": 165200 }, { "epoch": 47.528768699654776, "grad_norm": 1.2123805284500122, "learning_rate": 0.0010494246260069045, "loss": 0.5949, "step": 165210 }, { "epoch": 47.53164556962025, "grad_norm": 1.4780060052871704, "learning_rate": 0.001049367088607595, "loss": 0.6602, "step": 165220 }, { "epoch": 47.53452243958573, "grad_norm": 1.031371831893921, "learning_rate": 0.0010493095512082854, "loss": 0.6453, "step": 165230 }, { "epoch": 47.537399309551205, "grad_norm": 1.8183748722076416, "learning_rate": 0.0010492520138089757, "loss": 0.6585, "step": 165240 }, { "epoch": 47.54027617951669, "grad_norm": 0.7811353206634521, "learning_rate": 0.0010491944764096663, "loss": 0.5643, "step": 165250 }, { "epoch": 47.543153049482164, "grad_norm": 0.6545145511627197, "learning_rate": 0.0010491369390103567, "loss": 0.5539, "step": 165260 }, { "epoch": 47.54602991944764, "grad_norm": 0.9137258529663086, "learning_rate": 0.0010490794016110472, "loss": 0.4718, "step": 165270 }, { "epoch": 47.54890678941312, "grad_norm": 1.0350275039672852, "learning_rate": 0.0010490218642117378, "loss": 0.5927, "step": 165280 }, { "epoch": 47.55178365937859, "grad_norm": 1.2628931999206543, "learning_rate": 0.0010489643268124281, "loss": 0.7046, "step": 165290 }, { "epoch": 47.554660529344076, "grad_norm": 2.9488649368286133, "learning_rate": 0.0010489067894131185, "loss": 0.5781, "step": 165300 }, { "epoch": 47.55753739930955, "grad_norm": 1.8244885206222534, "learning_rate": 0.001048849252013809, "loss": 0.4647, "step": 165310 }, { "epoch": 47.56041426927503, "grad_norm": 1.466845989227295, "learning_rate": 0.0010487917146144994, "loss": 0.4921, "step": 165320 }, { "epoch": 47.563291139240505, "grad_norm": 2.2000300884246826, "learning_rate": 0.00104873417721519, "loss": 0.5754, "step": 165330 }, { "epoch": 47.56616800920598, "grad_norm": 0.8985205292701721, "learning_rate": 0.0010486766398158803, "loss": 0.5408, "step": 165340 }, { "epoch": 47.569044879171464, "grad_norm": 1.3824225664138794, "learning_rate": 0.0010486191024165709, "loss": 0.6054, "step": 165350 }, { "epoch": 47.57192174913694, "grad_norm": 0.9131180047988892, "learning_rate": 0.0010485615650172612, "loss": 0.5307, "step": 165360 }, { "epoch": 47.57479861910242, "grad_norm": 1.1222704648971558, "learning_rate": 0.0010485040276179516, "loss": 0.5029, "step": 165370 }, { "epoch": 47.57767548906789, "grad_norm": 1.6178456544876099, "learning_rate": 0.0010484464902186421, "loss": 0.5725, "step": 165380 }, { "epoch": 47.58055235903337, "grad_norm": 1.2587913274765015, "learning_rate": 0.0010483889528193327, "loss": 0.5476, "step": 165390 }, { "epoch": 47.58342922899885, "grad_norm": 3.2217350006103516, "learning_rate": 0.001048331415420023, "loss": 0.4836, "step": 165400 }, { "epoch": 47.58630609896433, "grad_norm": 1.8140406608581543, "learning_rate": 0.0010482738780207136, "loss": 0.613, "step": 165410 }, { "epoch": 47.589182968929805, "grad_norm": 0.905642032623291, "learning_rate": 0.001048216340621404, "loss": 0.5533, "step": 165420 }, { "epoch": 47.59205983889528, "grad_norm": 0.9394773840904236, "learning_rate": 0.0010481588032220943, "loss": 0.5137, "step": 165430 }, { "epoch": 47.59493670886076, "grad_norm": 1.4794535636901855, "learning_rate": 0.0010481012658227849, "loss": 0.5623, "step": 165440 }, { "epoch": 47.59781357882623, "grad_norm": 0.9301053285598755, "learning_rate": 0.0010480437284234752, "loss": 0.5601, "step": 165450 }, { "epoch": 47.60069044879172, "grad_norm": 1.3370383977890015, "learning_rate": 0.0010479861910241658, "loss": 0.5131, "step": 165460 }, { "epoch": 47.60356731875719, "grad_norm": 1.7545841932296753, "learning_rate": 0.0010479286536248563, "loss": 0.7253, "step": 165470 }, { "epoch": 47.60644418872267, "grad_norm": 1.9909175634384155, "learning_rate": 0.0010478711162255465, "loss": 0.6941, "step": 165480 }, { "epoch": 47.609321058688145, "grad_norm": 1.5299787521362305, "learning_rate": 0.001047813578826237, "loss": 0.5127, "step": 165490 }, { "epoch": 47.61219792865362, "grad_norm": 1.2347289323806763, "learning_rate": 0.0010477560414269276, "loss": 0.5457, "step": 165500 }, { "epoch": 47.615074798619105, "grad_norm": 1.0952566862106323, "learning_rate": 0.001047698504027618, "loss": 0.4456, "step": 165510 }, { "epoch": 47.61795166858458, "grad_norm": 1.5514912605285645, "learning_rate": 0.0010476409666283085, "loss": 0.7431, "step": 165520 }, { "epoch": 47.62082853855006, "grad_norm": 1.2741225957870483, "learning_rate": 0.001047583429228999, "loss": 0.6624, "step": 165530 }, { "epoch": 47.623705408515534, "grad_norm": 1.6566722393035889, "learning_rate": 0.0010475258918296892, "loss": 0.6904, "step": 165540 }, { "epoch": 47.62658227848101, "grad_norm": 1.4273874759674072, "learning_rate": 0.0010474683544303798, "loss": 0.5608, "step": 165550 }, { "epoch": 47.62945914844649, "grad_norm": 1.7968913316726685, "learning_rate": 0.00104741081703107, "loss": 0.5358, "step": 165560 }, { "epoch": 47.63233601841197, "grad_norm": 1.6074771881103516, "learning_rate": 0.0010473532796317607, "loss": 0.6769, "step": 165570 }, { "epoch": 47.635212888377445, "grad_norm": 1.1731418371200562, "learning_rate": 0.0010472957422324512, "loss": 0.6761, "step": 165580 }, { "epoch": 47.63808975834292, "grad_norm": 1.1567972898483276, "learning_rate": 0.0010472382048331414, "loss": 0.7611, "step": 165590 }, { "epoch": 47.6409666283084, "grad_norm": 1.978976845741272, "learning_rate": 0.001047180667433832, "loss": 0.6709, "step": 165600 }, { "epoch": 47.64384349827388, "grad_norm": 1.0468331575393677, "learning_rate": 0.0010471231300345225, "loss": 0.5448, "step": 165610 }, { "epoch": 47.64672036823936, "grad_norm": 0.6871417164802551, "learning_rate": 0.0010470655926352128, "loss": 0.4972, "step": 165620 }, { "epoch": 47.649597238204834, "grad_norm": 1.067103624343872, "learning_rate": 0.0010470080552359034, "loss": 0.5626, "step": 165630 }, { "epoch": 47.65247410817031, "grad_norm": 1.4811075925827026, "learning_rate": 0.001046950517836594, "loss": 0.4666, "step": 165640 }, { "epoch": 47.655350978135786, "grad_norm": 0.888751745223999, "learning_rate": 0.001046892980437284, "loss": 0.5718, "step": 165650 }, { "epoch": 47.65822784810126, "grad_norm": 3.47145414352417, "learning_rate": 0.0010468354430379747, "loss": 0.5725, "step": 165660 }, { "epoch": 47.661104718066746, "grad_norm": 2.2195065021514893, "learning_rate": 0.0010467779056386652, "loss": 0.5394, "step": 165670 }, { "epoch": 47.66398158803222, "grad_norm": 1.4777288436889648, "learning_rate": 0.0010467203682393556, "loss": 0.687, "step": 165680 }, { "epoch": 47.6668584579977, "grad_norm": 0.8628817796707153, "learning_rate": 0.0010466628308400461, "loss": 0.5336, "step": 165690 }, { "epoch": 47.669735327963174, "grad_norm": 1.1397119760513306, "learning_rate": 0.0010466052934407365, "loss": 0.5884, "step": 165700 }, { "epoch": 47.67261219792865, "grad_norm": 1.1653987169265747, "learning_rate": 0.0010465477560414268, "loss": 0.5534, "step": 165710 }, { "epoch": 47.675489067894134, "grad_norm": 1.1091614961624146, "learning_rate": 0.0010464902186421174, "loss": 0.5639, "step": 165720 }, { "epoch": 47.67836593785961, "grad_norm": 1.3817834854125977, "learning_rate": 0.0010464326812428077, "loss": 0.6328, "step": 165730 }, { "epoch": 47.681242807825086, "grad_norm": 1.1112921237945557, "learning_rate": 0.0010463751438434983, "loss": 0.471, "step": 165740 }, { "epoch": 47.68411967779056, "grad_norm": 1.2249704599380493, "learning_rate": 0.0010463176064441889, "loss": 0.6509, "step": 165750 }, { "epoch": 47.68699654775604, "grad_norm": 1.1403038501739502, "learning_rate": 0.0010462600690448792, "loss": 0.4881, "step": 165760 }, { "epoch": 47.68987341772152, "grad_norm": 1.541712760925293, "learning_rate": 0.0010462025316455696, "loss": 0.5556, "step": 165770 }, { "epoch": 47.692750287687, "grad_norm": 1.6083253622055054, "learning_rate": 0.0010461449942462601, "loss": 0.693, "step": 165780 }, { "epoch": 47.695627157652474, "grad_norm": 1.508802890777588, "learning_rate": 0.0010460874568469505, "loss": 0.5521, "step": 165790 }, { "epoch": 47.69850402761795, "grad_norm": 1.0257365703582764, "learning_rate": 0.001046029919447641, "loss": 0.631, "step": 165800 }, { "epoch": 47.70138089758343, "grad_norm": 1.5645016431808472, "learning_rate": 0.0010459723820483314, "loss": 0.4673, "step": 165810 }, { "epoch": 47.70425776754891, "grad_norm": 2.2588272094726562, "learning_rate": 0.001045914844649022, "loss": 0.7009, "step": 165820 }, { "epoch": 47.707134637514386, "grad_norm": 1.5770955085754395, "learning_rate": 0.0010458573072497123, "loss": 0.6056, "step": 165830 }, { "epoch": 47.71001150747986, "grad_norm": 0.8771629929542542, "learning_rate": 0.0010457997698504026, "loss": 0.6181, "step": 165840 }, { "epoch": 47.71288837744534, "grad_norm": 1.1131532192230225, "learning_rate": 0.0010457422324510932, "loss": 0.5178, "step": 165850 }, { "epoch": 47.715765247410815, "grad_norm": 0.6263073086738586, "learning_rate": 0.0010456846950517838, "loss": 0.6086, "step": 165860 }, { "epoch": 47.7186421173763, "grad_norm": 1.3347439765930176, "learning_rate": 0.0010456271576524741, "loss": 0.581, "step": 165870 }, { "epoch": 47.721518987341774, "grad_norm": 0.646705150604248, "learning_rate": 0.0010455696202531647, "loss": 0.6397, "step": 165880 }, { "epoch": 47.72439585730725, "grad_norm": 1.1361393928527832, "learning_rate": 0.001045512082853855, "loss": 0.6628, "step": 165890 }, { "epoch": 47.72727272727273, "grad_norm": 0.9410201907157898, "learning_rate": 0.0010454545454545454, "loss": 0.5553, "step": 165900 }, { "epoch": 47.7301495972382, "grad_norm": 1.9405232667922974, "learning_rate": 0.001045397008055236, "loss": 0.5473, "step": 165910 }, { "epoch": 47.73302646720368, "grad_norm": 1.4832037687301636, "learning_rate": 0.0010453394706559263, "loss": 0.5713, "step": 165920 }, { "epoch": 47.73590333716916, "grad_norm": 1.3286876678466797, "learning_rate": 0.0010452819332566169, "loss": 0.5502, "step": 165930 }, { "epoch": 47.73878020713464, "grad_norm": 1.5977500677108765, "learning_rate": 0.0010452243958573074, "loss": 0.6087, "step": 165940 }, { "epoch": 47.741657077100115, "grad_norm": 1.0733966827392578, "learning_rate": 0.0010451668584579975, "loss": 0.585, "step": 165950 }, { "epoch": 47.74453394706559, "grad_norm": 1.5066087245941162, "learning_rate": 0.0010451093210586881, "loss": 0.5802, "step": 165960 }, { "epoch": 47.74741081703107, "grad_norm": 1.255497694015503, "learning_rate": 0.0010450517836593787, "loss": 0.5521, "step": 165970 }, { "epoch": 47.75028768699655, "grad_norm": 0.710789144039154, "learning_rate": 0.001044994246260069, "loss": 0.5329, "step": 165980 }, { "epoch": 47.75316455696203, "grad_norm": 2.738111734390259, "learning_rate": 0.0010449367088607596, "loss": 0.6609, "step": 165990 }, { "epoch": 47.7560414269275, "grad_norm": 1.3627870082855225, "learning_rate": 0.0010448791714614501, "loss": 0.4784, "step": 166000 }, { "epoch": 47.75891829689298, "grad_norm": 2.861319065093994, "learning_rate": 0.0010448216340621403, "loss": 0.5911, "step": 166010 }, { "epoch": 47.761795166858455, "grad_norm": 1.3252156972885132, "learning_rate": 0.0010447640966628308, "loss": 0.5233, "step": 166020 }, { "epoch": 47.76467203682394, "grad_norm": 0.9675438404083252, "learning_rate": 0.0010447065592635212, "loss": 0.6037, "step": 166030 }, { "epoch": 47.767548906789415, "grad_norm": 1.7514102458953857, "learning_rate": 0.0010446490218642118, "loss": 0.4996, "step": 166040 }, { "epoch": 47.77042577675489, "grad_norm": 1.1529704332351685, "learning_rate": 0.0010445914844649023, "loss": 0.6405, "step": 166050 }, { "epoch": 47.77330264672037, "grad_norm": 0.6300958395004272, "learning_rate": 0.0010445339470655927, "loss": 0.6318, "step": 166060 }, { "epoch": 47.77617951668584, "grad_norm": 1.7961745262145996, "learning_rate": 0.001044476409666283, "loss": 0.4429, "step": 166070 }, { "epoch": 47.77905638665133, "grad_norm": 0.8295326232910156, "learning_rate": 0.0010444188722669736, "loss": 0.4752, "step": 166080 }, { "epoch": 47.7819332566168, "grad_norm": 1.696450114250183, "learning_rate": 0.001044361334867664, "loss": 0.7028, "step": 166090 }, { "epoch": 47.78481012658228, "grad_norm": 1.0797125101089478, "learning_rate": 0.0010443037974683545, "loss": 0.4754, "step": 166100 }, { "epoch": 47.787686996547755, "grad_norm": 1.5161687135696411, "learning_rate": 0.001044246260069045, "loss": 0.6284, "step": 166110 }, { "epoch": 47.79056386651323, "grad_norm": 1.2404001951217651, "learning_rate": 0.0010441887226697354, "loss": 0.6206, "step": 166120 }, { "epoch": 47.79344073647871, "grad_norm": 0.8380745053291321, "learning_rate": 0.0010441311852704257, "loss": 0.5806, "step": 166130 }, { "epoch": 47.79631760644419, "grad_norm": 1.6635388135910034, "learning_rate": 0.001044073647871116, "loss": 0.7219, "step": 166140 }, { "epoch": 47.79919447640967, "grad_norm": 1.2313199043273926, "learning_rate": 0.0010440161104718067, "loss": 0.5315, "step": 166150 }, { "epoch": 47.80207134637514, "grad_norm": 1.7043474912643433, "learning_rate": 0.0010439585730724972, "loss": 0.4961, "step": 166160 }, { "epoch": 47.80494821634062, "grad_norm": 0.8413060307502747, "learning_rate": 0.0010439010356731876, "loss": 0.4931, "step": 166170 }, { "epoch": 47.807825086306096, "grad_norm": 2.558760166168213, "learning_rate": 0.0010438434982738781, "loss": 0.5381, "step": 166180 }, { "epoch": 47.81070195627158, "grad_norm": 0.8449182510375977, "learning_rate": 0.0010437859608745685, "loss": 0.5403, "step": 166190 }, { "epoch": 47.813578826237055, "grad_norm": 1.0603275299072266, "learning_rate": 0.0010437284234752588, "loss": 0.4784, "step": 166200 }, { "epoch": 47.81645569620253, "grad_norm": 0.6854394674301147, "learning_rate": 0.0010436708860759494, "loss": 0.543, "step": 166210 }, { "epoch": 47.81933256616801, "grad_norm": 0.8995063900947571, "learning_rate": 0.00104361334867664, "loss": 0.5582, "step": 166220 }, { "epoch": 47.822209436133484, "grad_norm": 1.3649144172668457, "learning_rate": 0.0010435558112773303, "loss": 0.5758, "step": 166230 }, { "epoch": 47.82508630609897, "grad_norm": 2.1022112369537354, "learning_rate": 0.0010434982738780209, "loss": 0.548, "step": 166240 }, { "epoch": 47.82796317606444, "grad_norm": 1.1924940347671509, "learning_rate": 0.0010434407364787112, "loss": 0.5709, "step": 166250 }, { "epoch": 47.83084004602992, "grad_norm": 1.1735906600952148, "learning_rate": 0.0010433831990794016, "loss": 0.5125, "step": 166260 }, { "epoch": 47.833716915995396, "grad_norm": 0.6556110382080078, "learning_rate": 0.0010433256616800921, "loss": 0.655, "step": 166270 }, { "epoch": 47.83659378596087, "grad_norm": 1.5740282535552979, "learning_rate": 0.0010432681242807825, "loss": 0.4985, "step": 166280 }, { "epoch": 47.839470655926355, "grad_norm": 1.1827894449234009, "learning_rate": 0.001043210586881473, "loss": 0.5884, "step": 166290 }, { "epoch": 47.84234752589183, "grad_norm": 1.6006317138671875, "learning_rate": 0.0010431530494821636, "loss": 0.6724, "step": 166300 }, { "epoch": 47.84522439585731, "grad_norm": 0.976151168346405, "learning_rate": 0.0010430955120828537, "loss": 0.4836, "step": 166310 }, { "epoch": 47.848101265822784, "grad_norm": 2.8052995204925537, "learning_rate": 0.0010430379746835443, "loss": 0.64, "step": 166320 }, { "epoch": 47.85097813578826, "grad_norm": 1.0476576089859009, "learning_rate": 0.0010429804372842349, "loss": 0.4539, "step": 166330 }, { "epoch": 47.85385500575374, "grad_norm": 0.820130467414856, "learning_rate": 0.0010429228998849252, "loss": 0.5522, "step": 166340 }, { "epoch": 47.85673187571922, "grad_norm": 1.1544362306594849, "learning_rate": 0.0010428653624856158, "loss": 0.5828, "step": 166350 }, { "epoch": 47.859608745684696, "grad_norm": 1.7193955183029175, "learning_rate": 0.0010428078250863063, "loss": 0.6694, "step": 166360 }, { "epoch": 47.86248561565017, "grad_norm": 1.8202065229415894, "learning_rate": 0.0010427502876869965, "loss": 0.6272, "step": 166370 }, { "epoch": 47.86536248561565, "grad_norm": 1.0097213983535767, "learning_rate": 0.001042692750287687, "loss": 0.7243, "step": 166380 }, { "epoch": 47.868239355581125, "grad_norm": 2.5571649074554443, "learning_rate": 0.0010426352128883774, "loss": 0.505, "step": 166390 }, { "epoch": 47.87111622554661, "grad_norm": 2.424384355545044, "learning_rate": 0.001042577675489068, "loss": 0.6298, "step": 166400 }, { "epoch": 47.873993095512084, "grad_norm": 1.557895302772522, "learning_rate": 0.0010425201380897585, "loss": 0.6266, "step": 166410 }, { "epoch": 47.87686996547756, "grad_norm": 1.6941587924957275, "learning_rate": 0.0010424626006904486, "loss": 0.578, "step": 166420 }, { "epoch": 47.879746835443036, "grad_norm": 1.152771234512329, "learning_rate": 0.0010424050632911392, "loss": 0.5479, "step": 166430 }, { "epoch": 47.88262370540851, "grad_norm": 0.9085242748260498, "learning_rate": 0.0010423475258918298, "loss": 0.5452, "step": 166440 }, { "epoch": 47.885500575373996, "grad_norm": 1.6674823760986328, "learning_rate": 0.00104228998849252, "loss": 0.5484, "step": 166450 }, { "epoch": 47.88837744533947, "grad_norm": 1.2187750339508057, "learning_rate": 0.0010422324510932107, "loss": 0.6225, "step": 166460 }, { "epoch": 47.89125431530495, "grad_norm": 0.8733391165733337, "learning_rate": 0.0010421749136939012, "loss": 0.6316, "step": 166470 }, { "epoch": 47.894131185270425, "grad_norm": 1.3407857418060303, "learning_rate": 0.0010421173762945914, "loss": 0.6603, "step": 166480 }, { "epoch": 47.8970080552359, "grad_norm": 1.4551026821136475, "learning_rate": 0.001042059838895282, "loss": 0.5843, "step": 166490 }, { "epoch": 47.899884925201384, "grad_norm": 1.9077023267745972, "learning_rate": 0.0010420023014959723, "loss": 0.7316, "step": 166500 }, { "epoch": 47.90276179516686, "grad_norm": 1.025520920753479, "learning_rate": 0.0010419447640966628, "loss": 0.5519, "step": 166510 }, { "epoch": 47.90563866513234, "grad_norm": 1.8400686979293823, "learning_rate": 0.0010418872266973534, "loss": 0.6013, "step": 166520 }, { "epoch": 47.90851553509781, "grad_norm": 0.7540651559829712, "learning_rate": 0.0010418296892980437, "loss": 0.6354, "step": 166530 }, { "epoch": 47.91139240506329, "grad_norm": 0.808590829372406, "learning_rate": 0.001041772151898734, "loss": 0.5933, "step": 166540 }, { "epoch": 47.91426927502877, "grad_norm": 0.9695663452148438, "learning_rate": 0.0010417146144994247, "loss": 0.5533, "step": 166550 }, { "epoch": 47.91714614499425, "grad_norm": 1.2204288244247437, "learning_rate": 0.001041657077100115, "loss": 0.6439, "step": 166560 }, { "epoch": 47.920023014959725, "grad_norm": 1.257068157196045, "learning_rate": 0.0010415995397008056, "loss": 0.5601, "step": 166570 }, { "epoch": 47.9228998849252, "grad_norm": 1.3564246892929077, "learning_rate": 0.0010415420023014961, "loss": 0.4137, "step": 166580 }, { "epoch": 47.92577675489068, "grad_norm": 1.6491000652313232, "learning_rate": 0.0010414844649021865, "loss": 0.5769, "step": 166590 }, { "epoch": 47.92865362485615, "grad_norm": 2.3038368225097656, "learning_rate": 0.0010414269275028768, "loss": 0.6494, "step": 166600 }, { "epoch": 47.93153049482164, "grad_norm": 0.8871062994003296, "learning_rate": 0.0010413693901035672, "loss": 0.5602, "step": 166610 }, { "epoch": 47.93440736478711, "grad_norm": 1.237492561340332, "learning_rate": 0.0010413118527042577, "loss": 0.5304, "step": 166620 }, { "epoch": 47.93728423475259, "grad_norm": 1.021365761756897, "learning_rate": 0.0010412543153049483, "loss": 0.5307, "step": 166630 }, { "epoch": 47.940161104718065, "grad_norm": 1.2890387773513794, "learning_rate": 0.0010411967779056387, "loss": 0.563, "step": 166640 }, { "epoch": 47.94303797468354, "grad_norm": 1.9997649192810059, "learning_rate": 0.0010411392405063292, "loss": 0.5668, "step": 166650 }, { "epoch": 47.945914844649025, "grad_norm": 1.0985702276229858, "learning_rate": 0.0010410817031070196, "loss": 0.508, "step": 166660 }, { "epoch": 47.9487917146145, "grad_norm": 1.5955537557601929, "learning_rate": 0.00104102416570771, "loss": 0.6434, "step": 166670 }, { "epoch": 47.95166858457998, "grad_norm": 1.302445650100708, "learning_rate": 0.0010409666283084005, "loss": 0.5429, "step": 166680 }, { "epoch": 47.95454545454545, "grad_norm": 1.7442957162857056, "learning_rate": 0.001040909090909091, "loss": 0.618, "step": 166690 }, { "epoch": 47.95742232451093, "grad_norm": 0.9152805805206299, "learning_rate": 0.0010408515535097814, "loss": 0.6146, "step": 166700 }, { "epoch": 47.96029919447641, "grad_norm": 1.365240216255188, "learning_rate": 0.001040794016110472, "loss": 0.5972, "step": 166710 }, { "epoch": 47.96317606444189, "grad_norm": 1.7340383529663086, "learning_rate": 0.001040736478711162, "loss": 0.5931, "step": 166720 }, { "epoch": 47.966052934407365, "grad_norm": 1.1359506845474243, "learning_rate": 0.0010406789413118526, "loss": 0.6222, "step": 166730 }, { "epoch": 47.96892980437284, "grad_norm": 0.9794079065322876, "learning_rate": 0.0010406214039125432, "loss": 0.5612, "step": 166740 }, { "epoch": 47.97180667433832, "grad_norm": 1.1519250869750977, "learning_rate": 0.0010405638665132336, "loss": 0.564, "step": 166750 }, { "epoch": 47.9746835443038, "grad_norm": 0.6296606063842773, "learning_rate": 0.0010405063291139241, "loss": 0.6404, "step": 166760 }, { "epoch": 47.97756041426928, "grad_norm": 1.2406121492385864, "learning_rate": 0.0010404487917146147, "loss": 0.4475, "step": 166770 }, { "epoch": 47.98043728423475, "grad_norm": 1.9927030801773071, "learning_rate": 0.0010403912543153048, "loss": 0.6419, "step": 166780 }, { "epoch": 47.98331415420023, "grad_norm": 1.6990946531295776, "learning_rate": 0.0010403337169159954, "loss": 0.7165, "step": 166790 }, { "epoch": 47.986191024165706, "grad_norm": 1.0486977100372314, "learning_rate": 0.001040276179516686, "loss": 0.5645, "step": 166800 }, { "epoch": 47.98906789413118, "grad_norm": 1.0470349788665771, "learning_rate": 0.0010402186421173763, "loss": 0.4927, "step": 166810 }, { "epoch": 47.991944764096665, "grad_norm": 0.9127523303031921, "learning_rate": 0.0010401611047180669, "loss": 0.6895, "step": 166820 }, { "epoch": 47.99482163406214, "grad_norm": 1.5220340490341187, "learning_rate": 0.0010401035673187572, "loss": 0.6438, "step": 166830 }, { "epoch": 47.99769850402762, "grad_norm": 0.9342548251152039, "learning_rate": 0.0010400460299194475, "loss": 0.6749, "step": 166840 }, { "epoch": 48.000575373993094, "grad_norm": 2.9493792057037354, "learning_rate": 0.001039988492520138, "loss": 0.7132, "step": 166850 }, { "epoch": 48.00345224395857, "grad_norm": 1.426220178604126, "learning_rate": 0.0010399309551208285, "loss": 0.3645, "step": 166860 }, { "epoch": 48.00632911392405, "grad_norm": 1.039766550064087, "learning_rate": 0.001039873417721519, "loss": 0.4382, "step": 166870 }, { "epoch": 48.00920598388953, "grad_norm": 1.290798306465149, "learning_rate": 0.0010398158803222096, "loss": 0.5335, "step": 166880 }, { "epoch": 48.012082853855006, "grad_norm": 1.2802387475967407, "learning_rate": 0.0010397583429229, "loss": 0.6174, "step": 166890 }, { "epoch": 48.01495972382048, "grad_norm": 2.9510326385498047, "learning_rate": 0.0010397008055235903, "loss": 0.5908, "step": 166900 }, { "epoch": 48.01783659378596, "grad_norm": 2.1229686737060547, "learning_rate": 0.0010396432681242808, "loss": 0.5065, "step": 166910 }, { "epoch": 48.02071346375144, "grad_norm": 0.6494744420051575, "learning_rate": 0.0010395857307249712, "loss": 0.4146, "step": 166920 }, { "epoch": 48.02359033371692, "grad_norm": 1.4559983015060425, "learning_rate": 0.0010395281933256618, "loss": 0.5381, "step": 166930 }, { "epoch": 48.026467203682394, "grad_norm": 0.82490473985672, "learning_rate": 0.0010394706559263523, "loss": 0.4329, "step": 166940 }, { "epoch": 48.02934407364787, "grad_norm": 1.2930011749267578, "learning_rate": 0.0010394131185270427, "loss": 0.5059, "step": 166950 }, { "epoch": 48.032220943613346, "grad_norm": 0.9060482978820801, "learning_rate": 0.001039355581127733, "loss": 0.4346, "step": 166960 }, { "epoch": 48.03509781357883, "grad_norm": 1.560893177986145, "learning_rate": 0.0010392980437284234, "loss": 0.5586, "step": 166970 }, { "epoch": 48.037974683544306, "grad_norm": 1.3306390047073364, "learning_rate": 0.001039240506329114, "loss": 0.5243, "step": 166980 }, { "epoch": 48.04085155350978, "grad_norm": 1.2992746829986572, "learning_rate": 0.0010391829689298045, "loss": 0.4986, "step": 166990 }, { "epoch": 48.04372842347526, "grad_norm": 1.6200608015060425, "learning_rate": 0.0010391254315304948, "loss": 0.6175, "step": 167000 }, { "epoch": 48.046605293440734, "grad_norm": 1.1964439153671265, "learning_rate": 0.0010390678941311854, "loss": 0.5579, "step": 167010 }, { "epoch": 48.04948216340621, "grad_norm": 0.7248733043670654, "learning_rate": 0.0010390103567318757, "loss": 0.4914, "step": 167020 }, { "epoch": 48.052359033371694, "grad_norm": 1.3445782661437988, "learning_rate": 0.001038952819332566, "loss": 0.4729, "step": 167030 }, { "epoch": 48.05523590333717, "grad_norm": 1.1057169437408447, "learning_rate": 0.0010388952819332567, "loss": 0.4281, "step": 167040 }, { "epoch": 48.058112773302646, "grad_norm": 1.0450700521469116, "learning_rate": 0.0010388377445339472, "loss": 0.5097, "step": 167050 }, { "epoch": 48.06098964326812, "grad_norm": 1.6236097812652588, "learning_rate": 0.0010387802071346376, "loss": 0.4411, "step": 167060 }, { "epoch": 48.0638665132336, "grad_norm": 0.9951871633529663, "learning_rate": 0.0010387226697353281, "loss": 0.5279, "step": 167070 }, { "epoch": 48.06674338319908, "grad_norm": 1.39470636844635, "learning_rate": 0.0010386651323360183, "loss": 0.5257, "step": 167080 }, { "epoch": 48.06962025316456, "grad_norm": 1.2476325035095215, "learning_rate": 0.0010386075949367088, "loss": 0.57, "step": 167090 }, { "epoch": 48.072497123130034, "grad_norm": 0.8398035168647766, "learning_rate": 0.0010385500575373994, "loss": 0.4875, "step": 167100 }, { "epoch": 48.07537399309551, "grad_norm": 1.3549031019210815, "learning_rate": 0.0010384925201380897, "loss": 0.4845, "step": 167110 }, { "epoch": 48.07825086306099, "grad_norm": 1.16571843624115, "learning_rate": 0.0010384349827387803, "loss": 0.5593, "step": 167120 }, { "epoch": 48.08112773302647, "grad_norm": 1.0260173082351685, "learning_rate": 0.0010383774453394709, "loss": 0.4628, "step": 167130 }, { "epoch": 48.084004602991946, "grad_norm": 0.8800816535949707, "learning_rate": 0.001038319907940161, "loss": 0.6103, "step": 167140 }, { "epoch": 48.08688147295742, "grad_norm": 0.975152313709259, "learning_rate": 0.0010382623705408516, "loss": 0.4787, "step": 167150 }, { "epoch": 48.0897583429229, "grad_norm": 2.598827838897705, "learning_rate": 0.0010382048331415421, "loss": 0.629, "step": 167160 }, { "epoch": 48.092635212888375, "grad_norm": 0.7385347485542297, "learning_rate": 0.0010381472957422325, "loss": 0.5336, "step": 167170 }, { "epoch": 48.09551208285386, "grad_norm": 0.9126054048538208, "learning_rate": 0.001038089758342923, "loss": 0.4274, "step": 167180 }, { "epoch": 48.098388952819334, "grad_norm": 1.2669556140899658, "learning_rate": 0.0010380322209436132, "loss": 0.5126, "step": 167190 }, { "epoch": 48.10126582278481, "grad_norm": 0.8140427470207214, "learning_rate": 0.0010379746835443037, "loss": 0.5654, "step": 167200 }, { "epoch": 48.10414269275029, "grad_norm": 1.2981057167053223, "learning_rate": 0.0010379171461449943, "loss": 0.6451, "step": 167210 }, { "epoch": 48.10701956271576, "grad_norm": 1.2637715339660645, "learning_rate": 0.0010378596087456846, "loss": 0.4427, "step": 167220 }, { "epoch": 48.10989643268124, "grad_norm": 2.7733230590820312, "learning_rate": 0.0010378020713463752, "loss": 0.6462, "step": 167230 }, { "epoch": 48.11277330264672, "grad_norm": 1.9303321838378906, "learning_rate": 0.0010377445339470658, "loss": 0.6839, "step": 167240 }, { "epoch": 48.1156501726122, "grad_norm": 1.1064679622650146, "learning_rate": 0.001037686996547756, "loss": 0.5868, "step": 167250 }, { "epoch": 48.118527042577675, "grad_norm": 0.9062410593032837, "learning_rate": 0.0010376294591484465, "loss": 0.4165, "step": 167260 }, { "epoch": 48.12140391254315, "grad_norm": 0.5158881545066833, "learning_rate": 0.001037571921749137, "loss": 0.4842, "step": 167270 }, { "epoch": 48.12428078250863, "grad_norm": 0.9433450102806091, "learning_rate": 0.0010375143843498274, "loss": 0.5317, "step": 167280 }, { "epoch": 48.12715765247411, "grad_norm": 0.9209532737731934, "learning_rate": 0.001037456846950518, "loss": 0.4445, "step": 167290 }, { "epoch": 48.13003452243959, "grad_norm": 1.0607151985168457, "learning_rate": 0.0010373993095512083, "loss": 0.5959, "step": 167300 }, { "epoch": 48.13291139240506, "grad_norm": 1.015122652053833, "learning_rate": 0.0010373417721518986, "loss": 0.5745, "step": 167310 }, { "epoch": 48.13578826237054, "grad_norm": 0.9803451895713806, "learning_rate": 0.0010372842347525892, "loss": 0.5551, "step": 167320 }, { "epoch": 48.138665132336016, "grad_norm": 1.0993752479553223, "learning_rate": 0.0010372266973532795, "loss": 0.4929, "step": 167330 }, { "epoch": 48.1415420023015, "grad_norm": 0.9967573881149292, "learning_rate": 0.00103716915995397, "loss": 0.4263, "step": 167340 }, { "epoch": 48.144418872266975, "grad_norm": 0.9596606492996216, "learning_rate": 0.0010371116225546607, "loss": 0.4705, "step": 167350 }, { "epoch": 48.14729574223245, "grad_norm": 2.4955294132232666, "learning_rate": 0.001037054085155351, "loss": 0.5674, "step": 167360 }, { "epoch": 48.15017261219793, "grad_norm": 1.4192698001861572, "learning_rate": 0.0010369965477560414, "loss": 0.6534, "step": 167370 }, { "epoch": 48.153049482163404, "grad_norm": 0.7110682725906372, "learning_rate": 0.001036939010356732, "loss": 0.4219, "step": 167380 }, { "epoch": 48.15592635212889, "grad_norm": 1.236385464668274, "learning_rate": 0.0010368814729574223, "loss": 0.5744, "step": 167390 }, { "epoch": 48.15880322209436, "grad_norm": 1.1455897092819214, "learning_rate": 0.0010368239355581128, "loss": 0.6095, "step": 167400 }, { "epoch": 48.16168009205984, "grad_norm": 1.4372138977050781, "learning_rate": 0.0010367663981588032, "loss": 0.4525, "step": 167410 }, { "epoch": 48.164556962025316, "grad_norm": 1.171935796737671, "learning_rate": 0.0010367088607594937, "loss": 0.4532, "step": 167420 }, { "epoch": 48.16743383199079, "grad_norm": 0.7594796419143677, "learning_rate": 0.001036651323360184, "loss": 0.455, "step": 167430 }, { "epoch": 48.170310701956275, "grad_norm": 1.4074655771255493, "learning_rate": 0.0010365937859608744, "loss": 0.5253, "step": 167440 }, { "epoch": 48.17318757192175, "grad_norm": 1.3184586763381958, "learning_rate": 0.001036536248561565, "loss": 0.6371, "step": 167450 }, { "epoch": 48.17606444188723, "grad_norm": 1.25991690158844, "learning_rate": 0.0010364787111622556, "loss": 0.7711, "step": 167460 }, { "epoch": 48.178941311852704, "grad_norm": 2.4617843627929688, "learning_rate": 0.001036421173762946, "loss": 0.6142, "step": 167470 }, { "epoch": 48.18181818181818, "grad_norm": 1.7338672876358032, "learning_rate": 0.0010363636363636365, "loss": 0.6049, "step": 167480 }, { "epoch": 48.184695051783656, "grad_norm": 1.155153751373291, "learning_rate": 0.0010363060989643268, "loss": 0.4762, "step": 167490 }, { "epoch": 48.18757192174914, "grad_norm": 2.1938624382019043, "learning_rate": 0.0010362485615650172, "loss": 0.6172, "step": 167500 }, { "epoch": 48.190448791714616, "grad_norm": 1.1071662902832031, "learning_rate": 0.0010361910241657077, "loss": 0.523, "step": 167510 }, { "epoch": 48.19332566168009, "grad_norm": 2.042870044708252, "learning_rate": 0.001036133486766398, "loss": 0.5081, "step": 167520 }, { "epoch": 48.19620253164557, "grad_norm": 0.9679957032203674, "learning_rate": 0.0010360759493670886, "loss": 0.7167, "step": 167530 }, { "epoch": 48.199079401611044, "grad_norm": 0.9664821028709412, "learning_rate": 0.0010360184119677792, "loss": 0.5556, "step": 167540 }, { "epoch": 48.20195627157653, "grad_norm": 1.4582141637802124, "learning_rate": 0.0010359608745684693, "loss": 0.7014, "step": 167550 }, { "epoch": 48.204833141542004, "grad_norm": 1.3897438049316406, "learning_rate": 0.00103590333716916, "loss": 0.5334, "step": 167560 }, { "epoch": 48.20771001150748, "grad_norm": 1.8033244609832764, "learning_rate": 0.0010358457997698505, "loss": 0.5503, "step": 167570 }, { "epoch": 48.210586881472956, "grad_norm": 1.0398539304733276, "learning_rate": 0.0010357882623705408, "loss": 0.4932, "step": 167580 }, { "epoch": 48.21346375143843, "grad_norm": 1.0470702648162842, "learning_rate": 0.0010357307249712314, "loss": 0.556, "step": 167590 }, { "epoch": 48.216340621403916, "grad_norm": 1.444322109222412, "learning_rate": 0.001035673187571922, "loss": 0.5935, "step": 167600 }, { "epoch": 48.21921749136939, "grad_norm": 1.5809695720672607, "learning_rate": 0.001035615650172612, "loss": 0.5579, "step": 167610 }, { "epoch": 48.22209436133487, "grad_norm": 1.1787834167480469, "learning_rate": 0.0010355581127733026, "loss": 0.4724, "step": 167620 }, { "epoch": 48.224971231300344, "grad_norm": 1.207769751548767, "learning_rate": 0.0010355005753739932, "loss": 0.5856, "step": 167630 }, { "epoch": 48.22784810126582, "grad_norm": 1.3983592987060547, "learning_rate": 0.0010354430379746836, "loss": 0.464, "step": 167640 }, { "epoch": 48.230724971231304, "grad_norm": 0.557463526725769, "learning_rate": 0.0010353855005753741, "loss": 0.5074, "step": 167650 }, { "epoch": 48.23360184119678, "grad_norm": 1.198527216911316, "learning_rate": 0.0010353279631760645, "loss": 0.6552, "step": 167660 }, { "epoch": 48.236478711162256, "grad_norm": 1.59165620803833, "learning_rate": 0.0010352704257767548, "loss": 0.6, "step": 167670 }, { "epoch": 48.23935558112773, "grad_norm": 1.848864197731018, "learning_rate": 0.0010352128883774454, "loss": 0.6883, "step": 167680 }, { "epoch": 48.24223245109321, "grad_norm": 1.3065999746322632, "learning_rate": 0.0010351553509781357, "loss": 0.5163, "step": 167690 }, { "epoch": 48.245109321058685, "grad_norm": 0.7518611550331116, "learning_rate": 0.0010350978135788263, "loss": 0.5041, "step": 167700 }, { "epoch": 48.24798619102417, "grad_norm": 0.9598302245140076, "learning_rate": 0.0010350402761795168, "loss": 0.4602, "step": 167710 }, { "epoch": 48.250863060989644, "grad_norm": 0.8117572069168091, "learning_rate": 0.0010349827387802072, "loss": 0.6461, "step": 167720 }, { "epoch": 48.25373993095512, "grad_norm": 1.0643360614776611, "learning_rate": 0.0010349252013808975, "loss": 0.6231, "step": 167730 }, { "epoch": 48.2566168009206, "grad_norm": 1.1351772546768188, "learning_rate": 0.001034867663981588, "loss": 0.5067, "step": 167740 }, { "epoch": 48.25949367088607, "grad_norm": 0.9393398761749268, "learning_rate": 0.0010348101265822785, "loss": 0.6262, "step": 167750 }, { "epoch": 48.262370540851556, "grad_norm": 0.50782310962677, "learning_rate": 0.001034752589182969, "loss": 0.524, "step": 167760 }, { "epoch": 48.26524741081703, "grad_norm": 1.6751493215560913, "learning_rate": 0.0010346950517836594, "loss": 0.6312, "step": 167770 }, { "epoch": 48.26812428078251, "grad_norm": 1.337878942489624, "learning_rate": 0.00103463751438435, "loss": 0.485, "step": 167780 }, { "epoch": 48.271001150747985, "grad_norm": 1.75273859500885, "learning_rate": 0.0010345799769850403, "loss": 0.6054, "step": 167790 }, { "epoch": 48.27387802071346, "grad_norm": 1.573348879814148, "learning_rate": 0.0010345224395857306, "loss": 0.4531, "step": 167800 }, { "epoch": 48.276754890678944, "grad_norm": 0.9069904685020447, "learning_rate": 0.0010344649021864212, "loss": 0.6038, "step": 167810 }, { "epoch": 48.27963176064442, "grad_norm": 2.044867753982544, "learning_rate": 0.0010344073647871118, "loss": 0.6065, "step": 167820 }, { "epoch": 48.2825086306099, "grad_norm": 1.0460354089736938, "learning_rate": 0.001034349827387802, "loss": 0.5002, "step": 167830 }, { "epoch": 48.28538550057537, "grad_norm": 1.028422474861145, "learning_rate": 0.0010342922899884927, "loss": 0.6308, "step": 167840 }, { "epoch": 48.28826237054085, "grad_norm": 0.8948143720626831, "learning_rate": 0.001034234752589183, "loss": 0.4719, "step": 167850 }, { "epoch": 48.29113924050633, "grad_norm": 1.338609218597412, "learning_rate": 0.0010341772151898734, "loss": 0.5992, "step": 167860 }, { "epoch": 48.29401611047181, "grad_norm": 2.0221168994903564, "learning_rate": 0.001034119677790564, "loss": 0.5583, "step": 167870 }, { "epoch": 48.296892980437285, "grad_norm": 1.2367174625396729, "learning_rate": 0.0010340621403912543, "loss": 0.478, "step": 167880 }, { "epoch": 48.29976985040276, "grad_norm": 1.5926796197891235, "learning_rate": 0.0010340046029919448, "loss": 0.7178, "step": 167890 }, { "epoch": 48.30264672036824, "grad_norm": 1.6857216358184814, "learning_rate": 0.0010339470655926354, "loss": 0.6651, "step": 167900 }, { "epoch": 48.30552359033371, "grad_norm": 1.0310791730880737, "learning_rate": 0.0010338895281933255, "loss": 0.561, "step": 167910 }, { "epoch": 48.3084004602992, "grad_norm": 1.5182853937149048, "learning_rate": 0.001033831990794016, "loss": 0.6359, "step": 167920 }, { "epoch": 48.31127733026467, "grad_norm": 1.1968897581100464, "learning_rate": 0.0010337744533947067, "loss": 0.5891, "step": 167930 }, { "epoch": 48.31415420023015, "grad_norm": 1.4832987785339355, "learning_rate": 0.001033716915995397, "loss": 0.4785, "step": 167940 }, { "epoch": 48.317031070195625, "grad_norm": 0.6171721816062927, "learning_rate": 0.0010336593785960876, "loss": 0.5335, "step": 167950 }, { "epoch": 48.3199079401611, "grad_norm": 1.579529881477356, "learning_rate": 0.0010336018411967781, "loss": 0.5091, "step": 167960 }, { "epoch": 48.322784810126585, "grad_norm": 3.336345672607422, "learning_rate": 0.0010335443037974683, "loss": 0.6708, "step": 167970 }, { "epoch": 48.32566168009206, "grad_norm": 1.2871171236038208, "learning_rate": 0.0010334867663981588, "loss": 0.4814, "step": 167980 }, { "epoch": 48.32853855005754, "grad_norm": 1.569230079650879, "learning_rate": 0.0010334292289988492, "loss": 0.5474, "step": 167990 }, { "epoch": 48.33141542002301, "grad_norm": 1.6331313848495483, "learning_rate": 0.0010333716915995397, "loss": 0.5417, "step": 168000 }, { "epoch": 48.33429228998849, "grad_norm": 0.863332211971283, "learning_rate": 0.0010333141542002303, "loss": 0.5309, "step": 168010 }, { "epoch": 48.33716915995397, "grad_norm": 1.087770700454712, "learning_rate": 0.0010332566168009204, "loss": 0.4537, "step": 168020 }, { "epoch": 48.34004602991945, "grad_norm": 0.83413165807724, "learning_rate": 0.001033199079401611, "loss": 0.5525, "step": 168030 }, { "epoch": 48.342922899884925, "grad_norm": 2.154141426086426, "learning_rate": 0.0010331415420023016, "loss": 0.6415, "step": 168040 }, { "epoch": 48.3457997698504, "grad_norm": 2.084099054336548, "learning_rate": 0.001033084004602992, "loss": 0.4866, "step": 168050 }, { "epoch": 48.34867663981588, "grad_norm": 0.979293167591095, "learning_rate": 0.0010330264672036825, "loss": 0.5835, "step": 168060 }, { "epoch": 48.35155350978136, "grad_norm": 2.1612372398376465, "learning_rate": 0.001032968929804373, "loss": 0.6969, "step": 168070 }, { "epoch": 48.35443037974684, "grad_norm": 1.6214014291763306, "learning_rate": 0.0010329113924050632, "loss": 0.477, "step": 168080 }, { "epoch": 48.35730724971231, "grad_norm": 1.3485260009765625, "learning_rate": 0.0010328538550057537, "loss": 0.5264, "step": 168090 }, { "epoch": 48.36018411967779, "grad_norm": 1.190344214439392, "learning_rate": 0.001032796317606444, "loss": 0.5798, "step": 168100 }, { "epoch": 48.363060989643266, "grad_norm": 1.7923308610916138, "learning_rate": 0.0010327387802071346, "loss": 0.4726, "step": 168110 }, { "epoch": 48.36593785960875, "grad_norm": 0.999329149723053, "learning_rate": 0.0010326812428078252, "loss": 0.5114, "step": 168120 }, { "epoch": 48.368814729574225, "grad_norm": 0.8816108107566833, "learning_rate": 0.0010326237054085155, "loss": 0.6199, "step": 168130 }, { "epoch": 48.3716915995397, "grad_norm": 0.9270433783531189, "learning_rate": 0.001032566168009206, "loss": 0.4466, "step": 168140 }, { "epoch": 48.37456846950518, "grad_norm": 1.4991611242294312, "learning_rate": 0.0010325086306098965, "loss": 0.6056, "step": 168150 }, { "epoch": 48.377445339470654, "grad_norm": 1.2529319524765015, "learning_rate": 0.0010324510932105868, "loss": 0.5259, "step": 168160 }, { "epoch": 48.38032220943613, "grad_norm": 1.0090574026107788, "learning_rate": 0.0010323935558112774, "loss": 0.555, "step": 168170 }, { "epoch": 48.383199079401614, "grad_norm": 1.0579146146774292, "learning_rate": 0.001032336018411968, "loss": 0.5346, "step": 168180 }, { "epoch": 48.38607594936709, "grad_norm": 0.8382234573364258, "learning_rate": 0.0010322784810126583, "loss": 0.5117, "step": 168190 }, { "epoch": 48.388952819332566, "grad_norm": 0.8928050994873047, "learning_rate": 0.0010322209436133486, "loss": 0.6047, "step": 168200 }, { "epoch": 48.39182968929804, "grad_norm": 1.0331838130950928, "learning_rate": 0.0010321634062140392, "loss": 0.5078, "step": 168210 }, { "epoch": 48.39470655926352, "grad_norm": 1.9469783306121826, "learning_rate": 0.0010321058688147295, "loss": 0.6387, "step": 168220 }, { "epoch": 48.397583429229, "grad_norm": 1.593069076538086, "learning_rate": 0.00103204833141542, "loss": 0.5409, "step": 168230 }, { "epoch": 48.40046029919448, "grad_norm": 0.9320031404495239, "learning_rate": 0.0010319907940161104, "loss": 0.6376, "step": 168240 }, { "epoch": 48.403337169159954, "grad_norm": 0.6864359378814697, "learning_rate": 0.001031933256616801, "loss": 0.5209, "step": 168250 }, { "epoch": 48.40621403912543, "grad_norm": 0.8163076639175415, "learning_rate": 0.0010318757192174914, "loss": 0.4652, "step": 168260 }, { "epoch": 48.40909090909091, "grad_norm": 1.3555015325546265, "learning_rate": 0.0010318181818181817, "loss": 0.6658, "step": 168270 }, { "epoch": 48.41196777905639, "grad_norm": 0.5996629595756531, "learning_rate": 0.0010317606444188723, "loss": 0.4668, "step": 168280 }, { "epoch": 48.414844649021866, "grad_norm": 1.3811308145523071, "learning_rate": 0.0010317031070195628, "loss": 0.5283, "step": 168290 }, { "epoch": 48.41772151898734, "grad_norm": 1.283304214477539, "learning_rate": 0.0010316455696202532, "loss": 0.4422, "step": 168300 }, { "epoch": 48.42059838895282, "grad_norm": 1.7752245664596558, "learning_rate": 0.0010315880322209437, "loss": 0.606, "step": 168310 }, { "epoch": 48.423475258918295, "grad_norm": 1.5621169805526733, "learning_rate": 0.001031530494821634, "loss": 0.6709, "step": 168320 }, { "epoch": 48.42635212888378, "grad_norm": 1.2903863191604614, "learning_rate": 0.0010314729574223244, "loss": 0.5251, "step": 168330 }, { "epoch": 48.429228998849254, "grad_norm": 0.8628045320510864, "learning_rate": 0.001031415420023015, "loss": 0.5326, "step": 168340 }, { "epoch": 48.43210586881473, "grad_norm": 1.4174147844314575, "learning_rate": 0.0010313578826237053, "loss": 0.4699, "step": 168350 }, { "epoch": 48.43498273878021, "grad_norm": 2.002901315689087, "learning_rate": 0.001031300345224396, "loss": 0.4995, "step": 168360 }, { "epoch": 48.43785960874568, "grad_norm": 1.0305118560791016, "learning_rate": 0.0010312428078250865, "loss": 0.5991, "step": 168370 }, { "epoch": 48.44073647871116, "grad_norm": 1.3413656949996948, "learning_rate": 0.0010311852704257766, "loss": 0.5347, "step": 168380 }, { "epoch": 48.44361334867664, "grad_norm": 0.9804049730300903, "learning_rate": 0.0010311277330264672, "loss": 0.5282, "step": 168390 }, { "epoch": 48.44649021864212, "grad_norm": 0.6993036270141602, "learning_rate": 0.0010310701956271577, "loss": 0.6018, "step": 168400 }, { "epoch": 48.449367088607595, "grad_norm": 1.228163719177246, "learning_rate": 0.001031012658227848, "loss": 0.5533, "step": 168410 }, { "epoch": 48.45224395857307, "grad_norm": 0.8571146726608276, "learning_rate": 0.0010309551208285386, "loss": 0.4177, "step": 168420 }, { "epoch": 48.45512082853855, "grad_norm": 1.2443382740020752, "learning_rate": 0.0010308975834292292, "loss": 0.6248, "step": 168430 }, { "epoch": 48.45799769850403, "grad_norm": 1.1479192972183228, "learning_rate": 0.0010308400460299193, "loss": 0.4809, "step": 168440 }, { "epoch": 48.46087456846951, "grad_norm": 0.6469796299934387, "learning_rate": 0.00103078250863061, "loss": 0.4751, "step": 168450 }, { "epoch": 48.46375143843498, "grad_norm": 1.6746083498001099, "learning_rate": 0.0010307249712313003, "loss": 0.6313, "step": 168460 }, { "epoch": 48.46662830840046, "grad_norm": 0.7296242117881775, "learning_rate": 0.0010306674338319908, "loss": 0.5533, "step": 168470 }, { "epoch": 48.469505178365935, "grad_norm": 1.3410612344741821, "learning_rate": 0.0010306098964326814, "loss": 0.6319, "step": 168480 }, { "epoch": 48.47238204833142, "grad_norm": 0.9547623991966248, "learning_rate": 0.0010305523590333717, "loss": 0.5068, "step": 168490 }, { "epoch": 48.475258918296895, "grad_norm": 1.5594165325164795, "learning_rate": 0.001030494821634062, "loss": 0.5582, "step": 168500 }, { "epoch": 48.47813578826237, "grad_norm": 1.1613448858261108, "learning_rate": 0.0010304372842347526, "loss": 0.5969, "step": 168510 }, { "epoch": 48.48101265822785, "grad_norm": 3.631657600402832, "learning_rate": 0.001030379746835443, "loss": 0.5479, "step": 168520 }, { "epoch": 48.48388952819332, "grad_norm": 2.5044541358947754, "learning_rate": 0.0010303222094361335, "loss": 0.5989, "step": 168530 }, { "epoch": 48.48676639815881, "grad_norm": 1.659202218055725, "learning_rate": 0.0010302646720368241, "loss": 0.5938, "step": 168540 }, { "epoch": 48.48964326812428, "grad_norm": 0.8377583622932434, "learning_rate": 0.0010302071346375145, "loss": 0.5619, "step": 168550 }, { "epoch": 48.49252013808976, "grad_norm": 2.315876007080078, "learning_rate": 0.0010301495972382048, "loss": 0.5176, "step": 168560 }, { "epoch": 48.495397008055235, "grad_norm": 1.1277002096176147, "learning_rate": 0.0010300920598388952, "loss": 0.5633, "step": 168570 }, { "epoch": 48.49827387802071, "grad_norm": 1.0684733390808105, "learning_rate": 0.0010300345224395857, "loss": 0.4299, "step": 168580 }, { "epoch": 48.50115074798619, "grad_norm": 1.7413480281829834, "learning_rate": 0.0010299769850402763, "loss": 0.6213, "step": 168590 }, { "epoch": 48.50402761795167, "grad_norm": 1.086405634880066, "learning_rate": 0.0010299194476409666, "loss": 0.4763, "step": 168600 }, { "epoch": 48.50690448791715, "grad_norm": 1.7029461860656738, "learning_rate": 0.0010298619102416572, "loss": 0.5822, "step": 168610 }, { "epoch": 48.50978135788262, "grad_norm": 1.5257675647735596, "learning_rate": 0.0010298043728423475, "loss": 0.6245, "step": 168620 }, { "epoch": 48.5126582278481, "grad_norm": 1.8317288160324097, "learning_rate": 0.0010297468354430379, "loss": 0.6313, "step": 168630 }, { "epoch": 48.515535097813576, "grad_norm": 1.2783379554748535, "learning_rate": 0.0010296892980437285, "loss": 0.4196, "step": 168640 }, { "epoch": 48.51841196777906, "grad_norm": 1.9670257568359375, "learning_rate": 0.001029631760644419, "loss": 0.527, "step": 168650 }, { "epoch": 48.521288837744535, "grad_norm": 0.8076265454292297, "learning_rate": 0.0010295742232451094, "loss": 0.533, "step": 168660 }, { "epoch": 48.52416570771001, "grad_norm": 0.7721544504165649, "learning_rate": 0.0010295166858458, "loss": 0.6372, "step": 168670 }, { "epoch": 48.52704257767549, "grad_norm": 1.3686906099319458, "learning_rate": 0.00102945914844649, "loss": 0.3981, "step": 168680 }, { "epoch": 48.529919447640964, "grad_norm": 1.5805467367172241, "learning_rate": 0.0010294016110471806, "loss": 0.48, "step": 168690 }, { "epoch": 48.53279631760645, "grad_norm": 1.3037880659103394, "learning_rate": 0.0010293440736478712, "loss": 0.4692, "step": 168700 }, { "epoch": 48.53567318757192, "grad_norm": 1.8727784156799316, "learning_rate": 0.0010292865362485615, "loss": 0.5328, "step": 168710 }, { "epoch": 48.5385500575374, "grad_norm": 0.8463683128356934, "learning_rate": 0.001029228998849252, "loss": 0.6267, "step": 168720 }, { "epoch": 48.541426927502876, "grad_norm": 1.0954760313034058, "learning_rate": 0.0010291714614499427, "loss": 0.5246, "step": 168730 }, { "epoch": 48.54430379746835, "grad_norm": 0.7584606409072876, "learning_rate": 0.0010291139240506328, "loss": 0.3674, "step": 168740 }, { "epoch": 48.547180667433835, "grad_norm": 1.1536020040512085, "learning_rate": 0.0010290563866513234, "loss": 0.5055, "step": 168750 }, { "epoch": 48.55005753739931, "grad_norm": 0.7020536065101624, "learning_rate": 0.001028998849252014, "loss": 0.4313, "step": 168760 }, { "epoch": 48.55293440736479, "grad_norm": 0.8113455176353455, "learning_rate": 0.0010289413118527043, "loss": 0.4955, "step": 168770 }, { "epoch": 48.555811277330264, "grad_norm": 0.9785591959953308, "learning_rate": 0.0010288837744533948, "loss": 0.5392, "step": 168780 }, { "epoch": 48.55868814729574, "grad_norm": 1.4096264839172363, "learning_rate": 0.0010288262370540852, "loss": 0.7066, "step": 168790 }, { "epoch": 48.561565017261216, "grad_norm": 1.3925013542175293, "learning_rate": 0.0010287686996547755, "loss": 0.472, "step": 168800 }, { "epoch": 48.5644418872267, "grad_norm": 1.0049091577529907, "learning_rate": 0.001028711162255466, "loss": 0.5974, "step": 168810 }, { "epoch": 48.567318757192176, "grad_norm": 1.201610803604126, "learning_rate": 0.0010286536248561564, "loss": 0.5628, "step": 168820 }, { "epoch": 48.57019562715765, "grad_norm": 1.2586705684661865, "learning_rate": 0.001028596087456847, "loss": 0.5426, "step": 168830 }, { "epoch": 48.57307249712313, "grad_norm": 1.708740472793579, "learning_rate": 0.0010285385500575376, "loss": 0.4969, "step": 168840 }, { "epoch": 48.575949367088604, "grad_norm": 1.2458617687225342, "learning_rate": 0.0010284810126582277, "loss": 0.4211, "step": 168850 }, { "epoch": 48.57882623705409, "grad_norm": 1.8196667432785034, "learning_rate": 0.0010284234752589183, "loss": 0.5083, "step": 168860 }, { "epoch": 48.581703107019564, "grad_norm": 1.459763526916504, "learning_rate": 0.0010283659378596088, "loss": 0.4773, "step": 168870 }, { "epoch": 48.58457997698504, "grad_norm": 1.3447246551513672, "learning_rate": 0.0010283084004602992, "loss": 0.4962, "step": 168880 }, { "epoch": 48.587456846950516, "grad_norm": 1.4896695613861084, "learning_rate": 0.0010282508630609897, "loss": 0.5125, "step": 168890 }, { "epoch": 48.59033371691599, "grad_norm": 1.0745948553085327, "learning_rate": 0.0010281933256616803, "loss": 0.6149, "step": 168900 }, { "epoch": 48.593210586881476, "grad_norm": 1.031734585762024, "learning_rate": 0.0010281357882623704, "loss": 0.5525, "step": 168910 }, { "epoch": 48.59608745684695, "grad_norm": 0.8658469915390015, "learning_rate": 0.001028078250863061, "loss": 0.5928, "step": 168920 }, { "epoch": 48.59896432681243, "grad_norm": 0.9233260750770569, "learning_rate": 0.0010280207134637513, "loss": 0.5361, "step": 168930 }, { "epoch": 48.601841196777904, "grad_norm": 1.4213967323303223, "learning_rate": 0.001027963176064442, "loss": 0.6182, "step": 168940 }, { "epoch": 48.60471806674338, "grad_norm": 0.959462583065033, "learning_rate": 0.0010279056386651325, "loss": 0.56, "step": 168950 }, { "epoch": 48.607594936708864, "grad_norm": 1.191152572631836, "learning_rate": 0.0010278481012658228, "loss": 0.6012, "step": 168960 }, { "epoch": 48.61047180667434, "grad_norm": 1.069772720336914, "learning_rate": 0.0010277905638665132, "loss": 0.5667, "step": 168970 }, { "epoch": 48.613348676639816, "grad_norm": 1.7755956649780273, "learning_rate": 0.0010277330264672037, "loss": 0.7423, "step": 168980 }, { "epoch": 48.61622554660529, "grad_norm": 1.1780136823654175, "learning_rate": 0.001027675489067894, "loss": 0.5502, "step": 168990 }, { "epoch": 48.61910241657077, "grad_norm": 0.829209566116333, "learning_rate": 0.0010276179516685846, "loss": 0.6594, "step": 169000 }, { "epoch": 48.621979286536245, "grad_norm": 1.032118797302246, "learning_rate": 0.0010275604142692752, "loss": 0.5305, "step": 169010 }, { "epoch": 48.62485615650173, "grad_norm": 1.3809027671813965, "learning_rate": 0.0010275028768699655, "loss": 0.5992, "step": 169020 }, { "epoch": 48.627733026467205, "grad_norm": 1.5854922533035278, "learning_rate": 0.0010274453394706559, "loss": 0.4883, "step": 169030 }, { "epoch": 48.63060989643268, "grad_norm": 1.1531329154968262, "learning_rate": 0.0010273878020713462, "loss": 0.5916, "step": 169040 }, { "epoch": 48.63348676639816, "grad_norm": 0.8936783075332642, "learning_rate": 0.0010273302646720368, "loss": 0.6028, "step": 169050 }, { "epoch": 48.63636363636363, "grad_norm": 0.9411478042602539, "learning_rate": 0.0010272727272727274, "loss": 0.4516, "step": 169060 }, { "epoch": 48.639240506329116, "grad_norm": 3.3869991302490234, "learning_rate": 0.0010272151898734177, "loss": 0.6011, "step": 169070 }, { "epoch": 48.64211737629459, "grad_norm": 1.633741021156311, "learning_rate": 0.0010271576524741083, "loss": 0.5116, "step": 169080 }, { "epoch": 48.64499424626007, "grad_norm": 1.6528264284133911, "learning_rate": 0.0010271001150747986, "loss": 0.4993, "step": 169090 }, { "epoch": 48.647871116225545, "grad_norm": 1.3539785146713257, "learning_rate": 0.001027042577675489, "loss": 0.637, "step": 169100 }, { "epoch": 48.65074798619102, "grad_norm": 1.4323244094848633, "learning_rate": 0.0010269850402761795, "loss": 0.466, "step": 169110 }, { "epoch": 48.653624856156505, "grad_norm": 0.8152749538421631, "learning_rate": 0.00102692750287687, "loss": 0.5358, "step": 169120 }, { "epoch": 48.65650172612198, "grad_norm": 1.609030842781067, "learning_rate": 0.0010268699654775604, "loss": 0.64, "step": 169130 }, { "epoch": 48.65937859608746, "grad_norm": 1.5315839052200317, "learning_rate": 0.001026812428078251, "loss": 0.7209, "step": 169140 }, { "epoch": 48.66225546605293, "grad_norm": 0.9407646656036377, "learning_rate": 0.0010267548906789411, "loss": 0.4507, "step": 169150 }, { "epoch": 48.66513233601841, "grad_norm": 0.8940749168395996, "learning_rate": 0.0010266973532796317, "loss": 0.597, "step": 169160 }, { "epoch": 48.66800920598389, "grad_norm": 1.7642278671264648, "learning_rate": 0.0010266398158803223, "loss": 0.5515, "step": 169170 }, { "epoch": 48.67088607594937, "grad_norm": 1.1249094009399414, "learning_rate": 0.0010265822784810126, "loss": 0.5605, "step": 169180 }, { "epoch": 48.673762945914845, "grad_norm": 1.1602890491485596, "learning_rate": 0.0010265247410817032, "loss": 0.3991, "step": 169190 }, { "epoch": 48.67663981588032, "grad_norm": 0.7834805250167847, "learning_rate": 0.0010264672036823937, "loss": 0.5281, "step": 169200 }, { "epoch": 48.6795166858458, "grad_norm": 0.8997914791107178, "learning_rate": 0.0010264096662830839, "loss": 0.5722, "step": 169210 }, { "epoch": 48.68239355581128, "grad_norm": 1.1017308235168457, "learning_rate": 0.0010263521288837744, "loss": 0.5896, "step": 169220 }, { "epoch": 48.68527042577676, "grad_norm": 1.1399474143981934, "learning_rate": 0.001026294591484465, "loss": 0.486, "step": 169230 }, { "epoch": 48.68814729574223, "grad_norm": 0.9010734558105469, "learning_rate": 0.0010262370540851553, "loss": 0.7034, "step": 169240 }, { "epoch": 48.69102416570771, "grad_norm": 0.9081595540046692, "learning_rate": 0.001026179516685846, "loss": 0.5139, "step": 169250 }, { "epoch": 48.693901035673186, "grad_norm": 1.3635218143463135, "learning_rate": 0.0010261219792865363, "loss": 0.4804, "step": 169260 }, { "epoch": 48.69677790563866, "grad_norm": 1.8893911838531494, "learning_rate": 0.0010260644418872266, "loss": 0.5917, "step": 169270 }, { "epoch": 48.699654775604145, "grad_norm": 1.3276311159133911, "learning_rate": 0.0010260069044879172, "loss": 0.5784, "step": 169280 }, { "epoch": 48.70253164556962, "grad_norm": 1.8381948471069336, "learning_rate": 0.0010259493670886075, "loss": 0.7507, "step": 169290 }, { "epoch": 48.7054085155351, "grad_norm": 0.8986776471138, "learning_rate": 0.001025891829689298, "loss": 0.5194, "step": 169300 }, { "epoch": 48.708285385500574, "grad_norm": 0.8929279446601868, "learning_rate": 0.0010258342922899886, "loss": 0.6159, "step": 169310 }, { "epoch": 48.71116225546605, "grad_norm": 0.6798769235610962, "learning_rate": 0.001025776754890679, "loss": 0.5717, "step": 169320 }, { "epoch": 48.71403912543153, "grad_norm": 0.9765310883522034, "learning_rate": 0.0010257192174913693, "loss": 0.487, "step": 169330 }, { "epoch": 48.71691599539701, "grad_norm": 2.42635440826416, "learning_rate": 0.00102566168009206, "loss": 0.6316, "step": 169340 }, { "epoch": 48.719792865362486, "grad_norm": 0.923046886920929, "learning_rate": 0.0010256041426927502, "loss": 0.5095, "step": 169350 }, { "epoch": 48.72266973532796, "grad_norm": 1.4596264362335205, "learning_rate": 0.0010255466052934408, "loss": 0.7899, "step": 169360 }, { "epoch": 48.72554660529344, "grad_norm": 0.8665754795074463, "learning_rate": 0.0010254890678941312, "loss": 0.7588, "step": 169370 }, { "epoch": 48.72842347525892, "grad_norm": 1.4074281454086304, "learning_rate": 0.0010254315304948217, "loss": 0.5319, "step": 169380 }, { "epoch": 48.7313003452244, "grad_norm": 1.4765064716339111, "learning_rate": 0.001025373993095512, "loss": 0.514, "step": 169390 }, { "epoch": 48.734177215189874, "grad_norm": 0.9008504748344421, "learning_rate": 0.0010253164556962024, "loss": 0.5341, "step": 169400 }, { "epoch": 48.73705408515535, "grad_norm": 2.0379037857055664, "learning_rate": 0.001025258918296893, "loss": 0.5563, "step": 169410 }, { "epoch": 48.739930955120826, "grad_norm": 1.6928802728652954, "learning_rate": 0.0010252013808975835, "loss": 0.7276, "step": 169420 }, { "epoch": 48.74280782508631, "grad_norm": 1.1702169179916382, "learning_rate": 0.001025143843498274, "loss": 0.4352, "step": 169430 }, { "epoch": 48.745684695051786, "grad_norm": 1.495489478111267, "learning_rate": 0.0010250863060989645, "loss": 0.5747, "step": 169440 }, { "epoch": 48.74856156501726, "grad_norm": 0.7135157585144043, "learning_rate": 0.0010250287686996548, "loss": 0.5108, "step": 169450 }, { "epoch": 48.75143843498274, "grad_norm": 1.1185475587844849, "learning_rate": 0.0010249712313003452, "loss": 0.5278, "step": 169460 }, { "epoch": 48.754315304948214, "grad_norm": 0.6932414770126343, "learning_rate": 0.0010249136939010357, "loss": 0.5988, "step": 169470 }, { "epoch": 48.75719217491369, "grad_norm": 2.102851390838623, "learning_rate": 0.001024856156501726, "loss": 0.6583, "step": 169480 }, { "epoch": 48.760069044879174, "grad_norm": 1.2516642808914185, "learning_rate": 0.0010247986191024166, "loss": 0.5321, "step": 169490 }, { "epoch": 48.76294591484465, "grad_norm": 1.573033094406128, "learning_rate": 0.0010247410817031072, "loss": 0.5112, "step": 169500 }, { "epoch": 48.765822784810126, "grad_norm": 1.6633976697921753, "learning_rate": 0.0010246835443037973, "loss": 0.5752, "step": 169510 }, { "epoch": 48.7686996547756, "grad_norm": 1.39569890499115, "learning_rate": 0.0010246260069044879, "loss": 0.6832, "step": 169520 }, { "epoch": 48.77157652474108, "grad_norm": 1.2493834495544434, "learning_rate": 0.0010245684695051784, "loss": 0.5889, "step": 169530 }, { "epoch": 48.77445339470656, "grad_norm": 1.5150395631790161, "learning_rate": 0.0010245109321058688, "loss": 0.6009, "step": 169540 }, { "epoch": 48.77733026467204, "grad_norm": 1.3393911123275757, "learning_rate": 0.0010244533947065594, "loss": 0.6618, "step": 169550 }, { "epoch": 48.780207134637514, "grad_norm": 0.6784234642982483, "learning_rate": 0.00102439585730725, "loss": 0.4964, "step": 169560 }, { "epoch": 48.78308400460299, "grad_norm": 2.6831579208374023, "learning_rate": 0.00102433831990794, "loss": 0.6676, "step": 169570 }, { "epoch": 48.78596087456847, "grad_norm": 1.2191739082336426, "learning_rate": 0.0010242807825086306, "loss": 0.5648, "step": 169580 }, { "epoch": 48.78883774453395, "grad_norm": 1.2912276983261108, "learning_rate": 0.0010242232451093212, "loss": 0.543, "step": 169590 }, { "epoch": 48.791714614499426, "grad_norm": 0.9153738021850586, "learning_rate": 0.0010241657077100115, "loss": 0.5271, "step": 169600 }, { "epoch": 48.7945914844649, "grad_norm": 1.1476351022720337, "learning_rate": 0.001024108170310702, "loss": 0.4506, "step": 169610 }, { "epoch": 48.79746835443038, "grad_norm": 2.0842812061309814, "learning_rate": 0.0010240506329113924, "loss": 0.5778, "step": 169620 }, { "epoch": 48.800345224395855, "grad_norm": 0.6455382704734802, "learning_rate": 0.0010239930955120828, "loss": 0.5558, "step": 169630 }, { "epoch": 48.80322209436134, "grad_norm": 1.4531573057174683, "learning_rate": 0.0010239355581127734, "loss": 0.6217, "step": 169640 }, { "epoch": 48.806098964326814, "grad_norm": 1.2054048776626587, "learning_rate": 0.0010238780207134637, "loss": 0.5395, "step": 169650 }, { "epoch": 48.80897583429229, "grad_norm": 1.3988876342773438, "learning_rate": 0.0010238204833141543, "loss": 0.6088, "step": 169660 }, { "epoch": 48.81185270425777, "grad_norm": 1.2419397830963135, "learning_rate": 0.0010237629459148448, "loss": 0.4589, "step": 169670 }, { "epoch": 48.81472957422324, "grad_norm": 1.2937062978744507, "learning_rate": 0.001023705408515535, "loss": 0.535, "step": 169680 }, { "epoch": 48.81760644418872, "grad_norm": 1.2849637269973755, "learning_rate": 0.0010236478711162255, "loss": 0.6571, "step": 169690 }, { "epoch": 48.8204833141542, "grad_norm": 1.3228217363357544, "learning_rate": 0.001023590333716916, "loss": 0.5994, "step": 169700 }, { "epoch": 48.82336018411968, "grad_norm": 0.9517315030097961, "learning_rate": 0.0010235327963176064, "loss": 0.5559, "step": 169710 }, { "epoch": 48.826237054085155, "grad_norm": 1.1482921838760376, "learning_rate": 0.001023475258918297, "loss": 0.4716, "step": 169720 }, { "epoch": 48.82911392405063, "grad_norm": 1.0787757635116577, "learning_rate": 0.0010234177215189873, "loss": 0.5897, "step": 169730 }, { "epoch": 48.83199079401611, "grad_norm": 0.7248291373252869, "learning_rate": 0.0010233601841196777, "loss": 0.5913, "step": 169740 }, { "epoch": 48.83486766398159, "grad_norm": 1.2291985750198364, "learning_rate": 0.0010233026467203683, "loss": 0.741, "step": 169750 }, { "epoch": 48.83774453394707, "grad_norm": 1.1110494136810303, "learning_rate": 0.0010232451093210586, "loss": 0.54, "step": 169760 }, { "epoch": 48.84062140391254, "grad_norm": 0.9790530204772949, "learning_rate": 0.0010231875719217492, "loss": 0.4779, "step": 169770 }, { "epoch": 48.84349827387802, "grad_norm": 1.1018627882003784, "learning_rate": 0.0010231300345224397, "loss": 0.5349, "step": 169780 }, { "epoch": 48.846375143843495, "grad_norm": 1.0039286613464355, "learning_rate": 0.00102307249712313, "loss": 0.5612, "step": 169790 }, { "epoch": 48.84925201380898, "grad_norm": 1.0506190061569214, "learning_rate": 0.0010230149597238204, "loss": 0.6375, "step": 169800 }, { "epoch": 48.852128883774455, "grad_norm": 1.053654670715332, "learning_rate": 0.001022957422324511, "loss": 0.6353, "step": 169810 }, { "epoch": 48.85500575373993, "grad_norm": 2.114231586456299, "learning_rate": 0.0010228998849252013, "loss": 0.7133, "step": 169820 }, { "epoch": 48.85788262370541, "grad_norm": 0.6972163915634155, "learning_rate": 0.001022842347525892, "loss": 0.6309, "step": 169830 }, { "epoch": 48.860759493670884, "grad_norm": 1.5434614419937134, "learning_rate": 0.0010227848101265822, "loss": 0.5639, "step": 169840 }, { "epoch": 48.86363636363637, "grad_norm": 1.1441044807434082, "learning_rate": 0.0010227272727272728, "loss": 0.5663, "step": 169850 }, { "epoch": 48.86651323360184, "grad_norm": 1.2633905410766602, "learning_rate": 0.0010226697353279632, "loss": 0.5007, "step": 169860 }, { "epoch": 48.86939010356732, "grad_norm": 1.8819273710250854, "learning_rate": 0.0010226121979286535, "loss": 0.5754, "step": 169870 }, { "epoch": 48.872266973532795, "grad_norm": 1.6219913959503174, "learning_rate": 0.001022554660529344, "loss": 0.5091, "step": 169880 }, { "epoch": 48.87514384349827, "grad_norm": 0.9465612173080444, "learning_rate": 0.0010224971231300346, "loss": 0.6419, "step": 169890 }, { "epoch": 48.878020713463755, "grad_norm": 1.472603440284729, "learning_rate": 0.001022439585730725, "loss": 0.662, "step": 169900 }, { "epoch": 48.88089758342923, "grad_norm": 1.0398675203323364, "learning_rate": 0.0010223820483314155, "loss": 0.5091, "step": 169910 }, { "epoch": 48.88377445339471, "grad_norm": 1.8384628295898438, "learning_rate": 0.0010223245109321059, "loss": 0.5982, "step": 169920 }, { "epoch": 48.886651323360184, "grad_norm": 1.483029842376709, "learning_rate": 0.0010222669735327962, "loss": 0.5778, "step": 169930 }, { "epoch": 48.88952819332566, "grad_norm": 1.1960934400558472, "learning_rate": 0.0010222094361334868, "loss": 0.6407, "step": 169940 }, { "epoch": 48.892405063291136, "grad_norm": 0.7748231887817383, "learning_rate": 0.0010221518987341771, "loss": 0.6319, "step": 169950 }, { "epoch": 48.89528193325662, "grad_norm": 1.4132113456726074, "learning_rate": 0.0010220943613348677, "loss": 0.5581, "step": 169960 }, { "epoch": 48.898158803222096, "grad_norm": 1.2503433227539062, "learning_rate": 0.0010220368239355583, "loss": 0.58, "step": 169970 }, { "epoch": 48.90103567318757, "grad_norm": 1.2035138607025146, "learning_rate": 0.0010219792865362484, "loss": 0.4707, "step": 169980 }, { "epoch": 48.90391254315305, "grad_norm": 1.887205719947815, "learning_rate": 0.001021921749136939, "loss": 0.5421, "step": 169990 }, { "epoch": 48.906789413118524, "grad_norm": 1.3135536909103394, "learning_rate": 0.0010218642117376295, "loss": 0.5015, "step": 170000 }, { "epoch": 48.90966628308401, "grad_norm": 0.7058517932891846, "learning_rate": 0.0010218066743383199, "loss": 0.5729, "step": 170010 }, { "epoch": 48.912543153049484, "grad_norm": 0.9463266134262085, "learning_rate": 0.0010217491369390104, "loss": 0.5916, "step": 170020 }, { "epoch": 48.91542002301496, "grad_norm": 1.8234837055206299, "learning_rate": 0.001021691599539701, "loss": 0.6525, "step": 170030 }, { "epoch": 48.918296892980436, "grad_norm": 0.700532078742981, "learning_rate": 0.0010216340621403911, "loss": 0.4392, "step": 170040 }, { "epoch": 48.92117376294591, "grad_norm": 1.9974547624588013, "learning_rate": 0.0010215765247410817, "loss": 0.7092, "step": 170050 }, { "epoch": 48.924050632911396, "grad_norm": 0.8468695282936096, "learning_rate": 0.001021518987341772, "loss": 0.6287, "step": 170060 }, { "epoch": 48.92692750287687, "grad_norm": 1.3617883920669556, "learning_rate": 0.0010214614499424626, "loss": 0.5807, "step": 170070 }, { "epoch": 48.92980437284235, "grad_norm": 2.115213632583618, "learning_rate": 0.0010214039125431532, "loss": 0.5908, "step": 170080 }, { "epoch": 48.932681242807824, "grad_norm": 0.8471257090568542, "learning_rate": 0.0010213463751438435, "loss": 0.5751, "step": 170090 }, { "epoch": 48.9355581127733, "grad_norm": 0.6063029766082764, "learning_rate": 0.0010212888377445339, "loss": 0.4655, "step": 170100 }, { "epoch": 48.938434982738784, "grad_norm": 1.4966349601745605, "learning_rate": 0.0010212313003452244, "loss": 0.5428, "step": 170110 }, { "epoch": 48.94131185270426, "grad_norm": 2.3928678035736084, "learning_rate": 0.0010211737629459148, "loss": 0.6778, "step": 170120 }, { "epoch": 48.944188722669736, "grad_norm": 1.1395506858825684, "learning_rate": 0.0010211162255466053, "loss": 0.6631, "step": 170130 }, { "epoch": 48.94706559263521, "grad_norm": 1.6090508699417114, "learning_rate": 0.001021058688147296, "loss": 0.5364, "step": 170140 }, { "epoch": 48.94994246260069, "grad_norm": 0.8094843626022339, "learning_rate": 0.0010210011507479863, "loss": 0.4925, "step": 170150 }, { "epoch": 48.952819332566165, "grad_norm": 1.2004119157791138, "learning_rate": 0.0010209436133486766, "loss": 0.5402, "step": 170160 }, { "epoch": 48.95569620253165, "grad_norm": 1.0755807161331177, "learning_rate": 0.001020886075949367, "loss": 0.5211, "step": 170170 }, { "epoch": 48.958573072497124, "grad_norm": 0.8619556427001953, "learning_rate": 0.0010208285385500575, "loss": 0.4936, "step": 170180 }, { "epoch": 48.9614499424626, "grad_norm": 2.19449520111084, "learning_rate": 0.001020771001150748, "loss": 0.7592, "step": 170190 }, { "epoch": 48.96432681242808, "grad_norm": 1.2840676307678223, "learning_rate": 0.0010207134637514384, "loss": 0.6036, "step": 170200 }, { "epoch": 48.96720368239355, "grad_norm": 2.6134471893310547, "learning_rate": 0.001020655926352129, "loss": 0.6403, "step": 170210 }, { "epoch": 48.970080552359036, "grad_norm": 1.3267954587936401, "learning_rate": 0.0010205983889528193, "loss": 0.6097, "step": 170220 }, { "epoch": 48.97295742232451, "grad_norm": 1.280131459236145, "learning_rate": 0.0010205408515535097, "loss": 0.4917, "step": 170230 }, { "epoch": 48.97583429228999, "grad_norm": 1.8692444562911987, "learning_rate": 0.0010204833141542002, "loss": 0.5828, "step": 170240 }, { "epoch": 48.978711162255465, "grad_norm": 1.0217522382736206, "learning_rate": 0.0010204257767548908, "loss": 0.6786, "step": 170250 }, { "epoch": 48.98158803222094, "grad_norm": 1.175717830657959, "learning_rate": 0.0010203682393555812, "loss": 0.5556, "step": 170260 }, { "epoch": 48.984464902186424, "grad_norm": 0.6666806936264038, "learning_rate": 0.0010203107019562717, "loss": 0.4612, "step": 170270 }, { "epoch": 48.9873417721519, "grad_norm": 2.2221035957336426, "learning_rate": 0.001020253164556962, "loss": 0.7091, "step": 170280 }, { "epoch": 48.99021864211738, "grad_norm": 1.3646607398986816, "learning_rate": 0.0010201956271576524, "loss": 0.7055, "step": 170290 }, { "epoch": 48.99309551208285, "grad_norm": 1.4717456102371216, "learning_rate": 0.001020138089758343, "loss": 0.6573, "step": 170300 }, { "epoch": 48.99597238204833, "grad_norm": 1.5227632522583008, "learning_rate": 0.0010200805523590333, "loss": 0.4345, "step": 170310 }, { "epoch": 48.99884925201381, "grad_norm": 0.7910281419754028, "learning_rate": 0.0010200230149597239, "loss": 0.4909, "step": 170320 }, { "epoch": 49.00172612197929, "grad_norm": 1.4758566617965698, "learning_rate": 0.0010199654775604145, "loss": 0.5966, "step": 170330 }, { "epoch": 49.004602991944765, "grad_norm": 1.8592770099639893, "learning_rate": 0.0010199079401611046, "loss": 0.4224, "step": 170340 }, { "epoch": 49.00747986191024, "grad_norm": 1.383453607559204, "learning_rate": 0.0010198504027617951, "loss": 0.6267, "step": 170350 }, { "epoch": 49.01035673187572, "grad_norm": 1.4852569103240967, "learning_rate": 0.0010197928653624857, "loss": 0.5186, "step": 170360 }, { "epoch": 49.01323360184119, "grad_norm": 2.7831714153289795, "learning_rate": 0.001019735327963176, "loss": 0.464, "step": 170370 }, { "epoch": 49.01611047180668, "grad_norm": 0.7684211134910583, "learning_rate": 0.0010196777905638666, "loss": 0.5168, "step": 170380 }, { "epoch": 49.01898734177215, "grad_norm": 1.3183525800704956, "learning_rate": 0.0010196202531645572, "loss": 0.6288, "step": 170390 }, { "epoch": 49.02186421173763, "grad_norm": 1.5790561437606812, "learning_rate": 0.0010195627157652473, "loss": 0.5243, "step": 170400 }, { "epoch": 49.024741081703105, "grad_norm": 1.3633732795715332, "learning_rate": 0.0010195051783659379, "loss": 0.4099, "step": 170410 }, { "epoch": 49.02761795166858, "grad_norm": 0.9729598760604858, "learning_rate": 0.0010194476409666282, "loss": 0.5697, "step": 170420 }, { "epoch": 49.030494821634065, "grad_norm": 0.9419822692871094, "learning_rate": 0.0010193901035673188, "loss": 0.5323, "step": 170430 }, { "epoch": 49.03337169159954, "grad_norm": 0.7825868129730225, "learning_rate": 0.0010193325661680094, "loss": 0.4809, "step": 170440 }, { "epoch": 49.03624856156502, "grad_norm": 1.1582145690917969, "learning_rate": 0.0010192750287686997, "loss": 0.5452, "step": 170450 }, { "epoch": 49.03912543153049, "grad_norm": 1.3982242345809937, "learning_rate": 0.00101921749136939, "loss": 0.4652, "step": 170460 }, { "epoch": 49.04200230149597, "grad_norm": 1.0289262533187866, "learning_rate": 0.0010191599539700806, "loss": 0.4579, "step": 170470 }, { "epoch": 49.04487917146145, "grad_norm": 1.057751178741455, "learning_rate": 0.001019102416570771, "loss": 0.4109, "step": 170480 }, { "epoch": 49.04775604142693, "grad_norm": 1.390123724937439, "learning_rate": 0.0010190448791714615, "loss": 0.5042, "step": 170490 }, { "epoch": 49.050632911392405, "grad_norm": 1.635097622871399, "learning_rate": 0.001018987341772152, "loss": 0.5163, "step": 170500 }, { "epoch": 49.05350978135788, "grad_norm": 0.7742230892181396, "learning_rate": 0.0010189298043728422, "loss": 0.5946, "step": 170510 }, { "epoch": 49.05638665132336, "grad_norm": 1.177836298942566, "learning_rate": 0.0010188722669735328, "loss": 0.5749, "step": 170520 }, { "epoch": 49.05926352128884, "grad_norm": 1.2612751722335815, "learning_rate": 0.0010188147295742231, "loss": 0.5719, "step": 170530 }, { "epoch": 49.06214039125432, "grad_norm": 1.1640663146972656, "learning_rate": 0.0010187571921749137, "loss": 0.5321, "step": 170540 }, { "epoch": 49.06501726121979, "grad_norm": 1.3572546243667603, "learning_rate": 0.0010186996547756043, "loss": 0.5778, "step": 170550 }, { "epoch": 49.06789413118527, "grad_norm": 1.7082000970840454, "learning_rate": 0.0010186421173762946, "loss": 0.542, "step": 170560 }, { "epoch": 49.070771001150746, "grad_norm": 1.3688675165176392, "learning_rate": 0.001018584579976985, "loss": 0.5968, "step": 170570 }, { "epoch": 49.07364787111622, "grad_norm": 1.176002860069275, "learning_rate": 0.0010185270425776755, "loss": 0.6783, "step": 170580 }, { "epoch": 49.076524741081705, "grad_norm": 0.9630182385444641, "learning_rate": 0.0010184695051783659, "loss": 0.4884, "step": 170590 }, { "epoch": 49.07940161104718, "grad_norm": 1.1913610696792603, "learning_rate": 0.0010184119677790564, "loss": 0.5017, "step": 170600 }, { "epoch": 49.08227848101266, "grad_norm": 1.285961389541626, "learning_rate": 0.001018354430379747, "loss": 0.6005, "step": 170610 }, { "epoch": 49.085155350978134, "grad_norm": 1.3794101476669312, "learning_rate": 0.0010182968929804373, "loss": 0.5726, "step": 170620 }, { "epoch": 49.08803222094361, "grad_norm": 1.224763035774231, "learning_rate": 0.0010182393555811277, "loss": 0.457, "step": 170630 }, { "epoch": 49.09090909090909, "grad_norm": 1.43500816822052, "learning_rate": 0.001018181818181818, "loss": 0.4869, "step": 170640 }, { "epoch": 49.09378596087457, "grad_norm": 2.3997273445129395, "learning_rate": 0.0010181242807825086, "loss": 0.5324, "step": 170650 }, { "epoch": 49.096662830840046, "grad_norm": 1.314712643623352, "learning_rate": 0.0010180667433831992, "loss": 0.3809, "step": 170660 }, { "epoch": 49.09953970080552, "grad_norm": 1.21785306930542, "learning_rate": 0.0010180092059838895, "loss": 0.6226, "step": 170670 }, { "epoch": 49.102416570771, "grad_norm": 1.0056029558181763, "learning_rate": 0.00101795166858458, "loss": 0.4933, "step": 170680 }, { "epoch": 49.10529344073648, "grad_norm": 1.212041974067688, "learning_rate": 0.0010178941311852704, "loss": 0.5527, "step": 170690 }, { "epoch": 49.10817031070196, "grad_norm": 0.9920755624771118, "learning_rate": 0.0010178365937859608, "loss": 0.3901, "step": 170700 }, { "epoch": 49.111047180667434, "grad_norm": 1.339898705482483, "learning_rate": 0.0010177790563866513, "loss": 0.4734, "step": 170710 }, { "epoch": 49.11392405063291, "grad_norm": 1.1597166061401367, "learning_rate": 0.001017721518987342, "loss": 0.4981, "step": 170720 }, { "epoch": 49.116800920598386, "grad_norm": 0.8971077799797058, "learning_rate": 0.0010176639815880322, "loss": 0.5546, "step": 170730 }, { "epoch": 49.11967779056387, "grad_norm": 1.1278904676437378, "learning_rate": 0.0010176064441887228, "loss": 0.5385, "step": 170740 }, { "epoch": 49.122554660529346, "grad_norm": 0.8751294016838074, "learning_rate": 0.001017548906789413, "loss": 0.3938, "step": 170750 }, { "epoch": 49.12543153049482, "grad_norm": 0.7817651033401489, "learning_rate": 0.0010174913693901035, "loss": 0.4534, "step": 170760 }, { "epoch": 49.1283084004603, "grad_norm": 1.5437978506088257, "learning_rate": 0.001017433831990794, "loss": 0.5004, "step": 170770 }, { "epoch": 49.131185270425775, "grad_norm": 1.4422907829284668, "learning_rate": 0.0010173762945914844, "loss": 0.5084, "step": 170780 }, { "epoch": 49.13406214039125, "grad_norm": 0.7916235327720642, "learning_rate": 0.001017318757192175, "loss": 0.4741, "step": 170790 }, { "epoch": 49.136939010356734, "grad_norm": 0.7672241926193237, "learning_rate": 0.0010172612197928655, "loss": 0.4806, "step": 170800 }, { "epoch": 49.13981588032221, "grad_norm": 1.2837995290756226, "learning_rate": 0.0010172036823935557, "loss": 0.5725, "step": 170810 }, { "epoch": 49.14269275028769, "grad_norm": 0.6913431286811829, "learning_rate": 0.0010171461449942462, "loss": 0.4503, "step": 170820 }, { "epoch": 49.14556962025316, "grad_norm": 0.8716894388198853, "learning_rate": 0.0010170886075949368, "loss": 0.5758, "step": 170830 }, { "epoch": 49.14844649021864, "grad_norm": 1.4722793102264404, "learning_rate": 0.0010170310701956271, "loss": 0.5407, "step": 170840 }, { "epoch": 49.15132336018412, "grad_norm": 0.9166285991668701, "learning_rate": 0.0010169735327963177, "loss": 0.5239, "step": 170850 }, { "epoch": 49.1542002301496, "grad_norm": 1.514585256576538, "learning_rate": 0.0010169159953970083, "loss": 0.4884, "step": 170860 }, { "epoch": 49.157077100115075, "grad_norm": 1.02142333984375, "learning_rate": 0.0010168584579976984, "loss": 0.5994, "step": 170870 }, { "epoch": 49.15995397008055, "grad_norm": 1.3418775796890259, "learning_rate": 0.001016800920598389, "loss": 0.6444, "step": 170880 }, { "epoch": 49.16283084004603, "grad_norm": 1.0561362504959106, "learning_rate": 0.0010167433831990793, "loss": 0.4438, "step": 170890 }, { "epoch": 49.16570771001151, "grad_norm": 1.407253623008728, "learning_rate": 0.0010166858457997699, "loss": 0.5049, "step": 170900 }, { "epoch": 49.16858457997699, "grad_norm": 0.9100568294525146, "learning_rate": 0.0010166283084004604, "loss": 0.6243, "step": 170910 }, { "epoch": 49.17146144994246, "grad_norm": 1.2661464214324951, "learning_rate": 0.0010165707710011508, "loss": 0.6192, "step": 170920 }, { "epoch": 49.17433831990794, "grad_norm": 1.144944429397583, "learning_rate": 0.0010165132336018411, "loss": 0.4601, "step": 170930 }, { "epoch": 49.177215189873415, "grad_norm": 0.8438548445701599, "learning_rate": 0.0010164556962025317, "loss": 0.4521, "step": 170940 }, { "epoch": 49.1800920598389, "grad_norm": 1.753544569015503, "learning_rate": 0.001016398158803222, "loss": 0.5096, "step": 170950 }, { "epoch": 49.182968929804375, "grad_norm": 2.166161060333252, "learning_rate": 0.0010163406214039126, "loss": 0.4851, "step": 170960 }, { "epoch": 49.18584579976985, "grad_norm": 1.0678972005844116, "learning_rate": 0.0010162830840046032, "loss": 0.5391, "step": 170970 }, { "epoch": 49.18872266973533, "grad_norm": 1.4883249998092651, "learning_rate": 0.0010162255466052935, "loss": 0.6187, "step": 170980 }, { "epoch": 49.1915995397008, "grad_norm": 1.0052913427352905, "learning_rate": 0.0010161680092059839, "loss": 0.43, "step": 170990 }, { "epoch": 49.19447640966629, "grad_norm": 1.1367876529693604, "learning_rate": 0.0010161104718066742, "loss": 0.4524, "step": 171000 }, { "epoch": 49.19735327963176, "grad_norm": 1.4287856817245483, "learning_rate": 0.0010160529344073648, "loss": 0.5719, "step": 171010 }, { "epoch": 49.20023014959724, "grad_norm": 0.6720683574676514, "learning_rate": 0.0010159953970080553, "loss": 0.4979, "step": 171020 }, { "epoch": 49.203107019562715, "grad_norm": 1.095935344696045, "learning_rate": 0.0010159378596087457, "loss": 0.5391, "step": 171030 }, { "epoch": 49.20598388952819, "grad_norm": 1.2087723016738892, "learning_rate": 0.0010158803222094363, "loss": 0.6538, "step": 171040 }, { "epoch": 49.20886075949367, "grad_norm": 0.8598353266716003, "learning_rate": 0.0010158227848101266, "loss": 0.5473, "step": 171050 }, { "epoch": 49.21173762945915, "grad_norm": 0.7111011743545532, "learning_rate": 0.001015765247410817, "loss": 0.5629, "step": 171060 }, { "epoch": 49.21461449942463, "grad_norm": 1.4315125942230225, "learning_rate": 0.0010157077100115075, "loss": 0.5764, "step": 171070 }, { "epoch": 49.2174913693901, "grad_norm": 1.0371325016021729, "learning_rate": 0.001015650172612198, "loss": 0.5569, "step": 171080 }, { "epoch": 49.22036823935558, "grad_norm": 1.078688383102417, "learning_rate": 0.0010155926352128884, "loss": 0.4502, "step": 171090 }, { "epoch": 49.223245109321056, "grad_norm": 1.6615455150604248, "learning_rate": 0.001015535097813579, "loss": 0.421, "step": 171100 }, { "epoch": 49.22612197928654, "grad_norm": 1.027543306350708, "learning_rate": 0.0010154775604142691, "loss": 0.5193, "step": 171110 }, { "epoch": 49.228998849252015, "grad_norm": 1.612642526626587, "learning_rate": 0.0010154200230149597, "loss": 0.4608, "step": 171120 }, { "epoch": 49.23187571921749, "grad_norm": 1.9363350868225098, "learning_rate": 0.0010153624856156502, "loss": 0.4725, "step": 171130 }, { "epoch": 49.23475258918297, "grad_norm": 0.9806938171386719, "learning_rate": 0.0010153049482163406, "loss": 0.5284, "step": 171140 }, { "epoch": 49.237629459148444, "grad_norm": 1.3774670362472534, "learning_rate": 0.0010152474108170312, "loss": 0.6918, "step": 171150 }, { "epoch": 49.24050632911393, "grad_norm": 1.75867760181427, "learning_rate": 0.0010151898734177217, "loss": 0.6042, "step": 171160 }, { "epoch": 49.2433831990794, "grad_norm": 1.510677695274353, "learning_rate": 0.0010151323360184118, "loss": 0.4867, "step": 171170 }, { "epoch": 49.24626006904488, "grad_norm": 1.3705112934112549, "learning_rate": 0.0010150747986191024, "loss": 0.5503, "step": 171180 }, { "epoch": 49.249136939010356, "grad_norm": 1.843839168548584, "learning_rate": 0.001015017261219793, "loss": 0.6408, "step": 171190 }, { "epoch": 49.25201380897583, "grad_norm": 0.7621862888336182, "learning_rate": 0.0010149597238204833, "loss": 0.5169, "step": 171200 }, { "epoch": 49.254890678941315, "grad_norm": 1.158260703086853, "learning_rate": 0.0010149021864211739, "loss": 0.5291, "step": 171210 }, { "epoch": 49.25776754890679, "grad_norm": 1.1066999435424805, "learning_rate": 0.0010148446490218642, "loss": 0.4885, "step": 171220 }, { "epoch": 49.26064441887227, "grad_norm": 0.8684391379356384, "learning_rate": 0.0010147871116225546, "loss": 0.4951, "step": 171230 }, { "epoch": 49.263521288837744, "grad_norm": 1.5337152481079102, "learning_rate": 0.0010147295742232451, "loss": 0.5227, "step": 171240 }, { "epoch": 49.26639815880322, "grad_norm": 1.3969272375106812, "learning_rate": 0.0010146720368239355, "loss": 0.5703, "step": 171250 }, { "epoch": 49.269275028768696, "grad_norm": 1.6455886363983154, "learning_rate": 0.001014614499424626, "loss": 0.628, "step": 171260 }, { "epoch": 49.27215189873418, "grad_norm": 1.1686159372329712, "learning_rate": 0.0010145569620253166, "loss": 0.6241, "step": 171270 }, { "epoch": 49.275028768699656, "grad_norm": 1.152407169342041, "learning_rate": 0.001014499424626007, "loss": 0.6347, "step": 171280 }, { "epoch": 49.27790563866513, "grad_norm": 2.0019822120666504, "learning_rate": 0.0010144418872266973, "loss": 0.5109, "step": 171290 }, { "epoch": 49.28078250863061, "grad_norm": 0.9365318417549133, "learning_rate": 0.0010143843498273879, "loss": 0.561, "step": 171300 }, { "epoch": 49.283659378596084, "grad_norm": 0.9386937022209167, "learning_rate": 0.0010143268124280782, "loss": 0.5163, "step": 171310 }, { "epoch": 49.28653624856157, "grad_norm": 1.8163598775863647, "learning_rate": 0.0010142692750287688, "loss": 0.6168, "step": 171320 }, { "epoch": 49.289413118527044, "grad_norm": 2.058258295059204, "learning_rate": 0.0010142117376294591, "loss": 0.4906, "step": 171330 }, { "epoch": 49.29228998849252, "grad_norm": 0.9007338881492615, "learning_rate": 0.0010141542002301495, "loss": 0.5515, "step": 171340 }, { "epoch": 49.295166858457996, "grad_norm": 1.3620250225067139, "learning_rate": 0.00101409666283084, "loss": 0.522, "step": 171350 }, { "epoch": 49.29804372842347, "grad_norm": 1.0759061574935913, "learning_rate": 0.0010140391254315304, "loss": 0.5079, "step": 171360 }, { "epoch": 49.300920598388956, "grad_norm": 1.4434088468551636, "learning_rate": 0.001013981588032221, "loss": 0.5766, "step": 171370 }, { "epoch": 49.30379746835443, "grad_norm": 0.968480110168457, "learning_rate": 0.0010139240506329115, "loss": 0.5161, "step": 171380 }, { "epoch": 49.30667433831991, "grad_norm": 1.245613932609558, "learning_rate": 0.0010138665132336019, "loss": 0.5766, "step": 171390 }, { "epoch": 49.309551208285384, "grad_norm": 1.066396951675415, "learning_rate": 0.0010138089758342922, "loss": 0.5803, "step": 171400 }, { "epoch": 49.31242807825086, "grad_norm": 2.001495838165283, "learning_rate": 0.0010137514384349828, "loss": 0.5869, "step": 171410 }, { "epoch": 49.315304948216344, "grad_norm": 1.341164469718933, "learning_rate": 0.0010136939010356731, "loss": 0.5615, "step": 171420 }, { "epoch": 49.31818181818182, "grad_norm": 1.6231915950775146, "learning_rate": 0.0010136363636363637, "loss": 0.6185, "step": 171430 }, { "epoch": 49.321058688147296, "grad_norm": 1.3801862001419067, "learning_rate": 0.001013578826237054, "loss": 0.4799, "step": 171440 }, { "epoch": 49.32393555811277, "grad_norm": 1.4954559803009033, "learning_rate": 0.0010135212888377446, "loss": 0.5215, "step": 171450 }, { "epoch": 49.32681242807825, "grad_norm": 0.7610265612602234, "learning_rate": 0.001013463751438435, "loss": 0.4054, "step": 171460 }, { "epoch": 49.329689298043725, "grad_norm": 0.9376672506332397, "learning_rate": 0.0010134062140391253, "loss": 0.5266, "step": 171470 }, { "epoch": 49.33256616800921, "grad_norm": 1.2664605379104614, "learning_rate": 0.0010133486766398159, "loss": 0.6276, "step": 171480 }, { "epoch": 49.335443037974684, "grad_norm": 0.8264333009719849, "learning_rate": 0.0010132911392405064, "loss": 0.4954, "step": 171490 }, { "epoch": 49.33831990794016, "grad_norm": 0.8353149890899658, "learning_rate": 0.0010132336018411968, "loss": 0.5308, "step": 171500 }, { "epoch": 49.34119677790564, "grad_norm": 2.179979085922241, "learning_rate": 0.0010131760644418873, "loss": 0.5746, "step": 171510 }, { "epoch": 49.34407364787111, "grad_norm": 1.7604238986968994, "learning_rate": 0.0010131185270425777, "loss": 0.4988, "step": 171520 }, { "epoch": 49.346950517836596, "grad_norm": 1.887803554534912, "learning_rate": 0.001013060989643268, "loss": 0.5398, "step": 171530 }, { "epoch": 49.34982738780207, "grad_norm": 1.6373578310012817, "learning_rate": 0.0010130034522439586, "loss": 0.5686, "step": 171540 }, { "epoch": 49.35270425776755, "grad_norm": 1.1914602518081665, "learning_rate": 0.0010129459148446492, "loss": 0.6794, "step": 171550 }, { "epoch": 49.355581127733025, "grad_norm": 0.8482685685157776, "learning_rate": 0.0010128883774453395, "loss": 0.4728, "step": 171560 }, { "epoch": 49.3584579976985, "grad_norm": 1.0355802774429321, "learning_rate": 0.00101283084004603, "loss": 0.5566, "step": 171570 }, { "epoch": 49.361334867663984, "grad_norm": 1.083052158355713, "learning_rate": 0.0010127733026467202, "loss": 0.5585, "step": 171580 }, { "epoch": 49.36421173762946, "grad_norm": 0.8050544261932373, "learning_rate": 0.0010127157652474108, "loss": 0.6158, "step": 171590 }, { "epoch": 49.36708860759494, "grad_norm": 1.524219036102295, "learning_rate": 0.0010126582278481013, "loss": 0.5691, "step": 171600 }, { "epoch": 49.36996547756041, "grad_norm": 2.2404844760894775, "learning_rate": 0.0010126006904487917, "loss": 0.5247, "step": 171610 }, { "epoch": 49.37284234752589, "grad_norm": 0.7864936590194702, "learning_rate": 0.0010125431530494822, "loss": 0.5384, "step": 171620 }, { "epoch": 49.37571921749137, "grad_norm": 1.8841298818588257, "learning_rate": 0.0010124856156501728, "loss": 0.5741, "step": 171630 }, { "epoch": 49.37859608745685, "grad_norm": 2.5519683361053467, "learning_rate": 0.001012428078250863, "loss": 0.6865, "step": 171640 }, { "epoch": 49.381472957422325, "grad_norm": 0.7821370363235474, "learning_rate": 0.0010123705408515535, "loss": 0.6828, "step": 171650 }, { "epoch": 49.3843498273878, "grad_norm": 2.7029523849487305, "learning_rate": 0.001012313003452244, "loss": 0.5017, "step": 171660 }, { "epoch": 49.38722669735328, "grad_norm": 2.292405366897583, "learning_rate": 0.0010122554660529344, "loss": 0.6214, "step": 171670 }, { "epoch": 49.39010356731876, "grad_norm": 1.2682193517684937, "learning_rate": 0.001012197928653625, "loss": 0.4795, "step": 171680 }, { "epoch": 49.39298043728424, "grad_norm": 1.534092903137207, "learning_rate": 0.0010121403912543153, "loss": 0.462, "step": 171690 }, { "epoch": 49.39585730724971, "grad_norm": 1.2427587509155273, "learning_rate": 0.0010120828538550057, "loss": 0.5792, "step": 171700 }, { "epoch": 49.39873417721519, "grad_norm": 2.856452226638794, "learning_rate": 0.0010120253164556962, "loss": 0.594, "step": 171710 }, { "epoch": 49.401611047180666, "grad_norm": 2.28474760055542, "learning_rate": 0.0010119677790563866, "loss": 0.4944, "step": 171720 }, { "epoch": 49.40448791714614, "grad_norm": 1.1067417860031128, "learning_rate": 0.0010119102416570771, "loss": 0.4641, "step": 171730 }, { "epoch": 49.407364787111625, "grad_norm": 0.9891190528869629, "learning_rate": 0.0010118527042577677, "loss": 0.613, "step": 171740 }, { "epoch": 49.4102416570771, "grad_norm": 1.0875906944274902, "learning_rate": 0.001011795166858458, "loss": 0.5289, "step": 171750 }, { "epoch": 49.41311852704258, "grad_norm": 1.1850030422210693, "learning_rate": 0.0010117376294591484, "loss": 0.537, "step": 171760 }, { "epoch": 49.415995397008054, "grad_norm": 1.2398425340652466, "learning_rate": 0.001011680092059839, "loss": 0.5352, "step": 171770 }, { "epoch": 49.41887226697353, "grad_norm": 1.8849899768829346, "learning_rate": 0.0010116225546605293, "loss": 0.6103, "step": 171780 }, { "epoch": 49.42174913693901, "grad_norm": 1.3349148035049438, "learning_rate": 0.0010115650172612199, "loss": 0.5505, "step": 171790 }, { "epoch": 49.42462600690449, "grad_norm": 1.3145123720169067, "learning_rate": 0.0010115074798619102, "loss": 0.5297, "step": 171800 }, { "epoch": 49.427502876869966, "grad_norm": 1.1481614112854004, "learning_rate": 0.0010114499424626008, "loss": 0.5429, "step": 171810 }, { "epoch": 49.43037974683544, "grad_norm": 0.7913581728935242, "learning_rate": 0.0010113924050632911, "loss": 0.6456, "step": 171820 }, { "epoch": 49.43325661680092, "grad_norm": 1.6528743505477905, "learning_rate": 0.0010113348676639815, "loss": 0.7211, "step": 171830 }, { "epoch": 49.4361334867664, "grad_norm": 1.2706142663955688, "learning_rate": 0.001011277330264672, "loss": 0.554, "step": 171840 }, { "epoch": 49.43901035673188, "grad_norm": 1.6054627895355225, "learning_rate": 0.0010112197928653626, "loss": 0.5119, "step": 171850 }, { "epoch": 49.441887226697354, "grad_norm": 1.246376872062683, "learning_rate": 0.001011162255466053, "loss": 0.5525, "step": 171860 }, { "epoch": 49.44476409666283, "grad_norm": 1.2171931266784668, "learning_rate": 0.0010111047180667435, "loss": 0.5482, "step": 171870 }, { "epoch": 49.447640966628306, "grad_norm": 1.5191422700881958, "learning_rate": 0.0010110471806674339, "loss": 0.7312, "step": 171880 }, { "epoch": 49.45051783659379, "grad_norm": 1.8497711420059204, "learning_rate": 0.0010109896432681242, "loss": 0.4666, "step": 171890 }, { "epoch": 49.453394706559266, "grad_norm": 0.7098370790481567, "learning_rate": 0.0010109321058688148, "loss": 0.5427, "step": 171900 }, { "epoch": 49.45627157652474, "grad_norm": 2.355323553085327, "learning_rate": 0.0010108745684695051, "loss": 0.6251, "step": 171910 }, { "epoch": 49.45914844649022, "grad_norm": 1.9007089138031006, "learning_rate": 0.0010108170310701957, "loss": 0.5506, "step": 171920 }, { "epoch": 49.462025316455694, "grad_norm": 0.9790036678314209, "learning_rate": 0.0010107594936708863, "loss": 0.454, "step": 171930 }, { "epoch": 49.46490218642117, "grad_norm": 1.611167311668396, "learning_rate": 0.0010107019562715764, "loss": 0.5524, "step": 171940 }, { "epoch": 49.467779056386654, "grad_norm": 1.5954055786132812, "learning_rate": 0.001010644418872267, "loss": 0.6122, "step": 171950 }, { "epoch": 49.47065592635213, "grad_norm": 1.3660277128219604, "learning_rate": 0.0010105868814729575, "loss": 0.5824, "step": 171960 }, { "epoch": 49.473532796317606, "grad_norm": 1.2249056100845337, "learning_rate": 0.0010105293440736479, "loss": 0.3655, "step": 171970 }, { "epoch": 49.47640966628308, "grad_norm": 2.0353994369506836, "learning_rate": 0.0010104718066743384, "loss": 0.4812, "step": 171980 }, { "epoch": 49.47928653624856, "grad_norm": 0.8066942691802979, "learning_rate": 0.001010414269275029, "loss": 0.5032, "step": 171990 }, { "epoch": 49.48216340621404, "grad_norm": 1.5237016677856445, "learning_rate": 0.0010103567318757191, "loss": 0.4932, "step": 172000 }, { "epoch": 49.48504027617952, "grad_norm": 2.0960946083068848, "learning_rate": 0.0010102991944764097, "loss": 0.5884, "step": 172010 }, { "epoch": 49.487917146144994, "grad_norm": 0.9801545143127441, "learning_rate": 0.0010102416570771, "loss": 0.5376, "step": 172020 }, { "epoch": 49.49079401611047, "grad_norm": 1.401435136795044, "learning_rate": 0.0010101841196777906, "loss": 0.5227, "step": 172030 }, { "epoch": 49.49367088607595, "grad_norm": 1.2028608322143555, "learning_rate": 0.0010101265822784812, "loss": 0.4974, "step": 172040 }, { "epoch": 49.49654775604143, "grad_norm": 0.8520664572715759, "learning_rate": 0.0010100690448791715, "loss": 0.6148, "step": 172050 }, { "epoch": 49.499424626006906, "grad_norm": 0.8910282254219055, "learning_rate": 0.0010100115074798618, "loss": 0.5807, "step": 172060 }, { "epoch": 49.50230149597238, "grad_norm": 1.3095017671585083, "learning_rate": 0.0010099539700805524, "loss": 0.5475, "step": 172070 }, { "epoch": 49.50517836593786, "grad_norm": 1.8246146440505981, "learning_rate": 0.0010098964326812428, "loss": 0.5946, "step": 172080 }, { "epoch": 49.508055235903335, "grad_norm": 1.1927883625030518, "learning_rate": 0.0010098388952819333, "loss": 0.5368, "step": 172090 }, { "epoch": 49.51093210586882, "grad_norm": 0.7312538027763367, "learning_rate": 0.0010097813578826239, "loss": 0.51, "step": 172100 }, { "epoch": 49.513808975834294, "grad_norm": 0.9854645729064941, "learning_rate": 0.0010097238204833142, "loss": 0.5449, "step": 172110 }, { "epoch": 49.51668584579977, "grad_norm": 1.7768354415893555, "learning_rate": 0.0010096662830840046, "loss": 0.5544, "step": 172120 }, { "epoch": 49.51956271576525, "grad_norm": 0.9735424518585205, "learning_rate": 0.001009608745684695, "loss": 0.556, "step": 172130 }, { "epoch": 49.52243958573072, "grad_norm": 1.5610216856002808, "learning_rate": 0.0010095512082853855, "loss": 0.5174, "step": 172140 }, { "epoch": 49.5253164556962, "grad_norm": 1.1450144052505493, "learning_rate": 0.001009493670886076, "loss": 0.7372, "step": 172150 }, { "epoch": 49.52819332566168, "grad_norm": 1.211328387260437, "learning_rate": 0.0010094361334867664, "loss": 0.4601, "step": 172160 }, { "epoch": 49.53107019562716, "grad_norm": 2.771470308303833, "learning_rate": 0.0010093785960874567, "loss": 0.7343, "step": 172170 }, { "epoch": 49.533947065592635, "grad_norm": 1.383192539215088, "learning_rate": 0.0010093210586881473, "loss": 0.5327, "step": 172180 }, { "epoch": 49.53682393555811, "grad_norm": 2.028733015060425, "learning_rate": 0.0010092635212888377, "loss": 0.6422, "step": 172190 }, { "epoch": 49.53970080552359, "grad_norm": 1.6185368299484253, "learning_rate": 0.0010092059838895282, "loss": 0.3878, "step": 172200 }, { "epoch": 49.54257767548907, "grad_norm": 0.9241930842399597, "learning_rate": 0.0010091484464902188, "loss": 0.4997, "step": 172210 }, { "epoch": 49.54545454545455, "grad_norm": 1.6657886505126953, "learning_rate": 0.0010090909090909091, "loss": 0.5662, "step": 172220 }, { "epoch": 49.54833141542002, "grad_norm": 1.4652680158615112, "learning_rate": 0.0010090333716915995, "loss": 0.6471, "step": 172230 }, { "epoch": 49.5512082853855, "grad_norm": 1.800241470336914, "learning_rate": 0.00100897583429229, "loss": 0.5437, "step": 172240 }, { "epoch": 49.554085155350975, "grad_norm": 1.8405572175979614, "learning_rate": 0.0010089182968929804, "loss": 0.5669, "step": 172250 }, { "epoch": 49.55696202531646, "grad_norm": 0.8527219295501709, "learning_rate": 0.001008860759493671, "loss": 0.634, "step": 172260 }, { "epoch": 49.559838895281935, "grad_norm": 1.2075188159942627, "learning_rate": 0.0010088032220943613, "loss": 0.5268, "step": 172270 }, { "epoch": 49.56271576524741, "grad_norm": 0.9960005283355713, "learning_rate": 0.0010087456846950519, "loss": 0.5096, "step": 172280 }, { "epoch": 49.56559263521289, "grad_norm": 0.7641738653182983, "learning_rate": 0.0010086881472957422, "loss": 0.4987, "step": 172290 }, { "epoch": 49.56846950517836, "grad_norm": 1.3807897567749023, "learning_rate": 0.0010086306098964326, "loss": 0.5102, "step": 172300 }, { "epoch": 49.57134637514385, "grad_norm": 1.044127106666565, "learning_rate": 0.0010085730724971231, "loss": 0.6047, "step": 172310 }, { "epoch": 49.57422324510932, "grad_norm": 1.7195160388946533, "learning_rate": 0.0010085155350978137, "loss": 0.6586, "step": 172320 }, { "epoch": 49.5771001150748, "grad_norm": 1.3102003335952759, "learning_rate": 0.001008457997698504, "loss": 0.4941, "step": 172330 }, { "epoch": 49.579976985040275, "grad_norm": 2.7475669384002686, "learning_rate": 0.0010084004602991946, "loss": 0.7358, "step": 172340 }, { "epoch": 49.58285385500575, "grad_norm": 0.7943110466003418, "learning_rate": 0.001008342922899885, "loss": 0.4925, "step": 172350 }, { "epoch": 49.58573072497123, "grad_norm": 0.9858769178390503, "learning_rate": 0.0010082853855005753, "loss": 0.5232, "step": 172360 }, { "epoch": 49.58860759493671, "grad_norm": 1.432909607887268, "learning_rate": 0.0010082278481012659, "loss": 0.676, "step": 172370 }, { "epoch": 49.59148446490219, "grad_norm": 2.3292248249053955, "learning_rate": 0.0010081703107019562, "loss": 0.6409, "step": 172380 }, { "epoch": 49.59436133486766, "grad_norm": 1.151596188545227, "learning_rate": 0.0010081127733026468, "loss": 0.4299, "step": 172390 }, { "epoch": 49.59723820483314, "grad_norm": 2.5862841606140137, "learning_rate": 0.0010080552359033373, "loss": 0.6367, "step": 172400 }, { "epoch": 49.600115074798616, "grad_norm": 1.2479562759399414, "learning_rate": 0.0010079976985040275, "loss": 0.562, "step": 172410 }, { "epoch": 49.6029919447641, "grad_norm": 1.2765849828720093, "learning_rate": 0.001007940161104718, "loss": 0.5333, "step": 172420 }, { "epoch": 49.605868814729575, "grad_norm": 1.3182380199432373, "learning_rate": 0.0010078826237054086, "loss": 0.5253, "step": 172430 }, { "epoch": 49.60874568469505, "grad_norm": 2.4928059577941895, "learning_rate": 0.001007825086306099, "loss": 0.596, "step": 172440 }, { "epoch": 49.61162255466053, "grad_norm": 1.227715015411377, "learning_rate": 0.0010077675489067895, "loss": 0.6494, "step": 172450 }, { "epoch": 49.614499424626004, "grad_norm": 1.0894775390625, "learning_rate": 0.00100771001150748, "loss": 0.5484, "step": 172460 }, { "epoch": 49.61737629459149, "grad_norm": 1.0865073204040527, "learning_rate": 0.0010076524741081702, "loss": 0.507, "step": 172470 }, { "epoch": 49.620253164556964, "grad_norm": 0.8206126689910889, "learning_rate": 0.0010075949367088608, "loss": 0.6087, "step": 172480 }, { "epoch": 49.62313003452244, "grad_norm": 0.7223386168479919, "learning_rate": 0.0010075373993095511, "loss": 0.6555, "step": 172490 }, { "epoch": 49.626006904487916, "grad_norm": 1.3716882467269897, "learning_rate": 0.0010074798619102417, "loss": 0.5859, "step": 172500 }, { "epoch": 49.62888377445339, "grad_norm": 1.6566894054412842, "learning_rate": 0.0010074223245109322, "loss": 0.4989, "step": 172510 }, { "epoch": 49.631760644418875, "grad_norm": 1.194490909576416, "learning_rate": 0.0010073647871116226, "loss": 0.4871, "step": 172520 }, { "epoch": 49.63463751438435, "grad_norm": 0.7273660898208618, "learning_rate": 0.001007307249712313, "loss": 0.5248, "step": 172530 }, { "epoch": 49.63751438434983, "grad_norm": 1.6181445121765137, "learning_rate": 0.0010072497123130035, "loss": 0.7398, "step": 172540 }, { "epoch": 49.640391254315304, "grad_norm": 1.6454411745071411, "learning_rate": 0.0010071921749136938, "loss": 0.6106, "step": 172550 }, { "epoch": 49.64326812428078, "grad_norm": 1.3246346712112427, "learning_rate": 0.0010071346375143844, "loss": 0.5972, "step": 172560 }, { "epoch": 49.64614499424626, "grad_norm": 0.9317622780799866, "learning_rate": 0.001007077100115075, "loss": 0.5832, "step": 172570 }, { "epoch": 49.64902186421174, "grad_norm": 1.7424392700195312, "learning_rate": 0.0010070195627157653, "loss": 0.6215, "step": 172580 }, { "epoch": 49.651898734177216, "grad_norm": 2.441154956817627, "learning_rate": 0.0010069620253164557, "loss": 0.6056, "step": 172590 }, { "epoch": 49.65477560414269, "grad_norm": 1.249510645866394, "learning_rate": 0.001006904487917146, "loss": 0.4199, "step": 172600 }, { "epoch": 49.65765247410817, "grad_norm": 1.0226401090621948, "learning_rate": 0.0010068469505178366, "loss": 0.4044, "step": 172610 }, { "epoch": 49.660529344073645, "grad_norm": 0.9374539852142334, "learning_rate": 0.0010067894131185271, "loss": 0.5202, "step": 172620 }, { "epoch": 49.66340621403913, "grad_norm": 1.2235509157180786, "learning_rate": 0.0010067318757192175, "loss": 0.508, "step": 172630 }, { "epoch": 49.666283084004604, "grad_norm": 1.5213301181793213, "learning_rate": 0.001006674338319908, "loss": 0.5443, "step": 172640 }, { "epoch": 49.66915995397008, "grad_norm": 1.2415635585784912, "learning_rate": 0.0010066168009205984, "loss": 0.5965, "step": 172650 }, { "epoch": 49.67203682393556, "grad_norm": 1.4301735162734985, "learning_rate": 0.0010065592635212887, "loss": 0.532, "step": 172660 }, { "epoch": 49.67491369390103, "grad_norm": 0.8769571185112, "learning_rate": 0.0010065017261219793, "loss": 0.4786, "step": 172670 }, { "epoch": 49.677790563866516, "grad_norm": 1.1601601839065552, "learning_rate": 0.0010064441887226699, "loss": 0.5043, "step": 172680 }, { "epoch": 49.68066743383199, "grad_norm": 2.1309256553649902, "learning_rate": 0.0010063866513233602, "loss": 0.6371, "step": 172690 }, { "epoch": 49.68354430379747, "grad_norm": 1.5936191082000732, "learning_rate": 0.0010063291139240508, "loss": 0.5204, "step": 172700 }, { "epoch": 49.686421173762945, "grad_norm": 0.8604790568351746, "learning_rate": 0.001006271576524741, "loss": 0.6, "step": 172710 }, { "epoch": 49.68929804372842, "grad_norm": 1.2831106185913086, "learning_rate": 0.0010062140391254315, "loss": 0.599, "step": 172720 }, { "epoch": 49.692174913693904, "grad_norm": 1.7690513134002686, "learning_rate": 0.001006156501726122, "loss": 0.4966, "step": 172730 }, { "epoch": 49.69505178365938, "grad_norm": 1.5903488397598267, "learning_rate": 0.0010060989643268124, "loss": 0.5682, "step": 172740 }, { "epoch": 49.69792865362486, "grad_norm": 1.3976424932479858, "learning_rate": 0.001006041426927503, "loss": 0.5311, "step": 172750 }, { "epoch": 49.70080552359033, "grad_norm": 0.937942385673523, "learning_rate": 0.0010059838895281935, "loss": 0.5089, "step": 172760 }, { "epoch": 49.70368239355581, "grad_norm": 0.9128726720809937, "learning_rate": 0.0010059263521288836, "loss": 0.5673, "step": 172770 }, { "epoch": 49.70655926352129, "grad_norm": 1.5918289422988892, "learning_rate": 0.0010058688147295742, "loss": 0.6997, "step": 172780 }, { "epoch": 49.70943613348677, "grad_norm": 1.755370020866394, "learning_rate": 0.0010058112773302648, "loss": 0.5501, "step": 172790 }, { "epoch": 49.712313003452245, "grad_norm": 1.5183262825012207, "learning_rate": 0.0010057537399309551, "loss": 0.4756, "step": 172800 }, { "epoch": 49.71518987341772, "grad_norm": 1.2505083084106445, "learning_rate": 0.0010056962025316457, "loss": 0.5646, "step": 172810 }, { "epoch": 49.7180667433832, "grad_norm": 1.3257434368133545, "learning_rate": 0.0010056386651323362, "loss": 0.4546, "step": 172820 }, { "epoch": 49.72094361334867, "grad_norm": 1.480539083480835, "learning_rate": 0.0010055811277330264, "loss": 0.6392, "step": 172830 }, { "epoch": 49.72382048331416, "grad_norm": 0.9184191226959229, "learning_rate": 0.001005523590333717, "loss": 0.6528, "step": 172840 }, { "epoch": 49.72669735327963, "grad_norm": 1.3714746236801147, "learning_rate": 0.0010054660529344073, "loss": 0.567, "step": 172850 }, { "epoch": 49.72957422324511, "grad_norm": 1.7979398965835571, "learning_rate": 0.0010054085155350979, "loss": 0.5278, "step": 172860 }, { "epoch": 49.732451093210585, "grad_norm": 0.8691368103027344, "learning_rate": 0.0010053509781357884, "loss": 0.4729, "step": 172870 }, { "epoch": 49.73532796317606, "grad_norm": 1.3299483060836792, "learning_rate": 0.0010052934407364788, "loss": 0.561, "step": 172880 }, { "epoch": 49.738204833141545, "grad_norm": 1.4487234354019165, "learning_rate": 0.0010052359033371691, "loss": 0.7405, "step": 172890 }, { "epoch": 49.74108170310702, "grad_norm": 2.5447113513946533, "learning_rate": 0.0010051783659378597, "loss": 0.5428, "step": 172900 }, { "epoch": 49.7439585730725, "grad_norm": 1.2912811040878296, "learning_rate": 0.00100512082853855, "loss": 0.5027, "step": 172910 }, { "epoch": 49.74683544303797, "grad_norm": 0.8081824779510498, "learning_rate": 0.0010050632911392406, "loss": 0.456, "step": 172920 }, { "epoch": 49.74971231300345, "grad_norm": 1.1944420337677002, "learning_rate": 0.0010050057537399312, "loss": 0.6423, "step": 172930 }, { "epoch": 49.75258918296893, "grad_norm": 1.1284081935882568, "learning_rate": 0.0010049482163406215, "loss": 0.4583, "step": 172940 }, { "epoch": 49.75546605293441, "grad_norm": 1.7974764108657837, "learning_rate": 0.0010048906789413118, "loss": 0.523, "step": 172950 }, { "epoch": 49.758342922899885, "grad_norm": 1.1558884382247925, "learning_rate": 0.0010048331415420022, "loss": 0.5125, "step": 172960 }, { "epoch": 49.76121979286536, "grad_norm": 1.314373254776001, "learning_rate": 0.0010047756041426928, "loss": 0.5191, "step": 172970 }, { "epoch": 49.76409666283084, "grad_norm": 1.2280887365341187, "learning_rate": 0.0010047180667433833, "loss": 0.6178, "step": 172980 }, { "epoch": 49.76697353279632, "grad_norm": 1.159813404083252, "learning_rate": 0.0010046605293440737, "loss": 0.4567, "step": 172990 }, { "epoch": 49.7698504027618, "grad_norm": 1.4314687252044678, "learning_rate": 0.001004602991944764, "loss": 0.6498, "step": 173000 }, { "epoch": 49.77272727272727, "grad_norm": 1.2026934623718262, "learning_rate": 0.0010045454545454546, "loss": 0.4625, "step": 173010 }, { "epoch": 49.77560414269275, "grad_norm": 0.7997050285339355, "learning_rate": 0.001004487917146145, "loss": 0.5196, "step": 173020 }, { "epoch": 49.778481012658226, "grad_norm": 0.7651547789573669, "learning_rate": 0.0010044303797468355, "loss": 0.4987, "step": 173030 }, { "epoch": 49.7813578826237, "grad_norm": 2.0369510650634766, "learning_rate": 0.001004372842347526, "loss": 0.6351, "step": 173040 }, { "epoch": 49.784234752589185, "grad_norm": 1.9089165925979614, "learning_rate": 0.0010043153049482164, "loss": 0.717, "step": 173050 }, { "epoch": 49.78711162255466, "grad_norm": 1.4725286960601807, "learning_rate": 0.0010042577675489067, "loss": 0.5532, "step": 173060 }, { "epoch": 49.78998849252014, "grad_norm": 1.1903842687606812, "learning_rate": 0.001004200230149597, "loss": 0.4765, "step": 173070 }, { "epoch": 49.792865362485614, "grad_norm": 1.013066053390503, "learning_rate": 0.0010041426927502877, "loss": 0.4417, "step": 173080 }, { "epoch": 49.79574223245109, "grad_norm": 2.026740074157715, "learning_rate": 0.0010040851553509782, "loss": 0.6641, "step": 173090 }, { "epoch": 49.79861910241657, "grad_norm": 1.646484613418579, "learning_rate": 0.0010040276179516686, "loss": 0.4838, "step": 173100 }, { "epoch": 49.80149597238205, "grad_norm": 1.5547082424163818, "learning_rate": 0.0010039700805523591, "loss": 0.5632, "step": 173110 }, { "epoch": 49.804372842347526, "grad_norm": 0.9125845432281494, "learning_rate": 0.0010039125431530495, "loss": 0.4354, "step": 173120 }, { "epoch": 49.807249712313, "grad_norm": 0.9539616703987122, "learning_rate": 0.0010038550057537398, "loss": 0.5688, "step": 173130 }, { "epoch": 49.81012658227848, "grad_norm": 0.8280944228172302, "learning_rate": 0.0010037974683544304, "loss": 0.4511, "step": 173140 }, { "epoch": 49.81300345224396, "grad_norm": 1.045152187347412, "learning_rate": 0.001003739930955121, "loss": 0.6145, "step": 173150 }, { "epoch": 49.81588032220944, "grad_norm": 1.4138644933700562, "learning_rate": 0.0010036823935558113, "loss": 0.5797, "step": 173160 }, { "epoch": 49.818757192174914, "grad_norm": 1.4445104598999023, "learning_rate": 0.0010036248561565019, "loss": 0.6225, "step": 173170 }, { "epoch": 49.82163406214039, "grad_norm": 0.9068413376808167, "learning_rate": 0.001003567318757192, "loss": 0.538, "step": 173180 }, { "epoch": 49.824510932105866, "grad_norm": 1.1786688566207886, "learning_rate": 0.0010035097813578826, "loss": 0.4944, "step": 173190 }, { "epoch": 49.82738780207135, "grad_norm": 0.8186862468719482, "learning_rate": 0.0010034522439585731, "loss": 0.561, "step": 173200 }, { "epoch": 49.830264672036826, "grad_norm": 0.8609384298324585, "learning_rate": 0.0010033947065592635, "loss": 0.745, "step": 173210 }, { "epoch": 49.8331415420023, "grad_norm": 0.9390416145324707, "learning_rate": 0.001003337169159954, "loss": 0.6588, "step": 173220 }, { "epoch": 49.83601841196778, "grad_norm": 1.0697541236877441, "learning_rate": 0.0010032796317606446, "loss": 0.4665, "step": 173230 }, { "epoch": 49.838895281933254, "grad_norm": 0.9427742958068848, "learning_rate": 0.0010032220943613347, "loss": 0.5054, "step": 173240 }, { "epoch": 49.84177215189874, "grad_norm": 1.4200115203857422, "learning_rate": 0.0010031645569620253, "loss": 0.5011, "step": 173250 }, { "epoch": 49.844649021864214, "grad_norm": 1.2653058767318726, "learning_rate": 0.0010031070195627159, "loss": 0.7585, "step": 173260 }, { "epoch": 49.84752589182969, "grad_norm": 0.7219619154930115, "learning_rate": 0.0010030494821634062, "loss": 0.6193, "step": 173270 }, { "epoch": 49.850402761795166, "grad_norm": 1.6754300594329834, "learning_rate": 0.0010029919447640968, "loss": 0.5994, "step": 173280 }, { "epoch": 49.85327963176064, "grad_norm": 0.9200731515884399, "learning_rate": 0.0010029344073647871, "loss": 0.5924, "step": 173290 }, { "epoch": 49.85615650172612, "grad_norm": 1.5809333324432373, "learning_rate": 0.0010028768699654775, "loss": 0.5133, "step": 173300 }, { "epoch": 49.8590333716916, "grad_norm": 1.239166021347046, "learning_rate": 0.001002819332566168, "loss": 0.6347, "step": 173310 }, { "epoch": 49.86191024165708, "grad_norm": 1.0644128322601318, "learning_rate": 0.0010027617951668584, "loss": 0.4668, "step": 173320 }, { "epoch": 49.864787111622555, "grad_norm": 0.9325580596923828, "learning_rate": 0.001002704257767549, "loss": 0.5371, "step": 173330 }, { "epoch": 49.86766398158803, "grad_norm": 1.6418476104736328, "learning_rate": 0.0010026467203682395, "loss": 0.694, "step": 173340 }, { "epoch": 49.87054085155351, "grad_norm": 0.6359697580337524, "learning_rate": 0.0010025891829689298, "loss": 0.5106, "step": 173350 }, { "epoch": 49.87341772151899, "grad_norm": 2.064980983734131, "learning_rate": 0.0010025316455696202, "loss": 0.5551, "step": 173360 }, { "epoch": 49.876294591484466, "grad_norm": 0.6238666772842407, "learning_rate": 0.0010024741081703108, "loss": 0.4856, "step": 173370 }, { "epoch": 49.87917146144994, "grad_norm": 1.2550239562988281, "learning_rate": 0.001002416570771001, "loss": 0.4822, "step": 173380 }, { "epoch": 49.88204833141542, "grad_norm": 1.5665289163589478, "learning_rate": 0.0010023590333716917, "loss": 0.4785, "step": 173390 }, { "epoch": 49.884925201380895, "grad_norm": 1.0785893201828003, "learning_rate": 0.001002301495972382, "loss": 0.4583, "step": 173400 }, { "epoch": 49.88780207134638, "grad_norm": 2.2472598552703857, "learning_rate": 0.0010022439585730726, "loss": 0.5903, "step": 173410 }, { "epoch": 49.890678941311855, "grad_norm": 1.842564344406128, "learning_rate": 0.001002186421173763, "loss": 0.5741, "step": 173420 }, { "epoch": 49.89355581127733, "grad_norm": 1.344527006149292, "learning_rate": 0.0010021288837744533, "loss": 0.6086, "step": 173430 }, { "epoch": 49.89643268124281, "grad_norm": 3.037713050842285, "learning_rate": 0.0010020713463751438, "loss": 0.6655, "step": 173440 }, { "epoch": 49.89930955120828, "grad_norm": 0.9079721570014954, "learning_rate": 0.0010020138089758344, "loss": 0.5674, "step": 173450 }, { "epoch": 49.90218642117377, "grad_norm": 1.0670095682144165, "learning_rate": 0.0010019562715765248, "loss": 0.5958, "step": 173460 }, { "epoch": 49.90506329113924, "grad_norm": 1.1362874507904053, "learning_rate": 0.0010018987341772153, "loss": 0.5271, "step": 173470 }, { "epoch": 49.90794016110472, "grad_norm": 0.9262005090713501, "learning_rate": 0.0010018411967779057, "loss": 0.5697, "step": 173480 }, { "epoch": 49.910817031070195, "grad_norm": 1.0283440351486206, "learning_rate": 0.001001783659378596, "loss": 0.5303, "step": 173490 }, { "epoch": 49.91369390103567, "grad_norm": 1.9915680885314941, "learning_rate": 0.0010017261219792866, "loss": 0.6464, "step": 173500 }, { "epoch": 49.91657077100115, "grad_norm": 1.246832013130188, "learning_rate": 0.0010016685845799771, "loss": 0.4928, "step": 173510 }, { "epoch": 49.91944764096663, "grad_norm": 0.9117889404296875, "learning_rate": 0.0010016110471806675, "loss": 0.3788, "step": 173520 }, { "epoch": 49.92232451093211, "grad_norm": 1.9555667638778687, "learning_rate": 0.001001553509781358, "loss": 0.652, "step": 173530 }, { "epoch": 49.92520138089758, "grad_norm": 2.8805723190307617, "learning_rate": 0.0010014959723820482, "loss": 0.8495, "step": 173540 }, { "epoch": 49.92807825086306, "grad_norm": 0.9930192828178406, "learning_rate": 0.0010014384349827387, "loss": 0.5519, "step": 173550 }, { "epoch": 49.930955120828536, "grad_norm": 1.8988215923309326, "learning_rate": 0.0010013808975834293, "loss": 0.7484, "step": 173560 }, { "epoch": 49.93383199079402, "grad_norm": 0.741253137588501, "learning_rate": 0.0010013233601841197, "loss": 0.4834, "step": 173570 }, { "epoch": 49.936708860759495, "grad_norm": 1.824171543121338, "learning_rate": 0.0010012658227848102, "loss": 0.5905, "step": 173580 }, { "epoch": 49.93958573072497, "grad_norm": 1.0166993141174316, "learning_rate": 0.0010012082853855008, "loss": 0.6229, "step": 173590 }, { "epoch": 49.94246260069045, "grad_norm": 1.345605492591858, "learning_rate": 0.001001150747986191, "loss": 0.4575, "step": 173600 }, { "epoch": 49.945339470655924, "grad_norm": 2.1301510334014893, "learning_rate": 0.0010010932105868815, "loss": 0.5126, "step": 173610 }, { "epoch": 49.94821634062141, "grad_norm": 1.1499663591384888, "learning_rate": 0.001001035673187572, "loss": 0.6974, "step": 173620 }, { "epoch": 49.95109321058688, "grad_norm": 1.3246089220046997, "learning_rate": 0.0010009781357882624, "loss": 0.6237, "step": 173630 }, { "epoch": 49.95397008055236, "grad_norm": 0.6345042586326599, "learning_rate": 0.001000920598388953, "loss": 0.5151, "step": 173640 }, { "epoch": 49.956846950517836, "grad_norm": 1.2463085651397705, "learning_rate": 0.0010008630609896433, "loss": 0.5267, "step": 173650 }, { "epoch": 49.95972382048331, "grad_norm": 1.4190900325775146, "learning_rate": 0.0010008055235903336, "loss": 0.4734, "step": 173660 }, { "epoch": 49.962600690448795, "grad_norm": 0.7762610912322998, "learning_rate": 0.0010007479861910242, "loss": 0.4348, "step": 173670 }, { "epoch": 49.96547756041427, "grad_norm": 0.769498884677887, "learning_rate": 0.0010006904487917146, "loss": 0.6249, "step": 173680 }, { "epoch": 49.96835443037975, "grad_norm": 1.4549593925476074, "learning_rate": 0.0010006329113924051, "loss": 0.5942, "step": 173690 }, { "epoch": 49.971231300345224, "grad_norm": 1.4553701877593994, "learning_rate": 0.0010005753739930957, "loss": 0.5721, "step": 173700 }, { "epoch": 49.9741081703107, "grad_norm": 1.8040238618850708, "learning_rate": 0.001000517836593786, "loss": 0.5642, "step": 173710 }, { "epoch": 49.976985040276176, "grad_norm": 1.453660488128662, "learning_rate": 0.0010004602991944764, "loss": 0.5032, "step": 173720 }, { "epoch": 49.97986191024166, "grad_norm": 1.474408507347107, "learning_rate": 0.001000402761795167, "loss": 0.4947, "step": 173730 }, { "epoch": 49.982738780207136, "grad_norm": 1.127488613128662, "learning_rate": 0.0010003452243958573, "loss": 0.7502, "step": 173740 }, { "epoch": 49.98561565017261, "grad_norm": 1.174673318862915, "learning_rate": 0.0010002876869965479, "loss": 0.6387, "step": 173750 }, { "epoch": 49.98849252013809, "grad_norm": 2.4750382900238037, "learning_rate": 0.0010002301495972382, "loss": 0.5308, "step": 173760 }, { "epoch": 49.991369390103564, "grad_norm": 1.3120843172073364, "learning_rate": 0.0010001726121979288, "loss": 0.7682, "step": 173770 }, { "epoch": 49.99424626006905, "grad_norm": 1.6433837413787842, "learning_rate": 0.0010001150747986191, "loss": 0.5218, "step": 173780 }, { "epoch": 49.997123130034524, "grad_norm": 0.6486939787864685, "learning_rate": 0.0010000575373993095, "loss": 0.5741, "step": 173790 }, { "epoch": 50.0, "grad_norm": 1.1351324319839478, "learning_rate": 0.001, "loss": 0.4931, "step": 173800 }, { "epoch": 50.002876869965476, "grad_norm": 1.291408658027649, "learning_rate": 0.0009999424626006904, "loss": 0.5697, "step": 173810 }, { "epoch": 50.00575373993095, "grad_norm": 1.8036733865737915, "learning_rate": 0.000999884925201381, "loss": 0.5584, "step": 173820 }, { "epoch": 50.008630609896436, "grad_norm": 1.169643759727478, "learning_rate": 0.0009998273878020713, "loss": 0.4726, "step": 173830 }, { "epoch": 50.01150747986191, "grad_norm": 1.8618781566619873, "learning_rate": 0.0009997698504027618, "loss": 0.5936, "step": 173840 }, { "epoch": 50.01438434982739, "grad_norm": 0.7724915146827698, "learning_rate": 0.0009997123130034522, "loss": 0.4876, "step": 173850 }, { "epoch": 50.017261219792864, "grad_norm": 0.8818336129188538, "learning_rate": 0.0009996547756041428, "loss": 0.3696, "step": 173860 }, { "epoch": 50.02013808975834, "grad_norm": 1.5646843910217285, "learning_rate": 0.000999597238204833, "loss": 0.5657, "step": 173870 }, { "epoch": 50.023014959723824, "grad_norm": 1.049551248550415, "learning_rate": 0.0009995397008055237, "loss": 0.473, "step": 173880 }, { "epoch": 50.0258918296893, "grad_norm": 1.059411883354187, "learning_rate": 0.000999482163406214, "loss": 0.4881, "step": 173890 }, { "epoch": 50.028768699654776, "grad_norm": 1.7983548641204834, "learning_rate": 0.0009994246260069046, "loss": 0.6167, "step": 173900 }, { "epoch": 50.03164556962025, "grad_norm": 1.0157183408737183, "learning_rate": 0.000999367088607595, "loss": 0.4737, "step": 173910 }, { "epoch": 50.03452243958573, "grad_norm": 2.9045753479003906, "learning_rate": 0.0009993095512082853, "loss": 0.5444, "step": 173920 }, { "epoch": 50.037399309551205, "grad_norm": 1.31008780002594, "learning_rate": 0.0009992520138089758, "loss": 0.5339, "step": 173930 }, { "epoch": 50.04027617951669, "grad_norm": 1.105494737625122, "learning_rate": 0.0009991944764096664, "loss": 0.4578, "step": 173940 }, { "epoch": 50.043153049482164, "grad_norm": 1.0195273160934448, "learning_rate": 0.0009991369390103567, "loss": 0.4217, "step": 173950 }, { "epoch": 50.04602991944764, "grad_norm": 1.6528136730194092, "learning_rate": 0.000999079401611047, "loss": 0.4039, "step": 173960 }, { "epoch": 50.04890678941312, "grad_norm": 0.9568613767623901, "learning_rate": 0.0009990218642117377, "loss": 0.392, "step": 173970 }, { "epoch": 50.05178365937859, "grad_norm": 1.5223671197891235, "learning_rate": 0.000998964326812428, "loss": 0.5443, "step": 173980 }, { "epoch": 50.054660529344076, "grad_norm": 1.0316858291625977, "learning_rate": 0.0009989067894131186, "loss": 0.4733, "step": 173990 }, { "epoch": 50.05753739930955, "grad_norm": 1.226666808128357, "learning_rate": 0.0009988492520138091, "loss": 0.5428, "step": 174000 }, { "epoch": 50.06041426927503, "grad_norm": 0.9874235391616821, "learning_rate": 0.0009987917146144995, "loss": 0.4394, "step": 174010 }, { "epoch": 50.063291139240505, "grad_norm": 0.935820460319519, "learning_rate": 0.0009987341772151898, "loss": 0.5404, "step": 174020 }, { "epoch": 50.06616800920598, "grad_norm": 1.2135696411132812, "learning_rate": 0.0009986766398158804, "loss": 0.5284, "step": 174030 }, { "epoch": 50.069044879171464, "grad_norm": 1.0251209735870361, "learning_rate": 0.0009986191024165707, "loss": 0.5045, "step": 174040 }, { "epoch": 50.07192174913694, "grad_norm": 1.3775513172149658, "learning_rate": 0.0009985615650172613, "loss": 0.5171, "step": 174050 }, { "epoch": 50.07479861910242, "grad_norm": 0.6331904530525208, "learning_rate": 0.0009985040276179516, "loss": 0.5225, "step": 174060 }, { "epoch": 50.07767548906789, "grad_norm": 1.345171332359314, "learning_rate": 0.000998446490218642, "loss": 0.5847, "step": 174070 }, { "epoch": 50.08055235903337, "grad_norm": 1.6370078325271606, "learning_rate": 0.0009983889528193326, "loss": 0.4521, "step": 174080 }, { "epoch": 50.08342922899885, "grad_norm": 1.3088992834091187, "learning_rate": 0.0009983314154200231, "loss": 0.5851, "step": 174090 }, { "epoch": 50.08630609896433, "grad_norm": 1.8270219564437866, "learning_rate": 0.0009982738780207135, "loss": 0.5644, "step": 174100 }, { "epoch": 50.089182968929805, "grad_norm": 1.0880136489868164, "learning_rate": 0.000998216340621404, "loss": 0.5318, "step": 174110 }, { "epoch": 50.09205983889528, "grad_norm": 1.5495836734771729, "learning_rate": 0.0009981588032220944, "loss": 0.6081, "step": 174120 }, { "epoch": 50.09493670886076, "grad_norm": 1.1783027648925781, "learning_rate": 0.0009981012658227847, "loss": 0.4245, "step": 174130 }, { "epoch": 50.09781357882623, "grad_norm": 1.6195372343063354, "learning_rate": 0.0009980437284234753, "loss": 0.5295, "step": 174140 }, { "epoch": 50.10069044879172, "grad_norm": 0.6952713131904602, "learning_rate": 0.0009979861910241659, "loss": 0.5607, "step": 174150 }, { "epoch": 50.10356731875719, "grad_norm": 0.9415850043296814, "learning_rate": 0.0009979286536248562, "loss": 0.4456, "step": 174160 }, { "epoch": 50.10644418872267, "grad_norm": 1.132291555404663, "learning_rate": 0.0009978711162255465, "loss": 0.5524, "step": 174170 }, { "epoch": 50.109321058688145, "grad_norm": 1.0520613193511963, "learning_rate": 0.0009978135788262371, "loss": 0.4461, "step": 174180 }, { "epoch": 50.11219792865362, "grad_norm": 1.0835074186325073, "learning_rate": 0.0009977560414269275, "loss": 0.5099, "step": 174190 }, { "epoch": 50.115074798619105, "grad_norm": 1.6976991891860962, "learning_rate": 0.000997698504027618, "loss": 0.4694, "step": 174200 }, { "epoch": 50.11795166858458, "grad_norm": 1.5653340816497803, "learning_rate": 0.0009976409666283084, "loss": 0.4727, "step": 174210 }, { "epoch": 50.12082853855006, "grad_norm": 1.5512595176696777, "learning_rate": 0.000997583429228999, "loss": 0.6267, "step": 174220 }, { "epoch": 50.123705408515534, "grad_norm": 1.4262504577636719, "learning_rate": 0.0009975258918296893, "loss": 0.5473, "step": 174230 }, { "epoch": 50.12658227848101, "grad_norm": 2.4142627716064453, "learning_rate": 0.0009974683544303798, "loss": 0.5311, "step": 174240 }, { "epoch": 50.12945914844649, "grad_norm": 1.1539998054504395, "learning_rate": 0.0009974108170310702, "loss": 0.5222, "step": 174250 }, { "epoch": 50.13233601841197, "grad_norm": 1.075671911239624, "learning_rate": 0.0009973532796317608, "loss": 0.4843, "step": 174260 }, { "epoch": 50.135212888377445, "grad_norm": 1.3233063220977783, "learning_rate": 0.000997295742232451, "loss": 0.5306, "step": 174270 }, { "epoch": 50.13808975834292, "grad_norm": 0.7093977928161621, "learning_rate": 0.0009972382048331415, "loss": 0.5883, "step": 174280 }, { "epoch": 50.1409666283084, "grad_norm": 0.9592173099517822, "learning_rate": 0.000997180667433832, "loss": 0.5413, "step": 174290 }, { "epoch": 50.14384349827388, "grad_norm": 1.2797001600265503, "learning_rate": 0.0009971231300345226, "loss": 0.5802, "step": 174300 }, { "epoch": 50.14672036823936, "grad_norm": 0.862000584602356, "learning_rate": 0.000997065592635213, "loss": 0.3714, "step": 174310 }, { "epoch": 50.149597238204834, "grad_norm": 0.9723159670829773, "learning_rate": 0.0009970080552359033, "loss": 0.6151, "step": 174320 }, { "epoch": 50.15247410817031, "grad_norm": 0.7233383059501648, "learning_rate": 0.0009969505178365938, "loss": 0.4507, "step": 174330 }, { "epoch": 50.155350978135786, "grad_norm": 0.922467052936554, "learning_rate": 0.0009968929804372842, "loss": 0.4888, "step": 174340 }, { "epoch": 50.15822784810127, "grad_norm": 1.9144706726074219, "learning_rate": 0.0009968354430379747, "loss": 0.6306, "step": 174350 }, { "epoch": 50.161104718066746, "grad_norm": 1.8778293132781982, "learning_rate": 0.000996777905638665, "loss": 0.5564, "step": 174360 }, { "epoch": 50.16398158803222, "grad_norm": 2.1215109825134277, "learning_rate": 0.0009967203682393557, "loss": 0.6071, "step": 174370 }, { "epoch": 50.1668584579977, "grad_norm": 1.2952991724014282, "learning_rate": 0.000996662830840046, "loss": 0.3809, "step": 174380 }, { "epoch": 50.169735327963174, "grad_norm": 2.1385350227355957, "learning_rate": 0.0009966052934407366, "loss": 0.4877, "step": 174390 }, { "epoch": 50.17261219792865, "grad_norm": 1.360168218612671, "learning_rate": 0.000996547756041427, "loss": 0.4994, "step": 174400 }, { "epoch": 50.175489067894134, "grad_norm": 1.568579077720642, "learning_rate": 0.0009964902186421175, "loss": 0.6142, "step": 174410 }, { "epoch": 50.17836593785961, "grad_norm": 1.9853283166885376, "learning_rate": 0.0009964326812428078, "loss": 0.5025, "step": 174420 }, { "epoch": 50.181242807825086, "grad_norm": 1.2162033319473267, "learning_rate": 0.0009963751438434982, "loss": 0.4066, "step": 174430 }, { "epoch": 50.18411967779056, "grad_norm": 0.8282601833343506, "learning_rate": 0.0009963176064441887, "loss": 0.5737, "step": 174440 }, { "epoch": 50.18699654775604, "grad_norm": 1.1036452054977417, "learning_rate": 0.0009962600690448793, "loss": 0.4581, "step": 174450 }, { "epoch": 50.18987341772152, "grad_norm": 1.1427236795425415, "learning_rate": 0.0009962025316455697, "loss": 0.3825, "step": 174460 }, { "epoch": 50.192750287687, "grad_norm": 1.9619922637939453, "learning_rate": 0.00099614499424626, "loss": 0.5765, "step": 174470 }, { "epoch": 50.195627157652474, "grad_norm": 0.9821725487709045, "learning_rate": 0.0009960874568469506, "loss": 0.5609, "step": 174480 }, { "epoch": 50.19850402761795, "grad_norm": 0.8694064617156982, "learning_rate": 0.000996029919447641, "loss": 0.5356, "step": 174490 }, { "epoch": 50.20138089758343, "grad_norm": 1.3085846900939941, "learning_rate": 0.0009959723820483315, "loss": 0.5143, "step": 174500 }, { "epoch": 50.20425776754891, "grad_norm": 1.1856358051300049, "learning_rate": 0.000995914844649022, "loss": 0.4909, "step": 174510 }, { "epoch": 50.207134637514386, "grad_norm": 1.1229020357131958, "learning_rate": 0.0009958573072497124, "loss": 0.5941, "step": 174520 }, { "epoch": 50.21001150747986, "grad_norm": 1.091737151145935, "learning_rate": 0.0009957997698504027, "loss": 0.5758, "step": 174530 }, { "epoch": 50.21288837744534, "grad_norm": 1.2667025327682495, "learning_rate": 0.0009957422324510933, "loss": 0.5363, "step": 174540 }, { "epoch": 50.215765247410815, "grad_norm": 5.082650661468506, "learning_rate": 0.0009956846950517836, "loss": 0.6126, "step": 174550 }, { "epoch": 50.2186421173763, "grad_norm": 1.794334888458252, "learning_rate": 0.0009956271576524742, "loss": 0.5915, "step": 174560 }, { "epoch": 50.221518987341774, "grad_norm": 1.6527817249298096, "learning_rate": 0.0009955696202531646, "loss": 0.4658, "step": 174570 }, { "epoch": 50.22439585730725, "grad_norm": 2.256621837615967, "learning_rate": 0.000995512082853855, "loss": 0.6463, "step": 174580 }, { "epoch": 50.22727272727273, "grad_norm": 1.3676588535308838, "learning_rate": 0.0009954545454545455, "loss": 0.4528, "step": 174590 }, { "epoch": 50.2301495972382, "grad_norm": 0.7729818224906921, "learning_rate": 0.000995397008055236, "loss": 0.4978, "step": 174600 }, { "epoch": 50.23302646720368, "grad_norm": 1.2760041952133179, "learning_rate": 0.0009953394706559264, "loss": 0.514, "step": 174610 }, { "epoch": 50.23590333716916, "grad_norm": 1.0760692358016968, "learning_rate": 0.000995281933256617, "loss": 0.4846, "step": 174620 }, { "epoch": 50.23878020713464, "grad_norm": 1.0065993070602417, "learning_rate": 0.0009952243958573073, "loss": 0.5641, "step": 174630 }, { "epoch": 50.241657077100115, "grad_norm": 1.3541325330734253, "learning_rate": 0.0009951668584579976, "loss": 0.5278, "step": 174640 }, { "epoch": 50.24453394706559, "grad_norm": 0.8075144290924072, "learning_rate": 0.0009951093210586882, "loss": 0.4523, "step": 174650 }, { "epoch": 50.24741081703107, "grad_norm": 1.5624676942825317, "learning_rate": 0.0009950517836593785, "loss": 0.4876, "step": 174660 }, { "epoch": 50.25028768699655, "grad_norm": 0.8053553700447083, "learning_rate": 0.000994994246260069, "loss": 0.6529, "step": 174670 }, { "epoch": 50.25316455696203, "grad_norm": 1.2999974489212036, "learning_rate": 0.0009949367088607595, "loss": 0.4521, "step": 174680 }, { "epoch": 50.2560414269275, "grad_norm": 0.7565445303916931, "learning_rate": 0.00099487917146145, "loss": 0.5002, "step": 174690 }, { "epoch": 50.25891829689298, "grad_norm": 1.385079264640808, "learning_rate": 0.0009948216340621404, "loss": 0.6295, "step": 174700 }, { "epoch": 50.261795166858455, "grad_norm": 1.5625566244125366, "learning_rate": 0.000994764096662831, "loss": 0.5487, "step": 174710 }, { "epoch": 50.26467203682394, "grad_norm": 1.0803396701812744, "learning_rate": 0.0009947065592635213, "loss": 0.6495, "step": 174720 }, { "epoch": 50.267548906789415, "grad_norm": 1.710294246673584, "learning_rate": 0.0009946490218642118, "loss": 0.5603, "step": 174730 }, { "epoch": 50.27042577675489, "grad_norm": 1.272430658340454, "learning_rate": 0.0009945914844649022, "loss": 0.4868, "step": 174740 }, { "epoch": 50.27330264672037, "grad_norm": 1.7142170667648315, "learning_rate": 0.0009945339470655925, "loss": 0.5664, "step": 174750 }, { "epoch": 50.27617951668584, "grad_norm": 2.9481301307678223, "learning_rate": 0.000994476409666283, "loss": 0.5851, "step": 174760 }, { "epoch": 50.27905638665133, "grad_norm": 1.2802510261535645, "learning_rate": 0.0009944188722669737, "loss": 0.5058, "step": 174770 }, { "epoch": 50.2819332566168, "grad_norm": 1.1811457872390747, "learning_rate": 0.000994361334867664, "loss": 0.5333, "step": 174780 }, { "epoch": 50.28481012658228, "grad_norm": 1.0067144632339478, "learning_rate": 0.0009943037974683544, "loss": 0.4551, "step": 174790 }, { "epoch": 50.287686996547755, "grad_norm": 1.5937836170196533, "learning_rate": 0.000994246260069045, "loss": 0.548, "step": 174800 }, { "epoch": 50.29056386651323, "grad_norm": 0.7641977071762085, "learning_rate": 0.0009941887226697353, "loss": 0.5152, "step": 174810 }, { "epoch": 50.29344073647871, "grad_norm": 1.5489883422851562, "learning_rate": 0.0009941311852704258, "loss": 0.6966, "step": 174820 }, { "epoch": 50.29631760644419, "grad_norm": 1.4532461166381836, "learning_rate": 0.0009940736478711162, "loss": 0.4846, "step": 174830 }, { "epoch": 50.29919447640967, "grad_norm": 0.8480737805366516, "learning_rate": 0.0009940161104718067, "loss": 0.4562, "step": 174840 }, { "epoch": 50.30207134637514, "grad_norm": 0.9994176626205444, "learning_rate": 0.000993958573072497, "loss": 0.5402, "step": 174850 }, { "epoch": 50.30494821634062, "grad_norm": 0.7655282020568848, "learning_rate": 0.0009939010356731877, "loss": 0.5356, "step": 174860 }, { "epoch": 50.307825086306096, "grad_norm": 1.6826494932174683, "learning_rate": 0.000993843498273878, "loss": 0.5317, "step": 174870 }, { "epoch": 50.31070195627158, "grad_norm": 1.117066740989685, "learning_rate": 0.0009937859608745686, "loss": 0.6285, "step": 174880 }, { "epoch": 50.313578826237055, "grad_norm": 1.5472468137741089, "learning_rate": 0.000993728423475259, "loss": 0.6686, "step": 174890 }, { "epoch": 50.31645569620253, "grad_norm": 1.4673947095870972, "learning_rate": 0.0009936708860759493, "loss": 0.4866, "step": 174900 }, { "epoch": 50.31933256616801, "grad_norm": 1.3906253576278687, "learning_rate": 0.0009936133486766398, "loss": 0.4504, "step": 174910 }, { "epoch": 50.322209436133484, "grad_norm": 1.0425361394882202, "learning_rate": 0.0009935558112773304, "loss": 0.6324, "step": 174920 }, { "epoch": 50.32508630609897, "grad_norm": 0.9117326140403748, "learning_rate": 0.0009934982738780207, "loss": 0.523, "step": 174930 }, { "epoch": 50.32796317606444, "grad_norm": 1.1612693071365356, "learning_rate": 0.000993440736478711, "loss": 0.6562, "step": 174940 }, { "epoch": 50.33084004602992, "grad_norm": 0.9288016557693481, "learning_rate": 0.0009933831990794016, "loss": 0.5108, "step": 174950 }, { "epoch": 50.333716915995396, "grad_norm": 1.531548261642456, "learning_rate": 0.000993325661680092, "loss": 0.5633, "step": 174960 }, { "epoch": 50.33659378596087, "grad_norm": 1.5173001289367676, "learning_rate": 0.0009932681242807826, "loss": 0.5161, "step": 174970 }, { "epoch": 50.339470655926355, "grad_norm": 1.4000415802001953, "learning_rate": 0.0009932105868814731, "loss": 0.5195, "step": 174980 }, { "epoch": 50.34234752589183, "grad_norm": 0.9208953380584717, "learning_rate": 0.0009931530494821635, "loss": 0.539, "step": 174990 }, { "epoch": 50.34522439585731, "grad_norm": 1.2670459747314453, "learning_rate": 0.0009930955120828538, "loss": 0.669, "step": 175000 }, { "epoch": 50.348101265822784, "grad_norm": 0.7703257203102112, "learning_rate": 0.0009930379746835444, "loss": 0.6463, "step": 175010 }, { "epoch": 50.35097813578826, "grad_norm": 1.0350098609924316, "learning_rate": 0.0009929804372842347, "loss": 0.6649, "step": 175020 }, { "epoch": 50.353855005753736, "grad_norm": 0.9478449821472168, "learning_rate": 0.0009929228998849253, "loss": 0.6024, "step": 175030 }, { "epoch": 50.35673187571922, "grad_norm": 1.8721287250518799, "learning_rate": 0.0009928653624856156, "loss": 0.5222, "step": 175040 }, { "epoch": 50.359608745684696, "grad_norm": 1.3394978046417236, "learning_rate": 0.000992807825086306, "loss": 0.4362, "step": 175050 }, { "epoch": 50.36248561565017, "grad_norm": 0.8162395358085632, "learning_rate": 0.0009927502876869965, "loss": 0.4299, "step": 175060 }, { "epoch": 50.36536248561565, "grad_norm": 1.3330585956573486, "learning_rate": 0.0009926927502876871, "loss": 0.6139, "step": 175070 }, { "epoch": 50.368239355581125, "grad_norm": 0.899433434009552, "learning_rate": 0.0009926352128883775, "loss": 0.5536, "step": 175080 }, { "epoch": 50.37111622554661, "grad_norm": 0.7274063229560852, "learning_rate": 0.000992577675489068, "loss": 0.4299, "step": 175090 }, { "epoch": 50.373993095512084, "grad_norm": 0.8675907254219055, "learning_rate": 0.0009925201380897584, "loss": 0.6079, "step": 175100 }, { "epoch": 50.37686996547756, "grad_norm": 1.6545445919036865, "learning_rate": 0.0009924626006904487, "loss": 0.5227, "step": 175110 }, { "epoch": 50.379746835443036, "grad_norm": 1.7024809122085571, "learning_rate": 0.0009924050632911393, "loss": 0.5862, "step": 175120 }, { "epoch": 50.38262370540851, "grad_norm": 1.3083170652389526, "learning_rate": 0.0009923475258918298, "loss": 0.6529, "step": 175130 }, { "epoch": 50.385500575373996, "grad_norm": 1.1658607721328735, "learning_rate": 0.0009922899884925202, "loss": 0.6935, "step": 175140 }, { "epoch": 50.38837744533947, "grad_norm": 0.8558002710342407, "learning_rate": 0.0009922324510932105, "loss": 0.4281, "step": 175150 }, { "epoch": 50.39125431530495, "grad_norm": 0.8228013515472412, "learning_rate": 0.000992174913693901, "loss": 0.4965, "step": 175160 }, { "epoch": 50.394131185270425, "grad_norm": 1.4933619499206543, "learning_rate": 0.0009921173762945914, "loss": 0.5568, "step": 175170 }, { "epoch": 50.3970080552359, "grad_norm": 1.6506836414337158, "learning_rate": 0.000992059838895282, "loss": 0.5152, "step": 175180 }, { "epoch": 50.399884925201384, "grad_norm": 1.0735523700714111, "learning_rate": 0.0009920023014959724, "loss": 0.4464, "step": 175190 }, { "epoch": 50.40276179516686, "grad_norm": 2.434941530227661, "learning_rate": 0.000991944764096663, "loss": 0.537, "step": 175200 }, { "epoch": 50.40563866513234, "grad_norm": 1.6977771520614624, "learning_rate": 0.0009918872266973533, "loss": 0.5672, "step": 175210 }, { "epoch": 50.40851553509781, "grad_norm": 1.2519397735595703, "learning_rate": 0.0009918296892980438, "loss": 0.4903, "step": 175220 }, { "epoch": 50.41139240506329, "grad_norm": 1.468996286392212, "learning_rate": 0.0009917721518987342, "loss": 0.4898, "step": 175230 }, { "epoch": 50.41426927502877, "grad_norm": 1.0571528673171997, "learning_rate": 0.0009917146144994247, "loss": 0.5574, "step": 175240 }, { "epoch": 50.41714614499425, "grad_norm": 1.5349377393722534, "learning_rate": 0.000991657077100115, "loss": 0.554, "step": 175250 }, { "epoch": 50.420023014959725, "grad_norm": 1.1316912174224854, "learning_rate": 0.0009915995397008054, "loss": 0.4779, "step": 175260 }, { "epoch": 50.4228998849252, "grad_norm": 0.8641403317451477, "learning_rate": 0.000991542002301496, "loss": 0.6939, "step": 175270 }, { "epoch": 50.42577675489068, "grad_norm": 0.911833643913269, "learning_rate": 0.0009914844649021866, "loss": 0.5291, "step": 175280 }, { "epoch": 50.42865362485615, "grad_norm": 0.8908498287200928, "learning_rate": 0.000991426927502877, "loss": 0.4774, "step": 175290 }, { "epoch": 50.43153049482164, "grad_norm": 1.2240779399871826, "learning_rate": 0.0009913693901035673, "loss": 0.5135, "step": 175300 }, { "epoch": 50.43440736478711, "grad_norm": 0.6674096584320068, "learning_rate": 0.0009913118527042578, "loss": 0.5486, "step": 175310 }, { "epoch": 50.43728423475259, "grad_norm": 2.4863972663879395, "learning_rate": 0.0009912543153049482, "loss": 0.6065, "step": 175320 }, { "epoch": 50.440161104718065, "grad_norm": 0.7625817656517029, "learning_rate": 0.0009911967779056387, "loss": 0.4712, "step": 175330 }, { "epoch": 50.44303797468354, "grad_norm": 2.9948015213012695, "learning_rate": 0.000991139240506329, "loss": 0.5863, "step": 175340 }, { "epoch": 50.445914844649025, "grad_norm": 1.1814075708389282, "learning_rate": 0.0009910817031070196, "loss": 0.4475, "step": 175350 }, { "epoch": 50.4487917146145, "grad_norm": 1.082506775856018, "learning_rate": 0.00099102416570771, "loss": 0.5215, "step": 175360 }, { "epoch": 50.45166858457998, "grad_norm": 0.9629687666893005, "learning_rate": 0.0009909666283084006, "loss": 0.639, "step": 175370 }, { "epoch": 50.45454545454545, "grad_norm": 1.957442283630371, "learning_rate": 0.000990909090909091, "loss": 0.563, "step": 175380 }, { "epoch": 50.45742232451093, "grad_norm": 2.130002021789551, "learning_rate": 0.0009908515535097815, "loss": 0.6113, "step": 175390 }, { "epoch": 50.46029919447641, "grad_norm": 1.6904032230377197, "learning_rate": 0.0009907940161104718, "loss": 0.4825, "step": 175400 }, { "epoch": 50.46317606444189, "grad_norm": 1.8895435333251953, "learning_rate": 0.0009907364787111622, "loss": 0.592, "step": 175410 }, { "epoch": 50.466052934407365, "grad_norm": 0.844121515750885, "learning_rate": 0.0009906789413118527, "loss": 0.6846, "step": 175420 }, { "epoch": 50.46892980437284, "grad_norm": 1.2434840202331543, "learning_rate": 0.0009906214039125433, "loss": 0.6018, "step": 175430 }, { "epoch": 50.47180667433832, "grad_norm": 1.154750108718872, "learning_rate": 0.0009905638665132336, "loss": 0.5501, "step": 175440 }, { "epoch": 50.4746835443038, "grad_norm": 2.1662704944610596, "learning_rate": 0.000990506329113924, "loss": 0.468, "step": 175450 }, { "epoch": 50.47756041426928, "grad_norm": 0.9776313900947571, "learning_rate": 0.0009904487917146146, "loss": 0.4956, "step": 175460 }, { "epoch": 50.48043728423475, "grad_norm": 1.3169366121292114, "learning_rate": 0.000990391254315305, "loss": 0.4224, "step": 175470 }, { "epoch": 50.48331415420023, "grad_norm": 1.2030243873596191, "learning_rate": 0.0009903337169159955, "loss": 0.4416, "step": 175480 }, { "epoch": 50.486191024165706, "grad_norm": 0.9935893416404724, "learning_rate": 0.0009902761795166858, "loss": 0.4115, "step": 175490 }, { "epoch": 50.48906789413118, "grad_norm": 0.816813051700592, "learning_rate": 0.0009902186421173764, "loss": 0.6194, "step": 175500 }, { "epoch": 50.491944764096665, "grad_norm": 1.0783578157424927, "learning_rate": 0.0009901611047180667, "loss": 0.6116, "step": 175510 }, { "epoch": 50.49482163406214, "grad_norm": 0.9140645861625671, "learning_rate": 0.000990103567318757, "loss": 0.6341, "step": 175520 }, { "epoch": 50.49769850402762, "grad_norm": 1.0540196895599365, "learning_rate": 0.0009900460299194476, "loss": 0.4978, "step": 175530 }, { "epoch": 50.500575373993094, "grad_norm": 1.860351324081421, "learning_rate": 0.0009899884925201382, "loss": 0.4986, "step": 175540 }, { "epoch": 50.50345224395857, "grad_norm": 0.986946165561676, "learning_rate": 0.0009899309551208285, "loss": 0.5578, "step": 175550 }, { "epoch": 50.50632911392405, "grad_norm": 1.2729710340499878, "learning_rate": 0.0009898734177215189, "loss": 0.5399, "step": 175560 }, { "epoch": 50.50920598388953, "grad_norm": 0.8982555866241455, "learning_rate": 0.0009898158803222095, "loss": 0.4184, "step": 175570 }, { "epoch": 50.512082853855006, "grad_norm": 1.1610125303268433, "learning_rate": 0.0009897583429228998, "loss": 0.4828, "step": 175580 }, { "epoch": 50.51495972382048, "grad_norm": 0.8031913042068481, "learning_rate": 0.0009897008055235904, "loss": 0.4482, "step": 175590 }, { "epoch": 50.51783659378596, "grad_norm": 0.6806146502494812, "learning_rate": 0.000989643268124281, "loss": 0.4074, "step": 175600 }, { "epoch": 50.52071346375144, "grad_norm": 0.9617199301719666, "learning_rate": 0.0009895857307249713, "loss": 0.5684, "step": 175610 }, { "epoch": 50.52359033371692, "grad_norm": 1.8512077331542969, "learning_rate": 0.0009895281933256616, "loss": 0.5495, "step": 175620 }, { "epoch": 50.526467203682394, "grad_norm": 0.8708090782165527, "learning_rate": 0.0009894706559263522, "loss": 0.5471, "step": 175630 }, { "epoch": 50.52934407364787, "grad_norm": 0.9402784109115601, "learning_rate": 0.0009894131185270425, "loss": 0.501, "step": 175640 }, { "epoch": 50.532220943613346, "grad_norm": 1.0212947130203247, "learning_rate": 0.000989355581127733, "loss": 0.6262, "step": 175650 }, { "epoch": 50.53509781357883, "grad_norm": 1.178052306175232, "learning_rate": 0.0009892980437284234, "loss": 0.5658, "step": 175660 }, { "epoch": 50.537974683544306, "grad_norm": 1.574540615081787, "learning_rate": 0.000989240506329114, "loss": 0.7594, "step": 175670 }, { "epoch": 50.54085155350978, "grad_norm": 1.6091618537902832, "learning_rate": 0.0009891829689298044, "loss": 0.5861, "step": 175680 }, { "epoch": 50.54372842347526, "grad_norm": 0.6001691222190857, "learning_rate": 0.000989125431530495, "loss": 0.4304, "step": 175690 }, { "epoch": 50.546605293440734, "grad_norm": 1.4643348455429077, "learning_rate": 0.0009890678941311853, "loss": 0.4745, "step": 175700 }, { "epoch": 50.54948216340621, "grad_norm": 1.0414838790893555, "learning_rate": 0.0009890103567318758, "loss": 0.7724, "step": 175710 }, { "epoch": 50.552359033371694, "grad_norm": 1.6157007217407227, "learning_rate": 0.0009889528193325662, "loss": 0.6625, "step": 175720 }, { "epoch": 50.55523590333717, "grad_norm": 0.7818509340286255, "learning_rate": 0.0009888952819332565, "loss": 0.4624, "step": 175730 }, { "epoch": 50.558112773302646, "grad_norm": 2.0803990364074707, "learning_rate": 0.000988837744533947, "loss": 0.6066, "step": 175740 }, { "epoch": 50.56098964326812, "grad_norm": 1.165501594543457, "learning_rate": 0.0009887802071346377, "loss": 0.551, "step": 175750 }, { "epoch": 50.5638665132336, "grad_norm": 0.8472060561180115, "learning_rate": 0.000988722669735328, "loss": 0.5184, "step": 175760 }, { "epoch": 50.56674338319908, "grad_norm": 0.8816019892692566, "learning_rate": 0.0009886651323360183, "loss": 0.4613, "step": 175770 }, { "epoch": 50.56962025316456, "grad_norm": 0.9067543148994446, "learning_rate": 0.000988607594936709, "loss": 0.4885, "step": 175780 }, { "epoch": 50.572497123130034, "grad_norm": 1.109226942062378, "learning_rate": 0.0009885500575373993, "loss": 0.5345, "step": 175790 }, { "epoch": 50.57537399309551, "grad_norm": 1.4025095701217651, "learning_rate": 0.0009884925201380898, "loss": 0.5998, "step": 175800 }, { "epoch": 50.57825086306099, "grad_norm": 1.0772945880889893, "learning_rate": 0.0009884349827387802, "loss": 0.5884, "step": 175810 }, { "epoch": 50.58112773302647, "grad_norm": 1.4366827011108398, "learning_rate": 0.0009883774453394707, "loss": 0.7309, "step": 175820 }, { "epoch": 50.584004602991946, "grad_norm": 1.220406413078308, "learning_rate": 0.000988319907940161, "loss": 0.5048, "step": 175830 }, { "epoch": 50.58688147295742, "grad_norm": 1.093662142753601, "learning_rate": 0.0009882623705408516, "loss": 0.5461, "step": 175840 }, { "epoch": 50.5897583429229, "grad_norm": 1.7198487520217896, "learning_rate": 0.000988204833141542, "loss": 0.5616, "step": 175850 }, { "epoch": 50.592635212888375, "grad_norm": 1.3506752252578735, "learning_rate": 0.0009881472957422326, "loss": 0.6101, "step": 175860 }, { "epoch": 50.59551208285386, "grad_norm": 1.0672062635421753, "learning_rate": 0.000988089758342923, "loss": 0.4437, "step": 175870 }, { "epoch": 50.598388952819334, "grad_norm": 1.4140870571136475, "learning_rate": 0.0009880322209436132, "loss": 0.6246, "step": 175880 }, { "epoch": 50.60126582278481, "grad_norm": 1.6818209886550903, "learning_rate": 0.0009879746835443038, "loss": 0.5116, "step": 175890 }, { "epoch": 50.60414269275029, "grad_norm": 1.3010106086730957, "learning_rate": 0.0009879171461449944, "loss": 0.6412, "step": 175900 }, { "epoch": 50.60701956271576, "grad_norm": 2.7872109413146973, "learning_rate": 0.0009878596087456847, "loss": 0.6147, "step": 175910 }, { "epoch": 50.60989643268124, "grad_norm": 1.500413417816162, "learning_rate": 0.000987802071346375, "loss": 0.5618, "step": 175920 }, { "epoch": 50.61277330264672, "grad_norm": 1.1498693227767944, "learning_rate": 0.0009877445339470656, "loss": 0.6754, "step": 175930 }, { "epoch": 50.6156501726122, "grad_norm": 1.3647099733352661, "learning_rate": 0.000987686996547756, "loss": 0.5817, "step": 175940 }, { "epoch": 50.618527042577675, "grad_norm": 1.6332147121429443, "learning_rate": 0.0009876294591484465, "loss": 0.5589, "step": 175950 }, { "epoch": 50.62140391254315, "grad_norm": 0.7780968546867371, "learning_rate": 0.000987571921749137, "loss": 0.5343, "step": 175960 }, { "epoch": 50.62428078250863, "grad_norm": 0.7547972798347473, "learning_rate": 0.0009875143843498275, "loss": 0.5345, "step": 175970 }, { "epoch": 50.62715765247411, "grad_norm": 2.3526101112365723, "learning_rate": 0.0009874568469505178, "loss": 0.6087, "step": 175980 }, { "epoch": 50.63003452243959, "grad_norm": 0.9747403860092163, "learning_rate": 0.0009873993095512084, "loss": 0.6407, "step": 175990 }, { "epoch": 50.63291139240506, "grad_norm": 0.7902271747589111, "learning_rate": 0.0009873417721518987, "loss": 0.5159, "step": 176000 }, { "epoch": 50.63578826237054, "grad_norm": 0.7462774515151978, "learning_rate": 0.0009872842347525893, "loss": 0.6077, "step": 176010 }, { "epoch": 50.638665132336016, "grad_norm": 0.710852324962616, "learning_rate": 0.0009872266973532796, "loss": 0.5814, "step": 176020 }, { "epoch": 50.6415420023015, "grad_norm": 1.6014900207519531, "learning_rate": 0.00098716915995397, "loss": 0.5177, "step": 176030 }, { "epoch": 50.644418872266975, "grad_norm": 1.0932849645614624, "learning_rate": 0.0009871116225546605, "loss": 0.4901, "step": 176040 }, { "epoch": 50.64729574223245, "grad_norm": 1.18418550491333, "learning_rate": 0.000987054085155351, "loss": 0.5651, "step": 176050 }, { "epoch": 50.65017261219793, "grad_norm": 1.449005365371704, "learning_rate": 0.0009869965477560414, "loss": 0.5847, "step": 176060 }, { "epoch": 50.653049482163404, "grad_norm": 0.9768629670143127, "learning_rate": 0.000986939010356732, "loss": 0.4633, "step": 176070 }, { "epoch": 50.65592635212889, "grad_norm": 1.6714282035827637, "learning_rate": 0.0009868814729574224, "loss": 0.6874, "step": 176080 }, { "epoch": 50.65880322209436, "grad_norm": 1.1572397947311401, "learning_rate": 0.0009868239355581127, "loss": 0.5494, "step": 176090 }, { "epoch": 50.66168009205984, "grad_norm": 1.0834996700286865, "learning_rate": 0.0009867663981588033, "loss": 0.4839, "step": 176100 }, { "epoch": 50.664556962025316, "grad_norm": 1.138962745666504, "learning_rate": 0.0009867088607594938, "loss": 0.4759, "step": 176110 }, { "epoch": 50.66743383199079, "grad_norm": 1.4646930694580078, "learning_rate": 0.0009866513233601842, "loss": 0.5065, "step": 176120 }, { "epoch": 50.670310701956275, "grad_norm": 1.7756962776184082, "learning_rate": 0.0009865937859608745, "loss": 0.5406, "step": 176130 }, { "epoch": 50.67318757192175, "grad_norm": 1.7900573015213013, "learning_rate": 0.000986536248561565, "loss": 0.4863, "step": 176140 }, { "epoch": 50.67606444188723, "grad_norm": 0.791283130645752, "learning_rate": 0.0009864787111622554, "loss": 0.5846, "step": 176150 }, { "epoch": 50.678941311852704, "grad_norm": 1.058470606803894, "learning_rate": 0.000986421173762946, "loss": 0.6044, "step": 176160 }, { "epoch": 50.68181818181818, "grad_norm": 1.3925609588623047, "learning_rate": 0.0009863636363636363, "loss": 0.4975, "step": 176170 }, { "epoch": 50.684695051783656, "grad_norm": 1.1359713077545166, "learning_rate": 0.000986306098964327, "loss": 0.6687, "step": 176180 }, { "epoch": 50.68757192174914, "grad_norm": 1.7752342224121094, "learning_rate": 0.0009862485615650173, "loss": 0.5041, "step": 176190 }, { "epoch": 50.690448791714616, "grad_norm": 1.3690085411071777, "learning_rate": 0.0009861910241657078, "loss": 0.636, "step": 176200 }, { "epoch": 50.69332566168009, "grad_norm": 2.487102508544922, "learning_rate": 0.0009861334867663982, "loss": 0.5926, "step": 176210 }, { "epoch": 50.69620253164557, "grad_norm": 1.650943636894226, "learning_rate": 0.0009860759493670887, "loss": 0.6212, "step": 176220 }, { "epoch": 50.699079401611044, "grad_norm": 1.2034999132156372, "learning_rate": 0.000986018411967779, "loss": 0.5956, "step": 176230 }, { "epoch": 50.70195627157653, "grad_norm": 1.26056969165802, "learning_rate": 0.0009859608745684694, "loss": 0.5343, "step": 176240 }, { "epoch": 50.704833141542004, "grad_norm": 1.2995245456695557, "learning_rate": 0.00098590333716916, "loss": 0.51, "step": 176250 }, { "epoch": 50.70771001150748, "grad_norm": 1.4246885776519775, "learning_rate": 0.0009858457997698506, "loss": 0.6362, "step": 176260 }, { "epoch": 50.710586881472956, "grad_norm": 2.1721956729888916, "learning_rate": 0.000985788262370541, "loss": 0.4544, "step": 176270 }, { "epoch": 50.71346375143843, "grad_norm": 1.3962414264678955, "learning_rate": 0.0009857307249712313, "loss": 0.5635, "step": 176280 }, { "epoch": 50.716340621403916, "grad_norm": 1.18110990524292, "learning_rate": 0.0009856731875719218, "loss": 0.6906, "step": 176290 }, { "epoch": 50.71921749136939, "grad_norm": 0.7484776377677917, "learning_rate": 0.0009856156501726122, "loss": 0.5894, "step": 176300 }, { "epoch": 50.72209436133487, "grad_norm": 1.079503059387207, "learning_rate": 0.0009855581127733027, "loss": 0.5219, "step": 176310 }, { "epoch": 50.724971231300344, "grad_norm": 1.269453763961792, "learning_rate": 0.000985500575373993, "loss": 0.5017, "step": 176320 }, { "epoch": 50.72784810126582, "grad_norm": 1.8425780534744263, "learning_rate": 0.0009854430379746836, "loss": 0.5403, "step": 176330 }, { "epoch": 50.730724971231304, "grad_norm": 1.2696759700775146, "learning_rate": 0.000985385500575374, "loss": 0.5228, "step": 176340 }, { "epoch": 50.73360184119678, "grad_norm": 0.8418923020362854, "learning_rate": 0.0009853279631760643, "loss": 0.5988, "step": 176350 }, { "epoch": 50.736478711162256, "grad_norm": 1.6133089065551758, "learning_rate": 0.000985270425776755, "loss": 0.5621, "step": 176360 }, { "epoch": 50.73935558112773, "grad_norm": 0.7696154117584229, "learning_rate": 0.0009852128883774455, "loss": 0.5254, "step": 176370 }, { "epoch": 50.74223245109321, "grad_norm": 1.3856314420700073, "learning_rate": 0.0009851553509781358, "loss": 0.5111, "step": 176380 }, { "epoch": 50.745109321058685, "grad_norm": 1.4154109954833984, "learning_rate": 0.0009850978135788262, "loss": 0.6082, "step": 176390 }, { "epoch": 50.74798619102417, "grad_norm": 0.9972489476203918, "learning_rate": 0.0009850402761795167, "loss": 0.4915, "step": 176400 }, { "epoch": 50.750863060989644, "grad_norm": 1.5944197177886963, "learning_rate": 0.000984982738780207, "loss": 0.5103, "step": 176410 }, { "epoch": 50.75373993095512, "grad_norm": 1.1732476949691772, "learning_rate": 0.0009849252013808976, "loss": 0.5975, "step": 176420 }, { "epoch": 50.7566168009206, "grad_norm": 1.8256455659866333, "learning_rate": 0.000984867663981588, "loss": 0.625, "step": 176430 }, { "epoch": 50.75949367088607, "grad_norm": 1.2487584352493286, "learning_rate": 0.0009848101265822785, "loss": 0.4514, "step": 176440 }, { "epoch": 50.762370540851556, "grad_norm": 0.9175569415092468, "learning_rate": 0.0009847525891829689, "loss": 0.5391, "step": 176450 }, { "epoch": 50.76524741081703, "grad_norm": 1.249856948852539, "learning_rate": 0.0009846950517836594, "loss": 0.4852, "step": 176460 }, { "epoch": 50.76812428078251, "grad_norm": 1.4825233221054077, "learning_rate": 0.0009846375143843498, "loss": 0.4219, "step": 176470 }, { "epoch": 50.771001150747985, "grad_norm": 0.9912077188491821, "learning_rate": 0.0009845799769850404, "loss": 0.5285, "step": 176480 }, { "epoch": 50.77387802071346, "grad_norm": 1.1607646942138672, "learning_rate": 0.0009845224395857307, "loss": 0.461, "step": 176490 }, { "epoch": 50.776754890678944, "grad_norm": 1.4093091487884521, "learning_rate": 0.000984464902186421, "loss": 0.5061, "step": 176500 }, { "epoch": 50.77963176064442, "grad_norm": 0.9943400621414185, "learning_rate": 0.0009844073647871116, "loss": 0.555, "step": 176510 }, { "epoch": 50.7825086306099, "grad_norm": 0.6172241568565369, "learning_rate": 0.0009843498273878022, "loss": 0.5429, "step": 176520 }, { "epoch": 50.78538550057537, "grad_norm": 1.5486475229263306, "learning_rate": 0.0009842922899884925, "loss": 0.5344, "step": 176530 }, { "epoch": 50.78826237054085, "grad_norm": 0.6210601329803467, "learning_rate": 0.0009842347525891829, "loss": 0.4091, "step": 176540 }, { "epoch": 50.79113924050633, "grad_norm": 1.7486101388931274, "learning_rate": 0.0009841772151898734, "loss": 0.543, "step": 176550 }, { "epoch": 50.79401611047181, "grad_norm": 1.2274292707443237, "learning_rate": 0.0009841196777905638, "loss": 0.5311, "step": 176560 }, { "epoch": 50.796892980437285, "grad_norm": 1.3276571035385132, "learning_rate": 0.0009840621403912544, "loss": 0.5604, "step": 176570 }, { "epoch": 50.79976985040276, "grad_norm": 1.4387348890304565, "learning_rate": 0.000984004602991945, "loss": 0.4229, "step": 176580 }, { "epoch": 50.80264672036824, "grad_norm": 1.313585877418518, "learning_rate": 0.0009839470655926353, "loss": 0.6407, "step": 176590 }, { "epoch": 50.80552359033371, "grad_norm": 1.2225608825683594, "learning_rate": 0.0009838895281933256, "loss": 0.5382, "step": 176600 }, { "epoch": 50.8084004602992, "grad_norm": 1.7451894283294678, "learning_rate": 0.0009838319907940162, "loss": 0.5437, "step": 176610 }, { "epoch": 50.81127733026467, "grad_norm": 1.0044662952423096, "learning_rate": 0.0009837744533947065, "loss": 0.5389, "step": 176620 }, { "epoch": 50.81415420023015, "grad_norm": 0.975250244140625, "learning_rate": 0.000983716915995397, "loss": 0.5372, "step": 176630 }, { "epoch": 50.817031070195625, "grad_norm": 1.006409764289856, "learning_rate": 0.0009836593785960874, "loss": 0.4848, "step": 176640 }, { "epoch": 50.8199079401611, "grad_norm": 0.7300835847854614, "learning_rate": 0.000983601841196778, "loss": 0.4969, "step": 176650 }, { "epoch": 50.822784810126585, "grad_norm": 1.7872363328933716, "learning_rate": 0.0009835443037974683, "loss": 0.4349, "step": 176660 }, { "epoch": 50.82566168009206, "grad_norm": 2.0936906337738037, "learning_rate": 0.000983486766398159, "loss": 0.7505, "step": 176670 }, { "epoch": 50.82853855005754, "grad_norm": 0.9054937362670898, "learning_rate": 0.0009834292289988493, "loss": 0.5798, "step": 176680 }, { "epoch": 50.83141542002301, "grad_norm": 1.0231566429138184, "learning_rate": 0.0009833716915995398, "loss": 0.4516, "step": 176690 }, { "epoch": 50.83429228998849, "grad_norm": 0.48137402534484863, "learning_rate": 0.0009833141542002302, "loss": 0.6001, "step": 176700 }, { "epoch": 50.83716915995397, "grad_norm": 0.9167951345443726, "learning_rate": 0.0009832566168009205, "loss": 0.5729, "step": 176710 }, { "epoch": 50.84004602991945, "grad_norm": 0.7904790639877319, "learning_rate": 0.000983199079401611, "loss": 0.4686, "step": 176720 }, { "epoch": 50.842922899884925, "grad_norm": 1.1161680221557617, "learning_rate": 0.0009831415420023016, "loss": 0.5366, "step": 176730 }, { "epoch": 50.8457997698504, "grad_norm": 1.2538245916366577, "learning_rate": 0.000983084004602992, "loss": 0.6571, "step": 176740 }, { "epoch": 50.84867663981588, "grad_norm": 0.8564426898956299, "learning_rate": 0.0009830264672036823, "loss": 0.6435, "step": 176750 }, { "epoch": 50.85155350978136, "grad_norm": 1.112790822982788, "learning_rate": 0.000982968929804373, "loss": 0.6648, "step": 176760 }, { "epoch": 50.85443037974684, "grad_norm": 2.5875043869018555, "learning_rate": 0.0009829113924050632, "loss": 0.6216, "step": 176770 }, { "epoch": 50.85730724971231, "grad_norm": 1.2902379035949707, "learning_rate": 0.0009828538550057538, "loss": 0.5533, "step": 176780 }, { "epoch": 50.86018411967779, "grad_norm": 1.174667239189148, "learning_rate": 0.0009827963176064442, "loss": 0.5097, "step": 176790 }, { "epoch": 50.863060989643266, "grad_norm": 1.7631627321243286, "learning_rate": 0.0009827387802071347, "loss": 0.4899, "step": 176800 }, { "epoch": 50.86593785960875, "grad_norm": 1.6184147596359253, "learning_rate": 0.000982681242807825, "loss": 0.5243, "step": 176810 }, { "epoch": 50.868814729574225, "grad_norm": 1.3917121887207031, "learning_rate": 0.0009826237054085156, "loss": 0.6234, "step": 176820 }, { "epoch": 50.8716915995397, "grad_norm": 2.183048725128174, "learning_rate": 0.000982566168009206, "loss": 0.6417, "step": 176830 }, { "epoch": 50.87456846950518, "grad_norm": 1.4843367338180542, "learning_rate": 0.0009825086306098965, "loss": 0.4552, "step": 176840 }, { "epoch": 50.877445339470654, "grad_norm": 0.999593198299408, "learning_rate": 0.0009824510932105869, "loss": 0.5928, "step": 176850 }, { "epoch": 50.88032220943613, "grad_norm": 2.0818989276885986, "learning_rate": 0.0009823935558112772, "loss": 0.5214, "step": 176860 }, { "epoch": 50.883199079401614, "grad_norm": 1.5828592777252197, "learning_rate": 0.0009823360184119678, "loss": 0.7069, "step": 176870 }, { "epoch": 50.88607594936709, "grad_norm": 1.7352426052093506, "learning_rate": 0.0009822784810126584, "loss": 0.713, "step": 176880 }, { "epoch": 50.888952819332566, "grad_norm": 1.2145017385482788, "learning_rate": 0.0009822209436133487, "loss": 0.5305, "step": 176890 }, { "epoch": 50.89182968929804, "grad_norm": 0.764295756816864, "learning_rate": 0.000982163406214039, "loss": 0.4081, "step": 176900 }, { "epoch": 50.89470655926352, "grad_norm": 1.0732654333114624, "learning_rate": 0.0009821058688147296, "loss": 0.5754, "step": 176910 }, { "epoch": 50.897583429229, "grad_norm": 1.561767339706421, "learning_rate": 0.00098204833141542, "loss": 0.6541, "step": 176920 }, { "epoch": 50.90046029919448, "grad_norm": 1.3557692766189575, "learning_rate": 0.0009819907940161105, "loss": 0.6352, "step": 176930 }, { "epoch": 50.903337169159954, "grad_norm": 1.1119272708892822, "learning_rate": 0.000981933256616801, "loss": 0.5862, "step": 176940 }, { "epoch": 50.90621403912543, "grad_norm": 1.7100415229797363, "learning_rate": 0.0009818757192174914, "loss": 0.5749, "step": 176950 }, { "epoch": 50.90909090909091, "grad_norm": 1.2020057439804077, "learning_rate": 0.0009818181818181818, "loss": 0.5023, "step": 176960 }, { "epoch": 50.91196777905639, "grad_norm": 1.3793339729309082, "learning_rate": 0.0009817606444188724, "loss": 0.4883, "step": 176970 }, { "epoch": 50.914844649021866, "grad_norm": 1.2776461839675903, "learning_rate": 0.0009817031070195627, "loss": 0.6547, "step": 176980 }, { "epoch": 50.91772151898734, "grad_norm": 1.600369930267334, "learning_rate": 0.0009816455696202533, "loss": 0.4766, "step": 176990 }, { "epoch": 50.92059838895282, "grad_norm": 1.8052676916122437, "learning_rate": 0.0009815880322209436, "loss": 0.5015, "step": 177000 }, { "epoch": 50.923475258918295, "grad_norm": 1.539408564567566, "learning_rate": 0.000981530494821634, "loss": 0.5256, "step": 177010 }, { "epoch": 50.92635212888378, "grad_norm": 1.173435091972351, "learning_rate": 0.0009814729574223245, "loss": 0.5157, "step": 177020 }, { "epoch": 50.929228998849254, "grad_norm": 1.7152949571609497, "learning_rate": 0.000981415420023015, "loss": 0.5489, "step": 177030 }, { "epoch": 50.93210586881473, "grad_norm": 0.9861384630203247, "learning_rate": 0.0009813578826237054, "loss": 0.5333, "step": 177040 }, { "epoch": 50.93498273878021, "grad_norm": 0.9573869109153748, "learning_rate": 0.000981300345224396, "loss": 0.5523, "step": 177050 }, { "epoch": 50.93785960874568, "grad_norm": 0.8983041048049927, "learning_rate": 0.0009812428078250863, "loss": 0.5103, "step": 177060 }, { "epoch": 50.94073647871116, "grad_norm": 1.0798299312591553, "learning_rate": 0.0009811852704257767, "loss": 0.5641, "step": 177070 }, { "epoch": 50.94361334867664, "grad_norm": 0.6826785206794739, "learning_rate": 0.0009811277330264673, "loss": 0.6034, "step": 177080 }, { "epoch": 50.94649021864212, "grad_norm": 1.298649787902832, "learning_rate": 0.0009810701956271578, "loss": 0.7221, "step": 177090 }, { "epoch": 50.949367088607595, "grad_norm": 0.9480109810829163, "learning_rate": 0.0009810126582278482, "loss": 0.4888, "step": 177100 }, { "epoch": 50.95224395857307, "grad_norm": 0.814931333065033, "learning_rate": 0.0009809551208285385, "loss": 0.5703, "step": 177110 }, { "epoch": 50.95512082853855, "grad_norm": 1.3059401512145996, "learning_rate": 0.0009808975834292289, "loss": 0.4565, "step": 177120 }, { "epoch": 50.95799769850403, "grad_norm": 1.5487149953842163, "learning_rate": 0.0009808400460299194, "loss": 0.6616, "step": 177130 }, { "epoch": 50.96087456846951, "grad_norm": 1.1564664840698242, "learning_rate": 0.00098078250863061, "loss": 0.4699, "step": 177140 }, { "epoch": 50.96375143843498, "grad_norm": 1.3336838483810425, "learning_rate": 0.0009807249712313003, "loss": 0.756, "step": 177150 }, { "epoch": 50.96662830840046, "grad_norm": 3.7335963249206543, "learning_rate": 0.000980667433831991, "loss": 0.4784, "step": 177160 }, { "epoch": 50.969505178365935, "grad_norm": 2.272023916244507, "learning_rate": 0.0009806098964326812, "loss": 0.442, "step": 177170 }, { "epoch": 50.97238204833142, "grad_norm": 0.9502289891242981, "learning_rate": 0.0009805523590333716, "loss": 0.5298, "step": 177180 }, { "epoch": 50.975258918296895, "grad_norm": 1.053249478340149, "learning_rate": 0.0009804948216340622, "loss": 0.4538, "step": 177190 }, { "epoch": 50.97813578826237, "grad_norm": 1.0794669389724731, "learning_rate": 0.0009804372842347527, "loss": 0.5146, "step": 177200 }, { "epoch": 50.98101265822785, "grad_norm": 1.714174747467041, "learning_rate": 0.000980379746835443, "loss": 0.4472, "step": 177210 }, { "epoch": 50.98388952819332, "grad_norm": 2.451793909072876, "learning_rate": 0.0009803222094361334, "loss": 0.7328, "step": 177220 }, { "epoch": 50.98676639815881, "grad_norm": 0.9755232930183411, "learning_rate": 0.000980264672036824, "loss": 0.4652, "step": 177230 }, { "epoch": 50.98964326812428, "grad_norm": 0.789792001247406, "learning_rate": 0.0009802071346375143, "loss": 0.5384, "step": 177240 }, { "epoch": 50.99252013808976, "grad_norm": 0.6614104509353638, "learning_rate": 0.000980149597238205, "loss": 0.4596, "step": 177250 }, { "epoch": 50.995397008055235, "grad_norm": 0.7975034117698669, "learning_rate": 0.0009800920598388952, "loss": 0.5763, "step": 177260 }, { "epoch": 50.99827387802071, "grad_norm": 0.7321025729179382, "learning_rate": 0.0009800345224395858, "loss": 0.5265, "step": 177270 }, { "epoch": 51.00115074798619, "grad_norm": 1.733339786529541, "learning_rate": 0.0009799769850402762, "loss": 0.6883, "step": 177280 }, { "epoch": 51.00402761795167, "grad_norm": 1.4662744998931885, "learning_rate": 0.0009799194476409667, "loss": 0.4771, "step": 177290 }, { "epoch": 51.00690448791715, "grad_norm": 0.9193280339241028, "learning_rate": 0.000979861910241657, "loss": 0.4926, "step": 177300 }, { "epoch": 51.00978135788262, "grad_norm": 1.2494016885757446, "learning_rate": 0.0009798043728423476, "loss": 0.4524, "step": 177310 }, { "epoch": 51.0126582278481, "grad_norm": 0.5066472887992859, "learning_rate": 0.000979746835443038, "loss": 0.4044, "step": 177320 }, { "epoch": 51.015535097813576, "grad_norm": 0.7566288113594055, "learning_rate": 0.0009796892980437283, "loss": 0.5119, "step": 177330 }, { "epoch": 51.01841196777906, "grad_norm": 1.3644843101501465, "learning_rate": 0.0009796317606444189, "loss": 0.4949, "step": 177340 }, { "epoch": 51.021288837744535, "grad_norm": 1.2176613807678223, "learning_rate": 0.0009795742232451094, "loss": 0.4369, "step": 177350 }, { "epoch": 51.02416570771001, "grad_norm": 1.72597074508667, "learning_rate": 0.0009795166858457998, "loss": 0.536, "step": 177360 }, { "epoch": 51.02704257767549, "grad_norm": 3.2285921573638916, "learning_rate": 0.0009794591484464901, "loss": 0.5233, "step": 177370 }, { "epoch": 51.029919447640964, "grad_norm": 0.9159071445465088, "learning_rate": 0.0009794016110471807, "loss": 0.4835, "step": 177380 }, { "epoch": 51.03279631760645, "grad_norm": 5.343815326690674, "learning_rate": 0.000979344073647871, "loss": 0.529, "step": 177390 }, { "epoch": 51.03567318757192, "grad_norm": 1.1725480556488037, "learning_rate": 0.0009792865362485616, "loss": 0.5147, "step": 177400 }, { "epoch": 51.0385500575374, "grad_norm": 1.084173560142517, "learning_rate": 0.000979228998849252, "loss": 0.628, "step": 177410 }, { "epoch": 51.041426927502876, "grad_norm": 0.7747770547866821, "learning_rate": 0.0009791714614499425, "loss": 0.3797, "step": 177420 }, { "epoch": 51.04430379746835, "grad_norm": 0.7902054190635681, "learning_rate": 0.0009791139240506329, "loss": 0.4149, "step": 177430 }, { "epoch": 51.047180667433835, "grad_norm": 0.8270087242126465, "learning_rate": 0.0009790563866513234, "loss": 0.4838, "step": 177440 }, { "epoch": 51.05005753739931, "grad_norm": 1.2941402196884155, "learning_rate": 0.0009789988492520138, "loss": 0.4998, "step": 177450 }, { "epoch": 51.05293440736479, "grad_norm": 1.2318891286849976, "learning_rate": 0.0009789413118527043, "loss": 0.4822, "step": 177460 }, { "epoch": 51.055811277330264, "grad_norm": 1.9693245887756348, "learning_rate": 0.0009788837744533947, "loss": 0.4581, "step": 177470 }, { "epoch": 51.05868814729574, "grad_norm": 1.7124643325805664, "learning_rate": 0.000978826237054085, "loss": 0.4979, "step": 177480 }, { "epoch": 51.061565017261216, "grad_norm": 2.269296646118164, "learning_rate": 0.0009787686996547756, "loss": 0.4663, "step": 177490 }, { "epoch": 51.0644418872267, "grad_norm": 1.0496338605880737, "learning_rate": 0.0009787111622554662, "loss": 0.4994, "step": 177500 }, { "epoch": 51.067318757192176, "grad_norm": 1.0555583238601685, "learning_rate": 0.0009786536248561565, "loss": 0.5256, "step": 177510 }, { "epoch": 51.07019562715765, "grad_norm": 0.9183987379074097, "learning_rate": 0.0009785960874568469, "loss": 0.4605, "step": 177520 }, { "epoch": 51.07307249712313, "grad_norm": 1.707442283630371, "learning_rate": 0.0009785385500575374, "loss": 0.487, "step": 177530 }, { "epoch": 51.075949367088604, "grad_norm": 0.6624066829681396, "learning_rate": 0.0009784810126582278, "loss": 0.4595, "step": 177540 }, { "epoch": 51.07882623705409, "grad_norm": 1.1067719459533691, "learning_rate": 0.0009784234752589183, "loss": 0.4791, "step": 177550 }, { "epoch": 51.081703107019564, "grad_norm": 1.9251424074172974, "learning_rate": 0.000978365937859609, "loss": 0.6658, "step": 177560 }, { "epoch": 51.08457997698504, "grad_norm": 1.199723720550537, "learning_rate": 0.0009783084004602993, "loss": 0.4647, "step": 177570 }, { "epoch": 51.087456846950516, "grad_norm": 1.6716713905334473, "learning_rate": 0.0009782508630609896, "loss": 0.5157, "step": 177580 }, { "epoch": 51.09033371691599, "grad_norm": 0.7311263680458069, "learning_rate": 0.0009781933256616802, "loss": 0.5593, "step": 177590 }, { "epoch": 51.093210586881476, "grad_norm": 0.9168660640716553, "learning_rate": 0.0009781357882623705, "loss": 0.476, "step": 177600 }, { "epoch": 51.09608745684695, "grad_norm": 1.68385648727417, "learning_rate": 0.000978078250863061, "loss": 0.5292, "step": 177610 }, { "epoch": 51.09896432681243, "grad_norm": 0.9180042743682861, "learning_rate": 0.0009780207134637514, "loss": 0.4985, "step": 177620 }, { "epoch": 51.101841196777904, "grad_norm": 1.3294451236724854, "learning_rate": 0.000977963176064442, "loss": 0.5288, "step": 177630 }, { "epoch": 51.10471806674338, "grad_norm": 1.7306870222091675, "learning_rate": 0.0009779056386651323, "loss": 0.518, "step": 177640 }, { "epoch": 51.107594936708864, "grad_norm": 0.7787103056907654, "learning_rate": 0.000977848101265823, "loss": 0.4637, "step": 177650 }, { "epoch": 51.11047180667434, "grad_norm": 0.9215006828308105, "learning_rate": 0.0009777905638665132, "loss": 0.4359, "step": 177660 }, { "epoch": 51.113348676639816, "grad_norm": 1.407060980796814, "learning_rate": 0.0009777330264672038, "loss": 0.4927, "step": 177670 }, { "epoch": 51.11622554660529, "grad_norm": 1.3632311820983887, "learning_rate": 0.0009776754890678942, "loss": 0.503, "step": 177680 }, { "epoch": 51.11910241657077, "grad_norm": 1.4998761415481567, "learning_rate": 0.0009776179516685845, "loss": 0.5337, "step": 177690 }, { "epoch": 51.121979286536245, "grad_norm": 1.494166374206543, "learning_rate": 0.000977560414269275, "loss": 0.4186, "step": 177700 }, { "epoch": 51.12485615650173, "grad_norm": 2.9055254459381104, "learning_rate": 0.0009775028768699656, "loss": 0.6044, "step": 177710 }, { "epoch": 51.127733026467205, "grad_norm": 1.4872578382492065, "learning_rate": 0.000977445339470656, "loss": 0.5467, "step": 177720 }, { "epoch": 51.13060989643268, "grad_norm": 1.66365385055542, "learning_rate": 0.0009773878020713463, "loss": 0.5593, "step": 177730 }, { "epoch": 51.13348676639816, "grad_norm": 0.8678449392318726, "learning_rate": 0.0009773302646720369, "loss": 0.6277, "step": 177740 }, { "epoch": 51.13636363636363, "grad_norm": 1.8415958881378174, "learning_rate": 0.0009772727272727272, "loss": 0.4462, "step": 177750 }, { "epoch": 51.139240506329116, "grad_norm": 1.3628968000411987, "learning_rate": 0.0009772151898734178, "loss": 0.4125, "step": 177760 }, { "epoch": 51.14211737629459, "grad_norm": 1.2986565828323364, "learning_rate": 0.0009771576524741081, "loss": 0.4337, "step": 177770 }, { "epoch": 51.14499424626007, "grad_norm": 1.1977970600128174, "learning_rate": 0.0009771001150747987, "loss": 0.4663, "step": 177780 }, { "epoch": 51.147871116225545, "grad_norm": 1.2451943159103394, "learning_rate": 0.000977042577675489, "loss": 0.4381, "step": 177790 }, { "epoch": 51.15074798619102, "grad_norm": 1.8074485063552856, "learning_rate": 0.0009769850402761796, "loss": 0.6604, "step": 177800 }, { "epoch": 51.153624856156505, "grad_norm": 1.1828864812850952, "learning_rate": 0.00097692750287687, "loss": 0.5622, "step": 177810 }, { "epoch": 51.15650172612198, "grad_norm": 1.0736980438232422, "learning_rate": 0.0009768699654775605, "loss": 0.5841, "step": 177820 }, { "epoch": 51.15937859608746, "grad_norm": 1.5105379819869995, "learning_rate": 0.0009768124280782509, "loss": 0.5355, "step": 177830 }, { "epoch": 51.16225546605293, "grad_norm": 1.3876404762268066, "learning_rate": 0.0009767548906789412, "loss": 0.6204, "step": 177840 }, { "epoch": 51.16513233601841, "grad_norm": 2.32991099357605, "learning_rate": 0.0009766973532796318, "loss": 0.4613, "step": 177850 }, { "epoch": 51.16800920598389, "grad_norm": 0.8873666524887085, "learning_rate": 0.0009766398158803224, "loss": 0.5133, "step": 177860 }, { "epoch": 51.17088607594937, "grad_norm": 1.1657543182373047, "learning_rate": 0.0009765822784810127, "loss": 0.6466, "step": 177870 }, { "epoch": 51.173762945914845, "grad_norm": 1.4735352993011475, "learning_rate": 0.000976524741081703, "loss": 0.4665, "step": 177880 }, { "epoch": 51.17663981588032, "grad_norm": 1.9306305646896362, "learning_rate": 0.0009764672036823936, "loss": 0.4472, "step": 177890 }, { "epoch": 51.1795166858458, "grad_norm": 1.2117911577224731, "learning_rate": 0.0009764096662830841, "loss": 0.6306, "step": 177900 }, { "epoch": 51.18239355581128, "grad_norm": 1.5813955068588257, "learning_rate": 0.0009763521288837744, "loss": 0.4775, "step": 177910 }, { "epoch": 51.18527042577676, "grad_norm": 1.1789872646331787, "learning_rate": 0.000976294591484465, "loss": 0.531, "step": 177920 }, { "epoch": 51.18814729574223, "grad_norm": 1.8223903179168701, "learning_rate": 0.0009762370540851554, "loss": 0.6085, "step": 177930 }, { "epoch": 51.19102416570771, "grad_norm": 1.5242156982421875, "learning_rate": 0.0009761795166858458, "loss": 0.5395, "step": 177940 }, { "epoch": 51.193901035673186, "grad_norm": 1.1703293323516846, "learning_rate": 0.0009761219792865362, "loss": 0.4849, "step": 177950 }, { "epoch": 51.19677790563866, "grad_norm": 0.8533491492271423, "learning_rate": 0.0009760644418872268, "loss": 0.531, "step": 177960 }, { "epoch": 51.199654775604145, "grad_norm": 0.9710825681686401, "learning_rate": 0.0009760069044879171, "loss": 0.5936, "step": 177970 }, { "epoch": 51.20253164556962, "grad_norm": 1.3826202154159546, "learning_rate": 0.0009759493670886076, "loss": 0.5169, "step": 177980 }, { "epoch": 51.2054085155351, "grad_norm": 0.9092939496040344, "learning_rate": 0.0009758918296892981, "loss": 0.5634, "step": 177990 }, { "epoch": 51.208285385500574, "grad_norm": 1.1254534721374512, "learning_rate": 0.0009758342922899885, "loss": 0.4538, "step": 178000 }, { "epoch": 51.21116225546605, "grad_norm": 1.5881084203720093, "learning_rate": 0.000975776754890679, "loss": 0.5494, "step": 178010 }, { "epoch": 51.21403912543153, "grad_norm": 0.8125277757644653, "learning_rate": 0.0009757192174913694, "loss": 0.4105, "step": 178020 }, { "epoch": 51.21691599539701, "grad_norm": 1.1781212091445923, "learning_rate": 0.0009756616800920599, "loss": 0.5415, "step": 178030 }, { "epoch": 51.219792865362486, "grad_norm": 1.0031200647354126, "learning_rate": 0.0009756041426927503, "loss": 0.5247, "step": 178040 }, { "epoch": 51.22266973532796, "grad_norm": 0.7717227339744568, "learning_rate": 0.0009755466052934408, "loss": 0.539, "step": 178050 }, { "epoch": 51.22554660529344, "grad_norm": 1.9510210752487183, "learning_rate": 0.0009754890678941311, "loss": 0.4563, "step": 178060 }, { "epoch": 51.22842347525892, "grad_norm": 1.5216312408447266, "learning_rate": 0.0009754315304948217, "loss": 0.4846, "step": 178070 }, { "epoch": 51.2313003452244, "grad_norm": 1.109268069267273, "learning_rate": 0.0009753739930955122, "loss": 0.4615, "step": 178080 }, { "epoch": 51.234177215189874, "grad_norm": 1.2790359258651733, "learning_rate": 0.0009753164556962025, "loss": 0.5652, "step": 178090 }, { "epoch": 51.23705408515535, "grad_norm": 0.6835514903068542, "learning_rate": 0.000975258918296893, "loss": 0.3855, "step": 178100 }, { "epoch": 51.239930955120826, "grad_norm": 1.3656402826309204, "learning_rate": 0.0009752013808975835, "loss": 0.4425, "step": 178110 }, { "epoch": 51.24280782508631, "grad_norm": 1.5644460916519165, "learning_rate": 0.0009751438434982739, "loss": 0.5521, "step": 178120 }, { "epoch": 51.245684695051786, "grad_norm": 0.7699193358421326, "learning_rate": 0.0009750863060989643, "loss": 0.5748, "step": 178130 }, { "epoch": 51.24856156501726, "grad_norm": 1.5757654905319214, "learning_rate": 0.0009750287686996549, "loss": 0.5292, "step": 178140 }, { "epoch": 51.25143843498274, "grad_norm": 1.543777346611023, "learning_rate": 0.0009749712313003452, "loss": 0.4616, "step": 178150 }, { "epoch": 51.254315304948214, "grad_norm": 0.8015722036361694, "learning_rate": 0.0009749136939010357, "loss": 0.6028, "step": 178160 }, { "epoch": 51.25719217491369, "grad_norm": 1.8339953422546387, "learning_rate": 0.0009748561565017261, "loss": 0.51, "step": 178170 }, { "epoch": 51.260069044879174, "grad_norm": 1.1049001216888428, "learning_rate": 0.0009747986191024166, "loss": 0.5699, "step": 178180 }, { "epoch": 51.26294591484465, "grad_norm": 0.6976597905158997, "learning_rate": 0.0009747410817031071, "loss": 0.6688, "step": 178190 }, { "epoch": 51.265822784810126, "grad_norm": 0.947065532207489, "learning_rate": 0.0009746835443037975, "loss": 0.4468, "step": 178200 }, { "epoch": 51.2686996547756, "grad_norm": 1.0510693788528442, "learning_rate": 0.0009746260069044879, "loss": 0.4858, "step": 178210 }, { "epoch": 51.27157652474108, "grad_norm": 0.852124810218811, "learning_rate": 0.0009745684695051784, "loss": 0.4087, "step": 178220 }, { "epoch": 51.27445339470656, "grad_norm": 1.331774115562439, "learning_rate": 0.0009745109321058689, "loss": 0.5052, "step": 178230 }, { "epoch": 51.27733026467204, "grad_norm": 0.9982602596282959, "learning_rate": 0.0009744533947065592, "loss": 0.51, "step": 178240 }, { "epoch": 51.280207134637514, "grad_norm": 1.8169156312942505, "learning_rate": 0.0009743958573072498, "loss": 0.5338, "step": 178250 }, { "epoch": 51.28308400460299, "grad_norm": 1.7409305572509766, "learning_rate": 0.0009743383199079402, "loss": 0.6428, "step": 178260 }, { "epoch": 51.28596087456847, "grad_norm": 0.8736431002616882, "learning_rate": 0.0009742807825086306, "loss": 0.6456, "step": 178270 }, { "epoch": 51.28883774453395, "grad_norm": 2.177621841430664, "learning_rate": 0.000974223245109321, "loss": 0.4458, "step": 178280 }, { "epoch": 51.291714614499426, "grad_norm": 1.5356826782226562, "learning_rate": 0.0009741657077100116, "loss": 0.4854, "step": 178290 }, { "epoch": 51.2945914844649, "grad_norm": 1.5902637243270874, "learning_rate": 0.000974108170310702, "loss": 0.5745, "step": 178300 }, { "epoch": 51.29746835443038, "grad_norm": 1.6884245872497559, "learning_rate": 0.0009740506329113924, "loss": 0.6484, "step": 178310 }, { "epoch": 51.300345224395855, "grad_norm": 1.0688433647155762, "learning_rate": 0.000973993095512083, "loss": 0.5591, "step": 178320 }, { "epoch": 51.30322209436134, "grad_norm": 1.3900309801101685, "learning_rate": 0.0009739355581127733, "loss": 0.5605, "step": 178330 }, { "epoch": 51.306098964326814, "grad_norm": 1.428593397140503, "learning_rate": 0.0009738780207134638, "loss": 0.4206, "step": 178340 }, { "epoch": 51.30897583429229, "grad_norm": 2.205221176147461, "learning_rate": 0.0009738204833141541, "loss": 0.6026, "step": 178350 }, { "epoch": 51.31185270425777, "grad_norm": 1.8639137744903564, "learning_rate": 0.0009737629459148447, "loss": 0.6319, "step": 178360 }, { "epoch": 51.31472957422324, "grad_norm": 1.9830102920532227, "learning_rate": 0.0009737054085155351, "loss": 0.433, "step": 178370 }, { "epoch": 51.31760644418872, "grad_norm": 1.2705228328704834, "learning_rate": 0.0009736478711162255, "loss": 0.4517, "step": 178380 }, { "epoch": 51.3204833141542, "grad_norm": 1.494679570198059, "learning_rate": 0.000973590333716916, "loss": 0.5574, "step": 178390 }, { "epoch": 51.32336018411968, "grad_norm": 1.4722340106964111, "learning_rate": 0.0009735327963176065, "loss": 0.5876, "step": 178400 }, { "epoch": 51.326237054085155, "grad_norm": 1.5496575832366943, "learning_rate": 0.0009734752589182969, "loss": 0.4869, "step": 178410 }, { "epoch": 51.32911392405063, "grad_norm": 1.19330632686615, "learning_rate": 0.0009734177215189873, "loss": 0.4966, "step": 178420 }, { "epoch": 51.33199079401611, "grad_norm": 1.5068899393081665, "learning_rate": 0.0009733601841196779, "loss": 0.6867, "step": 178430 }, { "epoch": 51.33486766398159, "grad_norm": 0.7694215774536133, "learning_rate": 0.0009733026467203682, "loss": 0.4953, "step": 178440 }, { "epoch": 51.33774453394707, "grad_norm": 1.9295247793197632, "learning_rate": 0.0009732451093210587, "loss": 0.6641, "step": 178450 }, { "epoch": 51.34062140391254, "grad_norm": 1.312608242034912, "learning_rate": 0.0009731875719217491, "loss": 0.4798, "step": 178460 }, { "epoch": 51.34349827387802, "grad_norm": 1.1698158979415894, "learning_rate": 0.0009731300345224396, "loss": 0.4534, "step": 178470 }, { "epoch": 51.346375143843495, "grad_norm": 1.2729610204696655, "learning_rate": 0.00097307249712313, "loss": 0.4166, "step": 178480 }, { "epoch": 51.34925201380898, "grad_norm": 1.3329808712005615, "learning_rate": 0.0009730149597238205, "loss": 0.5602, "step": 178490 }, { "epoch": 51.352128883774455, "grad_norm": 3.5201711654663086, "learning_rate": 0.0009729574223245109, "loss": 0.6096, "step": 178500 }, { "epoch": 51.35500575373993, "grad_norm": 0.8133824467658997, "learning_rate": 0.0009728998849252014, "loss": 0.5587, "step": 178510 }, { "epoch": 51.35788262370541, "grad_norm": 1.2188771963119507, "learning_rate": 0.0009728423475258919, "loss": 0.5962, "step": 178520 }, { "epoch": 51.360759493670884, "grad_norm": 0.9797092080116272, "learning_rate": 0.0009727848101265822, "loss": 0.4527, "step": 178530 }, { "epoch": 51.36363636363637, "grad_norm": 1.5628665685653687, "learning_rate": 0.0009727272727272728, "loss": 0.6855, "step": 178540 }, { "epoch": 51.36651323360184, "grad_norm": 0.6699994206428528, "learning_rate": 0.0009726697353279632, "loss": 0.4545, "step": 178550 }, { "epoch": 51.36939010356732, "grad_norm": 1.110947847366333, "learning_rate": 0.0009726121979286536, "loss": 0.4749, "step": 178560 }, { "epoch": 51.372266973532795, "grad_norm": 1.175775170326233, "learning_rate": 0.000972554660529344, "loss": 0.4158, "step": 178570 }, { "epoch": 51.37514384349827, "grad_norm": 1.6564031839370728, "learning_rate": 0.0009724971231300346, "loss": 0.4992, "step": 178580 }, { "epoch": 51.378020713463755, "grad_norm": 1.1500598192214966, "learning_rate": 0.000972439585730725, "loss": 0.5527, "step": 178590 }, { "epoch": 51.38089758342923, "grad_norm": 0.8364070653915405, "learning_rate": 0.0009723820483314154, "loss": 0.6069, "step": 178600 }, { "epoch": 51.38377445339471, "grad_norm": 1.0835071802139282, "learning_rate": 0.000972324510932106, "loss": 0.5243, "step": 178610 }, { "epoch": 51.386651323360184, "grad_norm": 0.8303788900375366, "learning_rate": 0.0009722669735327963, "loss": 0.6806, "step": 178620 }, { "epoch": 51.38952819332566, "grad_norm": 0.8605446219444275, "learning_rate": 0.0009722094361334868, "loss": 0.499, "step": 178630 }, { "epoch": 51.392405063291136, "grad_norm": 1.7891793251037598, "learning_rate": 0.0009721518987341772, "loss": 0.6344, "step": 178640 }, { "epoch": 51.39528193325662, "grad_norm": 1.2326513528823853, "learning_rate": 0.0009720943613348677, "loss": 0.5446, "step": 178650 }, { "epoch": 51.398158803222096, "grad_norm": 2.43579363822937, "learning_rate": 0.0009720368239355581, "loss": 0.4799, "step": 178660 }, { "epoch": 51.40103567318757, "grad_norm": 1.7047110795974731, "learning_rate": 0.0009719792865362486, "loss": 0.5682, "step": 178670 }, { "epoch": 51.40391254315305, "grad_norm": 0.9435938000679016, "learning_rate": 0.0009719217491369389, "loss": 0.4892, "step": 178680 }, { "epoch": 51.406789413118524, "grad_norm": 0.7811686992645264, "learning_rate": 0.0009718642117376295, "loss": 0.5898, "step": 178690 }, { "epoch": 51.40966628308401, "grad_norm": 1.620396375656128, "learning_rate": 0.00097180667433832, "loss": 0.6403, "step": 178700 }, { "epoch": 51.412543153049484, "grad_norm": 1.4759310483932495, "learning_rate": 0.0009717491369390103, "loss": 0.5673, "step": 178710 }, { "epoch": 51.41542002301496, "grad_norm": 1.1251341104507446, "learning_rate": 0.0009716915995397009, "loss": 0.444, "step": 178720 }, { "epoch": 51.418296892980436, "grad_norm": 1.4381576776504517, "learning_rate": 0.0009716340621403913, "loss": 0.5638, "step": 178730 }, { "epoch": 51.42117376294591, "grad_norm": 1.2900902032852173, "learning_rate": 0.0009715765247410817, "loss": 0.5931, "step": 178740 }, { "epoch": 51.424050632911396, "grad_norm": 2.141249656677246, "learning_rate": 0.0009715189873417721, "loss": 0.5504, "step": 178750 }, { "epoch": 51.42692750287687, "grad_norm": 0.9323436617851257, "learning_rate": 0.0009714614499424627, "loss": 0.4907, "step": 178760 }, { "epoch": 51.42980437284235, "grad_norm": 1.412123680114746, "learning_rate": 0.000971403912543153, "loss": 0.5054, "step": 178770 }, { "epoch": 51.432681242807824, "grad_norm": 1.0593972206115723, "learning_rate": 0.0009713463751438435, "loss": 0.5383, "step": 178780 }, { "epoch": 51.4355581127733, "grad_norm": 1.142189860343933, "learning_rate": 0.000971288837744534, "loss": 0.5727, "step": 178790 }, { "epoch": 51.438434982738784, "grad_norm": 0.8051527738571167, "learning_rate": 0.0009712313003452244, "loss": 0.5041, "step": 178800 }, { "epoch": 51.44131185270426, "grad_norm": 1.7830121517181396, "learning_rate": 0.0009711737629459149, "loss": 0.6039, "step": 178810 }, { "epoch": 51.444188722669736, "grad_norm": 1.0121335983276367, "learning_rate": 0.0009711162255466053, "loss": 0.4831, "step": 178820 }, { "epoch": 51.44706559263521, "grad_norm": 1.9182120561599731, "learning_rate": 0.0009710586881472958, "loss": 0.4711, "step": 178830 }, { "epoch": 51.44994246260069, "grad_norm": 1.6037194728851318, "learning_rate": 0.0009710011507479862, "loss": 0.5612, "step": 178840 }, { "epoch": 51.452819332566165, "grad_norm": 1.0844953060150146, "learning_rate": 0.0009709436133486767, "loss": 0.4769, "step": 178850 }, { "epoch": 51.45569620253165, "grad_norm": 1.3957023620605469, "learning_rate": 0.000970886075949367, "loss": 0.5959, "step": 178860 }, { "epoch": 51.458573072497124, "grad_norm": 1.3083361387252808, "learning_rate": 0.0009708285385500576, "loss": 0.5075, "step": 178870 }, { "epoch": 51.4614499424626, "grad_norm": 1.2838423252105713, "learning_rate": 0.0009707710011507481, "loss": 0.4697, "step": 178880 }, { "epoch": 51.46432681242808, "grad_norm": 1.565382480621338, "learning_rate": 0.0009707134637514384, "loss": 0.478, "step": 178890 }, { "epoch": 51.46720368239355, "grad_norm": 1.683656930923462, "learning_rate": 0.0009706559263521289, "loss": 0.5241, "step": 178900 }, { "epoch": 51.470080552359036, "grad_norm": 1.6568083763122559, "learning_rate": 0.0009705983889528194, "loss": 0.5403, "step": 178910 }, { "epoch": 51.47295742232451, "grad_norm": 1.3392260074615479, "learning_rate": 0.0009705408515535098, "loss": 0.4886, "step": 178920 }, { "epoch": 51.47583429228999, "grad_norm": 1.150024175643921, "learning_rate": 0.0009704833141542002, "loss": 0.4899, "step": 178930 }, { "epoch": 51.478711162255465, "grad_norm": 0.7138446569442749, "learning_rate": 0.0009704257767548908, "loss": 0.491, "step": 178940 }, { "epoch": 51.48158803222094, "grad_norm": 1.1276785135269165, "learning_rate": 0.0009703682393555811, "loss": 0.5323, "step": 178950 }, { "epoch": 51.484464902186424, "grad_norm": 0.6836503744125366, "learning_rate": 0.0009703107019562716, "loss": 0.4747, "step": 178960 }, { "epoch": 51.4873417721519, "grad_norm": 1.4107099771499634, "learning_rate": 0.000970253164556962, "loss": 0.6105, "step": 178970 }, { "epoch": 51.49021864211738, "grad_norm": 0.9947181940078735, "learning_rate": 0.0009701956271576525, "loss": 0.5555, "step": 178980 }, { "epoch": 51.49309551208285, "grad_norm": 2.0399787425994873, "learning_rate": 0.000970138089758343, "loss": 0.4266, "step": 178990 }, { "epoch": 51.49597238204833, "grad_norm": 0.9232620596885681, "learning_rate": 0.0009700805523590334, "loss": 0.5761, "step": 179000 }, { "epoch": 51.49884925201381, "grad_norm": 0.9118967652320862, "learning_rate": 0.0009700230149597239, "loss": 0.4868, "step": 179010 }, { "epoch": 51.50172612197929, "grad_norm": 1.4850271940231323, "learning_rate": 0.0009699654775604143, "loss": 0.7606, "step": 179020 }, { "epoch": 51.504602991944765, "grad_norm": 1.323368787765503, "learning_rate": 0.0009699079401611048, "loss": 0.4854, "step": 179030 }, { "epoch": 51.50747986191024, "grad_norm": 1.042976975440979, "learning_rate": 0.0009698504027617951, "loss": 0.4823, "step": 179040 }, { "epoch": 51.51035673187572, "grad_norm": 1.2086151838302612, "learning_rate": 0.0009697928653624857, "loss": 0.4745, "step": 179050 }, { "epoch": 51.51323360184119, "grad_norm": 1.0572834014892578, "learning_rate": 0.0009697353279631761, "loss": 0.5538, "step": 179060 }, { "epoch": 51.51611047180668, "grad_norm": 0.9385926127433777, "learning_rate": 0.0009696777905638665, "loss": 0.5886, "step": 179070 }, { "epoch": 51.51898734177215, "grad_norm": 1.3814617395401, "learning_rate": 0.000969620253164557, "loss": 0.6001, "step": 179080 }, { "epoch": 51.52186421173763, "grad_norm": 1.3123409748077393, "learning_rate": 0.0009695627157652475, "loss": 0.5505, "step": 179090 }, { "epoch": 51.524741081703105, "grad_norm": 1.1522070169448853, "learning_rate": 0.0009695051783659379, "loss": 0.5779, "step": 179100 }, { "epoch": 51.52761795166858, "grad_norm": 2.178245782852173, "learning_rate": 0.0009694476409666283, "loss": 0.5665, "step": 179110 }, { "epoch": 51.530494821634065, "grad_norm": 1.0398743152618408, "learning_rate": 0.0009693901035673189, "loss": 0.5181, "step": 179120 }, { "epoch": 51.53337169159954, "grad_norm": 1.0730143785476685, "learning_rate": 0.0009693325661680092, "loss": 0.5342, "step": 179130 }, { "epoch": 51.53624856156502, "grad_norm": 1.48733389377594, "learning_rate": 0.0009692750287686997, "loss": 0.6005, "step": 179140 }, { "epoch": 51.53912543153049, "grad_norm": 0.844497799873352, "learning_rate": 0.00096921749136939, "loss": 0.5565, "step": 179150 }, { "epoch": 51.54200230149597, "grad_norm": 1.1047492027282715, "learning_rate": 0.0009691599539700806, "loss": 0.6284, "step": 179160 }, { "epoch": 51.54487917146145, "grad_norm": 2.339205503463745, "learning_rate": 0.000969102416570771, "loss": 0.4957, "step": 179170 }, { "epoch": 51.54775604142693, "grad_norm": 1.0116658210754395, "learning_rate": 0.0009690448791714614, "loss": 0.6656, "step": 179180 }, { "epoch": 51.550632911392405, "grad_norm": 1.2710260152816772, "learning_rate": 0.0009689873417721519, "loss": 0.5688, "step": 179190 }, { "epoch": 51.55350978135788, "grad_norm": 1.0317137241363525, "learning_rate": 0.0009689298043728424, "loss": 0.5604, "step": 179200 }, { "epoch": 51.55638665132336, "grad_norm": 1.395564079284668, "learning_rate": 0.0009688722669735328, "loss": 0.6407, "step": 179210 }, { "epoch": 51.55926352128884, "grad_norm": 1.5589790344238281, "learning_rate": 0.0009688147295742232, "loss": 0.612, "step": 179220 }, { "epoch": 51.56214039125432, "grad_norm": 1.5701251029968262, "learning_rate": 0.0009687571921749138, "loss": 0.4761, "step": 179230 }, { "epoch": 51.56501726121979, "grad_norm": 0.8845697045326233, "learning_rate": 0.0009686996547756041, "loss": 0.5174, "step": 179240 }, { "epoch": 51.56789413118527, "grad_norm": 2.0337536334991455, "learning_rate": 0.0009686421173762946, "loss": 0.4536, "step": 179250 }, { "epoch": 51.570771001150746, "grad_norm": 0.971156656742096, "learning_rate": 0.000968584579976985, "loss": 0.4271, "step": 179260 }, { "epoch": 51.57364787111622, "grad_norm": 0.9764435887336731, "learning_rate": 0.0009685270425776755, "loss": 0.4988, "step": 179270 }, { "epoch": 51.576524741081705, "grad_norm": 1.6277923583984375, "learning_rate": 0.000968469505178366, "loss": 0.6553, "step": 179280 }, { "epoch": 51.57940161104718, "grad_norm": 1.8612686395645142, "learning_rate": 0.0009684119677790564, "loss": 0.5186, "step": 179290 }, { "epoch": 51.58227848101266, "grad_norm": 1.00570809841156, "learning_rate": 0.0009683544303797469, "loss": 0.5316, "step": 179300 }, { "epoch": 51.585155350978134, "grad_norm": 0.8828956484794617, "learning_rate": 0.0009682968929804373, "loss": 0.4755, "step": 179310 }, { "epoch": 51.58803222094361, "grad_norm": 0.8930321931838989, "learning_rate": 0.0009682393555811278, "loss": 0.6255, "step": 179320 }, { "epoch": 51.59090909090909, "grad_norm": 2.0779595375061035, "learning_rate": 0.0009681818181818181, "loss": 0.5807, "step": 179330 }, { "epoch": 51.59378596087457, "grad_norm": 2.1649222373962402, "learning_rate": 0.0009681242807825087, "loss": 0.7324, "step": 179340 }, { "epoch": 51.596662830840046, "grad_norm": 1.7784146070480347, "learning_rate": 0.0009680667433831991, "loss": 0.5206, "step": 179350 }, { "epoch": 51.59953970080552, "grad_norm": 1.4210342168807983, "learning_rate": 0.0009680092059838895, "loss": 0.5275, "step": 179360 }, { "epoch": 51.602416570771, "grad_norm": 1.39698326587677, "learning_rate": 0.0009679516685845799, "loss": 0.5763, "step": 179370 }, { "epoch": 51.60529344073648, "grad_norm": 0.9453150033950806, "learning_rate": 0.0009678941311852705, "loss": 0.5101, "step": 179380 }, { "epoch": 51.60817031070196, "grad_norm": 0.7479127645492554, "learning_rate": 0.0009678365937859609, "loss": 0.4941, "step": 179390 }, { "epoch": 51.611047180667434, "grad_norm": 3.8668735027313232, "learning_rate": 0.0009677790563866513, "loss": 0.5878, "step": 179400 }, { "epoch": 51.61392405063291, "grad_norm": 1.4826887845993042, "learning_rate": 0.0009677215189873419, "loss": 0.5474, "step": 179410 }, { "epoch": 51.616800920598386, "grad_norm": 0.8376256823539734, "learning_rate": 0.0009676639815880322, "loss": 0.4764, "step": 179420 }, { "epoch": 51.61967779056387, "grad_norm": 1.623571753501892, "learning_rate": 0.0009676064441887227, "loss": 0.6069, "step": 179430 }, { "epoch": 51.622554660529346, "grad_norm": 1.133453607559204, "learning_rate": 0.0009675489067894131, "loss": 0.5696, "step": 179440 }, { "epoch": 51.62543153049482, "grad_norm": 0.8259656429290771, "learning_rate": 0.0009674913693901036, "loss": 0.5538, "step": 179450 }, { "epoch": 51.6283084004603, "grad_norm": 1.2581558227539062, "learning_rate": 0.000967433831990794, "loss": 0.497, "step": 179460 }, { "epoch": 51.631185270425775, "grad_norm": 1.541190266609192, "learning_rate": 0.0009673762945914845, "loss": 0.6221, "step": 179470 }, { "epoch": 51.63406214039125, "grad_norm": 1.282347321510315, "learning_rate": 0.0009673187571921748, "loss": 0.4765, "step": 179480 }, { "epoch": 51.636939010356734, "grad_norm": 1.5749341249465942, "learning_rate": 0.0009672612197928654, "loss": 0.6648, "step": 179490 }, { "epoch": 51.63981588032221, "grad_norm": 1.353002905845642, "learning_rate": 0.0009672036823935559, "loss": 0.4641, "step": 179500 }, { "epoch": 51.64269275028769, "grad_norm": 1.7929624319076538, "learning_rate": 0.0009671461449942462, "loss": 0.574, "step": 179510 }, { "epoch": 51.64556962025316, "grad_norm": 1.0490108728408813, "learning_rate": 0.0009670886075949368, "loss": 0.6231, "step": 179520 }, { "epoch": 51.64844649021864, "grad_norm": 1.5680596828460693, "learning_rate": 0.0009670310701956272, "loss": 0.5237, "step": 179530 }, { "epoch": 51.65132336018412, "grad_norm": 1.5345443487167358, "learning_rate": 0.0009669735327963176, "loss": 0.5866, "step": 179540 }, { "epoch": 51.6542002301496, "grad_norm": 1.2753143310546875, "learning_rate": 0.000966915995397008, "loss": 0.4595, "step": 179550 }, { "epoch": 51.657077100115075, "grad_norm": 1.8415520191192627, "learning_rate": 0.0009668584579976986, "loss": 0.5531, "step": 179560 }, { "epoch": 51.65995397008055, "grad_norm": 2.399752378463745, "learning_rate": 0.0009668009205983889, "loss": 0.5275, "step": 179570 }, { "epoch": 51.66283084004603, "grad_norm": 1.3184531927108765, "learning_rate": 0.0009667433831990794, "loss": 0.5568, "step": 179580 }, { "epoch": 51.66570771001151, "grad_norm": 0.9533233642578125, "learning_rate": 0.00096668584579977, "loss": 0.5138, "step": 179590 }, { "epoch": 51.66858457997699, "grad_norm": 0.968832790851593, "learning_rate": 0.0009666283084004603, "loss": 0.5589, "step": 179600 }, { "epoch": 51.67146144994246, "grad_norm": 1.290629506111145, "learning_rate": 0.0009665707710011508, "loss": 0.5496, "step": 179610 }, { "epoch": 51.67433831990794, "grad_norm": 0.8707435727119446, "learning_rate": 0.0009665132336018412, "loss": 0.571, "step": 179620 }, { "epoch": 51.677215189873415, "grad_norm": 0.8885558247566223, "learning_rate": 0.0009664556962025317, "loss": 0.6171, "step": 179630 }, { "epoch": 51.6800920598389, "grad_norm": 1.0401662588119507, "learning_rate": 0.0009663981588032221, "loss": 0.4576, "step": 179640 }, { "epoch": 51.682968929804375, "grad_norm": 1.8336150646209717, "learning_rate": 0.0009663406214039126, "loss": 0.5523, "step": 179650 }, { "epoch": 51.68584579976985, "grad_norm": 0.700096070766449, "learning_rate": 0.0009662830840046029, "loss": 0.5272, "step": 179660 }, { "epoch": 51.68872266973533, "grad_norm": 1.3435806035995483, "learning_rate": 0.0009662255466052935, "loss": 0.4719, "step": 179670 }, { "epoch": 51.6915995397008, "grad_norm": 1.2914551496505737, "learning_rate": 0.000966168009205984, "loss": 0.5166, "step": 179680 }, { "epoch": 51.69447640966629, "grad_norm": 1.5144400596618652, "learning_rate": 0.0009661104718066743, "loss": 0.5885, "step": 179690 }, { "epoch": 51.69735327963176, "grad_norm": 1.6746618747711182, "learning_rate": 0.0009660529344073649, "loss": 0.4902, "step": 179700 }, { "epoch": 51.70023014959724, "grad_norm": 1.1472136974334717, "learning_rate": 0.0009659953970080553, "loss": 0.4506, "step": 179710 }, { "epoch": 51.703107019562715, "grad_norm": 1.9028959274291992, "learning_rate": 0.0009659378596087457, "loss": 0.5813, "step": 179720 }, { "epoch": 51.70598388952819, "grad_norm": 0.954899251461029, "learning_rate": 0.0009658803222094361, "loss": 0.5154, "step": 179730 }, { "epoch": 51.70886075949367, "grad_norm": 1.6493163108825684, "learning_rate": 0.0009658227848101267, "loss": 0.5074, "step": 179740 }, { "epoch": 51.71173762945915, "grad_norm": 0.940829336643219, "learning_rate": 0.000965765247410817, "loss": 0.5657, "step": 179750 }, { "epoch": 51.71461449942463, "grad_norm": 1.0111041069030762, "learning_rate": 0.0009657077100115075, "loss": 0.5448, "step": 179760 }, { "epoch": 51.7174913693901, "grad_norm": 1.1168431043624878, "learning_rate": 0.0009656501726121979, "loss": 0.4923, "step": 179770 }, { "epoch": 51.72036823935558, "grad_norm": 1.8696353435516357, "learning_rate": 0.0009655926352128884, "loss": 0.5903, "step": 179780 }, { "epoch": 51.723245109321056, "grad_norm": 1.4812999963760376, "learning_rate": 0.0009655350978135789, "loss": 0.5965, "step": 179790 }, { "epoch": 51.72612197928654, "grad_norm": 0.764228880405426, "learning_rate": 0.0009654775604142693, "loss": 0.4904, "step": 179800 }, { "epoch": 51.728998849252015, "grad_norm": 1.214853286743164, "learning_rate": 0.0009654200230149598, "loss": 0.6599, "step": 179810 }, { "epoch": 51.73187571921749, "grad_norm": 1.4164080619812012, "learning_rate": 0.0009653624856156502, "loss": 0.5776, "step": 179820 }, { "epoch": 51.73475258918297, "grad_norm": 1.4127395153045654, "learning_rate": 0.0009653049482163407, "loss": 0.6235, "step": 179830 }, { "epoch": 51.737629459148444, "grad_norm": 1.1910560131072998, "learning_rate": 0.000965247410817031, "loss": 0.4863, "step": 179840 }, { "epoch": 51.74050632911393, "grad_norm": 1.2087006568908691, "learning_rate": 0.0009651898734177216, "loss": 0.5529, "step": 179850 }, { "epoch": 51.7433831990794, "grad_norm": 1.202290654182434, "learning_rate": 0.000965132336018412, "loss": 0.5253, "step": 179860 }, { "epoch": 51.74626006904488, "grad_norm": 0.6580683588981628, "learning_rate": 0.0009650747986191024, "loss": 0.3689, "step": 179870 }, { "epoch": 51.749136939010356, "grad_norm": 1.8853486776351929, "learning_rate": 0.0009650172612197928, "loss": 0.5724, "step": 179880 }, { "epoch": 51.75201380897583, "grad_norm": 0.8498386144638062, "learning_rate": 0.0009649597238204834, "loss": 0.5453, "step": 179890 }, { "epoch": 51.754890678941315, "grad_norm": 1.4416241645812988, "learning_rate": 0.0009649021864211738, "loss": 0.5219, "step": 179900 }, { "epoch": 51.75776754890679, "grad_norm": 1.4525445699691772, "learning_rate": 0.0009648446490218642, "loss": 0.4475, "step": 179910 }, { "epoch": 51.76064441887227, "grad_norm": 1.181098461151123, "learning_rate": 0.0009647871116225548, "loss": 0.515, "step": 179920 }, { "epoch": 51.763521288837744, "grad_norm": 1.454392433166504, "learning_rate": 0.0009647295742232451, "loss": 0.5233, "step": 179930 }, { "epoch": 51.76639815880322, "grad_norm": 1.0692203044891357, "learning_rate": 0.0009646720368239356, "loss": 0.4962, "step": 179940 }, { "epoch": 51.769275028768696, "grad_norm": 1.0839688777923584, "learning_rate": 0.0009646144994246259, "loss": 0.4589, "step": 179950 }, { "epoch": 51.77215189873418, "grad_norm": 1.2286702394485474, "learning_rate": 0.0009645569620253165, "loss": 0.7595, "step": 179960 }, { "epoch": 51.775028768699656, "grad_norm": 1.0789839029312134, "learning_rate": 0.000964499424626007, "loss": 0.5024, "step": 179970 }, { "epoch": 51.77790563866513, "grad_norm": 1.300218105316162, "learning_rate": 0.0009644418872266973, "loss": 0.4738, "step": 179980 }, { "epoch": 51.78078250863061, "grad_norm": 0.7704766392707825, "learning_rate": 0.0009643843498273879, "loss": 0.475, "step": 179990 }, { "epoch": 51.783659378596084, "grad_norm": 0.8963378667831421, "learning_rate": 0.0009643268124280783, "loss": 0.5162, "step": 180000 }, { "epoch": 51.78653624856157, "grad_norm": 1.557018518447876, "learning_rate": 0.0009642692750287687, "loss": 0.5442, "step": 180010 }, { "epoch": 51.789413118527044, "grad_norm": 1.6852673292160034, "learning_rate": 0.0009642117376294591, "loss": 0.5736, "step": 180020 }, { "epoch": 51.79228998849252, "grad_norm": 1.777382731437683, "learning_rate": 0.0009641542002301497, "loss": 0.7045, "step": 180030 }, { "epoch": 51.795166858457996, "grad_norm": 2.154694080352783, "learning_rate": 0.00096409666283084, "loss": 0.4995, "step": 180040 }, { "epoch": 51.79804372842347, "grad_norm": 1.119785189628601, "learning_rate": 0.0009640391254315305, "loss": 0.4516, "step": 180050 }, { "epoch": 51.800920598388956, "grad_norm": 1.4007899761199951, "learning_rate": 0.0009639815880322209, "loss": 0.5276, "step": 180060 }, { "epoch": 51.80379746835443, "grad_norm": 1.3994951248168945, "learning_rate": 0.0009639240506329114, "loss": 0.614, "step": 180070 }, { "epoch": 51.80667433831991, "grad_norm": 1.7685329914093018, "learning_rate": 0.0009638665132336018, "loss": 0.5339, "step": 180080 }, { "epoch": 51.809551208285384, "grad_norm": 1.150375247001648, "learning_rate": 0.0009638089758342923, "loss": 0.5214, "step": 180090 }, { "epoch": 51.81242807825086, "grad_norm": 1.328446388244629, "learning_rate": 0.0009637514384349828, "loss": 0.7257, "step": 180100 }, { "epoch": 51.815304948216344, "grad_norm": 1.3322112560272217, "learning_rate": 0.0009636939010356732, "loss": 0.5797, "step": 180110 }, { "epoch": 51.81818181818182, "grad_norm": 0.8692741990089417, "learning_rate": 0.0009636363636363637, "loss": 0.4111, "step": 180120 }, { "epoch": 51.821058688147296, "grad_norm": 1.0226850509643555, "learning_rate": 0.000963578826237054, "loss": 0.4348, "step": 180130 }, { "epoch": 51.82393555811277, "grad_norm": 1.4536058902740479, "learning_rate": 0.0009635212888377446, "loss": 0.5289, "step": 180140 }, { "epoch": 51.82681242807825, "grad_norm": 0.9871499538421631, "learning_rate": 0.000963463751438435, "loss": 0.589, "step": 180150 }, { "epoch": 51.829689298043725, "grad_norm": 1.1568503379821777, "learning_rate": 0.0009634062140391254, "loss": 0.598, "step": 180160 }, { "epoch": 51.83256616800921, "grad_norm": 1.7436871528625488, "learning_rate": 0.0009633486766398158, "loss": 0.4948, "step": 180170 }, { "epoch": 51.835443037974684, "grad_norm": 1.7314443588256836, "learning_rate": 0.0009632911392405064, "loss": 0.5712, "step": 180180 }, { "epoch": 51.83831990794016, "grad_norm": 2.3603010177612305, "learning_rate": 0.0009632336018411968, "loss": 0.6714, "step": 180190 }, { "epoch": 51.84119677790564, "grad_norm": 1.0398362874984741, "learning_rate": 0.0009631760644418872, "loss": 0.6605, "step": 180200 }, { "epoch": 51.84407364787111, "grad_norm": 1.0823664665222168, "learning_rate": 0.0009631185270425778, "loss": 0.5643, "step": 180210 }, { "epoch": 51.846950517836596, "grad_norm": 1.3485348224639893, "learning_rate": 0.0009630609896432681, "loss": 0.6123, "step": 180220 }, { "epoch": 51.84982738780207, "grad_norm": 1.6654295921325684, "learning_rate": 0.0009630034522439586, "loss": 0.5665, "step": 180230 }, { "epoch": 51.85270425776755, "grad_norm": 1.1154981851577759, "learning_rate": 0.000962945914844649, "loss": 0.4829, "step": 180240 }, { "epoch": 51.855581127733025, "grad_norm": 1.8571693897247314, "learning_rate": 0.0009628883774453395, "loss": 0.5622, "step": 180250 }, { "epoch": 51.8584579976985, "grad_norm": 0.6955996751785278, "learning_rate": 0.0009628308400460299, "loss": 0.5714, "step": 180260 }, { "epoch": 51.861334867663984, "grad_norm": 2.220775842666626, "learning_rate": 0.0009627733026467204, "loss": 0.6173, "step": 180270 }, { "epoch": 51.86421173762946, "grad_norm": 1.8873670101165771, "learning_rate": 0.0009627157652474108, "loss": 0.6018, "step": 180280 }, { "epoch": 51.86708860759494, "grad_norm": 0.7526190280914307, "learning_rate": 0.0009626582278481013, "loss": 0.5103, "step": 180290 }, { "epoch": 51.86996547756041, "grad_norm": 3.7815799713134766, "learning_rate": 0.0009626006904487918, "loss": 0.5044, "step": 180300 }, { "epoch": 51.87284234752589, "grad_norm": 1.5823007822036743, "learning_rate": 0.0009625431530494821, "loss": 0.5325, "step": 180310 }, { "epoch": 51.87571921749137, "grad_norm": 1.750902771949768, "learning_rate": 0.0009624856156501727, "loss": 0.6955, "step": 180320 }, { "epoch": 51.87859608745685, "grad_norm": 1.7659375667572021, "learning_rate": 0.0009624280782508631, "loss": 0.5919, "step": 180330 }, { "epoch": 51.881472957422325, "grad_norm": 1.4153006076812744, "learning_rate": 0.0009623705408515535, "loss": 0.6551, "step": 180340 }, { "epoch": 51.8843498273878, "grad_norm": 3.333143472671509, "learning_rate": 0.0009623130034522439, "loss": 0.5824, "step": 180350 }, { "epoch": 51.88722669735328, "grad_norm": 0.793045163154602, "learning_rate": 0.0009622554660529345, "loss": 0.5014, "step": 180360 }, { "epoch": 51.89010356731876, "grad_norm": 1.0773530006408691, "learning_rate": 0.0009621979286536248, "loss": 0.4027, "step": 180370 }, { "epoch": 51.89298043728424, "grad_norm": 1.3755886554718018, "learning_rate": 0.0009621403912543153, "loss": 0.6158, "step": 180380 }, { "epoch": 51.89585730724971, "grad_norm": 0.69831383228302, "learning_rate": 0.0009620828538550059, "loss": 0.4784, "step": 180390 }, { "epoch": 51.89873417721519, "grad_norm": 0.8804978728294373, "learning_rate": 0.0009620253164556962, "loss": 0.5228, "step": 180400 }, { "epoch": 51.901611047180666, "grad_norm": 1.056013584136963, "learning_rate": 0.0009619677790563867, "loss": 0.5261, "step": 180410 }, { "epoch": 51.90448791714614, "grad_norm": 0.734550952911377, "learning_rate": 0.0009619102416570771, "loss": 0.5088, "step": 180420 }, { "epoch": 51.907364787111625, "grad_norm": 0.6898322105407715, "learning_rate": 0.0009618527042577676, "loss": 0.4557, "step": 180430 }, { "epoch": 51.9102416570771, "grad_norm": 1.3098434209823608, "learning_rate": 0.000961795166858458, "loss": 0.53, "step": 180440 }, { "epoch": 51.91311852704258, "grad_norm": 0.7105202674865723, "learning_rate": 0.0009617376294591485, "loss": 0.422, "step": 180450 }, { "epoch": 51.915995397008054, "grad_norm": 0.596222460269928, "learning_rate": 0.0009616800920598388, "loss": 0.4273, "step": 180460 }, { "epoch": 51.91887226697353, "grad_norm": 1.2685860395431519, "learning_rate": 0.0009616225546605294, "loss": 0.5939, "step": 180470 }, { "epoch": 51.92174913693901, "grad_norm": 0.9341767430305481, "learning_rate": 0.0009615650172612199, "loss": 0.4987, "step": 180480 }, { "epoch": 51.92462600690449, "grad_norm": 0.8860072493553162, "learning_rate": 0.0009615074798619102, "loss": 0.7752, "step": 180490 }, { "epoch": 51.927502876869966, "grad_norm": 0.7871548533439636, "learning_rate": 0.0009614499424626008, "loss": 0.5704, "step": 180500 }, { "epoch": 51.93037974683544, "grad_norm": 1.3529672622680664, "learning_rate": 0.0009613924050632912, "loss": 0.8546, "step": 180510 }, { "epoch": 51.93325661680092, "grad_norm": 0.9049682021141052, "learning_rate": 0.0009613348676639816, "loss": 0.5649, "step": 180520 }, { "epoch": 51.9361334867664, "grad_norm": 1.3412307500839233, "learning_rate": 0.000961277330264672, "loss": 0.5105, "step": 180530 }, { "epoch": 51.93901035673188, "grad_norm": 1.8910802602767944, "learning_rate": 0.0009612197928653626, "loss": 0.4969, "step": 180540 }, { "epoch": 51.941887226697354, "grad_norm": 1.0172394514083862, "learning_rate": 0.0009611622554660529, "loss": 0.5407, "step": 180550 }, { "epoch": 51.94476409666283, "grad_norm": 0.644057035446167, "learning_rate": 0.0009611047180667434, "loss": 0.6181, "step": 180560 }, { "epoch": 51.947640966628306, "grad_norm": 2.106839179992676, "learning_rate": 0.000961047180667434, "loss": 0.6803, "step": 180570 }, { "epoch": 51.95051783659379, "grad_norm": 1.213878870010376, "learning_rate": 0.0009609896432681243, "loss": 0.4927, "step": 180580 }, { "epoch": 51.953394706559266, "grad_norm": 1.4868766069412231, "learning_rate": 0.0009609321058688148, "loss": 0.5694, "step": 180590 }, { "epoch": 51.95627157652474, "grad_norm": 1.6819806098937988, "learning_rate": 0.0009608745684695052, "loss": 0.6046, "step": 180600 }, { "epoch": 51.95914844649022, "grad_norm": 1.03499174118042, "learning_rate": 0.0009608170310701957, "loss": 0.6896, "step": 180610 }, { "epoch": 51.962025316455694, "grad_norm": 1.5023136138916016, "learning_rate": 0.0009607594936708861, "loss": 0.463, "step": 180620 }, { "epoch": 51.96490218642117, "grad_norm": 1.2485263347625732, "learning_rate": 0.0009607019562715766, "loss": 0.6736, "step": 180630 }, { "epoch": 51.967779056386654, "grad_norm": 1.9075324535369873, "learning_rate": 0.0009606444188722669, "loss": 0.5598, "step": 180640 }, { "epoch": 51.97065592635213, "grad_norm": 1.9786529541015625, "learning_rate": 0.0009605868814729575, "loss": 0.4595, "step": 180650 }, { "epoch": 51.973532796317606, "grad_norm": 1.940307855606079, "learning_rate": 0.0009605293440736479, "loss": 0.6455, "step": 180660 }, { "epoch": 51.97640966628308, "grad_norm": 0.6308138370513916, "learning_rate": 0.0009604718066743383, "loss": 0.4903, "step": 180670 }, { "epoch": 51.97928653624856, "grad_norm": 0.8630291819572449, "learning_rate": 0.0009604142692750289, "loss": 0.5657, "step": 180680 }, { "epoch": 51.98216340621404, "grad_norm": 3.49362850189209, "learning_rate": 0.0009603567318757193, "loss": 0.5852, "step": 180690 }, { "epoch": 51.98504027617952, "grad_norm": 1.0467509031295776, "learning_rate": 0.0009602991944764097, "loss": 0.5321, "step": 180700 }, { "epoch": 51.987917146144994, "grad_norm": 1.0286413431167603, "learning_rate": 0.0009602416570771001, "loss": 0.5382, "step": 180710 }, { "epoch": 51.99079401611047, "grad_norm": 1.789783239364624, "learning_rate": 0.0009601841196777907, "loss": 0.6721, "step": 180720 }, { "epoch": 51.99367088607595, "grad_norm": 0.8069059252738953, "learning_rate": 0.000960126582278481, "loss": 0.5655, "step": 180730 }, { "epoch": 51.99654775604143, "grad_norm": 1.6092052459716797, "learning_rate": 0.0009600690448791715, "loss": 0.6072, "step": 180740 }, { "epoch": 51.999424626006906, "grad_norm": 7.215903282165527, "learning_rate": 0.0009600115074798618, "loss": 0.512, "step": 180750 }, { "epoch": 52.00230149597238, "grad_norm": 0.8547035455703735, "learning_rate": 0.0009599539700805524, "loss": 0.514, "step": 180760 }, { "epoch": 52.00517836593786, "grad_norm": 0.8050607442855835, "learning_rate": 0.0009598964326812428, "loss": 0.3627, "step": 180770 }, { "epoch": 52.008055235903335, "grad_norm": 1.525439977645874, "learning_rate": 0.0009598388952819332, "loss": 0.4221, "step": 180780 }, { "epoch": 52.01093210586882, "grad_norm": 1.5119637250900269, "learning_rate": 0.0009597813578826238, "loss": 0.5027, "step": 180790 }, { "epoch": 52.013808975834294, "grad_norm": 1.2881821393966675, "learning_rate": 0.0009597238204833142, "loss": 0.4293, "step": 180800 }, { "epoch": 52.01668584579977, "grad_norm": 1.1358826160430908, "learning_rate": 0.0009596662830840046, "loss": 0.476, "step": 180810 }, { "epoch": 52.01956271576525, "grad_norm": 1.076390266418457, "learning_rate": 0.000959608745684695, "loss": 0.4018, "step": 180820 }, { "epoch": 52.02243958573072, "grad_norm": 1.0979348421096802, "learning_rate": 0.0009595512082853856, "loss": 0.4539, "step": 180830 }, { "epoch": 52.0253164556962, "grad_norm": 1.570853352546692, "learning_rate": 0.0009594936708860759, "loss": 0.4231, "step": 180840 }, { "epoch": 52.02819332566168, "grad_norm": 0.7720128893852234, "learning_rate": 0.0009594361334867664, "loss": 0.4688, "step": 180850 }, { "epoch": 52.03107019562716, "grad_norm": 1.0621453523635864, "learning_rate": 0.0009593785960874568, "loss": 0.4303, "step": 180860 }, { "epoch": 52.033947065592635, "grad_norm": 1.4891225099563599, "learning_rate": 0.0009593210586881473, "loss": 0.4254, "step": 180870 }, { "epoch": 52.03682393555811, "grad_norm": 1.554814338684082, "learning_rate": 0.0009592635212888377, "loss": 0.4484, "step": 180880 }, { "epoch": 52.03970080552359, "grad_norm": 1.1990686655044556, "learning_rate": 0.0009592059838895282, "loss": 0.4859, "step": 180890 }, { "epoch": 52.04257767548907, "grad_norm": 1.17049241065979, "learning_rate": 0.0009591484464902187, "loss": 0.4892, "step": 180900 }, { "epoch": 52.04545454545455, "grad_norm": 0.8391687273979187, "learning_rate": 0.0009590909090909091, "loss": 0.427, "step": 180910 }, { "epoch": 52.04833141542002, "grad_norm": 1.5724027156829834, "learning_rate": 0.0009590333716915996, "loss": 0.5061, "step": 180920 }, { "epoch": 52.0512082853855, "grad_norm": 1.25606369972229, "learning_rate": 0.0009589758342922899, "loss": 0.4265, "step": 180930 }, { "epoch": 52.054085155350975, "grad_norm": 1.2187376022338867, "learning_rate": 0.0009589182968929805, "loss": 0.481, "step": 180940 }, { "epoch": 52.05696202531646, "grad_norm": 1.8675018548965454, "learning_rate": 0.0009588607594936709, "loss": 0.5044, "step": 180950 }, { "epoch": 52.059838895281935, "grad_norm": 0.9252568483352661, "learning_rate": 0.0009588032220943613, "loss": 0.4216, "step": 180960 }, { "epoch": 52.06271576524741, "grad_norm": 1.614510178565979, "learning_rate": 0.0009587456846950518, "loss": 0.5027, "step": 180970 }, { "epoch": 52.06559263521289, "grad_norm": 2.312926769256592, "learning_rate": 0.0009586881472957423, "loss": 0.5155, "step": 180980 }, { "epoch": 52.06846950517836, "grad_norm": 1.5717103481292725, "learning_rate": 0.0009586306098964326, "loss": 0.4969, "step": 180990 }, { "epoch": 52.07134637514385, "grad_norm": 1.025186538696289, "learning_rate": 0.0009585730724971231, "loss": 0.641, "step": 181000 }, { "epoch": 52.07422324510932, "grad_norm": 1.6315512657165527, "learning_rate": 0.0009585155350978137, "loss": 0.5218, "step": 181010 }, { "epoch": 52.0771001150748, "grad_norm": 1.247754693031311, "learning_rate": 0.000958457997698504, "loss": 0.3953, "step": 181020 }, { "epoch": 52.079976985040275, "grad_norm": 0.8479031920433044, "learning_rate": 0.0009584004602991945, "loss": 0.5274, "step": 181030 }, { "epoch": 52.08285385500575, "grad_norm": 1.8310410976409912, "learning_rate": 0.0009583429228998849, "loss": 0.5636, "step": 181040 }, { "epoch": 52.08573072497123, "grad_norm": 1.1964890956878662, "learning_rate": 0.0009582853855005754, "loss": 0.5653, "step": 181050 }, { "epoch": 52.08860759493671, "grad_norm": 0.982758641242981, "learning_rate": 0.0009582278481012658, "loss": 0.612, "step": 181060 }, { "epoch": 52.09148446490219, "grad_norm": 1.6132546663284302, "learning_rate": 0.0009581703107019563, "loss": 0.5172, "step": 181070 }, { "epoch": 52.09436133486766, "grad_norm": 1.3159209489822388, "learning_rate": 0.0009581127733026467, "loss": 0.5796, "step": 181080 }, { "epoch": 52.09723820483314, "grad_norm": 0.8881993293762207, "learning_rate": 0.0009580552359033372, "loss": 0.543, "step": 181090 }, { "epoch": 52.100115074798616, "grad_norm": 1.987334132194519, "learning_rate": 0.0009579976985040277, "loss": 0.5794, "step": 181100 }, { "epoch": 52.1029919447641, "grad_norm": 1.2244805097579956, "learning_rate": 0.000957940161104718, "loss": 0.6032, "step": 181110 }, { "epoch": 52.105868814729575, "grad_norm": 2.8004612922668457, "learning_rate": 0.0009578826237054086, "loss": 0.5069, "step": 181120 }, { "epoch": 52.10874568469505, "grad_norm": 2.1854817867279053, "learning_rate": 0.000957825086306099, "loss": 0.4577, "step": 181130 }, { "epoch": 52.11162255466053, "grad_norm": 0.9203590154647827, "learning_rate": 0.0009577675489067894, "loss": 0.4335, "step": 181140 }, { "epoch": 52.114499424626004, "grad_norm": 0.7674850821495056, "learning_rate": 0.0009577100115074798, "loss": 0.4183, "step": 181150 }, { "epoch": 52.11737629459149, "grad_norm": 1.6124905347824097, "learning_rate": 0.0009576524741081704, "loss": 0.5563, "step": 181160 }, { "epoch": 52.120253164556964, "grad_norm": 1.212141513824463, "learning_rate": 0.0009575949367088607, "loss": 0.5224, "step": 181170 }, { "epoch": 52.12313003452244, "grad_norm": 1.1214605569839478, "learning_rate": 0.0009575373993095512, "loss": 0.4684, "step": 181180 }, { "epoch": 52.126006904487916, "grad_norm": 1.2050918340682983, "learning_rate": 0.0009574798619102418, "loss": 0.4932, "step": 181190 }, { "epoch": 52.12888377445339, "grad_norm": 1.544669270515442, "learning_rate": 0.0009574223245109321, "loss": 0.5497, "step": 181200 }, { "epoch": 52.131760644418875, "grad_norm": 1.5418699979782104, "learning_rate": 0.0009573647871116226, "loss": 0.5569, "step": 181210 }, { "epoch": 52.13463751438435, "grad_norm": 1.0360088348388672, "learning_rate": 0.000957307249712313, "loss": 0.3974, "step": 181220 }, { "epoch": 52.13751438434983, "grad_norm": 0.8452330827713013, "learning_rate": 0.0009572497123130035, "loss": 0.4776, "step": 181230 }, { "epoch": 52.140391254315304, "grad_norm": 1.1079013347625732, "learning_rate": 0.0009571921749136939, "loss": 0.4962, "step": 181240 }, { "epoch": 52.14326812428078, "grad_norm": 1.1494903564453125, "learning_rate": 0.0009571346375143844, "loss": 0.5061, "step": 181250 }, { "epoch": 52.146144994246264, "grad_norm": 0.7651392221450806, "learning_rate": 0.0009570771001150748, "loss": 0.4992, "step": 181260 }, { "epoch": 52.14902186421174, "grad_norm": 0.6017748117446899, "learning_rate": 0.0009570195627157653, "loss": 0.5073, "step": 181270 }, { "epoch": 52.151898734177216, "grad_norm": 1.178245186805725, "learning_rate": 0.0009569620253164557, "loss": 0.5547, "step": 181280 }, { "epoch": 52.15477560414269, "grad_norm": 0.945609450340271, "learning_rate": 0.0009569044879171461, "loss": 0.4734, "step": 181290 }, { "epoch": 52.15765247410817, "grad_norm": 1.6350094079971313, "learning_rate": 0.0009568469505178367, "loss": 0.5231, "step": 181300 }, { "epoch": 52.160529344073645, "grad_norm": 1.5451695919036865, "learning_rate": 0.0009567894131185271, "loss": 0.4266, "step": 181310 }, { "epoch": 52.16340621403913, "grad_norm": 2.926633596420288, "learning_rate": 0.0009567318757192175, "loss": 0.5467, "step": 181320 }, { "epoch": 52.166283084004604, "grad_norm": 0.9498541951179504, "learning_rate": 0.0009566743383199079, "loss": 0.5481, "step": 181330 }, { "epoch": 52.16915995397008, "grad_norm": 1.6259270906448364, "learning_rate": 0.0009566168009205985, "loss": 0.6131, "step": 181340 }, { "epoch": 52.17203682393556, "grad_norm": 0.7641351819038391, "learning_rate": 0.0009565592635212888, "loss": 0.5306, "step": 181350 }, { "epoch": 52.17491369390103, "grad_norm": 0.7579176425933838, "learning_rate": 0.0009565017261219793, "loss": 0.4503, "step": 181360 }, { "epoch": 52.177790563866516, "grad_norm": 1.9921183586120605, "learning_rate": 0.0009564441887226698, "loss": 0.4421, "step": 181370 }, { "epoch": 52.18066743383199, "grad_norm": 0.8799489140510559, "learning_rate": 0.0009563866513233602, "loss": 0.3843, "step": 181380 }, { "epoch": 52.18354430379747, "grad_norm": 1.450270652770996, "learning_rate": 0.0009563291139240507, "loss": 0.5608, "step": 181390 }, { "epoch": 52.186421173762945, "grad_norm": 1.1423828601837158, "learning_rate": 0.0009562715765247411, "loss": 0.639, "step": 181400 }, { "epoch": 52.18929804372842, "grad_norm": 0.8794196844100952, "learning_rate": 0.0009562140391254316, "loss": 0.615, "step": 181410 }, { "epoch": 52.192174913693904, "grad_norm": 1.2247127294540405, "learning_rate": 0.000956156501726122, "loss": 0.6628, "step": 181420 }, { "epoch": 52.19505178365938, "grad_norm": 1.618354320526123, "learning_rate": 0.0009560989643268125, "loss": 0.61, "step": 181430 }, { "epoch": 52.19792865362486, "grad_norm": 0.631755530834198, "learning_rate": 0.0009560414269275028, "loss": 0.4942, "step": 181440 }, { "epoch": 52.20080552359033, "grad_norm": 1.3823550939559937, "learning_rate": 0.0009559838895281934, "loss": 0.5827, "step": 181450 }, { "epoch": 52.20368239355581, "grad_norm": 1.8040889501571655, "learning_rate": 0.0009559263521288838, "loss": 0.5088, "step": 181460 }, { "epoch": 52.20655926352129, "grad_norm": 1.11762535572052, "learning_rate": 0.0009558688147295742, "loss": 0.5073, "step": 181470 }, { "epoch": 52.20943613348677, "grad_norm": 1.0121042728424072, "learning_rate": 0.0009558112773302648, "loss": 0.4188, "step": 181480 }, { "epoch": 52.212313003452245, "grad_norm": 1.4278284311294556, "learning_rate": 0.0009557537399309552, "loss": 0.5047, "step": 181490 }, { "epoch": 52.21518987341772, "grad_norm": 1.6776498556137085, "learning_rate": 0.0009556962025316456, "loss": 0.5238, "step": 181500 }, { "epoch": 52.2180667433832, "grad_norm": 1.3138313293457031, "learning_rate": 0.000955638665132336, "loss": 0.6284, "step": 181510 }, { "epoch": 52.22094361334867, "grad_norm": 1.5745457410812378, "learning_rate": 0.0009555811277330266, "loss": 0.6401, "step": 181520 }, { "epoch": 52.22382048331416, "grad_norm": 0.7879127264022827, "learning_rate": 0.0009555235903337169, "loss": 0.411, "step": 181530 }, { "epoch": 52.22669735327963, "grad_norm": 0.9931883811950684, "learning_rate": 0.0009554660529344074, "loss": 0.5072, "step": 181540 }, { "epoch": 52.22957422324511, "grad_norm": 2.5255298614501953, "learning_rate": 0.0009554085155350979, "loss": 0.6894, "step": 181550 }, { "epoch": 52.232451093210585, "grad_norm": 1.2479026317596436, "learning_rate": 0.0009553509781357883, "loss": 0.5328, "step": 181560 }, { "epoch": 52.23532796317606, "grad_norm": 1.1545895338058472, "learning_rate": 0.0009552934407364787, "loss": 0.5632, "step": 181570 }, { "epoch": 52.238204833141545, "grad_norm": 1.395216941833496, "learning_rate": 0.0009552359033371691, "loss": 0.4307, "step": 181580 }, { "epoch": 52.24108170310702, "grad_norm": 0.7369202971458435, "learning_rate": 0.0009551783659378597, "loss": 0.4761, "step": 181590 }, { "epoch": 52.2439585730725, "grad_norm": 0.6432122588157654, "learning_rate": 0.0009551208285385501, "loss": 0.4919, "step": 181600 }, { "epoch": 52.24683544303797, "grad_norm": 1.0189918279647827, "learning_rate": 0.0009550632911392405, "loss": 0.4722, "step": 181610 }, { "epoch": 52.24971231300345, "grad_norm": 1.0559213161468506, "learning_rate": 0.0009550057537399309, "loss": 0.4003, "step": 181620 }, { "epoch": 52.25258918296893, "grad_norm": 1.8138988018035889, "learning_rate": 0.0009549482163406215, "loss": 0.5746, "step": 181630 }, { "epoch": 52.25546605293441, "grad_norm": 1.5936529636383057, "learning_rate": 0.0009548906789413118, "loss": 0.4149, "step": 181640 }, { "epoch": 52.258342922899885, "grad_norm": 0.9527376890182495, "learning_rate": 0.0009548331415420023, "loss": 0.4299, "step": 181650 }, { "epoch": 52.26121979286536, "grad_norm": 1.9008285999298096, "learning_rate": 0.0009547756041426928, "loss": 0.7959, "step": 181660 }, { "epoch": 52.26409666283084, "grad_norm": 2.08663272857666, "learning_rate": 0.0009547180667433832, "loss": 0.6034, "step": 181670 }, { "epoch": 52.26697353279632, "grad_norm": 1.5596132278442383, "learning_rate": 0.0009546605293440736, "loss": 0.4741, "step": 181680 }, { "epoch": 52.2698504027618, "grad_norm": 1.1087607145309448, "learning_rate": 0.0009546029919447641, "loss": 0.5484, "step": 181690 }, { "epoch": 52.27272727272727, "grad_norm": 0.7754012942314148, "learning_rate": 0.0009545454545454546, "loss": 0.5151, "step": 181700 }, { "epoch": 52.27560414269275, "grad_norm": 1.7507085800170898, "learning_rate": 0.000954487917146145, "loss": 0.6013, "step": 181710 }, { "epoch": 52.278481012658226, "grad_norm": 1.4414143562316895, "learning_rate": 0.0009544303797468355, "loss": 0.4867, "step": 181720 }, { "epoch": 52.2813578826237, "grad_norm": 1.0455477237701416, "learning_rate": 0.0009543728423475258, "loss": 0.4652, "step": 181730 }, { "epoch": 52.284234752589185, "grad_norm": 2.164034843444824, "learning_rate": 0.0009543153049482164, "loss": 0.5496, "step": 181740 }, { "epoch": 52.28711162255466, "grad_norm": 1.5370408296585083, "learning_rate": 0.0009542577675489068, "loss": 0.5582, "step": 181750 }, { "epoch": 52.28998849252014, "grad_norm": 1.2087215185165405, "learning_rate": 0.0009542002301495972, "loss": 0.5265, "step": 181760 }, { "epoch": 52.292865362485614, "grad_norm": 2.294790029525757, "learning_rate": 0.0009541426927502877, "loss": 0.4205, "step": 181770 }, { "epoch": 52.29574223245109, "grad_norm": 0.6345133781433105, "learning_rate": 0.0009540851553509782, "loss": 0.4733, "step": 181780 }, { "epoch": 52.29861910241657, "grad_norm": 1.0655031204223633, "learning_rate": 0.0009540276179516685, "loss": 0.5586, "step": 181790 }, { "epoch": 52.30149597238205, "grad_norm": 1.0086414813995361, "learning_rate": 0.000953970080552359, "loss": 0.5562, "step": 181800 }, { "epoch": 52.304372842347526, "grad_norm": 0.8066883087158203, "learning_rate": 0.0009539125431530496, "loss": 0.4348, "step": 181810 }, { "epoch": 52.307249712313, "grad_norm": 0.6677890419960022, "learning_rate": 0.0009538550057537399, "loss": 0.4507, "step": 181820 }, { "epoch": 52.31012658227848, "grad_norm": 2.0755887031555176, "learning_rate": 0.0009537974683544304, "loss": 0.56, "step": 181830 }, { "epoch": 52.31300345224396, "grad_norm": 2.2345404624938965, "learning_rate": 0.0009537399309551208, "loss": 0.5452, "step": 181840 }, { "epoch": 52.31588032220944, "grad_norm": 1.5682963132858276, "learning_rate": 0.0009536823935558113, "loss": 0.4501, "step": 181850 }, { "epoch": 52.318757192174914, "grad_norm": 1.361069679260254, "learning_rate": 0.0009536248561565017, "loss": 0.5214, "step": 181860 }, { "epoch": 52.32163406214039, "grad_norm": 1.1828943490982056, "learning_rate": 0.0009535673187571922, "loss": 0.5687, "step": 181870 }, { "epoch": 52.324510932105866, "grad_norm": 1.2771320343017578, "learning_rate": 0.0009535097813578826, "loss": 0.4672, "step": 181880 }, { "epoch": 52.32738780207135, "grad_norm": 1.4572093486785889, "learning_rate": 0.0009534522439585731, "loss": 0.4945, "step": 181890 }, { "epoch": 52.330264672036826, "grad_norm": 0.9470398426055908, "learning_rate": 0.0009533947065592636, "loss": 0.5613, "step": 181900 }, { "epoch": 52.3331415420023, "grad_norm": 2.6542460918426514, "learning_rate": 0.0009533371691599539, "loss": 0.5591, "step": 181910 }, { "epoch": 52.33601841196778, "grad_norm": 1.7725889682769775, "learning_rate": 0.0009532796317606445, "loss": 0.5995, "step": 181920 }, { "epoch": 52.338895281933254, "grad_norm": 3.086780548095703, "learning_rate": 0.0009532220943613349, "loss": 0.5632, "step": 181930 }, { "epoch": 52.34177215189873, "grad_norm": 1.2225465774536133, "learning_rate": 0.0009531645569620253, "loss": 0.5322, "step": 181940 }, { "epoch": 52.344649021864214, "grad_norm": 1.3068344593048096, "learning_rate": 0.0009531070195627158, "loss": 0.4857, "step": 181950 }, { "epoch": 52.34752589182969, "grad_norm": 0.6086899042129517, "learning_rate": 0.0009530494821634063, "loss": 0.5339, "step": 181960 }, { "epoch": 52.350402761795166, "grad_norm": 1.2534167766571045, "learning_rate": 0.0009529919447640966, "loss": 0.5742, "step": 181970 }, { "epoch": 52.35327963176064, "grad_norm": 1.0937732458114624, "learning_rate": 0.0009529344073647871, "loss": 0.4679, "step": 181980 }, { "epoch": 52.35615650172612, "grad_norm": 1.2356479167938232, "learning_rate": 0.0009528768699654777, "loss": 0.4836, "step": 181990 }, { "epoch": 52.3590333716916, "grad_norm": 2.410712242126465, "learning_rate": 0.000952819332566168, "loss": 0.6921, "step": 182000 }, { "epoch": 52.36191024165708, "grad_norm": 0.6775416135787964, "learning_rate": 0.0009527617951668585, "loss": 0.5595, "step": 182010 }, { "epoch": 52.364787111622555, "grad_norm": 1.6191551685333252, "learning_rate": 0.0009527042577675489, "loss": 0.5381, "step": 182020 }, { "epoch": 52.36766398158803, "grad_norm": 1.3694990873336792, "learning_rate": 0.0009526467203682394, "loss": 0.5077, "step": 182030 }, { "epoch": 52.37054085155351, "grad_norm": 0.7908263802528381, "learning_rate": 0.0009525891829689298, "loss": 0.5067, "step": 182040 }, { "epoch": 52.37341772151899, "grad_norm": 2.531522035598755, "learning_rate": 0.0009525316455696203, "loss": 0.5957, "step": 182050 }, { "epoch": 52.376294591484466, "grad_norm": 0.9140535593032837, "learning_rate": 0.0009524741081703107, "loss": 0.5176, "step": 182060 }, { "epoch": 52.37917146144994, "grad_norm": 1.0230425596237183, "learning_rate": 0.0009524165707710012, "loss": 0.4784, "step": 182070 }, { "epoch": 52.38204833141542, "grad_norm": 1.103798747062683, "learning_rate": 0.0009523590333716916, "loss": 0.5019, "step": 182080 }, { "epoch": 52.384925201380895, "grad_norm": 0.6917409300804138, "learning_rate": 0.000952301495972382, "loss": 0.4868, "step": 182090 }, { "epoch": 52.38780207134638, "grad_norm": 0.9496123194694519, "learning_rate": 0.0009522439585730726, "loss": 0.4268, "step": 182100 }, { "epoch": 52.390678941311855, "grad_norm": 1.1466569900512695, "learning_rate": 0.000952186421173763, "loss": 0.4898, "step": 182110 }, { "epoch": 52.39355581127733, "grad_norm": 1.4778589010238647, "learning_rate": 0.0009521288837744534, "loss": 0.4149, "step": 182120 }, { "epoch": 52.39643268124281, "grad_norm": 1.663456678390503, "learning_rate": 0.0009520713463751438, "loss": 0.5836, "step": 182130 }, { "epoch": 52.39930955120828, "grad_norm": 1.4763572216033936, "learning_rate": 0.0009520138089758344, "loss": 0.5937, "step": 182140 }, { "epoch": 52.40218642117377, "grad_norm": 1.1717702150344849, "learning_rate": 0.0009519562715765247, "loss": 0.41, "step": 182150 }, { "epoch": 52.40506329113924, "grad_norm": 1.1505475044250488, "learning_rate": 0.0009518987341772152, "loss": 0.4323, "step": 182160 }, { "epoch": 52.40794016110472, "grad_norm": 1.5282443761825562, "learning_rate": 0.0009518411967779057, "loss": 0.4594, "step": 182170 }, { "epoch": 52.410817031070195, "grad_norm": 0.593222975730896, "learning_rate": 0.0009517836593785961, "loss": 0.4799, "step": 182180 }, { "epoch": 52.41369390103567, "grad_norm": 1.8502219915390015, "learning_rate": 0.0009517261219792865, "loss": 0.4755, "step": 182190 }, { "epoch": 52.41657077100115, "grad_norm": 0.8991269469261169, "learning_rate": 0.000951668584579977, "loss": 0.3704, "step": 182200 }, { "epoch": 52.41944764096663, "grad_norm": 1.9736260175704956, "learning_rate": 0.0009516110471806675, "loss": 0.6206, "step": 182210 }, { "epoch": 52.42232451093211, "grad_norm": 0.7361525297164917, "learning_rate": 0.0009515535097813579, "loss": 0.5665, "step": 182220 }, { "epoch": 52.42520138089758, "grad_norm": 1.8269761800765991, "learning_rate": 0.0009514959723820484, "loss": 0.6168, "step": 182230 }, { "epoch": 52.42807825086306, "grad_norm": 0.9750592112541199, "learning_rate": 0.0009514384349827388, "loss": 0.431, "step": 182240 }, { "epoch": 52.430955120828536, "grad_norm": 0.7459394931793213, "learning_rate": 0.0009513808975834293, "loss": 0.5479, "step": 182250 }, { "epoch": 52.43383199079402, "grad_norm": 3.016892194747925, "learning_rate": 0.0009513233601841197, "loss": 0.5173, "step": 182260 }, { "epoch": 52.436708860759495, "grad_norm": 3.366621732711792, "learning_rate": 0.0009512658227848101, "loss": 0.5982, "step": 182270 }, { "epoch": 52.43958573072497, "grad_norm": 1.6250983476638794, "learning_rate": 0.0009512082853855006, "loss": 0.5066, "step": 182280 }, { "epoch": 52.44246260069045, "grad_norm": 1.1779141426086426, "learning_rate": 0.0009511507479861911, "loss": 0.3981, "step": 182290 }, { "epoch": 52.445339470655924, "grad_norm": 1.2009552717208862, "learning_rate": 0.0009510932105868815, "loss": 0.5156, "step": 182300 }, { "epoch": 52.44821634062141, "grad_norm": 0.9451977014541626, "learning_rate": 0.0009510356731875719, "loss": 0.6677, "step": 182310 }, { "epoch": 52.45109321058688, "grad_norm": 1.083503246307373, "learning_rate": 0.0009509781357882625, "loss": 0.4902, "step": 182320 }, { "epoch": 52.45397008055236, "grad_norm": 0.7955496311187744, "learning_rate": 0.0009509205983889528, "loss": 0.4877, "step": 182330 }, { "epoch": 52.456846950517836, "grad_norm": 1.5551444292068481, "learning_rate": 0.0009508630609896433, "loss": 0.537, "step": 182340 }, { "epoch": 52.45972382048331, "grad_norm": 0.8594126105308533, "learning_rate": 0.0009508055235903338, "loss": 0.413, "step": 182350 }, { "epoch": 52.462600690448795, "grad_norm": 1.5368688106536865, "learning_rate": 0.0009507479861910242, "loss": 0.6412, "step": 182360 }, { "epoch": 52.46547756041427, "grad_norm": 1.240922451019287, "learning_rate": 0.0009506904487917146, "loss": 0.5806, "step": 182370 }, { "epoch": 52.46835443037975, "grad_norm": 2.083159923553467, "learning_rate": 0.0009506329113924051, "loss": 0.6545, "step": 182380 }, { "epoch": 52.471231300345224, "grad_norm": 1.5219957828521729, "learning_rate": 0.0009505753739930956, "loss": 0.6562, "step": 182390 }, { "epoch": 52.4741081703107, "grad_norm": 2.213346004486084, "learning_rate": 0.000950517836593786, "loss": 0.6368, "step": 182400 }, { "epoch": 52.476985040276176, "grad_norm": 1.7398388385772705, "learning_rate": 0.0009504602991944764, "loss": 0.4719, "step": 182410 }, { "epoch": 52.47986191024166, "grad_norm": 1.0460147857666016, "learning_rate": 0.0009504027617951668, "loss": 0.4924, "step": 182420 }, { "epoch": 52.482738780207136, "grad_norm": 1.984015941619873, "learning_rate": 0.0009503452243958574, "loss": 0.5902, "step": 182430 }, { "epoch": 52.48561565017261, "grad_norm": 1.8165479898452759, "learning_rate": 0.0009502876869965477, "loss": 0.631, "step": 182440 }, { "epoch": 52.48849252013809, "grad_norm": 1.3336236476898193, "learning_rate": 0.0009502301495972382, "loss": 0.652, "step": 182450 }, { "epoch": 52.491369390103564, "grad_norm": 1.441728115081787, "learning_rate": 0.0009501726121979287, "loss": 0.6134, "step": 182460 }, { "epoch": 52.49424626006905, "grad_norm": 2.106084108352661, "learning_rate": 0.0009501150747986191, "loss": 0.5808, "step": 182470 }, { "epoch": 52.497123130034524, "grad_norm": 1.7410798072814941, "learning_rate": 0.0009500575373993095, "loss": 0.6474, "step": 182480 }, { "epoch": 52.5, "grad_norm": 1.4448047876358032, "learning_rate": 0.00095, "loss": 0.4573, "step": 182490 }, { "epoch": 52.502876869965476, "grad_norm": 1.4424457550048828, "learning_rate": 0.0009499424626006905, "loss": 0.5234, "step": 182500 }, { "epoch": 52.50575373993095, "grad_norm": 1.182348370552063, "learning_rate": 0.0009498849252013809, "loss": 0.4086, "step": 182510 }, { "epoch": 52.508630609896436, "grad_norm": 1.7472692728042603, "learning_rate": 0.0009498273878020714, "loss": 0.5576, "step": 182520 }, { "epoch": 52.51150747986191, "grad_norm": 2.505134344100952, "learning_rate": 0.0009497698504027618, "loss": 0.6124, "step": 182530 }, { "epoch": 52.51438434982739, "grad_norm": 1.745605707168579, "learning_rate": 0.0009497123130034523, "loss": 0.5346, "step": 182540 }, { "epoch": 52.517261219792864, "grad_norm": 1.4215447902679443, "learning_rate": 0.0009496547756041427, "loss": 0.5603, "step": 182550 }, { "epoch": 52.52013808975834, "grad_norm": 1.3319936990737915, "learning_rate": 0.0009495972382048331, "loss": 0.6011, "step": 182560 }, { "epoch": 52.523014959723824, "grad_norm": 0.6822454333305359, "learning_rate": 0.0009495397008055236, "loss": 0.4751, "step": 182570 }, { "epoch": 52.5258918296893, "grad_norm": 1.0625203847885132, "learning_rate": 0.0009494821634062141, "loss": 0.6317, "step": 182580 }, { "epoch": 52.528768699654776, "grad_norm": 2.186173677444458, "learning_rate": 0.0009494246260069044, "loss": 0.6687, "step": 182590 }, { "epoch": 52.53164556962025, "grad_norm": 2.054793357849121, "learning_rate": 0.0009493670886075949, "loss": 0.7053, "step": 182600 }, { "epoch": 52.53452243958573, "grad_norm": 1.0963302850723267, "learning_rate": 0.0009493095512082855, "loss": 0.5376, "step": 182610 }, { "epoch": 52.537399309551205, "grad_norm": 1.145245909690857, "learning_rate": 0.0009492520138089758, "loss": 0.4741, "step": 182620 }, { "epoch": 52.54027617951669, "grad_norm": 1.5345640182495117, "learning_rate": 0.0009491944764096663, "loss": 0.5715, "step": 182630 }, { "epoch": 52.543153049482164, "grad_norm": 0.9624919295310974, "learning_rate": 0.0009491369390103568, "loss": 0.5315, "step": 182640 }, { "epoch": 52.54602991944764, "grad_norm": 1.759126901626587, "learning_rate": 0.0009490794016110472, "loss": 0.6001, "step": 182650 }, { "epoch": 52.54890678941312, "grad_norm": 1.2759637832641602, "learning_rate": 0.0009490218642117376, "loss": 0.5058, "step": 182660 }, { "epoch": 52.55178365937859, "grad_norm": 1.3327995538711548, "learning_rate": 0.0009489643268124281, "loss": 0.4789, "step": 182670 }, { "epoch": 52.554660529344076, "grad_norm": 1.0201961994171143, "learning_rate": 0.0009489067894131185, "loss": 0.5233, "step": 182680 }, { "epoch": 52.55753739930955, "grad_norm": 0.8718031048774719, "learning_rate": 0.000948849252013809, "loss": 0.493, "step": 182690 }, { "epoch": 52.56041426927503, "grad_norm": 0.867084801197052, "learning_rate": 0.0009487917146144995, "loss": 0.4317, "step": 182700 }, { "epoch": 52.563291139240505, "grad_norm": 1.5151926279067993, "learning_rate": 0.0009487341772151898, "loss": 0.6055, "step": 182710 }, { "epoch": 52.56616800920598, "grad_norm": 1.120405912399292, "learning_rate": 0.0009486766398158804, "loss": 0.482, "step": 182720 }, { "epoch": 52.569044879171464, "grad_norm": 1.259957194328308, "learning_rate": 0.0009486191024165708, "loss": 0.4968, "step": 182730 }, { "epoch": 52.57192174913694, "grad_norm": 1.1830919981002808, "learning_rate": 0.0009485615650172612, "loss": 0.6008, "step": 182740 }, { "epoch": 52.57479861910242, "grad_norm": 1.1063965559005737, "learning_rate": 0.0009485040276179517, "loss": 0.415, "step": 182750 }, { "epoch": 52.57767548906789, "grad_norm": 1.377705454826355, "learning_rate": 0.0009484464902186422, "loss": 0.4433, "step": 182760 }, { "epoch": 52.58055235903337, "grad_norm": 0.8137063384056091, "learning_rate": 0.0009483889528193325, "loss": 0.6233, "step": 182770 }, { "epoch": 52.58342922899885, "grad_norm": 0.9054972529411316, "learning_rate": 0.000948331415420023, "loss": 0.6273, "step": 182780 }, { "epoch": 52.58630609896433, "grad_norm": 2.109229326248169, "learning_rate": 0.0009482738780207136, "loss": 0.5714, "step": 182790 }, { "epoch": 52.589182968929805, "grad_norm": 1.9025577306747437, "learning_rate": 0.0009482163406214039, "loss": 0.5809, "step": 182800 }, { "epoch": 52.59205983889528, "grad_norm": 0.748195469379425, "learning_rate": 0.0009481588032220944, "loss": 0.5496, "step": 182810 }, { "epoch": 52.59493670886076, "grad_norm": 1.951277732849121, "learning_rate": 0.0009481012658227848, "loss": 0.5946, "step": 182820 }, { "epoch": 52.59781357882623, "grad_norm": 1.1140254735946655, "learning_rate": 0.0009480437284234753, "loss": 0.4283, "step": 182830 }, { "epoch": 52.60069044879172, "grad_norm": 1.1794750690460205, "learning_rate": 0.0009479861910241657, "loss": 0.4257, "step": 182840 }, { "epoch": 52.60356731875719, "grad_norm": 1.8289384841918945, "learning_rate": 0.0009479286536248562, "loss": 0.5764, "step": 182850 }, { "epoch": 52.60644418872267, "grad_norm": 1.6431198120117188, "learning_rate": 0.0009478711162255466, "loss": 0.5109, "step": 182860 }, { "epoch": 52.609321058688145, "grad_norm": 1.803605318069458, "learning_rate": 0.0009478135788262371, "loss": 0.7093, "step": 182870 }, { "epoch": 52.61219792865362, "grad_norm": 1.3079341650009155, "learning_rate": 0.0009477560414269275, "loss": 0.4457, "step": 182880 }, { "epoch": 52.615074798619105, "grad_norm": 1.6762020587921143, "learning_rate": 0.0009476985040276179, "loss": 0.558, "step": 182890 }, { "epoch": 52.61795166858458, "grad_norm": 0.8711796402931213, "learning_rate": 0.0009476409666283085, "loss": 0.4802, "step": 182900 }, { "epoch": 52.62082853855006, "grad_norm": 0.8201977014541626, "learning_rate": 0.0009475834292289989, "loss": 0.5018, "step": 182910 }, { "epoch": 52.623705408515534, "grad_norm": 0.654241144657135, "learning_rate": 0.0009475258918296893, "loss": 0.4255, "step": 182920 }, { "epoch": 52.62658227848101, "grad_norm": 1.0656546354293823, "learning_rate": 0.0009474683544303798, "loss": 0.4973, "step": 182930 }, { "epoch": 52.62945914844649, "grad_norm": 1.0250515937805176, "learning_rate": 0.0009474108170310703, "loss": 0.4881, "step": 182940 }, { "epoch": 52.63233601841197, "grad_norm": 0.6307627558708191, "learning_rate": 0.0009473532796317606, "loss": 0.6239, "step": 182950 }, { "epoch": 52.635212888377445, "grad_norm": 1.671230435371399, "learning_rate": 0.0009472957422324511, "loss": 0.4754, "step": 182960 }, { "epoch": 52.63808975834292, "grad_norm": 2.006634473800659, "learning_rate": 0.0009472382048331416, "loss": 0.5813, "step": 182970 }, { "epoch": 52.6409666283084, "grad_norm": 1.2092294692993164, "learning_rate": 0.000947180667433832, "loss": 0.6161, "step": 182980 }, { "epoch": 52.64384349827388, "grad_norm": 1.0016024112701416, "learning_rate": 0.0009471231300345224, "loss": 0.4218, "step": 182990 }, { "epoch": 52.64672036823936, "grad_norm": 2.371248245239258, "learning_rate": 0.0009470655926352129, "loss": 0.5546, "step": 183000 }, { "epoch": 52.649597238204834, "grad_norm": 0.8498952984809875, "learning_rate": 0.0009470080552359034, "loss": 0.6558, "step": 183010 }, { "epoch": 52.65247410817031, "grad_norm": 1.165814995765686, "learning_rate": 0.0009469505178365938, "loss": 0.4536, "step": 183020 }, { "epoch": 52.655350978135786, "grad_norm": 2.4712321758270264, "learning_rate": 0.0009468929804372843, "loss": 0.7205, "step": 183030 }, { "epoch": 52.65822784810126, "grad_norm": 1.2810757160186768, "learning_rate": 0.0009468354430379747, "loss": 0.4557, "step": 183040 }, { "epoch": 52.661104718066746, "grad_norm": 1.4481115341186523, "learning_rate": 0.0009467779056386652, "loss": 0.5035, "step": 183050 }, { "epoch": 52.66398158803222, "grad_norm": 1.3986222743988037, "learning_rate": 0.0009467203682393556, "loss": 0.5892, "step": 183060 }, { "epoch": 52.6668584579977, "grad_norm": 1.0220178365707397, "learning_rate": 0.000946662830840046, "loss": 0.4623, "step": 183070 }, { "epoch": 52.669735327963174, "grad_norm": 1.5653530359268188, "learning_rate": 0.0009466052934407365, "loss": 0.4593, "step": 183080 }, { "epoch": 52.67261219792865, "grad_norm": 0.9426063895225525, "learning_rate": 0.000946547756041427, "loss": 0.4138, "step": 183090 }, { "epoch": 52.675489067894134, "grad_norm": 0.9052807092666626, "learning_rate": 0.0009464902186421174, "loss": 0.4829, "step": 183100 }, { "epoch": 52.67836593785961, "grad_norm": 1.586603045463562, "learning_rate": 0.0009464326812428078, "loss": 0.6391, "step": 183110 }, { "epoch": 52.681242807825086, "grad_norm": 1.256978988647461, "learning_rate": 0.0009463751438434984, "loss": 0.6402, "step": 183120 }, { "epoch": 52.68411967779056, "grad_norm": 1.2232717275619507, "learning_rate": 0.0009463176064441887, "loss": 0.3892, "step": 183130 }, { "epoch": 52.68699654775604, "grad_norm": 0.9710673093795776, "learning_rate": 0.0009462600690448792, "loss": 0.5131, "step": 183140 }, { "epoch": 52.68987341772152, "grad_norm": 2.556067705154419, "learning_rate": 0.0009462025316455697, "loss": 0.5341, "step": 183150 }, { "epoch": 52.692750287687, "grad_norm": 1.8376556634902954, "learning_rate": 0.0009461449942462601, "loss": 0.4441, "step": 183160 }, { "epoch": 52.695627157652474, "grad_norm": 1.7403244972229004, "learning_rate": 0.0009460874568469505, "loss": 0.5588, "step": 183170 }, { "epoch": 52.69850402761795, "grad_norm": 0.9074386954307556, "learning_rate": 0.000946029919447641, "loss": 0.5466, "step": 183180 }, { "epoch": 52.70138089758343, "grad_norm": 0.7979340553283691, "learning_rate": 0.0009459723820483314, "loss": 0.471, "step": 183190 }, { "epoch": 52.70425776754891, "grad_norm": 1.4596554040908813, "learning_rate": 0.0009459148446490219, "loss": 0.5963, "step": 183200 }, { "epoch": 52.707134637514386, "grad_norm": 1.512423038482666, "learning_rate": 0.0009458573072497124, "loss": 0.4217, "step": 183210 }, { "epoch": 52.71001150747986, "grad_norm": 2.414156436920166, "learning_rate": 0.0009457997698504028, "loss": 0.4624, "step": 183220 }, { "epoch": 52.71288837744534, "grad_norm": 1.1563247442245483, "learning_rate": 0.0009457422324510933, "loss": 0.4835, "step": 183230 }, { "epoch": 52.715765247410815, "grad_norm": 1.1933234930038452, "learning_rate": 0.0009456846950517836, "loss": 0.5958, "step": 183240 }, { "epoch": 52.7186421173763, "grad_norm": 1.210432529449463, "learning_rate": 0.0009456271576524741, "loss": 0.5228, "step": 183250 }, { "epoch": 52.721518987341774, "grad_norm": 0.6694377064704895, "learning_rate": 0.0009455696202531646, "loss": 0.3937, "step": 183260 }, { "epoch": 52.72439585730725, "grad_norm": 1.251190423965454, "learning_rate": 0.000945512082853855, "loss": 0.5294, "step": 183270 }, { "epoch": 52.72727272727273, "grad_norm": 1.386977195739746, "learning_rate": 0.0009454545454545454, "loss": 0.7332, "step": 183280 }, { "epoch": 52.7301495972382, "grad_norm": 1.6948485374450684, "learning_rate": 0.0009453970080552359, "loss": 0.5795, "step": 183290 }, { "epoch": 52.73302646720368, "grad_norm": 0.9435979127883911, "learning_rate": 0.0009453394706559264, "loss": 0.4385, "step": 183300 }, { "epoch": 52.73590333716916, "grad_norm": 1.105491280555725, "learning_rate": 0.0009452819332566168, "loss": 0.4638, "step": 183310 }, { "epoch": 52.73878020713464, "grad_norm": 1.117493987083435, "learning_rate": 0.0009452243958573073, "loss": 0.488, "step": 183320 }, { "epoch": 52.741657077100115, "grad_norm": 1.8156424760818481, "learning_rate": 0.0009451668584579977, "loss": 0.5274, "step": 183330 }, { "epoch": 52.74453394706559, "grad_norm": 1.0599644184112549, "learning_rate": 0.0009451093210586882, "loss": 0.6576, "step": 183340 }, { "epoch": 52.74741081703107, "grad_norm": 1.0829236507415771, "learning_rate": 0.0009450517836593786, "loss": 0.8034, "step": 183350 }, { "epoch": 52.75028768699655, "grad_norm": 1.0497760772705078, "learning_rate": 0.000944994246260069, "loss": 0.4798, "step": 183360 }, { "epoch": 52.75316455696203, "grad_norm": 0.924773633480072, "learning_rate": 0.0009449367088607595, "loss": 0.5836, "step": 183370 }, { "epoch": 52.7560414269275, "grad_norm": 1.1615464687347412, "learning_rate": 0.00094487917146145, "loss": 0.5887, "step": 183380 }, { "epoch": 52.75891829689298, "grad_norm": 0.7606778740882874, "learning_rate": 0.0009448216340621403, "loss": 0.5272, "step": 183390 }, { "epoch": 52.761795166858455, "grad_norm": 2.154918909072876, "learning_rate": 0.0009447640966628308, "loss": 0.588, "step": 183400 }, { "epoch": 52.76467203682394, "grad_norm": 1.1192058324813843, "learning_rate": 0.0009447065592635214, "loss": 0.5037, "step": 183410 }, { "epoch": 52.767548906789415, "grad_norm": 0.9718700051307678, "learning_rate": 0.0009446490218642117, "loss": 0.446, "step": 183420 }, { "epoch": 52.77042577675489, "grad_norm": 1.421101450920105, "learning_rate": 0.0009445914844649022, "loss": 0.6817, "step": 183430 }, { "epoch": 52.77330264672037, "grad_norm": 1.495133876800537, "learning_rate": 0.0009445339470655927, "loss": 0.4662, "step": 183440 }, { "epoch": 52.77617951668584, "grad_norm": 0.877753496170044, "learning_rate": 0.0009444764096662831, "loss": 0.5307, "step": 183450 }, { "epoch": 52.77905638665133, "grad_norm": 0.9306657910346985, "learning_rate": 0.0009444188722669735, "loss": 0.7369, "step": 183460 }, { "epoch": 52.7819332566168, "grad_norm": 1.1156891584396362, "learning_rate": 0.000944361334867664, "loss": 0.5443, "step": 183470 }, { "epoch": 52.78481012658228, "grad_norm": 1.564342737197876, "learning_rate": 0.0009443037974683544, "loss": 0.6053, "step": 183480 }, { "epoch": 52.787686996547755, "grad_norm": 1.8265578746795654, "learning_rate": 0.0009442462600690449, "loss": 0.6046, "step": 183490 }, { "epoch": 52.79056386651323, "grad_norm": 1.3238155841827393, "learning_rate": 0.0009441887226697354, "loss": 0.5126, "step": 183500 }, { "epoch": 52.79344073647871, "grad_norm": 0.6352380514144897, "learning_rate": 0.0009441311852704257, "loss": 0.4496, "step": 183510 }, { "epoch": 52.79631760644419, "grad_norm": 0.7492839097976685, "learning_rate": 0.0009440736478711163, "loss": 0.5502, "step": 183520 }, { "epoch": 52.79919447640967, "grad_norm": 1.1868939399719238, "learning_rate": 0.0009440161104718067, "loss": 0.5053, "step": 183530 }, { "epoch": 52.80207134637514, "grad_norm": 1.3585888147354126, "learning_rate": 0.0009439585730724971, "loss": 0.454, "step": 183540 }, { "epoch": 52.80494821634062, "grad_norm": 0.9863675832748413, "learning_rate": 0.0009439010356731876, "loss": 0.5494, "step": 183550 }, { "epoch": 52.807825086306096, "grad_norm": 0.8196317553520203, "learning_rate": 0.0009438434982738781, "loss": 0.6189, "step": 183560 }, { "epoch": 52.81070195627158, "grad_norm": 2.424410581588745, "learning_rate": 0.0009437859608745684, "loss": 0.5484, "step": 183570 }, { "epoch": 52.813578826237055, "grad_norm": 2.0526719093322754, "learning_rate": 0.0009437284234752589, "loss": 0.7735, "step": 183580 }, { "epoch": 52.81645569620253, "grad_norm": 1.157151699066162, "learning_rate": 0.0009436708860759495, "loss": 0.494, "step": 183590 }, { "epoch": 52.81933256616801, "grad_norm": 1.962053656578064, "learning_rate": 0.0009436133486766398, "loss": 0.5586, "step": 183600 }, { "epoch": 52.822209436133484, "grad_norm": 1.5495227575302124, "learning_rate": 0.0009435558112773303, "loss": 0.6199, "step": 183610 }, { "epoch": 52.82508630609897, "grad_norm": 1.2756710052490234, "learning_rate": 0.0009434982738780208, "loss": 0.5325, "step": 183620 }, { "epoch": 52.82796317606444, "grad_norm": 1.9501351118087769, "learning_rate": 0.0009434407364787112, "loss": 0.6328, "step": 183630 }, { "epoch": 52.83084004602992, "grad_norm": 1.4028189182281494, "learning_rate": 0.0009433831990794016, "loss": 0.5997, "step": 183640 }, { "epoch": 52.833716915995396, "grad_norm": 1.6990478038787842, "learning_rate": 0.0009433256616800921, "loss": 0.6147, "step": 183650 }, { "epoch": 52.83659378596087, "grad_norm": 1.226650357246399, "learning_rate": 0.0009432681242807825, "loss": 0.6748, "step": 183660 }, { "epoch": 52.839470655926355, "grad_norm": 1.0202494859695435, "learning_rate": 0.000943210586881473, "loss": 0.515, "step": 183670 }, { "epoch": 52.84234752589183, "grad_norm": 0.749352753162384, "learning_rate": 0.0009431530494821634, "loss": 0.5295, "step": 183680 }, { "epoch": 52.84522439585731, "grad_norm": 1.5270154476165771, "learning_rate": 0.0009430955120828538, "loss": 0.5359, "step": 183690 }, { "epoch": 52.848101265822784, "grad_norm": 1.4360178709030151, "learning_rate": 0.0009430379746835444, "loss": 0.5294, "step": 183700 }, { "epoch": 52.85097813578826, "grad_norm": 0.8328234553337097, "learning_rate": 0.0009429804372842348, "loss": 0.5949, "step": 183710 }, { "epoch": 52.85385500575374, "grad_norm": 1.389596939086914, "learning_rate": 0.0009429228998849252, "loss": 0.5695, "step": 183720 }, { "epoch": 52.85673187571922, "grad_norm": 1.49485182762146, "learning_rate": 0.0009428653624856157, "loss": 0.5203, "step": 183730 }, { "epoch": 52.859608745684696, "grad_norm": 2.139571189880371, "learning_rate": 0.0009428078250863062, "loss": 0.5151, "step": 183740 }, { "epoch": 52.86248561565017, "grad_norm": 1.7973748445510864, "learning_rate": 0.0009427502876869965, "loss": 0.5955, "step": 183750 }, { "epoch": 52.86536248561565, "grad_norm": 1.4443212747573853, "learning_rate": 0.000942692750287687, "loss": 0.4504, "step": 183760 }, { "epoch": 52.868239355581125, "grad_norm": 1.1285583972930908, "learning_rate": 0.0009426352128883775, "loss": 0.4778, "step": 183770 }, { "epoch": 52.87111622554661, "grad_norm": 1.2307155132293701, "learning_rate": 0.0009425776754890679, "loss": 0.5388, "step": 183780 }, { "epoch": 52.873993095512084, "grad_norm": 1.1362725496292114, "learning_rate": 0.0009425201380897583, "loss": 0.4978, "step": 183790 }, { "epoch": 52.87686996547756, "grad_norm": 1.114700198173523, "learning_rate": 0.0009424626006904488, "loss": 0.5648, "step": 183800 }, { "epoch": 52.879746835443036, "grad_norm": 2.2254929542541504, "learning_rate": 0.0009424050632911393, "loss": 0.7001, "step": 183810 }, { "epoch": 52.88262370540851, "grad_norm": 1.654332160949707, "learning_rate": 0.0009423475258918297, "loss": 0.481, "step": 183820 }, { "epoch": 52.885500575373996, "grad_norm": 1.1007829904556274, "learning_rate": 0.0009422899884925202, "loss": 0.5773, "step": 183830 }, { "epoch": 52.88837744533947, "grad_norm": 1.101559042930603, "learning_rate": 0.0009422324510932106, "loss": 0.5325, "step": 183840 }, { "epoch": 52.89125431530495, "grad_norm": 1.5212045907974243, "learning_rate": 0.0009421749136939011, "loss": 0.5594, "step": 183850 }, { "epoch": 52.894131185270425, "grad_norm": 0.8115573525428772, "learning_rate": 0.0009421173762945915, "loss": 0.6268, "step": 183860 }, { "epoch": 52.8970080552359, "grad_norm": 2.0010910034179688, "learning_rate": 0.0009420598388952819, "loss": 0.6168, "step": 183870 }, { "epoch": 52.899884925201384, "grad_norm": 1.4661222696304321, "learning_rate": 0.0009420023014959724, "loss": 0.5174, "step": 183880 }, { "epoch": 52.90276179516686, "grad_norm": 1.0311291217803955, "learning_rate": 0.0009419447640966629, "loss": 0.6596, "step": 183890 }, { "epoch": 52.90563866513234, "grad_norm": 1.0316877365112305, "learning_rate": 0.0009418872266973532, "loss": 0.4829, "step": 183900 }, { "epoch": 52.90851553509781, "grad_norm": 0.8843462467193604, "learning_rate": 0.0009418296892980438, "loss": 0.5424, "step": 183910 }, { "epoch": 52.91139240506329, "grad_norm": 0.8724769353866577, "learning_rate": 0.0009417721518987343, "loss": 0.5093, "step": 183920 }, { "epoch": 52.91426927502877, "grad_norm": 2.525646686553955, "learning_rate": 0.0009417146144994246, "loss": 0.6437, "step": 183930 }, { "epoch": 52.91714614499425, "grad_norm": 1.0729690790176392, "learning_rate": 0.0009416570771001151, "loss": 0.4411, "step": 183940 }, { "epoch": 52.920023014959725, "grad_norm": 1.189651608467102, "learning_rate": 0.0009415995397008056, "loss": 0.5175, "step": 183950 }, { "epoch": 52.9228998849252, "grad_norm": 3.262399196624756, "learning_rate": 0.000941542002301496, "loss": 0.6543, "step": 183960 }, { "epoch": 52.92577675489068, "grad_norm": 1.436712384223938, "learning_rate": 0.0009414844649021864, "loss": 0.5686, "step": 183970 }, { "epoch": 52.92865362485615, "grad_norm": 1.06752347946167, "learning_rate": 0.0009414269275028769, "loss": 0.4744, "step": 183980 }, { "epoch": 52.93153049482164, "grad_norm": 1.7354607582092285, "learning_rate": 0.0009413693901035673, "loss": 0.5097, "step": 183990 }, { "epoch": 52.93440736478711, "grad_norm": 0.6765187382698059, "learning_rate": 0.0009413118527042578, "loss": 0.475, "step": 184000 }, { "epoch": 52.93728423475259, "grad_norm": 1.2500780820846558, "learning_rate": 0.0009412543153049483, "loss": 0.5264, "step": 184010 }, { "epoch": 52.940161104718065, "grad_norm": 1.0470523834228516, "learning_rate": 0.0009411967779056387, "loss": 0.4524, "step": 184020 }, { "epoch": 52.94303797468354, "grad_norm": 0.8374322652816772, "learning_rate": 0.0009411392405063292, "loss": 0.5015, "step": 184030 }, { "epoch": 52.945914844649025, "grad_norm": 1.515333890914917, "learning_rate": 0.0009410817031070196, "loss": 0.6539, "step": 184040 }, { "epoch": 52.9487917146145, "grad_norm": 1.4623059034347534, "learning_rate": 0.00094102416570771, "loss": 0.4864, "step": 184050 }, { "epoch": 52.95166858457998, "grad_norm": 1.00944983959198, "learning_rate": 0.0009409666283084005, "loss": 0.4964, "step": 184060 }, { "epoch": 52.95454545454545, "grad_norm": 1.4582489728927612, "learning_rate": 0.0009409090909090909, "loss": 0.6043, "step": 184070 }, { "epoch": 52.95742232451093, "grad_norm": 1.6266862154006958, "learning_rate": 0.0009408515535097813, "loss": 0.548, "step": 184080 }, { "epoch": 52.96029919447641, "grad_norm": 1.0112907886505127, "learning_rate": 0.0009407940161104718, "loss": 0.4832, "step": 184090 }, { "epoch": 52.96317606444189, "grad_norm": 1.0944788455963135, "learning_rate": 0.0009407364787111622, "loss": 0.6052, "step": 184100 }, { "epoch": 52.966052934407365, "grad_norm": 0.9259018301963806, "learning_rate": 0.0009406789413118527, "loss": 0.4727, "step": 184110 }, { "epoch": 52.96892980437284, "grad_norm": 0.7906967997550964, "learning_rate": 0.0009406214039125432, "loss": 0.5331, "step": 184120 }, { "epoch": 52.97180667433832, "grad_norm": 1.0348479747772217, "learning_rate": 0.0009405638665132336, "loss": 0.5052, "step": 184130 }, { "epoch": 52.9746835443038, "grad_norm": 0.9281073212623596, "learning_rate": 0.0009405063291139241, "loss": 0.4499, "step": 184140 }, { "epoch": 52.97756041426928, "grad_norm": 1.4516032934188843, "learning_rate": 0.0009404487917146145, "loss": 0.5777, "step": 184150 }, { "epoch": 52.98043728423475, "grad_norm": 1.342928171157837, "learning_rate": 0.0009403912543153049, "loss": 0.4546, "step": 184160 }, { "epoch": 52.98331415420023, "grad_norm": 1.6460379362106323, "learning_rate": 0.0009403337169159954, "loss": 0.7295, "step": 184170 }, { "epoch": 52.986191024165706, "grad_norm": 1.6105059385299683, "learning_rate": 0.0009402761795166859, "loss": 0.493, "step": 184180 }, { "epoch": 52.98906789413118, "grad_norm": 1.4951503276824951, "learning_rate": 0.0009402186421173762, "loss": 0.5235, "step": 184190 }, { "epoch": 52.991944764096665, "grad_norm": 1.2162469625473022, "learning_rate": 0.0009401611047180668, "loss": 0.5336, "step": 184200 }, { "epoch": 52.99482163406214, "grad_norm": 1.103907823562622, "learning_rate": 0.0009401035673187573, "loss": 0.7027, "step": 184210 }, { "epoch": 52.99769850402762, "grad_norm": 0.889705240726471, "learning_rate": 0.0009400460299194476, "loss": 0.6557, "step": 184220 }, { "epoch": 53.000575373993094, "grad_norm": 1.6117037534713745, "learning_rate": 0.0009399884925201381, "loss": 0.6015, "step": 184230 }, { "epoch": 53.00345224395857, "grad_norm": 1.2424145936965942, "learning_rate": 0.0009399309551208286, "loss": 0.4604, "step": 184240 }, { "epoch": 53.00632911392405, "grad_norm": 1.48212730884552, "learning_rate": 0.000939873417721519, "loss": 0.4827, "step": 184250 }, { "epoch": 53.00920598388953, "grad_norm": 1.5059999227523804, "learning_rate": 0.0009398158803222094, "loss": 0.4235, "step": 184260 }, { "epoch": 53.012082853855006, "grad_norm": 1.210649847984314, "learning_rate": 0.0009397583429228999, "loss": 0.3957, "step": 184270 }, { "epoch": 53.01495972382048, "grad_norm": 0.7905729413032532, "learning_rate": 0.0009397008055235903, "loss": 0.3856, "step": 184280 }, { "epoch": 53.01783659378596, "grad_norm": 0.7827482223510742, "learning_rate": 0.0009396432681242808, "loss": 0.5984, "step": 184290 }, { "epoch": 53.02071346375144, "grad_norm": 0.9615886807441711, "learning_rate": 0.0009395857307249713, "loss": 0.4959, "step": 184300 }, { "epoch": 53.02359033371692, "grad_norm": 1.3365060091018677, "learning_rate": 0.0009395281933256617, "loss": 0.5403, "step": 184310 }, { "epoch": 53.026467203682394, "grad_norm": 1.628743052482605, "learning_rate": 0.0009394706559263522, "loss": 0.6634, "step": 184320 }, { "epoch": 53.02934407364787, "grad_norm": 1.4218320846557617, "learning_rate": 0.0009394131185270426, "loss": 0.5024, "step": 184330 }, { "epoch": 53.032220943613346, "grad_norm": 1.9638137817382812, "learning_rate": 0.000939355581127733, "loss": 0.4784, "step": 184340 }, { "epoch": 53.03509781357883, "grad_norm": 1.2786544561386108, "learning_rate": 0.0009392980437284235, "loss": 0.5272, "step": 184350 }, { "epoch": 53.037974683544306, "grad_norm": 1.827438473701477, "learning_rate": 0.000939240506329114, "loss": 0.4432, "step": 184360 }, { "epoch": 53.04085155350978, "grad_norm": 1.7129719257354736, "learning_rate": 0.0009391829689298043, "loss": 0.4631, "step": 184370 }, { "epoch": 53.04372842347526, "grad_norm": 1.2777600288391113, "learning_rate": 0.0009391254315304948, "loss": 0.3898, "step": 184380 }, { "epoch": 53.046605293440734, "grad_norm": 1.340577244758606, "learning_rate": 0.0009390678941311854, "loss": 0.5914, "step": 184390 }, { "epoch": 53.04948216340621, "grad_norm": 0.7745294570922852, "learning_rate": 0.0009390103567318757, "loss": 0.4719, "step": 184400 }, { "epoch": 53.052359033371694, "grad_norm": 1.237509846687317, "learning_rate": 0.0009389528193325662, "loss": 0.5308, "step": 184410 }, { "epoch": 53.05523590333717, "grad_norm": 1.4364205598831177, "learning_rate": 0.0009388952819332567, "loss": 0.4859, "step": 184420 }, { "epoch": 53.058112773302646, "grad_norm": 0.9234220385551453, "learning_rate": 0.0009388377445339471, "loss": 0.4643, "step": 184430 }, { "epoch": 53.06098964326812, "grad_norm": 1.0990712642669678, "learning_rate": 0.0009387802071346375, "loss": 0.5499, "step": 184440 }, { "epoch": 53.0638665132336, "grad_norm": 1.8660738468170166, "learning_rate": 0.000938722669735328, "loss": 0.5827, "step": 184450 }, { "epoch": 53.06674338319908, "grad_norm": 0.8408239483833313, "learning_rate": 0.0009386651323360184, "loss": 0.4769, "step": 184460 }, { "epoch": 53.06962025316456, "grad_norm": 1.4704902172088623, "learning_rate": 0.0009386075949367089, "loss": 0.5187, "step": 184470 }, { "epoch": 53.072497123130034, "grad_norm": 1.4993127584457397, "learning_rate": 0.0009385500575373993, "loss": 0.4998, "step": 184480 }, { "epoch": 53.07537399309551, "grad_norm": 1.597525954246521, "learning_rate": 0.0009384925201380897, "loss": 0.5041, "step": 184490 }, { "epoch": 53.07825086306099, "grad_norm": 1.3556572198867798, "learning_rate": 0.0009384349827387803, "loss": 0.4232, "step": 184500 }, { "epoch": 53.08112773302647, "grad_norm": 1.3358668088912964, "learning_rate": 0.0009383774453394707, "loss": 0.5797, "step": 184510 }, { "epoch": 53.084004602991946, "grad_norm": 1.4961823225021362, "learning_rate": 0.0009383199079401611, "loss": 0.5729, "step": 184520 }, { "epoch": 53.08688147295742, "grad_norm": 1.3079593181610107, "learning_rate": 0.0009382623705408516, "loss": 0.4359, "step": 184530 }, { "epoch": 53.0897583429229, "grad_norm": 2.150216817855835, "learning_rate": 0.0009382048331415421, "loss": 0.5242, "step": 184540 }, { "epoch": 53.092635212888375, "grad_norm": 0.5897876620292664, "learning_rate": 0.0009381472957422324, "loss": 0.5391, "step": 184550 }, { "epoch": 53.09551208285386, "grad_norm": 1.2159755229949951, "learning_rate": 0.0009380897583429229, "loss": 0.6067, "step": 184560 }, { "epoch": 53.098388952819334, "grad_norm": 1.2614903450012207, "learning_rate": 0.0009380322209436134, "loss": 0.4548, "step": 184570 }, { "epoch": 53.10126582278481, "grad_norm": 1.6892986297607422, "learning_rate": 0.0009379746835443038, "loss": 0.5117, "step": 184580 }, { "epoch": 53.10414269275029, "grad_norm": 1.1873780488967896, "learning_rate": 0.0009379171461449942, "loss": 0.4893, "step": 184590 }, { "epoch": 53.10701956271576, "grad_norm": 0.7452897429466248, "learning_rate": 0.0009378596087456848, "loss": 0.5393, "step": 184600 }, { "epoch": 53.10989643268124, "grad_norm": 2.005553960800171, "learning_rate": 0.0009378020713463752, "loss": 0.5532, "step": 184610 }, { "epoch": 53.11277330264672, "grad_norm": 1.1854559183120728, "learning_rate": 0.0009377445339470656, "loss": 0.5654, "step": 184620 }, { "epoch": 53.1156501726122, "grad_norm": 1.034535527229309, "learning_rate": 0.0009376869965477561, "loss": 0.4975, "step": 184630 }, { "epoch": 53.118527042577675, "grad_norm": 0.994670569896698, "learning_rate": 0.0009376294591484465, "loss": 0.4636, "step": 184640 }, { "epoch": 53.12140391254315, "grad_norm": 1.7043410539627075, "learning_rate": 0.000937571921749137, "loss": 0.5189, "step": 184650 }, { "epoch": 53.12428078250863, "grad_norm": 1.6911766529083252, "learning_rate": 0.0009375143843498274, "loss": 0.4732, "step": 184660 }, { "epoch": 53.12715765247411, "grad_norm": 0.696422815322876, "learning_rate": 0.0009374568469505178, "loss": 0.4715, "step": 184670 }, { "epoch": 53.13003452243959, "grad_norm": 0.8978742957115173, "learning_rate": 0.0009373993095512083, "loss": 0.5527, "step": 184680 }, { "epoch": 53.13291139240506, "grad_norm": 1.3508825302124023, "learning_rate": 0.0009373417721518988, "loss": 0.5331, "step": 184690 }, { "epoch": 53.13578826237054, "grad_norm": 1.6346325874328613, "learning_rate": 0.0009372842347525891, "loss": 0.4199, "step": 184700 }, { "epoch": 53.138665132336016, "grad_norm": 0.7564713954925537, "learning_rate": 0.0009372266973532797, "loss": 0.4091, "step": 184710 }, { "epoch": 53.1415420023015, "grad_norm": 1.0785657167434692, "learning_rate": 0.0009371691599539702, "loss": 0.6472, "step": 184720 }, { "epoch": 53.144418872266975, "grad_norm": 1.8258037567138672, "learning_rate": 0.0009371116225546605, "loss": 0.5557, "step": 184730 }, { "epoch": 53.14729574223245, "grad_norm": 0.9282339811325073, "learning_rate": 0.000937054085155351, "loss": 0.4485, "step": 184740 }, { "epoch": 53.15017261219793, "grad_norm": 2.210238456726074, "learning_rate": 0.0009369965477560415, "loss": 0.5036, "step": 184750 }, { "epoch": 53.153049482163404, "grad_norm": 1.4969533681869507, "learning_rate": 0.0009369390103567319, "loss": 0.4802, "step": 184760 }, { "epoch": 53.15592635212889, "grad_norm": 1.075236439704895, "learning_rate": 0.0009368814729574223, "loss": 0.4371, "step": 184770 }, { "epoch": 53.15880322209436, "grad_norm": 1.2914361953735352, "learning_rate": 0.0009368239355581128, "loss": 0.5197, "step": 184780 }, { "epoch": 53.16168009205984, "grad_norm": 1.8916255235671997, "learning_rate": 0.0009367663981588032, "loss": 0.5935, "step": 184790 }, { "epoch": 53.164556962025316, "grad_norm": 0.7996817231178284, "learning_rate": 0.0009367088607594937, "loss": 0.4603, "step": 184800 }, { "epoch": 53.16743383199079, "grad_norm": 2.3269336223602295, "learning_rate": 0.0009366513233601842, "loss": 0.6031, "step": 184810 }, { "epoch": 53.170310701956275, "grad_norm": 1.1962509155273438, "learning_rate": 0.0009365937859608746, "loss": 0.4197, "step": 184820 }, { "epoch": 53.17318757192175, "grad_norm": 1.4149045944213867, "learning_rate": 0.0009365362485615651, "loss": 0.4863, "step": 184830 }, { "epoch": 53.17606444188723, "grad_norm": 1.4615452289581299, "learning_rate": 0.0009364787111622555, "loss": 0.6066, "step": 184840 }, { "epoch": 53.178941311852704, "grad_norm": 1.3464385271072388, "learning_rate": 0.0009364211737629459, "loss": 0.448, "step": 184850 }, { "epoch": 53.18181818181818, "grad_norm": 1.327560544013977, "learning_rate": 0.0009363636363636364, "loss": 0.5807, "step": 184860 }, { "epoch": 53.184695051783656, "grad_norm": 2.1012308597564697, "learning_rate": 0.0009363060989643268, "loss": 0.5539, "step": 184870 }, { "epoch": 53.18757192174914, "grad_norm": 1.5892796516418457, "learning_rate": 0.0009362485615650172, "loss": 0.6329, "step": 184880 }, { "epoch": 53.190448791714616, "grad_norm": 1.5449132919311523, "learning_rate": 0.0009361910241657078, "loss": 0.4721, "step": 184890 }, { "epoch": 53.19332566168009, "grad_norm": 1.0866820812225342, "learning_rate": 0.0009361334867663981, "loss": 0.5481, "step": 184900 }, { "epoch": 53.19620253164557, "grad_norm": 2.479275941848755, "learning_rate": 0.0009360759493670886, "loss": 0.4806, "step": 184910 }, { "epoch": 53.199079401611044, "grad_norm": 1.080419898033142, "learning_rate": 0.0009360184119677791, "loss": 0.3725, "step": 184920 }, { "epoch": 53.20195627157653, "grad_norm": 0.6162868142127991, "learning_rate": 0.0009359608745684695, "loss": 0.4361, "step": 184930 }, { "epoch": 53.204833141542004, "grad_norm": 1.282883882522583, "learning_rate": 0.00093590333716916, "loss": 0.4296, "step": 184940 }, { "epoch": 53.20771001150748, "grad_norm": 2.5854692459106445, "learning_rate": 0.0009358457997698504, "loss": 0.5796, "step": 184950 }, { "epoch": 53.210586881472956, "grad_norm": 1.4610323905944824, "learning_rate": 0.0009357882623705408, "loss": 0.4799, "step": 184960 }, { "epoch": 53.21346375143843, "grad_norm": 1.7959551811218262, "learning_rate": 0.0009357307249712313, "loss": 0.5546, "step": 184970 }, { "epoch": 53.216340621403916, "grad_norm": 0.8346577286720276, "learning_rate": 0.0009356731875719218, "loss": 0.6176, "step": 184980 }, { "epoch": 53.21921749136939, "grad_norm": 1.1793339252471924, "learning_rate": 0.0009356156501726121, "loss": 0.4725, "step": 184990 }, { "epoch": 53.22209436133487, "grad_norm": 0.7316305041313171, "learning_rate": 0.0009355581127733027, "loss": 0.5088, "step": 185000 }, { "epoch": 53.224971231300344, "grad_norm": 1.135263204574585, "learning_rate": 0.0009355005753739932, "loss": 0.5538, "step": 185010 }, { "epoch": 53.22784810126582, "grad_norm": 0.9883730411529541, "learning_rate": 0.0009354430379746835, "loss": 0.5428, "step": 185020 }, { "epoch": 53.230724971231304, "grad_norm": 2.322646141052246, "learning_rate": 0.000935385500575374, "loss": 0.6047, "step": 185030 }, { "epoch": 53.23360184119678, "grad_norm": 1.0642070770263672, "learning_rate": 0.0009353279631760645, "loss": 0.4691, "step": 185040 }, { "epoch": 53.236478711162256, "grad_norm": 0.823072075843811, "learning_rate": 0.0009352704257767549, "loss": 0.4236, "step": 185050 }, { "epoch": 53.23935558112773, "grad_norm": 2.870692253112793, "learning_rate": 0.0009352128883774453, "loss": 0.5287, "step": 185060 }, { "epoch": 53.24223245109321, "grad_norm": 1.1381473541259766, "learning_rate": 0.0009351553509781358, "loss": 0.6115, "step": 185070 }, { "epoch": 53.245109321058685, "grad_norm": 0.6128092408180237, "learning_rate": 0.0009350978135788262, "loss": 0.4599, "step": 185080 }, { "epoch": 53.24798619102417, "grad_norm": 1.6449989080429077, "learning_rate": 0.0009350402761795167, "loss": 0.6602, "step": 185090 }, { "epoch": 53.250863060989644, "grad_norm": 1.3946629762649536, "learning_rate": 0.0009349827387802071, "loss": 0.4662, "step": 185100 }, { "epoch": 53.25373993095512, "grad_norm": 2.319326162338257, "learning_rate": 0.0009349252013808976, "loss": 0.6417, "step": 185110 }, { "epoch": 53.2566168009206, "grad_norm": 1.3631983995437622, "learning_rate": 0.0009348676639815881, "loss": 0.5848, "step": 185120 }, { "epoch": 53.25949367088607, "grad_norm": 1.3445675373077393, "learning_rate": 0.0009348101265822785, "loss": 0.5315, "step": 185130 }, { "epoch": 53.262370540851556, "grad_norm": 1.4017072916030884, "learning_rate": 0.0009347525891829689, "loss": 0.5792, "step": 185140 }, { "epoch": 53.26524741081703, "grad_norm": 1.0823737382888794, "learning_rate": 0.0009346950517836594, "loss": 0.6002, "step": 185150 }, { "epoch": 53.26812428078251, "grad_norm": 0.8056831359863281, "learning_rate": 0.0009346375143843499, "loss": 0.5241, "step": 185160 }, { "epoch": 53.271001150747985, "grad_norm": 0.5651950836181641, "learning_rate": 0.0009345799769850402, "loss": 0.4477, "step": 185170 }, { "epoch": 53.27387802071346, "grad_norm": 1.8547536134719849, "learning_rate": 0.0009345224395857308, "loss": 0.4572, "step": 185180 }, { "epoch": 53.276754890678944, "grad_norm": 1.3072770833969116, "learning_rate": 0.0009344649021864212, "loss": 0.4597, "step": 185190 }, { "epoch": 53.27963176064442, "grad_norm": 1.6242862939834595, "learning_rate": 0.0009344073647871116, "loss": 0.6111, "step": 185200 }, { "epoch": 53.2825086306099, "grad_norm": 0.7773745059967041, "learning_rate": 0.000934349827387802, "loss": 0.5553, "step": 185210 }, { "epoch": 53.28538550057537, "grad_norm": 1.2835091352462769, "learning_rate": 0.0009342922899884926, "loss": 0.5596, "step": 185220 }, { "epoch": 53.28826237054085, "grad_norm": 1.1234647035598755, "learning_rate": 0.000934234752589183, "loss": 0.4073, "step": 185230 }, { "epoch": 53.29113924050633, "grad_norm": 0.7722728848457336, "learning_rate": 0.0009341772151898734, "loss": 0.5135, "step": 185240 }, { "epoch": 53.29401611047181, "grad_norm": 1.8840821981430054, "learning_rate": 0.0009341196777905639, "loss": 0.6425, "step": 185250 }, { "epoch": 53.296892980437285, "grad_norm": 1.6223574876785278, "learning_rate": 0.0009340621403912543, "loss": 0.4791, "step": 185260 }, { "epoch": 53.29976985040276, "grad_norm": 1.9484039545059204, "learning_rate": 0.0009340046029919448, "loss": 0.6013, "step": 185270 }, { "epoch": 53.30264672036824, "grad_norm": 1.0948137044906616, "learning_rate": 0.0009339470655926352, "loss": 0.4898, "step": 185280 }, { "epoch": 53.30552359033371, "grad_norm": 1.6697044372558594, "learning_rate": 0.0009338895281933257, "loss": 0.5235, "step": 185290 }, { "epoch": 53.3084004602992, "grad_norm": 0.9056973457336426, "learning_rate": 0.0009338319907940162, "loss": 0.4487, "step": 185300 }, { "epoch": 53.31127733026467, "grad_norm": 1.3327370882034302, "learning_rate": 0.0009337744533947066, "loss": 0.5566, "step": 185310 }, { "epoch": 53.31415420023015, "grad_norm": 1.7908568382263184, "learning_rate": 0.000933716915995397, "loss": 0.5699, "step": 185320 }, { "epoch": 53.317031070195625, "grad_norm": 0.8332316279411316, "learning_rate": 0.0009336593785960875, "loss": 0.4996, "step": 185330 }, { "epoch": 53.3199079401611, "grad_norm": 0.9775539636611938, "learning_rate": 0.000933601841196778, "loss": 0.5039, "step": 185340 }, { "epoch": 53.322784810126585, "grad_norm": 1.7936893701553345, "learning_rate": 0.0009335443037974683, "loss": 0.5021, "step": 185350 }, { "epoch": 53.32566168009206, "grad_norm": 0.9702759981155396, "learning_rate": 0.0009334867663981588, "loss": 0.4608, "step": 185360 }, { "epoch": 53.32853855005754, "grad_norm": 1.596653938293457, "learning_rate": 0.0009334292289988493, "loss": 0.5736, "step": 185370 }, { "epoch": 53.33141542002301, "grad_norm": 1.1123369932174683, "learning_rate": 0.0009333716915995397, "loss": 0.5332, "step": 185380 }, { "epoch": 53.33429228998849, "grad_norm": 1.6020586490631104, "learning_rate": 0.0009333141542002301, "loss": 0.4338, "step": 185390 }, { "epoch": 53.33716915995397, "grad_norm": 0.7120080590248108, "learning_rate": 0.0009332566168009207, "loss": 0.5619, "step": 185400 }, { "epoch": 53.34004602991945, "grad_norm": 0.9360897541046143, "learning_rate": 0.000933199079401611, "loss": 0.5789, "step": 185410 }, { "epoch": 53.342922899884925, "grad_norm": 1.2484006881713867, "learning_rate": 0.0009331415420023015, "loss": 0.6106, "step": 185420 }, { "epoch": 53.3457997698504, "grad_norm": 0.9875848889350891, "learning_rate": 0.000933084004602992, "loss": 0.5092, "step": 185430 }, { "epoch": 53.34867663981588, "grad_norm": 1.4610090255737305, "learning_rate": 0.0009330264672036824, "loss": 0.453, "step": 185440 }, { "epoch": 53.35155350978136, "grad_norm": 1.0219416618347168, "learning_rate": 0.0009329689298043729, "loss": 0.5636, "step": 185450 }, { "epoch": 53.35443037974684, "grad_norm": 1.816623568534851, "learning_rate": 0.0009329113924050633, "loss": 0.4758, "step": 185460 }, { "epoch": 53.35730724971231, "grad_norm": 0.9277734160423279, "learning_rate": 0.0009328538550057537, "loss": 0.4884, "step": 185470 }, { "epoch": 53.36018411967779, "grad_norm": 2.135021209716797, "learning_rate": 0.0009327963176064442, "loss": 0.4477, "step": 185480 }, { "epoch": 53.363060989643266, "grad_norm": 1.3334531784057617, "learning_rate": 0.0009327387802071347, "loss": 0.4592, "step": 185490 }, { "epoch": 53.36593785960875, "grad_norm": 0.890931248664856, "learning_rate": 0.000932681242807825, "loss": 0.4998, "step": 185500 }, { "epoch": 53.368814729574225, "grad_norm": 0.8288316130638123, "learning_rate": 0.0009326237054085156, "loss": 0.4823, "step": 185510 }, { "epoch": 53.3716915995397, "grad_norm": 0.92253577709198, "learning_rate": 0.0009325661680092061, "loss": 0.5737, "step": 185520 }, { "epoch": 53.37456846950518, "grad_norm": 1.377764344215393, "learning_rate": 0.0009325086306098964, "loss": 0.4989, "step": 185530 }, { "epoch": 53.377445339470654, "grad_norm": 0.730680525302887, "learning_rate": 0.0009324510932105869, "loss": 0.6043, "step": 185540 }, { "epoch": 53.38032220943613, "grad_norm": 1.1723648309707642, "learning_rate": 0.0009323935558112774, "loss": 0.4927, "step": 185550 }, { "epoch": 53.383199079401614, "grad_norm": 0.6982041597366333, "learning_rate": 0.0009323360184119678, "loss": 0.6529, "step": 185560 }, { "epoch": 53.38607594936709, "grad_norm": 1.4322589635849, "learning_rate": 0.0009322784810126582, "loss": 0.5814, "step": 185570 }, { "epoch": 53.388952819332566, "grad_norm": 1.3974106311798096, "learning_rate": 0.0009322209436133488, "loss": 0.489, "step": 185580 }, { "epoch": 53.39182968929804, "grad_norm": 0.8762840032577515, "learning_rate": 0.0009321634062140391, "loss": 0.524, "step": 185590 }, { "epoch": 53.39470655926352, "grad_norm": 1.9879103899002075, "learning_rate": 0.0009321058688147296, "loss": 0.4829, "step": 185600 }, { "epoch": 53.397583429229, "grad_norm": 1.4757659435272217, "learning_rate": 0.0009320483314154201, "loss": 0.5133, "step": 185610 }, { "epoch": 53.40046029919448, "grad_norm": 1.6411362886428833, "learning_rate": 0.0009319907940161105, "loss": 0.6872, "step": 185620 }, { "epoch": 53.403337169159954, "grad_norm": 1.3570661544799805, "learning_rate": 0.000931933256616801, "loss": 0.5586, "step": 185630 }, { "epoch": 53.40621403912543, "grad_norm": 2.214590311050415, "learning_rate": 0.0009318757192174914, "loss": 0.6199, "step": 185640 }, { "epoch": 53.40909090909091, "grad_norm": 1.3213249444961548, "learning_rate": 0.0009318181818181818, "loss": 0.5436, "step": 185650 }, { "epoch": 53.41196777905639, "grad_norm": 1.240046501159668, "learning_rate": 0.0009317606444188723, "loss": 0.4655, "step": 185660 }, { "epoch": 53.414844649021866, "grad_norm": 1.6200915575027466, "learning_rate": 0.0009317031070195628, "loss": 0.4929, "step": 185670 }, { "epoch": 53.41772151898734, "grad_norm": 1.1415716409683228, "learning_rate": 0.0009316455696202531, "loss": 0.4795, "step": 185680 }, { "epoch": 53.42059838895282, "grad_norm": 0.6240211129188538, "learning_rate": 0.0009315880322209437, "loss": 0.5259, "step": 185690 }, { "epoch": 53.423475258918295, "grad_norm": 1.5354524850845337, "learning_rate": 0.000931530494821634, "loss": 0.5196, "step": 185700 }, { "epoch": 53.42635212888378, "grad_norm": 0.9247556328773499, "learning_rate": 0.0009314729574223245, "loss": 0.5879, "step": 185710 }, { "epoch": 53.429228998849254, "grad_norm": 0.8325715065002441, "learning_rate": 0.000931415420023015, "loss": 0.4258, "step": 185720 }, { "epoch": 53.43210586881473, "grad_norm": 1.308680534362793, "learning_rate": 0.0009313578826237054, "loss": 0.5199, "step": 185730 }, { "epoch": 53.43498273878021, "grad_norm": 1.9624710083007812, "learning_rate": 0.0009313003452243959, "loss": 0.4991, "step": 185740 }, { "epoch": 53.43785960874568, "grad_norm": 0.7632930278778076, "learning_rate": 0.0009312428078250863, "loss": 0.4981, "step": 185750 }, { "epoch": 53.44073647871116, "grad_norm": 1.5260810852050781, "learning_rate": 0.0009311852704257767, "loss": 0.554, "step": 185760 }, { "epoch": 53.44361334867664, "grad_norm": 0.7148757576942444, "learning_rate": 0.0009311277330264672, "loss": 0.4813, "step": 185770 }, { "epoch": 53.44649021864212, "grad_norm": 1.5297049283981323, "learning_rate": 0.0009310701956271577, "loss": 0.6618, "step": 185780 }, { "epoch": 53.449367088607595, "grad_norm": 1.5222748517990112, "learning_rate": 0.000931012658227848, "loss": 0.5417, "step": 185790 }, { "epoch": 53.45224395857307, "grad_norm": 0.9312212467193604, "learning_rate": 0.0009309551208285386, "loss": 0.5802, "step": 185800 }, { "epoch": 53.45512082853855, "grad_norm": 1.565521240234375, "learning_rate": 0.0009308975834292291, "loss": 0.5929, "step": 185810 }, { "epoch": 53.45799769850403, "grad_norm": 1.6037623882293701, "learning_rate": 0.0009308400460299194, "loss": 0.4195, "step": 185820 }, { "epoch": 53.46087456846951, "grad_norm": 0.7859009504318237, "learning_rate": 0.0009307825086306099, "loss": 0.5109, "step": 185830 }, { "epoch": 53.46375143843498, "grad_norm": 1.0085625648498535, "learning_rate": 0.0009307249712313004, "loss": 0.4776, "step": 185840 }, { "epoch": 53.46662830840046, "grad_norm": 1.3609097003936768, "learning_rate": 0.0009306674338319908, "loss": 0.5128, "step": 185850 }, { "epoch": 53.469505178365935, "grad_norm": 0.9472770094871521, "learning_rate": 0.0009306098964326812, "loss": 0.5124, "step": 185860 }, { "epoch": 53.47238204833142, "grad_norm": 1.296683669090271, "learning_rate": 0.0009305523590333718, "loss": 0.5438, "step": 185870 }, { "epoch": 53.475258918296895, "grad_norm": 0.9338683485984802, "learning_rate": 0.0009304948216340621, "loss": 0.6356, "step": 185880 }, { "epoch": 53.47813578826237, "grad_norm": 0.8156852126121521, "learning_rate": 0.0009304372842347526, "loss": 0.5422, "step": 185890 }, { "epoch": 53.48101265822785, "grad_norm": 0.8999266624450684, "learning_rate": 0.000930379746835443, "loss": 0.4618, "step": 185900 }, { "epoch": 53.48388952819332, "grad_norm": 0.9672932624816895, "learning_rate": 0.0009303222094361335, "loss": 0.4385, "step": 185910 }, { "epoch": 53.48676639815881, "grad_norm": 0.8723313212394714, "learning_rate": 0.000930264672036824, "loss": 0.4656, "step": 185920 }, { "epoch": 53.48964326812428, "grad_norm": 2.1522059440612793, "learning_rate": 0.0009302071346375144, "loss": 0.5804, "step": 185930 }, { "epoch": 53.49252013808976, "grad_norm": 1.4023871421813965, "learning_rate": 0.0009301495972382048, "loss": 0.4597, "step": 185940 }, { "epoch": 53.495397008055235, "grad_norm": 2.142625093460083, "learning_rate": 0.0009300920598388953, "loss": 0.5287, "step": 185950 }, { "epoch": 53.49827387802071, "grad_norm": 0.9310464859008789, "learning_rate": 0.0009300345224395858, "loss": 0.4495, "step": 185960 }, { "epoch": 53.50115074798619, "grad_norm": 1.3609633445739746, "learning_rate": 0.0009299769850402761, "loss": 0.385, "step": 185970 }, { "epoch": 53.50402761795167, "grad_norm": 1.612545132637024, "learning_rate": 0.0009299194476409667, "loss": 0.6511, "step": 185980 }, { "epoch": 53.50690448791715, "grad_norm": 1.1642963886260986, "learning_rate": 0.0009298619102416571, "loss": 0.5399, "step": 185990 }, { "epoch": 53.50978135788262, "grad_norm": 0.8147064447402954, "learning_rate": 0.0009298043728423475, "loss": 0.3907, "step": 186000 }, { "epoch": 53.5126582278481, "grad_norm": 0.9818214774131775, "learning_rate": 0.000929746835443038, "loss": 0.5938, "step": 186010 }, { "epoch": 53.515535097813576, "grad_norm": 0.9160990118980408, "learning_rate": 0.0009296892980437285, "loss": 0.5488, "step": 186020 }, { "epoch": 53.51841196777906, "grad_norm": 0.9072353839874268, "learning_rate": 0.0009296317606444189, "loss": 0.5088, "step": 186030 }, { "epoch": 53.521288837744535, "grad_norm": 0.9827706217765808, "learning_rate": 0.0009295742232451093, "loss": 0.4325, "step": 186040 }, { "epoch": 53.52416570771001, "grad_norm": 0.7168743014335632, "learning_rate": 0.0009295166858457998, "loss": 0.6446, "step": 186050 }, { "epoch": 53.52704257767549, "grad_norm": 1.7248591184616089, "learning_rate": 0.0009294591484464902, "loss": 0.6015, "step": 186060 }, { "epoch": 53.529919447640964, "grad_norm": 1.4896029233932495, "learning_rate": 0.0009294016110471807, "loss": 0.5248, "step": 186070 }, { "epoch": 53.53279631760645, "grad_norm": 0.938572883605957, "learning_rate": 0.0009293440736478711, "loss": 0.5482, "step": 186080 }, { "epoch": 53.53567318757192, "grad_norm": 0.7390862703323364, "learning_rate": 0.0009292865362485616, "loss": 0.4135, "step": 186090 }, { "epoch": 53.5385500575374, "grad_norm": 1.197166085243225, "learning_rate": 0.000929228998849252, "loss": 0.523, "step": 186100 }, { "epoch": 53.541426927502876, "grad_norm": 0.9086145162582397, "learning_rate": 0.0009291714614499425, "loss": 0.5843, "step": 186110 }, { "epoch": 53.54430379746835, "grad_norm": 1.5887293815612793, "learning_rate": 0.0009291139240506329, "loss": 0.5055, "step": 186120 }, { "epoch": 53.547180667433835, "grad_norm": 1.2483928203582764, "learning_rate": 0.0009290563866513234, "loss": 0.6543, "step": 186130 }, { "epoch": 53.55005753739931, "grad_norm": 1.281813383102417, "learning_rate": 0.0009289988492520139, "loss": 0.3702, "step": 186140 }, { "epoch": 53.55293440736479, "grad_norm": 2.3545079231262207, "learning_rate": 0.0009289413118527042, "loss": 0.5119, "step": 186150 }, { "epoch": 53.555811277330264, "grad_norm": 1.4648810625076294, "learning_rate": 0.0009288837744533948, "loss": 0.6982, "step": 186160 }, { "epoch": 53.55868814729574, "grad_norm": 0.9960803389549255, "learning_rate": 0.0009288262370540852, "loss": 0.4705, "step": 186170 }, { "epoch": 53.561565017261216, "grad_norm": 1.2523634433746338, "learning_rate": 0.0009287686996547756, "loss": 0.6027, "step": 186180 }, { "epoch": 53.5644418872267, "grad_norm": 1.8438955545425415, "learning_rate": 0.000928711162255466, "loss": 0.449, "step": 186190 }, { "epoch": 53.567318757192176, "grad_norm": 1.908105492591858, "learning_rate": 0.0009286536248561566, "loss": 0.5544, "step": 186200 }, { "epoch": 53.57019562715765, "grad_norm": 0.815693199634552, "learning_rate": 0.000928596087456847, "loss": 0.5598, "step": 186210 }, { "epoch": 53.57307249712313, "grad_norm": 1.2962629795074463, "learning_rate": 0.0009285385500575374, "loss": 0.4873, "step": 186220 }, { "epoch": 53.575949367088604, "grad_norm": 1.1991345882415771, "learning_rate": 0.0009284810126582279, "loss": 0.577, "step": 186230 }, { "epoch": 53.57882623705409, "grad_norm": 1.4478024244308472, "learning_rate": 0.0009284234752589183, "loss": 0.4323, "step": 186240 }, { "epoch": 53.581703107019564, "grad_norm": 2.1067616939544678, "learning_rate": 0.0009283659378596088, "loss": 0.6182, "step": 186250 }, { "epoch": 53.58457997698504, "grad_norm": 1.34104323387146, "learning_rate": 0.0009283084004602992, "loss": 0.4871, "step": 186260 }, { "epoch": 53.587456846950516, "grad_norm": 2.387270212173462, "learning_rate": 0.0009282508630609897, "loss": 0.5771, "step": 186270 }, { "epoch": 53.59033371691599, "grad_norm": 1.5993778705596924, "learning_rate": 0.0009281933256616801, "loss": 0.5536, "step": 186280 }, { "epoch": 53.593210586881476, "grad_norm": 1.2526301145553589, "learning_rate": 0.0009281357882623706, "loss": 0.5721, "step": 186290 }, { "epoch": 53.59608745684695, "grad_norm": 1.2335057258605957, "learning_rate": 0.0009280782508630609, "loss": 0.6906, "step": 186300 }, { "epoch": 53.59896432681243, "grad_norm": 1.0137012004852295, "learning_rate": 0.0009280207134637515, "loss": 0.5861, "step": 186310 }, { "epoch": 53.601841196777904, "grad_norm": 0.9771128296852112, "learning_rate": 0.000927963176064442, "loss": 0.6166, "step": 186320 }, { "epoch": 53.60471806674338, "grad_norm": 1.6019107103347778, "learning_rate": 0.0009279056386651323, "loss": 0.525, "step": 186330 }, { "epoch": 53.607594936708864, "grad_norm": 0.7742268443107605, "learning_rate": 0.0009278481012658228, "loss": 0.458, "step": 186340 }, { "epoch": 53.61047180667434, "grad_norm": 0.8658954501152039, "learning_rate": 0.0009277905638665133, "loss": 0.4562, "step": 186350 }, { "epoch": 53.613348676639816, "grad_norm": 1.2818907499313354, "learning_rate": 0.0009277330264672037, "loss": 0.6694, "step": 186360 }, { "epoch": 53.61622554660529, "grad_norm": 1.3925178050994873, "learning_rate": 0.0009276754890678941, "loss": 0.4979, "step": 186370 }, { "epoch": 53.61910241657077, "grad_norm": 1.5759726762771606, "learning_rate": 0.0009276179516685847, "loss": 0.453, "step": 186380 }, { "epoch": 53.621979286536245, "grad_norm": 1.6103401184082031, "learning_rate": 0.000927560414269275, "loss": 0.4371, "step": 186390 }, { "epoch": 53.62485615650173, "grad_norm": 1.0604567527770996, "learning_rate": 0.0009275028768699655, "loss": 0.5319, "step": 186400 }, { "epoch": 53.627733026467205, "grad_norm": 1.3764839172363281, "learning_rate": 0.000927445339470656, "loss": 0.5326, "step": 186410 }, { "epoch": 53.63060989643268, "grad_norm": 0.5711532235145569, "learning_rate": 0.0009273878020713464, "loss": 0.5032, "step": 186420 }, { "epoch": 53.63348676639816, "grad_norm": 1.6674151420593262, "learning_rate": 0.0009273302646720369, "loss": 0.6137, "step": 186430 }, { "epoch": 53.63636363636363, "grad_norm": 1.1185563802719116, "learning_rate": 0.0009272727272727273, "loss": 0.4717, "step": 186440 }, { "epoch": 53.639240506329116, "grad_norm": 1.0994436740875244, "learning_rate": 0.0009272151898734177, "loss": 0.4806, "step": 186450 }, { "epoch": 53.64211737629459, "grad_norm": 1.0031766891479492, "learning_rate": 0.0009271576524741082, "loss": 0.6553, "step": 186460 }, { "epoch": 53.64499424626007, "grad_norm": 0.9354944825172424, "learning_rate": 0.0009271001150747987, "loss": 0.5098, "step": 186470 }, { "epoch": 53.647871116225545, "grad_norm": 1.0575424432754517, "learning_rate": 0.000927042577675489, "loss": 0.5229, "step": 186480 }, { "epoch": 53.65074798619102, "grad_norm": 0.8489079475402832, "learning_rate": 0.0009269850402761796, "loss": 0.5838, "step": 186490 }, { "epoch": 53.653624856156505, "grad_norm": 1.5457998514175415, "learning_rate": 0.00092692750287687, "loss": 0.4178, "step": 186500 }, { "epoch": 53.65650172612198, "grad_norm": 1.7207802534103394, "learning_rate": 0.0009268699654775604, "loss": 0.4863, "step": 186510 }, { "epoch": 53.65937859608746, "grad_norm": 0.6785180568695068, "learning_rate": 0.0009268124280782509, "loss": 0.4498, "step": 186520 }, { "epoch": 53.66225546605293, "grad_norm": 0.9827288389205933, "learning_rate": 0.0009267548906789413, "loss": 0.5681, "step": 186530 }, { "epoch": 53.66513233601841, "grad_norm": 1.04389488697052, "learning_rate": 0.0009266973532796318, "loss": 0.4907, "step": 186540 }, { "epoch": 53.66800920598389, "grad_norm": 1.2234983444213867, "learning_rate": 0.0009266398158803222, "loss": 0.504, "step": 186550 }, { "epoch": 53.67088607594937, "grad_norm": 1.232496976852417, "learning_rate": 0.0009265822784810127, "loss": 0.5246, "step": 186560 }, { "epoch": 53.673762945914845, "grad_norm": 1.7703975439071655, "learning_rate": 0.0009265247410817031, "loss": 0.5724, "step": 186570 }, { "epoch": 53.67663981588032, "grad_norm": 1.300021767616272, "learning_rate": 0.0009264672036823936, "loss": 0.5874, "step": 186580 }, { "epoch": 53.6795166858458, "grad_norm": 1.035325288772583, "learning_rate": 0.0009264096662830839, "loss": 0.51, "step": 186590 }, { "epoch": 53.68239355581128, "grad_norm": 1.3899271488189697, "learning_rate": 0.0009263521288837745, "loss": 0.5613, "step": 186600 }, { "epoch": 53.68527042577676, "grad_norm": 2.2155251502990723, "learning_rate": 0.000926294591484465, "loss": 0.6325, "step": 186610 }, { "epoch": 53.68814729574223, "grad_norm": 0.9319280385971069, "learning_rate": 0.0009262370540851553, "loss": 0.594, "step": 186620 }, { "epoch": 53.69102416570771, "grad_norm": 0.950650691986084, "learning_rate": 0.0009261795166858458, "loss": 0.5934, "step": 186630 }, { "epoch": 53.693901035673186, "grad_norm": 1.1641559600830078, "learning_rate": 0.0009261219792865363, "loss": 0.5294, "step": 186640 }, { "epoch": 53.69677790563866, "grad_norm": 2.096963882446289, "learning_rate": 0.0009260644418872267, "loss": 0.497, "step": 186650 }, { "epoch": 53.699654775604145, "grad_norm": 1.3718887567520142, "learning_rate": 0.0009260069044879171, "loss": 0.5514, "step": 186660 }, { "epoch": 53.70253164556962, "grad_norm": 0.9445362091064453, "learning_rate": 0.0009259493670886077, "loss": 0.4607, "step": 186670 }, { "epoch": 53.7054085155351, "grad_norm": 1.5400047302246094, "learning_rate": 0.000925891829689298, "loss": 0.6041, "step": 186680 }, { "epoch": 53.708285385500574, "grad_norm": 1.0830256938934326, "learning_rate": 0.0009258342922899885, "loss": 0.5048, "step": 186690 }, { "epoch": 53.71116225546605, "grad_norm": 0.4607945382595062, "learning_rate": 0.000925776754890679, "loss": 0.604, "step": 186700 }, { "epoch": 53.71403912543153, "grad_norm": 0.6543579697608948, "learning_rate": 0.0009257192174913694, "loss": 0.5373, "step": 186710 }, { "epoch": 53.71691599539701, "grad_norm": 1.388048768043518, "learning_rate": 0.0009256616800920599, "loss": 0.7286, "step": 186720 }, { "epoch": 53.719792865362486, "grad_norm": 0.8727129697799683, "learning_rate": 0.0009256041426927503, "loss": 0.5289, "step": 186730 }, { "epoch": 53.72266973532796, "grad_norm": 1.1963251829147339, "learning_rate": 0.0009255466052934407, "loss": 0.4364, "step": 186740 }, { "epoch": 53.72554660529344, "grad_norm": 1.3396458625793457, "learning_rate": 0.0009254890678941312, "loss": 0.6937, "step": 186750 }, { "epoch": 53.72842347525892, "grad_norm": 0.639114260673523, "learning_rate": 0.0009254315304948217, "loss": 0.4428, "step": 186760 }, { "epoch": 53.7313003452244, "grad_norm": 1.3315589427947998, "learning_rate": 0.000925373993095512, "loss": 0.5096, "step": 186770 }, { "epoch": 53.734177215189874, "grad_norm": 0.9297134876251221, "learning_rate": 0.0009253164556962026, "loss": 0.4368, "step": 186780 }, { "epoch": 53.73705408515535, "grad_norm": 1.081430435180664, "learning_rate": 0.000925258918296893, "loss": 0.5825, "step": 186790 }, { "epoch": 53.739930955120826, "grad_norm": 2.0683908462524414, "learning_rate": 0.0009252013808975834, "loss": 0.5613, "step": 186800 }, { "epoch": 53.74280782508631, "grad_norm": 1.092482328414917, "learning_rate": 0.0009251438434982738, "loss": 0.4349, "step": 186810 }, { "epoch": 53.745684695051786, "grad_norm": 1.1937706470489502, "learning_rate": 0.0009250863060989644, "loss": 0.485, "step": 186820 }, { "epoch": 53.74856156501726, "grad_norm": 1.3528646230697632, "learning_rate": 0.0009250287686996548, "loss": 0.6153, "step": 186830 }, { "epoch": 53.75143843498274, "grad_norm": 1.6140031814575195, "learning_rate": 0.0009249712313003452, "loss": 0.4918, "step": 186840 }, { "epoch": 53.754315304948214, "grad_norm": 1.1122888326644897, "learning_rate": 0.0009249136939010358, "loss": 0.5367, "step": 186850 }, { "epoch": 53.75719217491369, "grad_norm": 0.82119220495224, "learning_rate": 0.0009248561565017261, "loss": 0.5103, "step": 186860 }, { "epoch": 53.760069044879174, "grad_norm": 2.1757123470306396, "learning_rate": 0.0009247986191024166, "loss": 0.5349, "step": 186870 }, { "epoch": 53.76294591484465, "grad_norm": 0.85897296667099, "learning_rate": 0.000924741081703107, "loss": 0.5422, "step": 186880 }, { "epoch": 53.765822784810126, "grad_norm": 1.89950692653656, "learning_rate": 0.0009246835443037975, "loss": 0.5859, "step": 186890 }, { "epoch": 53.7686996547756, "grad_norm": 1.288644790649414, "learning_rate": 0.000924626006904488, "loss": 0.5554, "step": 186900 }, { "epoch": 53.77157652474108, "grad_norm": 1.3605754375457764, "learning_rate": 0.0009245684695051784, "loss": 0.6004, "step": 186910 }, { "epoch": 53.77445339470656, "grad_norm": 1.1352242231369019, "learning_rate": 0.0009245109321058688, "loss": 0.5319, "step": 186920 }, { "epoch": 53.77733026467204, "grad_norm": 1.9006264209747314, "learning_rate": 0.0009244533947065593, "loss": 0.5064, "step": 186930 }, { "epoch": 53.780207134637514, "grad_norm": 1.9855581521987915, "learning_rate": 0.0009243958573072498, "loss": 0.5174, "step": 186940 }, { "epoch": 53.78308400460299, "grad_norm": 1.1869202852249146, "learning_rate": 0.0009243383199079401, "loss": 0.59, "step": 186950 }, { "epoch": 53.78596087456847, "grad_norm": 2.056938409805298, "learning_rate": 0.0009242807825086307, "loss": 0.4351, "step": 186960 }, { "epoch": 53.78883774453395, "grad_norm": 0.7150076627731323, "learning_rate": 0.0009242232451093211, "loss": 0.4928, "step": 186970 }, { "epoch": 53.791714614499426, "grad_norm": 1.302324891090393, "learning_rate": 0.0009241657077100115, "loss": 0.519, "step": 186980 }, { "epoch": 53.7945914844649, "grad_norm": 1.550125002861023, "learning_rate": 0.0009241081703107019, "loss": 0.6436, "step": 186990 }, { "epoch": 53.79746835443038, "grad_norm": 1.309230089187622, "learning_rate": 0.0009240506329113925, "loss": 0.558, "step": 187000 }, { "epoch": 53.800345224395855, "grad_norm": 1.1250083446502686, "learning_rate": 0.0009239930955120828, "loss": 0.5363, "step": 187010 }, { "epoch": 53.80322209436134, "grad_norm": 0.7692813277244568, "learning_rate": 0.0009239355581127733, "loss": 0.4296, "step": 187020 }, { "epoch": 53.806098964326814, "grad_norm": 1.0250073671340942, "learning_rate": 0.0009238780207134638, "loss": 0.6554, "step": 187030 }, { "epoch": 53.80897583429229, "grad_norm": 1.655207633972168, "learning_rate": 0.0009238204833141542, "loss": 0.4718, "step": 187040 }, { "epoch": 53.81185270425777, "grad_norm": 1.1677098274230957, "learning_rate": 0.0009237629459148447, "loss": 0.5217, "step": 187050 }, { "epoch": 53.81472957422324, "grad_norm": 2.1413447856903076, "learning_rate": 0.0009237054085155351, "loss": 0.6143, "step": 187060 }, { "epoch": 53.81760644418872, "grad_norm": 1.457761287689209, "learning_rate": 0.0009236478711162256, "loss": 0.5279, "step": 187070 }, { "epoch": 53.8204833141542, "grad_norm": 1.3536797761917114, "learning_rate": 0.000923590333716916, "loss": 0.5529, "step": 187080 }, { "epoch": 53.82336018411968, "grad_norm": 1.234259009361267, "learning_rate": 0.0009235327963176065, "loss": 0.4721, "step": 187090 }, { "epoch": 53.826237054085155, "grad_norm": 0.896376371383667, "learning_rate": 0.0009234752589182968, "loss": 0.5541, "step": 187100 }, { "epoch": 53.82911392405063, "grad_norm": 1.0078274011611938, "learning_rate": 0.0009234177215189874, "loss": 0.5064, "step": 187110 }, { "epoch": 53.83199079401611, "grad_norm": 1.430601716041565, "learning_rate": 0.0009233601841196779, "loss": 0.6802, "step": 187120 }, { "epoch": 53.83486766398159, "grad_norm": 1.435365080833435, "learning_rate": 0.0009233026467203682, "loss": 0.5555, "step": 187130 }, { "epoch": 53.83774453394707, "grad_norm": 0.9770337343215942, "learning_rate": 0.0009232451093210588, "loss": 0.439, "step": 187140 }, { "epoch": 53.84062140391254, "grad_norm": 1.339951515197754, "learning_rate": 0.0009231875719217492, "loss": 0.5494, "step": 187150 }, { "epoch": 53.84349827387802, "grad_norm": 1.0128568410873413, "learning_rate": 0.0009231300345224396, "loss": 0.5775, "step": 187160 }, { "epoch": 53.846375143843495, "grad_norm": 0.9509447813034058, "learning_rate": 0.00092307249712313, "loss": 0.4242, "step": 187170 }, { "epoch": 53.84925201380898, "grad_norm": 0.9886730313301086, "learning_rate": 0.0009230149597238206, "loss": 0.4624, "step": 187180 }, { "epoch": 53.852128883774455, "grad_norm": 0.8620446920394897, "learning_rate": 0.0009229574223245109, "loss": 0.6404, "step": 187190 }, { "epoch": 53.85500575373993, "grad_norm": 0.8291692733764648, "learning_rate": 0.0009228998849252014, "loss": 0.5579, "step": 187200 }, { "epoch": 53.85788262370541, "grad_norm": 1.1644837856292725, "learning_rate": 0.0009228423475258919, "loss": 0.5886, "step": 187210 }, { "epoch": 53.860759493670884, "grad_norm": 1.224144458770752, "learning_rate": 0.0009227848101265823, "loss": 0.4665, "step": 187220 }, { "epoch": 53.86363636363637, "grad_norm": 1.257842779159546, "learning_rate": 0.0009227272727272728, "loss": 0.6122, "step": 187230 }, { "epoch": 53.86651323360184, "grad_norm": 0.8647997379302979, "learning_rate": 0.0009226697353279632, "loss": 0.5833, "step": 187240 }, { "epoch": 53.86939010356732, "grad_norm": 1.4040189981460571, "learning_rate": 0.0009226121979286537, "loss": 0.4317, "step": 187250 }, { "epoch": 53.872266973532795, "grad_norm": 0.6374104619026184, "learning_rate": 0.0009225546605293441, "loss": 0.4373, "step": 187260 }, { "epoch": 53.87514384349827, "grad_norm": 2.539571762084961, "learning_rate": 0.0009224971231300346, "loss": 0.7149, "step": 187270 }, { "epoch": 53.878020713463755, "grad_norm": 1.4035906791687012, "learning_rate": 0.0009224395857307249, "loss": 0.4712, "step": 187280 }, { "epoch": 53.88089758342923, "grad_norm": 1.8530951738357544, "learning_rate": 0.0009223820483314155, "loss": 0.5249, "step": 187290 }, { "epoch": 53.88377445339471, "grad_norm": 1.1833049058914185, "learning_rate": 0.000922324510932106, "loss": 0.5162, "step": 187300 }, { "epoch": 53.886651323360184, "grad_norm": 1.3993867635726929, "learning_rate": 0.0009222669735327963, "loss": 0.4404, "step": 187310 }, { "epoch": 53.88952819332566, "grad_norm": 1.0503745079040527, "learning_rate": 0.0009222094361334868, "loss": 0.5338, "step": 187320 }, { "epoch": 53.892405063291136, "grad_norm": 1.4184571504592896, "learning_rate": 0.0009221518987341773, "loss": 0.5658, "step": 187330 }, { "epoch": 53.89528193325662, "grad_norm": 1.022072672843933, "learning_rate": 0.0009220943613348677, "loss": 0.4085, "step": 187340 }, { "epoch": 53.898158803222096, "grad_norm": 0.9198304414749146, "learning_rate": 0.0009220368239355581, "loss": 0.4464, "step": 187350 }, { "epoch": 53.90103567318757, "grad_norm": 0.8688418865203857, "learning_rate": 0.0009219792865362486, "loss": 0.4227, "step": 187360 }, { "epoch": 53.90391254315305, "grad_norm": 1.0948058366775513, "learning_rate": 0.000921921749136939, "loss": 0.5232, "step": 187370 }, { "epoch": 53.906789413118524, "grad_norm": 1.2472829818725586, "learning_rate": 0.0009218642117376295, "loss": 0.5942, "step": 187380 }, { "epoch": 53.90966628308401, "grad_norm": 2.4360101222991943, "learning_rate": 0.0009218066743383198, "loss": 0.664, "step": 187390 }, { "epoch": 53.912543153049484, "grad_norm": 1.5366809368133545, "learning_rate": 0.0009217491369390104, "loss": 0.5176, "step": 187400 }, { "epoch": 53.91542002301496, "grad_norm": 2.1694610118865967, "learning_rate": 0.0009216915995397009, "loss": 0.5173, "step": 187410 }, { "epoch": 53.918296892980436, "grad_norm": 1.1316578388214111, "learning_rate": 0.0009216340621403912, "loss": 0.603, "step": 187420 }, { "epoch": 53.92117376294591, "grad_norm": 0.9890956282615662, "learning_rate": 0.0009215765247410817, "loss": 0.5957, "step": 187430 }, { "epoch": 53.924050632911396, "grad_norm": 1.0498310327529907, "learning_rate": 0.0009215189873417722, "loss": 0.5006, "step": 187440 }, { "epoch": 53.92692750287687, "grad_norm": 0.9202952980995178, "learning_rate": 0.0009214614499424626, "loss": 0.4611, "step": 187450 }, { "epoch": 53.92980437284235, "grad_norm": 1.2480976581573486, "learning_rate": 0.000921403912543153, "loss": 0.5745, "step": 187460 }, { "epoch": 53.932681242807824, "grad_norm": 1.1025129556655884, "learning_rate": 0.0009213463751438436, "loss": 0.5849, "step": 187470 }, { "epoch": 53.9355581127733, "grad_norm": 1.2375551462173462, "learning_rate": 0.0009212888377445339, "loss": 0.4555, "step": 187480 }, { "epoch": 53.938434982738784, "grad_norm": 1.2046164274215698, "learning_rate": 0.0009212313003452244, "loss": 0.5236, "step": 187490 }, { "epoch": 53.94131185270426, "grad_norm": 1.2165743112564087, "learning_rate": 0.0009211737629459148, "loss": 0.5665, "step": 187500 }, { "epoch": 53.944188722669736, "grad_norm": 0.8954380750656128, "learning_rate": 0.0009211162255466053, "loss": 0.5376, "step": 187510 }, { "epoch": 53.94706559263521, "grad_norm": 1.5532751083374023, "learning_rate": 0.0009210586881472958, "loss": 0.5527, "step": 187520 }, { "epoch": 53.94994246260069, "grad_norm": 1.1765700578689575, "learning_rate": 0.0009210011507479862, "loss": 0.4338, "step": 187530 }, { "epoch": 53.952819332566165, "grad_norm": 0.5848357081413269, "learning_rate": 0.0009209436133486767, "loss": 0.407, "step": 187540 }, { "epoch": 53.95569620253165, "grad_norm": 0.6841355562210083, "learning_rate": 0.0009208860759493671, "loss": 0.5347, "step": 187550 }, { "epoch": 53.958573072497124, "grad_norm": 0.7212316393852234, "learning_rate": 0.0009208285385500576, "loss": 0.5584, "step": 187560 }, { "epoch": 53.9614499424626, "grad_norm": 2.3005547523498535, "learning_rate": 0.0009207710011507479, "loss": 0.5244, "step": 187570 }, { "epoch": 53.96432681242808, "grad_norm": 1.5764973163604736, "learning_rate": 0.0009207134637514385, "loss": 0.4276, "step": 187580 }, { "epoch": 53.96720368239355, "grad_norm": 1.9737834930419922, "learning_rate": 0.0009206559263521289, "loss": 0.5672, "step": 187590 }, { "epoch": 53.970080552359036, "grad_norm": 1.2987070083618164, "learning_rate": 0.0009205983889528193, "loss": 0.5879, "step": 187600 }, { "epoch": 53.97295742232451, "grad_norm": 0.9632757306098938, "learning_rate": 0.0009205408515535097, "loss": 0.5807, "step": 187610 }, { "epoch": 53.97583429228999, "grad_norm": 1.5315638780593872, "learning_rate": 0.0009204833141542003, "loss": 0.6313, "step": 187620 }, { "epoch": 53.978711162255465, "grad_norm": 1.4949848651885986, "learning_rate": 0.0009204257767548907, "loss": 0.6272, "step": 187630 }, { "epoch": 53.98158803222094, "grad_norm": 1.4377721548080444, "learning_rate": 0.0009203682393555811, "loss": 0.5132, "step": 187640 }, { "epoch": 53.984464902186424, "grad_norm": 2.0251615047454834, "learning_rate": 0.0009203107019562717, "loss": 0.4563, "step": 187650 }, { "epoch": 53.9873417721519, "grad_norm": 2.019994020462036, "learning_rate": 0.000920253164556962, "loss": 0.4582, "step": 187660 }, { "epoch": 53.99021864211738, "grad_norm": 1.7301831245422363, "learning_rate": 0.0009201956271576525, "loss": 0.5048, "step": 187670 }, { "epoch": 53.99309551208285, "grad_norm": 0.7617121338844299, "learning_rate": 0.0009201380897583429, "loss": 0.6861, "step": 187680 }, { "epoch": 53.99597238204833, "grad_norm": 1.9664193391799927, "learning_rate": 0.0009200805523590334, "loss": 0.5915, "step": 187690 }, { "epoch": 53.99884925201381, "grad_norm": 0.7099858522415161, "learning_rate": 0.0009200230149597238, "loss": 0.57, "step": 187700 }, { "epoch": 54.00172612197929, "grad_norm": 1.2374640703201294, "learning_rate": 0.0009199654775604143, "loss": 0.3873, "step": 187710 }, { "epoch": 54.004602991944765, "grad_norm": 0.7219910025596619, "learning_rate": 0.0009199079401611046, "loss": 0.5245, "step": 187720 }, { "epoch": 54.00747986191024, "grad_norm": 0.9031584858894348, "learning_rate": 0.0009198504027617952, "loss": 0.4932, "step": 187730 }, { "epoch": 54.01035673187572, "grad_norm": 0.9115791916847229, "learning_rate": 0.0009197928653624857, "loss": 0.4786, "step": 187740 }, { "epoch": 54.01323360184119, "grad_norm": 1.0101606845855713, "learning_rate": 0.000919735327963176, "loss": 0.4232, "step": 187750 }, { "epoch": 54.01611047180668, "grad_norm": 1.7448947429656982, "learning_rate": 0.0009196777905638666, "loss": 0.4528, "step": 187760 }, { "epoch": 54.01898734177215, "grad_norm": 1.2970041036605835, "learning_rate": 0.000919620253164557, "loss": 0.5031, "step": 187770 }, { "epoch": 54.02186421173763, "grad_norm": 1.133010983467102, "learning_rate": 0.0009195627157652474, "loss": 0.3907, "step": 187780 }, { "epoch": 54.024741081703105, "grad_norm": 1.2326759099960327, "learning_rate": 0.0009195051783659378, "loss": 0.4651, "step": 187790 }, { "epoch": 54.02761795166858, "grad_norm": 1.6061605215072632, "learning_rate": 0.0009194476409666284, "loss": 0.6256, "step": 187800 }, { "epoch": 54.030494821634065, "grad_norm": 1.0089046955108643, "learning_rate": 0.0009193901035673187, "loss": 0.3793, "step": 187810 }, { "epoch": 54.03337169159954, "grad_norm": 1.1634572744369507, "learning_rate": 0.0009193325661680092, "loss": 0.5353, "step": 187820 }, { "epoch": 54.03624856156502, "grad_norm": 0.9826326966285706, "learning_rate": 0.0009192750287686998, "loss": 0.5009, "step": 187830 }, { "epoch": 54.03912543153049, "grad_norm": 2.1110358238220215, "learning_rate": 0.0009192174913693901, "loss": 0.4767, "step": 187840 }, { "epoch": 54.04200230149597, "grad_norm": 1.30633544921875, "learning_rate": 0.0009191599539700806, "loss": 0.5317, "step": 187850 }, { "epoch": 54.04487917146145, "grad_norm": 1.010235071182251, "learning_rate": 0.000919102416570771, "loss": 0.4742, "step": 187860 }, { "epoch": 54.04775604142693, "grad_norm": 1.7231794595718384, "learning_rate": 0.0009190448791714615, "loss": 0.5456, "step": 187870 }, { "epoch": 54.050632911392405, "grad_norm": 0.7720971703529358, "learning_rate": 0.0009189873417721519, "loss": 0.4615, "step": 187880 }, { "epoch": 54.05350978135788, "grad_norm": 2.7667300701141357, "learning_rate": 0.0009189298043728424, "loss": 0.4613, "step": 187890 }, { "epoch": 54.05638665132336, "grad_norm": 1.6940138339996338, "learning_rate": 0.0009188722669735327, "loss": 0.3935, "step": 187900 }, { "epoch": 54.05926352128884, "grad_norm": 2.17822527885437, "learning_rate": 0.0009188147295742233, "loss": 0.4678, "step": 187910 }, { "epoch": 54.06214039125432, "grad_norm": 1.9226435422897339, "learning_rate": 0.0009187571921749138, "loss": 0.6735, "step": 187920 }, { "epoch": 54.06501726121979, "grad_norm": 2.2673215866088867, "learning_rate": 0.0009186996547756041, "loss": 0.4965, "step": 187930 }, { "epoch": 54.06789413118527, "grad_norm": 1.6841223239898682, "learning_rate": 0.0009186421173762947, "loss": 0.4827, "step": 187940 }, { "epoch": 54.070771001150746, "grad_norm": 2.2090649604797363, "learning_rate": 0.0009185845799769851, "loss": 0.6256, "step": 187950 }, { "epoch": 54.07364787111622, "grad_norm": 1.3895777463912964, "learning_rate": 0.0009185270425776755, "loss": 0.4304, "step": 187960 }, { "epoch": 54.076524741081705, "grad_norm": 0.7629975080490112, "learning_rate": 0.0009184695051783659, "loss": 0.5531, "step": 187970 }, { "epoch": 54.07940161104718, "grad_norm": 2.5866169929504395, "learning_rate": 0.0009184119677790565, "loss": 0.4828, "step": 187980 }, { "epoch": 54.08227848101266, "grad_norm": 2.121338129043579, "learning_rate": 0.0009183544303797468, "loss": 0.5571, "step": 187990 }, { "epoch": 54.085155350978134, "grad_norm": 1.0075751543045044, "learning_rate": 0.0009182968929804373, "loss": 0.522, "step": 188000 }, { "epoch": 54.08803222094361, "grad_norm": 1.1463888883590698, "learning_rate": 0.0009182393555811277, "loss": 0.4462, "step": 188010 }, { "epoch": 54.09090909090909, "grad_norm": 0.9803749918937683, "learning_rate": 0.0009181818181818182, "loss": 0.5373, "step": 188020 }, { "epoch": 54.09378596087457, "grad_norm": 1.79812490940094, "learning_rate": 0.0009181242807825087, "loss": 0.5157, "step": 188030 }, { "epoch": 54.096662830840046, "grad_norm": 0.9981594085693359, "learning_rate": 0.0009180667433831991, "loss": 0.534, "step": 188040 }, { "epoch": 54.09953970080552, "grad_norm": 0.5591610670089722, "learning_rate": 0.0009180092059838896, "loss": 0.5489, "step": 188050 }, { "epoch": 54.102416570771, "grad_norm": 1.236078143119812, "learning_rate": 0.00091795166858458, "loss": 0.6638, "step": 188060 }, { "epoch": 54.10529344073648, "grad_norm": 1.2618945837020874, "learning_rate": 0.0009178941311852705, "loss": 0.4022, "step": 188070 }, { "epoch": 54.10817031070196, "grad_norm": 1.0824522972106934, "learning_rate": 0.0009178365937859608, "loss": 0.7001, "step": 188080 }, { "epoch": 54.111047180667434, "grad_norm": 0.8811436891555786, "learning_rate": 0.0009177790563866514, "loss": 0.486, "step": 188090 }, { "epoch": 54.11392405063291, "grad_norm": 1.308019757270813, "learning_rate": 0.0009177215189873418, "loss": 0.5887, "step": 188100 }, { "epoch": 54.116800920598386, "grad_norm": 1.3973376750946045, "learning_rate": 0.0009176639815880322, "loss": 0.6796, "step": 188110 }, { "epoch": 54.11967779056387, "grad_norm": 1.9582021236419678, "learning_rate": 0.0009176064441887228, "loss": 0.4704, "step": 188120 }, { "epoch": 54.122554660529346, "grad_norm": 0.8363974690437317, "learning_rate": 0.0009175489067894132, "loss": 0.43, "step": 188130 }, { "epoch": 54.12543153049482, "grad_norm": 1.9768537282943726, "learning_rate": 0.0009174913693901036, "loss": 0.4939, "step": 188140 }, { "epoch": 54.1283084004603, "grad_norm": 1.7762422561645508, "learning_rate": 0.000917433831990794, "loss": 0.5518, "step": 188150 }, { "epoch": 54.131185270425775, "grad_norm": 1.3671863079071045, "learning_rate": 0.0009173762945914846, "loss": 0.5283, "step": 188160 }, { "epoch": 54.13406214039125, "grad_norm": 1.1347867250442505, "learning_rate": 0.0009173187571921749, "loss": 0.5452, "step": 188170 }, { "epoch": 54.136939010356734, "grad_norm": 1.6280266046524048, "learning_rate": 0.0009172612197928654, "loss": 0.4911, "step": 188180 }, { "epoch": 54.13981588032221, "grad_norm": 1.2494535446166992, "learning_rate": 0.0009172036823935557, "loss": 0.4505, "step": 188190 }, { "epoch": 54.14269275028769, "grad_norm": 1.0879963636398315, "learning_rate": 0.0009171461449942463, "loss": 0.4248, "step": 188200 }, { "epoch": 54.14556962025316, "grad_norm": 2.8734564781188965, "learning_rate": 0.0009170886075949368, "loss": 0.7067, "step": 188210 }, { "epoch": 54.14844649021864, "grad_norm": 1.5733462572097778, "learning_rate": 0.0009170310701956271, "loss": 0.4723, "step": 188220 }, { "epoch": 54.15132336018412, "grad_norm": 1.3736079931259155, "learning_rate": 0.0009169735327963177, "loss": 0.4825, "step": 188230 }, { "epoch": 54.1542002301496, "grad_norm": 1.618096947669983, "learning_rate": 0.0009169159953970081, "loss": 0.5014, "step": 188240 }, { "epoch": 54.157077100115075, "grad_norm": 1.2288100719451904, "learning_rate": 0.0009168584579976985, "loss": 0.4873, "step": 188250 }, { "epoch": 54.15995397008055, "grad_norm": 1.784325122833252, "learning_rate": 0.0009168009205983889, "loss": 0.4557, "step": 188260 }, { "epoch": 54.16283084004603, "grad_norm": 1.0785176753997803, "learning_rate": 0.0009167433831990795, "loss": 0.4303, "step": 188270 }, { "epoch": 54.16570771001151, "grad_norm": 2.0483288764953613, "learning_rate": 0.0009166858457997698, "loss": 0.6191, "step": 188280 }, { "epoch": 54.16858457997699, "grad_norm": 0.898165762424469, "learning_rate": 0.0009166283084004603, "loss": 0.4815, "step": 188290 }, { "epoch": 54.17146144994246, "grad_norm": 1.4532623291015625, "learning_rate": 0.0009165707710011507, "loss": 0.3919, "step": 188300 }, { "epoch": 54.17433831990794, "grad_norm": 1.999211072921753, "learning_rate": 0.0009165132336018412, "loss": 0.6419, "step": 188310 }, { "epoch": 54.177215189873415, "grad_norm": 1.1176601648330688, "learning_rate": 0.0009164556962025317, "loss": 0.4743, "step": 188320 }, { "epoch": 54.1800920598389, "grad_norm": 0.9106754064559937, "learning_rate": 0.0009163981588032221, "loss": 0.4709, "step": 188330 }, { "epoch": 54.182968929804375, "grad_norm": 0.7037858366966248, "learning_rate": 0.0009163406214039126, "loss": 0.3957, "step": 188340 }, { "epoch": 54.18584579976985, "grad_norm": 1.588567852973938, "learning_rate": 0.000916283084004603, "loss": 0.5006, "step": 188350 }, { "epoch": 54.18872266973533, "grad_norm": 1.100342869758606, "learning_rate": 0.0009162255466052935, "loss": 0.4672, "step": 188360 }, { "epoch": 54.1915995397008, "grad_norm": 0.5785638689994812, "learning_rate": 0.0009161680092059838, "loss": 0.4509, "step": 188370 }, { "epoch": 54.19447640966629, "grad_norm": 1.625675916671753, "learning_rate": 0.0009161104718066744, "loss": 0.3962, "step": 188380 }, { "epoch": 54.19735327963176, "grad_norm": 0.9841164350509644, "learning_rate": 0.0009160529344073648, "loss": 0.3826, "step": 188390 }, { "epoch": 54.20023014959724, "grad_norm": 2.1285924911499023, "learning_rate": 0.0009159953970080552, "loss": 0.46, "step": 188400 }, { "epoch": 54.203107019562715, "grad_norm": 2.3235011100769043, "learning_rate": 0.0009159378596087456, "loss": 0.4424, "step": 188410 }, { "epoch": 54.20598388952819, "grad_norm": 1.382576584815979, "learning_rate": 0.0009158803222094362, "loss": 0.4494, "step": 188420 }, { "epoch": 54.20886075949367, "grad_norm": 1.2679742574691772, "learning_rate": 0.0009158227848101266, "loss": 0.4467, "step": 188430 }, { "epoch": 54.21173762945915, "grad_norm": 1.4982101917266846, "learning_rate": 0.000915765247410817, "loss": 0.4706, "step": 188440 }, { "epoch": 54.21461449942463, "grad_norm": 2.0841751098632812, "learning_rate": 0.0009157077100115076, "loss": 0.5198, "step": 188450 }, { "epoch": 54.2174913693901, "grad_norm": 1.1206833124160767, "learning_rate": 0.0009156501726121979, "loss": 0.4596, "step": 188460 }, { "epoch": 54.22036823935558, "grad_norm": 1.0622769594192505, "learning_rate": 0.0009155926352128884, "loss": 0.5294, "step": 188470 }, { "epoch": 54.223245109321056, "grad_norm": 1.4441343545913696, "learning_rate": 0.0009155350978135788, "loss": 0.544, "step": 188480 }, { "epoch": 54.22612197928654, "grad_norm": 1.2574315071105957, "learning_rate": 0.0009154775604142693, "loss": 0.3851, "step": 188490 }, { "epoch": 54.228998849252015, "grad_norm": 1.358251929283142, "learning_rate": 0.0009154200230149597, "loss": 0.5681, "step": 188500 }, { "epoch": 54.23187571921749, "grad_norm": 1.6175258159637451, "learning_rate": 0.0009153624856156502, "loss": 0.5022, "step": 188510 }, { "epoch": 54.23475258918297, "grad_norm": 0.9456014037132263, "learning_rate": 0.0009153049482163407, "loss": 0.5271, "step": 188520 }, { "epoch": 54.237629459148444, "grad_norm": 1.0642871856689453, "learning_rate": 0.0009152474108170311, "loss": 0.4992, "step": 188530 }, { "epoch": 54.24050632911393, "grad_norm": 1.1733118295669556, "learning_rate": 0.0009151898734177216, "loss": 0.5563, "step": 188540 }, { "epoch": 54.2433831990794, "grad_norm": 1.392741322517395, "learning_rate": 0.0009151323360184119, "loss": 0.5232, "step": 188550 }, { "epoch": 54.24626006904488, "grad_norm": 1.0769768953323364, "learning_rate": 0.0009150747986191025, "loss": 0.6081, "step": 188560 }, { "epoch": 54.249136939010356, "grad_norm": 1.4877841472625732, "learning_rate": 0.0009150172612197929, "loss": 0.5049, "step": 188570 }, { "epoch": 54.25201380897583, "grad_norm": 1.2265962362289429, "learning_rate": 0.0009149597238204833, "loss": 0.5519, "step": 188580 }, { "epoch": 54.254890678941315, "grad_norm": 2.049504518508911, "learning_rate": 0.0009149021864211737, "loss": 0.5153, "step": 188590 }, { "epoch": 54.25776754890679, "grad_norm": 1.3369563817977905, "learning_rate": 0.0009148446490218643, "loss": 0.5046, "step": 188600 }, { "epoch": 54.26064441887227, "grad_norm": 1.3767576217651367, "learning_rate": 0.0009147871116225546, "loss": 0.4511, "step": 188610 }, { "epoch": 54.263521288837744, "grad_norm": 1.1970800161361694, "learning_rate": 0.0009147295742232451, "loss": 0.4613, "step": 188620 }, { "epoch": 54.26639815880322, "grad_norm": 0.8282061815261841, "learning_rate": 0.0009146720368239357, "loss": 0.5372, "step": 188630 }, { "epoch": 54.269275028768696, "grad_norm": 0.8725049495697021, "learning_rate": 0.000914614499424626, "loss": 0.5689, "step": 188640 }, { "epoch": 54.27215189873418, "grad_norm": 1.34462308883667, "learning_rate": 0.0009145569620253165, "loss": 0.492, "step": 188650 }, { "epoch": 54.275028768699656, "grad_norm": 0.8273333311080933, "learning_rate": 0.0009144994246260069, "loss": 0.4508, "step": 188660 }, { "epoch": 54.27790563866513, "grad_norm": 1.4938371181488037, "learning_rate": 0.0009144418872266974, "loss": 0.4929, "step": 188670 }, { "epoch": 54.28078250863061, "grad_norm": 1.4587539434432983, "learning_rate": 0.0009143843498273878, "loss": 0.4611, "step": 188680 }, { "epoch": 54.283659378596084, "grad_norm": 1.3979543447494507, "learning_rate": 0.0009143268124280783, "loss": 0.5606, "step": 188690 }, { "epoch": 54.28653624856157, "grad_norm": 1.0766013860702515, "learning_rate": 0.0009142692750287686, "loss": 0.4767, "step": 188700 }, { "epoch": 54.289413118527044, "grad_norm": 1.2875025272369385, "learning_rate": 0.0009142117376294592, "loss": 0.4181, "step": 188710 }, { "epoch": 54.29228998849252, "grad_norm": 1.0428861379623413, "learning_rate": 0.0009141542002301497, "loss": 0.5467, "step": 188720 }, { "epoch": 54.295166858457996, "grad_norm": 1.764420509338379, "learning_rate": 0.00091409666283084, "loss": 0.5343, "step": 188730 }, { "epoch": 54.29804372842347, "grad_norm": 1.246649980545044, "learning_rate": 0.0009140391254315306, "loss": 0.5629, "step": 188740 }, { "epoch": 54.300920598388956, "grad_norm": 1.6503310203552246, "learning_rate": 0.000913981588032221, "loss": 0.5112, "step": 188750 }, { "epoch": 54.30379746835443, "grad_norm": 1.4551026821136475, "learning_rate": 0.0009139240506329114, "loss": 0.4407, "step": 188760 }, { "epoch": 54.30667433831991, "grad_norm": 3.2431323528289795, "learning_rate": 0.0009138665132336018, "loss": 0.5424, "step": 188770 }, { "epoch": 54.309551208285384, "grad_norm": 0.9320700168609619, "learning_rate": 0.0009138089758342924, "loss": 0.5271, "step": 188780 }, { "epoch": 54.31242807825086, "grad_norm": 0.7499563694000244, "learning_rate": 0.0009137514384349827, "loss": 0.41, "step": 188790 }, { "epoch": 54.315304948216344, "grad_norm": 0.9924735426902771, "learning_rate": 0.0009136939010356732, "loss": 0.5463, "step": 188800 }, { "epoch": 54.31818181818182, "grad_norm": 1.0895707607269287, "learning_rate": 0.0009136363636363638, "loss": 0.6638, "step": 188810 }, { "epoch": 54.321058688147296, "grad_norm": 2.3359999656677246, "learning_rate": 0.0009135788262370541, "loss": 0.6129, "step": 188820 }, { "epoch": 54.32393555811277, "grad_norm": 1.081952691078186, "learning_rate": 0.0009135212888377446, "loss": 0.4523, "step": 188830 }, { "epoch": 54.32681242807825, "grad_norm": 1.116349697113037, "learning_rate": 0.000913463751438435, "loss": 0.4883, "step": 188840 }, { "epoch": 54.329689298043725, "grad_norm": 0.9427017569541931, "learning_rate": 0.0009134062140391255, "loss": 0.3706, "step": 188850 }, { "epoch": 54.33256616800921, "grad_norm": 1.171778917312622, "learning_rate": 0.0009133486766398159, "loss": 0.5989, "step": 188860 }, { "epoch": 54.335443037974684, "grad_norm": 1.7925689220428467, "learning_rate": 0.0009132911392405064, "loss": 0.4424, "step": 188870 }, { "epoch": 54.33831990794016, "grad_norm": 1.235884428024292, "learning_rate": 0.0009132336018411967, "loss": 0.4988, "step": 188880 }, { "epoch": 54.34119677790564, "grad_norm": 2.475923538208008, "learning_rate": 0.0009131760644418873, "loss": 0.5587, "step": 188890 }, { "epoch": 54.34407364787111, "grad_norm": 2.5706288814544678, "learning_rate": 0.0009131185270425777, "loss": 0.4727, "step": 188900 }, { "epoch": 54.346950517836596, "grad_norm": 1.2237530946731567, "learning_rate": 0.0009130609896432681, "loss": 0.5808, "step": 188910 }, { "epoch": 54.34982738780207, "grad_norm": 1.2418599128723145, "learning_rate": 0.0009130034522439587, "loss": 0.499, "step": 188920 }, { "epoch": 54.35270425776755, "grad_norm": 1.9259923696517944, "learning_rate": 0.0009129459148446491, "loss": 0.7041, "step": 188930 }, { "epoch": 54.355581127733025, "grad_norm": 0.6178113222122192, "learning_rate": 0.0009128883774453395, "loss": 0.4125, "step": 188940 }, { "epoch": 54.3584579976985, "grad_norm": 1.9991909265518188, "learning_rate": 0.0009128308400460299, "loss": 0.4926, "step": 188950 }, { "epoch": 54.361334867663984, "grad_norm": 1.2231645584106445, "learning_rate": 0.0009127733026467205, "loss": 0.4421, "step": 188960 }, { "epoch": 54.36421173762946, "grad_norm": 1.1139709949493408, "learning_rate": 0.0009127157652474108, "loss": 0.4516, "step": 188970 }, { "epoch": 54.36708860759494, "grad_norm": 0.9435675144195557, "learning_rate": 0.0009126582278481013, "loss": 0.407, "step": 188980 }, { "epoch": 54.36996547756041, "grad_norm": 0.9184463620185852, "learning_rate": 0.0009126006904487916, "loss": 0.4274, "step": 188990 }, { "epoch": 54.37284234752589, "grad_norm": 1.4691301584243774, "learning_rate": 0.0009125431530494822, "loss": 0.5336, "step": 189000 }, { "epoch": 54.37571921749137, "grad_norm": 1.7175700664520264, "learning_rate": 0.0009124856156501726, "loss": 0.5676, "step": 189010 }, { "epoch": 54.37859608745685, "grad_norm": 1.2088135480880737, "learning_rate": 0.000912428078250863, "loss": 0.524, "step": 189020 }, { "epoch": 54.381472957422325, "grad_norm": 0.840613067150116, "learning_rate": 0.0009123705408515536, "loss": 0.5214, "step": 189030 }, { "epoch": 54.3843498273878, "grad_norm": 2.1970109939575195, "learning_rate": 0.000912313003452244, "loss": 0.521, "step": 189040 }, { "epoch": 54.38722669735328, "grad_norm": 1.4395546913146973, "learning_rate": 0.0009122554660529344, "loss": 0.5708, "step": 189050 }, { "epoch": 54.39010356731876, "grad_norm": 1.3326284885406494, "learning_rate": 0.0009121979286536248, "loss": 0.4813, "step": 189060 }, { "epoch": 54.39298043728424, "grad_norm": 1.5439037084579468, "learning_rate": 0.0009121403912543154, "loss": 0.5589, "step": 189070 }, { "epoch": 54.39585730724971, "grad_norm": 1.454752802848816, "learning_rate": 0.0009120828538550057, "loss": 0.453, "step": 189080 }, { "epoch": 54.39873417721519, "grad_norm": 1.8917531967163086, "learning_rate": 0.0009120253164556962, "loss": 0.5797, "step": 189090 }, { "epoch": 54.401611047180666, "grad_norm": 0.665601372718811, "learning_rate": 0.0009119677790563866, "loss": 0.4699, "step": 189100 }, { "epoch": 54.40448791714614, "grad_norm": 2.852419853210449, "learning_rate": 0.0009119102416570771, "loss": 0.6164, "step": 189110 }, { "epoch": 54.407364787111625, "grad_norm": 1.2729308605194092, "learning_rate": 0.0009118527042577676, "loss": 0.5971, "step": 189120 }, { "epoch": 54.4102416570771, "grad_norm": 1.3077417612075806, "learning_rate": 0.000911795166858458, "loss": 0.5027, "step": 189130 }, { "epoch": 54.41311852704258, "grad_norm": 0.7799658179283142, "learning_rate": 0.0009117376294591485, "loss": 0.591, "step": 189140 }, { "epoch": 54.415995397008054, "grad_norm": 1.0447297096252441, "learning_rate": 0.0009116800920598389, "loss": 0.5199, "step": 189150 }, { "epoch": 54.41887226697353, "grad_norm": 0.9478827118873596, "learning_rate": 0.0009116225546605294, "loss": 0.5558, "step": 189160 }, { "epoch": 54.42174913693901, "grad_norm": 0.9997052550315857, "learning_rate": 0.0009115650172612197, "loss": 0.4836, "step": 189170 }, { "epoch": 54.42462600690449, "grad_norm": 1.031169056892395, "learning_rate": 0.0009115074798619103, "loss": 0.7042, "step": 189180 }, { "epoch": 54.427502876869966, "grad_norm": 1.1614731550216675, "learning_rate": 0.0009114499424626007, "loss": 0.4565, "step": 189190 }, { "epoch": 54.43037974683544, "grad_norm": 1.349040150642395, "learning_rate": 0.0009113924050632911, "loss": 0.5539, "step": 189200 }, { "epoch": 54.43325661680092, "grad_norm": 0.7085492014884949, "learning_rate": 0.0009113348676639817, "loss": 0.5094, "step": 189210 }, { "epoch": 54.4361334867664, "grad_norm": 1.9471710920333862, "learning_rate": 0.0009112773302646721, "loss": 0.5563, "step": 189220 }, { "epoch": 54.43901035673188, "grad_norm": 1.907572627067566, "learning_rate": 0.0009112197928653625, "loss": 0.4498, "step": 189230 }, { "epoch": 54.441887226697354, "grad_norm": 1.285197377204895, "learning_rate": 0.0009111622554660529, "loss": 0.4656, "step": 189240 }, { "epoch": 54.44476409666283, "grad_norm": 2.7528326511383057, "learning_rate": 0.0009111047180667435, "loss": 0.7518, "step": 189250 }, { "epoch": 54.447640966628306, "grad_norm": 1.5382163524627686, "learning_rate": 0.0009110471806674338, "loss": 0.5397, "step": 189260 }, { "epoch": 54.45051783659379, "grad_norm": 1.2348235845565796, "learning_rate": 0.0009109896432681243, "loss": 0.4972, "step": 189270 }, { "epoch": 54.453394706559266, "grad_norm": 1.2586709260940552, "learning_rate": 0.0009109321058688147, "loss": 0.4294, "step": 189280 }, { "epoch": 54.45627157652474, "grad_norm": 1.2527045011520386, "learning_rate": 0.0009108745684695052, "loss": 0.5026, "step": 189290 }, { "epoch": 54.45914844649022, "grad_norm": 0.639777660369873, "learning_rate": 0.0009108170310701956, "loss": 0.3698, "step": 189300 }, { "epoch": 54.462025316455694, "grad_norm": 1.048396110534668, "learning_rate": 0.0009107594936708861, "loss": 0.4813, "step": 189310 }, { "epoch": 54.46490218642117, "grad_norm": 1.7332180738449097, "learning_rate": 0.0009107019562715766, "loss": 0.6158, "step": 189320 }, { "epoch": 54.467779056386654, "grad_norm": 0.8146083354949951, "learning_rate": 0.000910644418872267, "loss": 0.5065, "step": 189330 }, { "epoch": 54.47065592635213, "grad_norm": 1.3136307001113892, "learning_rate": 0.0009105868814729575, "loss": 0.6151, "step": 189340 }, { "epoch": 54.473532796317606, "grad_norm": 0.8582802414894104, "learning_rate": 0.0009105293440736478, "loss": 0.4181, "step": 189350 }, { "epoch": 54.47640966628308, "grad_norm": 1.8123648166656494, "learning_rate": 0.0009104718066743384, "loss": 0.4992, "step": 189360 }, { "epoch": 54.47928653624856, "grad_norm": 1.5106064081192017, "learning_rate": 0.0009104142692750288, "loss": 0.4834, "step": 189370 }, { "epoch": 54.48216340621404, "grad_norm": 1.3277130126953125, "learning_rate": 0.0009103567318757192, "loss": 0.4614, "step": 189380 }, { "epoch": 54.48504027617952, "grad_norm": 2.311971426010132, "learning_rate": 0.0009102991944764096, "loss": 0.5337, "step": 189390 }, { "epoch": 54.487917146144994, "grad_norm": 1.1027839183807373, "learning_rate": 0.0009102416570771002, "loss": 0.5246, "step": 189400 }, { "epoch": 54.49079401611047, "grad_norm": 0.6612793207168579, "learning_rate": 0.0009101841196777905, "loss": 0.4379, "step": 189410 }, { "epoch": 54.49367088607595, "grad_norm": 2.2071475982666016, "learning_rate": 0.000910126582278481, "loss": 0.5947, "step": 189420 }, { "epoch": 54.49654775604143, "grad_norm": 0.7342813014984131, "learning_rate": 0.0009100690448791716, "loss": 0.496, "step": 189430 }, { "epoch": 54.499424626006906, "grad_norm": 1.3183754682540894, "learning_rate": 0.0009100115074798619, "loss": 0.582, "step": 189440 }, { "epoch": 54.50230149597238, "grad_norm": 1.7667872905731201, "learning_rate": 0.0009099539700805524, "loss": 0.5108, "step": 189450 }, { "epoch": 54.50517836593786, "grad_norm": 1.57584810256958, "learning_rate": 0.0009098964326812428, "loss": 0.5536, "step": 189460 }, { "epoch": 54.508055235903335, "grad_norm": 1.6455272436141968, "learning_rate": 0.0009098388952819333, "loss": 0.6577, "step": 189470 }, { "epoch": 54.51093210586882, "grad_norm": 1.227920651435852, "learning_rate": 0.0009097813578826237, "loss": 0.4949, "step": 189480 }, { "epoch": 54.513808975834294, "grad_norm": 0.9639086127281189, "learning_rate": 0.0009097238204833142, "loss": 0.5232, "step": 189490 }, { "epoch": 54.51668584579977, "grad_norm": 1.9644032716751099, "learning_rate": 0.0009096662830840046, "loss": 0.5233, "step": 189500 }, { "epoch": 54.51956271576525, "grad_norm": 0.7918155193328857, "learning_rate": 0.0009096087456846951, "loss": 0.4553, "step": 189510 }, { "epoch": 54.52243958573072, "grad_norm": 0.9351522326469421, "learning_rate": 0.0009095512082853856, "loss": 0.7071, "step": 189520 }, { "epoch": 54.5253164556962, "grad_norm": 1.2550761699676514, "learning_rate": 0.0009094936708860759, "loss": 0.4577, "step": 189530 }, { "epoch": 54.52819332566168, "grad_norm": 1.1191494464874268, "learning_rate": 0.0009094361334867665, "loss": 0.6281, "step": 189540 }, { "epoch": 54.53107019562716, "grad_norm": 0.8756943941116333, "learning_rate": 0.0009093785960874569, "loss": 0.5613, "step": 189550 }, { "epoch": 54.533947065592635, "grad_norm": 2.0368239879608154, "learning_rate": 0.0009093210586881473, "loss": 0.4885, "step": 189560 }, { "epoch": 54.53682393555811, "grad_norm": 0.8126552104949951, "learning_rate": 0.0009092635212888377, "loss": 0.4577, "step": 189570 }, { "epoch": 54.53970080552359, "grad_norm": 2.090243101119995, "learning_rate": 0.0009092059838895283, "loss": 0.3932, "step": 189580 }, { "epoch": 54.54257767548907, "grad_norm": 0.7780600190162659, "learning_rate": 0.0009091484464902186, "loss": 0.3665, "step": 189590 }, { "epoch": 54.54545454545455, "grad_norm": 1.2978761196136475, "learning_rate": 0.0009090909090909091, "loss": 0.5175, "step": 189600 }, { "epoch": 54.54833141542002, "grad_norm": 1.815045714378357, "learning_rate": 0.0009090333716915997, "loss": 0.4839, "step": 189610 }, { "epoch": 54.5512082853855, "grad_norm": 1.3711192607879639, "learning_rate": 0.00090897583429229, "loss": 0.4565, "step": 189620 }, { "epoch": 54.554085155350975, "grad_norm": 1.638339638710022, "learning_rate": 0.0009089182968929805, "loss": 0.6152, "step": 189630 }, { "epoch": 54.55696202531646, "grad_norm": 2.247072696685791, "learning_rate": 0.0009088607594936709, "loss": 0.5462, "step": 189640 }, { "epoch": 54.559838895281935, "grad_norm": 1.2625758647918701, "learning_rate": 0.0009088032220943614, "loss": 0.4815, "step": 189650 }, { "epoch": 54.56271576524741, "grad_norm": 1.1831642389297485, "learning_rate": 0.0009087456846950518, "loss": 0.5754, "step": 189660 }, { "epoch": 54.56559263521289, "grad_norm": 1.4929146766662598, "learning_rate": 0.0009086881472957423, "loss": 0.6802, "step": 189670 }, { "epoch": 54.56846950517836, "grad_norm": 0.8403797745704651, "learning_rate": 0.0009086306098964326, "loss": 0.4763, "step": 189680 }, { "epoch": 54.57134637514385, "grad_norm": 1.1291453838348389, "learning_rate": 0.0009085730724971232, "loss": 0.5295, "step": 189690 }, { "epoch": 54.57422324510932, "grad_norm": 2.289116382598877, "learning_rate": 0.0009085155350978136, "loss": 0.4614, "step": 189700 }, { "epoch": 54.5771001150748, "grad_norm": 1.7825706005096436, "learning_rate": 0.000908457997698504, "loss": 0.6353, "step": 189710 }, { "epoch": 54.579976985040275, "grad_norm": 1.7789814472198486, "learning_rate": 0.0009084004602991946, "loss": 0.7092, "step": 189720 }, { "epoch": 54.58285385500575, "grad_norm": 0.9698624014854431, "learning_rate": 0.000908342922899885, "loss": 0.5302, "step": 189730 }, { "epoch": 54.58573072497123, "grad_norm": 1.2886605262756348, "learning_rate": 0.0009082853855005754, "loss": 0.5572, "step": 189740 }, { "epoch": 54.58860759493671, "grad_norm": 1.7392773628234863, "learning_rate": 0.0009082278481012658, "loss": 0.4946, "step": 189750 }, { "epoch": 54.59148446490219, "grad_norm": 2.230289936065674, "learning_rate": 0.0009081703107019564, "loss": 0.5076, "step": 189760 }, { "epoch": 54.59436133486766, "grad_norm": 1.251615047454834, "learning_rate": 0.0009081127733026467, "loss": 0.608, "step": 189770 }, { "epoch": 54.59723820483314, "grad_norm": 1.815003752708435, "learning_rate": 0.0009080552359033372, "loss": 0.6084, "step": 189780 }, { "epoch": 54.600115074798616, "grad_norm": 1.6021537780761719, "learning_rate": 0.0009079976985040277, "loss": 0.4988, "step": 189790 }, { "epoch": 54.6029919447641, "grad_norm": 1.2290496826171875, "learning_rate": 0.0009079401611047181, "loss": 0.5447, "step": 189800 }, { "epoch": 54.605868814729575, "grad_norm": 1.2809126377105713, "learning_rate": 0.0009078826237054085, "loss": 0.414, "step": 189810 }, { "epoch": 54.60874568469505, "grad_norm": 1.8388184309005737, "learning_rate": 0.0009078250863060989, "loss": 0.5112, "step": 189820 }, { "epoch": 54.61162255466053, "grad_norm": 1.204687476158142, "learning_rate": 0.0009077675489067895, "loss": 0.6896, "step": 189830 }, { "epoch": 54.614499424626004, "grad_norm": 2.2818753719329834, "learning_rate": 0.0009077100115074799, "loss": 0.6586, "step": 189840 }, { "epoch": 54.61737629459149, "grad_norm": 1.6138930320739746, "learning_rate": 0.0009076524741081703, "loss": 0.4891, "step": 189850 }, { "epoch": 54.620253164556964, "grad_norm": 1.2184356451034546, "learning_rate": 0.0009075949367088607, "loss": 0.5568, "step": 189860 }, { "epoch": 54.62313003452244, "grad_norm": 1.5410007238388062, "learning_rate": 0.0009075373993095513, "loss": 0.5413, "step": 189870 }, { "epoch": 54.626006904487916, "grad_norm": 1.5045921802520752, "learning_rate": 0.0009074798619102416, "loss": 0.5047, "step": 189880 }, { "epoch": 54.62888377445339, "grad_norm": 1.024239420890808, "learning_rate": 0.0009074223245109321, "loss": 0.5952, "step": 189890 }, { "epoch": 54.631760644418875, "grad_norm": 2.404921293258667, "learning_rate": 0.0009073647871116226, "loss": 0.6797, "step": 189900 }, { "epoch": 54.63463751438435, "grad_norm": 1.0804929733276367, "learning_rate": 0.000907307249712313, "loss": 0.5215, "step": 189910 }, { "epoch": 54.63751438434983, "grad_norm": 1.8574578762054443, "learning_rate": 0.0009072497123130034, "loss": 0.4907, "step": 189920 }, { "epoch": 54.640391254315304, "grad_norm": 4.534539222717285, "learning_rate": 0.0009071921749136939, "loss": 0.582, "step": 189930 }, { "epoch": 54.64326812428078, "grad_norm": 1.3728309869766235, "learning_rate": 0.0009071346375143844, "loss": 0.5021, "step": 189940 }, { "epoch": 54.64614499424626, "grad_norm": 2.3294806480407715, "learning_rate": 0.0009070771001150748, "loss": 0.5108, "step": 189950 }, { "epoch": 54.64902186421174, "grad_norm": 1.3598902225494385, "learning_rate": 0.0009070195627157653, "loss": 0.4534, "step": 189960 }, { "epoch": 54.651898734177216, "grad_norm": 1.2201045751571655, "learning_rate": 0.0009069620253164556, "loss": 0.5113, "step": 189970 }, { "epoch": 54.65477560414269, "grad_norm": 1.3560140132904053, "learning_rate": 0.0009069044879171462, "loss": 0.482, "step": 189980 }, { "epoch": 54.65765247410817, "grad_norm": 0.7809067964553833, "learning_rate": 0.0009068469505178366, "loss": 0.4733, "step": 189990 }, { "epoch": 54.660529344073645, "grad_norm": 1.0537272691726685, "learning_rate": 0.000906789413118527, "loss": 0.4931, "step": 190000 }, { "epoch": 54.66340621403913, "grad_norm": 1.6991993188858032, "learning_rate": 0.0009067318757192175, "loss": 0.5085, "step": 190010 }, { "epoch": 54.666283084004604, "grad_norm": 1.9324489831924438, "learning_rate": 0.000906674338319908, "loss": 0.4719, "step": 190020 }, { "epoch": 54.66915995397008, "grad_norm": 1.2148529291152954, "learning_rate": 0.0009066168009205984, "loss": 0.4921, "step": 190030 }, { "epoch": 54.67203682393556, "grad_norm": 1.221078634262085, "learning_rate": 0.0009065592635212888, "loss": 0.463, "step": 190040 }, { "epoch": 54.67491369390103, "grad_norm": 0.9324973821640015, "learning_rate": 0.0009065017261219794, "loss": 0.4915, "step": 190050 }, { "epoch": 54.677790563866516, "grad_norm": 1.5172812938690186, "learning_rate": 0.0009064441887226697, "loss": 0.6116, "step": 190060 }, { "epoch": 54.68066743383199, "grad_norm": 1.481492280960083, "learning_rate": 0.0009063866513233602, "loss": 0.4185, "step": 190070 }, { "epoch": 54.68354430379747, "grad_norm": 2.0377445220947266, "learning_rate": 0.0009063291139240506, "loss": 0.5788, "step": 190080 }, { "epoch": 54.686421173762945, "grad_norm": 1.1016448736190796, "learning_rate": 0.0009062715765247411, "loss": 0.479, "step": 190090 }, { "epoch": 54.68929804372842, "grad_norm": 0.7530880570411682, "learning_rate": 0.0009062140391254315, "loss": 0.5481, "step": 190100 }, { "epoch": 54.692174913693904, "grad_norm": 0.6152088046073914, "learning_rate": 0.000906156501726122, "loss": 0.3885, "step": 190110 }, { "epoch": 54.69505178365938, "grad_norm": 1.4933433532714844, "learning_rate": 0.0009060989643268125, "loss": 0.4384, "step": 190120 }, { "epoch": 54.69792865362486, "grad_norm": 0.9897905588150024, "learning_rate": 0.0009060414269275029, "loss": 0.4631, "step": 190130 }, { "epoch": 54.70080552359033, "grad_norm": 2.7287604808807373, "learning_rate": 0.0009059838895281934, "loss": 0.6391, "step": 190140 }, { "epoch": 54.70368239355581, "grad_norm": 1.0157670974731445, "learning_rate": 0.0009059263521288837, "loss": 0.5276, "step": 190150 }, { "epoch": 54.70655926352129, "grad_norm": 1.2954509258270264, "learning_rate": 0.0009058688147295743, "loss": 0.4813, "step": 190160 }, { "epoch": 54.70943613348677, "grad_norm": 1.455511450767517, "learning_rate": 0.0009058112773302647, "loss": 0.6744, "step": 190170 }, { "epoch": 54.712313003452245, "grad_norm": 1.324565052986145, "learning_rate": 0.0009057537399309551, "loss": 0.5408, "step": 190180 }, { "epoch": 54.71518987341772, "grad_norm": 1.5421696901321411, "learning_rate": 0.0009056962025316456, "loss": 0.562, "step": 190190 }, { "epoch": 54.7180667433832, "grad_norm": 1.2463234663009644, "learning_rate": 0.0009056386651323361, "loss": 0.5414, "step": 190200 }, { "epoch": 54.72094361334867, "grad_norm": 1.4689847230911255, "learning_rate": 0.0009055811277330264, "loss": 0.5338, "step": 190210 }, { "epoch": 54.72382048331416, "grad_norm": 1.1133770942687988, "learning_rate": 0.0009055235903337169, "loss": 0.4597, "step": 190220 }, { "epoch": 54.72669735327963, "grad_norm": 0.9279519319534302, "learning_rate": 0.0009054660529344075, "loss": 0.5946, "step": 190230 }, { "epoch": 54.72957422324511, "grad_norm": 1.0813387632369995, "learning_rate": 0.0009054085155350978, "loss": 0.4987, "step": 190240 }, { "epoch": 54.732451093210585, "grad_norm": 1.105893850326538, "learning_rate": 0.0009053509781357883, "loss": 0.488, "step": 190250 }, { "epoch": 54.73532796317606, "grad_norm": 1.821075439453125, "learning_rate": 0.0009052934407364787, "loss": 0.5536, "step": 190260 }, { "epoch": 54.738204833141545, "grad_norm": 0.8557912111282349, "learning_rate": 0.0009052359033371692, "loss": 0.5357, "step": 190270 }, { "epoch": 54.74108170310702, "grad_norm": 0.8877124786376953, "learning_rate": 0.0009051783659378596, "loss": 0.5269, "step": 190280 }, { "epoch": 54.7439585730725, "grad_norm": 1.2324923276901245, "learning_rate": 0.0009051208285385501, "loss": 0.5155, "step": 190290 }, { "epoch": 54.74683544303797, "grad_norm": 1.37357497215271, "learning_rate": 0.0009050632911392405, "loss": 0.5042, "step": 190300 }, { "epoch": 54.74971231300345, "grad_norm": 1.8348207473754883, "learning_rate": 0.000905005753739931, "loss": 0.5554, "step": 190310 }, { "epoch": 54.75258918296893, "grad_norm": 2.242182731628418, "learning_rate": 0.0009049482163406215, "loss": 0.4902, "step": 190320 }, { "epoch": 54.75546605293441, "grad_norm": 1.5200557708740234, "learning_rate": 0.0009048906789413118, "loss": 0.5218, "step": 190330 }, { "epoch": 54.758342922899885, "grad_norm": 1.7022029161453247, "learning_rate": 0.0009048331415420024, "loss": 0.551, "step": 190340 }, { "epoch": 54.76121979286536, "grad_norm": 0.7871067523956299, "learning_rate": 0.0009047756041426928, "loss": 0.5889, "step": 190350 }, { "epoch": 54.76409666283084, "grad_norm": 1.1296138763427734, "learning_rate": 0.0009047180667433832, "loss": 0.4294, "step": 190360 }, { "epoch": 54.76697353279632, "grad_norm": 0.7734681963920593, "learning_rate": 0.0009046605293440736, "loss": 0.5047, "step": 190370 }, { "epoch": 54.7698504027618, "grad_norm": 1.2931647300720215, "learning_rate": 0.0009046029919447642, "loss": 0.4924, "step": 190380 }, { "epoch": 54.77272727272727, "grad_norm": 1.1534267663955688, "learning_rate": 0.0009045454545454545, "loss": 0.4762, "step": 190390 }, { "epoch": 54.77560414269275, "grad_norm": 1.1345736980438232, "learning_rate": 0.000904487917146145, "loss": 0.6209, "step": 190400 }, { "epoch": 54.778481012658226, "grad_norm": 1.8820762634277344, "learning_rate": 0.0009044303797468356, "loss": 0.4716, "step": 190410 }, { "epoch": 54.7813578826237, "grad_norm": 0.8873131275177002, "learning_rate": 0.0009043728423475259, "loss": 0.4312, "step": 190420 }, { "epoch": 54.784234752589185, "grad_norm": 1.217578411102295, "learning_rate": 0.0009043153049482164, "loss": 0.6248, "step": 190430 }, { "epoch": 54.78711162255466, "grad_norm": 1.8577648401260376, "learning_rate": 0.0009042577675489068, "loss": 0.5759, "step": 190440 }, { "epoch": 54.78998849252014, "grad_norm": 1.296186923980713, "learning_rate": 0.0009042002301495973, "loss": 0.5355, "step": 190450 }, { "epoch": 54.792865362485614, "grad_norm": 1.1489239931106567, "learning_rate": 0.0009041426927502877, "loss": 0.5509, "step": 190460 }, { "epoch": 54.79574223245109, "grad_norm": 1.068491816520691, "learning_rate": 0.0009040851553509782, "loss": 0.5562, "step": 190470 }, { "epoch": 54.79861910241657, "grad_norm": 0.885240375995636, "learning_rate": 0.0009040276179516686, "loss": 0.5771, "step": 190480 }, { "epoch": 54.80149597238205, "grad_norm": 0.9775464534759521, "learning_rate": 0.0009039700805523591, "loss": 0.4647, "step": 190490 }, { "epoch": 54.804372842347526, "grad_norm": 1.604476809501648, "learning_rate": 0.0009039125431530495, "loss": 0.5253, "step": 190500 }, { "epoch": 54.807249712313, "grad_norm": 1.8881139755249023, "learning_rate": 0.0009038550057537399, "loss": 0.5308, "step": 190510 }, { "epoch": 54.81012658227848, "grad_norm": 1.9820659160614014, "learning_rate": 0.0009037974683544305, "loss": 0.702, "step": 190520 }, { "epoch": 54.81300345224396, "grad_norm": 1.9649925231933594, "learning_rate": 0.0009037399309551209, "loss": 0.537, "step": 190530 }, { "epoch": 54.81588032220944, "grad_norm": 1.7475603818893433, "learning_rate": 0.0009036823935558113, "loss": 0.5232, "step": 190540 }, { "epoch": 54.818757192174914, "grad_norm": 5.992896556854248, "learning_rate": 0.0009036248561565017, "loss": 0.4757, "step": 190550 }, { "epoch": 54.82163406214039, "grad_norm": 2.509438991546631, "learning_rate": 0.0009035673187571923, "loss": 0.5433, "step": 190560 }, { "epoch": 54.824510932105866, "grad_norm": 1.934979796409607, "learning_rate": 0.0009035097813578826, "loss": 0.612, "step": 190570 }, { "epoch": 54.82738780207135, "grad_norm": 1.4517289400100708, "learning_rate": 0.0009034522439585731, "loss": 0.46, "step": 190580 }, { "epoch": 54.830264672036826, "grad_norm": 1.6583654880523682, "learning_rate": 0.0009033947065592636, "loss": 0.6919, "step": 190590 }, { "epoch": 54.8331415420023, "grad_norm": 1.5639761686325073, "learning_rate": 0.000903337169159954, "loss": 0.4647, "step": 190600 }, { "epoch": 54.83601841196778, "grad_norm": 1.017173409461975, "learning_rate": 0.0009032796317606444, "loss": 0.464, "step": 190610 }, { "epoch": 54.838895281933254, "grad_norm": 1.0536209344863892, "learning_rate": 0.0009032220943613348, "loss": 0.513, "step": 190620 }, { "epoch": 54.84177215189874, "grad_norm": 2.247487783432007, "learning_rate": 0.0009031645569620254, "loss": 0.7126, "step": 190630 }, { "epoch": 54.844649021864214, "grad_norm": 1.0034832954406738, "learning_rate": 0.0009031070195627158, "loss": 0.4604, "step": 190640 }, { "epoch": 54.84752589182969, "grad_norm": 1.067421555519104, "learning_rate": 0.0009030494821634062, "loss": 0.6449, "step": 190650 }, { "epoch": 54.850402761795166, "grad_norm": 1.689295768737793, "learning_rate": 0.0009029919447640966, "loss": 0.5484, "step": 190660 }, { "epoch": 54.85327963176064, "grad_norm": 0.7995288372039795, "learning_rate": 0.0009029344073647872, "loss": 0.3962, "step": 190670 }, { "epoch": 54.85615650172612, "grad_norm": 0.9540481567382812, "learning_rate": 0.0009028768699654775, "loss": 0.6375, "step": 190680 }, { "epoch": 54.8590333716916, "grad_norm": 1.0085511207580566, "learning_rate": 0.000902819332566168, "loss": 0.5053, "step": 190690 }, { "epoch": 54.86191024165708, "grad_norm": 0.8183619976043701, "learning_rate": 0.0009027617951668585, "loss": 0.5352, "step": 190700 }, { "epoch": 54.864787111622555, "grad_norm": 1.3076447248458862, "learning_rate": 0.0009027042577675489, "loss": 0.5225, "step": 190710 }, { "epoch": 54.86766398158803, "grad_norm": 1.875044345855713, "learning_rate": 0.0009026467203682393, "loss": 0.5307, "step": 190720 }, { "epoch": 54.87054085155351, "grad_norm": 2.264634847640991, "learning_rate": 0.0009025891829689298, "loss": 0.572, "step": 190730 }, { "epoch": 54.87341772151899, "grad_norm": 0.7746609449386597, "learning_rate": 0.0009025316455696203, "loss": 0.6612, "step": 190740 }, { "epoch": 54.876294591484466, "grad_norm": 1.2701877355575562, "learning_rate": 0.0009024741081703107, "loss": 0.446, "step": 190750 }, { "epoch": 54.87917146144994, "grad_norm": 1.3677507638931274, "learning_rate": 0.0009024165707710012, "loss": 0.4303, "step": 190760 }, { "epoch": 54.88204833141542, "grad_norm": 1.6700830459594727, "learning_rate": 0.0009023590333716916, "loss": 0.4043, "step": 190770 }, { "epoch": 54.884925201380895, "grad_norm": 1.3909995555877686, "learning_rate": 0.0009023014959723821, "loss": 0.5541, "step": 190780 }, { "epoch": 54.88780207134638, "grad_norm": 2.73638916015625, "learning_rate": 0.0009022439585730725, "loss": 0.7542, "step": 190790 }, { "epoch": 54.890678941311855, "grad_norm": 1.6700063943862915, "learning_rate": 0.0009021864211737629, "loss": 0.5118, "step": 190800 }, { "epoch": 54.89355581127733, "grad_norm": 2.1057958602905273, "learning_rate": 0.0009021288837744534, "loss": 0.5303, "step": 190810 }, { "epoch": 54.89643268124281, "grad_norm": 1.034482717514038, "learning_rate": 0.0009020713463751439, "loss": 0.5449, "step": 190820 }, { "epoch": 54.89930955120828, "grad_norm": 1.3566428422927856, "learning_rate": 0.0009020138089758342, "loss": 0.4316, "step": 190830 }, { "epoch": 54.90218642117377, "grad_norm": 2.3855512142181396, "learning_rate": 0.0009019562715765247, "loss": 0.5906, "step": 190840 }, { "epoch": 54.90506329113924, "grad_norm": 1.3875497579574585, "learning_rate": 0.0009018987341772153, "loss": 0.5251, "step": 190850 }, { "epoch": 54.90794016110472, "grad_norm": 0.7321896553039551, "learning_rate": 0.0009018411967779056, "loss": 0.429, "step": 190860 }, { "epoch": 54.910817031070195, "grad_norm": 0.8669320344924927, "learning_rate": 0.0009017836593785961, "loss": 0.6127, "step": 190870 }, { "epoch": 54.91369390103567, "grad_norm": 2.4371161460876465, "learning_rate": 0.0009017261219792866, "loss": 0.6361, "step": 190880 }, { "epoch": 54.91657077100115, "grad_norm": 1.3054457902908325, "learning_rate": 0.000901668584579977, "loss": 0.454, "step": 190890 }, { "epoch": 54.91944764096663, "grad_norm": 1.3019315004348755, "learning_rate": 0.0009016110471806674, "loss": 0.5385, "step": 190900 }, { "epoch": 54.92232451093211, "grad_norm": 1.8634403944015503, "learning_rate": 0.0009015535097813579, "loss": 0.483, "step": 190910 }, { "epoch": 54.92520138089758, "grad_norm": 1.5778248310089111, "learning_rate": 0.0009014959723820483, "loss": 0.5841, "step": 190920 }, { "epoch": 54.92807825086306, "grad_norm": 1.0887857675552368, "learning_rate": 0.0009014384349827388, "loss": 0.4992, "step": 190930 }, { "epoch": 54.930955120828536, "grad_norm": 1.5475515127182007, "learning_rate": 0.0009013808975834293, "loss": 0.5564, "step": 190940 }, { "epoch": 54.93383199079402, "grad_norm": 2.4649012088775635, "learning_rate": 0.0009013233601841196, "loss": 0.5156, "step": 190950 }, { "epoch": 54.936708860759495, "grad_norm": 1.9242521524429321, "learning_rate": 0.0009012658227848102, "loss": 0.5825, "step": 190960 }, { "epoch": 54.93958573072497, "grad_norm": 1.30814528465271, "learning_rate": 0.0009012082853855006, "loss": 0.5027, "step": 190970 }, { "epoch": 54.94246260069045, "grad_norm": 1.8382881879806519, "learning_rate": 0.000901150747986191, "loss": 0.5192, "step": 190980 }, { "epoch": 54.945339470655924, "grad_norm": 0.856453001499176, "learning_rate": 0.0009010932105868815, "loss": 0.6286, "step": 190990 }, { "epoch": 54.94821634062141, "grad_norm": 1.087211012840271, "learning_rate": 0.000901035673187572, "loss": 0.4892, "step": 191000 }, { "epoch": 54.95109321058688, "grad_norm": 1.5169912576675415, "learning_rate": 0.0009009781357882623, "loss": 0.4852, "step": 191010 }, { "epoch": 54.95397008055236, "grad_norm": 1.2192071676254272, "learning_rate": 0.0009009205983889528, "loss": 0.5323, "step": 191020 }, { "epoch": 54.956846950517836, "grad_norm": 0.7311320900917053, "learning_rate": 0.0009008630609896434, "loss": 0.5966, "step": 191030 }, { "epoch": 54.95972382048331, "grad_norm": 2.141465663909912, "learning_rate": 0.0009008055235903337, "loss": 0.5916, "step": 191040 }, { "epoch": 54.962600690448795, "grad_norm": 2.383138656616211, "learning_rate": 0.0009007479861910242, "loss": 0.5171, "step": 191050 }, { "epoch": 54.96547756041427, "grad_norm": 1.6111136674880981, "learning_rate": 0.0009006904487917146, "loss": 0.6269, "step": 191060 }, { "epoch": 54.96835443037975, "grad_norm": 0.8810709714889526, "learning_rate": 0.0009006329113924051, "loss": 0.5772, "step": 191070 }, { "epoch": 54.971231300345224, "grad_norm": 1.0935229063034058, "learning_rate": 0.0009005753739930955, "loss": 0.5086, "step": 191080 }, { "epoch": 54.9741081703107, "grad_norm": 0.8580503463745117, "learning_rate": 0.000900517836593786, "loss": 0.4769, "step": 191090 }, { "epoch": 54.976985040276176, "grad_norm": 1.0642651319503784, "learning_rate": 0.0009004602991944764, "loss": 0.5022, "step": 191100 }, { "epoch": 54.97986191024166, "grad_norm": 2.1287569999694824, "learning_rate": 0.0009004027617951669, "loss": 0.4758, "step": 191110 }, { "epoch": 54.982738780207136, "grad_norm": 1.199808120727539, "learning_rate": 0.0009003452243958574, "loss": 0.6225, "step": 191120 }, { "epoch": 54.98561565017261, "grad_norm": 0.6623222827911377, "learning_rate": 0.0009002876869965477, "loss": 0.5356, "step": 191130 }, { "epoch": 54.98849252013809, "grad_norm": 2.2993485927581787, "learning_rate": 0.0009002301495972383, "loss": 0.4489, "step": 191140 }, { "epoch": 54.991369390103564, "grad_norm": 0.8535524606704712, "learning_rate": 0.0009001726121979287, "loss": 0.5152, "step": 191150 }, { "epoch": 54.99424626006905, "grad_norm": 1.176999568939209, "learning_rate": 0.0009001150747986191, "loss": 0.4995, "step": 191160 }, { "epoch": 54.997123130034524, "grad_norm": 1.3943021297454834, "learning_rate": 0.0009000575373993096, "loss": 0.5415, "step": 191170 }, { "epoch": 55.0, "grad_norm": 1.0867637395858765, "learning_rate": 0.0009000000000000001, "loss": 0.4398, "step": 191180 }, { "epoch": 55.002876869965476, "grad_norm": 1.0576372146606445, "learning_rate": 0.0008999424626006904, "loss": 0.3846, "step": 191190 }, { "epoch": 55.00575373993095, "grad_norm": 1.7942619323730469, "learning_rate": 0.0008998849252013809, "loss": 0.4997, "step": 191200 }, { "epoch": 55.008630609896436, "grad_norm": 1.1517504453659058, "learning_rate": 0.0008998273878020715, "loss": 0.4558, "step": 191210 }, { "epoch": 55.01150747986191, "grad_norm": 1.6645361185073853, "learning_rate": 0.0008997698504027618, "loss": 0.406, "step": 191220 }, { "epoch": 55.01438434982739, "grad_norm": 0.8256327509880066, "learning_rate": 0.0008997123130034523, "loss": 0.4474, "step": 191230 }, { "epoch": 55.017261219792864, "grad_norm": 2.1513051986694336, "learning_rate": 0.0008996547756041427, "loss": 0.5095, "step": 191240 }, { "epoch": 55.02013808975834, "grad_norm": 1.2464404106140137, "learning_rate": 0.0008995972382048332, "loss": 0.461, "step": 191250 }, { "epoch": 55.023014959723824, "grad_norm": 0.9156180024147034, "learning_rate": 0.0008995397008055236, "loss": 0.4424, "step": 191260 }, { "epoch": 55.0258918296893, "grad_norm": 1.34222412109375, "learning_rate": 0.0008994821634062141, "loss": 0.5463, "step": 191270 }, { "epoch": 55.028768699654776, "grad_norm": 1.911533236503601, "learning_rate": 0.0008994246260069045, "loss": 0.4733, "step": 191280 }, { "epoch": 55.03164556962025, "grad_norm": 1.8875561952590942, "learning_rate": 0.000899367088607595, "loss": 0.5404, "step": 191290 }, { "epoch": 55.03452243958573, "grad_norm": 1.0712045431137085, "learning_rate": 0.0008993095512082854, "loss": 0.5659, "step": 191300 }, { "epoch": 55.037399309551205, "grad_norm": 1.595178246498108, "learning_rate": 0.0008992520138089758, "loss": 0.4482, "step": 191310 }, { "epoch": 55.04027617951669, "grad_norm": 1.5521998405456543, "learning_rate": 0.0008991944764096664, "loss": 0.5421, "step": 191320 }, { "epoch": 55.043153049482164, "grad_norm": 1.1817659139633179, "learning_rate": 0.0008991369390103568, "loss": 0.4892, "step": 191330 }, { "epoch": 55.04602991944764, "grad_norm": 1.0661860704421997, "learning_rate": 0.0008990794016110472, "loss": 0.3958, "step": 191340 }, { "epoch": 55.04890678941312, "grad_norm": 1.9545135498046875, "learning_rate": 0.0008990218642117376, "loss": 0.5553, "step": 191350 }, { "epoch": 55.05178365937859, "grad_norm": 1.608533501625061, "learning_rate": 0.0008989643268124282, "loss": 0.6003, "step": 191360 }, { "epoch": 55.054660529344076, "grad_norm": 1.6978497505187988, "learning_rate": 0.0008989067894131185, "loss": 0.418, "step": 191370 }, { "epoch": 55.05753739930955, "grad_norm": 0.9178676605224609, "learning_rate": 0.000898849252013809, "loss": 0.4523, "step": 191380 }, { "epoch": 55.06041426927503, "grad_norm": 1.6653159856796265, "learning_rate": 0.0008987917146144995, "loss": 0.5495, "step": 191390 }, { "epoch": 55.063291139240505, "grad_norm": 1.4197099208831787, "learning_rate": 0.0008987341772151899, "loss": 0.5362, "step": 191400 }, { "epoch": 55.06616800920598, "grad_norm": 1.8553743362426758, "learning_rate": 0.0008986766398158803, "loss": 0.4524, "step": 191410 }, { "epoch": 55.069044879171464, "grad_norm": 0.7291057109832764, "learning_rate": 0.0008986191024165707, "loss": 0.494, "step": 191420 }, { "epoch": 55.07192174913694, "grad_norm": 0.784076452255249, "learning_rate": 0.0008985615650172613, "loss": 0.4341, "step": 191430 }, { "epoch": 55.07479861910242, "grad_norm": 0.877448558807373, "learning_rate": 0.0008985040276179517, "loss": 0.4314, "step": 191440 }, { "epoch": 55.07767548906789, "grad_norm": 1.8781336545944214, "learning_rate": 0.0008984464902186421, "loss": 0.4557, "step": 191450 }, { "epoch": 55.08055235903337, "grad_norm": 1.2108181715011597, "learning_rate": 0.0008983889528193326, "loss": 0.4284, "step": 191460 }, { "epoch": 55.08342922899885, "grad_norm": 1.344668984413147, "learning_rate": 0.0008983314154200231, "loss": 0.47, "step": 191470 }, { "epoch": 55.08630609896433, "grad_norm": 1.6111156940460205, "learning_rate": 0.0008982738780207134, "loss": 0.519, "step": 191480 }, { "epoch": 55.089182968929805, "grad_norm": 1.2108805179595947, "learning_rate": 0.0008982163406214039, "loss": 0.5092, "step": 191490 }, { "epoch": 55.09205983889528, "grad_norm": 1.520322322845459, "learning_rate": 0.0008981588032220944, "loss": 0.4685, "step": 191500 }, { "epoch": 55.09493670886076, "grad_norm": 0.653353750705719, "learning_rate": 0.0008981012658227848, "loss": 0.6627, "step": 191510 }, { "epoch": 55.09781357882623, "grad_norm": 1.3224108219146729, "learning_rate": 0.0008980437284234752, "loss": 0.5108, "step": 191520 }, { "epoch": 55.10069044879172, "grad_norm": 1.7453535795211792, "learning_rate": 0.0008979861910241657, "loss": 0.4339, "step": 191530 }, { "epoch": 55.10356731875719, "grad_norm": 1.1096248626708984, "learning_rate": 0.0008979286536248562, "loss": 0.4629, "step": 191540 }, { "epoch": 55.10644418872267, "grad_norm": 1.0414824485778809, "learning_rate": 0.0008978711162255466, "loss": 0.4581, "step": 191550 }, { "epoch": 55.109321058688145, "grad_norm": 2.0158395767211914, "learning_rate": 0.0008978135788262371, "loss": 0.4947, "step": 191560 }, { "epoch": 55.11219792865362, "grad_norm": 0.8151575922966003, "learning_rate": 0.0008977560414269275, "loss": 0.4949, "step": 191570 }, { "epoch": 55.115074798619105, "grad_norm": 1.521043300628662, "learning_rate": 0.000897698504027618, "loss": 0.6496, "step": 191580 }, { "epoch": 55.11795166858458, "grad_norm": 0.9587467312812805, "learning_rate": 0.0008976409666283084, "loss": 0.6799, "step": 191590 }, { "epoch": 55.12082853855006, "grad_norm": 1.7281993627548218, "learning_rate": 0.0008975834292289988, "loss": 0.3803, "step": 191600 }, { "epoch": 55.123705408515534, "grad_norm": 0.6774498224258423, "learning_rate": 0.0008975258918296893, "loss": 0.4511, "step": 191610 }, { "epoch": 55.12658227848101, "grad_norm": 1.9456902742385864, "learning_rate": 0.0008974683544303798, "loss": 0.4674, "step": 191620 }, { "epoch": 55.12945914844649, "grad_norm": 1.386873722076416, "learning_rate": 0.0008974108170310701, "loss": 0.4787, "step": 191630 }, { "epoch": 55.13233601841197, "grad_norm": 1.71725332736969, "learning_rate": 0.0008973532796317606, "loss": 0.4206, "step": 191640 }, { "epoch": 55.135212888377445, "grad_norm": 1.4285666942596436, "learning_rate": 0.0008972957422324512, "loss": 0.4387, "step": 191650 }, { "epoch": 55.13808975834292, "grad_norm": 1.2869555950164795, "learning_rate": 0.0008972382048331415, "loss": 0.5215, "step": 191660 }, { "epoch": 55.1409666283084, "grad_norm": 1.14519202709198, "learning_rate": 0.000897180667433832, "loss": 0.6288, "step": 191670 }, { "epoch": 55.14384349827388, "grad_norm": 0.8512738943099976, "learning_rate": 0.0008971231300345225, "loss": 0.4706, "step": 191680 }, { "epoch": 55.14672036823936, "grad_norm": 1.2242276668548584, "learning_rate": 0.0008970655926352129, "loss": 0.4994, "step": 191690 }, { "epoch": 55.149597238204834, "grad_norm": 1.108458399772644, "learning_rate": 0.0008970080552359033, "loss": 0.5231, "step": 191700 }, { "epoch": 55.15247410817031, "grad_norm": 1.4183870553970337, "learning_rate": 0.0008969505178365938, "loss": 0.4681, "step": 191710 }, { "epoch": 55.155350978135786, "grad_norm": 1.7034152746200562, "learning_rate": 0.0008968929804372842, "loss": 0.5566, "step": 191720 }, { "epoch": 55.15822784810127, "grad_norm": 1.5388717651367188, "learning_rate": 0.0008968354430379747, "loss": 0.4458, "step": 191730 }, { "epoch": 55.161104718066746, "grad_norm": 1.8793455362319946, "learning_rate": 0.0008967779056386652, "loss": 0.508, "step": 191740 }, { "epoch": 55.16398158803222, "grad_norm": 2.1512765884399414, "learning_rate": 0.0008967203682393556, "loss": 0.5337, "step": 191750 }, { "epoch": 55.1668584579977, "grad_norm": 2.303041934967041, "learning_rate": 0.0008966628308400461, "loss": 0.5496, "step": 191760 }, { "epoch": 55.169735327963174, "grad_norm": 0.6802749037742615, "learning_rate": 0.0008966052934407365, "loss": 0.495, "step": 191770 }, { "epoch": 55.17261219792865, "grad_norm": 1.2780544757843018, "learning_rate": 0.0008965477560414269, "loss": 0.5023, "step": 191780 }, { "epoch": 55.175489067894134, "grad_norm": 2.798823118209839, "learning_rate": 0.0008964902186421174, "loss": 0.5428, "step": 191790 }, { "epoch": 55.17836593785961, "grad_norm": 1.1437515020370483, "learning_rate": 0.0008964326812428079, "loss": 0.6846, "step": 191800 }, { "epoch": 55.181242807825086, "grad_norm": 0.9129843711853027, "learning_rate": 0.0008963751438434982, "loss": 0.5911, "step": 191810 }, { "epoch": 55.18411967779056, "grad_norm": 1.4100217819213867, "learning_rate": 0.0008963176064441887, "loss": 0.5137, "step": 191820 }, { "epoch": 55.18699654775604, "grad_norm": 1.4451570510864258, "learning_rate": 0.0008962600690448793, "loss": 0.4858, "step": 191830 }, { "epoch": 55.18987341772152, "grad_norm": 1.0407185554504395, "learning_rate": 0.0008962025316455696, "loss": 0.4918, "step": 191840 }, { "epoch": 55.192750287687, "grad_norm": 3.3665482997894287, "learning_rate": 0.0008961449942462601, "loss": 0.474, "step": 191850 }, { "epoch": 55.195627157652474, "grad_norm": 1.2558428049087524, "learning_rate": 0.0008960874568469506, "loss": 0.5277, "step": 191860 }, { "epoch": 55.19850402761795, "grad_norm": 1.5424727201461792, "learning_rate": 0.000896029919447641, "loss": 0.6121, "step": 191870 }, { "epoch": 55.20138089758343, "grad_norm": 0.7904720902442932, "learning_rate": 0.0008959723820483314, "loss": 0.4678, "step": 191880 }, { "epoch": 55.20425776754891, "grad_norm": 1.420527696609497, "learning_rate": 0.0008959148446490219, "loss": 0.4788, "step": 191890 }, { "epoch": 55.207134637514386, "grad_norm": 1.332828402519226, "learning_rate": 0.0008958573072497123, "loss": 0.561, "step": 191900 }, { "epoch": 55.21001150747986, "grad_norm": 1.4610563516616821, "learning_rate": 0.0008957997698504028, "loss": 0.5031, "step": 191910 }, { "epoch": 55.21288837744534, "grad_norm": 1.7558554410934448, "learning_rate": 0.0008957422324510932, "loss": 0.5124, "step": 191920 }, { "epoch": 55.215765247410815, "grad_norm": 0.9895446300506592, "learning_rate": 0.0008956846950517836, "loss": 0.3592, "step": 191930 }, { "epoch": 55.2186421173763, "grad_norm": 1.2556405067443848, "learning_rate": 0.0008956271576524742, "loss": 0.5484, "step": 191940 }, { "epoch": 55.221518987341774, "grad_norm": 2.1814701557159424, "learning_rate": 0.0008955696202531646, "loss": 0.5023, "step": 191950 }, { "epoch": 55.22439585730725, "grad_norm": 0.8353727459907532, "learning_rate": 0.000895512082853855, "loss": 0.404, "step": 191960 }, { "epoch": 55.22727272727273, "grad_norm": 1.8542064428329468, "learning_rate": 0.0008954545454545455, "loss": 0.5762, "step": 191970 }, { "epoch": 55.2301495972382, "grad_norm": 0.9264556169509888, "learning_rate": 0.000895397008055236, "loss": 0.4942, "step": 191980 }, { "epoch": 55.23302646720368, "grad_norm": 0.9289765357971191, "learning_rate": 0.0008953394706559263, "loss": 0.4008, "step": 191990 }, { "epoch": 55.23590333716916, "grad_norm": 0.939882755279541, "learning_rate": 0.0008952819332566168, "loss": 0.4314, "step": 192000 }, { "epoch": 55.23878020713464, "grad_norm": 1.1989351511001587, "learning_rate": 0.0008952243958573073, "loss": 0.5919, "step": 192010 }, { "epoch": 55.241657077100115, "grad_norm": 1.2717456817626953, "learning_rate": 0.0008951668584579977, "loss": 0.4766, "step": 192020 }, { "epoch": 55.24453394706559, "grad_norm": 1.5955405235290527, "learning_rate": 0.0008951093210586882, "loss": 0.6534, "step": 192030 }, { "epoch": 55.24741081703107, "grad_norm": 0.905430018901825, "learning_rate": 0.0008950517836593786, "loss": 0.5252, "step": 192040 }, { "epoch": 55.25028768699655, "grad_norm": 1.58829927444458, "learning_rate": 0.0008949942462600691, "loss": 0.534, "step": 192050 }, { "epoch": 55.25316455696203, "grad_norm": 1.1629544496536255, "learning_rate": 0.0008949367088607595, "loss": 0.5675, "step": 192060 }, { "epoch": 55.2560414269275, "grad_norm": 2.0901875495910645, "learning_rate": 0.00089487917146145, "loss": 0.5697, "step": 192070 }, { "epoch": 55.25891829689298, "grad_norm": 0.7907506227493286, "learning_rate": 0.0008948216340621404, "loss": 0.5027, "step": 192080 }, { "epoch": 55.261795166858455, "grad_norm": 0.9519562125205994, "learning_rate": 0.0008947640966628309, "loss": 0.5181, "step": 192090 }, { "epoch": 55.26467203682394, "grad_norm": 1.1075854301452637, "learning_rate": 0.0008947065592635213, "loss": 0.4522, "step": 192100 }, { "epoch": 55.267548906789415, "grad_norm": 1.5451369285583496, "learning_rate": 0.0008946490218642117, "loss": 0.4862, "step": 192110 }, { "epoch": 55.27042577675489, "grad_norm": 1.3705228567123413, "learning_rate": 0.0008945914844649023, "loss": 0.4605, "step": 192120 }, { "epoch": 55.27330264672037, "grad_norm": 1.0871819257736206, "learning_rate": 0.0008945339470655927, "loss": 0.4087, "step": 192130 }, { "epoch": 55.27617951668584, "grad_norm": 1.4428378343582153, "learning_rate": 0.000894476409666283, "loss": 0.5098, "step": 192140 }, { "epoch": 55.27905638665133, "grad_norm": 0.9014685153961182, "learning_rate": 0.0008944188722669736, "loss": 0.4848, "step": 192150 }, { "epoch": 55.2819332566168, "grad_norm": 1.09437096118927, "learning_rate": 0.0008943613348676641, "loss": 0.4846, "step": 192160 }, { "epoch": 55.28481012658228, "grad_norm": 0.9521837830543518, "learning_rate": 0.0008943037974683544, "loss": 0.4506, "step": 192170 }, { "epoch": 55.287686996547755, "grad_norm": 1.7884989976882935, "learning_rate": 0.0008942462600690449, "loss": 0.4689, "step": 192180 }, { "epoch": 55.29056386651323, "grad_norm": 0.9525042176246643, "learning_rate": 0.0008941887226697354, "loss": 0.4758, "step": 192190 }, { "epoch": 55.29344073647871, "grad_norm": 1.8862651586532593, "learning_rate": 0.0008941311852704258, "loss": 0.5557, "step": 192200 }, { "epoch": 55.29631760644419, "grad_norm": 1.6287423372268677, "learning_rate": 0.0008940736478711162, "loss": 0.4906, "step": 192210 }, { "epoch": 55.29919447640967, "grad_norm": 0.9098293781280518, "learning_rate": 0.0008940161104718066, "loss": 0.4338, "step": 192220 }, { "epoch": 55.30207134637514, "grad_norm": 1.073955774307251, "learning_rate": 0.0008939585730724972, "loss": 0.5156, "step": 192230 }, { "epoch": 55.30494821634062, "grad_norm": 0.9970206022262573, "learning_rate": 0.0008939010356731876, "loss": 0.5839, "step": 192240 }, { "epoch": 55.307825086306096, "grad_norm": 1.7602691650390625, "learning_rate": 0.000893843498273878, "loss": 0.6063, "step": 192250 }, { "epoch": 55.31070195627158, "grad_norm": 0.7135701179504395, "learning_rate": 0.0008937859608745685, "loss": 0.4712, "step": 192260 }, { "epoch": 55.313578826237055, "grad_norm": 1.8490018844604492, "learning_rate": 0.000893728423475259, "loss": 0.4795, "step": 192270 }, { "epoch": 55.31645569620253, "grad_norm": 1.5680691003799438, "learning_rate": 0.0008936708860759493, "loss": 0.4741, "step": 192280 }, { "epoch": 55.31933256616801, "grad_norm": 1.695834994316101, "learning_rate": 0.0008936133486766398, "loss": 0.6601, "step": 192290 }, { "epoch": 55.322209436133484, "grad_norm": 0.582365095615387, "learning_rate": 0.0008935558112773303, "loss": 0.3729, "step": 192300 }, { "epoch": 55.32508630609897, "grad_norm": 1.6408394575119019, "learning_rate": 0.0008934982738780207, "loss": 0.4775, "step": 192310 }, { "epoch": 55.32796317606444, "grad_norm": 1.2025014162063599, "learning_rate": 0.0008934407364787111, "loss": 0.5145, "step": 192320 }, { "epoch": 55.33084004602992, "grad_norm": 1.0940446853637695, "learning_rate": 0.0008933831990794016, "loss": 0.7302, "step": 192330 }, { "epoch": 55.333716915995396, "grad_norm": 0.8420528769493103, "learning_rate": 0.0008933256616800921, "loss": 0.5322, "step": 192340 }, { "epoch": 55.33659378596087, "grad_norm": 1.310398817062378, "learning_rate": 0.0008932681242807825, "loss": 0.4534, "step": 192350 }, { "epoch": 55.339470655926355, "grad_norm": 1.1770179271697998, "learning_rate": 0.000893210586881473, "loss": 0.5779, "step": 192360 }, { "epoch": 55.34234752589183, "grad_norm": 0.9496458172798157, "learning_rate": 0.0008931530494821634, "loss": 0.6331, "step": 192370 }, { "epoch": 55.34522439585731, "grad_norm": 0.6419431567192078, "learning_rate": 0.0008930955120828539, "loss": 0.5364, "step": 192380 }, { "epoch": 55.348101265822784, "grad_norm": 1.4133076667785645, "learning_rate": 0.0008930379746835443, "loss": 0.6464, "step": 192390 }, { "epoch": 55.35097813578826, "grad_norm": 1.5752204656600952, "learning_rate": 0.0008929804372842347, "loss": 0.5336, "step": 192400 }, { "epoch": 55.353855005753736, "grad_norm": 0.960171103477478, "learning_rate": 0.0008929228998849252, "loss": 0.5503, "step": 192410 }, { "epoch": 55.35673187571922, "grad_norm": 0.9609081745147705, "learning_rate": 0.0008928653624856157, "loss": 0.494, "step": 192420 }, { "epoch": 55.359608745684696, "grad_norm": 1.9713715314865112, "learning_rate": 0.000892807825086306, "loss": 0.4612, "step": 192430 }, { "epoch": 55.36248561565017, "grad_norm": 0.8298340439796448, "learning_rate": 0.0008927502876869966, "loss": 0.5099, "step": 192440 }, { "epoch": 55.36536248561565, "grad_norm": 1.1453015804290771, "learning_rate": 0.0008926927502876871, "loss": 0.4764, "step": 192450 }, { "epoch": 55.368239355581125, "grad_norm": 3.7365660667419434, "learning_rate": 0.0008926352128883774, "loss": 0.5376, "step": 192460 }, { "epoch": 55.37111622554661, "grad_norm": 1.2078242301940918, "learning_rate": 0.0008925776754890679, "loss": 0.4125, "step": 192470 }, { "epoch": 55.373993095512084, "grad_norm": 1.372002363204956, "learning_rate": 0.0008925201380897584, "loss": 0.5577, "step": 192480 }, { "epoch": 55.37686996547756, "grad_norm": 1.5060251951217651, "learning_rate": 0.0008924626006904488, "loss": 0.5031, "step": 192490 }, { "epoch": 55.379746835443036, "grad_norm": 0.937004804611206, "learning_rate": 0.0008924050632911392, "loss": 0.5128, "step": 192500 }, { "epoch": 55.38262370540851, "grad_norm": 1.5805963277816772, "learning_rate": 0.0008923475258918297, "loss": 0.4911, "step": 192510 }, { "epoch": 55.385500575373996, "grad_norm": 1.4962711334228516, "learning_rate": 0.0008922899884925201, "loss": 0.5185, "step": 192520 }, { "epoch": 55.38837744533947, "grad_norm": 0.6694822907447815, "learning_rate": 0.0008922324510932106, "loss": 0.4289, "step": 192530 }, { "epoch": 55.39125431530495, "grad_norm": 1.5731382369995117, "learning_rate": 0.0008921749136939011, "loss": 0.5846, "step": 192540 }, { "epoch": 55.394131185270425, "grad_norm": 1.5027142763137817, "learning_rate": 0.0008921173762945915, "loss": 0.4758, "step": 192550 }, { "epoch": 55.3970080552359, "grad_norm": 1.3700244426727295, "learning_rate": 0.000892059838895282, "loss": 0.5471, "step": 192560 }, { "epoch": 55.399884925201384, "grad_norm": 1.5047096014022827, "learning_rate": 0.0008920023014959724, "loss": 0.5413, "step": 192570 }, { "epoch": 55.40276179516686, "grad_norm": 3.0764455795288086, "learning_rate": 0.0008919447640966628, "loss": 0.595, "step": 192580 }, { "epoch": 55.40563866513234, "grad_norm": 1.1257350444793701, "learning_rate": 0.0008918872266973533, "loss": 0.4819, "step": 192590 }, { "epoch": 55.40851553509781, "grad_norm": 1.3278919458389282, "learning_rate": 0.0008918296892980438, "loss": 0.4675, "step": 192600 }, { "epoch": 55.41139240506329, "grad_norm": 1.094225287437439, "learning_rate": 0.0008917721518987341, "loss": 0.5039, "step": 192610 }, { "epoch": 55.41426927502877, "grad_norm": 2.132807970046997, "learning_rate": 0.0008917146144994246, "loss": 0.5367, "step": 192620 }, { "epoch": 55.41714614499425, "grad_norm": 0.9526152610778809, "learning_rate": 0.0008916570771001152, "loss": 0.5557, "step": 192630 }, { "epoch": 55.420023014959725, "grad_norm": 1.5540037155151367, "learning_rate": 0.0008915995397008055, "loss": 0.4088, "step": 192640 }, { "epoch": 55.4228998849252, "grad_norm": 0.8672072887420654, "learning_rate": 0.000891542002301496, "loss": 0.5212, "step": 192650 }, { "epoch": 55.42577675489068, "grad_norm": 2.401402711868286, "learning_rate": 0.0008914844649021865, "loss": 0.578, "step": 192660 }, { "epoch": 55.42865362485615, "grad_norm": 1.508655071258545, "learning_rate": 0.0008914269275028769, "loss": 0.4069, "step": 192670 }, { "epoch": 55.43153049482164, "grad_norm": 1.2157901525497437, "learning_rate": 0.0008913693901035673, "loss": 0.5153, "step": 192680 }, { "epoch": 55.43440736478711, "grad_norm": 1.3736366033554077, "learning_rate": 0.0008913118527042578, "loss": 0.5519, "step": 192690 }, { "epoch": 55.43728423475259, "grad_norm": 1.983436942100525, "learning_rate": 0.0008912543153049482, "loss": 0.5168, "step": 192700 }, { "epoch": 55.440161104718065, "grad_norm": 1.0588608980178833, "learning_rate": 0.0008911967779056387, "loss": 0.4482, "step": 192710 }, { "epoch": 55.44303797468354, "grad_norm": 2.030179500579834, "learning_rate": 0.0008911392405063291, "loss": 0.556, "step": 192720 }, { "epoch": 55.445914844649025, "grad_norm": 2.439321279525757, "learning_rate": 0.0008910817031070196, "loss": 0.4954, "step": 192730 }, { "epoch": 55.4487917146145, "grad_norm": 1.9764395952224731, "learning_rate": 0.0008910241657077101, "loss": 0.4873, "step": 192740 }, { "epoch": 55.45166858457998, "grad_norm": 1.1972342729568481, "learning_rate": 0.0008909666283084005, "loss": 0.5016, "step": 192750 }, { "epoch": 55.45454545454545, "grad_norm": 0.92010498046875, "learning_rate": 0.0008909090909090909, "loss": 0.6009, "step": 192760 }, { "epoch": 55.45742232451093, "grad_norm": 1.1110970973968506, "learning_rate": 0.0008908515535097814, "loss": 0.4487, "step": 192770 }, { "epoch": 55.46029919447641, "grad_norm": 1.0734354257583618, "learning_rate": 0.0008907940161104719, "loss": 0.5503, "step": 192780 }, { "epoch": 55.46317606444189, "grad_norm": 1.0658196210861206, "learning_rate": 0.0008907364787111622, "loss": 0.5374, "step": 192790 }, { "epoch": 55.466052934407365, "grad_norm": 1.3840187788009644, "learning_rate": 0.0008906789413118527, "loss": 0.4653, "step": 192800 }, { "epoch": 55.46892980437284, "grad_norm": 1.5031088590621948, "learning_rate": 0.0008906214039125432, "loss": 0.5446, "step": 192810 }, { "epoch": 55.47180667433832, "grad_norm": 0.7186896204948425, "learning_rate": 0.0008905638665132336, "loss": 0.5923, "step": 192820 }, { "epoch": 55.4746835443038, "grad_norm": 1.2341395616531372, "learning_rate": 0.000890506329113924, "loss": 0.5109, "step": 192830 }, { "epoch": 55.47756041426928, "grad_norm": 1.6784758567810059, "learning_rate": 0.0008904487917146146, "loss": 0.5183, "step": 192840 }, { "epoch": 55.48043728423475, "grad_norm": 1.220970630645752, "learning_rate": 0.000890391254315305, "loss": 0.4605, "step": 192850 }, { "epoch": 55.48331415420023, "grad_norm": 3.28434419631958, "learning_rate": 0.0008903337169159954, "loss": 0.507, "step": 192860 }, { "epoch": 55.486191024165706, "grad_norm": 1.1176198720932007, "learning_rate": 0.0008902761795166859, "loss": 0.474, "step": 192870 }, { "epoch": 55.48906789413118, "grad_norm": 0.9422771334648132, "learning_rate": 0.0008902186421173763, "loss": 0.4176, "step": 192880 }, { "epoch": 55.491944764096665, "grad_norm": 1.045433759689331, "learning_rate": 0.0008901611047180668, "loss": 0.4566, "step": 192890 }, { "epoch": 55.49482163406214, "grad_norm": 1.3272086381912231, "learning_rate": 0.0008901035673187572, "loss": 0.4191, "step": 192900 }, { "epoch": 55.49769850402762, "grad_norm": 1.9976751804351807, "learning_rate": 0.0008900460299194476, "loss": 0.4497, "step": 192910 }, { "epoch": 55.500575373993094, "grad_norm": 0.8675887584686279, "learning_rate": 0.0008899884925201381, "loss": 0.4579, "step": 192920 }, { "epoch": 55.50345224395857, "grad_norm": 1.4439129829406738, "learning_rate": 0.0008899309551208286, "loss": 0.5062, "step": 192930 }, { "epoch": 55.50632911392405, "grad_norm": 0.87398761510849, "learning_rate": 0.000889873417721519, "loss": 0.5141, "step": 192940 }, { "epoch": 55.50920598388953, "grad_norm": 1.9126332998275757, "learning_rate": 0.0008898158803222095, "loss": 0.4129, "step": 192950 }, { "epoch": 55.512082853855006, "grad_norm": 1.1307681798934937, "learning_rate": 0.0008897583429229, "loss": 0.4057, "step": 192960 }, { "epoch": 55.51495972382048, "grad_norm": 0.814534604549408, "learning_rate": 0.0008897008055235903, "loss": 0.4543, "step": 192970 }, { "epoch": 55.51783659378596, "grad_norm": 1.982623815536499, "learning_rate": 0.0008896432681242808, "loss": 0.6021, "step": 192980 }, { "epoch": 55.52071346375144, "grad_norm": 1.4997435808181763, "learning_rate": 0.0008895857307249713, "loss": 0.4525, "step": 192990 }, { "epoch": 55.52359033371692, "grad_norm": 1.3717594146728516, "learning_rate": 0.0008895281933256617, "loss": 0.4713, "step": 193000 }, { "epoch": 55.526467203682394, "grad_norm": 0.7868111729621887, "learning_rate": 0.0008894706559263521, "loss": 0.5295, "step": 193010 }, { "epoch": 55.52934407364787, "grad_norm": 1.5367485284805298, "learning_rate": 0.0008894131185270426, "loss": 0.4343, "step": 193020 }, { "epoch": 55.532220943613346, "grad_norm": 1.1979057788848877, "learning_rate": 0.000889355581127733, "loss": 0.489, "step": 193030 }, { "epoch": 55.53509781357883, "grad_norm": 0.9871022701263428, "learning_rate": 0.0008892980437284235, "loss": 0.4531, "step": 193040 }, { "epoch": 55.537974683544306, "grad_norm": 1.595694899559021, "learning_rate": 0.0008892405063291139, "loss": 0.5707, "step": 193050 }, { "epoch": 55.54085155350978, "grad_norm": 2.016197681427002, "learning_rate": 0.0008891829689298044, "loss": 0.5136, "step": 193060 }, { "epoch": 55.54372842347526, "grad_norm": 1.1961779594421387, "learning_rate": 0.0008891254315304949, "loss": 0.5624, "step": 193070 }, { "epoch": 55.546605293440734, "grad_norm": 1.511867642402649, "learning_rate": 0.0008890678941311852, "loss": 0.5414, "step": 193080 }, { "epoch": 55.54948216340621, "grad_norm": 0.7930490970611572, "learning_rate": 0.0008890103567318757, "loss": 0.4499, "step": 193090 }, { "epoch": 55.552359033371694, "grad_norm": 1.464762806892395, "learning_rate": 0.0008889528193325662, "loss": 0.5828, "step": 193100 }, { "epoch": 55.55523590333717, "grad_norm": 1.2979224920272827, "learning_rate": 0.0008888952819332566, "loss": 0.7107, "step": 193110 }, { "epoch": 55.558112773302646, "grad_norm": 1.4122207164764404, "learning_rate": 0.000888837744533947, "loss": 0.5461, "step": 193120 }, { "epoch": 55.56098964326812, "grad_norm": 0.9470601677894592, "learning_rate": 0.0008887802071346376, "loss": 0.4908, "step": 193130 }, { "epoch": 55.5638665132336, "grad_norm": 2.0042567253112793, "learning_rate": 0.000888722669735328, "loss": 0.5084, "step": 193140 }, { "epoch": 55.56674338319908, "grad_norm": 1.9357857704162598, "learning_rate": 0.0008886651323360184, "loss": 0.5529, "step": 193150 }, { "epoch": 55.56962025316456, "grad_norm": 0.9810435771942139, "learning_rate": 0.0008886075949367089, "loss": 0.4674, "step": 193160 }, { "epoch": 55.572497123130034, "grad_norm": 0.8961494565010071, "learning_rate": 0.0008885500575373993, "loss": 0.6678, "step": 193170 }, { "epoch": 55.57537399309551, "grad_norm": 1.9943286180496216, "learning_rate": 0.0008884925201380898, "loss": 0.6179, "step": 193180 }, { "epoch": 55.57825086306099, "grad_norm": 0.6017313599586487, "learning_rate": 0.0008884349827387802, "loss": 0.5314, "step": 193190 }, { "epoch": 55.58112773302647, "grad_norm": 2.3143393993377686, "learning_rate": 0.0008883774453394706, "loss": 0.6194, "step": 193200 }, { "epoch": 55.584004602991946, "grad_norm": 1.2018840312957764, "learning_rate": 0.0008883199079401611, "loss": 0.5365, "step": 193210 }, { "epoch": 55.58688147295742, "grad_norm": 0.8376664519309998, "learning_rate": 0.0008882623705408516, "loss": 0.3905, "step": 193220 }, { "epoch": 55.5897583429229, "grad_norm": 1.1211779117584229, "learning_rate": 0.0008882048331415419, "loss": 0.4403, "step": 193230 }, { "epoch": 55.592635212888375, "grad_norm": 1.3852519989013672, "learning_rate": 0.0008881472957422325, "loss": 0.5757, "step": 193240 }, { "epoch": 55.59551208285386, "grad_norm": 2.023118495941162, "learning_rate": 0.000888089758342923, "loss": 0.4447, "step": 193250 }, { "epoch": 55.598388952819334, "grad_norm": 1.394174337387085, "learning_rate": 0.0008880322209436133, "loss": 0.4851, "step": 193260 }, { "epoch": 55.60126582278481, "grad_norm": 1.0034055709838867, "learning_rate": 0.0008879746835443038, "loss": 0.5207, "step": 193270 }, { "epoch": 55.60414269275029, "grad_norm": 2.0669150352478027, "learning_rate": 0.0008879171461449943, "loss": 0.4319, "step": 193280 }, { "epoch": 55.60701956271576, "grad_norm": 2.0676674842834473, "learning_rate": 0.0008878596087456847, "loss": 0.4612, "step": 193290 }, { "epoch": 55.60989643268124, "grad_norm": 1.0134508609771729, "learning_rate": 0.0008878020713463751, "loss": 0.516, "step": 193300 }, { "epoch": 55.61277330264672, "grad_norm": 0.8458591103553772, "learning_rate": 0.0008877445339470656, "loss": 0.5123, "step": 193310 }, { "epoch": 55.6156501726122, "grad_norm": 1.4290761947631836, "learning_rate": 0.000887686996547756, "loss": 0.5319, "step": 193320 }, { "epoch": 55.618527042577675, "grad_norm": 1.292722463607788, "learning_rate": 0.0008876294591484465, "loss": 0.5518, "step": 193330 }, { "epoch": 55.62140391254315, "grad_norm": 1.9897022247314453, "learning_rate": 0.000887571921749137, "loss": 0.53, "step": 193340 }, { "epoch": 55.62428078250863, "grad_norm": 1.7936069965362549, "learning_rate": 0.0008875143843498274, "loss": 0.6201, "step": 193350 }, { "epoch": 55.62715765247411, "grad_norm": 1.2163199186325073, "learning_rate": 0.0008874568469505179, "loss": 0.5087, "step": 193360 }, { "epoch": 55.63003452243959, "grad_norm": 0.5500385761260986, "learning_rate": 0.0008873993095512083, "loss": 0.5047, "step": 193370 }, { "epoch": 55.63291139240506, "grad_norm": 0.9563448429107666, "learning_rate": 0.0008873417721518987, "loss": 0.4326, "step": 193380 }, { "epoch": 55.63578826237054, "grad_norm": 1.8510762453079224, "learning_rate": 0.0008872842347525892, "loss": 0.5158, "step": 193390 }, { "epoch": 55.638665132336016, "grad_norm": 1.449378490447998, "learning_rate": 0.0008872266973532797, "loss": 0.4864, "step": 193400 }, { "epoch": 55.6415420023015, "grad_norm": 0.7117237448692322, "learning_rate": 0.00088716915995397, "loss": 0.5782, "step": 193410 }, { "epoch": 55.644418872266975, "grad_norm": 2.2392632961273193, "learning_rate": 0.0008871116225546606, "loss": 0.5709, "step": 193420 }, { "epoch": 55.64729574223245, "grad_norm": 1.0196948051452637, "learning_rate": 0.000887054085155351, "loss": 0.4686, "step": 193430 }, { "epoch": 55.65017261219793, "grad_norm": 1.0739963054656982, "learning_rate": 0.0008869965477560414, "loss": 0.4341, "step": 193440 }, { "epoch": 55.653049482163404, "grad_norm": 0.933933675289154, "learning_rate": 0.0008869390103567319, "loss": 0.4626, "step": 193450 }, { "epoch": 55.65592635212889, "grad_norm": 1.7387592792510986, "learning_rate": 0.0008868814729574224, "loss": 0.5954, "step": 193460 }, { "epoch": 55.65880322209436, "grad_norm": 1.4322625398635864, "learning_rate": 0.0008868239355581128, "loss": 0.6257, "step": 193470 }, { "epoch": 55.66168009205984, "grad_norm": 0.6667686104774475, "learning_rate": 0.0008867663981588032, "loss": 0.49, "step": 193480 }, { "epoch": 55.664556962025316, "grad_norm": 1.1844124794006348, "learning_rate": 0.0008867088607594937, "loss": 0.5395, "step": 193490 }, { "epoch": 55.66743383199079, "grad_norm": 1.5913833379745483, "learning_rate": 0.0008866513233601841, "loss": 0.6282, "step": 193500 }, { "epoch": 55.670310701956275, "grad_norm": 1.6944180727005005, "learning_rate": 0.0008865937859608746, "loss": 0.5055, "step": 193510 }, { "epoch": 55.67318757192175, "grad_norm": 0.9342077374458313, "learning_rate": 0.000886536248561565, "loss": 0.5188, "step": 193520 }, { "epoch": 55.67606444188723, "grad_norm": 1.6205573081970215, "learning_rate": 0.0008864787111622555, "loss": 0.6137, "step": 193530 }, { "epoch": 55.678941311852704, "grad_norm": 0.9735684990882874, "learning_rate": 0.000886421173762946, "loss": 0.4443, "step": 193540 }, { "epoch": 55.68181818181818, "grad_norm": 0.7708859443664551, "learning_rate": 0.0008863636363636364, "loss": 0.4565, "step": 193550 }, { "epoch": 55.684695051783656, "grad_norm": 0.9371477961540222, "learning_rate": 0.0008863060989643268, "loss": 0.5137, "step": 193560 }, { "epoch": 55.68757192174914, "grad_norm": 1.3235772848129272, "learning_rate": 0.0008862485615650173, "loss": 0.508, "step": 193570 }, { "epoch": 55.690448791714616, "grad_norm": 1.0724271535873413, "learning_rate": 0.0008861910241657078, "loss": 0.479, "step": 193580 }, { "epoch": 55.69332566168009, "grad_norm": 0.9313302636146545, "learning_rate": 0.0008861334867663981, "loss": 0.5299, "step": 193590 }, { "epoch": 55.69620253164557, "grad_norm": 1.1987746953964233, "learning_rate": 0.0008860759493670886, "loss": 0.567, "step": 193600 }, { "epoch": 55.699079401611044, "grad_norm": 1.0728638172149658, "learning_rate": 0.0008860184119677791, "loss": 0.5656, "step": 193610 }, { "epoch": 55.70195627157653, "grad_norm": 0.775148868560791, "learning_rate": 0.0008859608745684695, "loss": 0.6534, "step": 193620 }, { "epoch": 55.704833141542004, "grad_norm": 0.8256094455718994, "learning_rate": 0.00088590333716916, "loss": 0.4896, "step": 193630 }, { "epoch": 55.70771001150748, "grad_norm": 1.619657278060913, "learning_rate": 0.0008858457997698505, "loss": 0.5144, "step": 193640 }, { "epoch": 55.710586881472956, "grad_norm": 1.056335687637329, "learning_rate": 0.0008857882623705409, "loss": 0.4384, "step": 193650 }, { "epoch": 55.71346375143843, "grad_norm": 0.5488625764846802, "learning_rate": 0.0008857307249712313, "loss": 0.5504, "step": 193660 }, { "epoch": 55.716340621403916, "grad_norm": 1.793584942817688, "learning_rate": 0.0008856731875719218, "loss": 0.6324, "step": 193670 }, { "epoch": 55.71921749136939, "grad_norm": 1.5777596235275269, "learning_rate": 0.0008856156501726122, "loss": 0.493, "step": 193680 }, { "epoch": 55.72209436133487, "grad_norm": 0.8622596263885498, "learning_rate": 0.0008855581127733027, "loss": 0.6685, "step": 193690 }, { "epoch": 55.724971231300344, "grad_norm": 1.6945593357086182, "learning_rate": 0.0008855005753739931, "loss": 0.5729, "step": 193700 }, { "epoch": 55.72784810126582, "grad_norm": 1.381683349609375, "learning_rate": 0.0008854430379746835, "loss": 0.5371, "step": 193710 }, { "epoch": 55.730724971231304, "grad_norm": 1.8134063482284546, "learning_rate": 0.000885385500575374, "loss": 0.5963, "step": 193720 }, { "epoch": 55.73360184119678, "grad_norm": 1.7241286039352417, "learning_rate": 0.0008853279631760645, "loss": 0.4125, "step": 193730 }, { "epoch": 55.736478711162256, "grad_norm": 1.4434285163879395, "learning_rate": 0.0008852704257767548, "loss": 0.5443, "step": 193740 }, { "epoch": 55.73935558112773, "grad_norm": 1.059717059135437, "learning_rate": 0.0008852128883774454, "loss": 0.6361, "step": 193750 }, { "epoch": 55.74223245109321, "grad_norm": 0.9577580690383911, "learning_rate": 0.0008851553509781359, "loss": 0.5178, "step": 193760 }, { "epoch": 55.745109321058685, "grad_norm": 1.46918785572052, "learning_rate": 0.0008850978135788262, "loss": 0.4379, "step": 193770 }, { "epoch": 55.74798619102417, "grad_norm": 0.685317873954773, "learning_rate": 0.0008850402761795167, "loss": 0.5351, "step": 193780 }, { "epoch": 55.750863060989644, "grad_norm": 0.9412233829498291, "learning_rate": 0.0008849827387802072, "loss": 0.5165, "step": 193790 }, { "epoch": 55.75373993095512, "grad_norm": 1.205087661743164, "learning_rate": 0.0008849252013808976, "loss": 0.4593, "step": 193800 }, { "epoch": 55.7566168009206, "grad_norm": 1.7913155555725098, "learning_rate": 0.000884867663981588, "loss": 0.5778, "step": 193810 }, { "epoch": 55.75949367088607, "grad_norm": 1.8848803043365479, "learning_rate": 0.0008848101265822786, "loss": 0.4928, "step": 193820 }, { "epoch": 55.762370540851556, "grad_norm": 1.0033529996871948, "learning_rate": 0.000884752589182969, "loss": 0.4649, "step": 193830 }, { "epoch": 55.76524741081703, "grad_norm": 1.4515498876571655, "learning_rate": 0.0008846950517836594, "loss": 0.4803, "step": 193840 }, { "epoch": 55.76812428078251, "grad_norm": 1.5759167671203613, "learning_rate": 0.0008846375143843499, "loss": 0.5159, "step": 193850 }, { "epoch": 55.771001150747985, "grad_norm": 1.1523504257202148, "learning_rate": 0.0008845799769850403, "loss": 0.4474, "step": 193860 }, { "epoch": 55.77387802071346, "grad_norm": 1.3098036050796509, "learning_rate": 0.0008845224395857308, "loss": 0.5147, "step": 193870 }, { "epoch": 55.776754890678944, "grad_norm": 1.5610746145248413, "learning_rate": 0.0008844649021864211, "loss": 0.4648, "step": 193880 }, { "epoch": 55.77963176064442, "grad_norm": 1.3708207607269287, "learning_rate": 0.0008844073647871116, "loss": 0.4407, "step": 193890 }, { "epoch": 55.7825086306099, "grad_norm": 1.4368845224380493, "learning_rate": 0.0008843498273878021, "loss": 0.5877, "step": 193900 }, { "epoch": 55.78538550057537, "grad_norm": 0.9216896891593933, "learning_rate": 0.0008842922899884925, "loss": 0.5614, "step": 193910 }, { "epoch": 55.78826237054085, "grad_norm": 1.281102180480957, "learning_rate": 0.0008842347525891829, "loss": 0.4402, "step": 193920 }, { "epoch": 55.79113924050633, "grad_norm": 1.3255501985549927, "learning_rate": 0.0008841772151898735, "loss": 0.6463, "step": 193930 }, { "epoch": 55.79401611047181, "grad_norm": 0.62183678150177, "learning_rate": 0.0008841196777905639, "loss": 0.4819, "step": 193940 }, { "epoch": 55.796892980437285, "grad_norm": 1.838330626487732, "learning_rate": 0.0008840621403912543, "loss": 0.5337, "step": 193950 }, { "epoch": 55.79976985040276, "grad_norm": 0.9634010195732117, "learning_rate": 0.0008840046029919448, "loss": 0.4194, "step": 193960 }, { "epoch": 55.80264672036824, "grad_norm": 1.0704208612442017, "learning_rate": 0.0008839470655926352, "loss": 0.4372, "step": 193970 }, { "epoch": 55.80552359033371, "grad_norm": 0.7241243124008179, "learning_rate": 0.0008838895281933257, "loss": 0.612, "step": 193980 }, { "epoch": 55.8084004602992, "grad_norm": 0.9693845510482788, "learning_rate": 0.0008838319907940161, "loss": 0.449, "step": 193990 }, { "epoch": 55.81127733026467, "grad_norm": 1.4573383331298828, "learning_rate": 0.0008837744533947065, "loss": 0.4963, "step": 194000 }, { "epoch": 55.81415420023015, "grad_norm": 0.5236177444458008, "learning_rate": 0.000883716915995397, "loss": 0.4906, "step": 194010 }, { "epoch": 55.817031070195625, "grad_norm": 1.6743532419204712, "learning_rate": 0.0008836593785960875, "loss": 0.5672, "step": 194020 }, { "epoch": 55.8199079401611, "grad_norm": 2.1222591400146484, "learning_rate": 0.0008836018411967778, "loss": 0.5558, "step": 194030 }, { "epoch": 55.822784810126585, "grad_norm": 1.4109920263290405, "learning_rate": 0.0008835443037974684, "loss": 0.5061, "step": 194040 }, { "epoch": 55.82566168009206, "grad_norm": 1.4935730695724487, "learning_rate": 0.0008834867663981589, "loss": 0.6219, "step": 194050 }, { "epoch": 55.82853855005754, "grad_norm": 1.1661800146102905, "learning_rate": 0.0008834292289988492, "loss": 0.5465, "step": 194060 }, { "epoch": 55.83141542002301, "grad_norm": 1.8859469890594482, "learning_rate": 0.0008833716915995397, "loss": 0.4616, "step": 194070 }, { "epoch": 55.83429228998849, "grad_norm": 1.4321564435958862, "learning_rate": 0.0008833141542002302, "loss": 0.5815, "step": 194080 }, { "epoch": 55.83716915995397, "grad_norm": 1.0551776885986328, "learning_rate": 0.0008832566168009206, "loss": 0.4707, "step": 194090 }, { "epoch": 55.84004602991945, "grad_norm": 1.222720980644226, "learning_rate": 0.000883199079401611, "loss": 0.4742, "step": 194100 }, { "epoch": 55.842922899884925, "grad_norm": 1.417242407798767, "learning_rate": 0.0008831415420023016, "loss": 0.4791, "step": 194110 }, { "epoch": 55.8457997698504, "grad_norm": 1.3514195680618286, "learning_rate": 0.0008830840046029919, "loss": 0.6829, "step": 194120 }, { "epoch": 55.84867663981588, "grad_norm": 0.7248212099075317, "learning_rate": 0.0008830264672036824, "loss": 0.4428, "step": 194130 }, { "epoch": 55.85155350978136, "grad_norm": 1.0552613735198975, "learning_rate": 0.0008829689298043729, "loss": 0.4717, "step": 194140 }, { "epoch": 55.85443037974684, "grad_norm": 1.385297417640686, "learning_rate": 0.0008829113924050633, "loss": 0.4953, "step": 194150 }, { "epoch": 55.85730724971231, "grad_norm": 1.2274267673492432, "learning_rate": 0.0008828538550057538, "loss": 0.5062, "step": 194160 }, { "epoch": 55.86018411967779, "grad_norm": 1.0942716598510742, "learning_rate": 0.0008827963176064442, "loss": 0.4788, "step": 194170 }, { "epoch": 55.863060989643266, "grad_norm": 0.8725908994674683, "learning_rate": 0.0008827387802071346, "loss": 0.507, "step": 194180 }, { "epoch": 55.86593785960875, "grad_norm": 1.3985741138458252, "learning_rate": 0.0008826812428078251, "loss": 0.6516, "step": 194190 }, { "epoch": 55.868814729574225, "grad_norm": 0.9359809160232544, "learning_rate": 0.0008826237054085156, "loss": 0.4995, "step": 194200 }, { "epoch": 55.8716915995397, "grad_norm": 1.283354640007019, "learning_rate": 0.0008825661680092059, "loss": 0.5346, "step": 194210 }, { "epoch": 55.87456846950518, "grad_norm": 0.9676039218902588, "learning_rate": 0.0008825086306098965, "loss": 0.5565, "step": 194220 }, { "epoch": 55.877445339470654, "grad_norm": 1.7048523426055908, "learning_rate": 0.000882451093210587, "loss": 0.5241, "step": 194230 }, { "epoch": 55.88032220943613, "grad_norm": 1.0749770402908325, "learning_rate": 0.0008823935558112773, "loss": 0.6227, "step": 194240 }, { "epoch": 55.883199079401614, "grad_norm": 1.5926810503005981, "learning_rate": 0.0008823360184119678, "loss": 0.5298, "step": 194250 }, { "epoch": 55.88607594936709, "grad_norm": 1.4864451885223389, "learning_rate": 0.0008822784810126583, "loss": 0.4734, "step": 194260 }, { "epoch": 55.888952819332566, "grad_norm": 1.3741472959518433, "learning_rate": 0.0008822209436133487, "loss": 0.4971, "step": 194270 }, { "epoch": 55.89182968929804, "grad_norm": 1.2559961080551147, "learning_rate": 0.0008821634062140391, "loss": 0.6261, "step": 194280 }, { "epoch": 55.89470655926352, "grad_norm": 1.4154609441757202, "learning_rate": 0.0008821058688147296, "loss": 0.5733, "step": 194290 }, { "epoch": 55.897583429229, "grad_norm": 1.2646479606628418, "learning_rate": 0.00088204833141542, "loss": 0.4088, "step": 194300 }, { "epoch": 55.90046029919448, "grad_norm": 0.6452280879020691, "learning_rate": 0.0008819907940161105, "loss": 0.5379, "step": 194310 }, { "epoch": 55.903337169159954, "grad_norm": 0.9814216494560242, "learning_rate": 0.0008819332566168009, "loss": 0.5593, "step": 194320 }, { "epoch": 55.90621403912543, "grad_norm": 1.3891676664352417, "learning_rate": 0.0008818757192174914, "loss": 0.5606, "step": 194330 }, { "epoch": 55.90909090909091, "grad_norm": 2.0103137493133545, "learning_rate": 0.0008818181818181819, "loss": 0.4537, "step": 194340 }, { "epoch": 55.91196777905639, "grad_norm": 1.3381283283233643, "learning_rate": 0.0008817606444188723, "loss": 0.4776, "step": 194350 }, { "epoch": 55.914844649021866, "grad_norm": 1.1206390857696533, "learning_rate": 0.0008817031070195627, "loss": 0.5504, "step": 194360 }, { "epoch": 55.91772151898734, "grad_norm": 1.6687976121902466, "learning_rate": 0.0008816455696202532, "loss": 0.4831, "step": 194370 }, { "epoch": 55.92059838895282, "grad_norm": 2.3534679412841797, "learning_rate": 0.0008815880322209437, "loss": 0.4791, "step": 194380 }, { "epoch": 55.923475258918295, "grad_norm": 1.509159803390503, "learning_rate": 0.000881530494821634, "loss": 0.5119, "step": 194390 }, { "epoch": 55.92635212888378, "grad_norm": 1.8850407600402832, "learning_rate": 0.0008814729574223246, "loss": 0.497, "step": 194400 }, { "epoch": 55.929228998849254, "grad_norm": 0.8664026856422424, "learning_rate": 0.000881415420023015, "loss": 0.5219, "step": 194410 }, { "epoch": 55.93210586881473, "grad_norm": 1.1446341276168823, "learning_rate": 0.0008813578826237054, "loss": 0.474, "step": 194420 }, { "epoch": 55.93498273878021, "grad_norm": 0.7419944405555725, "learning_rate": 0.0008813003452243958, "loss": 0.494, "step": 194430 }, { "epoch": 55.93785960874568, "grad_norm": 1.3173315525054932, "learning_rate": 0.0008812428078250864, "loss": 0.537, "step": 194440 }, { "epoch": 55.94073647871116, "grad_norm": 0.8193073272705078, "learning_rate": 0.0008811852704257768, "loss": 0.5312, "step": 194450 }, { "epoch": 55.94361334867664, "grad_norm": 1.2984836101531982, "learning_rate": 0.0008811277330264672, "loss": 0.48, "step": 194460 }, { "epoch": 55.94649021864212, "grad_norm": 0.8064628839492798, "learning_rate": 0.0008810701956271577, "loss": 0.4347, "step": 194470 }, { "epoch": 55.949367088607595, "grad_norm": 1.2250709533691406, "learning_rate": 0.0008810126582278481, "loss": 0.4996, "step": 194480 }, { "epoch": 55.95224395857307, "grad_norm": 1.984344482421875, "learning_rate": 0.0008809551208285386, "loss": 0.5454, "step": 194490 }, { "epoch": 55.95512082853855, "grad_norm": 1.7876168489456177, "learning_rate": 0.000880897583429229, "loss": 0.5642, "step": 194500 }, { "epoch": 55.95799769850403, "grad_norm": 1.0002353191375732, "learning_rate": 0.0008808400460299195, "loss": 0.6034, "step": 194510 }, { "epoch": 55.96087456846951, "grad_norm": 1.1196397542953491, "learning_rate": 0.0008807825086306099, "loss": 0.5135, "step": 194520 }, { "epoch": 55.96375143843498, "grad_norm": 1.480279564857483, "learning_rate": 0.0008807249712313004, "loss": 0.3811, "step": 194530 }, { "epoch": 55.96662830840046, "grad_norm": 0.9986647963523865, "learning_rate": 0.0008806674338319907, "loss": 0.517, "step": 194540 }, { "epoch": 55.969505178365935, "grad_norm": 1.4512484073638916, "learning_rate": 0.0008806098964326813, "loss": 0.4819, "step": 194550 }, { "epoch": 55.97238204833142, "grad_norm": 1.0963871479034424, "learning_rate": 0.0008805523590333718, "loss": 0.4373, "step": 194560 }, { "epoch": 55.975258918296895, "grad_norm": 1.8704626560211182, "learning_rate": 0.0008804948216340621, "loss": 0.4964, "step": 194570 }, { "epoch": 55.97813578826237, "grad_norm": 1.1313406229019165, "learning_rate": 0.0008804372842347526, "loss": 0.5603, "step": 194580 }, { "epoch": 55.98101265822785, "grad_norm": 1.5121915340423584, "learning_rate": 0.0008803797468354431, "loss": 0.4894, "step": 194590 }, { "epoch": 55.98388952819332, "grad_norm": 1.703971028327942, "learning_rate": 0.0008803222094361335, "loss": 0.4603, "step": 194600 }, { "epoch": 55.98676639815881, "grad_norm": 0.8503710627555847, "learning_rate": 0.0008802646720368239, "loss": 0.459, "step": 194610 }, { "epoch": 55.98964326812428, "grad_norm": 0.765723466873169, "learning_rate": 0.0008802071346375145, "loss": 0.6003, "step": 194620 }, { "epoch": 55.99252013808976, "grad_norm": 0.709006667137146, "learning_rate": 0.0008801495972382048, "loss": 0.5473, "step": 194630 }, { "epoch": 55.995397008055235, "grad_norm": 1.1980997323989868, "learning_rate": 0.0008800920598388953, "loss": 0.4189, "step": 194640 }, { "epoch": 55.99827387802071, "grad_norm": 1.05067777633667, "learning_rate": 0.0008800345224395858, "loss": 0.48, "step": 194650 }, { "epoch": 56.00115074798619, "grad_norm": 1.366196632385254, "learning_rate": 0.0008799769850402762, "loss": 0.6097, "step": 194660 }, { "epoch": 56.00402761795167, "grad_norm": 0.6320409178733826, "learning_rate": 0.0008799194476409667, "loss": 0.4344, "step": 194670 }, { "epoch": 56.00690448791715, "grad_norm": 0.9080954194068909, "learning_rate": 0.0008798619102416571, "loss": 0.456, "step": 194680 }, { "epoch": 56.00978135788262, "grad_norm": 0.6551455855369568, "learning_rate": 0.0008798043728423475, "loss": 0.4171, "step": 194690 }, { "epoch": 56.0126582278481, "grad_norm": 0.9458539485931396, "learning_rate": 0.000879746835443038, "loss": 0.3601, "step": 194700 }, { "epoch": 56.015535097813576, "grad_norm": 0.80989009141922, "learning_rate": 0.0008796892980437284, "loss": 0.4705, "step": 194710 }, { "epoch": 56.01841196777906, "grad_norm": 0.8966606855392456, "learning_rate": 0.0008796317606444188, "loss": 0.4849, "step": 194720 }, { "epoch": 56.021288837744535, "grad_norm": 0.6592791676521301, "learning_rate": 0.0008795742232451094, "loss": 0.3573, "step": 194730 }, { "epoch": 56.02416570771001, "grad_norm": 0.517180860042572, "learning_rate": 0.0008795166858457997, "loss": 0.4456, "step": 194740 }, { "epoch": 56.02704257767549, "grad_norm": 2.523366689682007, "learning_rate": 0.0008794591484464902, "loss": 0.5922, "step": 194750 }, { "epoch": 56.029919447640964, "grad_norm": 1.006513237953186, "learning_rate": 0.0008794016110471807, "loss": 0.5189, "step": 194760 }, { "epoch": 56.03279631760645, "grad_norm": 0.7377173900604248, "learning_rate": 0.0008793440736478711, "loss": 0.4488, "step": 194770 }, { "epoch": 56.03567318757192, "grad_norm": 1.348389744758606, "learning_rate": 0.0008792865362485616, "loss": 0.4421, "step": 194780 }, { "epoch": 56.0385500575374, "grad_norm": 1.3538646697998047, "learning_rate": 0.000879228998849252, "loss": 0.4397, "step": 194790 }, { "epoch": 56.041426927502876, "grad_norm": 1.4682857990264893, "learning_rate": 0.0008791714614499425, "loss": 0.4375, "step": 194800 }, { "epoch": 56.04430379746835, "grad_norm": 1.4402787685394287, "learning_rate": 0.0008791139240506329, "loss": 0.5102, "step": 194810 }, { "epoch": 56.047180667433835, "grad_norm": 1.8856074810028076, "learning_rate": 0.0008790563866513234, "loss": 0.5548, "step": 194820 }, { "epoch": 56.05005753739931, "grad_norm": 0.9399152398109436, "learning_rate": 0.0008789988492520137, "loss": 0.4424, "step": 194830 }, { "epoch": 56.05293440736479, "grad_norm": 2.6435461044311523, "learning_rate": 0.0008789413118527043, "loss": 0.6908, "step": 194840 }, { "epoch": 56.055811277330264, "grad_norm": 1.3750041723251343, "learning_rate": 0.0008788837744533948, "loss": 0.4269, "step": 194850 }, { "epoch": 56.05868814729574, "grad_norm": 1.260200023651123, "learning_rate": 0.0008788262370540851, "loss": 0.6007, "step": 194860 }, { "epoch": 56.061565017261216, "grad_norm": 1.4001387357711792, "learning_rate": 0.0008787686996547756, "loss": 0.4729, "step": 194870 }, { "epoch": 56.0644418872267, "grad_norm": 1.0770423412322998, "learning_rate": 0.0008787111622554661, "loss": 0.4681, "step": 194880 }, { "epoch": 56.067318757192176, "grad_norm": 1.2166924476623535, "learning_rate": 0.0008786536248561565, "loss": 0.4395, "step": 194890 }, { "epoch": 56.07019562715765, "grad_norm": 0.9673837423324585, "learning_rate": 0.0008785960874568469, "loss": 0.4808, "step": 194900 }, { "epoch": 56.07307249712313, "grad_norm": 0.8700651526451111, "learning_rate": 0.0008785385500575375, "loss": 0.4793, "step": 194910 }, { "epoch": 56.075949367088604, "grad_norm": 1.16818368434906, "learning_rate": 0.0008784810126582278, "loss": 0.4983, "step": 194920 }, { "epoch": 56.07882623705409, "grad_norm": 2.0167136192321777, "learning_rate": 0.0008784234752589183, "loss": 0.5537, "step": 194930 }, { "epoch": 56.081703107019564, "grad_norm": 1.206414818763733, "learning_rate": 0.0008783659378596088, "loss": 0.5168, "step": 194940 }, { "epoch": 56.08457997698504, "grad_norm": 1.2509468793869019, "learning_rate": 0.0008783084004602992, "loss": 0.5199, "step": 194950 }, { "epoch": 56.087456846950516, "grad_norm": 1.1165170669555664, "learning_rate": 0.0008782508630609897, "loss": 0.565, "step": 194960 }, { "epoch": 56.09033371691599, "grad_norm": 1.0812488794326782, "learning_rate": 0.0008781933256616801, "loss": 0.4949, "step": 194970 }, { "epoch": 56.093210586881476, "grad_norm": 2.4206995964050293, "learning_rate": 0.0008781357882623705, "loss": 0.5036, "step": 194980 }, { "epoch": 56.09608745684695, "grad_norm": 0.6941149234771729, "learning_rate": 0.000878078250863061, "loss": 0.485, "step": 194990 }, { "epoch": 56.09896432681243, "grad_norm": 0.8046701550483704, "learning_rate": 0.0008780207134637515, "loss": 0.5409, "step": 195000 }, { "epoch": 56.101841196777904, "grad_norm": 1.2245995998382568, "learning_rate": 0.0008779631760644418, "loss": 0.3617, "step": 195010 }, { "epoch": 56.10471806674338, "grad_norm": 1.4641194343566895, "learning_rate": 0.0008779056386651324, "loss": 0.3924, "step": 195020 }, { "epoch": 56.107594936708864, "grad_norm": 1.8164867162704468, "learning_rate": 0.0008778481012658229, "loss": 0.533, "step": 195030 }, { "epoch": 56.11047180667434, "grad_norm": 0.7988298535346985, "learning_rate": 0.0008777905638665132, "loss": 0.4145, "step": 195040 }, { "epoch": 56.113348676639816, "grad_norm": 1.252502202987671, "learning_rate": 0.0008777330264672037, "loss": 0.4048, "step": 195050 }, { "epoch": 56.11622554660529, "grad_norm": 1.4169377088546753, "learning_rate": 0.0008776754890678942, "loss": 0.5219, "step": 195060 }, { "epoch": 56.11910241657077, "grad_norm": 1.0331312417984009, "learning_rate": 0.0008776179516685846, "loss": 0.5201, "step": 195070 }, { "epoch": 56.121979286536245, "grad_norm": 0.6563588976860046, "learning_rate": 0.000877560414269275, "loss": 0.5703, "step": 195080 }, { "epoch": 56.12485615650173, "grad_norm": 1.799532413482666, "learning_rate": 0.0008775028768699656, "loss": 0.4449, "step": 195090 }, { "epoch": 56.127733026467205, "grad_norm": 1.2916628122329712, "learning_rate": 0.0008774453394706559, "loss": 0.4221, "step": 195100 }, { "epoch": 56.13060989643268, "grad_norm": 1.3472856283187866, "learning_rate": 0.0008773878020713464, "loss": 0.4792, "step": 195110 }, { "epoch": 56.13348676639816, "grad_norm": 0.7483385801315308, "learning_rate": 0.0008773302646720368, "loss": 0.4299, "step": 195120 }, { "epoch": 56.13636363636363, "grad_norm": 0.7405614256858826, "learning_rate": 0.0008772727272727273, "loss": 0.3831, "step": 195130 }, { "epoch": 56.139240506329116, "grad_norm": 0.5413944721221924, "learning_rate": 0.0008772151898734178, "loss": 0.4382, "step": 195140 }, { "epoch": 56.14211737629459, "grad_norm": 1.127234935760498, "learning_rate": 0.0008771576524741082, "loss": 0.4524, "step": 195150 }, { "epoch": 56.14499424626007, "grad_norm": 1.135651707649231, "learning_rate": 0.0008771001150747986, "loss": 0.4517, "step": 195160 }, { "epoch": 56.147871116225545, "grad_norm": 1.402282476425171, "learning_rate": 0.0008770425776754891, "loss": 0.4283, "step": 195170 }, { "epoch": 56.15074798619102, "grad_norm": 0.9787107706069946, "learning_rate": 0.0008769850402761796, "loss": 0.5254, "step": 195180 }, { "epoch": 56.153624856156505, "grad_norm": 1.279882550239563, "learning_rate": 0.0008769275028768699, "loss": 0.4911, "step": 195190 }, { "epoch": 56.15650172612198, "grad_norm": 1.6467150449752808, "learning_rate": 0.0008768699654775605, "loss": 0.4238, "step": 195200 }, { "epoch": 56.15937859608746, "grad_norm": 1.4839924573898315, "learning_rate": 0.0008768124280782509, "loss": 0.5709, "step": 195210 }, { "epoch": 56.16225546605293, "grad_norm": 1.3393577337265015, "learning_rate": 0.0008767548906789413, "loss": 0.5508, "step": 195220 }, { "epoch": 56.16513233601841, "grad_norm": 1.2194942235946655, "learning_rate": 0.0008766973532796317, "loss": 0.434, "step": 195230 }, { "epoch": 56.16800920598389, "grad_norm": 1.0915253162384033, "learning_rate": 0.0008766398158803223, "loss": 0.3563, "step": 195240 }, { "epoch": 56.17088607594937, "grad_norm": 1.5752358436584473, "learning_rate": 0.0008765822784810127, "loss": 0.452, "step": 195250 }, { "epoch": 56.173762945914845, "grad_norm": 1.0089856386184692, "learning_rate": 0.0008765247410817031, "loss": 0.3834, "step": 195260 }, { "epoch": 56.17663981588032, "grad_norm": 1.1335992813110352, "learning_rate": 0.0008764672036823936, "loss": 0.5485, "step": 195270 }, { "epoch": 56.1795166858458, "grad_norm": 1.3139184713363647, "learning_rate": 0.000876409666283084, "loss": 0.5468, "step": 195280 }, { "epoch": 56.18239355581128, "grad_norm": 1.370238184928894, "learning_rate": 0.0008763521288837745, "loss": 0.4611, "step": 195290 }, { "epoch": 56.18527042577676, "grad_norm": 1.6305744647979736, "learning_rate": 0.0008762945914844649, "loss": 0.6217, "step": 195300 }, { "epoch": 56.18814729574223, "grad_norm": 2.149383068084717, "learning_rate": 0.0008762370540851554, "loss": 0.5904, "step": 195310 }, { "epoch": 56.19102416570771, "grad_norm": 1.7064539194107056, "learning_rate": 0.0008761795166858458, "loss": 0.6774, "step": 195320 }, { "epoch": 56.193901035673186, "grad_norm": 1.9567053318023682, "learning_rate": 0.0008761219792865363, "loss": 0.4738, "step": 195330 }, { "epoch": 56.19677790563866, "grad_norm": 1.3746532201766968, "learning_rate": 0.0008760644418872266, "loss": 0.4983, "step": 195340 }, { "epoch": 56.199654775604145, "grad_norm": 1.6414158344268799, "learning_rate": 0.0008760069044879172, "loss": 0.4791, "step": 195350 }, { "epoch": 56.20253164556962, "grad_norm": 1.4230520725250244, "learning_rate": 0.0008759493670886077, "loss": 0.5524, "step": 195360 }, { "epoch": 56.2054085155351, "grad_norm": 1.343323826789856, "learning_rate": 0.000875891829689298, "loss": 0.4776, "step": 195370 }, { "epoch": 56.208285385500574, "grad_norm": 1.5749496221542358, "learning_rate": 0.0008758342922899886, "loss": 0.3861, "step": 195380 }, { "epoch": 56.21116225546605, "grad_norm": 1.9136549234390259, "learning_rate": 0.000875776754890679, "loss": 0.4807, "step": 195390 }, { "epoch": 56.21403912543153, "grad_norm": 2.1178791522979736, "learning_rate": 0.0008757192174913694, "loss": 0.6571, "step": 195400 }, { "epoch": 56.21691599539701, "grad_norm": 1.8206156492233276, "learning_rate": 0.0008756616800920598, "loss": 0.495, "step": 195410 }, { "epoch": 56.219792865362486, "grad_norm": 1.4748049974441528, "learning_rate": 0.0008756041426927504, "loss": 0.5001, "step": 195420 }, { "epoch": 56.22266973532796, "grad_norm": 0.7400747537612915, "learning_rate": 0.0008755466052934407, "loss": 0.4203, "step": 195430 }, { "epoch": 56.22554660529344, "grad_norm": 0.7006232142448425, "learning_rate": 0.0008754890678941312, "loss": 0.3672, "step": 195440 }, { "epoch": 56.22842347525892, "grad_norm": 1.534837007522583, "learning_rate": 0.0008754315304948217, "loss": 0.5723, "step": 195450 }, { "epoch": 56.2313003452244, "grad_norm": 1.2977919578552246, "learning_rate": 0.0008753739930955121, "loss": 0.47, "step": 195460 }, { "epoch": 56.234177215189874, "grad_norm": 1.3280913829803467, "learning_rate": 0.0008753164556962026, "loss": 0.5725, "step": 195470 }, { "epoch": 56.23705408515535, "grad_norm": 1.5279417037963867, "learning_rate": 0.000875258918296893, "loss": 0.5042, "step": 195480 }, { "epoch": 56.239930955120826, "grad_norm": 1.811699390411377, "learning_rate": 0.0008752013808975835, "loss": 0.4982, "step": 195490 }, { "epoch": 56.24280782508631, "grad_norm": 1.4362061023712158, "learning_rate": 0.0008751438434982739, "loss": 0.5119, "step": 195500 }, { "epoch": 56.245684695051786, "grad_norm": 1.8909837007522583, "learning_rate": 0.0008750863060989644, "loss": 0.4799, "step": 195510 }, { "epoch": 56.24856156501726, "grad_norm": 2.231199026107788, "learning_rate": 0.0008750287686996547, "loss": 0.4906, "step": 195520 }, { "epoch": 56.25143843498274, "grad_norm": 0.9319671988487244, "learning_rate": 0.0008749712313003453, "loss": 0.4712, "step": 195530 }, { "epoch": 56.254315304948214, "grad_norm": 1.3861100673675537, "learning_rate": 0.0008749136939010356, "loss": 0.4861, "step": 195540 }, { "epoch": 56.25719217491369, "grad_norm": 1.418491244316101, "learning_rate": 0.0008748561565017261, "loss": 0.438, "step": 195550 }, { "epoch": 56.260069044879174, "grad_norm": 0.9367448091506958, "learning_rate": 0.0008747986191024166, "loss": 0.423, "step": 195560 }, { "epoch": 56.26294591484465, "grad_norm": 1.0720210075378418, "learning_rate": 0.000874741081703107, "loss": 0.4201, "step": 195570 }, { "epoch": 56.265822784810126, "grad_norm": 1.4714336395263672, "learning_rate": 0.0008746835443037975, "loss": 0.5353, "step": 195580 }, { "epoch": 56.2686996547756, "grad_norm": 1.3209431171417236, "learning_rate": 0.0008746260069044879, "loss": 0.5243, "step": 195590 }, { "epoch": 56.27157652474108, "grad_norm": 2.2414145469665527, "learning_rate": 0.0008745684695051784, "loss": 0.5553, "step": 195600 }, { "epoch": 56.27445339470656, "grad_norm": 1.0664387941360474, "learning_rate": 0.0008745109321058688, "loss": 0.4633, "step": 195610 }, { "epoch": 56.27733026467204, "grad_norm": 0.9574835300445557, "learning_rate": 0.0008744533947065593, "loss": 0.4582, "step": 195620 }, { "epoch": 56.280207134637514, "grad_norm": 1.4863452911376953, "learning_rate": 0.0008743958573072496, "loss": 0.5078, "step": 195630 }, { "epoch": 56.28308400460299, "grad_norm": 2.197577714920044, "learning_rate": 0.0008743383199079402, "loss": 0.4174, "step": 195640 }, { "epoch": 56.28596087456847, "grad_norm": 1.1947702169418335, "learning_rate": 0.0008742807825086307, "loss": 0.445, "step": 195650 }, { "epoch": 56.28883774453395, "grad_norm": 2.502983808517456, "learning_rate": 0.000874223245109321, "loss": 0.5666, "step": 195660 }, { "epoch": 56.291714614499426, "grad_norm": 1.0550774335861206, "learning_rate": 0.0008741657077100115, "loss": 0.5522, "step": 195670 }, { "epoch": 56.2945914844649, "grad_norm": 1.0870674848556519, "learning_rate": 0.000874108170310702, "loss": 0.3995, "step": 195680 }, { "epoch": 56.29746835443038, "grad_norm": 0.794779896736145, "learning_rate": 0.0008740506329113924, "loss": 0.5344, "step": 195690 }, { "epoch": 56.300345224395855, "grad_norm": 1.2299888134002686, "learning_rate": 0.0008739930955120828, "loss": 0.4809, "step": 195700 }, { "epoch": 56.30322209436134, "grad_norm": 1.3994814157485962, "learning_rate": 0.0008739355581127734, "loss": 0.5182, "step": 195710 }, { "epoch": 56.306098964326814, "grad_norm": 2.638873338699341, "learning_rate": 0.0008738780207134637, "loss": 0.5195, "step": 195720 }, { "epoch": 56.30897583429229, "grad_norm": 1.2093983888626099, "learning_rate": 0.0008738204833141542, "loss": 0.3654, "step": 195730 }, { "epoch": 56.31185270425777, "grad_norm": 1.2868558168411255, "learning_rate": 0.0008737629459148446, "loss": 0.4791, "step": 195740 }, { "epoch": 56.31472957422324, "grad_norm": 1.1549738645553589, "learning_rate": 0.0008737054085155351, "loss": 0.507, "step": 195750 }, { "epoch": 56.31760644418872, "grad_norm": 1.3539842367172241, "learning_rate": 0.0008736478711162256, "loss": 0.5836, "step": 195760 }, { "epoch": 56.3204833141542, "grad_norm": 0.9958211183547974, "learning_rate": 0.000873590333716916, "loss": 0.3444, "step": 195770 }, { "epoch": 56.32336018411968, "grad_norm": 1.201119303703308, "learning_rate": 0.0008735327963176065, "loss": 0.4363, "step": 195780 }, { "epoch": 56.326237054085155, "grad_norm": 1.4580267667770386, "learning_rate": 0.0008734752589182969, "loss": 0.5854, "step": 195790 }, { "epoch": 56.32911392405063, "grad_norm": 1.5483802556991577, "learning_rate": 0.0008734177215189874, "loss": 0.484, "step": 195800 }, { "epoch": 56.33199079401611, "grad_norm": 0.782600998878479, "learning_rate": 0.0008733601841196777, "loss": 0.4013, "step": 195810 }, { "epoch": 56.33486766398159, "grad_norm": 1.304893136024475, "learning_rate": 0.0008733026467203683, "loss": 0.4299, "step": 195820 }, { "epoch": 56.33774453394707, "grad_norm": 1.184130311012268, "learning_rate": 0.0008732451093210587, "loss": 0.4577, "step": 195830 }, { "epoch": 56.34062140391254, "grad_norm": 1.9145702123641968, "learning_rate": 0.0008731875719217491, "loss": 0.4843, "step": 195840 }, { "epoch": 56.34349827387802, "grad_norm": 0.7054509520530701, "learning_rate": 0.0008731300345224396, "loss": 0.5564, "step": 195850 }, { "epoch": 56.346375143843495, "grad_norm": 1.16448175907135, "learning_rate": 0.0008730724971231301, "loss": 0.4481, "step": 195860 }, { "epoch": 56.34925201380898, "grad_norm": 1.0004676580429077, "learning_rate": 0.0008730149597238205, "loss": 0.4206, "step": 195870 }, { "epoch": 56.352128883774455, "grad_norm": 1.4517669677734375, "learning_rate": 0.0008729574223245109, "loss": 0.5362, "step": 195880 }, { "epoch": 56.35500575373993, "grad_norm": 1.317802906036377, "learning_rate": 0.0008728998849252015, "loss": 0.4405, "step": 195890 }, { "epoch": 56.35788262370541, "grad_norm": 1.8639214038848877, "learning_rate": 0.0008728423475258918, "loss": 0.4769, "step": 195900 }, { "epoch": 56.360759493670884, "grad_norm": 1.6545228958129883, "learning_rate": 0.0008727848101265823, "loss": 0.5221, "step": 195910 }, { "epoch": 56.36363636363637, "grad_norm": 1.4141924381256104, "learning_rate": 0.0008727272727272727, "loss": 0.5931, "step": 195920 }, { "epoch": 56.36651323360184, "grad_norm": 1.2648072242736816, "learning_rate": 0.0008726697353279632, "loss": 0.4801, "step": 195930 }, { "epoch": 56.36939010356732, "grad_norm": 2.3962297439575195, "learning_rate": 0.0008726121979286537, "loss": 0.5345, "step": 195940 }, { "epoch": 56.372266973532795, "grad_norm": 1.0650207996368408, "learning_rate": 0.0008725546605293441, "loss": 0.4433, "step": 195950 }, { "epoch": 56.37514384349827, "grad_norm": 2.2313804626464844, "learning_rate": 0.0008724971231300345, "loss": 0.5514, "step": 195960 }, { "epoch": 56.378020713463755, "grad_norm": 1.1026382446289062, "learning_rate": 0.000872439585730725, "loss": 0.4951, "step": 195970 }, { "epoch": 56.38089758342923, "grad_norm": 1.2658230066299438, "learning_rate": 0.0008723820483314155, "loss": 0.4721, "step": 195980 }, { "epoch": 56.38377445339471, "grad_norm": 1.0681532621383667, "learning_rate": 0.0008723245109321058, "loss": 0.5219, "step": 195990 }, { "epoch": 56.386651323360184, "grad_norm": 1.2644824981689453, "learning_rate": 0.0008722669735327964, "loss": 0.521, "step": 196000 }, { "epoch": 56.38952819332566, "grad_norm": 2.425931692123413, "learning_rate": 0.0008722094361334868, "loss": 0.6533, "step": 196010 }, { "epoch": 56.392405063291136, "grad_norm": 1.2742249965667725, "learning_rate": 0.0008721518987341772, "loss": 0.433, "step": 196020 }, { "epoch": 56.39528193325662, "grad_norm": 1.106771469116211, "learning_rate": 0.0008720943613348676, "loss": 0.6056, "step": 196030 }, { "epoch": 56.398158803222096, "grad_norm": 2.396726131439209, "learning_rate": 0.0008720368239355582, "loss": 0.5327, "step": 196040 }, { "epoch": 56.40103567318757, "grad_norm": 2.0136170387268066, "learning_rate": 0.0008719792865362486, "loss": 0.5459, "step": 196050 }, { "epoch": 56.40391254315305, "grad_norm": 0.7047438025474548, "learning_rate": 0.000871921749136939, "loss": 0.4541, "step": 196060 }, { "epoch": 56.406789413118524, "grad_norm": 1.3053293228149414, "learning_rate": 0.0008718642117376296, "loss": 0.6227, "step": 196070 }, { "epoch": 56.40966628308401, "grad_norm": 1.1661313772201538, "learning_rate": 0.0008718066743383199, "loss": 0.4853, "step": 196080 }, { "epoch": 56.412543153049484, "grad_norm": 1.703932523727417, "learning_rate": 0.0008717491369390104, "loss": 0.4843, "step": 196090 }, { "epoch": 56.41542002301496, "grad_norm": 0.6657625436782837, "learning_rate": 0.0008716915995397008, "loss": 0.6319, "step": 196100 }, { "epoch": 56.418296892980436, "grad_norm": 0.8296352028846741, "learning_rate": 0.0008716340621403913, "loss": 0.4098, "step": 196110 }, { "epoch": 56.42117376294591, "grad_norm": 0.5996424555778503, "learning_rate": 0.0008715765247410817, "loss": 0.3847, "step": 196120 }, { "epoch": 56.424050632911396, "grad_norm": 1.8057008981704712, "learning_rate": 0.0008715189873417722, "loss": 0.5628, "step": 196130 }, { "epoch": 56.42692750287687, "grad_norm": 1.191908597946167, "learning_rate": 0.0008714614499424625, "loss": 0.4723, "step": 196140 }, { "epoch": 56.42980437284235, "grad_norm": 0.732341468334198, "learning_rate": 0.0008714039125431531, "loss": 0.6232, "step": 196150 }, { "epoch": 56.432681242807824, "grad_norm": 1.0324019193649292, "learning_rate": 0.0008713463751438436, "loss": 0.5299, "step": 196160 }, { "epoch": 56.4355581127733, "grad_norm": 1.4806842803955078, "learning_rate": 0.0008712888377445339, "loss": 0.5124, "step": 196170 }, { "epoch": 56.438434982738784, "grad_norm": 0.9456686973571777, "learning_rate": 0.0008712313003452245, "loss": 0.5219, "step": 196180 }, { "epoch": 56.44131185270426, "grad_norm": 0.6890031099319458, "learning_rate": 0.0008711737629459149, "loss": 0.4894, "step": 196190 }, { "epoch": 56.444188722669736, "grad_norm": 1.213343858718872, "learning_rate": 0.0008711162255466053, "loss": 0.5471, "step": 196200 }, { "epoch": 56.44706559263521, "grad_norm": 0.9381269812583923, "learning_rate": 0.0008710586881472957, "loss": 0.6142, "step": 196210 }, { "epoch": 56.44994246260069, "grad_norm": 1.327299952507019, "learning_rate": 0.0008710011507479863, "loss": 0.4951, "step": 196220 }, { "epoch": 56.452819332566165, "grad_norm": 1.165381908416748, "learning_rate": 0.0008709436133486766, "loss": 0.4058, "step": 196230 }, { "epoch": 56.45569620253165, "grad_norm": 0.693823516368866, "learning_rate": 0.0008708860759493671, "loss": 0.456, "step": 196240 }, { "epoch": 56.458573072497124, "grad_norm": 1.2098876237869263, "learning_rate": 0.0008708285385500576, "loss": 0.5068, "step": 196250 }, { "epoch": 56.4614499424626, "grad_norm": 1.1196999549865723, "learning_rate": 0.000870771001150748, "loss": 0.5657, "step": 196260 }, { "epoch": 56.46432681242808, "grad_norm": 1.3705010414123535, "learning_rate": 0.0008707134637514385, "loss": 0.53, "step": 196270 }, { "epoch": 56.46720368239355, "grad_norm": 1.6763545274734497, "learning_rate": 0.0008706559263521289, "loss": 0.5764, "step": 196280 }, { "epoch": 56.470080552359036, "grad_norm": 1.5019152164459229, "learning_rate": 0.0008705983889528194, "loss": 0.4846, "step": 196290 }, { "epoch": 56.47295742232451, "grad_norm": 1.614011526107788, "learning_rate": 0.0008705408515535098, "loss": 0.4788, "step": 196300 }, { "epoch": 56.47583429228999, "grad_norm": 1.3262434005737305, "learning_rate": 0.0008704833141542003, "loss": 0.5297, "step": 196310 }, { "epoch": 56.478711162255465, "grad_norm": 1.384882926940918, "learning_rate": 0.0008704257767548906, "loss": 0.5807, "step": 196320 }, { "epoch": 56.48158803222094, "grad_norm": 0.7706450819969177, "learning_rate": 0.0008703682393555812, "loss": 0.4737, "step": 196330 }, { "epoch": 56.484464902186424, "grad_norm": 1.6374379396438599, "learning_rate": 0.0008703107019562717, "loss": 0.4936, "step": 196340 }, { "epoch": 56.4873417721519, "grad_norm": 1.599252462387085, "learning_rate": 0.000870253164556962, "loss": 0.494, "step": 196350 }, { "epoch": 56.49021864211738, "grad_norm": 0.8824124336242676, "learning_rate": 0.0008701956271576526, "loss": 0.4539, "step": 196360 }, { "epoch": 56.49309551208285, "grad_norm": 0.8898214101791382, "learning_rate": 0.0008701380897583429, "loss": 0.5944, "step": 196370 }, { "epoch": 56.49597238204833, "grad_norm": 0.6554699540138245, "learning_rate": 0.0008700805523590334, "loss": 0.55, "step": 196380 }, { "epoch": 56.49884925201381, "grad_norm": 1.61135995388031, "learning_rate": 0.0008700230149597238, "loss": 0.5503, "step": 196390 }, { "epoch": 56.50172612197929, "grad_norm": 1.8222018480300903, "learning_rate": 0.0008699654775604143, "loss": 0.4488, "step": 196400 }, { "epoch": 56.504602991944765, "grad_norm": 0.9884044528007507, "learning_rate": 0.0008699079401611047, "loss": 0.5015, "step": 196410 }, { "epoch": 56.50747986191024, "grad_norm": 1.2249332666397095, "learning_rate": 0.0008698504027617952, "loss": 0.5293, "step": 196420 }, { "epoch": 56.51035673187572, "grad_norm": 2.0826797485351562, "learning_rate": 0.0008697928653624855, "loss": 0.6063, "step": 196430 }, { "epoch": 56.51323360184119, "grad_norm": 1.0057467222213745, "learning_rate": 0.0008697353279631761, "loss": 0.433, "step": 196440 }, { "epoch": 56.51611047180668, "grad_norm": 0.8011317253112793, "learning_rate": 0.0008696777905638666, "loss": 0.4723, "step": 196450 }, { "epoch": 56.51898734177215, "grad_norm": 1.4409946203231812, "learning_rate": 0.0008696202531645569, "loss": 0.5022, "step": 196460 }, { "epoch": 56.52186421173763, "grad_norm": 0.8530933260917664, "learning_rate": 0.0008695627157652475, "loss": 0.6191, "step": 196470 }, { "epoch": 56.524741081703105, "grad_norm": 1.0664074420928955, "learning_rate": 0.0008695051783659379, "loss": 0.4172, "step": 196480 }, { "epoch": 56.52761795166858, "grad_norm": 0.7632474899291992, "learning_rate": 0.0008694476409666283, "loss": 0.5325, "step": 196490 }, { "epoch": 56.530494821634065, "grad_norm": 1.926735758781433, "learning_rate": 0.0008693901035673187, "loss": 0.417, "step": 196500 }, { "epoch": 56.53337169159954, "grad_norm": 2.319530487060547, "learning_rate": 0.0008693325661680093, "loss": 0.6341, "step": 196510 }, { "epoch": 56.53624856156502, "grad_norm": 1.1238205432891846, "learning_rate": 0.0008692750287686996, "loss": 0.4833, "step": 196520 }, { "epoch": 56.53912543153049, "grad_norm": 1.2253289222717285, "learning_rate": 0.0008692174913693901, "loss": 0.4853, "step": 196530 }, { "epoch": 56.54200230149597, "grad_norm": 1.033127784729004, "learning_rate": 0.0008691599539700805, "loss": 0.4195, "step": 196540 }, { "epoch": 56.54487917146145, "grad_norm": 1.131737470626831, "learning_rate": 0.000869102416570771, "loss": 0.6203, "step": 196550 }, { "epoch": 56.54775604142693, "grad_norm": 1.3811085224151611, "learning_rate": 0.0008690448791714615, "loss": 0.4407, "step": 196560 }, { "epoch": 56.550632911392405, "grad_norm": 1.2664000988006592, "learning_rate": 0.0008689873417721519, "loss": 0.5355, "step": 196570 }, { "epoch": 56.55350978135788, "grad_norm": 1.6154165267944336, "learning_rate": 0.0008689298043728424, "loss": 0.5577, "step": 196580 }, { "epoch": 56.55638665132336, "grad_norm": 1.0565447807312012, "learning_rate": 0.0008688722669735328, "loss": 0.5372, "step": 196590 }, { "epoch": 56.55926352128884, "grad_norm": 1.5222831964492798, "learning_rate": 0.0008688147295742233, "loss": 0.5742, "step": 196600 }, { "epoch": 56.56214039125432, "grad_norm": 1.228479266166687, "learning_rate": 0.0008687571921749136, "loss": 0.5298, "step": 196610 }, { "epoch": 56.56501726121979, "grad_norm": 0.8074541687965393, "learning_rate": 0.0008686996547756042, "loss": 0.5394, "step": 196620 }, { "epoch": 56.56789413118527, "grad_norm": 2.160407066345215, "learning_rate": 0.0008686421173762946, "loss": 0.5006, "step": 196630 }, { "epoch": 56.570771001150746, "grad_norm": 0.7286128997802734, "learning_rate": 0.000868584579976985, "loss": 0.4328, "step": 196640 }, { "epoch": 56.57364787111622, "grad_norm": 1.5739418268203735, "learning_rate": 0.0008685270425776754, "loss": 0.4961, "step": 196650 }, { "epoch": 56.576524741081705, "grad_norm": 1.4116421937942505, "learning_rate": 0.000868469505178366, "loss": 0.5503, "step": 196660 }, { "epoch": 56.57940161104718, "grad_norm": 2.288475751876831, "learning_rate": 0.0008684119677790564, "loss": 0.5231, "step": 196670 }, { "epoch": 56.58227848101266, "grad_norm": 1.5570485591888428, "learning_rate": 0.0008683544303797468, "loss": 0.5093, "step": 196680 }, { "epoch": 56.585155350978134, "grad_norm": 0.7082319855690002, "learning_rate": 0.0008682968929804374, "loss": 0.6416, "step": 196690 }, { "epoch": 56.58803222094361, "grad_norm": 1.4275051355361938, "learning_rate": 0.0008682393555811277, "loss": 0.4467, "step": 196700 }, { "epoch": 56.59090909090909, "grad_norm": 1.0939061641693115, "learning_rate": 0.0008681818181818182, "loss": 0.5873, "step": 196710 }, { "epoch": 56.59378596087457, "grad_norm": 1.9736799001693726, "learning_rate": 0.0008681242807825086, "loss": 0.5574, "step": 196720 }, { "epoch": 56.596662830840046, "grad_norm": 1.4602882862091064, "learning_rate": 0.0008680667433831991, "loss": 0.508, "step": 196730 }, { "epoch": 56.59953970080552, "grad_norm": 2.2116963863372803, "learning_rate": 0.0008680092059838895, "loss": 0.4895, "step": 196740 }, { "epoch": 56.602416570771, "grad_norm": 2.4582607746124268, "learning_rate": 0.00086795166858458, "loss": 0.6549, "step": 196750 }, { "epoch": 56.60529344073648, "grad_norm": 1.9525843858718872, "learning_rate": 0.0008678941311852705, "loss": 0.5182, "step": 196760 }, { "epoch": 56.60817031070196, "grad_norm": 0.7058462500572205, "learning_rate": 0.0008678365937859609, "loss": 0.5165, "step": 196770 }, { "epoch": 56.611047180667434, "grad_norm": 1.5865323543548584, "learning_rate": 0.0008677790563866514, "loss": 0.4325, "step": 196780 }, { "epoch": 56.61392405063291, "grad_norm": 2.1966421604156494, "learning_rate": 0.0008677215189873417, "loss": 0.522, "step": 196790 }, { "epoch": 56.616800920598386, "grad_norm": 2.203847885131836, "learning_rate": 0.0008676639815880323, "loss": 0.504, "step": 196800 }, { "epoch": 56.61967779056387, "grad_norm": 0.8583868145942688, "learning_rate": 0.0008676064441887227, "loss": 0.5052, "step": 196810 }, { "epoch": 56.622554660529346, "grad_norm": 1.3452441692352295, "learning_rate": 0.0008675489067894131, "loss": 0.5882, "step": 196820 }, { "epoch": 56.62543153049482, "grad_norm": 0.711443305015564, "learning_rate": 0.0008674913693901035, "loss": 0.4778, "step": 196830 }, { "epoch": 56.6283084004603, "grad_norm": 1.0340707302093506, "learning_rate": 0.0008674338319907941, "loss": 0.5623, "step": 196840 }, { "epoch": 56.631185270425775, "grad_norm": 1.2553249597549438, "learning_rate": 0.0008673762945914845, "loss": 0.4546, "step": 196850 }, { "epoch": 56.63406214039125, "grad_norm": 0.9572194814682007, "learning_rate": 0.0008673187571921749, "loss": 0.5343, "step": 196860 }, { "epoch": 56.636939010356734, "grad_norm": 1.7667988538742065, "learning_rate": 0.0008672612197928655, "loss": 0.5567, "step": 196870 }, { "epoch": 56.63981588032221, "grad_norm": 1.7182345390319824, "learning_rate": 0.0008672036823935558, "loss": 0.5539, "step": 196880 }, { "epoch": 56.64269275028769, "grad_norm": 1.9293545484542847, "learning_rate": 0.0008671461449942463, "loss": 0.547, "step": 196890 }, { "epoch": 56.64556962025316, "grad_norm": 1.6807093620300293, "learning_rate": 0.0008670886075949367, "loss": 0.4901, "step": 196900 }, { "epoch": 56.64844649021864, "grad_norm": 0.971642792224884, "learning_rate": 0.0008670310701956272, "loss": 0.4385, "step": 196910 }, { "epoch": 56.65132336018412, "grad_norm": 0.9895016551017761, "learning_rate": 0.0008669735327963176, "loss": 0.4338, "step": 196920 }, { "epoch": 56.6542002301496, "grad_norm": 1.0738022327423096, "learning_rate": 0.0008669159953970081, "loss": 0.5408, "step": 196930 }, { "epoch": 56.657077100115075, "grad_norm": 1.438082218170166, "learning_rate": 0.0008668584579976984, "loss": 0.5067, "step": 196940 }, { "epoch": 56.65995397008055, "grad_norm": 1.135360598564148, "learning_rate": 0.000866800920598389, "loss": 0.4803, "step": 196950 }, { "epoch": 56.66283084004603, "grad_norm": 0.8210171461105347, "learning_rate": 0.0008667433831990795, "loss": 0.4871, "step": 196960 }, { "epoch": 56.66570771001151, "grad_norm": 1.0189706087112427, "learning_rate": 0.0008666858457997698, "loss": 0.4947, "step": 196970 }, { "epoch": 56.66858457997699, "grad_norm": 1.2079269886016846, "learning_rate": 0.0008666283084004604, "loss": 0.6075, "step": 196980 }, { "epoch": 56.67146144994246, "grad_norm": 1.763153314590454, "learning_rate": 0.0008665707710011508, "loss": 0.4696, "step": 196990 }, { "epoch": 56.67433831990794, "grad_norm": 1.8655847311019897, "learning_rate": 0.0008665132336018412, "loss": 0.5197, "step": 197000 }, { "epoch": 56.677215189873415, "grad_norm": 1.271630048751831, "learning_rate": 0.0008664556962025316, "loss": 0.5125, "step": 197010 }, { "epoch": 56.6800920598389, "grad_norm": 1.557965874671936, "learning_rate": 0.0008663981588032222, "loss": 0.5082, "step": 197020 }, { "epoch": 56.682968929804375, "grad_norm": 1.0572222471237183, "learning_rate": 0.0008663406214039125, "loss": 0.5382, "step": 197030 }, { "epoch": 56.68584579976985, "grad_norm": 0.7719274759292603, "learning_rate": 0.000866283084004603, "loss": 0.4816, "step": 197040 }, { "epoch": 56.68872266973533, "grad_norm": 1.8449766635894775, "learning_rate": 0.0008662255466052936, "loss": 0.5707, "step": 197050 }, { "epoch": 56.6915995397008, "grad_norm": 1.1530588865280151, "learning_rate": 0.0008661680092059839, "loss": 0.5485, "step": 197060 }, { "epoch": 56.69447640966629, "grad_norm": 2.256164789199829, "learning_rate": 0.0008661104718066744, "loss": 0.5771, "step": 197070 }, { "epoch": 56.69735327963176, "grad_norm": 1.1663223505020142, "learning_rate": 0.0008660529344073648, "loss": 0.5449, "step": 197080 }, { "epoch": 56.70023014959724, "grad_norm": 1.8732905387878418, "learning_rate": 0.0008659953970080553, "loss": 0.4015, "step": 197090 }, { "epoch": 56.703107019562715, "grad_norm": 1.0084906816482544, "learning_rate": 0.0008659378596087457, "loss": 0.6128, "step": 197100 }, { "epoch": 56.70598388952819, "grad_norm": 0.8309433460235596, "learning_rate": 0.0008658803222094362, "loss": 0.6595, "step": 197110 }, { "epoch": 56.70886075949367, "grad_norm": 1.187227487564087, "learning_rate": 0.0008658227848101265, "loss": 0.513, "step": 197120 }, { "epoch": 56.71173762945915, "grad_norm": 1.9833309650421143, "learning_rate": 0.0008657652474108171, "loss": 0.5242, "step": 197130 }, { "epoch": 56.71461449942463, "grad_norm": 0.8838355541229248, "learning_rate": 0.0008657077100115076, "loss": 0.4343, "step": 197140 }, { "epoch": 56.7174913693901, "grad_norm": 1.4530304670333862, "learning_rate": 0.0008656501726121979, "loss": 0.4718, "step": 197150 }, { "epoch": 56.72036823935558, "grad_norm": 1.5229867696762085, "learning_rate": 0.0008655926352128885, "loss": 0.5513, "step": 197160 }, { "epoch": 56.723245109321056, "grad_norm": 1.2935686111450195, "learning_rate": 0.0008655350978135789, "loss": 0.4436, "step": 197170 }, { "epoch": 56.72612197928654, "grad_norm": 1.0436310768127441, "learning_rate": 0.0008654775604142693, "loss": 0.5599, "step": 197180 }, { "epoch": 56.728998849252015, "grad_norm": 1.1144753694534302, "learning_rate": 0.0008654200230149597, "loss": 0.4783, "step": 197190 }, { "epoch": 56.73187571921749, "grad_norm": 1.3240137100219727, "learning_rate": 0.0008653624856156502, "loss": 0.4512, "step": 197200 }, { "epoch": 56.73475258918297, "grad_norm": 1.386525273323059, "learning_rate": 0.0008653049482163406, "loss": 0.6507, "step": 197210 }, { "epoch": 56.737629459148444, "grad_norm": 1.191810965538025, "learning_rate": 0.0008652474108170311, "loss": 0.4853, "step": 197220 }, { "epoch": 56.74050632911393, "grad_norm": 1.264163851737976, "learning_rate": 0.0008651898734177214, "loss": 0.4847, "step": 197230 }, { "epoch": 56.7433831990794, "grad_norm": 2.188218832015991, "learning_rate": 0.000865132336018412, "loss": 0.5398, "step": 197240 }, { "epoch": 56.74626006904488, "grad_norm": 1.4848922491073608, "learning_rate": 0.0008650747986191025, "loss": 0.4726, "step": 197250 }, { "epoch": 56.749136939010356, "grad_norm": 2.953284740447998, "learning_rate": 0.0008650172612197928, "loss": 0.5986, "step": 197260 }, { "epoch": 56.75201380897583, "grad_norm": 1.319225549697876, "learning_rate": 0.0008649597238204834, "loss": 0.5442, "step": 197270 }, { "epoch": 56.754890678941315, "grad_norm": 1.6488251686096191, "learning_rate": 0.0008649021864211738, "loss": 0.4455, "step": 197280 }, { "epoch": 56.75776754890679, "grad_norm": 1.5902503728866577, "learning_rate": 0.0008648446490218642, "loss": 0.5655, "step": 197290 }, { "epoch": 56.76064441887227, "grad_norm": 1.1883453130722046, "learning_rate": 0.0008647871116225546, "loss": 0.5917, "step": 197300 }, { "epoch": 56.763521288837744, "grad_norm": 1.2107843160629272, "learning_rate": 0.0008647295742232452, "loss": 0.4951, "step": 197310 }, { "epoch": 56.76639815880322, "grad_norm": 1.829988956451416, "learning_rate": 0.0008646720368239355, "loss": 0.5755, "step": 197320 }, { "epoch": 56.769275028768696, "grad_norm": 2.8235373497009277, "learning_rate": 0.000864614499424626, "loss": 0.4554, "step": 197330 }, { "epoch": 56.77215189873418, "grad_norm": 0.6730037927627563, "learning_rate": 0.0008645569620253166, "loss": 0.4807, "step": 197340 }, { "epoch": 56.775028768699656, "grad_norm": 1.782801866531372, "learning_rate": 0.0008644994246260069, "loss": 0.6133, "step": 197350 }, { "epoch": 56.77790563866513, "grad_norm": 1.1770673990249634, "learning_rate": 0.0008644418872266974, "loss": 0.6215, "step": 197360 }, { "epoch": 56.78078250863061, "grad_norm": 1.460031270980835, "learning_rate": 0.0008643843498273878, "loss": 0.4912, "step": 197370 }, { "epoch": 56.783659378596084, "grad_norm": 0.9310067296028137, "learning_rate": 0.0008643268124280783, "loss": 0.4114, "step": 197380 }, { "epoch": 56.78653624856157, "grad_norm": 1.7986055612564087, "learning_rate": 0.0008642692750287687, "loss": 0.6114, "step": 197390 }, { "epoch": 56.789413118527044, "grad_norm": 1.6189250946044922, "learning_rate": 0.0008642117376294592, "loss": 0.4802, "step": 197400 }, { "epoch": 56.79228998849252, "grad_norm": 1.2339552640914917, "learning_rate": 0.0008641542002301495, "loss": 0.5457, "step": 197410 }, { "epoch": 56.795166858457996, "grad_norm": 1.7427104711532593, "learning_rate": 0.0008640966628308401, "loss": 0.7201, "step": 197420 }, { "epoch": 56.79804372842347, "grad_norm": 1.733625054359436, "learning_rate": 0.0008640391254315305, "loss": 0.5732, "step": 197430 }, { "epoch": 56.800920598388956, "grad_norm": 1.4265973567962646, "learning_rate": 0.0008639815880322209, "loss": 0.4753, "step": 197440 }, { "epoch": 56.80379746835443, "grad_norm": 1.2821296453475952, "learning_rate": 0.0008639240506329115, "loss": 0.4977, "step": 197450 }, { "epoch": 56.80667433831991, "grad_norm": 0.8766689896583557, "learning_rate": 0.0008638665132336019, "loss": 0.5801, "step": 197460 }, { "epoch": 56.809551208285384, "grad_norm": 1.071785569190979, "learning_rate": 0.0008638089758342923, "loss": 0.4441, "step": 197470 }, { "epoch": 56.81242807825086, "grad_norm": 1.3709290027618408, "learning_rate": 0.0008637514384349827, "loss": 0.5006, "step": 197480 }, { "epoch": 56.815304948216344, "grad_norm": 1.1162290573120117, "learning_rate": 0.0008636939010356733, "loss": 0.5411, "step": 197490 }, { "epoch": 56.81818181818182, "grad_norm": 0.9707418084144592, "learning_rate": 0.0008636363636363636, "loss": 0.5007, "step": 197500 }, { "epoch": 56.821058688147296, "grad_norm": 1.4832719564437866, "learning_rate": 0.0008635788262370541, "loss": 0.6838, "step": 197510 }, { "epoch": 56.82393555811277, "grad_norm": 1.3147454261779785, "learning_rate": 0.0008635212888377445, "loss": 0.4839, "step": 197520 }, { "epoch": 56.82681242807825, "grad_norm": 1.995896339416504, "learning_rate": 0.000863463751438435, "loss": 0.5879, "step": 197530 }, { "epoch": 56.829689298043725, "grad_norm": 1.2788386344909668, "learning_rate": 0.0008634062140391254, "loss": 0.5788, "step": 197540 }, { "epoch": 56.83256616800921, "grad_norm": 1.5500844717025757, "learning_rate": 0.0008633486766398159, "loss": 0.5361, "step": 197550 }, { "epoch": 56.835443037974684, "grad_norm": 1.5833241939544678, "learning_rate": 0.0008632911392405064, "loss": 0.4269, "step": 197560 }, { "epoch": 56.83831990794016, "grad_norm": 1.2500383853912354, "learning_rate": 0.0008632336018411968, "loss": 0.6365, "step": 197570 }, { "epoch": 56.84119677790564, "grad_norm": 0.916080117225647, "learning_rate": 0.0008631760644418873, "loss": 0.5172, "step": 197580 }, { "epoch": 56.84407364787111, "grad_norm": 1.1474063396453857, "learning_rate": 0.0008631185270425776, "loss": 0.3967, "step": 197590 }, { "epoch": 56.846950517836596, "grad_norm": 1.2998950481414795, "learning_rate": 0.0008630609896432682, "loss": 0.4492, "step": 197600 }, { "epoch": 56.84982738780207, "grad_norm": 0.8587114810943604, "learning_rate": 0.0008630034522439586, "loss": 0.4253, "step": 197610 }, { "epoch": 56.85270425776755, "grad_norm": 1.4073227643966675, "learning_rate": 0.000862945914844649, "loss": 0.5084, "step": 197620 }, { "epoch": 56.855581127733025, "grad_norm": 1.0040087699890137, "learning_rate": 0.0008628883774453394, "loss": 0.4854, "step": 197630 }, { "epoch": 56.8584579976985, "grad_norm": 2.459669828414917, "learning_rate": 0.00086283084004603, "loss": 0.607, "step": 197640 }, { "epoch": 56.861334867663984, "grad_norm": 1.0501108169555664, "learning_rate": 0.0008627733026467203, "loss": 0.4085, "step": 197650 }, { "epoch": 56.86421173762946, "grad_norm": 1.260374903678894, "learning_rate": 0.0008627157652474108, "loss": 0.4868, "step": 197660 }, { "epoch": 56.86708860759494, "grad_norm": 1.3359700441360474, "learning_rate": 0.0008626582278481014, "loss": 0.6622, "step": 197670 }, { "epoch": 56.86996547756041, "grad_norm": 0.9925462603569031, "learning_rate": 0.0008626006904487917, "loss": 0.5755, "step": 197680 }, { "epoch": 56.87284234752589, "grad_norm": 1.5793858766555786, "learning_rate": 0.0008625431530494822, "loss": 0.4647, "step": 197690 }, { "epoch": 56.87571921749137, "grad_norm": 1.8064697980880737, "learning_rate": 0.0008624856156501726, "loss": 0.5954, "step": 197700 }, { "epoch": 56.87859608745685, "grad_norm": 1.3228557109832764, "learning_rate": 0.0008624280782508631, "loss": 0.5587, "step": 197710 }, { "epoch": 56.881472957422325, "grad_norm": 0.8756299614906311, "learning_rate": 0.0008623705408515535, "loss": 0.499, "step": 197720 }, { "epoch": 56.8843498273878, "grad_norm": 1.2921236753463745, "learning_rate": 0.000862313003452244, "loss": 0.4777, "step": 197730 }, { "epoch": 56.88722669735328, "grad_norm": 1.7261712551116943, "learning_rate": 0.0008622554660529344, "loss": 0.5996, "step": 197740 }, { "epoch": 56.89010356731876, "grad_norm": 0.9279820919036865, "learning_rate": 0.0008621979286536249, "loss": 0.4635, "step": 197750 }, { "epoch": 56.89298043728424, "grad_norm": 1.374947428703308, "learning_rate": 0.0008621403912543154, "loss": 0.5176, "step": 197760 }, { "epoch": 56.89585730724971, "grad_norm": 2.0673251152038574, "learning_rate": 0.0008620828538550057, "loss": 0.5898, "step": 197770 }, { "epoch": 56.89873417721519, "grad_norm": 1.609744906425476, "learning_rate": 0.0008620253164556963, "loss": 0.5018, "step": 197780 }, { "epoch": 56.901611047180666, "grad_norm": 0.7229542136192322, "learning_rate": 0.0008619677790563867, "loss": 0.4479, "step": 197790 }, { "epoch": 56.90448791714614, "grad_norm": 0.8696655035018921, "learning_rate": 0.0008619102416570771, "loss": 0.5257, "step": 197800 }, { "epoch": 56.907364787111625, "grad_norm": 1.4902162551879883, "learning_rate": 0.0008618527042577675, "loss": 0.6062, "step": 197810 }, { "epoch": 56.9102416570771, "grad_norm": 1.0375772714614868, "learning_rate": 0.0008617951668584581, "loss": 0.4755, "step": 197820 }, { "epoch": 56.91311852704258, "grad_norm": 0.8094565272331238, "learning_rate": 0.0008617376294591484, "loss": 0.4806, "step": 197830 }, { "epoch": 56.915995397008054, "grad_norm": 1.5614153146743774, "learning_rate": 0.0008616800920598389, "loss": 0.5256, "step": 197840 }, { "epoch": 56.91887226697353, "grad_norm": 1.3382381200790405, "learning_rate": 0.0008616225546605295, "loss": 0.579, "step": 197850 }, { "epoch": 56.92174913693901, "grad_norm": 1.1960358619689941, "learning_rate": 0.0008615650172612198, "loss": 0.5155, "step": 197860 }, { "epoch": 56.92462600690449, "grad_norm": 1.1794795989990234, "learning_rate": 0.0008615074798619103, "loss": 0.4632, "step": 197870 }, { "epoch": 56.927502876869966, "grad_norm": 1.6133735179901123, "learning_rate": 0.0008614499424626007, "loss": 0.4632, "step": 197880 }, { "epoch": 56.93037974683544, "grad_norm": 1.1683603525161743, "learning_rate": 0.0008613924050632912, "loss": 0.5566, "step": 197890 }, { "epoch": 56.93325661680092, "grad_norm": 1.0726350545883179, "learning_rate": 0.0008613348676639816, "loss": 0.4552, "step": 197900 }, { "epoch": 56.9361334867664, "grad_norm": 0.9922034740447998, "learning_rate": 0.0008612773302646721, "loss": 0.4453, "step": 197910 }, { "epoch": 56.93901035673188, "grad_norm": 1.1933966875076294, "learning_rate": 0.0008612197928653624, "loss": 0.4682, "step": 197920 }, { "epoch": 56.941887226697354, "grad_norm": 1.4576891660690308, "learning_rate": 0.000861162255466053, "loss": 0.5546, "step": 197930 }, { "epoch": 56.94476409666283, "grad_norm": 1.0191348791122437, "learning_rate": 0.0008611047180667435, "loss": 0.5409, "step": 197940 }, { "epoch": 56.947640966628306, "grad_norm": 0.9460402727127075, "learning_rate": 0.0008610471806674338, "loss": 0.4583, "step": 197950 }, { "epoch": 56.95051783659379, "grad_norm": 1.3638916015625, "learning_rate": 0.0008609896432681244, "loss": 0.4903, "step": 197960 }, { "epoch": 56.953394706559266, "grad_norm": 2.090921640396118, "learning_rate": 0.0008609321058688148, "loss": 0.4674, "step": 197970 }, { "epoch": 56.95627157652474, "grad_norm": 1.7551430463790894, "learning_rate": 0.0008608745684695052, "loss": 0.5816, "step": 197980 }, { "epoch": 56.95914844649022, "grad_norm": 2.090312957763672, "learning_rate": 0.0008608170310701956, "loss": 0.5752, "step": 197990 }, { "epoch": 56.962025316455694, "grad_norm": 1.2568724155426025, "learning_rate": 0.0008607594936708862, "loss": 0.5213, "step": 198000 }, { "epoch": 56.96490218642117, "grad_norm": 0.9789957404136658, "learning_rate": 0.0008607019562715765, "loss": 0.4966, "step": 198010 }, { "epoch": 56.967779056386654, "grad_norm": 0.7068460583686829, "learning_rate": 0.000860644418872267, "loss": 0.4631, "step": 198020 }, { "epoch": 56.97065592635213, "grad_norm": 2.1055681705474854, "learning_rate": 0.0008605868814729574, "loss": 0.57, "step": 198030 }, { "epoch": 56.973532796317606, "grad_norm": 0.7899104356765747, "learning_rate": 0.0008605293440736479, "loss": 0.5227, "step": 198040 }, { "epoch": 56.97640966628308, "grad_norm": 1.1195926666259766, "learning_rate": 0.0008604718066743384, "loss": 0.4558, "step": 198050 }, { "epoch": 56.97928653624856, "grad_norm": 1.1743885278701782, "learning_rate": 0.0008604142692750287, "loss": 0.5646, "step": 198060 }, { "epoch": 56.98216340621404, "grad_norm": 1.1664726734161377, "learning_rate": 0.0008603567318757193, "loss": 0.5129, "step": 198070 }, { "epoch": 56.98504027617952, "grad_norm": 1.4401482343673706, "learning_rate": 0.0008602991944764097, "loss": 0.5385, "step": 198080 }, { "epoch": 56.987917146144994, "grad_norm": 2.341047763824463, "learning_rate": 0.0008602416570771001, "loss": 0.4817, "step": 198090 }, { "epoch": 56.99079401611047, "grad_norm": 1.0651841163635254, "learning_rate": 0.0008601841196777905, "loss": 0.3682, "step": 198100 }, { "epoch": 56.99367088607595, "grad_norm": 1.406423568725586, "learning_rate": 0.0008601265822784811, "loss": 0.5847, "step": 198110 }, { "epoch": 56.99654775604143, "grad_norm": 1.5451020002365112, "learning_rate": 0.0008600690448791714, "loss": 0.5319, "step": 198120 }, { "epoch": 56.999424626006906, "grad_norm": 3.320929765701294, "learning_rate": 0.0008600115074798619, "loss": 0.5648, "step": 198130 }, { "epoch": 57.00230149597238, "grad_norm": 0.7104335427284241, "learning_rate": 0.0008599539700805525, "loss": 0.4964, "step": 198140 }, { "epoch": 57.00517836593786, "grad_norm": 1.0955523252487183, "learning_rate": 0.0008598964326812428, "loss": 0.4009, "step": 198150 }, { "epoch": 57.008055235903335, "grad_norm": 0.986304759979248, "learning_rate": 0.0008598388952819333, "loss": 0.3644, "step": 198160 }, { "epoch": 57.01093210586882, "grad_norm": 0.840960681438446, "learning_rate": 0.0008597813578826237, "loss": 0.4437, "step": 198170 }, { "epoch": 57.013808975834294, "grad_norm": 1.2503925561904907, "learning_rate": 0.0008597238204833142, "loss": 0.5747, "step": 198180 }, { "epoch": 57.01668584579977, "grad_norm": 1.0855947732925415, "learning_rate": 0.0008596662830840046, "loss": 0.4619, "step": 198190 }, { "epoch": 57.01956271576525, "grad_norm": 1.622070074081421, "learning_rate": 0.0008596087456846951, "loss": 0.4806, "step": 198200 }, { "epoch": 57.02243958573072, "grad_norm": 0.8504353761672974, "learning_rate": 0.0008595512082853854, "loss": 0.4884, "step": 198210 }, { "epoch": 57.0253164556962, "grad_norm": 1.2442840337753296, "learning_rate": 0.000859493670886076, "loss": 0.4259, "step": 198220 }, { "epoch": 57.02819332566168, "grad_norm": 1.1913460493087769, "learning_rate": 0.0008594361334867664, "loss": 0.5494, "step": 198230 }, { "epoch": 57.03107019562716, "grad_norm": 2.199472188949585, "learning_rate": 0.0008593785960874568, "loss": 0.4674, "step": 198240 }, { "epoch": 57.033947065592635, "grad_norm": 1.6085454225540161, "learning_rate": 0.0008593210586881474, "loss": 0.4244, "step": 198250 }, { "epoch": 57.03682393555811, "grad_norm": 1.255942940711975, "learning_rate": 0.0008592635212888378, "loss": 0.4561, "step": 198260 }, { "epoch": 57.03970080552359, "grad_norm": 0.6142603158950806, "learning_rate": 0.0008592059838895282, "loss": 0.424, "step": 198270 }, { "epoch": 57.04257767548907, "grad_norm": 1.2731976509094238, "learning_rate": 0.0008591484464902186, "loss": 0.5244, "step": 198280 }, { "epoch": 57.04545454545455, "grad_norm": 1.0141557455062866, "learning_rate": 0.0008590909090909092, "loss": 0.4108, "step": 198290 }, { "epoch": 57.04833141542002, "grad_norm": 0.9580450654029846, "learning_rate": 0.0008590333716915995, "loss": 0.4696, "step": 198300 }, { "epoch": 57.0512082853855, "grad_norm": 0.9739032983779907, "learning_rate": 0.00085897583429229, "loss": 0.4317, "step": 198310 }, { "epoch": 57.054085155350975, "grad_norm": 0.8006138205528259, "learning_rate": 0.0008589182968929804, "loss": 0.5244, "step": 198320 }, { "epoch": 57.05696202531646, "grad_norm": 1.3438104391098022, "learning_rate": 0.0008588607594936709, "loss": 0.4097, "step": 198330 }, { "epoch": 57.059838895281935, "grad_norm": 1.0483280420303345, "learning_rate": 0.0008588032220943613, "loss": 0.4517, "step": 198340 }, { "epoch": 57.06271576524741, "grad_norm": 2.0746805667877197, "learning_rate": 0.0008587456846950518, "loss": 0.449, "step": 198350 }, { "epoch": 57.06559263521289, "grad_norm": 1.8714783191680908, "learning_rate": 0.0008586881472957423, "loss": 0.4158, "step": 198360 }, { "epoch": 57.06846950517836, "grad_norm": 0.8500292301177979, "learning_rate": 0.0008586306098964327, "loss": 0.5902, "step": 198370 }, { "epoch": 57.07134637514385, "grad_norm": 1.1923201084136963, "learning_rate": 0.0008585730724971232, "loss": 0.5848, "step": 198380 }, { "epoch": 57.07422324510932, "grad_norm": 1.4954971075057983, "learning_rate": 0.0008585155350978135, "loss": 0.4219, "step": 198390 }, { "epoch": 57.0771001150748, "grad_norm": 0.852711021900177, "learning_rate": 0.0008584579976985041, "loss": 0.4668, "step": 198400 }, { "epoch": 57.079976985040275, "grad_norm": 1.7228684425354004, "learning_rate": 0.0008584004602991945, "loss": 0.4341, "step": 198410 }, { "epoch": 57.08285385500575, "grad_norm": 0.8540493845939636, "learning_rate": 0.0008583429228998849, "loss": 0.5731, "step": 198420 }, { "epoch": 57.08573072497123, "grad_norm": 1.5297214984893799, "learning_rate": 0.0008582853855005754, "loss": 0.4439, "step": 198430 }, { "epoch": 57.08860759493671, "grad_norm": 0.735416829586029, "learning_rate": 0.0008582278481012659, "loss": 0.4273, "step": 198440 }, { "epoch": 57.09148446490219, "grad_norm": 1.3235100507736206, "learning_rate": 0.0008581703107019562, "loss": 0.3426, "step": 198450 }, { "epoch": 57.09436133486766, "grad_norm": 1.1014366149902344, "learning_rate": 0.0008581127733026467, "loss": 0.5298, "step": 198460 }, { "epoch": 57.09723820483314, "grad_norm": 1.4904768466949463, "learning_rate": 0.0008580552359033373, "loss": 0.6032, "step": 198470 }, { "epoch": 57.100115074798616, "grad_norm": 1.0578557252883911, "learning_rate": 0.0008579976985040276, "loss": 0.4656, "step": 198480 }, { "epoch": 57.1029919447641, "grad_norm": 1.1550748348236084, "learning_rate": 0.0008579401611047181, "loss": 0.472, "step": 198490 }, { "epoch": 57.105868814729575, "grad_norm": 1.549860954284668, "learning_rate": 0.0008578826237054085, "loss": 0.5025, "step": 198500 }, { "epoch": 57.10874568469505, "grad_norm": 1.9231370687484741, "learning_rate": 0.000857825086306099, "loss": 0.4688, "step": 198510 }, { "epoch": 57.11162255466053, "grad_norm": 1.1936205625534058, "learning_rate": 0.0008577675489067894, "loss": 0.4642, "step": 198520 }, { "epoch": 57.114499424626004, "grad_norm": 1.6488639116287231, "learning_rate": 0.0008577100115074799, "loss": 0.5264, "step": 198530 }, { "epoch": 57.11737629459149, "grad_norm": 1.4893461465835571, "learning_rate": 0.0008576524741081703, "loss": 0.4643, "step": 198540 }, { "epoch": 57.120253164556964, "grad_norm": 1.1282899379730225, "learning_rate": 0.0008575949367088608, "loss": 0.4074, "step": 198550 }, { "epoch": 57.12313003452244, "grad_norm": 2.1918303966522217, "learning_rate": 0.0008575373993095513, "loss": 0.4392, "step": 198560 }, { "epoch": 57.126006904487916, "grad_norm": 1.1923960447311401, "learning_rate": 0.0008574798619102416, "loss": 0.5048, "step": 198570 }, { "epoch": 57.12888377445339, "grad_norm": 0.9357778429985046, "learning_rate": 0.0008574223245109322, "loss": 0.4107, "step": 198580 }, { "epoch": 57.131760644418875, "grad_norm": 0.8711089491844177, "learning_rate": 0.0008573647871116226, "loss": 0.471, "step": 198590 }, { "epoch": 57.13463751438435, "grad_norm": 1.2156542539596558, "learning_rate": 0.000857307249712313, "loss": 0.5194, "step": 198600 }, { "epoch": 57.13751438434983, "grad_norm": 1.1371091604232788, "learning_rate": 0.0008572497123130034, "loss": 0.5705, "step": 198610 }, { "epoch": 57.140391254315304, "grad_norm": 1.22404146194458, "learning_rate": 0.000857192174913694, "loss": 0.4377, "step": 198620 }, { "epoch": 57.14326812428078, "grad_norm": 1.3433784246444702, "learning_rate": 0.0008571346375143843, "loss": 0.4524, "step": 198630 }, { "epoch": 57.146144994246264, "grad_norm": 1.1175230741500854, "learning_rate": 0.0008570771001150748, "loss": 0.5129, "step": 198640 }, { "epoch": 57.14902186421174, "grad_norm": 1.138487458229065, "learning_rate": 0.0008570195627157654, "loss": 0.3959, "step": 198650 }, { "epoch": 57.151898734177216, "grad_norm": 0.9933739900588989, "learning_rate": 0.0008569620253164557, "loss": 0.5614, "step": 198660 }, { "epoch": 57.15477560414269, "grad_norm": 0.9918845295906067, "learning_rate": 0.0008569044879171462, "loss": 0.5532, "step": 198670 }, { "epoch": 57.15765247410817, "grad_norm": 0.8997785449028015, "learning_rate": 0.0008568469505178366, "loss": 0.4798, "step": 198680 }, { "epoch": 57.160529344073645, "grad_norm": 1.2365167140960693, "learning_rate": 0.0008567894131185271, "loss": 0.5875, "step": 198690 }, { "epoch": 57.16340621403913, "grad_norm": 1.8136540651321411, "learning_rate": 0.0008567318757192175, "loss": 0.4953, "step": 198700 }, { "epoch": 57.166283084004604, "grad_norm": 1.3533161878585815, "learning_rate": 0.000856674338319908, "loss": 0.4517, "step": 198710 }, { "epoch": 57.16915995397008, "grad_norm": 1.3883540630340576, "learning_rate": 0.0008566168009205984, "loss": 0.4768, "step": 198720 }, { "epoch": 57.17203682393556, "grad_norm": 1.7738369703292847, "learning_rate": 0.0008565592635212889, "loss": 0.4137, "step": 198730 }, { "epoch": 57.17491369390103, "grad_norm": 0.932299017906189, "learning_rate": 0.0008565017261219793, "loss": 0.5834, "step": 198740 }, { "epoch": 57.177790563866516, "grad_norm": 1.5295652151107788, "learning_rate": 0.0008564441887226697, "loss": 0.5826, "step": 198750 }, { "epoch": 57.18066743383199, "grad_norm": 1.9098790884017944, "learning_rate": 0.0008563866513233603, "loss": 0.5777, "step": 198760 }, { "epoch": 57.18354430379747, "grad_norm": 1.3903650045394897, "learning_rate": 0.0008563291139240507, "loss": 0.4846, "step": 198770 }, { "epoch": 57.186421173762945, "grad_norm": 1.3493282794952393, "learning_rate": 0.0008562715765247411, "loss": 0.4599, "step": 198780 }, { "epoch": 57.18929804372842, "grad_norm": 1.1852519512176514, "learning_rate": 0.0008562140391254315, "loss": 0.4492, "step": 198790 }, { "epoch": 57.192174913693904, "grad_norm": 0.8878856897354126, "learning_rate": 0.0008561565017261221, "loss": 0.5938, "step": 198800 }, { "epoch": 57.19505178365938, "grad_norm": 1.06819748878479, "learning_rate": 0.0008560989643268124, "loss": 0.5404, "step": 198810 }, { "epoch": 57.19792865362486, "grad_norm": 1.0078140497207642, "learning_rate": 0.0008560414269275029, "loss": 0.5255, "step": 198820 }, { "epoch": 57.20080552359033, "grad_norm": 1.2809410095214844, "learning_rate": 0.0008559838895281934, "loss": 0.4459, "step": 198830 }, { "epoch": 57.20368239355581, "grad_norm": 5.805602073669434, "learning_rate": 0.0008559263521288838, "loss": 0.4729, "step": 198840 }, { "epoch": 57.20655926352129, "grad_norm": 0.6941699981689453, "learning_rate": 0.0008558688147295743, "loss": 0.4846, "step": 198850 }, { "epoch": 57.20943613348677, "grad_norm": 0.6634433269500732, "learning_rate": 0.0008558112773302646, "loss": 0.484, "step": 198860 }, { "epoch": 57.212313003452245, "grad_norm": 1.733344554901123, "learning_rate": 0.0008557537399309552, "loss": 0.4377, "step": 198870 }, { "epoch": 57.21518987341772, "grad_norm": 1.287474513053894, "learning_rate": 0.0008556962025316456, "loss": 0.4122, "step": 198880 }, { "epoch": 57.2180667433832, "grad_norm": 2.262181520462036, "learning_rate": 0.000855638665132336, "loss": 0.5207, "step": 198890 }, { "epoch": 57.22094361334867, "grad_norm": 1.6597881317138672, "learning_rate": 0.0008555811277330264, "loss": 0.5042, "step": 198900 }, { "epoch": 57.22382048331416, "grad_norm": 2.008680582046509, "learning_rate": 0.000855523590333717, "loss": 0.5016, "step": 198910 }, { "epoch": 57.22669735327963, "grad_norm": 1.8341684341430664, "learning_rate": 0.0008554660529344073, "loss": 0.6337, "step": 198920 }, { "epoch": 57.22957422324511, "grad_norm": 2.2966012954711914, "learning_rate": 0.0008554085155350978, "loss": 0.5071, "step": 198930 }, { "epoch": 57.232451093210585, "grad_norm": 0.9727095365524292, "learning_rate": 0.0008553509781357883, "loss": 0.5081, "step": 198940 }, { "epoch": 57.23532796317606, "grad_norm": 1.461871862411499, "learning_rate": 0.0008552934407364787, "loss": 0.5171, "step": 198950 }, { "epoch": 57.238204833141545, "grad_norm": 1.2029225826263428, "learning_rate": 0.0008552359033371692, "loss": 0.4458, "step": 198960 }, { "epoch": 57.24108170310702, "grad_norm": 1.3882910013198853, "learning_rate": 0.0008551783659378596, "loss": 0.4748, "step": 198970 }, { "epoch": 57.2439585730725, "grad_norm": 2.4582102298736572, "learning_rate": 0.0008551208285385501, "loss": 0.4672, "step": 198980 }, { "epoch": 57.24683544303797, "grad_norm": 0.9859287738800049, "learning_rate": 0.0008550632911392405, "loss": 0.4585, "step": 198990 }, { "epoch": 57.24971231300345, "grad_norm": 1.280116081237793, "learning_rate": 0.000855005753739931, "loss": 0.5098, "step": 199000 }, { "epoch": 57.25258918296893, "grad_norm": 0.896796464920044, "learning_rate": 0.0008549482163406214, "loss": 0.5229, "step": 199010 }, { "epoch": 57.25546605293441, "grad_norm": 1.0930393934249878, "learning_rate": 0.0008548906789413119, "loss": 0.414, "step": 199020 }, { "epoch": 57.258342922899885, "grad_norm": 0.7952499985694885, "learning_rate": 0.0008548331415420023, "loss": 0.4499, "step": 199030 }, { "epoch": 57.26121979286536, "grad_norm": 1.3318016529083252, "learning_rate": 0.0008547756041426927, "loss": 0.5063, "step": 199040 }, { "epoch": 57.26409666283084, "grad_norm": 2.489041328430176, "learning_rate": 0.0008547180667433833, "loss": 0.6101, "step": 199050 }, { "epoch": 57.26697353279632, "grad_norm": 0.9781965613365173, "learning_rate": 0.0008546605293440737, "loss": 0.5034, "step": 199060 }, { "epoch": 57.2698504027618, "grad_norm": 1.1115567684173584, "learning_rate": 0.0008546029919447641, "loss": 0.5164, "step": 199070 }, { "epoch": 57.27272727272727, "grad_norm": 1.162920355796814, "learning_rate": 0.0008545454545454545, "loss": 0.503, "step": 199080 }, { "epoch": 57.27560414269275, "grad_norm": 0.6082600951194763, "learning_rate": 0.0008544879171461451, "loss": 0.4673, "step": 199090 }, { "epoch": 57.278481012658226, "grad_norm": 1.2146466970443726, "learning_rate": 0.0008544303797468354, "loss": 0.5854, "step": 199100 }, { "epoch": 57.2813578826237, "grad_norm": 1.1575995683670044, "learning_rate": 0.0008543728423475259, "loss": 0.4589, "step": 199110 }, { "epoch": 57.284234752589185, "grad_norm": 1.6206430196762085, "learning_rate": 0.0008543153049482164, "loss": 0.5316, "step": 199120 }, { "epoch": 57.28711162255466, "grad_norm": 0.8624745607376099, "learning_rate": 0.0008542577675489068, "loss": 0.5155, "step": 199130 }, { "epoch": 57.28998849252014, "grad_norm": 1.1395198106765747, "learning_rate": 0.0008542002301495972, "loss": 0.558, "step": 199140 }, { "epoch": 57.292865362485614, "grad_norm": 0.5720397233963013, "learning_rate": 0.0008541426927502877, "loss": 0.5139, "step": 199150 }, { "epoch": 57.29574223245109, "grad_norm": 1.4281980991363525, "learning_rate": 0.0008540851553509782, "loss": 0.4957, "step": 199160 }, { "epoch": 57.29861910241657, "grad_norm": 0.8904478549957275, "learning_rate": 0.0008540276179516686, "loss": 0.4804, "step": 199170 }, { "epoch": 57.30149597238205, "grad_norm": 1.8783459663391113, "learning_rate": 0.0008539700805523591, "loss": 0.5135, "step": 199180 }, { "epoch": 57.304372842347526, "grad_norm": 1.3115060329437256, "learning_rate": 0.0008539125431530494, "loss": 0.5208, "step": 199190 }, { "epoch": 57.307249712313, "grad_norm": 1.6100062131881714, "learning_rate": 0.00085385500575374, "loss": 0.5275, "step": 199200 }, { "epoch": 57.31012658227848, "grad_norm": 2.4763593673706055, "learning_rate": 0.0008537974683544304, "loss": 0.501, "step": 199210 }, { "epoch": 57.31300345224396, "grad_norm": 1.4066165685653687, "learning_rate": 0.0008537399309551208, "loss": 0.58, "step": 199220 }, { "epoch": 57.31588032220944, "grad_norm": 1.322435975074768, "learning_rate": 0.0008536823935558113, "loss": 0.4868, "step": 199230 }, { "epoch": 57.318757192174914, "grad_norm": 1.3337830305099487, "learning_rate": 0.0008536248561565018, "loss": 0.4131, "step": 199240 }, { "epoch": 57.32163406214039, "grad_norm": 1.4923619031906128, "learning_rate": 0.0008535673187571921, "loss": 0.4004, "step": 199250 }, { "epoch": 57.324510932105866, "grad_norm": 1.3194024562835693, "learning_rate": 0.0008535097813578826, "loss": 0.4508, "step": 199260 }, { "epoch": 57.32738780207135, "grad_norm": 0.911718487739563, "learning_rate": 0.0008534522439585732, "loss": 0.4087, "step": 199270 }, { "epoch": 57.330264672036826, "grad_norm": 1.3853189945220947, "learning_rate": 0.0008533947065592635, "loss": 0.4722, "step": 199280 }, { "epoch": 57.3331415420023, "grad_norm": 1.0082513093948364, "learning_rate": 0.000853337169159954, "loss": 0.4418, "step": 199290 }, { "epoch": 57.33601841196778, "grad_norm": 1.1148016452789307, "learning_rate": 0.0008532796317606444, "loss": 0.4803, "step": 199300 }, { "epoch": 57.338895281933254, "grad_norm": 1.4497253894805908, "learning_rate": 0.0008532220943613349, "loss": 0.5449, "step": 199310 }, { "epoch": 57.34177215189873, "grad_norm": 1.5711902379989624, "learning_rate": 0.0008531645569620253, "loss": 0.5375, "step": 199320 }, { "epoch": 57.344649021864214, "grad_norm": 0.84516441822052, "learning_rate": 0.0008531070195627158, "loss": 0.6287, "step": 199330 }, { "epoch": 57.34752589182969, "grad_norm": 1.0825783014297485, "learning_rate": 0.0008530494821634062, "loss": 0.5249, "step": 199340 }, { "epoch": 57.350402761795166, "grad_norm": 0.9464071393013, "learning_rate": 0.0008529919447640967, "loss": 0.4615, "step": 199350 }, { "epoch": 57.35327963176064, "grad_norm": 1.523382544517517, "learning_rate": 0.0008529344073647872, "loss": 0.5781, "step": 199360 }, { "epoch": 57.35615650172612, "grad_norm": 1.50845468044281, "learning_rate": 0.0008528768699654775, "loss": 0.5753, "step": 199370 }, { "epoch": 57.3590333716916, "grad_norm": 2.703922986984253, "learning_rate": 0.0008528193325661681, "loss": 0.517, "step": 199380 }, { "epoch": 57.36191024165708, "grad_norm": 1.0894688367843628, "learning_rate": 0.0008527617951668585, "loss": 0.4025, "step": 199390 }, { "epoch": 57.364787111622555, "grad_norm": 0.7825767993927002, "learning_rate": 0.0008527042577675489, "loss": 0.4788, "step": 199400 }, { "epoch": 57.36766398158803, "grad_norm": 1.098533272743225, "learning_rate": 0.0008526467203682394, "loss": 0.4861, "step": 199410 }, { "epoch": 57.37054085155351, "grad_norm": 0.733278214931488, "learning_rate": 0.0008525891829689299, "loss": 0.4309, "step": 199420 }, { "epoch": 57.37341772151899, "grad_norm": 1.075953722000122, "learning_rate": 0.0008525316455696202, "loss": 0.4068, "step": 199430 }, { "epoch": 57.376294591484466, "grad_norm": 0.8069642782211304, "learning_rate": 0.0008524741081703107, "loss": 0.3932, "step": 199440 }, { "epoch": 57.37917146144994, "grad_norm": 0.8875704407691956, "learning_rate": 0.0008524165707710013, "loss": 0.4457, "step": 199450 }, { "epoch": 57.38204833141542, "grad_norm": 1.0482962131500244, "learning_rate": 0.0008523590333716916, "loss": 0.5862, "step": 199460 }, { "epoch": 57.384925201380895, "grad_norm": 0.9894561767578125, "learning_rate": 0.0008523014959723821, "loss": 0.476, "step": 199470 }, { "epoch": 57.38780207134638, "grad_norm": 0.9398358464241028, "learning_rate": 0.0008522439585730725, "loss": 0.4578, "step": 199480 }, { "epoch": 57.390678941311855, "grad_norm": 1.5952959060668945, "learning_rate": 0.000852186421173763, "loss": 0.5295, "step": 199490 }, { "epoch": 57.39355581127733, "grad_norm": 1.0654394626617432, "learning_rate": 0.0008521288837744534, "loss": 0.4644, "step": 199500 }, { "epoch": 57.39643268124281, "grad_norm": 1.1762728691101074, "learning_rate": 0.0008520713463751439, "loss": 0.3727, "step": 199510 }, { "epoch": 57.39930955120828, "grad_norm": 1.2166839838027954, "learning_rate": 0.0008520138089758343, "loss": 0.5251, "step": 199520 }, { "epoch": 57.40218642117377, "grad_norm": 1.372281551361084, "learning_rate": 0.0008519562715765248, "loss": 0.5451, "step": 199530 }, { "epoch": 57.40506329113924, "grad_norm": 1.4654864072799683, "learning_rate": 0.0008518987341772152, "loss": 0.49, "step": 199540 }, { "epoch": 57.40794016110472, "grad_norm": 1.0457046031951904, "learning_rate": 0.0008518411967779056, "loss": 0.492, "step": 199550 }, { "epoch": 57.410817031070195, "grad_norm": 0.8999008536338806, "learning_rate": 0.0008517836593785962, "loss": 0.5169, "step": 199560 }, { "epoch": 57.41369390103567, "grad_norm": 1.6092042922973633, "learning_rate": 0.0008517261219792866, "loss": 0.5416, "step": 199570 }, { "epoch": 57.41657077100115, "grad_norm": 2.090712785720825, "learning_rate": 0.000851668584579977, "loss": 0.426, "step": 199580 }, { "epoch": 57.41944764096663, "grad_norm": 0.8514578342437744, "learning_rate": 0.0008516110471806674, "loss": 0.4444, "step": 199590 }, { "epoch": 57.42232451093211, "grad_norm": 1.9002777338027954, "learning_rate": 0.000851553509781358, "loss": 0.4759, "step": 199600 }, { "epoch": 57.42520138089758, "grad_norm": 1.00962495803833, "learning_rate": 0.0008514959723820483, "loss": 0.4776, "step": 199610 }, { "epoch": 57.42807825086306, "grad_norm": 2.6616203784942627, "learning_rate": 0.0008514384349827388, "loss": 0.4371, "step": 199620 }, { "epoch": 57.430955120828536, "grad_norm": 1.4570775032043457, "learning_rate": 0.0008513808975834293, "loss": 0.4969, "step": 199630 }, { "epoch": 57.43383199079402, "grad_norm": 0.8733295202255249, "learning_rate": 0.0008513233601841197, "loss": 0.4703, "step": 199640 }, { "epoch": 57.436708860759495, "grad_norm": 0.6810780763626099, "learning_rate": 0.0008512658227848101, "loss": 0.4309, "step": 199650 }, { "epoch": 57.43958573072497, "grad_norm": 2.246159791946411, "learning_rate": 0.0008512082853855005, "loss": 0.4101, "step": 199660 }, { "epoch": 57.44246260069045, "grad_norm": 1.298418402671814, "learning_rate": 0.0008511507479861911, "loss": 0.4927, "step": 199670 }, { "epoch": 57.445339470655924, "grad_norm": 0.8300815224647522, "learning_rate": 0.0008510932105868815, "loss": 0.4567, "step": 199680 }, { "epoch": 57.44821634062141, "grad_norm": 1.523482084274292, "learning_rate": 0.0008510356731875719, "loss": 0.4129, "step": 199690 }, { "epoch": 57.45109321058688, "grad_norm": 1.0219656229019165, "learning_rate": 0.0008509781357882624, "loss": 0.4169, "step": 199700 }, { "epoch": 57.45397008055236, "grad_norm": 1.0924556255340576, "learning_rate": 0.0008509205983889529, "loss": 0.4885, "step": 199710 }, { "epoch": 57.456846950517836, "grad_norm": 1.5585004091262817, "learning_rate": 0.0008508630609896432, "loss": 0.527, "step": 199720 }, { "epoch": 57.45972382048331, "grad_norm": 1.0647786855697632, "learning_rate": 0.0008508055235903337, "loss": 0.5097, "step": 199730 }, { "epoch": 57.462600690448795, "grad_norm": 1.0458718538284302, "learning_rate": 0.0008507479861910242, "loss": 0.5137, "step": 199740 }, { "epoch": 57.46547756041427, "grad_norm": 1.4639708995819092, "learning_rate": 0.0008506904487917146, "loss": 0.5373, "step": 199750 }, { "epoch": 57.46835443037975, "grad_norm": 1.9228336811065674, "learning_rate": 0.000850632911392405, "loss": 0.5511, "step": 199760 }, { "epoch": 57.471231300345224, "grad_norm": 1.135595679283142, "learning_rate": 0.0008505753739930955, "loss": 0.4744, "step": 199770 }, { "epoch": 57.4741081703107, "grad_norm": 1.6241044998168945, "learning_rate": 0.000850517836593786, "loss": 0.5742, "step": 199780 }, { "epoch": 57.476985040276176, "grad_norm": 3.325556993484497, "learning_rate": 0.0008504602991944764, "loss": 0.5456, "step": 199790 }, { "epoch": 57.47986191024166, "grad_norm": 1.355785846710205, "learning_rate": 0.0008504027617951669, "loss": 0.4578, "step": 199800 }, { "epoch": 57.482738780207136, "grad_norm": 0.9957934021949768, "learning_rate": 0.0008503452243958573, "loss": 0.545, "step": 199810 }, { "epoch": 57.48561565017261, "grad_norm": 1.3100805282592773, "learning_rate": 0.0008502876869965478, "loss": 0.5719, "step": 199820 }, { "epoch": 57.48849252013809, "grad_norm": 1.0887712240219116, "learning_rate": 0.0008502301495972382, "loss": 0.4422, "step": 199830 }, { "epoch": 57.491369390103564, "grad_norm": 2.0683605670928955, "learning_rate": 0.0008501726121979286, "loss": 0.4944, "step": 199840 }, { "epoch": 57.49424626006905, "grad_norm": 1.7088481187820435, "learning_rate": 0.0008501150747986192, "loss": 0.4292, "step": 199850 }, { "epoch": 57.497123130034524, "grad_norm": 0.965003490447998, "learning_rate": 0.0008500575373993096, "loss": 0.4134, "step": 199860 }, { "epoch": 57.5, "grad_norm": 2.2986278533935547, "learning_rate": 0.00085, "loss": 0.4755, "step": 199870 }, { "epoch": 57.502876869965476, "grad_norm": 1.2496825456619263, "learning_rate": 0.0008499424626006904, "loss": 0.4355, "step": 199880 }, { "epoch": 57.50575373993095, "grad_norm": 1.7133904695510864, "learning_rate": 0.000849884925201381, "loss": 0.4643, "step": 199890 }, { "epoch": 57.508630609896436, "grad_norm": 1.7428226470947266, "learning_rate": 0.0008498273878020713, "loss": 0.4413, "step": 199900 }, { "epoch": 57.51150747986191, "grad_norm": 1.0316555500030518, "learning_rate": 0.0008497698504027618, "loss": 0.4426, "step": 199910 }, { "epoch": 57.51438434982739, "grad_norm": 1.2745829820632935, "learning_rate": 0.0008497123130034523, "loss": 0.5912, "step": 199920 }, { "epoch": 57.517261219792864, "grad_norm": 1.0862082242965698, "learning_rate": 0.0008496547756041427, "loss": 0.4755, "step": 199930 }, { "epoch": 57.52013808975834, "grad_norm": 0.6717174053192139, "learning_rate": 0.0008495972382048331, "loss": 0.3805, "step": 199940 }, { "epoch": 57.523014959723824, "grad_norm": 2.6340548992156982, "learning_rate": 0.0008495397008055236, "loss": 0.4895, "step": 199950 }, { "epoch": 57.5258918296893, "grad_norm": 1.1989483833312988, "learning_rate": 0.000849482163406214, "loss": 0.4297, "step": 199960 }, { "epoch": 57.528768699654776, "grad_norm": 1.4299238920211792, "learning_rate": 0.0008494246260069045, "loss": 0.5512, "step": 199970 }, { "epoch": 57.53164556962025, "grad_norm": 2.0356619358062744, "learning_rate": 0.000849367088607595, "loss": 0.49, "step": 199980 }, { "epoch": 57.53452243958573, "grad_norm": 0.9211841225624084, "learning_rate": 0.0008493095512082854, "loss": 0.5003, "step": 199990 }, { "epoch": 57.537399309551205, "grad_norm": 1.1928329467773438, "learning_rate": 0.0008492520138089759, "loss": 0.599, "step": 200000 }, { "epoch": 57.54027617951669, "grad_norm": 1.0762799978256226, "learning_rate": 0.0008491944764096663, "loss": 0.5044, "step": 200010 }, { "epoch": 57.543153049482164, "grad_norm": 1.4648290872573853, "learning_rate": 0.0008491369390103567, "loss": 0.6058, "step": 200020 }, { "epoch": 57.54602991944764, "grad_norm": 1.0536922216415405, "learning_rate": 0.0008490794016110472, "loss": 0.5807, "step": 200030 }, { "epoch": 57.54890678941312, "grad_norm": 1.3325519561767578, "learning_rate": 0.0008490218642117377, "loss": 0.4265, "step": 200040 }, { "epoch": 57.55178365937859, "grad_norm": 1.3529958724975586, "learning_rate": 0.000848964326812428, "loss": 0.4364, "step": 200050 }, { "epoch": 57.554660529344076, "grad_norm": 0.7524682879447937, "learning_rate": 0.0008489067894131185, "loss": 0.3183, "step": 200060 }, { "epoch": 57.55753739930955, "grad_norm": 1.077764868736267, "learning_rate": 0.0008488492520138091, "loss": 0.5007, "step": 200070 }, { "epoch": 57.56041426927503, "grad_norm": 1.5402131080627441, "learning_rate": 0.0008487917146144994, "loss": 0.5811, "step": 200080 }, { "epoch": 57.563291139240505, "grad_norm": 1.4832779169082642, "learning_rate": 0.0008487341772151899, "loss": 0.5453, "step": 200090 }, { "epoch": 57.56616800920598, "grad_norm": 0.8138609528541565, "learning_rate": 0.0008486766398158804, "loss": 0.4595, "step": 200100 }, { "epoch": 57.569044879171464, "grad_norm": 1.156891942024231, "learning_rate": 0.0008486191024165708, "loss": 0.4273, "step": 200110 }, { "epoch": 57.57192174913694, "grad_norm": 1.2061251401901245, "learning_rate": 0.0008485615650172612, "loss": 0.4601, "step": 200120 }, { "epoch": 57.57479861910242, "grad_norm": 1.3334764242172241, "learning_rate": 0.0008485040276179517, "loss": 0.4656, "step": 200130 }, { "epoch": 57.57767548906789, "grad_norm": 1.0553497076034546, "learning_rate": 0.0008484464902186421, "loss": 0.6017, "step": 200140 }, { "epoch": 57.58055235903337, "grad_norm": 1.234187126159668, "learning_rate": 0.0008483889528193326, "loss": 0.4843, "step": 200150 }, { "epoch": 57.58342922899885, "grad_norm": 1.2435474395751953, "learning_rate": 0.000848331415420023, "loss": 0.5137, "step": 200160 }, { "epoch": 57.58630609896433, "grad_norm": 1.4543086290359497, "learning_rate": 0.0008482738780207134, "loss": 0.6057, "step": 200170 }, { "epoch": 57.589182968929805, "grad_norm": 0.9052022099494934, "learning_rate": 0.000848216340621404, "loss": 0.4648, "step": 200180 }, { "epoch": 57.59205983889528, "grad_norm": 0.8248547911643982, "learning_rate": 0.0008481588032220944, "loss": 0.574, "step": 200190 }, { "epoch": 57.59493670886076, "grad_norm": 1.8402653932571411, "learning_rate": 0.0008481012658227848, "loss": 0.6585, "step": 200200 }, { "epoch": 57.59781357882623, "grad_norm": 1.3928793668746948, "learning_rate": 0.0008480437284234753, "loss": 0.4882, "step": 200210 }, { "epoch": 57.60069044879172, "grad_norm": 1.3348177671432495, "learning_rate": 0.0008479861910241658, "loss": 0.5504, "step": 200220 }, { "epoch": 57.60356731875719, "grad_norm": 0.6679501533508301, "learning_rate": 0.0008479286536248561, "loss": 0.3513, "step": 200230 }, { "epoch": 57.60644418872267, "grad_norm": 1.5924022197723389, "learning_rate": 0.0008478711162255466, "loss": 0.4243, "step": 200240 }, { "epoch": 57.609321058688145, "grad_norm": 2.204817533493042, "learning_rate": 0.0008478135788262372, "loss": 0.5144, "step": 200250 }, { "epoch": 57.61219792865362, "grad_norm": 0.7243925929069519, "learning_rate": 0.0008477560414269275, "loss": 0.4968, "step": 200260 }, { "epoch": 57.615074798619105, "grad_norm": 1.0390607118606567, "learning_rate": 0.000847698504027618, "loss": 0.5376, "step": 200270 }, { "epoch": 57.61795166858458, "grad_norm": 2.0438222885131836, "learning_rate": 0.0008476409666283084, "loss": 0.4814, "step": 200280 }, { "epoch": 57.62082853855006, "grad_norm": 0.845994770526886, "learning_rate": 0.0008475834292289989, "loss": 0.528, "step": 200290 }, { "epoch": 57.623705408515534, "grad_norm": 1.1037406921386719, "learning_rate": 0.0008475258918296893, "loss": 0.4155, "step": 200300 }, { "epoch": 57.62658227848101, "grad_norm": 1.3689876794815063, "learning_rate": 0.0008474683544303798, "loss": 0.5103, "step": 200310 }, { "epoch": 57.62945914844649, "grad_norm": 0.8299627900123596, "learning_rate": 0.0008474108170310702, "loss": 0.4085, "step": 200320 }, { "epoch": 57.63233601841197, "grad_norm": 1.0828783512115479, "learning_rate": 0.0008473532796317607, "loss": 0.474, "step": 200330 }, { "epoch": 57.635212888377445, "grad_norm": 1.7190264463424683, "learning_rate": 0.0008472957422324511, "loss": 0.4504, "step": 200340 }, { "epoch": 57.63808975834292, "grad_norm": 0.6723747849464417, "learning_rate": 0.0008472382048331415, "loss": 0.4957, "step": 200350 }, { "epoch": 57.6409666283084, "grad_norm": 0.9977958798408508, "learning_rate": 0.0008471806674338321, "loss": 0.406, "step": 200360 }, { "epoch": 57.64384349827388, "grad_norm": 1.0181957483291626, "learning_rate": 0.0008471231300345225, "loss": 0.5202, "step": 200370 }, { "epoch": 57.64672036823936, "grad_norm": 1.5040582418441772, "learning_rate": 0.0008470655926352129, "loss": 0.555, "step": 200380 }, { "epoch": 57.649597238204834, "grad_norm": 0.6696551442146301, "learning_rate": 0.0008470080552359034, "loss": 0.4281, "step": 200390 }, { "epoch": 57.65247410817031, "grad_norm": 0.99814772605896, "learning_rate": 0.0008469505178365939, "loss": 0.5474, "step": 200400 }, { "epoch": 57.655350978135786, "grad_norm": 1.238168716430664, "learning_rate": 0.0008468929804372842, "loss": 0.5086, "step": 200410 }, { "epoch": 57.65822784810126, "grad_norm": 1.5858166217803955, "learning_rate": 0.0008468354430379747, "loss": 0.7201, "step": 200420 }, { "epoch": 57.661104718066746, "grad_norm": 1.3544425964355469, "learning_rate": 0.0008467779056386652, "loss": 0.4323, "step": 200430 }, { "epoch": 57.66398158803222, "grad_norm": 1.072310209274292, "learning_rate": 0.0008467203682393556, "loss": 0.4364, "step": 200440 }, { "epoch": 57.6668584579977, "grad_norm": 2.223219156265259, "learning_rate": 0.000846662830840046, "loss": 0.5359, "step": 200450 }, { "epoch": 57.669735327963174, "grad_norm": 0.7331924438476562, "learning_rate": 0.0008466052934407364, "loss": 0.5616, "step": 200460 }, { "epoch": 57.67261219792865, "grad_norm": 2.8404715061187744, "learning_rate": 0.000846547756041427, "loss": 0.6393, "step": 200470 }, { "epoch": 57.675489067894134, "grad_norm": 1.5594432353973389, "learning_rate": 0.0008464902186421174, "loss": 0.5344, "step": 200480 }, { "epoch": 57.67836593785961, "grad_norm": 1.2797576189041138, "learning_rate": 0.0008464326812428078, "loss": 0.6273, "step": 200490 }, { "epoch": 57.681242807825086, "grad_norm": 0.7969018816947937, "learning_rate": 0.0008463751438434983, "loss": 0.501, "step": 200500 }, { "epoch": 57.68411967779056, "grad_norm": 0.9810711145401001, "learning_rate": 0.0008463176064441888, "loss": 0.5736, "step": 200510 }, { "epoch": 57.68699654775604, "grad_norm": 1.28366219997406, "learning_rate": 0.0008462600690448791, "loss": 0.5369, "step": 200520 }, { "epoch": 57.68987341772152, "grad_norm": 1.1823104619979858, "learning_rate": 0.0008462025316455696, "loss": 0.5558, "step": 200530 }, { "epoch": 57.692750287687, "grad_norm": 1.0513685941696167, "learning_rate": 0.0008461449942462601, "loss": 0.527, "step": 200540 }, { "epoch": 57.695627157652474, "grad_norm": 1.7260093688964844, "learning_rate": 0.0008460874568469505, "loss": 0.5589, "step": 200550 }, { "epoch": 57.69850402761795, "grad_norm": 1.507648229598999, "learning_rate": 0.000846029919447641, "loss": 0.6143, "step": 200560 }, { "epoch": 57.70138089758343, "grad_norm": 0.7125827074050903, "learning_rate": 0.0008459723820483314, "loss": 0.5123, "step": 200570 }, { "epoch": 57.70425776754891, "grad_norm": 0.9184088110923767, "learning_rate": 0.0008459148446490219, "loss": 0.5034, "step": 200580 }, { "epoch": 57.707134637514386, "grad_norm": 1.7109553813934326, "learning_rate": 0.0008458573072497123, "loss": 0.5109, "step": 200590 }, { "epoch": 57.71001150747986, "grad_norm": 1.5655730962753296, "learning_rate": 0.0008457997698504028, "loss": 0.4696, "step": 200600 }, { "epoch": 57.71288837744534, "grad_norm": 1.0959150791168213, "learning_rate": 0.0008457422324510932, "loss": 0.3791, "step": 200610 }, { "epoch": 57.715765247410815, "grad_norm": 1.8806136846542358, "learning_rate": 0.0008456846950517837, "loss": 0.611, "step": 200620 }, { "epoch": 57.7186421173763, "grad_norm": 0.9125579595565796, "learning_rate": 0.0008456271576524741, "loss": 0.6119, "step": 200630 }, { "epoch": 57.721518987341774, "grad_norm": 0.8102630972862244, "learning_rate": 0.0008455696202531645, "loss": 0.5365, "step": 200640 }, { "epoch": 57.72439585730725, "grad_norm": 0.7630280256271362, "learning_rate": 0.000845512082853855, "loss": 0.4804, "step": 200650 }, { "epoch": 57.72727272727273, "grad_norm": 1.3773478269577026, "learning_rate": 0.0008454545454545455, "loss": 0.399, "step": 200660 }, { "epoch": 57.7301495972382, "grad_norm": 0.749296247959137, "learning_rate": 0.0008453970080552359, "loss": 0.4821, "step": 200670 }, { "epoch": 57.73302646720368, "grad_norm": 1.3367671966552734, "learning_rate": 0.0008453394706559264, "loss": 0.4555, "step": 200680 }, { "epoch": 57.73590333716916, "grad_norm": 1.0347237586975098, "learning_rate": 0.0008452819332566169, "loss": 0.4661, "step": 200690 }, { "epoch": 57.73878020713464, "grad_norm": 2.87953519821167, "learning_rate": 0.0008452243958573072, "loss": 0.5315, "step": 200700 }, { "epoch": 57.741657077100115, "grad_norm": 0.8146804571151733, "learning_rate": 0.0008451668584579977, "loss": 0.5178, "step": 200710 }, { "epoch": 57.74453394706559, "grad_norm": 1.6643685102462769, "learning_rate": 0.0008451093210586882, "loss": 0.5514, "step": 200720 }, { "epoch": 57.74741081703107, "grad_norm": 1.0503249168395996, "learning_rate": 0.0008450517836593786, "loss": 0.4032, "step": 200730 }, { "epoch": 57.75028768699655, "grad_norm": 1.4515516757965088, "learning_rate": 0.000844994246260069, "loss": 0.5555, "step": 200740 }, { "epoch": 57.75316455696203, "grad_norm": 1.2718391418457031, "learning_rate": 0.0008449367088607595, "loss": 0.4707, "step": 200750 }, { "epoch": 57.7560414269275, "grad_norm": 0.6911075115203857, "learning_rate": 0.00084487917146145, "loss": 0.4547, "step": 200760 }, { "epoch": 57.75891829689298, "grad_norm": 1.1162660121917725, "learning_rate": 0.0008448216340621404, "loss": 0.4817, "step": 200770 }, { "epoch": 57.761795166858455, "grad_norm": 0.6537383794784546, "learning_rate": 0.0008447640966628309, "loss": 0.5008, "step": 200780 }, { "epoch": 57.76467203682394, "grad_norm": 1.3812735080718994, "learning_rate": 0.0008447065592635213, "loss": 0.4855, "step": 200790 }, { "epoch": 57.767548906789415, "grad_norm": 2.1242196559906006, "learning_rate": 0.0008446490218642118, "loss": 0.5804, "step": 200800 }, { "epoch": 57.77042577675489, "grad_norm": 0.9549672603607178, "learning_rate": 0.0008445914844649022, "loss": 0.4308, "step": 200810 }, { "epoch": 57.77330264672037, "grad_norm": 1.2754652500152588, "learning_rate": 0.0008445339470655926, "loss": 0.4858, "step": 200820 }, { "epoch": 57.77617951668584, "grad_norm": 1.9977091550827026, "learning_rate": 0.0008444764096662831, "loss": 0.6132, "step": 200830 }, { "epoch": 57.77905638665133, "grad_norm": 1.2483822107315063, "learning_rate": 0.0008444188722669736, "loss": 0.5198, "step": 200840 }, { "epoch": 57.7819332566168, "grad_norm": 0.8487493395805359, "learning_rate": 0.0008443613348676639, "loss": 0.3943, "step": 200850 }, { "epoch": 57.78481012658228, "grad_norm": 2.2871453762054443, "learning_rate": 0.0008443037974683544, "loss": 0.5366, "step": 200860 }, { "epoch": 57.787686996547755, "grad_norm": 1.135394811630249, "learning_rate": 0.000844246260069045, "loss": 0.4445, "step": 200870 }, { "epoch": 57.79056386651323, "grad_norm": 2.522660732269287, "learning_rate": 0.0008441887226697353, "loss": 0.7089, "step": 200880 }, { "epoch": 57.79344073647871, "grad_norm": 1.487644076347351, "learning_rate": 0.0008441311852704258, "loss": 0.5945, "step": 200890 }, { "epoch": 57.79631760644419, "grad_norm": 1.2369822263717651, "learning_rate": 0.0008440736478711163, "loss": 0.5898, "step": 200900 }, { "epoch": 57.79919447640967, "grad_norm": 1.7954814434051514, "learning_rate": 0.0008440161104718067, "loss": 0.59, "step": 200910 }, { "epoch": 57.80207134637514, "grad_norm": 1.8865739107131958, "learning_rate": 0.0008439585730724971, "loss": 0.5055, "step": 200920 }, { "epoch": 57.80494821634062, "grad_norm": 1.3932236433029175, "learning_rate": 0.0008439010356731876, "loss": 0.5615, "step": 200930 }, { "epoch": 57.807825086306096, "grad_norm": 0.6950613856315613, "learning_rate": 0.000843843498273878, "loss": 0.4871, "step": 200940 }, { "epoch": 57.81070195627158, "grad_norm": 1.8372665643692017, "learning_rate": 0.0008437859608745685, "loss": 0.5105, "step": 200950 }, { "epoch": 57.813578826237055, "grad_norm": 0.9110214114189148, "learning_rate": 0.000843728423475259, "loss": 0.4975, "step": 200960 }, { "epoch": 57.81645569620253, "grad_norm": 0.7729853987693787, "learning_rate": 0.0008436708860759494, "loss": 0.481, "step": 200970 }, { "epoch": 57.81933256616801, "grad_norm": 1.3893702030181885, "learning_rate": 0.0008436133486766399, "loss": 0.4944, "step": 200980 }, { "epoch": 57.822209436133484, "grad_norm": 1.5148608684539795, "learning_rate": 0.0008435558112773303, "loss": 0.3729, "step": 200990 }, { "epoch": 57.82508630609897, "grad_norm": 1.1095151901245117, "learning_rate": 0.0008434982738780207, "loss": 0.5581, "step": 201000 }, { "epoch": 57.82796317606444, "grad_norm": 1.189021348953247, "learning_rate": 0.0008434407364787112, "loss": 0.5053, "step": 201010 }, { "epoch": 57.83084004602992, "grad_norm": 1.3269816637039185, "learning_rate": 0.0008433831990794017, "loss": 0.4895, "step": 201020 }, { "epoch": 57.833716915995396, "grad_norm": 1.5575095415115356, "learning_rate": 0.000843325661680092, "loss": 0.4383, "step": 201030 }, { "epoch": 57.83659378596087, "grad_norm": 1.6643544435501099, "learning_rate": 0.0008432681242807825, "loss": 0.4735, "step": 201040 }, { "epoch": 57.839470655926355, "grad_norm": 0.8292138576507568, "learning_rate": 0.000843210586881473, "loss": 0.7278, "step": 201050 }, { "epoch": 57.84234752589183, "grad_norm": 2.2594847679138184, "learning_rate": 0.0008431530494821634, "loss": 0.5214, "step": 201060 }, { "epoch": 57.84522439585731, "grad_norm": 2.0521490573883057, "learning_rate": 0.0008430955120828539, "loss": 0.5767, "step": 201070 }, { "epoch": 57.848101265822784, "grad_norm": 1.4730998277664185, "learning_rate": 0.0008430379746835444, "loss": 0.536, "step": 201080 }, { "epoch": 57.85097813578826, "grad_norm": 1.2464768886566162, "learning_rate": 0.0008429804372842348, "loss": 0.4951, "step": 201090 }, { "epoch": 57.85385500575374, "grad_norm": 1.4281364679336548, "learning_rate": 0.0008429228998849252, "loss": 0.5501, "step": 201100 }, { "epoch": 57.85673187571922, "grad_norm": 2.278853416442871, "learning_rate": 0.0008428653624856157, "loss": 0.5582, "step": 201110 }, { "epoch": 57.859608745684696, "grad_norm": 0.7715792655944824, "learning_rate": 0.0008428078250863061, "loss": 0.4499, "step": 201120 }, { "epoch": 57.86248561565017, "grad_norm": 1.3063685894012451, "learning_rate": 0.0008427502876869966, "loss": 0.5438, "step": 201130 }, { "epoch": 57.86536248561565, "grad_norm": 0.8868860602378845, "learning_rate": 0.000842692750287687, "loss": 0.4222, "step": 201140 }, { "epoch": 57.868239355581125, "grad_norm": 1.2968382835388184, "learning_rate": 0.0008426352128883774, "loss": 0.3981, "step": 201150 }, { "epoch": 57.87111622554661, "grad_norm": 0.8927236795425415, "learning_rate": 0.000842577675489068, "loss": 0.5347, "step": 201160 }, { "epoch": 57.873993095512084, "grad_norm": 2.58889102935791, "learning_rate": 0.0008425201380897584, "loss": 0.712, "step": 201170 }, { "epoch": 57.87686996547756, "grad_norm": 2.0925519466400146, "learning_rate": 0.0008424626006904488, "loss": 0.5817, "step": 201180 }, { "epoch": 57.879746835443036, "grad_norm": 2.214622735977173, "learning_rate": 0.0008424050632911393, "loss": 0.4726, "step": 201190 }, { "epoch": 57.88262370540851, "grad_norm": 1.5361368656158447, "learning_rate": 0.0008423475258918298, "loss": 0.4875, "step": 201200 }, { "epoch": 57.885500575373996, "grad_norm": 1.133945345878601, "learning_rate": 0.0008422899884925201, "loss": 0.4424, "step": 201210 }, { "epoch": 57.88837744533947, "grad_norm": 1.5500328540802002, "learning_rate": 0.0008422324510932106, "loss": 0.4715, "step": 201220 }, { "epoch": 57.89125431530495, "grad_norm": 0.8492522239685059, "learning_rate": 0.0008421749136939011, "loss": 0.4667, "step": 201230 }, { "epoch": 57.894131185270425, "grad_norm": 1.3247584104537964, "learning_rate": 0.0008421173762945915, "loss": 0.5713, "step": 201240 }, { "epoch": 57.8970080552359, "grad_norm": 1.7169408798217773, "learning_rate": 0.0008420598388952819, "loss": 0.6182, "step": 201250 }, { "epoch": 57.899884925201384, "grad_norm": 1.109401822090149, "learning_rate": 0.0008420023014959723, "loss": 0.5258, "step": 201260 }, { "epoch": 57.90276179516686, "grad_norm": 0.9724928140640259, "learning_rate": 0.0008419447640966629, "loss": 0.5561, "step": 201270 }, { "epoch": 57.90563866513234, "grad_norm": 1.6587824821472168, "learning_rate": 0.0008418872266973533, "loss": 0.562, "step": 201280 }, { "epoch": 57.90851553509781, "grad_norm": 1.2672322988510132, "learning_rate": 0.0008418296892980437, "loss": 0.4958, "step": 201290 }, { "epoch": 57.91139240506329, "grad_norm": 1.1178539991378784, "learning_rate": 0.0008417721518987342, "loss": 0.7088, "step": 201300 }, { "epoch": 57.91426927502877, "grad_norm": 1.609961748123169, "learning_rate": 0.0008417146144994247, "loss": 0.5116, "step": 201310 }, { "epoch": 57.91714614499425, "grad_norm": 1.4233072996139526, "learning_rate": 0.000841657077100115, "loss": 0.5151, "step": 201320 }, { "epoch": 57.920023014959725, "grad_norm": 0.9966496229171753, "learning_rate": 0.0008415995397008055, "loss": 0.5544, "step": 201330 }, { "epoch": 57.9228998849252, "grad_norm": 0.9093314409255981, "learning_rate": 0.000841542002301496, "loss": 0.5081, "step": 201340 }, { "epoch": 57.92577675489068, "grad_norm": 1.696485996246338, "learning_rate": 0.0008414844649021864, "loss": 0.6204, "step": 201350 }, { "epoch": 57.92865362485615, "grad_norm": 1.730737328529358, "learning_rate": 0.0008414269275028768, "loss": 0.5153, "step": 201360 }, { "epoch": 57.93153049482164, "grad_norm": 1.2779804468154907, "learning_rate": 0.0008413693901035674, "loss": 0.4466, "step": 201370 }, { "epoch": 57.93440736478711, "grad_norm": 1.2945005893707275, "learning_rate": 0.0008413118527042578, "loss": 0.5, "step": 201380 }, { "epoch": 57.93728423475259, "grad_norm": 0.7907283902168274, "learning_rate": 0.0008412543153049482, "loss": 0.4397, "step": 201390 }, { "epoch": 57.940161104718065, "grad_norm": 0.9959597587585449, "learning_rate": 0.0008411967779056387, "loss": 0.6075, "step": 201400 }, { "epoch": 57.94303797468354, "grad_norm": 0.8982173204421997, "learning_rate": 0.0008411392405063291, "loss": 0.4653, "step": 201410 }, { "epoch": 57.945914844649025, "grad_norm": 0.9386853575706482, "learning_rate": 0.0008410817031070196, "loss": 0.4633, "step": 201420 }, { "epoch": 57.9487917146145, "grad_norm": 1.4174315929412842, "learning_rate": 0.00084102416570771, "loss": 0.451, "step": 201430 }, { "epoch": 57.95166858457998, "grad_norm": 0.7802709341049194, "learning_rate": 0.0008409666283084004, "loss": 0.5012, "step": 201440 }, { "epoch": 57.95454545454545, "grad_norm": 1.0092544555664062, "learning_rate": 0.000840909090909091, "loss": 0.5418, "step": 201450 }, { "epoch": 57.95742232451093, "grad_norm": 1.4226752519607544, "learning_rate": 0.0008408515535097814, "loss": 0.4166, "step": 201460 }, { "epoch": 57.96029919447641, "grad_norm": 1.977702021598816, "learning_rate": 0.0008407940161104717, "loss": 0.5905, "step": 201470 }, { "epoch": 57.96317606444189, "grad_norm": 0.6177926063537598, "learning_rate": 0.0008407364787111623, "loss": 0.5312, "step": 201480 }, { "epoch": 57.966052934407365, "grad_norm": 1.4939630031585693, "learning_rate": 0.0008406789413118528, "loss": 0.5031, "step": 201490 }, { "epoch": 57.96892980437284, "grad_norm": 1.337476134300232, "learning_rate": 0.0008406214039125431, "loss": 0.4829, "step": 201500 }, { "epoch": 57.97180667433832, "grad_norm": 1.1896637678146362, "learning_rate": 0.0008405638665132336, "loss": 0.4409, "step": 201510 }, { "epoch": 57.9746835443038, "grad_norm": 0.6081786155700684, "learning_rate": 0.0008405063291139241, "loss": 0.4928, "step": 201520 }, { "epoch": 57.97756041426928, "grad_norm": 2.170994758605957, "learning_rate": 0.0008404487917146145, "loss": 0.5301, "step": 201530 }, { "epoch": 57.98043728423475, "grad_norm": 1.08425772190094, "learning_rate": 0.0008403912543153049, "loss": 0.4875, "step": 201540 }, { "epoch": 57.98331415420023, "grad_norm": 0.8921001553535461, "learning_rate": 0.0008403337169159954, "loss": 0.5113, "step": 201550 }, { "epoch": 57.986191024165706, "grad_norm": 1.1478183269500732, "learning_rate": 0.0008402761795166858, "loss": 0.4874, "step": 201560 }, { "epoch": 57.98906789413118, "grad_norm": 1.2025269269943237, "learning_rate": 0.0008402186421173763, "loss": 0.4747, "step": 201570 }, { "epoch": 57.991944764096665, "grad_norm": 1.4717538356781006, "learning_rate": 0.0008401611047180668, "loss": 0.5185, "step": 201580 }, { "epoch": 57.99482163406214, "grad_norm": 1.4243121147155762, "learning_rate": 0.0008401035673187572, "loss": 0.4989, "step": 201590 }, { "epoch": 57.99769850402762, "grad_norm": 1.5107994079589844, "learning_rate": 0.0008400460299194477, "loss": 0.5825, "step": 201600 }, { "epoch": 58.000575373993094, "grad_norm": 1.7551370859146118, "learning_rate": 0.0008399884925201381, "loss": 0.6081, "step": 201610 }, { "epoch": 58.00345224395857, "grad_norm": 0.690526008605957, "learning_rate": 0.0008399309551208285, "loss": 0.4425, "step": 201620 }, { "epoch": 58.00632911392405, "grad_norm": 1.6897175312042236, "learning_rate": 0.000839873417721519, "loss": 0.4585, "step": 201630 }, { "epoch": 58.00920598388953, "grad_norm": 1.1470649242401123, "learning_rate": 0.0008398158803222095, "loss": 0.4259, "step": 201640 }, { "epoch": 58.012082853855006, "grad_norm": 0.9882254600524902, "learning_rate": 0.0008397583429228998, "loss": 0.5506, "step": 201650 }, { "epoch": 58.01495972382048, "grad_norm": 0.7172695398330688, "learning_rate": 0.0008397008055235904, "loss": 0.4721, "step": 201660 }, { "epoch": 58.01783659378596, "grad_norm": 1.4073655605316162, "learning_rate": 0.0008396432681242809, "loss": 0.5226, "step": 201670 }, { "epoch": 58.02071346375144, "grad_norm": 0.6963782906532288, "learning_rate": 0.0008395857307249712, "loss": 0.5242, "step": 201680 }, { "epoch": 58.02359033371692, "grad_norm": 1.7233977317810059, "learning_rate": 0.0008395281933256617, "loss": 0.4898, "step": 201690 }, { "epoch": 58.026467203682394, "grad_norm": 0.8189466595649719, "learning_rate": 0.0008394706559263522, "loss": 0.4298, "step": 201700 }, { "epoch": 58.02934407364787, "grad_norm": 1.2927634716033936, "learning_rate": 0.0008394131185270426, "loss": 0.4463, "step": 201710 }, { "epoch": 58.032220943613346, "grad_norm": 0.8486729264259338, "learning_rate": 0.000839355581127733, "loss": 0.46, "step": 201720 }, { "epoch": 58.03509781357883, "grad_norm": 1.1142750978469849, "learning_rate": 0.0008392980437284235, "loss": 0.4536, "step": 201730 }, { "epoch": 58.037974683544306, "grad_norm": 1.0066969394683838, "learning_rate": 0.0008392405063291139, "loss": 0.4579, "step": 201740 }, { "epoch": 58.04085155350978, "grad_norm": 1.0236200094223022, "learning_rate": 0.0008391829689298044, "loss": 0.3974, "step": 201750 }, { "epoch": 58.04372842347526, "grad_norm": 1.117002010345459, "learning_rate": 0.0008391254315304949, "loss": 0.5479, "step": 201760 }, { "epoch": 58.046605293440734, "grad_norm": 1.0072274208068848, "learning_rate": 0.0008390678941311853, "loss": 0.3895, "step": 201770 }, { "epoch": 58.04948216340621, "grad_norm": 0.8602511882781982, "learning_rate": 0.0008390103567318758, "loss": 0.475, "step": 201780 }, { "epoch": 58.052359033371694, "grad_norm": 0.737062931060791, "learning_rate": 0.0008389528193325662, "loss": 0.5189, "step": 201790 }, { "epoch": 58.05523590333717, "grad_norm": 1.8472001552581787, "learning_rate": 0.0008388952819332566, "loss": 0.5322, "step": 201800 }, { "epoch": 58.058112773302646, "grad_norm": 1.1609143018722534, "learning_rate": 0.0008388377445339471, "loss": 0.5798, "step": 201810 }, { "epoch": 58.06098964326812, "grad_norm": 0.8684885501861572, "learning_rate": 0.0008387802071346376, "loss": 0.4541, "step": 201820 }, { "epoch": 58.0638665132336, "grad_norm": 0.8366134762763977, "learning_rate": 0.0008387226697353279, "loss": 0.3862, "step": 201830 }, { "epoch": 58.06674338319908, "grad_norm": 0.8650946021080017, "learning_rate": 0.0008386651323360184, "loss": 0.4901, "step": 201840 }, { "epoch": 58.06962025316456, "grad_norm": 1.7235966920852661, "learning_rate": 0.000838607594936709, "loss": 0.5034, "step": 201850 }, { "epoch": 58.072497123130034, "grad_norm": 1.5197556018829346, "learning_rate": 0.0008385500575373993, "loss": 0.4233, "step": 201860 }, { "epoch": 58.07537399309551, "grad_norm": 1.0227127075195312, "learning_rate": 0.0008384925201380898, "loss": 0.4285, "step": 201870 }, { "epoch": 58.07825086306099, "grad_norm": 2.128694772720337, "learning_rate": 0.0008384349827387803, "loss": 0.502, "step": 201880 }, { "epoch": 58.08112773302647, "grad_norm": 1.4567840099334717, "learning_rate": 0.0008383774453394707, "loss": 0.479, "step": 201890 }, { "epoch": 58.084004602991946, "grad_norm": 1.4383260011672974, "learning_rate": 0.0008383199079401611, "loss": 0.4702, "step": 201900 }, { "epoch": 58.08688147295742, "grad_norm": 1.0285110473632812, "learning_rate": 0.0008382623705408516, "loss": 0.5154, "step": 201910 }, { "epoch": 58.0897583429229, "grad_norm": 0.8986902832984924, "learning_rate": 0.000838204833141542, "loss": 0.5509, "step": 201920 }, { "epoch": 58.092635212888375, "grad_norm": 1.9001387357711792, "learning_rate": 0.0008381472957422325, "loss": 0.5077, "step": 201930 }, { "epoch": 58.09551208285386, "grad_norm": 1.8092830181121826, "learning_rate": 0.0008380897583429229, "loss": 0.562, "step": 201940 }, { "epoch": 58.098388952819334, "grad_norm": 0.6777967810630798, "learning_rate": 0.0008380322209436134, "loss": 0.4302, "step": 201950 }, { "epoch": 58.10126582278481, "grad_norm": 0.9671874642372131, "learning_rate": 0.0008379746835443039, "loss": 0.4049, "step": 201960 }, { "epoch": 58.10414269275029, "grad_norm": 1.326210379600525, "learning_rate": 0.0008379171461449943, "loss": 0.3944, "step": 201970 }, { "epoch": 58.10701956271576, "grad_norm": 0.9471763968467712, "learning_rate": 0.0008378596087456847, "loss": 0.4217, "step": 201980 }, { "epoch": 58.10989643268124, "grad_norm": 1.2178946733474731, "learning_rate": 0.0008378020713463752, "loss": 0.432, "step": 201990 }, { "epoch": 58.11277330264672, "grad_norm": 1.0978844165802002, "learning_rate": 0.0008377445339470657, "loss": 0.3763, "step": 202000 }, { "epoch": 58.1156501726122, "grad_norm": 1.0299713611602783, "learning_rate": 0.000837686996547756, "loss": 0.4457, "step": 202010 }, { "epoch": 58.118527042577675, "grad_norm": 0.9704151153564453, "learning_rate": 0.0008376294591484465, "loss": 0.3755, "step": 202020 }, { "epoch": 58.12140391254315, "grad_norm": 1.6681225299835205, "learning_rate": 0.000837571921749137, "loss": 0.5254, "step": 202030 }, { "epoch": 58.12428078250863, "grad_norm": 1.3259303569793701, "learning_rate": 0.0008375143843498274, "loss": 0.4549, "step": 202040 }, { "epoch": 58.12715765247411, "grad_norm": 1.4698452949523926, "learning_rate": 0.0008374568469505178, "loss": 0.5079, "step": 202050 }, { "epoch": 58.13003452243959, "grad_norm": 1.977939486503601, "learning_rate": 0.0008373993095512084, "loss": 0.4082, "step": 202060 }, { "epoch": 58.13291139240506, "grad_norm": 1.327158808708191, "learning_rate": 0.0008373417721518988, "loss": 0.4545, "step": 202070 }, { "epoch": 58.13578826237054, "grad_norm": 1.2825326919555664, "learning_rate": 0.0008372842347525892, "loss": 0.476, "step": 202080 }, { "epoch": 58.138665132336016, "grad_norm": 0.7701807022094727, "learning_rate": 0.0008372266973532796, "loss": 0.4433, "step": 202090 }, { "epoch": 58.1415420023015, "grad_norm": 1.64815354347229, "learning_rate": 0.0008371691599539701, "loss": 0.5518, "step": 202100 }, { "epoch": 58.144418872266975, "grad_norm": 0.8639223575592041, "learning_rate": 0.0008371116225546606, "loss": 0.4881, "step": 202110 }, { "epoch": 58.14729574223245, "grad_norm": 1.5644865036010742, "learning_rate": 0.0008370540851553509, "loss": 0.523, "step": 202120 }, { "epoch": 58.15017261219793, "grad_norm": 1.283784031867981, "learning_rate": 0.0008369965477560414, "loss": 0.469, "step": 202130 }, { "epoch": 58.153049482163404, "grad_norm": 1.7751867771148682, "learning_rate": 0.0008369390103567319, "loss": 0.474, "step": 202140 }, { "epoch": 58.15592635212889, "grad_norm": 2.028873920440674, "learning_rate": 0.0008368814729574223, "loss": 0.4226, "step": 202150 }, { "epoch": 58.15880322209436, "grad_norm": 1.3262264728546143, "learning_rate": 0.0008368239355581127, "loss": 0.478, "step": 202160 }, { "epoch": 58.16168009205984, "grad_norm": 1.1476569175720215, "learning_rate": 0.0008367663981588033, "loss": 0.3832, "step": 202170 }, { "epoch": 58.164556962025316, "grad_norm": 1.0217697620391846, "learning_rate": 0.0008367088607594937, "loss": 0.4063, "step": 202180 }, { "epoch": 58.16743383199079, "grad_norm": 1.3184863328933716, "learning_rate": 0.0008366513233601841, "loss": 0.5221, "step": 202190 }, { "epoch": 58.170310701956275, "grad_norm": 0.8172518610954285, "learning_rate": 0.0008365937859608746, "loss": 0.4209, "step": 202200 }, { "epoch": 58.17318757192175, "grad_norm": 1.7637790441513062, "learning_rate": 0.000836536248561565, "loss": 0.56, "step": 202210 }, { "epoch": 58.17606444188723, "grad_norm": 1.6307289600372314, "learning_rate": 0.0008364787111622555, "loss": 0.5691, "step": 202220 }, { "epoch": 58.178941311852704, "grad_norm": 1.15320885181427, "learning_rate": 0.0008364211737629459, "loss": 0.4883, "step": 202230 }, { "epoch": 58.18181818181818, "grad_norm": 1.212385654449463, "learning_rate": 0.0008363636363636363, "loss": 0.3175, "step": 202240 }, { "epoch": 58.184695051783656, "grad_norm": 1.734207272529602, "learning_rate": 0.0008363060989643268, "loss": 0.5812, "step": 202250 }, { "epoch": 58.18757192174914, "grad_norm": 1.1547588109970093, "learning_rate": 0.0008362485615650173, "loss": 0.6557, "step": 202260 }, { "epoch": 58.190448791714616, "grad_norm": 0.7284319996833801, "learning_rate": 0.0008361910241657076, "loss": 0.5269, "step": 202270 }, { "epoch": 58.19332566168009, "grad_norm": 1.4196135997772217, "learning_rate": 0.0008361334867663982, "loss": 0.5196, "step": 202280 }, { "epoch": 58.19620253164557, "grad_norm": 0.785240650177002, "learning_rate": 0.0008360759493670887, "loss": 0.5096, "step": 202290 }, { "epoch": 58.199079401611044, "grad_norm": 1.2037557363510132, "learning_rate": 0.000836018411967779, "loss": 0.4229, "step": 202300 }, { "epoch": 58.20195627157653, "grad_norm": 1.097497582435608, "learning_rate": 0.0008359608745684695, "loss": 0.4037, "step": 202310 }, { "epoch": 58.204833141542004, "grad_norm": 1.3649358749389648, "learning_rate": 0.00083590333716916, "loss": 0.4649, "step": 202320 }, { "epoch": 58.20771001150748, "grad_norm": 0.8091023564338684, "learning_rate": 0.0008358457997698504, "loss": 0.4166, "step": 202330 }, { "epoch": 58.210586881472956, "grad_norm": 1.0226370096206665, "learning_rate": 0.0008357882623705408, "loss": 0.529, "step": 202340 }, { "epoch": 58.21346375143843, "grad_norm": 1.0210705995559692, "learning_rate": 0.0008357307249712314, "loss": 0.4664, "step": 202350 }, { "epoch": 58.216340621403916, "grad_norm": 1.1912846565246582, "learning_rate": 0.0008356731875719217, "loss": 0.4925, "step": 202360 }, { "epoch": 58.21921749136939, "grad_norm": 1.0270261764526367, "learning_rate": 0.0008356156501726122, "loss": 0.6721, "step": 202370 }, { "epoch": 58.22209436133487, "grad_norm": 0.8012372255325317, "learning_rate": 0.0008355581127733027, "loss": 0.3475, "step": 202380 }, { "epoch": 58.224971231300344, "grad_norm": 0.9635639786720276, "learning_rate": 0.0008355005753739931, "loss": 0.4125, "step": 202390 }, { "epoch": 58.22784810126582, "grad_norm": 0.82814621925354, "learning_rate": 0.0008354430379746836, "loss": 0.5179, "step": 202400 }, { "epoch": 58.230724971231304, "grad_norm": 1.4779212474822998, "learning_rate": 0.000835385500575374, "loss": 0.5312, "step": 202410 }, { "epoch": 58.23360184119678, "grad_norm": 0.7197113037109375, "learning_rate": 0.0008353279631760644, "loss": 0.4153, "step": 202420 }, { "epoch": 58.236478711162256, "grad_norm": 0.8218297958374023, "learning_rate": 0.0008352704257767549, "loss": 0.4556, "step": 202430 }, { "epoch": 58.23935558112773, "grad_norm": 1.9345799684524536, "learning_rate": 0.0008352128883774454, "loss": 0.4807, "step": 202440 }, { "epoch": 58.24223245109321, "grad_norm": 0.8559636473655701, "learning_rate": 0.0008351553509781357, "loss": 0.4001, "step": 202450 }, { "epoch": 58.245109321058685, "grad_norm": 1.18616783618927, "learning_rate": 0.0008350978135788263, "loss": 0.6015, "step": 202460 }, { "epoch": 58.24798619102417, "grad_norm": 1.5672383308410645, "learning_rate": 0.0008350402761795168, "loss": 0.541, "step": 202470 }, { "epoch": 58.250863060989644, "grad_norm": 1.6684726476669312, "learning_rate": 0.0008349827387802071, "loss": 0.6136, "step": 202480 }, { "epoch": 58.25373993095512, "grad_norm": 0.6793472766876221, "learning_rate": 0.0008349252013808976, "loss": 0.532, "step": 202490 }, { "epoch": 58.2566168009206, "grad_norm": 0.6718248724937439, "learning_rate": 0.0008348676639815881, "loss": 0.5117, "step": 202500 }, { "epoch": 58.25949367088607, "grad_norm": 0.9316689372062683, "learning_rate": 0.0008348101265822785, "loss": 0.3638, "step": 202510 }, { "epoch": 58.262370540851556, "grad_norm": 2.0862903594970703, "learning_rate": 0.0008347525891829689, "loss": 0.5222, "step": 202520 }, { "epoch": 58.26524741081703, "grad_norm": 1.866559386253357, "learning_rate": 0.0008346950517836594, "loss": 0.4672, "step": 202530 }, { "epoch": 58.26812428078251, "grad_norm": 0.7252947688102722, "learning_rate": 0.0008346375143843498, "loss": 0.6297, "step": 202540 }, { "epoch": 58.271001150747985, "grad_norm": 1.0564007759094238, "learning_rate": 0.0008345799769850403, "loss": 0.4416, "step": 202550 }, { "epoch": 58.27387802071346, "grad_norm": 1.280727505683899, "learning_rate": 0.0008345224395857307, "loss": 0.4607, "step": 202560 }, { "epoch": 58.276754890678944, "grad_norm": 1.3195198774337769, "learning_rate": 0.0008344649021864212, "loss": 0.4989, "step": 202570 }, { "epoch": 58.27963176064442, "grad_norm": 2.6187636852264404, "learning_rate": 0.0008344073647871117, "loss": 0.4912, "step": 202580 }, { "epoch": 58.2825086306099, "grad_norm": 0.9883415102958679, "learning_rate": 0.0008343498273878021, "loss": 0.4823, "step": 202590 }, { "epoch": 58.28538550057537, "grad_norm": 1.1808929443359375, "learning_rate": 0.0008342922899884925, "loss": 0.4461, "step": 202600 }, { "epoch": 58.28826237054085, "grad_norm": 1.115546464920044, "learning_rate": 0.000834234752589183, "loss": 0.4104, "step": 202610 }, { "epoch": 58.29113924050633, "grad_norm": 1.4811254739761353, "learning_rate": 0.0008341772151898735, "loss": 0.5021, "step": 202620 }, { "epoch": 58.29401611047181, "grad_norm": 0.8389849662780762, "learning_rate": 0.0008341196777905638, "loss": 0.4187, "step": 202630 }, { "epoch": 58.296892980437285, "grad_norm": 1.3988616466522217, "learning_rate": 0.0008340621403912544, "loss": 0.6035, "step": 202640 }, { "epoch": 58.29976985040276, "grad_norm": 2.075683116912842, "learning_rate": 0.0008340046029919448, "loss": 0.4921, "step": 202650 }, { "epoch": 58.30264672036824, "grad_norm": 1.812099575996399, "learning_rate": 0.0008339470655926352, "loss": 0.5441, "step": 202660 }, { "epoch": 58.30552359033371, "grad_norm": 1.8716598749160767, "learning_rate": 0.0008338895281933257, "loss": 0.4322, "step": 202670 }, { "epoch": 58.3084004602992, "grad_norm": 2.1607580184936523, "learning_rate": 0.0008338319907940162, "loss": 0.4604, "step": 202680 }, { "epoch": 58.31127733026467, "grad_norm": 1.3204511404037476, "learning_rate": 0.0008337744533947066, "loss": 0.5423, "step": 202690 }, { "epoch": 58.31415420023015, "grad_norm": 2.166602611541748, "learning_rate": 0.000833716915995397, "loss": 0.453, "step": 202700 }, { "epoch": 58.317031070195625, "grad_norm": 1.3223408460617065, "learning_rate": 0.0008336593785960875, "loss": 0.5505, "step": 202710 }, { "epoch": 58.3199079401611, "grad_norm": 1.349839210510254, "learning_rate": 0.0008336018411967779, "loss": 0.5216, "step": 202720 }, { "epoch": 58.322784810126585, "grad_norm": 0.7711019515991211, "learning_rate": 0.0008335443037974684, "loss": 0.3785, "step": 202730 }, { "epoch": 58.32566168009206, "grad_norm": 1.2935121059417725, "learning_rate": 0.0008334867663981588, "loss": 0.3522, "step": 202740 }, { "epoch": 58.32853855005754, "grad_norm": 0.973875880241394, "learning_rate": 0.0008334292289988493, "loss": 0.46, "step": 202750 }, { "epoch": 58.33141542002301, "grad_norm": 0.9994425773620605, "learning_rate": 0.0008333716915995397, "loss": 0.404, "step": 202760 }, { "epoch": 58.33429228998849, "grad_norm": 0.7816792130470276, "learning_rate": 0.0008333141542002302, "loss": 0.4685, "step": 202770 }, { "epoch": 58.33716915995397, "grad_norm": 1.066135287284851, "learning_rate": 0.0008332566168009206, "loss": 0.4645, "step": 202780 }, { "epoch": 58.34004602991945, "grad_norm": 1.060304045677185, "learning_rate": 0.0008331990794016111, "loss": 0.4207, "step": 202790 }, { "epoch": 58.342922899884925, "grad_norm": 1.1042325496673584, "learning_rate": 0.0008331415420023016, "loss": 0.5644, "step": 202800 }, { "epoch": 58.3457997698504, "grad_norm": 0.819119930267334, "learning_rate": 0.0008330840046029919, "loss": 0.5603, "step": 202810 }, { "epoch": 58.34867663981588, "grad_norm": 0.9071418642997742, "learning_rate": 0.0008330264672036824, "loss": 0.4852, "step": 202820 }, { "epoch": 58.35155350978136, "grad_norm": 0.7777530550956726, "learning_rate": 0.0008329689298043729, "loss": 0.5394, "step": 202830 }, { "epoch": 58.35443037974684, "grad_norm": 2.3614985942840576, "learning_rate": 0.0008329113924050633, "loss": 0.5732, "step": 202840 }, { "epoch": 58.35730724971231, "grad_norm": 0.8137789964675903, "learning_rate": 0.0008328538550057537, "loss": 0.513, "step": 202850 }, { "epoch": 58.36018411967779, "grad_norm": 1.527118444442749, "learning_rate": 0.0008327963176064443, "loss": 0.449, "step": 202860 }, { "epoch": 58.363060989643266, "grad_norm": 0.829805314540863, "learning_rate": 0.0008327387802071347, "loss": 0.5371, "step": 202870 }, { "epoch": 58.36593785960875, "grad_norm": 0.8768258690834045, "learning_rate": 0.0008326812428078251, "loss": 0.4911, "step": 202880 }, { "epoch": 58.368814729574225, "grad_norm": 2.1856815814971924, "learning_rate": 0.0008326237054085155, "loss": 0.5315, "step": 202890 }, { "epoch": 58.3716915995397, "grad_norm": 0.635384202003479, "learning_rate": 0.000832566168009206, "loss": 0.3764, "step": 202900 }, { "epoch": 58.37456846950518, "grad_norm": 1.500174880027771, "learning_rate": 0.0008325086306098965, "loss": 0.4711, "step": 202910 }, { "epoch": 58.377445339470654, "grad_norm": 1.159499168395996, "learning_rate": 0.0008324510932105868, "loss": 0.4129, "step": 202920 }, { "epoch": 58.38032220943613, "grad_norm": 2.3300557136535645, "learning_rate": 0.0008323935558112773, "loss": 0.5558, "step": 202930 }, { "epoch": 58.383199079401614, "grad_norm": 2.785308599472046, "learning_rate": 0.0008323360184119678, "loss": 0.5075, "step": 202940 }, { "epoch": 58.38607594936709, "grad_norm": 1.512211561203003, "learning_rate": 0.0008322784810126582, "loss": 0.4939, "step": 202950 }, { "epoch": 58.388952819332566, "grad_norm": 1.463330864906311, "learning_rate": 0.0008322209436133486, "loss": 0.5026, "step": 202960 }, { "epoch": 58.39182968929804, "grad_norm": 1.8142542839050293, "learning_rate": 0.0008321634062140392, "loss": 0.5715, "step": 202970 }, { "epoch": 58.39470655926352, "grad_norm": 1.374929428100586, "learning_rate": 0.0008321058688147296, "loss": 0.4707, "step": 202980 }, { "epoch": 58.397583429229, "grad_norm": 1.0942350625991821, "learning_rate": 0.00083204833141542, "loss": 0.495, "step": 202990 }, { "epoch": 58.40046029919448, "grad_norm": 1.2534252405166626, "learning_rate": 0.0008319907940161105, "loss": 0.3726, "step": 203000 }, { "epoch": 58.403337169159954, "grad_norm": 1.9195780754089355, "learning_rate": 0.0008319332566168009, "loss": 0.5291, "step": 203010 }, { "epoch": 58.40621403912543, "grad_norm": 0.6053760051727295, "learning_rate": 0.0008318757192174914, "loss": 0.4545, "step": 203020 }, { "epoch": 58.40909090909091, "grad_norm": 0.9800548553466797, "learning_rate": 0.0008318181818181818, "loss": 0.4556, "step": 203030 }, { "epoch": 58.41196777905639, "grad_norm": 1.1256216764450073, "learning_rate": 0.0008317606444188723, "loss": 0.4853, "step": 203040 }, { "epoch": 58.414844649021866, "grad_norm": 1.0569777488708496, "learning_rate": 0.0008317031070195627, "loss": 0.4746, "step": 203050 }, { "epoch": 58.41772151898734, "grad_norm": 1.1685364246368408, "learning_rate": 0.0008316455696202532, "loss": 0.5369, "step": 203060 }, { "epoch": 58.42059838895282, "grad_norm": 1.4984807968139648, "learning_rate": 0.0008315880322209435, "loss": 0.4763, "step": 203070 }, { "epoch": 58.423475258918295, "grad_norm": 1.083325743675232, "learning_rate": 0.0008315304948216341, "loss": 0.4175, "step": 203080 }, { "epoch": 58.42635212888378, "grad_norm": 0.636610746383667, "learning_rate": 0.0008314729574223246, "loss": 0.5222, "step": 203090 }, { "epoch": 58.429228998849254, "grad_norm": 1.142960786819458, "learning_rate": 0.0008314154200230149, "loss": 0.5633, "step": 203100 }, { "epoch": 58.43210586881473, "grad_norm": 1.1692792177200317, "learning_rate": 0.0008313578826237054, "loss": 0.4886, "step": 203110 }, { "epoch": 58.43498273878021, "grad_norm": 0.7339569926261902, "learning_rate": 0.0008313003452243959, "loss": 0.4471, "step": 203120 }, { "epoch": 58.43785960874568, "grad_norm": 1.468309998512268, "learning_rate": 0.0008312428078250863, "loss": 0.4701, "step": 203130 }, { "epoch": 58.44073647871116, "grad_norm": 2.127636194229126, "learning_rate": 0.0008311852704257767, "loss": 0.413, "step": 203140 }, { "epoch": 58.44361334867664, "grad_norm": 0.6900174021720886, "learning_rate": 0.0008311277330264673, "loss": 0.5035, "step": 203150 }, { "epoch": 58.44649021864212, "grad_norm": 1.0116372108459473, "learning_rate": 0.0008310701956271576, "loss": 0.442, "step": 203160 }, { "epoch": 58.449367088607595, "grad_norm": 1.4035720825195312, "learning_rate": 0.0008310126582278481, "loss": 0.4771, "step": 203170 }, { "epoch": 58.45224395857307, "grad_norm": 0.6782088875770569, "learning_rate": 0.0008309551208285386, "loss": 0.5002, "step": 203180 }, { "epoch": 58.45512082853855, "grad_norm": 0.748956561088562, "learning_rate": 0.000830897583429229, "loss": 0.4266, "step": 203190 }, { "epoch": 58.45799769850403, "grad_norm": 1.621828556060791, "learning_rate": 0.0008308400460299195, "loss": 0.5489, "step": 203200 }, { "epoch": 58.46087456846951, "grad_norm": 1.662493348121643, "learning_rate": 0.0008307825086306099, "loss": 0.5319, "step": 203210 }, { "epoch": 58.46375143843498, "grad_norm": 1.152214527130127, "learning_rate": 0.0008307249712313003, "loss": 0.4962, "step": 203220 }, { "epoch": 58.46662830840046, "grad_norm": 1.785767912864685, "learning_rate": 0.0008306674338319908, "loss": 0.5497, "step": 203230 }, { "epoch": 58.469505178365935, "grad_norm": 1.3646893501281738, "learning_rate": 0.0008306098964326813, "loss": 0.4794, "step": 203240 }, { "epoch": 58.47238204833142, "grad_norm": 1.5622026920318604, "learning_rate": 0.0008305523590333716, "loss": 0.4837, "step": 203250 }, { "epoch": 58.475258918296895, "grad_norm": 1.161354422569275, "learning_rate": 0.0008304948216340622, "loss": 0.4969, "step": 203260 }, { "epoch": 58.47813578826237, "grad_norm": 1.0146872997283936, "learning_rate": 0.0008304372842347527, "loss": 0.5292, "step": 203270 }, { "epoch": 58.48101265822785, "grad_norm": 1.707793951034546, "learning_rate": 0.000830379746835443, "loss": 0.5251, "step": 203280 }, { "epoch": 58.48388952819332, "grad_norm": 0.9474461674690247, "learning_rate": 0.0008303222094361335, "loss": 0.4294, "step": 203290 }, { "epoch": 58.48676639815881, "grad_norm": 1.547315001487732, "learning_rate": 0.000830264672036824, "loss": 0.5652, "step": 203300 }, { "epoch": 58.48964326812428, "grad_norm": 1.0683549642562866, "learning_rate": 0.0008302071346375144, "loss": 0.5216, "step": 203310 }, { "epoch": 58.49252013808976, "grad_norm": 2.990535020828247, "learning_rate": 0.0008301495972382048, "loss": 0.5005, "step": 203320 }, { "epoch": 58.495397008055235, "grad_norm": 0.8427612781524658, "learning_rate": 0.0008300920598388954, "loss": 0.398, "step": 203330 }, { "epoch": 58.49827387802071, "grad_norm": 0.7960718870162964, "learning_rate": 0.0008300345224395857, "loss": 0.5296, "step": 203340 }, { "epoch": 58.50115074798619, "grad_norm": 1.0342077016830444, "learning_rate": 0.0008299769850402762, "loss": 0.4258, "step": 203350 }, { "epoch": 58.50402761795167, "grad_norm": 1.3593534231185913, "learning_rate": 0.0008299194476409666, "loss": 0.6027, "step": 203360 }, { "epoch": 58.50690448791715, "grad_norm": 1.4952392578125, "learning_rate": 0.0008298619102416571, "loss": 0.3991, "step": 203370 }, { "epoch": 58.50978135788262, "grad_norm": 1.8296862840652466, "learning_rate": 0.0008298043728423476, "loss": 0.674, "step": 203380 }, { "epoch": 58.5126582278481, "grad_norm": 1.369255781173706, "learning_rate": 0.000829746835443038, "loss": 0.5471, "step": 203390 }, { "epoch": 58.515535097813576, "grad_norm": 1.2290257215499878, "learning_rate": 0.0008296892980437284, "loss": 0.4017, "step": 203400 }, { "epoch": 58.51841196777906, "grad_norm": 1.3713701963424683, "learning_rate": 0.0008296317606444189, "loss": 0.564, "step": 203410 }, { "epoch": 58.521288837744535, "grad_norm": 1.0761946439743042, "learning_rate": 0.0008295742232451094, "loss": 0.5045, "step": 203420 }, { "epoch": 58.52416570771001, "grad_norm": 1.4815582036972046, "learning_rate": 0.0008295166858457997, "loss": 0.548, "step": 203430 }, { "epoch": 58.52704257767549, "grad_norm": 1.068446159362793, "learning_rate": 0.0008294591484464903, "loss": 0.466, "step": 203440 }, { "epoch": 58.529919447640964, "grad_norm": 1.813561201095581, "learning_rate": 0.0008294016110471807, "loss": 0.484, "step": 203450 }, { "epoch": 58.53279631760645, "grad_norm": 1.2639585733413696, "learning_rate": 0.0008293440736478711, "loss": 0.4654, "step": 203460 }, { "epoch": 58.53567318757192, "grad_norm": 1.2507063150405884, "learning_rate": 0.0008292865362485615, "loss": 0.4798, "step": 203470 }, { "epoch": 58.5385500575374, "grad_norm": 0.8307746648788452, "learning_rate": 0.0008292289988492521, "loss": 0.4175, "step": 203480 }, { "epoch": 58.541426927502876, "grad_norm": 1.4292843341827393, "learning_rate": 0.0008291714614499425, "loss": 0.4835, "step": 203490 }, { "epoch": 58.54430379746835, "grad_norm": 0.7327202558517456, "learning_rate": 0.0008291139240506329, "loss": 0.4204, "step": 203500 }, { "epoch": 58.547180667433835, "grad_norm": 1.370607614517212, "learning_rate": 0.0008290563866513234, "loss": 0.4604, "step": 203510 }, { "epoch": 58.55005753739931, "grad_norm": 2.095069646835327, "learning_rate": 0.0008289988492520138, "loss": 0.5434, "step": 203520 }, { "epoch": 58.55293440736479, "grad_norm": 1.4494980573654175, "learning_rate": 0.0008289413118527043, "loss": 0.5155, "step": 203530 }, { "epoch": 58.555811277330264, "grad_norm": 1.300929069519043, "learning_rate": 0.0008288837744533947, "loss": 0.4453, "step": 203540 }, { "epoch": 58.55868814729574, "grad_norm": 1.553040862083435, "learning_rate": 0.0008288262370540852, "loss": 0.5643, "step": 203550 }, { "epoch": 58.561565017261216, "grad_norm": 1.37012779712677, "learning_rate": 0.0008287686996547756, "loss": 0.489, "step": 203560 }, { "epoch": 58.5644418872267, "grad_norm": 0.7980526089668274, "learning_rate": 0.0008287111622554661, "loss": 0.4867, "step": 203570 }, { "epoch": 58.567318757192176, "grad_norm": 1.2285784482955933, "learning_rate": 0.0008286536248561565, "loss": 0.4893, "step": 203580 }, { "epoch": 58.57019562715765, "grad_norm": 1.934772253036499, "learning_rate": 0.000828596087456847, "loss": 0.482, "step": 203590 }, { "epoch": 58.57307249712313, "grad_norm": 1.884950876235962, "learning_rate": 0.0008285385500575375, "loss": 0.4344, "step": 203600 }, { "epoch": 58.575949367088604, "grad_norm": 1.1827327013015747, "learning_rate": 0.0008284810126582278, "loss": 0.6214, "step": 203610 }, { "epoch": 58.57882623705409, "grad_norm": 0.8296565413475037, "learning_rate": 0.0008284234752589184, "loss": 0.5904, "step": 203620 }, { "epoch": 58.581703107019564, "grad_norm": 1.0340825319290161, "learning_rate": 0.0008283659378596088, "loss": 0.4604, "step": 203630 }, { "epoch": 58.58457997698504, "grad_norm": 1.567824125289917, "learning_rate": 0.0008283084004602992, "loss": 0.4929, "step": 203640 }, { "epoch": 58.587456846950516, "grad_norm": 0.921481728553772, "learning_rate": 0.0008282508630609896, "loss": 0.4242, "step": 203650 }, { "epoch": 58.59033371691599, "grad_norm": 2.751080274581909, "learning_rate": 0.0008281933256616802, "loss": 0.5866, "step": 203660 }, { "epoch": 58.593210586881476, "grad_norm": 1.5144253969192505, "learning_rate": 0.0008281357882623706, "loss": 0.6528, "step": 203670 }, { "epoch": 58.59608745684695, "grad_norm": 1.0236512422561646, "learning_rate": 0.000828078250863061, "loss": 0.5453, "step": 203680 }, { "epoch": 58.59896432681243, "grad_norm": 1.3191713094711304, "learning_rate": 0.0008280207134637515, "loss": 0.4204, "step": 203690 }, { "epoch": 58.601841196777904, "grad_norm": 1.5071643590927124, "learning_rate": 0.0008279631760644419, "loss": 0.44, "step": 203700 }, { "epoch": 58.60471806674338, "grad_norm": 2.3674890995025635, "learning_rate": 0.0008279056386651324, "loss": 0.5296, "step": 203710 }, { "epoch": 58.607594936708864, "grad_norm": 1.5289998054504395, "learning_rate": 0.0008278481012658227, "loss": 0.6752, "step": 203720 }, { "epoch": 58.61047180667434, "grad_norm": 1.3061044216156006, "learning_rate": 0.0008277905638665133, "loss": 0.6011, "step": 203730 }, { "epoch": 58.613348676639816, "grad_norm": 0.8642243146896362, "learning_rate": 0.0008277330264672037, "loss": 0.4526, "step": 203740 }, { "epoch": 58.61622554660529, "grad_norm": 1.039109468460083, "learning_rate": 0.0008276754890678941, "loss": 0.4445, "step": 203750 }, { "epoch": 58.61910241657077, "grad_norm": 1.2127387523651123, "learning_rate": 0.0008276179516685845, "loss": 0.5872, "step": 203760 }, { "epoch": 58.621979286536245, "grad_norm": 1.221835732460022, "learning_rate": 0.0008275604142692751, "loss": 0.4884, "step": 203770 }, { "epoch": 58.62485615650173, "grad_norm": 2.052882671356201, "learning_rate": 0.0008275028768699655, "loss": 0.5048, "step": 203780 }, { "epoch": 58.627733026467205, "grad_norm": 1.5013375282287598, "learning_rate": 0.0008274453394706559, "loss": 0.4581, "step": 203790 }, { "epoch": 58.63060989643268, "grad_norm": 1.987640619277954, "learning_rate": 0.0008273878020713464, "loss": 0.5059, "step": 203800 }, { "epoch": 58.63348676639816, "grad_norm": 1.4981101751327515, "learning_rate": 0.0008273302646720368, "loss": 0.6568, "step": 203810 }, { "epoch": 58.63636363636363, "grad_norm": 1.6394883394241333, "learning_rate": 0.0008272727272727273, "loss": 0.5049, "step": 203820 }, { "epoch": 58.639240506329116, "grad_norm": 1.3768724203109741, "learning_rate": 0.0008272151898734177, "loss": 0.4432, "step": 203830 }, { "epoch": 58.64211737629459, "grad_norm": 2.107665777206421, "learning_rate": 0.0008271576524741082, "loss": 0.545, "step": 203840 }, { "epoch": 58.64499424626007, "grad_norm": 1.4466335773468018, "learning_rate": 0.0008271001150747986, "loss": 0.5131, "step": 203850 }, { "epoch": 58.647871116225545, "grad_norm": 0.9953253269195557, "learning_rate": 0.0008270425776754891, "loss": 0.4838, "step": 203860 }, { "epoch": 58.65074798619102, "grad_norm": 0.9530386924743652, "learning_rate": 0.0008269850402761794, "loss": 0.4441, "step": 203870 }, { "epoch": 58.653624856156505, "grad_norm": 1.170322299003601, "learning_rate": 0.00082692750287687, "loss": 0.486, "step": 203880 }, { "epoch": 58.65650172612198, "grad_norm": 1.7513575553894043, "learning_rate": 0.0008268699654775605, "loss": 0.4522, "step": 203890 }, { "epoch": 58.65937859608746, "grad_norm": 0.9246312975883484, "learning_rate": 0.0008268124280782508, "loss": 0.5713, "step": 203900 }, { "epoch": 58.66225546605293, "grad_norm": 1.2444076538085938, "learning_rate": 0.0008267548906789413, "loss": 0.5675, "step": 203910 }, { "epoch": 58.66513233601841, "grad_norm": 2.099583625793457, "learning_rate": 0.0008266973532796318, "loss": 0.4646, "step": 203920 }, { "epoch": 58.66800920598389, "grad_norm": 0.8204025626182556, "learning_rate": 0.0008266398158803222, "loss": 0.5033, "step": 203930 }, { "epoch": 58.67088607594937, "grad_norm": 1.134481430053711, "learning_rate": 0.0008265822784810126, "loss": 0.6071, "step": 203940 }, { "epoch": 58.673762945914845, "grad_norm": 0.8567030429840088, "learning_rate": 0.0008265247410817032, "loss": 0.4947, "step": 203950 }, { "epoch": 58.67663981588032, "grad_norm": 0.7695216536521912, "learning_rate": 0.0008264672036823935, "loss": 0.3718, "step": 203960 }, { "epoch": 58.6795166858458, "grad_norm": 2.3191580772399902, "learning_rate": 0.000826409666283084, "loss": 0.692, "step": 203970 }, { "epoch": 58.68239355581128, "grad_norm": 2.1463184356689453, "learning_rate": 0.0008263521288837745, "loss": 0.5866, "step": 203980 }, { "epoch": 58.68527042577676, "grad_norm": 1.3715314865112305, "learning_rate": 0.0008262945914844649, "loss": 0.3681, "step": 203990 }, { "epoch": 58.68814729574223, "grad_norm": 1.1167711019515991, "learning_rate": 0.0008262370540851554, "loss": 0.6076, "step": 204000 }, { "epoch": 58.69102416570771, "grad_norm": 1.0048009157180786, "learning_rate": 0.0008261795166858458, "loss": 0.42, "step": 204010 }, { "epoch": 58.693901035673186, "grad_norm": 1.2420591115951538, "learning_rate": 0.0008261219792865363, "loss": 0.4371, "step": 204020 }, { "epoch": 58.69677790563866, "grad_norm": 0.8191589713096619, "learning_rate": 0.0008260644418872267, "loss": 0.4708, "step": 204030 }, { "epoch": 58.699654775604145, "grad_norm": 1.640242338180542, "learning_rate": 0.0008260069044879172, "loss": 0.4902, "step": 204040 }, { "epoch": 58.70253164556962, "grad_norm": 2.056122064590454, "learning_rate": 0.0008259493670886075, "loss": 0.6587, "step": 204050 }, { "epoch": 58.7054085155351, "grad_norm": 1.4188182353973389, "learning_rate": 0.0008258918296892981, "loss": 0.5362, "step": 204060 }, { "epoch": 58.708285385500574, "grad_norm": 1.8799563646316528, "learning_rate": 0.0008258342922899886, "loss": 0.4378, "step": 204070 }, { "epoch": 58.71116225546605, "grad_norm": 1.3840686082839966, "learning_rate": 0.0008257767548906789, "loss": 0.4008, "step": 204080 }, { "epoch": 58.71403912543153, "grad_norm": 1.1239356994628906, "learning_rate": 0.0008257192174913694, "loss": 0.4681, "step": 204090 }, { "epoch": 58.71691599539701, "grad_norm": 1.3504674434661865, "learning_rate": 0.0008256616800920599, "loss": 0.4501, "step": 204100 }, { "epoch": 58.719792865362486, "grad_norm": 1.152281403541565, "learning_rate": 0.0008256041426927503, "loss": 0.5381, "step": 204110 }, { "epoch": 58.72266973532796, "grad_norm": 2.2798123359680176, "learning_rate": 0.0008255466052934407, "loss": 0.4604, "step": 204120 }, { "epoch": 58.72554660529344, "grad_norm": 0.8863848447799683, "learning_rate": 0.0008254890678941313, "loss": 0.4662, "step": 204130 }, { "epoch": 58.72842347525892, "grad_norm": 1.162444829940796, "learning_rate": 0.0008254315304948216, "loss": 0.5083, "step": 204140 }, { "epoch": 58.7313003452244, "grad_norm": 1.0589038133621216, "learning_rate": 0.0008253739930955121, "loss": 0.4764, "step": 204150 }, { "epoch": 58.734177215189874, "grad_norm": 0.8786314725875854, "learning_rate": 0.0008253164556962025, "loss": 0.386, "step": 204160 }, { "epoch": 58.73705408515535, "grad_norm": 1.8064857721328735, "learning_rate": 0.000825258918296893, "loss": 0.6276, "step": 204170 }, { "epoch": 58.739930955120826, "grad_norm": 1.4993032217025757, "learning_rate": 0.0008252013808975835, "loss": 0.4738, "step": 204180 }, { "epoch": 58.74280782508631, "grad_norm": 0.9066321849822998, "learning_rate": 0.0008251438434982739, "loss": 0.473, "step": 204190 }, { "epoch": 58.745684695051786, "grad_norm": 1.8424365520477295, "learning_rate": 0.0008250863060989643, "loss": 0.5307, "step": 204200 }, { "epoch": 58.74856156501726, "grad_norm": 1.2306252717971802, "learning_rate": 0.0008250287686996548, "loss": 0.4489, "step": 204210 }, { "epoch": 58.75143843498274, "grad_norm": 1.206878900527954, "learning_rate": 0.0008249712313003453, "loss": 0.5012, "step": 204220 }, { "epoch": 58.754315304948214, "grad_norm": 0.8342494964599609, "learning_rate": 0.0008249136939010356, "loss": 0.5509, "step": 204230 }, { "epoch": 58.75719217491369, "grad_norm": 1.2881776094436646, "learning_rate": 0.0008248561565017262, "loss": 0.4379, "step": 204240 }, { "epoch": 58.760069044879174, "grad_norm": 1.279232382774353, "learning_rate": 0.0008247986191024166, "loss": 0.6486, "step": 204250 }, { "epoch": 58.76294591484465, "grad_norm": 2.076723098754883, "learning_rate": 0.000824741081703107, "loss": 0.4907, "step": 204260 }, { "epoch": 58.765822784810126, "grad_norm": 1.1202797889709473, "learning_rate": 0.0008246835443037974, "loss": 0.4415, "step": 204270 }, { "epoch": 58.7686996547756, "grad_norm": 1.2620750665664673, "learning_rate": 0.000824626006904488, "loss": 0.4555, "step": 204280 }, { "epoch": 58.77157652474108, "grad_norm": 1.5474542379379272, "learning_rate": 0.0008245684695051784, "loss": 0.664, "step": 204290 }, { "epoch": 58.77445339470656, "grad_norm": 1.2689625024795532, "learning_rate": 0.0008245109321058688, "loss": 0.458, "step": 204300 }, { "epoch": 58.77733026467204, "grad_norm": 1.263946294784546, "learning_rate": 0.0008244533947065594, "loss": 0.5178, "step": 204310 }, { "epoch": 58.780207134637514, "grad_norm": 0.7772836089134216, "learning_rate": 0.0008243958573072497, "loss": 0.4028, "step": 204320 }, { "epoch": 58.78308400460299, "grad_norm": 2.3342812061309814, "learning_rate": 0.0008243383199079402, "loss": 0.5332, "step": 204330 }, { "epoch": 58.78596087456847, "grad_norm": 1.5771279335021973, "learning_rate": 0.0008242807825086306, "loss": 0.5325, "step": 204340 }, { "epoch": 58.78883774453395, "grad_norm": 0.7915146350860596, "learning_rate": 0.0008242232451093211, "loss": 0.52, "step": 204350 }, { "epoch": 58.791714614499426, "grad_norm": 0.5955244302749634, "learning_rate": 0.0008241657077100115, "loss": 0.5034, "step": 204360 }, { "epoch": 58.7945914844649, "grad_norm": 1.3072476387023926, "learning_rate": 0.000824108170310702, "loss": 0.6641, "step": 204370 }, { "epoch": 58.79746835443038, "grad_norm": 1.1792535781860352, "learning_rate": 0.0008240506329113923, "loss": 0.5014, "step": 204380 }, { "epoch": 58.800345224395855, "grad_norm": 1.1244311332702637, "learning_rate": 0.0008239930955120829, "loss": 0.6383, "step": 204390 }, { "epoch": 58.80322209436134, "grad_norm": 0.7666695713996887, "learning_rate": 0.0008239355581127734, "loss": 0.4552, "step": 204400 }, { "epoch": 58.806098964326814, "grad_norm": 1.0115039348602295, "learning_rate": 0.0008238780207134637, "loss": 0.4597, "step": 204410 }, { "epoch": 58.80897583429229, "grad_norm": 1.351630449295044, "learning_rate": 0.0008238204833141543, "loss": 0.5609, "step": 204420 }, { "epoch": 58.81185270425777, "grad_norm": 0.7621310353279114, "learning_rate": 0.0008237629459148447, "loss": 0.5333, "step": 204430 }, { "epoch": 58.81472957422324, "grad_norm": 1.3100491762161255, "learning_rate": 0.0008237054085155351, "loss": 0.4131, "step": 204440 }, { "epoch": 58.81760644418872, "grad_norm": 0.9758052825927734, "learning_rate": 0.0008236478711162255, "loss": 0.4285, "step": 204450 }, { "epoch": 58.8204833141542, "grad_norm": 1.02145516872406, "learning_rate": 0.0008235903337169161, "loss": 0.5059, "step": 204460 }, { "epoch": 58.82336018411968, "grad_norm": 0.7256750464439392, "learning_rate": 0.0008235327963176064, "loss": 0.4889, "step": 204470 }, { "epoch": 58.826237054085155, "grad_norm": 3.107064962387085, "learning_rate": 0.0008234752589182969, "loss": 0.4404, "step": 204480 }, { "epoch": 58.82911392405063, "grad_norm": 1.9267853498458862, "learning_rate": 0.0008234177215189874, "loss": 0.4258, "step": 204490 }, { "epoch": 58.83199079401611, "grad_norm": 1.5054712295532227, "learning_rate": 0.0008233601841196778, "loss": 0.4684, "step": 204500 }, { "epoch": 58.83486766398159, "grad_norm": 1.6278358697891235, "learning_rate": 0.0008233026467203683, "loss": 0.4402, "step": 204510 }, { "epoch": 58.83774453394707, "grad_norm": 1.2761603593826294, "learning_rate": 0.0008232451093210587, "loss": 0.5097, "step": 204520 }, { "epoch": 58.84062140391254, "grad_norm": 1.3290011882781982, "learning_rate": 0.0008231875719217492, "loss": 0.5584, "step": 204530 }, { "epoch": 58.84349827387802, "grad_norm": 1.8042289018630981, "learning_rate": 0.0008231300345224396, "loss": 0.5249, "step": 204540 }, { "epoch": 58.846375143843495, "grad_norm": 1.857285976409912, "learning_rate": 0.00082307249712313, "loss": 0.5652, "step": 204550 }, { "epoch": 58.84925201380898, "grad_norm": 1.575848937034607, "learning_rate": 0.0008230149597238204, "loss": 0.5342, "step": 204560 }, { "epoch": 58.852128883774455, "grad_norm": 1.0756012201309204, "learning_rate": 0.000822957422324511, "loss": 0.3942, "step": 204570 }, { "epoch": 58.85500575373993, "grad_norm": 1.302869200706482, "learning_rate": 0.0008228998849252014, "loss": 0.5334, "step": 204580 }, { "epoch": 58.85788262370541, "grad_norm": 1.7062965631484985, "learning_rate": 0.0008228423475258918, "loss": 0.475, "step": 204590 }, { "epoch": 58.860759493670884, "grad_norm": 1.6184468269348145, "learning_rate": 0.0008227848101265824, "loss": 0.5234, "step": 204600 }, { "epoch": 58.86363636363637, "grad_norm": 1.6769605875015259, "learning_rate": 0.0008227272727272727, "loss": 0.5787, "step": 204610 }, { "epoch": 58.86651323360184, "grad_norm": 2.4698050022125244, "learning_rate": 0.0008226697353279632, "loss": 0.4775, "step": 204620 }, { "epoch": 58.86939010356732, "grad_norm": 1.1505136489868164, "learning_rate": 0.0008226121979286536, "loss": 0.5405, "step": 204630 }, { "epoch": 58.872266973532795, "grad_norm": 1.8979531526565552, "learning_rate": 0.0008225546605293441, "loss": 0.4173, "step": 204640 }, { "epoch": 58.87514384349827, "grad_norm": 1.2220960855484009, "learning_rate": 0.0008224971231300345, "loss": 0.5663, "step": 204650 }, { "epoch": 58.878020713463755, "grad_norm": 1.2482664585113525, "learning_rate": 0.000822439585730725, "loss": 0.5005, "step": 204660 }, { "epoch": 58.88089758342923, "grad_norm": 1.0896025896072388, "learning_rate": 0.0008223820483314153, "loss": 0.5568, "step": 204670 }, { "epoch": 58.88377445339471, "grad_norm": 1.4988771677017212, "learning_rate": 0.0008223245109321059, "loss": 0.6866, "step": 204680 }, { "epoch": 58.886651323360184, "grad_norm": 0.8652375936508179, "learning_rate": 0.0008222669735327964, "loss": 0.4671, "step": 204690 }, { "epoch": 58.88952819332566, "grad_norm": 0.7914178371429443, "learning_rate": 0.0008222094361334867, "loss": 0.462, "step": 204700 }, { "epoch": 58.892405063291136, "grad_norm": 1.7846311330795288, "learning_rate": 0.0008221518987341773, "loss": 0.534, "step": 204710 }, { "epoch": 58.89528193325662, "grad_norm": 1.3409875631332397, "learning_rate": 0.0008220943613348677, "loss": 0.616, "step": 204720 }, { "epoch": 58.898158803222096, "grad_norm": 0.6435748338699341, "learning_rate": 0.0008220368239355581, "loss": 0.4521, "step": 204730 }, { "epoch": 58.90103567318757, "grad_norm": 1.7231817245483398, "learning_rate": 0.0008219792865362485, "loss": 0.7085, "step": 204740 }, { "epoch": 58.90391254315305, "grad_norm": 0.8264200687408447, "learning_rate": 0.0008219217491369391, "loss": 0.4188, "step": 204750 }, { "epoch": 58.906789413118524, "grad_norm": 1.0848002433776855, "learning_rate": 0.0008218642117376294, "loss": 0.4614, "step": 204760 }, { "epoch": 58.90966628308401, "grad_norm": 0.7962034344673157, "learning_rate": 0.0008218066743383199, "loss": 0.4573, "step": 204770 }, { "epoch": 58.912543153049484, "grad_norm": 0.7941579818725586, "learning_rate": 0.0008217491369390104, "loss": 0.5071, "step": 204780 }, { "epoch": 58.91542002301496, "grad_norm": 1.526320219039917, "learning_rate": 0.0008216915995397008, "loss": 0.4348, "step": 204790 }, { "epoch": 58.918296892980436, "grad_norm": 2.3479228019714355, "learning_rate": 0.0008216340621403913, "loss": 0.4898, "step": 204800 }, { "epoch": 58.92117376294591, "grad_norm": 0.7731755375862122, "learning_rate": 0.0008215765247410817, "loss": 0.5405, "step": 204810 }, { "epoch": 58.924050632911396, "grad_norm": 1.0995514392852783, "learning_rate": 0.0008215189873417722, "loss": 0.4744, "step": 204820 }, { "epoch": 58.92692750287687, "grad_norm": 1.0440480709075928, "learning_rate": 0.0008214614499424626, "loss": 0.6034, "step": 204830 }, { "epoch": 58.92980437284235, "grad_norm": 2.187241792678833, "learning_rate": 0.0008214039125431531, "loss": 0.499, "step": 204840 }, { "epoch": 58.932681242807824, "grad_norm": 1.1377626657485962, "learning_rate": 0.0008213463751438434, "loss": 0.4599, "step": 204850 }, { "epoch": 58.9355581127733, "grad_norm": 1.9979726076126099, "learning_rate": 0.000821288837744534, "loss": 0.6394, "step": 204860 }, { "epoch": 58.938434982738784, "grad_norm": 1.3612537384033203, "learning_rate": 0.0008212313003452245, "loss": 0.5078, "step": 204870 }, { "epoch": 58.94131185270426, "grad_norm": 1.4093506336212158, "learning_rate": 0.0008211737629459148, "loss": 0.4942, "step": 204880 }, { "epoch": 58.944188722669736, "grad_norm": 1.9228007793426514, "learning_rate": 0.0008211162255466053, "loss": 0.4693, "step": 204890 }, { "epoch": 58.94706559263521, "grad_norm": 1.2191344499588013, "learning_rate": 0.0008210586881472958, "loss": 0.4899, "step": 204900 }, { "epoch": 58.94994246260069, "grad_norm": 0.6020845174789429, "learning_rate": 0.0008210011507479862, "loss": 0.4601, "step": 204910 }, { "epoch": 58.952819332566165, "grad_norm": 1.6438746452331543, "learning_rate": 0.0008209436133486766, "loss": 0.4076, "step": 204920 }, { "epoch": 58.95569620253165, "grad_norm": 1.1624929904937744, "learning_rate": 0.0008208860759493672, "loss": 0.4232, "step": 204930 }, { "epoch": 58.958573072497124, "grad_norm": 1.5939215421676636, "learning_rate": 0.0008208285385500575, "loss": 0.4996, "step": 204940 }, { "epoch": 58.9614499424626, "grad_norm": 1.2235136032104492, "learning_rate": 0.000820771001150748, "loss": 0.4963, "step": 204950 }, { "epoch": 58.96432681242808, "grad_norm": 1.217007040977478, "learning_rate": 0.0008207134637514384, "loss": 0.4798, "step": 204960 }, { "epoch": 58.96720368239355, "grad_norm": 1.1074415445327759, "learning_rate": 0.0008206559263521289, "loss": 0.4328, "step": 204970 }, { "epoch": 58.970080552359036, "grad_norm": 0.8605700135231018, "learning_rate": 0.0008205983889528194, "loss": 0.5323, "step": 204980 }, { "epoch": 58.97295742232451, "grad_norm": 0.9133802652359009, "learning_rate": 0.0008205408515535098, "loss": 0.5646, "step": 204990 }, { "epoch": 58.97583429228999, "grad_norm": 1.3669973611831665, "learning_rate": 0.0008204833141542003, "loss": 0.5258, "step": 205000 }, { "epoch": 58.978711162255465, "grad_norm": 1.4271596670150757, "learning_rate": 0.0008204257767548907, "loss": 0.4884, "step": 205010 }, { "epoch": 58.98158803222094, "grad_norm": 1.7908921241760254, "learning_rate": 0.0008203682393555812, "loss": 0.632, "step": 205020 }, { "epoch": 58.984464902186424, "grad_norm": 1.2436649799346924, "learning_rate": 0.0008203107019562715, "loss": 0.4599, "step": 205030 }, { "epoch": 58.9873417721519, "grad_norm": 0.9523735642433167, "learning_rate": 0.0008202531645569621, "loss": 0.4159, "step": 205040 }, { "epoch": 58.99021864211738, "grad_norm": 1.87877357006073, "learning_rate": 0.0008201956271576525, "loss": 0.5095, "step": 205050 }, { "epoch": 58.99309551208285, "grad_norm": 0.6676111817359924, "learning_rate": 0.0008201380897583429, "loss": 0.4199, "step": 205060 }, { "epoch": 58.99597238204833, "grad_norm": 1.7179937362670898, "learning_rate": 0.0008200805523590333, "loss": 0.4222, "step": 205070 }, { "epoch": 58.99884925201381, "grad_norm": 1.4410310983657837, "learning_rate": 0.0008200230149597239, "loss": 0.5028, "step": 205080 }, { "epoch": 59.00172612197929, "grad_norm": 1.2436025142669678, "learning_rate": 0.0008199654775604143, "loss": 0.4614, "step": 205090 }, { "epoch": 59.004602991944765, "grad_norm": 1.4540959596633911, "learning_rate": 0.0008199079401611047, "loss": 0.4772, "step": 205100 }, { "epoch": 59.00747986191024, "grad_norm": 1.8834445476531982, "learning_rate": 0.0008198504027617953, "loss": 0.5039, "step": 205110 }, { "epoch": 59.01035673187572, "grad_norm": 1.6172571182250977, "learning_rate": 0.0008197928653624856, "loss": 0.4608, "step": 205120 }, { "epoch": 59.01323360184119, "grad_norm": 1.1809924840927124, "learning_rate": 0.0008197353279631761, "loss": 0.4303, "step": 205130 }, { "epoch": 59.01611047180668, "grad_norm": 1.0831904411315918, "learning_rate": 0.0008196777905638665, "loss": 0.4893, "step": 205140 }, { "epoch": 59.01898734177215, "grad_norm": 3.1753437519073486, "learning_rate": 0.000819620253164557, "loss": 0.4611, "step": 205150 }, { "epoch": 59.02186421173763, "grad_norm": 1.4655609130859375, "learning_rate": 0.0008195627157652474, "loss": 0.3479, "step": 205160 }, { "epoch": 59.024741081703105, "grad_norm": 1.1248291730880737, "learning_rate": 0.0008195051783659379, "loss": 0.4824, "step": 205170 }, { "epoch": 59.02761795166858, "grad_norm": 0.9869675636291504, "learning_rate": 0.0008194476409666282, "loss": 0.428, "step": 205180 }, { "epoch": 59.030494821634065, "grad_norm": 0.9840391874313354, "learning_rate": 0.0008193901035673188, "loss": 0.4171, "step": 205190 }, { "epoch": 59.03337169159954, "grad_norm": 1.0067566633224487, "learning_rate": 0.0008193325661680093, "loss": 0.4549, "step": 205200 }, { "epoch": 59.03624856156502, "grad_norm": 1.310050368309021, "learning_rate": 0.0008192750287686996, "loss": 0.4599, "step": 205210 }, { "epoch": 59.03912543153049, "grad_norm": 1.951562762260437, "learning_rate": 0.0008192174913693902, "loss": 0.5475, "step": 205220 }, { "epoch": 59.04200230149597, "grad_norm": 0.986030638217926, "learning_rate": 0.0008191599539700806, "loss": 0.3684, "step": 205230 }, { "epoch": 59.04487917146145, "grad_norm": 0.8841460347175598, "learning_rate": 0.000819102416570771, "loss": 0.4739, "step": 205240 }, { "epoch": 59.04775604142693, "grad_norm": 2.1862637996673584, "learning_rate": 0.0008190448791714614, "loss": 0.5784, "step": 205250 }, { "epoch": 59.050632911392405, "grad_norm": 1.5678306818008423, "learning_rate": 0.000818987341772152, "loss": 0.4662, "step": 205260 }, { "epoch": 59.05350978135788, "grad_norm": 1.25120210647583, "learning_rate": 0.0008189298043728423, "loss": 0.4897, "step": 205270 }, { "epoch": 59.05638665132336, "grad_norm": 1.2031817436218262, "learning_rate": 0.0008188722669735328, "loss": 0.5493, "step": 205280 }, { "epoch": 59.05926352128884, "grad_norm": 0.9332833290100098, "learning_rate": 0.0008188147295742234, "loss": 0.436, "step": 205290 }, { "epoch": 59.06214039125432, "grad_norm": 1.1418299674987793, "learning_rate": 0.0008187571921749137, "loss": 0.5809, "step": 205300 }, { "epoch": 59.06501726121979, "grad_norm": 1.1652826070785522, "learning_rate": 0.0008186996547756042, "loss": 0.4333, "step": 205310 }, { "epoch": 59.06789413118527, "grad_norm": 0.7605677247047424, "learning_rate": 0.0008186421173762946, "loss": 0.4642, "step": 205320 }, { "epoch": 59.070771001150746, "grad_norm": 1.0637961626052856, "learning_rate": 0.0008185845799769851, "loss": 0.4175, "step": 205330 }, { "epoch": 59.07364787111622, "grad_norm": 1.1575974225997925, "learning_rate": 0.0008185270425776755, "loss": 0.5031, "step": 205340 }, { "epoch": 59.076524741081705, "grad_norm": 1.9231947660446167, "learning_rate": 0.000818469505178366, "loss": 0.4885, "step": 205350 }, { "epoch": 59.07940161104718, "grad_norm": 1.3895758390426636, "learning_rate": 0.0008184119677790563, "loss": 0.5297, "step": 205360 }, { "epoch": 59.08227848101266, "grad_norm": 1.458365559577942, "learning_rate": 0.0008183544303797469, "loss": 0.3836, "step": 205370 }, { "epoch": 59.085155350978134, "grad_norm": 0.7266530394554138, "learning_rate": 0.0008182968929804372, "loss": 0.4241, "step": 205380 }, { "epoch": 59.08803222094361, "grad_norm": 1.3163737058639526, "learning_rate": 0.0008182393555811277, "loss": 0.4981, "step": 205390 }, { "epoch": 59.09090909090909, "grad_norm": 1.372833490371704, "learning_rate": 0.0008181818181818183, "loss": 0.4476, "step": 205400 }, { "epoch": 59.09378596087457, "grad_norm": 1.1725627183914185, "learning_rate": 0.0008181242807825086, "loss": 0.3756, "step": 205410 }, { "epoch": 59.096662830840046, "grad_norm": 2.3989503383636475, "learning_rate": 0.0008180667433831991, "loss": 0.5501, "step": 205420 }, { "epoch": 59.09953970080552, "grad_norm": 0.979584276676178, "learning_rate": 0.0008180092059838895, "loss": 0.6403, "step": 205430 }, { "epoch": 59.102416570771, "grad_norm": 1.2701327800750732, "learning_rate": 0.00081795166858458, "loss": 0.6232, "step": 205440 }, { "epoch": 59.10529344073648, "grad_norm": 1.0844714641571045, "learning_rate": 0.0008178941311852704, "loss": 0.4959, "step": 205450 }, { "epoch": 59.10817031070196, "grad_norm": 1.1110601425170898, "learning_rate": 0.0008178365937859609, "loss": 0.5064, "step": 205460 }, { "epoch": 59.111047180667434, "grad_norm": 1.321339726448059, "learning_rate": 0.0008177790563866512, "loss": 0.4505, "step": 205470 }, { "epoch": 59.11392405063291, "grad_norm": 0.8800696730613708, "learning_rate": 0.0008177215189873418, "loss": 0.4666, "step": 205480 }, { "epoch": 59.116800920598386, "grad_norm": 1.7064073085784912, "learning_rate": 0.0008176639815880323, "loss": 0.4429, "step": 205490 }, { "epoch": 59.11967779056387, "grad_norm": 1.3729385137557983, "learning_rate": 0.0008176064441887226, "loss": 0.5282, "step": 205500 }, { "epoch": 59.122554660529346, "grad_norm": 0.9312718510627747, "learning_rate": 0.0008175489067894132, "loss": 0.5109, "step": 205510 }, { "epoch": 59.12543153049482, "grad_norm": 1.0846747159957886, "learning_rate": 0.0008174913693901036, "loss": 0.6542, "step": 205520 }, { "epoch": 59.1283084004603, "grad_norm": 1.3167574405670166, "learning_rate": 0.000817433831990794, "loss": 0.5331, "step": 205530 }, { "epoch": 59.131185270425775, "grad_norm": 0.821717381477356, "learning_rate": 0.0008173762945914844, "loss": 0.4495, "step": 205540 }, { "epoch": 59.13406214039125, "grad_norm": 1.0736178159713745, "learning_rate": 0.000817318757192175, "loss": 0.3928, "step": 205550 }, { "epoch": 59.136939010356734, "grad_norm": 2.0494511127471924, "learning_rate": 0.0008172612197928653, "loss": 0.5004, "step": 205560 }, { "epoch": 59.13981588032221, "grad_norm": 1.0038771629333496, "learning_rate": 0.0008172036823935558, "loss": 0.4445, "step": 205570 }, { "epoch": 59.14269275028769, "grad_norm": 1.3404498100280762, "learning_rate": 0.0008171461449942464, "loss": 0.4251, "step": 205580 }, { "epoch": 59.14556962025316, "grad_norm": 1.2317039966583252, "learning_rate": 0.0008170886075949367, "loss": 0.4483, "step": 205590 }, { "epoch": 59.14844649021864, "grad_norm": 1.479475975036621, "learning_rate": 0.0008170310701956272, "loss": 0.4906, "step": 205600 }, { "epoch": 59.15132336018412, "grad_norm": 0.9502744078636169, "learning_rate": 0.0008169735327963176, "loss": 0.3977, "step": 205610 }, { "epoch": 59.1542002301496, "grad_norm": 0.7777419686317444, "learning_rate": 0.0008169159953970081, "loss": 0.4317, "step": 205620 }, { "epoch": 59.157077100115075, "grad_norm": 0.6384119391441345, "learning_rate": 0.0008168584579976985, "loss": 0.504, "step": 205630 }, { "epoch": 59.15995397008055, "grad_norm": 1.0076227188110352, "learning_rate": 0.000816800920598389, "loss": 0.4279, "step": 205640 }, { "epoch": 59.16283084004603, "grad_norm": 2.631150722503662, "learning_rate": 0.0008167433831990793, "loss": 0.4873, "step": 205650 }, { "epoch": 59.16570771001151, "grad_norm": 1.093630075454712, "learning_rate": 0.0008166858457997699, "loss": 0.3716, "step": 205660 }, { "epoch": 59.16858457997699, "grad_norm": 2.1742660999298096, "learning_rate": 0.0008166283084004603, "loss": 0.5737, "step": 205670 }, { "epoch": 59.17146144994246, "grad_norm": 1.5983349084854126, "learning_rate": 0.0008165707710011507, "loss": 0.3933, "step": 205680 }, { "epoch": 59.17433831990794, "grad_norm": 1.232298493385315, "learning_rate": 0.0008165132336018413, "loss": 0.4362, "step": 205690 }, { "epoch": 59.177215189873415, "grad_norm": 1.3473671674728394, "learning_rate": 0.0008164556962025317, "loss": 0.4845, "step": 205700 }, { "epoch": 59.1800920598389, "grad_norm": 1.4383001327514648, "learning_rate": 0.0008163981588032221, "loss": 0.5985, "step": 205710 }, { "epoch": 59.182968929804375, "grad_norm": 0.982668936252594, "learning_rate": 0.0008163406214039125, "loss": 0.4175, "step": 205720 }, { "epoch": 59.18584579976985, "grad_norm": 1.1191965341567993, "learning_rate": 0.0008162830840046031, "loss": 0.4582, "step": 205730 }, { "epoch": 59.18872266973533, "grad_norm": 0.5987817645072937, "learning_rate": 0.0008162255466052934, "loss": 0.4633, "step": 205740 }, { "epoch": 59.1915995397008, "grad_norm": 2.250807762145996, "learning_rate": 0.0008161680092059839, "loss": 0.6074, "step": 205750 }, { "epoch": 59.19447640966629, "grad_norm": 1.2777036428451538, "learning_rate": 0.0008161104718066743, "loss": 0.523, "step": 205760 }, { "epoch": 59.19735327963176, "grad_norm": 1.8705731630325317, "learning_rate": 0.0008160529344073648, "loss": 0.5077, "step": 205770 }, { "epoch": 59.20023014959724, "grad_norm": 0.9753017425537109, "learning_rate": 0.0008159953970080553, "loss": 0.4495, "step": 205780 }, { "epoch": 59.203107019562715, "grad_norm": 1.1828430891036987, "learning_rate": 0.0008159378596087457, "loss": 0.4148, "step": 205790 }, { "epoch": 59.20598388952819, "grad_norm": 1.1823711395263672, "learning_rate": 0.0008158803222094362, "loss": 0.4751, "step": 205800 }, { "epoch": 59.20886075949367, "grad_norm": 0.7778722643852234, "learning_rate": 0.0008158227848101266, "loss": 0.494, "step": 205810 }, { "epoch": 59.21173762945915, "grad_norm": 1.4132344722747803, "learning_rate": 0.0008157652474108171, "loss": 0.4303, "step": 205820 }, { "epoch": 59.21461449942463, "grad_norm": 1.0133565664291382, "learning_rate": 0.0008157077100115074, "loss": 0.3592, "step": 205830 }, { "epoch": 59.2174913693901, "grad_norm": 0.8577277660369873, "learning_rate": 0.000815650172612198, "loss": 0.4101, "step": 205840 }, { "epoch": 59.22036823935558, "grad_norm": 0.8050710558891296, "learning_rate": 0.0008155926352128884, "loss": 0.496, "step": 205850 }, { "epoch": 59.223245109321056, "grad_norm": 1.1349120140075684, "learning_rate": 0.0008155350978135788, "loss": 0.5016, "step": 205860 }, { "epoch": 59.22612197928654, "grad_norm": 0.8127169609069824, "learning_rate": 0.0008154775604142692, "loss": 0.5014, "step": 205870 }, { "epoch": 59.228998849252015, "grad_norm": 0.9361680150032043, "learning_rate": 0.0008154200230149598, "loss": 0.4506, "step": 205880 }, { "epoch": 59.23187571921749, "grad_norm": 1.290050983428955, "learning_rate": 0.0008153624856156502, "loss": 0.4926, "step": 205890 }, { "epoch": 59.23475258918297, "grad_norm": 1.7151437997817993, "learning_rate": 0.0008153049482163406, "loss": 0.4983, "step": 205900 }, { "epoch": 59.237629459148444, "grad_norm": 0.8653045296669006, "learning_rate": 0.0008152474108170312, "loss": 0.4151, "step": 205910 }, { "epoch": 59.24050632911393, "grad_norm": 1.8937891721725464, "learning_rate": 0.0008151898734177215, "loss": 0.5704, "step": 205920 }, { "epoch": 59.2433831990794, "grad_norm": 1.2103456258773804, "learning_rate": 0.000815132336018412, "loss": 0.4843, "step": 205930 }, { "epoch": 59.24626006904488, "grad_norm": 1.2313660383224487, "learning_rate": 0.0008150747986191024, "loss": 0.4609, "step": 205940 }, { "epoch": 59.249136939010356, "grad_norm": 1.6205381155014038, "learning_rate": 0.0008150172612197929, "loss": 0.4583, "step": 205950 }, { "epoch": 59.25201380897583, "grad_norm": 0.9306756258010864, "learning_rate": 0.0008149597238204833, "loss": 0.4524, "step": 205960 }, { "epoch": 59.254890678941315, "grad_norm": 1.011638879776001, "learning_rate": 0.0008149021864211738, "loss": 0.5094, "step": 205970 }, { "epoch": 59.25776754890679, "grad_norm": 1.4432706832885742, "learning_rate": 0.0008148446490218643, "loss": 0.7201, "step": 205980 }, { "epoch": 59.26064441887227, "grad_norm": 2.247241497039795, "learning_rate": 0.0008147871116225547, "loss": 0.5526, "step": 205990 }, { "epoch": 59.263521288837744, "grad_norm": 1.018797516822815, "learning_rate": 0.0008147295742232452, "loss": 0.4461, "step": 206000 }, { "epoch": 59.26639815880322, "grad_norm": 0.9035149812698364, "learning_rate": 0.0008146720368239355, "loss": 0.4163, "step": 206010 }, { "epoch": 59.269275028768696, "grad_norm": 1.1229863166809082, "learning_rate": 0.0008146144994246261, "loss": 0.4587, "step": 206020 }, { "epoch": 59.27215189873418, "grad_norm": 1.1367461681365967, "learning_rate": 0.0008145569620253165, "loss": 0.456, "step": 206030 }, { "epoch": 59.275028768699656, "grad_norm": 0.8096043467521667, "learning_rate": 0.0008144994246260069, "loss": 0.4138, "step": 206040 }, { "epoch": 59.27790563866513, "grad_norm": 1.774956464767456, "learning_rate": 0.0008144418872266973, "loss": 0.4641, "step": 206050 }, { "epoch": 59.28078250863061, "grad_norm": 1.7745475769042969, "learning_rate": 0.0008143843498273879, "loss": 0.392, "step": 206060 }, { "epoch": 59.283659378596084, "grad_norm": 1.2359877824783325, "learning_rate": 0.0008143268124280782, "loss": 0.4942, "step": 206070 }, { "epoch": 59.28653624856157, "grad_norm": 0.9441584348678589, "learning_rate": 0.0008142692750287687, "loss": 0.3745, "step": 206080 }, { "epoch": 59.289413118527044, "grad_norm": 0.7318493723869324, "learning_rate": 0.0008142117376294593, "loss": 0.4702, "step": 206090 }, { "epoch": 59.29228998849252, "grad_norm": 0.8711539506912231, "learning_rate": 0.0008141542002301496, "loss": 0.6606, "step": 206100 }, { "epoch": 59.295166858457996, "grad_norm": 1.0312747955322266, "learning_rate": 0.0008140966628308401, "loss": 0.4917, "step": 206110 }, { "epoch": 59.29804372842347, "grad_norm": 1.0412890911102295, "learning_rate": 0.0008140391254315305, "loss": 0.3984, "step": 206120 }, { "epoch": 59.300920598388956, "grad_norm": 1.0474706888198853, "learning_rate": 0.000813981588032221, "loss": 0.5625, "step": 206130 }, { "epoch": 59.30379746835443, "grad_norm": 0.9024300575256348, "learning_rate": 0.0008139240506329114, "loss": 0.484, "step": 206140 }, { "epoch": 59.30667433831991, "grad_norm": 1.6904221773147583, "learning_rate": 0.0008138665132336019, "loss": 0.4886, "step": 206150 }, { "epoch": 59.309551208285384, "grad_norm": 1.014079213142395, "learning_rate": 0.0008138089758342922, "loss": 0.498, "step": 206160 }, { "epoch": 59.31242807825086, "grad_norm": 0.8417145013809204, "learning_rate": 0.0008137514384349828, "loss": 0.454, "step": 206170 }, { "epoch": 59.315304948216344, "grad_norm": 1.4551725387573242, "learning_rate": 0.0008136939010356733, "loss": 0.4772, "step": 206180 }, { "epoch": 59.31818181818182, "grad_norm": 1.2694965600967407, "learning_rate": 0.0008136363636363636, "loss": 0.4331, "step": 206190 }, { "epoch": 59.321058688147296, "grad_norm": 1.6140395402908325, "learning_rate": 0.0008135788262370542, "loss": 0.5341, "step": 206200 }, { "epoch": 59.32393555811277, "grad_norm": 0.759391188621521, "learning_rate": 0.0008135212888377445, "loss": 0.4727, "step": 206210 }, { "epoch": 59.32681242807825, "grad_norm": 0.8061800003051758, "learning_rate": 0.000813463751438435, "loss": 0.4084, "step": 206220 }, { "epoch": 59.329689298043725, "grad_norm": 1.2679154872894287, "learning_rate": 0.0008134062140391254, "loss": 0.4331, "step": 206230 }, { "epoch": 59.33256616800921, "grad_norm": 0.9785436391830444, "learning_rate": 0.0008133486766398159, "loss": 0.3696, "step": 206240 }, { "epoch": 59.335443037974684, "grad_norm": 0.6249603033065796, "learning_rate": 0.0008132911392405063, "loss": 0.4329, "step": 206250 }, { "epoch": 59.33831990794016, "grad_norm": 1.629874348640442, "learning_rate": 0.0008132336018411968, "loss": 0.439, "step": 206260 }, { "epoch": 59.34119677790564, "grad_norm": 1.3522469997406006, "learning_rate": 0.0008131760644418872, "loss": 0.4868, "step": 206270 }, { "epoch": 59.34407364787111, "grad_norm": 1.4683761596679688, "learning_rate": 0.0008131185270425777, "loss": 0.5139, "step": 206280 }, { "epoch": 59.346950517836596, "grad_norm": 0.6557620167732239, "learning_rate": 0.0008130609896432682, "loss": 0.5065, "step": 206290 }, { "epoch": 59.34982738780207, "grad_norm": 1.4119001626968384, "learning_rate": 0.0008130034522439585, "loss": 0.4471, "step": 206300 }, { "epoch": 59.35270425776755, "grad_norm": 0.8812971115112305, "learning_rate": 0.0008129459148446491, "loss": 0.4305, "step": 206310 }, { "epoch": 59.355581127733025, "grad_norm": 0.8538916110992432, "learning_rate": 0.0008128883774453395, "loss": 0.5982, "step": 206320 }, { "epoch": 59.3584579976985, "grad_norm": 1.025591254234314, "learning_rate": 0.0008128308400460299, "loss": 0.5062, "step": 206330 }, { "epoch": 59.361334867663984, "grad_norm": 1.440254807472229, "learning_rate": 0.0008127733026467203, "loss": 0.4413, "step": 206340 }, { "epoch": 59.36421173762946, "grad_norm": 1.6503815650939941, "learning_rate": 0.0008127157652474109, "loss": 0.3656, "step": 206350 }, { "epoch": 59.36708860759494, "grad_norm": 1.1057161092758179, "learning_rate": 0.0008126582278481012, "loss": 0.4876, "step": 206360 }, { "epoch": 59.36996547756041, "grad_norm": 2.209841251373291, "learning_rate": 0.0008126006904487917, "loss": 0.4106, "step": 206370 }, { "epoch": 59.37284234752589, "grad_norm": 1.3476989269256592, "learning_rate": 0.0008125431530494823, "loss": 0.574, "step": 206380 }, { "epoch": 59.37571921749137, "grad_norm": 1.3490355014801025, "learning_rate": 0.0008124856156501726, "loss": 0.4457, "step": 206390 }, { "epoch": 59.37859608745685, "grad_norm": 0.9696698188781738, "learning_rate": 0.0008124280782508631, "loss": 0.4441, "step": 206400 }, { "epoch": 59.381472957422325, "grad_norm": 1.3373546600341797, "learning_rate": 0.0008123705408515535, "loss": 0.4783, "step": 206410 }, { "epoch": 59.3843498273878, "grad_norm": 1.9339874982833862, "learning_rate": 0.000812313003452244, "loss": 0.5649, "step": 206420 }, { "epoch": 59.38722669735328, "grad_norm": 0.9595092535018921, "learning_rate": 0.0008122554660529344, "loss": 0.3699, "step": 206430 }, { "epoch": 59.39010356731876, "grad_norm": 1.3765244483947754, "learning_rate": 0.0008121979286536249, "loss": 0.4969, "step": 206440 }, { "epoch": 59.39298043728424, "grad_norm": 1.5965341329574585, "learning_rate": 0.0008121403912543152, "loss": 0.4295, "step": 206450 }, { "epoch": 59.39585730724971, "grad_norm": 1.0440826416015625, "learning_rate": 0.0008120828538550058, "loss": 0.4144, "step": 206460 }, { "epoch": 59.39873417721519, "grad_norm": 0.8711215257644653, "learning_rate": 0.0008120253164556962, "loss": 0.5077, "step": 206470 }, { "epoch": 59.401611047180666, "grad_norm": 1.3438843488693237, "learning_rate": 0.0008119677790563866, "loss": 0.5705, "step": 206480 }, { "epoch": 59.40448791714614, "grad_norm": 1.0907940864562988, "learning_rate": 0.0008119102416570772, "loss": 0.4584, "step": 206490 }, { "epoch": 59.407364787111625, "grad_norm": 1.1750918626785278, "learning_rate": 0.0008118527042577676, "loss": 0.5395, "step": 206500 }, { "epoch": 59.4102416570771, "grad_norm": 1.2397072315216064, "learning_rate": 0.000811795166858458, "loss": 0.5314, "step": 206510 }, { "epoch": 59.41311852704258, "grad_norm": 1.7955549955368042, "learning_rate": 0.0008117376294591484, "loss": 0.639, "step": 206520 }, { "epoch": 59.415995397008054, "grad_norm": 0.9578474164009094, "learning_rate": 0.000811680092059839, "loss": 0.4367, "step": 206530 }, { "epoch": 59.41887226697353, "grad_norm": 1.0621707439422607, "learning_rate": 0.0008116225546605293, "loss": 0.3707, "step": 206540 }, { "epoch": 59.42174913693901, "grad_norm": 1.82184636592865, "learning_rate": 0.0008115650172612198, "loss": 0.4628, "step": 206550 }, { "epoch": 59.42462600690449, "grad_norm": 1.8672869205474854, "learning_rate": 0.0008115074798619103, "loss": 0.435, "step": 206560 }, { "epoch": 59.427502876869966, "grad_norm": 1.2676081657409668, "learning_rate": 0.0008114499424626007, "loss": 0.4254, "step": 206570 }, { "epoch": 59.43037974683544, "grad_norm": 0.8056090474128723, "learning_rate": 0.0008113924050632911, "loss": 0.4826, "step": 206580 }, { "epoch": 59.43325661680092, "grad_norm": 1.0017672777175903, "learning_rate": 0.0008113348676639816, "loss": 0.4622, "step": 206590 }, { "epoch": 59.4361334867664, "grad_norm": 1.7179625034332275, "learning_rate": 0.0008112773302646721, "loss": 0.5539, "step": 206600 }, { "epoch": 59.43901035673188, "grad_norm": 0.9546042680740356, "learning_rate": 0.0008112197928653625, "loss": 0.5187, "step": 206610 }, { "epoch": 59.441887226697354, "grad_norm": 0.8853949904441833, "learning_rate": 0.000811162255466053, "loss": 0.3852, "step": 206620 }, { "epoch": 59.44476409666283, "grad_norm": 1.236884355545044, "learning_rate": 0.0008111047180667433, "loss": 0.42, "step": 206630 }, { "epoch": 59.447640966628306, "grad_norm": 1.3111350536346436, "learning_rate": 0.0008110471806674339, "loss": 0.5419, "step": 206640 }, { "epoch": 59.45051783659379, "grad_norm": 1.531493902206421, "learning_rate": 0.0008109896432681243, "loss": 0.5309, "step": 206650 }, { "epoch": 59.453394706559266, "grad_norm": 1.3469418287277222, "learning_rate": 0.0008109321058688147, "loss": 0.4279, "step": 206660 }, { "epoch": 59.45627157652474, "grad_norm": 0.8010450601577759, "learning_rate": 0.0008108745684695052, "loss": 0.598, "step": 206670 }, { "epoch": 59.45914844649022, "grad_norm": 1.2201476097106934, "learning_rate": 0.0008108170310701957, "loss": 0.4604, "step": 206680 }, { "epoch": 59.462025316455694, "grad_norm": 0.956597089767456, "learning_rate": 0.000810759493670886, "loss": 0.4494, "step": 206690 }, { "epoch": 59.46490218642117, "grad_norm": 1.29062020778656, "learning_rate": 0.0008107019562715765, "loss": 0.4802, "step": 206700 }, { "epoch": 59.467779056386654, "grad_norm": 1.2259399890899658, "learning_rate": 0.0008106444188722671, "loss": 0.4268, "step": 206710 }, { "epoch": 59.47065592635213, "grad_norm": 1.054895043373108, "learning_rate": 0.0008105868814729574, "loss": 0.4115, "step": 206720 }, { "epoch": 59.473532796317606, "grad_norm": 1.1512647867202759, "learning_rate": 0.0008105293440736479, "loss": 0.4911, "step": 206730 }, { "epoch": 59.47640966628308, "grad_norm": 0.7560882568359375, "learning_rate": 0.0008104718066743383, "loss": 0.5115, "step": 206740 }, { "epoch": 59.47928653624856, "grad_norm": 0.795682430267334, "learning_rate": 0.0008104142692750288, "loss": 0.4697, "step": 206750 }, { "epoch": 59.48216340621404, "grad_norm": 0.9402971863746643, "learning_rate": 0.0008103567318757192, "loss": 0.4798, "step": 206760 }, { "epoch": 59.48504027617952, "grad_norm": 0.5638743042945862, "learning_rate": 0.0008102991944764097, "loss": 0.4523, "step": 206770 }, { "epoch": 59.487917146144994, "grad_norm": 2.3129687309265137, "learning_rate": 0.0008102416570771002, "loss": 0.4976, "step": 206780 }, { "epoch": 59.49079401611047, "grad_norm": 1.2228611707687378, "learning_rate": 0.0008101841196777906, "loss": 0.5073, "step": 206790 }, { "epoch": 59.49367088607595, "grad_norm": 1.5725611448287964, "learning_rate": 0.0008101265822784811, "loss": 0.4889, "step": 206800 }, { "epoch": 59.49654775604143, "grad_norm": 2.9794962406158447, "learning_rate": 0.0008100690448791714, "loss": 0.4393, "step": 206810 }, { "epoch": 59.499424626006906, "grad_norm": 0.736854612827301, "learning_rate": 0.000810011507479862, "loss": 0.4731, "step": 206820 }, { "epoch": 59.50230149597238, "grad_norm": 1.3610516786575317, "learning_rate": 0.0008099539700805524, "loss": 0.5029, "step": 206830 }, { "epoch": 59.50517836593786, "grad_norm": 1.0235750675201416, "learning_rate": 0.0008098964326812428, "loss": 0.5681, "step": 206840 }, { "epoch": 59.508055235903335, "grad_norm": 1.4721583127975464, "learning_rate": 0.0008098388952819332, "loss": 0.3976, "step": 206850 }, { "epoch": 59.51093210586882, "grad_norm": 1.1402674913406372, "learning_rate": 0.0008097813578826238, "loss": 0.4259, "step": 206860 }, { "epoch": 59.513808975834294, "grad_norm": 2.8683202266693115, "learning_rate": 0.0008097238204833141, "loss": 0.6266, "step": 206870 }, { "epoch": 59.51668584579977, "grad_norm": 1.261864423751831, "learning_rate": 0.0008096662830840046, "loss": 0.4539, "step": 206880 }, { "epoch": 59.51956271576525, "grad_norm": 0.8243156671524048, "learning_rate": 0.0008096087456846952, "loss": 0.4117, "step": 206890 }, { "epoch": 59.52243958573072, "grad_norm": 1.1122113466262817, "learning_rate": 0.0008095512082853855, "loss": 0.4994, "step": 206900 }, { "epoch": 59.5253164556962, "grad_norm": 0.905716061592102, "learning_rate": 0.000809493670886076, "loss": 0.5658, "step": 206910 }, { "epoch": 59.52819332566168, "grad_norm": 1.4789643287658691, "learning_rate": 0.0008094361334867664, "loss": 0.4976, "step": 206920 }, { "epoch": 59.53107019562716, "grad_norm": 2.1649162769317627, "learning_rate": 0.0008093785960874569, "loss": 0.5117, "step": 206930 }, { "epoch": 59.533947065592635, "grad_norm": 1.9016636610031128, "learning_rate": 0.0008093210586881473, "loss": 0.466, "step": 206940 }, { "epoch": 59.53682393555811, "grad_norm": 2.065051317214966, "learning_rate": 0.0008092635212888378, "loss": 0.559, "step": 206950 }, { "epoch": 59.53970080552359, "grad_norm": 2.188594102859497, "learning_rate": 0.0008092059838895282, "loss": 0.5443, "step": 206960 }, { "epoch": 59.54257767548907, "grad_norm": 0.9362998008728027, "learning_rate": 0.0008091484464902187, "loss": 0.418, "step": 206970 }, { "epoch": 59.54545454545455, "grad_norm": 1.879330039024353, "learning_rate": 0.0008090909090909092, "loss": 0.462, "step": 206980 }, { "epoch": 59.54833141542002, "grad_norm": 0.9163966774940491, "learning_rate": 0.0008090333716915995, "loss": 0.4542, "step": 206990 }, { "epoch": 59.5512082853855, "grad_norm": 1.348137617111206, "learning_rate": 0.0008089758342922901, "loss": 0.5802, "step": 207000 }, { "epoch": 59.554085155350975, "grad_norm": 1.8142390251159668, "learning_rate": 0.0008089182968929804, "loss": 0.5091, "step": 207010 }, { "epoch": 59.55696202531646, "grad_norm": 1.4725769758224487, "learning_rate": 0.0008088607594936709, "loss": 0.5487, "step": 207020 }, { "epoch": 59.559838895281935, "grad_norm": 1.257804036140442, "learning_rate": 0.0008088032220943613, "loss": 0.5093, "step": 207030 }, { "epoch": 59.56271576524741, "grad_norm": 1.5005638599395752, "learning_rate": 0.0008087456846950518, "loss": 0.4748, "step": 207040 }, { "epoch": 59.56559263521289, "grad_norm": 1.6993815898895264, "learning_rate": 0.0008086881472957422, "loss": 0.5334, "step": 207050 }, { "epoch": 59.56846950517836, "grad_norm": 1.6168792247772217, "learning_rate": 0.0008086306098964327, "loss": 0.521, "step": 207060 }, { "epoch": 59.57134637514385, "grad_norm": 0.9769168496131897, "learning_rate": 0.0008085730724971231, "loss": 0.4207, "step": 207070 }, { "epoch": 59.57422324510932, "grad_norm": 0.9688934683799744, "learning_rate": 0.0008085155350978136, "loss": 0.4893, "step": 207080 }, { "epoch": 59.5771001150748, "grad_norm": 1.2918776273727417, "learning_rate": 0.0008084579976985041, "loss": 0.4802, "step": 207090 }, { "epoch": 59.579976985040275, "grad_norm": 1.8204032182693481, "learning_rate": 0.0008084004602991944, "loss": 0.5172, "step": 207100 }, { "epoch": 59.58285385500575, "grad_norm": 1.4248859882354736, "learning_rate": 0.000808342922899885, "loss": 0.459, "step": 207110 }, { "epoch": 59.58573072497123, "grad_norm": 1.3753252029418945, "learning_rate": 0.0008082853855005754, "loss": 0.5076, "step": 207120 }, { "epoch": 59.58860759493671, "grad_norm": 1.8138985633850098, "learning_rate": 0.0008082278481012658, "loss": 0.4249, "step": 207130 }, { "epoch": 59.59148446490219, "grad_norm": 1.244494080543518, "learning_rate": 0.0008081703107019562, "loss": 0.4274, "step": 207140 }, { "epoch": 59.59436133486766, "grad_norm": 1.8428125381469727, "learning_rate": 0.0008081127733026468, "loss": 0.4281, "step": 207150 }, { "epoch": 59.59723820483314, "grad_norm": 1.0228393077850342, "learning_rate": 0.0008080552359033371, "loss": 0.4776, "step": 207160 }, { "epoch": 59.600115074798616, "grad_norm": 0.757165789604187, "learning_rate": 0.0008079976985040276, "loss": 0.5237, "step": 207170 }, { "epoch": 59.6029919447641, "grad_norm": 1.4808088541030884, "learning_rate": 0.0008079401611047182, "loss": 0.4465, "step": 207180 }, { "epoch": 59.605868814729575, "grad_norm": 1.673977017402649, "learning_rate": 0.0008078826237054085, "loss": 0.4966, "step": 207190 }, { "epoch": 59.60874568469505, "grad_norm": 0.6946340203285217, "learning_rate": 0.000807825086306099, "loss": 0.4947, "step": 207200 }, { "epoch": 59.61162255466053, "grad_norm": 1.2232568264007568, "learning_rate": 0.0008077675489067894, "loss": 0.5424, "step": 207210 }, { "epoch": 59.614499424626004, "grad_norm": 1.128644347190857, "learning_rate": 0.0008077100115074799, "loss": 0.5159, "step": 207220 }, { "epoch": 59.61737629459149, "grad_norm": 0.9265565872192383, "learning_rate": 0.0008076524741081703, "loss": 0.5188, "step": 207230 }, { "epoch": 59.620253164556964, "grad_norm": 1.8661216497421265, "learning_rate": 0.0008075949367088608, "loss": 0.457, "step": 207240 }, { "epoch": 59.62313003452244, "grad_norm": 1.961944580078125, "learning_rate": 0.0008075373993095512, "loss": 0.5493, "step": 207250 }, { "epoch": 59.626006904487916, "grad_norm": 2.2951951026916504, "learning_rate": 0.0008074798619102417, "loss": 0.5519, "step": 207260 }, { "epoch": 59.62888377445339, "grad_norm": 1.4252582788467407, "learning_rate": 0.0008074223245109321, "loss": 0.6414, "step": 207270 }, { "epoch": 59.631760644418875, "grad_norm": 0.9731642603874207, "learning_rate": 0.0008073647871116225, "loss": 0.4475, "step": 207280 }, { "epoch": 59.63463751438435, "grad_norm": 1.1350343227386475, "learning_rate": 0.0008073072497123131, "loss": 0.4225, "step": 207290 }, { "epoch": 59.63751438434983, "grad_norm": 2.6975932121276855, "learning_rate": 0.0008072497123130035, "loss": 0.4444, "step": 207300 }, { "epoch": 59.640391254315304, "grad_norm": 0.9328523874282837, "learning_rate": 0.0008071921749136939, "loss": 0.4706, "step": 207310 }, { "epoch": 59.64326812428078, "grad_norm": 1.8414913415908813, "learning_rate": 0.0008071346375143843, "loss": 0.4324, "step": 207320 }, { "epoch": 59.64614499424626, "grad_norm": 1.4704324007034302, "learning_rate": 0.0008070771001150749, "loss": 0.4365, "step": 207330 }, { "epoch": 59.64902186421174, "grad_norm": 0.726656436920166, "learning_rate": 0.0008070195627157652, "loss": 0.369, "step": 207340 }, { "epoch": 59.651898734177216, "grad_norm": 1.5384186506271362, "learning_rate": 0.0008069620253164557, "loss": 0.4177, "step": 207350 }, { "epoch": 59.65477560414269, "grad_norm": 0.994818389415741, "learning_rate": 0.0008069044879171462, "loss": 0.5476, "step": 207360 }, { "epoch": 59.65765247410817, "grad_norm": 2.0952606201171875, "learning_rate": 0.0008068469505178366, "loss": 0.629, "step": 207370 }, { "epoch": 59.660529344073645, "grad_norm": 1.6227638721466064, "learning_rate": 0.000806789413118527, "loss": 0.4795, "step": 207380 }, { "epoch": 59.66340621403913, "grad_norm": 1.3212661743164062, "learning_rate": 0.0008067318757192175, "loss": 0.5395, "step": 207390 }, { "epoch": 59.666283084004604, "grad_norm": 1.0446126461029053, "learning_rate": 0.000806674338319908, "loss": 0.5533, "step": 207400 }, { "epoch": 59.66915995397008, "grad_norm": 1.2276684045791626, "learning_rate": 0.0008066168009205984, "loss": 0.5987, "step": 207410 }, { "epoch": 59.67203682393556, "grad_norm": 1.5389333963394165, "learning_rate": 0.0008065592635212889, "loss": 0.4555, "step": 207420 }, { "epoch": 59.67491369390103, "grad_norm": 1.3665200471878052, "learning_rate": 0.0008065017261219792, "loss": 0.5394, "step": 207430 }, { "epoch": 59.677790563866516, "grad_norm": 0.9828131794929504, "learning_rate": 0.0008064441887226698, "loss": 0.4253, "step": 207440 }, { "epoch": 59.68066743383199, "grad_norm": 1.1004638671875, "learning_rate": 0.0008063866513233602, "loss": 0.4949, "step": 207450 }, { "epoch": 59.68354430379747, "grad_norm": 1.5910964012145996, "learning_rate": 0.0008063291139240506, "loss": 0.5408, "step": 207460 }, { "epoch": 59.686421173762945, "grad_norm": 1.741536021232605, "learning_rate": 0.0008062715765247411, "loss": 0.5851, "step": 207470 }, { "epoch": 59.68929804372842, "grad_norm": 1.4280340671539307, "learning_rate": 0.0008062140391254316, "loss": 0.4771, "step": 207480 }, { "epoch": 59.692174913693904, "grad_norm": 2.2560698986053467, "learning_rate": 0.000806156501726122, "loss": 0.4809, "step": 207490 }, { "epoch": 59.69505178365938, "grad_norm": 1.053544521331787, "learning_rate": 0.0008060989643268124, "loss": 0.554, "step": 207500 }, { "epoch": 59.69792865362486, "grad_norm": 1.114872694015503, "learning_rate": 0.000806041426927503, "loss": 0.5015, "step": 207510 }, { "epoch": 59.70080552359033, "grad_norm": 1.7664064168930054, "learning_rate": 0.0008059838895281933, "loss": 0.6157, "step": 207520 }, { "epoch": 59.70368239355581, "grad_norm": 1.8391352891921997, "learning_rate": 0.0008059263521288838, "loss": 0.5199, "step": 207530 }, { "epoch": 59.70655926352129, "grad_norm": 1.605640172958374, "learning_rate": 0.0008058688147295743, "loss": 0.5336, "step": 207540 }, { "epoch": 59.70943613348677, "grad_norm": 0.5951908826828003, "learning_rate": 0.0008058112773302647, "loss": 0.5014, "step": 207550 }, { "epoch": 59.712313003452245, "grad_norm": 1.2752561569213867, "learning_rate": 0.0008057537399309551, "loss": 0.4963, "step": 207560 }, { "epoch": 59.71518987341772, "grad_norm": 1.8209257125854492, "learning_rate": 0.0008056962025316456, "loss": 0.4879, "step": 207570 }, { "epoch": 59.7180667433832, "grad_norm": 0.9163201451301575, "learning_rate": 0.000805638665132336, "loss": 0.4042, "step": 207580 }, { "epoch": 59.72094361334867, "grad_norm": 0.8760365843772888, "learning_rate": 0.0008055811277330265, "loss": 0.4612, "step": 207590 }, { "epoch": 59.72382048331416, "grad_norm": 1.0857654809951782, "learning_rate": 0.000805523590333717, "loss": 0.4713, "step": 207600 }, { "epoch": 59.72669735327963, "grad_norm": 0.865135908126831, "learning_rate": 0.0008054660529344073, "loss": 0.3904, "step": 207610 }, { "epoch": 59.72957422324511, "grad_norm": 2.318692445755005, "learning_rate": 0.0008054085155350979, "loss": 0.5126, "step": 207620 }, { "epoch": 59.732451093210585, "grad_norm": 1.1517540216445923, "learning_rate": 0.0008053509781357883, "loss": 0.4873, "step": 207630 }, { "epoch": 59.73532796317606, "grad_norm": 1.1454254388809204, "learning_rate": 0.0008052934407364787, "loss": 0.4771, "step": 207640 }, { "epoch": 59.738204833141545, "grad_norm": 1.0571085214614868, "learning_rate": 0.0008052359033371692, "loss": 0.4442, "step": 207650 }, { "epoch": 59.74108170310702, "grad_norm": 1.6974226236343384, "learning_rate": 0.0008051783659378597, "loss": 0.5682, "step": 207660 }, { "epoch": 59.7439585730725, "grad_norm": 1.4298053979873657, "learning_rate": 0.00080512082853855, "loss": 0.5103, "step": 207670 }, { "epoch": 59.74683544303797, "grad_norm": 1.860414981842041, "learning_rate": 0.0008050632911392405, "loss": 0.4575, "step": 207680 }, { "epoch": 59.74971231300345, "grad_norm": 2.2880005836486816, "learning_rate": 0.0008050057537399311, "loss": 0.5644, "step": 207690 }, { "epoch": 59.75258918296893, "grad_norm": 1.4413201808929443, "learning_rate": 0.0008049482163406214, "loss": 0.5586, "step": 207700 }, { "epoch": 59.75546605293441, "grad_norm": 1.5604968070983887, "learning_rate": 0.0008048906789413119, "loss": 0.6212, "step": 207710 }, { "epoch": 59.758342922899885, "grad_norm": 1.776356816291809, "learning_rate": 0.0008048331415420023, "loss": 0.3847, "step": 207720 }, { "epoch": 59.76121979286536, "grad_norm": 0.8786848783493042, "learning_rate": 0.0008047756041426928, "loss": 0.5028, "step": 207730 }, { "epoch": 59.76409666283084, "grad_norm": 0.7092501521110535, "learning_rate": 0.0008047180667433832, "loss": 0.3374, "step": 207740 }, { "epoch": 59.76697353279632, "grad_norm": 1.4738163948059082, "learning_rate": 0.0008046605293440737, "loss": 0.5502, "step": 207750 }, { "epoch": 59.7698504027618, "grad_norm": 0.8582972884178162, "learning_rate": 0.0008046029919447641, "loss": 0.405, "step": 207760 }, { "epoch": 59.77272727272727, "grad_norm": 1.6669561862945557, "learning_rate": 0.0008045454545454546, "loss": 0.4967, "step": 207770 }, { "epoch": 59.77560414269275, "grad_norm": 1.5401350259780884, "learning_rate": 0.000804487917146145, "loss": 0.4797, "step": 207780 }, { "epoch": 59.778481012658226, "grad_norm": 1.116623878479004, "learning_rate": 0.0008044303797468354, "loss": 0.4531, "step": 207790 }, { "epoch": 59.7813578826237, "grad_norm": 1.0588194131851196, "learning_rate": 0.000804372842347526, "loss": 0.4953, "step": 207800 }, { "epoch": 59.784234752589185, "grad_norm": 0.8544493317604065, "learning_rate": 0.0008043153049482164, "loss": 0.4525, "step": 207810 }, { "epoch": 59.78711162255466, "grad_norm": 1.0115370750427246, "learning_rate": 0.0008042577675489068, "loss": 0.6222, "step": 207820 }, { "epoch": 59.78998849252014, "grad_norm": 1.0856379270553589, "learning_rate": 0.0008042002301495972, "loss": 0.5482, "step": 207830 }, { "epoch": 59.792865362485614, "grad_norm": 1.2385679483413696, "learning_rate": 0.0008041426927502877, "loss": 0.4911, "step": 207840 }, { "epoch": 59.79574223245109, "grad_norm": 1.5884010791778564, "learning_rate": 0.0008040851553509781, "loss": 0.5398, "step": 207850 }, { "epoch": 59.79861910241657, "grad_norm": 1.382615566253662, "learning_rate": 0.0008040276179516686, "loss": 0.441, "step": 207860 }, { "epoch": 59.80149597238205, "grad_norm": 1.3006128072738647, "learning_rate": 0.000803970080552359, "loss": 0.4894, "step": 207870 }, { "epoch": 59.804372842347526, "grad_norm": 2.0357625484466553, "learning_rate": 0.0008039125431530495, "loss": 0.4506, "step": 207880 }, { "epoch": 59.807249712313, "grad_norm": 1.8595153093338013, "learning_rate": 0.00080385500575374, "loss": 0.3907, "step": 207890 }, { "epoch": 59.81012658227848, "grad_norm": 0.7983515858650208, "learning_rate": 0.0008037974683544303, "loss": 0.5247, "step": 207900 }, { "epoch": 59.81300345224396, "grad_norm": 1.6666011810302734, "learning_rate": 0.0008037399309551209, "loss": 0.4639, "step": 207910 }, { "epoch": 59.81588032220944, "grad_norm": 1.91599440574646, "learning_rate": 0.0008036823935558113, "loss": 0.4994, "step": 207920 }, { "epoch": 59.818757192174914, "grad_norm": 1.5020831823349, "learning_rate": 0.0008036248561565017, "loss": 0.4938, "step": 207930 }, { "epoch": 59.82163406214039, "grad_norm": 0.9848323464393616, "learning_rate": 0.0008035673187571922, "loss": 0.462, "step": 207940 }, { "epoch": 59.824510932105866, "grad_norm": 0.9842317700386047, "learning_rate": 0.0008035097813578827, "loss": 0.5013, "step": 207950 }, { "epoch": 59.82738780207135, "grad_norm": 1.4949986934661865, "learning_rate": 0.000803452243958573, "loss": 0.5738, "step": 207960 }, { "epoch": 59.830264672036826, "grad_norm": 0.742776095867157, "learning_rate": 0.0008033947065592635, "loss": 0.4887, "step": 207970 }, { "epoch": 59.8331415420023, "grad_norm": 1.385859489440918, "learning_rate": 0.000803337169159954, "loss": 0.4722, "step": 207980 }, { "epoch": 59.83601841196778, "grad_norm": 1.308212161064148, "learning_rate": 0.0008032796317606444, "loss": 0.4642, "step": 207990 }, { "epoch": 59.838895281933254, "grad_norm": 1.610670804977417, "learning_rate": 0.0008032220943613349, "loss": 0.5828, "step": 208000 }, { "epoch": 59.84177215189874, "grad_norm": 0.8404146432876587, "learning_rate": 0.0008031645569620253, "loss": 0.5695, "step": 208010 }, { "epoch": 59.844649021864214, "grad_norm": 2.1267082691192627, "learning_rate": 0.0008031070195627158, "loss": 0.5165, "step": 208020 }, { "epoch": 59.84752589182969, "grad_norm": 1.0921748876571655, "learning_rate": 0.0008030494821634062, "loss": 0.555, "step": 208030 }, { "epoch": 59.850402761795166, "grad_norm": 1.178943157196045, "learning_rate": 0.0008029919447640967, "loss": 0.4415, "step": 208040 }, { "epoch": 59.85327963176064, "grad_norm": 2.6006734371185303, "learning_rate": 0.0008029344073647871, "loss": 0.4755, "step": 208050 }, { "epoch": 59.85615650172612, "grad_norm": 1.8400864601135254, "learning_rate": 0.0008028768699654776, "loss": 0.5197, "step": 208060 }, { "epoch": 59.8590333716916, "grad_norm": 1.0843361616134644, "learning_rate": 0.000802819332566168, "loss": 0.5551, "step": 208070 }, { "epoch": 59.86191024165708, "grad_norm": 1.1407443284988403, "learning_rate": 0.0008027617951668584, "loss": 0.3816, "step": 208080 }, { "epoch": 59.864787111622555, "grad_norm": 0.7932084202766418, "learning_rate": 0.000802704257767549, "loss": 0.4697, "step": 208090 }, { "epoch": 59.86766398158803, "grad_norm": 2.049145221710205, "learning_rate": 0.0008026467203682394, "loss": 0.5489, "step": 208100 }, { "epoch": 59.87054085155351, "grad_norm": 1.029874324798584, "learning_rate": 0.0008025891829689298, "loss": 0.4768, "step": 208110 }, { "epoch": 59.87341772151899, "grad_norm": 2.099463701248169, "learning_rate": 0.0008025316455696202, "loss": 0.5087, "step": 208120 }, { "epoch": 59.876294591484466, "grad_norm": 1.7633707523345947, "learning_rate": 0.0008024741081703108, "loss": 0.5295, "step": 208130 }, { "epoch": 59.87917146144994, "grad_norm": 1.2637577056884766, "learning_rate": 0.0008024165707710011, "loss": 0.5378, "step": 208140 }, { "epoch": 59.88204833141542, "grad_norm": 1.0366995334625244, "learning_rate": 0.0008023590333716916, "loss": 0.4646, "step": 208150 }, { "epoch": 59.884925201380895, "grad_norm": 1.17568039894104, "learning_rate": 0.0008023014959723821, "loss": 0.528, "step": 208160 }, { "epoch": 59.88780207134638, "grad_norm": 0.7820009589195251, "learning_rate": 0.0008022439585730725, "loss": 0.4279, "step": 208170 }, { "epoch": 59.890678941311855, "grad_norm": 0.8636986613273621, "learning_rate": 0.000802186421173763, "loss": 0.4417, "step": 208180 }, { "epoch": 59.89355581127733, "grad_norm": 1.1451835632324219, "learning_rate": 0.0008021288837744534, "loss": 0.4456, "step": 208190 }, { "epoch": 59.89643268124281, "grad_norm": 1.7522294521331787, "learning_rate": 0.0008020713463751439, "loss": 0.5532, "step": 208200 }, { "epoch": 59.89930955120828, "grad_norm": 0.7499272227287292, "learning_rate": 0.0008020138089758343, "loss": 0.5759, "step": 208210 }, { "epoch": 59.90218642117377, "grad_norm": 1.3131170272827148, "learning_rate": 0.0008019562715765248, "loss": 0.5311, "step": 208220 }, { "epoch": 59.90506329113924, "grad_norm": 1.4882786273956299, "learning_rate": 0.0008018987341772152, "loss": 0.4999, "step": 208230 }, { "epoch": 59.90794016110472, "grad_norm": 1.2885911464691162, "learning_rate": 0.0008018411967779057, "loss": 0.4595, "step": 208240 }, { "epoch": 59.910817031070195, "grad_norm": 0.9264880418777466, "learning_rate": 0.0008017836593785961, "loss": 0.4214, "step": 208250 }, { "epoch": 59.91369390103567, "grad_norm": 1.135282278060913, "learning_rate": 0.0008017261219792865, "loss": 0.4446, "step": 208260 }, { "epoch": 59.91657077100115, "grad_norm": 0.985120415687561, "learning_rate": 0.000801668584579977, "loss": 0.4923, "step": 208270 }, { "epoch": 59.91944764096663, "grad_norm": 1.5172929763793945, "learning_rate": 0.0008016110471806675, "loss": 0.4694, "step": 208280 }, { "epoch": 59.92232451093211, "grad_norm": 1.975868821144104, "learning_rate": 0.0008015535097813578, "loss": 0.4815, "step": 208290 }, { "epoch": 59.92520138089758, "grad_norm": 1.479912281036377, "learning_rate": 0.0008014959723820483, "loss": 0.536, "step": 208300 }, { "epoch": 59.92807825086306, "grad_norm": 1.3317439556121826, "learning_rate": 0.0008014384349827389, "loss": 0.6649, "step": 208310 }, { "epoch": 59.930955120828536, "grad_norm": 0.7249622941017151, "learning_rate": 0.0008013808975834292, "loss": 0.3937, "step": 208320 }, { "epoch": 59.93383199079402, "grad_norm": 1.368200421333313, "learning_rate": 0.0008013233601841197, "loss": 0.4597, "step": 208330 }, { "epoch": 59.936708860759495, "grad_norm": 1.2566193342208862, "learning_rate": 0.0008012658227848102, "loss": 0.486, "step": 208340 }, { "epoch": 59.93958573072497, "grad_norm": 1.0008593797683716, "learning_rate": 0.0008012082853855006, "loss": 0.6291, "step": 208350 }, { "epoch": 59.94246260069045, "grad_norm": 1.644654631614685, "learning_rate": 0.000801150747986191, "loss": 0.4779, "step": 208360 }, { "epoch": 59.945339470655924, "grad_norm": 1.578657865524292, "learning_rate": 0.0008010932105868815, "loss": 0.4555, "step": 208370 }, { "epoch": 59.94821634062141, "grad_norm": 1.1000036001205444, "learning_rate": 0.000801035673187572, "loss": 0.4798, "step": 208380 }, { "epoch": 59.95109321058688, "grad_norm": 1.7388852834701538, "learning_rate": 0.0008009781357882624, "loss": 0.4662, "step": 208390 }, { "epoch": 59.95397008055236, "grad_norm": 1.316361665725708, "learning_rate": 0.0008009205983889529, "loss": 0.5133, "step": 208400 }, { "epoch": 59.956846950517836, "grad_norm": 2.136636734008789, "learning_rate": 0.0008008630609896432, "loss": 0.5804, "step": 208410 }, { "epoch": 59.95972382048331, "grad_norm": 2.64282488822937, "learning_rate": 0.0008008055235903338, "loss": 0.4663, "step": 208420 }, { "epoch": 59.962600690448795, "grad_norm": 1.7587107419967651, "learning_rate": 0.0008007479861910242, "loss": 0.6344, "step": 208430 }, { "epoch": 59.96547756041427, "grad_norm": 1.9612765312194824, "learning_rate": 0.0008006904487917146, "loss": 0.5452, "step": 208440 }, { "epoch": 59.96835443037975, "grad_norm": 0.944869875907898, "learning_rate": 0.0008006329113924051, "loss": 0.5598, "step": 208450 }, { "epoch": 59.971231300345224, "grad_norm": 2.988795757293701, "learning_rate": 0.0008005753739930956, "loss": 0.4795, "step": 208460 }, { "epoch": 59.9741081703107, "grad_norm": 1.5369291305541992, "learning_rate": 0.0008005178365937859, "loss": 0.5211, "step": 208470 }, { "epoch": 59.976985040276176, "grad_norm": 1.2440426349639893, "learning_rate": 0.0008004602991944764, "loss": 0.4612, "step": 208480 }, { "epoch": 59.97986191024166, "grad_norm": 1.8296401500701904, "learning_rate": 0.000800402761795167, "loss": 0.5246, "step": 208490 }, { "epoch": 59.982738780207136, "grad_norm": 1.6324574947357178, "learning_rate": 0.0008003452243958573, "loss": 0.563, "step": 208500 }, { "epoch": 59.98561565017261, "grad_norm": 1.3333410024642944, "learning_rate": 0.0008002876869965478, "loss": 0.5916, "step": 208510 }, { "epoch": 59.98849252013809, "grad_norm": 1.1336040496826172, "learning_rate": 0.0008002301495972382, "loss": 0.4991, "step": 208520 }, { "epoch": 59.991369390103564, "grad_norm": 0.9782961010932922, "learning_rate": 0.0008001726121979287, "loss": 0.4574, "step": 208530 }, { "epoch": 59.99424626006905, "grad_norm": 1.3352617025375366, "learning_rate": 0.0008001150747986191, "loss": 0.5102, "step": 208540 }, { "epoch": 59.997123130034524, "grad_norm": 0.8084611892700195, "learning_rate": 0.0008000575373993096, "loss": 0.4541, "step": 208550 }, { "epoch": 60.0, "grad_norm": 1.6994714736938477, "learning_rate": 0.0008, "loss": 0.3967, "step": 208560 }, { "epoch": 60.002876869965476, "grad_norm": 2.568627119064331, "learning_rate": 0.0007999424626006905, "loss": 0.389, "step": 208570 }, { "epoch": 60.00575373993095, "grad_norm": 1.7442069053649902, "learning_rate": 0.000799884925201381, "loss": 0.4186, "step": 208580 }, { "epoch": 60.008630609896436, "grad_norm": 0.6019251346588135, "learning_rate": 0.0007998273878020713, "loss": 0.4555, "step": 208590 }, { "epoch": 60.01150747986191, "grad_norm": 1.076005220413208, "learning_rate": 0.0007997698504027619, "loss": 0.3704, "step": 208600 }, { "epoch": 60.01438434982739, "grad_norm": 0.8061358332633972, "learning_rate": 0.0007997123130034523, "loss": 0.3101, "step": 208610 }, { "epoch": 60.017261219792864, "grad_norm": 2.799072504043579, "learning_rate": 0.0007996547756041427, "loss": 0.412, "step": 208620 }, { "epoch": 60.02013808975834, "grad_norm": 1.5471421480178833, "learning_rate": 0.0007995972382048332, "loss": 0.4955, "step": 208630 }, { "epoch": 60.023014959723824, "grad_norm": 1.1443322896957397, "learning_rate": 0.0007995397008055237, "loss": 0.5105, "step": 208640 }, { "epoch": 60.0258918296893, "grad_norm": 1.0812029838562012, "learning_rate": 0.000799482163406214, "loss": 0.4813, "step": 208650 }, { "epoch": 60.028768699654776, "grad_norm": 1.1989712715148926, "learning_rate": 0.0007994246260069045, "loss": 0.3931, "step": 208660 }, { "epoch": 60.03164556962025, "grad_norm": 1.3854284286499023, "learning_rate": 0.0007993670886075949, "loss": 0.4249, "step": 208670 }, { "epoch": 60.03452243958573, "grad_norm": 0.6867343187332153, "learning_rate": 0.0007993095512082854, "loss": 0.3981, "step": 208680 }, { "epoch": 60.037399309551205, "grad_norm": 1.9007469415664673, "learning_rate": 0.0007992520138089759, "loss": 0.423, "step": 208690 }, { "epoch": 60.04027617951669, "grad_norm": 0.8021664619445801, "learning_rate": 0.0007991944764096662, "loss": 0.4189, "step": 208700 }, { "epoch": 60.043153049482164, "grad_norm": 1.655988335609436, "learning_rate": 0.0007991369390103568, "loss": 0.4484, "step": 208710 }, { "epoch": 60.04602991944764, "grad_norm": 1.3976763486862183, "learning_rate": 0.0007990794016110472, "loss": 0.5595, "step": 208720 }, { "epoch": 60.04890678941312, "grad_norm": 1.061814785003662, "learning_rate": 0.0007990218642117376, "loss": 0.4727, "step": 208730 }, { "epoch": 60.05178365937859, "grad_norm": 1.0631463527679443, "learning_rate": 0.0007989643268124281, "loss": 0.4417, "step": 208740 }, { "epoch": 60.054660529344076, "grad_norm": 0.643368124961853, "learning_rate": 0.0007989067894131186, "loss": 0.3875, "step": 208750 }, { "epoch": 60.05753739930955, "grad_norm": 1.1360602378845215, "learning_rate": 0.0007988492520138089, "loss": 0.4439, "step": 208760 }, { "epoch": 60.06041426927503, "grad_norm": 1.1653766632080078, "learning_rate": 0.0007987917146144994, "loss": 0.3735, "step": 208770 }, { "epoch": 60.063291139240505, "grad_norm": 1.4114799499511719, "learning_rate": 0.00079873417721519, "loss": 0.4966, "step": 208780 }, { "epoch": 60.06616800920598, "grad_norm": 0.8996612429618835, "learning_rate": 0.0007986766398158803, "loss": 0.4552, "step": 208790 }, { "epoch": 60.069044879171464, "grad_norm": 1.6687157154083252, "learning_rate": 0.0007986191024165708, "loss": 0.4336, "step": 208800 }, { "epoch": 60.07192174913694, "grad_norm": 1.6849433183670044, "learning_rate": 0.0007985615650172612, "loss": 0.4683, "step": 208810 }, { "epoch": 60.07479861910242, "grad_norm": 1.302093744277954, "learning_rate": 0.0007985040276179517, "loss": 0.4995, "step": 208820 }, { "epoch": 60.07767548906789, "grad_norm": 3.7967770099639893, "learning_rate": 0.0007984464902186421, "loss": 0.5139, "step": 208830 }, { "epoch": 60.08055235903337, "grad_norm": 0.7414332628250122, "learning_rate": 0.0007983889528193326, "loss": 0.4541, "step": 208840 }, { "epoch": 60.08342922899885, "grad_norm": 1.209526538848877, "learning_rate": 0.000798331415420023, "loss": 0.5035, "step": 208850 }, { "epoch": 60.08630609896433, "grad_norm": 7.927278995513916, "learning_rate": 0.0007982738780207135, "loss": 0.5577, "step": 208860 }, { "epoch": 60.089182968929805, "grad_norm": 1.7789781093597412, "learning_rate": 0.0007982163406214039, "loss": 0.4402, "step": 208870 }, { "epoch": 60.09205983889528, "grad_norm": 0.9582881331443787, "learning_rate": 0.0007981588032220943, "loss": 0.4279, "step": 208880 }, { "epoch": 60.09493670886076, "grad_norm": 1.1242533922195435, "learning_rate": 0.0007981012658227849, "loss": 0.5087, "step": 208890 }, { "epoch": 60.09781357882623, "grad_norm": 0.7980586290359497, "learning_rate": 0.0007980437284234753, "loss": 0.3481, "step": 208900 }, { "epoch": 60.10069044879172, "grad_norm": 0.8134324550628662, "learning_rate": 0.0007979861910241657, "loss": 0.3696, "step": 208910 }, { "epoch": 60.10356731875719, "grad_norm": 1.927364468574524, "learning_rate": 0.0007979286536248562, "loss": 0.5401, "step": 208920 }, { "epoch": 60.10644418872267, "grad_norm": 0.9676886200904846, "learning_rate": 0.0007978711162255467, "loss": 0.526, "step": 208930 }, { "epoch": 60.109321058688145, "grad_norm": 0.918145477771759, "learning_rate": 0.000797813578826237, "loss": 0.3859, "step": 208940 }, { "epoch": 60.11219792865362, "grad_norm": 1.1174043416976929, "learning_rate": 0.0007977560414269275, "loss": 0.5617, "step": 208950 }, { "epoch": 60.115074798619105, "grad_norm": 1.1932979822158813, "learning_rate": 0.000797698504027618, "loss": 0.5079, "step": 208960 }, { "epoch": 60.11795166858458, "grad_norm": 1.4296907186508179, "learning_rate": 0.0007976409666283084, "loss": 0.4832, "step": 208970 }, { "epoch": 60.12082853855006, "grad_norm": 1.2745996713638306, "learning_rate": 0.0007975834292289988, "loss": 0.4005, "step": 208980 }, { "epoch": 60.123705408515534, "grad_norm": 1.0812854766845703, "learning_rate": 0.0007975258918296893, "loss": 0.442, "step": 208990 }, { "epoch": 60.12658227848101, "grad_norm": 1.3193984031677246, "learning_rate": 0.0007974683544303798, "loss": 0.4416, "step": 209000 }, { "epoch": 60.12945914844649, "grad_norm": 1.2431060075759888, "learning_rate": 0.0007974108170310702, "loss": 0.4575, "step": 209010 }, { "epoch": 60.13233601841197, "grad_norm": 1.4629535675048828, "learning_rate": 0.0007973532796317607, "loss": 0.4099, "step": 209020 }, { "epoch": 60.135212888377445, "grad_norm": 1.3242284059524536, "learning_rate": 0.0007972957422324511, "loss": 0.4453, "step": 209030 }, { "epoch": 60.13808975834292, "grad_norm": 1.8447918891906738, "learning_rate": 0.0007972382048331416, "loss": 0.4101, "step": 209040 }, { "epoch": 60.1409666283084, "grad_norm": 0.8267900943756104, "learning_rate": 0.000797180667433832, "loss": 0.3644, "step": 209050 }, { "epoch": 60.14384349827388, "grad_norm": 1.1496551036834717, "learning_rate": 0.0007971231300345224, "loss": 0.4431, "step": 209060 }, { "epoch": 60.14672036823936, "grad_norm": 1.2297842502593994, "learning_rate": 0.0007970655926352129, "loss": 0.4631, "step": 209070 }, { "epoch": 60.149597238204834, "grad_norm": 0.8634676337242126, "learning_rate": 0.0007970080552359034, "loss": 0.4732, "step": 209080 }, { "epoch": 60.15247410817031, "grad_norm": 1.540898323059082, "learning_rate": 0.0007969505178365937, "loss": 0.4228, "step": 209090 }, { "epoch": 60.155350978135786, "grad_norm": 0.8455323576927185, "learning_rate": 0.0007968929804372842, "loss": 0.4702, "step": 209100 }, { "epoch": 60.15822784810127, "grad_norm": 1.050722360610962, "learning_rate": 0.0007968354430379748, "loss": 0.4793, "step": 209110 }, { "epoch": 60.161104718066746, "grad_norm": 1.4158252477645874, "learning_rate": 0.0007967779056386651, "loss": 0.334, "step": 209120 }, { "epoch": 60.16398158803222, "grad_norm": 1.362733006477356, "learning_rate": 0.0007967203682393556, "loss": 0.4466, "step": 209130 }, { "epoch": 60.1668584579977, "grad_norm": 1.1953686475753784, "learning_rate": 0.0007966628308400461, "loss": 0.4979, "step": 209140 }, { "epoch": 60.169735327963174, "grad_norm": 2.1819825172424316, "learning_rate": 0.0007966052934407365, "loss": 0.5362, "step": 209150 }, { "epoch": 60.17261219792865, "grad_norm": 1.395936369895935, "learning_rate": 0.0007965477560414269, "loss": 0.4116, "step": 209160 }, { "epoch": 60.175489067894134, "grad_norm": 1.0400991439819336, "learning_rate": 0.0007964902186421174, "loss": 0.4567, "step": 209170 }, { "epoch": 60.17836593785961, "grad_norm": 1.813607931137085, "learning_rate": 0.0007964326812428078, "loss": 0.5971, "step": 209180 }, { "epoch": 60.181242807825086, "grad_norm": 0.9777278304100037, "learning_rate": 0.0007963751438434983, "loss": 0.4946, "step": 209190 }, { "epoch": 60.18411967779056, "grad_norm": 0.7419540286064148, "learning_rate": 0.0007963176064441888, "loss": 0.4233, "step": 209200 }, { "epoch": 60.18699654775604, "grad_norm": 0.7962350845336914, "learning_rate": 0.0007962600690448792, "loss": 0.3945, "step": 209210 }, { "epoch": 60.18987341772152, "grad_norm": 0.6870803236961365, "learning_rate": 0.0007962025316455697, "loss": 0.5533, "step": 209220 }, { "epoch": 60.192750287687, "grad_norm": 0.7349685430526733, "learning_rate": 0.0007961449942462601, "loss": 0.4709, "step": 209230 }, { "epoch": 60.195627157652474, "grad_norm": 2.651726245880127, "learning_rate": 0.0007960874568469505, "loss": 0.4655, "step": 209240 }, { "epoch": 60.19850402761795, "grad_norm": 0.9997001886367798, "learning_rate": 0.000796029919447641, "loss": 0.4542, "step": 209250 }, { "epoch": 60.20138089758343, "grad_norm": 1.3133478164672852, "learning_rate": 0.0007959723820483315, "loss": 0.4016, "step": 209260 }, { "epoch": 60.20425776754891, "grad_norm": 0.8150297403335571, "learning_rate": 0.0007959148446490218, "loss": 0.4527, "step": 209270 }, { "epoch": 60.207134637514386, "grad_norm": 0.9709712862968445, "learning_rate": 0.0007958573072497123, "loss": 0.3655, "step": 209280 }, { "epoch": 60.21001150747986, "grad_norm": 1.1708735227584839, "learning_rate": 0.0007957997698504029, "loss": 0.4442, "step": 209290 }, { "epoch": 60.21288837744534, "grad_norm": 1.3849321603775024, "learning_rate": 0.0007957422324510932, "loss": 0.5607, "step": 209300 }, { "epoch": 60.215765247410815, "grad_norm": 1.1974544525146484, "learning_rate": 0.0007956846950517837, "loss": 0.4838, "step": 209310 }, { "epoch": 60.2186421173763, "grad_norm": 1.7437549829483032, "learning_rate": 0.0007956271576524742, "loss": 0.4839, "step": 209320 }, { "epoch": 60.221518987341774, "grad_norm": 1.2979862689971924, "learning_rate": 0.0007955696202531646, "loss": 0.4086, "step": 209330 }, { "epoch": 60.22439585730725, "grad_norm": 1.5171558856964111, "learning_rate": 0.000795512082853855, "loss": 0.4613, "step": 209340 }, { "epoch": 60.22727272727273, "grad_norm": 1.544869303703308, "learning_rate": 0.0007954545454545455, "loss": 0.5668, "step": 209350 }, { "epoch": 60.2301495972382, "grad_norm": 2.062804698944092, "learning_rate": 0.0007953970080552359, "loss": 0.5131, "step": 209360 }, { "epoch": 60.23302646720368, "grad_norm": 0.6515256762504578, "learning_rate": 0.0007953394706559264, "loss": 0.4611, "step": 209370 }, { "epoch": 60.23590333716916, "grad_norm": 1.7303787469863892, "learning_rate": 0.0007952819332566168, "loss": 0.4545, "step": 209380 }, { "epoch": 60.23878020713464, "grad_norm": 1.9315564632415771, "learning_rate": 0.0007952243958573072, "loss": 0.6097, "step": 209390 }, { "epoch": 60.241657077100115, "grad_norm": 1.0334649085998535, "learning_rate": 0.0007951668584579978, "loss": 0.4475, "step": 209400 }, { "epoch": 60.24453394706559, "grad_norm": 1.179233431816101, "learning_rate": 0.0007951093210586882, "loss": 0.4601, "step": 209410 }, { "epoch": 60.24741081703107, "grad_norm": 1.4139957427978516, "learning_rate": 0.0007950517836593786, "loss": 0.5865, "step": 209420 }, { "epoch": 60.25028768699655, "grad_norm": 1.1430495977401733, "learning_rate": 0.0007949942462600691, "loss": 0.3885, "step": 209430 }, { "epoch": 60.25316455696203, "grad_norm": 1.690714716911316, "learning_rate": 0.0007949367088607596, "loss": 0.4692, "step": 209440 }, { "epoch": 60.2560414269275, "grad_norm": 1.3933037519454956, "learning_rate": 0.0007948791714614499, "loss": 0.5319, "step": 209450 }, { "epoch": 60.25891829689298, "grad_norm": 2.024097204208374, "learning_rate": 0.0007948216340621404, "loss": 0.5773, "step": 209460 }, { "epoch": 60.261795166858455, "grad_norm": 1.0311949253082275, "learning_rate": 0.000794764096662831, "loss": 0.4323, "step": 209470 }, { "epoch": 60.26467203682394, "grad_norm": 0.780692994594574, "learning_rate": 0.0007947065592635213, "loss": 0.4236, "step": 209480 }, { "epoch": 60.267548906789415, "grad_norm": 1.3297563791275024, "learning_rate": 0.0007946490218642117, "loss": 0.4316, "step": 209490 }, { "epoch": 60.27042577675489, "grad_norm": 1.0066686868667603, "learning_rate": 0.0007945914844649021, "loss": 0.4167, "step": 209500 }, { "epoch": 60.27330264672037, "grad_norm": 1.494150996208191, "learning_rate": 0.0007945339470655927, "loss": 0.3949, "step": 209510 }, { "epoch": 60.27617951668584, "grad_norm": 1.3587875366210938, "learning_rate": 0.0007944764096662831, "loss": 0.3975, "step": 209520 }, { "epoch": 60.27905638665133, "grad_norm": 1.3202279806137085, "learning_rate": 0.0007944188722669735, "loss": 0.5038, "step": 209530 }, { "epoch": 60.2819332566168, "grad_norm": 1.0889626741409302, "learning_rate": 0.000794361334867664, "loss": 0.4893, "step": 209540 }, { "epoch": 60.28481012658228, "grad_norm": 1.3899424076080322, "learning_rate": 0.0007943037974683545, "loss": 0.5096, "step": 209550 }, { "epoch": 60.287686996547755, "grad_norm": 0.9854750037193298, "learning_rate": 0.0007942462600690448, "loss": 0.4843, "step": 209560 }, { "epoch": 60.29056386651323, "grad_norm": 1.2559490203857422, "learning_rate": 0.0007941887226697353, "loss": 0.4003, "step": 209570 }, { "epoch": 60.29344073647871, "grad_norm": 1.0881577730178833, "learning_rate": 0.0007941311852704258, "loss": 0.4742, "step": 209580 }, { "epoch": 60.29631760644419, "grad_norm": 1.1514177322387695, "learning_rate": 0.0007940736478711162, "loss": 0.4684, "step": 209590 }, { "epoch": 60.29919447640967, "grad_norm": 0.8657869100570679, "learning_rate": 0.0007940161104718067, "loss": 0.4501, "step": 209600 }, { "epoch": 60.30207134637514, "grad_norm": 1.5921510457992554, "learning_rate": 0.0007939585730724972, "loss": 0.5005, "step": 209610 }, { "epoch": 60.30494821634062, "grad_norm": 0.9685018658638, "learning_rate": 0.0007939010356731876, "loss": 0.5186, "step": 209620 }, { "epoch": 60.307825086306096, "grad_norm": 0.9857500791549683, "learning_rate": 0.000793843498273878, "loss": 0.4637, "step": 209630 }, { "epoch": 60.31070195627158, "grad_norm": 0.7027332782745361, "learning_rate": 0.0007937859608745685, "loss": 0.5014, "step": 209640 }, { "epoch": 60.313578826237055, "grad_norm": 0.952876091003418, "learning_rate": 0.0007937284234752589, "loss": 0.4958, "step": 209650 }, { "epoch": 60.31645569620253, "grad_norm": 0.9990411996841431, "learning_rate": 0.0007936708860759494, "loss": 0.4304, "step": 209660 }, { "epoch": 60.31933256616801, "grad_norm": 1.2255709171295166, "learning_rate": 0.0007936133486766398, "loss": 0.5746, "step": 209670 }, { "epoch": 60.322209436133484, "grad_norm": 1.4071033000946045, "learning_rate": 0.0007935558112773302, "loss": 0.5092, "step": 209680 }, { "epoch": 60.32508630609897, "grad_norm": 0.8074150681495667, "learning_rate": 0.0007934982738780208, "loss": 0.4381, "step": 209690 }, { "epoch": 60.32796317606444, "grad_norm": 2.306553602218628, "learning_rate": 0.0007934407364787112, "loss": 0.47, "step": 209700 }, { "epoch": 60.33084004602992, "grad_norm": 1.1055965423583984, "learning_rate": 0.0007933831990794016, "loss": 0.4925, "step": 209710 }, { "epoch": 60.333716915995396, "grad_norm": 1.2385077476501465, "learning_rate": 0.0007933256616800921, "loss": 0.5063, "step": 209720 }, { "epoch": 60.33659378596087, "grad_norm": 1.2123348712921143, "learning_rate": 0.0007932681242807826, "loss": 0.5161, "step": 209730 }, { "epoch": 60.339470655926355, "grad_norm": 1.7913813591003418, "learning_rate": 0.0007932105868814729, "loss": 0.4457, "step": 209740 }, { "epoch": 60.34234752589183, "grad_norm": 1.1260178089141846, "learning_rate": 0.0007931530494821634, "loss": 0.481, "step": 209750 }, { "epoch": 60.34522439585731, "grad_norm": 0.8353064656257629, "learning_rate": 0.0007930955120828539, "loss": 0.568, "step": 209760 }, { "epoch": 60.348101265822784, "grad_norm": 2.2333734035491943, "learning_rate": 0.0007930379746835443, "loss": 0.5634, "step": 209770 }, { "epoch": 60.35097813578826, "grad_norm": 1.2709225416183472, "learning_rate": 0.0007929804372842347, "loss": 0.4605, "step": 209780 }, { "epoch": 60.353855005753736, "grad_norm": 1.4110925197601318, "learning_rate": 0.0007929228998849252, "loss": 0.4883, "step": 209790 }, { "epoch": 60.35673187571922, "grad_norm": 1.1471456289291382, "learning_rate": 0.0007928653624856157, "loss": 0.3981, "step": 209800 }, { "epoch": 60.359608745684696, "grad_norm": 1.660866141319275, "learning_rate": 0.0007928078250863061, "loss": 0.4802, "step": 209810 }, { "epoch": 60.36248561565017, "grad_norm": 1.045041561126709, "learning_rate": 0.0007927502876869966, "loss": 0.4541, "step": 209820 }, { "epoch": 60.36536248561565, "grad_norm": 1.140012502670288, "learning_rate": 0.000792692750287687, "loss": 0.4995, "step": 209830 }, { "epoch": 60.368239355581125, "grad_norm": 1.1759271621704102, "learning_rate": 0.0007926352128883775, "loss": 0.4742, "step": 209840 }, { "epoch": 60.37111622554661, "grad_norm": 0.9462875127792358, "learning_rate": 0.0007925776754890679, "loss": 0.3799, "step": 209850 }, { "epoch": 60.373993095512084, "grad_norm": 0.985075056552887, "learning_rate": 0.0007925201380897583, "loss": 0.4548, "step": 209860 }, { "epoch": 60.37686996547756, "grad_norm": 1.6248440742492676, "learning_rate": 0.0007924626006904488, "loss": 0.3995, "step": 209870 }, { "epoch": 60.379746835443036, "grad_norm": 1.0529673099517822, "learning_rate": 0.0007924050632911393, "loss": 0.4388, "step": 209880 }, { "epoch": 60.38262370540851, "grad_norm": 1.461342692375183, "learning_rate": 0.0007923475258918296, "loss": 0.4298, "step": 209890 }, { "epoch": 60.385500575373996, "grad_norm": 1.1095941066741943, "learning_rate": 0.0007922899884925202, "loss": 0.4259, "step": 209900 }, { "epoch": 60.38837744533947, "grad_norm": 2.027501106262207, "learning_rate": 0.0007922324510932107, "loss": 0.4832, "step": 209910 }, { "epoch": 60.39125431530495, "grad_norm": 1.180443525314331, "learning_rate": 0.000792174913693901, "loss": 0.5669, "step": 209920 }, { "epoch": 60.394131185270425, "grad_norm": 1.528227686882019, "learning_rate": 0.0007921173762945915, "loss": 0.5812, "step": 209930 }, { "epoch": 60.3970080552359, "grad_norm": 1.9087563753128052, "learning_rate": 0.000792059838895282, "loss": 0.5282, "step": 209940 }, { "epoch": 60.399884925201384, "grad_norm": 0.8204033374786377, "learning_rate": 0.0007920023014959724, "loss": 0.4443, "step": 209950 }, { "epoch": 60.40276179516686, "grad_norm": 0.7078280448913574, "learning_rate": 0.0007919447640966628, "loss": 0.4878, "step": 209960 }, { "epoch": 60.40563866513234, "grad_norm": 1.4541051387786865, "learning_rate": 0.0007918872266973533, "loss": 0.4327, "step": 209970 }, { "epoch": 60.40851553509781, "grad_norm": 1.0126700401306152, "learning_rate": 0.0007918296892980437, "loss": 0.5646, "step": 209980 }, { "epoch": 60.41139240506329, "grad_norm": 1.8969388008117676, "learning_rate": 0.0007917721518987342, "loss": 0.4835, "step": 209990 }, { "epoch": 60.41426927502877, "grad_norm": 2.061558246612549, "learning_rate": 0.0007917146144994247, "loss": 0.6156, "step": 210000 }, { "epoch": 60.41714614499425, "grad_norm": 1.6748595237731934, "learning_rate": 0.0007916570771001151, "loss": 0.3882, "step": 210010 }, { "epoch": 60.420023014959725, "grad_norm": 1.4442570209503174, "learning_rate": 0.0007915995397008056, "loss": 0.4544, "step": 210020 }, { "epoch": 60.4228998849252, "grad_norm": 0.998388946056366, "learning_rate": 0.000791542002301496, "loss": 0.5242, "step": 210030 }, { "epoch": 60.42577675489068, "grad_norm": 0.9436192512512207, "learning_rate": 0.0007914844649021864, "loss": 0.441, "step": 210040 }, { "epoch": 60.42865362485615, "grad_norm": 0.839927613735199, "learning_rate": 0.0007914269275028769, "loss": 0.4444, "step": 210050 }, { "epoch": 60.43153049482164, "grad_norm": 0.5160093307495117, "learning_rate": 0.0007913693901035674, "loss": 0.4796, "step": 210060 }, { "epoch": 60.43440736478711, "grad_norm": 0.6692561507225037, "learning_rate": 0.0007913118527042577, "loss": 0.5126, "step": 210070 }, { "epoch": 60.43728423475259, "grad_norm": 0.9307100176811218, "learning_rate": 0.0007912543153049482, "loss": 0.3957, "step": 210080 }, { "epoch": 60.440161104718065, "grad_norm": 1.3549749851226807, "learning_rate": 0.0007911967779056388, "loss": 0.4236, "step": 210090 }, { "epoch": 60.44303797468354, "grad_norm": 1.2576136589050293, "learning_rate": 0.0007911392405063291, "loss": 0.4866, "step": 210100 }, { "epoch": 60.445914844649025, "grad_norm": 1.2042790651321411, "learning_rate": 0.0007910817031070196, "loss": 0.4905, "step": 210110 }, { "epoch": 60.4487917146145, "grad_norm": 1.516741156578064, "learning_rate": 0.0007910241657077101, "loss": 0.4786, "step": 210120 }, { "epoch": 60.45166858457998, "grad_norm": 0.8551397919654846, "learning_rate": 0.0007909666283084005, "loss": 0.574, "step": 210130 }, { "epoch": 60.45454545454545, "grad_norm": 3.224902391433716, "learning_rate": 0.0007909090909090909, "loss": 0.4855, "step": 210140 }, { "epoch": 60.45742232451093, "grad_norm": 1.1496567726135254, "learning_rate": 0.0007908515535097814, "loss": 0.5093, "step": 210150 }, { "epoch": 60.46029919447641, "grad_norm": 0.9313070178031921, "learning_rate": 0.0007907940161104718, "loss": 0.4764, "step": 210160 }, { "epoch": 60.46317606444189, "grad_norm": 1.6440900564193726, "learning_rate": 0.0007907364787111623, "loss": 0.6104, "step": 210170 }, { "epoch": 60.466052934407365, "grad_norm": 1.1126798391342163, "learning_rate": 0.0007906789413118527, "loss": 0.4919, "step": 210180 }, { "epoch": 60.46892980437284, "grad_norm": 2.1418192386627197, "learning_rate": 0.0007906214039125432, "loss": 0.4257, "step": 210190 }, { "epoch": 60.47180667433832, "grad_norm": 1.2689696550369263, "learning_rate": 0.0007905638665132337, "loss": 0.475, "step": 210200 }, { "epoch": 60.4746835443038, "grad_norm": 0.9631286859512329, "learning_rate": 0.0007905063291139241, "loss": 0.3847, "step": 210210 }, { "epoch": 60.47756041426928, "grad_norm": 0.8255744576454163, "learning_rate": 0.0007904487917146145, "loss": 0.4396, "step": 210220 }, { "epoch": 60.48043728423475, "grad_norm": 0.6486734747886658, "learning_rate": 0.000790391254315305, "loss": 0.441, "step": 210230 }, { "epoch": 60.48331415420023, "grad_norm": 1.2771937847137451, "learning_rate": 0.0007903337169159955, "loss": 0.6934, "step": 210240 }, { "epoch": 60.486191024165706, "grad_norm": 0.8969390392303467, "learning_rate": 0.0007902761795166858, "loss": 0.4705, "step": 210250 }, { "epoch": 60.48906789413118, "grad_norm": 0.9684302806854248, "learning_rate": 0.0007902186421173763, "loss": 0.4576, "step": 210260 }, { "epoch": 60.491944764096665, "grad_norm": 1.879380464553833, "learning_rate": 0.0007901611047180668, "loss": 0.547, "step": 210270 }, { "epoch": 60.49482163406214, "grad_norm": 0.9942872524261475, "learning_rate": 0.0007901035673187572, "loss": 0.3734, "step": 210280 }, { "epoch": 60.49769850402762, "grad_norm": 1.1432453393936157, "learning_rate": 0.0007900460299194476, "loss": 0.4458, "step": 210290 }, { "epoch": 60.500575373993094, "grad_norm": 0.9473669528961182, "learning_rate": 0.0007899884925201382, "loss": 0.4863, "step": 210300 }, { "epoch": 60.50345224395857, "grad_norm": 1.3687313795089722, "learning_rate": 0.0007899309551208286, "loss": 0.4346, "step": 210310 }, { "epoch": 60.50632911392405, "grad_norm": 0.8863913416862488, "learning_rate": 0.000789873417721519, "loss": 0.3605, "step": 210320 }, { "epoch": 60.50920598388953, "grad_norm": 1.1790404319763184, "learning_rate": 0.0007898158803222094, "loss": 0.3795, "step": 210330 }, { "epoch": 60.512082853855006, "grad_norm": 1.1294232606887817, "learning_rate": 0.0007897583429228999, "loss": 0.581, "step": 210340 }, { "epoch": 60.51495972382048, "grad_norm": 1.5568007230758667, "learning_rate": 0.0007897008055235904, "loss": 0.3673, "step": 210350 }, { "epoch": 60.51783659378596, "grad_norm": 1.3887314796447754, "learning_rate": 0.0007896432681242807, "loss": 0.5213, "step": 210360 }, { "epoch": 60.52071346375144, "grad_norm": 0.9657174944877625, "learning_rate": 0.0007895857307249712, "loss": 0.5528, "step": 210370 }, { "epoch": 60.52359033371692, "grad_norm": 0.9162465929985046, "learning_rate": 0.0007895281933256617, "loss": 0.4619, "step": 210380 }, { "epoch": 60.526467203682394, "grad_norm": 0.9623863697052002, "learning_rate": 0.0007894706559263521, "loss": 0.4347, "step": 210390 }, { "epoch": 60.52934407364787, "grad_norm": 1.534593105316162, "learning_rate": 0.0007894131185270425, "loss": 0.4814, "step": 210400 }, { "epoch": 60.532220943613346, "grad_norm": 2.483584403991699, "learning_rate": 0.0007893555811277331, "loss": 0.6175, "step": 210410 }, { "epoch": 60.53509781357883, "grad_norm": 1.265514612197876, "learning_rate": 0.0007892980437284235, "loss": 0.4199, "step": 210420 }, { "epoch": 60.537974683544306, "grad_norm": 0.6749572157859802, "learning_rate": 0.0007892405063291139, "loss": 0.5439, "step": 210430 }, { "epoch": 60.54085155350978, "grad_norm": 1.359352469444275, "learning_rate": 0.0007891829689298044, "loss": 0.5061, "step": 210440 }, { "epoch": 60.54372842347526, "grad_norm": 1.044071912765503, "learning_rate": 0.0007891254315304948, "loss": 0.4866, "step": 210450 }, { "epoch": 60.546605293440734, "grad_norm": 0.8563905954360962, "learning_rate": 0.0007890678941311853, "loss": 0.481, "step": 210460 }, { "epoch": 60.54948216340621, "grad_norm": 1.1764172315597534, "learning_rate": 0.0007890103567318757, "loss": 0.5797, "step": 210470 }, { "epoch": 60.552359033371694, "grad_norm": 1.1271291971206665, "learning_rate": 0.0007889528193325661, "loss": 0.4519, "step": 210480 }, { "epoch": 60.55523590333717, "grad_norm": 1.4358367919921875, "learning_rate": 0.0007888952819332566, "loss": 0.4126, "step": 210490 }, { "epoch": 60.558112773302646, "grad_norm": 0.8044018149375916, "learning_rate": 0.0007888377445339471, "loss": 0.4029, "step": 210500 }, { "epoch": 60.56098964326812, "grad_norm": 0.9754035472869873, "learning_rate": 0.0007887802071346375, "loss": 0.5534, "step": 210510 }, { "epoch": 60.5638665132336, "grad_norm": 0.8661779761314392, "learning_rate": 0.000788722669735328, "loss": 0.4069, "step": 210520 }, { "epoch": 60.56674338319908, "grad_norm": 0.6907144784927368, "learning_rate": 0.0007886651323360185, "loss": 0.4627, "step": 210530 }, { "epoch": 60.56962025316456, "grad_norm": 1.6529688835144043, "learning_rate": 0.0007886075949367088, "loss": 0.5252, "step": 210540 }, { "epoch": 60.572497123130034, "grad_norm": 0.94488525390625, "learning_rate": 0.0007885500575373993, "loss": 0.4871, "step": 210550 }, { "epoch": 60.57537399309551, "grad_norm": 1.2850666046142578, "learning_rate": 0.0007884925201380898, "loss": 0.4238, "step": 210560 }, { "epoch": 60.57825086306099, "grad_norm": 2.30480694770813, "learning_rate": 0.0007884349827387802, "loss": 0.4269, "step": 210570 }, { "epoch": 60.58112773302647, "grad_norm": 1.6502434015274048, "learning_rate": 0.0007883774453394706, "loss": 0.5, "step": 210580 }, { "epoch": 60.584004602991946, "grad_norm": 1.7892248630523682, "learning_rate": 0.0007883199079401612, "loss": 0.6753, "step": 210590 }, { "epoch": 60.58688147295742, "grad_norm": 0.8434891700744629, "learning_rate": 0.0007882623705408516, "loss": 0.5129, "step": 210600 }, { "epoch": 60.5897583429229, "grad_norm": 1.2555080652236938, "learning_rate": 0.000788204833141542, "loss": 0.4445, "step": 210610 }, { "epoch": 60.592635212888375, "grad_norm": 1.2851545810699463, "learning_rate": 0.0007881472957422325, "loss": 0.5559, "step": 210620 }, { "epoch": 60.59551208285386, "grad_norm": 1.2831860780715942, "learning_rate": 0.0007880897583429229, "loss": 0.3965, "step": 210630 }, { "epoch": 60.598388952819334, "grad_norm": 1.0029828548431396, "learning_rate": 0.0007880322209436134, "loss": 0.4379, "step": 210640 }, { "epoch": 60.60126582278481, "grad_norm": 1.171586275100708, "learning_rate": 0.0007879746835443038, "loss": 0.5104, "step": 210650 }, { "epoch": 60.60414269275029, "grad_norm": 2.622025489807129, "learning_rate": 0.0007879171461449942, "loss": 0.5378, "step": 210660 }, { "epoch": 60.60701956271576, "grad_norm": 1.4893110990524292, "learning_rate": 0.0007878596087456847, "loss": 0.4708, "step": 210670 }, { "epoch": 60.60989643268124, "grad_norm": 1.0327363014221191, "learning_rate": 0.0007878020713463752, "loss": 0.4399, "step": 210680 }, { "epoch": 60.61277330264672, "grad_norm": 0.9439401030540466, "learning_rate": 0.0007877445339470655, "loss": 0.4618, "step": 210690 }, { "epoch": 60.6156501726122, "grad_norm": 1.243780255317688, "learning_rate": 0.0007876869965477561, "loss": 0.4888, "step": 210700 }, { "epoch": 60.618527042577675, "grad_norm": 1.3428549766540527, "learning_rate": 0.0007876294591484466, "loss": 0.4309, "step": 210710 }, { "epoch": 60.62140391254315, "grad_norm": 1.336289644241333, "learning_rate": 0.0007875719217491369, "loss": 0.4805, "step": 210720 }, { "epoch": 60.62428078250863, "grad_norm": 1.887963891029358, "learning_rate": 0.0007875143843498274, "loss": 0.5103, "step": 210730 }, { "epoch": 60.62715765247411, "grad_norm": 2.5147383213043213, "learning_rate": 0.0007874568469505179, "loss": 0.5228, "step": 210740 }, { "epoch": 60.63003452243959, "grad_norm": 1.611269235610962, "learning_rate": 0.0007873993095512083, "loss": 0.5101, "step": 210750 }, { "epoch": 60.63291139240506, "grad_norm": 1.35880708694458, "learning_rate": 0.0007873417721518987, "loss": 0.4802, "step": 210760 }, { "epoch": 60.63578826237054, "grad_norm": 1.3260918855667114, "learning_rate": 0.0007872842347525892, "loss": 0.4119, "step": 210770 }, { "epoch": 60.638665132336016, "grad_norm": 0.9906712174415588, "learning_rate": 0.0007872266973532796, "loss": 0.4162, "step": 210780 }, { "epoch": 60.6415420023015, "grad_norm": 0.8263194561004639, "learning_rate": 0.0007871691599539701, "loss": 0.3868, "step": 210790 }, { "epoch": 60.644418872266975, "grad_norm": 2.3144218921661377, "learning_rate": 0.0007871116225546606, "loss": 0.5361, "step": 210800 }, { "epoch": 60.64729574223245, "grad_norm": 0.950072705745697, "learning_rate": 0.000787054085155351, "loss": 0.5459, "step": 210810 }, { "epoch": 60.65017261219793, "grad_norm": 1.1987242698669434, "learning_rate": 0.0007869965477560415, "loss": 0.4878, "step": 210820 }, { "epoch": 60.653049482163404, "grad_norm": 0.7718819379806519, "learning_rate": 0.0007869390103567319, "loss": 0.4192, "step": 210830 }, { "epoch": 60.65592635212889, "grad_norm": 1.4715867042541504, "learning_rate": 0.0007868814729574223, "loss": 0.4905, "step": 210840 }, { "epoch": 60.65880322209436, "grad_norm": 1.1280145645141602, "learning_rate": 0.0007868239355581128, "loss": 0.5274, "step": 210850 }, { "epoch": 60.66168009205984, "grad_norm": 1.526943325996399, "learning_rate": 0.0007867663981588033, "loss": 0.4683, "step": 210860 }, { "epoch": 60.664556962025316, "grad_norm": 1.2959794998168945, "learning_rate": 0.0007867088607594936, "loss": 0.6003, "step": 210870 }, { "epoch": 60.66743383199079, "grad_norm": 2.3198437690734863, "learning_rate": 0.0007866513233601842, "loss": 0.5895, "step": 210880 }, { "epoch": 60.670310701956275, "grad_norm": 0.7854957580566406, "learning_rate": 0.0007865937859608747, "loss": 0.5929, "step": 210890 }, { "epoch": 60.67318757192175, "grad_norm": 1.195300579071045, "learning_rate": 0.000786536248561565, "loss": 0.4377, "step": 210900 }, { "epoch": 60.67606444188723, "grad_norm": 0.8515156507492065, "learning_rate": 0.0007864787111622555, "loss": 0.46, "step": 210910 }, { "epoch": 60.678941311852704, "grad_norm": 1.7823647260665894, "learning_rate": 0.000786421173762946, "loss": 0.4905, "step": 210920 }, { "epoch": 60.68181818181818, "grad_norm": 2.1186790466308594, "learning_rate": 0.0007863636363636364, "loss": 0.48, "step": 210930 }, { "epoch": 60.684695051783656, "grad_norm": 0.6597215533256531, "learning_rate": 0.0007863060989643268, "loss": 0.3489, "step": 210940 }, { "epoch": 60.68757192174914, "grad_norm": 1.0978829860687256, "learning_rate": 0.0007862485615650173, "loss": 0.6506, "step": 210950 }, { "epoch": 60.690448791714616, "grad_norm": 1.2898300886154175, "learning_rate": 0.0007861910241657077, "loss": 0.4893, "step": 210960 }, { "epoch": 60.69332566168009, "grad_norm": 0.8656835556030273, "learning_rate": 0.0007861334867663982, "loss": 0.4245, "step": 210970 }, { "epoch": 60.69620253164557, "grad_norm": 1.967902660369873, "learning_rate": 0.0007860759493670886, "loss": 0.6246, "step": 210980 }, { "epoch": 60.699079401611044, "grad_norm": 1.568476915359497, "learning_rate": 0.0007860184119677791, "loss": 0.5015, "step": 210990 }, { "epoch": 60.70195627157653, "grad_norm": 2.01782488822937, "learning_rate": 0.0007859608745684696, "loss": 0.5082, "step": 211000 }, { "epoch": 60.704833141542004, "grad_norm": 1.043480396270752, "learning_rate": 0.00078590333716916, "loss": 0.46, "step": 211010 }, { "epoch": 60.70771001150748, "grad_norm": 0.7185854315757751, "learning_rate": 0.0007858457997698504, "loss": 0.4419, "step": 211020 }, { "epoch": 60.710586881472956, "grad_norm": 1.7430822849273682, "learning_rate": 0.0007857882623705409, "loss": 0.6244, "step": 211030 }, { "epoch": 60.71346375143843, "grad_norm": 0.9141906499862671, "learning_rate": 0.0007857307249712314, "loss": 0.3974, "step": 211040 }, { "epoch": 60.716340621403916, "grad_norm": 1.0281379222869873, "learning_rate": 0.0007856731875719217, "loss": 0.5051, "step": 211050 }, { "epoch": 60.71921749136939, "grad_norm": 1.2368888854980469, "learning_rate": 0.0007856156501726122, "loss": 0.3815, "step": 211060 }, { "epoch": 60.72209436133487, "grad_norm": 1.3505007028579712, "learning_rate": 0.0007855581127733027, "loss": 0.4542, "step": 211070 }, { "epoch": 60.724971231300344, "grad_norm": 1.095145583152771, "learning_rate": 0.0007855005753739931, "loss": 0.5537, "step": 211080 }, { "epoch": 60.72784810126582, "grad_norm": 0.762945294380188, "learning_rate": 0.0007854430379746835, "loss": 0.4973, "step": 211090 }, { "epoch": 60.730724971231304, "grad_norm": 1.4086217880249023, "learning_rate": 0.0007853855005753741, "loss": 0.521, "step": 211100 }, { "epoch": 60.73360184119678, "grad_norm": 1.1753346920013428, "learning_rate": 0.0007853279631760645, "loss": 0.4817, "step": 211110 }, { "epoch": 60.736478711162256, "grad_norm": 1.0397429466247559, "learning_rate": 0.0007852704257767549, "loss": 0.5585, "step": 211120 }, { "epoch": 60.73935558112773, "grad_norm": 1.2841377258300781, "learning_rate": 0.0007852128883774453, "loss": 0.5208, "step": 211130 }, { "epoch": 60.74223245109321, "grad_norm": 1.5300942659378052, "learning_rate": 0.0007851553509781358, "loss": 0.546, "step": 211140 }, { "epoch": 60.745109321058685, "grad_norm": 1.3148365020751953, "learning_rate": 0.0007850978135788263, "loss": 0.4985, "step": 211150 }, { "epoch": 60.74798619102417, "grad_norm": 2.175633668899536, "learning_rate": 0.0007850402761795166, "loss": 0.5434, "step": 211160 }, { "epoch": 60.750863060989644, "grad_norm": 0.9338217973709106, "learning_rate": 0.0007849827387802072, "loss": 0.5014, "step": 211170 }, { "epoch": 60.75373993095512, "grad_norm": 1.7439805269241333, "learning_rate": 0.0007849252013808976, "loss": 0.511, "step": 211180 }, { "epoch": 60.7566168009206, "grad_norm": 2.3271279335021973, "learning_rate": 0.000784867663981588, "loss": 0.6722, "step": 211190 }, { "epoch": 60.75949367088607, "grad_norm": 1.0124125480651855, "learning_rate": 0.0007848101265822784, "loss": 0.4688, "step": 211200 }, { "epoch": 60.762370540851556, "grad_norm": 1.18279230594635, "learning_rate": 0.000784752589182969, "loss": 0.4753, "step": 211210 }, { "epoch": 60.76524741081703, "grad_norm": 0.8369835019111633, "learning_rate": 0.0007846950517836594, "loss": 0.4925, "step": 211220 }, { "epoch": 60.76812428078251, "grad_norm": 1.2043988704681396, "learning_rate": 0.0007846375143843498, "loss": 0.5167, "step": 211230 }, { "epoch": 60.771001150747985, "grad_norm": 0.9374145865440369, "learning_rate": 0.0007845799769850403, "loss": 0.4874, "step": 211240 }, { "epoch": 60.77387802071346, "grad_norm": 1.0737336874008179, "learning_rate": 0.0007845224395857307, "loss": 0.4722, "step": 211250 }, { "epoch": 60.776754890678944, "grad_norm": 0.8833744525909424, "learning_rate": 0.0007844649021864212, "loss": 0.4889, "step": 211260 }, { "epoch": 60.77963176064442, "grad_norm": 1.2912511825561523, "learning_rate": 0.0007844073647871116, "loss": 0.3972, "step": 211270 }, { "epoch": 60.7825086306099, "grad_norm": 2.503735065460205, "learning_rate": 0.0007843498273878021, "loss": 0.5324, "step": 211280 }, { "epoch": 60.78538550057537, "grad_norm": 0.9350903630256653, "learning_rate": 0.0007842922899884925, "loss": 0.5977, "step": 211290 }, { "epoch": 60.78826237054085, "grad_norm": 0.709047257900238, "learning_rate": 0.000784234752589183, "loss": 0.5129, "step": 211300 }, { "epoch": 60.79113924050633, "grad_norm": 1.4407246112823486, "learning_rate": 0.0007841772151898734, "loss": 0.5424, "step": 211310 }, { "epoch": 60.79401611047181, "grad_norm": 1.06103515625, "learning_rate": 0.0007841196777905639, "loss": 0.6251, "step": 211320 }, { "epoch": 60.796892980437285, "grad_norm": 0.725468635559082, "learning_rate": 0.0007840621403912544, "loss": 0.5878, "step": 211330 }, { "epoch": 60.79976985040276, "grad_norm": 1.6227673292160034, "learning_rate": 0.0007840046029919447, "loss": 0.4335, "step": 211340 }, { "epoch": 60.80264672036824, "grad_norm": 1.4243685007095337, "learning_rate": 0.0007839470655926352, "loss": 0.4658, "step": 211350 }, { "epoch": 60.80552359033371, "grad_norm": 1.0009174346923828, "learning_rate": 0.0007838895281933257, "loss": 0.4976, "step": 211360 }, { "epoch": 60.8084004602992, "grad_norm": 1.8653160333633423, "learning_rate": 0.0007838319907940161, "loss": 0.5461, "step": 211370 }, { "epoch": 60.81127733026467, "grad_norm": 1.0208120346069336, "learning_rate": 0.0007837744533947065, "loss": 0.3852, "step": 211380 }, { "epoch": 60.81415420023015, "grad_norm": 1.105679988861084, "learning_rate": 0.0007837169159953971, "loss": 0.4555, "step": 211390 }, { "epoch": 60.817031070195625, "grad_norm": 1.1296557188034058, "learning_rate": 0.0007836593785960874, "loss": 0.6232, "step": 211400 }, { "epoch": 60.8199079401611, "grad_norm": 1.5762989521026611, "learning_rate": 0.0007836018411967779, "loss": 0.5802, "step": 211410 }, { "epoch": 60.822784810126585, "grad_norm": 2.1909382343292236, "learning_rate": 0.0007835443037974684, "loss": 0.5736, "step": 211420 }, { "epoch": 60.82566168009206, "grad_norm": 0.7859272956848145, "learning_rate": 0.0007834867663981588, "loss": 0.4265, "step": 211430 }, { "epoch": 60.82853855005754, "grad_norm": 1.4007688760757446, "learning_rate": 0.0007834292289988493, "loss": 0.4778, "step": 211440 }, { "epoch": 60.83141542002301, "grad_norm": 1.690958023071289, "learning_rate": 0.0007833716915995397, "loss": 0.5214, "step": 211450 }, { "epoch": 60.83429228998849, "grad_norm": 1.128403663635254, "learning_rate": 0.0007833141542002301, "loss": 0.5158, "step": 211460 }, { "epoch": 60.83716915995397, "grad_norm": 1.172912836074829, "learning_rate": 0.0007832566168009206, "loss": 0.571, "step": 211470 }, { "epoch": 60.84004602991945, "grad_norm": 1.0561732053756714, "learning_rate": 0.0007831990794016111, "loss": 0.3876, "step": 211480 }, { "epoch": 60.842922899884925, "grad_norm": 1.7146955728530884, "learning_rate": 0.0007831415420023014, "loss": 0.4522, "step": 211490 }, { "epoch": 60.8457997698504, "grad_norm": 0.9352230429649353, "learning_rate": 0.000783084004602992, "loss": 0.4422, "step": 211500 }, { "epoch": 60.84867663981588, "grad_norm": 0.9706202149391174, "learning_rate": 0.0007830264672036825, "loss": 0.4895, "step": 211510 }, { "epoch": 60.85155350978136, "grad_norm": 2.020172119140625, "learning_rate": 0.0007829689298043728, "loss": 0.6017, "step": 211520 }, { "epoch": 60.85443037974684, "grad_norm": 1.8777259588241577, "learning_rate": 0.0007829113924050633, "loss": 0.4831, "step": 211530 }, { "epoch": 60.85730724971231, "grad_norm": 1.1431878805160522, "learning_rate": 0.0007828538550057538, "loss": 0.345, "step": 211540 }, { "epoch": 60.86018411967779, "grad_norm": 1.346639633178711, "learning_rate": 0.0007827963176064442, "loss": 0.5966, "step": 211550 }, { "epoch": 60.863060989643266, "grad_norm": 0.883135199546814, "learning_rate": 0.0007827387802071346, "loss": 0.6228, "step": 211560 }, { "epoch": 60.86593785960875, "grad_norm": 1.3409802913665771, "learning_rate": 0.0007826812428078252, "loss": 0.5598, "step": 211570 }, { "epoch": 60.868814729574225, "grad_norm": 0.6757358908653259, "learning_rate": 0.0007826237054085155, "loss": 0.4662, "step": 211580 }, { "epoch": 60.8716915995397, "grad_norm": 1.6983270645141602, "learning_rate": 0.000782566168009206, "loss": 0.4998, "step": 211590 }, { "epoch": 60.87456846950518, "grad_norm": 1.5010350942611694, "learning_rate": 0.0007825086306098965, "loss": 0.4645, "step": 211600 }, { "epoch": 60.877445339470654, "grad_norm": 1.1037639379501343, "learning_rate": 0.0007824510932105869, "loss": 0.3752, "step": 211610 }, { "epoch": 60.88032220943613, "grad_norm": 1.56764817237854, "learning_rate": 0.0007823935558112774, "loss": 0.4487, "step": 211620 }, { "epoch": 60.883199079401614, "grad_norm": 0.9411537647247314, "learning_rate": 0.0007823360184119678, "loss": 0.6108, "step": 211630 }, { "epoch": 60.88607594936709, "grad_norm": 1.0637227296829224, "learning_rate": 0.0007822784810126582, "loss": 0.4204, "step": 211640 }, { "epoch": 60.888952819332566, "grad_norm": 1.0847492218017578, "learning_rate": 0.0007822209436133487, "loss": 0.5304, "step": 211650 }, { "epoch": 60.89182968929804, "grad_norm": 2.0588743686676025, "learning_rate": 0.0007821634062140392, "loss": 0.5612, "step": 211660 }, { "epoch": 60.89470655926352, "grad_norm": 2.049314260482788, "learning_rate": 0.0007821058688147295, "loss": 0.5292, "step": 211670 }, { "epoch": 60.897583429229, "grad_norm": 0.8691837787628174, "learning_rate": 0.0007820483314154201, "loss": 0.3626, "step": 211680 }, { "epoch": 60.90046029919448, "grad_norm": 2.4224209785461426, "learning_rate": 0.0007819907940161106, "loss": 0.5728, "step": 211690 }, { "epoch": 60.903337169159954, "grad_norm": 1.086337685585022, "learning_rate": 0.0007819332566168009, "loss": 0.3671, "step": 211700 }, { "epoch": 60.90621403912543, "grad_norm": 2.08785343170166, "learning_rate": 0.0007818757192174914, "loss": 0.5066, "step": 211710 }, { "epoch": 60.90909090909091, "grad_norm": 0.9377140998840332, "learning_rate": 0.0007818181818181819, "loss": 0.5712, "step": 211720 }, { "epoch": 60.91196777905639, "grad_norm": 1.8112818002700806, "learning_rate": 0.0007817606444188723, "loss": 0.5724, "step": 211730 }, { "epoch": 60.914844649021866, "grad_norm": 1.5407551527023315, "learning_rate": 0.0007817031070195627, "loss": 0.5018, "step": 211740 }, { "epoch": 60.91772151898734, "grad_norm": 0.8125807642936707, "learning_rate": 0.0007816455696202532, "loss": 0.5149, "step": 211750 }, { "epoch": 60.92059838895282, "grad_norm": 1.2408218383789062, "learning_rate": 0.0007815880322209436, "loss": 0.4334, "step": 211760 }, { "epoch": 60.923475258918295, "grad_norm": 1.3290144205093384, "learning_rate": 0.0007815304948216341, "loss": 0.3716, "step": 211770 }, { "epoch": 60.92635212888378, "grad_norm": 0.6499240398406982, "learning_rate": 0.0007814729574223245, "loss": 0.4335, "step": 211780 }, { "epoch": 60.929228998849254, "grad_norm": 1.5532113313674927, "learning_rate": 0.000781415420023015, "loss": 0.5232, "step": 211790 }, { "epoch": 60.93210586881473, "grad_norm": 1.1819229125976562, "learning_rate": 0.0007813578826237055, "loss": 0.4607, "step": 211800 }, { "epoch": 60.93498273878021, "grad_norm": 0.978100061416626, "learning_rate": 0.0007813003452243959, "loss": 0.4547, "step": 211810 }, { "epoch": 60.93785960874568, "grad_norm": 1.8044214248657227, "learning_rate": 0.0007812428078250863, "loss": 0.4288, "step": 211820 }, { "epoch": 60.94073647871116, "grad_norm": 1.1102101802825928, "learning_rate": 0.0007811852704257768, "loss": 0.4542, "step": 211830 }, { "epoch": 60.94361334867664, "grad_norm": 0.9595940113067627, "learning_rate": 0.0007811277330264673, "loss": 0.4944, "step": 211840 }, { "epoch": 60.94649021864212, "grad_norm": 0.9897448420524597, "learning_rate": 0.0007810701956271576, "loss": 0.4606, "step": 211850 }, { "epoch": 60.949367088607595, "grad_norm": 1.145796775817871, "learning_rate": 0.0007810126582278482, "loss": 0.4883, "step": 211860 }, { "epoch": 60.95224395857307, "grad_norm": 1.8529977798461914, "learning_rate": 0.0007809551208285386, "loss": 0.4412, "step": 211870 }, { "epoch": 60.95512082853855, "grad_norm": 2.7248024940490723, "learning_rate": 0.000780897583429229, "loss": 0.5114, "step": 211880 }, { "epoch": 60.95799769850403, "grad_norm": 1.120321273803711, "learning_rate": 0.0007808400460299194, "loss": 0.5531, "step": 211890 }, { "epoch": 60.96087456846951, "grad_norm": 1.5551862716674805, "learning_rate": 0.00078078250863061, "loss": 0.5773, "step": 211900 }, { "epoch": 60.96375143843498, "grad_norm": 1.1886383295059204, "learning_rate": 0.0007807249712313004, "loss": 0.44, "step": 211910 }, { "epoch": 60.96662830840046, "grad_norm": 2.0384416580200195, "learning_rate": 0.0007806674338319908, "loss": 0.4987, "step": 211920 }, { "epoch": 60.969505178365935, "grad_norm": 1.3941689729690552, "learning_rate": 0.0007806098964326812, "loss": 0.5757, "step": 211930 }, { "epoch": 60.97238204833142, "grad_norm": 2.112926721572876, "learning_rate": 0.0007805523590333717, "loss": 0.5195, "step": 211940 }, { "epoch": 60.975258918296895, "grad_norm": 1.2960755825042725, "learning_rate": 0.0007804948216340622, "loss": 0.5397, "step": 211950 }, { "epoch": 60.97813578826237, "grad_norm": 0.8841187953948975, "learning_rate": 0.0007804372842347525, "loss": 0.5661, "step": 211960 }, { "epoch": 60.98101265822785, "grad_norm": 1.3126980066299438, "learning_rate": 0.0007803797468354431, "loss": 0.6266, "step": 211970 }, { "epoch": 60.98388952819332, "grad_norm": 0.8884032964706421, "learning_rate": 0.0007803222094361335, "loss": 0.4957, "step": 211980 }, { "epoch": 60.98676639815881, "grad_norm": 1.2091785669326782, "learning_rate": 0.0007802646720368239, "loss": 0.5522, "step": 211990 }, { "epoch": 60.98964326812428, "grad_norm": 0.8991204500198364, "learning_rate": 0.0007802071346375143, "loss": 0.4166, "step": 212000 }, { "epoch": 60.99252013808976, "grad_norm": 1.0734515190124512, "learning_rate": 0.0007801495972382049, "loss": 0.4296, "step": 212010 }, { "epoch": 60.995397008055235, "grad_norm": 1.8319873809814453, "learning_rate": 0.0007800920598388953, "loss": 0.6688, "step": 212020 }, { "epoch": 60.99827387802071, "grad_norm": 1.1522119045257568, "learning_rate": 0.0007800345224395857, "loss": 0.4908, "step": 212030 }, { "epoch": 61.00115074798619, "grad_norm": 1.4768459796905518, "learning_rate": 0.0007799769850402762, "loss": 0.4207, "step": 212040 }, { "epoch": 61.00402761795167, "grad_norm": 1.2450083494186401, "learning_rate": 0.0007799194476409666, "loss": 0.4189, "step": 212050 }, { "epoch": 61.00690448791715, "grad_norm": 1.1059026718139648, "learning_rate": 0.0007798619102416571, "loss": 0.3745, "step": 212060 }, { "epoch": 61.00978135788262, "grad_norm": 1.8421720266342163, "learning_rate": 0.0007798043728423475, "loss": 0.3924, "step": 212070 }, { "epoch": 61.0126582278481, "grad_norm": 0.856816291809082, "learning_rate": 0.000779746835443038, "loss": 0.4549, "step": 212080 }, { "epoch": 61.015535097813576, "grad_norm": 2.043889045715332, "learning_rate": 0.0007796892980437284, "loss": 0.4539, "step": 212090 }, { "epoch": 61.01841196777906, "grad_norm": 1.0549100637435913, "learning_rate": 0.0007796317606444189, "loss": 0.4092, "step": 212100 }, { "epoch": 61.021288837744535, "grad_norm": 1.3037272691726685, "learning_rate": 0.0007795742232451092, "loss": 0.5013, "step": 212110 }, { "epoch": 61.02416570771001, "grad_norm": 1.0441924333572388, "learning_rate": 0.0007795166858457998, "loss": 0.4221, "step": 212120 }, { "epoch": 61.02704257767549, "grad_norm": 1.008947491645813, "learning_rate": 0.0007794591484464903, "loss": 0.3294, "step": 212130 }, { "epoch": 61.029919447640964, "grad_norm": 0.6281323432922363, "learning_rate": 0.0007794016110471806, "loss": 0.3616, "step": 212140 }, { "epoch": 61.03279631760645, "grad_norm": 1.4953669309616089, "learning_rate": 0.0007793440736478712, "loss": 0.4516, "step": 212150 }, { "epoch": 61.03567318757192, "grad_norm": 1.2999563217163086, "learning_rate": 0.0007792865362485616, "loss": 0.5005, "step": 212160 }, { "epoch": 61.0385500575374, "grad_norm": 1.2499363422393799, "learning_rate": 0.000779228998849252, "loss": 0.3993, "step": 212170 }, { "epoch": 61.041426927502876, "grad_norm": 0.975407600402832, "learning_rate": 0.0007791714614499424, "loss": 0.3795, "step": 212180 }, { "epoch": 61.04430379746835, "grad_norm": 2.170480966567993, "learning_rate": 0.000779113924050633, "loss": 0.3586, "step": 212190 }, { "epoch": 61.047180667433835, "grad_norm": 1.5693447589874268, "learning_rate": 0.0007790563866513233, "loss": 0.4801, "step": 212200 }, { "epoch": 61.05005753739931, "grad_norm": 1.7838633060455322, "learning_rate": 0.0007789988492520138, "loss": 0.5171, "step": 212210 }, { "epoch": 61.05293440736479, "grad_norm": 1.017052412033081, "learning_rate": 0.0007789413118527043, "loss": 0.3815, "step": 212220 }, { "epoch": 61.055811277330264, "grad_norm": 1.0480393171310425, "learning_rate": 0.0007788837744533947, "loss": 0.3392, "step": 212230 }, { "epoch": 61.05868814729574, "grad_norm": 0.8650832772254944, "learning_rate": 0.0007788262370540852, "loss": 0.3703, "step": 212240 }, { "epoch": 61.061565017261216, "grad_norm": 1.190184473991394, "learning_rate": 0.0007787686996547756, "loss": 0.4122, "step": 212250 }, { "epoch": 61.0644418872267, "grad_norm": 1.6094348430633545, "learning_rate": 0.0007787111622554661, "loss": 0.3488, "step": 212260 }, { "epoch": 61.067318757192176, "grad_norm": 0.7838519215583801, "learning_rate": 0.0007786536248561565, "loss": 0.3922, "step": 212270 }, { "epoch": 61.07019562715765, "grad_norm": 1.1653672456741333, "learning_rate": 0.000778596087456847, "loss": 0.5875, "step": 212280 }, { "epoch": 61.07307249712313, "grad_norm": 1.456946849822998, "learning_rate": 0.0007785385500575373, "loss": 0.4472, "step": 212290 }, { "epoch": 61.075949367088604, "grad_norm": 1.3519599437713623, "learning_rate": 0.0007784810126582279, "loss": 0.4391, "step": 212300 }, { "epoch": 61.07882623705409, "grad_norm": 0.8206512928009033, "learning_rate": 0.0007784234752589184, "loss": 0.3461, "step": 212310 }, { "epoch": 61.081703107019564, "grad_norm": 1.076183795928955, "learning_rate": 0.0007783659378596087, "loss": 0.4403, "step": 212320 }, { "epoch": 61.08457997698504, "grad_norm": 1.5136330127716064, "learning_rate": 0.0007783084004602992, "loss": 0.4434, "step": 212330 }, { "epoch": 61.087456846950516, "grad_norm": 1.1292818784713745, "learning_rate": 0.0007782508630609897, "loss": 0.5353, "step": 212340 }, { "epoch": 61.09033371691599, "grad_norm": 1.0767533779144287, "learning_rate": 0.0007781933256616801, "loss": 0.4063, "step": 212350 }, { "epoch": 61.093210586881476, "grad_norm": 2.397815227508545, "learning_rate": 0.0007781357882623705, "loss": 0.4412, "step": 212360 }, { "epoch": 61.09608745684695, "grad_norm": 0.6473254561424255, "learning_rate": 0.0007780782508630611, "loss": 0.4529, "step": 212370 }, { "epoch": 61.09896432681243, "grad_norm": 1.2576245069503784, "learning_rate": 0.0007780207134637514, "loss": 0.4567, "step": 212380 }, { "epoch": 61.101841196777904, "grad_norm": 1.005698800086975, "learning_rate": 0.0007779631760644419, "loss": 0.5058, "step": 212390 }, { "epoch": 61.10471806674338, "grad_norm": 1.4868857860565186, "learning_rate": 0.0007779056386651323, "loss": 0.3901, "step": 212400 }, { "epoch": 61.107594936708864, "grad_norm": 1.0132683515548706, "learning_rate": 0.0007778481012658228, "loss": 0.5974, "step": 212410 }, { "epoch": 61.11047180667434, "grad_norm": 0.8017581701278687, "learning_rate": 0.0007777905638665133, "loss": 0.4444, "step": 212420 }, { "epoch": 61.113348676639816, "grad_norm": 1.1006397008895874, "learning_rate": 0.0007777330264672037, "loss": 0.3385, "step": 212430 }, { "epoch": 61.11622554660529, "grad_norm": 1.3262473344802856, "learning_rate": 0.0007776754890678941, "loss": 0.4873, "step": 212440 }, { "epoch": 61.11910241657077, "grad_norm": 1.4012067317962646, "learning_rate": 0.0007776179516685846, "loss": 0.4355, "step": 212450 }, { "epoch": 61.121979286536245, "grad_norm": 0.9365929961204529, "learning_rate": 0.0007775604142692751, "loss": 0.4304, "step": 212460 }, { "epoch": 61.12485615650173, "grad_norm": 1.3712339401245117, "learning_rate": 0.0007775028768699654, "loss": 0.4175, "step": 212470 }, { "epoch": 61.127733026467205, "grad_norm": 0.988868772983551, "learning_rate": 0.000777445339470656, "loss": 0.3769, "step": 212480 }, { "epoch": 61.13060989643268, "grad_norm": 1.5356684923171997, "learning_rate": 0.0007773878020713464, "loss": 0.478, "step": 212490 }, { "epoch": 61.13348676639816, "grad_norm": 3.5454092025756836, "learning_rate": 0.0007773302646720368, "loss": 0.4053, "step": 212500 }, { "epoch": 61.13636363636363, "grad_norm": 1.4820642471313477, "learning_rate": 0.0007772727272727273, "loss": 0.4096, "step": 212510 }, { "epoch": 61.139240506329116, "grad_norm": 1.0659151077270508, "learning_rate": 0.0007772151898734178, "loss": 0.5948, "step": 212520 }, { "epoch": 61.14211737629459, "grad_norm": 0.7246063947677612, "learning_rate": 0.0007771576524741082, "loss": 0.4532, "step": 212530 }, { "epoch": 61.14499424626007, "grad_norm": 1.4461854696273804, "learning_rate": 0.0007771001150747986, "loss": 0.4693, "step": 212540 }, { "epoch": 61.147871116225545, "grad_norm": 0.6822283267974854, "learning_rate": 0.0007770425776754892, "loss": 0.4573, "step": 212550 }, { "epoch": 61.15074798619102, "grad_norm": 1.523605465888977, "learning_rate": 0.0007769850402761795, "loss": 0.4945, "step": 212560 }, { "epoch": 61.153624856156505, "grad_norm": 0.6389397382736206, "learning_rate": 0.00077692750287687, "loss": 0.4318, "step": 212570 }, { "epoch": 61.15650172612198, "grad_norm": 2.0090014934539795, "learning_rate": 0.0007768699654775604, "loss": 0.4141, "step": 212580 }, { "epoch": 61.15937859608746, "grad_norm": 2.360672950744629, "learning_rate": 0.0007768124280782509, "loss": 0.5608, "step": 212590 }, { "epoch": 61.16225546605293, "grad_norm": 2.264202833175659, "learning_rate": 0.0007767548906789414, "loss": 0.431, "step": 212600 }, { "epoch": 61.16513233601841, "grad_norm": 0.9781167507171631, "learning_rate": 0.0007766973532796318, "loss": 0.4768, "step": 212610 }, { "epoch": 61.16800920598389, "grad_norm": 2.1071009635925293, "learning_rate": 0.0007766398158803222, "loss": 0.4386, "step": 212620 }, { "epoch": 61.17088607594937, "grad_norm": 0.7401096224784851, "learning_rate": 0.0007765822784810127, "loss": 0.4798, "step": 212630 }, { "epoch": 61.173762945914845, "grad_norm": 1.5804113149642944, "learning_rate": 0.0007765247410817032, "loss": 0.4066, "step": 212640 }, { "epoch": 61.17663981588032, "grad_norm": 0.9394485354423523, "learning_rate": 0.0007764672036823935, "loss": 0.3745, "step": 212650 }, { "epoch": 61.1795166858458, "grad_norm": 0.7454380989074707, "learning_rate": 0.0007764096662830841, "loss": 0.5116, "step": 212660 }, { "epoch": 61.18239355581128, "grad_norm": 1.5517323017120361, "learning_rate": 0.0007763521288837745, "loss": 0.4824, "step": 212670 }, { "epoch": 61.18527042577676, "grad_norm": 1.1524755954742432, "learning_rate": 0.0007762945914844649, "loss": 0.4065, "step": 212680 }, { "epoch": 61.18814729574223, "grad_norm": 1.268924593925476, "learning_rate": 0.0007762370540851553, "loss": 0.482, "step": 212690 }, { "epoch": 61.19102416570771, "grad_norm": 0.9112066030502319, "learning_rate": 0.0007761795166858459, "loss": 0.4613, "step": 212700 }, { "epoch": 61.193901035673186, "grad_norm": 2.083068609237671, "learning_rate": 0.0007761219792865363, "loss": 0.4625, "step": 212710 }, { "epoch": 61.19677790563866, "grad_norm": 1.12126886844635, "learning_rate": 0.0007760644418872267, "loss": 0.4745, "step": 212720 }, { "epoch": 61.199654775604145, "grad_norm": 3.854534149169922, "learning_rate": 0.0007760069044879171, "loss": 0.416, "step": 212730 }, { "epoch": 61.20253164556962, "grad_norm": 0.8488188982009888, "learning_rate": 0.0007759493670886076, "loss": 0.4552, "step": 212740 }, { "epoch": 61.2054085155351, "grad_norm": 1.4013195037841797, "learning_rate": 0.0007758918296892981, "loss": 0.393, "step": 212750 }, { "epoch": 61.208285385500574, "grad_norm": 1.0214821100234985, "learning_rate": 0.0007758342922899884, "loss": 0.3984, "step": 212760 }, { "epoch": 61.21116225546605, "grad_norm": 1.695669174194336, "learning_rate": 0.000775776754890679, "loss": 0.5213, "step": 212770 }, { "epoch": 61.21403912543153, "grad_norm": 1.6797854900360107, "learning_rate": 0.0007757192174913694, "loss": 0.4696, "step": 212780 }, { "epoch": 61.21691599539701, "grad_norm": 1.1352663040161133, "learning_rate": 0.0007756616800920598, "loss": 0.3887, "step": 212790 }, { "epoch": 61.219792865362486, "grad_norm": 1.962072491645813, "learning_rate": 0.0007756041426927502, "loss": 0.4557, "step": 212800 }, { "epoch": 61.22266973532796, "grad_norm": 0.9669568538665771, "learning_rate": 0.0007755466052934408, "loss": 0.3955, "step": 212810 }, { "epoch": 61.22554660529344, "grad_norm": 1.3880677223205566, "learning_rate": 0.0007754890678941312, "loss": 0.637, "step": 212820 }, { "epoch": 61.22842347525892, "grad_norm": 1.1351298093795776, "learning_rate": 0.0007754315304948216, "loss": 0.5044, "step": 212830 }, { "epoch": 61.2313003452244, "grad_norm": 2.4139420986175537, "learning_rate": 0.0007753739930955122, "loss": 0.5336, "step": 212840 }, { "epoch": 61.234177215189874, "grad_norm": 1.4073119163513184, "learning_rate": 0.0007753164556962025, "loss": 0.3973, "step": 212850 }, { "epoch": 61.23705408515535, "grad_norm": 1.0083303451538086, "learning_rate": 0.000775258918296893, "loss": 0.418, "step": 212860 }, { "epoch": 61.239930955120826, "grad_norm": 1.4079653024673462, "learning_rate": 0.0007752013808975834, "loss": 0.4644, "step": 212870 }, { "epoch": 61.24280782508631, "grad_norm": 0.8122951984405518, "learning_rate": 0.0007751438434982739, "loss": 0.5676, "step": 212880 }, { "epoch": 61.245684695051786, "grad_norm": 1.042840600013733, "learning_rate": 0.0007750863060989643, "loss": 0.4016, "step": 212890 }, { "epoch": 61.24856156501726, "grad_norm": 1.0949169397354126, "learning_rate": 0.0007750287686996548, "loss": 0.4967, "step": 212900 }, { "epoch": 61.25143843498274, "grad_norm": 1.5964709520339966, "learning_rate": 0.0007749712313003451, "loss": 0.4271, "step": 212910 }, { "epoch": 61.254315304948214, "grad_norm": 0.7523447871208191, "learning_rate": 0.0007749136939010357, "loss": 0.3632, "step": 212920 }, { "epoch": 61.25719217491369, "grad_norm": 1.1357544660568237, "learning_rate": 0.0007748561565017262, "loss": 0.5327, "step": 212930 }, { "epoch": 61.260069044879174, "grad_norm": 1.0427533388137817, "learning_rate": 0.0007747986191024165, "loss": 0.494, "step": 212940 }, { "epoch": 61.26294591484465, "grad_norm": 1.5593079328536987, "learning_rate": 0.0007747410817031071, "loss": 0.5745, "step": 212950 }, { "epoch": 61.265822784810126, "grad_norm": 0.5772055387496948, "learning_rate": 0.0007746835443037975, "loss": 0.4614, "step": 212960 }, { "epoch": 61.2686996547756, "grad_norm": 1.2926558256149292, "learning_rate": 0.0007746260069044879, "loss": 0.5059, "step": 212970 }, { "epoch": 61.27157652474108, "grad_norm": 1.4066699743270874, "learning_rate": 0.0007745684695051783, "loss": 0.4762, "step": 212980 }, { "epoch": 61.27445339470656, "grad_norm": 1.0243103504180908, "learning_rate": 0.0007745109321058689, "loss": 0.4849, "step": 212990 }, { "epoch": 61.27733026467204, "grad_norm": 0.8214989900588989, "learning_rate": 0.0007744533947065592, "loss": 0.5883, "step": 213000 }, { "epoch": 61.280207134637514, "grad_norm": 1.2775102853775024, "learning_rate": 0.0007743958573072497, "loss": 0.47, "step": 213010 }, { "epoch": 61.28308400460299, "grad_norm": 1.1084599494934082, "learning_rate": 0.0007743383199079402, "loss": 0.4245, "step": 213020 }, { "epoch": 61.28596087456847, "grad_norm": 0.8721960783004761, "learning_rate": 0.0007742807825086306, "loss": 0.5226, "step": 213030 }, { "epoch": 61.28883774453395, "grad_norm": 1.2042394876480103, "learning_rate": 0.0007742232451093211, "loss": 0.3626, "step": 213040 }, { "epoch": 61.291714614499426, "grad_norm": 0.8231556415557861, "learning_rate": 0.0007741657077100115, "loss": 0.4266, "step": 213050 }, { "epoch": 61.2945914844649, "grad_norm": 0.9055149555206299, "learning_rate": 0.000774108170310702, "loss": 0.4573, "step": 213060 }, { "epoch": 61.29746835443038, "grad_norm": 1.0918810367584229, "learning_rate": 0.0007740506329113924, "loss": 0.4935, "step": 213070 }, { "epoch": 61.300345224395855, "grad_norm": 0.7786263227462769, "learning_rate": 0.0007739930955120829, "loss": 0.4761, "step": 213080 }, { "epoch": 61.30322209436134, "grad_norm": 1.5342493057250977, "learning_rate": 0.0007739355581127732, "loss": 0.5753, "step": 213090 }, { "epoch": 61.306098964326814, "grad_norm": 0.8992907404899597, "learning_rate": 0.0007738780207134638, "loss": 0.4386, "step": 213100 }, { "epoch": 61.30897583429229, "grad_norm": 1.9493545293807983, "learning_rate": 0.0007738204833141543, "loss": 0.5394, "step": 213110 }, { "epoch": 61.31185270425777, "grad_norm": 1.087266445159912, "learning_rate": 0.0007737629459148446, "loss": 0.5406, "step": 213120 }, { "epoch": 61.31472957422324, "grad_norm": 1.3075848817825317, "learning_rate": 0.0007737054085155351, "loss": 0.4715, "step": 213130 }, { "epoch": 61.31760644418872, "grad_norm": 0.8833962678909302, "learning_rate": 0.0007736478711162256, "loss": 0.5389, "step": 213140 }, { "epoch": 61.3204833141542, "grad_norm": 1.74419105052948, "learning_rate": 0.000773590333716916, "loss": 0.5504, "step": 213150 }, { "epoch": 61.32336018411968, "grad_norm": 0.6780538558959961, "learning_rate": 0.0007735327963176064, "loss": 0.5507, "step": 213160 }, { "epoch": 61.326237054085155, "grad_norm": 1.1538015604019165, "learning_rate": 0.000773475258918297, "loss": 0.3807, "step": 213170 }, { "epoch": 61.32911392405063, "grad_norm": 0.7767968773841858, "learning_rate": 0.0007734177215189873, "loss": 0.4653, "step": 213180 }, { "epoch": 61.33199079401611, "grad_norm": 0.5812421441078186, "learning_rate": 0.0007733601841196778, "loss": 0.3526, "step": 213190 }, { "epoch": 61.33486766398159, "grad_norm": 1.4419790506362915, "learning_rate": 0.0007733026467203682, "loss": 0.4104, "step": 213200 }, { "epoch": 61.33774453394707, "grad_norm": 1.723374366760254, "learning_rate": 0.0007732451093210587, "loss": 0.4274, "step": 213210 }, { "epoch": 61.34062140391254, "grad_norm": 4.023796558380127, "learning_rate": 0.0007731875719217492, "loss": 0.4824, "step": 213220 }, { "epoch": 61.34349827387802, "grad_norm": 0.6884216070175171, "learning_rate": 0.0007731300345224396, "loss": 0.4268, "step": 213230 }, { "epoch": 61.346375143843495, "grad_norm": 2.1030170917510986, "learning_rate": 0.0007730724971231301, "loss": 0.4197, "step": 213240 }, { "epoch": 61.34925201380898, "grad_norm": 1.1213254928588867, "learning_rate": 0.0007730149597238205, "loss": 0.4674, "step": 213250 }, { "epoch": 61.352128883774455, "grad_norm": 1.7377190589904785, "learning_rate": 0.000772957422324511, "loss": 0.4794, "step": 213260 }, { "epoch": 61.35500575373993, "grad_norm": 1.3154237270355225, "learning_rate": 0.0007728998849252013, "loss": 0.4379, "step": 213270 }, { "epoch": 61.35788262370541, "grad_norm": 0.8167446255683899, "learning_rate": 0.0007728423475258919, "loss": 0.4083, "step": 213280 }, { "epoch": 61.360759493670884, "grad_norm": 1.3314908742904663, "learning_rate": 0.0007727848101265823, "loss": 0.4997, "step": 213290 }, { "epoch": 61.36363636363637, "grad_norm": 0.9367501139640808, "learning_rate": 0.0007727272727272727, "loss": 0.4273, "step": 213300 }, { "epoch": 61.36651323360184, "grad_norm": 1.4379016160964966, "learning_rate": 0.0007726697353279631, "loss": 0.4087, "step": 213310 }, { "epoch": 61.36939010356732, "grad_norm": 1.9415364265441895, "learning_rate": 0.0007726121979286537, "loss": 0.4888, "step": 213320 }, { "epoch": 61.372266973532795, "grad_norm": 1.7388044595718384, "learning_rate": 0.0007725546605293441, "loss": 0.4714, "step": 213330 }, { "epoch": 61.37514384349827, "grad_norm": 1.248289704322815, "learning_rate": 0.0007724971231300345, "loss": 0.4004, "step": 213340 }, { "epoch": 61.378020713463755, "grad_norm": 0.9143892526626587, "learning_rate": 0.0007724395857307251, "loss": 0.4429, "step": 213350 }, { "epoch": 61.38089758342923, "grad_norm": 1.6046327352523804, "learning_rate": 0.0007723820483314154, "loss": 0.3717, "step": 213360 }, { "epoch": 61.38377445339471, "grad_norm": 1.9126560688018799, "learning_rate": 0.0007723245109321059, "loss": 0.4033, "step": 213370 }, { "epoch": 61.386651323360184, "grad_norm": 0.795249879360199, "learning_rate": 0.0007722669735327963, "loss": 0.437, "step": 213380 }, { "epoch": 61.38952819332566, "grad_norm": 1.9536020755767822, "learning_rate": 0.0007722094361334868, "loss": 0.5089, "step": 213390 }, { "epoch": 61.392405063291136, "grad_norm": 1.0083684921264648, "learning_rate": 0.0007721518987341772, "loss": 0.597, "step": 213400 }, { "epoch": 61.39528193325662, "grad_norm": 1.8812494277954102, "learning_rate": 0.0007720943613348677, "loss": 0.4326, "step": 213410 }, { "epoch": 61.398158803222096, "grad_norm": 1.5010231733322144, "learning_rate": 0.000772036823935558, "loss": 0.4711, "step": 213420 }, { "epoch": 61.40103567318757, "grad_norm": 1.5507394075393677, "learning_rate": 0.0007719792865362486, "loss": 0.4624, "step": 213430 }, { "epoch": 61.40391254315305, "grad_norm": 1.0645323991775513, "learning_rate": 0.0007719217491369391, "loss": 0.4721, "step": 213440 }, { "epoch": 61.406789413118524, "grad_norm": 0.8487796783447266, "learning_rate": 0.0007718642117376294, "loss": 0.4435, "step": 213450 }, { "epoch": 61.40966628308401, "grad_norm": 0.6151041388511658, "learning_rate": 0.00077180667433832, "loss": 0.4465, "step": 213460 }, { "epoch": 61.412543153049484, "grad_norm": 2.032169818878174, "learning_rate": 0.0007717491369390104, "loss": 0.4473, "step": 213470 }, { "epoch": 61.41542002301496, "grad_norm": 1.0755187273025513, "learning_rate": 0.0007716915995397008, "loss": 0.6521, "step": 213480 }, { "epoch": 61.418296892980436, "grad_norm": 1.4460387229919434, "learning_rate": 0.0007716340621403912, "loss": 0.5851, "step": 213490 }, { "epoch": 61.42117376294591, "grad_norm": 0.9480328559875488, "learning_rate": 0.0007715765247410818, "loss": 0.4921, "step": 213500 }, { "epoch": 61.424050632911396, "grad_norm": 3.295616388320923, "learning_rate": 0.0007715189873417722, "loss": 0.5218, "step": 213510 }, { "epoch": 61.42692750287687, "grad_norm": 1.6675240993499756, "learning_rate": 0.0007714614499424626, "loss": 0.4767, "step": 213520 }, { "epoch": 61.42980437284235, "grad_norm": 1.158483862876892, "learning_rate": 0.0007714039125431532, "loss": 0.4874, "step": 213530 }, { "epoch": 61.432681242807824, "grad_norm": 1.2801603078842163, "learning_rate": 0.0007713463751438435, "loss": 0.5769, "step": 213540 }, { "epoch": 61.4355581127733, "grad_norm": 0.9114943146705627, "learning_rate": 0.000771288837744534, "loss": 0.5557, "step": 213550 }, { "epoch": 61.438434982738784, "grad_norm": 0.9319927096366882, "learning_rate": 0.0007712313003452243, "loss": 0.4183, "step": 213560 }, { "epoch": 61.44131185270426, "grad_norm": 1.0729399919509888, "learning_rate": 0.0007711737629459149, "loss": 0.4094, "step": 213570 }, { "epoch": 61.444188722669736, "grad_norm": 1.308800220489502, "learning_rate": 0.0007711162255466053, "loss": 0.3832, "step": 213580 }, { "epoch": 61.44706559263521, "grad_norm": 1.1702042818069458, "learning_rate": 0.0007710586881472957, "loss": 0.4303, "step": 213590 }, { "epoch": 61.44994246260069, "grad_norm": 1.3664424419403076, "learning_rate": 0.0007710011507479861, "loss": 0.3913, "step": 213600 }, { "epoch": 61.452819332566165, "grad_norm": 1.5774110555648804, "learning_rate": 0.0007709436133486767, "loss": 0.4915, "step": 213610 }, { "epoch": 61.45569620253165, "grad_norm": 1.4247956275939941, "learning_rate": 0.000770886075949367, "loss": 0.3958, "step": 213620 }, { "epoch": 61.458573072497124, "grad_norm": 1.4149644374847412, "learning_rate": 0.0007708285385500575, "loss": 0.5896, "step": 213630 }, { "epoch": 61.4614499424626, "grad_norm": 1.029368281364441, "learning_rate": 0.0007707710011507481, "loss": 0.3785, "step": 213640 }, { "epoch": 61.46432681242808, "grad_norm": 1.0875831842422485, "learning_rate": 0.0007707134637514384, "loss": 0.5164, "step": 213650 }, { "epoch": 61.46720368239355, "grad_norm": 1.1054309606552124, "learning_rate": 0.0007706559263521289, "loss": 0.438, "step": 213660 }, { "epoch": 61.470080552359036, "grad_norm": 1.1649298667907715, "learning_rate": 0.0007705983889528193, "loss": 0.4096, "step": 213670 }, { "epoch": 61.47295742232451, "grad_norm": 1.0989608764648438, "learning_rate": 0.0007705408515535098, "loss": 0.4563, "step": 213680 }, { "epoch": 61.47583429228999, "grad_norm": 1.7441778182983398, "learning_rate": 0.0007704833141542002, "loss": 0.5427, "step": 213690 }, { "epoch": 61.478711162255465, "grad_norm": 1.248929500579834, "learning_rate": 0.0007704257767548907, "loss": 0.4624, "step": 213700 }, { "epoch": 61.48158803222094, "grad_norm": 1.205021619796753, "learning_rate": 0.000770368239355581, "loss": 0.4596, "step": 213710 }, { "epoch": 61.484464902186424, "grad_norm": 0.8557747602462769, "learning_rate": 0.0007703107019562716, "loss": 0.4025, "step": 213720 }, { "epoch": 61.4873417721519, "grad_norm": 1.600341796875, "learning_rate": 0.0007702531645569621, "loss": 0.454, "step": 213730 }, { "epoch": 61.49021864211738, "grad_norm": 1.2515548467636108, "learning_rate": 0.0007701956271576524, "loss": 0.494, "step": 213740 }, { "epoch": 61.49309551208285, "grad_norm": 1.2553420066833496, "learning_rate": 0.000770138089758343, "loss": 0.5438, "step": 213750 }, { "epoch": 61.49597238204833, "grad_norm": 3.009828567504883, "learning_rate": 0.0007700805523590334, "loss": 0.4454, "step": 213760 }, { "epoch": 61.49884925201381, "grad_norm": 1.4438328742980957, "learning_rate": 0.0007700230149597238, "loss": 0.4827, "step": 213770 }, { "epoch": 61.50172612197929, "grad_norm": 1.280903697013855, "learning_rate": 0.0007699654775604142, "loss": 0.4778, "step": 213780 }, { "epoch": 61.504602991944765, "grad_norm": 0.8453162908554077, "learning_rate": 0.0007699079401611048, "loss": 0.4511, "step": 213790 }, { "epoch": 61.50747986191024, "grad_norm": 1.6335201263427734, "learning_rate": 0.0007698504027617951, "loss": 0.5112, "step": 213800 }, { "epoch": 61.51035673187572, "grad_norm": 0.8507388234138489, "learning_rate": 0.0007697928653624856, "loss": 0.4477, "step": 213810 }, { "epoch": 61.51323360184119, "grad_norm": 1.3963226079940796, "learning_rate": 0.0007697353279631762, "loss": 0.4092, "step": 213820 }, { "epoch": 61.51611047180668, "grad_norm": 0.6838570833206177, "learning_rate": 0.0007696777905638665, "loss": 0.3742, "step": 213830 }, { "epoch": 61.51898734177215, "grad_norm": 1.5817654132843018, "learning_rate": 0.000769620253164557, "loss": 0.5, "step": 213840 }, { "epoch": 61.52186421173763, "grad_norm": 1.330871343612671, "learning_rate": 0.0007695627157652474, "loss": 0.3916, "step": 213850 }, { "epoch": 61.524741081703105, "grad_norm": 1.7002822160720825, "learning_rate": 0.0007695051783659379, "loss": 0.4627, "step": 213860 }, { "epoch": 61.52761795166858, "grad_norm": 0.9110557436943054, "learning_rate": 0.0007694476409666283, "loss": 0.4206, "step": 213870 }, { "epoch": 61.530494821634065, "grad_norm": 1.3175855875015259, "learning_rate": 0.0007693901035673188, "loss": 0.3978, "step": 213880 }, { "epoch": 61.53337169159954, "grad_norm": 1.0429784059524536, "learning_rate": 0.0007693325661680091, "loss": 0.4524, "step": 213890 }, { "epoch": 61.53624856156502, "grad_norm": 2.057603120803833, "learning_rate": 0.0007692750287686997, "loss": 0.4888, "step": 213900 }, { "epoch": 61.53912543153049, "grad_norm": 1.836289644241333, "learning_rate": 0.0007692174913693902, "loss": 0.428, "step": 213910 }, { "epoch": 61.54200230149597, "grad_norm": 1.0493361949920654, "learning_rate": 0.0007691599539700805, "loss": 0.4714, "step": 213920 }, { "epoch": 61.54487917146145, "grad_norm": 0.8150122165679932, "learning_rate": 0.0007691024165707711, "loss": 0.404, "step": 213930 }, { "epoch": 61.54775604142693, "grad_norm": 1.2906501293182373, "learning_rate": 0.0007690448791714615, "loss": 0.4528, "step": 213940 }, { "epoch": 61.550632911392405, "grad_norm": 1.1917798519134521, "learning_rate": 0.0007689873417721519, "loss": 0.5156, "step": 213950 }, { "epoch": 61.55350978135788, "grad_norm": 0.8169200420379639, "learning_rate": 0.0007689298043728423, "loss": 0.5751, "step": 213960 }, { "epoch": 61.55638665132336, "grad_norm": 1.333778977394104, "learning_rate": 0.0007688722669735329, "loss": 0.5229, "step": 213970 }, { "epoch": 61.55926352128884, "grad_norm": 1.0896618366241455, "learning_rate": 0.0007688147295742232, "loss": 0.5096, "step": 213980 }, { "epoch": 61.56214039125432, "grad_norm": 1.1172672510147095, "learning_rate": 0.0007687571921749137, "loss": 0.4151, "step": 213990 }, { "epoch": 61.56501726121979, "grad_norm": 2.1090991497039795, "learning_rate": 0.0007686996547756041, "loss": 0.5783, "step": 214000 }, { "epoch": 61.56789413118527, "grad_norm": 1.3664824962615967, "learning_rate": 0.0007686421173762946, "loss": 0.4086, "step": 214010 }, { "epoch": 61.570771001150746, "grad_norm": 1.6710251569747925, "learning_rate": 0.0007685845799769851, "loss": 0.4461, "step": 214020 }, { "epoch": 61.57364787111622, "grad_norm": 1.3381623029708862, "learning_rate": 0.0007685270425776755, "loss": 0.585, "step": 214030 }, { "epoch": 61.576524741081705, "grad_norm": 1.5902780294418335, "learning_rate": 0.000768469505178366, "loss": 0.4471, "step": 214040 }, { "epoch": 61.57940161104718, "grad_norm": 1.1300338506698608, "learning_rate": 0.0007684119677790564, "loss": 0.4305, "step": 214050 }, { "epoch": 61.58227848101266, "grad_norm": 1.8426597118377686, "learning_rate": 0.0007683544303797469, "loss": 0.5033, "step": 214060 }, { "epoch": 61.585155350978134, "grad_norm": 1.160239577293396, "learning_rate": 0.0007682968929804372, "loss": 0.4227, "step": 214070 }, { "epoch": 61.58803222094361, "grad_norm": 1.4559518098831177, "learning_rate": 0.0007682393555811278, "loss": 0.5148, "step": 214080 }, { "epoch": 61.59090909090909, "grad_norm": 1.8739240169525146, "learning_rate": 0.0007681818181818182, "loss": 0.5976, "step": 214090 }, { "epoch": 61.59378596087457, "grad_norm": 0.6682366132736206, "learning_rate": 0.0007681242807825086, "loss": 0.4423, "step": 214100 }, { "epoch": 61.596662830840046, "grad_norm": 1.0002572536468506, "learning_rate": 0.000768066743383199, "loss": 0.5198, "step": 214110 }, { "epoch": 61.59953970080552, "grad_norm": 1.2312711477279663, "learning_rate": 0.0007680092059838896, "loss": 0.514, "step": 214120 }, { "epoch": 61.602416570771, "grad_norm": 0.7378392815589905, "learning_rate": 0.00076795166858458, "loss": 0.519, "step": 214130 }, { "epoch": 61.60529344073648, "grad_norm": 1.448734164237976, "learning_rate": 0.0007678941311852704, "loss": 0.4461, "step": 214140 }, { "epoch": 61.60817031070196, "grad_norm": 1.0262757539749146, "learning_rate": 0.000767836593785961, "loss": 0.643, "step": 214150 }, { "epoch": 61.611047180667434, "grad_norm": 1.0763925313949585, "learning_rate": 0.0007677790563866513, "loss": 0.4273, "step": 214160 }, { "epoch": 61.61392405063291, "grad_norm": 1.0111442804336548, "learning_rate": 0.0007677215189873418, "loss": 0.4642, "step": 214170 }, { "epoch": 61.616800920598386, "grad_norm": 1.310040831565857, "learning_rate": 0.0007676639815880322, "loss": 0.4847, "step": 214180 }, { "epoch": 61.61967779056387, "grad_norm": 0.6982129216194153, "learning_rate": 0.0007676064441887227, "loss": 0.5566, "step": 214190 }, { "epoch": 61.622554660529346, "grad_norm": 0.7687984704971313, "learning_rate": 0.0007675489067894131, "loss": 0.4827, "step": 214200 }, { "epoch": 61.62543153049482, "grad_norm": 1.8795108795166016, "learning_rate": 0.0007674913693901036, "loss": 0.6526, "step": 214210 }, { "epoch": 61.6283084004603, "grad_norm": 1.92251455783844, "learning_rate": 0.0007674338319907941, "loss": 0.4947, "step": 214220 }, { "epoch": 61.631185270425775, "grad_norm": 0.7719165086746216, "learning_rate": 0.0007673762945914845, "loss": 0.4248, "step": 214230 }, { "epoch": 61.63406214039125, "grad_norm": 1.2626160383224487, "learning_rate": 0.000767318757192175, "loss": 0.4934, "step": 214240 }, { "epoch": 61.636939010356734, "grad_norm": 1.2316350936889648, "learning_rate": 0.0007672612197928653, "loss": 0.5057, "step": 214250 }, { "epoch": 61.63981588032221, "grad_norm": 1.8945398330688477, "learning_rate": 0.0007672036823935559, "loss": 0.4908, "step": 214260 }, { "epoch": 61.64269275028769, "grad_norm": 1.0469030141830444, "learning_rate": 0.0007671461449942463, "loss": 0.4391, "step": 214270 }, { "epoch": 61.64556962025316, "grad_norm": 1.3530280590057373, "learning_rate": 0.0007670886075949367, "loss": 0.4706, "step": 214280 }, { "epoch": 61.64844649021864, "grad_norm": 2.053983688354492, "learning_rate": 0.0007670310701956271, "loss": 0.5682, "step": 214290 }, { "epoch": 61.65132336018412, "grad_norm": 1.117884635925293, "learning_rate": 0.0007669735327963177, "loss": 0.5032, "step": 214300 }, { "epoch": 61.6542002301496, "grad_norm": 1.196761965751648, "learning_rate": 0.000766915995397008, "loss": 0.4212, "step": 214310 }, { "epoch": 61.657077100115075, "grad_norm": 1.7126926183700562, "learning_rate": 0.0007668584579976985, "loss": 0.5258, "step": 214320 }, { "epoch": 61.65995397008055, "grad_norm": 0.7223535180091858, "learning_rate": 0.0007668009205983891, "loss": 0.4517, "step": 214330 }, { "epoch": 61.66283084004603, "grad_norm": 0.975050151348114, "learning_rate": 0.0007667433831990794, "loss": 0.4508, "step": 214340 }, { "epoch": 61.66570771001151, "grad_norm": 0.8281993269920349, "learning_rate": 0.0007666858457997699, "loss": 0.5445, "step": 214350 }, { "epoch": 61.66858457997699, "grad_norm": 2.455986261367798, "learning_rate": 0.0007666283084004602, "loss": 0.4847, "step": 214360 }, { "epoch": 61.67146144994246, "grad_norm": 0.8621183633804321, "learning_rate": 0.0007665707710011508, "loss": 0.5939, "step": 214370 }, { "epoch": 61.67433831990794, "grad_norm": 3.360724925994873, "learning_rate": 0.0007665132336018412, "loss": 0.5693, "step": 214380 }, { "epoch": 61.677215189873415, "grad_norm": 1.804435133934021, "learning_rate": 0.0007664556962025316, "loss": 0.4944, "step": 214390 }, { "epoch": 61.6800920598389, "grad_norm": 0.7037897706031799, "learning_rate": 0.000766398158803222, "loss": 0.5132, "step": 214400 }, { "epoch": 61.682968929804375, "grad_norm": 1.899896502494812, "learning_rate": 0.0007663406214039126, "loss": 0.5834, "step": 214410 }, { "epoch": 61.68584579976985, "grad_norm": 2.6386196613311768, "learning_rate": 0.000766283084004603, "loss": 0.472, "step": 214420 }, { "epoch": 61.68872266973533, "grad_norm": 1.447056770324707, "learning_rate": 0.0007662255466052934, "loss": 0.4443, "step": 214430 }, { "epoch": 61.6915995397008, "grad_norm": 1.711217999458313, "learning_rate": 0.000766168009205984, "loss": 0.5653, "step": 214440 }, { "epoch": 61.69447640966629, "grad_norm": 1.3435524702072144, "learning_rate": 0.0007661104718066743, "loss": 0.623, "step": 214450 }, { "epoch": 61.69735327963176, "grad_norm": 0.9464781284332275, "learning_rate": 0.0007660529344073648, "loss": 0.5043, "step": 214460 }, { "epoch": 61.70023014959724, "grad_norm": 1.3086278438568115, "learning_rate": 0.0007659953970080552, "loss": 0.4992, "step": 214470 }, { "epoch": 61.703107019562715, "grad_norm": 1.0524109601974487, "learning_rate": 0.0007659378596087457, "loss": 0.5692, "step": 214480 }, { "epoch": 61.70598388952819, "grad_norm": 1.2892428636550903, "learning_rate": 0.0007658803222094361, "loss": 0.5267, "step": 214490 }, { "epoch": 61.70886075949367, "grad_norm": 1.0057398080825806, "learning_rate": 0.0007658227848101266, "loss": 0.4905, "step": 214500 }, { "epoch": 61.71173762945915, "grad_norm": 1.2935516834259033, "learning_rate": 0.000765765247410817, "loss": 0.5478, "step": 214510 }, { "epoch": 61.71461449942463, "grad_norm": 1.4642539024353027, "learning_rate": 0.0007657077100115075, "loss": 0.5201, "step": 214520 }, { "epoch": 61.7174913693901, "grad_norm": 0.8721577525138855, "learning_rate": 0.000765650172612198, "loss": 0.4471, "step": 214530 }, { "epoch": 61.72036823935558, "grad_norm": 1.3453596830368042, "learning_rate": 0.0007655926352128883, "loss": 0.4771, "step": 214540 }, { "epoch": 61.723245109321056, "grad_norm": 0.8722812533378601, "learning_rate": 0.0007655350978135789, "loss": 0.4993, "step": 214550 }, { "epoch": 61.72612197928654, "grad_norm": 0.9016339778900146, "learning_rate": 0.0007654775604142693, "loss": 0.4359, "step": 214560 }, { "epoch": 61.728998849252015, "grad_norm": 1.3666633367538452, "learning_rate": 0.0007654200230149597, "loss": 0.5249, "step": 214570 }, { "epoch": 61.73187571921749, "grad_norm": 1.1811033487319946, "learning_rate": 0.0007653624856156501, "loss": 0.501, "step": 214580 }, { "epoch": 61.73475258918297, "grad_norm": 1.0754119157791138, "learning_rate": 0.0007653049482163407, "loss": 0.3848, "step": 214590 }, { "epoch": 61.737629459148444, "grad_norm": 0.7701308131217957, "learning_rate": 0.000765247410817031, "loss": 0.4044, "step": 214600 }, { "epoch": 61.74050632911393, "grad_norm": 0.8981059789657593, "learning_rate": 0.0007651898734177215, "loss": 0.4354, "step": 214610 }, { "epoch": 61.7433831990794, "grad_norm": 1.9746211767196655, "learning_rate": 0.0007651323360184121, "loss": 0.5055, "step": 214620 }, { "epoch": 61.74626006904488, "grad_norm": 1.8843570947647095, "learning_rate": 0.0007650747986191024, "loss": 0.5095, "step": 214630 }, { "epoch": 61.749136939010356, "grad_norm": 0.9220640063285828, "learning_rate": 0.0007650172612197929, "loss": 0.5388, "step": 214640 }, { "epoch": 61.75201380897583, "grad_norm": 1.109305500984192, "learning_rate": 0.0007649597238204833, "loss": 0.4591, "step": 214650 }, { "epoch": 61.754890678941315, "grad_norm": 2.750056266784668, "learning_rate": 0.0007649021864211738, "loss": 0.5501, "step": 214660 }, { "epoch": 61.75776754890679, "grad_norm": 1.4944019317626953, "learning_rate": 0.0007648446490218642, "loss": 0.5193, "step": 214670 }, { "epoch": 61.76064441887227, "grad_norm": 1.9973194599151611, "learning_rate": 0.0007647871116225547, "loss": 0.6783, "step": 214680 }, { "epoch": 61.763521288837744, "grad_norm": 0.7910259962081909, "learning_rate": 0.000764729574223245, "loss": 0.4014, "step": 214690 }, { "epoch": 61.76639815880322, "grad_norm": 1.346771001815796, "learning_rate": 0.0007646720368239356, "loss": 0.3527, "step": 214700 }, { "epoch": 61.769275028768696, "grad_norm": 1.2394264936447144, "learning_rate": 0.000764614499424626, "loss": 0.3995, "step": 214710 }, { "epoch": 61.77215189873418, "grad_norm": 1.1318440437316895, "learning_rate": 0.0007645569620253164, "loss": 0.4871, "step": 214720 }, { "epoch": 61.775028768699656, "grad_norm": 0.7422620058059692, "learning_rate": 0.000764499424626007, "loss": 0.4222, "step": 214730 }, { "epoch": 61.77790563866513, "grad_norm": 1.3191295862197876, "learning_rate": 0.0007644418872266974, "loss": 0.5413, "step": 214740 }, { "epoch": 61.78078250863061, "grad_norm": 1.5672991275787354, "learning_rate": 0.0007643843498273878, "loss": 0.4297, "step": 214750 }, { "epoch": 61.783659378596084, "grad_norm": 0.9322837591171265, "learning_rate": 0.0007643268124280782, "loss": 0.45, "step": 214760 }, { "epoch": 61.78653624856157, "grad_norm": 2.000615358352661, "learning_rate": 0.0007642692750287688, "loss": 0.4632, "step": 214770 }, { "epoch": 61.789413118527044, "grad_norm": 2.499166488647461, "learning_rate": 0.0007642117376294591, "loss": 0.5941, "step": 214780 }, { "epoch": 61.79228998849252, "grad_norm": 1.225142002105713, "learning_rate": 0.0007641542002301496, "loss": 0.3851, "step": 214790 }, { "epoch": 61.795166858457996, "grad_norm": 1.2265714406967163, "learning_rate": 0.0007640966628308402, "loss": 0.4501, "step": 214800 }, { "epoch": 61.79804372842347, "grad_norm": 1.621459722518921, "learning_rate": 0.0007640391254315305, "loss": 0.5839, "step": 214810 }, { "epoch": 61.800920598388956, "grad_norm": 0.8413860201835632, "learning_rate": 0.000763981588032221, "loss": 0.5642, "step": 214820 }, { "epoch": 61.80379746835443, "grad_norm": 0.6943935751914978, "learning_rate": 0.0007639240506329114, "loss": 0.3708, "step": 214830 }, { "epoch": 61.80667433831991, "grad_norm": 1.527669072151184, "learning_rate": 0.0007638665132336019, "loss": 0.5655, "step": 214840 }, { "epoch": 61.809551208285384, "grad_norm": 1.53890061378479, "learning_rate": 0.0007638089758342923, "loss": 0.4977, "step": 214850 }, { "epoch": 61.81242807825086, "grad_norm": 1.1145644187927246, "learning_rate": 0.0007637514384349828, "loss": 0.3861, "step": 214860 }, { "epoch": 61.815304948216344, "grad_norm": 2.3097822666168213, "learning_rate": 0.0007636939010356731, "loss": 0.4914, "step": 214870 }, { "epoch": 61.81818181818182, "grad_norm": 1.8546488285064697, "learning_rate": 0.0007636363636363637, "loss": 0.4625, "step": 214880 }, { "epoch": 61.821058688147296, "grad_norm": 2.9711034297943115, "learning_rate": 0.0007635788262370541, "loss": 0.5362, "step": 214890 }, { "epoch": 61.82393555811277, "grad_norm": 2.382197618484497, "learning_rate": 0.0007635212888377445, "loss": 0.425, "step": 214900 }, { "epoch": 61.82681242807825, "grad_norm": 1.0890283584594727, "learning_rate": 0.0007634637514384351, "loss": 0.5784, "step": 214910 }, { "epoch": 61.829689298043725, "grad_norm": 0.8830954432487488, "learning_rate": 0.0007634062140391255, "loss": 0.4456, "step": 214920 }, { "epoch": 61.83256616800921, "grad_norm": 1.0867341756820679, "learning_rate": 0.0007633486766398159, "loss": 0.4638, "step": 214930 }, { "epoch": 61.835443037974684, "grad_norm": 2.057753801345825, "learning_rate": 0.0007632911392405063, "loss": 0.5485, "step": 214940 }, { "epoch": 61.83831990794016, "grad_norm": 1.8305604457855225, "learning_rate": 0.0007632336018411969, "loss": 0.5097, "step": 214950 }, { "epoch": 61.84119677790564, "grad_norm": 1.0542832612991333, "learning_rate": 0.0007631760644418872, "loss": 0.4624, "step": 214960 }, { "epoch": 61.84407364787111, "grad_norm": 0.7700026035308838, "learning_rate": 0.0007631185270425777, "loss": 0.5101, "step": 214970 }, { "epoch": 61.846950517836596, "grad_norm": 1.993733525276184, "learning_rate": 0.0007630609896432681, "loss": 0.5168, "step": 214980 }, { "epoch": 61.84982738780207, "grad_norm": 0.7836050987243652, "learning_rate": 0.0007630034522439586, "loss": 0.5689, "step": 214990 }, { "epoch": 61.85270425776755, "grad_norm": 2.0238258838653564, "learning_rate": 0.000762945914844649, "loss": 0.561, "step": 215000 }, { "epoch": 61.855581127733025, "grad_norm": 0.9391152262687683, "learning_rate": 0.0007628883774453395, "loss": 0.5673, "step": 215010 }, { "epoch": 61.8584579976985, "grad_norm": 1.2281644344329834, "learning_rate": 0.00076283084004603, "loss": 0.385, "step": 215020 }, { "epoch": 61.861334867663984, "grad_norm": 1.8440700769424438, "learning_rate": 0.0007627733026467204, "loss": 0.4936, "step": 215030 }, { "epoch": 61.86421173762946, "grad_norm": 2.2500159740448, "learning_rate": 0.0007627157652474109, "loss": 0.4424, "step": 215040 }, { "epoch": 61.86708860759494, "grad_norm": 1.1936219930648804, "learning_rate": 0.0007626582278481012, "loss": 0.4611, "step": 215050 }, { "epoch": 61.86996547756041, "grad_norm": 1.646042823791504, "learning_rate": 0.0007626006904487918, "loss": 0.5323, "step": 215060 }, { "epoch": 61.87284234752589, "grad_norm": 1.4538462162017822, "learning_rate": 0.0007625431530494822, "loss": 0.6795, "step": 215070 }, { "epoch": 61.87571921749137, "grad_norm": 0.8444012403488159, "learning_rate": 0.0007624856156501726, "loss": 0.4819, "step": 215080 }, { "epoch": 61.87859608745685, "grad_norm": 0.6875422596931458, "learning_rate": 0.000762428078250863, "loss": 0.3666, "step": 215090 }, { "epoch": 61.881472957422325, "grad_norm": 1.8854695558547974, "learning_rate": 0.0007623705408515536, "loss": 0.4117, "step": 215100 }, { "epoch": 61.8843498273878, "grad_norm": 1.3874027729034424, "learning_rate": 0.000762313003452244, "loss": 0.4563, "step": 215110 }, { "epoch": 61.88722669735328, "grad_norm": 1.9124679565429688, "learning_rate": 0.0007622554660529344, "loss": 0.4653, "step": 215120 }, { "epoch": 61.89010356731876, "grad_norm": 2.0176753997802734, "learning_rate": 0.000762197928653625, "loss": 0.458, "step": 215130 }, { "epoch": 61.89298043728424, "grad_norm": 0.8959905505180359, "learning_rate": 0.0007621403912543153, "loss": 0.4895, "step": 215140 }, { "epoch": 61.89585730724971, "grad_norm": 0.8877524733543396, "learning_rate": 0.0007620828538550058, "loss": 0.4247, "step": 215150 }, { "epoch": 61.89873417721519, "grad_norm": 2.238734245300293, "learning_rate": 0.0007620253164556962, "loss": 0.5135, "step": 215160 }, { "epoch": 61.901611047180666, "grad_norm": 1.2655274868011475, "learning_rate": 0.0007619677790563867, "loss": 0.53, "step": 215170 }, { "epoch": 61.90448791714614, "grad_norm": 0.8359958529472351, "learning_rate": 0.0007619102416570771, "loss": 0.4399, "step": 215180 }, { "epoch": 61.907364787111625, "grad_norm": 1.1886264085769653, "learning_rate": 0.0007618527042577675, "loss": 0.3947, "step": 215190 }, { "epoch": 61.9102416570771, "grad_norm": 1.1235406398773193, "learning_rate": 0.000761795166858458, "loss": 0.4869, "step": 215200 }, { "epoch": 61.91311852704258, "grad_norm": 0.8407453894615173, "learning_rate": 0.0007617376294591485, "loss": 0.4991, "step": 215210 }, { "epoch": 61.915995397008054, "grad_norm": 2.0876219272613525, "learning_rate": 0.0007616800920598388, "loss": 0.4996, "step": 215220 }, { "epoch": 61.91887226697353, "grad_norm": 0.7222008109092712, "learning_rate": 0.0007616225546605293, "loss": 0.4425, "step": 215230 }, { "epoch": 61.92174913693901, "grad_norm": 1.0594515800476074, "learning_rate": 0.0007615650172612199, "loss": 0.4751, "step": 215240 }, { "epoch": 61.92462600690449, "grad_norm": 3.124608039855957, "learning_rate": 0.0007615074798619102, "loss": 0.4997, "step": 215250 }, { "epoch": 61.927502876869966, "grad_norm": 0.9516573548316956, "learning_rate": 0.0007614499424626007, "loss": 0.5178, "step": 215260 }, { "epoch": 61.93037974683544, "grad_norm": 1.3393791913986206, "learning_rate": 0.0007613924050632911, "loss": 0.4648, "step": 215270 }, { "epoch": 61.93325661680092, "grad_norm": 0.8467264771461487, "learning_rate": 0.0007613348676639816, "loss": 0.4321, "step": 215280 }, { "epoch": 61.9361334867664, "grad_norm": 1.408046841621399, "learning_rate": 0.000761277330264672, "loss": 0.4341, "step": 215290 }, { "epoch": 61.93901035673188, "grad_norm": 1.2508002519607544, "learning_rate": 0.0007612197928653625, "loss": 0.5059, "step": 215300 }, { "epoch": 61.941887226697354, "grad_norm": 2.5891566276550293, "learning_rate": 0.000761162255466053, "loss": 0.6142, "step": 215310 }, { "epoch": 61.94476409666283, "grad_norm": 1.3845572471618652, "learning_rate": 0.0007611047180667434, "loss": 0.4372, "step": 215320 }, { "epoch": 61.947640966628306, "grad_norm": 1.5297852754592896, "learning_rate": 0.0007610471806674339, "loss": 0.415, "step": 215330 }, { "epoch": 61.95051783659379, "grad_norm": 1.207865595817566, "learning_rate": 0.0007609896432681242, "loss": 0.6082, "step": 215340 }, { "epoch": 61.953394706559266, "grad_norm": 1.1491576433181763, "learning_rate": 0.0007609321058688148, "loss": 0.4863, "step": 215350 }, { "epoch": 61.95627157652474, "grad_norm": 0.8551977872848511, "learning_rate": 0.0007608745684695052, "loss": 0.5605, "step": 215360 }, { "epoch": 61.95914844649022, "grad_norm": 1.188854455947876, "learning_rate": 0.0007608170310701956, "loss": 0.5095, "step": 215370 }, { "epoch": 61.962025316455694, "grad_norm": 1.4287424087524414, "learning_rate": 0.000760759493670886, "loss": 0.4389, "step": 215380 }, { "epoch": 61.96490218642117, "grad_norm": 1.8605473041534424, "learning_rate": 0.0007607019562715766, "loss": 0.4764, "step": 215390 }, { "epoch": 61.967779056386654, "grad_norm": 1.4213758707046509, "learning_rate": 0.0007606444188722669, "loss": 0.4306, "step": 215400 }, { "epoch": 61.97065592635213, "grad_norm": 0.7829878926277161, "learning_rate": 0.0007605868814729574, "loss": 0.4289, "step": 215410 }, { "epoch": 61.973532796317606, "grad_norm": 1.3424302339553833, "learning_rate": 0.000760529344073648, "loss": 0.5356, "step": 215420 }, { "epoch": 61.97640966628308, "grad_norm": 1.0154250860214233, "learning_rate": 0.0007604718066743383, "loss": 0.5214, "step": 215430 }, { "epoch": 61.97928653624856, "grad_norm": 0.8361066579818726, "learning_rate": 0.0007604142692750288, "loss": 0.4796, "step": 215440 }, { "epoch": 61.98216340621404, "grad_norm": 1.4219616651535034, "learning_rate": 0.0007603567318757192, "loss": 0.4254, "step": 215450 }, { "epoch": 61.98504027617952, "grad_norm": 1.0712511539459229, "learning_rate": 0.0007602991944764097, "loss": 0.4107, "step": 215460 }, { "epoch": 61.987917146144994, "grad_norm": 1.8381949663162231, "learning_rate": 0.0007602416570771001, "loss": 0.5633, "step": 215470 }, { "epoch": 61.99079401611047, "grad_norm": 1.469814658164978, "learning_rate": 0.0007601841196777906, "loss": 0.4202, "step": 215480 }, { "epoch": 61.99367088607595, "grad_norm": 1.6062629222869873, "learning_rate": 0.000760126582278481, "loss": 0.4664, "step": 215490 }, { "epoch": 61.99654775604143, "grad_norm": 2.298292636871338, "learning_rate": 0.0007600690448791715, "loss": 0.5262, "step": 215500 }, { "epoch": 61.999424626006906, "grad_norm": 1.2039982080459595, "learning_rate": 0.000760011507479862, "loss": 0.4029, "step": 215510 }, { "epoch": 62.00230149597238, "grad_norm": 1.0132644176483154, "learning_rate": 0.0007599539700805523, "loss": 0.4369, "step": 215520 }, { "epoch": 62.00517836593786, "grad_norm": 0.8371775150299072, "learning_rate": 0.0007598964326812429, "loss": 0.4488, "step": 215530 }, { "epoch": 62.008055235903335, "grad_norm": 1.0571547746658325, "learning_rate": 0.0007598388952819333, "loss": 0.4943, "step": 215540 }, { "epoch": 62.01093210586882, "grad_norm": 0.8685582876205444, "learning_rate": 0.0007597813578826237, "loss": 0.3754, "step": 215550 }, { "epoch": 62.013808975834294, "grad_norm": 1.1480551958084106, "learning_rate": 0.0007597238204833141, "loss": 0.5133, "step": 215560 }, { "epoch": 62.01668584579977, "grad_norm": 1.9278897047042847, "learning_rate": 0.0007596662830840047, "loss": 0.5359, "step": 215570 }, { "epoch": 62.01956271576525, "grad_norm": 0.9079883098602295, "learning_rate": 0.000759608745684695, "loss": 0.3988, "step": 215580 }, { "epoch": 62.02243958573072, "grad_norm": 1.119170904159546, "learning_rate": 0.0007595512082853855, "loss": 0.3688, "step": 215590 }, { "epoch": 62.0253164556962, "grad_norm": 1.095005989074707, "learning_rate": 0.000759493670886076, "loss": 0.62, "step": 215600 }, { "epoch": 62.02819332566168, "grad_norm": 1.1055978536605835, "learning_rate": 0.0007594361334867664, "loss": 0.4757, "step": 215610 }, { "epoch": 62.03107019562716, "grad_norm": 1.059895634651184, "learning_rate": 0.0007593785960874569, "loss": 0.407, "step": 215620 }, { "epoch": 62.033947065592635, "grad_norm": 1.4846947193145752, "learning_rate": 0.0007593210586881473, "loss": 0.3763, "step": 215630 }, { "epoch": 62.03682393555811, "grad_norm": 1.2273443937301636, "learning_rate": 0.0007592635212888378, "loss": 0.4551, "step": 215640 }, { "epoch": 62.03970080552359, "grad_norm": 1.1744242906570435, "learning_rate": 0.0007592059838895282, "loss": 0.4684, "step": 215650 }, { "epoch": 62.04257767548907, "grad_norm": 0.786837100982666, "learning_rate": 0.0007591484464902187, "loss": 0.3718, "step": 215660 }, { "epoch": 62.04545454545455, "grad_norm": 0.7352079749107361, "learning_rate": 0.000759090909090909, "loss": 0.3702, "step": 215670 }, { "epoch": 62.04833141542002, "grad_norm": 0.7707287073135376, "learning_rate": 0.0007590333716915996, "loss": 0.3823, "step": 215680 }, { "epoch": 62.0512082853855, "grad_norm": 0.9355643391609192, "learning_rate": 0.00075897583429229, "loss": 0.4478, "step": 215690 }, { "epoch": 62.054085155350975, "grad_norm": 1.5384529829025269, "learning_rate": 0.0007589182968929804, "loss": 0.3818, "step": 215700 }, { "epoch": 62.05696202531646, "grad_norm": 0.7230349183082581, "learning_rate": 0.000758860759493671, "loss": 0.3481, "step": 215710 }, { "epoch": 62.059838895281935, "grad_norm": 1.8056389093399048, "learning_rate": 0.0007588032220943614, "loss": 0.4761, "step": 215720 }, { "epoch": 62.06271576524741, "grad_norm": 1.4815558195114136, "learning_rate": 0.0007587456846950518, "loss": 0.4801, "step": 215730 }, { "epoch": 62.06559263521289, "grad_norm": 0.983984112739563, "learning_rate": 0.0007586881472957422, "loss": 0.4736, "step": 215740 }, { "epoch": 62.06846950517836, "grad_norm": 0.7979094386100769, "learning_rate": 0.0007586306098964328, "loss": 0.479, "step": 215750 }, { "epoch": 62.07134637514385, "grad_norm": 1.1852483749389648, "learning_rate": 0.0007585730724971231, "loss": 0.4033, "step": 215760 }, { "epoch": 62.07422324510932, "grad_norm": 1.0814939737319946, "learning_rate": 0.0007585155350978136, "loss": 0.4719, "step": 215770 }, { "epoch": 62.0771001150748, "grad_norm": 1.3263351917266846, "learning_rate": 0.0007584579976985041, "loss": 0.4983, "step": 215780 }, { "epoch": 62.079976985040275, "grad_norm": 1.0786428451538086, "learning_rate": 0.0007584004602991945, "loss": 0.3995, "step": 215790 }, { "epoch": 62.08285385500575, "grad_norm": 1.474233865737915, "learning_rate": 0.0007583429228998849, "loss": 0.3044, "step": 215800 }, { "epoch": 62.08573072497123, "grad_norm": 2.091430425643921, "learning_rate": 0.0007582853855005754, "loss": 0.5284, "step": 215810 }, { "epoch": 62.08860759493671, "grad_norm": 1.8848081827163696, "learning_rate": 0.0007582278481012659, "loss": 0.5255, "step": 215820 }, { "epoch": 62.09148446490219, "grad_norm": 1.7002309560775757, "learning_rate": 0.0007581703107019563, "loss": 0.4379, "step": 215830 }, { "epoch": 62.09436133486766, "grad_norm": 0.8182861804962158, "learning_rate": 0.0007581127733026468, "loss": 0.4747, "step": 215840 }, { "epoch": 62.09723820483314, "grad_norm": 0.8233994841575623, "learning_rate": 0.0007580552359033371, "loss": 0.3738, "step": 215850 }, { "epoch": 62.100115074798616, "grad_norm": 0.7875089645385742, "learning_rate": 0.0007579976985040277, "loss": 0.4243, "step": 215860 }, { "epoch": 62.1029919447641, "grad_norm": 1.3019001483917236, "learning_rate": 0.0007579401611047181, "loss": 0.3743, "step": 215870 }, { "epoch": 62.105868814729575, "grad_norm": 1.1063615083694458, "learning_rate": 0.0007578826237054085, "loss": 0.3803, "step": 215880 }, { "epoch": 62.10874568469505, "grad_norm": 0.9228665828704834, "learning_rate": 0.000757825086306099, "loss": 0.4333, "step": 215890 }, { "epoch": 62.11162255466053, "grad_norm": 1.6167207956314087, "learning_rate": 0.0007577675489067895, "loss": 0.5277, "step": 215900 }, { "epoch": 62.114499424626004, "grad_norm": 1.238968849182129, "learning_rate": 0.0007577100115074798, "loss": 0.38, "step": 215910 }, { "epoch": 62.11737629459149, "grad_norm": 0.8744348883628845, "learning_rate": 0.0007576524741081703, "loss": 0.4248, "step": 215920 }, { "epoch": 62.120253164556964, "grad_norm": 1.206709861755371, "learning_rate": 0.0007575949367088609, "loss": 0.3886, "step": 215930 }, { "epoch": 62.12313003452244, "grad_norm": 0.8186149597167969, "learning_rate": 0.0007575373993095512, "loss": 0.4345, "step": 215940 }, { "epoch": 62.126006904487916, "grad_norm": 2.3697142601013184, "learning_rate": 0.0007574798619102417, "loss": 0.437, "step": 215950 }, { "epoch": 62.12888377445339, "grad_norm": 2.561811685562134, "learning_rate": 0.0007574223245109321, "loss": 0.5208, "step": 215960 }, { "epoch": 62.131760644418875, "grad_norm": 2.9560019969940186, "learning_rate": 0.0007573647871116226, "loss": 0.4944, "step": 215970 }, { "epoch": 62.13463751438435, "grad_norm": 1.0843877792358398, "learning_rate": 0.000757307249712313, "loss": 0.4239, "step": 215980 }, { "epoch": 62.13751438434983, "grad_norm": 0.9686157703399658, "learning_rate": 0.0007572497123130035, "loss": 0.4976, "step": 215990 }, { "epoch": 62.140391254315304, "grad_norm": 2.0040392875671387, "learning_rate": 0.0007571921749136939, "loss": 0.4682, "step": 216000 }, { "epoch": 62.14326812428078, "grad_norm": 0.6886879205703735, "learning_rate": 0.0007571346375143844, "loss": 0.4125, "step": 216010 }, { "epoch": 62.146144994246264, "grad_norm": 0.7481705546379089, "learning_rate": 0.0007570771001150747, "loss": 0.3935, "step": 216020 }, { "epoch": 62.14902186421174, "grad_norm": 1.3150792121887207, "learning_rate": 0.0007570195627157652, "loss": 0.4485, "step": 216030 }, { "epoch": 62.151898734177216, "grad_norm": 1.145292043685913, "learning_rate": 0.0007569620253164558, "loss": 0.5129, "step": 216040 }, { "epoch": 62.15477560414269, "grad_norm": 2.163893461227417, "learning_rate": 0.0007569044879171461, "loss": 0.484, "step": 216050 }, { "epoch": 62.15765247410817, "grad_norm": 0.7276025414466858, "learning_rate": 0.0007568469505178366, "loss": 0.4638, "step": 216060 }, { "epoch": 62.160529344073645, "grad_norm": 1.8386600017547607, "learning_rate": 0.000756789413118527, "loss": 0.5012, "step": 216070 }, { "epoch": 62.16340621403913, "grad_norm": 1.0013155937194824, "learning_rate": 0.0007567318757192175, "loss": 0.4857, "step": 216080 }, { "epoch": 62.166283084004604, "grad_norm": 1.5174957513809204, "learning_rate": 0.0007566743383199079, "loss": 0.4543, "step": 216090 }, { "epoch": 62.16915995397008, "grad_norm": 1.0552560091018677, "learning_rate": 0.0007566168009205984, "loss": 0.4234, "step": 216100 }, { "epoch": 62.17203682393556, "grad_norm": 2.5902159214019775, "learning_rate": 0.0007565592635212888, "loss": 0.5132, "step": 216110 }, { "epoch": 62.17491369390103, "grad_norm": 0.895409882068634, "learning_rate": 0.0007565017261219793, "loss": 0.4288, "step": 216120 }, { "epoch": 62.177790563866516, "grad_norm": 0.8494545221328735, "learning_rate": 0.0007564441887226698, "loss": 0.3673, "step": 216130 }, { "epoch": 62.18066743383199, "grad_norm": 1.998919129371643, "learning_rate": 0.0007563866513233601, "loss": 0.42, "step": 216140 }, { "epoch": 62.18354430379747, "grad_norm": 0.8418611884117126, "learning_rate": 0.0007563291139240507, "loss": 0.4907, "step": 216150 }, { "epoch": 62.186421173762945, "grad_norm": 1.6965599060058594, "learning_rate": 0.0007562715765247411, "loss": 0.4176, "step": 216160 }, { "epoch": 62.18929804372842, "grad_norm": 0.9826037287712097, "learning_rate": 0.0007562140391254315, "loss": 0.489, "step": 216170 }, { "epoch": 62.192174913693904, "grad_norm": 1.5686172246932983, "learning_rate": 0.000756156501726122, "loss": 0.4901, "step": 216180 }, { "epoch": 62.19505178365938, "grad_norm": 1.2385116815567017, "learning_rate": 0.0007560989643268125, "loss": 0.4484, "step": 216190 }, { "epoch": 62.19792865362486, "grad_norm": 1.2957357168197632, "learning_rate": 0.0007560414269275028, "loss": 0.4088, "step": 216200 }, { "epoch": 62.20080552359033, "grad_norm": 0.9792464971542358, "learning_rate": 0.0007559838895281933, "loss": 0.4394, "step": 216210 }, { "epoch": 62.20368239355581, "grad_norm": 1.2790864706039429, "learning_rate": 0.0007559263521288839, "loss": 0.5145, "step": 216220 }, { "epoch": 62.20655926352129, "grad_norm": 0.9443546533584595, "learning_rate": 0.0007558688147295742, "loss": 0.5585, "step": 216230 }, { "epoch": 62.20943613348677, "grad_norm": 1.4909889698028564, "learning_rate": 0.0007558112773302647, "loss": 0.4201, "step": 216240 }, { "epoch": 62.212313003452245, "grad_norm": 1.4815504550933838, "learning_rate": 0.0007557537399309551, "loss": 0.4687, "step": 216250 }, { "epoch": 62.21518987341772, "grad_norm": 1.4318398237228394, "learning_rate": 0.0007556962025316456, "loss": 0.3693, "step": 216260 }, { "epoch": 62.2180667433832, "grad_norm": 1.9372282028198242, "learning_rate": 0.000755638665132336, "loss": 0.4781, "step": 216270 }, { "epoch": 62.22094361334867, "grad_norm": 1.9229991436004639, "learning_rate": 0.0007555811277330265, "loss": 0.3773, "step": 216280 }, { "epoch": 62.22382048331416, "grad_norm": 2.096419095993042, "learning_rate": 0.0007555235903337169, "loss": 0.4147, "step": 216290 }, { "epoch": 62.22669735327963, "grad_norm": 0.6899616122245789, "learning_rate": 0.0007554660529344074, "loss": 0.485, "step": 216300 }, { "epoch": 62.22957422324511, "grad_norm": 1.1326054334640503, "learning_rate": 0.0007554085155350978, "loss": 0.4509, "step": 216310 }, { "epoch": 62.232451093210585, "grad_norm": 1.2772560119628906, "learning_rate": 0.0007553509781357882, "loss": 0.4343, "step": 216320 }, { "epoch": 62.23532796317606, "grad_norm": 1.616530179977417, "learning_rate": 0.0007552934407364788, "loss": 0.4684, "step": 216330 }, { "epoch": 62.238204833141545, "grad_norm": 1.0375741720199585, "learning_rate": 0.0007552359033371692, "loss": 0.4648, "step": 216340 }, { "epoch": 62.24108170310702, "grad_norm": 0.813323438167572, "learning_rate": 0.0007551783659378596, "loss": 0.4281, "step": 216350 }, { "epoch": 62.2439585730725, "grad_norm": 1.1652122735977173, "learning_rate": 0.00075512082853855, "loss": 0.398, "step": 216360 }, { "epoch": 62.24683544303797, "grad_norm": 0.9970903992652893, "learning_rate": 0.0007550632911392406, "loss": 0.5775, "step": 216370 }, { "epoch": 62.24971231300345, "grad_norm": 1.2465115785598755, "learning_rate": 0.0007550057537399309, "loss": 0.4455, "step": 216380 }, { "epoch": 62.25258918296893, "grad_norm": 1.349601149559021, "learning_rate": 0.0007549482163406214, "loss": 0.504, "step": 216390 }, { "epoch": 62.25546605293441, "grad_norm": 2.166442632675171, "learning_rate": 0.000754890678941312, "loss": 0.5426, "step": 216400 }, { "epoch": 62.258342922899885, "grad_norm": 0.8620198369026184, "learning_rate": 0.0007548331415420023, "loss": 0.4121, "step": 216410 }, { "epoch": 62.26121979286536, "grad_norm": 1.3513401746749878, "learning_rate": 0.0007547756041426928, "loss": 0.4815, "step": 216420 }, { "epoch": 62.26409666283084, "grad_norm": 0.8800516724586487, "learning_rate": 0.0007547180667433832, "loss": 0.4768, "step": 216430 }, { "epoch": 62.26697353279632, "grad_norm": 1.2562273740768433, "learning_rate": 0.0007546605293440737, "loss": 0.4738, "step": 216440 }, { "epoch": 62.2698504027618, "grad_norm": 0.9980642199516296, "learning_rate": 0.0007546029919447641, "loss": 0.5079, "step": 216450 }, { "epoch": 62.27272727272727, "grad_norm": 1.214365839958191, "learning_rate": 0.0007545454545454546, "loss": 0.3983, "step": 216460 }, { "epoch": 62.27560414269275, "grad_norm": 1.0913548469543457, "learning_rate": 0.000754487917146145, "loss": 0.4388, "step": 216470 }, { "epoch": 62.278481012658226, "grad_norm": 1.2575438022613525, "learning_rate": 0.0007544303797468355, "loss": 0.3978, "step": 216480 }, { "epoch": 62.2813578826237, "grad_norm": 1.2962079048156738, "learning_rate": 0.0007543728423475259, "loss": 0.4832, "step": 216490 }, { "epoch": 62.284234752589185, "grad_norm": 1.0232425928115845, "learning_rate": 0.0007543153049482163, "loss": 0.3801, "step": 216500 }, { "epoch": 62.28711162255466, "grad_norm": 2.1979784965515137, "learning_rate": 0.0007542577675489069, "loss": 0.5086, "step": 216510 }, { "epoch": 62.28998849252014, "grad_norm": 1.9622429609298706, "learning_rate": 0.0007542002301495973, "loss": 0.4962, "step": 216520 }, { "epoch": 62.292865362485614, "grad_norm": 0.6592722535133362, "learning_rate": 0.0007541426927502877, "loss": 0.3882, "step": 216530 }, { "epoch": 62.29574223245109, "grad_norm": 0.7180394530296326, "learning_rate": 0.0007540851553509781, "loss": 0.5682, "step": 216540 }, { "epoch": 62.29861910241657, "grad_norm": 0.9484618306159973, "learning_rate": 0.0007540276179516687, "loss": 0.4733, "step": 216550 }, { "epoch": 62.30149597238205, "grad_norm": 1.8289034366607666, "learning_rate": 0.000753970080552359, "loss": 0.495, "step": 216560 }, { "epoch": 62.304372842347526, "grad_norm": 2.369741201400757, "learning_rate": 0.0007539125431530495, "loss": 0.4909, "step": 216570 }, { "epoch": 62.307249712313, "grad_norm": 1.595463514328003, "learning_rate": 0.00075385500575374, "loss": 0.3678, "step": 216580 }, { "epoch": 62.31012658227848, "grad_norm": 1.1240311861038208, "learning_rate": 0.0007537974683544304, "loss": 0.5113, "step": 216590 }, { "epoch": 62.31300345224396, "grad_norm": 0.9541753530502319, "learning_rate": 0.0007537399309551208, "loss": 0.4653, "step": 216600 }, { "epoch": 62.31588032220944, "grad_norm": 1.4803646802902222, "learning_rate": 0.0007536823935558113, "loss": 0.4786, "step": 216610 }, { "epoch": 62.318757192174914, "grad_norm": 1.7618721723556519, "learning_rate": 0.0007536248561565018, "loss": 0.5427, "step": 216620 }, { "epoch": 62.32163406214039, "grad_norm": 1.4427273273468018, "learning_rate": 0.0007535673187571922, "loss": 0.442, "step": 216630 }, { "epoch": 62.324510932105866, "grad_norm": 2.1141655445098877, "learning_rate": 0.0007535097813578827, "loss": 0.4796, "step": 216640 }, { "epoch": 62.32738780207135, "grad_norm": 0.9835419058799744, "learning_rate": 0.000753452243958573, "loss": 0.4853, "step": 216650 }, { "epoch": 62.330264672036826, "grad_norm": 1.097636103630066, "learning_rate": 0.0007533947065592636, "loss": 0.6253, "step": 216660 }, { "epoch": 62.3331415420023, "grad_norm": 1.0095770359039307, "learning_rate": 0.000753337169159954, "loss": 0.4629, "step": 216670 }, { "epoch": 62.33601841196778, "grad_norm": 0.6835132837295532, "learning_rate": 0.0007532796317606444, "loss": 0.5064, "step": 216680 }, { "epoch": 62.338895281933254, "grad_norm": 0.7564606666564941, "learning_rate": 0.0007532220943613349, "loss": 0.3816, "step": 216690 }, { "epoch": 62.34177215189873, "grad_norm": 1.3134342432022095, "learning_rate": 0.0007531645569620254, "loss": 0.4872, "step": 216700 }, { "epoch": 62.344649021864214, "grad_norm": 2.0753519535064697, "learning_rate": 0.0007531070195627157, "loss": 0.504, "step": 216710 }, { "epoch": 62.34752589182969, "grad_norm": 1.3914625644683838, "learning_rate": 0.0007530494821634062, "loss": 0.4656, "step": 216720 }, { "epoch": 62.350402761795166, "grad_norm": 1.2091978788375854, "learning_rate": 0.0007529919447640968, "loss": 0.5403, "step": 216730 }, { "epoch": 62.35327963176064, "grad_norm": 0.9534205198287964, "learning_rate": 0.0007529344073647871, "loss": 0.4482, "step": 216740 }, { "epoch": 62.35615650172612, "grad_norm": 0.8956050276756287, "learning_rate": 0.0007528768699654776, "loss": 0.3817, "step": 216750 }, { "epoch": 62.3590333716916, "grad_norm": 1.3402810096740723, "learning_rate": 0.0007528193325661681, "loss": 0.4686, "step": 216760 }, { "epoch": 62.36191024165708, "grad_norm": 1.8922724723815918, "learning_rate": 0.0007527617951668585, "loss": 0.5074, "step": 216770 }, { "epoch": 62.364787111622555, "grad_norm": 1.235365629196167, "learning_rate": 0.0007527042577675489, "loss": 0.3882, "step": 216780 }, { "epoch": 62.36766398158803, "grad_norm": 1.7766956090927124, "learning_rate": 0.0007526467203682394, "loss": 0.5118, "step": 216790 }, { "epoch": 62.37054085155351, "grad_norm": 1.1735543012619019, "learning_rate": 0.0007525891829689298, "loss": 0.4928, "step": 216800 }, { "epoch": 62.37341772151899, "grad_norm": 1.1006593704223633, "learning_rate": 0.0007525316455696203, "loss": 0.4733, "step": 216810 }, { "epoch": 62.376294591484466, "grad_norm": 1.0798715353012085, "learning_rate": 0.0007524741081703108, "loss": 0.395, "step": 216820 }, { "epoch": 62.37917146144994, "grad_norm": 1.1686331033706665, "learning_rate": 0.0007524165707710011, "loss": 0.4603, "step": 216830 }, { "epoch": 62.38204833141542, "grad_norm": 1.0765694379806519, "learning_rate": 0.0007523590333716917, "loss": 0.4012, "step": 216840 }, { "epoch": 62.384925201380895, "grad_norm": 1.131469964981079, "learning_rate": 0.000752301495972382, "loss": 0.45, "step": 216850 }, { "epoch": 62.38780207134638, "grad_norm": 1.1621718406677246, "learning_rate": 0.0007522439585730725, "loss": 0.4061, "step": 216860 }, { "epoch": 62.390678941311855, "grad_norm": 0.9371111392974854, "learning_rate": 0.000752186421173763, "loss": 0.4785, "step": 216870 }, { "epoch": 62.39355581127733, "grad_norm": 1.1632850170135498, "learning_rate": 0.0007521288837744534, "loss": 0.4186, "step": 216880 }, { "epoch": 62.39643268124281, "grad_norm": 0.6846272945404053, "learning_rate": 0.0007520713463751438, "loss": 0.429, "step": 216890 }, { "epoch": 62.39930955120828, "grad_norm": 0.7266998291015625, "learning_rate": 0.0007520138089758343, "loss": 0.4455, "step": 216900 }, { "epoch": 62.40218642117377, "grad_norm": 0.9962803721427917, "learning_rate": 0.0007519562715765247, "loss": 0.4741, "step": 216910 }, { "epoch": 62.40506329113924, "grad_norm": 2.4266865253448486, "learning_rate": 0.0007518987341772152, "loss": 0.4245, "step": 216920 }, { "epoch": 62.40794016110472, "grad_norm": 0.942770779132843, "learning_rate": 0.0007518411967779057, "loss": 0.5107, "step": 216930 }, { "epoch": 62.410817031070195, "grad_norm": 1.1841349601745605, "learning_rate": 0.000751783659378596, "loss": 0.5056, "step": 216940 }, { "epoch": 62.41369390103567, "grad_norm": 1.5954442024230957, "learning_rate": 0.0007517261219792866, "loss": 0.4735, "step": 216950 }, { "epoch": 62.41657077100115, "grad_norm": 0.6793946027755737, "learning_rate": 0.000751668584579977, "loss": 0.4158, "step": 216960 }, { "epoch": 62.41944764096663, "grad_norm": 1.365949034690857, "learning_rate": 0.0007516110471806674, "loss": 0.4291, "step": 216970 }, { "epoch": 62.42232451093211, "grad_norm": 0.9161972403526306, "learning_rate": 0.0007515535097813579, "loss": 0.4417, "step": 216980 }, { "epoch": 62.42520138089758, "grad_norm": 1.4368897676467896, "learning_rate": 0.0007514959723820484, "loss": 0.4123, "step": 216990 }, { "epoch": 62.42807825086306, "grad_norm": 1.0719633102416992, "learning_rate": 0.0007514384349827387, "loss": 0.54, "step": 217000 }, { "epoch": 62.430955120828536, "grad_norm": 0.6582401394844055, "learning_rate": 0.0007513808975834292, "loss": 0.4662, "step": 217010 }, { "epoch": 62.43383199079402, "grad_norm": 1.3798551559448242, "learning_rate": 0.0007513233601841198, "loss": 0.3815, "step": 217020 }, { "epoch": 62.436708860759495, "grad_norm": 1.1834131479263306, "learning_rate": 0.0007512658227848101, "loss": 0.3857, "step": 217030 }, { "epoch": 62.43958573072497, "grad_norm": 0.9599973559379578, "learning_rate": 0.0007512082853855006, "loss": 0.5262, "step": 217040 }, { "epoch": 62.44246260069045, "grad_norm": 1.153363823890686, "learning_rate": 0.000751150747986191, "loss": 0.5464, "step": 217050 }, { "epoch": 62.445339470655924, "grad_norm": 1.0696355104446411, "learning_rate": 0.0007510932105868815, "loss": 0.4417, "step": 217060 }, { "epoch": 62.44821634062141, "grad_norm": 1.3909821510314941, "learning_rate": 0.0007510356731875719, "loss": 0.4942, "step": 217070 }, { "epoch": 62.45109321058688, "grad_norm": 0.746342122554779, "learning_rate": 0.0007509781357882624, "loss": 0.4189, "step": 217080 }, { "epoch": 62.45397008055236, "grad_norm": 1.1964784860610962, "learning_rate": 0.0007509205983889528, "loss": 0.4131, "step": 217090 }, { "epoch": 62.456846950517836, "grad_norm": 1.279950737953186, "learning_rate": 0.0007508630609896433, "loss": 0.3948, "step": 217100 }, { "epoch": 62.45972382048331, "grad_norm": 0.769167959690094, "learning_rate": 0.0007508055235903337, "loss": 0.5014, "step": 217110 }, { "epoch": 62.462600690448795, "grad_norm": 1.6885526180267334, "learning_rate": 0.0007507479861910241, "loss": 0.4594, "step": 217120 }, { "epoch": 62.46547756041427, "grad_norm": 1.8497092723846436, "learning_rate": 0.0007506904487917147, "loss": 0.4969, "step": 217130 }, { "epoch": 62.46835443037975, "grad_norm": 2.3490095138549805, "learning_rate": 0.0007506329113924051, "loss": 0.4495, "step": 217140 }, { "epoch": 62.471231300345224, "grad_norm": 1.7013859748840332, "learning_rate": 0.0007505753739930955, "loss": 0.4544, "step": 217150 }, { "epoch": 62.4741081703107, "grad_norm": 1.555506706237793, "learning_rate": 0.000750517836593786, "loss": 0.4324, "step": 217160 }, { "epoch": 62.476985040276176, "grad_norm": 1.1418557167053223, "learning_rate": 0.0007504602991944765, "loss": 0.3923, "step": 217170 }, { "epoch": 62.47986191024166, "grad_norm": 1.361531138420105, "learning_rate": 0.0007504027617951668, "loss": 0.4449, "step": 217180 }, { "epoch": 62.482738780207136, "grad_norm": 1.3061116933822632, "learning_rate": 0.0007503452243958573, "loss": 0.4419, "step": 217190 }, { "epoch": 62.48561565017261, "grad_norm": 1.054598093032837, "learning_rate": 0.0007502876869965478, "loss": 0.4846, "step": 217200 }, { "epoch": 62.48849252013809, "grad_norm": 1.9335142374038696, "learning_rate": 0.0007502301495972382, "loss": 0.4965, "step": 217210 }, { "epoch": 62.491369390103564, "grad_norm": 1.2089158296585083, "learning_rate": 0.0007501726121979286, "loss": 0.3862, "step": 217220 }, { "epoch": 62.49424626006905, "grad_norm": 1.9545623064041138, "learning_rate": 0.0007501150747986191, "loss": 0.4628, "step": 217230 }, { "epoch": 62.497123130034524, "grad_norm": 0.8934308290481567, "learning_rate": 0.0007500575373993096, "loss": 0.4034, "step": 217240 }, { "epoch": 62.5, "grad_norm": 1.2731044292449951, "learning_rate": 0.00075, "loss": 0.4769, "step": 217250 }, { "epoch": 62.502876869965476, "grad_norm": 1.2681572437286377, "learning_rate": 0.0007499424626006905, "loss": 0.4998, "step": 217260 }, { "epoch": 62.50575373993095, "grad_norm": 1.3502798080444336, "learning_rate": 0.0007498849252013809, "loss": 0.456, "step": 217270 }, { "epoch": 62.508630609896436, "grad_norm": 0.9372344017028809, "learning_rate": 0.0007498273878020714, "loss": 0.5205, "step": 217280 }, { "epoch": 62.51150747986191, "grad_norm": 1.8684558868408203, "learning_rate": 0.0007497698504027618, "loss": 0.55, "step": 217290 }, { "epoch": 62.51438434982739, "grad_norm": 1.039643406867981, "learning_rate": 0.0007497123130034522, "loss": 0.4852, "step": 217300 }, { "epoch": 62.517261219792864, "grad_norm": 1.4970104694366455, "learning_rate": 0.0007496547756041427, "loss": 0.4346, "step": 217310 }, { "epoch": 62.52013808975834, "grad_norm": 2.1479647159576416, "learning_rate": 0.0007495972382048332, "loss": 0.5452, "step": 217320 }, { "epoch": 62.523014959723824, "grad_norm": 1.3592627048492432, "learning_rate": 0.0007495397008055236, "loss": 0.4795, "step": 217330 }, { "epoch": 62.5258918296893, "grad_norm": 0.7912236452102661, "learning_rate": 0.000749482163406214, "loss": 0.4578, "step": 217340 }, { "epoch": 62.528768699654776, "grad_norm": 1.0898181200027466, "learning_rate": 0.0007494246260069046, "loss": 0.4615, "step": 217350 }, { "epoch": 62.53164556962025, "grad_norm": 1.0219831466674805, "learning_rate": 0.0007493670886075949, "loss": 0.447, "step": 217360 }, { "epoch": 62.53452243958573, "grad_norm": 2.4573891162872314, "learning_rate": 0.0007493095512082854, "loss": 0.5131, "step": 217370 }, { "epoch": 62.537399309551205, "grad_norm": 3.3030662536621094, "learning_rate": 0.0007492520138089759, "loss": 0.5918, "step": 217380 }, { "epoch": 62.54027617951669, "grad_norm": 0.727954089641571, "learning_rate": 0.0007491944764096663, "loss": 0.5988, "step": 217390 }, { "epoch": 62.543153049482164, "grad_norm": 1.9619505405426025, "learning_rate": 0.0007491369390103567, "loss": 0.5237, "step": 217400 }, { "epoch": 62.54602991944764, "grad_norm": 1.6379019021987915, "learning_rate": 0.0007490794016110472, "loss": 0.5528, "step": 217410 }, { "epoch": 62.54890678941312, "grad_norm": 2.062068223953247, "learning_rate": 0.0007490218642117377, "loss": 0.4269, "step": 217420 }, { "epoch": 62.55178365937859, "grad_norm": 0.761164665222168, "learning_rate": 0.0007489643268124281, "loss": 0.4443, "step": 217430 }, { "epoch": 62.554660529344076, "grad_norm": 1.8876559734344482, "learning_rate": 0.0007489067894131186, "loss": 0.4477, "step": 217440 }, { "epoch": 62.55753739930955, "grad_norm": 1.954279899597168, "learning_rate": 0.000748849252013809, "loss": 0.4139, "step": 217450 }, { "epoch": 62.56041426927503, "grad_norm": 0.9134647250175476, "learning_rate": 0.0007487917146144995, "loss": 0.4721, "step": 217460 }, { "epoch": 62.563291139240505, "grad_norm": 1.4238091707229614, "learning_rate": 0.0007487341772151899, "loss": 0.489, "step": 217470 }, { "epoch": 62.56616800920598, "grad_norm": 1.0181007385253906, "learning_rate": 0.0007486766398158803, "loss": 0.395, "step": 217480 }, { "epoch": 62.569044879171464, "grad_norm": 1.3391329050064087, "learning_rate": 0.0007486191024165708, "loss": 0.3859, "step": 217490 }, { "epoch": 62.57192174913694, "grad_norm": 1.0455098152160645, "learning_rate": 0.0007485615650172613, "loss": 0.4833, "step": 217500 }, { "epoch": 62.57479861910242, "grad_norm": 1.649498462677002, "learning_rate": 0.0007485040276179516, "loss": 0.4401, "step": 217510 }, { "epoch": 62.57767548906789, "grad_norm": 1.2488805055618286, "learning_rate": 0.0007484464902186421, "loss": 0.4981, "step": 217520 }, { "epoch": 62.58055235903337, "grad_norm": 0.8577150702476501, "learning_rate": 0.0007483889528193327, "loss": 0.3938, "step": 217530 }, { "epoch": 62.58342922899885, "grad_norm": 1.0197935104370117, "learning_rate": 0.000748331415420023, "loss": 0.5432, "step": 217540 }, { "epoch": 62.58630609896433, "grad_norm": 1.5498384237289429, "learning_rate": 0.0007482738780207135, "loss": 0.3948, "step": 217550 }, { "epoch": 62.589182968929805, "grad_norm": 1.6879329681396484, "learning_rate": 0.000748216340621404, "loss": 0.4476, "step": 217560 }, { "epoch": 62.59205983889528, "grad_norm": 0.7339304089546204, "learning_rate": 0.0007481588032220944, "loss": 0.4162, "step": 217570 }, { "epoch": 62.59493670886076, "grad_norm": 1.966504693031311, "learning_rate": 0.0007481012658227848, "loss": 0.4447, "step": 217580 }, { "epoch": 62.59781357882623, "grad_norm": 0.754375696182251, "learning_rate": 0.0007480437284234753, "loss": 0.4115, "step": 217590 }, { "epoch": 62.60069044879172, "grad_norm": 1.5416510105133057, "learning_rate": 0.0007479861910241657, "loss": 0.5579, "step": 217600 }, { "epoch": 62.60356731875719, "grad_norm": 1.642646312713623, "learning_rate": 0.0007479286536248562, "loss": 0.4502, "step": 217610 }, { "epoch": 62.60644418872267, "grad_norm": 1.5385648012161255, "learning_rate": 0.0007478711162255467, "loss": 0.3973, "step": 217620 }, { "epoch": 62.609321058688145, "grad_norm": 2.8107378482818604, "learning_rate": 0.000747813578826237, "loss": 0.4982, "step": 217630 }, { "epoch": 62.61219792865362, "grad_norm": 1.3288447856903076, "learning_rate": 0.0007477560414269276, "loss": 0.3975, "step": 217640 }, { "epoch": 62.615074798619105, "grad_norm": 1.1163724660873413, "learning_rate": 0.000747698504027618, "loss": 0.4336, "step": 217650 }, { "epoch": 62.61795166858458, "grad_norm": 1.4313077926635742, "learning_rate": 0.0007476409666283084, "loss": 0.4119, "step": 217660 }, { "epoch": 62.62082853855006, "grad_norm": 2.331937074661255, "learning_rate": 0.0007475834292289989, "loss": 0.507, "step": 217670 }, { "epoch": 62.623705408515534, "grad_norm": 2.0383946895599365, "learning_rate": 0.0007475258918296893, "loss": 0.3953, "step": 217680 }, { "epoch": 62.62658227848101, "grad_norm": 1.0903315544128418, "learning_rate": 0.0007474683544303797, "loss": 0.5093, "step": 217690 }, { "epoch": 62.62945914844649, "grad_norm": 1.3353699445724487, "learning_rate": 0.0007474108170310702, "loss": 0.4188, "step": 217700 }, { "epoch": 62.63233601841197, "grad_norm": 1.7739512920379639, "learning_rate": 0.0007473532796317606, "loss": 0.3964, "step": 217710 }, { "epoch": 62.635212888377445, "grad_norm": 1.0048339366912842, "learning_rate": 0.0007472957422324511, "loss": 0.5424, "step": 217720 }, { "epoch": 62.63808975834292, "grad_norm": 0.9782752394676208, "learning_rate": 0.0007472382048331416, "loss": 0.5099, "step": 217730 }, { "epoch": 62.6409666283084, "grad_norm": 1.1655718088150024, "learning_rate": 0.0007471806674338319, "loss": 0.4225, "step": 217740 }, { "epoch": 62.64384349827388, "grad_norm": 0.8824403882026672, "learning_rate": 0.0007471231300345225, "loss": 0.416, "step": 217750 }, { "epoch": 62.64672036823936, "grad_norm": 0.8754565119743347, "learning_rate": 0.0007470655926352129, "loss": 0.45, "step": 217760 }, { "epoch": 62.649597238204834, "grad_norm": 1.4619194269180298, "learning_rate": 0.0007470080552359033, "loss": 0.4492, "step": 217770 }, { "epoch": 62.65247410817031, "grad_norm": 1.4364019632339478, "learning_rate": 0.0007469505178365938, "loss": 0.4274, "step": 217780 }, { "epoch": 62.655350978135786, "grad_norm": 0.8370836973190308, "learning_rate": 0.0007468929804372843, "loss": 0.3733, "step": 217790 }, { "epoch": 62.65822784810126, "grad_norm": 0.9629577398300171, "learning_rate": 0.0007468354430379746, "loss": 0.3983, "step": 217800 }, { "epoch": 62.661104718066746, "grad_norm": 0.9740999937057495, "learning_rate": 0.0007467779056386651, "loss": 0.5631, "step": 217810 }, { "epoch": 62.66398158803222, "grad_norm": 1.0640190839767456, "learning_rate": 0.0007467203682393557, "loss": 0.5196, "step": 217820 }, { "epoch": 62.6668584579977, "grad_norm": 1.368141531944275, "learning_rate": 0.000746662830840046, "loss": 0.6036, "step": 217830 }, { "epoch": 62.669735327963174, "grad_norm": 1.0526278018951416, "learning_rate": 0.0007466052934407365, "loss": 0.6319, "step": 217840 }, { "epoch": 62.67261219792865, "grad_norm": 0.741844892501831, "learning_rate": 0.000746547756041427, "loss": 0.3194, "step": 217850 }, { "epoch": 62.675489067894134, "grad_norm": 1.2480740547180176, "learning_rate": 0.0007464902186421174, "loss": 0.3714, "step": 217860 }, { "epoch": 62.67836593785961, "grad_norm": 1.1123164892196655, "learning_rate": 0.0007464326812428078, "loss": 0.493, "step": 217870 }, { "epoch": 62.681242807825086, "grad_norm": 1.7824937105178833, "learning_rate": 0.0007463751438434983, "loss": 0.4211, "step": 217880 }, { "epoch": 62.68411967779056, "grad_norm": 1.61771821975708, "learning_rate": 0.0007463176064441887, "loss": 0.4317, "step": 217890 }, { "epoch": 62.68699654775604, "grad_norm": 0.8879011273384094, "learning_rate": 0.0007462600690448792, "loss": 0.4923, "step": 217900 }, { "epoch": 62.68987341772152, "grad_norm": 0.8245725035667419, "learning_rate": 0.0007462025316455696, "loss": 0.4479, "step": 217910 }, { "epoch": 62.692750287687, "grad_norm": 1.540171504020691, "learning_rate": 0.00074614499424626, "loss": 0.5545, "step": 217920 }, { "epoch": 62.695627157652474, "grad_norm": 0.6869266033172607, "learning_rate": 0.0007460874568469506, "loss": 0.4299, "step": 217930 }, { "epoch": 62.69850402761795, "grad_norm": 1.6262751817703247, "learning_rate": 0.000746029919447641, "loss": 0.4922, "step": 217940 }, { "epoch": 62.70138089758343, "grad_norm": 1.8718292713165283, "learning_rate": 0.0007459723820483314, "loss": 0.4081, "step": 217950 }, { "epoch": 62.70425776754891, "grad_norm": 0.9566819071769714, "learning_rate": 0.0007459148446490219, "loss": 0.4142, "step": 217960 }, { "epoch": 62.707134637514386, "grad_norm": 1.6560086011886597, "learning_rate": 0.0007458573072497124, "loss": 0.5419, "step": 217970 }, { "epoch": 62.71001150747986, "grad_norm": 1.1327548027038574, "learning_rate": 0.0007457997698504027, "loss": 0.6023, "step": 217980 }, { "epoch": 62.71288837744534, "grad_norm": 1.2290562391281128, "learning_rate": 0.0007457422324510932, "loss": 0.5726, "step": 217990 }, { "epoch": 62.715765247410815, "grad_norm": 1.883001446723938, "learning_rate": 0.0007456846950517837, "loss": 0.5717, "step": 218000 }, { "epoch": 62.7186421173763, "grad_norm": 1.0639350414276123, "learning_rate": 0.0007456271576524741, "loss": 0.551, "step": 218010 }, { "epoch": 62.721518987341774, "grad_norm": 1.3354649543762207, "learning_rate": 0.0007455696202531645, "loss": 0.671, "step": 218020 }, { "epoch": 62.72439585730725, "grad_norm": 1.5874168872833252, "learning_rate": 0.000745512082853855, "loss": 0.4436, "step": 218030 }, { "epoch": 62.72727272727273, "grad_norm": 1.7843530178070068, "learning_rate": 0.0007454545454545455, "loss": 0.5392, "step": 218040 }, { "epoch": 62.7301495972382, "grad_norm": 0.8488351106643677, "learning_rate": 0.0007453970080552359, "loss": 0.379, "step": 218050 }, { "epoch": 62.73302646720368, "grad_norm": 1.0673010349273682, "learning_rate": 0.0007453394706559264, "loss": 0.3461, "step": 218060 }, { "epoch": 62.73590333716916, "grad_norm": 1.9046028852462769, "learning_rate": 0.0007452819332566168, "loss": 0.5189, "step": 218070 }, { "epoch": 62.73878020713464, "grad_norm": 0.9867174029350281, "learning_rate": 0.0007452243958573073, "loss": 0.4917, "step": 218080 }, { "epoch": 62.741657077100115, "grad_norm": 0.9202607870101929, "learning_rate": 0.0007451668584579977, "loss": 0.4312, "step": 218090 }, { "epoch": 62.74453394706559, "grad_norm": 0.8005607724189758, "learning_rate": 0.0007451093210586881, "loss": 0.5377, "step": 218100 }, { "epoch": 62.74741081703107, "grad_norm": 0.9129936695098877, "learning_rate": 0.0007450517836593786, "loss": 0.5137, "step": 218110 }, { "epoch": 62.75028768699655, "grad_norm": 0.7691571116447449, "learning_rate": 0.0007449942462600691, "loss": 0.3432, "step": 218120 }, { "epoch": 62.75316455696203, "grad_norm": 1.0078115463256836, "learning_rate": 0.0007449367088607594, "loss": 0.5116, "step": 218130 }, { "epoch": 62.7560414269275, "grad_norm": 0.7265655398368835, "learning_rate": 0.00074487917146145, "loss": 0.4811, "step": 218140 }, { "epoch": 62.75891829689298, "grad_norm": 1.312984824180603, "learning_rate": 0.0007448216340621405, "loss": 0.6862, "step": 218150 }, { "epoch": 62.761795166858455, "grad_norm": 0.9490905404090881, "learning_rate": 0.0007447640966628308, "loss": 0.442, "step": 218160 }, { "epoch": 62.76467203682394, "grad_norm": 1.7098063230514526, "learning_rate": 0.0007447065592635213, "loss": 0.4619, "step": 218170 }, { "epoch": 62.767548906789415, "grad_norm": 1.9635628461837769, "learning_rate": 0.0007446490218642118, "loss": 0.5025, "step": 218180 }, { "epoch": 62.77042577675489, "grad_norm": 0.9439386129379272, "learning_rate": 0.0007445914844649022, "loss": 0.3931, "step": 218190 }, { "epoch": 62.77330264672037, "grad_norm": 1.8553410768508911, "learning_rate": 0.0007445339470655926, "loss": 0.5039, "step": 218200 }, { "epoch": 62.77617951668584, "grad_norm": 1.4565746784210205, "learning_rate": 0.0007444764096662831, "loss": 0.5189, "step": 218210 }, { "epoch": 62.77905638665133, "grad_norm": 1.454020619392395, "learning_rate": 0.0007444188722669735, "loss": 0.3919, "step": 218220 }, { "epoch": 62.7819332566168, "grad_norm": 2.4308762550354004, "learning_rate": 0.000744361334867664, "loss": 0.5147, "step": 218230 }, { "epoch": 62.78481012658228, "grad_norm": 1.261271595954895, "learning_rate": 0.0007443037974683545, "loss": 0.4725, "step": 218240 }, { "epoch": 62.787686996547755, "grad_norm": 1.7163519859313965, "learning_rate": 0.0007442462600690449, "loss": 0.4766, "step": 218250 }, { "epoch": 62.79056386651323, "grad_norm": 0.7816778421401978, "learning_rate": 0.0007441887226697354, "loss": 0.4787, "step": 218260 }, { "epoch": 62.79344073647871, "grad_norm": 1.796930193901062, "learning_rate": 0.0007441311852704258, "loss": 0.4451, "step": 218270 }, { "epoch": 62.79631760644419, "grad_norm": 1.1119951009750366, "learning_rate": 0.0007440736478711162, "loss": 0.4799, "step": 218280 }, { "epoch": 62.79919447640967, "grad_norm": 1.6829795837402344, "learning_rate": 0.0007440161104718067, "loss": 0.4679, "step": 218290 }, { "epoch": 62.80207134637514, "grad_norm": 1.3514612913131714, "learning_rate": 0.0007439585730724972, "loss": 0.563, "step": 218300 }, { "epoch": 62.80494821634062, "grad_norm": 1.8010605573654175, "learning_rate": 0.0007439010356731875, "loss": 0.5294, "step": 218310 }, { "epoch": 62.807825086306096, "grad_norm": 1.4541170597076416, "learning_rate": 0.000743843498273878, "loss": 0.4384, "step": 218320 }, { "epoch": 62.81070195627158, "grad_norm": 0.8948183059692383, "learning_rate": 0.0007437859608745686, "loss": 0.4178, "step": 218330 }, { "epoch": 62.813578826237055, "grad_norm": 1.6487430334091187, "learning_rate": 0.0007437284234752589, "loss": 0.4218, "step": 218340 }, { "epoch": 62.81645569620253, "grad_norm": 2.9063186645507812, "learning_rate": 0.0007436708860759494, "loss": 0.5239, "step": 218350 }, { "epoch": 62.81933256616801, "grad_norm": 1.6664918661117554, "learning_rate": 0.0007436133486766399, "loss": 0.4946, "step": 218360 }, { "epoch": 62.822209436133484, "grad_norm": 1.7748161554336548, "learning_rate": 0.0007435558112773303, "loss": 0.4966, "step": 218370 }, { "epoch": 62.82508630609897, "grad_norm": 0.9183078408241272, "learning_rate": 0.0007434982738780207, "loss": 0.3738, "step": 218380 }, { "epoch": 62.82796317606444, "grad_norm": 1.0750962495803833, "learning_rate": 0.0007434407364787112, "loss": 0.4594, "step": 218390 }, { "epoch": 62.83084004602992, "grad_norm": 1.6019837856292725, "learning_rate": 0.0007433831990794016, "loss": 0.5655, "step": 218400 }, { "epoch": 62.833716915995396, "grad_norm": 1.3188916444778442, "learning_rate": 0.0007433256616800921, "loss": 0.4825, "step": 218410 }, { "epoch": 62.83659378596087, "grad_norm": 0.8144833445549011, "learning_rate": 0.0007432681242807826, "loss": 0.505, "step": 218420 }, { "epoch": 62.839470655926355, "grad_norm": 1.4989445209503174, "learning_rate": 0.000743210586881473, "loss": 0.4367, "step": 218430 }, { "epoch": 62.84234752589183, "grad_norm": 2.094569444656372, "learning_rate": 0.0007431530494821635, "loss": 0.4573, "step": 218440 }, { "epoch": 62.84522439585731, "grad_norm": 1.7131332159042358, "learning_rate": 0.0007430955120828539, "loss": 0.437, "step": 218450 }, { "epoch": 62.848101265822784, "grad_norm": 1.1751538515090942, "learning_rate": 0.0007430379746835443, "loss": 0.4019, "step": 218460 }, { "epoch": 62.85097813578826, "grad_norm": 1.39219331741333, "learning_rate": 0.0007429804372842348, "loss": 0.4707, "step": 218470 }, { "epoch": 62.85385500575374, "grad_norm": 0.6995024085044861, "learning_rate": 0.0007429228998849253, "loss": 0.5356, "step": 218480 }, { "epoch": 62.85673187571922, "grad_norm": 1.2867618799209595, "learning_rate": 0.0007428653624856156, "loss": 0.498, "step": 218490 }, { "epoch": 62.859608745684696, "grad_norm": 1.1297703981399536, "learning_rate": 0.0007428078250863061, "loss": 0.4657, "step": 218500 }, { "epoch": 62.86248561565017, "grad_norm": 1.3823400735855103, "learning_rate": 0.0007427502876869965, "loss": 0.4215, "step": 218510 }, { "epoch": 62.86536248561565, "grad_norm": 1.2062102556228638, "learning_rate": 0.000742692750287687, "loss": 0.5532, "step": 218520 }, { "epoch": 62.868239355581125, "grad_norm": 0.73358154296875, "learning_rate": 0.0007426352128883775, "loss": 0.4398, "step": 218530 }, { "epoch": 62.87111622554661, "grad_norm": 1.7734735012054443, "learning_rate": 0.0007425776754890679, "loss": 0.5005, "step": 218540 }, { "epoch": 62.873993095512084, "grad_norm": 1.1894651651382446, "learning_rate": 0.0007425201380897584, "loss": 0.6041, "step": 218550 }, { "epoch": 62.87686996547756, "grad_norm": 1.0878161191940308, "learning_rate": 0.0007424626006904488, "loss": 0.466, "step": 218560 }, { "epoch": 62.879746835443036, "grad_norm": 1.530568242073059, "learning_rate": 0.0007424050632911392, "loss": 0.4556, "step": 218570 }, { "epoch": 62.88262370540851, "grad_norm": 1.7232226133346558, "learning_rate": 0.0007423475258918297, "loss": 0.5277, "step": 218580 }, { "epoch": 62.885500575373996, "grad_norm": 1.1831179857254028, "learning_rate": 0.0007422899884925202, "loss": 0.5247, "step": 218590 }, { "epoch": 62.88837744533947, "grad_norm": 1.4307860136032104, "learning_rate": 0.0007422324510932105, "loss": 0.3744, "step": 218600 }, { "epoch": 62.89125431530495, "grad_norm": 0.7040619850158691, "learning_rate": 0.000742174913693901, "loss": 0.5547, "step": 218610 }, { "epoch": 62.894131185270425, "grad_norm": 1.096174716949463, "learning_rate": 0.0007421173762945916, "loss": 0.4139, "step": 218620 }, { "epoch": 62.8970080552359, "grad_norm": 1.4786497354507446, "learning_rate": 0.0007420598388952819, "loss": 0.5384, "step": 218630 }, { "epoch": 62.899884925201384, "grad_norm": 1.6851625442504883, "learning_rate": 0.0007420023014959724, "loss": 0.5029, "step": 218640 }, { "epoch": 62.90276179516686, "grad_norm": 1.3492517471313477, "learning_rate": 0.0007419447640966629, "loss": 0.4351, "step": 218650 }, { "epoch": 62.90563866513234, "grad_norm": 0.8082217574119568, "learning_rate": 0.0007418872266973533, "loss": 0.5474, "step": 218660 }, { "epoch": 62.90851553509781, "grad_norm": 0.7608814835548401, "learning_rate": 0.0007418296892980437, "loss": 0.4381, "step": 218670 }, { "epoch": 62.91139240506329, "grad_norm": 1.9768640995025635, "learning_rate": 0.0007417721518987342, "loss": 0.5574, "step": 218680 }, { "epoch": 62.91426927502877, "grad_norm": 1.8088940382003784, "learning_rate": 0.0007417146144994246, "loss": 0.3723, "step": 218690 }, { "epoch": 62.91714614499425, "grad_norm": 0.8524466753005981, "learning_rate": 0.0007416570771001151, "loss": 0.4441, "step": 218700 }, { "epoch": 62.920023014959725, "grad_norm": 0.7229787111282349, "learning_rate": 0.0007415995397008055, "loss": 0.5284, "step": 218710 }, { "epoch": 62.9228998849252, "grad_norm": 1.1327672004699707, "learning_rate": 0.0007415420023014959, "loss": 0.4715, "step": 218720 }, { "epoch": 62.92577675489068, "grad_norm": 1.5660767555236816, "learning_rate": 0.0007414844649021865, "loss": 0.4559, "step": 218730 }, { "epoch": 62.92865362485615, "grad_norm": 1.0374642610549927, "learning_rate": 0.0007414269275028769, "loss": 0.5322, "step": 218740 }, { "epoch": 62.93153049482164, "grad_norm": 0.7212415933609009, "learning_rate": 0.0007413693901035673, "loss": 0.5277, "step": 218750 }, { "epoch": 62.93440736478711, "grad_norm": 1.1643017530441284, "learning_rate": 0.0007413118527042578, "loss": 0.3815, "step": 218760 }, { "epoch": 62.93728423475259, "grad_norm": 1.2199045419692993, "learning_rate": 0.0007412543153049483, "loss": 0.5087, "step": 218770 }, { "epoch": 62.940161104718065, "grad_norm": 1.6414768695831299, "learning_rate": 0.0007411967779056386, "loss": 0.5703, "step": 218780 }, { "epoch": 62.94303797468354, "grad_norm": 1.1018587350845337, "learning_rate": 0.0007411392405063291, "loss": 0.5276, "step": 218790 }, { "epoch": 62.945914844649025, "grad_norm": 1.3705086708068848, "learning_rate": 0.0007410817031070196, "loss": 0.6946, "step": 218800 }, { "epoch": 62.9487917146145, "grad_norm": 1.8585954904556274, "learning_rate": 0.00074102416570771, "loss": 0.514, "step": 218810 }, { "epoch": 62.95166858457998, "grad_norm": 2.063624620437622, "learning_rate": 0.0007409666283084004, "loss": 0.4378, "step": 218820 }, { "epoch": 62.95454545454545, "grad_norm": 1.363722801208496, "learning_rate": 0.000740909090909091, "loss": 0.4434, "step": 218830 }, { "epoch": 62.95742232451093, "grad_norm": 1.5528250932693481, "learning_rate": 0.0007408515535097814, "loss": 0.4471, "step": 218840 }, { "epoch": 62.96029919447641, "grad_norm": 1.5701593160629272, "learning_rate": 0.0007407940161104718, "loss": 0.473, "step": 218850 }, { "epoch": 62.96317606444189, "grad_norm": 2.0547189712524414, "learning_rate": 0.0007407364787111623, "loss": 0.5807, "step": 218860 }, { "epoch": 62.966052934407365, "grad_norm": 1.4659465551376343, "learning_rate": 0.0007406789413118527, "loss": 0.4038, "step": 218870 }, { "epoch": 62.96892980437284, "grad_norm": 1.5637606382369995, "learning_rate": 0.0007406214039125432, "loss": 0.4404, "step": 218880 }, { "epoch": 62.97180667433832, "grad_norm": 1.633835792541504, "learning_rate": 0.0007405638665132336, "loss": 0.4959, "step": 218890 }, { "epoch": 62.9746835443038, "grad_norm": 1.2271429300308228, "learning_rate": 0.000740506329113924, "loss": 0.5777, "step": 218900 }, { "epoch": 62.97756041426928, "grad_norm": 1.2389808893203735, "learning_rate": 0.0007404487917146145, "loss": 0.5524, "step": 218910 }, { "epoch": 62.98043728423475, "grad_norm": 1.8419944047927856, "learning_rate": 0.000740391254315305, "loss": 0.4718, "step": 218920 }, { "epoch": 62.98331415420023, "grad_norm": 1.236296534538269, "learning_rate": 0.0007403337169159953, "loss": 0.4352, "step": 218930 }, { "epoch": 62.986191024165706, "grad_norm": 2.10019588470459, "learning_rate": 0.0007402761795166859, "loss": 0.5744, "step": 218940 }, { "epoch": 62.98906789413118, "grad_norm": 0.8893066644668579, "learning_rate": 0.0007402186421173764, "loss": 0.5042, "step": 218950 }, { "epoch": 62.991944764096665, "grad_norm": 0.8886787295341492, "learning_rate": 0.0007401611047180667, "loss": 0.5354, "step": 218960 }, { "epoch": 62.99482163406214, "grad_norm": 2.2591488361358643, "learning_rate": 0.0007401035673187572, "loss": 0.5538, "step": 218970 }, { "epoch": 62.99769850402762, "grad_norm": 2.102736473083496, "learning_rate": 0.0007400460299194477, "loss": 0.4459, "step": 218980 }, { "epoch": 63.000575373993094, "grad_norm": 1.0781880617141724, "learning_rate": 0.0007399884925201381, "loss": 0.463, "step": 218990 }, { "epoch": 63.00345224395857, "grad_norm": 0.9400960803031921, "learning_rate": 0.0007399309551208285, "loss": 0.455, "step": 219000 }, { "epoch": 63.00632911392405, "grad_norm": 1.2675851583480835, "learning_rate": 0.000739873417721519, "loss": 0.4265, "step": 219010 }, { "epoch": 63.00920598388953, "grad_norm": 1.1189097166061401, "learning_rate": 0.0007398158803222094, "loss": 0.4984, "step": 219020 }, { "epoch": 63.012082853855006, "grad_norm": 1.3356267213821411, "learning_rate": 0.0007397583429228999, "loss": 0.43, "step": 219030 }, { "epoch": 63.01495972382048, "grad_norm": 1.0870188474655151, "learning_rate": 0.0007397008055235904, "loss": 0.4224, "step": 219040 }, { "epoch": 63.01783659378596, "grad_norm": 0.9003406763076782, "learning_rate": 0.0007396432681242808, "loss": 0.357, "step": 219050 }, { "epoch": 63.02071346375144, "grad_norm": 0.7884219288825989, "learning_rate": 0.0007395857307249713, "loss": 0.4176, "step": 219060 }, { "epoch": 63.02359033371692, "grad_norm": 1.0028396844863892, "learning_rate": 0.0007395281933256617, "loss": 0.4444, "step": 219070 }, { "epoch": 63.026467203682394, "grad_norm": 1.1897192001342773, "learning_rate": 0.0007394706559263521, "loss": 0.4238, "step": 219080 }, { "epoch": 63.02934407364787, "grad_norm": 0.7744433283805847, "learning_rate": 0.0007394131185270426, "loss": 0.4316, "step": 219090 }, { "epoch": 63.032220943613346, "grad_norm": 0.8769915699958801, "learning_rate": 0.0007393555811277331, "loss": 0.4721, "step": 219100 }, { "epoch": 63.03509781357883, "grad_norm": 1.583653450012207, "learning_rate": 0.0007392980437284234, "loss": 0.4371, "step": 219110 }, { "epoch": 63.037974683544306, "grad_norm": 1.142533302307129, "learning_rate": 0.000739240506329114, "loss": 0.3747, "step": 219120 }, { "epoch": 63.04085155350978, "grad_norm": 1.0045334100723267, "learning_rate": 0.0007391829689298045, "loss": 0.4353, "step": 219130 }, { "epoch": 63.04372842347526, "grad_norm": 1.034791350364685, "learning_rate": 0.0007391254315304948, "loss": 0.4724, "step": 219140 }, { "epoch": 63.046605293440734, "grad_norm": 1.5389622449874878, "learning_rate": 0.0007390678941311853, "loss": 0.4144, "step": 219150 }, { "epoch": 63.04948216340621, "grad_norm": 1.053391456604004, "learning_rate": 0.0007390103567318758, "loss": 0.3247, "step": 219160 }, { "epoch": 63.052359033371694, "grad_norm": 1.3185153007507324, "learning_rate": 0.0007389528193325662, "loss": 0.472, "step": 219170 }, { "epoch": 63.05523590333717, "grad_norm": 0.9127339720726013, "learning_rate": 0.0007388952819332566, "loss": 0.4591, "step": 219180 }, { "epoch": 63.058112773302646, "grad_norm": 1.9918007850646973, "learning_rate": 0.0007388377445339471, "loss": 0.4404, "step": 219190 }, { "epoch": 63.06098964326812, "grad_norm": 1.4428271055221558, "learning_rate": 0.0007387802071346375, "loss": 0.4522, "step": 219200 }, { "epoch": 63.0638665132336, "grad_norm": 1.3372937440872192, "learning_rate": 0.000738722669735328, "loss": 0.3765, "step": 219210 }, { "epoch": 63.06674338319908, "grad_norm": 0.5186936259269714, "learning_rate": 0.0007386651323360184, "loss": 0.4968, "step": 219220 }, { "epoch": 63.06962025316456, "grad_norm": 2.505887269973755, "learning_rate": 0.0007386075949367089, "loss": 0.5968, "step": 219230 }, { "epoch": 63.072497123130034, "grad_norm": 0.6078690886497498, "learning_rate": 0.0007385500575373994, "loss": 0.4478, "step": 219240 }, { "epoch": 63.07537399309551, "grad_norm": 1.0387234687805176, "learning_rate": 0.0007384925201380898, "loss": 0.3952, "step": 219250 }, { "epoch": 63.07825086306099, "grad_norm": 1.1144182682037354, "learning_rate": 0.0007384349827387802, "loss": 0.3683, "step": 219260 }, { "epoch": 63.08112773302647, "grad_norm": 0.7201749086380005, "learning_rate": 0.0007383774453394707, "loss": 0.3572, "step": 219270 }, { "epoch": 63.084004602991946, "grad_norm": 1.3490241765975952, "learning_rate": 0.0007383199079401612, "loss": 0.4475, "step": 219280 }, { "epoch": 63.08688147295742, "grad_norm": 1.6207610368728638, "learning_rate": 0.0007382623705408515, "loss": 0.359, "step": 219290 }, { "epoch": 63.0897583429229, "grad_norm": 1.139967679977417, "learning_rate": 0.000738204833141542, "loss": 0.3585, "step": 219300 }, { "epoch": 63.092635212888375, "grad_norm": 1.1244367361068726, "learning_rate": 0.0007381472957422325, "loss": 0.4757, "step": 219310 }, { "epoch": 63.09551208285386, "grad_norm": 1.0024447441101074, "learning_rate": 0.0007380897583429229, "loss": 0.4168, "step": 219320 }, { "epoch": 63.098388952819334, "grad_norm": 2.8847029209136963, "learning_rate": 0.0007380322209436134, "loss": 0.3966, "step": 219330 }, { "epoch": 63.10126582278481, "grad_norm": 1.3449891805648804, "learning_rate": 0.0007379746835443038, "loss": 0.5116, "step": 219340 }, { "epoch": 63.10414269275029, "grad_norm": 1.1351970434188843, "learning_rate": 0.0007379171461449943, "loss": 0.3967, "step": 219350 }, { "epoch": 63.10701956271576, "grad_norm": 2.6427228450775146, "learning_rate": 0.0007378596087456847, "loss": 0.4698, "step": 219360 }, { "epoch": 63.10989643268124, "grad_norm": 0.73717200756073, "learning_rate": 0.0007378020713463751, "loss": 0.423, "step": 219370 }, { "epoch": 63.11277330264672, "grad_norm": 1.35358726978302, "learning_rate": 0.0007377445339470656, "loss": 0.4722, "step": 219380 }, { "epoch": 63.1156501726122, "grad_norm": 0.7023805975914001, "learning_rate": 0.0007376869965477561, "loss": 0.4492, "step": 219390 }, { "epoch": 63.118527042577675, "grad_norm": 1.7447340488433838, "learning_rate": 0.0007376294591484464, "loss": 0.5866, "step": 219400 }, { "epoch": 63.12140391254315, "grad_norm": 0.9571616649627686, "learning_rate": 0.000737571921749137, "loss": 0.4254, "step": 219410 }, { "epoch": 63.12428078250863, "grad_norm": 0.9636262059211731, "learning_rate": 0.0007375143843498275, "loss": 0.417, "step": 219420 }, { "epoch": 63.12715765247411, "grad_norm": 2.2210357189178467, "learning_rate": 0.0007374568469505178, "loss": 0.592, "step": 219430 }, { "epoch": 63.13003452243959, "grad_norm": 1.195056438446045, "learning_rate": 0.0007373993095512083, "loss": 0.419, "step": 219440 }, { "epoch": 63.13291139240506, "grad_norm": 2.1492161750793457, "learning_rate": 0.0007373417721518988, "loss": 0.4424, "step": 219450 }, { "epoch": 63.13578826237054, "grad_norm": 1.9242767095565796, "learning_rate": 0.0007372842347525892, "loss": 0.4388, "step": 219460 }, { "epoch": 63.138665132336016, "grad_norm": 1.7904585599899292, "learning_rate": 0.0007372266973532796, "loss": 0.5744, "step": 219470 }, { "epoch": 63.1415420023015, "grad_norm": 1.0674099922180176, "learning_rate": 0.0007371691599539701, "loss": 0.3946, "step": 219480 }, { "epoch": 63.144418872266975, "grad_norm": 0.6654331088066101, "learning_rate": 0.0007371116225546605, "loss": 0.389, "step": 219490 }, { "epoch": 63.14729574223245, "grad_norm": 0.9786354303359985, "learning_rate": 0.000737054085155351, "loss": 0.4459, "step": 219500 }, { "epoch": 63.15017261219793, "grad_norm": 1.6535823345184326, "learning_rate": 0.0007369965477560414, "loss": 0.3974, "step": 219510 }, { "epoch": 63.153049482163404, "grad_norm": 0.9277152419090271, "learning_rate": 0.0007369390103567319, "loss": 0.4607, "step": 219520 }, { "epoch": 63.15592635212889, "grad_norm": 1.8107858896255493, "learning_rate": 0.0007368814729574224, "loss": 0.3259, "step": 219530 }, { "epoch": 63.15880322209436, "grad_norm": 1.3255592584609985, "learning_rate": 0.0007368239355581128, "loss": 0.4786, "step": 219540 }, { "epoch": 63.16168009205984, "grad_norm": 0.7572393417358398, "learning_rate": 0.0007367663981588032, "loss": 0.3655, "step": 219550 }, { "epoch": 63.164556962025316, "grad_norm": 1.0368573665618896, "learning_rate": 0.0007367088607594937, "loss": 0.586, "step": 219560 }, { "epoch": 63.16743383199079, "grad_norm": 1.0350258350372314, "learning_rate": 0.0007366513233601842, "loss": 0.4054, "step": 219570 }, { "epoch": 63.170310701956275, "grad_norm": 1.912381649017334, "learning_rate": 0.0007365937859608745, "loss": 0.4511, "step": 219580 }, { "epoch": 63.17318757192175, "grad_norm": 1.3908406496047974, "learning_rate": 0.000736536248561565, "loss": 0.3965, "step": 219590 }, { "epoch": 63.17606444188723, "grad_norm": 1.0464084148406982, "learning_rate": 0.0007364787111622555, "loss": 0.4371, "step": 219600 }, { "epoch": 63.178941311852704, "grad_norm": 1.1288318634033203, "learning_rate": 0.0007364211737629459, "loss": 0.4033, "step": 219610 }, { "epoch": 63.18181818181818, "grad_norm": 0.770293653011322, "learning_rate": 0.0007363636363636363, "loss": 0.4343, "step": 219620 }, { "epoch": 63.184695051783656, "grad_norm": 0.8521804213523865, "learning_rate": 0.0007363060989643269, "loss": 0.3472, "step": 219630 }, { "epoch": 63.18757192174914, "grad_norm": 2.044673204421997, "learning_rate": 0.0007362485615650173, "loss": 0.4459, "step": 219640 }, { "epoch": 63.190448791714616, "grad_norm": 1.7671493291854858, "learning_rate": 0.0007361910241657077, "loss": 0.4158, "step": 219650 }, { "epoch": 63.19332566168009, "grad_norm": 1.1697185039520264, "learning_rate": 0.0007361334867663982, "loss": 0.3725, "step": 219660 }, { "epoch": 63.19620253164557, "grad_norm": 1.5433367490768433, "learning_rate": 0.0007360759493670886, "loss": 0.5057, "step": 219670 }, { "epoch": 63.199079401611044, "grad_norm": 1.2592835426330566, "learning_rate": 0.0007360184119677791, "loss": 0.4623, "step": 219680 }, { "epoch": 63.20195627157653, "grad_norm": 0.6870574951171875, "learning_rate": 0.0007359608745684695, "loss": 0.4347, "step": 219690 }, { "epoch": 63.204833141542004, "grad_norm": 1.4396569728851318, "learning_rate": 0.0007359033371691599, "loss": 0.4502, "step": 219700 }, { "epoch": 63.20771001150748, "grad_norm": 0.8057601451873779, "learning_rate": 0.0007358457997698504, "loss": 0.4776, "step": 219710 }, { "epoch": 63.210586881472956, "grad_norm": 1.8630341291427612, "learning_rate": 0.0007357882623705409, "loss": 0.4099, "step": 219720 }, { "epoch": 63.21346375143843, "grad_norm": 0.8844138383865356, "learning_rate": 0.0007357307249712312, "loss": 0.4007, "step": 219730 }, { "epoch": 63.216340621403916, "grad_norm": 1.5362248420715332, "learning_rate": 0.0007356731875719218, "loss": 0.4325, "step": 219740 }, { "epoch": 63.21921749136939, "grad_norm": 0.8683405518531799, "learning_rate": 0.0007356156501726123, "loss": 0.4317, "step": 219750 }, { "epoch": 63.22209436133487, "grad_norm": 1.071824073791504, "learning_rate": 0.0007355581127733026, "loss": 0.4898, "step": 219760 }, { "epoch": 63.224971231300344, "grad_norm": 0.8759118318557739, "learning_rate": 0.0007355005753739931, "loss": 0.4145, "step": 219770 }, { "epoch": 63.22784810126582, "grad_norm": 0.5424116849899292, "learning_rate": 0.0007354430379746836, "loss": 0.4466, "step": 219780 }, { "epoch": 63.230724971231304, "grad_norm": 1.1303088665008545, "learning_rate": 0.000735385500575374, "loss": 0.448, "step": 219790 }, { "epoch": 63.23360184119678, "grad_norm": 0.8041556477546692, "learning_rate": 0.0007353279631760644, "loss": 0.4064, "step": 219800 }, { "epoch": 63.236478711162256, "grad_norm": 1.483556866645813, "learning_rate": 0.000735270425776755, "loss": 0.4941, "step": 219810 }, { "epoch": 63.23935558112773, "grad_norm": 1.1138588190078735, "learning_rate": 0.0007352128883774453, "loss": 0.4759, "step": 219820 }, { "epoch": 63.24223245109321, "grad_norm": 1.3504791259765625, "learning_rate": 0.0007351553509781358, "loss": 0.461, "step": 219830 }, { "epoch": 63.245109321058685, "grad_norm": 1.383526086807251, "learning_rate": 0.0007350978135788263, "loss": 0.4287, "step": 219840 }, { "epoch": 63.24798619102417, "grad_norm": 2.5166776180267334, "learning_rate": 0.0007350402761795167, "loss": 0.4611, "step": 219850 }, { "epoch": 63.250863060989644, "grad_norm": 0.7526422739028931, "learning_rate": 0.0007349827387802072, "loss": 0.477, "step": 219860 }, { "epoch": 63.25373993095512, "grad_norm": 0.6361796855926514, "learning_rate": 0.0007349252013808976, "loss": 0.4877, "step": 219870 }, { "epoch": 63.2566168009206, "grad_norm": 1.6409941911697388, "learning_rate": 0.000734867663981588, "loss": 0.5104, "step": 219880 }, { "epoch": 63.25949367088607, "grad_norm": 0.9195199012756348, "learning_rate": 0.0007348101265822785, "loss": 0.4472, "step": 219890 }, { "epoch": 63.262370540851556, "grad_norm": 0.9246510863304138, "learning_rate": 0.000734752589182969, "loss": 0.408, "step": 219900 }, { "epoch": 63.26524741081703, "grad_norm": 1.0113846063613892, "learning_rate": 0.0007346950517836593, "loss": 0.547, "step": 219910 }, { "epoch": 63.26812428078251, "grad_norm": 1.527318000793457, "learning_rate": 0.0007346375143843499, "loss": 0.3805, "step": 219920 }, { "epoch": 63.271001150747985, "grad_norm": 1.2426358461380005, "learning_rate": 0.0007345799769850404, "loss": 0.4165, "step": 219930 }, { "epoch": 63.27387802071346, "grad_norm": 1.781145453453064, "learning_rate": 0.0007345224395857307, "loss": 0.4951, "step": 219940 }, { "epoch": 63.276754890678944, "grad_norm": 1.5157785415649414, "learning_rate": 0.0007344649021864212, "loss": 0.4933, "step": 219950 }, { "epoch": 63.27963176064442, "grad_norm": 1.054664969444275, "learning_rate": 0.0007344073647871117, "loss": 0.4442, "step": 219960 }, { "epoch": 63.2825086306099, "grad_norm": 1.1464166641235352, "learning_rate": 0.0007343498273878021, "loss": 0.4666, "step": 219970 }, { "epoch": 63.28538550057537, "grad_norm": 1.7947734594345093, "learning_rate": 0.0007342922899884925, "loss": 0.4877, "step": 219980 }, { "epoch": 63.28826237054085, "grad_norm": 2.404893159866333, "learning_rate": 0.000734234752589183, "loss": 0.5222, "step": 219990 }, { "epoch": 63.29113924050633, "grad_norm": 1.0911976099014282, "learning_rate": 0.0007341772151898734, "loss": 0.4437, "step": 220000 }, { "epoch": 63.29401611047181, "grad_norm": 0.8735302686691284, "learning_rate": 0.0007341196777905639, "loss": 0.3983, "step": 220010 }, { "epoch": 63.296892980437285, "grad_norm": 2.198610782623291, "learning_rate": 0.0007340621403912543, "loss": 0.5065, "step": 220020 }, { "epoch": 63.29976985040276, "grad_norm": 1.7256207466125488, "learning_rate": 0.0007340046029919448, "loss": 0.5875, "step": 220030 }, { "epoch": 63.30264672036824, "grad_norm": 0.8722852468490601, "learning_rate": 0.0007339470655926353, "loss": 0.4, "step": 220040 }, { "epoch": 63.30552359033371, "grad_norm": 2.0075294971466064, "learning_rate": 0.0007338895281933257, "loss": 0.5638, "step": 220050 }, { "epoch": 63.3084004602992, "grad_norm": 1.0013127326965332, "learning_rate": 0.0007338319907940161, "loss": 0.4657, "step": 220060 }, { "epoch": 63.31127733026467, "grad_norm": 0.9049946069717407, "learning_rate": 0.0007337744533947066, "loss": 0.3442, "step": 220070 }, { "epoch": 63.31415420023015, "grad_norm": 1.8747974634170532, "learning_rate": 0.0007337169159953971, "loss": 0.5977, "step": 220080 }, { "epoch": 63.317031070195625, "grad_norm": 2.100106716156006, "learning_rate": 0.0007336593785960874, "loss": 0.5356, "step": 220090 }, { "epoch": 63.3199079401611, "grad_norm": 1.662901759147644, "learning_rate": 0.000733601841196778, "loss": 0.4882, "step": 220100 }, { "epoch": 63.322784810126585, "grad_norm": 1.1526801586151123, "learning_rate": 0.0007335443037974684, "loss": 0.4895, "step": 220110 }, { "epoch": 63.32566168009206, "grad_norm": 0.9103536605834961, "learning_rate": 0.0007334867663981588, "loss": 0.4195, "step": 220120 }, { "epoch": 63.32853855005754, "grad_norm": 2.412061929702759, "learning_rate": 0.0007334292289988492, "loss": 0.4689, "step": 220130 }, { "epoch": 63.33141542002301, "grad_norm": 0.9489082098007202, "learning_rate": 0.0007333716915995398, "loss": 0.376, "step": 220140 }, { "epoch": 63.33429228998849, "grad_norm": 1.1556835174560547, "learning_rate": 0.0007333141542002302, "loss": 0.584, "step": 220150 }, { "epoch": 63.33716915995397, "grad_norm": 1.108715295791626, "learning_rate": 0.0007332566168009206, "loss": 0.5806, "step": 220160 }, { "epoch": 63.34004602991945, "grad_norm": 1.0338739156723022, "learning_rate": 0.000733199079401611, "loss": 0.4023, "step": 220170 }, { "epoch": 63.342922899884925, "grad_norm": 1.706498384475708, "learning_rate": 0.0007331415420023015, "loss": 0.4886, "step": 220180 }, { "epoch": 63.3457997698504, "grad_norm": 1.6737207174301147, "learning_rate": 0.000733084004602992, "loss": 0.5014, "step": 220190 }, { "epoch": 63.34867663981588, "grad_norm": 1.2121394872665405, "learning_rate": 0.0007330264672036823, "loss": 0.3977, "step": 220200 }, { "epoch": 63.35155350978136, "grad_norm": 1.125291109085083, "learning_rate": 0.0007329689298043729, "loss": 0.5171, "step": 220210 }, { "epoch": 63.35443037974684, "grad_norm": 1.1181062459945679, "learning_rate": 0.0007329113924050633, "loss": 0.4247, "step": 220220 }, { "epoch": 63.35730724971231, "grad_norm": 1.1533317565917969, "learning_rate": 0.0007328538550057537, "loss": 0.4702, "step": 220230 }, { "epoch": 63.36018411967779, "grad_norm": 1.534015417098999, "learning_rate": 0.0007327963176064442, "loss": 0.4638, "step": 220240 }, { "epoch": 63.363060989643266, "grad_norm": 0.9348896741867065, "learning_rate": 0.0007327387802071347, "loss": 0.4093, "step": 220250 }, { "epoch": 63.36593785960875, "grad_norm": 1.185418725013733, "learning_rate": 0.0007326812428078251, "loss": 0.4042, "step": 220260 }, { "epoch": 63.368814729574225, "grad_norm": 0.980415940284729, "learning_rate": 0.0007326237054085155, "loss": 0.4228, "step": 220270 }, { "epoch": 63.3716915995397, "grad_norm": 1.1888384819030762, "learning_rate": 0.000732566168009206, "loss": 0.4652, "step": 220280 }, { "epoch": 63.37456846950518, "grad_norm": 2.057185649871826, "learning_rate": 0.0007325086306098964, "loss": 0.413, "step": 220290 }, { "epoch": 63.377445339470654, "grad_norm": 0.9060554504394531, "learning_rate": 0.0007324510932105869, "loss": 0.4362, "step": 220300 }, { "epoch": 63.38032220943613, "grad_norm": 1.1669530868530273, "learning_rate": 0.0007323935558112773, "loss": 0.4365, "step": 220310 }, { "epoch": 63.383199079401614, "grad_norm": 1.3909552097320557, "learning_rate": 0.0007323360184119678, "loss": 0.4843, "step": 220320 }, { "epoch": 63.38607594936709, "grad_norm": 0.9738704562187195, "learning_rate": 0.0007322784810126583, "loss": 0.4617, "step": 220330 }, { "epoch": 63.388952819332566, "grad_norm": 0.9294323921203613, "learning_rate": 0.0007322209436133487, "loss": 0.4616, "step": 220340 }, { "epoch": 63.39182968929804, "grad_norm": 1.1069622039794922, "learning_rate": 0.000732163406214039, "loss": 0.3744, "step": 220350 }, { "epoch": 63.39470655926352, "grad_norm": 1.67778742313385, "learning_rate": 0.0007321058688147296, "loss": 0.5053, "step": 220360 }, { "epoch": 63.397583429229, "grad_norm": 0.7245625853538513, "learning_rate": 0.0007320483314154201, "loss": 0.5088, "step": 220370 }, { "epoch": 63.40046029919448, "grad_norm": 1.6254807710647583, "learning_rate": 0.0007319907940161104, "loss": 0.5001, "step": 220380 }, { "epoch": 63.403337169159954, "grad_norm": 1.4566922187805176, "learning_rate": 0.000731933256616801, "loss": 0.4626, "step": 220390 }, { "epoch": 63.40621403912543, "grad_norm": 0.8629955649375916, "learning_rate": 0.0007318757192174914, "loss": 0.3924, "step": 220400 }, { "epoch": 63.40909090909091, "grad_norm": 0.9907341003417969, "learning_rate": 0.0007318181818181818, "loss": 0.4836, "step": 220410 }, { "epoch": 63.41196777905639, "grad_norm": 0.9301738142967224, "learning_rate": 0.0007317606444188722, "loss": 0.4352, "step": 220420 }, { "epoch": 63.414844649021866, "grad_norm": 1.2613780498504639, "learning_rate": 0.0007317031070195628, "loss": 0.4795, "step": 220430 }, { "epoch": 63.41772151898734, "grad_norm": 1.8915824890136719, "learning_rate": 0.0007316455696202532, "loss": 0.4132, "step": 220440 }, { "epoch": 63.42059838895282, "grad_norm": 1.47463059425354, "learning_rate": 0.0007315880322209436, "loss": 0.491, "step": 220450 }, { "epoch": 63.423475258918295, "grad_norm": 1.6778150796890259, "learning_rate": 0.0007315304948216341, "loss": 0.4774, "step": 220460 }, { "epoch": 63.42635212888378, "grad_norm": 1.0464969873428345, "learning_rate": 0.0007314729574223245, "loss": 0.4216, "step": 220470 }, { "epoch": 63.429228998849254, "grad_norm": 1.2512290477752686, "learning_rate": 0.000731415420023015, "loss": 0.4531, "step": 220480 }, { "epoch": 63.43210586881473, "grad_norm": 1.812475562095642, "learning_rate": 0.0007313578826237054, "loss": 0.5036, "step": 220490 }, { "epoch": 63.43498273878021, "grad_norm": 1.335176706314087, "learning_rate": 0.0007313003452243959, "loss": 0.5183, "step": 220500 }, { "epoch": 63.43785960874568, "grad_norm": 1.0139943361282349, "learning_rate": 0.0007312428078250863, "loss": 0.4366, "step": 220510 }, { "epoch": 63.44073647871116, "grad_norm": 1.4325467348098755, "learning_rate": 0.0007311852704257768, "loss": 0.5011, "step": 220520 }, { "epoch": 63.44361334867664, "grad_norm": 1.5108085870742798, "learning_rate": 0.0007311277330264671, "loss": 0.3856, "step": 220530 }, { "epoch": 63.44649021864212, "grad_norm": 0.7229419350624084, "learning_rate": 0.0007310701956271577, "loss": 0.5011, "step": 220540 }, { "epoch": 63.449367088607595, "grad_norm": 2.7780730724334717, "learning_rate": 0.0007310126582278482, "loss": 0.4566, "step": 220550 }, { "epoch": 63.45224395857307, "grad_norm": 1.845961570739746, "learning_rate": 0.0007309551208285385, "loss": 0.4624, "step": 220560 }, { "epoch": 63.45512082853855, "grad_norm": 1.3094011545181274, "learning_rate": 0.000730897583429229, "loss": 0.5174, "step": 220570 }, { "epoch": 63.45799769850403, "grad_norm": 0.9969354271888733, "learning_rate": 0.0007308400460299195, "loss": 0.4375, "step": 220580 }, { "epoch": 63.46087456846951, "grad_norm": 1.3584550619125366, "learning_rate": 0.0007307825086306099, "loss": 0.4747, "step": 220590 }, { "epoch": 63.46375143843498, "grad_norm": 1.49089515209198, "learning_rate": 0.0007307249712313003, "loss": 0.4103, "step": 220600 }, { "epoch": 63.46662830840046, "grad_norm": 0.8201149106025696, "learning_rate": 0.0007306674338319909, "loss": 0.4229, "step": 220610 }, { "epoch": 63.469505178365935, "grad_norm": 1.0753847360610962, "learning_rate": 0.0007306098964326812, "loss": 0.4419, "step": 220620 }, { "epoch": 63.47238204833142, "grad_norm": 1.0819287300109863, "learning_rate": 0.0007305523590333717, "loss": 0.4463, "step": 220630 }, { "epoch": 63.475258918296895, "grad_norm": 1.1313735246658325, "learning_rate": 0.0007304948216340622, "loss": 0.4992, "step": 220640 }, { "epoch": 63.47813578826237, "grad_norm": 0.7681761980056763, "learning_rate": 0.0007304372842347526, "loss": 0.4464, "step": 220650 }, { "epoch": 63.48101265822785, "grad_norm": 0.765619158744812, "learning_rate": 0.0007303797468354431, "loss": 0.444, "step": 220660 }, { "epoch": 63.48388952819332, "grad_norm": 2.6789894104003906, "learning_rate": 0.0007303222094361335, "loss": 0.4137, "step": 220670 }, { "epoch": 63.48676639815881, "grad_norm": 0.6330404877662659, "learning_rate": 0.0007302646720368239, "loss": 0.5944, "step": 220680 }, { "epoch": 63.48964326812428, "grad_norm": 2.0812253952026367, "learning_rate": 0.0007302071346375144, "loss": 0.3599, "step": 220690 }, { "epoch": 63.49252013808976, "grad_norm": 1.8749085664749146, "learning_rate": 0.0007301495972382049, "loss": 0.4643, "step": 220700 }, { "epoch": 63.495397008055235, "grad_norm": 0.9649068117141724, "learning_rate": 0.0007300920598388952, "loss": 0.4431, "step": 220710 }, { "epoch": 63.49827387802071, "grad_norm": 1.4791165590286255, "learning_rate": 0.0007300345224395858, "loss": 0.4315, "step": 220720 }, { "epoch": 63.50115074798619, "grad_norm": 1.0186965465545654, "learning_rate": 0.0007299769850402763, "loss": 0.3772, "step": 220730 }, { "epoch": 63.50402761795167, "grad_norm": 1.8421599864959717, "learning_rate": 0.0007299194476409666, "loss": 0.5193, "step": 220740 }, { "epoch": 63.50690448791715, "grad_norm": 0.9800715446472168, "learning_rate": 0.0007298619102416571, "loss": 0.4673, "step": 220750 }, { "epoch": 63.50978135788262, "grad_norm": 1.3127367496490479, "learning_rate": 0.0007298043728423476, "loss": 0.49, "step": 220760 }, { "epoch": 63.5126582278481, "grad_norm": 1.0412989854812622, "learning_rate": 0.000729746835443038, "loss": 0.4246, "step": 220770 }, { "epoch": 63.515535097813576, "grad_norm": 1.9241595268249512, "learning_rate": 0.0007296892980437284, "loss": 0.4142, "step": 220780 }, { "epoch": 63.51841196777906, "grad_norm": 1.190667986869812, "learning_rate": 0.000729631760644419, "loss": 0.4979, "step": 220790 }, { "epoch": 63.521288837744535, "grad_norm": 1.1274278163909912, "learning_rate": 0.0007295742232451093, "loss": 0.4361, "step": 220800 }, { "epoch": 63.52416570771001, "grad_norm": 1.2339526414871216, "learning_rate": 0.0007295166858457998, "loss": 0.5049, "step": 220810 }, { "epoch": 63.52704257767549, "grad_norm": 0.6933699250221252, "learning_rate": 0.0007294591484464902, "loss": 0.5073, "step": 220820 }, { "epoch": 63.529919447640964, "grad_norm": 1.1439270973205566, "learning_rate": 0.0007294016110471807, "loss": 0.4987, "step": 220830 }, { "epoch": 63.53279631760645, "grad_norm": 1.6692978143692017, "learning_rate": 0.0007293440736478712, "loss": 0.5054, "step": 220840 }, { "epoch": 63.53567318757192, "grad_norm": 2.5744926929473877, "learning_rate": 0.0007292865362485616, "loss": 0.4832, "step": 220850 }, { "epoch": 63.5385500575374, "grad_norm": 0.9494609832763672, "learning_rate": 0.000729228998849252, "loss": 0.4095, "step": 220860 }, { "epoch": 63.541426927502876, "grad_norm": 1.4980835914611816, "learning_rate": 0.0007291714614499425, "loss": 0.5268, "step": 220870 }, { "epoch": 63.54430379746835, "grad_norm": 1.4421981573104858, "learning_rate": 0.000729113924050633, "loss": 0.5176, "step": 220880 }, { "epoch": 63.547180667433835, "grad_norm": 0.8563559055328369, "learning_rate": 0.0007290563866513233, "loss": 0.5823, "step": 220890 }, { "epoch": 63.55005753739931, "grad_norm": 1.3017126321792603, "learning_rate": 0.0007289988492520139, "loss": 0.4228, "step": 220900 }, { "epoch": 63.55293440736479, "grad_norm": 0.850942850112915, "learning_rate": 0.0007289413118527043, "loss": 0.5016, "step": 220910 }, { "epoch": 63.555811277330264, "grad_norm": 0.9659186005592346, "learning_rate": 0.0007288837744533947, "loss": 0.3636, "step": 220920 }, { "epoch": 63.55868814729574, "grad_norm": 1.048140048980713, "learning_rate": 0.0007288262370540851, "loss": 0.4045, "step": 220930 }, { "epoch": 63.561565017261216, "grad_norm": 2.1569530963897705, "learning_rate": 0.0007287686996547757, "loss": 0.5544, "step": 220940 }, { "epoch": 63.5644418872267, "grad_norm": 1.2617297172546387, "learning_rate": 0.0007287111622554661, "loss": 0.397, "step": 220950 }, { "epoch": 63.567318757192176, "grad_norm": 0.8183659911155701, "learning_rate": 0.0007286536248561565, "loss": 0.3885, "step": 220960 }, { "epoch": 63.57019562715765, "grad_norm": 2.11028790473938, "learning_rate": 0.0007285960874568469, "loss": 0.555, "step": 220970 }, { "epoch": 63.57307249712313, "grad_norm": 1.0303078889846802, "learning_rate": 0.0007285385500575374, "loss": 0.4164, "step": 220980 }, { "epoch": 63.575949367088604, "grad_norm": 1.0696874856948853, "learning_rate": 0.0007284810126582279, "loss": 0.5284, "step": 220990 }, { "epoch": 63.57882623705409, "grad_norm": 1.3233128786087036, "learning_rate": 0.0007284234752589182, "loss": 0.402, "step": 221000 }, { "epoch": 63.581703107019564, "grad_norm": 1.6443556547164917, "learning_rate": 0.0007283659378596088, "loss": 0.4114, "step": 221010 }, { "epoch": 63.58457997698504, "grad_norm": 0.8728976249694824, "learning_rate": 0.0007283084004602992, "loss": 0.4898, "step": 221020 }, { "epoch": 63.587456846950516, "grad_norm": 1.313612461090088, "learning_rate": 0.0007282508630609896, "loss": 0.4957, "step": 221030 }, { "epoch": 63.59033371691599, "grad_norm": 1.069703221321106, "learning_rate": 0.00072819332566168, "loss": 0.3823, "step": 221040 }, { "epoch": 63.593210586881476, "grad_norm": 1.6825244426727295, "learning_rate": 0.0007281357882623706, "loss": 0.4472, "step": 221050 }, { "epoch": 63.59608745684695, "grad_norm": 1.068950891494751, "learning_rate": 0.000728078250863061, "loss": 0.4107, "step": 221060 }, { "epoch": 63.59896432681243, "grad_norm": 1.0690639019012451, "learning_rate": 0.0007280207134637514, "loss": 0.4844, "step": 221070 }, { "epoch": 63.601841196777904, "grad_norm": 1.4430394172668457, "learning_rate": 0.000727963176064442, "loss": 0.5144, "step": 221080 }, { "epoch": 63.60471806674338, "grad_norm": 2.191005229949951, "learning_rate": 0.0007279056386651323, "loss": 0.4537, "step": 221090 }, { "epoch": 63.607594936708864, "grad_norm": 0.8672282695770264, "learning_rate": 0.0007278481012658228, "loss": 0.4576, "step": 221100 }, { "epoch": 63.61047180667434, "grad_norm": 1.2990599870681763, "learning_rate": 0.0007277905638665132, "loss": 0.4287, "step": 221110 }, { "epoch": 63.613348676639816, "grad_norm": 1.5456010103225708, "learning_rate": 0.0007277330264672037, "loss": 0.5065, "step": 221120 }, { "epoch": 63.61622554660529, "grad_norm": 1.3372234106063843, "learning_rate": 0.0007276754890678941, "loss": 0.5257, "step": 221130 }, { "epoch": 63.61910241657077, "grad_norm": 1.264387607574463, "learning_rate": 0.0007276179516685846, "loss": 0.4306, "step": 221140 }, { "epoch": 63.621979286536245, "grad_norm": 0.9676607847213745, "learning_rate": 0.000727560414269275, "loss": 0.4979, "step": 221150 }, { "epoch": 63.62485615650173, "grad_norm": 1.1842665672302246, "learning_rate": 0.0007275028768699655, "loss": 0.4793, "step": 221160 }, { "epoch": 63.627733026467205, "grad_norm": 0.8275614380836487, "learning_rate": 0.000727445339470656, "loss": 0.4017, "step": 221170 }, { "epoch": 63.63060989643268, "grad_norm": 1.3841781616210938, "learning_rate": 0.0007273878020713463, "loss": 0.4191, "step": 221180 }, { "epoch": 63.63348676639816, "grad_norm": 1.2589010000228882, "learning_rate": 0.0007273302646720369, "loss": 0.4331, "step": 221190 }, { "epoch": 63.63636363636363, "grad_norm": 1.7618610858917236, "learning_rate": 0.0007272727272727273, "loss": 0.5267, "step": 221200 }, { "epoch": 63.639240506329116, "grad_norm": 1.8701705932617188, "learning_rate": 0.0007272151898734177, "loss": 0.4589, "step": 221210 }, { "epoch": 63.64211737629459, "grad_norm": 1.040501356124878, "learning_rate": 0.0007271576524741081, "loss": 0.475, "step": 221220 }, { "epoch": 63.64499424626007, "grad_norm": 1.096378207206726, "learning_rate": 0.0007271001150747987, "loss": 0.5015, "step": 221230 }, { "epoch": 63.647871116225545, "grad_norm": 0.5841137170791626, "learning_rate": 0.000727042577675489, "loss": 0.5481, "step": 221240 }, { "epoch": 63.65074798619102, "grad_norm": 1.1831490993499756, "learning_rate": 0.0007269850402761795, "loss": 0.4887, "step": 221250 }, { "epoch": 63.653624856156505, "grad_norm": 1.3508729934692383, "learning_rate": 0.00072692750287687, "loss": 0.5394, "step": 221260 }, { "epoch": 63.65650172612198, "grad_norm": 0.8762409687042236, "learning_rate": 0.0007268699654775604, "loss": 0.4106, "step": 221270 }, { "epoch": 63.65937859608746, "grad_norm": 1.1297754049301147, "learning_rate": 0.0007268124280782509, "loss": 0.5645, "step": 221280 }, { "epoch": 63.66225546605293, "grad_norm": 1.415320873260498, "learning_rate": 0.0007267548906789413, "loss": 0.4815, "step": 221290 }, { "epoch": 63.66513233601841, "grad_norm": 1.7745763063430786, "learning_rate": 0.0007266973532796318, "loss": 0.4753, "step": 221300 }, { "epoch": 63.66800920598389, "grad_norm": 1.0575836896896362, "learning_rate": 0.0007266398158803222, "loss": 0.3513, "step": 221310 }, { "epoch": 63.67088607594937, "grad_norm": 1.7282297611236572, "learning_rate": 0.0007265822784810127, "loss": 0.419, "step": 221320 }, { "epoch": 63.673762945914845, "grad_norm": 1.403422236442566, "learning_rate": 0.000726524741081703, "loss": 0.3922, "step": 221330 }, { "epoch": 63.67663981588032, "grad_norm": 0.834187388420105, "learning_rate": 0.0007264672036823936, "loss": 0.358, "step": 221340 }, { "epoch": 63.6795166858458, "grad_norm": 1.0538666248321533, "learning_rate": 0.0007264096662830841, "loss": 0.4, "step": 221350 }, { "epoch": 63.68239355581128, "grad_norm": 1.268518328666687, "learning_rate": 0.0007263521288837744, "loss": 0.4147, "step": 221360 }, { "epoch": 63.68527042577676, "grad_norm": 1.8573139905929565, "learning_rate": 0.000726294591484465, "loss": 0.4139, "step": 221370 }, { "epoch": 63.68814729574223, "grad_norm": 1.6974430084228516, "learning_rate": 0.0007262370540851554, "loss": 0.4872, "step": 221380 }, { "epoch": 63.69102416570771, "grad_norm": 1.1569852828979492, "learning_rate": 0.0007261795166858458, "loss": 0.4563, "step": 221390 }, { "epoch": 63.693901035673186, "grad_norm": 0.9888601899147034, "learning_rate": 0.0007261219792865362, "loss": 0.5247, "step": 221400 }, { "epoch": 63.69677790563866, "grad_norm": 1.075786828994751, "learning_rate": 0.0007260644418872268, "loss": 0.5637, "step": 221410 }, { "epoch": 63.699654775604145, "grad_norm": 1.3341747522354126, "learning_rate": 0.0007260069044879171, "loss": 0.5402, "step": 221420 }, { "epoch": 63.70253164556962, "grad_norm": 1.1392662525177002, "learning_rate": 0.0007259493670886076, "loss": 0.5803, "step": 221430 }, { "epoch": 63.7054085155351, "grad_norm": 0.7014418840408325, "learning_rate": 0.000725891829689298, "loss": 0.47, "step": 221440 }, { "epoch": 63.708285385500574, "grad_norm": 0.9219921231269836, "learning_rate": 0.0007258342922899885, "loss": 0.4053, "step": 221450 }, { "epoch": 63.71116225546605, "grad_norm": 1.4769856929779053, "learning_rate": 0.000725776754890679, "loss": 0.4511, "step": 221460 }, { "epoch": 63.71403912543153, "grad_norm": 0.7432239055633545, "learning_rate": 0.0007257192174913694, "loss": 0.376, "step": 221470 }, { "epoch": 63.71691599539701, "grad_norm": 0.6925079822540283, "learning_rate": 0.0007256616800920599, "loss": 0.3528, "step": 221480 }, { "epoch": 63.719792865362486, "grad_norm": 1.1430059671401978, "learning_rate": 0.0007256041426927503, "loss": 0.4412, "step": 221490 }, { "epoch": 63.72266973532796, "grad_norm": 1.6089893579483032, "learning_rate": 0.0007255466052934408, "loss": 0.4633, "step": 221500 }, { "epoch": 63.72554660529344, "grad_norm": 0.7668038010597229, "learning_rate": 0.0007254890678941311, "loss": 0.459, "step": 221510 }, { "epoch": 63.72842347525892, "grad_norm": 1.4527195692062378, "learning_rate": 0.0007254315304948217, "loss": 0.4399, "step": 221520 }, { "epoch": 63.7313003452244, "grad_norm": 1.4810072183609009, "learning_rate": 0.0007253739930955122, "loss": 0.5391, "step": 221530 }, { "epoch": 63.734177215189874, "grad_norm": 1.4775532484054565, "learning_rate": 0.0007253164556962025, "loss": 0.4686, "step": 221540 }, { "epoch": 63.73705408515535, "grad_norm": 1.2006638050079346, "learning_rate": 0.000725258918296893, "loss": 0.645, "step": 221550 }, { "epoch": 63.739930955120826, "grad_norm": 1.0090891122817993, "learning_rate": 0.0007252013808975835, "loss": 0.3529, "step": 221560 }, { "epoch": 63.74280782508631, "grad_norm": 1.1531447172164917, "learning_rate": 0.0007251438434982739, "loss": 0.445, "step": 221570 }, { "epoch": 63.745684695051786, "grad_norm": 1.9994760751724243, "learning_rate": 0.0007250863060989643, "loss": 0.4966, "step": 221580 }, { "epoch": 63.74856156501726, "grad_norm": 1.4674301147460938, "learning_rate": 0.0007250287686996549, "loss": 0.3873, "step": 221590 }, { "epoch": 63.75143843498274, "grad_norm": 0.94118732213974, "learning_rate": 0.0007249712313003452, "loss": 0.3958, "step": 221600 }, { "epoch": 63.754315304948214, "grad_norm": 0.6727805733680725, "learning_rate": 0.0007249136939010357, "loss": 0.4506, "step": 221610 }, { "epoch": 63.75719217491369, "grad_norm": 1.5866336822509766, "learning_rate": 0.0007248561565017261, "loss": 0.5678, "step": 221620 }, { "epoch": 63.760069044879174, "grad_norm": 1.007012963294983, "learning_rate": 0.0007247986191024166, "loss": 0.4326, "step": 221630 }, { "epoch": 63.76294591484465, "grad_norm": 1.3558863401412964, "learning_rate": 0.000724741081703107, "loss": 0.4507, "step": 221640 }, { "epoch": 63.765822784810126, "grad_norm": 1.9141101837158203, "learning_rate": 0.0007246835443037975, "loss": 0.4854, "step": 221650 }, { "epoch": 63.7686996547756, "grad_norm": 1.415250539779663, "learning_rate": 0.0007246260069044879, "loss": 0.4734, "step": 221660 }, { "epoch": 63.77157652474108, "grad_norm": 0.45922937989234924, "learning_rate": 0.0007245684695051784, "loss": 0.4203, "step": 221670 }, { "epoch": 63.77445339470656, "grad_norm": 1.4568226337432861, "learning_rate": 0.0007245109321058689, "loss": 0.5248, "step": 221680 }, { "epoch": 63.77733026467204, "grad_norm": 1.4346423149108887, "learning_rate": 0.0007244533947065592, "loss": 0.5055, "step": 221690 }, { "epoch": 63.780207134637514, "grad_norm": 1.2069231271743774, "learning_rate": 0.0007243958573072498, "loss": 0.4871, "step": 221700 }, { "epoch": 63.78308400460299, "grad_norm": 1.326714038848877, "learning_rate": 0.0007243383199079402, "loss": 0.5519, "step": 221710 }, { "epoch": 63.78596087456847, "grad_norm": 0.914717972278595, "learning_rate": 0.0007242807825086306, "loss": 0.4103, "step": 221720 }, { "epoch": 63.78883774453395, "grad_norm": 1.5315146446228027, "learning_rate": 0.000724223245109321, "loss": 0.3887, "step": 221730 }, { "epoch": 63.791714614499426, "grad_norm": 1.7000762224197388, "learning_rate": 0.0007241657077100116, "loss": 0.4144, "step": 221740 }, { "epoch": 63.7945914844649, "grad_norm": 1.5508100986480713, "learning_rate": 0.000724108170310702, "loss": 0.4553, "step": 221750 }, { "epoch": 63.79746835443038, "grad_norm": 0.8445296287536621, "learning_rate": 0.0007240506329113924, "loss": 0.4613, "step": 221760 }, { "epoch": 63.800345224395855, "grad_norm": 1.7716588973999023, "learning_rate": 0.000723993095512083, "loss": 0.5232, "step": 221770 }, { "epoch": 63.80322209436134, "grad_norm": 0.865890383720398, "learning_rate": 0.0007239355581127733, "loss": 0.3987, "step": 221780 }, { "epoch": 63.806098964326814, "grad_norm": 2.2351162433624268, "learning_rate": 0.0007238780207134638, "loss": 0.4895, "step": 221790 }, { "epoch": 63.80897583429229, "grad_norm": 1.315859317779541, "learning_rate": 0.0007238204833141541, "loss": 0.5271, "step": 221800 }, { "epoch": 63.81185270425777, "grad_norm": 0.9340511560440063, "learning_rate": 0.0007237629459148447, "loss": 0.6281, "step": 221810 }, { "epoch": 63.81472957422324, "grad_norm": 1.3090949058532715, "learning_rate": 0.0007237054085155351, "loss": 0.4233, "step": 221820 }, { "epoch": 63.81760644418872, "grad_norm": 1.4452320337295532, "learning_rate": 0.0007236478711162255, "loss": 0.4581, "step": 221830 }, { "epoch": 63.8204833141542, "grad_norm": 0.8016945719718933, "learning_rate": 0.000723590333716916, "loss": 0.3498, "step": 221840 }, { "epoch": 63.82336018411968, "grad_norm": 0.9970690608024597, "learning_rate": 0.0007235327963176065, "loss": 0.4148, "step": 221850 }, { "epoch": 63.826237054085155, "grad_norm": 1.4329955577850342, "learning_rate": 0.0007234752589182969, "loss": 0.4525, "step": 221860 }, { "epoch": 63.82911392405063, "grad_norm": 1.5245946645736694, "learning_rate": 0.0007234177215189873, "loss": 0.4162, "step": 221870 }, { "epoch": 63.83199079401611, "grad_norm": 2.2808687686920166, "learning_rate": 0.0007233601841196779, "loss": 0.6062, "step": 221880 }, { "epoch": 63.83486766398159, "grad_norm": 1.3824584484100342, "learning_rate": 0.0007233026467203682, "loss": 0.4601, "step": 221890 }, { "epoch": 63.83774453394707, "grad_norm": 1.0952818393707275, "learning_rate": 0.0007232451093210587, "loss": 0.5034, "step": 221900 }, { "epoch": 63.84062140391254, "grad_norm": 1.0930325984954834, "learning_rate": 0.0007231875719217491, "loss": 0.5013, "step": 221910 }, { "epoch": 63.84349827387802, "grad_norm": 3.7846524715423584, "learning_rate": 0.0007231300345224396, "loss": 0.5707, "step": 221920 }, { "epoch": 63.846375143843495, "grad_norm": 1.0813968181610107, "learning_rate": 0.00072307249712313, "loss": 0.4236, "step": 221930 }, { "epoch": 63.84925201380898, "grad_norm": 1.159897804260254, "learning_rate": 0.0007230149597238205, "loss": 0.4186, "step": 221940 }, { "epoch": 63.852128883774455, "grad_norm": 1.4527485370635986, "learning_rate": 0.0007229574223245108, "loss": 0.5209, "step": 221950 }, { "epoch": 63.85500575373993, "grad_norm": 1.0569745302200317, "learning_rate": 0.0007228998849252014, "loss": 0.4092, "step": 221960 }, { "epoch": 63.85788262370541, "grad_norm": 1.5205063819885254, "learning_rate": 0.0007228423475258919, "loss": 0.5548, "step": 221970 }, { "epoch": 63.860759493670884, "grad_norm": 1.2693954706192017, "learning_rate": 0.0007227848101265822, "loss": 0.4175, "step": 221980 }, { "epoch": 63.86363636363637, "grad_norm": 0.9153600335121155, "learning_rate": 0.0007227272727272728, "loss": 0.427, "step": 221990 }, { "epoch": 63.86651323360184, "grad_norm": 0.5620900392532349, "learning_rate": 0.0007226697353279632, "loss": 0.3848, "step": 222000 }, { "epoch": 63.86939010356732, "grad_norm": 1.8324360847473145, "learning_rate": 0.0007226121979286536, "loss": 0.5142, "step": 222010 }, { "epoch": 63.872266973532795, "grad_norm": 2.2242231369018555, "learning_rate": 0.000722554660529344, "loss": 0.5749, "step": 222020 }, { "epoch": 63.87514384349827, "grad_norm": 1.6089142560958862, "learning_rate": 0.0007224971231300346, "loss": 0.5178, "step": 222030 }, { "epoch": 63.878020713463755, "grad_norm": 1.2864595651626587, "learning_rate": 0.000722439585730725, "loss": 0.4132, "step": 222040 }, { "epoch": 63.88089758342923, "grad_norm": 1.2463058233261108, "learning_rate": 0.0007223820483314154, "loss": 0.5318, "step": 222050 }, { "epoch": 63.88377445339471, "grad_norm": 0.7792569994926453, "learning_rate": 0.000722324510932106, "loss": 0.4952, "step": 222060 }, { "epoch": 63.886651323360184, "grad_norm": 0.9571262001991272, "learning_rate": 0.0007222669735327963, "loss": 0.3943, "step": 222070 }, { "epoch": 63.88952819332566, "grad_norm": 1.397578239440918, "learning_rate": 0.0007222094361334868, "loss": 0.3783, "step": 222080 }, { "epoch": 63.892405063291136, "grad_norm": 1.4546397924423218, "learning_rate": 0.0007221518987341772, "loss": 0.3997, "step": 222090 }, { "epoch": 63.89528193325662, "grad_norm": 2.4714465141296387, "learning_rate": 0.0007220943613348677, "loss": 0.4201, "step": 222100 }, { "epoch": 63.898158803222096, "grad_norm": 1.4191019535064697, "learning_rate": 0.0007220368239355581, "loss": 0.372, "step": 222110 }, { "epoch": 63.90103567318757, "grad_norm": 1.785733699798584, "learning_rate": 0.0007219792865362486, "loss": 0.4363, "step": 222120 }, { "epoch": 63.90391254315305, "grad_norm": 1.3290296792984009, "learning_rate": 0.0007219217491369389, "loss": 0.4126, "step": 222130 }, { "epoch": 63.906789413118524, "grad_norm": 1.2277354001998901, "learning_rate": 0.0007218642117376295, "loss": 0.4408, "step": 222140 }, { "epoch": 63.90966628308401, "grad_norm": 2.0721702575683594, "learning_rate": 0.00072180667433832, "loss": 0.5476, "step": 222150 }, { "epoch": 63.912543153049484, "grad_norm": 0.9040274620056152, "learning_rate": 0.0007217491369390103, "loss": 0.5138, "step": 222160 }, { "epoch": 63.91542002301496, "grad_norm": 1.119067907333374, "learning_rate": 0.0007216915995397009, "loss": 0.3563, "step": 222170 }, { "epoch": 63.918296892980436, "grad_norm": 1.0027849674224854, "learning_rate": 0.0007216340621403913, "loss": 0.4352, "step": 222180 }, { "epoch": 63.92117376294591, "grad_norm": 0.9832316040992737, "learning_rate": 0.0007215765247410817, "loss": 0.4755, "step": 222190 }, { "epoch": 63.924050632911396, "grad_norm": 1.1389000415802002, "learning_rate": 0.0007215189873417721, "loss": 0.4287, "step": 222200 }, { "epoch": 63.92692750287687, "grad_norm": 0.7813708186149597, "learning_rate": 0.0007214614499424627, "loss": 0.4833, "step": 222210 }, { "epoch": 63.92980437284235, "grad_norm": 0.9755074381828308, "learning_rate": 0.000721403912543153, "loss": 0.4952, "step": 222220 }, { "epoch": 63.932681242807824, "grad_norm": 1.4842530488967896, "learning_rate": 0.0007213463751438435, "loss": 0.5223, "step": 222230 }, { "epoch": 63.9355581127733, "grad_norm": 1.1310540437698364, "learning_rate": 0.000721288837744534, "loss": 0.4055, "step": 222240 }, { "epoch": 63.938434982738784, "grad_norm": 0.9741770029067993, "learning_rate": 0.0007212313003452244, "loss": 0.4022, "step": 222250 }, { "epoch": 63.94131185270426, "grad_norm": 1.8006178140640259, "learning_rate": 0.0007211737629459149, "loss": 0.5994, "step": 222260 }, { "epoch": 63.944188722669736, "grad_norm": 1.180514931678772, "learning_rate": 0.0007211162255466053, "loss": 0.5125, "step": 222270 }, { "epoch": 63.94706559263521, "grad_norm": 1.2541844844818115, "learning_rate": 0.0007210586881472958, "loss": 0.5238, "step": 222280 }, { "epoch": 63.94994246260069, "grad_norm": 1.2590581178665161, "learning_rate": 0.0007210011507479862, "loss": 0.5467, "step": 222290 }, { "epoch": 63.952819332566165, "grad_norm": 1.476115107536316, "learning_rate": 0.0007209436133486767, "loss": 0.6516, "step": 222300 }, { "epoch": 63.95569620253165, "grad_norm": 1.0727083683013916, "learning_rate": 0.000720886075949367, "loss": 0.3818, "step": 222310 }, { "epoch": 63.958573072497124, "grad_norm": 2.182978630065918, "learning_rate": 0.0007208285385500576, "loss": 0.6245, "step": 222320 }, { "epoch": 63.9614499424626, "grad_norm": 1.0099692344665527, "learning_rate": 0.000720771001150748, "loss": 0.4351, "step": 222330 }, { "epoch": 63.96432681242808, "grad_norm": 1.1116130352020264, "learning_rate": 0.0007207134637514384, "loss": 0.5519, "step": 222340 }, { "epoch": 63.96720368239355, "grad_norm": 0.8965280055999756, "learning_rate": 0.0007206559263521289, "loss": 0.4089, "step": 222350 }, { "epoch": 63.970080552359036, "grad_norm": 1.476879596710205, "learning_rate": 0.0007205983889528194, "loss": 0.4887, "step": 222360 }, { "epoch": 63.97295742232451, "grad_norm": 3.1437008380889893, "learning_rate": 0.0007205408515535098, "loss": 0.5432, "step": 222370 }, { "epoch": 63.97583429228999, "grad_norm": 1.0812268257141113, "learning_rate": 0.0007204833141542002, "loss": 0.5031, "step": 222380 }, { "epoch": 63.978711162255465, "grad_norm": 1.7012431621551514, "learning_rate": 0.0007204257767548908, "loss": 0.5909, "step": 222390 }, { "epoch": 63.98158803222094, "grad_norm": 0.9397785067558289, "learning_rate": 0.0007203682393555811, "loss": 0.5938, "step": 222400 }, { "epoch": 63.984464902186424, "grad_norm": 1.0746660232543945, "learning_rate": 0.0007203107019562716, "loss": 0.5208, "step": 222410 }, { "epoch": 63.9873417721519, "grad_norm": 0.890444815158844, "learning_rate": 0.000720253164556962, "loss": 0.3609, "step": 222420 }, { "epoch": 63.99021864211738, "grad_norm": 2.701519250869751, "learning_rate": 0.0007201956271576525, "loss": 0.4816, "step": 222430 }, { "epoch": 63.99309551208285, "grad_norm": 1.5965652465820312, "learning_rate": 0.000720138089758343, "loss": 0.4751, "step": 222440 }, { "epoch": 63.99597238204833, "grad_norm": 1.0945723056793213, "learning_rate": 0.0007200805523590334, "loss": 0.5874, "step": 222450 }, { "epoch": 63.99884925201381, "grad_norm": 0.9123919606208801, "learning_rate": 0.0007200230149597239, "loss": 0.4287, "step": 222460 }, { "epoch": 64.00172612197929, "grad_norm": 0.7613352537155151, "learning_rate": 0.0007199654775604143, "loss": 0.466, "step": 222470 }, { "epoch": 64.00460299194476, "grad_norm": 0.880113422870636, "learning_rate": 0.0007199079401611048, "loss": 0.4765, "step": 222480 }, { "epoch": 64.00747986191024, "grad_norm": 1.0705755949020386, "learning_rate": 0.0007198504027617951, "loss": 0.4202, "step": 222490 }, { "epoch": 64.01035673187572, "grad_norm": 1.285620093345642, "learning_rate": 0.0007197928653624857, "loss": 0.4651, "step": 222500 }, { "epoch": 64.0132336018412, "grad_norm": 1.420040249824524, "learning_rate": 0.0007197353279631761, "loss": 0.4823, "step": 222510 }, { "epoch": 64.01611047180667, "grad_norm": 1.1098289489746094, "learning_rate": 0.0007196777905638665, "loss": 0.4686, "step": 222520 }, { "epoch": 64.01898734177215, "grad_norm": 1.379589319229126, "learning_rate": 0.0007196202531645569, "loss": 0.4001, "step": 222530 }, { "epoch": 64.02186421173764, "grad_norm": 1.2582539319992065, "learning_rate": 0.0007195627157652475, "loss": 0.4189, "step": 222540 }, { "epoch": 64.02474108170311, "grad_norm": 1.025967001914978, "learning_rate": 0.0007195051783659379, "loss": 0.3533, "step": 222550 }, { "epoch": 64.02761795166859, "grad_norm": 0.9613749384880066, "learning_rate": 0.0007194476409666283, "loss": 0.3965, "step": 222560 }, { "epoch": 64.03049482163406, "grad_norm": 1.740419864654541, "learning_rate": 0.0007193901035673189, "loss": 0.4177, "step": 222570 }, { "epoch": 64.03337169159954, "grad_norm": 1.188279390335083, "learning_rate": 0.0007193325661680092, "loss": 0.4362, "step": 222580 }, { "epoch": 64.03624856156502, "grad_norm": 1.2013769149780273, "learning_rate": 0.0007192750287686997, "loss": 0.4307, "step": 222590 }, { "epoch": 64.0391254315305, "grad_norm": 1.061771273612976, "learning_rate": 0.00071921749136939, "loss": 0.4131, "step": 222600 }, { "epoch": 64.04200230149597, "grad_norm": 0.9266749024391174, "learning_rate": 0.0007191599539700806, "loss": 0.4268, "step": 222610 }, { "epoch": 64.04487917146145, "grad_norm": 1.1957370042800903, "learning_rate": 0.000719102416570771, "loss": 0.4801, "step": 222620 }, { "epoch": 64.04775604142692, "grad_norm": 0.8656598329544067, "learning_rate": 0.0007190448791714614, "loss": 0.4822, "step": 222630 }, { "epoch": 64.0506329113924, "grad_norm": 1.4273442029953003, "learning_rate": 0.0007189873417721518, "loss": 0.488, "step": 222640 }, { "epoch": 64.05350978135789, "grad_norm": 1.8799357414245605, "learning_rate": 0.0007189298043728424, "loss": 0.435, "step": 222650 }, { "epoch": 64.05638665132336, "grad_norm": 0.8811652660369873, "learning_rate": 0.0007188722669735328, "loss": 0.4482, "step": 222660 }, { "epoch": 64.05926352128884, "grad_norm": 0.9088029861450195, "learning_rate": 0.0007188147295742232, "loss": 0.426, "step": 222670 }, { "epoch": 64.06214039125432, "grad_norm": 1.2316142320632935, "learning_rate": 0.0007187571921749138, "loss": 0.5993, "step": 222680 }, { "epoch": 64.0650172612198, "grad_norm": 1.4857277870178223, "learning_rate": 0.0007186996547756041, "loss": 0.4036, "step": 222690 }, { "epoch": 64.06789413118527, "grad_norm": 1.7438019514083862, "learning_rate": 0.0007186421173762946, "loss": 0.41, "step": 222700 }, { "epoch": 64.07077100115075, "grad_norm": 1.1261430978775024, "learning_rate": 0.000718584579976985, "loss": 0.3479, "step": 222710 }, { "epoch": 64.07364787111622, "grad_norm": 1.6254228353500366, "learning_rate": 0.0007185270425776755, "loss": 0.4627, "step": 222720 }, { "epoch": 64.0765247410817, "grad_norm": 0.8150748610496521, "learning_rate": 0.0007184695051783659, "loss": 0.4259, "step": 222730 }, { "epoch": 64.07940161104717, "grad_norm": 1.5486823320388794, "learning_rate": 0.0007184119677790564, "loss": 0.4473, "step": 222740 }, { "epoch": 64.08227848101266, "grad_norm": 0.8388652205467224, "learning_rate": 0.0007183544303797469, "loss": 0.4672, "step": 222750 }, { "epoch": 64.08515535097814, "grad_norm": 0.9421631097793579, "learning_rate": 0.0007182968929804373, "loss": 0.4193, "step": 222760 }, { "epoch": 64.08803222094362, "grad_norm": 0.9691998362541199, "learning_rate": 0.0007182393555811278, "loss": 0.4426, "step": 222770 }, { "epoch": 64.0909090909091, "grad_norm": 1.3331185579299927, "learning_rate": 0.0007181818181818181, "loss": 0.5347, "step": 222780 }, { "epoch": 64.09378596087457, "grad_norm": 0.8663951754570007, "learning_rate": 0.0007181242807825087, "loss": 0.4088, "step": 222790 }, { "epoch": 64.09666283084005, "grad_norm": 1.6123921871185303, "learning_rate": 0.0007180667433831991, "loss": 0.3963, "step": 222800 }, { "epoch": 64.09953970080552, "grad_norm": 1.2594131231307983, "learning_rate": 0.0007180092059838895, "loss": 0.3117, "step": 222810 }, { "epoch": 64.102416570771, "grad_norm": 0.8732991814613342, "learning_rate": 0.0007179516685845799, "loss": 0.4673, "step": 222820 }, { "epoch": 64.10529344073647, "grad_norm": 0.8941116333007812, "learning_rate": 0.0007178941311852705, "loss": 0.4513, "step": 222830 }, { "epoch": 64.10817031070195, "grad_norm": 0.8165974020957947, "learning_rate": 0.0007178365937859608, "loss": 0.3923, "step": 222840 }, { "epoch": 64.11104718066743, "grad_norm": 0.833076536655426, "learning_rate": 0.0007177790563866513, "loss": 0.5171, "step": 222850 }, { "epoch": 64.11392405063292, "grad_norm": 1.3177579641342163, "learning_rate": 0.0007177215189873419, "loss": 0.3802, "step": 222860 }, { "epoch": 64.1168009205984, "grad_norm": 2.13392972946167, "learning_rate": 0.0007176639815880322, "loss": 0.3892, "step": 222870 }, { "epoch": 64.11967779056387, "grad_norm": 1.2976491451263428, "learning_rate": 0.0007176064441887227, "loss": 0.4317, "step": 222880 }, { "epoch": 64.12255466052935, "grad_norm": 1.0605796575546265, "learning_rate": 0.0007175489067894131, "loss": 0.3799, "step": 222890 }, { "epoch": 64.12543153049482, "grad_norm": 0.8236848711967468, "learning_rate": 0.0007174913693901036, "loss": 0.3528, "step": 222900 }, { "epoch": 64.1283084004603, "grad_norm": 1.267921805381775, "learning_rate": 0.000717433831990794, "loss": 0.4671, "step": 222910 }, { "epoch": 64.13118527042577, "grad_norm": 0.668726921081543, "learning_rate": 0.0007173762945914845, "loss": 0.4929, "step": 222920 }, { "epoch": 64.13406214039125, "grad_norm": 1.2039000988006592, "learning_rate": 0.0007173187571921748, "loss": 0.3564, "step": 222930 }, { "epoch": 64.13693901035673, "grad_norm": 0.9208528995513916, "learning_rate": 0.0007172612197928654, "loss": 0.438, "step": 222940 }, { "epoch": 64.1398158803222, "grad_norm": 1.206544280052185, "learning_rate": 0.0007172036823935559, "loss": 0.4257, "step": 222950 }, { "epoch": 64.1426927502877, "grad_norm": 1.6719623804092407, "learning_rate": 0.0007171461449942462, "loss": 0.3646, "step": 222960 }, { "epoch": 64.14556962025317, "grad_norm": 1.7657324075698853, "learning_rate": 0.0007170886075949368, "loss": 0.5578, "step": 222970 }, { "epoch": 64.14844649021865, "grad_norm": 1.1607646942138672, "learning_rate": 0.0007170310701956272, "loss": 0.4792, "step": 222980 }, { "epoch": 64.15132336018412, "grad_norm": 0.7293559908866882, "learning_rate": 0.0007169735327963176, "loss": 0.3824, "step": 222990 }, { "epoch": 64.1542002301496, "grad_norm": 1.601893424987793, "learning_rate": 0.000716915995397008, "loss": 0.4054, "step": 223000 }, { "epoch": 64.15707710011507, "grad_norm": 1.1503260135650635, "learning_rate": 0.0007168584579976986, "loss": 0.4637, "step": 223010 }, { "epoch": 64.15995397008055, "grad_norm": 1.2775300741195679, "learning_rate": 0.0007168009205983889, "loss": 0.365, "step": 223020 }, { "epoch": 64.16283084004603, "grad_norm": 1.4614808559417725, "learning_rate": 0.0007167433831990794, "loss": 0.4591, "step": 223030 }, { "epoch": 64.1657077100115, "grad_norm": 1.3586301803588867, "learning_rate": 0.00071668584579977, "loss": 0.4383, "step": 223040 }, { "epoch": 64.16858457997698, "grad_norm": 1.2788621187210083, "learning_rate": 0.0007166283084004603, "loss": 0.4434, "step": 223050 }, { "epoch": 64.17146144994246, "grad_norm": 1.4015321731567383, "learning_rate": 0.0007165707710011508, "loss": 0.3724, "step": 223060 }, { "epoch": 64.17433831990795, "grad_norm": 0.978330135345459, "learning_rate": 0.0007165132336018412, "loss": 0.398, "step": 223070 }, { "epoch": 64.17721518987342, "grad_norm": 1.5292271375656128, "learning_rate": 0.0007164556962025317, "loss": 0.4318, "step": 223080 }, { "epoch": 64.1800920598389, "grad_norm": 0.6579141020774841, "learning_rate": 0.0007163981588032221, "loss": 0.4356, "step": 223090 }, { "epoch": 64.18296892980437, "grad_norm": 1.072070598602295, "learning_rate": 0.0007163406214039126, "loss": 0.4327, "step": 223100 }, { "epoch": 64.18584579976985, "grad_norm": 0.9762738943099976, "learning_rate": 0.0007162830840046029, "loss": 0.3984, "step": 223110 }, { "epoch": 64.18872266973533, "grad_norm": 1.2548202276229858, "learning_rate": 0.0007162255466052935, "loss": 0.3948, "step": 223120 }, { "epoch": 64.1915995397008, "grad_norm": 0.728175938129425, "learning_rate": 0.000716168009205984, "loss": 0.4285, "step": 223130 }, { "epoch": 64.19447640966628, "grad_norm": 1.029500126838684, "learning_rate": 0.0007161104718066743, "loss": 0.4825, "step": 223140 }, { "epoch": 64.19735327963176, "grad_norm": 0.8000009059906006, "learning_rate": 0.0007160529344073649, "loss": 0.5125, "step": 223150 }, { "epoch": 64.20023014959723, "grad_norm": 1.1457096338272095, "learning_rate": 0.0007159953970080553, "loss": 0.4562, "step": 223160 }, { "epoch": 64.20310701956272, "grad_norm": 1.0912699699401855, "learning_rate": 0.0007159378596087457, "loss": 0.4795, "step": 223170 }, { "epoch": 64.2059838895282, "grad_norm": 1.8224427700042725, "learning_rate": 0.0007158803222094361, "loss": 0.4336, "step": 223180 }, { "epoch": 64.20886075949367, "grad_norm": 0.6968960762023926, "learning_rate": 0.0007158227848101267, "loss": 0.4187, "step": 223190 }, { "epoch": 64.21173762945915, "grad_norm": 1.4206198453903198, "learning_rate": 0.000715765247410817, "loss": 0.4565, "step": 223200 }, { "epoch": 64.21461449942463, "grad_norm": 1.2694954872131348, "learning_rate": 0.0007157077100115075, "loss": 0.4914, "step": 223210 }, { "epoch": 64.2174913693901, "grad_norm": 1.406020164489746, "learning_rate": 0.0007156501726121979, "loss": 0.4673, "step": 223220 }, { "epoch": 64.22036823935558, "grad_norm": 1.4183673858642578, "learning_rate": 0.0007155926352128884, "loss": 0.432, "step": 223230 }, { "epoch": 64.22324510932106, "grad_norm": 1.0731788873672485, "learning_rate": 0.0007155350978135789, "loss": 0.33, "step": 223240 }, { "epoch": 64.22612197928653, "grad_norm": 1.0574307441711426, "learning_rate": 0.0007154775604142693, "loss": 0.5117, "step": 223250 }, { "epoch": 64.22899884925201, "grad_norm": 1.385992169380188, "learning_rate": 0.0007154200230149598, "loss": 0.4611, "step": 223260 }, { "epoch": 64.23187571921748, "grad_norm": 0.8927717804908752, "learning_rate": 0.0007153624856156502, "loss": 0.4539, "step": 223270 }, { "epoch": 64.23475258918297, "grad_norm": 1.652087926864624, "learning_rate": 0.0007153049482163407, "loss": 0.4484, "step": 223280 }, { "epoch": 64.23762945914845, "grad_norm": 2.413395881652832, "learning_rate": 0.000715247410817031, "loss": 0.4188, "step": 223290 }, { "epoch": 64.24050632911393, "grad_norm": 1.2202835083007812, "learning_rate": 0.0007151898734177216, "loss": 0.3937, "step": 223300 }, { "epoch": 64.2433831990794, "grad_norm": 1.4579704999923706, "learning_rate": 0.000715132336018412, "loss": 0.4669, "step": 223310 }, { "epoch": 64.24626006904488, "grad_norm": 0.9146773219108582, "learning_rate": 0.0007150747986191024, "loss": 0.4697, "step": 223320 }, { "epoch": 64.24913693901036, "grad_norm": 1.9165427684783936, "learning_rate": 0.0007150172612197928, "loss": 0.4124, "step": 223330 }, { "epoch": 64.25201380897583, "grad_norm": 1.744710087776184, "learning_rate": 0.0007149597238204834, "loss": 0.47, "step": 223340 }, { "epoch": 64.25489067894131, "grad_norm": 0.9570353627204895, "learning_rate": 0.0007149021864211738, "loss": 0.5055, "step": 223350 }, { "epoch": 64.25776754890678, "grad_norm": 0.5499739646911621, "learning_rate": 0.0007148446490218642, "loss": 0.4811, "step": 223360 }, { "epoch": 64.26064441887226, "grad_norm": 1.2475216388702393, "learning_rate": 0.0007147871116225548, "loss": 0.4883, "step": 223370 }, { "epoch": 64.26352128883775, "grad_norm": 1.7038776874542236, "learning_rate": 0.0007147295742232451, "loss": 0.4134, "step": 223380 }, { "epoch": 64.26639815880323, "grad_norm": 1.5574806928634644, "learning_rate": 0.0007146720368239356, "loss": 0.4493, "step": 223390 }, { "epoch": 64.2692750287687, "grad_norm": 0.7730252742767334, "learning_rate": 0.0007146144994246259, "loss": 0.4738, "step": 223400 }, { "epoch": 64.27215189873418, "grad_norm": 1.0949145555496216, "learning_rate": 0.0007145569620253165, "loss": 0.475, "step": 223410 }, { "epoch": 64.27502876869966, "grad_norm": 1.9309648275375366, "learning_rate": 0.0007144994246260069, "loss": 0.4292, "step": 223420 }, { "epoch": 64.27790563866513, "grad_norm": 0.8213697075843811, "learning_rate": 0.0007144418872266973, "loss": 0.3838, "step": 223430 }, { "epoch": 64.28078250863061, "grad_norm": 0.9227297902107239, "learning_rate": 0.0007143843498273879, "loss": 0.358, "step": 223440 }, { "epoch": 64.28365937859608, "grad_norm": 1.6265629529953003, "learning_rate": 0.0007143268124280783, "loss": 0.5379, "step": 223450 }, { "epoch": 64.28653624856156, "grad_norm": 1.188141107559204, "learning_rate": 0.0007142692750287687, "loss": 0.4691, "step": 223460 }, { "epoch": 64.28941311852704, "grad_norm": 1.1830726861953735, "learning_rate": 0.0007142117376294591, "loss": 0.4234, "step": 223470 }, { "epoch": 64.29228998849253, "grad_norm": 1.5246104001998901, "learning_rate": 0.0007141542002301497, "loss": 0.3816, "step": 223480 }, { "epoch": 64.295166858458, "grad_norm": 1.1072511672973633, "learning_rate": 0.00071409666283084, "loss": 0.4377, "step": 223490 }, { "epoch": 64.29804372842348, "grad_norm": 1.1689460277557373, "learning_rate": 0.0007140391254315305, "loss": 0.4095, "step": 223500 }, { "epoch": 64.30092059838896, "grad_norm": 1.5616861581802368, "learning_rate": 0.0007139815880322209, "loss": 0.4656, "step": 223510 }, { "epoch": 64.30379746835443, "grad_norm": 1.8664363622665405, "learning_rate": 0.0007139240506329114, "loss": 0.3849, "step": 223520 }, { "epoch": 64.30667433831991, "grad_norm": 0.6635268926620483, "learning_rate": 0.0007138665132336018, "loss": 0.3558, "step": 223530 }, { "epoch": 64.30955120828538, "grad_norm": 0.6623702049255371, "learning_rate": 0.0007138089758342923, "loss": 0.4703, "step": 223540 }, { "epoch": 64.31242807825086, "grad_norm": 1.3368786573410034, "learning_rate": 0.0007137514384349828, "loss": 0.3883, "step": 223550 }, { "epoch": 64.31530494821634, "grad_norm": 1.7222460508346558, "learning_rate": 0.0007136939010356732, "loss": 0.5255, "step": 223560 }, { "epoch": 64.31818181818181, "grad_norm": 1.1343133449554443, "learning_rate": 0.0007136363636363637, "loss": 0.4196, "step": 223570 }, { "epoch": 64.32105868814729, "grad_norm": 1.412864089012146, "learning_rate": 0.000713578826237054, "loss": 0.4495, "step": 223580 }, { "epoch": 64.32393555811278, "grad_norm": 0.9897428154945374, "learning_rate": 0.0007135212888377446, "loss": 0.4356, "step": 223590 }, { "epoch": 64.32681242807826, "grad_norm": 1.715949535369873, "learning_rate": 0.000713463751438435, "loss": 0.4183, "step": 223600 }, { "epoch": 64.32968929804373, "grad_norm": 1.3455297946929932, "learning_rate": 0.0007134062140391254, "loss": 0.5169, "step": 223610 }, { "epoch": 64.33256616800921, "grad_norm": 1.929041862487793, "learning_rate": 0.0007133486766398158, "loss": 0.4726, "step": 223620 }, { "epoch": 64.33544303797468, "grad_norm": 1.8858619928359985, "learning_rate": 0.0007132911392405064, "loss": 0.438, "step": 223630 }, { "epoch": 64.33831990794016, "grad_norm": 0.9311970472335815, "learning_rate": 0.0007132336018411967, "loss": 0.3566, "step": 223640 }, { "epoch": 64.34119677790564, "grad_norm": 1.0867525339126587, "learning_rate": 0.0007131760644418872, "loss": 0.3512, "step": 223650 }, { "epoch": 64.34407364787111, "grad_norm": 1.6082563400268555, "learning_rate": 0.0007131185270425778, "loss": 0.4173, "step": 223660 }, { "epoch": 64.34695051783659, "grad_norm": 2.042302370071411, "learning_rate": 0.0007130609896432681, "loss": 0.4597, "step": 223670 }, { "epoch": 64.34982738780207, "grad_norm": 0.7634521722793579, "learning_rate": 0.0007130034522439586, "loss": 0.4376, "step": 223680 }, { "epoch": 64.35270425776756, "grad_norm": 1.6797246932983398, "learning_rate": 0.000712945914844649, "loss": 0.5951, "step": 223690 }, { "epoch": 64.35558112773303, "grad_norm": 1.0291783809661865, "learning_rate": 0.0007128883774453395, "loss": 0.4385, "step": 223700 }, { "epoch": 64.35845799769851, "grad_norm": 2.2346999645233154, "learning_rate": 0.0007128308400460299, "loss": 0.4621, "step": 223710 }, { "epoch": 64.36133486766398, "grad_norm": 1.13652765750885, "learning_rate": 0.0007127733026467204, "loss": 0.5111, "step": 223720 }, { "epoch": 64.36421173762946, "grad_norm": 1.6351724863052368, "learning_rate": 0.0007127157652474108, "loss": 0.4162, "step": 223730 }, { "epoch": 64.36708860759494, "grad_norm": 0.8087271451950073, "learning_rate": 0.0007126582278481013, "loss": 0.417, "step": 223740 }, { "epoch": 64.36996547756041, "grad_norm": 1.2233335971832275, "learning_rate": 0.0007126006904487918, "loss": 0.4102, "step": 223750 }, { "epoch": 64.37284234752589, "grad_norm": 0.7265578508377075, "learning_rate": 0.0007125431530494821, "loss": 0.4422, "step": 223760 }, { "epoch": 64.37571921749137, "grad_norm": 1.176665186882019, "learning_rate": 0.0007124856156501727, "loss": 0.4481, "step": 223770 }, { "epoch": 64.37859608745684, "grad_norm": 1.1858391761779785, "learning_rate": 0.0007124280782508631, "loss": 0.4409, "step": 223780 }, { "epoch": 64.38147295742232, "grad_norm": 1.6784666776657104, "learning_rate": 0.0007123705408515535, "loss": 0.3707, "step": 223790 }, { "epoch": 64.38434982738781, "grad_norm": 1.4523378610610962, "learning_rate": 0.0007123130034522439, "loss": 0.5579, "step": 223800 }, { "epoch": 64.38722669735328, "grad_norm": 1.6248645782470703, "learning_rate": 0.0007122554660529345, "loss": 0.4594, "step": 223810 }, { "epoch": 64.39010356731876, "grad_norm": 1.3580683469772339, "learning_rate": 0.0007121979286536248, "loss": 0.5217, "step": 223820 }, { "epoch": 64.39298043728424, "grad_norm": 1.1032342910766602, "learning_rate": 0.0007121403912543153, "loss": 0.5053, "step": 223830 }, { "epoch": 64.39585730724971, "grad_norm": 1.7681941986083984, "learning_rate": 0.0007120828538550059, "loss": 0.5078, "step": 223840 }, { "epoch": 64.39873417721519, "grad_norm": 0.8930290937423706, "learning_rate": 0.0007120253164556962, "loss": 0.6038, "step": 223850 }, { "epoch": 64.40161104718067, "grad_norm": 1.0940237045288086, "learning_rate": 0.0007119677790563867, "loss": 0.5575, "step": 223860 }, { "epoch": 64.40448791714614, "grad_norm": 1.799085021018982, "learning_rate": 0.0007119102416570771, "loss": 0.5354, "step": 223870 }, { "epoch": 64.40736478711162, "grad_norm": 1.8789172172546387, "learning_rate": 0.0007118527042577676, "loss": 0.5066, "step": 223880 }, { "epoch": 64.4102416570771, "grad_norm": 0.8479049205780029, "learning_rate": 0.000711795166858458, "loss": 0.4865, "step": 223890 }, { "epoch": 64.41311852704258, "grad_norm": 0.9543617367744446, "learning_rate": 0.0007117376294591485, "loss": 0.3935, "step": 223900 }, { "epoch": 64.41599539700806, "grad_norm": 1.062972903251648, "learning_rate": 0.0007116800920598388, "loss": 0.4352, "step": 223910 }, { "epoch": 64.41887226697354, "grad_norm": 1.2851759195327759, "learning_rate": 0.0007116225546605294, "loss": 0.3727, "step": 223920 }, { "epoch": 64.42174913693901, "grad_norm": 0.9841741323471069, "learning_rate": 0.0007115650172612198, "loss": 0.5281, "step": 223930 }, { "epoch": 64.42462600690449, "grad_norm": 1.292664885520935, "learning_rate": 0.0007115074798619102, "loss": 0.4273, "step": 223940 }, { "epoch": 64.42750287686997, "grad_norm": 1.444090723991394, "learning_rate": 0.0007114499424626008, "loss": 0.4421, "step": 223950 }, { "epoch": 64.43037974683544, "grad_norm": 0.7207987904548645, "learning_rate": 0.0007113924050632912, "loss": 0.4969, "step": 223960 }, { "epoch": 64.43325661680092, "grad_norm": 1.0036840438842773, "learning_rate": 0.0007113348676639816, "loss": 0.395, "step": 223970 }, { "epoch": 64.4361334867664, "grad_norm": 1.5719494819641113, "learning_rate": 0.000711277330264672, "loss": 0.5475, "step": 223980 }, { "epoch": 64.43901035673187, "grad_norm": 1.4028273820877075, "learning_rate": 0.0007112197928653626, "loss": 0.4618, "step": 223990 }, { "epoch": 64.44188722669735, "grad_norm": 1.5172464847564697, "learning_rate": 0.0007111622554660529, "loss": 0.451, "step": 224000 }, { "epoch": 64.44476409666284, "grad_norm": 1.4671220779418945, "learning_rate": 0.0007111047180667434, "loss": 0.3394, "step": 224010 }, { "epoch": 64.44764096662831, "grad_norm": 1.8359084129333496, "learning_rate": 0.000711047180667434, "loss": 0.4652, "step": 224020 }, { "epoch": 64.45051783659379, "grad_norm": 1.7472147941589355, "learning_rate": 0.0007109896432681243, "loss": 0.4523, "step": 224030 }, { "epoch": 64.45339470655927, "grad_norm": 1.6046350002288818, "learning_rate": 0.0007109321058688147, "loss": 0.4566, "step": 224040 }, { "epoch": 64.45627157652474, "grad_norm": 1.2485768795013428, "learning_rate": 0.0007108745684695052, "loss": 0.4248, "step": 224050 }, { "epoch": 64.45914844649022, "grad_norm": 1.5901875495910645, "learning_rate": 0.0007108170310701957, "loss": 0.6158, "step": 224060 }, { "epoch": 64.4620253164557, "grad_norm": 1.8957842588424683, "learning_rate": 0.0007107594936708861, "loss": 0.5066, "step": 224070 }, { "epoch": 64.46490218642117, "grad_norm": 1.6506311893463135, "learning_rate": 0.0007107019562715766, "loss": 0.4075, "step": 224080 }, { "epoch": 64.46777905638665, "grad_norm": 1.1824322938919067, "learning_rate": 0.0007106444188722669, "loss": 0.5027, "step": 224090 }, { "epoch": 64.47065592635212, "grad_norm": 1.6543015241622925, "learning_rate": 0.0007105868814729575, "loss": 0.4578, "step": 224100 }, { "epoch": 64.47353279631761, "grad_norm": 1.2320133447647095, "learning_rate": 0.0007105293440736479, "loss": 0.4498, "step": 224110 }, { "epoch": 64.47640966628309, "grad_norm": 0.7442583441734314, "learning_rate": 0.0007104718066743383, "loss": 0.4766, "step": 224120 }, { "epoch": 64.47928653624857, "grad_norm": 1.1806843280792236, "learning_rate": 0.0007104142692750288, "loss": 0.5056, "step": 224130 }, { "epoch": 64.48216340621404, "grad_norm": 1.1447254419326782, "learning_rate": 0.0007103567318757193, "loss": 0.4154, "step": 224140 }, { "epoch": 64.48504027617952, "grad_norm": 1.1262266635894775, "learning_rate": 0.0007102991944764097, "loss": 0.4591, "step": 224150 }, { "epoch": 64.487917146145, "grad_norm": 2.4375712871551514, "learning_rate": 0.0007102416570771001, "loss": 0.5362, "step": 224160 }, { "epoch": 64.49079401611047, "grad_norm": 1.88148832321167, "learning_rate": 0.0007101841196777907, "loss": 0.4558, "step": 224170 }, { "epoch": 64.49367088607595, "grad_norm": 1.2445385456085205, "learning_rate": 0.000710126582278481, "loss": 0.3984, "step": 224180 }, { "epoch": 64.49654775604142, "grad_norm": 1.820469617843628, "learning_rate": 0.0007100690448791715, "loss": 0.4171, "step": 224190 }, { "epoch": 64.4994246260069, "grad_norm": 1.9386435747146606, "learning_rate": 0.0007100115074798618, "loss": 0.4408, "step": 224200 }, { "epoch": 64.50230149597238, "grad_norm": 0.7809199094772339, "learning_rate": 0.0007099539700805524, "loss": 0.37, "step": 224210 }, { "epoch": 64.50517836593787, "grad_norm": 2.1261847019195557, "learning_rate": 0.0007098964326812428, "loss": 0.5205, "step": 224220 }, { "epoch": 64.50805523590334, "grad_norm": 1.1255894899368286, "learning_rate": 0.0007098388952819332, "loss": 0.4319, "step": 224230 }, { "epoch": 64.51093210586882, "grad_norm": 1.898483157157898, "learning_rate": 0.0007097813578826238, "loss": 0.492, "step": 224240 }, { "epoch": 64.5138089758343, "grad_norm": 1.4305379390716553, "learning_rate": 0.0007097238204833142, "loss": 0.4273, "step": 224250 }, { "epoch": 64.51668584579977, "grad_norm": 2.055065155029297, "learning_rate": 0.0007096662830840046, "loss": 0.4414, "step": 224260 }, { "epoch": 64.51956271576525, "grad_norm": 1.8548529148101807, "learning_rate": 0.000709608745684695, "loss": 0.4565, "step": 224270 }, { "epoch": 64.52243958573072, "grad_norm": 1.1560590267181396, "learning_rate": 0.0007095512082853856, "loss": 0.552, "step": 224280 }, { "epoch": 64.5253164556962, "grad_norm": 1.231598973274231, "learning_rate": 0.0007094936708860759, "loss": 0.4801, "step": 224290 }, { "epoch": 64.52819332566168, "grad_norm": 1.4196645021438599, "learning_rate": 0.0007094361334867664, "loss": 0.554, "step": 224300 }, { "epoch": 64.53107019562715, "grad_norm": 2.7281839847564697, "learning_rate": 0.0007093785960874568, "loss": 0.5073, "step": 224310 }, { "epoch": 64.53394706559264, "grad_norm": 0.7675302028656006, "learning_rate": 0.0007093210586881473, "loss": 0.3801, "step": 224320 }, { "epoch": 64.53682393555812, "grad_norm": 0.8255363702774048, "learning_rate": 0.0007092635212888377, "loss": 0.4771, "step": 224330 }, { "epoch": 64.5397008055236, "grad_norm": 2.758450508117676, "learning_rate": 0.0007092059838895282, "loss": 0.5319, "step": 224340 }, { "epoch": 64.54257767548907, "grad_norm": 1.9482330083847046, "learning_rate": 0.0007091484464902187, "loss": 0.4363, "step": 224350 }, { "epoch": 64.54545454545455, "grad_norm": 1.3322690725326538, "learning_rate": 0.0007090909090909091, "loss": 0.4046, "step": 224360 }, { "epoch": 64.54833141542002, "grad_norm": 1.4418752193450928, "learning_rate": 0.0007090333716915996, "loss": 0.4475, "step": 224370 }, { "epoch": 64.5512082853855, "grad_norm": 1.6594150066375732, "learning_rate": 0.0007089758342922899, "loss": 0.4545, "step": 224380 }, { "epoch": 64.55408515535098, "grad_norm": 1.0712636709213257, "learning_rate": 0.0007089182968929805, "loss": 0.3837, "step": 224390 }, { "epoch": 64.55696202531645, "grad_norm": 1.959432601928711, "learning_rate": 0.0007088607594936709, "loss": 0.4266, "step": 224400 }, { "epoch": 64.55983889528193, "grad_norm": 1.5275987386703491, "learning_rate": 0.0007088032220943613, "loss": 0.4602, "step": 224410 }, { "epoch": 64.5627157652474, "grad_norm": 0.5914701819419861, "learning_rate": 0.0007087456846950518, "loss": 0.4315, "step": 224420 }, { "epoch": 64.5655926352129, "grad_norm": 0.9848629236221313, "learning_rate": 0.0007086881472957423, "loss": 0.5066, "step": 224430 }, { "epoch": 64.56846950517837, "grad_norm": 1.0743805170059204, "learning_rate": 0.0007086306098964326, "loss": 0.3228, "step": 224440 }, { "epoch": 64.57134637514385, "grad_norm": 1.061265468597412, "learning_rate": 0.0007085730724971231, "loss": 0.3902, "step": 224450 }, { "epoch": 64.57422324510932, "grad_norm": 2.321934938430786, "learning_rate": 0.0007085155350978137, "loss": 0.4306, "step": 224460 }, { "epoch": 64.5771001150748, "grad_norm": 2.0637574195861816, "learning_rate": 0.000708457997698504, "loss": 0.576, "step": 224470 }, { "epoch": 64.57997698504028, "grad_norm": 1.6500428915023804, "learning_rate": 0.0007084004602991945, "loss": 0.4638, "step": 224480 }, { "epoch": 64.58285385500575, "grad_norm": 1.3375498056411743, "learning_rate": 0.0007083429228998849, "loss": 0.417, "step": 224490 }, { "epoch": 64.58573072497123, "grad_norm": 1.7685935497283936, "learning_rate": 0.0007082853855005754, "loss": 0.4758, "step": 224500 }, { "epoch": 64.5886075949367, "grad_norm": 2.3013949394226074, "learning_rate": 0.0007082278481012658, "loss": 0.4778, "step": 224510 }, { "epoch": 64.59148446490218, "grad_norm": 1.2786524295806885, "learning_rate": 0.0007081703107019563, "loss": 0.4502, "step": 224520 }, { "epoch": 64.59436133486767, "grad_norm": 1.0563409328460693, "learning_rate": 0.0007081127733026467, "loss": 0.3704, "step": 224530 }, { "epoch": 64.59723820483315, "grad_norm": 1.3341044187545776, "learning_rate": 0.0007080552359033372, "loss": 0.4752, "step": 224540 }, { "epoch": 64.60011507479862, "grad_norm": 1.0497926473617554, "learning_rate": 0.0007079976985040277, "loss": 0.4654, "step": 224550 }, { "epoch": 64.6029919447641, "grad_norm": 0.7784249782562256, "learning_rate": 0.000707940161104718, "loss": 0.4326, "step": 224560 }, { "epoch": 64.60586881472958, "grad_norm": 0.8582676649093628, "learning_rate": 0.0007078826237054086, "loss": 0.4648, "step": 224570 }, { "epoch": 64.60874568469505, "grad_norm": 1.6997274160385132, "learning_rate": 0.000707825086306099, "loss": 0.3925, "step": 224580 }, { "epoch": 64.61162255466053, "grad_norm": 1.6820549964904785, "learning_rate": 0.0007077675489067894, "loss": 0.509, "step": 224590 }, { "epoch": 64.614499424626, "grad_norm": 0.8186821937561035, "learning_rate": 0.0007077100115074798, "loss": 0.4652, "step": 224600 }, { "epoch": 64.61737629459148, "grad_norm": 1.02463698387146, "learning_rate": 0.0007076524741081704, "loss": 0.4107, "step": 224610 }, { "epoch": 64.62025316455696, "grad_norm": 0.8919248580932617, "learning_rate": 0.0007075949367088607, "loss": 0.3908, "step": 224620 }, { "epoch": 64.62313003452243, "grad_norm": 0.9511719942092896, "learning_rate": 0.0007075373993095512, "loss": 0.4087, "step": 224630 }, { "epoch": 64.62600690448792, "grad_norm": 2.238276958465576, "learning_rate": 0.0007074798619102418, "loss": 0.4395, "step": 224640 }, { "epoch": 64.6288837744534, "grad_norm": 1.0494990348815918, "learning_rate": 0.0007074223245109321, "loss": 0.5423, "step": 224650 }, { "epoch": 64.63176064441888, "grad_norm": 1.14396071434021, "learning_rate": 0.0007073647871116226, "loss": 0.4798, "step": 224660 }, { "epoch": 64.63463751438435, "grad_norm": 1.178591012954712, "learning_rate": 0.000707307249712313, "loss": 0.6772, "step": 224670 }, { "epoch": 64.63751438434983, "grad_norm": 1.2959834337234497, "learning_rate": 0.0007072497123130035, "loss": 0.4184, "step": 224680 }, { "epoch": 64.6403912543153, "grad_norm": 1.5270668268203735, "learning_rate": 0.0007071921749136939, "loss": 0.5745, "step": 224690 }, { "epoch": 64.64326812428078, "grad_norm": 0.8234886527061462, "learning_rate": 0.0007071346375143844, "loss": 0.364, "step": 224700 }, { "epoch": 64.64614499424626, "grad_norm": 1.0779504776000977, "learning_rate": 0.0007070771001150748, "loss": 0.5058, "step": 224710 }, { "epoch": 64.64902186421173, "grad_norm": 1.2076008319854736, "learning_rate": 0.0007070195627157653, "loss": 0.4449, "step": 224720 }, { "epoch": 64.65189873417721, "grad_norm": 0.8968253135681152, "learning_rate": 0.0007069620253164557, "loss": 0.369, "step": 224730 }, { "epoch": 64.6547756041427, "grad_norm": 1.7371115684509277, "learning_rate": 0.0007069044879171461, "loss": 0.5086, "step": 224740 }, { "epoch": 64.65765247410818, "grad_norm": 1.1369421482086182, "learning_rate": 0.0007068469505178367, "loss": 0.4127, "step": 224750 }, { "epoch": 64.66052934407365, "grad_norm": 1.379685878753662, "learning_rate": 0.0007067894131185271, "loss": 0.3931, "step": 224760 }, { "epoch": 64.66340621403913, "grad_norm": 3.392810821533203, "learning_rate": 0.0007067318757192175, "loss": 0.4786, "step": 224770 }, { "epoch": 64.6662830840046, "grad_norm": 1.0326290130615234, "learning_rate": 0.0007066743383199079, "loss": 0.4511, "step": 224780 }, { "epoch": 64.66915995397008, "grad_norm": 1.4711127281188965, "learning_rate": 0.0007066168009205985, "loss": 0.5084, "step": 224790 }, { "epoch": 64.67203682393556, "grad_norm": 0.5900590419769287, "learning_rate": 0.0007065592635212888, "loss": 0.3493, "step": 224800 }, { "epoch": 64.67491369390103, "grad_norm": 0.7678167223930359, "learning_rate": 0.0007065017261219793, "loss": 0.3734, "step": 224810 }, { "epoch": 64.67779056386651, "grad_norm": 1.6286602020263672, "learning_rate": 0.0007064441887226698, "loss": 0.4539, "step": 224820 }, { "epoch": 64.68066743383199, "grad_norm": 1.6170580387115479, "learning_rate": 0.0007063866513233602, "loss": 0.4589, "step": 224830 }, { "epoch": 64.68354430379746, "grad_norm": 1.752600908279419, "learning_rate": 0.0007063291139240506, "loss": 0.4239, "step": 224840 }, { "epoch": 64.68642117376295, "grad_norm": 1.8898369073867798, "learning_rate": 0.0007062715765247411, "loss": 0.4259, "step": 224850 }, { "epoch": 64.68929804372843, "grad_norm": 1.4896256923675537, "learning_rate": 0.0007062140391254316, "loss": 0.4708, "step": 224860 }, { "epoch": 64.6921749136939, "grad_norm": 1.0392842292785645, "learning_rate": 0.000706156501726122, "loss": 0.4322, "step": 224870 }, { "epoch": 64.69505178365938, "grad_norm": 1.4264329671859741, "learning_rate": 0.0007060989643268125, "loss": 0.4056, "step": 224880 }, { "epoch": 64.69792865362486, "grad_norm": 1.0658071041107178, "learning_rate": 0.0007060414269275028, "loss": 0.3722, "step": 224890 }, { "epoch": 64.70080552359033, "grad_norm": 2.327045202255249, "learning_rate": 0.0007059838895281934, "loss": 0.4751, "step": 224900 }, { "epoch": 64.70368239355581, "grad_norm": 1.2246533632278442, "learning_rate": 0.0007059263521288838, "loss": 0.4436, "step": 224910 }, { "epoch": 64.70655926352129, "grad_norm": 1.3196572065353394, "learning_rate": 0.0007058688147295742, "loss": 0.5042, "step": 224920 }, { "epoch": 64.70943613348676, "grad_norm": 0.7185180187225342, "learning_rate": 0.0007058112773302647, "loss": 0.3797, "step": 224930 }, { "epoch": 64.71231300345224, "grad_norm": 1.1029419898986816, "learning_rate": 0.0007057537399309552, "loss": 0.4604, "step": 224940 }, { "epoch": 64.71518987341773, "grad_norm": 1.8637802600860596, "learning_rate": 0.0007056962025316455, "loss": 0.3726, "step": 224950 }, { "epoch": 64.7180667433832, "grad_norm": 1.659040927886963, "learning_rate": 0.000705638665132336, "loss": 0.4201, "step": 224960 }, { "epoch": 64.72094361334868, "grad_norm": 1.7669308185577393, "learning_rate": 0.0007055811277330266, "loss": 0.4745, "step": 224970 }, { "epoch": 64.72382048331416, "grad_norm": 0.9540249109268188, "learning_rate": 0.0007055235903337169, "loss": 0.5284, "step": 224980 }, { "epoch": 64.72669735327963, "grad_norm": 1.5411604642868042, "learning_rate": 0.0007054660529344074, "loss": 0.4236, "step": 224990 }, { "epoch": 64.72957422324511, "grad_norm": 0.7190196514129639, "learning_rate": 0.0007054085155350979, "loss": 0.4171, "step": 225000 }, { "epoch": 64.73245109321059, "grad_norm": 0.9587858319282532, "learning_rate": 0.0007053509781357883, "loss": 0.4365, "step": 225010 }, { "epoch": 64.73532796317606, "grad_norm": 1.3985381126403809, "learning_rate": 0.0007052934407364787, "loss": 0.4235, "step": 225020 }, { "epoch": 64.73820483314154, "grad_norm": 1.546674132347107, "learning_rate": 0.0007052359033371691, "loss": 0.4139, "step": 225030 }, { "epoch": 64.74108170310701, "grad_norm": 1.2622181177139282, "learning_rate": 0.0007051783659378596, "loss": 0.498, "step": 225040 }, { "epoch": 64.74395857307249, "grad_norm": 1.8265233039855957, "learning_rate": 0.0007051208285385501, "loss": 0.5503, "step": 225050 }, { "epoch": 64.74683544303798, "grad_norm": 1.7336695194244385, "learning_rate": 0.0007050632911392405, "loss": 0.412, "step": 225060 }, { "epoch": 64.74971231300346, "grad_norm": 1.541076421737671, "learning_rate": 0.0007050057537399309, "loss": 0.6269, "step": 225070 }, { "epoch": 64.75258918296893, "grad_norm": 1.519778847694397, "learning_rate": 0.0007049482163406215, "loss": 0.5664, "step": 225080 }, { "epoch": 64.75546605293441, "grad_norm": 1.1647717952728271, "learning_rate": 0.0007048906789413118, "loss": 0.4841, "step": 225090 }, { "epoch": 64.75834292289989, "grad_norm": 1.3758652210235596, "learning_rate": 0.0007048331415420023, "loss": 0.5645, "step": 225100 }, { "epoch": 64.76121979286536, "grad_norm": 0.750037431716919, "learning_rate": 0.0007047756041426928, "loss": 0.4756, "step": 225110 }, { "epoch": 64.76409666283084, "grad_norm": 1.685119867324829, "learning_rate": 0.0007047180667433832, "loss": 0.573, "step": 225120 }, { "epoch": 64.76697353279631, "grad_norm": 3.0230846405029297, "learning_rate": 0.0007046605293440736, "loss": 0.5729, "step": 225130 }, { "epoch": 64.76985040276179, "grad_norm": 0.8216833472251892, "learning_rate": 0.0007046029919447641, "loss": 0.4373, "step": 225140 }, { "epoch": 64.77272727272727, "grad_norm": 1.6860817670822144, "learning_rate": 0.0007045454545454546, "loss": 0.5173, "step": 225150 }, { "epoch": 64.77560414269276, "grad_norm": 0.8671481013298035, "learning_rate": 0.000704487917146145, "loss": 0.46, "step": 225160 }, { "epoch": 64.77848101265823, "grad_norm": 1.371974229812622, "learning_rate": 0.0007044303797468355, "loss": 0.4766, "step": 225170 }, { "epoch": 64.78135788262371, "grad_norm": 1.762184739112854, "learning_rate": 0.0007043728423475258, "loss": 0.4092, "step": 225180 }, { "epoch": 64.78423475258919, "grad_norm": 1.3608397245407104, "learning_rate": 0.0007043153049482164, "loss": 0.4974, "step": 225190 }, { "epoch": 64.78711162255466, "grad_norm": 0.8054991960525513, "learning_rate": 0.0007042577675489068, "loss": 0.4295, "step": 225200 }, { "epoch": 64.78998849252014, "grad_norm": 0.9125548005104065, "learning_rate": 0.0007042002301495972, "loss": 0.4042, "step": 225210 }, { "epoch": 64.79286536248561, "grad_norm": 1.3719562292099, "learning_rate": 0.0007041426927502877, "loss": 0.5062, "step": 225220 }, { "epoch": 64.79574223245109, "grad_norm": 0.9335287809371948, "learning_rate": 0.0007040851553509782, "loss": 0.4795, "step": 225230 }, { "epoch": 64.79861910241657, "grad_norm": 1.9789191484451294, "learning_rate": 0.0007040276179516685, "loss": 0.5016, "step": 225240 }, { "epoch": 64.80149597238204, "grad_norm": 0.8253608345985413, "learning_rate": 0.000703970080552359, "loss": 0.4368, "step": 225250 }, { "epoch": 64.80437284234753, "grad_norm": 2.063694715499878, "learning_rate": 0.0007039125431530496, "loss": 0.4299, "step": 225260 }, { "epoch": 64.80724971231301, "grad_norm": 1.0964537858963013, "learning_rate": 0.0007038550057537399, "loss": 0.4107, "step": 225270 }, { "epoch": 64.81012658227849, "grad_norm": 1.350242257118225, "learning_rate": 0.0007037974683544304, "loss": 0.4691, "step": 225280 }, { "epoch": 64.81300345224396, "grad_norm": 1.478055477142334, "learning_rate": 0.0007037399309551208, "loss": 0.4772, "step": 225290 }, { "epoch": 64.81588032220944, "grad_norm": 1.0018858909606934, "learning_rate": 0.0007036823935558113, "loss": 0.5499, "step": 225300 }, { "epoch": 64.81875719217491, "grad_norm": 0.8410069942474365, "learning_rate": 0.0007036248561565017, "loss": 0.5275, "step": 225310 }, { "epoch": 64.82163406214039, "grad_norm": 1.6312189102172852, "learning_rate": 0.0007035673187571922, "loss": 0.411, "step": 225320 }, { "epoch": 64.82451093210587, "grad_norm": 1.185123085975647, "learning_rate": 0.0007035097813578826, "loss": 0.4579, "step": 225330 }, { "epoch": 64.82738780207134, "grad_norm": 1.28237783908844, "learning_rate": 0.0007034522439585731, "loss": 0.4208, "step": 225340 }, { "epoch": 64.83026467203682, "grad_norm": 1.0604236125946045, "learning_rate": 0.0007033947065592636, "loss": 0.4342, "step": 225350 }, { "epoch": 64.8331415420023, "grad_norm": 1.6925082206726074, "learning_rate": 0.0007033371691599539, "loss": 0.5359, "step": 225360 }, { "epoch": 64.83601841196779, "grad_norm": 1.4020003080368042, "learning_rate": 0.0007032796317606445, "loss": 0.4588, "step": 225370 }, { "epoch": 64.83889528193326, "grad_norm": 0.8996297717094421, "learning_rate": 0.0007032220943613349, "loss": 0.4677, "step": 225380 }, { "epoch": 64.84177215189874, "grad_norm": 2.1920619010925293, "learning_rate": 0.0007031645569620253, "loss": 0.4216, "step": 225390 }, { "epoch": 64.84464902186421, "grad_norm": 1.0732978582382202, "learning_rate": 0.0007031070195627158, "loss": 0.52, "step": 225400 }, { "epoch": 64.84752589182969, "grad_norm": 2.1471571922302246, "learning_rate": 0.0007030494821634063, "loss": 0.5427, "step": 225410 }, { "epoch": 64.85040276179517, "grad_norm": 0.7848294377326965, "learning_rate": 0.0007029919447640966, "loss": 0.4939, "step": 225420 }, { "epoch": 64.85327963176064, "grad_norm": 1.8797972202301025, "learning_rate": 0.0007029344073647871, "loss": 0.5546, "step": 225430 }, { "epoch": 64.85615650172612, "grad_norm": 0.8033501505851746, "learning_rate": 0.0007028768699654777, "loss": 0.37, "step": 225440 }, { "epoch": 64.8590333716916, "grad_norm": 0.8243789672851562, "learning_rate": 0.000702819332566168, "loss": 0.4578, "step": 225450 }, { "epoch": 64.86191024165707, "grad_norm": 1.4279552698135376, "learning_rate": 0.0007027617951668585, "loss": 0.459, "step": 225460 }, { "epoch": 64.86478711162256, "grad_norm": 0.9470726251602173, "learning_rate": 0.0007027042577675489, "loss": 0.3333, "step": 225470 }, { "epoch": 64.86766398158804, "grad_norm": 1.2454012632369995, "learning_rate": 0.0007026467203682394, "loss": 0.4399, "step": 225480 }, { "epoch": 64.87054085155351, "grad_norm": 0.8804922103881836, "learning_rate": 0.0007025891829689298, "loss": 0.5831, "step": 225490 }, { "epoch": 64.87341772151899, "grad_norm": 1.0873761177062988, "learning_rate": 0.0007025316455696203, "loss": 0.4714, "step": 225500 }, { "epoch": 64.87629459148447, "grad_norm": 3.7780587673187256, "learning_rate": 0.0007024741081703107, "loss": 0.4916, "step": 225510 }, { "epoch": 64.87917146144994, "grad_norm": 1.2863949537277222, "learning_rate": 0.0007024165707710012, "loss": 0.3993, "step": 225520 }, { "epoch": 64.88204833141542, "grad_norm": 2.0271339416503906, "learning_rate": 0.0007023590333716916, "loss": 0.4473, "step": 225530 }, { "epoch": 64.8849252013809, "grad_norm": 1.0595978498458862, "learning_rate": 0.000702301495972382, "loss": 0.5608, "step": 225540 }, { "epoch": 64.88780207134637, "grad_norm": 1.3499020338058472, "learning_rate": 0.0007022439585730726, "loss": 0.3233, "step": 225550 }, { "epoch": 64.89067894131185, "grad_norm": 1.2573179006576538, "learning_rate": 0.000702186421173763, "loss": 0.5222, "step": 225560 }, { "epoch": 64.89355581127732, "grad_norm": 1.1861103773117065, "learning_rate": 0.0007021288837744534, "loss": 0.5448, "step": 225570 }, { "epoch": 64.89643268124281, "grad_norm": 1.3074681758880615, "learning_rate": 0.0007020713463751438, "loss": 0.5867, "step": 225580 }, { "epoch": 64.89930955120829, "grad_norm": 1.4525036811828613, "learning_rate": 0.0007020138089758344, "loss": 0.5065, "step": 225590 }, { "epoch": 64.90218642117377, "grad_norm": 0.8562361001968384, "learning_rate": 0.0007019562715765247, "loss": 0.3825, "step": 225600 }, { "epoch": 64.90506329113924, "grad_norm": 1.1336305141448975, "learning_rate": 0.0007018987341772152, "loss": 0.5571, "step": 225610 }, { "epoch": 64.90794016110472, "grad_norm": 1.9237531423568726, "learning_rate": 0.0007018411967779057, "loss": 0.4967, "step": 225620 }, { "epoch": 64.9108170310702, "grad_norm": 1.6846182346343994, "learning_rate": 0.0007017836593785961, "loss": 0.4288, "step": 225630 }, { "epoch": 64.91369390103567, "grad_norm": 1.1772525310516357, "learning_rate": 0.0007017261219792865, "loss": 0.4115, "step": 225640 }, { "epoch": 64.91657077100115, "grad_norm": 1.2667349576950073, "learning_rate": 0.000701668584579977, "loss": 0.3331, "step": 225650 }, { "epoch": 64.91944764096662, "grad_norm": 1.1480172872543335, "learning_rate": 0.0007016110471806675, "loss": 0.4094, "step": 225660 }, { "epoch": 64.9223245109321, "grad_norm": 2.089078664779663, "learning_rate": 0.0007015535097813579, "loss": 0.404, "step": 225670 }, { "epoch": 64.92520138089759, "grad_norm": 0.7736801505088806, "learning_rate": 0.0007014959723820484, "loss": 0.3603, "step": 225680 }, { "epoch": 64.92807825086307, "grad_norm": 1.2521474361419678, "learning_rate": 0.0007014384349827388, "loss": 0.4878, "step": 225690 }, { "epoch": 64.93095512082854, "grad_norm": 1.7225998640060425, "learning_rate": 0.0007013808975834293, "loss": 0.5167, "step": 225700 }, { "epoch": 64.93383199079402, "grad_norm": 1.8017970323562622, "learning_rate": 0.0007013233601841197, "loss": 0.4737, "step": 225710 }, { "epoch": 64.9367088607595, "grad_norm": 1.3245599269866943, "learning_rate": 0.0007012658227848101, "loss": 0.4229, "step": 225720 }, { "epoch": 64.93958573072497, "grad_norm": 1.313258171081543, "learning_rate": 0.0007012082853855006, "loss": 0.4566, "step": 225730 }, { "epoch": 64.94246260069045, "grad_norm": 1.1426554918289185, "learning_rate": 0.0007011507479861911, "loss": 0.5453, "step": 225740 }, { "epoch": 64.94533947065592, "grad_norm": 1.5595438480377197, "learning_rate": 0.0007010932105868814, "loss": 0.4573, "step": 225750 }, { "epoch": 64.9482163406214, "grad_norm": 1.1467164754867554, "learning_rate": 0.0007010356731875719, "loss": 0.4427, "step": 225760 }, { "epoch": 64.95109321058688, "grad_norm": 1.0254594087600708, "learning_rate": 0.0007009781357882625, "loss": 0.4487, "step": 225770 }, { "epoch": 64.95397008055235, "grad_norm": 0.7821159362792969, "learning_rate": 0.0007009205983889528, "loss": 0.512, "step": 225780 }, { "epoch": 64.95684695051784, "grad_norm": 1.1702381372451782, "learning_rate": 0.0007008630609896433, "loss": 0.531, "step": 225790 }, { "epoch": 64.95972382048332, "grad_norm": 2.0667364597320557, "learning_rate": 0.0007008055235903338, "loss": 0.4284, "step": 225800 }, { "epoch": 64.9626006904488, "grad_norm": 1.0556613206863403, "learning_rate": 0.0007007479861910242, "loss": 0.3335, "step": 225810 }, { "epoch": 64.96547756041427, "grad_norm": 1.2894363403320312, "learning_rate": 0.0007006904487917146, "loss": 0.5183, "step": 225820 }, { "epoch": 64.96835443037975, "grad_norm": 0.633885383605957, "learning_rate": 0.0007006329113924051, "loss": 0.4459, "step": 225830 }, { "epoch": 64.97123130034522, "grad_norm": 0.6199314594268799, "learning_rate": 0.0007005753739930955, "loss": 0.435, "step": 225840 }, { "epoch": 64.9741081703107, "grad_norm": 1.4165515899658203, "learning_rate": 0.000700517836593786, "loss": 0.5189, "step": 225850 }, { "epoch": 64.97698504027618, "grad_norm": 1.4140578508377075, "learning_rate": 0.0007004602991944763, "loss": 0.4547, "step": 225860 }, { "epoch": 64.97986191024165, "grad_norm": 2.158202886581421, "learning_rate": 0.0007004027617951668, "loss": 0.5055, "step": 225870 }, { "epoch": 64.98273878020713, "grad_norm": 1.3811914920806885, "learning_rate": 0.0007003452243958574, "loss": 0.5348, "step": 225880 }, { "epoch": 64.98561565017262, "grad_norm": 1.969186544418335, "learning_rate": 0.0007002876869965477, "loss": 0.4141, "step": 225890 }, { "epoch": 64.9884925201381, "grad_norm": 1.6936408281326294, "learning_rate": 0.0007002301495972382, "loss": 0.4901, "step": 225900 }, { "epoch": 64.99136939010357, "grad_norm": 2.2838597297668457, "learning_rate": 0.0007001726121979287, "loss": 0.5346, "step": 225910 }, { "epoch": 64.99424626006905, "grad_norm": 3.9529612064361572, "learning_rate": 0.0007001150747986191, "loss": 0.5012, "step": 225920 }, { "epoch": 64.99712313003452, "grad_norm": 1.2243657112121582, "learning_rate": 0.0007000575373993095, "loss": 0.4157, "step": 225930 }, { "epoch": 65.0, "grad_norm": 1.5214900970458984, "learning_rate": 0.0007, "loss": 0.5236, "step": 225940 }, { "epoch": 65.00287686996548, "grad_norm": 1.4202786684036255, "learning_rate": 0.0006999424626006904, "loss": 0.4232, "step": 225950 }, { "epoch": 65.00575373993095, "grad_norm": 1.0283615589141846, "learning_rate": 0.0006998849252013809, "loss": 0.5044, "step": 225960 }, { "epoch": 65.00863060989643, "grad_norm": 0.9519727826118469, "learning_rate": 0.0006998273878020714, "loss": 0.4584, "step": 225970 }, { "epoch": 65.0115074798619, "grad_norm": 1.0581200122833252, "learning_rate": 0.0006997698504027618, "loss": 0.4452, "step": 225980 }, { "epoch": 65.01438434982738, "grad_norm": 1.0854225158691406, "learning_rate": 0.0006997123130034523, "loss": 0.419, "step": 225990 }, { "epoch": 65.01726121979287, "grad_norm": 1.179819941520691, "learning_rate": 0.0006996547756041427, "loss": 0.3535, "step": 226000 }, { "epoch": 65.02013808975835, "grad_norm": 0.9223493337631226, "learning_rate": 0.0006995972382048331, "loss": 0.4185, "step": 226010 }, { "epoch": 65.02301495972382, "grad_norm": 0.9241218566894531, "learning_rate": 0.0006995397008055236, "loss": 0.4276, "step": 226020 }, { "epoch": 65.0258918296893, "grad_norm": 1.4449677467346191, "learning_rate": 0.0006994821634062141, "loss": 0.3889, "step": 226030 }, { "epoch": 65.02876869965478, "grad_norm": 1.236138939857483, "learning_rate": 0.0006994246260069044, "loss": 0.4889, "step": 226040 }, { "epoch": 65.03164556962025, "grad_norm": 0.7592193484306335, "learning_rate": 0.0006993670886075949, "loss": 0.4515, "step": 226050 }, { "epoch": 65.03452243958573, "grad_norm": 1.2817504405975342, "learning_rate": 0.0006993095512082855, "loss": 0.3517, "step": 226060 }, { "epoch": 65.0373993095512, "grad_norm": 1.0198652744293213, "learning_rate": 0.0006992520138089758, "loss": 0.3722, "step": 226070 }, { "epoch": 65.04027617951668, "grad_norm": 2.109079360961914, "learning_rate": 0.0006991944764096663, "loss": 0.3983, "step": 226080 }, { "epoch": 65.04315304948216, "grad_norm": 0.7605965733528137, "learning_rate": 0.0006991369390103568, "loss": 0.4116, "step": 226090 }, { "epoch": 65.04602991944765, "grad_norm": 2.166339159011841, "learning_rate": 0.0006990794016110472, "loss": 0.4178, "step": 226100 }, { "epoch": 65.04890678941312, "grad_norm": 1.3926008939743042, "learning_rate": 0.0006990218642117376, "loss": 0.4379, "step": 226110 }, { "epoch": 65.0517836593786, "grad_norm": 1.7964173555374146, "learning_rate": 0.0006989643268124281, "loss": 0.4586, "step": 226120 }, { "epoch": 65.05466052934408, "grad_norm": 1.5069595575332642, "learning_rate": 0.0006989067894131185, "loss": 0.4766, "step": 226130 }, { "epoch": 65.05753739930955, "grad_norm": 1.0980327129364014, "learning_rate": 0.000698849252013809, "loss": 0.5622, "step": 226140 }, { "epoch": 65.06041426927503, "grad_norm": 1.474513053894043, "learning_rate": 0.0006987917146144995, "loss": 0.32, "step": 226150 }, { "epoch": 65.0632911392405, "grad_norm": 1.8486226797103882, "learning_rate": 0.0006987341772151898, "loss": 0.502, "step": 226160 }, { "epoch": 65.06616800920598, "grad_norm": 1.2624903917312622, "learning_rate": 0.0006986766398158804, "loss": 0.4304, "step": 226170 }, { "epoch": 65.06904487917146, "grad_norm": 1.200558066368103, "learning_rate": 0.0006986191024165708, "loss": 0.4633, "step": 226180 }, { "epoch": 65.07192174913693, "grad_norm": 0.7963895797729492, "learning_rate": 0.0006985615650172612, "loss": 0.4126, "step": 226190 }, { "epoch": 65.07479861910241, "grad_norm": 1.5455433130264282, "learning_rate": 0.0006985040276179517, "loss": 0.3579, "step": 226200 }, { "epoch": 65.0776754890679, "grad_norm": 1.4574581384658813, "learning_rate": 0.0006984464902186422, "loss": 0.4454, "step": 226210 }, { "epoch": 65.08055235903338, "grad_norm": 1.1175494194030762, "learning_rate": 0.0006983889528193325, "loss": 0.373, "step": 226220 }, { "epoch": 65.08342922899885, "grad_norm": 0.9640952348709106, "learning_rate": 0.000698331415420023, "loss": 0.4101, "step": 226230 }, { "epoch": 65.08630609896433, "grad_norm": 2.1888797283172607, "learning_rate": 0.0006982738780207135, "loss": 0.5369, "step": 226240 }, { "epoch": 65.0891829689298, "grad_norm": 1.0966813564300537, "learning_rate": 0.0006982163406214039, "loss": 0.43, "step": 226250 }, { "epoch": 65.09205983889528, "grad_norm": 2.4858596324920654, "learning_rate": 0.0006981588032220944, "loss": 0.4964, "step": 226260 }, { "epoch": 65.09493670886076, "grad_norm": 1.0307896137237549, "learning_rate": 0.0006981012658227848, "loss": 0.3774, "step": 226270 }, { "epoch": 65.09781357882623, "grad_norm": 2.1173949241638184, "learning_rate": 0.0006980437284234753, "loss": 0.4781, "step": 226280 }, { "epoch": 65.10069044879171, "grad_norm": 1.05112886428833, "learning_rate": 0.0006979861910241657, "loss": 0.4014, "step": 226290 }, { "epoch": 65.10356731875719, "grad_norm": 1.5889735221862793, "learning_rate": 0.0006979286536248562, "loss": 0.4164, "step": 226300 }, { "epoch": 65.10644418872268, "grad_norm": 0.8867823481559753, "learning_rate": 0.0006978711162255466, "loss": 0.3264, "step": 226310 }, { "epoch": 65.10932105868815, "grad_norm": 0.8923580646514893, "learning_rate": 0.0006978135788262371, "loss": 0.4515, "step": 226320 }, { "epoch": 65.11219792865363, "grad_norm": 1.6305909156799316, "learning_rate": 0.0006977560414269275, "loss": 0.4268, "step": 226330 }, { "epoch": 65.1150747986191, "grad_norm": 0.7777064442634583, "learning_rate": 0.0006976985040276179, "loss": 0.3598, "step": 226340 }, { "epoch": 65.11795166858458, "grad_norm": 1.224030613899231, "learning_rate": 0.0006976409666283085, "loss": 0.4133, "step": 226350 }, { "epoch": 65.12082853855006, "grad_norm": 0.8784939646720886, "learning_rate": 0.0006975834292289989, "loss": 0.3874, "step": 226360 }, { "epoch": 65.12370540851553, "grad_norm": 1.0129352807998657, "learning_rate": 0.0006975258918296893, "loss": 0.5099, "step": 226370 }, { "epoch": 65.12658227848101, "grad_norm": 2.068880796432495, "learning_rate": 0.0006974683544303798, "loss": 0.4292, "step": 226380 }, { "epoch": 65.12945914844649, "grad_norm": 0.7549923658370972, "learning_rate": 0.0006974108170310703, "loss": 0.3782, "step": 226390 }, { "epoch": 65.13233601841196, "grad_norm": 1.3598616123199463, "learning_rate": 0.0006973532796317606, "loss": 0.4904, "step": 226400 }, { "epoch": 65.13521288837744, "grad_norm": 1.3356902599334717, "learning_rate": 0.0006972957422324511, "loss": 0.3997, "step": 226410 }, { "epoch": 65.13808975834293, "grad_norm": 1.2204113006591797, "learning_rate": 0.0006972382048331416, "loss": 0.4093, "step": 226420 }, { "epoch": 65.1409666283084, "grad_norm": 0.6277832388877869, "learning_rate": 0.000697180667433832, "loss": 0.3715, "step": 226430 }, { "epoch": 65.14384349827388, "grad_norm": 0.9888197779655457, "learning_rate": 0.0006971231300345224, "loss": 0.4099, "step": 226440 }, { "epoch": 65.14672036823936, "grad_norm": 1.5551490783691406, "learning_rate": 0.0006970655926352129, "loss": 0.4652, "step": 226450 }, { "epoch": 65.14959723820483, "grad_norm": 0.8203688263893127, "learning_rate": 0.0006970080552359034, "loss": 0.3824, "step": 226460 }, { "epoch": 65.15247410817031, "grad_norm": 0.92608243227005, "learning_rate": 0.0006969505178365938, "loss": 0.3313, "step": 226470 }, { "epoch": 65.15535097813579, "grad_norm": 1.0093554258346558, "learning_rate": 0.0006968929804372843, "loss": 0.4192, "step": 226480 }, { "epoch": 65.15822784810126, "grad_norm": 0.8776388168334961, "learning_rate": 0.0006968354430379747, "loss": 0.4903, "step": 226490 }, { "epoch": 65.16110471806674, "grad_norm": 0.7807565331459045, "learning_rate": 0.0006967779056386652, "loss": 0.3716, "step": 226500 }, { "epoch": 65.16398158803221, "grad_norm": 1.7331523895263672, "learning_rate": 0.0006967203682393556, "loss": 0.5094, "step": 226510 }, { "epoch": 65.1668584579977, "grad_norm": 1.3264154195785522, "learning_rate": 0.000696662830840046, "loss": 0.3349, "step": 226520 }, { "epoch": 65.16973532796318, "grad_norm": 1.048159122467041, "learning_rate": 0.0006966052934407365, "loss": 0.4658, "step": 226530 }, { "epoch": 65.17261219792866, "grad_norm": 1.1125881671905518, "learning_rate": 0.000696547756041427, "loss": 0.3125, "step": 226540 }, { "epoch": 65.17548906789413, "grad_norm": 0.9616109132766724, "learning_rate": 0.0006964902186421173, "loss": 0.4252, "step": 226550 }, { "epoch": 65.17836593785961, "grad_norm": 1.2045793533325195, "learning_rate": 0.0006964326812428078, "loss": 0.4635, "step": 226560 }, { "epoch": 65.18124280782509, "grad_norm": 1.1543740034103394, "learning_rate": 0.0006963751438434984, "loss": 0.4492, "step": 226570 }, { "epoch": 65.18411967779056, "grad_norm": 1.3729283809661865, "learning_rate": 0.0006963176064441887, "loss": 0.4849, "step": 226580 }, { "epoch": 65.18699654775604, "grad_norm": 1.5652081966400146, "learning_rate": 0.0006962600690448792, "loss": 0.3966, "step": 226590 }, { "epoch": 65.18987341772151, "grad_norm": 1.230871558189392, "learning_rate": 0.0006962025316455697, "loss": 0.5117, "step": 226600 }, { "epoch": 65.19275028768699, "grad_norm": 1.5802240371704102, "learning_rate": 0.0006961449942462601, "loss": 0.4547, "step": 226610 }, { "epoch": 65.19562715765247, "grad_norm": 1.3719127178192139, "learning_rate": 0.0006960874568469505, "loss": 0.4735, "step": 226620 }, { "epoch": 65.19850402761796, "grad_norm": 0.7786026000976562, "learning_rate": 0.000696029919447641, "loss": 0.4479, "step": 226630 }, { "epoch": 65.20138089758343, "grad_norm": 1.427894115447998, "learning_rate": 0.0006959723820483314, "loss": 0.4322, "step": 226640 }, { "epoch": 65.20425776754891, "grad_norm": 0.7937921285629272, "learning_rate": 0.0006959148446490219, "loss": 0.49, "step": 226650 }, { "epoch": 65.20713463751439, "grad_norm": 0.9415794014930725, "learning_rate": 0.0006958573072497124, "loss": 0.4139, "step": 226660 }, { "epoch": 65.21001150747986, "grad_norm": 2.0789947509765625, "learning_rate": 0.0006957997698504028, "loss": 0.3538, "step": 226670 }, { "epoch": 65.21288837744534, "grad_norm": 1.6841408014297485, "learning_rate": 0.0006957422324510933, "loss": 0.4359, "step": 226680 }, { "epoch": 65.21576524741081, "grad_norm": 0.9041151404380798, "learning_rate": 0.0006956846950517836, "loss": 0.4526, "step": 226690 }, { "epoch": 65.21864211737629, "grad_norm": 1.5765516757965088, "learning_rate": 0.0006956271576524741, "loss": 0.4031, "step": 226700 }, { "epoch": 65.22151898734177, "grad_norm": 1.5532710552215576, "learning_rate": 0.0006955696202531646, "loss": 0.3708, "step": 226710 }, { "epoch": 65.22439585730724, "grad_norm": 1.6189645528793335, "learning_rate": 0.000695512082853855, "loss": 0.3701, "step": 226720 }, { "epoch": 65.22727272727273, "grad_norm": 0.7115769982337952, "learning_rate": 0.0006954545454545454, "loss": 0.454, "step": 226730 }, { "epoch": 65.23014959723821, "grad_norm": 1.4502812623977661, "learning_rate": 0.0006953970080552359, "loss": 0.4533, "step": 226740 }, { "epoch": 65.23302646720369, "grad_norm": 0.9226937890052795, "learning_rate": 0.0006953394706559263, "loss": 0.48, "step": 226750 }, { "epoch": 65.23590333716916, "grad_norm": 2.371260643005371, "learning_rate": 0.0006952819332566168, "loss": 0.4354, "step": 226760 }, { "epoch": 65.23878020713464, "grad_norm": 0.9920676350593567, "learning_rate": 0.0006952243958573073, "loss": 0.4377, "step": 226770 }, { "epoch": 65.24165707710011, "grad_norm": 1.3060660362243652, "learning_rate": 0.0006951668584579977, "loss": 0.4613, "step": 226780 }, { "epoch": 65.24453394706559, "grad_norm": 1.4142578840255737, "learning_rate": 0.0006951093210586882, "loss": 0.3964, "step": 226790 }, { "epoch": 65.24741081703107, "grad_norm": 1.1410722732543945, "learning_rate": 0.0006950517836593786, "loss": 0.4406, "step": 226800 }, { "epoch": 65.25028768699654, "grad_norm": 2.2335968017578125, "learning_rate": 0.000694994246260069, "loss": 0.4508, "step": 226810 }, { "epoch": 65.25316455696202, "grad_norm": 1.313259482383728, "learning_rate": 0.0006949367088607595, "loss": 0.42, "step": 226820 }, { "epoch": 65.25604142692751, "grad_norm": 1.2700437307357788, "learning_rate": 0.00069487917146145, "loss": 0.4518, "step": 226830 }, { "epoch": 65.25891829689299, "grad_norm": 0.5431845784187317, "learning_rate": 0.0006948216340621403, "loss": 0.403, "step": 226840 }, { "epoch": 65.26179516685846, "grad_norm": 0.6544772386550903, "learning_rate": 0.0006947640966628308, "loss": 0.5813, "step": 226850 }, { "epoch": 65.26467203682394, "grad_norm": 1.198099970817566, "learning_rate": 0.0006947065592635214, "loss": 0.5076, "step": 226860 }, { "epoch": 65.26754890678941, "grad_norm": 1.975813627243042, "learning_rate": 0.0006946490218642117, "loss": 0.4301, "step": 226870 }, { "epoch": 65.27042577675489, "grad_norm": 1.2332497835159302, "learning_rate": 0.0006945914844649022, "loss": 0.4642, "step": 226880 }, { "epoch": 65.27330264672037, "grad_norm": 1.613479733467102, "learning_rate": 0.0006945339470655927, "loss": 0.4441, "step": 226890 }, { "epoch": 65.27617951668584, "grad_norm": 1.1408686637878418, "learning_rate": 0.0006944764096662831, "loss": 0.4362, "step": 226900 }, { "epoch": 65.27905638665132, "grad_norm": 0.8193832635879517, "learning_rate": 0.0006944188722669735, "loss": 0.5, "step": 226910 }, { "epoch": 65.2819332566168, "grad_norm": 1.2029296159744263, "learning_rate": 0.000694361334867664, "loss": 0.4483, "step": 226920 }, { "epoch": 65.28481012658227, "grad_norm": 1.383852481842041, "learning_rate": 0.0006943037974683544, "loss": 0.6131, "step": 226930 }, { "epoch": 65.28768699654776, "grad_norm": 0.9882615804672241, "learning_rate": 0.0006942462600690449, "loss": 0.3841, "step": 226940 }, { "epoch": 65.29056386651324, "grad_norm": 1.532726764678955, "learning_rate": 0.0006941887226697353, "loss": 0.5402, "step": 226950 }, { "epoch": 65.29344073647871, "grad_norm": 0.7156085968017578, "learning_rate": 0.0006941311852704257, "loss": 0.3971, "step": 226960 }, { "epoch": 65.29631760644419, "grad_norm": 0.755547046661377, "learning_rate": 0.0006940736478711163, "loss": 0.3937, "step": 226970 }, { "epoch": 65.29919447640967, "grad_norm": 1.1387181282043457, "learning_rate": 0.0006940161104718067, "loss": 0.4351, "step": 226980 }, { "epoch": 65.30207134637514, "grad_norm": 2.1378438472747803, "learning_rate": 0.0006939585730724971, "loss": 0.4157, "step": 226990 }, { "epoch": 65.30494821634062, "grad_norm": 1.6791696548461914, "learning_rate": 0.0006939010356731876, "loss": 0.4427, "step": 227000 }, { "epoch": 65.3078250863061, "grad_norm": 1.6779173612594604, "learning_rate": 0.0006938434982738781, "loss": 0.4004, "step": 227010 }, { "epoch": 65.31070195627157, "grad_norm": 0.687779426574707, "learning_rate": 0.0006937859608745684, "loss": 0.4057, "step": 227020 }, { "epoch": 65.31357882623705, "grad_norm": 1.638139247894287, "learning_rate": 0.0006937284234752589, "loss": 0.6518, "step": 227030 }, { "epoch": 65.31645569620254, "grad_norm": 1.1513036489486694, "learning_rate": 0.0006936708860759494, "loss": 0.4345, "step": 227040 }, { "epoch": 65.31933256616801, "grad_norm": 1.1397626399993896, "learning_rate": 0.0006936133486766398, "loss": 0.4984, "step": 227050 }, { "epoch": 65.32220943613349, "grad_norm": 1.964154839515686, "learning_rate": 0.0006935558112773303, "loss": 0.4749, "step": 227060 }, { "epoch": 65.32508630609897, "grad_norm": 0.8506055474281311, "learning_rate": 0.0006934982738780208, "loss": 0.4615, "step": 227070 }, { "epoch": 65.32796317606444, "grad_norm": 1.2842706441879272, "learning_rate": 0.0006934407364787112, "loss": 0.3748, "step": 227080 }, { "epoch": 65.33084004602992, "grad_norm": 2.101278066635132, "learning_rate": 0.0006933831990794016, "loss": 0.4527, "step": 227090 }, { "epoch": 65.3337169159954, "grad_norm": 0.9122946858406067, "learning_rate": 0.0006933256616800921, "loss": 0.4495, "step": 227100 }, { "epoch": 65.33659378596087, "grad_norm": 1.1484951972961426, "learning_rate": 0.0006932681242807825, "loss": 0.4288, "step": 227110 }, { "epoch": 65.33947065592635, "grad_norm": 1.8900363445281982, "learning_rate": 0.000693210586881473, "loss": 0.5745, "step": 227120 }, { "epoch": 65.34234752589182, "grad_norm": 1.3593109846115112, "learning_rate": 0.0006931530494821634, "loss": 0.5186, "step": 227130 }, { "epoch": 65.3452243958573, "grad_norm": 1.8326843976974487, "learning_rate": 0.0006930955120828538, "loss": 0.4559, "step": 227140 }, { "epoch": 65.34810126582279, "grad_norm": 1.1948753595352173, "learning_rate": 0.0006930379746835443, "loss": 0.328, "step": 227150 }, { "epoch": 65.35097813578827, "grad_norm": 0.9720699191093445, "learning_rate": 0.0006929804372842348, "loss": 0.4052, "step": 227160 }, { "epoch": 65.35385500575374, "grad_norm": 1.447630763053894, "learning_rate": 0.0006929228998849252, "loss": 0.5109, "step": 227170 }, { "epoch": 65.35673187571922, "grad_norm": 0.9035668969154358, "learning_rate": 0.0006928653624856157, "loss": 0.4309, "step": 227180 }, { "epoch": 65.3596087456847, "grad_norm": 1.2499792575836182, "learning_rate": 0.0006928078250863062, "loss": 0.3508, "step": 227190 }, { "epoch": 65.36248561565017, "grad_norm": 1.022497534751892, "learning_rate": 0.0006927502876869965, "loss": 0.5547, "step": 227200 }, { "epoch": 65.36536248561565, "grad_norm": 1.2353554964065552, "learning_rate": 0.000692692750287687, "loss": 0.4002, "step": 227210 }, { "epoch": 65.36823935558112, "grad_norm": 0.8670157194137573, "learning_rate": 0.0006926352128883775, "loss": 0.3765, "step": 227220 }, { "epoch": 65.3711162255466, "grad_norm": 0.9770886898040771, "learning_rate": 0.0006925776754890679, "loss": 0.395, "step": 227230 }, { "epoch": 65.37399309551208, "grad_norm": 0.8886497616767883, "learning_rate": 0.0006925201380897583, "loss": 0.4226, "step": 227240 }, { "epoch": 65.37686996547757, "grad_norm": 0.8238884210586548, "learning_rate": 0.0006924626006904488, "loss": 0.3512, "step": 227250 }, { "epoch": 65.37974683544304, "grad_norm": 1.714864730834961, "learning_rate": 0.0006924050632911393, "loss": 0.3938, "step": 227260 }, { "epoch": 65.38262370540852, "grad_norm": 1.5293316841125488, "learning_rate": 0.0006923475258918297, "loss": 0.561, "step": 227270 }, { "epoch": 65.385500575374, "grad_norm": 1.9485479593276978, "learning_rate": 0.0006922899884925202, "loss": 0.4104, "step": 227280 }, { "epoch": 65.38837744533947, "grad_norm": 1.3051611185073853, "learning_rate": 0.0006922324510932106, "loss": 0.4317, "step": 227290 }, { "epoch": 65.39125431530495, "grad_norm": 1.2041500806808472, "learning_rate": 0.0006921749136939011, "loss": 0.465, "step": 227300 }, { "epoch": 65.39413118527042, "grad_norm": 0.7376171350479126, "learning_rate": 0.0006921173762945915, "loss": 0.5063, "step": 227310 }, { "epoch": 65.3970080552359, "grad_norm": 1.211328148841858, "learning_rate": 0.0006920598388952819, "loss": 0.4823, "step": 227320 }, { "epoch": 65.39988492520138, "grad_norm": 1.1383748054504395, "learning_rate": 0.0006920023014959724, "loss": 0.448, "step": 227330 }, { "epoch": 65.40276179516685, "grad_norm": 1.4938899278640747, "learning_rate": 0.0006919447640966629, "loss": 0.5466, "step": 227340 }, { "epoch": 65.40563866513233, "grad_norm": 1.4555994272232056, "learning_rate": 0.0006918872266973532, "loss": 0.4345, "step": 227350 }, { "epoch": 65.40851553509782, "grad_norm": 0.9409940838813782, "learning_rate": 0.0006918296892980438, "loss": 0.3607, "step": 227360 }, { "epoch": 65.4113924050633, "grad_norm": 1.7469446659088135, "learning_rate": 0.0006917721518987343, "loss": 0.4481, "step": 227370 }, { "epoch": 65.41426927502877, "grad_norm": 2.7674765586853027, "learning_rate": 0.0006917146144994246, "loss": 0.5236, "step": 227380 }, { "epoch": 65.41714614499425, "grad_norm": 1.4905858039855957, "learning_rate": 0.0006916570771001151, "loss": 0.3723, "step": 227390 }, { "epoch": 65.42002301495972, "grad_norm": 2.000680923461914, "learning_rate": 0.0006915995397008056, "loss": 0.4572, "step": 227400 }, { "epoch": 65.4228998849252, "grad_norm": 0.9230098128318787, "learning_rate": 0.000691542002301496, "loss": 0.4573, "step": 227410 }, { "epoch": 65.42577675489068, "grad_norm": 1.2858716249465942, "learning_rate": 0.0006914844649021864, "loss": 0.5513, "step": 227420 }, { "epoch": 65.42865362485615, "grad_norm": 1.035530686378479, "learning_rate": 0.0006914269275028769, "loss": 0.4716, "step": 227430 }, { "epoch": 65.43153049482163, "grad_norm": 1.6701489686965942, "learning_rate": 0.0006913693901035673, "loss": 0.4116, "step": 227440 }, { "epoch": 65.4344073647871, "grad_norm": 1.3288252353668213, "learning_rate": 0.0006913118527042578, "loss": 0.4311, "step": 227450 }, { "epoch": 65.4372842347526, "grad_norm": 1.0019490718841553, "learning_rate": 0.0006912543153049483, "loss": 0.395, "step": 227460 }, { "epoch": 65.44016110471807, "grad_norm": 1.7470877170562744, "learning_rate": 0.0006911967779056387, "loss": 0.5026, "step": 227470 }, { "epoch": 65.44303797468355, "grad_norm": 0.8202452063560486, "learning_rate": 0.0006911392405063292, "loss": 0.3508, "step": 227480 }, { "epoch": 65.44591484464902, "grad_norm": 2.272230625152588, "learning_rate": 0.0006910817031070196, "loss": 0.5884, "step": 227490 }, { "epoch": 65.4487917146145, "grad_norm": 0.853845477104187, "learning_rate": 0.00069102416570771, "loss": 0.4349, "step": 227500 }, { "epoch": 65.45166858457998, "grad_norm": 0.7159543037414551, "learning_rate": 0.0006909666283084005, "loss": 0.4518, "step": 227510 }, { "epoch": 65.45454545454545, "grad_norm": 1.1781151294708252, "learning_rate": 0.0006909090909090909, "loss": 0.3839, "step": 227520 }, { "epoch": 65.45742232451093, "grad_norm": 1.826861023902893, "learning_rate": 0.0006908515535097813, "loss": 0.4092, "step": 227530 }, { "epoch": 65.4602991944764, "grad_norm": 1.144929051399231, "learning_rate": 0.0006907940161104718, "loss": 0.4864, "step": 227540 }, { "epoch": 65.46317606444188, "grad_norm": 1.1699258089065552, "learning_rate": 0.0006907364787111622, "loss": 0.3883, "step": 227550 }, { "epoch": 65.46605293440736, "grad_norm": 1.376559853553772, "learning_rate": 0.0006906789413118527, "loss": 0.4727, "step": 227560 }, { "epoch": 65.46892980437285, "grad_norm": 1.964626431465149, "learning_rate": 0.0006906214039125432, "loss": 0.4812, "step": 227570 }, { "epoch": 65.47180667433832, "grad_norm": 1.3892598152160645, "learning_rate": 0.0006905638665132336, "loss": 0.3875, "step": 227580 }, { "epoch": 65.4746835443038, "grad_norm": 1.0577354431152344, "learning_rate": 0.0006905063291139241, "loss": 0.4526, "step": 227590 }, { "epoch": 65.47756041426928, "grad_norm": 0.882854700088501, "learning_rate": 0.0006904487917146145, "loss": 0.4852, "step": 227600 }, { "epoch": 65.48043728423475, "grad_norm": 1.565834879875183, "learning_rate": 0.0006903912543153049, "loss": 0.4082, "step": 227610 }, { "epoch": 65.48331415420023, "grad_norm": 0.9194231033325195, "learning_rate": 0.0006903337169159954, "loss": 0.4046, "step": 227620 }, { "epoch": 65.4861910241657, "grad_norm": 1.9127298593521118, "learning_rate": 0.0006902761795166859, "loss": 0.4407, "step": 227630 }, { "epoch": 65.48906789413118, "grad_norm": 1.0443538427352905, "learning_rate": 0.0006902186421173762, "loss": 0.4049, "step": 227640 }, { "epoch": 65.49194476409666, "grad_norm": 0.9618143439292908, "learning_rate": 0.0006901611047180668, "loss": 0.3588, "step": 227650 }, { "epoch": 65.49482163406213, "grad_norm": 1.3172672986984253, "learning_rate": 0.0006901035673187573, "loss": 0.4015, "step": 227660 }, { "epoch": 65.49769850402762, "grad_norm": 1.795387625694275, "learning_rate": 0.0006900460299194476, "loss": 0.569, "step": 227670 }, { "epoch": 65.5005753739931, "grad_norm": 2.4670569896698, "learning_rate": 0.0006899884925201381, "loss": 0.4547, "step": 227680 }, { "epoch": 65.50345224395858, "grad_norm": 0.8792927861213684, "learning_rate": 0.0006899309551208286, "loss": 0.4095, "step": 227690 }, { "epoch": 65.50632911392405, "grad_norm": 1.4920748472213745, "learning_rate": 0.000689873417721519, "loss": 0.4253, "step": 227700 }, { "epoch": 65.50920598388953, "grad_norm": 0.6577582955360413, "learning_rate": 0.0006898158803222094, "loss": 0.3886, "step": 227710 }, { "epoch": 65.512082853855, "grad_norm": 1.918544054031372, "learning_rate": 0.0006897583429228999, "loss": 0.5376, "step": 227720 }, { "epoch": 65.51495972382048, "grad_norm": 1.6892069578170776, "learning_rate": 0.0006897008055235903, "loss": 0.4553, "step": 227730 }, { "epoch": 65.51783659378596, "grad_norm": 1.113801121711731, "learning_rate": 0.0006896432681242808, "loss": 0.542, "step": 227740 }, { "epoch": 65.52071346375143, "grad_norm": 0.9068315625190735, "learning_rate": 0.0006895857307249712, "loss": 0.4919, "step": 227750 }, { "epoch": 65.52359033371691, "grad_norm": 0.6709926724433899, "learning_rate": 0.0006895281933256617, "loss": 0.3762, "step": 227760 }, { "epoch": 65.52646720368239, "grad_norm": 0.9427375793457031, "learning_rate": 0.0006894706559263522, "loss": 0.5277, "step": 227770 }, { "epoch": 65.52934407364788, "grad_norm": 1.1095702648162842, "learning_rate": 0.0006894131185270426, "loss": 0.4256, "step": 227780 }, { "epoch": 65.53222094361335, "grad_norm": 1.2303792238235474, "learning_rate": 0.000689355581127733, "loss": 0.4133, "step": 227790 }, { "epoch": 65.53509781357883, "grad_norm": 1.3282526731491089, "learning_rate": 0.0006892980437284235, "loss": 0.4452, "step": 227800 }, { "epoch": 65.5379746835443, "grad_norm": 1.1017048358917236, "learning_rate": 0.000689240506329114, "loss": 0.3875, "step": 227810 }, { "epoch": 65.54085155350978, "grad_norm": 1.381222128868103, "learning_rate": 0.0006891829689298043, "loss": 0.4715, "step": 227820 }, { "epoch": 65.54372842347526, "grad_norm": 1.2605831623077393, "learning_rate": 0.0006891254315304948, "loss": 0.4256, "step": 227830 }, { "epoch": 65.54660529344073, "grad_norm": 1.3154319524765015, "learning_rate": 0.0006890678941311853, "loss": 0.4497, "step": 227840 }, { "epoch": 65.54948216340621, "grad_norm": 1.077881097793579, "learning_rate": 0.0006890103567318757, "loss": 0.422, "step": 227850 }, { "epoch": 65.55235903337169, "grad_norm": 1.1611762046813965, "learning_rate": 0.0006889528193325661, "loss": 0.4328, "step": 227860 }, { "epoch": 65.55523590333716, "grad_norm": 1.5093668699264526, "learning_rate": 0.0006888952819332567, "loss": 0.3478, "step": 227870 }, { "epoch": 65.55811277330265, "grad_norm": 1.2973443269729614, "learning_rate": 0.0006888377445339471, "loss": 0.4335, "step": 227880 }, { "epoch": 65.56098964326813, "grad_norm": 1.707323431968689, "learning_rate": 0.0006887802071346375, "loss": 0.4566, "step": 227890 }, { "epoch": 65.5638665132336, "grad_norm": 1.4395508766174316, "learning_rate": 0.000688722669735328, "loss": 0.4078, "step": 227900 }, { "epoch": 65.56674338319908, "grad_norm": 0.7189493179321289, "learning_rate": 0.0006886651323360184, "loss": 0.4549, "step": 227910 }, { "epoch": 65.56962025316456, "grad_norm": 1.2448703050613403, "learning_rate": 0.0006886075949367089, "loss": 0.4395, "step": 227920 }, { "epoch": 65.57249712313003, "grad_norm": 2.0828142166137695, "learning_rate": 0.0006885500575373993, "loss": 0.4815, "step": 227930 }, { "epoch": 65.57537399309551, "grad_norm": 0.7255480885505676, "learning_rate": 0.0006884925201380897, "loss": 0.3391, "step": 227940 }, { "epoch": 65.57825086306099, "grad_norm": 2.1825766563415527, "learning_rate": 0.0006884349827387802, "loss": 0.47, "step": 227950 }, { "epoch": 65.58112773302646, "grad_norm": 1.9311429262161255, "learning_rate": 0.0006883774453394707, "loss": 0.4225, "step": 227960 }, { "epoch": 65.58400460299194, "grad_norm": 1.8939058780670166, "learning_rate": 0.000688319907940161, "loss": 0.4746, "step": 227970 }, { "epoch": 65.58688147295742, "grad_norm": 0.8967658281326294, "learning_rate": 0.0006882623705408516, "loss": 0.4464, "step": 227980 }, { "epoch": 65.5897583429229, "grad_norm": 1.1304458379745483, "learning_rate": 0.0006882048331415421, "loss": 0.4392, "step": 227990 }, { "epoch": 65.59263521288838, "grad_norm": 1.4488346576690674, "learning_rate": 0.0006881472957422324, "loss": 0.363, "step": 228000 }, { "epoch": 65.59551208285386, "grad_norm": 1.9161916971206665, "learning_rate": 0.0006880897583429229, "loss": 0.4705, "step": 228010 }, { "epoch": 65.59838895281933, "grad_norm": 1.7191781997680664, "learning_rate": 0.0006880322209436134, "loss": 0.549, "step": 228020 }, { "epoch": 65.60126582278481, "grad_norm": 1.1122792959213257, "learning_rate": 0.0006879746835443038, "loss": 0.5054, "step": 228030 }, { "epoch": 65.60414269275029, "grad_norm": 1.3843460083007812, "learning_rate": 0.0006879171461449942, "loss": 0.4892, "step": 228040 }, { "epoch": 65.60701956271576, "grad_norm": 1.1045348644256592, "learning_rate": 0.0006878596087456848, "loss": 0.4488, "step": 228050 }, { "epoch": 65.60989643268124, "grad_norm": 0.9332173466682434, "learning_rate": 0.0006878020713463752, "loss": 0.5233, "step": 228060 }, { "epoch": 65.61277330264672, "grad_norm": 0.7459000945091248, "learning_rate": 0.0006877445339470656, "loss": 0.3411, "step": 228070 }, { "epoch": 65.61565017261219, "grad_norm": 0.9467480778694153, "learning_rate": 0.0006876869965477561, "loss": 0.4849, "step": 228080 }, { "epoch": 65.61852704257768, "grad_norm": 1.1051278114318848, "learning_rate": 0.0006876294591484465, "loss": 0.3998, "step": 228090 }, { "epoch": 65.62140391254316, "grad_norm": 1.398488163948059, "learning_rate": 0.000687571921749137, "loss": 0.508, "step": 228100 }, { "epoch": 65.62428078250863, "grad_norm": 1.079211711883545, "learning_rate": 0.0006875143843498274, "loss": 0.3545, "step": 228110 }, { "epoch": 65.62715765247411, "grad_norm": 1.378770351409912, "learning_rate": 0.0006874568469505178, "loss": 0.4278, "step": 228120 }, { "epoch": 65.63003452243959, "grad_norm": 1.2530144453048706, "learning_rate": 0.0006873993095512083, "loss": 0.4333, "step": 228130 }, { "epoch": 65.63291139240506, "grad_norm": 0.9986973404884338, "learning_rate": 0.0006873417721518988, "loss": 0.47, "step": 228140 }, { "epoch": 65.63578826237054, "grad_norm": 0.5948948860168457, "learning_rate": 0.0006872842347525891, "loss": 0.4417, "step": 228150 }, { "epoch": 65.63866513233602, "grad_norm": 1.8277205228805542, "learning_rate": 0.0006872266973532797, "loss": 0.3414, "step": 228160 }, { "epoch": 65.64154200230149, "grad_norm": 1.6256043910980225, "learning_rate": 0.0006871691599539702, "loss": 0.3843, "step": 228170 }, { "epoch": 65.64441887226697, "grad_norm": 1.297705054283142, "learning_rate": 0.0006871116225546605, "loss": 0.5064, "step": 228180 }, { "epoch": 65.64729574223244, "grad_norm": 1.2368897199630737, "learning_rate": 0.000687054085155351, "loss": 0.5029, "step": 228190 }, { "epoch": 65.65017261219793, "grad_norm": 0.8051905035972595, "learning_rate": 0.0006869965477560415, "loss": 0.4325, "step": 228200 }, { "epoch": 65.65304948216341, "grad_norm": 1.6547216176986694, "learning_rate": 0.0006869390103567319, "loss": 0.5296, "step": 228210 }, { "epoch": 65.65592635212889, "grad_norm": 1.4130330085754395, "learning_rate": 0.0006868814729574223, "loss": 0.4209, "step": 228220 }, { "epoch": 65.65880322209436, "grad_norm": 1.212417721748352, "learning_rate": 0.0006868239355581128, "loss": 0.4253, "step": 228230 }, { "epoch": 65.66168009205984, "grad_norm": 1.064538598060608, "learning_rate": 0.0006867663981588032, "loss": 0.4624, "step": 228240 }, { "epoch": 65.66455696202532, "grad_norm": 1.8571808338165283, "learning_rate": 0.0006867088607594937, "loss": 0.4596, "step": 228250 }, { "epoch": 65.66743383199079, "grad_norm": 1.6667205095291138, "learning_rate": 0.0006866513233601842, "loss": 0.4782, "step": 228260 }, { "epoch": 65.67031070195627, "grad_norm": 2.143439769744873, "learning_rate": 0.0006865937859608746, "loss": 0.4282, "step": 228270 }, { "epoch": 65.67318757192174, "grad_norm": 1.0766847133636475, "learning_rate": 0.0006865362485615651, "loss": 0.3606, "step": 228280 }, { "epoch": 65.67606444188722, "grad_norm": 1.3326003551483154, "learning_rate": 0.0006864787111622555, "loss": 0.4478, "step": 228290 }, { "epoch": 65.67894131185271, "grad_norm": 0.971407949924469, "learning_rate": 0.0006864211737629459, "loss": 0.4051, "step": 228300 }, { "epoch": 65.68181818181819, "grad_norm": 0.9806607961654663, "learning_rate": 0.0006863636363636364, "loss": 0.383, "step": 228310 }, { "epoch": 65.68469505178366, "grad_norm": 2.2981820106506348, "learning_rate": 0.0006863060989643268, "loss": 0.4053, "step": 228320 }, { "epoch": 65.68757192174914, "grad_norm": 2.4314610958099365, "learning_rate": 0.0006862485615650172, "loss": 0.6624, "step": 228330 }, { "epoch": 65.69044879171462, "grad_norm": 2.235496759414673, "learning_rate": 0.0006861910241657078, "loss": 0.4959, "step": 228340 }, { "epoch": 65.69332566168009, "grad_norm": 1.2294944524765015, "learning_rate": 0.0006861334867663981, "loss": 0.4384, "step": 228350 }, { "epoch": 65.69620253164557, "grad_norm": 1.477243423461914, "learning_rate": 0.0006860759493670886, "loss": 0.4563, "step": 228360 }, { "epoch": 65.69907940161104, "grad_norm": 1.5683695077896118, "learning_rate": 0.000686018411967779, "loss": 0.3348, "step": 228370 }, { "epoch": 65.70195627157652, "grad_norm": 1.2852380275726318, "learning_rate": 0.0006859608745684695, "loss": 0.4787, "step": 228380 }, { "epoch": 65.704833141542, "grad_norm": 1.1796703338623047, "learning_rate": 0.00068590333716916, "loss": 0.4415, "step": 228390 }, { "epoch": 65.70771001150747, "grad_norm": 1.0668933391571045, "learning_rate": 0.0006858457997698504, "loss": 0.5131, "step": 228400 }, { "epoch": 65.71058688147296, "grad_norm": 1.6882814168930054, "learning_rate": 0.0006857882623705408, "loss": 0.4693, "step": 228410 }, { "epoch": 65.71346375143844, "grad_norm": 1.1151410341262817, "learning_rate": 0.0006857307249712313, "loss": 0.4548, "step": 228420 }, { "epoch": 65.71634062140392, "grad_norm": 1.3698689937591553, "learning_rate": 0.0006856731875719218, "loss": 0.4304, "step": 228430 }, { "epoch": 65.71921749136939, "grad_norm": 1.817036747932434, "learning_rate": 0.0006856156501726121, "loss": 0.4385, "step": 228440 }, { "epoch": 65.72209436133487, "grad_norm": 1.1647840738296509, "learning_rate": 0.0006855581127733027, "loss": 0.5832, "step": 228450 }, { "epoch": 65.72497123130034, "grad_norm": 1.6585086584091187, "learning_rate": 0.0006855005753739932, "loss": 0.4185, "step": 228460 }, { "epoch": 65.72784810126582, "grad_norm": 0.9182495474815369, "learning_rate": 0.0006854430379746835, "loss": 0.5129, "step": 228470 }, { "epoch": 65.7307249712313, "grad_norm": 1.1491069793701172, "learning_rate": 0.000685385500575374, "loss": 0.4953, "step": 228480 }, { "epoch": 65.73360184119677, "grad_norm": 1.0452030897140503, "learning_rate": 0.0006853279631760645, "loss": 0.3539, "step": 228490 }, { "epoch": 65.73647871116225, "grad_norm": 1.8669559955596924, "learning_rate": 0.0006852704257767549, "loss": 0.6563, "step": 228500 }, { "epoch": 65.73935558112774, "grad_norm": 1.9104437828063965, "learning_rate": 0.0006852128883774453, "loss": 0.4123, "step": 228510 }, { "epoch": 65.74223245109322, "grad_norm": 0.8098231554031372, "learning_rate": 0.0006851553509781358, "loss": 0.4714, "step": 228520 }, { "epoch": 65.74510932105869, "grad_norm": 0.8109490871429443, "learning_rate": 0.0006850978135788262, "loss": 0.4563, "step": 228530 }, { "epoch": 65.74798619102417, "grad_norm": 1.5578360557556152, "learning_rate": 0.0006850402761795167, "loss": 0.4762, "step": 228540 }, { "epoch": 65.75086306098964, "grad_norm": 1.0167551040649414, "learning_rate": 0.0006849827387802071, "loss": 0.5488, "step": 228550 }, { "epoch": 65.75373993095512, "grad_norm": 1.317400574684143, "learning_rate": 0.0006849252013808976, "loss": 0.4298, "step": 228560 }, { "epoch": 65.7566168009206, "grad_norm": 1.2538365125656128, "learning_rate": 0.0006848676639815881, "loss": 0.5057, "step": 228570 }, { "epoch": 65.75949367088607, "grad_norm": 0.7139433026313782, "learning_rate": 0.0006848101265822785, "loss": 0.4195, "step": 228580 }, { "epoch": 65.76237054085155, "grad_norm": 1.8112703561782837, "learning_rate": 0.0006847525891829689, "loss": 0.3954, "step": 228590 }, { "epoch": 65.76524741081703, "grad_norm": 1.2945261001586914, "learning_rate": 0.0006846950517836594, "loss": 0.3939, "step": 228600 }, { "epoch": 65.7681242807825, "grad_norm": 0.9185485243797302, "learning_rate": 0.0006846375143843499, "loss": 0.4352, "step": 228610 }, { "epoch": 65.77100115074799, "grad_norm": 1.1022270917892456, "learning_rate": 0.0006845799769850402, "loss": 0.3754, "step": 228620 }, { "epoch": 65.77387802071347, "grad_norm": 1.1276410818099976, "learning_rate": 0.0006845224395857308, "loss": 0.4276, "step": 228630 }, { "epoch": 65.77675489067894, "grad_norm": 1.4773505926132202, "learning_rate": 0.0006844649021864212, "loss": 0.3668, "step": 228640 }, { "epoch": 65.77963176064442, "grad_norm": 1.8871777057647705, "learning_rate": 0.0006844073647871116, "loss": 0.4738, "step": 228650 }, { "epoch": 65.7825086306099, "grad_norm": 0.6311337947845459, "learning_rate": 0.000684349827387802, "loss": 0.3916, "step": 228660 }, { "epoch": 65.78538550057537, "grad_norm": 1.4725415706634521, "learning_rate": 0.0006842922899884926, "loss": 0.5015, "step": 228670 }, { "epoch": 65.78826237054085, "grad_norm": 0.7394592761993408, "learning_rate": 0.000684234752589183, "loss": 0.5395, "step": 228680 }, { "epoch": 65.79113924050633, "grad_norm": 0.7863039374351501, "learning_rate": 0.0006841772151898734, "loss": 0.3531, "step": 228690 }, { "epoch": 65.7940161104718, "grad_norm": 0.5959156155586243, "learning_rate": 0.0006841196777905639, "loss": 0.4028, "step": 228700 }, { "epoch": 65.79689298043728, "grad_norm": 0.7335093021392822, "learning_rate": 0.0006840621403912543, "loss": 0.5026, "step": 228710 }, { "epoch": 65.79976985040277, "grad_norm": 0.8801143765449524, "learning_rate": 0.0006840046029919448, "loss": 0.4677, "step": 228720 }, { "epoch": 65.80264672036824, "grad_norm": 1.0420112609863281, "learning_rate": 0.0006839470655926352, "loss": 0.4753, "step": 228730 }, { "epoch": 65.80552359033372, "grad_norm": 1.0630873441696167, "learning_rate": 0.0006838895281933257, "loss": 0.4297, "step": 228740 }, { "epoch": 65.8084004602992, "grad_norm": 1.1644576787948608, "learning_rate": 0.0006838319907940161, "loss": 0.5878, "step": 228750 }, { "epoch": 65.81127733026467, "grad_norm": 1.0885804891586304, "learning_rate": 0.0006837744533947066, "loss": 0.528, "step": 228760 }, { "epoch": 65.81415420023015, "grad_norm": 1.1628262996673584, "learning_rate": 0.000683716915995397, "loss": 0.3903, "step": 228770 }, { "epoch": 65.81703107019563, "grad_norm": 1.9816913604736328, "learning_rate": 0.0006836593785960875, "loss": 0.5324, "step": 228780 }, { "epoch": 65.8199079401611, "grad_norm": 0.8553750514984131, "learning_rate": 0.000683601841196778, "loss": 0.4219, "step": 228790 }, { "epoch": 65.82278481012658, "grad_norm": 0.6944937705993652, "learning_rate": 0.0006835443037974683, "loss": 0.4069, "step": 228800 }, { "epoch": 65.82566168009205, "grad_norm": 1.1369657516479492, "learning_rate": 0.0006834867663981588, "loss": 0.4428, "step": 228810 }, { "epoch": 65.82853855005754, "grad_norm": 2.100294351577759, "learning_rate": 0.0006834292289988493, "loss": 0.3507, "step": 228820 }, { "epoch": 65.83141542002302, "grad_norm": 0.9015583395957947, "learning_rate": 0.0006833716915995397, "loss": 0.5108, "step": 228830 }, { "epoch": 65.8342922899885, "grad_norm": 1.191933512687683, "learning_rate": 0.0006833141542002301, "loss": 0.4417, "step": 228840 }, { "epoch": 65.83716915995397, "grad_norm": 1.1142364740371704, "learning_rate": 0.0006832566168009207, "loss": 0.4377, "step": 228850 }, { "epoch": 65.84004602991945, "grad_norm": 0.9050515294075012, "learning_rate": 0.000683199079401611, "loss": 0.3845, "step": 228860 }, { "epoch": 65.84292289988493, "grad_norm": 0.8058263659477234, "learning_rate": 0.0006831415420023015, "loss": 0.5034, "step": 228870 }, { "epoch": 65.8457997698504, "grad_norm": 3.0579183101654053, "learning_rate": 0.000683084004602992, "loss": 0.4962, "step": 228880 }, { "epoch": 65.84867663981588, "grad_norm": 1.224419355392456, "learning_rate": 0.0006830264672036824, "loss": 0.511, "step": 228890 }, { "epoch": 65.85155350978135, "grad_norm": 1.2811877727508545, "learning_rate": 0.0006829689298043729, "loss": 0.4907, "step": 228900 }, { "epoch": 65.85443037974683, "grad_norm": 2.0545406341552734, "learning_rate": 0.0006829113924050633, "loss": 0.518, "step": 228910 }, { "epoch": 65.8573072497123, "grad_norm": 1.198854684829712, "learning_rate": 0.0006828538550057537, "loss": 0.5109, "step": 228920 }, { "epoch": 65.8601841196778, "grad_norm": 0.8836497068405151, "learning_rate": 0.0006827963176064442, "loss": 0.4006, "step": 228930 }, { "epoch": 65.86306098964327, "grad_norm": 2.095982074737549, "learning_rate": 0.0006827387802071347, "loss": 0.5299, "step": 228940 }, { "epoch": 65.86593785960875, "grad_norm": 0.8347452878952026, "learning_rate": 0.000682681242807825, "loss": 0.4742, "step": 228950 }, { "epoch": 65.86881472957423, "grad_norm": 1.1306116580963135, "learning_rate": 0.0006826237054085156, "loss": 0.3945, "step": 228960 }, { "epoch": 65.8716915995397, "grad_norm": 0.8919028043746948, "learning_rate": 0.0006825661680092061, "loss": 0.4459, "step": 228970 }, { "epoch": 65.87456846950518, "grad_norm": 0.8477731347084045, "learning_rate": 0.0006825086306098964, "loss": 0.4749, "step": 228980 }, { "epoch": 65.87744533947065, "grad_norm": 0.7986733317375183, "learning_rate": 0.0006824510932105869, "loss": 0.452, "step": 228990 }, { "epoch": 65.88032220943613, "grad_norm": 0.9183996319770813, "learning_rate": 0.0006823935558112774, "loss": 0.416, "step": 229000 }, { "epoch": 65.8831990794016, "grad_norm": 0.82270348072052, "learning_rate": 0.0006823360184119678, "loss": 0.43, "step": 229010 }, { "epoch": 65.88607594936708, "grad_norm": 1.1862993240356445, "learning_rate": 0.0006822784810126582, "loss": 0.3973, "step": 229020 }, { "epoch": 65.88895281933257, "grad_norm": 0.9804985523223877, "learning_rate": 0.0006822209436133488, "loss": 0.3946, "step": 229030 }, { "epoch": 65.89182968929805, "grad_norm": 0.7946248054504395, "learning_rate": 0.0006821634062140391, "loss": 0.4408, "step": 229040 }, { "epoch": 65.89470655926353, "grad_norm": 0.9486145973205566, "learning_rate": 0.0006821058688147296, "loss": 0.4975, "step": 229050 }, { "epoch": 65.897583429229, "grad_norm": 1.0477062463760376, "learning_rate": 0.00068204833141542, "loss": 0.4495, "step": 229060 }, { "epoch": 65.90046029919448, "grad_norm": 2.6843466758728027, "learning_rate": 0.0006819907940161105, "loss": 0.5072, "step": 229070 }, { "epoch": 65.90333716915995, "grad_norm": 0.9472923874855042, "learning_rate": 0.000681933256616801, "loss": 0.4522, "step": 229080 }, { "epoch": 65.90621403912543, "grad_norm": 2.0532987117767334, "learning_rate": 0.0006818757192174914, "loss": 0.4482, "step": 229090 }, { "epoch": 65.9090909090909, "grad_norm": 2.6309032440185547, "learning_rate": 0.0006818181818181818, "loss": 0.5028, "step": 229100 }, { "epoch": 65.91196777905638, "grad_norm": 1.4468837976455688, "learning_rate": 0.0006817606444188723, "loss": 0.529, "step": 229110 }, { "epoch": 65.91484464902186, "grad_norm": 0.9650824666023254, "learning_rate": 0.0006817031070195628, "loss": 0.4357, "step": 229120 }, { "epoch": 65.91772151898734, "grad_norm": 1.5449450016021729, "learning_rate": 0.0006816455696202531, "loss": 0.4859, "step": 229130 }, { "epoch": 65.92059838895283, "grad_norm": 1.1832091808319092, "learning_rate": 0.0006815880322209437, "loss": 0.511, "step": 229140 }, { "epoch": 65.9234752589183, "grad_norm": 1.9267851114273071, "learning_rate": 0.000681530494821634, "loss": 0.5067, "step": 229150 }, { "epoch": 65.92635212888378, "grad_norm": 0.9753618240356445, "learning_rate": 0.0006814729574223245, "loss": 0.444, "step": 229160 }, { "epoch": 65.92922899884925, "grad_norm": 1.2244477272033691, "learning_rate": 0.000681415420023015, "loss": 0.5264, "step": 229170 }, { "epoch": 65.93210586881473, "grad_norm": 1.072446584701538, "learning_rate": 0.0006813578826237054, "loss": 0.4357, "step": 229180 }, { "epoch": 65.9349827387802, "grad_norm": 1.1417711973190308, "learning_rate": 0.0006813003452243959, "loss": 0.3826, "step": 229190 }, { "epoch": 65.93785960874568, "grad_norm": 1.3419147729873657, "learning_rate": 0.0006812428078250863, "loss": 0.417, "step": 229200 }, { "epoch": 65.94073647871116, "grad_norm": 1.1226173639297485, "learning_rate": 0.0006811852704257767, "loss": 0.4697, "step": 229210 }, { "epoch": 65.94361334867664, "grad_norm": 1.551169991493225, "learning_rate": 0.0006811277330264672, "loss": 0.5228, "step": 229220 }, { "epoch": 65.94649021864211, "grad_norm": 0.974357545375824, "learning_rate": 0.0006810701956271577, "loss": 0.5696, "step": 229230 }, { "epoch": 65.9493670886076, "grad_norm": 1.2644662857055664, "learning_rate": 0.000681012658227848, "loss": 0.4574, "step": 229240 }, { "epoch": 65.95224395857308, "grad_norm": 1.28583824634552, "learning_rate": 0.0006809551208285386, "loss": 0.4439, "step": 229250 }, { "epoch": 65.95512082853855, "grad_norm": 0.8499462008476257, "learning_rate": 0.000680897583429229, "loss": 0.4084, "step": 229260 }, { "epoch": 65.95799769850403, "grad_norm": 0.7235179543495178, "learning_rate": 0.0006808400460299194, "loss": 0.4703, "step": 229270 }, { "epoch": 65.9608745684695, "grad_norm": 0.8413665890693665, "learning_rate": 0.0006807825086306099, "loss": 0.4465, "step": 229280 }, { "epoch": 65.96375143843498, "grad_norm": 1.2039505243301392, "learning_rate": 0.0006807249712313004, "loss": 0.4813, "step": 229290 }, { "epoch": 65.96662830840046, "grad_norm": 1.542448878288269, "learning_rate": 0.0006806674338319908, "loss": 0.4077, "step": 229300 }, { "epoch": 65.96950517836594, "grad_norm": 0.7763499617576599, "learning_rate": 0.0006806098964326812, "loss": 0.5041, "step": 229310 }, { "epoch": 65.97238204833141, "grad_norm": 2.0127577781677246, "learning_rate": 0.0006805523590333718, "loss": 0.4468, "step": 229320 }, { "epoch": 65.97525891829689, "grad_norm": 0.9146468639373779, "learning_rate": 0.0006804948216340621, "loss": 0.4683, "step": 229330 }, { "epoch": 65.97813578826236, "grad_norm": 1.6128019094467163, "learning_rate": 0.0006804372842347526, "loss": 0.3946, "step": 229340 }, { "epoch": 65.98101265822785, "grad_norm": 0.9178234934806824, "learning_rate": 0.000680379746835443, "loss": 0.3804, "step": 229350 }, { "epoch": 65.98388952819333, "grad_norm": 0.9423167705535889, "learning_rate": 0.0006803222094361335, "loss": 0.5171, "step": 229360 }, { "epoch": 65.9867663981588, "grad_norm": 1.118224024772644, "learning_rate": 0.000680264672036824, "loss": 0.5173, "step": 229370 }, { "epoch": 65.98964326812428, "grad_norm": 0.9303275346755981, "learning_rate": 0.0006802071346375144, "loss": 0.4107, "step": 229380 }, { "epoch": 65.99252013808976, "grad_norm": 1.1645303964614868, "learning_rate": 0.0006801495972382048, "loss": 0.5316, "step": 229390 }, { "epoch": 65.99539700805524, "grad_norm": 1.0622223615646362, "learning_rate": 0.0006800920598388953, "loss": 0.5441, "step": 229400 }, { "epoch": 65.99827387802071, "grad_norm": 1.805013656616211, "learning_rate": 0.0006800345224395858, "loss": 0.5374, "step": 229410 }, { "epoch": 66.00115074798619, "grad_norm": 0.7483300566673279, "learning_rate": 0.0006799769850402761, "loss": 0.3744, "step": 229420 }, { "epoch": 66.00402761795166, "grad_norm": 1.73225736618042, "learning_rate": 0.0006799194476409667, "loss": 0.4426, "step": 229430 }, { "epoch": 66.00690448791714, "grad_norm": 1.3254793882369995, "learning_rate": 0.0006798619102416571, "loss": 0.4057, "step": 229440 }, { "epoch": 66.00978135788263, "grad_norm": 1.1027815341949463, "learning_rate": 0.0006798043728423475, "loss": 0.4527, "step": 229450 }, { "epoch": 66.0126582278481, "grad_norm": 0.9809333086013794, "learning_rate": 0.0006797468354430379, "loss": 0.4208, "step": 229460 }, { "epoch": 66.01553509781358, "grad_norm": 1.474144458770752, "learning_rate": 0.0006796892980437285, "loss": 0.3797, "step": 229470 }, { "epoch": 66.01841196777906, "grad_norm": 0.7087701559066772, "learning_rate": 0.0006796317606444189, "loss": 0.3214, "step": 229480 }, { "epoch": 66.02128883774454, "grad_norm": 1.3042933940887451, "learning_rate": 0.0006795742232451093, "loss": 0.415, "step": 229490 }, { "epoch": 66.02416570771001, "grad_norm": 1.4295461177825928, "learning_rate": 0.0006795166858457998, "loss": 0.4333, "step": 229500 }, { "epoch": 66.02704257767549, "grad_norm": 1.9352173805236816, "learning_rate": 0.0006794591484464902, "loss": 0.4067, "step": 229510 }, { "epoch": 66.02991944764096, "grad_norm": 1.1192060708999634, "learning_rate": 0.0006794016110471807, "loss": 0.4739, "step": 229520 }, { "epoch": 66.03279631760644, "grad_norm": 0.9656155705451965, "learning_rate": 0.0006793440736478711, "loss": 0.4487, "step": 229530 }, { "epoch": 66.03567318757192, "grad_norm": 0.7859501242637634, "learning_rate": 0.0006792865362485616, "loss": 0.4576, "step": 229540 }, { "epoch": 66.03855005753739, "grad_norm": 1.1781715154647827, "learning_rate": 0.000679228998849252, "loss": 0.364, "step": 229550 }, { "epoch": 66.04142692750288, "grad_norm": 1.6282892227172852, "learning_rate": 0.0006791714614499425, "loss": 0.453, "step": 229560 }, { "epoch": 66.04430379746836, "grad_norm": 1.1048822402954102, "learning_rate": 0.0006791139240506328, "loss": 0.4292, "step": 229570 }, { "epoch": 66.04718066743384, "grad_norm": 1.4949291944503784, "learning_rate": 0.0006790563866513234, "loss": 0.3337, "step": 229580 }, { "epoch": 66.05005753739931, "grad_norm": 1.2972259521484375, "learning_rate": 0.0006789988492520139, "loss": 0.3755, "step": 229590 }, { "epoch": 66.05293440736479, "grad_norm": 0.5733559131622314, "learning_rate": 0.0006789413118527042, "loss": 0.3967, "step": 229600 }, { "epoch": 66.05581127733026, "grad_norm": 0.6402254104614258, "learning_rate": 0.0006788837744533948, "loss": 0.3602, "step": 229610 }, { "epoch": 66.05868814729574, "grad_norm": 1.2273151874542236, "learning_rate": 0.0006788262370540852, "loss": 0.4799, "step": 229620 }, { "epoch": 66.06156501726122, "grad_norm": 1.3141626119613647, "learning_rate": 0.0006787686996547756, "loss": 0.3824, "step": 229630 }, { "epoch": 66.06444188722669, "grad_norm": 1.314159631729126, "learning_rate": 0.000678711162255466, "loss": 0.3682, "step": 229640 }, { "epoch": 66.06731875719217, "grad_norm": 1.209633469581604, "learning_rate": 0.0006786536248561566, "loss": 0.3867, "step": 229650 }, { "epoch": 66.07019562715766, "grad_norm": 1.1872674226760864, "learning_rate": 0.000678596087456847, "loss": 0.4228, "step": 229660 }, { "epoch": 66.07307249712314, "grad_norm": 0.9950382709503174, "learning_rate": 0.0006785385500575374, "loss": 0.3719, "step": 229670 }, { "epoch": 66.07594936708861, "grad_norm": 1.1037219762802124, "learning_rate": 0.0006784810126582279, "loss": 0.3985, "step": 229680 }, { "epoch": 66.07882623705409, "grad_norm": 0.7230385541915894, "learning_rate": 0.0006784234752589183, "loss": 0.4527, "step": 229690 }, { "epoch": 66.08170310701956, "grad_norm": 0.6051057577133179, "learning_rate": 0.0006783659378596088, "loss": 0.3414, "step": 229700 }, { "epoch": 66.08457997698504, "grad_norm": 1.5866329669952393, "learning_rate": 0.0006783084004602992, "loss": 0.4806, "step": 229710 }, { "epoch": 66.08745684695052, "grad_norm": 1.0757801532745361, "learning_rate": 0.0006782508630609897, "loss": 0.4331, "step": 229720 }, { "epoch": 66.09033371691599, "grad_norm": 0.8182990550994873, "learning_rate": 0.0006781933256616801, "loss": 0.3582, "step": 229730 }, { "epoch": 66.09321058688147, "grad_norm": 0.8467893004417419, "learning_rate": 0.0006781357882623706, "loss": 0.4175, "step": 229740 }, { "epoch": 66.09608745684694, "grad_norm": 0.9296668767929077, "learning_rate": 0.0006780782508630609, "loss": 0.4579, "step": 229750 }, { "epoch": 66.09896432681242, "grad_norm": 1.2196063995361328, "learning_rate": 0.0006780207134637515, "loss": 0.37, "step": 229760 }, { "epoch": 66.10184119677791, "grad_norm": 1.5999977588653564, "learning_rate": 0.000677963176064442, "loss": 0.5074, "step": 229770 }, { "epoch": 66.10471806674339, "grad_norm": 1.075338363647461, "learning_rate": 0.0006779056386651323, "loss": 0.3903, "step": 229780 }, { "epoch": 66.10759493670886, "grad_norm": 1.0410857200622559, "learning_rate": 0.0006778481012658228, "loss": 0.3947, "step": 229790 }, { "epoch": 66.11047180667434, "grad_norm": 1.1163934469223022, "learning_rate": 0.0006777905638665133, "loss": 0.342, "step": 229800 }, { "epoch": 66.11334867663982, "grad_norm": 0.777407705783844, "learning_rate": 0.0006777330264672037, "loss": 0.3705, "step": 229810 }, { "epoch": 66.11622554660529, "grad_norm": 1.4574521780014038, "learning_rate": 0.0006776754890678941, "loss": 0.3976, "step": 229820 }, { "epoch": 66.11910241657077, "grad_norm": 1.1551942825317383, "learning_rate": 0.0006776179516685847, "loss": 0.396, "step": 229830 }, { "epoch": 66.12197928653625, "grad_norm": 1.3581924438476562, "learning_rate": 0.000677560414269275, "loss": 0.5075, "step": 229840 }, { "epoch": 66.12485615650172, "grad_norm": 1.5808125734329224, "learning_rate": 0.0006775028768699655, "loss": 0.4395, "step": 229850 }, { "epoch": 66.1277330264672, "grad_norm": 1.6881768703460693, "learning_rate": 0.000677445339470656, "loss": 0.3701, "step": 229860 }, { "epoch": 66.13060989643269, "grad_norm": 1.9608759880065918, "learning_rate": 0.0006773878020713464, "loss": 0.465, "step": 229870 }, { "epoch": 66.13348676639816, "grad_norm": 1.1898305416107178, "learning_rate": 0.0006773302646720369, "loss": 0.4106, "step": 229880 }, { "epoch": 66.13636363636364, "grad_norm": 1.978682279586792, "learning_rate": 0.0006772727272727273, "loss": 0.4033, "step": 229890 }, { "epoch": 66.13924050632912, "grad_norm": 1.1528090238571167, "learning_rate": 0.0006772151898734177, "loss": 0.412, "step": 229900 }, { "epoch": 66.14211737629459, "grad_norm": 0.8850045204162598, "learning_rate": 0.0006771576524741082, "loss": 0.4064, "step": 229910 }, { "epoch": 66.14499424626007, "grad_norm": 1.7456116676330566, "learning_rate": 0.0006771001150747987, "loss": 0.4591, "step": 229920 }, { "epoch": 66.14787111622555, "grad_norm": 1.8924931287765503, "learning_rate": 0.000677042577675489, "loss": 0.5284, "step": 229930 }, { "epoch": 66.15074798619102, "grad_norm": 1.2119994163513184, "learning_rate": 0.0006769850402761796, "loss": 0.3318, "step": 229940 }, { "epoch": 66.1536248561565, "grad_norm": 1.6545233726501465, "learning_rate": 0.00067692750287687, "loss": 0.496, "step": 229950 }, { "epoch": 66.15650172612197, "grad_norm": 1.503927230834961, "learning_rate": 0.0006768699654775604, "loss": 0.3904, "step": 229960 }, { "epoch": 66.15937859608745, "grad_norm": 1.1582250595092773, "learning_rate": 0.0006768124280782509, "loss": 0.4389, "step": 229970 }, { "epoch": 66.16225546605294, "grad_norm": 1.604838252067566, "learning_rate": 0.0006767548906789413, "loss": 0.3589, "step": 229980 }, { "epoch": 66.16513233601842, "grad_norm": 1.4587533473968506, "learning_rate": 0.0006766973532796318, "loss": 0.3625, "step": 229990 }, { "epoch": 66.16800920598389, "grad_norm": 1.0390355587005615, "learning_rate": 0.0006766398158803222, "loss": 0.4721, "step": 230000 }, { "epoch": 66.17088607594937, "grad_norm": 2.2377350330352783, "learning_rate": 0.0006765822784810127, "loss": 0.5401, "step": 230010 }, { "epoch": 66.17376294591485, "grad_norm": 1.0422961711883545, "learning_rate": 0.0006765247410817031, "loss": 0.4269, "step": 230020 }, { "epoch": 66.17663981588032, "grad_norm": 0.8952033519744873, "learning_rate": 0.0006764672036823936, "loss": 0.4365, "step": 230030 }, { "epoch": 66.1795166858458, "grad_norm": 1.181514859199524, "learning_rate": 0.0006764096662830839, "loss": 0.4134, "step": 230040 }, { "epoch": 66.18239355581127, "grad_norm": 0.7015255689620972, "learning_rate": 0.0006763521288837745, "loss": 0.3814, "step": 230050 }, { "epoch": 66.18527042577675, "grad_norm": 0.905161440372467, "learning_rate": 0.000676294591484465, "loss": 0.4252, "step": 230060 }, { "epoch": 66.18814729574223, "grad_norm": 0.9909980893135071, "learning_rate": 0.0006762370540851553, "loss": 0.3414, "step": 230070 }, { "epoch": 66.19102416570772, "grad_norm": 1.01841139793396, "learning_rate": 0.0006761795166858458, "loss": 0.3278, "step": 230080 }, { "epoch": 66.19390103567319, "grad_norm": 1.270158052444458, "learning_rate": 0.0006761219792865363, "loss": 0.3575, "step": 230090 }, { "epoch": 66.19677790563867, "grad_norm": 1.1735763549804688, "learning_rate": 0.0006760644418872267, "loss": 0.4779, "step": 230100 }, { "epoch": 66.19965477560415, "grad_norm": 0.9708371162414551, "learning_rate": 0.0006760069044879171, "loss": 0.3865, "step": 230110 }, { "epoch": 66.20253164556962, "grad_norm": 1.400447964668274, "learning_rate": 0.0006759493670886077, "loss": 0.4715, "step": 230120 }, { "epoch": 66.2054085155351, "grad_norm": 0.9085091352462769, "learning_rate": 0.000675891829689298, "loss": 0.3944, "step": 230130 }, { "epoch": 66.20828538550057, "grad_norm": 1.7840423583984375, "learning_rate": 0.0006758342922899885, "loss": 0.5653, "step": 230140 }, { "epoch": 66.21116225546605, "grad_norm": 0.8090454339981079, "learning_rate": 0.0006757767548906789, "loss": 0.4862, "step": 230150 }, { "epoch": 66.21403912543153, "grad_norm": 0.8387807607650757, "learning_rate": 0.0006757192174913694, "loss": 0.3675, "step": 230160 }, { "epoch": 66.216915995397, "grad_norm": 0.7143486738204956, "learning_rate": 0.0006756616800920599, "loss": 0.3695, "step": 230170 }, { "epoch": 66.21979286536248, "grad_norm": 2.001559019088745, "learning_rate": 0.0006756041426927503, "loss": 0.5068, "step": 230180 }, { "epoch": 66.22266973532797, "grad_norm": 1.2412798404693604, "learning_rate": 0.0006755466052934407, "loss": 0.3991, "step": 230190 }, { "epoch": 66.22554660529345, "grad_norm": 2.1773335933685303, "learning_rate": 0.0006754890678941312, "loss": 0.4691, "step": 230200 }, { "epoch": 66.22842347525892, "grad_norm": 0.9691096544265747, "learning_rate": 0.0006754315304948217, "loss": 0.4705, "step": 230210 }, { "epoch": 66.2313003452244, "grad_norm": 1.330802083015442, "learning_rate": 0.000675373993095512, "loss": 0.3889, "step": 230220 }, { "epoch": 66.23417721518987, "grad_norm": 1.3439399003982544, "learning_rate": 0.0006753164556962026, "loss": 0.3906, "step": 230230 }, { "epoch": 66.23705408515535, "grad_norm": 1.607309103012085, "learning_rate": 0.000675258918296893, "loss": 0.4027, "step": 230240 }, { "epoch": 66.23993095512083, "grad_norm": 1.6302517652511597, "learning_rate": 0.0006752013808975834, "loss": 0.5201, "step": 230250 }, { "epoch": 66.2428078250863, "grad_norm": 0.8670506477355957, "learning_rate": 0.0006751438434982738, "loss": 0.4402, "step": 230260 }, { "epoch": 66.24568469505178, "grad_norm": 1.1775343418121338, "learning_rate": 0.0006750863060989644, "loss": 0.4057, "step": 230270 }, { "epoch": 66.24856156501725, "grad_norm": 0.8639894723892212, "learning_rate": 0.0006750287686996548, "loss": 0.5135, "step": 230280 }, { "epoch": 66.25143843498275, "grad_norm": 0.8107365369796753, "learning_rate": 0.0006749712313003452, "loss": 0.4114, "step": 230290 }, { "epoch": 66.25431530494822, "grad_norm": 0.8078609108924866, "learning_rate": 0.0006749136939010358, "loss": 0.3888, "step": 230300 }, { "epoch": 66.2571921749137, "grad_norm": 1.3358495235443115, "learning_rate": 0.0006748561565017261, "loss": 0.35, "step": 230310 }, { "epoch": 66.26006904487917, "grad_norm": 1.4080262184143066, "learning_rate": 0.0006747986191024166, "loss": 0.4101, "step": 230320 }, { "epoch": 66.26294591484465, "grad_norm": 0.9792938232421875, "learning_rate": 0.000674741081703107, "loss": 0.4422, "step": 230330 }, { "epoch": 66.26582278481013, "grad_norm": 2.058985948562622, "learning_rate": 0.0006746835443037975, "loss": 0.3357, "step": 230340 }, { "epoch": 66.2686996547756, "grad_norm": 1.3580663204193115, "learning_rate": 0.0006746260069044879, "loss": 0.387, "step": 230350 }, { "epoch": 66.27157652474108, "grad_norm": 2.7554399967193604, "learning_rate": 0.0006745684695051784, "loss": 0.4934, "step": 230360 }, { "epoch": 66.27445339470655, "grad_norm": 1.0357433557510376, "learning_rate": 0.0006745109321058687, "loss": 0.4551, "step": 230370 }, { "epoch": 66.27733026467203, "grad_norm": 1.1920963525772095, "learning_rate": 0.0006744533947065593, "loss": 0.5861, "step": 230380 }, { "epoch": 66.28020713463752, "grad_norm": 0.8313540816307068, "learning_rate": 0.0006743958573072498, "loss": 0.5351, "step": 230390 }, { "epoch": 66.283084004603, "grad_norm": 0.9778434038162231, "learning_rate": 0.0006743383199079401, "loss": 0.447, "step": 230400 }, { "epoch": 66.28596087456847, "grad_norm": 5.077169418334961, "learning_rate": 0.0006742807825086307, "loss": 0.4763, "step": 230410 }, { "epoch": 66.28883774453395, "grad_norm": 0.6630676984786987, "learning_rate": 0.0006742232451093211, "loss": 0.3098, "step": 230420 }, { "epoch": 66.29171461449943, "grad_norm": 1.4163025617599487, "learning_rate": 0.0006741657077100115, "loss": 0.5439, "step": 230430 }, { "epoch": 66.2945914844649, "grad_norm": 0.7668754458427429, "learning_rate": 0.0006741081703107019, "loss": 0.3632, "step": 230440 }, { "epoch": 66.29746835443038, "grad_norm": 1.6958322525024414, "learning_rate": 0.0006740506329113925, "loss": 0.4771, "step": 230450 }, { "epoch": 66.30034522439585, "grad_norm": 0.9366699457168579, "learning_rate": 0.0006739930955120828, "loss": 0.3956, "step": 230460 }, { "epoch": 66.30322209436133, "grad_norm": 0.9308155179023743, "learning_rate": 0.0006739355581127733, "loss": 0.3276, "step": 230470 }, { "epoch": 66.30609896432681, "grad_norm": 1.5111504793167114, "learning_rate": 0.0006738780207134638, "loss": 0.4113, "step": 230480 }, { "epoch": 66.30897583429228, "grad_norm": 1.1020439863204956, "learning_rate": 0.0006738204833141542, "loss": 0.5829, "step": 230490 }, { "epoch": 66.31185270425777, "grad_norm": 1.3227925300598145, "learning_rate": 0.0006737629459148447, "loss": 0.4666, "step": 230500 }, { "epoch": 66.31472957422325, "grad_norm": 1.1324957609176636, "learning_rate": 0.0006737054085155351, "loss": 0.4677, "step": 230510 }, { "epoch": 66.31760644418873, "grad_norm": 0.812700629234314, "learning_rate": 0.0006736478711162256, "loss": 0.4506, "step": 230520 }, { "epoch": 66.3204833141542, "grad_norm": 0.8981810212135315, "learning_rate": 0.000673590333716916, "loss": 0.3738, "step": 230530 }, { "epoch": 66.32336018411968, "grad_norm": 1.4153618812561035, "learning_rate": 0.0006735327963176065, "loss": 0.5324, "step": 230540 }, { "epoch": 66.32623705408515, "grad_norm": 1.3640490770339966, "learning_rate": 0.0006734752589182968, "loss": 0.384, "step": 230550 }, { "epoch": 66.32911392405063, "grad_norm": 1.2842223644256592, "learning_rate": 0.0006734177215189874, "loss": 0.4342, "step": 230560 }, { "epoch": 66.33199079401611, "grad_norm": 1.9643893241882324, "learning_rate": 0.0006733601841196779, "loss": 0.4306, "step": 230570 }, { "epoch": 66.33486766398158, "grad_norm": 1.1846365928649902, "learning_rate": 0.0006733026467203682, "loss": 0.432, "step": 230580 }, { "epoch": 66.33774453394706, "grad_norm": 1.2405530214309692, "learning_rate": 0.0006732451093210588, "loss": 0.4514, "step": 230590 }, { "epoch": 66.34062140391255, "grad_norm": 2.0291974544525146, "learning_rate": 0.0006731875719217492, "loss": 0.5151, "step": 230600 }, { "epoch": 66.34349827387803, "grad_norm": 2.332595109939575, "learning_rate": 0.0006731300345224396, "loss": 0.4886, "step": 230610 }, { "epoch": 66.3463751438435, "grad_norm": 1.3327349424362183, "learning_rate": 0.00067307249712313, "loss": 0.4828, "step": 230620 }, { "epoch": 66.34925201380898, "grad_norm": 3.3098526000976562, "learning_rate": 0.0006730149597238206, "loss": 0.3582, "step": 230630 }, { "epoch": 66.35212888377445, "grad_norm": 1.8186620473861694, "learning_rate": 0.0006729574223245109, "loss": 0.4093, "step": 230640 }, { "epoch": 66.35500575373993, "grad_norm": 2.4578304290771484, "learning_rate": 0.0006728998849252014, "loss": 0.7542, "step": 230650 }, { "epoch": 66.35788262370541, "grad_norm": 0.9044679403305054, "learning_rate": 0.0006728423475258918, "loss": 0.4182, "step": 230660 }, { "epoch": 66.36075949367088, "grad_norm": 2.4148166179656982, "learning_rate": 0.0006727848101265823, "loss": 0.4365, "step": 230670 }, { "epoch": 66.36363636363636, "grad_norm": 0.8294494152069092, "learning_rate": 0.0006727272727272728, "loss": 0.3575, "step": 230680 }, { "epoch": 66.36651323360184, "grad_norm": 1.4738374948501587, "learning_rate": 0.0006726697353279632, "loss": 0.472, "step": 230690 }, { "epoch": 66.36939010356731, "grad_norm": 2.597668170928955, "learning_rate": 0.0006726121979286537, "loss": 0.5076, "step": 230700 }, { "epoch": 66.3722669735328, "grad_norm": 0.8975241780281067, "learning_rate": 0.0006725546605293441, "loss": 0.3446, "step": 230710 }, { "epoch": 66.37514384349828, "grad_norm": 1.1339272260665894, "learning_rate": 0.0006724971231300346, "loss": 0.4834, "step": 230720 }, { "epoch": 66.37802071346375, "grad_norm": 1.1426947116851807, "learning_rate": 0.0006724395857307249, "loss": 0.5009, "step": 230730 }, { "epoch": 66.38089758342923, "grad_norm": 2.945486068725586, "learning_rate": 0.0006723820483314155, "loss": 0.5112, "step": 230740 }, { "epoch": 66.38377445339471, "grad_norm": 0.8735793232917786, "learning_rate": 0.0006723245109321059, "loss": 0.4299, "step": 230750 }, { "epoch": 66.38665132336018, "grad_norm": 0.7022791504859924, "learning_rate": 0.0006722669735327963, "loss": 0.3694, "step": 230760 }, { "epoch": 66.38952819332566, "grad_norm": 2.1346960067749023, "learning_rate": 0.0006722094361334867, "loss": 0.5014, "step": 230770 }, { "epoch": 66.39240506329114, "grad_norm": 1.0981727838516235, "learning_rate": 0.0006721518987341773, "loss": 0.4168, "step": 230780 }, { "epoch": 66.39528193325661, "grad_norm": 2.304288148880005, "learning_rate": 0.0006720943613348677, "loss": 0.4386, "step": 230790 }, { "epoch": 66.39815880322209, "grad_norm": 0.6348063945770264, "learning_rate": 0.0006720368239355581, "loss": 0.4246, "step": 230800 }, { "epoch": 66.40103567318758, "grad_norm": 0.8276826739311218, "learning_rate": 0.0006719792865362486, "loss": 0.4958, "step": 230810 }, { "epoch": 66.40391254315306, "grad_norm": 1.3011949062347412, "learning_rate": 0.000671921749136939, "loss": 0.4798, "step": 230820 }, { "epoch": 66.40678941311853, "grad_norm": 0.9057256579399109, "learning_rate": 0.0006718642117376295, "loss": 0.4885, "step": 230830 }, { "epoch": 66.40966628308401, "grad_norm": 1.4422829151153564, "learning_rate": 0.0006718066743383198, "loss": 0.4996, "step": 230840 }, { "epoch": 66.41254315304948, "grad_norm": 1.1502960920333862, "learning_rate": 0.0006717491369390104, "loss": 0.4289, "step": 230850 }, { "epoch": 66.41542002301496, "grad_norm": 0.6268607974052429, "learning_rate": 0.0006716915995397008, "loss": 0.3889, "step": 230860 }, { "epoch": 66.41829689298044, "grad_norm": 1.215936303138733, "learning_rate": 0.0006716340621403912, "loss": 0.4571, "step": 230870 }, { "epoch": 66.42117376294591, "grad_norm": 1.330929160118103, "learning_rate": 0.0006715765247410817, "loss": 0.3719, "step": 230880 }, { "epoch": 66.42405063291139, "grad_norm": 0.7719408869743347, "learning_rate": 0.0006715189873417722, "loss": 0.5367, "step": 230890 }, { "epoch": 66.42692750287686, "grad_norm": 0.8421443104743958, "learning_rate": 0.0006714614499424626, "loss": 0.3904, "step": 230900 }, { "epoch": 66.42980437284234, "grad_norm": 0.7900868654251099, "learning_rate": 0.000671403912543153, "loss": 0.412, "step": 230910 }, { "epoch": 66.43268124280783, "grad_norm": 0.6832343935966492, "learning_rate": 0.0006713463751438436, "loss": 0.4193, "step": 230920 }, { "epoch": 66.43555811277331, "grad_norm": 0.9153329730033875, "learning_rate": 0.0006712888377445339, "loss": 0.4206, "step": 230930 }, { "epoch": 66.43843498273878, "grad_norm": 1.7883142232894897, "learning_rate": 0.0006712313003452244, "loss": 0.3507, "step": 230940 }, { "epoch": 66.44131185270426, "grad_norm": 1.948084831237793, "learning_rate": 0.0006711737629459148, "loss": 0.4633, "step": 230950 }, { "epoch": 66.44418872266974, "grad_norm": 2.688056468963623, "learning_rate": 0.0006711162255466053, "loss": 0.5457, "step": 230960 }, { "epoch": 66.44706559263521, "grad_norm": 1.23593008518219, "learning_rate": 0.0006710586881472957, "loss": 0.3916, "step": 230970 }, { "epoch": 66.44994246260069, "grad_norm": 1.4624971151351929, "learning_rate": 0.0006710011507479862, "loss": 0.3973, "step": 230980 }, { "epoch": 66.45281933256616, "grad_norm": 0.8822658658027649, "learning_rate": 0.0006709436133486767, "loss": 0.393, "step": 230990 }, { "epoch": 66.45569620253164, "grad_norm": 0.7217041254043579, "learning_rate": 0.0006708860759493671, "loss": 0.3895, "step": 231000 }, { "epoch": 66.45857307249712, "grad_norm": 1.038795828819275, "learning_rate": 0.0006708285385500576, "loss": 0.504, "step": 231010 }, { "epoch": 66.46144994246261, "grad_norm": 0.9950480461120605, "learning_rate": 0.0006707710011507479, "loss": 0.3797, "step": 231020 }, { "epoch": 66.46432681242808, "grad_norm": 0.7111487984657288, "learning_rate": 0.0006707134637514385, "loss": 0.3408, "step": 231030 }, { "epoch": 66.46720368239356, "grad_norm": 0.8192708492279053, "learning_rate": 0.0006706559263521289, "loss": 0.5081, "step": 231040 }, { "epoch": 66.47008055235904, "grad_norm": 1.4540294408798218, "learning_rate": 0.0006705983889528193, "loss": 0.5325, "step": 231050 }, { "epoch": 66.47295742232451, "grad_norm": 1.6534771919250488, "learning_rate": 0.0006705408515535097, "loss": 0.5389, "step": 231060 }, { "epoch": 66.47583429228999, "grad_norm": 1.5832775831222534, "learning_rate": 0.0006704833141542003, "loss": 0.45, "step": 231070 }, { "epoch": 66.47871116225546, "grad_norm": 1.760387897491455, "learning_rate": 0.0006704257767548907, "loss": 0.4085, "step": 231080 }, { "epoch": 66.48158803222094, "grad_norm": 0.8756046891212463, "learning_rate": 0.0006703682393555811, "loss": 0.3793, "step": 231090 }, { "epoch": 66.48446490218642, "grad_norm": 1.043670892715454, "learning_rate": 0.0006703107019562717, "loss": 0.4464, "step": 231100 }, { "epoch": 66.4873417721519, "grad_norm": 0.9711434841156006, "learning_rate": 0.000670253164556962, "loss": 0.3807, "step": 231110 }, { "epoch": 66.49021864211737, "grad_norm": 1.2568551301956177, "learning_rate": 0.0006701956271576525, "loss": 0.5118, "step": 231120 }, { "epoch": 66.49309551208286, "grad_norm": 1.5657191276550293, "learning_rate": 0.0006701380897583429, "loss": 0.516, "step": 231130 }, { "epoch": 66.49597238204834, "grad_norm": 2.3579466342926025, "learning_rate": 0.0006700805523590334, "loss": 0.581, "step": 231140 }, { "epoch": 66.49884925201381, "grad_norm": 1.0561283826828003, "learning_rate": 0.0006700230149597238, "loss": 0.3896, "step": 231150 }, { "epoch": 66.50172612197929, "grad_norm": 2.5246164798736572, "learning_rate": 0.0006699654775604143, "loss": 0.5143, "step": 231160 }, { "epoch": 66.50460299194476, "grad_norm": 1.4887232780456543, "learning_rate": 0.0006699079401611046, "loss": 0.4655, "step": 231170 }, { "epoch": 66.50747986191024, "grad_norm": 1.141611933708191, "learning_rate": 0.0006698504027617952, "loss": 0.3803, "step": 231180 }, { "epoch": 66.51035673187572, "grad_norm": 1.3065792322158813, "learning_rate": 0.0006697928653624857, "loss": 0.4, "step": 231190 }, { "epoch": 66.5132336018412, "grad_norm": 0.9255348443984985, "learning_rate": 0.000669735327963176, "loss": 0.4199, "step": 231200 }, { "epoch": 66.51611047180667, "grad_norm": 1.3329341411590576, "learning_rate": 0.0006696777905638666, "loss": 0.4024, "step": 231210 }, { "epoch": 66.51898734177215, "grad_norm": 0.6790747046470642, "learning_rate": 0.000669620253164557, "loss": 0.4502, "step": 231220 }, { "epoch": 66.52186421173764, "grad_norm": 0.8702067732810974, "learning_rate": 0.0006695627157652474, "loss": 0.5593, "step": 231230 }, { "epoch": 66.52474108170311, "grad_norm": 1.007396936416626, "learning_rate": 0.0006695051783659378, "loss": 0.4837, "step": 231240 }, { "epoch": 66.52761795166859, "grad_norm": 0.9073067903518677, "learning_rate": 0.0006694476409666284, "loss": 0.3721, "step": 231250 }, { "epoch": 66.53049482163406, "grad_norm": 1.8230396509170532, "learning_rate": 0.0006693901035673187, "loss": 0.4369, "step": 231260 }, { "epoch": 66.53337169159954, "grad_norm": 1.2014926671981812, "learning_rate": 0.0006693325661680092, "loss": 0.5774, "step": 231270 }, { "epoch": 66.53624856156502, "grad_norm": 1.5542360544204712, "learning_rate": 0.0006692750287686998, "loss": 0.4295, "step": 231280 }, { "epoch": 66.5391254315305, "grad_norm": 1.1517901420593262, "learning_rate": 0.0006692174913693901, "loss": 0.3422, "step": 231290 }, { "epoch": 66.54200230149597, "grad_norm": 1.2369136810302734, "learning_rate": 0.0006691599539700806, "loss": 0.4079, "step": 231300 }, { "epoch": 66.54487917146145, "grad_norm": 1.0041863918304443, "learning_rate": 0.000669102416570771, "loss": 0.4068, "step": 231310 }, { "epoch": 66.54775604142692, "grad_norm": 1.3979060649871826, "learning_rate": 0.0006690448791714615, "loss": 0.5056, "step": 231320 }, { "epoch": 66.5506329113924, "grad_norm": 0.9269031286239624, "learning_rate": 0.0006689873417721519, "loss": 0.367, "step": 231330 }, { "epoch": 66.55350978135789, "grad_norm": 0.8515864014625549, "learning_rate": 0.0006689298043728424, "loss": 0.3813, "step": 231340 }, { "epoch": 66.55638665132336, "grad_norm": 0.8071472644805908, "learning_rate": 0.0006688722669735327, "loss": 0.4573, "step": 231350 }, { "epoch": 66.55926352128884, "grad_norm": 0.8203406929969788, "learning_rate": 0.0006688147295742233, "loss": 0.414, "step": 231360 }, { "epoch": 66.56214039125432, "grad_norm": 0.7890409827232361, "learning_rate": 0.0006687571921749138, "loss": 0.4575, "step": 231370 }, { "epoch": 66.5650172612198, "grad_norm": 0.8753119707107544, "learning_rate": 0.0006686996547756041, "loss": 0.5125, "step": 231380 }, { "epoch": 66.56789413118527, "grad_norm": 1.3281737565994263, "learning_rate": 0.0006686421173762947, "loss": 0.3801, "step": 231390 }, { "epoch": 66.57077100115075, "grad_norm": 1.276049017906189, "learning_rate": 0.0006685845799769851, "loss": 0.3962, "step": 231400 }, { "epoch": 66.57364787111622, "grad_norm": 1.1572214365005493, "learning_rate": 0.0006685270425776755, "loss": 0.4534, "step": 231410 }, { "epoch": 66.5765247410817, "grad_norm": 2.461803913116455, "learning_rate": 0.0006684695051783659, "loss": 0.4334, "step": 231420 }, { "epoch": 66.57940161104717, "grad_norm": 2.4245564937591553, "learning_rate": 0.0006684119677790565, "loss": 0.4425, "step": 231430 }, { "epoch": 66.58227848101266, "grad_norm": 2.2613587379455566, "learning_rate": 0.0006683544303797468, "loss": 0.5166, "step": 231440 }, { "epoch": 66.58515535097814, "grad_norm": 1.278395414352417, "learning_rate": 0.0006682968929804373, "loss": 0.4437, "step": 231450 }, { "epoch": 66.58803222094362, "grad_norm": 1.3333407640457153, "learning_rate": 0.0006682393555811277, "loss": 0.4648, "step": 231460 }, { "epoch": 66.5909090909091, "grad_norm": 0.5580524206161499, "learning_rate": 0.0006681818181818182, "loss": 0.4025, "step": 231470 }, { "epoch": 66.59378596087457, "grad_norm": 2.876039505004883, "learning_rate": 0.0006681242807825087, "loss": 0.4716, "step": 231480 }, { "epoch": 66.59666283084005, "grad_norm": 2.1951119899749756, "learning_rate": 0.0006680667433831991, "loss": 0.4138, "step": 231490 }, { "epoch": 66.59953970080552, "grad_norm": 1.5529557466506958, "learning_rate": 0.0006680092059838896, "loss": 0.3888, "step": 231500 }, { "epoch": 66.602416570771, "grad_norm": 0.9564087390899658, "learning_rate": 0.00066795166858458, "loss": 0.4445, "step": 231510 }, { "epoch": 66.60529344073647, "grad_norm": 1.4361799955368042, "learning_rate": 0.0006678941311852705, "loss": 0.4141, "step": 231520 }, { "epoch": 66.60817031070195, "grad_norm": 1.52432119846344, "learning_rate": 0.0006678365937859608, "loss": 0.4047, "step": 231530 }, { "epoch": 66.61104718066743, "grad_norm": 1.0178039073944092, "learning_rate": 0.0006677790563866514, "loss": 0.5126, "step": 231540 }, { "epoch": 66.61392405063292, "grad_norm": 1.5303090810775757, "learning_rate": 0.0006677215189873418, "loss": 0.4961, "step": 231550 }, { "epoch": 66.6168009205984, "grad_norm": 0.7595980763435364, "learning_rate": 0.0006676639815880322, "loss": 0.328, "step": 231560 }, { "epoch": 66.61967779056387, "grad_norm": 1.646782636642456, "learning_rate": 0.0006676064441887228, "loss": 0.4, "step": 231570 }, { "epoch": 66.62255466052935, "grad_norm": 1.3319346904754639, "learning_rate": 0.0006675489067894132, "loss": 0.3991, "step": 231580 }, { "epoch": 66.62543153049482, "grad_norm": 2.906585454940796, "learning_rate": 0.0006674913693901036, "loss": 0.4492, "step": 231590 }, { "epoch": 66.6283084004603, "grad_norm": 1.5196796655654907, "learning_rate": 0.000667433831990794, "loss": 0.4594, "step": 231600 }, { "epoch": 66.63118527042577, "grad_norm": 1.9345930814743042, "learning_rate": 0.0006673762945914846, "loss": 0.4229, "step": 231610 }, { "epoch": 66.63406214039125, "grad_norm": 1.1834523677825928, "learning_rate": 0.0006673187571921749, "loss": 0.51, "step": 231620 }, { "epoch": 66.63693901035673, "grad_norm": 1.177124261856079, "learning_rate": 0.0006672612197928654, "loss": 0.3723, "step": 231630 }, { "epoch": 66.6398158803222, "grad_norm": 0.987186849117279, "learning_rate": 0.0006672036823935557, "loss": 0.4437, "step": 231640 }, { "epoch": 66.6426927502877, "grad_norm": 0.9455357193946838, "learning_rate": 0.0006671461449942463, "loss": 0.4405, "step": 231650 }, { "epoch": 66.64556962025317, "grad_norm": 1.1230536699295044, "learning_rate": 0.0006670886075949367, "loss": 0.3708, "step": 231660 }, { "epoch": 66.64844649021865, "grad_norm": 1.25155770778656, "learning_rate": 0.0006670310701956271, "loss": 0.6019, "step": 231670 }, { "epoch": 66.65132336018412, "grad_norm": 1.1033650636672974, "learning_rate": 0.0006669735327963177, "loss": 0.4181, "step": 231680 }, { "epoch": 66.6542002301496, "grad_norm": 0.7618333101272583, "learning_rate": 0.0006669159953970081, "loss": 0.4511, "step": 231690 }, { "epoch": 66.65707710011507, "grad_norm": 0.9367378950119019, "learning_rate": 0.0006668584579976985, "loss": 0.4818, "step": 231700 }, { "epoch": 66.65995397008055, "grad_norm": 1.8857039213180542, "learning_rate": 0.0006668009205983889, "loss": 0.5092, "step": 231710 }, { "epoch": 66.66283084004603, "grad_norm": 0.5928177237510681, "learning_rate": 0.0006667433831990795, "loss": 0.619, "step": 231720 }, { "epoch": 66.6657077100115, "grad_norm": 1.2263237237930298, "learning_rate": 0.0006666858457997698, "loss": 0.4485, "step": 231730 }, { "epoch": 66.66858457997698, "grad_norm": 1.4936684370040894, "learning_rate": 0.0006666283084004603, "loss": 0.4043, "step": 231740 }, { "epoch": 66.67146144994246, "grad_norm": 1.0236610174179077, "learning_rate": 0.0006665707710011507, "loss": 0.4382, "step": 231750 }, { "epoch": 66.67433831990795, "grad_norm": 0.8454793095588684, "learning_rate": 0.0006665132336018412, "loss": 0.3961, "step": 231760 }, { "epoch": 66.67721518987342, "grad_norm": 0.7325689792633057, "learning_rate": 0.0006664556962025316, "loss": 0.4197, "step": 231770 }, { "epoch": 66.6800920598389, "grad_norm": 1.6445603370666504, "learning_rate": 0.0006663981588032221, "loss": 0.3745, "step": 231780 }, { "epoch": 66.68296892980437, "grad_norm": 0.8324635624885559, "learning_rate": 0.0006663406214039126, "loss": 0.5408, "step": 231790 }, { "epoch": 66.68584579976985, "grad_norm": 1.108731985092163, "learning_rate": 0.000666283084004603, "loss": 0.3953, "step": 231800 }, { "epoch": 66.68872266973533, "grad_norm": 1.5572162866592407, "learning_rate": 0.0006662255466052935, "loss": 0.6536, "step": 231810 }, { "epoch": 66.6915995397008, "grad_norm": 0.9456593990325928, "learning_rate": 0.0006661680092059838, "loss": 0.3931, "step": 231820 }, { "epoch": 66.69447640966628, "grad_norm": 1.5161222219467163, "learning_rate": 0.0006661104718066744, "loss": 0.4769, "step": 231830 }, { "epoch": 66.69735327963176, "grad_norm": 1.1329410076141357, "learning_rate": 0.0006660529344073648, "loss": 0.463, "step": 231840 }, { "epoch": 66.70023014959723, "grad_norm": 1.1271899938583374, "learning_rate": 0.0006659953970080552, "loss": 0.4304, "step": 231850 }, { "epoch": 66.70310701956272, "grad_norm": 0.8525890707969666, "learning_rate": 0.0006659378596087456, "loss": 0.4276, "step": 231860 }, { "epoch": 66.7059838895282, "grad_norm": 1.863204836845398, "learning_rate": 0.0006658803222094362, "loss": 0.4344, "step": 231870 }, { "epoch": 66.70886075949367, "grad_norm": 1.1511436700820923, "learning_rate": 0.0006658227848101266, "loss": 0.4913, "step": 231880 }, { "epoch": 66.71173762945915, "grad_norm": 1.4706957340240479, "learning_rate": 0.000665765247410817, "loss": 0.6016, "step": 231890 }, { "epoch": 66.71461449942463, "grad_norm": 2.081331491470337, "learning_rate": 0.0006657077100115076, "loss": 0.4939, "step": 231900 }, { "epoch": 66.7174913693901, "grad_norm": 1.1833622455596924, "learning_rate": 0.0006656501726121979, "loss": 0.4633, "step": 231910 }, { "epoch": 66.72036823935558, "grad_norm": 0.9247418642044067, "learning_rate": 0.0006655926352128884, "loss": 0.4282, "step": 231920 }, { "epoch": 66.72324510932106, "grad_norm": 0.9237625002861023, "learning_rate": 0.0006655350978135788, "loss": 0.5847, "step": 231930 }, { "epoch": 66.72612197928653, "grad_norm": 1.2601529359817505, "learning_rate": 0.0006654775604142693, "loss": 0.43, "step": 231940 }, { "epoch": 66.72899884925201, "grad_norm": 0.885338544845581, "learning_rate": 0.0006654200230149597, "loss": 0.366, "step": 231950 }, { "epoch": 66.7318757192175, "grad_norm": 0.9429763555526733, "learning_rate": 0.0006653624856156502, "loss": 0.4577, "step": 231960 }, { "epoch": 66.73475258918297, "grad_norm": 1.2267452478408813, "learning_rate": 0.0006653049482163406, "loss": 0.473, "step": 231970 }, { "epoch": 66.73762945914845, "grad_norm": 1.1349661350250244, "learning_rate": 0.0006652474108170311, "loss": 0.4422, "step": 231980 }, { "epoch": 66.74050632911393, "grad_norm": 0.7839086651802063, "learning_rate": 0.0006651898734177216, "loss": 0.3666, "step": 231990 }, { "epoch": 66.7433831990794, "grad_norm": 1.2140278816223145, "learning_rate": 0.0006651323360184119, "loss": 0.4527, "step": 232000 }, { "epoch": 66.74626006904488, "grad_norm": 0.7756178379058838, "learning_rate": 0.0006650747986191025, "loss": 0.4566, "step": 232010 }, { "epoch": 66.74913693901036, "grad_norm": 2.2830498218536377, "learning_rate": 0.0006650172612197929, "loss": 0.3781, "step": 232020 }, { "epoch": 66.75201380897583, "grad_norm": 1.3945870399475098, "learning_rate": 0.0006649597238204833, "loss": 0.3943, "step": 232030 }, { "epoch": 66.75489067894131, "grad_norm": 0.9823087453842163, "learning_rate": 0.0006649021864211737, "loss": 0.373, "step": 232040 }, { "epoch": 66.75776754890678, "grad_norm": 1.4058706760406494, "learning_rate": 0.0006648446490218643, "loss": 0.5081, "step": 232050 }, { "epoch": 66.76064441887226, "grad_norm": 0.9589275121688843, "learning_rate": 0.0006647871116225546, "loss": 0.4349, "step": 232060 }, { "epoch": 66.76352128883775, "grad_norm": 2.200151205062866, "learning_rate": 0.0006647295742232451, "loss": 0.402, "step": 232070 }, { "epoch": 66.76639815880323, "grad_norm": 1.05356764793396, "learning_rate": 0.0006646720368239357, "loss": 0.5543, "step": 232080 }, { "epoch": 66.7692750287687, "grad_norm": 0.8308634757995605, "learning_rate": 0.000664614499424626, "loss": 0.5263, "step": 232090 }, { "epoch": 66.77215189873418, "grad_norm": 1.04966402053833, "learning_rate": 0.0006645569620253165, "loss": 0.4489, "step": 232100 }, { "epoch": 66.77502876869966, "grad_norm": 0.8749712109565735, "learning_rate": 0.0006644994246260069, "loss": 0.4103, "step": 232110 }, { "epoch": 66.77790563866513, "grad_norm": 0.8826075792312622, "learning_rate": 0.0006644418872266974, "loss": 0.4594, "step": 232120 }, { "epoch": 66.78078250863061, "grad_norm": 1.5437970161437988, "learning_rate": 0.0006643843498273878, "loss": 0.4427, "step": 232130 }, { "epoch": 66.78365937859608, "grad_norm": 0.7764111161231995, "learning_rate": 0.0006643268124280783, "loss": 0.4544, "step": 232140 }, { "epoch": 66.78653624856156, "grad_norm": 0.8990325331687927, "learning_rate": 0.0006642692750287686, "loss": 0.4787, "step": 232150 }, { "epoch": 66.78941311852704, "grad_norm": 0.9451994299888611, "learning_rate": 0.0006642117376294592, "loss": 0.3806, "step": 232160 }, { "epoch": 66.79228998849253, "grad_norm": 1.2701208591461182, "learning_rate": 0.0006641542002301497, "loss": 0.5429, "step": 232170 }, { "epoch": 66.795166858458, "grad_norm": 1.0485725402832031, "learning_rate": 0.00066409666283084, "loss": 0.4698, "step": 232180 }, { "epoch": 66.79804372842348, "grad_norm": 2.2901158332824707, "learning_rate": 0.0006640391254315306, "loss": 0.4951, "step": 232190 }, { "epoch": 66.80092059838896, "grad_norm": 1.4389779567718506, "learning_rate": 0.000663981588032221, "loss": 0.4459, "step": 232200 }, { "epoch": 66.80379746835443, "grad_norm": 0.9450972080230713, "learning_rate": 0.0006639240506329114, "loss": 0.4154, "step": 232210 }, { "epoch": 66.80667433831991, "grad_norm": 1.113844394683838, "learning_rate": 0.0006638665132336018, "loss": 0.4468, "step": 232220 }, { "epoch": 66.80955120828538, "grad_norm": 1.221351146697998, "learning_rate": 0.0006638089758342924, "loss": 0.3967, "step": 232230 }, { "epoch": 66.81242807825086, "grad_norm": 0.7626217007637024, "learning_rate": 0.0006637514384349827, "loss": 0.3875, "step": 232240 }, { "epoch": 66.81530494821634, "grad_norm": 1.5377658605575562, "learning_rate": 0.0006636939010356732, "loss": 0.4949, "step": 232250 }, { "epoch": 66.81818181818181, "grad_norm": 1.873586893081665, "learning_rate": 0.0006636363636363638, "loss": 0.4736, "step": 232260 }, { "epoch": 66.82105868814729, "grad_norm": 1.243575096130371, "learning_rate": 0.0006635788262370541, "loss": 0.4686, "step": 232270 }, { "epoch": 66.82393555811278, "grad_norm": 2.8560965061187744, "learning_rate": 0.0006635212888377446, "loss": 0.4509, "step": 232280 }, { "epoch": 66.82681242807826, "grad_norm": 0.9001913070678711, "learning_rate": 0.000663463751438435, "loss": 0.4771, "step": 232290 }, { "epoch": 66.82968929804373, "grad_norm": 1.4198750257492065, "learning_rate": 0.0006634062140391255, "loss": 0.5754, "step": 232300 }, { "epoch": 66.83256616800921, "grad_norm": 1.1661150455474854, "learning_rate": 0.0006633486766398159, "loss": 0.4087, "step": 232310 }, { "epoch": 66.83544303797468, "grad_norm": 1.2497822046279907, "learning_rate": 0.0006632911392405064, "loss": 0.4861, "step": 232320 }, { "epoch": 66.83831990794016, "grad_norm": 1.815218448638916, "learning_rate": 0.0006632336018411967, "loss": 0.393, "step": 232330 }, { "epoch": 66.84119677790564, "grad_norm": 1.0906391143798828, "learning_rate": 0.0006631760644418873, "loss": 0.4184, "step": 232340 }, { "epoch": 66.84407364787111, "grad_norm": 1.7560735940933228, "learning_rate": 0.0006631185270425777, "loss": 0.4796, "step": 232350 }, { "epoch": 66.84695051783659, "grad_norm": 1.0532184839248657, "learning_rate": 0.0006630609896432681, "loss": 0.4534, "step": 232360 }, { "epoch": 66.84982738780207, "grad_norm": 1.1042038202285767, "learning_rate": 0.0006630034522439587, "loss": 0.5173, "step": 232370 }, { "epoch": 66.85270425776756, "grad_norm": 1.8171801567077637, "learning_rate": 0.0006629459148446491, "loss": 0.4383, "step": 232380 }, { "epoch": 66.85558112773303, "grad_norm": 1.3869130611419678, "learning_rate": 0.0006628883774453395, "loss": 0.5497, "step": 232390 }, { "epoch": 66.85845799769851, "grad_norm": 1.2959318161010742, "learning_rate": 0.0006628308400460299, "loss": 0.4609, "step": 232400 }, { "epoch": 66.86133486766398, "grad_norm": 0.9952995181083679, "learning_rate": 0.0006627733026467205, "loss": 0.4023, "step": 232410 }, { "epoch": 66.86421173762946, "grad_norm": 1.1801509857177734, "learning_rate": 0.0006627157652474108, "loss": 0.5161, "step": 232420 }, { "epoch": 66.86708860759494, "grad_norm": 0.7580500245094299, "learning_rate": 0.0006626582278481013, "loss": 0.4088, "step": 232430 }, { "epoch": 66.86996547756041, "grad_norm": 0.8148230910301208, "learning_rate": 0.0006626006904487916, "loss": 0.453, "step": 232440 }, { "epoch": 66.87284234752589, "grad_norm": 1.5545697212219238, "learning_rate": 0.0006625431530494822, "loss": 0.4181, "step": 232450 }, { "epoch": 66.87571921749137, "grad_norm": 1.243043065071106, "learning_rate": 0.0006624856156501726, "loss": 0.5045, "step": 232460 }, { "epoch": 66.87859608745684, "grad_norm": 1.4711320400238037, "learning_rate": 0.000662428078250863, "loss": 0.4317, "step": 232470 }, { "epoch": 66.88147295742232, "grad_norm": 1.0292781591415405, "learning_rate": 0.0006623705408515536, "loss": 0.3377, "step": 232480 }, { "epoch": 66.88434982738781, "grad_norm": 1.1353453397750854, "learning_rate": 0.000662313003452244, "loss": 0.4899, "step": 232490 }, { "epoch": 66.88722669735328, "grad_norm": 1.9369038343429565, "learning_rate": 0.0006622554660529344, "loss": 0.4432, "step": 232500 }, { "epoch": 66.89010356731876, "grad_norm": 1.4615315198898315, "learning_rate": 0.0006621979286536248, "loss": 0.4353, "step": 232510 }, { "epoch": 66.89298043728424, "grad_norm": 0.9851123094558716, "learning_rate": 0.0006621403912543154, "loss": 0.4315, "step": 232520 }, { "epoch": 66.89585730724971, "grad_norm": 1.307766318321228, "learning_rate": 0.0006620828538550057, "loss": 0.5039, "step": 232530 }, { "epoch": 66.89873417721519, "grad_norm": 1.0828335285186768, "learning_rate": 0.0006620253164556962, "loss": 0.5519, "step": 232540 }, { "epoch": 66.90161104718067, "grad_norm": 1.0670809745788574, "learning_rate": 0.0006619677790563866, "loss": 0.3883, "step": 232550 }, { "epoch": 66.90448791714614, "grad_norm": 0.8204807043075562, "learning_rate": 0.0006619102416570771, "loss": 0.5066, "step": 232560 }, { "epoch": 66.90736478711162, "grad_norm": 0.7390071749687195, "learning_rate": 0.0006618527042577675, "loss": 0.429, "step": 232570 }, { "epoch": 66.9102416570771, "grad_norm": 1.172521710395813, "learning_rate": 0.000661795166858458, "loss": 0.4226, "step": 232580 }, { "epoch": 66.91311852704258, "grad_norm": 1.46426260471344, "learning_rate": 0.0006617376294591485, "loss": 0.4809, "step": 232590 }, { "epoch": 66.91599539700806, "grad_norm": 1.0096601247787476, "learning_rate": 0.0006616800920598389, "loss": 0.525, "step": 232600 }, { "epoch": 66.91887226697354, "grad_norm": 1.778464436531067, "learning_rate": 0.0006616225546605294, "loss": 0.533, "step": 232610 }, { "epoch": 66.92174913693901, "grad_norm": 0.8207953572273254, "learning_rate": 0.0006615650172612197, "loss": 0.382, "step": 232620 }, { "epoch": 66.92462600690449, "grad_norm": 1.0100067853927612, "learning_rate": 0.0006615074798619103, "loss": 0.3847, "step": 232630 }, { "epoch": 66.92750287686997, "grad_norm": 0.5824933648109436, "learning_rate": 0.0006614499424626007, "loss": 0.418, "step": 232640 }, { "epoch": 66.93037974683544, "grad_norm": 1.0354878902435303, "learning_rate": 0.0006613924050632911, "loss": 0.4027, "step": 232650 }, { "epoch": 66.93325661680092, "grad_norm": 0.9579403400421143, "learning_rate": 0.0006613348676639816, "loss": 0.5358, "step": 232660 }, { "epoch": 66.9361334867664, "grad_norm": 1.3865760564804077, "learning_rate": 0.0006612773302646721, "loss": 0.4387, "step": 232670 }, { "epoch": 66.93901035673187, "grad_norm": 1.6371634006500244, "learning_rate": 0.0006612197928653624, "loss": 0.4754, "step": 232680 }, { "epoch": 66.94188722669735, "grad_norm": 1.4316474199295044, "learning_rate": 0.0006611622554660529, "loss": 0.483, "step": 232690 }, { "epoch": 66.94476409666284, "grad_norm": 1.2901633977890015, "learning_rate": 0.0006611047180667435, "loss": 0.4327, "step": 232700 }, { "epoch": 66.94764096662831, "grad_norm": 0.9753307104110718, "learning_rate": 0.0006610471806674338, "loss": 0.379, "step": 232710 }, { "epoch": 66.95051783659379, "grad_norm": 1.342761754989624, "learning_rate": 0.0006609896432681243, "loss": 0.5356, "step": 232720 }, { "epoch": 66.95339470655927, "grad_norm": 1.050081491470337, "learning_rate": 0.0006609321058688147, "loss": 0.3968, "step": 232730 }, { "epoch": 66.95627157652474, "grad_norm": 1.7382506132125854, "learning_rate": 0.0006608745684695052, "loss": 0.3895, "step": 232740 }, { "epoch": 66.95914844649022, "grad_norm": 1.1444882154464722, "learning_rate": 0.0006608170310701956, "loss": 0.414, "step": 232750 }, { "epoch": 66.9620253164557, "grad_norm": 0.6798011660575867, "learning_rate": 0.0006607594936708861, "loss": 0.3501, "step": 232760 }, { "epoch": 66.96490218642117, "grad_norm": 1.420628309249878, "learning_rate": 0.0006607019562715765, "loss": 0.4764, "step": 232770 }, { "epoch": 66.96777905638665, "grad_norm": 1.4948590993881226, "learning_rate": 0.000660644418872267, "loss": 0.4163, "step": 232780 }, { "epoch": 66.97065592635212, "grad_norm": 1.445392370223999, "learning_rate": 0.0006605868814729575, "loss": 0.612, "step": 232790 }, { "epoch": 66.97353279631761, "grad_norm": 0.8995749950408936, "learning_rate": 0.0006605293440736478, "loss": 0.4936, "step": 232800 }, { "epoch": 66.97640966628309, "grad_norm": 2.0803117752075195, "learning_rate": 0.0006604718066743384, "loss": 0.4639, "step": 232810 }, { "epoch": 66.97928653624857, "grad_norm": 0.8287461996078491, "learning_rate": 0.0006604142692750288, "loss": 0.5269, "step": 232820 }, { "epoch": 66.98216340621404, "grad_norm": 1.2856870889663696, "learning_rate": 0.0006603567318757192, "loss": 0.4666, "step": 232830 }, { "epoch": 66.98504027617952, "grad_norm": 1.3155421018600464, "learning_rate": 0.0006602991944764096, "loss": 0.3608, "step": 232840 }, { "epoch": 66.987917146145, "grad_norm": 0.7094634771347046, "learning_rate": 0.0006602416570771002, "loss": 0.4457, "step": 232850 }, { "epoch": 66.99079401611047, "grad_norm": 1.570634365081787, "learning_rate": 0.0006601841196777905, "loss": 0.4716, "step": 232860 }, { "epoch": 66.99367088607595, "grad_norm": 1.2787081003189087, "learning_rate": 0.000660126582278481, "loss": 0.4198, "step": 232870 }, { "epoch": 66.99654775604142, "grad_norm": 1.2944329977035522, "learning_rate": 0.0006600690448791716, "loss": 0.3982, "step": 232880 }, { "epoch": 66.9994246260069, "grad_norm": 1.7890795469284058, "learning_rate": 0.0006600115074798619, "loss": 0.4689, "step": 232890 }, { "epoch": 67.00230149597238, "grad_norm": 1.33231520652771, "learning_rate": 0.0006599539700805524, "loss": 0.4989, "step": 232900 }, { "epoch": 67.00517836593787, "grad_norm": 0.7714821100234985, "learning_rate": 0.0006598964326812428, "loss": 0.3872, "step": 232910 }, { "epoch": 67.00805523590334, "grad_norm": 0.8803709149360657, "learning_rate": 0.0006598388952819333, "loss": 0.427, "step": 232920 }, { "epoch": 67.01093210586882, "grad_norm": 1.1025397777557373, "learning_rate": 0.0006597813578826237, "loss": 0.4713, "step": 232930 }, { "epoch": 67.0138089758343, "grad_norm": 0.8923317193984985, "learning_rate": 0.0006597238204833142, "loss": 0.4233, "step": 232940 }, { "epoch": 67.01668584579977, "grad_norm": 0.9372201561927795, "learning_rate": 0.0006596662830840046, "loss": 0.3802, "step": 232950 }, { "epoch": 67.01956271576525, "grad_norm": 0.8720126152038574, "learning_rate": 0.0006596087456846951, "loss": 0.3555, "step": 232960 }, { "epoch": 67.02243958573072, "grad_norm": 0.8158531188964844, "learning_rate": 0.0006595512082853855, "loss": 0.4078, "step": 232970 }, { "epoch": 67.0253164556962, "grad_norm": 0.9424512982368469, "learning_rate": 0.0006594936708860759, "loss": 0.3915, "step": 232980 }, { "epoch": 67.02819332566168, "grad_norm": 1.8529716730117798, "learning_rate": 0.0006594361334867665, "loss": 0.5149, "step": 232990 }, { "epoch": 67.03107019562715, "grad_norm": 1.2114274501800537, "learning_rate": 0.0006593785960874569, "loss": 0.4399, "step": 233000 }, { "epoch": 67.03394706559264, "grad_norm": 1.2045425176620483, "learning_rate": 0.0006593210586881473, "loss": 0.4285, "step": 233010 }, { "epoch": 67.03682393555812, "grad_norm": 0.6091895699501038, "learning_rate": 0.0006592635212888377, "loss": 0.3328, "step": 233020 }, { "epoch": 67.0397008055236, "grad_norm": 1.2813172340393066, "learning_rate": 0.0006592059838895283, "loss": 0.4373, "step": 233030 }, { "epoch": 67.04257767548907, "grad_norm": 1.057469129562378, "learning_rate": 0.0006591484464902186, "loss": 0.3998, "step": 233040 }, { "epoch": 67.04545454545455, "grad_norm": 1.1775908470153809, "learning_rate": 0.0006590909090909091, "loss": 0.4379, "step": 233050 }, { "epoch": 67.04833141542002, "grad_norm": 1.3286327123641968, "learning_rate": 0.0006590333716915996, "loss": 0.4327, "step": 233060 }, { "epoch": 67.0512082853855, "grad_norm": 1.9454106092453003, "learning_rate": 0.00065897583429229, "loss": 0.4569, "step": 233070 }, { "epoch": 67.05408515535098, "grad_norm": 1.2320566177368164, "learning_rate": 0.0006589182968929805, "loss": 0.4674, "step": 233080 }, { "epoch": 67.05696202531645, "grad_norm": 0.9482799768447876, "learning_rate": 0.0006588607594936709, "loss": 0.3277, "step": 233090 }, { "epoch": 67.05983889528193, "grad_norm": 1.445849895477295, "learning_rate": 0.0006588032220943614, "loss": 0.4693, "step": 233100 }, { "epoch": 67.0627157652474, "grad_norm": 0.8983811140060425, "learning_rate": 0.0006587456846950518, "loss": 0.4336, "step": 233110 }, { "epoch": 67.0655926352129, "grad_norm": 1.1955921649932861, "learning_rate": 0.0006586881472957423, "loss": 0.4593, "step": 233120 }, { "epoch": 67.06846950517837, "grad_norm": 1.921110987663269, "learning_rate": 0.0006586306098964326, "loss": 0.3649, "step": 233130 }, { "epoch": 67.07134637514385, "grad_norm": 1.1887307167053223, "learning_rate": 0.0006585730724971232, "loss": 0.4434, "step": 233140 }, { "epoch": 67.07422324510932, "grad_norm": 1.6801166534423828, "learning_rate": 0.0006585155350978136, "loss": 0.4469, "step": 233150 }, { "epoch": 67.0771001150748, "grad_norm": 1.721551775932312, "learning_rate": 0.000658457997698504, "loss": 0.4275, "step": 233160 }, { "epoch": 67.07997698504028, "grad_norm": 0.8920994400978088, "learning_rate": 0.0006584004602991946, "loss": 0.3961, "step": 233170 }, { "epoch": 67.08285385500575, "grad_norm": 2.679872512817383, "learning_rate": 0.000658342922899885, "loss": 0.4743, "step": 233180 }, { "epoch": 67.08573072497123, "grad_norm": 1.4737402200698853, "learning_rate": 0.0006582853855005754, "loss": 0.5429, "step": 233190 }, { "epoch": 67.0886075949367, "grad_norm": 2.178737163543701, "learning_rate": 0.0006582278481012658, "loss": 0.5223, "step": 233200 }, { "epoch": 67.09148446490218, "grad_norm": 0.7840883731842041, "learning_rate": 0.0006581703107019564, "loss": 0.3432, "step": 233210 }, { "epoch": 67.09436133486767, "grad_norm": 0.9708574414253235, "learning_rate": 0.0006581127733026467, "loss": 0.4448, "step": 233220 }, { "epoch": 67.09723820483315, "grad_norm": 1.5788980722427368, "learning_rate": 0.0006580552359033372, "loss": 0.2962, "step": 233230 }, { "epoch": 67.10011507479862, "grad_norm": 1.1741029024124146, "learning_rate": 0.0006579976985040277, "loss": 0.4444, "step": 233240 }, { "epoch": 67.1029919447641, "grad_norm": 2.1622724533081055, "learning_rate": 0.0006579401611047181, "loss": 0.4307, "step": 233250 }, { "epoch": 67.10586881472958, "grad_norm": 1.4485089778900146, "learning_rate": 0.0006578826237054085, "loss": 0.4892, "step": 233260 }, { "epoch": 67.10874568469505, "grad_norm": 0.998306393623352, "learning_rate": 0.0006578250863060989, "loss": 0.449, "step": 233270 }, { "epoch": 67.11162255466053, "grad_norm": 1.5745916366577148, "learning_rate": 0.0006577675489067895, "loss": 0.3217, "step": 233280 }, { "epoch": 67.114499424626, "grad_norm": 1.0282971858978271, "learning_rate": 0.0006577100115074799, "loss": 0.4397, "step": 233290 }, { "epoch": 67.11737629459148, "grad_norm": 1.5200668573379517, "learning_rate": 0.0006576524741081703, "loss": 0.4226, "step": 233300 }, { "epoch": 67.12025316455696, "grad_norm": 1.1490780115127563, "learning_rate": 0.0006575949367088607, "loss": 0.3863, "step": 233310 }, { "epoch": 67.12313003452243, "grad_norm": 0.828389585018158, "learning_rate": 0.0006575373993095513, "loss": 0.4022, "step": 233320 }, { "epoch": 67.12600690448792, "grad_norm": 1.3354333639144897, "learning_rate": 0.0006574798619102416, "loss": 0.408, "step": 233330 }, { "epoch": 67.1288837744534, "grad_norm": 1.5980746746063232, "learning_rate": 0.0006574223245109321, "loss": 0.3868, "step": 233340 }, { "epoch": 67.13176064441888, "grad_norm": 0.8616660237312317, "learning_rate": 0.0006573647871116226, "loss": 0.3633, "step": 233350 }, { "epoch": 67.13463751438435, "grad_norm": 1.0401179790496826, "learning_rate": 0.000657307249712313, "loss": 0.474, "step": 233360 }, { "epoch": 67.13751438434983, "grad_norm": 2.1944637298583984, "learning_rate": 0.0006572497123130034, "loss": 0.4737, "step": 233370 }, { "epoch": 67.1403912543153, "grad_norm": 1.6859368085861206, "learning_rate": 0.0006571921749136939, "loss": 0.4555, "step": 233380 }, { "epoch": 67.14326812428078, "grad_norm": 1.1615618467330933, "learning_rate": 0.0006571346375143844, "loss": 0.4196, "step": 233390 }, { "epoch": 67.14614499424626, "grad_norm": 1.6807888746261597, "learning_rate": 0.0006570771001150748, "loss": 0.4367, "step": 233400 }, { "epoch": 67.14902186421173, "grad_norm": 1.3654865026474, "learning_rate": 0.0006570195627157653, "loss": 0.3724, "step": 233410 }, { "epoch": 67.15189873417721, "grad_norm": 0.8931415677070618, "learning_rate": 0.0006569620253164556, "loss": 0.3168, "step": 233420 }, { "epoch": 67.1547756041427, "grad_norm": 0.9070197343826294, "learning_rate": 0.0006569044879171462, "loss": 0.4644, "step": 233430 }, { "epoch": 67.15765247410818, "grad_norm": 1.0628950595855713, "learning_rate": 0.0006568469505178366, "loss": 0.4724, "step": 233440 }, { "epoch": 67.16052934407365, "grad_norm": 1.1734269857406616, "learning_rate": 0.000656789413118527, "loss": 0.3711, "step": 233450 }, { "epoch": 67.16340621403913, "grad_norm": 1.2594974040985107, "learning_rate": 0.0006567318757192175, "loss": 0.3953, "step": 233460 }, { "epoch": 67.1662830840046, "grad_norm": 0.7664915919303894, "learning_rate": 0.000656674338319908, "loss": 0.3743, "step": 233470 }, { "epoch": 67.16915995397008, "grad_norm": 0.7457327842712402, "learning_rate": 0.0006566168009205983, "loss": 0.3723, "step": 233480 }, { "epoch": 67.17203682393556, "grad_norm": 0.8563648462295532, "learning_rate": 0.0006565592635212888, "loss": 0.3805, "step": 233490 }, { "epoch": 67.17491369390103, "grad_norm": 1.0251187086105347, "learning_rate": 0.0006565017261219794, "loss": 0.3374, "step": 233500 }, { "epoch": 67.17779056386651, "grad_norm": 1.271618366241455, "learning_rate": 0.0006564441887226697, "loss": 0.381, "step": 233510 }, { "epoch": 67.18066743383199, "grad_norm": 1.7767622470855713, "learning_rate": 0.0006563866513233602, "loss": 0.3711, "step": 233520 }, { "epoch": 67.18354430379746, "grad_norm": 1.298181414604187, "learning_rate": 0.0006563291139240506, "loss": 0.3571, "step": 233530 }, { "epoch": 67.18642117376295, "grad_norm": 1.1528456211090088, "learning_rate": 0.0006562715765247411, "loss": 0.3217, "step": 233540 }, { "epoch": 67.18929804372843, "grad_norm": 1.2111661434173584, "learning_rate": 0.0006562140391254315, "loss": 0.3541, "step": 233550 }, { "epoch": 67.1921749136939, "grad_norm": 1.1889972686767578, "learning_rate": 0.000656156501726122, "loss": 0.406, "step": 233560 }, { "epoch": 67.19505178365938, "grad_norm": 1.2254137992858887, "learning_rate": 0.0006560989643268124, "loss": 0.4463, "step": 233570 }, { "epoch": 67.19792865362486, "grad_norm": 1.3702694177627563, "learning_rate": 0.0006560414269275029, "loss": 0.3123, "step": 233580 }, { "epoch": 67.20080552359033, "grad_norm": 1.8119497299194336, "learning_rate": 0.0006559838895281934, "loss": 0.3515, "step": 233590 }, { "epoch": 67.20368239355581, "grad_norm": 1.4173452854156494, "learning_rate": 0.0006559263521288837, "loss": 0.4464, "step": 233600 }, { "epoch": 67.20655926352129, "grad_norm": 1.4897710084915161, "learning_rate": 0.0006558688147295743, "loss": 0.3448, "step": 233610 }, { "epoch": 67.20943613348676, "grad_norm": 2.151667594909668, "learning_rate": 0.0006558112773302647, "loss": 0.3982, "step": 233620 }, { "epoch": 67.21231300345224, "grad_norm": 1.2240016460418701, "learning_rate": 0.0006557537399309551, "loss": 0.487, "step": 233630 }, { "epoch": 67.21518987341773, "grad_norm": 1.3966032266616821, "learning_rate": 0.0006556962025316456, "loss": 0.4304, "step": 233640 }, { "epoch": 67.2180667433832, "grad_norm": 1.660058856010437, "learning_rate": 0.0006556386651323361, "loss": 0.3594, "step": 233650 }, { "epoch": 67.22094361334868, "grad_norm": 0.8705320358276367, "learning_rate": 0.0006555811277330264, "loss": 0.4796, "step": 233660 }, { "epoch": 67.22382048331416, "grad_norm": 1.642816185951233, "learning_rate": 0.0006555235903337169, "loss": 0.4207, "step": 233670 }, { "epoch": 67.22669735327963, "grad_norm": 0.8602100014686584, "learning_rate": 0.0006554660529344075, "loss": 0.4205, "step": 233680 }, { "epoch": 67.22957422324511, "grad_norm": 1.8725357055664062, "learning_rate": 0.0006554085155350978, "loss": 0.5409, "step": 233690 }, { "epoch": 67.23245109321059, "grad_norm": 1.0981431007385254, "learning_rate": 0.0006553509781357883, "loss": 0.5405, "step": 233700 }, { "epoch": 67.23532796317606, "grad_norm": 0.7653915882110596, "learning_rate": 0.0006552934407364787, "loss": 0.4115, "step": 233710 }, { "epoch": 67.23820483314154, "grad_norm": 1.7365840673446655, "learning_rate": 0.0006552359033371692, "loss": 0.4852, "step": 233720 }, { "epoch": 67.24108170310701, "grad_norm": 1.2028669118881226, "learning_rate": 0.0006551783659378596, "loss": 0.4201, "step": 233730 }, { "epoch": 67.24395857307249, "grad_norm": 1.1202306747436523, "learning_rate": 0.0006551208285385501, "loss": 0.4386, "step": 233740 }, { "epoch": 67.24683544303798, "grad_norm": 0.8751657605171204, "learning_rate": 0.0006550632911392405, "loss": 0.4517, "step": 233750 }, { "epoch": 67.24971231300346, "grad_norm": 0.9737333655357361, "learning_rate": 0.000655005753739931, "loss": 0.392, "step": 233760 }, { "epoch": 67.25258918296893, "grad_norm": 0.7308011651039124, "learning_rate": 0.0006549482163406214, "loss": 0.4205, "step": 233770 }, { "epoch": 67.25546605293441, "grad_norm": 0.9854613542556763, "learning_rate": 0.0006548906789413118, "loss": 0.3512, "step": 233780 }, { "epoch": 67.25834292289989, "grad_norm": 1.1198304891586304, "learning_rate": 0.0006548331415420024, "loss": 0.4104, "step": 233790 }, { "epoch": 67.26121979286536, "grad_norm": 1.9310741424560547, "learning_rate": 0.0006547756041426928, "loss": 0.3784, "step": 233800 }, { "epoch": 67.26409666283084, "grad_norm": 1.496212363243103, "learning_rate": 0.0006547180667433832, "loss": 0.3845, "step": 233810 }, { "epoch": 67.26697353279631, "grad_norm": 1.0032987594604492, "learning_rate": 0.0006546605293440736, "loss": 0.3909, "step": 233820 }, { "epoch": 67.26985040276179, "grad_norm": 1.0855833292007446, "learning_rate": 0.0006546029919447642, "loss": 0.383, "step": 233830 }, { "epoch": 67.27272727272727, "grad_norm": 0.7965779304504395, "learning_rate": 0.0006545454545454545, "loss": 0.3835, "step": 233840 }, { "epoch": 67.27560414269276, "grad_norm": 1.6127675771713257, "learning_rate": 0.000654487917146145, "loss": 0.3699, "step": 233850 }, { "epoch": 67.27848101265823, "grad_norm": 0.7209777235984802, "learning_rate": 0.0006544303797468355, "loss": 0.3609, "step": 233860 }, { "epoch": 67.28135788262371, "grad_norm": 1.4781253337860107, "learning_rate": 0.0006543728423475259, "loss": 0.4434, "step": 233870 }, { "epoch": 67.28423475258919, "grad_norm": 0.9702700972557068, "learning_rate": 0.0006543153049482163, "loss": 0.4226, "step": 233880 }, { "epoch": 67.28711162255466, "grad_norm": 1.3083040714263916, "learning_rate": 0.0006542577675489068, "loss": 0.4041, "step": 233890 }, { "epoch": 67.28998849252014, "grad_norm": 1.097523808479309, "learning_rate": 0.0006542002301495973, "loss": 0.4249, "step": 233900 }, { "epoch": 67.29286536248561, "grad_norm": 1.7887052297592163, "learning_rate": 0.0006541426927502877, "loss": 0.4917, "step": 233910 }, { "epoch": 67.29574223245109, "grad_norm": 1.608296275138855, "learning_rate": 0.0006540851553509782, "loss": 0.432, "step": 233920 }, { "epoch": 67.29861910241657, "grad_norm": 1.4104254245758057, "learning_rate": 0.0006540276179516686, "loss": 0.4529, "step": 233930 }, { "epoch": 67.30149597238204, "grad_norm": 0.9369779825210571, "learning_rate": 0.0006539700805523591, "loss": 0.4509, "step": 233940 }, { "epoch": 67.30437284234753, "grad_norm": 1.0122871398925781, "learning_rate": 0.0006539125431530495, "loss": 0.4288, "step": 233950 }, { "epoch": 67.30724971231301, "grad_norm": 0.8557955026626587, "learning_rate": 0.0006538550057537399, "loss": 0.3962, "step": 233960 }, { "epoch": 67.31012658227849, "grad_norm": 1.3413782119750977, "learning_rate": 0.0006537974683544304, "loss": 0.391, "step": 233970 }, { "epoch": 67.31300345224396, "grad_norm": 0.7568714022636414, "learning_rate": 0.0006537399309551209, "loss": 0.4329, "step": 233980 }, { "epoch": 67.31588032220944, "grad_norm": 0.8357580900192261, "learning_rate": 0.0006536823935558113, "loss": 0.3777, "step": 233990 }, { "epoch": 67.31875719217491, "grad_norm": 0.9861194491386414, "learning_rate": 0.0006536248561565017, "loss": 0.4397, "step": 234000 }, { "epoch": 67.32163406214039, "grad_norm": 1.6568617820739746, "learning_rate": 0.0006535673187571923, "loss": 0.3884, "step": 234010 }, { "epoch": 67.32451093210587, "grad_norm": 1.58963143825531, "learning_rate": 0.0006535097813578826, "loss": 0.4563, "step": 234020 }, { "epoch": 67.32738780207134, "grad_norm": 0.7707287073135376, "learning_rate": 0.0006534522439585731, "loss": 0.4202, "step": 234030 }, { "epoch": 67.33026467203682, "grad_norm": 1.620025396347046, "learning_rate": 0.0006533947065592636, "loss": 0.492, "step": 234040 }, { "epoch": 67.3331415420023, "grad_norm": 1.1632791757583618, "learning_rate": 0.000653337169159954, "loss": 0.4791, "step": 234050 }, { "epoch": 67.33601841196779, "grad_norm": 0.8283520936965942, "learning_rate": 0.0006532796317606444, "loss": 0.5171, "step": 234060 }, { "epoch": 67.33889528193326, "grad_norm": 2.504268169403076, "learning_rate": 0.0006532220943613348, "loss": 0.4649, "step": 234070 }, { "epoch": 67.34177215189874, "grad_norm": 0.9291889071464539, "learning_rate": 0.0006531645569620254, "loss": 0.444, "step": 234080 }, { "epoch": 67.34464902186421, "grad_norm": 0.9216752052307129, "learning_rate": 0.0006531070195627158, "loss": 0.4115, "step": 234090 }, { "epoch": 67.34752589182969, "grad_norm": 1.2672361135482788, "learning_rate": 0.0006530494821634062, "loss": 0.4888, "step": 234100 }, { "epoch": 67.35040276179517, "grad_norm": 1.4848558902740479, "learning_rate": 0.0006529919447640966, "loss": 0.4699, "step": 234110 }, { "epoch": 67.35327963176064, "grad_norm": 0.7445660829544067, "learning_rate": 0.0006529344073647872, "loss": 0.5166, "step": 234120 }, { "epoch": 67.35615650172612, "grad_norm": 2.0438601970672607, "learning_rate": 0.0006528768699654775, "loss": 0.526, "step": 234130 }, { "epoch": 67.3590333716916, "grad_norm": 1.155442714691162, "learning_rate": 0.000652819332566168, "loss": 0.3507, "step": 234140 }, { "epoch": 67.36191024165707, "grad_norm": 1.061274528503418, "learning_rate": 0.0006527617951668585, "loss": 0.3435, "step": 234150 }, { "epoch": 67.36478711162256, "grad_norm": 1.3604143857955933, "learning_rate": 0.0006527042577675489, "loss": 0.4567, "step": 234160 }, { "epoch": 67.36766398158804, "grad_norm": 1.0935695171356201, "learning_rate": 0.0006526467203682393, "loss": 0.4077, "step": 234170 }, { "epoch": 67.37054085155351, "grad_norm": 1.2156846523284912, "learning_rate": 0.0006525891829689298, "loss": 0.4725, "step": 234180 }, { "epoch": 67.37341772151899, "grad_norm": 1.2488198280334473, "learning_rate": 0.0006525316455696203, "loss": 0.4579, "step": 234190 }, { "epoch": 67.37629459148447, "grad_norm": 0.9993411898612976, "learning_rate": 0.0006524741081703107, "loss": 0.4678, "step": 234200 }, { "epoch": 67.37917146144994, "grad_norm": 0.9520809054374695, "learning_rate": 0.0006524165707710012, "loss": 0.4436, "step": 234210 }, { "epoch": 67.38204833141542, "grad_norm": 1.0208920240402222, "learning_rate": 0.0006523590333716916, "loss": 0.4928, "step": 234220 }, { "epoch": 67.3849252013809, "grad_norm": 1.8115599155426025, "learning_rate": 0.0006523014959723821, "loss": 0.4815, "step": 234230 }, { "epoch": 67.38780207134637, "grad_norm": 1.5409222841262817, "learning_rate": 0.0006522439585730725, "loss": 0.4827, "step": 234240 }, { "epoch": 67.39067894131185, "grad_norm": 1.2286826372146606, "learning_rate": 0.0006521864211737629, "loss": 0.4557, "step": 234250 }, { "epoch": 67.39355581127732, "grad_norm": 1.3685489892959595, "learning_rate": 0.0006521288837744534, "loss": 0.3692, "step": 234260 }, { "epoch": 67.39643268124281, "grad_norm": 2.186082601547241, "learning_rate": 0.0006520713463751439, "loss": 0.6634, "step": 234270 }, { "epoch": 67.39930955120829, "grad_norm": 1.4426075220108032, "learning_rate": 0.0006520138089758342, "loss": 0.4631, "step": 234280 }, { "epoch": 67.40218642117377, "grad_norm": 1.3716713190078735, "learning_rate": 0.0006519562715765247, "loss": 0.4675, "step": 234290 }, { "epoch": 67.40506329113924, "grad_norm": 1.205619215965271, "learning_rate": 0.0006518987341772153, "loss": 0.4445, "step": 234300 }, { "epoch": 67.40794016110472, "grad_norm": 1.0025153160095215, "learning_rate": 0.0006518411967779056, "loss": 0.4449, "step": 234310 }, { "epoch": 67.4108170310702, "grad_norm": 1.357077717781067, "learning_rate": 0.0006517836593785961, "loss": 0.4399, "step": 234320 }, { "epoch": 67.41369390103567, "grad_norm": 1.3192318677902222, "learning_rate": 0.0006517261219792866, "loss": 0.5118, "step": 234330 }, { "epoch": 67.41657077100115, "grad_norm": 1.3107527494430542, "learning_rate": 0.000651668584579977, "loss": 0.4577, "step": 234340 }, { "epoch": 67.41944764096662, "grad_norm": 1.1250141859054565, "learning_rate": 0.0006516110471806674, "loss": 0.4371, "step": 234350 }, { "epoch": 67.4223245109321, "grad_norm": 0.610623836517334, "learning_rate": 0.0006515535097813579, "loss": 0.4125, "step": 234360 }, { "epoch": 67.42520138089759, "grad_norm": 0.7951167225837708, "learning_rate": 0.0006514959723820483, "loss": 0.4273, "step": 234370 }, { "epoch": 67.42807825086307, "grad_norm": 0.8805232644081116, "learning_rate": 0.0006514384349827388, "loss": 0.4639, "step": 234380 }, { "epoch": 67.43095512082854, "grad_norm": 0.9625579118728638, "learning_rate": 0.0006513808975834293, "loss": 0.4081, "step": 234390 }, { "epoch": 67.43383199079402, "grad_norm": 0.8173273801803589, "learning_rate": 0.0006513233601841196, "loss": 0.481, "step": 234400 }, { "epoch": 67.4367088607595, "grad_norm": 1.5453118085861206, "learning_rate": 0.0006512658227848102, "loss": 0.3983, "step": 234410 }, { "epoch": 67.43958573072497, "grad_norm": 1.5289889574050903, "learning_rate": 0.0006512082853855006, "loss": 0.4074, "step": 234420 }, { "epoch": 67.44246260069045, "grad_norm": 1.772659420967102, "learning_rate": 0.000651150747986191, "loss": 0.536, "step": 234430 }, { "epoch": 67.44533947065592, "grad_norm": 1.8997820615768433, "learning_rate": 0.0006510932105868815, "loss": 0.4651, "step": 234440 }, { "epoch": 67.4482163406214, "grad_norm": 1.9295941591262817, "learning_rate": 0.000651035673187572, "loss": 0.4297, "step": 234450 }, { "epoch": 67.45109321058688, "grad_norm": 0.7227718234062195, "learning_rate": 0.0006509781357882623, "loss": 0.4463, "step": 234460 }, { "epoch": 67.45397008055235, "grad_norm": 1.045223355293274, "learning_rate": 0.0006509205983889528, "loss": 0.439, "step": 234470 }, { "epoch": 67.45684695051784, "grad_norm": 0.9837777614593506, "learning_rate": 0.0006508630609896434, "loss": 0.4532, "step": 234480 }, { "epoch": 67.45972382048332, "grad_norm": 0.6994844079017639, "learning_rate": 0.0006508055235903337, "loss": 0.3763, "step": 234490 }, { "epoch": 67.4626006904488, "grad_norm": 1.937696099281311, "learning_rate": 0.0006507479861910242, "loss": 0.4235, "step": 234500 }, { "epoch": 67.46547756041427, "grad_norm": 1.2998613119125366, "learning_rate": 0.0006506904487917146, "loss": 0.4376, "step": 234510 }, { "epoch": 67.46835443037975, "grad_norm": 0.7978249192237854, "learning_rate": 0.0006506329113924051, "loss": 0.3449, "step": 234520 }, { "epoch": 67.47123130034522, "grad_norm": 0.9386064410209656, "learning_rate": 0.0006505753739930955, "loss": 0.4467, "step": 234530 }, { "epoch": 67.4741081703107, "grad_norm": 1.0074280500411987, "learning_rate": 0.000650517836593786, "loss": 0.3287, "step": 234540 }, { "epoch": 67.47698504027618, "grad_norm": 1.0153928995132446, "learning_rate": 0.0006504602991944764, "loss": 0.338, "step": 234550 }, { "epoch": 67.47986191024165, "grad_norm": 1.501081109046936, "learning_rate": 0.0006504027617951669, "loss": 0.4969, "step": 234560 }, { "epoch": 67.48273878020713, "grad_norm": 1.7688630819320679, "learning_rate": 0.0006503452243958573, "loss": 0.3719, "step": 234570 }, { "epoch": 67.48561565017262, "grad_norm": 1.5733916759490967, "learning_rate": 0.0006502876869965477, "loss": 0.4301, "step": 234580 }, { "epoch": 67.4884925201381, "grad_norm": 2.3948798179626465, "learning_rate": 0.0006502301495972383, "loss": 0.444, "step": 234590 }, { "epoch": 67.49136939010357, "grad_norm": 1.0154422521591187, "learning_rate": 0.0006501726121979287, "loss": 0.3764, "step": 234600 }, { "epoch": 67.49424626006905, "grad_norm": 1.1640002727508545, "learning_rate": 0.0006501150747986191, "loss": 0.4087, "step": 234610 }, { "epoch": 67.49712313003452, "grad_norm": 1.2125173807144165, "learning_rate": 0.0006500575373993096, "loss": 0.3621, "step": 234620 }, { "epoch": 67.5, "grad_norm": 1.7674225568771362, "learning_rate": 0.0006500000000000001, "loss": 0.452, "step": 234630 }, { "epoch": 67.50287686996548, "grad_norm": 1.4167869091033936, "learning_rate": 0.0006499424626006904, "loss": 0.4624, "step": 234640 }, { "epoch": 67.50575373993095, "grad_norm": 1.019851803779602, "learning_rate": 0.0006498849252013809, "loss": 0.4949, "step": 234650 }, { "epoch": 67.50863060989643, "grad_norm": 1.423222541809082, "learning_rate": 0.0006498273878020714, "loss": 0.4448, "step": 234660 }, { "epoch": 67.5115074798619, "grad_norm": 1.3541417121887207, "learning_rate": 0.0006497698504027618, "loss": 0.3628, "step": 234670 }, { "epoch": 67.51438434982738, "grad_norm": 1.3544557094573975, "learning_rate": 0.0006497123130034522, "loss": 0.4121, "step": 234680 }, { "epoch": 67.51726121979287, "grad_norm": 1.763399362564087, "learning_rate": 0.0006496547756041427, "loss": 0.3882, "step": 234690 }, { "epoch": 67.52013808975835, "grad_norm": 1.2849435806274414, "learning_rate": 0.0006495972382048332, "loss": 0.3374, "step": 234700 }, { "epoch": 67.52301495972382, "grad_norm": 1.0060009956359863, "learning_rate": 0.0006495397008055236, "loss": 0.4309, "step": 234710 }, { "epoch": 67.5258918296893, "grad_norm": 0.7573493123054504, "learning_rate": 0.0006494821634062141, "loss": 0.3864, "step": 234720 }, { "epoch": 67.52876869965478, "grad_norm": 1.1014913320541382, "learning_rate": 0.0006494246260069045, "loss": 0.4782, "step": 234730 }, { "epoch": 67.53164556962025, "grad_norm": 1.5390218496322632, "learning_rate": 0.000649367088607595, "loss": 0.394, "step": 234740 }, { "epoch": 67.53452243958573, "grad_norm": 1.5702801942825317, "learning_rate": 0.0006493095512082854, "loss": 0.4329, "step": 234750 }, { "epoch": 67.5373993095512, "grad_norm": 1.167574405670166, "learning_rate": 0.0006492520138089758, "loss": 0.3733, "step": 234760 }, { "epoch": 67.54027617951668, "grad_norm": 1.4012514352798462, "learning_rate": 0.0006491944764096663, "loss": 0.4492, "step": 234770 }, { "epoch": 67.54315304948216, "grad_norm": 1.0585452318191528, "learning_rate": 0.0006491369390103568, "loss": 0.3589, "step": 234780 }, { "epoch": 67.54602991944765, "grad_norm": 2.068624973297119, "learning_rate": 0.0006490794016110471, "loss": 0.5458, "step": 234790 }, { "epoch": 67.54890678941312, "grad_norm": 1.361238956451416, "learning_rate": 0.0006490218642117376, "loss": 0.3672, "step": 234800 }, { "epoch": 67.5517836593786, "grad_norm": 1.300429105758667, "learning_rate": 0.0006489643268124282, "loss": 0.4204, "step": 234810 }, { "epoch": 67.55466052934408, "grad_norm": 2.1656570434570312, "learning_rate": 0.0006489067894131185, "loss": 0.3865, "step": 234820 }, { "epoch": 67.55753739930955, "grad_norm": 1.5338711738586426, "learning_rate": 0.000648849252013809, "loss": 0.5482, "step": 234830 }, { "epoch": 67.56041426927503, "grad_norm": 0.8614979386329651, "learning_rate": 0.0006487917146144995, "loss": 0.4873, "step": 234840 }, { "epoch": 67.5632911392405, "grad_norm": 1.1154894828796387, "learning_rate": 0.0006487341772151899, "loss": 0.5542, "step": 234850 }, { "epoch": 67.56616800920598, "grad_norm": 1.1241859197616577, "learning_rate": 0.0006486766398158803, "loss": 0.4746, "step": 234860 }, { "epoch": 67.56904487917146, "grad_norm": 1.0535881519317627, "learning_rate": 0.0006486191024165707, "loss": 0.3547, "step": 234870 }, { "epoch": 67.57192174913693, "grad_norm": 1.1168242692947388, "learning_rate": 0.0006485615650172612, "loss": 0.4044, "step": 234880 }, { "epoch": 67.57479861910241, "grad_norm": 0.8925864696502686, "learning_rate": 0.0006485040276179517, "loss": 0.5078, "step": 234890 }, { "epoch": 67.5776754890679, "grad_norm": 1.9204212427139282, "learning_rate": 0.000648446490218642, "loss": 0.3943, "step": 234900 }, { "epoch": 67.58055235903338, "grad_norm": 1.6988551616668701, "learning_rate": 0.0006483889528193326, "loss": 0.4356, "step": 234910 }, { "epoch": 67.58342922899885, "grad_norm": 1.9426568746566772, "learning_rate": 0.0006483314154200231, "loss": 0.4267, "step": 234920 }, { "epoch": 67.58630609896433, "grad_norm": 1.692012906074524, "learning_rate": 0.0006482738780207134, "loss": 0.5318, "step": 234930 }, { "epoch": 67.5891829689298, "grad_norm": 1.1180933713912964, "learning_rate": 0.0006482163406214039, "loss": 0.4695, "step": 234940 }, { "epoch": 67.59205983889528, "grad_norm": 0.7920657992362976, "learning_rate": 0.0006481588032220944, "loss": 0.3914, "step": 234950 }, { "epoch": 67.59493670886076, "grad_norm": 1.1300084590911865, "learning_rate": 0.0006481012658227848, "loss": 0.3866, "step": 234960 }, { "epoch": 67.59781357882623, "grad_norm": 1.0802136659622192, "learning_rate": 0.0006480437284234752, "loss": 0.4359, "step": 234970 }, { "epoch": 67.60069044879171, "grad_norm": 2.3635106086730957, "learning_rate": 0.0006479861910241657, "loss": 0.5171, "step": 234980 }, { "epoch": 67.60356731875719, "grad_norm": 1.3593257665634155, "learning_rate": 0.0006479286536248562, "loss": 0.4651, "step": 234990 }, { "epoch": 67.60644418872268, "grad_norm": 1.221877098083496, "learning_rate": 0.0006478711162255466, "loss": 0.3661, "step": 235000 }, { "epoch": 67.60932105868815, "grad_norm": 1.161922812461853, "learning_rate": 0.0006478135788262371, "loss": 0.42, "step": 235010 }, { "epoch": 67.61219792865363, "grad_norm": 1.0348602533340454, "learning_rate": 0.0006477560414269275, "loss": 0.4706, "step": 235020 }, { "epoch": 67.6150747986191, "grad_norm": 1.2893775701522827, "learning_rate": 0.000647698504027618, "loss": 0.4444, "step": 235030 }, { "epoch": 67.61795166858458, "grad_norm": 1.6059538125991821, "learning_rate": 0.0006476409666283084, "loss": 0.4799, "step": 235040 }, { "epoch": 67.62082853855006, "grad_norm": 2.1967601776123047, "learning_rate": 0.0006475834292289988, "loss": 0.4206, "step": 235050 }, { "epoch": 67.62370540851553, "grad_norm": 2.032137632369995, "learning_rate": 0.0006475258918296893, "loss": 0.426, "step": 235060 }, { "epoch": 67.62658227848101, "grad_norm": 2.3543810844421387, "learning_rate": 0.0006474683544303798, "loss": 0.4959, "step": 235070 }, { "epoch": 67.62945914844649, "grad_norm": 0.7244884371757507, "learning_rate": 0.0006474108170310701, "loss": 0.5116, "step": 235080 }, { "epoch": 67.63233601841196, "grad_norm": 1.4457941055297852, "learning_rate": 0.0006473532796317606, "loss": 0.3916, "step": 235090 }, { "epoch": 67.63521288837744, "grad_norm": 2.1131246089935303, "learning_rate": 0.0006472957422324512, "loss": 0.5317, "step": 235100 }, { "epoch": 67.63808975834293, "grad_norm": 1.251704216003418, "learning_rate": 0.0006472382048331415, "loss": 0.3646, "step": 235110 }, { "epoch": 67.6409666283084, "grad_norm": 1.9532592296600342, "learning_rate": 0.000647180667433832, "loss": 0.5419, "step": 235120 }, { "epoch": 67.64384349827388, "grad_norm": 1.3814818859100342, "learning_rate": 0.0006471231300345225, "loss": 0.4905, "step": 235130 }, { "epoch": 67.64672036823936, "grad_norm": 1.0889167785644531, "learning_rate": 0.0006470655926352129, "loss": 0.4172, "step": 235140 }, { "epoch": 67.64959723820483, "grad_norm": 1.5804529190063477, "learning_rate": 0.0006470080552359033, "loss": 0.4683, "step": 235150 }, { "epoch": 67.65247410817031, "grad_norm": 1.3620604276657104, "learning_rate": 0.0006469505178365938, "loss": 0.376, "step": 235160 }, { "epoch": 67.65535097813579, "grad_norm": 0.7298154234886169, "learning_rate": 0.0006468929804372842, "loss": 0.3332, "step": 235170 }, { "epoch": 67.65822784810126, "grad_norm": 0.9045685529708862, "learning_rate": 0.0006468354430379747, "loss": 0.4348, "step": 235180 }, { "epoch": 67.66110471806674, "grad_norm": 1.5632952451705933, "learning_rate": 0.0006467779056386652, "loss": 0.4238, "step": 235190 }, { "epoch": 67.66398158803221, "grad_norm": 1.4262586832046509, "learning_rate": 0.0006467203682393556, "loss": 0.3978, "step": 235200 }, { "epoch": 67.6668584579977, "grad_norm": 0.79680997133255, "learning_rate": 0.0006466628308400461, "loss": 0.4419, "step": 235210 }, { "epoch": 67.66973532796318, "grad_norm": 0.8725989460945129, "learning_rate": 0.0006466052934407365, "loss": 0.3601, "step": 235220 }, { "epoch": 67.67261219792866, "grad_norm": 1.1701992750167847, "learning_rate": 0.0006465477560414269, "loss": 0.579, "step": 235230 }, { "epoch": 67.67548906789413, "grad_norm": 4.4667277336120605, "learning_rate": 0.0006464902186421174, "loss": 0.5133, "step": 235240 }, { "epoch": 67.67836593785961, "grad_norm": 1.964836597442627, "learning_rate": 0.0006464326812428079, "loss": 0.4203, "step": 235250 }, { "epoch": 67.68124280782509, "grad_norm": 1.6865204572677612, "learning_rate": 0.0006463751438434982, "loss": 0.5208, "step": 235260 }, { "epoch": 67.68411967779056, "grad_norm": 0.9620758891105652, "learning_rate": 0.0006463176064441887, "loss": 0.4503, "step": 235270 }, { "epoch": 67.68699654775604, "grad_norm": 0.9958894848823547, "learning_rate": 0.0006462600690448793, "loss": 0.4278, "step": 235280 }, { "epoch": 67.68987341772151, "grad_norm": 1.6480122804641724, "learning_rate": 0.0006462025316455696, "loss": 0.4152, "step": 235290 }, { "epoch": 67.69275028768699, "grad_norm": 1.1449987888336182, "learning_rate": 0.0006461449942462601, "loss": 0.4411, "step": 235300 }, { "epoch": 67.69562715765247, "grad_norm": 1.1358400583267212, "learning_rate": 0.0006460874568469506, "loss": 0.4984, "step": 235310 }, { "epoch": 67.69850402761796, "grad_norm": 1.1646711826324463, "learning_rate": 0.000646029919447641, "loss": 0.5028, "step": 235320 }, { "epoch": 67.70138089758343, "grad_norm": 1.8628361225128174, "learning_rate": 0.0006459723820483314, "loss": 0.4752, "step": 235330 }, { "epoch": 67.70425776754891, "grad_norm": 1.3457695245742798, "learning_rate": 0.0006459148446490219, "loss": 0.3966, "step": 235340 }, { "epoch": 67.70713463751439, "grad_norm": 1.217045545578003, "learning_rate": 0.0006458573072497123, "loss": 0.4255, "step": 235350 }, { "epoch": 67.71001150747986, "grad_norm": 1.6562330722808838, "learning_rate": 0.0006457997698504028, "loss": 0.408, "step": 235360 }, { "epoch": 67.71288837744534, "grad_norm": 1.1435662508010864, "learning_rate": 0.0006457422324510932, "loss": 0.3553, "step": 235370 }, { "epoch": 67.71576524741081, "grad_norm": 0.6847463846206665, "learning_rate": 0.0006456846950517836, "loss": 0.5263, "step": 235380 }, { "epoch": 67.71864211737629, "grad_norm": 0.5343374013900757, "learning_rate": 0.0006456271576524742, "loss": 0.4365, "step": 235390 }, { "epoch": 67.72151898734177, "grad_norm": 0.8647840619087219, "learning_rate": 0.0006455696202531646, "loss": 0.356, "step": 235400 }, { "epoch": 67.72439585730724, "grad_norm": 1.0582002401351929, "learning_rate": 0.000645512082853855, "loss": 0.4637, "step": 235410 }, { "epoch": 67.72727272727273, "grad_norm": 1.5982624292373657, "learning_rate": 0.0006454545454545455, "loss": 0.5825, "step": 235420 }, { "epoch": 67.73014959723821, "grad_norm": 1.355008602142334, "learning_rate": 0.000645397008055236, "loss": 0.4259, "step": 235430 }, { "epoch": 67.73302646720369, "grad_norm": 1.060935616493225, "learning_rate": 0.0006453394706559263, "loss": 0.3901, "step": 235440 }, { "epoch": 67.73590333716916, "grad_norm": 1.4776455163955688, "learning_rate": 0.0006452819332566168, "loss": 0.4146, "step": 235450 }, { "epoch": 67.73878020713464, "grad_norm": 0.7432011365890503, "learning_rate": 0.0006452243958573073, "loss": 0.3783, "step": 235460 }, { "epoch": 67.74165707710011, "grad_norm": 1.1907368898391724, "learning_rate": 0.0006451668584579977, "loss": 0.422, "step": 235470 }, { "epoch": 67.74453394706559, "grad_norm": 1.0894458293914795, "learning_rate": 0.0006451093210586881, "loss": 0.4141, "step": 235480 }, { "epoch": 67.74741081703107, "grad_norm": 0.9402784705162048, "learning_rate": 0.0006450517836593786, "loss": 0.5489, "step": 235490 }, { "epoch": 67.75028768699654, "grad_norm": 1.2683794498443604, "learning_rate": 0.0006449942462600691, "loss": 0.4794, "step": 235500 }, { "epoch": 67.75316455696202, "grad_norm": 1.3316282033920288, "learning_rate": 0.0006449367088607595, "loss": 0.4842, "step": 235510 }, { "epoch": 67.75604142692751, "grad_norm": 1.2476649284362793, "learning_rate": 0.00064487917146145, "loss": 0.4852, "step": 235520 }, { "epoch": 67.75891829689299, "grad_norm": 2.6846730709075928, "learning_rate": 0.0006448216340621404, "loss": 0.4138, "step": 235530 }, { "epoch": 67.76179516685846, "grad_norm": 0.9825289249420166, "learning_rate": 0.0006447640966628309, "loss": 0.4495, "step": 235540 }, { "epoch": 67.76467203682394, "grad_norm": 1.4950248003005981, "learning_rate": 0.0006447065592635213, "loss": 0.4985, "step": 235550 }, { "epoch": 67.76754890678941, "grad_norm": 1.0695762634277344, "learning_rate": 0.0006446490218642117, "loss": 0.4557, "step": 235560 }, { "epoch": 67.77042577675489, "grad_norm": 1.372342824935913, "learning_rate": 0.0006445914844649022, "loss": 0.3621, "step": 235570 }, { "epoch": 67.77330264672037, "grad_norm": 1.5621424913406372, "learning_rate": 0.0006445339470655927, "loss": 0.4523, "step": 235580 }, { "epoch": 67.77617951668584, "grad_norm": 1.093982458114624, "learning_rate": 0.000644476409666283, "loss": 0.4494, "step": 235590 }, { "epoch": 67.77905638665132, "grad_norm": 1.0319265127182007, "learning_rate": 0.0006444188722669736, "loss": 0.4798, "step": 235600 }, { "epoch": 67.7819332566168, "grad_norm": 1.7683242559432983, "learning_rate": 0.0006443613348676641, "loss": 0.4535, "step": 235610 }, { "epoch": 67.78481012658227, "grad_norm": 2.16141676902771, "learning_rate": 0.0006443037974683544, "loss": 0.4086, "step": 235620 }, { "epoch": 67.78768699654776, "grad_norm": 1.0383105278015137, "learning_rate": 0.0006442462600690449, "loss": 0.4196, "step": 235630 }, { "epoch": 67.79056386651324, "grad_norm": 1.8601555824279785, "learning_rate": 0.0006441887226697354, "loss": 0.3454, "step": 235640 }, { "epoch": 67.79344073647871, "grad_norm": 0.6856386065483093, "learning_rate": 0.0006441311852704258, "loss": 0.4672, "step": 235650 }, { "epoch": 67.79631760644419, "grad_norm": 4.329042434692383, "learning_rate": 0.0006440736478711162, "loss": 0.4678, "step": 235660 }, { "epoch": 67.79919447640967, "grad_norm": 1.5423697233200073, "learning_rate": 0.0006440161104718066, "loss": 0.457, "step": 235670 }, { "epoch": 67.80207134637514, "grad_norm": 1.463417887687683, "learning_rate": 0.0006439585730724971, "loss": 0.4643, "step": 235680 }, { "epoch": 67.80494821634062, "grad_norm": 1.4118951559066772, "learning_rate": 0.0006439010356731876, "loss": 0.4737, "step": 235690 }, { "epoch": 67.8078250863061, "grad_norm": 1.087478756904602, "learning_rate": 0.000643843498273878, "loss": 0.4419, "step": 235700 }, { "epoch": 67.81070195627157, "grad_norm": 1.1906013488769531, "learning_rate": 0.0006437859608745685, "loss": 0.4221, "step": 235710 }, { "epoch": 67.81357882623705, "grad_norm": 1.3928495645523071, "learning_rate": 0.000643728423475259, "loss": 0.4854, "step": 235720 }, { "epoch": 67.81645569620254, "grad_norm": 2.118805408477783, "learning_rate": 0.0006436708860759493, "loss": 0.5832, "step": 235730 }, { "epoch": 67.81933256616801, "grad_norm": 1.1986209154129028, "learning_rate": 0.0006436133486766398, "loss": 0.4038, "step": 235740 }, { "epoch": 67.82220943613349, "grad_norm": 1.151273250579834, "learning_rate": 0.0006435558112773303, "loss": 0.5075, "step": 235750 }, { "epoch": 67.82508630609897, "grad_norm": 0.6828820705413818, "learning_rate": 0.0006434982738780207, "loss": 0.4429, "step": 235760 }, { "epoch": 67.82796317606444, "grad_norm": 0.8255968689918518, "learning_rate": 0.0006434407364787111, "loss": 0.358, "step": 235770 }, { "epoch": 67.83084004602992, "grad_norm": 1.5319000482559204, "learning_rate": 0.0006433831990794016, "loss": 0.3975, "step": 235780 }, { "epoch": 67.8337169159954, "grad_norm": 1.3294565677642822, "learning_rate": 0.000643325661680092, "loss": 0.4846, "step": 235790 }, { "epoch": 67.83659378596087, "grad_norm": 1.23782479763031, "learning_rate": 0.0006432681242807825, "loss": 0.4688, "step": 235800 }, { "epoch": 67.83947065592635, "grad_norm": 1.0100950002670288, "learning_rate": 0.000643210586881473, "loss": 0.455, "step": 235810 }, { "epoch": 67.84234752589182, "grad_norm": 2.1786081790924072, "learning_rate": 0.0006431530494821634, "loss": 0.4458, "step": 235820 }, { "epoch": 67.8452243958573, "grad_norm": 0.8665369749069214, "learning_rate": 0.0006430955120828539, "loss": 0.4507, "step": 235830 }, { "epoch": 67.84810126582279, "grad_norm": 1.2231463193893433, "learning_rate": 0.0006430379746835443, "loss": 0.439, "step": 235840 }, { "epoch": 67.85097813578827, "grad_norm": 2.0229952335357666, "learning_rate": 0.0006429804372842347, "loss": 0.4327, "step": 235850 }, { "epoch": 67.85385500575374, "grad_norm": 1.2386680841445923, "learning_rate": 0.0006429228998849252, "loss": 0.5039, "step": 235860 }, { "epoch": 67.85673187571922, "grad_norm": 1.2065948247909546, "learning_rate": 0.0006428653624856157, "loss": 0.4767, "step": 235870 }, { "epoch": 67.8596087456847, "grad_norm": 0.8982647061347961, "learning_rate": 0.000642807825086306, "loss": 0.501, "step": 235880 }, { "epoch": 67.86248561565017, "grad_norm": 1.08812415599823, "learning_rate": 0.0006427502876869966, "loss": 0.4136, "step": 235890 }, { "epoch": 67.86536248561565, "grad_norm": 1.6570464372634888, "learning_rate": 0.0006426927502876871, "loss": 0.4638, "step": 235900 }, { "epoch": 67.86823935558112, "grad_norm": 2.5466606616973877, "learning_rate": 0.0006426352128883774, "loss": 0.4163, "step": 235910 }, { "epoch": 67.8711162255466, "grad_norm": 1.224522590637207, "learning_rate": 0.0006425776754890679, "loss": 0.4104, "step": 235920 }, { "epoch": 67.87399309551208, "grad_norm": 2.304891586303711, "learning_rate": 0.0006425201380897584, "loss": 0.5171, "step": 235930 }, { "epoch": 67.87686996547757, "grad_norm": 1.6582785844802856, "learning_rate": 0.0006424626006904488, "loss": 0.5642, "step": 235940 }, { "epoch": 67.87974683544304, "grad_norm": 0.9733489155769348, "learning_rate": 0.0006424050632911392, "loss": 0.4992, "step": 235950 }, { "epoch": 67.88262370540852, "grad_norm": 0.7115108966827393, "learning_rate": 0.0006423475258918297, "loss": 0.3693, "step": 235960 }, { "epoch": 67.885500575374, "grad_norm": 0.6203019022941589, "learning_rate": 0.0006422899884925201, "loss": 0.51, "step": 235970 }, { "epoch": 67.88837744533947, "grad_norm": 0.7742023468017578, "learning_rate": 0.0006422324510932106, "loss": 0.433, "step": 235980 }, { "epoch": 67.89125431530495, "grad_norm": 1.6403800249099731, "learning_rate": 0.000642174913693901, "loss": 0.4183, "step": 235990 }, { "epoch": 67.89413118527042, "grad_norm": 0.7867017388343811, "learning_rate": 0.0006421173762945915, "loss": 0.5123, "step": 236000 }, { "epoch": 67.8970080552359, "grad_norm": 1.236953616142273, "learning_rate": 0.000642059838895282, "loss": 0.4182, "step": 236010 }, { "epoch": 67.89988492520138, "grad_norm": 1.5410192012786865, "learning_rate": 0.0006420023014959724, "loss": 0.4898, "step": 236020 }, { "epoch": 67.90276179516685, "grad_norm": 0.989044189453125, "learning_rate": 0.0006419447640966628, "loss": 0.4655, "step": 236030 }, { "epoch": 67.90563866513233, "grad_norm": 1.6232883930206299, "learning_rate": 0.0006418872266973533, "loss": 0.4047, "step": 236040 }, { "epoch": 67.90851553509782, "grad_norm": 1.5430034399032593, "learning_rate": 0.0006418296892980438, "loss": 0.4472, "step": 236050 }, { "epoch": 67.9113924050633, "grad_norm": 1.337429165840149, "learning_rate": 0.0006417721518987341, "loss": 0.5222, "step": 236060 }, { "epoch": 67.91426927502877, "grad_norm": 1.1489795446395874, "learning_rate": 0.0006417146144994246, "loss": 0.4355, "step": 236070 }, { "epoch": 67.91714614499425, "grad_norm": 1.2525370121002197, "learning_rate": 0.0006416570771001152, "loss": 0.3444, "step": 236080 }, { "epoch": 67.92002301495972, "grad_norm": 0.9587894678115845, "learning_rate": 0.0006415995397008055, "loss": 0.441, "step": 236090 }, { "epoch": 67.9228998849252, "grad_norm": 1.1135249137878418, "learning_rate": 0.000641542002301496, "loss": 0.3673, "step": 236100 }, { "epoch": 67.92577675489068, "grad_norm": 1.1544644832611084, "learning_rate": 0.0006414844649021865, "loss": 0.6116, "step": 236110 }, { "epoch": 67.92865362485615, "grad_norm": 1.5446887016296387, "learning_rate": 0.0006414269275028769, "loss": 0.4643, "step": 236120 }, { "epoch": 67.93153049482163, "grad_norm": 0.9370180368423462, "learning_rate": 0.0006413693901035673, "loss": 0.4068, "step": 236130 }, { "epoch": 67.9344073647871, "grad_norm": 1.0834144353866577, "learning_rate": 0.0006413118527042578, "loss": 0.4321, "step": 236140 }, { "epoch": 67.9372842347526, "grad_norm": 1.3990858793258667, "learning_rate": 0.0006412543153049482, "loss": 0.5187, "step": 236150 }, { "epoch": 67.94016110471807, "grad_norm": 0.636772871017456, "learning_rate": 0.0006411967779056387, "loss": 0.4233, "step": 236160 }, { "epoch": 67.94303797468355, "grad_norm": 1.0614663362503052, "learning_rate": 0.0006411392405063291, "loss": 0.4165, "step": 236170 }, { "epoch": 67.94591484464902, "grad_norm": 0.6818709373474121, "learning_rate": 0.0006410817031070196, "loss": 0.4723, "step": 236180 }, { "epoch": 67.9487917146145, "grad_norm": 1.185353398323059, "learning_rate": 0.00064102416570771, "loss": 0.3179, "step": 236190 }, { "epoch": 67.95166858457998, "grad_norm": 1.4400080442428589, "learning_rate": 0.0006409666283084005, "loss": 0.6888, "step": 236200 }, { "epoch": 67.95454545454545, "grad_norm": 2.4217543601989746, "learning_rate": 0.0006409090909090909, "loss": 0.4787, "step": 236210 }, { "epoch": 67.95742232451093, "grad_norm": 1.6671110391616821, "learning_rate": 0.0006408515535097814, "loss": 0.5067, "step": 236220 }, { "epoch": 67.9602991944764, "grad_norm": 1.6315160989761353, "learning_rate": 0.0006407940161104719, "loss": 0.3764, "step": 236230 }, { "epoch": 67.96317606444188, "grad_norm": 1.9002385139465332, "learning_rate": 0.0006407364787111622, "loss": 0.5473, "step": 236240 }, { "epoch": 67.96605293440736, "grad_norm": 1.1143202781677246, "learning_rate": 0.0006406789413118527, "loss": 0.4539, "step": 236250 }, { "epoch": 67.96892980437285, "grad_norm": 0.8786824345588684, "learning_rate": 0.0006406214039125432, "loss": 0.4454, "step": 236260 }, { "epoch": 67.97180667433832, "grad_norm": 1.0090771913528442, "learning_rate": 0.0006405638665132336, "loss": 0.3987, "step": 236270 }, { "epoch": 67.9746835443038, "grad_norm": 2.229318141937256, "learning_rate": 0.000640506329113924, "loss": 0.5058, "step": 236280 }, { "epoch": 67.97756041426928, "grad_norm": 1.3024650812149048, "learning_rate": 0.0006404487917146146, "loss": 0.451, "step": 236290 }, { "epoch": 67.98043728423475, "grad_norm": 2.4376206398010254, "learning_rate": 0.000640391254315305, "loss": 0.4835, "step": 236300 }, { "epoch": 67.98331415420023, "grad_norm": 1.5300414562225342, "learning_rate": 0.0006403337169159954, "loss": 0.3834, "step": 236310 }, { "epoch": 67.9861910241657, "grad_norm": 0.8260629177093506, "learning_rate": 0.0006402761795166859, "loss": 0.3299, "step": 236320 }, { "epoch": 67.98906789413118, "grad_norm": 1.0205717086791992, "learning_rate": 0.0006402186421173763, "loss": 0.377, "step": 236330 }, { "epoch": 67.99194476409666, "grad_norm": 0.9228568077087402, "learning_rate": 0.0006401611047180668, "loss": 0.3619, "step": 236340 }, { "epoch": 67.99482163406213, "grad_norm": 2.5374515056610107, "learning_rate": 0.0006401035673187572, "loss": 0.4831, "step": 236350 }, { "epoch": 67.99769850402762, "grad_norm": 0.8581351041793823, "learning_rate": 0.0006400460299194476, "loss": 0.4482, "step": 236360 }, { "epoch": 68.0005753739931, "grad_norm": 1.4746532440185547, "learning_rate": 0.0006399884925201381, "loss": 0.4425, "step": 236370 }, { "epoch": 68.00345224395858, "grad_norm": 0.7881699800491333, "learning_rate": 0.0006399309551208286, "loss": 0.4842, "step": 236380 }, { "epoch": 68.00632911392405, "grad_norm": 1.3791776895523071, "learning_rate": 0.000639873417721519, "loss": 0.4845, "step": 236390 }, { "epoch": 68.00920598388953, "grad_norm": 1.0631715059280396, "learning_rate": 0.0006398158803222095, "loss": 0.4829, "step": 236400 }, { "epoch": 68.012082853855, "grad_norm": 0.960658073425293, "learning_rate": 0.0006397583429229, "loss": 0.4632, "step": 236410 }, { "epoch": 68.01495972382048, "grad_norm": 1.654908537864685, "learning_rate": 0.0006397008055235903, "loss": 0.3834, "step": 236420 }, { "epoch": 68.01783659378596, "grad_norm": 1.528550386428833, "learning_rate": 0.0006396432681242808, "loss": 0.4088, "step": 236430 }, { "epoch": 68.02071346375143, "grad_norm": 1.5106468200683594, "learning_rate": 0.0006395857307249713, "loss": 0.3769, "step": 236440 }, { "epoch": 68.02359033371691, "grad_norm": 1.1540815830230713, "learning_rate": 0.0006395281933256617, "loss": 0.3983, "step": 236450 }, { "epoch": 68.02646720368239, "grad_norm": 1.2166767120361328, "learning_rate": 0.0006394706559263521, "loss": 0.4429, "step": 236460 }, { "epoch": 68.02934407364788, "grad_norm": 1.120140552520752, "learning_rate": 0.0006394131185270426, "loss": 0.3759, "step": 236470 }, { "epoch": 68.03222094361335, "grad_norm": 2.221668004989624, "learning_rate": 0.000639355581127733, "loss": 0.441, "step": 236480 }, { "epoch": 68.03509781357883, "grad_norm": 0.9964717626571655, "learning_rate": 0.0006392980437284235, "loss": 0.4487, "step": 236490 }, { "epoch": 68.0379746835443, "grad_norm": 1.646254301071167, "learning_rate": 0.0006392405063291138, "loss": 0.3256, "step": 236500 }, { "epoch": 68.04085155350978, "grad_norm": 1.8385332822799683, "learning_rate": 0.0006391829689298044, "loss": 0.4115, "step": 236510 }, { "epoch": 68.04372842347526, "grad_norm": 1.5528080463409424, "learning_rate": 0.0006391254315304949, "loss": 0.369, "step": 236520 }, { "epoch": 68.04660529344073, "grad_norm": 1.2894996404647827, "learning_rate": 0.0006390678941311852, "loss": 0.3327, "step": 236530 }, { "epoch": 68.04948216340621, "grad_norm": 1.3915647268295288, "learning_rate": 0.0006390103567318757, "loss": 0.3778, "step": 236540 }, { "epoch": 68.05235903337169, "grad_norm": 0.6843147873878479, "learning_rate": 0.0006389528193325662, "loss": 0.4117, "step": 236550 }, { "epoch": 68.05523590333716, "grad_norm": 0.5429864525794983, "learning_rate": 0.0006388952819332566, "loss": 0.3324, "step": 236560 }, { "epoch": 68.05811277330265, "grad_norm": 1.1093436479568481, "learning_rate": 0.000638837744533947, "loss": 0.4219, "step": 236570 }, { "epoch": 68.06098964326813, "grad_norm": 1.0122840404510498, "learning_rate": 0.0006387802071346376, "loss": 0.3568, "step": 236580 }, { "epoch": 68.0638665132336, "grad_norm": 0.944663405418396, "learning_rate": 0.000638722669735328, "loss": 0.3332, "step": 236590 }, { "epoch": 68.06674338319908, "grad_norm": 0.9260532259941101, "learning_rate": 0.0006386651323360184, "loss": 0.347, "step": 236600 }, { "epoch": 68.06962025316456, "grad_norm": 1.709094762802124, "learning_rate": 0.0006386075949367089, "loss": 0.4517, "step": 236610 }, { "epoch": 68.07249712313003, "grad_norm": 1.5186767578125, "learning_rate": 0.0006385500575373993, "loss": 0.4827, "step": 236620 }, { "epoch": 68.07537399309551, "grad_norm": 1.1427899599075317, "learning_rate": 0.0006384925201380898, "loss": 0.3222, "step": 236630 }, { "epoch": 68.07825086306099, "grad_norm": 1.5564286708831787, "learning_rate": 0.0006384349827387802, "loss": 0.4412, "step": 236640 }, { "epoch": 68.08112773302646, "grad_norm": 1.257959246635437, "learning_rate": 0.0006383774453394706, "loss": 0.3761, "step": 236650 }, { "epoch": 68.08400460299194, "grad_norm": 1.145548701286316, "learning_rate": 0.0006383199079401611, "loss": 0.3799, "step": 236660 }, { "epoch": 68.08688147295742, "grad_norm": 1.1552011966705322, "learning_rate": 0.0006382623705408516, "loss": 0.3731, "step": 236670 }, { "epoch": 68.0897583429229, "grad_norm": 0.9352872967720032, "learning_rate": 0.0006382048331415419, "loss": 0.3456, "step": 236680 }, { "epoch": 68.09263521288838, "grad_norm": 1.281032919883728, "learning_rate": 0.0006381472957422325, "loss": 0.3046, "step": 236690 }, { "epoch": 68.09551208285386, "grad_norm": 1.7609416246414185, "learning_rate": 0.000638089758342923, "loss": 0.5043, "step": 236700 }, { "epoch": 68.09838895281933, "grad_norm": 1.321540117263794, "learning_rate": 0.0006380322209436133, "loss": 0.3822, "step": 236710 }, { "epoch": 68.10126582278481, "grad_norm": 0.9640836715698242, "learning_rate": 0.0006379746835443038, "loss": 0.4063, "step": 236720 }, { "epoch": 68.10414269275029, "grad_norm": 0.6657243371009827, "learning_rate": 0.0006379171461449943, "loss": 0.4316, "step": 236730 }, { "epoch": 68.10701956271576, "grad_norm": 1.2744170427322388, "learning_rate": 0.0006378596087456847, "loss": 0.3544, "step": 236740 }, { "epoch": 68.10989643268124, "grad_norm": 1.0650720596313477, "learning_rate": 0.0006378020713463751, "loss": 0.3354, "step": 236750 }, { "epoch": 68.11277330264672, "grad_norm": 0.5157638192176819, "learning_rate": 0.0006377445339470656, "loss": 0.3267, "step": 236760 }, { "epoch": 68.11565017261219, "grad_norm": 2.0956246852874756, "learning_rate": 0.000637686996547756, "loss": 0.4485, "step": 236770 }, { "epoch": 68.11852704257768, "grad_norm": 0.7612674236297607, "learning_rate": 0.0006376294591484465, "loss": 0.4015, "step": 236780 }, { "epoch": 68.12140391254316, "grad_norm": 0.8351346850395203, "learning_rate": 0.000637571921749137, "loss": 0.5591, "step": 236790 }, { "epoch": 68.12428078250863, "grad_norm": 0.9853724241256714, "learning_rate": 0.0006375143843498274, "loss": 0.4062, "step": 236800 }, { "epoch": 68.12715765247411, "grad_norm": 1.4466156959533691, "learning_rate": 0.0006374568469505179, "loss": 0.4197, "step": 236810 }, { "epoch": 68.13003452243959, "grad_norm": 1.135583519935608, "learning_rate": 0.0006373993095512083, "loss": 0.3791, "step": 236820 }, { "epoch": 68.13291139240506, "grad_norm": 1.2419530153274536, "learning_rate": 0.0006373417721518987, "loss": 0.4426, "step": 236830 }, { "epoch": 68.13578826237054, "grad_norm": 1.076981782913208, "learning_rate": 0.0006372842347525892, "loss": 0.3912, "step": 236840 }, { "epoch": 68.13866513233602, "grad_norm": 1.3895645141601562, "learning_rate": 0.0006372266973532797, "loss": 0.5587, "step": 236850 }, { "epoch": 68.14154200230149, "grad_norm": 0.6642548441886902, "learning_rate": 0.00063716915995397, "loss": 0.3477, "step": 236860 }, { "epoch": 68.14441887226697, "grad_norm": 0.7535011768341064, "learning_rate": 0.0006371116225546606, "loss": 0.3388, "step": 236870 }, { "epoch": 68.14729574223244, "grad_norm": 1.0150952339172363, "learning_rate": 0.000637054085155351, "loss": 0.3783, "step": 236880 }, { "epoch": 68.15017261219793, "grad_norm": 1.429327130317688, "learning_rate": 0.0006369965477560414, "loss": 0.3713, "step": 236890 }, { "epoch": 68.15304948216341, "grad_norm": 2.942246437072754, "learning_rate": 0.0006369390103567319, "loss": 0.4242, "step": 236900 }, { "epoch": 68.15592635212889, "grad_norm": 1.884197473526001, "learning_rate": 0.0006368814729574224, "loss": 0.4359, "step": 236910 }, { "epoch": 68.15880322209436, "grad_norm": 0.9548966288566589, "learning_rate": 0.0006368239355581128, "loss": 0.5443, "step": 236920 }, { "epoch": 68.16168009205984, "grad_norm": 2.3783252239227295, "learning_rate": 0.0006367663981588032, "loss": 0.395, "step": 236930 }, { "epoch": 68.16455696202532, "grad_norm": 1.1719776391983032, "learning_rate": 0.0006367088607594937, "loss": 0.3525, "step": 236940 }, { "epoch": 68.16743383199079, "grad_norm": 1.0665923357009888, "learning_rate": 0.0006366513233601841, "loss": 0.3499, "step": 236950 }, { "epoch": 68.17031070195627, "grad_norm": 0.8553360104560852, "learning_rate": 0.0006365937859608746, "loss": 0.4609, "step": 236960 }, { "epoch": 68.17318757192174, "grad_norm": 0.8647863268852234, "learning_rate": 0.000636536248561565, "loss": 0.3832, "step": 236970 }, { "epoch": 68.17606444188722, "grad_norm": 1.4665112495422363, "learning_rate": 0.0006364787111622555, "loss": 0.3446, "step": 236980 }, { "epoch": 68.17894131185271, "grad_norm": 1.0231149196624756, "learning_rate": 0.000636421173762946, "loss": 0.3727, "step": 236990 }, { "epoch": 68.18181818181819, "grad_norm": 1.445186972618103, "learning_rate": 0.0006363636363636364, "loss": 0.417, "step": 237000 }, { "epoch": 68.18469505178366, "grad_norm": 1.2540361881256104, "learning_rate": 0.0006363060989643268, "loss": 0.4253, "step": 237010 }, { "epoch": 68.18757192174914, "grad_norm": 2.0542798042297363, "learning_rate": 0.0006362485615650173, "loss": 0.4785, "step": 237020 }, { "epoch": 68.19044879171462, "grad_norm": 1.2784627676010132, "learning_rate": 0.0006361910241657078, "loss": 0.3859, "step": 237030 }, { "epoch": 68.19332566168009, "grad_norm": 0.6637321710586548, "learning_rate": 0.0006361334867663981, "loss": 0.4239, "step": 237040 }, { "epoch": 68.19620253164557, "grad_norm": 1.8485844135284424, "learning_rate": 0.0006360759493670886, "loss": 0.4727, "step": 237050 }, { "epoch": 68.19907940161104, "grad_norm": 1.3638393878936768, "learning_rate": 0.0006360184119677791, "loss": 0.527, "step": 237060 }, { "epoch": 68.20195627157652, "grad_norm": 1.3506141901016235, "learning_rate": 0.0006359608745684695, "loss": 0.4477, "step": 237070 }, { "epoch": 68.204833141542, "grad_norm": 1.1817498207092285, "learning_rate": 0.0006359033371691599, "loss": 0.4259, "step": 237080 }, { "epoch": 68.20771001150747, "grad_norm": 0.6600679159164429, "learning_rate": 0.0006358457997698505, "loss": 0.4266, "step": 237090 }, { "epoch": 68.21058688147296, "grad_norm": 1.589218020439148, "learning_rate": 0.0006357882623705409, "loss": 0.4761, "step": 237100 }, { "epoch": 68.21346375143844, "grad_norm": 1.2632266283035278, "learning_rate": 0.0006357307249712313, "loss": 0.4106, "step": 237110 }, { "epoch": 68.21634062140392, "grad_norm": 0.6884087920188904, "learning_rate": 0.0006356731875719218, "loss": 0.4082, "step": 237120 }, { "epoch": 68.21921749136939, "grad_norm": 2.386927843093872, "learning_rate": 0.0006356156501726122, "loss": 0.4901, "step": 237130 }, { "epoch": 68.22209436133487, "grad_norm": 1.376845121383667, "learning_rate": 0.0006355581127733027, "loss": 0.5254, "step": 237140 }, { "epoch": 68.22497123130034, "grad_norm": 1.311230182647705, "learning_rate": 0.0006355005753739931, "loss": 0.4939, "step": 237150 }, { "epoch": 68.22784810126582, "grad_norm": 1.6469876766204834, "learning_rate": 0.0006354430379746835, "loss": 0.573, "step": 237160 }, { "epoch": 68.2307249712313, "grad_norm": 0.7297873497009277, "learning_rate": 0.000635385500575374, "loss": 0.3801, "step": 237170 }, { "epoch": 68.23360184119677, "grad_norm": 1.216846227645874, "learning_rate": 0.0006353279631760645, "loss": 0.4178, "step": 237180 }, { "epoch": 68.23647871116225, "grad_norm": 1.8386430740356445, "learning_rate": 0.0006352704257767548, "loss": 0.3523, "step": 237190 }, { "epoch": 68.23935558112774, "grad_norm": 1.503359317779541, "learning_rate": 0.0006352128883774454, "loss": 0.3276, "step": 237200 }, { "epoch": 68.24223245109322, "grad_norm": 0.870733380317688, "learning_rate": 0.0006351553509781359, "loss": 0.5243, "step": 237210 }, { "epoch": 68.24510932105869, "grad_norm": 1.3121811151504517, "learning_rate": 0.0006350978135788262, "loss": 0.4945, "step": 237220 }, { "epoch": 68.24798619102417, "grad_norm": 1.040985107421875, "learning_rate": 0.0006350402761795167, "loss": 0.3913, "step": 237230 }, { "epoch": 68.25086306098964, "grad_norm": 0.753684401512146, "learning_rate": 0.0006349827387802072, "loss": 0.3641, "step": 237240 }, { "epoch": 68.25373993095512, "grad_norm": 2.189852476119995, "learning_rate": 0.0006349252013808976, "loss": 0.3071, "step": 237250 }, { "epoch": 68.2566168009206, "grad_norm": 1.2836737632751465, "learning_rate": 0.000634867663981588, "loss": 0.3585, "step": 237260 }, { "epoch": 68.25949367088607, "grad_norm": 2.1127004623413086, "learning_rate": 0.0006348101265822786, "loss": 0.5034, "step": 237270 }, { "epoch": 68.26237054085155, "grad_norm": 0.8673487901687622, "learning_rate": 0.0006347525891829689, "loss": 0.3537, "step": 237280 }, { "epoch": 68.26524741081703, "grad_norm": 1.2478758096694946, "learning_rate": 0.0006346950517836594, "loss": 0.5733, "step": 237290 }, { "epoch": 68.2681242807825, "grad_norm": 1.8662933111190796, "learning_rate": 0.0006346375143843499, "loss": 0.4442, "step": 237300 }, { "epoch": 68.27100115074799, "grad_norm": 0.7201921343803406, "learning_rate": 0.0006345799769850403, "loss": 0.3553, "step": 237310 }, { "epoch": 68.27387802071347, "grad_norm": 1.5516616106033325, "learning_rate": 0.0006345224395857308, "loss": 0.5639, "step": 237320 }, { "epoch": 68.27675489067894, "grad_norm": 1.0645546913146973, "learning_rate": 0.0006344649021864211, "loss": 0.4096, "step": 237330 }, { "epoch": 68.27963176064442, "grad_norm": 1.346313714981079, "learning_rate": 0.0006344073647871116, "loss": 0.4256, "step": 237340 }, { "epoch": 68.2825086306099, "grad_norm": 1.4954220056533813, "learning_rate": 0.0006343498273878021, "loss": 0.3907, "step": 237350 }, { "epoch": 68.28538550057537, "grad_norm": 0.768246591091156, "learning_rate": 0.0006342922899884925, "loss": 0.3524, "step": 237360 }, { "epoch": 68.28826237054085, "grad_norm": 1.6914963722229004, "learning_rate": 0.0006342347525891829, "loss": 0.4068, "step": 237370 }, { "epoch": 68.29113924050633, "grad_norm": 1.1606388092041016, "learning_rate": 0.0006341772151898735, "loss": 0.4654, "step": 237380 }, { "epoch": 68.2940161104718, "grad_norm": 0.7344480156898499, "learning_rate": 0.0006341196777905638, "loss": 0.3575, "step": 237390 }, { "epoch": 68.29689298043728, "grad_norm": 1.2157129049301147, "learning_rate": 0.0006340621403912543, "loss": 0.3951, "step": 237400 }, { "epoch": 68.29976985040277, "grad_norm": 1.4754674434661865, "learning_rate": 0.0006340046029919448, "loss": 0.5442, "step": 237410 }, { "epoch": 68.30264672036824, "grad_norm": 0.8676685094833374, "learning_rate": 0.0006339470655926352, "loss": 0.4, "step": 237420 }, { "epoch": 68.30552359033372, "grad_norm": 1.2362264394760132, "learning_rate": 0.0006338895281933257, "loss": 0.4742, "step": 237430 }, { "epoch": 68.3084004602992, "grad_norm": 1.4917305707931519, "learning_rate": 0.0006338319907940161, "loss": 0.4334, "step": 237440 }, { "epoch": 68.31127733026467, "grad_norm": 0.9725252985954285, "learning_rate": 0.0006337744533947065, "loss": 0.3971, "step": 237450 }, { "epoch": 68.31415420023015, "grad_norm": 1.4093835353851318, "learning_rate": 0.000633716915995397, "loss": 0.3605, "step": 237460 }, { "epoch": 68.31703107019563, "grad_norm": 1.219463586807251, "learning_rate": 0.0006336593785960875, "loss": 0.4383, "step": 237470 }, { "epoch": 68.3199079401611, "grad_norm": 0.7622554898262024, "learning_rate": 0.0006336018411967778, "loss": 0.3455, "step": 237480 }, { "epoch": 68.32278481012658, "grad_norm": 1.4098496437072754, "learning_rate": 0.0006335443037974684, "loss": 0.4693, "step": 237490 }, { "epoch": 68.32566168009205, "grad_norm": 1.410754919052124, "learning_rate": 0.0006334867663981589, "loss": 0.4205, "step": 237500 }, { "epoch": 68.32853855005754, "grad_norm": 1.2955023050308228, "learning_rate": 0.0006334292289988492, "loss": 0.3721, "step": 237510 }, { "epoch": 68.33141542002302, "grad_norm": 1.2674871683120728, "learning_rate": 0.0006333716915995397, "loss": 0.4329, "step": 237520 }, { "epoch": 68.3342922899885, "grad_norm": 0.9264430999755859, "learning_rate": 0.0006333141542002302, "loss": 0.4268, "step": 237530 }, { "epoch": 68.33716915995397, "grad_norm": 0.7279974818229675, "learning_rate": 0.0006332566168009206, "loss": 0.4121, "step": 237540 }, { "epoch": 68.34004602991945, "grad_norm": 0.9743542075157166, "learning_rate": 0.000633199079401611, "loss": 0.3982, "step": 237550 }, { "epoch": 68.34292289988493, "grad_norm": 2.067683219909668, "learning_rate": 0.0006331415420023016, "loss": 0.4549, "step": 237560 }, { "epoch": 68.3457997698504, "grad_norm": 0.7479209899902344, "learning_rate": 0.0006330840046029919, "loss": 0.3948, "step": 237570 }, { "epoch": 68.34867663981588, "grad_norm": 2.457698345184326, "learning_rate": 0.0006330264672036824, "loss": 0.5189, "step": 237580 }, { "epoch": 68.35155350978135, "grad_norm": 0.903008759021759, "learning_rate": 0.0006329689298043728, "loss": 0.3927, "step": 237590 }, { "epoch": 68.35443037974683, "grad_norm": 2.35945725440979, "learning_rate": 0.0006329113924050633, "loss": 0.3808, "step": 237600 }, { "epoch": 68.3573072497123, "grad_norm": 1.6662007570266724, "learning_rate": 0.0006328538550057538, "loss": 0.5088, "step": 237610 }, { "epoch": 68.3601841196778, "grad_norm": 1.5812870264053345, "learning_rate": 0.0006327963176064442, "loss": 0.4599, "step": 237620 }, { "epoch": 68.36306098964327, "grad_norm": 0.8171576857566833, "learning_rate": 0.0006327387802071346, "loss": 0.4298, "step": 237630 }, { "epoch": 68.36593785960875, "grad_norm": 0.8217524886131287, "learning_rate": 0.0006326812428078251, "loss": 0.3528, "step": 237640 }, { "epoch": 68.36881472957423, "grad_norm": 2.242605209350586, "learning_rate": 0.0006326237054085156, "loss": 0.3971, "step": 237650 }, { "epoch": 68.3716915995397, "grad_norm": 1.3790757656097412, "learning_rate": 0.0006325661680092059, "loss": 0.4565, "step": 237660 }, { "epoch": 68.37456846950518, "grad_norm": 1.633317470550537, "learning_rate": 0.0006325086306098965, "loss": 0.4595, "step": 237670 }, { "epoch": 68.37744533947065, "grad_norm": 1.4694772958755493, "learning_rate": 0.000632451093210587, "loss": 0.4216, "step": 237680 }, { "epoch": 68.38032220943613, "grad_norm": 0.7999711632728577, "learning_rate": 0.0006323935558112773, "loss": 0.3409, "step": 237690 }, { "epoch": 68.3831990794016, "grad_norm": 0.7345283627510071, "learning_rate": 0.0006323360184119677, "loss": 0.35, "step": 237700 }, { "epoch": 68.38607594936708, "grad_norm": 0.8882680535316467, "learning_rate": 0.0006322784810126583, "loss": 0.4932, "step": 237710 }, { "epoch": 68.38895281933257, "grad_norm": 1.3997694253921509, "learning_rate": 0.0006322209436133487, "loss": 0.4648, "step": 237720 }, { "epoch": 68.39182968929805, "grad_norm": 1.6907397508621216, "learning_rate": 0.0006321634062140391, "loss": 0.5072, "step": 237730 }, { "epoch": 68.39470655926353, "grad_norm": 0.9284707903862, "learning_rate": 0.0006321058688147296, "loss": 0.3952, "step": 237740 }, { "epoch": 68.397583429229, "grad_norm": 1.0325698852539062, "learning_rate": 0.00063204833141542, "loss": 0.3707, "step": 237750 }, { "epoch": 68.40046029919448, "grad_norm": 1.3645262718200684, "learning_rate": 0.0006319907940161105, "loss": 0.3779, "step": 237760 }, { "epoch": 68.40333716915995, "grad_norm": 0.992508053779602, "learning_rate": 0.0006319332566168009, "loss": 0.4516, "step": 237770 }, { "epoch": 68.40621403912543, "grad_norm": 1.124534249305725, "learning_rate": 0.0006318757192174914, "loss": 0.3698, "step": 237780 }, { "epoch": 68.4090909090909, "grad_norm": 1.096630334854126, "learning_rate": 0.0006318181818181818, "loss": 0.4004, "step": 237790 }, { "epoch": 68.41196777905638, "grad_norm": 1.7977886199951172, "learning_rate": 0.0006317606444188723, "loss": 0.4872, "step": 237800 }, { "epoch": 68.41484464902186, "grad_norm": 1.3106287717819214, "learning_rate": 0.0006317031070195627, "loss": 0.3531, "step": 237810 }, { "epoch": 68.41772151898734, "grad_norm": 2.0199029445648193, "learning_rate": 0.0006316455696202532, "loss": 0.3648, "step": 237820 }, { "epoch": 68.42059838895283, "grad_norm": 1.289559006690979, "learning_rate": 0.0006315880322209437, "loss": 0.4715, "step": 237830 }, { "epoch": 68.4234752589183, "grad_norm": 1.6370539665222168, "learning_rate": 0.000631530494821634, "loss": 0.3851, "step": 237840 }, { "epoch": 68.42635212888378, "grad_norm": 1.388284683227539, "learning_rate": 0.0006314729574223246, "loss": 0.4811, "step": 237850 }, { "epoch": 68.42922899884925, "grad_norm": 0.9176751971244812, "learning_rate": 0.000631415420023015, "loss": 0.4052, "step": 237860 }, { "epoch": 68.43210586881473, "grad_norm": 1.8279279470443726, "learning_rate": 0.0006313578826237054, "loss": 0.4857, "step": 237870 }, { "epoch": 68.4349827387802, "grad_norm": 0.8174393177032471, "learning_rate": 0.0006313003452243958, "loss": 0.4458, "step": 237880 }, { "epoch": 68.43785960874568, "grad_norm": 1.5970193147659302, "learning_rate": 0.0006312428078250864, "loss": 0.4677, "step": 237890 }, { "epoch": 68.44073647871116, "grad_norm": 1.686776876449585, "learning_rate": 0.0006311852704257768, "loss": 0.3211, "step": 237900 }, { "epoch": 68.44361334867664, "grad_norm": 1.4009029865264893, "learning_rate": 0.0006311277330264672, "loss": 0.4752, "step": 237910 }, { "epoch": 68.44649021864211, "grad_norm": 1.4800013303756714, "learning_rate": 0.0006310701956271577, "loss": 0.4024, "step": 237920 }, { "epoch": 68.4493670886076, "grad_norm": 1.300710916519165, "learning_rate": 0.0006310126582278481, "loss": 0.3952, "step": 237930 }, { "epoch": 68.45224395857308, "grad_norm": 0.9284132719039917, "learning_rate": 0.0006309551208285386, "loss": 0.3617, "step": 237940 }, { "epoch": 68.45512082853855, "grad_norm": 2.1824262142181396, "learning_rate": 0.000630897583429229, "loss": 0.3906, "step": 237950 }, { "epoch": 68.45799769850403, "grad_norm": 1.5319024324417114, "learning_rate": 0.0006308400460299195, "loss": 0.4553, "step": 237960 }, { "epoch": 68.4608745684695, "grad_norm": 1.648654818534851, "learning_rate": 0.0006307825086306099, "loss": 0.5094, "step": 237970 }, { "epoch": 68.46375143843498, "grad_norm": 1.1537864208221436, "learning_rate": 0.0006307249712313004, "loss": 0.4959, "step": 237980 }, { "epoch": 68.46662830840046, "grad_norm": 0.8900277614593506, "learning_rate": 0.0006306674338319907, "loss": 0.4233, "step": 237990 }, { "epoch": 68.46950517836594, "grad_norm": 1.4762333631515503, "learning_rate": 0.0006306098964326813, "loss": 0.483, "step": 238000 }, { "epoch": 68.47238204833141, "grad_norm": 0.9883732199668884, "learning_rate": 0.0006305523590333718, "loss": 0.4536, "step": 238010 }, { "epoch": 68.47525891829689, "grad_norm": 0.7794633507728577, "learning_rate": 0.0006304948216340621, "loss": 0.3276, "step": 238020 }, { "epoch": 68.47813578826236, "grad_norm": 1.375594973564148, "learning_rate": 0.0006304372842347526, "loss": 0.4194, "step": 238030 }, { "epoch": 68.48101265822785, "grad_norm": 1.1644537448883057, "learning_rate": 0.0006303797468354431, "loss": 0.4766, "step": 238040 }, { "epoch": 68.48388952819333, "grad_norm": 0.8383835554122925, "learning_rate": 0.0006303222094361335, "loss": 0.3684, "step": 238050 }, { "epoch": 68.4867663981588, "grad_norm": 1.6646373271942139, "learning_rate": 0.0006302646720368239, "loss": 0.3573, "step": 238060 }, { "epoch": 68.48964326812428, "grad_norm": 2.7754478454589844, "learning_rate": 0.0006302071346375145, "loss": 0.4339, "step": 238070 }, { "epoch": 68.49252013808976, "grad_norm": 2.278639793395996, "learning_rate": 0.0006301495972382048, "loss": 0.4992, "step": 238080 }, { "epoch": 68.49539700805524, "grad_norm": 1.2121250629425049, "learning_rate": 0.0006300920598388953, "loss": 0.3893, "step": 238090 }, { "epoch": 68.49827387802071, "grad_norm": 1.5680840015411377, "learning_rate": 0.0006300345224395858, "loss": 0.4524, "step": 238100 }, { "epoch": 68.50115074798619, "grad_norm": 1.3318425416946411, "learning_rate": 0.0006299769850402762, "loss": 0.4128, "step": 238110 }, { "epoch": 68.50402761795166, "grad_norm": 0.8953407406806946, "learning_rate": 0.0006299194476409667, "loss": 0.3922, "step": 238120 }, { "epoch": 68.50690448791714, "grad_norm": 0.731471061706543, "learning_rate": 0.0006298619102416571, "loss": 0.445, "step": 238130 }, { "epoch": 68.50978135788263, "grad_norm": 1.2918306589126587, "learning_rate": 0.0006298043728423475, "loss": 0.4706, "step": 238140 }, { "epoch": 68.5126582278481, "grad_norm": 1.2068824768066406, "learning_rate": 0.000629746835443038, "loss": 0.3982, "step": 238150 }, { "epoch": 68.51553509781358, "grad_norm": 0.713080883026123, "learning_rate": 0.0006296892980437284, "loss": 0.4411, "step": 238160 }, { "epoch": 68.51841196777906, "grad_norm": 1.5591832399368286, "learning_rate": 0.0006296317606444188, "loss": 0.4568, "step": 238170 }, { "epoch": 68.52128883774454, "grad_norm": 1.5811294317245483, "learning_rate": 0.0006295742232451094, "loss": 0.5933, "step": 238180 }, { "epoch": 68.52416570771001, "grad_norm": 1.6588177680969238, "learning_rate": 0.0006295166858457997, "loss": 0.4709, "step": 238190 }, { "epoch": 68.52704257767549, "grad_norm": 1.1793315410614014, "learning_rate": 0.0006294591484464902, "loss": 0.3973, "step": 238200 }, { "epoch": 68.52991944764096, "grad_norm": 1.4621599912643433, "learning_rate": 0.0006294016110471807, "loss": 0.4369, "step": 238210 }, { "epoch": 68.53279631760644, "grad_norm": 1.6512911319732666, "learning_rate": 0.0006293440736478711, "loss": 0.4572, "step": 238220 }, { "epoch": 68.53567318757192, "grad_norm": 2.09102725982666, "learning_rate": 0.0006292865362485616, "loss": 0.4213, "step": 238230 }, { "epoch": 68.53855005753739, "grad_norm": 2.483208179473877, "learning_rate": 0.000629228998849252, "loss": 0.462, "step": 238240 }, { "epoch": 68.54142692750288, "grad_norm": 0.7412474751472473, "learning_rate": 0.0006291714614499425, "loss": 0.4488, "step": 238250 }, { "epoch": 68.54430379746836, "grad_norm": 0.9243711829185486, "learning_rate": 0.0006291139240506329, "loss": 0.3554, "step": 238260 }, { "epoch": 68.54718066743384, "grad_norm": 1.1425443887710571, "learning_rate": 0.0006290563866513234, "loss": 0.3612, "step": 238270 }, { "epoch": 68.55005753739931, "grad_norm": 0.8444837331771851, "learning_rate": 0.0006289988492520137, "loss": 0.4493, "step": 238280 }, { "epoch": 68.55293440736479, "grad_norm": 1.700514316558838, "learning_rate": 0.0006289413118527043, "loss": 0.4301, "step": 238290 }, { "epoch": 68.55581127733026, "grad_norm": 0.9389243721961975, "learning_rate": 0.0006288837744533948, "loss": 0.517, "step": 238300 }, { "epoch": 68.55868814729574, "grad_norm": 0.9357514381408691, "learning_rate": 0.0006288262370540851, "loss": 0.3966, "step": 238310 }, { "epoch": 68.56156501726122, "grad_norm": 1.5786386728286743, "learning_rate": 0.0006287686996547756, "loss": 0.5696, "step": 238320 }, { "epoch": 68.56444188722669, "grad_norm": 0.8029116988182068, "learning_rate": 0.0006287111622554661, "loss": 0.3787, "step": 238330 }, { "epoch": 68.56731875719217, "grad_norm": 1.1531178951263428, "learning_rate": 0.0006286536248561565, "loss": 0.4376, "step": 238340 }, { "epoch": 68.57019562715766, "grad_norm": 1.5581021308898926, "learning_rate": 0.0006285960874568469, "loss": 0.4154, "step": 238350 }, { "epoch": 68.57307249712314, "grad_norm": 2.028695821762085, "learning_rate": 0.0006285385500575375, "loss": 0.4557, "step": 238360 }, { "epoch": 68.57594936708861, "grad_norm": 1.3982672691345215, "learning_rate": 0.0006284810126582278, "loss": 0.3226, "step": 238370 }, { "epoch": 68.57882623705409, "grad_norm": 1.1502618789672852, "learning_rate": 0.0006284234752589183, "loss": 0.4947, "step": 238380 }, { "epoch": 68.58170310701956, "grad_norm": 1.3812565803527832, "learning_rate": 0.0006283659378596087, "loss": 0.4743, "step": 238390 }, { "epoch": 68.58457997698504, "grad_norm": 2.67964506149292, "learning_rate": 0.0006283084004602992, "loss": 0.4345, "step": 238400 }, { "epoch": 68.58745684695052, "grad_norm": 1.2323522567749023, "learning_rate": 0.0006282508630609897, "loss": 0.3881, "step": 238410 }, { "epoch": 68.59033371691599, "grad_norm": 1.0016628503799438, "learning_rate": 0.0006281933256616801, "loss": 0.3921, "step": 238420 }, { "epoch": 68.59321058688147, "grad_norm": 1.6444824934005737, "learning_rate": 0.0006281357882623705, "loss": 0.4256, "step": 238430 }, { "epoch": 68.59608745684694, "grad_norm": 0.9100323915481567, "learning_rate": 0.000628078250863061, "loss": 0.3866, "step": 238440 }, { "epoch": 68.59896432681242, "grad_norm": 0.9357041716575623, "learning_rate": 0.0006280207134637515, "loss": 0.4941, "step": 238450 }, { "epoch": 68.60184119677791, "grad_norm": 1.0853755474090576, "learning_rate": 0.0006279631760644418, "loss": 0.379, "step": 238460 }, { "epoch": 68.60471806674339, "grad_norm": 1.156546950340271, "learning_rate": 0.0006279056386651324, "loss": 0.3434, "step": 238470 }, { "epoch": 68.60759493670886, "grad_norm": 0.9655113816261292, "learning_rate": 0.0006278481012658228, "loss": 0.4013, "step": 238480 }, { "epoch": 68.61047180667434, "grad_norm": 1.6959648132324219, "learning_rate": 0.0006277905638665132, "loss": 0.45, "step": 238490 }, { "epoch": 68.61334867663982, "grad_norm": 1.028794527053833, "learning_rate": 0.0006277330264672036, "loss": 0.477, "step": 238500 }, { "epoch": 68.61622554660529, "grad_norm": 0.6256785988807678, "learning_rate": 0.0006276754890678942, "loss": 0.5767, "step": 238510 }, { "epoch": 68.61910241657077, "grad_norm": 0.6897086501121521, "learning_rate": 0.0006276179516685846, "loss": 0.4844, "step": 238520 }, { "epoch": 68.62197928653625, "grad_norm": 0.8682060837745667, "learning_rate": 0.000627560414269275, "loss": 0.3389, "step": 238530 }, { "epoch": 68.62485615650172, "grad_norm": 1.466736078262329, "learning_rate": 0.0006275028768699656, "loss": 0.3666, "step": 238540 }, { "epoch": 68.6277330264672, "grad_norm": 0.6897740960121155, "learning_rate": 0.0006274453394706559, "loss": 0.3976, "step": 238550 }, { "epoch": 68.63060989643269, "grad_norm": 0.9418630599975586, "learning_rate": 0.0006273878020713464, "loss": 0.4365, "step": 238560 }, { "epoch": 68.63348676639816, "grad_norm": 1.7089378833770752, "learning_rate": 0.0006273302646720368, "loss": 0.4572, "step": 238570 }, { "epoch": 68.63636363636364, "grad_norm": 0.7672497630119324, "learning_rate": 0.0006272727272727273, "loss": 0.3212, "step": 238580 }, { "epoch": 68.63924050632912, "grad_norm": 1.5945173501968384, "learning_rate": 0.0006272151898734177, "loss": 0.4063, "step": 238590 }, { "epoch": 68.64211737629459, "grad_norm": 1.4793081283569336, "learning_rate": 0.0006271576524741082, "loss": 0.4595, "step": 238600 }, { "epoch": 68.64499424626007, "grad_norm": 1.2750247716903687, "learning_rate": 0.0006271001150747985, "loss": 0.4769, "step": 238610 }, { "epoch": 68.64787111622555, "grad_norm": 2.150343894958496, "learning_rate": 0.0006270425776754891, "loss": 0.4167, "step": 238620 }, { "epoch": 68.65074798619102, "grad_norm": 1.1453174352645874, "learning_rate": 0.0006269850402761796, "loss": 0.564, "step": 238630 }, { "epoch": 68.6536248561565, "grad_norm": 1.1486396789550781, "learning_rate": 0.0006269275028768699, "loss": 0.3816, "step": 238640 }, { "epoch": 68.65650172612197, "grad_norm": 1.0607738494873047, "learning_rate": 0.0006268699654775605, "loss": 0.4404, "step": 238650 }, { "epoch": 68.65937859608745, "grad_norm": 1.0791242122650146, "learning_rate": 0.0006268124280782509, "loss": 0.4387, "step": 238660 }, { "epoch": 68.66225546605294, "grad_norm": 0.7549475431442261, "learning_rate": 0.0006267548906789413, "loss": 0.3762, "step": 238670 }, { "epoch": 68.66513233601842, "grad_norm": 1.23214590549469, "learning_rate": 0.0006266973532796317, "loss": 0.4809, "step": 238680 }, { "epoch": 68.66800920598389, "grad_norm": 1.2095468044281006, "learning_rate": 0.0006266398158803223, "loss": 0.4447, "step": 238690 }, { "epoch": 68.67088607594937, "grad_norm": 1.6873204708099365, "learning_rate": 0.0006265822784810126, "loss": 0.4098, "step": 238700 }, { "epoch": 68.67376294591485, "grad_norm": 3.5345842838287354, "learning_rate": 0.0006265247410817031, "loss": 0.601, "step": 238710 }, { "epoch": 68.67663981588032, "grad_norm": 2.2457361221313477, "learning_rate": 0.0006264672036823936, "loss": 0.4376, "step": 238720 }, { "epoch": 68.6795166858458, "grad_norm": 0.9647027850151062, "learning_rate": 0.000626409666283084, "loss": 0.3716, "step": 238730 }, { "epoch": 68.68239355581127, "grad_norm": 1.037416696548462, "learning_rate": 0.0006263521288837745, "loss": 0.4013, "step": 238740 }, { "epoch": 68.68527042577675, "grad_norm": 1.4821807146072388, "learning_rate": 0.0006262945914844649, "loss": 0.3619, "step": 238750 }, { "epoch": 68.68814729574223, "grad_norm": 2.002484083175659, "learning_rate": 0.0006262370540851554, "loss": 0.5547, "step": 238760 }, { "epoch": 68.69102416570772, "grad_norm": 0.9138321280479431, "learning_rate": 0.0006261795166858458, "loss": 0.4693, "step": 238770 }, { "epoch": 68.69390103567319, "grad_norm": 1.5884323120117188, "learning_rate": 0.0006261219792865363, "loss": 0.3667, "step": 238780 }, { "epoch": 68.69677790563867, "grad_norm": 1.9158694744110107, "learning_rate": 0.0006260644418872266, "loss": 0.4256, "step": 238790 }, { "epoch": 68.69965477560415, "grad_norm": 2.302379846572876, "learning_rate": 0.0006260069044879172, "loss": 0.4714, "step": 238800 }, { "epoch": 68.70253164556962, "grad_norm": 1.2535313367843628, "learning_rate": 0.0006259493670886077, "loss": 0.4871, "step": 238810 }, { "epoch": 68.7054085155351, "grad_norm": 0.9725884199142456, "learning_rate": 0.000625891829689298, "loss": 0.3674, "step": 238820 }, { "epoch": 68.70828538550057, "grad_norm": 1.2009575366973877, "learning_rate": 0.0006258342922899886, "loss": 0.4892, "step": 238830 }, { "epoch": 68.71116225546605, "grad_norm": 1.208552598953247, "learning_rate": 0.000625776754890679, "loss": 0.4917, "step": 238840 }, { "epoch": 68.71403912543153, "grad_norm": 1.8064789772033691, "learning_rate": 0.0006257192174913694, "loss": 0.4334, "step": 238850 }, { "epoch": 68.716915995397, "grad_norm": 0.9547500610351562, "learning_rate": 0.0006256616800920598, "loss": 0.4332, "step": 238860 }, { "epoch": 68.71979286536248, "grad_norm": 0.8712066411972046, "learning_rate": 0.0006256041426927504, "loss": 0.4007, "step": 238870 }, { "epoch": 68.72266973532797, "grad_norm": 0.8494831323623657, "learning_rate": 0.0006255466052934407, "loss": 0.3934, "step": 238880 }, { "epoch": 68.72554660529345, "grad_norm": 0.6134182214736938, "learning_rate": 0.0006254890678941312, "loss": 0.4501, "step": 238890 }, { "epoch": 68.72842347525892, "grad_norm": 1.3174599409103394, "learning_rate": 0.0006254315304948217, "loss": 0.3847, "step": 238900 }, { "epoch": 68.7313003452244, "grad_norm": 1.0911250114440918, "learning_rate": 0.0006253739930955121, "loss": 0.3647, "step": 238910 }, { "epoch": 68.73417721518987, "grad_norm": 0.6120995879173279, "learning_rate": 0.0006253164556962026, "loss": 0.4816, "step": 238920 }, { "epoch": 68.73705408515535, "grad_norm": 1.4378492832183838, "learning_rate": 0.000625258918296893, "loss": 0.3751, "step": 238930 }, { "epoch": 68.73993095512083, "grad_norm": 1.4033173322677612, "learning_rate": 0.0006252013808975835, "loss": 0.444, "step": 238940 }, { "epoch": 68.7428078250863, "grad_norm": 0.7559819221496582, "learning_rate": 0.0006251438434982739, "loss": 0.3722, "step": 238950 }, { "epoch": 68.74568469505178, "grad_norm": 1.4349839687347412, "learning_rate": 0.0006250863060989644, "loss": 0.3953, "step": 238960 }, { "epoch": 68.74856156501725, "grad_norm": 1.2975051403045654, "learning_rate": 0.0006250287686996547, "loss": 0.4345, "step": 238970 }, { "epoch": 68.75143843498275, "grad_norm": 1.6931873559951782, "learning_rate": 0.0006249712313003453, "loss": 0.4975, "step": 238980 }, { "epoch": 68.75431530494822, "grad_norm": 1.5128674507141113, "learning_rate": 0.0006249136939010356, "loss": 0.4403, "step": 238990 }, { "epoch": 68.7571921749137, "grad_norm": 0.9933321475982666, "learning_rate": 0.0006248561565017261, "loss": 0.3845, "step": 239000 }, { "epoch": 68.76006904487917, "grad_norm": 1.3392682075500488, "learning_rate": 0.0006247986191024166, "loss": 0.4961, "step": 239010 }, { "epoch": 68.76294591484465, "grad_norm": 1.181686520576477, "learning_rate": 0.000624741081703107, "loss": 0.3695, "step": 239020 }, { "epoch": 68.76582278481013, "grad_norm": 0.6272326707839966, "learning_rate": 0.0006246835443037975, "loss": 0.4248, "step": 239030 }, { "epoch": 68.7686996547756, "grad_norm": 2.209223747253418, "learning_rate": 0.0006246260069044879, "loss": 0.4503, "step": 239040 }, { "epoch": 68.77157652474108, "grad_norm": 0.8089357614517212, "learning_rate": 0.0006245684695051784, "loss": 0.3556, "step": 239050 }, { "epoch": 68.77445339470655, "grad_norm": 1.1121926307678223, "learning_rate": 0.0006245109321058688, "loss": 0.4986, "step": 239060 }, { "epoch": 68.77733026467203, "grad_norm": 1.0641496181488037, "learning_rate": 0.0006244533947065593, "loss": 0.3909, "step": 239070 }, { "epoch": 68.78020713463752, "grad_norm": 0.9850345253944397, "learning_rate": 0.0006243958573072496, "loss": 0.4777, "step": 239080 }, { "epoch": 68.783084004603, "grad_norm": 1.449786901473999, "learning_rate": 0.0006243383199079402, "loss": 0.4881, "step": 239090 }, { "epoch": 68.78596087456847, "grad_norm": 1.0242536067962646, "learning_rate": 0.0006242807825086307, "loss": 0.4462, "step": 239100 }, { "epoch": 68.78883774453395, "grad_norm": 1.5180943012237549, "learning_rate": 0.000624223245109321, "loss": 0.5021, "step": 239110 }, { "epoch": 68.79171461449943, "grad_norm": 2.7774369716644287, "learning_rate": 0.0006241657077100115, "loss": 0.4977, "step": 239120 }, { "epoch": 68.7945914844649, "grad_norm": 1.2577872276306152, "learning_rate": 0.000624108170310702, "loss": 0.3289, "step": 239130 }, { "epoch": 68.79746835443038, "grad_norm": 1.0771657228469849, "learning_rate": 0.0006240506329113924, "loss": 0.5672, "step": 239140 }, { "epoch": 68.80034522439585, "grad_norm": 3.332063674926758, "learning_rate": 0.0006239930955120828, "loss": 0.5828, "step": 239150 }, { "epoch": 68.80322209436133, "grad_norm": 0.8685708045959473, "learning_rate": 0.0006239355581127734, "loss": 0.7012, "step": 239160 }, { "epoch": 68.80609896432681, "grad_norm": 1.7682123184204102, "learning_rate": 0.0006238780207134637, "loss": 0.3947, "step": 239170 }, { "epoch": 68.80897583429228, "grad_norm": 1.9525563716888428, "learning_rate": 0.0006238204833141542, "loss": 0.3845, "step": 239180 }, { "epoch": 68.81185270425777, "grad_norm": 1.219763159751892, "learning_rate": 0.0006237629459148446, "loss": 0.4425, "step": 239190 }, { "epoch": 68.81472957422325, "grad_norm": 1.1610453128814697, "learning_rate": 0.0006237054085155351, "loss": 0.5624, "step": 239200 }, { "epoch": 68.81760644418873, "grad_norm": 0.9523453712463379, "learning_rate": 0.0006236478711162256, "loss": 0.445, "step": 239210 }, { "epoch": 68.8204833141542, "grad_norm": 1.7072747945785522, "learning_rate": 0.000623590333716916, "loss": 0.5324, "step": 239220 }, { "epoch": 68.82336018411968, "grad_norm": 1.3337618112564087, "learning_rate": 0.0006235327963176065, "loss": 0.4562, "step": 239230 }, { "epoch": 68.82623705408515, "grad_norm": 0.5634322166442871, "learning_rate": 0.0006234752589182969, "loss": 0.4461, "step": 239240 }, { "epoch": 68.82911392405063, "grad_norm": 1.0086562633514404, "learning_rate": 0.0006234177215189874, "loss": 0.4439, "step": 239250 }, { "epoch": 68.83199079401611, "grad_norm": 0.8991851806640625, "learning_rate": 0.0006233601841196777, "loss": 0.583, "step": 239260 }, { "epoch": 68.83486766398158, "grad_norm": 0.8456383943557739, "learning_rate": 0.0006233026467203683, "loss": 0.4885, "step": 239270 }, { "epoch": 68.83774453394706, "grad_norm": 0.768723726272583, "learning_rate": 0.0006232451093210587, "loss": 0.4384, "step": 239280 }, { "epoch": 68.84062140391255, "grad_norm": 1.0287705659866333, "learning_rate": 0.0006231875719217491, "loss": 0.3716, "step": 239290 }, { "epoch": 68.84349827387803, "grad_norm": 1.0314459800720215, "learning_rate": 0.0006231300345224395, "loss": 0.3593, "step": 239300 }, { "epoch": 68.8463751438435, "grad_norm": 1.1347490549087524, "learning_rate": 0.0006230724971231301, "loss": 0.4524, "step": 239310 }, { "epoch": 68.84925201380898, "grad_norm": 1.7906272411346436, "learning_rate": 0.0006230149597238205, "loss": 0.3639, "step": 239320 }, { "epoch": 68.85212888377445, "grad_norm": 1.8375862836837769, "learning_rate": 0.0006229574223245109, "loss": 0.534, "step": 239330 }, { "epoch": 68.85500575373993, "grad_norm": 1.0231963396072388, "learning_rate": 0.0006228998849252015, "loss": 0.3265, "step": 239340 }, { "epoch": 68.85788262370541, "grad_norm": 0.6110045313835144, "learning_rate": 0.0006228423475258918, "loss": 0.4061, "step": 239350 }, { "epoch": 68.86075949367088, "grad_norm": 1.435362458229065, "learning_rate": 0.0006227848101265823, "loss": 0.3775, "step": 239360 }, { "epoch": 68.86363636363636, "grad_norm": 1.0460840463638306, "learning_rate": 0.0006227272727272727, "loss": 0.4551, "step": 239370 }, { "epoch": 68.86651323360184, "grad_norm": 1.2411623001098633, "learning_rate": 0.0006226697353279632, "loss": 0.4507, "step": 239380 }, { "epoch": 68.86939010356731, "grad_norm": 1.0469411611557007, "learning_rate": 0.0006226121979286536, "loss": 0.4475, "step": 239390 }, { "epoch": 68.8722669735328, "grad_norm": 0.7872742414474487, "learning_rate": 0.0006225546605293441, "loss": 0.4353, "step": 239400 }, { "epoch": 68.87514384349828, "grad_norm": 1.1198375225067139, "learning_rate": 0.0006224971231300344, "loss": 0.4736, "step": 239410 }, { "epoch": 68.87802071346375, "grad_norm": 0.9828166961669922, "learning_rate": 0.000622439585730725, "loss": 0.4328, "step": 239420 }, { "epoch": 68.88089758342923, "grad_norm": 0.8493804931640625, "learning_rate": 0.0006223820483314155, "loss": 0.3179, "step": 239430 }, { "epoch": 68.88377445339471, "grad_norm": 1.240407109260559, "learning_rate": 0.0006223245109321058, "loss": 0.4383, "step": 239440 }, { "epoch": 68.88665132336018, "grad_norm": 1.669054388999939, "learning_rate": 0.0006222669735327964, "loss": 0.507, "step": 239450 }, { "epoch": 68.88952819332566, "grad_norm": 1.4194716215133667, "learning_rate": 0.0006222094361334868, "loss": 0.4407, "step": 239460 }, { "epoch": 68.89240506329114, "grad_norm": 0.9459728598594666, "learning_rate": 0.0006221518987341772, "loss": 0.4368, "step": 239470 }, { "epoch": 68.89528193325661, "grad_norm": 1.8963310718536377, "learning_rate": 0.0006220943613348676, "loss": 0.4332, "step": 239480 }, { "epoch": 68.89815880322209, "grad_norm": 1.390864610671997, "learning_rate": 0.0006220368239355582, "loss": 0.4509, "step": 239490 }, { "epoch": 68.90103567318758, "grad_norm": 1.1738595962524414, "learning_rate": 0.0006219792865362485, "loss": 0.3758, "step": 239500 }, { "epoch": 68.90391254315306, "grad_norm": 1.628387689590454, "learning_rate": 0.000621921749136939, "loss": 0.4656, "step": 239510 }, { "epoch": 68.90678941311853, "grad_norm": 2.6408884525299072, "learning_rate": 0.0006218642117376296, "loss": 0.3897, "step": 239520 }, { "epoch": 68.90966628308401, "grad_norm": 1.0071200132369995, "learning_rate": 0.0006218066743383199, "loss": 0.4308, "step": 239530 }, { "epoch": 68.91254315304948, "grad_norm": 2.1944334506988525, "learning_rate": 0.0006217491369390104, "loss": 0.4237, "step": 239540 }, { "epoch": 68.91542002301496, "grad_norm": 0.7649475336074829, "learning_rate": 0.0006216915995397008, "loss": 0.4979, "step": 239550 }, { "epoch": 68.91829689298044, "grad_norm": 1.8449583053588867, "learning_rate": 0.0006216340621403913, "loss": 0.5074, "step": 239560 }, { "epoch": 68.92117376294591, "grad_norm": 2.712031602859497, "learning_rate": 0.0006215765247410817, "loss": 0.4502, "step": 239570 }, { "epoch": 68.92405063291139, "grad_norm": 0.7852718830108643, "learning_rate": 0.0006215189873417722, "loss": 0.3903, "step": 239580 }, { "epoch": 68.92692750287686, "grad_norm": 2.312854290008545, "learning_rate": 0.0006214614499424625, "loss": 0.5303, "step": 239590 }, { "epoch": 68.92980437284234, "grad_norm": 1.5560648441314697, "learning_rate": 0.0006214039125431531, "loss": 0.4508, "step": 239600 }, { "epoch": 68.93268124280783, "grad_norm": 1.106554388999939, "learning_rate": 0.0006213463751438436, "loss": 0.3481, "step": 239610 }, { "epoch": 68.93555811277331, "grad_norm": 1.0130780935287476, "learning_rate": 0.0006212888377445339, "loss": 0.3975, "step": 239620 }, { "epoch": 68.93843498273878, "grad_norm": 1.8352230787277222, "learning_rate": 0.0006212313003452245, "loss": 0.4198, "step": 239630 }, { "epoch": 68.94131185270426, "grad_norm": 1.8478513956069946, "learning_rate": 0.0006211737629459149, "loss": 0.4422, "step": 239640 }, { "epoch": 68.94418872266974, "grad_norm": 1.1991890668869019, "learning_rate": 0.0006211162255466053, "loss": 0.4289, "step": 239650 }, { "epoch": 68.94706559263521, "grad_norm": 1.331253170967102, "learning_rate": 0.0006210586881472957, "loss": 0.4318, "step": 239660 }, { "epoch": 68.94994246260069, "grad_norm": 1.0337214469909668, "learning_rate": 0.0006210011507479863, "loss": 0.3299, "step": 239670 }, { "epoch": 68.95281933256616, "grad_norm": 0.859189510345459, "learning_rate": 0.0006209436133486766, "loss": 0.4451, "step": 239680 }, { "epoch": 68.95569620253164, "grad_norm": 1.7818161249160767, "learning_rate": 0.0006208860759493671, "loss": 0.4571, "step": 239690 }, { "epoch": 68.95857307249712, "grad_norm": 1.0192800760269165, "learning_rate": 0.0006208285385500575, "loss": 0.4701, "step": 239700 }, { "epoch": 68.96144994246261, "grad_norm": 0.9349372982978821, "learning_rate": 0.000620771001150748, "loss": 0.4039, "step": 239710 }, { "epoch": 68.96432681242808, "grad_norm": 0.8209090232849121, "learning_rate": 0.0006207134637514385, "loss": 0.3808, "step": 239720 }, { "epoch": 68.96720368239356, "grad_norm": 0.9872429370880127, "learning_rate": 0.0006206559263521289, "loss": 0.3796, "step": 239730 }, { "epoch": 68.97008055235904, "grad_norm": 1.8955049514770508, "learning_rate": 0.0006205983889528194, "loss": 0.4302, "step": 239740 }, { "epoch": 68.97295742232451, "grad_norm": 2.6648623943328857, "learning_rate": 0.0006205408515535098, "loss": 0.496, "step": 239750 }, { "epoch": 68.97583429228999, "grad_norm": 1.1520822048187256, "learning_rate": 0.0006204833141542003, "loss": 0.4453, "step": 239760 }, { "epoch": 68.97871116225546, "grad_norm": 2.576545476913452, "learning_rate": 0.0006204257767548906, "loss": 0.5444, "step": 239770 }, { "epoch": 68.98158803222094, "grad_norm": 1.6688792705535889, "learning_rate": 0.0006203682393555812, "loss": 0.4403, "step": 239780 }, { "epoch": 68.98446490218642, "grad_norm": 1.0847358703613281, "learning_rate": 0.0006203107019562716, "loss": 0.4533, "step": 239790 }, { "epoch": 68.9873417721519, "grad_norm": 0.7269628047943115, "learning_rate": 0.000620253164556962, "loss": 0.5172, "step": 239800 }, { "epoch": 68.99021864211737, "grad_norm": 0.7981418371200562, "learning_rate": 0.0006201956271576526, "loss": 0.4247, "step": 239810 }, { "epoch": 68.99309551208286, "grad_norm": 4.067726135253906, "learning_rate": 0.0006201380897583429, "loss": 0.3756, "step": 239820 }, { "epoch": 68.99597238204834, "grad_norm": 1.0707744359970093, "learning_rate": 0.0006200805523590334, "loss": 0.3903, "step": 239830 }, { "epoch": 68.99884925201381, "grad_norm": 2.1348366737365723, "learning_rate": 0.0006200230149597238, "loss": 0.5389, "step": 239840 }, { "epoch": 69.00172612197929, "grad_norm": 1.4424374103546143, "learning_rate": 0.0006199654775604143, "loss": 0.4323, "step": 239850 }, { "epoch": 69.00460299194476, "grad_norm": 1.6121184825897217, "learning_rate": 0.0006199079401611047, "loss": 0.3568, "step": 239860 }, { "epoch": 69.00747986191024, "grad_norm": 1.8615494966506958, "learning_rate": 0.0006198504027617952, "loss": 0.3709, "step": 239870 }, { "epoch": 69.01035673187572, "grad_norm": 0.6708104610443115, "learning_rate": 0.0006197928653624855, "loss": 0.4078, "step": 239880 }, { "epoch": 69.0132336018412, "grad_norm": 1.2968374490737915, "learning_rate": 0.0006197353279631761, "loss": 0.4455, "step": 239890 }, { "epoch": 69.01611047180667, "grad_norm": 1.2779664993286133, "learning_rate": 0.0006196777905638666, "loss": 0.4261, "step": 239900 }, { "epoch": 69.01898734177215, "grad_norm": 0.7886595726013184, "learning_rate": 0.0006196202531645569, "loss": 0.3889, "step": 239910 }, { "epoch": 69.02186421173764, "grad_norm": 0.7153143882751465, "learning_rate": 0.0006195627157652475, "loss": 0.3981, "step": 239920 }, { "epoch": 69.02474108170311, "grad_norm": 0.7573256492614746, "learning_rate": 0.0006195051783659379, "loss": 0.3789, "step": 239930 }, { "epoch": 69.02761795166859, "grad_norm": 1.6316417455673218, "learning_rate": 0.0006194476409666283, "loss": 0.4182, "step": 239940 }, { "epoch": 69.03049482163406, "grad_norm": 2.0177195072174072, "learning_rate": 0.0006193901035673187, "loss": 0.3809, "step": 239950 }, { "epoch": 69.03337169159954, "grad_norm": 2.799530029296875, "learning_rate": 0.0006193325661680093, "loss": 0.4869, "step": 239960 }, { "epoch": 69.03624856156502, "grad_norm": 1.0091966390609741, "learning_rate": 0.0006192750287686996, "loss": 0.424, "step": 239970 }, { "epoch": 69.0391254315305, "grad_norm": 1.2280542850494385, "learning_rate": 0.0006192174913693901, "loss": 0.3578, "step": 239980 }, { "epoch": 69.04200230149597, "grad_norm": 1.3431954383850098, "learning_rate": 0.0006191599539700805, "loss": 0.3959, "step": 239990 }, { "epoch": 69.04487917146145, "grad_norm": 1.4376106262207031, "learning_rate": 0.000619102416570771, "loss": 0.3865, "step": 240000 }, { "epoch": 69.04775604142692, "grad_norm": 1.3571830987930298, "learning_rate": 0.0006190448791714615, "loss": 0.5264, "step": 240010 }, { "epoch": 69.0506329113924, "grad_norm": 1.269922137260437, "learning_rate": 0.0006189873417721519, "loss": 0.3829, "step": 240020 }, { "epoch": 69.05350978135789, "grad_norm": 0.963341474533081, "learning_rate": 0.0006189298043728424, "loss": 0.338, "step": 240030 }, { "epoch": 69.05638665132336, "grad_norm": 0.8758367896080017, "learning_rate": 0.0006188722669735328, "loss": 0.4136, "step": 240040 }, { "epoch": 69.05926352128884, "grad_norm": 0.7484230399131775, "learning_rate": 0.0006188147295742233, "loss": 0.3294, "step": 240050 }, { "epoch": 69.06214039125432, "grad_norm": 1.5629128217697144, "learning_rate": 0.0006187571921749136, "loss": 0.4795, "step": 240060 }, { "epoch": 69.0650172612198, "grad_norm": 1.1803454160690308, "learning_rate": 0.0006186996547756042, "loss": 0.4642, "step": 240070 }, { "epoch": 69.06789413118527, "grad_norm": 1.2746509313583374, "learning_rate": 0.0006186421173762946, "loss": 0.4779, "step": 240080 }, { "epoch": 69.07077100115075, "grad_norm": 2.6010260581970215, "learning_rate": 0.000618584579976985, "loss": 0.4884, "step": 240090 }, { "epoch": 69.07364787111622, "grad_norm": 1.1401000022888184, "learning_rate": 0.0006185270425776754, "loss": 0.3337, "step": 240100 }, { "epoch": 69.0765247410817, "grad_norm": 1.0169434547424316, "learning_rate": 0.000618469505178366, "loss": 0.3837, "step": 240110 }, { "epoch": 69.07940161104717, "grad_norm": 2.15989089012146, "learning_rate": 0.0006184119677790564, "loss": 0.401, "step": 240120 }, { "epoch": 69.08227848101266, "grad_norm": 1.2313222885131836, "learning_rate": 0.0006183544303797468, "loss": 0.3734, "step": 240130 }, { "epoch": 69.08515535097814, "grad_norm": 1.5526213645935059, "learning_rate": 0.0006182968929804374, "loss": 0.4576, "step": 240140 }, { "epoch": 69.08803222094362, "grad_norm": 0.8519868850708008, "learning_rate": 0.0006182393555811277, "loss": 0.4393, "step": 240150 }, { "epoch": 69.0909090909091, "grad_norm": 1.0212055444717407, "learning_rate": 0.0006181818181818182, "loss": 0.4041, "step": 240160 }, { "epoch": 69.09378596087457, "grad_norm": 0.49737998843193054, "learning_rate": 0.0006181242807825086, "loss": 0.3395, "step": 240170 }, { "epoch": 69.09666283084005, "grad_norm": 2.8320088386535645, "learning_rate": 0.0006180667433831991, "loss": 0.4074, "step": 240180 }, { "epoch": 69.09953970080552, "grad_norm": 1.309725284576416, "learning_rate": 0.0006180092059838895, "loss": 0.392, "step": 240190 }, { "epoch": 69.102416570771, "grad_norm": 1.1584755182266235, "learning_rate": 0.00061795166858458, "loss": 0.3666, "step": 240200 }, { "epoch": 69.10529344073647, "grad_norm": 0.8909677267074585, "learning_rate": 0.0006178941311852705, "loss": 0.3692, "step": 240210 }, { "epoch": 69.10817031070195, "grad_norm": 1.5478509664535522, "learning_rate": 0.0006178365937859609, "loss": 0.4703, "step": 240220 }, { "epoch": 69.11104718066743, "grad_norm": 0.8240716457366943, "learning_rate": 0.0006177790563866514, "loss": 0.426, "step": 240230 }, { "epoch": 69.11392405063292, "grad_norm": 1.6141490936279297, "learning_rate": 0.0006177215189873417, "loss": 0.3863, "step": 240240 }, { "epoch": 69.1168009205984, "grad_norm": 1.9843907356262207, "learning_rate": 0.0006176639815880323, "loss": 0.3176, "step": 240250 }, { "epoch": 69.11967779056387, "grad_norm": 0.8717843890190125, "learning_rate": 0.0006176064441887227, "loss": 0.3598, "step": 240260 }, { "epoch": 69.12255466052935, "grad_norm": 1.8143134117126465, "learning_rate": 0.0006175489067894131, "loss": 0.4443, "step": 240270 }, { "epoch": 69.12543153049482, "grad_norm": 0.8491970896720886, "learning_rate": 0.0006174913693901035, "loss": 0.5186, "step": 240280 }, { "epoch": 69.1283084004603, "grad_norm": 0.8523895144462585, "learning_rate": 0.0006174338319907941, "loss": 0.5107, "step": 240290 }, { "epoch": 69.13118527042577, "grad_norm": 1.0553287267684937, "learning_rate": 0.0006173762945914844, "loss": 0.3397, "step": 240300 }, { "epoch": 69.13406214039125, "grad_norm": 1.1130194664001465, "learning_rate": 0.0006173187571921749, "loss": 0.4174, "step": 240310 }, { "epoch": 69.13693901035673, "grad_norm": 1.1192352771759033, "learning_rate": 0.0006172612197928655, "loss": 0.416, "step": 240320 }, { "epoch": 69.1398158803222, "grad_norm": 1.5744093656539917, "learning_rate": 0.0006172036823935558, "loss": 0.3743, "step": 240330 }, { "epoch": 69.1426927502877, "grad_norm": 1.8558621406555176, "learning_rate": 0.0006171461449942463, "loss": 0.4356, "step": 240340 }, { "epoch": 69.14556962025317, "grad_norm": 1.0905282497406006, "learning_rate": 0.0006170886075949367, "loss": 0.4384, "step": 240350 }, { "epoch": 69.14844649021865, "grad_norm": 1.584549903869629, "learning_rate": 0.0006170310701956272, "loss": 0.4686, "step": 240360 }, { "epoch": 69.15132336018412, "grad_norm": 0.8476703763008118, "learning_rate": 0.0006169735327963176, "loss": 0.2874, "step": 240370 }, { "epoch": 69.1542002301496, "grad_norm": 1.5162806510925293, "learning_rate": 0.0006169159953970081, "loss": 0.3758, "step": 240380 }, { "epoch": 69.15707710011507, "grad_norm": 1.236608624458313, "learning_rate": 0.0006168584579976984, "loss": 0.3595, "step": 240390 }, { "epoch": 69.15995397008055, "grad_norm": 1.4127275943756104, "learning_rate": 0.000616800920598389, "loss": 0.3694, "step": 240400 }, { "epoch": 69.16283084004603, "grad_norm": 1.6281147003173828, "learning_rate": 0.0006167433831990795, "loss": 0.4195, "step": 240410 }, { "epoch": 69.1657077100115, "grad_norm": 1.3785789012908936, "learning_rate": 0.0006166858457997698, "loss": 0.4419, "step": 240420 }, { "epoch": 69.16858457997698, "grad_norm": 1.4469456672668457, "learning_rate": 0.0006166283084004604, "loss": 0.3944, "step": 240430 }, { "epoch": 69.17146144994246, "grad_norm": 1.0068693161010742, "learning_rate": 0.0006165707710011508, "loss": 0.4255, "step": 240440 }, { "epoch": 69.17433831990795, "grad_norm": 1.3783193826675415, "learning_rate": 0.0006165132336018412, "loss": 0.3828, "step": 240450 }, { "epoch": 69.17721518987342, "grad_norm": 1.9093844890594482, "learning_rate": 0.0006164556962025316, "loss": 0.3603, "step": 240460 }, { "epoch": 69.1800920598389, "grad_norm": 0.808538556098938, "learning_rate": 0.0006163981588032222, "loss": 0.4692, "step": 240470 }, { "epoch": 69.18296892980437, "grad_norm": 1.2107970714569092, "learning_rate": 0.0006163406214039125, "loss": 0.5309, "step": 240480 }, { "epoch": 69.18584579976985, "grad_norm": 0.8982259035110474, "learning_rate": 0.000616283084004603, "loss": 0.4198, "step": 240490 }, { "epoch": 69.18872266973533, "grad_norm": 1.7968324422836304, "learning_rate": 0.0006162255466052936, "loss": 0.3564, "step": 240500 }, { "epoch": 69.1915995397008, "grad_norm": 1.6377849578857422, "learning_rate": 0.0006161680092059839, "loss": 0.4108, "step": 240510 }, { "epoch": 69.19447640966628, "grad_norm": 3.3146519660949707, "learning_rate": 0.0006161104718066744, "loss": 0.4619, "step": 240520 }, { "epoch": 69.19735327963176, "grad_norm": 1.1985759735107422, "learning_rate": 0.0006160529344073648, "loss": 0.3969, "step": 240530 }, { "epoch": 69.20023014959723, "grad_norm": 1.486682653427124, "learning_rate": 0.0006159953970080553, "loss": 0.4518, "step": 240540 }, { "epoch": 69.20310701956272, "grad_norm": 0.8331484198570251, "learning_rate": 0.0006159378596087457, "loss": 0.4198, "step": 240550 }, { "epoch": 69.2059838895282, "grad_norm": 0.651957631111145, "learning_rate": 0.0006158803222094362, "loss": 0.5007, "step": 240560 }, { "epoch": 69.20886075949367, "grad_norm": 1.1098008155822754, "learning_rate": 0.0006158227848101265, "loss": 0.4656, "step": 240570 }, { "epoch": 69.21173762945915, "grad_norm": 1.0306447744369507, "learning_rate": 0.0006157652474108171, "loss": 0.3808, "step": 240580 }, { "epoch": 69.21461449942463, "grad_norm": 1.0217022895812988, "learning_rate": 0.0006157077100115075, "loss": 0.4008, "step": 240590 }, { "epoch": 69.2174913693901, "grad_norm": 0.9205741286277771, "learning_rate": 0.0006156501726121979, "loss": 0.4233, "step": 240600 }, { "epoch": 69.22036823935558, "grad_norm": 1.1157066822052002, "learning_rate": 0.0006155926352128885, "loss": 0.397, "step": 240610 }, { "epoch": 69.22324510932106, "grad_norm": 1.2568867206573486, "learning_rate": 0.0006155350978135789, "loss": 0.5142, "step": 240620 }, { "epoch": 69.22612197928653, "grad_norm": 2.380155086517334, "learning_rate": 0.0006154775604142693, "loss": 0.4994, "step": 240630 }, { "epoch": 69.22899884925201, "grad_norm": 1.0777479410171509, "learning_rate": 0.0006154200230149597, "loss": 0.3763, "step": 240640 }, { "epoch": 69.23187571921748, "grad_norm": 1.3341840505599976, "learning_rate": 0.0006153624856156502, "loss": 0.3928, "step": 240650 }, { "epoch": 69.23475258918297, "grad_norm": 1.1598109006881714, "learning_rate": 0.0006153049482163406, "loss": 0.3526, "step": 240660 }, { "epoch": 69.23762945914845, "grad_norm": 0.8098065853118896, "learning_rate": 0.0006152474108170311, "loss": 0.457, "step": 240670 }, { "epoch": 69.24050632911393, "grad_norm": 1.2442907094955444, "learning_rate": 0.0006151898734177214, "loss": 0.5974, "step": 240680 }, { "epoch": 69.2433831990794, "grad_norm": 1.2039422988891602, "learning_rate": 0.000615132336018412, "loss": 0.496, "step": 240690 }, { "epoch": 69.24626006904488, "grad_norm": 0.5983997583389282, "learning_rate": 0.0006150747986191024, "loss": 0.4077, "step": 240700 }, { "epoch": 69.24913693901036, "grad_norm": 2.8396987915039062, "learning_rate": 0.0006150172612197928, "loss": 0.4391, "step": 240710 }, { "epoch": 69.25201380897583, "grad_norm": 1.2375943660736084, "learning_rate": 0.0006149597238204834, "loss": 0.4963, "step": 240720 }, { "epoch": 69.25489067894131, "grad_norm": 1.3455172777175903, "learning_rate": 0.0006149021864211738, "loss": 0.3472, "step": 240730 }, { "epoch": 69.25776754890678, "grad_norm": 0.8527609705924988, "learning_rate": 0.0006148446490218642, "loss": 0.4025, "step": 240740 }, { "epoch": 69.26064441887226, "grad_norm": 0.9320381283760071, "learning_rate": 0.0006147871116225546, "loss": 0.466, "step": 240750 }, { "epoch": 69.26352128883775, "grad_norm": 0.9574933648109436, "learning_rate": 0.0006147295742232452, "loss": 0.3518, "step": 240760 }, { "epoch": 69.26639815880323, "grad_norm": 1.4159395694732666, "learning_rate": 0.0006146720368239355, "loss": 0.4158, "step": 240770 }, { "epoch": 69.2692750287687, "grad_norm": 0.8935476541519165, "learning_rate": 0.000614614499424626, "loss": 0.4389, "step": 240780 }, { "epoch": 69.27215189873418, "grad_norm": 1.0822808742523193, "learning_rate": 0.0006145569620253165, "loss": 0.4601, "step": 240790 }, { "epoch": 69.27502876869966, "grad_norm": 1.2425569295883179, "learning_rate": 0.0006144994246260069, "loss": 0.5324, "step": 240800 }, { "epoch": 69.27790563866513, "grad_norm": 0.579133152961731, "learning_rate": 0.0006144418872266974, "loss": 0.3331, "step": 240810 }, { "epoch": 69.28078250863061, "grad_norm": 0.760696291923523, "learning_rate": 0.0006143843498273878, "loss": 0.4731, "step": 240820 }, { "epoch": 69.28365937859608, "grad_norm": 1.262089490890503, "learning_rate": 0.0006143268124280783, "loss": 0.3618, "step": 240830 }, { "epoch": 69.28653624856156, "grad_norm": 1.2881016731262207, "learning_rate": 0.0006142692750287687, "loss": 0.3865, "step": 240840 }, { "epoch": 69.28941311852704, "grad_norm": 1.57261061668396, "learning_rate": 0.0006142117376294592, "loss": 0.419, "step": 240850 }, { "epoch": 69.29228998849253, "grad_norm": 0.8729323744773865, "learning_rate": 0.0006141542002301495, "loss": 0.4625, "step": 240860 }, { "epoch": 69.295166858458, "grad_norm": 1.4675283432006836, "learning_rate": 0.0006140966628308401, "loss": 0.4591, "step": 240870 }, { "epoch": 69.29804372842348, "grad_norm": 1.0081264972686768, "learning_rate": 0.0006140391254315305, "loss": 0.3753, "step": 240880 }, { "epoch": 69.30092059838896, "grad_norm": 0.6992093920707703, "learning_rate": 0.0006139815880322209, "loss": 0.4907, "step": 240890 }, { "epoch": 69.30379746835443, "grad_norm": 0.9077238440513611, "learning_rate": 0.0006139240506329115, "loss": 0.3849, "step": 240900 }, { "epoch": 69.30667433831991, "grad_norm": 1.3370232582092285, "learning_rate": 0.0006138665132336019, "loss": 0.4702, "step": 240910 }, { "epoch": 69.30955120828538, "grad_norm": 1.0276373624801636, "learning_rate": 0.0006138089758342923, "loss": 0.5566, "step": 240920 }, { "epoch": 69.31242807825086, "grad_norm": 0.8779505491256714, "learning_rate": 0.0006137514384349827, "loss": 0.2926, "step": 240930 }, { "epoch": 69.31530494821634, "grad_norm": 0.8240659832954407, "learning_rate": 0.0006136939010356733, "loss": 0.4903, "step": 240940 }, { "epoch": 69.31818181818181, "grad_norm": 1.300502061843872, "learning_rate": 0.0006136363636363636, "loss": 0.4896, "step": 240950 }, { "epoch": 69.32105868814729, "grad_norm": 1.024034857749939, "learning_rate": 0.0006135788262370541, "loss": 0.3926, "step": 240960 }, { "epoch": 69.32393555811278, "grad_norm": 1.7775441408157349, "learning_rate": 0.0006135212888377445, "loss": 0.5239, "step": 240970 }, { "epoch": 69.32681242807826, "grad_norm": 0.9000353813171387, "learning_rate": 0.000613463751438435, "loss": 0.4845, "step": 240980 }, { "epoch": 69.32968929804373, "grad_norm": 1.923125982284546, "learning_rate": 0.0006134062140391254, "loss": 0.4548, "step": 240990 }, { "epoch": 69.33256616800921, "grad_norm": 1.271669864654541, "learning_rate": 0.0006133486766398159, "loss": 0.3856, "step": 241000 }, { "epoch": 69.33544303797468, "grad_norm": 1.758043646812439, "learning_rate": 0.0006132911392405064, "loss": 0.4189, "step": 241010 }, { "epoch": 69.33831990794016, "grad_norm": 1.8356826305389404, "learning_rate": 0.0006132336018411968, "loss": 0.437, "step": 241020 }, { "epoch": 69.34119677790564, "grad_norm": 1.916142463684082, "learning_rate": 0.0006131760644418873, "loss": 0.3456, "step": 241030 }, { "epoch": 69.34407364787111, "grad_norm": 1.131301760673523, "learning_rate": 0.0006131185270425776, "loss": 0.4132, "step": 241040 }, { "epoch": 69.34695051783659, "grad_norm": 0.9450415372848511, "learning_rate": 0.0006130609896432682, "loss": 0.4205, "step": 241050 }, { "epoch": 69.34982738780207, "grad_norm": 1.351780652999878, "learning_rate": 0.0006130034522439586, "loss": 0.3684, "step": 241060 }, { "epoch": 69.35270425776756, "grad_norm": 3.3938515186309814, "learning_rate": 0.000612945914844649, "loss": 0.3824, "step": 241070 }, { "epoch": 69.35558112773303, "grad_norm": 1.9345934391021729, "learning_rate": 0.0006128883774453394, "loss": 0.4549, "step": 241080 }, { "epoch": 69.35845799769851, "grad_norm": 0.9846606254577637, "learning_rate": 0.00061283084004603, "loss": 0.4883, "step": 241090 }, { "epoch": 69.36133486766398, "grad_norm": 1.5437016487121582, "learning_rate": 0.0006127733026467203, "loss": 0.368, "step": 241100 }, { "epoch": 69.36421173762946, "grad_norm": 0.8872537612915039, "learning_rate": 0.0006127157652474108, "loss": 0.4062, "step": 241110 }, { "epoch": 69.36708860759494, "grad_norm": 1.069949984550476, "learning_rate": 0.0006126582278481014, "loss": 0.4446, "step": 241120 }, { "epoch": 69.36996547756041, "grad_norm": 0.9841132164001465, "learning_rate": 0.0006126006904487917, "loss": 0.3642, "step": 241130 }, { "epoch": 69.37284234752589, "grad_norm": 1.9346057176589966, "learning_rate": 0.0006125431530494822, "loss": 0.5603, "step": 241140 }, { "epoch": 69.37571921749137, "grad_norm": 1.624352216720581, "learning_rate": 0.0006124856156501726, "loss": 0.4328, "step": 241150 }, { "epoch": 69.37859608745684, "grad_norm": 0.7296653985977173, "learning_rate": 0.0006124280782508631, "loss": 0.4859, "step": 241160 }, { "epoch": 69.38147295742232, "grad_norm": 2.158210039138794, "learning_rate": 0.0006123705408515535, "loss": 0.5059, "step": 241170 }, { "epoch": 69.38434982738781, "grad_norm": 1.0978434085845947, "learning_rate": 0.000612313003452244, "loss": 0.514, "step": 241180 }, { "epoch": 69.38722669735328, "grad_norm": 1.4870948791503906, "learning_rate": 0.0006122554660529344, "loss": 0.4049, "step": 241190 }, { "epoch": 69.39010356731876, "grad_norm": 0.8725548386573792, "learning_rate": 0.0006121979286536249, "loss": 0.4132, "step": 241200 }, { "epoch": 69.39298043728424, "grad_norm": 1.1598974466323853, "learning_rate": 0.0006121403912543154, "loss": 0.4011, "step": 241210 }, { "epoch": 69.39585730724971, "grad_norm": 0.8337810039520264, "learning_rate": 0.0006120828538550057, "loss": 0.3639, "step": 241220 }, { "epoch": 69.39873417721519, "grad_norm": 0.8315078020095825, "learning_rate": 0.0006120253164556963, "loss": 0.4139, "step": 241230 }, { "epoch": 69.40161104718067, "grad_norm": 0.9207577705383301, "learning_rate": 0.0006119677790563867, "loss": 0.3833, "step": 241240 }, { "epoch": 69.40448791714614, "grad_norm": 1.3356767892837524, "learning_rate": 0.0006119102416570771, "loss": 0.4003, "step": 241250 }, { "epoch": 69.40736478711162, "grad_norm": 0.6022999286651611, "learning_rate": 0.0006118527042577675, "loss": 0.3145, "step": 241260 }, { "epoch": 69.4102416570771, "grad_norm": 1.1916548013687134, "learning_rate": 0.0006117951668584581, "loss": 0.3502, "step": 241270 }, { "epoch": 69.41311852704258, "grad_norm": 0.9082919359207153, "learning_rate": 0.0006117376294591484, "loss": 0.3829, "step": 241280 }, { "epoch": 69.41599539700806, "grad_norm": 1.2363152503967285, "learning_rate": 0.0006116800920598389, "loss": 0.3332, "step": 241290 }, { "epoch": 69.41887226697354, "grad_norm": 1.6145620346069336, "learning_rate": 0.0006116225546605295, "loss": 0.4928, "step": 241300 }, { "epoch": 69.42174913693901, "grad_norm": 1.1811625957489014, "learning_rate": 0.0006115650172612198, "loss": 0.4393, "step": 241310 }, { "epoch": 69.42462600690449, "grad_norm": 0.779678463935852, "learning_rate": 0.0006115074798619103, "loss": 0.4339, "step": 241320 }, { "epoch": 69.42750287686997, "grad_norm": 0.8600189685821533, "learning_rate": 0.0006114499424626007, "loss": 0.4899, "step": 241330 }, { "epoch": 69.43037974683544, "grad_norm": 1.3114017248153687, "learning_rate": 0.0006113924050632912, "loss": 0.4, "step": 241340 }, { "epoch": 69.43325661680092, "grad_norm": 1.1338400840759277, "learning_rate": 0.0006113348676639816, "loss": 0.3368, "step": 241350 }, { "epoch": 69.4361334867664, "grad_norm": 1.4924918413162231, "learning_rate": 0.0006112773302646721, "loss": 0.4801, "step": 241360 }, { "epoch": 69.43901035673187, "grad_norm": 1.976300597190857, "learning_rate": 0.0006112197928653624, "loss": 0.4036, "step": 241370 }, { "epoch": 69.44188722669735, "grad_norm": 1.1463043689727783, "learning_rate": 0.000611162255466053, "loss": 0.3929, "step": 241380 }, { "epoch": 69.44476409666284, "grad_norm": 1.289814829826355, "learning_rate": 0.0006111047180667434, "loss": 0.3692, "step": 241390 }, { "epoch": 69.44764096662831, "grad_norm": 1.1213809251785278, "learning_rate": 0.0006110471806674338, "loss": 0.4053, "step": 241400 }, { "epoch": 69.45051783659379, "grad_norm": 1.0605605840682983, "learning_rate": 0.0006109896432681244, "loss": 0.4094, "step": 241410 }, { "epoch": 69.45339470655927, "grad_norm": 1.2532740831375122, "learning_rate": 0.0006109321058688148, "loss": 0.4192, "step": 241420 }, { "epoch": 69.45627157652474, "grad_norm": 2.4956023693084717, "learning_rate": 0.0006108745684695052, "loss": 0.4244, "step": 241430 }, { "epoch": 69.45914844649022, "grad_norm": 0.8344929814338684, "learning_rate": 0.0006108170310701956, "loss": 0.4052, "step": 241440 }, { "epoch": 69.4620253164557, "grad_norm": 1.0652159452438354, "learning_rate": 0.0006107594936708862, "loss": 0.4401, "step": 241450 }, { "epoch": 69.46490218642117, "grad_norm": 1.0087263584136963, "learning_rate": 0.0006107019562715765, "loss": 0.3869, "step": 241460 }, { "epoch": 69.46777905638665, "grad_norm": 2.27797532081604, "learning_rate": 0.000610644418872267, "loss": 0.3485, "step": 241470 }, { "epoch": 69.47065592635212, "grad_norm": 1.2182992696762085, "learning_rate": 0.0006105868814729574, "loss": 0.4444, "step": 241480 }, { "epoch": 69.47353279631761, "grad_norm": 1.737622618675232, "learning_rate": 0.0006105293440736479, "loss": 0.4183, "step": 241490 }, { "epoch": 69.47640966628309, "grad_norm": 1.505942940711975, "learning_rate": 0.0006104718066743383, "loss": 0.4453, "step": 241500 }, { "epoch": 69.47928653624857, "grad_norm": 0.8451363444328308, "learning_rate": 0.0006104142692750287, "loss": 0.4521, "step": 241510 }, { "epoch": 69.48216340621404, "grad_norm": 1.0682165622711182, "learning_rate": 0.0006103567318757193, "loss": 0.4604, "step": 241520 }, { "epoch": 69.48504027617952, "grad_norm": 1.5043151378631592, "learning_rate": 0.0006102991944764097, "loss": 0.4441, "step": 241530 }, { "epoch": 69.487917146145, "grad_norm": 0.7539488077163696, "learning_rate": 0.0006102416570771001, "loss": 0.4202, "step": 241540 }, { "epoch": 69.49079401611047, "grad_norm": 0.6992112994194031, "learning_rate": 0.0006101841196777905, "loss": 0.3443, "step": 241550 }, { "epoch": 69.49367088607595, "grad_norm": 0.8615751266479492, "learning_rate": 0.0006101265822784811, "loss": 0.4645, "step": 241560 }, { "epoch": 69.49654775604142, "grad_norm": 0.7128139734268188, "learning_rate": 0.0006100690448791714, "loss": 0.4509, "step": 241570 }, { "epoch": 69.4994246260069, "grad_norm": 0.9368787407875061, "learning_rate": 0.0006100115074798619, "loss": 0.4076, "step": 241580 }, { "epoch": 69.50230149597238, "grad_norm": 0.8212282061576843, "learning_rate": 0.0006099539700805524, "loss": 0.4197, "step": 241590 }, { "epoch": 69.50517836593787, "grad_norm": 1.2170950174331665, "learning_rate": 0.0006098964326812428, "loss": 0.4233, "step": 241600 }, { "epoch": 69.50805523590334, "grad_norm": 1.1362584829330444, "learning_rate": 0.0006098388952819332, "loss": 0.3438, "step": 241610 }, { "epoch": 69.51093210586882, "grad_norm": 1.2986711263656616, "learning_rate": 0.0006097813578826237, "loss": 0.3914, "step": 241620 }, { "epoch": 69.5138089758343, "grad_norm": 0.772774875164032, "learning_rate": 0.0006097238204833142, "loss": 0.4026, "step": 241630 }, { "epoch": 69.51668584579977, "grad_norm": 1.70906662940979, "learning_rate": 0.0006096662830840046, "loss": 0.4056, "step": 241640 }, { "epoch": 69.51956271576525, "grad_norm": 1.5960887670516968, "learning_rate": 0.0006096087456846951, "loss": 0.3489, "step": 241650 }, { "epoch": 69.52243958573072, "grad_norm": 1.005995512008667, "learning_rate": 0.0006095512082853854, "loss": 0.3978, "step": 241660 }, { "epoch": 69.5253164556962, "grad_norm": 1.5864827632904053, "learning_rate": 0.000609493670886076, "loss": 0.3703, "step": 241670 }, { "epoch": 69.52819332566168, "grad_norm": 1.083028793334961, "learning_rate": 0.0006094361334867664, "loss": 0.3503, "step": 241680 }, { "epoch": 69.53107019562715, "grad_norm": 1.3224668502807617, "learning_rate": 0.0006093785960874568, "loss": 0.3858, "step": 241690 }, { "epoch": 69.53394706559264, "grad_norm": 1.2024914026260376, "learning_rate": 0.0006093210586881473, "loss": 0.4292, "step": 241700 }, { "epoch": 69.53682393555812, "grad_norm": 0.902021050453186, "learning_rate": 0.0006092635212888378, "loss": 0.4912, "step": 241710 }, { "epoch": 69.5397008055236, "grad_norm": 2.4343600273132324, "learning_rate": 0.0006092059838895282, "loss": 0.426, "step": 241720 }, { "epoch": 69.54257767548907, "grad_norm": 1.6224347352981567, "learning_rate": 0.0006091484464902186, "loss": 0.4146, "step": 241730 }, { "epoch": 69.54545454545455, "grad_norm": 3.4675469398498535, "learning_rate": 0.0006090909090909092, "loss": 0.4978, "step": 241740 }, { "epoch": 69.54833141542002, "grad_norm": 0.8009780049324036, "learning_rate": 0.0006090333716915995, "loss": 0.3687, "step": 241750 }, { "epoch": 69.5512082853855, "grad_norm": 1.1345518827438354, "learning_rate": 0.00060897583429229, "loss": 0.454, "step": 241760 }, { "epoch": 69.55408515535098, "grad_norm": 0.7834551930427551, "learning_rate": 0.0006089182968929804, "loss": 0.3696, "step": 241770 }, { "epoch": 69.55696202531645, "grad_norm": 1.194643497467041, "learning_rate": 0.0006088607594936709, "loss": 0.4563, "step": 241780 }, { "epoch": 69.55983889528193, "grad_norm": 0.9580881595611572, "learning_rate": 0.0006088032220943613, "loss": 0.3918, "step": 241790 }, { "epoch": 69.5627157652474, "grad_norm": 1.13047194480896, "learning_rate": 0.0006087456846950518, "loss": 0.4276, "step": 241800 }, { "epoch": 69.5655926352129, "grad_norm": 0.8396874666213989, "learning_rate": 0.0006086881472957423, "loss": 0.4142, "step": 241810 }, { "epoch": 69.56846950517837, "grad_norm": 1.4813634157180786, "learning_rate": 0.0006086306098964327, "loss": 0.4284, "step": 241820 }, { "epoch": 69.57134637514385, "grad_norm": 1.4968011379241943, "learning_rate": 0.0006085730724971232, "loss": 0.4575, "step": 241830 }, { "epoch": 69.57422324510932, "grad_norm": 2.3342604637145996, "learning_rate": 0.0006085155350978135, "loss": 0.3941, "step": 241840 }, { "epoch": 69.5771001150748, "grad_norm": 1.4103105068206787, "learning_rate": 0.0006084579976985041, "loss": 0.2984, "step": 241850 }, { "epoch": 69.57997698504028, "grad_norm": 1.400635838508606, "learning_rate": 0.0006084004602991945, "loss": 0.4854, "step": 241860 }, { "epoch": 69.58285385500575, "grad_norm": 1.1314624547958374, "learning_rate": 0.0006083429228998849, "loss": 0.3789, "step": 241870 }, { "epoch": 69.58573072497123, "grad_norm": 1.2794666290283203, "learning_rate": 0.0006082853855005754, "loss": 0.4268, "step": 241880 }, { "epoch": 69.5886075949367, "grad_norm": 1.1554595232009888, "learning_rate": 0.0006082278481012659, "loss": 0.5047, "step": 241890 }, { "epoch": 69.59148446490218, "grad_norm": 1.5282546281814575, "learning_rate": 0.0006081703107019562, "loss": 0.4469, "step": 241900 }, { "epoch": 69.59436133486767, "grad_norm": 1.0668336153030396, "learning_rate": 0.0006081127733026467, "loss": 0.3947, "step": 241910 }, { "epoch": 69.59723820483315, "grad_norm": 1.5336233377456665, "learning_rate": 0.0006080552359033373, "loss": 0.3344, "step": 241920 }, { "epoch": 69.60011507479862, "grad_norm": 0.6486210823059082, "learning_rate": 0.0006079976985040276, "loss": 0.3746, "step": 241930 }, { "epoch": 69.6029919447641, "grad_norm": 0.8864206671714783, "learning_rate": 0.0006079401611047181, "loss": 0.4036, "step": 241940 }, { "epoch": 69.60586881472958, "grad_norm": 1.0531764030456543, "learning_rate": 0.0006078826237054085, "loss": 0.3942, "step": 241950 }, { "epoch": 69.60874568469505, "grad_norm": 1.2041035890579224, "learning_rate": 0.000607825086306099, "loss": 0.3672, "step": 241960 }, { "epoch": 69.61162255466053, "grad_norm": 1.5685614347457886, "learning_rate": 0.0006077675489067894, "loss": 0.4361, "step": 241970 }, { "epoch": 69.614499424626, "grad_norm": 1.7086288928985596, "learning_rate": 0.0006077100115074799, "loss": 0.5076, "step": 241980 }, { "epoch": 69.61737629459148, "grad_norm": 0.8362246751785278, "learning_rate": 0.0006076524741081703, "loss": 0.4245, "step": 241990 }, { "epoch": 69.62025316455696, "grad_norm": 0.661202609539032, "learning_rate": 0.0006075949367088608, "loss": 0.4327, "step": 242000 }, { "epoch": 69.62313003452243, "grad_norm": 1.5724221467971802, "learning_rate": 0.0006075373993095513, "loss": 0.4603, "step": 242010 }, { "epoch": 69.62600690448792, "grad_norm": 1.0692251920700073, "learning_rate": 0.0006074798619102416, "loss": 0.4395, "step": 242020 }, { "epoch": 69.6288837744534, "grad_norm": 1.6555604934692383, "learning_rate": 0.0006074223245109322, "loss": 0.4225, "step": 242030 }, { "epoch": 69.63176064441888, "grad_norm": 0.9686458110809326, "learning_rate": 0.0006073647871116226, "loss": 0.4302, "step": 242040 }, { "epoch": 69.63463751438435, "grad_norm": 1.0845476388931274, "learning_rate": 0.000607307249712313, "loss": 0.3862, "step": 242050 }, { "epoch": 69.63751438434983, "grad_norm": 1.016859769821167, "learning_rate": 0.0006072497123130034, "loss": 0.366, "step": 242060 }, { "epoch": 69.6403912543153, "grad_norm": 1.3704496622085571, "learning_rate": 0.000607192174913694, "loss": 0.4804, "step": 242070 }, { "epoch": 69.64326812428078, "grad_norm": 1.7353192567825317, "learning_rate": 0.0006071346375143843, "loss": 0.4908, "step": 242080 }, { "epoch": 69.64614499424626, "grad_norm": 0.8255289793014526, "learning_rate": 0.0006070771001150748, "loss": 0.4555, "step": 242090 }, { "epoch": 69.64902186421173, "grad_norm": 1.3594059944152832, "learning_rate": 0.0006070195627157654, "loss": 0.3794, "step": 242100 }, { "epoch": 69.65189873417721, "grad_norm": 1.1711643934249878, "learning_rate": 0.0006069620253164557, "loss": 0.3707, "step": 242110 }, { "epoch": 69.6547756041427, "grad_norm": 1.3275567293167114, "learning_rate": 0.0006069044879171462, "loss": 0.372, "step": 242120 }, { "epoch": 69.65765247410818, "grad_norm": 1.9090962409973145, "learning_rate": 0.0006068469505178366, "loss": 0.4407, "step": 242130 }, { "epoch": 69.66052934407365, "grad_norm": 0.8147175908088684, "learning_rate": 0.0006067894131185271, "loss": 0.4808, "step": 242140 }, { "epoch": 69.66340621403913, "grad_norm": 1.4410721063613892, "learning_rate": 0.0006067318757192175, "loss": 0.3995, "step": 242150 }, { "epoch": 69.6662830840046, "grad_norm": 1.8475708961486816, "learning_rate": 0.000606674338319908, "loss": 0.4292, "step": 242160 }, { "epoch": 69.66915995397008, "grad_norm": 1.117439866065979, "learning_rate": 0.0006066168009205984, "loss": 0.4363, "step": 242170 }, { "epoch": 69.67203682393556, "grad_norm": 2.527416944503784, "learning_rate": 0.0006065592635212889, "loss": 0.4954, "step": 242180 }, { "epoch": 69.67491369390103, "grad_norm": 0.5629322528839111, "learning_rate": 0.0006065017261219793, "loss": 0.4447, "step": 242190 }, { "epoch": 69.67779056386651, "grad_norm": 1.259424090385437, "learning_rate": 0.0006064441887226697, "loss": 0.4828, "step": 242200 }, { "epoch": 69.68066743383199, "grad_norm": 1.5156105756759644, "learning_rate": 0.0006063866513233603, "loss": 0.4828, "step": 242210 }, { "epoch": 69.68354430379746, "grad_norm": 0.5426051616668701, "learning_rate": 0.0006063291139240507, "loss": 0.3696, "step": 242220 }, { "epoch": 69.68642117376295, "grad_norm": 1.331478476524353, "learning_rate": 0.0006062715765247411, "loss": 0.4283, "step": 242230 }, { "epoch": 69.68929804372843, "grad_norm": 1.5054954290390015, "learning_rate": 0.0006062140391254315, "loss": 0.5602, "step": 242240 }, { "epoch": 69.6921749136939, "grad_norm": 1.017542839050293, "learning_rate": 0.0006061565017261221, "loss": 0.3905, "step": 242250 }, { "epoch": 69.69505178365938, "grad_norm": 1.7209513187408447, "learning_rate": 0.0006060989643268124, "loss": 0.5052, "step": 242260 }, { "epoch": 69.69792865362486, "grad_norm": 1.9519456624984741, "learning_rate": 0.0006060414269275029, "loss": 0.421, "step": 242270 }, { "epoch": 69.70080552359033, "grad_norm": 0.9413018822669983, "learning_rate": 0.0006059838895281934, "loss": 0.4158, "step": 242280 }, { "epoch": 69.70368239355581, "grad_norm": 0.9700536727905273, "learning_rate": 0.0006059263521288838, "loss": 0.422, "step": 242290 }, { "epoch": 69.70655926352129, "grad_norm": 1.003926396369934, "learning_rate": 0.0006058688147295742, "loss": 0.4207, "step": 242300 }, { "epoch": 69.70943613348676, "grad_norm": 1.0592916011810303, "learning_rate": 0.0006058112773302646, "loss": 0.3669, "step": 242310 }, { "epoch": 69.71231300345224, "grad_norm": 0.8258852362632751, "learning_rate": 0.0006057537399309552, "loss": 0.3173, "step": 242320 }, { "epoch": 69.71518987341773, "grad_norm": 0.7357206344604492, "learning_rate": 0.0006056962025316456, "loss": 0.3693, "step": 242330 }, { "epoch": 69.7180667433832, "grad_norm": 1.2798497676849365, "learning_rate": 0.000605638665132336, "loss": 0.4426, "step": 242340 }, { "epoch": 69.72094361334868, "grad_norm": 0.9489434957504272, "learning_rate": 0.0006055811277330264, "loss": 0.3473, "step": 242350 }, { "epoch": 69.72382048331416, "grad_norm": 1.7245831489562988, "learning_rate": 0.000605523590333717, "loss": 0.422, "step": 242360 }, { "epoch": 69.72669735327963, "grad_norm": 0.8937863707542419, "learning_rate": 0.0006054660529344073, "loss": 0.4251, "step": 242370 }, { "epoch": 69.72957422324511, "grad_norm": 0.952022910118103, "learning_rate": 0.0006054085155350978, "loss": 0.4884, "step": 242380 }, { "epoch": 69.73245109321059, "grad_norm": 2.4036636352539062, "learning_rate": 0.0006053509781357883, "loss": 0.5542, "step": 242390 }, { "epoch": 69.73532796317606, "grad_norm": 0.9174085259437561, "learning_rate": 0.0006052934407364787, "loss": 0.4173, "step": 242400 }, { "epoch": 69.73820483314154, "grad_norm": 1.0707107782363892, "learning_rate": 0.0006052359033371691, "loss": 0.3561, "step": 242410 }, { "epoch": 69.74108170310701, "grad_norm": 1.380601167678833, "learning_rate": 0.0006051783659378596, "loss": 0.4709, "step": 242420 }, { "epoch": 69.74395857307249, "grad_norm": 1.8506911993026733, "learning_rate": 0.0006051208285385501, "loss": 0.4237, "step": 242430 }, { "epoch": 69.74683544303798, "grad_norm": 1.8433345556259155, "learning_rate": 0.0006050632911392405, "loss": 0.4041, "step": 242440 }, { "epoch": 69.74971231300346, "grad_norm": 1.1945281028747559, "learning_rate": 0.000605005753739931, "loss": 0.3982, "step": 242450 }, { "epoch": 69.75258918296893, "grad_norm": 1.0272974967956543, "learning_rate": 0.0006049482163406214, "loss": 0.4416, "step": 242460 }, { "epoch": 69.75546605293441, "grad_norm": 1.9436310529708862, "learning_rate": 0.0006048906789413119, "loss": 0.4118, "step": 242470 }, { "epoch": 69.75834292289989, "grad_norm": 1.299291968345642, "learning_rate": 0.0006048331415420023, "loss": 0.4929, "step": 242480 }, { "epoch": 69.76121979286536, "grad_norm": 1.225235939025879, "learning_rate": 0.0006047756041426927, "loss": 0.4268, "step": 242490 }, { "epoch": 69.76409666283084, "grad_norm": 0.9943790435791016, "learning_rate": 0.0006047180667433832, "loss": 0.4022, "step": 242500 }, { "epoch": 69.76697353279631, "grad_norm": 1.5916087627410889, "learning_rate": 0.0006046605293440737, "loss": 0.4383, "step": 242510 }, { "epoch": 69.76985040276179, "grad_norm": 1.0908597707748413, "learning_rate": 0.000604602991944764, "loss": 0.3787, "step": 242520 }, { "epoch": 69.77272727272727, "grad_norm": 0.8086267113685608, "learning_rate": 0.0006045454545454545, "loss": 0.3612, "step": 242530 }, { "epoch": 69.77560414269276, "grad_norm": 1.1171483993530273, "learning_rate": 0.0006044879171461451, "loss": 0.4538, "step": 242540 }, { "epoch": 69.77848101265823, "grad_norm": 0.9353604912757874, "learning_rate": 0.0006044303797468354, "loss": 0.3556, "step": 242550 }, { "epoch": 69.78135788262371, "grad_norm": 1.7147223949432373, "learning_rate": 0.0006043728423475259, "loss": 0.5514, "step": 242560 }, { "epoch": 69.78423475258919, "grad_norm": 1.5984524488449097, "learning_rate": 0.0006043153049482164, "loss": 0.4392, "step": 242570 }, { "epoch": 69.78711162255466, "grad_norm": 0.7624048590660095, "learning_rate": 0.0006042577675489068, "loss": 0.4215, "step": 242580 }, { "epoch": 69.78998849252014, "grad_norm": 1.3057504892349243, "learning_rate": 0.0006042002301495972, "loss": 0.3558, "step": 242590 }, { "epoch": 69.79286536248561, "grad_norm": 1.0945848226547241, "learning_rate": 0.0006041426927502877, "loss": 0.4001, "step": 242600 }, { "epoch": 69.79574223245109, "grad_norm": 1.1378817558288574, "learning_rate": 0.0006040851553509781, "loss": 0.3999, "step": 242610 }, { "epoch": 69.79861910241657, "grad_norm": 1.4637973308563232, "learning_rate": 0.0006040276179516686, "loss": 0.3864, "step": 242620 }, { "epoch": 69.80149597238204, "grad_norm": 1.0385950803756714, "learning_rate": 0.0006039700805523591, "loss": 0.4483, "step": 242630 }, { "epoch": 69.80437284234753, "grad_norm": 0.9639930725097656, "learning_rate": 0.0006039125431530494, "loss": 0.4065, "step": 242640 }, { "epoch": 69.80724971231301, "grad_norm": 1.2581337690353394, "learning_rate": 0.00060385500575374, "loss": 0.501, "step": 242650 }, { "epoch": 69.81012658227849, "grad_norm": 1.2826600074768066, "learning_rate": 0.0006037974683544304, "loss": 0.4039, "step": 242660 }, { "epoch": 69.81300345224396, "grad_norm": 2.0253584384918213, "learning_rate": 0.0006037399309551208, "loss": 0.3775, "step": 242670 }, { "epoch": 69.81588032220944, "grad_norm": 0.7625230550765991, "learning_rate": 0.0006036823935558113, "loss": 0.4182, "step": 242680 }, { "epoch": 69.81875719217491, "grad_norm": 1.7485064268112183, "learning_rate": 0.0006036248561565018, "loss": 0.416, "step": 242690 }, { "epoch": 69.82163406214039, "grad_norm": 1.3548792600631714, "learning_rate": 0.0006035673187571921, "loss": 0.4256, "step": 242700 }, { "epoch": 69.82451093210587, "grad_norm": 1.7285202741622925, "learning_rate": 0.0006035097813578826, "loss": 0.4194, "step": 242710 }, { "epoch": 69.82738780207134, "grad_norm": 0.5459974408149719, "learning_rate": 0.0006034522439585732, "loss": 0.3607, "step": 242720 }, { "epoch": 69.83026467203682, "grad_norm": 1.600111722946167, "learning_rate": 0.0006033947065592635, "loss": 0.3939, "step": 242730 }, { "epoch": 69.8331415420023, "grad_norm": 1.6935521364212036, "learning_rate": 0.000603337169159954, "loss": 0.4351, "step": 242740 }, { "epoch": 69.83601841196779, "grad_norm": 1.1989301443099976, "learning_rate": 0.0006032796317606444, "loss": 0.3926, "step": 242750 }, { "epoch": 69.83889528193326, "grad_norm": 0.7651781439781189, "learning_rate": 0.0006032220943613349, "loss": 0.4357, "step": 242760 }, { "epoch": 69.84177215189874, "grad_norm": 1.064348578453064, "learning_rate": 0.0006031645569620253, "loss": 0.4202, "step": 242770 }, { "epoch": 69.84464902186421, "grad_norm": 1.4681683778762817, "learning_rate": 0.0006031070195627158, "loss": 0.435, "step": 242780 }, { "epoch": 69.84752589182969, "grad_norm": 0.976815402507782, "learning_rate": 0.0006030494821634062, "loss": 0.4264, "step": 242790 }, { "epoch": 69.85040276179517, "grad_norm": 1.1037018299102783, "learning_rate": 0.0006029919447640967, "loss": 0.3499, "step": 242800 }, { "epoch": 69.85327963176064, "grad_norm": 1.5537923574447632, "learning_rate": 0.0006029344073647872, "loss": 0.4454, "step": 242810 }, { "epoch": 69.85615650172612, "grad_norm": 1.3697950839996338, "learning_rate": 0.0006028768699654775, "loss": 0.449, "step": 242820 }, { "epoch": 69.8590333716916, "grad_norm": 1.791535496711731, "learning_rate": 0.0006028193325661681, "loss": 0.5013, "step": 242830 }, { "epoch": 69.86191024165707, "grad_norm": 1.1286760568618774, "learning_rate": 0.0006027617951668585, "loss": 0.4373, "step": 242840 }, { "epoch": 69.86478711162256, "grad_norm": 1.6817281246185303, "learning_rate": 0.0006027042577675489, "loss": 0.5639, "step": 242850 }, { "epoch": 69.86766398158804, "grad_norm": 1.0948172807693481, "learning_rate": 0.0006026467203682394, "loss": 0.3729, "step": 242860 }, { "epoch": 69.87054085155351, "grad_norm": 1.0665761232376099, "learning_rate": 0.0006025891829689299, "loss": 0.4489, "step": 242870 }, { "epoch": 69.87341772151899, "grad_norm": 0.5909807682037354, "learning_rate": 0.0006025316455696202, "loss": 0.496, "step": 242880 }, { "epoch": 69.87629459148447, "grad_norm": 0.7251059412956238, "learning_rate": 0.0006024741081703107, "loss": 0.3925, "step": 242890 }, { "epoch": 69.87917146144994, "grad_norm": 1.1383360624313354, "learning_rate": 0.0006024165707710012, "loss": 0.4896, "step": 242900 }, { "epoch": 69.88204833141542, "grad_norm": 2.525421142578125, "learning_rate": 0.0006023590333716916, "loss": 0.3519, "step": 242910 }, { "epoch": 69.8849252013809, "grad_norm": 1.906355381011963, "learning_rate": 0.000602301495972382, "loss": 0.4177, "step": 242920 }, { "epoch": 69.88780207134637, "grad_norm": 1.4476901292800903, "learning_rate": 0.0006022439585730725, "loss": 0.3739, "step": 242930 }, { "epoch": 69.89067894131185, "grad_norm": 1.0052827596664429, "learning_rate": 0.000602186421173763, "loss": 0.4111, "step": 242940 }, { "epoch": 69.89355581127732, "grad_norm": 0.7500172257423401, "learning_rate": 0.0006021288837744534, "loss": 0.4409, "step": 242950 }, { "epoch": 69.89643268124281, "grad_norm": 0.9517053961753845, "learning_rate": 0.0006020713463751439, "loss": 0.4012, "step": 242960 }, { "epoch": 69.89930955120829, "grad_norm": 1.2607113122940063, "learning_rate": 0.0006020138089758343, "loss": 0.4457, "step": 242970 }, { "epoch": 69.90218642117377, "grad_norm": 1.1181812286376953, "learning_rate": 0.0006019562715765248, "loss": 0.4401, "step": 242980 }, { "epoch": 69.90506329113924, "grad_norm": 1.8818764686584473, "learning_rate": 0.0006018987341772152, "loss": 0.412, "step": 242990 }, { "epoch": 69.90794016110472, "grad_norm": 0.9410784244537354, "learning_rate": 0.0006018411967779056, "loss": 0.3415, "step": 243000 }, { "epoch": 69.9108170310702, "grad_norm": 0.6794350743293762, "learning_rate": 0.0006017836593785962, "loss": 0.4108, "step": 243010 }, { "epoch": 69.91369390103567, "grad_norm": 1.1845088005065918, "learning_rate": 0.0006017261219792866, "loss": 0.4425, "step": 243020 }, { "epoch": 69.91657077100115, "grad_norm": 1.337396264076233, "learning_rate": 0.000601668584579977, "loss": 0.3733, "step": 243030 }, { "epoch": 69.91944764096662, "grad_norm": 0.7389459609985352, "learning_rate": 0.0006016110471806674, "loss": 0.434, "step": 243040 }, { "epoch": 69.9223245109321, "grad_norm": 1.6854610443115234, "learning_rate": 0.000601553509781358, "loss": 0.5077, "step": 243050 }, { "epoch": 69.92520138089759, "grad_norm": 2.089277505874634, "learning_rate": 0.0006014959723820483, "loss": 0.4183, "step": 243060 }, { "epoch": 69.92807825086307, "grad_norm": 1.3024871349334717, "learning_rate": 0.0006014384349827388, "loss": 0.4684, "step": 243070 }, { "epoch": 69.93095512082854, "grad_norm": 1.7298192977905273, "learning_rate": 0.0006013808975834293, "loss": 0.3645, "step": 243080 }, { "epoch": 69.93383199079402, "grad_norm": 0.6299886703491211, "learning_rate": 0.0006013233601841197, "loss": 0.4855, "step": 243090 }, { "epoch": 69.9367088607595, "grad_norm": 0.8672395348548889, "learning_rate": 0.0006012658227848101, "loss": 0.4575, "step": 243100 }, { "epoch": 69.93958573072497, "grad_norm": 1.3924826383590698, "learning_rate": 0.0006012082853855005, "loss": 0.4654, "step": 243110 }, { "epoch": 69.94246260069045, "grad_norm": 2.351684331893921, "learning_rate": 0.0006011507479861911, "loss": 0.5521, "step": 243120 }, { "epoch": 69.94533947065592, "grad_norm": 1.9410388469696045, "learning_rate": 0.0006010932105868815, "loss": 0.4777, "step": 243130 }, { "epoch": 69.9482163406214, "grad_norm": 1.0747931003570557, "learning_rate": 0.0006010356731875719, "loss": 0.4039, "step": 243140 }, { "epoch": 69.95109321058688, "grad_norm": 1.0995447635650635, "learning_rate": 0.0006009781357882624, "loss": 0.3689, "step": 243150 }, { "epoch": 69.95397008055235, "grad_norm": 1.2003949880599976, "learning_rate": 0.0006009205983889529, "loss": 0.3728, "step": 243160 }, { "epoch": 69.95684695051784, "grad_norm": 1.3809839487075806, "learning_rate": 0.0006008630609896432, "loss": 0.4095, "step": 243170 }, { "epoch": 69.95972382048332, "grad_norm": 1.4047642946243286, "learning_rate": 0.0006008055235903337, "loss": 0.402, "step": 243180 }, { "epoch": 69.9626006904488, "grad_norm": 1.301863193511963, "learning_rate": 0.0006007479861910242, "loss": 0.5049, "step": 243190 }, { "epoch": 69.96547756041427, "grad_norm": 0.9736343622207642, "learning_rate": 0.0006006904487917146, "loss": 0.3634, "step": 243200 }, { "epoch": 69.96835443037975, "grad_norm": 0.834162175655365, "learning_rate": 0.000600632911392405, "loss": 0.3384, "step": 243210 }, { "epoch": 69.97123130034522, "grad_norm": 1.0076184272766113, "learning_rate": 0.0006005753739930955, "loss": 0.4163, "step": 243220 }, { "epoch": 69.9741081703107, "grad_norm": 0.7426539659500122, "learning_rate": 0.000600517836593786, "loss": 0.5442, "step": 243230 }, { "epoch": 69.97698504027618, "grad_norm": 1.421746015548706, "learning_rate": 0.0006004602991944764, "loss": 0.4409, "step": 243240 }, { "epoch": 69.97986191024165, "grad_norm": 0.7786262035369873, "learning_rate": 0.0006004027617951669, "loss": 0.4545, "step": 243250 }, { "epoch": 69.98273878020713, "grad_norm": 0.8511612415313721, "learning_rate": 0.0006003452243958573, "loss": 0.5989, "step": 243260 }, { "epoch": 69.98561565017262, "grad_norm": 1.8119537830352783, "learning_rate": 0.0006002876869965478, "loss": 0.4375, "step": 243270 }, { "epoch": 69.9884925201381, "grad_norm": 1.2745884656906128, "learning_rate": 0.0006002301495972382, "loss": 0.4901, "step": 243280 }, { "epoch": 69.99136939010357, "grad_norm": 1.5993696451187134, "learning_rate": 0.0006001726121979286, "loss": 0.3994, "step": 243290 }, { "epoch": 69.99424626006905, "grad_norm": 1.4598987102508545, "learning_rate": 0.0006001150747986191, "loss": 0.3423, "step": 243300 }, { "epoch": 69.99712313003452, "grad_norm": 2.3775501251220703, "learning_rate": 0.0006000575373993096, "loss": 0.4552, "step": 243310 }, { "epoch": 70.0, "grad_norm": 0.8753863573074341, "learning_rate": 0.0006, "loss": 0.3622, "step": 243320 }, { "epoch": 70.00287686996548, "grad_norm": 1.0375182628631592, "learning_rate": 0.0005999424626006904, "loss": 0.445, "step": 243330 }, { "epoch": 70.00575373993095, "grad_norm": 1.6165183782577515, "learning_rate": 0.000599884925201381, "loss": 0.3629, "step": 243340 }, { "epoch": 70.00863060989643, "grad_norm": 1.4780759811401367, "learning_rate": 0.0005998273878020713, "loss": 0.4116, "step": 243350 }, { "epoch": 70.0115074798619, "grad_norm": 1.6452382802963257, "learning_rate": 0.0005997698504027618, "loss": 0.4404, "step": 243360 }, { "epoch": 70.01438434982738, "grad_norm": 1.4107471704483032, "learning_rate": 0.0005997123130034523, "loss": 0.4066, "step": 243370 }, { "epoch": 70.01726121979287, "grad_norm": 0.9266327023506165, "learning_rate": 0.0005996547756041427, "loss": 0.4926, "step": 243380 }, { "epoch": 70.02013808975835, "grad_norm": 0.8423048853874207, "learning_rate": 0.0005995972382048331, "loss": 0.3897, "step": 243390 }, { "epoch": 70.02301495972382, "grad_norm": 0.7385152578353882, "learning_rate": 0.0005995397008055236, "loss": 0.3207, "step": 243400 }, { "epoch": 70.0258918296893, "grad_norm": 1.1651679277420044, "learning_rate": 0.000599482163406214, "loss": 0.4332, "step": 243410 }, { "epoch": 70.02876869965478, "grad_norm": 1.1089458465576172, "learning_rate": 0.0005994246260069045, "loss": 0.3247, "step": 243420 }, { "epoch": 70.03164556962025, "grad_norm": 1.0949177742004395, "learning_rate": 0.000599367088607595, "loss": 0.3637, "step": 243430 }, { "epoch": 70.03452243958573, "grad_norm": 1.1790629625320435, "learning_rate": 0.0005993095512082854, "loss": 0.3239, "step": 243440 }, { "epoch": 70.0373993095512, "grad_norm": 0.9269433617591858, "learning_rate": 0.0005992520138089759, "loss": 0.3764, "step": 243450 }, { "epoch": 70.04027617951668, "grad_norm": 0.5681443810462952, "learning_rate": 0.0005991944764096663, "loss": 0.3083, "step": 243460 }, { "epoch": 70.04315304948216, "grad_norm": 1.1548386812210083, "learning_rate": 0.0005991369390103567, "loss": 0.3911, "step": 243470 }, { "epoch": 70.04602991944765, "grad_norm": 1.2657283544540405, "learning_rate": 0.0005990794016110472, "loss": 0.3531, "step": 243480 }, { "epoch": 70.04890678941312, "grad_norm": 1.6986286640167236, "learning_rate": 0.0005990218642117377, "loss": 0.4067, "step": 243490 }, { "epoch": 70.0517836593786, "grad_norm": 1.1074802875518799, "learning_rate": 0.000598964326812428, "loss": 0.4128, "step": 243500 }, { "epoch": 70.05466052934408, "grad_norm": 1.009544849395752, "learning_rate": 0.0005989067894131185, "loss": 0.4126, "step": 243510 }, { "epoch": 70.05753739930955, "grad_norm": 1.375812292098999, "learning_rate": 0.0005988492520138091, "loss": 0.3722, "step": 243520 }, { "epoch": 70.06041426927503, "grad_norm": 1.5742768049240112, "learning_rate": 0.0005987917146144994, "loss": 0.3847, "step": 243530 }, { "epoch": 70.0632911392405, "grad_norm": 0.863127589225769, "learning_rate": 0.0005987341772151899, "loss": 0.4082, "step": 243540 }, { "epoch": 70.06616800920598, "grad_norm": 1.3599216938018799, "learning_rate": 0.0005986766398158804, "loss": 0.3432, "step": 243550 }, { "epoch": 70.06904487917146, "grad_norm": 1.2860167026519775, "learning_rate": 0.0005986191024165708, "loss": 0.3195, "step": 243560 }, { "epoch": 70.07192174913693, "grad_norm": 0.7672280669212341, "learning_rate": 0.0005985615650172612, "loss": 0.3188, "step": 243570 }, { "epoch": 70.07479861910241, "grad_norm": 1.4055943489074707, "learning_rate": 0.0005985040276179517, "loss": 0.459, "step": 243580 }, { "epoch": 70.0776754890679, "grad_norm": 0.8523414134979248, "learning_rate": 0.0005984464902186421, "loss": 0.3762, "step": 243590 }, { "epoch": 70.08055235903338, "grad_norm": 2.2041432857513428, "learning_rate": 0.0005983889528193326, "loss": 0.3919, "step": 243600 }, { "epoch": 70.08342922899885, "grad_norm": 0.7565689086914062, "learning_rate": 0.000598331415420023, "loss": 0.4073, "step": 243610 }, { "epoch": 70.08630609896433, "grad_norm": 0.770102322101593, "learning_rate": 0.0005982738780207134, "loss": 0.3576, "step": 243620 }, { "epoch": 70.0891829689298, "grad_norm": 0.9406571388244629, "learning_rate": 0.000598216340621404, "loss": 0.3471, "step": 243630 }, { "epoch": 70.09205983889528, "grad_norm": 0.8847355842590332, "learning_rate": 0.0005981588032220944, "loss": 0.4147, "step": 243640 }, { "epoch": 70.09493670886076, "grad_norm": 1.475346326828003, "learning_rate": 0.0005981012658227848, "loss": 0.4262, "step": 243650 }, { "epoch": 70.09781357882623, "grad_norm": 0.7784372568130493, "learning_rate": 0.0005980437284234753, "loss": 0.4249, "step": 243660 }, { "epoch": 70.10069044879171, "grad_norm": 2.2458713054656982, "learning_rate": 0.0005979861910241658, "loss": 0.3494, "step": 243670 }, { "epoch": 70.10356731875719, "grad_norm": 2.320674180984497, "learning_rate": 0.0005979286536248561, "loss": 0.3862, "step": 243680 }, { "epoch": 70.10644418872268, "grad_norm": 2.916077136993408, "learning_rate": 0.0005978711162255466, "loss": 0.5188, "step": 243690 }, { "epoch": 70.10932105868815, "grad_norm": 1.0380467176437378, "learning_rate": 0.0005978135788262371, "loss": 0.3469, "step": 243700 }, { "epoch": 70.11219792865363, "grad_norm": 1.6898705959320068, "learning_rate": 0.0005977560414269275, "loss": 0.4948, "step": 243710 }, { "epoch": 70.1150747986191, "grad_norm": 1.5095664262771606, "learning_rate": 0.000597698504027618, "loss": 0.408, "step": 243720 }, { "epoch": 70.11795166858458, "grad_norm": 0.7612205147743225, "learning_rate": 0.0005976409666283084, "loss": 0.3906, "step": 243730 }, { "epoch": 70.12082853855006, "grad_norm": 1.4899730682373047, "learning_rate": 0.0005975834292289989, "loss": 0.5197, "step": 243740 }, { "epoch": 70.12370540851553, "grad_norm": 1.4181286096572876, "learning_rate": 0.0005975258918296893, "loss": 0.3878, "step": 243750 }, { "epoch": 70.12658227848101, "grad_norm": 1.3861268758773804, "learning_rate": 0.0005974683544303798, "loss": 0.3255, "step": 243760 }, { "epoch": 70.12945914844649, "grad_norm": 1.9563604593276978, "learning_rate": 0.0005974108170310702, "loss": 0.4386, "step": 243770 }, { "epoch": 70.13233601841196, "grad_norm": 1.2806003093719482, "learning_rate": 0.0005973532796317607, "loss": 0.42, "step": 243780 }, { "epoch": 70.13521288837744, "grad_norm": 1.73506498336792, "learning_rate": 0.0005972957422324511, "loss": 0.4004, "step": 243790 }, { "epoch": 70.13808975834293, "grad_norm": 0.9993563294410706, "learning_rate": 0.0005972382048331415, "loss": 0.3738, "step": 243800 }, { "epoch": 70.1409666283084, "grad_norm": 0.725836992263794, "learning_rate": 0.000597180667433832, "loss": 0.3955, "step": 243810 }, { "epoch": 70.14384349827388, "grad_norm": 1.6253478527069092, "learning_rate": 0.0005971231300345225, "loss": 0.3916, "step": 243820 }, { "epoch": 70.14672036823936, "grad_norm": 1.9600093364715576, "learning_rate": 0.0005970655926352129, "loss": 0.4511, "step": 243830 }, { "epoch": 70.14959723820483, "grad_norm": 1.2840758562088013, "learning_rate": 0.0005970080552359034, "loss": 0.3638, "step": 243840 }, { "epoch": 70.15247410817031, "grad_norm": 1.3550598621368408, "learning_rate": 0.0005969505178365939, "loss": 0.4729, "step": 243850 }, { "epoch": 70.15535097813579, "grad_norm": 1.1997206211090088, "learning_rate": 0.0005968929804372842, "loss": 0.3791, "step": 243860 }, { "epoch": 70.15822784810126, "grad_norm": 1.7604535818099976, "learning_rate": 0.0005968354430379747, "loss": 0.401, "step": 243870 }, { "epoch": 70.16110471806674, "grad_norm": 1.4238228797912598, "learning_rate": 0.0005967779056386652, "loss": 0.3852, "step": 243880 }, { "epoch": 70.16398158803221, "grad_norm": 1.43833589553833, "learning_rate": 0.0005967203682393556, "loss": 0.3614, "step": 243890 }, { "epoch": 70.1668584579977, "grad_norm": 1.7098026275634766, "learning_rate": 0.000596662830840046, "loss": 0.4024, "step": 243900 }, { "epoch": 70.16973532796318, "grad_norm": 1.3646395206451416, "learning_rate": 0.0005966052934407364, "loss": 0.4959, "step": 243910 }, { "epoch": 70.17261219792866, "grad_norm": 0.8128734827041626, "learning_rate": 0.000596547756041427, "loss": 0.4093, "step": 243920 }, { "epoch": 70.17548906789413, "grad_norm": 1.1850019693374634, "learning_rate": 0.0005964902186421174, "loss": 0.3675, "step": 243930 }, { "epoch": 70.17836593785961, "grad_norm": 1.0932424068450928, "learning_rate": 0.0005964326812428078, "loss": 0.4198, "step": 243940 }, { "epoch": 70.18124280782509, "grad_norm": 1.543348789215088, "learning_rate": 0.0005963751438434983, "loss": 0.5688, "step": 243950 }, { "epoch": 70.18411967779056, "grad_norm": 0.9108403325080872, "learning_rate": 0.0005963176064441888, "loss": 0.4458, "step": 243960 }, { "epoch": 70.18699654775604, "grad_norm": 1.5252310037612915, "learning_rate": 0.0005962600690448791, "loss": 0.3424, "step": 243970 }, { "epoch": 70.18987341772151, "grad_norm": 2.017801523208618, "learning_rate": 0.0005962025316455696, "loss": 0.3681, "step": 243980 }, { "epoch": 70.19275028768699, "grad_norm": 1.1280933618545532, "learning_rate": 0.0005961449942462601, "loss": 0.4368, "step": 243990 }, { "epoch": 70.19562715765247, "grad_norm": 1.3313480615615845, "learning_rate": 0.0005960874568469505, "loss": 0.4063, "step": 244000 }, { "epoch": 70.19850402761796, "grad_norm": 1.0299948453903198, "learning_rate": 0.0005960299194476409, "loss": 0.3347, "step": 244010 }, { "epoch": 70.20138089758343, "grad_norm": 0.8869416117668152, "learning_rate": 0.0005959723820483314, "loss": 0.383, "step": 244020 }, { "epoch": 70.20425776754891, "grad_norm": 1.111208200454712, "learning_rate": 0.0005959148446490219, "loss": 0.409, "step": 244030 }, { "epoch": 70.20713463751439, "grad_norm": 1.0058690309524536, "learning_rate": 0.0005958573072497123, "loss": 0.3755, "step": 244040 }, { "epoch": 70.21001150747986, "grad_norm": 1.2862354516983032, "learning_rate": 0.0005957997698504028, "loss": 0.4468, "step": 244050 }, { "epoch": 70.21288837744534, "grad_norm": 1.5131882429122925, "learning_rate": 0.0005957422324510932, "loss": 0.4524, "step": 244060 }, { "epoch": 70.21576524741081, "grad_norm": 1.6638545989990234, "learning_rate": 0.0005956846950517837, "loss": 0.3698, "step": 244070 }, { "epoch": 70.21864211737629, "grad_norm": 0.9149468541145325, "learning_rate": 0.0005956271576524741, "loss": 0.4584, "step": 244080 }, { "epoch": 70.22151898734177, "grad_norm": 1.2180742025375366, "learning_rate": 0.0005955696202531645, "loss": 0.4058, "step": 244090 }, { "epoch": 70.22439585730724, "grad_norm": 0.7402949929237366, "learning_rate": 0.000595512082853855, "loss": 0.3783, "step": 244100 }, { "epoch": 70.22727272727273, "grad_norm": 1.461527705192566, "learning_rate": 0.0005954545454545455, "loss": 0.4387, "step": 244110 }, { "epoch": 70.23014959723821, "grad_norm": 1.5928977727890015, "learning_rate": 0.0005953970080552358, "loss": 0.4293, "step": 244120 }, { "epoch": 70.23302646720369, "grad_norm": 0.8912076950073242, "learning_rate": 0.0005953394706559264, "loss": 0.3758, "step": 244130 }, { "epoch": 70.23590333716916, "grad_norm": 1.0867658853530884, "learning_rate": 0.0005952819332566169, "loss": 0.3751, "step": 244140 }, { "epoch": 70.23878020713464, "grad_norm": 0.8377918004989624, "learning_rate": 0.0005952243958573072, "loss": 0.4115, "step": 244150 }, { "epoch": 70.24165707710011, "grad_norm": 0.755685567855835, "learning_rate": 0.0005951668584579977, "loss": 0.3022, "step": 244160 }, { "epoch": 70.24453394706559, "grad_norm": 0.621353030204773, "learning_rate": 0.0005951093210586882, "loss": 0.3728, "step": 244170 }, { "epoch": 70.24741081703107, "grad_norm": 1.3245422840118408, "learning_rate": 0.0005950517836593786, "loss": 0.3968, "step": 244180 }, { "epoch": 70.25028768699654, "grad_norm": 1.1417711973190308, "learning_rate": 0.000594994246260069, "loss": 0.3877, "step": 244190 }, { "epoch": 70.25316455696202, "grad_norm": 2.039902448654175, "learning_rate": 0.0005949367088607595, "loss": 0.5305, "step": 244200 }, { "epoch": 70.25604142692751, "grad_norm": 1.5742888450622559, "learning_rate": 0.0005948791714614499, "loss": 0.3383, "step": 244210 }, { "epoch": 70.25891829689299, "grad_norm": 1.5423691272735596, "learning_rate": 0.0005948216340621404, "loss": 0.3946, "step": 244220 }, { "epoch": 70.26179516685846, "grad_norm": 0.7198387384414673, "learning_rate": 0.0005947640966628309, "loss": 0.4231, "step": 244230 }, { "epoch": 70.26467203682394, "grad_norm": 1.7047984600067139, "learning_rate": 0.0005947065592635213, "loss": 0.4608, "step": 244240 }, { "epoch": 70.26754890678941, "grad_norm": 3.1066558361053467, "learning_rate": 0.0005946490218642118, "loss": 0.5181, "step": 244250 }, { "epoch": 70.27042577675489, "grad_norm": 2.0294504165649414, "learning_rate": 0.0005945914844649022, "loss": 0.4436, "step": 244260 }, { "epoch": 70.27330264672037, "grad_norm": 0.5800981521606445, "learning_rate": 0.0005945339470655926, "loss": 0.4267, "step": 244270 }, { "epoch": 70.27617951668584, "grad_norm": 1.8942826986312866, "learning_rate": 0.0005944764096662831, "loss": 0.3924, "step": 244280 }, { "epoch": 70.27905638665132, "grad_norm": 1.15475594997406, "learning_rate": 0.0005944188722669736, "loss": 0.3073, "step": 244290 }, { "epoch": 70.2819332566168, "grad_norm": 1.464080810546875, "learning_rate": 0.0005943613348676639, "loss": 0.4312, "step": 244300 }, { "epoch": 70.28481012658227, "grad_norm": 1.0310412645339966, "learning_rate": 0.0005943037974683544, "loss": 0.3225, "step": 244310 }, { "epoch": 70.28768699654776, "grad_norm": 1.229513168334961, "learning_rate": 0.000594246260069045, "loss": 0.4029, "step": 244320 }, { "epoch": 70.29056386651324, "grad_norm": 1.5071499347686768, "learning_rate": 0.0005941887226697353, "loss": 0.3638, "step": 244330 }, { "epoch": 70.29344073647871, "grad_norm": 1.1611528396606445, "learning_rate": 0.0005941311852704258, "loss": 0.3299, "step": 244340 }, { "epoch": 70.29631760644419, "grad_norm": 0.7139224410057068, "learning_rate": 0.0005940736478711163, "loss": 0.3846, "step": 244350 }, { "epoch": 70.29919447640967, "grad_norm": 1.4205358028411865, "learning_rate": 0.0005940161104718067, "loss": 0.3997, "step": 244360 }, { "epoch": 70.30207134637514, "grad_norm": 1.5384440422058105, "learning_rate": 0.0005939585730724971, "loss": 0.3472, "step": 244370 }, { "epoch": 70.30494821634062, "grad_norm": 1.6277368068695068, "learning_rate": 0.0005939010356731876, "loss": 0.407, "step": 244380 }, { "epoch": 70.3078250863061, "grad_norm": 1.2075748443603516, "learning_rate": 0.000593843498273878, "loss": 0.5408, "step": 244390 }, { "epoch": 70.31070195627157, "grad_norm": 0.9948465824127197, "learning_rate": 0.0005937859608745685, "loss": 0.3302, "step": 244400 }, { "epoch": 70.31357882623705, "grad_norm": 1.2189991474151611, "learning_rate": 0.000593728423475259, "loss": 0.3632, "step": 244410 }, { "epoch": 70.31645569620254, "grad_norm": 1.5616042613983154, "learning_rate": 0.0005936708860759494, "loss": 0.3892, "step": 244420 }, { "epoch": 70.31933256616801, "grad_norm": 0.7067868113517761, "learning_rate": 0.0005936133486766399, "loss": 0.3353, "step": 244430 }, { "epoch": 70.32220943613349, "grad_norm": 1.0515817403793335, "learning_rate": 0.0005935558112773303, "loss": 0.4053, "step": 244440 }, { "epoch": 70.32508630609897, "grad_norm": 1.4119526147842407, "learning_rate": 0.0005934982738780207, "loss": 0.4112, "step": 244450 }, { "epoch": 70.32796317606444, "grad_norm": 1.7715189456939697, "learning_rate": 0.0005934407364787112, "loss": 0.4172, "step": 244460 }, { "epoch": 70.33084004602992, "grad_norm": 1.5050712823867798, "learning_rate": 0.0005933831990794017, "loss": 0.3421, "step": 244470 }, { "epoch": 70.3337169159954, "grad_norm": 0.8644404411315918, "learning_rate": 0.000593325661680092, "loss": 0.3868, "step": 244480 }, { "epoch": 70.33659378596087, "grad_norm": 0.9064748883247375, "learning_rate": 0.0005932681242807825, "loss": 0.4227, "step": 244490 }, { "epoch": 70.33947065592635, "grad_norm": 0.7811769843101501, "learning_rate": 0.000593210586881473, "loss": 0.4814, "step": 244500 }, { "epoch": 70.34234752589182, "grad_norm": 1.2053472995758057, "learning_rate": 0.0005931530494821634, "loss": 0.4054, "step": 244510 }, { "epoch": 70.3452243958573, "grad_norm": 1.9544904232025146, "learning_rate": 0.0005930955120828538, "loss": 0.6126, "step": 244520 }, { "epoch": 70.34810126582279, "grad_norm": 1.47538161277771, "learning_rate": 0.0005930379746835444, "loss": 0.4422, "step": 244530 }, { "epoch": 70.35097813578827, "grad_norm": 1.0420857667922974, "learning_rate": 0.0005929804372842348, "loss": 0.5226, "step": 244540 }, { "epoch": 70.35385500575374, "grad_norm": 0.924350917339325, "learning_rate": 0.0005929228998849252, "loss": 0.4396, "step": 244550 }, { "epoch": 70.35673187571922, "grad_norm": 0.7315792441368103, "learning_rate": 0.0005928653624856157, "loss": 0.371, "step": 244560 }, { "epoch": 70.3596087456847, "grad_norm": 1.007919192314148, "learning_rate": 0.0005928078250863061, "loss": 0.3913, "step": 244570 }, { "epoch": 70.36248561565017, "grad_norm": 1.2755764722824097, "learning_rate": 0.0005927502876869966, "loss": 0.3785, "step": 244580 }, { "epoch": 70.36536248561565, "grad_norm": 1.0392205715179443, "learning_rate": 0.000592692750287687, "loss": 0.4395, "step": 244590 }, { "epoch": 70.36823935558112, "grad_norm": 2.1624910831451416, "learning_rate": 0.0005926352128883774, "loss": 0.3903, "step": 244600 }, { "epoch": 70.3711162255466, "grad_norm": 1.1109285354614258, "learning_rate": 0.000592577675489068, "loss": 0.4824, "step": 244610 }, { "epoch": 70.37399309551208, "grad_norm": 1.735920786857605, "learning_rate": 0.0005925201380897584, "loss": 0.3883, "step": 244620 }, { "epoch": 70.37686996547757, "grad_norm": 0.9274857640266418, "learning_rate": 0.0005924626006904488, "loss": 0.377, "step": 244630 }, { "epoch": 70.37974683544304, "grad_norm": 1.441116452217102, "learning_rate": 0.0005924050632911393, "loss": 0.3766, "step": 244640 }, { "epoch": 70.38262370540852, "grad_norm": 1.4109017848968506, "learning_rate": 0.0005923475258918298, "loss": 0.4384, "step": 244650 }, { "epoch": 70.385500575374, "grad_norm": 1.2827668190002441, "learning_rate": 0.0005922899884925201, "loss": 0.3449, "step": 244660 }, { "epoch": 70.38837744533947, "grad_norm": 0.8705412149429321, "learning_rate": 0.0005922324510932106, "loss": 0.365, "step": 244670 }, { "epoch": 70.39125431530495, "grad_norm": 1.8465393781661987, "learning_rate": 0.0005921749136939011, "loss": 0.4684, "step": 244680 }, { "epoch": 70.39413118527042, "grad_norm": 1.366231083869934, "learning_rate": 0.0005921173762945915, "loss": 0.3639, "step": 244690 }, { "epoch": 70.3970080552359, "grad_norm": 1.2330302000045776, "learning_rate": 0.0005920598388952819, "loss": 0.3779, "step": 244700 }, { "epoch": 70.39988492520138, "grad_norm": 0.7466182112693787, "learning_rate": 0.0005920023014959723, "loss": 0.4783, "step": 244710 }, { "epoch": 70.40276179516685, "grad_norm": 0.8105730414390564, "learning_rate": 0.0005919447640966629, "loss": 0.355, "step": 244720 }, { "epoch": 70.40563866513233, "grad_norm": 1.3492766618728638, "learning_rate": 0.0005918872266973533, "loss": 0.377, "step": 244730 }, { "epoch": 70.40851553509782, "grad_norm": 0.8683474659919739, "learning_rate": 0.0005918296892980437, "loss": 0.486, "step": 244740 }, { "epoch": 70.4113924050633, "grad_norm": 1.8162273168563843, "learning_rate": 0.0005917721518987342, "loss": 0.3997, "step": 244750 }, { "epoch": 70.41426927502877, "grad_norm": 0.7554728388786316, "learning_rate": 0.0005917146144994247, "loss": 0.4145, "step": 244760 }, { "epoch": 70.41714614499425, "grad_norm": 0.9947987198829651, "learning_rate": 0.000591657077100115, "loss": 0.4297, "step": 244770 }, { "epoch": 70.42002301495972, "grad_norm": 1.423606276512146, "learning_rate": 0.0005915995397008055, "loss": 0.4003, "step": 244780 }, { "epoch": 70.4228998849252, "grad_norm": 1.1548906564712524, "learning_rate": 0.000591542002301496, "loss": 0.3928, "step": 244790 }, { "epoch": 70.42577675489068, "grad_norm": 0.8716641664505005, "learning_rate": 0.0005914844649021864, "loss": 0.4048, "step": 244800 }, { "epoch": 70.42865362485615, "grad_norm": 1.1928387880325317, "learning_rate": 0.0005914269275028768, "loss": 0.409, "step": 244810 }, { "epoch": 70.43153049482163, "grad_norm": 1.6550801992416382, "learning_rate": 0.0005913693901035674, "loss": 0.4944, "step": 244820 }, { "epoch": 70.4344073647871, "grad_norm": 0.676382303237915, "learning_rate": 0.0005913118527042578, "loss": 0.3324, "step": 244830 }, { "epoch": 70.4372842347526, "grad_norm": 1.9202864170074463, "learning_rate": 0.0005912543153049482, "loss": 0.3748, "step": 244840 }, { "epoch": 70.44016110471807, "grad_norm": 1.2424739599227905, "learning_rate": 0.0005911967779056387, "loss": 0.3247, "step": 244850 }, { "epoch": 70.44303797468355, "grad_norm": 2.982811212539673, "learning_rate": 0.0005911392405063291, "loss": 0.3835, "step": 244860 }, { "epoch": 70.44591484464902, "grad_norm": 0.9940411448478699, "learning_rate": 0.0005910817031070196, "loss": 0.3589, "step": 244870 }, { "epoch": 70.4487917146145, "grad_norm": 1.5376991033554077, "learning_rate": 0.00059102416570771, "loss": 0.3445, "step": 244880 }, { "epoch": 70.45166858457998, "grad_norm": 1.1284226179122925, "learning_rate": 0.0005909666283084004, "loss": 0.4552, "step": 244890 }, { "epoch": 70.45454545454545, "grad_norm": 0.8674845695495605, "learning_rate": 0.0005909090909090909, "loss": 0.426, "step": 244900 }, { "epoch": 70.45742232451093, "grad_norm": 1.0458704233169556, "learning_rate": 0.0005908515535097814, "loss": 0.4177, "step": 244910 }, { "epoch": 70.4602991944764, "grad_norm": 1.019938588142395, "learning_rate": 0.0005907940161104717, "loss": 0.3342, "step": 244920 }, { "epoch": 70.46317606444188, "grad_norm": 0.8652090430259705, "learning_rate": 0.0005907364787111623, "loss": 0.4647, "step": 244930 }, { "epoch": 70.46605293440736, "grad_norm": 1.687604546546936, "learning_rate": 0.0005906789413118528, "loss": 0.4139, "step": 244940 }, { "epoch": 70.46892980437285, "grad_norm": 1.845521092414856, "learning_rate": 0.0005906214039125431, "loss": 0.3766, "step": 244950 }, { "epoch": 70.47180667433832, "grad_norm": 1.2413734197616577, "learning_rate": 0.0005905638665132336, "loss": 0.3584, "step": 244960 }, { "epoch": 70.4746835443038, "grad_norm": 1.3133933544158936, "learning_rate": 0.0005905063291139241, "loss": 0.4541, "step": 244970 }, { "epoch": 70.47756041426928, "grad_norm": 1.5756512880325317, "learning_rate": 0.0005904487917146145, "loss": 0.4157, "step": 244980 }, { "epoch": 70.48043728423475, "grad_norm": 1.1783435344696045, "learning_rate": 0.0005903912543153049, "loss": 0.4933, "step": 244990 }, { "epoch": 70.48331415420023, "grad_norm": 0.6450667977333069, "learning_rate": 0.0005903337169159954, "loss": 0.4232, "step": 245000 }, { "epoch": 70.4861910241657, "grad_norm": 2.179696559906006, "learning_rate": 0.0005902761795166858, "loss": 0.4456, "step": 245010 }, { "epoch": 70.48906789413118, "grad_norm": 1.5049159526824951, "learning_rate": 0.0005902186421173763, "loss": 0.3827, "step": 245020 }, { "epoch": 70.49194476409666, "grad_norm": 0.8981004357337952, "learning_rate": 0.0005901611047180668, "loss": 0.4581, "step": 245030 }, { "epoch": 70.49482163406213, "grad_norm": 1.4224940538406372, "learning_rate": 0.0005901035673187572, "loss": 0.4677, "step": 245040 }, { "epoch": 70.49769850402762, "grad_norm": 1.7982004880905151, "learning_rate": 0.0005900460299194477, "loss": 0.476, "step": 245050 }, { "epoch": 70.5005753739931, "grad_norm": 1.459946870803833, "learning_rate": 0.0005899884925201381, "loss": 0.4181, "step": 245060 }, { "epoch": 70.50345224395858, "grad_norm": 1.1950849294662476, "learning_rate": 0.0005899309551208285, "loss": 0.4024, "step": 245070 }, { "epoch": 70.50632911392405, "grad_norm": 1.4077715873718262, "learning_rate": 0.000589873417721519, "loss": 0.4207, "step": 245080 }, { "epoch": 70.50920598388953, "grad_norm": 0.7641887664794922, "learning_rate": 0.0005898158803222095, "loss": 0.3832, "step": 245090 }, { "epoch": 70.512082853855, "grad_norm": 1.132047414779663, "learning_rate": 0.0005897583429228998, "loss": 0.3711, "step": 245100 }, { "epoch": 70.51495972382048, "grad_norm": 1.519895076751709, "learning_rate": 0.0005897008055235904, "loss": 0.3614, "step": 245110 }, { "epoch": 70.51783659378596, "grad_norm": 1.016093134880066, "learning_rate": 0.0005896432681242809, "loss": 0.4231, "step": 245120 }, { "epoch": 70.52071346375143, "grad_norm": 1.1585867404937744, "learning_rate": 0.0005895857307249712, "loss": 0.409, "step": 245130 }, { "epoch": 70.52359033371691, "grad_norm": 1.336397647857666, "learning_rate": 0.0005895281933256617, "loss": 0.3812, "step": 245140 }, { "epoch": 70.52646720368239, "grad_norm": 1.112949013710022, "learning_rate": 0.0005894706559263522, "loss": 0.4405, "step": 245150 }, { "epoch": 70.52934407364788, "grad_norm": 1.6175397634506226, "learning_rate": 0.0005894131185270426, "loss": 0.4399, "step": 245160 }, { "epoch": 70.53222094361335, "grad_norm": 1.4129294157028198, "learning_rate": 0.000589355581127733, "loss": 0.362, "step": 245170 }, { "epoch": 70.53509781357883, "grad_norm": 1.0765315294265747, "learning_rate": 0.0005892980437284235, "loss": 0.4507, "step": 245180 }, { "epoch": 70.5379746835443, "grad_norm": 1.399080514907837, "learning_rate": 0.0005892405063291139, "loss": 0.3661, "step": 245190 }, { "epoch": 70.54085155350978, "grad_norm": 0.6471880078315735, "learning_rate": 0.0005891829689298044, "loss": 0.4608, "step": 245200 }, { "epoch": 70.54372842347526, "grad_norm": 2.2011830806732178, "learning_rate": 0.0005891254315304948, "loss": 0.3797, "step": 245210 }, { "epoch": 70.54660529344073, "grad_norm": 1.9334148168563843, "learning_rate": 0.0005890678941311853, "loss": 0.4617, "step": 245220 }, { "epoch": 70.54948216340621, "grad_norm": 1.4715142250061035, "learning_rate": 0.0005890103567318758, "loss": 0.5418, "step": 245230 }, { "epoch": 70.55235903337169, "grad_norm": 1.0321520566940308, "learning_rate": 0.0005889528193325662, "loss": 0.4349, "step": 245240 }, { "epoch": 70.55523590333716, "grad_norm": 0.9390745759010315, "learning_rate": 0.0005888952819332566, "loss": 0.3517, "step": 245250 }, { "epoch": 70.55811277330265, "grad_norm": 2.0697133541107178, "learning_rate": 0.0005888377445339471, "loss": 0.454, "step": 245260 }, { "epoch": 70.56098964326813, "grad_norm": 1.5618818998336792, "learning_rate": 0.0005887802071346376, "loss": 0.3848, "step": 245270 }, { "epoch": 70.5638665132336, "grad_norm": 1.6888943910598755, "learning_rate": 0.0005887226697353279, "loss": 0.4293, "step": 245280 }, { "epoch": 70.56674338319908, "grad_norm": 1.1276603937149048, "learning_rate": 0.0005886651323360184, "loss": 0.4623, "step": 245290 }, { "epoch": 70.56962025316456, "grad_norm": 1.697805404663086, "learning_rate": 0.0005886075949367089, "loss": 0.4136, "step": 245300 }, { "epoch": 70.57249712313003, "grad_norm": 0.7552428841590881, "learning_rate": 0.0005885500575373993, "loss": 0.5658, "step": 245310 }, { "epoch": 70.57537399309551, "grad_norm": 1.5270015001296997, "learning_rate": 0.0005884925201380897, "loss": 0.3707, "step": 245320 }, { "epoch": 70.57825086306099, "grad_norm": 0.9429522156715393, "learning_rate": 0.0005884349827387803, "loss": 0.4651, "step": 245330 }, { "epoch": 70.58112773302646, "grad_norm": 1.587989330291748, "learning_rate": 0.0005883774453394707, "loss": 0.5551, "step": 245340 }, { "epoch": 70.58400460299194, "grad_norm": 0.6970060467720032, "learning_rate": 0.0005883199079401611, "loss": 0.3286, "step": 245350 }, { "epoch": 70.58688147295742, "grad_norm": 1.4349923133850098, "learning_rate": 0.0005882623705408516, "loss": 0.4671, "step": 245360 }, { "epoch": 70.5897583429229, "grad_norm": 1.2171838283538818, "learning_rate": 0.000588204833141542, "loss": 0.4164, "step": 245370 }, { "epoch": 70.59263521288838, "grad_norm": 1.3129658699035645, "learning_rate": 0.0005881472957422325, "loss": 0.4561, "step": 245380 }, { "epoch": 70.59551208285386, "grad_norm": 1.6419579982757568, "learning_rate": 0.0005880897583429229, "loss": 0.4581, "step": 245390 }, { "epoch": 70.59838895281933, "grad_norm": 0.9702495336532593, "learning_rate": 0.0005880322209436134, "loss": 0.4089, "step": 245400 }, { "epoch": 70.60126582278481, "grad_norm": 1.664683222770691, "learning_rate": 0.0005879746835443038, "loss": 0.5383, "step": 245410 }, { "epoch": 70.60414269275029, "grad_norm": 1.010162115097046, "learning_rate": 0.0005879171461449943, "loss": 0.3791, "step": 245420 }, { "epoch": 70.60701956271576, "grad_norm": 2.3528196811676025, "learning_rate": 0.0005878596087456846, "loss": 0.453, "step": 245430 }, { "epoch": 70.60989643268124, "grad_norm": 1.0252597332000732, "learning_rate": 0.0005878020713463752, "loss": 0.4384, "step": 245440 }, { "epoch": 70.61277330264672, "grad_norm": 1.0066518783569336, "learning_rate": 0.0005877445339470657, "loss": 0.4043, "step": 245450 }, { "epoch": 70.61565017261219, "grad_norm": 1.1166813373565674, "learning_rate": 0.000587686996547756, "loss": 0.395, "step": 245460 }, { "epoch": 70.61852704257768, "grad_norm": 1.0397149324417114, "learning_rate": 0.0005876294591484465, "loss": 0.4857, "step": 245470 }, { "epoch": 70.62140391254316, "grad_norm": 1.3651233911514282, "learning_rate": 0.000587571921749137, "loss": 0.3509, "step": 245480 }, { "epoch": 70.62428078250863, "grad_norm": 1.0130740404129028, "learning_rate": 0.0005875143843498274, "loss": 0.5039, "step": 245490 }, { "epoch": 70.62715765247411, "grad_norm": 1.4230746030807495, "learning_rate": 0.0005874568469505178, "loss": 0.3458, "step": 245500 }, { "epoch": 70.63003452243959, "grad_norm": 1.4387985467910767, "learning_rate": 0.0005873993095512084, "loss": 0.3122, "step": 245510 }, { "epoch": 70.63291139240506, "grad_norm": 1.3384770154953003, "learning_rate": 0.0005873417721518987, "loss": 0.4078, "step": 245520 }, { "epoch": 70.63578826237054, "grad_norm": 0.8104502558708191, "learning_rate": 0.0005872842347525892, "loss": 0.3845, "step": 245530 }, { "epoch": 70.63866513233602, "grad_norm": 1.0998140573501587, "learning_rate": 0.0005872266973532796, "loss": 0.4621, "step": 245540 }, { "epoch": 70.64154200230149, "grad_norm": 0.9279940724372864, "learning_rate": 0.0005871691599539701, "loss": 0.4453, "step": 245550 }, { "epoch": 70.64441887226697, "grad_norm": 0.7544185519218445, "learning_rate": 0.0005871116225546606, "loss": 0.3689, "step": 245560 }, { "epoch": 70.64729574223244, "grad_norm": 2.0783441066741943, "learning_rate": 0.0005870540851553509, "loss": 0.4658, "step": 245570 }, { "epoch": 70.65017261219793, "grad_norm": 1.219204306602478, "learning_rate": 0.0005869965477560414, "loss": 0.3871, "step": 245580 }, { "epoch": 70.65304948216341, "grad_norm": 1.8121126890182495, "learning_rate": 0.0005869390103567319, "loss": 0.4821, "step": 245590 }, { "epoch": 70.65592635212889, "grad_norm": 1.3546168804168701, "learning_rate": 0.0005868814729574223, "loss": 0.3955, "step": 245600 }, { "epoch": 70.65880322209436, "grad_norm": 1.615063190460205, "learning_rate": 0.0005868239355581127, "loss": 0.4732, "step": 245610 }, { "epoch": 70.66168009205984, "grad_norm": 0.6352275609970093, "learning_rate": 0.0005867663981588033, "loss": 0.4273, "step": 245620 }, { "epoch": 70.66455696202532, "grad_norm": 1.2524139881134033, "learning_rate": 0.0005867088607594937, "loss": 0.4393, "step": 245630 }, { "epoch": 70.66743383199079, "grad_norm": 0.9876754283905029, "learning_rate": 0.0005866513233601841, "loss": 0.4674, "step": 245640 }, { "epoch": 70.67031070195627, "grad_norm": 0.9437195062637329, "learning_rate": 0.0005865937859608746, "loss": 0.527, "step": 245650 }, { "epoch": 70.67318757192174, "grad_norm": 1.1826410293579102, "learning_rate": 0.000586536248561565, "loss": 0.4616, "step": 245660 }, { "epoch": 70.67606444188722, "grad_norm": 1.3354220390319824, "learning_rate": 0.0005864787111622555, "loss": 0.4959, "step": 245670 }, { "epoch": 70.67894131185271, "grad_norm": 0.9704756736755371, "learning_rate": 0.0005864211737629459, "loss": 0.3202, "step": 245680 }, { "epoch": 70.68181818181819, "grad_norm": 1.3208444118499756, "learning_rate": 0.0005863636363636363, "loss": 0.4346, "step": 245690 }, { "epoch": 70.68469505178366, "grad_norm": 0.9026980400085449, "learning_rate": 0.0005863060989643268, "loss": 0.3772, "step": 245700 }, { "epoch": 70.68757192174914, "grad_norm": 1.2675904035568237, "learning_rate": 0.0005862485615650173, "loss": 0.3805, "step": 245710 }, { "epoch": 70.69044879171462, "grad_norm": 0.8890547156333923, "learning_rate": 0.0005861910241657076, "loss": 0.3557, "step": 245720 }, { "epoch": 70.69332566168009, "grad_norm": 1.5758899450302124, "learning_rate": 0.0005861334867663982, "loss": 0.4238, "step": 245730 }, { "epoch": 70.69620253164557, "grad_norm": 0.7222307324409485, "learning_rate": 0.0005860759493670887, "loss": 0.4314, "step": 245740 }, { "epoch": 70.69907940161104, "grad_norm": 1.390449047088623, "learning_rate": 0.000586018411967779, "loss": 0.4461, "step": 245750 }, { "epoch": 70.70195627157652, "grad_norm": 1.1366223096847534, "learning_rate": 0.0005859608745684695, "loss": 0.4436, "step": 245760 }, { "epoch": 70.704833141542, "grad_norm": 1.5177280902862549, "learning_rate": 0.00058590333716916, "loss": 0.3604, "step": 245770 }, { "epoch": 70.70771001150747, "grad_norm": 1.2679489850997925, "learning_rate": 0.0005858457997698504, "loss": 0.3587, "step": 245780 }, { "epoch": 70.71058688147296, "grad_norm": 1.1563314199447632, "learning_rate": 0.0005857882623705408, "loss": 0.5583, "step": 245790 }, { "epoch": 70.71346375143844, "grad_norm": 0.6402600407600403, "learning_rate": 0.0005857307249712314, "loss": 0.4415, "step": 245800 }, { "epoch": 70.71634062140392, "grad_norm": 1.404706358909607, "learning_rate": 0.0005856731875719217, "loss": 0.3629, "step": 245810 }, { "epoch": 70.71921749136939, "grad_norm": 3.5045101642608643, "learning_rate": 0.0005856156501726122, "loss": 0.3979, "step": 245820 }, { "epoch": 70.72209436133487, "grad_norm": 0.7416906356811523, "learning_rate": 0.0005855581127733027, "loss": 0.373, "step": 245830 }, { "epoch": 70.72497123130034, "grad_norm": 0.8637478947639465, "learning_rate": 0.0005855005753739931, "loss": 0.4442, "step": 245840 }, { "epoch": 70.72784810126582, "grad_norm": 1.194376826286316, "learning_rate": 0.0005854430379746836, "loss": 0.4121, "step": 245850 }, { "epoch": 70.7307249712313, "grad_norm": 1.2937159538269043, "learning_rate": 0.000585385500575374, "loss": 0.3869, "step": 245860 }, { "epoch": 70.73360184119677, "grad_norm": 1.1731828451156616, "learning_rate": 0.0005853279631760644, "loss": 0.3954, "step": 245870 }, { "epoch": 70.73647871116225, "grad_norm": 0.7370753288269043, "learning_rate": 0.0005852704257767549, "loss": 0.4287, "step": 245880 }, { "epoch": 70.73935558112774, "grad_norm": 1.4440948963165283, "learning_rate": 0.0005852128883774454, "loss": 0.4147, "step": 245890 }, { "epoch": 70.74223245109322, "grad_norm": 1.3428510427474976, "learning_rate": 0.0005851553509781357, "loss": 0.4625, "step": 245900 }, { "epoch": 70.74510932105869, "grad_norm": 1.0850498676300049, "learning_rate": 0.0005850978135788263, "loss": 0.4279, "step": 245910 }, { "epoch": 70.74798619102417, "grad_norm": 0.6546433568000793, "learning_rate": 0.0005850402761795168, "loss": 0.4609, "step": 245920 }, { "epoch": 70.75086306098964, "grad_norm": 0.6861501932144165, "learning_rate": 0.0005849827387802071, "loss": 0.3801, "step": 245930 }, { "epoch": 70.75373993095512, "grad_norm": 1.610216498374939, "learning_rate": 0.0005849252013808976, "loss": 0.3532, "step": 245940 }, { "epoch": 70.7566168009206, "grad_norm": 1.1705982685089111, "learning_rate": 0.0005848676639815881, "loss": 0.4151, "step": 245950 }, { "epoch": 70.75949367088607, "grad_norm": 1.3817530870437622, "learning_rate": 0.0005848101265822785, "loss": 0.4079, "step": 245960 }, { "epoch": 70.76237054085155, "grad_norm": 1.0289629697799683, "learning_rate": 0.0005847525891829689, "loss": 0.5054, "step": 245970 }, { "epoch": 70.76524741081703, "grad_norm": 1.4553800821304321, "learning_rate": 0.0005846950517836594, "loss": 0.3454, "step": 245980 }, { "epoch": 70.7681242807825, "grad_norm": 1.1264925003051758, "learning_rate": 0.0005846375143843498, "loss": 0.4399, "step": 245990 }, { "epoch": 70.77100115074799, "grad_norm": 0.5825172066688538, "learning_rate": 0.0005845799769850403, "loss": 0.502, "step": 246000 }, { "epoch": 70.77387802071347, "grad_norm": 0.9987366795539856, "learning_rate": 0.0005845224395857307, "loss": 0.3683, "step": 246010 }, { "epoch": 70.77675489067894, "grad_norm": 1.4714927673339844, "learning_rate": 0.0005844649021864212, "loss": 0.3912, "step": 246020 }, { "epoch": 70.77963176064442, "grad_norm": 1.1579688787460327, "learning_rate": 0.0005844073647871117, "loss": 0.4433, "step": 246030 }, { "epoch": 70.7825086306099, "grad_norm": 2.281508445739746, "learning_rate": 0.0005843498273878021, "loss": 0.5281, "step": 246040 }, { "epoch": 70.78538550057537, "grad_norm": 1.2553566694259644, "learning_rate": 0.0005842922899884925, "loss": 0.429, "step": 246050 }, { "epoch": 70.78826237054085, "grad_norm": 1.642029047012329, "learning_rate": 0.000584234752589183, "loss": 0.4798, "step": 246060 }, { "epoch": 70.79113924050633, "grad_norm": 1.1117254495620728, "learning_rate": 0.0005841772151898735, "loss": 0.3599, "step": 246070 }, { "epoch": 70.7940161104718, "grad_norm": 2.1588492393493652, "learning_rate": 0.0005841196777905638, "loss": 0.4174, "step": 246080 }, { "epoch": 70.79689298043728, "grad_norm": 1.621667504310608, "learning_rate": 0.0005840621403912544, "loss": 0.409, "step": 246090 }, { "epoch": 70.79976985040277, "grad_norm": 1.5944546461105347, "learning_rate": 0.0005840046029919448, "loss": 0.5141, "step": 246100 }, { "epoch": 70.80264672036824, "grad_norm": 0.9533020853996277, "learning_rate": 0.0005839470655926352, "loss": 0.3973, "step": 246110 }, { "epoch": 70.80552359033372, "grad_norm": 0.8470715880393982, "learning_rate": 0.0005838895281933256, "loss": 0.4331, "step": 246120 }, { "epoch": 70.8084004602992, "grad_norm": 1.314534306526184, "learning_rate": 0.0005838319907940162, "loss": 0.3426, "step": 246130 }, { "epoch": 70.81127733026467, "grad_norm": 1.2503584623336792, "learning_rate": 0.0005837744533947066, "loss": 0.4244, "step": 246140 }, { "epoch": 70.81415420023015, "grad_norm": 2.558661699295044, "learning_rate": 0.000583716915995397, "loss": 0.4197, "step": 246150 }, { "epoch": 70.81703107019563, "grad_norm": 1.3387473821640015, "learning_rate": 0.0005836593785960875, "loss": 0.3843, "step": 246160 }, { "epoch": 70.8199079401611, "grad_norm": 1.2592228651046753, "learning_rate": 0.0005836018411967779, "loss": 0.4617, "step": 246170 }, { "epoch": 70.82278481012658, "grad_norm": 1.0654250383377075, "learning_rate": 0.0005835443037974684, "loss": 0.3511, "step": 246180 }, { "epoch": 70.82566168009205, "grad_norm": 2.8307018280029297, "learning_rate": 0.0005834867663981588, "loss": 0.4708, "step": 246190 }, { "epoch": 70.82853855005754, "grad_norm": 0.533231258392334, "learning_rate": 0.0005834292289988493, "loss": 0.3462, "step": 246200 }, { "epoch": 70.83141542002302, "grad_norm": 1.4638339281082153, "learning_rate": 0.0005833716915995397, "loss": 0.3337, "step": 246210 }, { "epoch": 70.8342922899885, "grad_norm": 1.4369475841522217, "learning_rate": 0.0005833141542002302, "loss": 0.441, "step": 246220 }, { "epoch": 70.83716915995397, "grad_norm": 0.7841059565544128, "learning_rate": 0.0005832566168009205, "loss": 0.4508, "step": 246230 }, { "epoch": 70.84004602991945, "grad_norm": 1.0263997316360474, "learning_rate": 0.0005831990794016111, "loss": 0.3695, "step": 246240 }, { "epoch": 70.84292289988493, "grad_norm": 1.5237226486206055, "learning_rate": 0.0005831415420023016, "loss": 0.4399, "step": 246250 }, { "epoch": 70.8457997698504, "grad_norm": 1.9612129926681519, "learning_rate": 0.0005830840046029919, "loss": 0.452, "step": 246260 }, { "epoch": 70.84867663981588, "grad_norm": 1.1847550868988037, "learning_rate": 0.0005830264672036824, "loss": 0.4729, "step": 246270 }, { "epoch": 70.85155350978135, "grad_norm": 0.9018738269805908, "learning_rate": 0.0005829689298043729, "loss": 0.4518, "step": 246280 }, { "epoch": 70.85443037974683, "grad_norm": 0.9048887491226196, "learning_rate": 0.0005829113924050633, "loss": 0.3538, "step": 246290 }, { "epoch": 70.8573072497123, "grad_norm": 1.18499755859375, "learning_rate": 0.0005828538550057537, "loss": 0.4688, "step": 246300 }, { "epoch": 70.8601841196778, "grad_norm": 0.8992658257484436, "learning_rate": 0.0005827963176064443, "loss": 0.4755, "step": 246310 }, { "epoch": 70.86306098964327, "grad_norm": 1.166353702545166, "learning_rate": 0.0005827387802071346, "loss": 0.4181, "step": 246320 }, { "epoch": 70.86593785960875, "grad_norm": 1.799974799156189, "learning_rate": 0.0005826812428078251, "loss": 0.4188, "step": 246330 }, { "epoch": 70.86881472957423, "grad_norm": 1.0162910223007202, "learning_rate": 0.0005826237054085154, "loss": 0.496, "step": 246340 }, { "epoch": 70.8716915995397, "grad_norm": 1.3999912738800049, "learning_rate": 0.000582566168009206, "loss": 0.4849, "step": 246350 }, { "epoch": 70.87456846950518, "grad_norm": 2.2335057258605957, "learning_rate": 0.0005825086306098965, "loss": 0.4673, "step": 246360 }, { "epoch": 70.87744533947065, "grad_norm": 0.996048092842102, "learning_rate": 0.0005824510932105868, "loss": 0.4791, "step": 246370 }, { "epoch": 70.88032220943613, "grad_norm": 1.8398936986923218, "learning_rate": 0.0005823935558112773, "loss": 0.3974, "step": 246380 }, { "epoch": 70.8831990794016, "grad_norm": 1.3908436298370361, "learning_rate": 0.0005823360184119678, "loss": 0.4019, "step": 246390 }, { "epoch": 70.88607594936708, "grad_norm": 1.65915048122406, "learning_rate": 0.0005822784810126582, "loss": 0.4636, "step": 246400 }, { "epoch": 70.88895281933257, "grad_norm": 1.9576475620269775, "learning_rate": 0.0005822209436133486, "loss": 0.4708, "step": 246410 }, { "epoch": 70.89182968929805, "grad_norm": 0.7180207967758179, "learning_rate": 0.0005821634062140392, "loss": 0.3908, "step": 246420 }, { "epoch": 70.89470655926353, "grad_norm": 1.014188528060913, "learning_rate": 0.0005821058688147295, "loss": 0.4267, "step": 246430 }, { "epoch": 70.897583429229, "grad_norm": 1.9689594507217407, "learning_rate": 0.00058204833141542, "loss": 0.4927, "step": 246440 }, { "epoch": 70.90046029919448, "grad_norm": 0.954677939414978, "learning_rate": 0.0005819907940161105, "loss": 0.417, "step": 246450 }, { "epoch": 70.90333716915995, "grad_norm": 1.0204336643218994, "learning_rate": 0.0005819332566168009, "loss": 0.3841, "step": 246460 }, { "epoch": 70.90621403912543, "grad_norm": 1.324233055114746, "learning_rate": 0.0005818757192174914, "loss": 0.3742, "step": 246470 }, { "epoch": 70.9090909090909, "grad_norm": 1.1551917791366577, "learning_rate": 0.0005818181818181818, "loss": 0.4144, "step": 246480 }, { "epoch": 70.91196777905638, "grad_norm": 1.7808483839035034, "learning_rate": 0.0005817606444188723, "loss": 0.4928, "step": 246490 }, { "epoch": 70.91484464902186, "grad_norm": 1.1902707815170288, "learning_rate": 0.0005817031070195627, "loss": 0.4658, "step": 246500 }, { "epoch": 70.91772151898734, "grad_norm": 1.9642192125320435, "learning_rate": 0.0005816455696202532, "loss": 0.4027, "step": 246510 }, { "epoch": 70.92059838895283, "grad_norm": 1.6110073328018188, "learning_rate": 0.0005815880322209435, "loss": 0.4338, "step": 246520 }, { "epoch": 70.9234752589183, "grad_norm": 0.7504013180732727, "learning_rate": 0.0005815304948216341, "loss": 0.3916, "step": 246530 }, { "epoch": 70.92635212888378, "grad_norm": 2.3823094367980957, "learning_rate": 0.0005814729574223246, "loss": 0.4454, "step": 246540 }, { "epoch": 70.92922899884925, "grad_norm": 2.057620048522949, "learning_rate": 0.0005814154200230149, "loss": 0.4635, "step": 246550 }, { "epoch": 70.93210586881473, "grad_norm": 1.6810568571090698, "learning_rate": 0.0005813578826237054, "loss": 0.4003, "step": 246560 }, { "epoch": 70.9349827387802, "grad_norm": 1.3895370960235596, "learning_rate": 0.0005813003452243959, "loss": 0.4151, "step": 246570 }, { "epoch": 70.93785960874568, "grad_norm": 1.0764436721801758, "learning_rate": 0.0005812428078250863, "loss": 0.4321, "step": 246580 }, { "epoch": 70.94073647871116, "grad_norm": 1.3342068195343018, "learning_rate": 0.0005811852704257767, "loss": 0.4422, "step": 246590 }, { "epoch": 70.94361334867664, "grad_norm": 1.2273216247558594, "learning_rate": 0.0005811277330264673, "loss": 0.396, "step": 246600 }, { "epoch": 70.94649021864211, "grad_norm": 1.4580031633377075, "learning_rate": 0.0005810701956271576, "loss": 0.4308, "step": 246610 }, { "epoch": 70.9493670886076, "grad_norm": 0.868781328201294, "learning_rate": 0.0005810126582278481, "loss": 0.407, "step": 246620 }, { "epoch": 70.95224395857308, "grad_norm": 1.1569910049438477, "learning_rate": 0.0005809551208285386, "loss": 0.4028, "step": 246630 }, { "epoch": 70.95512082853855, "grad_norm": 1.192091703414917, "learning_rate": 0.000580897583429229, "loss": 0.4219, "step": 246640 }, { "epoch": 70.95799769850403, "grad_norm": 1.7176953554153442, "learning_rate": 0.0005808400460299195, "loss": 0.53, "step": 246650 }, { "epoch": 70.9608745684695, "grad_norm": 0.6711648106575012, "learning_rate": 0.0005807825086306099, "loss": 0.4347, "step": 246660 }, { "epoch": 70.96375143843498, "grad_norm": 1.7533118724822998, "learning_rate": 0.0005807249712313003, "loss": 0.4319, "step": 246670 }, { "epoch": 70.96662830840046, "grad_norm": 1.488677740097046, "learning_rate": 0.0005806674338319908, "loss": 0.42, "step": 246680 }, { "epoch": 70.96950517836594, "grad_norm": 1.3548638820648193, "learning_rate": 0.0005806098964326813, "loss": 0.4216, "step": 246690 }, { "epoch": 70.97238204833141, "grad_norm": 1.2814072370529175, "learning_rate": 0.0005805523590333716, "loss": 0.4332, "step": 246700 }, { "epoch": 70.97525891829689, "grad_norm": 1.7481749057769775, "learning_rate": 0.0005804948216340622, "loss": 0.4196, "step": 246710 }, { "epoch": 70.97813578826236, "grad_norm": 3.182936191558838, "learning_rate": 0.0005804372842347526, "loss": 0.4252, "step": 246720 }, { "epoch": 70.98101265822785, "grad_norm": 0.815117359161377, "learning_rate": 0.000580379746835443, "loss": 0.4093, "step": 246730 }, { "epoch": 70.98388952819333, "grad_norm": 0.7243655323982239, "learning_rate": 0.0005803222094361335, "loss": 0.3776, "step": 246740 }, { "epoch": 70.9867663981588, "grad_norm": 0.8330391049385071, "learning_rate": 0.000580264672036824, "loss": 0.4588, "step": 246750 }, { "epoch": 70.98964326812428, "grad_norm": 0.890366792678833, "learning_rate": 0.0005802071346375144, "loss": 0.4939, "step": 246760 }, { "epoch": 70.99252013808976, "grad_norm": 1.3865532875061035, "learning_rate": 0.0005801495972382048, "loss": 0.452, "step": 246770 }, { "epoch": 70.99539700805524, "grad_norm": 2.225851058959961, "learning_rate": 0.0005800920598388954, "loss": 0.3923, "step": 246780 }, { "epoch": 70.99827387802071, "grad_norm": 1.6222565174102783, "learning_rate": 0.0005800345224395857, "loss": 0.4084, "step": 246790 }, { "epoch": 71.00115074798619, "grad_norm": 0.6418873071670532, "learning_rate": 0.0005799769850402762, "loss": 0.3956, "step": 246800 }, { "epoch": 71.00402761795166, "grad_norm": 1.4537529945373535, "learning_rate": 0.0005799194476409666, "loss": 0.4305, "step": 246810 }, { "epoch": 71.00690448791714, "grad_norm": 1.144839882850647, "learning_rate": 0.0005798619102416571, "loss": 0.4031, "step": 246820 }, { "epoch": 71.00978135788263, "grad_norm": 0.4151936173439026, "learning_rate": 0.0005798043728423476, "loss": 0.3425, "step": 246830 }, { "epoch": 71.0126582278481, "grad_norm": 0.624906599521637, "learning_rate": 0.000579746835443038, "loss": 0.2862, "step": 246840 }, { "epoch": 71.01553509781358, "grad_norm": 1.2335907220840454, "learning_rate": 0.0005796892980437284, "loss": 0.3136, "step": 246850 }, { "epoch": 71.01841196777906, "grad_norm": 2.4621024131774902, "learning_rate": 0.0005796317606444189, "loss": 0.4567, "step": 246860 }, { "epoch": 71.02128883774454, "grad_norm": 0.879911482334137, "learning_rate": 0.0005795742232451094, "loss": 0.3606, "step": 246870 }, { "epoch": 71.02416570771001, "grad_norm": 1.3178257942199707, "learning_rate": 0.0005795166858457997, "loss": 0.4648, "step": 246880 }, { "epoch": 71.02704257767549, "grad_norm": 0.8054569959640503, "learning_rate": 0.0005794591484464903, "loss": 0.3201, "step": 246890 }, { "epoch": 71.02991944764096, "grad_norm": 1.1537556648254395, "learning_rate": 0.0005794016110471807, "loss": 0.3838, "step": 246900 }, { "epoch": 71.03279631760644, "grad_norm": 1.7702569961547852, "learning_rate": 0.0005793440736478711, "loss": 0.3056, "step": 246910 }, { "epoch": 71.03567318757192, "grad_norm": 1.2360467910766602, "learning_rate": 0.0005792865362485615, "loss": 0.2905, "step": 246920 }, { "epoch": 71.03855005753739, "grad_norm": 2.1326005458831787, "learning_rate": 0.0005792289988492521, "loss": 0.3906, "step": 246930 }, { "epoch": 71.04142692750288, "grad_norm": 1.660808801651001, "learning_rate": 0.0005791714614499425, "loss": 0.4452, "step": 246940 }, { "epoch": 71.04430379746836, "grad_norm": 0.7157536745071411, "learning_rate": 0.0005791139240506329, "loss": 0.339, "step": 246950 }, { "epoch": 71.04718066743384, "grad_norm": 0.7789909839630127, "learning_rate": 0.0005790563866513234, "loss": 0.3832, "step": 246960 }, { "epoch": 71.05005753739931, "grad_norm": 1.1480551958084106, "learning_rate": 0.0005789988492520138, "loss": 0.4572, "step": 246970 }, { "epoch": 71.05293440736479, "grad_norm": 1.4976801872253418, "learning_rate": 0.0005789413118527043, "loss": 0.3206, "step": 246980 }, { "epoch": 71.05581127733026, "grad_norm": 1.004061222076416, "learning_rate": 0.0005788837744533947, "loss": 0.3854, "step": 246990 }, { "epoch": 71.05868814729574, "grad_norm": 0.8820539712905884, "learning_rate": 0.0005788262370540852, "loss": 0.364, "step": 247000 }, { "epoch": 71.06156501726122, "grad_norm": 1.0773147344589233, "learning_rate": 0.0005787686996547756, "loss": 0.3802, "step": 247010 }, { "epoch": 71.06444188722669, "grad_norm": 2.0553224086761475, "learning_rate": 0.0005787111622554661, "loss": 0.4159, "step": 247020 }, { "epoch": 71.06731875719217, "grad_norm": 1.266932725906372, "learning_rate": 0.0005786536248561564, "loss": 0.437, "step": 247030 }, { "epoch": 71.07019562715766, "grad_norm": 1.19227135181427, "learning_rate": 0.000578596087456847, "loss": 0.4366, "step": 247040 }, { "epoch": 71.07307249712314, "grad_norm": 1.2976809740066528, "learning_rate": 0.0005785385500575375, "loss": 0.4372, "step": 247050 }, { "epoch": 71.07594936708861, "grad_norm": 1.5134609937667847, "learning_rate": 0.0005784810126582278, "loss": 0.3939, "step": 247060 }, { "epoch": 71.07882623705409, "grad_norm": 0.9198955297470093, "learning_rate": 0.0005784234752589184, "loss": 0.3696, "step": 247070 }, { "epoch": 71.08170310701956, "grad_norm": 2.420380115509033, "learning_rate": 0.0005783659378596088, "loss": 0.4693, "step": 247080 }, { "epoch": 71.08457997698504, "grad_norm": 1.259886384010315, "learning_rate": 0.0005783084004602992, "loss": 0.4506, "step": 247090 }, { "epoch": 71.08745684695052, "grad_norm": 1.900093674659729, "learning_rate": 0.0005782508630609896, "loss": 0.3882, "step": 247100 }, { "epoch": 71.09033371691599, "grad_norm": 0.6749483942985535, "learning_rate": 0.0005781933256616802, "loss": 0.3376, "step": 247110 }, { "epoch": 71.09321058688147, "grad_norm": 0.91237473487854, "learning_rate": 0.0005781357882623705, "loss": 0.4471, "step": 247120 }, { "epoch": 71.09608745684694, "grad_norm": 0.8905797004699707, "learning_rate": 0.000578078250863061, "loss": 0.4207, "step": 247130 }, { "epoch": 71.09896432681242, "grad_norm": 1.1942895650863647, "learning_rate": 0.0005780207134637515, "loss": 0.3457, "step": 247140 }, { "epoch": 71.10184119677791, "grad_norm": 2.225006580352783, "learning_rate": 0.0005779631760644419, "loss": 0.5208, "step": 247150 }, { "epoch": 71.10471806674339, "grad_norm": 1.243342638015747, "learning_rate": 0.0005779056386651324, "loss": 0.3488, "step": 247160 }, { "epoch": 71.10759493670886, "grad_norm": 1.4320862293243408, "learning_rate": 0.0005778481012658227, "loss": 0.3029, "step": 247170 }, { "epoch": 71.11047180667434, "grad_norm": 1.2825967073440552, "learning_rate": 0.0005777905638665133, "loss": 0.3384, "step": 247180 }, { "epoch": 71.11334867663982, "grad_norm": 1.3102221488952637, "learning_rate": 0.0005777330264672037, "loss": 0.4188, "step": 247190 }, { "epoch": 71.11622554660529, "grad_norm": 1.6171964406967163, "learning_rate": 0.0005776754890678941, "loss": 0.362, "step": 247200 }, { "epoch": 71.11910241657077, "grad_norm": 1.3698136806488037, "learning_rate": 0.0005776179516685845, "loss": 0.3557, "step": 247210 }, { "epoch": 71.12197928653625, "grad_norm": 2.0218636989593506, "learning_rate": 0.0005775604142692751, "loss": 0.3655, "step": 247220 }, { "epoch": 71.12485615650172, "grad_norm": 1.0414031744003296, "learning_rate": 0.0005775028768699654, "loss": 0.4342, "step": 247230 }, { "epoch": 71.1277330264672, "grad_norm": 0.914149284362793, "learning_rate": 0.0005774453394706559, "loss": 0.4471, "step": 247240 }, { "epoch": 71.13060989643269, "grad_norm": 1.3441241979599, "learning_rate": 0.0005773878020713464, "loss": 0.4362, "step": 247250 }, { "epoch": 71.13348676639816, "grad_norm": 0.9390424489974976, "learning_rate": 0.0005773302646720368, "loss": 0.3489, "step": 247260 }, { "epoch": 71.13636363636364, "grad_norm": 1.1389950513839722, "learning_rate": 0.0005772727272727273, "loss": 0.3657, "step": 247270 }, { "epoch": 71.13924050632912, "grad_norm": 0.6552985906600952, "learning_rate": 0.0005772151898734177, "loss": 0.5083, "step": 247280 }, { "epoch": 71.14211737629459, "grad_norm": 1.0974096059799194, "learning_rate": 0.0005771576524741082, "loss": 0.3901, "step": 247290 }, { "epoch": 71.14499424626007, "grad_norm": 1.2855253219604492, "learning_rate": 0.0005771001150747986, "loss": 0.4872, "step": 247300 }, { "epoch": 71.14787111622555, "grad_norm": 1.744189977645874, "learning_rate": 0.0005770425776754891, "loss": 0.4642, "step": 247310 }, { "epoch": 71.15074798619102, "grad_norm": 1.651699423789978, "learning_rate": 0.0005769850402761794, "loss": 0.4563, "step": 247320 }, { "epoch": 71.1536248561565, "grad_norm": 1.0559815168380737, "learning_rate": 0.00057692750287687, "loss": 0.3726, "step": 247330 }, { "epoch": 71.15650172612197, "grad_norm": 1.3797191381454468, "learning_rate": 0.0005768699654775605, "loss": 0.4377, "step": 247340 }, { "epoch": 71.15937859608745, "grad_norm": 0.7548215985298157, "learning_rate": 0.0005768124280782508, "loss": 0.4067, "step": 247350 }, { "epoch": 71.16225546605294, "grad_norm": 1.129753828048706, "learning_rate": 0.0005767548906789413, "loss": 0.3446, "step": 247360 }, { "epoch": 71.16513233601842, "grad_norm": 1.1307108402252197, "learning_rate": 0.0005766973532796318, "loss": 0.408, "step": 247370 }, { "epoch": 71.16800920598389, "grad_norm": 1.230614185333252, "learning_rate": 0.0005766398158803222, "loss": 0.4287, "step": 247380 }, { "epoch": 71.17088607594937, "grad_norm": 2.685969114303589, "learning_rate": 0.0005765822784810126, "loss": 0.4541, "step": 247390 }, { "epoch": 71.17376294591485, "grad_norm": 0.9315610527992249, "learning_rate": 0.0005765247410817032, "loss": 0.4383, "step": 247400 }, { "epoch": 71.17663981588032, "grad_norm": 1.1094024181365967, "learning_rate": 0.0005764672036823935, "loss": 0.4262, "step": 247410 }, { "epoch": 71.1795166858458, "grad_norm": 0.9423699378967285, "learning_rate": 0.000576409666283084, "loss": 0.3383, "step": 247420 }, { "epoch": 71.18239355581127, "grad_norm": 0.966326892375946, "learning_rate": 0.0005763521288837744, "loss": 0.3628, "step": 247430 }, { "epoch": 71.18527042577675, "grad_norm": 0.6522851586341858, "learning_rate": 0.0005762945914844649, "loss": 0.4402, "step": 247440 }, { "epoch": 71.18814729574223, "grad_norm": 1.3557835817337036, "learning_rate": 0.0005762370540851554, "loss": 0.3817, "step": 247450 }, { "epoch": 71.19102416570772, "grad_norm": 1.9924694299697876, "learning_rate": 0.0005761795166858458, "loss": 0.4465, "step": 247460 }, { "epoch": 71.19390103567319, "grad_norm": 1.0813502073287964, "learning_rate": 0.0005761219792865363, "loss": 0.4589, "step": 247470 }, { "epoch": 71.19677790563867, "grad_norm": 0.8171452283859253, "learning_rate": 0.0005760644418872267, "loss": 0.4033, "step": 247480 }, { "epoch": 71.19965477560415, "grad_norm": 1.0919606685638428, "learning_rate": 0.0005760069044879172, "loss": 0.3763, "step": 247490 }, { "epoch": 71.20253164556962, "grad_norm": 0.7295979857444763, "learning_rate": 0.0005759493670886075, "loss": 0.4043, "step": 247500 }, { "epoch": 71.2054085155351, "grad_norm": 1.079606533050537, "learning_rate": 0.0005758918296892981, "loss": 0.4665, "step": 247510 }, { "epoch": 71.20828538550057, "grad_norm": 0.6958717107772827, "learning_rate": 0.0005758342922899885, "loss": 0.3127, "step": 247520 }, { "epoch": 71.21116225546605, "grad_norm": 1.4485267400741577, "learning_rate": 0.0005757767548906789, "loss": 0.444, "step": 247530 }, { "epoch": 71.21403912543153, "grad_norm": 2.0098509788513184, "learning_rate": 0.0005757192174913694, "loss": 0.3707, "step": 247540 }, { "epoch": 71.216915995397, "grad_norm": 0.8172116279602051, "learning_rate": 0.0005756616800920599, "loss": 0.4905, "step": 247550 }, { "epoch": 71.21979286536248, "grad_norm": 1.5209366083145142, "learning_rate": 0.0005756041426927503, "loss": 0.405, "step": 247560 }, { "epoch": 71.22266973532797, "grad_norm": 0.736635684967041, "learning_rate": 0.0005755466052934407, "loss": 0.3248, "step": 247570 }, { "epoch": 71.22554660529345, "grad_norm": 1.0228805541992188, "learning_rate": 0.0005754890678941313, "loss": 0.4048, "step": 247580 }, { "epoch": 71.22842347525892, "grad_norm": 0.6770291328430176, "learning_rate": 0.0005754315304948216, "loss": 0.3797, "step": 247590 }, { "epoch": 71.2313003452244, "grad_norm": 1.2316709756851196, "learning_rate": 0.0005753739930955121, "loss": 0.4202, "step": 247600 }, { "epoch": 71.23417721518987, "grad_norm": 1.112146258354187, "learning_rate": 0.0005753164556962025, "loss": 0.4085, "step": 247610 }, { "epoch": 71.23705408515535, "grad_norm": 0.9392017722129822, "learning_rate": 0.000575258918296893, "loss": 0.4286, "step": 247620 }, { "epoch": 71.23993095512083, "grad_norm": 0.8753517866134644, "learning_rate": 0.0005752013808975835, "loss": 0.3924, "step": 247630 }, { "epoch": 71.2428078250863, "grad_norm": 1.4245165586471558, "learning_rate": 0.0005751438434982739, "loss": 0.3369, "step": 247640 }, { "epoch": 71.24568469505178, "grad_norm": 2.0828893184661865, "learning_rate": 0.0005750863060989643, "loss": 0.4033, "step": 247650 }, { "epoch": 71.24856156501725, "grad_norm": 0.8418293595314026, "learning_rate": 0.0005750287686996548, "loss": 0.4176, "step": 247660 }, { "epoch": 71.25143843498275, "grad_norm": 2.6754634380340576, "learning_rate": 0.0005749712313003453, "loss": 0.4223, "step": 247670 }, { "epoch": 71.25431530494822, "grad_norm": 0.8586154580116272, "learning_rate": 0.0005749136939010356, "loss": 0.4532, "step": 247680 }, { "epoch": 71.2571921749137, "grad_norm": 1.0109484195709229, "learning_rate": 0.0005748561565017262, "loss": 0.349, "step": 247690 }, { "epoch": 71.26006904487917, "grad_norm": 0.7926132678985596, "learning_rate": 0.0005747986191024166, "loss": 0.4155, "step": 247700 }, { "epoch": 71.26294591484465, "grad_norm": 1.9548277854919434, "learning_rate": 0.000574741081703107, "loss": 0.4034, "step": 247710 }, { "epoch": 71.26582278481013, "grad_norm": 1.2090387344360352, "learning_rate": 0.0005746835443037974, "loss": 0.4502, "step": 247720 }, { "epoch": 71.2686996547756, "grad_norm": 0.9070208668708801, "learning_rate": 0.000574626006904488, "loss": 0.376, "step": 247730 }, { "epoch": 71.27157652474108, "grad_norm": 1.380340576171875, "learning_rate": 0.0005745684695051784, "loss": 0.4399, "step": 247740 }, { "epoch": 71.27445339470655, "grad_norm": 4.080400466918945, "learning_rate": 0.0005745109321058688, "loss": 0.362, "step": 247750 }, { "epoch": 71.27733026467203, "grad_norm": 0.8125205636024475, "learning_rate": 0.0005744533947065594, "loss": 0.2883, "step": 247760 }, { "epoch": 71.28020713463752, "grad_norm": 1.2878823280334473, "learning_rate": 0.0005743958573072497, "loss": 0.4048, "step": 247770 }, { "epoch": 71.283084004603, "grad_norm": 1.682489275932312, "learning_rate": 0.0005743383199079402, "loss": 0.3856, "step": 247780 }, { "epoch": 71.28596087456847, "grad_norm": 1.2516052722930908, "learning_rate": 0.0005742807825086306, "loss": 0.3628, "step": 247790 }, { "epoch": 71.28883774453395, "grad_norm": 0.7049979567527771, "learning_rate": 0.0005742232451093211, "loss": 0.3089, "step": 247800 }, { "epoch": 71.29171461449943, "grad_norm": 1.4139392375946045, "learning_rate": 0.0005741657077100115, "loss": 0.3419, "step": 247810 }, { "epoch": 71.2945914844649, "grad_norm": 1.6075592041015625, "learning_rate": 0.000574108170310702, "loss": 0.4219, "step": 247820 }, { "epoch": 71.29746835443038, "grad_norm": 1.2574384212493896, "learning_rate": 0.0005740506329113923, "loss": 0.3208, "step": 247830 }, { "epoch": 71.30034522439585, "grad_norm": 0.9597794413566589, "learning_rate": 0.0005739930955120829, "loss": 0.3998, "step": 247840 }, { "epoch": 71.30322209436133, "grad_norm": 1.5030089616775513, "learning_rate": 0.0005739355581127734, "loss": 0.4194, "step": 247850 }, { "epoch": 71.30609896432681, "grad_norm": 1.6726027727127075, "learning_rate": 0.0005738780207134637, "loss": 0.343, "step": 247860 }, { "epoch": 71.30897583429228, "grad_norm": 1.1472644805908203, "learning_rate": 0.0005738204833141543, "loss": 0.4807, "step": 247870 }, { "epoch": 71.31185270425777, "grad_norm": 0.9856356978416443, "learning_rate": 0.0005737629459148447, "loss": 0.341, "step": 247880 }, { "epoch": 71.31472957422325, "grad_norm": 0.8866696357727051, "learning_rate": 0.0005737054085155351, "loss": 0.3654, "step": 247890 }, { "epoch": 71.31760644418873, "grad_norm": 0.6889585852622986, "learning_rate": 0.0005736478711162255, "loss": 0.4103, "step": 247900 }, { "epoch": 71.3204833141542, "grad_norm": 1.0736011266708374, "learning_rate": 0.0005735903337169161, "loss": 0.4482, "step": 247910 }, { "epoch": 71.32336018411968, "grad_norm": 0.9192661643028259, "learning_rate": 0.0005735327963176064, "loss": 0.4119, "step": 247920 }, { "epoch": 71.32623705408515, "grad_norm": 2.342417001724243, "learning_rate": 0.0005734752589182969, "loss": 0.3912, "step": 247930 }, { "epoch": 71.32911392405063, "grad_norm": 1.206714153289795, "learning_rate": 0.0005734177215189874, "loss": 0.3701, "step": 247940 }, { "epoch": 71.33199079401611, "grad_norm": 0.8328553438186646, "learning_rate": 0.0005733601841196778, "loss": 0.3851, "step": 247950 }, { "epoch": 71.33486766398158, "grad_norm": 1.276867389678955, "learning_rate": 0.0005733026467203683, "loss": 0.3711, "step": 247960 }, { "epoch": 71.33774453394706, "grad_norm": 1.2361652851104736, "learning_rate": 0.0005732451093210587, "loss": 0.3522, "step": 247970 }, { "epoch": 71.34062140391255, "grad_norm": 1.554071068763733, "learning_rate": 0.0005731875719217492, "loss": 0.4848, "step": 247980 }, { "epoch": 71.34349827387803, "grad_norm": 1.313865303993225, "learning_rate": 0.0005731300345224396, "loss": 0.4732, "step": 247990 }, { "epoch": 71.3463751438435, "grad_norm": 1.1687692403793335, "learning_rate": 0.00057307249712313, "loss": 0.3399, "step": 248000 }, { "epoch": 71.34925201380898, "grad_norm": 1.0548133850097656, "learning_rate": 0.0005730149597238204, "loss": 0.4509, "step": 248010 }, { "epoch": 71.35212888377445, "grad_norm": 1.2519633769989014, "learning_rate": 0.000572957422324511, "loss": 0.4001, "step": 248020 }, { "epoch": 71.35500575373993, "grad_norm": 1.0822844505310059, "learning_rate": 0.0005728998849252013, "loss": 0.3542, "step": 248030 }, { "epoch": 71.35788262370541, "grad_norm": 1.8834424018859863, "learning_rate": 0.0005728423475258918, "loss": 0.4263, "step": 248040 }, { "epoch": 71.36075949367088, "grad_norm": 1.501975417137146, "learning_rate": 0.0005727848101265824, "loss": 0.4343, "step": 248050 }, { "epoch": 71.36363636363636, "grad_norm": 1.0399855375289917, "learning_rate": 0.0005727272727272727, "loss": 0.3909, "step": 248060 }, { "epoch": 71.36651323360184, "grad_norm": 0.989585280418396, "learning_rate": 0.0005726697353279632, "loss": 0.4591, "step": 248070 }, { "epoch": 71.36939010356731, "grad_norm": 0.7850139141082764, "learning_rate": 0.0005726121979286536, "loss": 0.505, "step": 248080 }, { "epoch": 71.3722669735328, "grad_norm": 2.496279716491699, "learning_rate": 0.0005725546605293441, "loss": 0.3514, "step": 248090 }, { "epoch": 71.37514384349828, "grad_norm": 0.8245763778686523, "learning_rate": 0.0005724971231300345, "loss": 0.4129, "step": 248100 }, { "epoch": 71.37802071346375, "grad_norm": 1.6599376201629639, "learning_rate": 0.000572439585730725, "loss": 0.4031, "step": 248110 }, { "epoch": 71.38089758342923, "grad_norm": 1.5447555780410767, "learning_rate": 0.0005723820483314153, "loss": 0.4008, "step": 248120 }, { "epoch": 71.38377445339471, "grad_norm": 1.1481614112854004, "learning_rate": 0.0005723245109321059, "loss": 0.4168, "step": 248130 }, { "epoch": 71.38665132336018, "grad_norm": 1.1810730695724487, "learning_rate": 0.0005722669735327964, "loss": 0.316, "step": 248140 }, { "epoch": 71.38952819332566, "grad_norm": 1.270753264427185, "learning_rate": 0.0005722094361334867, "loss": 0.4107, "step": 248150 }, { "epoch": 71.39240506329114, "grad_norm": 1.184842824935913, "learning_rate": 0.0005721518987341773, "loss": 0.3756, "step": 248160 }, { "epoch": 71.39528193325661, "grad_norm": 1.7505345344543457, "learning_rate": 0.0005720943613348677, "loss": 0.4362, "step": 248170 }, { "epoch": 71.39815880322209, "grad_norm": 0.8876793384552002, "learning_rate": 0.0005720368239355581, "loss": 0.3713, "step": 248180 }, { "epoch": 71.40103567318758, "grad_norm": 1.0108611583709717, "learning_rate": 0.0005719792865362485, "loss": 0.4085, "step": 248190 }, { "epoch": 71.40391254315306, "grad_norm": 1.636147379875183, "learning_rate": 0.0005719217491369391, "loss": 0.3688, "step": 248200 }, { "epoch": 71.40678941311853, "grad_norm": 1.1342672109603882, "learning_rate": 0.0005718642117376294, "loss": 0.4126, "step": 248210 }, { "epoch": 71.40966628308401, "grad_norm": 0.49600639939308167, "learning_rate": 0.0005718066743383199, "loss": 0.3417, "step": 248220 }, { "epoch": 71.41254315304948, "grad_norm": 0.7794746160507202, "learning_rate": 0.0005717491369390103, "loss": 0.3461, "step": 248230 }, { "epoch": 71.41542002301496, "grad_norm": 1.4120776653289795, "learning_rate": 0.0005716915995397008, "loss": 0.3966, "step": 248240 }, { "epoch": 71.41829689298044, "grad_norm": 2.2501256465911865, "learning_rate": 0.0005716340621403913, "loss": 0.4104, "step": 248250 }, { "epoch": 71.42117376294591, "grad_norm": 1.7755253314971924, "learning_rate": 0.0005715765247410817, "loss": 0.426, "step": 248260 }, { "epoch": 71.42405063291139, "grad_norm": 2.021059274673462, "learning_rate": 0.0005715189873417722, "loss": 0.4375, "step": 248270 }, { "epoch": 71.42692750287686, "grad_norm": 1.7352075576782227, "learning_rate": 0.0005714614499424626, "loss": 0.5081, "step": 248280 }, { "epoch": 71.42980437284234, "grad_norm": 1.608851671218872, "learning_rate": 0.0005714039125431531, "loss": 0.3979, "step": 248290 }, { "epoch": 71.43268124280783, "grad_norm": 0.8806906938552856, "learning_rate": 0.0005713463751438434, "loss": 0.3698, "step": 248300 }, { "epoch": 71.43555811277331, "grad_norm": 0.5553126335144043, "learning_rate": 0.000571288837744534, "loss": 0.3495, "step": 248310 }, { "epoch": 71.43843498273878, "grad_norm": 0.6093589067459106, "learning_rate": 0.0005712313003452244, "loss": 0.4663, "step": 248320 }, { "epoch": 71.44131185270426, "grad_norm": 1.3709701299667358, "learning_rate": 0.0005711737629459148, "loss": 0.4009, "step": 248330 }, { "epoch": 71.44418872266974, "grad_norm": 2.276184558868408, "learning_rate": 0.0005711162255466052, "loss": 0.4644, "step": 248340 }, { "epoch": 71.44706559263521, "grad_norm": 0.85378497838974, "learning_rate": 0.0005710586881472958, "loss": 0.4653, "step": 248350 }, { "epoch": 71.44994246260069, "grad_norm": 1.7709321975708008, "learning_rate": 0.0005710011507479862, "loss": 0.3798, "step": 248360 }, { "epoch": 71.45281933256616, "grad_norm": 1.8455345630645752, "learning_rate": 0.0005709436133486766, "loss": 0.4458, "step": 248370 }, { "epoch": 71.45569620253164, "grad_norm": 0.9818021655082703, "learning_rate": 0.0005708860759493672, "loss": 0.3569, "step": 248380 }, { "epoch": 71.45857307249712, "grad_norm": 1.4917336702346802, "learning_rate": 0.0005708285385500575, "loss": 0.4173, "step": 248390 }, { "epoch": 71.46144994246261, "grad_norm": 2.2287936210632324, "learning_rate": 0.000570771001150748, "loss": 0.4628, "step": 248400 }, { "epoch": 71.46432681242808, "grad_norm": 0.7466667890548706, "learning_rate": 0.0005707134637514384, "loss": 0.4543, "step": 248410 }, { "epoch": 71.46720368239356, "grad_norm": 0.9758403301239014, "learning_rate": 0.0005706559263521289, "loss": 0.3326, "step": 248420 }, { "epoch": 71.47008055235904, "grad_norm": 2.55930757522583, "learning_rate": 0.0005705983889528193, "loss": 0.5256, "step": 248430 }, { "epoch": 71.47295742232451, "grad_norm": 0.6868115067481995, "learning_rate": 0.0005705408515535098, "loss": 0.3672, "step": 248440 }, { "epoch": 71.47583429228999, "grad_norm": 1.0817484855651855, "learning_rate": 0.0005704833141542003, "loss": 0.3652, "step": 248450 }, { "epoch": 71.47871116225546, "grad_norm": 0.8618354201316833, "learning_rate": 0.0005704257767548907, "loss": 0.3685, "step": 248460 }, { "epoch": 71.48158803222094, "grad_norm": 2.1943814754486084, "learning_rate": 0.0005703682393555812, "loss": 0.459, "step": 248470 }, { "epoch": 71.48446490218642, "grad_norm": 1.5161192417144775, "learning_rate": 0.0005703107019562715, "loss": 0.4297, "step": 248480 }, { "epoch": 71.4873417721519, "grad_norm": 1.078869342803955, "learning_rate": 0.0005702531645569621, "loss": 0.4874, "step": 248490 }, { "epoch": 71.49021864211737, "grad_norm": 0.6062290072441101, "learning_rate": 0.0005701956271576525, "loss": 0.3567, "step": 248500 }, { "epoch": 71.49309551208286, "grad_norm": 0.8008736371994019, "learning_rate": 0.0005701380897583429, "loss": 0.3517, "step": 248510 }, { "epoch": 71.49597238204834, "grad_norm": 1.0022153854370117, "learning_rate": 0.0005700805523590333, "loss": 0.4579, "step": 248520 }, { "epoch": 71.49884925201381, "grad_norm": 1.0988562107086182, "learning_rate": 0.0005700230149597239, "loss": 0.4446, "step": 248530 }, { "epoch": 71.50172612197929, "grad_norm": 0.6702184081077576, "learning_rate": 0.0005699654775604143, "loss": 0.3769, "step": 248540 }, { "epoch": 71.50460299194476, "grad_norm": 1.0172522068023682, "learning_rate": 0.0005699079401611047, "loss": 0.357, "step": 248550 }, { "epoch": 71.50747986191024, "grad_norm": 1.6374880075454712, "learning_rate": 0.0005698504027617953, "loss": 0.3366, "step": 248560 }, { "epoch": 71.51035673187572, "grad_norm": 1.1524481773376465, "learning_rate": 0.0005697928653624856, "loss": 0.482, "step": 248570 }, { "epoch": 71.5132336018412, "grad_norm": 1.1387454271316528, "learning_rate": 0.0005697353279631761, "loss": 0.528, "step": 248580 }, { "epoch": 71.51611047180667, "grad_norm": 1.114646077156067, "learning_rate": 0.0005696777905638665, "loss": 0.3431, "step": 248590 }, { "epoch": 71.51898734177215, "grad_norm": 0.8632916808128357, "learning_rate": 0.000569620253164557, "loss": 0.3836, "step": 248600 }, { "epoch": 71.52186421173764, "grad_norm": 0.993900716304779, "learning_rate": 0.0005695627157652474, "loss": 0.3111, "step": 248610 }, { "epoch": 71.52474108170311, "grad_norm": 1.8470191955566406, "learning_rate": 0.0005695051783659379, "loss": 0.4199, "step": 248620 }, { "epoch": 71.52761795166859, "grad_norm": 0.7561227083206177, "learning_rate": 0.0005694476409666282, "loss": 0.3591, "step": 248630 }, { "epoch": 71.53049482163406, "grad_norm": 1.0112265348434448, "learning_rate": 0.0005693901035673188, "loss": 0.4011, "step": 248640 }, { "epoch": 71.53337169159954, "grad_norm": 1.8699432611465454, "learning_rate": 0.0005693325661680093, "loss": 0.4211, "step": 248650 }, { "epoch": 71.53624856156502, "grad_norm": 1.4076483249664307, "learning_rate": 0.0005692750287686996, "loss": 0.5159, "step": 248660 }, { "epoch": 71.5391254315305, "grad_norm": 2.1912405490875244, "learning_rate": 0.0005692174913693902, "loss": 0.4646, "step": 248670 }, { "epoch": 71.54200230149597, "grad_norm": 1.206099271774292, "learning_rate": 0.0005691599539700806, "loss": 0.4342, "step": 248680 }, { "epoch": 71.54487917146145, "grad_norm": 0.8275719285011292, "learning_rate": 0.000569102416570771, "loss": 0.3888, "step": 248690 }, { "epoch": 71.54775604142692, "grad_norm": 1.3857237100601196, "learning_rate": 0.0005690448791714614, "loss": 0.4044, "step": 248700 }, { "epoch": 71.5506329113924, "grad_norm": 1.1225669384002686, "learning_rate": 0.000568987341772152, "loss": 0.4039, "step": 248710 }, { "epoch": 71.55350978135789, "grad_norm": 2.2535884380340576, "learning_rate": 0.0005689298043728423, "loss": 0.4575, "step": 248720 }, { "epoch": 71.55638665132336, "grad_norm": 1.917206048965454, "learning_rate": 0.0005688722669735328, "loss": 0.4118, "step": 248730 }, { "epoch": 71.55926352128884, "grad_norm": 1.0889438390731812, "learning_rate": 0.0005688147295742234, "loss": 0.3975, "step": 248740 }, { "epoch": 71.56214039125432, "grad_norm": 1.4395736455917358, "learning_rate": 0.0005687571921749137, "loss": 0.3125, "step": 248750 }, { "epoch": 71.5650172612198, "grad_norm": 1.3575725555419922, "learning_rate": 0.0005686996547756042, "loss": 0.4895, "step": 248760 }, { "epoch": 71.56789413118527, "grad_norm": 1.007210612297058, "learning_rate": 0.0005686421173762946, "loss": 0.3875, "step": 248770 }, { "epoch": 71.57077100115075, "grad_norm": 1.8557624816894531, "learning_rate": 0.0005685845799769851, "loss": 0.4544, "step": 248780 }, { "epoch": 71.57364787111622, "grad_norm": 1.193734049797058, "learning_rate": 0.0005685270425776755, "loss": 0.4242, "step": 248790 }, { "epoch": 71.5765247410817, "grad_norm": 1.679890513420105, "learning_rate": 0.000568469505178366, "loss": 0.521, "step": 248800 }, { "epoch": 71.57940161104717, "grad_norm": 1.6280666589736938, "learning_rate": 0.0005684119677790563, "loss": 0.3431, "step": 248810 }, { "epoch": 71.58227848101266, "grad_norm": 1.30037260055542, "learning_rate": 0.0005683544303797469, "loss": 0.4292, "step": 248820 }, { "epoch": 71.58515535097814, "grad_norm": 1.1900442838668823, "learning_rate": 0.0005682968929804372, "loss": 0.4522, "step": 248830 }, { "epoch": 71.58803222094362, "grad_norm": 1.884982943534851, "learning_rate": 0.0005682393555811277, "loss": 0.4546, "step": 248840 }, { "epoch": 71.5909090909091, "grad_norm": 3.1046996116638184, "learning_rate": 0.0005681818181818183, "loss": 0.4093, "step": 248850 }, { "epoch": 71.59378596087457, "grad_norm": 1.5259405374526978, "learning_rate": 0.0005681242807825086, "loss": 0.3861, "step": 248860 }, { "epoch": 71.59666283084005, "grad_norm": 0.925244152545929, "learning_rate": 0.0005680667433831991, "loss": 0.3262, "step": 248870 }, { "epoch": 71.59953970080552, "grad_norm": 0.8037128448486328, "learning_rate": 0.0005680092059838895, "loss": 0.3417, "step": 248880 }, { "epoch": 71.602416570771, "grad_norm": 1.0237102508544922, "learning_rate": 0.00056795166858458, "loss": 0.388, "step": 248890 }, { "epoch": 71.60529344073647, "grad_norm": 1.3928600549697876, "learning_rate": 0.0005678941311852704, "loss": 0.4542, "step": 248900 }, { "epoch": 71.60817031070195, "grad_norm": 3.392822027206421, "learning_rate": 0.0005678365937859609, "loss": 0.4065, "step": 248910 }, { "epoch": 71.61104718066743, "grad_norm": 0.8345196843147278, "learning_rate": 0.0005677790563866512, "loss": 0.3519, "step": 248920 }, { "epoch": 71.61392405063292, "grad_norm": 1.3186362981796265, "learning_rate": 0.0005677215189873418, "loss": 0.4184, "step": 248930 }, { "epoch": 71.6168009205984, "grad_norm": 0.9046262502670288, "learning_rate": 0.0005676639815880323, "loss": 0.359, "step": 248940 }, { "epoch": 71.61967779056387, "grad_norm": 1.5895713567733765, "learning_rate": 0.0005676064441887226, "loss": 0.4671, "step": 248950 }, { "epoch": 71.62255466052935, "grad_norm": 1.4053294658660889, "learning_rate": 0.0005675489067894132, "loss": 0.3937, "step": 248960 }, { "epoch": 71.62543153049482, "grad_norm": 1.3266041278839111, "learning_rate": 0.0005674913693901036, "loss": 0.4766, "step": 248970 }, { "epoch": 71.6283084004603, "grad_norm": 1.3969696760177612, "learning_rate": 0.000567433831990794, "loss": 0.3823, "step": 248980 }, { "epoch": 71.63118527042577, "grad_norm": 1.6237120628356934, "learning_rate": 0.0005673762945914844, "loss": 0.5141, "step": 248990 }, { "epoch": 71.63406214039125, "grad_norm": 2.105827569961548, "learning_rate": 0.000567318757192175, "loss": 0.4824, "step": 249000 }, { "epoch": 71.63693901035673, "grad_norm": 1.2026617527008057, "learning_rate": 0.0005672612197928653, "loss": 0.4876, "step": 249010 }, { "epoch": 71.6398158803222, "grad_norm": 1.0999454259872437, "learning_rate": 0.0005672036823935558, "loss": 0.3361, "step": 249020 }, { "epoch": 71.6426927502877, "grad_norm": 1.3624261617660522, "learning_rate": 0.0005671461449942464, "loss": 0.352, "step": 249030 }, { "epoch": 71.64556962025317, "grad_norm": 0.8121634125709534, "learning_rate": 0.0005670886075949367, "loss": 0.4165, "step": 249040 }, { "epoch": 71.64844649021865, "grad_norm": 0.8410154581069946, "learning_rate": 0.0005670310701956272, "loss": 0.4495, "step": 249050 }, { "epoch": 71.65132336018412, "grad_norm": 1.2248430252075195, "learning_rate": 0.0005669735327963176, "loss": 0.4999, "step": 249060 }, { "epoch": 71.6542002301496, "grad_norm": 1.1086891889572144, "learning_rate": 0.0005669159953970081, "loss": 0.5424, "step": 249070 }, { "epoch": 71.65707710011507, "grad_norm": 1.2376387119293213, "learning_rate": 0.0005668584579976985, "loss": 0.4102, "step": 249080 }, { "epoch": 71.65995397008055, "grad_norm": 0.7298771739006042, "learning_rate": 0.000566800920598389, "loss": 0.347, "step": 249090 }, { "epoch": 71.66283084004603, "grad_norm": 1.5662024021148682, "learning_rate": 0.0005667433831990793, "loss": 0.3609, "step": 249100 }, { "epoch": 71.6657077100115, "grad_norm": 1.8662532567977905, "learning_rate": 0.0005666858457997699, "loss": 0.4288, "step": 249110 }, { "epoch": 71.66858457997698, "grad_norm": 1.1891676187515259, "learning_rate": 0.0005666283084004603, "loss": 0.3589, "step": 249120 }, { "epoch": 71.67146144994246, "grad_norm": 1.65716552734375, "learning_rate": 0.0005665707710011507, "loss": 0.4329, "step": 249130 }, { "epoch": 71.67433831990795, "grad_norm": 1.7379165887832642, "learning_rate": 0.0005665132336018413, "loss": 0.3498, "step": 249140 }, { "epoch": 71.67721518987342, "grad_norm": 0.9334935545921326, "learning_rate": 0.0005664556962025317, "loss": 0.3849, "step": 249150 }, { "epoch": 71.6800920598389, "grad_norm": 1.085630178451538, "learning_rate": 0.0005663981588032221, "loss": 0.384, "step": 249160 }, { "epoch": 71.68296892980437, "grad_norm": 1.3270018100738525, "learning_rate": 0.0005663406214039125, "loss": 0.3246, "step": 249170 }, { "epoch": 71.68584579976985, "grad_norm": 0.9813277721405029, "learning_rate": 0.0005662830840046031, "loss": 0.4412, "step": 249180 }, { "epoch": 71.68872266973533, "grad_norm": 1.042711615562439, "learning_rate": 0.0005662255466052934, "loss": 0.4371, "step": 249190 }, { "epoch": 71.6915995397008, "grad_norm": 1.1442924737930298, "learning_rate": 0.0005661680092059839, "loss": 0.4713, "step": 249200 }, { "epoch": 71.69447640966628, "grad_norm": 1.125529408454895, "learning_rate": 0.0005661104718066743, "loss": 0.4187, "step": 249210 }, { "epoch": 71.69735327963176, "grad_norm": 1.7966102361679077, "learning_rate": 0.0005660529344073648, "loss": 0.4759, "step": 249220 }, { "epoch": 71.70023014959723, "grad_norm": 0.5799820423126221, "learning_rate": 0.0005659953970080552, "loss": 0.3682, "step": 249230 }, { "epoch": 71.70310701956272, "grad_norm": 2.2183451652526855, "learning_rate": 0.0005659378596087457, "loss": 0.425, "step": 249240 }, { "epoch": 71.7059838895282, "grad_norm": 1.6949493885040283, "learning_rate": 0.0005658803222094362, "loss": 0.5126, "step": 249250 }, { "epoch": 71.70886075949367, "grad_norm": 1.112776279449463, "learning_rate": 0.0005658227848101266, "loss": 0.3585, "step": 249260 }, { "epoch": 71.71173762945915, "grad_norm": 0.7835490107536316, "learning_rate": 0.0005657652474108171, "loss": 0.4591, "step": 249270 }, { "epoch": 71.71461449942463, "grad_norm": 1.197435736656189, "learning_rate": 0.0005657077100115074, "loss": 0.3849, "step": 249280 }, { "epoch": 71.7174913693901, "grad_norm": 1.2515709400177002, "learning_rate": 0.000565650172612198, "loss": 0.3488, "step": 249290 }, { "epoch": 71.72036823935558, "grad_norm": 1.2328498363494873, "learning_rate": 0.0005655926352128884, "loss": 0.5337, "step": 249300 }, { "epoch": 71.72324510932106, "grad_norm": 1.4166009426116943, "learning_rate": 0.0005655350978135788, "loss": 0.3941, "step": 249310 }, { "epoch": 71.72612197928653, "grad_norm": 1.802520513534546, "learning_rate": 0.0005654775604142692, "loss": 0.3836, "step": 249320 }, { "epoch": 71.72899884925201, "grad_norm": 2.2112233638763428, "learning_rate": 0.0005654200230149598, "loss": 0.3724, "step": 249330 }, { "epoch": 71.7318757192175, "grad_norm": 1.3619307279586792, "learning_rate": 0.0005653624856156501, "loss": 0.3947, "step": 249340 }, { "epoch": 71.73475258918297, "grad_norm": 1.3886290788650513, "learning_rate": 0.0005653049482163406, "loss": 0.3941, "step": 249350 }, { "epoch": 71.73762945914845, "grad_norm": 0.9521808624267578, "learning_rate": 0.0005652474108170312, "loss": 0.4072, "step": 249360 }, { "epoch": 71.74050632911393, "grad_norm": 0.8397759795188904, "learning_rate": 0.0005651898734177215, "loss": 0.4442, "step": 249370 }, { "epoch": 71.7433831990794, "grad_norm": 2.2238070964813232, "learning_rate": 0.000565132336018412, "loss": 0.4416, "step": 249380 }, { "epoch": 71.74626006904488, "grad_norm": 0.855738639831543, "learning_rate": 0.0005650747986191024, "loss": 0.4965, "step": 249390 }, { "epoch": 71.74913693901036, "grad_norm": 1.4864782094955444, "learning_rate": 0.0005650172612197929, "loss": 0.3836, "step": 249400 }, { "epoch": 71.75201380897583, "grad_norm": 1.1016836166381836, "learning_rate": 0.0005649597238204833, "loss": 0.4572, "step": 249410 }, { "epoch": 71.75489067894131, "grad_norm": 0.9007818698883057, "learning_rate": 0.0005649021864211738, "loss": 0.3665, "step": 249420 }, { "epoch": 71.75776754890678, "grad_norm": 1.2910206317901611, "learning_rate": 0.0005648446490218642, "loss": 0.3034, "step": 249430 }, { "epoch": 71.76064441887226, "grad_norm": 1.273369312286377, "learning_rate": 0.0005647871116225547, "loss": 0.4967, "step": 249440 }, { "epoch": 71.76352128883775, "grad_norm": 0.7752823829650879, "learning_rate": 0.0005647295742232452, "loss": 0.3796, "step": 249450 }, { "epoch": 71.76639815880323, "grad_norm": 1.0993601083755493, "learning_rate": 0.0005646720368239355, "loss": 0.4086, "step": 249460 }, { "epoch": 71.7692750287687, "grad_norm": 1.159473180770874, "learning_rate": 0.0005646144994246261, "loss": 0.4196, "step": 249470 }, { "epoch": 71.77215189873418, "grad_norm": 1.7701505422592163, "learning_rate": 0.0005645569620253165, "loss": 0.4185, "step": 249480 }, { "epoch": 71.77502876869966, "grad_norm": 0.6326084733009338, "learning_rate": 0.0005644994246260069, "loss": 0.3578, "step": 249490 }, { "epoch": 71.77790563866513, "grad_norm": 0.9278303980827332, "learning_rate": 0.0005644418872266973, "loss": 0.4529, "step": 249500 }, { "epoch": 71.78078250863061, "grad_norm": 1.5758908987045288, "learning_rate": 0.0005643843498273879, "loss": 0.4894, "step": 249510 }, { "epoch": 71.78365937859608, "grad_norm": 1.0245901346206665, "learning_rate": 0.0005643268124280782, "loss": 0.3638, "step": 249520 }, { "epoch": 71.78653624856156, "grad_norm": 1.518638253211975, "learning_rate": 0.0005642692750287687, "loss": 0.4489, "step": 249530 }, { "epoch": 71.78941311852704, "grad_norm": 2.350574254989624, "learning_rate": 0.0005642117376294593, "loss": 0.3822, "step": 249540 }, { "epoch": 71.79228998849253, "grad_norm": 1.452723741531372, "learning_rate": 0.0005641542002301496, "loss": 0.4957, "step": 249550 }, { "epoch": 71.795166858458, "grad_norm": 1.0862098932266235, "learning_rate": 0.0005640966628308401, "loss": 0.4505, "step": 249560 }, { "epoch": 71.79804372842348, "grad_norm": 1.1458649635314941, "learning_rate": 0.0005640391254315305, "loss": 0.3521, "step": 249570 }, { "epoch": 71.80092059838896, "grad_norm": 1.0617034435272217, "learning_rate": 0.000563981588032221, "loss": 0.3137, "step": 249580 }, { "epoch": 71.80379746835443, "grad_norm": 2.2065317630767822, "learning_rate": 0.0005639240506329114, "loss": 0.4857, "step": 249590 }, { "epoch": 71.80667433831991, "grad_norm": 1.3027541637420654, "learning_rate": 0.0005638665132336019, "loss": 0.4056, "step": 249600 }, { "epoch": 71.80955120828538, "grad_norm": 0.998615562915802, "learning_rate": 0.0005638089758342922, "loss": 0.4124, "step": 249610 }, { "epoch": 71.81242807825086, "grad_norm": 0.5613915920257568, "learning_rate": 0.0005637514384349828, "loss": 0.4101, "step": 249620 }, { "epoch": 71.81530494821634, "grad_norm": 0.9510345458984375, "learning_rate": 0.0005636939010356732, "loss": 0.452, "step": 249630 }, { "epoch": 71.81818181818181, "grad_norm": 1.072166085243225, "learning_rate": 0.0005636363636363636, "loss": 0.3804, "step": 249640 }, { "epoch": 71.82105868814729, "grad_norm": 1.6036298274993896, "learning_rate": 0.0005635788262370542, "loss": 0.414, "step": 249650 }, { "epoch": 71.82393555811278, "grad_norm": 1.6529483795166016, "learning_rate": 0.0005635212888377445, "loss": 0.4334, "step": 249660 }, { "epoch": 71.82681242807826, "grad_norm": 1.2757209539413452, "learning_rate": 0.000563463751438435, "loss": 0.4274, "step": 249670 }, { "epoch": 71.82968929804373, "grad_norm": 1.6996349096298218, "learning_rate": 0.0005634062140391254, "loss": 0.3267, "step": 249680 }, { "epoch": 71.83256616800921, "grad_norm": 0.8790378570556641, "learning_rate": 0.0005633486766398159, "loss": 0.4911, "step": 249690 }, { "epoch": 71.83544303797468, "grad_norm": 1.3955496549606323, "learning_rate": 0.0005632911392405063, "loss": 0.3642, "step": 249700 }, { "epoch": 71.83831990794016, "grad_norm": 0.6860778331756592, "learning_rate": 0.0005632336018411968, "loss": 0.3559, "step": 249710 }, { "epoch": 71.84119677790564, "grad_norm": 2.442000389099121, "learning_rate": 0.0005631760644418872, "loss": 0.5113, "step": 249720 }, { "epoch": 71.84407364787111, "grad_norm": 1.2113254070281982, "learning_rate": 0.0005631185270425777, "loss": 0.4656, "step": 249730 }, { "epoch": 71.84695051783659, "grad_norm": 1.78351628780365, "learning_rate": 0.0005630609896432682, "loss": 0.4237, "step": 249740 }, { "epoch": 71.84982738780207, "grad_norm": 1.9453449249267578, "learning_rate": 0.0005630034522439585, "loss": 0.4102, "step": 249750 }, { "epoch": 71.85270425776756, "grad_norm": 0.9899599552154541, "learning_rate": 0.0005629459148446491, "loss": 0.3832, "step": 249760 }, { "epoch": 71.85558112773303, "grad_norm": 0.6906133890151978, "learning_rate": 0.0005628883774453395, "loss": 0.3558, "step": 249770 }, { "epoch": 71.85845799769851, "grad_norm": 1.6513539552688599, "learning_rate": 0.0005628308400460299, "loss": 0.4133, "step": 249780 }, { "epoch": 71.86133486766398, "grad_norm": 1.2577288150787354, "learning_rate": 0.0005627733026467203, "loss": 0.5819, "step": 249790 }, { "epoch": 71.86421173762946, "grad_norm": 0.9216933846473694, "learning_rate": 0.0005627157652474109, "loss": 0.5717, "step": 249800 }, { "epoch": 71.86708860759494, "grad_norm": 1.1498467922210693, "learning_rate": 0.0005626582278481012, "loss": 0.3174, "step": 249810 }, { "epoch": 71.86996547756041, "grad_norm": 1.9491709470748901, "learning_rate": 0.0005626006904487917, "loss": 0.44, "step": 249820 }, { "epoch": 71.87284234752589, "grad_norm": 0.797960102558136, "learning_rate": 0.0005625431530494823, "loss": 0.4048, "step": 249830 }, { "epoch": 71.87571921749137, "grad_norm": 1.1520287990570068, "learning_rate": 0.0005624856156501726, "loss": 0.4864, "step": 249840 }, { "epoch": 71.87859608745684, "grad_norm": 1.8027843236923218, "learning_rate": 0.000562428078250863, "loss": 0.3924, "step": 249850 }, { "epoch": 71.88147295742232, "grad_norm": 1.4399374723434448, "learning_rate": 0.0005623705408515535, "loss": 0.4532, "step": 249860 }, { "epoch": 71.88434982738781, "grad_norm": 2.520369052886963, "learning_rate": 0.000562313003452244, "loss": 0.5595, "step": 249870 }, { "epoch": 71.88722669735328, "grad_norm": 1.8431037664413452, "learning_rate": 0.0005622554660529344, "loss": 0.3963, "step": 249880 }, { "epoch": 71.89010356731876, "grad_norm": 1.89223313331604, "learning_rate": 0.0005621979286536249, "loss": 0.4136, "step": 249890 }, { "epoch": 71.89298043728424, "grad_norm": 2.1361160278320312, "learning_rate": 0.0005621403912543152, "loss": 0.5048, "step": 249900 }, { "epoch": 71.89585730724971, "grad_norm": 0.9875586628913879, "learning_rate": 0.0005620828538550058, "loss": 0.3256, "step": 249910 }, { "epoch": 71.89873417721519, "grad_norm": 0.7459840774536133, "learning_rate": 0.0005620253164556962, "loss": 0.3652, "step": 249920 }, { "epoch": 71.90161104718067, "grad_norm": 1.7782841920852661, "learning_rate": 0.0005619677790563866, "loss": 0.4226, "step": 249930 }, { "epoch": 71.90448791714614, "grad_norm": 1.2268719673156738, "learning_rate": 0.0005619102416570772, "loss": 0.3649, "step": 249940 }, { "epoch": 71.90736478711162, "grad_norm": 0.8411802649497986, "learning_rate": 0.0005618527042577676, "loss": 0.4644, "step": 249950 }, { "epoch": 71.9102416570771, "grad_norm": 1.6006708145141602, "learning_rate": 0.000561795166858458, "loss": 0.4647, "step": 249960 }, { "epoch": 71.91311852704258, "grad_norm": 0.4887147843837738, "learning_rate": 0.0005617376294591484, "loss": 0.3889, "step": 249970 }, { "epoch": 71.91599539700806, "grad_norm": 1.7111685276031494, "learning_rate": 0.000561680092059839, "loss": 0.5124, "step": 249980 }, { "epoch": 71.91887226697354, "grad_norm": 0.9100858569145203, "learning_rate": 0.0005616225546605293, "loss": 0.372, "step": 249990 }, { "epoch": 71.92174913693901, "grad_norm": 1.6422860622406006, "learning_rate": 0.0005615650172612198, "loss": 0.392, "step": 250000 }, { "epoch": 71.92462600690449, "grad_norm": 0.7475031614303589, "learning_rate": 0.0005615074798619103, "loss": 0.3955, "step": 250010 }, { "epoch": 71.92750287686997, "grad_norm": 1.9702850580215454, "learning_rate": 0.0005614499424626007, "loss": 0.4205, "step": 250020 }, { "epoch": 71.93037974683544, "grad_norm": 0.8908954858779907, "learning_rate": 0.0005613924050632911, "loss": 0.3957, "step": 250030 }, { "epoch": 71.93325661680092, "grad_norm": 1.1366411447525024, "learning_rate": 0.0005613348676639816, "loss": 0.431, "step": 250040 }, { "epoch": 71.9361334867664, "grad_norm": 1.593570590019226, "learning_rate": 0.0005612773302646721, "loss": 0.4939, "step": 250050 }, { "epoch": 71.93901035673187, "grad_norm": 0.9670520424842834, "learning_rate": 0.0005612197928653625, "loss": 0.4263, "step": 250060 }, { "epoch": 71.94188722669735, "grad_norm": 1.2382421493530273, "learning_rate": 0.000561162255466053, "loss": 0.3797, "step": 250070 }, { "epoch": 71.94476409666284, "grad_norm": 1.3132189512252808, "learning_rate": 0.0005611047180667433, "loss": 0.4261, "step": 250080 }, { "epoch": 71.94764096662831, "grad_norm": 1.4226137399673462, "learning_rate": 0.0005610471806674339, "loss": 0.4583, "step": 250090 }, { "epoch": 71.95051783659379, "grad_norm": 1.863277792930603, "learning_rate": 0.0005609896432681243, "loss": 0.4045, "step": 250100 }, { "epoch": 71.95339470655927, "grad_norm": 1.5701038837432861, "learning_rate": 0.0005609321058688147, "loss": 0.6028, "step": 250110 }, { "epoch": 71.95627157652474, "grad_norm": 1.5701712369918823, "learning_rate": 0.0005608745684695052, "loss": 0.3918, "step": 250120 }, { "epoch": 71.95914844649022, "grad_norm": 2.9767351150512695, "learning_rate": 0.0005608170310701957, "loss": 0.5179, "step": 250130 }, { "epoch": 71.9620253164557, "grad_norm": 0.9236979484558105, "learning_rate": 0.000560759493670886, "loss": 0.4445, "step": 250140 }, { "epoch": 71.96490218642117, "grad_norm": 1.5071866512298584, "learning_rate": 0.0005607019562715765, "loss": 0.4636, "step": 250150 }, { "epoch": 71.96777905638665, "grad_norm": 1.6993306875228882, "learning_rate": 0.0005606444188722671, "loss": 0.4989, "step": 250160 }, { "epoch": 71.97065592635212, "grad_norm": 1.8633413314819336, "learning_rate": 0.0005605868814729574, "loss": 0.3457, "step": 250170 }, { "epoch": 71.97353279631761, "grad_norm": 0.7333221435546875, "learning_rate": 0.0005605293440736479, "loss": 0.4589, "step": 250180 }, { "epoch": 71.97640966628309, "grad_norm": 1.1293253898620605, "learning_rate": 0.0005604718066743383, "loss": 0.3427, "step": 250190 }, { "epoch": 71.97928653624857, "grad_norm": 1.1086057424545288, "learning_rate": 0.0005604142692750288, "loss": 0.38, "step": 250200 }, { "epoch": 71.98216340621404, "grad_norm": 0.9976338744163513, "learning_rate": 0.0005603567318757192, "loss": 0.3997, "step": 250210 }, { "epoch": 71.98504027617952, "grad_norm": 1.2527000904083252, "learning_rate": 0.0005602991944764097, "loss": 0.3636, "step": 250220 }, { "epoch": 71.987917146145, "grad_norm": 0.8008921146392822, "learning_rate": 0.0005602416570771001, "loss": 0.4152, "step": 250230 }, { "epoch": 71.99079401611047, "grad_norm": 0.8041654229164124, "learning_rate": 0.0005601841196777906, "loss": 0.3698, "step": 250240 }, { "epoch": 71.99367088607595, "grad_norm": 1.52134108543396, "learning_rate": 0.0005601265822784811, "loss": 0.4552, "step": 250250 }, { "epoch": 71.99654775604142, "grad_norm": 1.5019358396530151, "learning_rate": 0.0005600690448791714, "loss": 0.3825, "step": 250260 }, { "epoch": 71.9994246260069, "grad_norm": 1.4999028444290161, "learning_rate": 0.000560011507479862, "loss": 0.3754, "step": 250270 }, { "epoch": 72.00230149597238, "grad_norm": 1.2806572914123535, "learning_rate": 0.0005599539700805524, "loss": 0.4022, "step": 250280 }, { "epoch": 72.00517836593787, "grad_norm": 2.0042924880981445, "learning_rate": 0.0005598964326812428, "loss": 0.3288, "step": 250290 }, { "epoch": 72.00805523590334, "grad_norm": 1.2387385368347168, "learning_rate": 0.0005598388952819332, "loss": 0.4278, "step": 250300 }, { "epoch": 72.01093210586882, "grad_norm": 0.9472761750221252, "learning_rate": 0.0005597813578826238, "loss": 0.2493, "step": 250310 }, { "epoch": 72.0138089758343, "grad_norm": 1.9948508739471436, "learning_rate": 0.0005597238204833141, "loss": 0.37, "step": 250320 }, { "epoch": 72.01668584579977, "grad_norm": 1.3399627208709717, "learning_rate": 0.0005596662830840046, "loss": 0.3513, "step": 250330 }, { "epoch": 72.01956271576525, "grad_norm": 1.1309012174606323, "learning_rate": 0.0005596087456846952, "loss": 0.4034, "step": 250340 }, { "epoch": 72.02243958573072, "grad_norm": 0.7827973365783691, "learning_rate": 0.0005595512082853855, "loss": 0.3173, "step": 250350 }, { "epoch": 72.0253164556962, "grad_norm": 0.8596224784851074, "learning_rate": 0.000559493670886076, "loss": 0.362, "step": 250360 }, { "epoch": 72.02819332566168, "grad_norm": 1.170121192932129, "learning_rate": 0.0005594361334867664, "loss": 0.4155, "step": 250370 }, { "epoch": 72.03107019562715, "grad_norm": 0.566718339920044, "learning_rate": 0.0005593785960874569, "loss": 0.341, "step": 250380 }, { "epoch": 72.03394706559264, "grad_norm": 1.063438057899475, "learning_rate": 0.0005593210586881473, "loss": 0.3284, "step": 250390 }, { "epoch": 72.03682393555812, "grad_norm": 0.960715651512146, "learning_rate": 0.0005592635212888378, "loss": 0.3238, "step": 250400 }, { "epoch": 72.0397008055236, "grad_norm": 0.8524848222732544, "learning_rate": 0.0005592059838895282, "loss": 0.4139, "step": 250410 }, { "epoch": 72.04257767548907, "grad_norm": 1.1408097743988037, "learning_rate": 0.0005591484464902187, "loss": 0.3801, "step": 250420 }, { "epoch": 72.04545454545455, "grad_norm": 1.2382533550262451, "learning_rate": 0.0005590909090909091, "loss": 0.3192, "step": 250430 }, { "epoch": 72.04833141542002, "grad_norm": 1.223463773727417, "learning_rate": 0.0005590333716915995, "loss": 0.3317, "step": 250440 }, { "epoch": 72.0512082853855, "grad_norm": 1.2413337230682373, "learning_rate": 0.0005589758342922901, "loss": 0.3643, "step": 250450 }, { "epoch": 72.05408515535098, "grad_norm": 0.9898074865341187, "learning_rate": 0.0005589182968929804, "loss": 0.361, "step": 250460 }, { "epoch": 72.05696202531645, "grad_norm": 1.2868152856826782, "learning_rate": 0.0005588607594936709, "loss": 0.4567, "step": 250470 }, { "epoch": 72.05983889528193, "grad_norm": 1.169633150100708, "learning_rate": 0.0005588032220943613, "loss": 0.3369, "step": 250480 }, { "epoch": 72.0627157652474, "grad_norm": 1.013424277305603, "learning_rate": 0.0005587456846950518, "loss": 0.3498, "step": 250490 }, { "epoch": 72.0655926352129, "grad_norm": 1.041575312614441, "learning_rate": 0.0005586881472957422, "loss": 0.3768, "step": 250500 }, { "epoch": 72.06846950517837, "grad_norm": 1.507710337638855, "learning_rate": 0.0005586306098964327, "loss": 0.393, "step": 250510 }, { "epoch": 72.07134637514385, "grad_norm": 1.3054403066635132, "learning_rate": 0.0005585730724971231, "loss": 0.3729, "step": 250520 }, { "epoch": 72.07422324510932, "grad_norm": 1.574411392211914, "learning_rate": 0.0005585155350978136, "loss": 0.3437, "step": 250530 }, { "epoch": 72.0771001150748, "grad_norm": 0.9667463302612305, "learning_rate": 0.000558457997698504, "loss": 0.4816, "step": 250540 }, { "epoch": 72.07997698504028, "grad_norm": 1.6812849044799805, "learning_rate": 0.0005584004602991944, "loss": 0.4333, "step": 250550 }, { "epoch": 72.08285385500575, "grad_norm": 0.9590043425559998, "learning_rate": 0.000558342922899885, "loss": 0.4497, "step": 250560 }, { "epoch": 72.08573072497123, "grad_norm": 1.5661661624908447, "learning_rate": 0.0005582853855005754, "loss": 0.3703, "step": 250570 }, { "epoch": 72.0886075949367, "grad_norm": 0.9621230959892273, "learning_rate": 0.0005582278481012658, "loss": 0.3848, "step": 250580 }, { "epoch": 72.09148446490218, "grad_norm": 0.7472506165504456, "learning_rate": 0.0005581703107019562, "loss": 0.3091, "step": 250590 }, { "epoch": 72.09436133486767, "grad_norm": 1.0867050886154175, "learning_rate": 0.0005581127733026468, "loss": 0.4378, "step": 250600 }, { "epoch": 72.09723820483315, "grad_norm": 0.6809525489807129, "learning_rate": 0.0005580552359033371, "loss": 0.3762, "step": 250610 }, { "epoch": 72.10011507479862, "grad_norm": 1.4934333562850952, "learning_rate": 0.0005579976985040276, "loss": 0.5057, "step": 250620 }, { "epoch": 72.1029919447641, "grad_norm": 1.519257664680481, "learning_rate": 0.0005579401611047181, "loss": 0.371, "step": 250630 }, { "epoch": 72.10586881472958, "grad_norm": 0.9032296538352966, "learning_rate": 0.0005578826237054085, "loss": 0.3322, "step": 250640 }, { "epoch": 72.10874568469505, "grad_norm": 0.7635512351989746, "learning_rate": 0.000557825086306099, "loss": 0.3884, "step": 250650 }, { "epoch": 72.11162255466053, "grad_norm": 1.1705306768417358, "learning_rate": 0.0005577675489067894, "loss": 0.3797, "step": 250660 }, { "epoch": 72.114499424626, "grad_norm": 1.3746942281723022, "learning_rate": 0.0005577100115074799, "loss": 0.415, "step": 250670 }, { "epoch": 72.11737629459148, "grad_norm": 1.5178548097610474, "learning_rate": 0.0005576524741081703, "loss": 0.4448, "step": 250680 }, { "epoch": 72.12025316455696, "grad_norm": 1.3825424909591675, "learning_rate": 0.0005575949367088608, "loss": 0.343, "step": 250690 }, { "epoch": 72.12313003452243, "grad_norm": 1.552569031715393, "learning_rate": 0.0005575373993095512, "loss": 0.4072, "step": 250700 }, { "epoch": 72.12600690448792, "grad_norm": 1.2141345739364624, "learning_rate": 0.0005574798619102417, "loss": 0.4087, "step": 250710 }, { "epoch": 72.1288837744534, "grad_norm": 1.4533518552780151, "learning_rate": 0.0005574223245109321, "loss": 0.3959, "step": 250720 }, { "epoch": 72.13176064441888, "grad_norm": 2.6377007961273193, "learning_rate": 0.0005573647871116225, "loss": 0.4143, "step": 250730 }, { "epoch": 72.13463751438435, "grad_norm": 1.6318250894546509, "learning_rate": 0.000557307249712313, "loss": 0.4614, "step": 250740 }, { "epoch": 72.13751438434983, "grad_norm": 0.9094603657722473, "learning_rate": 0.0005572497123130035, "loss": 0.353, "step": 250750 }, { "epoch": 72.1403912543153, "grad_norm": 0.617698073387146, "learning_rate": 0.0005571921749136939, "loss": 0.3625, "step": 250760 }, { "epoch": 72.14326812428078, "grad_norm": 1.6036350727081299, "learning_rate": 0.0005571346375143843, "loss": 0.3822, "step": 250770 }, { "epoch": 72.14614499424626, "grad_norm": 0.7085251212120056, "learning_rate": 0.0005570771001150749, "loss": 0.3302, "step": 250780 }, { "epoch": 72.14902186421173, "grad_norm": 1.1503409147262573, "learning_rate": 0.0005570195627157652, "loss": 0.3632, "step": 250790 }, { "epoch": 72.15189873417721, "grad_norm": 1.46904456615448, "learning_rate": 0.0005569620253164557, "loss": 0.4382, "step": 250800 }, { "epoch": 72.1547756041427, "grad_norm": 1.2283002138137817, "learning_rate": 0.0005569044879171462, "loss": 0.3547, "step": 250810 }, { "epoch": 72.15765247410818, "grad_norm": 1.6426939964294434, "learning_rate": 0.0005568469505178366, "loss": 0.3824, "step": 250820 }, { "epoch": 72.16052934407365, "grad_norm": 1.4130297899246216, "learning_rate": 0.000556789413118527, "loss": 0.4427, "step": 250830 }, { "epoch": 72.16340621403913, "grad_norm": 1.9254560470581055, "learning_rate": 0.0005567318757192175, "loss": 0.3935, "step": 250840 }, { "epoch": 72.1662830840046, "grad_norm": 1.107763648033142, "learning_rate": 0.000556674338319908, "loss": 0.3604, "step": 250850 }, { "epoch": 72.16915995397008, "grad_norm": 1.2463898658752441, "learning_rate": 0.0005566168009205984, "loss": 0.3375, "step": 250860 }, { "epoch": 72.17203682393556, "grad_norm": 0.95849609375, "learning_rate": 0.0005565592635212889, "loss": 0.4486, "step": 250870 }, { "epoch": 72.17491369390103, "grad_norm": 2.4654808044433594, "learning_rate": 0.0005565017261219792, "loss": 0.5046, "step": 250880 }, { "epoch": 72.17779056386651, "grad_norm": 1.1217994689941406, "learning_rate": 0.0005564441887226698, "loss": 0.3732, "step": 250890 }, { "epoch": 72.18066743383199, "grad_norm": 0.9961605668067932, "learning_rate": 0.0005563866513233602, "loss": 0.3954, "step": 250900 }, { "epoch": 72.18354430379746, "grad_norm": 1.0177983045578003, "learning_rate": 0.0005563291139240506, "loss": 0.4687, "step": 250910 }, { "epoch": 72.18642117376295, "grad_norm": 0.7267570495605469, "learning_rate": 0.0005562715765247411, "loss": 0.4733, "step": 250920 }, { "epoch": 72.18929804372843, "grad_norm": 0.8503052592277527, "learning_rate": 0.0005562140391254316, "loss": 0.3203, "step": 250930 }, { "epoch": 72.1921749136939, "grad_norm": 1.0740201473236084, "learning_rate": 0.0005561565017261219, "loss": 0.3679, "step": 250940 }, { "epoch": 72.19505178365938, "grad_norm": 1.6373815536499023, "learning_rate": 0.0005560989643268124, "loss": 0.3856, "step": 250950 }, { "epoch": 72.19792865362486, "grad_norm": 1.4575543403625488, "learning_rate": 0.000556041426927503, "loss": 0.4626, "step": 250960 }, { "epoch": 72.20080552359033, "grad_norm": 0.7855708003044128, "learning_rate": 0.0005559838895281933, "loss": 0.3909, "step": 250970 }, { "epoch": 72.20368239355581, "grad_norm": 2.244572401046753, "learning_rate": 0.0005559263521288838, "loss": 0.5221, "step": 250980 }, { "epoch": 72.20655926352129, "grad_norm": 0.4982873797416687, "learning_rate": 0.0005558688147295743, "loss": 0.3335, "step": 250990 }, { "epoch": 72.20943613348676, "grad_norm": 1.4183192253112793, "learning_rate": 0.0005558112773302647, "loss": 0.3521, "step": 251000 }, { "epoch": 72.21231300345224, "grad_norm": 1.712976336479187, "learning_rate": 0.0005557537399309551, "loss": 0.3936, "step": 251010 }, { "epoch": 72.21518987341773, "grad_norm": 1.7998669147491455, "learning_rate": 0.0005556962025316456, "loss": 0.428, "step": 251020 }, { "epoch": 72.2180667433832, "grad_norm": 1.4971028566360474, "learning_rate": 0.000555638665132336, "loss": 0.4176, "step": 251030 }, { "epoch": 72.22094361334868, "grad_norm": 0.7931479811668396, "learning_rate": 0.0005555811277330265, "loss": 0.3731, "step": 251040 }, { "epoch": 72.22382048331416, "grad_norm": 3.271780490875244, "learning_rate": 0.000555523590333717, "loss": 0.3394, "step": 251050 }, { "epoch": 72.22669735327963, "grad_norm": 1.262688398361206, "learning_rate": 0.0005554660529344073, "loss": 0.3212, "step": 251060 }, { "epoch": 72.22957422324511, "grad_norm": 0.9774066805839539, "learning_rate": 0.0005554085155350979, "loss": 0.3434, "step": 251070 }, { "epoch": 72.23245109321059, "grad_norm": 1.5426867008209229, "learning_rate": 0.0005553509781357883, "loss": 0.4014, "step": 251080 }, { "epoch": 72.23532796317606, "grad_norm": 1.0200203657150269, "learning_rate": 0.0005552934407364787, "loss": 0.4194, "step": 251090 }, { "epoch": 72.23820483314154, "grad_norm": 1.7881250381469727, "learning_rate": 0.0005552359033371692, "loss": 0.4142, "step": 251100 }, { "epoch": 72.24108170310701, "grad_norm": 1.1791516542434692, "learning_rate": 0.0005551783659378597, "loss": 0.3501, "step": 251110 }, { "epoch": 72.24395857307249, "grad_norm": 0.9962548613548279, "learning_rate": 0.00055512082853855, "loss": 0.3679, "step": 251120 }, { "epoch": 72.24683544303798, "grad_norm": 1.0510425567626953, "learning_rate": 0.0005550632911392405, "loss": 0.3993, "step": 251130 }, { "epoch": 72.24971231300346, "grad_norm": 0.9796355366706848, "learning_rate": 0.0005550057537399311, "loss": 0.3768, "step": 251140 }, { "epoch": 72.25258918296893, "grad_norm": 1.3368573188781738, "learning_rate": 0.0005549482163406214, "loss": 0.3662, "step": 251150 }, { "epoch": 72.25546605293441, "grad_norm": 1.279366135597229, "learning_rate": 0.0005548906789413119, "loss": 0.393, "step": 251160 }, { "epoch": 72.25834292289989, "grad_norm": 1.3935106992721558, "learning_rate": 0.0005548331415420023, "loss": 0.4598, "step": 251170 }, { "epoch": 72.26121979286536, "grad_norm": 1.0837832689285278, "learning_rate": 0.0005547756041426928, "loss": 0.4345, "step": 251180 }, { "epoch": 72.26409666283084, "grad_norm": 1.0691344738006592, "learning_rate": 0.0005547180667433832, "loss": 0.3981, "step": 251190 }, { "epoch": 72.26697353279631, "grad_norm": 3.3236591815948486, "learning_rate": 0.0005546605293440737, "loss": 0.4129, "step": 251200 }, { "epoch": 72.26985040276179, "grad_norm": 1.0847887992858887, "learning_rate": 0.0005546029919447641, "loss": 0.4623, "step": 251210 }, { "epoch": 72.27272727272727, "grad_norm": 0.8456010818481445, "learning_rate": 0.0005545454545454546, "loss": 0.4256, "step": 251220 }, { "epoch": 72.27560414269276, "grad_norm": 1.0959175825119019, "learning_rate": 0.000554487917146145, "loss": 0.4392, "step": 251230 }, { "epoch": 72.27848101265823, "grad_norm": 0.9744811654090881, "learning_rate": 0.0005544303797468354, "loss": 0.4543, "step": 251240 }, { "epoch": 72.28135788262371, "grad_norm": 1.5820642709732056, "learning_rate": 0.000554372842347526, "loss": 0.3624, "step": 251250 }, { "epoch": 72.28423475258919, "grad_norm": 1.0292208194732666, "learning_rate": 0.0005543153049482164, "loss": 0.3081, "step": 251260 }, { "epoch": 72.28711162255466, "grad_norm": 1.891005277633667, "learning_rate": 0.0005542577675489068, "loss": 0.3739, "step": 251270 }, { "epoch": 72.28998849252014, "grad_norm": 1.8308693170547485, "learning_rate": 0.0005542002301495972, "loss": 0.3813, "step": 251280 }, { "epoch": 72.29286536248561, "grad_norm": 1.0861660242080688, "learning_rate": 0.0005541426927502877, "loss": 0.4453, "step": 251290 }, { "epoch": 72.29574223245109, "grad_norm": 1.2156785726547241, "learning_rate": 0.0005540851553509781, "loss": 0.4349, "step": 251300 }, { "epoch": 72.29861910241657, "grad_norm": 1.4810428619384766, "learning_rate": 0.0005540276179516686, "loss": 0.3481, "step": 251310 }, { "epoch": 72.30149597238204, "grad_norm": 0.9332292079925537, "learning_rate": 0.000553970080552359, "loss": 0.3634, "step": 251320 }, { "epoch": 72.30437284234753, "grad_norm": 0.7675538063049316, "learning_rate": 0.0005539125431530495, "loss": 0.411, "step": 251330 }, { "epoch": 72.30724971231301, "grad_norm": 1.0424039363861084, "learning_rate": 0.00055385500575374, "loss": 0.3513, "step": 251340 }, { "epoch": 72.31012658227849, "grad_norm": 2.679966688156128, "learning_rate": 0.0005537974683544303, "loss": 0.351, "step": 251350 }, { "epoch": 72.31300345224396, "grad_norm": 1.2312912940979004, "learning_rate": 0.0005537399309551209, "loss": 0.3726, "step": 251360 }, { "epoch": 72.31588032220944, "grad_norm": 0.7716394066810608, "learning_rate": 0.0005536823935558113, "loss": 0.3597, "step": 251370 }, { "epoch": 72.31875719217491, "grad_norm": 0.9744765162467957, "learning_rate": 0.0005536248561565017, "loss": 0.3786, "step": 251380 }, { "epoch": 72.32163406214039, "grad_norm": 2.4012420177459717, "learning_rate": 0.0005535673187571922, "loss": 0.3234, "step": 251390 }, { "epoch": 72.32451093210587, "grad_norm": 1.3188908100128174, "learning_rate": 0.0005535097813578827, "loss": 0.3208, "step": 251400 }, { "epoch": 72.32738780207134, "grad_norm": 1.1128637790679932, "learning_rate": 0.000553452243958573, "loss": 0.3574, "step": 251410 }, { "epoch": 72.33026467203682, "grad_norm": 2.00754714012146, "learning_rate": 0.0005533947065592635, "loss": 0.4144, "step": 251420 }, { "epoch": 72.3331415420023, "grad_norm": 1.1130162477493286, "learning_rate": 0.000553337169159954, "loss": 0.4059, "step": 251430 }, { "epoch": 72.33601841196779, "grad_norm": 0.9981393218040466, "learning_rate": 0.0005532796317606444, "loss": 0.3778, "step": 251440 }, { "epoch": 72.33889528193326, "grad_norm": 1.0973939895629883, "learning_rate": 0.0005532220943613349, "loss": 0.4029, "step": 251450 }, { "epoch": 72.34177215189874, "grad_norm": 0.6057239770889282, "learning_rate": 0.0005531645569620253, "loss": 0.3325, "step": 251460 }, { "epoch": 72.34464902186421, "grad_norm": 1.409454107284546, "learning_rate": 0.0005531070195627158, "loss": 0.3066, "step": 251470 }, { "epoch": 72.34752589182969, "grad_norm": 1.1878188848495483, "learning_rate": 0.0005530494821634062, "loss": 0.3519, "step": 251480 }, { "epoch": 72.35040276179517, "grad_norm": 2.0315749645233154, "learning_rate": 0.0005529919447640967, "loss": 0.4723, "step": 251490 }, { "epoch": 72.35327963176064, "grad_norm": 0.8494985103607178, "learning_rate": 0.0005529344073647871, "loss": 0.4031, "step": 251500 }, { "epoch": 72.35615650172612, "grad_norm": 1.1823352575302124, "learning_rate": 0.0005528768699654776, "loss": 0.3908, "step": 251510 }, { "epoch": 72.3590333716916, "grad_norm": 0.6696749925613403, "learning_rate": 0.000552819332566168, "loss": 0.372, "step": 251520 }, { "epoch": 72.36191024165707, "grad_norm": 1.5417340993881226, "learning_rate": 0.0005527617951668584, "loss": 0.364, "step": 251530 }, { "epoch": 72.36478711162256, "grad_norm": 1.4230372905731201, "learning_rate": 0.000552704257767549, "loss": 0.4187, "step": 251540 }, { "epoch": 72.36766398158804, "grad_norm": 0.8849539160728455, "learning_rate": 0.0005526467203682394, "loss": 0.3703, "step": 251550 }, { "epoch": 72.37054085155351, "grad_norm": 1.3806029558181763, "learning_rate": 0.0005525891829689298, "loss": 0.3979, "step": 251560 }, { "epoch": 72.37341772151899, "grad_norm": 0.9818965196609497, "learning_rate": 0.0005525316455696202, "loss": 0.4312, "step": 251570 }, { "epoch": 72.37629459148447, "grad_norm": 1.2082985639572144, "learning_rate": 0.0005524741081703108, "loss": 0.3712, "step": 251580 }, { "epoch": 72.37917146144994, "grad_norm": 0.7825325727462769, "learning_rate": 0.0005524165707710011, "loss": 0.3895, "step": 251590 }, { "epoch": 72.38204833141542, "grad_norm": 0.6656059622764587, "learning_rate": 0.0005523590333716916, "loss": 0.4647, "step": 251600 }, { "epoch": 72.3849252013809, "grad_norm": 1.010180950164795, "learning_rate": 0.0005523014959723821, "loss": 0.4261, "step": 251610 }, { "epoch": 72.38780207134637, "grad_norm": 1.6774704456329346, "learning_rate": 0.0005522439585730725, "loss": 0.2986, "step": 251620 }, { "epoch": 72.39067894131185, "grad_norm": 1.3542308807373047, "learning_rate": 0.0005521864211737629, "loss": 0.42, "step": 251630 }, { "epoch": 72.39355581127732, "grad_norm": 0.8781651854515076, "learning_rate": 0.0005521288837744534, "loss": 0.3321, "step": 251640 }, { "epoch": 72.39643268124281, "grad_norm": 1.1879901885986328, "learning_rate": 0.0005520713463751439, "loss": 0.3181, "step": 251650 }, { "epoch": 72.39930955120829, "grad_norm": 2.3649179935455322, "learning_rate": 0.0005520138089758343, "loss": 0.4646, "step": 251660 }, { "epoch": 72.40218642117377, "grad_norm": 2.1619961261749268, "learning_rate": 0.0005519562715765248, "loss": 0.4617, "step": 251670 }, { "epoch": 72.40506329113924, "grad_norm": 0.6520742774009705, "learning_rate": 0.0005518987341772152, "loss": 0.3279, "step": 251680 }, { "epoch": 72.40794016110472, "grad_norm": 1.243544578552246, "learning_rate": 0.0005518411967779057, "loss": 0.4465, "step": 251690 }, { "epoch": 72.4108170310702, "grad_norm": 1.1466093063354492, "learning_rate": 0.0005517836593785961, "loss": 0.3929, "step": 251700 }, { "epoch": 72.41369390103567, "grad_norm": 0.9738069176673889, "learning_rate": 0.0005517261219792865, "loss": 0.3689, "step": 251710 }, { "epoch": 72.41657077100115, "grad_norm": 1.3755141496658325, "learning_rate": 0.000551668584579977, "loss": 0.4081, "step": 251720 }, { "epoch": 72.41944764096662, "grad_norm": 1.500545620918274, "learning_rate": 0.0005516110471806675, "loss": 0.3827, "step": 251730 }, { "epoch": 72.4223245109321, "grad_norm": 1.4807007312774658, "learning_rate": 0.0005515535097813578, "loss": 0.3806, "step": 251740 }, { "epoch": 72.42520138089759, "grad_norm": 2.3351690769195557, "learning_rate": 0.0005514959723820483, "loss": 0.3973, "step": 251750 }, { "epoch": 72.42807825086307, "grad_norm": 2.7921700477600098, "learning_rate": 0.0005514384349827389, "loss": 0.423, "step": 251760 }, { "epoch": 72.43095512082854, "grad_norm": 1.2236037254333496, "learning_rate": 0.0005513808975834292, "loss": 0.3606, "step": 251770 }, { "epoch": 72.43383199079402, "grad_norm": 2.7112619876861572, "learning_rate": 0.0005513233601841197, "loss": 0.3108, "step": 251780 }, { "epoch": 72.4367088607595, "grad_norm": 1.21290922164917, "learning_rate": 0.0005512658227848102, "loss": 0.3846, "step": 251790 }, { "epoch": 72.43958573072497, "grad_norm": 1.7123011350631714, "learning_rate": 0.0005512082853855006, "loss": 0.4068, "step": 251800 }, { "epoch": 72.44246260069045, "grad_norm": 1.9280226230621338, "learning_rate": 0.000551150747986191, "loss": 0.4444, "step": 251810 }, { "epoch": 72.44533947065592, "grad_norm": 1.465476632118225, "learning_rate": 0.0005510932105868815, "loss": 0.4116, "step": 251820 }, { "epoch": 72.4482163406214, "grad_norm": 1.8415601253509521, "learning_rate": 0.0005510356731875719, "loss": 0.438, "step": 251830 }, { "epoch": 72.45109321058688, "grad_norm": 1.4240543842315674, "learning_rate": 0.0005509781357882624, "loss": 0.33, "step": 251840 }, { "epoch": 72.45397008055235, "grad_norm": 1.4244657754898071, "learning_rate": 0.0005509205983889529, "loss": 0.4105, "step": 251850 }, { "epoch": 72.45684695051784, "grad_norm": 1.6838865280151367, "learning_rate": 0.0005508630609896432, "loss": 0.4082, "step": 251860 }, { "epoch": 72.45972382048332, "grad_norm": 0.8174217343330383, "learning_rate": 0.0005508055235903338, "loss": 0.3533, "step": 251870 }, { "epoch": 72.4626006904488, "grad_norm": 1.3989064693450928, "learning_rate": 0.0005507479861910242, "loss": 0.4249, "step": 251880 }, { "epoch": 72.46547756041427, "grad_norm": 1.4771318435668945, "learning_rate": 0.0005506904487917146, "loss": 0.4406, "step": 251890 }, { "epoch": 72.46835443037975, "grad_norm": 1.2521528005599976, "learning_rate": 0.0005506329113924051, "loss": 0.3806, "step": 251900 }, { "epoch": 72.47123130034522, "grad_norm": 1.2646708488464355, "learning_rate": 0.0005505753739930956, "loss": 0.4011, "step": 251910 }, { "epoch": 72.4741081703107, "grad_norm": 1.6680155992507935, "learning_rate": 0.0005505178365937859, "loss": 0.3126, "step": 251920 }, { "epoch": 72.47698504027618, "grad_norm": 2.1294138431549072, "learning_rate": 0.0005504602991944764, "loss": 0.4004, "step": 251930 }, { "epoch": 72.47986191024165, "grad_norm": 0.7132651209831238, "learning_rate": 0.000550402761795167, "loss": 0.3422, "step": 251940 }, { "epoch": 72.48273878020713, "grad_norm": 0.9098705649375916, "learning_rate": 0.0005503452243958573, "loss": 0.39, "step": 251950 }, { "epoch": 72.48561565017262, "grad_norm": 1.2198818922042847, "learning_rate": 0.0005502876869965478, "loss": 0.398, "step": 251960 }, { "epoch": 72.4884925201381, "grad_norm": 0.9610531330108643, "learning_rate": 0.0005502301495972382, "loss": 0.2939, "step": 251970 }, { "epoch": 72.49136939010357, "grad_norm": 1.2071634531021118, "learning_rate": 0.0005501726121979287, "loss": 0.3803, "step": 251980 }, { "epoch": 72.49424626006905, "grad_norm": 0.918469250202179, "learning_rate": 0.0005501150747986191, "loss": 0.3836, "step": 251990 }, { "epoch": 72.49712313003452, "grad_norm": 0.819568395614624, "learning_rate": 0.0005500575373993096, "loss": 0.3812, "step": 252000 }, { "epoch": 72.5, "grad_norm": 0.6973239183425903, "learning_rate": 0.00055, "loss": 0.5568, "step": 252010 }, { "epoch": 72.50287686996548, "grad_norm": 1.9351750612258911, "learning_rate": 0.0005499424626006905, "loss": 0.4024, "step": 252020 }, { "epoch": 72.50575373993095, "grad_norm": 1.4558218717575073, "learning_rate": 0.0005498849252013809, "loss": 0.3534, "step": 252030 }, { "epoch": 72.50863060989643, "grad_norm": 1.5396114587783813, "learning_rate": 0.0005498273878020713, "loss": 0.3627, "step": 252040 }, { "epoch": 72.5115074798619, "grad_norm": 1.4345580339431763, "learning_rate": 0.0005497698504027619, "loss": 0.4333, "step": 252050 }, { "epoch": 72.51438434982738, "grad_norm": 0.9198095202445984, "learning_rate": 0.0005497123130034523, "loss": 0.3375, "step": 252060 }, { "epoch": 72.51726121979287, "grad_norm": 1.559757113456726, "learning_rate": 0.0005496547756041427, "loss": 0.3854, "step": 252070 }, { "epoch": 72.52013808975835, "grad_norm": 2.0704619884490967, "learning_rate": 0.0005495972382048332, "loss": 0.4561, "step": 252080 }, { "epoch": 72.52301495972382, "grad_norm": 0.9580907225608826, "learning_rate": 0.0005495397008055237, "loss": 0.3054, "step": 252090 }, { "epoch": 72.5258918296893, "grad_norm": 1.3609352111816406, "learning_rate": 0.000549482163406214, "loss": 0.3238, "step": 252100 }, { "epoch": 72.52876869965478, "grad_norm": 0.6225924491882324, "learning_rate": 0.0005494246260069045, "loss": 0.4184, "step": 252110 }, { "epoch": 72.53164556962025, "grad_norm": 1.8260890245437622, "learning_rate": 0.0005493670886075949, "loss": 0.4051, "step": 252120 }, { "epoch": 72.53452243958573, "grad_norm": 1.5396887063980103, "learning_rate": 0.0005493095512082854, "loss": 0.5159, "step": 252130 }, { "epoch": 72.5373993095512, "grad_norm": 1.3119577169418335, "learning_rate": 0.0005492520138089758, "loss": 0.4535, "step": 252140 }, { "epoch": 72.54027617951668, "grad_norm": 1.26302170753479, "learning_rate": 0.0005491944764096662, "loss": 0.3818, "step": 252150 }, { "epoch": 72.54315304948216, "grad_norm": 1.3009402751922607, "learning_rate": 0.0005491369390103568, "loss": 0.4999, "step": 252160 }, { "epoch": 72.54602991944765, "grad_norm": 1.7132482528686523, "learning_rate": 0.0005490794016110472, "loss": 0.3936, "step": 252170 }, { "epoch": 72.54890678941312, "grad_norm": 1.3321703672409058, "learning_rate": 0.0005490218642117376, "loss": 0.3451, "step": 252180 }, { "epoch": 72.5517836593786, "grad_norm": 0.803463876247406, "learning_rate": 0.0005489643268124281, "loss": 0.3736, "step": 252190 }, { "epoch": 72.55466052934408, "grad_norm": 1.0902986526489258, "learning_rate": 0.0005489067894131186, "loss": 0.4145, "step": 252200 }, { "epoch": 72.55753739930955, "grad_norm": 1.990384817123413, "learning_rate": 0.0005488492520138089, "loss": 0.3972, "step": 252210 }, { "epoch": 72.56041426927503, "grad_norm": 0.7383754253387451, "learning_rate": 0.0005487917146144994, "loss": 0.4018, "step": 252220 }, { "epoch": 72.5632911392405, "grad_norm": 0.8930554986000061, "learning_rate": 0.0005487341772151899, "loss": 0.3132, "step": 252230 }, { "epoch": 72.56616800920598, "grad_norm": 0.7620847225189209, "learning_rate": 0.0005486766398158803, "loss": 0.4476, "step": 252240 }, { "epoch": 72.56904487917146, "grad_norm": 1.4808343648910522, "learning_rate": 0.0005486191024165707, "loss": 0.4455, "step": 252250 }, { "epoch": 72.57192174913693, "grad_norm": 1.6038213968276978, "learning_rate": 0.0005485615650172612, "loss": 0.3477, "step": 252260 }, { "epoch": 72.57479861910241, "grad_norm": 0.7538008093833923, "learning_rate": 0.0005485040276179517, "loss": 0.4071, "step": 252270 }, { "epoch": 72.5776754890679, "grad_norm": 1.4373692274093628, "learning_rate": 0.0005484464902186421, "loss": 0.5423, "step": 252280 }, { "epoch": 72.58055235903338, "grad_norm": 1.6545549631118774, "learning_rate": 0.0005483889528193326, "loss": 0.3963, "step": 252290 }, { "epoch": 72.58342922899885, "grad_norm": 0.9093394875526428, "learning_rate": 0.000548331415420023, "loss": 0.4749, "step": 252300 }, { "epoch": 72.58630609896433, "grad_norm": 1.0015934705734253, "learning_rate": 0.0005482738780207135, "loss": 0.4814, "step": 252310 }, { "epoch": 72.5891829689298, "grad_norm": 1.9596529006958008, "learning_rate": 0.0005482163406214039, "loss": 0.3835, "step": 252320 }, { "epoch": 72.59205983889528, "grad_norm": 0.836033284664154, "learning_rate": 0.0005481588032220943, "loss": 0.3562, "step": 252330 }, { "epoch": 72.59493670886076, "grad_norm": 0.9211058020591736, "learning_rate": 0.0005481012658227848, "loss": 0.4548, "step": 252340 }, { "epoch": 72.59781357882623, "grad_norm": 0.9907534122467041, "learning_rate": 0.0005480437284234753, "loss": 0.4427, "step": 252350 }, { "epoch": 72.60069044879171, "grad_norm": 0.5387109518051147, "learning_rate": 0.0005479861910241657, "loss": 0.3862, "step": 252360 }, { "epoch": 72.60356731875719, "grad_norm": 1.0335755348205566, "learning_rate": 0.0005479286536248562, "loss": 0.4499, "step": 252370 }, { "epoch": 72.60644418872268, "grad_norm": 1.1296316385269165, "learning_rate": 0.0005478711162255467, "loss": 0.3968, "step": 252380 }, { "epoch": 72.60932105868815, "grad_norm": 0.9746941924095154, "learning_rate": 0.000547813578826237, "loss": 0.3713, "step": 252390 }, { "epoch": 72.61219792865363, "grad_norm": 1.3738399744033813, "learning_rate": 0.0005477560414269275, "loss": 0.5304, "step": 252400 }, { "epoch": 72.6150747986191, "grad_norm": 0.8600170612335205, "learning_rate": 0.000547698504027618, "loss": 0.3563, "step": 252410 }, { "epoch": 72.61795166858458, "grad_norm": 1.0439883470535278, "learning_rate": 0.0005476409666283084, "loss": 0.5281, "step": 252420 }, { "epoch": 72.62082853855006, "grad_norm": 2.430509567260742, "learning_rate": 0.0005475834292289988, "loss": 0.4529, "step": 252430 }, { "epoch": 72.62370540851553, "grad_norm": 0.7613155841827393, "learning_rate": 0.0005475258918296893, "loss": 0.4234, "step": 252440 }, { "epoch": 72.62658227848101, "grad_norm": 1.5819616317749023, "learning_rate": 0.0005474683544303797, "loss": 0.3484, "step": 252450 }, { "epoch": 72.62945914844649, "grad_norm": 0.924505352973938, "learning_rate": 0.0005474108170310702, "loss": 0.3272, "step": 252460 }, { "epoch": 72.63233601841196, "grad_norm": 1.0333924293518066, "learning_rate": 0.0005473532796317607, "loss": 0.4423, "step": 252470 }, { "epoch": 72.63521288837744, "grad_norm": 0.6141085624694824, "learning_rate": 0.0005472957422324511, "loss": 0.3843, "step": 252480 }, { "epoch": 72.63808975834293, "grad_norm": 1.5283511877059937, "learning_rate": 0.0005472382048331416, "loss": 0.417, "step": 252490 }, { "epoch": 72.6409666283084, "grad_norm": 1.5438445806503296, "learning_rate": 0.000547180667433832, "loss": 0.5123, "step": 252500 }, { "epoch": 72.64384349827388, "grad_norm": 1.9205422401428223, "learning_rate": 0.0005471231300345224, "loss": 0.4526, "step": 252510 }, { "epoch": 72.64672036823936, "grad_norm": 2.9214835166931152, "learning_rate": 0.0005470655926352129, "loss": 0.4463, "step": 252520 }, { "epoch": 72.64959723820483, "grad_norm": 0.9692912697792053, "learning_rate": 0.0005470080552359034, "loss": 0.3783, "step": 252530 }, { "epoch": 72.65247410817031, "grad_norm": 0.8315443396568298, "learning_rate": 0.0005469505178365937, "loss": 0.4344, "step": 252540 }, { "epoch": 72.65535097813579, "grad_norm": 2.3181540966033936, "learning_rate": 0.0005468929804372842, "loss": 0.4121, "step": 252550 }, { "epoch": 72.65822784810126, "grad_norm": 1.8328970670700073, "learning_rate": 0.0005468354430379748, "loss": 0.3451, "step": 252560 }, { "epoch": 72.66110471806674, "grad_norm": 2.472029447555542, "learning_rate": 0.0005467779056386651, "loss": 0.4817, "step": 252570 }, { "epoch": 72.66398158803221, "grad_norm": 2.192012310028076, "learning_rate": 0.0005467203682393556, "loss": 0.3494, "step": 252580 }, { "epoch": 72.6668584579977, "grad_norm": 1.1893230676651, "learning_rate": 0.0005466628308400461, "loss": 0.3278, "step": 252590 }, { "epoch": 72.66973532796318, "grad_norm": 2.630925416946411, "learning_rate": 0.0005466052934407365, "loss": 0.3936, "step": 252600 }, { "epoch": 72.67261219792866, "grad_norm": 1.269976258277893, "learning_rate": 0.0005465477560414269, "loss": 0.3811, "step": 252610 }, { "epoch": 72.67548906789413, "grad_norm": 0.7872975468635559, "learning_rate": 0.0005464902186421174, "loss": 0.4073, "step": 252620 }, { "epoch": 72.67836593785961, "grad_norm": 1.1714816093444824, "learning_rate": 0.0005464326812428078, "loss": 0.371, "step": 252630 }, { "epoch": 72.68124280782509, "grad_norm": 0.8150002360343933, "learning_rate": 0.0005463751438434983, "loss": 0.3801, "step": 252640 }, { "epoch": 72.68411967779056, "grad_norm": 1.2186269760131836, "learning_rate": 0.0005463176064441888, "loss": 0.423, "step": 252650 }, { "epoch": 72.68699654775604, "grad_norm": 0.6939647793769836, "learning_rate": 0.0005462600690448792, "loss": 0.3432, "step": 252660 }, { "epoch": 72.68987341772151, "grad_norm": 0.8475246429443359, "learning_rate": 0.0005462025316455697, "loss": 0.4248, "step": 252670 }, { "epoch": 72.69275028768699, "grad_norm": 2.6724607944488525, "learning_rate": 0.0005461449942462601, "loss": 0.3714, "step": 252680 }, { "epoch": 72.69562715765247, "grad_norm": 1.2209116220474243, "learning_rate": 0.0005460874568469505, "loss": 0.4625, "step": 252690 }, { "epoch": 72.69850402761796, "grad_norm": 1.8380491733551025, "learning_rate": 0.000546029919447641, "loss": 0.4346, "step": 252700 }, { "epoch": 72.70138089758343, "grad_norm": 1.3600432872772217, "learning_rate": 0.0005459723820483315, "loss": 0.4497, "step": 252710 }, { "epoch": 72.70425776754891, "grad_norm": 1.4693928956985474, "learning_rate": 0.0005459148446490218, "loss": 0.3903, "step": 252720 }, { "epoch": 72.70713463751439, "grad_norm": 0.9389592409133911, "learning_rate": 0.0005458573072497123, "loss": 0.4127, "step": 252730 }, { "epoch": 72.71001150747986, "grad_norm": 1.2401150465011597, "learning_rate": 0.0005457997698504029, "loss": 0.4228, "step": 252740 }, { "epoch": 72.71288837744534, "grad_norm": 1.182723879814148, "learning_rate": 0.0005457422324510932, "loss": 0.4521, "step": 252750 }, { "epoch": 72.71576524741081, "grad_norm": 1.100732445716858, "learning_rate": 0.0005456846950517837, "loss": 0.3841, "step": 252760 }, { "epoch": 72.71864211737629, "grad_norm": 1.1834229230880737, "learning_rate": 0.0005456271576524742, "loss": 0.4341, "step": 252770 }, { "epoch": 72.72151898734177, "grad_norm": 0.8574690818786621, "learning_rate": 0.0005455696202531646, "loss": 0.4609, "step": 252780 }, { "epoch": 72.72439585730724, "grad_norm": 0.946158766746521, "learning_rate": 0.000545512082853855, "loss": 0.5023, "step": 252790 }, { "epoch": 72.72727272727273, "grad_norm": 0.8211731314659119, "learning_rate": 0.0005454545454545455, "loss": 0.4043, "step": 252800 }, { "epoch": 72.73014959723821, "grad_norm": 1.1070855855941772, "learning_rate": 0.0005453970080552359, "loss": 0.3379, "step": 252810 }, { "epoch": 72.73302646720369, "grad_norm": 1.2842025756835938, "learning_rate": 0.0005453394706559264, "loss": 0.4262, "step": 252820 }, { "epoch": 72.73590333716916, "grad_norm": 1.2688795328140259, "learning_rate": 0.0005452819332566168, "loss": 0.4104, "step": 252830 }, { "epoch": 72.73878020713464, "grad_norm": 0.8414679765701294, "learning_rate": 0.0005452243958573072, "loss": 0.4278, "step": 252840 }, { "epoch": 72.74165707710011, "grad_norm": 1.2529528141021729, "learning_rate": 0.0005451668584579978, "loss": 0.349, "step": 252850 }, { "epoch": 72.74453394706559, "grad_norm": 1.9175480604171753, "learning_rate": 0.0005451093210586882, "loss": 0.4862, "step": 252860 }, { "epoch": 72.74741081703107, "grad_norm": 1.9215716123580933, "learning_rate": 0.0005450517836593786, "loss": 0.4863, "step": 252870 }, { "epoch": 72.75028768699654, "grad_norm": 1.7642133235931396, "learning_rate": 0.0005449942462600691, "loss": 0.4191, "step": 252880 }, { "epoch": 72.75316455696202, "grad_norm": 1.413770318031311, "learning_rate": 0.0005449367088607596, "loss": 0.406, "step": 252890 }, { "epoch": 72.75604142692751, "grad_norm": 1.1182845830917358, "learning_rate": 0.0005448791714614499, "loss": 0.4647, "step": 252900 }, { "epoch": 72.75891829689299, "grad_norm": 0.971051812171936, "learning_rate": 0.0005448216340621404, "loss": 0.3984, "step": 252910 }, { "epoch": 72.76179516685846, "grad_norm": 1.1217118501663208, "learning_rate": 0.0005447640966628309, "loss": 0.3596, "step": 252920 }, { "epoch": 72.76467203682394, "grad_norm": 1.070016860961914, "learning_rate": 0.0005447065592635213, "loss": 0.4048, "step": 252930 }, { "epoch": 72.76754890678941, "grad_norm": 1.421973466873169, "learning_rate": 0.0005446490218642117, "loss": 0.4153, "step": 252940 }, { "epoch": 72.77042577675489, "grad_norm": 2.8859457969665527, "learning_rate": 0.0005445914844649021, "loss": 0.4757, "step": 252950 }, { "epoch": 72.77330264672037, "grad_norm": 0.965793251991272, "learning_rate": 0.0005445339470655927, "loss": 0.4158, "step": 252960 }, { "epoch": 72.77617951668584, "grad_norm": 1.3255844116210938, "learning_rate": 0.0005444764096662831, "loss": 0.3598, "step": 252970 }, { "epoch": 72.77905638665132, "grad_norm": 0.894175112247467, "learning_rate": 0.0005444188722669735, "loss": 0.3739, "step": 252980 }, { "epoch": 72.7819332566168, "grad_norm": 2.2038769721984863, "learning_rate": 0.000544361334867664, "loss": 0.4252, "step": 252990 }, { "epoch": 72.78481012658227, "grad_norm": 1.306136131286621, "learning_rate": 0.0005443037974683545, "loss": 0.3886, "step": 253000 }, { "epoch": 72.78768699654776, "grad_norm": 1.5833299160003662, "learning_rate": 0.0005442462600690448, "loss": 0.4101, "step": 253010 }, { "epoch": 72.79056386651324, "grad_norm": 1.0380581617355347, "learning_rate": 0.0005441887226697353, "loss": 0.3926, "step": 253020 }, { "epoch": 72.79344073647871, "grad_norm": 1.7577245235443115, "learning_rate": 0.0005441311852704258, "loss": 0.4615, "step": 253030 }, { "epoch": 72.79631760644419, "grad_norm": 2.73073410987854, "learning_rate": 0.0005440736478711162, "loss": 0.4864, "step": 253040 }, { "epoch": 72.79919447640967, "grad_norm": 0.8630093932151794, "learning_rate": 0.0005440161104718066, "loss": 0.3461, "step": 253050 }, { "epoch": 72.80207134637514, "grad_norm": 0.8566406965255737, "learning_rate": 0.0005439585730724972, "loss": 0.4128, "step": 253060 }, { "epoch": 72.80494821634062, "grad_norm": 1.1698323488235474, "learning_rate": 0.0005439010356731876, "loss": 0.344, "step": 253070 }, { "epoch": 72.8078250863061, "grad_norm": 1.1916847229003906, "learning_rate": 0.000543843498273878, "loss": 0.4682, "step": 253080 }, { "epoch": 72.81070195627157, "grad_norm": 1.140805959701538, "learning_rate": 0.0005437859608745685, "loss": 0.4501, "step": 253090 }, { "epoch": 72.81357882623705, "grad_norm": 1.8291977643966675, "learning_rate": 0.0005437284234752589, "loss": 0.4755, "step": 253100 }, { "epoch": 72.81645569620254, "grad_norm": 1.4154094457626343, "learning_rate": 0.0005436708860759494, "loss": 0.3512, "step": 253110 }, { "epoch": 72.81933256616801, "grad_norm": 1.7992600202560425, "learning_rate": 0.0005436133486766398, "loss": 0.4849, "step": 253120 }, { "epoch": 72.82220943613349, "grad_norm": 2.15057373046875, "learning_rate": 0.0005435558112773302, "loss": 0.4001, "step": 253130 }, { "epoch": 72.82508630609897, "grad_norm": 0.9093538522720337, "learning_rate": 0.0005434982738780207, "loss": 0.4124, "step": 253140 }, { "epoch": 72.82796317606444, "grad_norm": 0.9434348344802856, "learning_rate": 0.0005434407364787112, "loss": 0.4191, "step": 253150 }, { "epoch": 72.83084004602992, "grad_norm": 0.9630124568939209, "learning_rate": 0.0005433831990794015, "loss": 0.3729, "step": 253160 }, { "epoch": 72.8337169159954, "grad_norm": 0.9077712297439575, "learning_rate": 0.0005433256616800921, "loss": 0.3547, "step": 253170 }, { "epoch": 72.83659378596087, "grad_norm": 1.6222827434539795, "learning_rate": 0.0005432681242807826, "loss": 0.483, "step": 253180 }, { "epoch": 72.83947065592635, "grad_norm": 0.9030914306640625, "learning_rate": 0.0005432105868814729, "loss": 0.3917, "step": 253190 }, { "epoch": 72.84234752589182, "grad_norm": 1.2522447109222412, "learning_rate": 0.0005431530494821634, "loss": 0.409, "step": 253200 }, { "epoch": 72.8452243958573, "grad_norm": 1.4981595277786255, "learning_rate": 0.0005430955120828539, "loss": 0.3905, "step": 253210 }, { "epoch": 72.84810126582279, "grad_norm": 1.0161564350128174, "learning_rate": 0.0005430379746835443, "loss": 0.4125, "step": 253220 }, { "epoch": 72.85097813578827, "grad_norm": 0.7367795705795288, "learning_rate": 0.0005429804372842347, "loss": 0.4081, "step": 253230 }, { "epoch": 72.85385500575374, "grad_norm": 1.5915346145629883, "learning_rate": 0.0005429228998849252, "loss": 0.4902, "step": 253240 }, { "epoch": 72.85673187571922, "grad_norm": 1.6940313577651978, "learning_rate": 0.0005428653624856156, "loss": 0.4885, "step": 253250 }, { "epoch": 72.8596087456847, "grad_norm": 1.1396446228027344, "learning_rate": 0.0005428078250863061, "loss": 0.3327, "step": 253260 }, { "epoch": 72.86248561565017, "grad_norm": 0.8674478530883789, "learning_rate": 0.0005427502876869966, "loss": 0.4907, "step": 253270 }, { "epoch": 72.86536248561565, "grad_norm": 1.8170223236083984, "learning_rate": 0.000542692750287687, "loss": 0.4441, "step": 253280 }, { "epoch": 72.86823935558112, "grad_norm": 1.9182627201080322, "learning_rate": 0.0005426352128883775, "loss": 0.4559, "step": 253290 }, { "epoch": 72.8711162255466, "grad_norm": 1.5993380546569824, "learning_rate": 0.0005425776754890679, "loss": 0.3708, "step": 253300 }, { "epoch": 72.87399309551208, "grad_norm": 0.6614881753921509, "learning_rate": 0.0005425201380897583, "loss": 0.4188, "step": 253310 }, { "epoch": 72.87686996547757, "grad_norm": 1.3829187154769897, "learning_rate": 0.0005424626006904488, "loss": 0.3578, "step": 253320 }, { "epoch": 72.87974683544304, "grad_norm": 2.430649518966675, "learning_rate": 0.0005424050632911393, "loss": 0.4423, "step": 253330 }, { "epoch": 72.88262370540852, "grad_norm": 0.7888081669807434, "learning_rate": 0.0005423475258918296, "loss": 0.3419, "step": 253340 }, { "epoch": 72.885500575374, "grad_norm": 1.1348109245300293, "learning_rate": 0.0005422899884925202, "loss": 0.4124, "step": 253350 }, { "epoch": 72.88837744533947, "grad_norm": 0.9166552424430847, "learning_rate": 0.0005422324510932107, "loss": 0.4409, "step": 253360 }, { "epoch": 72.89125431530495, "grad_norm": 0.8009887337684631, "learning_rate": 0.000542174913693901, "loss": 0.4028, "step": 253370 }, { "epoch": 72.89413118527042, "grad_norm": 2.0599284172058105, "learning_rate": 0.0005421173762945915, "loss": 0.4666, "step": 253380 }, { "epoch": 72.8970080552359, "grad_norm": 1.3003382682800293, "learning_rate": 0.000542059838895282, "loss": 0.3604, "step": 253390 }, { "epoch": 72.89988492520138, "grad_norm": 1.9295933246612549, "learning_rate": 0.0005420023014959724, "loss": 0.415, "step": 253400 }, { "epoch": 72.90276179516685, "grad_norm": 1.4261690378189087, "learning_rate": 0.0005419447640966628, "loss": 0.4131, "step": 253410 }, { "epoch": 72.90563866513233, "grad_norm": 0.9216079711914062, "learning_rate": 0.0005418872266973533, "loss": 0.3448, "step": 253420 }, { "epoch": 72.90851553509782, "grad_norm": 1.5158851146697998, "learning_rate": 0.0005418296892980437, "loss": 0.5815, "step": 253430 }, { "epoch": 72.9113924050633, "grad_norm": 0.8605353832244873, "learning_rate": 0.0005417721518987342, "loss": 0.3831, "step": 253440 }, { "epoch": 72.91426927502877, "grad_norm": 1.0721501111984253, "learning_rate": 0.0005417146144994246, "loss": 0.4027, "step": 253450 }, { "epoch": 72.91714614499425, "grad_norm": 0.7338185906410217, "learning_rate": 0.0005416570771001151, "loss": 0.394, "step": 253460 }, { "epoch": 72.92002301495972, "grad_norm": 1.2862226963043213, "learning_rate": 0.0005415995397008056, "loss": 0.4206, "step": 253470 }, { "epoch": 72.9228998849252, "grad_norm": 0.9295480251312256, "learning_rate": 0.000541542002301496, "loss": 0.3939, "step": 253480 }, { "epoch": 72.92577675489068, "grad_norm": 2.158773899078369, "learning_rate": 0.0005414844649021864, "loss": 0.4281, "step": 253490 }, { "epoch": 72.92865362485615, "grad_norm": 1.8012149333953857, "learning_rate": 0.0005414269275028769, "loss": 0.5104, "step": 253500 }, { "epoch": 72.93153049482163, "grad_norm": 1.562862515449524, "learning_rate": 0.0005413693901035674, "loss": 0.3715, "step": 253510 }, { "epoch": 72.9344073647871, "grad_norm": 1.5252550840377808, "learning_rate": 0.0005413118527042577, "loss": 0.3734, "step": 253520 }, { "epoch": 72.9372842347526, "grad_norm": 2.181475877761841, "learning_rate": 0.0005412543153049482, "loss": 0.5721, "step": 253530 }, { "epoch": 72.94016110471807, "grad_norm": 1.0365469455718994, "learning_rate": 0.0005411967779056387, "loss": 0.4292, "step": 253540 }, { "epoch": 72.94303797468355, "grad_norm": 1.301337718963623, "learning_rate": 0.0005411392405063291, "loss": 0.411, "step": 253550 }, { "epoch": 72.94591484464902, "grad_norm": 2.1862306594848633, "learning_rate": 0.0005410817031070196, "loss": 0.3901, "step": 253560 }, { "epoch": 72.9487917146145, "grad_norm": 0.7172898054122925, "learning_rate": 0.0005410241657077101, "loss": 0.4249, "step": 253570 }, { "epoch": 72.95166858457998, "grad_norm": 1.563033938407898, "learning_rate": 0.0005409666283084005, "loss": 0.4739, "step": 253580 }, { "epoch": 72.95454545454545, "grad_norm": 6.369757175445557, "learning_rate": 0.0005409090909090909, "loss": 0.424, "step": 253590 }, { "epoch": 72.95742232451093, "grad_norm": 0.9922440648078918, "learning_rate": 0.0005408515535097814, "loss": 0.3827, "step": 253600 }, { "epoch": 72.9602991944764, "grad_norm": 1.5578949451446533, "learning_rate": 0.0005407940161104718, "loss": 0.367, "step": 253610 }, { "epoch": 72.96317606444188, "grad_norm": 1.0156071186065674, "learning_rate": 0.0005407364787111623, "loss": 0.4349, "step": 253620 }, { "epoch": 72.96605293440736, "grad_norm": 0.7097142934799194, "learning_rate": 0.0005406789413118527, "loss": 0.3114, "step": 253630 }, { "epoch": 72.96892980437285, "grad_norm": 3.225144624710083, "learning_rate": 0.0005406214039125432, "loss": 0.438, "step": 253640 }, { "epoch": 72.97180667433832, "grad_norm": 0.6382676959037781, "learning_rate": 0.0005405638665132337, "loss": 0.379, "step": 253650 }, { "epoch": 72.9746835443038, "grad_norm": 1.6619380712509155, "learning_rate": 0.0005405063291139241, "loss": 0.4434, "step": 253660 }, { "epoch": 72.97756041426928, "grad_norm": 1.490031361579895, "learning_rate": 0.0005404487917146145, "loss": 0.472, "step": 253670 }, { "epoch": 72.98043728423475, "grad_norm": 0.8777153491973877, "learning_rate": 0.000540391254315305, "loss": 0.456, "step": 253680 }, { "epoch": 72.98331415420023, "grad_norm": 0.9937700629234314, "learning_rate": 0.0005403337169159955, "loss": 0.3742, "step": 253690 }, { "epoch": 72.9861910241657, "grad_norm": 1.5373578071594238, "learning_rate": 0.0005402761795166858, "loss": 0.51, "step": 253700 }, { "epoch": 72.98906789413118, "grad_norm": 1.4946951866149902, "learning_rate": 0.0005402186421173763, "loss": 0.3847, "step": 253710 }, { "epoch": 72.99194476409666, "grad_norm": 0.8992893099784851, "learning_rate": 0.0005401611047180668, "loss": 0.4835, "step": 253720 }, { "epoch": 72.99482163406213, "grad_norm": 1.133650779724121, "learning_rate": 0.0005401035673187572, "loss": 0.3992, "step": 253730 }, { "epoch": 72.99769850402762, "grad_norm": 3.843808889389038, "learning_rate": 0.0005400460299194476, "loss": 0.4075, "step": 253740 }, { "epoch": 73.0005753739931, "grad_norm": 0.9551454186439514, "learning_rate": 0.0005399884925201382, "loss": 0.4115, "step": 253750 }, { "epoch": 73.00345224395858, "grad_norm": 1.6095249652862549, "learning_rate": 0.0005399309551208286, "loss": 0.467, "step": 253760 }, { "epoch": 73.00632911392405, "grad_norm": 1.0850510597229004, "learning_rate": 0.000539873417721519, "loss": 0.3818, "step": 253770 }, { "epoch": 73.00920598388953, "grad_norm": 0.9222824573516846, "learning_rate": 0.0005398158803222094, "loss": 0.2935, "step": 253780 }, { "epoch": 73.012082853855, "grad_norm": 1.331540584564209, "learning_rate": 0.0005397583429228999, "loss": 0.2831, "step": 253790 }, { "epoch": 73.01495972382048, "grad_norm": 1.0817757844924927, "learning_rate": 0.0005397008055235904, "loss": 0.4075, "step": 253800 }, { "epoch": 73.01783659378596, "grad_norm": 1.259873390197754, "learning_rate": 0.0005396432681242807, "loss": 0.4669, "step": 253810 }, { "epoch": 73.02071346375143, "grad_norm": 1.7332935333251953, "learning_rate": 0.0005395857307249712, "loss": 0.4472, "step": 253820 }, { "epoch": 73.02359033371691, "grad_norm": 0.8413995504379272, "learning_rate": 0.0005395281933256617, "loss": 0.3636, "step": 253830 }, { "epoch": 73.02646720368239, "grad_norm": 0.9616171717643738, "learning_rate": 0.0005394706559263521, "loss": 0.2898, "step": 253840 }, { "epoch": 73.02934407364788, "grad_norm": 0.854846715927124, "learning_rate": 0.0005394131185270425, "loss": 0.4575, "step": 253850 }, { "epoch": 73.03222094361335, "grad_norm": 3.1987125873565674, "learning_rate": 0.0005393555811277331, "loss": 0.4395, "step": 253860 }, { "epoch": 73.03509781357883, "grad_norm": 1.0839121341705322, "learning_rate": 0.0005392980437284235, "loss": 0.3554, "step": 253870 }, { "epoch": 73.0379746835443, "grad_norm": 1.2941867113113403, "learning_rate": 0.0005392405063291139, "loss": 0.3415, "step": 253880 }, { "epoch": 73.04085155350978, "grad_norm": 1.2877033948898315, "learning_rate": 0.0005391829689298044, "loss": 0.4713, "step": 253890 }, { "epoch": 73.04372842347526, "grad_norm": 1.3783563375473022, "learning_rate": 0.0005391254315304948, "loss": 0.3848, "step": 253900 }, { "epoch": 73.04660529344073, "grad_norm": 1.5416862964630127, "learning_rate": 0.0005390678941311853, "loss": 0.3339, "step": 253910 }, { "epoch": 73.04948216340621, "grad_norm": 0.8015599846839905, "learning_rate": 0.0005390103567318757, "loss": 0.3646, "step": 253920 }, { "epoch": 73.05235903337169, "grad_norm": 1.5948383808135986, "learning_rate": 0.0005389528193325661, "loss": 0.3254, "step": 253930 }, { "epoch": 73.05523590333716, "grad_norm": 1.6003894805908203, "learning_rate": 0.0005388952819332566, "loss": 0.3917, "step": 253940 }, { "epoch": 73.05811277330265, "grad_norm": 1.479936957359314, "learning_rate": 0.0005388377445339471, "loss": 0.3724, "step": 253950 }, { "epoch": 73.06098964326813, "grad_norm": 0.704688549041748, "learning_rate": 0.0005387802071346374, "loss": 0.4016, "step": 253960 }, { "epoch": 73.0638665132336, "grad_norm": 1.097231149673462, "learning_rate": 0.000538722669735328, "loss": 0.3446, "step": 253970 }, { "epoch": 73.06674338319908, "grad_norm": 1.0933232307434082, "learning_rate": 0.0005386651323360185, "loss": 0.4856, "step": 253980 }, { "epoch": 73.06962025316456, "grad_norm": 0.666200578212738, "learning_rate": 0.0005386075949367088, "loss": 0.3312, "step": 253990 }, { "epoch": 73.07249712313003, "grad_norm": 1.0177584886550903, "learning_rate": 0.0005385500575373993, "loss": 0.4153, "step": 254000 }, { "epoch": 73.07537399309551, "grad_norm": 1.8256652355194092, "learning_rate": 0.0005384925201380898, "loss": 0.385, "step": 254010 }, { "epoch": 73.07825086306099, "grad_norm": 1.0282435417175293, "learning_rate": 0.0005384349827387802, "loss": 0.3921, "step": 254020 }, { "epoch": 73.08112773302646, "grad_norm": 1.1748642921447754, "learning_rate": 0.0005383774453394706, "loss": 0.3056, "step": 254030 }, { "epoch": 73.08400460299194, "grad_norm": 1.2645435333251953, "learning_rate": 0.0005383199079401612, "loss": 0.3567, "step": 254040 }, { "epoch": 73.08688147295742, "grad_norm": 1.2441221475601196, "learning_rate": 0.0005382623705408515, "loss": 0.3304, "step": 254050 }, { "epoch": 73.0897583429229, "grad_norm": 1.2904106378555298, "learning_rate": 0.000538204833141542, "loss": 0.4542, "step": 254060 }, { "epoch": 73.09263521288838, "grad_norm": 1.561191201210022, "learning_rate": 0.0005381472957422325, "loss": 0.3686, "step": 254070 }, { "epoch": 73.09551208285386, "grad_norm": 1.3207268714904785, "learning_rate": 0.0005380897583429229, "loss": 0.3488, "step": 254080 }, { "epoch": 73.09838895281933, "grad_norm": 1.4863290786743164, "learning_rate": 0.0005380322209436134, "loss": 0.3747, "step": 254090 }, { "epoch": 73.10126582278481, "grad_norm": 1.6364939212799072, "learning_rate": 0.0005379746835443038, "loss": 0.4358, "step": 254100 }, { "epoch": 73.10414269275029, "grad_norm": 0.7688351273536682, "learning_rate": 0.0005379171461449942, "loss": 0.3165, "step": 254110 }, { "epoch": 73.10701956271576, "grad_norm": 1.1252657175064087, "learning_rate": 0.0005378596087456847, "loss": 0.3769, "step": 254120 }, { "epoch": 73.10989643268124, "grad_norm": 1.5926257371902466, "learning_rate": 0.0005378020713463752, "loss": 0.3597, "step": 254130 }, { "epoch": 73.11277330264672, "grad_norm": 1.62119460105896, "learning_rate": 0.0005377445339470655, "loss": 0.3168, "step": 254140 }, { "epoch": 73.11565017261219, "grad_norm": 0.7905897498130798, "learning_rate": 0.0005376869965477561, "loss": 0.3629, "step": 254150 }, { "epoch": 73.11852704257768, "grad_norm": 0.8461269736289978, "learning_rate": 0.0005376294591484466, "loss": 0.3767, "step": 254160 }, { "epoch": 73.12140391254316, "grad_norm": 1.6502540111541748, "learning_rate": 0.0005375719217491369, "loss": 0.3562, "step": 254170 }, { "epoch": 73.12428078250863, "grad_norm": 0.9143930077552795, "learning_rate": 0.0005375143843498274, "loss": 0.4268, "step": 254180 }, { "epoch": 73.12715765247411, "grad_norm": 0.8912720680236816, "learning_rate": 0.0005374568469505179, "loss": 0.3733, "step": 254190 }, { "epoch": 73.13003452243959, "grad_norm": 1.9800896644592285, "learning_rate": 0.0005373993095512083, "loss": 0.3975, "step": 254200 }, { "epoch": 73.13291139240506, "grad_norm": 1.0372129678726196, "learning_rate": 0.0005373417721518987, "loss": 0.3675, "step": 254210 }, { "epoch": 73.13578826237054, "grad_norm": 1.7006419897079468, "learning_rate": 0.0005372842347525892, "loss": 0.5062, "step": 254220 }, { "epoch": 73.13866513233602, "grad_norm": 0.8895530700683594, "learning_rate": 0.0005372266973532796, "loss": 0.4344, "step": 254230 }, { "epoch": 73.14154200230149, "grad_norm": 1.768884301185608, "learning_rate": 0.0005371691599539701, "loss": 0.4901, "step": 254240 }, { "epoch": 73.14441887226697, "grad_norm": 2.0512495040893555, "learning_rate": 0.0005371116225546605, "loss": 0.558, "step": 254250 }, { "epoch": 73.14729574223244, "grad_norm": 1.7579305171966553, "learning_rate": 0.000537054085155351, "loss": 0.4207, "step": 254260 }, { "epoch": 73.15017261219793, "grad_norm": 1.6142795085906982, "learning_rate": 0.0005369965477560415, "loss": 0.4136, "step": 254270 }, { "epoch": 73.15304948216341, "grad_norm": 1.302087664604187, "learning_rate": 0.0005369390103567319, "loss": 0.4484, "step": 254280 }, { "epoch": 73.15592635212889, "grad_norm": 1.0606591701507568, "learning_rate": 0.0005368814729574223, "loss": 0.3665, "step": 254290 }, { "epoch": 73.15880322209436, "grad_norm": 1.3160072565078735, "learning_rate": 0.0005368239355581128, "loss": 0.3898, "step": 254300 }, { "epoch": 73.16168009205984, "grad_norm": 1.2992075681686401, "learning_rate": 0.0005367663981588033, "loss": 0.3427, "step": 254310 }, { "epoch": 73.16455696202532, "grad_norm": 2.1171224117279053, "learning_rate": 0.0005367088607594936, "loss": 0.35, "step": 254320 }, { "epoch": 73.16743383199079, "grad_norm": 2.1689388751983643, "learning_rate": 0.0005366513233601842, "loss": 0.4273, "step": 254330 }, { "epoch": 73.17031070195627, "grad_norm": 1.188316822052002, "learning_rate": 0.0005365937859608746, "loss": 0.3386, "step": 254340 }, { "epoch": 73.17318757192174, "grad_norm": 0.9572368860244751, "learning_rate": 0.000536536248561565, "loss": 0.274, "step": 254350 }, { "epoch": 73.17606444188722, "grad_norm": 1.0354653596878052, "learning_rate": 0.0005364787111622554, "loss": 0.3998, "step": 254360 }, { "epoch": 73.17894131185271, "grad_norm": 0.7568556070327759, "learning_rate": 0.000536421173762946, "loss": 0.375, "step": 254370 }, { "epoch": 73.18181818181819, "grad_norm": 0.9277712106704712, "learning_rate": 0.0005363636363636364, "loss": 0.3271, "step": 254380 }, { "epoch": 73.18469505178366, "grad_norm": 1.0315948724746704, "learning_rate": 0.0005363060989643268, "loss": 0.3353, "step": 254390 }, { "epoch": 73.18757192174914, "grad_norm": 0.9898383021354675, "learning_rate": 0.0005362485615650173, "loss": 0.387, "step": 254400 }, { "epoch": 73.19044879171462, "grad_norm": 2.15417742729187, "learning_rate": 0.0005361910241657077, "loss": 0.3721, "step": 254410 }, { "epoch": 73.19332566168009, "grad_norm": 1.5255359411239624, "learning_rate": 0.0005361334867663982, "loss": 0.3855, "step": 254420 }, { "epoch": 73.19620253164557, "grad_norm": 0.8710078597068787, "learning_rate": 0.0005360759493670886, "loss": 0.454, "step": 254430 }, { "epoch": 73.19907940161104, "grad_norm": 1.3021960258483887, "learning_rate": 0.0005360184119677791, "loss": 0.4275, "step": 254440 }, { "epoch": 73.20195627157652, "grad_norm": 1.301352858543396, "learning_rate": 0.0005359608745684695, "loss": 0.2902, "step": 254450 }, { "epoch": 73.204833141542, "grad_norm": 2.3076651096343994, "learning_rate": 0.00053590333716916, "loss": 0.3733, "step": 254460 }, { "epoch": 73.20771001150747, "grad_norm": 1.2982248067855835, "learning_rate": 0.0005358457997698504, "loss": 0.3898, "step": 254470 }, { "epoch": 73.21058688147296, "grad_norm": 1.3332860469818115, "learning_rate": 0.0005357882623705409, "loss": 0.3522, "step": 254480 }, { "epoch": 73.21346375143844, "grad_norm": 1.1248235702514648, "learning_rate": 0.0005357307249712314, "loss": 0.3755, "step": 254490 }, { "epoch": 73.21634062140392, "grad_norm": 0.9187774062156677, "learning_rate": 0.0005356731875719217, "loss": 0.3592, "step": 254500 }, { "epoch": 73.21921749136939, "grad_norm": 0.7404766082763672, "learning_rate": 0.0005356156501726122, "loss": 0.5697, "step": 254510 }, { "epoch": 73.22209436133487, "grad_norm": 0.593626856803894, "learning_rate": 0.0005355581127733027, "loss": 0.3916, "step": 254520 }, { "epoch": 73.22497123130034, "grad_norm": 0.9237285852432251, "learning_rate": 0.0005355005753739931, "loss": 0.3562, "step": 254530 }, { "epoch": 73.22784810126582, "grad_norm": 1.135083794593811, "learning_rate": 0.0005354430379746835, "loss": 0.3534, "step": 254540 }, { "epoch": 73.2307249712313, "grad_norm": 1.3211606740951538, "learning_rate": 0.0005353855005753741, "loss": 0.3405, "step": 254550 }, { "epoch": 73.23360184119677, "grad_norm": 1.7600961923599243, "learning_rate": 0.0005353279631760645, "loss": 0.4034, "step": 254560 }, { "epoch": 73.23647871116225, "grad_norm": 1.2385244369506836, "learning_rate": 0.0005352704257767549, "loss": 0.4299, "step": 254570 }, { "epoch": 73.23935558112774, "grad_norm": 1.3501852750778198, "learning_rate": 0.0005352128883774453, "loss": 0.4355, "step": 254580 }, { "epoch": 73.24223245109322, "grad_norm": 1.9625651836395264, "learning_rate": 0.0005351553509781358, "loss": 0.3694, "step": 254590 }, { "epoch": 73.24510932105869, "grad_norm": 1.6870402097702026, "learning_rate": 0.0005350978135788263, "loss": 0.4529, "step": 254600 }, { "epoch": 73.24798619102417, "grad_norm": 1.2386692762374878, "learning_rate": 0.0005350402761795166, "loss": 0.3253, "step": 254610 }, { "epoch": 73.25086306098964, "grad_norm": 1.4288502931594849, "learning_rate": 0.0005349827387802072, "loss": 0.3395, "step": 254620 }, { "epoch": 73.25373993095512, "grad_norm": 1.7223191261291504, "learning_rate": 0.0005349252013808976, "loss": 0.3634, "step": 254630 }, { "epoch": 73.2566168009206, "grad_norm": 2.195582866668701, "learning_rate": 0.000534867663981588, "loss": 0.3942, "step": 254640 }, { "epoch": 73.25949367088607, "grad_norm": 1.754758358001709, "learning_rate": 0.0005348101265822784, "loss": 0.3795, "step": 254650 }, { "epoch": 73.26237054085155, "grad_norm": 0.8771575689315796, "learning_rate": 0.000534752589182969, "loss": 0.3876, "step": 254660 }, { "epoch": 73.26524741081703, "grad_norm": 0.9245973825454712, "learning_rate": 0.0005346950517836594, "loss": 0.3687, "step": 254670 }, { "epoch": 73.2681242807825, "grad_norm": 1.116268277168274, "learning_rate": 0.0005346375143843498, "loss": 0.3847, "step": 254680 }, { "epoch": 73.27100115074799, "grad_norm": 1.2177455425262451, "learning_rate": 0.0005345799769850403, "loss": 0.3552, "step": 254690 }, { "epoch": 73.27387802071347, "grad_norm": 1.4105150699615479, "learning_rate": 0.0005345224395857307, "loss": 0.3737, "step": 254700 }, { "epoch": 73.27675489067894, "grad_norm": 1.5271166563034058, "learning_rate": 0.0005344649021864212, "loss": 0.3228, "step": 254710 }, { "epoch": 73.27963176064442, "grad_norm": 0.6970059871673584, "learning_rate": 0.0005344073647871116, "loss": 0.3252, "step": 254720 }, { "epoch": 73.2825086306099, "grad_norm": 1.0907750129699707, "learning_rate": 0.0005343498273878021, "loss": 0.3699, "step": 254730 }, { "epoch": 73.28538550057537, "grad_norm": 1.6308671236038208, "learning_rate": 0.0005342922899884925, "loss": 0.4597, "step": 254740 }, { "epoch": 73.28826237054085, "grad_norm": 0.933899462223053, "learning_rate": 0.000534234752589183, "loss": 0.4134, "step": 254750 }, { "epoch": 73.29113924050633, "grad_norm": 1.523834228515625, "learning_rate": 0.0005341772151898733, "loss": 0.3184, "step": 254760 }, { "epoch": 73.2940161104718, "grad_norm": 0.7923101186752319, "learning_rate": 0.0005341196777905639, "loss": 0.3519, "step": 254770 }, { "epoch": 73.29689298043728, "grad_norm": 0.9228664636611938, "learning_rate": 0.0005340621403912544, "loss": 0.3922, "step": 254780 }, { "epoch": 73.29976985040277, "grad_norm": 0.8755933046340942, "learning_rate": 0.0005340046029919447, "loss": 0.3166, "step": 254790 }, { "epoch": 73.30264672036824, "grad_norm": 1.7228636741638184, "learning_rate": 0.0005339470655926352, "loss": 0.4558, "step": 254800 }, { "epoch": 73.30552359033372, "grad_norm": 2.8787333965301514, "learning_rate": 0.0005338895281933257, "loss": 0.45, "step": 254810 }, { "epoch": 73.3084004602992, "grad_norm": 1.0453386306762695, "learning_rate": 0.0005338319907940161, "loss": 0.3552, "step": 254820 }, { "epoch": 73.31127733026467, "grad_norm": 2.1066644191741943, "learning_rate": 0.0005337744533947065, "loss": 0.3792, "step": 254830 }, { "epoch": 73.31415420023015, "grad_norm": 1.0714136362075806, "learning_rate": 0.0005337169159953971, "loss": 0.3309, "step": 254840 }, { "epoch": 73.31703107019563, "grad_norm": 1.1501638889312744, "learning_rate": 0.0005336593785960874, "loss": 0.3689, "step": 254850 }, { "epoch": 73.3199079401611, "grad_norm": 1.7261704206466675, "learning_rate": 0.0005336018411967779, "loss": 0.4197, "step": 254860 }, { "epoch": 73.32278481012658, "grad_norm": 0.8763439059257507, "learning_rate": 0.0005335443037974684, "loss": 0.4205, "step": 254870 }, { "epoch": 73.32566168009205, "grad_norm": 0.9144365787506104, "learning_rate": 0.0005334867663981588, "loss": 0.3621, "step": 254880 }, { "epoch": 73.32853855005754, "grad_norm": 1.6830137968063354, "learning_rate": 0.0005334292289988493, "loss": 0.352, "step": 254890 }, { "epoch": 73.33141542002302, "grad_norm": 1.11587655544281, "learning_rate": 0.0005333716915995397, "loss": 0.3648, "step": 254900 }, { "epoch": 73.3342922899885, "grad_norm": 1.79478120803833, "learning_rate": 0.0005333141542002301, "loss": 0.4427, "step": 254910 }, { "epoch": 73.33716915995397, "grad_norm": 0.7139365077018738, "learning_rate": 0.0005332566168009206, "loss": 0.4243, "step": 254920 }, { "epoch": 73.34004602991945, "grad_norm": 1.016790747642517, "learning_rate": 0.0005331990794016111, "loss": 0.3967, "step": 254930 }, { "epoch": 73.34292289988493, "grad_norm": 2.3731842041015625, "learning_rate": 0.0005331415420023014, "loss": 0.3724, "step": 254940 }, { "epoch": 73.3457997698504, "grad_norm": 0.8512806296348572, "learning_rate": 0.000533084004602992, "loss": 0.3263, "step": 254950 }, { "epoch": 73.34867663981588, "grad_norm": 1.233871340751648, "learning_rate": 0.0005330264672036825, "loss": 0.4172, "step": 254960 }, { "epoch": 73.35155350978135, "grad_norm": 3.1053624153137207, "learning_rate": 0.0005329689298043728, "loss": 0.5047, "step": 254970 }, { "epoch": 73.35443037974683, "grad_norm": 1.0609616041183472, "learning_rate": 0.0005329113924050633, "loss": 0.3365, "step": 254980 }, { "epoch": 73.3573072497123, "grad_norm": 1.0821654796600342, "learning_rate": 0.0005328538550057538, "loss": 0.396, "step": 254990 }, { "epoch": 73.3601841196778, "grad_norm": 1.270832896232605, "learning_rate": 0.0005327963176064442, "loss": 0.3402, "step": 255000 }, { "epoch": 73.36306098964327, "grad_norm": 1.6877824068069458, "learning_rate": 0.0005327387802071346, "loss": 0.4452, "step": 255010 }, { "epoch": 73.36593785960875, "grad_norm": 1.5013870000839233, "learning_rate": 0.0005326812428078252, "loss": 0.4351, "step": 255020 }, { "epoch": 73.36881472957423, "grad_norm": 1.113426923751831, "learning_rate": 0.0005326237054085155, "loss": 0.3607, "step": 255030 }, { "epoch": 73.3716915995397, "grad_norm": 1.9968067407608032, "learning_rate": 0.000532566168009206, "loss": 0.4649, "step": 255040 }, { "epoch": 73.37456846950518, "grad_norm": 3.720273017883301, "learning_rate": 0.0005325086306098964, "loss": 0.3975, "step": 255050 }, { "epoch": 73.37744533947065, "grad_norm": 1.0448039770126343, "learning_rate": 0.0005324510932105869, "loss": 0.415, "step": 255060 }, { "epoch": 73.38032220943613, "grad_norm": 0.7426902651786804, "learning_rate": 0.0005323935558112774, "loss": 0.3319, "step": 255070 }, { "epoch": 73.3831990794016, "grad_norm": 0.7061728239059448, "learning_rate": 0.0005323360184119678, "loss": 0.4099, "step": 255080 }, { "epoch": 73.38607594936708, "grad_norm": 1.5388325452804565, "learning_rate": 0.0005322784810126582, "loss": 0.3114, "step": 255090 }, { "epoch": 73.38895281933257, "grad_norm": 1.7190169095993042, "learning_rate": 0.0005322209436133487, "loss": 0.4456, "step": 255100 }, { "epoch": 73.39182968929805, "grad_norm": 1.3058891296386719, "learning_rate": 0.0005321634062140392, "loss": 0.326, "step": 255110 }, { "epoch": 73.39470655926353, "grad_norm": 0.6425148844718933, "learning_rate": 0.0005321058688147295, "loss": 0.3925, "step": 255120 }, { "epoch": 73.397583429229, "grad_norm": 1.4127155542373657, "learning_rate": 0.0005320483314154201, "loss": 0.349, "step": 255130 }, { "epoch": 73.40046029919448, "grad_norm": 1.1550772190093994, "learning_rate": 0.0005319907940161105, "loss": 0.3694, "step": 255140 }, { "epoch": 73.40333716915995, "grad_norm": 1.3577860593795776, "learning_rate": 0.0005319332566168009, "loss": 0.4095, "step": 255150 }, { "epoch": 73.40621403912543, "grad_norm": 0.9252502918243408, "learning_rate": 0.0005318757192174913, "loss": 0.4083, "step": 255160 }, { "epoch": 73.4090909090909, "grad_norm": 1.9890356063842773, "learning_rate": 0.0005318181818181819, "loss": 0.4299, "step": 255170 }, { "epoch": 73.41196777905638, "grad_norm": 1.2030906677246094, "learning_rate": 0.0005317606444188723, "loss": 0.3678, "step": 255180 }, { "epoch": 73.41484464902186, "grad_norm": 0.7287763953208923, "learning_rate": 0.0005317031070195627, "loss": 0.3374, "step": 255190 }, { "epoch": 73.41772151898734, "grad_norm": 1.1868664026260376, "learning_rate": 0.0005316455696202532, "loss": 0.4295, "step": 255200 }, { "epoch": 73.42059838895283, "grad_norm": 2.4825966358184814, "learning_rate": 0.0005315880322209436, "loss": 0.3527, "step": 255210 }, { "epoch": 73.4234752589183, "grad_norm": 1.1656683683395386, "learning_rate": 0.0005315304948216341, "loss": 0.4643, "step": 255220 }, { "epoch": 73.42635212888378, "grad_norm": 1.120273232460022, "learning_rate": 0.0005314729574223245, "loss": 0.3873, "step": 255230 }, { "epoch": 73.42922899884925, "grad_norm": 1.3863312005996704, "learning_rate": 0.000531415420023015, "loss": 0.3062, "step": 255240 }, { "epoch": 73.43210586881473, "grad_norm": 1.0511997938156128, "learning_rate": 0.0005313578826237054, "loss": 0.4444, "step": 255250 }, { "epoch": 73.4349827387802, "grad_norm": 0.8300293684005737, "learning_rate": 0.0005313003452243959, "loss": 0.3468, "step": 255260 }, { "epoch": 73.43785960874568, "grad_norm": 0.9482238292694092, "learning_rate": 0.0005312428078250863, "loss": 0.3548, "step": 255270 }, { "epoch": 73.44073647871116, "grad_norm": 0.7038081288337708, "learning_rate": 0.0005311852704257768, "loss": 0.3174, "step": 255280 }, { "epoch": 73.44361334867664, "grad_norm": 2.182481288909912, "learning_rate": 0.0005311277330264673, "loss": 0.4289, "step": 255290 }, { "epoch": 73.44649021864211, "grad_norm": 1.669693112373352, "learning_rate": 0.0005310701956271576, "loss": 0.5302, "step": 255300 }, { "epoch": 73.4493670886076, "grad_norm": 2.2511770725250244, "learning_rate": 0.0005310126582278482, "loss": 0.548, "step": 255310 }, { "epoch": 73.45224395857308, "grad_norm": 1.7779276371002197, "learning_rate": 0.0005309551208285386, "loss": 0.4158, "step": 255320 }, { "epoch": 73.45512082853855, "grad_norm": 1.1727443933486938, "learning_rate": 0.000530897583429229, "loss": 0.3269, "step": 255330 }, { "epoch": 73.45799769850403, "grad_norm": 0.917853593826294, "learning_rate": 0.0005308400460299194, "loss": 0.4147, "step": 255340 }, { "epoch": 73.4608745684695, "grad_norm": 1.747603178024292, "learning_rate": 0.00053078250863061, "loss": 0.4814, "step": 255350 }, { "epoch": 73.46375143843498, "grad_norm": 1.462370753288269, "learning_rate": 0.0005307249712313003, "loss": 0.4124, "step": 255360 }, { "epoch": 73.46662830840046, "grad_norm": 0.8030014634132385, "learning_rate": 0.0005306674338319908, "loss": 0.3177, "step": 255370 }, { "epoch": 73.46950517836594, "grad_norm": 1.6387197971343994, "learning_rate": 0.0005306098964326812, "loss": 0.3713, "step": 255380 }, { "epoch": 73.47238204833141, "grad_norm": 1.5544607639312744, "learning_rate": 0.0005305523590333717, "loss": 0.409, "step": 255390 }, { "epoch": 73.47525891829689, "grad_norm": 2.1675539016723633, "learning_rate": 0.0005304948216340622, "loss": 0.3816, "step": 255400 }, { "epoch": 73.47813578826236, "grad_norm": 2.714491128921509, "learning_rate": 0.0005304372842347525, "loss": 0.3637, "step": 255410 }, { "epoch": 73.48101265822785, "grad_norm": 1.2966892719268799, "learning_rate": 0.0005303797468354431, "loss": 0.4359, "step": 255420 }, { "epoch": 73.48388952819333, "grad_norm": 1.2917718887329102, "learning_rate": 0.0005303222094361335, "loss": 0.3995, "step": 255430 }, { "epoch": 73.4867663981588, "grad_norm": 1.4387587308883667, "learning_rate": 0.0005302646720368239, "loss": 0.4188, "step": 255440 }, { "epoch": 73.48964326812428, "grad_norm": 1.569639801979065, "learning_rate": 0.0005302071346375143, "loss": 0.4426, "step": 255450 }, { "epoch": 73.49252013808976, "grad_norm": 0.884332537651062, "learning_rate": 0.0005301495972382049, "loss": 0.3214, "step": 255460 }, { "epoch": 73.49539700805524, "grad_norm": 1.3359886407852173, "learning_rate": 0.0005300920598388953, "loss": 0.3287, "step": 255470 }, { "epoch": 73.49827387802071, "grad_norm": 1.2250916957855225, "learning_rate": 0.0005300345224395857, "loss": 0.3947, "step": 255480 }, { "epoch": 73.50115074798619, "grad_norm": 1.7069058418273926, "learning_rate": 0.0005299769850402762, "loss": 0.3278, "step": 255490 }, { "epoch": 73.50402761795166, "grad_norm": 1.04728102684021, "learning_rate": 0.0005299194476409666, "loss": 0.4148, "step": 255500 }, { "epoch": 73.50690448791714, "grad_norm": 1.2309049367904663, "learning_rate": 0.0005298619102416571, "loss": 0.504, "step": 255510 }, { "epoch": 73.50978135788263, "grad_norm": 0.8613983392715454, "learning_rate": 0.0005298043728423475, "loss": 0.3752, "step": 255520 }, { "epoch": 73.5126582278481, "grad_norm": 1.1446964740753174, "learning_rate": 0.000529746835443038, "loss": 0.3382, "step": 255530 }, { "epoch": 73.51553509781358, "grad_norm": 1.8550785779953003, "learning_rate": 0.0005296892980437284, "loss": 0.4807, "step": 255540 }, { "epoch": 73.51841196777906, "grad_norm": 1.5831990242004395, "learning_rate": 0.0005296317606444189, "loss": 0.4803, "step": 255550 }, { "epoch": 73.52128883774454, "grad_norm": 1.5766983032226562, "learning_rate": 0.0005295742232451092, "loss": 0.3873, "step": 255560 }, { "epoch": 73.52416570771001, "grad_norm": 1.0382095575332642, "learning_rate": 0.0005295166858457998, "loss": 0.4374, "step": 255570 }, { "epoch": 73.52704257767549, "grad_norm": 2.595993757247925, "learning_rate": 0.0005294591484464903, "loss": 0.4119, "step": 255580 }, { "epoch": 73.52991944764096, "grad_norm": 1.2658873796463013, "learning_rate": 0.0005294016110471806, "loss": 0.4067, "step": 255590 }, { "epoch": 73.53279631760644, "grad_norm": 1.1457875967025757, "learning_rate": 0.0005293440736478712, "loss": 0.3764, "step": 255600 }, { "epoch": 73.53567318757192, "grad_norm": 1.9771183729171753, "learning_rate": 0.0005292865362485616, "loss": 0.4956, "step": 255610 }, { "epoch": 73.53855005753739, "grad_norm": 1.0451902151107788, "learning_rate": 0.000529228998849252, "loss": 0.4249, "step": 255620 }, { "epoch": 73.54142692750288, "grad_norm": 1.4653997421264648, "learning_rate": 0.0005291714614499424, "loss": 0.3448, "step": 255630 }, { "epoch": 73.54430379746836, "grad_norm": 0.9699633121490479, "learning_rate": 0.000529113924050633, "loss": 0.5423, "step": 255640 }, { "epoch": 73.54718066743384, "grad_norm": 1.1671583652496338, "learning_rate": 0.0005290563866513233, "loss": 0.3975, "step": 255650 }, { "epoch": 73.55005753739931, "grad_norm": 0.8474783301353455, "learning_rate": 0.0005289988492520138, "loss": 0.3938, "step": 255660 }, { "epoch": 73.55293440736479, "grad_norm": 1.2287434339523315, "learning_rate": 0.0005289413118527043, "loss": 0.4179, "step": 255670 }, { "epoch": 73.55581127733026, "grad_norm": 1.5709670782089233, "learning_rate": 0.0005288837744533947, "loss": 0.5066, "step": 255680 }, { "epoch": 73.55868814729574, "grad_norm": 1.743405818939209, "learning_rate": 0.0005288262370540852, "loss": 0.5055, "step": 255690 }, { "epoch": 73.56156501726122, "grad_norm": 1.712797999382019, "learning_rate": 0.0005287686996547756, "loss": 0.4292, "step": 255700 }, { "epoch": 73.56444188722669, "grad_norm": 1.8701163530349731, "learning_rate": 0.0005287111622554661, "loss": 0.4599, "step": 255710 }, { "epoch": 73.56731875719217, "grad_norm": 1.0972299575805664, "learning_rate": 0.0005286536248561565, "loss": 0.4087, "step": 255720 }, { "epoch": 73.57019562715766, "grad_norm": 1.113830804824829, "learning_rate": 0.000528596087456847, "loss": 0.504, "step": 255730 }, { "epoch": 73.57307249712314, "grad_norm": 1.7922240495681763, "learning_rate": 0.0005285385500575373, "loss": 0.3573, "step": 255740 }, { "epoch": 73.57594936708861, "grad_norm": 1.1043061017990112, "learning_rate": 0.0005284810126582279, "loss": 0.3684, "step": 255750 }, { "epoch": 73.57882623705409, "grad_norm": 1.1559133529663086, "learning_rate": 0.0005284234752589184, "loss": 0.3983, "step": 255760 }, { "epoch": 73.58170310701956, "grad_norm": 1.0988386869430542, "learning_rate": 0.0005283659378596087, "loss": 0.3567, "step": 255770 }, { "epoch": 73.58457997698504, "grad_norm": 0.7146241664886475, "learning_rate": 0.0005283084004602992, "loss": 0.4465, "step": 255780 }, { "epoch": 73.58745684695052, "grad_norm": 1.642262578010559, "learning_rate": 0.0005282508630609897, "loss": 0.3714, "step": 255790 }, { "epoch": 73.59033371691599, "grad_norm": 1.7914854288101196, "learning_rate": 0.0005281933256616801, "loss": 0.3434, "step": 255800 }, { "epoch": 73.59321058688147, "grad_norm": 0.9391005635261536, "learning_rate": 0.0005281357882623705, "loss": 0.48, "step": 255810 }, { "epoch": 73.59608745684694, "grad_norm": 1.39932382106781, "learning_rate": 0.0005280782508630611, "loss": 0.3566, "step": 255820 }, { "epoch": 73.59896432681242, "grad_norm": 0.8534567952156067, "learning_rate": 0.0005280207134637514, "loss": 0.3702, "step": 255830 }, { "epoch": 73.60184119677791, "grad_norm": 1.8706305027008057, "learning_rate": 0.0005279631760644419, "loss": 0.4185, "step": 255840 }, { "epoch": 73.60471806674339, "grad_norm": 1.4278414249420166, "learning_rate": 0.0005279056386651323, "loss": 0.4168, "step": 255850 }, { "epoch": 73.60759493670886, "grad_norm": 1.8191150426864624, "learning_rate": 0.0005278481012658228, "loss": 0.4653, "step": 255860 }, { "epoch": 73.61047180667434, "grad_norm": 1.3772021532058716, "learning_rate": 0.0005277905638665133, "loss": 0.3295, "step": 255870 }, { "epoch": 73.61334867663982, "grad_norm": 1.342447280883789, "learning_rate": 0.0005277330264672037, "loss": 0.4046, "step": 255880 }, { "epoch": 73.61622554660529, "grad_norm": 1.1346205472946167, "learning_rate": 0.0005276754890678941, "loss": 0.4656, "step": 255890 }, { "epoch": 73.61910241657077, "grad_norm": 1.7205075025558472, "learning_rate": 0.0005276179516685846, "loss": 0.4195, "step": 255900 }, { "epoch": 73.62197928653625, "grad_norm": 1.329658031463623, "learning_rate": 0.0005275604142692751, "loss": 0.4128, "step": 255910 }, { "epoch": 73.62485615650172, "grad_norm": 1.514951229095459, "learning_rate": 0.0005275028768699654, "loss": 0.421, "step": 255920 }, { "epoch": 73.6277330264672, "grad_norm": 1.3180466890335083, "learning_rate": 0.000527445339470656, "loss": 0.3819, "step": 255930 }, { "epoch": 73.63060989643269, "grad_norm": 1.4650031328201294, "learning_rate": 0.0005273878020713464, "loss": 0.4234, "step": 255940 }, { "epoch": 73.63348676639816, "grad_norm": 1.0937232971191406, "learning_rate": 0.0005273302646720368, "loss": 0.4137, "step": 255950 }, { "epoch": 73.63636363636364, "grad_norm": 1.094671607017517, "learning_rate": 0.0005272727272727272, "loss": 0.3759, "step": 255960 }, { "epoch": 73.63924050632912, "grad_norm": 1.438305377960205, "learning_rate": 0.0005272151898734178, "loss": 0.3973, "step": 255970 }, { "epoch": 73.64211737629459, "grad_norm": 1.5380456447601318, "learning_rate": 0.0005271576524741082, "loss": 0.3715, "step": 255980 }, { "epoch": 73.64499424626007, "grad_norm": 0.9900350570678711, "learning_rate": 0.0005271001150747986, "loss": 0.3675, "step": 255990 }, { "epoch": 73.64787111622555, "grad_norm": 1.0868123769760132, "learning_rate": 0.0005270425776754892, "loss": 0.4644, "step": 256000 }, { "epoch": 73.65074798619102, "grad_norm": 0.6942666172981262, "learning_rate": 0.0005269850402761795, "loss": 0.3253, "step": 256010 }, { "epoch": 73.6536248561565, "grad_norm": 1.453014850616455, "learning_rate": 0.00052692750287687, "loss": 0.3815, "step": 256020 }, { "epoch": 73.65650172612197, "grad_norm": 0.7614974975585938, "learning_rate": 0.0005268699654775604, "loss": 0.4123, "step": 256030 }, { "epoch": 73.65937859608745, "grad_norm": 1.6564253568649292, "learning_rate": 0.0005268124280782509, "loss": 0.3699, "step": 256040 }, { "epoch": 73.66225546605294, "grad_norm": 1.0141940116882324, "learning_rate": 0.0005267548906789413, "loss": 0.4309, "step": 256050 }, { "epoch": 73.66513233601842, "grad_norm": 1.537079095840454, "learning_rate": 0.0005266973532796318, "loss": 0.3626, "step": 256060 }, { "epoch": 73.66800920598389, "grad_norm": 2.0151917934417725, "learning_rate": 0.0005266398158803221, "loss": 0.4708, "step": 256070 }, { "epoch": 73.67088607594937, "grad_norm": 1.4523258209228516, "learning_rate": 0.0005265822784810127, "loss": 0.3969, "step": 256080 }, { "epoch": 73.67376294591485, "grad_norm": 1.5932860374450684, "learning_rate": 0.0005265247410817032, "loss": 0.3981, "step": 256090 }, { "epoch": 73.67663981588032, "grad_norm": 0.6662424802780151, "learning_rate": 0.0005264672036823935, "loss": 0.4155, "step": 256100 }, { "epoch": 73.6795166858458, "grad_norm": 1.122025489807129, "learning_rate": 0.0005264096662830841, "loss": 0.4563, "step": 256110 }, { "epoch": 73.68239355581127, "grad_norm": 1.0369664430618286, "learning_rate": 0.0005263521288837745, "loss": 0.3898, "step": 256120 }, { "epoch": 73.68527042577675, "grad_norm": 1.583788275718689, "learning_rate": 0.0005262945914844649, "loss": 0.428, "step": 256130 }, { "epoch": 73.68814729574223, "grad_norm": 1.4195581674575806, "learning_rate": 0.0005262370540851553, "loss": 0.3895, "step": 256140 }, { "epoch": 73.69102416570772, "grad_norm": 1.2497504949569702, "learning_rate": 0.0005261795166858459, "loss": 0.3434, "step": 256150 }, { "epoch": 73.69390103567319, "grad_norm": 0.8854733109474182, "learning_rate": 0.0005261219792865362, "loss": 0.3677, "step": 256160 }, { "epoch": 73.69677790563867, "grad_norm": 1.320438265800476, "learning_rate": 0.0005260644418872267, "loss": 0.3794, "step": 256170 }, { "epoch": 73.69965477560415, "grad_norm": 1.1943598985671997, "learning_rate": 0.000526006904487917, "loss": 0.4545, "step": 256180 }, { "epoch": 73.70253164556962, "grad_norm": 1.3336689472198486, "learning_rate": 0.0005259493670886076, "loss": 0.3614, "step": 256190 }, { "epoch": 73.7054085155351, "grad_norm": 1.1115490198135376, "learning_rate": 0.0005258918296892981, "loss": 0.4885, "step": 256200 }, { "epoch": 73.70828538550057, "grad_norm": 1.4529143571853638, "learning_rate": 0.0005258342922899884, "loss": 0.3405, "step": 256210 }, { "epoch": 73.71116225546605, "grad_norm": 1.598905086517334, "learning_rate": 0.000525776754890679, "loss": 0.3578, "step": 256220 }, { "epoch": 73.71403912543153, "grad_norm": 1.259554386138916, "learning_rate": 0.0005257192174913694, "loss": 0.4485, "step": 256230 }, { "epoch": 73.716915995397, "grad_norm": 1.2469044923782349, "learning_rate": 0.0005256616800920598, "loss": 0.5148, "step": 256240 }, { "epoch": 73.71979286536248, "grad_norm": 1.8808016777038574, "learning_rate": 0.0005256041426927502, "loss": 0.3857, "step": 256250 }, { "epoch": 73.72266973532797, "grad_norm": 0.8980342745780945, "learning_rate": 0.0005255466052934408, "loss": 0.3195, "step": 256260 }, { "epoch": 73.72554660529345, "grad_norm": 0.9028326869010925, "learning_rate": 0.0005254890678941311, "loss": 0.3599, "step": 256270 }, { "epoch": 73.72842347525892, "grad_norm": 1.0969358682632446, "learning_rate": 0.0005254315304948216, "loss": 0.4358, "step": 256280 }, { "epoch": 73.7313003452244, "grad_norm": 1.0337363481521606, "learning_rate": 0.0005253739930955122, "loss": 0.3799, "step": 256290 }, { "epoch": 73.73417721518987, "grad_norm": 1.529631495475769, "learning_rate": 0.0005253164556962025, "loss": 0.3758, "step": 256300 }, { "epoch": 73.73705408515535, "grad_norm": 1.1656608581542969, "learning_rate": 0.000525258918296893, "loss": 0.3402, "step": 256310 }, { "epoch": 73.73993095512083, "grad_norm": 2.100600004196167, "learning_rate": 0.0005252013808975834, "loss": 0.4258, "step": 256320 }, { "epoch": 73.7428078250863, "grad_norm": 1.19321608543396, "learning_rate": 0.0005251438434982739, "loss": 0.4578, "step": 256330 }, { "epoch": 73.74568469505178, "grad_norm": 1.0271722078323364, "learning_rate": 0.0005250863060989643, "loss": 0.4474, "step": 256340 }, { "epoch": 73.74856156501725, "grad_norm": 1.8083664178848267, "learning_rate": 0.0005250287686996548, "loss": 0.3852, "step": 256350 }, { "epoch": 73.75143843498275, "grad_norm": 2.6585426330566406, "learning_rate": 0.0005249712313003451, "loss": 0.361, "step": 256360 }, { "epoch": 73.75431530494822, "grad_norm": 1.4011281728744507, "learning_rate": 0.0005249136939010357, "loss": 0.4313, "step": 256370 }, { "epoch": 73.7571921749137, "grad_norm": 0.8984555602073669, "learning_rate": 0.0005248561565017262, "loss": 0.4712, "step": 256380 }, { "epoch": 73.76006904487917, "grad_norm": 1.1903796195983887, "learning_rate": 0.0005247986191024165, "loss": 0.3815, "step": 256390 }, { "epoch": 73.76294591484465, "grad_norm": 0.8800302743911743, "learning_rate": 0.0005247410817031071, "loss": 0.3128, "step": 256400 }, { "epoch": 73.76582278481013, "grad_norm": 1.8502517938613892, "learning_rate": 0.0005246835443037975, "loss": 0.4453, "step": 256410 }, { "epoch": 73.7686996547756, "grad_norm": 0.9184730052947998, "learning_rate": 0.0005246260069044879, "loss": 0.4352, "step": 256420 }, { "epoch": 73.77157652474108, "grad_norm": 1.4532254934310913, "learning_rate": 0.0005245684695051783, "loss": 0.4265, "step": 256430 }, { "epoch": 73.77445339470655, "grad_norm": 1.537838101387024, "learning_rate": 0.0005245109321058689, "loss": 0.3994, "step": 256440 }, { "epoch": 73.77733026467203, "grad_norm": 1.9767731428146362, "learning_rate": 0.0005244533947065592, "loss": 0.5374, "step": 256450 }, { "epoch": 73.78020713463752, "grad_norm": 1.9540830850601196, "learning_rate": 0.0005243958573072497, "loss": 0.4201, "step": 256460 }, { "epoch": 73.783084004603, "grad_norm": 2.0513150691986084, "learning_rate": 0.0005243383199079402, "loss": 0.4597, "step": 256470 }, { "epoch": 73.78596087456847, "grad_norm": 1.900605320930481, "learning_rate": 0.0005242807825086306, "loss": 0.3871, "step": 256480 }, { "epoch": 73.78883774453395, "grad_norm": 1.422045350074768, "learning_rate": 0.0005242232451093211, "loss": 0.3317, "step": 256490 }, { "epoch": 73.79171461449943, "grad_norm": 0.8239416480064392, "learning_rate": 0.0005241657077100115, "loss": 0.3175, "step": 256500 }, { "epoch": 73.7945914844649, "grad_norm": 1.1761384010314941, "learning_rate": 0.000524108170310702, "loss": 0.3471, "step": 256510 }, { "epoch": 73.79746835443038, "grad_norm": 1.1205723285675049, "learning_rate": 0.0005240506329113924, "loss": 0.3278, "step": 256520 }, { "epoch": 73.80034522439585, "grad_norm": 2.1595466136932373, "learning_rate": 0.0005239930955120829, "loss": 0.3829, "step": 256530 }, { "epoch": 73.80322209436133, "grad_norm": 1.1001967191696167, "learning_rate": 0.0005239355581127732, "loss": 0.3414, "step": 256540 }, { "epoch": 73.80609896432681, "grad_norm": 2.124617338180542, "learning_rate": 0.0005238780207134638, "loss": 0.457, "step": 256550 }, { "epoch": 73.80897583429228, "grad_norm": 0.9101315140724182, "learning_rate": 0.0005238204833141543, "loss": 0.3212, "step": 256560 }, { "epoch": 73.81185270425777, "grad_norm": 1.4483641386032104, "learning_rate": 0.0005237629459148446, "loss": 0.4017, "step": 256570 }, { "epoch": 73.81472957422325, "grad_norm": 1.7848926782608032, "learning_rate": 0.000523705408515535, "loss": 0.407, "step": 256580 }, { "epoch": 73.81760644418873, "grad_norm": 2.18957257270813, "learning_rate": 0.0005236478711162256, "loss": 0.3893, "step": 256590 }, { "epoch": 73.8204833141542, "grad_norm": 1.0498957633972168, "learning_rate": 0.000523590333716916, "loss": 0.309, "step": 256600 }, { "epoch": 73.82336018411968, "grad_norm": 1.105769395828247, "learning_rate": 0.0005235327963176064, "loss": 0.3986, "step": 256610 }, { "epoch": 73.82623705408515, "grad_norm": 1.1228758096694946, "learning_rate": 0.000523475258918297, "loss": 0.4182, "step": 256620 }, { "epoch": 73.82911392405063, "grad_norm": 1.013697862625122, "learning_rate": 0.0005234177215189873, "loss": 0.4379, "step": 256630 }, { "epoch": 73.83199079401611, "grad_norm": 0.8423892855644226, "learning_rate": 0.0005233601841196778, "loss": 0.3459, "step": 256640 }, { "epoch": 73.83486766398158, "grad_norm": 0.9303702116012573, "learning_rate": 0.0005233026467203682, "loss": 0.3866, "step": 256650 }, { "epoch": 73.83774453394706, "grad_norm": 2.246337652206421, "learning_rate": 0.0005232451093210587, "loss": 0.4714, "step": 256660 }, { "epoch": 73.84062140391255, "grad_norm": 0.8849116563796997, "learning_rate": 0.0005231875719217492, "loss": 0.3811, "step": 256670 }, { "epoch": 73.84349827387803, "grad_norm": 1.4703364372253418, "learning_rate": 0.0005231300345224396, "loss": 0.4428, "step": 256680 }, { "epoch": 73.8463751438435, "grad_norm": 1.561063528060913, "learning_rate": 0.0005230724971231301, "loss": 0.4361, "step": 256690 }, { "epoch": 73.84925201380898, "grad_norm": 1.5980830192565918, "learning_rate": 0.0005230149597238205, "loss": 0.4249, "step": 256700 }, { "epoch": 73.85212888377445, "grad_norm": 1.2873879671096802, "learning_rate": 0.000522957422324511, "loss": 0.4482, "step": 256710 }, { "epoch": 73.85500575373993, "grad_norm": 1.629265308380127, "learning_rate": 0.0005228998849252013, "loss": 0.4584, "step": 256720 }, { "epoch": 73.85788262370541, "grad_norm": 1.0902878046035767, "learning_rate": 0.0005228423475258919, "loss": 0.3891, "step": 256730 }, { "epoch": 73.86075949367088, "grad_norm": 1.1048569679260254, "learning_rate": 0.0005227848101265823, "loss": 0.4115, "step": 256740 }, { "epoch": 73.86363636363636, "grad_norm": 1.530432939529419, "learning_rate": 0.0005227272727272727, "loss": 0.479, "step": 256750 }, { "epoch": 73.86651323360184, "grad_norm": 1.0747278928756714, "learning_rate": 0.0005226697353279631, "loss": 0.3939, "step": 256760 }, { "epoch": 73.86939010356731, "grad_norm": 0.6161375045776367, "learning_rate": 0.0005226121979286537, "loss": 0.3515, "step": 256770 }, { "epoch": 73.8722669735328, "grad_norm": 2.2673187255859375, "learning_rate": 0.0005225546605293441, "loss": 0.4386, "step": 256780 }, { "epoch": 73.87514384349828, "grad_norm": 0.5625331401824951, "learning_rate": 0.0005224971231300345, "loss": 0.4655, "step": 256790 }, { "epoch": 73.87802071346375, "grad_norm": 1.0015414953231812, "learning_rate": 0.0005224395857307251, "loss": 0.3702, "step": 256800 }, { "epoch": 73.88089758342923, "grad_norm": 1.8549914360046387, "learning_rate": 0.0005223820483314154, "loss": 0.412, "step": 256810 }, { "epoch": 73.88377445339471, "grad_norm": 1.4089003801345825, "learning_rate": 0.0005223245109321059, "loss": 0.3746, "step": 256820 }, { "epoch": 73.88665132336018, "grad_norm": 1.0449477434158325, "learning_rate": 0.0005222669735327963, "loss": 0.3554, "step": 256830 }, { "epoch": 73.88952819332566, "grad_norm": 0.6980214715003967, "learning_rate": 0.0005222094361334868, "loss": 0.4237, "step": 256840 }, { "epoch": 73.89240506329114, "grad_norm": 0.6213799118995667, "learning_rate": 0.0005221518987341772, "loss": 0.3658, "step": 256850 }, { "epoch": 73.89528193325661, "grad_norm": 1.2862091064453125, "learning_rate": 0.0005220943613348677, "loss": 0.3482, "step": 256860 }, { "epoch": 73.89815880322209, "grad_norm": 0.9527751803398132, "learning_rate": 0.000522036823935558, "loss": 0.3881, "step": 256870 }, { "epoch": 73.90103567318758, "grad_norm": 1.2541247606277466, "learning_rate": 0.0005219792865362486, "loss": 0.4823, "step": 256880 }, { "epoch": 73.90391254315306, "grad_norm": 1.0154842138290405, "learning_rate": 0.0005219217491369391, "loss": 0.3529, "step": 256890 }, { "epoch": 73.90678941311853, "grad_norm": 1.272481083869934, "learning_rate": 0.0005218642117376294, "loss": 0.4111, "step": 256900 }, { "epoch": 73.90966628308401, "grad_norm": 1.0014684200286865, "learning_rate": 0.00052180667433832, "loss": 0.3552, "step": 256910 }, { "epoch": 73.91254315304948, "grad_norm": 1.0497795343399048, "learning_rate": 0.0005217491369390104, "loss": 0.4719, "step": 256920 }, { "epoch": 73.91542002301496, "grad_norm": 1.9884772300720215, "learning_rate": 0.0005216915995397008, "loss": 0.482, "step": 256930 }, { "epoch": 73.91829689298044, "grad_norm": 1.0708770751953125, "learning_rate": 0.0005216340621403912, "loss": 0.3494, "step": 256940 }, { "epoch": 73.92117376294591, "grad_norm": 1.3768985271453857, "learning_rate": 0.0005215765247410818, "loss": 0.3629, "step": 256950 }, { "epoch": 73.92405063291139, "grad_norm": 1.155771255493164, "learning_rate": 0.0005215189873417721, "loss": 0.4118, "step": 256960 }, { "epoch": 73.92692750287686, "grad_norm": 0.805748462677002, "learning_rate": 0.0005214614499424626, "loss": 0.471, "step": 256970 }, { "epoch": 73.92980437284234, "grad_norm": 1.766340970993042, "learning_rate": 0.0005214039125431532, "loss": 0.4793, "step": 256980 }, { "epoch": 73.93268124280783, "grad_norm": 2.0348947048187256, "learning_rate": 0.0005213463751438435, "loss": 0.4308, "step": 256990 }, { "epoch": 73.93555811277331, "grad_norm": 1.5043643712997437, "learning_rate": 0.000521288837744534, "loss": 0.4146, "step": 257000 }, { "epoch": 73.93843498273878, "grad_norm": 2.23068904876709, "learning_rate": 0.0005212313003452243, "loss": 0.3896, "step": 257010 }, { "epoch": 73.94131185270426, "grad_norm": 0.8468416333198547, "learning_rate": 0.0005211737629459149, "loss": 0.3754, "step": 257020 }, { "epoch": 73.94418872266974, "grad_norm": 1.0741277933120728, "learning_rate": 0.0005211162255466053, "loss": 0.4292, "step": 257030 }, { "epoch": 73.94706559263521, "grad_norm": 1.4097758531570435, "learning_rate": 0.0005210586881472957, "loss": 0.3354, "step": 257040 }, { "epoch": 73.94994246260069, "grad_norm": 1.5954934358596802, "learning_rate": 0.0005210011507479861, "loss": 0.3769, "step": 257050 }, { "epoch": 73.95281933256616, "grad_norm": 0.943229079246521, "learning_rate": 0.0005209436133486767, "loss": 0.3199, "step": 257060 }, { "epoch": 73.95569620253164, "grad_norm": 1.0238591432571411, "learning_rate": 0.000520886075949367, "loss": 0.4105, "step": 257070 }, { "epoch": 73.95857307249712, "grad_norm": 1.9157538414001465, "learning_rate": 0.0005208285385500575, "loss": 0.3987, "step": 257080 }, { "epoch": 73.96144994246261, "grad_norm": 0.9057467579841614, "learning_rate": 0.0005207710011507481, "loss": 0.3692, "step": 257090 }, { "epoch": 73.96432681242808, "grad_norm": 0.9663063883781433, "learning_rate": 0.0005207134637514384, "loss": 0.3595, "step": 257100 }, { "epoch": 73.96720368239356, "grad_norm": 1.5953199863433838, "learning_rate": 0.0005206559263521289, "loss": 0.3555, "step": 257110 }, { "epoch": 73.97008055235904, "grad_norm": 1.3966447114944458, "learning_rate": 0.0005205983889528193, "loss": 0.4264, "step": 257120 }, { "epoch": 73.97295742232451, "grad_norm": 1.8028923273086548, "learning_rate": 0.0005205408515535098, "loss": 0.4756, "step": 257130 }, { "epoch": 73.97583429228999, "grad_norm": 0.8144392371177673, "learning_rate": 0.0005204833141542002, "loss": 0.3762, "step": 257140 }, { "epoch": 73.97871116225546, "grad_norm": 0.9381827712059021, "learning_rate": 0.0005204257767548907, "loss": 0.4413, "step": 257150 }, { "epoch": 73.98158803222094, "grad_norm": 1.5505144596099854, "learning_rate": 0.000520368239355581, "loss": 0.4099, "step": 257160 }, { "epoch": 73.98446490218642, "grad_norm": 1.8332420587539673, "learning_rate": 0.0005203107019562716, "loss": 0.5171, "step": 257170 }, { "epoch": 73.9873417721519, "grad_norm": 0.75481116771698, "learning_rate": 0.0005202531645569621, "loss": 0.4669, "step": 257180 }, { "epoch": 73.99021864211737, "grad_norm": 1.0630097389221191, "learning_rate": 0.0005201956271576524, "loss": 0.3723, "step": 257190 }, { "epoch": 73.99309551208286, "grad_norm": 1.5198060274124146, "learning_rate": 0.000520138089758343, "loss": 0.3743, "step": 257200 }, { "epoch": 73.99597238204834, "grad_norm": 1.3332324028015137, "learning_rate": 0.0005200805523590334, "loss": 0.372, "step": 257210 }, { "epoch": 73.99884925201381, "grad_norm": 0.7806288003921509, "learning_rate": 0.0005200230149597238, "loss": 0.3342, "step": 257220 }, { "epoch": 74.00172612197929, "grad_norm": 0.9355313181877136, "learning_rate": 0.0005199654775604142, "loss": 0.365, "step": 257230 }, { "epoch": 74.00460299194476, "grad_norm": 1.3820863962173462, "learning_rate": 0.0005199079401611048, "loss": 0.3412, "step": 257240 }, { "epoch": 74.00747986191024, "grad_norm": 1.1879314184188843, "learning_rate": 0.0005198504027617951, "loss": 0.3676, "step": 257250 }, { "epoch": 74.01035673187572, "grad_norm": 1.110791563987732, "learning_rate": 0.0005197928653624856, "loss": 0.3641, "step": 257260 }, { "epoch": 74.0132336018412, "grad_norm": 0.9743184447288513, "learning_rate": 0.0005197353279631762, "loss": 0.4284, "step": 257270 }, { "epoch": 74.01611047180667, "grad_norm": 1.8321465253829956, "learning_rate": 0.0005196777905638665, "loss": 0.392, "step": 257280 }, { "epoch": 74.01898734177215, "grad_norm": 1.1864093542099, "learning_rate": 0.000519620253164557, "loss": 0.3418, "step": 257290 }, { "epoch": 74.02186421173764, "grad_norm": 1.4851787090301514, "learning_rate": 0.0005195627157652474, "loss": 0.3049, "step": 257300 }, { "epoch": 74.02474108170311, "grad_norm": 1.159484624862671, "learning_rate": 0.0005195051783659379, "loss": 0.3502, "step": 257310 }, { "epoch": 74.02761795166859, "grad_norm": 0.8783020377159119, "learning_rate": 0.0005194476409666283, "loss": 0.3708, "step": 257320 }, { "epoch": 74.03049482163406, "grad_norm": 1.3876105546951294, "learning_rate": 0.0005193901035673188, "loss": 0.37, "step": 257330 }, { "epoch": 74.03337169159954, "grad_norm": 0.9471389651298523, "learning_rate": 0.0005193325661680091, "loss": 0.3703, "step": 257340 }, { "epoch": 74.03624856156502, "grad_norm": 1.3757988214492798, "learning_rate": 0.0005192750287686997, "loss": 0.3445, "step": 257350 }, { "epoch": 74.0391254315305, "grad_norm": 1.2029001712799072, "learning_rate": 0.0005192174913693901, "loss": 0.3739, "step": 257360 }, { "epoch": 74.04200230149597, "grad_norm": 0.7561125159263611, "learning_rate": 0.0005191599539700805, "loss": 0.353, "step": 257370 }, { "epoch": 74.04487917146145, "grad_norm": 0.7920980453491211, "learning_rate": 0.0005191024165707711, "loss": 0.3678, "step": 257380 }, { "epoch": 74.04775604142692, "grad_norm": 1.6184020042419434, "learning_rate": 0.0005190448791714615, "loss": 0.4958, "step": 257390 }, { "epoch": 74.0506329113924, "grad_norm": 1.5553890466690063, "learning_rate": 0.0005189873417721519, "loss": 0.3668, "step": 257400 }, { "epoch": 74.05350978135789, "grad_norm": 1.131990909576416, "learning_rate": 0.0005189298043728423, "loss": 0.322, "step": 257410 }, { "epoch": 74.05638665132336, "grad_norm": 1.0399354696273804, "learning_rate": 0.0005188722669735329, "loss": 0.4358, "step": 257420 }, { "epoch": 74.05926352128884, "grad_norm": 2.9224250316619873, "learning_rate": 0.0005188147295742232, "loss": 0.4183, "step": 257430 }, { "epoch": 74.06214039125432, "grad_norm": 1.7052891254425049, "learning_rate": 0.0005187571921749137, "loss": 0.3076, "step": 257440 }, { "epoch": 74.0650172612198, "grad_norm": 0.769254744052887, "learning_rate": 0.0005186996547756041, "loss": 0.3704, "step": 257450 }, { "epoch": 74.06789413118527, "grad_norm": 0.6849994659423828, "learning_rate": 0.0005186421173762946, "loss": 0.4235, "step": 257460 }, { "epoch": 74.07077100115075, "grad_norm": 1.003291130065918, "learning_rate": 0.000518584579976985, "loss": 0.3295, "step": 257470 }, { "epoch": 74.07364787111622, "grad_norm": 2.048166036605835, "learning_rate": 0.0005185270425776755, "loss": 0.3904, "step": 257480 }, { "epoch": 74.0765247410817, "grad_norm": 0.9277427196502686, "learning_rate": 0.000518469505178366, "loss": 0.338, "step": 257490 }, { "epoch": 74.07940161104717, "grad_norm": 1.9284476041793823, "learning_rate": 0.0005184119677790564, "loss": 0.407, "step": 257500 }, { "epoch": 74.08227848101266, "grad_norm": 0.7088039517402649, "learning_rate": 0.0005183544303797469, "loss": 0.3548, "step": 257510 }, { "epoch": 74.08515535097814, "grad_norm": 1.5555272102355957, "learning_rate": 0.0005182968929804372, "loss": 0.3434, "step": 257520 }, { "epoch": 74.08803222094362, "grad_norm": 1.0882248878479004, "learning_rate": 0.0005182393555811278, "loss": 0.3593, "step": 257530 }, { "epoch": 74.0909090909091, "grad_norm": 1.596400260925293, "learning_rate": 0.0005181818181818182, "loss": 0.3403, "step": 257540 }, { "epoch": 74.09378596087457, "grad_norm": 1.1227812767028809, "learning_rate": 0.0005181242807825086, "loss": 0.3725, "step": 257550 }, { "epoch": 74.09666283084005, "grad_norm": 1.0590447187423706, "learning_rate": 0.000518066743383199, "loss": 0.3355, "step": 257560 }, { "epoch": 74.09953970080552, "grad_norm": 2.1011922359466553, "learning_rate": 0.0005180092059838896, "loss": 0.3856, "step": 257570 }, { "epoch": 74.102416570771, "grad_norm": 1.16274893283844, "learning_rate": 0.00051795166858458, "loss": 0.388, "step": 257580 }, { "epoch": 74.10529344073647, "grad_norm": 1.7048949003219604, "learning_rate": 0.0005178941311852704, "loss": 0.4371, "step": 257590 }, { "epoch": 74.10817031070195, "grad_norm": 2.5117526054382324, "learning_rate": 0.000517836593785961, "loss": 0.4061, "step": 257600 }, { "epoch": 74.11104718066743, "grad_norm": 1.560339093208313, "learning_rate": 0.0005177790563866513, "loss": 0.3917, "step": 257610 }, { "epoch": 74.11392405063292, "grad_norm": 1.3793349266052246, "learning_rate": 0.0005177215189873418, "loss": 0.2891, "step": 257620 }, { "epoch": 74.1168009205984, "grad_norm": 1.0369561910629272, "learning_rate": 0.0005176639815880322, "loss": 0.3164, "step": 257630 }, { "epoch": 74.11967779056387, "grad_norm": 1.2727117538452148, "learning_rate": 0.0005176064441887227, "loss": 0.3524, "step": 257640 }, { "epoch": 74.12255466052935, "grad_norm": 1.5470832586288452, "learning_rate": 0.0005175489067894131, "loss": 0.3984, "step": 257650 }, { "epoch": 74.12543153049482, "grad_norm": 1.8470652103424072, "learning_rate": 0.0005174913693901036, "loss": 0.3763, "step": 257660 }, { "epoch": 74.1283084004603, "grad_norm": 5.79533052444458, "learning_rate": 0.000517433831990794, "loss": 0.3602, "step": 257670 }, { "epoch": 74.13118527042577, "grad_norm": 0.6979092359542847, "learning_rate": 0.0005173762945914845, "loss": 0.4787, "step": 257680 }, { "epoch": 74.13406214039125, "grad_norm": 1.6102344989776611, "learning_rate": 0.000517318757192175, "loss": 0.3252, "step": 257690 }, { "epoch": 74.13693901035673, "grad_norm": 1.0049442052841187, "learning_rate": 0.0005172612197928653, "loss": 0.3791, "step": 257700 }, { "epoch": 74.1398158803222, "grad_norm": 0.8789691925048828, "learning_rate": 0.0005172036823935559, "loss": 0.3507, "step": 257710 }, { "epoch": 74.1426927502877, "grad_norm": 1.3263036012649536, "learning_rate": 0.0005171461449942463, "loss": 0.3421, "step": 257720 }, { "epoch": 74.14556962025317, "grad_norm": 0.8065228462219238, "learning_rate": 0.0005170886075949367, "loss": 0.3391, "step": 257730 }, { "epoch": 74.14844649021865, "grad_norm": 1.1635440587997437, "learning_rate": 0.0005170310701956271, "loss": 0.3318, "step": 257740 }, { "epoch": 74.15132336018412, "grad_norm": 2.0982534885406494, "learning_rate": 0.0005169735327963177, "loss": 0.3567, "step": 257750 }, { "epoch": 74.1542002301496, "grad_norm": 0.950762927532196, "learning_rate": 0.000516915995397008, "loss": 0.445, "step": 257760 }, { "epoch": 74.15707710011507, "grad_norm": 1.774130940437317, "learning_rate": 0.0005168584579976985, "loss": 0.3652, "step": 257770 }, { "epoch": 74.15995397008055, "grad_norm": 1.318065881729126, "learning_rate": 0.0005168009205983891, "loss": 0.3802, "step": 257780 }, { "epoch": 74.16283084004603, "grad_norm": 1.4064667224884033, "learning_rate": 0.0005167433831990794, "loss": 0.397, "step": 257790 }, { "epoch": 74.1657077100115, "grad_norm": 1.1008262634277344, "learning_rate": 0.0005166858457997699, "loss": 0.3194, "step": 257800 }, { "epoch": 74.16858457997698, "grad_norm": 1.8007965087890625, "learning_rate": 0.0005166283084004602, "loss": 0.4427, "step": 257810 }, { "epoch": 74.17146144994246, "grad_norm": 1.0946000814437866, "learning_rate": 0.0005165707710011508, "loss": 0.4303, "step": 257820 }, { "epoch": 74.17433831990795, "grad_norm": 0.5444265604019165, "learning_rate": 0.0005165132336018412, "loss": 0.3211, "step": 257830 }, { "epoch": 74.17721518987342, "grad_norm": 1.0983420610427856, "learning_rate": 0.0005164556962025316, "loss": 0.3861, "step": 257840 }, { "epoch": 74.1800920598389, "grad_norm": 1.9260411262512207, "learning_rate": 0.000516398158803222, "loss": 0.4178, "step": 257850 }, { "epoch": 74.18296892980437, "grad_norm": 0.8264568448066711, "learning_rate": 0.0005163406214039126, "loss": 0.3366, "step": 257860 }, { "epoch": 74.18584579976985, "grad_norm": 1.0856280326843262, "learning_rate": 0.000516283084004603, "loss": 0.3445, "step": 257870 }, { "epoch": 74.18872266973533, "grad_norm": 1.3436270952224731, "learning_rate": 0.0005162255466052934, "loss": 0.4218, "step": 257880 }, { "epoch": 74.1915995397008, "grad_norm": 1.8481135368347168, "learning_rate": 0.000516168009205984, "loss": 0.4443, "step": 257890 }, { "epoch": 74.19447640966628, "grad_norm": 1.2661840915679932, "learning_rate": 0.0005161104718066743, "loss": 0.3551, "step": 257900 }, { "epoch": 74.19735327963176, "grad_norm": 1.6078953742980957, "learning_rate": 0.0005160529344073648, "loss": 0.394, "step": 257910 }, { "epoch": 74.20023014959723, "grad_norm": 1.0122005939483643, "learning_rate": 0.0005159953970080552, "loss": 0.3611, "step": 257920 }, { "epoch": 74.20310701956272, "grad_norm": 1.3028117418289185, "learning_rate": 0.0005159378596087457, "loss": 0.4697, "step": 257930 }, { "epoch": 74.2059838895282, "grad_norm": 1.7138069868087769, "learning_rate": 0.0005158803222094361, "loss": 0.4165, "step": 257940 }, { "epoch": 74.20886075949367, "grad_norm": 0.9421430230140686, "learning_rate": 0.0005158227848101266, "loss": 0.448, "step": 257950 }, { "epoch": 74.21173762945915, "grad_norm": 0.9233113527297974, "learning_rate": 0.000515765247410817, "loss": 0.4162, "step": 257960 }, { "epoch": 74.21461449942463, "grad_norm": 1.1692625284194946, "learning_rate": 0.0005157077100115075, "loss": 0.4316, "step": 257970 }, { "epoch": 74.2174913693901, "grad_norm": 0.710004985332489, "learning_rate": 0.000515650172612198, "loss": 0.4081, "step": 257980 }, { "epoch": 74.22036823935558, "grad_norm": 1.3449968099594116, "learning_rate": 0.0005155926352128883, "loss": 0.5364, "step": 257990 }, { "epoch": 74.22324510932106, "grad_norm": 1.6646842956542969, "learning_rate": 0.0005155350978135789, "loss": 0.4233, "step": 258000 }, { "epoch": 74.22612197928653, "grad_norm": 1.2415086030960083, "learning_rate": 0.0005154775604142693, "loss": 0.321, "step": 258010 }, { "epoch": 74.22899884925201, "grad_norm": 1.1762746572494507, "learning_rate": 0.0005154200230149597, "loss": 0.4063, "step": 258020 }, { "epoch": 74.23187571921748, "grad_norm": 0.6126918792724609, "learning_rate": 0.0005153624856156501, "loss": 0.3923, "step": 258030 }, { "epoch": 74.23475258918297, "grad_norm": 0.9096189737319946, "learning_rate": 0.0005153049482163407, "loss": 0.3565, "step": 258040 }, { "epoch": 74.23762945914845, "grad_norm": 0.724774181842804, "learning_rate": 0.000515247410817031, "loss": 0.3913, "step": 258050 }, { "epoch": 74.24050632911393, "grad_norm": 1.3266974687576294, "learning_rate": 0.0005151898734177215, "loss": 0.3937, "step": 258060 }, { "epoch": 74.2433831990794, "grad_norm": 0.759443998336792, "learning_rate": 0.0005151323360184121, "loss": 0.3889, "step": 258070 }, { "epoch": 74.24626006904488, "grad_norm": 1.3850677013397217, "learning_rate": 0.0005150747986191024, "loss": 0.3176, "step": 258080 }, { "epoch": 74.24913693901036, "grad_norm": 0.9560301899909973, "learning_rate": 0.0005150172612197929, "loss": 0.3825, "step": 258090 }, { "epoch": 74.25201380897583, "grad_norm": 1.1414402723312378, "learning_rate": 0.0005149597238204833, "loss": 0.3459, "step": 258100 }, { "epoch": 74.25489067894131, "grad_norm": 0.716200590133667, "learning_rate": 0.0005149021864211738, "loss": 0.3511, "step": 258110 }, { "epoch": 74.25776754890678, "grad_norm": 1.5800782442092896, "learning_rate": 0.0005148446490218642, "loss": 0.3687, "step": 258120 }, { "epoch": 74.26064441887226, "grad_norm": 1.5992518663406372, "learning_rate": 0.0005147871116225547, "loss": 0.4158, "step": 258130 }, { "epoch": 74.26352128883775, "grad_norm": 1.0156501531600952, "learning_rate": 0.000514729574223245, "loss": 0.3866, "step": 258140 }, { "epoch": 74.26639815880323, "grad_norm": 1.170952558517456, "learning_rate": 0.0005146720368239356, "loss": 0.4631, "step": 258150 }, { "epoch": 74.2692750287687, "grad_norm": 1.1798672676086426, "learning_rate": 0.000514614499424626, "loss": 0.3246, "step": 258160 }, { "epoch": 74.27215189873418, "grad_norm": 1.2617957592010498, "learning_rate": 0.0005145569620253164, "loss": 0.333, "step": 258170 }, { "epoch": 74.27502876869966, "grad_norm": 1.3490140438079834, "learning_rate": 0.000514499424626007, "loss": 0.3554, "step": 258180 }, { "epoch": 74.27790563866513, "grad_norm": 0.7974257469177246, "learning_rate": 0.0005144418872266974, "loss": 0.3771, "step": 258190 }, { "epoch": 74.28078250863061, "grad_norm": 1.5723894834518433, "learning_rate": 0.0005143843498273878, "loss": 0.4248, "step": 258200 }, { "epoch": 74.28365937859608, "grad_norm": 1.3100148439407349, "learning_rate": 0.0005143268124280782, "loss": 0.3721, "step": 258210 }, { "epoch": 74.28653624856156, "grad_norm": 2.1169795989990234, "learning_rate": 0.0005142692750287688, "loss": 0.4041, "step": 258220 }, { "epoch": 74.28941311852704, "grad_norm": 1.2391791343688965, "learning_rate": 0.0005142117376294591, "loss": 0.3566, "step": 258230 }, { "epoch": 74.29228998849253, "grad_norm": 1.7669366598129272, "learning_rate": 0.0005141542002301496, "loss": 0.4497, "step": 258240 }, { "epoch": 74.295166858458, "grad_norm": 1.0976008176803589, "learning_rate": 0.0005140966628308401, "loss": 0.375, "step": 258250 }, { "epoch": 74.29804372842348, "grad_norm": 0.5820229053497314, "learning_rate": 0.0005140391254315305, "loss": 0.5127, "step": 258260 }, { "epoch": 74.30092059838896, "grad_norm": 1.818437933921814, "learning_rate": 0.000513981588032221, "loss": 0.3858, "step": 258270 }, { "epoch": 74.30379746835443, "grad_norm": 1.3446999788284302, "learning_rate": 0.0005139240506329114, "loss": 0.3809, "step": 258280 }, { "epoch": 74.30667433831991, "grad_norm": 0.8817952275276184, "learning_rate": 0.0005138665132336019, "loss": 0.3967, "step": 258290 }, { "epoch": 74.30955120828538, "grad_norm": 0.8200816512107849, "learning_rate": 0.0005138089758342923, "loss": 0.4015, "step": 258300 }, { "epoch": 74.31242807825086, "grad_norm": 1.2929010391235352, "learning_rate": 0.0005137514384349828, "loss": 0.4185, "step": 258310 }, { "epoch": 74.31530494821634, "grad_norm": 1.5143882036209106, "learning_rate": 0.0005136939010356731, "loss": 0.3724, "step": 258320 }, { "epoch": 74.31818181818181, "grad_norm": 1.9243887662887573, "learning_rate": 0.0005136363636363637, "loss": 0.3774, "step": 258330 }, { "epoch": 74.32105868814729, "grad_norm": 0.6983250975608826, "learning_rate": 0.0005135788262370541, "loss": 0.4672, "step": 258340 }, { "epoch": 74.32393555811278, "grad_norm": 1.334246277809143, "learning_rate": 0.0005135212888377445, "loss": 0.3978, "step": 258350 }, { "epoch": 74.32681242807826, "grad_norm": 1.1077334880828857, "learning_rate": 0.000513463751438435, "loss": 0.4068, "step": 258360 }, { "epoch": 74.32968929804373, "grad_norm": 0.8300569653511047, "learning_rate": 0.0005134062140391255, "loss": 0.3642, "step": 258370 }, { "epoch": 74.33256616800921, "grad_norm": 1.3223193883895874, "learning_rate": 0.0005133486766398159, "loss": 0.4091, "step": 258380 }, { "epoch": 74.33544303797468, "grad_norm": 0.8338025808334351, "learning_rate": 0.0005132911392405063, "loss": 0.3725, "step": 258390 }, { "epoch": 74.33831990794016, "grad_norm": 0.7145915031433105, "learning_rate": 0.0005132336018411969, "loss": 0.373, "step": 258400 }, { "epoch": 74.34119677790564, "grad_norm": 3.8365914821624756, "learning_rate": 0.0005131760644418872, "loss": 0.4331, "step": 258410 }, { "epoch": 74.34407364787111, "grad_norm": 1.3418604135513306, "learning_rate": 0.0005131185270425777, "loss": 0.4046, "step": 258420 }, { "epoch": 74.34695051783659, "grad_norm": 1.0655728578567505, "learning_rate": 0.0005130609896432681, "loss": 0.342, "step": 258430 }, { "epoch": 74.34982738780207, "grad_norm": 1.2746578454971313, "learning_rate": 0.0005130034522439586, "loss": 0.4005, "step": 258440 }, { "epoch": 74.35270425776756, "grad_norm": 0.8450138568878174, "learning_rate": 0.000512945914844649, "loss": 0.3621, "step": 258450 }, { "epoch": 74.35558112773303, "grad_norm": 1.112158179283142, "learning_rate": 0.0005128883774453395, "loss": 0.4221, "step": 258460 }, { "epoch": 74.35845799769851, "grad_norm": 0.8621823787689209, "learning_rate": 0.00051283084004603, "loss": 0.3568, "step": 258470 }, { "epoch": 74.36133486766398, "grad_norm": 1.0197309255599976, "learning_rate": 0.0005127733026467204, "loss": 0.3708, "step": 258480 }, { "epoch": 74.36421173762946, "grad_norm": 1.1272801160812378, "learning_rate": 0.0005127157652474109, "loss": 0.377, "step": 258490 }, { "epoch": 74.36708860759494, "grad_norm": 0.820709764957428, "learning_rate": 0.0005126582278481012, "loss": 0.3949, "step": 258500 }, { "epoch": 74.36996547756041, "grad_norm": 1.317556381225586, "learning_rate": 0.0005126006904487918, "loss": 0.3345, "step": 258510 }, { "epoch": 74.37284234752589, "grad_norm": 0.9368668794631958, "learning_rate": 0.0005125431530494822, "loss": 0.4295, "step": 258520 }, { "epoch": 74.37571921749137, "grad_norm": 2.01387095451355, "learning_rate": 0.0005124856156501726, "loss": 0.4869, "step": 258530 }, { "epoch": 74.37859608745684, "grad_norm": 0.6545670628547668, "learning_rate": 0.000512428078250863, "loss": 0.3737, "step": 258540 }, { "epoch": 74.38147295742232, "grad_norm": 0.7078239917755127, "learning_rate": 0.0005123705408515536, "loss": 0.4684, "step": 258550 }, { "epoch": 74.38434982738781, "grad_norm": 1.7236328125, "learning_rate": 0.0005123130034522439, "loss": 0.4396, "step": 258560 }, { "epoch": 74.38722669735328, "grad_norm": 1.4804611206054688, "learning_rate": 0.0005122554660529344, "loss": 0.4053, "step": 258570 }, { "epoch": 74.39010356731876, "grad_norm": 1.7539645433425903, "learning_rate": 0.000512197928653625, "loss": 0.3775, "step": 258580 }, { "epoch": 74.39298043728424, "grad_norm": 1.2084254026412964, "learning_rate": 0.0005121403912543153, "loss": 0.3604, "step": 258590 }, { "epoch": 74.39585730724971, "grad_norm": 1.9475328922271729, "learning_rate": 0.0005120828538550058, "loss": 0.3802, "step": 258600 }, { "epoch": 74.39873417721519, "grad_norm": 0.8837099075317383, "learning_rate": 0.0005120253164556962, "loss": 0.3459, "step": 258610 }, { "epoch": 74.40161104718067, "grad_norm": 1.1065499782562256, "learning_rate": 0.0005119677790563867, "loss": 0.3905, "step": 258620 }, { "epoch": 74.40448791714614, "grad_norm": 0.6466010808944702, "learning_rate": 0.0005119102416570771, "loss": 0.4097, "step": 258630 }, { "epoch": 74.40736478711162, "grad_norm": 1.6236103773117065, "learning_rate": 0.0005118527042577675, "loss": 0.3828, "step": 258640 }, { "epoch": 74.4102416570771, "grad_norm": 0.7523742318153381, "learning_rate": 0.000511795166858458, "loss": 0.3974, "step": 258650 }, { "epoch": 74.41311852704258, "grad_norm": 0.9541816711425781, "learning_rate": 0.0005117376294591485, "loss": 0.3443, "step": 258660 }, { "epoch": 74.41599539700806, "grad_norm": 1.5453139543533325, "learning_rate": 0.0005116800920598388, "loss": 0.3667, "step": 258670 }, { "epoch": 74.41887226697354, "grad_norm": 0.9546851515769958, "learning_rate": 0.0005116225546605293, "loss": 0.3117, "step": 258680 }, { "epoch": 74.42174913693901, "grad_norm": 1.7597882747650146, "learning_rate": 0.0005115650172612199, "loss": 0.5105, "step": 258690 }, { "epoch": 74.42462600690449, "grad_norm": 0.6796032190322876, "learning_rate": 0.0005115074798619102, "loss": 0.3796, "step": 258700 }, { "epoch": 74.42750287686997, "grad_norm": 0.9119305610656738, "learning_rate": 0.0005114499424626007, "loss": 0.414, "step": 258710 }, { "epoch": 74.43037974683544, "grad_norm": 1.651604175567627, "learning_rate": 0.0005113924050632911, "loss": 0.4115, "step": 258720 }, { "epoch": 74.43325661680092, "grad_norm": 0.8279147744178772, "learning_rate": 0.0005113348676639816, "loss": 0.3392, "step": 258730 }, { "epoch": 74.4361334867664, "grad_norm": 1.4328789710998535, "learning_rate": 0.000511277330264672, "loss": 0.4188, "step": 258740 }, { "epoch": 74.43901035673187, "grad_norm": 2.4441449642181396, "learning_rate": 0.0005112197928653625, "loss": 0.3345, "step": 258750 }, { "epoch": 74.44188722669735, "grad_norm": 0.6858452558517456, "learning_rate": 0.0005111622554660529, "loss": 0.3338, "step": 258760 }, { "epoch": 74.44476409666284, "grad_norm": 1.8469301462173462, "learning_rate": 0.0005111047180667434, "loss": 0.442, "step": 258770 }, { "epoch": 74.44764096662831, "grad_norm": 0.8739131093025208, "learning_rate": 0.0005110471806674339, "loss": 0.4229, "step": 258780 }, { "epoch": 74.45051783659379, "grad_norm": 0.7780680060386658, "learning_rate": 0.0005109896432681242, "loss": 0.3684, "step": 258790 }, { "epoch": 74.45339470655927, "grad_norm": 2.0195565223693848, "learning_rate": 0.0005109321058688148, "loss": 0.4283, "step": 258800 }, { "epoch": 74.45627157652474, "grad_norm": 1.872153639793396, "learning_rate": 0.0005108745684695052, "loss": 0.4282, "step": 258810 }, { "epoch": 74.45914844649022, "grad_norm": 1.193040132522583, "learning_rate": 0.0005108170310701956, "loss": 0.3986, "step": 258820 }, { "epoch": 74.4620253164557, "grad_norm": 0.9308724999427795, "learning_rate": 0.000510759493670886, "loss": 0.4335, "step": 258830 }, { "epoch": 74.46490218642117, "grad_norm": 1.6857175827026367, "learning_rate": 0.0005107019562715766, "loss": 0.3676, "step": 258840 }, { "epoch": 74.46777905638665, "grad_norm": 1.646548867225647, "learning_rate": 0.0005106444188722669, "loss": 0.3438, "step": 258850 }, { "epoch": 74.47065592635212, "grad_norm": 0.8025301694869995, "learning_rate": 0.0005105868814729574, "loss": 0.3372, "step": 258860 }, { "epoch": 74.47353279631761, "grad_norm": 1.3853901624679565, "learning_rate": 0.000510529344073648, "loss": 0.4577, "step": 258870 }, { "epoch": 74.47640966628309, "grad_norm": 1.6253654956817627, "learning_rate": 0.0005104718066743383, "loss": 0.471, "step": 258880 }, { "epoch": 74.47928653624857, "grad_norm": 1.1489415168762207, "learning_rate": 0.0005104142692750288, "loss": 0.3989, "step": 258890 }, { "epoch": 74.48216340621404, "grad_norm": 1.6911545991897583, "learning_rate": 0.0005103567318757192, "loss": 0.4411, "step": 258900 }, { "epoch": 74.48504027617952, "grad_norm": 0.9877827763557434, "learning_rate": 0.0005102991944764097, "loss": 0.2871, "step": 258910 }, { "epoch": 74.487917146145, "grad_norm": 1.4161200523376465, "learning_rate": 0.0005102416570771001, "loss": 0.3493, "step": 258920 }, { "epoch": 74.49079401611047, "grad_norm": 1.3053239583969116, "learning_rate": 0.0005101841196777906, "loss": 0.3729, "step": 258930 }, { "epoch": 74.49367088607595, "grad_norm": 1.7492096424102783, "learning_rate": 0.000510126582278481, "loss": 0.4858, "step": 258940 }, { "epoch": 74.49654775604142, "grad_norm": 1.524484634399414, "learning_rate": 0.0005100690448791715, "loss": 0.3844, "step": 258950 }, { "epoch": 74.4994246260069, "grad_norm": 1.0832608938217163, "learning_rate": 0.0005100115074798619, "loss": 0.4166, "step": 258960 }, { "epoch": 74.50230149597238, "grad_norm": 1.829744815826416, "learning_rate": 0.0005099539700805523, "loss": 0.461, "step": 258970 }, { "epoch": 74.50517836593787, "grad_norm": 1.6515055894851685, "learning_rate": 0.0005098964326812429, "loss": 0.4731, "step": 258980 }, { "epoch": 74.50805523590334, "grad_norm": 2.064974546432495, "learning_rate": 0.0005098388952819333, "loss": 0.4744, "step": 258990 }, { "epoch": 74.51093210586882, "grad_norm": 1.4428962469100952, "learning_rate": 0.0005097813578826237, "loss": 0.3559, "step": 259000 }, { "epoch": 74.5138089758343, "grad_norm": 1.2738124132156372, "learning_rate": 0.0005097238204833141, "loss": 0.3564, "step": 259010 }, { "epoch": 74.51668584579977, "grad_norm": 1.187936544418335, "learning_rate": 0.0005096662830840047, "loss": 0.5359, "step": 259020 }, { "epoch": 74.51956271576525, "grad_norm": 0.8234732151031494, "learning_rate": 0.000509608745684695, "loss": 0.3622, "step": 259030 }, { "epoch": 74.52243958573072, "grad_norm": 1.6421709060668945, "learning_rate": 0.0005095512082853855, "loss": 0.3739, "step": 259040 }, { "epoch": 74.5253164556962, "grad_norm": 1.2324280738830566, "learning_rate": 0.000509493670886076, "loss": 0.3516, "step": 259050 }, { "epoch": 74.52819332566168, "grad_norm": 1.2896696329116821, "learning_rate": 0.0005094361334867664, "loss": 0.5814, "step": 259060 }, { "epoch": 74.53107019562715, "grad_norm": 1.0084891319274902, "learning_rate": 0.0005093785960874568, "loss": 0.3542, "step": 259070 }, { "epoch": 74.53394706559264, "grad_norm": 1.3599287271499634, "learning_rate": 0.0005093210586881473, "loss": 0.4034, "step": 259080 }, { "epoch": 74.53682393555812, "grad_norm": 0.9997859001159668, "learning_rate": 0.0005092635212888378, "loss": 0.3261, "step": 259090 }, { "epoch": 74.5397008055236, "grad_norm": 0.9569012522697449, "learning_rate": 0.0005092059838895282, "loss": 0.2989, "step": 259100 }, { "epoch": 74.54257767548907, "grad_norm": 0.8629031777381897, "learning_rate": 0.0005091484464902187, "loss": 0.3511, "step": 259110 }, { "epoch": 74.54545454545455, "grad_norm": 1.5638116598129272, "learning_rate": 0.000509090909090909, "loss": 0.4101, "step": 259120 }, { "epoch": 74.54833141542002, "grad_norm": 0.7689051032066345, "learning_rate": 0.0005090333716915996, "loss": 0.3646, "step": 259130 }, { "epoch": 74.5512082853855, "grad_norm": 2.987100839614868, "learning_rate": 0.00050897583429229, "loss": 0.4521, "step": 259140 }, { "epoch": 74.55408515535098, "grad_norm": 0.9102519750595093, "learning_rate": 0.0005089182968929804, "loss": 0.3346, "step": 259150 }, { "epoch": 74.55696202531645, "grad_norm": 2.873164653778076, "learning_rate": 0.000508860759493671, "loss": 0.4373, "step": 259160 }, { "epoch": 74.55983889528193, "grad_norm": 0.8027729988098145, "learning_rate": 0.0005088032220943614, "loss": 0.421, "step": 259170 }, { "epoch": 74.5627157652474, "grad_norm": 1.1261128187179565, "learning_rate": 0.0005087456846950517, "loss": 0.3395, "step": 259180 }, { "epoch": 74.5655926352129, "grad_norm": 0.8311594128608704, "learning_rate": 0.0005086881472957422, "loss": 0.3766, "step": 259190 }, { "epoch": 74.56846950517837, "grad_norm": 1.2492786645889282, "learning_rate": 0.0005086306098964328, "loss": 0.4078, "step": 259200 }, { "epoch": 74.57134637514385, "grad_norm": 1.5403860807418823, "learning_rate": 0.0005085730724971231, "loss": 0.4695, "step": 259210 }, { "epoch": 74.57422324510932, "grad_norm": 1.3187123537063599, "learning_rate": 0.0005085155350978136, "loss": 0.4511, "step": 259220 }, { "epoch": 74.5771001150748, "grad_norm": 1.5960365533828735, "learning_rate": 0.0005084579976985041, "loss": 0.3729, "step": 259230 }, { "epoch": 74.57997698504028, "grad_norm": 0.8474758863449097, "learning_rate": 0.0005084004602991945, "loss": 0.3674, "step": 259240 }, { "epoch": 74.58285385500575, "grad_norm": 1.2521133422851562, "learning_rate": 0.0005083429228998849, "loss": 0.3559, "step": 259250 }, { "epoch": 74.58573072497123, "grad_norm": 1.1551392078399658, "learning_rate": 0.0005082853855005754, "loss": 0.3581, "step": 259260 }, { "epoch": 74.5886075949367, "grad_norm": 1.0986367464065552, "learning_rate": 0.0005082278481012658, "loss": 0.3127, "step": 259270 }, { "epoch": 74.59148446490218, "grad_norm": 0.8874123692512512, "learning_rate": 0.0005081703107019563, "loss": 0.4464, "step": 259280 }, { "epoch": 74.59436133486767, "grad_norm": 1.410219669342041, "learning_rate": 0.0005081127733026468, "loss": 0.4505, "step": 259290 }, { "epoch": 74.59723820483315, "grad_norm": 0.9654346108436584, "learning_rate": 0.0005080552359033371, "loss": 0.3208, "step": 259300 }, { "epoch": 74.60011507479862, "grad_norm": 2.4236836433410645, "learning_rate": 0.0005079976985040277, "loss": 0.4062, "step": 259310 }, { "epoch": 74.6029919447641, "grad_norm": 1.0637978315353394, "learning_rate": 0.0005079401611047181, "loss": 0.3702, "step": 259320 }, { "epoch": 74.60586881472958, "grad_norm": 1.9482940435409546, "learning_rate": 0.0005078826237054085, "loss": 0.4856, "step": 259330 }, { "epoch": 74.60874568469505, "grad_norm": 0.534032940864563, "learning_rate": 0.000507825086306099, "loss": 0.3542, "step": 259340 }, { "epoch": 74.61162255466053, "grad_norm": 1.2119519710540771, "learning_rate": 0.0005077675489067895, "loss": 0.4057, "step": 259350 }, { "epoch": 74.614499424626, "grad_norm": 1.5146653652191162, "learning_rate": 0.0005077100115074798, "loss": 0.4011, "step": 259360 }, { "epoch": 74.61737629459148, "grad_norm": 1.417000412940979, "learning_rate": 0.0005076524741081703, "loss": 0.3225, "step": 259370 }, { "epoch": 74.62025316455696, "grad_norm": 1.211972951889038, "learning_rate": 0.0005075949367088609, "loss": 0.3502, "step": 259380 }, { "epoch": 74.62313003452243, "grad_norm": 1.005120873451233, "learning_rate": 0.0005075373993095512, "loss": 0.4041, "step": 259390 }, { "epoch": 74.62600690448792, "grad_norm": 1.4029529094696045, "learning_rate": 0.0005074798619102417, "loss": 0.3928, "step": 259400 }, { "epoch": 74.6288837744534, "grad_norm": 1.5208311080932617, "learning_rate": 0.0005074223245109321, "loss": 0.3711, "step": 259410 }, { "epoch": 74.63176064441888, "grad_norm": 1.3405121564865112, "learning_rate": 0.0005073647871116226, "loss": 0.4236, "step": 259420 }, { "epoch": 74.63463751438435, "grad_norm": 1.2154884338378906, "learning_rate": 0.000507307249712313, "loss": 0.4031, "step": 259430 }, { "epoch": 74.63751438434983, "grad_norm": 1.8825931549072266, "learning_rate": 0.0005072497123130035, "loss": 0.3413, "step": 259440 }, { "epoch": 74.6403912543153, "grad_norm": 1.1811120510101318, "learning_rate": 0.0005071921749136939, "loss": 0.3517, "step": 259450 }, { "epoch": 74.64326812428078, "grad_norm": 1.675335168838501, "learning_rate": 0.0005071346375143844, "loss": 0.4199, "step": 259460 }, { "epoch": 74.64614499424626, "grad_norm": 1.620405912399292, "learning_rate": 0.0005070771001150747, "loss": 0.4136, "step": 259470 }, { "epoch": 74.64902186421173, "grad_norm": 1.2129210233688354, "learning_rate": 0.0005070195627157652, "loss": 0.3224, "step": 259480 }, { "epoch": 74.65189873417721, "grad_norm": 3.1779792308807373, "learning_rate": 0.0005069620253164558, "loss": 0.3987, "step": 259490 }, { "epoch": 74.6547756041427, "grad_norm": 1.5408952236175537, "learning_rate": 0.0005069044879171461, "loss": 0.3486, "step": 259500 }, { "epoch": 74.65765247410818, "grad_norm": 2.287980794906616, "learning_rate": 0.0005068469505178366, "loss": 0.4409, "step": 259510 }, { "epoch": 74.66052934407365, "grad_norm": 2.843463659286499, "learning_rate": 0.000506789413118527, "loss": 0.4587, "step": 259520 }, { "epoch": 74.66340621403913, "grad_norm": 0.8947418928146362, "learning_rate": 0.0005067318757192175, "loss": 0.4029, "step": 259530 }, { "epoch": 74.6662830840046, "grad_norm": 1.571023941040039, "learning_rate": 0.0005066743383199079, "loss": 0.3635, "step": 259540 }, { "epoch": 74.66915995397008, "grad_norm": 1.0274702310562134, "learning_rate": 0.0005066168009205984, "loss": 0.3603, "step": 259550 }, { "epoch": 74.67203682393556, "grad_norm": 1.894349455833435, "learning_rate": 0.0005065592635212888, "loss": 0.347, "step": 259560 }, { "epoch": 74.67491369390103, "grad_norm": 1.1227023601531982, "learning_rate": 0.0005065017261219793, "loss": 0.4531, "step": 259570 }, { "epoch": 74.67779056386651, "grad_norm": 0.8897979855537415, "learning_rate": 0.0005064441887226698, "loss": 0.4834, "step": 259580 }, { "epoch": 74.68066743383199, "grad_norm": 1.0712472200393677, "learning_rate": 0.0005063866513233601, "loss": 0.3275, "step": 259590 }, { "epoch": 74.68354430379746, "grad_norm": 2.811854839324951, "learning_rate": 0.0005063291139240507, "loss": 0.4503, "step": 259600 }, { "epoch": 74.68642117376295, "grad_norm": 1.0249123573303223, "learning_rate": 0.0005062715765247411, "loss": 0.3629, "step": 259610 }, { "epoch": 74.68929804372843, "grad_norm": 1.0737950801849365, "learning_rate": 0.0005062140391254315, "loss": 0.3865, "step": 259620 }, { "epoch": 74.6921749136939, "grad_norm": 1.5562223196029663, "learning_rate": 0.000506156501726122, "loss": 0.3937, "step": 259630 }, { "epoch": 74.69505178365938, "grad_norm": 1.371827244758606, "learning_rate": 0.0005060989643268125, "loss": 0.3751, "step": 259640 }, { "epoch": 74.69792865362486, "grad_norm": 1.201166033744812, "learning_rate": 0.0005060414269275028, "loss": 0.4516, "step": 259650 }, { "epoch": 74.70080552359033, "grad_norm": 1.0468289852142334, "learning_rate": 0.0005059838895281933, "loss": 0.3655, "step": 259660 }, { "epoch": 74.70368239355581, "grad_norm": 0.9165496230125427, "learning_rate": 0.0005059263521288839, "loss": 0.3577, "step": 259670 }, { "epoch": 74.70655926352129, "grad_norm": 1.1102635860443115, "learning_rate": 0.0005058688147295742, "loss": 0.4125, "step": 259680 }, { "epoch": 74.70943613348676, "grad_norm": 1.142430067062378, "learning_rate": 0.0005058112773302647, "loss": 0.3763, "step": 259690 }, { "epoch": 74.71231300345224, "grad_norm": 1.863105058670044, "learning_rate": 0.0005057537399309551, "loss": 0.4165, "step": 259700 }, { "epoch": 74.71518987341773, "grad_norm": 1.6334482431411743, "learning_rate": 0.0005056962025316456, "loss": 0.416, "step": 259710 }, { "epoch": 74.7180667433832, "grad_norm": 1.349222183227539, "learning_rate": 0.000505638665132336, "loss": 0.3616, "step": 259720 }, { "epoch": 74.72094361334868, "grad_norm": 0.8462854027748108, "learning_rate": 0.0005055811277330265, "loss": 0.3879, "step": 259730 }, { "epoch": 74.72382048331416, "grad_norm": 1.215963363647461, "learning_rate": 0.0005055235903337169, "loss": 0.3251, "step": 259740 }, { "epoch": 74.72669735327963, "grad_norm": 0.8114545941352844, "learning_rate": 0.0005054660529344074, "loss": 0.4025, "step": 259750 }, { "epoch": 74.72957422324511, "grad_norm": 0.6820865273475647, "learning_rate": 0.0005054085155350978, "loss": 0.3745, "step": 259760 }, { "epoch": 74.73245109321059, "grad_norm": 1.472367286682129, "learning_rate": 0.0005053509781357882, "loss": 0.3327, "step": 259770 }, { "epoch": 74.73532796317606, "grad_norm": 1.4893648624420166, "learning_rate": 0.0005052934407364788, "loss": 0.3819, "step": 259780 }, { "epoch": 74.73820483314154, "grad_norm": 1.2746353149414062, "learning_rate": 0.0005052359033371692, "loss": 0.4296, "step": 259790 }, { "epoch": 74.74108170310701, "grad_norm": 1.521363377571106, "learning_rate": 0.0005051783659378596, "loss": 0.3385, "step": 259800 }, { "epoch": 74.74395857307249, "grad_norm": 1.4585503339767456, "learning_rate": 0.00050512082853855, "loss": 0.3434, "step": 259810 }, { "epoch": 74.74683544303798, "grad_norm": 0.8506073355674744, "learning_rate": 0.0005050632911392406, "loss": 0.4028, "step": 259820 }, { "epoch": 74.74971231300346, "grad_norm": 1.7446845769882202, "learning_rate": 0.0005050057537399309, "loss": 0.4491, "step": 259830 }, { "epoch": 74.75258918296893, "grad_norm": 1.446229100227356, "learning_rate": 0.0005049482163406214, "loss": 0.4054, "step": 259840 }, { "epoch": 74.75546605293441, "grad_norm": 1.3001679182052612, "learning_rate": 0.0005048906789413119, "loss": 0.6093, "step": 259850 }, { "epoch": 74.75834292289989, "grad_norm": 1.3651481866836548, "learning_rate": 0.0005048331415420023, "loss": 0.3865, "step": 259860 }, { "epoch": 74.76121979286536, "grad_norm": 1.0423498153686523, "learning_rate": 0.0005047756041426927, "loss": 0.5107, "step": 259870 }, { "epoch": 74.76409666283084, "grad_norm": 1.191845178604126, "learning_rate": 0.0005047180667433832, "loss": 0.3475, "step": 259880 }, { "epoch": 74.76697353279631, "grad_norm": 1.2538076639175415, "learning_rate": 0.0005046605293440737, "loss": 0.3419, "step": 259890 }, { "epoch": 74.76985040276179, "grad_norm": 1.043702483177185, "learning_rate": 0.0005046029919447641, "loss": 0.3589, "step": 259900 }, { "epoch": 74.77272727272727, "grad_norm": 1.2438429594039917, "learning_rate": 0.0005045454545454546, "loss": 0.4801, "step": 259910 }, { "epoch": 74.77560414269276, "grad_norm": 0.7817185521125793, "learning_rate": 0.000504487917146145, "loss": 0.3539, "step": 259920 }, { "epoch": 74.77848101265823, "grad_norm": 1.1826809644699097, "learning_rate": 0.0005044303797468355, "loss": 0.4166, "step": 259930 }, { "epoch": 74.78135788262371, "grad_norm": 1.7003166675567627, "learning_rate": 0.0005043728423475259, "loss": 0.5061, "step": 259940 }, { "epoch": 74.78423475258919, "grad_norm": 0.8737557530403137, "learning_rate": 0.0005043153049482163, "loss": 0.3154, "step": 259950 }, { "epoch": 74.78711162255466, "grad_norm": 1.4920090436935425, "learning_rate": 0.0005042577675489068, "loss": 0.4295, "step": 259960 }, { "epoch": 74.78998849252014, "grad_norm": 1.9705772399902344, "learning_rate": 0.0005042002301495973, "loss": 0.4031, "step": 259970 }, { "epoch": 74.79286536248561, "grad_norm": 1.4256430864334106, "learning_rate": 0.0005041426927502876, "loss": 0.4176, "step": 259980 }, { "epoch": 74.79574223245109, "grad_norm": 1.207076907157898, "learning_rate": 0.0005040851553509781, "loss": 0.3864, "step": 259990 }, { "epoch": 74.79861910241657, "grad_norm": 0.7141652703285217, "learning_rate": 0.0005040276179516687, "loss": 0.479, "step": 260000 }, { "epoch": 74.80149597238204, "grad_norm": 2.2750394344329834, "learning_rate": 0.000503970080552359, "loss": 0.4728, "step": 260010 }, { "epoch": 74.80437284234753, "grad_norm": 1.0914918184280396, "learning_rate": 0.0005039125431530495, "loss": 0.4348, "step": 260020 }, { "epoch": 74.80724971231301, "grad_norm": 0.9254953265190125, "learning_rate": 0.00050385500575374, "loss": 0.3191, "step": 260030 }, { "epoch": 74.81012658227849, "grad_norm": 1.5743794441223145, "learning_rate": 0.0005037974683544304, "loss": 0.3855, "step": 260040 }, { "epoch": 74.81300345224396, "grad_norm": 1.2248984575271606, "learning_rate": 0.0005037399309551208, "loss": 0.341, "step": 260050 }, { "epoch": 74.81588032220944, "grad_norm": 0.9206624031066895, "learning_rate": 0.0005036823935558113, "loss": 0.3815, "step": 260060 }, { "epoch": 74.81875719217491, "grad_norm": 1.6434564590454102, "learning_rate": 0.0005036248561565017, "loss": 0.3204, "step": 260070 }, { "epoch": 74.82163406214039, "grad_norm": 1.085197925567627, "learning_rate": 0.0005035673187571922, "loss": 0.342, "step": 260080 }, { "epoch": 74.82451093210587, "grad_norm": 0.8869932293891907, "learning_rate": 0.0005035097813578827, "loss": 0.4024, "step": 260090 }, { "epoch": 74.82738780207134, "grad_norm": 0.9759949445724487, "learning_rate": 0.000503452243958573, "loss": 0.4726, "step": 260100 }, { "epoch": 74.83026467203682, "grad_norm": 1.5182738304138184, "learning_rate": 0.0005033947065592636, "loss": 0.446, "step": 260110 }, { "epoch": 74.8331415420023, "grad_norm": 0.9561417698860168, "learning_rate": 0.000503337169159954, "loss": 0.3381, "step": 260120 }, { "epoch": 74.83601841196779, "grad_norm": 1.515348196029663, "learning_rate": 0.0005032796317606444, "loss": 0.3302, "step": 260130 }, { "epoch": 74.83889528193326, "grad_norm": 2.0956814289093018, "learning_rate": 0.0005032220943613349, "loss": 0.4303, "step": 260140 }, { "epoch": 74.84177215189874, "grad_norm": 1.0553455352783203, "learning_rate": 0.0005031645569620254, "loss": 0.3671, "step": 260150 }, { "epoch": 74.84464902186421, "grad_norm": 0.9743509888648987, "learning_rate": 0.0005031070195627157, "loss": 0.3953, "step": 260160 }, { "epoch": 74.84752589182969, "grad_norm": 1.2676831483840942, "learning_rate": 0.0005030494821634062, "loss": 0.4317, "step": 260170 }, { "epoch": 74.85040276179517, "grad_norm": 1.3619422912597656, "learning_rate": 0.0005029919447640968, "loss": 0.462, "step": 260180 }, { "epoch": 74.85327963176064, "grad_norm": 1.9881700277328491, "learning_rate": 0.0005029344073647871, "loss": 0.4328, "step": 260190 }, { "epoch": 74.85615650172612, "grad_norm": 0.6198828816413879, "learning_rate": 0.0005028768699654776, "loss": 0.3353, "step": 260200 }, { "epoch": 74.8590333716916, "grad_norm": 1.4506282806396484, "learning_rate": 0.0005028193325661681, "loss": 0.3708, "step": 260210 }, { "epoch": 74.86191024165707, "grad_norm": 1.7040112018585205, "learning_rate": 0.0005027617951668585, "loss": 0.3565, "step": 260220 }, { "epoch": 74.86478711162256, "grad_norm": 1.9957010746002197, "learning_rate": 0.0005027042577675489, "loss": 0.4178, "step": 260230 }, { "epoch": 74.86766398158804, "grad_norm": 1.363264799118042, "learning_rate": 0.0005026467203682394, "loss": 0.3635, "step": 260240 }, { "epoch": 74.87054085155351, "grad_norm": 1.3493335247039795, "learning_rate": 0.0005025891829689298, "loss": 0.3621, "step": 260250 }, { "epoch": 74.87341772151899, "grad_norm": 1.8649705648422241, "learning_rate": 0.0005025316455696203, "loss": 0.3184, "step": 260260 }, { "epoch": 74.87629459148447, "grad_norm": 1.387291669845581, "learning_rate": 0.0005024741081703107, "loss": 0.3989, "step": 260270 }, { "epoch": 74.87917146144994, "grad_norm": 1.0113650560379028, "learning_rate": 0.0005024165707710011, "loss": 0.3358, "step": 260280 }, { "epoch": 74.88204833141542, "grad_norm": 2.0613489151000977, "learning_rate": 0.0005023590333716917, "loss": 0.5771, "step": 260290 }, { "epoch": 74.8849252013809, "grad_norm": 0.8623479604721069, "learning_rate": 0.000502301495972382, "loss": 0.4225, "step": 260300 }, { "epoch": 74.88780207134637, "grad_norm": 0.86472487449646, "learning_rate": 0.0005022439585730725, "loss": 0.4449, "step": 260310 }, { "epoch": 74.89067894131185, "grad_norm": 1.631080985069275, "learning_rate": 0.000502186421173763, "loss": 0.3629, "step": 260320 }, { "epoch": 74.89355581127732, "grad_norm": 1.238930344581604, "learning_rate": 0.0005021288837744534, "loss": 0.3409, "step": 260330 }, { "epoch": 74.89643268124281, "grad_norm": 1.248114824295044, "learning_rate": 0.0005020713463751438, "loss": 0.3756, "step": 260340 }, { "epoch": 74.89930955120829, "grad_norm": 1.8576629161834717, "learning_rate": 0.0005020138089758343, "loss": 0.4227, "step": 260350 }, { "epoch": 74.90218642117377, "grad_norm": 0.7528496384620667, "learning_rate": 0.0005019562715765247, "loss": 0.3894, "step": 260360 }, { "epoch": 74.90506329113924, "grad_norm": 1.1153302192687988, "learning_rate": 0.0005018987341772152, "loss": 0.476, "step": 260370 }, { "epoch": 74.90794016110472, "grad_norm": 1.309607982635498, "learning_rate": 0.0005018411967779057, "loss": 0.3377, "step": 260380 }, { "epoch": 74.9108170310702, "grad_norm": 0.7373248934745789, "learning_rate": 0.000501783659378596, "loss": 0.3755, "step": 260390 }, { "epoch": 74.91369390103567, "grad_norm": 1.5301076173782349, "learning_rate": 0.0005017261219792866, "loss": 0.3375, "step": 260400 }, { "epoch": 74.91657077100115, "grad_norm": 2.1191139221191406, "learning_rate": 0.000501668584579977, "loss": 0.4535, "step": 260410 }, { "epoch": 74.91944764096662, "grad_norm": 1.0498825311660767, "learning_rate": 0.0005016110471806674, "loss": 0.3984, "step": 260420 }, { "epoch": 74.9223245109321, "grad_norm": 1.6326136589050293, "learning_rate": 0.0005015535097813579, "loss": 0.3677, "step": 260430 }, { "epoch": 74.92520138089759, "grad_norm": 1.4682538509368896, "learning_rate": 0.0005014959723820484, "loss": 0.3638, "step": 260440 }, { "epoch": 74.92807825086307, "grad_norm": 1.7885183095932007, "learning_rate": 0.0005014384349827387, "loss": 0.3494, "step": 260450 }, { "epoch": 74.93095512082854, "grad_norm": 0.9004160165786743, "learning_rate": 0.0005013808975834292, "loss": 0.3353, "step": 260460 }, { "epoch": 74.93383199079402, "grad_norm": 0.9943715929985046, "learning_rate": 0.0005013233601841198, "loss": 0.3906, "step": 260470 }, { "epoch": 74.9367088607595, "grad_norm": 1.8111013174057007, "learning_rate": 0.0005012658227848101, "loss": 0.4074, "step": 260480 }, { "epoch": 74.93958573072497, "grad_norm": 0.7662106156349182, "learning_rate": 0.0005012082853855006, "loss": 0.3843, "step": 260490 }, { "epoch": 74.94246260069045, "grad_norm": 1.2256183624267578, "learning_rate": 0.000501150747986191, "loss": 0.35, "step": 260500 }, { "epoch": 74.94533947065592, "grad_norm": 0.8713206648826599, "learning_rate": 0.0005010932105868815, "loss": 0.3701, "step": 260510 }, { "epoch": 74.9482163406214, "grad_norm": 1.4407882690429688, "learning_rate": 0.0005010356731875719, "loss": 0.4318, "step": 260520 }, { "epoch": 74.95109321058688, "grad_norm": 0.8978109955787659, "learning_rate": 0.0005009781357882624, "loss": 0.3774, "step": 260530 }, { "epoch": 74.95397008055235, "grad_norm": 1.8289319276809692, "learning_rate": 0.0005009205983889528, "loss": 0.4524, "step": 260540 }, { "epoch": 74.95684695051784, "grad_norm": 1.108579397201538, "learning_rate": 0.0005008630609896433, "loss": 0.3884, "step": 260550 }, { "epoch": 74.95972382048332, "grad_norm": 1.0686956644058228, "learning_rate": 0.0005008055235903337, "loss": 0.3541, "step": 260560 }, { "epoch": 74.9626006904488, "grad_norm": 1.4507819414138794, "learning_rate": 0.0005007479861910241, "loss": 0.446, "step": 260570 }, { "epoch": 74.96547756041427, "grad_norm": 0.8273934721946716, "learning_rate": 0.0005006904487917147, "loss": 0.3696, "step": 260580 }, { "epoch": 74.96835443037975, "grad_norm": 1.1630916595458984, "learning_rate": 0.0005006329113924051, "loss": 0.4374, "step": 260590 }, { "epoch": 74.97123130034522, "grad_norm": 1.7059653997421265, "learning_rate": 0.0005005753739930955, "loss": 0.4537, "step": 260600 }, { "epoch": 74.9741081703107, "grad_norm": 1.3934955596923828, "learning_rate": 0.000500517836593786, "loss": 0.3999, "step": 260610 }, { "epoch": 74.97698504027618, "grad_norm": 1.6076817512512207, "learning_rate": 0.0005004602991944765, "loss": 0.3758, "step": 260620 }, { "epoch": 74.97986191024165, "grad_norm": 0.8485301733016968, "learning_rate": 0.0005004027617951668, "loss": 0.3905, "step": 260630 }, { "epoch": 74.98273878020713, "grad_norm": 1.1249195337295532, "learning_rate": 0.0005003452243958573, "loss": 0.3951, "step": 260640 }, { "epoch": 74.98561565017262, "grad_norm": 1.0930473804473877, "learning_rate": 0.0005002876869965478, "loss": 0.3487, "step": 260650 }, { "epoch": 74.9884925201381, "grad_norm": 0.9122366309165955, "learning_rate": 0.0005002301495972382, "loss": 0.4588, "step": 260660 }, { "epoch": 74.99136939010357, "grad_norm": 0.886994481086731, "learning_rate": 0.0005001726121979286, "loss": 0.417, "step": 260670 }, { "epoch": 74.99424626006905, "grad_norm": 2.051039457321167, "learning_rate": 0.0005001150747986191, "loss": 0.3541, "step": 260680 }, { "epoch": 74.99712313003452, "grad_norm": 0.6611050963401794, "learning_rate": 0.0005000575373993096, "loss": 0.4847, "step": 260690 }, { "epoch": 75.0, "grad_norm": 0.939800500869751, "learning_rate": 0.0005, "loss": 0.3296, "step": 260700 }, { "epoch": 75.00287686996548, "grad_norm": 1.8178701400756836, "learning_rate": 0.0004999424626006905, "loss": 0.3808, "step": 260710 }, { "epoch": 75.00575373993095, "grad_norm": 0.5345084071159363, "learning_rate": 0.0004998849252013809, "loss": 0.3316, "step": 260720 }, { "epoch": 75.00863060989643, "grad_norm": 1.8189442157745361, "learning_rate": 0.0004998273878020714, "loss": 0.4726, "step": 260730 }, { "epoch": 75.0115074798619, "grad_norm": 1.774070382118225, "learning_rate": 0.0004997698504027618, "loss": 0.4129, "step": 260740 }, { "epoch": 75.01438434982738, "grad_norm": 1.2914317846298218, "learning_rate": 0.0004997123130034523, "loss": 0.3703, "step": 260750 }, { "epoch": 75.01726121979287, "grad_norm": 0.9697668552398682, "learning_rate": 0.0004996547756041426, "loss": 0.4532, "step": 260760 }, { "epoch": 75.02013808975835, "grad_norm": 2.1608963012695312, "learning_rate": 0.0004995972382048332, "loss": 0.3745, "step": 260770 }, { "epoch": 75.02301495972382, "grad_norm": 0.7711136937141418, "learning_rate": 0.0004995397008055235, "loss": 0.3911, "step": 260780 }, { "epoch": 75.0258918296893, "grad_norm": 0.777714192867279, "learning_rate": 0.000499482163406214, "loss": 0.2912, "step": 260790 }, { "epoch": 75.02876869965478, "grad_norm": 0.9246845245361328, "learning_rate": 0.0004994246260069046, "loss": 0.3693, "step": 260800 }, { "epoch": 75.03164556962025, "grad_norm": 1.467966914176941, "learning_rate": 0.0004993670886075949, "loss": 0.39, "step": 260810 }, { "epoch": 75.03452243958573, "grad_norm": 0.9862861037254333, "learning_rate": 0.0004993095512082854, "loss": 0.3389, "step": 260820 }, { "epoch": 75.0373993095512, "grad_norm": 0.9736180901527405, "learning_rate": 0.0004992520138089758, "loss": 0.326, "step": 260830 }, { "epoch": 75.04027617951668, "grad_norm": 1.4022520780563354, "learning_rate": 0.0004991944764096663, "loss": 0.3846, "step": 260840 }, { "epoch": 75.04315304948216, "grad_norm": 0.9697163105010986, "learning_rate": 0.0004991369390103567, "loss": 0.3395, "step": 260850 }, { "epoch": 75.04602991944765, "grad_norm": 1.2276860475540161, "learning_rate": 0.0004990794016110472, "loss": 0.3684, "step": 260860 }, { "epoch": 75.04890678941312, "grad_norm": 1.9158753156661987, "learning_rate": 0.0004990218642117376, "loss": 0.3653, "step": 260870 }, { "epoch": 75.0517836593786, "grad_norm": 0.9221873879432678, "learning_rate": 0.0004989643268124281, "loss": 0.375, "step": 260880 }, { "epoch": 75.05466052934408, "grad_norm": 1.3706618547439575, "learning_rate": 0.0004989067894131186, "loss": 0.2976, "step": 260890 }, { "epoch": 75.05753739930955, "grad_norm": 1.3055222034454346, "learning_rate": 0.000498849252013809, "loss": 0.4074, "step": 260900 }, { "epoch": 75.06041426927503, "grad_norm": 1.2518432140350342, "learning_rate": 0.0004987917146144995, "loss": 0.3329, "step": 260910 }, { "epoch": 75.0632911392405, "grad_norm": 1.7202093601226807, "learning_rate": 0.0004987341772151899, "loss": 0.3801, "step": 260920 }, { "epoch": 75.06616800920598, "grad_norm": 1.8825825452804565, "learning_rate": 0.0004986766398158804, "loss": 0.4148, "step": 260930 }, { "epoch": 75.06904487917146, "grad_norm": 1.091835379600525, "learning_rate": 0.0004986191024165707, "loss": 0.3314, "step": 260940 }, { "epoch": 75.07192174913693, "grad_norm": 1.2217093706130981, "learning_rate": 0.0004985615650172613, "loss": 0.317, "step": 260950 }, { "epoch": 75.07479861910241, "grad_norm": 2.2672667503356934, "learning_rate": 0.0004985040276179516, "loss": 0.4061, "step": 260960 }, { "epoch": 75.0776754890679, "grad_norm": 1.180301308631897, "learning_rate": 0.0004984464902186421, "loss": 0.3902, "step": 260970 }, { "epoch": 75.08055235903338, "grad_norm": 1.2000693082809448, "learning_rate": 0.0004983889528193325, "loss": 0.3511, "step": 260980 }, { "epoch": 75.08342922899885, "grad_norm": 1.1907291412353516, "learning_rate": 0.000498331415420023, "loss": 0.3945, "step": 260990 }, { "epoch": 75.08630609896433, "grad_norm": 1.386765956878662, "learning_rate": 0.0004982738780207135, "loss": 0.3358, "step": 261000 }, { "epoch": 75.0891829689298, "grad_norm": 2.451826810836792, "learning_rate": 0.0004982163406214039, "loss": 0.486, "step": 261010 }, { "epoch": 75.09205983889528, "grad_norm": 2.451173782348633, "learning_rate": 0.0004981588032220944, "loss": 0.41, "step": 261020 }, { "epoch": 75.09493670886076, "grad_norm": 1.50688636302948, "learning_rate": 0.0004981012658227848, "loss": 0.3536, "step": 261030 }, { "epoch": 75.09781357882623, "grad_norm": 1.4705301523208618, "learning_rate": 0.0004980437284234753, "loss": 0.4457, "step": 261040 }, { "epoch": 75.10069044879171, "grad_norm": 1.221934199333191, "learning_rate": 0.0004979861910241657, "loss": 0.3467, "step": 261050 }, { "epoch": 75.10356731875719, "grad_norm": 2.1328840255737305, "learning_rate": 0.0004979286536248562, "loss": 0.4319, "step": 261060 }, { "epoch": 75.10644418872268, "grad_norm": 0.8301779627799988, "learning_rate": 0.0004978711162255466, "loss": 0.4384, "step": 261070 }, { "epoch": 75.10932105868815, "grad_norm": 1.7736718654632568, "learning_rate": 0.0004978135788262371, "loss": 0.3912, "step": 261080 }, { "epoch": 75.11219792865363, "grad_norm": 1.1379262208938599, "learning_rate": 0.0004977560414269274, "loss": 0.4541, "step": 261090 }, { "epoch": 75.1150747986191, "grad_norm": 1.6474509239196777, "learning_rate": 0.000497698504027618, "loss": 0.3113, "step": 261100 }, { "epoch": 75.11795166858458, "grad_norm": 1.1917266845703125, "learning_rate": 0.0004976409666283085, "loss": 0.3566, "step": 261110 }, { "epoch": 75.12082853855006, "grad_norm": 0.7037675380706787, "learning_rate": 0.0004975834292289988, "loss": 0.3248, "step": 261120 }, { "epoch": 75.12370540851553, "grad_norm": 0.8567432761192322, "learning_rate": 0.0004975258918296893, "loss": 0.3805, "step": 261130 }, { "epoch": 75.12658227848101, "grad_norm": 1.1293461322784424, "learning_rate": 0.0004974683544303797, "loss": 0.3969, "step": 261140 }, { "epoch": 75.12945914844649, "grad_norm": 2.043802499771118, "learning_rate": 0.0004974108170310702, "loss": 0.5368, "step": 261150 }, { "epoch": 75.13233601841196, "grad_norm": 1.063388705253601, "learning_rate": 0.0004973532796317606, "loss": 0.3405, "step": 261160 }, { "epoch": 75.13521288837744, "grad_norm": 1.0382449626922607, "learning_rate": 0.0004972957422324511, "loss": 0.4058, "step": 261170 }, { "epoch": 75.13808975834293, "grad_norm": 1.06337571144104, "learning_rate": 0.0004972382048331415, "loss": 0.4348, "step": 261180 }, { "epoch": 75.1409666283084, "grad_norm": 1.268859624862671, "learning_rate": 0.000497180667433832, "loss": 0.3382, "step": 261190 }, { "epoch": 75.14384349827388, "grad_norm": 0.7747922539710999, "learning_rate": 0.0004971231300345225, "loss": 0.4172, "step": 261200 }, { "epoch": 75.14672036823936, "grad_norm": 0.7201324701309204, "learning_rate": 0.0004970655926352129, "loss": 0.4475, "step": 261210 }, { "epoch": 75.14959723820483, "grad_norm": 1.4751524925231934, "learning_rate": 0.0004970080552359034, "loss": 0.4036, "step": 261220 }, { "epoch": 75.15247410817031, "grad_norm": 0.8942359685897827, "learning_rate": 0.0004969505178365938, "loss": 0.3391, "step": 261230 }, { "epoch": 75.15535097813579, "grad_norm": 1.0361804962158203, "learning_rate": 0.0004968929804372843, "loss": 0.3637, "step": 261240 }, { "epoch": 75.15822784810126, "grad_norm": 0.8713108897209167, "learning_rate": 0.0004968354430379746, "loss": 0.3883, "step": 261250 }, { "epoch": 75.16110471806674, "grad_norm": 0.6814387440681458, "learning_rate": 0.0004967779056386652, "loss": 0.3991, "step": 261260 }, { "epoch": 75.16398158803221, "grad_norm": 1.7709349393844604, "learning_rate": 0.0004967203682393555, "loss": 0.3925, "step": 261270 }, { "epoch": 75.1668584579977, "grad_norm": 1.2213984727859497, "learning_rate": 0.000496662830840046, "loss": 0.3868, "step": 261280 }, { "epoch": 75.16973532796318, "grad_norm": 0.6357776522636414, "learning_rate": 0.0004966052934407366, "loss": 0.3566, "step": 261290 }, { "epoch": 75.17261219792866, "grad_norm": 0.45354458689689636, "learning_rate": 0.0004965477560414269, "loss": 0.3059, "step": 261300 }, { "epoch": 75.17548906789413, "grad_norm": 0.989769458770752, "learning_rate": 0.0004964902186421174, "loss": 0.3334, "step": 261310 }, { "epoch": 75.17836593785961, "grad_norm": 1.3575934171676636, "learning_rate": 0.0004964326812428078, "loss": 0.357, "step": 261320 }, { "epoch": 75.18124280782509, "grad_norm": 1.0343294143676758, "learning_rate": 0.0004963751438434983, "loss": 0.3194, "step": 261330 }, { "epoch": 75.18411967779056, "grad_norm": 1.0212512016296387, "learning_rate": 0.0004963176064441887, "loss": 0.4182, "step": 261340 }, { "epoch": 75.18699654775604, "grad_norm": 1.6624943017959595, "learning_rate": 0.0004962600690448792, "loss": 0.4096, "step": 261350 }, { "epoch": 75.18987341772151, "grad_norm": 2.9287726879119873, "learning_rate": 0.0004962025316455696, "loss": 0.4552, "step": 261360 }, { "epoch": 75.19275028768699, "grad_norm": 2.0433053970336914, "learning_rate": 0.0004961449942462601, "loss": 0.3408, "step": 261370 }, { "epoch": 75.19562715765247, "grad_norm": 1.053758144378662, "learning_rate": 0.0004960874568469506, "loss": 0.448, "step": 261380 }, { "epoch": 75.19850402761796, "grad_norm": 1.0140482187271118, "learning_rate": 0.000496029919447641, "loss": 0.3342, "step": 261390 }, { "epoch": 75.20138089758343, "grad_norm": 0.8092617392539978, "learning_rate": 0.0004959723820483315, "loss": 0.329, "step": 261400 }, { "epoch": 75.20425776754891, "grad_norm": 1.7179287672042847, "learning_rate": 0.0004959148446490219, "loss": 0.4182, "step": 261410 }, { "epoch": 75.20713463751439, "grad_norm": 0.9009861350059509, "learning_rate": 0.0004958573072497124, "loss": 0.3871, "step": 261420 }, { "epoch": 75.21001150747986, "grad_norm": 0.6368340253829956, "learning_rate": 0.0004957997698504027, "loss": 0.3485, "step": 261430 }, { "epoch": 75.21288837744534, "grad_norm": 1.488411784172058, "learning_rate": 0.0004957422324510933, "loss": 0.4071, "step": 261440 }, { "epoch": 75.21576524741081, "grad_norm": 1.603906512260437, "learning_rate": 0.0004956846950517836, "loss": 0.4249, "step": 261450 }, { "epoch": 75.21864211737629, "grad_norm": 1.6994779109954834, "learning_rate": 0.0004956271576524741, "loss": 0.4959, "step": 261460 }, { "epoch": 75.22151898734177, "grad_norm": 1.237260341644287, "learning_rate": 0.0004955696202531645, "loss": 0.3743, "step": 261470 }, { "epoch": 75.22439585730724, "grad_norm": 1.180620551109314, "learning_rate": 0.000495512082853855, "loss": 0.3804, "step": 261480 }, { "epoch": 75.22727272727273, "grad_norm": 0.9054754376411438, "learning_rate": 0.0004954545454545455, "loss": 0.38, "step": 261490 }, { "epoch": 75.23014959723821, "grad_norm": 1.4923994541168213, "learning_rate": 0.0004953970080552359, "loss": 0.2936, "step": 261500 }, { "epoch": 75.23302646720369, "grad_norm": 1.396952748298645, "learning_rate": 0.0004953394706559264, "loss": 0.3694, "step": 261510 }, { "epoch": 75.23590333716916, "grad_norm": 0.9498634338378906, "learning_rate": 0.0004952819332566168, "loss": 0.366, "step": 261520 }, { "epoch": 75.23878020713464, "grad_norm": 1.5920722484588623, "learning_rate": 0.0004952243958573073, "loss": 0.3787, "step": 261530 }, { "epoch": 75.24165707710011, "grad_norm": 1.357292652130127, "learning_rate": 0.0004951668584579977, "loss": 0.3378, "step": 261540 }, { "epoch": 75.24453394706559, "grad_norm": 1.4460411071777344, "learning_rate": 0.0004951093210586882, "loss": 0.4028, "step": 261550 }, { "epoch": 75.24741081703107, "grad_norm": 1.448641061782837, "learning_rate": 0.0004950517836593785, "loss": 0.3764, "step": 261560 }, { "epoch": 75.25028768699654, "grad_norm": 0.9519826173782349, "learning_rate": 0.0004949942462600691, "loss": 0.3573, "step": 261570 }, { "epoch": 75.25316455696202, "grad_norm": 0.8307058811187744, "learning_rate": 0.0004949367088607594, "loss": 0.3215, "step": 261580 }, { "epoch": 75.25604142692751, "grad_norm": 1.2457810640335083, "learning_rate": 0.0004948791714614499, "loss": 0.3773, "step": 261590 }, { "epoch": 75.25891829689299, "grad_norm": 0.9804647564888, "learning_rate": 0.0004948216340621405, "loss": 0.4144, "step": 261600 }, { "epoch": 75.26179516685846, "grad_norm": 1.3667734861373901, "learning_rate": 0.0004947640966628308, "loss": 0.3582, "step": 261610 }, { "epoch": 75.26467203682394, "grad_norm": 1.5628492832183838, "learning_rate": 0.0004947065592635213, "loss": 0.333, "step": 261620 }, { "epoch": 75.26754890678941, "grad_norm": 1.0327666997909546, "learning_rate": 0.0004946490218642117, "loss": 0.4156, "step": 261630 }, { "epoch": 75.27042577675489, "grad_norm": 1.6516649723052979, "learning_rate": 0.0004945914844649022, "loss": 0.4635, "step": 261640 }, { "epoch": 75.27330264672037, "grad_norm": 0.9894059896469116, "learning_rate": 0.0004945339470655926, "loss": 0.3924, "step": 261650 }, { "epoch": 75.27617951668584, "grad_norm": 0.9979164004325867, "learning_rate": 0.0004944764096662831, "loss": 0.3382, "step": 261660 }, { "epoch": 75.27905638665132, "grad_norm": 0.8025639653205872, "learning_rate": 0.0004944188722669735, "loss": 0.3701, "step": 261670 }, { "epoch": 75.2819332566168, "grad_norm": 1.8583364486694336, "learning_rate": 0.000494361334867664, "loss": 0.4085, "step": 261680 }, { "epoch": 75.28481012658227, "grad_norm": 1.2196550369262695, "learning_rate": 0.0004943037974683545, "loss": 0.3604, "step": 261690 }, { "epoch": 75.28768699654776, "grad_norm": 0.6226435303688049, "learning_rate": 0.0004942462600690449, "loss": 0.5094, "step": 261700 }, { "epoch": 75.29056386651324, "grad_norm": 1.0290368795394897, "learning_rate": 0.0004941887226697354, "loss": 0.3811, "step": 261710 }, { "epoch": 75.29344073647871, "grad_norm": 1.1889705657958984, "learning_rate": 0.0004941311852704258, "loss": 0.3706, "step": 261720 }, { "epoch": 75.29631760644419, "grad_norm": 1.1611449718475342, "learning_rate": 0.0004940736478711163, "loss": 0.322, "step": 261730 }, { "epoch": 75.29919447640967, "grad_norm": 1.447227120399475, "learning_rate": 0.0004940161104718066, "loss": 0.3787, "step": 261740 }, { "epoch": 75.30207134637514, "grad_norm": 0.9206447601318359, "learning_rate": 0.0004939585730724972, "loss": 0.3607, "step": 261750 }, { "epoch": 75.30494821634062, "grad_norm": 1.3971341848373413, "learning_rate": 0.0004939010356731875, "loss": 0.3403, "step": 261760 }, { "epoch": 75.3078250863061, "grad_norm": 1.5336021184921265, "learning_rate": 0.000493843498273878, "loss": 0.3607, "step": 261770 }, { "epoch": 75.31070195627157, "grad_norm": 1.2441102266311646, "learning_rate": 0.0004937859608745686, "loss": 0.3554, "step": 261780 }, { "epoch": 75.31357882623705, "grad_norm": 1.3713302612304688, "learning_rate": 0.0004937284234752589, "loss": 0.3573, "step": 261790 }, { "epoch": 75.31645569620254, "grad_norm": 1.0447419881820679, "learning_rate": 0.0004936708860759494, "loss": 0.3759, "step": 261800 }, { "epoch": 75.31933256616801, "grad_norm": 1.0710147619247437, "learning_rate": 0.0004936133486766398, "loss": 0.3063, "step": 261810 }, { "epoch": 75.32220943613349, "grad_norm": 1.9080471992492676, "learning_rate": 0.0004935558112773303, "loss": 0.3866, "step": 261820 }, { "epoch": 75.32508630609897, "grad_norm": 1.4899407625198364, "learning_rate": 0.0004934982738780207, "loss": 0.4336, "step": 261830 }, { "epoch": 75.32796317606444, "grad_norm": 0.9444898962974548, "learning_rate": 0.0004934407364787112, "loss": 0.3897, "step": 261840 }, { "epoch": 75.33084004602992, "grad_norm": 0.8680140972137451, "learning_rate": 0.0004933831990794016, "loss": 0.4791, "step": 261850 }, { "epoch": 75.3337169159954, "grad_norm": 1.375778079032898, "learning_rate": 0.0004933256616800921, "loss": 0.3799, "step": 261860 }, { "epoch": 75.33659378596087, "grad_norm": 1.0069873332977295, "learning_rate": 0.0004932681242807825, "loss": 0.3668, "step": 261870 }, { "epoch": 75.33947065592635, "grad_norm": 0.994864284992218, "learning_rate": 0.000493210586881473, "loss": 0.4026, "step": 261880 }, { "epoch": 75.34234752589182, "grad_norm": 0.8882641792297363, "learning_rate": 0.0004931530494821635, "loss": 0.3802, "step": 261890 }, { "epoch": 75.3452243958573, "grad_norm": 1.3087512254714966, "learning_rate": 0.0004930955120828539, "loss": 0.3744, "step": 261900 }, { "epoch": 75.34810126582279, "grad_norm": 0.8322378396987915, "learning_rate": 0.0004930379746835444, "loss": 0.4064, "step": 261910 }, { "epoch": 75.35097813578827, "grad_norm": 0.6556897163391113, "learning_rate": 0.0004929804372842347, "loss": 0.3601, "step": 261920 }, { "epoch": 75.35385500575374, "grad_norm": 0.8741576671600342, "learning_rate": 0.0004929228998849253, "loss": 0.3034, "step": 261930 }, { "epoch": 75.35673187571922, "grad_norm": 1.8253114223480225, "learning_rate": 0.0004928653624856156, "loss": 0.4668, "step": 261940 }, { "epoch": 75.3596087456847, "grad_norm": 1.776018500328064, "learning_rate": 0.0004928078250863061, "loss": 0.3887, "step": 261950 }, { "epoch": 75.36248561565017, "grad_norm": 1.5964076519012451, "learning_rate": 0.0004927502876869965, "loss": 0.3047, "step": 261960 }, { "epoch": 75.36536248561565, "grad_norm": 0.7107298970222473, "learning_rate": 0.000492692750287687, "loss": 0.4051, "step": 261970 }, { "epoch": 75.36823935558112, "grad_norm": 0.8362175822257996, "learning_rate": 0.0004926352128883774, "loss": 0.3039, "step": 261980 }, { "epoch": 75.3711162255466, "grad_norm": 1.134974718093872, "learning_rate": 0.0004925776754890679, "loss": 0.3605, "step": 261990 }, { "epoch": 75.37399309551208, "grad_norm": 1.1960039138793945, "learning_rate": 0.0004925201380897584, "loss": 0.3718, "step": 262000 }, { "epoch": 75.37686996547757, "grad_norm": 1.8581961393356323, "learning_rate": 0.0004924626006904488, "loss": 0.372, "step": 262010 }, { "epoch": 75.37974683544304, "grad_norm": 1.4637658596038818, "learning_rate": 0.0004924050632911393, "loss": 0.4056, "step": 262020 }, { "epoch": 75.38262370540852, "grad_norm": 1.1129578351974487, "learning_rate": 0.0004923475258918297, "loss": 0.3445, "step": 262030 }, { "epoch": 75.385500575374, "grad_norm": 1.43739652633667, "learning_rate": 0.0004922899884925202, "loss": 0.4302, "step": 262040 }, { "epoch": 75.38837744533947, "grad_norm": 1.9513729810714722, "learning_rate": 0.0004922324510932105, "loss": 0.448, "step": 262050 }, { "epoch": 75.39125431530495, "grad_norm": 0.7920466661453247, "learning_rate": 0.0004921749136939011, "loss": 0.4064, "step": 262060 }, { "epoch": 75.39413118527042, "grad_norm": 1.3414013385772705, "learning_rate": 0.0004921173762945914, "loss": 0.4484, "step": 262070 }, { "epoch": 75.3970080552359, "grad_norm": 1.0646811723709106, "learning_rate": 0.0004920598388952819, "loss": 0.4343, "step": 262080 }, { "epoch": 75.39988492520138, "grad_norm": 0.7732225656509399, "learning_rate": 0.0004920023014959725, "loss": 0.4289, "step": 262090 }, { "epoch": 75.40276179516685, "grad_norm": 2.3456084728240967, "learning_rate": 0.0004919447640966628, "loss": 0.3529, "step": 262100 }, { "epoch": 75.40563866513233, "grad_norm": 0.734795093536377, "learning_rate": 0.0004918872266973533, "loss": 0.3788, "step": 262110 }, { "epoch": 75.40851553509782, "grad_norm": 1.8771905899047852, "learning_rate": 0.0004918296892980437, "loss": 0.486, "step": 262120 }, { "epoch": 75.4113924050633, "grad_norm": 2.0837295055389404, "learning_rate": 0.0004917721518987342, "loss": 0.3671, "step": 262130 }, { "epoch": 75.41426927502877, "grad_norm": 0.7946897745132446, "learning_rate": 0.0004917146144994246, "loss": 0.3668, "step": 262140 }, { "epoch": 75.41714614499425, "grad_norm": 0.8669568300247192, "learning_rate": 0.0004916570771001151, "loss": 0.3161, "step": 262150 }, { "epoch": 75.42002301495972, "grad_norm": 0.7306379079818726, "learning_rate": 0.0004915995397008055, "loss": 0.3722, "step": 262160 }, { "epoch": 75.4228998849252, "grad_norm": 1.0834206342697144, "learning_rate": 0.000491542002301496, "loss": 0.377, "step": 262170 }, { "epoch": 75.42577675489068, "grad_norm": 0.8474815487861633, "learning_rate": 0.0004914844649021864, "loss": 0.3474, "step": 262180 }, { "epoch": 75.42865362485615, "grad_norm": 0.8148206472396851, "learning_rate": 0.0004914269275028769, "loss": 0.3542, "step": 262190 }, { "epoch": 75.43153049482163, "grad_norm": 2.0918726921081543, "learning_rate": 0.0004913693901035674, "loss": 0.393, "step": 262200 }, { "epoch": 75.4344073647871, "grad_norm": 1.341174602508545, "learning_rate": 0.0004913118527042578, "loss": 0.3901, "step": 262210 }, { "epoch": 75.4372842347526, "grad_norm": 1.5656650066375732, "learning_rate": 0.0004912543153049483, "loss": 0.3815, "step": 262220 }, { "epoch": 75.44016110471807, "grad_norm": 1.4365342855453491, "learning_rate": 0.0004911967779056386, "loss": 0.4191, "step": 262230 }, { "epoch": 75.44303797468355, "grad_norm": 0.8963267803192139, "learning_rate": 0.0004911392405063292, "loss": 0.395, "step": 262240 }, { "epoch": 75.44591484464902, "grad_norm": 1.7271806001663208, "learning_rate": 0.0004910817031070195, "loss": 0.3607, "step": 262250 }, { "epoch": 75.4487917146145, "grad_norm": 1.4134970903396606, "learning_rate": 0.00049102416570771, "loss": 0.3948, "step": 262260 }, { "epoch": 75.45166858457998, "grad_norm": 1.463222622871399, "learning_rate": 0.0004909666283084005, "loss": 0.3842, "step": 262270 }, { "epoch": 75.45454545454545, "grad_norm": 1.251277208328247, "learning_rate": 0.0004909090909090909, "loss": 0.3547, "step": 262280 }, { "epoch": 75.45742232451093, "grad_norm": 1.2691786289215088, "learning_rate": 0.0004908515535097814, "loss": 0.3451, "step": 262290 }, { "epoch": 75.4602991944764, "grad_norm": 0.9979413151741028, "learning_rate": 0.0004907940161104718, "loss": 0.3565, "step": 262300 }, { "epoch": 75.46317606444188, "grad_norm": 1.795841097831726, "learning_rate": 0.0004907364787111623, "loss": 0.3729, "step": 262310 }, { "epoch": 75.46605293440736, "grad_norm": 1.0054614543914795, "learning_rate": 0.0004906789413118527, "loss": 0.276, "step": 262320 }, { "epoch": 75.46892980437285, "grad_norm": 1.0794856548309326, "learning_rate": 0.0004906214039125432, "loss": 0.3335, "step": 262330 }, { "epoch": 75.47180667433832, "grad_norm": 1.2494354248046875, "learning_rate": 0.0004905638665132336, "loss": 0.3995, "step": 262340 }, { "epoch": 75.4746835443038, "grad_norm": 0.983325719833374, "learning_rate": 0.0004905063291139241, "loss": 0.3997, "step": 262350 }, { "epoch": 75.47756041426928, "grad_norm": 2.3633434772491455, "learning_rate": 0.0004904487917146144, "loss": 0.3983, "step": 262360 }, { "epoch": 75.48043728423475, "grad_norm": 1.4450773000717163, "learning_rate": 0.000490391254315305, "loss": 0.4257, "step": 262370 }, { "epoch": 75.48331415420023, "grad_norm": 1.585378885269165, "learning_rate": 0.0004903337169159955, "loss": 0.3294, "step": 262380 }, { "epoch": 75.4861910241657, "grad_norm": 1.63693368434906, "learning_rate": 0.0004902761795166858, "loss": 0.3697, "step": 262390 }, { "epoch": 75.48906789413118, "grad_norm": 1.5236834287643433, "learning_rate": 0.0004902186421173764, "loss": 0.3317, "step": 262400 }, { "epoch": 75.49194476409666, "grad_norm": 2.1554207801818848, "learning_rate": 0.0004901611047180667, "loss": 0.4092, "step": 262410 }, { "epoch": 75.49482163406213, "grad_norm": 1.401320219039917, "learning_rate": 0.0004901035673187572, "loss": 0.4317, "step": 262420 }, { "epoch": 75.49769850402762, "grad_norm": 0.8415294885635376, "learning_rate": 0.0004900460299194476, "loss": 0.3361, "step": 262430 }, { "epoch": 75.5005753739931, "grad_norm": 0.7358800172805786, "learning_rate": 0.0004899884925201381, "loss": 0.371, "step": 262440 }, { "epoch": 75.50345224395858, "grad_norm": 1.6654270887374878, "learning_rate": 0.0004899309551208285, "loss": 0.3713, "step": 262450 }, { "epoch": 75.50632911392405, "grad_norm": 1.4325450658798218, "learning_rate": 0.000489873417721519, "loss": 0.3496, "step": 262460 }, { "epoch": 75.50920598388953, "grad_norm": 1.274698257446289, "learning_rate": 0.0004898158803222094, "loss": 0.3773, "step": 262470 }, { "epoch": 75.512082853855, "grad_norm": 1.5517723560333252, "learning_rate": 0.0004897583429228999, "loss": 0.3437, "step": 262480 }, { "epoch": 75.51495972382048, "grad_norm": 1.4459469318389893, "learning_rate": 0.0004897008055235904, "loss": 0.4315, "step": 262490 }, { "epoch": 75.51783659378596, "grad_norm": 0.8561514019966125, "learning_rate": 0.0004896432681242808, "loss": 0.3516, "step": 262500 }, { "epoch": 75.52071346375143, "grad_norm": 1.178684115409851, "learning_rate": 0.0004895857307249713, "loss": 0.4085, "step": 262510 }, { "epoch": 75.52359033371691, "grad_norm": 1.3333699703216553, "learning_rate": 0.0004895281933256617, "loss": 0.4827, "step": 262520 }, { "epoch": 75.52646720368239, "grad_norm": 1.2792811393737793, "learning_rate": 0.0004894706559263522, "loss": 0.4323, "step": 262530 }, { "epoch": 75.52934407364788, "grad_norm": 0.6968237161636353, "learning_rate": 0.0004894131185270425, "loss": 0.3049, "step": 262540 }, { "epoch": 75.53222094361335, "grad_norm": 0.9899774193763733, "learning_rate": 0.0004893555811277331, "loss": 0.3445, "step": 262550 }, { "epoch": 75.53509781357883, "grad_norm": 1.0527441501617432, "learning_rate": 0.0004892980437284234, "loss": 0.3127, "step": 262560 }, { "epoch": 75.5379746835443, "grad_norm": 0.8836709856987, "learning_rate": 0.0004892405063291139, "loss": 0.4102, "step": 262570 }, { "epoch": 75.54085155350978, "grad_norm": 1.7649624347686768, "learning_rate": 0.0004891829689298045, "loss": 0.368, "step": 262580 }, { "epoch": 75.54372842347526, "grad_norm": 0.8032101392745972, "learning_rate": 0.0004891254315304948, "loss": 0.3983, "step": 262590 }, { "epoch": 75.54660529344073, "grad_norm": 1.9635950326919556, "learning_rate": 0.0004890678941311853, "loss": 0.4051, "step": 262600 }, { "epoch": 75.54948216340621, "grad_norm": 2.0569310188293457, "learning_rate": 0.0004890103567318757, "loss": 0.3735, "step": 262610 }, { "epoch": 75.55235903337169, "grad_norm": 1.5121266841888428, "learning_rate": 0.0004889528193325662, "loss": 0.5102, "step": 262620 }, { "epoch": 75.55523590333716, "grad_norm": 2.021202564239502, "learning_rate": 0.0004888952819332566, "loss": 0.3843, "step": 262630 }, { "epoch": 75.55811277330265, "grad_norm": 1.0933083295822144, "learning_rate": 0.0004888377445339471, "loss": 0.3925, "step": 262640 }, { "epoch": 75.56098964326813, "grad_norm": 0.949901282787323, "learning_rate": 0.0004887802071346375, "loss": 0.4012, "step": 262650 }, { "epoch": 75.5638665132336, "grad_norm": 1.2150806188583374, "learning_rate": 0.000488722669735328, "loss": 0.3959, "step": 262660 }, { "epoch": 75.56674338319908, "grad_norm": 1.8398123979568481, "learning_rate": 0.0004886651323360184, "loss": 0.334, "step": 262670 }, { "epoch": 75.56962025316456, "grad_norm": 2.6394293308258057, "learning_rate": 0.0004886075949367089, "loss": 0.4165, "step": 262680 }, { "epoch": 75.57249712313003, "grad_norm": 2.386483669281006, "learning_rate": 0.0004885500575373994, "loss": 0.3882, "step": 262690 }, { "epoch": 75.57537399309551, "grad_norm": 1.6324297189712524, "learning_rate": 0.0004884925201380898, "loss": 0.3478, "step": 262700 }, { "epoch": 75.57825086306099, "grad_norm": 1.745705246925354, "learning_rate": 0.0004884349827387803, "loss": 0.3538, "step": 262710 }, { "epoch": 75.58112773302646, "grad_norm": 1.5109341144561768, "learning_rate": 0.0004883774453394706, "loss": 0.3621, "step": 262720 }, { "epoch": 75.58400460299194, "grad_norm": 0.924578845500946, "learning_rate": 0.0004883199079401612, "loss": 0.4227, "step": 262730 }, { "epoch": 75.58688147295742, "grad_norm": 2.10921311378479, "learning_rate": 0.0004882623705408515, "loss": 0.3128, "step": 262740 }, { "epoch": 75.5897583429229, "grad_norm": 1.578167200088501, "learning_rate": 0.00048820483314154203, "loss": 0.3374, "step": 262750 }, { "epoch": 75.59263521288838, "grad_norm": 2.0974395275115967, "learning_rate": 0.0004881472957422325, "loss": 0.4647, "step": 262760 }, { "epoch": 75.59551208285386, "grad_norm": 1.7888182401657104, "learning_rate": 0.0004880897583429229, "loss": 0.4122, "step": 262770 }, { "epoch": 75.59838895281933, "grad_norm": 1.0369514226913452, "learning_rate": 0.0004880322209436134, "loss": 0.3619, "step": 262780 }, { "epoch": 75.60126582278481, "grad_norm": 1.0440144538879395, "learning_rate": 0.0004879746835443038, "loss": 0.3737, "step": 262790 }, { "epoch": 75.60414269275029, "grad_norm": 1.2381688356399536, "learning_rate": 0.00048791714614499426, "loss": 0.3445, "step": 262800 }, { "epoch": 75.60701956271576, "grad_norm": 0.6851056218147278, "learning_rate": 0.0004878596087456847, "loss": 0.3111, "step": 262810 }, { "epoch": 75.60989643268124, "grad_norm": 1.320886254310608, "learning_rate": 0.00048780207134637517, "loss": 0.3954, "step": 262820 }, { "epoch": 75.61277330264672, "grad_norm": 0.9911491870880127, "learning_rate": 0.00048774453394706557, "loss": 0.3576, "step": 262830 }, { "epoch": 75.61565017261219, "grad_norm": 0.8045076131820679, "learning_rate": 0.0004876869965477561, "loss": 0.4637, "step": 262840 }, { "epoch": 75.61852704257768, "grad_norm": 1.1426345109939575, "learning_rate": 0.0004876294591484465, "loss": 0.3879, "step": 262850 }, { "epoch": 75.62140391254316, "grad_norm": 1.6193360090255737, "learning_rate": 0.00048757192174913694, "loss": 0.3555, "step": 262860 }, { "epoch": 75.62428078250863, "grad_norm": 0.9611448645591736, "learning_rate": 0.00048751438434982745, "loss": 0.3368, "step": 262870 }, { "epoch": 75.62715765247411, "grad_norm": 2.340357542037964, "learning_rate": 0.00048745684695051785, "loss": 0.3843, "step": 262880 }, { "epoch": 75.63003452243959, "grad_norm": 2.377230644226074, "learning_rate": 0.0004873993095512083, "loss": 0.4039, "step": 262890 }, { "epoch": 75.63291139240506, "grad_norm": 1.418010950088501, "learning_rate": 0.00048734177215189876, "loss": 0.4096, "step": 262900 }, { "epoch": 75.63578826237054, "grad_norm": 0.9919493198394775, "learning_rate": 0.0004872842347525892, "loss": 0.372, "step": 262910 }, { "epoch": 75.63866513233602, "grad_norm": 1.3639702796936035, "learning_rate": 0.0004872266973532796, "loss": 0.4865, "step": 262920 }, { "epoch": 75.64154200230149, "grad_norm": 1.0098837614059448, "learning_rate": 0.0004871691599539701, "loss": 0.2841, "step": 262930 }, { "epoch": 75.64441887226697, "grad_norm": 1.7290639877319336, "learning_rate": 0.0004871116225546605, "loss": 0.4396, "step": 262940 }, { "epoch": 75.64729574223244, "grad_norm": 0.8665385246276855, "learning_rate": 0.000487054085155351, "loss": 0.3614, "step": 262950 }, { "epoch": 75.65017261219793, "grad_norm": 0.924314558506012, "learning_rate": 0.0004869965477560415, "loss": 0.3095, "step": 262960 }, { "epoch": 75.65304948216341, "grad_norm": 1.1399415731430054, "learning_rate": 0.0004869390103567319, "loss": 0.3984, "step": 262970 }, { "epoch": 75.65592635212889, "grad_norm": 1.4737231731414795, "learning_rate": 0.00048688147295742235, "loss": 0.4254, "step": 262980 }, { "epoch": 75.65880322209436, "grad_norm": 1.4540492296218872, "learning_rate": 0.00048682393555811275, "loss": 0.3717, "step": 262990 }, { "epoch": 75.66168009205984, "grad_norm": 0.9665069580078125, "learning_rate": 0.00048676639815880326, "loss": 0.3474, "step": 263000 }, { "epoch": 75.66455696202532, "grad_norm": 2.291842222213745, "learning_rate": 0.00048670886075949366, "loss": 0.3999, "step": 263010 }, { "epoch": 75.66743383199079, "grad_norm": 0.9000623226165771, "learning_rate": 0.0004866513233601841, "loss": 0.3597, "step": 263020 }, { "epoch": 75.67031070195627, "grad_norm": 0.8061885833740234, "learning_rate": 0.00048659378596087457, "loss": 0.3657, "step": 263030 }, { "epoch": 75.67318757192174, "grad_norm": 0.891128420829773, "learning_rate": 0.000486536248561565, "loss": 0.3414, "step": 263040 }, { "epoch": 75.67606444188722, "grad_norm": 0.8945927619934082, "learning_rate": 0.00048647871116225543, "loss": 0.4268, "step": 263050 }, { "epoch": 75.67894131185271, "grad_norm": 1.1991838216781616, "learning_rate": 0.00048642117376294594, "loss": 0.3909, "step": 263060 }, { "epoch": 75.68181818181819, "grad_norm": 1.4011362791061401, "learning_rate": 0.0004863636363636364, "loss": 0.4335, "step": 263070 }, { "epoch": 75.68469505178366, "grad_norm": 1.123085856437683, "learning_rate": 0.0004863060989643268, "loss": 0.3558, "step": 263080 }, { "epoch": 75.68757192174914, "grad_norm": 1.9316449165344238, "learning_rate": 0.0004862485615650173, "loss": 0.3521, "step": 263090 }, { "epoch": 75.69044879171462, "grad_norm": 1.4771363735198975, "learning_rate": 0.0004861910241657077, "loss": 0.4482, "step": 263100 }, { "epoch": 75.69332566168009, "grad_norm": 1.5034793615341187, "learning_rate": 0.00048613348676639816, "loss": 0.4534, "step": 263110 }, { "epoch": 75.69620253164557, "grad_norm": 0.9585073590278625, "learning_rate": 0.0004860759493670886, "loss": 0.4428, "step": 263120 }, { "epoch": 75.69907940161104, "grad_norm": 0.7410928010940552, "learning_rate": 0.00048601841196777907, "loss": 0.4444, "step": 263130 }, { "epoch": 75.70195627157652, "grad_norm": 1.7761197090148926, "learning_rate": 0.00048596087456846947, "loss": 0.3344, "step": 263140 }, { "epoch": 75.704833141542, "grad_norm": 1.1333811283111572, "learning_rate": 0.00048590333716916, "loss": 0.3896, "step": 263150 }, { "epoch": 75.70771001150747, "grad_norm": 1.0154017210006714, "learning_rate": 0.00048584579976985044, "loss": 0.4105, "step": 263160 }, { "epoch": 75.71058688147296, "grad_norm": 1.1257526874542236, "learning_rate": 0.00048578826237054084, "loss": 0.3775, "step": 263170 }, { "epoch": 75.71346375143844, "grad_norm": 1.4666491746902466, "learning_rate": 0.00048573072497123135, "loss": 0.3569, "step": 263180 }, { "epoch": 75.71634062140392, "grad_norm": 1.7648719549179077, "learning_rate": 0.00048567318757192175, "loss": 0.3595, "step": 263190 }, { "epoch": 75.71921749136939, "grad_norm": 1.1628024578094482, "learning_rate": 0.0004856156501726122, "loss": 0.5769, "step": 263200 }, { "epoch": 75.72209436133487, "grad_norm": 1.4123035669326782, "learning_rate": 0.00048555811277330266, "loss": 0.3436, "step": 263210 }, { "epoch": 75.72497123130034, "grad_norm": 1.7031033039093018, "learning_rate": 0.0004855005753739931, "loss": 0.4411, "step": 263220 }, { "epoch": 75.72784810126582, "grad_norm": 2.069469928741455, "learning_rate": 0.0004854430379746835, "loss": 0.5281, "step": 263230 }, { "epoch": 75.7307249712313, "grad_norm": 1.6750730276107788, "learning_rate": 0.00048538550057537403, "loss": 0.3832, "step": 263240 }, { "epoch": 75.73360184119677, "grad_norm": 1.6511914730072021, "learning_rate": 0.00048532796317606443, "loss": 0.3607, "step": 263250 }, { "epoch": 75.73647871116225, "grad_norm": 1.9059962034225464, "learning_rate": 0.0004852704257767549, "loss": 0.4668, "step": 263260 }, { "epoch": 75.73935558112774, "grad_norm": 1.495668888092041, "learning_rate": 0.0004852128883774454, "loss": 0.3662, "step": 263270 }, { "epoch": 75.74223245109322, "grad_norm": 1.3251537084579468, "learning_rate": 0.0004851553509781358, "loss": 0.3455, "step": 263280 }, { "epoch": 75.74510932105869, "grad_norm": 2.2493362426757812, "learning_rate": 0.00048509781357882625, "loss": 0.3457, "step": 263290 }, { "epoch": 75.74798619102417, "grad_norm": 0.7725241780281067, "learning_rate": 0.0004850402761795167, "loss": 0.3396, "step": 263300 }, { "epoch": 75.75086306098964, "grad_norm": 1.8580231666564941, "learning_rate": 0.00048498273878020716, "loss": 0.3658, "step": 263310 }, { "epoch": 75.75373993095512, "grad_norm": 0.987416684627533, "learning_rate": 0.00048492520138089756, "loss": 0.5635, "step": 263320 }, { "epoch": 75.7566168009206, "grad_norm": 2.3654417991638184, "learning_rate": 0.0004848676639815881, "loss": 0.3824, "step": 263330 }, { "epoch": 75.75949367088607, "grad_norm": 0.9703559279441833, "learning_rate": 0.0004848101265822785, "loss": 0.4073, "step": 263340 }, { "epoch": 75.76237054085155, "grad_norm": 0.8438366055488586, "learning_rate": 0.00048475258918296893, "loss": 0.3767, "step": 263350 }, { "epoch": 75.76524741081703, "grad_norm": 1.310437798500061, "learning_rate": 0.00048469505178365944, "loss": 0.4477, "step": 263360 }, { "epoch": 75.7681242807825, "grad_norm": 0.7850446701049805, "learning_rate": 0.00048463751438434984, "loss": 0.4022, "step": 263370 }, { "epoch": 75.77100115074799, "grad_norm": 1.4639549255371094, "learning_rate": 0.0004845799769850403, "loss": 0.3716, "step": 263380 }, { "epoch": 75.77387802071347, "grad_norm": 0.9713453054428101, "learning_rate": 0.0004845224395857307, "loss": 0.3362, "step": 263390 }, { "epoch": 75.77675489067894, "grad_norm": 1.216049313545227, "learning_rate": 0.0004844649021864212, "loss": 0.3996, "step": 263400 }, { "epoch": 75.77963176064442, "grad_norm": 1.0336170196533203, "learning_rate": 0.0004844073647871116, "loss": 0.4556, "step": 263410 }, { "epoch": 75.7825086306099, "grad_norm": 1.3408282995224, "learning_rate": 0.00048434982738780206, "loss": 0.4125, "step": 263420 }, { "epoch": 75.78538550057537, "grad_norm": 0.6507461071014404, "learning_rate": 0.0004842922899884925, "loss": 0.3938, "step": 263430 }, { "epoch": 75.78826237054085, "grad_norm": 1.0011975765228271, "learning_rate": 0.000484234752589183, "loss": 0.3219, "step": 263440 }, { "epoch": 75.79113924050633, "grad_norm": 1.14181649684906, "learning_rate": 0.00048417721518987343, "loss": 0.3519, "step": 263450 }, { "epoch": 75.7940161104718, "grad_norm": 1.1412359476089478, "learning_rate": 0.0004841196777905639, "loss": 0.3947, "step": 263460 }, { "epoch": 75.79689298043728, "grad_norm": 1.0068904161453247, "learning_rate": 0.00048406214039125434, "loss": 0.3716, "step": 263470 }, { "epoch": 75.79976985040277, "grad_norm": 1.384830117225647, "learning_rate": 0.00048400460299194474, "loss": 0.3925, "step": 263480 }, { "epoch": 75.80264672036824, "grad_norm": 1.1494652032852173, "learning_rate": 0.00048394706559263525, "loss": 0.3906, "step": 263490 }, { "epoch": 75.80552359033372, "grad_norm": 0.947594940662384, "learning_rate": 0.00048388952819332565, "loss": 0.4326, "step": 263500 }, { "epoch": 75.8084004602992, "grad_norm": 2.5509016513824463, "learning_rate": 0.0004838319907940161, "loss": 0.3916, "step": 263510 }, { "epoch": 75.81127733026467, "grad_norm": 2.0557234287261963, "learning_rate": 0.00048377445339470656, "loss": 0.4182, "step": 263520 }, { "epoch": 75.81415420023015, "grad_norm": 1.6037276983261108, "learning_rate": 0.000483716915995397, "loss": 0.3954, "step": 263530 }, { "epoch": 75.81703107019563, "grad_norm": 0.8238523602485657, "learning_rate": 0.0004836593785960874, "loss": 0.4273, "step": 263540 }, { "epoch": 75.8199079401611, "grad_norm": 0.9850415587425232, "learning_rate": 0.00048360184119677793, "loss": 0.3883, "step": 263550 }, { "epoch": 75.82278481012658, "grad_norm": 1.1046454906463623, "learning_rate": 0.0004835443037974684, "loss": 0.4651, "step": 263560 }, { "epoch": 75.82566168009205, "grad_norm": 1.6192015409469604, "learning_rate": 0.0004834867663981588, "loss": 0.4022, "step": 263570 }, { "epoch": 75.82853855005754, "grad_norm": 1.4896258115768433, "learning_rate": 0.0004834292289988493, "loss": 0.3602, "step": 263580 }, { "epoch": 75.83141542002302, "grad_norm": 1.1857669353485107, "learning_rate": 0.0004833716915995397, "loss": 0.3968, "step": 263590 }, { "epoch": 75.8342922899885, "grad_norm": 1.4773037433624268, "learning_rate": 0.00048331415420023015, "loss": 0.3715, "step": 263600 }, { "epoch": 75.83716915995397, "grad_norm": 1.1188833713531494, "learning_rate": 0.0004832566168009206, "loss": 0.3368, "step": 263610 }, { "epoch": 75.84004602991945, "grad_norm": 1.3949495553970337, "learning_rate": 0.00048319907940161107, "loss": 0.3418, "step": 263620 }, { "epoch": 75.84292289988493, "grad_norm": 1.2472429275512695, "learning_rate": 0.00048314154200230147, "loss": 0.3806, "step": 263630 }, { "epoch": 75.8457997698504, "grad_norm": 1.9091448783874512, "learning_rate": 0.000483084004602992, "loss": 0.3552, "step": 263640 }, { "epoch": 75.84867663981588, "grad_norm": 1.0773811340332031, "learning_rate": 0.00048302646720368243, "loss": 0.3443, "step": 263650 }, { "epoch": 75.85155350978135, "grad_norm": 0.9660216569900513, "learning_rate": 0.00048296892980437283, "loss": 0.4217, "step": 263660 }, { "epoch": 75.85443037974683, "grad_norm": 1.1935629844665527, "learning_rate": 0.00048291139240506334, "loss": 0.4641, "step": 263670 }, { "epoch": 75.8573072497123, "grad_norm": 0.9839310646057129, "learning_rate": 0.00048285385500575374, "loss": 0.4227, "step": 263680 }, { "epoch": 75.8601841196778, "grad_norm": 0.6884312629699707, "learning_rate": 0.0004827963176064442, "loss": 0.3283, "step": 263690 }, { "epoch": 75.86306098964327, "grad_norm": 0.74864661693573, "learning_rate": 0.00048273878020713466, "loss": 0.388, "step": 263700 }, { "epoch": 75.86593785960875, "grad_norm": 1.0992521047592163, "learning_rate": 0.0004826812428078251, "loss": 0.3953, "step": 263710 }, { "epoch": 75.86881472957423, "grad_norm": 1.3893296718597412, "learning_rate": 0.0004826237054085155, "loss": 0.3516, "step": 263720 }, { "epoch": 75.8716915995397, "grad_norm": 1.6265546083450317, "learning_rate": 0.000482566168009206, "loss": 0.3886, "step": 263730 }, { "epoch": 75.87456846950518, "grad_norm": 1.8407329320907593, "learning_rate": 0.0004825086306098964, "loss": 0.3824, "step": 263740 }, { "epoch": 75.87744533947065, "grad_norm": 1.1418730020523071, "learning_rate": 0.0004824510932105869, "loss": 0.4203, "step": 263750 }, { "epoch": 75.88032220943613, "grad_norm": 0.8189999461174011, "learning_rate": 0.0004823935558112774, "loss": 0.4246, "step": 263760 }, { "epoch": 75.8831990794016, "grad_norm": 2.2403125762939453, "learning_rate": 0.0004823360184119678, "loss": 0.3987, "step": 263770 }, { "epoch": 75.88607594936708, "grad_norm": 0.7615209221839905, "learning_rate": 0.00048227848101265825, "loss": 0.4064, "step": 263780 }, { "epoch": 75.88895281933257, "grad_norm": 0.9976766705513, "learning_rate": 0.00048222094361334865, "loss": 0.4025, "step": 263790 }, { "epoch": 75.89182968929805, "grad_norm": 1.8063427209854126, "learning_rate": 0.00048216340621403916, "loss": 0.4324, "step": 263800 }, { "epoch": 75.89470655926353, "grad_norm": 0.9260706305503845, "learning_rate": 0.00048210586881472956, "loss": 0.3537, "step": 263810 }, { "epoch": 75.897583429229, "grad_norm": 1.6115517616271973, "learning_rate": 0.00048204833141542, "loss": 0.3901, "step": 263820 }, { "epoch": 75.90046029919448, "grad_norm": 1.3235955238342285, "learning_rate": 0.00048199079401611047, "loss": 0.3656, "step": 263830 }, { "epoch": 75.90333716915995, "grad_norm": 0.976325511932373, "learning_rate": 0.0004819332566168009, "loss": 0.3941, "step": 263840 }, { "epoch": 75.90621403912543, "grad_norm": 1.045628309249878, "learning_rate": 0.0004818757192174914, "loss": 0.3627, "step": 263850 }, { "epoch": 75.9090909090909, "grad_norm": 2.9639365673065186, "learning_rate": 0.00048181818181818184, "loss": 0.3939, "step": 263860 }, { "epoch": 75.91196777905638, "grad_norm": 1.5267891883850098, "learning_rate": 0.0004817606444188723, "loss": 0.4411, "step": 263870 }, { "epoch": 75.91484464902186, "grad_norm": 1.8874255418777466, "learning_rate": 0.0004817031070195627, "loss": 0.3404, "step": 263880 }, { "epoch": 75.91772151898734, "grad_norm": 1.079063892364502, "learning_rate": 0.0004816455696202532, "loss": 0.3368, "step": 263890 }, { "epoch": 75.92059838895283, "grad_norm": 0.6224169731140137, "learning_rate": 0.0004815880322209436, "loss": 0.3772, "step": 263900 }, { "epoch": 75.9234752589183, "grad_norm": 0.8682758212089539, "learning_rate": 0.00048153049482163406, "loss": 0.3581, "step": 263910 }, { "epoch": 75.92635212888378, "grad_norm": 1.0246511697769165, "learning_rate": 0.0004814729574223245, "loss": 0.3776, "step": 263920 }, { "epoch": 75.92922899884925, "grad_norm": 1.037001609802246, "learning_rate": 0.00048141542002301497, "loss": 0.3651, "step": 263930 }, { "epoch": 75.93210586881473, "grad_norm": 0.9929870963096619, "learning_rate": 0.0004813578826237054, "loss": 0.3862, "step": 263940 }, { "epoch": 75.9349827387802, "grad_norm": 0.838965117931366, "learning_rate": 0.0004813003452243959, "loss": 0.4356, "step": 263950 }, { "epoch": 75.93785960874568, "grad_norm": 1.429762840270996, "learning_rate": 0.00048124280782508634, "loss": 0.4216, "step": 263960 }, { "epoch": 75.94073647871116, "grad_norm": 0.9378659129142761, "learning_rate": 0.00048118527042577674, "loss": 0.3124, "step": 263970 }, { "epoch": 75.94361334867664, "grad_norm": 1.0085625648498535, "learning_rate": 0.00048112773302646725, "loss": 0.3577, "step": 263980 }, { "epoch": 75.94649021864211, "grad_norm": 1.1653138399124146, "learning_rate": 0.00048107019562715765, "loss": 0.3722, "step": 263990 }, { "epoch": 75.9493670886076, "grad_norm": 1.3923367261886597, "learning_rate": 0.0004810126582278481, "loss": 0.3594, "step": 264000 }, { "epoch": 75.95224395857308, "grad_norm": 1.2401313781738281, "learning_rate": 0.00048095512082853856, "loss": 0.4774, "step": 264010 }, { "epoch": 75.95512082853855, "grad_norm": 1.0329853296279907, "learning_rate": 0.000480897583429229, "loss": 0.4159, "step": 264020 }, { "epoch": 75.95799769850403, "grad_norm": 2.3604586124420166, "learning_rate": 0.0004808400460299194, "loss": 0.4392, "step": 264030 }, { "epoch": 75.9608745684695, "grad_norm": 1.5079692602157593, "learning_rate": 0.0004807825086306099, "loss": 0.3219, "step": 264040 }, { "epoch": 75.96375143843498, "grad_norm": 1.5817455053329468, "learning_rate": 0.0004807249712313004, "loss": 0.4127, "step": 264050 }, { "epoch": 75.96662830840046, "grad_norm": 0.9650090336799622, "learning_rate": 0.0004806674338319908, "loss": 0.4321, "step": 264060 }, { "epoch": 75.96950517836594, "grad_norm": 0.8843092322349548, "learning_rate": 0.0004806098964326813, "loss": 0.3002, "step": 264070 }, { "epoch": 75.97238204833141, "grad_norm": 1.3568586111068726, "learning_rate": 0.0004805523590333717, "loss": 0.3546, "step": 264080 }, { "epoch": 75.97525891829689, "grad_norm": 1.320281982421875, "learning_rate": 0.00048049482163406215, "loss": 0.4776, "step": 264090 }, { "epoch": 75.97813578826236, "grad_norm": 1.3606116771697998, "learning_rate": 0.0004804372842347526, "loss": 0.407, "step": 264100 }, { "epoch": 75.98101265822785, "grad_norm": 1.3513308763504028, "learning_rate": 0.00048037974683544306, "loss": 0.4162, "step": 264110 }, { "epoch": 75.98388952819333, "grad_norm": 0.9738457798957825, "learning_rate": 0.00048032220943613346, "loss": 0.3226, "step": 264120 }, { "epoch": 75.9867663981588, "grad_norm": 0.6088011264801025, "learning_rate": 0.00048026467203682397, "loss": 0.3725, "step": 264130 }, { "epoch": 75.98964326812428, "grad_norm": 1.5125681161880493, "learning_rate": 0.0004802071346375144, "loss": 0.3555, "step": 264140 }, { "epoch": 75.99252013808976, "grad_norm": 1.0290876626968384, "learning_rate": 0.00048014959723820483, "loss": 0.5051, "step": 264150 }, { "epoch": 75.99539700805524, "grad_norm": 1.7632739543914795, "learning_rate": 0.00048009205983889534, "loss": 0.3726, "step": 264160 }, { "epoch": 75.99827387802071, "grad_norm": 1.8661772012710571, "learning_rate": 0.00048003452243958574, "loss": 0.3931, "step": 264170 }, { "epoch": 76.00115074798619, "grad_norm": 1.7100203037261963, "learning_rate": 0.0004799769850402762, "loss": 0.4885, "step": 264180 }, { "epoch": 76.00402761795166, "grad_norm": 0.8395172953605652, "learning_rate": 0.0004799194476409666, "loss": 0.308, "step": 264190 }, { "epoch": 76.00690448791714, "grad_norm": 1.5646332502365112, "learning_rate": 0.0004798619102416571, "loss": 0.3591, "step": 264200 }, { "epoch": 76.00978135788263, "grad_norm": 1.5699154138565063, "learning_rate": 0.0004798043728423475, "loss": 0.3605, "step": 264210 }, { "epoch": 76.0126582278481, "grad_norm": 0.9118156433105469, "learning_rate": 0.00047974683544303796, "loss": 0.3932, "step": 264220 }, { "epoch": 76.01553509781358, "grad_norm": 1.5112065076828003, "learning_rate": 0.0004796892980437284, "loss": 0.3953, "step": 264230 }, { "epoch": 76.01841196777906, "grad_norm": 1.5117299556732178, "learning_rate": 0.0004796317606444189, "loss": 0.419, "step": 264240 }, { "epoch": 76.02128883774454, "grad_norm": 1.2898083925247192, "learning_rate": 0.00047957422324510933, "loss": 0.3572, "step": 264250 }, { "epoch": 76.02416570771001, "grad_norm": 0.814882755279541, "learning_rate": 0.0004795166858457998, "loss": 0.4104, "step": 264260 }, { "epoch": 76.02704257767549, "grad_norm": 1.7492798566818237, "learning_rate": 0.00047945914844649024, "loss": 0.4314, "step": 264270 }, { "epoch": 76.02991944764096, "grad_norm": 1.735927939414978, "learning_rate": 0.00047940161104718064, "loss": 0.4133, "step": 264280 }, { "epoch": 76.03279631760644, "grad_norm": 1.151425838470459, "learning_rate": 0.00047934407364787115, "loss": 0.3594, "step": 264290 }, { "epoch": 76.03567318757192, "grad_norm": 1.4262239933013916, "learning_rate": 0.00047928653624856155, "loss": 0.346, "step": 264300 }, { "epoch": 76.03855005753739, "grad_norm": 1.1471349000930786, "learning_rate": 0.000479228998849252, "loss": 0.4052, "step": 264310 }, { "epoch": 76.04142692750288, "grad_norm": 1.6333199739456177, "learning_rate": 0.00047917146144994246, "loss": 0.3504, "step": 264320 }, { "epoch": 76.04430379746836, "grad_norm": 1.7561885118484497, "learning_rate": 0.0004791139240506329, "loss": 0.3546, "step": 264330 }, { "epoch": 76.04718066743384, "grad_norm": 0.8613196611404419, "learning_rate": 0.0004790563866513234, "loss": 0.3389, "step": 264340 }, { "epoch": 76.05005753739931, "grad_norm": 1.3527538776397705, "learning_rate": 0.00047899884925201383, "loss": 0.3881, "step": 264350 }, { "epoch": 76.05293440736479, "grad_norm": 1.2012337446212769, "learning_rate": 0.0004789413118527043, "loss": 0.3275, "step": 264360 }, { "epoch": 76.05581127733026, "grad_norm": 1.0614452362060547, "learning_rate": 0.0004788837744533947, "loss": 0.3185, "step": 264370 }, { "epoch": 76.05868814729574, "grad_norm": 1.7161747217178345, "learning_rate": 0.0004788262370540852, "loss": 0.3415, "step": 264380 }, { "epoch": 76.06156501726122, "grad_norm": 0.8795590400695801, "learning_rate": 0.0004787686996547756, "loss": 0.4718, "step": 264390 }, { "epoch": 76.06444188722669, "grad_norm": 0.7071114778518677, "learning_rate": 0.00047871116225546605, "loss": 0.3716, "step": 264400 }, { "epoch": 76.06731875719217, "grad_norm": 0.9588674306869507, "learning_rate": 0.0004786536248561565, "loss": 0.3696, "step": 264410 }, { "epoch": 76.07019562715766, "grad_norm": 1.4143600463867188, "learning_rate": 0.00047859608745684696, "loss": 0.3538, "step": 264420 }, { "epoch": 76.07307249712314, "grad_norm": 0.8588107824325562, "learning_rate": 0.0004785385500575374, "loss": 0.3081, "step": 264430 }, { "epoch": 76.07594936708861, "grad_norm": 0.8256782293319702, "learning_rate": 0.0004784810126582279, "loss": 0.3873, "step": 264440 }, { "epoch": 76.07882623705409, "grad_norm": 0.7910501956939697, "learning_rate": 0.00047842347525891833, "loss": 0.4763, "step": 264450 }, { "epoch": 76.08170310701956, "grad_norm": 0.7168604731559753, "learning_rate": 0.00047836593785960873, "loss": 0.3549, "step": 264460 }, { "epoch": 76.08457997698504, "grad_norm": 0.9605419635772705, "learning_rate": 0.00047830840046029924, "loss": 0.2564, "step": 264470 }, { "epoch": 76.08745684695052, "grad_norm": 1.7907748222351074, "learning_rate": 0.00047825086306098964, "loss": 0.3656, "step": 264480 }, { "epoch": 76.09033371691599, "grad_norm": 1.4971898794174194, "learning_rate": 0.0004781933256616801, "loss": 0.3383, "step": 264490 }, { "epoch": 76.09321058688147, "grad_norm": 0.9795769453048706, "learning_rate": 0.00047813578826237055, "loss": 0.3766, "step": 264500 }, { "epoch": 76.09608745684694, "grad_norm": 1.5324289798736572, "learning_rate": 0.000478078250863061, "loss": 0.4093, "step": 264510 }, { "epoch": 76.09896432681242, "grad_norm": 1.2344435453414917, "learning_rate": 0.0004780207134637514, "loss": 0.3177, "step": 264520 }, { "epoch": 76.10184119677791, "grad_norm": 1.4735448360443115, "learning_rate": 0.0004779631760644419, "loss": 0.3763, "step": 264530 }, { "epoch": 76.10471806674339, "grad_norm": 1.8677436113357544, "learning_rate": 0.0004779056386651324, "loss": 0.3684, "step": 264540 }, { "epoch": 76.10759493670886, "grad_norm": 1.7237164974212646, "learning_rate": 0.0004778481012658228, "loss": 0.3438, "step": 264550 }, { "epoch": 76.11047180667434, "grad_norm": 1.161574125289917, "learning_rate": 0.0004777905638665133, "loss": 0.2867, "step": 264560 }, { "epoch": 76.11334867663982, "grad_norm": 2.0322117805480957, "learning_rate": 0.0004777330264672037, "loss": 0.4022, "step": 264570 }, { "epoch": 76.11622554660529, "grad_norm": 1.3162130117416382, "learning_rate": 0.00047767548906789414, "loss": 0.3684, "step": 264580 }, { "epoch": 76.11910241657077, "grad_norm": 1.9236650466918945, "learning_rate": 0.00047761795166858454, "loss": 0.3682, "step": 264590 }, { "epoch": 76.12197928653625, "grad_norm": 2.390432596206665, "learning_rate": 0.00047756041426927505, "loss": 0.3553, "step": 264600 }, { "epoch": 76.12485615650172, "grad_norm": 2.0158298015594482, "learning_rate": 0.00047750287686996546, "loss": 0.4879, "step": 264610 }, { "epoch": 76.1277330264672, "grad_norm": 1.1015207767486572, "learning_rate": 0.0004774453394706559, "loss": 0.4119, "step": 264620 }, { "epoch": 76.13060989643269, "grad_norm": 0.8381902575492859, "learning_rate": 0.0004773878020713464, "loss": 0.3459, "step": 264630 }, { "epoch": 76.13348676639816, "grad_norm": 0.8761842846870422, "learning_rate": 0.0004773302646720368, "loss": 0.3615, "step": 264640 }, { "epoch": 76.13636363636364, "grad_norm": 1.1417880058288574, "learning_rate": 0.0004772727272727273, "loss": 0.3572, "step": 264650 }, { "epoch": 76.13924050632912, "grad_norm": 0.8942017555236816, "learning_rate": 0.00047721518987341773, "loss": 0.4309, "step": 264660 }, { "epoch": 76.14211737629459, "grad_norm": 0.5762101411819458, "learning_rate": 0.0004771576524741082, "loss": 0.2921, "step": 264670 }, { "epoch": 76.14499424626007, "grad_norm": 1.0626524686813354, "learning_rate": 0.0004771001150747986, "loss": 0.3428, "step": 264680 }, { "epoch": 76.14787111622555, "grad_norm": 0.7750206589698792, "learning_rate": 0.0004770425776754891, "loss": 0.3392, "step": 264690 }, { "epoch": 76.15074798619102, "grad_norm": 1.081707239151001, "learning_rate": 0.0004769850402761795, "loss": 0.3099, "step": 264700 }, { "epoch": 76.1536248561565, "grad_norm": 1.4481539726257324, "learning_rate": 0.00047692750287686996, "loss": 0.3229, "step": 264710 }, { "epoch": 76.15650172612197, "grad_norm": 1.063977837562561, "learning_rate": 0.0004768699654775604, "loss": 0.367, "step": 264720 }, { "epoch": 76.15937859608745, "grad_norm": 1.6063400506973267, "learning_rate": 0.00047681242807825087, "loss": 0.3856, "step": 264730 }, { "epoch": 76.16225546605294, "grad_norm": 2.247112512588501, "learning_rate": 0.0004767548906789413, "loss": 0.4493, "step": 264740 }, { "epoch": 76.16513233601842, "grad_norm": 1.9694613218307495, "learning_rate": 0.0004766973532796318, "loss": 0.3646, "step": 264750 }, { "epoch": 76.16800920598389, "grad_norm": 1.6201159954071045, "learning_rate": 0.00047663981588032223, "loss": 0.458, "step": 264760 }, { "epoch": 76.17088607594937, "grad_norm": 2.349607467651367, "learning_rate": 0.00047658227848101264, "loss": 0.4308, "step": 264770 }, { "epoch": 76.17376294591485, "grad_norm": 2.455636501312256, "learning_rate": 0.00047652474108170315, "loss": 0.3495, "step": 264780 }, { "epoch": 76.17663981588032, "grad_norm": 1.2326563596725464, "learning_rate": 0.00047646720368239355, "loss": 0.4086, "step": 264790 }, { "epoch": 76.1795166858458, "grad_norm": 1.5634845495224, "learning_rate": 0.000476409666283084, "loss": 0.3676, "step": 264800 }, { "epoch": 76.18239355581127, "grad_norm": 1.7554216384887695, "learning_rate": 0.00047635212888377446, "loss": 0.3157, "step": 264810 }, { "epoch": 76.18527042577675, "grad_norm": 0.8601300716400146, "learning_rate": 0.0004762945914844649, "loss": 0.337, "step": 264820 }, { "epoch": 76.18814729574223, "grad_norm": 0.7723125219345093, "learning_rate": 0.00047623705408515537, "loss": 0.2927, "step": 264830 }, { "epoch": 76.19102416570772, "grad_norm": 0.9562963247299194, "learning_rate": 0.0004761795166858458, "loss": 0.3342, "step": 264840 }, { "epoch": 76.19390103567319, "grad_norm": 1.3411948680877686, "learning_rate": 0.0004761219792865363, "loss": 0.3568, "step": 264850 }, { "epoch": 76.19677790563867, "grad_norm": 0.7962923645973206, "learning_rate": 0.0004760644418872267, "loss": 0.301, "step": 264860 }, { "epoch": 76.19965477560415, "grad_norm": 1.3370099067687988, "learning_rate": 0.0004760069044879172, "loss": 0.3849, "step": 264870 }, { "epoch": 76.20253164556962, "grad_norm": 1.204649806022644, "learning_rate": 0.0004759493670886076, "loss": 0.2844, "step": 264880 }, { "epoch": 76.2054085155351, "grad_norm": 1.6440545320510864, "learning_rate": 0.00047589182968929805, "loss": 0.3735, "step": 264890 }, { "epoch": 76.20828538550057, "grad_norm": 1.1007846593856812, "learning_rate": 0.0004758342922899885, "loss": 0.3502, "step": 264900 }, { "epoch": 76.21116225546605, "grad_norm": 1.2443339824676514, "learning_rate": 0.00047577675489067896, "loss": 0.3677, "step": 264910 }, { "epoch": 76.21403912543153, "grad_norm": 1.0550150871276855, "learning_rate": 0.0004757192174913694, "loss": 0.3078, "step": 264920 }, { "epoch": 76.216915995397, "grad_norm": 1.1771024465560913, "learning_rate": 0.00047566168009205987, "loss": 0.2892, "step": 264930 }, { "epoch": 76.21979286536248, "grad_norm": 0.7076175212860107, "learning_rate": 0.0004756041426927503, "loss": 0.3846, "step": 264940 }, { "epoch": 76.22266973532797, "grad_norm": 1.1353694200515747, "learning_rate": 0.0004755466052934407, "loss": 0.3656, "step": 264950 }, { "epoch": 76.22554660529345, "grad_norm": 1.4658293724060059, "learning_rate": 0.00047548906789413124, "loss": 0.3869, "step": 264960 }, { "epoch": 76.22842347525892, "grad_norm": 1.3955119848251343, "learning_rate": 0.00047543153049482164, "loss": 0.347, "step": 264970 }, { "epoch": 76.2313003452244, "grad_norm": 1.4597378969192505, "learning_rate": 0.0004753739930955121, "loss": 0.4148, "step": 264980 }, { "epoch": 76.23417721518987, "grad_norm": 1.6752228736877441, "learning_rate": 0.00047531645569620255, "loss": 0.3565, "step": 264990 }, { "epoch": 76.23705408515535, "grad_norm": 1.265683889389038, "learning_rate": 0.000475258918296893, "loss": 0.3796, "step": 265000 }, { "epoch": 76.23993095512083, "grad_norm": 0.8763650059700012, "learning_rate": 0.0004752013808975834, "loss": 0.36, "step": 265010 }, { "epoch": 76.2428078250863, "grad_norm": 0.8409035801887512, "learning_rate": 0.00047514384349827386, "loss": 0.3518, "step": 265020 }, { "epoch": 76.24568469505178, "grad_norm": 1.235675573348999, "learning_rate": 0.00047508630609896437, "loss": 0.3718, "step": 265030 }, { "epoch": 76.24856156501725, "grad_norm": 2.5263917446136475, "learning_rate": 0.00047502876869965477, "loss": 0.3325, "step": 265040 }, { "epoch": 76.25143843498275, "grad_norm": 0.9398096799850464, "learning_rate": 0.0004749712313003452, "loss": 0.3831, "step": 265050 }, { "epoch": 76.25431530494822, "grad_norm": 1.040486216545105, "learning_rate": 0.0004749136939010357, "loss": 0.367, "step": 265060 }, { "epoch": 76.2571921749137, "grad_norm": 0.9541445970535278, "learning_rate": 0.00047485615650172614, "loss": 0.4477, "step": 265070 }, { "epoch": 76.26006904487917, "grad_norm": 0.7312747836112976, "learning_rate": 0.00047479861910241654, "loss": 0.3635, "step": 265080 }, { "epoch": 76.26294591484465, "grad_norm": 1.5122697353363037, "learning_rate": 0.00047474108170310705, "loss": 0.3257, "step": 265090 }, { "epoch": 76.26582278481013, "grad_norm": 0.9452475905418396, "learning_rate": 0.00047468354430379745, "loss": 0.3337, "step": 265100 }, { "epoch": 76.2686996547756, "grad_norm": 0.7947519421577454, "learning_rate": 0.0004746260069044879, "loss": 0.3054, "step": 265110 }, { "epoch": 76.27157652474108, "grad_norm": 1.4079396724700928, "learning_rate": 0.0004745684695051784, "loss": 0.5058, "step": 265120 }, { "epoch": 76.27445339470655, "grad_norm": 0.9229378700256348, "learning_rate": 0.0004745109321058688, "loss": 0.2871, "step": 265130 }, { "epoch": 76.27733026467203, "grad_norm": 1.6244174242019653, "learning_rate": 0.00047445339470655927, "loss": 0.4497, "step": 265140 }, { "epoch": 76.28020713463752, "grad_norm": 1.2833821773529053, "learning_rate": 0.00047439585730724973, "loss": 0.3411, "step": 265150 }, { "epoch": 76.283084004603, "grad_norm": 0.6534726023674011, "learning_rate": 0.0004743383199079402, "loss": 0.426, "step": 265160 }, { "epoch": 76.28596087456847, "grad_norm": 1.5805034637451172, "learning_rate": 0.0004742807825086306, "loss": 0.3435, "step": 265170 }, { "epoch": 76.28883774453395, "grad_norm": 1.0753157138824463, "learning_rate": 0.0004742232451093211, "loss": 0.3274, "step": 265180 }, { "epoch": 76.29171461449943, "grad_norm": 1.1422282457351685, "learning_rate": 0.0004741657077100115, "loss": 0.3319, "step": 265190 }, { "epoch": 76.2945914844649, "grad_norm": 1.4961662292480469, "learning_rate": 0.00047410817031070195, "loss": 0.3407, "step": 265200 }, { "epoch": 76.29746835443038, "grad_norm": 1.1352038383483887, "learning_rate": 0.0004740506329113924, "loss": 0.3913, "step": 265210 }, { "epoch": 76.30034522439585, "grad_norm": 1.365901231765747, "learning_rate": 0.00047399309551208286, "loss": 0.3369, "step": 265220 }, { "epoch": 76.30322209436133, "grad_norm": 1.13542902469635, "learning_rate": 0.0004739355581127733, "loss": 0.336, "step": 265230 }, { "epoch": 76.30609896432681, "grad_norm": 1.6757766008377075, "learning_rate": 0.0004738780207134638, "loss": 0.3495, "step": 265240 }, { "epoch": 76.30897583429228, "grad_norm": 0.7682331800460815, "learning_rate": 0.00047382048331415423, "loss": 0.4848, "step": 265250 }, { "epoch": 76.31185270425777, "grad_norm": 1.462354302406311, "learning_rate": 0.00047376294591484463, "loss": 0.3401, "step": 265260 }, { "epoch": 76.31472957422325, "grad_norm": 2.020160675048828, "learning_rate": 0.00047370540851553514, "loss": 0.361, "step": 265270 }, { "epoch": 76.31760644418873, "grad_norm": 1.0322914123535156, "learning_rate": 0.00047364787111622554, "loss": 0.3419, "step": 265280 }, { "epoch": 76.3204833141542, "grad_norm": 0.8648335933685303, "learning_rate": 0.000473590333716916, "loss": 0.3383, "step": 265290 }, { "epoch": 76.32336018411968, "grad_norm": 1.0252039432525635, "learning_rate": 0.00047353279631760645, "loss": 0.383, "step": 265300 }, { "epoch": 76.32623705408515, "grad_norm": 1.343455195426941, "learning_rate": 0.0004734752589182969, "loss": 0.4113, "step": 265310 }, { "epoch": 76.32911392405063, "grad_norm": 1.2401076555252075, "learning_rate": 0.00047341772151898736, "loss": 0.3737, "step": 265320 }, { "epoch": 76.33199079401611, "grad_norm": 1.323479175567627, "learning_rate": 0.0004733601841196778, "loss": 0.4688, "step": 265330 }, { "epoch": 76.33486766398158, "grad_norm": 2.5893356800079346, "learning_rate": 0.0004733026467203683, "loss": 0.4037, "step": 265340 }, { "epoch": 76.33774453394706, "grad_norm": 1.6133590936660767, "learning_rate": 0.0004732451093210587, "loss": 0.4095, "step": 265350 }, { "epoch": 76.34062140391255, "grad_norm": 1.1767628192901611, "learning_rate": 0.0004731875719217492, "loss": 0.4285, "step": 265360 }, { "epoch": 76.34349827387803, "grad_norm": 1.214965581893921, "learning_rate": 0.0004731300345224396, "loss": 0.3768, "step": 265370 }, { "epoch": 76.3463751438435, "grad_norm": 0.8005863428115845, "learning_rate": 0.00047307249712313004, "loss": 0.3217, "step": 265380 }, { "epoch": 76.34925201380898, "grad_norm": 1.4592478275299072, "learning_rate": 0.0004730149597238205, "loss": 0.3653, "step": 265390 }, { "epoch": 76.35212888377445, "grad_norm": 0.9375413656234741, "learning_rate": 0.00047295742232451095, "loss": 0.3634, "step": 265400 }, { "epoch": 76.35500575373993, "grad_norm": 0.9707690477371216, "learning_rate": 0.0004728998849252014, "loss": 0.4298, "step": 265410 }, { "epoch": 76.35788262370541, "grad_norm": 1.1763243675231934, "learning_rate": 0.0004728423475258918, "loss": 0.3295, "step": 265420 }, { "epoch": 76.36075949367088, "grad_norm": 1.3229053020477295, "learning_rate": 0.0004727848101265823, "loss": 0.4055, "step": 265430 }, { "epoch": 76.36363636363636, "grad_norm": 1.5290205478668213, "learning_rate": 0.0004727272727272727, "loss": 0.3924, "step": 265440 }, { "epoch": 76.36651323360184, "grad_norm": 0.8812848329544067, "learning_rate": 0.0004726697353279632, "loss": 0.2846, "step": 265450 }, { "epoch": 76.36939010356731, "grad_norm": 1.7937114238739014, "learning_rate": 0.00047261219792865363, "loss": 0.4111, "step": 265460 }, { "epoch": 76.3722669735328, "grad_norm": 1.381515622138977, "learning_rate": 0.0004725546605293441, "loss": 0.4312, "step": 265470 }, { "epoch": 76.37514384349828, "grad_norm": 1.1115343570709229, "learning_rate": 0.0004724971231300345, "loss": 0.4144, "step": 265480 }, { "epoch": 76.37802071346375, "grad_norm": 1.0293883085250854, "learning_rate": 0.000472439585730725, "loss": 0.3923, "step": 265490 }, { "epoch": 76.38089758342923, "grad_norm": 1.3082674741744995, "learning_rate": 0.0004723820483314154, "loss": 0.3462, "step": 265500 }, { "epoch": 76.38377445339471, "grad_norm": 2.052863121032715, "learning_rate": 0.00047232451093210585, "loss": 0.4667, "step": 265510 }, { "epoch": 76.38665132336018, "grad_norm": 0.6534569263458252, "learning_rate": 0.00047226697353279636, "loss": 0.4132, "step": 265520 }, { "epoch": 76.38952819332566, "grad_norm": 1.1281744241714478, "learning_rate": 0.00047220943613348677, "loss": 0.3201, "step": 265530 }, { "epoch": 76.39240506329114, "grad_norm": 1.0430842638015747, "learning_rate": 0.0004721518987341772, "loss": 0.4442, "step": 265540 }, { "epoch": 76.39528193325661, "grad_norm": 1.319579839706421, "learning_rate": 0.0004720943613348677, "loss": 0.4434, "step": 265550 }, { "epoch": 76.39815880322209, "grad_norm": 0.9175852537155151, "learning_rate": 0.00047203682393555813, "loss": 0.3433, "step": 265560 }, { "epoch": 76.40103567318758, "grad_norm": 0.8533312678337097, "learning_rate": 0.00047197928653624853, "loss": 0.4154, "step": 265570 }, { "epoch": 76.40391254315306, "grad_norm": 0.9863299131393433, "learning_rate": 0.00047192174913693904, "loss": 0.3518, "step": 265580 }, { "epoch": 76.40678941311853, "grad_norm": 0.9049426913261414, "learning_rate": 0.00047186421173762944, "loss": 0.4013, "step": 265590 }, { "epoch": 76.40966628308401, "grad_norm": 1.0563719272613525, "learning_rate": 0.0004718066743383199, "loss": 0.3771, "step": 265600 }, { "epoch": 76.41254315304948, "grad_norm": 1.657371163368225, "learning_rate": 0.0004717491369390104, "loss": 0.3226, "step": 265610 }, { "epoch": 76.41542002301496, "grad_norm": 1.8894270658493042, "learning_rate": 0.0004716915995397008, "loss": 0.3897, "step": 265620 }, { "epoch": 76.41829689298044, "grad_norm": 1.936036229133606, "learning_rate": 0.00047163406214039127, "loss": 0.4244, "step": 265630 }, { "epoch": 76.42117376294591, "grad_norm": 1.2715622186660767, "learning_rate": 0.0004715765247410817, "loss": 0.3158, "step": 265640 }, { "epoch": 76.42405063291139, "grad_norm": 1.3611056804656982, "learning_rate": 0.0004715189873417722, "loss": 0.3365, "step": 265650 }, { "epoch": 76.42692750287686, "grad_norm": 1.3196470737457275, "learning_rate": 0.0004714614499424626, "loss": 0.3851, "step": 265660 }, { "epoch": 76.42980437284234, "grad_norm": 1.2275733947753906, "learning_rate": 0.0004714039125431531, "loss": 0.3636, "step": 265670 }, { "epoch": 76.43268124280783, "grad_norm": 0.9029136896133423, "learning_rate": 0.0004713463751438435, "loss": 0.3508, "step": 265680 }, { "epoch": 76.43555811277331, "grad_norm": 1.009088397026062, "learning_rate": 0.00047128883774453395, "loss": 0.3848, "step": 265690 }, { "epoch": 76.43843498273878, "grad_norm": 0.8753944039344788, "learning_rate": 0.0004712313003452244, "loss": 0.3217, "step": 265700 }, { "epoch": 76.44131185270426, "grad_norm": 0.9514873027801514, "learning_rate": 0.00047117376294591486, "loss": 0.4037, "step": 265710 }, { "epoch": 76.44418872266974, "grad_norm": 1.4356229305267334, "learning_rate": 0.0004711162255466053, "loss": 0.3744, "step": 265720 }, { "epoch": 76.44706559263521, "grad_norm": 1.4075515270233154, "learning_rate": 0.00047105868814729577, "loss": 0.4247, "step": 265730 }, { "epoch": 76.44994246260069, "grad_norm": 1.5634769201278687, "learning_rate": 0.0004710011507479862, "loss": 0.4551, "step": 265740 }, { "epoch": 76.45281933256616, "grad_norm": 2.2115848064422607, "learning_rate": 0.0004709436133486766, "loss": 0.4389, "step": 265750 }, { "epoch": 76.45569620253164, "grad_norm": 0.8954130411148071, "learning_rate": 0.00047088607594936713, "loss": 0.4208, "step": 265760 }, { "epoch": 76.45857307249712, "grad_norm": 0.9172464609146118, "learning_rate": 0.00047082853855005754, "loss": 0.4077, "step": 265770 }, { "epoch": 76.46144994246261, "grad_norm": 0.9980218410491943, "learning_rate": 0.000470771001150748, "loss": 0.3041, "step": 265780 }, { "epoch": 76.46432681242808, "grad_norm": 2.2350945472717285, "learning_rate": 0.00047071346375143845, "loss": 0.334, "step": 265790 }, { "epoch": 76.46720368239356, "grad_norm": 1.0118024349212646, "learning_rate": 0.0004706559263521289, "loss": 0.3641, "step": 265800 }, { "epoch": 76.47008055235904, "grad_norm": 1.8774715662002563, "learning_rate": 0.00047059838895281936, "loss": 0.4553, "step": 265810 }, { "epoch": 76.47295742232451, "grad_norm": 1.788936734199524, "learning_rate": 0.0004705408515535098, "loss": 0.4285, "step": 265820 }, { "epoch": 76.47583429228999, "grad_norm": 1.3227909803390503, "learning_rate": 0.00047048331415420027, "loss": 0.393, "step": 265830 }, { "epoch": 76.47871116225546, "grad_norm": 2.0706777572631836, "learning_rate": 0.00047042577675489067, "loss": 0.3859, "step": 265840 }, { "epoch": 76.48158803222094, "grad_norm": 1.1004462242126465, "learning_rate": 0.0004703682393555811, "loss": 0.3265, "step": 265850 }, { "epoch": 76.48446490218642, "grad_norm": 2.0856499671936035, "learning_rate": 0.0004703107019562716, "loss": 0.3944, "step": 265860 }, { "epoch": 76.4873417721519, "grad_norm": 1.8959845304489136, "learning_rate": 0.00047025316455696204, "loss": 0.3704, "step": 265870 }, { "epoch": 76.49021864211737, "grad_norm": 1.4906667470932007, "learning_rate": 0.00047019562715765244, "loss": 0.3598, "step": 265880 }, { "epoch": 76.49309551208286, "grad_norm": 1.4438048601150513, "learning_rate": 0.00047013808975834295, "loss": 0.3577, "step": 265890 }, { "epoch": 76.49597238204834, "grad_norm": 1.195447564125061, "learning_rate": 0.0004700805523590334, "loss": 0.3549, "step": 265900 }, { "epoch": 76.49884925201381, "grad_norm": 0.9670224189758301, "learning_rate": 0.0004700230149597238, "loss": 0.3572, "step": 265910 }, { "epoch": 76.50172612197929, "grad_norm": 1.3809640407562256, "learning_rate": 0.0004699654775604143, "loss": 0.4412, "step": 265920 }, { "epoch": 76.50460299194476, "grad_norm": 1.0257503986358643, "learning_rate": 0.0004699079401611047, "loss": 0.3965, "step": 265930 }, { "epoch": 76.50747986191024, "grad_norm": 1.6135057210922241, "learning_rate": 0.00046985040276179517, "loss": 0.3795, "step": 265940 }, { "epoch": 76.51035673187572, "grad_norm": 1.0409497022628784, "learning_rate": 0.0004697928653624856, "loss": 0.3634, "step": 265950 }, { "epoch": 76.5132336018412, "grad_norm": 1.8429604768753052, "learning_rate": 0.0004697353279631761, "loss": 0.4718, "step": 265960 }, { "epoch": 76.51611047180667, "grad_norm": 1.1997836828231812, "learning_rate": 0.0004696777905638665, "loss": 0.321, "step": 265970 }, { "epoch": 76.51898734177215, "grad_norm": 1.723272681236267, "learning_rate": 0.000469620253164557, "loss": 0.3911, "step": 265980 }, { "epoch": 76.52186421173764, "grad_norm": 1.7404035329818726, "learning_rate": 0.0004695627157652474, "loss": 0.4603, "step": 265990 }, { "epoch": 76.52474108170311, "grad_norm": 1.6972764730453491, "learning_rate": 0.00046950517836593785, "loss": 0.4423, "step": 266000 }, { "epoch": 76.52761795166859, "grad_norm": 1.8957043886184692, "learning_rate": 0.00046944764096662836, "loss": 0.3769, "step": 266010 }, { "epoch": 76.53049482163406, "grad_norm": 1.61653733253479, "learning_rate": 0.00046939010356731876, "loss": 0.3759, "step": 266020 }, { "epoch": 76.53337169159954, "grad_norm": 0.5282199382781982, "learning_rate": 0.0004693325661680092, "loss": 0.3384, "step": 266030 }, { "epoch": 76.53624856156502, "grad_norm": 1.8576996326446533, "learning_rate": 0.00046927502876869967, "loss": 0.3797, "step": 266040 }, { "epoch": 76.5391254315305, "grad_norm": 1.8679084777832031, "learning_rate": 0.0004692174913693901, "loss": 0.3625, "step": 266050 }, { "epoch": 76.54200230149597, "grad_norm": 1.2088727951049805, "learning_rate": 0.00046915995397008053, "loss": 0.4467, "step": 266060 }, { "epoch": 76.54487917146145, "grad_norm": 1.3712598085403442, "learning_rate": 0.00046910241657077104, "loss": 0.3562, "step": 266070 }, { "epoch": 76.54775604142692, "grad_norm": 1.873337745666504, "learning_rate": 0.00046904487917146144, "loss": 0.3578, "step": 266080 }, { "epoch": 76.5506329113924, "grad_norm": 1.388230323791504, "learning_rate": 0.0004689873417721519, "loss": 0.3492, "step": 266090 }, { "epoch": 76.55350978135789, "grad_norm": 0.7510982155799866, "learning_rate": 0.0004689298043728424, "loss": 0.399, "step": 266100 }, { "epoch": 76.55638665132336, "grad_norm": 1.0410455465316772, "learning_rate": 0.0004688722669735328, "loss": 0.3229, "step": 266110 }, { "epoch": 76.55926352128884, "grad_norm": 1.0745080709457397, "learning_rate": 0.00046881472957422326, "loss": 0.4578, "step": 266120 }, { "epoch": 76.56214039125432, "grad_norm": 0.5734466910362244, "learning_rate": 0.0004687571921749137, "loss": 0.3446, "step": 266130 }, { "epoch": 76.5650172612198, "grad_norm": 1.0618523359298706, "learning_rate": 0.00046869965477560417, "loss": 0.3448, "step": 266140 }, { "epoch": 76.56789413118527, "grad_norm": 0.8186976909637451, "learning_rate": 0.0004686421173762946, "loss": 0.3541, "step": 266150 }, { "epoch": 76.57077100115075, "grad_norm": 1.8293209075927734, "learning_rate": 0.0004685845799769851, "loss": 0.3935, "step": 266160 }, { "epoch": 76.57364787111622, "grad_norm": 1.219959020614624, "learning_rate": 0.0004685270425776755, "loss": 0.3593, "step": 266170 }, { "epoch": 76.5765247410817, "grad_norm": 0.9197882413864136, "learning_rate": 0.00046846950517836594, "loss": 0.4147, "step": 266180 }, { "epoch": 76.57940161104717, "grad_norm": 1.3647609949111938, "learning_rate": 0.0004684119677790564, "loss": 0.354, "step": 266190 }, { "epoch": 76.58227848101266, "grad_norm": 1.602911114692688, "learning_rate": 0.00046835443037974685, "loss": 0.3634, "step": 266200 }, { "epoch": 76.58515535097814, "grad_norm": 1.1136364936828613, "learning_rate": 0.0004682968929804373, "loss": 0.4205, "step": 266210 }, { "epoch": 76.58803222094362, "grad_norm": 1.003049612045288, "learning_rate": 0.00046823935558112776, "loss": 0.4265, "step": 266220 }, { "epoch": 76.5909090909091, "grad_norm": 1.1743792295455933, "learning_rate": 0.0004681818181818182, "loss": 0.3583, "step": 266230 }, { "epoch": 76.59378596087457, "grad_norm": 2.120435953140259, "learning_rate": 0.0004681242807825086, "loss": 0.4096, "step": 266240 }, { "epoch": 76.59666283084005, "grad_norm": 0.9980162978172302, "learning_rate": 0.0004680667433831991, "loss": 0.365, "step": 266250 }, { "epoch": 76.59953970080552, "grad_norm": 2.0088250637054443, "learning_rate": 0.00046800920598388953, "loss": 0.4051, "step": 266260 }, { "epoch": 76.602416570771, "grad_norm": 1.82742178440094, "learning_rate": 0.00046795166858458, "loss": 0.3551, "step": 266270 }, { "epoch": 76.60529344073647, "grad_norm": 1.5653189420700073, "learning_rate": 0.0004678941311852704, "loss": 0.3742, "step": 266280 }, { "epoch": 76.60817031070195, "grad_norm": 0.9623902440071106, "learning_rate": 0.0004678365937859609, "loss": 0.3617, "step": 266290 }, { "epoch": 76.61104718066743, "grad_norm": 1.2130104303359985, "learning_rate": 0.00046777905638665135, "loss": 0.4061, "step": 266300 }, { "epoch": 76.61392405063292, "grad_norm": 1.3014113903045654, "learning_rate": 0.00046772151898734175, "loss": 0.3257, "step": 266310 }, { "epoch": 76.6168009205984, "grad_norm": 1.4045822620391846, "learning_rate": 0.00046766398158803226, "loss": 0.4834, "step": 266320 }, { "epoch": 76.61967779056387, "grad_norm": 0.6561858654022217, "learning_rate": 0.00046760644418872266, "loss": 0.3488, "step": 266330 }, { "epoch": 76.62255466052935, "grad_norm": 0.8503491282463074, "learning_rate": 0.0004675489067894131, "loss": 0.4035, "step": 266340 }, { "epoch": 76.62543153049482, "grad_norm": 2.561692953109741, "learning_rate": 0.0004674913693901036, "loss": 0.3993, "step": 266350 }, { "epoch": 76.6283084004603, "grad_norm": 1.6803548336029053, "learning_rate": 0.00046743383199079403, "loss": 0.4481, "step": 266360 }, { "epoch": 76.63118527042577, "grad_norm": 0.6224093437194824, "learning_rate": 0.00046737629459148443, "loss": 0.3433, "step": 266370 }, { "epoch": 76.63406214039125, "grad_norm": 1.2267876863479614, "learning_rate": 0.00046731875719217494, "loss": 0.3177, "step": 266380 }, { "epoch": 76.63693901035673, "grad_norm": 1.112553596496582, "learning_rate": 0.0004672612197928654, "loss": 0.3603, "step": 266390 }, { "epoch": 76.6398158803222, "grad_norm": 0.8496072292327881, "learning_rate": 0.0004672036823935558, "loss": 0.3586, "step": 266400 }, { "epoch": 76.6426927502877, "grad_norm": 0.9058718681335449, "learning_rate": 0.0004671461449942463, "loss": 0.3541, "step": 266410 }, { "epoch": 76.64556962025317, "grad_norm": 1.1700209379196167, "learning_rate": 0.0004670886075949367, "loss": 0.3209, "step": 266420 }, { "epoch": 76.64844649021865, "grad_norm": 1.7518984079360962, "learning_rate": 0.00046703107019562716, "loss": 0.3665, "step": 266430 }, { "epoch": 76.65132336018412, "grad_norm": 0.8333261013031006, "learning_rate": 0.0004669735327963176, "loss": 0.3571, "step": 266440 }, { "epoch": 76.6542002301496, "grad_norm": 0.8258588910102844, "learning_rate": 0.0004669159953970081, "loss": 0.3367, "step": 266450 }, { "epoch": 76.65707710011507, "grad_norm": 1.2159926891326904, "learning_rate": 0.0004668584579976985, "loss": 0.4174, "step": 266460 }, { "epoch": 76.65995397008055, "grad_norm": 1.320389747619629, "learning_rate": 0.000466800920598389, "loss": 0.4172, "step": 266470 }, { "epoch": 76.66283084004603, "grad_norm": 1.2698856592178345, "learning_rate": 0.0004667433831990794, "loss": 0.4025, "step": 266480 }, { "epoch": 76.6657077100115, "grad_norm": 0.7628870010375977, "learning_rate": 0.00046668584579976984, "loss": 0.4765, "step": 266490 }, { "epoch": 76.66858457997698, "grad_norm": 1.4140938520431519, "learning_rate": 0.00046662830840046035, "loss": 0.3696, "step": 266500 }, { "epoch": 76.67146144994246, "grad_norm": 1.2871373891830444, "learning_rate": 0.00046657077100115075, "loss": 0.3959, "step": 266510 }, { "epoch": 76.67433831990795, "grad_norm": 0.7396572828292847, "learning_rate": 0.0004665132336018412, "loss": 0.334, "step": 266520 }, { "epoch": 76.67721518987342, "grad_norm": 1.0460537672042847, "learning_rate": 0.00046645569620253167, "loss": 0.3848, "step": 266530 }, { "epoch": 76.6800920598389, "grad_norm": 0.6488119959831238, "learning_rate": 0.0004663981588032221, "loss": 0.3053, "step": 266540 }, { "epoch": 76.68296892980437, "grad_norm": 0.6741230487823486, "learning_rate": 0.0004663406214039125, "loss": 0.4266, "step": 266550 }, { "epoch": 76.68584579976985, "grad_norm": 1.0098278522491455, "learning_rate": 0.00046628308400460303, "loss": 0.3666, "step": 266560 }, { "epoch": 76.68872266973533, "grad_norm": 0.8184472918510437, "learning_rate": 0.00046622554660529343, "loss": 0.3644, "step": 266570 }, { "epoch": 76.6915995397008, "grad_norm": 1.3277508020401, "learning_rate": 0.0004661680092059839, "loss": 0.3946, "step": 266580 }, { "epoch": 76.69447640966628, "grad_norm": 0.7271338105201721, "learning_rate": 0.0004661104718066744, "loss": 0.3699, "step": 266590 }, { "epoch": 76.69735327963176, "grad_norm": 1.1993483304977417, "learning_rate": 0.0004660529344073648, "loss": 0.3741, "step": 266600 }, { "epoch": 76.70023014959723, "grad_norm": 1.940244197845459, "learning_rate": 0.00046599539700805526, "loss": 0.3768, "step": 266610 }, { "epoch": 76.70310701956272, "grad_norm": 1.261403203010559, "learning_rate": 0.0004659378596087457, "loss": 0.4126, "step": 266620 }, { "epoch": 76.7059838895282, "grad_norm": 1.1285139322280884, "learning_rate": 0.00046588032220943617, "loss": 0.3578, "step": 266630 }, { "epoch": 76.70886075949367, "grad_norm": 0.8830121755599976, "learning_rate": 0.00046582278481012657, "loss": 0.4033, "step": 266640 }, { "epoch": 76.71173762945915, "grad_norm": 1.652500033378601, "learning_rate": 0.000465765247410817, "loss": 0.4358, "step": 266650 }, { "epoch": 76.71461449942463, "grad_norm": 0.9798709154129028, "learning_rate": 0.0004657077100115075, "loss": 0.3484, "step": 266660 }, { "epoch": 76.7174913693901, "grad_norm": 1.1338714361190796, "learning_rate": 0.00046565017261219793, "loss": 0.4258, "step": 266670 }, { "epoch": 76.72036823935558, "grad_norm": 0.6961032152175903, "learning_rate": 0.00046559263521288834, "loss": 0.4418, "step": 266680 }, { "epoch": 76.72324510932106, "grad_norm": 1.4507721662521362, "learning_rate": 0.00046553509781357885, "loss": 0.3766, "step": 266690 }, { "epoch": 76.72612197928653, "grad_norm": 1.0893770456314087, "learning_rate": 0.0004654775604142693, "loss": 0.3958, "step": 266700 }, { "epoch": 76.72899884925201, "grad_norm": 0.8451460599899292, "learning_rate": 0.0004654200230149597, "loss": 0.3795, "step": 266710 }, { "epoch": 76.7318757192175, "grad_norm": 0.9634506702423096, "learning_rate": 0.0004653624856156502, "loss": 0.367, "step": 266720 }, { "epoch": 76.73475258918297, "grad_norm": 1.097151756286621, "learning_rate": 0.0004653049482163406, "loss": 0.3006, "step": 266730 }, { "epoch": 76.73762945914845, "grad_norm": 2.1605913639068604, "learning_rate": 0.00046524741081703107, "loss": 0.4028, "step": 266740 }, { "epoch": 76.74050632911393, "grad_norm": 1.596625566482544, "learning_rate": 0.0004651898734177215, "loss": 0.3995, "step": 266750 }, { "epoch": 76.7433831990794, "grad_norm": 1.0092918872833252, "learning_rate": 0.000465132336018412, "loss": 0.3325, "step": 266760 }, { "epoch": 76.74626006904488, "grad_norm": 0.9886760115623474, "learning_rate": 0.0004650747986191024, "loss": 0.3703, "step": 266770 }, { "epoch": 76.74913693901036, "grad_norm": 0.673855185508728, "learning_rate": 0.0004650172612197929, "loss": 0.4327, "step": 266780 }, { "epoch": 76.75201380897583, "grad_norm": 2.264915704727173, "learning_rate": 0.00046495972382048335, "loss": 0.3552, "step": 266790 }, { "epoch": 76.75489067894131, "grad_norm": 1.8574174642562866, "learning_rate": 0.00046490218642117375, "loss": 0.3943, "step": 266800 }, { "epoch": 76.75776754890678, "grad_norm": 1.0807119607925415, "learning_rate": 0.00046484464902186426, "loss": 0.3581, "step": 266810 }, { "epoch": 76.76064441887226, "grad_norm": 1.0111333131790161, "learning_rate": 0.00046478711162255466, "loss": 0.4202, "step": 266820 }, { "epoch": 76.76352128883775, "grad_norm": 1.135667085647583, "learning_rate": 0.0004647295742232451, "loss": 0.4083, "step": 266830 }, { "epoch": 76.76639815880323, "grad_norm": 0.7454524636268616, "learning_rate": 0.00046467203682393557, "loss": 0.3185, "step": 266840 }, { "epoch": 76.7692750287687, "grad_norm": 1.3531526327133179, "learning_rate": 0.000464614499424626, "loss": 0.3632, "step": 266850 }, { "epoch": 76.77215189873418, "grad_norm": 1.582308292388916, "learning_rate": 0.0004645569620253164, "loss": 0.4324, "step": 266860 }, { "epoch": 76.77502876869966, "grad_norm": 1.8742609024047852, "learning_rate": 0.00046449942462600694, "loss": 0.3479, "step": 266870 }, { "epoch": 76.77790563866513, "grad_norm": 1.1458547115325928, "learning_rate": 0.0004644418872266974, "loss": 0.4435, "step": 266880 }, { "epoch": 76.78078250863061, "grad_norm": 1.0834771394729614, "learning_rate": 0.0004643843498273878, "loss": 0.3226, "step": 266890 }, { "epoch": 76.78365937859608, "grad_norm": 1.714073657989502, "learning_rate": 0.0004643268124280783, "loss": 0.3939, "step": 266900 }, { "epoch": 76.78653624856156, "grad_norm": 1.8966377973556519, "learning_rate": 0.0004642692750287687, "loss": 0.3671, "step": 266910 }, { "epoch": 76.78941311852704, "grad_norm": 0.8417317271232605, "learning_rate": 0.00046421173762945916, "loss": 0.3121, "step": 266920 }, { "epoch": 76.79228998849253, "grad_norm": 0.9896027445793152, "learning_rate": 0.0004641542002301496, "loss": 0.3414, "step": 266930 }, { "epoch": 76.795166858458, "grad_norm": 0.8821836113929749, "learning_rate": 0.00046409666283084007, "loss": 0.3168, "step": 266940 }, { "epoch": 76.79804372842348, "grad_norm": 0.7323774695396423, "learning_rate": 0.00046403912543153047, "loss": 0.4215, "step": 266950 }, { "epoch": 76.80092059838896, "grad_norm": 0.732781708240509, "learning_rate": 0.000463981588032221, "loss": 0.4289, "step": 266960 }, { "epoch": 76.80379746835443, "grad_norm": 1.0270601511001587, "learning_rate": 0.0004639240506329114, "loss": 0.4817, "step": 266970 }, { "epoch": 76.80667433831991, "grad_norm": 1.1035473346710205, "learning_rate": 0.00046386651323360184, "loss": 0.4285, "step": 266980 }, { "epoch": 76.80955120828538, "grad_norm": 1.0808308124542236, "learning_rate": 0.00046380897583429235, "loss": 0.4058, "step": 266990 }, { "epoch": 76.81242807825086, "grad_norm": 2.209588050842285, "learning_rate": 0.00046375143843498275, "loss": 0.3313, "step": 267000 }, { "epoch": 76.81530494821634, "grad_norm": 1.165590763092041, "learning_rate": 0.0004636939010356732, "loss": 0.4022, "step": 267010 }, { "epoch": 76.81818181818181, "grad_norm": 1.596599817276001, "learning_rate": 0.00046363636363636366, "loss": 0.3994, "step": 267020 }, { "epoch": 76.82105868814729, "grad_norm": 1.0900810956954956, "learning_rate": 0.0004635788262370541, "loss": 0.412, "step": 267030 }, { "epoch": 76.82393555811278, "grad_norm": 1.6215941905975342, "learning_rate": 0.0004635212888377445, "loss": 0.4483, "step": 267040 }, { "epoch": 76.82681242807826, "grad_norm": 1.8082016706466675, "learning_rate": 0.000463463751438435, "loss": 0.3849, "step": 267050 }, { "epoch": 76.82968929804373, "grad_norm": 1.5849276781082153, "learning_rate": 0.00046340621403912543, "loss": 0.3147, "step": 267060 }, { "epoch": 76.83256616800921, "grad_norm": 1.1407039165496826, "learning_rate": 0.0004633486766398159, "loss": 0.3529, "step": 267070 }, { "epoch": 76.83544303797468, "grad_norm": 1.34230375289917, "learning_rate": 0.00046329113924050634, "loss": 0.3298, "step": 267080 }, { "epoch": 76.83831990794016, "grad_norm": 0.6455672979354858, "learning_rate": 0.0004632336018411968, "loss": 0.379, "step": 267090 }, { "epoch": 76.84119677790564, "grad_norm": 1.4657272100448608, "learning_rate": 0.00046317606444188725, "loss": 0.3813, "step": 267100 }, { "epoch": 76.84407364787111, "grad_norm": 0.9755170345306396, "learning_rate": 0.00046311852704257765, "loss": 0.3851, "step": 267110 }, { "epoch": 76.84695051783659, "grad_norm": 0.8660244345664978, "learning_rate": 0.00046306098964326816, "loss": 0.33, "step": 267120 }, { "epoch": 76.84982738780207, "grad_norm": 0.8363919854164124, "learning_rate": 0.00046300345224395856, "loss": 0.317, "step": 267130 }, { "epoch": 76.85270425776756, "grad_norm": 2.5434458255767822, "learning_rate": 0.000462945914844649, "loss": 0.417, "step": 267140 }, { "epoch": 76.85558112773303, "grad_norm": 0.8720129728317261, "learning_rate": 0.0004628883774453395, "loss": 0.2846, "step": 267150 }, { "epoch": 76.85845799769851, "grad_norm": 1.5064043998718262, "learning_rate": 0.00046283084004602993, "loss": 0.358, "step": 267160 }, { "epoch": 76.86133486766398, "grad_norm": 2.1955955028533936, "learning_rate": 0.00046277330264672033, "loss": 0.4302, "step": 267170 }, { "epoch": 76.86421173762946, "grad_norm": 1.1043001413345337, "learning_rate": 0.00046271576524741084, "loss": 0.452, "step": 267180 }, { "epoch": 76.86708860759494, "grad_norm": 1.0899734497070312, "learning_rate": 0.0004626582278481013, "loss": 0.3613, "step": 267190 }, { "epoch": 76.86996547756041, "grad_norm": 1.063683271408081, "learning_rate": 0.0004626006904487917, "loss": 0.409, "step": 267200 }, { "epoch": 76.87284234752589, "grad_norm": 1.330635666847229, "learning_rate": 0.0004625431530494822, "loss": 0.3442, "step": 267210 }, { "epoch": 76.87571921749137, "grad_norm": 1.735706090927124, "learning_rate": 0.0004624856156501726, "loss": 0.4076, "step": 267220 }, { "epoch": 76.87859608745684, "grad_norm": 1.1384536027908325, "learning_rate": 0.00046242807825086306, "loss": 0.3909, "step": 267230 }, { "epoch": 76.88147295742232, "grad_norm": 1.4982717037200928, "learning_rate": 0.0004623705408515535, "loss": 0.3127, "step": 267240 }, { "epoch": 76.88434982738781, "grad_norm": 0.9172805547714233, "learning_rate": 0.000462313003452244, "loss": 0.3297, "step": 267250 }, { "epoch": 76.88722669735328, "grad_norm": 1.4052894115447998, "learning_rate": 0.0004622554660529344, "loss": 0.398, "step": 267260 }, { "epoch": 76.89010356731876, "grad_norm": 0.9192183017730713, "learning_rate": 0.0004621979286536249, "loss": 0.3505, "step": 267270 }, { "epoch": 76.89298043728424, "grad_norm": 1.3049346208572388, "learning_rate": 0.00046214039125431534, "loss": 0.4303, "step": 267280 }, { "epoch": 76.89585730724971, "grad_norm": 1.0767923593521118, "learning_rate": 0.00046208285385500574, "loss": 0.3408, "step": 267290 }, { "epoch": 76.89873417721519, "grad_norm": 1.161942481994629, "learning_rate": 0.00046202531645569625, "loss": 0.3927, "step": 267300 }, { "epoch": 76.90161104718067, "grad_norm": 2.394228935241699, "learning_rate": 0.00046196777905638665, "loss": 0.4166, "step": 267310 }, { "epoch": 76.90448791714614, "grad_norm": 1.6772271394729614, "learning_rate": 0.0004619102416570771, "loss": 0.4177, "step": 267320 }, { "epoch": 76.90736478711162, "grad_norm": 0.9031077027320862, "learning_rate": 0.00046185270425776756, "loss": 0.4367, "step": 267330 }, { "epoch": 76.9102416570771, "grad_norm": 0.8273257613182068, "learning_rate": 0.000461795166858458, "loss": 0.3942, "step": 267340 }, { "epoch": 76.91311852704258, "grad_norm": 1.8313552141189575, "learning_rate": 0.0004617376294591484, "loss": 0.413, "step": 267350 }, { "epoch": 76.91599539700806, "grad_norm": 0.8683626651763916, "learning_rate": 0.00046168009205983893, "loss": 0.3171, "step": 267360 }, { "epoch": 76.91887226697354, "grad_norm": 1.6069222688674927, "learning_rate": 0.0004616225546605294, "loss": 0.4036, "step": 267370 }, { "epoch": 76.92174913693901, "grad_norm": 1.924109935760498, "learning_rate": 0.0004615650172612198, "loss": 0.4559, "step": 267380 }, { "epoch": 76.92462600690449, "grad_norm": 1.393306016921997, "learning_rate": 0.0004615074798619103, "loss": 0.4783, "step": 267390 }, { "epoch": 76.92750287686997, "grad_norm": 2.0829193592071533, "learning_rate": 0.0004614499424626007, "loss": 0.3877, "step": 267400 }, { "epoch": 76.93037974683544, "grad_norm": 1.6095422506332397, "learning_rate": 0.00046139240506329115, "loss": 0.3544, "step": 267410 }, { "epoch": 76.93325661680092, "grad_norm": 1.416520118713379, "learning_rate": 0.0004613348676639816, "loss": 0.4245, "step": 267420 }, { "epoch": 76.9361334867664, "grad_norm": 1.7670940160751343, "learning_rate": 0.00046127733026467206, "loss": 0.4267, "step": 267430 }, { "epoch": 76.93901035673187, "grad_norm": 1.7815239429473877, "learning_rate": 0.00046121979286536247, "loss": 0.4732, "step": 267440 }, { "epoch": 76.94188722669735, "grad_norm": 1.3978843688964844, "learning_rate": 0.000461162255466053, "loss": 0.3986, "step": 267450 }, { "epoch": 76.94476409666284, "grad_norm": 1.09800124168396, "learning_rate": 0.0004611047180667434, "loss": 0.3146, "step": 267460 }, { "epoch": 76.94764096662831, "grad_norm": 0.5574591755867004, "learning_rate": 0.00046104718066743383, "loss": 0.3359, "step": 267470 }, { "epoch": 76.95051783659379, "grad_norm": 1.053383469581604, "learning_rate": 0.0004609896432681243, "loss": 0.2948, "step": 267480 }, { "epoch": 76.95339470655927, "grad_norm": 1.8000394105911255, "learning_rate": 0.00046093210586881474, "loss": 0.5002, "step": 267490 }, { "epoch": 76.95627157652474, "grad_norm": 1.5116761922836304, "learning_rate": 0.0004608745684695052, "loss": 0.3682, "step": 267500 }, { "epoch": 76.95914844649022, "grad_norm": 1.658297061920166, "learning_rate": 0.0004608170310701956, "loss": 0.3399, "step": 267510 }, { "epoch": 76.9620253164557, "grad_norm": 0.7829641699790955, "learning_rate": 0.0004607594936708861, "loss": 0.3755, "step": 267520 }, { "epoch": 76.96490218642117, "grad_norm": 1.1905895471572876, "learning_rate": 0.0004607019562715765, "loss": 0.3544, "step": 267530 }, { "epoch": 76.96777905638665, "grad_norm": 1.2637346982955933, "learning_rate": 0.00046064441887226697, "loss": 0.4119, "step": 267540 }, { "epoch": 76.97065592635212, "grad_norm": 0.8355154395103455, "learning_rate": 0.0004605868814729574, "loss": 0.2819, "step": 267550 }, { "epoch": 76.97353279631761, "grad_norm": 2.1343021392822266, "learning_rate": 0.0004605293440736479, "loss": 0.312, "step": 267560 }, { "epoch": 76.97640966628309, "grad_norm": 1.3865933418273926, "learning_rate": 0.00046047180667433833, "loss": 0.5793, "step": 267570 }, { "epoch": 76.97928653624857, "grad_norm": 1.1390414237976074, "learning_rate": 0.0004604142692750288, "loss": 0.4001, "step": 267580 }, { "epoch": 76.98216340621404, "grad_norm": 1.799842357635498, "learning_rate": 0.00046035673187571924, "loss": 0.4075, "step": 267590 }, { "epoch": 76.98504027617952, "grad_norm": 1.8460206985473633, "learning_rate": 0.00046029919447640965, "loss": 0.432, "step": 267600 }, { "epoch": 76.987917146145, "grad_norm": 0.9326684474945068, "learning_rate": 0.00046024165707710016, "loss": 0.4168, "step": 267610 }, { "epoch": 76.99079401611047, "grad_norm": 0.6504576206207275, "learning_rate": 0.00046018411967779056, "loss": 0.3281, "step": 267620 }, { "epoch": 76.99367088607595, "grad_norm": 1.149497389793396, "learning_rate": 0.000460126582278481, "loss": 0.3508, "step": 267630 }, { "epoch": 76.99654775604142, "grad_norm": 1.552027702331543, "learning_rate": 0.00046006904487917147, "loss": 0.3433, "step": 267640 }, { "epoch": 76.9994246260069, "grad_norm": 1.0641441345214844, "learning_rate": 0.0004600115074798619, "loss": 0.3339, "step": 267650 }, { "epoch": 77.00230149597238, "grad_norm": 0.6975899934768677, "learning_rate": 0.0004599539700805523, "loss": 0.3919, "step": 267660 }, { "epoch": 77.00517836593787, "grad_norm": 2.187006711959839, "learning_rate": 0.00045989643268124283, "loss": 0.3756, "step": 267670 }, { "epoch": 77.00805523590334, "grad_norm": 1.5445443391799927, "learning_rate": 0.0004598388952819333, "loss": 0.3706, "step": 267680 }, { "epoch": 77.01093210586882, "grad_norm": 0.980094850063324, "learning_rate": 0.0004597813578826237, "loss": 0.3831, "step": 267690 }, { "epoch": 77.0138089758343, "grad_norm": 1.7819674015045166, "learning_rate": 0.0004597238204833142, "loss": 0.4166, "step": 267700 }, { "epoch": 77.01668584579977, "grad_norm": 1.2534958124160767, "learning_rate": 0.0004596662830840046, "loss": 0.3043, "step": 267710 }, { "epoch": 77.01956271576525, "grad_norm": 2.0344185829162598, "learning_rate": 0.00045960874568469506, "loss": 0.3354, "step": 267720 }, { "epoch": 77.02243958573072, "grad_norm": 1.6719143390655518, "learning_rate": 0.0004595512082853855, "loss": 0.3552, "step": 267730 }, { "epoch": 77.0253164556962, "grad_norm": 0.9080528020858765, "learning_rate": 0.00045949367088607597, "loss": 0.4009, "step": 267740 }, { "epoch": 77.02819332566168, "grad_norm": 1.2696691751480103, "learning_rate": 0.00045943613348676637, "loss": 0.3873, "step": 267750 }, { "epoch": 77.03107019562715, "grad_norm": 0.9127414226531982, "learning_rate": 0.0004593785960874569, "loss": 0.2938, "step": 267760 }, { "epoch": 77.03394706559264, "grad_norm": 0.8878703117370605, "learning_rate": 0.00045932105868814733, "loss": 0.338, "step": 267770 }, { "epoch": 77.03682393555812, "grad_norm": 1.2162601947784424, "learning_rate": 0.00045926352128883774, "loss": 0.3619, "step": 267780 }, { "epoch": 77.0397008055236, "grad_norm": 1.2691880464553833, "learning_rate": 0.00045920598388952825, "loss": 0.4206, "step": 267790 }, { "epoch": 77.04257767548907, "grad_norm": 1.6854287385940552, "learning_rate": 0.00045914844649021865, "loss": 0.3758, "step": 267800 }, { "epoch": 77.04545454545455, "grad_norm": 1.186599850654602, "learning_rate": 0.0004590909090909091, "loss": 0.2978, "step": 267810 }, { "epoch": 77.04833141542002, "grad_norm": 1.0176689624786377, "learning_rate": 0.00045903337169159956, "loss": 0.3576, "step": 267820 }, { "epoch": 77.0512082853855, "grad_norm": 1.4293389320373535, "learning_rate": 0.00045897583429229, "loss": 0.466, "step": 267830 }, { "epoch": 77.05408515535098, "grad_norm": 0.8274950385093689, "learning_rate": 0.0004589182968929804, "loss": 0.3894, "step": 267840 }, { "epoch": 77.05696202531645, "grad_norm": 0.6396018862724304, "learning_rate": 0.0004588607594936709, "loss": 0.3018, "step": 267850 }, { "epoch": 77.05983889528193, "grad_norm": 0.6373135447502136, "learning_rate": 0.0004588032220943614, "loss": 0.2919, "step": 267860 }, { "epoch": 77.0627157652474, "grad_norm": 0.8817236423492432, "learning_rate": 0.0004587456846950518, "loss": 0.2979, "step": 267870 }, { "epoch": 77.0655926352129, "grad_norm": 1.064834713935852, "learning_rate": 0.0004586881472957423, "loss": 0.3392, "step": 267880 }, { "epoch": 77.06846950517837, "grad_norm": 1.4162036180496216, "learning_rate": 0.0004586306098964327, "loss": 0.3698, "step": 267890 }, { "epoch": 77.07134637514385, "grad_norm": 1.3926115036010742, "learning_rate": 0.00045857307249712315, "loss": 0.3482, "step": 267900 }, { "epoch": 77.07422324510932, "grad_norm": 1.8627039194107056, "learning_rate": 0.00045851553509781355, "loss": 0.4253, "step": 267910 }, { "epoch": 77.0771001150748, "grad_norm": 0.8586222529411316, "learning_rate": 0.00045845799769850406, "loss": 0.3828, "step": 267920 }, { "epoch": 77.07997698504028, "grad_norm": 1.7651872634887695, "learning_rate": 0.00045840046029919446, "loss": 0.4288, "step": 267930 }, { "epoch": 77.08285385500575, "grad_norm": 1.0906118154525757, "learning_rate": 0.0004583429228998849, "loss": 0.3867, "step": 267940 }, { "epoch": 77.08573072497123, "grad_norm": 0.6224377155303955, "learning_rate": 0.00045828538550057537, "loss": 0.2766, "step": 267950 }, { "epoch": 77.0886075949367, "grad_norm": 1.1085814237594604, "learning_rate": 0.0004582278481012658, "loss": 0.3216, "step": 267960 }, { "epoch": 77.09148446490218, "grad_norm": 1.3205678462982178, "learning_rate": 0.0004581703107019563, "loss": 0.3896, "step": 267970 }, { "epoch": 77.09436133486767, "grad_norm": 1.5924084186553955, "learning_rate": 0.00045811277330264674, "loss": 0.3105, "step": 267980 }, { "epoch": 77.09723820483315, "grad_norm": 0.6386281251907349, "learning_rate": 0.0004580552359033372, "loss": 0.2428, "step": 267990 }, { "epoch": 77.10011507479862, "grad_norm": 2.3477182388305664, "learning_rate": 0.0004579976985040276, "loss": 0.3434, "step": 268000 }, { "epoch": 77.1029919447641, "grad_norm": 1.635292410850525, "learning_rate": 0.0004579401611047181, "loss": 0.3583, "step": 268010 }, { "epoch": 77.10586881472958, "grad_norm": 1.5723224878311157, "learning_rate": 0.0004578826237054085, "loss": 0.3792, "step": 268020 }, { "epoch": 77.10874568469505, "grad_norm": 1.2728804349899292, "learning_rate": 0.00045782508630609896, "loss": 0.3292, "step": 268030 }, { "epoch": 77.11162255466053, "grad_norm": 0.9033673405647278, "learning_rate": 0.0004577675489067894, "loss": 0.3111, "step": 268040 }, { "epoch": 77.114499424626, "grad_norm": 2.2378695011138916, "learning_rate": 0.00045771001150747987, "loss": 0.4336, "step": 268050 }, { "epoch": 77.11737629459148, "grad_norm": 1.4444475173950195, "learning_rate": 0.00045765247410817033, "loss": 0.358, "step": 268060 }, { "epoch": 77.12025316455696, "grad_norm": 0.8767889738082886, "learning_rate": 0.0004575949367088608, "loss": 0.2808, "step": 268070 }, { "epoch": 77.12313003452243, "grad_norm": 1.193260669708252, "learning_rate": 0.00045753739930955124, "loss": 0.2895, "step": 268080 }, { "epoch": 77.12600690448792, "grad_norm": 0.7725430130958557, "learning_rate": 0.00045747986191024164, "loss": 0.3512, "step": 268090 }, { "epoch": 77.1288837744534, "grad_norm": 1.3608076572418213, "learning_rate": 0.00045742232451093215, "loss": 0.3805, "step": 268100 }, { "epoch": 77.13176064441888, "grad_norm": 1.3526018857955933, "learning_rate": 0.00045736478711162255, "loss": 0.372, "step": 268110 }, { "epoch": 77.13463751438435, "grad_norm": 1.7071974277496338, "learning_rate": 0.000457307249712313, "loss": 0.4285, "step": 268120 }, { "epoch": 77.13751438434983, "grad_norm": 1.7119415998458862, "learning_rate": 0.00045724971231300346, "loss": 0.3994, "step": 268130 }, { "epoch": 77.1403912543153, "grad_norm": 0.7237576842308044, "learning_rate": 0.0004571921749136939, "loss": 0.2721, "step": 268140 }, { "epoch": 77.14326812428078, "grad_norm": 1.5012569427490234, "learning_rate": 0.0004571346375143843, "loss": 0.332, "step": 268150 }, { "epoch": 77.14614499424626, "grad_norm": 1.9915341138839722, "learning_rate": 0.00045707710011507483, "loss": 0.285, "step": 268160 }, { "epoch": 77.14902186421173, "grad_norm": 1.3316459655761719, "learning_rate": 0.0004570195627157653, "loss": 0.3954, "step": 268170 }, { "epoch": 77.15189873417721, "grad_norm": 0.8527346849441528, "learning_rate": 0.0004569620253164557, "loss": 0.3415, "step": 268180 }, { "epoch": 77.1547756041427, "grad_norm": 0.9132810235023499, "learning_rate": 0.0004569044879171462, "loss": 0.3089, "step": 268190 }, { "epoch": 77.15765247410818, "grad_norm": 0.6584795117378235, "learning_rate": 0.0004568469505178366, "loss": 0.3497, "step": 268200 }, { "epoch": 77.16052934407365, "grad_norm": 0.8720408082008362, "learning_rate": 0.00045678941311852705, "loss": 0.3505, "step": 268210 }, { "epoch": 77.16340621403913, "grad_norm": 0.8336724042892456, "learning_rate": 0.0004567318757192175, "loss": 0.3473, "step": 268220 }, { "epoch": 77.1662830840046, "grad_norm": 1.2045190334320068, "learning_rate": 0.00045667433831990796, "loss": 0.346, "step": 268230 }, { "epoch": 77.16915995397008, "grad_norm": 0.777766764163971, "learning_rate": 0.00045661680092059836, "loss": 0.3559, "step": 268240 }, { "epoch": 77.17203682393556, "grad_norm": 1.9645949602127075, "learning_rate": 0.0004565592635212889, "loss": 0.4184, "step": 268250 }, { "epoch": 77.17491369390103, "grad_norm": 0.9699607491493225, "learning_rate": 0.00045650172612197933, "loss": 0.3769, "step": 268260 }, { "epoch": 77.17779056386651, "grad_norm": 3.0962629318237305, "learning_rate": 0.00045644418872266973, "loss": 0.4078, "step": 268270 }, { "epoch": 77.18066743383199, "grad_norm": 0.9788714647293091, "learning_rate": 0.00045638665132336024, "loss": 0.3481, "step": 268280 }, { "epoch": 77.18354430379746, "grad_norm": 2.4470224380493164, "learning_rate": 0.00045632911392405064, "loss": 0.3718, "step": 268290 }, { "epoch": 77.18642117376295, "grad_norm": 0.7785610556602478, "learning_rate": 0.0004562715765247411, "loss": 0.3449, "step": 268300 }, { "epoch": 77.18929804372843, "grad_norm": 1.582831621170044, "learning_rate": 0.0004562140391254315, "loss": 0.4568, "step": 268310 }, { "epoch": 77.1921749136939, "grad_norm": 1.1606156826019287, "learning_rate": 0.000456156501726122, "loss": 0.3377, "step": 268320 }, { "epoch": 77.19505178365938, "grad_norm": 1.3701380491256714, "learning_rate": 0.0004560989643268124, "loss": 0.3802, "step": 268330 }, { "epoch": 77.19792865362486, "grad_norm": 0.7778556942939758, "learning_rate": 0.00045604142692750286, "loss": 0.2752, "step": 268340 }, { "epoch": 77.20080552359033, "grad_norm": 1.2972332239151, "learning_rate": 0.0004559838895281933, "loss": 0.3883, "step": 268350 }, { "epoch": 77.20368239355581, "grad_norm": 1.1623886823654175, "learning_rate": 0.0004559263521288838, "loss": 0.3852, "step": 268360 }, { "epoch": 77.20655926352129, "grad_norm": 2.3404791355133057, "learning_rate": 0.00045586881472957423, "loss": 0.3345, "step": 268370 }, { "epoch": 77.20943613348676, "grad_norm": 2.072695255279541, "learning_rate": 0.0004558112773302647, "loss": 0.4837, "step": 268380 }, { "epoch": 77.21231300345224, "grad_norm": 1.216322422027588, "learning_rate": 0.00045575373993095514, "loss": 0.3227, "step": 268390 }, { "epoch": 77.21518987341773, "grad_norm": 1.331628680229187, "learning_rate": 0.00045569620253164554, "loss": 0.3517, "step": 268400 }, { "epoch": 77.2180667433832, "grad_norm": 1.4378447532653809, "learning_rate": 0.00045563866513233605, "loss": 0.3925, "step": 268410 }, { "epoch": 77.22094361334868, "grad_norm": 1.3007121086120605, "learning_rate": 0.00045558112773302645, "loss": 0.3037, "step": 268420 }, { "epoch": 77.22382048331416, "grad_norm": 0.7655221223831177, "learning_rate": 0.0004555235903337169, "loss": 0.3174, "step": 268430 }, { "epoch": 77.22669735327963, "grad_norm": 0.751407265663147, "learning_rate": 0.00045546605293440737, "loss": 0.4013, "step": 268440 }, { "epoch": 77.22957422324511, "grad_norm": 1.8650611639022827, "learning_rate": 0.0004554085155350978, "loss": 0.4393, "step": 268450 }, { "epoch": 77.23245109321059, "grad_norm": 0.8609294295310974, "learning_rate": 0.0004553509781357883, "loss": 0.2972, "step": 268460 }, { "epoch": 77.23532796317606, "grad_norm": 1.6311006546020508, "learning_rate": 0.00045529344073647873, "loss": 0.4742, "step": 268470 }, { "epoch": 77.23820483314154, "grad_norm": 0.8536862730979919, "learning_rate": 0.0004552359033371692, "loss": 0.299, "step": 268480 }, { "epoch": 77.24108170310701, "grad_norm": 2.3484890460968018, "learning_rate": 0.0004551783659378596, "loss": 0.3524, "step": 268490 }, { "epoch": 77.24395857307249, "grad_norm": 1.5673564672470093, "learning_rate": 0.0004551208285385501, "loss": 0.3352, "step": 268500 }, { "epoch": 77.24683544303798, "grad_norm": 1.9114402532577515, "learning_rate": 0.0004550632911392405, "loss": 0.327, "step": 268510 }, { "epoch": 77.24971231300346, "grad_norm": 1.3859490156173706, "learning_rate": 0.00045500575373993096, "loss": 0.3439, "step": 268520 }, { "epoch": 77.25258918296893, "grad_norm": 1.7142702341079712, "learning_rate": 0.0004549482163406214, "loss": 0.3802, "step": 268530 }, { "epoch": 77.25546605293441, "grad_norm": 0.8823937177658081, "learning_rate": 0.00045489067894131187, "loss": 0.3541, "step": 268540 }, { "epoch": 77.25834292289989, "grad_norm": 0.9093953371047974, "learning_rate": 0.0004548331415420023, "loss": 0.2762, "step": 268550 }, { "epoch": 77.26121979286536, "grad_norm": 1.0593594312667847, "learning_rate": 0.0004547756041426928, "loss": 0.3423, "step": 268560 }, { "epoch": 77.26409666283084, "grad_norm": 1.2849318981170654, "learning_rate": 0.00045471806674338323, "loss": 0.3149, "step": 268570 }, { "epoch": 77.26697353279631, "grad_norm": 0.8467769622802734, "learning_rate": 0.00045466052934407363, "loss": 0.382, "step": 268580 }, { "epoch": 77.26985040276179, "grad_norm": 0.7749841213226318, "learning_rate": 0.00045460299194476414, "loss": 0.3156, "step": 268590 }, { "epoch": 77.27272727272727, "grad_norm": 0.7851475477218628, "learning_rate": 0.00045454545454545455, "loss": 0.3764, "step": 268600 }, { "epoch": 77.27560414269276, "grad_norm": 0.7922670245170593, "learning_rate": 0.000454487917146145, "loss": 0.2755, "step": 268610 }, { "epoch": 77.27848101265823, "grad_norm": 1.744728684425354, "learning_rate": 0.00045443037974683546, "loss": 0.337, "step": 268620 }, { "epoch": 77.28135788262371, "grad_norm": 1.690451741218567, "learning_rate": 0.0004543728423475259, "loss": 0.334, "step": 268630 }, { "epoch": 77.28423475258919, "grad_norm": 1.4243896007537842, "learning_rate": 0.0004543153049482163, "loss": 0.4545, "step": 268640 }, { "epoch": 77.28711162255466, "grad_norm": 1.3533298969268799, "learning_rate": 0.0004542577675489068, "loss": 0.3712, "step": 268650 }, { "epoch": 77.28998849252014, "grad_norm": 0.8416758179664612, "learning_rate": 0.0004542002301495973, "loss": 0.3508, "step": 268660 }, { "epoch": 77.29286536248561, "grad_norm": 1.4406598806381226, "learning_rate": 0.0004541426927502877, "loss": 0.3119, "step": 268670 }, { "epoch": 77.29574223245109, "grad_norm": 1.5118861198425293, "learning_rate": 0.0004540851553509782, "loss": 0.3927, "step": 268680 }, { "epoch": 77.29861910241657, "grad_norm": 1.2511229515075684, "learning_rate": 0.0004540276179516686, "loss": 0.434, "step": 268690 }, { "epoch": 77.30149597238204, "grad_norm": 0.7696435451507568, "learning_rate": 0.00045397008055235905, "loss": 0.2887, "step": 268700 }, { "epoch": 77.30437284234753, "grad_norm": 1.9386584758758545, "learning_rate": 0.00045391254315304945, "loss": 0.4111, "step": 268710 }, { "epoch": 77.30724971231301, "grad_norm": 0.935123085975647, "learning_rate": 0.00045385500575373996, "loss": 0.315, "step": 268720 }, { "epoch": 77.31012658227849, "grad_norm": 0.8469741344451904, "learning_rate": 0.00045379746835443036, "loss": 0.3323, "step": 268730 }, { "epoch": 77.31300345224396, "grad_norm": 1.192694902420044, "learning_rate": 0.0004537399309551208, "loss": 0.3717, "step": 268740 }, { "epoch": 77.31588032220944, "grad_norm": 1.4527050256729126, "learning_rate": 0.0004536823935558113, "loss": 0.3373, "step": 268750 }, { "epoch": 77.31875719217491, "grad_norm": 0.7350881099700928, "learning_rate": 0.0004536248561565017, "loss": 0.3814, "step": 268760 }, { "epoch": 77.32163406214039, "grad_norm": 1.21504545211792, "learning_rate": 0.0004535673187571922, "loss": 0.327, "step": 268770 }, { "epoch": 77.32451093210587, "grad_norm": 1.5914537906646729, "learning_rate": 0.00045350978135788264, "loss": 0.4544, "step": 268780 }, { "epoch": 77.32738780207134, "grad_norm": 0.7954394221305847, "learning_rate": 0.0004534522439585731, "loss": 0.2744, "step": 268790 }, { "epoch": 77.33026467203682, "grad_norm": 0.9915119409561157, "learning_rate": 0.0004533947065592635, "loss": 0.3785, "step": 268800 }, { "epoch": 77.3331415420023, "grad_norm": 1.111627459526062, "learning_rate": 0.000453337169159954, "loss": 0.3437, "step": 268810 }, { "epoch": 77.33601841196779, "grad_norm": 0.6583558917045593, "learning_rate": 0.0004532796317606444, "loss": 0.3932, "step": 268820 }, { "epoch": 77.33889528193326, "grad_norm": 0.8341022729873657, "learning_rate": 0.00045322209436133486, "loss": 0.3528, "step": 268830 }, { "epoch": 77.34177215189874, "grad_norm": 1.1106525659561157, "learning_rate": 0.0004531645569620253, "loss": 0.4299, "step": 268840 }, { "epoch": 77.34464902186421, "grad_norm": 1.6768940687179565, "learning_rate": 0.00045310701956271577, "loss": 0.4397, "step": 268850 }, { "epoch": 77.34752589182969, "grad_norm": 0.9131972789764404, "learning_rate": 0.0004530494821634062, "loss": 0.3258, "step": 268860 }, { "epoch": 77.35040276179517, "grad_norm": 1.455578327178955, "learning_rate": 0.0004529919447640967, "loss": 0.297, "step": 268870 }, { "epoch": 77.35327963176064, "grad_norm": 2.0590384006500244, "learning_rate": 0.00045293440736478714, "loss": 0.3656, "step": 268880 }, { "epoch": 77.35615650172612, "grad_norm": 2.194471597671509, "learning_rate": 0.00045287686996547754, "loss": 0.3244, "step": 268890 }, { "epoch": 77.3590333716916, "grad_norm": 2.1413753032684326, "learning_rate": 0.00045281933256616805, "loss": 0.4228, "step": 268900 }, { "epoch": 77.36191024165707, "grad_norm": 0.9691784381866455, "learning_rate": 0.00045276179516685845, "loss": 0.4162, "step": 268910 }, { "epoch": 77.36478711162256, "grad_norm": 1.1526854038238525, "learning_rate": 0.0004527042577675489, "loss": 0.4458, "step": 268920 }, { "epoch": 77.36766398158804, "grad_norm": 1.5206272602081299, "learning_rate": 0.00045264672036823936, "loss": 0.3996, "step": 268930 }, { "epoch": 77.37054085155351, "grad_norm": 2.1003575325012207, "learning_rate": 0.0004525891829689298, "loss": 0.4621, "step": 268940 }, { "epoch": 77.37341772151899, "grad_norm": 1.514022946357727, "learning_rate": 0.00045253164556962027, "loss": 0.3703, "step": 268950 }, { "epoch": 77.37629459148447, "grad_norm": 0.9642714858055115, "learning_rate": 0.0004524741081703107, "loss": 0.3308, "step": 268960 }, { "epoch": 77.37917146144994, "grad_norm": 1.3416601419448853, "learning_rate": 0.0004524165707710012, "loss": 0.3958, "step": 268970 }, { "epoch": 77.38204833141542, "grad_norm": 1.0342406034469604, "learning_rate": 0.0004523590333716916, "loss": 0.2784, "step": 268980 }, { "epoch": 77.3849252013809, "grad_norm": 1.06587815284729, "learning_rate": 0.0004523014959723821, "loss": 0.3814, "step": 268990 }, { "epoch": 77.38780207134637, "grad_norm": 1.5884474515914917, "learning_rate": 0.0004522439585730725, "loss": 0.4455, "step": 269000 }, { "epoch": 77.39067894131185, "grad_norm": 1.5444742441177368, "learning_rate": 0.00045218642117376295, "loss": 0.3801, "step": 269010 }, { "epoch": 77.39355581127732, "grad_norm": 0.6633637547492981, "learning_rate": 0.0004521288837744534, "loss": 0.3942, "step": 269020 }, { "epoch": 77.39643268124281, "grad_norm": 1.1856011152267456, "learning_rate": 0.00045207134637514386, "loss": 0.3933, "step": 269030 }, { "epoch": 77.39930955120829, "grad_norm": 1.2510337829589844, "learning_rate": 0.0004520138089758343, "loss": 0.3148, "step": 269040 }, { "epoch": 77.40218642117377, "grad_norm": 1.3233463764190674, "learning_rate": 0.00045195627157652477, "loss": 0.3363, "step": 269050 }, { "epoch": 77.40506329113924, "grad_norm": 1.2898963689804077, "learning_rate": 0.00045189873417721523, "loss": 0.3893, "step": 269060 }, { "epoch": 77.40794016110472, "grad_norm": 1.0686709880828857, "learning_rate": 0.00045184119677790563, "loss": 0.3639, "step": 269070 }, { "epoch": 77.4108170310702, "grad_norm": 0.9144139885902405, "learning_rate": 0.00045178365937859614, "loss": 0.3308, "step": 269080 }, { "epoch": 77.41369390103567, "grad_norm": 2.8497016429901123, "learning_rate": 0.00045172612197928654, "loss": 0.4998, "step": 269090 }, { "epoch": 77.41657077100115, "grad_norm": 1.5392446517944336, "learning_rate": 0.000451668584579977, "loss": 0.3806, "step": 269100 }, { "epoch": 77.41944764096662, "grad_norm": 0.8695231676101685, "learning_rate": 0.0004516110471806674, "loss": 0.3277, "step": 269110 }, { "epoch": 77.4223245109321, "grad_norm": 0.8090870380401611, "learning_rate": 0.0004515535097813579, "loss": 0.3743, "step": 269120 }, { "epoch": 77.42520138089759, "grad_norm": 0.7520766258239746, "learning_rate": 0.0004514959723820483, "loss": 0.2909, "step": 269130 }, { "epoch": 77.42807825086307, "grad_norm": 1.190314531326294, "learning_rate": 0.00045143843498273876, "loss": 0.3479, "step": 269140 }, { "epoch": 77.43095512082854, "grad_norm": 1.104412317276001, "learning_rate": 0.00045138089758342927, "loss": 0.3918, "step": 269150 }, { "epoch": 77.43383199079402, "grad_norm": 2.8202531337738037, "learning_rate": 0.0004513233601841197, "loss": 0.3342, "step": 269160 }, { "epoch": 77.4367088607595, "grad_norm": 0.7045385837554932, "learning_rate": 0.00045126582278481013, "loss": 0.3146, "step": 269170 }, { "epoch": 77.43958573072497, "grad_norm": 0.973673403263092, "learning_rate": 0.0004512082853855006, "loss": 0.4623, "step": 269180 }, { "epoch": 77.44246260069045, "grad_norm": 0.8125301599502563, "learning_rate": 0.00045115074798619104, "loss": 0.36, "step": 269190 }, { "epoch": 77.44533947065592, "grad_norm": 1.2844213247299194, "learning_rate": 0.00045109321058688144, "loss": 0.3755, "step": 269200 }, { "epoch": 77.4482163406214, "grad_norm": 1.2510980367660522, "learning_rate": 0.00045103567318757195, "loss": 0.4179, "step": 269210 }, { "epoch": 77.45109321058688, "grad_norm": 1.1204530000686646, "learning_rate": 0.00045097813578826235, "loss": 0.2796, "step": 269220 }, { "epoch": 77.45397008055235, "grad_norm": 0.9797215461730957, "learning_rate": 0.0004509205983889528, "loss": 0.3769, "step": 269230 }, { "epoch": 77.45684695051784, "grad_norm": 2.3839666843414307, "learning_rate": 0.0004508630609896433, "loss": 0.4118, "step": 269240 }, { "epoch": 77.45972382048332, "grad_norm": 1.4229445457458496, "learning_rate": 0.0004508055235903337, "loss": 0.3871, "step": 269250 }, { "epoch": 77.4626006904488, "grad_norm": 1.2528988122940063, "learning_rate": 0.0004507479861910242, "loss": 0.3441, "step": 269260 }, { "epoch": 77.46547756041427, "grad_norm": 1.127516508102417, "learning_rate": 0.00045069044879171463, "loss": 0.3584, "step": 269270 }, { "epoch": 77.46835443037975, "grad_norm": 0.8860548138618469, "learning_rate": 0.0004506329113924051, "loss": 0.3725, "step": 269280 }, { "epoch": 77.47123130034522, "grad_norm": 0.7654202580451965, "learning_rate": 0.0004505753739930955, "loss": 0.4194, "step": 269290 }, { "epoch": 77.4741081703107, "grad_norm": 0.9256623983383179, "learning_rate": 0.000450517836593786, "loss": 0.4237, "step": 269300 }, { "epoch": 77.47698504027618, "grad_norm": 1.1285645961761475, "learning_rate": 0.0004504602991944764, "loss": 0.3201, "step": 269310 }, { "epoch": 77.47986191024165, "grad_norm": 0.6556971669197083, "learning_rate": 0.00045040276179516685, "loss": 0.4597, "step": 269320 }, { "epoch": 77.48273878020713, "grad_norm": 1.3489611148834229, "learning_rate": 0.0004503452243958573, "loss": 0.3448, "step": 269330 }, { "epoch": 77.48561565017262, "grad_norm": 1.2925140857696533, "learning_rate": 0.00045028768699654776, "loss": 0.3222, "step": 269340 }, { "epoch": 77.4884925201381, "grad_norm": 1.430834174156189, "learning_rate": 0.0004502301495972382, "loss": 0.4202, "step": 269350 }, { "epoch": 77.49136939010357, "grad_norm": 1.1755367517471313, "learning_rate": 0.0004501726121979287, "loss": 0.3751, "step": 269360 }, { "epoch": 77.49424626006905, "grad_norm": 1.1365264654159546, "learning_rate": 0.00045011507479861913, "loss": 0.4177, "step": 269370 }, { "epoch": 77.49712313003452, "grad_norm": 1.0193507671356201, "learning_rate": 0.00045005753739930953, "loss": 0.3431, "step": 269380 }, { "epoch": 77.5, "grad_norm": 1.9782788753509521, "learning_rate": 0.00045000000000000004, "loss": 0.3488, "step": 269390 }, { "epoch": 77.50287686996548, "grad_norm": 1.261063575744629, "learning_rate": 0.00044994246260069044, "loss": 0.3841, "step": 269400 }, { "epoch": 77.50575373993095, "grad_norm": 0.90828537940979, "learning_rate": 0.0004498849252013809, "loss": 0.2869, "step": 269410 }, { "epoch": 77.50863060989643, "grad_norm": 2.2453062534332275, "learning_rate": 0.00044982738780207135, "loss": 0.3706, "step": 269420 }, { "epoch": 77.5115074798619, "grad_norm": 0.8201050162315369, "learning_rate": 0.0004497698504027618, "loss": 0.3276, "step": 269430 }, { "epoch": 77.51438434982738, "grad_norm": 1.7133370637893677, "learning_rate": 0.00044971231300345227, "loss": 0.473, "step": 269440 }, { "epoch": 77.51726121979287, "grad_norm": 1.0561827421188354, "learning_rate": 0.0004496547756041427, "loss": 0.3033, "step": 269450 }, { "epoch": 77.52013808975835, "grad_norm": 1.0840027332305908, "learning_rate": 0.0004495972382048332, "loss": 0.3172, "step": 269460 }, { "epoch": 77.52301495972382, "grad_norm": 0.9267520904541016, "learning_rate": 0.0004495397008055236, "loss": 0.3873, "step": 269470 }, { "epoch": 77.5258918296893, "grad_norm": 1.6198557615280151, "learning_rate": 0.0004494821634062141, "loss": 0.5103, "step": 269480 }, { "epoch": 77.52876869965478, "grad_norm": 1.1038486957550049, "learning_rate": 0.0004494246260069045, "loss": 0.4964, "step": 269490 }, { "epoch": 77.53164556962025, "grad_norm": 0.7096448540687561, "learning_rate": 0.00044936708860759494, "loss": 0.3289, "step": 269500 }, { "epoch": 77.53452243958573, "grad_norm": 1.1827492713928223, "learning_rate": 0.00044930955120828535, "loss": 0.3439, "step": 269510 }, { "epoch": 77.5373993095512, "grad_norm": 1.0068204402923584, "learning_rate": 0.00044925201380897586, "loss": 0.3774, "step": 269520 }, { "epoch": 77.54027617951668, "grad_norm": 1.5184587240219116, "learning_rate": 0.0004491944764096663, "loss": 0.4284, "step": 269530 }, { "epoch": 77.54315304948216, "grad_norm": 0.941944420337677, "learning_rate": 0.0004491369390103567, "loss": 0.3694, "step": 269540 }, { "epoch": 77.54602991944765, "grad_norm": 2.096120595932007, "learning_rate": 0.0004490794016110472, "loss": 0.3926, "step": 269550 }, { "epoch": 77.54890678941312, "grad_norm": 1.2892919778823853, "learning_rate": 0.0004490218642117376, "loss": 0.3817, "step": 269560 }, { "epoch": 77.5517836593786, "grad_norm": 0.7292340397834778, "learning_rate": 0.0004489643268124281, "loss": 0.2939, "step": 269570 }, { "epoch": 77.55466052934408, "grad_norm": 0.8076913356781006, "learning_rate": 0.00044890678941311853, "loss": 0.3307, "step": 269580 }, { "epoch": 77.55753739930955, "grad_norm": 0.6640044450759888, "learning_rate": 0.000448849252013809, "loss": 0.3677, "step": 269590 }, { "epoch": 77.56041426927503, "grad_norm": 0.6631372570991516, "learning_rate": 0.0004487917146144994, "loss": 0.3903, "step": 269600 }, { "epoch": 77.5632911392405, "grad_norm": 1.1243261098861694, "learning_rate": 0.0004487341772151899, "loss": 0.3956, "step": 269610 }, { "epoch": 77.56616800920598, "grad_norm": 1.6956803798675537, "learning_rate": 0.0004486766398158803, "loss": 0.3871, "step": 269620 }, { "epoch": 77.56904487917146, "grad_norm": 1.446943998336792, "learning_rate": 0.00044861910241657076, "loss": 0.3965, "step": 269630 }, { "epoch": 77.57192174913693, "grad_norm": 1.4160500764846802, "learning_rate": 0.00044856156501726127, "loss": 0.394, "step": 269640 }, { "epoch": 77.57479861910241, "grad_norm": 0.8603317737579346, "learning_rate": 0.00044850402761795167, "loss": 0.3822, "step": 269650 }, { "epoch": 77.5776754890679, "grad_norm": 0.8940510153770447, "learning_rate": 0.0004484464902186421, "loss": 0.3482, "step": 269660 }, { "epoch": 77.58055235903338, "grad_norm": 1.5647120475769043, "learning_rate": 0.0004483889528193326, "loss": 0.3298, "step": 269670 }, { "epoch": 77.58342922899885, "grad_norm": 0.9999746680259705, "learning_rate": 0.00044833141542002303, "loss": 0.3665, "step": 269680 }, { "epoch": 77.58630609896433, "grad_norm": 1.378495454788208, "learning_rate": 0.00044827387802071344, "loss": 0.4214, "step": 269690 }, { "epoch": 77.5891829689298, "grad_norm": 1.766325831413269, "learning_rate": 0.00044821634062140395, "loss": 0.425, "step": 269700 }, { "epoch": 77.59205983889528, "grad_norm": 1.522929072380066, "learning_rate": 0.00044815880322209435, "loss": 0.4495, "step": 269710 }, { "epoch": 77.59493670886076, "grad_norm": 1.0755014419555664, "learning_rate": 0.0004481012658227848, "loss": 0.3687, "step": 269720 }, { "epoch": 77.59781357882623, "grad_norm": 1.9884891510009766, "learning_rate": 0.0004480437284234753, "loss": 0.3717, "step": 269730 }, { "epoch": 77.60069044879171, "grad_norm": 1.0518065690994263, "learning_rate": 0.0004479861910241657, "loss": 0.3669, "step": 269740 }, { "epoch": 77.60356731875719, "grad_norm": 1.5475475788116455, "learning_rate": 0.00044792865362485617, "loss": 0.3291, "step": 269750 }, { "epoch": 77.60644418872268, "grad_norm": 1.7674245834350586, "learning_rate": 0.0004478711162255466, "loss": 0.3859, "step": 269760 }, { "epoch": 77.60932105868815, "grad_norm": 1.51060950756073, "learning_rate": 0.0004478135788262371, "loss": 0.2649, "step": 269770 }, { "epoch": 77.61219792865363, "grad_norm": 1.7909519672393799, "learning_rate": 0.0004477560414269275, "loss": 0.3422, "step": 269780 }, { "epoch": 77.6150747986191, "grad_norm": 1.0084089040756226, "learning_rate": 0.000447698504027618, "loss": 0.3182, "step": 269790 }, { "epoch": 77.61795166858458, "grad_norm": 0.8744723200798035, "learning_rate": 0.0004476409666283084, "loss": 0.3933, "step": 269800 }, { "epoch": 77.62082853855006, "grad_norm": 1.632767915725708, "learning_rate": 0.00044758342922899885, "loss": 0.3265, "step": 269810 }, { "epoch": 77.62370540851553, "grad_norm": 1.8227472305297852, "learning_rate": 0.0004475258918296893, "loss": 0.4078, "step": 269820 }, { "epoch": 77.62658227848101, "grad_norm": 1.3015944957733154, "learning_rate": 0.00044746835443037976, "loss": 0.3623, "step": 269830 }, { "epoch": 77.62945914844649, "grad_norm": 0.5804606676101685, "learning_rate": 0.0004474108170310702, "loss": 0.3109, "step": 269840 }, { "epoch": 77.63233601841196, "grad_norm": 0.8312950134277344, "learning_rate": 0.00044735327963176067, "loss": 0.3426, "step": 269850 }, { "epoch": 77.63521288837744, "grad_norm": 1.269172191619873, "learning_rate": 0.0004472957422324511, "loss": 0.3841, "step": 269860 }, { "epoch": 77.63808975834293, "grad_norm": 1.0063685178756714, "learning_rate": 0.0004472382048331415, "loss": 0.4275, "step": 269870 }, { "epoch": 77.6409666283084, "grad_norm": 0.9089357256889343, "learning_rate": 0.00044718066743383204, "loss": 0.2935, "step": 269880 }, { "epoch": 77.64384349827388, "grad_norm": 1.403949499130249, "learning_rate": 0.00044712313003452244, "loss": 0.3589, "step": 269890 }, { "epoch": 77.64672036823936, "grad_norm": 1.394142508506775, "learning_rate": 0.0004470655926352129, "loss": 0.4287, "step": 269900 }, { "epoch": 77.64959723820483, "grad_norm": 1.771026611328125, "learning_rate": 0.0004470080552359033, "loss": 0.3584, "step": 269910 }, { "epoch": 77.65247410817031, "grad_norm": 0.6932595372200012, "learning_rate": 0.0004469505178365938, "loss": 0.3628, "step": 269920 }, { "epoch": 77.65535097813579, "grad_norm": 0.9099950194358826, "learning_rate": 0.00044689298043728426, "loss": 0.3346, "step": 269930 }, { "epoch": 77.65822784810126, "grad_norm": 1.3390007019042969, "learning_rate": 0.00044683544303797466, "loss": 0.3749, "step": 269940 }, { "epoch": 77.66110471806674, "grad_norm": 1.0151904821395874, "learning_rate": 0.00044677790563866517, "loss": 0.3996, "step": 269950 }, { "epoch": 77.66398158803221, "grad_norm": 0.9497650265693665, "learning_rate": 0.00044672036823935557, "loss": 0.4287, "step": 269960 }, { "epoch": 77.6668584579977, "grad_norm": 1.0602707862854004, "learning_rate": 0.00044666283084004603, "loss": 0.3995, "step": 269970 }, { "epoch": 77.66973532796318, "grad_norm": 1.0509932041168213, "learning_rate": 0.0004466052934407365, "loss": 0.4052, "step": 269980 }, { "epoch": 77.67261219792866, "grad_norm": 0.8586157560348511, "learning_rate": 0.00044654775604142694, "loss": 0.3551, "step": 269990 }, { "epoch": 77.67548906789413, "grad_norm": 1.460530400276184, "learning_rate": 0.00044649021864211734, "loss": 0.4061, "step": 270000 }, { "epoch": 77.67836593785961, "grad_norm": 0.7354366779327393, "learning_rate": 0.00044643268124280785, "loss": 0.349, "step": 270010 }, { "epoch": 77.68124280782509, "grad_norm": 0.8963316679000854, "learning_rate": 0.0004463751438434983, "loss": 0.308, "step": 270020 }, { "epoch": 77.68411967779056, "grad_norm": 0.9685860872268677, "learning_rate": 0.0004463176064441887, "loss": 0.338, "step": 270030 }, { "epoch": 77.68699654775604, "grad_norm": 1.2054784297943115, "learning_rate": 0.0004462600690448792, "loss": 0.3283, "step": 270040 }, { "epoch": 77.68987341772151, "grad_norm": 1.0121147632598877, "learning_rate": 0.0004462025316455696, "loss": 0.4421, "step": 270050 }, { "epoch": 77.69275028768699, "grad_norm": 0.8013333678245544, "learning_rate": 0.00044614499424626007, "loss": 0.3631, "step": 270060 }, { "epoch": 77.69562715765247, "grad_norm": 1.183577060699463, "learning_rate": 0.00044608745684695053, "loss": 0.4162, "step": 270070 }, { "epoch": 77.69850402761796, "grad_norm": 2.634446382522583, "learning_rate": 0.000446029919447641, "loss": 0.4862, "step": 270080 }, { "epoch": 77.70138089758343, "grad_norm": 1.292755365371704, "learning_rate": 0.0004459723820483314, "loss": 0.3789, "step": 270090 }, { "epoch": 77.70425776754891, "grad_norm": 1.9775744676589966, "learning_rate": 0.0004459148446490219, "loss": 0.4308, "step": 270100 }, { "epoch": 77.70713463751439, "grad_norm": 0.9651328325271606, "learning_rate": 0.0004458573072497123, "loss": 0.4038, "step": 270110 }, { "epoch": 77.71001150747986, "grad_norm": 1.7054681777954102, "learning_rate": 0.00044579976985040275, "loss": 0.4018, "step": 270120 }, { "epoch": 77.71288837744534, "grad_norm": 1.4452298879623413, "learning_rate": 0.00044574223245109326, "loss": 0.3811, "step": 270130 }, { "epoch": 77.71576524741081, "grad_norm": 1.500937819480896, "learning_rate": 0.00044568469505178366, "loss": 0.467, "step": 270140 }, { "epoch": 77.71864211737629, "grad_norm": 1.0054569244384766, "learning_rate": 0.0004456271576524741, "loss": 0.3345, "step": 270150 }, { "epoch": 77.72151898734177, "grad_norm": 0.8862149119377136, "learning_rate": 0.0004455696202531646, "loss": 0.3053, "step": 270160 }, { "epoch": 77.72439585730724, "grad_norm": 0.8373035192489624, "learning_rate": 0.00044551208285385503, "loss": 0.3883, "step": 270170 }, { "epoch": 77.72727272727273, "grad_norm": 1.3881075382232666, "learning_rate": 0.00044545454545454543, "loss": 0.3927, "step": 270180 }, { "epoch": 77.73014959723821, "grad_norm": 1.2725565433502197, "learning_rate": 0.00044539700805523594, "loss": 0.3588, "step": 270190 }, { "epoch": 77.73302646720369, "grad_norm": 1.8356757164001465, "learning_rate": 0.00044533947065592634, "loss": 0.3223, "step": 270200 }, { "epoch": 77.73590333716916, "grad_norm": 1.0678246021270752, "learning_rate": 0.0004452819332566168, "loss": 0.4002, "step": 270210 }, { "epoch": 77.73878020713464, "grad_norm": 1.0655555725097656, "learning_rate": 0.0004452243958573073, "loss": 0.3069, "step": 270220 }, { "epoch": 77.74165707710011, "grad_norm": 0.8423113226890564, "learning_rate": 0.0004451668584579977, "loss": 0.343, "step": 270230 }, { "epoch": 77.74453394706559, "grad_norm": 2.557827949523926, "learning_rate": 0.00044510932105868816, "loss": 0.3789, "step": 270240 }, { "epoch": 77.74741081703107, "grad_norm": 2.3563506603240967, "learning_rate": 0.0004450517836593786, "loss": 0.3961, "step": 270250 }, { "epoch": 77.75028768699654, "grad_norm": 1.2013907432556152, "learning_rate": 0.0004449942462600691, "loss": 0.3363, "step": 270260 }, { "epoch": 77.75316455696202, "grad_norm": 1.2454814910888672, "learning_rate": 0.0004449367088607595, "loss": 0.3312, "step": 270270 }, { "epoch": 77.75604142692751, "grad_norm": 1.7959306240081787, "learning_rate": 0.00044487917146145, "loss": 0.2751, "step": 270280 }, { "epoch": 77.75891829689299, "grad_norm": 1.1759002208709717, "learning_rate": 0.0004448216340621404, "loss": 0.4509, "step": 270290 }, { "epoch": 77.76179516685846, "grad_norm": 0.7030718922615051, "learning_rate": 0.00044476409666283084, "loss": 0.4187, "step": 270300 }, { "epoch": 77.76467203682394, "grad_norm": 1.0819350481033325, "learning_rate": 0.0004447065592635213, "loss": 0.4641, "step": 270310 }, { "epoch": 77.76754890678941, "grad_norm": 0.9048421382904053, "learning_rate": 0.00044464902186421175, "loss": 0.4, "step": 270320 }, { "epoch": 77.77042577675489, "grad_norm": 1.8405275344848633, "learning_rate": 0.0004445914844649022, "loss": 0.3854, "step": 270330 }, { "epoch": 77.77330264672037, "grad_norm": 1.671373963356018, "learning_rate": 0.0004445339470655926, "loss": 0.3442, "step": 270340 }, { "epoch": 77.77617951668584, "grad_norm": 1.1127017736434937, "learning_rate": 0.0004444764096662831, "loss": 0.4191, "step": 270350 }, { "epoch": 77.77905638665132, "grad_norm": 0.8874428272247314, "learning_rate": 0.0004444188722669735, "loss": 0.3818, "step": 270360 }, { "epoch": 77.7819332566168, "grad_norm": 2.0954976081848145, "learning_rate": 0.000444361334867664, "loss": 0.3965, "step": 270370 }, { "epoch": 77.78481012658227, "grad_norm": 1.0443532466888428, "learning_rate": 0.00044430379746835443, "loss": 0.3487, "step": 270380 }, { "epoch": 77.78768699654776, "grad_norm": 0.9126856923103333, "learning_rate": 0.0004442462600690449, "loss": 0.5434, "step": 270390 }, { "epoch": 77.79056386651324, "grad_norm": 0.7649624943733215, "learning_rate": 0.0004441887226697353, "loss": 0.4574, "step": 270400 }, { "epoch": 77.79344073647871, "grad_norm": 1.1679593324661255, "learning_rate": 0.0004441311852704258, "loss": 0.3242, "step": 270410 }, { "epoch": 77.79631760644419, "grad_norm": 2.2705490589141846, "learning_rate": 0.00044407364787111625, "loss": 0.4388, "step": 270420 }, { "epoch": 77.79919447640967, "grad_norm": 1.598044991493225, "learning_rate": 0.00044401611047180666, "loss": 0.3786, "step": 270430 }, { "epoch": 77.80207134637514, "grad_norm": 1.4236257076263428, "learning_rate": 0.00044395857307249716, "loss": 0.398, "step": 270440 }, { "epoch": 77.80494821634062, "grad_norm": 0.8291116952896118, "learning_rate": 0.00044390103567318757, "loss": 0.2953, "step": 270450 }, { "epoch": 77.8078250863061, "grad_norm": 0.8691619634628296, "learning_rate": 0.000443843498273878, "loss": 0.3414, "step": 270460 }, { "epoch": 77.81070195627157, "grad_norm": 0.8059034943580627, "learning_rate": 0.0004437859608745685, "loss": 0.3026, "step": 270470 }, { "epoch": 77.81357882623705, "grad_norm": 1.617094874382019, "learning_rate": 0.00044372842347525893, "loss": 0.346, "step": 270480 }, { "epoch": 77.81645569620254, "grad_norm": 1.6260406970977783, "learning_rate": 0.00044367088607594933, "loss": 0.3934, "step": 270490 }, { "epoch": 77.81933256616801, "grad_norm": 1.1731914281845093, "learning_rate": 0.00044361334867663984, "loss": 0.3573, "step": 270500 }, { "epoch": 77.82220943613349, "grad_norm": 1.3565822839736938, "learning_rate": 0.0004435558112773303, "loss": 0.3491, "step": 270510 }, { "epoch": 77.82508630609897, "grad_norm": 1.210554838180542, "learning_rate": 0.0004434982738780207, "loss": 0.4498, "step": 270520 }, { "epoch": 77.82796317606444, "grad_norm": 1.843704104423523, "learning_rate": 0.0004434407364787112, "loss": 0.3782, "step": 270530 }, { "epoch": 77.83084004602992, "grad_norm": 4.566045761108398, "learning_rate": 0.0004433831990794016, "loss": 0.3277, "step": 270540 }, { "epoch": 77.8337169159954, "grad_norm": 1.291786789894104, "learning_rate": 0.00044332566168009207, "loss": 0.3306, "step": 270550 }, { "epoch": 77.83659378596087, "grad_norm": 1.5893325805664062, "learning_rate": 0.0004432681242807825, "loss": 0.3938, "step": 270560 }, { "epoch": 77.83947065592635, "grad_norm": 1.233446717262268, "learning_rate": 0.000443210586881473, "loss": 0.3436, "step": 270570 }, { "epoch": 77.84234752589182, "grad_norm": 1.090261459350586, "learning_rate": 0.0004431530494821634, "loss": 0.32, "step": 270580 }, { "epoch": 77.8452243958573, "grad_norm": 1.3802783489227295, "learning_rate": 0.0004430955120828539, "loss": 0.3837, "step": 270590 }, { "epoch": 77.84810126582279, "grad_norm": 1.2369165420532227, "learning_rate": 0.0004430379746835443, "loss": 0.3919, "step": 270600 }, { "epoch": 77.85097813578827, "grad_norm": 0.9917145371437073, "learning_rate": 0.00044298043728423475, "loss": 0.3162, "step": 270610 }, { "epoch": 77.85385500575374, "grad_norm": 1.8802661895751953, "learning_rate": 0.00044292289988492526, "loss": 0.38, "step": 270620 }, { "epoch": 77.85673187571922, "grad_norm": 0.8609215021133423, "learning_rate": 0.00044286536248561566, "loss": 0.3686, "step": 270630 }, { "epoch": 77.8596087456847, "grad_norm": 1.0764259099960327, "learning_rate": 0.0004428078250863061, "loss": 0.3113, "step": 270640 }, { "epoch": 77.86248561565017, "grad_norm": 1.025447130203247, "learning_rate": 0.00044275028768699657, "loss": 0.4106, "step": 270650 }, { "epoch": 77.86536248561565, "grad_norm": 1.2557876110076904, "learning_rate": 0.000442692750287687, "loss": 0.3901, "step": 270660 }, { "epoch": 77.86823935558112, "grad_norm": 1.48412024974823, "learning_rate": 0.0004426352128883774, "loss": 0.3351, "step": 270670 }, { "epoch": 77.8711162255466, "grad_norm": 2.0514495372772217, "learning_rate": 0.00044257767548906793, "loss": 0.3795, "step": 270680 }, { "epoch": 77.87399309551208, "grad_norm": 1.2649222612380981, "learning_rate": 0.00044252013808975834, "loss": 0.3601, "step": 270690 }, { "epoch": 77.87686996547757, "grad_norm": 0.9743576645851135, "learning_rate": 0.0004424626006904488, "loss": 0.353, "step": 270700 }, { "epoch": 77.87974683544304, "grad_norm": 0.7593143582344055, "learning_rate": 0.0004424050632911393, "loss": 0.3918, "step": 270710 }, { "epoch": 77.88262370540852, "grad_norm": 1.2004454135894775, "learning_rate": 0.0004423475258918297, "loss": 0.4505, "step": 270720 }, { "epoch": 77.885500575374, "grad_norm": 1.1287585496902466, "learning_rate": 0.00044228998849252016, "loss": 0.372, "step": 270730 }, { "epoch": 77.88837744533947, "grad_norm": 1.3615046739578247, "learning_rate": 0.00044223245109321056, "loss": 0.3471, "step": 270740 }, { "epoch": 77.89125431530495, "grad_norm": 0.6658865809440613, "learning_rate": 0.00044217491369390107, "loss": 0.3655, "step": 270750 }, { "epoch": 77.89413118527042, "grad_norm": 1.0792711973190308, "learning_rate": 0.00044211737629459147, "loss": 0.5388, "step": 270760 }, { "epoch": 77.8970080552359, "grad_norm": 1.8600763082504272, "learning_rate": 0.0004420598388952819, "loss": 0.3865, "step": 270770 }, { "epoch": 77.89988492520138, "grad_norm": 0.8313114643096924, "learning_rate": 0.0004420023014959724, "loss": 0.3414, "step": 270780 }, { "epoch": 77.90276179516685, "grad_norm": 0.6957510709762573, "learning_rate": 0.00044194476409666284, "loss": 0.4911, "step": 270790 }, { "epoch": 77.90563866513233, "grad_norm": 1.6897743940353394, "learning_rate": 0.00044188722669735324, "loss": 0.3939, "step": 270800 }, { "epoch": 77.90851553509782, "grad_norm": 0.9434884786605835, "learning_rate": 0.00044182968929804375, "loss": 0.3776, "step": 270810 }, { "epoch": 77.9113924050633, "grad_norm": 0.84786456823349, "learning_rate": 0.0004417721518987342, "loss": 0.3491, "step": 270820 }, { "epoch": 77.91426927502877, "grad_norm": 0.9334434270858765, "learning_rate": 0.0004417146144994246, "loss": 0.42, "step": 270830 }, { "epoch": 77.91714614499425, "grad_norm": 2.26646089553833, "learning_rate": 0.0004416570771001151, "loss": 0.4844, "step": 270840 }, { "epoch": 77.92002301495972, "grad_norm": 1.1075996160507202, "learning_rate": 0.0004415995397008055, "loss": 0.4321, "step": 270850 }, { "epoch": 77.9228998849252, "grad_norm": 1.0526268482208252, "learning_rate": 0.00044154200230149597, "loss": 0.3855, "step": 270860 }, { "epoch": 77.92577675489068, "grad_norm": 1.1291406154632568, "learning_rate": 0.0004414844649021864, "loss": 0.4651, "step": 270870 }, { "epoch": 77.92865362485615, "grad_norm": 1.7498505115509033, "learning_rate": 0.0004414269275028769, "loss": 0.3446, "step": 270880 }, { "epoch": 77.93153049482163, "grad_norm": 1.6960886716842651, "learning_rate": 0.0004413693901035673, "loss": 0.3585, "step": 270890 }, { "epoch": 77.9344073647871, "grad_norm": 1.2446837425231934, "learning_rate": 0.0004413118527042578, "loss": 0.4226, "step": 270900 }, { "epoch": 77.9372842347526, "grad_norm": 1.7042655944824219, "learning_rate": 0.00044125431530494825, "loss": 0.3455, "step": 270910 }, { "epoch": 77.94016110471807, "grad_norm": 1.5672807693481445, "learning_rate": 0.00044119677790563865, "loss": 0.4257, "step": 270920 }, { "epoch": 77.94303797468355, "grad_norm": 1.9813756942749023, "learning_rate": 0.00044113924050632916, "loss": 0.3905, "step": 270930 }, { "epoch": 77.94591484464902, "grad_norm": 0.7249549627304077, "learning_rate": 0.00044108170310701956, "loss": 0.4808, "step": 270940 }, { "epoch": 77.9487917146145, "grad_norm": 1.339167594909668, "learning_rate": 0.00044102416570771, "loss": 0.4107, "step": 270950 }, { "epoch": 77.95166858457998, "grad_norm": 1.1886171102523804, "learning_rate": 0.00044096662830840047, "loss": 0.348, "step": 270960 }, { "epoch": 77.95454545454545, "grad_norm": 2.2510993480682373, "learning_rate": 0.00044090909090909093, "loss": 0.3663, "step": 270970 }, { "epoch": 77.95742232451093, "grad_norm": 1.0855584144592285, "learning_rate": 0.00044085155350978133, "loss": 0.3592, "step": 270980 }, { "epoch": 77.9602991944764, "grad_norm": 1.3673936128616333, "learning_rate": 0.00044079401611047184, "loss": 0.3834, "step": 270990 }, { "epoch": 77.96317606444188, "grad_norm": 1.6111176013946533, "learning_rate": 0.0004407364787111623, "loss": 0.4179, "step": 271000 }, { "epoch": 77.96605293440736, "grad_norm": 1.3765802383422852, "learning_rate": 0.0004406789413118527, "loss": 0.5123, "step": 271010 }, { "epoch": 77.96892980437285, "grad_norm": 1.2847418785095215, "learning_rate": 0.0004406214039125432, "loss": 0.3671, "step": 271020 }, { "epoch": 77.97180667433832, "grad_norm": 0.8062944412231445, "learning_rate": 0.0004405638665132336, "loss": 0.3256, "step": 271030 }, { "epoch": 77.9746835443038, "grad_norm": 1.9611997604370117, "learning_rate": 0.00044050632911392406, "loss": 0.5096, "step": 271040 }, { "epoch": 77.97756041426928, "grad_norm": 1.688478946685791, "learning_rate": 0.0004404487917146145, "loss": 0.443, "step": 271050 }, { "epoch": 77.98043728423475, "grad_norm": 1.8581026792526245, "learning_rate": 0.00044039125431530497, "loss": 0.4234, "step": 271060 }, { "epoch": 77.98331415420023, "grad_norm": 1.596798300743103, "learning_rate": 0.0004403337169159954, "loss": 0.4283, "step": 271070 }, { "epoch": 77.9861910241657, "grad_norm": 1.4420628547668457, "learning_rate": 0.0004402761795166859, "loss": 0.4441, "step": 271080 }, { "epoch": 77.98906789413118, "grad_norm": 2.1804726123809814, "learning_rate": 0.0004402186421173763, "loss": 0.4372, "step": 271090 }, { "epoch": 77.99194476409666, "grad_norm": 0.8742937445640564, "learning_rate": 0.00044016110471806674, "loss": 0.3153, "step": 271100 }, { "epoch": 77.99482163406213, "grad_norm": 1.648740530014038, "learning_rate": 0.00044010356731875725, "loss": 0.342, "step": 271110 }, { "epoch": 77.99769850402762, "grad_norm": 0.9418599009513855, "learning_rate": 0.00044004602991944765, "loss": 0.3899, "step": 271120 }, { "epoch": 78.0005753739931, "grad_norm": 1.498808741569519, "learning_rate": 0.0004399884925201381, "loss": 0.3737, "step": 271130 }, { "epoch": 78.00345224395858, "grad_norm": 1.0321272611618042, "learning_rate": 0.00043993095512082856, "loss": 0.3163, "step": 271140 }, { "epoch": 78.00632911392405, "grad_norm": 1.0497890710830688, "learning_rate": 0.000439873417721519, "loss": 0.3204, "step": 271150 }, { "epoch": 78.00920598388953, "grad_norm": 0.9813144207000732, "learning_rate": 0.0004398158803222094, "loss": 0.3441, "step": 271160 }, { "epoch": 78.012082853855, "grad_norm": 0.8032261729240417, "learning_rate": 0.0004397583429228999, "loss": 0.3683, "step": 271170 }, { "epoch": 78.01495972382048, "grad_norm": 0.8314188122749329, "learning_rate": 0.00043970080552359033, "loss": 0.346, "step": 271180 }, { "epoch": 78.01783659378596, "grad_norm": 1.7039721012115479, "learning_rate": 0.0004396432681242808, "loss": 0.3816, "step": 271190 }, { "epoch": 78.02071346375143, "grad_norm": 0.8591794371604919, "learning_rate": 0.00043958573072497124, "loss": 0.2928, "step": 271200 }, { "epoch": 78.02359033371691, "grad_norm": 1.0710614919662476, "learning_rate": 0.0004395281933256617, "loss": 0.2917, "step": 271210 }, { "epoch": 78.02646720368239, "grad_norm": 1.026474118232727, "learning_rate": 0.00043947065592635215, "loss": 0.3563, "step": 271220 }, { "epoch": 78.02934407364788, "grad_norm": 0.8242943286895752, "learning_rate": 0.00043941311852704255, "loss": 0.3209, "step": 271230 }, { "epoch": 78.03222094361335, "grad_norm": 1.2554521560668945, "learning_rate": 0.00043935558112773306, "loss": 0.3771, "step": 271240 }, { "epoch": 78.03509781357883, "grad_norm": 0.7190502285957336, "learning_rate": 0.00043929804372842346, "loss": 0.4553, "step": 271250 }, { "epoch": 78.0379746835443, "grad_norm": 0.6917891502380371, "learning_rate": 0.0004392405063291139, "loss": 0.3541, "step": 271260 }, { "epoch": 78.04085155350978, "grad_norm": 1.256990909576416, "learning_rate": 0.0004391829689298044, "loss": 0.5095, "step": 271270 }, { "epoch": 78.04372842347526, "grad_norm": 2.163022041320801, "learning_rate": 0.00043912543153049483, "loss": 0.371, "step": 271280 }, { "epoch": 78.04660529344073, "grad_norm": 1.4535201787948608, "learning_rate": 0.00043906789413118523, "loss": 0.3502, "step": 271290 }, { "epoch": 78.04948216340621, "grad_norm": 0.925604522228241, "learning_rate": 0.00043901035673187574, "loss": 0.455, "step": 271300 }, { "epoch": 78.05235903337169, "grad_norm": 1.1903167963027954, "learning_rate": 0.0004389528193325662, "loss": 0.3293, "step": 271310 }, { "epoch": 78.05523590333716, "grad_norm": 1.5519740581512451, "learning_rate": 0.0004388952819332566, "loss": 0.3636, "step": 271320 }, { "epoch": 78.05811277330265, "grad_norm": 1.044095516204834, "learning_rate": 0.0004388377445339471, "loss": 0.3523, "step": 271330 }, { "epoch": 78.06098964326813, "grad_norm": 1.5508826971054077, "learning_rate": 0.0004387802071346375, "loss": 0.3465, "step": 271340 }, { "epoch": 78.0638665132336, "grad_norm": 0.8442961573600769, "learning_rate": 0.00043872266973532797, "loss": 0.4469, "step": 271350 }, { "epoch": 78.06674338319908, "grad_norm": 1.316894292831421, "learning_rate": 0.0004386651323360184, "loss": 0.3181, "step": 271360 }, { "epoch": 78.06962025316456, "grad_norm": 2.391111135482788, "learning_rate": 0.0004386075949367089, "loss": 0.3647, "step": 271370 }, { "epoch": 78.07249712313003, "grad_norm": 1.5522081851959229, "learning_rate": 0.0004385500575373993, "loss": 0.3618, "step": 271380 }, { "epoch": 78.07537399309551, "grad_norm": 1.247886300086975, "learning_rate": 0.0004384925201380898, "loss": 0.2791, "step": 271390 }, { "epoch": 78.07825086306099, "grad_norm": 0.9980137348175049, "learning_rate": 0.00043843498273878024, "loss": 0.3929, "step": 271400 }, { "epoch": 78.08112773302646, "grad_norm": 1.5248141288757324, "learning_rate": 0.00043837744533947064, "loss": 0.319, "step": 271410 }, { "epoch": 78.08400460299194, "grad_norm": 1.6128016710281372, "learning_rate": 0.00043831990794016115, "loss": 0.4133, "step": 271420 }, { "epoch": 78.08688147295742, "grad_norm": 0.804802656173706, "learning_rate": 0.00043826237054085156, "loss": 0.3124, "step": 271430 }, { "epoch": 78.0897583429229, "grad_norm": 1.3738734722137451, "learning_rate": 0.000438204833141542, "loss": 0.2692, "step": 271440 }, { "epoch": 78.09263521288838, "grad_norm": 1.543722152709961, "learning_rate": 0.00043814729574223247, "loss": 0.2999, "step": 271450 }, { "epoch": 78.09551208285386, "grad_norm": 1.8229897022247314, "learning_rate": 0.0004380897583429229, "loss": 0.2783, "step": 271460 }, { "epoch": 78.09838895281933, "grad_norm": 0.6369684338569641, "learning_rate": 0.0004380322209436133, "loss": 0.3404, "step": 271470 }, { "epoch": 78.10126582278481, "grad_norm": 0.8320944905281067, "learning_rate": 0.00043797468354430383, "loss": 0.3071, "step": 271480 }, { "epoch": 78.10414269275029, "grad_norm": 0.9211262464523315, "learning_rate": 0.0004379171461449943, "loss": 0.3369, "step": 271490 }, { "epoch": 78.10701956271576, "grad_norm": 1.0057679414749146, "learning_rate": 0.0004378596087456847, "loss": 0.3838, "step": 271500 }, { "epoch": 78.10989643268124, "grad_norm": 0.8198244571685791, "learning_rate": 0.0004378020713463752, "loss": 0.3102, "step": 271510 }, { "epoch": 78.11277330264672, "grad_norm": 1.1855437755584717, "learning_rate": 0.0004377445339470656, "loss": 0.3417, "step": 271520 }, { "epoch": 78.11565017261219, "grad_norm": 1.5687949657440186, "learning_rate": 0.00043768699654775606, "loss": 0.386, "step": 271530 }, { "epoch": 78.11852704257768, "grad_norm": 1.3641623258590698, "learning_rate": 0.0004376294591484465, "loss": 0.3635, "step": 271540 }, { "epoch": 78.12140391254316, "grad_norm": 1.1337201595306396, "learning_rate": 0.00043757192174913697, "loss": 0.3336, "step": 271550 }, { "epoch": 78.12428078250863, "grad_norm": 1.0039727687835693, "learning_rate": 0.00043751438434982737, "loss": 0.342, "step": 271560 }, { "epoch": 78.12715765247411, "grad_norm": 1.5562376976013184, "learning_rate": 0.0004374568469505178, "loss": 0.3535, "step": 271570 }, { "epoch": 78.13003452243959, "grad_norm": 1.1270207166671753, "learning_rate": 0.0004373993095512083, "loss": 0.3507, "step": 271580 }, { "epoch": 78.13291139240506, "grad_norm": 1.0702816247940063, "learning_rate": 0.00043734177215189873, "loss": 0.3429, "step": 271590 }, { "epoch": 78.13578826237054, "grad_norm": 1.293684959411621, "learning_rate": 0.0004372842347525892, "loss": 0.3044, "step": 271600 }, { "epoch": 78.13866513233602, "grad_norm": 3.0071704387664795, "learning_rate": 0.00043722669735327965, "loss": 0.3709, "step": 271610 }, { "epoch": 78.14154200230149, "grad_norm": 0.5799355506896973, "learning_rate": 0.0004371691599539701, "loss": 0.4121, "step": 271620 }, { "epoch": 78.14441887226697, "grad_norm": 1.2118836641311646, "learning_rate": 0.0004371116225546605, "loss": 0.4527, "step": 271630 }, { "epoch": 78.14729574223244, "grad_norm": 0.9222827553749084, "learning_rate": 0.000437054085155351, "loss": 0.3209, "step": 271640 }, { "epoch": 78.15017261219793, "grad_norm": 1.1171916723251343, "learning_rate": 0.0004369965477560414, "loss": 0.3788, "step": 271650 }, { "epoch": 78.15304948216341, "grad_norm": 1.5598058700561523, "learning_rate": 0.00043693901035673187, "loss": 0.4386, "step": 271660 }, { "epoch": 78.15592635212889, "grad_norm": 2.7348203659057617, "learning_rate": 0.0004368814729574223, "loss": 0.3621, "step": 271670 }, { "epoch": 78.15880322209436, "grad_norm": 1.2710696458816528, "learning_rate": 0.0004368239355581128, "loss": 0.3305, "step": 271680 }, { "epoch": 78.16168009205984, "grad_norm": 1.1485518217086792, "learning_rate": 0.00043676639815880324, "loss": 0.3626, "step": 271690 }, { "epoch": 78.16455696202532, "grad_norm": 2.015399217605591, "learning_rate": 0.0004367088607594937, "loss": 0.3473, "step": 271700 }, { "epoch": 78.16743383199079, "grad_norm": 1.1686890125274658, "learning_rate": 0.00043665132336018415, "loss": 0.3007, "step": 271710 }, { "epoch": 78.17031070195627, "grad_norm": 1.5695686340332031, "learning_rate": 0.00043659378596087455, "loss": 0.3123, "step": 271720 }, { "epoch": 78.17318757192174, "grad_norm": 1.2986899614334106, "learning_rate": 0.00043653624856156506, "loss": 0.3402, "step": 271730 }, { "epoch": 78.17606444188722, "grad_norm": 0.9487626552581787, "learning_rate": 0.00043647871116225546, "loss": 0.3748, "step": 271740 }, { "epoch": 78.17894131185271, "grad_norm": 1.485123872756958, "learning_rate": 0.0004364211737629459, "loss": 0.3921, "step": 271750 }, { "epoch": 78.18181818181819, "grad_norm": 1.4265010356903076, "learning_rate": 0.00043636363636363637, "loss": 0.3587, "step": 271760 }, { "epoch": 78.18469505178366, "grad_norm": 1.9176820516586304, "learning_rate": 0.0004363060989643268, "loss": 0.3988, "step": 271770 }, { "epoch": 78.18757192174914, "grad_norm": 1.1480647325515747, "learning_rate": 0.0004362485615650172, "loss": 0.3309, "step": 271780 }, { "epoch": 78.19044879171462, "grad_norm": 2.6243550777435303, "learning_rate": 0.00043619102416570774, "loss": 0.3174, "step": 271790 }, { "epoch": 78.19332566168009, "grad_norm": 0.8985723853111267, "learning_rate": 0.0004361334867663982, "loss": 0.3665, "step": 271800 }, { "epoch": 78.19620253164557, "grad_norm": 0.8164011836051941, "learning_rate": 0.0004360759493670886, "loss": 0.4364, "step": 271810 }, { "epoch": 78.19907940161104, "grad_norm": 0.7668337225914001, "learning_rate": 0.0004360184119677791, "loss": 0.3757, "step": 271820 }, { "epoch": 78.20195627157652, "grad_norm": 2.6698105335235596, "learning_rate": 0.0004359608745684695, "loss": 0.4657, "step": 271830 }, { "epoch": 78.204833141542, "grad_norm": 2.778162956237793, "learning_rate": 0.00043590333716915996, "loss": 0.3245, "step": 271840 }, { "epoch": 78.20771001150747, "grad_norm": 0.9373986124992371, "learning_rate": 0.0004358457997698504, "loss": 0.3586, "step": 271850 }, { "epoch": 78.21058688147296, "grad_norm": 0.9771344661712646, "learning_rate": 0.00043578826237054087, "loss": 0.3163, "step": 271860 }, { "epoch": 78.21346375143844, "grad_norm": 2.384258985519409, "learning_rate": 0.00043573072497123127, "loss": 0.3729, "step": 271870 }, { "epoch": 78.21634062140392, "grad_norm": 1.7386884689331055, "learning_rate": 0.0004356731875719218, "loss": 0.3759, "step": 271880 }, { "epoch": 78.21921749136939, "grad_norm": 1.9796382188796997, "learning_rate": 0.00043561565017261224, "loss": 0.4552, "step": 271890 }, { "epoch": 78.22209436133487, "grad_norm": 2.035640001296997, "learning_rate": 0.00043555811277330264, "loss": 0.3275, "step": 271900 }, { "epoch": 78.22497123130034, "grad_norm": 1.4925830364227295, "learning_rate": 0.00043550057537399315, "loss": 0.3484, "step": 271910 }, { "epoch": 78.22784810126582, "grad_norm": 0.675643265247345, "learning_rate": 0.00043544303797468355, "loss": 0.3086, "step": 271920 }, { "epoch": 78.2307249712313, "grad_norm": 1.0191967487335205, "learning_rate": 0.000435385500575374, "loss": 0.3856, "step": 271930 }, { "epoch": 78.23360184119677, "grad_norm": 0.6901578903198242, "learning_rate": 0.00043532796317606446, "loss": 0.4451, "step": 271940 }, { "epoch": 78.23647871116225, "grad_norm": 0.641909658908844, "learning_rate": 0.0004352704257767549, "loss": 0.3052, "step": 271950 }, { "epoch": 78.23935558112774, "grad_norm": 1.1879717111587524, "learning_rate": 0.0004352128883774453, "loss": 0.3449, "step": 271960 }, { "epoch": 78.24223245109322, "grad_norm": 1.205676555633545, "learning_rate": 0.0004351553509781358, "loss": 0.3088, "step": 271970 }, { "epoch": 78.24510932105869, "grad_norm": 0.9897533655166626, "learning_rate": 0.0004350978135788263, "loss": 0.3122, "step": 271980 }, { "epoch": 78.24798619102417, "grad_norm": 3.211094856262207, "learning_rate": 0.0004350402761795167, "loss": 0.3697, "step": 271990 }, { "epoch": 78.25086306098964, "grad_norm": 1.5556294918060303, "learning_rate": 0.00043498273878020714, "loss": 0.2852, "step": 272000 }, { "epoch": 78.25373993095512, "grad_norm": 0.982615053653717, "learning_rate": 0.0004349252013808976, "loss": 0.3299, "step": 272010 }, { "epoch": 78.2566168009206, "grad_norm": 1.6001319885253906, "learning_rate": 0.00043486766398158805, "loss": 0.2951, "step": 272020 }, { "epoch": 78.25949367088607, "grad_norm": 1.6422123908996582, "learning_rate": 0.00043481012658227845, "loss": 0.3128, "step": 272030 }, { "epoch": 78.26237054085155, "grad_norm": 1.2782909870147705, "learning_rate": 0.00043475258918296896, "loss": 0.4224, "step": 272040 }, { "epoch": 78.26524741081703, "grad_norm": 1.0662392377853394, "learning_rate": 0.00043469505178365936, "loss": 0.3246, "step": 272050 }, { "epoch": 78.2681242807825, "grad_norm": 1.4664127826690674, "learning_rate": 0.0004346375143843498, "loss": 0.3631, "step": 272060 }, { "epoch": 78.27100115074799, "grad_norm": 0.6934856176376343, "learning_rate": 0.0004345799769850403, "loss": 0.3657, "step": 272070 }, { "epoch": 78.27387802071347, "grad_norm": 1.5513118505477905, "learning_rate": 0.00043452243958573073, "loss": 0.3953, "step": 272080 }, { "epoch": 78.27675489067894, "grad_norm": 1.2994917631149292, "learning_rate": 0.0004344649021864212, "loss": 0.338, "step": 272090 }, { "epoch": 78.27963176064442, "grad_norm": 2.3373827934265137, "learning_rate": 0.00043440736478711164, "loss": 0.3936, "step": 272100 }, { "epoch": 78.2825086306099, "grad_norm": 1.361397385597229, "learning_rate": 0.0004343498273878021, "loss": 0.3444, "step": 272110 }, { "epoch": 78.28538550057537, "grad_norm": 1.8234974145889282, "learning_rate": 0.0004342922899884925, "loss": 0.3963, "step": 272120 }, { "epoch": 78.28826237054085, "grad_norm": 1.2648285627365112, "learning_rate": 0.000434234752589183, "loss": 0.4025, "step": 272130 }, { "epoch": 78.29113924050633, "grad_norm": 1.6405612230300903, "learning_rate": 0.0004341772151898734, "loss": 0.395, "step": 272140 }, { "epoch": 78.2940161104718, "grad_norm": 0.7802708148956299, "learning_rate": 0.00043411967779056386, "loss": 0.3531, "step": 272150 }, { "epoch": 78.29689298043728, "grad_norm": 2.7015984058380127, "learning_rate": 0.0004340621403912543, "loss": 0.3514, "step": 272160 }, { "epoch": 78.29976985040277, "grad_norm": 1.219441294670105, "learning_rate": 0.0004340046029919448, "loss": 0.3594, "step": 272170 }, { "epoch": 78.30264672036824, "grad_norm": 1.6615843772888184, "learning_rate": 0.00043394706559263523, "loss": 0.291, "step": 272180 }, { "epoch": 78.30552359033372, "grad_norm": 1.1700682640075684, "learning_rate": 0.0004338895281933257, "loss": 0.3832, "step": 272190 }, { "epoch": 78.3084004602992, "grad_norm": 1.7700296640396118, "learning_rate": 0.00043383199079401614, "loss": 0.3276, "step": 272200 }, { "epoch": 78.31127733026467, "grad_norm": 1.3649516105651855, "learning_rate": 0.00043377445339470654, "loss": 0.3723, "step": 272210 }, { "epoch": 78.31415420023015, "grad_norm": 0.8895731568336487, "learning_rate": 0.00043371691599539705, "loss": 0.2741, "step": 272220 }, { "epoch": 78.31703107019563, "grad_norm": 1.0003461837768555, "learning_rate": 0.00043365937859608745, "loss": 0.3185, "step": 272230 }, { "epoch": 78.3199079401611, "grad_norm": 0.7551162838935852, "learning_rate": 0.0004336018411967779, "loss": 0.4313, "step": 272240 }, { "epoch": 78.32278481012658, "grad_norm": 0.7828691005706787, "learning_rate": 0.00043354430379746836, "loss": 0.3233, "step": 272250 }, { "epoch": 78.32566168009205, "grad_norm": 1.1695268154144287, "learning_rate": 0.0004334867663981588, "loss": 0.3816, "step": 272260 }, { "epoch": 78.32853855005754, "grad_norm": 1.058588981628418, "learning_rate": 0.0004334292289988492, "loss": 0.3799, "step": 272270 }, { "epoch": 78.33141542002302, "grad_norm": 0.7339637279510498, "learning_rate": 0.00043337169159953973, "loss": 0.2781, "step": 272280 }, { "epoch": 78.3342922899885, "grad_norm": 0.7116574048995972, "learning_rate": 0.0004333141542002302, "loss": 0.3035, "step": 272290 }, { "epoch": 78.33716915995397, "grad_norm": 1.489172101020813, "learning_rate": 0.0004332566168009206, "loss": 0.3652, "step": 272300 }, { "epoch": 78.34004602991945, "grad_norm": 1.0072052478790283, "learning_rate": 0.0004331990794016111, "loss": 0.3477, "step": 272310 }, { "epoch": 78.34292289988493, "grad_norm": 0.7687283158302307, "learning_rate": 0.0004331415420023015, "loss": 0.3793, "step": 272320 }, { "epoch": 78.3457997698504, "grad_norm": 1.1183234453201294, "learning_rate": 0.00043308400460299195, "loss": 0.3512, "step": 272330 }, { "epoch": 78.34867663981588, "grad_norm": 1.4270083904266357, "learning_rate": 0.0004330264672036824, "loss": 0.3991, "step": 272340 }, { "epoch": 78.35155350978135, "grad_norm": 1.4011015892028809, "learning_rate": 0.00043296892980437286, "loss": 0.2751, "step": 272350 }, { "epoch": 78.35443037974683, "grad_norm": 0.9108185172080994, "learning_rate": 0.00043291139240506327, "loss": 0.3519, "step": 272360 }, { "epoch": 78.3573072497123, "grad_norm": 0.8484057188034058, "learning_rate": 0.0004328538550057538, "loss": 0.348, "step": 272370 }, { "epoch": 78.3601841196778, "grad_norm": 1.1773004531860352, "learning_rate": 0.00043279631760644423, "loss": 0.3971, "step": 272380 }, { "epoch": 78.36306098964327, "grad_norm": 1.4090262651443481, "learning_rate": 0.00043273878020713463, "loss": 0.3565, "step": 272390 }, { "epoch": 78.36593785960875, "grad_norm": 0.7662174701690674, "learning_rate": 0.0004326812428078251, "loss": 0.3688, "step": 272400 }, { "epoch": 78.36881472957423, "grad_norm": 0.9837982058525085, "learning_rate": 0.00043262370540851554, "loss": 0.4411, "step": 272410 }, { "epoch": 78.3716915995397, "grad_norm": 1.4755237102508545, "learning_rate": 0.000432566168009206, "loss": 0.4848, "step": 272420 }, { "epoch": 78.37456846950518, "grad_norm": 3.4308364391326904, "learning_rate": 0.0004325086306098964, "loss": 0.4037, "step": 272430 }, { "epoch": 78.37744533947065, "grad_norm": 0.917280375957489, "learning_rate": 0.0004324510932105869, "loss": 0.3298, "step": 272440 }, { "epoch": 78.38032220943613, "grad_norm": 1.1872622966766357, "learning_rate": 0.0004323935558112773, "loss": 0.3112, "step": 272450 }, { "epoch": 78.3831990794016, "grad_norm": 1.3688030242919922, "learning_rate": 0.00043233601841196777, "loss": 0.3309, "step": 272460 }, { "epoch": 78.38607594936708, "grad_norm": 0.9459829330444336, "learning_rate": 0.0004322784810126583, "loss": 0.2733, "step": 272470 }, { "epoch": 78.38895281933257, "grad_norm": 1.088451862335205, "learning_rate": 0.0004322209436133487, "loss": 0.3244, "step": 272480 }, { "epoch": 78.39182968929805, "grad_norm": 0.7633011937141418, "learning_rate": 0.00043216340621403913, "loss": 0.3082, "step": 272490 }, { "epoch": 78.39470655926353, "grad_norm": 1.5390373468399048, "learning_rate": 0.0004321058688147296, "loss": 0.3401, "step": 272500 }, { "epoch": 78.397583429229, "grad_norm": 1.4132739305496216, "learning_rate": 0.00043204833141542004, "loss": 0.3326, "step": 272510 }, { "epoch": 78.40046029919448, "grad_norm": 1.3995943069458008, "learning_rate": 0.00043199079401611045, "loss": 0.3833, "step": 272520 }, { "epoch": 78.40333716915995, "grad_norm": 1.6081664562225342, "learning_rate": 0.00043193325661680096, "loss": 0.2955, "step": 272530 }, { "epoch": 78.40621403912543, "grad_norm": 1.405929684638977, "learning_rate": 0.00043187571921749136, "loss": 0.4015, "step": 272540 }, { "epoch": 78.4090909090909, "grad_norm": 1.0195921659469604, "learning_rate": 0.0004318181818181818, "loss": 0.3609, "step": 272550 }, { "epoch": 78.41196777905638, "grad_norm": 1.3159478902816772, "learning_rate": 0.00043176064441887227, "loss": 0.4238, "step": 272560 }, { "epoch": 78.41484464902186, "grad_norm": 2.488940477371216, "learning_rate": 0.0004317031070195627, "loss": 0.3978, "step": 272570 }, { "epoch": 78.41772151898734, "grad_norm": 1.4650053977966309, "learning_rate": 0.0004316455696202532, "loss": 0.3455, "step": 272580 }, { "epoch": 78.42059838895283, "grad_norm": 1.1637934446334839, "learning_rate": 0.00043158803222094363, "loss": 0.3789, "step": 272590 }, { "epoch": 78.4234752589183, "grad_norm": 1.1899627447128296, "learning_rate": 0.0004315304948216341, "loss": 0.354, "step": 272600 }, { "epoch": 78.42635212888378, "grad_norm": 1.571701169013977, "learning_rate": 0.0004314729574223245, "loss": 0.398, "step": 272610 }, { "epoch": 78.42922899884925, "grad_norm": 1.0784575939178467, "learning_rate": 0.000431415420023015, "loss": 0.326, "step": 272620 }, { "epoch": 78.43210586881473, "grad_norm": 0.8835874795913696, "learning_rate": 0.0004313578826237054, "loss": 0.3657, "step": 272630 }, { "epoch": 78.4349827387802, "grad_norm": 1.1284425258636475, "learning_rate": 0.00043130034522439586, "loss": 0.3378, "step": 272640 }, { "epoch": 78.43785960874568, "grad_norm": 1.2553207874298096, "learning_rate": 0.0004312428078250863, "loss": 0.3772, "step": 272650 }, { "epoch": 78.44073647871116, "grad_norm": 1.1392426490783691, "learning_rate": 0.00043118527042577677, "loss": 0.4454, "step": 272660 }, { "epoch": 78.44361334867664, "grad_norm": 1.4505202770233154, "learning_rate": 0.0004311277330264672, "loss": 0.3208, "step": 272670 }, { "epoch": 78.44649021864211, "grad_norm": 1.4534462690353394, "learning_rate": 0.0004310701956271577, "loss": 0.3529, "step": 272680 }, { "epoch": 78.4493670886076, "grad_norm": 1.572662591934204, "learning_rate": 0.00043101265822784814, "loss": 0.3265, "step": 272690 }, { "epoch": 78.45224395857308, "grad_norm": 1.30618155002594, "learning_rate": 0.00043095512082853854, "loss": 0.3196, "step": 272700 }, { "epoch": 78.45512082853855, "grad_norm": 1.4870582818984985, "learning_rate": 0.00043089758342922905, "loss": 0.3187, "step": 272710 }, { "epoch": 78.45799769850403, "grad_norm": 1.763327717781067, "learning_rate": 0.00043084004602991945, "loss": 0.5069, "step": 272720 }, { "epoch": 78.4608745684695, "grad_norm": 1.0422061681747437, "learning_rate": 0.0004307825086306099, "loss": 0.3203, "step": 272730 }, { "epoch": 78.46375143843498, "grad_norm": 0.6517524719238281, "learning_rate": 0.00043072497123130036, "loss": 0.3429, "step": 272740 }, { "epoch": 78.46662830840046, "grad_norm": 1.2820855379104614, "learning_rate": 0.0004306674338319908, "loss": 0.3879, "step": 272750 }, { "epoch": 78.46950517836594, "grad_norm": 1.493432879447937, "learning_rate": 0.0004306098964326812, "loss": 0.2914, "step": 272760 }, { "epoch": 78.47238204833141, "grad_norm": 0.6694021224975586, "learning_rate": 0.0004305523590333717, "loss": 0.3036, "step": 272770 }, { "epoch": 78.47525891829689, "grad_norm": 1.7410051822662354, "learning_rate": 0.0004304948216340622, "loss": 0.4089, "step": 272780 }, { "epoch": 78.47813578826236, "grad_norm": 2.7374391555786133, "learning_rate": 0.0004304372842347526, "loss": 0.4222, "step": 272790 }, { "epoch": 78.48101265822785, "grad_norm": 2.66444730758667, "learning_rate": 0.0004303797468354431, "loss": 0.406, "step": 272800 }, { "epoch": 78.48388952819333, "grad_norm": 1.1671687364578247, "learning_rate": 0.0004303222094361335, "loss": 0.3133, "step": 272810 }, { "epoch": 78.4867663981588, "grad_norm": 1.6848318576812744, "learning_rate": 0.00043026467203682395, "loss": 0.4344, "step": 272820 }, { "epoch": 78.48964326812428, "grad_norm": 0.9002514481544495, "learning_rate": 0.00043020713463751435, "loss": 0.4398, "step": 272830 }, { "epoch": 78.49252013808976, "grad_norm": 0.9941955208778381, "learning_rate": 0.00043014959723820486, "loss": 0.3136, "step": 272840 }, { "epoch": 78.49539700805524, "grad_norm": 1.5336264371871948, "learning_rate": 0.00043009205983889526, "loss": 0.3398, "step": 272850 }, { "epoch": 78.49827387802071, "grad_norm": 2.489100933074951, "learning_rate": 0.0004300345224395857, "loss": 0.42, "step": 272860 }, { "epoch": 78.50115074798619, "grad_norm": 1.9566224813461304, "learning_rate": 0.0004299769850402762, "loss": 0.3478, "step": 272870 }, { "epoch": 78.50402761795166, "grad_norm": 1.3287851810455322, "learning_rate": 0.00042991944764096663, "loss": 0.3228, "step": 272880 }, { "epoch": 78.50690448791714, "grad_norm": 1.048661470413208, "learning_rate": 0.0004298619102416571, "loss": 0.3547, "step": 272890 }, { "epoch": 78.50978135788263, "grad_norm": 1.4834774732589722, "learning_rate": 0.00042980437284234754, "loss": 0.4471, "step": 272900 }, { "epoch": 78.5126582278481, "grad_norm": 1.5594992637634277, "learning_rate": 0.000429746835443038, "loss": 0.3803, "step": 272910 }, { "epoch": 78.51553509781358, "grad_norm": 1.2639656066894531, "learning_rate": 0.0004296892980437284, "loss": 0.3848, "step": 272920 }, { "epoch": 78.51841196777906, "grad_norm": 1.0061347484588623, "learning_rate": 0.0004296317606444189, "loss": 0.2754, "step": 272930 }, { "epoch": 78.52128883774454, "grad_norm": 0.9834413528442383, "learning_rate": 0.0004295742232451093, "loss": 0.3775, "step": 272940 }, { "epoch": 78.52416570771001, "grad_norm": 0.8252822160720825, "learning_rate": 0.00042951668584579976, "loss": 0.4202, "step": 272950 }, { "epoch": 78.52704257767549, "grad_norm": 1.0220582485198975, "learning_rate": 0.0004294591484464902, "loss": 0.3537, "step": 272960 }, { "epoch": 78.52991944764096, "grad_norm": 0.9296150207519531, "learning_rate": 0.00042940161104718067, "loss": 0.3548, "step": 272970 }, { "epoch": 78.53279631760644, "grad_norm": 1.3055193424224854, "learning_rate": 0.00042934407364787113, "loss": 0.2838, "step": 272980 }, { "epoch": 78.53567318757192, "grad_norm": 1.8890669345855713, "learning_rate": 0.0004292865362485616, "loss": 0.4768, "step": 272990 }, { "epoch": 78.53855005753739, "grad_norm": 1.5100250244140625, "learning_rate": 0.00042922899884925204, "loss": 0.3689, "step": 273000 }, { "epoch": 78.54142692750288, "grad_norm": 2.4798402786254883, "learning_rate": 0.00042917146144994244, "loss": 0.3772, "step": 273010 }, { "epoch": 78.54430379746836, "grad_norm": 0.9118435978889465, "learning_rate": 0.00042911392405063295, "loss": 0.4067, "step": 273020 }, { "epoch": 78.54718066743384, "grad_norm": 0.959277868270874, "learning_rate": 0.00042905638665132335, "loss": 0.4718, "step": 273030 }, { "epoch": 78.55005753739931, "grad_norm": 0.640380322933197, "learning_rate": 0.0004289988492520138, "loss": 0.3158, "step": 273040 }, { "epoch": 78.55293440736479, "grad_norm": 0.7719323635101318, "learning_rate": 0.00042894131185270426, "loss": 0.3538, "step": 273050 }, { "epoch": 78.55581127733026, "grad_norm": 0.9977236986160278, "learning_rate": 0.0004288837744533947, "loss": 0.3408, "step": 273060 }, { "epoch": 78.55868814729574, "grad_norm": 1.0942171812057495, "learning_rate": 0.0004288262370540852, "loss": 0.3478, "step": 273070 }, { "epoch": 78.56156501726122, "grad_norm": 0.8474597334861755, "learning_rate": 0.00042876869965477563, "loss": 0.3727, "step": 273080 }, { "epoch": 78.56444188722669, "grad_norm": 0.8580154180526733, "learning_rate": 0.0004287111622554661, "loss": 0.3627, "step": 273090 }, { "epoch": 78.56731875719217, "grad_norm": 1.225523829460144, "learning_rate": 0.0004286536248561565, "loss": 0.3276, "step": 273100 }, { "epoch": 78.57019562715766, "grad_norm": 0.7219806909561157, "learning_rate": 0.000428596087456847, "loss": 0.3864, "step": 273110 }, { "epoch": 78.57307249712314, "grad_norm": 0.5655247569084167, "learning_rate": 0.0004285385500575374, "loss": 0.3, "step": 273120 }, { "epoch": 78.57594936708861, "grad_norm": 1.2326385974884033, "learning_rate": 0.00042848101265822785, "loss": 0.3656, "step": 273130 }, { "epoch": 78.57882623705409, "grad_norm": 1.3681833744049072, "learning_rate": 0.0004284234752589183, "loss": 0.336, "step": 273140 }, { "epoch": 78.58170310701956, "grad_norm": 2.4203603267669678, "learning_rate": 0.00042836593785960876, "loss": 0.3323, "step": 273150 }, { "epoch": 78.58457997698504, "grad_norm": 1.138020396232605, "learning_rate": 0.0004283084004602992, "loss": 0.3892, "step": 273160 }, { "epoch": 78.58745684695052, "grad_norm": 1.2921757698059082, "learning_rate": 0.0004282508630609897, "loss": 0.4186, "step": 273170 }, { "epoch": 78.59033371691599, "grad_norm": 1.3352787494659424, "learning_rate": 0.00042819332566168013, "loss": 0.3287, "step": 273180 }, { "epoch": 78.59321058688147, "grad_norm": 0.9683669209480286, "learning_rate": 0.00042813578826237053, "loss": 0.3785, "step": 273190 }, { "epoch": 78.59608745684694, "grad_norm": 0.5416494607925415, "learning_rate": 0.00042807825086306104, "loss": 0.4457, "step": 273200 }, { "epoch": 78.59896432681242, "grad_norm": 1.7228386402130127, "learning_rate": 0.00042802071346375144, "loss": 0.2733, "step": 273210 }, { "epoch": 78.60184119677791, "grad_norm": 1.3231784105300903, "learning_rate": 0.0004279631760644419, "loss": 0.355, "step": 273220 }, { "epoch": 78.60471806674339, "grad_norm": 1.3101462125778198, "learning_rate": 0.0004279056386651323, "loss": 0.3716, "step": 273230 }, { "epoch": 78.60759493670886, "grad_norm": 1.3516075611114502, "learning_rate": 0.0004278481012658228, "loss": 0.4175, "step": 273240 }, { "epoch": 78.61047180667434, "grad_norm": 0.9796351194381714, "learning_rate": 0.0004277905638665132, "loss": 0.3752, "step": 273250 }, { "epoch": 78.61334867663982, "grad_norm": 2.062189817428589, "learning_rate": 0.00042773302646720367, "loss": 0.345, "step": 273260 }, { "epoch": 78.61622554660529, "grad_norm": 1.1064274311065674, "learning_rate": 0.0004276754890678942, "loss": 0.3004, "step": 273270 }, { "epoch": 78.61910241657077, "grad_norm": 1.12962007522583, "learning_rate": 0.0004276179516685846, "loss": 0.4126, "step": 273280 }, { "epoch": 78.62197928653625, "grad_norm": 1.565252423286438, "learning_rate": 0.00042756041426927503, "loss": 0.3984, "step": 273290 }, { "epoch": 78.62485615650172, "grad_norm": 0.6513554453849792, "learning_rate": 0.0004275028768699655, "loss": 0.352, "step": 273300 }, { "epoch": 78.6277330264672, "grad_norm": 0.8723086714744568, "learning_rate": 0.00042744533947065594, "loss": 0.3442, "step": 273310 }, { "epoch": 78.63060989643269, "grad_norm": 1.059221863746643, "learning_rate": 0.00042738780207134634, "loss": 0.3495, "step": 273320 }, { "epoch": 78.63348676639816, "grad_norm": 1.5723053216934204, "learning_rate": 0.00042733026467203685, "loss": 0.4487, "step": 273330 }, { "epoch": 78.63636363636364, "grad_norm": 1.1034979820251465, "learning_rate": 0.00042727272727272726, "loss": 0.3157, "step": 273340 }, { "epoch": 78.63924050632912, "grad_norm": 1.2480292320251465, "learning_rate": 0.0004272151898734177, "loss": 0.3967, "step": 273350 }, { "epoch": 78.64211737629459, "grad_norm": 2.7355432510375977, "learning_rate": 0.0004271576524741082, "loss": 0.3702, "step": 273360 }, { "epoch": 78.64499424626007, "grad_norm": 1.6316879987716675, "learning_rate": 0.0004271001150747986, "loss": 0.4233, "step": 273370 }, { "epoch": 78.64787111622555, "grad_norm": 0.987061619758606, "learning_rate": 0.0004270425776754891, "loss": 0.4223, "step": 273380 }, { "epoch": 78.65074798619102, "grad_norm": 1.1749334335327148, "learning_rate": 0.00042698504027617953, "loss": 0.3085, "step": 273390 }, { "epoch": 78.6536248561565, "grad_norm": 1.2168537378311157, "learning_rate": 0.00042692750287687, "loss": 0.4004, "step": 273400 }, { "epoch": 78.65650172612197, "grad_norm": 0.9848799705505371, "learning_rate": 0.0004268699654775604, "loss": 0.38, "step": 273410 }, { "epoch": 78.65937859608745, "grad_norm": 1.190990686416626, "learning_rate": 0.0004268124280782509, "loss": 0.3362, "step": 273420 }, { "epoch": 78.66225546605294, "grad_norm": 1.1619328260421753, "learning_rate": 0.0004267548906789413, "loss": 0.382, "step": 273430 }, { "epoch": 78.66513233601842, "grad_norm": 1.539530634880066, "learning_rate": 0.00042669735327963176, "loss": 0.356, "step": 273440 }, { "epoch": 78.66800920598389, "grad_norm": 0.8126785159111023, "learning_rate": 0.0004266398158803222, "loss": 0.3379, "step": 273450 }, { "epoch": 78.67088607594937, "grad_norm": 1.6982698440551758, "learning_rate": 0.00042658227848101267, "loss": 0.3294, "step": 273460 }, { "epoch": 78.67376294591485, "grad_norm": 0.680083155632019, "learning_rate": 0.0004265247410817031, "loss": 0.3715, "step": 273470 }, { "epoch": 78.67663981588032, "grad_norm": 1.605955719947815, "learning_rate": 0.0004264672036823936, "loss": 0.4769, "step": 273480 }, { "epoch": 78.6795166858458, "grad_norm": 1.6145801544189453, "learning_rate": 0.00042640966628308403, "loss": 0.3569, "step": 273490 }, { "epoch": 78.68239355581127, "grad_norm": 0.8400456309318542, "learning_rate": 0.00042635212888377443, "loss": 0.4202, "step": 273500 }, { "epoch": 78.68527042577675, "grad_norm": 0.876313328742981, "learning_rate": 0.00042629459148446494, "loss": 0.3348, "step": 273510 }, { "epoch": 78.68814729574223, "grad_norm": 1.2392247915267944, "learning_rate": 0.00042623705408515535, "loss": 0.4057, "step": 273520 }, { "epoch": 78.69102416570772, "grad_norm": 1.4386645555496216, "learning_rate": 0.0004261795166858458, "loss": 0.354, "step": 273530 }, { "epoch": 78.69390103567319, "grad_norm": 1.4417282342910767, "learning_rate": 0.00042612197928653626, "loss": 0.3857, "step": 273540 }, { "epoch": 78.69677790563867, "grad_norm": 2.1187899112701416, "learning_rate": 0.0004260644418872267, "loss": 0.3699, "step": 273550 }, { "epoch": 78.69965477560415, "grad_norm": 0.8094171285629272, "learning_rate": 0.00042600690448791717, "loss": 0.3629, "step": 273560 }, { "epoch": 78.70253164556962, "grad_norm": 1.4416571855545044, "learning_rate": 0.0004259493670886076, "loss": 0.3177, "step": 273570 }, { "epoch": 78.7054085155351, "grad_norm": 0.5169146060943604, "learning_rate": 0.0004258918296892981, "loss": 0.3882, "step": 273580 }, { "epoch": 78.70828538550057, "grad_norm": 1.0247782468795776, "learning_rate": 0.0004258342922899885, "loss": 0.3994, "step": 273590 }, { "epoch": 78.71116225546605, "grad_norm": 1.2193841934204102, "learning_rate": 0.000425776754890679, "loss": 0.289, "step": 273600 }, { "epoch": 78.71403912543153, "grad_norm": 0.9593938589096069, "learning_rate": 0.0004257192174913694, "loss": 0.3674, "step": 273610 }, { "epoch": 78.716915995397, "grad_norm": 1.7771706581115723, "learning_rate": 0.00042566168009205985, "loss": 0.3842, "step": 273620 }, { "epoch": 78.71979286536248, "grad_norm": 0.7769762873649597, "learning_rate": 0.00042560414269275025, "loss": 0.4446, "step": 273630 }, { "epoch": 78.72266973532797, "grad_norm": 1.5182139873504639, "learning_rate": 0.00042554660529344076, "loss": 0.3149, "step": 273640 }, { "epoch": 78.72554660529345, "grad_norm": 1.1087144613265991, "learning_rate": 0.0004254890678941312, "loss": 0.3393, "step": 273650 }, { "epoch": 78.72842347525892, "grad_norm": 1.380157470703125, "learning_rate": 0.0004254315304948216, "loss": 0.485, "step": 273660 }, { "epoch": 78.7313003452244, "grad_norm": 1.4103243350982666, "learning_rate": 0.0004253739930955121, "loss": 0.4435, "step": 273670 }, { "epoch": 78.73417721518987, "grad_norm": 1.310922622680664, "learning_rate": 0.0004253164556962025, "loss": 0.3941, "step": 273680 }, { "epoch": 78.73705408515535, "grad_norm": 2.1626052856445312, "learning_rate": 0.000425258918296893, "loss": 0.4081, "step": 273690 }, { "epoch": 78.73993095512083, "grad_norm": 1.2820550203323364, "learning_rate": 0.00042520138089758344, "loss": 0.4163, "step": 273700 }, { "epoch": 78.7428078250863, "grad_norm": 0.896525502204895, "learning_rate": 0.0004251438434982739, "loss": 0.3886, "step": 273710 }, { "epoch": 78.74568469505178, "grad_norm": 1.0346993207931519, "learning_rate": 0.0004250863060989643, "loss": 0.3172, "step": 273720 }, { "epoch": 78.74856156501725, "grad_norm": 1.075172781944275, "learning_rate": 0.0004250287686996548, "loss": 0.3528, "step": 273730 }, { "epoch": 78.75143843498275, "grad_norm": 1.01271653175354, "learning_rate": 0.0004249712313003452, "loss": 0.3415, "step": 273740 }, { "epoch": 78.75431530494822, "grad_norm": 1.9681205749511719, "learning_rate": 0.00042491369390103566, "loss": 0.4343, "step": 273750 }, { "epoch": 78.7571921749137, "grad_norm": 0.7187890410423279, "learning_rate": 0.00042485615650172617, "loss": 0.3147, "step": 273760 }, { "epoch": 78.76006904487917, "grad_norm": 0.8203956484794617, "learning_rate": 0.00042479861910241657, "loss": 0.424, "step": 273770 }, { "epoch": 78.76294591484465, "grad_norm": 0.9885667562484741, "learning_rate": 0.000424741081703107, "loss": 0.3439, "step": 273780 }, { "epoch": 78.76582278481013, "grad_norm": 1.2791521549224854, "learning_rate": 0.0004246835443037975, "loss": 0.2839, "step": 273790 }, { "epoch": 78.7686996547756, "grad_norm": 0.8351567387580872, "learning_rate": 0.00042462600690448794, "loss": 0.3695, "step": 273800 }, { "epoch": 78.77157652474108, "grad_norm": 1.2320291996002197, "learning_rate": 0.00042456846950517834, "loss": 0.4405, "step": 273810 }, { "epoch": 78.77445339470655, "grad_norm": 0.913111686706543, "learning_rate": 0.00042451093210586885, "loss": 0.2925, "step": 273820 }, { "epoch": 78.77733026467203, "grad_norm": 0.7131586670875549, "learning_rate": 0.00042445339470655925, "loss": 0.3427, "step": 273830 }, { "epoch": 78.78020713463752, "grad_norm": 1.4563133716583252, "learning_rate": 0.0004243958573072497, "loss": 0.3699, "step": 273840 }, { "epoch": 78.783084004603, "grad_norm": 2.217776298522949, "learning_rate": 0.0004243383199079402, "loss": 0.4515, "step": 273850 }, { "epoch": 78.78596087456847, "grad_norm": 1.3458874225616455, "learning_rate": 0.0004242807825086306, "loss": 0.3458, "step": 273860 }, { "epoch": 78.78883774453395, "grad_norm": 1.7251276969909668, "learning_rate": 0.00042422324510932107, "loss": 0.3682, "step": 273870 }, { "epoch": 78.79171461449943, "grad_norm": 1.382683515548706, "learning_rate": 0.0004241657077100115, "loss": 0.3721, "step": 273880 }, { "epoch": 78.7945914844649, "grad_norm": 0.6381828188896179, "learning_rate": 0.000424108170310702, "loss": 0.3834, "step": 273890 }, { "epoch": 78.79746835443038, "grad_norm": 1.1941951513290405, "learning_rate": 0.0004240506329113924, "loss": 0.4022, "step": 273900 }, { "epoch": 78.80034522439585, "grad_norm": 1.370225191116333, "learning_rate": 0.0004239930955120829, "loss": 0.455, "step": 273910 }, { "epoch": 78.80322209436133, "grad_norm": 1.220226764678955, "learning_rate": 0.0004239355581127733, "loss": 0.336, "step": 273920 }, { "epoch": 78.80609896432681, "grad_norm": 0.6269116401672363, "learning_rate": 0.00042387802071346375, "loss": 0.3378, "step": 273930 }, { "epoch": 78.80897583429228, "grad_norm": 0.7889525294303894, "learning_rate": 0.0004238204833141542, "loss": 0.3089, "step": 273940 }, { "epoch": 78.81185270425777, "grad_norm": 0.5973318219184875, "learning_rate": 0.00042376294591484466, "loss": 0.3142, "step": 273950 }, { "epoch": 78.81472957422325, "grad_norm": 3.2610979080200195, "learning_rate": 0.0004237054085155351, "loss": 0.359, "step": 273960 }, { "epoch": 78.81760644418873, "grad_norm": 0.9190188050270081, "learning_rate": 0.00042364787111622557, "loss": 0.2988, "step": 273970 }, { "epoch": 78.8204833141542, "grad_norm": 1.636906623840332, "learning_rate": 0.00042359033371691603, "loss": 0.3702, "step": 273980 }, { "epoch": 78.82336018411968, "grad_norm": 1.5637701749801636, "learning_rate": 0.00042353279631760643, "loss": 0.4007, "step": 273990 }, { "epoch": 78.82623705408515, "grad_norm": 1.1699975728988647, "learning_rate": 0.00042347525891829694, "loss": 0.3686, "step": 274000 }, { "epoch": 78.82911392405063, "grad_norm": 1.0692980289459229, "learning_rate": 0.00042341772151898734, "loss": 0.3426, "step": 274010 }, { "epoch": 78.83199079401611, "grad_norm": 1.6156398057937622, "learning_rate": 0.0004233601841196778, "loss": 0.3871, "step": 274020 }, { "epoch": 78.83486766398158, "grad_norm": 1.339271903038025, "learning_rate": 0.0004233026467203682, "loss": 0.3535, "step": 274030 }, { "epoch": 78.83774453394706, "grad_norm": 2.0718111991882324, "learning_rate": 0.0004232451093210587, "loss": 0.4293, "step": 274040 }, { "epoch": 78.84062140391255, "grad_norm": 1.0401183366775513, "learning_rate": 0.00042318757192174916, "loss": 0.3045, "step": 274050 }, { "epoch": 78.84349827387803, "grad_norm": 1.1975380182266235, "learning_rate": 0.00042313003452243956, "loss": 0.433, "step": 274060 }, { "epoch": 78.8463751438435, "grad_norm": 0.6900750994682312, "learning_rate": 0.0004230724971231301, "loss": 0.3199, "step": 274070 }, { "epoch": 78.84925201380898, "grad_norm": 0.9446485638618469, "learning_rate": 0.0004230149597238205, "loss": 0.3352, "step": 274080 }, { "epoch": 78.85212888377445, "grad_norm": 0.9476490020751953, "learning_rate": 0.00042295742232451093, "loss": 0.3513, "step": 274090 }, { "epoch": 78.85500575373993, "grad_norm": 1.4344576597213745, "learning_rate": 0.0004228998849252014, "loss": 0.3731, "step": 274100 }, { "epoch": 78.85788262370541, "grad_norm": 1.0409977436065674, "learning_rate": 0.00042284234752589184, "loss": 0.505, "step": 274110 }, { "epoch": 78.86075949367088, "grad_norm": 1.4856011867523193, "learning_rate": 0.00042278481012658224, "loss": 0.3858, "step": 274120 }, { "epoch": 78.86363636363636, "grad_norm": 1.3090306520462036, "learning_rate": 0.00042272727272727275, "loss": 0.3003, "step": 274130 }, { "epoch": 78.86651323360184, "grad_norm": 1.4125802516937256, "learning_rate": 0.0004226697353279632, "loss": 0.3664, "step": 274140 }, { "epoch": 78.86939010356731, "grad_norm": 1.6173737049102783, "learning_rate": 0.0004226121979286536, "loss": 0.4113, "step": 274150 }, { "epoch": 78.8722669735328, "grad_norm": 1.9334392547607422, "learning_rate": 0.0004225546605293441, "loss": 0.4195, "step": 274160 }, { "epoch": 78.87514384349828, "grad_norm": 0.8712205290794373, "learning_rate": 0.0004224971231300345, "loss": 0.3488, "step": 274170 }, { "epoch": 78.87802071346375, "grad_norm": 1.2147300243377686, "learning_rate": 0.000422439585730725, "loss": 0.3431, "step": 274180 }, { "epoch": 78.88089758342923, "grad_norm": 0.8463814854621887, "learning_rate": 0.00042238204833141543, "loss": 0.433, "step": 274190 }, { "epoch": 78.88377445339471, "grad_norm": 1.1899785995483398, "learning_rate": 0.0004223245109321059, "loss": 0.3072, "step": 274200 }, { "epoch": 78.88665132336018, "grad_norm": 0.8406887054443359, "learning_rate": 0.0004222669735327963, "loss": 0.3106, "step": 274210 }, { "epoch": 78.88952819332566, "grad_norm": 1.2801599502563477, "learning_rate": 0.0004222094361334868, "loss": 0.3298, "step": 274220 }, { "epoch": 78.89240506329114, "grad_norm": 0.6635989546775818, "learning_rate": 0.0004221518987341772, "loss": 0.3296, "step": 274230 }, { "epoch": 78.89528193325661, "grad_norm": 0.9194661378860474, "learning_rate": 0.00042209436133486765, "loss": 0.3297, "step": 274240 }, { "epoch": 78.89815880322209, "grad_norm": 0.9556301832199097, "learning_rate": 0.00042203682393555816, "loss": 0.3415, "step": 274250 }, { "epoch": 78.90103567318758, "grad_norm": 2.110602378845215, "learning_rate": 0.00042197928653624856, "loss": 0.3567, "step": 274260 }, { "epoch": 78.90391254315306, "grad_norm": 0.9696509838104248, "learning_rate": 0.000421921749136939, "loss": 0.338, "step": 274270 }, { "epoch": 78.90678941311853, "grad_norm": 1.8678642511367798, "learning_rate": 0.0004218642117376295, "loss": 0.4422, "step": 274280 }, { "epoch": 78.90966628308401, "grad_norm": 1.494357943534851, "learning_rate": 0.00042180667433831993, "loss": 0.3325, "step": 274290 }, { "epoch": 78.91254315304948, "grad_norm": 0.8732584714889526, "learning_rate": 0.00042174913693901033, "loss": 0.4453, "step": 274300 }, { "epoch": 78.91542002301496, "grad_norm": 2.0595901012420654, "learning_rate": 0.00042169159953970084, "loss": 0.3575, "step": 274310 }, { "epoch": 78.91829689298044, "grad_norm": 0.8437077403068542, "learning_rate": 0.00042163406214039124, "loss": 0.3231, "step": 274320 }, { "epoch": 78.92117376294591, "grad_norm": 1.766924262046814, "learning_rate": 0.0004215765247410817, "loss": 0.3628, "step": 274330 }, { "epoch": 78.92405063291139, "grad_norm": 0.7286140322685242, "learning_rate": 0.0004215189873417722, "loss": 0.3678, "step": 274340 }, { "epoch": 78.92692750287686, "grad_norm": 1.7702441215515137, "learning_rate": 0.0004214614499424626, "loss": 0.4227, "step": 274350 }, { "epoch": 78.92980437284234, "grad_norm": 1.4351292848587036, "learning_rate": 0.00042140391254315307, "loss": 0.4045, "step": 274360 }, { "epoch": 78.93268124280783, "grad_norm": 1.557674527168274, "learning_rate": 0.0004213463751438435, "loss": 0.423, "step": 274370 }, { "epoch": 78.93555811277331, "grad_norm": 0.9984667897224426, "learning_rate": 0.000421288837744534, "loss": 0.4195, "step": 274380 }, { "epoch": 78.93843498273878, "grad_norm": 1.3189268112182617, "learning_rate": 0.0004212313003452244, "loss": 0.4618, "step": 274390 }, { "epoch": 78.94131185270426, "grad_norm": 0.9187620878219604, "learning_rate": 0.0004211737629459149, "loss": 0.3687, "step": 274400 }, { "epoch": 78.94418872266974, "grad_norm": 1.0503370761871338, "learning_rate": 0.0004211162255466053, "loss": 0.2811, "step": 274410 }, { "epoch": 78.94706559263521, "grad_norm": 2.4803338050842285, "learning_rate": 0.00042105868814729574, "loss": 0.4263, "step": 274420 }, { "epoch": 78.94994246260069, "grad_norm": 1.5830175876617432, "learning_rate": 0.00042100115074798615, "loss": 0.414, "step": 274430 }, { "epoch": 78.95281933256616, "grad_norm": 1.1647547483444214, "learning_rate": 0.00042094361334867666, "loss": 0.3322, "step": 274440 }, { "epoch": 78.95569620253164, "grad_norm": 1.3867084980010986, "learning_rate": 0.0004208860759493671, "loss": 0.354, "step": 274450 }, { "epoch": 78.95857307249712, "grad_norm": 0.8957604169845581, "learning_rate": 0.0004208285385500575, "loss": 0.3327, "step": 274460 }, { "epoch": 78.96144994246261, "grad_norm": 0.9470364451408386, "learning_rate": 0.000420771001150748, "loss": 0.2807, "step": 274470 }, { "epoch": 78.96432681242808, "grad_norm": 2.1459362506866455, "learning_rate": 0.0004207134637514384, "loss": 0.4216, "step": 274480 }, { "epoch": 78.96720368239356, "grad_norm": 0.8607889413833618, "learning_rate": 0.0004206559263521289, "loss": 0.3075, "step": 274490 }, { "epoch": 78.97008055235904, "grad_norm": 1.6438535451889038, "learning_rate": 0.00042059838895281933, "loss": 0.3518, "step": 274500 }, { "epoch": 78.97295742232451, "grad_norm": 1.8452695608139038, "learning_rate": 0.0004205408515535098, "loss": 0.3257, "step": 274510 }, { "epoch": 78.97583429228999, "grad_norm": 1.7924245595932007, "learning_rate": 0.0004204833141542002, "loss": 0.3824, "step": 274520 }, { "epoch": 78.97871116225546, "grad_norm": 0.7613486051559448, "learning_rate": 0.0004204257767548907, "loss": 0.3498, "step": 274530 }, { "epoch": 78.98158803222094, "grad_norm": 0.7967175841331482, "learning_rate": 0.00042036823935558116, "loss": 0.4545, "step": 274540 }, { "epoch": 78.98446490218642, "grad_norm": 1.5634965896606445, "learning_rate": 0.00042031070195627156, "loss": 0.3359, "step": 274550 }, { "epoch": 78.9873417721519, "grad_norm": 1.0288469791412354, "learning_rate": 0.00042025316455696207, "loss": 0.3627, "step": 274560 }, { "epoch": 78.99021864211737, "grad_norm": 1.3779767751693726, "learning_rate": 0.00042019562715765247, "loss": 0.4312, "step": 274570 }, { "epoch": 78.99309551208286, "grad_norm": 0.7105382084846497, "learning_rate": 0.0004201380897583429, "loss": 0.3131, "step": 274580 }, { "epoch": 78.99597238204834, "grad_norm": 2.0058248043060303, "learning_rate": 0.0004200805523590334, "loss": 0.4677, "step": 274590 }, { "epoch": 78.99884925201381, "grad_norm": 1.2880659103393555, "learning_rate": 0.00042002301495972384, "loss": 0.3797, "step": 274600 }, { "epoch": 79.00172612197929, "grad_norm": 1.766660451889038, "learning_rate": 0.00041996547756041424, "loss": 0.3732, "step": 274610 }, { "epoch": 79.00460299194476, "grad_norm": 2.208200454711914, "learning_rate": 0.00041990794016110475, "loss": 0.454, "step": 274620 }, { "epoch": 79.00747986191024, "grad_norm": 0.6755769848823547, "learning_rate": 0.0004198504027617952, "loss": 0.359, "step": 274630 }, { "epoch": 79.01035673187572, "grad_norm": 1.091842770576477, "learning_rate": 0.0004197928653624856, "loss": 0.4122, "step": 274640 }, { "epoch": 79.0132336018412, "grad_norm": 0.9264041781425476, "learning_rate": 0.0004197353279631761, "loss": 0.2921, "step": 274650 }, { "epoch": 79.01611047180667, "grad_norm": 1.2611491680145264, "learning_rate": 0.0004196777905638665, "loss": 0.3138, "step": 274660 }, { "epoch": 79.01898734177215, "grad_norm": 1.159203290939331, "learning_rate": 0.00041962025316455697, "loss": 0.3165, "step": 274670 }, { "epoch": 79.02186421173764, "grad_norm": 1.13399338722229, "learning_rate": 0.0004195627157652474, "loss": 0.2716, "step": 274680 }, { "epoch": 79.02474108170311, "grad_norm": 2.0002799034118652, "learning_rate": 0.0004195051783659379, "loss": 0.3181, "step": 274690 }, { "epoch": 79.02761795166859, "grad_norm": 0.9160394668579102, "learning_rate": 0.0004194476409666283, "loss": 0.3376, "step": 274700 }, { "epoch": 79.03049482163406, "grad_norm": 0.7288184762001038, "learning_rate": 0.0004193901035673188, "loss": 0.3198, "step": 274710 }, { "epoch": 79.03337169159954, "grad_norm": 1.1921932697296143, "learning_rate": 0.0004193325661680092, "loss": 0.3374, "step": 274720 }, { "epoch": 79.03624856156502, "grad_norm": 0.7658371925354004, "learning_rate": 0.00041927502876869965, "loss": 0.2822, "step": 274730 }, { "epoch": 79.0391254315305, "grad_norm": 0.9926551580429077, "learning_rate": 0.00041921749136939016, "loss": 0.3701, "step": 274740 }, { "epoch": 79.04200230149597, "grad_norm": 1.6679414510726929, "learning_rate": 0.00041915995397008056, "loss": 0.3792, "step": 274750 }, { "epoch": 79.04487917146145, "grad_norm": 0.7379680275917053, "learning_rate": 0.000419102416570771, "loss": 0.3069, "step": 274760 }, { "epoch": 79.04775604142692, "grad_norm": 1.7039304971694946, "learning_rate": 0.00041904487917146147, "loss": 0.33, "step": 274770 }, { "epoch": 79.0506329113924, "grad_norm": 1.3033510446548462, "learning_rate": 0.0004189873417721519, "loss": 0.2822, "step": 274780 }, { "epoch": 79.05350978135789, "grad_norm": 1.0433928966522217, "learning_rate": 0.00041892980437284233, "loss": 0.3304, "step": 274790 }, { "epoch": 79.05638665132336, "grad_norm": 1.7376834154129028, "learning_rate": 0.00041887226697353284, "loss": 0.4179, "step": 274800 }, { "epoch": 79.05926352128884, "grad_norm": 1.131567358970642, "learning_rate": 0.00041881472957422324, "loss": 0.3808, "step": 274810 }, { "epoch": 79.06214039125432, "grad_norm": 1.8696372509002686, "learning_rate": 0.0004187571921749137, "loss": 0.3887, "step": 274820 }, { "epoch": 79.0650172612198, "grad_norm": 0.7906550765037537, "learning_rate": 0.0004186996547756042, "loss": 0.3675, "step": 274830 }, { "epoch": 79.06789413118527, "grad_norm": 0.8230043053627014, "learning_rate": 0.0004186421173762946, "loss": 0.329, "step": 274840 }, { "epoch": 79.07077100115075, "grad_norm": 0.9796781539916992, "learning_rate": 0.00041858457997698506, "loss": 0.3225, "step": 274850 }, { "epoch": 79.07364787111622, "grad_norm": 0.8376318216323853, "learning_rate": 0.00041852704257767546, "loss": 0.3548, "step": 274860 }, { "epoch": 79.0765247410817, "grad_norm": 1.0145645141601562, "learning_rate": 0.00041846950517836597, "loss": 0.329, "step": 274870 }, { "epoch": 79.07940161104717, "grad_norm": 1.277376413345337, "learning_rate": 0.00041841196777905637, "loss": 0.3158, "step": 274880 }, { "epoch": 79.08227848101266, "grad_norm": 1.2520991563796997, "learning_rate": 0.00041835443037974683, "loss": 0.318, "step": 274890 }, { "epoch": 79.08515535097814, "grad_norm": 0.9046816825866699, "learning_rate": 0.0004182968929804373, "loss": 0.3034, "step": 274900 }, { "epoch": 79.08803222094362, "grad_norm": 1.1809417009353638, "learning_rate": 0.00041823935558112774, "loss": 0.2837, "step": 274910 }, { "epoch": 79.0909090909091, "grad_norm": 1.5236828327178955, "learning_rate": 0.00041818181818181814, "loss": 0.363, "step": 274920 }, { "epoch": 79.09378596087457, "grad_norm": 0.5802337527275085, "learning_rate": 0.00041812428078250865, "loss": 0.3183, "step": 274930 }, { "epoch": 79.09666283084005, "grad_norm": 2.983638048171997, "learning_rate": 0.0004180667433831991, "loss": 0.4387, "step": 274940 }, { "epoch": 79.09953970080552, "grad_norm": 1.3361032009124756, "learning_rate": 0.0004180092059838895, "loss": 0.2995, "step": 274950 }, { "epoch": 79.102416570771, "grad_norm": 1.1909632682800293, "learning_rate": 0.00041795166858458, "loss": 0.3641, "step": 274960 }, { "epoch": 79.10529344073647, "grad_norm": 0.896723747253418, "learning_rate": 0.0004178941311852704, "loss": 0.3504, "step": 274970 }, { "epoch": 79.10817031070195, "grad_norm": 0.6004535555839539, "learning_rate": 0.0004178365937859609, "loss": 0.3058, "step": 274980 }, { "epoch": 79.11104718066743, "grad_norm": 1.3581502437591553, "learning_rate": 0.00041777905638665133, "loss": 0.3347, "step": 274990 }, { "epoch": 79.11392405063292, "grad_norm": 1.7927566766738892, "learning_rate": 0.0004177215189873418, "loss": 0.3274, "step": 275000 }, { "epoch": 79.1168009205984, "grad_norm": 1.9093109369277954, "learning_rate": 0.0004176639815880322, "loss": 0.3256, "step": 275010 }, { "epoch": 79.11967779056387, "grad_norm": 3.1099302768707275, "learning_rate": 0.0004176064441887227, "loss": 0.3757, "step": 275020 }, { "epoch": 79.12255466052935, "grad_norm": 1.2028120756149292, "learning_rate": 0.00041754890678941315, "loss": 0.2999, "step": 275030 }, { "epoch": 79.12543153049482, "grad_norm": 1.3432167768478394, "learning_rate": 0.00041749136939010355, "loss": 0.3402, "step": 275040 }, { "epoch": 79.1283084004603, "grad_norm": 0.8779796361923218, "learning_rate": 0.00041743383199079406, "loss": 0.4095, "step": 275050 }, { "epoch": 79.13118527042577, "grad_norm": 2.1943414211273193, "learning_rate": 0.00041737629459148446, "loss": 0.347, "step": 275060 }, { "epoch": 79.13406214039125, "grad_norm": 1.0488165616989136, "learning_rate": 0.0004173187571921749, "loss": 0.33, "step": 275070 }, { "epoch": 79.13693901035673, "grad_norm": 1.9692384004592896, "learning_rate": 0.0004172612197928654, "loss": 0.3532, "step": 275080 }, { "epoch": 79.1398158803222, "grad_norm": 1.2307665348052979, "learning_rate": 0.00041720368239355583, "loss": 0.3825, "step": 275090 }, { "epoch": 79.1426927502877, "grad_norm": 1.221169352531433, "learning_rate": 0.00041714614499424623, "loss": 0.3438, "step": 275100 }, { "epoch": 79.14556962025317, "grad_norm": 0.8727995157241821, "learning_rate": 0.00041708860759493674, "loss": 0.3322, "step": 275110 }, { "epoch": 79.14844649021865, "grad_norm": 1.7820905447006226, "learning_rate": 0.0004170310701956272, "loss": 0.3745, "step": 275120 }, { "epoch": 79.15132336018412, "grad_norm": 2.560702323913574, "learning_rate": 0.0004169735327963176, "loss": 0.3721, "step": 275130 }, { "epoch": 79.1542002301496, "grad_norm": 1.4443377256393433, "learning_rate": 0.0004169159953970081, "loss": 0.2936, "step": 275140 }, { "epoch": 79.15707710011507, "grad_norm": 2.6232128143310547, "learning_rate": 0.0004168584579976985, "loss": 0.308, "step": 275150 }, { "epoch": 79.15995397008055, "grad_norm": 1.4262871742248535, "learning_rate": 0.00041680092059838896, "loss": 0.4165, "step": 275160 }, { "epoch": 79.16283084004603, "grad_norm": 1.191253900527954, "learning_rate": 0.0004167433831990794, "loss": 0.2825, "step": 275170 }, { "epoch": 79.1657077100115, "grad_norm": 1.4681166410446167, "learning_rate": 0.0004166858457997699, "loss": 0.3389, "step": 275180 }, { "epoch": 79.16858457997698, "grad_norm": 1.0885231494903564, "learning_rate": 0.0004166283084004603, "loss": 0.3288, "step": 275190 }, { "epoch": 79.17146144994246, "grad_norm": 1.2340664863586426, "learning_rate": 0.0004165707710011508, "loss": 0.4173, "step": 275200 }, { "epoch": 79.17433831990795, "grad_norm": 1.0086705684661865, "learning_rate": 0.0004165132336018412, "loss": 0.3673, "step": 275210 }, { "epoch": 79.17721518987342, "grad_norm": 1.8757691383361816, "learning_rate": 0.00041645569620253164, "loss": 0.3829, "step": 275220 }, { "epoch": 79.1800920598389, "grad_norm": 0.659417450428009, "learning_rate": 0.00041639815880322215, "loss": 0.3137, "step": 275230 }, { "epoch": 79.18296892980437, "grad_norm": 2.623687267303467, "learning_rate": 0.00041634062140391255, "loss": 0.4309, "step": 275240 }, { "epoch": 79.18584579976985, "grad_norm": 1.0324788093566895, "learning_rate": 0.000416283084004603, "loss": 0.2893, "step": 275250 }, { "epoch": 79.18872266973533, "grad_norm": 1.3626227378845215, "learning_rate": 0.0004162255466052934, "loss": 0.3845, "step": 275260 }, { "epoch": 79.1915995397008, "grad_norm": 1.0790759325027466, "learning_rate": 0.0004161680092059839, "loss": 0.3528, "step": 275270 }, { "epoch": 79.19447640966628, "grad_norm": 1.886993408203125, "learning_rate": 0.0004161104718066743, "loss": 0.4131, "step": 275280 }, { "epoch": 79.19735327963176, "grad_norm": 1.0589145421981812, "learning_rate": 0.0004160529344073648, "loss": 0.4034, "step": 275290 }, { "epoch": 79.20023014959723, "grad_norm": 1.2162301540374756, "learning_rate": 0.00041599539700805523, "loss": 0.3452, "step": 275300 }, { "epoch": 79.20310701956272, "grad_norm": 1.2187830209732056, "learning_rate": 0.0004159378596087457, "loss": 0.4179, "step": 275310 }, { "epoch": 79.2059838895282, "grad_norm": 0.9164745211601257, "learning_rate": 0.00041588032220943614, "loss": 0.3016, "step": 275320 }, { "epoch": 79.20886075949367, "grad_norm": 1.789279580116272, "learning_rate": 0.0004158227848101266, "loss": 0.3939, "step": 275330 }, { "epoch": 79.21173762945915, "grad_norm": 0.8217934966087341, "learning_rate": 0.00041576524741081705, "loss": 0.4193, "step": 275340 }, { "epoch": 79.21461449942463, "grad_norm": 2.232841730117798, "learning_rate": 0.00041570771001150746, "loss": 0.4452, "step": 275350 }, { "epoch": 79.2174913693901, "grad_norm": 1.4088385105133057, "learning_rate": 0.00041565017261219797, "loss": 0.3736, "step": 275360 }, { "epoch": 79.22036823935558, "grad_norm": 1.1373974084854126, "learning_rate": 0.00041559263521288837, "loss": 0.3049, "step": 275370 }, { "epoch": 79.22324510932106, "grad_norm": 1.2368358373641968, "learning_rate": 0.0004155350978135788, "loss": 0.3591, "step": 275380 }, { "epoch": 79.22612197928653, "grad_norm": 0.8278021216392517, "learning_rate": 0.0004154775604142693, "loss": 0.3153, "step": 275390 }, { "epoch": 79.22899884925201, "grad_norm": 0.5677676200866699, "learning_rate": 0.00041542002301495973, "loss": 0.3774, "step": 275400 }, { "epoch": 79.23187571921748, "grad_norm": 1.2185338735580444, "learning_rate": 0.00041536248561565013, "loss": 0.3367, "step": 275410 }, { "epoch": 79.23475258918297, "grad_norm": 0.9727112054824829, "learning_rate": 0.00041530494821634064, "loss": 0.3443, "step": 275420 }, { "epoch": 79.23762945914845, "grad_norm": 2.921189069747925, "learning_rate": 0.0004152474108170311, "loss": 0.3524, "step": 275430 }, { "epoch": 79.24050632911393, "grad_norm": 1.6917893886566162, "learning_rate": 0.0004151898734177215, "loss": 0.3612, "step": 275440 }, { "epoch": 79.2433831990794, "grad_norm": 0.9820945262908936, "learning_rate": 0.000415132336018412, "loss": 0.3026, "step": 275450 }, { "epoch": 79.24626006904488, "grad_norm": 0.9856967926025391, "learning_rate": 0.0004150747986191024, "loss": 0.2922, "step": 275460 }, { "epoch": 79.24913693901036, "grad_norm": 0.9714428186416626, "learning_rate": 0.00041501726121979287, "loss": 0.2977, "step": 275470 }, { "epoch": 79.25201380897583, "grad_norm": 1.9438872337341309, "learning_rate": 0.0004149597238204833, "loss": 0.3248, "step": 275480 }, { "epoch": 79.25489067894131, "grad_norm": 1.6297318935394287, "learning_rate": 0.0004149021864211738, "loss": 0.3481, "step": 275490 }, { "epoch": 79.25776754890678, "grad_norm": 1.0187209844589233, "learning_rate": 0.0004148446490218642, "loss": 0.369, "step": 275500 }, { "epoch": 79.26064441887226, "grad_norm": 2.3894715309143066, "learning_rate": 0.0004147871116225547, "loss": 0.4178, "step": 275510 }, { "epoch": 79.26352128883775, "grad_norm": 1.0293819904327393, "learning_rate": 0.00041472957422324515, "loss": 0.3649, "step": 275520 }, { "epoch": 79.26639815880323, "grad_norm": 0.8740613460540771, "learning_rate": 0.00041467203682393555, "loss": 0.3302, "step": 275530 }, { "epoch": 79.2692750287687, "grad_norm": 0.9361311197280884, "learning_rate": 0.00041461449942462606, "loss": 0.3544, "step": 275540 }, { "epoch": 79.27215189873418, "grad_norm": 1.0989036560058594, "learning_rate": 0.00041455696202531646, "loss": 0.3413, "step": 275550 }, { "epoch": 79.27502876869966, "grad_norm": 1.1344115734100342, "learning_rate": 0.0004144994246260069, "loss": 0.3491, "step": 275560 }, { "epoch": 79.27790563866513, "grad_norm": 1.081843614578247, "learning_rate": 0.00041444188722669737, "loss": 0.2945, "step": 275570 }, { "epoch": 79.28078250863061, "grad_norm": 0.8513444066047668, "learning_rate": 0.0004143843498273878, "loss": 0.3541, "step": 275580 }, { "epoch": 79.28365937859608, "grad_norm": 1.1037780046463013, "learning_rate": 0.0004143268124280782, "loss": 0.3509, "step": 275590 }, { "epoch": 79.28653624856156, "grad_norm": 1.1494603157043457, "learning_rate": 0.00041426927502876873, "loss": 0.4796, "step": 275600 }, { "epoch": 79.28941311852704, "grad_norm": 2.1737828254699707, "learning_rate": 0.0004142117376294592, "loss": 0.2963, "step": 275610 }, { "epoch": 79.29228998849253, "grad_norm": 0.9569602608680725, "learning_rate": 0.0004141542002301496, "loss": 0.3829, "step": 275620 }, { "epoch": 79.295166858458, "grad_norm": 0.5565130114555359, "learning_rate": 0.0004140966628308401, "loss": 0.252, "step": 275630 }, { "epoch": 79.29804372842348, "grad_norm": 1.8071117401123047, "learning_rate": 0.0004140391254315305, "loss": 0.3001, "step": 275640 }, { "epoch": 79.30092059838896, "grad_norm": 1.7553128004074097, "learning_rate": 0.00041398158803222096, "loss": 0.3854, "step": 275650 }, { "epoch": 79.30379746835443, "grad_norm": 1.2318300008773804, "learning_rate": 0.00041392405063291136, "loss": 0.3683, "step": 275660 }, { "epoch": 79.30667433831991, "grad_norm": 2.0518527030944824, "learning_rate": 0.00041386651323360187, "loss": 0.3119, "step": 275670 }, { "epoch": 79.30955120828538, "grad_norm": 1.5032916069030762, "learning_rate": 0.00041380897583429227, "loss": 0.4047, "step": 275680 }, { "epoch": 79.31242807825086, "grad_norm": 1.7990341186523438, "learning_rate": 0.0004137514384349827, "loss": 0.3515, "step": 275690 }, { "epoch": 79.31530494821634, "grad_norm": 1.658658742904663, "learning_rate": 0.0004136939010356732, "loss": 0.3264, "step": 275700 }, { "epoch": 79.31818181818181, "grad_norm": 1.4348639249801636, "learning_rate": 0.00041363636363636364, "loss": 0.3269, "step": 275710 }, { "epoch": 79.32105868814729, "grad_norm": 1.155885934829712, "learning_rate": 0.0004135788262370541, "loss": 0.3888, "step": 275720 }, { "epoch": 79.32393555811278, "grad_norm": 1.3874987363815308, "learning_rate": 0.00041352128883774455, "loss": 0.3402, "step": 275730 }, { "epoch": 79.32681242807826, "grad_norm": 0.8540427088737488, "learning_rate": 0.000413463751438435, "loss": 0.3212, "step": 275740 }, { "epoch": 79.32968929804373, "grad_norm": 2.742371082305908, "learning_rate": 0.0004134062140391254, "loss": 0.4243, "step": 275750 }, { "epoch": 79.33256616800921, "grad_norm": 0.7581389546394348, "learning_rate": 0.0004133486766398159, "loss": 0.3232, "step": 275760 }, { "epoch": 79.33544303797468, "grad_norm": 0.7390720844268799, "learning_rate": 0.0004132911392405063, "loss": 0.3098, "step": 275770 }, { "epoch": 79.33831990794016, "grad_norm": 1.7243508100509644, "learning_rate": 0.00041323360184119677, "loss": 0.4163, "step": 275780 }, { "epoch": 79.34119677790564, "grad_norm": 1.3196793794631958, "learning_rate": 0.0004131760644418872, "loss": 0.4295, "step": 275790 }, { "epoch": 79.34407364787111, "grad_norm": 0.5438662171363831, "learning_rate": 0.0004131185270425777, "loss": 0.3801, "step": 275800 }, { "epoch": 79.34695051783659, "grad_norm": 1.822558879852295, "learning_rate": 0.00041306098964326814, "loss": 0.3283, "step": 275810 }, { "epoch": 79.34982738780207, "grad_norm": 1.1827291250228882, "learning_rate": 0.0004130034522439586, "loss": 0.4235, "step": 275820 }, { "epoch": 79.35270425776756, "grad_norm": 1.418570876121521, "learning_rate": 0.00041294591484464905, "loss": 0.3814, "step": 275830 }, { "epoch": 79.35558112773303, "grad_norm": 1.9288716316223145, "learning_rate": 0.00041288837744533945, "loss": 0.3102, "step": 275840 }, { "epoch": 79.35845799769851, "grad_norm": 1.4918444156646729, "learning_rate": 0.00041283084004602996, "loss": 0.2898, "step": 275850 }, { "epoch": 79.36133486766398, "grad_norm": 1.4392638206481934, "learning_rate": 0.00041277330264672036, "loss": 0.3331, "step": 275860 }, { "epoch": 79.36421173762946, "grad_norm": 1.1550102233886719, "learning_rate": 0.0004127157652474108, "loss": 0.3439, "step": 275870 }, { "epoch": 79.36708860759494, "grad_norm": 1.0516046285629272, "learning_rate": 0.00041265822784810127, "loss": 0.3372, "step": 275880 }, { "epoch": 79.36996547756041, "grad_norm": 0.9951867461204529, "learning_rate": 0.00041260069044879173, "loss": 0.2974, "step": 275890 }, { "epoch": 79.37284234752589, "grad_norm": 1.5549813508987427, "learning_rate": 0.00041254315304948213, "loss": 0.4193, "step": 275900 }, { "epoch": 79.37571921749137, "grad_norm": 1.2281560897827148, "learning_rate": 0.00041248561565017264, "loss": 0.3684, "step": 275910 }, { "epoch": 79.37859608745684, "grad_norm": 0.8505116105079651, "learning_rate": 0.0004124280782508631, "loss": 0.3254, "step": 275920 }, { "epoch": 79.38147295742232, "grad_norm": 0.6233037114143372, "learning_rate": 0.0004123705408515535, "loss": 0.3006, "step": 275930 }, { "epoch": 79.38434982738781, "grad_norm": 1.0798417329788208, "learning_rate": 0.000412313003452244, "loss": 0.3755, "step": 275940 }, { "epoch": 79.38722669735328, "grad_norm": 1.2803422212600708, "learning_rate": 0.0004122554660529344, "loss": 0.3383, "step": 275950 }, { "epoch": 79.39010356731876, "grad_norm": 2.2108585834503174, "learning_rate": 0.00041219792865362486, "loss": 0.3331, "step": 275960 }, { "epoch": 79.39298043728424, "grad_norm": 0.976855456829071, "learning_rate": 0.0004121403912543153, "loss": 0.3009, "step": 275970 }, { "epoch": 79.39585730724971, "grad_norm": 0.636001467704773, "learning_rate": 0.0004120828538550058, "loss": 0.3088, "step": 275980 }, { "epoch": 79.39873417721519, "grad_norm": 2.3471832275390625, "learning_rate": 0.0004120253164556962, "loss": 0.3707, "step": 275990 }, { "epoch": 79.40161104718067, "grad_norm": 1.401336908340454, "learning_rate": 0.0004119677790563867, "loss": 0.4353, "step": 276000 }, { "epoch": 79.40448791714614, "grad_norm": 0.9294722080230713, "learning_rate": 0.00041191024165707714, "loss": 0.3228, "step": 276010 }, { "epoch": 79.40736478711162, "grad_norm": 0.872996985912323, "learning_rate": 0.00041185270425776754, "loss": 0.2964, "step": 276020 }, { "epoch": 79.4102416570771, "grad_norm": 0.6389473676681519, "learning_rate": 0.00041179516685845805, "loss": 0.2545, "step": 276030 }, { "epoch": 79.41311852704258, "grad_norm": 1.3264188766479492, "learning_rate": 0.00041173762945914845, "loss": 0.3631, "step": 276040 }, { "epoch": 79.41599539700806, "grad_norm": 1.2487947940826416, "learning_rate": 0.0004116800920598389, "loss": 0.3252, "step": 276050 }, { "epoch": 79.41887226697354, "grad_norm": 0.9535372853279114, "learning_rate": 0.00041162255466052936, "loss": 0.3347, "step": 276060 }, { "epoch": 79.42174913693901, "grad_norm": 1.2761160135269165, "learning_rate": 0.0004115650172612198, "loss": 0.3845, "step": 276070 }, { "epoch": 79.42462600690449, "grad_norm": 1.7782020568847656, "learning_rate": 0.0004115074798619102, "loss": 0.3318, "step": 276080 }, { "epoch": 79.42750287686997, "grad_norm": 1.2856321334838867, "learning_rate": 0.0004114499424626007, "loss": 0.3676, "step": 276090 }, { "epoch": 79.43037974683544, "grad_norm": 0.8417417407035828, "learning_rate": 0.0004113924050632912, "loss": 0.3477, "step": 276100 }, { "epoch": 79.43325661680092, "grad_norm": 0.886518120765686, "learning_rate": 0.0004113348676639816, "loss": 0.3559, "step": 276110 }, { "epoch": 79.4361334867664, "grad_norm": 1.5143600702285767, "learning_rate": 0.00041127733026467204, "loss": 0.4139, "step": 276120 }, { "epoch": 79.43901035673187, "grad_norm": 1.4800869226455688, "learning_rate": 0.0004112197928653625, "loss": 0.4482, "step": 276130 }, { "epoch": 79.44188722669735, "grad_norm": 1.0429071187973022, "learning_rate": 0.00041116225546605295, "loss": 0.3024, "step": 276140 }, { "epoch": 79.44476409666284, "grad_norm": 1.039139986038208, "learning_rate": 0.00041110471806674335, "loss": 0.3737, "step": 276150 }, { "epoch": 79.44764096662831, "grad_norm": 1.7421677112579346, "learning_rate": 0.00041104718066743386, "loss": 0.4321, "step": 276160 }, { "epoch": 79.45051783659379, "grad_norm": 0.8177435398101807, "learning_rate": 0.00041098964326812426, "loss": 0.3798, "step": 276170 }, { "epoch": 79.45339470655927, "grad_norm": 1.420507788658142, "learning_rate": 0.0004109321058688147, "loss": 0.4227, "step": 276180 }, { "epoch": 79.45627157652474, "grad_norm": 1.8107831478118896, "learning_rate": 0.0004108745684695052, "loss": 0.3487, "step": 276190 }, { "epoch": 79.45914844649022, "grad_norm": 1.3088849782943726, "learning_rate": 0.00041081703107019563, "loss": 0.2816, "step": 276200 }, { "epoch": 79.4620253164557, "grad_norm": 1.2415502071380615, "learning_rate": 0.0004107594936708861, "loss": 0.3154, "step": 276210 }, { "epoch": 79.46490218642117, "grad_norm": 2.4312477111816406, "learning_rate": 0.00041070195627157654, "loss": 0.5081, "step": 276220 }, { "epoch": 79.46777905638665, "grad_norm": 0.8558283448219299, "learning_rate": 0.000410644418872267, "loss": 0.3908, "step": 276230 }, { "epoch": 79.47065592635212, "grad_norm": 2.1471688747406006, "learning_rate": 0.0004105868814729574, "loss": 0.4164, "step": 276240 }, { "epoch": 79.47353279631761, "grad_norm": 0.6864913702011108, "learning_rate": 0.0004105293440736479, "loss": 0.3721, "step": 276250 }, { "epoch": 79.47640966628309, "grad_norm": 1.0163847208023071, "learning_rate": 0.0004104718066743383, "loss": 0.366, "step": 276260 }, { "epoch": 79.47928653624857, "grad_norm": 1.013642430305481, "learning_rate": 0.00041041426927502877, "loss": 0.3729, "step": 276270 }, { "epoch": 79.48216340621404, "grad_norm": 0.9720398783683777, "learning_rate": 0.0004103567318757192, "loss": 0.3682, "step": 276280 }, { "epoch": 79.48504027617952, "grad_norm": 1.6400277614593506, "learning_rate": 0.0004102991944764097, "loss": 0.2951, "step": 276290 }, { "epoch": 79.487917146145, "grad_norm": 1.4331775903701782, "learning_rate": 0.00041024165707710013, "loss": 0.3641, "step": 276300 }, { "epoch": 79.49079401611047, "grad_norm": 1.0188097953796387, "learning_rate": 0.0004101841196777906, "loss": 0.4059, "step": 276310 }, { "epoch": 79.49367088607595, "grad_norm": 0.7523526549339294, "learning_rate": 0.00041012658227848104, "loss": 0.4042, "step": 276320 }, { "epoch": 79.49654775604142, "grad_norm": 0.634044885635376, "learning_rate": 0.00041006904487917144, "loss": 0.3904, "step": 276330 }, { "epoch": 79.4994246260069, "grad_norm": 1.167688250541687, "learning_rate": 0.00041001150747986195, "loss": 0.3191, "step": 276340 }, { "epoch": 79.50230149597238, "grad_norm": 0.9809685349464417, "learning_rate": 0.00040995397008055236, "loss": 0.3443, "step": 276350 }, { "epoch": 79.50517836593787, "grad_norm": 1.1606874465942383, "learning_rate": 0.0004098964326812428, "loss": 0.3449, "step": 276360 }, { "epoch": 79.50805523590334, "grad_norm": 1.393384575843811, "learning_rate": 0.00040983889528193327, "loss": 0.4681, "step": 276370 }, { "epoch": 79.51093210586882, "grad_norm": 1.0832030773162842, "learning_rate": 0.0004097813578826237, "loss": 0.3741, "step": 276380 }, { "epoch": 79.5138089758343, "grad_norm": 1.514054775238037, "learning_rate": 0.0004097238204833141, "loss": 0.3634, "step": 276390 }, { "epoch": 79.51668584579977, "grad_norm": 0.945450484752655, "learning_rate": 0.00040966628308400463, "loss": 0.3343, "step": 276400 }, { "epoch": 79.51956271576525, "grad_norm": 1.3526519536972046, "learning_rate": 0.0004096087456846951, "loss": 0.4041, "step": 276410 }, { "epoch": 79.52243958573072, "grad_norm": 1.5016878843307495, "learning_rate": 0.0004095512082853855, "loss": 0.4304, "step": 276420 }, { "epoch": 79.5253164556962, "grad_norm": 0.7131282091140747, "learning_rate": 0.000409493670886076, "loss": 0.3442, "step": 276430 }, { "epoch": 79.52819332566168, "grad_norm": 1.04751718044281, "learning_rate": 0.0004094361334867664, "loss": 0.4243, "step": 276440 }, { "epoch": 79.53107019562715, "grad_norm": 1.0248615741729736, "learning_rate": 0.00040937859608745686, "loss": 0.3375, "step": 276450 }, { "epoch": 79.53394706559264, "grad_norm": 1.2841757535934448, "learning_rate": 0.0004093210586881473, "loss": 0.3943, "step": 276460 }, { "epoch": 79.53682393555812, "grad_norm": 1.5331816673278809, "learning_rate": 0.00040926352128883777, "loss": 0.3395, "step": 276470 }, { "epoch": 79.5397008055236, "grad_norm": 0.6029337048530579, "learning_rate": 0.00040920598388952817, "loss": 0.2826, "step": 276480 }, { "epoch": 79.54257767548907, "grad_norm": 1.2917190790176392, "learning_rate": 0.0004091484464902186, "loss": 0.3497, "step": 276490 }, { "epoch": 79.54545454545455, "grad_norm": 0.9502928256988525, "learning_rate": 0.00040909090909090913, "loss": 0.3895, "step": 276500 }, { "epoch": 79.54833141542002, "grad_norm": 1.4139211177825928, "learning_rate": 0.00040903337169159954, "loss": 0.3729, "step": 276510 }, { "epoch": 79.5512082853855, "grad_norm": 1.677936315536499, "learning_rate": 0.00040897583429229, "loss": 0.3457, "step": 276520 }, { "epoch": 79.55408515535098, "grad_norm": 0.8270504474639893, "learning_rate": 0.00040891829689298045, "loss": 0.3293, "step": 276530 }, { "epoch": 79.55696202531645, "grad_norm": 1.728315830230713, "learning_rate": 0.0004088607594936709, "loss": 0.415, "step": 276540 }, { "epoch": 79.55983889528193, "grad_norm": 2.07739520072937, "learning_rate": 0.0004088032220943613, "loss": 0.3629, "step": 276550 }, { "epoch": 79.5627157652474, "grad_norm": 0.6845220923423767, "learning_rate": 0.0004087456846950518, "loss": 0.3019, "step": 276560 }, { "epoch": 79.5655926352129, "grad_norm": 0.7144395112991333, "learning_rate": 0.0004086881472957422, "loss": 0.2687, "step": 276570 }, { "epoch": 79.56846950517837, "grad_norm": 1.4839526414871216, "learning_rate": 0.00040863060989643267, "loss": 0.4739, "step": 276580 }, { "epoch": 79.57134637514385, "grad_norm": 1.5349171161651611, "learning_rate": 0.0004085730724971232, "loss": 0.4796, "step": 276590 }, { "epoch": 79.57422324510932, "grad_norm": 1.1774190664291382, "learning_rate": 0.0004085155350978136, "loss": 0.3811, "step": 276600 }, { "epoch": 79.5771001150748, "grad_norm": 1.3464850187301636, "learning_rate": 0.00040845799769850404, "loss": 0.3694, "step": 276610 }, { "epoch": 79.57997698504028, "grad_norm": 1.1186407804489136, "learning_rate": 0.0004084004602991945, "loss": 0.3553, "step": 276620 }, { "epoch": 79.58285385500575, "grad_norm": 1.3397537469863892, "learning_rate": 0.00040834292289988495, "loss": 0.3187, "step": 276630 }, { "epoch": 79.58573072497123, "grad_norm": 0.6181135773658752, "learning_rate": 0.00040828538550057535, "loss": 0.4334, "step": 276640 }, { "epoch": 79.5886075949367, "grad_norm": 1.7797242403030396, "learning_rate": 0.00040822784810126586, "loss": 0.3654, "step": 276650 }, { "epoch": 79.59148446490218, "grad_norm": 1.5902174711227417, "learning_rate": 0.00040817031070195626, "loss": 0.3117, "step": 276660 }, { "epoch": 79.59436133486767, "grad_norm": 1.1015149354934692, "learning_rate": 0.0004081127733026467, "loss": 0.3436, "step": 276670 }, { "epoch": 79.59723820483315, "grad_norm": 0.8199949264526367, "learning_rate": 0.00040805523590333717, "loss": 0.2867, "step": 276680 }, { "epoch": 79.60011507479862, "grad_norm": 1.4582329988479614, "learning_rate": 0.0004079976985040276, "loss": 0.3773, "step": 276690 }, { "epoch": 79.6029919447641, "grad_norm": 1.1434818506240845, "learning_rate": 0.0004079401611047181, "loss": 0.3297, "step": 276700 }, { "epoch": 79.60586881472958, "grad_norm": 1.1268763542175293, "learning_rate": 0.00040788262370540854, "loss": 0.3164, "step": 276710 }, { "epoch": 79.60874568469505, "grad_norm": 0.951265275478363, "learning_rate": 0.000407825086306099, "loss": 0.3705, "step": 276720 }, { "epoch": 79.61162255466053, "grad_norm": 0.9537535905838013, "learning_rate": 0.0004077675489067894, "loss": 0.3578, "step": 276730 }, { "epoch": 79.614499424626, "grad_norm": 0.9593846797943115, "learning_rate": 0.0004077100115074799, "loss": 0.3254, "step": 276740 }, { "epoch": 79.61737629459148, "grad_norm": 0.9924736022949219, "learning_rate": 0.0004076524741081703, "loss": 0.318, "step": 276750 }, { "epoch": 79.62025316455696, "grad_norm": 0.9281051754951477, "learning_rate": 0.00040759493670886076, "loss": 0.4039, "step": 276760 }, { "epoch": 79.62313003452243, "grad_norm": 1.2528040409088135, "learning_rate": 0.0004075373993095512, "loss": 0.3006, "step": 276770 }, { "epoch": 79.62600690448792, "grad_norm": 1.2979536056518555, "learning_rate": 0.00040747986191024167, "loss": 0.3545, "step": 276780 }, { "epoch": 79.6288837744534, "grad_norm": 7.280087947845459, "learning_rate": 0.0004074223245109321, "loss": 0.3718, "step": 276790 }, { "epoch": 79.63176064441888, "grad_norm": 1.070776104927063, "learning_rate": 0.0004073647871116226, "loss": 0.3664, "step": 276800 }, { "epoch": 79.63463751438435, "grad_norm": 1.7724311351776123, "learning_rate": 0.00040730724971231304, "loss": 0.3817, "step": 276810 }, { "epoch": 79.63751438434983, "grad_norm": 1.7724292278289795, "learning_rate": 0.00040724971231300344, "loss": 0.3518, "step": 276820 }, { "epoch": 79.6403912543153, "grad_norm": 1.4517827033996582, "learning_rate": 0.00040719217491369395, "loss": 0.3048, "step": 276830 }, { "epoch": 79.64326812428078, "grad_norm": 1.997441053390503, "learning_rate": 0.00040713463751438435, "loss": 0.4249, "step": 276840 }, { "epoch": 79.64614499424626, "grad_norm": 0.8953577280044556, "learning_rate": 0.0004070771001150748, "loss": 0.2788, "step": 276850 }, { "epoch": 79.64902186421173, "grad_norm": 1.0616025924682617, "learning_rate": 0.00040701956271576526, "loss": 0.3458, "step": 276860 }, { "epoch": 79.65189873417721, "grad_norm": 1.9288954734802246, "learning_rate": 0.0004069620253164557, "loss": 0.3766, "step": 276870 }, { "epoch": 79.6547756041427, "grad_norm": 1.3261792659759521, "learning_rate": 0.0004069044879171461, "loss": 0.4091, "step": 276880 }, { "epoch": 79.65765247410818, "grad_norm": 0.6556222438812256, "learning_rate": 0.00040684695051783663, "loss": 0.3107, "step": 276890 }, { "epoch": 79.66052934407365, "grad_norm": 1.0219635963439941, "learning_rate": 0.0004067894131185271, "loss": 0.3968, "step": 276900 }, { "epoch": 79.66340621403913, "grad_norm": 1.1513574123382568, "learning_rate": 0.0004067318757192175, "loss": 0.3073, "step": 276910 }, { "epoch": 79.6662830840046, "grad_norm": 0.8046562075614929, "learning_rate": 0.00040667433831990794, "loss": 0.3745, "step": 276920 }, { "epoch": 79.66915995397008, "grad_norm": 0.9071669578552246, "learning_rate": 0.0004066168009205984, "loss": 0.3659, "step": 276930 }, { "epoch": 79.67203682393556, "grad_norm": 1.1347678899765015, "learning_rate": 0.00040655926352128885, "loss": 0.2937, "step": 276940 }, { "epoch": 79.67491369390103, "grad_norm": 2.2608838081359863, "learning_rate": 0.00040650172612197925, "loss": 0.4164, "step": 276950 }, { "epoch": 79.67779056386651, "grad_norm": 1.2098090648651123, "learning_rate": 0.00040644418872266976, "loss": 0.3612, "step": 276960 }, { "epoch": 79.68066743383199, "grad_norm": 2.6595284938812256, "learning_rate": 0.00040638665132336016, "loss": 0.3866, "step": 276970 }, { "epoch": 79.68354430379746, "grad_norm": 0.6048811674118042, "learning_rate": 0.0004063291139240506, "loss": 0.3241, "step": 276980 }, { "epoch": 79.68642117376295, "grad_norm": 1.9450517892837524, "learning_rate": 0.00040627157652474113, "loss": 0.3359, "step": 276990 }, { "epoch": 79.68929804372843, "grad_norm": 0.8398169279098511, "learning_rate": 0.00040621403912543153, "loss": 0.3542, "step": 277000 }, { "epoch": 79.6921749136939, "grad_norm": 0.7684692144393921, "learning_rate": 0.000406156501726122, "loss": 0.3366, "step": 277010 }, { "epoch": 79.69505178365938, "grad_norm": 1.040465235710144, "learning_rate": 0.00040609896432681244, "loss": 0.3422, "step": 277020 }, { "epoch": 79.69792865362486, "grad_norm": 1.568764090538025, "learning_rate": 0.0004060414269275029, "loss": 0.3879, "step": 277030 }, { "epoch": 79.70080552359033, "grad_norm": 2.06197190284729, "learning_rate": 0.0004059838895281933, "loss": 0.4408, "step": 277040 }, { "epoch": 79.70368239355581, "grad_norm": 1.049639344215393, "learning_rate": 0.0004059263521288838, "loss": 0.3236, "step": 277050 }, { "epoch": 79.70655926352129, "grad_norm": 1.5825212001800537, "learning_rate": 0.0004058688147295742, "loss": 0.2989, "step": 277060 }, { "epoch": 79.70943613348676, "grad_norm": 2.2318763732910156, "learning_rate": 0.00040581127733026466, "loss": 0.3287, "step": 277070 }, { "epoch": 79.71231300345224, "grad_norm": 1.4307551383972168, "learning_rate": 0.0004057537399309552, "loss": 0.3126, "step": 277080 }, { "epoch": 79.71518987341773, "grad_norm": 0.8360889554023743, "learning_rate": 0.0004056962025316456, "loss": 0.3407, "step": 277090 }, { "epoch": 79.7180667433832, "grad_norm": 3.1031017303466797, "learning_rate": 0.00040563866513233603, "loss": 0.3408, "step": 277100 }, { "epoch": 79.72094361334868, "grad_norm": 1.808937668800354, "learning_rate": 0.0004055811277330265, "loss": 0.4114, "step": 277110 }, { "epoch": 79.72382048331416, "grad_norm": 3.7679011821746826, "learning_rate": 0.00040552359033371694, "loss": 0.2908, "step": 277120 }, { "epoch": 79.72669735327963, "grad_norm": 1.7952789068222046, "learning_rate": 0.00040546605293440734, "loss": 0.3388, "step": 277130 }, { "epoch": 79.72957422324511, "grad_norm": 1.4841388463974, "learning_rate": 0.00040540851553509785, "loss": 0.283, "step": 277140 }, { "epoch": 79.73245109321059, "grad_norm": 0.9174399971961975, "learning_rate": 0.00040535097813578825, "loss": 0.3885, "step": 277150 }, { "epoch": 79.73532796317606, "grad_norm": 1.7929636240005493, "learning_rate": 0.0004052934407364787, "loss": 0.4192, "step": 277160 }, { "epoch": 79.73820483314154, "grad_norm": 1.3732707500457764, "learning_rate": 0.00040523590333716916, "loss": 0.3237, "step": 277170 }, { "epoch": 79.74108170310701, "grad_norm": 0.8872475624084473, "learning_rate": 0.0004051783659378596, "loss": 0.3778, "step": 277180 }, { "epoch": 79.74395857307249, "grad_norm": 1.170381784439087, "learning_rate": 0.0004051208285385501, "loss": 0.3426, "step": 277190 }, { "epoch": 79.74683544303798, "grad_norm": 1.5867725610733032, "learning_rate": 0.00040506329113924053, "loss": 0.4144, "step": 277200 }, { "epoch": 79.74971231300346, "grad_norm": 0.9353463649749756, "learning_rate": 0.000405005753739931, "loss": 0.3582, "step": 277210 }, { "epoch": 79.75258918296893, "grad_norm": 1.0355610847473145, "learning_rate": 0.0004049482163406214, "loss": 0.4329, "step": 277220 }, { "epoch": 79.75546605293441, "grad_norm": 0.8408569693565369, "learning_rate": 0.0004048906789413119, "loss": 0.3495, "step": 277230 }, { "epoch": 79.75834292289989, "grad_norm": 2.028085947036743, "learning_rate": 0.0004048331415420023, "loss": 0.4669, "step": 277240 }, { "epoch": 79.76121979286536, "grad_norm": 1.2492456436157227, "learning_rate": 0.00040477560414269275, "loss": 0.294, "step": 277250 }, { "epoch": 79.76409666283084, "grad_norm": 1.2628084421157837, "learning_rate": 0.0004047180667433832, "loss": 0.3103, "step": 277260 }, { "epoch": 79.76697353279631, "grad_norm": 3.4175660610198975, "learning_rate": 0.00040466052934407367, "loss": 0.3423, "step": 277270 }, { "epoch": 79.76985040276179, "grad_norm": 0.7713915109634399, "learning_rate": 0.0004046029919447641, "loss": 0.3628, "step": 277280 }, { "epoch": 79.77272727272727, "grad_norm": 1.3895233869552612, "learning_rate": 0.0004045454545454546, "loss": 0.352, "step": 277290 }, { "epoch": 79.77560414269276, "grad_norm": 0.9885085225105286, "learning_rate": 0.00040448791714614503, "loss": 0.3191, "step": 277300 }, { "epoch": 79.77848101265823, "grad_norm": 0.9719255566596985, "learning_rate": 0.00040443037974683543, "loss": 0.3533, "step": 277310 }, { "epoch": 79.78135788262371, "grad_norm": 0.894758939743042, "learning_rate": 0.0004043728423475259, "loss": 0.3879, "step": 277320 }, { "epoch": 79.78423475258919, "grad_norm": 0.9349479675292969, "learning_rate": 0.00040431530494821634, "loss": 0.3833, "step": 277330 }, { "epoch": 79.78711162255466, "grad_norm": 2.061638116836548, "learning_rate": 0.0004042577675489068, "loss": 0.3194, "step": 277340 }, { "epoch": 79.78998849252014, "grad_norm": 0.5751280784606934, "learning_rate": 0.0004042002301495972, "loss": 0.2903, "step": 277350 }, { "epoch": 79.79286536248561, "grad_norm": 0.7310911417007446, "learning_rate": 0.0004041426927502877, "loss": 0.4079, "step": 277360 }, { "epoch": 79.79574223245109, "grad_norm": 0.697010338306427, "learning_rate": 0.0004040851553509781, "loss": 0.3817, "step": 277370 }, { "epoch": 79.79861910241657, "grad_norm": 1.455796241760254, "learning_rate": 0.00040402761795166857, "loss": 0.3719, "step": 277380 }, { "epoch": 79.80149597238204, "grad_norm": 0.8161887526512146, "learning_rate": 0.0004039700805523591, "loss": 0.3924, "step": 277390 }, { "epoch": 79.80437284234753, "grad_norm": 1.8057501316070557, "learning_rate": 0.0004039125431530495, "loss": 0.3527, "step": 277400 }, { "epoch": 79.80724971231301, "grad_norm": 0.9683705568313599, "learning_rate": 0.00040385500575373993, "loss": 0.4407, "step": 277410 }, { "epoch": 79.81012658227849, "grad_norm": 1.9813294410705566, "learning_rate": 0.0004037974683544304, "loss": 0.4082, "step": 277420 }, { "epoch": 79.81300345224396, "grad_norm": 1.6006698608398438, "learning_rate": 0.00040373993095512085, "loss": 0.3587, "step": 277430 }, { "epoch": 79.81588032220944, "grad_norm": 1.735852599143982, "learning_rate": 0.00040368239355581125, "loss": 0.3146, "step": 277440 }, { "epoch": 79.81875719217491, "grad_norm": 1.1582310199737549, "learning_rate": 0.00040362485615650176, "loss": 0.4344, "step": 277450 }, { "epoch": 79.82163406214039, "grad_norm": 1.1696845293045044, "learning_rate": 0.00040356731875719216, "loss": 0.3615, "step": 277460 }, { "epoch": 79.82451093210587, "grad_norm": 1.6408276557922363, "learning_rate": 0.0004035097813578826, "loss": 0.4367, "step": 277470 }, { "epoch": 79.82738780207134, "grad_norm": 2.508845329284668, "learning_rate": 0.0004034522439585731, "loss": 0.3251, "step": 277480 }, { "epoch": 79.83026467203682, "grad_norm": 0.6146604418754578, "learning_rate": 0.0004033947065592635, "loss": 0.3449, "step": 277490 }, { "epoch": 79.8331415420023, "grad_norm": 2.6257541179656982, "learning_rate": 0.000403337169159954, "loss": 0.3402, "step": 277500 }, { "epoch": 79.83601841196779, "grad_norm": 1.7855969667434692, "learning_rate": 0.00040327963176064443, "loss": 0.3844, "step": 277510 }, { "epoch": 79.83889528193326, "grad_norm": 1.2073159217834473, "learning_rate": 0.0004032220943613349, "loss": 0.3789, "step": 277520 }, { "epoch": 79.84177215189874, "grad_norm": 1.4652917385101318, "learning_rate": 0.0004031645569620253, "loss": 0.3134, "step": 277530 }, { "epoch": 79.84464902186421, "grad_norm": 1.4826656579971313, "learning_rate": 0.0004031070195627158, "loss": 0.2817, "step": 277540 }, { "epoch": 79.84752589182969, "grad_norm": 1.0596997737884521, "learning_rate": 0.0004030494821634062, "loss": 0.3711, "step": 277550 }, { "epoch": 79.85040276179517, "grad_norm": 1.1017980575561523, "learning_rate": 0.00040299194476409666, "loss": 0.3487, "step": 277560 }, { "epoch": 79.85327963176064, "grad_norm": 1.1514167785644531, "learning_rate": 0.00040293440736478717, "loss": 0.4483, "step": 277570 }, { "epoch": 79.85615650172612, "grad_norm": 0.7988877892494202, "learning_rate": 0.00040287686996547757, "loss": 0.3118, "step": 277580 }, { "epoch": 79.8590333716916, "grad_norm": 1.0057557821273804, "learning_rate": 0.000402819332566168, "loss": 0.4128, "step": 277590 }, { "epoch": 79.86191024165707, "grad_norm": 1.3597732782363892, "learning_rate": 0.0004027617951668585, "loss": 0.3864, "step": 277600 }, { "epoch": 79.86478711162256, "grad_norm": 1.4345669746398926, "learning_rate": 0.00040270425776754894, "loss": 0.3278, "step": 277610 }, { "epoch": 79.86766398158804, "grad_norm": 1.2118408679962158, "learning_rate": 0.00040264672036823934, "loss": 0.4118, "step": 277620 }, { "epoch": 79.87054085155351, "grad_norm": 1.2709933519363403, "learning_rate": 0.00040258918296892985, "loss": 0.4576, "step": 277630 }, { "epoch": 79.87341772151899, "grad_norm": 1.3426382541656494, "learning_rate": 0.00040253164556962025, "loss": 0.3007, "step": 277640 }, { "epoch": 79.87629459148447, "grad_norm": 1.4045950174331665, "learning_rate": 0.0004024741081703107, "loss": 0.404, "step": 277650 }, { "epoch": 79.87917146144994, "grad_norm": 1.330654263496399, "learning_rate": 0.00040241657077100116, "loss": 0.3657, "step": 277660 }, { "epoch": 79.88204833141542, "grad_norm": 0.9310428500175476, "learning_rate": 0.0004023590333716916, "loss": 0.4022, "step": 277670 }, { "epoch": 79.8849252013809, "grad_norm": 1.6375555992126465, "learning_rate": 0.00040230149597238207, "loss": 0.4019, "step": 277680 }, { "epoch": 79.88780207134637, "grad_norm": 1.249631643295288, "learning_rate": 0.0004022439585730725, "loss": 0.3506, "step": 277690 }, { "epoch": 79.89067894131185, "grad_norm": 1.7612360715866089, "learning_rate": 0.000402186421173763, "loss": 0.3433, "step": 277700 }, { "epoch": 79.89355581127732, "grad_norm": 1.0937135219573975, "learning_rate": 0.0004021288837744534, "loss": 0.3138, "step": 277710 }, { "epoch": 79.89643268124281, "grad_norm": 0.9754557609558105, "learning_rate": 0.00040207134637514384, "loss": 0.3238, "step": 277720 }, { "epoch": 79.89930955120829, "grad_norm": 1.185167670249939, "learning_rate": 0.0004020138089758343, "loss": 0.3061, "step": 277730 }, { "epoch": 79.90218642117377, "grad_norm": 1.073856234550476, "learning_rate": 0.00040195627157652475, "loss": 0.3287, "step": 277740 }, { "epoch": 79.90506329113924, "grad_norm": 1.4348965883255005, "learning_rate": 0.00040189873417721515, "loss": 0.4704, "step": 277750 }, { "epoch": 79.90794016110472, "grad_norm": 1.2766354084014893, "learning_rate": 0.00040184119677790566, "loss": 0.3962, "step": 277760 }, { "epoch": 79.9108170310702, "grad_norm": 1.1406136751174927, "learning_rate": 0.0004017836593785961, "loss": 0.4004, "step": 277770 }, { "epoch": 79.91369390103567, "grad_norm": 1.1586273908615112, "learning_rate": 0.0004017261219792865, "loss": 0.4537, "step": 277780 }, { "epoch": 79.91657077100115, "grad_norm": 2.7017641067504883, "learning_rate": 0.000401668584579977, "loss": 0.4335, "step": 277790 }, { "epoch": 79.91944764096662, "grad_norm": 1.1968286037445068, "learning_rate": 0.00040161104718066743, "loss": 0.5612, "step": 277800 }, { "epoch": 79.9223245109321, "grad_norm": 1.1523898839950562, "learning_rate": 0.0004015535097813579, "loss": 0.3975, "step": 277810 }, { "epoch": 79.92520138089759, "grad_norm": 1.1581624746322632, "learning_rate": 0.00040149597238204834, "loss": 0.3431, "step": 277820 }, { "epoch": 79.92807825086307, "grad_norm": 1.2965173721313477, "learning_rate": 0.0004014384349827388, "loss": 0.3429, "step": 277830 }, { "epoch": 79.93095512082854, "grad_norm": 2.1882243156433105, "learning_rate": 0.0004013808975834292, "loss": 0.3648, "step": 277840 }, { "epoch": 79.93383199079402, "grad_norm": 1.5423920154571533, "learning_rate": 0.0004013233601841197, "loss": 0.39, "step": 277850 }, { "epoch": 79.9367088607595, "grad_norm": 1.0725334882736206, "learning_rate": 0.0004012658227848101, "loss": 0.3327, "step": 277860 }, { "epoch": 79.93958573072497, "grad_norm": 1.9931577444076538, "learning_rate": 0.00040120828538550056, "loss": 0.4579, "step": 277870 }, { "epoch": 79.94246260069045, "grad_norm": 0.7178724408149719, "learning_rate": 0.00040115074798619107, "loss": 0.2757, "step": 277880 }, { "epoch": 79.94533947065592, "grad_norm": 0.7298173904418945, "learning_rate": 0.0004010932105868815, "loss": 0.3634, "step": 277890 }, { "epoch": 79.9482163406214, "grad_norm": 1.2151833772659302, "learning_rate": 0.00040103567318757193, "loss": 0.32, "step": 277900 }, { "epoch": 79.95109321058688, "grad_norm": 1.6500005722045898, "learning_rate": 0.0004009781357882624, "loss": 0.4486, "step": 277910 }, { "epoch": 79.95397008055235, "grad_norm": 1.2115949392318726, "learning_rate": 0.00040092059838895284, "loss": 0.3367, "step": 277920 }, { "epoch": 79.95684695051784, "grad_norm": 1.6567983627319336, "learning_rate": 0.00040086306098964324, "loss": 0.3711, "step": 277930 }, { "epoch": 79.95972382048332, "grad_norm": 2.4383578300476074, "learning_rate": 0.00040080552359033375, "loss": 0.3448, "step": 277940 }, { "epoch": 79.9626006904488, "grad_norm": 0.9950677156448364, "learning_rate": 0.00040074798619102415, "loss": 0.3057, "step": 277950 }, { "epoch": 79.96547756041427, "grad_norm": 1.3602266311645508, "learning_rate": 0.0004006904487917146, "loss": 0.2908, "step": 277960 }, { "epoch": 79.96835443037975, "grad_norm": 0.9113002419471741, "learning_rate": 0.0004006329113924051, "loss": 0.3575, "step": 277970 }, { "epoch": 79.97123130034522, "grad_norm": 2.07071590423584, "learning_rate": 0.0004005753739930955, "loss": 0.3858, "step": 277980 }, { "epoch": 79.9741081703107, "grad_norm": 2.2775843143463135, "learning_rate": 0.000400517836593786, "loss": 0.4468, "step": 277990 }, { "epoch": 79.97698504027618, "grad_norm": 0.8203022480010986, "learning_rate": 0.00040046029919447643, "loss": 0.3542, "step": 278000 }, { "epoch": 79.97986191024165, "grad_norm": 0.6268705725669861, "learning_rate": 0.0004004027617951669, "loss": 0.3931, "step": 278010 }, { "epoch": 79.98273878020713, "grad_norm": 1.0525044202804565, "learning_rate": 0.0004003452243958573, "loss": 0.3748, "step": 278020 }, { "epoch": 79.98561565017262, "grad_norm": 0.8820282816886902, "learning_rate": 0.0004002876869965478, "loss": 0.3851, "step": 278030 }, { "epoch": 79.9884925201381, "grad_norm": 0.9578371644020081, "learning_rate": 0.0004002301495972382, "loss": 0.4239, "step": 278040 }, { "epoch": 79.99136939010357, "grad_norm": 1.737146019935608, "learning_rate": 0.00040017261219792865, "loss": 0.4169, "step": 278050 }, { "epoch": 79.99424626006905, "grad_norm": 2.8848373889923096, "learning_rate": 0.0004001150747986191, "loss": 0.363, "step": 278060 }, { "epoch": 79.99712313003452, "grad_norm": 0.8112225532531738, "learning_rate": 0.00040005753739930956, "loss": 0.4055, "step": 278070 }, { "epoch": 80.0, "grad_norm": 1.4291924238204956, "learning_rate": 0.0004, "loss": 0.3393, "step": 278080 }, { "epoch": 80.00287686996548, "grad_norm": 0.7462745904922485, "learning_rate": 0.0003999424626006905, "loss": 0.2887, "step": 278090 }, { "epoch": 80.00575373993095, "grad_norm": 1.3537135124206543, "learning_rate": 0.00039988492520138093, "loss": 0.3728, "step": 278100 }, { "epoch": 80.00863060989643, "grad_norm": 1.0575469732284546, "learning_rate": 0.00039982738780207133, "loss": 0.3678, "step": 278110 }, { "epoch": 80.0115074798619, "grad_norm": 0.9963817000389099, "learning_rate": 0.00039976985040276184, "loss": 0.2855, "step": 278120 }, { "epoch": 80.01438434982738, "grad_norm": 1.171028733253479, "learning_rate": 0.00039971231300345224, "loss": 0.3701, "step": 278130 }, { "epoch": 80.01726121979287, "grad_norm": 1.9848942756652832, "learning_rate": 0.0003996547756041427, "loss": 0.3862, "step": 278140 }, { "epoch": 80.02013808975835, "grad_norm": 1.4366881847381592, "learning_rate": 0.0003995972382048331, "loss": 0.4086, "step": 278150 }, { "epoch": 80.02301495972382, "grad_norm": 1.1006598472595215, "learning_rate": 0.0003995397008055236, "loss": 0.2673, "step": 278160 }, { "epoch": 80.0258918296893, "grad_norm": 1.3286467790603638, "learning_rate": 0.00039948216340621406, "loss": 0.361, "step": 278170 }, { "epoch": 80.02876869965478, "grad_norm": 0.706030011177063, "learning_rate": 0.00039942462600690447, "loss": 0.2888, "step": 278180 }, { "epoch": 80.03164556962025, "grad_norm": 1.1811296939849854, "learning_rate": 0.000399367088607595, "loss": 0.3136, "step": 278190 }, { "epoch": 80.03452243958573, "grad_norm": 1.3084911108016968, "learning_rate": 0.0003993095512082854, "loss": 0.3443, "step": 278200 }, { "epoch": 80.0373993095512, "grad_norm": 0.9421892166137695, "learning_rate": 0.00039925201380897583, "loss": 0.3002, "step": 278210 }, { "epoch": 80.04027617951668, "grad_norm": 0.8646296262741089, "learning_rate": 0.0003991944764096663, "loss": 0.3176, "step": 278220 }, { "epoch": 80.04315304948216, "grad_norm": 0.9293525815010071, "learning_rate": 0.00039913693901035674, "loss": 0.3452, "step": 278230 }, { "epoch": 80.04602991944765, "grad_norm": 1.230905532836914, "learning_rate": 0.00039907940161104714, "loss": 0.3435, "step": 278240 }, { "epoch": 80.04890678941312, "grad_norm": 1.4308284521102905, "learning_rate": 0.00039902186421173765, "loss": 0.3097, "step": 278250 }, { "epoch": 80.0517836593786, "grad_norm": 0.7287003397941589, "learning_rate": 0.0003989643268124281, "loss": 0.3117, "step": 278260 }, { "epoch": 80.05466052934408, "grad_norm": 1.3257827758789062, "learning_rate": 0.0003989067894131185, "loss": 0.3627, "step": 278270 }, { "epoch": 80.05753739930955, "grad_norm": 0.93743896484375, "learning_rate": 0.000398849252013809, "loss": 0.3024, "step": 278280 }, { "epoch": 80.06041426927503, "grad_norm": 1.2624844312667847, "learning_rate": 0.0003987917146144994, "loss": 0.4261, "step": 278290 }, { "epoch": 80.0632911392405, "grad_norm": 1.3394736051559448, "learning_rate": 0.0003987341772151899, "loss": 0.3134, "step": 278300 }, { "epoch": 80.06616800920598, "grad_norm": 1.048356533050537, "learning_rate": 0.00039867663981588033, "loss": 0.3086, "step": 278310 }, { "epoch": 80.06904487917146, "grad_norm": 0.795937180519104, "learning_rate": 0.0003986191024165708, "loss": 0.3081, "step": 278320 }, { "epoch": 80.07192174913693, "grad_norm": 0.9549130797386169, "learning_rate": 0.0003985615650172612, "loss": 0.3937, "step": 278330 }, { "epoch": 80.07479861910241, "grad_norm": 0.6160338521003723, "learning_rate": 0.0003985040276179517, "loss": 0.2578, "step": 278340 }, { "epoch": 80.0776754890679, "grad_norm": 1.2227036952972412, "learning_rate": 0.0003984464902186421, "loss": 0.3216, "step": 278350 }, { "epoch": 80.08055235903338, "grad_norm": 1.6973860263824463, "learning_rate": 0.00039838895281933256, "loss": 0.3872, "step": 278360 }, { "epoch": 80.08342922899885, "grad_norm": 1.418662667274475, "learning_rate": 0.00039833141542002307, "loss": 0.3111, "step": 278370 }, { "epoch": 80.08630609896433, "grad_norm": 1.0928765535354614, "learning_rate": 0.00039827387802071347, "loss": 0.3571, "step": 278380 }, { "epoch": 80.0891829689298, "grad_norm": 1.2330968379974365, "learning_rate": 0.0003982163406214039, "loss": 0.3398, "step": 278390 }, { "epoch": 80.09205983889528, "grad_norm": 1.520943522453308, "learning_rate": 0.0003981588032220944, "loss": 0.3854, "step": 278400 }, { "epoch": 80.09493670886076, "grad_norm": 1.1874358654022217, "learning_rate": 0.00039810126582278483, "loss": 0.3295, "step": 278410 }, { "epoch": 80.09781357882623, "grad_norm": 1.298437476158142, "learning_rate": 0.00039804372842347524, "loss": 0.3191, "step": 278420 }, { "epoch": 80.10069044879171, "grad_norm": 1.6629587411880493, "learning_rate": 0.00039798619102416574, "loss": 0.3502, "step": 278430 }, { "epoch": 80.10356731875719, "grad_norm": 1.940476894378662, "learning_rate": 0.00039792865362485615, "loss": 0.3527, "step": 278440 }, { "epoch": 80.10644418872268, "grad_norm": 1.120798945426941, "learning_rate": 0.0003978711162255466, "loss": 0.3377, "step": 278450 }, { "epoch": 80.10932105868815, "grad_norm": 2.0348052978515625, "learning_rate": 0.0003978135788262371, "loss": 0.3347, "step": 278460 }, { "epoch": 80.11219792865363, "grad_norm": 1.0912604331970215, "learning_rate": 0.0003977560414269275, "loss": 0.3079, "step": 278470 }, { "epoch": 80.1150747986191, "grad_norm": 0.9703229665756226, "learning_rate": 0.00039769850402761797, "loss": 0.4143, "step": 278480 }, { "epoch": 80.11795166858458, "grad_norm": 1.0833632946014404, "learning_rate": 0.0003976409666283084, "loss": 0.3324, "step": 278490 }, { "epoch": 80.12082853855006, "grad_norm": 1.0039218664169312, "learning_rate": 0.0003975834292289989, "loss": 0.4236, "step": 278500 }, { "epoch": 80.12370540851553, "grad_norm": 2.090287923812866, "learning_rate": 0.0003975258918296893, "loss": 0.4584, "step": 278510 }, { "epoch": 80.12658227848101, "grad_norm": 0.6373893022537231, "learning_rate": 0.0003974683544303798, "loss": 0.2958, "step": 278520 }, { "epoch": 80.12945914844649, "grad_norm": 0.6863846182823181, "learning_rate": 0.0003974108170310702, "loss": 0.3461, "step": 278530 }, { "epoch": 80.13233601841196, "grad_norm": 1.783524990081787, "learning_rate": 0.00039735327963176065, "loss": 0.385, "step": 278540 }, { "epoch": 80.13521288837744, "grad_norm": 1.5159212350845337, "learning_rate": 0.00039729574223245105, "loss": 0.3864, "step": 278550 }, { "epoch": 80.13808975834293, "grad_norm": 0.7427319884300232, "learning_rate": 0.00039723820483314156, "loss": 0.3378, "step": 278560 }, { "epoch": 80.1409666283084, "grad_norm": 0.8464586734771729, "learning_rate": 0.000397180667433832, "loss": 0.3305, "step": 278570 }, { "epoch": 80.14384349827388, "grad_norm": 1.7029829025268555, "learning_rate": 0.0003971231300345224, "loss": 0.3489, "step": 278580 }, { "epoch": 80.14672036823936, "grad_norm": 0.8198148608207703, "learning_rate": 0.0003970655926352129, "loss": 0.2766, "step": 278590 }, { "epoch": 80.14959723820483, "grad_norm": 0.906840980052948, "learning_rate": 0.0003970080552359033, "loss": 0.345, "step": 278600 }, { "epoch": 80.15247410817031, "grad_norm": 1.3165922164916992, "learning_rate": 0.0003969505178365938, "loss": 0.3072, "step": 278610 }, { "epoch": 80.15535097813579, "grad_norm": 1.001196265220642, "learning_rate": 0.00039689298043728424, "loss": 0.3069, "step": 278620 }, { "epoch": 80.15822784810126, "grad_norm": 0.7486385703086853, "learning_rate": 0.0003968354430379747, "loss": 0.2986, "step": 278630 }, { "epoch": 80.16110471806674, "grad_norm": 1.6123813390731812, "learning_rate": 0.0003967779056386651, "loss": 0.361, "step": 278640 }, { "epoch": 80.16398158803221, "grad_norm": 1.4694936275482178, "learning_rate": 0.0003967203682393556, "loss": 0.3236, "step": 278650 }, { "epoch": 80.1668584579977, "grad_norm": 1.0566495656967163, "learning_rate": 0.00039666283084004606, "loss": 0.2784, "step": 278660 }, { "epoch": 80.16973532796318, "grad_norm": 1.7175281047821045, "learning_rate": 0.00039660529344073646, "loss": 0.3586, "step": 278670 }, { "epoch": 80.17261219792866, "grad_norm": 2.235748767852783, "learning_rate": 0.00039654775604142697, "loss": 0.3846, "step": 278680 }, { "epoch": 80.17548906789413, "grad_norm": 0.7682421803474426, "learning_rate": 0.00039649021864211737, "loss": 0.2835, "step": 278690 }, { "epoch": 80.17836593785961, "grad_norm": 1.3507587909698486, "learning_rate": 0.0003964326812428078, "loss": 0.406, "step": 278700 }, { "epoch": 80.18124280782509, "grad_norm": 0.7365249395370483, "learning_rate": 0.0003963751438434983, "loss": 0.2561, "step": 278710 }, { "epoch": 80.18411967779056, "grad_norm": 0.7587319016456604, "learning_rate": 0.00039631760644418874, "loss": 0.3188, "step": 278720 }, { "epoch": 80.18699654775604, "grad_norm": 0.7379431128501892, "learning_rate": 0.00039626006904487914, "loss": 0.3095, "step": 278730 }, { "epoch": 80.18987341772151, "grad_norm": 1.1358522176742554, "learning_rate": 0.00039620253164556965, "loss": 0.3125, "step": 278740 }, { "epoch": 80.19275028768699, "grad_norm": 0.96229088306427, "learning_rate": 0.0003961449942462601, "loss": 0.3774, "step": 278750 }, { "epoch": 80.19562715765247, "grad_norm": 0.7664108276367188, "learning_rate": 0.0003960874568469505, "loss": 0.3057, "step": 278760 }, { "epoch": 80.19850402761796, "grad_norm": 1.0296673774719238, "learning_rate": 0.000396029919447641, "loss": 0.3206, "step": 278770 }, { "epoch": 80.20138089758343, "grad_norm": 0.9488387703895569, "learning_rate": 0.0003959723820483314, "loss": 0.3596, "step": 278780 }, { "epoch": 80.20425776754891, "grad_norm": 0.7282472252845764, "learning_rate": 0.00039591484464902187, "loss": 0.278, "step": 278790 }, { "epoch": 80.20713463751439, "grad_norm": 1.7961348295211792, "learning_rate": 0.00039585730724971233, "loss": 0.4359, "step": 278800 }, { "epoch": 80.21001150747986, "grad_norm": 0.5169141292572021, "learning_rate": 0.0003957997698504028, "loss": 0.2731, "step": 278810 }, { "epoch": 80.21288837744534, "grad_norm": 1.0792405605316162, "learning_rate": 0.0003957422324510932, "loss": 0.3026, "step": 278820 }, { "epoch": 80.21576524741081, "grad_norm": 1.6729366779327393, "learning_rate": 0.0003956846950517837, "loss": 0.3973, "step": 278830 }, { "epoch": 80.21864211737629, "grad_norm": 1.5046321153640747, "learning_rate": 0.0003956271576524741, "loss": 0.3417, "step": 278840 }, { "epoch": 80.22151898734177, "grad_norm": 1.6355091333389282, "learning_rate": 0.00039556962025316455, "loss": 0.3832, "step": 278850 }, { "epoch": 80.22439585730724, "grad_norm": 1.47383451461792, "learning_rate": 0.00039551208285385506, "loss": 0.3484, "step": 278860 }, { "epoch": 80.22727272727273, "grad_norm": 0.8328605890274048, "learning_rate": 0.00039545454545454546, "loss": 0.3125, "step": 278870 }, { "epoch": 80.23014959723821, "grad_norm": 0.6059011816978455, "learning_rate": 0.0003953970080552359, "loss": 0.3915, "step": 278880 }, { "epoch": 80.23302646720369, "grad_norm": 0.9762990474700928, "learning_rate": 0.00039533947065592637, "loss": 0.3095, "step": 278890 }, { "epoch": 80.23590333716916, "grad_norm": 1.3675614595413208, "learning_rate": 0.00039528193325661683, "loss": 0.3043, "step": 278900 }, { "epoch": 80.23878020713464, "grad_norm": 1.02919340133667, "learning_rate": 0.00039522439585730723, "loss": 0.3472, "step": 278910 }, { "epoch": 80.24165707710011, "grad_norm": 1.9114298820495605, "learning_rate": 0.00039516685845799774, "loss": 0.393, "step": 278920 }, { "epoch": 80.24453394706559, "grad_norm": 2.717468738555908, "learning_rate": 0.00039510932105868814, "loss": 0.3149, "step": 278930 }, { "epoch": 80.24741081703107, "grad_norm": 0.8554133176803589, "learning_rate": 0.0003950517836593786, "loss": 0.3749, "step": 278940 }, { "epoch": 80.25028768699654, "grad_norm": 2.27494478225708, "learning_rate": 0.0003949942462600691, "loss": 0.3114, "step": 278950 }, { "epoch": 80.25316455696202, "grad_norm": 1.536879301071167, "learning_rate": 0.0003949367088607595, "loss": 0.2725, "step": 278960 }, { "epoch": 80.25604142692751, "grad_norm": 1.2387733459472656, "learning_rate": 0.00039487917146144996, "loss": 0.4292, "step": 278970 }, { "epoch": 80.25891829689299, "grad_norm": 0.9756880402565002, "learning_rate": 0.00039482163406214036, "loss": 0.4019, "step": 278980 }, { "epoch": 80.26179516685846, "grad_norm": 1.7810560464859009, "learning_rate": 0.0003947640966628309, "loss": 0.3284, "step": 278990 }, { "epoch": 80.26467203682394, "grad_norm": 2.206089735031128, "learning_rate": 0.0003947065592635213, "loss": 0.4277, "step": 279000 }, { "epoch": 80.26754890678941, "grad_norm": 1.2660765647888184, "learning_rate": 0.00039464902186421173, "loss": 0.3426, "step": 279010 }, { "epoch": 80.27042577675489, "grad_norm": 1.1445775032043457, "learning_rate": 0.0003945914844649022, "loss": 0.2936, "step": 279020 }, { "epoch": 80.27330264672037, "grad_norm": 0.937697172164917, "learning_rate": 0.00039453394706559264, "loss": 0.339, "step": 279030 }, { "epoch": 80.27617951668584, "grad_norm": 1.9440743923187256, "learning_rate": 0.00039447640966628304, "loss": 0.376, "step": 279040 }, { "epoch": 80.27905638665132, "grad_norm": 0.8011101484298706, "learning_rate": 0.00039441887226697355, "loss": 0.3551, "step": 279050 }, { "epoch": 80.2819332566168, "grad_norm": 1.0570937395095825, "learning_rate": 0.000394361334867664, "loss": 0.2847, "step": 279060 }, { "epoch": 80.28481012658227, "grad_norm": 1.1149871349334717, "learning_rate": 0.0003943037974683544, "loss": 0.3213, "step": 279070 }, { "epoch": 80.28768699654776, "grad_norm": 1.000190019607544, "learning_rate": 0.0003942462600690449, "loss": 0.3477, "step": 279080 }, { "epoch": 80.29056386651324, "grad_norm": 0.9415506720542908, "learning_rate": 0.0003941887226697353, "loss": 0.3731, "step": 279090 }, { "epoch": 80.29344073647871, "grad_norm": 1.4310932159423828, "learning_rate": 0.0003941311852704258, "loss": 0.2715, "step": 279100 }, { "epoch": 80.29631760644419, "grad_norm": 1.1509510278701782, "learning_rate": 0.00039407364787111623, "loss": 0.3199, "step": 279110 }, { "epoch": 80.29919447640967, "grad_norm": 1.5273723602294922, "learning_rate": 0.0003940161104718067, "loss": 0.2929, "step": 279120 }, { "epoch": 80.30207134637514, "grad_norm": 2.355015993118286, "learning_rate": 0.0003939585730724971, "loss": 0.3592, "step": 279130 }, { "epoch": 80.30494821634062, "grad_norm": 2.1148595809936523, "learning_rate": 0.0003939010356731876, "loss": 0.2838, "step": 279140 }, { "epoch": 80.3078250863061, "grad_norm": 1.208115816116333, "learning_rate": 0.00039384349827387805, "loss": 0.3447, "step": 279150 }, { "epoch": 80.31070195627157, "grad_norm": 0.7111691236495972, "learning_rate": 0.00039378596087456845, "loss": 0.3248, "step": 279160 }, { "epoch": 80.31357882623705, "grad_norm": 1.3479743003845215, "learning_rate": 0.00039372842347525896, "loss": 0.387, "step": 279170 }, { "epoch": 80.31645569620254, "grad_norm": 1.6773658990859985, "learning_rate": 0.00039367088607594937, "loss": 0.3208, "step": 279180 }, { "epoch": 80.31933256616801, "grad_norm": 0.8866918087005615, "learning_rate": 0.0003936133486766398, "loss": 0.3204, "step": 279190 }, { "epoch": 80.32220943613349, "grad_norm": 0.8200383186340332, "learning_rate": 0.0003935558112773303, "loss": 0.3251, "step": 279200 }, { "epoch": 80.32508630609897, "grad_norm": 1.1915000677108765, "learning_rate": 0.00039349827387802073, "loss": 0.336, "step": 279210 }, { "epoch": 80.32796317606444, "grad_norm": 1.0208662748336792, "learning_rate": 0.00039344073647871113, "loss": 0.322, "step": 279220 }, { "epoch": 80.33084004602992, "grad_norm": 1.6571457386016846, "learning_rate": 0.00039338319907940164, "loss": 0.3729, "step": 279230 }, { "epoch": 80.3337169159954, "grad_norm": 1.2793093919754028, "learning_rate": 0.0003933256616800921, "loss": 0.3342, "step": 279240 }, { "epoch": 80.33659378596087, "grad_norm": 1.0034242868423462, "learning_rate": 0.0003932681242807825, "loss": 0.3001, "step": 279250 }, { "epoch": 80.33947065592635, "grad_norm": 1.1679062843322754, "learning_rate": 0.000393210586881473, "loss": 0.3091, "step": 279260 }, { "epoch": 80.34234752589182, "grad_norm": 1.4580720663070679, "learning_rate": 0.0003931530494821634, "loss": 0.3109, "step": 279270 }, { "epoch": 80.3452243958573, "grad_norm": 1.4198815822601318, "learning_rate": 0.00039309551208285387, "loss": 0.3371, "step": 279280 }, { "epoch": 80.34810126582279, "grad_norm": 0.9745889902114868, "learning_rate": 0.0003930379746835443, "loss": 0.3756, "step": 279290 }, { "epoch": 80.35097813578827, "grad_norm": 1.2779569625854492, "learning_rate": 0.0003929804372842348, "loss": 0.3259, "step": 279300 }, { "epoch": 80.35385500575374, "grad_norm": 1.3133745193481445, "learning_rate": 0.0003929228998849252, "loss": 0.3733, "step": 279310 }, { "epoch": 80.35673187571922, "grad_norm": 1.252874493598938, "learning_rate": 0.0003928653624856157, "loss": 0.3321, "step": 279320 }, { "epoch": 80.3596087456847, "grad_norm": 1.655285120010376, "learning_rate": 0.0003928078250863061, "loss": 0.338, "step": 279330 }, { "epoch": 80.36248561565017, "grad_norm": 3.341975212097168, "learning_rate": 0.00039275028768699655, "loss": 0.3164, "step": 279340 }, { "epoch": 80.36536248561565, "grad_norm": 2.3199853897094727, "learning_rate": 0.00039269275028768705, "loss": 0.4328, "step": 279350 }, { "epoch": 80.36823935558112, "grad_norm": 1.30487859249115, "learning_rate": 0.00039263521288837746, "loss": 0.3145, "step": 279360 }, { "epoch": 80.3711162255466, "grad_norm": 0.8842413425445557, "learning_rate": 0.0003925776754890679, "loss": 0.3446, "step": 279370 }, { "epoch": 80.37399309551208, "grad_norm": 0.7706120610237122, "learning_rate": 0.0003925201380897583, "loss": 0.2901, "step": 279380 }, { "epoch": 80.37686996547757, "grad_norm": 0.7586238384246826, "learning_rate": 0.0003924626006904488, "loss": 0.3603, "step": 279390 }, { "epoch": 80.37974683544304, "grad_norm": 0.6601267457008362, "learning_rate": 0.0003924050632911392, "loss": 0.355, "step": 279400 }, { "epoch": 80.38262370540852, "grad_norm": 2.7190911769866943, "learning_rate": 0.0003923475258918297, "loss": 0.3568, "step": 279410 }, { "epoch": 80.385500575374, "grad_norm": 0.8664339780807495, "learning_rate": 0.00039228998849252013, "loss": 0.3537, "step": 279420 }, { "epoch": 80.38837744533947, "grad_norm": 1.8595136404037476, "learning_rate": 0.0003922324510932106, "loss": 0.3873, "step": 279430 }, { "epoch": 80.39125431530495, "grad_norm": 1.3564159870147705, "learning_rate": 0.00039217491369390105, "loss": 0.2849, "step": 279440 }, { "epoch": 80.39413118527042, "grad_norm": 1.0727890729904175, "learning_rate": 0.0003921173762945915, "loss": 0.3427, "step": 279450 }, { "epoch": 80.3970080552359, "grad_norm": 1.0688749551773071, "learning_rate": 0.00039205983889528196, "loss": 0.3318, "step": 279460 }, { "epoch": 80.39988492520138, "grad_norm": 2.668050765991211, "learning_rate": 0.00039200230149597236, "loss": 0.3663, "step": 279470 }, { "epoch": 80.40276179516685, "grad_norm": 0.8882501125335693, "learning_rate": 0.00039194476409666287, "loss": 0.3572, "step": 279480 }, { "epoch": 80.40563866513233, "grad_norm": 1.4379130601882935, "learning_rate": 0.00039188722669735327, "loss": 0.3171, "step": 279490 }, { "epoch": 80.40851553509782, "grad_norm": 1.2615771293640137, "learning_rate": 0.0003918296892980437, "loss": 0.3096, "step": 279500 }, { "epoch": 80.4113924050633, "grad_norm": 0.886115312576294, "learning_rate": 0.0003917721518987342, "loss": 0.3529, "step": 279510 }, { "epoch": 80.41426927502877, "grad_norm": 1.0093352794647217, "learning_rate": 0.00039171461449942464, "loss": 0.3529, "step": 279520 }, { "epoch": 80.41714614499425, "grad_norm": 1.16233491897583, "learning_rate": 0.00039165707710011504, "loss": 0.3506, "step": 279530 }, { "epoch": 80.42002301495972, "grad_norm": 0.9607611298561096, "learning_rate": 0.00039159953970080555, "loss": 0.3394, "step": 279540 }, { "epoch": 80.4228998849252, "grad_norm": 0.9855663776397705, "learning_rate": 0.000391542002301496, "loss": 0.3938, "step": 279550 }, { "epoch": 80.42577675489068, "grad_norm": 1.76327645778656, "learning_rate": 0.0003914844649021864, "loss": 0.2763, "step": 279560 }, { "epoch": 80.42865362485615, "grad_norm": 0.6713857650756836, "learning_rate": 0.0003914269275028769, "loss": 0.3765, "step": 279570 }, { "epoch": 80.43153049482163, "grad_norm": 1.763370156288147, "learning_rate": 0.0003913693901035673, "loss": 0.3664, "step": 279580 }, { "epoch": 80.4344073647871, "grad_norm": 2.466172218322754, "learning_rate": 0.00039131185270425777, "loss": 0.4307, "step": 279590 }, { "epoch": 80.4372842347526, "grad_norm": 1.4507884979248047, "learning_rate": 0.0003912543153049482, "loss": 0.3784, "step": 279600 }, { "epoch": 80.44016110471807, "grad_norm": 1.6320098638534546, "learning_rate": 0.0003911967779056387, "loss": 0.3852, "step": 279610 }, { "epoch": 80.44303797468355, "grad_norm": 1.0723031759262085, "learning_rate": 0.0003911392405063291, "loss": 0.3899, "step": 279620 }, { "epoch": 80.44591484464902, "grad_norm": 1.2876179218292236, "learning_rate": 0.0003910817031070196, "loss": 0.3536, "step": 279630 }, { "epoch": 80.4487917146145, "grad_norm": 0.8736258745193481, "learning_rate": 0.00039102416570771005, "loss": 0.3162, "step": 279640 }, { "epoch": 80.45166858457998, "grad_norm": 1.9837045669555664, "learning_rate": 0.00039096662830840045, "loss": 0.3716, "step": 279650 }, { "epoch": 80.45454545454545, "grad_norm": 1.0270787477493286, "learning_rate": 0.00039090909090909096, "loss": 0.4127, "step": 279660 }, { "epoch": 80.45742232451093, "grad_norm": 2.2530596256256104, "learning_rate": 0.00039085155350978136, "loss": 0.3819, "step": 279670 }, { "epoch": 80.4602991944764, "grad_norm": 1.1476858854293823, "learning_rate": 0.0003907940161104718, "loss": 0.3113, "step": 279680 }, { "epoch": 80.46317606444188, "grad_norm": 2.3375630378723145, "learning_rate": 0.00039073647871116227, "loss": 0.3599, "step": 279690 }, { "epoch": 80.46605293440736, "grad_norm": 0.9427737593650818, "learning_rate": 0.0003906789413118527, "loss": 0.3123, "step": 279700 }, { "epoch": 80.46892980437285, "grad_norm": 0.8909839391708374, "learning_rate": 0.00039062140391254313, "loss": 0.394, "step": 279710 }, { "epoch": 80.47180667433832, "grad_norm": 0.9971868991851807, "learning_rate": 0.00039056386651323364, "loss": 0.3359, "step": 279720 }, { "epoch": 80.4746835443038, "grad_norm": 1.8093290328979492, "learning_rate": 0.0003905063291139241, "loss": 0.3382, "step": 279730 }, { "epoch": 80.47756041426928, "grad_norm": 1.2118160724639893, "learning_rate": 0.0003904487917146145, "loss": 0.405, "step": 279740 }, { "epoch": 80.48043728423475, "grad_norm": 1.8806426525115967, "learning_rate": 0.000390391254315305, "loss": 0.37, "step": 279750 }, { "epoch": 80.48331415420023, "grad_norm": 2.1667585372924805, "learning_rate": 0.0003903337169159954, "loss": 0.4669, "step": 279760 }, { "epoch": 80.4861910241657, "grad_norm": 0.7571621537208557, "learning_rate": 0.00039027617951668586, "loss": 0.3412, "step": 279770 }, { "epoch": 80.48906789413118, "grad_norm": 1.5606467723846436, "learning_rate": 0.00039021864211737626, "loss": 0.4338, "step": 279780 }, { "epoch": 80.49194476409666, "grad_norm": 0.898669421672821, "learning_rate": 0.00039016110471806677, "loss": 0.3495, "step": 279790 }, { "epoch": 80.49482163406213, "grad_norm": 1.0900609493255615, "learning_rate": 0.0003901035673187572, "loss": 0.3535, "step": 279800 }, { "epoch": 80.49769850402762, "grad_norm": 0.9249295592308044, "learning_rate": 0.00039004602991944763, "loss": 0.3552, "step": 279810 }, { "epoch": 80.5005753739931, "grad_norm": 1.3142669200897217, "learning_rate": 0.0003899884925201381, "loss": 0.4267, "step": 279820 }, { "epoch": 80.50345224395858, "grad_norm": 1.0617825984954834, "learning_rate": 0.00038993095512082854, "loss": 0.2959, "step": 279830 }, { "epoch": 80.50632911392405, "grad_norm": 1.00859534740448, "learning_rate": 0.000389873417721519, "loss": 0.3205, "step": 279840 }, { "epoch": 80.50920598388953, "grad_norm": 1.029807686805725, "learning_rate": 0.00038981588032220945, "loss": 0.3485, "step": 279850 }, { "epoch": 80.512082853855, "grad_norm": 0.9841498136520386, "learning_rate": 0.0003897583429228999, "loss": 0.3953, "step": 279860 }, { "epoch": 80.51495972382048, "grad_norm": 1.0523806810379028, "learning_rate": 0.0003897008055235903, "loss": 0.3178, "step": 279870 }, { "epoch": 80.51783659378596, "grad_norm": 1.9213117361068726, "learning_rate": 0.0003896432681242808, "loss": 0.3102, "step": 279880 }, { "epoch": 80.52071346375143, "grad_norm": 1.583924651145935, "learning_rate": 0.0003895857307249712, "loss": 0.309, "step": 279890 }, { "epoch": 80.52359033371691, "grad_norm": 0.914112389087677, "learning_rate": 0.0003895281933256617, "loss": 0.3567, "step": 279900 }, { "epoch": 80.52646720368239, "grad_norm": 0.8469533920288086, "learning_rate": 0.00038947065592635213, "loss": 0.3201, "step": 279910 }, { "epoch": 80.52934407364788, "grad_norm": 1.2755348682403564, "learning_rate": 0.0003894131185270426, "loss": 0.3464, "step": 279920 }, { "epoch": 80.53222094361335, "grad_norm": 0.9761792421340942, "learning_rate": 0.00038935558112773304, "loss": 0.3176, "step": 279930 }, { "epoch": 80.53509781357883, "grad_norm": 1.098271369934082, "learning_rate": 0.0003892980437284235, "loss": 0.343, "step": 279940 }, { "epoch": 80.5379746835443, "grad_norm": 1.8022598028182983, "learning_rate": 0.00038924050632911395, "loss": 0.3651, "step": 279950 }, { "epoch": 80.54085155350978, "grad_norm": 0.7092345356941223, "learning_rate": 0.00038918296892980435, "loss": 0.3463, "step": 279960 }, { "epoch": 80.54372842347526, "grad_norm": 1.1228796243667603, "learning_rate": 0.00038912543153049486, "loss": 0.3795, "step": 279970 }, { "epoch": 80.54660529344073, "grad_norm": 0.8912434577941895, "learning_rate": 0.00038906789413118526, "loss": 0.3292, "step": 279980 }, { "epoch": 80.54948216340621, "grad_norm": 1.165989637374878, "learning_rate": 0.0003890103567318757, "loss": 0.3793, "step": 279990 }, { "epoch": 80.55235903337169, "grad_norm": 0.9768196940422058, "learning_rate": 0.0003889528193325662, "loss": 0.3331, "step": 280000 }, { "epoch": 80.55523590333716, "grad_norm": 1.5088194608688354, "learning_rate": 0.00038889528193325663, "loss": 0.3381, "step": 280010 }, { "epoch": 80.55811277330265, "grad_norm": 1.2508224248886108, "learning_rate": 0.00038883774453394703, "loss": 0.3306, "step": 280020 }, { "epoch": 80.56098964326813, "grad_norm": 0.5535815358161926, "learning_rate": 0.00038878020713463754, "loss": 0.3515, "step": 280030 }, { "epoch": 80.5638665132336, "grad_norm": 0.8639653325080872, "learning_rate": 0.000388722669735328, "loss": 0.3315, "step": 280040 }, { "epoch": 80.56674338319908, "grad_norm": 1.2972906827926636, "learning_rate": 0.0003886651323360184, "loss": 0.3209, "step": 280050 }, { "epoch": 80.56962025316456, "grad_norm": 1.0426214933395386, "learning_rate": 0.0003886075949367089, "loss": 0.412, "step": 280060 }, { "epoch": 80.57249712313003, "grad_norm": 1.0027753114700317, "learning_rate": 0.0003885500575373993, "loss": 0.2738, "step": 280070 }, { "epoch": 80.57537399309551, "grad_norm": 1.1005369424819946, "learning_rate": 0.00038849252013808976, "loss": 0.3328, "step": 280080 }, { "epoch": 80.57825086306099, "grad_norm": 0.7824742197990417, "learning_rate": 0.0003884349827387802, "loss": 0.314, "step": 280090 }, { "epoch": 80.58112773302646, "grad_norm": 1.7848150730133057, "learning_rate": 0.0003883774453394707, "loss": 0.3912, "step": 280100 }, { "epoch": 80.58400460299194, "grad_norm": 2.369267702102661, "learning_rate": 0.0003883199079401611, "loss": 0.4027, "step": 280110 }, { "epoch": 80.58688147295742, "grad_norm": 1.4811383485794067, "learning_rate": 0.0003882623705408516, "loss": 0.3515, "step": 280120 }, { "epoch": 80.5897583429229, "grad_norm": 2.229923725128174, "learning_rate": 0.00038820483314154204, "loss": 0.3874, "step": 280130 }, { "epoch": 80.59263521288838, "grad_norm": 1.1978123188018799, "learning_rate": 0.00038814729574223244, "loss": 0.3872, "step": 280140 }, { "epoch": 80.59551208285386, "grad_norm": 1.8478894233703613, "learning_rate": 0.00038808975834292295, "loss": 0.3893, "step": 280150 }, { "epoch": 80.59838895281933, "grad_norm": 0.6773821115493774, "learning_rate": 0.00038803222094361335, "loss": 0.3235, "step": 280160 }, { "epoch": 80.60126582278481, "grad_norm": 2.2720553874969482, "learning_rate": 0.0003879746835443038, "loss": 0.4072, "step": 280170 }, { "epoch": 80.60414269275029, "grad_norm": 1.2085576057434082, "learning_rate": 0.0003879171461449942, "loss": 0.3841, "step": 280180 }, { "epoch": 80.60701956271576, "grad_norm": 1.0435608625411987, "learning_rate": 0.0003878596087456847, "loss": 0.3271, "step": 280190 }, { "epoch": 80.60989643268124, "grad_norm": 1.1175727844238281, "learning_rate": 0.0003878020713463751, "loss": 0.3036, "step": 280200 }, { "epoch": 80.61277330264672, "grad_norm": 1.0669569969177246, "learning_rate": 0.0003877445339470656, "loss": 0.4034, "step": 280210 }, { "epoch": 80.61565017261219, "grad_norm": 1.7461715936660767, "learning_rate": 0.0003876869965477561, "loss": 0.3639, "step": 280220 }, { "epoch": 80.61852704257768, "grad_norm": 1.7057666778564453, "learning_rate": 0.0003876294591484465, "loss": 0.3513, "step": 280230 }, { "epoch": 80.62140391254316, "grad_norm": 2.026287794113159, "learning_rate": 0.00038757192174913694, "loss": 0.4071, "step": 280240 }, { "epoch": 80.62428078250863, "grad_norm": 1.8688313961029053, "learning_rate": 0.0003875143843498274, "loss": 0.2845, "step": 280250 }, { "epoch": 80.62715765247411, "grad_norm": 1.4027029275894165, "learning_rate": 0.00038745684695051786, "loss": 0.3648, "step": 280260 }, { "epoch": 80.63003452243959, "grad_norm": 0.8937132954597473, "learning_rate": 0.00038739930955120826, "loss": 0.3393, "step": 280270 }, { "epoch": 80.63291139240506, "grad_norm": 1.0141611099243164, "learning_rate": 0.00038734177215189877, "loss": 0.3582, "step": 280280 }, { "epoch": 80.63578826237054, "grad_norm": 1.4428889751434326, "learning_rate": 0.00038728423475258917, "loss": 0.3109, "step": 280290 }, { "epoch": 80.63866513233602, "grad_norm": 1.2103996276855469, "learning_rate": 0.0003872266973532796, "loss": 0.4174, "step": 280300 }, { "epoch": 80.64154200230149, "grad_norm": 1.229605793952942, "learning_rate": 0.0003871691599539701, "loss": 0.3243, "step": 280310 }, { "epoch": 80.64441887226697, "grad_norm": 1.8311779499053955, "learning_rate": 0.00038711162255466053, "loss": 0.3372, "step": 280320 }, { "epoch": 80.64729574223244, "grad_norm": 0.9047578573226929, "learning_rate": 0.000387054085155351, "loss": 0.3222, "step": 280330 }, { "epoch": 80.65017261219793, "grad_norm": 0.9213146567344666, "learning_rate": 0.00038699654775604144, "loss": 0.3023, "step": 280340 }, { "epoch": 80.65304948216341, "grad_norm": 1.3000719547271729, "learning_rate": 0.0003869390103567319, "loss": 0.4054, "step": 280350 }, { "epoch": 80.65592635212889, "grad_norm": 2.454871416091919, "learning_rate": 0.0003868814729574223, "loss": 0.3952, "step": 280360 }, { "epoch": 80.65880322209436, "grad_norm": 1.2735782861709595, "learning_rate": 0.0003868239355581128, "loss": 0.3427, "step": 280370 }, { "epoch": 80.66168009205984, "grad_norm": 1.4256068468093872, "learning_rate": 0.0003867663981588032, "loss": 0.3544, "step": 280380 }, { "epoch": 80.66455696202532, "grad_norm": 2.9489362239837646, "learning_rate": 0.00038670886075949367, "loss": 0.381, "step": 280390 }, { "epoch": 80.66743383199079, "grad_norm": 1.3057738542556763, "learning_rate": 0.0003866513233601841, "loss": 0.3314, "step": 280400 }, { "epoch": 80.67031070195627, "grad_norm": 0.9891929030418396, "learning_rate": 0.0003865937859608746, "loss": 0.3143, "step": 280410 }, { "epoch": 80.67318757192174, "grad_norm": 1.0688953399658203, "learning_rate": 0.00038653624856156503, "loss": 0.3722, "step": 280420 }, { "epoch": 80.67606444188722, "grad_norm": 0.9478650689125061, "learning_rate": 0.0003864787111622555, "loss": 0.3555, "step": 280430 }, { "epoch": 80.67894131185271, "grad_norm": 1.3795346021652222, "learning_rate": 0.00038642117376294595, "loss": 0.543, "step": 280440 }, { "epoch": 80.68181818181819, "grad_norm": 1.2361774444580078, "learning_rate": 0.00038636363636363635, "loss": 0.3178, "step": 280450 }, { "epoch": 80.68469505178366, "grad_norm": 1.5482341051101685, "learning_rate": 0.00038630609896432686, "loss": 0.294, "step": 280460 }, { "epoch": 80.68757192174914, "grad_norm": 2.1386823654174805, "learning_rate": 0.00038624856156501726, "loss": 0.4313, "step": 280470 }, { "epoch": 80.69044879171462, "grad_norm": 1.5732520818710327, "learning_rate": 0.0003861910241657077, "loss": 0.3169, "step": 280480 }, { "epoch": 80.69332566168009, "grad_norm": 1.8125252723693848, "learning_rate": 0.00038613348676639817, "loss": 0.3842, "step": 280490 }, { "epoch": 80.69620253164557, "grad_norm": 1.2718244791030884, "learning_rate": 0.0003860759493670886, "loss": 0.3987, "step": 280500 }, { "epoch": 80.69907940161104, "grad_norm": 1.4033535718917847, "learning_rate": 0.000386018411967779, "loss": 0.422, "step": 280510 }, { "epoch": 80.70195627157652, "grad_norm": 1.9233744144439697, "learning_rate": 0.00038596087456846954, "loss": 0.4235, "step": 280520 }, { "epoch": 80.704833141542, "grad_norm": 0.9843934178352356, "learning_rate": 0.00038590333716916, "loss": 0.282, "step": 280530 }, { "epoch": 80.70771001150747, "grad_norm": 0.6888728737831116, "learning_rate": 0.0003858457997698504, "loss": 0.3904, "step": 280540 }, { "epoch": 80.71058688147296, "grad_norm": 1.213219404220581, "learning_rate": 0.0003857882623705409, "loss": 0.3039, "step": 280550 }, { "epoch": 80.71346375143844, "grad_norm": 0.8649118542671204, "learning_rate": 0.0003857307249712313, "loss": 0.2925, "step": 280560 }, { "epoch": 80.71634062140392, "grad_norm": 0.9688210487365723, "learning_rate": 0.00038567318757192176, "loss": 0.2936, "step": 280570 }, { "epoch": 80.71921749136939, "grad_norm": 0.9668636322021484, "learning_rate": 0.00038561565017261216, "loss": 0.3974, "step": 280580 }, { "epoch": 80.72209436133487, "grad_norm": 1.3392469882965088, "learning_rate": 0.00038555811277330267, "loss": 0.3659, "step": 280590 }, { "epoch": 80.72497123130034, "grad_norm": 3.2758724689483643, "learning_rate": 0.00038550057537399307, "loss": 0.3499, "step": 280600 }, { "epoch": 80.72784810126582, "grad_norm": 1.2514628171920776, "learning_rate": 0.0003854430379746835, "loss": 0.328, "step": 280610 }, { "epoch": 80.7307249712313, "grad_norm": 0.9512790441513062, "learning_rate": 0.00038538550057537404, "loss": 0.3475, "step": 280620 }, { "epoch": 80.73360184119677, "grad_norm": 0.5463466048240662, "learning_rate": 0.00038532796317606444, "loss": 0.376, "step": 280630 }, { "epoch": 80.73647871116225, "grad_norm": 1.039745807647705, "learning_rate": 0.0003852704257767549, "loss": 0.3724, "step": 280640 }, { "epoch": 80.73935558112774, "grad_norm": 1.0962074995040894, "learning_rate": 0.00038521288837744535, "loss": 0.3271, "step": 280650 }, { "epoch": 80.74223245109322, "grad_norm": 0.8732842803001404, "learning_rate": 0.0003851553509781358, "loss": 0.32, "step": 280660 }, { "epoch": 80.74510932105869, "grad_norm": 1.2381640672683716, "learning_rate": 0.0003850978135788262, "loss": 0.3706, "step": 280670 }, { "epoch": 80.74798619102417, "grad_norm": 1.6324763298034668, "learning_rate": 0.0003850402761795167, "loss": 0.3643, "step": 280680 }, { "epoch": 80.75086306098964, "grad_norm": 0.953526496887207, "learning_rate": 0.0003849827387802071, "loss": 0.4968, "step": 280690 }, { "epoch": 80.75373993095512, "grad_norm": 0.9499760866165161, "learning_rate": 0.00038492520138089757, "loss": 0.3953, "step": 280700 }, { "epoch": 80.7566168009206, "grad_norm": 0.9760888814926147, "learning_rate": 0.0003848676639815881, "loss": 0.3723, "step": 280710 }, { "epoch": 80.75949367088607, "grad_norm": 1.4451653957366943, "learning_rate": 0.0003848101265822785, "loss": 0.3183, "step": 280720 }, { "epoch": 80.76237054085155, "grad_norm": 1.3323830366134644, "learning_rate": 0.00038475258918296894, "loss": 0.3793, "step": 280730 }, { "epoch": 80.76524741081703, "grad_norm": 1.397811770439148, "learning_rate": 0.0003846950517836594, "loss": 0.3024, "step": 280740 }, { "epoch": 80.7681242807825, "grad_norm": 1.0606498718261719, "learning_rate": 0.00038463751438434985, "loss": 0.4021, "step": 280750 }, { "epoch": 80.77100115074799, "grad_norm": 1.3569124937057495, "learning_rate": 0.00038457997698504025, "loss": 0.321, "step": 280760 }, { "epoch": 80.77387802071347, "grad_norm": 3.0798332691192627, "learning_rate": 0.00038452243958573076, "loss": 0.3919, "step": 280770 }, { "epoch": 80.77675489067894, "grad_norm": 1.4064407348632812, "learning_rate": 0.00038446490218642116, "loss": 0.3065, "step": 280780 }, { "epoch": 80.77963176064442, "grad_norm": 0.865540087223053, "learning_rate": 0.0003844073647871116, "loss": 0.3545, "step": 280790 }, { "epoch": 80.7825086306099, "grad_norm": 0.8713932633399963, "learning_rate": 0.00038434982738780207, "loss": 0.3959, "step": 280800 }, { "epoch": 80.78538550057537, "grad_norm": 1.8726282119750977, "learning_rate": 0.00038429228998849253, "loss": 0.3986, "step": 280810 }, { "epoch": 80.78826237054085, "grad_norm": 1.360418438911438, "learning_rate": 0.000384234752589183, "loss": 0.3853, "step": 280820 }, { "epoch": 80.79113924050633, "grad_norm": 1.2895199060440063, "learning_rate": 0.00038417721518987344, "loss": 0.3327, "step": 280830 }, { "epoch": 80.7940161104718, "grad_norm": 1.2563564777374268, "learning_rate": 0.0003841196777905639, "loss": 0.4083, "step": 280840 }, { "epoch": 80.79689298043728, "grad_norm": 0.6667423844337463, "learning_rate": 0.0003840621403912543, "loss": 0.3612, "step": 280850 }, { "epoch": 80.79976985040277, "grad_norm": 0.6519535183906555, "learning_rate": 0.0003840046029919448, "loss": 0.3078, "step": 280860 }, { "epoch": 80.80264672036824, "grad_norm": 1.0152956247329712, "learning_rate": 0.0003839470655926352, "loss": 0.3351, "step": 280870 }, { "epoch": 80.80552359033372, "grad_norm": 2.159998655319214, "learning_rate": 0.00038388952819332566, "loss": 0.4132, "step": 280880 }, { "epoch": 80.8084004602992, "grad_norm": 0.8597202897071838, "learning_rate": 0.0003838319907940161, "loss": 0.2984, "step": 280890 }, { "epoch": 80.81127733026467, "grad_norm": 1.162033200263977, "learning_rate": 0.0003837744533947066, "loss": 0.3619, "step": 280900 }, { "epoch": 80.81415420023015, "grad_norm": 1.2240657806396484, "learning_rate": 0.00038371691599539703, "loss": 0.3069, "step": 280910 }, { "epoch": 80.81703107019563, "grad_norm": 1.3498852252960205, "learning_rate": 0.0003836593785960875, "loss": 0.3022, "step": 280920 }, { "epoch": 80.8199079401611, "grad_norm": 1.8092814683914185, "learning_rate": 0.00038360184119677794, "loss": 0.3505, "step": 280930 }, { "epoch": 80.82278481012658, "grad_norm": 1.5266457796096802, "learning_rate": 0.00038354430379746834, "loss": 0.2919, "step": 280940 }, { "epoch": 80.82566168009205, "grad_norm": 0.8178979754447937, "learning_rate": 0.00038348676639815885, "loss": 0.3241, "step": 280950 }, { "epoch": 80.82853855005754, "grad_norm": 1.1651127338409424, "learning_rate": 0.00038342922899884925, "loss": 0.3358, "step": 280960 }, { "epoch": 80.83141542002302, "grad_norm": 0.7735565900802612, "learning_rate": 0.0003833716915995397, "loss": 0.3884, "step": 280970 }, { "epoch": 80.8342922899885, "grad_norm": 1.2997690439224243, "learning_rate": 0.0003833141542002301, "loss": 0.3792, "step": 280980 }, { "epoch": 80.83716915995397, "grad_norm": 1.4338921308517456, "learning_rate": 0.0003832566168009206, "loss": 0.3859, "step": 280990 }, { "epoch": 80.84004602991945, "grad_norm": 0.7568663954734802, "learning_rate": 0.000383199079401611, "loss": 0.3152, "step": 281000 }, { "epoch": 80.84292289988493, "grad_norm": 1.1097697019577026, "learning_rate": 0.0003831415420023015, "loss": 0.392, "step": 281010 }, { "epoch": 80.8457997698504, "grad_norm": 0.9347650408744812, "learning_rate": 0.000383084004602992, "loss": 0.4098, "step": 281020 }, { "epoch": 80.84867663981588, "grad_norm": 1.0300787687301636, "learning_rate": 0.0003830264672036824, "loss": 0.3615, "step": 281030 }, { "epoch": 80.85155350978135, "grad_norm": 1.2370402812957764, "learning_rate": 0.00038296892980437284, "loss": 0.3392, "step": 281040 }, { "epoch": 80.85443037974683, "grad_norm": 2.207839250564575, "learning_rate": 0.0003829113924050633, "loss": 0.4599, "step": 281050 }, { "epoch": 80.8573072497123, "grad_norm": 0.9343259334564209, "learning_rate": 0.00038285385500575375, "loss": 0.4575, "step": 281060 }, { "epoch": 80.8601841196778, "grad_norm": 0.866000771522522, "learning_rate": 0.00038279631760644415, "loss": 0.3424, "step": 281070 }, { "epoch": 80.86306098964327, "grad_norm": 0.7992457151412964, "learning_rate": 0.00038273878020713466, "loss": 0.3748, "step": 281080 }, { "epoch": 80.86593785960875, "grad_norm": 1.3606715202331543, "learning_rate": 0.00038268124280782507, "loss": 0.3062, "step": 281090 }, { "epoch": 80.86881472957423, "grad_norm": 2.832404851913452, "learning_rate": 0.0003826237054085155, "loss": 0.3683, "step": 281100 }, { "epoch": 80.8716915995397, "grad_norm": 1.1920835971832275, "learning_rate": 0.00038256616800920603, "loss": 0.2922, "step": 281110 }, { "epoch": 80.87456846950518, "grad_norm": 0.841789186000824, "learning_rate": 0.00038250863060989643, "loss": 0.3448, "step": 281120 }, { "epoch": 80.87744533947065, "grad_norm": 0.823671281337738, "learning_rate": 0.0003824510932105869, "loss": 0.4194, "step": 281130 }, { "epoch": 80.88032220943613, "grad_norm": 1.1410986185073853, "learning_rate": 0.00038239355581127734, "loss": 0.31, "step": 281140 }, { "epoch": 80.8831990794016, "grad_norm": 1.2604365348815918, "learning_rate": 0.0003823360184119678, "loss": 0.3094, "step": 281150 }, { "epoch": 80.88607594936708, "grad_norm": 1.6201690435409546, "learning_rate": 0.0003822784810126582, "loss": 0.3861, "step": 281160 }, { "epoch": 80.88895281933257, "grad_norm": 2.37937068939209, "learning_rate": 0.0003822209436133487, "loss": 0.3871, "step": 281170 }, { "epoch": 80.89182968929805, "grad_norm": 1.340641736984253, "learning_rate": 0.0003821634062140391, "loss": 0.3108, "step": 281180 }, { "epoch": 80.89470655926353, "grad_norm": 1.2715139389038086, "learning_rate": 0.00038210586881472957, "loss": 0.3359, "step": 281190 }, { "epoch": 80.897583429229, "grad_norm": 1.5367980003356934, "learning_rate": 0.0003820483314154201, "loss": 0.3601, "step": 281200 }, { "epoch": 80.90046029919448, "grad_norm": 0.7771279215812683, "learning_rate": 0.0003819907940161105, "loss": 0.3161, "step": 281210 }, { "epoch": 80.90333716915995, "grad_norm": 1.858001708984375, "learning_rate": 0.00038193325661680093, "loss": 0.3558, "step": 281220 }, { "epoch": 80.90621403912543, "grad_norm": 1.3251880407333374, "learning_rate": 0.0003818757192174914, "loss": 0.4508, "step": 281230 }, { "epoch": 80.9090909090909, "grad_norm": 0.9011223316192627, "learning_rate": 0.00038181818181818184, "loss": 0.3775, "step": 281240 }, { "epoch": 80.91196777905638, "grad_norm": 1.5112065076828003, "learning_rate": 0.00038176064441887225, "loss": 0.4087, "step": 281250 }, { "epoch": 80.91484464902186, "grad_norm": 1.1059452295303345, "learning_rate": 0.00038170310701956275, "loss": 0.4083, "step": 281260 }, { "epoch": 80.91772151898734, "grad_norm": 1.1321266889572144, "learning_rate": 0.00038164556962025316, "loss": 0.3102, "step": 281270 }, { "epoch": 80.92059838895283, "grad_norm": 2.359388589859009, "learning_rate": 0.0003815880322209436, "loss": 0.4202, "step": 281280 }, { "epoch": 80.9234752589183, "grad_norm": 1.8103986978530884, "learning_rate": 0.00038153049482163407, "loss": 0.364, "step": 281290 }, { "epoch": 80.92635212888378, "grad_norm": 1.6490919589996338, "learning_rate": 0.0003814729574223245, "loss": 0.315, "step": 281300 }, { "epoch": 80.92922899884925, "grad_norm": 1.2983050346374512, "learning_rate": 0.000381415420023015, "loss": 0.427, "step": 281310 }, { "epoch": 80.93210586881473, "grad_norm": 1.415027379989624, "learning_rate": 0.00038135788262370543, "loss": 0.3601, "step": 281320 }, { "epoch": 80.9349827387802, "grad_norm": 0.8373035192489624, "learning_rate": 0.0003813003452243959, "loss": 0.4476, "step": 281330 }, { "epoch": 80.93785960874568, "grad_norm": 1.3639564514160156, "learning_rate": 0.0003812428078250863, "loss": 0.4513, "step": 281340 }, { "epoch": 80.94073647871116, "grad_norm": 1.6940953731536865, "learning_rate": 0.0003811852704257768, "loss": 0.383, "step": 281350 }, { "epoch": 80.94361334867664, "grad_norm": 1.3296786546707153, "learning_rate": 0.0003811277330264672, "loss": 0.3509, "step": 281360 }, { "epoch": 80.94649021864211, "grad_norm": 2.240902900695801, "learning_rate": 0.00038107019562715766, "loss": 0.332, "step": 281370 }, { "epoch": 80.9493670886076, "grad_norm": 2.3872013092041016, "learning_rate": 0.0003810126582278481, "loss": 0.3879, "step": 281380 }, { "epoch": 80.95224395857308, "grad_norm": 1.4386943578720093, "learning_rate": 0.00038095512082853857, "loss": 0.344, "step": 281390 }, { "epoch": 80.95512082853855, "grad_norm": 1.0287610292434692, "learning_rate": 0.000380897583429229, "loss": 0.3568, "step": 281400 }, { "epoch": 80.95799769850403, "grad_norm": 0.7633845210075378, "learning_rate": 0.0003808400460299194, "loss": 0.3283, "step": 281410 }, { "epoch": 80.9608745684695, "grad_norm": 1.2613611221313477, "learning_rate": 0.00038078250863060993, "loss": 0.3744, "step": 281420 }, { "epoch": 80.96375143843498, "grad_norm": 0.8383964896202087, "learning_rate": 0.00038072497123130034, "loss": 0.3821, "step": 281430 }, { "epoch": 80.96662830840046, "grad_norm": 2.049636125564575, "learning_rate": 0.0003806674338319908, "loss": 0.5019, "step": 281440 }, { "epoch": 80.96950517836594, "grad_norm": 1.2739049196243286, "learning_rate": 0.00038060989643268125, "loss": 0.4161, "step": 281450 }, { "epoch": 80.97238204833141, "grad_norm": 0.9132530093193054, "learning_rate": 0.0003805523590333717, "loss": 0.3773, "step": 281460 }, { "epoch": 80.97525891829689, "grad_norm": 1.2223618030548096, "learning_rate": 0.0003804948216340621, "loss": 0.3117, "step": 281470 }, { "epoch": 80.97813578826236, "grad_norm": 1.0075101852416992, "learning_rate": 0.0003804372842347526, "loss": 0.2889, "step": 281480 }, { "epoch": 80.98101265822785, "grad_norm": 1.8253527879714966, "learning_rate": 0.000380379746835443, "loss": 0.4186, "step": 281490 }, { "epoch": 80.98388952819333, "grad_norm": 1.6844072341918945, "learning_rate": 0.00038032220943613347, "loss": 0.5574, "step": 281500 }, { "epoch": 80.9867663981588, "grad_norm": 2.3007633686065674, "learning_rate": 0.000380264672036824, "loss": 0.3708, "step": 281510 }, { "epoch": 80.98964326812428, "grad_norm": 0.65743088722229, "learning_rate": 0.0003802071346375144, "loss": 0.3975, "step": 281520 }, { "epoch": 80.99252013808976, "grad_norm": 1.3321415185928345, "learning_rate": 0.00038014959723820484, "loss": 0.369, "step": 281530 }, { "epoch": 80.99539700805524, "grad_norm": 1.9526569843292236, "learning_rate": 0.0003800920598388953, "loss": 0.3157, "step": 281540 }, { "epoch": 80.99827387802071, "grad_norm": 1.1572396755218506, "learning_rate": 0.00038003452243958575, "loss": 0.3555, "step": 281550 }, { "epoch": 81.00115074798619, "grad_norm": 1.0920253992080688, "learning_rate": 0.00037997698504027615, "loss": 0.2878, "step": 281560 }, { "epoch": 81.00402761795166, "grad_norm": 0.7993696928024292, "learning_rate": 0.00037991944764096666, "loss": 0.3403, "step": 281570 }, { "epoch": 81.00690448791714, "grad_norm": 1.497460126876831, "learning_rate": 0.00037986191024165706, "loss": 0.3768, "step": 281580 }, { "epoch": 81.00978135788263, "grad_norm": 1.041506290435791, "learning_rate": 0.0003798043728423475, "loss": 0.3432, "step": 281590 }, { "epoch": 81.0126582278481, "grad_norm": 1.352480173110962, "learning_rate": 0.000379746835443038, "loss": 0.4094, "step": 281600 }, { "epoch": 81.01553509781358, "grad_norm": 0.770788311958313, "learning_rate": 0.0003796892980437284, "loss": 0.3348, "step": 281610 }, { "epoch": 81.01841196777906, "grad_norm": 0.8665176630020142, "learning_rate": 0.0003796317606444189, "loss": 0.3433, "step": 281620 }, { "epoch": 81.02128883774454, "grad_norm": 0.9175428748130798, "learning_rate": 0.00037957422324510934, "loss": 0.3179, "step": 281630 }, { "epoch": 81.02416570771001, "grad_norm": 1.1587806940078735, "learning_rate": 0.0003795166858457998, "loss": 0.2742, "step": 281640 }, { "epoch": 81.02704257767549, "grad_norm": 0.9416159391403198, "learning_rate": 0.0003794591484464902, "loss": 0.2959, "step": 281650 }, { "epoch": 81.02991944764096, "grad_norm": 1.0581095218658447, "learning_rate": 0.0003794016110471807, "loss": 0.3126, "step": 281660 }, { "epoch": 81.03279631760644, "grad_norm": 0.924826443195343, "learning_rate": 0.0003793440736478711, "loss": 0.2741, "step": 281670 }, { "epoch": 81.03567318757192, "grad_norm": 7.801962852478027, "learning_rate": 0.00037928653624856156, "loss": 0.3062, "step": 281680 }, { "epoch": 81.03855005753739, "grad_norm": 1.1081414222717285, "learning_rate": 0.00037922899884925207, "loss": 0.3508, "step": 281690 }, { "epoch": 81.04142692750288, "grad_norm": 2.1224660873413086, "learning_rate": 0.00037917146144994247, "loss": 0.3685, "step": 281700 }, { "epoch": 81.04430379746836, "grad_norm": 1.4478949308395386, "learning_rate": 0.00037911392405063293, "loss": 0.3139, "step": 281710 }, { "epoch": 81.04718066743384, "grad_norm": 0.5526843667030334, "learning_rate": 0.0003790563866513234, "loss": 0.2888, "step": 281720 }, { "epoch": 81.05005753739931, "grad_norm": 0.7069319486618042, "learning_rate": 0.00037899884925201384, "loss": 0.3234, "step": 281730 }, { "epoch": 81.05293440736479, "grad_norm": 0.7514514923095703, "learning_rate": 0.00037894131185270424, "loss": 0.263, "step": 281740 }, { "epoch": 81.05581127733026, "grad_norm": 1.2553651332855225, "learning_rate": 0.00037888377445339475, "loss": 0.3143, "step": 281750 }, { "epoch": 81.05868814729574, "grad_norm": 1.0096901655197144, "learning_rate": 0.00037882623705408515, "loss": 0.2961, "step": 281760 }, { "epoch": 81.06156501726122, "grad_norm": 1.1244438886642456, "learning_rate": 0.0003787686996547756, "loss": 0.3267, "step": 281770 }, { "epoch": 81.06444188722669, "grad_norm": 2.0473687648773193, "learning_rate": 0.00037871116225546606, "loss": 0.4439, "step": 281780 }, { "epoch": 81.06731875719217, "grad_norm": 0.7018278241157532, "learning_rate": 0.0003786536248561565, "loss": 0.2689, "step": 281790 }, { "epoch": 81.07019562715766, "grad_norm": 0.9363600611686707, "learning_rate": 0.00037859608745684697, "loss": 0.2596, "step": 281800 }, { "epoch": 81.07307249712314, "grad_norm": 0.9954626560211182, "learning_rate": 0.0003785385500575374, "loss": 0.346, "step": 281810 }, { "epoch": 81.07594936708861, "grad_norm": 1.3041974306106567, "learning_rate": 0.0003784810126582279, "loss": 0.3817, "step": 281820 }, { "epoch": 81.07882623705409, "grad_norm": 0.9132481217384338, "learning_rate": 0.0003784234752589183, "loss": 0.2961, "step": 281830 }, { "epoch": 81.08170310701956, "grad_norm": 0.7327427864074707, "learning_rate": 0.00037836593785960874, "loss": 0.2958, "step": 281840 }, { "epoch": 81.08457997698504, "grad_norm": 1.3929802179336548, "learning_rate": 0.0003783084004602992, "loss": 0.3876, "step": 281850 }, { "epoch": 81.08745684695052, "grad_norm": 1.379032015800476, "learning_rate": 0.00037825086306098965, "loss": 0.2874, "step": 281860 }, { "epoch": 81.09033371691599, "grad_norm": 1.497816801071167, "learning_rate": 0.00037819332566168005, "loss": 0.3553, "step": 281870 }, { "epoch": 81.09321058688147, "grad_norm": 0.8372557759284973, "learning_rate": 0.00037813578826237056, "loss": 0.3483, "step": 281880 }, { "epoch": 81.09608745684694, "grad_norm": 1.0346872806549072, "learning_rate": 0.000378078250863061, "loss": 0.3679, "step": 281890 }, { "epoch": 81.09896432681242, "grad_norm": 0.6540685892105103, "learning_rate": 0.0003780207134637514, "loss": 0.2593, "step": 281900 }, { "epoch": 81.10184119677791, "grad_norm": 0.8671907186508179, "learning_rate": 0.00037796317606444193, "loss": 0.3105, "step": 281910 }, { "epoch": 81.10471806674339, "grad_norm": 0.784510612487793, "learning_rate": 0.00037790563866513233, "loss": 0.311, "step": 281920 }, { "epoch": 81.10759493670886, "grad_norm": 0.7194762229919434, "learning_rate": 0.0003778481012658228, "loss": 0.3725, "step": 281930 }, { "epoch": 81.11047180667434, "grad_norm": 2.0144522190093994, "learning_rate": 0.00037779056386651324, "loss": 0.3481, "step": 281940 }, { "epoch": 81.11334867663982, "grad_norm": 1.4707149267196655, "learning_rate": 0.0003777330264672037, "loss": 0.3455, "step": 281950 }, { "epoch": 81.11622554660529, "grad_norm": 4.0377726554870605, "learning_rate": 0.0003776754890678941, "loss": 0.3795, "step": 281960 }, { "epoch": 81.11910241657077, "grad_norm": 1.9016270637512207, "learning_rate": 0.0003776179516685846, "loss": 0.302, "step": 281970 }, { "epoch": 81.12197928653625, "grad_norm": 0.5323735475540161, "learning_rate": 0.000377560414269275, "loss": 0.3197, "step": 281980 }, { "epoch": 81.12485615650172, "grad_norm": 0.7409374117851257, "learning_rate": 0.00037750287686996546, "loss": 0.3502, "step": 281990 }, { "epoch": 81.1277330264672, "grad_norm": 0.8160988092422485, "learning_rate": 0.000377445339470656, "loss": 0.2788, "step": 282000 }, { "epoch": 81.13060989643269, "grad_norm": 1.7757542133331299, "learning_rate": 0.0003773878020713464, "loss": 0.327, "step": 282010 }, { "epoch": 81.13348676639816, "grad_norm": 0.8030636310577393, "learning_rate": 0.00037733026467203683, "loss": 0.2881, "step": 282020 }, { "epoch": 81.13636363636364, "grad_norm": 0.9872673153877258, "learning_rate": 0.0003772727272727273, "loss": 0.2581, "step": 282030 }, { "epoch": 81.13924050632912, "grad_norm": 1.0020617246627808, "learning_rate": 0.00037721518987341774, "loss": 0.3258, "step": 282040 }, { "epoch": 81.14211737629459, "grad_norm": 0.6143720746040344, "learning_rate": 0.00037715765247410814, "loss": 0.3398, "step": 282050 }, { "epoch": 81.14499424626007, "grad_norm": 1.1910064220428467, "learning_rate": 0.00037710011507479865, "loss": 0.3889, "step": 282060 }, { "epoch": 81.14787111622555, "grad_norm": 1.3858596086502075, "learning_rate": 0.00037704257767548905, "loss": 0.3139, "step": 282070 }, { "epoch": 81.15074798619102, "grad_norm": 2.4042139053344727, "learning_rate": 0.0003769850402761795, "loss": 0.3828, "step": 282080 }, { "epoch": 81.1536248561565, "grad_norm": 0.9829381108283997, "learning_rate": 0.00037692750287687, "loss": 0.2663, "step": 282090 }, { "epoch": 81.15650172612197, "grad_norm": 0.9193757176399231, "learning_rate": 0.0003768699654775604, "loss": 0.3578, "step": 282100 }, { "epoch": 81.15937859608745, "grad_norm": 2.7637546062469482, "learning_rate": 0.0003768124280782509, "loss": 0.4671, "step": 282110 }, { "epoch": 81.16225546605294, "grad_norm": 1.4865407943725586, "learning_rate": 0.00037675489067894133, "loss": 0.2956, "step": 282120 }, { "epoch": 81.16513233601842, "grad_norm": 0.7925471663475037, "learning_rate": 0.0003766973532796318, "loss": 0.3345, "step": 282130 }, { "epoch": 81.16800920598389, "grad_norm": 1.1375795602798462, "learning_rate": 0.0003766398158803222, "loss": 0.3198, "step": 282140 }, { "epoch": 81.17088607594937, "grad_norm": 1.6756632328033447, "learning_rate": 0.0003765822784810127, "loss": 0.3253, "step": 282150 }, { "epoch": 81.17376294591485, "grad_norm": 1.0371917486190796, "learning_rate": 0.0003765247410817031, "loss": 0.3981, "step": 282160 }, { "epoch": 81.17663981588032, "grad_norm": 1.8631623983383179, "learning_rate": 0.00037646720368239356, "loss": 0.3513, "step": 282170 }, { "epoch": 81.1795166858458, "grad_norm": 1.653234839439392, "learning_rate": 0.00037640966628308406, "loss": 0.3979, "step": 282180 }, { "epoch": 81.18239355581127, "grad_norm": 0.7529476284980774, "learning_rate": 0.00037635212888377447, "loss": 0.3342, "step": 282190 }, { "epoch": 81.18527042577675, "grad_norm": 0.673622190952301, "learning_rate": 0.0003762945914844649, "loss": 0.2595, "step": 282200 }, { "epoch": 81.18814729574223, "grad_norm": 1.6524244546890259, "learning_rate": 0.0003762370540851554, "loss": 0.3581, "step": 282210 }, { "epoch": 81.19102416570772, "grad_norm": 1.8913862705230713, "learning_rate": 0.00037617951668584583, "loss": 0.3752, "step": 282220 }, { "epoch": 81.19390103567319, "grad_norm": 1.1133068799972534, "learning_rate": 0.00037612197928653623, "loss": 0.3585, "step": 282230 }, { "epoch": 81.19677790563867, "grad_norm": 1.273897409439087, "learning_rate": 0.0003760644418872267, "loss": 0.3117, "step": 282240 }, { "epoch": 81.19965477560415, "grad_norm": 1.7331647872924805, "learning_rate": 0.00037600690448791714, "loss": 0.3713, "step": 282250 }, { "epoch": 81.20253164556962, "grad_norm": 1.0056589841842651, "learning_rate": 0.0003759493670886076, "loss": 0.372, "step": 282260 }, { "epoch": 81.2054085155351, "grad_norm": 1.2549558877944946, "learning_rate": 0.000375891829689298, "loss": 0.3212, "step": 282270 }, { "epoch": 81.20828538550057, "grad_norm": 0.7121963500976562, "learning_rate": 0.0003758342922899885, "loss": 0.2996, "step": 282280 }, { "epoch": 81.21116225546605, "grad_norm": 0.8299302458763123, "learning_rate": 0.00037577675489067897, "loss": 0.3248, "step": 282290 }, { "epoch": 81.21403912543153, "grad_norm": 1.3900716304779053, "learning_rate": 0.00037571921749136937, "loss": 0.3277, "step": 282300 }, { "epoch": 81.216915995397, "grad_norm": 1.222444772720337, "learning_rate": 0.0003756616800920599, "loss": 0.3327, "step": 282310 }, { "epoch": 81.21979286536248, "grad_norm": 0.9910092353820801, "learning_rate": 0.0003756041426927503, "loss": 0.2845, "step": 282320 }, { "epoch": 81.22266973532797, "grad_norm": 3.344542980194092, "learning_rate": 0.00037554660529344073, "loss": 0.3584, "step": 282330 }, { "epoch": 81.22554660529345, "grad_norm": 1.6986428499221802, "learning_rate": 0.0003754890678941312, "loss": 0.3012, "step": 282340 }, { "epoch": 81.22842347525892, "grad_norm": 1.4549580812454224, "learning_rate": 0.00037543153049482165, "loss": 0.3184, "step": 282350 }, { "epoch": 81.2313003452244, "grad_norm": 0.8292138576507568, "learning_rate": 0.00037537399309551205, "loss": 0.3328, "step": 282360 }, { "epoch": 81.23417721518987, "grad_norm": 1.7937732934951782, "learning_rate": 0.00037531645569620256, "loss": 0.3764, "step": 282370 }, { "epoch": 81.23705408515535, "grad_norm": 1.0908915996551514, "learning_rate": 0.000375258918296893, "loss": 0.2991, "step": 282380 }, { "epoch": 81.23993095512083, "grad_norm": 0.6493057608604431, "learning_rate": 0.0003752013808975834, "loss": 0.5045, "step": 282390 }, { "epoch": 81.2428078250863, "grad_norm": 1.4889618158340454, "learning_rate": 0.0003751438434982739, "loss": 0.3459, "step": 282400 }, { "epoch": 81.24568469505178, "grad_norm": 1.0982617139816284, "learning_rate": 0.0003750863060989643, "loss": 0.3333, "step": 282410 }, { "epoch": 81.24856156501725, "grad_norm": 0.7014755606651306, "learning_rate": 0.0003750287686996548, "loss": 0.3092, "step": 282420 }, { "epoch": 81.25143843498275, "grad_norm": 1.0689117908477783, "learning_rate": 0.00037497123130034524, "loss": 0.3661, "step": 282430 }, { "epoch": 81.25431530494822, "grad_norm": 1.5285491943359375, "learning_rate": 0.0003749136939010357, "loss": 0.3649, "step": 282440 }, { "epoch": 81.2571921749137, "grad_norm": 1.053728461265564, "learning_rate": 0.0003748561565017261, "loss": 0.2998, "step": 282450 }, { "epoch": 81.26006904487917, "grad_norm": 2.07496976852417, "learning_rate": 0.0003747986191024166, "loss": 0.391, "step": 282460 }, { "epoch": 81.26294591484465, "grad_norm": 1.42302668094635, "learning_rate": 0.000374741081703107, "loss": 0.3566, "step": 282470 }, { "epoch": 81.26582278481013, "grad_norm": 1.1920900344848633, "learning_rate": 0.00037468354430379746, "loss": 0.3173, "step": 282480 }, { "epoch": 81.2686996547756, "grad_norm": 1.5715357065200806, "learning_rate": 0.00037462600690448797, "loss": 0.281, "step": 282490 }, { "epoch": 81.27157652474108, "grad_norm": 1.52358078956604, "learning_rate": 0.00037456846950517837, "loss": 0.3303, "step": 282500 }, { "epoch": 81.27445339470655, "grad_norm": 0.7510954737663269, "learning_rate": 0.0003745109321058688, "loss": 0.347, "step": 282510 }, { "epoch": 81.27733026467203, "grad_norm": 1.7292057275772095, "learning_rate": 0.0003744533947065593, "loss": 0.4099, "step": 282520 }, { "epoch": 81.28020713463752, "grad_norm": 1.7696852684020996, "learning_rate": 0.00037439585730724974, "loss": 0.3902, "step": 282530 }, { "epoch": 81.283084004603, "grad_norm": 1.2443344593048096, "learning_rate": 0.00037433831990794014, "loss": 0.479, "step": 282540 }, { "epoch": 81.28596087456847, "grad_norm": 0.9739359021186829, "learning_rate": 0.00037428078250863065, "loss": 0.3079, "step": 282550 }, { "epoch": 81.28883774453395, "grad_norm": 1.7885587215423584, "learning_rate": 0.00037422324510932105, "loss": 0.2873, "step": 282560 }, { "epoch": 81.29171461449943, "grad_norm": 0.6536420583724976, "learning_rate": 0.0003741657077100115, "loss": 0.2727, "step": 282570 }, { "epoch": 81.2945914844649, "grad_norm": 1.5207425355911255, "learning_rate": 0.000374108170310702, "loss": 0.3772, "step": 282580 }, { "epoch": 81.29746835443038, "grad_norm": 0.9820449352264404, "learning_rate": 0.0003740506329113924, "loss": 0.3748, "step": 282590 }, { "epoch": 81.30034522439585, "grad_norm": 1.188086986541748, "learning_rate": 0.00037399309551208287, "loss": 0.3323, "step": 282600 }, { "epoch": 81.30322209436133, "grad_norm": 1.8682349920272827, "learning_rate": 0.0003739355581127733, "loss": 0.4308, "step": 282610 }, { "epoch": 81.30609896432681, "grad_norm": 1.256234884262085, "learning_rate": 0.0003738780207134638, "loss": 0.3207, "step": 282620 }, { "epoch": 81.30897583429228, "grad_norm": 1.5854016542434692, "learning_rate": 0.0003738204833141542, "loss": 0.3473, "step": 282630 }, { "epoch": 81.31185270425777, "grad_norm": 1.0401207208633423, "learning_rate": 0.00037376294591484464, "loss": 0.4032, "step": 282640 }, { "epoch": 81.31472957422325, "grad_norm": 1.059181809425354, "learning_rate": 0.0003737054085155351, "loss": 0.3943, "step": 282650 }, { "epoch": 81.31760644418873, "grad_norm": 0.8553271293640137, "learning_rate": 0.00037364787111622555, "loss": 0.3461, "step": 282660 }, { "epoch": 81.3204833141542, "grad_norm": 1.7109944820404053, "learning_rate": 0.00037359033371691595, "loss": 0.3337, "step": 282670 }, { "epoch": 81.32336018411968, "grad_norm": 1.1647593975067139, "learning_rate": 0.00037353279631760646, "loss": 0.4235, "step": 282680 }, { "epoch": 81.32623705408515, "grad_norm": 1.2412350177764893, "learning_rate": 0.0003734752589182969, "loss": 0.4296, "step": 282690 }, { "epoch": 81.32911392405063, "grad_norm": 0.9901257753372192, "learning_rate": 0.0003734177215189873, "loss": 0.3617, "step": 282700 }, { "epoch": 81.33199079401611, "grad_norm": 1.6640570163726807, "learning_rate": 0.0003733601841196778, "loss": 0.3863, "step": 282710 }, { "epoch": 81.33486766398158, "grad_norm": 1.350594162940979, "learning_rate": 0.00037330264672036823, "loss": 0.2865, "step": 282720 }, { "epoch": 81.33774453394706, "grad_norm": 1.3094236850738525, "learning_rate": 0.0003732451093210587, "loss": 0.3518, "step": 282730 }, { "epoch": 81.34062140391255, "grad_norm": 1.8932915925979614, "learning_rate": 0.00037318757192174914, "loss": 0.3474, "step": 282740 }, { "epoch": 81.34349827387803, "grad_norm": 0.9565647840499878, "learning_rate": 0.0003731300345224396, "loss": 0.3445, "step": 282750 }, { "epoch": 81.3463751438435, "grad_norm": 2.0672404766082764, "learning_rate": 0.00037307249712313, "loss": 0.3468, "step": 282760 }, { "epoch": 81.34925201380898, "grad_norm": 1.9299676418304443, "learning_rate": 0.0003730149597238205, "loss": 0.3155, "step": 282770 }, { "epoch": 81.35212888377445, "grad_norm": 1.8745076656341553, "learning_rate": 0.00037295742232451096, "loss": 0.3761, "step": 282780 }, { "epoch": 81.35500575373993, "grad_norm": 1.1547119617462158, "learning_rate": 0.00037289988492520136, "loss": 0.358, "step": 282790 }, { "epoch": 81.35788262370541, "grad_norm": 1.4695041179656982, "learning_rate": 0.00037284234752589187, "loss": 0.3242, "step": 282800 }, { "epoch": 81.36075949367088, "grad_norm": 1.3714321851730347, "learning_rate": 0.0003727848101265823, "loss": 0.3086, "step": 282810 }, { "epoch": 81.36363636363636, "grad_norm": 1.1414666175842285, "learning_rate": 0.00037272727272727273, "loss": 0.2847, "step": 282820 }, { "epoch": 81.36651323360184, "grad_norm": 1.2748644351959229, "learning_rate": 0.0003726697353279632, "loss": 0.3433, "step": 282830 }, { "epoch": 81.36939010356731, "grad_norm": 2.049741744995117, "learning_rate": 0.00037261219792865364, "loss": 0.3571, "step": 282840 }, { "epoch": 81.3722669735328, "grad_norm": 1.257919192314148, "learning_rate": 0.00037255466052934404, "loss": 0.3251, "step": 282850 }, { "epoch": 81.37514384349828, "grad_norm": 1.1189391613006592, "learning_rate": 0.00037249712313003455, "loss": 0.3223, "step": 282860 }, { "epoch": 81.37802071346375, "grad_norm": 1.4141082763671875, "learning_rate": 0.000372439585730725, "loss": 0.3254, "step": 282870 }, { "epoch": 81.38089758342923, "grad_norm": 1.2684119939804077, "learning_rate": 0.0003723820483314154, "loss": 0.4307, "step": 282880 }, { "epoch": 81.38377445339471, "grad_norm": 1.9189561605453491, "learning_rate": 0.0003723245109321059, "loss": 0.431, "step": 282890 }, { "epoch": 81.38665132336018, "grad_norm": 1.2835556268692017, "learning_rate": 0.0003722669735327963, "loss": 0.3756, "step": 282900 }, { "epoch": 81.38952819332566, "grad_norm": 0.6631155610084534, "learning_rate": 0.0003722094361334868, "loss": 0.2864, "step": 282910 }, { "epoch": 81.39240506329114, "grad_norm": 0.8456516861915588, "learning_rate": 0.00037215189873417723, "loss": 0.3863, "step": 282920 }, { "epoch": 81.39528193325661, "grad_norm": 1.6615403890609741, "learning_rate": 0.0003720943613348677, "loss": 0.2929, "step": 282930 }, { "epoch": 81.39815880322209, "grad_norm": 1.1561903953552246, "learning_rate": 0.0003720368239355581, "loss": 0.3351, "step": 282940 }, { "epoch": 81.40103567318758, "grad_norm": 0.6946761608123779, "learning_rate": 0.0003719792865362486, "loss": 0.3211, "step": 282950 }, { "epoch": 81.40391254315306, "grad_norm": 1.2119954824447632, "learning_rate": 0.000371921749136939, "loss": 0.3577, "step": 282960 }, { "epoch": 81.40678941311853, "grad_norm": 1.6849167346954346, "learning_rate": 0.00037186421173762945, "loss": 0.3256, "step": 282970 }, { "epoch": 81.40966628308401, "grad_norm": 0.881951093673706, "learning_rate": 0.00037180667433831996, "loss": 0.3129, "step": 282980 }, { "epoch": 81.41254315304948, "grad_norm": 1.1246899366378784, "learning_rate": 0.00037174913693901036, "loss": 0.3497, "step": 282990 }, { "epoch": 81.41542002301496, "grad_norm": 1.6591315269470215, "learning_rate": 0.0003716915995397008, "loss": 0.3211, "step": 283000 }, { "epoch": 81.41829689298044, "grad_norm": 1.5725106000900269, "learning_rate": 0.0003716340621403913, "loss": 0.3128, "step": 283010 }, { "epoch": 81.42117376294591, "grad_norm": 1.480763554573059, "learning_rate": 0.00037157652474108173, "loss": 0.4552, "step": 283020 }, { "epoch": 81.42405063291139, "grad_norm": 0.6741558909416199, "learning_rate": 0.00037151898734177213, "loss": 0.3101, "step": 283030 }, { "epoch": 81.42692750287686, "grad_norm": 1.3160597085952759, "learning_rate": 0.00037146144994246264, "loss": 0.2828, "step": 283040 }, { "epoch": 81.42980437284234, "grad_norm": 1.2911049127578735, "learning_rate": 0.00037140391254315304, "loss": 0.2775, "step": 283050 }, { "epoch": 81.43268124280783, "grad_norm": 1.406947135925293, "learning_rate": 0.0003713463751438435, "loss": 0.3894, "step": 283060 }, { "epoch": 81.43555811277331, "grad_norm": 0.8800328373908997, "learning_rate": 0.00037128883774453395, "loss": 0.3783, "step": 283070 }, { "epoch": 81.43843498273878, "grad_norm": 1.1525951623916626, "learning_rate": 0.0003712313003452244, "loss": 0.4466, "step": 283080 }, { "epoch": 81.44131185270426, "grad_norm": 1.1731553077697754, "learning_rate": 0.00037117376294591487, "loss": 0.3322, "step": 283090 }, { "epoch": 81.44418872266974, "grad_norm": 0.8929747939109802, "learning_rate": 0.00037111622554660527, "loss": 0.2838, "step": 283100 }, { "epoch": 81.44706559263521, "grad_norm": 1.024704933166504, "learning_rate": 0.0003710586881472958, "loss": 0.3224, "step": 283110 }, { "epoch": 81.44994246260069, "grad_norm": 1.0406841039657593, "learning_rate": 0.0003710011507479862, "loss": 0.2285, "step": 283120 }, { "epoch": 81.45281933256616, "grad_norm": 0.7426633238792419, "learning_rate": 0.00037094361334867663, "loss": 0.3419, "step": 283130 }, { "epoch": 81.45569620253164, "grad_norm": 1.3892489671707153, "learning_rate": 0.0003708860759493671, "loss": 0.3756, "step": 283140 }, { "epoch": 81.45857307249712, "grad_norm": 1.6628667116165161, "learning_rate": 0.00037082853855005754, "loss": 0.4216, "step": 283150 }, { "epoch": 81.46144994246261, "grad_norm": 2.1775131225585938, "learning_rate": 0.00037077100115074795, "loss": 0.3239, "step": 283160 }, { "epoch": 81.46432681242808, "grad_norm": 0.9134600162506104, "learning_rate": 0.00037071346375143845, "loss": 0.3392, "step": 283170 }, { "epoch": 81.46720368239356, "grad_norm": 1.0397422313690186, "learning_rate": 0.0003706559263521289, "loss": 0.2581, "step": 283180 }, { "epoch": 81.47008055235904, "grad_norm": 1.1977152824401855, "learning_rate": 0.0003705983889528193, "loss": 0.3884, "step": 283190 }, { "epoch": 81.47295742232451, "grad_norm": 2.3200595378875732, "learning_rate": 0.0003705408515535098, "loss": 0.3259, "step": 283200 }, { "epoch": 81.47583429228999, "grad_norm": 1.9874018430709839, "learning_rate": 0.0003704833141542002, "loss": 0.3906, "step": 283210 }, { "epoch": 81.47871116225546, "grad_norm": 1.1806776523590088, "learning_rate": 0.0003704257767548907, "loss": 0.3819, "step": 283220 }, { "epoch": 81.48158803222094, "grad_norm": 1.3604172468185425, "learning_rate": 0.00037036823935558113, "loss": 0.3615, "step": 283230 }, { "epoch": 81.48446490218642, "grad_norm": 2.0444979667663574, "learning_rate": 0.0003703107019562716, "loss": 0.3721, "step": 283240 }, { "epoch": 81.4873417721519, "grad_norm": 1.6258267164230347, "learning_rate": 0.000370253164556962, "loss": 0.3686, "step": 283250 }, { "epoch": 81.49021864211737, "grad_norm": 0.9245955944061279, "learning_rate": 0.0003701956271576525, "loss": 0.3917, "step": 283260 }, { "epoch": 81.49309551208286, "grad_norm": 1.4167490005493164, "learning_rate": 0.00037013808975834296, "loss": 0.3132, "step": 283270 }, { "epoch": 81.49597238204834, "grad_norm": 1.201409101486206, "learning_rate": 0.00037008055235903336, "loss": 0.292, "step": 283280 }, { "epoch": 81.49884925201381, "grad_norm": 0.992134690284729, "learning_rate": 0.00037002301495972387, "loss": 0.3013, "step": 283290 }, { "epoch": 81.50172612197929, "grad_norm": 1.514149785041809, "learning_rate": 0.00036996547756041427, "loss": 0.3012, "step": 283300 }, { "epoch": 81.50460299194476, "grad_norm": 1.5884400606155396, "learning_rate": 0.0003699079401611047, "loss": 0.4507, "step": 283310 }, { "epoch": 81.50747986191024, "grad_norm": 0.6999353766441345, "learning_rate": 0.0003698504027617952, "loss": 0.3141, "step": 283320 }, { "epoch": 81.51035673187572, "grad_norm": 1.1839072704315186, "learning_rate": 0.00036979286536248563, "loss": 0.4431, "step": 283330 }, { "epoch": 81.5132336018412, "grad_norm": 0.802760899066925, "learning_rate": 0.00036973532796317604, "loss": 0.2957, "step": 283340 }, { "epoch": 81.51611047180667, "grad_norm": 1.6382118463516235, "learning_rate": 0.00036967779056386655, "loss": 0.2996, "step": 283350 }, { "epoch": 81.51898734177215, "grad_norm": 0.9643275141716003, "learning_rate": 0.000369620253164557, "loss": 0.378, "step": 283360 }, { "epoch": 81.52186421173764, "grad_norm": 1.2309858798980713, "learning_rate": 0.0003695627157652474, "loss": 0.3114, "step": 283370 }, { "epoch": 81.52474108170311, "grad_norm": 1.0705583095550537, "learning_rate": 0.0003695051783659379, "loss": 0.3474, "step": 283380 }, { "epoch": 81.52761795166859, "grad_norm": 1.1678091287612915, "learning_rate": 0.0003694476409666283, "loss": 0.4246, "step": 283390 }, { "epoch": 81.53049482163406, "grad_norm": 1.365669846534729, "learning_rate": 0.00036939010356731877, "loss": 0.3523, "step": 283400 }, { "epoch": 81.53337169159954, "grad_norm": 1.3216278553009033, "learning_rate": 0.0003693325661680092, "loss": 0.3074, "step": 283410 }, { "epoch": 81.53624856156502, "grad_norm": 1.824720859527588, "learning_rate": 0.0003692750287686997, "loss": 0.3041, "step": 283420 }, { "epoch": 81.5391254315305, "grad_norm": 1.4601330757141113, "learning_rate": 0.0003692174913693901, "loss": 0.3745, "step": 283430 }, { "epoch": 81.54200230149597, "grad_norm": 0.9538924694061279, "learning_rate": 0.0003691599539700806, "loss": 0.3084, "step": 283440 }, { "epoch": 81.54487917146145, "grad_norm": 1.1755461692810059, "learning_rate": 0.000369102416570771, "loss": 0.3227, "step": 283450 }, { "epoch": 81.54775604142692, "grad_norm": 1.0399049520492554, "learning_rate": 0.00036904487917146145, "loss": 0.3915, "step": 283460 }, { "epoch": 81.5506329113924, "grad_norm": 0.7424941062927246, "learning_rate": 0.0003689873417721519, "loss": 0.3879, "step": 283470 }, { "epoch": 81.55350978135789, "grad_norm": 2.1188457012176514, "learning_rate": 0.00036892980437284236, "loss": 0.366, "step": 283480 }, { "epoch": 81.55638665132336, "grad_norm": 1.685073971748352, "learning_rate": 0.0003688722669735328, "loss": 0.3113, "step": 283490 }, { "epoch": 81.55926352128884, "grad_norm": 2.034658908843994, "learning_rate": 0.0003688147295742232, "loss": 0.3168, "step": 283500 }, { "epoch": 81.56214039125432, "grad_norm": 2.1603331565856934, "learning_rate": 0.0003687571921749137, "loss": 0.3316, "step": 283510 }, { "epoch": 81.5650172612198, "grad_norm": 1.1926509141921997, "learning_rate": 0.0003686996547756041, "loss": 0.2959, "step": 283520 }, { "epoch": 81.56789413118527, "grad_norm": 0.9847034811973572, "learning_rate": 0.0003686421173762946, "loss": 0.3961, "step": 283530 }, { "epoch": 81.57077100115075, "grad_norm": 1.265841007232666, "learning_rate": 0.00036858457997698504, "loss": 0.3149, "step": 283540 }, { "epoch": 81.57364787111622, "grad_norm": 1.1947903633117676, "learning_rate": 0.0003685270425776755, "loss": 0.2594, "step": 283550 }, { "epoch": 81.5765247410817, "grad_norm": 1.3084660768508911, "learning_rate": 0.00036846950517836595, "loss": 0.3633, "step": 283560 }, { "epoch": 81.57940161104717, "grad_norm": 0.866040825843811, "learning_rate": 0.0003684119677790564, "loss": 0.3026, "step": 283570 }, { "epoch": 81.58227848101266, "grad_norm": 1.0799635648727417, "learning_rate": 0.00036835443037974686, "loss": 0.3738, "step": 283580 }, { "epoch": 81.58515535097814, "grad_norm": 1.4920152425765991, "learning_rate": 0.00036829689298043726, "loss": 0.3459, "step": 283590 }, { "epoch": 81.58803222094362, "grad_norm": 0.6339202523231506, "learning_rate": 0.00036823935558112777, "loss": 0.2883, "step": 283600 }, { "epoch": 81.5909090909091, "grad_norm": 1.2690563201904297, "learning_rate": 0.00036818181818181817, "loss": 0.3213, "step": 283610 }, { "epoch": 81.59378596087457, "grad_norm": 1.0232481956481934, "learning_rate": 0.00036812428078250863, "loss": 0.3016, "step": 283620 }, { "epoch": 81.59666283084005, "grad_norm": 1.0773472785949707, "learning_rate": 0.0003680667433831991, "loss": 0.4138, "step": 283630 }, { "epoch": 81.59953970080552, "grad_norm": 1.6975865364074707, "learning_rate": 0.00036800920598388954, "loss": 0.3661, "step": 283640 }, { "epoch": 81.602416570771, "grad_norm": 1.289017915725708, "learning_rate": 0.00036795166858457994, "loss": 0.359, "step": 283650 }, { "epoch": 81.60529344073647, "grad_norm": 1.0163240432739258, "learning_rate": 0.00036789413118527045, "loss": 0.4274, "step": 283660 }, { "epoch": 81.60817031070195, "grad_norm": 1.3702168464660645, "learning_rate": 0.0003678365937859609, "loss": 0.3368, "step": 283670 }, { "epoch": 81.61104718066743, "grad_norm": 1.7617428302764893, "learning_rate": 0.0003677790563866513, "loss": 0.3416, "step": 283680 }, { "epoch": 81.61392405063292, "grad_norm": 1.3731707334518433, "learning_rate": 0.0003677215189873418, "loss": 0.3724, "step": 283690 }, { "epoch": 81.6168009205984, "grad_norm": 0.7568427324295044, "learning_rate": 0.0003676639815880322, "loss": 0.3712, "step": 283700 }, { "epoch": 81.61967779056387, "grad_norm": 0.5657578110694885, "learning_rate": 0.00036760644418872267, "loss": 0.3801, "step": 283710 }, { "epoch": 81.62255466052935, "grad_norm": 1.1520841121673584, "learning_rate": 0.00036754890678941313, "loss": 0.311, "step": 283720 }, { "epoch": 81.62543153049482, "grad_norm": 0.9130833745002747, "learning_rate": 0.0003674913693901036, "loss": 0.3781, "step": 283730 }, { "epoch": 81.6283084004603, "grad_norm": 0.5636884570121765, "learning_rate": 0.000367433831990794, "loss": 0.3059, "step": 283740 }, { "epoch": 81.63118527042577, "grad_norm": 0.9696354866027832, "learning_rate": 0.0003673762945914845, "loss": 0.3272, "step": 283750 }, { "epoch": 81.63406214039125, "grad_norm": 1.7850056886672974, "learning_rate": 0.00036731875719217495, "loss": 0.3605, "step": 283760 }, { "epoch": 81.63693901035673, "grad_norm": 2.3053770065307617, "learning_rate": 0.00036726121979286535, "loss": 0.3816, "step": 283770 }, { "epoch": 81.6398158803222, "grad_norm": 0.8570564985275269, "learning_rate": 0.00036720368239355586, "loss": 0.3132, "step": 283780 }, { "epoch": 81.6426927502877, "grad_norm": 3.489870548248291, "learning_rate": 0.00036714614499424626, "loss": 0.3041, "step": 283790 }, { "epoch": 81.64556962025317, "grad_norm": 1.4299675226211548, "learning_rate": 0.0003670886075949367, "loss": 0.3632, "step": 283800 }, { "epoch": 81.64844649021865, "grad_norm": 1.625744104385376, "learning_rate": 0.0003670310701956272, "loss": 0.3236, "step": 283810 }, { "epoch": 81.65132336018412, "grad_norm": 2.490140914916992, "learning_rate": 0.00036697353279631763, "loss": 0.2938, "step": 283820 }, { "epoch": 81.6542002301496, "grad_norm": 0.8659803867340088, "learning_rate": 0.00036691599539700803, "loss": 0.3716, "step": 283830 }, { "epoch": 81.65707710011507, "grad_norm": 1.8637089729309082, "learning_rate": 0.00036685845799769854, "loss": 0.3476, "step": 283840 }, { "epoch": 81.65995397008055, "grad_norm": 1.1803289651870728, "learning_rate": 0.000366800920598389, "loss": 0.3566, "step": 283850 }, { "epoch": 81.66283084004603, "grad_norm": 1.8509159088134766, "learning_rate": 0.0003667433831990794, "loss": 0.3536, "step": 283860 }, { "epoch": 81.6657077100115, "grad_norm": 0.8681668639183044, "learning_rate": 0.0003666858457997699, "loss": 0.41, "step": 283870 }, { "epoch": 81.66858457997698, "grad_norm": 1.1278631687164307, "learning_rate": 0.0003666283084004603, "loss": 0.3735, "step": 283880 }, { "epoch": 81.67146144994246, "grad_norm": 1.7705092430114746, "learning_rate": 0.00036657077100115076, "loss": 0.3401, "step": 283890 }, { "epoch": 81.67433831990795, "grad_norm": 1.1801244020462036, "learning_rate": 0.00036651323360184116, "loss": 0.3886, "step": 283900 }, { "epoch": 81.67721518987342, "grad_norm": 1.3694334030151367, "learning_rate": 0.0003664556962025317, "loss": 0.4021, "step": 283910 }, { "epoch": 81.6800920598389, "grad_norm": 1.1140573024749756, "learning_rate": 0.0003663981588032221, "loss": 0.3075, "step": 283920 }, { "epoch": 81.68296892980437, "grad_norm": 1.7209006547927856, "learning_rate": 0.00036634062140391253, "loss": 0.356, "step": 283930 }, { "epoch": 81.68584579976985, "grad_norm": 1.0056694746017456, "learning_rate": 0.000366283084004603, "loss": 0.2487, "step": 283940 }, { "epoch": 81.68872266973533, "grad_norm": 1.0939836502075195, "learning_rate": 0.00036622554660529344, "loss": 0.3719, "step": 283950 }, { "epoch": 81.6915995397008, "grad_norm": 1.2024049758911133, "learning_rate": 0.0003661680092059839, "loss": 0.4283, "step": 283960 }, { "epoch": 81.69447640966628, "grad_norm": 0.9863510727882385, "learning_rate": 0.00036611047180667435, "loss": 0.378, "step": 283970 }, { "epoch": 81.69735327963176, "grad_norm": 0.9078401923179626, "learning_rate": 0.0003660529344073648, "loss": 0.3521, "step": 283980 }, { "epoch": 81.70023014959723, "grad_norm": 0.9618842005729675, "learning_rate": 0.0003659953970080552, "loss": 0.3704, "step": 283990 }, { "epoch": 81.70310701956272, "grad_norm": 0.9866456389427185, "learning_rate": 0.0003659378596087457, "loss": 0.3333, "step": 284000 }, { "epoch": 81.7059838895282, "grad_norm": 0.6145156025886536, "learning_rate": 0.0003658803222094361, "loss": 0.311, "step": 284010 }, { "epoch": 81.70886075949367, "grad_norm": 3.440274953842163, "learning_rate": 0.0003658227848101266, "loss": 0.4655, "step": 284020 }, { "epoch": 81.71173762945915, "grad_norm": 1.2453094720840454, "learning_rate": 0.00036576524741081703, "loss": 0.3474, "step": 284030 }, { "epoch": 81.71461449942463, "grad_norm": 1.339040756225586, "learning_rate": 0.0003657077100115075, "loss": 0.3941, "step": 284040 }, { "epoch": 81.7174913693901, "grad_norm": 1.6437251567840576, "learning_rate": 0.00036565017261219794, "loss": 0.3665, "step": 284050 }, { "epoch": 81.72036823935558, "grad_norm": 2.6036930084228516, "learning_rate": 0.0003655926352128884, "loss": 0.4055, "step": 284060 }, { "epoch": 81.72324510932106, "grad_norm": 1.3295016288757324, "learning_rate": 0.00036553509781357885, "loss": 0.3374, "step": 284070 }, { "epoch": 81.72612197928653, "grad_norm": 1.0012683868408203, "learning_rate": 0.00036547756041426926, "loss": 0.2863, "step": 284080 }, { "epoch": 81.72899884925201, "grad_norm": 2.4383795261383057, "learning_rate": 0.00036542002301495976, "loss": 0.4379, "step": 284090 }, { "epoch": 81.7318757192175, "grad_norm": 0.9091464877128601, "learning_rate": 0.00036536248561565017, "loss": 0.3542, "step": 284100 }, { "epoch": 81.73475258918297, "grad_norm": 1.3461077213287354, "learning_rate": 0.0003653049482163406, "loss": 0.3327, "step": 284110 }, { "epoch": 81.73762945914845, "grad_norm": 0.8063532114028931, "learning_rate": 0.0003652474108170311, "loss": 0.2664, "step": 284120 }, { "epoch": 81.74050632911393, "grad_norm": 0.8549692034721375, "learning_rate": 0.00036518987341772153, "loss": 0.2887, "step": 284130 }, { "epoch": 81.7433831990794, "grad_norm": 0.936075747013092, "learning_rate": 0.00036513233601841193, "loss": 0.4164, "step": 284140 }, { "epoch": 81.74626006904488, "grad_norm": 0.8041631579399109, "learning_rate": 0.00036507479861910244, "loss": 0.3552, "step": 284150 }, { "epoch": 81.74913693901036, "grad_norm": 1.057621955871582, "learning_rate": 0.0003650172612197929, "loss": 0.3462, "step": 284160 }, { "epoch": 81.75201380897583, "grad_norm": 0.5794612169265747, "learning_rate": 0.0003649597238204833, "loss": 0.3834, "step": 284170 }, { "epoch": 81.75489067894131, "grad_norm": 0.8708446025848389, "learning_rate": 0.0003649021864211738, "loss": 0.3262, "step": 284180 }, { "epoch": 81.75776754890678, "grad_norm": 1.3537497520446777, "learning_rate": 0.0003648446490218642, "loss": 0.3696, "step": 284190 }, { "epoch": 81.76064441887226, "grad_norm": 1.249898910522461, "learning_rate": 0.00036478711162255467, "loss": 0.3122, "step": 284200 }, { "epoch": 81.76352128883775, "grad_norm": 1.6697136163711548, "learning_rate": 0.0003647295742232451, "loss": 0.3721, "step": 284210 }, { "epoch": 81.76639815880323, "grad_norm": 0.9439132809638977, "learning_rate": 0.0003646720368239356, "loss": 0.338, "step": 284220 }, { "epoch": 81.7692750287687, "grad_norm": 1.912957787513733, "learning_rate": 0.000364614499424626, "loss": 0.3557, "step": 284230 }, { "epoch": 81.77215189873418, "grad_norm": 0.6670565605163574, "learning_rate": 0.0003645569620253165, "loss": 0.3204, "step": 284240 }, { "epoch": 81.77502876869966, "grad_norm": 2.09898042678833, "learning_rate": 0.00036449942462600694, "loss": 0.3848, "step": 284250 }, { "epoch": 81.77790563866513, "grad_norm": 1.0931800603866577, "learning_rate": 0.00036444188722669735, "loss": 0.4131, "step": 284260 }, { "epoch": 81.78078250863061, "grad_norm": 1.9307668209075928, "learning_rate": 0.00036438434982738786, "loss": 0.3764, "step": 284270 }, { "epoch": 81.78365937859608, "grad_norm": 1.093427062034607, "learning_rate": 0.00036432681242807826, "loss": 0.2701, "step": 284280 }, { "epoch": 81.78653624856156, "grad_norm": 0.8437331318855286, "learning_rate": 0.0003642692750287687, "loss": 0.3088, "step": 284290 }, { "epoch": 81.78941311852704, "grad_norm": 1.1089171171188354, "learning_rate": 0.0003642117376294591, "loss": 0.3692, "step": 284300 }, { "epoch": 81.79228998849253, "grad_norm": 1.5050255060195923, "learning_rate": 0.0003641542002301496, "loss": 0.317, "step": 284310 }, { "epoch": 81.795166858458, "grad_norm": 1.0253480672836304, "learning_rate": 0.00036409666283084, "loss": 0.3538, "step": 284320 }, { "epoch": 81.79804372842348, "grad_norm": 0.7862933278083801, "learning_rate": 0.0003640391254315305, "loss": 0.3503, "step": 284330 }, { "epoch": 81.80092059838896, "grad_norm": 1.4998570680618286, "learning_rate": 0.000363981588032221, "loss": 0.3761, "step": 284340 }, { "epoch": 81.80379746835443, "grad_norm": 1.716597080230713, "learning_rate": 0.0003639240506329114, "loss": 0.3142, "step": 284350 }, { "epoch": 81.80667433831991, "grad_norm": 1.0367474555969238, "learning_rate": 0.00036386651323360185, "loss": 0.3288, "step": 284360 }, { "epoch": 81.80955120828538, "grad_norm": 1.4591922760009766, "learning_rate": 0.0003638089758342923, "loss": 0.3762, "step": 284370 }, { "epoch": 81.81242807825086, "grad_norm": 1.1465226411819458, "learning_rate": 0.00036375143843498276, "loss": 0.3635, "step": 284380 }, { "epoch": 81.81530494821634, "grad_norm": 1.0817618370056152, "learning_rate": 0.00036369390103567316, "loss": 0.3764, "step": 284390 }, { "epoch": 81.81818181818181, "grad_norm": 0.8969985246658325, "learning_rate": 0.00036363636363636367, "loss": 0.3253, "step": 284400 }, { "epoch": 81.82105868814729, "grad_norm": 0.8069107532501221, "learning_rate": 0.00036357882623705407, "loss": 0.4014, "step": 284410 }, { "epoch": 81.82393555811278, "grad_norm": 1.6791667938232422, "learning_rate": 0.0003635212888377445, "loss": 0.3256, "step": 284420 }, { "epoch": 81.82681242807826, "grad_norm": 2.3277671337127686, "learning_rate": 0.000363463751438435, "loss": 0.3929, "step": 284430 }, { "epoch": 81.82968929804373, "grad_norm": 1.508713722229004, "learning_rate": 0.00036340621403912544, "loss": 0.4049, "step": 284440 }, { "epoch": 81.83256616800921, "grad_norm": 3.0001003742218018, "learning_rate": 0.0003633486766398159, "loss": 0.322, "step": 284450 }, { "epoch": 81.83544303797468, "grad_norm": 0.866003692150116, "learning_rate": 0.00036329113924050635, "loss": 0.3258, "step": 284460 }, { "epoch": 81.83831990794016, "grad_norm": 1.6222267150878906, "learning_rate": 0.0003632336018411968, "loss": 0.4002, "step": 284470 }, { "epoch": 81.84119677790564, "grad_norm": 1.1428014039993286, "learning_rate": 0.0003631760644418872, "loss": 0.3396, "step": 284480 }, { "epoch": 81.84407364787111, "grad_norm": 0.574432909488678, "learning_rate": 0.0003631185270425777, "loss": 0.4019, "step": 284490 }, { "epoch": 81.84695051783659, "grad_norm": 0.752070426940918, "learning_rate": 0.0003630609896432681, "loss": 0.4364, "step": 284500 }, { "epoch": 81.84982738780207, "grad_norm": 0.6281090378761292, "learning_rate": 0.00036300345224395857, "loss": 0.2964, "step": 284510 }, { "epoch": 81.85270425776756, "grad_norm": 0.7820553779602051, "learning_rate": 0.000362945914844649, "loss": 0.2726, "step": 284520 }, { "epoch": 81.85558112773303, "grad_norm": 1.6761196851730347, "learning_rate": 0.0003628883774453395, "loss": 0.3943, "step": 284530 }, { "epoch": 81.85845799769851, "grad_norm": 1.1989490985870361, "learning_rate": 0.00036283084004602994, "loss": 0.3635, "step": 284540 }, { "epoch": 81.86133486766398, "grad_norm": 1.3055981397628784, "learning_rate": 0.0003627733026467204, "loss": 0.3059, "step": 284550 }, { "epoch": 81.86421173762946, "grad_norm": 0.8690289855003357, "learning_rate": 0.00036271576524741085, "loss": 0.3383, "step": 284560 }, { "epoch": 81.86708860759494, "grad_norm": 0.797782301902771, "learning_rate": 0.00036265822784810125, "loss": 0.4212, "step": 284570 }, { "epoch": 81.86996547756041, "grad_norm": 0.8961788415908813, "learning_rate": 0.00036260069044879176, "loss": 0.3108, "step": 284580 }, { "epoch": 81.87284234752589, "grad_norm": 1.648730754852295, "learning_rate": 0.00036254315304948216, "loss": 0.3638, "step": 284590 }, { "epoch": 81.87571921749137, "grad_norm": 0.9928944110870361, "learning_rate": 0.0003624856156501726, "loss": 0.325, "step": 284600 }, { "epoch": 81.87859608745684, "grad_norm": 0.9032174348831177, "learning_rate": 0.00036242807825086307, "loss": 0.4614, "step": 284610 }, { "epoch": 81.88147295742232, "grad_norm": 1.7409943342208862, "learning_rate": 0.0003623705408515535, "loss": 0.3924, "step": 284620 }, { "epoch": 81.88434982738781, "grad_norm": 1.6654613018035889, "learning_rate": 0.00036231300345224393, "loss": 0.3048, "step": 284630 }, { "epoch": 81.88722669735328, "grad_norm": 0.9157765507698059, "learning_rate": 0.00036225546605293444, "loss": 0.2858, "step": 284640 }, { "epoch": 81.89010356731876, "grad_norm": 0.8992939591407776, "learning_rate": 0.0003621979286536249, "loss": 0.3612, "step": 284650 }, { "epoch": 81.89298043728424, "grad_norm": 1.2113639116287231, "learning_rate": 0.0003621403912543153, "loss": 0.3122, "step": 284660 }, { "epoch": 81.89585730724971, "grad_norm": 1.1493726968765259, "learning_rate": 0.0003620828538550058, "loss": 0.3787, "step": 284670 }, { "epoch": 81.89873417721519, "grad_norm": 1.9642456769943237, "learning_rate": 0.0003620253164556962, "loss": 0.323, "step": 284680 }, { "epoch": 81.90161104718067, "grad_norm": 0.8198975920677185, "learning_rate": 0.00036196777905638666, "loss": 0.3146, "step": 284690 }, { "epoch": 81.90448791714614, "grad_norm": 1.3492305278778076, "learning_rate": 0.00036191024165707706, "loss": 0.3138, "step": 284700 }, { "epoch": 81.90736478711162, "grad_norm": 1.6040918827056885, "learning_rate": 0.00036185270425776757, "loss": 0.3809, "step": 284710 }, { "epoch": 81.9102416570771, "grad_norm": 1.1077810525894165, "learning_rate": 0.000361795166858458, "loss": 0.4116, "step": 284720 }, { "epoch": 81.91311852704258, "grad_norm": 0.949889063835144, "learning_rate": 0.00036173762945914843, "loss": 0.2921, "step": 284730 }, { "epoch": 81.91599539700806, "grad_norm": 1.0325545072555542, "learning_rate": 0.00036168009205983894, "loss": 0.32, "step": 284740 }, { "epoch": 81.91887226697354, "grad_norm": 1.5437747240066528, "learning_rate": 0.00036162255466052934, "loss": 0.4925, "step": 284750 }, { "epoch": 81.92174913693901, "grad_norm": 0.8914321660995483, "learning_rate": 0.0003615650172612198, "loss": 0.3692, "step": 284760 }, { "epoch": 81.92462600690449, "grad_norm": 2.0241665840148926, "learning_rate": 0.00036150747986191025, "loss": 0.3345, "step": 284770 }, { "epoch": 81.92750287686997, "grad_norm": 0.8178399205207825, "learning_rate": 0.0003614499424626007, "loss": 0.3369, "step": 284780 }, { "epoch": 81.93037974683544, "grad_norm": 0.7257810235023499, "learning_rate": 0.0003613924050632911, "loss": 0.3134, "step": 284790 }, { "epoch": 81.93325661680092, "grad_norm": 0.8225942850112915, "learning_rate": 0.0003613348676639816, "loss": 0.3359, "step": 284800 }, { "epoch": 81.9361334867664, "grad_norm": 0.6956068873405457, "learning_rate": 0.000361277330264672, "loss": 0.3724, "step": 284810 }, { "epoch": 81.93901035673187, "grad_norm": 1.5702015161514282, "learning_rate": 0.0003612197928653625, "loss": 0.3991, "step": 284820 }, { "epoch": 81.94188722669735, "grad_norm": 0.8496000170707703, "learning_rate": 0.000361162255466053, "loss": 0.2904, "step": 284830 }, { "epoch": 81.94476409666284, "grad_norm": 0.8912762999534607, "learning_rate": 0.0003611047180667434, "loss": 0.2964, "step": 284840 }, { "epoch": 81.94764096662831, "grad_norm": 1.00699782371521, "learning_rate": 0.00036104718066743384, "loss": 0.2732, "step": 284850 }, { "epoch": 81.95051783659379, "grad_norm": 0.8063919544219971, "learning_rate": 0.0003609896432681243, "loss": 0.4603, "step": 284860 }, { "epoch": 81.95339470655927, "grad_norm": 0.9832556247711182, "learning_rate": 0.00036093210586881475, "loss": 0.3243, "step": 284870 }, { "epoch": 81.95627157652474, "grad_norm": 1.9257569313049316, "learning_rate": 0.00036087456846950515, "loss": 0.4417, "step": 284880 }, { "epoch": 81.95914844649022, "grad_norm": 2.1767945289611816, "learning_rate": 0.00036081703107019566, "loss": 0.3746, "step": 284890 }, { "epoch": 81.9620253164557, "grad_norm": 1.8703396320343018, "learning_rate": 0.00036075949367088606, "loss": 0.3222, "step": 284900 }, { "epoch": 81.96490218642117, "grad_norm": 1.0812808275222778, "learning_rate": 0.0003607019562715765, "loss": 0.3967, "step": 284910 }, { "epoch": 81.96777905638665, "grad_norm": 1.4772148132324219, "learning_rate": 0.000360644418872267, "loss": 0.3498, "step": 284920 }, { "epoch": 81.97065592635212, "grad_norm": 1.5792948007583618, "learning_rate": 0.00036058688147295743, "loss": 0.324, "step": 284930 }, { "epoch": 81.97353279631761, "grad_norm": 0.9321115016937256, "learning_rate": 0.0003605293440736479, "loss": 0.2923, "step": 284940 }, { "epoch": 81.97640966628309, "grad_norm": 2.542567729949951, "learning_rate": 0.00036047180667433834, "loss": 0.3824, "step": 284950 }, { "epoch": 81.97928653624857, "grad_norm": 1.4668841361999512, "learning_rate": 0.0003604142692750288, "loss": 0.4151, "step": 284960 }, { "epoch": 81.98216340621404, "grad_norm": 1.1556419134140015, "learning_rate": 0.0003603567318757192, "loss": 0.2813, "step": 284970 }, { "epoch": 81.98504027617952, "grad_norm": 1.6116411685943604, "learning_rate": 0.0003602991944764097, "loss": 0.3185, "step": 284980 }, { "epoch": 81.987917146145, "grad_norm": 1.6196973323822021, "learning_rate": 0.0003602416570771001, "loss": 0.3822, "step": 284990 }, { "epoch": 81.99079401611047, "grad_norm": 1.748538613319397, "learning_rate": 0.00036018411967779057, "loss": 0.3749, "step": 285000 }, { "epoch": 81.99367088607595, "grad_norm": 1.001497745513916, "learning_rate": 0.000360126582278481, "loss": 0.3963, "step": 285010 }, { "epoch": 81.99654775604142, "grad_norm": 0.9652308821678162, "learning_rate": 0.0003600690448791715, "loss": 0.3431, "step": 285020 }, { "epoch": 81.9994246260069, "grad_norm": 1.8971449136734009, "learning_rate": 0.00036001150747986193, "loss": 0.363, "step": 285030 }, { "epoch": 82.00230149597238, "grad_norm": 1.0101397037506104, "learning_rate": 0.0003599539700805524, "loss": 0.2981, "step": 285040 }, { "epoch": 82.00517836593787, "grad_norm": 2.749396800994873, "learning_rate": 0.00035989643268124284, "loss": 0.2736, "step": 285050 }, { "epoch": 82.00805523590334, "grad_norm": 1.2978620529174805, "learning_rate": 0.00035983889528193324, "loss": 0.3525, "step": 285060 }, { "epoch": 82.01093210586882, "grad_norm": 2.514261484146118, "learning_rate": 0.00035978135788262375, "loss": 0.3001, "step": 285070 }, { "epoch": 82.0138089758343, "grad_norm": 1.0682563781738281, "learning_rate": 0.00035972382048331415, "loss": 0.2996, "step": 285080 }, { "epoch": 82.01668584579977, "grad_norm": 0.9271787405014038, "learning_rate": 0.0003596662830840046, "loss": 0.3018, "step": 285090 }, { "epoch": 82.01956271576525, "grad_norm": 0.8149799108505249, "learning_rate": 0.000359608745684695, "loss": 0.2829, "step": 285100 }, { "epoch": 82.02243958573072, "grad_norm": 1.5946708917617798, "learning_rate": 0.0003595512082853855, "loss": 0.4053, "step": 285110 }, { "epoch": 82.0253164556962, "grad_norm": 0.6030726432800293, "learning_rate": 0.0003594936708860759, "loss": 0.3421, "step": 285120 }, { "epoch": 82.02819332566168, "grad_norm": 1.755022644996643, "learning_rate": 0.0003594361334867664, "loss": 0.3414, "step": 285130 }, { "epoch": 82.03107019562715, "grad_norm": 1.6602742671966553, "learning_rate": 0.0003593785960874569, "loss": 0.3365, "step": 285140 }, { "epoch": 82.03394706559264, "grad_norm": 1.675184965133667, "learning_rate": 0.0003593210586881473, "loss": 0.3538, "step": 285150 }, { "epoch": 82.03682393555812, "grad_norm": 1.2013763189315796, "learning_rate": 0.00035926352128883774, "loss": 0.2959, "step": 285160 }, { "epoch": 82.0397008055236, "grad_norm": 1.5964912176132202, "learning_rate": 0.0003592059838895282, "loss": 0.3424, "step": 285170 }, { "epoch": 82.04257767548907, "grad_norm": 0.8428995013237, "learning_rate": 0.00035914844649021866, "loss": 0.3709, "step": 285180 }, { "epoch": 82.04545454545455, "grad_norm": 1.1486692428588867, "learning_rate": 0.00035909090909090906, "loss": 0.2945, "step": 285190 }, { "epoch": 82.04833141542002, "grad_norm": 0.9139612317085266, "learning_rate": 0.00035903337169159957, "loss": 0.3886, "step": 285200 }, { "epoch": 82.0512082853855, "grad_norm": 0.5822722911834717, "learning_rate": 0.00035897583429228997, "loss": 0.2406, "step": 285210 }, { "epoch": 82.05408515535098, "grad_norm": 1.2955150604248047, "learning_rate": 0.0003589182968929804, "loss": 0.3663, "step": 285220 }, { "epoch": 82.05696202531645, "grad_norm": 1.3801847696304321, "learning_rate": 0.00035886075949367093, "loss": 0.3301, "step": 285230 }, { "epoch": 82.05983889528193, "grad_norm": 1.396409511566162, "learning_rate": 0.00035880322209436133, "loss": 0.3337, "step": 285240 }, { "epoch": 82.0627157652474, "grad_norm": 1.4429357051849365, "learning_rate": 0.0003587456846950518, "loss": 0.3206, "step": 285250 }, { "epoch": 82.0655926352129, "grad_norm": 0.8942840695381165, "learning_rate": 0.00035868814729574225, "loss": 0.3667, "step": 285260 }, { "epoch": 82.06846950517837, "grad_norm": 2.2028307914733887, "learning_rate": 0.0003586306098964327, "loss": 0.3582, "step": 285270 }, { "epoch": 82.07134637514385, "grad_norm": 0.7213691473007202, "learning_rate": 0.0003585730724971231, "loss": 0.281, "step": 285280 }, { "epoch": 82.07422324510932, "grad_norm": 1.7303792238235474, "learning_rate": 0.0003585155350978136, "loss": 0.2996, "step": 285290 }, { "epoch": 82.0771001150748, "grad_norm": 1.6884146928787231, "learning_rate": 0.000358457997698504, "loss": 0.2947, "step": 285300 }, { "epoch": 82.07997698504028, "grad_norm": 1.2707666158676147, "learning_rate": 0.00035840046029919447, "loss": 0.3543, "step": 285310 }, { "epoch": 82.08285385500575, "grad_norm": 0.610007643699646, "learning_rate": 0.000358342922899885, "loss": 0.2911, "step": 285320 }, { "epoch": 82.08573072497123, "grad_norm": 2.0201194286346436, "learning_rate": 0.0003582853855005754, "loss": 0.3196, "step": 285330 }, { "epoch": 82.0886075949367, "grad_norm": 1.4461805820465088, "learning_rate": 0.00035822784810126584, "loss": 0.3105, "step": 285340 }, { "epoch": 82.09148446490218, "grad_norm": 0.7021640539169312, "learning_rate": 0.0003581703107019563, "loss": 0.2629, "step": 285350 }, { "epoch": 82.09436133486767, "grad_norm": 1.063454508781433, "learning_rate": 0.00035811277330264675, "loss": 0.3192, "step": 285360 }, { "epoch": 82.09723820483315, "grad_norm": 1.6180793046951294, "learning_rate": 0.00035805523590333715, "loss": 0.3123, "step": 285370 }, { "epoch": 82.10011507479862, "grad_norm": 0.7580506205558777, "learning_rate": 0.00035799769850402766, "loss": 0.2965, "step": 285380 }, { "epoch": 82.1029919447641, "grad_norm": 0.993013858795166, "learning_rate": 0.00035794016110471806, "loss": 0.3678, "step": 285390 }, { "epoch": 82.10586881472958, "grad_norm": 0.9130436778068542, "learning_rate": 0.0003578826237054085, "loss": 0.3185, "step": 285400 }, { "epoch": 82.10874568469505, "grad_norm": 0.9154815077781677, "learning_rate": 0.00035782508630609897, "loss": 0.3315, "step": 285410 }, { "epoch": 82.11162255466053, "grad_norm": 0.8632319569587708, "learning_rate": 0.0003577675489067894, "loss": 0.3014, "step": 285420 }, { "epoch": 82.114499424626, "grad_norm": 1.4443202018737793, "learning_rate": 0.0003577100115074799, "loss": 0.3469, "step": 285430 }, { "epoch": 82.11737629459148, "grad_norm": 0.7359747290611267, "learning_rate": 0.00035765247410817034, "loss": 0.3086, "step": 285440 }, { "epoch": 82.12025316455696, "grad_norm": 1.5508614778518677, "learning_rate": 0.0003575949367088608, "loss": 0.279, "step": 285450 }, { "epoch": 82.12313003452243, "grad_norm": 1.3498615026474, "learning_rate": 0.0003575373993095512, "loss": 0.3477, "step": 285460 }, { "epoch": 82.12600690448792, "grad_norm": 2.2654075622558594, "learning_rate": 0.0003574798619102417, "loss": 0.3206, "step": 285470 }, { "epoch": 82.1288837744534, "grad_norm": 1.3104993104934692, "learning_rate": 0.0003574223245109321, "loss": 0.3199, "step": 285480 }, { "epoch": 82.13176064441888, "grad_norm": 1.1661537885665894, "learning_rate": 0.00035736478711162256, "loss": 0.3824, "step": 285490 }, { "epoch": 82.13463751438435, "grad_norm": 0.7685389518737793, "learning_rate": 0.00035730724971231296, "loss": 0.3443, "step": 285500 }, { "epoch": 82.13751438434983, "grad_norm": 1.1707558631896973, "learning_rate": 0.00035724971231300347, "loss": 0.2752, "step": 285510 }, { "epoch": 82.1403912543153, "grad_norm": 1.2958472967147827, "learning_rate": 0.0003571921749136939, "loss": 0.262, "step": 285520 }, { "epoch": 82.14326812428078, "grad_norm": 0.6842197179794312, "learning_rate": 0.00035713463751438433, "loss": 0.3969, "step": 285530 }, { "epoch": 82.14614499424626, "grad_norm": 1.2736834287643433, "learning_rate": 0.00035707710011507484, "loss": 0.3879, "step": 285540 }, { "epoch": 82.14902186421173, "grad_norm": 1.4412232637405396, "learning_rate": 0.00035701956271576524, "loss": 0.3206, "step": 285550 }, { "epoch": 82.15189873417721, "grad_norm": 1.0423622131347656, "learning_rate": 0.0003569620253164557, "loss": 0.3064, "step": 285560 }, { "epoch": 82.1547756041427, "grad_norm": 1.1054061651229858, "learning_rate": 0.00035690448791714615, "loss": 0.3803, "step": 285570 }, { "epoch": 82.15765247410818, "grad_norm": 1.802187442779541, "learning_rate": 0.0003568469505178366, "loss": 0.3808, "step": 285580 }, { "epoch": 82.16052934407365, "grad_norm": 1.1066848039627075, "learning_rate": 0.000356789413118527, "loss": 0.3888, "step": 285590 }, { "epoch": 82.16340621403913, "grad_norm": 0.8684167861938477, "learning_rate": 0.0003567318757192175, "loss": 0.3489, "step": 285600 }, { "epoch": 82.1662830840046, "grad_norm": 0.8905234932899475, "learning_rate": 0.0003566743383199079, "loss": 0.2894, "step": 285610 }, { "epoch": 82.16915995397008, "grad_norm": 1.2544140815734863, "learning_rate": 0.00035661680092059837, "loss": 0.3011, "step": 285620 }, { "epoch": 82.17203682393556, "grad_norm": 0.7017982602119446, "learning_rate": 0.0003565592635212889, "loss": 0.2996, "step": 285630 }, { "epoch": 82.17491369390103, "grad_norm": 1.58219575881958, "learning_rate": 0.0003565017261219793, "loss": 0.3756, "step": 285640 }, { "epoch": 82.17779056386651, "grad_norm": 1.3949214220046997, "learning_rate": 0.00035644418872266974, "loss": 0.3296, "step": 285650 }, { "epoch": 82.18066743383199, "grad_norm": 0.8420729637145996, "learning_rate": 0.0003563866513233602, "loss": 0.2887, "step": 285660 }, { "epoch": 82.18354430379746, "grad_norm": 0.7600641846656799, "learning_rate": 0.00035632911392405065, "loss": 0.2532, "step": 285670 }, { "epoch": 82.18642117376295, "grad_norm": 2.1034326553344727, "learning_rate": 0.00035627157652474105, "loss": 0.419, "step": 285680 }, { "epoch": 82.18929804372843, "grad_norm": 1.070803165435791, "learning_rate": 0.00035621403912543156, "loss": 0.3699, "step": 285690 }, { "epoch": 82.1921749136939, "grad_norm": 0.6638868451118469, "learning_rate": 0.00035615650172612196, "loss": 0.3843, "step": 285700 }, { "epoch": 82.19505178365938, "grad_norm": 1.9385594129562378, "learning_rate": 0.0003560989643268124, "loss": 0.3473, "step": 285710 }, { "epoch": 82.19792865362486, "grad_norm": 0.8530266284942627, "learning_rate": 0.00035604142692750293, "loss": 0.3147, "step": 285720 }, { "epoch": 82.20080552359033, "grad_norm": 1.0601680278778076, "learning_rate": 0.00035598388952819333, "loss": 0.3005, "step": 285730 }, { "epoch": 82.20368239355581, "grad_norm": 1.0834331512451172, "learning_rate": 0.0003559263521288838, "loss": 0.2804, "step": 285740 }, { "epoch": 82.20655926352129, "grad_norm": 1.5333867073059082, "learning_rate": 0.00035586881472957424, "loss": 0.3817, "step": 285750 }, { "epoch": 82.20943613348676, "grad_norm": 1.1244008541107178, "learning_rate": 0.0003558112773302647, "loss": 0.2936, "step": 285760 }, { "epoch": 82.21231300345224, "grad_norm": 0.6641153693199158, "learning_rate": 0.0003557537399309551, "loss": 0.4219, "step": 285770 }, { "epoch": 82.21518987341773, "grad_norm": 1.3438959121704102, "learning_rate": 0.0003556962025316456, "loss": 0.3618, "step": 285780 }, { "epoch": 82.2180667433832, "grad_norm": 0.948534369468689, "learning_rate": 0.000355638665132336, "loss": 0.2703, "step": 285790 }, { "epoch": 82.22094361334868, "grad_norm": 0.8123205900192261, "learning_rate": 0.00035558112773302646, "loss": 0.295, "step": 285800 }, { "epoch": 82.22382048331416, "grad_norm": 1.2326310873031616, "learning_rate": 0.000355523590333717, "loss": 0.3858, "step": 285810 }, { "epoch": 82.22669735327963, "grad_norm": 1.3754626512527466, "learning_rate": 0.0003554660529344074, "loss": 0.3501, "step": 285820 }, { "epoch": 82.22957422324511, "grad_norm": 0.6498106122016907, "learning_rate": 0.00035540851553509783, "loss": 0.3135, "step": 285830 }, { "epoch": 82.23245109321059, "grad_norm": 1.0354734659194946, "learning_rate": 0.0003553509781357883, "loss": 0.2924, "step": 285840 }, { "epoch": 82.23532796317606, "grad_norm": 1.168983817100525, "learning_rate": 0.00035529344073647874, "loss": 0.3156, "step": 285850 }, { "epoch": 82.23820483314154, "grad_norm": 1.3237855434417725, "learning_rate": 0.00035523590333716914, "loss": 0.3051, "step": 285860 }, { "epoch": 82.24108170310701, "grad_norm": 2.2351300716400146, "learning_rate": 0.00035517836593785965, "loss": 0.298, "step": 285870 }, { "epoch": 82.24395857307249, "grad_norm": 0.8473442196846008, "learning_rate": 0.00035512082853855005, "loss": 0.3023, "step": 285880 }, { "epoch": 82.24683544303798, "grad_norm": 1.1530866622924805, "learning_rate": 0.0003550632911392405, "loss": 0.3114, "step": 285890 }, { "epoch": 82.24971231300346, "grad_norm": 0.648206889629364, "learning_rate": 0.0003550057537399309, "loss": 0.2657, "step": 285900 }, { "epoch": 82.25258918296893, "grad_norm": 1.3015729188919067, "learning_rate": 0.0003549482163406214, "loss": 0.3247, "step": 285910 }, { "epoch": 82.25546605293441, "grad_norm": 1.229089617729187, "learning_rate": 0.0003548906789413119, "loss": 0.3306, "step": 285920 }, { "epoch": 82.25834292289989, "grad_norm": 0.9204375743865967, "learning_rate": 0.0003548331415420023, "loss": 0.511, "step": 285930 }, { "epoch": 82.26121979286536, "grad_norm": 1.1261507272720337, "learning_rate": 0.0003547756041426928, "loss": 0.3273, "step": 285940 }, { "epoch": 82.26409666283084, "grad_norm": 0.889037549495697, "learning_rate": 0.0003547180667433832, "loss": 0.286, "step": 285950 }, { "epoch": 82.26697353279631, "grad_norm": 1.7935943603515625, "learning_rate": 0.00035466052934407364, "loss": 0.2802, "step": 285960 }, { "epoch": 82.26985040276179, "grad_norm": 1.7538671493530273, "learning_rate": 0.0003546029919447641, "loss": 0.3563, "step": 285970 }, { "epoch": 82.27272727272727, "grad_norm": 1.1005338430404663, "learning_rate": 0.00035454545454545455, "loss": 0.371, "step": 285980 }, { "epoch": 82.27560414269276, "grad_norm": 1.3273464441299438, "learning_rate": 0.00035448791714614496, "loss": 0.3146, "step": 285990 }, { "epoch": 82.27848101265823, "grad_norm": 1.7032192945480347, "learning_rate": 0.00035443037974683546, "loss": 0.3717, "step": 286000 }, { "epoch": 82.28135788262371, "grad_norm": 1.0082676410675049, "learning_rate": 0.0003543728423475259, "loss": 0.2897, "step": 286010 }, { "epoch": 82.28423475258919, "grad_norm": 0.9707586765289307, "learning_rate": 0.0003543153049482163, "loss": 0.2792, "step": 286020 }, { "epoch": 82.28711162255466, "grad_norm": 1.357828140258789, "learning_rate": 0.00035425776754890683, "loss": 0.2883, "step": 286030 }, { "epoch": 82.28998849252014, "grad_norm": 1.3959758281707764, "learning_rate": 0.00035420023014959723, "loss": 0.3061, "step": 286040 }, { "epoch": 82.29286536248561, "grad_norm": 1.0695801973342896, "learning_rate": 0.0003541426927502877, "loss": 0.3657, "step": 286050 }, { "epoch": 82.29574223245109, "grad_norm": 0.8860068321228027, "learning_rate": 0.00035408515535097814, "loss": 0.3452, "step": 286060 }, { "epoch": 82.29861910241657, "grad_norm": 0.9515308737754822, "learning_rate": 0.0003540276179516686, "loss": 0.3426, "step": 286070 }, { "epoch": 82.30149597238204, "grad_norm": 1.5511720180511475, "learning_rate": 0.000353970080552359, "loss": 0.2885, "step": 286080 }, { "epoch": 82.30437284234753, "grad_norm": 1.7402307987213135, "learning_rate": 0.0003539125431530495, "loss": 0.3494, "step": 286090 }, { "epoch": 82.30724971231301, "grad_norm": 1.3729904890060425, "learning_rate": 0.0003538550057537399, "loss": 0.3231, "step": 286100 }, { "epoch": 82.31012658227849, "grad_norm": 0.880244255065918, "learning_rate": 0.00035379746835443037, "loss": 0.336, "step": 286110 }, { "epoch": 82.31300345224396, "grad_norm": 1.028534173965454, "learning_rate": 0.0003537399309551209, "loss": 0.405, "step": 286120 }, { "epoch": 82.31588032220944, "grad_norm": 1.1188803911209106, "learning_rate": 0.0003536823935558113, "loss": 0.3223, "step": 286130 }, { "epoch": 82.31875719217491, "grad_norm": 0.6580283045768738, "learning_rate": 0.00035362485615650173, "loss": 0.3404, "step": 286140 }, { "epoch": 82.32163406214039, "grad_norm": 1.0944541692733765, "learning_rate": 0.0003535673187571922, "loss": 0.3587, "step": 286150 }, { "epoch": 82.32451093210587, "grad_norm": 0.8128039836883545, "learning_rate": 0.00035350978135788264, "loss": 0.304, "step": 286160 }, { "epoch": 82.32738780207134, "grad_norm": 2.4138331413269043, "learning_rate": 0.00035345224395857305, "loss": 0.3772, "step": 286170 }, { "epoch": 82.33026467203682, "grad_norm": 0.8943443894386292, "learning_rate": 0.00035339470655926356, "loss": 0.345, "step": 286180 }, { "epoch": 82.3331415420023, "grad_norm": 3.7550315856933594, "learning_rate": 0.00035333716915995396, "loss": 0.3662, "step": 286190 }, { "epoch": 82.33601841196779, "grad_norm": 2.2461776733398438, "learning_rate": 0.0003532796317606444, "loss": 0.3325, "step": 286200 }, { "epoch": 82.33889528193326, "grad_norm": 1.1704741716384888, "learning_rate": 0.0003532220943613349, "loss": 0.2733, "step": 286210 }, { "epoch": 82.34177215189874, "grad_norm": 1.0754300355911255, "learning_rate": 0.0003531645569620253, "loss": 0.3571, "step": 286220 }, { "epoch": 82.34464902186421, "grad_norm": 2.3119943141937256, "learning_rate": 0.0003531070195627158, "loss": 0.3944, "step": 286230 }, { "epoch": 82.34752589182969, "grad_norm": 1.1624633073806763, "learning_rate": 0.00035304948216340623, "loss": 0.3077, "step": 286240 }, { "epoch": 82.35040276179517, "grad_norm": 1.1844055652618408, "learning_rate": 0.0003529919447640967, "loss": 0.3111, "step": 286250 }, { "epoch": 82.35327963176064, "grad_norm": 1.8038915395736694, "learning_rate": 0.0003529344073647871, "loss": 0.3619, "step": 286260 }, { "epoch": 82.35615650172612, "grad_norm": 1.6922168731689453, "learning_rate": 0.0003528768699654776, "loss": 0.3948, "step": 286270 }, { "epoch": 82.3590333716916, "grad_norm": 1.229703426361084, "learning_rate": 0.000352819332566168, "loss": 0.3814, "step": 286280 }, { "epoch": 82.36191024165707, "grad_norm": 0.9340901374816895, "learning_rate": 0.00035276179516685846, "loss": 0.3107, "step": 286290 }, { "epoch": 82.36478711162256, "grad_norm": 1.9105632305145264, "learning_rate": 0.00035270425776754897, "loss": 0.3864, "step": 286300 }, { "epoch": 82.36766398158804, "grad_norm": 2.0638060569763184, "learning_rate": 0.00035264672036823937, "loss": 0.3313, "step": 286310 }, { "epoch": 82.37054085155351, "grad_norm": 1.8796614408493042, "learning_rate": 0.0003525891829689298, "loss": 0.3111, "step": 286320 }, { "epoch": 82.37341772151899, "grad_norm": 1.0500495433807373, "learning_rate": 0.0003525316455696202, "loss": 0.3375, "step": 286330 }, { "epoch": 82.37629459148447, "grad_norm": 1.3963873386383057, "learning_rate": 0.00035247410817031074, "loss": 0.37, "step": 286340 }, { "epoch": 82.37917146144994, "grad_norm": 2.217574119567871, "learning_rate": 0.00035241657077100114, "loss": 0.3593, "step": 286350 }, { "epoch": 82.38204833141542, "grad_norm": 2.0293514728546143, "learning_rate": 0.0003523590333716916, "loss": 0.3139, "step": 286360 }, { "epoch": 82.3849252013809, "grad_norm": 1.59402334690094, "learning_rate": 0.00035230149597238205, "loss": 0.3937, "step": 286370 }, { "epoch": 82.38780207134637, "grad_norm": 0.9727925062179565, "learning_rate": 0.0003522439585730725, "loss": 0.3582, "step": 286380 }, { "epoch": 82.39067894131185, "grad_norm": 1.2909061908721924, "learning_rate": 0.0003521864211737629, "loss": 0.3603, "step": 286390 }, { "epoch": 82.39355581127732, "grad_norm": 1.4946706295013428, "learning_rate": 0.0003521288837744534, "loss": 0.3892, "step": 286400 }, { "epoch": 82.39643268124281, "grad_norm": 1.8905104398727417, "learning_rate": 0.00035207134637514387, "loss": 0.3639, "step": 286410 }, { "epoch": 82.39930955120829, "grad_norm": 0.5279074907302856, "learning_rate": 0.00035201380897583427, "loss": 0.4066, "step": 286420 }, { "epoch": 82.40218642117377, "grad_norm": 0.7513427734375, "learning_rate": 0.0003519562715765248, "loss": 0.3415, "step": 286430 }, { "epoch": 82.40506329113924, "grad_norm": 1.2100131511688232, "learning_rate": 0.0003518987341772152, "loss": 0.2867, "step": 286440 }, { "epoch": 82.40794016110472, "grad_norm": 1.0534850358963013, "learning_rate": 0.00035184119677790564, "loss": 0.3462, "step": 286450 }, { "epoch": 82.4108170310702, "grad_norm": 0.9482805728912354, "learning_rate": 0.0003517836593785961, "loss": 0.3665, "step": 286460 }, { "epoch": 82.41369390103567, "grad_norm": 2.1801183223724365, "learning_rate": 0.00035172612197928655, "loss": 0.4082, "step": 286470 }, { "epoch": 82.41657077100115, "grad_norm": 1.0977193117141724, "learning_rate": 0.00035166858457997695, "loss": 0.3257, "step": 286480 }, { "epoch": 82.41944764096662, "grad_norm": 0.7878903746604919, "learning_rate": 0.00035161104718066746, "loss": 0.2683, "step": 286490 }, { "epoch": 82.4223245109321, "grad_norm": 0.8266456723213196, "learning_rate": 0.0003515535097813579, "loss": 0.3382, "step": 286500 }, { "epoch": 82.42520138089759, "grad_norm": 1.3096288442611694, "learning_rate": 0.0003514959723820483, "loss": 0.281, "step": 286510 }, { "epoch": 82.42807825086307, "grad_norm": 2.121124029159546, "learning_rate": 0.0003514384349827388, "loss": 0.3403, "step": 286520 }, { "epoch": 82.43095512082854, "grad_norm": 0.9061451554298401, "learning_rate": 0.0003513808975834292, "loss": 0.3116, "step": 286530 }, { "epoch": 82.43383199079402, "grad_norm": 1.1411137580871582, "learning_rate": 0.0003513233601841197, "loss": 0.2867, "step": 286540 }, { "epoch": 82.4367088607595, "grad_norm": 2.171273708343506, "learning_rate": 0.00035126582278481014, "loss": 0.3549, "step": 286550 }, { "epoch": 82.43958573072497, "grad_norm": 1.8624374866485596, "learning_rate": 0.0003512082853855006, "loss": 0.3809, "step": 286560 }, { "epoch": 82.44246260069045, "grad_norm": 1.3442363739013672, "learning_rate": 0.000351150747986191, "loss": 0.2767, "step": 286570 }, { "epoch": 82.44533947065592, "grad_norm": 1.445963978767395, "learning_rate": 0.0003510932105868815, "loss": 0.2935, "step": 286580 }, { "epoch": 82.4482163406214, "grad_norm": 1.1308491230010986, "learning_rate": 0.0003510356731875719, "loss": 0.3637, "step": 286590 }, { "epoch": 82.45109321058688, "grad_norm": 1.528023600578308, "learning_rate": 0.00035097813578826236, "loss": 0.4327, "step": 286600 }, { "epoch": 82.45397008055235, "grad_norm": 1.2378405332565308, "learning_rate": 0.00035092059838895287, "loss": 0.3159, "step": 286610 }, { "epoch": 82.45684695051784, "grad_norm": 1.1854498386383057, "learning_rate": 0.00035086306098964327, "loss": 0.3144, "step": 286620 }, { "epoch": 82.45972382048332, "grad_norm": 1.2211071252822876, "learning_rate": 0.00035080552359033373, "loss": 0.3994, "step": 286630 }, { "epoch": 82.4626006904488, "grad_norm": 1.4802035093307495, "learning_rate": 0.0003507479861910242, "loss": 0.4242, "step": 286640 }, { "epoch": 82.46547756041427, "grad_norm": 0.8109785318374634, "learning_rate": 0.00035069044879171464, "loss": 0.4144, "step": 286650 }, { "epoch": 82.46835443037975, "grad_norm": 0.9434641003608704, "learning_rate": 0.00035063291139240504, "loss": 0.3237, "step": 286660 }, { "epoch": 82.47123130034522, "grad_norm": 1.5443406105041504, "learning_rate": 0.00035057537399309555, "loss": 0.3211, "step": 286670 }, { "epoch": 82.4741081703107, "grad_norm": 1.043428659439087, "learning_rate": 0.00035051783659378595, "loss": 0.2888, "step": 286680 }, { "epoch": 82.47698504027618, "grad_norm": 1.427123785018921, "learning_rate": 0.0003504602991944764, "loss": 0.2719, "step": 286690 }, { "epoch": 82.47986191024165, "grad_norm": 1.051188588142395, "learning_rate": 0.0003504027617951669, "loss": 0.2787, "step": 286700 }, { "epoch": 82.48273878020713, "grad_norm": 1.3227508068084717, "learning_rate": 0.0003503452243958573, "loss": 0.2883, "step": 286710 }, { "epoch": 82.48561565017262, "grad_norm": 1.693103313446045, "learning_rate": 0.0003502876869965478, "loss": 0.4255, "step": 286720 }, { "epoch": 82.4884925201381, "grad_norm": 1.031779408454895, "learning_rate": 0.0003502301495972382, "loss": 0.3067, "step": 286730 }, { "epoch": 82.49136939010357, "grad_norm": 1.0179275274276733, "learning_rate": 0.0003501726121979287, "loss": 0.3107, "step": 286740 }, { "epoch": 82.49424626006905, "grad_norm": 1.9942315816879272, "learning_rate": 0.0003501150747986191, "loss": 0.37, "step": 286750 }, { "epoch": 82.49712313003452, "grad_norm": 0.6290258765220642, "learning_rate": 0.00035005753739930954, "loss": 0.3626, "step": 286760 }, { "epoch": 82.5, "grad_norm": 2.1308720111846924, "learning_rate": 0.00035, "loss": 0.3615, "step": 286770 }, { "epoch": 82.50287686996548, "grad_norm": 1.3966360092163086, "learning_rate": 0.00034994246260069045, "loss": 0.3162, "step": 286780 }, { "epoch": 82.50575373993095, "grad_norm": 1.6652861833572388, "learning_rate": 0.0003498849252013809, "loss": 0.2832, "step": 286790 }, { "epoch": 82.50863060989643, "grad_norm": 0.8870100975036621, "learning_rate": 0.00034982738780207136, "loss": 0.3366, "step": 286800 }, { "epoch": 82.5115074798619, "grad_norm": 1.2583626508712769, "learning_rate": 0.0003497698504027618, "loss": 0.3262, "step": 286810 }, { "epoch": 82.51438434982738, "grad_norm": 2.0306320190429688, "learning_rate": 0.0003497123130034522, "loss": 0.3043, "step": 286820 }, { "epoch": 82.51726121979287, "grad_norm": 1.3313438892364502, "learning_rate": 0.00034965477560414273, "loss": 0.3933, "step": 286830 }, { "epoch": 82.52013808975835, "grad_norm": 0.7213799357414246, "learning_rate": 0.00034959723820483313, "loss": 0.3585, "step": 286840 }, { "epoch": 82.52301495972382, "grad_norm": 1.313868522644043, "learning_rate": 0.0003495397008055236, "loss": 0.3018, "step": 286850 }, { "epoch": 82.5258918296893, "grad_norm": 1.590848445892334, "learning_rate": 0.00034948216340621404, "loss": 0.3473, "step": 286860 }, { "epoch": 82.52876869965478, "grad_norm": 1.4569376707077026, "learning_rate": 0.0003494246260069045, "loss": 0.3762, "step": 286870 }, { "epoch": 82.53164556962025, "grad_norm": 1.2223261594772339, "learning_rate": 0.0003493670886075949, "loss": 0.3761, "step": 286880 }, { "epoch": 82.53452243958573, "grad_norm": 1.42673921585083, "learning_rate": 0.0003493095512082854, "loss": 0.3329, "step": 286890 }, { "epoch": 82.5373993095512, "grad_norm": 0.7986379861831665, "learning_rate": 0.00034925201380897586, "loss": 0.3404, "step": 286900 }, { "epoch": 82.54027617951668, "grad_norm": 1.4276907444000244, "learning_rate": 0.00034919447640966627, "loss": 0.404, "step": 286910 }, { "epoch": 82.54315304948216, "grad_norm": 0.8475366234779358, "learning_rate": 0.0003491369390103568, "loss": 0.3892, "step": 286920 }, { "epoch": 82.54602991944765, "grad_norm": 1.4938628673553467, "learning_rate": 0.0003490794016110472, "loss": 0.3058, "step": 286930 }, { "epoch": 82.54890678941312, "grad_norm": 0.7694796323776245, "learning_rate": 0.00034902186421173763, "loss": 0.2962, "step": 286940 }, { "epoch": 82.5517836593786, "grad_norm": 2.2744712829589844, "learning_rate": 0.0003489643268124281, "loss": 0.3941, "step": 286950 }, { "epoch": 82.55466052934408, "grad_norm": 1.44473135471344, "learning_rate": 0.00034890678941311854, "loss": 0.3607, "step": 286960 }, { "epoch": 82.55753739930955, "grad_norm": 0.37296372652053833, "learning_rate": 0.00034884925201380894, "loss": 0.2308, "step": 286970 }, { "epoch": 82.56041426927503, "grad_norm": 1.7959072589874268, "learning_rate": 0.00034879171461449945, "loss": 0.3271, "step": 286980 }, { "epoch": 82.5632911392405, "grad_norm": 1.037650465965271, "learning_rate": 0.0003487341772151899, "loss": 0.33, "step": 286990 }, { "epoch": 82.56616800920598, "grad_norm": 2.0715866088867188, "learning_rate": 0.0003486766398158803, "loss": 0.3864, "step": 287000 }, { "epoch": 82.56904487917146, "grad_norm": 1.1971642971038818, "learning_rate": 0.0003486191024165708, "loss": 0.2944, "step": 287010 }, { "epoch": 82.57192174913693, "grad_norm": 1.4951168298721313, "learning_rate": 0.0003485615650172612, "loss": 0.3065, "step": 287020 }, { "epoch": 82.57479861910241, "grad_norm": 1.6343175172805786, "learning_rate": 0.0003485040276179517, "loss": 0.3319, "step": 287030 }, { "epoch": 82.5776754890679, "grad_norm": 1.0191047191619873, "learning_rate": 0.00034844649021864213, "loss": 0.3173, "step": 287040 }, { "epoch": 82.58055235903338, "grad_norm": 1.7761709690093994, "learning_rate": 0.0003483889528193326, "loss": 0.4431, "step": 287050 }, { "epoch": 82.58342922899885, "grad_norm": 1.322606086730957, "learning_rate": 0.000348331415420023, "loss": 0.432, "step": 287060 }, { "epoch": 82.58630609896433, "grad_norm": 1.474435806274414, "learning_rate": 0.0003482738780207135, "loss": 0.3616, "step": 287070 }, { "epoch": 82.5891829689298, "grad_norm": 1.2425239086151123, "learning_rate": 0.0003482163406214039, "loss": 0.3332, "step": 287080 }, { "epoch": 82.59205983889528, "grad_norm": 1.9442845582962036, "learning_rate": 0.00034815880322209436, "loss": 0.3979, "step": 287090 }, { "epoch": 82.59493670886076, "grad_norm": 0.6100054979324341, "learning_rate": 0.00034810126582278487, "loss": 0.2809, "step": 287100 }, { "epoch": 82.59781357882623, "grad_norm": 1.3197364807128906, "learning_rate": 0.00034804372842347527, "loss": 0.3486, "step": 287110 }, { "epoch": 82.60069044879171, "grad_norm": 1.3060622215270996, "learning_rate": 0.0003479861910241657, "loss": 0.3256, "step": 287120 }, { "epoch": 82.60356731875719, "grad_norm": 0.7633944153785706, "learning_rate": 0.0003479286536248562, "loss": 0.3158, "step": 287130 }, { "epoch": 82.60644418872268, "grad_norm": 0.972688615322113, "learning_rate": 0.00034787111622554663, "loss": 0.3225, "step": 287140 }, { "epoch": 82.60932105868815, "grad_norm": 0.8177304863929749, "learning_rate": 0.00034781357882623703, "loss": 0.2773, "step": 287150 }, { "epoch": 82.61219792865363, "grad_norm": 1.372871994972229, "learning_rate": 0.0003477560414269275, "loss": 0.3638, "step": 287160 }, { "epoch": 82.6150747986191, "grad_norm": 1.3716084957122803, "learning_rate": 0.00034769850402761795, "loss": 0.413, "step": 287170 }, { "epoch": 82.61795166858458, "grad_norm": 1.4286760091781616, "learning_rate": 0.0003476409666283084, "loss": 0.3841, "step": 287180 }, { "epoch": 82.62082853855006, "grad_norm": 0.9940195679664612, "learning_rate": 0.00034758342922899886, "loss": 0.2559, "step": 287190 }, { "epoch": 82.62370540851553, "grad_norm": 1.3320016860961914, "learning_rate": 0.0003475258918296893, "loss": 0.3416, "step": 287200 }, { "epoch": 82.62658227848101, "grad_norm": 0.9146090149879456, "learning_rate": 0.00034746835443037977, "loss": 0.3052, "step": 287210 }, { "epoch": 82.62945914844649, "grad_norm": 0.9360948801040649, "learning_rate": 0.00034741081703107017, "loss": 0.3439, "step": 287220 }, { "epoch": 82.63233601841196, "grad_norm": 1.039498209953308, "learning_rate": 0.0003473532796317607, "loss": 0.319, "step": 287230 }, { "epoch": 82.63521288837744, "grad_norm": 0.8834120631217957, "learning_rate": 0.0003472957422324511, "loss": 0.3976, "step": 287240 }, { "epoch": 82.63808975834293, "grad_norm": 1.789868712425232, "learning_rate": 0.00034723820483314154, "loss": 0.3472, "step": 287250 }, { "epoch": 82.6409666283084, "grad_norm": 1.007699966430664, "learning_rate": 0.000347180667433832, "loss": 0.3595, "step": 287260 }, { "epoch": 82.64384349827388, "grad_norm": 2.0603530406951904, "learning_rate": 0.00034712313003452245, "loss": 0.3485, "step": 287270 }, { "epoch": 82.64672036823936, "grad_norm": 0.8020861744880676, "learning_rate": 0.00034706559263521285, "loss": 0.4173, "step": 287280 }, { "epoch": 82.64959723820483, "grad_norm": 2.5499629974365234, "learning_rate": 0.00034700805523590336, "loss": 0.3819, "step": 287290 }, { "epoch": 82.65247410817031, "grad_norm": 0.9148259162902832, "learning_rate": 0.0003469505178365938, "loss": 0.3563, "step": 287300 }, { "epoch": 82.65535097813579, "grad_norm": 0.9647265076637268, "learning_rate": 0.0003468929804372842, "loss": 0.3503, "step": 287310 }, { "epoch": 82.65822784810126, "grad_norm": 0.7416698932647705, "learning_rate": 0.0003468354430379747, "loss": 0.3748, "step": 287320 }, { "epoch": 82.66110471806674, "grad_norm": 1.372431993484497, "learning_rate": 0.0003467779056386651, "loss": 0.389, "step": 287330 }, { "epoch": 82.66398158803221, "grad_norm": 0.713890552520752, "learning_rate": 0.0003467203682393556, "loss": 0.281, "step": 287340 }, { "epoch": 82.6668584579977, "grad_norm": 0.8808153867721558, "learning_rate": 0.00034666283084004604, "loss": 0.2979, "step": 287350 }, { "epoch": 82.66973532796318, "grad_norm": 1.5557332038879395, "learning_rate": 0.0003466052934407365, "loss": 0.4075, "step": 287360 }, { "epoch": 82.67261219792866, "grad_norm": 2.240436553955078, "learning_rate": 0.0003465477560414269, "loss": 0.3288, "step": 287370 }, { "epoch": 82.67548906789413, "grad_norm": 1.1887962818145752, "learning_rate": 0.0003464902186421174, "loss": 0.3054, "step": 287380 }, { "epoch": 82.67836593785961, "grad_norm": 1.9786759614944458, "learning_rate": 0.00034643268124280786, "loss": 0.2752, "step": 287390 }, { "epoch": 82.68124280782509, "grad_norm": 1.2105365991592407, "learning_rate": 0.00034637514384349826, "loss": 0.2944, "step": 287400 }, { "epoch": 82.68411967779056, "grad_norm": 0.9082688689231873, "learning_rate": 0.00034631760644418877, "loss": 0.3705, "step": 287410 }, { "epoch": 82.68699654775604, "grad_norm": 2.983058452606201, "learning_rate": 0.00034626006904487917, "loss": 0.3219, "step": 287420 }, { "epoch": 82.68987341772151, "grad_norm": 1.7897776365280151, "learning_rate": 0.0003462025316455696, "loss": 0.3533, "step": 287430 }, { "epoch": 82.69275028768699, "grad_norm": 0.9361495971679688, "learning_rate": 0.0003461449942462601, "loss": 0.3395, "step": 287440 }, { "epoch": 82.69562715765247, "grad_norm": 0.8103236556053162, "learning_rate": 0.00034608745684695054, "loss": 0.3611, "step": 287450 }, { "epoch": 82.69850402761796, "grad_norm": 1.3262856006622314, "learning_rate": 0.00034602991944764094, "loss": 0.4203, "step": 287460 }, { "epoch": 82.70138089758343, "grad_norm": 3.169907569885254, "learning_rate": 0.00034597238204833145, "loss": 0.354, "step": 287470 }, { "epoch": 82.70425776754891, "grad_norm": 0.8971473574638367, "learning_rate": 0.0003459148446490219, "loss": 0.336, "step": 287480 }, { "epoch": 82.70713463751439, "grad_norm": 1.2638962268829346, "learning_rate": 0.0003458573072497123, "loss": 0.3202, "step": 287490 }, { "epoch": 82.71001150747986, "grad_norm": 1.280936598777771, "learning_rate": 0.0003457997698504028, "loss": 0.3367, "step": 287500 }, { "epoch": 82.71288837744534, "grad_norm": 0.9152662754058838, "learning_rate": 0.0003457422324510932, "loss": 0.374, "step": 287510 }, { "epoch": 82.71576524741081, "grad_norm": 1.6603024005889893, "learning_rate": 0.00034568469505178367, "loss": 0.4204, "step": 287520 }, { "epoch": 82.71864211737629, "grad_norm": 0.7816804647445679, "learning_rate": 0.0003456271576524741, "loss": 0.3028, "step": 287530 }, { "epoch": 82.72151898734177, "grad_norm": 1.3127033710479736, "learning_rate": 0.0003455696202531646, "loss": 0.3212, "step": 287540 }, { "epoch": 82.72439585730724, "grad_norm": 1.3872833251953125, "learning_rate": 0.000345512082853855, "loss": 0.3222, "step": 287550 }, { "epoch": 82.72727272727273, "grad_norm": 1.420669436454773, "learning_rate": 0.00034545454545454544, "loss": 0.2997, "step": 287560 }, { "epoch": 82.73014959723821, "grad_norm": 1.7158808708190918, "learning_rate": 0.0003453970080552359, "loss": 0.3026, "step": 287570 }, { "epoch": 82.73302646720369, "grad_norm": 0.5131593942642212, "learning_rate": 0.00034533947065592635, "loss": 0.3829, "step": 287580 }, { "epoch": 82.73590333716916, "grad_norm": 1.2289265394210815, "learning_rate": 0.0003452819332566168, "loss": 0.3391, "step": 287590 }, { "epoch": 82.73878020713464, "grad_norm": 1.7933443784713745, "learning_rate": 0.00034522439585730726, "loss": 0.3598, "step": 287600 }, { "epoch": 82.74165707710011, "grad_norm": 2.0302140712738037, "learning_rate": 0.0003451668584579977, "loss": 0.3574, "step": 287610 }, { "epoch": 82.74453394706559, "grad_norm": 1.0536366701126099, "learning_rate": 0.0003451093210586881, "loss": 0.4391, "step": 287620 }, { "epoch": 82.74741081703107, "grad_norm": 1.270325779914856, "learning_rate": 0.00034505178365937863, "loss": 0.4202, "step": 287630 }, { "epoch": 82.75028768699654, "grad_norm": 0.9952666163444519, "learning_rate": 0.00034499424626006903, "loss": 0.267, "step": 287640 }, { "epoch": 82.75316455696202, "grad_norm": 0.7254078388214111, "learning_rate": 0.0003449367088607595, "loss": 0.2744, "step": 287650 }, { "epoch": 82.75604142692751, "grad_norm": 1.393500566482544, "learning_rate": 0.00034487917146144994, "loss": 0.4228, "step": 287660 }, { "epoch": 82.75891829689299, "grad_norm": 0.8635119795799255, "learning_rate": 0.0003448216340621404, "loss": 0.3548, "step": 287670 }, { "epoch": 82.76179516685846, "grad_norm": 1.5713602304458618, "learning_rate": 0.00034476409666283085, "loss": 0.4049, "step": 287680 }, { "epoch": 82.76467203682394, "grad_norm": 1.0790984630584717, "learning_rate": 0.0003447065592635213, "loss": 0.3414, "step": 287690 }, { "epoch": 82.76754890678941, "grad_norm": 0.8510269522666931, "learning_rate": 0.00034464902186421176, "loss": 0.2888, "step": 287700 }, { "epoch": 82.77042577675489, "grad_norm": 1.8011739253997803, "learning_rate": 0.00034459148446490216, "loss": 0.409, "step": 287710 }, { "epoch": 82.77330264672037, "grad_norm": 2.436919689178467, "learning_rate": 0.0003445339470655927, "loss": 0.3614, "step": 287720 }, { "epoch": 82.77617951668584, "grad_norm": 1.4720520973205566, "learning_rate": 0.0003444764096662831, "loss": 0.3663, "step": 287730 }, { "epoch": 82.77905638665132, "grad_norm": 1.3572741746902466, "learning_rate": 0.00034441887226697353, "loss": 0.4235, "step": 287740 }, { "epoch": 82.7819332566168, "grad_norm": 1.562051773071289, "learning_rate": 0.000344361334867664, "loss": 0.3046, "step": 287750 }, { "epoch": 82.78481012658227, "grad_norm": 1.2659698724746704, "learning_rate": 0.00034430379746835444, "loss": 0.3413, "step": 287760 }, { "epoch": 82.78768699654776, "grad_norm": 1.540043830871582, "learning_rate": 0.00034424626006904484, "loss": 0.5052, "step": 287770 }, { "epoch": 82.79056386651324, "grad_norm": 0.8496208190917969, "learning_rate": 0.00034418872266973535, "loss": 0.3385, "step": 287780 }, { "epoch": 82.79344073647871, "grad_norm": 1.7575666904449463, "learning_rate": 0.0003441311852704258, "loss": 0.3635, "step": 287790 }, { "epoch": 82.79631760644419, "grad_norm": 0.855925440788269, "learning_rate": 0.0003440736478711162, "loss": 0.305, "step": 287800 }, { "epoch": 82.79919447640967, "grad_norm": 0.9253110289573669, "learning_rate": 0.0003440161104718067, "loss": 0.3226, "step": 287810 }, { "epoch": 82.80207134637514, "grad_norm": 1.0202468633651733, "learning_rate": 0.0003439585730724971, "loss": 0.3035, "step": 287820 }, { "epoch": 82.80494821634062, "grad_norm": 1.1473768949508667, "learning_rate": 0.0003439010356731876, "loss": 0.3112, "step": 287830 }, { "epoch": 82.8078250863061, "grad_norm": 1.177724838256836, "learning_rate": 0.00034384349827387803, "loss": 0.3245, "step": 287840 }, { "epoch": 82.81070195627157, "grad_norm": 2.3918912410736084, "learning_rate": 0.0003437859608745685, "loss": 0.3471, "step": 287850 }, { "epoch": 82.81357882623705, "grad_norm": 1.1236928701400757, "learning_rate": 0.0003437284234752589, "loss": 0.3653, "step": 287860 }, { "epoch": 82.81645569620254, "grad_norm": 1.350961685180664, "learning_rate": 0.0003436708860759494, "loss": 0.3538, "step": 287870 }, { "epoch": 82.81933256616801, "grad_norm": 0.787726640701294, "learning_rate": 0.00034361334867663985, "loss": 0.3207, "step": 287880 }, { "epoch": 82.82220943613349, "grad_norm": 1.5642307996749878, "learning_rate": 0.00034355581127733025, "loss": 0.3046, "step": 287890 }, { "epoch": 82.82508630609897, "grad_norm": 1.5460017919540405, "learning_rate": 0.00034349827387802076, "loss": 0.3776, "step": 287900 }, { "epoch": 82.82796317606444, "grad_norm": 1.392378330230713, "learning_rate": 0.00034344073647871116, "loss": 0.3017, "step": 287910 }, { "epoch": 82.83084004602992, "grad_norm": 0.693242609500885, "learning_rate": 0.0003433831990794016, "loss": 0.2897, "step": 287920 }, { "epoch": 82.8337169159954, "grad_norm": 1.226028561592102, "learning_rate": 0.0003433256616800921, "loss": 0.3435, "step": 287930 }, { "epoch": 82.83659378596087, "grad_norm": 1.225501537322998, "learning_rate": 0.00034326812428078253, "loss": 0.3193, "step": 287940 }, { "epoch": 82.83947065592635, "grad_norm": 1.1057440042495728, "learning_rate": 0.00034321058688147293, "loss": 0.4118, "step": 287950 }, { "epoch": 82.84234752589182, "grad_norm": 1.1635513305664062, "learning_rate": 0.0003431530494821634, "loss": 0.3937, "step": 287960 }, { "epoch": 82.8452243958573, "grad_norm": 1.6563947200775146, "learning_rate": 0.0003430955120828539, "loss": 0.3127, "step": 287970 }, { "epoch": 82.84810126582279, "grad_norm": 0.8954085111618042, "learning_rate": 0.0003430379746835443, "loss": 0.3937, "step": 287980 }, { "epoch": 82.85097813578827, "grad_norm": 0.9246358275413513, "learning_rate": 0.00034298043728423475, "loss": 0.3054, "step": 287990 }, { "epoch": 82.85385500575374, "grad_norm": 1.2684009075164795, "learning_rate": 0.0003429228998849252, "loss": 0.3645, "step": 288000 }, { "epoch": 82.85673187571922, "grad_norm": 1.7657185792922974, "learning_rate": 0.00034286536248561567, "loss": 0.3852, "step": 288010 }, { "epoch": 82.8596087456847, "grad_norm": 1.0401976108551025, "learning_rate": 0.00034280782508630607, "loss": 0.3269, "step": 288020 }, { "epoch": 82.86248561565017, "grad_norm": 1.1107187271118164, "learning_rate": 0.0003427502876869966, "loss": 0.316, "step": 288030 }, { "epoch": 82.86536248561565, "grad_norm": 0.6152812838554382, "learning_rate": 0.000342692750287687, "loss": 0.2863, "step": 288040 }, { "epoch": 82.86823935558112, "grad_norm": 0.6316617727279663, "learning_rate": 0.00034263521288837743, "loss": 0.3226, "step": 288050 }, { "epoch": 82.8711162255466, "grad_norm": 0.8656253218650818, "learning_rate": 0.0003425776754890679, "loss": 0.3134, "step": 288060 }, { "epoch": 82.87399309551208, "grad_norm": 1.546993613243103, "learning_rate": 0.00034252013808975834, "loss": 0.3675, "step": 288070 }, { "epoch": 82.87686996547757, "grad_norm": 1.0735812187194824, "learning_rate": 0.0003424626006904488, "loss": 0.336, "step": 288080 }, { "epoch": 82.87974683544304, "grad_norm": 1.0909944772720337, "learning_rate": 0.00034240506329113926, "loss": 0.4061, "step": 288090 }, { "epoch": 82.88262370540852, "grad_norm": 1.6427135467529297, "learning_rate": 0.0003423475258918297, "loss": 0.3524, "step": 288100 }, { "epoch": 82.885500575374, "grad_norm": 1.135094165802002, "learning_rate": 0.0003422899884925201, "loss": 0.382, "step": 288110 }, { "epoch": 82.88837744533947, "grad_norm": 0.9591743350028992, "learning_rate": 0.0003422324510932106, "loss": 0.3311, "step": 288120 }, { "epoch": 82.89125431530495, "grad_norm": 1.3185014724731445, "learning_rate": 0.000342174913693901, "loss": 0.3244, "step": 288130 }, { "epoch": 82.89413118527042, "grad_norm": 0.6400561332702637, "learning_rate": 0.0003421173762945915, "loss": 0.3193, "step": 288140 }, { "epoch": 82.8970080552359, "grad_norm": 1.497531771659851, "learning_rate": 0.00034205983889528193, "loss": 0.3858, "step": 288150 }, { "epoch": 82.89988492520138, "grad_norm": 1.9101759195327759, "learning_rate": 0.0003420023014959724, "loss": 0.3783, "step": 288160 }, { "epoch": 82.90276179516685, "grad_norm": 0.808768093585968, "learning_rate": 0.00034194476409666285, "loss": 0.3484, "step": 288170 }, { "epoch": 82.90563866513233, "grad_norm": 0.8970578908920288, "learning_rate": 0.0003418872266973533, "loss": 0.351, "step": 288180 }, { "epoch": 82.90851553509782, "grad_norm": 0.7961899042129517, "learning_rate": 0.00034182968929804376, "loss": 0.2908, "step": 288190 }, { "epoch": 82.9113924050633, "grad_norm": 1.164513111114502, "learning_rate": 0.00034177215189873416, "loss": 0.3306, "step": 288200 }, { "epoch": 82.91426927502877, "grad_norm": 1.2493032217025757, "learning_rate": 0.00034171461449942467, "loss": 0.2979, "step": 288210 }, { "epoch": 82.91714614499425, "grad_norm": 1.881939172744751, "learning_rate": 0.00034165707710011507, "loss": 0.3985, "step": 288220 }, { "epoch": 82.92002301495972, "grad_norm": 1.5502859354019165, "learning_rate": 0.0003415995397008055, "loss": 0.3603, "step": 288230 }, { "epoch": 82.9228998849252, "grad_norm": 1.256394386291504, "learning_rate": 0.000341542002301496, "loss": 0.3728, "step": 288240 }, { "epoch": 82.92577675489068, "grad_norm": 1.3456485271453857, "learning_rate": 0.00034148446490218644, "loss": 0.3074, "step": 288250 }, { "epoch": 82.92865362485615, "grad_norm": 1.5822840929031372, "learning_rate": 0.00034142692750287684, "loss": 0.3239, "step": 288260 }, { "epoch": 82.93153049482163, "grad_norm": 0.826054573059082, "learning_rate": 0.00034136939010356735, "loss": 0.3161, "step": 288270 }, { "epoch": 82.9344073647871, "grad_norm": 0.698940634727478, "learning_rate": 0.0003413118527042578, "loss": 0.2829, "step": 288280 }, { "epoch": 82.9372842347526, "grad_norm": 1.0045924186706543, "learning_rate": 0.0003412543153049482, "loss": 0.296, "step": 288290 }, { "epoch": 82.94016110471807, "grad_norm": 0.9130937457084656, "learning_rate": 0.0003411967779056387, "loss": 0.3469, "step": 288300 }, { "epoch": 82.94303797468355, "grad_norm": 0.9243244528770447, "learning_rate": 0.0003411392405063291, "loss": 0.336, "step": 288310 }, { "epoch": 82.94591484464902, "grad_norm": 1.058980941772461, "learning_rate": 0.00034108170310701957, "loss": 0.3374, "step": 288320 }, { "epoch": 82.9487917146145, "grad_norm": 1.2089741230010986, "learning_rate": 0.00034102416570771, "loss": 0.3538, "step": 288330 }, { "epoch": 82.95166858457998, "grad_norm": 1.3073456287384033, "learning_rate": 0.0003409666283084005, "loss": 0.3585, "step": 288340 }, { "epoch": 82.95454545454545, "grad_norm": 1.2541635036468506, "learning_rate": 0.0003409090909090909, "loss": 0.403, "step": 288350 }, { "epoch": 82.95742232451093, "grad_norm": 0.9257906675338745, "learning_rate": 0.0003408515535097814, "loss": 0.3922, "step": 288360 }, { "epoch": 82.9602991944764, "grad_norm": 0.568983793258667, "learning_rate": 0.00034079401611047185, "loss": 0.2569, "step": 288370 }, { "epoch": 82.96317606444188, "grad_norm": 0.8540911674499512, "learning_rate": 0.00034073647871116225, "loss": 0.325, "step": 288380 }, { "epoch": 82.96605293440736, "grad_norm": 1.9813692569732666, "learning_rate": 0.0003406789413118527, "loss": 0.3622, "step": 288390 }, { "epoch": 82.96892980437285, "grad_norm": 1.295566201210022, "learning_rate": 0.00034062140391254316, "loss": 0.294, "step": 288400 }, { "epoch": 82.97180667433832, "grad_norm": 1.4661312103271484, "learning_rate": 0.0003405638665132336, "loss": 0.3472, "step": 288410 }, { "epoch": 82.9746835443038, "grad_norm": 0.9467505216598511, "learning_rate": 0.000340506329113924, "loss": 0.3562, "step": 288420 }, { "epoch": 82.97756041426928, "grad_norm": 1.0551772117614746, "learning_rate": 0.0003404487917146145, "loss": 0.3845, "step": 288430 }, { "epoch": 82.98043728423475, "grad_norm": 1.732661485671997, "learning_rate": 0.0003403912543153049, "loss": 0.3608, "step": 288440 }, { "epoch": 82.98331415420023, "grad_norm": 1.24614679813385, "learning_rate": 0.0003403337169159954, "loss": 0.3429, "step": 288450 }, { "epoch": 82.9861910241657, "grad_norm": 1.7921786308288574, "learning_rate": 0.0003402761795166859, "loss": 0.3602, "step": 288460 }, { "epoch": 82.98906789413118, "grad_norm": 0.7753949165344238, "learning_rate": 0.0003402186421173763, "loss": 0.3422, "step": 288470 }, { "epoch": 82.99194476409666, "grad_norm": 0.9366012811660767, "learning_rate": 0.00034016110471806675, "loss": 0.3569, "step": 288480 }, { "epoch": 82.99482163406213, "grad_norm": 2.1544454097747803, "learning_rate": 0.0003401035673187572, "loss": 0.3668, "step": 288490 }, { "epoch": 82.99769850402762, "grad_norm": 2.4644222259521484, "learning_rate": 0.00034004602991944766, "loss": 0.3847, "step": 288500 }, { "epoch": 83.0005753739931, "grad_norm": 0.8619434237480164, "learning_rate": 0.00033998849252013806, "loss": 0.2785, "step": 288510 }, { "epoch": 83.00345224395858, "grad_norm": 1.1751189231872559, "learning_rate": 0.00033993095512082857, "loss": 0.3645, "step": 288520 }, { "epoch": 83.00632911392405, "grad_norm": 1.1545143127441406, "learning_rate": 0.00033987341772151897, "loss": 0.2879, "step": 288530 }, { "epoch": 83.00920598388953, "grad_norm": 0.6380687952041626, "learning_rate": 0.00033981588032220943, "loss": 0.3726, "step": 288540 }, { "epoch": 83.012082853855, "grad_norm": 1.5193833112716675, "learning_rate": 0.0003397583429228999, "loss": 0.2938, "step": 288550 }, { "epoch": 83.01495972382048, "grad_norm": 1.5770260095596313, "learning_rate": 0.00033970080552359034, "loss": 0.3039, "step": 288560 }, { "epoch": 83.01783659378596, "grad_norm": 0.6856518983840942, "learning_rate": 0.0003396432681242808, "loss": 0.3078, "step": 288570 }, { "epoch": 83.02071346375143, "grad_norm": 1.0564231872558594, "learning_rate": 0.00033958573072497125, "loss": 0.3711, "step": 288580 }, { "epoch": 83.02359033371691, "grad_norm": 1.1875252723693848, "learning_rate": 0.0003395281933256617, "loss": 0.2946, "step": 288590 }, { "epoch": 83.02646720368239, "grad_norm": 1.0494096279144287, "learning_rate": 0.0003394706559263521, "loss": 0.2818, "step": 288600 }, { "epoch": 83.02934407364788, "grad_norm": 1.1474566459655762, "learning_rate": 0.0003394131185270426, "loss": 0.2579, "step": 288610 }, { "epoch": 83.03222094361335, "grad_norm": 0.8196591734886169, "learning_rate": 0.000339355581127733, "loss": 0.3246, "step": 288620 }, { "epoch": 83.03509781357883, "grad_norm": 1.000632405281067, "learning_rate": 0.0003392980437284235, "loss": 0.3536, "step": 288630 }, { "epoch": 83.0379746835443, "grad_norm": 0.9483717083930969, "learning_rate": 0.00033924050632911393, "loss": 0.312, "step": 288640 }, { "epoch": 83.04085155350978, "grad_norm": 0.9982461929321289, "learning_rate": 0.0003391829689298044, "loss": 0.2579, "step": 288650 }, { "epoch": 83.04372842347526, "grad_norm": 0.7748968005180359, "learning_rate": 0.00033912543153049484, "loss": 0.3024, "step": 288660 }, { "epoch": 83.04660529344073, "grad_norm": 1.3155920505523682, "learning_rate": 0.0003390678941311853, "loss": 0.3547, "step": 288670 }, { "epoch": 83.04948216340621, "grad_norm": 1.2363426685333252, "learning_rate": 0.00033901035673187575, "loss": 0.2825, "step": 288680 }, { "epoch": 83.05235903337169, "grad_norm": 1.0864160060882568, "learning_rate": 0.00033895281933256615, "loss": 0.3877, "step": 288690 }, { "epoch": 83.05523590333716, "grad_norm": 1.219896912574768, "learning_rate": 0.00033889528193325666, "loss": 0.3272, "step": 288700 }, { "epoch": 83.05811277330265, "grad_norm": 0.8976195454597473, "learning_rate": 0.00033883774453394706, "loss": 0.3152, "step": 288710 }, { "epoch": 83.06098964326813, "grad_norm": 0.8247678279876709, "learning_rate": 0.0003387802071346375, "loss": 0.2921, "step": 288720 }, { "epoch": 83.0638665132336, "grad_norm": 0.904376745223999, "learning_rate": 0.000338722669735328, "loss": 0.3624, "step": 288730 }, { "epoch": 83.06674338319908, "grad_norm": 0.7352441549301147, "learning_rate": 0.00033866513233601843, "loss": 0.2638, "step": 288740 }, { "epoch": 83.06962025316456, "grad_norm": 1.7073159217834473, "learning_rate": 0.00033860759493670883, "loss": 0.3218, "step": 288750 }, { "epoch": 83.07249712313003, "grad_norm": 0.9409424662590027, "learning_rate": 0.00033855005753739934, "loss": 0.4268, "step": 288760 }, { "epoch": 83.07537399309551, "grad_norm": 2.0908541679382324, "learning_rate": 0.0003384925201380898, "loss": 0.4275, "step": 288770 }, { "epoch": 83.07825086306099, "grad_norm": 1.3005622625350952, "learning_rate": 0.0003384349827387802, "loss": 0.391, "step": 288780 }, { "epoch": 83.08112773302646, "grad_norm": 0.849653422832489, "learning_rate": 0.00033837744533947065, "loss": 0.2895, "step": 288790 }, { "epoch": 83.08400460299194, "grad_norm": 0.8406359553337097, "learning_rate": 0.0003383199079401611, "loss": 0.2817, "step": 288800 }, { "epoch": 83.08688147295742, "grad_norm": 1.520462155342102, "learning_rate": 0.00033826237054085156, "loss": 0.2745, "step": 288810 }, { "epoch": 83.0897583429229, "grad_norm": 1.421831727027893, "learning_rate": 0.00033820483314154197, "loss": 0.3609, "step": 288820 }, { "epoch": 83.09263521288838, "grad_norm": 1.0152006149291992, "learning_rate": 0.0003381472957422325, "loss": 0.2301, "step": 288830 }, { "epoch": 83.09551208285386, "grad_norm": 1.207761287689209, "learning_rate": 0.0003380897583429229, "loss": 0.4573, "step": 288840 }, { "epoch": 83.09838895281933, "grad_norm": 1.5774734020233154, "learning_rate": 0.00033803222094361333, "loss": 0.3042, "step": 288850 }, { "epoch": 83.10126582278481, "grad_norm": 1.6705224514007568, "learning_rate": 0.00033797468354430384, "loss": 0.2906, "step": 288860 }, { "epoch": 83.10414269275029, "grad_norm": 2.3655800819396973, "learning_rate": 0.00033791714614499424, "loss": 0.3398, "step": 288870 }, { "epoch": 83.10701956271576, "grad_norm": 1.7071259021759033, "learning_rate": 0.0003378596087456847, "loss": 0.3246, "step": 288880 }, { "epoch": 83.10989643268124, "grad_norm": 1.107002854347229, "learning_rate": 0.00033780207134637515, "loss": 0.2938, "step": 288890 }, { "epoch": 83.11277330264672, "grad_norm": 0.9948388934135437, "learning_rate": 0.0003377445339470656, "loss": 0.3032, "step": 288900 }, { "epoch": 83.11565017261219, "grad_norm": 1.481788992881775, "learning_rate": 0.000337686996547756, "loss": 0.3833, "step": 288910 }, { "epoch": 83.11852704257768, "grad_norm": 0.8261536359786987, "learning_rate": 0.0003376294591484465, "loss": 0.4238, "step": 288920 }, { "epoch": 83.12140391254316, "grad_norm": 1.1195508241653442, "learning_rate": 0.0003375719217491369, "loss": 0.2842, "step": 288930 }, { "epoch": 83.12428078250863, "grad_norm": 1.1747361421585083, "learning_rate": 0.0003375143843498274, "loss": 0.2492, "step": 288940 }, { "epoch": 83.12715765247411, "grad_norm": 1.1363669633865356, "learning_rate": 0.0003374568469505179, "loss": 0.2494, "step": 288950 }, { "epoch": 83.13003452243959, "grad_norm": 2.445611000061035, "learning_rate": 0.0003373993095512083, "loss": 0.3504, "step": 288960 }, { "epoch": 83.13291139240506, "grad_norm": 1.8081239461898804, "learning_rate": 0.00033734177215189874, "loss": 0.2871, "step": 288970 }, { "epoch": 83.13578826237054, "grad_norm": 1.3930847644805908, "learning_rate": 0.0003372842347525892, "loss": 0.2941, "step": 288980 }, { "epoch": 83.13866513233602, "grad_norm": 1.1239162683486938, "learning_rate": 0.00033722669735327965, "loss": 0.2508, "step": 288990 }, { "epoch": 83.14154200230149, "grad_norm": 1.0703494548797607, "learning_rate": 0.00033716915995397006, "loss": 0.3787, "step": 289000 }, { "epoch": 83.14441887226697, "grad_norm": 1.5103607177734375, "learning_rate": 0.00033711162255466057, "loss": 0.3155, "step": 289010 }, { "epoch": 83.14729574223244, "grad_norm": 1.1940863132476807, "learning_rate": 0.00033705408515535097, "loss": 0.3887, "step": 289020 }, { "epoch": 83.15017261219793, "grad_norm": 1.559455156326294, "learning_rate": 0.0003369965477560414, "loss": 0.3863, "step": 289030 }, { "epoch": 83.15304948216341, "grad_norm": 1.817412257194519, "learning_rate": 0.0003369390103567319, "loss": 0.4267, "step": 289040 }, { "epoch": 83.15592635212889, "grad_norm": 1.8728035688400269, "learning_rate": 0.00033688147295742233, "loss": 0.382, "step": 289050 }, { "epoch": 83.15880322209436, "grad_norm": 1.640334129333496, "learning_rate": 0.0003368239355581128, "loss": 0.3433, "step": 289060 }, { "epoch": 83.16168009205984, "grad_norm": 1.092846155166626, "learning_rate": 0.00033676639815880324, "loss": 0.3856, "step": 289070 }, { "epoch": 83.16455696202532, "grad_norm": 2.301938056945801, "learning_rate": 0.0003367088607594937, "loss": 0.2968, "step": 289080 }, { "epoch": 83.16743383199079, "grad_norm": 1.1662077903747559, "learning_rate": 0.0003366513233601841, "loss": 0.3343, "step": 289090 }, { "epoch": 83.17031070195627, "grad_norm": 0.9957229495048523, "learning_rate": 0.0003365937859608746, "loss": 0.2882, "step": 289100 }, { "epoch": 83.17318757192174, "grad_norm": 1.7252401113510132, "learning_rate": 0.000336536248561565, "loss": 0.3015, "step": 289110 }, { "epoch": 83.17606444188722, "grad_norm": 0.8334817886352539, "learning_rate": 0.00033647871116225547, "loss": 0.348, "step": 289120 }, { "epoch": 83.17894131185271, "grad_norm": 1.122570276260376, "learning_rate": 0.0003364211737629459, "loss": 0.4134, "step": 289130 }, { "epoch": 83.18181818181819, "grad_norm": 2.0967109203338623, "learning_rate": 0.0003363636363636364, "loss": 0.3767, "step": 289140 }, { "epoch": 83.18469505178366, "grad_norm": 0.8382843136787415, "learning_rate": 0.00033630609896432683, "loss": 0.3316, "step": 289150 }, { "epoch": 83.18757192174914, "grad_norm": 1.6098036766052246, "learning_rate": 0.0003362485615650173, "loss": 0.3183, "step": 289160 }, { "epoch": 83.19044879171462, "grad_norm": 1.605280876159668, "learning_rate": 0.00033619102416570775, "loss": 0.3557, "step": 289170 }, { "epoch": 83.19332566168009, "grad_norm": 1.3988007307052612, "learning_rate": 0.00033613348676639815, "loss": 0.326, "step": 289180 }, { "epoch": 83.19620253164557, "grad_norm": 0.9665027856826782, "learning_rate": 0.00033607594936708866, "loss": 0.3526, "step": 289190 }, { "epoch": 83.19907940161104, "grad_norm": 1.8092819452285767, "learning_rate": 0.00033601841196777906, "loss": 0.296, "step": 289200 }, { "epoch": 83.20195627157652, "grad_norm": 1.1429874897003174, "learning_rate": 0.0003359608745684695, "loss": 0.2967, "step": 289210 }, { "epoch": 83.204833141542, "grad_norm": 0.9851952791213989, "learning_rate": 0.0003359033371691599, "loss": 0.3691, "step": 289220 }, { "epoch": 83.20771001150747, "grad_norm": 1.361471176147461, "learning_rate": 0.0003358457997698504, "loss": 0.2681, "step": 289230 }, { "epoch": 83.21058688147296, "grad_norm": 1.2560571432113647, "learning_rate": 0.0003357882623705408, "loss": 0.3613, "step": 289240 }, { "epoch": 83.21346375143844, "grad_norm": 0.9389520287513733, "learning_rate": 0.0003357307249712313, "loss": 0.2537, "step": 289250 }, { "epoch": 83.21634062140392, "grad_norm": 0.6923196911811829, "learning_rate": 0.0003356731875719218, "loss": 0.4001, "step": 289260 }, { "epoch": 83.21921749136939, "grad_norm": 2.5840108394622803, "learning_rate": 0.0003356156501726122, "loss": 0.3537, "step": 289270 }, { "epoch": 83.22209436133487, "grad_norm": 1.0973204374313354, "learning_rate": 0.00033555811277330265, "loss": 0.3199, "step": 289280 }, { "epoch": 83.22497123130034, "grad_norm": 1.0810250043869019, "learning_rate": 0.0003355005753739931, "loss": 0.3713, "step": 289290 }, { "epoch": 83.22784810126582, "grad_norm": 1.401903510093689, "learning_rate": 0.00033544303797468356, "loss": 0.3034, "step": 289300 }, { "epoch": 83.2307249712313, "grad_norm": 0.8071818351745605, "learning_rate": 0.00033538550057537396, "loss": 0.3209, "step": 289310 }, { "epoch": 83.23360184119677, "grad_norm": 0.6410363912582397, "learning_rate": 0.00033532796317606447, "loss": 0.2732, "step": 289320 }, { "epoch": 83.23647871116225, "grad_norm": 1.1207679510116577, "learning_rate": 0.00033527042577675487, "loss": 0.3217, "step": 289330 }, { "epoch": 83.23935558112774, "grad_norm": 1.4164983034133911, "learning_rate": 0.0003352128883774453, "loss": 0.323, "step": 289340 }, { "epoch": 83.24223245109322, "grad_norm": 0.7955042123794556, "learning_rate": 0.00033515535097813584, "loss": 0.326, "step": 289350 }, { "epoch": 83.24510932105869, "grad_norm": 0.5788489580154419, "learning_rate": 0.00033509781357882624, "loss": 0.2603, "step": 289360 }, { "epoch": 83.24798619102417, "grad_norm": 0.8612238168716431, "learning_rate": 0.0003350402761795167, "loss": 0.329, "step": 289370 }, { "epoch": 83.25086306098964, "grad_norm": 0.7949877977371216, "learning_rate": 0.00033498273878020715, "loss": 0.2575, "step": 289380 }, { "epoch": 83.25373993095512, "grad_norm": 1.013489007949829, "learning_rate": 0.0003349252013808976, "loss": 0.3149, "step": 289390 }, { "epoch": 83.2566168009206, "grad_norm": 0.9217557311058044, "learning_rate": 0.000334867663981588, "loss": 0.3323, "step": 289400 }, { "epoch": 83.25949367088607, "grad_norm": 2.550522565841675, "learning_rate": 0.0003348101265822785, "loss": 0.3342, "step": 289410 }, { "epoch": 83.26237054085155, "grad_norm": 0.937248170375824, "learning_rate": 0.0003347525891829689, "loss": 0.3554, "step": 289420 }, { "epoch": 83.26524741081703, "grad_norm": 0.8271678686141968, "learning_rate": 0.00033469505178365937, "loss": 0.33, "step": 289430 }, { "epoch": 83.2681242807825, "grad_norm": 1.6181806325912476, "learning_rate": 0.0003346375143843499, "loss": 0.3241, "step": 289440 }, { "epoch": 83.27100115074799, "grad_norm": 1.0155837535858154, "learning_rate": 0.0003345799769850403, "loss": 0.3688, "step": 289450 }, { "epoch": 83.27387802071347, "grad_norm": 1.3103688955307007, "learning_rate": 0.00033452243958573074, "loss": 0.3327, "step": 289460 }, { "epoch": 83.27675489067894, "grad_norm": 1.5841484069824219, "learning_rate": 0.0003344649021864212, "loss": 0.3432, "step": 289470 }, { "epoch": 83.27963176064442, "grad_norm": 1.0128551721572876, "learning_rate": 0.00033440736478711165, "loss": 0.2783, "step": 289480 }, { "epoch": 83.2825086306099, "grad_norm": 0.7594716548919678, "learning_rate": 0.00033434982738780205, "loss": 0.2922, "step": 289490 }, { "epoch": 83.28538550057537, "grad_norm": 1.7067475318908691, "learning_rate": 0.00033429228998849256, "loss": 0.3013, "step": 289500 }, { "epoch": 83.28826237054085, "grad_norm": 1.0423884391784668, "learning_rate": 0.00033423475258918296, "loss": 0.3036, "step": 289510 }, { "epoch": 83.29113924050633, "grad_norm": 1.592626690864563, "learning_rate": 0.0003341772151898734, "loss": 0.2387, "step": 289520 }, { "epoch": 83.2940161104718, "grad_norm": 0.8217449188232422, "learning_rate": 0.00033411967779056387, "loss": 0.3516, "step": 289530 }, { "epoch": 83.29689298043728, "grad_norm": 1.7420616149902344, "learning_rate": 0.00033406214039125433, "loss": 0.309, "step": 289540 }, { "epoch": 83.29976985040277, "grad_norm": 1.1933993101119995, "learning_rate": 0.0003340046029919448, "loss": 0.2739, "step": 289550 }, { "epoch": 83.30264672036824, "grad_norm": 0.7176331877708435, "learning_rate": 0.00033394706559263524, "loss": 0.2586, "step": 289560 }, { "epoch": 83.30552359033372, "grad_norm": 0.98269122838974, "learning_rate": 0.0003338895281933257, "loss": 0.339, "step": 289570 }, { "epoch": 83.3084004602992, "grad_norm": 1.083878993988037, "learning_rate": 0.0003338319907940161, "loss": 0.2698, "step": 289580 }, { "epoch": 83.31127733026467, "grad_norm": 1.578824758529663, "learning_rate": 0.0003337744533947066, "loss": 0.3641, "step": 289590 }, { "epoch": 83.31415420023015, "grad_norm": 1.2245334386825562, "learning_rate": 0.000333716915995397, "loss": 0.3686, "step": 289600 }, { "epoch": 83.31703107019563, "grad_norm": 0.9165387749671936, "learning_rate": 0.00033365937859608746, "loss": 0.2512, "step": 289610 }, { "epoch": 83.3199079401611, "grad_norm": 1.2032004594802856, "learning_rate": 0.00033360184119677786, "loss": 0.2824, "step": 289620 }, { "epoch": 83.32278481012658, "grad_norm": 0.906330943107605, "learning_rate": 0.0003335443037974684, "loss": 0.3474, "step": 289630 }, { "epoch": 83.32566168009205, "grad_norm": 2.4563534259796143, "learning_rate": 0.00033348676639815883, "loss": 0.3738, "step": 289640 }, { "epoch": 83.32853855005754, "grad_norm": 1.7660551071166992, "learning_rate": 0.00033342922899884923, "loss": 0.4817, "step": 289650 }, { "epoch": 83.33141542002302, "grad_norm": 1.3911585807800293, "learning_rate": 0.00033337169159953974, "loss": 0.3452, "step": 289660 }, { "epoch": 83.3342922899885, "grad_norm": 1.0095806121826172, "learning_rate": 0.00033331415420023014, "loss": 0.3022, "step": 289670 }, { "epoch": 83.33716915995397, "grad_norm": 2.0906074047088623, "learning_rate": 0.0003332566168009206, "loss": 0.3411, "step": 289680 }, { "epoch": 83.34004602991945, "grad_norm": 1.1866666078567505, "learning_rate": 0.00033319907940161105, "loss": 0.278, "step": 289690 }, { "epoch": 83.34292289988493, "grad_norm": 1.8885530233383179, "learning_rate": 0.0003331415420023015, "loss": 0.3117, "step": 289700 }, { "epoch": 83.3457997698504, "grad_norm": 1.3760687112808228, "learning_rate": 0.0003330840046029919, "loss": 0.3177, "step": 289710 }, { "epoch": 83.34867663981588, "grad_norm": 0.8484562039375305, "learning_rate": 0.0003330264672036824, "loss": 0.2941, "step": 289720 }, { "epoch": 83.35155350978135, "grad_norm": 1.076600193977356, "learning_rate": 0.0003329689298043728, "loss": 0.3539, "step": 289730 }, { "epoch": 83.35443037974683, "grad_norm": 0.8021819591522217, "learning_rate": 0.0003329113924050633, "loss": 0.3123, "step": 289740 }, { "epoch": 83.3573072497123, "grad_norm": 0.7725135087966919, "learning_rate": 0.0003328538550057538, "loss": 0.2777, "step": 289750 }, { "epoch": 83.3601841196778, "grad_norm": 1.3909907341003418, "learning_rate": 0.0003327963176064442, "loss": 0.2956, "step": 289760 }, { "epoch": 83.36306098964327, "grad_norm": 1.2069133520126343, "learning_rate": 0.00033273878020713464, "loss": 0.3029, "step": 289770 }, { "epoch": 83.36593785960875, "grad_norm": 0.7504297494888306, "learning_rate": 0.0003326812428078251, "loss": 0.3747, "step": 289780 }, { "epoch": 83.36881472957423, "grad_norm": 1.5458264350891113, "learning_rate": 0.00033262370540851555, "loss": 0.3067, "step": 289790 }, { "epoch": 83.3716915995397, "grad_norm": 0.7134077548980713, "learning_rate": 0.00033256616800920595, "loss": 0.3683, "step": 289800 }, { "epoch": 83.37456846950518, "grad_norm": 0.894859254360199, "learning_rate": 0.00033250863060989646, "loss": 0.3416, "step": 289810 }, { "epoch": 83.37744533947065, "grad_norm": 1.2903019189834595, "learning_rate": 0.00033245109321058686, "loss": 0.2889, "step": 289820 }, { "epoch": 83.38032220943613, "grad_norm": 1.0448311567306519, "learning_rate": 0.0003323935558112773, "loss": 0.3217, "step": 289830 }, { "epoch": 83.3831990794016, "grad_norm": 1.6333069801330566, "learning_rate": 0.00033233601841196783, "loss": 0.3609, "step": 289840 }, { "epoch": 83.38607594936708, "grad_norm": 1.1028982400894165, "learning_rate": 0.00033227848101265823, "loss": 0.3214, "step": 289850 }, { "epoch": 83.38895281933257, "grad_norm": 1.4430766105651855, "learning_rate": 0.0003322209436133487, "loss": 0.3001, "step": 289860 }, { "epoch": 83.39182968929805, "grad_norm": 1.2323585748672485, "learning_rate": 0.00033216340621403914, "loss": 0.2651, "step": 289870 }, { "epoch": 83.39470655926353, "grad_norm": 0.9679440855979919, "learning_rate": 0.0003321058688147296, "loss": 0.3851, "step": 289880 }, { "epoch": 83.397583429229, "grad_norm": 1.3953076601028442, "learning_rate": 0.00033204833141542, "loss": 0.3464, "step": 289890 }, { "epoch": 83.40046029919448, "grad_norm": 1.9415189027786255, "learning_rate": 0.0003319907940161105, "loss": 0.3847, "step": 289900 }, { "epoch": 83.40333716915995, "grad_norm": 1.0745912790298462, "learning_rate": 0.0003319332566168009, "loss": 0.306, "step": 289910 }, { "epoch": 83.40621403912543, "grad_norm": 1.3402854204177856, "learning_rate": 0.00033187571921749137, "loss": 0.3284, "step": 289920 }, { "epoch": 83.4090909090909, "grad_norm": 1.0353001356124878, "learning_rate": 0.0003318181818181819, "loss": 0.2579, "step": 289930 }, { "epoch": 83.41196777905638, "grad_norm": 0.9597141146659851, "learning_rate": 0.0003317606444188723, "loss": 0.3055, "step": 289940 }, { "epoch": 83.41484464902186, "grad_norm": 1.387657880783081, "learning_rate": 0.00033170310701956273, "loss": 0.3295, "step": 289950 }, { "epoch": 83.41772151898734, "grad_norm": 0.5817629098892212, "learning_rate": 0.0003316455696202532, "loss": 0.3965, "step": 289960 }, { "epoch": 83.42059838895283, "grad_norm": 0.7517702579498291, "learning_rate": 0.00033158803222094364, "loss": 0.2943, "step": 289970 }, { "epoch": 83.4234752589183, "grad_norm": 1.0143615007400513, "learning_rate": 0.00033153049482163404, "loss": 0.3455, "step": 289980 }, { "epoch": 83.42635212888378, "grad_norm": 1.8242374658584595, "learning_rate": 0.00033147295742232455, "loss": 0.3769, "step": 289990 }, { "epoch": 83.42922899884925, "grad_norm": 1.30453622341156, "learning_rate": 0.00033141542002301496, "loss": 0.2985, "step": 290000 }, { "epoch": 83.43210586881473, "grad_norm": 1.9349972009658813, "learning_rate": 0.0003313578826237054, "loss": 0.356, "step": 290010 }, { "epoch": 83.4349827387802, "grad_norm": 1.2727782726287842, "learning_rate": 0.0003313003452243958, "loss": 0.2673, "step": 290020 }, { "epoch": 83.43785960874568, "grad_norm": 1.5279686450958252, "learning_rate": 0.0003312428078250863, "loss": 0.2944, "step": 290030 }, { "epoch": 83.44073647871116, "grad_norm": 1.6360257863998413, "learning_rate": 0.0003311852704257768, "loss": 0.3412, "step": 290040 }, { "epoch": 83.44361334867664, "grad_norm": 2.0342519283294678, "learning_rate": 0.0003311277330264672, "loss": 0.3241, "step": 290050 }, { "epoch": 83.44649021864211, "grad_norm": 3.412182092666626, "learning_rate": 0.0003310701956271577, "loss": 0.3557, "step": 290060 }, { "epoch": 83.4493670886076, "grad_norm": 0.9591366648674011, "learning_rate": 0.0003310126582278481, "loss": 0.3508, "step": 290070 }, { "epoch": 83.45224395857308, "grad_norm": 0.6421165466308594, "learning_rate": 0.00033095512082853855, "loss": 0.2505, "step": 290080 }, { "epoch": 83.45512082853855, "grad_norm": 1.7765978574752808, "learning_rate": 0.000330897583429229, "loss": 0.2923, "step": 290090 }, { "epoch": 83.45799769850403, "grad_norm": 0.8112356066703796, "learning_rate": 0.00033084004602991946, "loss": 0.2856, "step": 290100 }, { "epoch": 83.4608745684695, "grad_norm": 2.2592501640319824, "learning_rate": 0.00033078250863060986, "loss": 0.3545, "step": 290110 }, { "epoch": 83.46375143843498, "grad_norm": 0.8798529505729675, "learning_rate": 0.00033072497123130037, "loss": 0.3203, "step": 290120 }, { "epoch": 83.46662830840046, "grad_norm": 1.2093585729599, "learning_rate": 0.0003306674338319908, "loss": 0.3072, "step": 290130 }, { "epoch": 83.46950517836594, "grad_norm": 1.0816909074783325, "learning_rate": 0.0003306098964326812, "loss": 0.3538, "step": 290140 }, { "epoch": 83.47238204833141, "grad_norm": 0.6796866655349731, "learning_rate": 0.00033055235903337173, "loss": 0.3382, "step": 290150 }, { "epoch": 83.47525891829689, "grad_norm": 1.1869772672653198, "learning_rate": 0.00033049482163406214, "loss": 0.3782, "step": 290160 }, { "epoch": 83.47813578826236, "grad_norm": 1.432019829750061, "learning_rate": 0.0003304372842347526, "loss": 0.3494, "step": 290170 }, { "epoch": 83.48101265822785, "grad_norm": 1.4210299253463745, "learning_rate": 0.00033037974683544305, "loss": 0.3401, "step": 290180 }, { "epoch": 83.48388952819333, "grad_norm": 2.3710947036743164, "learning_rate": 0.0003303222094361335, "loss": 0.3225, "step": 290190 }, { "epoch": 83.4867663981588, "grad_norm": 1.0003061294555664, "learning_rate": 0.0003302646720368239, "loss": 0.3159, "step": 290200 }, { "epoch": 83.48964326812428, "grad_norm": 0.7403045296669006, "learning_rate": 0.0003302071346375144, "loss": 0.4375, "step": 290210 }, { "epoch": 83.49252013808976, "grad_norm": 1.3628431558609009, "learning_rate": 0.0003301495972382048, "loss": 0.3476, "step": 290220 }, { "epoch": 83.49539700805524, "grad_norm": 1.3112459182739258, "learning_rate": 0.00033009205983889527, "loss": 0.3775, "step": 290230 }, { "epoch": 83.49827387802071, "grad_norm": 1.0103760957717896, "learning_rate": 0.0003300345224395858, "loss": 0.2832, "step": 290240 }, { "epoch": 83.50115074798619, "grad_norm": 1.665257215499878, "learning_rate": 0.0003299769850402762, "loss": 0.2635, "step": 290250 }, { "epoch": 83.50402761795166, "grad_norm": 2.2807188034057617, "learning_rate": 0.00032991944764096664, "loss": 0.3326, "step": 290260 }, { "epoch": 83.50690448791714, "grad_norm": 1.7539650201797485, "learning_rate": 0.0003298619102416571, "loss": 0.4844, "step": 290270 }, { "epoch": 83.50978135788263, "grad_norm": 1.202082633972168, "learning_rate": 0.00032980437284234755, "loss": 0.2777, "step": 290280 }, { "epoch": 83.5126582278481, "grad_norm": 1.6707614660263062, "learning_rate": 0.00032974683544303795, "loss": 0.3468, "step": 290290 }, { "epoch": 83.51553509781358, "grad_norm": 1.4872735738754272, "learning_rate": 0.00032968929804372846, "loss": 0.3593, "step": 290300 }, { "epoch": 83.51841196777906, "grad_norm": 0.9852875471115112, "learning_rate": 0.00032963176064441886, "loss": 0.3047, "step": 290310 }, { "epoch": 83.52128883774454, "grad_norm": 2.603306531906128, "learning_rate": 0.0003295742232451093, "loss": 0.3902, "step": 290320 }, { "epoch": 83.52416570771001, "grad_norm": 2.0897440910339355, "learning_rate": 0.0003295166858457998, "loss": 0.3445, "step": 290330 }, { "epoch": 83.52704257767549, "grad_norm": 1.4496067762374878, "learning_rate": 0.0003294591484464902, "loss": 0.321, "step": 290340 }, { "epoch": 83.52991944764096, "grad_norm": 1.62773859500885, "learning_rate": 0.0003294016110471807, "loss": 0.4332, "step": 290350 }, { "epoch": 83.53279631760644, "grad_norm": 1.7255643606185913, "learning_rate": 0.00032934407364787114, "loss": 0.382, "step": 290360 }, { "epoch": 83.53567318757192, "grad_norm": 0.83944171667099, "learning_rate": 0.0003292865362485616, "loss": 0.4033, "step": 290370 }, { "epoch": 83.53855005753739, "grad_norm": 1.7882274389266968, "learning_rate": 0.000329228998849252, "loss": 0.3052, "step": 290380 }, { "epoch": 83.54142692750288, "grad_norm": 2.197105646133423, "learning_rate": 0.0003291714614499425, "loss": 0.4003, "step": 290390 }, { "epoch": 83.54430379746836, "grad_norm": 0.8806336522102356, "learning_rate": 0.0003291139240506329, "loss": 0.2971, "step": 290400 }, { "epoch": 83.54718066743384, "grad_norm": 1.0027698278427124, "learning_rate": 0.00032905638665132336, "loss": 0.3572, "step": 290410 }, { "epoch": 83.55005753739931, "grad_norm": 0.7853014469146729, "learning_rate": 0.00032899884925201387, "loss": 0.3583, "step": 290420 }, { "epoch": 83.55293440736479, "grad_norm": 1.1041724681854248, "learning_rate": 0.00032894131185270427, "loss": 0.4132, "step": 290430 }, { "epoch": 83.55581127733026, "grad_norm": 1.0526233911514282, "learning_rate": 0.0003288837744533947, "loss": 0.2921, "step": 290440 }, { "epoch": 83.55868814729574, "grad_norm": 0.9232211112976074, "learning_rate": 0.00032882623705408513, "loss": 0.4141, "step": 290450 }, { "epoch": 83.56156501726122, "grad_norm": 1.1936558485031128, "learning_rate": 0.00032876869965477564, "loss": 0.3224, "step": 290460 }, { "epoch": 83.56444188722669, "grad_norm": 0.7068174481391907, "learning_rate": 0.00032871116225546604, "loss": 0.275, "step": 290470 }, { "epoch": 83.56731875719217, "grad_norm": 0.8074852824211121, "learning_rate": 0.0003286536248561565, "loss": 0.3005, "step": 290480 }, { "epoch": 83.57019562715766, "grad_norm": 1.2861014604568481, "learning_rate": 0.00032859608745684695, "loss": 0.3476, "step": 290490 }, { "epoch": 83.57307249712314, "grad_norm": 1.5876764059066772, "learning_rate": 0.0003285385500575374, "loss": 0.3468, "step": 290500 }, { "epoch": 83.57594936708861, "grad_norm": 0.9410041570663452, "learning_rate": 0.0003284810126582278, "loss": 0.3264, "step": 290510 }, { "epoch": 83.57882623705409, "grad_norm": 1.1752387285232544, "learning_rate": 0.0003284234752589183, "loss": 0.3081, "step": 290520 }, { "epoch": 83.58170310701956, "grad_norm": 0.6041451096534729, "learning_rate": 0.00032836593785960877, "loss": 0.3275, "step": 290530 }, { "epoch": 83.58457997698504, "grad_norm": 0.9977905750274658, "learning_rate": 0.0003283084004602992, "loss": 0.3717, "step": 290540 }, { "epoch": 83.58745684695052, "grad_norm": 1.4382902383804321, "learning_rate": 0.0003282508630609897, "loss": 0.3983, "step": 290550 }, { "epoch": 83.59033371691599, "grad_norm": 1.590870976448059, "learning_rate": 0.0003281933256616801, "loss": 0.3185, "step": 290560 }, { "epoch": 83.59321058688147, "grad_norm": 1.1104556322097778, "learning_rate": 0.00032813578826237054, "loss": 0.371, "step": 290570 }, { "epoch": 83.59608745684694, "grad_norm": 1.2173513174057007, "learning_rate": 0.000328078250863061, "loss": 0.4311, "step": 290580 }, { "epoch": 83.59896432681242, "grad_norm": 1.0861841440200806, "learning_rate": 0.00032802071346375145, "loss": 0.3373, "step": 290590 }, { "epoch": 83.60184119677791, "grad_norm": 1.2695183753967285, "learning_rate": 0.00032796317606444185, "loss": 0.3752, "step": 290600 }, { "epoch": 83.60471806674339, "grad_norm": 1.0872676372528076, "learning_rate": 0.00032790563866513236, "loss": 0.3564, "step": 290610 }, { "epoch": 83.60759493670886, "grad_norm": 1.2562098503112793, "learning_rate": 0.0003278481012658228, "loss": 0.3112, "step": 290620 }, { "epoch": 83.61047180667434, "grad_norm": 1.01090407371521, "learning_rate": 0.0003277905638665132, "loss": 0.2878, "step": 290630 }, { "epoch": 83.61334867663982, "grad_norm": 1.156209111213684, "learning_rate": 0.00032773302646720373, "loss": 0.3029, "step": 290640 }, { "epoch": 83.61622554660529, "grad_norm": 2.6236467361450195, "learning_rate": 0.00032767548906789413, "loss": 0.3926, "step": 290650 }, { "epoch": 83.61910241657077, "grad_norm": 0.8607842922210693, "learning_rate": 0.0003276179516685846, "loss": 0.3056, "step": 290660 }, { "epoch": 83.62197928653625, "grad_norm": 1.6074000597000122, "learning_rate": 0.00032756041426927504, "loss": 0.3296, "step": 290670 }, { "epoch": 83.62485615650172, "grad_norm": 2.341858386993408, "learning_rate": 0.0003275028768699655, "loss": 0.3575, "step": 290680 }, { "epoch": 83.6277330264672, "grad_norm": 2.7494044303894043, "learning_rate": 0.0003274453394706559, "loss": 0.4092, "step": 290690 }, { "epoch": 83.63060989643269, "grad_norm": 1.0539485216140747, "learning_rate": 0.0003273878020713464, "loss": 0.3077, "step": 290700 }, { "epoch": 83.63348676639816, "grad_norm": 0.9444486498832703, "learning_rate": 0.0003273302646720368, "loss": 0.3843, "step": 290710 }, { "epoch": 83.63636363636364, "grad_norm": 1.1437175273895264, "learning_rate": 0.00032727272727272726, "loss": 0.2994, "step": 290720 }, { "epoch": 83.63924050632912, "grad_norm": 0.8335848450660706, "learning_rate": 0.0003272151898734178, "loss": 0.3669, "step": 290730 }, { "epoch": 83.64211737629459, "grad_norm": 1.9512354135513306, "learning_rate": 0.0003271576524741082, "loss": 0.3487, "step": 290740 }, { "epoch": 83.64499424626007, "grad_norm": 1.4313777685165405, "learning_rate": 0.00032710011507479863, "loss": 0.3442, "step": 290750 }, { "epoch": 83.64787111622555, "grad_norm": 1.2100518941879272, "learning_rate": 0.0003270425776754891, "loss": 0.3418, "step": 290760 }, { "epoch": 83.65074798619102, "grad_norm": 1.4121487140655518, "learning_rate": 0.00032698504027617954, "loss": 0.3778, "step": 290770 }, { "epoch": 83.6536248561565, "grad_norm": 1.8929420709609985, "learning_rate": 0.00032692750287686994, "loss": 0.3558, "step": 290780 }, { "epoch": 83.65650172612197, "grad_norm": 0.8839015960693359, "learning_rate": 0.00032686996547756045, "loss": 0.3081, "step": 290790 }, { "epoch": 83.65937859608745, "grad_norm": 1.1991724967956543, "learning_rate": 0.00032681242807825085, "loss": 0.3237, "step": 290800 }, { "epoch": 83.66225546605294, "grad_norm": 0.6315186023712158, "learning_rate": 0.0003267548906789413, "loss": 0.288, "step": 290810 }, { "epoch": 83.66513233601842, "grad_norm": 1.3142529726028442, "learning_rate": 0.0003266973532796318, "loss": 0.3471, "step": 290820 }, { "epoch": 83.66800920598389, "grad_norm": 0.9100334048271179, "learning_rate": 0.0003266398158803222, "loss": 0.3191, "step": 290830 }, { "epoch": 83.67088607594937, "grad_norm": 0.7444555759429932, "learning_rate": 0.0003265822784810127, "loss": 0.3844, "step": 290840 }, { "epoch": 83.67376294591485, "grad_norm": 1.0382564067840576, "learning_rate": 0.0003265247410817031, "loss": 0.3561, "step": 290850 }, { "epoch": 83.67663981588032, "grad_norm": 1.9916634559631348, "learning_rate": 0.0003264672036823936, "loss": 0.3602, "step": 290860 }, { "epoch": 83.6795166858458, "grad_norm": 1.0724416971206665, "learning_rate": 0.000326409666283084, "loss": 0.2832, "step": 290870 }, { "epoch": 83.68239355581127, "grad_norm": 1.3637120723724365, "learning_rate": 0.00032635212888377444, "loss": 0.3407, "step": 290880 }, { "epoch": 83.68527042577675, "grad_norm": 1.641528606414795, "learning_rate": 0.0003262945914844649, "loss": 0.3615, "step": 290890 }, { "epoch": 83.68814729574223, "grad_norm": 0.9024119973182678, "learning_rate": 0.00032623705408515535, "loss": 0.3113, "step": 290900 }, { "epoch": 83.69102416570772, "grad_norm": 1.528069257736206, "learning_rate": 0.0003261795166858458, "loss": 0.3697, "step": 290910 }, { "epoch": 83.69390103567319, "grad_norm": 1.5507451295852661, "learning_rate": 0.00032612197928653627, "loss": 0.3159, "step": 290920 }, { "epoch": 83.69677790563867, "grad_norm": 1.6124603748321533, "learning_rate": 0.0003260644418872267, "loss": 0.3119, "step": 290930 }, { "epoch": 83.69965477560415, "grad_norm": 1.7491979598999023, "learning_rate": 0.0003260069044879171, "loss": 0.4907, "step": 290940 }, { "epoch": 83.70253164556962, "grad_norm": 1.7480980157852173, "learning_rate": 0.00032594936708860763, "loss": 0.2642, "step": 290950 }, { "epoch": 83.7054085155351, "grad_norm": 1.4344984292984009, "learning_rate": 0.00032589182968929803, "loss": 0.3554, "step": 290960 }, { "epoch": 83.70828538550057, "grad_norm": 1.331101655960083, "learning_rate": 0.0003258342922899885, "loss": 0.3455, "step": 290970 }, { "epoch": 83.71116225546605, "grad_norm": 1.0298930406570435, "learning_rate": 0.00032577675489067894, "loss": 0.3463, "step": 290980 }, { "epoch": 83.71403912543153, "grad_norm": 0.7787291407585144, "learning_rate": 0.0003257192174913694, "loss": 0.3621, "step": 290990 }, { "epoch": 83.716915995397, "grad_norm": 0.9540235996246338, "learning_rate": 0.0003256616800920598, "loss": 0.277, "step": 291000 }, { "epoch": 83.71979286536248, "grad_norm": 1.7859450578689575, "learning_rate": 0.0003256041426927503, "loss": 0.2996, "step": 291010 }, { "epoch": 83.72266973532797, "grad_norm": 0.8006434440612793, "learning_rate": 0.00032554660529344077, "loss": 0.3524, "step": 291020 }, { "epoch": 83.72554660529345, "grad_norm": 2.268415927886963, "learning_rate": 0.00032548906789413117, "loss": 0.4643, "step": 291030 }, { "epoch": 83.72842347525892, "grad_norm": 1.0607869625091553, "learning_rate": 0.0003254315304948217, "loss": 0.2313, "step": 291040 }, { "epoch": 83.7313003452244, "grad_norm": 1.1060619354248047, "learning_rate": 0.0003253739930955121, "loss": 0.318, "step": 291050 }, { "epoch": 83.73417721518987, "grad_norm": 0.9029608368873596, "learning_rate": 0.00032531645569620253, "loss": 0.2757, "step": 291060 }, { "epoch": 83.73705408515535, "grad_norm": 1.09450364112854, "learning_rate": 0.000325258918296893, "loss": 0.3252, "step": 291070 }, { "epoch": 83.73993095512083, "grad_norm": 1.2532298564910889, "learning_rate": 0.00032520138089758345, "loss": 0.3945, "step": 291080 }, { "epoch": 83.7428078250863, "grad_norm": 1.157665729522705, "learning_rate": 0.00032514384349827385, "loss": 0.3787, "step": 291090 }, { "epoch": 83.74568469505178, "grad_norm": 1.7635095119476318, "learning_rate": 0.00032508630609896436, "loss": 0.4108, "step": 291100 }, { "epoch": 83.74856156501725, "grad_norm": 1.2367799282073975, "learning_rate": 0.0003250287686996548, "loss": 0.3366, "step": 291110 }, { "epoch": 83.75143843498275, "grad_norm": 0.8600829243659973, "learning_rate": 0.0003249712313003452, "loss": 0.3152, "step": 291120 }, { "epoch": 83.75431530494822, "grad_norm": 1.1678342819213867, "learning_rate": 0.0003249136939010357, "loss": 0.3108, "step": 291130 }, { "epoch": 83.7571921749137, "grad_norm": 0.9127931594848633, "learning_rate": 0.0003248561565017261, "loss": 0.2978, "step": 291140 }, { "epoch": 83.76006904487917, "grad_norm": 1.3141109943389893, "learning_rate": 0.0003247986191024166, "loss": 0.2849, "step": 291150 }, { "epoch": 83.76294591484465, "grad_norm": 3.4330215454101562, "learning_rate": 0.00032474108170310703, "loss": 0.3641, "step": 291160 }, { "epoch": 83.76582278481013, "grad_norm": 1.5017342567443848, "learning_rate": 0.0003246835443037975, "loss": 0.3619, "step": 291170 }, { "epoch": 83.7686996547756, "grad_norm": 1.4073505401611328, "learning_rate": 0.0003246260069044879, "loss": 0.3331, "step": 291180 }, { "epoch": 83.77157652474108, "grad_norm": 1.0408954620361328, "learning_rate": 0.0003245684695051784, "loss": 0.3775, "step": 291190 }, { "epoch": 83.77445339470655, "grad_norm": 0.8852109909057617, "learning_rate": 0.0003245109321058688, "loss": 0.396, "step": 291200 }, { "epoch": 83.77733026467203, "grad_norm": 1.4616068601608276, "learning_rate": 0.00032445339470655926, "loss": 0.2653, "step": 291210 }, { "epoch": 83.78020713463752, "grad_norm": 2.447150707244873, "learning_rate": 0.00032439585730724977, "loss": 0.4079, "step": 291220 }, { "epoch": 83.783084004603, "grad_norm": 0.7538420557975769, "learning_rate": 0.00032433831990794017, "loss": 0.3936, "step": 291230 }, { "epoch": 83.78596087456847, "grad_norm": 2.2144033908843994, "learning_rate": 0.0003242807825086306, "loss": 0.3363, "step": 291240 }, { "epoch": 83.78883774453395, "grad_norm": 0.7070952653884888, "learning_rate": 0.000324223245109321, "loss": 0.3042, "step": 291250 }, { "epoch": 83.79171461449943, "grad_norm": 0.9998722672462463, "learning_rate": 0.00032416570771001154, "loss": 0.3013, "step": 291260 }, { "epoch": 83.7945914844649, "grad_norm": 0.68452388048172, "learning_rate": 0.00032410817031070194, "loss": 0.2835, "step": 291270 }, { "epoch": 83.79746835443038, "grad_norm": 0.6328507661819458, "learning_rate": 0.0003240506329113924, "loss": 0.2742, "step": 291280 }, { "epoch": 83.80034522439585, "grad_norm": 2.5211994647979736, "learning_rate": 0.00032399309551208285, "loss": 0.4081, "step": 291290 }, { "epoch": 83.80322209436133, "grad_norm": 1.101515531539917, "learning_rate": 0.0003239355581127733, "loss": 0.3545, "step": 291300 }, { "epoch": 83.80609896432681, "grad_norm": 1.039497971534729, "learning_rate": 0.00032387802071346376, "loss": 0.4143, "step": 291310 }, { "epoch": 83.80897583429228, "grad_norm": 0.9149123430252075, "learning_rate": 0.0003238204833141542, "loss": 0.3587, "step": 291320 }, { "epoch": 83.81185270425777, "grad_norm": 1.3477463722229004, "learning_rate": 0.00032376294591484467, "loss": 0.3398, "step": 291330 }, { "epoch": 83.81472957422325, "grad_norm": 0.9091359972953796, "learning_rate": 0.00032370540851553507, "loss": 0.248, "step": 291340 }, { "epoch": 83.81760644418873, "grad_norm": 0.7633432149887085, "learning_rate": 0.0003236478711162256, "loss": 0.431, "step": 291350 }, { "epoch": 83.8204833141542, "grad_norm": 1.2380257844924927, "learning_rate": 0.000323590333716916, "loss": 0.3566, "step": 291360 }, { "epoch": 83.82336018411968, "grad_norm": 1.0141359567642212, "learning_rate": 0.00032353279631760644, "loss": 0.3196, "step": 291370 }, { "epoch": 83.82623705408515, "grad_norm": 1.1803722381591797, "learning_rate": 0.0003234752589182969, "loss": 0.3594, "step": 291380 }, { "epoch": 83.82911392405063, "grad_norm": 1.6379269361495972, "learning_rate": 0.00032341772151898735, "loss": 0.3621, "step": 291390 }, { "epoch": 83.83199079401611, "grad_norm": 0.9059869050979614, "learning_rate": 0.0003233601841196778, "loss": 0.2577, "step": 291400 }, { "epoch": 83.83486766398158, "grad_norm": 2.387427806854248, "learning_rate": 0.00032330264672036826, "loss": 0.2829, "step": 291410 }, { "epoch": 83.83774453394706, "grad_norm": 1.807087779045105, "learning_rate": 0.0003232451093210587, "loss": 0.3475, "step": 291420 }, { "epoch": 83.84062140391255, "grad_norm": 1.133439064025879, "learning_rate": 0.0003231875719217491, "loss": 0.3312, "step": 291430 }, { "epoch": 83.84349827387803, "grad_norm": 1.0583031177520752, "learning_rate": 0.0003231300345224396, "loss": 0.3414, "step": 291440 }, { "epoch": 83.8463751438435, "grad_norm": 0.8244388103485107, "learning_rate": 0.00032307249712313003, "loss": 0.3645, "step": 291450 }, { "epoch": 83.84925201380898, "grad_norm": 1.0152806043624878, "learning_rate": 0.0003230149597238205, "loss": 0.3364, "step": 291460 }, { "epoch": 83.85212888377445, "grad_norm": 1.1794029474258423, "learning_rate": 0.00032295742232451094, "loss": 0.4157, "step": 291470 }, { "epoch": 83.85500575373993, "grad_norm": 2.0579569339752197, "learning_rate": 0.0003228998849252014, "loss": 0.4635, "step": 291480 }, { "epoch": 83.85788262370541, "grad_norm": 1.489579200744629, "learning_rate": 0.0003228423475258918, "loss": 0.2989, "step": 291490 }, { "epoch": 83.86075949367088, "grad_norm": 0.883350133895874, "learning_rate": 0.0003227848101265823, "loss": 0.3636, "step": 291500 }, { "epoch": 83.86363636363636, "grad_norm": 1.591344952583313, "learning_rate": 0.00032272727272727276, "loss": 0.3016, "step": 291510 }, { "epoch": 83.86651323360184, "grad_norm": 0.8692513108253479, "learning_rate": 0.00032266973532796316, "loss": 0.3005, "step": 291520 }, { "epoch": 83.86939010356731, "grad_norm": 0.8500276207923889, "learning_rate": 0.00032261219792865367, "loss": 0.4199, "step": 291530 }, { "epoch": 83.8722669735328, "grad_norm": 1.2517657279968262, "learning_rate": 0.0003225546605293441, "loss": 0.3485, "step": 291540 }, { "epoch": 83.87514384349828, "grad_norm": 1.0226187705993652, "learning_rate": 0.00032249712313003453, "loss": 0.3314, "step": 291550 }, { "epoch": 83.87802071346375, "grad_norm": 1.6083344221115112, "learning_rate": 0.000322439585730725, "loss": 0.4028, "step": 291560 }, { "epoch": 83.88089758342923, "grad_norm": 1.6419181823730469, "learning_rate": 0.00032238204833141544, "loss": 0.351, "step": 291570 }, { "epoch": 83.88377445339471, "grad_norm": 0.9087265133857727, "learning_rate": 0.00032232451093210584, "loss": 0.3064, "step": 291580 }, { "epoch": 83.88665132336018, "grad_norm": 1.1762242317199707, "learning_rate": 0.00032226697353279635, "loss": 0.3437, "step": 291590 }, { "epoch": 83.88952819332566, "grad_norm": 1.5235158205032349, "learning_rate": 0.0003222094361334868, "loss": 0.3302, "step": 291600 }, { "epoch": 83.89240506329114, "grad_norm": 1.2734278440475464, "learning_rate": 0.0003221518987341772, "loss": 0.2607, "step": 291610 }, { "epoch": 83.89528193325661, "grad_norm": 3.0136420726776123, "learning_rate": 0.0003220943613348677, "loss": 0.3755, "step": 291620 }, { "epoch": 83.89815880322209, "grad_norm": 0.7783588767051697, "learning_rate": 0.0003220368239355581, "loss": 0.2842, "step": 291630 }, { "epoch": 83.90103567318758, "grad_norm": 2.2475709915161133, "learning_rate": 0.0003219792865362486, "loss": 0.3525, "step": 291640 }, { "epoch": 83.90391254315306, "grad_norm": 0.9446348547935486, "learning_rate": 0.000321921749136939, "loss": 0.3441, "step": 291650 }, { "epoch": 83.90678941311853, "grad_norm": 1.2246989011764526, "learning_rate": 0.0003218642117376295, "loss": 0.3765, "step": 291660 }, { "epoch": 83.90966628308401, "grad_norm": 1.1479400396347046, "learning_rate": 0.0003218066743383199, "loss": 0.412, "step": 291670 }, { "epoch": 83.91254315304948, "grad_norm": 1.1883411407470703, "learning_rate": 0.00032174913693901034, "loss": 0.2997, "step": 291680 }, { "epoch": 83.91542002301496, "grad_norm": 1.4823745489120483, "learning_rate": 0.0003216915995397008, "loss": 0.3023, "step": 291690 }, { "epoch": 83.91829689298044, "grad_norm": 2.455402374267578, "learning_rate": 0.00032163406214039125, "loss": 0.3935, "step": 291700 }, { "epoch": 83.92117376294591, "grad_norm": 0.7938430309295654, "learning_rate": 0.0003215765247410817, "loss": 0.3624, "step": 291710 }, { "epoch": 83.92405063291139, "grad_norm": 1.1530176401138306, "learning_rate": 0.00032151898734177216, "loss": 0.2897, "step": 291720 }, { "epoch": 83.92692750287686, "grad_norm": 1.4072078466415405, "learning_rate": 0.0003214614499424626, "loss": 0.2942, "step": 291730 }, { "epoch": 83.92980437284234, "grad_norm": 1.0410410165786743, "learning_rate": 0.000321403912543153, "loss": 0.3278, "step": 291740 }, { "epoch": 83.93268124280783, "grad_norm": 1.0529875755310059, "learning_rate": 0.00032134637514384353, "loss": 0.2792, "step": 291750 }, { "epoch": 83.93555811277331, "grad_norm": 1.7967503070831299, "learning_rate": 0.00032128883774453393, "loss": 0.3685, "step": 291760 }, { "epoch": 83.93843498273878, "grad_norm": 1.7144763469696045, "learning_rate": 0.0003212313003452244, "loss": 0.2843, "step": 291770 }, { "epoch": 83.94131185270426, "grad_norm": 1.0870102643966675, "learning_rate": 0.00032117376294591484, "loss": 0.3223, "step": 291780 }, { "epoch": 83.94418872266974, "grad_norm": 2.73942494392395, "learning_rate": 0.0003211162255466053, "loss": 0.3681, "step": 291790 }, { "epoch": 83.94706559263521, "grad_norm": 0.6067092418670654, "learning_rate": 0.00032105868814729575, "loss": 0.2865, "step": 291800 }, { "epoch": 83.94994246260069, "grad_norm": 1.0331295728683472, "learning_rate": 0.0003210011507479862, "loss": 0.3017, "step": 291810 }, { "epoch": 83.95281933256616, "grad_norm": 1.3233028650283813, "learning_rate": 0.00032094361334867666, "loss": 0.3298, "step": 291820 }, { "epoch": 83.95569620253164, "grad_norm": 0.7696264982223511, "learning_rate": 0.00032088607594936707, "loss": 0.2996, "step": 291830 }, { "epoch": 83.95857307249712, "grad_norm": 1.262015461921692, "learning_rate": 0.0003208285385500576, "loss": 0.3142, "step": 291840 }, { "epoch": 83.96144994246261, "grad_norm": 0.9703548550605774, "learning_rate": 0.000320771001150748, "loss": 0.2947, "step": 291850 }, { "epoch": 83.96432681242808, "grad_norm": 1.995209813117981, "learning_rate": 0.00032071346375143843, "loss": 0.3877, "step": 291860 }, { "epoch": 83.96720368239356, "grad_norm": 1.9290026426315308, "learning_rate": 0.0003206559263521289, "loss": 0.3917, "step": 291870 }, { "epoch": 83.97008055235904, "grad_norm": 1.2232575416564941, "learning_rate": 0.00032059838895281934, "loss": 0.311, "step": 291880 }, { "epoch": 83.97295742232451, "grad_norm": 1.511839747428894, "learning_rate": 0.0003205408515535098, "loss": 0.4071, "step": 291890 }, { "epoch": 83.97583429228999, "grad_norm": 0.6488548517227173, "learning_rate": 0.00032048331415420025, "loss": 0.253, "step": 291900 }, { "epoch": 83.97871116225546, "grad_norm": 0.6203511357307434, "learning_rate": 0.0003204257767548907, "loss": 0.3386, "step": 291910 }, { "epoch": 83.98158803222094, "grad_norm": 2.282195806503296, "learning_rate": 0.0003203682393555811, "loss": 0.3674, "step": 291920 }, { "epoch": 83.98446490218642, "grad_norm": 0.899863600730896, "learning_rate": 0.0003203107019562716, "loss": 0.3528, "step": 291930 }, { "epoch": 83.9873417721519, "grad_norm": 0.7086771726608276, "learning_rate": 0.000320253164556962, "loss": 0.331, "step": 291940 }, { "epoch": 83.99021864211737, "grad_norm": 1.5335910320281982, "learning_rate": 0.0003201956271576525, "loss": 0.2743, "step": 291950 }, { "epoch": 83.99309551208286, "grad_norm": 0.5767157673835754, "learning_rate": 0.00032013808975834293, "loss": 0.3286, "step": 291960 }, { "epoch": 83.99597238204834, "grad_norm": 1.3371015787124634, "learning_rate": 0.0003200805523590334, "loss": 0.3348, "step": 291970 }, { "epoch": 83.99884925201381, "grad_norm": 0.6620042324066162, "learning_rate": 0.0003200230149597238, "loss": 0.2673, "step": 291980 }, { "epoch": 84.00172612197929, "grad_norm": 1.0101126432418823, "learning_rate": 0.0003199654775604143, "loss": 0.3848, "step": 291990 }, { "epoch": 84.00460299194476, "grad_norm": 0.8774856925010681, "learning_rate": 0.00031990794016110475, "loss": 0.3413, "step": 292000 }, { "epoch": 84.00747986191024, "grad_norm": 0.7955089211463928, "learning_rate": 0.00031985040276179516, "loss": 0.2675, "step": 292010 }, { "epoch": 84.01035673187572, "grad_norm": 1.3190287351608276, "learning_rate": 0.00031979286536248567, "loss": 0.3092, "step": 292020 }, { "epoch": 84.0132336018412, "grad_norm": 1.7188602685928345, "learning_rate": 0.00031973532796317607, "loss": 0.299, "step": 292030 }, { "epoch": 84.01611047180667, "grad_norm": 1.2879395484924316, "learning_rate": 0.0003196777905638665, "loss": 0.3366, "step": 292040 }, { "epoch": 84.01898734177215, "grad_norm": 0.8996648788452148, "learning_rate": 0.0003196202531645569, "loss": 0.334, "step": 292050 }, { "epoch": 84.02186421173764, "grad_norm": 0.7383049726486206, "learning_rate": 0.00031956271576524743, "loss": 0.286, "step": 292060 }, { "epoch": 84.02474108170311, "grad_norm": 1.2714852094650269, "learning_rate": 0.00031950517836593784, "loss": 0.2951, "step": 292070 }, { "epoch": 84.02761795166859, "grad_norm": 0.9301482439041138, "learning_rate": 0.0003194476409666283, "loss": 0.2717, "step": 292080 }, { "epoch": 84.03049482163406, "grad_norm": 1.906288981437683, "learning_rate": 0.0003193901035673188, "loss": 0.3235, "step": 292090 }, { "epoch": 84.03337169159954, "grad_norm": 1.756705403327942, "learning_rate": 0.0003193325661680092, "loss": 0.3502, "step": 292100 }, { "epoch": 84.03624856156502, "grad_norm": 1.9737628698349, "learning_rate": 0.00031927502876869966, "loss": 0.3477, "step": 292110 }, { "epoch": 84.0391254315305, "grad_norm": 0.4902461767196655, "learning_rate": 0.0003192174913693901, "loss": 0.306, "step": 292120 }, { "epoch": 84.04200230149597, "grad_norm": 0.8933570384979248, "learning_rate": 0.00031915995397008057, "loss": 0.2832, "step": 292130 }, { "epoch": 84.04487917146145, "grad_norm": 1.5929086208343506, "learning_rate": 0.00031910241657077097, "loss": 0.3164, "step": 292140 }, { "epoch": 84.04775604142692, "grad_norm": 1.153482437133789, "learning_rate": 0.0003190448791714615, "loss": 0.2681, "step": 292150 }, { "epoch": 84.0506329113924, "grad_norm": 1.7082154750823975, "learning_rate": 0.0003189873417721519, "loss": 0.2945, "step": 292160 }, { "epoch": 84.05350978135789, "grad_norm": 0.7544595003128052, "learning_rate": 0.00031892980437284234, "loss": 0.2786, "step": 292170 }, { "epoch": 84.05638665132336, "grad_norm": 0.7781096696853638, "learning_rate": 0.0003188722669735328, "loss": 0.2788, "step": 292180 }, { "epoch": 84.05926352128884, "grad_norm": 2.4096760749816895, "learning_rate": 0.00031881472957422325, "loss": 0.3864, "step": 292190 }, { "epoch": 84.06214039125432, "grad_norm": 1.3731132745742798, "learning_rate": 0.0003187571921749137, "loss": 0.2705, "step": 292200 }, { "epoch": 84.0650172612198, "grad_norm": 1.0559265613555908, "learning_rate": 0.00031869965477560416, "loss": 0.2988, "step": 292210 }, { "epoch": 84.06789413118527, "grad_norm": 1.2150845527648926, "learning_rate": 0.0003186421173762946, "loss": 0.2918, "step": 292220 }, { "epoch": 84.07077100115075, "grad_norm": 1.424423098564148, "learning_rate": 0.000318584579976985, "loss": 0.2773, "step": 292230 }, { "epoch": 84.07364787111622, "grad_norm": 0.7772596478462219, "learning_rate": 0.0003185270425776755, "loss": 0.3366, "step": 292240 }, { "epoch": 84.0765247410817, "grad_norm": 0.7925577163696289, "learning_rate": 0.0003184695051783659, "loss": 0.2587, "step": 292250 }, { "epoch": 84.07940161104717, "grad_norm": 1.059322714805603, "learning_rate": 0.0003184119677790564, "loss": 0.3055, "step": 292260 }, { "epoch": 84.08227848101266, "grad_norm": 1.4602906703948975, "learning_rate": 0.00031835443037974684, "loss": 0.3488, "step": 292270 }, { "epoch": 84.08515535097814, "grad_norm": 0.8351064920425415, "learning_rate": 0.0003182968929804373, "loss": 0.3227, "step": 292280 }, { "epoch": 84.08803222094362, "grad_norm": 1.3520532846450806, "learning_rate": 0.00031823935558112775, "loss": 0.2977, "step": 292290 }, { "epoch": 84.0909090909091, "grad_norm": 1.2292569875717163, "learning_rate": 0.0003181818181818182, "loss": 0.2499, "step": 292300 }, { "epoch": 84.09378596087457, "grad_norm": 0.7731308937072754, "learning_rate": 0.00031812428078250866, "loss": 0.291, "step": 292310 }, { "epoch": 84.09666283084005, "grad_norm": 1.111083984375, "learning_rate": 0.00031806674338319906, "loss": 0.2698, "step": 292320 }, { "epoch": 84.09953970080552, "grad_norm": 2.794830083847046, "learning_rate": 0.00031800920598388957, "loss": 0.3019, "step": 292330 }, { "epoch": 84.102416570771, "grad_norm": 1.3309262990951538, "learning_rate": 0.00031795166858457997, "loss": 0.3512, "step": 292340 }, { "epoch": 84.10529344073647, "grad_norm": 0.79415363073349, "learning_rate": 0.0003178941311852704, "loss": 0.2614, "step": 292350 }, { "epoch": 84.10817031070195, "grad_norm": 0.8557801246643066, "learning_rate": 0.0003178365937859609, "loss": 0.292, "step": 292360 }, { "epoch": 84.11104718066743, "grad_norm": 1.253098964691162, "learning_rate": 0.00031777905638665134, "loss": 0.3285, "step": 292370 }, { "epoch": 84.11392405063292, "grad_norm": 0.9522444009780884, "learning_rate": 0.00031772151898734174, "loss": 0.2713, "step": 292380 }, { "epoch": 84.1168009205984, "grad_norm": 1.0920140743255615, "learning_rate": 0.00031766398158803225, "loss": 0.2925, "step": 292390 }, { "epoch": 84.11967779056387, "grad_norm": 1.4176852703094482, "learning_rate": 0.0003176064441887227, "loss": 0.2694, "step": 292400 }, { "epoch": 84.12255466052935, "grad_norm": 1.9415936470031738, "learning_rate": 0.0003175489067894131, "loss": 0.3522, "step": 292410 }, { "epoch": 84.12543153049482, "grad_norm": 1.013457179069519, "learning_rate": 0.0003174913693901036, "loss": 0.335, "step": 292420 }, { "epoch": 84.1283084004603, "grad_norm": 1.4084409475326538, "learning_rate": 0.000317433831990794, "loss": 0.3886, "step": 292430 }, { "epoch": 84.13118527042577, "grad_norm": 1.0846272706985474, "learning_rate": 0.00031737629459148447, "loss": 0.2648, "step": 292440 }, { "epoch": 84.13406214039125, "grad_norm": 1.611005425453186, "learning_rate": 0.00031731875719217493, "loss": 0.2976, "step": 292450 }, { "epoch": 84.13693901035673, "grad_norm": 1.5992172956466675, "learning_rate": 0.0003172612197928654, "loss": 0.337, "step": 292460 }, { "epoch": 84.1398158803222, "grad_norm": 1.5236588716506958, "learning_rate": 0.0003172036823935558, "loss": 0.2847, "step": 292470 }, { "epoch": 84.1426927502877, "grad_norm": 1.440544605255127, "learning_rate": 0.00031714614499424624, "loss": 0.2811, "step": 292480 }, { "epoch": 84.14556962025317, "grad_norm": 1.2459824085235596, "learning_rate": 0.00031708860759493675, "loss": 0.3191, "step": 292490 }, { "epoch": 84.14844649021865, "grad_norm": 2.337829351425171, "learning_rate": 0.00031703107019562715, "loss": 0.3565, "step": 292500 }, { "epoch": 84.15132336018412, "grad_norm": 1.3878555297851562, "learning_rate": 0.0003169735327963176, "loss": 0.3047, "step": 292510 }, { "epoch": 84.1542002301496, "grad_norm": 0.6142323017120361, "learning_rate": 0.00031691599539700806, "loss": 0.3134, "step": 292520 }, { "epoch": 84.15707710011507, "grad_norm": 0.9404006600379944, "learning_rate": 0.0003168584579976985, "loss": 0.3826, "step": 292530 }, { "epoch": 84.15995397008055, "grad_norm": 1.9422229528427124, "learning_rate": 0.0003168009205983889, "loss": 0.3883, "step": 292540 }, { "epoch": 84.16283084004603, "grad_norm": 1.0384396314620972, "learning_rate": 0.00031674338319907943, "loss": 0.3355, "step": 292550 }, { "epoch": 84.1657077100115, "grad_norm": 1.1960372924804688, "learning_rate": 0.00031668584579976983, "loss": 0.3325, "step": 292560 }, { "epoch": 84.16858457997698, "grad_norm": 1.1541978120803833, "learning_rate": 0.0003166283084004603, "loss": 0.3685, "step": 292570 }, { "epoch": 84.17146144994246, "grad_norm": 1.4284480810165405, "learning_rate": 0.0003165707710011508, "loss": 0.3166, "step": 292580 }, { "epoch": 84.17433831990795, "grad_norm": 1.7190933227539062, "learning_rate": 0.0003165132336018412, "loss": 0.3036, "step": 292590 }, { "epoch": 84.17721518987342, "grad_norm": 2.2133078575134277, "learning_rate": 0.00031645569620253165, "loss": 0.3352, "step": 292600 }, { "epoch": 84.1800920598389, "grad_norm": 2.558748722076416, "learning_rate": 0.0003163981588032221, "loss": 0.3231, "step": 292610 }, { "epoch": 84.18296892980437, "grad_norm": 2.3441286087036133, "learning_rate": 0.00031634062140391256, "loss": 0.3829, "step": 292620 }, { "epoch": 84.18584579976985, "grad_norm": 1.3342325687408447, "learning_rate": 0.00031628308400460296, "loss": 0.3189, "step": 292630 }, { "epoch": 84.18872266973533, "grad_norm": 0.47519341111183167, "learning_rate": 0.0003162255466052935, "loss": 0.3142, "step": 292640 }, { "epoch": 84.1915995397008, "grad_norm": 0.6220288276672363, "learning_rate": 0.0003161680092059839, "loss": 0.2963, "step": 292650 }, { "epoch": 84.19447640966628, "grad_norm": 1.0349876880645752, "learning_rate": 0.00031611047180667433, "loss": 0.2603, "step": 292660 }, { "epoch": 84.19735327963176, "grad_norm": 1.0156400203704834, "learning_rate": 0.0003160529344073648, "loss": 0.3629, "step": 292670 }, { "epoch": 84.20023014959723, "grad_norm": 1.6687935590744019, "learning_rate": 0.00031599539700805524, "loss": 0.33, "step": 292680 }, { "epoch": 84.20310701956272, "grad_norm": 1.474848985671997, "learning_rate": 0.0003159378596087457, "loss": 0.361, "step": 292690 }, { "epoch": 84.2059838895282, "grad_norm": 2.045679807662964, "learning_rate": 0.00031588032220943615, "loss": 0.2851, "step": 292700 }, { "epoch": 84.20886075949367, "grad_norm": 1.5894510746002197, "learning_rate": 0.0003158227848101266, "loss": 0.3748, "step": 292710 }, { "epoch": 84.21173762945915, "grad_norm": 1.1658830642700195, "learning_rate": 0.000315765247410817, "loss": 0.3258, "step": 292720 }, { "epoch": 84.21461449942463, "grad_norm": 1.1474491357803345, "learning_rate": 0.0003157077100115075, "loss": 0.2984, "step": 292730 }, { "epoch": 84.2174913693901, "grad_norm": 1.3215354681015015, "learning_rate": 0.0003156501726121979, "loss": 0.3607, "step": 292740 }, { "epoch": 84.22036823935558, "grad_norm": 1.1854883432388306, "learning_rate": 0.0003155926352128884, "loss": 0.3741, "step": 292750 }, { "epoch": 84.22324510932106, "grad_norm": 1.4699403047561646, "learning_rate": 0.00031553509781357883, "loss": 0.2681, "step": 292760 }, { "epoch": 84.22612197928653, "grad_norm": 1.0022246837615967, "learning_rate": 0.0003154775604142693, "loss": 0.2826, "step": 292770 }, { "epoch": 84.22899884925201, "grad_norm": 1.7590035200119019, "learning_rate": 0.00031542002301495974, "loss": 0.2977, "step": 292780 }, { "epoch": 84.23187571921748, "grad_norm": 1.2416515350341797, "learning_rate": 0.0003153624856156502, "loss": 0.4392, "step": 292790 }, { "epoch": 84.23475258918297, "grad_norm": 0.8643519282341003, "learning_rate": 0.00031530494821634065, "loss": 0.2948, "step": 292800 }, { "epoch": 84.23762945914845, "grad_norm": 0.944556474685669, "learning_rate": 0.00031524741081703105, "loss": 0.2805, "step": 292810 }, { "epoch": 84.24050632911393, "grad_norm": 1.7152374982833862, "learning_rate": 0.00031518987341772156, "loss": 0.2551, "step": 292820 }, { "epoch": 84.2433831990794, "grad_norm": 0.7601770162582397, "learning_rate": 0.00031513233601841197, "loss": 0.3438, "step": 292830 }, { "epoch": 84.24626006904488, "grad_norm": 1.3374310731887817, "learning_rate": 0.0003150747986191024, "loss": 0.2908, "step": 292840 }, { "epoch": 84.24913693901036, "grad_norm": 1.7205450534820557, "learning_rate": 0.0003150172612197929, "loss": 0.2945, "step": 292850 }, { "epoch": 84.25201380897583, "grad_norm": 1.4028100967407227, "learning_rate": 0.00031495972382048333, "loss": 0.2682, "step": 292860 }, { "epoch": 84.25489067894131, "grad_norm": 1.3436988592147827, "learning_rate": 0.00031490218642117373, "loss": 0.3269, "step": 292870 }, { "epoch": 84.25776754890678, "grad_norm": 1.5299561023712158, "learning_rate": 0.0003148446490218642, "loss": 0.3234, "step": 292880 }, { "epoch": 84.26064441887226, "grad_norm": 0.6806197762489319, "learning_rate": 0.0003147871116225547, "loss": 0.309, "step": 292890 }, { "epoch": 84.26352128883775, "grad_norm": 1.3260165452957153, "learning_rate": 0.0003147295742232451, "loss": 0.3293, "step": 292900 }, { "epoch": 84.26639815880323, "grad_norm": 1.278557300567627, "learning_rate": 0.00031467203682393556, "loss": 0.2927, "step": 292910 }, { "epoch": 84.2692750287687, "grad_norm": 0.7510617971420288, "learning_rate": 0.000314614499424626, "loss": 0.2854, "step": 292920 }, { "epoch": 84.27215189873418, "grad_norm": 2.9116013050079346, "learning_rate": 0.00031455696202531647, "loss": 0.335, "step": 292930 }, { "epoch": 84.27502876869966, "grad_norm": 1.1796952486038208, "learning_rate": 0.00031449942462600687, "loss": 0.2622, "step": 292940 }, { "epoch": 84.27790563866513, "grad_norm": 1.5036120414733887, "learning_rate": 0.0003144418872266974, "loss": 0.3799, "step": 292950 }, { "epoch": 84.28078250863061, "grad_norm": 0.6755063533782959, "learning_rate": 0.0003143843498273878, "loss": 0.3391, "step": 292960 }, { "epoch": 84.28365937859608, "grad_norm": 1.490127682685852, "learning_rate": 0.00031432681242807823, "loss": 0.3402, "step": 292970 }, { "epoch": 84.28653624856156, "grad_norm": 1.0316959619522095, "learning_rate": 0.00031426927502876874, "loss": 0.2928, "step": 292980 }, { "epoch": 84.28941311852704, "grad_norm": 2.831735849380493, "learning_rate": 0.00031421173762945915, "loss": 0.2717, "step": 292990 }, { "epoch": 84.29228998849253, "grad_norm": 1.113782525062561, "learning_rate": 0.0003141542002301496, "loss": 0.3249, "step": 293000 }, { "epoch": 84.295166858458, "grad_norm": 1.6534600257873535, "learning_rate": 0.00031409666283084006, "loss": 0.343, "step": 293010 }, { "epoch": 84.29804372842348, "grad_norm": 1.5538032054901123, "learning_rate": 0.0003140391254315305, "loss": 0.3131, "step": 293020 }, { "epoch": 84.30092059838896, "grad_norm": 1.047133207321167, "learning_rate": 0.0003139815880322209, "loss": 0.3059, "step": 293030 }, { "epoch": 84.30379746835443, "grad_norm": 1.0170094966888428, "learning_rate": 0.0003139240506329114, "loss": 0.2866, "step": 293040 }, { "epoch": 84.30667433831991, "grad_norm": 1.060021162033081, "learning_rate": 0.0003138665132336018, "loss": 0.284, "step": 293050 }, { "epoch": 84.30955120828538, "grad_norm": 1.6574907302856445, "learning_rate": 0.0003138089758342923, "loss": 0.2947, "step": 293060 }, { "epoch": 84.31242807825086, "grad_norm": 0.8654298782348633, "learning_rate": 0.0003137514384349828, "loss": 0.3109, "step": 293070 }, { "epoch": 84.31530494821634, "grad_norm": 2.335413932800293, "learning_rate": 0.0003136939010356732, "loss": 0.2972, "step": 293080 }, { "epoch": 84.31818181818181, "grad_norm": 1.280991554260254, "learning_rate": 0.00031363636363636365, "loss": 0.318, "step": 293090 }, { "epoch": 84.32105868814729, "grad_norm": 1.0560849905014038, "learning_rate": 0.0003135788262370541, "loss": 0.3485, "step": 293100 }, { "epoch": 84.32393555811278, "grad_norm": 1.370219111442566, "learning_rate": 0.00031352128883774456, "loss": 0.3352, "step": 293110 }, { "epoch": 84.32681242807826, "grad_norm": 1.6770025491714478, "learning_rate": 0.00031346375143843496, "loss": 0.3571, "step": 293120 }, { "epoch": 84.32968929804373, "grad_norm": 0.8876288533210754, "learning_rate": 0.00031340621403912547, "loss": 0.3592, "step": 293130 }, { "epoch": 84.33256616800921, "grad_norm": 1.886206865310669, "learning_rate": 0.00031334867663981587, "loss": 0.3635, "step": 293140 }, { "epoch": 84.33544303797468, "grad_norm": 1.3984370231628418, "learning_rate": 0.0003132911392405063, "loss": 0.3102, "step": 293150 }, { "epoch": 84.33831990794016, "grad_norm": 0.8099440932273865, "learning_rate": 0.0003132336018411968, "loss": 0.292, "step": 293160 }, { "epoch": 84.34119677790564, "grad_norm": 1.3240416049957275, "learning_rate": 0.00031317606444188724, "loss": 0.268, "step": 293170 }, { "epoch": 84.34407364787111, "grad_norm": 0.9589749574661255, "learning_rate": 0.0003131185270425777, "loss": 0.3271, "step": 293180 }, { "epoch": 84.34695051783659, "grad_norm": 1.3320978879928589, "learning_rate": 0.00031306098964326815, "loss": 0.3628, "step": 293190 }, { "epoch": 84.34982738780207, "grad_norm": 1.1867600679397583, "learning_rate": 0.0003130034522439586, "loss": 0.2755, "step": 293200 }, { "epoch": 84.35270425776756, "grad_norm": 1.0908395051956177, "learning_rate": 0.000312945914844649, "loss": 0.3396, "step": 293210 }, { "epoch": 84.35558112773303, "grad_norm": 1.9650659561157227, "learning_rate": 0.0003128883774453395, "loss": 0.4228, "step": 293220 }, { "epoch": 84.35845799769851, "grad_norm": 1.262190580368042, "learning_rate": 0.0003128308400460299, "loss": 0.3296, "step": 293230 }, { "epoch": 84.36133486766398, "grad_norm": 0.8982672095298767, "learning_rate": 0.00031277330264672037, "loss": 0.355, "step": 293240 }, { "epoch": 84.36421173762946, "grad_norm": 0.9143806099891663, "learning_rate": 0.0003127157652474108, "loss": 0.3048, "step": 293250 }, { "epoch": 84.36708860759494, "grad_norm": 1.6586960554122925, "learning_rate": 0.0003126582278481013, "loss": 0.304, "step": 293260 }, { "epoch": 84.36996547756041, "grad_norm": 1.3132835626602173, "learning_rate": 0.00031260069044879174, "loss": 0.3323, "step": 293270 }, { "epoch": 84.37284234752589, "grad_norm": 1.381505012512207, "learning_rate": 0.0003125431530494822, "loss": 0.3004, "step": 293280 }, { "epoch": 84.37571921749137, "grad_norm": 0.7345302700996399, "learning_rate": 0.00031248561565017265, "loss": 0.2506, "step": 293290 }, { "epoch": 84.37859608745684, "grad_norm": 0.9085520505905151, "learning_rate": 0.00031242807825086305, "loss": 0.3965, "step": 293300 }, { "epoch": 84.38147295742232, "grad_norm": 0.8120918869972229, "learning_rate": 0.0003123705408515535, "loss": 0.2586, "step": 293310 }, { "epoch": 84.38434982738781, "grad_norm": 0.8211647868156433, "learning_rate": 0.00031231300345224396, "loss": 0.2596, "step": 293320 }, { "epoch": 84.38722669735328, "grad_norm": 1.1347706317901611, "learning_rate": 0.0003122554660529344, "loss": 0.3473, "step": 293330 }, { "epoch": 84.39010356731876, "grad_norm": 0.9401320219039917, "learning_rate": 0.0003121979286536248, "loss": 0.323, "step": 293340 }, { "epoch": 84.39298043728424, "grad_norm": 0.9282920956611633, "learning_rate": 0.0003121403912543153, "loss": 0.3861, "step": 293350 }, { "epoch": 84.39585730724971, "grad_norm": 1.2490015029907227, "learning_rate": 0.00031208285385500573, "loss": 0.3431, "step": 293360 }, { "epoch": 84.39873417721519, "grad_norm": 1.0875991582870483, "learning_rate": 0.0003120253164556962, "loss": 0.3502, "step": 293370 }, { "epoch": 84.40161104718067, "grad_norm": 1.1720457077026367, "learning_rate": 0.0003119677790563867, "loss": 0.3572, "step": 293380 }, { "epoch": 84.40448791714614, "grad_norm": 0.514011561870575, "learning_rate": 0.0003119102416570771, "loss": 0.4353, "step": 293390 }, { "epoch": 84.40736478711162, "grad_norm": 0.8532170057296753, "learning_rate": 0.00031185270425776755, "loss": 0.2953, "step": 293400 }, { "epoch": 84.4102416570771, "grad_norm": 0.8367255330085754, "learning_rate": 0.000311795166858458, "loss": 0.3176, "step": 293410 }, { "epoch": 84.41311852704258, "grad_norm": 1.7436798810958862, "learning_rate": 0.00031173762945914846, "loss": 0.322, "step": 293420 }, { "epoch": 84.41599539700806, "grad_norm": 0.8146660327911377, "learning_rate": 0.00031168009205983886, "loss": 0.2647, "step": 293430 }, { "epoch": 84.41887226697354, "grad_norm": 1.2535871267318726, "learning_rate": 0.00031162255466052937, "loss": 0.3215, "step": 293440 }, { "epoch": 84.42174913693901, "grad_norm": 1.1927207708358765, "learning_rate": 0.0003115650172612198, "loss": 0.3653, "step": 293450 }, { "epoch": 84.42462600690449, "grad_norm": 1.5396289825439453, "learning_rate": 0.00031150747986191023, "loss": 0.396, "step": 293460 }, { "epoch": 84.42750287686997, "grad_norm": 1.298557162284851, "learning_rate": 0.00031144994246260074, "loss": 0.2416, "step": 293470 }, { "epoch": 84.43037974683544, "grad_norm": 1.1330901384353638, "learning_rate": 0.00031139240506329114, "loss": 0.3625, "step": 293480 }, { "epoch": 84.43325661680092, "grad_norm": 1.2730720043182373, "learning_rate": 0.0003113348676639816, "loss": 0.329, "step": 293490 }, { "epoch": 84.4361334867664, "grad_norm": 0.8690682053565979, "learning_rate": 0.00031127733026467205, "loss": 0.2753, "step": 293500 }, { "epoch": 84.43901035673187, "grad_norm": 1.908813714981079, "learning_rate": 0.0003112197928653625, "loss": 0.3702, "step": 293510 }, { "epoch": 84.44188722669735, "grad_norm": 1.7038660049438477, "learning_rate": 0.0003111622554660529, "loss": 0.2663, "step": 293520 }, { "epoch": 84.44476409666284, "grad_norm": 0.9488498568534851, "learning_rate": 0.0003111047180667434, "loss": 0.322, "step": 293530 }, { "epoch": 84.44764096662831, "grad_norm": 1.331213355064392, "learning_rate": 0.0003110471806674338, "loss": 0.2765, "step": 293540 }, { "epoch": 84.45051783659379, "grad_norm": 0.8823593258857727, "learning_rate": 0.0003109896432681243, "loss": 0.2606, "step": 293550 }, { "epoch": 84.45339470655927, "grad_norm": 1.2371586561203003, "learning_rate": 0.0003109321058688148, "loss": 0.2679, "step": 293560 }, { "epoch": 84.45627157652474, "grad_norm": 1.3416913747787476, "learning_rate": 0.0003108745684695052, "loss": 0.2851, "step": 293570 }, { "epoch": 84.45914844649022, "grad_norm": 1.0798208713531494, "learning_rate": 0.00031081703107019564, "loss": 0.3365, "step": 293580 }, { "epoch": 84.4620253164557, "grad_norm": 1.4853274822235107, "learning_rate": 0.0003107594936708861, "loss": 0.3792, "step": 293590 }, { "epoch": 84.46490218642117, "grad_norm": 0.775516927242279, "learning_rate": 0.00031070195627157655, "loss": 0.2731, "step": 293600 }, { "epoch": 84.46777905638665, "grad_norm": 1.3939589262008667, "learning_rate": 0.00031064441887226695, "loss": 0.3246, "step": 293610 }, { "epoch": 84.47065592635212, "grad_norm": 0.9584147334098816, "learning_rate": 0.00031058688147295746, "loss": 0.3658, "step": 293620 }, { "epoch": 84.47353279631761, "grad_norm": 1.9087058305740356, "learning_rate": 0.00031052934407364786, "loss": 0.3263, "step": 293630 }, { "epoch": 84.47640966628309, "grad_norm": 1.2996915578842163, "learning_rate": 0.0003104718066743383, "loss": 0.3457, "step": 293640 }, { "epoch": 84.47928653624857, "grad_norm": 1.094622015953064, "learning_rate": 0.0003104142692750288, "loss": 0.2814, "step": 293650 }, { "epoch": 84.48216340621404, "grad_norm": 1.3511085510253906, "learning_rate": 0.00031035673187571923, "loss": 0.288, "step": 293660 }, { "epoch": 84.48504027617952, "grad_norm": 1.142379641532898, "learning_rate": 0.0003102991944764097, "loss": 0.3977, "step": 293670 }, { "epoch": 84.487917146145, "grad_norm": 1.4739514589309692, "learning_rate": 0.00031024165707710014, "loss": 0.2894, "step": 293680 }, { "epoch": 84.49079401611047, "grad_norm": 1.386639952659607, "learning_rate": 0.0003101841196777906, "loss": 0.4124, "step": 293690 }, { "epoch": 84.49367088607595, "grad_norm": 1.7042198181152344, "learning_rate": 0.000310126582278481, "loss": 0.306, "step": 293700 }, { "epoch": 84.49654775604142, "grad_norm": 1.2101978063583374, "learning_rate": 0.00031006904487917145, "loss": 0.3235, "step": 293710 }, { "epoch": 84.4994246260069, "grad_norm": 0.8762224912643433, "learning_rate": 0.0003100115074798619, "loss": 0.2923, "step": 293720 }, { "epoch": 84.50230149597238, "grad_norm": 1.119009017944336, "learning_rate": 0.00030995397008055236, "loss": 0.4337, "step": 293730 }, { "epoch": 84.50517836593787, "grad_norm": 1.078061819076538, "learning_rate": 0.00030989643268124277, "loss": 0.291, "step": 293740 }, { "epoch": 84.50805523590334, "grad_norm": 0.9697322845458984, "learning_rate": 0.0003098388952819333, "loss": 0.2675, "step": 293750 }, { "epoch": 84.51093210586882, "grad_norm": 0.9074267745018005, "learning_rate": 0.00030978135788262373, "loss": 0.3991, "step": 293760 }, { "epoch": 84.5138089758343, "grad_norm": 1.815647006034851, "learning_rate": 0.00030972382048331413, "loss": 0.3921, "step": 293770 }, { "epoch": 84.51668584579977, "grad_norm": 0.6779555082321167, "learning_rate": 0.00030966628308400464, "loss": 0.2846, "step": 293780 }, { "epoch": 84.51956271576525, "grad_norm": 2.161428689956665, "learning_rate": 0.00030960874568469504, "loss": 0.4007, "step": 293790 }, { "epoch": 84.52243958573072, "grad_norm": 1.0512250661849976, "learning_rate": 0.0003095512082853855, "loss": 0.2859, "step": 293800 }, { "epoch": 84.5253164556962, "grad_norm": 1.7762815952301025, "learning_rate": 0.00030949367088607595, "loss": 0.4103, "step": 293810 }, { "epoch": 84.52819332566168, "grad_norm": 1.5934807062149048, "learning_rate": 0.0003094361334867664, "loss": 0.2937, "step": 293820 }, { "epoch": 84.53107019562715, "grad_norm": 1.6675472259521484, "learning_rate": 0.0003093785960874568, "loss": 0.3199, "step": 293830 }, { "epoch": 84.53394706559264, "grad_norm": 1.564170002937317, "learning_rate": 0.0003093210586881473, "loss": 0.3961, "step": 293840 }, { "epoch": 84.53682393555812, "grad_norm": 1.3078749179840088, "learning_rate": 0.0003092635212888377, "loss": 0.347, "step": 293850 }, { "epoch": 84.5397008055236, "grad_norm": 1.611128807067871, "learning_rate": 0.0003092059838895282, "loss": 0.2935, "step": 293860 }, { "epoch": 84.54257767548907, "grad_norm": 1.1863594055175781, "learning_rate": 0.0003091484464902187, "loss": 0.3422, "step": 293870 }, { "epoch": 84.54545454545455, "grad_norm": 1.4570581912994385, "learning_rate": 0.0003090909090909091, "loss": 0.3225, "step": 293880 }, { "epoch": 84.54833141542002, "grad_norm": 1.1901217699050903, "learning_rate": 0.00030903337169159954, "loss": 0.2881, "step": 293890 }, { "epoch": 84.5512082853855, "grad_norm": 1.371806025505066, "learning_rate": 0.00030897583429229, "loss": 0.295, "step": 293900 }, { "epoch": 84.55408515535098, "grad_norm": 1.093085765838623, "learning_rate": 0.00030891829689298045, "loss": 0.348, "step": 293910 }, { "epoch": 84.55696202531645, "grad_norm": 1.4849108457565308, "learning_rate": 0.00030886075949367086, "loss": 0.3536, "step": 293920 }, { "epoch": 84.55983889528193, "grad_norm": 1.2297275066375732, "learning_rate": 0.00030880322209436137, "loss": 0.3599, "step": 293930 }, { "epoch": 84.5627157652474, "grad_norm": 2.2107436656951904, "learning_rate": 0.00030874568469505177, "loss": 0.3171, "step": 293940 }, { "epoch": 84.5655926352129, "grad_norm": 0.7356226444244385, "learning_rate": 0.0003086881472957422, "loss": 0.3111, "step": 293950 }, { "epoch": 84.56846950517837, "grad_norm": 1.7147244215011597, "learning_rate": 0.00030863060989643273, "loss": 0.3442, "step": 293960 }, { "epoch": 84.57134637514385, "grad_norm": 2.0033349990844727, "learning_rate": 0.00030857307249712313, "loss": 0.3827, "step": 293970 }, { "epoch": 84.57422324510932, "grad_norm": 0.7363771200180054, "learning_rate": 0.0003085155350978136, "loss": 0.2958, "step": 293980 }, { "epoch": 84.5771001150748, "grad_norm": 1.2951321601867676, "learning_rate": 0.00030845799769850404, "loss": 0.3325, "step": 293990 }, { "epoch": 84.57997698504028, "grad_norm": 1.6483427286148071, "learning_rate": 0.0003084004602991945, "loss": 0.3751, "step": 294000 }, { "epoch": 84.58285385500575, "grad_norm": 1.055034875869751, "learning_rate": 0.0003083429228998849, "loss": 0.3909, "step": 294010 }, { "epoch": 84.58573072497123, "grad_norm": 1.8535114526748657, "learning_rate": 0.0003082853855005754, "loss": 0.274, "step": 294020 }, { "epoch": 84.5886075949367, "grad_norm": 0.44344231486320496, "learning_rate": 0.0003082278481012658, "loss": 0.3833, "step": 294030 }, { "epoch": 84.59148446490218, "grad_norm": 1.5012646913528442, "learning_rate": 0.00030817031070195627, "loss": 0.3164, "step": 294040 }, { "epoch": 84.59436133486767, "grad_norm": 0.8454564809799194, "learning_rate": 0.0003081127733026468, "loss": 0.26, "step": 294050 }, { "epoch": 84.59723820483315, "grad_norm": 1.2591880559921265, "learning_rate": 0.0003080552359033372, "loss": 0.3058, "step": 294060 }, { "epoch": 84.60011507479862, "grad_norm": 2.0655620098114014, "learning_rate": 0.00030799769850402763, "loss": 0.3805, "step": 294070 }, { "epoch": 84.6029919447641, "grad_norm": 0.8208214044570923, "learning_rate": 0.0003079401611047181, "loss": 0.2766, "step": 294080 }, { "epoch": 84.60586881472958, "grad_norm": 1.0311874151229858, "learning_rate": 0.00030788262370540855, "loss": 0.3261, "step": 294090 }, { "epoch": 84.60874568469505, "grad_norm": 1.2953308820724487, "learning_rate": 0.00030782508630609895, "loss": 0.3022, "step": 294100 }, { "epoch": 84.61162255466053, "grad_norm": 1.2166746854782104, "learning_rate": 0.00030776754890678946, "loss": 0.3118, "step": 294110 }, { "epoch": 84.614499424626, "grad_norm": 0.8000227212905884, "learning_rate": 0.00030771001150747986, "loss": 0.2531, "step": 294120 }, { "epoch": 84.61737629459148, "grad_norm": 1.76405668258667, "learning_rate": 0.0003076524741081703, "loss": 0.2961, "step": 294130 }, { "epoch": 84.62025316455696, "grad_norm": 1.1875463724136353, "learning_rate": 0.0003075949367088607, "loss": 0.3281, "step": 294140 }, { "epoch": 84.62313003452243, "grad_norm": 0.8007423877716064, "learning_rate": 0.0003075373993095512, "loss": 0.3337, "step": 294150 }, { "epoch": 84.62600690448792, "grad_norm": 0.8190953731536865, "learning_rate": 0.0003074798619102417, "loss": 0.3111, "step": 294160 }, { "epoch": 84.6288837744534, "grad_norm": 1.6999772787094116, "learning_rate": 0.0003074223245109321, "loss": 0.2881, "step": 294170 }, { "epoch": 84.63176064441888, "grad_norm": 2.7669432163238525, "learning_rate": 0.0003073647871116226, "loss": 0.3057, "step": 294180 }, { "epoch": 84.63463751438435, "grad_norm": 1.3824098110198975, "learning_rate": 0.000307307249712313, "loss": 0.2989, "step": 294190 }, { "epoch": 84.63751438434983, "grad_norm": 0.9858238101005554, "learning_rate": 0.00030724971231300345, "loss": 0.3433, "step": 294200 }, { "epoch": 84.6403912543153, "grad_norm": 0.8178405165672302, "learning_rate": 0.0003071921749136939, "loss": 0.3295, "step": 294210 }, { "epoch": 84.64326812428078, "grad_norm": 1.3132787942886353, "learning_rate": 0.00030713463751438436, "loss": 0.313, "step": 294220 }, { "epoch": 84.64614499424626, "grad_norm": 1.3658465147018433, "learning_rate": 0.00030707710011507476, "loss": 0.3059, "step": 294230 }, { "epoch": 84.64902186421173, "grad_norm": 1.7370723485946655, "learning_rate": 0.00030701956271576527, "loss": 0.3451, "step": 294240 }, { "epoch": 84.65189873417721, "grad_norm": 1.1567264795303345, "learning_rate": 0.0003069620253164557, "loss": 0.3039, "step": 294250 }, { "epoch": 84.6547756041427, "grad_norm": 1.9227272272109985, "learning_rate": 0.0003069044879171461, "loss": 0.3366, "step": 294260 }, { "epoch": 84.65765247410818, "grad_norm": 1.2472203969955444, "learning_rate": 0.00030684695051783664, "loss": 0.2618, "step": 294270 }, { "epoch": 84.66052934407365, "grad_norm": 1.01864492893219, "learning_rate": 0.00030678941311852704, "loss": 0.2912, "step": 294280 }, { "epoch": 84.66340621403913, "grad_norm": 2.103227376937866, "learning_rate": 0.0003067318757192175, "loss": 0.3334, "step": 294290 }, { "epoch": 84.6662830840046, "grad_norm": 0.8489256501197815, "learning_rate": 0.00030667433831990795, "loss": 0.3061, "step": 294300 }, { "epoch": 84.66915995397008, "grad_norm": 1.7095558643341064, "learning_rate": 0.0003066168009205984, "loss": 0.3382, "step": 294310 }, { "epoch": 84.67203682393556, "grad_norm": 1.9745041131973267, "learning_rate": 0.0003065592635212888, "loss": 0.3811, "step": 294320 }, { "epoch": 84.67491369390103, "grad_norm": 0.7639603614807129, "learning_rate": 0.0003065017261219793, "loss": 0.3187, "step": 294330 }, { "epoch": 84.67779056386651, "grad_norm": 1.4166711568832397, "learning_rate": 0.0003064441887226697, "loss": 0.3067, "step": 294340 }, { "epoch": 84.68066743383199, "grad_norm": 1.1758705377578735, "learning_rate": 0.00030638665132336017, "loss": 0.31, "step": 294350 }, { "epoch": 84.68354430379746, "grad_norm": 0.8946905136108398, "learning_rate": 0.0003063291139240507, "loss": 0.261, "step": 294360 }, { "epoch": 84.68642117376295, "grad_norm": 0.7007139325141907, "learning_rate": 0.0003062715765247411, "loss": 0.3253, "step": 294370 }, { "epoch": 84.68929804372843, "grad_norm": 0.8694321513175964, "learning_rate": 0.00030621403912543154, "loss": 0.3505, "step": 294380 }, { "epoch": 84.6921749136939, "grad_norm": 1.3765507936477661, "learning_rate": 0.000306156501726122, "loss": 0.3438, "step": 294390 }, { "epoch": 84.69505178365938, "grad_norm": 0.8230627775192261, "learning_rate": 0.00030609896432681245, "loss": 0.3393, "step": 294400 }, { "epoch": 84.69792865362486, "grad_norm": 1.395707607269287, "learning_rate": 0.00030604142692750285, "loss": 0.3356, "step": 294410 }, { "epoch": 84.70080552359033, "grad_norm": 0.8650650978088379, "learning_rate": 0.00030598388952819336, "loss": 0.3205, "step": 294420 }, { "epoch": 84.70368239355581, "grad_norm": 0.9245300889015198, "learning_rate": 0.00030592635212888376, "loss": 0.401, "step": 294430 }, { "epoch": 84.70655926352129, "grad_norm": 1.0238608121871948, "learning_rate": 0.0003058688147295742, "loss": 0.394, "step": 294440 }, { "epoch": 84.70943613348676, "grad_norm": 1.761630654335022, "learning_rate": 0.0003058112773302647, "loss": 0.3835, "step": 294450 }, { "epoch": 84.71231300345224, "grad_norm": 1.7287161350250244, "learning_rate": 0.00030575373993095513, "loss": 0.3342, "step": 294460 }, { "epoch": 84.71518987341773, "grad_norm": 1.8813822269439697, "learning_rate": 0.0003056962025316456, "loss": 0.3909, "step": 294470 }, { "epoch": 84.7180667433832, "grad_norm": 0.810564398765564, "learning_rate": 0.00030563866513233604, "loss": 0.3044, "step": 294480 }, { "epoch": 84.72094361334868, "grad_norm": 1.0183063745498657, "learning_rate": 0.0003055811277330265, "loss": 0.3177, "step": 294490 }, { "epoch": 84.72382048331416, "grad_norm": 1.026873230934143, "learning_rate": 0.0003055235903337169, "loss": 0.3387, "step": 294500 }, { "epoch": 84.72669735327963, "grad_norm": 0.8528129458427429, "learning_rate": 0.0003054660529344074, "loss": 0.3956, "step": 294510 }, { "epoch": 84.72957422324511, "grad_norm": 1.5496779680252075, "learning_rate": 0.0003054085155350978, "loss": 0.4094, "step": 294520 }, { "epoch": 84.73245109321059, "grad_norm": 1.7088701725006104, "learning_rate": 0.00030535097813578826, "loss": 0.3463, "step": 294530 }, { "epoch": 84.73532796317606, "grad_norm": 0.9666265249252319, "learning_rate": 0.0003052934407364787, "loss": 0.3741, "step": 294540 }, { "epoch": 84.73820483314154, "grad_norm": 0.8675743341445923, "learning_rate": 0.0003052359033371692, "loss": 0.3457, "step": 294550 }, { "epoch": 84.74108170310701, "grad_norm": 0.876953125, "learning_rate": 0.00030517836593785963, "loss": 0.3308, "step": 294560 }, { "epoch": 84.74395857307249, "grad_norm": 0.8057128190994263, "learning_rate": 0.00030512082853855003, "loss": 0.2974, "step": 294570 }, { "epoch": 84.74683544303798, "grad_norm": 1.9771678447723389, "learning_rate": 0.00030506329113924054, "loss": 0.3022, "step": 294580 }, { "epoch": 84.74971231300346, "grad_norm": 1.0821232795715332, "learning_rate": 0.00030500575373993094, "loss": 0.3304, "step": 294590 }, { "epoch": 84.75258918296893, "grad_norm": 0.6753941774368286, "learning_rate": 0.0003049482163406214, "loss": 0.3139, "step": 294600 }, { "epoch": 84.75546605293441, "grad_norm": 1.893011212348938, "learning_rate": 0.00030489067894131185, "loss": 0.4678, "step": 294610 }, { "epoch": 84.75834292289989, "grad_norm": 1.1983107328414917, "learning_rate": 0.0003048331415420023, "loss": 0.4611, "step": 294620 }, { "epoch": 84.76121979286536, "grad_norm": 2.597254514694214, "learning_rate": 0.0003047756041426927, "loss": 0.3487, "step": 294630 }, { "epoch": 84.76409666283084, "grad_norm": 0.8403412699699402, "learning_rate": 0.0003047180667433832, "loss": 0.3905, "step": 294640 }, { "epoch": 84.76697353279631, "grad_norm": 0.761944591999054, "learning_rate": 0.0003046605293440737, "loss": 0.3489, "step": 294650 }, { "epoch": 84.76985040276179, "grad_norm": 1.9218344688415527, "learning_rate": 0.0003046029919447641, "loss": 0.381, "step": 294660 }, { "epoch": 84.77272727272727, "grad_norm": 1.2423171997070312, "learning_rate": 0.0003045454545454546, "loss": 0.4433, "step": 294670 }, { "epoch": 84.77560414269276, "grad_norm": 1.0653605461120605, "learning_rate": 0.000304487917146145, "loss": 0.2767, "step": 294680 }, { "epoch": 84.77848101265823, "grad_norm": 1.1915990114212036, "learning_rate": 0.00030443037974683544, "loss": 0.2465, "step": 294690 }, { "epoch": 84.78135788262371, "grad_norm": 1.118891716003418, "learning_rate": 0.0003043728423475259, "loss": 0.2898, "step": 294700 }, { "epoch": 84.78423475258919, "grad_norm": 1.5862343311309814, "learning_rate": 0.00030431530494821635, "loss": 0.3364, "step": 294710 }, { "epoch": 84.78711162255466, "grad_norm": 0.9151975512504578, "learning_rate": 0.00030425776754890675, "loss": 0.3352, "step": 294720 }, { "epoch": 84.78998849252014, "grad_norm": 1.279299020767212, "learning_rate": 0.00030420023014959726, "loss": 0.3361, "step": 294730 }, { "epoch": 84.79286536248561, "grad_norm": 0.8289332389831543, "learning_rate": 0.0003041426927502877, "loss": 0.2564, "step": 294740 }, { "epoch": 84.79574223245109, "grad_norm": 1.2260316610336304, "learning_rate": 0.0003040851553509781, "loss": 0.3626, "step": 294750 }, { "epoch": 84.79861910241657, "grad_norm": 1.6208720207214355, "learning_rate": 0.00030402761795166863, "loss": 0.3031, "step": 294760 }, { "epoch": 84.80149597238204, "grad_norm": 1.1086057424545288, "learning_rate": 0.00030397008055235903, "loss": 0.3594, "step": 294770 }, { "epoch": 84.80437284234753, "grad_norm": 0.9045600295066833, "learning_rate": 0.0003039125431530495, "loss": 0.3438, "step": 294780 }, { "epoch": 84.80724971231301, "grad_norm": 1.2671819925308228, "learning_rate": 0.00030385500575373994, "loss": 0.3441, "step": 294790 }, { "epoch": 84.81012658227849, "grad_norm": 1.1763612031936646, "learning_rate": 0.0003037974683544304, "loss": 0.2848, "step": 294800 }, { "epoch": 84.81300345224396, "grad_norm": 1.1221508979797363, "learning_rate": 0.0003037399309551208, "loss": 0.3184, "step": 294810 }, { "epoch": 84.81588032220944, "grad_norm": 1.3421441316604614, "learning_rate": 0.0003036823935558113, "loss": 0.3531, "step": 294820 }, { "epoch": 84.81875719217491, "grad_norm": 1.8098093271255493, "learning_rate": 0.0003036248561565017, "loss": 0.3069, "step": 294830 }, { "epoch": 84.82163406214039, "grad_norm": 1.395632028579712, "learning_rate": 0.00030356731875719217, "loss": 0.3008, "step": 294840 }, { "epoch": 84.82451093210587, "grad_norm": 1.235182523727417, "learning_rate": 0.0003035097813578827, "loss": 0.3741, "step": 294850 }, { "epoch": 84.82738780207134, "grad_norm": 0.9265809655189514, "learning_rate": 0.0003034522439585731, "loss": 0.319, "step": 294860 }, { "epoch": 84.83026467203682, "grad_norm": 1.7823548316955566, "learning_rate": 0.00030339470655926353, "loss": 0.3346, "step": 294870 }, { "epoch": 84.8331415420023, "grad_norm": 1.0904560089111328, "learning_rate": 0.000303337169159954, "loss": 0.3548, "step": 294880 }, { "epoch": 84.83601841196779, "grad_norm": 1.4613336324691772, "learning_rate": 0.00030327963176064444, "loss": 0.3056, "step": 294890 }, { "epoch": 84.83889528193326, "grad_norm": 1.7534830570220947, "learning_rate": 0.00030322209436133485, "loss": 0.3227, "step": 294900 }, { "epoch": 84.84177215189874, "grad_norm": 1.6048178672790527, "learning_rate": 0.00030316455696202535, "loss": 0.2882, "step": 294910 }, { "epoch": 84.84464902186421, "grad_norm": 0.9744325280189514, "learning_rate": 0.00030310701956271576, "loss": 0.4503, "step": 294920 }, { "epoch": 84.84752589182969, "grad_norm": 1.9522722959518433, "learning_rate": 0.0003030494821634062, "loss": 0.3593, "step": 294930 }, { "epoch": 84.85040276179517, "grad_norm": 1.5690641403198242, "learning_rate": 0.0003029919447640967, "loss": 0.3512, "step": 294940 }, { "epoch": 84.85327963176064, "grad_norm": 1.9175392389297485, "learning_rate": 0.0003029344073647871, "loss": 0.2889, "step": 294950 }, { "epoch": 84.85615650172612, "grad_norm": 1.2421674728393555, "learning_rate": 0.0003028768699654776, "loss": 0.3287, "step": 294960 }, { "epoch": 84.8590333716916, "grad_norm": 1.571925401687622, "learning_rate": 0.000302819332566168, "loss": 0.3614, "step": 294970 }, { "epoch": 84.86191024165707, "grad_norm": 1.1660045385360718, "learning_rate": 0.0003027617951668585, "loss": 0.3386, "step": 294980 }, { "epoch": 84.86478711162256, "grad_norm": 0.8808085322380066, "learning_rate": 0.0003027042577675489, "loss": 0.3226, "step": 294990 }, { "epoch": 84.86766398158804, "grad_norm": 0.9225724339485168, "learning_rate": 0.00030264672036823935, "loss": 0.3788, "step": 295000 }, { "epoch": 84.87054085155351, "grad_norm": 1.7730895280838013, "learning_rate": 0.0003025891829689298, "loss": 0.3084, "step": 295010 }, { "epoch": 84.87341772151899, "grad_norm": 2.712259531021118, "learning_rate": 0.00030253164556962026, "loss": 0.3227, "step": 295020 }, { "epoch": 84.87629459148447, "grad_norm": 0.9401499629020691, "learning_rate": 0.0003024741081703107, "loss": 0.3082, "step": 295030 }, { "epoch": 84.87917146144994, "grad_norm": 1.282375454902649, "learning_rate": 0.00030241657077100117, "loss": 0.3453, "step": 295040 }, { "epoch": 84.88204833141542, "grad_norm": 1.8888083696365356, "learning_rate": 0.0003023590333716916, "loss": 0.3515, "step": 295050 }, { "epoch": 84.8849252013809, "grad_norm": 0.769572377204895, "learning_rate": 0.000302301495972382, "loss": 0.277, "step": 295060 }, { "epoch": 84.88780207134637, "grad_norm": 1.6277244091033936, "learning_rate": 0.00030224395857307253, "loss": 0.3336, "step": 295070 }, { "epoch": 84.89067894131185, "grad_norm": 1.33672297000885, "learning_rate": 0.00030218642117376294, "loss": 0.2801, "step": 295080 }, { "epoch": 84.89355581127732, "grad_norm": 1.3125652074813843, "learning_rate": 0.0003021288837744534, "loss": 0.368, "step": 295090 }, { "epoch": 84.89643268124281, "grad_norm": 1.2143992185592651, "learning_rate": 0.00030207134637514385, "loss": 0.326, "step": 295100 }, { "epoch": 84.89930955120829, "grad_norm": 0.8813221454620361, "learning_rate": 0.0003020138089758343, "loss": 0.257, "step": 295110 }, { "epoch": 84.90218642117377, "grad_norm": 1.1601580381393433, "learning_rate": 0.0003019562715765247, "loss": 0.3435, "step": 295120 }, { "epoch": 84.90506329113924, "grad_norm": 2.1303694248199463, "learning_rate": 0.0003018987341772152, "loss": 0.3407, "step": 295130 }, { "epoch": 84.90794016110472, "grad_norm": 0.7833288908004761, "learning_rate": 0.00030184119677790567, "loss": 0.4353, "step": 295140 }, { "epoch": 84.9108170310702, "grad_norm": 0.8880180716514587, "learning_rate": 0.00030178365937859607, "loss": 0.3215, "step": 295150 }, { "epoch": 84.91369390103567, "grad_norm": 0.8920565247535706, "learning_rate": 0.0003017261219792866, "loss": 0.2998, "step": 295160 }, { "epoch": 84.91657077100115, "grad_norm": 1.358113408088684, "learning_rate": 0.000301668584579977, "loss": 0.2795, "step": 295170 }, { "epoch": 84.91944764096662, "grad_norm": 2.6543049812316895, "learning_rate": 0.00030161104718066744, "loss": 0.4065, "step": 295180 }, { "epoch": 84.9223245109321, "grad_norm": 1.213716983795166, "learning_rate": 0.0003015535097813579, "loss": 0.2909, "step": 295190 }, { "epoch": 84.92520138089759, "grad_norm": 1.1706268787384033, "learning_rate": 0.00030149597238204835, "loss": 0.3483, "step": 295200 }, { "epoch": 84.92807825086307, "grad_norm": 0.8030696511268616, "learning_rate": 0.00030143843498273875, "loss": 0.3901, "step": 295210 }, { "epoch": 84.93095512082854, "grad_norm": 0.7992970943450928, "learning_rate": 0.00030138089758342926, "loss": 0.3508, "step": 295220 }, { "epoch": 84.93383199079402, "grad_norm": 0.977348804473877, "learning_rate": 0.0003013233601841197, "loss": 0.4019, "step": 295230 }, { "epoch": 84.9367088607595, "grad_norm": 0.9683333039283752, "learning_rate": 0.0003012658227848101, "loss": 0.3231, "step": 295240 }, { "epoch": 84.93958573072497, "grad_norm": 1.044223666191101, "learning_rate": 0.0003012082853855006, "loss": 0.3593, "step": 295250 }, { "epoch": 84.94246260069045, "grad_norm": 0.7079958319664001, "learning_rate": 0.000301150747986191, "loss": 0.3337, "step": 295260 }, { "epoch": 84.94533947065592, "grad_norm": 0.7238922715187073, "learning_rate": 0.0003010932105868815, "loss": 0.3042, "step": 295270 }, { "epoch": 84.9482163406214, "grad_norm": 1.1746562719345093, "learning_rate": 0.00030103567318757194, "loss": 0.3364, "step": 295280 }, { "epoch": 84.95109321058688, "grad_norm": 1.199414610862732, "learning_rate": 0.0003009781357882624, "loss": 0.4039, "step": 295290 }, { "epoch": 84.95397008055235, "grad_norm": 0.9198391437530518, "learning_rate": 0.0003009205983889528, "loss": 0.3238, "step": 295300 }, { "epoch": 84.95684695051784, "grad_norm": 1.2081130743026733, "learning_rate": 0.0003008630609896433, "loss": 0.3234, "step": 295310 }, { "epoch": 84.95972382048332, "grad_norm": 0.6641111373901367, "learning_rate": 0.0003008055235903337, "loss": 0.3355, "step": 295320 }, { "epoch": 84.9626006904488, "grad_norm": 2.31368350982666, "learning_rate": 0.00030074798619102416, "loss": 0.3748, "step": 295330 }, { "epoch": 84.96547756041427, "grad_norm": 1.677878499031067, "learning_rate": 0.00030069044879171467, "loss": 0.3166, "step": 295340 }, { "epoch": 84.96835443037975, "grad_norm": 1.3631669282913208, "learning_rate": 0.00030063291139240507, "loss": 0.3325, "step": 295350 }, { "epoch": 84.97123130034522, "grad_norm": 1.5185753107070923, "learning_rate": 0.00030057537399309553, "loss": 0.319, "step": 295360 }, { "epoch": 84.9741081703107, "grad_norm": 1.1473771333694458, "learning_rate": 0.00030051783659378593, "loss": 0.32, "step": 295370 }, { "epoch": 84.97698504027618, "grad_norm": 0.7712924480438232, "learning_rate": 0.00030046029919447644, "loss": 0.3887, "step": 295380 }, { "epoch": 84.97986191024165, "grad_norm": 1.2154302597045898, "learning_rate": 0.00030040276179516684, "loss": 0.3604, "step": 295390 }, { "epoch": 84.98273878020713, "grad_norm": 1.5779893398284912, "learning_rate": 0.0003003452243958573, "loss": 0.3918, "step": 295400 }, { "epoch": 84.98561565017262, "grad_norm": 1.375069499015808, "learning_rate": 0.00030028768699654775, "loss": 0.2947, "step": 295410 }, { "epoch": 84.9884925201381, "grad_norm": 1.1795506477355957, "learning_rate": 0.0003002301495972382, "loss": 0.2734, "step": 295420 }, { "epoch": 84.99136939010357, "grad_norm": 0.8625043630599976, "learning_rate": 0.00030017261219792866, "loss": 0.3621, "step": 295430 }, { "epoch": 84.99424626006905, "grad_norm": 0.824099063873291, "learning_rate": 0.0003001150747986191, "loss": 0.3435, "step": 295440 }, { "epoch": 84.99712313003452, "grad_norm": 1.2159423828125, "learning_rate": 0.00030005753739930957, "loss": 0.3606, "step": 295450 }, { "epoch": 85.0, "grad_norm": 1.1025350093841553, "learning_rate": 0.0003, "loss": 0.3585, "step": 295460 }, { "epoch": 85.00287686996548, "grad_norm": 1.7074419260025024, "learning_rate": 0.0002999424626006905, "loss": 0.2688, "step": 295470 }, { "epoch": 85.00575373993095, "grad_norm": 0.7715774774551392, "learning_rate": 0.0002998849252013809, "loss": 0.3593, "step": 295480 }, { "epoch": 85.00863060989643, "grad_norm": 1.3723068237304688, "learning_rate": 0.00029982738780207134, "loss": 0.3309, "step": 295490 }, { "epoch": 85.0115074798619, "grad_norm": 2.185814142227173, "learning_rate": 0.0002997698504027618, "loss": 0.3144, "step": 295500 }, { "epoch": 85.01438434982738, "grad_norm": 1.4282456636428833, "learning_rate": 0.00029971231300345225, "loss": 0.3668, "step": 295510 }, { "epoch": 85.01726121979287, "grad_norm": 1.001711130142212, "learning_rate": 0.0002996547756041427, "loss": 0.3412, "step": 295520 }, { "epoch": 85.02013808975835, "grad_norm": 1.6300199031829834, "learning_rate": 0.00029959723820483316, "loss": 0.2936, "step": 295530 }, { "epoch": 85.02301495972382, "grad_norm": 0.809182345867157, "learning_rate": 0.0002995397008055236, "loss": 0.2626, "step": 295540 }, { "epoch": 85.0258918296893, "grad_norm": 1.5666733980178833, "learning_rate": 0.000299482163406214, "loss": 0.2923, "step": 295550 }, { "epoch": 85.02876869965478, "grad_norm": 0.6862331032752991, "learning_rate": 0.00029942462600690453, "loss": 0.2658, "step": 295560 }, { "epoch": 85.03164556962025, "grad_norm": 0.6927407383918762, "learning_rate": 0.00029936708860759493, "loss": 0.3047, "step": 295570 }, { "epoch": 85.03452243958573, "grad_norm": 0.9930645823478699, "learning_rate": 0.0002993095512082854, "loss": 0.3356, "step": 295580 }, { "epoch": 85.0373993095512, "grad_norm": 0.9225826263427734, "learning_rate": 0.00029925201380897584, "loss": 0.3111, "step": 295590 }, { "epoch": 85.04027617951668, "grad_norm": 1.1686203479766846, "learning_rate": 0.0002991944764096663, "loss": 0.2859, "step": 295600 }, { "epoch": 85.04315304948216, "grad_norm": 1.2216569185256958, "learning_rate": 0.0002991369390103567, "loss": 0.3141, "step": 295610 }, { "epoch": 85.04602991944765, "grad_norm": 1.0995888710021973, "learning_rate": 0.0002990794016110472, "loss": 0.3247, "step": 295620 }, { "epoch": 85.04890678941312, "grad_norm": 1.2868623733520508, "learning_rate": 0.00029902186421173766, "loss": 0.2511, "step": 295630 }, { "epoch": 85.0517836593786, "grad_norm": 1.2441000938415527, "learning_rate": 0.00029896432681242806, "loss": 0.2995, "step": 295640 }, { "epoch": 85.05466052934408, "grad_norm": 0.8425699472427368, "learning_rate": 0.0002989067894131186, "loss": 0.2459, "step": 295650 }, { "epoch": 85.05753739930955, "grad_norm": 1.7768100500106812, "learning_rate": 0.000298849252013809, "loss": 0.4172, "step": 295660 }, { "epoch": 85.06041426927503, "grad_norm": 1.3497933149337769, "learning_rate": 0.00029879171461449943, "loss": 0.2684, "step": 295670 }, { "epoch": 85.0632911392405, "grad_norm": 1.1917396783828735, "learning_rate": 0.0002987341772151899, "loss": 0.2609, "step": 295680 }, { "epoch": 85.06616800920598, "grad_norm": 1.650130271911621, "learning_rate": 0.00029867663981588034, "loss": 0.3203, "step": 295690 }, { "epoch": 85.06904487917146, "grad_norm": 0.7697235345840454, "learning_rate": 0.00029861910241657074, "loss": 0.2855, "step": 295700 }, { "epoch": 85.07192174913693, "grad_norm": 2.0282340049743652, "learning_rate": 0.00029856156501726125, "loss": 0.2819, "step": 295710 }, { "epoch": 85.07479861910241, "grad_norm": 1.3291643857955933, "learning_rate": 0.0002985040276179517, "loss": 0.2974, "step": 295720 }, { "epoch": 85.0776754890679, "grad_norm": 0.7078995704650879, "learning_rate": 0.0002984464902186421, "loss": 0.3755, "step": 295730 }, { "epoch": 85.08055235903338, "grad_norm": 1.1780779361724854, "learning_rate": 0.0002983889528193326, "loss": 0.3426, "step": 295740 }, { "epoch": 85.08342922899885, "grad_norm": 0.8217483162879944, "learning_rate": 0.000298331415420023, "loss": 0.2918, "step": 295750 }, { "epoch": 85.08630609896433, "grad_norm": 1.065953016281128, "learning_rate": 0.0002982738780207135, "loss": 0.292, "step": 295760 }, { "epoch": 85.0891829689298, "grad_norm": 2.11287260055542, "learning_rate": 0.0002982163406214039, "loss": 0.4016, "step": 295770 }, { "epoch": 85.09205983889528, "grad_norm": 1.2567272186279297, "learning_rate": 0.0002981588032220944, "loss": 0.282, "step": 295780 }, { "epoch": 85.09493670886076, "grad_norm": 0.814672589302063, "learning_rate": 0.0002981012658227848, "loss": 0.2923, "step": 295790 }, { "epoch": 85.09781357882623, "grad_norm": 0.9959799647331238, "learning_rate": 0.00029804372842347524, "loss": 0.2903, "step": 295800 }, { "epoch": 85.10069044879171, "grad_norm": 2.2678020000457764, "learning_rate": 0.0002979861910241657, "loss": 0.2926, "step": 295810 }, { "epoch": 85.10356731875719, "grad_norm": 0.7347761392593384, "learning_rate": 0.00029792865362485615, "loss": 0.3157, "step": 295820 }, { "epoch": 85.10644418872268, "grad_norm": 1.1083970069885254, "learning_rate": 0.0002978711162255466, "loss": 0.2694, "step": 295830 }, { "epoch": 85.10932105868815, "grad_norm": 1.2226762771606445, "learning_rate": 0.00029781357882623707, "loss": 0.3384, "step": 295840 }, { "epoch": 85.11219792865363, "grad_norm": 0.9006851315498352, "learning_rate": 0.0002977560414269275, "loss": 0.3102, "step": 295850 }, { "epoch": 85.1150747986191, "grad_norm": 1.2805085182189941, "learning_rate": 0.0002976985040276179, "loss": 0.2957, "step": 295860 }, { "epoch": 85.11795166858458, "grad_norm": 1.4040228128433228, "learning_rate": 0.00029764096662830843, "loss": 0.3388, "step": 295870 }, { "epoch": 85.12082853855006, "grad_norm": 1.3455172777175903, "learning_rate": 0.00029758342922899883, "loss": 0.3118, "step": 295880 }, { "epoch": 85.12370540851553, "grad_norm": 1.5079026222229004, "learning_rate": 0.0002975258918296893, "loss": 0.2533, "step": 295890 }, { "epoch": 85.12658227848101, "grad_norm": 0.9168719053268433, "learning_rate": 0.00029746835443037974, "loss": 0.3204, "step": 295900 }, { "epoch": 85.12945914844649, "grad_norm": 1.1362141370773315, "learning_rate": 0.0002974108170310702, "loss": 0.2882, "step": 295910 }, { "epoch": 85.13233601841196, "grad_norm": 0.6907497048377991, "learning_rate": 0.00029735327963176066, "loss": 0.3225, "step": 295920 }, { "epoch": 85.13521288837744, "grad_norm": 0.9117810130119324, "learning_rate": 0.0002972957422324511, "loss": 0.4984, "step": 295930 }, { "epoch": 85.13808975834293, "grad_norm": 1.3529868125915527, "learning_rate": 0.00029723820483314157, "loss": 0.355, "step": 295940 }, { "epoch": 85.1409666283084, "grad_norm": 0.897819459438324, "learning_rate": 0.00029718066743383197, "loss": 0.3666, "step": 295950 }, { "epoch": 85.14384349827388, "grad_norm": 0.9829294085502625, "learning_rate": 0.0002971231300345225, "loss": 0.2564, "step": 295960 }, { "epoch": 85.14672036823936, "grad_norm": 2.6600098609924316, "learning_rate": 0.0002970655926352129, "loss": 0.3561, "step": 295970 }, { "epoch": 85.14959723820483, "grad_norm": 1.0299944877624512, "learning_rate": 0.00029700805523590333, "loss": 0.3309, "step": 295980 }, { "epoch": 85.15247410817031, "grad_norm": 1.5101152658462524, "learning_rate": 0.0002969505178365938, "loss": 0.3043, "step": 295990 }, { "epoch": 85.15535097813579, "grad_norm": 1.1180657148361206, "learning_rate": 0.00029689298043728425, "loss": 0.2536, "step": 296000 }, { "epoch": 85.15822784810126, "grad_norm": 1.4395169019699097, "learning_rate": 0.0002968354430379747, "loss": 0.3668, "step": 296010 }, { "epoch": 85.16110471806674, "grad_norm": 0.9102181792259216, "learning_rate": 0.00029677790563866516, "loss": 0.2958, "step": 296020 }, { "epoch": 85.16398158803221, "grad_norm": 1.1180073022842407, "learning_rate": 0.0002967203682393556, "loss": 0.2914, "step": 296030 }, { "epoch": 85.1668584579977, "grad_norm": 0.9178212285041809, "learning_rate": 0.000296662830840046, "loss": 0.3104, "step": 296040 }, { "epoch": 85.16973532796318, "grad_norm": 1.539465308189392, "learning_rate": 0.0002966052934407365, "loss": 0.3043, "step": 296050 }, { "epoch": 85.17261219792866, "grad_norm": 1.7654919624328613, "learning_rate": 0.0002965477560414269, "loss": 0.3018, "step": 296060 }, { "epoch": 85.17548906789413, "grad_norm": 1.086643934249878, "learning_rate": 0.0002964902186421174, "loss": 0.3307, "step": 296070 }, { "epoch": 85.17836593785961, "grad_norm": 1.518818974494934, "learning_rate": 0.00029643268124280784, "loss": 0.3988, "step": 296080 }, { "epoch": 85.18124280782509, "grad_norm": 1.2932008504867554, "learning_rate": 0.0002963751438434983, "loss": 0.2961, "step": 296090 }, { "epoch": 85.18411967779056, "grad_norm": 0.9571681618690491, "learning_rate": 0.0002963176064441887, "loss": 0.3415, "step": 296100 }, { "epoch": 85.18699654775604, "grad_norm": 1.8457765579223633, "learning_rate": 0.0002962600690448792, "loss": 0.3516, "step": 296110 }, { "epoch": 85.18987341772151, "grad_norm": 0.8618284463882446, "learning_rate": 0.00029620253164556966, "loss": 0.341, "step": 296120 }, { "epoch": 85.19275028768699, "grad_norm": 0.8167706727981567, "learning_rate": 0.00029614499424626006, "loss": 0.311, "step": 296130 }, { "epoch": 85.19562715765247, "grad_norm": 0.8101646304130554, "learning_rate": 0.00029608745684695057, "loss": 0.2529, "step": 296140 }, { "epoch": 85.19850402761796, "grad_norm": 0.9249211549758911, "learning_rate": 0.00029602991944764097, "loss": 0.3439, "step": 296150 }, { "epoch": 85.20138089758343, "grad_norm": 0.7856398224830627, "learning_rate": 0.0002959723820483314, "loss": 0.3564, "step": 296160 }, { "epoch": 85.20425776754891, "grad_norm": 1.1289949417114258, "learning_rate": 0.0002959148446490218, "loss": 0.3043, "step": 296170 }, { "epoch": 85.20713463751439, "grad_norm": 0.8171117305755615, "learning_rate": 0.00029585730724971234, "loss": 0.2978, "step": 296180 }, { "epoch": 85.21001150747986, "grad_norm": 1.1196258068084717, "learning_rate": 0.00029579976985040274, "loss": 0.2852, "step": 296190 }, { "epoch": 85.21288837744534, "grad_norm": 0.8874208331108093, "learning_rate": 0.0002957422324510932, "loss": 0.3055, "step": 296200 }, { "epoch": 85.21576524741081, "grad_norm": 1.0770989656448364, "learning_rate": 0.0002956846950517837, "loss": 0.2843, "step": 296210 }, { "epoch": 85.21864211737629, "grad_norm": 1.1016530990600586, "learning_rate": 0.0002956271576524741, "loss": 0.3476, "step": 296220 }, { "epoch": 85.22151898734177, "grad_norm": 0.9548646211624146, "learning_rate": 0.00029556962025316456, "loss": 0.2743, "step": 296230 }, { "epoch": 85.22439585730724, "grad_norm": 1.6640856266021729, "learning_rate": 0.000295512082853855, "loss": 0.3531, "step": 296240 }, { "epoch": 85.22727272727273, "grad_norm": 1.3378466367721558, "learning_rate": 0.00029545454545454547, "loss": 0.3158, "step": 296250 }, { "epoch": 85.23014959723821, "grad_norm": 1.2178634405136108, "learning_rate": 0.00029539700805523587, "loss": 0.4351, "step": 296260 }, { "epoch": 85.23302646720369, "grad_norm": 1.081087589263916, "learning_rate": 0.0002953394706559264, "loss": 0.3829, "step": 296270 }, { "epoch": 85.23590333716916, "grad_norm": 0.95819491147995, "learning_rate": 0.0002952819332566168, "loss": 0.3058, "step": 296280 }, { "epoch": 85.23878020713464, "grad_norm": 1.0934438705444336, "learning_rate": 0.00029522439585730724, "loss": 0.3244, "step": 296290 }, { "epoch": 85.24165707710011, "grad_norm": 1.266575574874878, "learning_rate": 0.0002951668584579977, "loss": 0.3584, "step": 296300 }, { "epoch": 85.24453394706559, "grad_norm": 1.1080763339996338, "learning_rate": 0.00029510932105868815, "loss": 0.3138, "step": 296310 }, { "epoch": 85.24741081703107, "grad_norm": 1.1828242540359497, "learning_rate": 0.0002950517836593786, "loss": 0.3274, "step": 296320 }, { "epoch": 85.25028768699654, "grad_norm": 0.8602094650268555, "learning_rate": 0.00029499424626006906, "loss": 0.2815, "step": 296330 }, { "epoch": 85.25316455696202, "grad_norm": 0.8554314970970154, "learning_rate": 0.0002949367088607595, "loss": 0.3577, "step": 296340 }, { "epoch": 85.25604142692751, "grad_norm": 0.7842566967010498, "learning_rate": 0.0002948791714614499, "loss": 0.2868, "step": 296350 }, { "epoch": 85.25891829689299, "grad_norm": 1.9437025785446167, "learning_rate": 0.0002948216340621404, "loss": 0.3332, "step": 296360 }, { "epoch": 85.26179516685846, "grad_norm": 1.3786664009094238, "learning_rate": 0.00029476409666283083, "loss": 0.336, "step": 296370 }, { "epoch": 85.26467203682394, "grad_norm": 1.4006725549697876, "learning_rate": 0.0002947065592635213, "loss": 0.3226, "step": 296380 }, { "epoch": 85.26754890678941, "grad_norm": 0.9399663805961609, "learning_rate": 0.00029464902186421174, "loss": 0.3295, "step": 296390 }, { "epoch": 85.27042577675489, "grad_norm": 1.3988580703735352, "learning_rate": 0.0002945914844649022, "loss": 0.3888, "step": 296400 }, { "epoch": 85.27330264672037, "grad_norm": 1.8672066926956177, "learning_rate": 0.00029453394706559265, "loss": 0.2828, "step": 296410 }, { "epoch": 85.27617951668584, "grad_norm": 0.7296004295349121, "learning_rate": 0.0002944764096662831, "loss": 0.3368, "step": 296420 }, { "epoch": 85.27905638665132, "grad_norm": 1.2434488534927368, "learning_rate": 0.00029441887226697356, "loss": 0.2836, "step": 296430 }, { "epoch": 85.2819332566168, "grad_norm": 1.5591031312942505, "learning_rate": 0.00029436133486766396, "loss": 0.3174, "step": 296440 }, { "epoch": 85.28481012658227, "grad_norm": 1.1440600156784058, "learning_rate": 0.00029430379746835447, "loss": 0.2574, "step": 296450 }, { "epoch": 85.28768699654776, "grad_norm": 0.8647182583808899, "learning_rate": 0.0002942462600690449, "loss": 0.3336, "step": 296460 }, { "epoch": 85.29056386651324, "grad_norm": 1.3491297960281372, "learning_rate": 0.00029418872266973533, "loss": 0.2929, "step": 296470 }, { "epoch": 85.29344073647871, "grad_norm": 0.8691011667251587, "learning_rate": 0.0002941311852704258, "loss": 0.3526, "step": 296480 }, { "epoch": 85.29631760644419, "grad_norm": 0.6093640327453613, "learning_rate": 0.00029407364787111624, "loss": 0.2818, "step": 296490 }, { "epoch": 85.29919447640967, "grad_norm": 1.116436243057251, "learning_rate": 0.0002940161104718067, "loss": 0.3192, "step": 296500 }, { "epoch": 85.30207134637514, "grad_norm": 1.352234125137329, "learning_rate": 0.00029395857307249715, "loss": 0.2828, "step": 296510 }, { "epoch": 85.30494821634062, "grad_norm": 1.5118999481201172, "learning_rate": 0.0002939010356731876, "loss": 0.3131, "step": 296520 }, { "epoch": 85.3078250863061, "grad_norm": 0.7777518033981323, "learning_rate": 0.000293843498273878, "loss": 0.2692, "step": 296530 }, { "epoch": 85.31070195627157, "grad_norm": 1.2333946228027344, "learning_rate": 0.0002937859608745685, "loss": 0.3481, "step": 296540 }, { "epoch": 85.31357882623705, "grad_norm": 1.7453423738479614, "learning_rate": 0.0002937284234752589, "loss": 0.3646, "step": 296550 }, { "epoch": 85.31645569620254, "grad_norm": 0.9962944984436035, "learning_rate": 0.0002936708860759494, "loss": 0.3314, "step": 296560 }, { "epoch": 85.31933256616801, "grad_norm": 1.5551483631134033, "learning_rate": 0.0002936133486766398, "loss": 0.2643, "step": 296570 }, { "epoch": 85.32220943613349, "grad_norm": 1.061948537826538, "learning_rate": 0.0002935558112773303, "loss": 0.3023, "step": 296580 }, { "epoch": 85.32508630609897, "grad_norm": 1.7655211687088013, "learning_rate": 0.0002934982738780207, "loss": 0.3641, "step": 296590 }, { "epoch": 85.32796317606444, "grad_norm": 1.585567831993103, "learning_rate": 0.00029344073647871114, "loss": 0.3398, "step": 296600 }, { "epoch": 85.33084004602992, "grad_norm": 1.2430777549743652, "learning_rate": 0.00029338319907940165, "loss": 0.3567, "step": 296610 }, { "epoch": 85.3337169159954, "grad_norm": 0.7156617641448975, "learning_rate": 0.00029332566168009205, "loss": 0.2711, "step": 296620 }, { "epoch": 85.33659378596087, "grad_norm": 0.8195418119430542, "learning_rate": 0.0002932681242807825, "loss": 0.3142, "step": 296630 }, { "epoch": 85.33947065592635, "grad_norm": 0.6473122239112854, "learning_rate": 0.00029321058688147296, "loss": 0.2986, "step": 296640 }, { "epoch": 85.34234752589182, "grad_norm": 2.346785545349121, "learning_rate": 0.0002931530494821634, "loss": 0.3426, "step": 296650 }, { "epoch": 85.3452243958573, "grad_norm": 1.133015751838684, "learning_rate": 0.0002930955120828538, "loss": 0.2475, "step": 296660 }, { "epoch": 85.34810126582279, "grad_norm": 1.13925302028656, "learning_rate": 0.00029303797468354433, "loss": 0.2813, "step": 296670 }, { "epoch": 85.35097813578827, "grad_norm": 0.9735186100006104, "learning_rate": 0.00029298043728423473, "loss": 0.3128, "step": 296680 }, { "epoch": 85.35385500575374, "grad_norm": 0.7006235122680664, "learning_rate": 0.0002929228998849252, "loss": 0.2993, "step": 296690 }, { "epoch": 85.35673187571922, "grad_norm": 0.6664098501205444, "learning_rate": 0.0002928653624856157, "loss": 0.3852, "step": 296700 }, { "epoch": 85.3596087456847, "grad_norm": 1.270458459854126, "learning_rate": 0.0002928078250863061, "loss": 0.2688, "step": 296710 }, { "epoch": 85.36248561565017, "grad_norm": 0.7591350674629211, "learning_rate": 0.00029275028768699655, "loss": 0.2493, "step": 296720 }, { "epoch": 85.36536248561565, "grad_norm": 1.0683704614639282, "learning_rate": 0.000292692750287687, "loss": 0.3449, "step": 296730 }, { "epoch": 85.36823935558112, "grad_norm": 0.855644166469574, "learning_rate": 0.00029263521288837746, "loss": 0.2906, "step": 296740 }, { "epoch": 85.3711162255466, "grad_norm": 0.820343017578125, "learning_rate": 0.00029257767548906787, "loss": 0.3356, "step": 296750 }, { "epoch": 85.37399309551208, "grad_norm": 1.9012491703033447, "learning_rate": 0.0002925201380897584, "loss": 0.3192, "step": 296760 }, { "epoch": 85.37686996547757, "grad_norm": 1.4352489709854126, "learning_rate": 0.0002924626006904488, "loss": 0.3102, "step": 296770 }, { "epoch": 85.37974683544304, "grad_norm": 2.2402186393737793, "learning_rate": 0.00029240506329113923, "loss": 0.2879, "step": 296780 }, { "epoch": 85.38262370540852, "grad_norm": 1.3088512420654297, "learning_rate": 0.0002923475258918297, "loss": 0.3116, "step": 296790 }, { "epoch": 85.385500575374, "grad_norm": 0.8777213096618652, "learning_rate": 0.00029228998849252014, "loss": 0.3297, "step": 296800 }, { "epoch": 85.38837744533947, "grad_norm": 0.8764235973358154, "learning_rate": 0.0002922324510932106, "loss": 0.286, "step": 296810 }, { "epoch": 85.39125431530495, "grad_norm": 1.1164501905441284, "learning_rate": 0.00029217491369390105, "loss": 0.31, "step": 296820 }, { "epoch": 85.39413118527042, "grad_norm": 2.3632569313049316, "learning_rate": 0.0002921173762945915, "loss": 0.333, "step": 296830 }, { "epoch": 85.3970080552359, "grad_norm": 1.2849911451339722, "learning_rate": 0.0002920598388952819, "loss": 0.3207, "step": 296840 }, { "epoch": 85.39988492520138, "grad_norm": 1.1283625364303589, "learning_rate": 0.0002920023014959724, "loss": 0.3473, "step": 296850 }, { "epoch": 85.40276179516685, "grad_norm": 0.9854861497879028, "learning_rate": 0.0002919447640966628, "loss": 0.2706, "step": 296860 }, { "epoch": 85.40563866513233, "grad_norm": 1.175900936126709, "learning_rate": 0.0002918872266973533, "loss": 0.3339, "step": 296870 }, { "epoch": 85.40851553509782, "grad_norm": 1.1769952774047852, "learning_rate": 0.00029182968929804373, "loss": 0.3118, "step": 296880 }, { "epoch": 85.4113924050633, "grad_norm": 0.5723040699958801, "learning_rate": 0.0002917721518987342, "loss": 0.2824, "step": 296890 }, { "epoch": 85.41426927502877, "grad_norm": 1.5233008861541748, "learning_rate": 0.00029171461449942464, "loss": 0.4138, "step": 296900 }, { "epoch": 85.41714614499425, "grad_norm": 0.7760969400405884, "learning_rate": 0.0002916570771001151, "loss": 0.3721, "step": 296910 }, { "epoch": 85.42002301495972, "grad_norm": 1.1194385290145874, "learning_rate": 0.00029159953970080556, "loss": 0.3175, "step": 296920 }, { "epoch": 85.4228998849252, "grad_norm": 1.1075787544250488, "learning_rate": 0.00029154200230149596, "loss": 0.3215, "step": 296930 }, { "epoch": 85.42577675489068, "grad_norm": 2.421210527420044, "learning_rate": 0.00029148446490218647, "loss": 0.35, "step": 296940 }, { "epoch": 85.42865362485615, "grad_norm": 0.9333347082138062, "learning_rate": 0.00029142692750287687, "loss": 0.3297, "step": 296950 }, { "epoch": 85.43153049482163, "grad_norm": 2.281252861022949, "learning_rate": 0.0002913693901035673, "loss": 0.3612, "step": 296960 }, { "epoch": 85.4344073647871, "grad_norm": 1.3299962282180786, "learning_rate": 0.0002913118527042577, "loss": 0.3729, "step": 296970 }, { "epoch": 85.4372842347526, "grad_norm": 1.1456801891326904, "learning_rate": 0.00029125431530494823, "loss": 0.3232, "step": 296980 }, { "epoch": 85.44016110471807, "grad_norm": 1.1192291975021362, "learning_rate": 0.00029119677790563864, "loss": 0.3012, "step": 296990 }, { "epoch": 85.44303797468355, "grad_norm": 1.4491885900497437, "learning_rate": 0.0002911392405063291, "loss": 0.3093, "step": 297000 }, { "epoch": 85.44591484464902, "grad_norm": 1.0081650018692017, "learning_rate": 0.0002910817031070196, "loss": 0.2831, "step": 297010 }, { "epoch": 85.4487917146145, "grad_norm": 0.7647101879119873, "learning_rate": 0.00029102416570771, "loss": 0.2692, "step": 297020 }, { "epoch": 85.45166858457998, "grad_norm": 1.8344404697418213, "learning_rate": 0.00029096662830840046, "loss": 0.3557, "step": 297030 }, { "epoch": 85.45454545454545, "grad_norm": 2.0197105407714844, "learning_rate": 0.0002909090909090909, "loss": 0.3589, "step": 297040 }, { "epoch": 85.45742232451093, "grad_norm": 1.0056325197219849, "learning_rate": 0.00029085155350978137, "loss": 0.273, "step": 297050 }, { "epoch": 85.4602991944764, "grad_norm": 0.8859285116195679, "learning_rate": 0.00029079401611047177, "loss": 0.4224, "step": 297060 }, { "epoch": 85.46317606444188, "grad_norm": 1.5864180326461792, "learning_rate": 0.0002907364787111623, "loss": 0.362, "step": 297070 }, { "epoch": 85.46605293440736, "grad_norm": 1.2180677652359009, "learning_rate": 0.0002906789413118527, "loss": 0.3576, "step": 297080 }, { "epoch": 85.46892980437285, "grad_norm": 1.1149985790252686, "learning_rate": 0.00029062140391254314, "loss": 0.2973, "step": 297090 }, { "epoch": 85.47180667433832, "grad_norm": 0.4126628041267395, "learning_rate": 0.00029056386651323365, "loss": 0.2903, "step": 297100 }, { "epoch": 85.4746835443038, "grad_norm": 1.184095859527588, "learning_rate": 0.00029050632911392405, "loss": 0.3528, "step": 297110 }, { "epoch": 85.47756041426928, "grad_norm": 0.8289141654968262, "learning_rate": 0.0002904487917146145, "loss": 0.3093, "step": 297120 }, { "epoch": 85.48043728423475, "grad_norm": 1.5539250373840332, "learning_rate": 0.00029039125431530496, "loss": 0.3239, "step": 297130 }, { "epoch": 85.48331415420023, "grad_norm": 1.618098258972168, "learning_rate": 0.0002903337169159954, "loss": 0.3285, "step": 297140 }, { "epoch": 85.4861910241657, "grad_norm": 1.0313973426818848, "learning_rate": 0.0002902761795166858, "loss": 0.2917, "step": 297150 }, { "epoch": 85.48906789413118, "grad_norm": 1.2211863994598389, "learning_rate": 0.0002902186421173763, "loss": 0.4746, "step": 297160 }, { "epoch": 85.49194476409666, "grad_norm": 1.4954311847686768, "learning_rate": 0.0002901611047180667, "loss": 0.2877, "step": 297170 }, { "epoch": 85.49482163406213, "grad_norm": 1.049436092376709, "learning_rate": 0.0002901035673187572, "loss": 0.2777, "step": 297180 }, { "epoch": 85.49769850402762, "grad_norm": 1.2027658224105835, "learning_rate": 0.0002900460299194477, "loss": 0.2854, "step": 297190 }, { "epoch": 85.5005753739931, "grad_norm": 1.183006763458252, "learning_rate": 0.0002899884925201381, "loss": 0.3437, "step": 297200 }, { "epoch": 85.50345224395858, "grad_norm": 0.7862216830253601, "learning_rate": 0.00028993095512082855, "loss": 0.3124, "step": 297210 }, { "epoch": 85.50632911392405, "grad_norm": 1.384250283241272, "learning_rate": 0.000289873417721519, "loss": 0.4013, "step": 297220 }, { "epoch": 85.50920598388953, "grad_norm": 0.9768296480178833, "learning_rate": 0.00028981588032220946, "loss": 0.2797, "step": 297230 }, { "epoch": 85.512082853855, "grad_norm": 1.9485348463058472, "learning_rate": 0.00028975834292289986, "loss": 0.3027, "step": 297240 }, { "epoch": 85.51495972382048, "grad_norm": 1.5836106538772583, "learning_rate": 0.00028970080552359037, "loss": 0.3285, "step": 297250 }, { "epoch": 85.51783659378596, "grad_norm": 1.0272318124771118, "learning_rate": 0.00028964326812428077, "loss": 0.2867, "step": 297260 }, { "epoch": 85.52071346375143, "grad_norm": 1.0831480026245117, "learning_rate": 0.00028958573072497123, "loss": 0.3087, "step": 297270 }, { "epoch": 85.52359033371691, "grad_norm": 1.1052159070968628, "learning_rate": 0.0002895281933256617, "loss": 0.2866, "step": 297280 }, { "epoch": 85.52646720368239, "grad_norm": 1.468093752861023, "learning_rate": 0.00028947065592635214, "loss": 0.3, "step": 297290 }, { "epoch": 85.52934407364788, "grad_norm": 1.16781747341156, "learning_rate": 0.0002894131185270426, "loss": 0.2926, "step": 297300 }, { "epoch": 85.53222094361335, "grad_norm": 1.0905487537384033, "learning_rate": 0.00028935558112773305, "loss": 0.2762, "step": 297310 }, { "epoch": 85.53509781357883, "grad_norm": 1.8656680583953857, "learning_rate": 0.0002892980437284235, "loss": 0.3155, "step": 297320 }, { "epoch": 85.5379746835443, "grad_norm": 1.8969833850860596, "learning_rate": 0.0002892405063291139, "loss": 0.4098, "step": 297330 }, { "epoch": 85.54085155350978, "grad_norm": 0.8834578990936279, "learning_rate": 0.0002891829689298044, "loss": 0.2921, "step": 297340 }, { "epoch": 85.54372842347526, "grad_norm": 1.0981152057647705, "learning_rate": 0.0002891254315304948, "loss": 0.3313, "step": 297350 }, { "epoch": 85.54660529344073, "grad_norm": 1.3101307153701782, "learning_rate": 0.00028906789413118527, "loss": 0.3058, "step": 297360 }, { "epoch": 85.54948216340621, "grad_norm": 1.0368291139602661, "learning_rate": 0.00028901035673187573, "loss": 0.3374, "step": 297370 }, { "epoch": 85.55235903337169, "grad_norm": 1.0694912672042847, "learning_rate": 0.0002889528193325662, "loss": 0.2859, "step": 297380 }, { "epoch": 85.55523590333716, "grad_norm": 0.8127052187919617, "learning_rate": 0.00028889528193325664, "loss": 0.313, "step": 297390 }, { "epoch": 85.55811277330265, "grad_norm": 1.3069992065429688, "learning_rate": 0.00028883774453394704, "loss": 0.2831, "step": 297400 }, { "epoch": 85.56098964326813, "grad_norm": 1.433254599571228, "learning_rate": 0.00028878020713463755, "loss": 0.4058, "step": 297410 }, { "epoch": 85.5638665132336, "grad_norm": 0.7462999224662781, "learning_rate": 0.00028872266973532795, "loss": 0.3691, "step": 297420 }, { "epoch": 85.56674338319908, "grad_norm": 0.8220103979110718, "learning_rate": 0.0002886651323360184, "loss": 0.2892, "step": 297430 }, { "epoch": 85.56962025316456, "grad_norm": 1.6092076301574707, "learning_rate": 0.00028860759493670886, "loss": 0.3909, "step": 297440 }, { "epoch": 85.57249712313003, "grad_norm": 1.1076874732971191, "learning_rate": 0.0002885500575373993, "loss": 0.3406, "step": 297450 }, { "epoch": 85.57537399309551, "grad_norm": 1.0183053016662598, "learning_rate": 0.0002884925201380897, "loss": 0.2717, "step": 297460 }, { "epoch": 85.57825086306099, "grad_norm": 1.781050682067871, "learning_rate": 0.00028843498273878023, "loss": 0.3232, "step": 297470 }, { "epoch": 85.58112773302646, "grad_norm": 0.9327560663223267, "learning_rate": 0.00028837744533947063, "loss": 0.2288, "step": 297480 }, { "epoch": 85.58400460299194, "grad_norm": 1.392438292503357, "learning_rate": 0.0002883199079401611, "loss": 0.2833, "step": 297490 }, { "epoch": 85.58688147295742, "grad_norm": 1.6467852592468262, "learning_rate": 0.0002882623705408516, "loss": 0.3555, "step": 297500 }, { "epoch": 85.5897583429229, "grad_norm": 2.1477882862091064, "learning_rate": 0.000288204833141542, "loss": 0.285, "step": 297510 }, { "epoch": 85.59263521288838, "grad_norm": 0.9491652846336365, "learning_rate": 0.00028814729574223245, "loss": 0.3232, "step": 297520 }, { "epoch": 85.59551208285386, "grad_norm": 1.6622874736785889, "learning_rate": 0.0002880897583429229, "loss": 0.3434, "step": 297530 }, { "epoch": 85.59838895281933, "grad_norm": 0.8635867834091187, "learning_rate": 0.00028803222094361336, "loss": 0.3164, "step": 297540 }, { "epoch": 85.60126582278481, "grad_norm": 1.1104514598846436, "learning_rate": 0.00028797468354430376, "loss": 0.2575, "step": 297550 }, { "epoch": 85.60414269275029, "grad_norm": 0.8023871779441833, "learning_rate": 0.0002879171461449943, "loss": 0.2619, "step": 297560 }, { "epoch": 85.60701956271576, "grad_norm": 1.0439223051071167, "learning_rate": 0.0002878596087456847, "loss": 0.3419, "step": 297570 }, { "epoch": 85.60989643268124, "grad_norm": 0.7979457378387451, "learning_rate": 0.00028780207134637513, "loss": 0.3484, "step": 297580 }, { "epoch": 85.61277330264672, "grad_norm": 1.8970365524291992, "learning_rate": 0.00028774453394706564, "loss": 0.3571, "step": 297590 }, { "epoch": 85.61565017261219, "grad_norm": 1.325563669204712, "learning_rate": 0.00028768699654775604, "loss": 0.3525, "step": 297600 }, { "epoch": 85.61852704257768, "grad_norm": 1.3688427209854126, "learning_rate": 0.0002876294591484465, "loss": 0.3176, "step": 297610 }, { "epoch": 85.62140391254316, "grad_norm": 0.68543541431427, "learning_rate": 0.00028757192174913695, "loss": 0.3809, "step": 297620 }, { "epoch": 85.62428078250863, "grad_norm": 1.5277656316757202, "learning_rate": 0.0002875143843498274, "loss": 0.3292, "step": 297630 }, { "epoch": 85.62715765247411, "grad_norm": 1.2475627660751343, "learning_rate": 0.0002874568469505178, "loss": 0.2921, "step": 297640 }, { "epoch": 85.63003452243959, "grad_norm": 1.4307992458343506, "learning_rate": 0.0002873993095512083, "loss": 0.2875, "step": 297650 }, { "epoch": 85.63291139240506, "grad_norm": 0.845664918422699, "learning_rate": 0.0002873417721518987, "loss": 0.2755, "step": 297660 }, { "epoch": 85.63578826237054, "grad_norm": 2.5462582111358643, "learning_rate": 0.0002872842347525892, "loss": 0.3319, "step": 297670 }, { "epoch": 85.63866513233602, "grad_norm": 1.550583839416504, "learning_rate": 0.0002872266973532797, "loss": 0.3184, "step": 297680 }, { "epoch": 85.64154200230149, "grad_norm": 1.643986463546753, "learning_rate": 0.0002871691599539701, "loss": 0.3799, "step": 297690 }, { "epoch": 85.64441887226697, "grad_norm": 1.4368531703948975, "learning_rate": 0.00028711162255466054, "loss": 0.3349, "step": 297700 }, { "epoch": 85.64729574223244, "grad_norm": 2.119441509246826, "learning_rate": 0.000287054085155351, "loss": 0.3402, "step": 297710 }, { "epoch": 85.65017261219793, "grad_norm": 2.9408674240112305, "learning_rate": 0.00028699654775604145, "loss": 0.4822, "step": 297720 }, { "epoch": 85.65304948216341, "grad_norm": 0.5635002255439758, "learning_rate": 0.00028693901035673185, "loss": 0.2862, "step": 297730 }, { "epoch": 85.65592635212889, "grad_norm": 0.9125904440879822, "learning_rate": 0.00028688147295742236, "loss": 0.3485, "step": 297740 }, { "epoch": 85.65880322209436, "grad_norm": 0.9394158124923706, "learning_rate": 0.00028682393555811277, "loss": 0.3492, "step": 297750 }, { "epoch": 85.66168009205984, "grad_norm": 1.3618184328079224, "learning_rate": 0.0002867663981588032, "loss": 0.387, "step": 297760 }, { "epoch": 85.66455696202532, "grad_norm": 1.157480001449585, "learning_rate": 0.0002867088607594937, "loss": 0.32, "step": 297770 }, { "epoch": 85.66743383199079, "grad_norm": 1.6182054281234741, "learning_rate": 0.00028665132336018413, "loss": 0.3845, "step": 297780 }, { "epoch": 85.67031070195627, "grad_norm": 1.5030807256698608, "learning_rate": 0.0002865937859608746, "loss": 0.2481, "step": 297790 }, { "epoch": 85.67318757192174, "grad_norm": 0.9911162257194519, "learning_rate": 0.000286536248561565, "loss": 0.3333, "step": 297800 }, { "epoch": 85.67606444188722, "grad_norm": 2.0583248138427734, "learning_rate": 0.0002864787111622555, "loss": 0.3656, "step": 297810 }, { "epoch": 85.67894131185271, "grad_norm": 1.266897439956665, "learning_rate": 0.0002864211737629459, "loss": 0.3323, "step": 297820 }, { "epoch": 85.68181818181819, "grad_norm": 1.2449736595153809, "learning_rate": 0.00028636363636363636, "loss": 0.3083, "step": 297830 }, { "epoch": 85.68469505178366, "grad_norm": 1.8992515802383423, "learning_rate": 0.0002863060989643268, "loss": 0.3337, "step": 297840 }, { "epoch": 85.68757192174914, "grad_norm": 0.8076899647712708, "learning_rate": 0.00028624856156501727, "loss": 0.2894, "step": 297850 }, { "epoch": 85.69044879171462, "grad_norm": 0.9293710589408875, "learning_rate": 0.00028619102416570767, "loss": 0.3008, "step": 297860 }, { "epoch": 85.69332566168009, "grad_norm": 2.846576452255249, "learning_rate": 0.0002861334867663982, "loss": 0.3826, "step": 297870 }, { "epoch": 85.69620253164557, "grad_norm": 1.845807433128357, "learning_rate": 0.00028607594936708863, "loss": 0.3856, "step": 297880 }, { "epoch": 85.69907940161104, "grad_norm": 1.0553611516952515, "learning_rate": 0.00028601841196777903, "loss": 0.2969, "step": 297890 }, { "epoch": 85.70195627157652, "grad_norm": 0.9049162864685059, "learning_rate": 0.00028596087456846954, "loss": 0.2786, "step": 297900 }, { "epoch": 85.704833141542, "grad_norm": 1.3277339935302734, "learning_rate": 0.00028590333716915995, "loss": 0.3339, "step": 297910 }, { "epoch": 85.70771001150747, "grad_norm": 1.1360268592834473, "learning_rate": 0.0002858457997698504, "loss": 0.3476, "step": 297920 }, { "epoch": 85.71058688147296, "grad_norm": 1.1683388948440552, "learning_rate": 0.00028578826237054086, "loss": 0.3136, "step": 297930 }, { "epoch": 85.71346375143844, "grad_norm": 1.279665470123291, "learning_rate": 0.0002857307249712313, "loss": 0.3673, "step": 297940 }, { "epoch": 85.71634062140392, "grad_norm": 1.0974446535110474, "learning_rate": 0.0002856731875719217, "loss": 0.3274, "step": 297950 }, { "epoch": 85.71921749136939, "grad_norm": 1.1148453950881958, "learning_rate": 0.0002856156501726122, "loss": 0.3322, "step": 297960 }, { "epoch": 85.72209436133487, "grad_norm": 1.1459321975708008, "learning_rate": 0.0002855581127733026, "loss": 0.3162, "step": 297970 }, { "epoch": 85.72497123130034, "grad_norm": 1.5676333904266357, "learning_rate": 0.0002855005753739931, "loss": 0.3283, "step": 297980 }, { "epoch": 85.72784810126582, "grad_norm": 0.8737313151359558, "learning_rate": 0.0002854430379746836, "loss": 0.3269, "step": 297990 }, { "epoch": 85.7307249712313, "grad_norm": 0.9587047696113586, "learning_rate": 0.000285385500575374, "loss": 0.3033, "step": 298000 }, { "epoch": 85.73360184119677, "grad_norm": 1.1512315273284912, "learning_rate": 0.00028532796317606445, "loss": 0.2985, "step": 298010 }, { "epoch": 85.73647871116225, "grad_norm": 1.379241704940796, "learning_rate": 0.0002852704257767549, "loss": 0.3482, "step": 298020 }, { "epoch": 85.73935558112774, "grad_norm": 0.7823775410652161, "learning_rate": 0.00028521288837744536, "loss": 0.364, "step": 298030 }, { "epoch": 85.74223245109322, "grad_norm": 1.206270456314087, "learning_rate": 0.00028515535097813576, "loss": 0.3262, "step": 298040 }, { "epoch": 85.74510932105869, "grad_norm": 0.9951960444450378, "learning_rate": 0.00028509781357882627, "loss": 0.2945, "step": 298050 }, { "epoch": 85.74798619102417, "grad_norm": 1.12458336353302, "learning_rate": 0.00028504027617951667, "loss": 0.3111, "step": 298060 }, { "epoch": 85.75086306098964, "grad_norm": 0.9425390362739563, "learning_rate": 0.0002849827387802071, "loss": 0.286, "step": 298070 }, { "epoch": 85.75373993095512, "grad_norm": 0.8515344262123108, "learning_rate": 0.00028492520138089763, "loss": 0.292, "step": 298080 }, { "epoch": 85.7566168009206, "grad_norm": 0.8343616724014282, "learning_rate": 0.00028486766398158804, "loss": 0.3198, "step": 298090 }, { "epoch": 85.75949367088607, "grad_norm": 1.0123177766799927, "learning_rate": 0.0002848101265822785, "loss": 0.3226, "step": 298100 }, { "epoch": 85.76237054085155, "grad_norm": 1.5974249839782715, "learning_rate": 0.00028475258918296895, "loss": 0.3533, "step": 298110 }, { "epoch": 85.76524741081703, "grad_norm": 1.0263763666152954, "learning_rate": 0.0002846950517836594, "loss": 0.3727, "step": 298120 }, { "epoch": 85.7681242807825, "grad_norm": 1.3829741477966309, "learning_rate": 0.0002846375143843498, "loss": 0.3066, "step": 298130 }, { "epoch": 85.77100115074799, "grad_norm": 1.0764203071594238, "learning_rate": 0.0002845799769850403, "loss": 0.3288, "step": 298140 }, { "epoch": 85.77387802071347, "grad_norm": 0.5928220152854919, "learning_rate": 0.0002845224395857307, "loss": 0.3081, "step": 298150 }, { "epoch": 85.77675489067894, "grad_norm": 2.083097457885742, "learning_rate": 0.00028446490218642117, "loss": 0.2811, "step": 298160 }, { "epoch": 85.77963176064442, "grad_norm": 1.1191282272338867, "learning_rate": 0.0002844073647871117, "loss": 0.3445, "step": 298170 }, { "epoch": 85.7825086306099, "grad_norm": 1.01192307472229, "learning_rate": 0.0002843498273878021, "loss": 0.3227, "step": 298180 }, { "epoch": 85.78538550057537, "grad_norm": 0.7472450733184814, "learning_rate": 0.00028429228998849254, "loss": 0.3117, "step": 298190 }, { "epoch": 85.78826237054085, "grad_norm": 1.28880774974823, "learning_rate": 0.000284234752589183, "loss": 0.341, "step": 298200 }, { "epoch": 85.79113924050633, "grad_norm": 1.4975560903549194, "learning_rate": 0.00028417721518987345, "loss": 0.3263, "step": 298210 }, { "epoch": 85.7940161104718, "grad_norm": 1.8037749528884888, "learning_rate": 0.00028411967779056385, "loss": 0.2787, "step": 298220 }, { "epoch": 85.79689298043728, "grad_norm": 1.6407133340835571, "learning_rate": 0.0002840621403912543, "loss": 0.3264, "step": 298230 }, { "epoch": 85.79976985040277, "grad_norm": 3.3165602684020996, "learning_rate": 0.00028400460299194476, "loss": 0.3392, "step": 298240 }, { "epoch": 85.80264672036824, "grad_norm": 1.5663045644760132, "learning_rate": 0.0002839470655926352, "loss": 0.3216, "step": 298250 }, { "epoch": 85.80552359033372, "grad_norm": 1.1354613304138184, "learning_rate": 0.0002838895281933256, "loss": 0.3281, "step": 298260 }, { "epoch": 85.8084004602992, "grad_norm": 1.369847297668457, "learning_rate": 0.0002838319907940161, "loss": 0.2983, "step": 298270 }, { "epoch": 85.81127733026467, "grad_norm": 2.3938827514648438, "learning_rate": 0.0002837744533947066, "loss": 0.3174, "step": 298280 }, { "epoch": 85.81415420023015, "grad_norm": 0.994534969329834, "learning_rate": 0.000283716915995397, "loss": 0.2907, "step": 298290 }, { "epoch": 85.81703107019563, "grad_norm": 1.0159780979156494, "learning_rate": 0.0002836593785960875, "loss": 0.2908, "step": 298300 }, { "epoch": 85.8199079401611, "grad_norm": 0.9747731685638428, "learning_rate": 0.0002836018411967779, "loss": 0.2473, "step": 298310 }, { "epoch": 85.82278481012658, "grad_norm": 1.4601467847824097, "learning_rate": 0.00028354430379746835, "loss": 0.3618, "step": 298320 }, { "epoch": 85.82566168009205, "grad_norm": 0.7788026928901672, "learning_rate": 0.0002834867663981588, "loss": 0.317, "step": 298330 }, { "epoch": 85.82853855005754, "grad_norm": 1.5179625749588013, "learning_rate": 0.00028342922899884926, "loss": 0.3204, "step": 298340 }, { "epoch": 85.83141542002302, "grad_norm": 1.8487575054168701, "learning_rate": 0.00028337169159953966, "loss": 0.3652, "step": 298350 }, { "epoch": 85.8342922899885, "grad_norm": 1.5036625862121582, "learning_rate": 0.00028331415420023017, "loss": 0.3339, "step": 298360 }, { "epoch": 85.83716915995397, "grad_norm": 1.1332180500030518, "learning_rate": 0.00028325661680092063, "loss": 0.3876, "step": 298370 }, { "epoch": 85.84004602991945, "grad_norm": 1.6659516096115112, "learning_rate": 0.00028319907940161103, "loss": 0.3593, "step": 298380 }, { "epoch": 85.84292289988493, "grad_norm": 1.203541874885559, "learning_rate": 0.00028314154200230154, "loss": 0.3155, "step": 298390 }, { "epoch": 85.8457997698504, "grad_norm": 1.1506037712097168, "learning_rate": 0.00028308400460299194, "loss": 0.3473, "step": 298400 }, { "epoch": 85.84867663981588, "grad_norm": 0.9391691088676453, "learning_rate": 0.0002830264672036824, "loss": 0.2978, "step": 298410 }, { "epoch": 85.85155350978135, "grad_norm": 1.4305667877197266, "learning_rate": 0.00028296892980437285, "loss": 0.3974, "step": 298420 }, { "epoch": 85.85443037974683, "grad_norm": 1.0790425539016724, "learning_rate": 0.0002829113924050633, "loss": 0.3826, "step": 298430 }, { "epoch": 85.8573072497123, "grad_norm": 1.1665947437286377, "learning_rate": 0.0002828538550057537, "loss": 0.2562, "step": 298440 }, { "epoch": 85.8601841196778, "grad_norm": 1.5754919052124023, "learning_rate": 0.0002827963176064442, "loss": 0.3573, "step": 298450 }, { "epoch": 85.86306098964327, "grad_norm": 1.302956223487854, "learning_rate": 0.0002827387802071346, "loss": 0.3407, "step": 298460 }, { "epoch": 85.86593785960875, "grad_norm": 1.5224357843399048, "learning_rate": 0.0002826812428078251, "loss": 0.4024, "step": 298470 }, { "epoch": 85.86881472957423, "grad_norm": 1.1245834827423096, "learning_rate": 0.0002826237054085156, "loss": 0.3079, "step": 298480 }, { "epoch": 85.8716915995397, "grad_norm": 1.028395414352417, "learning_rate": 0.000282566168009206, "loss": 0.3597, "step": 298490 }, { "epoch": 85.87456846950518, "grad_norm": 0.9346681237220764, "learning_rate": 0.00028250863060989644, "loss": 0.348, "step": 298500 }, { "epoch": 85.87744533947065, "grad_norm": 1.0735944509506226, "learning_rate": 0.0002824510932105869, "loss": 0.313, "step": 298510 }, { "epoch": 85.88032220943613, "grad_norm": 1.0975008010864258, "learning_rate": 0.00028239355581127735, "loss": 0.2965, "step": 298520 }, { "epoch": 85.8831990794016, "grad_norm": 1.3552370071411133, "learning_rate": 0.00028233601841196775, "loss": 0.3628, "step": 298530 }, { "epoch": 85.88607594936708, "grad_norm": 0.9755902290344238, "learning_rate": 0.00028227848101265826, "loss": 0.428, "step": 298540 }, { "epoch": 85.88895281933257, "grad_norm": 1.3866138458251953, "learning_rate": 0.00028222094361334866, "loss": 0.3638, "step": 298550 }, { "epoch": 85.89182968929805, "grad_norm": 1.0931146144866943, "learning_rate": 0.0002821634062140391, "loss": 0.2992, "step": 298560 }, { "epoch": 85.89470655926353, "grad_norm": 2.044442653656006, "learning_rate": 0.00028210586881472963, "loss": 0.3977, "step": 298570 }, { "epoch": 85.897583429229, "grad_norm": 1.9658193588256836, "learning_rate": 0.00028204833141542003, "loss": 0.2969, "step": 298580 }, { "epoch": 85.90046029919448, "grad_norm": 1.043853998184204, "learning_rate": 0.0002819907940161105, "loss": 0.2495, "step": 298590 }, { "epoch": 85.90333716915995, "grad_norm": 0.8606551885604858, "learning_rate": 0.00028193325661680094, "loss": 0.2816, "step": 298600 }, { "epoch": 85.90621403912543, "grad_norm": 1.0248463153839111, "learning_rate": 0.0002818757192174914, "loss": 0.303, "step": 298610 }, { "epoch": 85.9090909090909, "grad_norm": 1.2344253063201904, "learning_rate": 0.0002818181818181818, "loss": 0.299, "step": 298620 }, { "epoch": 85.91196777905638, "grad_norm": 2.0561249256134033, "learning_rate": 0.00028176064441887225, "loss": 0.308, "step": 298630 }, { "epoch": 85.91484464902186, "grad_norm": 0.8080663681030273, "learning_rate": 0.0002817031070195627, "loss": 0.3409, "step": 298640 }, { "epoch": 85.91772151898734, "grad_norm": 1.2054967880249023, "learning_rate": 0.00028164556962025316, "loss": 0.2725, "step": 298650 }, { "epoch": 85.92059838895283, "grad_norm": 2.0864365100860596, "learning_rate": 0.0002815880322209436, "loss": 0.3171, "step": 298660 }, { "epoch": 85.9234752589183, "grad_norm": 1.0565108060836792, "learning_rate": 0.0002815304948216341, "loss": 0.3416, "step": 298670 }, { "epoch": 85.92635212888378, "grad_norm": 0.8486594557762146, "learning_rate": 0.00028147295742232453, "loss": 0.2931, "step": 298680 }, { "epoch": 85.92922899884925, "grad_norm": 2.1952037811279297, "learning_rate": 0.00028141542002301493, "loss": 0.3223, "step": 298690 }, { "epoch": 85.93210586881473, "grad_norm": 1.379135012626648, "learning_rate": 0.00028135788262370544, "loss": 0.3247, "step": 298700 }, { "epoch": 85.9349827387802, "grad_norm": 0.746670663356781, "learning_rate": 0.00028130034522439584, "loss": 0.3581, "step": 298710 }, { "epoch": 85.93785960874568, "grad_norm": 1.5597827434539795, "learning_rate": 0.0002812428078250863, "loss": 0.2922, "step": 298720 }, { "epoch": 85.94073647871116, "grad_norm": 0.7312424778938293, "learning_rate": 0.00028118527042577675, "loss": 0.31, "step": 298730 }, { "epoch": 85.94361334867664, "grad_norm": 1.6732089519500732, "learning_rate": 0.0002811277330264672, "loss": 0.3381, "step": 298740 }, { "epoch": 85.94649021864211, "grad_norm": 1.2290008068084717, "learning_rate": 0.0002810701956271576, "loss": 0.3662, "step": 298750 }, { "epoch": 85.9493670886076, "grad_norm": 1.2464441061019897, "learning_rate": 0.0002810126582278481, "loss": 0.2859, "step": 298760 }, { "epoch": 85.95224395857308, "grad_norm": 1.5028338432312012, "learning_rate": 0.0002809551208285386, "loss": 0.3306, "step": 298770 }, { "epoch": 85.95512082853855, "grad_norm": 2.2290799617767334, "learning_rate": 0.000280897583429229, "loss": 0.3214, "step": 298780 }, { "epoch": 85.95799769850403, "grad_norm": 0.9067310094833374, "learning_rate": 0.0002808400460299195, "loss": 0.3517, "step": 298790 }, { "epoch": 85.9608745684695, "grad_norm": 1.192781686782837, "learning_rate": 0.0002807825086306099, "loss": 0.3632, "step": 298800 }, { "epoch": 85.96375143843498, "grad_norm": 3.120187282562256, "learning_rate": 0.00028072497123130034, "loss": 0.3513, "step": 298810 }, { "epoch": 85.96662830840046, "grad_norm": 0.9724025726318359, "learning_rate": 0.0002806674338319908, "loss": 0.3557, "step": 298820 }, { "epoch": 85.96950517836594, "grad_norm": 1.1447772979736328, "learning_rate": 0.00028060989643268126, "loss": 0.3849, "step": 298830 }, { "epoch": 85.97238204833141, "grad_norm": 2.2598161697387695, "learning_rate": 0.00028055235903337166, "loss": 0.3386, "step": 298840 }, { "epoch": 85.97525891829689, "grad_norm": 0.8674583435058594, "learning_rate": 0.00028049482163406217, "loss": 0.3023, "step": 298850 }, { "epoch": 85.97813578826236, "grad_norm": 1.5235189199447632, "learning_rate": 0.0002804372842347526, "loss": 0.378, "step": 298860 }, { "epoch": 85.98101265822785, "grad_norm": 1.421179175376892, "learning_rate": 0.000280379746835443, "loss": 0.3915, "step": 298870 }, { "epoch": 85.98388952819333, "grad_norm": 0.5711382031440735, "learning_rate": 0.00028032220943613353, "loss": 0.2977, "step": 298880 }, { "epoch": 85.9867663981588, "grad_norm": 1.2269037961959839, "learning_rate": 0.00028026467203682393, "loss": 0.3245, "step": 298890 }, { "epoch": 85.98964326812428, "grad_norm": 1.9310003519058228, "learning_rate": 0.0002802071346375144, "loss": 0.3355, "step": 298900 }, { "epoch": 85.99252013808976, "grad_norm": 0.904339075088501, "learning_rate": 0.00028014959723820485, "loss": 0.3001, "step": 298910 }, { "epoch": 85.99539700805524, "grad_norm": 1.0612568855285645, "learning_rate": 0.0002800920598388953, "loss": 0.3165, "step": 298920 }, { "epoch": 85.99827387802071, "grad_norm": 1.0281990766525269, "learning_rate": 0.0002800345224395857, "loss": 0.2963, "step": 298930 }, { "epoch": 86.00115074798619, "grad_norm": 0.6784623861312866, "learning_rate": 0.0002799769850402762, "loss": 0.2891, "step": 298940 }, { "epoch": 86.00402761795166, "grad_norm": 1.6625066995620728, "learning_rate": 0.0002799194476409666, "loss": 0.3698, "step": 298950 }, { "epoch": 86.00690448791714, "grad_norm": 2.2964460849761963, "learning_rate": 0.00027986191024165707, "loss": 0.3491, "step": 298960 }, { "epoch": 86.00978135788263, "grad_norm": 1.1714200973510742, "learning_rate": 0.0002798043728423476, "loss": 0.2985, "step": 298970 }, { "epoch": 86.0126582278481, "grad_norm": 0.9354197382926941, "learning_rate": 0.000279746835443038, "loss": 0.3948, "step": 298980 }, { "epoch": 86.01553509781358, "grad_norm": 1.0286619663238525, "learning_rate": 0.00027968929804372844, "loss": 0.2709, "step": 298990 }, { "epoch": 86.01841196777906, "grad_norm": 1.574141025543213, "learning_rate": 0.0002796317606444189, "loss": 0.3405, "step": 299000 }, { "epoch": 86.02128883774454, "grad_norm": 1.6442440748214722, "learning_rate": 0.00027957422324510935, "loss": 0.3035, "step": 299010 }, { "epoch": 86.02416570771001, "grad_norm": 0.7888340950012207, "learning_rate": 0.00027951668584579975, "loss": 0.2885, "step": 299020 }, { "epoch": 86.02704257767549, "grad_norm": 1.3649563789367676, "learning_rate": 0.0002794591484464902, "loss": 0.4365, "step": 299030 }, { "epoch": 86.02991944764096, "grad_norm": 1.667728066444397, "learning_rate": 0.00027940161104718066, "loss": 0.2951, "step": 299040 }, { "epoch": 86.03279631760644, "grad_norm": 0.9436847567558289, "learning_rate": 0.0002793440736478711, "loss": 0.2745, "step": 299050 }, { "epoch": 86.03567318757192, "grad_norm": 0.7501729726791382, "learning_rate": 0.00027928653624856157, "loss": 0.323, "step": 299060 }, { "epoch": 86.03855005753739, "grad_norm": 1.1232168674468994, "learning_rate": 0.000279228998849252, "loss": 0.2699, "step": 299070 }, { "epoch": 86.04142692750288, "grad_norm": 1.9842880964279175, "learning_rate": 0.0002791714614499425, "loss": 0.3666, "step": 299080 }, { "epoch": 86.04430379746836, "grad_norm": 0.9665176868438721, "learning_rate": 0.0002791139240506329, "loss": 0.307, "step": 299090 }, { "epoch": 86.04718066743384, "grad_norm": 0.648456871509552, "learning_rate": 0.0002790563866513234, "loss": 0.2617, "step": 299100 }, { "epoch": 86.05005753739931, "grad_norm": 0.995884120464325, "learning_rate": 0.0002789988492520138, "loss": 0.3384, "step": 299110 }, { "epoch": 86.05293440736479, "grad_norm": 0.7959051728248596, "learning_rate": 0.00027894131185270425, "loss": 0.2185, "step": 299120 }, { "epoch": 86.05581127733026, "grad_norm": 2.044743299484253, "learning_rate": 0.0002788837744533947, "loss": 0.3165, "step": 299130 }, { "epoch": 86.05868814729574, "grad_norm": 0.931300699710846, "learning_rate": 0.00027882623705408516, "loss": 0.2849, "step": 299140 }, { "epoch": 86.06156501726122, "grad_norm": 1.6904423236846924, "learning_rate": 0.0002787686996547756, "loss": 0.3862, "step": 299150 }, { "epoch": 86.06444188722669, "grad_norm": 0.9792946577072144, "learning_rate": 0.00027871116225546607, "loss": 0.2487, "step": 299160 }, { "epoch": 86.06731875719217, "grad_norm": 1.3728786706924438, "learning_rate": 0.0002786536248561565, "loss": 0.2804, "step": 299170 }, { "epoch": 86.07019562715766, "grad_norm": 1.1654794216156006, "learning_rate": 0.00027859608745684693, "loss": 0.2851, "step": 299180 }, { "epoch": 86.07307249712314, "grad_norm": 1.1382625102996826, "learning_rate": 0.00027853855005753744, "loss": 0.3681, "step": 299190 }, { "epoch": 86.07594936708861, "grad_norm": 1.2804319858551025, "learning_rate": 0.00027848101265822784, "loss": 0.24, "step": 299200 }, { "epoch": 86.07882623705409, "grad_norm": 0.5830917954444885, "learning_rate": 0.0002784234752589183, "loss": 0.2632, "step": 299210 }, { "epoch": 86.08170310701956, "grad_norm": 0.6879463195800781, "learning_rate": 0.00027836593785960875, "loss": 0.3297, "step": 299220 }, { "epoch": 86.08457997698504, "grad_norm": 0.5936896204948425, "learning_rate": 0.0002783084004602992, "loss": 0.2657, "step": 299230 }, { "epoch": 86.08745684695052, "grad_norm": 0.8496596217155457, "learning_rate": 0.0002782508630609896, "loss": 0.2803, "step": 299240 }, { "epoch": 86.09033371691599, "grad_norm": 0.946678638458252, "learning_rate": 0.0002781933256616801, "loss": 0.2894, "step": 299250 }, { "epoch": 86.09321058688147, "grad_norm": 1.3601030111312866, "learning_rate": 0.00027813578826237057, "loss": 0.3255, "step": 299260 }, { "epoch": 86.09608745684694, "grad_norm": 0.9463670253753662, "learning_rate": 0.00027807825086306097, "loss": 0.3115, "step": 299270 }, { "epoch": 86.09896432681242, "grad_norm": 1.0233604907989502, "learning_rate": 0.0002780207134637515, "loss": 0.3709, "step": 299280 }, { "epoch": 86.10184119677791, "grad_norm": 1.0927814245224, "learning_rate": 0.0002779631760644419, "loss": 0.2608, "step": 299290 }, { "epoch": 86.10471806674339, "grad_norm": 2.103872537612915, "learning_rate": 0.00027790563866513234, "loss": 0.3389, "step": 299300 }, { "epoch": 86.10759493670886, "grad_norm": 0.8819574117660522, "learning_rate": 0.0002778481012658228, "loss": 0.3327, "step": 299310 }, { "epoch": 86.11047180667434, "grad_norm": 0.7099915146827698, "learning_rate": 0.00027779056386651325, "loss": 0.26, "step": 299320 }, { "epoch": 86.11334867663982, "grad_norm": 1.514328956604004, "learning_rate": 0.00027773302646720365, "loss": 0.2967, "step": 299330 }, { "epoch": 86.11622554660529, "grad_norm": 1.8454604148864746, "learning_rate": 0.00027767548906789416, "loss": 0.3045, "step": 299340 }, { "epoch": 86.11910241657077, "grad_norm": 1.6447319984436035, "learning_rate": 0.0002776179516685846, "loss": 0.2628, "step": 299350 }, { "epoch": 86.12197928653625, "grad_norm": 0.6196891665458679, "learning_rate": 0.000277560414269275, "loss": 0.2742, "step": 299360 }, { "epoch": 86.12485615650172, "grad_norm": 0.998069167137146, "learning_rate": 0.00027750287686996553, "loss": 0.3862, "step": 299370 }, { "epoch": 86.1277330264672, "grad_norm": 1.4127545356750488, "learning_rate": 0.00027744533947065593, "loss": 0.2391, "step": 299380 }, { "epoch": 86.13060989643269, "grad_norm": 1.2093110084533691, "learning_rate": 0.0002773878020713464, "loss": 0.3283, "step": 299390 }, { "epoch": 86.13348676639816, "grad_norm": 2.149782180786133, "learning_rate": 0.00027733026467203684, "loss": 0.4105, "step": 299400 }, { "epoch": 86.13636363636364, "grad_norm": 0.6518277525901794, "learning_rate": 0.0002772727272727273, "loss": 0.3013, "step": 299410 }, { "epoch": 86.13924050632912, "grad_norm": 1.2295422554016113, "learning_rate": 0.0002772151898734177, "loss": 0.3688, "step": 299420 }, { "epoch": 86.14211737629459, "grad_norm": 0.9537468552589417, "learning_rate": 0.0002771576524741082, "loss": 0.2594, "step": 299430 }, { "epoch": 86.14499424626007, "grad_norm": 1.6840976476669312, "learning_rate": 0.0002771001150747986, "loss": 0.3097, "step": 299440 }, { "epoch": 86.14787111622555, "grad_norm": 1.593304991722107, "learning_rate": 0.00027704257767548906, "loss": 0.2805, "step": 299450 }, { "epoch": 86.15074798619102, "grad_norm": 2.1639797687530518, "learning_rate": 0.0002769850402761795, "loss": 0.3916, "step": 299460 }, { "epoch": 86.1536248561565, "grad_norm": 1.0875967741012573, "learning_rate": 0.00027692750287687, "loss": 0.3366, "step": 299470 }, { "epoch": 86.15650172612197, "grad_norm": 0.9765085577964783, "learning_rate": 0.00027686996547756043, "loss": 0.3077, "step": 299480 }, { "epoch": 86.15937859608745, "grad_norm": 0.9164724946022034, "learning_rate": 0.00027681242807825083, "loss": 0.3792, "step": 299490 }, { "epoch": 86.16225546605294, "grad_norm": 1.6961196660995483, "learning_rate": 0.00027675489067894134, "loss": 0.4171, "step": 299500 }, { "epoch": 86.16513233601842, "grad_norm": 1.0718142986297607, "learning_rate": 0.00027669735327963174, "loss": 0.2958, "step": 299510 }, { "epoch": 86.16800920598389, "grad_norm": 1.6075290441513062, "learning_rate": 0.0002766398158803222, "loss": 0.2485, "step": 299520 }, { "epoch": 86.17088607594937, "grad_norm": 0.9094268679618835, "learning_rate": 0.00027658227848101265, "loss": 0.3431, "step": 299530 }, { "epoch": 86.17376294591485, "grad_norm": 1.1897486448287964, "learning_rate": 0.0002765247410817031, "loss": 0.3653, "step": 299540 }, { "epoch": 86.17663981588032, "grad_norm": 0.6224063634872437, "learning_rate": 0.00027646720368239356, "loss": 0.2866, "step": 299550 }, { "epoch": 86.1795166858458, "grad_norm": 0.8115789890289307, "learning_rate": 0.000276409666283084, "loss": 0.2641, "step": 299560 }, { "epoch": 86.18239355581127, "grad_norm": 1.0070981979370117, "learning_rate": 0.0002763521288837745, "loss": 0.3801, "step": 299570 }, { "epoch": 86.18527042577675, "grad_norm": 1.1467432975769043, "learning_rate": 0.0002762945914844649, "loss": 0.2918, "step": 299580 }, { "epoch": 86.18814729574223, "grad_norm": 0.6235450506210327, "learning_rate": 0.0002762370540851554, "loss": 0.3535, "step": 299590 }, { "epoch": 86.19102416570772, "grad_norm": 1.2804620265960693, "learning_rate": 0.0002761795166858458, "loss": 0.3216, "step": 299600 }, { "epoch": 86.19390103567319, "grad_norm": 0.9011534452438354, "learning_rate": 0.00027612197928653624, "loss": 0.2798, "step": 299610 }, { "epoch": 86.19677790563867, "grad_norm": 0.8547273874282837, "learning_rate": 0.0002760644418872267, "loss": 0.2635, "step": 299620 }, { "epoch": 86.19965477560415, "grad_norm": 0.9477279782295227, "learning_rate": 0.00027600690448791715, "loss": 0.2669, "step": 299630 }, { "epoch": 86.20253164556962, "grad_norm": 1.7904266119003296, "learning_rate": 0.0002759493670886076, "loss": 0.3754, "step": 299640 }, { "epoch": 86.2054085155351, "grad_norm": 1.6948744058609009, "learning_rate": 0.00027589182968929806, "loss": 0.3026, "step": 299650 }, { "epoch": 86.20828538550057, "grad_norm": 1.5753577947616577, "learning_rate": 0.0002758342922899885, "loss": 0.3507, "step": 299660 }, { "epoch": 86.21116225546605, "grad_norm": 0.8124764561653137, "learning_rate": 0.0002757767548906789, "loss": 0.3222, "step": 299670 }, { "epoch": 86.21403912543153, "grad_norm": 1.1716440916061401, "learning_rate": 0.00027571921749136943, "loss": 0.2748, "step": 299680 }, { "epoch": 86.216915995397, "grad_norm": 1.3783475160598755, "learning_rate": 0.00027566168009205983, "loss": 0.273, "step": 299690 }, { "epoch": 86.21979286536248, "grad_norm": 1.3448833227157593, "learning_rate": 0.0002756041426927503, "loss": 0.3878, "step": 299700 }, { "epoch": 86.22266973532797, "grad_norm": 0.8255137801170349, "learning_rate": 0.00027554660529344074, "loss": 0.3283, "step": 299710 }, { "epoch": 86.22554660529345, "grad_norm": 1.2319738864898682, "learning_rate": 0.0002754890678941312, "loss": 0.2882, "step": 299720 }, { "epoch": 86.22842347525892, "grad_norm": 1.3358278274536133, "learning_rate": 0.0002754315304948216, "loss": 0.2538, "step": 299730 }, { "epoch": 86.2313003452244, "grad_norm": 0.6563988924026489, "learning_rate": 0.0002753739930955121, "loss": 0.3905, "step": 299740 }, { "epoch": 86.23417721518987, "grad_norm": 1.4134562015533447, "learning_rate": 0.00027531645569620257, "loss": 0.2602, "step": 299750 }, { "epoch": 86.23705408515535, "grad_norm": 1.5443724393844604, "learning_rate": 0.00027525891829689297, "loss": 0.3443, "step": 299760 }, { "epoch": 86.23993095512083, "grad_norm": 1.008980393409729, "learning_rate": 0.0002752013808975835, "loss": 0.349, "step": 299770 }, { "epoch": 86.2428078250863, "grad_norm": 1.878167748451233, "learning_rate": 0.0002751438434982739, "loss": 0.2529, "step": 299780 }, { "epoch": 86.24568469505178, "grad_norm": 1.268493890762329, "learning_rate": 0.00027508630609896433, "loss": 0.3273, "step": 299790 }, { "epoch": 86.24856156501725, "grad_norm": 0.7525321841239929, "learning_rate": 0.0002750287686996548, "loss": 0.2484, "step": 299800 }, { "epoch": 86.25143843498275, "grad_norm": 0.7317057251930237, "learning_rate": 0.00027497123130034524, "loss": 0.3081, "step": 299810 }, { "epoch": 86.25431530494822, "grad_norm": 1.3436120748519897, "learning_rate": 0.00027491369390103565, "loss": 0.3237, "step": 299820 }, { "epoch": 86.2571921749137, "grad_norm": 1.3744728565216064, "learning_rate": 0.00027485615650172616, "loss": 0.3363, "step": 299830 }, { "epoch": 86.26006904487917, "grad_norm": 0.6319078803062439, "learning_rate": 0.0002747986191024166, "loss": 0.3297, "step": 299840 }, { "epoch": 86.26294591484465, "grad_norm": 1.1308132410049438, "learning_rate": 0.000274741081703107, "loss": 0.3574, "step": 299850 }, { "epoch": 86.26582278481013, "grad_norm": 1.025755524635315, "learning_rate": 0.00027468354430379747, "loss": 0.3163, "step": 299860 }, { "epoch": 86.2686996547756, "grad_norm": 0.551407516002655, "learning_rate": 0.0002746260069044879, "loss": 0.2588, "step": 299870 }, { "epoch": 86.27157652474108, "grad_norm": 1.1405609846115112, "learning_rate": 0.0002745684695051784, "loss": 0.3695, "step": 299880 }, { "epoch": 86.27445339470655, "grad_norm": 1.800607442855835, "learning_rate": 0.0002745109321058688, "loss": 0.3676, "step": 299890 }, { "epoch": 86.27733026467203, "grad_norm": 1.5892667770385742, "learning_rate": 0.0002744533947065593, "loss": 0.3384, "step": 299900 }, { "epoch": 86.28020713463752, "grad_norm": 0.9327741861343384, "learning_rate": 0.0002743958573072497, "loss": 0.3113, "step": 299910 }, { "epoch": 86.283084004603, "grad_norm": 0.8598697185516357, "learning_rate": 0.00027433831990794015, "loss": 0.3287, "step": 299920 }, { "epoch": 86.28596087456847, "grad_norm": 1.4295614957809448, "learning_rate": 0.0002742807825086306, "loss": 0.2982, "step": 299930 }, { "epoch": 86.28883774453395, "grad_norm": 0.8976594805717468, "learning_rate": 0.00027422324510932106, "loss": 0.3266, "step": 299940 }, { "epoch": 86.29171461449943, "grad_norm": 1.130437970161438, "learning_rate": 0.0002741657077100115, "loss": 0.3336, "step": 299950 }, { "epoch": 86.2945914844649, "grad_norm": 0.8137632012367249, "learning_rate": 0.00027410817031070197, "loss": 0.2425, "step": 299960 }, { "epoch": 86.29746835443038, "grad_norm": 2.0048792362213135, "learning_rate": 0.0002740506329113924, "loss": 0.3246, "step": 299970 }, { "epoch": 86.30034522439585, "grad_norm": 1.5737594366073608, "learning_rate": 0.0002739930955120828, "loss": 0.3085, "step": 299980 }, { "epoch": 86.30322209436133, "grad_norm": 1.1296954154968262, "learning_rate": 0.00027393555811277333, "loss": 0.3684, "step": 299990 }, { "epoch": 86.30609896432681, "grad_norm": 1.1048552989959717, "learning_rate": 0.00027387802071346374, "loss": 0.2615, "step": 300000 }, { "epoch": 86.30897583429228, "grad_norm": 1.0086508989334106, "learning_rate": 0.0002738204833141542, "loss": 0.3723, "step": 300010 }, { "epoch": 86.31185270425777, "grad_norm": 1.7262321710586548, "learning_rate": 0.00027376294591484465, "loss": 0.3606, "step": 300020 }, { "epoch": 86.31472957422325, "grad_norm": 0.6063361763954163, "learning_rate": 0.0002737054085155351, "loss": 0.3241, "step": 300030 }, { "epoch": 86.31760644418873, "grad_norm": 0.9091234803199768, "learning_rate": 0.00027364787111622556, "loss": 0.2889, "step": 300040 }, { "epoch": 86.3204833141542, "grad_norm": 1.197618007659912, "learning_rate": 0.000273590333716916, "loss": 0.3709, "step": 300050 }, { "epoch": 86.32336018411968, "grad_norm": 1.1578625440597534, "learning_rate": 0.00027353279631760647, "loss": 0.2561, "step": 300060 }, { "epoch": 86.32623705408515, "grad_norm": 1.8042609691619873, "learning_rate": 0.00027347525891829687, "loss": 0.3393, "step": 300070 }, { "epoch": 86.32911392405063, "grad_norm": 1.2971068620681763, "learning_rate": 0.0002734177215189874, "loss": 0.2755, "step": 300080 }, { "epoch": 86.33199079401611, "grad_norm": 1.8494237661361694, "learning_rate": 0.0002733601841196778, "loss": 0.3531, "step": 300090 }, { "epoch": 86.33486766398158, "grad_norm": 1.649770975112915, "learning_rate": 0.00027330264672036824, "loss": 0.323, "step": 300100 }, { "epoch": 86.33774453394706, "grad_norm": 2.1018497943878174, "learning_rate": 0.0002732451093210587, "loss": 0.3933, "step": 300110 }, { "epoch": 86.34062140391255, "grad_norm": 1.2683671712875366, "learning_rate": 0.00027318757192174915, "loss": 0.3091, "step": 300120 }, { "epoch": 86.34349827387803, "grad_norm": 2.027230739593506, "learning_rate": 0.0002731300345224396, "loss": 0.3032, "step": 300130 }, { "epoch": 86.3463751438435, "grad_norm": 1.0892523527145386, "learning_rate": 0.00027307249712313006, "loss": 0.311, "step": 300140 }, { "epoch": 86.34925201380898, "grad_norm": 1.9230246543884277, "learning_rate": 0.0002730149597238205, "loss": 0.3851, "step": 300150 }, { "epoch": 86.35212888377445, "grad_norm": 1.7484866380691528, "learning_rate": 0.0002729574223245109, "loss": 0.328, "step": 300160 }, { "epoch": 86.35500575373993, "grad_norm": 0.5667188763618469, "learning_rate": 0.0002728998849252014, "loss": 0.3249, "step": 300170 }, { "epoch": 86.35788262370541, "grad_norm": 1.8855403661727905, "learning_rate": 0.0002728423475258918, "loss": 0.3526, "step": 300180 }, { "epoch": 86.36075949367088, "grad_norm": 0.9006562232971191, "learning_rate": 0.0002727848101265823, "loss": 0.3202, "step": 300190 }, { "epoch": 86.36363636363636, "grad_norm": 1.684201955795288, "learning_rate": 0.00027272727272727274, "loss": 0.3366, "step": 300200 }, { "epoch": 86.36651323360184, "grad_norm": 1.4825081825256348, "learning_rate": 0.0002726697353279632, "loss": 0.2882, "step": 300210 }, { "epoch": 86.36939010356731, "grad_norm": 1.5484447479248047, "learning_rate": 0.0002726121979286536, "loss": 0.2712, "step": 300220 }, { "epoch": 86.3722669735328, "grad_norm": 2.257652521133423, "learning_rate": 0.0002725546605293441, "loss": 0.391, "step": 300230 }, { "epoch": 86.37514384349828, "grad_norm": 1.8868201971054077, "learning_rate": 0.00027249712313003456, "loss": 0.3513, "step": 300240 }, { "epoch": 86.37802071346375, "grad_norm": 0.9223416447639465, "learning_rate": 0.00027243958573072496, "loss": 0.343, "step": 300250 }, { "epoch": 86.38089758342923, "grad_norm": 1.450636863708496, "learning_rate": 0.00027238204833141547, "loss": 0.3941, "step": 300260 }, { "epoch": 86.38377445339471, "grad_norm": 0.7276235222816467, "learning_rate": 0.00027232451093210587, "loss": 0.3384, "step": 300270 }, { "epoch": 86.38665132336018, "grad_norm": 1.188163161277771, "learning_rate": 0.00027226697353279633, "loss": 0.2835, "step": 300280 }, { "epoch": 86.38952819332566, "grad_norm": 0.9399996995925903, "learning_rate": 0.00027220943613348673, "loss": 0.277, "step": 300290 }, { "epoch": 86.39240506329114, "grad_norm": 0.8633961081504822, "learning_rate": 0.00027215189873417724, "loss": 0.3011, "step": 300300 }, { "epoch": 86.39528193325661, "grad_norm": 1.123573660850525, "learning_rate": 0.00027209436133486764, "loss": 0.2884, "step": 300310 }, { "epoch": 86.39815880322209, "grad_norm": 1.5133463144302368, "learning_rate": 0.0002720368239355581, "loss": 0.3845, "step": 300320 }, { "epoch": 86.40103567318758, "grad_norm": 1.0191473960876465, "learning_rate": 0.0002719792865362486, "loss": 0.269, "step": 300330 }, { "epoch": 86.40391254315306, "grad_norm": 0.7565087080001831, "learning_rate": 0.000271921749136939, "loss": 0.363, "step": 300340 }, { "epoch": 86.40678941311853, "grad_norm": 3.111828327178955, "learning_rate": 0.00027186421173762946, "loss": 0.3924, "step": 300350 }, { "epoch": 86.40966628308401, "grad_norm": 1.308957815170288, "learning_rate": 0.0002718066743383199, "loss": 0.3364, "step": 300360 }, { "epoch": 86.41254315304948, "grad_norm": 1.0874905586242676, "learning_rate": 0.0002717491369390104, "loss": 0.2706, "step": 300370 }, { "epoch": 86.41542002301496, "grad_norm": 1.2021766901016235, "learning_rate": 0.0002716915995397008, "loss": 0.3328, "step": 300380 }, { "epoch": 86.41829689298044, "grad_norm": 1.5889902114868164, "learning_rate": 0.0002716340621403913, "loss": 0.3477, "step": 300390 }, { "epoch": 86.42117376294591, "grad_norm": 0.8923842310905457, "learning_rate": 0.0002715765247410817, "loss": 0.2844, "step": 300400 }, { "epoch": 86.42405063291139, "grad_norm": 0.9625208377838135, "learning_rate": 0.00027151898734177214, "loss": 0.2735, "step": 300410 }, { "epoch": 86.42692750287686, "grad_norm": 1.0602953433990479, "learning_rate": 0.0002714614499424626, "loss": 0.3187, "step": 300420 }, { "epoch": 86.42980437284234, "grad_norm": 1.7734545469284058, "learning_rate": 0.00027140391254315305, "loss": 0.3386, "step": 300430 }, { "epoch": 86.43268124280783, "grad_norm": 2.0961735248565674, "learning_rate": 0.0002713463751438435, "loss": 0.3754, "step": 300440 }, { "epoch": 86.43555811277331, "grad_norm": 1.9041061401367188, "learning_rate": 0.00027128883774453396, "loss": 0.376, "step": 300450 }, { "epoch": 86.43843498273878, "grad_norm": 1.4817588329315186, "learning_rate": 0.0002712313003452244, "loss": 0.3055, "step": 300460 }, { "epoch": 86.44131185270426, "grad_norm": 1.1592882871627808, "learning_rate": 0.0002711737629459148, "loss": 0.2876, "step": 300470 }, { "epoch": 86.44418872266974, "grad_norm": 1.0529457330703735, "learning_rate": 0.00027111622554660533, "loss": 0.3611, "step": 300480 }, { "epoch": 86.44706559263521, "grad_norm": 1.323676586151123, "learning_rate": 0.00027105868814729573, "loss": 0.2693, "step": 300490 }, { "epoch": 86.44994246260069, "grad_norm": 0.9328927397727966, "learning_rate": 0.0002710011507479862, "loss": 0.3213, "step": 300500 }, { "epoch": 86.45281933256616, "grad_norm": 1.5623183250427246, "learning_rate": 0.00027094361334867664, "loss": 0.307, "step": 300510 }, { "epoch": 86.45569620253164, "grad_norm": 0.4899713397026062, "learning_rate": 0.0002708860759493671, "loss": 0.2343, "step": 300520 }, { "epoch": 86.45857307249712, "grad_norm": 1.1763867139816284, "learning_rate": 0.00027082853855005755, "loss": 0.2807, "step": 300530 }, { "epoch": 86.46144994246261, "grad_norm": 1.6010698080062866, "learning_rate": 0.000270771001150748, "loss": 0.2741, "step": 300540 }, { "epoch": 86.46432681242808, "grad_norm": 1.2902714014053345, "learning_rate": 0.00027071346375143846, "loss": 0.3573, "step": 300550 }, { "epoch": 86.46720368239356, "grad_norm": 0.9379978775978088, "learning_rate": 0.00027065592635212886, "loss": 0.3146, "step": 300560 }, { "epoch": 86.47008055235904, "grad_norm": 1.2949914932250977, "learning_rate": 0.0002705983889528194, "loss": 0.3361, "step": 300570 }, { "epoch": 86.47295742232451, "grad_norm": 1.1533565521240234, "learning_rate": 0.0002705408515535098, "loss": 0.2637, "step": 300580 }, { "epoch": 86.47583429228999, "grad_norm": 0.5858020186424255, "learning_rate": 0.00027048331415420023, "loss": 0.3266, "step": 300590 }, { "epoch": 86.47871116225546, "grad_norm": 1.4524520635604858, "learning_rate": 0.0002704257767548907, "loss": 0.3192, "step": 300600 }, { "epoch": 86.48158803222094, "grad_norm": 0.834668755531311, "learning_rate": 0.00027036823935558114, "loss": 0.2888, "step": 300610 }, { "epoch": 86.48446490218642, "grad_norm": 1.693947196006775, "learning_rate": 0.0002703107019562716, "loss": 0.3536, "step": 300620 }, { "epoch": 86.4873417721519, "grad_norm": 1.0439152717590332, "learning_rate": 0.00027025316455696205, "loss": 0.307, "step": 300630 }, { "epoch": 86.49021864211737, "grad_norm": 0.8852064609527588, "learning_rate": 0.0002701956271576525, "loss": 0.3215, "step": 300640 }, { "epoch": 86.49309551208286, "grad_norm": 1.176943063735962, "learning_rate": 0.0002701380897583429, "loss": 0.343, "step": 300650 }, { "epoch": 86.49597238204834, "grad_norm": 0.8707530498504639, "learning_rate": 0.0002700805523590334, "loss": 0.2739, "step": 300660 }, { "epoch": 86.49884925201381, "grad_norm": 1.0812313556671143, "learning_rate": 0.0002700230149597238, "loss": 0.3239, "step": 300670 }, { "epoch": 86.50172612197929, "grad_norm": 0.773157000541687, "learning_rate": 0.0002699654775604143, "loss": 0.2829, "step": 300680 }, { "epoch": 86.50460299194476, "grad_norm": 0.6609089970588684, "learning_rate": 0.0002699079401611047, "loss": 0.2944, "step": 300690 }, { "epoch": 86.50747986191024, "grad_norm": 0.7332086563110352, "learning_rate": 0.0002698504027617952, "loss": 0.287, "step": 300700 }, { "epoch": 86.51035673187572, "grad_norm": 1.2988734245300293, "learning_rate": 0.0002697928653624856, "loss": 0.2587, "step": 300710 }, { "epoch": 86.5132336018412, "grad_norm": 0.8228761553764343, "learning_rate": 0.00026973532796317604, "loss": 0.3231, "step": 300720 }, { "epoch": 86.51611047180667, "grad_norm": 0.8329190015792847, "learning_rate": 0.00026967779056386655, "loss": 0.2972, "step": 300730 }, { "epoch": 86.51898734177215, "grad_norm": 0.7319384217262268, "learning_rate": 0.00026962025316455696, "loss": 0.291, "step": 300740 }, { "epoch": 86.52186421173764, "grad_norm": 1.297537922859192, "learning_rate": 0.0002695627157652474, "loss": 0.2815, "step": 300750 }, { "epoch": 86.52474108170311, "grad_norm": 0.6320379972457886, "learning_rate": 0.00026950517836593787, "loss": 0.3533, "step": 300760 }, { "epoch": 86.52761795166859, "grad_norm": 1.3687089681625366, "learning_rate": 0.0002694476409666283, "loss": 0.3186, "step": 300770 }, { "epoch": 86.53049482163406, "grad_norm": 1.8799822330474854, "learning_rate": 0.0002693901035673187, "loss": 0.3001, "step": 300780 }, { "epoch": 86.53337169159954, "grad_norm": 1.1769506931304932, "learning_rate": 0.00026933256616800923, "loss": 0.3299, "step": 300790 }, { "epoch": 86.53624856156502, "grad_norm": 1.7293822765350342, "learning_rate": 0.00026927502876869963, "loss": 0.4109, "step": 300800 }, { "epoch": 86.5391254315305, "grad_norm": 1.4309420585632324, "learning_rate": 0.0002692174913693901, "loss": 0.3127, "step": 300810 }, { "epoch": 86.54200230149597, "grad_norm": 0.556969165802002, "learning_rate": 0.0002691599539700806, "loss": 0.2989, "step": 300820 }, { "epoch": 86.54487917146145, "grad_norm": 2.18918514251709, "learning_rate": 0.000269102416570771, "loss": 0.3439, "step": 300830 }, { "epoch": 86.54775604142692, "grad_norm": 1.4170503616333008, "learning_rate": 0.00026904487917146146, "loss": 0.3201, "step": 300840 }, { "epoch": 86.5506329113924, "grad_norm": 1.4026825428009033, "learning_rate": 0.0002689873417721519, "loss": 0.3053, "step": 300850 }, { "epoch": 86.55350978135789, "grad_norm": 0.9199923872947693, "learning_rate": 0.00026892980437284237, "loss": 0.3748, "step": 300860 }, { "epoch": 86.55638665132336, "grad_norm": 1.2529290914535522, "learning_rate": 0.00026887226697353277, "loss": 0.3464, "step": 300870 }, { "epoch": 86.55926352128884, "grad_norm": 1.0570549964904785, "learning_rate": 0.0002688147295742233, "loss": 0.268, "step": 300880 }, { "epoch": 86.56214039125432, "grad_norm": 1.1589338779449463, "learning_rate": 0.0002687571921749137, "loss": 0.3446, "step": 300890 }, { "epoch": 86.5650172612198, "grad_norm": 0.6329284906387329, "learning_rate": 0.00026869965477560414, "loss": 0.2984, "step": 300900 }, { "epoch": 86.56789413118527, "grad_norm": 1.3702117204666138, "learning_rate": 0.0002686421173762946, "loss": 0.2709, "step": 300910 }, { "epoch": 86.57077100115075, "grad_norm": 3.0125410556793213, "learning_rate": 0.00026858457997698505, "loss": 0.3206, "step": 300920 }, { "epoch": 86.57364787111622, "grad_norm": 1.5905132293701172, "learning_rate": 0.0002685270425776755, "loss": 0.2568, "step": 300930 }, { "epoch": 86.5765247410817, "grad_norm": 1.2785521745681763, "learning_rate": 0.00026846950517836596, "loss": 0.3772, "step": 300940 }, { "epoch": 86.57940161104717, "grad_norm": 0.9401551485061646, "learning_rate": 0.0002684119677790564, "loss": 0.2647, "step": 300950 }, { "epoch": 86.58227848101266, "grad_norm": 2.6432688236236572, "learning_rate": 0.0002683544303797468, "loss": 0.3084, "step": 300960 }, { "epoch": 86.58515535097814, "grad_norm": 1.4109690189361572, "learning_rate": 0.0002682968929804373, "loss": 0.2428, "step": 300970 }, { "epoch": 86.58803222094362, "grad_norm": 2.065584421157837, "learning_rate": 0.0002682393555811277, "loss": 0.3159, "step": 300980 }, { "epoch": 86.5909090909091, "grad_norm": 0.979154109954834, "learning_rate": 0.0002681818181818182, "loss": 0.345, "step": 300990 }, { "epoch": 86.59378596087457, "grad_norm": 1.9422411918640137, "learning_rate": 0.00026812428078250864, "loss": 0.3812, "step": 301000 }, { "epoch": 86.59666283084005, "grad_norm": 0.9397455453872681, "learning_rate": 0.0002680667433831991, "loss": 0.314, "step": 301010 }, { "epoch": 86.59953970080552, "grad_norm": 0.9415237903594971, "learning_rate": 0.00026800920598388955, "loss": 0.3788, "step": 301020 }, { "epoch": 86.602416570771, "grad_norm": 1.014611005783081, "learning_rate": 0.00026795166858458, "loss": 0.3171, "step": 301030 }, { "epoch": 86.60529344073647, "grad_norm": 1.810185432434082, "learning_rate": 0.00026789413118527046, "loss": 0.3133, "step": 301040 }, { "epoch": 86.60817031070195, "grad_norm": 0.9029675722122192, "learning_rate": 0.00026783659378596086, "loss": 0.3131, "step": 301050 }, { "epoch": 86.61104718066743, "grad_norm": 0.8689854741096497, "learning_rate": 0.00026777905638665137, "loss": 0.3032, "step": 301060 }, { "epoch": 86.61392405063292, "grad_norm": 0.717369794845581, "learning_rate": 0.00026772151898734177, "loss": 0.2749, "step": 301070 }, { "epoch": 86.6168009205984, "grad_norm": 1.662203073501587, "learning_rate": 0.0002676639815880322, "loss": 0.2735, "step": 301080 }, { "epoch": 86.61967779056387, "grad_norm": 1.663791298866272, "learning_rate": 0.00026760644418872263, "loss": 0.3042, "step": 301090 }, { "epoch": 86.62255466052935, "grad_norm": 0.9120163321495056, "learning_rate": 0.00026754890678941314, "loss": 0.346, "step": 301100 }, { "epoch": 86.62543153049482, "grad_norm": 0.8943827152252197, "learning_rate": 0.0002674913693901036, "loss": 0.3669, "step": 301110 }, { "epoch": 86.6283084004603, "grad_norm": 1.1114068031311035, "learning_rate": 0.000267433831990794, "loss": 0.3313, "step": 301120 }, { "epoch": 86.63118527042577, "grad_norm": 0.9963265061378479, "learning_rate": 0.0002673762945914845, "loss": 0.2847, "step": 301130 }, { "epoch": 86.63406214039125, "grad_norm": 1.4327917098999023, "learning_rate": 0.0002673187571921749, "loss": 0.3065, "step": 301140 }, { "epoch": 86.63693901035673, "grad_norm": 0.8509055972099304, "learning_rate": 0.00026726121979286536, "loss": 0.3142, "step": 301150 }, { "epoch": 86.6398158803222, "grad_norm": 1.0391300916671753, "learning_rate": 0.0002672036823935558, "loss": 0.3177, "step": 301160 }, { "epoch": 86.6426927502877, "grad_norm": 1.2285492420196533, "learning_rate": 0.00026714614499424627, "loss": 0.274, "step": 301170 }, { "epoch": 86.64556962025317, "grad_norm": 1.0076806545257568, "learning_rate": 0.00026708860759493667, "loss": 0.2874, "step": 301180 }, { "epoch": 86.64844649021865, "grad_norm": 1.8955916166305542, "learning_rate": 0.0002670310701956272, "loss": 0.2676, "step": 301190 }, { "epoch": 86.65132336018412, "grad_norm": 1.4099408388137817, "learning_rate": 0.0002669735327963176, "loss": 0.2927, "step": 301200 }, { "epoch": 86.6542002301496, "grad_norm": 1.3394314050674438, "learning_rate": 0.00026691599539700804, "loss": 0.383, "step": 301210 }, { "epoch": 86.65707710011507, "grad_norm": 1.070421814918518, "learning_rate": 0.00026685845799769855, "loss": 0.278, "step": 301220 }, { "epoch": 86.65995397008055, "grad_norm": 1.2843316793441772, "learning_rate": 0.00026680092059838895, "loss": 0.2964, "step": 301230 }, { "epoch": 86.66283084004603, "grad_norm": 1.2800378799438477, "learning_rate": 0.0002667433831990794, "loss": 0.2726, "step": 301240 }, { "epoch": 86.6657077100115, "grad_norm": 0.9233953952789307, "learning_rate": 0.00026668584579976986, "loss": 0.3099, "step": 301250 }, { "epoch": 86.66858457997698, "grad_norm": 0.9874232411384583, "learning_rate": 0.0002666283084004603, "loss": 0.292, "step": 301260 }, { "epoch": 86.67146144994246, "grad_norm": 0.9536915421485901, "learning_rate": 0.0002665707710011507, "loss": 0.3298, "step": 301270 }, { "epoch": 86.67433831990795, "grad_norm": 1.718921184539795, "learning_rate": 0.00026651323360184123, "loss": 0.369, "step": 301280 }, { "epoch": 86.67721518987342, "grad_norm": 0.8620375990867615, "learning_rate": 0.00026645569620253163, "loss": 0.2895, "step": 301290 }, { "epoch": 86.6800920598389, "grad_norm": 1.068625569343567, "learning_rate": 0.0002663981588032221, "loss": 0.4029, "step": 301300 }, { "epoch": 86.68296892980437, "grad_norm": 1.278723955154419, "learning_rate": 0.0002663406214039126, "loss": 0.3124, "step": 301310 }, { "epoch": 86.68584579976985, "grad_norm": 1.7961606979370117, "learning_rate": 0.000266283084004603, "loss": 0.314, "step": 301320 }, { "epoch": 86.68872266973533, "grad_norm": 0.9444628357887268, "learning_rate": 0.00026622554660529345, "loss": 0.2231, "step": 301330 }, { "epoch": 86.6915995397008, "grad_norm": 1.337733268737793, "learning_rate": 0.0002661680092059839, "loss": 0.3101, "step": 301340 }, { "epoch": 86.69447640966628, "grad_norm": 2.7317590713500977, "learning_rate": 0.00026611047180667436, "loss": 0.2838, "step": 301350 }, { "epoch": 86.69735327963176, "grad_norm": 1.5485018491744995, "learning_rate": 0.00026605293440736476, "loss": 0.3576, "step": 301360 }, { "epoch": 86.70023014959723, "grad_norm": 0.9149934649467468, "learning_rate": 0.00026599539700805527, "loss": 0.3197, "step": 301370 }, { "epoch": 86.70310701956272, "grad_norm": 1.2672733068466187, "learning_rate": 0.0002659378596087457, "loss": 0.3164, "step": 301380 }, { "epoch": 86.7059838895282, "grad_norm": 2.5622198581695557, "learning_rate": 0.00026588032220943613, "loss": 0.2735, "step": 301390 }, { "epoch": 86.70886075949367, "grad_norm": 1.0067118406295776, "learning_rate": 0.0002658227848101266, "loss": 0.3156, "step": 301400 }, { "epoch": 86.71173762945915, "grad_norm": 1.1849069595336914, "learning_rate": 0.00026576524741081704, "loss": 0.3333, "step": 301410 }, { "epoch": 86.71461449942463, "grad_norm": 2.964498519897461, "learning_rate": 0.0002657077100115075, "loss": 0.3542, "step": 301420 }, { "epoch": 86.7174913693901, "grad_norm": 0.7907363176345825, "learning_rate": 0.00026565017261219795, "loss": 0.3177, "step": 301430 }, { "epoch": 86.72036823935558, "grad_norm": 1.4514411687850952, "learning_rate": 0.0002655926352128884, "loss": 0.2868, "step": 301440 }, { "epoch": 86.72324510932106, "grad_norm": 0.7351871728897095, "learning_rate": 0.0002655350978135788, "loss": 0.2696, "step": 301450 }, { "epoch": 86.72612197928653, "grad_norm": 1.2174509763717651, "learning_rate": 0.0002654775604142693, "loss": 0.3127, "step": 301460 }, { "epoch": 86.72899884925201, "grad_norm": 1.603652000427246, "learning_rate": 0.0002654200230149597, "loss": 0.3578, "step": 301470 }, { "epoch": 86.7318757192175, "grad_norm": 2.5176265239715576, "learning_rate": 0.0002653624856156502, "loss": 0.4048, "step": 301480 }, { "epoch": 86.73475258918297, "grad_norm": 1.435656189918518, "learning_rate": 0.0002653049482163406, "loss": 0.3203, "step": 301490 }, { "epoch": 86.73762945914845, "grad_norm": 0.9466649889945984, "learning_rate": 0.0002652474108170311, "loss": 0.3515, "step": 301500 }, { "epoch": 86.74050632911393, "grad_norm": 0.8648273944854736, "learning_rate": 0.00026518987341772154, "loss": 0.2015, "step": 301510 }, { "epoch": 86.7433831990794, "grad_norm": 1.8696178197860718, "learning_rate": 0.00026513233601841194, "loss": 0.3132, "step": 301520 }, { "epoch": 86.74626006904488, "grad_norm": 0.8452661633491516, "learning_rate": 0.00026507479861910245, "loss": 0.3474, "step": 301530 }, { "epoch": 86.74913693901036, "grad_norm": 0.8470406532287598, "learning_rate": 0.00026501726121979285, "loss": 0.3276, "step": 301540 }, { "epoch": 86.75201380897583, "grad_norm": 1.2280341386795044, "learning_rate": 0.0002649597238204833, "loss": 0.2997, "step": 301550 }, { "epoch": 86.75489067894131, "grad_norm": 1.954376459121704, "learning_rate": 0.00026490218642117376, "loss": 0.3476, "step": 301560 }, { "epoch": 86.75776754890678, "grad_norm": 1.599401593208313, "learning_rate": 0.0002648446490218642, "loss": 0.3529, "step": 301570 }, { "epoch": 86.76064441887226, "grad_norm": 1.2547563314437866, "learning_rate": 0.0002647871116225546, "loss": 0.3458, "step": 301580 }, { "epoch": 86.76352128883775, "grad_norm": 2.160010814666748, "learning_rate": 0.00026472957422324513, "loss": 0.3347, "step": 301590 }, { "epoch": 86.76639815880323, "grad_norm": 0.9518389701843262, "learning_rate": 0.0002646720368239356, "loss": 0.2764, "step": 301600 }, { "epoch": 86.7692750287687, "grad_norm": 1.2524094581604004, "learning_rate": 0.000264614499424626, "loss": 0.2715, "step": 301610 }, { "epoch": 86.77215189873418, "grad_norm": 1.001828908920288, "learning_rate": 0.0002645569620253165, "loss": 0.3664, "step": 301620 }, { "epoch": 86.77502876869966, "grad_norm": 0.5985483527183533, "learning_rate": 0.0002644994246260069, "loss": 0.395, "step": 301630 }, { "epoch": 86.77790563866513, "grad_norm": 0.9439879655838013, "learning_rate": 0.00026444188722669735, "loss": 0.2693, "step": 301640 }, { "epoch": 86.78078250863061, "grad_norm": 1.1530587673187256, "learning_rate": 0.0002643843498273878, "loss": 0.3468, "step": 301650 }, { "epoch": 86.78365937859608, "grad_norm": 1.9248733520507812, "learning_rate": 0.00026432681242807827, "loss": 0.2948, "step": 301660 }, { "epoch": 86.78653624856156, "grad_norm": 2.4280412197113037, "learning_rate": 0.00026426927502876867, "loss": 0.3041, "step": 301670 }, { "epoch": 86.78941311852704, "grad_norm": 1.2829080820083618, "learning_rate": 0.0002642117376294592, "loss": 0.4024, "step": 301680 }, { "epoch": 86.79228998849253, "grad_norm": 1.6304466724395752, "learning_rate": 0.0002641542002301496, "loss": 0.2586, "step": 301690 }, { "epoch": 86.795166858458, "grad_norm": 1.315112590789795, "learning_rate": 0.00026409666283084003, "loss": 0.2644, "step": 301700 }, { "epoch": 86.79804372842348, "grad_norm": 1.0217185020446777, "learning_rate": 0.00026403912543153054, "loss": 0.2935, "step": 301710 }, { "epoch": 86.80092059838896, "grad_norm": 1.679051160812378, "learning_rate": 0.00026398158803222094, "loss": 0.2805, "step": 301720 }, { "epoch": 86.80379746835443, "grad_norm": 1.3571851253509521, "learning_rate": 0.0002639240506329114, "loss": 0.309, "step": 301730 }, { "epoch": 86.80667433831991, "grad_norm": 1.0888949632644653, "learning_rate": 0.00026386651323360186, "loss": 0.3731, "step": 301740 }, { "epoch": 86.80955120828538, "grad_norm": 1.2944159507751465, "learning_rate": 0.0002638089758342923, "loss": 0.4227, "step": 301750 }, { "epoch": 86.81242807825086, "grad_norm": 1.3799501657485962, "learning_rate": 0.0002637514384349827, "loss": 0.2817, "step": 301760 }, { "epoch": 86.81530494821634, "grad_norm": 1.6957895755767822, "learning_rate": 0.0002636939010356732, "loss": 0.2621, "step": 301770 }, { "epoch": 86.81818181818181, "grad_norm": 1.4757356643676758, "learning_rate": 0.0002636363636363636, "loss": 0.2919, "step": 301780 }, { "epoch": 86.82105868814729, "grad_norm": 1.1641299724578857, "learning_rate": 0.0002635788262370541, "loss": 0.3133, "step": 301790 }, { "epoch": 86.82393555811278, "grad_norm": 1.3415518999099731, "learning_rate": 0.0002635212888377446, "loss": 0.3035, "step": 301800 }, { "epoch": 86.82681242807826, "grad_norm": 1.2166435718536377, "learning_rate": 0.000263463751438435, "loss": 0.3389, "step": 301810 }, { "epoch": 86.82968929804373, "grad_norm": 0.8864694833755493, "learning_rate": 0.00026340621403912545, "loss": 0.3514, "step": 301820 }, { "epoch": 86.83256616800921, "grad_norm": 1.3511868715286255, "learning_rate": 0.0002633486766398159, "loss": 0.3148, "step": 301830 }, { "epoch": 86.83544303797468, "grad_norm": 2.9413068294525146, "learning_rate": 0.00026329113924050636, "loss": 0.3026, "step": 301840 }, { "epoch": 86.83831990794016, "grad_norm": 0.9001905918121338, "learning_rate": 0.00026323360184119676, "loss": 0.354, "step": 301850 }, { "epoch": 86.84119677790564, "grad_norm": 1.3653968572616577, "learning_rate": 0.00026317606444188727, "loss": 0.3147, "step": 301860 }, { "epoch": 86.84407364787111, "grad_norm": 1.1794250011444092, "learning_rate": 0.00026311852704257767, "loss": 0.3134, "step": 301870 }, { "epoch": 86.84695051783659, "grad_norm": 1.4046525955200195, "learning_rate": 0.0002630609896432681, "loss": 0.3314, "step": 301880 }, { "epoch": 86.84982738780207, "grad_norm": 0.8698576092720032, "learning_rate": 0.0002630034522439585, "loss": 0.3904, "step": 301890 }, { "epoch": 86.85270425776756, "grad_norm": 1.3335776329040527, "learning_rate": 0.00026294591484464903, "loss": 0.3085, "step": 301900 }, { "epoch": 86.85558112773303, "grad_norm": 1.0083143711090088, "learning_rate": 0.0002628883774453395, "loss": 0.3202, "step": 301910 }, { "epoch": 86.85845799769851, "grad_norm": 1.3027970790863037, "learning_rate": 0.0002628308400460299, "loss": 0.289, "step": 301920 }, { "epoch": 86.86133486766398, "grad_norm": 2.447305679321289, "learning_rate": 0.0002627733026467204, "loss": 0.3345, "step": 301930 }, { "epoch": 86.86421173762946, "grad_norm": 0.8362049460411072, "learning_rate": 0.0002627157652474108, "loss": 0.3602, "step": 301940 }, { "epoch": 86.86708860759494, "grad_norm": 0.776759147644043, "learning_rate": 0.00026265822784810126, "loss": 0.3106, "step": 301950 }, { "epoch": 86.86996547756041, "grad_norm": 0.9199700951576233, "learning_rate": 0.0002626006904487917, "loss": 0.3064, "step": 301960 }, { "epoch": 86.87284234752589, "grad_norm": 1.7883992195129395, "learning_rate": 0.00026254315304948217, "loss": 0.2739, "step": 301970 }, { "epoch": 86.87571921749137, "grad_norm": 1.0016509294509888, "learning_rate": 0.00026248561565017257, "loss": 0.417, "step": 301980 }, { "epoch": 86.87859608745684, "grad_norm": 0.7528254389762878, "learning_rate": 0.0002624280782508631, "loss": 0.4019, "step": 301990 }, { "epoch": 86.88147295742232, "grad_norm": 1.0339930057525635, "learning_rate": 0.00026237054085155354, "loss": 0.3399, "step": 302000 }, { "epoch": 86.88434982738781, "grad_norm": 1.0064096450805664, "learning_rate": 0.00026231300345224394, "loss": 0.3685, "step": 302010 }, { "epoch": 86.88722669735328, "grad_norm": 0.7927072048187256, "learning_rate": 0.00026225546605293445, "loss": 0.2978, "step": 302020 }, { "epoch": 86.89010356731876, "grad_norm": 1.0375497341156006, "learning_rate": 0.00026219792865362485, "loss": 0.2821, "step": 302030 }, { "epoch": 86.89298043728424, "grad_norm": 0.8625051379203796, "learning_rate": 0.0002621403912543153, "loss": 0.3311, "step": 302040 }, { "epoch": 86.89585730724971, "grad_norm": 2.3413305282592773, "learning_rate": 0.00026208285385500576, "loss": 0.3144, "step": 302050 }, { "epoch": 86.89873417721519, "grad_norm": 1.1110584735870361, "learning_rate": 0.0002620253164556962, "loss": 0.2911, "step": 302060 }, { "epoch": 86.90161104718067, "grad_norm": 1.334201455116272, "learning_rate": 0.0002619677790563866, "loss": 0.2894, "step": 302070 }, { "epoch": 86.90448791714614, "grad_norm": 0.5308454632759094, "learning_rate": 0.0002619102416570771, "loss": 0.2944, "step": 302080 }, { "epoch": 86.90736478711162, "grad_norm": 1.0792477130889893, "learning_rate": 0.0002618527042577675, "loss": 0.3279, "step": 302090 }, { "epoch": 86.9102416570771, "grad_norm": 0.6274083852767944, "learning_rate": 0.000261795166858458, "loss": 0.2597, "step": 302100 }, { "epoch": 86.91311852704258, "grad_norm": 1.8673690557479858, "learning_rate": 0.0002617376294591485, "loss": 0.2747, "step": 302110 }, { "epoch": 86.91599539700806, "grad_norm": 1.833335041999817, "learning_rate": 0.0002616800920598389, "loss": 0.3467, "step": 302120 }, { "epoch": 86.91887226697354, "grad_norm": 1.4543845653533936, "learning_rate": 0.00026162255466052935, "loss": 0.2799, "step": 302130 }, { "epoch": 86.92174913693901, "grad_norm": 1.021057367324829, "learning_rate": 0.0002615650172612198, "loss": 0.3319, "step": 302140 }, { "epoch": 86.92462600690449, "grad_norm": 0.7151677012443542, "learning_rate": 0.00026150747986191026, "loss": 0.3046, "step": 302150 }, { "epoch": 86.92750287686997, "grad_norm": 1.468858242034912, "learning_rate": 0.00026144994246260066, "loss": 0.2724, "step": 302160 }, { "epoch": 86.93037974683544, "grad_norm": 1.09453547000885, "learning_rate": 0.00026139240506329117, "loss": 0.383, "step": 302170 }, { "epoch": 86.93325661680092, "grad_norm": 1.13510262966156, "learning_rate": 0.00026133486766398157, "loss": 0.288, "step": 302180 }, { "epoch": 86.9361334867664, "grad_norm": 1.771525502204895, "learning_rate": 0.00026127733026467203, "loss": 0.3464, "step": 302190 }, { "epoch": 86.93901035673187, "grad_norm": 1.0379548072814941, "learning_rate": 0.00026121979286536254, "loss": 0.2551, "step": 302200 }, { "epoch": 86.94188722669735, "grad_norm": 1.0802276134490967, "learning_rate": 0.00026116225546605294, "loss": 0.2579, "step": 302210 }, { "epoch": 86.94476409666284, "grad_norm": 0.8830419778823853, "learning_rate": 0.0002611047180667434, "loss": 0.3018, "step": 302220 }, { "epoch": 86.94764096662831, "grad_norm": 2.3034305572509766, "learning_rate": 0.00026104718066743385, "loss": 0.348, "step": 302230 }, { "epoch": 86.95051783659379, "grad_norm": 0.8464745283126831, "learning_rate": 0.0002609896432681243, "loss": 0.3557, "step": 302240 }, { "epoch": 86.95339470655927, "grad_norm": 1.1590039730072021, "learning_rate": 0.0002609321058688147, "loss": 0.3195, "step": 302250 }, { "epoch": 86.95627157652474, "grad_norm": 0.9301288723945618, "learning_rate": 0.0002608745684695052, "loss": 0.3487, "step": 302260 }, { "epoch": 86.95914844649022, "grad_norm": 0.7784001231193542, "learning_rate": 0.0002608170310701956, "loss": 0.3049, "step": 302270 }, { "epoch": 86.9620253164557, "grad_norm": 1.2230976819992065, "learning_rate": 0.0002607594936708861, "loss": 0.3627, "step": 302280 }, { "epoch": 86.96490218642117, "grad_norm": 0.9065201878547668, "learning_rate": 0.0002607019562715766, "loss": 0.3144, "step": 302290 }, { "epoch": 86.96777905638665, "grad_norm": 1.9679456949234009, "learning_rate": 0.000260644418872267, "loss": 0.4136, "step": 302300 }, { "epoch": 86.97065592635212, "grad_norm": 1.4574133157730103, "learning_rate": 0.00026058688147295744, "loss": 0.3002, "step": 302310 }, { "epoch": 86.97353279631761, "grad_norm": 1.4200985431671143, "learning_rate": 0.00026052934407364784, "loss": 0.3355, "step": 302320 }, { "epoch": 86.97640966628309, "grad_norm": 1.2444795370101929, "learning_rate": 0.00026047180667433835, "loss": 0.2906, "step": 302330 }, { "epoch": 86.97928653624857, "grad_norm": 1.9795767068862915, "learning_rate": 0.00026041426927502875, "loss": 0.3281, "step": 302340 }, { "epoch": 86.98216340621404, "grad_norm": 0.6715266704559326, "learning_rate": 0.0002603567318757192, "loss": 0.3008, "step": 302350 }, { "epoch": 86.98504027617952, "grad_norm": 1.2237855195999146, "learning_rate": 0.00026029919447640966, "loss": 0.3317, "step": 302360 }, { "epoch": 86.987917146145, "grad_norm": 1.7107349634170532, "learning_rate": 0.0002602416570771001, "loss": 0.3171, "step": 302370 }, { "epoch": 86.99079401611047, "grad_norm": 1.5894356966018677, "learning_rate": 0.0002601841196777905, "loss": 0.3453, "step": 302380 }, { "epoch": 86.99367088607595, "grad_norm": 1.1431156396865845, "learning_rate": 0.00026012658227848103, "loss": 0.3607, "step": 302390 }, { "epoch": 86.99654775604142, "grad_norm": 0.9591652154922485, "learning_rate": 0.0002600690448791715, "loss": 0.2707, "step": 302400 }, { "epoch": 86.9994246260069, "grad_norm": 1.496839165687561, "learning_rate": 0.0002600115074798619, "loss": 0.308, "step": 302410 }, { "epoch": 87.00230149597238, "grad_norm": 1.1267355680465698, "learning_rate": 0.0002599539700805524, "loss": 0.3038, "step": 302420 }, { "epoch": 87.00517836593787, "grad_norm": 0.5577425956726074, "learning_rate": 0.0002598964326812428, "loss": 0.2569, "step": 302430 }, { "epoch": 87.00805523590334, "grad_norm": 1.8174175024032593, "learning_rate": 0.00025983889528193325, "loss": 0.2689, "step": 302440 }, { "epoch": 87.01093210586882, "grad_norm": 0.8273252844810486, "learning_rate": 0.0002597813578826237, "loss": 0.3167, "step": 302450 }, { "epoch": 87.0138089758343, "grad_norm": 1.2775356769561768, "learning_rate": 0.00025972382048331416, "loss": 0.3136, "step": 302460 }, { "epoch": 87.01668584579977, "grad_norm": 0.4937455356121063, "learning_rate": 0.00025966628308400456, "loss": 0.274, "step": 302470 }, { "epoch": 87.01956271576525, "grad_norm": 1.1377265453338623, "learning_rate": 0.0002596087456846951, "loss": 0.2981, "step": 302480 }, { "epoch": 87.02243958573072, "grad_norm": 1.9640462398529053, "learning_rate": 0.00025955120828538553, "loss": 0.2803, "step": 302490 }, { "epoch": 87.0253164556962, "grad_norm": 0.9917334318161011, "learning_rate": 0.00025949367088607593, "loss": 0.2914, "step": 302500 }, { "epoch": 87.02819332566168, "grad_norm": 0.5045850872993469, "learning_rate": 0.00025943613348676644, "loss": 0.2316, "step": 302510 }, { "epoch": 87.03107019562715, "grad_norm": 0.893801212310791, "learning_rate": 0.00025937859608745684, "loss": 0.2637, "step": 302520 }, { "epoch": 87.03394706559264, "grad_norm": 1.0418713092803955, "learning_rate": 0.0002593210586881473, "loss": 0.261, "step": 302530 }, { "epoch": 87.03682393555812, "grad_norm": 1.6099953651428223, "learning_rate": 0.00025926352128883775, "loss": 0.2578, "step": 302540 }, { "epoch": 87.0397008055236, "grad_norm": 1.228043794631958, "learning_rate": 0.0002592059838895282, "loss": 0.2563, "step": 302550 }, { "epoch": 87.04257767548907, "grad_norm": 1.5072224140167236, "learning_rate": 0.0002591484464902186, "loss": 0.374, "step": 302560 }, { "epoch": 87.04545454545455, "grad_norm": 0.8133577704429626, "learning_rate": 0.0002590909090909091, "loss": 0.2781, "step": 302570 }, { "epoch": 87.04833141542002, "grad_norm": 1.2012226581573486, "learning_rate": 0.0002590333716915995, "loss": 0.2965, "step": 302580 }, { "epoch": 87.0512082853855, "grad_norm": 0.6857205033302307, "learning_rate": 0.00025897583429229, "loss": 0.2662, "step": 302590 }, { "epoch": 87.05408515535098, "grad_norm": 0.971225380897522, "learning_rate": 0.0002589182968929805, "loss": 0.284, "step": 302600 }, { "epoch": 87.05696202531645, "grad_norm": 1.1713725328445435, "learning_rate": 0.0002588607594936709, "loss": 0.2381, "step": 302610 }, { "epoch": 87.05983889528193, "grad_norm": 1.5791376829147339, "learning_rate": 0.00025880322209436134, "loss": 0.2889, "step": 302620 }, { "epoch": 87.0627157652474, "grad_norm": 1.3618918657302856, "learning_rate": 0.0002587456846950518, "loss": 0.2958, "step": 302630 }, { "epoch": 87.0655926352129, "grad_norm": 2.185972213745117, "learning_rate": 0.00025868814729574225, "loss": 0.2993, "step": 302640 }, { "epoch": 87.06846950517837, "grad_norm": 0.795957088470459, "learning_rate": 0.00025863060989643266, "loss": 0.2396, "step": 302650 }, { "epoch": 87.07134637514385, "grad_norm": 1.1391105651855469, "learning_rate": 0.00025857307249712317, "loss": 0.2635, "step": 302660 }, { "epoch": 87.07422324510932, "grad_norm": 0.7913269996643066, "learning_rate": 0.00025851553509781357, "loss": 0.3351, "step": 302670 }, { "epoch": 87.0771001150748, "grad_norm": 2.1458725929260254, "learning_rate": 0.000258457997698504, "loss": 0.311, "step": 302680 }, { "epoch": 87.07997698504028, "grad_norm": 1.02191960811615, "learning_rate": 0.00025840046029919453, "loss": 0.333, "step": 302690 }, { "epoch": 87.08285385500575, "grad_norm": 0.8908408880233765, "learning_rate": 0.00025834292289988493, "loss": 0.2771, "step": 302700 }, { "epoch": 87.08573072497123, "grad_norm": 1.1534494161605835, "learning_rate": 0.0002582853855005754, "loss": 0.3179, "step": 302710 }, { "epoch": 87.0886075949367, "grad_norm": 0.7646901607513428, "learning_rate": 0.0002582278481012658, "loss": 0.2618, "step": 302720 }, { "epoch": 87.09148446490218, "grad_norm": 1.4306387901306152, "learning_rate": 0.0002581703107019563, "loss": 0.2824, "step": 302730 }, { "epoch": 87.09436133486767, "grad_norm": 0.6120149493217468, "learning_rate": 0.0002581127733026467, "loss": 0.3045, "step": 302740 }, { "epoch": 87.09723820483315, "grad_norm": 1.656587839126587, "learning_rate": 0.00025805523590333716, "loss": 0.3882, "step": 302750 }, { "epoch": 87.10011507479862, "grad_norm": 0.9577045440673828, "learning_rate": 0.0002579976985040276, "loss": 0.2991, "step": 302760 }, { "epoch": 87.1029919447641, "grad_norm": 0.9610371589660645, "learning_rate": 0.00025794016110471807, "loss": 0.3318, "step": 302770 }, { "epoch": 87.10586881472958, "grad_norm": 1.683448314666748, "learning_rate": 0.0002578826237054085, "loss": 0.4248, "step": 302780 }, { "epoch": 87.10874568469505, "grad_norm": 0.9801628589630127, "learning_rate": 0.000257825086306099, "loss": 0.3296, "step": 302790 }, { "epoch": 87.11162255466053, "grad_norm": 1.6230287551879883, "learning_rate": 0.00025776754890678943, "loss": 0.3374, "step": 302800 }, { "epoch": 87.114499424626, "grad_norm": 1.3373000621795654, "learning_rate": 0.00025771001150747984, "loss": 0.2681, "step": 302810 }, { "epoch": 87.11737629459148, "grad_norm": 1.1414676904678345, "learning_rate": 0.00025765247410817034, "loss": 0.3112, "step": 302820 }, { "epoch": 87.12025316455696, "grad_norm": 1.3778610229492188, "learning_rate": 0.00025759493670886075, "loss": 0.3305, "step": 302830 }, { "epoch": 87.12313003452243, "grad_norm": 1.0189323425292969, "learning_rate": 0.0002575373993095512, "loss": 0.2689, "step": 302840 }, { "epoch": 87.12600690448792, "grad_norm": 0.848319411277771, "learning_rate": 0.00025747986191024166, "loss": 0.226, "step": 302850 }, { "epoch": 87.1288837744534, "grad_norm": 1.0997869968414307, "learning_rate": 0.0002574223245109321, "loss": 0.3388, "step": 302860 }, { "epoch": 87.13176064441888, "grad_norm": 1.159468650817871, "learning_rate": 0.0002573647871116225, "loss": 0.2845, "step": 302870 }, { "epoch": 87.13463751438435, "grad_norm": 0.8209163546562195, "learning_rate": 0.000257307249712313, "loss": 0.2691, "step": 302880 }, { "epoch": 87.13751438434983, "grad_norm": 2.0817503929138184, "learning_rate": 0.0002572497123130035, "loss": 0.3263, "step": 302890 }, { "epoch": 87.1403912543153, "grad_norm": 1.6114428043365479, "learning_rate": 0.0002571921749136939, "loss": 0.2679, "step": 302900 }, { "epoch": 87.14326812428078, "grad_norm": 2.4667131900787354, "learning_rate": 0.0002571346375143844, "loss": 0.2991, "step": 302910 }, { "epoch": 87.14614499424626, "grad_norm": 2.572042942047119, "learning_rate": 0.0002570771001150748, "loss": 0.2892, "step": 302920 }, { "epoch": 87.14902186421173, "grad_norm": 0.9146684408187866, "learning_rate": 0.00025701956271576525, "loss": 0.3141, "step": 302930 }, { "epoch": 87.15189873417721, "grad_norm": 0.8380536437034607, "learning_rate": 0.0002569620253164557, "loss": 0.2485, "step": 302940 }, { "epoch": 87.1547756041427, "grad_norm": 1.1430639028549194, "learning_rate": 0.00025690448791714616, "loss": 0.2908, "step": 302950 }, { "epoch": 87.15765247410818, "grad_norm": 0.8033429980278015, "learning_rate": 0.00025684695051783656, "loss": 0.3009, "step": 302960 }, { "epoch": 87.16052934407365, "grad_norm": 2.143846273422241, "learning_rate": 0.00025678941311852707, "loss": 0.3191, "step": 302970 }, { "epoch": 87.16340621403913, "grad_norm": 0.8997467160224915, "learning_rate": 0.0002567318757192175, "loss": 0.3019, "step": 302980 }, { "epoch": 87.1662830840046, "grad_norm": 0.9224861860275269, "learning_rate": 0.0002566743383199079, "loss": 0.3041, "step": 302990 }, { "epoch": 87.16915995397008, "grad_norm": 1.0302987098693848, "learning_rate": 0.00025661680092059844, "loss": 0.3218, "step": 303000 }, { "epoch": 87.17203682393556, "grad_norm": 1.2316412925720215, "learning_rate": 0.00025655926352128884, "loss": 0.3226, "step": 303010 }, { "epoch": 87.17491369390103, "grad_norm": 1.2969378232955933, "learning_rate": 0.0002565017261219793, "loss": 0.2761, "step": 303020 }, { "epoch": 87.17779056386651, "grad_norm": 1.160940408706665, "learning_rate": 0.00025644418872266975, "loss": 0.2796, "step": 303030 }, { "epoch": 87.18066743383199, "grad_norm": 1.0442792177200317, "learning_rate": 0.0002563866513233602, "loss": 0.2633, "step": 303040 }, { "epoch": 87.18354430379746, "grad_norm": 1.6600139141082764, "learning_rate": 0.0002563291139240506, "loss": 0.3216, "step": 303050 }, { "epoch": 87.18642117376295, "grad_norm": 0.952399492263794, "learning_rate": 0.0002562715765247411, "loss": 0.27, "step": 303060 }, { "epoch": 87.18929804372843, "grad_norm": 0.709538996219635, "learning_rate": 0.0002562140391254315, "loss": 0.2418, "step": 303070 }, { "epoch": 87.1921749136939, "grad_norm": 0.9708514213562012, "learning_rate": 0.00025615650172612197, "loss": 0.4012, "step": 303080 }, { "epoch": 87.19505178365938, "grad_norm": 0.8968518972396851, "learning_rate": 0.0002560989643268125, "loss": 0.3269, "step": 303090 }, { "epoch": 87.19792865362486, "grad_norm": 0.9382525086402893, "learning_rate": 0.0002560414269275029, "loss": 0.4376, "step": 303100 }, { "epoch": 87.20080552359033, "grad_norm": 1.957582950592041, "learning_rate": 0.00025598388952819334, "loss": 0.3828, "step": 303110 }, { "epoch": 87.20368239355581, "grad_norm": 1.084715723991394, "learning_rate": 0.00025592635212888374, "loss": 0.2461, "step": 303120 }, { "epoch": 87.20655926352129, "grad_norm": 0.9511516690254211, "learning_rate": 0.00025586881472957425, "loss": 0.307, "step": 303130 }, { "epoch": 87.20943613348676, "grad_norm": 1.5277894735336304, "learning_rate": 0.00025581127733026465, "loss": 0.3113, "step": 303140 }, { "epoch": 87.21231300345224, "grad_norm": 1.1895204782485962, "learning_rate": 0.0002557537399309551, "loss": 0.3145, "step": 303150 }, { "epoch": 87.21518987341773, "grad_norm": 1.2785954475402832, "learning_rate": 0.00025569620253164556, "loss": 0.3047, "step": 303160 }, { "epoch": 87.2180667433832, "grad_norm": 1.401450514793396, "learning_rate": 0.000255638665132336, "loss": 0.3757, "step": 303170 }, { "epoch": 87.22094361334868, "grad_norm": 1.0421797037124634, "learning_rate": 0.00025558112773302647, "loss": 0.2878, "step": 303180 }, { "epoch": 87.22382048331416, "grad_norm": 1.2754591703414917, "learning_rate": 0.00025552359033371693, "loss": 0.2898, "step": 303190 }, { "epoch": 87.22669735327963, "grad_norm": 0.9216597080230713, "learning_rate": 0.0002554660529344074, "loss": 0.3503, "step": 303200 }, { "epoch": 87.22957422324511, "grad_norm": 1.3318744897842407, "learning_rate": 0.0002554085155350978, "loss": 0.3392, "step": 303210 }, { "epoch": 87.23245109321059, "grad_norm": 0.9048700928688049, "learning_rate": 0.0002553509781357883, "loss": 0.3653, "step": 303220 }, { "epoch": 87.23532796317606, "grad_norm": 1.6310659646987915, "learning_rate": 0.0002552934407364787, "loss": 0.3237, "step": 303230 }, { "epoch": 87.23820483314154, "grad_norm": 0.7134137749671936, "learning_rate": 0.00025523590333716915, "loss": 0.4513, "step": 303240 }, { "epoch": 87.24108170310701, "grad_norm": 1.033873200416565, "learning_rate": 0.0002551783659378596, "loss": 0.432, "step": 303250 }, { "epoch": 87.24395857307249, "grad_norm": 0.8394960761070251, "learning_rate": 0.00025512082853855006, "loss": 0.2825, "step": 303260 }, { "epoch": 87.24683544303798, "grad_norm": 1.052233338356018, "learning_rate": 0.0002550632911392405, "loss": 0.3302, "step": 303270 }, { "epoch": 87.24971231300346, "grad_norm": 1.3462128639221191, "learning_rate": 0.00025500575373993097, "loss": 0.361, "step": 303280 }, { "epoch": 87.25258918296893, "grad_norm": 0.9273473024368286, "learning_rate": 0.00025494821634062143, "loss": 0.3646, "step": 303290 }, { "epoch": 87.25546605293441, "grad_norm": 0.6418212056159973, "learning_rate": 0.00025489067894131183, "loss": 0.269, "step": 303300 }, { "epoch": 87.25834292289989, "grad_norm": 1.5523312091827393, "learning_rate": 0.00025483314154200234, "loss": 0.346, "step": 303310 }, { "epoch": 87.26121979286536, "grad_norm": 0.8382956981658936, "learning_rate": 0.00025477560414269274, "loss": 0.2347, "step": 303320 }, { "epoch": 87.26409666283084, "grad_norm": 1.5276530981063843, "learning_rate": 0.0002547180667433832, "loss": 0.2995, "step": 303330 }, { "epoch": 87.26697353279631, "grad_norm": 0.8872143626213074, "learning_rate": 0.00025466052934407365, "loss": 0.262, "step": 303340 }, { "epoch": 87.26985040276179, "grad_norm": 1.627618432044983, "learning_rate": 0.0002546029919447641, "loss": 0.2936, "step": 303350 }, { "epoch": 87.27272727272727, "grad_norm": 1.6701388359069824, "learning_rate": 0.0002545454545454545, "loss": 0.3182, "step": 303360 }, { "epoch": 87.27560414269276, "grad_norm": 1.4912002086639404, "learning_rate": 0.000254487917146145, "loss": 0.294, "step": 303370 }, { "epoch": 87.27848101265823, "grad_norm": 0.9025678634643555, "learning_rate": 0.0002544303797468355, "loss": 0.2773, "step": 303380 }, { "epoch": 87.28135788262371, "grad_norm": 1.270403265953064, "learning_rate": 0.0002543728423475259, "loss": 0.3176, "step": 303390 }, { "epoch": 87.28423475258919, "grad_norm": 1.2258763313293457, "learning_rate": 0.0002543153049482164, "loss": 0.2721, "step": 303400 }, { "epoch": 87.28711162255466, "grad_norm": 2.0428380966186523, "learning_rate": 0.0002542577675489068, "loss": 0.3401, "step": 303410 }, { "epoch": 87.28998849252014, "grad_norm": 1.6561094522476196, "learning_rate": 0.00025420023014959724, "loss": 0.2887, "step": 303420 }, { "epoch": 87.29286536248561, "grad_norm": 1.4429960250854492, "learning_rate": 0.0002541426927502877, "loss": 0.3492, "step": 303430 }, { "epoch": 87.29574223245109, "grad_norm": 1.576977014541626, "learning_rate": 0.00025408515535097815, "loss": 0.3051, "step": 303440 }, { "epoch": 87.29861910241657, "grad_norm": 1.077805757522583, "learning_rate": 0.00025402761795166855, "loss": 0.2931, "step": 303450 }, { "epoch": 87.30149597238204, "grad_norm": 1.437750220298767, "learning_rate": 0.00025397008055235906, "loss": 0.2803, "step": 303460 }, { "epoch": 87.30437284234753, "grad_norm": 1.2373583316802979, "learning_rate": 0.0002539125431530495, "loss": 0.2852, "step": 303470 }, { "epoch": 87.30724971231301, "grad_norm": 0.8730827569961548, "learning_rate": 0.0002538550057537399, "loss": 0.3268, "step": 303480 }, { "epoch": 87.31012658227849, "grad_norm": 0.6731216311454773, "learning_rate": 0.00025379746835443043, "loss": 0.333, "step": 303490 }, { "epoch": 87.31300345224396, "grad_norm": 1.0735387802124023, "learning_rate": 0.00025373993095512083, "loss": 0.3157, "step": 303500 }, { "epoch": 87.31588032220944, "grad_norm": 1.1288073062896729, "learning_rate": 0.0002536823935558113, "loss": 0.3185, "step": 303510 }, { "epoch": 87.31875719217491, "grad_norm": 2.5723414421081543, "learning_rate": 0.00025362485615650174, "loss": 0.2725, "step": 303520 }, { "epoch": 87.32163406214039, "grad_norm": 0.9695488810539246, "learning_rate": 0.0002535673187571922, "loss": 0.2809, "step": 303530 }, { "epoch": 87.32451093210587, "grad_norm": 0.7630110383033752, "learning_rate": 0.0002535097813578826, "loss": 0.3134, "step": 303540 }, { "epoch": 87.32738780207134, "grad_norm": 1.0215198993682861, "learning_rate": 0.00025345224395857305, "loss": 0.336, "step": 303550 }, { "epoch": 87.33026467203682, "grad_norm": 1.3596172332763672, "learning_rate": 0.0002533947065592635, "loss": 0.3039, "step": 303560 }, { "epoch": 87.3331415420023, "grad_norm": 1.4254496097564697, "learning_rate": 0.00025333716915995397, "loss": 0.2785, "step": 303570 }, { "epoch": 87.33601841196779, "grad_norm": 1.698641061782837, "learning_rate": 0.0002532796317606444, "loss": 0.2965, "step": 303580 }, { "epoch": 87.33889528193326, "grad_norm": 0.8921737670898438, "learning_rate": 0.0002532220943613349, "loss": 0.344, "step": 303590 }, { "epoch": 87.34177215189874, "grad_norm": 1.4387716054916382, "learning_rate": 0.00025316455696202533, "loss": 0.3195, "step": 303600 }, { "epoch": 87.34464902186421, "grad_norm": 1.4900789260864258, "learning_rate": 0.00025310701956271573, "loss": 0.3121, "step": 303610 }, { "epoch": 87.34752589182969, "grad_norm": 2.4263386726379395, "learning_rate": 0.00025304948216340624, "loss": 0.3425, "step": 303620 }, { "epoch": 87.35040276179517, "grad_norm": 0.8061593174934387, "learning_rate": 0.00025299194476409664, "loss": 0.2835, "step": 303630 }, { "epoch": 87.35327963176064, "grad_norm": 1.787477970123291, "learning_rate": 0.0002529344073647871, "loss": 0.2945, "step": 303640 }, { "epoch": 87.35615650172612, "grad_norm": 1.5866913795471191, "learning_rate": 0.00025287686996547756, "loss": 0.246, "step": 303650 }, { "epoch": 87.3590333716916, "grad_norm": 2.3956496715545654, "learning_rate": 0.000252819332566168, "loss": 0.3525, "step": 303660 }, { "epoch": 87.36191024165707, "grad_norm": 1.5317699909210205, "learning_rate": 0.00025276179516685847, "loss": 0.3092, "step": 303670 }, { "epoch": 87.36478711162256, "grad_norm": 2.2414016723632812, "learning_rate": 0.0002527042577675489, "loss": 0.3119, "step": 303680 }, { "epoch": 87.36766398158804, "grad_norm": 0.8453472256660461, "learning_rate": 0.0002526467203682394, "loss": 0.2642, "step": 303690 }, { "epoch": 87.37054085155351, "grad_norm": 1.0806806087493896, "learning_rate": 0.0002525891829689298, "loss": 0.3763, "step": 303700 }, { "epoch": 87.37341772151899, "grad_norm": 1.1174780130386353, "learning_rate": 0.0002525316455696203, "loss": 0.317, "step": 303710 }, { "epoch": 87.37629459148447, "grad_norm": 1.4738526344299316, "learning_rate": 0.0002524741081703107, "loss": 0.2406, "step": 303720 }, { "epoch": 87.37917146144994, "grad_norm": 1.0101224184036255, "learning_rate": 0.00025241657077100115, "loss": 0.2732, "step": 303730 }, { "epoch": 87.38204833141542, "grad_norm": 0.6901280879974365, "learning_rate": 0.0002523590333716916, "loss": 0.2756, "step": 303740 }, { "epoch": 87.3849252013809, "grad_norm": 1.1843914985656738, "learning_rate": 0.00025230149597238206, "loss": 0.307, "step": 303750 }, { "epoch": 87.38780207134637, "grad_norm": 0.9747999906539917, "learning_rate": 0.0002522439585730725, "loss": 0.3874, "step": 303760 }, { "epoch": 87.39067894131185, "grad_norm": 2.555924892425537, "learning_rate": 0.00025218642117376297, "loss": 0.2919, "step": 303770 }, { "epoch": 87.39355581127732, "grad_norm": 1.058760404586792, "learning_rate": 0.0002521288837744534, "loss": 0.2542, "step": 303780 }, { "epoch": 87.39643268124281, "grad_norm": 1.2912814617156982, "learning_rate": 0.0002520713463751438, "loss": 0.2955, "step": 303790 }, { "epoch": 87.39930955120829, "grad_norm": 1.1747798919677734, "learning_rate": 0.00025201380897583433, "loss": 0.2255, "step": 303800 }, { "epoch": 87.40218642117377, "grad_norm": 1.6100456714630127, "learning_rate": 0.00025195627157652473, "loss": 0.2921, "step": 303810 }, { "epoch": 87.40506329113924, "grad_norm": 1.4851348400115967, "learning_rate": 0.0002518987341772152, "loss": 0.327, "step": 303820 }, { "epoch": 87.40794016110472, "grad_norm": 1.6867234706878662, "learning_rate": 0.00025184119677790565, "loss": 0.3506, "step": 303830 }, { "epoch": 87.4108170310702, "grad_norm": 1.7375885248184204, "learning_rate": 0.0002517836593785961, "loss": 0.3238, "step": 303840 }, { "epoch": 87.41369390103567, "grad_norm": 0.8388893008232117, "learning_rate": 0.0002517261219792865, "loss": 0.2982, "step": 303850 }, { "epoch": 87.41657077100115, "grad_norm": 0.8137900233268738, "learning_rate": 0.000251668584579977, "loss": 0.3473, "step": 303860 }, { "epoch": 87.41944764096662, "grad_norm": 0.8934319615364075, "learning_rate": 0.00025161104718066747, "loss": 0.3201, "step": 303870 }, { "epoch": 87.4223245109321, "grad_norm": 0.9497560858726501, "learning_rate": 0.00025155350978135787, "loss": 0.2832, "step": 303880 }, { "epoch": 87.42520138089759, "grad_norm": 1.323701024055481, "learning_rate": 0.0002514959723820484, "loss": 0.3254, "step": 303890 }, { "epoch": 87.42807825086307, "grad_norm": 0.8261942863464355, "learning_rate": 0.0002514384349827388, "loss": 0.265, "step": 303900 }, { "epoch": 87.43095512082854, "grad_norm": 1.4975558519363403, "learning_rate": 0.00025138089758342924, "loss": 0.3603, "step": 303910 }, { "epoch": 87.43383199079402, "grad_norm": 1.3792834281921387, "learning_rate": 0.0002513233601841197, "loss": 0.3357, "step": 303920 }, { "epoch": 87.4367088607595, "grad_norm": 1.2084429264068604, "learning_rate": 0.00025126582278481015, "loss": 0.3959, "step": 303930 }, { "epoch": 87.43958573072497, "grad_norm": 0.9163634777069092, "learning_rate": 0.00025120828538550055, "loss": 0.3181, "step": 303940 }, { "epoch": 87.44246260069045, "grad_norm": 2.007910966873169, "learning_rate": 0.000251150747986191, "loss": 0.3448, "step": 303950 }, { "epoch": 87.44533947065592, "grad_norm": 0.586897075176239, "learning_rate": 0.0002510932105868815, "loss": 0.3015, "step": 303960 }, { "epoch": 87.4482163406214, "grad_norm": 1.496752142906189, "learning_rate": 0.0002510356731875719, "loss": 0.2853, "step": 303970 }, { "epoch": 87.45109321058688, "grad_norm": 1.0846163034439087, "learning_rate": 0.00025097813578826237, "loss": 0.3579, "step": 303980 }, { "epoch": 87.45397008055235, "grad_norm": 1.5198228359222412, "learning_rate": 0.0002509205983889528, "loss": 0.2264, "step": 303990 }, { "epoch": 87.45684695051784, "grad_norm": 1.2967727184295654, "learning_rate": 0.0002508630609896433, "loss": 0.3098, "step": 304000 }, { "epoch": 87.45972382048332, "grad_norm": 1.620309591293335, "learning_rate": 0.0002508055235903337, "loss": 0.279, "step": 304010 }, { "epoch": 87.4626006904488, "grad_norm": 2.122246265411377, "learning_rate": 0.0002507479861910242, "loss": 0.3095, "step": 304020 }, { "epoch": 87.46547756041427, "grad_norm": 0.9024145603179932, "learning_rate": 0.0002506904487917146, "loss": 0.3122, "step": 304030 }, { "epoch": 87.46835443037975, "grad_norm": 1.4187233448028564, "learning_rate": 0.00025063291139240505, "loss": 0.2748, "step": 304040 }, { "epoch": 87.47123130034522, "grad_norm": 1.3022159337997437, "learning_rate": 0.0002505753739930955, "loss": 0.3203, "step": 304050 }, { "epoch": 87.4741081703107, "grad_norm": 0.6292051076889038, "learning_rate": 0.00025051783659378596, "loss": 0.2665, "step": 304060 }, { "epoch": 87.47698504027618, "grad_norm": 0.9139085412025452, "learning_rate": 0.0002504602991944764, "loss": 0.3267, "step": 304070 }, { "epoch": 87.47986191024165, "grad_norm": 0.8488079905509949, "learning_rate": 0.00025040276179516687, "loss": 0.339, "step": 304080 }, { "epoch": 87.48273878020713, "grad_norm": 0.5950059294700623, "learning_rate": 0.0002503452243958573, "loss": 0.2985, "step": 304090 }, { "epoch": 87.48561565017262, "grad_norm": 0.8109608888626099, "learning_rate": 0.00025028768699654773, "loss": 0.3197, "step": 304100 }, { "epoch": 87.4884925201381, "grad_norm": 1.5682356357574463, "learning_rate": 0.00025023014959723824, "loss": 0.3169, "step": 304110 }, { "epoch": 87.49136939010357, "grad_norm": 1.5346665382385254, "learning_rate": 0.00025017261219792864, "loss": 0.2597, "step": 304120 }, { "epoch": 87.49424626006905, "grad_norm": 1.0851088762283325, "learning_rate": 0.0002501150747986191, "loss": 0.3277, "step": 304130 }, { "epoch": 87.49712313003452, "grad_norm": 2.3954594135284424, "learning_rate": 0.00025005753739930955, "loss": 0.3648, "step": 304140 }, { "epoch": 87.5, "grad_norm": 1.4471184015274048, "learning_rate": 0.00025, "loss": 0.3541, "step": 304150 }, { "epoch": 87.50287686996548, "grad_norm": 1.0053062438964844, "learning_rate": 0.00024994246260069046, "loss": 0.28, "step": 304160 }, { "epoch": 87.50575373993095, "grad_norm": 1.2918858528137207, "learning_rate": 0.0002498849252013809, "loss": 0.2951, "step": 304170 }, { "epoch": 87.50863060989643, "grad_norm": 1.5108743906021118, "learning_rate": 0.0002498273878020713, "loss": 0.4528, "step": 304180 }, { "epoch": 87.5115074798619, "grad_norm": 2.622485399246216, "learning_rate": 0.0002497698504027618, "loss": 0.3337, "step": 304190 }, { "epoch": 87.51438434982738, "grad_norm": 0.7466037273406982, "learning_rate": 0.0002497123130034523, "loss": 0.3568, "step": 304200 }, { "epoch": 87.51726121979287, "grad_norm": 0.8430892825126648, "learning_rate": 0.0002496547756041427, "loss": 0.2827, "step": 304210 }, { "epoch": 87.52013808975835, "grad_norm": 1.3491530418395996, "learning_rate": 0.00024959723820483314, "loss": 0.2871, "step": 304220 }, { "epoch": 87.52301495972382, "grad_norm": 1.4765679836273193, "learning_rate": 0.0002495397008055236, "loss": 0.2692, "step": 304230 }, { "epoch": 87.5258918296893, "grad_norm": 0.5482121706008911, "learning_rate": 0.00024948216340621405, "loss": 0.3133, "step": 304240 }, { "epoch": 87.52876869965478, "grad_norm": 0.8015373349189758, "learning_rate": 0.0002494246260069045, "loss": 0.3452, "step": 304250 }, { "epoch": 87.53164556962025, "grad_norm": 2.186523675918579, "learning_rate": 0.00024936708860759496, "loss": 0.3195, "step": 304260 }, { "epoch": 87.53452243958573, "grad_norm": 1.4897233247756958, "learning_rate": 0.00024930955120828536, "loss": 0.3434, "step": 304270 }, { "epoch": 87.5373993095512, "grad_norm": 2.9693689346313477, "learning_rate": 0.0002492520138089758, "loss": 0.3253, "step": 304280 }, { "epoch": 87.54027617951668, "grad_norm": 1.0075947046279907, "learning_rate": 0.0002491944764096663, "loss": 0.4109, "step": 304290 }, { "epoch": 87.54315304948216, "grad_norm": 1.0925325155258179, "learning_rate": 0.00024913693901035673, "loss": 0.2695, "step": 304300 }, { "epoch": 87.54602991944765, "grad_norm": 0.9631057977676392, "learning_rate": 0.0002490794016110472, "loss": 0.3383, "step": 304310 }, { "epoch": 87.54890678941312, "grad_norm": 3.0787506103515625, "learning_rate": 0.00024902186421173764, "loss": 0.3368, "step": 304320 }, { "epoch": 87.5517836593786, "grad_norm": 0.8488658666610718, "learning_rate": 0.0002489643268124281, "loss": 0.2643, "step": 304330 }, { "epoch": 87.55466052934408, "grad_norm": 0.6193538904190063, "learning_rate": 0.00024890678941311855, "loss": 0.2923, "step": 304340 }, { "epoch": 87.55753739930955, "grad_norm": 1.1865342855453491, "learning_rate": 0.000248849252013809, "loss": 0.3534, "step": 304350 }, { "epoch": 87.56041426927503, "grad_norm": 0.6893786191940308, "learning_rate": 0.0002487917146144994, "loss": 0.385, "step": 304360 }, { "epoch": 87.5632911392405, "grad_norm": 1.1890449523925781, "learning_rate": 0.00024873417721518986, "loss": 0.2827, "step": 304370 }, { "epoch": 87.56616800920598, "grad_norm": 1.1171425580978394, "learning_rate": 0.0002486766398158803, "loss": 0.2706, "step": 304380 }, { "epoch": 87.56904487917146, "grad_norm": 0.8348594307899475, "learning_rate": 0.0002486191024165708, "loss": 0.2545, "step": 304390 }, { "epoch": 87.57192174913693, "grad_norm": 1.3054654598236084, "learning_rate": 0.00024856156501726123, "loss": 0.2923, "step": 304400 }, { "epoch": 87.57479861910241, "grad_norm": 1.1067379713058472, "learning_rate": 0.0002485040276179517, "loss": 0.393, "step": 304410 }, { "epoch": 87.5776754890679, "grad_norm": 1.8392531871795654, "learning_rate": 0.00024844649021864214, "loss": 0.3684, "step": 304420 }, { "epoch": 87.58055235903338, "grad_norm": 0.9894475340843201, "learning_rate": 0.0002483889528193326, "loss": 0.293, "step": 304430 }, { "epoch": 87.58342922899885, "grad_norm": 1.0061194896697998, "learning_rate": 0.000248331415420023, "loss": 0.3376, "step": 304440 }, { "epoch": 87.58630609896433, "grad_norm": 1.4277994632720947, "learning_rate": 0.00024827387802071345, "loss": 0.2968, "step": 304450 }, { "epoch": 87.5891829689298, "grad_norm": 1.5593547821044922, "learning_rate": 0.0002482163406214039, "loss": 0.275, "step": 304460 }, { "epoch": 87.59205983889528, "grad_norm": 1.0183080434799194, "learning_rate": 0.00024815880322209436, "loss": 0.3031, "step": 304470 }, { "epoch": 87.59493670886076, "grad_norm": 0.8545345067977905, "learning_rate": 0.0002481012658227848, "loss": 0.3158, "step": 304480 }, { "epoch": 87.59781357882623, "grad_norm": 1.4127295017242432, "learning_rate": 0.0002480437284234753, "loss": 0.2678, "step": 304490 }, { "epoch": 87.60069044879171, "grad_norm": 2.0234155654907227, "learning_rate": 0.00024798619102416573, "loss": 0.3593, "step": 304500 }, { "epoch": 87.60356731875719, "grad_norm": 1.3200634717941284, "learning_rate": 0.0002479286536248562, "loss": 0.3115, "step": 304510 }, { "epoch": 87.60644418872268, "grad_norm": 1.1067570447921753, "learning_rate": 0.00024787111622554664, "loss": 0.2803, "step": 304520 }, { "epoch": 87.60932105868815, "grad_norm": 0.930537760257721, "learning_rate": 0.00024781357882623704, "loss": 0.2853, "step": 304530 }, { "epoch": 87.61219792865363, "grad_norm": 0.9285176992416382, "learning_rate": 0.0002477560414269275, "loss": 0.3161, "step": 304540 }, { "epoch": 87.6150747986191, "grad_norm": 0.8944581747055054, "learning_rate": 0.00024769850402761795, "loss": 0.2738, "step": 304550 }, { "epoch": 87.61795166858458, "grad_norm": 1.1558881998062134, "learning_rate": 0.0002476409666283084, "loss": 0.3253, "step": 304560 }, { "epoch": 87.62082853855006, "grad_norm": 0.8871727585792542, "learning_rate": 0.00024758342922899887, "loss": 0.2747, "step": 304570 }, { "epoch": 87.62370540851553, "grad_norm": 1.6281737089157104, "learning_rate": 0.00024752589182968927, "loss": 0.2621, "step": 304580 }, { "epoch": 87.62658227848101, "grad_norm": 1.2244850397109985, "learning_rate": 0.0002474683544303797, "loss": 0.3226, "step": 304590 }, { "epoch": 87.62945914844649, "grad_norm": 1.1361193656921387, "learning_rate": 0.00024741081703107023, "loss": 0.2836, "step": 304600 }, { "epoch": 87.63233601841196, "grad_norm": 0.9857292771339417, "learning_rate": 0.00024735327963176063, "loss": 0.2418, "step": 304610 }, { "epoch": 87.63521288837744, "grad_norm": 1.245047688484192, "learning_rate": 0.0002472957422324511, "loss": 0.3184, "step": 304620 }, { "epoch": 87.63808975834293, "grad_norm": 1.629997730255127, "learning_rate": 0.00024723820483314154, "loss": 0.3255, "step": 304630 }, { "epoch": 87.6409666283084, "grad_norm": 1.241897463798523, "learning_rate": 0.000247180667433832, "loss": 0.2988, "step": 304640 }, { "epoch": 87.64384349827388, "grad_norm": 0.9448589086532593, "learning_rate": 0.00024712313003452246, "loss": 0.3328, "step": 304650 }, { "epoch": 87.64672036823936, "grad_norm": 1.2798044681549072, "learning_rate": 0.0002470655926352129, "loss": 0.3987, "step": 304660 }, { "epoch": 87.64959723820483, "grad_norm": 0.8950328230857849, "learning_rate": 0.0002470080552359033, "loss": 0.414, "step": 304670 }, { "epoch": 87.65247410817031, "grad_norm": 0.7944730520248413, "learning_rate": 0.00024695051783659377, "loss": 0.2836, "step": 304680 }, { "epoch": 87.65535097813579, "grad_norm": 1.866788387298584, "learning_rate": 0.0002468929804372843, "loss": 0.3108, "step": 304690 }, { "epoch": 87.65822784810126, "grad_norm": 0.8584800958633423, "learning_rate": 0.0002468354430379747, "loss": 0.2719, "step": 304700 }, { "epoch": 87.66110471806674, "grad_norm": 1.4053291082382202, "learning_rate": 0.00024677790563866513, "loss": 0.2934, "step": 304710 }, { "epoch": 87.66398158803221, "grad_norm": 0.7895085215568542, "learning_rate": 0.0002467203682393556, "loss": 0.3191, "step": 304720 }, { "epoch": 87.6668584579977, "grad_norm": 0.9036996960639954, "learning_rate": 0.00024666283084004604, "loss": 0.2969, "step": 304730 }, { "epoch": 87.66973532796318, "grad_norm": 0.709044337272644, "learning_rate": 0.0002466052934407365, "loss": 0.3136, "step": 304740 }, { "epoch": 87.67261219792866, "grad_norm": 1.1345895528793335, "learning_rate": 0.00024654775604142696, "loss": 0.2907, "step": 304750 }, { "epoch": 87.67548906789413, "grad_norm": 1.0978567600250244, "learning_rate": 0.00024649021864211736, "loss": 0.2917, "step": 304760 }, { "epoch": 87.67836593785961, "grad_norm": 1.3061531782150269, "learning_rate": 0.0002464326812428078, "loss": 0.2842, "step": 304770 }, { "epoch": 87.68124280782509, "grad_norm": 0.9709532260894775, "learning_rate": 0.00024637514384349827, "loss": 0.3042, "step": 304780 }, { "epoch": 87.68411967779056, "grad_norm": 1.5298504829406738, "learning_rate": 0.0002463176064441887, "loss": 0.3124, "step": 304790 }, { "epoch": 87.68699654775604, "grad_norm": 1.124938726425171, "learning_rate": 0.0002462600690448792, "loss": 0.2989, "step": 304800 }, { "epoch": 87.68987341772151, "grad_norm": 1.2676366567611694, "learning_rate": 0.00024620253164556963, "loss": 0.2905, "step": 304810 }, { "epoch": 87.69275028768699, "grad_norm": 0.7986769676208496, "learning_rate": 0.0002461449942462601, "loss": 0.254, "step": 304820 }, { "epoch": 87.69562715765247, "grad_norm": 1.2525629997253418, "learning_rate": 0.00024608745684695055, "loss": 0.2387, "step": 304830 }, { "epoch": 87.69850402761796, "grad_norm": 2.0120322704315186, "learning_rate": 0.00024602991944764095, "loss": 0.3268, "step": 304840 }, { "epoch": 87.70138089758343, "grad_norm": 0.9113065004348755, "learning_rate": 0.0002459723820483314, "loss": 0.3202, "step": 304850 }, { "epoch": 87.70425776754891, "grad_norm": 1.0189323425292969, "learning_rate": 0.00024591484464902186, "loss": 0.3166, "step": 304860 }, { "epoch": 87.70713463751439, "grad_norm": 1.657731056213379, "learning_rate": 0.0002458573072497123, "loss": 0.3347, "step": 304870 }, { "epoch": 87.71001150747986, "grad_norm": 1.1220202445983887, "learning_rate": 0.00024579976985040277, "loss": 0.2747, "step": 304880 }, { "epoch": 87.71288837744534, "grad_norm": 1.1827466487884521, "learning_rate": 0.0002457422324510932, "loss": 0.2897, "step": 304890 }, { "epoch": 87.71576524741081, "grad_norm": 1.7662800550460815, "learning_rate": 0.0002456846950517837, "loss": 0.3717, "step": 304900 }, { "epoch": 87.71864211737629, "grad_norm": 1.4999542236328125, "learning_rate": 0.00024562715765247414, "loss": 0.3049, "step": 304910 }, { "epoch": 87.72151898734177, "grad_norm": 1.90281081199646, "learning_rate": 0.0002455696202531646, "loss": 0.3318, "step": 304920 }, { "epoch": 87.72439585730724, "grad_norm": 1.3502678871154785, "learning_rate": 0.000245512082853855, "loss": 0.3258, "step": 304930 }, { "epoch": 87.72727272727273, "grad_norm": 0.7626177668571472, "learning_rate": 0.00024545454545454545, "loss": 0.3321, "step": 304940 }, { "epoch": 87.73014959723821, "grad_norm": 0.9905557036399841, "learning_rate": 0.0002453970080552359, "loss": 0.3003, "step": 304950 }, { "epoch": 87.73302646720369, "grad_norm": 1.2645856142044067, "learning_rate": 0.00024533947065592636, "loss": 0.2724, "step": 304960 }, { "epoch": 87.73590333716916, "grad_norm": 1.2222349643707275, "learning_rate": 0.0002452819332566168, "loss": 0.3191, "step": 304970 }, { "epoch": 87.73878020713464, "grad_norm": 1.5410292148590088, "learning_rate": 0.0002452243958573072, "loss": 0.3181, "step": 304980 }, { "epoch": 87.74165707710011, "grad_norm": 0.8268160223960876, "learning_rate": 0.0002451668584579977, "loss": 0.3083, "step": 304990 }, { "epoch": 87.74453394706559, "grad_norm": 0.8602206707000732, "learning_rate": 0.0002451093210586882, "loss": 0.2754, "step": 305000 }, { "epoch": 87.74741081703107, "grad_norm": 1.855495810508728, "learning_rate": 0.0002450517836593786, "loss": 0.3043, "step": 305010 }, { "epoch": 87.75028768699654, "grad_norm": 1.5811678171157837, "learning_rate": 0.00024499424626006904, "loss": 0.3151, "step": 305020 }, { "epoch": 87.75316455696202, "grad_norm": 1.5512853860855103, "learning_rate": 0.0002449367088607595, "loss": 0.3225, "step": 305030 }, { "epoch": 87.75604142692751, "grad_norm": 0.8102391958236694, "learning_rate": 0.00024487917146144995, "loss": 0.3078, "step": 305040 }, { "epoch": 87.75891829689299, "grad_norm": 1.4690346717834473, "learning_rate": 0.0002448216340621404, "loss": 0.3539, "step": 305050 }, { "epoch": 87.76179516685846, "grad_norm": 1.7904167175292969, "learning_rate": 0.00024476409666283086, "loss": 0.2799, "step": 305060 }, { "epoch": 87.76467203682394, "grad_norm": 0.8540557026863098, "learning_rate": 0.00024470655926352126, "loss": 0.3725, "step": 305070 }, { "epoch": 87.76754890678941, "grad_norm": 1.228570580482483, "learning_rate": 0.0002446490218642117, "loss": 0.3225, "step": 305080 }, { "epoch": 87.77042577675489, "grad_norm": 0.9392231702804565, "learning_rate": 0.0002445914844649022, "loss": 0.2724, "step": 305090 }, { "epoch": 87.77330264672037, "grad_norm": 0.6657331585884094, "learning_rate": 0.00024453394706559263, "loss": 0.3011, "step": 305100 }, { "epoch": 87.77617951668584, "grad_norm": 1.14120614528656, "learning_rate": 0.0002444764096662831, "loss": 0.3141, "step": 305110 }, { "epoch": 87.77905638665132, "grad_norm": 2.458660125732422, "learning_rate": 0.00024441887226697354, "loss": 0.3388, "step": 305120 }, { "epoch": 87.7819332566168, "grad_norm": 1.538048267364502, "learning_rate": 0.000244361334867664, "loss": 0.3896, "step": 305130 }, { "epoch": 87.78481012658227, "grad_norm": 1.5780256986618042, "learning_rate": 0.00024430379746835445, "loss": 0.3024, "step": 305140 }, { "epoch": 87.78768699654776, "grad_norm": 0.840949296951294, "learning_rate": 0.0002442462600690449, "loss": 0.3403, "step": 305150 }, { "epoch": 87.79056386651324, "grad_norm": 2.077113389968872, "learning_rate": 0.0002441887226697353, "loss": 0.3282, "step": 305160 }, { "epoch": 87.79344073647871, "grad_norm": 0.9421226382255554, "learning_rate": 0.00024413118527042576, "loss": 0.2812, "step": 305170 }, { "epoch": 87.79631760644419, "grad_norm": 1.2890164852142334, "learning_rate": 0.00024407364787111624, "loss": 0.2747, "step": 305180 }, { "epoch": 87.79919447640967, "grad_norm": 0.9860957264900208, "learning_rate": 0.0002440161104718067, "loss": 0.2952, "step": 305190 }, { "epoch": 87.80207134637514, "grad_norm": 1.159425973892212, "learning_rate": 0.00024395857307249713, "loss": 0.291, "step": 305200 }, { "epoch": 87.80494821634062, "grad_norm": 1.277989149093628, "learning_rate": 0.00024390103567318758, "loss": 0.3464, "step": 305210 }, { "epoch": 87.8078250863061, "grad_norm": 0.7214768528938293, "learning_rate": 0.00024384349827387804, "loss": 0.2572, "step": 305220 }, { "epoch": 87.81070195627157, "grad_norm": 1.0410230159759521, "learning_rate": 0.00024378596087456847, "loss": 0.2437, "step": 305230 }, { "epoch": 87.81357882623705, "grad_norm": 2.673372507095337, "learning_rate": 0.00024372842347525892, "loss": 0.3142, "step": 305240 }, { "epoch": 87.81645569620254, "grad_norm": 1.0134327411651611, "learning_rate": 0.00024367088607594938, "loss": 0.3146, "step": 305250 }, { "epoch": 87.81933256616801, "grad_norm": 0.9234111905097961, "learning_rate": 0.0002436133486766398, "loss": 0.2751, "step": 305260 }, { "epoch": 87.82220943613349, "grad_norm": 0.8698591589927673, "learning_rate": 0.00024355581127733026, "loss": 0.2889, "step": 305270 }, { "epoch": 87.82508630609897, "grad_norm": 1.21244215965271, "learning_rate": 0.00024349827387802075, "loss": 0.2954, "step": 305280 }, { "epoch": 87.82796317606444, "grad_norm": 1.0894755125045776, "learning_rate": 0.00024344073647871117, "loss": 0.3096, "step": 305290 }, { "epoch": 87.83084004602992, "grad_norm": 2.5020039081573486, "learning_rate": 0.00024338319907940163, "loss": 0.3396, "step": 305300 }, { "epoch": 87.8337169159954, "grad_norm": 1.6105355024337769, "learning_rate": 0.00024332566168009206, "loss": 0.3665, "step": 305310 }, { "epoch": 87.83659378596087, "grad_norm": 1.7978496551513672, "learning_rate": 0.0002432681242807825, "loss": 0.3056, "step": 305320 }, { "epoch": 87.83947065592635, "grad_norm": 1.039460301399231, "learning_rate": 0.00024321058688147297, "loss": 0.268, "step": 305330 }, { "epoch": 87.84234752589182, "grad_norm": 1.8800495862960815, "learning_rate": 0.0002431530494821634, "loss": 0.3258, "step": 305340 }, { "epoch": 87.8452243958573, "grad_norm": 0.8354871273040771, "learning_rate": 0.00024309551208285385, "loss": 0.2823, "step": 305350 }, { "epoch": 87.84810126582279, "grad_norm": 0.827617347240448, "learning_rate": 0.0002430379746835443, "loss": 0.3128, "step": 305360 }, { "epoch": 87.85097813578827, "grad_norm": 1.083180546760559, "learning_rate": 0.00024298043728423474, "loss": 0.2985, "step": 305370 }, { "epoch": 87.85385500575374, "grad_norm": 1.9325531721115112, "learning_rate": 0.00024292289988492522, "loss": 0.3336, "step": 305380 }, { "epoch": 87.85673187571922, "grad_norm": 0.8533260822296143, "learning_rate": 0.00024286536248561567, "loss": 0.3048, "step": 305390 }, { "epoch": 87.8596087456847, "grad_norm": 1.1359689235687256, "learning_rate": 0.0002428078250863061, "loss": 0.3307, "step": 305400 }, { "epoch": 87.86248561565017, "grad_norm": 0.9760831594467163, "learning_rate": 0.00024275028768699656, "loss": 0.4448, "step": 305410 }, { "epoch": 87.86536248561565, "grad_norm": 0.9442492723464966, "learning_rate": 0.00024269275028768701, "loss": 0.3315, "step": 305420 }, { "epoch": 87.86823935558112, "grad_norm": 2.001220464706421, "learning_rate": 0.00024263521288837744, "loss": 0.3258, "step": 305430 }, { "epoch": 87.8711162255466, "grad_norm": 0.7709594964981079, "learning_rate": 0.0002425776754890679, "loss": 0.3288, "step": 305440 }, { "epoch": 87.87399309551208, "grad_norm": 0.9388510584831238, "learning_rate": 0.00024252013808975835, "loss": 0.2952, "step": 305450 }, { "epoch": 87.87686996547757, "grad_norm": 1.2603540420532227, "learning_rate": 0.00024246260069044878, "loss": 0.3395, "step": 305460 }, { "epoch": 87.87974683544304, "grad_norm": 0.9639165997505188, "learning_rate": 0.00024240506329113924, "loss": 0.2628, "step": 305470 }, { "epoch": 87.88262370540852, "grad_norm": 1.5084277391433716, "learning_rate": 0.00024234752589182972, "loss": 0.3219, "step": 305480 }, { "epoch": 87.885500575374, "grad_norm": 1.5821398496627808, "learning_rate": 0.00024228998849252015, "loss": 0.3869, "step": 305490 }, { "epoch": 87.88837744533947, "grad_norm": 1.0720003843307495, "learning_rate": 0.0002422324510932106, "loss": 0.2537, "step": 305500 }, { "epoch": 87.89125431530495, "grad_norm": 0.7946493625640869, "learning_rate": 0.00024217491369390103, "loss": 0.3088, "step": 305510 }, { "epoch": 87.89413118527042, "grad_norm": 1.359798789024353, "learning_rate": 0.0002421173762945915, "loss": 0.3038, "step": 305520 }, { "epoch": 87.8970080552359, "grad_norm": 0.9012067317962646, "learning_rate": 0.00024205983889528194, "loss": 0.3606, "step": 305530 }, { "epoch": 87.89988492520138, "grad_norm": 2.154832124710083, "learning_rate": 0.00024200230149597237, "loss": 0.3315, "step": 305540 }, { "epoch": 87.90276179516685, "grad_norm": 0.8937297463417053, "learning_rate": 0.00024194476409666283, "loss": 0.3333, "step": 305550 }, { "epoch": 87.90563866513233, "grad_norm": 1.2357555627822876, "learning_rate": 0.00024188722669735328, "loss": 0.272, "step": 305560 }, { "epoch": 87.90851553509782, "grad_norm": 0.9156897068023682, "learning_rate": 0.0002418296892980437, "loss": 0.3055, "step": 305570 }, { "epoch": 87.9113924050633, "grad_norm": 0.678461492061615, "learning_rate": 0.0002417721518987342, "loss": 0.32, "step": 305580 }, { "epoch": 87.91426927502877, "grad_norm": 1.1782104969024658, "learning_rate": 0.00024171461449942465, "loss": 0.3106, "step": 305590 }, { "epoch": 87.91714614499425, "grad_norm": 1.0416498184204102, "learning_rate": 0.00024165707710011508, "loss": 0.2692, "step": 305600 }, { "epoch": 87.92002301495972, "grad_norm": 2.3279991149902344, "learning_rate": 0.00024159953970080553, "loss": 0.3239, "step": 305610 }, { "epoch": 87.9228998849252, "grad_norm": 2.193899154663086, "learning_rate": 0.000241542002301496, "loss": 0.3354, "step": 305620 }, { "epoch": 87.92577675489068, "grad_norm": 1.7301477193832397, "learning_rate": 0.00024148446490218642, "loss": 0.2908, "step": 305630 }, { "epoch": 87.92865362485615, "grad_norm": 5.55592155456543, "learning_rate": 0.00024142692750287687, "loss": 0.3425, "step": 305640 }, { "epoch": 87.93153049482163, "grad_norm": 1.044947862625122, "learning_rate": 0.00024136939010356733, "loss": 0.2757, "step": 305650 }, { "epoch": 87.9344073647871, "grad_norm": 1.2332314252853394, "learning_rate": 0.00024131185270425776, "loss": 0.2926, "step": 305660 }, { "epoch": 87.9372842347526, "grad_norm": 0.8123664855957031, "learning_rate": 0.0002412543153049482, "loss": 0.291, "step": 305670 }, { "epoch": 87.94016110471807, "grad_norm": 1.6558386087417603, "learning_rate": 0.0002411967779056387, "loss": 0.3608, "step": 305680 }, { "epoch": 87.94303797468355, "grad_norm": 1.3985675573349, "learning_rate": 0.00024113924050632912, "loss": 0.3425, "step": 305690 }, { "epoch": 87.94591484464902, "grad_norm": 0.7461369037628174, "learning_rate": 0.00024108170310701958, "loss": 0.3511, "step": 305700 }, { "epoch": 87.9487917146145, "grad_norm": 1.2226290702819824, "learning_rate": 0.00024102416570771, "loss": 0.3602, "step": 305710 }, { "epoch": 87.95166858457998, "grad_norm": 0.9478652477264404, "learning_rate": 0.00024096662830840046, "loss": 0.2958, "step": 305720 }, { "epoch": 87.95454545454545, "grad_norm": 0.6652852892875671, "learning_rate": 0.00024090909090909092, "loss": 0.2622, "step": 305730 }, { "epoch": 87.95742232451093, "grad_norm": 0.7356266379356384, "learning_rate": 0.00024085155350978135, "loss": 0.4208, "step": 305740 }, { "epoch": 87.9602991944764, "grad_norm": 1.5251291990280151, "learning_rate": 0.0002407940161104718, "loss": 0.3223, "step": 305750 }, { "epoch": 87.96317606444188, "grad_norm": 0.9909951090812683, "learning_rate": 0.00024073647871116226, "loss": 0.3024, "step": 305760 }, { "epoch": 87.96605293440736, "grad_norm": 1.755087971687317, "learning_rate": 0.0002406789413118527, "loss": 0.3995, "step": 305770 }, { "epoch": 87.96892980437285, "grad_norm": 1.551135540008545, "learning_rate": 0.00024062140391254317, "loss": 0.3139, "step": 305780 }, { "epoch": 87.97180667433832, "grad_norm": 1.1122920513153076, "learning_rate": 0.00024056386651323362, "loss": 0.3446, "step": 305790 }, { "epoch": 87.9746835443038, "grad_norm": 1.3158892393112183, "learning_rate": 0.00024050632911392405, "loss": 0.3454, "step": 305800 }, { "epoch": 87.97756041426928, "grad_norm": 1.1745519638061523, "learning_rate": 0.0002404487917146145, "loss": 0.3158, "step": 305810 }, { "epoch": 87.98043728423475, "grad_norm": 1.6196907758712769, "learning_rate": 0.00024039125431530496, "loss": 0.3218, "step": 305820 }, { "epoch": 87.98331415420023, "grad_norm": 1.0340380668640137, "learning_rate": 0.0002403337169159954, "loss": 0.3894, "step": 305830 }, { "epoch": 87.9861910241657, "grad_norm": 0.7573241591453552, "learning_rate": 0.00024027617951668585, "loss": 0.3483, "step": 305840 }, { "epoch": 87.98906789413118, "grad_norm": 0.7218189239501953, "learning_rate": 0.0002402186421173763, "loss": 0.3229, "step": 305850 }, { "epoch": 87.99194476409666, "grad_norm": 1.0685524940490723, "learning_rate": 0.00024016110471806673, "loss": 0.3457, "step": 305860 }, { "epoch": 87.99482163406213, "grad_norm": 1.4563835859298706, "learning_rate": 0.0002401035673187572, "loss": 0.3236, "step": 305870 }, { "epoch": 87.99769850402762, "grad_norm": 0.7197810411453247, "learning_rate": 0.00024004602991944767, "loss": 0.2309, "step": 305880 }, { "epoch": 88.0005753739931, "grad_norm": 1.5270872116088867, "learning_rate": 0.0002399884925201381, "loss": 0.3435, "step": 305890 }, { "epoch": 88.00345224395858, "grad_norm": 1.6433939933776855, "learning_rate": 0.00023993095512082855, "loss": 0.2931, "step": 305900 }, { "epoch": 88.00632911392405, "grad_norm": 1.8855830430984497, "learning_rate": 0.00023987341772151898, "loss": 0.3091, "step": 305910 }, { "epoch": 88.00920598388953, "grad_norm": 1.141223669052124, "learning_rate": 0.00023981588032220944, "loss": 0.2909, "step": 305920 }, { "epoch": 88.012082853855, "grad_norm": 0.9314973950386047, "learning_rate": 0.0002397583429228999, "loss": 0.2558, "step": 305930 }, { "epoch": 88.01495972382048, "grad_norm": 0.9293624758720398, "learning_rate": 0.00023970080552359032, "loss": 0.3021, "step": 305940 }, { "epoch": 88.01783659378596, "grad_norm": 1.099239468574524, "learning_rate": 0.00023964326812428078, "loss": 0.3247, "step": 305950 }, { "epoch": 88.02071346375143, "grad_norm": 1.1072089672088623, "learning_rate": 0.00023958573072497123, "loss": 0.3137, "step": 305960 }, { "epoch": 88.02359033371691, "grad_norm": 1.4225389957427979, "learning_rate": 0.0002395281933256617, "loss": 0.2765, "step": 305970 }, { "epoch": 88.02646720368239, "grad_norm": 1.1060030460357666, "learning_rate": 0.00023947065592635214, "loss": 0.2834, "step": 305980 }, { "epoch": 88.02934407364788, "grad_norm": 1.2656649351119995, "learning_rate": 0.0002394131185270426, "loss": 0.2673, "step": 305990 }, { "epoch": 88.03222094361335, "grad_norm": 0.7276740670204163, "learning_rate": 0.00023935558112773303, "loss": 0.3083, "step": 306000 }, { "epoch": 88.03509781357883, "grad_norm": 1.1069241762161255, "learning_rate": 0.00023929804372842348, "loss": 0.2689, "step": 306010 }, { "epoch": 88.0379746835443, "grad_norm": 0.7982574105262756, "learning_rate": 0.00023924050632911394, "loss": 0.2592, "step": 306020 }, { "epoch": 88.04085155350978, "grad_norm": 1.0652292966842651, "learning_rate": 0.00023918296892980437, "loss": 0.2973, "step": 306030 }, { "epoch": 88.04372842347526, "grad_norm": 1.5136219263076782, "learning_rate": 0.00023912543153049482, "loss": 0.3159, "step": 306040 }, { "epoch": 88.04660529344073, "grad_norm": 1.8884153366088867, "learning_rate": 0.00023906789413118528, "loss": 0.3201, "step": 306050 }, { "epoch": 88.04948216340621, "grad_norm": 0.8809701800346375, "learning_rate": 0.0002390103567318757, "loss": 0.3167, "step": 306060 }, { "epoch": 88.05235903337169, "grad_norm": 2.091559648513794, "learning_rate": 0.0002389528193325662, "loss": 0.3306, "step": 306070 }, { "epoch": 88.05523590333716, "grad_norm": 2.565619707107544, "learning_rate": 0.00023889528193325664, "loss": 0.2866, "step": 306080 }, { "epoch": 88.05811277330265, "grad_norm": 1.171618938446045, "learning_rate": 0.00023883774453394707, "loss": 0.31, "step": 306090 }, { "epoch": 88.06098964326813, "grad_norm": 0.7586838603019714, "learning_rate": 0.00023878020713463753, "loss": 0.2555, "step": 306100 }, { "epoch": 88.0638665132336, "grad_norm": 1.0895313024520874, "learning_rate": 0.00023872266973532796, "loss": 0.2575, "step": 306110 }, { "epoch": 88.06674338319908, "grad_norm": 1.0992631912231445, "learning_rate": 0.0002386651323360184, "loss": 0.3006, "step": 306120 }, { "epoch": 88.06962025316456, "grad_norm": 0.8131331205368042, "learning_rate": 0.00023860759493670887, "loss": 0.3161, "step": 306130 }, { "epoch": 88.07249712313003, "grad_norm": 1.3476601839065552, "learning_rate": 0.0002385500575373993, "loss": 0.2762, "step": 306140 }, { "epoch": 88.07537399309551, "grad_norm": 1.0052130222320557, "learning_rate": 0.00023849252013808975, "loss": 0.3416, "step": 306150 }, { "epoch": 88.07825086306099, "grad_norm": 1.5749763250350952, "learning_rate": 0.0002384349827387802, "loss": 0.3157, "step": 306160 }, { "epoch": 88.08112773302646, "grad_norm": 2.073458194732666, "learning_rate": 0.00023837744533947066, "loss": 0.2831, "step": 306170 }, { "epoch": 88.08400460299194, "grad_norm": 0.9379890561103821, "learning_rate": 0.00023831990794016112, "loss": 0.3182, "step": 306180 }, { "epoch": 88.08688147295742, "grad_norm": 0.9902489185333252, "learning_rate": 0.00023826237054085157, "loss": 0.3686, "step": 306190 }, { "epoch": 88.0897583429229, "grad_norm": 1.0752681493759155, "learning_rate": 0.000238204833141542, "loss": 0.2353, "step": 306200 }, { "epoch": 88.09263521288838, "grad_norm": 1.0566152334213257, "learning_rate": 0.00023814729574223246, "loss": 0.251, "step": 306210 }, { "epoch": 88.09551208285386, "grad_norm": 0.7092938423156738, "learning_rate": 0.0002380897583429229, "loss": 0.2401, "step": 306220 }, { "epoch": 88.09838895281933, "grad_norm": 0.7782754898071289, "learning_rate": 0.00023803222094361334, "loss": 0.2887, "step": 306230 }, { "epoch": 88.10126582278481, "grad_norm": 1.0643247365951538, "learning_rate": 0.0002379746835443038, "loss": 0.3119, "step": 306240 }, { "epoch": 88.10414269275029, "grad_norm": 1.3205214738845825, "learning_rate": 0.00023791714614499425, "loss": 0.272, "step": 306250 }, { "epoch": 88.10701956271576, "grad_norm": 1.2980300188064575, "learning_rate": 0.0002378596087456847, "loss": 0.3038, "step": 306260 }, { "epoch": 88.10989643268124, "grad_norm": 2.1542553901672363, "learning_rate": 0.00023780207134637516, "loss": 0.3012, "step": 306270 }, { "epoch": 88.11277330264672, "grad_norm": 1.0392844676971436, "learning_rate": 0.00023774453394706562, "loss": 0.3091, "step": 306280 }, { "epoch": 88.11565017261219, "grad_norm": 0.9076734185218811, "learning_rate": 0.00023768699654775605, "loss": 0.3346, "step": 306290 }, { "epoch": 88.11852704257768, "grad_norm": 1.0659762620925903, "learning_rate": 0.0002376294591484465, "loss": 0.2576, "step": 306300 }, { "epoch": 88.12140391254316, "grad_norm": 0.8330084681510925, "learning_rate": 0.00023757192174913693, "loss": 0.2618, "step": 306310 }, { "epoch": 88.12428078250863, "grad_norm": 0.8277826905250549, "learning_rate": 0.00023751438434982739, "loss": 0.2946, "step": 306320 }, { "epoch": 88.12715765247411, "grad_norm": 2.103802442550659, "learning_rate": 0.00023745684695051784, "loss": 0.3473, "step": 306330 }, { "epoch": 88.13003452243959, "grad_norm": 0.9659684896469116, "learning_rate": 0.00023739930955120827, "loss": 0.3579, "step": 306340 }, { "epoch": 88.13291139240506, "grad_norm": 0.6827701926231384, "learning_rate": 0.00023734177215189873, "loss": 0.314, "step": 306350 }, { "epoch": 88.13578826237054, "grad_norm": 0.9550127387046814, "learning_rate": 0.0002372842347525892, "loss": 0.3251, "step": 306360 }, { "epoch": 88.13866513233602, "grad_norm": 1.1379632949829102, "learning_rate": 0.00023722669735327964, "loss": 0.2787, "step": 306370 }, { "epoch": 88.14154200230149, "grad_norm": 0.968679666519165, "learning_rate": 0.0002371691599539701, "loss": 0.3981, "step": 306380 }, { "epoch": 88.14441887226697, "grad_norm": 1.6114991903305054, "learning_rate": 0.00023711162255466055, "loss": 0.3192, "step": 306390 }, { "epoch": 88.14729574223244, "grad_norm": 0.6647849082946777, "learning_rate": 0.00023705408515535098, "loss": 0.2863, "step": 306400 }, { "epoch": 88.15017261219793, "grad_norm": 0.7708958387374878, "learning_rate": 0.00023699654775604143, "loss": 0.3944, "step": 306410 }, { "epoch": 88.15304948216341, "grad_norm": 0.6829400658607483, "learning_rate": 0.0002369390103567319, "loss": 0.2697, "step": 306420 }, { "epoch": 88.15592635212889, "grad_norm": 0.8876635432243347, "learning_rate": 0.00023688147295742231, "loss": 0.3048, "step": 306430 }, { "epoch": 88.15880322209436, "grad_norm": 1.2117433547973633, "learning_rate": 0.00023682393555811277, "loss": 0.2193, "step": 306440 }, { "epoch": 88.16168009205984, "grad_norm": 0.948621392250061, "learning_rate": 0.00023676639815880323, "loss": 0.2743, "step": 306450 }, { "epoch": 88.16455696202532, "grad_norm": 1.2357518672943115, "learning_rate": 0.00023670886075949368, "loss": 0.2307, "step": 306460 }, { "epoch": 88.16743383199079, "grad_norm": 1.0197219848632812, "learning_rate": 0.00023665132336018414, "loss": 0.2935, "step": 306470 }, { "epoch": 88.17031070195627, "grad_norm": 0.5890368223190308, "learning_rate": 0.0002365937859608746, "loss": 0.2636, "step": 306480 }, { "epoch": 88.17318757192174, "grad_norm": 1.4896291494369507, "learning_rate": 0.00023653624856156502, "loss": 0.2761, "step": 306490 }, { "epoch": 88.17606444188722, "grad_norm": 0.7447121739387512, "learning_rate": 0.00023647871116225548, "loss": 0.2639, "step": 306500 }, { "epoch": 88.17894131185271, "grad_norm": 0.7081153392791748, "learning_rate": 0.0002364211737629459, "loss": 0.3386, "step": 306510 }, { "epoch": 88.18181818181819, "grad_norm": 2.1698694229125977, "learning_rate": 0.00023636363636363636, "loss": 0.2947, "step": 306520 }, { "epoch": 88.18469505178366, "grad_norm": 0.8265420794487, "learning_rate": 0.00023630609896432682, "loss": 0.2402, "step": 306530 }, { "epoch": 88.18757192174914, "grad_norm": 1.4377962350845337, "learning_rate": 0.00023624856156501724, "loss": 0.3001, "step": 306540 }, { "epoch": 88.19044879171462, "grad_norm": 0.7128661274909973, "learning_rate": 0.0002361910241657077, "loss": 0.2623, "step": 306550 }, { "epoch": 88.19332566168009, "grad_norm": 1.1395111083984375, "learning_rate": 0.00023613348676639818, "loss": 0.3114, "step": 306560 }, { "epoch": 88.19620253164557, "grad_norm": 1.1030644178390503, "learning_rate": 0.0002360759493670886, "loss": 0.338, "step": 306570 }, { "epoch": 88.19907940161104, "grad_norm": 1.0760631561279297, "learning_rate": 0.00023601841196777907, "loss": 0.2733, "step": 306580 }, { "epoch": 88.20195627157652, "grad_norm": 0.9905709624290466, "learning_rate": 0.00023596087456846952, "loss": 0.3381, "step": 306590 }, { "epoch": 88.204833141542, "grad_norm": 1.2073622941970825, "learning_rate": 0.00023590333716915995, "loss": 0.3035, "step": 306600 }, { "epoch": 88.20771001150747, "grad_norm": 1.1320356130599976, "learning_rate": 0.0002358457997698504, "loss": 0.2987, "step": 306610 }, { "epoch": 88.21058688147296, "grad_norm": 0.922261655330658, "learning_rate": 0.00023578826237054086, "loss": 0.2845, "step": 306620 }, { "epoch": 88.21346375143844, "grad_norm": 1.4385576248168945, "learning_rate": 0.0002357307249712313, "loss": 0.2874, "step": 306630 }, { "epoch": 88.21634062140392, "grad_norm": 0.9237313866615295, "learning_rate": 0.00023567318757192174, "loss": 0.259, "step": 306640 }, { "epoch": 88.21921749136939, "grad_norm": 1.0638056993484497, "learning_rate": 0.0002356156501726122, "loss": 0.239, "step": 306650 }, { "epoch": 88.22209436133487, "grad_norm": 0.8038231134414673, "learning_rate": 0.00023555811277330266, "loss": 0.2624, "step": 306660 }, { "epoch": 88.22497123130034, "grad_norm": 1.4602736234664917, "learning_rate": 0.0002355005753739931, "loss": 0.316, "step": 306670 }, { "epoch": 88.22784810126582, "grad_norm": 0.9651046395301819, "learning_rate": 0.00023544303797468357, "loss": 0.2874, "step": 306680 }, { "epoch": 88.2307249712313, "grad_norm": 0.9408304691314697, "learning_rate": 0.000235385500575374, "loss": 0.2795, "step": 306690 }, { "epoch": 88.23360184119677, "grad_norm": 0.7009096145629883, "learning_rate": 0.00023532796317606445, "loss": 0.2971, "step": 306700 }, { "epoch": 88.23647871116225, "grad_norm": 1.0420126914978027, "learning_rate": 0.0002352704257767549, "loss": 0.2844, "step": 306710 }, { "epoch": 88.23935558112774, "grad_norm": 0.8817811012268066, "learning_rate": 0.00023521288837744533, "loss": 0.3055, "step": 306720 }, { "epoch": 88.24223245109322, "grad_norm": 0.7514153718948364, "learning_rate": 0.0002351553509781358, "loss": 0.3487, "step": 306730 }, { "epoch": 88.24510932105869, "grad_norm": 1.0296956300735474, "learning_rate": 0.00023509781357882622, "loss": 0.3529, "step": 306740 }, { "epoch": 88.24798619102417, "grad_norm": 0.8652594685554504, "learning_rate": 0.0002350402761795167, "loss": 0.358, "step": 306750 }, { "epoch": 88.25086306098964, "grad_norm": 0.949921727180481, "learning_rate": 0.00023498273878020716, "loss": 0.326, "step": 306760 }, { "epoch": 88.25373993095512, "grad_norm": 3.3686068058013916, "learning_rate": 0.00023492520138089759, "loss": 0.3138, "step": 306770 }, { "epoch": 88.2566168009206, "grad_norm": 1.336502194404602, "learning_rate": 0.00023486766398158804, "loss": 0.2777, "step": 306780 }, { "epoch": 88.25949367088607, "grad_norm": 0.9209641218185425, "learning_rate": 0.0002348101265822785, "loss": 0.3113, "step": 306790 }, { "epoch": 88.26237054085155, "grad_norm": 0.7619299292564392, "learning_rate": 0.00023475258918296892, "loss": 0.323, "step": 306800 }, { "epoch": 88.26524741081703, "grad_norm": 0.9919434189796448, "learning_rate": 0.00023469505178365938, "loss": 0.3071, "step": 306810 }, { "epoch": 88.2681242807825, "grad_norm": 0.9282394051551819, "learning_rate": 0.00023463751438434984, "loss": 0.2797, "step": 306820 }, { "epoch": 88.27100115074799, "grad_norm": 1.879081130027771, "learning_rate": 0.00023457997698504026, "loss": 0.2884, "step": 306830 }, { "epoch": 88.27387802071347, "grad_norm": 1.3955605030059814, "learning_rate": 0.00023452243958573072, "loss": 0.2358, "step": 306840 }, { "epoch": 88.27675489067894, "grad_norm": 1.486092448234558, "learning_rate": 0.0002344649021864212, "loss": 0.2989, "step": 306850 }, { "epoch": 88.27963176064442, "grad_norm": 0.8834573030471802, "learning_rate": 0.00023440736478711163, "loss": 0.3779, "step": 306860 }, { "epoch": 88.2825086306099, "grad_norm": 0.7994996905326843, "learning_rate": 0.00023434982738780209, "loss": 0.2768, "step": 306870 }, { "epoch": 88.28538550057537, "grad_norm": 0.8872630596160889, "learning_rate": 0.00023429228998849254, "loss": 0.2187, "step": 306880 }, { "epoch": 88.28826237054085, "grad_norm": 1.5518757104873657, "learning_rate": 0.00023423475258918297, "loss": 0.3144, "step": 306890 }, { "epoch": 88.29113924050633, "grad_norm": 1.345633625984192, "learning_rate": 0.00023417721518987343, "loss": 0.2703, "step": 306900 }, { "epoch": 88.2940161104718, "grad_norm": 2.0444436073303223, "learning_rate": 0.00023411967779056388, "loss": 0.3358, "step": 306910 }, { "epoch": 88.29689298043728, "grad_norm": 0.8080666065216064, "learning_rate": 0.0002340621403912543, "loss": 0.3132, "step": 306920 }, { "epoch": 88.29976985040277, "grad_norm": 1.311091661453247, "learning_rate": 0.00023400460299194476, "loss": 0.4018, "step": 306930 }, { "epoch": 88.30264672036824, "grad_norm": 2.149827003479004, "learning_rate": 0.0002339470655926352, "loss": 0.3405, "step": 306940 }, { "epoch": 88.30552359033372, "grad_norm": 2.5783071517944336, "learning_rate": 0.00023388952819332568, "loss": 0.4055, "step": 306950 }, { "epoch": 88.3084004602992, "grad_norm": 0.9705491662025452, "learning_rate": 0.00023383199079401613, "loss": 0.3201, "step": 306960 }, { "epoch": 88.31127733026467, "grad_norm": 1.0095194578170776, "learning_rate": 0.00023377445339470656, "loss": 0.2901, "step": 306970 }, { "epoch": 88.31415420023015, "grad_norm": 2.1868972778320312, "learning_rate": 0.00023371691599539702, "loss": 0.3329, "step": 306980 }, { "epoch": 88.31703107019563, "grad_norm": 0.5082353949546814, "learning_rate": 0.00023365937859608747, "loss": 0.2329, "step": 306990 }, { "epoch": 88.3199079401611, "grad_norm": 2.0164756774902344, "learning_rate": 0.0002336018411967779, "loss": 0.2713, "step": 307000 }, { "epoch": 88.32278481012658, "grad_norm": 1.087085485458374, "learning_rate": 0.00023354430379746835, "loss": 0.2544, "step": 307010 }, { "epoch": 88.32566168009205, "grad_norm": 0.8392175436019897, "learning_rate": 0.0002334867663981588, "loss": 0.2612, "step": 307020 }, { "epoch": 88.32853855005754, "grad_norm": 1.154287576675415, "learning_rate": 0.00023342922899884924, "loss": 0.2797, "step": 307030 }, { "epoch": 88.33141542002302, "grad_norm": 2.315748453140259, "learning_rate": 0.0002333716915995397, "loss": 0.392, "step": 307040 }, { "epoch": 88.3342922899885, "grad_norm": 0.8100311160087585, "learning_rate": 0.00023331415420023018, "loss": 0.2354, "step": 307050 }, { "epoch": 88.33716915995397, "grad_norm": 1.1714640855789185, "learning_rate": 0.0002332566168009206, "loss": 0.3214, "step": 307060 }, { "epoch": 88.34004602991945, "grad_norm": 0.8975732922554016, "learning_rate": 0.00023319907940161106, "loss": 0.2892, "step": 307070 }, { "epoch": 88.34292289988493, "grad_norm": 1.119293451309204, "learning_rate": 0.00023314154200230152, "loss": 0.298, "step": 307080 }, { "epoch": 88.3457997698504, "grad_norm": 1.866681694984436, "learning_rate": 0.00023308400460299194, "loss": 0.3135, "step": 307090 }, { "epoch": 88.34867663981588, "grad_norm": 1.1090604066848755, "learning_rate": 0.0002330264672036824, "loss": 0.3329, "step": 307100 }, { "epoch": 88.35155350978135, "grad_norm": 1.6122714281082153, "learning_rate": 0.00023296892980437286, "loss": 0.3292, "step": 307110 }, { "epoch": 88.35443037974683, "grad_norm": 1.256033182144165, "learning_rate": 0.00023291139240506328, "loss": 0.2995, "step": 307120 }, { "epoch": 88.3573072497123, "grad_norm": 0.8484428524971008, "learning_rate": 0.00023285385500575374, "loss": 0.3378, "step": 307130 }, { "epoch": 88.3601841196778, "grad_norm": 1.422911524772644, "learning_rate": 0.00023279631760644417, "loss": 0.2564, "step": 307140 }, { "epoch": 88.36306098964327, "grad_norm": 1.3534234762191772, "learning_rate": 0.00023273878020713465, "loss": 0.3153, "step": 307150 }, { "epoch": 88.36593785960875, "grad_norm": 0.8942115306854248, "learning_rate": 0.0002326812428078251, "loss": 0.2623, "step": 307160 }, { "epoch": 88.36881472957423, "grad_norm": 2.3850362300872803, "learning_rate": 0.00023262370540851553, "loss": 0.2673, "step": 307170 }, { "epoch": 88.3716915995397, "grad_norm": 1.2895148992538452, "learning_rate": 0.000232566168009206, "loss": 0.3251, "step": 307180 }, { "epoch": 88.37456846950518, "grad_norm": 2.0279905796051025, "learning_rate": 0.00023250863060989645, "loss": 0.4064, "step": 307190 }, { "epoch": 88.37744533947065, "grad_norm": 0.9628148674964905, "learning_rate": 0.00023245109321058687, "loss": 0.2925, "step": 307200 }, { "epoch": 88.38032220943613, "grad_norm": 1.2298080921173096, "learning_rate": 0.00023239355581127733, "loss": 0.3006, "step": 307210 }, { "epoch": 88.3831990794016, "grad_norm": 1.8634830713272095, "learning_rate": 0.00023233601841196778, "loss": 0.2315, "step": 307220 }, { "epoch": 88.38607594936708, "grad_norm": 1.8500077724456787, "learning_rate": 0.0002322784810126582, "loss": 0.322, "step": 307230 }, { "epoch": 88.38895281933257, "grad_norm": 1.0673424005508423, "learning_rate": 0.0002322209436133487, "loss": 0.3386, "step": 307240 }, { "epoch": 88.39182968929805, "grad_norm": 0.8803002834320068, "learning_rate": 0.00023216340621403915, "loss": 0.2677, "step": 307250 }, { "epoch": 88.39470655926353, "grad_norm": 1.1968590021133423, "learning_rate": 0.00023210586881472958, "loss": 0.3441, "step": 307260 }, { "epoch": 88.397583429229, "grad_norm": 1.0772523880004883, "learning_rate": 0.00023204833141542004, "loss": 0.2851, "step": 307270 }, { "epoch": 88.40046029919448, "grad_norm": 1.4571784734725952, "learning_rate": 0.0002319907940161105, "loss": 0.3374, "step": 307280 }, { "epoch": 88.40333716915995, "grad_norm": 0.8832457661628723, "learning_rate": 0.00023193325661680092, "loss": 0.2747, "step": 307290 }, { "epoch": 88.40621403912543, "grad_norm": 1.205889344215393, "learning_rate": 0.00023187571921749137, "loss": 0.3128, "step": 307300 }, { "epoch": 88.4090909090909, "grad_norm": 2.5091171264648438, "learning_rate": 0.00023181818181818183, "loss": 0.3116, "step": 307310 }, { "epoch": 88.41196777905638, "grad_norm": 1.1445685625076294, "learning_rate": 0.00023176064441887226, "loss": 0.2574, "step": 307320 }, { "epoch": 88.41484464902186, "grad_norm": 0.7194293737411499, "learning_rate": 0.00023170310701956271, "loss": 0.2813, "step": 307330 }, { "epoch": 88.41772151898734, "grad_norm": 0.7753716111183167, "learning_rate": 0.00023164556962025317, "loss": 0.2732, "step": 307340 }, { "epoch": 88.42059838895283, "grad_norm": 0.6061034202575684, "learning_rate": 0.00023158803222094362, "loss": 0.3025, "step": 307350 }, { "epoch": 88.4234752589183, "grad_norm": 1.2104148864746094, "learning_rate": 0.00023153049482163408, "loss": 0.2553, "step": 307360 }, { "epoch": 88.42635212888378, "grad_norm": 0.8763605952262878, "learning_rate": 0.0002314729574223245, "loss": 0.2785, "step": 307370 }, { "epoch": 88.42922899884925, "grad_norm": 2.1796722412109375, "learning_rate": 0.00023141542002301496, "loss": 0.3199, "step": 307380 }, { "epoch": 88.43210586881473, "grad_norm": 1.27415132522583, "learning_rate": 0.00023135788262370542, "loss": 0.2902, "step": 307390 }, { "epoch": 88.4349827387802, "grad_norm": 1.2097269296646118, "learning_rate": 0.00023130034522439585, "loss": 0.2809, "step": 307400 }, { "epoch": 88.43785960874568, "grad_norm": 1.182059645652771, "learning_rate": 0.0002312428078250863, "loss": 0.2748, "step": 307410 }, { "epoch": 88.44073647871116, "grad_norm": 1.3505269289016724, "learning_rate": 0.00023118527042577676, "loss": 0.2993, "step": 307420 }, { "epoch": 88.44361334867664, "grad_norm": 0.9321128726005554, "learning_rate": 0.0002311277330264672, "loss": 0.3053, "step": 307430 }, { "epoch": 88.44649021864211, "grad_norm": 1.4850789308547974, "learning_rate": 0.00023107019562715767, "loss": 0.3008, "step": 307440 }, { "epoch": 88.4493670886076, "grad_norm": 0.9661684036254883, "learning_rate": 0.00023101265822784813, "loss": 0.2374, "step": 307450 }, { "epoch": 88.45224395857308, "grad_norm": 1.0347331762313843, "learning_rate": 0.00023095512082853855, "loss": 0.3319, "step": 307460 }, { "epoch": 88.45512082853855, "grad_norm": 1.3439973592758179, "learning_rate": 0.000230897583429229, "loss": 0.3318, "step": 307470 }, { "epoch": 88.45799769850403, "grad_norm": 1.3474555015563965, "learning_rate": 0.00023084004602991947, "loss": 0.261, "step": 307480 }, { "epoch": 88.4608745684695, "grad_norm": 0.7609173655509949, "learning_rate": 0.0002307825086306099, "loss": 0.3996, "step": 307490 }, { "epoch": 88.46375143843498, "grad_norm": 0.9654059410095215, "learning_rate": 0.00023072497123130035, "loss": 0.2756, "step": 307500 }, { "epoch": 88.46662830840046, "grad_norm": 1.4594694375991821, "learning_rate": 0.0002306674338319908, "loss": 0.3097, "step": 307510 }, { "epoch": 88.46950517836594, "grad_norm": 1.3775508403778076, "learning_rate": 0.00023060989643268123, "loss": 0.2613, "step": 307520 }, { "epoch": 88.47238204833141, "grad_norm": 1.3509191274642944, "learning_rate": 0.0002305523590333717, "loss": 0.3207, "step": 307530 }, { "epoch": 88.47525891829689, "grad_norm": 1.9827139377593994, "learning_rate": 0.00023049482163406214, "loss": 0.3419, "step": 307540 }, { "epoch": 88.47813578826236, "grad_norm": 0.8663426041603088, "learning_rate": 0.0002304372842347526, "loss": 0.2691, "step": 307550 }, { "epoch": 88.48101265822785, "grad_norm": 1.143191933631897, "learning_rate": 0.00023037974683544305, "loss": 0.3151, "step": 307560 }, { "epoch": 88.48388952819333, "grad_norm": 1.8686091899871826, "learning_rate": 0.00023032220943613348, "loss": 0.3575, "step": 307570 }, { "epoch": 88.4867663981588, "grad_norm": 1.3335390090942383, "learning_rate": 0.00023026467203682394, "loss": 0.2654, "step": 307580 }, { "epoch": 88.48964326812428, "grad_norm": 1.2646156549453735, "learning_rate": 0.0002302071346375144, "loss": 0.3117, "step": 307590 }, { "epoch": 88.49252013808976, "grad_norm": 1.0137888193130493, "learning_rate": 0.00023014959723820482, "loss": 0.2112, "step": 307600 }, { "epoch": 88.49539700805524, "grad_norm": 1.0240870714187622, "learning_rate": 0.00023009205983889528, "loss": 0.3166, "step": 307610 }, { "epoch": 88.49827387802071, "grad_norm": 0.7564100623130798, "learning_rate": 0.00023003452243958573, "loss": 0.2536, "step": 307620 }, { "epoch": 88.50115074798619, "grad_norm": 1.5174055099487305, "learning_rate": 0.00022997698504027616, "loss": 0.3694, "step": 307630 }, { "epoch": 88.50402761795166, "grad_norm": 1.4294819831848145, "learning_rate": 0.00022991944764096664, "loss": 0.3293, "step": 307640 }, { "epoch": 88.50690448791714, "grad_norm": 0.9219249486923218, "learning_rate": 0.0002298619102416571, "loss": 0.2573, "step": 307650 }, { "epoch": 88.50978135788263, "grad_norm": 0.824951708316803, "learning_rate": 0.00022980437284234753, "loss": 0.3233, "step": 307660 }, { "epoch": 88.5126582278481, "grad_norm": 0.78140789270401, "learning_rate": 0.00022974683544303798, "loss": 0.2748, "step": 307670 }, { "epoch": 88.51553509781358, "grad_norm": 1.4662588834762573, "learning_rate": 0.00022968929804372844, "loss": 0.3062, "step": 307680 }, { "epoch": 88.51841196777906, "grad_norm": 1.1833364963531494, "learning_rate": 0.00022963176064441887, "loss": 0.277, "step": 307690 }, { "epoch": 88.52128883774454, "grad_norm": 1.4035191535949707, "learning_rate": 0.00022957422324510932, "loss": 0.2705, "step": 307700 }, { "epoch": 88.52416570771001, "grad_norm": 1.4407072067260742, "learning_rate": 0.00022951668584579978, "loss": 0.3039, "step": 307710 }, { "epoch": 88.52704257767549, "grad_norm": 1.0736271142959595, "learning_rate": 0.0002294591484464902, "loss": 0.2707, "step": 307720 }, { "epoch": 88.52991944764096, "grad_norm": 0.894149124622345, "learning_rate": 0.0002294016110471807, "loss": 0.3328, "step": 307730 }, { "epoch": 88.53279631760644, "grad_norm": 2.0831024646759033, "learning_rate": 0.00022934407364787115, "loss": 0.3254, "step": 307740 }, { "epoch": 88.53567318757192, "grad_norm": 2.481250762939453, "learning_rate": 0.00022928653624856157, "loss": 0.3368, "step": 307750 }, { "epoch": 88.53855005753739, "grad_norm": 1.535973072052002, "learning_rate": 0.00022922899884925203, "loss": 0.3242, "step": 307760 }, { "epoch": 88.54142692750288, "grad_norm": 1.211655616760254, "learning_rate": 0.00022917146144994246, "loss": 0.3038, "step": 307770 }, { "epoch": 88.54430379746836, "grad_norm": 1.4992649555206299, "learning_rate": 0.0002291139240506329, "loss": 0.2937, "step": 307780 }, { "epoch": 88.54718066743384, "grad_norm": 0.9342204332351685, "learning_rate": 0.00022905638665132337, "loss": 0.2927, "step": 307790 }, { "epoch": 88.55005753739931, "grad_norm": 2.781973361968994, "learning_rate": 0.0002289988492520138, "loss": 0.3058, "step": 307800 }, { "epoch": 88.55293440736479, "grad_norm": 1.1556013822555542, "learning_rate": 0.00022894131185270425, "loss": 0.3047, "step": 307810 }, { "epoch": 88.55581127733026, "grad_norm": 1.3364509344100952, "learning_rate": 0.0002288837744533947, "loss": 0.3812, "step": 307820 }, { "epoch": 88.55868814729574, "grad_norm": 1.9662941694259644, "learning_rate": 0.00022882623705408516, "loss": 0.3166, "step": 307830 }, { "epoch": 88.56156501726122, "grad_norm": 1.9219177961349487, "learning_rate": 0.00022876869965477562, "loss": 0.3344, "step": 307840 }, { "epoch": 88.56444188722669, "grad_norm": 1.005293846130371, "learning_rate": 0.00022871116225546607, "loss": 0.2561, "step": 307850 }, { "epoch": 88.56731875719217, "grad_norm": 1.196280837059021, "learning_rate": 0.0002286536248561565, "loss": 0.286, "step": 307860 }, { "epoch": 88.57019562715766, "grad_norm": 1.6659600734710693, "learning_rate": 0.00022859608745684696, "loss": 0.372, "step": 307870 }, { "epoch": 88.57307249712314, "grad_norm": 2.346773862838745, "learning_rate": 0.00022853855005753741, "loss": 0.3626, "step": 307880 }, { "epoch": 88.57594936708861, "grad_norm": 0.9980812072753906, "learning_rate": 0.00022848101265822784, "loss": 0.2906, "step": 307890 }, { "epoch": 88.57882623705409, "grad_norm": 2.021054744720459, "learning_rate": 0.0002284234752589183, "loss": 0.4122, "step": 307900 }, { "epoch": 88.58170310701956, "grad_norm": 2.1079111099243164, "learning_rate": 0.00022836593785960875, "loss": 0.3334, "step": 307910 }, { "epoch": 88.58457997698504, "grad_norm": 1.0644606351852417, "learning_rate": 0.00022830840046029918, "loss": 0.273, "step": 307920 }, { "epoch": 88.58745684695052, "grad_norm": 1.0606050491333008, "learning_rate": 0.00022825086306098966, "loss": 0.3339, "step": 307930 }, { "epoch": 88.59033371691599, "grad_norm": 1.9717662334442139, "learning_rate": 0.00022819332566168012, "loss": 0.3422, "step": 307940 }, { "epoch": 88.59321058688147, "grad_norm": 1.1747654676437378, "learning_rate": 0.00022813578826237055, "loss": 0.2931, "step": 307950 }, { "epoch": 88.59608745684694, "grad_norm": 0.9418190717697144, "learning_rate": 0.000228078250863061, "loss": 0.3644, "step": 307960 }, { "epoch": 88.59896432681242, "grad_norm": 0.6096141338348389, "learning_rate": 0.00022802071346375143, "loss": 0.2426, "step": 307970 }, { "epoch": 88.60184119677791, "grad_norm": 1.934857964515686, "learning_rate": 0.0002279631760644419, "loss": 0.2548, "step": 307980 }, { "epoch": 88.60471806674339, "grad_norm": 1.4204293489456177, "learning_rate": 0.00022790563866513234, "loss": 0.2943, "step": 307990 }, { "epoch": 88.60759493670886, "grad_norm": 0.8252513408660889, "learning_rate": 0.00022784810126582277, "loss": 0.3082, "step": 308000 }, { "epoch": 88.61047180667434, "grad_norm": 1.2083784341812134, "learning_rate": 0.00022779056386651323, "loss": 0.2822, "step": 308010 }, { "epoch": 88.61334867663982, "grad_norm": 1.4856714010238647, "learning_rate": 0.00022773302646720368, "loss": 0.3778, "step": 308020 }, { "epoch": 88.61622554660529, "grad_norm": 0.7178937792778015, "learning_rate": 0.00022767548906789414, "loss": 0.2681, "step": 308030 }, { "epoch": 88.61910241657077, "grad_norm": 1.5886237621307373, "learning_rate": 0.0002276179516685846, "loss": 0.4193, "step": 308040 }, { "epoch": 88.62197928653625, "grad_norm": 1.7237279415130615, "learning_rate": 0.00022756041426927505, "loss": 0.3273, "step": 308050 }, { "epoch": 88.62485615650172, "grad_norm": 0.9636284112930298, "learning_rate": 0.00022750287686996548, "loss": 0.2937, "step": 308060 }, { "epoch": 88.6277330264672, "grad_norm": 1.5171388387680054, "learning_rate": 0.00022744533947065593, "loss": 0.3446, "step": 308070 }, { "epoch": 88.63060989643269, "grad_norm": 0.929562509059906, "learning_rate": 0.0002273878020713464, "loss": 0.282, "step": 308080 }, { "epoch": 88.63348676639816, "grad_norm": 1.240323543548584, "learning_rate": 0.00022733026467203682, "loss": 0.2542, "step": 308090 }, { "epoch": 88.63636363636364, "grad_norm": 0.8144460916519165, "learning_rate": 0.00022727272727272727, "loss": 0.3115, "step": 308100 }, { "epoch": 88.63924050632912, "grad_norm": 1.537740707397461, "learning_rate": 0.00022721518987341773, "loss": 0.2556, "step": 308110 }, { "epoch": 88.64211737629459, "grad_norm": 1.8019384145736694, "learning_rate": 0.00022715765247410816, "loss": 0.2989, "step": 308120 }, { "epoch": 88.64499424626007, "grad_norm": 1.031335711479187, "learning_rate": 0.00022710011507479864, "loss": 0.3112, "step": 308130 }, { "epoch": 88.64787111622555, "grad_norm": 1.2472200393676758, "learning_rate": 0.0002270425776754891, "loss": 0.276, "step": 308140 }, { "epoch": 88.65074798619102, "grad_norm": 1.703115463256836, "learning_rate": 0.00022698504027617952, "loss": 0.2744, "step": 308150 }, { "epoch": 88.6536248561565, "grad_norm": 0.9641527533531189, "learning_rate": 0.00022692750287686998, "loss": 0.3159, "step": 308160 }, { "epoch": 88.65650172612197, "grad_norm": 1.4228519201278687, "learning_rate": 0.0002268699654775604, "loss": 0.3124, "step": 308170 }, { "epoch": 88.65937859608745, "grad_norm": 0.9217642545700073, "learning_rate": 0.00022681242807825086, "loss": 0.3479, "step": 308180 }, { "epoch": 88.66225546605294, "grad_norm": 1.2578754425048828, "learning_rate": 0.00022675489067894132, "loss": 0.243, "step": 308190 }, { "epoch": 88.66513233601842, "grad_norm": 0.6583155393600464, "learning_rate": 0.00022669735327963175, "loss": 0.2951, "step": 308200 }, { "epoch": 88.66800920598389, "grad_norm": 1.2714091539382935, "learning_rate": 0.0002266398158803222, "loss": 0.288, "step": 308210 }, { "epoch": 88.67088607594937, "grad_norm": 1.3141298294067383, "learning_rate": 0.00022658227848101266, "loss": 0.21, "step": 308220 }, { "epoch": 88.67376294591485, "grad_norm": 1.7755635976791382, "learning_rate": 0.0002265247410817031, "loss": 0.2772, "step": 308230 }, { "epoch": 88.67663981588032, "grad_norm": 0.789089024066925, "learning_rate": 0.00022646720368239357, "loss": 0.2463, "step": 308240 }, { "epoch": 88.6795166858458, "grad_norm": 1.1008453369140625, "learning_rate": 0.00022640966628308402, "loss": 0.3562, "step": 308250 }, { "epoch": 88.68239355581127, "grad_norm": 3.6151821613311768, "learning_rate": 0.00022635212888377445, "loss": 0.5017, "step": 308260 }, { "epoch": 88.68527042577675, "grad_norm": 1.0571364164352417, "learning_rate": 0.0002262945914844649, "loss": 0.2671, "step": 308270 }, { "epoch": 88.68814729574223, "grad_norm": 1.030212640762329, "learning_rate": 0.00022623705408515536, "loss": 0.3163, "step": 308280 }, { "epoch": 88.69102416570772, "grad_norm": 1.308929443359375, "learning_rate": 0.0002261795166858458, "loss": 0.2703, "step": 308290 }, { "epoch": 88.69390103567319, "grad_norm": 1.8473289012908936, "learning_rate": 0.00022612197928653625, "loss": 0.3128, "step": 308300 }, { "epoch": 88.69677790563867, "grad_norm": 0.8448511958122253, "learning_rate": 0.0002260644418872267, "loss": 0.2752, "step": 308310 }, { "epoch": 88.69965477560415, "grad_norm": 0.8202069401741028, "learning_rate": 0.00022600690448791716, "loss": 0.2916, "step": 308320 }, { "epoch": 88.70253164556962, "grad_norm": 1.057111382484436, "learning_rate": 0.00022594936708860761, "loss": 0.2675, "step": 308330 }, { "epoch": 88.7054085155351, "grad_norm": 1.7304803133010864, "learning_rate": 0.00022589182968929807, "loss": 0.3068, "step": 308340 }, { "epoch": 88.70828538550057, "grad_norm": 1.7348285913467407, "learning_rate": 0.0002258342922899885, "loss": 0.3416, "step": 308350 }, { "epoch": 88.71116225546605, "grad_norm": 1.1898829936981201, "learning_rate": 0.00022577675489067895, "loss": 0.3669, "step": 308360 }, { "epoch": 88.71403912543153, "grad_norm": 2.1780545711517334, "learning_rate": 0.00022571921749136938, "loss": 0.3428, "step": 308370 }, { "epoch": 88.716915995397, "grad_norm": 2.1589324474334717, "learning_rate": 0.00022566168009205984, "loss": 0.2925, "step": 308380 }, { "epoch": 88.71979286536248, "grad_norm": 1.284557819366455, "learning_rate": 0.0002256041426927503, "loss": 0.3303, "step": 308390 }, { "epoch": 88.72266973532797, "grad_norm": 0.737330436706543, "learning_rate": 0.00022554660529344072, "loss": 0.2691, "step": 308400 }, { "epoch": 88.72554660529345, "grad_norm": 0.6144204139709473, "learning_rate": 0.00022548906789413118, "loss": 0.238, "step": 308410 }, { "epoch": 88.72842347525892, "grad_norm": 1.040481686592102, "learning_rate": 0.00022543153049482166, "loss": 0.3255, "step": 308420 }, { "epoch": 88.7313003452244, "grad_norm": 2.094055414199829, "learning_rate": 0.0002253739930955121, "loss": 0.2954, "step": 308430 }, { "epoch": 88.73417721518987, "grad_norm": 1.2615715265274048, "learning_rate": 0.00022531645569620254, "loss": 0.2724, "step": 308440 }, { "epoch": 88.73705408515535, "grad_norm": 0.8853235840797424, "learning_rate": 0.000225258918296893, "loss": 0.3411, "step": 308450 }, { "epoch": 88.73993095512083, "grad_norm": 1.6660219430923462, "learning_rate": 0.00022520138089758343, "loss": 0.293, "step": 308460 }, { "epoch": 88.7428078250863, "grad_norm": 1.6258978843688965, "learning_rate": 0.00022514384349827388, "loss": 0.3178, "step": 308470 }, { "epoch": 88.74568469505178, "grad_norm": 0.654305100440979, "learning_rate": 0.00022508630609896434, "loss": 0.3216, "step": 308480 }, { "epoch": 88.74856156501725, "grad_norm": 1.440151333808899, "learning_rate": 0.00022502876869965477, "loss": 0.3545, "step": 308490 }, { "epoch": 88.75143843498275, "grad_norm": 1.3689639568328857, "learning_rate": 0.00022497123130034522, "loss": 0.2252, "step": 308500 }, { "epoch": 88.75431530494822, "grad_norm": 1.367724895477295, "learning_rate": 0.00022491369390103568, "loss": 0.2668, "step": 308510 }, { "epoch": 88.7571921749137, "grad_norm": 2.2087361812591553, "learning_rate": 0.00022485615650172613, "loss": 0.337, "step": 308520 }, { "epoch": 88.76006904487917, "grad_norm": 0.5851683020591736, "learning_rate": 0.0002247986191024166, "loss": 0.2623, "step": 308530 }, { "epoch": 88.76294591484465, "grad_norm": 1.600772500038147, "learning_rate": 0.00022474108170310704, "loss": 0.2665, "step": 308540 }, { "epoch": 88.76582278481013, "grad_norm": 2.171299457550049, "learning_rate": 0.00022468354430379747, "loss": 0.3164, "step": 308550 }, { "epoch": 88.7686996547756, "grad_norm": 1.3219332695007324, "learning_rate": 0.00022462600690448793, "loss": 0.2856, "step": 308560 }, { "epoch": 88.77157652474108, "grad_norm": 1.1873501539230347, "learning_rate": 0.00022456846950517836, "loss": 0.2979, "step": 308570 }, { "epoch": 88.77445339470655, "grad_norm": 0.9463462233543396, "learning_rate": 0.0002245109321058688, "loss": 0.2805, "step": 308580 }, { "epoch": 88.77733026467203, "grad_norm": 1.55326509475708, "learning_rate": 0.00022445339470655927, "loss": 0.333, "step": 308590 }, { "epoch": 88.78020713463752, "grad_norm": 1.3374375104904175, "learning_rate": 0.0002243958573072497, "loss": 0.3497, "step": 308600 }, { "epoch": 88.783084004603, "grad_norm": 0.7670477032661438, "learning_rate": 0.00022433831990794015, "loss": 0.307, "step": 308610 }, { "epoch": 88.78596087456847, "grad_norm": 1.2687921524047852, "learning_rate": 0.00022428078250863063, "loss": 0.2522, "step": 308620 }, { "epoch": 88.78883774453395, "grad_norm": 0.9154034852981567, "learning_rate": 0.00022422324510932106, "loss": 0.2617, "step": 308630 }, { "epoch": 88.79171461449943, "grad_norm": 1.2050920724868774, "learning_rate": 0.00022416570771001152, "loss": 0.3489, "step": 308640 }, { "epoch": 88.7945914844649, "grad_norm": 1.315463662147522, "learning_rate": 0.00022410817031070197, "loss": 0.3529, "step": 308650 }, { "epoch": 88.79746835443038, "grad_norm": 1.807301640510559, "learning_rate": 0.0002240506329113924, "loss": 0.3476, "step": 308660 }, { "epoch": 88.80034522439585, "grad_norm": 0.8999072313308716, "learning_rate": 0.00022399309551208286, "loss": 0.2843, "step": 308670 }, { "epoch": 88.80322209436133, "grad_norm": 0.9652876853942871, "learning_rate": 0.0002239355581127733, "loss": 0.3001, "step": 308680 }, { "epoch": 88.80609896432681, "grad_norm": 1.710433006286621, "learning_rate": 0.00022387802071346374, "loss": 0.2667, "step": 308690 }, { "epoch": 88.80897583429228, "grad_norm": 0.8839274644851685, "learning_rate": 0.0002238204833141542, "loss": 0.334, "step": 308700 }, { "epoch": 88.81185270425777, "grad_norm": 0.828263521194458, "learning_rate": 0.00022376294591484465, "loss": 0.3946, "step": 308710 }, { "epoch": 88.81472957422325, "grad_norm": 2.3791189193725586, "learning_rate": 0.0002237054085155351, "loss": 0.3621, "step": 308720 }, { "epoch": 88.81760644418873, "grad_norm": 1.238019585609436, "learning_rate": 0.00022364787111622556, "loss": 0.3061, "step": 308730 }, { "epoch": 88.8204833141542, "grad_norm": 1.3741726875305176, "learning_rate": 0.00022359033371691602, "loss": 0.2321, "step": 308740 }, { "epoch": 88.82336018411968, "grad_norm": 1.0759713649749756, "learning_rate": 0.00022353279631760645, "loss": 0.2941, "step": 308750 }, { "epoch": 88.82623705408515, "grad_norm": 1.2701283693313599, "learning_rate": 0.0002234752589182969, "loss": 0.2664, "step": 308760 }, { "epoch": 88.82911392405063, "grad_norm": 1.5197083950042725, "learning_rate": 0.00022341772151898733, "loss": 0.3292, "step": 308770 }, { "epoch": 88.83199079401611, "grad_norm": 1.1326336860656738, "learning_rate": 0.00022336018411967779, "loss": 0.2774, "step": 308780 }, { "epoch": 88.83486766398158, "grad_norm": 1.316017746925354, "learning_rate": 0.00022330264672036824, "loss": 0.2892, "step": 308790 }, { "epoch": 88.83774453394706, "grad_norm": 1.5863890647888184, "learning_rate": 0.00022324510932105867, "loss": 0.3813, "step": 308800 }, { "epoch": 88.84062140391255, "grad_norm": 1.1956932544708252, "learning_rate": 0.00022318757192174915, "loss": 0.3329, "step": 308810 }, { "epoch": 88.84349827387803, "grad_norm": 1.2755011320114136, "learning_rate": 0.0002231300345224396, "loss": 0.3139, "step": 308820 }, { "epoch": 88.8463751438435, "grad_norm": 1.5149106979370117, "learning_rate": 0.00022307249712313004, "loss": 0.3391, "step": 308830 }, { "epoch": 88.84925201380898, "grad_norm": 1.4798616170883179, "learning_rate": 0.0002230149597238205, "loss": 0.3414, "step": 308840 }, { "epoch": 88.85212888377445, "grad_norm": 1.5555174350738525, "learning_rate": 0.00022295742232451095, "loss": 0.2732, "step": 308850 }, { "epoch": 88.85500575373993, "grad_norm": 0.8450655937194824, "learning_rate": 0.00022289988492520138, "loss": 0.2483, "step": 308860 }, { "epoch": 88.85788262370541, "grad_norm": 1.085364818572998, "learning_rate": 0.00022284234752589183, "loss": 0.2725, "step": 308870 }, { "epoch": 88.86075949367088, "grad_norm": 2.1107141971588135, "learning_rate": 0.0002227848101265823, "loss": 0.2713, "step": 308880 }, { "epoch": 88.86363636363636, "grad_norm": 0.730121910572052, "learning_rate": 0.00022272727272727272, "loss": 0.2071, "step": 308890 }, { "epoch": 88.86651323360184, "grad_norm": 1.5505352020263672, "learning_rate": 0.00022266973532796317, "loss": 0.4424, "step": 308900 }, { "epoch": 88.86939010356731, "grad_norm": 1.6162363290786743, "learning_rate": 0.00022261219792865365, "loss": 0.3149, "step": 308910 }, { "epoch": 88.8722669735328, "grad_norm": 2.289774179458618, "learning_rate": 0.00022255466052934408, "loss": 0.3464, "step": 308920 }, { "epoch": 88.87514384349828, "grad_norm": 1.5547161102294922, "learning_rate": 0.00022249712313003454, "loss": 0.2869, "step": 308930 }, { "epoch": 88.87802071346375, "grad_norm": 1.0024632215499878, "learning_rate": 0.000222439585730725, "loss": 0.3075, "step": 308940 }, { "epoch": 88.88089758342923, "grad_norm": 1.3939934968948364, "learning_rate": 0.00022238204833141542, "loss": 0.3238, "step": 308950 }, { "epoch": 88.88377445339471, "grad_norm": 1.5405305624008179, "learning_rate": 0.00022232451093210588, "loss": 0.3721, "step": 308960 }, { "epoch": 88.88665132336018, "grad_norm": 1.0492258071899414, "learning_rate": 0.0002222669735327963, "loss": 0.2886, "step": 308970 }, { "epoch": 88.88952819332566, "grad_norm": 1.6793354749679565, "learning_rate": 0.00022220943613348676, "loss": 0.2848, "step": 308980 }, { "epoch": 88.89240506329114, "grad_norm": 1.0678963661193848, "learning_rate": 0.00022215189873417722, "loss": 0.3161, "step": 308990 }, { "epoch": 88.89528193325661, "grad_norm": 1.581896185874939, "learning_rate": 0.00022209436133486764, "loss": 0.3294, "step": 309000 }, { "epoch": 88.89815880322209, "grad_norm": 0.8746012449264526, "learning_rate": 0.00022203682393555813, "loss": 0.2659, "step": 309010 }, { "epoch": 88.90103567318758, "grad_norm": 2.0266425609588623, "learning_rate": 0.00022197928653624858, "loss": 0.3034, "step": 309020 }, { "epoch": 88.90391254315306, "grad_norm": 1.4257110357284546, "learning_rate": 0.000221921749136939, "loss": 0.3036, "step": 309030 }, { "epoch": 88.90678941311853, "grad_norm": 1.1149744987487793, "learning_rate": 0.00022186421173762947, "loss": 0.3319, "step": 309040 }, { "epoch": 88.90966628308401, "grad_norm": 1.110644817352295, "learning_rate": 0.00022180667433831992, "loss": 0.3003, "step": 309050 }, { "epoch": 88.91254315304948, "grad_norm": 0.9887632131576538, "learning_rate": 0.00022174913693901035, "loss": 0.2613, "step": 309060 }, { "epoch": 88.91542002301496, "grad_norm": 1.3968621492385864, "learning_rate": 0.0002216915995397008, "loss": 0.2932, "step": 309070 }, { "epoch": 88.91829689298044, "grad_norm": 1.3578953742980957, "learning_rate": 0.00022163406214039126, "loss": 0.3028, "step": 309080 }, { "epoch": 88.92117376294591, "grad_norm": 1.5690172910690308, "learning_rate": 0.0002215765247410817, "loss": 0.2801, "step": 309090 }, { "epoch": 88.92405063291139, "grad_norm": 1.8680604696273804, "learning_rate": 0.00022151898734177215, "loss": 0.3223, "step": 309100 }, { "epoch": 88.92692750287686, "grad_norm": 1.0696327686309814, "learning_rate": 0.00022146144994246263, "loss": 0.3288, "step": 309110 }, { "epoch": 88.92980437284234, "grad_norm": 0.7258756756782532, "learning_rate": 0.00022140391254315306, "loss": 0.3013, "step": 309120 }, { "epoch": 88.93268124280783, "grad_norm": 1.5774258375167847, "learning_rate": 0.0002213463751438435, "loss": 0.2766, "step": 309130 }, { "epoch": 88.93555811277331, "grad_norm": 1.3442498445510864, "learning_rate": 0.00022128883774453397, "loss": 0.2996, "step": 309140 }, { "epoch": 88.93843498273878, "grad_norm": 1.5732882022857666, "learning_rate": 0.0002212313003452244, "loss": 0.2998, "step": 309150 }, { "epoch": 88.94131185270426, "grad_norm": 2.3313498497009277, "learning_rate": 0.00022117376294591485, "loss": 0.3228, "step": 309160 }, { "epoch": 88.94418872266974, "grad_norm": 0.8684033155441284, "learning_rate": 0.00022111622554660528, "loss": 0.3791, "step": 309170 }, { "epoch": 88.94706559263521, "grad_norm": 2.775054931640625, "learning_rate": 0.00022105868814729574, "loss": 0.3117, "step": 309180 }, { "epoch": 88.94994246260069, "grad_norm": 1.3319406509399414, "learning_rate": 0.0002210011507479862, "loss": 0.2691, "step": 309190 }, { "epoch": 88.95281933256616, "grad_norm": 0.8424270153045654, "learning_rate": 0.00022094361334867662, "loss": 0.316, "step": 309200 }, { "epoch": 88.95569620253164, "grad_norm": 0.6155050992965698, "learning_rate": 0.0002208860759493671, "loss": 0.3097, "step": 309210 }, { "epoch": 88.95857307249712, "grad_norm": 1.4414883852005005, "learning_rate": 0.00022082853855005756, "loss": 0.2823, "step": 309220 }, { "epoch": 88.96144994246261, "grad_norm": 1.3652797937393188, "learning_rate": 0.00022077100115074799, "loss": 0.3027, "step": 309230 }, { "epoch": 88.96432681242808, "grad_norm": 0.9718539714813232, "learning_rate": 0.00022071346375143844, "loss": 0.3321, "step": 309240 }, { "epoch": 88.96720368239356, "grad_norm": 1.355629324913025, "learning_rate": 0.0002206559263521289, "loss": 0.3705, "step": 309250 }, { "epoch": 88.97008055235904, "grad_norm": 1.3645431995391846, "learning_rate": 0.00022059838895281932, "loss": 0.2577, "step": 309260 }, { "epoch": 88.97295742232451, "grad_norm": 1.0329760313034058, "learning_rate": 0.00022054085155350978, "loss": 0.3299, "step": 309270 }, { "epoch": 88.97583429228999, "grad_norm": 0.7567709684371948, "learning_rate": 0.00022048331415420024, "loss": 0.3509, "step": 309280 }, { "epoch": 88.97871116225546, "grad_norm": 1.1652565002441406, "learning_rate": 0.00022042577675489066, "loss": 0.2736, "step": 309290 }, { "epoch": 88.98158803222094, "grad_norm": 1.2605286836624146, "learning_rate": 0.00022036823935558115, "loss": 0.3637, "step": 309300 }, { "epoch": 88.98446490218642, "grad_norm": 1.349786400794983, "learning_rate": 0.0002203107019562716, "loss": 0.2812, "step": 309310 }, { "epoch": 88.9873417721519, "grad_norm": 1.7009743452072144, "learning_rate": 0.00022025316455696203, "loss": 0.3412, "step": 309320 }, { "epoch": 88.99021864211737, "grad_norm": 1.3667876720428467, "learning_rate": 0.00022019562715765249, "loss": 0.233, "step": 309330 }, { "epoch": 88.99309551208286, "grad_norm": 1.2650965452194214, "learning_rate": 0.00022013808975834294, "loss": 0.2886, "step": 309340 }, { "epoch": 88.99597238204834, "grad_norm": 1.1149451732635498, "learning_rate": 0.00022008055235903337, "loss": 0.2725, "step": 309350 }, { "epoch": 88.99884925201381, "grad_norm": 0.6977365612983704, "learning_rate": 0.00022002301495972383, "loss": 0.3023, "step": 309360 }, { "epoch": 89.00172612197929, "grad_norm": 0.6466597318649292, "learning_rate": 0.00021996547756041428, "loss": 0.272, "step": 309370 }, { "epoch": 89.00460299194476, "grad_norm": 1.0137407779693604, "learning_rate": 0.0002199079401611047, "loss": 0.2799, "step": 309380 }, { "epoch": 89.00747986191024, "grad_norm": 1.168871283531189, "learning_rate": 0.00021985040276179517, "loss": 0.3142, "step": 309390 }, { "epoch": 89.01035673187572, "grad_norm": 0.9641549587249756, "learning_rate": 0.00021979286536248562, "loss": 0.277, "step": 309400 }, { "epoch": 89.0132336018412, "grad_norm": 1.036586880683899, "learning_rate": 0.00021973532796317608, "loss": 0.2555, "step": 309410 }, { "epoch": 89.01611047180667, "grad_norm": 1.04020094871521, "learning_rate": 0.00021967779056386653, "loss": 0.2523, "step": 309420 }, { "epoch": 89.01898734177215, "grad_norm": 0.9391879439353943, "learning_rate": 0.00021962025316455696, "loss": 0.2795, "step": 309430 }, { "epoch": 89.02186421173764, "grad_norm": 0.6599713563919067, "learning_rate": 0.00021956271576524742, "loss": 0.2979, "step": 309440 }, { "epoch": 89.02474108170311, "grad_norm": 1.1505173444747925, "learning_rate": 0.00021950517836593787, "loss": 0.3359, "step": 309450 }, { "epoch": 89.02761795166859, "grad_norm": 1.3227407932281494, "learning_rate": 0.0002194476409666283, "loss": 0.2948, "step": 309460 }, { "epoch": 89.03049482163406, "grad_norm": 1.3334192037582397, "learning_rate": 0.00021939010356731875, "loss": 0.2935, "step": 309470 }, { "epoch": 89.03337169159954, "grad_norm": 1.0176587104797363, "learning_rate": 0.0002193325661680092, "loss": 0.2377, "step": 309480 }, { "epoch": 89.03624856156502, "grad_norm": 0.7251080274581909, "learning_rate": 0.00021927502876869964, "loss": 0.2553, "step": 309490 }, { "epoch": 89.0391254315305, "grad_norm": 1.2070468664169312, "learning_rate": 0.00021921749136939012, "loss": 0.2862, "step": 309500 }, { "epoch": 89.04200230149597, "grad_norm": 0.9367154836654663, "learning_rate": 0.00021915995397008058, "loss": 0.2789, "step": 309510 }, { "epoch": 89.04487917146145, "grad_norm": 1.0001963376998901, "learning_rate": 0.000219102416570771, "loss": 0.2838, "step": 309520 }, { "epoch": 89.04775604142692, "grad_norm": 0.8964278697967529, "learning_rate": 0.00021904487917146146, "loss": 0.2628, "step": 309530 }, { "epoch": 89.0506329113924, "grad_norm": 1.6766473054885864, "learning_rate": 0.00021898734177215192, "loss": 0.2722, "step": 309540 }, { "epoch": 89.05350978135789, "grad_norm": 0.8374184966087341, "learning_rate": 0.00021892980437284234, "loss": 0.2671, "step": 309550 }, { "epoch": 89.05638665132336, "grad_norm": 0.8354123830795288, "learning_rate": 0.0002188722669735328, "loss": 0.2633, "step": 309560 }, { "epoch": 89.05926352128884, "grad_norm": 1.1831622123718262, "learning_rate": 0.00021881472957422326, "loss": 0.4214, "step": 309570 }, { "epoch": 89.06214039125432, "grad_norm": 1.1759774684906006, "learning_rate": 0.00021875719217491368, "loss": 0.2659, "step": 309580 }, { "epoch": 89.0650172612198, "grad_norm": 1.9157252311706543, "learning_rate": 0.00021869965477560414, "loss": 0.2634, "step": 309590 }, { "epoch": 89.06789413118527, "grad_norm": 1.690982699394226, "learning_rate": 0.0002186421173762946, "loss": 0.3519, "step": 309600 }, { "epoch": 89.07077100115075, "grad_norm": 1.2203481197357178, "learning_rate": 0.00021858457997698505, "loss": 0.2823, "step": 309610 }, { "epoch": 89.07364787111622, "grad_norm": 0.7643173933029175, "learning_rate": 0.0002185270425776755, "loss": 0.2655, "step": 309620 }, { "epoch": 89.0765247410817, "grad_norm": 1.0738564729690552, "learning_rate": 0.00021846950517836593, "loss": 0.3167, "step": 309630 }, { "epoch": 89.07940161104717, "grad_norm": 1.4703805446624756, "learning_rate": 0.0002184119677790564, "loss": 0.3436, "step": 309640 }, { "epoch": 89.08227848101266, "grad_norm": 0.8885443210601807, "learning_rate": 0.00021835443037974685, "loss": 0.2881, "step": 309650 }, { "epoch": 89.08515535097814, "grad_norm": 1.8710758686065674, "learning_rate": 0.00021829689298043727, "loss": 0.3006, "step": 309660 }, { "epoch": 89.08803222094362, "grad_norm": 0.9140520691871643, "learning_rate": 0.00021823935558112773, "loss": 0.3426, "step": 309670 }, { "epoch": 89.0909090909091, "grad_norm": 0.6837471127510071, "learning_rate": 0.00021818181818181818, "loss": 0.2577, "step": 309680 }, { "epoch": 89.09378596087457, "grad_norm": 1.402329921722412, "learning_rate": 0.0002181242807825086, "loss": 0.2511, "step": 309690 }, { "epoch": 89.09666283084005, "grad_norm": 1.4142526388168335, "learning_rate": 0.0002180667433831991, "loss": 0.2972, "step": 309700 }, { "epoch": 89.09953970080552, "grad_norm": 1.4420298337936401, "learning_rate": 0.00021800920598388955, "loss": 0.2966, "step": 309710 }, { "epoch": 89.102416570771, "grad_norm": 1.557573676109314, "learning_rate": 0.00021795166858457998, "loss": 0.2704, "step": 309720 }, { "epoch": 89.10529344073647, "grad_norm": 0.7314257621765137, "learning_rate": 0.00021789413118527044, "loss": 0.2893, "step": 309730 }, { "epoch": 89.10817031070195, "grad_norm": 0.9353801608085632, "learning_rate": 0.0002178365937859609, "loss": 0.3157, "step": 309740 }, { "epoch": 89.11104718066743, "grad_norm": 1.1703733205795288, "learning_rate": 0.00021777905638665132, "loss": 0.2551, "step": 309750 }, { "epoch": 89.11392405063292, "grad_norm": 1.0556137561798096, "learning_rate": 0.00021772151898734177, "loss": 0.4035, "step": 309760 }, { "epoch": 89.1168009205984, "grad_norm": 0.7390289306640625, "learning_rate": 0.00021766398158803223, "loss": 0.2458, "step": 309770 }, { "epoch": 89.11967779056387, "grad_norm": 1.3300385475158691, "learning_rate": 0.00021760644418872266, "loss": 0.2755, "step": 309780 }, { "epoch": 89.12255466052935, "grad_norm": 1.0004613399505615, "learning_rate": 0.00021754890678941314, "loss": 0.3064, "step": 309790 }, { "epoch": 89.12543153049482, "grad_norm": 0.8969210386276245, "learning_rate": 0.00021749136939010357, "loss": 0.2891, "step": 309800 }, { "epoch": 89.1283084004603, "grad_norm": 1.2889035940170288, "learning_rate": 0.00021743383199079403, "loss": 0.3185, "step": 309810 }, { "epoch": 89.13118527042577, "grad_norm": 1.2375826835632324, "learning_rate": 0.00021737629459148448, "loss": 0.2807, "step": 309820 }, { "epoch": 89.13406214039125, "grad_norm": 1.231756567955017, "learning_rate": 0.0002173187571921749, "loss": 0.2638, "step": 309830 }, { "epoch": 89.13693901035673, "grad_norm": 1.1915112733840942, "learning_rate": 0.00021726121979286536, "loss": 0.2884, "step": 309840 }, { "epoch": 89.1398158803222, "grad_norm": 0.9691506624221802, "learning_rate": 0.00021720368239355582, "loss": 0.3036, "step": 309850 }, { "epoch": 89.1426927502877, "grad_norm": 1.2996829748153687, "learning_rate": 0.00021714614499424625, "loss": 0.2783, "step": 309860 }, { "epoch": 89.14556962025317, "grad_norm": 1.2600516080856323, "learning_rate": 0.0002170886075949367, "loss": 0.2996, "step": 309870 }, { "epoch": 89.14844649021865, "grad_norm": 1.018758773803711, "learning_rate": 0.00021703107019562716, "loss": 0.2687, "step": 309880 }, { "epoch": 89.15132336018412, "grad_norm": 0.8591777086257935, "learning_rate": 0.00021697353279631761, "loss": 0.2934, "step": 309890 }, { "epoch": 89.1542002301496, "grad_norm": 1.0096763372421265, "learning_rate": 0.00021691599539700807, "loss": 0.2628, "step": 309900 }, { "epoch": 89.15707710011507, "grad_norm": 0.8574146628379822, "learning_rate": 0.00021685845799769853, "loss": 0.3764, "step": 309910 }, { "epoch": 89.15995397008055, "grad_norm": 1.259954571723938, "learning_rate": 0.00021680092059838895, "loss": 0.3176, "step": 309920 }, { "epoch": 89.16283084004603, "grad_norm": 1.060194969177246, "learning_rate": 0.0002167433831990794, "loss": 0.288, "step": 309930 }, { "epoch": 89.1657077100115, "grad_norm": 2.1690566539764404, "learning_rate": 0.00021668584579976987, "loss": 0.3499, "step": 309940 }, { "epoch": 89.16858457997698, "grad_norm": 1.4400956630706787, "learning_rate": 0.0002166283084004603, "loss": 0.2596, "step": 309950 }, { "epoch": 89.17146144994246, "grad_norm": 0.8813738226890564, "learning_rate": 0.00021657077100115075, "loss": 0.2608, "step": 309960 }, { "epoch": 89.17433831990795, "grad_norm": 0.7230464220046997, "learning_rate": 0.0002165132336018412, "loss": 0.3252, "step": 309970 }, { "epoch": 89.17721518987342, "grad_norm": 1.4831663370132446, "learning_rate": 0.00021645569620253163, "loss": 0.2564, "step": 309980 }, { "epoch": 89.1800920598389, "grad_norm": 0.9284740686416626, "learning_rate": 0.00021639815880322212, "loss": 0.2803, "step": 309990 }, { "epoch": 89.18296892980437, "grad_norm": 1.3049476146697998, "learning_rate": 0.00021634062140391254, "loss": 0.2542, "step": 310000 }, { "epoch": 89.18584579976985, "grad_norm": 0.8465309143066406, "learning_rate": 0.000216283084004603, "loss": 0.2878, "step": 310010 }, { "epoch": 89.18872266973533, "grad_norm": 1.0323870182037354, "learning_rate": 0.00021622554660529346, "loss": 0.281, "step": 310020 }, { "epoch": 89.1915995397008, "grad_norm": 0.9562811851501465, "learning_rate": 0.00021616800920598388, "loss": 0.2591, "step": 310030 }, { "epoch": 89.19447640966628, "grad_norm": 1.027812123298645, "learning_rate": 0.00021611047180667434, "loss": 0.3135, "step": 310040 }, { "epoch": 89.19735327963176, "grad_norm": 0.7402271032333374, "learning_rate": 0.0002160529344073648, "loss": 0.3244, "step": 310050 }, { "epoch": 89.20023014959723, "grad_norm": 0.7757048606872559, "learning_rate": 0.00021599539700805522, "loss": 0.3026, "step": 310060 }, { "epoch": 89.20310701956272, "grad_norm": 0.834297776222229, "learning_rate": 0.00021593785960874568, "loss": 0.266, "step": 310070 }, { "epoch": 89.2059838895282, "grad_norm": 1.6849558353424072, "learning_rate": 0.00021588032220943613, "loss": 0.2887, "step": 310080 }, { "epoch": 89.20886075949367, "grad_norm": 2.052131175994873, "learning_rate": 0.0002158227848101266, "loss": 0.2969, "step": 310090 }, { "epoch": 89.21173762945915, "grad_norm": 1.226019263267517, "learning_rate": 0.00021576524741081704, "loss": 0.2871, "step": 310100 }, { "epoch": 89.21461449942463, "grad_norm": 1.7617281675338745, "learning_rate": 0.0002157077100115075, "loss": 0.306, "step": 310110 }, { "epoch": 89.2174913693901, "grad_norm": 1.0772783756256104, "learning_rate": 0.00021565017261219793, "loss": 0.2601, "step": 310120 }, { "epoch": 89.22036823935558, "grad_norm": 1.3874715566635132, "learning_rate": 0.00021559263521288838, "loss": 0.3224, "step": 310130 }, { "epoch": 89.22324510932106, "grad_norm": 1.1135103702545166, "learning_rate": 0.00021553509781357884, "loss": 0.2714, "step": 310140 }, { "epoch": 89.22612197928653, "grad_norm": 1.0787256956100464, "learning_rate": 0.00021547756041426927, "loss": 0.3108, "step": 310150 }, { "epoch": 89.22899884925201, "grad_norm": 0.9767640829086304, "learning_rate": 0.00021542002301495972, "loss": 0.311, "step": 310160 }, { "epoch": 89.23187571921748, "grad_norm": 0.7721216678619385, "learning_rate": 0.00021536248561565018, "loss": 0.3533, "step": 310170 }, { "epoch": 89.23475258918297, "grad_norm": 1.2667479515075684, "learning_rate": 0.0002153049482163406, "loss": 0.261, "step": 310180 }, { "epoch": 89.23762945914845, "grad_norm": 1.9717164039611816, "learning_rate": 0.0002152474108170311, "loss": 0.2661, "step": 310190 }, { "epoch": 89.24050632911393, "grad_norm": 1.2592942714691162, "learning_rate": 0.00021518987341772155, "loss": 0.3266, "step": 310200 }, { "epoch": 89.2433831990794, "grad_norm": 2.6147050857543945, "learning_rate": 0.00021513233601841197, "loss": 0.3551, "step": 310210 }, { "epoch": 89.24626006904488, "grad_norm": 1.6286778450012207, "learning_rate": 0.00021507479861910243, "loss": 0.3217, "step": 310220 }, { "epoch": 89.24913693901036, "grad_norm": 1.7590056657791138, "learning_rate": 0.00021501726121979286, "loss": 0.3199, "step": 310230 }, { "epoch": 89.25201380897583, "grad_norm": 0.959117591381073, "learning_rate": 0.00021495972382048331, "loss": 0.2675, "step": 310240 }, { "epoch": 89.25489067894131, "grad_norm": 1.4205563068389893, "learning_rate": 0.00021490218642117377, "loss": 0.2587, "step": 310250 }, { "epoch": 89.25776754890678, "grad_norm": 1.1818220615386963, "learning_rate": 0.0002148446490218642, "loss": 0.3862, "step": 310260 }, { "epoch": 89.26064441887226, "grad_norm": 1.9465162754058838, "learning_rate": 0.00021478711162255465, "loss": 0.2961, "step": 310270 }, { "epoch": 89.26352128883775, "grad_norm": 0.8191990256309509, "learning_rate": 0.0002147295742232451, "loss": 0.3354, "step": 310280 }, { "epoch": 89.26639815880323, "grad_norm": 0.9799808263778687, "learning_rate": 0.00021467203682393556, "loss": 0.2867, "step": 310290 }, { "epoch": 89.2692750287687, "grad_norm": 1.6541202068328857, "learning_rate": 0.00021461449942462602, "loss": 0.3346, "step": 310300 }, { "epoch": 89.27215189873418, "grad_norm": 1.4691288471221924, "learning_rate": 0.00021455696202531647, "loss": 0.229, "step": 310310 }, { "epoch": 89.27502876869966, "grad_norm": 1.780312180519104, "learning_rate": 0.0002144994246260069, "loss": 0.3556, "step": 310320 }, { "epoch": 89.27790563866513, "grad_norm": 0.932460606098175, "learning_rate": 0.00021444188722669736, "loss": 0.2763, "step": 310330 }, { "epoch": 89.28078250863061, "grad_norm": 1.603192687034607, "learning_rate": 0.00021438434982738781, "loss": 0.302, "step": 310340 }, { "epoch": 89.28365937859608, "grad_norm": 1.4704586267471313, "learning_rate": 0.00021432681242807824, "loss": 0.2906, "step": 310350 }, { "epoch": 89.28653624856156, "grad_norm": 1.1154844760894775, "learning_rate": 0.0002142692750287687, "loss": 0.2872, "step": 310360 }, { "epoch": 89.28941311852704, "grad_norm": 1.0568056106567383, "learning_rate": 0.00021421173762945915, "loss": 0.275, "step": 310370 }, { "epoch": 89.29228998849253, "grad_norm": 1.0994549989700317, "learning_rate": 0.0002141542002301496, "loss": 0.2615, "step": 310380 }, { "epoch": 89.295166858458, "grad_norm": 2.227703809738159, "learning_rate": 0.00021409666283084006, "loss": 0.2949, "step": 310390 }, { "epoch": 89.29804372842348, "grad_norm": 2.218331813812256, "learning_rate": 0.00021403912543153052, "loss": 0.4662, "step": 310400 }, { "epoch": 89.30092059838896, "grad_norm": 1.1601895093917847, "learning_rate": 0.00021398158803222095, "loss": 0.3002, "step": 310410 }, { "epoch": 89.30379746835443, "grad_norm": 0.9630935192108154, "learning_rate": 0.0002139240506329114, "loss": 0.2503, "step": 310420 }, { "epoch": 89.30667433831991, "grad_norm": 1.885495901107788, "learning_rate": 0.00021386651323360183, "loss": 0.2868, "step": 310430 }, { "epoch": 89.30955120828538, "grad_norm": 1.142744541168213, "learning_rate": 0.0002138089758342923, "loss": 0.2995, "step": 310440 }, { "epoch": 89.31242807825086, "grad_norm": 0.975471556186676, "learning_rate": 0.00021375143843498274, "loss": 0.3116, "step": 310450 }, { "epoch": 89.31530494821634, "grad_norm": 1.21129310131073, "learning_rate": 0.00021369390103567317, "loss": 0.2655, "step": 310460 }, { "epoch": 89.31818181818181, "grad_norm": 1.471705436706543, "learning_rate": 0.00021363636363636363, "loss": 0.2829, "step": 310470 }, { "epoch": 89.32105868814729, "grad_norm": 1.1268532276153564, "learning_rate": 0.0002135788262370541, "loss": 0.2554, "step": 310480 }, { "epoch": 89.32393555811278, "grad_norm": 1.47620689868927, "learning_rate": 0.00021352128883774454, "loss": 0.2619, "step": 310490 }, { "epoch": 89.32681242807826, "grad_norm": 0.8188948035240173, "learning_rate": 0.000213463751438435, "loss": 0.3115, "step": 310500 }, { "epoch": 89.32968929804373, "grad_norm": 1.1524882316589355, "learning_rate": 0.00021340621403912545, "loss": 0.293, "step": 310510 }, { "epoch": 89.33256616800921, "grad_norm": 2.4449515342712402, "learning_rate": 0.00021334867663981588, "loss": 0.325, "step": 310520 }, { "epoch": 89.33544303797468, "grad_norm": 1.3368639945983887, "learning_rate": 0.00021329113924050633, "loss": 0.2705, "step": 310530 }, { "epoch": 89.33831990794016, "grad_norm": 1.681104302406311, "learning_rate": 0.0002132336018411968, "loss": 0.3382, "step": 310540 }, { "epoch": 89.34119677790564, "grad_norm": 0.8297922611236572, "learning_rate": 0.00021317606444188722, "loss": 0.3052, "step": 310550 }, { "epoch": 89.34407364787111, "grad_norm": 1.1239628791809082, "learning_rate": 0.00021311852704257767, "loss": 0.3547, "step": 310560 }, { "epoch": 89.34695051783659, "grad_norm": 1.5127499103546143, "learning_rate": 0.00021306098964326813, "loss": 0.3304, "step": 310570 }, { "epoch": 89.34982738780207, "grad_norm": 1.0039358139038086, "learning_rate": 0.00021300345224395858, "loss": 0.2922, "step": 310580 }, { "epoch": 89.35270425776756, "grad_norm": 1.8877627849578857, "learning_rate": 0.00021294591484464904, "loss": 0.2652, "step": 310590 }, { "epoch": 89.35558112773303, "grad_norm": 2.64131236076355, "learning_rate": 0.0002128883774453395, "loss": 0.4163, "step": 310600 }, { "epoch": 89.35845799769851, "grad_norm": 1.0343223810195923, "learning_rate": 0.00021283084004602992, "loss": 0.3112, "step": 310610 }, { "epoch": 89.36133486766398, "grad_norm": 0.6445164680480957, "learning_rate": 0.00021277330264672038, "loss": 0.2597, "step": 310620 }, { "epoch": 89.36421173762946, "grad_norm": 0.9821062684059143, "learning_rate": 0.0002127157652474108, "loss": 0.2755, "step": 310630 }, { "epoch": 89.36708860759494, "grad_norm": 1.009417176246643, "learning_rate": 0.00021265822784810126, "loss": 0.3606, "step": 310640 }, { "epoch": 89.36996547756041, "grad_norm": 0.9868456721305847, "learning_rate": 0.00021260069044879172, "loss": 0.2702, "step": 310650 }, { "epoch": 89.37284234752589, "grad_norm": 0.929692268371582, "learning_rate": 0.00021254315304948215, "loss": 0.2736, "step": 310660 }, { "epoch": 89.37571921749137, "grad_norm": 1.3288589715957642, "learning_rate": 0.0002124856156501726, "loss": 0.3438, "step": 310670 }, { "epoch": 89.37859608745684, "grad_norm": 1.1227362155914307, "learning_rate": 0.00021242807825086308, "loss": 0.2649, "step": 310680 }, { "epoch": 89.38147295742232, "grad_norm": 1.8934911489486694, "learning_rate": 0.0002123705408515535, "loss": 0.2697, "step": 310690 }, { "epoch": 89.38434982738781, "grad_norm": 0.954340934753418, "learning_rate": 0.00021231300345224397, "loss": 0.2558, "step": 310700 }, { "epoch": 89.38722669735328, "grad_norm": 1.2046115398406982, "learning_rate": 0.00021225546605293442, "loss": 0.3404, "step": 310710 }, { "epoch": 89.39010356731876, "grad_norm": 0.8706210255622864, "learning_rate": 0.00021219792865362485, "loss": 0.2952, "step": 310720 }, { "epoch": 89.39298043728424, "grad_norm": 0.7470324039459229, "learning_rate": 0.0002121403912543153, "loss": 0.3075, "step": 310730 }, { "epoch": 89.39585730724971, "grad_norm": 2.244882583618164, "learning_rate": 0.00021208285385500576, "loss": 0.323, "step": 310740 }, { "epoch": 89.39873417721519, "grad_norm": 0.7636555433273315, "learning_rate": 0.0002120253164556962, "loss": 0.2396, "step": 310750 }, { "epoch": 89.40161104718067, "grad_norm": 1.307127833366394, "learning_rate": 0.00021196777905638665, "loss": 0.2646, "step": 310760 }, { "epoch": 89.40448791714614, "grad_norm": 1.3283724784851074, "learning_rate": 0.0002119102416570771, "loss": 0.3013, "step": 310770 }, { "epoch": 89.40736478711162, "grad_norm": 1.1871390342712402, "learning_rate": 0.00021185270425776756, "loss": 0.2792, "step": 310780 }, { "epoch": 89.4102416570771, "grad_norm": 0.8683919906616211, "learning_rate": 0.00021179516685845801, "loss": 0.2804, "step": 310790 }, { "epoch": 89.41311852704258, "grad_norm": 1.4686652421951294, "learning_rate": 0.00021173762945914847, "loss": 0.2727, "step": 310800 }, { "epoch": 89.41599539700806, "grad_norm": 0.7143228650093079, "learning_rate": 0.0002116800920598389, "loss": 0.2525, "step": 310810 }, { "epoch": 89.41887226697354, "grad_norm": 1.07561194896698, "learning_rate": 0.00021162255466052935, "loss": 0.2909, "step": 310820 }, { "epoch": 89.42174913693901, "grad_norm": 1.7038599252700806, "learning_rate": 0.00021156501726121978, "loss": 0.2884, "step": 310830 }, { "epoch": 89.42462600690449, "grad_norm": 1.0451222658157349, "learning_rate": 0.00021150747986191024, "loss": 0.2698, "step": 310840 }, { "epoch": 89.42750287686997, "grad_norm": 1.3765027523040771, "learning_rate": 0.0002114499424626007, "loss": 0.3294, "step": 310850 }, { "epoch": 89.43037974683544, "grad_norm": 1.1969233751296997, "learning_rate": 0.00021139240506329112, "loss": 0.3124, "step": 310860 }, { "epoch": 89.43325661680092, "grad_norm": 1.2731666564941406, "learning_rate": 0.0002113348676639816, "loss": 0.3596, "step": 310870 }, { "epoch": 89.4361334867664, "grad_norm": 1.2127277851104736, "learning_rate": 0.00021127733026467206, "loss": 0.372, "step": 310880 }, { "epoch": 89.43901035673187, "grad_norm": 0.9762656688690186, "learning_rate": 0.0002112197928653625, "loss": 0.2666, "step": 310890 }, { "epoch": 89.44188722669735, "grad_norm": 1.2624822854995728, "learning_rate": 0.00021116225546605294, "loss": 0.3468, "step": 310900 }, { "epoch": 89.44476409666284, "grad_norm": 0.663508951663971, "learning_rate": 0.0002111047180667434, "loss": 0.3304, "step": 310910 }, { "epoch": 89.44764096662831, "grad_norm": 1.5740240812301636, "learning_rate": 0.00021104718066743383, "loss": 0.3001, "step": 310920 }, { "epoch": 89.45051783659379, "grad_norm": 0.8830869793891907, "learning_rate": 0.00021098964326812428, "loss": 0.3346, "step": 310930 }, { "epoch": 89.45339470655927, "grad_norm": 1.4644683599472046, "learning_rate": 0.00021093210586881474, "loss": 0.2578, "step": 310940 }, { "epoch": 89.45627157652474, "grad_norm": 0.9983639121055603, "learning_rate": 0.00021087456846950517, "loss": 0.28, "step": 310950 }, { "epoch": 89.45914844649022, "grad_norm": 1.117186188697815, "learning_rate": 0.00021081703107019562, "loss": 0.3191, "step": 310960 }, { "epoch": 89.4620253164557, "grad_norm": 1.1528867483139038, "learning_rate": 0.0002107594936708861, "loss": 0.292, "step": 310970 }, { "epoch": 89.46490218642117, "grad_norm": 1.6791223287582397, "learning_rate": 0.00021070195627157653, "loss": 0.2819, "step": 310980 }, { "epoch": 89.46777905638665, "grad_norm": 1.211314082145691, "learning_rate": 0.000210644418872267, "loss": 0.29, "step": 310990 }, { "epoch": 89.47065592635212, "grad_norm": 1.0172322988510132, "learning_rate": 0.00021058688147295744, "loss": 0.3443, "step": 311000 }, { "epoch": 89.47353279631761, "grad_norm": 0.9458251595497131, "learning_rate": 0.00021052934407364787, "loss": 0.2366, "step": 311010 }, { "epoch": 89.47640966628309, "grad_norm": 1.7742464542388916, "learning_rate": 0.00021047180667433833, "loss": 0.2948, "step": 311020 }, { "epoch": 89.47928653624857, "grad_norm": 1.238965630531311, "learning_rate": 0.00021041426927502876, "loss": 0.2858, "step": 311030 }, { "epoch": 89.48216340621404, "grad_norm": 1.0240185260772705, "learning_rate": 0.0002103567318757192, "loss": 0.3118, "step": 311040 }, { "epoch": 89.48504027617952, "grad_norm": 1.2990318536758423, "learning_rate": 0.00021029919447640967, "loss": 0.2836, "step": 311050 }, { "epoch": 89.487917146145, "grad_norm": 0.9431288242340088, "learning_rate": 0.0002102416570771001, "loss": 0.2853, "step": 311060 }, { "epoch": 89.49079401611047, "grad_norm": 0.5867667198181152, "learning_rate": 0.00021018411967779058, "loss": 0.2633, "step": 311070 }, { "epoch": 89.49367088607595, "grad_norm": 0.8806234002113342, "learning_rate": 0.00021012658227848103, "loss": 0.3418, "step": 311080 }, { "epoch": 89.49654775604142, "grad_norm": 1.386979341506958, "learning_rate": 0.00021006904487917146, "loss": 0.3309, "step": 311090 }, { "epoch": 89.4994246260069, "grad_norm": 1.1343554258346558, "learning_rate": 0.00021001150747986192, "loss": 0.2728, "step": 311100 }, { "epoch": 89.50230149597238, "grad_norm": 1.7737852334976196, "learning_rate": 0.00020995397008055237, "loss": 0.3082, "step": 311110 }, { "epoch": 89.50517836593787, "grad_norm": 1.5699113607406616, "learning_rate": 0.0002098964326812428, "loss": 0.3383, "step": 311120 }, { "epoch": 89.50805523590334, "grad_norm": 0.9050644040107727, "learning_rate": 0.00020983889528193326, "loss": 0.286, "step": 311130 }, { "epoch": 89.51093210586882, "grad_norm": 1.0534179210662842, "learning_rate": 0.0002097813578826237, "loss": 0.2981, "step": 311140 }, { "epoch": 89.5138089758343, "grad_norm": 1.234847068786621, "learning_rate": 0.00020972382048331414, "loss": 0.3765, "step": 311150 }, { "epoch": 89.51668584579977, "grad_norm": 1.0972801446914673, "learning_rate": 0.0002096662830840046, "loss": 0.3101, "step": 311160 }, { "epoch": 89.51956271576525, "grad_norm": 1.4280829429626465, "learning_rate": 0.00020960874568469508, "loss": 0.2791, "step": 311170 }, { "epoch": 89.52243958573072, "grad_norm": 1.057250738143921, "learning_rate": 0.0002095512082853855, "loss": 0.3123, "step": 311180 }, { "epoch": 89.5253164556962, "grad_norm": 1.3293107748031616, "learning_rate": 0.00020949367088607596, "loss": 0.3148, "step": 311190 }, { "epoch": 89.52819332566168, "grad_norm": 1.6528657674789429, "learning_rate": 0.00020943613348676642, "loss": 0.2901, "step": 311200 }, { "epoch": 89.53107019562715, "grad_norm": 1.4734561443328857, "learning_rate": 0.00020937859608745685, "loss": 0.3007, "step": 311210 }, { "epoch": 89.53394706559264, "grad_norm": 1.1797527074813843, "learning_rate": 0.0002093210586881473, "loss": 0.3209, "step": 311220 }, { "epoch": 89.53682393555812, "grad_norm": 1.1775107383728027, "learning_rate": 0.00020926352128883773, "loss": 0.3162, "step": 311230 }, { "epoch": 89.5397008055236, "grad_norm": 2.540112018585205, "learning_rate": 0.00020920598388952819, "loss": 0.3187, "step": 311240 }, { "epoch": 89.54257767548907, "grad_norm": 1.8438187837600708, "learning_rate": 0.00020914844649021864, "loss": 0.3353, "step": 311250 }, { "epoch": 89.54545454545455, "grad_norm": 1.403821587562561, "learning_rate": 0.00020909090909090907, "loss": 0.2952, "step": 311260 }, { "epoch": 89.54833141542002, "grad_norm": 1.885557770729065, "learning_rate": 0.00020903337169159955, "loss": 0.3441, "step": 311270 }, { "epoch": 89.5512082853855, "grad_norm": 0.9419282078742981, "learning_rate": 0.00020897583429229, "loss": 0.2428, "step": 311280 }, { "epoch": 89.55408515535098, "grad_norm": 1.1302076578140259, "learning_rate": 0.00020891829689298044, "loss": 0.2861, "step": 311290 }, { "epoch": 89.55696202531645, "grad_norm": 1.0328023433685303, "learning_rate": 0.0002088607594936709, "loss": 0.2931, "step": 311300 }, { "epoch": 89.55983889528193, "grad_norm": 0.9849163889884949, "learning_rate": 0.00020880322209436135, "loss": 0.3174, "step": 311310 }, { "epoch": 89.5627157652474, "grad_norm": 2.0057945251464844, "learning_rate": 0.00020874568469505178, "loss": 0.4038, "step": 311320 }, { "epoch": 89.5655926352129, "grad_norm": 1.3680768013000488, "learning_rate": 0.00020868814729574223, "loss": 0.368, "step": 311330 }, { "epoch": 89.56846950517837, "grad_norm": 1.3153538703918457, "learning_rate": 0.0002086306098964327, "loss": 0.271, "step": 311340 }, { "epoch": 89.57134637514385, "grad_norm": 1.612912654876709, "learning_rate": 0.00020857307249712312, "loss": 0.3152, "step": 311350 }, { "epoch": 89.57422324510932, "grad_norm": 1.7484378814697266, "learning_rate": 0.0002085155350978136, "loss": 0.3317, "step": 311360 }, { "epoch": 89.5771001150748, "grad_norm": 0.9881995320320129, "learning_rate": 0.00020845799769850405, "loss": 0.3159, "step": 311370 }, { "epoch": 89.57997698504028, "grad_norm": 0.6095183491706848, "learning_rate": 0.00020840046029919448, "loss": 0.2974, "step": 311380 }, { "epoch": 89.58285385500575, "grad_norm": 0.9123234152793884, "learning_rate": 0.00020834292289988494, "loss": 0.3284, "step": 311390 }, { "epoch": 89.58573072497123, "grad_norm": 1.4259300231933594, "learning_rate": 0.0002082853855005754, "loss": 0.2917, "step": 311400 }, { "epoch": 89.5886075949367, "grad_norm": 1.2170774936676025, "learning_rate": 0.00020822784810126582, "loss": 0.2657, "step": 311410 }, { "epoch": 89.59148446490218, "grad_norm": 1.4194560050964355, "learning_rate": 0.00020817031070195628, "loss": 0.2623, "step": 311420 }, { "epoch": 89.59436133486767, "grad_norm": 1.3756976127624512, "learning_rate": 0.0002081127733026467, "loss": 0.27, "step": 311430 }, { "epoch": 89.59723820483315, "grad_norm": 1.9553327560424805, "learning_rate": 0.00020805523590333716, "loss": 0.3467, "step": 311440 }, { "epoch": 89.60011507479862, "grad_norm": 0.9867158532142639, "learning_rate": 0.00020799769850402762, "loss": 0.287, "step": 311450 }, { "epoch": 89.6029919447641, "grad_norm": 0.7219287157058716, "learning_rate": 0.00020794016110471807, "loss": 0.3033, "step": 311460 }, { "epoch": 89.60586881472958, "grad_norm": 1.1313039064407349, "learning_rate": 0.00020788262370540853, "loss": 0.3224, "step": 311470 }, { "epoch": 89.60874568469505, "grad_norm": 1.765965223312378, "learning_rate": 0.00020782508630609898, "loss": 0.2621, "step": 311480 }, { "epoch": 89.61162255466053, "grad_norm": 1.0872433185577393, "learning_rate": 0.0002077675489067894, "loss": 0.2641, "step": 311490 }, { "epoch": 89.614499424626, "grad_norm": 0.8034403324127197, "learning_rate": 0.00020771001150747987, "loss": 0.2541, "step": 311500 }, { "epoch": 89.61737629459148, "grad_norm": 0.8607601523399353, "learning_rate": 0.00020765247410817032, "loss": 0.2696, "step": 311510 }, { "epoch": 89.62025316455696, "grad_norm": 1.7898699045181274, "learning_rate": 0.00020759493670886075, "loss": 0.2946, "step": 311520 }, { "epoch": 89.62313003452243, "grad_norm": 0.9703875184059143, "learning_rate": 0.0002075373993095512, "loss": 0.2874, "step": 311530 }, { "epoch": 89.62600690448792, "grad_norm": 1.793708086013794, "learning_rate": 0.00020747986191024166, "loss": 0.3604, "step": 311540 }, { "epoch": 89.6288837744534, "grad_norm": 1.8607921600341797, "learning_rate": 0.0002074223245109321, "loss": 0.3261, "step": 311550 }, { "epoch": 89.63176064441888, "grad_norm": 0.9944487810134888, "learning_rate": 0.00020736478711162257, "loss": 0.2768, "step": 311560 }, { "epoch": 89.63463751438435, "grad_norm": 1.3648438453674316, "learning_rate": 0.00020730724971231303, "loss": 0.2579, "step": 311570 }, { "epoch": 89.63751438434983, "grad_norm": 1.816429615020752, "learning_rate": 0.00020724971231300346, "loss": 0.3766, "step": 311580 }, { "epoch": 89.6403912543153, "grad_norm": 1.0480247735977173, "learning_rate": 0.0002071921749136939, "loss": 0.2477, "step": 311590 }, { "epoch": 89.64326812428078, "grad_norm": 0.9148752689361572, "learning_rate": 0.00020713463751438437, "loss": 0.2973, "step": 311600 }, { "epoch": 89.64614499424626, "grad_norm": 1.8230546712875366, "learning_rate": 0.0002070771001150748, "loss": 0.2794, "step": 311610 }, { "epoch": 89.64902186421173, "grad_norm": 1.9511516094207764, "learning_rate": 0.00020701956271576525, "loss": 0.2734, "step": 311620 }, { "epoch": 89.65189873417721, "grad_norm": 1.1276684999465942, "learning_rate": 0.00020696202531645568, "loss": 0.2799, "step": 311630 }, { "epoch": 89.6547756041427, "grad_norm": 0.8716979622840881, "learning_rate": 0.00020690448791714614, "loss": 0.2856, "step": 311640 }, { "epoch": 89.65765247410818, "grad_norm": 1.244507908821106, "learning_rate": 0.0002068469505178366, "loss": 0.3313, "step": 311650 }, { "epoch": 89.66052934407365, "grad_norm": 1.033515214920044, "learning_rate": 0.00020678941311852705, "loss": 0.2831, "step": 311660 }, { "epoch": 89.66340621403913, "grad_norm": 0.7162938117980957, "learning_rate": 0.0002067318757192175, "loss": 0.3052, "step": 311670 }, { "epoch": 89.6662830840046, "grad_norm": 0.8181139230728149, "learning_rate": 0.00020667433831990796, "loss": 0.3321, "step": 311680 }, { "epoch": 89.66915995397008, "grad_norm": 0.7949811816215515, "learning_rate": 0.00020661680092059839, "loss": 0.2771, "step": 311690 }, { "epoch": 89.67203682393556, "grad_norm": 1.7147704362869263, "learning_rate": 0.00020655926352128884, "loss": 0.3082, "step": 311700 }, { "epoch": 89.67491369390103, "grad_norm": 1.4998520612716675, "learning_rate": 0.0002065017261219793, "loss": 0.2767, "step": 311710 }, { "epoch": 89.67779056386651, "grad_norm": 2.3336520195007324, "learning_rate": 0.00020644418872266973, "loss": 0.3558, "step": 311720 }, { "epoch": 89.68066743383199, "grad_norm": 1.6108402013778687, "learning_rate": 0.00020638665132336018, "loss": 0.3321, "step": 311730 }, { "epoch": 89.68354430379746, "grad_norm": 0.8791632652282715, "learning_rate": 0.00020632911392405064, "loss": 0.3074, "step": 311740 }, { "epoch": 89.68642117376295, "grad_norm": 0.9963803887367249, "learning_rate": 0.00020627157652474106, "loss": 0.2839, "step": 311750 }, { "epoch": 89.68929804372843, "grad_norm": 1.4986436367034912, "learning_rate": 0.00020621403912543155, "loss": 0.3332, "step": 311760 }, { "epoch": 89.6921749136939, "grad_norm": 0.8547908067703247, "learning_rate": 0.000206156501726122, "loss": 0.224, "step": 311770 }, { "epoch": 89.69505178365938, "grad_norm": 1.045823335647583, "learning_rate": 0.00020609896432681243, "loss": 0.2835, "step": 311780 }, { "epoch": 89.69792865362486, "grad_norm": 1.2680903673171997, "learning_rate": 0.0002060414269275029, "loss": 0.3096, "step": 311790 }, { "epoch": 89.70080552359033, "grad_norm": 1.6551826000213623, "learning_rate": 0.00020598388952819334, "loss": 0.3543, "step": 311800 }, { "epoch": 89.70368239355581, "grad_norm": 0.9519853591918945, "learning_rate": 0.00020592635212888377, "loss": 0.2556, "step": 311810 }, { "epoch": 89.70655926352129, "grad_norm": 0.6493368148803711, "learning_rate": 0.00020586881472957423, "loss": 0.2638, "step": 311820 }, { "epoch": 89.70943613348676, "grad_norm": 0.9884347915649414, "learning_rate": 0.00020581127733026468, "loss": 0.2644, "step": 311830 }, { "epoch": 89.71231300345224, "grad_norm": 0.9261664152145386, "learning_rate": 0.0002057537399309551, "loss": 0.3133, "step": 311840 }, { "epoch": 89.71518987341773, "grad_norm": 1.5138529539108276, "learning_rate": 0.0002056962025316456, "loss": 0.2157, "step": 311850 }, { "epoch": 89.7180667433832, "grad_norm": 1.9693208932876587, "learning_rate": 0.00020563866513233602, "loss": 0.2801, "step": 311860 }, { "epoch": 89.72094361334868, "grad_norm": 2.159337282180786, "learning_rate": 0.00020558112773302648, "loss": 0.2803, "step": 311870 }, { "epoch": 89.72382048331416, "grad_norm": 1.1737632751464844, "learning_rate": 0.00020552359033371693, "loss": 0.3416, "step": 311880 }, { "epoch": 89.72669735327963, "grad_norm": 1.6101831197738647, "learning_rate": 0.00020546605293440736, "loss": 0.2893, "step": 311890 }, { "epoch": 89.72957422324511, "grad_norm": 2.3305470943450928, "learning_rate": 0.00020540851553509782, "loss": 0.3133, "step": 311900 }, { "epoch": 89.73245109321059, "grad_norm": 0.948609471321106, "learning_rate": 0.00020535097813578827, "loss": 0.3119, "step": 311910 }, { "epoch": 89.73532796317606, "grad_norm": 1.4889065027236938, "learning_rate": 0.0002052934407364787, "loss": 0.2693, "step": 311920 }, { "epoch": 89.73820483314154, "grad_norm": 1.9419867992401123, "learning_rate": 0.00020523590333716916, "loss": 0.302, "step": 311930 }, { "epoch": 89.74108170310701, "grad_norm": 1.7992159128189087, "learning_rate": 0.0002051783659378596, "loss": 0.2299, "step": 311940 }, { "epoch": 89.74395857307249, "grad_norm": 2.189974546432495, "learning_rate": 0.00020512082853855007, "loss": 0.3583, "step": 311950 }, { "epoch": 89.74683544303798, "grad_norm": 1.3818542957305908, "learning_rate": 0.00020506329113924052, "loss": 0.3508, "step": 311960 }, { "epoch": 89.74971231300346, "grad_norm": 1.2834995985031128, "learning_rate": 0.00020500575373993098, "loss": 0.3069, "step": 311970 }, { "epoch": 89.75258918296893, "grad_norm": 1.1622817516326904, "learning_rate": 0.0002049482163406214, "loss": 0.3355, "step": 311980 }, { "epoch": 89.75546605293441, "grad_norm": 1.0758674144744873, "learning_rate": 0.00020489067894131186, "loss": 0.2412, "step": 311990 }, { "epoch": 89.75834292289989, "grad_norm": 0.6898071765899658, "learning_rate": 0.00020483314154200232, "loss": 0.2866, "step": 312000 }, { "epoch": 89.76121979286536, "grad_norm": 1.1115092039108276, "learning_rate": 0.00020477560414269274, "loss": 0.2827, "step": 312010 }, { "epoch": 89.76409666283084, "grad_norm": 1.0155478715896606, "learning_rate": 0.0002047180667433832, "loss": 0.3146, "step": 312020 }, { "epoch": 89.76697353279631, "grad_norm": 1.0089725255966187, "learning_rate": 0.00020466052934407366, "loss": 0.2993, "step": 312030 }, { "epoch": 89.76985040276179, "grad_norm": 0.7899357080459595, "learning_rate": 0.00020460299194476408, "loss": 0.288, "step": 312040 }, { "epoch": 89.77272727272727, "grad_norm": 0.9872832894325256, "learning_rate": 0.00020454545454545457, "loss": 0.269, "step": 312050 }, { "epoch": 89.77560414269276, "grad_norm": 1.2028522491455078, "learning_rate": 0.000204487917146145, "loss": 0.2677, "step": 312060 }, { "epoch": 89.77848101265823, "grad_norm": 0.9374430179595947, "learning_rate": 0.00020443037974683545, "loss": 0.2641, "step": 312070 }, { "epoch": 89.78135788262371, "grad_norm": 1.0649914741516113, "learning_rate": 0.0002043728423475259, "loss": 0.2651, "step": 312080 }, { "epoch": 89.78423475258919, "grad_norm": 1.3981187343597412, "learning_rate": 0.00020431530494821633, "loss": 0.3204, "step": 312090 }, { "epoch": 89.78711162255466, "grad_norm": 0.860757052898407, "learning_rate": 0.0002042577675489068, "loss": 0.3466, "step": 312100 }, { "epoch": 89.78998849252014, "grad_norm": 0.8279395699501038, "learning_rate": 0.00020420023014959725, "loss": 0.2955, "step": 312110 }, { "epoch": 89.79286536248561, "grad_norm": 1.99944007396698, "learning_rate": 0.00020414269275028767, "loss": 0.3411, "step": 312120 }, { "epoch": 89.79574223245109, "grad_norm": 1.6103769540786743, "learning_rate": 0.00020408515535097813, "loss": 0.3159, "step": 312130 }, { "epoch": 89.79861910241657, "grad_norm": 0.6372037529945374, "learning_rate": 0.00020402761795166859, "loss": 0.2427, "step": 312140 }, { "epoch": 89.80149597238204, "grad_norm": 0.9894046783447266, "learning_rate": 0.00020397008055235904, "loss": 0.3206, "step": 312150 }, { "epoch": 89.80437284234753, "grad_norm": 1.5255955457687378, "learning_rate": 0.0002039125431530495, "loss": 0.2906, "step": 312160 }, { "epoch": 89.80724971231301, "grad_norm": 1.7920863628387451, "learning_rate": 0.00020385500575373995, "loss": 0.3285, "step": 312170 }, { "epoch": 89.81012658227849, "grad_norm": 1.6827585697174072, "learning_rate": 0.00020379746835443038, "loss": 0.2701, "step": 312180 }, { "epoch": 89.81300345224396, "grad_norm": 0.8008901476860046, "learning_rate": 0.00020373993095512084, "loss": 0.3444, "step": 312190 }, { "epoch": 89.81588032220944, "grad_norm": 1.7041919231414795, "learning_rate": 0.0002036823935558113, "loss": 0.3353, "step": 312200 }, { "epoch": 89.81875719217491, "grad_norm": 1.008694052696228, "learning_rate": 0.00020362485615650172, "loss": 0.3215, "step": 312210 }, { "epoch": 89.82163406214039, "grad_norm": 1.3880962133407593, "learning_rate": 0.00020356731875719217, "loss": 0.2658, "step": 312220 }, { "epoch": 89.82451093210587, "grad_norm": 1.0187150239944458, "learning_rate": 0.00020350978135788263, "loss": 0.2748, "step": 312230 }, { "epoch": 89.82738780207134, "grad_norm": 1.434842586517334, "learning_rate": 0.00020345224395857306, "loss": 0.3025, "step": 312240 }, { "epoch": 89.83026467203682, "grad_norm": 0.9393511414527893, "learning_rate": 0.00020339470655926354, "loss": 0.2945, "step": 312250 }, { "epoch": 89.8331415420023, "grad_norm": 0.9611086249351501, "learning_rate": 0.00020333716915995397, "loss": 0.2719, "step": 312260 }, { "epoch": 89.83601841196779, "grad_norm": 2.2627832889556885, "learning_rate": 0.00020327963176064443, "loss": 0.3823, "step": 312270 }, { "epoch": 89.83889528193326, "grad_norm": 1.8052526712417603, "learning_rate": 0.00020322209436133488, "loss": 0.2857, "step": 312280 }, { "epoch": 89.84177215189874, "grad_norm": 0.6761360168457031, "learning_rate": 0.0002031645569620253, "loss": 0.2853, "step": 312290 }, { "epoch": 89.84464902186421, "grad_norm": 1.392427921295166, "learning_rate": 0.00020310701956271576, "loss": 0.2681, "step": 312300 }, { "epoch": 89.84752589182969, "grad_norm": 0.8705562949180603, "learning_rate": 0.00020304948216340622, "loss": 0.3264, "step": 312310 }, { "epoch": 89.85040276179517, "grad_norm": 1.4493656158447266, "learning_rate": 0.00020299194476409665, "loss": 0.3121, "step": 312320 }, { "epoch": 89.85327963176064, "grad_norm": 0.716401219367981, "learning_rate": 0.0002029344073647871, "loss": 0.2613, "step": 312330 }, { "epoch": 89.85615650172612, "grad_norm": 0.6676034927368164, "learning_rate": 0.0002028768699654776, "loss": 0.3158, "step": 312340 }, { "epoch": 89.8590333716916, "grad_norm": 1.3934657573699951, "learning_rate": 0.00020281933256616802, "loss": 0.3171, "step": 312350 }, { "epoch": 89.86191024165707, "grad_norm": 1.9741592407226562, "learning_rate": 0.00020276179516685847, "loss": 0.3761, "step": 312360 }, { "epoch": 89.86478711162256, "grad_norm": 1.1712243556976318, "learning_rate": 0.00020270425776754893, "loss": 0.2664, "step": 312370 }, { "epoch": 89.86766398158804, "grad_norm": 1.0173929929733276, "learning_rate": 0.00020264672036823935, "loss": 0.2977, "step": 312380 }, { "epoch": 89.87054085155351, "grad_norm": 1.3172262907028198, "learning_rate": 0.0002025891829689298, "loss": 0.2669, "step": 312390 }, { "epoch": 89.87341772151899, "grad_norm": 2.0858917236328125, "learning_rate": 0.00020253164556962027, "loss": 0.3032, "step": 312400 }, { "epoch": 89.87629459148447, "grad_norm": 0.8494952917098999, "learning_rate": 0.0002024741081703107, "loss": 0.3684, "step": 312410 }, { "epoch": 89.87917146144994, "grad_norm": 1.472899317741394, "learning_rate": 0.00020241657077100115, "loss": 0.279, "step": 312420 }, { "epoch": 89.88204833141542, "grad_norm": 1.0966830253601074, "learning_rate": 0.0002023590333716916, "loss": 0.3052, "step": 312430 }, { "epoch": 89.8849252013809, "grad_norm": 1.0222704410552979, "learning_rate": 0.00020230149597238206, "loss": 0.2509, "step": 312440 }, { "epoch": 89.88780207134637, "grad_norm": 0.9223868250846863, "learning_rate": 0.00020224395857307252, "loss": 0.2958, "step": 312450 }, { "epoch": 89.89067894131185, "grad_norm": 1.209226131439209, "learning_rate": 0.00020218642117376294, "loss": 0.2548, "step": 312460 }, { "epoch": 89.89355581127732, "grad_norm": 1.6911581754684448, "learning_rate": 0.0002021288837744534, "loss": 0.2599, "step": 312470 }, { "epoch": 89.89643268124281, "grad_norm": 1.6070992946624756, "learning_rate": 0.00020207134637514386, "loss": 0.2801, "step": 312480 }, { "epoch": 89.89930955120829, "grad_norm": 1.423406720161438, "learning_rate": 0.00020201380897583428, "loss": 0.3125, "step": 312490 }, { "epoch": 89.90218642117377, "grad_norm": 1.3988274335861206, "learning_rate": 0.00020195627157652474, "loss": 0.3086, "step": 312500 }, { "epoch": 89.90506329113924, "grad_norm": 1.9324653148651123, "learning_rate": 0.0002018987341772152, "loss": 0.2775, "step": 312510 }, { "epoch": 89.90794016110472, "grad_norm": 1.1612645387649536, "learning_rate": 0.00020184119677790562, "loss": 0.3503, "step": 312520 }, { "epoch": 89.9108170310702, "grad_norm": 0.6296808123588562, "learning_rate": 0.00020178365937859608, "loss": 0.2326, "step": 312530 }, { "epoch": 89.91369390103567, "grad_norm": 1.0858687162399292, "learning_rate": 0.00020172612197928656, "loss": 0.3016, "step": 312540 }, { "epoch": 89.91657077100115, "grad_norm": 0.8034352660179138, "learning_rate": 0.000201668584579977, "loss": 0.2583, "step": 312550 }, { "epoch": 89.91944764096662, "grad_norm": 1.5142964124679565, "learning_rate": 0.00020161104718066745, "loss": 0.328, "step": 312560 }, { "epoch": 89.9223245109321, "grad_norm": 0.9230496883392334, "learning_rate": 0.0002015535097813579, "loss": 0.2745, "step": 312570 }, { "epoch": 89.92520138089759, "grad_norm": 1.0591837167739868, "learning_rate": 0.00020149597238204833, "loss": 0.3145, "step": 312580 }, { "epoch": 89.92807825086307, "grad_norm": 1.558569073677063, "learning_rate": 0.00020143843498273878, "loss": 0.4052, "step": 312590 }, { "epoch": 89.93095512082854, "grad_norm": 0.9133491516113281, "learning_rate": 0.00020138089758342924, "loss": 0.4054, "step": 312600 }, { "epoch": 89.93383199079402, "grad_norm": 2.379058837890625, "learning_rate": 0.00020132336018411967, "loss": 0.2877, "step": 312610 }, { "epoch": 89.9367088607595, "grad_norm": 1.123534917831421, "learning_rate": 0.00020126582278481012, "loss": 0.2929, "step": 312620 }, { "epoch": 89.93958573072497, "grad_norm": 0.8455742597579956, "learning_rate": 0.00020120828538550058, "loss": 0.2688, "step": 312630 }, { "epoch": 89.94246260069045, "grad_norm": 1.2248263359069824, "learning_rate": 0.00020115074798619104, "loss": 0.2989, "step": 312640 }, { "epoch": 89.94533947065592, "grad_norm": 1.8320623636245728, "learning_rate": 0.0002010932105868815, "loss": 0.3325, "step": 312650 }, { "epoch": 89.9482163406214, "grad_norm": 0.8837438225746155, "learning_rate": 0.00020103567318757192, "loss": 0.2653, "step": 312660 }, { "epoch": 89.95109321058688, "grad_norm": 0.6420301795005798, "learning_rate": 0.00020097813578826237, "loss": 0.3308, "step": 312670 }, { "epoch": 89.95397008055235, "grad_norm": 1.3295328617095947, "learning_rate": 0.00020092059838895283, "loss": 0.2612, "step": 312680 }, { "epoch": 89.95684695051784, "grad_norm": 1.0654520988464355, "learning_rate": 0.00020086306098964326, "loss": 0.269, "step": 312690 }, { "epoch": 89.95972382048332, "grad_norm": 1.1448653936386108, "learning_rate": 0.00020080552359033371, "loss": 0.3097, "step": 312700 }, { "epoch": 89.9626006904488, "grad_norm": 1.1248468160629272, "learning_rate": 0.00020074798619102417, "loss": 0.2957, "step": 312710 }, { "epoch": 89.96547756041427, "grad_norm": 0.7645096182823181, "learning_rate": 0.0002006904487917146, "loss": 0.2762, "step": 312720 }, { "epoch": 89.96835443037975, "grad_norm": 0.8782002925872803, "learning_rate": 0.00020063291139240505, "loss": 0.3192, "step": 312730 }, { "epoch": 89.97123130034522, "grad_norm": 1.8193968534469604, "learning_rate": 0.00020057537399309554, "loss": 0.3533, "step": 312740 }, { "epoch": 89.9741081703107, "grad_norm": 0.9760197401046753, "learning_rate": 0.00020051783659378596, "loss": 0.2997, "step": 312750 }, { "epoch": 89.97698504027618, "grad_norm": 1.6818886995315552, "learning_rate": 0.00020046029919447642, "loss": 0.2821, "step": 312760 }, { "epoch": 89.97986191024165, "grad_norm": 1.3443249464035034, "learning_rate": 0.00020040276179516688, "loss": 0.3143, "step": 312770 }, { "epoch": 89.98273878020713, "grad_norm": 1.704733967781067, "learning_rate": 0.0002003452243958573, "loss": 0.2824, "step": 312780 }, { "epoch": 89.98561565017262, "grad_norm": 1.2098824977874756, "learning_rate": 0.00020028768699654776, "loss": 0.247, "step": 312790 }, { "epoch": 89.9884925201381, "grad_norm": 2.2795755863189697, "learning_rate": 0.00020023014959723821, "loss": 0.3298, "step": 312800 }, { "epoch": 89.99136939010357, "grad_norm": 1.1278196573257446, "learning_rate": 0.00020017261219792864, "loss": 0.3099, "step": 312810 }, { "epoch": 89.99424626006905, "grad_norm": 0.9451940655708313, "learning_rate": 0.0002001150747986191, "loss": 0.3014, "step": 312820 }, { "epoch": 89.99712313003452, "grad_norm": 0.711621880531311, "learning_rate": 0.00020005753739930955, "loss": 0.2787, "step": 312830 }, { "epoch": 90.0, "grad_norm": 1.473036766052246, "learning_rate": 0.0002, "loss": 0.3539, "step": 312840 }, { "epoch": 90.00287686996548, "grad_norm": 1.8427951335906982, "learning_rate": 0.00019994246260069047, "loss": 0.3481, "step": 312850 }, { "epoch": 90.00575373993095, "grad_norm": 2.035128593444824, "learning_rate": 0.00019988492520138092, "loss": 0.2992, "step": 312860 }, { "epoch": 90.00863060989643, "grad_norm": 1.1773287057876587, "learning_rate": 0.00019982738780207135, "loss": 0.2435, "step": 312870 }, { "epoch": 90.0115074798619, "grad_norm": 1.5777658224105835, "learning_rate": 0.0001997698504027618, "loss": 0.2349, "step": 312880 }, { "epoch": 90.01438434982738, "grad_norm": 1.2946630716323853, "learning_rate": 0.00019971231300345223, "loss": 0.2849, "step": 312890 }, { "epoch": 90.01726121979287, "grad_norm": 1.2866907119750977, "learning_rate": 0.0001996547756041427, "loss": 0.2393, "step": 312900 }, { "epoch": 90.02013808975835, "grad_norm": 1.8949819803237915, "learning_rate": 0.00019959723820483314, "loss": 0.3245, "step": 312910 }, { "epoch": 90.02301495972382, "grad_norm": 0.5240654349327087, "learning_rate": 0.00019953970080552357, "loss": 0.3047, "step": 312920 }, { "epoch": 90.0258918296893, "grad_norm": 0.5087935924530029, "learning_rate": 0.00019948216340621405, "loss": 0.3035, "step": 312930 }, { "epoch": 90.02876869965478, "grad_norm": 0.9201937317848206, "learning_rate": 0.0001994246260069045, "loss": 0.2664, "step": 312940 }, { "epoch": 90.03164556962025, "grad_norm": 1.6986010074615479, "learning_rate": 0.00019936708860759494, "loss": 0.3102, "step": 312950 }, { "epoch": 90.03452243958573, "grad_norm": 0.7532168626785278, "learning_rate": 0.0001993095512082854, "loss": 0.2559, "step": 312960 }, { "epoch": 90.0373993095512, "grad_norm": 1.3614559173583984, "learning_rate": 0.00019925201380897585, "loss": 0.2885, "step": 312970 }, { "epoch": 90.04027617951668, "grad_norm": 0.9507145881652832, "learning_rate": 0.00019919447640966628, "loss": 0.258, "step": 312980 }, { "epoch": 90.04315304948216, "grad_norm": 0.9704354405403137, "learning_rate": 0.00019913693901035673, "loss": 0.2694, "step": 312990 }, { "epoch": 90.04602991944765, "grad_norm": 1.6834983825683594, "learning_rate": 0.0001990794016110472, "loss": 0.2773, "step": 313000 }, { "epoch": 90.04890678941312, "grad_norm": 0.992051899433136, "learning_rate": 0.00019902186421173762, "loss": 0.2471, "step": 313010 }, { "epoch": 90.0517836593786, "grad_norm": 0.8219708800315857, "learning_rate": 0.00019896432681242807, "loss": 0.2336, "step": 313020 }, { "epoch": 90.05466052934408, "grad_norm": 0.9866856336593628, "learning_rate": 0.00019890678941311856, "loss": 0.2731, "step": 313030 }, { "epoch": 90.05753739930955, "grad_norm": 0.6895811557769775, "learning_rate": 0.00019884925201380898, "loss": 0.2522, "step": 313040 }, { "epoch": 90.06041426927503, "grad_norm": 1.8522250652313232, "learning_rate": 0.00019879171461449944, "loss": 0.3424, "step": 313050 }, { "epoch": 90.0632911392405, "grad_norm": 0.6795094013214111, "learning_rate": 0.0001987341772151899, "loss": 0.2983, "step": 313060 }, { "epoch": 90.06616800920598, "grad_norm": 1.074665904045105, "learning_rate": 0.00019867663981588032, "loss": 0.2372, "step": 313070 }, { "epoch": 90.06904487917146, "grad_norm": 1.2624869346618652, "learning_rate": 0.00019861910241657078, "loss": 0.3075, "step": 313080 }, { "epoch": 90.07192174913693, "grad_norm": 2.026334047317505, "learning_rate": 0.0001985615650172612, "loss": 0.2781, "step": 313090 }, { "epoch": 90.07479861910241, "grad_norm": 1.9265830516815186, "learning_rate": 0.00019850402761795166, "loss": 0.251, "step": 313100 }, { "epoch": 90.0776754890679, "grad_norm": 0.8252697587013245, "learning_rate": 0.00019844649021864212, "loss": 0.2418, "step": 313110 }, { "epoch": 90.08055235903338, "grad_norm": 0.8747624754905701, "learning_rate": 0.00019838895281933255, "loss": 0.2451, "step": 313120 }, { "epoch": 90.08342922899885, "grad_norm": 0.9297791123390198, "learning_rate": 0.00019833141542002303, "loss": 0.3045, "step": 313130 }, { "epoch": 90.08630609896433, "grad_norm": 1.2237236499786377, "learning_rate": 0.00019827387802071348, "loss": 0.3319, "step": 313140 }, { "epoch": 90.0891829689298, "grad_norm": 1.1211662292480469, "learning_rate": 0.0001982163406214039, "loss": 0.2588, "step": 313150 }, { "epoch": 90.09205983889528, "grad_norm": 0.8170650601387024, "learning_rate": 0.00019815880322209437, "loss": 0.2783, "step": 313160 }, { "epoch": 90.09493670886076, "grad_norm": 1.8939763307571411, "learning_rate": 0.00019810126582278482, "loss": 0.2857, "step": 313170 }, { "epoch": 90.09781357882623, "grad_norm": 1.2180454730987549, "learning_rate": 0.00019804372842347525, "loss": 0.2305, "step": 313180 }, { "epoch": 90.10069044879171, "grad_norm": 0.7034903168678284, "learning_rate": 0.0001979861910241657, "loss": 0.2876, "step": 313190 }, { "epoch": 90.10356731875719, "grad_norm": 1.3787434101104736, "learning_rate": 0.00019792865362485616, "loss": 0.321, "step": 313200 }, { "epoch": 90.10644418872268, "grad_norm": 0.7483963966369629, "learning_rate": 0.0001978711162255466, "loss": 0.2514, "step": 313210 }, { "epoch": 90.10932105868815, "grad_norm": 1.524073600769043, "learning_rate": 0.00019781357882623705, "loss": 0.299, "step": 313220 }, { "epoch": 90.11219792865363, "grad_norm": 0.9636046886444092, "learning_rate": 0.00019775604142692753, "loss": 0.2448, "step": 313230 }, { "epoch": 90.1150747986191, "grad_norm": 0.8391215801239014, "learning_rate": 0.00019769850402761796, "loss": 0.2551, "step": 313240 }, { "epoch": 90.11795166858458, "grad_norm": 0.6245182752609253, "learning_rate": 0.00019764096662830841, "loss": 0.3756, "step": 313250 }, { "epoch": 90.12082853855006, "grad_norm": 0.9184819459915161, "learning_rate": 0.00019758342922899887, "loss": 0.2391, "step": 313260 }, { "epoch": 90.12370540851553, "grad_norm": 1.1365076303482056, "learning_rate": 0.0001975258918296893, "loss": 0.2964, "step": 313270 }, { "epoch": 90.12658227848101, "grad_norm": 1.553015947341919, "learning_rate": 0.00019746835443037975, "loss": 0.3446, "step": 313280 }, { "epoch": 90.12945914844649, "grad_norm": 0.7103137969970703, "learning_rate": 0.00019741081703107018, "loss": 0.2544, "step": 313290 }, { "epoch": 90.13233601841196, "grad_norm": 1.0558596849441528, "learning_rate": 0.00019735327963176064, "loss": 0.2827, "step": 313300 }, { "epoch": 90.13521288837744, "grad_norm": 0.6167342066764832, "learning_rate": 0.0001972957422324511, "loss": 0.2867, "step": 313310 }, { "epoch": 90.13808975834293, "grad_norm": 1.0895030498504639, "learning_rate": 0.00019723820483314152, "loss": 0.272, "step": 313320 }, { "epoch": 90.1409666283084, "grad_norm": 1.4687674045562744, "learning_rate": 0.000197180667433832, "loss": 0.326, "step": 313330 }, { "epoch": 90.14384349827388, "grad_norm": 1.2056567668914795, "learning_rate": 0.00019712313003452246, "loss": 0.2881, "step": 313340 }, { "epoch": 90.14672036823936, "grad_norm": 1.5105934143066406, "learning_rate": 0.0001970655926352129, "loss": 0.2837, "step": 313350 }, { "epoch": 90.14959723820483, "grad_norm": 1.4574034214019775, "learning_rate": 0.00019700805523590334, "loss": 0.2755, "step": 313360 }, { "epoch": 90.15247410817031, "grad_norm": 1.5014301538467407, "learning_rate": 0.0001969505178365938, "loss": 0.2676, "step": 313370 }, { "epoch": 90.15535097813579, "grad_norm": 1.0323657989501953, "learning_rate": 0.00019689298043728423, "loss": 0.268, "step": 313380 }, { "epoch": 90.15822784810126, "grad_norm": 2.326538562774658, "learning_rate": 0.00019683544303797468, "loss": 0.2596, "step": 313390 }, { "epoch": 90.16110471806674, "grad_norm": 1.4584345817565918, "learning_rate": 0.00019677790563866514, "loss": 0.2794, "step": 313400 }, { "epoch": 90.16398158803221, "grad_norm": 1.0738643407821655, "learning_rate": 0.00019672036823935557, "loss": 0.2796, "step": 313410 }, { "epoch": 90.1668584579977, "grad_norm": 0.9278303980827332, "learning_rate": 0.00019666283084004605, "loss": 0.2383, "step": 313420 }, { "epoch": 90.16973532796318, "grad_norm": 1.7325035333633423, "learning_rate": 0.0001966052934407365, "loss": 0.2891, "step": 313430 }, { "epoch": 90.17261219792866, "grad_norm": 0.8466410636901855, "learning_rate": 0.00019654775604142693, "loss": 0.2677, "step": 313440 }, { "epoch": 90.17548906789413, "grad_norm": 1.120988130569458, "learning_rate": 0.0001964902186421174, "loss": 0.2544, "step": 313450 }, { "epoch": 90.17836593785961, "grad_norm": 0.8084256052970886, "learning_rate": 0.00019643268124280784, "loss": 0.2718, "step": 313460 }, { "epoch": 90.18124280782509, "grad_norm": 1.7227506637573242, "learning_rate": 0.00019637514384349827, "loss": 0.2792, "step": 313470 }, { "epoch": 90.18411967779056, "grad_norm": 0.9439088702201843, "learning_rate": 0.00019631760644418873, "loss": 0.2182, "step": 313480 }, { "epoch": 90.18699654775604, "grad_norm": 2.789734125137329, "learning_rate": 0.00019626006904487916, "loss": 0.3409, "step": 313490 }, { "epoch": 90.18987341772151, "grad_norm": 0.9498010873794556, "learning_rate": 0.0001962025316455696, "loss": 0.2512, "step": 313500 }, { "epoch": 90.19275028768699, "grad_norm": 2.3369979858398438, "learning_rate": 0.00019614499424626007, "loss": 0.276, "step": 313510 }, { "epoch": 90.19562715765247, "grad_norm": 0.84397953748703, "learning_rate": 0.00019608745684695052, "loss": 0.2907, "step": 313520 }, { "epoch": 90.19850402761796, "grad_norm": 0.7036689519882202, "learning_rate": 0.00019602991944764098, "loss": 0.3016, "step": 313530 }, { "epoch": 90.20138089758343, "grad_norm": 0.7470217347145081, "learning_rate": 0.00019597238204833143, "loss": 0.3629, "step": 313540 }, { "epoch": 90.20425776754891, "grad_norm": 0.7210049033164978, "learning_rate": 0.00019591484464902186, "loss": 0.2924, "step": 313550 }, { "epoch": 90.20713463751439, "grad_norm": 0.8457767963409424, "learning_rate": 0.00019585730724971232, "loss": 0.3151, "step": 313560 }, { "epoch": 90.21001150747986, "grad_norm": 1.0757322311401367, "learning_rate": 0.00019579976985040277, "loss": 0.2577, "step": 313570 }, { "epoch": 90.21288837744534, "grad_norm": 1.0184640884399414, "learning_rate": 0.0001957422324510932, "loss": 0.3014, "step": 313580 }, { "epoch": 90.21576524741081, "grad_norm": 1.2196881771087646, "learning_rate": 0.00019568469505178366, "loss": 0.2966, "step": 313590 }, { "epoch": 90.21864211737629, "grad_norm": 1.6103546619415283, "learning_rate": 0.0001956271576524741, "loss": 0.2908, "step": 313600 }, { "epoch": 90.22151898734177, "grad_norm": 0.8519541621208191, "learning_rate": 0.00019556962025316454, "loss": 0.2691, "step": 313610 }, { "epoch": 90.22439585730724, "grad_norm": 1.3866069316864014, "learning_rate": 0.00019551208285385502, "loss": 0.2643, "step": 313620 }, { "epoch": 90.22727272727273, "grad_norm": 1.8162052631378174, "learning_rate": 0.00019545454545454548, "loss": 0.2927, "step": 313630 }, { "epoch": 90.23014959723821, "grad_norm": 0.9856147766113281, "learning_rate": 0.0001953970080552359, "loss": 0.2532, "step": 313640 }, { "epoch": 90.23302646720369, "grad_norm": 1.332214117050171, "learning_rate": 0.00019533947065592636, "loss": 0.3117, "step": 313650 }, { "epoch": 90.23590333716916, "grad_norm": 1.2725560665130615, "learning_rate": 0.00019528193325661682, "loss": 0.2704, "step": 313660 }, { "epoch": 90.23878020713464, "grad_norm": 0.7824367880821228, "learning_rate": 0.00019522439585730725, "loss": 0.2764, "step": 313670 }, { "epoch": 90.24165707710011, "grad_norm": 2.701927900314331, "learning_rate": 0.0001951668584579977, "loss": 0.2648, "step": 313680 }, { "epoch": 90.24453394706559, "grad_norm": 1.4068855047225952, "learning_rate": 0.00019510932105868813, "loss": 0.2892, "step": 313690 }, { "epoch": 90.24741081703107, "grad_norm": 1.6564003229141235, "learning_rate": 0.0001950517836593786, "loss": 0.2693, "step": 313700 }, { "epoch": 90.25028768699654, "grad_norm": 0.7691037058830261, "learning_rate": 0.00019499424626006904, "loss": 0.2854, "step": 313710 }, { "epoch": 90.25316455696202, "grad_norm": 1.6083879470825195, "learning_rate": 0.0001949367088607595, "loss": 0.2984, "step": 313720 }, { "epoch": 90.25604142692751, "grad_norm": 1.1961259841918945, "learning_rate": 0.00019487917146144995, "loss": 0.2919, "step": 313730 }, { "epoch": 90.25891829689299, "grad_norm": 2.9653429985046387, "learning_rate": 0.0001948216340621404, "loss": 0.437, "step": 313740 }, { "epoch": 90.26179516685846, "grad_norm": 1.8514761924743652, "learning_rate": 0.00019476409666283084, "loss": 0.3229, "step": 313750 }, { "epoch": 90.26467203682394, "grad_norm": 1.328429102897644, "learning_rate": 0.0001947065592635213, "loss": 0.3049, "step": 313760 }, { "epoch": 90.26754890678941, "grad_norm": 1.2222328186035156, "learning_rate": 0.00019464902186421175, "loss": 0.3752, "step": 313770 }, { "epoch": 90.27042577675489, "grad_norm": 1.3269288539886475, "learning_rate": 0.00019459148446490218, "loss": 0.2386, "step": 313780 }, { "epoch": 90.27330264672037, "grad_norm": 1.1572837829589844, "learning_rate": 0.00019453394706559263, "loss": 0.2673, "step": 313790 }, { "epoch": 90.27617951668584, "grad_norm": 1.3151392936706543, "learning_rate": 0.0001944764096662831, "loss": 0.252, "step": 313800 }, { "epoch": 90.27905638665132, "grad_norm": 2.7798688411712646, "learning_rate": 0.00019441887226697352, "loss": 0.3072, "step": 313810 }, { "epoch": 90.2819332566168, "grad_norm": 0.7367158532142639, "learning_rate": 0.000194361334867664, "loss": 0.3044, "step": 313820 }, { "epoch": 90.28481012658227, "grad_norm": 0.9762572050094604, "learning_rate": 0.00019430379746835445, "loss": 0.2506, "step": 313830 }, { "epoch": 90.28768699654776, "grad_norm": 1.3299357891082764, "learning_rate": 0.00019424626006904488, "loss": 0.2917, "step": 313840 }, { "epoch": 90.29056386651324, "grad_norm": 0.9774007797241211, "learning_rate": 0.00019418872266973534, "loss": 0.2633, "step": 313850 }, { "epoch": 90.29344073647871, "grad_norm": 0.6762272715568542, "learning_rate": 0.0001941311852704258, "loss": 0.3822, "step": 313860 }, { "epoch": 90.29631760644419, "grad_norm": 1.512900948524475, "learning_rate": 0.00019407364787111622, "loss": 0.2833, "step": 313870 }, { "epoch": 90.29919447640967, "grad_norm": 1.4567888975143433, "learning_rate": 0.00019401611047180668, "loss": 0.2721, "step": 313880 }, { "epoch": 90.30207134637514, "grad_norm": 0.8487271070480347, "learning_rate": 0.0001939585730724971, "loss": 0.3356, "step": 313890 }, { "epoch": 90.30494821634062, "grad_norm": 0.7696360349655151, "learning_rate": 0.00019390103567318756, "loss": 0.3192, "step": 313900 }, { "epoch": 90.3078250863061, "grad_norm": 1.3693276643753052, "learning_rate": 0.00019384349827387804, "loss": 0.2772, "step": 313910 }, { "epoch": 90.31070195627157, "grad_norm": 1.323392391204834, "learning_rate": 0.00019378596087456847, "loss": 0.259, "step": 313920 }, { "epoch": 90.31357882623705, "grad_norm": 0.8873403668403625, "learning_rate": 0.00019372842347525893, "loss": 0.2941, "step": 313930 }, { "epoch": 90.31645569620254, "grad_norm": 2.0573785305023193, "learning_rate": 0.00019367088607594938, "loss": 0.2955, "step": 313940 }, { "epoch": 90.31933256616801, "grad_norm": 1.0106266736984253, "learning_rate": 0.0001936133486766398, "loss": 0.2622, "step": 313950 }, { "epoch": 90.32220943613349, "grad_norm": 1.3796089887619019, "learning_rate": 0.00019355581127733027, "loss": 0.2441, "step": 313960 }, { "epoch": 90.32508630609897, "grad_norm": 1.1855192184448242, "learning_rate": 0.00019349827387802072, "loss": 0.3361, "step": 313970 }, { "epoch": 90.32796317606444, "grad_norm": 1.2160139083862305, "learning_rate": 0.00019344073647871115, "loss": 0.3454, "step": 313980 }, { "epoch": 90.33084004602992, "grad_norm": 1.5333374738693237, "learning_rate": 0.0001933831990794016, "loss": 0.2974, "step": 313990 }, { "epoch": 90.3337169159954, "grad_norm": 1.477225422859192, "learning_rate": 0.00019332566168009206, "loss": 0.3046, "step": 314000 }, { "epoch": 90.33659378596087, "grad_norm": 0.986721932888031, "learning_rate": 0.00019326812428078252, "loss": 0.2922, "step": 314010 }, { "epoch": 90.33947065592635, "grad_norm": 0.7881171107292175, "learning_rate": 0.00019321058688147297, "loss": 0.2729, "step": 314020 }, { "epoch": 90.34234752589182, "grad_norm": 1.5622261762619019, "learning_rate": 0.00019315304948216343, "loss": 0.2951, "step": 314030 }, { "epoch": 90.3452243958573, "grad_norm": 0.8916741609573364, "learning_rate": 0.00019309551208285386, "loss": 0.2425, "step": 314040 }, { "epoch": 90.34810126582279, "grad_norm": 0.9194003939628601, "learning_rate": 0.0001930379746835443, "loss": 0.4154, "step": 314050 }, { "epoch": 90.35097813578827, "grad_norm": 1.366001844406128, "learning_rate": 0.00019298043728423477, "loss": 0.2946, "step": 314060 }, { "epoch": 90.35385500575374, "grad_norm": 1.4393166303634644, "learning_rate": 0.0001929228998849252, "loss": 0.2957, "step": 314070 }, { "epoch": 90.35673187571922, "grad_norm": 0.7843059301376343, "learning_rate": 0.00019286536248561565, "loss": 0.2499, "step": 314080 }, { "epoch": 90.3596087456847, "grad_norm": 1.1633310317993164, "learning_rate": 0.00019280782508630608, "loss": 0.2852, "step": 314090 }, { "epoch": 90.36248561565017, "grad_norm": 1.0315362215042114, "learning_rate": 0.00019275028768699654, "loss": 0.2992, "step": 314100 }, { "epoch": 90.36536248561565, "grad_norm": 1.6967494487762451, "learning_rate": 0.00019269275028768702, "loss": 0.3658, "step": 314110 }, { "epoch": 90.36823935558112, "grad_norm": 0.9155293107032776, "learning_rate": 0.00019263521288837745, "loss": 0.3066, "step": 314120 }, { "epoch": 90.3711162255466, "grad_norm": 0.7367439270019531, "learning_rate": 0.0001925776754890679, "loss": 0.294, "step": 314130 }, { "epoch": 90.37399309551208, "grad_norm": 1.2025598287582397, "learning_rate": 0.00019252013808975836, "loss": 0.2956, "step": 314140 }, { "epoch": 90.37686996547757, "grad_norm": 0.9139834046363831, "learning_rate": 0.00019246260069044879, "loss": 0.2636, "step": 314150 }, { "epoch": 90.37974683544304, "grad_norm": 1.229766607284546, "learning_rate": 0.00019240506329113924, "loss": 0.3318, "step": 314160 }, { "epoch": 90.38262370540852, "grad_norm": 1.0823140144348145, "learning_rate": 0.0001923475258918297, "loss": 0.2481, "step": 314170 }, { "epoch": 90.385500575374, "grad_norm": 1.6594351530075073, "learning_rate": 0.00019228998849252013, "loss": 0.2909, "step": 314180 }, { "epoch": 90.38837744533947, "grad_norm": 2.401305913925171, "learning_rate": 0.00019223245109321058, "loss": 0.3316, "step": 314190 }, { "epoch": 90.39125431530495, "grad_norm": 1.2667558193206787, "learning_rate": 0.00019217491369390104, "loss": 0.3499, "step": 314200 }, { "epoch": 90.39413118527042, "grad_norm": 1.3140742778778076, "learning_rate": 0.0001921173762945915, "loss": 0.2846, "step": 314210 }, { "epoch": 90.3970080552359, "grad_norm": 1.351776123046875, "learning_rate": 0.00019205983889528195, "loss": 0.3041, "step": 314220 }, { "epoch": 90.39988492520138, "grad_norm": 1.7123647928237915, "learning_rate": 0.0001920023014959724, "loss": 0.2847, "step": 314230 }, { "epoch": 90.40276179516685, "grad_norm": 0.8179526925086975, "learning_rate": 0.00019194476409666283, "loss": 0.2751, "step": 314240 }, { "epoch": 90.40563866513233, "grad_norm": 0.6660162806510925, "learning_rate": 0.0001918872266973533, "loss": 0.2377, "step": 314250 }, { "epoch": 90.40851553509782, "grad_norm": 0.9807694554328918, "learning_rate": 0.00019182968929804374, "loss": 0.3054, "step": 314260 }, { "epoch": 90.4113924050633, "grad_norm": 1.7333418130874634, "learning_rate": 0.00019177215189873417, "loss": 0.2989, "step": 314270 }, { "epoch": 90.41426927502877, "grad_norm": 1.8324477672576904, "learning_rate": 0.00019171461449942463, "loss": 0.2404, "step": 314280 }, { "epoch": 90.41714614499425, "grad_norm": 1.5126023292541504, "learning_rate": 0.00019165707710011505, "loss": 0.292, "step": 314290 }, { "epoch": 90.42002301495972, "grad_norm": 1.4002702236175537, "learning_rate": 0.0001915995397008055, "loss": 0.2782, "step": 314300 }, { "epoch": 90.4228998849252, "grad_norm": 0.6248191595077515, "learning_rate": 0.000191542002301496, "loss": 0.3124, "step": 314310 }, { "epoch": 90.42577675489068, "grad_norm": 1.488840937614441, "learning_rate": 0.00019148446490218642, "loss": 0.3188, "step": 314320 }, { "epoch": 90.42865362485615, "grad_norm": 0.9817549586296082, "learning_rate": 0.00019142692750287688, "loss": 0.3694, "step": 314330 }, { "epoch": 90.43153049482163, "grad_norm": 1.923531174659729, "learning_rate": 0.00019136939010356733, "loss": 0.3, "step": 314340 }, { "epoch": 90.4344073647871, "grad_norm": 1.0451356172561646, "learning_rate": 0.00019131185270425776, "loss": 0.3268, "step": 314350 }, { "epoch": 90.4372842347526, "grad_norm": 0.7677781581878662, "learning_rate": 0.00019125431530494822, "loss": 0.2953, "step": 314360 }, { "epoch": 90.44016110471807, "grad_norm": 1.3660390377044678, "learning_rate": 0.00019119677790563867, "loss": 0.2598, "step": 314370 }, { "epoch": 90.44303797468355, "grad_norm": 3.2738237380981445, "learning_rate": 0.0001911392405063291, "loss": 0.3736, "step": 314380 }, { "epoch": 90.44591484464902, "grad_norm": 1.635847568511963, "learning_rate": 0.00019108170310701956, "loss": 0.2561, "step": 314390 }, { "epoch": 90.4487917146145, "grad_norm": 1.575722336769104, "learning_rate": 0.00019102416570771004, "loss": 0.2747, "step": 314400 }, { "epoch": 90.45166858457998, "grad_norm": 0.9797362685203552, "learning_rate": 0.00019096662830840047, "loss": 0.3191, "step": 314410 }, { "epoch": 90.45454545454545, "grad_norm": 2.1491105556488037, "learning_rate": 0.00019090909090909092, "loss": 0.2602, "step": 314420 }, { "epoch": 90.45742232451093, "grad_norm": 1.4866514205932617, "learning_rate": 0.00019085155350978138, "loss": 0.3276, "step": 314430 }, { "epoch": 90.4602991944764, "grad_norm": 1.8035095930099487, "learning_rate": 0.0001907940161104718, "loss": 0.2703, "step": 314440 }, { "epoch": 90.46317606444188, "grad_norm": 1.1308170557022095, "learning_rate": 0.00019073647871116226, "loss": 0.2811, "step": 314450 }, { "epoch": 90.46605293440736, "grad_norm": 1.0919559001922607, "learning_rate": 0.00019067894131185272, "loss": 0.2793, "step": 314460 }, { "epoch": 90.46892980437285, "grad_norm": 0.6203504800796509, "learning_rate": 0.00019062140391254315, "loss": 0.2839, "step": 314470 }, { "epoch": 90.47180667433832, "grad_norm": 0.8916904330253601, "learning_rate": 0.0001905638665132336, "loss": 0.3458, "step": 314480 }, { "epoch": 90.4746835443038, "grad_norm": 0.9804986715316772, "learning_rate": 0.00019050632911392406, "loss": 0.3031, "step": 314490 }, { "epoch": 90.47756041426928, "grad_norm": 1.7018210887908936, "learning_rate": 0.0001904487917146145, "loss": 0.2802, "step": 314500 }, { "epoch": 90.48043728423475, "grad_norm": 2.202544927597046, "learning_rate": 0.00019039125431530497, "loss": 0.3365, "step": 314510 }, { "epoch": 90.48331415420023, "grad_norm": 0.7742264866828918, "learning_rate": 0.0001903337169159954, "loss": 0.2895, "step": 314520 }, { "epoch": 90.4861910241657, "grad_norm": 2.1883795261383057, "learning_rate": 0.00019027617951668585, "loss": 0.3521, "step": 314530 }, { "epoch": 90.48906789413118, "grad_norm": 0.8600975275039673, "learning_rate": 0.0001902186421173763, "loss": 0.2675, "step": 314540 }, { "epoch": 90.49194476409666, "grad_norm": 1.2686747312545776, "learning_rate": 0.00019016110471806674, "loss": 0.3111, "step": 314550 }, { "epoch": 90.49482163406213, "grad_norm": 1.085076093673706, "learning_rate": 0.0001901035673187572, "loss": 0.2698, "step": 314560 }, { "epoch": 90.49769850402762, "grad_norm": 1.0340732336044312, "learning_rate": 0.00019004602991944765, "loss": 0.2889, "step": 314570 }, { "epoch": 90.5005753739931, "grad_norm": 0.9342890381813049, "learning_rate": 0.00018998849252013807, "loss": 0.2925, "step": 314580 }, { "epoch": 90.50345224395858, "grad_norm": 1.2108774185180664, "learning_rate": 0.00018993095512082853, "loss": 0.3952, "step": 314590 }, { "epoch": 90.50632911392405, "grad_norm": 1.1889843940734863, "learning_rate": 0.000189873417721519, "loss": 0.3188, "step": 314600 }, { "epoch": 90.50920598388953, "grad_norm": 0.917354941368103, "learning_rate": 0.00018981588032220944, "loss": 0.334, "step": 314610 }, { "epoch": 90.512082853855, "grad_norm": 1.1069374084472656, "learning_rate": 0.0001897583429228999, "loss": 0.2497, "step": 314620 }, { "epoch": 90.51495972382048, "grad_norm": 0.8856582045555115, "learning_rate": 0.00018970080552359035, "loss": 0.3431, "step": 314630 }, { "epoch": 90.51783659378596, "grad_norm": 0.8975428342819214, "learning_rate": 0.00018964326812428078, "loss": 0.3267, "step": 314640 }, { "epoch": 90.52071346375143, "grad_norm": 0.8649385571479797, "learning_rate": 0.00018958573072497124, "loss": 0.2642, "step": 314650 }, { "epoch": 90.52359033371691, "grad_norm": 0.9286928772926331, "learning_rate": 0.0001895281933256617, "loss": 0.2391, "step": 314660 }, { "epoch": 90.52646720368239, "grad_norm": 1.073041558265686, "learning_rate": 0.00018947065592635212, "loss": 0.2265, "step": 314670 }, { "epoch": 90.52934407364788, "grad_norm": 1.7885655164718628, "learning_rate": 0.00018941311852704258, "loss": 0.3149, "step": 314680 }, { "epoch": 90.53222094361335, "grad_norm": 1.5035330057144165, "learning_rate": 0.00018935558112773303, "loss": 0.286, "step": 314690 }, { "epoch": 90.53509781357883, "grad_norm": 1.0224977731704712, "learning_rate": 0.00018929804372842349, "loss": 0.2742, "step": 314700 }, { "epoch": 90.5379746835443, "grad_norm": 0.9832314252853394, "learning_rate": 0.00018924050632911394, "loss": 0.3029, "step": 314710 }, { "epoch": 90.54085155350978, "grad_norm": 0.624666690826416, "learning_rate": 0.00018918296892980437, "loss": 0.2185, "step": 314720 }, { "epoch": 90.54372842347526, "grad_norm": 1.3189630508422852, "learning_rate": 0.00018912543153049483, "loss": 0.2813, "step": 314730 }, { "epoch": 90.54660529344073, "grad_norm": 0.7784174680709839, "learning_rate": 0.00018906789413118528, "loss": 0.2445, "step": 314740 }, { "epoch": 90.54948216340621, "grad_norm": 2.170560598373413, "learning_rate": 0.0001890103567318757, "loss": 0.3013, "step": 314750 }, { "epoch": 90.55235903337169, "grad_norm": 1.0672478675842285, "learning_rate": 0.00018895281933256617, "loss": 0.2551, "step": 314760 }, { "epoch": 90.55523590333716, "grad_norm": 0.8050630688667297, "learning_rate": 0.00018889528193325662, "loss": 0.3121, "step": 314770 }, { "epoch": 90.55811277330265, "grad_norm": 1.480263590812683, "learning_rate": 0.00018883774453394705, "loss": 0.2881, "step": 314780 }, { "epoch": 90.56098964326813, "grad_norm": 1.3063018321990967, "learning_rate": 0.0001887802071346375, "loss": 0.3134, "step": 314790 }, { "epoch": 90.5638665132336, "grad_norm": 1.166677713394165, "learning_rate": 0.000188722669735328, "loss": 0.2479, "step": 314800 }, { "epoch": 90.56674338319908, "grad_norm": 1.6830308437347412, "learning_rate": 0.00018866513233601842, "loss": 0.2755, "step": 314810 }, { "epoch": 90.56962025316456, "grad_norm": 2.3040733337402344, "learning_rate": 0.00018860759493670887, "loss": 0.2757, "step": 314820 }, { "epoch": 90.57249712313003, "grad_norm": 1.7249188423156738, "learning_rate": 0.00018855005753739933, "loss": 0.2866, "step": 314830 }, { "epoch": 90.57537399309551, "grad_norm": 2.2362287044525146, "learning_rate": 0.00018849252013808975, "loss": 0.3273, "step": 314840 }, { "epoch": 90.57825086306099, "grad_norm": 1.699014663696289, "learning_rate": 0.0001884349827387802, "loss": 0.4136, "step": 314850 }, { "epoch": 90.58112773302646, "grad_norm": 0.7231440544128418, "learning_rate": 0.00018837744533947067, "loss": 0.2774, "step": 314860 }, { "epoch": 90.58400460299194, "grad_norm": 1.0420112609863281, "learning_rate": 0.0001883199079401611, "loss": 0.2117, "step": 314870 }, { "epoch": 90.58688147295742, "grad_norm": 1.495065689086914, "learning_rate": 0.00018826237054085155, "loss": 0.3067, "step": 314880 }, { "epoch": 90.5897583429229, "grad_norm": 1.2171608209609985, "learning_rate": 0.00018820483314154203, "loss": 0.2551, "step": 314890 }, { "epoch": 90.59263521288838, "grad_norm": 1.0299983024597168, "learning_rate": 0.00018814729574223246, "loss": 0.2607, "step": 314900 }, { "epoch": 90.59551208285386, "grad_norm": 1.1966588497161865, "learning_rate": 0.00018808975834292292, "loss": 0.276, "step": 314910 }, { "epoch": 90.59838895281933, "grad_norm": 1.1330525875091553, "learning_rate": 0.00018803222094361334, "loss": 0.2975, "step": 314920 }, { "epoch": 90.60126582278481, "grad_norm": 0.9122841954231262, "learning_rate": 0.0001879746835443038, "loss": 0.2858, "step": 314930 }, { "epoch": 90.60414269275029, "grad_norm": 1.1187471151351929, "learning_rate": 0.00018791714614499426, "loss": 0.3127, "step": 314940 }, { "epoch": 90.60701956271576, "grad_norm": 1.7878437042236328, "learning_rate": 0.00018785960874568468, "loss": 0.2627, "step": 314950 }, { "epoch": 90.60989643268124, "grad_norm": 0.7147758603096008, "learning_rate": 0.00018780207134637514, "loss": 0.3078, "step": 314960 }, { "epoch": 90.61277330264672, "grad_norm": 1.1267117261886597, "learning_rate": 0.0001877445339470656, "loss": 0.3604, "step": 314970 }, { "epoch": 90.61565017261219, "grad_norm": 1.0613884925842285, "learning_rate": 0.00018768699654775602, "loss": 0.3352, "step": 314980 }, { "epoch": 90.61852704257768, "grad_norm": 1.6378062963485718, "learning_rate": 0.0001876294591484465, "loss": 0.3568, "step": 314990 }, { "epoch": 90.62140391254316, "grad_norm": 0.929035484790802, "learning_rate": 0.00018757192174913696, "loss": 0.2515, "step": 315000 }, { "epoch": 90.62428078250863, "grad_norm": 0.9462660551071167, "learning_rate": 0.0001875143843498274, "loss": 0.2112, "step": 315010 }, { "epoch": 90.62715765247411, "grad_norm": 1.4981738328933716, "learning_rate": 0.00018745684695051785, "loss": 0.2937, "step": 315020 }, { "epoch": 90.63003452243959, "grad_norm": 0.7125037312507629, "learning_rate": 0.0001873993095512083, "loss": 0.3219, "step": 315030 }, { "epoch": 90.63291139240506, "grad_norm": 1.7914756536483765, "learning_rate": 0.00018734177215189873, "loss": 0.3464, "step": 315040 }, { "epoch": 90.63578826237054, "grad_norm": 1.109611988067627, "learning_rate": 0.00018728423475258918, "loss": 0.3128, "step": 315050 }, { "epoch": 90.63866513233602, "grad_norm": 0.5212801694869995, "learning_rate": 0.00018722669735327964, "loss": 0.2743, "step": 315060 }, { "epoch": 90.64154200230149, "grad_norm": 1.3260698318481445, "learning_rate": 0.00018716915995397007, "loss": 0.2875, "step": 315070 }, { "epoch": 90.64441887226697, "grad_norm": 0.9064053893089294, "learning_rate": 0.00018711162255466052, "loss": 0.3557, "step": 315080 }, { "epoch": 90.64729574223244, "grad_norm": 0.7136792540550232, "learning_rate": 0.000187054085155351, "loss": 0.2468, "step": 315090 }, { "epoch": 90.65017261219793, "grad_norm": 0.9122639894485474, "learning_rate": 0.00018699654775604144, "loss": 0.3525, "step": 315100 }, { "epoch": 90.65304948216341, "grad_norm": 1.4727026224136353, "learning_rate": 0.0001869390103567319, "loss": 0.2978, "step": 315110 }, { "epoch": 90.65592635212889, "grad_norm": 1.2506762742996216, "learning_rate": 0.00018688147295742232, "loss": 0.2715, "step": 315120 }, { "epoch": 90.65880322209436, "grad_norm": 1.0198497772216797, "learning_rate": 0.00018682393555811277, "loss": 0.3116, "step": 315130 }, { "epoch": 90.66168009205984, "grad_norm": 0.8014049530029297, "learning_rate": 0.00018676639815880323, "loss": 0.3175, "step": 315140 }, { "epoch": 90.66455696202532, "grad_norm": 0.9432719945907593, "learning_rate": 0.00018670886075949366, "loss": 0.3012, "step": 315150 }, { "epoch": 90.66743383199079, "grad_norm": 0.8503019213676453, "learning_rate": 0.00018665132336018411, "loss": 0.2523, "step": 315160 }, { "epoch": 90.67031070195627, "grad_norm": 1.8370579481124878, "learning_rate": 0.00018659378596087457, "loss": 0.2569, "step": 315170 }, { "epoch": 90.67318757192174, "grad_norm": 1.4598091840744019, "learning_rate": 0.000186536248561565, "loss": 0.3415, "step": 315180 }, { "epoch": 90.67606444188722, "grad_norm": 1.0801669359207153, "learning_rate": 0.00018647871116225548, "loss": 0.241, "step": 315190 }, { "epoch": 90.67894131185271, "grad_norm": 0.8861957788467407, "learning_rate": 0.00018642117376294594, "loss": 0.304, "step": 315200 }, { "epoch": 90.68181818181819, "grad_norm": 1.7202324867248535, "learning_rate": 0.00018636363636363636, "loss": 0.3444, "step": 315210 }, { "epoch": 90.68469505178366, "grad_norm": 1.1489468812942505, "learning_rate": 0.00018630609896432682, "loss": 0.2885, "step": 315220 }, { "epoch": 90.68757192174914, "grad_norm": 1.012958288192749, "learning_rate": 0.00018624856156501728, "loss": 0.2559, "step": 315230 }, { "epoch": 90.69044879171462, "grad_norm": 1.1561862230300903, "learning_rate": 0.0001861910241657077, "loss": 0.2957, "step": 315240 }, { "epoch": 90.69332566168009, "grad_norm": 1.2340449094772339, "learning_rate": 0.00018613348676639816, "loss": 0.3142, "step": 315250 }, { "epoch": 90.69620253164557, "grad_norm": 1.1574150323867798, "learning_rate": 0.00018607594936708861, "loss": 0.2836, "step": 315260 }, { "epoch": 90.69907940161104, "grad_norm": 1.2762507200241089, "learning_rate": 0.00018601841196777904, "loss": 0.2656, "step": 315270 }, { "epoch": 90.70195627157652, "grad_norm": 0.6410038471221924, "learning_rate": 0.0001859608745684695, "loss": 0.2846, "step": 315280 }, { "epoch": 90.704833141542, "grad_norm": 0.8958905935287476, "learning_rate": 0.00018590333716915998, "loss": 0.3083, "step": 315290 }, { "epoch": 90.70771001150747, "grad_norm": 0.9968429803848267, "learning_rate": 0.0001858457997698504, "loss": 0.3187, "step": 315300 }, { "epoch": 90.71058688147296, "grad_norm": 1.156800389289856, "learning_rate": 0.00018578826237054087, "loss": 0.2828, "step": 315310 }, { "epoch": 90.71346375143844, "grad_norm": 1.0663784742355347, "learning_rate": 0.00018573072497123132, "loss": 0.2992, "step": 315320 }, { "epoch": 90.71634062140392, "grad_norm": 1.3642812967300415, "learning_rate": 0.00018567318757192175, "loss": 0.2968, "step": 315330 }, { "epoch": 90.71921749136939, "grad_norm": 1.2467550039291382, "learning_rate": 0.0001856156501726122, "loss": 0.3587, "step": 315340 }, { "epoch": 90.72209436133487, "grad_norm": 2.4661502838134766, "learning_rate": 0.00018555811277330263, "loss": 0.3228, "step": 315350 }, { "epoch": 90.72497123130034, "grad_norm": 1.0779621601104736, "learning_rate": 0.0001855005753739931, "loss": 0.2416, "step": 315360 }, { "epoch": 90.72784810126582, "grad_norm": 1.1298807859420776, "learning_rate": 0.00018544303797468354, "loss": 0.2883, "step": 315370 }, { "epoch": 90.7307249712313, "grad_norm": 1.5096341371536255, "learning_rate": 0.00018538550057537397, "loss": 0.3453, "step": 315380 }, { "epoch": 90.73360184119677, "grad_norm": 1.3744614124298096, "learning_rate": 0.00018532796317606446, "loss": 0.2584, "step": 315390 }, { "epoch": 90.73647871116225, "grad_norm": 1.9078811407089233, "learning_rate": 0.0001852704257767549, "loss": 0.3088, "step": 315400 }, { "epoch": 90.73935558112774, "grad_norm": 1.314189076423645, "learning_rate": 0.00018521288837744534, "loss": 0.2814, "step": 315410 }, { "epoch": 90.74223245109322, "grad_norm": 1.5927082300186157, "learning_rate": 0.0001851553509781358, "loss": 0.2656, "step": 315420 }, { "epoch": 90.74510932105869, "grad_norm": 1.1842520236968994, "learning_rate": 0.00018509781357882625, "loss": 0.2988, "step": 315430 }, { "epoch": 90.74798619102417, "grad_norm": 1.2194671630859375, "learning_rate": 0.00018504027617951668, "loss": 0.3193, "step": 315440 }, { "epoch": 90.75086306098964, "grad_norm": 0.7787519693374634, "learning_rate": 0.00018498273878020713, "loss": 0.2523, "step": 315450 }, { "epoch": 90.75373993095512, "grad_norm": 1.102569818496704, "learning_rate": 0.0001849252013808976, "loss": 0.3087, "step": 315460 }, { "epoch": 90.7566168009206, "grad_norm": 2.009579658508301, "learning_rate": 0.00018486766398158802, "loss": 0.3387, "step": 315470 }, { "epoch": 90.75949367088607, "grad_norm": 1.0637085437774658, "learning_rate": 0.0001848101265822785, "loss": 0.2881, "step": 315480 }, { "epoch": 90.76237054085155, "grad_norm": 1.736626386642456, "learning_rate": 0.00018475258918296896, "loss": 0.281, "step": 315490 }, { "epoch": 90.76524741081703, "grad_norm": 1.4115983247756958, "learning_rate": 0.00018469505178365938, "loss": 0.284, "step": 315500 }, { "epoch": 90.7681242807825, "grad_norm": 1.136457085609436, "learning_rate": 0.00018463751438434984, "loss": 0.3579, "step": 315510 }, { "epoch": 90.77100115074799, "grad_norm": 1.8988165855407715, "learning_rate": 0.0001845799769850403, "loss": 0.3293, "step": 315520 }, { "epoch": 90.77387802071347, "grad_norm": 1.5797879695892334, "learning_rate": 0.00018452243958573072, "loss": 0.2775, "step": 315530 }, { "epoch": 90.77675489067894, "grad_norm": 0.9629378914833069, "learning_rate": 0.00018446490218642118, "loss": 0.2172, "step": 315540 }, { "epoch": 90.77963176064442, "grad_norm": 1.5026835203170776, "learning_rate": 0.0001844073647871116, "loss": 0.3389, "step": 315550 }, { "epoch": 90.7825086306099, "grad_norm": 1.3815475702285767, "learning_rate": 0.00018434982738780206, "loss": 0.2449, "step": 315560 }, { "epoch": 90.78538550057537, "grad_norm": 1.210255742073059, "learning_rate": 0.00018429228998849252, "loss": 0.2869, "step": 315570 }, { "epoch": 90.78826237054085, "grad_norm": 0.9150233268737793, "learning_rate": 0.00018423475258918297, "loss": 0.2903, "step": 315580 }, { "epoch": 90.79113924050633, "grad_norm": 1.2472552061080933, "learning_rate": 0.00018417721518987343, "loss": 0.3093, "step": 315590 }, { "epoch": 90.7940161104718, "grad_norm": 2.074155807495117, "learning_rate": 0.00018411967779056389, "loss": 0.2691, "step": 315600 }, { "epoch": 90.79689298043728, "grad_norm": 0.7666621208190918, "learning_rate": 0.00018406214039125431, "loss": 0.3437, "step": 315610 }, { "epoch": 90.79976985040277, "grad_norm": 0.6840755343437195, "learning_rate": 0.00018400460299194477, "loss": 0.3189, "step": 315620 }, { "epoch": 90.80264672036824, "grad_norm": 1.1571625471115112, "learning_rate": 0.00018394706559263522, "loss": 0.2819, "step": 315630 }, { "epoch": 90.80552359033372, "grad_norm": 1.3197474479675293, "learning_rate": 0.00018388952819332565, "loss": 0.2652, "step": 315640 }, { "epoch": 90.8084004602992, "grad_norm": 0.6722560524940491, "learning_rate": 0.0001838319907940161, "loss": 0.3348, "step": 315650 }, { "epoch": 90.81127733026467, "grad_norm": 0.8746916651725769, "learning_rate": 0.00018377445339470656, "loss": 0.2643, "step": 315660 }, { "epoch": 90.81415420023015, "grad_norm": 1.0214954614639282, "learning_rate": 0.000183716915995397, "loss": 0.2365, "step": 315670 }, { "epoch": 90.81703107019563, "grad_norm": 0.643065869808197, "learning_rate": 0.00018365937859608748, "loss": 0.282, "step": 315680 }, { "epoch": 90.8199079401611, "grad_norm": 1.0301710367202759, "learning_rate": 0.00018360184119677793, "loss": 0.2963, "step": 315690 }, { "epoch": 90.82278481012658, "grad_norm": 1.1150988340377808, "learning_rate": 0.00018354430379746836, "loss": 0.2328, "step": 315700 }, { "epoch": 90.82566168009205, "grad_norm": 1.52657151222229, "learning_rate": 0.00018348676639815881, "loss": 0.2674, "step": 315710 }, { "epoch": 90.82853855005754, "grad_norm": 2.368781805038452, "learning_rate": 0.00018342922899884927, "loss": 0.276, "step": 315720 }, { "epoch": 90.83141542002302, "grad_norm": 1.371485948562622, "learning_rate": 0.0001833716915995397, "loss": 0.3688, "step": 315730 }, { "epoch": 90.8342922899885, "grad_norm": 1.0195245742797852, "learning_rate": 0.00018331415420023015, "loss": 0.2995, "step": 315740 }, { "epoch": 90.83716915995397, "grad_norm": 1.1627771854400635, "learning_rate": 0.00018325661680092058, "loss": 0.3098, "step": 315750 }, { "epoch": 90.84004602991945, "grad_norm": 2.1890594959259033, "learning_rate": 0.00018319907940161104, "loss": 0.2862, "step": 315760 }, { "epoch": 90.84292289988493, "grad_norm": 2.5781989097595215, "learning_rate": 0.0001831415420023015, "loss": 0.3442, "step": 315770 }, { "epoch": 90.8457997698504, "grad_norm": 0.7221502065658569, "learning_rate": 0.00018308400460299195, "loss": 0.2559, "step": 315780 }, { "epoch": 90.84867663981588, "grad_norm": 1.1722532510757446, "learning_rate": 0.0001830264672036824, "loss": 0.3651, "step": 315790 }, { "epoch": 90.85155350978135, "grad_norm": 0.9965834021568298, "learning_rate": 0.00018296892980437286, "loss": 0.2288, "step": 315800 }, { "epoch": 90.85443037974683, "grad_norm": 1.6233340501785278, "learning_rate": 0.0001829113924050633, "loss": 0.3526, "step": 315810 }, { "epoch": 90.8573072497123, "grad_norm": 1.4054673910140991, "learning_rate": 0.00018285385500575374, "loss": 0.301, "step": 315820 }, { "epoch": 90.8601841196778, "grad_norm": 0.8630613088607788, "learning_rate": 0.0001827963176064442, "loss": 0.2653, "step": 315830 }, { "epoch": 90.86306098964327, "grad_norm": 1.3560681343078613, "learning_rate": 0.00018273878020713463, "loss": 0.3496, "step": 315840 }, { "epoch": 90.86593785960875, "grad_norm": 0.8360317349433899, "learning_rate": 0.00018268124280782508, "loss": 0.2609, "step": 315850 }, { "epoch": 90.86881472957423, "grad_norm": 1.8446314334869385, "learning_rate": 0.00018262370540851554, "loss": 0.3284, "step": 315860 }, { "epoch": 90.8716915995397, "grad_norm": 1.065898060798645, "learning_rate": 0.00018256616800920597, "loss": 0.3079, "step": 315870 }, { "epoch": 90.87456846950518, "grad_norm": 0.9648934602737427, "learning_rate": 0.00018250863060989645, "loss": 0.3433, "step": 315880 }, { "epoch": 90.87744533947065, "grad_norm": 0.5952966809272766, "learning_rate": 0.0001824510932105869, "loss": 0.2302, "step": 315890 }, { "epoch": 90.88032220943613, "grad_norm": 1.1310361623764038, "learning_rate": 0.00018239355581127733, "loss": 0.2904, "step": 315900 }, { "epoch": 90.8831990794016, "grad_norm": 1.1485378742218018, "learning_rate": 0.0001823360184119678, "loss": 0.33, "step": 315910 }, { "epoch": 90.88607594936708, "grad_norm": 1.5886220932006836, "learning_rate": 0.00018227848101265824, "loss": 0.3455, "step": 315920 }, { "epoch": 90.88895281933257, "grad_norm": 0.8638713359832764, "learning_rate": 0.00018222094361334867, "loss": 0.2956, "step": 315930 }, { "epoch": 90.89182968929805, "grad_norm": 1.7667428255081177, "learning_rate": 0.00018216340621403913, "loss": 0.3177, "step": 315940 }, { "epoch": 90.89470655926353, "grad_norm": 1.7712256908416748, "learning_rate": 0.00018210586881472956, "loss": 0.2998, "step": 315950 }, { "epoch": 90.897583429229, "grad_norm": 1.7962170839309692, "learning_rate": 0.00018204833141542, "loss": 0.3302, "step": 315960 }, { "epoch": 90.90046029919448, "grad_norm": 0.8329983949661255, "learning_rate": 0.0001819907940161105, "loss": 0.2427, "step": 315970 }, { "epoch": 90.90333716915995, "grad_norm": 0.9282257556915283, "learning_rate": 0.00018193325661680092, "loss": 0.3037, "step": 315980 }, { "epoch": 90.90621403912543, "grad_norm": 0.7294615507125854, "learning_rate": 0.00018187571921749138, "loss": 0.3166, "step": 315990 }, { "epoch": 90.9090909090909, "grad_norm": 1.0776917934417725, "learning_rate": 0.00018181818181818183, "loss": 0.3711, "step": 316000 }, { "epoch": 90.91196777905638, "grad_norm": 1.2463138103485107, "learning_rate": 0.00018176064441887226, "loss": 0.2926, "step": 316010 }, { "epoch": 90.91484464902186, "grad_norm": 1.6876304149627686, "learning_rate": 0.00018170310701956272, "loss": 0.3072, "step": 316020 }, { "epoch": 90.91772151898734, "grad_norm": 0.7751162052154541, "learning_rate": 0.00018164556962025317, "loss": 0.2555, "step": 316030 }, { "epoch": 90.92059838895283, "grad_norm": 0.7529869675636292, "learning_rate": 0.0001815880322209436, "loss": 0.2573, "step": 316040 }, { "epoch": 90.9234752589183, "grad_norm": 1.0905771255493164, "learning_rate": 0.00018153049482163406, "loss": 0.3416, "step": 316050 }, { "epoch": 90.92635212888378, "grad_norm": 2.0517544746398926, "learning_rate": 0.0001814729574223245, "loss": 0.375, "step": 316060 }, { "epoch": 90.92922899884925, "grad_norm": 1.865374207496643, "learning_rate": 0.00018141542002301497, "loss": 0.3528, "step": 316070 }, { "epoch": 90.93210586881473, "grad_norm": 0.8047626614570618, "learning_rate": 0.00018135788262370542, "loss": 0.3146, "step": 316080 }, { "epoch": 90.9349827387802, "grad_norm": 1.7956963777542114, "learning_rate": 0.00018130034522439588, "loss": 0.3005, "step": 316090 }, { "epoch": 90.93785960874568, "grad_norm": 1.0897828340530396, "learning_rate": 0.0001812428078250863, "loss": 0.3608, "step": 316100 }, { "epoch": 90.94073647871116, "grad_norm": 1.8195037841796875, "learning_rate": 0.00018118527042577676, "loss": 0.3145, "step": 316110 }, { "epoch": 90.94361334867664, "grad_norm": 0.8844497203826904, "learning_rate": 0.00018112773302646722, "loss": 0.2467, "step": 316120 }, { "epoch": 90.94649021864211, "grad_norm": 1.279603362083435, "learning_rate": 0.00018107019562715765, "loss": 0.3738, "step": 316130 }, { "epoch": 90.9493670886076, "grad_norm": 1.1621968746185303, "learning_rate": 0.0001810126582278481, "loss": 0.2922, "step": 316140 }, { "epoch": 90.95224395857308, "grad_norm": 0.7893831729888916, "learning_rate": 0.00018095512082853853, "loss": 0.2862, "step": 316150 }, { "epoch": 90.95512082853855, "grad_norm": 1.2506521940231323, "learning_rate": 0.000180897583429229, "loss": 0.2985, "step": 316160 }, { "epoch": 90.95799769850403, "grad_norm": 1.1054044961929321, "learning_rate": 0.00018084004602991947, "loss": 0.2745, "step": 316170 }, { "epoch": 90.9608745684695, "grad_norm": 0.7203617691993713, "learning_rate": 0.0001807825086306099, "loss": 0.3278, "step": 316180 }, { "epoch": 90.96375143843498, "grad_norm": 1.0550795793533325, "learning_rate": 0.00018072497123130035, "loss": 0.2446, "step": 316190 }, { "epoch": 90.96662830840046, "grad_norm": 0.955539345741272, "learning_rate": 0.0001806674338319908, "loss": 0.2852, "step": 316200 }, { "epoch": 90.96950517836594, "grad_norm": 1.0820337533950806, "learning_rate": 0.00018060989643268124, "loss": 0.2421, "step": 316210 }, { "epoch": 90.97238204833141, "grad_norm": 1.0451725721359253, "learning_rate": 0.0001805523590333717, "loss": 0.2858, "step": 316220 }, { "epoch": 90.97525891829689, "grad_norm": 0.6569662690162659, "learning_rate": 0.00018049482163406215, "loss": 0.2647, "step": 316230 }, { "epoch": 90.97813578826236, "grad_norm": 0.9720225930213928, "learning_rate": 0.00018043728423475258, "loss": 0.3114, "step": 316240 }, { "epoch": 90.98101265822785, "grad_norm": 1.4990088939666748, "learning_rate": 0.00018037974683544303, "loss": 0.3357, "step": 316250 }, { "epoch": 90.98388952819333, "grad_norm": 1.5513948202133179, "learning_rate": 0.0001803222094361335, "loss": 0.288, "step": 316260 }, { "epoch": 90.9867663981588, "grad_norm": 0.9586016535758972, "learning_rate": 0.00018026467203682394, "loss": 0.2902, "step": 316270 }, { "epoch": 90.98964326812428, "grad_norm": 1.39626944065094, "learning_rate": 0.0001802071346375144, "loss": 0.2649, "step": 316280 }, { "epoch": 90.99252013808976, "grad_norm": 1.2494069337844849, "learning_rate": 0.00018014959723820485, "loss": 0.2555, "step": 316290 }, { "epoch": 90.99539700805524, "grad_norm": 0.901350200176239, "learning_rate": 0.00018009205983889528, "loss": 0.253, "step": 316300 }, { "epoch": 90.99827387802071, "grad_norm": 0.856986939907074, "learning_rate": 0.00018003452243958574, "loss": 0.259, "step": 316310 }, { "epoch": 91.00115074798619, "grad_norm": 1.4394625425338745, "learning_rate": 0.0001799769850402762, "loss": 0.2415, "step": 316320 }, { "epoch": 91.00402761795166, "grad_norm": 1.2996890544891357, "learning_rate": 0.00017991944764096662, "loss": 0.2291, "step": 316330 }, { "epoch": 91.00690448791714, "grad_norm": 0.7676728963851929, "learning_rate": 0.00017986191024165708, "loss": 0.2709, "step": 316340 }, { "epoch": 91.00978135788263, "grad_norm": 1.15982186794281, "learning_rate": 0.0001798043728423475, "loss": 0.3424, "step": 316350 }, { "epoch": 91.0126582278481, "grad_norm": 1.7055352926254272, "learning_rate": 0.00017974683544303796, "loss": 0.2693, "step": 316360 }, { "epoch": 91.01553509781358, "grad_norm": 1.2119519710540771, "learning_rate": 0.00017968929804372844, "loss": 0.2999, "step": 316370 }, { "epoch": 91.01841196777906, "grad_norm": 1.2201027870178223, "learning_rate": 0.00017963176064441887, "loss": 0.3063, "step": 316380 }, { "epoch": 91.02128883774454, "grad_norm": 2.4775307178497314, "learning_rate": 0.00017957422324510933, "loss": 0.2613, "step": 316390 }, { "epoch": 91.02416570771001, "grad_norm": 1.0988436937332153, "learning_rate": 0.00017951668584579978, "loss": 0.2758, "step": 316400 }, { "epoch": 91.02704257767549, "grad_norm": 1.340830683708191, "learning_rate": 0.0001794591484464902, "loss": 0.2248, "step": 316410 }, { "epoch": 91.02991944764096, "grad_norm": 0.6922798156738281, "learning_rate": 0.00017940161104718067, "loss": 0.2796, "step": 316420 }, { "epoch": 91.03279631760644, "grad_norm": 1.7565196752548218, "learning_rate": 0.00017934407364787112, "loss": 0.2407, "step": 316430 }, { "epoch": 91.03567318757192, "grad_norm": 1.3508636951446533, "learning_rate": 0.00017928653624856155, "loss": 0.2861, "step": 316440 }, { "epoch": 91.03855005753739, "grad_norm": 0.9860749840736389, "learning_rate": 0.000179228998849252, "loss": 0.3009, "step": 316450 }, { "epoch": 91.04142692750288, "grad_norm": 0.87990802526474, "learning_rate": 0.0001791714614499425, "loss": 0.2807, "step": 316460 }, { "epoch": 91.04430379746836, "grad_norm": 0.879862904548645, "learning_rate": 0.00017911392405063292, "loss": 0.2459, "step": 316470 }, { "epoch": 91.04718066743384, "grad_norm": 1.2925095558166504, "learning_rate": 0.00017905638665132337, "loss": 0.2885, "step": 316480 }, { "epoch": 91.05005753739931, "grad_norm": 0.8029193878173828, "learning_rate": 0.00017899884925201383, "loss": 0.2124, "step": 316490 }, { "epoch": 91.05293440736479, "grad_norm": 1.2090460062026978, "learning_rate": 0.00017894131185270426, "loss": 0.2593, "step": 316500 }, { "epoch": 91.05581127733026, "grad_norm": 1.3951736688613892, "learning_rate": 0.0001788837744533947, "loss": 0.2995, "step": 316510 }, { "epoch": 91.05868814729574, "grad_norm": 1.1317224502563477, "learning_rate": 0.00017882623705408517, "loss": 0.2352, "step": 316520 }, { "epoch": 91.06156501726122, "grad_norm": 1.0890854597091675, "learning_rate": 0.0001787686996547756, "loss": 0.3038, "step": 316530 }, { "epoch": 91.06444188722669, "grad_norm": 1.1991301774978638, "learning_rate": 0.00017871116225546605, "loss": 0.2931, "step": 316540 }, { "epoch": 91.06731875719217, "grad_norm": 1.467163324356079, "learning_rate": 0.00017865362485615648, "loss": 0.2531, "step": 316550 }, { "epoch": 91.07019562715766, "grad_norm": 1.066137671470642, "learning_rate": 0.00017859608745684696, "loss": 0.3487, "step": 316560 }, { "epoch": 91.07307249712314, "grad_norm": 1.2872411012649536, "learning_rate": 0.00017853855005753742, "loss": 0.3018, "step": 316570 }, { "epoch": 91.07594936708861, "grad_norm": 1.2631475925445557, "learning_rate": 0.00017848101265822785, "loss": 0.2693, "step": 316580 }, { "epoch": 91.07882623705409, "grad_norm": 1.3367029428482056, "learning_rate": 0.0001784234752589183, "loss": 0.3135, "step": 316590 }, { "epoch": 91.08170310701956, "grad_norm": 1.0537382364273071, "learning_rate": 0.00017836593785960876, "loss": 0.2656, "step": 316600 }, { "epoch": 91.08457997698504, "grad_norm": 1.023133635520935, "learning_rate": 0.00017830840046029919, "loss": 0.2713, "step": 316610 }, { "epoch": 91.08745684695052, "grad_norm": 1.1007492542266846, "learning_rate": 0.00017825086306098964, "loss": 0.2738, "step": 316620 }, { "epoch": 91.09033371691599, "grad_norm": 1.1940784454345703, "learning_rate": 0.0001781933256616801, "loss": 0.2599, "step": 316630 }, { "epoch": 91.09321058688147, "grad_norm": 1.4159201383590698, "learning_rate": 0.00017813578826237053, "loss": 0.2977, "step": 316640 }, { "epoch": 91.09608745684694, "grad_norm": 1.7435966730117798, "learning_rate": 0.00017807825086306098, "loss": 0.2884, "step": 316650 }, { "epoch": 91.09896432681242, "grad_norm": 0.9147489070892334, "learning_rate": 0.00017802071346375146, "loss": 0.2658, "step": 316660 }, { "epoch": 91.10184119677791, "grad_norm": 1.3147845268249512, "learning_rate": 0.0001779631760644419, "loss": 0.2694, "step": 316670 }, { "epoch": 91.10471806674339, "grad_norm": 1.1274893283843994, "learning_rate": 0.00017790563866513235, "loss": 0.2324, "step": 316680 }, { "epoch": 91.10759493670886, "grad_norm": 1.8208138942718506, "learning_rate": 0.0001778481012658228, "loss": 0.3419, "step": 316690 }, { "epoch": 91.11047180667434, "grad_norm": 0.9535107016563416, "learning_rate": 0.00017779056386651323, "loss": 0.2472, "step": 316700 }, { "epoch": 91.11334867663982, "grad_norm": 1.4945769309997559, "learning_rate": 0.0001777330264672037, "loss": 0.3129, "step": 316710 }, { "epoch": 91.11622554660529, "grad_norm": 1.3172975778579712, "learning_rate": 0.00017767548906789414, "loss": 0.1984, "step": 316720 }, { "epoch": 91.11910241657077, "grad_norm": 1.4246366024017334, "learning_rate": 0.00017761795166858457, "loss": 0.2835, "step": 316730 }, { "epoch": 91.12197928653625, "grad_norm": 0.6461691856384277, "learning_rate": 0.00017756041426927503, "loss": 0.2776, "step": 316740 }, { "epoch": 91.12485615650172, "grad_norm": 1.0461355447769165, "learning_rate": 0.00017750287686996545, "loss": 0.3313, "step": 316750 }, { "epoch": 91.1277330264672, "grad_norm": 2.145688056945801, "learning_rate": 0.00017744533947065594, "loss": 0.2475, "step": 316760 }, { "epoch": 91.13060989643269, "grad_norm": 0.8979711532592773, "learning_rate": 0.0001773878020713464, "loss": 0.2443, "step": 316770 }, { "epoch": 91.13348676639816, "grad_norm": 1.1188702583312988, "learning_rate": 0.00017733026467203682, "loss": 0.3162, "step": 316780 }, { "epoch": 91.13636363636364, "grad_norm": 1.2144858837127686, "learning_rate": 0.00017727272727272728, "loss": 0.3422, "step": 316790 }, { "epoch": 91.13924050632912, "grad_norm": 0.8596500158309937, "learning_rate": 0.00017721518987341773, "loss": 0.2832, "step": 316800 }, { "epoch": 91.14211737629459, "grad_norm": 1.0235817432403564, "learning_rate": 0.00017715765247410816, "loss": 0.2786, "step": 316810 }, { "epoch": 91.14499424626007, "grad_norm": 0.8209638595581055, "learning_rate": 0.00017710011507479862, "loss": 0.3372, "step": 316820 }, { "epoch": 91.14787111622555, "grad_norm": 1.3347123861312866, "learning_rate": 0.00017704257767548907, "loss": 0.2901, "step": 316830 }, { "epoch": 91.15074798619102, "grad_norm": 1.159994125366211, "learning_rate": 0.0001769850402761795, "loss": 0.2493, "step": 316840 }, { "epoch": 91.1536248561565, "grad_norm": 0.8497760891914368, "learning_rate": 0.00017692750287686996, "loss": 0.2419, "step": 316850 }, { "epoch": 91.15650172612197, "grad_norm": 1.2591030597686768, "learning_rate": 0.00017686996547756044, "loss": 0.3096, "step": 316860 }, { "epoch": 91.15937859608745, "grad_norm": 0.9871047735214233, "learning_rate": 0.00017681242807825087, "loss": 0.2809, "step": 316870 }, { "epoch": 91.16225546605294, "grad_norm": 0.7727714776992798, "learning_rate": 0.00017675489067894132, "loss": 0.2487, "step": 316880 }, { "epoch": 91.16513233601842, "grad_norm": 1.6745622158050537, "learning_rate": 0.00017669735327963178, "loss": 0.2835, "step": 316890 }, { "epoch": 91.16800920598389, "grad_norm": 1.3319593667984009, "learning_rate": 0.0001766398158803222, "loss": 0.2981, "step": 316900 }, { "epoch": 91.17088607594937, "grad_norm": 0.80308598279953, "learning_rate": 0.00017658227848101266, "loss": 0.3141, "step": 316910 }, { "epoch": 91.17376294591485, "grad_norm": 1.0419045686721802, "learning_rate": 0.00017652474108170312, "loss": 0.3011, "step": 316920 }, { "epoch": 91.17663981588032, "grad_norm": 1.3563541173934937, "learning_rate": 0.00017646720368239355, "loss": 0.2493, "step": 316930 }, { "epoch": 91.1795166858458, "grad_norm": 1.0905206203460693, "learning_rate": 0.000176409666283084, "loss": 0.2402, "step": 316940 }, { "epoch": 91.18239355581127, "grad_norm": 1.1793993711471558, "learning_rate": 0.00017635212888377448, "loss": 0.3308, "step": 316950 }, { "epoch": 91.18527042577675, "grad_norm": 1.8701026439666748, "learning_rate": 0.0001762945914844649, "loss": 0.3308, "step": 316960 }, { "epoch": 91.18814729574223, "grad_norm": 1.2955836057662964, "learning_rate": 0.00017623705408515537, "loss": 0.2918, "step": 316970 }, { "epoch": 91.19102416570772, "grad_norm": 0.7222246527671814, "learning_rate": 0.0001761795166858458, "loss": 0.3446, "step": 316980 }, { "epoch": 91.19390103567319, "grad_norm": 0.9777657985687256, "learning_rate": 0.00017612197928653625, "loss": 0.2594, "step": 316990 }, { "epoch": 91.19677790563867, "grad_norm": 0.9503878355026245, "learning_rate": 0.0001760644418872267, "loss": 0.2361, "step": 317000 }, { "epoch": 91.19965477560415, "grad_norm": 1.462186574935913, "learning_rate": 0.00017600690448791714, "loss": 0.2838, "step": 317010 }, { "epoch": 91.20253164556962, "grad_norm": 0.9030584692955017, "learning_rate": 0.0001759493670886076, "loss": 0.2435, "step": 317020 }, { "epoch": 91.2054085155351, "grad_norm": 0.6250969767570496, "learning_rate": 0.00017589182968929805, "loss": 0.2767, "step": 317030 }, { "epoch": 91.20828538550057, "grad_norm": 2.01629900932312, "learning_rate": 0.00017583429228998847, "loss": 0.2775, "step": 317040 }, { "epoch": 91.21116225546605, "grad_norm": 1.4783540964126587, "learning_rate": 0.00017577675489067896, "loss": 0.2837, "step": 317050 }, { "epoch": 91.21403912543153, "grad_norm": 1.293157935142517, "learning_rate": 0.0001757192174913694, "loss": 0.3096, "step": 317060 }, { "epoch": 91.216915995397, "grad_norm": 0.9273175597190857, "learning_rate": 0.00017566168009205984, "loss": 0.2902, "step": 317070 }, { "epoch": 91.21979286536248, "grad_norm": 1.6485097408294678, "learning_rate": 0.0001756041426927503, "loss": 0.3025, "step": 317080 }, { "epoch": 91.22266973532797, "grad_norm": 1.3885303735733032, "learning_rate": 0.00017554660529344075, "loss": 0.253, "step": 317090 }, { "epoch": 91.22554660529345, "grad_norm": 1.5821571350097656, "learning_rate": 0.00017548906789413118, "loss": 0.309, "step": 317100 }, { "epoch": 91.22842347525892, "grad_norm": 0.8358222842216492, "learning_rate": 0.00017543153049482164, "loss": 0.3256, "step": 317110 }, { "epoch": 91.2313003452244, "grad_norm": 1.7651880979537964, "learning_rate": 0.0001753739930955121, "loss": 0.2757, "step": 317120 }, { "epoch": 91.23417721518987, "grad_norm": 1.4674450159072876, "learning_rate": 0.00017531645569620252, "loss": 0.2958, "step": 317130 }, { "epoch": 91.23705408515535, "grad_norm": 1.366023302078247, "learning_rate": 0.00017525891829689298, "loss": 0.2534, "step": 317140 }, { "epoch": 91.23993095512083, "grad_norm": 2.0621867179870605, "learning_rate": 0.00017520138089758346, "loss": 0.2537, "step": 317150 }, { "epoch": 91.2428078250863, "grad_norm": 0.6672325134277344, "learning_rate": 0.0001751438434982739, "loss": 0.3315, "step": 317160 }, { "epoch": 91.24568469505178, "grad_norm": 1.8381409645080566, "learning_rate": 0.00017508630609896434, "loss": 0.3166, "step": 317170 }, { "epoch": 91.24856156501725, "grad_norm": 0.7499780654907227, "learning_rate": 0.00017502876869965477, "loss": 0.2935, "step": 317180 }, { "epoch": 91.25143843498275, "grad_norm": 1.299552321434021, "learning_rate": 0.00017497123130034523, "loss": 0.3341, "step": 317190 }, { "epoch": 91.25431530494822, "grad_norm": 0.985277533531189, "learning_rate": 0.00017491369390103568, "loss": 0.2922, "step": 317200 }, { "epoch": 91.2571921749137, "grad_norm": 0.9836694002151489, "learning_rate": 0.0001748561565017261, "loss": 0.2789, "step": 317210 }, { "epoch": 91.26006904487917, "grad_norm": 2.1407084465026855, "learning_rate": 0.00017479861910241657, "loss": 0.3502, "step": 317220 }, { "epoch": 91.26294591484465, "grad_norm": 1.1834725141525269, "learning_rate": 0.00017474108170310702, "loss": 0.3099, "step": 317230 }, { "epoch": 91.26582278481013, "grad_norm": 1.065281867980957, "learning_rate": 0.00017468354430379745, "loss": 0.2585, "step": 317240 }, { "epoch": 91.2686996547756, "grad_norm": 1.8148114681243896, "learning_rate": 0.00017462600690448793, "loss": 0.2687, "step": 317250 }, { "epoch": 91.27157652474108, "grad_norm": 0.7881969809532166, "learning_rate": 0.0001745684695051784, "loss": 0.2783, "step": 317260 }, { "epoch": 91.27445339470655, "grad_norm": 0.8940742015838623, "learning_rate": 0.00017451093210586882, "loss": 0.2319, "step": 317270 }, { "epoch": 91.27733026467203, "grad_norm": 1.2221202850341797, "learning_rate": 0.00017445339470655927, "loss": 0.3775, "step": 317280 }, { "epoch": 91.28020713463752, "grad_norm": 2.023585081100464, "learning_rate": 0.00017439585730724973, "loss": 0.3166, "step": 317290 }, { "epoch": 91.283084004603, "grad_norm": 2.1423885822296143, "learning_rate": 0.00017433831990794016, "loss": 0.2707, "step": 317300 }, { "epoch": 91.28596087456847, "grad_norm": 0.7996683716773987, "learning_rate": 0.0001742807825086306, "loss": 0.2626, "step": 317310 }, { "epoch": 91.28883774453395, "grad_norm": 0.8186953663825989, "learning_rate": 0.00017422324510932107, "loss": 0.2381, "step": 317320 }, { "epoch": 91.29171461449943, "grad_norm": 0.7910247445106506, "learning_rate": 0.0001741657077100115, "loss": 0.2244, "step": 317330 }, { "epoch": 91.2945914844649, "grad_norm": 0.7307549118995667, "learning_rate": 0.00017410817031070195, "loss": 0.3176, "step": 317340 }, { "epoch": 91.29746835443038, "grad_norm": 1.2657980918884277, "learning_rate": 0.00017405063291139243, "loss": 0.2391, "step": 317350 }, { "epoch": 91.30034522439585, "grad_norm": 2.078315258026123, "learning_rate": 0.00017399309551208286, "loss": 0.2721, "step": 317360 }, { "epoch": 91.30322209436133, "grad_norm": 1.0330148935317993, "learning_rate": 0.00017393555811277332, "loss": 0.2649, "step": 317370 }, { "epoch": 91.30609896432681, "grad_norm": 1.380974531173706, "learning_rate": 0.00017387802071346375, "loss": 0.2336, "step": 317380 }, { "epoch": 91.30897583429228, "grad_norm": 3.700681686401367, "learning_rate": 0.0001738204833141542, "loss": 0.3174, "step": 317390 }, { "epoch": 91.31185270425777, "grad_norm": 0.9251365065574646, "learning_rate": 0.00017376294591484466, "loss": 0.3021, "step": 317400 }, { "epoch": 91.31472957422325, "grad_norm": 1.5120404958724976, "learning_rate": 0.00017370540851553508, "loss": 0.2869, "step": 317410 }, { "epoch": 91.31760644418873, "grad_norm": 1.5387718677520752, "learning_rate": 0.00017364787111622554, "loss": 0.221, "step": 317420 }, { "epoch": 91.3204833141542, "grad_norm": 1.2636500597000122, "learning_rate": 0.000173590333716916, "loss": 0.285, "step": 317430 }, { "epoch": 91.32336018411968, "grad_norm": 1.5684421062469482, "learning_rate": 0.00017353279631760642, "loss": 0.2706, "step": 317440 }, { "epoch": 91.32623705408515, "grad_norm": 1.3963826894760132, "learning_rate": 0.0001734752589182969, "loss": 0.2862, "step": 317450 }, { "epoch": 91.32911392405063, "grad_norm": 1.300018548965454, "learning_rate": 0.00017341772151898736, "loss": 0.2633, "step": 317460 }, { "epoch": 91.33199079401611, "grad_norm": 1.6802984476089478, "learning_rate": 0.0001733601841196778, "loss": 0.3029, "step": 317470 }, { "epoch": 91.33486766398158, "grad_norm": 1.0102336406707764, "learning_rate": 0.00017330264672036825, "loss": 0.3596, "step": 317480 }, { "epoch": 91.33774453394706, "grad_norm": 1.137357473373413, "learning_rate": 0.0001732451093210587, "loss": 0.2947, "step": 317490 }, { "epoch": 91.34062140391255, "grad_norm": 1.7430078983306885, "learning_rate": 0.00017318757192174913, "loss": 0.3316, "step": 317500 }, { "epoch": 91.34349827387803, "grad_norm": 1.2089672088623047, "learning_rate": 0.00017313003452243959, "loss": 0.2445, "step": 317510 }, { "epoch": 91.3463751438435, "grad_norm": 1.2022180557250977, "learning_rate": 0.00017307249712313004, "loss": 0.3029, "step": 317520 }, { "epoch": 91.34925201380898, "grad_norm": 1.139556646347046, "learning_rate": 0.00017301495972382047, "loss": 0.289, "step": 317530 }, { "epoch": 91.35212888377445, "grad_norm": 1.4142745733261108, "learning_rate": 0.00017295742232451095, "loss": 0.3199, "step": 317540 }, { "epoch": 91.35500575373993, "grad_norm": 1.5369977951049805, "learning_rate": 0.0001728998849252014, "loss": 0.4184, "step": 317550 }, { "epoch": 91.35788262370541, "grad_norm": 1.2510154247283936, "learning_rate": 0.00017284234752589184, "loss": 0.2082, "step": 317560 }, { "epoch": 91.36075949367088, "grad_norm": 0.957815945148468, "learning_rate": 0.0001727848101265823, "loss": 0.2657, "step": 317570 }, { "epoch": 91.36363636363636, "grad_norm": 1.3451664447784424, "learning_rate": 0.00017272727272727272, "loss": 0.2751, "step": 317580 }, { "epoch": 91.36651323360184, "grad_norm": 2.1267518997192383, "learning_rate": 0.00017266973532796318, "loss": 0.3694, "step": 317590 }, { "epoch": 91.36939010356731, "grad_norm": 1.0289748907089233, "learning_rate": 0.00017261219792865363, "loss": 0.2841, "step": 317600 }, { "epoch": 91.3722669735328, "grad_norm": 0.9581921696662903, "learning_rate": 0.00017255466052934406, "loss": 0.3538, "step": 317610 }, { "epoch": 91.37514384349828, "grad_norm": 0.9977124929428101, "learning_rate": 0.00017249712313003451, "loss": 0.2817, "step": 317620 }, { "epoch": 91.37802071346375, "grad_norm": 1.095238447189331, "learning_rate": 0.00017243958573072497, "loss": 0.2826, "step": 317630 }, { "epoch": 91.38089758342923, "grad_norm": 1.612804651260376, "learning_rate": 0.00017238204833141543, "loss": 0.2615, "step": 317640 }, { "epoch": 91.38377445339471, "grad_norm": 1.225341558456421, "learning_rate": 0.00017232451093210588, "loss": 0.3136, "step": 317650 }, { "epoch": 91.38665132336018, "grad_norm": 0.8780101537704468, "learning_rate": 0.00017226697353279634, "loss": 0.3178, "step": 317660 }, { "epoch": 91.38952819332566, "grad_norm": 0.8970305919647217, "learning_rate": 0.00017220943613348676, "loss": 0.2619, "step": 317670 }, { "epoch": 91.39240506329114, "grad_norm": 0.9615393877029419, "learning_rate": 0.00017215189873417722, "loss": 0.2512, "step": 317680 }, { "epoch": 91.39528193325661, "grad_norm": 2.0366480350494385, "learning_rate": 0.00017209436133486768, "loss": 0.342, "step": 317690 }, { "epoch": 91.39815880322209, "grad_norm": 1.0860297679901123, "learning_rate": 0.0001720368239355581, "loss": 0.3194, "step": 317700 }, { "epoch": 91.40103567318758, "grad_norm": 1.31119704246521, "learning_rate": 0.00017197928653624856, "loss": 0.2391, "step": 317710 }, { "epoch": 91.40391254315306, "grad_norm": 1.375841498374939, "learning_rate": 0.00017192174913693902, "loss": 0.2893, "step": 317720 }, { "epoch": 91.40678941311853, "grad_norm": 1.013758897781372, "learning_rate": 0.00017186421173762944, "loss": 0.3287, "step": 317730 }, { "epoch": 91.40966628308401, "grad_norm": 1.0889390707015991, "learning_rate": 0.00017180667433831993, "loss": 0.2862, "step": 317740 }, { "epoch": 91.41254315304948, "grad_norm": 1.2687472105026245, "learning_rate": 0.00017174913693901038, "loss": 0.2588, "step": 317750 }, { "epoch": 91.41542002301496, "grad_norm": 1.1571953296661377, "learning_rate": 0.0001716915995397008, "loss": 0.3249, "step": 317760 }, { "epoch": 91.41829689298044, "grad_norm": 1.1203811168670654, "learning_rate": 0.00017163406214039127, "loss": 0.2529, "step": 317770 }, { "epoch": 91.42117376294591, "grad_norm": 1.3775664567947388, "learning_rate": 0.0001715765247410817, "loss": 0.2064, "step": 317780 }, { "epoch": 91.42405063291139, "grad_norm": 0.9640159010887146, "learning_rate": 0.00017151898734177215, "loss": 0.399, "step": 317790 }, { "epoch": 91.42692750287686, "grad_norm": 0.7147108316421509, "learning_rate": 0.0001714614499424626, "loss": 0.2778, "step": 317800 }, { "epoch": 91.42980437284234, "grad_norm": 1.0686639547348022, "learning_rate": 0.00017140391254315303, "loss": 0.3227, "step": 317810 }, { "epoch": 91.43268124280783, "grad_norm": 1.447299838066101, "learning_rate": 0.0001713463751438435, "loss": 0.2489, "step": 317820 }, { "epoch": 91.43555811277331, "grad_norm": 0.7076513171195984, "learning_rate": 0.00017128883774453394, "loss": 0.3403, "step": 317830 }, { "epoch": 91.43843498273878, "grad_norm": 1.5767844915390015, "learning_rate": 0.0001712313003452244, "loss": 0.2549, "step": 317840 }, { "epoch": 91.44131185270426, "grad_norm": 0.9425255656242371, "learning_rate": 0.00017117376294591486, "loss": 0.2792, "step": 317850 }, { "epoch": 91.44418872266974, "grad_norm": 0.8376453518867493, "learning_rate": 0.0001711162255466053, "loss": 0.2824, "step": 317860 }, { "epoch": 91.44706559263521, "grad_norm": 1.1767351627349854, "learning_rate": 0.00017105868814729574, "loss": 0.3878, "step": 317870 }, { "epoch": 91.44994246260069, "grad_norm": 1.8115017414093018, "learning_rate": 0.0001710011507479862, "loss": 0.3698, "step": 317880 }, { "epoch": 91.45281933256616, "grad_norm": 2.600677251815796, "learning_rate": 0.00017094361334867665, "loss": 0.3808, "step": 317890 }, { "epoch": 91.45569620253164, "grad_norm": 0.8466233611106873, "learning_rate": 0.00017088607594936708, "loss": 0.2639, "step": 317900 }, { "epoch": 91.45857307249712, "grad_norm": 1.1079944372177124, "learning_rate": 0.00017082853855005753, "loss": 0.3226, "step": 317910 }, { "epoch": 91.46144994246261, "grad_norm": 1.1984434127807617, "learning_rate": 0.000170771001150748, "loss": 0.2636, "step": 317920 }, { "epoch": 91.46432681242808, "grad_norm": 2.0558340549468994, "learning_rate": 0.00017071346375143842, "loss": 0.2842, "step": 317930 }, { "epoch": 91.46720368239356, "grad_norm": 0.8939725160598755, "learning_rate": 0.0001706559263521289, "loss": 0.2755, "step": 317940 }, { "epoch": 91.47008055235904, "grad_norm": 1.7383849620819092, "learning_rate": 0.00017059838895281936, "loss": 0.3037, "step": 317950 }, { "epoch": 91.47295742232451, "grad_norm": 0.6429883241653442, "learning_rate": 0.00017054085155350978, "loss": 0.2727, "step": 317960 }, { "epoch": 91.47583429228999, "grad_norm": 1.2398489713668823, "learning_rate": 0.00017048331415420024, "loss": 0.2622, "step": 317970 }, { "epoch": 91.47871116225546, "grad_norm": 1.981907844543457, "learning_rate": 0.0001704257767548907, "loss": 0.3111, "step": 317980 }, { "epoch": 91.48158803222094, "grad_norm": 0.707892119884491, "learning_rate": 0.00017036823935558112, "loss": 0.2828, "step": 317990 }, { "epoch": 91.48446490218642, "grad_norm": 0.708026647567749, "learning_rate": 0.00017031070195627158, "loss": 0.24, "step": 318000 }, { "epoch": 91.4873417721519, "grad_norm": 1.4992512464523315, "learning_rate": 0.000170253164556962, "loss": 0.3147, "step": 318010 }, { "epoch": 91.49021864211737, "grad_norm": 1.1575337648391724, "learning_rate": 0.00017019562715765246, "loss": 0.2611, "step": 318020 }, { "epoch": 91.49309551208286, "grad_norm": 0.5991941094398499, "learning_rate": 0.00017013808975834295, "loss": 0.2526, "step": 318030 }, { "epoch": 91.49597238204834, "grad_norm": 1.6531026363372803, "learning_rate": 0.00017008055235903337, "loss": 0.2151, "step": 318040 }, { "epoch": 91.49884925201381, "grad_norm": 0.9678887128829956, "learning_rate": 0.00017002301495972383, "loss": 0.292, "step": 318050 }, { "epoch": 91.50172612197929, "grad_norm": 1.481549859046936, "learning_rate": 0.00016996547756041429, "loss": 0.3751, "step": 318060 }, { "epoch": 91.50460299194476, "grad_norm": 2.1256821155548096, "learning_rate": 0.00016990794016110471, "loss": 0.2969, "step": 318070 }, { "epoch": 91.50747986191024, "grad_norm": 1.1023420095443726, "learning_rate": 0.00016985040276179517, "loss": 0.31, "step": 318080 }, { "epoch": 91.51035673187572, "grad_norm": 0.9393850564956665, "learning_rate": 0.00016979286536248562, "loss": 0.291, "step": 318090 }, { "epoch": 91.5132336018412, "grad_norm": 1.10050630569458, "learning_rate": 0.00016973532796317605, "loss": 0.2853, "step": 318100 }, { "epoch": 91.51611047180667, "grad_norm": 1.5162904262542725, "learning_rate": 0.0001696777905638665, "loss": 0.3659, "step": 318110 }, { "epoch": 91.51898734177215, "grad_norm": 1.2457780838012695, "learning_rate": 0.00016962025316455696, "loss": 0.2979, "step": 318120 }, { "epoch": 91.52186421173764, "grad_norm": 1.6427464485168457, "learning_rate": 0.00016956271576524742, "loss": 0.3164, "step": 318130 }, { "epoch": 91.52474108170311, "grad_norm": 0.9831644296646118, "learning_rate": 0.00016950517836593788, "loss": 0.316, "step": 318140 }, { "epoch": 91.52761795166859, "grad_norm": 1.0586090087890625, "learning_rate": 0.00016944764096662833, "loss": 0.2846, "step": 318150 }, { "epoch": 91.53049482163406, "grad_norm": 1.137825846672058, "learning_rate": 0.00016939010356731876, "loss": 0.303, "step": 318160 }, { "epoch": 91.53337169159954, "grad_norm": 1.5620976686477661, "learning_rate": 0.00016933256616800921, "loss": 0.2385, "step": 318170 }, { "epoch": 91.53624856156502, "grad_norm": 0.8746042251586914, "learning_rate": 0.00016927502876869967, "loss": 0.26, "step": 318180 }, { "epoch": 91.5391254315305, "grad_norm": 0.6902541518211365, "learning_rate": 0.0001692174913693901, "loss": 0.2925, "step": 318190 }, { "epoch": 91.54200230149597, "grad_norm": 1.5325002670288086, "learning_rate": 0.00016915995397008055, "loss": 0.2946, "step": 318200 }, { "epoch": 91.54487917146145, "grad_norm": 1.1224311590194702, "learning_rate": 0.00016910241657077098, "loss": 0.2657, "step": 318210 }, { "epoch": 91.54775604142692, "grad_norm": 1.8210088014602661, "learning_rate": 0.00016904487917146144, "loss": 0.3244, "step": 318220 }, { "epoch": 91.5506329113924, "grad_norm": 0.9549788236618042, "learning_rate": 0.00016898734177215192, "loss": 0.2759, "step": 318230 }, { "epoch": 91.55350978135789, "grad_norm": 1.3458467721939087, "learning_rate": 0.00016892980437284235, "loss": 0.2845, "step": 318240 }, { "epoch": 91.55638665132336, "grad_norm": 0.8957902193069458, "learning_rate": 0.0001688722669735328, "loss": 0.2878, "step": 318250 }, { "epoch": 91.55926352128884, "grad_norm": 2.2781879901885986, "learning_rate": 0.00016881472957422326, "loss": 0.2622, "step": 318260 }, { "epoch": 91.56214039125432, "grad_norm": 1.1672203540802002, "learning_rate": 0.0001687571921749137, "loss": 0.237, "step": 318270 }, { "epoch": 91.5650172612198, "grad_norm": 1.2466708421707153, "learning_rate": 0.00016869965477560414, "loss": 0.2873, "step": 318280 }, { "epoch": 91.56789413118527, "grad_norm": 1.0899927616119385, "learning_rate": 0.0001686421173762946, "loss": 0.3465, "step": 318290 }, { "epoch": 91.57077100115075, "grad_norm": 0.9211914539337158, "learning_rate": 0.00016858457997698503, "loss": 0.3018, "step": 318300 }, { "epoch": 91.57364787111622, "grad_norm": 1.8094303607940674, "learning_rate": 0.00016852704257767548, "loss": 0.3113, "step": 318310 }, { "epoch": 91.5765247410817, "grad_norm": 1.474785566329956, "learning_rate": 0.00016846950517836594, "loss": 0.3209, "step": 318320 }, { "epoch": 91.57940161104717, "grad_norm": 1.5340603590011597, "learning_rate": 0.0001684119677790564, "loss": 0.2836, "step": 318330 }, { "epoch": 91.58227848101266, "grad_norm": 0.8539614081382751, "learning_rate": 0.00016835443037974685, "loss": 0.2569, "step": 318340 }, { "epoch": 91.58515535097814, "grad_norm": 0.9170923829078674, "learning_rate": 0.0001682968929804373, "loss": 0.2525, "step": 318350 }, { "epoch": 91.58803222094362, "grad_norm": 0.9472751617431641, "learning_rate": 0.00016823935558112773, "loss": 0.2781, "step": 318360 }, { "epoch": 91.5909090909091, "grad_norm": 1.0298206806182861, "learning_rate": 0.0001681818181818182, "loss": 0.3144, "step": 318370 }, { "epoch": 91.59378596087457, "grad_norm": 1.5233237743377686, "learning_rate": 0.00016812428078250864, "loss": 0.3025, "step": 318380 }, { "epoch": 91.59666283084005, "grad_norm": 0.9690896272659302, "learning_rate": 0.00016806674338319907, "loss": 0.2768, "step": 318390 }, { "epoch": 91.59953970080552, "grad_norm": 1.0039992332458496, "learning_rate": 0.00016800920598388953, "loss": 0.2797, "step": 318400 }, { "epoch": 91.602416570771, "grad_norm": 1.2729887962341309, "learning_rate": 0.00016795166858457996, "loss": 0.2291, "step": 318410 }, { "epoch": 91.60529344073647, "grad_norm": 1.3008614778518677, "learning_rate": 0.0001678941311852704, "loss": 0.3021, "step": 318420 }, { "epoch": 91.60817031070195, "grad_norm": 1.126615047454834, "learning_rate": 0.0001678365937859609, "loss": 0.3183, "step": 318430 }, { "epoch": 91.61104718066743, "grad_norm": 1.281734824180603, "learning_rate": 0.00016777905638665132, "loss": 0.2658, "step": 318440 }, { "epoch": 91.61392405063292, "grad_norm": 1.146165370941162, "learning_rate": 0.00016772151898734178, "loss": 0.355, "step": 318450 }, { "epoch": 91.6168009205984, "grad_norm": 1.103205919265747, "learning_rate": 0.00016766398158803223, "loss": 0.3187, "step": 318460 }, { "epoch": 91.61967779056387, "grad_norm": 1.363989233970642, "learning_rate": 0.00016760644418872266, "loss": 0.2832, "step": 318470 }, { "epoch": 91.62255466052935, "grad_norm": 1.3107870817184448, "learning_rate": 0.00016754890678941312, "loss": 0.2591, "step": 318480 }, { "epoch": 91.62543153049482, "grad_norm": 0.8971742987632751, "learning_rate": 0.00016749136939010357, "loss": 0.2782, "step": 318490 }, { "epoch": 91.6283084004603, "grad_norm": 1.3999029397964478, "learning_rate": 0.000167433831990794, "loss": 0.293, "step": 318500 }, { "epoch": 91.63118527042577, "grad_norm": 1.1337052583694458, "learning_rate": 0.00016737629459148446, "loss": 0.2334, "step": 318510 }, { "epoch": 91.63406214039125, "grad_norm": 1.899857759475708, "learning_rate": 0.00016731875719217494, "loss": 0.2748, "step": 318520 }, { "epoch": 91.63693901035673, "grad_norm": 0.9125436544418335, "learning_rate": 0.00016726121979286537, "loss": 0.2544, "step": 318530 }, { "epoch": 91.6398158803222, "grad_norm": 1.0658925771713257, "learning_rate": 0.00016720368239355582, "loss": 0.3328, "step": 318540 }, { "epoch": 91.6426927502877, "grad_norm": 0.7643482089042664, "learning_rate": 0.00016714614499424628, "loss": 0.2471, "step": 318550 }, { "epoch": 91.64556962025317, "grad_norm": 0.8575714826583862, "learning_rate": 0.0001670886075949367, "loss": 0.3059, "step": 318560 }, { "epoch": 91.64844649021865, "grad_norm": 0.6953721642494202, "learning_rate": 0.00016703107019562716, "loss": 0.2967, "step": 318570 }, { "epoch": 91.65132336018412, "grad_norm": 0.726392924785614, "learning_rate": 0.00016697353279631762, "loss": 0.2649, "step": 318580 }, { "epoch": 91.6542002301496, "grad_norm": 1.8920246362686157, "learning_rate": 0.00016691599539700805, "loss": 0.3614, "step": 318590 }, { "epoch": 91.65707710011507, "grad_norm": 1.618316888809204, "learning_rate": 0.0001668584579976985, "loss": 0.2996, "step": 318600 }, { "epoch": 91.65995397008055, "grad_norm": 1.9065712690353394, "learning_rate": 0.00016680092059838893, "loss": 0.3037, "step": 318610 }, { "epoch": 91.66283084004603, "grad_norm": 1.0703521966934204, "learning_rate": 0.00016674338319907941, "loss": 0.2988, "step": 318620 }, { "epoch": 91.6657077100115, "grad_norm": 0.7688801288604736, "learning_rate": 0.00016668584579976987, "loss": 0.2773, "step": 318630 }, { "epoch": 91.66858457997698, "grad_norm": 2.6053619384765625, "learning_rate": 0.0001666283084004603, "loss": 0.3522, "step": 318640 }, { "epoch": 91.67146144994246, "grad_norm": 0.7739759087562561, "learning_rate": 0.00016657077100115075, "loss": 0.3388, "step": 318650 }, { "epoch": 91.67433831990795, "grad_norm": 1.6829736232757568, "learning_rate": 0.0001665132336018412, "loss": 0.2686, "step": 318660 }, { "epoch": 91.67721518987342, "grad_norm": 1.3177721500396729, "learning_rate": 0.00016645569620253164, "loss": 0.2756, "step": 318670 }, { "epoch": 91.6800920598389, "grad_norm": 1.0649076700210571, "learning_rate": 0.0001663981588032221, "loss": 0.3113, "step": 318680 }, { "epoch": 91.68296892980437, "grad_norm": 0.8576387166976929, "learning_rate": 0.00016634062140391255, "loss": 0.3118, "step": 318690 }, { "epoch": 91.68584579976985, "grad_norm": 1.148072600364685, "learning_rate": 0.00016628308400460298, "loss": 0.2721, "step": 318700 }, { "epoch": 91.68872266973533, "grad_norm": 1.8866139650344849, "learning_rate": 0.00016622554660529343, "loss": 0.2865, "step": 318710 }, { "epoch": 91.6915995397008, "grad_norm": 0.8312634229660034, "learning_rate": 0.00016616800920598392, "loss": 0.3182, "step": 318720 }, { "epoch": 91.69447640966628, "grad_norm": 0.9336033463478088, "learning_rate": 0.00016611047180667434, "loss": 0.3433, "step": 318730 }, { "epoch": 91.69735327963176, "grad_norm": 1.5595546960830688, "learning_rate": 0.0001660529344073648, "loss": 0.3035, "step": 318740 }, { "epoch": 91.70023014959723, "grad_norm": 0.9644907712936401, "learning_rate": 0.00016599539700805525, "loss": 0.2299, "step": 318750 }, { "epoch": 91.70310701956272, "grad_norm": 0.9145274758338928, "learning_rate": 0.00016593785960874568, "loss": 0.2503, "step": 318760 }, { "epoch": 91.7059838895282, "grad_norm": 1.4375238418579102, "learning_rate": 0.00016588032220943614, "loss": 0.2894, "step": 318770 }, { "epoch": 91.70886075949367, "grad_norm": 1.0410386323928833, "learning_rate": 0.0001658227848101266, "loss": 0.2795, "step": 318780 }, { "epoch": 91.71173762945915, "grad_norm": 2.9304323196411133, "learning_rate": 0.00016576524741081702, "loss": 0.3335, "step": 318790 }, { "epoch": 91.71461449942463, "grad_norm": 0.7131033539772034, "learning_rate": 0.00016570771001150748, "loss": 0.2729, "step": 318800 }, { "epoch": 91.7174913693901, "grad_norm": 1.6575576066970825, "learning_rate": 0.0001656501726121979, "loss": 0.2451, "step": 318810 }, { "epoch": 91.72036823935558, "grad_norm": 1.3626443147659302, "learning_rate": 0.0001655926352128884, "loss": 0.2558, "step": 318820 }, { "epoch": 91.72324510932106, "grad_norm": 1.1047261953353882, "learning_rate": 0.00016553509781357884, "loss": 0.2908, "step": 318830 }, { "epoch": 91.72612197928653, "grad_norm": 0.6655371785163879, "learning_rate": 0.00016547756041426927, "loss": 0.2669, "step": 318840 }, { "epoch": 91.72899884925201, "grad_norm": 1.4552942514419556, "learning_rate": 0.00016542002301495973, "loss": 0.3398, "step": 318850 }, { "epoch": 91.7318757192175, "grad_norm": 1.3762403726577759, "learning_rate": 0.00016536248561565018, "loss": 0.2422, "step": 318860 }, { "epoch": 91.73475258918297, "grad_norm": 0.9704846143722534, "learning_rate": 0.0001653049482163406, "loss": 0.2569, "step": 318870 }, { "epoch": 91.73762945914845, "grad_norm": 0.7525336742401123, "learning_rate": 0.00016524741081703107, "loss": 0.2759, "step": 318880 }, { "epoch": 91.74050632911393, "grad_norm": 0.8126447796821594, "learning_rate": 0.00016518987341772152, "loss": 0.328, "step": 318890 }, { "epoch": 91.7433831990794, "grad_norm": 0.9953305721282959, "learning_rate": 0.00016513233601841195, "loss": 0.2692, "step": 318900 }, { "epoch": 91.74626006904488, "grad_norm": 1.3922830820083618, "learning_rate": 0.0001650747986191024, "loss": 0.3411, "step": 318910 }, { "epoch": 91.74913693901036, "grad_norm": 0.7409944534301758, "learning_rate": 0.0001650172612197929, "loss": 0.2786, "step": 318920 }, { "epoch": 91.75201380897583, "grad_norm": 1.066392421722412, "learning_rate": 0.00016495972382048332, "loss": 0.3042, "step": 318930 }, { "epoch": 91.75489067894131, "grad_norm": 1.1974228620529175, "learning_rate": 0.00016490218642117377, "loss": 0.3213, "step": 318940 }, { "epoch": 91.75776754890678, "grad_norm": 1.4040758609771729, "learning_rate": 0.00016484464902186423, "loss": 0.263, "step": 318950 }, { "epoch": 91.76064441887226, "grad_norm": 0.8383435606956482, "learning_rate": 0.00016478711162255466, "loss": 0.2968, "step": 318960 }, { "epoch": 91.76352128883775, "grad_norm": 1.415881633758545, "learning_rate": 0.0001647295742232451, "loss": 0.2503, "step": 318970 }, { "epoch": 91.76639815880323, "grad_norm": 1.2499127388000488, "learning_rate": 0.00016467203682393557, "loss": 0.3011, "step": 318980 }, { "epoch": 91.7692750287687, "grad_norm": 1.3148194551467896, "learning_rate": 0.000164614499424626, "loss": 0.2857, "step": 318990 }, { "epoch": 91.77215189873418, "grad_norm": 0.9975422620773315, "learning_rate": 0.00016455696202531645, "loss": 0.3319, "step": 319000 }, { "epoch": 91.77502876869966, "grad_norm": 0.8339292407035828, "learning_rate": 0.00016449942462600693, "loss": 0.2249, "step": 319010 }, { "epoch": 91.77790563866513, "grad_norm": 0.7572203278541565, "learning_rate": 0.00016444188722669736, "loss": 0.3031, "step": 319020 }, { "epoch": 91.78078250863061, "grad_norm": 1.8480594158172607, "learning_rate": 0.00016438434982738782, "loss": 0.2601, "step": 319030 }, { "epoch": 91.78365937859608, "grad_norm": 0.7606335282325745, "learning_rate": 0.00016432681242807825, "loss": 0.2678, "step": 319040 }, { "epoch": 91.78653624856156, "grad_norm": 1.2305850982666016, "learning_rate": 0.0001642692750287687, "loss": 0.3147, "step": 319050 }, { "epoch": 91.78941311852704, "grad_norm": 1.3979111909866333, "learning_rate": 0.00016421173762945916, "loss": 0.2444, "step": 319060 }, { "epoch": 91.79228998849253, "grad_norm": 0.8241892457008362, "learning_rate": 0.0001641542002301496, "loss": 0.2564, "step": 319070 }, { "epoch": 91.795166858458, "grad_norm": 1.003588080406189, "learning_rate": 0.00016409666283084004, "loss": 0.295, "step": 319080 }, { "epoch": 91.79804372842348, "grad_norm": 1.013007402420044, "learning_rate": 0.0001640391254315305, "loss": 0.2715, "step": 319090 }, { "epoch": 91.80092059838896, "grad_norm": 1.2663328647613525, "learning_rate": 0.00016398158803222093, "loss": 0.2705, "step": 319100 }, { "epoch": 91.80379746835443, "grad_norm": 1.0434170961380005, "learning_rate": 0.0001639240506329114, "loss": 0.2602, "step": 319110 }, { "epoch": 91.80667433831991, "grad_norm": 1.5596928596496582, "learning_rate": 0.00016386651323360186, "loss": 0.377, "step": 319120 }, { "epoch": 91.80955120828538, "grad_norm": 1.0810118913650513, "learning_rate": 0.0001638089758342923, "loss": 0.2675, "step": 319130 }, { "epoch": 91.81242807825086, "grad_norm": 0.8671287894248962, "learning_rate": 0.00016375143843498275, "loss": 0.3148, "step": 319140 }, { "epoch": 91.81530494821634, "grad_norm": 1.255983829498291, "learning_rate": 0.0001636939010356732, "loss": 0.3391, "step": 319150 }, { "epoch": 91.81818181818181, "grad_norm": 1.0429182052612305, "learning_rate": 0.00016363636363636363, "loss": 0.3058, "step": 319160 }, { "epoch": 91.82105868814729, "grad_norm": 2.955265998840332, "learning_rate": 0.0001635788262370541, "loss": 0.3756, "step": 319170 }, { "epoch": 91.82393555811278, "grad_norm": 0.8656466007232666, "learning_rate": 0.00016352128883774454, "loss": 0.3173, "step": 319180 }, { "epoch": 91.82681242807826, "grad_norm": 1.2582815885543823, "learning_rate": 0.00016346375143843497, "loss": 0.2899, "step": 319190 }, { "epoch": 91.82968929804373, "grad_norm": 1.6715251207351685, "learning_rate": 0.00016340621403912543, "loss": 0.2717, "step": 319200 }, { "epoch": 91.83256616800921, "grad_norm": 1.578674554824829, "learning_rate": 0.0001633486766398159, "loss": 0.2214, "step": 319210 }, { "epoch": 91.83544303797468, "grad_norm": 0.6690995693206787, "learning_rate": 0.00016329113924050634, "loss": 0.2843, "step": 319220 }, { "epoch": 91.83831990794016, "grad_norm": 0.9543512463569641, "learning_rate": 0.0001632336018411968, "loss": 0.3441, "step": 319230 }, { "epoch": 91.84119677790564, "grad_norm": 0.9582756757736206, "learning_rate": 0.00016317606444188722, "loss": 0.3062, "step": 319240 }, { "epoch": 91.84407364787111, "grad_norm": 0.8619369268417358, "learning_rate": 0.00016311852704257768, "loss": 0.2589, "step": 319250 }, { "epoch": 91.84695051783659, "grad_norm": 1.6421663761138916, "learning_rate": 0.00016306098964326813, "loss": 0.283, "step": 319260 }, { "epoch": 91.84982738780207, "grad_norm": 1.2065026760101318, "learning_rate": 0.00016300345224395856, "loss": 0.2551, "step": 319270 }, { "epoch": 91.85270425776756, "grad_norm": 1.1732507944107056, "learning_rate": 0.00016294591484464902, "loss": 0.2449, "step": 319280 }, { "epoch": 91.85558112773303, "grad_norm": 1.784672737121582, "learning_rate": 0.00016288837744533947, "loss": 0.3172, "step": 319290 }, { "epoch": 91.85845799769851, "grad_norm": 1.367469072341919, "learning_rate": 0.0001628308400460299, "loss": 0.2719, "step": 319300 }, { "epoch": 91.86133486766398, "grad_norm": 1.4699102640151978, "learning_rate": 0.00016277330264672038, "loss": 0.3056, "step": 319310 }, { "epoch": 91.86421173762946, "grad_norm": 0.6731908321380615, "learning_rate": 0.00016271576524741084, "loss": 0.2954, "step": 319320 }, { "epoch": 91.86708860759494, "grad_norm": 1.4055180549621582, "learning_rate": 0.00016265822784810127, "loss": 0.2851, "step": 319330 }, { "epoch": 91.86996547756041, "grad_norm": 0.8177388906478882, "learning_rate": 0.00016260069044879172, "loss": 0.2958, "step": 319340 }, { "epoch": 91.87284234752589, "grad_norm": 1.439843773841858, "learning_rate": 0.00016254315304948218, "loss": 0.275, "step": 319350 }, { "epoch": 91.87571921749137, "grad_norm": 1.1774933338165283, "learning_rate": 0.0001624856156501726, "loss": 0.2801, "step": 319360 }, { "epoch": 91.87859608745684, "grad_norm": 1.4679527282714844, "learning_rate": 0.00016242807825086306, "loss": 0.2687, "step": 319370 }, { "epoch": 91.88147295742232, "grad_norm": 0.5531913042068481, "learning_rate": 0.00016237054085155352, "loss": 0.2597, "step": 319380 }, { "epoch": 91.88434982738781, "grad_norm": 1.6044948101043701, "learning_rate": 0.00016231300345224395, "loss": 0.2727, "step": 319390 }, { "epoch": 91.88722669735328, "grad_norm": 1.1407923698425293, "learning_rate": 0.0001622554660529344, "loss": 0.2502, "step": 319400 }, { "epoch": 91.89010356731876, "grad_norm": 2.072580099105835, "learning_rate": 0.00016219792865362488, "loss": 0.3254, "step": 319410 }, { "epoch": 91.89298043728424, "grad_norm": 0.8708709478378296, "learning_rate": 0.0001621403912543153, "loss": 0.2818, "step": 319420 }, { "epoch": 91.89585730724971, "grad_norm": 0.804534912109375, "learning_rate": 0.00016208285385500577, "loss": 0.2279, "step": 319430 }, { "epoch": 91.89873417721519, "grad_norm": 1.650328516960144, "learning_rate": 0.0001620253164556962, "loss": 0.325, "step": 319440 }, { "epoch": 91.90161104718067, "grad_norm": 1.2550498247146606, "learning_rate": 0.00016196777905638665, "loss": 0.2375, "step": 319450 }, { "epoch": 91.90448791714614, "grad_norm": 1.441169261932373, "learning_rate": 0.0001619102416570771, "loss": 0.311, "step": 319460 }, { "epoch": 91.90736478711162, "grad_norm": 0.9702950119972229, "learning_rate": 0.00016185270425776754, "loss": 0.2795, "step": 319470 }, { "epoch": 91.9102416570771, "grad_norm": 0.8763746023178101, "learning_rate": 0.000161795166858458, "loss": 0.2451, "step": 319480 }, { "epoch": 91.91311852704258, "grad_norm": 0.6512051224708557, "learning_rate": 0.00016173762945914845, "loss": 0.2796, "step": 319490 }, { "epoch": 91.91599539700806, "grad_norm": 0.8222735524177551, "learning_rate": 0.0001616800920598389, "loss": 0.2585, "step": 319500 }, { "epoch": 91.91887226697354, "grad_norm": 1.2957121133804321, "learning_rate": 0.00016162255466052936, "loss": 0.4235, "step": 319510 }, { "epoch": 91.92174913693901, "grad_norm": 0.5694031119346619, "learning_rate": 0.0001615650172612198, "loss": 0.2573, "step": 319520 }, { "epoch": 91.92462600690449, "grad_norm": 1.0747860670089722, "learning_rate": 0.00016150747986191024, "loss": 0.2474, "step": 319530 }, { "epoch": 91.92750287686997, "grad_norm": 0.9913811087608337, "learning_rate": 0.0001614499424626007, "loss": 0.3145, "step": 319540 }, { "epoch": 91.93037974683544, "grad_norm": 2.00652813911438, "learning_rate": 0.00016139240506329115, "loss": 0.3003, "step": 319550 }, { "epoch": 91.93325661680092, "grad_norm": 1.1553231477737427, "learning_rate": 0.00016133486766398158, "loss": 0.2483, "step": 319560 }, { "epoch": 91.9361334867664, "grad_norm": 1.8096734285354614, "learning_rate": 0.00016127733026467204, "loss": 0.2661, "step": 319570 }, { "epoch": 91.93901035673187, "grad_norm": 1.251221776008606, "learning_rate": 0.0001612197928653625, "loss": 0.2879, "step": 319580 }, { "epoch": 91.94188722669735, "grad_norm": 1.0229079723358154, "learning_rate": 0.00016116225546605292, "loss": 0.2624, "step": 319590 }, { "epoch": 91.94476409666284, "grad_norm": 0.7614874243736267, "learning_rate": 0.0001611047180667434, "loss": 0.2452, "step": 319600 }, { "epoch": 91.94764096662831, "grad_norm": 2.1264538764953613, "learning_rate": 0.00016104718066743386, "loss": 0.3066, "step": 319610 }, { "epoch": 91.95051783659379, "grad_norm": 1.6606980562210083, "learning_rate": 0.0001609896432681243, "loss": 0.2679, "step": 319620 }, { "epoch": 91.95339470655927, "grad_norm": 0.6190292835235596, "learning_rate": 0.00016093210586881474, "loss": 0.2153, "step": 319630 }, { "epoch": 91.95627157652474, "grad_norm": 1.3133898973464966, "learning_rate": 0.00016087456846950517, "loss": 0.273, "step": 319640 }, { "epoch": 91.95914844649022, "grad_norm": 0.9425269961357117, "learning_rate": 0.00016081703107019563, "loss": 0.2786, "step": 319650 }, { "epoch": 91.9620253164557, "grad_norm": 0.9609005451202393, "learning_rate": 0.00016075949367088608, "loss": 0.2862, "step": 319660 }, { "epoch": 91.96490218642117, "grad_norm": 1.3523948192596436, "learning_rate": 0.0001607019562715765, "loss": 0.3058, "step": 319670 }, { "epoch": 91.96777905638665, "grad_norm": 1.5205895900726318, "learning_rate": 0.00016064441887226697, "loss": 0.2798, "step": 319680 }, { "epoch": 91.97065592635212, "grad_norm": 1.3577734231948853, "learning_rate": 0.00016058688147295742, "loss": 0.2304, "step": 319690 }, { "epoch": 91.97353279631761, "grad_norm": 0.7868013978004456, "learning_rate": 0.00016052934407364788, "loss": 0.2393, "step": 319700 }, { "epoch": 91.97640966628309, "grad_norm": 1.4336313009262085, "learning_rate": 0.00016047180667433833, "loss": 0.2704, "step": 319710 }, { "epoch": 91.97928653624857, "grad_norm": 0.8585217595100403, "learning_rate": 0.0001604142692750288, "loss": 0.2863, "step": 319720 }, { "epoch": 91.98216340621404, "grad_norm": 1.4337904453277588, "learning_rate": 0.00016035673187571922, "loss": 0.3012, "step": 319730 }, { "epoch": 91.98504027617952, "grad_norm": 1.5906646251678467, "learning_rate": 0.00016029919447640967, "loss": 0.2651, "step": 319740 }, { "epoch": 91.987917146145, "grad_norm": 1.4015783071517944, "learning_rate": 0.00016024165707710013, "loss": 0.2935, "step": 319750 }, { "epoch": 91.99079401611047, "grad_norm": 1.1830443143844604, "learning_rate": 0.00016018411967779056, "loss": 0.3562, "step": 319760 }, { "epoch": 91.99367088607595, "grad_norm": 1.0406743288040161, "learning_rate": 0.000160126582278481, "loss": 0.31, "step": 319770 }, { "epoch": 91.99654775604142, "grad_norm": 1.026296854019165, "learning_rate": 0.00016006904487917147, "loss": 0.251, "step": 319780 }, { "epoch": 91.9994246260069, "grad_norm": 0.8857969641685486, "learning_rate": 0.0001600115074798619, "loss": 0.2444, "step": 319790 }, { "epoch": 92.00230149597238, "grad_norm": 0.9908824563026428, "learning_rate": 0.00015995397008055238, "loss": 0.2898, "step": 319800 }, { "epoch": 92.00517836593787, "grad_norm": 0.9196643233299255, "learning_rate": 0.00015989643268124283, "loss": 0.2512, "step": 319810 }, { "epoch": 92.00805523590334, "grad_norm": 1.2346254587173462, "learning_rate": 0.00015983889528193326, "loss": 0.2656, "step": 319820 }, { "epoch": 92.01093210586882, "grad_norm": 0.8401652574539185, "learning_rate": 0.00015978135788262372, "loss": 0.2875, "step": 319830 }, { "epoch": 92.0138089758343, "grad_norm": 0.7965103983879089, "learning_rate": 0.00015972382048331415, "loss": 0.254, "step": 319840 }, { "epoch": 92.01668584579977, "grad_norm": 0.9274178147315979, "learning_rate": 0.0001596662830840046, "loss": 0.2298, "step": 319850 }, { "epoch": 92.01956271576525, "grad_norm": 0.8657306432723999, "learning_rate": 0.00015960874568469506, "loss": 0.3053, "step": 319860 }, { "epoch": 92.02243958573072, "grad_norm": 0.9700339436531067, "learning_rate": 0.00015955120828538548, "loss": 0.2833, "step": 319870 }, { "epoch": 92.0253164556962, "grad_norm": 1.0891867876052856, "learning_rate": 0.00015949367088607594, "loss": 0.3906, "step": 319880 }, { "epoch": 92.02819332566168, "grad_norm": 1.1670808792114258, "learning_rate": 0.0001594361334867664, "loss": 0.3739, "step": 319890 }, { "epoch": 92.03107019562715, "grad_norm": 1.3560415506362915, "learning_rate": 0.00015937859608745685, "loss": 0.2767, "step": 319900 }, { "epoch": 92.03394706559264, "grad_norm": 1.4770069122314453, "learning_rate": 0.0001593210586881473, "loss": 0.2851, "step": 319910 }, { "epoch": 92.03682393555812, "grad_norm": 0.9873935580253601, "learning_rate": 0.00015926352128883776, "loss": 0.2861, "step": 319920 }, { "epoch": 92.0397008055236, "grad_norm": 1.0508040189743042, "learning_rate": 0.0001592059838895282, "loss": 0.2433, "step": 319930 }, { "epoch": 92.04257767548907, "grad_norm": 1.473443627357483, "learning_rate": 0.00015914844649021865, "loss": 0.3107, "step": 319940 }, { "epoch": 92.04545454545455, "grad_norm": 1.4324508905410767, "learning_rate": 0.0001590909090909091, "loss": 0.2541, "step": 319950 }, { "epoch": 92.04833141542002, "grad_norm": 0.8697609305381775, "learning_rate": 0.00015903337169159953, "loss": 0.2574, "step": 319960 }, { "epoch": 92.0512082853855, "grad_norm": 1.6453555822372437, "learning_rate": 0.00015897583429228999, "loss": 0.3013, "step": 319970 }, { "epoch": 92.05408515535098, "grad_norm": 1.4513133764266968, "learning_rate": 0.00015891829689298044, "loss": 0.2842, "step": 319980 }, { "epoch": 92.05696202531645, "grad_norm": 0.8466829657554626, "learning_rate": 0.00015886075949367087, "loss": 0.3013, "step": 319990 }, { "epoch": 92.05983889528193, "grad_norm": 1.0333669185638428, "learning_rate": 0.00015880322209436135, "loss": 0.2738, "step": 320000 }, { "epoch": 92.0627157652474, "grad_norm": 1.1283767223358154, "learning_rate": 0.0001587456846950518, "loss": 0.3192, "step": 320010 }, { "epoch": 92.0655926352129, "grad_norm": 1.2407150268554688, "learning_rate": 0.00015868814729574224, "loss": 0.2796, "step": 320020 }, { "epoch": 92.06846950517837, "grad_norm": 0.8300204873085022, "learning_rate": 0.0001586306098964327, "loss": 0.276, "step": 320030 }, { "epoch": 92.07134637514385, "grad_norm": 0.8502442836761475, "learning_rate": 0.00015857307249712312, "loss": 0.3053, "step": 320040 }, { "epoch": 92.07422324510932, "grad_norm": 1.2231268882751465, "learning_rate": 0.00015851553509781358, "loss": 0.3025, "step": 320050 }, { "epoch": 92.0771001150748, "grad_norm": 0.875001847743988, "learning_rate": 0.00015845799769850403, "loss": 0.3264, "step": 320060 }, { "epoch": 92.07997698504028, "grad_norm": 1.170064091682434, "learning_rate": 0.00015840046029919446, "loss": 0.2316, "step": 320070 }, { "epoch": 92.08285385500575, "grad_norm": 1.7388725280761719, "learning_rate": 0.00015834292289988491, "loss": 0.2284, "step": 320080 }, { "epoch": 92.08573072497123, "grad_norm": 1.3185800313949585, "learning_rate": 0.0001582853855005754, "loss": 0.251, "step": 320090 }, { "epoch": 92.0886075949367, "grad_norm": 1.412534236907959, "learning_rate": 0.00015822784810126583, "loss": 0.2463, "step": 320100 }, { "epoch": 92.09148446490218, "grad_norm": 1.9228099584579468, "learning_rate": 0.00015817031070195628, "loss": 0.2781, "step": 320110 }, { "epoch": 92.09436133486767, "grad_norm": 1.3147428035736084, "learning_rate": 0.00015811277330264674, "loss": 0.343, "step": 320120 }, { "epoch": 92.09723820483315, "grad_norm": 1.0684031248092651, "learning_rate": 0.00015805523590333717, "loss": 0.3062, "step": 320130 }, { "epoch": 92.10011507479862, "grad_norm": 2.064321756362915, "learning_rate": 0.00015799769850402762, "loss": 0.257, "step": 320140 }, { "epoch": 92.1029919447641, "grad_norm": 1.6255829334259033, "learning_rate": 0.00015794016110471808, "loss": 0.3173, "step": 320150 }, { "epoch": 92.10586881472958, "grad_norm": 1.516609787940979, "learning_rate": 0.0001578826237054085, "loss": 0.2778, "step": 320160 }, { "epoch": 92.10874568469505, "grad_norm": 1.627832293510437, "learning_rate": 0.00015782508630609896, "loss": 0.3237, "step": 320170 }, { "epoch": 92.11162255466053, "grad_norm": 1.196548581123352, "learning_rate": 0.00015776754890678942, "loss": 0.242, "step": 320180 }, { "epoch": 92.114499424626, "grad_norm": 1.1484804153442383, "learning_rate": 0.00015771001150747987, "loss": 0.2644, "step": 320190 }, { "epoch": 92.11737629459148, "grad_norm": 1.70497465133667, "learning_rate": 0.00015765247410817033, "loss": 0.2738, "step": 320200 }, { "epoch": 92.12025316455696, "grad_norm": 1.1005051136016846, "learning_rate": 0.00015759493670886078, "loss": 0.2894, "step": 320210 }, { "epoch": 92.12313003452243, "grad_norm": 0.8972352147102356, "learning_rate": 0.0001575373993095512, "loss": 0.2409, "step": 320220 }, { "epoch": 92.12600690448792, "grad_norm": 1.3143340349197388, "learning_rate": 0.00015747986191024167, "loss": 0.2867, "step": 320230 }, { "epoch": 92.1288837744534, "grad_norm": 1.8018767833709717, "learning_rate": 0.0001574223245109321, "loss": 0.3157, "step": 320240 }, { "epoch": 92.13176064441888, "grad_norm": 1.4697446823120117, "learning_rate": 0.00015736478711162255, "loss": 0.271, "step": 320250 }, { "epoch": 92.13463751438435, "grad_norm": 0.5572279095649719, "learning_rate": 0.000157307249712313, "loss": 0.3085, "step": 320260 }, { "epoch": 92.13751438434983, "grad_norm": 0.47983217239379883, "learning_rate": 0.00015724971231300343, "loss": 0.2518, "step": 320270 }, { "epoch": 92.1403912543153, "grad_norm": 2.669163942337036, "learning_rate": 0.0001571921749136939, "loss": 0.2748, "step": 320280 }, { "epoch": 92.14326812428078, "grad_norm": 0.9632558822631836, "learning_rate": 0.00015713463751438437, "loss": 0.2684, "step": 320290 }, { "epoch": 92.14614499424626, "grad_norm": 1.2235110998153687, "learning_rate": 0.0001570771001150748, "loss": 0.2505, "step": 320300 }, { "epoch": 92.14902186421173, "grad_norm": 0.8054375052452087, "learning_rate": 0.00015701956271576526, "loss": 0.2556, "step": 320310 }, { "epoch": 92.15189873417721, "grad_norm": 0.7942550182342529, "learning_rate": 0.0001569620253164557, "loss": 0.2499, "step": 320320 }, { "epoch": 92.1547756041427, "grad_norm": 1.1467498540878296, "learning_rate": 0.00015690448791714614, "loss": 0.2618, "step": 320330 }, { "epoch": 92.15765247410818, "grad_norm": 1.0767019987106323, "learning_rate": 0.0001568469505178366, "loss": 0.2386, "step": 320340 }, { "epoch": 92.16052934407365, "grad_norm": 1.2593610286712646, "learning_rate": 0.00015678941311852705, "loss": 0.256, "step": 320350 }, { "epoch": 92.16340621403913, "grad_norm": 0.9608182311058044, "learning_rate": 0.00015673187571921748, "loss": 0.2907, "step": 320360 }, { "epoch": 92.1662830840046, "grad_norm": 1.3095424175262451, "learning_rate": 0.00015667433831990793, "loss": 0.2174, "step": 320370 }, { "epoch": 92.16915995397008, "grad_norm": 0.8783453702926636, "learning_rate": 0.0001566168009205984, "loss": 0.2442, "step": 320380 }, { "epoch": 92.17203682393556, "grad_norm": 1.1219706535339355, "learning_rate": 0.00015655926352128885, "loss": 0.3889, "step": 320390 }, { "epoch": 92.17491369390103, "grad_norm": 1.5347751379013062, "learning_rate": 0.0001565017261219793, "loss": 0.2627, "step": 320400 }, { "epoch": 92.17779056386651, "grad_norm": 1.015637993812561, "learning_rate": 0.00015644418872266976, "loss": 0.2637, "step": 320410 }, { "epoch": 92.18066743383199, "grad_norm": 1.7373510599136353, "learning_rate": 0.00015638665132336018, "loss": 0.2427, "step": 320420 }, { "epoch": 92.18354430379746, "grad_norm": 0.9115926027297974, "learning_rate": 0.00015632911392405064, "loss": 0.2934, "step": 320430 }, { "epoch": 92.18642117376295, "grad_norm": 1.0606364011764526, "learning_rate": 0.0001562715765247411, "loss": 0.2361, "step": 320440 }, { "epoch": 92.18929804372843, "grad_norm": 0.7991169691085815, "learning_rate": 0.00015621403912543152, "loss": 0.2343, "step": 320450 }, { "epoch": 92.1921749136939, "grad_norm": 1.521012306213379, "learning_rate": 0.00015615650172612198, "loss": 0.2857, "step": 320460 }, { "epoch": 92.19505178365938, "grad_norm": 1.3163195848464966, "learning_rate": 0.0001560989643268124, "loss": 0.2456, "step": 320470 }, { "epoch": 92.19792865362486, "grad_norm": 1.249005675315857, "learning_rate": 0.00015604142692750286, "loss": 0.3084, "step": 320480 }, { "epoch": 92.20080552359033, "grad_norm": 1.1913031339645386, "learning_rate": 0.00015598388952819335, "loss": 0.2748, "step": 320490 }, { "epoch": 92.20368239355581, "grad_norm": 0.9601419568061829, "learning_rate": 0.00015592635212888377, "loss": 0.2436, "step": 320500 }, { "epoch": 92.20655926352129, "grad_norm": 0.89668869972229, "learning_rate": 0.00015586881472957423, "loss": 0.2344, "step": 320510 }, { "epoch": 92.20943613348676, "grad_norm": 1.1909949779510498, "learning_rate": 0.00015581127733026469, "loss": 0.2561, "step": 320520 }, { "epoch": 92.21231300345224, "grad_norm": 2.5559399127960205, "learning_rate": 0.00015575373993095511, "loss": 0.2747, "step": 320530 }, { "epoch": 92.21518987341773, "grad_norm": 1.3558835983276367, "learning_rate": 0.00015569620253164557, "loss": 0.328, "step": 320540 }, { "epoch": 92.2180667433832, "grad_norm": 1.8117314577102661, "learning_rate": 0.00015563866513233603, "loss": 0.2941, "step": 320550 }, { "epoch": 92.22094361334868, "grad_norm": 0.5964013934135437, "learning_rate": 0.00015558112773302645, "loss": 0.2971, "step": 320560 }, { "epoch": 92.22382048331416, "grad_norm": 1.2092230319976807, "learning_rate": 0.0001555235903337169, "loss": 0.2307, "step": 320570 }, { "epoch": 92.22669735327963, "grad_norm": 1.0960934162139893, "learning_rate": 0.0001554660529344074, "loss": 0.3082, "step": 320580 }, { "epoch": 92.22957422324511, "grad_norm": 1.15182363986969, "learning_rate": 0.00015540851553509782, "loss": 0.262, "step": 320590 }, { "epoch": 92.23245109321059, "grad_norm": 1.1047546863555908, "learning_rate": 0.00015535097813578828, "loss": 0.3137, "step": 320600 }, { "epoch": 92.23532796317606, "grad_norm": 1.316826581954956, "learning_rate": 0.00015529344073647873, "loss": 0.2554, "step": 320610 }, { "epoch": 92.23820483314154, "grad_norm": 1.3404990434646606, "learning_rate": 0.00015523590333716916, "loss": 0.2768, "step": 320620 }, { "epoch": 92.24108170310701, "grad_norm": 2.702197790145874, "learning_rate": 0.00015517836593785962, "loss": 0.3167, "step": 320630 }, { "epoch": 92.24395857307249, "grad_norm": 1.8719148635864258, "learning_rate": 0.00015512082853855007, "loss": 0.2603, "step": 320640 }, { "epoch": 92.24683544303798, "grad_norm": 1.5316803455352783, "learning_rate": 0.0001550632911392405, "loss": 0.3033, "step": 320650 }, { "epoch": 92.24971231300346, "grad_norm": 1.3219738006591797, "learning_rate": 0.00015500575373993095, "loss": 0.2679, "step": 320660 }, { "epoch": 92.25258918296893, "grad_norm": 1.5959943532943726, "learning_rate": 0.00015494821634062138, "loss": 0.278, "step": 320670 }, { "epoch": 92.25546605293441, "grad_norm": 0.6792150735855103, "learning_rate": 0.00015489067894131187, "loss": 0.314, "step": 320680 }, { "epoch": 92.25834292289989, "grad_norm": 1.1023908853530884, "learning_rate": 0.00015483314154200232, "loss": 0.2673, "step": 320690 }, { "epoch": 92.26121979286536, "grad_norm": 1.3821433782577515, "learning_rate": 0.00015477560414269275, "loss": 0.3264, "step": 320700 }, { "epoch": 92.26409666283084, "grad_norm": 1.0730701684951782, "learning_rate": 0.0001547180667433832, "loss": 0.2848, "step": 320710 }, { "epoch": 92.26697353279631, "grad_norm": 1.064437985420227, "learning_rate": 0.00015466052934407366, "loss": 0.2221, "step": 320720 }, { "epoch": 92.26985040276179, "grad_norm": 1.6247363090515137, "learning_rate": 0.0001546029919447641, "loss": 0.2944, "step": 320730 }, { "epoch": 92.27272727272727, "grad_norm": 0.8339266180992126, "learning_rate": 0.00015454545454545454, "loss": 0.2315, "step": 320740 }, { "epoch": 92.27560414269276, "grad_norm": 1.170210599899292, "learning_rate": 0.000154487917146145, "loss": 0.2772, "step": 320750 }, { "epoch": 92.27848101265823, "grad_norm": 1.552837610244751, "learning_rate": 0.00015443037974683543, "loss": 0.2753, "step": 320760 }, { "epoch": 92.28135788262371, "grad_norm": 1.6791008710861206, "learning_rate": 0.00015437284234752588, "loss": 0.2659, "step": 320770 }, { "epoch": 92.28423475258919, "grad_norm": 1.0318820476531982, "learning_rate": 0.00015431530494821637, "loss": 0.3048, "step": 320780 }, { "epoch": 92.28711162255466, "grad_norm": 2.256807804107666, "learning_rate": 0.0001542577675489068, "loss": 0.2382, "step": 320790 }, { "epoch": 92.28998849252014, "grad_norm": 0.8926679491996765, "learning_rate": 0.00015420023014959725, "loss": 0.2422, "step": 320800 }, { "epoch": 92.29286536248561, "grad_norm": 0.7001895904541016, "learning_rate": 0.0001541426927502877, "loss": 0.2855, "step": 320810 }, { "epoch": 92.29574223245109, "grad_norm": 3.5132009983062744, "learning_rate": 0.00015408515535097813, "loss": 0.2794, "step": 320820 }, { "epoch": 92.29861910241657, "grad_norm": 0.6658384203910828, "learning_rate": 0.0001540276179516686, "loss": 0.2272, "step": 320830 }, { "epoch": 92.30149597238204, "grad_norm": 0.9431984424591064, "learning_rate": 0.00015397008055235905, "loss": 0.3161, "step": 320840 }, { "epoch": 92.30437284234753, "grad_norm": 0.9773235321044922, "learning_rate": 0.00015391254315304947, "loss": 0.3425, "step": 320850 }, { "epoch": 92.30724971231301, "grad_norm": 1.2300341129302979, "learning_rate": 0.00015385500575373993, "loss": 0.307, "step": 320860 }, { "epoch": 92.31012658227849, "grad_norm": 1.5513004064559937, "learning_rate": 0.00015379746835443036, "loss": 0.3273, "step": 320870 }, { "epoch": 92.31300345224396, "grad_norm": 1.3225709199905396, "learning_rate": 0.00015373993095512084, "loss": 0.2561, "step": 320880 }, { "epoch": 92.31588032220944, "grad_norm": 0.6819370985031128, "learning_rate": 0.0001536823935558113, "loss": 0.2596, "step": 320890 }, { "epoch": 92.31875719217491, "grad_norm": 0.6813349723815918, "learning_rate": 0.00015362485615650172, "loss": 0.2966, "step": 320900 }, { "epoch": 92.32163406214039, "grad_norm": 1.4691340923309326, "learning_rate": 0.00015356731875719218, "loss": 0.2727, "step": 320910 }, { "epoch": 92.32451093210587, "grad_norm": 1.3682740926742554, "learning_rate": 0.00015350978135788263, "loss": 0.2719, "step": 320920 }, { "epoch": 92.32738780207134, "grad_norm": 1.0724871158599854, "learning_rate": 0.00015345224395857306, "loss": 0.2976, "step": 320930 }, { "epoch": 92.33026467203682, "grad_norm": 1.203680157661438, "learning_rate": 0.00015339470655926352, "loss": 0.3122, "step": 320940 }, { "epoch": 92.3331415420023, "grad_norm": 0.9234057068824768, "learning_rate": 0.00015333716915995397, "loss": 0.3311, "step": 320950 }, { "epoch": 92.33601841196779, "grad_norm": 0.8186452984809875, "learning_rate": 0.0001532796317606444, "loss": 0.2482, "step": 320960 }, { "epoch": 92.33889528193326, "grad_norm": 1.5864529609680176, "learning_rate": 0.00015322209436133486, "loss": 0.2823, "step": 320970 }, { "epoch": 92.34177215189874, "grad_norm": 0.8192035555839539, "learning_rate": 0.00015316455696202534, "loss": 0.2638, "step": 320980 }, { "epoch": 92.34464902186421, "grad_norm": 0.9261872172355652, "learning_rate": 0.00015310701956271577, "loss": 0.229, "step": 320990 }, { "epoch": 92.34752589182969, "grad_norm": 0.7296893000602722, "learning_rate": 0.00015304948216340622, "loss": 0.2418, "step": 321000 }, { "epoch": 92.35040276179517, "grad_norm": 1.4473187923431396, "learning_rate": 0.00015299194476409668, "loss": 0.2459, "step": 321010 }, { "epoch": 92.35327963176064, "grad_norm": 0.6719527840614319, "learning_rate": 0.0001529344073647871, "loss": 0.2988, "step": 321020 }, { "epoch": 92.35615650172612, "grad_norm": 1.131856083869934, "learning_rate": 0.00015287686996547756, "loss": 0.2571, "step": 321030 }, { "epoch": 92.3590333716916, "grad_norm": 0.7500103116035461, "learning_rate": 0.00015281933256616802, "loss": 0.2521, "step": 321040 }, { "epoch": 92.36191024165707, "grad_norm": 0.9200279116630554, "learning_rate": 0.00015276179516685845, "loss": 0.3093, "step": 321050 }, { "epoch": 92.36478711162256, "grad_norm": 1.1539314985275269, "learning_rate": 0.0001527042577675489, "loss": 0.2644, "step": 321060 }, { "epoch": 92.36766398158804, "grad_norm": 1.7999478578567505, "learning_rate": 0.00015264672036823936, "loss": 0.2658, "step": 321070 }, { "epoch": 92.37054085155351, "grad_norm": 1.9727942943572998, "learning_rate": 0.00015258918296892981, "loss": 0.3094, "step": 321080 }, { "epoch": 92.37341772151899, "grad_norm": 2.446725368499756, "learning_rate": 0.00015253164556962027, "loss": 0.292, "step": 321090 }, { "epoch": 92.37629459148447, "grad_norm": 1.2252297401428223, "learning_rate": 0.0001524741081703107, "loss": 0.2968, "step": 321100 }, { "epoch": 92.37917146144994, "grad_norm": 1.3317008018493652, "learning_rate": 0.00015241657077100115, "loss": 0.2646, "step": 321110 }, { "epoch": 92.38204833141542, "grad_norm": 0.812649667263031, "learning_rate": 0.0001523590333716916, "loss": 0.2822, "step": 321120 }, { "epoch": 92.3849252013809, "grad_norm": 1.7348406314849854, "learning_rate": 0.00015230149597238204, "loss": 0.2273, "step": 321130 }, { "epoch": 92.38780207134637, "grad_norm": 0.8682802319526672, "learning_rate": 0.0001522439585730725, "loss": 0.361, "step": 321140 }, { "epoch": 92.39067894131185, "grad_norm": 0.9148054718971252, "learning_rate": 0.00015218642117376295, "loss": 0.2941, "step": 321150 }, { "epoch": 92.39355581127732, "grad_norm": 0.9303020238876343, "learning_rate": 0.00015212888377445338, "loss": 0.2047, "step": 321160 }, { "epoch": 92.39643268124281, "grad_norm": 3.5309648513793945, "learning_rate": 0.00015207134637514386, "loss": 0.2398, "step": 321170 }, { "epoch": 92.39930955120829, "grad_norm": 0.8991556167602539, "learning_rate": 0.00015201380897583432, "loss": 0.2366, "step": 321180 }, { "epoch": 92.40218642117377, "grad_norm": 1.4050511121749878, "learning_rate": 0.00015195627157652474, "loss": 0.338, "step": 321190 }, { "epoch": 92.40506329113924, "grad_norm": 1.9135133028030396, "learning_rate": 0.0001518987341772152, "loss": 0.2419, "step": 321200 }, { "epoch": 92.40794016110472, "grad_norm": 2.2193124294281006, "learning_rate": 0.00015184119677790565, "loss": 0.2473, "step": 321210 }, { "epoch": 92.4108170310702, "grad_norm": 1.340090036392212, "learning_rate": 0.00015178365937859608, "loss": 0.303, "step": 321220 }, { "epoch": 92.41369390103567, "grad_norm": 1.4976239204406738, "learning_rate": 0.00015172612197928654, "loss": 0.2678, "step": 321230 }, { "epoch": 92.41657077100115, "grad_norm": 1.6290392875671387, "learning_rate": 0.000151668584579977, "loss": 0.2489, "step": 321240 }, { "epoch": 92.41944764096662, "grad_norm": 1.356380820274353, "learning_rate": 0.00015161104718066742, "loss": 0.2591, "step": 321250 }, { "epoch": 92.4223245109321, "grad_norm": 1.4382045269012451, "learning_rate": 0.00015155350978135788, "loss": 0.2642, "step": 321260 }, { "epoch": 92.42520138089759, "grad_norm": 1.5157065391540527, "learning_rate": 0.00015149597238204836, "loss": 0.2714, "step": 321270 }, { "epoch": 92.42807825086307, "grad_norm": 0.5703466534614563, "learning_rate": 0.0001514384349827388, "loss": 0.2299, "step": 321280 }, { "epoch": 92.43095512082854, "grad_norm": 0.8557537198066711, "learning_rate": 0.00015138089758342924, "loss": 0.2877, "step": 321290 }, { "epoch": 92.43383199079402, "grad_norm": 0.8446662425994873, "learning_rate": 0.00015132336018411967, "loss": 0.2838, "step": 321300 }, { "epoch": 92.4367088607595, "grad_norm": 0.9084123373031616, "learning_rate": 0.00015126582278481013, "loss": 0.2838, "step": 321310 }, { "epoch": 92.43958573072497, "grad_norm": 0.8968050479888916, "learning_rate": 0.00015120828538550058, "loss": 0.3017, "step": 321320 }, { "epoch": 92.44246260069045, "grad_norm": 1.6268606185913086, "learning_rate": 0.000151150747986191, "loss": 0.3488, "step": 321330 }, { "epoch": 92.44533947065592, "grad_norm": 1.6302714347839355, "learning_rate": 0.00015109321058688147, "loss": 0.2908, "step": 321340 }, { "epoch": 92.4482163406214, "grad_norm": 1.4898934364318848, "learning_rate": 0.00015103567318757192, "loss": 0.2944, "step": 321350 }, { "epoch": 92.45109321058688, "grad_norm": 0.8914539813995361, "learning_rate": 0.00015097813578826235, "loss": 0.3257, "step": 321360 }, { "epoch": 92.45397008055235, "grad_norm": 2.3643851280212402, "learning_rate": 0.00015092059838895283, "loss": 0.4881, "step": 321370 }, { "epoch": 92.45684695051784, "grad_norm": 1.282877802848816, "learning_rate": 0.0001508630609896433, "loss": 0.2221, "step": 321380 }, { "epoch": 92.45972382048332, "grad_norm": 1.2507671117782593, "learning_rate": 0.00015080552359033372, "loss": 0.2627, "step": 321390 }, { "epoch": 92.4626006904488, "grad_norm": 1.5332735776901245, "learning_rate": 0.00015074798619102417, "loss": 0.2401, "step": 321400 }, { "epoch": 92.46547756041427, "grad_norm": 0.81983482837677, "learning_rate": 0.00015069044879171463, "loss": 0.2681, "step": 321410 }, { "epoch": 92.46835443037975, "grad_norm": 0.8101671934127808, "learning_rate": 0.00015063291139240506, "loss": 0.2436, "step": 321420 }, { "epoch": 92.47123130034522, "grad_norm": 1.8557770252227783, "learning_rate": 0.0001505753739930955, "loss": 0.2976, "step": 321430 }, { "epoch": 92.4741081703107, "grad_norm": 1.2844946384429932, "learning_rate": 0.00015051783659378597, "loss": 0.2322, "step": 321440 }, { "epoch": 92.47698504027618, "grad_norm": 1.0370664596557617, "learning_rate": 0.0001504602991944764, "loss": 0.2825, "step": 321450 }, { "epoch": 92.47986191024165, "grad_norm": 1.3483456373214722, "learning_rate": 0.00015040276179516685, "loss": 0.2454, "step": 321460 }, { "epoch": 92.48273878020713, "grad_norm": 0.9441729784011841, "learning_rate": 0.00015034522439585734, "loss": 0.2974, "step": 321470 }, { "epoch": 92.48561565017262, "grad_norm": 1.4128673076629639, "learning_rate": 0.00015028768699654776, "loss": 0.3134, "step": 321480 }, { "epoch": 92.4884925201381, "grad_norm": 0.6914435029029846, "learning_rate": 0.00015023014959723822, "loss": 0.2635, "step": 321490 }, { "epoch": 92.49136939010357, "grad_norm": 0.7644015550613403, "learning_rate": 0.00015017261219792865, "loss": 0.288, "step": 321500 }, { "epoch": 92.49424626006905, "grad_norm": 0.92539381980896, "learning_rate": 0.0001501150747986191, "loss": 0.2455, "step": 321510 }, { "epoch": 92.49712313003452, "grad_norm": 0.8465116024017334, "learning_rate": 0.00015005753739930956, "loss": 0.2484, "step": 321520 }, { "epoch": 92.5, "grad_norm": 1.7289918661117554, "learning_rate": 0.00015, "loss": 0.2655, "step": 321530 }, { "epoch": 92.50287686996548, "grad_norm": 1.0841550827026367, "learning_rate": 0.00014994246260069044, "loss": 0.2565, "step": 321540 }, { "epoch": 92.50575373993095, "grad_norm": 0.6788005232810974, "learning_rate": 0.0001498849252013809, "loss": 0.2584, "step": 321550 }, { "epoch": 92.50863060989643, "grad_norm": 0.7680851221084595, "learning_rate": 0.00014982738780207135, "loss": 0.2602, "step": 321560 }, { "epoch": 92.5115074798619, "grad_norm": 0.9905588030815125, "learning_rate": 0.0001497698504027618, "loss": 0.2754, "step": 321570 }, { "epoch": 92.51438434982738, "grad_norm": 0.8668267726898193, "learning_rate": 0.00014971231300345226, "loss": 0.2611, "step": 321580 }, { "epoch": 92.51726121979287, "grad_norm": 1.3997091054916382, "learning_rate": 0.0001496547756041427, "loss": 0.3135, "step": 321590 }, { "epoch": 92.52013808975835, "grad_norm": 1.1378962993621826, "learning_rate": 0.00014959723820483315, "loss": 0.2514, "step": 321600 }, { "epoch": 92.52301495972382, "grad_norm": 3.2155051231384277, "learning_rate": 0.0001495397008055236, "loss": 0.3007, "step": 321610 }, { "epoch": 92.5258918296893, "grad_norm": 1.3527988195419312, "learning_rate": 0.00014948216340621403, "loss": 0.3234, "step": 321620 }, { "epoch": 92.52876869965478, "grad_norm": 1.0517483949661255, "learning_rate": 0.0001494246260069045, "loss": 0.2701, "step": 321630 }, { "epoch": 92.53164556962025, "grad_norm": 1.1993167400360107, "learning_rate": 0.00014936708860759494, "loss": 0.3311, "step": 321640 }, { "epoch": 92.53452243958573, "grad_norm": 1.0476717948913574, "learning_rate": 0.00014930955120828537, "loss": 0.1808, "step": 321650 }, { "epoch": 92.5373993095512, "grad_norm": 1.129312515258789, "learning_rate": 0.00014925201380897585, "loss": 0.2628, "step": 321660 }, { "epoch": 92.54027617951668, "grad_norm": 1.4234813451766968, "learning_rate": 0.0001491944764096663, "loss": 0.2547, "step": 321670 }, { "epoch": 92.54315304948216, "grad_norm": 1.3175972700119019, "learning_rate": 0.00014913693901035674, "loss": 0.3447, "step": 321680 }, { "epoch": 92.54602991944765, "grad_norm": 2.459205150604248, "learning_rate": 0.0001490794016110472, "loss": 0.3251, "step": 321690 }, { "epoch": 92.54890678941312, "grad_norm": 1.2176707983016968, "learning_rate": 0.00014902186421173762, "loss": 0.3302, "step": 321700 }, { "epoch": 92.5517836593786, "grad_norm": 2.4501399993896484, "learning_rate": 0.00014896432681242808, "loss": 0.253, "step": 321710 }, { "epoch": 92.55466052934408, "grad_norm": 1.2762035131454468, "learning_rate": 0.00014890678941311853, "loss": 0.3038, "step": 321720 }, { "epoch": 92.55753739930955, "grad_norm": 1.1496878862380981, "learning_rate": 0.00014884925201380896, "loss": 0.1982, "step": 321730 }, { "epoch": 92.56041426927503, "grad_norm": 2.064492702484131, "learning_rate": 0.00014879171461449942, "loss": 0.4402, "step": 321740 }, { "epoch": 92.5632911392405, "grad_norm": 0.585761308670044, "learning_rate": 0.00014873417721518987, "loss": 0.2991, "step": 321750 }, { "epoch": 92.56616800920598, "grad_norm": 1.179866075515747, "learning_rate": 0.00014867663981588033, "loss": 0.2908, "step": 321760 }, { "epoch": 92.56904487917146, "grad_norm": 1.734106183052063, "learning_rate": 0.00014861910241657078, "loss": 0.3049, "step": 321770 }, { "epoch": 92.57192174913693, "grad_norm": 0.6025296449661255, "learning_rate": 0.00014856156501726124, "loss": 0.2517, "step": 321780 }, { "epoch": 92.57479861910241, "grad_norm": 1.3686383962631226, "learning_rate": 0.00014850402761795167, "loss": 0.2697, "step": 321790 }, { "epoch": 92.5776754890679, "grad_norm": 0.9931153059005737, "learning_rate": 0.00014844649021864212, "loss": 0.2495, "step": 321800 }, { "epoch": 92.58055235903338, "grad_norm": 1.001989722251892, "learning_rate": 0.00014838895281933258, "loss": 0.2961, "step": 321810 }, { "epoch": 92.58342922899885, "grad_norm": 1.2758073806762695, "learning_rate": 0.000148331415420023, "loss": 0.2924, "step": 321820 }, { "epoch": 92.58630609896433, "grad_norm": 0.756064236164093, "learning_rate": 0.00014827387802071346, "loss": 0.1996, "step": 321830 }, { "epoch": 92.5891829689298, "grad_norm": 1.210218071937561, "learning_rate": 0.00014821634062140392, "loss": 0.2629, "step": 321840 }, { "epoch": 92.59205983889528, "grad_norm": 1.2126598358154297, "learning_rate": 0.00014815880322209435, "loss": 0.2611, "step": 321850 }, { "epoch": 92.59493670886076, "grad_norm": 0.7644577622413635, "learning_rate": 0.00014810126582278483, "loss": 0.2807, "step": 321860 }, { "epoch": 92.59781357882623, "grad_norm": 3.0850822925567627, "learning_rate": 0.00014804372842347528, "loss": 0.2637, "step": 321870 }, { "epoch": 92.60069044879171, "grad_norm": 0.8240319490432739, "learning_rate": 0.0001479861910241657, "loss": 0.2659, "step": 321880 }, { "epoch": 92.60356731875719, "grad_norm": 1.0021878480911255, "learning_rate": 0.00014792865362485617, "loss": 0.2232, "step": 321890 }, { "epoch": 92.60644418872268, "grad_norm": 1.2204591035842896, "learning_rate": 0.0001478711162255466, "loss": 0.2976, "step": 321900 }, { "epoch": 92.60932105868815, "grad_norm": 1.2703757286071777, "learning_rate": 0.00014781357882623705, "loss": 0.2667, "step": 321910 }, { "epoch": 92.61219792865363, "grad_norm": 1.0977295637130737, "learning_rate": 0.0001477560414269275, "loss": 0.3005, "step": 321920 }, { "epoch": 92.6150747986191, "grad_norm": 1.2050451040267944, "learning_rate": 0.00014769850402761794, "loss": 0.2446, "step": 321930 }, { "epoch": 92.61795166858458, "grad_norm": 0.9457811117172241, "learning_rate": 0.0001476409666283084, "loss": 0.3049, "step": 321940 }, { "epoch": 92.62082853855006, "grad_norm": 0.9027737379074097, "learning_rate": 0.00014758342922899885, "loss": 0.3073, "step": 321950 }, { "epoch": 92.62370540851553, "grad_norm": 0.8585420250892639, "learning_rate": 0.0001475258918296893, "loss": 0.2457, "step": 321960 }, { "epoch": 92.62658227848101, "grad_norm": 0.7825068235397339, "learning_rate": 0.00014746835443037976, "loss": 0.291, "step": 321970 }, { "epoch": 92.62945914844649, "grad_norm": 1.2711560726165771, "learning_rate": 0.0001474108170310702, "loss": 0.3559, "step": 321980 }, { "epoch": 92.63233601841196, "grad_norm": 1.4237464666366577, "learning_rate": 0.00014735327963176064, "loss": 0.2703, "step": 321990 }, { "epoch": 92.63521288837744, "grad_norm": 1.9857711791992188, "learning_rate": 0.0001472957422324511, "loss": 0.3052, "step": 322000 }, { "epoch": 92.63808975834293, "grad_norm": 1.2784366607666016, "learning_rate": 0.00014723820483314155, "loss": 0.255, "step": 322010 }, { "epoch": 92.6409666283084, "grad_norm": 1.121276617050171, "learning_rate": 0.00014718066743383198, "loss": 0.2873, "step": 322020 }, { "epoch": 92.64384349827388, "grad_norm": 0.6005972623825073, "learning_rate": 0.00014712313003452244, "loss": 0.2268, "step": 322030 }, { "epoch": 92.64672036823936, "grad_norm": 0.9837836623191833, "learning_rate": 0.0001470655926352129, "loss": 0.2769, "step": 322040 }, { "epoch": 92.64959723820483, "grad_norm": 1.1500869989395142, "learning_rate": 0.00014700805523590335, "loss": 0.3455, "step": 322050 }, { "epoch": 92.65247410817031, "grad_norm": 0.7812860012054443, "learning_rate": 0.0001469505178365938, "loss": 0.2886, "step": 322060 }, { "epoch": 92.65535097813579, "grad_norm": 1.5684090852737427, "learning_rate": 0.00014689298043728426, "loss": 0.3215, "step": 322070 }, { "epoch": 92.65822784810126, "grad_norm": 1.197618842124939, "learning_rate": 0.0001468354430379747, "loss": 0.2595, "step": 322080 }, { "epoch": 92.66110471806674, "grad_norm": 1.2725658416748047, "learning_rate": 0.00014677790563866514, "loss": 0.3462, "step": 322090 }, { "epoch": 92.66398158803221, "grad_norm": 1.3248510360717773, "learning_rate": 0.00014672036823935557, "loss": 0.3061, "step": 322100 }, { "epoch": 92.6668584579977, "grad_norm": 1.4283989667892456, "learning_rate": 0.00014666283084004603, "loss": 0.2501, "step": 322110 }, { "epoch": 92.66973532796318, "grad_norm": 1.016432523727417, "learning_rate": 0.00014660529344073648, "loss": 0.22, "step": 322120 }, { "epoch": 92.67261219792866, "grad_norm": 0.8628327250480652, "learning_rate": 0.0001465477560414269, "loss": 0.2575, "step": 322130 }, { "epoch": 92.67548906789413, "grad_norm": 1.1552016735076904, "learning_rate": 0.00014649021864211737, "loss": 0.2518, "step": 322140 }, { "epoch": 92.67836593785961, "grad_norm": 1.128570795059204, "learning_rate": 0.00014643268124280785, "loss": 0.2554, "step": 322150 }, { "epoch": 92.68124280782509, "grad_norm": 1.1788640022277832, "learning_rate": 0.00014637514384349828, "loss": 0.2539, "step": 322160 }, { "epoch": 92.68411967779056, "grad_norm": 0.6641547083854675, "learning_rate": 0.00014631760644418873, "loss": 0.2875, "step": 322170 }, { "epoch": 92.68699654775604, "grad_norm": 1.539261817932129, "learning_rate": 0.0001462600690448792, "loss": 0.3339, "step": 322180 }, { "epoch": 92.68987341772151, "grad_norm": 0.925254225730896, "learning_rate": 0.00014620253164556962, "loss": 0.28, "step": 322190 }, { "epoch": 92.69275028768699, "grad_norm": 1.530511736869812, "learning_rate": 0.00014614499424626007, "loss": 0.3167, "step": 322200 }, { "epoch": 92.69562715765247, "grad_norm": 0.9417706727981567, "learning_rate": 0.00014608745684695053, "loss": 0.2964, "step": 322210 }, { "epoch": 92.69850402761796, "grad_norm": 1.1071195602416992, "learning_rate": 0.00014602991944764096, "loss": 0.3135, "step": 322220 }, { "epoch": 92.70138089758343, "grad_norm": 0.8197818994522095, "learning_rate": 0.0001459723820483314, "loss": 0.2731, "step": 322230 }, { "epoch": 92.70425776754891, "grad_norm": 0.7701306939125061, "learning_rate": 0.00014591484464902187, "loss": 0.2426, "step": 322240 }, { "epoch": 92.70713463751439, "grad_norm": 0.8710044026374817, "learning_rate": 0.00014585730724971232, "loss": 0.2828, "step": 322250 }, { "epoch": 92.71001150747986, "grad_norm": 1.313352346420288, "learning_rate": 0.00014579976985040278, "loss": 0.37, "step": 322260 }, { "epoch": 92.71288837744534, "grad_norm": 0.7387834191322327, "learning_rate": 0.00014574223245109323, "loss": 0.2534, "step": 322270 }, { "epoch": 92.71576524741081, "grad_norm": 1.0137646198272705, "learning_rate": 0.00014568469505178366, "loss": 0.3036, "step": 322280 }, { "epoch": 92.71864211737629, "grad_norm": 1.220434546470642, "learning_rate": 0.00014562715765247412, "loss": 0.2154, "step": 322290 }, { "epoch": 92.72151898734177, "grad_norm": 1.0166015625, "learning_rate": 0.00014556962025316455, "loss": 0.3369, "step": 322300 }, { "epoch": 92.72439585730724, "grad_norm": 1.4880784749984741, "learning_rate": 0.000145512082853855, "loss": 0.2686, "step": 322310 }, { "epoch": 92.72727272727273, "grad_norm": 1.9673110246658325, "learning_rate": 0.00014545454545454546, "loss": 0.2626, "step": 322320 }, { "epoch": 92.73014959723821, "grad_norm": 0.6756556034088135, "learning_rate": 0.00014539700805523588, "loss": 0.2217, "step": 322330 }, { "epoch": 92.73302646720369, "grad_norm": 0.7433488965034485, "learning_rate": 0.00014533947065592634, "loss": 0.3118, "step": 322340 }, { "epoch": 92.73590333716916, "grad_norm": 1.2669918537139893, "learning_rate": 0.00014528193325661682, "loss": 0.2087, "step": 322350 }, { "epoch": 92.73878020713464, "grad_norm": 0.955626904964447, "learning_rate": 0.00014522439585730725, "loss": 0.3102, "step": 322360 }, { "epoch": 92.74165707710011, "grad_norm": 1.6152312755584717, "learning_rate": 0.0001451668584579977, "loss": 0.2613, "step": 322370 }, { "epoch": 92.74453394706559, "grad_norm": 1.0647081136703491, "learning_rate": 0.00014510932105868816, "loss": 0.3432, "step": 322380 }, { "epoch": 92.74741081703107, "grad_norm": 1.161299228668213, "learning_rate": 0.0001450517836593786, "loss": 0.3275, "step": 322390 }, { "epoch": 92.75028768699654, "grad_norm": 1.3040701150894165, "learning_rate": 0.00014499424626006905, "loss": 0.3529, "step": 322400 }, { "epoch": 92.75316455696202, "grad_norm": 1.6426955461502075, "learning_rate": 0.0001449367088607595, "loss": 0.286, "step": 322410 }, { "epoch": 92.75604142692751, "grad_norm": 1.3055036067962646, "learning_rate": 0.00014487917146144993, "loss": 0.346, "step": 322420 }, { "epoch": 92.75891829689299, "grad_norm": 0.7142582535743713, "learning_rate": 0.00014482163406214039, "loss": 0.2571, "step": 322430 }, { "epoch": 92.76179516685846, "grad_norm": 1.4510189294815063, "learning_rate": 0.00014476409666283084, "loss": 0.2762, "step": 322440 }, { "epoch": 92.76467203682394, "grad_norm": 0.769216001033783, "learning_rate": 0.0001447065592635213, "loss": 0.2695, "step": 322450 }, { "epoch": 92.76754890678941, "grad_norm": 1.0974278450012207, "learning_rate": 0.00014464902186421175, "loss": 0.2894, "step": 322460 }, { "epoch": 92.77042577675489, "grad_norm": 1.1948691606521606, "learning_rate": 0.0001445914844649022, "loss": 0.3022, "step": 322470 }, { "epoch": 92.77330264672037, "grad_norm": 0.9655029773712158, "learning_rate": 0.00014453394706559264, "loss": 0.3229, "step": 322480 }, { "epoch": 92.77617951668584, "grad_norm": 0.8195898532867432, "learning_rate": 0.0001444764096662831, "loss": 0.2454, "step": 322490 }, { "epoch": 92.77905638665132, "grad_norm": 1.5351992845535278, "learning_rate": 0.00014441887226697352, "loss": 0.2957, "step": 322500 }, { "epoch": 92.7819332566168, "grad_norm": 0.8670629858970642, "learning_rate": 0.00014436133486766398, "loss": 0.2462, "step": 322510 }, { "epoch": 92.78481012658227, "grad_norm": 1.0756947994232178, "learning_rate": 0.00014430379746835443, "loss": 0.2372, "step": 322520 }, { "epoch": 92.78768699654776, "grad_norm": 2.2202188968658447, "learning_rate": 0.00014424626006904486, "loss": 0.3396, "step": 322530 }, { "epoch": 92.79056386651324, "grad_norm": 0.546612024307251, "learning_rate": 0.00014418872266973532, "loss": 0.2836, "step": 322540 }, { "epoch": 92.79344073647871, "grad_norm": 0.8352276682853699, "learning_rate": 0.0001441311852704258, "loss": 0.3259, "step": 322550 }, { "epoch": 92.79631760644419, "grad_norm": 1.0869641304016113, "learning_rate": 0.00014407364787111623, "loss": 0.383, "step": 322560 }, { "epoch": 92.79919447640967, "grad_norm": 0.6615959405899048, "learning_rate": 0.00014401611047180668, "loss": 0.2208, "step": 322570 }, { "epoch": 92.80207134637514, "grad_norm": 0.9019140005111694, "learning_rate": 0.00014395857307249714, "loss": 0.2462, "step": 322580 }, { "epoch": 92.80494821634062, "grad_norm": 1.9209529161453247, "learning_rate": 0.00014390103567318757, "loss": 0.2957, "step": 322590 }, { "epoch": 92.8078250863061, "grad_norm": 0.969679594039917, "learning_rate": 0.00014384349827387802, "loss": 0.2526, "step": 322600 }, { "epoch": 92.81070195627157, "grad_norm": 1.1883736848831177, "learning_rate": 0.00014378596087456848, "loss": 0.2616, "step": 322610 }, { "epoch": 92.81357882623705, "grad_norm": 0.9542023539543152, "learning_rate": 0.0001437284234752589, "loss": 0.2838, "step": 322620 }, { "epoch": 92.81645569620254, "grad_norm": 0.9575905203819275, "learning_rate": 0.00014367088607594936, "loss": 0.3506, "step": 322630 }, { "epoch": 92.81933256616801, "grad_norm": 0.8359972238540649, "learning_rate": 0.00014361334867663984, "loss": 0.244, "step": 322640 }, { "epoch": 92.82220943613349, "grad_norm": 1.5189754962921143, "learning_rate": 0.00014355581127733027, "loss": 0.2486, "step": 322650 }, { "epoch": 92.82508630609897, "grad_norm": 1.4250057935714722, "learning_rate": 0.00014349827387802073, "loss": 0.2806, "step": 322660 }, { "epoch": 92.82796317606444, "grad_norm": 0.8769973516464233, "learning_rate": 0.00014344073647871118, "loss": 0.2564, "step": 322670 }, { "epoch": 92.83084004602992, "grad_norm": 1.222721815109253, "learning_rate": 0.0001433831990794016, "loss": 0.3534, "step": 322680 }, { "epoch": 92.8337169159954, "grad_norm": 0.8788958787918091, "learning_rate": 0.00014332566168009207, "loss": 0.3125, "step": 322690 }, { "epoch": 92.83659378596087, "grad_norm": 1.8342362642288208, "learning_rate": 0.0001432681242807825, "loss": 0.3082, "step": 322700 }, { "epoch": 92.83947065592635, "grad_norm": 0.7890145182609558, "learning_rate": 0.00014321058688147295, "loss": 0.3154, "step": 322710 }, { "epoch": 92.84234752589182, "grad_norm": 2.114173173904419, "learning_rate": 0.0001431530494821634, "loss": 0.2868, "step": 322720 }, { "epoch": 92.8452243958573, "grad_norm": 1.3594383001327515, "learning_rate": 0.00014309551208285383, "loss": 0.3056, "step": 322730 }, { "epoch": 92.84810126582279, "grad_norm": 0.7975945472717285, "learning_rate": 0.00014303797468354432, "loss": 0.2505, "step": 322740 }, { "epoch": 92.85097813578827, "grad_norm": 0.8787246942520142, "learning_rate": 0.00014298043728423477, "loss": 0.2529, "step": 322750 }, { "epoch": 92.85385500575374, "grad_norm": 1.6141191720962524, "learning_rate": 0.0001429228998849252, "loss": 0.3536, "step": 322760 }, { "epoch": 92.85673187571922, "grad_norm": 0.6985459327697754, "learning_rate": 0.00014286536248561566, "loss": 0.2974, "step": 322770 }, { "epoch": 92.8596087456847, "grad_norm": 1.1576299667358398, "learning_rate": 0.0001428078250863061, "loss": 0.2589, "step": 322780 }, { "epoch": 92.86248561565017, "grad_norm": 1.0170786380767822, "learning_rate": 0.00014275028768699654, "loss": 0.2512, "step": 322790 }, { "epoch": 92.86536248561565, "grad_norm": 1.036830186843872, "learning_rate": 0.000142692750287687, "loss": 0.2904, "step": 322800 }, { "epoch": 92.86823935558112, "grad_norm": 1.6172963380813599, "learning_rate": 0.00014263521288837745, "loss": 0.3209, "step": 322810 }, { "epoch": 92.8711162255466, "grad_norm": 0.6485022902488708, "learning_rate": 0.00014257767548906788, "loss": 0.3164, "step": 322820 }, { "epoch": 92.87399309551208, "grad_norm": 1.2108834981918335, "learning_rate": 0.00014252013808975833, "loss": 0.2787, "step": 322830 }, { "epoch": 92.87686996547757, "grad_norm": 1.0729587078094482, "learning_rate": 0.00014246260069044882, "loss": 0.3066, "step": 322840 }, { "epoch": 92.87974683544304, "grad_norm": 1.126064658164978, "learning_rate": 0.00014240506329113925, "loss": 0.3681, "step": 322850 }, { "epoch": 92.88262370540852, "grad_norm": 1.2606329917907715, "learning_rate": 0.0001423475258918297, "loss": 0.2686, "step": 322860 }, { "epoch": 92.885500575374, "grad_norm": 2.4462575912475586, "learning_rate": 0.00014228998849252016, "loss": 0.2956, "step": 322870 }, { "epoch": 92.88837744533947, "grad_norm": 2.042830467224121, "learning_rate": 0.00014223245109321059, "loss": 0.3678, "step": 322880 }, { "epoch": 92.89125431530495, "grad_norm": 0.5815764665603638, "learning_rate": 0.00014217491369390104, "loss": 0.3083, "step": 322890 }, { "epoch": 92.89413118527042, "grad_norm": 0.9352042078971863, "learning_rate": 0.0001421173762945915, "loss": 0.2476, "step": 322900 }, { "epoch": 92.8970080552359, "grad_norm": 0.9369130730628967, "learning_rate": 0.00014205983889528192, "loss": 0.3662, "step": 322910 }, { "epoch": 92.89988492520138, "grad_norm": 1.8107620477676392, "learning_rate": 0.00014200230149597238, "loss": 0.237, "step": 322920 }, { "epoch": 92.90276179516685, "grad_norm": 1.6644885540008545, "learning_rate": 0.0001419447640966628, "loss": 0.2681, "step": 322930 }, { "epoch": 92.90563866513233, "grad_norm": 1.1799371242523193, "learning_rate": 0.0001418872266973533, "loss": 0.2383, "step": 322940 }, { "epoch": 92.90851553509782, "grad_norm": 1.0443145036697388, "learning_rate": 0.00014182968929804375, "loss": 0.2937, "step": 322950 }, { "epoch": 92.9113924050633, "grad_norm": 1.395282506942749, "learning_rate": 0.00014177215189873418, "loss": 0.2895, "step": 322960 }, { "epoch": 92.91426927502877, "grad_norm": 1.4970459938049316, "learning_rate": 0.00014171461449942463, "loss": 0.2877, "step": 322970 }, { "epoch": 92.91714614499425, "grad_norm": 0.9249800443649292, "learning_rate": 0.00014165707710011509, "loss": 0.267, "step": 322980 }, { "epoch": 92.92002301495972, "grad_norm": 0.9408358931541443, "learning_rate": 0.00014159953970080551, "loss": 0.2985, "step": 322990 }, { "epoch": 92.9228998849252, "grad_norm": 0.8844651579856873, "learning_rate": 0.00014154200230149597, "loss": 0.2348, "step": 323000 }, { "epoch": 92.92577675489068, "grad_norm": 2.1639840602874756, "learning_rate": 0.00014148446490218643, "loss": 0.2675, "step": 323010 }, { "epoch": 92.92865362485615, "grad_norm": 0.9434229731559753, "learning_rate": 0.00014142692750287685, "loss": 0.2365, "step": 323020 }, { "epoch": 92.93153049482163, "grad_norm": 1.4800934791564941, "learning_rate": 0.0001413693901035673, "loss": 0.2795, "step": 323030 }, { "epoch": 92.9344073647871, "grad_norm": 1.1105133295059204, "learning_rate": 0.0001413118527042578, "loss": 0.2545, "step": 323040 }, { "epoch": 92.9372842347526, "grad_norm": 1.4117984771728516, "learning_rate": 0.00014125431530494822, "loss": 0.3318, "step": 323050 }, { "epoch": 92.94016110471807, "grad_norm": 1.416894793510437, "learning_rate": 0.00014119677790563868, "loss": 0.2426, "step": 323060 }, { "epoch": 92.94303797468355, "grad_norm": 0.8840205073356628, "learning_rate": 0.00014113924050632913, "loss": 0.2628, "step": 323070 }, { "epoch": 92.94591484464902, "grad_norm": 1.2960237264633179, "learning_rate": 0.00014108170310701956, "loss": 0.2535, "step": 323080 }, { "epoch": 92.9487917146145, "grad_norm": 1.0485738515853882, "learning_rate": 0.00014102416570771002, "loss": 0.2645, "step": 323090 }, { "epoch": 92.95166858457998, "grad_norm": 1.2335741519927979, "learning_rate": 0.00014096662830840047, "loss": 0.3197, "step": 323100 }, { "epoch": 92.95454545454545, "grad_norm": 1.8216416835784912, "learning_rate": 0.0001409090909090909, "loss": 0.2809, "step": 323110 }, { "epoch": 92.95742232451093, "grad_norm": 0.7127804160118103, "learning_rate": 0.00014085155350978135, "loss": 0.3084, "step": 323120 }, { "epoch": 92.9602991944764, "grad_norm": 1.566239595413208, "learning_rate": 0.0001407940161104718, "loss": 0.2598, "step": 323130 }, { "epoch": 92.96317606444188, "grad_norm": 1.7432637214660645, "learning_rate": 0.00014073647871116227, "loss": 0.2776, "step": 323140 }, { "epoch": 92.96605293440736, "grad_norm": 0.8784983158111572, "learning_rate": 0.00014067894131185272, "loss": 0.2572, "step": 323150 }, { "epoch": 92.96892980437285, "grad_norm": 0.7561928033828735, "learning_rate": 0.00014062140391254315, "loss": 0.28, "step": 323160 }, { "epoch": 92.97180667433832, "grad_norm": 1.393122673034668, "learning_rate": 0.0001405638665132336, "loss": 0.3236, "step": 323170 }, { "epoch": 92.9746835443038, "grad_norm": 0.8061278462409973, "learning_rate": 0.00014050632911392406, "loss": 0.2848, "step": 323180 }, { "epoch": 92.97756041426928, "grad_norm": 0.8179650902748108, "learning_rate": 0.0001404487917146145, "loss": 0.3123, "step": 323190 }, { "epoch": 92.98043728423475, "grad_norm": 2.1518712043762207, "learning_rate": 0.00014039125431530494, "loss": 0.3098, "step": 323200 }, { "epoch": 92.98331415420023, "grad_norm": 1.7747061252593994, "learning_rate": 0.0001403337169159954, "loss": 0.3378, "step": 323210 }, { "epoch": 92.9861910241657, "grad_norm": 2.4350361824035645, "learning_rate": 0.00014027617951668583, "loss": 0.2965, "step": 323220 }, { "epoch": 92.98906789413118, "grad_norm": 1.0366171598434448, "learning_rate": 0.0001402186421173763, "loss": 0.2939, "step": 323230 }, { "epoch": 92.99194476409666, "grad_norm": 0.9791994094848633, "learning_rate": 0.00014016110471806677, "loss": 0.2769, "step": 323240 }, { "epoch": 92.99482163406213, "grad_norm": 1.4941562414169312, "learning_rate": 0.0001401035673187572, "loss": 0.2723, "step": 323250 }, { "epoch": 92.99769850402762, "grad_norm": 0.827545166015625, "learning_rate": 0.00014004602991944765, "loss": 0.3219, "step": 323260 }, { "epoch": 93.0005753739931, "grad_norm": 1.104914903640747, "learning_rate": 0.0001399884925201381, "loss": 0.2384, "step": 323270 }, { "epoch": 93.00345224395858, "grad_norm": 0.4244053363800049, "learning_rate": 0.00013993095512082853, "loss": 0.2159, "step": 323280 }, { "epoch": 93.00632911392405, "grad_norm": 1.813637614250183, "learning_rate": 0.000139873417721519, "loss": 0.3208, "step": 323290 }, { "epoch": 93.00920598388953, "grad_norm": 1.0149221420288086, "learning_rate": 0.00013981588032220945, "loss": 0.2666, "step": 323300 }, { "epoch": 93.012082853855, "grad_norm": 1.2384599447250366, "learning_rate": 0.00013975834292289987, "loss": 0.3474, "step": 323310 }, { "epoch": 93.01495972382048, "grad_norm": 1.9584667682647705, "learning_rate": 0.00013970080552359033, "loss": 0.3205, "step": 323320 }, { "epoch": 93.01783659378596, "grad_norm": 0.6677089929580688, "learning_rate": 0.00013964326812428078, "loss": 0.2498, "step": 323330 }, { "epoch": 93.02071346375143, "grad_norm": 1.1087080240249634, "learning_rate": 0.00013958573072497124, "loss": 0.2311, "step": 323340 }, { "epoch": 93.02359033371691, "grad_norm": 1.6819427013397217, "learning_rate": 0.0001395281933256617, "loss": 0.302, "step": 323350 }, { "epoch": 93.02646720368239, "grad_norm": 1.6958194971084595, "learning_rate": 0.00013947065592635212, "loss": 0.3269, "step": 323360 }, { "epoch": 93.02934407364788, "grad_norm": 0.8979522585868835, "learning_rate": 0.00013941311852704258, "loss": 0.2379, "step": 323370 }, { "epoch": 93.03222094361335, "grad_norm": 0.6325327754020691, "learning_rate": 0.00013935558112773304, "loss": 0.2596, "step": 323380 }, { "epoch": 93.03509781357883, "grad_norm": 0.6675505638122559, "learning_rate": 0.00013929804372842346, "loss": 0.2256, "step": 323390 }, { "epoch": 93.0379746835443, "grad_norm": 0.8296393752098083, "learning_rate": 0.00013924050632911392, "loss": 0.2468, "step": 323400 }, { "epoch": 93.04085155350978, "grad_norm": 0.7756078243255615, "learning_rate": 0.00013918296892980437, "loss": 0.2427, "step": 323410 }, { "epoch": 93.04372842347526, "grad_norm": 1.2879897356033325, "learning_rate": 0.0001391254315304948, "loss": 0.2733, "step": 323420 }, { "epoch": 93.04660529344073, "grad_norm": 1.3999550342559814, "learning_rate": 0.00013906789413118529, "loss": 0.2638, "step": 323430 }, { "epoch": 93.04948216340621, "grad_norm": 0.8899081945419312, "learning_rate": 0.00013901035673187574, "loss": 0.2538, "step": 323440 }, { "epoch": 93.05235903337169, "grad_norm": 1.0831875801086426, "learning_rate": 0.00013895281933256617, "loss": 0.2768, "step": 323450 }, { "epoch": 93.05523590333716, "grad_norm": 0.869907796382904, "learning_rate": 0.00013889528193325662, "loss": 0.3272, "step": 323460 }, { "epoch": 93.05811277330265, "grad_norm": 0.8770142793655396, "learning_rate": 0.00013883774453394708, "loss": 0.2357, "step": 323470 }, { "epoch": 93.06098964326813, "grad_norm": 0.9184585213661194, "learning_rate": 0.0001387802071346375, "loss": 0.2407, "step": 323480 }, { "epoch": 93.0638665132336, "grad_norm": 0.8108739852905273, "learning_rate": 0.00013872266973532796, "loss": 0.2718, "step": 323490 }, { "epoch": 93.06674338319908, "grad_norm": 0.8924261927604675, "learning_rate": 0.00013866513233601842, "loss": 0.2089, "step": 323500 }, { "epoch": 93.06962025316456, "grad_norm": 0.7309480905532837, "learning_rate": 0.00013860759493670885, "loss": 0.2556, "step": 323510 }, { "epoch": 93.07249712313003, "grad_norm": 0.572037398815155, "learning_rate": 0.0001385500575373993, "loss": 0.2697, "step": 323520 }, { "epoch": 93.07537399309551, "grad_norm": 1.523803472518921, "learning_rate": 0.00013849252013808976, "loss": 0.2758, "step": 323530 }, { "epoch": 93.07825086306099, "grad_norm": 1.5727171897888184, "learning_rate": 0.00013843498273878021, "loss": 0.2602, "step": 323540 }, { "epoch": 93.08112773302646, "grad_norm": 1.5874111652374268, "learning_rate": 0.00013837744533947067, "loss": 0.2124, "step": 323550 }, { "epoch": 93.08400460299194, "grad_norm": 1.3506284952163696, "learning_rate": 0.0001383199079401611, "loss": 0.2897, "step": 323560 }, { "epoch": 93.08688147295742, "grad_norm": 1.4468814134597778, "learning_rate": 0.00013826237054085155, "loss": 0.2459, "step": 323570 }, { "epoch": 93.0897583429229, "grad_norm": 1.7278443574905396, "learning_rate": 0.000138204833141542, "loss": 0.2516, "step": 323580 }, { "epoch": 93.09263521288838, "grad_norm": 1.3457199335098267, "learning_rate": 0.00013814729574223244, "loss": 0.2512, "step": 323590 }, { "epoch": 93.09551208285386, "grad_norm": 1.0084588527679443, "learning_rate": 0.0001380897583429229, "loss": 0.2461, "step": 323600 }, { "epoch": 93.09838895281933, "grad_norm": 1.3524110317230225, "learning_rate": 0.00013803222094361335, "loss": 0.2582, "step": 323610 }, { "epoch": 93.10126582278481, "grad_norm": 2.1706197261810303, "learning_rate": 0.0001379746835443038, "loss": 0.2646, "step": 323620 }, { "epoch": 93.10414269275029, "grad_norm": 1.394776701927185, "learning_rate": 0.00013791714614499426, "loss": 0.2538, "step": 323630 }, { "epoch": 93.10701956271576, "grad_norm": 0.7857543230056763, "learning_rate": 0.00013785960874568472, "loss": 0.2439, "step": 323640 }, { "epoch": 93.10989643268124, "grad_norm": 1.7660090923309326, "learning_rate": 0.00013780207134637514, "loss": 0.3107, "step": 323650 }, { "epoch": 93.11277330264672, "grad_norm": 0.7069084048271179, "learning_rate": 0.0001377445339470656, "loss": 0.2645, "step": 323660 }, { "epoch": 93.11565017261219, "grad_norm": 1.7664669752120972, "learning_rate": 0.00013768699654775606, "loss": 0.2959, "step": 323670 }, { "epoch": 93.11852704257768, "grad_norm": 2.2739927768707275, "learning_rate": 0.00013762945914844648, "loss": 0.2777, "step": 323680 }, { "epoch": 93.12140391254316, "grad_norm": 1.9842414855957031, "learning_rate": 0.00013757192174913694, "loss": 0.3454, "step": 323690 }, { "epoch": 93.12428078250863, "grad_norm": 0.9771868586540222, "learning_rate": 0.0001375143843498274, "loss": 0.3396, "step": 323700 }, { "epoch": 93.12715765247411, "grad_norm": 1.0959678888320923, "learning_rate": 0.00013745684695051782, "loss": 0.2784, "step": 323710 }, { "epoch": 93.13003452243959, "grad_norm": 1.3502321243286133, "learning_rate": 0.0001373993095512083, "loss": 0.3168, "step": 323720 }, { "epoch": 93.13291139240506, "grad_norm": 1.3101214170455933, "learning_rate": 0.00013734177215189873, "loss": 0.2828, "step": 323730 }, { "epoch": 93.13578826237054, "grad_norm": 0.8360276818275452, "learning_rate": 0.0001372842347525892, "loss": 0.2667, "step": 323740 }, { "epoch": 93.13866513233602, "grad_norm": 0.5566766858100891, "learning_rate": 0.00013722669735327964, "loss": 0.2594, "step": 323750 }, { "epoch": 93.14154200230149, "grad_norm": 1.019984483718872, "learning_rate": 0.00013716915995397007, "loss": 0.2992, "step": 323760 }, { "epoch": 93.14441887226697, "grad_norm": 0.8594874739646912, "learning_rate": 0.00013711162255466053, "loss": 0.2442, "step": 323770 }, { "epoch": 93.14729574223244, "grad_norm": 1.2334678173065186, "learning_rate": 0.00013705408515535098, "loss": 0.2836, "step": 323780 }, { "epoch": 93.15017261219793, "grad_norm": 0.802638053894043, "learning_rate": 0.0001369965477560414, "loss": 0.2629, "step": 323790 }, { "epoch": 93.15304948216341, "grad_norm": 1.412917971611023, "learning_rate": 0.00013693901035673187, "loss": 0.2602, "step": 323800 }, { "epoch": 93.15592635212889, "grad_norm": 1.0937505960464478, "learning_rate": 0.00013688147295742232, "loss": 0.2617, "step": 323810 }, { "epoch": 93.15880322209436, "grad_norm": 0.7444301247596741, "learning_rate": 0.00013682393555811278, "loss": 0.2434, "step": 323820 }, { "epoch": 93.16168009205984, "grad_norm": 1.8744877576828003, "learning_rate": 0.00013676639815880323, "loss": 0.3731, "step": 323830 }, { "epoch": 93.16455696202532, "grad_norm": 0.7025884389877319, "learning_rate": 0.0001367088607594937, "loss": 0.2968, "step": 323840 }, { "epoch": 93.16743383199079, "grad_norm": 1.2728543281555176, "learning_rate": 0.00013665132336018412, "loss": 0.2879, "step": 323850 }, { "epoch": 93.17031070195627, "grad_norm": 1.3163342475891113, "learning_rate": 0.00013659378596087457, "loss": 0.2537, "step": 323860 }, { "epoch": 93.17318757192174, "grad_norm": 0.9024620652198792, "learning_rate": 0.00013653624856156503, "loss": 0.2693, "step": 323870 }, { "epoch": 93.17606444188722, "grad_norm": 0.9291835427284241, "learning_rate": 0.00013647871116225546, "loss": 0.2404, "step": 323880 }, { "epoch": 93.17894131185271, "grad_norm": 1.4142754077911377, "learning_rate": 0.0001364211737629459, "loss": 0.2693, "step": 323890 }, { "epoch": 93.18181818181819, "grad_norm": 1.1337732076644897, "learning_rate": 0.00013636363636363637, "loss": 0.2656, "step": 323900 }, { "epoch": 93.18469505178366, "grad_norm": 1.190242052078247, "learning_rate": 0.0001363060989643268, "loss": 0.2193, "step": 323910 }, { "epoch": 93.18757192174914, "grad_norm": 1.409302830696106, "learning_rate": 0.00013624856156501728, "loss": 0.2972, "step": 323920 }, { "epoch": 93.19044879171462, "grad_norm": 0.5267312526702881, "learning_rate": 0.00013619102416570774, "loss": 0.226, "step": 323930 }, { "epoch": 93.19332566168009, "grad_norm": 0.7546529173851013, "learning_rate": 0.00013613348676639816, "loss": 0.2912, "step": 323940 }, { "epoch": 93.19620253164557, "grad_norm": 1.6487971544265747, "learning_rate": 0.00013607594936708862, "loss": 0.234, "step": 323950 }, { "epoch": 93.19907940161104, "grad_norm": 1.4487180709838867, "learning_rate": 0.00013601841196777905, "loss": 0.3632, "step": 323960 }, { "epoch": 93.20195627157652, "grad_norm": 1.559881329536438, "learning_rate": 0.0001359608745684695, "loss": 0.3048, "step": 323970 }, { "epoch": 93.204833141542, "grad_norm": 1.1562188863754272, "learning_rate": 0.00013590333716915996, "loss": 0.2532, "step": 323980 }, { "epoch": 93.20771001150747, "grad_norm": 1.5391569137573242, "learning_rate": 0.0001358457997698504, "loss": 0.2292, "step": 323990 }, { "epoch": 93.21058688147296, "grad_norm": 1.2090017795562744, "learning_rate": 0.00013578826237054084, "loss": 0.3156, "step": 324000 }, { "epoch": 93.21346375143844, "grad_norm": 1.7418460845947266, "learning_rate": 0.0001357307249712313, "loss": 0.2955, "step": 324010 }, { "epoch": 93.21634062140392, "grad_norm": 1.6929757595062256, "learning_rate": 0.00013567318757192175, "loss": 0.2551, "step": 324020 }, { "epoch": 93.21921749136939, "grad_norm": 0.6328943371772766, "learning_rate": 0.0001356156501726122, "loss": 0.2552, "step": 324030 }, { "epoch": 93.22209436133487, "grad_norm": 0.7241759300231934, "learning_rate": 0.00013555811277330266, "loss": 0.2547, "step": 324040 }, { "epoch": 93.22497123130034, "grad_norm": 1.2561440467834473, "learning_rate": 0.0001355005753739931, "loss": 0.2171, "step": 324050 }, { "epoch": 93.22784810126582, "grad_norm": 2.0139336585998535, "learning_rate": 0.00013544303797468355, "loss": 0.2793, "step": 324060 }, { "epoch": 93.2307249712313, "grad_norm": 1.788325548171997, "learning_rate": 0.000135385500575374, "loss": 0.2668, "step": 324070 }, { "epoch": 93.23360184119677, "grad_norm": 1.185127854347229, "learning_rate": 0.00013532796317606443, "loss": 0.2828, "step": 324080 }, { "epoch": 93.23647871116225, "grad_norm": 1.186376690864563, "learning_rate": 0.0001352704257767549, "loss": 0.2696, "step": 324090 }, { "epoch": 93.23935558112774, "grad_norm": 1.5465482473373413, "learning_rate": 0.00013521288837744534, "loss": 0.2858, "step": 324100 }, { "epoch": 93.24223245109322, "grad_norm": 1.1946179866790771, "learning_rate": 0.0001351553509781358, "loss": 0.2806, "step": 324110 }, { "epoch": 93.24510932105869, "grad_norm": 0.744651734828949, "learning_rate": 0.00013509781357882625, "loss": 0.2834, "step": 324120 }, { "epoch": 93.24798619102417, "grad_norm": 0.7582249045372009, "learning_rate": 0.0001350402761795167, "loss": 0.2552, "step": 324130 }, { "epoch": 93.25086306098964, "grad_norm": 0.9783573150634766, "learning_rate": 0.00013498273878020714, "loss": 0.3886, "step": 324140 }, { "epoch": 93.25373993095512, "grad_norm": 1.8948689699172974, "learning_rate": 0.0001349252013808976, "loss": 0.2405, "step": 324150 }, { "epoch": 93.2566168009206, "grad_norm": 1.0891685485839844, "learning_rate": 0.00013486766398158802, "loss": 0.2658, "step": 324160 }, { "epoch": 93.25949367088607, "grad_norm": 0.6852043867111206, "learning_rate": 0.00013481012658227848, "loss": 0.2224, "step": 324170 }, { "epoch": 93.26237054085155, "grad_norm": 1.2317204475402832, "learning_rate": 0.00013475258918296893, "loss": 0.3177, "step": 324180 }, { "epoch": 93.26524741081703, "grad_norm": 0.7742636799812317, "learning_rate": 0.00013469505178365936, "loss": 0.2234, "step": 324190 }, { "epoch": 93.2681242807825, "grad_norm": 0.788483202457428, "learning_rate": 0.00013463751438434982, "loss": 0.3339, "step": 324200 }, { "epoch": 93.27100115074799, "grad_norm": 0.8820870518684387, "learning_rate": 0.0001345799769850403, "loss": 0.3031, "step": 324210 }, { "epoch": 93.27387802071347, "grad_norm": 1.3994606733322144, "learning_rate": 0.00013452243958573073, "loss": 0.2658, "step": 324220 }, { "epoch": 93.27675489067894, "grad_norm": 0.9603366851806641, "learning_rate": 0.00013446490218642118, "loss": 0.3074, "step": 324230 }, { "epoch": 93.27963176064442, "grad_norm": 1.2254562377929688, "learning_rate": 0.00013440736478711164, "loss": 0.3285, "step": 324240 }, { "epoch": 93.2825086306099, "grad_norm": 0.9211281538009644, "learning_rate": 0.00013434982738780207, "loss": 0.26, "step": 324250 }, { "epoch": 93.28538550057537, "grad_norm": 0.7380797863006592, "learning_rate": 0.00013429228998849252, "loss": 0.2498, "step": 324260 }, { "epoch": 93.28826237054085, "grad_norm": 0.7337656021118164, "learning_rate": 0.00013423475258918298, "loss": 0.3619, "step": 324270 }, { "epoch": 93.29113924050633, "grad_norm": 1.2602747678756714, "learning_rate": 0.0001341772151898734, "loss": 0.2884, "step": 324280 }, { "epoch": 93.2940161104718, "grad_norm": 1.068015694618225, "learning_rate": 0.00013411967779056386, "loss": 0.2629, "step": 324290 }, { "epoch": 93.29689298043728, "grad_norm": 2.32425594329834, "learning_rate": 0.00013406214039125432, "loss": 0.29, "step": 324300 }, { "epoch": 93.29976985040277, "grad_norm": 1.4228427410125732, "learning_rate": 0.00013400460299194477, "loss": 0.239, "step": 324310 }, { "epoch": 93.30264672036824, "grad_norm": 0.8701231479644775, "learning_rate": 0.00013394706559263523, "loss": 0.311, "step": 324320 }, { "epoch": 93.30552359033372, "grad_norm": 0.9333267211914062, "learning_rate": 0.00013388952819332568, "loss": 0.2838, "step": 324330 }, { "epoch": 93.3084004602992, "grad_norm": 1.8594269752502441, "learning_rate": 0.0001338319907940161, "loss": 0.2852, "step": 324340 }, { "epoch": 93.31127733026467, "grad_norm": 1.0194045305252075, "learning_rate": 0.00013377445339470657, "loss": 0.2522, "step": 324350 }, { "epoch": 93.31415420023015, "grad_norm": 0.8998157978057861, "learning_rate": 0.000133716915995397, "loss": 0.2704, "step": 324360 }, { "epoch": 93.31703107019563, "grad_norm": 1.0719329118728638, "learning_rate": 0.00013365937859608745, "loss": 0.3215, "step": 324370 }, { "epoch": 93.3199079401611, "grad_norm": 1.052134394645691, "learning_rate": 0.0001336018411967779, "loss": 0.2277, "step": 324380 }, { "epoch": 93.32278481012658, "grad_norm": 0.8566039204597473, "learning_rate": 0.00013354430379746834, "loss": 0.2175, "step": 324390 }, { "epoch": 93.32566168009205, "grad_norm": 1.4271150827407837, "learning_rate": 0.0001334867663981588, "loss": 0.3676, "step": 324400 }, { "epoch": 93.32853855005754, "grad_norm": 1.068202018737793, "learning_rate": 0.00013342922899884927, "loss": 0.253, "step": 324410 }, { "epoch": 93.33141542002302, "grad_norm": 1.0164005756378174, "learning_rate": 0.0001333716915995397, "loss": 0.2853, "step": 324420 }, { "epoch": 93.3342922899885, "grad_norm": 1.1203289031982422, "learning_rate": 0.00013331415420023016, "loss": 0.2401, "step": 324430 }, { "epoch": 93.33716915995397, "grad_norm": 2.219759702682495, "learning_rate": 0.00013325661680092061, "loss": 0.291, "step": 324440 }, { "epoch": 93.34004602991945, "grad_norm": 2.2732088565826416, "learning_rate": 0.00013319907940161104, "loss": 0.3204, "step": 324450 }, { "epoch": 93.34292289988493, "grad_norm": 1.1342045068740845, "learning_rate": 0.0001331415420023015, "loss": 0.2726, "step": 324460 }, { "epoch": 93.3457997698504, "grad_norm": 2.112116813659668, "learning_rate": 0.00013308400460299195, "loss": 0.2964, "step": 324470 }, { "epoch": 93.34867663981588, "grad_norm": 0.7094216346740723, "learning_rate": 0.00013302646720368238, "loss": 0.2489, "step": 324480 }, { "epoch": 93.35155350978135, "grad_norm": 1.758175015449524, "learning_rate": 0.00013296892980437284, "loss": 0.2832, "step": 324490 }, { "epoch": 93.35443037974683, "grad_norm": 1.8815972805023193, "learning_rate": 0.0001329113924050633, "loss": 0.3951, "step": 324500 }, { "epoch": 93.3573072497123, "grad_norm": 1.227449655532837, "learning_rate": 0.00013285385500575375, "loss": 0.3685, "step": 324510 }, { "epoch": 93.3601841196778, "grad_norm": 1.070691466331482, "learning_rate": 0.0001327963176064442, "loss": 0.2634, "step": 324520 }, { "epoch": 93.36306098964327, "grad_norm": 1.3340044021606445, "learning_rate": 0.00013273878020713466, "loss": 0.243, "step": 324530 }, { "epoch": 93.36593785960875, "grad_norm": 1.1217148303985596, "learning_rate": 0.0001326812428078251, "loss": 0.2488, "step": 324540 }, { "epoch": 93.36881472957423, "grad_norm": 1.0127487182617188, "learning_rate": 0.00013262370540851554, "loss": 0.2494, "step": 324550 }, { "epoch": 93.3716915995397, "grad_norm": 1.5588257312774658, "learning_rate": 0.00013256616800920597, "loss": 0.2672, "step": 324560 }, { "epoch": 93.37456846950518, "grad_norm": 1.087087869644165, "learning_rate": 0.00013250863060989643, "loss": 0.2596, "step": 324570 }, { "epoch": 93.37744533947065, "grad_norm": 1.230405330657959, "learning_rate": 0.00013245109321058688, "loss": 0.2878, "step": 324580 }, { "epoch": 93.38032220943613, "grad_norm": 0.7496709227561951, "learning_rate": 0.0001323935558112773, "loss": 0.2599, "step": 324590 }, { "epoch": 93.3831990794016, "grad_norm": 0.7111870646476746, "learning_rate": 0.0001323360184119678, "loss": 0.2874, "step": 324600 }, { "epoch": 93.38607594936708, "grad_norm": 0.8888126015663147, "learning_rate": 0.00013227848101265825, "loss": 0.266, "step": 324610 }, { "epoch": 93.38895281933257, "grad_norm": 1.4410159587860107, "learning_rate": 0.00013222094361334868, "loss": 0.2397, "step": 324620 }, { "epoch": 93.39182968929805, "grad_norm": 0.8281757235527039, "learning_rate": 0.00013216340621403913, "loss": 0.2496, "step": 324630 }, { "epoch": 93.39470655926353, "grad_norm": 1.670455813407898, "learning_rate": 0.0001321058688147296, "loss": 0.3079, "step": 324640 }, { "epoch": 93.397583429229, "grad_norm": 2.080320119857788, "learning_rate": 0.00013204833141542002, "loss": 0.2869, "step": 324650 }, { "epoch": 93.40046029919448, "grad_norm": 1.2373089790344238, "learning_rate": 0.00013199079401611047, "loss": 0.224, "step": 324660 }, { "epoch": 93.40333716915995, "grad_norm": 0.8533246517181396, "learning_rate": 0.00013193325661680093, "loss": 0.2235, "step": 324670 }, { "epoch": 93.40621403912543, "grad_norm": 2.2588255405426025, "learning_rate": 0.00013187571921749136, "loss": 0.27, "step": 324680 }, { "epoch": 93.4090909090909, "grad_norm": 0.6326538324356079, "learning_rate": 0.0001318181818181818, "loss": 0.2443, "step": 324690 }, { "epoch": 93.41196777905638, "grad_norm": 1.069204568862915, "learning_rate": 0.0001317606444188723, "loss": 0.2492, "step": 324700 }, { "epoch": 93.41484464902186, "grad_norm": 0.8188316226005554, "learning_rate": 0.00013170310701956272, "loss": 0.2815, "step": 324710 }, { "epoch": 93.41772151898734, "grad_norm": 2.8629684448242188, "learning_rate": 0.00013164556962025318, "loss": 0.3111, "step": 324720 }, { "epoch": 93.42059838895283, "grad_norm": 1.0506447553634644, "learning_rate": 0.00013158803222094363, "loss": 0.261, "step": 324730 }, { "epoch": 93.4234752589183, "grad_norm": 1.1999307870864868, "learning_rate": 0.00013153049482163406, "loss": 0.2295, "step": 324740 }, { "epoch": 93.42635212888378, "grad_norm": 1.2823829650878906, "learning_rate": 0.00013147295742232452, "loss": 0.2609, "step": 324750 }, { "epoch": 93.42922899884925, "grad_norm": 0.7386608719825745, "learning_rate": 0.00013141542002301495, "loss": 0.2554, "step": 324760 }, { "epoch": 93.43210586881473, "grad_norm": 1.2409765720367432, "learning_rate": 0.0001313578826237054, "loss": 0.3067, "step": 324770 }, { "epoch": 93.4349827387802, "grad_norm": 2.033844232559204, "learning_rate": 0.00013130034522439586, "loss": 0.3161, "step": 324780 }, { "epoch": 93.43785960874568, "grad_norm": 1.0474835634231567, "learning_rate": 0.00013124280782508629, "loss": 0.3452, "step": 324790 }, { "epoch": 93.44073647871116, "grad_norm": 1.1274528503417969, "learning_rate": 0.00013118527042577677, "loss": 0.2749, "step": 324800 }, { "epoch": 93.44361334867664, "grad_norm": 1.099918246269226, "learning_rate": 0.00013112773302646722, "loss": 0.2797, "step": 324810 }, { "epoch": 93.44649021864211, "grad_norm": 1.4501782655715942, "learning_rate": 0.00013107019562715765, "loss": 0.2701, "step": 324820 }, { "epoch": 93.4493670886076, "grad_norm": 1.0738564729690552, "learning_rate": 0.0001310126582278481, "loss": 0.2368, "step": 324830 }, { "epoch": 93.45224395857308, "grad_norm": 0.9661113023757935, "learning_rate": 0.00013095512082853856, "loss": 0.2334, "step": 324840 }, { "epoch": 93.45512082853855, "grad_norm": 1.5793190002441406, "learning_rate": 0.000130897583429229, "loss": 0.3593, "step": 324850 }, { "epoch": 93.45799769850403, "grad_norm": 1.380823016166687, "learning_rate": 0.00013084004602991945, "loss": 0.2413, "step": 324860 }, { "epoch": 93.4608745684695, "grad_norm": 1.5993585586547852, "learning_rate": 0.0001307825086306099, "loss": 0.2454, "step": 324870 }, { "epoch": 93.46375143843498, "grad_norm": 1.077422857284546, "learning_rate": 0.00013072497123130033, "loss": 0.2325, "step": 324880 }, { "epoch": 93.46662830840046, "grad_norm": 1.6617716550827026, "learning_rate": 0.00013066743383199079, "loss": 0.2684, "step": 324890 }, { "epoch": 93.46950517836594, "grad_norm": 0.8535457253456116, "learning_rate": 0.00013060989643268127, "loss": 0.2957, "step": 324900 }, { "epoch": 93.47238204833141, "grad_norm": 0.7156521081924438, "learning_rate": 0.0001305523590333717, "loss": 0.2542, "step": 324910 }, { "epoch": 93.47525891829689, "grad_norm": 0.9744488596916199, "learning_rate": 0.00013049482163406215, "loss": 0.2685, "step": 324920 }, { "epoch": 93.47813578826236, "grad_norm": 0.9590361714363098, "learning_rate": 0.0001304372842347526, "loss": 0.3124, "step": 324930 }, { "epoch": 93.48101265822785, "grad_norm": 0.7305168509483337, "learning_rate": 0.00013037974683544304, "loss": 0.2188, "step": 324940 }, { "epoch": 93.48388952819333, "grad_norm": 1.6791224479675293, "learning_rate": 0.0001303222094361335, "loss": 0.2542, "step": 324950 }, { "epoch": 93.4867663981588, "grad_norm": 1.667802095413208, "learning_rate": 0.00013026467203682392, "loss": 0.239, "step": 324960 }, { "epoch": 93.48964326812428, "grad_norm": 0.8311356902122498, "learning_rate": 0.00013020713463751438, "loss": 0.2702, "step": 324970 }, { "epoch": 93.49252013808976, "grad_norm": 0.9226763248443604, "learning_rate": 0.00013014959723820483, "loss": 0.2763, "step": 324980 }, { "epoch": 93.49539700805524, "grad_norm": 0.7773282527923584, "learning_rate": 0.00013009205983889526, "loss": 0.2995, "step": 324990 }, { "epoch": 93.49827387802071, "grad_norm": 0.9560686349868774, "learning_rate": 0.00013003452243958574, "loss": 0.2949, "step": 325000 }, { "epoch": 93.50115074798619, "grad_norm": 1.5479350090026855, "learning_rate": 0.0001299769850402762, "loss": 0.293, "step": 325010 }, { "epoch": 93.50402761795166, "grad_norm": 0.7708339691162109, "learning_rate": 0.00012991944764096663, "loss": 0.3505, "step": 325020 }, { "epoch": 93.50690448791714, "grad_norm": 1.491784930229187, "learning_rate": 0.00012986191024165708, "loss": 0.2659, "step": 325030 }, { "epoch": 93.50978135788263, "grad_norm": 0.9564074873924255, "learning_rate": 0.00012980437284234754, "loss": 0.2483, "step": 325040 }, { "epoch": 93.5126582278481, "grad_norm": 1.60480797290802, "learning_rate": 0.00012974683544303797, "loss": 0.2504, "step": 325050 }, { "epoch": 93.51553509781358, "grad_norm": 1.184253454208374, "learning_rate": 0.00012968929804372842, "loss": 0.3209, "step": 325060 }, { "epoch": 93.51841196777906, "grad_norm": 0.9650664329528809, "learning_rate": 0.00012963176064441888, "loss": 0.2293, "step": 325070 }, { "epoch": 93.52128883774454, "grad_norm": 1.7594568729400635, "learning_rate": 0.0001295742232451093, "loss": 0.2484, "step": 325080 }, { "epoch": 93.52416570771001, "grad_norm": 1.0910543203353882, "learning_rate": 0.00012951668584579976, "loss": 0.3298, "step": 325090 }, { "epoch": 93.52704257767549, "grad_norm": 1.4296321868896484, "learning_rate": 0.00012945914844649024, "loss": 0.2129, "step": 325100 }, { "epoch": 93.52991944764096, "grad_norm": 0.9741095304489136, "learning_rate": 0.00012940161104718067, "loss": 0.2566, "step": 325110 }, { "epoch": 93.53279631760644, "grad_norm": 0.5144251585006714, "learning_rate": 0.00012934407364787113, "loss": 0.2865, "step": 325120 }, { "epoch": 93.53567318757192, "grad_norm": 1.5100661516189575, "learning_rate": 0.00012928653624856158, "loss": 0.2831, "step": 325130 }, { "epoch": 93.53855005753739, "grad_norm": 1.2211530208587646, "learning_rate": 0.000129228998849252, "loss": 0.2664, "step": 325140 }, { "epoch": 93.54142692750288, "grad_norm": 1.359378457069397, "learning_rate": 0.00012917146144994247, "loss": 0.3153, "step": 325150 }, { "epoch": 93.54430379746836, "grad_norm": 1.3455431461334229, "learning_rate": 0.0001291139240506329, "loss": 0.2205, "step": 325160 }, { "epoch": 93.54718066743384, "grad_norm": 0.6905553936958313, "learning_rate": 0.00012905638665132335, "loss": 0.3098, "step": 325170 }, { "epoch": 93.55005753739931, "grad_norm": 1.152158498764038, "learning_rate": 0.0001289988492520138, "loss": 0.2786, "step": 325180 }, { "epoch": 93.55293440736479, "grad_norm": 2.4837241172790527, "learning_rate": 0.00012894131185270426, "loss": 0.3114, "step": 325190 }, { "epoch": 93.55581127733026, "grad_norm": 1.4106826782226562, "learning_rate": 0.00012888377445339472, "loss": 0.2415, "step": 325200 }, { "epoch": 93.55868814729574, "grad_norm": 1.3815631866455078, "learning_rate": 0.00012882623705408517, "loss": 0.3287, "step": 325210 }, { "epoch": 93.56156501726122, "grad_norm": 2.2388360500335693, "learning_rate": 0.0001287686996547756, "loss": 0.3204, "step": 325220 }, { "epoch": 93.56444188722669, "grad_norm": 0.7133941650390625, "learning_rate": 0.00012871116225546606, "loss": 0.2695, "step": 325230 }, { "epoch": 93.56731875719217, "grad_norm": 0.6460534334182739, "learning_rate": 0.0001286536248561565, "loss": 0.2638, "step": 325240 }, { "epoch": 93.57019562715766, "grad_norm": 0.8439812660217285, "learning_rate": 0.00012859608745684694, "loss": 0.2372, "step": 325250 }, { "epoch": 93.57307249712314, "grad_norm": 0.8132932782173157, "learning_rate": 0.0001285385500575374, "loss": 0.219, "step": 325260 }, { "epoch": 93.57594936708861, "grad_norm": 1.0936412811279297, "learning_rate": 0.00012848101265822785, "loss": 0.2755, "step": 325270 }, { "epoch": 93.57882623705409, "grad_norm": 1.0941232442855835, "learning_rate": 0.00012842347525891828, "loss": 0.2725, "step": 325280 }, { "epoch": 93.58170310701956, "grad_norm": 1.6844696998596191, "learning_rate": 0.00012836593785960876, "loss": 0.2552, "step": 325290 }, { "epoch": 93.58457997698504, "grad_norm": 1.1819190979003906, "learning_rate": 0.00012830840046029922, "loss": 0.3094, "step": 325300 }, { "epoch": 93.58745684695052, "grad_norm": 0.7518423199653625, "learning_rate": 0.00012825086306098965, "loss": 0.3105, "step": 325310 }, { "epoch": 93.59033371691599, "grad_norm": 2.456012725830078, "learning_rate": 0.0001281933256616801, "loss": 0.2729, "step": 325320 }, { "epoch": 93.59321058688147, "grad_norm": 1.1458913087844849, "learning_rate": 0.00012813578826237056, "loss": 0.2489, "step": 325330 }, { "epoch": 93.59608745684694, "grad_norm": 0.9897890686988831, "learning_rate": 0.00012807825086306099, "loss": 0.2563, "step": 325340 }, { "epoch": 93.59896432681242, "grad_norm": 1.3268945217132568, "learning_rate": 0.00012802071346375144, "loss": 0.3083, "step": 325350 }, { "epoch": 93.60184119677791, "grad_norm": 1.2155681848526, "learning_rate": 0.00012796317606444187, "loss": 0.2782, "step": 325360 }, { "epoch": 93.60471806674339, "grad_norm": 1.327366590499878, "learning_rate": 0.00012790563866513232, "loss": 0.284, "step": 325370 }, { "epoch": 93.60759493670886, "grad_norm": 1.1748096942901611, "learning_rate": 0.00012784810126582278, "loss": 0.2721, "step": 325380 }, { "epoch": 93.61047180667434, "grad_norm": 1.4449957609176636, "learning_rate": 0.00012779056386651324, "loss": 0.3377, "step": 325390 }, { "epoch": 93.61334867663982, "grad_norm": 1.2938424348831177, "learning_rate": 0.0001277330264672037, "loss": 0.3167, "step": 325400 }, { "epoch": 93.61622554660529, "grad_norm": 1.1298778057098389, "learning_rate": 0.00012767548906789415, "loss": 0.2498, "step": 325410 }, { "epoch": 93.61910241657077, "grad_norm": 1.7542284727096558, "learning_rate": 0.00012761795166858458, "loss": 0.3445, "step": 325420 }, { "epoch": 93.62197928653625, "grad_norm": 1.2992148399353027, "learning_rate": 0.00012756041426927503, "loss": 0.294, "step": 325430 }, { "epoch": 93.62485615650172, "grad_norm": 2.1111807823181152, "learning_rate": 0.00012750287686996549, "loss": 0.2538, "step": 325440 }, { "epoch": 93.6277330264672, "grad_norm": 1.6013542413711548, "learning_rate": 0.00012744533947065591, "loss": 0.3516, "step": 325450 }, { "epoch": 93.63060989643269, "grad_norm": 1.47336745262146, "learning_rate": 0.00012738780207134637, "loss": 0.2811, "step": 325460 }, { "epoch": 93.63348676639816, "grad_norm": 1.6343564987182617, "learning_rate": 0.00012733026467203683, "loss": 0.2735, "step": 325470 }, { "epoch": 93.63636363636364, "grad_norm": 0.7864694595336914, "learning_rate": 0.00012727272727272725, "loss": 0.3021, "step": 325480 }, { "epoch": 93.63924050632912, "grad_norm": 2.928473472595215, "learning_rate": 0.00012721518987341774, "loss": 0.3091, "step": 325490 }, { "epoch": 93.64211737629459, "grad_norm": 1.3068333864212036, "learning_rate": 0.0001271576524741082, "loss": 0.3105, "step": 325500 }, { "epoch": 93.64499424626007, "grad_norm": 0.8542620539665222, "learning_rate": 0.00012710011507479862, "loss": 0.3113, "step": 325510 }, { "epoch": 93.64787111622555, "grad_norm": 1.3686209917068481, "learning_rate": 0.00012704257767548908, "loss": 0.2613, "step": 325520 }, { "epoch": 93.65074798619102, "grad_norm": 1.3710005283355713, "learning_rate": 0.00012698504027617953, "loss": 0.288, "step": 325530 }, { "epoch": 93.6536248561565, "grad_norm": 1.5759764909744263, "learning_rate": 0.00012692750287686996, "loss": 0.2801, "step": 325540 }, { "epoch": 93.65650172612197, "grad_norm": 1.053127646446228, "learning_rate": 0.00012686996547756042, "loss": 0.2776, "step": 325550 }, { "epoch": 93.65937859608745, "grad_norm": 0.9485054612159729, "learning_rate": 0.00012681242807825087, "loss": 0.3361, "step": 325560 }, { "epoch": 93.66225546605294, "grad_norm": 1.4221409559249878, "learning_rate": 0.0001267548906789413, "loss": 0.273, "step": 325570 }, { "epoch": 93.66513233601842, "grad_norm": 0.7759813666343689, "learning_rate": 0.00012669735327963176, "loss": 0.226, "step": 325580 }, { "epoch": 93.66800920598389, "grad_norm": 0.8265765309333801, "learning_rate": 0.0001266398158803222, "loss": 0.2238, "step": 325590 }, { "epoch": 93.67088607594937, "grad_norm": 1.1736345291137695, "learning_rate": 0.00012658227848101267, "loss": 0.2563, "step": 325600 }, { "epoch": 93.67376294591485, "grad_norm": 0.8290054798126221, "learning_rate": 0.00012652474108170312, "loss": 0.3153, "step": 325610 }, { "epoch": 93.67663981588032, "grad_norm": 1.1092665195465088, "learning_rate": 0.00012646720368239355, "loss": 0.2825, "step": 325620 }, { "epoch": 93.6795166858458, "grad_norm": 1.6830083131790161, "learning_rate": 0.000126409666283084, "loss": 0.2674, "step": 325630 }, { "epoch": 93.68239355581127, "grad_norm": 1.032857060432434, "learning_rate": 0.00012635212888377446, "loss": 0.2163, "step": 325640 }, { "epoch": 93.68527042577675, "grad_norm": 1.347806692123413, "learning_rate": 0.0001262945914844649, "loss": 0.2471, "step": 325650 }, { "epoch": 93.68814729574223, "grad_norm": 1.310577154159546, "learning_rate": 0.00012623705408515534, "loss": 0.247, "step": 325660 }, { "epoch": 93.69102416570772, "grad_norm": 1.350836157798767, "learning_rate": 0.0001261795166858458, "loss": 0.2848, "step": 325670 }, { "epoch": 93.69390103567319, "grad_norm": 1.61818265914917, "learning_rate": 0.00012612197928653626, "loss": 0.2738, "step": 325680 }, { "epoch": 93.69677790563867, "grad_norm": 1.1713536977767944, "learning_rate": 0.0001260644418872267, "loss": 0.257, "step": 325690 }, { "epoch": 93.69965477560415, "grad_norm": 0.8415582776069641, "learning_rate": 0.00012600690448791717, "loss": 0.3165, "step": 325700 }, { "epoch": 93.70253164556962, "grad_norm": 1.1004191637039185, "learning_rate": 0.0001259493670886076, "loss": 0.28, "step": 325710 }, { "epoch": 93.7054085155351, "grad_norm": 1.3210508823394775, "learning_rate": 0.00012589182968929805, "loss": 0.2662, "step": 325720 }, { "epoch": 93.70828538550057, "grad_norm": 0.736110270023346, "learning_rate": 0.0001258342922899885, "loss": 0.2421, "step": 325730 }, { "epoch": 93.71116225546605, "grad_norm": 1.1680883169174194, "learning_rate": 0.00012577675489067893, "loss": 0.2917, "step": 325740 }, { "epoch": 93.71403912543153, "grad_norm": 0.6466847658157349, "learning_rate": 0.0001257192174913694, "loss": 0.3688, "step": 325750 }, { "epoch": 93.716915995397, "grad_norm": 0.6731382608413696, "learning_rate": 0.00012566168009205985, "loss": 0.292, "step": 325760 }, { "epoch": 93.71979286536248, "grad_norm": 1.22019362449646, "learning_rate": 0.00012560414269275027, "loss": 0.2607, "step": 325770 }, { "epoch": 93.72266973532797, "grad_norm": 1.9363945722579956, "learning_rate": 0.00012554660529344076, "loss": 0.2265, "step": 325780 }, { "epoch": 93.72554660529345, "grad_norm": 1.4157435894012451, "learning_rate": 0.00012548906789413119, "loss": 0.3241, "step": 325790 }, { "epoch": 93.72842347525892, "grad_norm": 0.813824474811554, "learning_rate": 0.00012543153049482164, "loss": 0.2916, "step": 325800 }, { "epoch": 93.7313003452244, "grad_norm": 1.3113517761230469, "learning_rate": 0.0001253739930955121, "loss": 0.2732, "step": 325810 }, { "epoch": 93.73417721518987, "grad_norm": 1.6768615245819092, "learning_rate": 0.00012531645569620252, "loss": 0.2591, "step": 325820 }, { "epoch": 93.73705408515535, "grad_norm": 0.753493070602417, "learning_rate": 0.00012525891829689298, "loss": 0.2588, "step": 325830 }, { "epoch": 93.73993095512083, "grad_norm": 1.9089032411575317, "learning_rate": 0.00012520138089758344, "loss": 0.3214, "step": 325840 }, { "epoch": 93.7428078250863, "grad_norm": 0.9222670197486877, "learning_rate": 0.00012514384349827386, "loss": 0.2786, "step": 325850 }, { "epoch": 93.74568469505178, "grad_norm": 1.178375244140625, "learning_rate": 0.00012508630609896432, "loss": 0.2525, "step": 325860 }, { "epoch": 93.74856156501725, "grad_norm": 1.4660700559616089, "learning_rate": 0.00012502876869965477, "loss": 0.2486, "step": 325870 }, { "epoch": 93.75143843498275, "grad_norm": 1.540747046470642, "learning_rate": 0.00012497123130034523, "loss": 0.2983, "step": 325880 }, { "epoch": 93.75431530494822, "grad_norm": 2.0102641582489014, "learning_rate": 0.00012491369390103566, "loss": 0.315, "step": 325890 }, { "epoch": 93.7571921749137, "grad_norm": 2.2227611541748047, "learning_rate": 0.00012485615650172614, "loss": 0.3891, "step": 325900 }, { "epoch": 93.76006904487917, "grad_norm": 1.7689648866653442, "learning_rate": 0.00012479861910241657, "loss": 0.2975, "step": 325910 }, { "epoch": 93.76294591484465, "grad_norm": 0.9260731935501099, "learning_rate": 0.00012474108170310703, "loss": 0.2822, "step": 325920 }, { "epoch": 93.76582278481013, "grad_norm": 1.2452341318130493, "learning_rate": 0.00012468354430379748, "loss": 0.225, "step": 325930 }, { "epoch": 93.7686996547756, "grad_norm": 0.8728945255279541, "learning_rate": 0.0001246260069044879, "loss": 0.2403, "step": 325940 }, { "epoch": 93.77157652474108, "grad_norm": 1.3148776292800903, "learning_rate": 0.00012456846950517836, "loss": 0.2442, "step": 325950 }, { "epoch": 93.77445339470655, "grad_norm": 1.10359525680542, "learning_rate": 0.00012451093210586882, "loss": 0.2949, "step": 325960 }, { "epoch": 93.77733026467203, "grad_norm": 1.031083583831787, "learning_rate": 0.00012445339470655928, "loss": 0.2952, "step": 325970 }, { "epoch": 93.78020713463752, "grad_norm": 0.9197956323623657, "learning_rate": 0.0001243958573072497, "loss": 0.2738, "step": 325980 }, { "epoch": 93.783084004603, "grad_norm": 1.211298942565918, "learning_rate": 0.00012433831990794016, "loss": 0.2473, "step": 325990 }, { "epoch": 93.78596087456847, "grad_norm": 2.352881669998169, "learning_rate": 0.00012428078250863062, "loss": 0.3121, "step": 326000 }, { "epoch": 93.78883774453395, "grad_norm": 1.03166925907135, "learning_rate": 0.00012422324510932107, "loss": 0.2722, "step": 326010 }, { "epoch": 93.79171461449943, "grad_norm": 1.8805453777313232, "learning_rate": 0.0001241657077100115, "loss": 0.2551, "step": 326020 }, { "epoch": 93.7945914844649, "grad_norm": 1.3055646419525146, "learning_rate": 0.00012410817031070195, "loss": 0.3194, "step": 326030 }, { "epoch": 93.79746835443038, "grad_norm": 1.3159736394882202, "learning_rate": 0.0001240506329113924, "loss": 0.3224, "step": 326040 }, { "epoch": 93.80034522439585, "grad_norm": 0.8793744444847107, "learning_rate": 0.00012399309551208287, "loss": 0.3063, "step": 326050 }, { "epoch": 93.80322209436133, "grad_norm": 1.760928750038147, "learning_rate": 0.00012393555811277332, "loss": 0.2922, "step": 326060 }, { "epoch": 93.80609896432681, "grad_norm": 2.3146214485168457, "learning_rate": 0.00012387802071346375, "loss": 0.3248, "step": 326070 }, { "epoch": 93.80897583429228, "grad_norm": 1.559507131576538, "learning_rate": 0.0001238204833141542, "loss": 0.2345, "step": 326080 }, { "epoch": 93.81185270425777, "grad_norm": 1.1906349658966064, "learning_rate": 0.00012376294591484463, "loss": 0.2949, "step": 326090 }, { "epoch": 93.81472957422325, "grad_norm": 0.9176565408706665, "learning_rate": 0.00012370540851553512, "loss": 0.2664, "step": 326100 }, { "epoch": 93.81760644418873, "grad_norm": 1.4687384366989136, "learning_rate": 0.00012364787111622554, "loss": 0.2771, "step": 326110 }, { "epoch": 93.8204833141542, "grad_norm": 0.8358311653137207, "learning_rate": 0.000123590333716916, "loss": 0.2975, "step": 326120 }, { "epoch": 93.82336018411968, "grad_norm": 0.9883936643600464, "learning_rate": 0.00012353279631760646, "loss": 0.295, "step": 326130 }, { "epoch": 93.82623705408515, "grad_norm": 1.8101496696472168, "learning_rate": 0.00012347525891829688, "loss": 0.2344, "step": 326140 }, { "epoch": 93.82911392405063, "grad_norm": 1.69551420211792, "learning_rate": 0.00012341772151898734, "loss": 0.3181, "step": 326150 }, { "epoch": 93.83199079401611, "grad_norm": 1.6807516813278198, "learning_rate": 0.0001233601841196778, "loss": 0.3024, "step": 326160 }, { "epoch": 93.83486766398158, "grad_norm": 1.7329906225204468, "learning_rate": 0.00012330264672036825, "loss": 0.2987, "step": 326170 }, { "epoch": 93.83774453394706, "grad_norm": 1.0759350061416626, "learning_rate": 0.00012324510932105868, "loss": 0.2646, "step": 326180 }, { "epoch": 93.84062140391255, "grad_norm": 1.5582116842269897, "learning_rate": 0.00012318757192174913, "loss": 0.3018, "step": 326190 }, { "epoch": 93.84349827387803, "grad_norm": 1.3248411417007446, "learning_rate": 0.0001231300345224396, "loss": 0.2839, "step": 326200 }, { "epoch": 93.8463751438435, "grad_norm": 1.2055457830429077, "learning_rate": 0.00012307249712313005, "loss": 0.2849, "step": 326210 }, { "epoch": 93.84925201380898, "grad_norm": 3.0571210384368896, "learning_rate": 0.00012301495972382047, "loss": 0.2912, "step": 326220 }, { "epoch": 93.85212888377445, "grad_norm": 1.9436215162277222, "learning_rate": 0.00012295742232451093, "loss": 0.323, "step": 326230 }, { "epoch": 93.85500575373993, "grad_norm": 0.8750289082527161, "learning_rate": 0.00012289988492520138, "loss": 0.2724, "step": 326240 }, { "epoch": 93.85788262370541, "grad_norm": 1.7138816118240356, "learning_rate": 0.00012284234752589184, "loss": 0.2718, "step": 326250 }, { "epoch": 93.86075949367088, "grad_norm": 0.6774618029594421, "learning_rate": 0.0001227848101265823, "loss": 0.2589, "step": 326260 }, { "epoch": 93.86363636363636, "grad_norm": 1.013303279876709, "learning_rate": 0.00012272727272727272, "loss": 0.2685, "step": 326270 }, { "epoch": 93.86651323360184, "grad_norm": 1.0676380395889282, "learning_rate": 0.00012266973532796318, "loss": 0.2154, "step": 326280 }, { "epoch": 93.86939010356731, "grad_norm": 1.6839089393615723, "learning_rate": 0.0001226121979286536, "loss": 0.3215, "step": 326290 }, { "epoch": 93.8722669735328, "grad_norm": 1.3080111742019653, "learning_rate": 0.0001225546605293441, "loss": 0.2616, "step": 326300 }, { "epoch": 93.87514384349828, "grad_norm": 0.8298411965370178, "learning_rate": 0.00012249712313003452, "loss": 0.2056, "step": 326310 }, { "epoch": 93.87802071346375, "grad_norm": 1.740620493888855, "learning_rate": 0.00012243958573072497, "loss": 0.2866, "step": 326320 }, { "epoch": 93.88089758342923, "grad_norm": 0.8544343113899231, "learning_rate": 0.00012238204833141543, "loss": 0.2824, "step": 326330 }, { "epoch": 93.88377445339471, "grad_norm": 1.4706288576126099, "learning_rate": 0.00012232451093210586, "loss": 0.2729, "step": 326340 }, { "epoch": 93.88665132336018, "grad_norm": 1.6137382984161377, "learning_rate": 0.00012226697353279631, "loss": 0.2552, "step": 326350 }, { "epoch": 93.88952819332566, "grad_norm": 1.355912685394287, "learning_rate": 0.00012220943613348677, "loss": 0.2642, "step": 326360 }, { "epoch": 93.89240506329114, "grad_norm": 0.9322630763053894, "learning_rate": 0.00012215189873417722, "loss": 0.2679, "step": 326370 }, { "epoch": 93.89528193325661, "grad_norm": 2.252890110015869, "learning_rate": 0.00012209436133486765, "loss": 0.3844, "step": 326380 }, { "epoch": 93.89815880322209, "grad_norm": 1.177531123161316, "learning_rate": 0.00012203682393555812, "loss": 0.2892, "step": 326390 }, { "epoch": 93.90103567318758, "grad_norm": 0.6965845823287964, "learning_rate": 0.00012197928653624856, "loss": 0.2464, "step": 326400 }, { "epoch": 93.90391254315306, "grad_norm": 1.735852837562561, "learning_rate": 0.00012192174913693902, "loss": 0.3399, "step": 326410 }, { "epoch": 93.90678941311853, "grad_norm": 1.7870665788650513, "learning_rate": 0.00012186421173762946, "loss": 0.2539, "step": 326420 }, { "epoch": 93.90966628308401, "grad_norm": 0.7154120206832886, "learning_rate": 0.0001218066743383199, "loss": 0.2784, "step": 326430 }, { "epoch": 93.91254315304948, "grad_norm": 1.4164773225784302, "learning_rate": 0.00012174913693901037, "loss": 0.2585, "step": 326440 }, { "epoch": 93.91542002301496, "grad_norm": 1.1216641664505005, "learning_rate": 0.00012169159953970081, "loss": 0.2264, "step": 326450 }, { "epoch": 93.91829689298044, "grad_norm": 1.4032297134399414, "learning_rate": 0.00012163406214039126, "loss": 0.2692, "step": 326460 }, { "epoch": 93.92117376294591, "grad_norm": 0.767090916633606, "learning_rate": 0.0001215765247410817, "loss": 0.2678, "step": 326470 }, { "epoch": 93.92405063291139, "grad_norm": 2.21811580657959, "learning_rate": 0.00012151898734177215, "loss": 0.3718, "step": 326480 }, { "epoch": 93.92692750287686, "grad_norm": 1.8968924283981323, "learning_rate": 0.00012146144994246261, "loss": 0.2621, "step": 326490 }, { "epoch": 93.92980437284234, "grad_norm": 1.6579627990722656, "learning_rate": 0.00012140391254315305, "loss": 0.2504, "step": 326500 }, { "epoch": 93.93268124280783, "grad_norm": 2.2757577896118164, "learning_rate": 0.00012134637514384351, "loss": 0.3662, "step": 326510 }, { "epoch": 93.93555811277331, "grad_norm": 1.4554487466812134, "learning_rate": 0.00012128883774453395, "loss": 0.2747, "step": 326520 }, { "epoch": 93.93843498273878, "grad_norm": 1.8234400749206543, "learning_rate": 0.00012123130034522439, "loss": 0.2994, "step": 326530 }, { "epoch": 93.94131185270426, "grad_norm": 1.1675302982330322, "learning_rate": 0.00012117376294591486, "loss": 0.3215, "step": 326540 }, { "epoch": 93.94418872266974, "grad_norm": 0.987312376499176, "learning_rate": 0.0001211162255466053, "loss": 0.2904, "step": 326550 }, { "epoch": 93.94706559263521, "grad_norm": 0.9571155309677124, "learning_rate": 0.00012105868814729574, "loss": 0.2254, "step": 326560 }, { "epoch": 93.94994246260069, "grad_norm": 1.1766095161437988, "learning_rate": 0.00012100115074798619, "loss": 0.3165, "step": 326570 }, { "epoch": 93.95281933256616, "grad_norm": 1.1300936937332153, "learning_rate": 0.00012094361334867664, "loss": 0.328, "step": 326580 }, { "epoch": 93.95569620253164, "grad_norm": 1.2589191198349, "learning_rate": 0.0001208860759493671, "loss": 0.3241, "step": 326590 }, { "epoch": 93.95857307249712, "grad_norm": 1.3146228790283203, "learning_rate": 0.00012082853855005754, "loss": 0.2621, "step": 326600 }, { "epoch": 93.96144994246261, "grad_norm": 0.5929273962974548, "learning_rate": 0.000120771001150748, "loss": 0.2904, "step": 326610 }, { "epoch": 93.96432681242808, "grad_norm": 1.0826715230941772, "learning_rate": 0.00012071346375143844, "loss": 0.2389, "step": 326620 }, { "epoch": 93.96720368239356, "grad_norm": 1.1539819240570068, "learning_rate": 0.00012065592635212888, "loss": 0.2824, "step": 326630 }, { "epoch": 93.97008055235904, "grad_norm": 1.501076579093933, "learning_rate": 0.00012059838895281935, "loss": 0.2985, "step": 326640 }, { "epoch": 93.97295742232451, "grad_norm": 1.0751155614852905, "learning_rate": 0.00012054085155350979, "loss": 0.2466, "step": 326650 }, { "epoch": 93.97583429228999, "grad_norm": 1.5983505249023438, "learning_rate": 0.00012048331415420023, "loss": 0.3136, "step": 326660 }, { "epoch": 93.97871116225546, "grad_norm": 0.8662156462669373, "learning_rate": 0.00012042577675489067, "loss": 0.2457, "step": 326670 }, { "epoch": 93.98158803222094, "grad_norm": 0.9732303619384766, "learning_rate": 0.00012036823935558113, "loss": 0.2553, "step": 326680 }, { "epoch": 93.98446490218642, "grad_norm": 1.5611830949783325, "learning_rate": 0.00012031070195627158, "loss": 0.3079, "step": 326690 }, { "epoch": 93.9873417721519, "grad_norm": 0.6225669980049133, "learning_rate": 0.00012025316455696203, "loss": 0.2148, "step": 326700 }, { "epoch": 93.99021864211737, "grad_norm": 0.7228904962539673, "learning_rate": 0.00012019562715765248, "loss": 0.2804, "step": 326710 }, { "epoch": 93.99309551208286, "grad_norm": 2.2394559383392334, "learning_rate": 0.00012013808975834292, "loss": 0.432, "step": 326720 }, { "epoch": 93.99597238204834, "grad_norm": 1.8674205541610718, "learning_rate": 0.00012008055235903337, "loss": 0.2314, "step": 326730 }, { "epoch": 93.99884925201381, "grad_norm": 0.8507459759712219, "learning_rate": 0.00012002301495972383, "loss": 0.3682, "step": 326740 }, { "epoch": 94.00172612197929, "grad_norm": 0.7304379940032959, "learning_rate": 0.00011996547756041428, "loss": 0.2571, "step": 326750 }, { "epoch": 94.00460299194476, "grad_norm": 1.311812400817871, "learning_rate": 0.00011990794016110472, "loss": 0.2368, "step": 326760 }, { "epoch": 94.00747986191024, "grad_norm": 0.991668164730072, "learning_rate": 0.00011985040276179516, "loss": 0.2728, "step": 326770 }, { "epoch": 94.01035673187572, "grad_norm": 1.1176196336746216, "learning_rate": 0.00011979286536248562, "loss": 0.2714, "step": 326780 }, { "epoch": 94.0132336018412, "grad_norm": 0.844612717628479, "learning_rate": 0.00011973532796317607, "loss": 0.2442, "step": 326790 }, { "epoch": 94.01611047180667, "grad_norm": 0.9748495817184448, "learning_rate": 0.00011967779056386651, "loss": 0.2676, "step": 326800 }, { "epoch": 94.01898734177215, "grad_norm": 1.0862787961959839, "learning_rate": 0.00011962025316455697, "loss": 0.2334, "step": 326810 }, { "epoch": 94.02186421173764, "grad_norm": 0.9622014164924622, "learning_rate": 0.00011956271576524741, "loss": 0.2588, "step": 326820 }, { "epoch": 94.02474108170311, "grad_norm": 1.0410531759262085, "learning_rate": 0.00011950517836593785, "loss": 0.2673, "step": 326830 }, { "epoch": 94.02761795166859, "grad_norm": 1.296826958656311, "learning_rate": 0.00011944764096662832, "loss": 0.3246, "step": 326840 }, { "epoch": 94.03049482163406, "grad_norm": 0.7581610083580017, "learning_rate": 0.00011939010356731876, "loss": 0.2495, "step": 326850 }, { "epoch": 94.03337169159954, "grad_norm": 0.8972684144973755, "learning_rate": 0.0001193325661680092, "loss": 0.2186, "step": 326860 }, { "epoch": 94.03624856156502, "grad_norm": 1.3539607524871826, "learning_rate": 0.00011927502876869965, "loss": 0.2363, "step": 326870 }, { "epoch": 94.0391254315305, "grad_norm": 1.3479331731796265, "learning_rate": 0.0001192174913693901, "loss": 0.2811, "step": 326880 }, { "epoch": 94.04200230149597, "grad_norm": 1.1060576438903809, "learning_rate": 0.00011915995397008056, "loss": 0.2548, "step": 326890 }, { "epoch": 94.04487917146145, "grad_norm": 0.8254055976867676, "learning_rate": 0.000119102416570771, "loss": 0.2874, "step": 326900 }, { "epoch": 94.04775604142692, "grad_norm": 1.7982929944992065, "learning_rate": 0.00011904487917146146, "loss": 0.3161, "step": 326910 }, { "epoch": 94.0506329113924, "grad_norm": 0.587215781211853, "learning_rate": 0.0001189873417721519, "loss": 0.3119, "step": 326920 }, { "epoch": 94.05350978135789, "grad_norm": 1.0017688274383545, "learning_rate": 0.00011892980437284235, "loss": 0.2684, "step": 326930 }, { "epoch": 94.05638665132336, "grad_norm": 1.6306712627410889, "learning_rate": 0.00011887226697353281, "loss": 0.2313, "step": 326940 }, { "epoch": 94.05926352128884, "grad_norm": 2.071697473526001, "learning_rate": 0.00011881472957422325, "loss": 0.2327, "step": 326950 }, { "epoch": 94.06214039125432, "grad_norm": 1.2963001728057861, "learning_rate": 0.00011875719217491369, "loss": 0.3188, "step": 326960 }, { "epoch": 94.0650172612198, "grad_norm": 0.6662015318870544, "learning_rate": 0.00011869965477560413, "loss": 0.2315, "step": 326970 }, { "epoch": 94.06789413118527, "grad_norm": 1.1558340787887573, "learning_rate": 0.0001186421173762946, "loss": 0.3039, "step": 326980 }, { "epoch": 94.07077100115075, "grad_norm": 1.270646572113037, "learning_rate": 0.00011858457997698505, "loss": 0.2969, "step": 326990 }, { "epoch": 94.07364787111622, "grad_norm": 0.6606437563896179, "learning_rate": 0.00011852704257767549, "loss": 0.2787, "step": 327000 }, { "epoch": 94.0765247410817, "grad_norm": 2.2989814281463623, "learning_rate": 0.00011846950517836594, "loss": 0.3449, "step": 327010 }, { "epoch": 94.07940161104717, "grad_norm": 0.9283066987991333, "learning_rate": 0.00011841196777905639, "loss": 0.2711, "step": 327020 }, { "epoch": 94.08227848101266, "grad_norm": 0.9243260025978088, "learning_rate": 0.00011835443037974684, "loss": 0.269, "step": 327030 }, { "epoch": 94.08515535097814, "grad_norm": 1.1492321491241455, "learning_rate": 0.0001182968929804373, "loss": 0.2424, "step": 327040 }, { "epoch": 94.08803222094362, "grad_norm": 0.9790874719619751, "learning_rate": 0.00011823935558112774, "loss": 0.2715, "step": 327050 }, { "epoch": 94.0909090909091, "grad_norm": 0.8950165510177612, "learning_rate": 0.00011818181818181818, "loss": 0.251, "step": 327060 }, { "epoch": 94.09378596087457, "grad_norm": 1.3865612745285034, "learning_rate": 0.00011812428078250862, "loss": 0.3191, "step": 327070 }, { "epoch": 94.09666283084005, "grad_norm": 1.2087597846984863, "learning_rate": 0.00011806674338319909, "loss": 0.2321, "step": 327080 }, { "epoch": 94.09953970080552, "grad_norm": 0.9825507998466492, "learning_rate": 0.00011800920598388953, "loss": 0.2993, "step": 327090 }, { "epoch": 94.102416570771, "grad_norm": 0.958415687084198, "learning_rate": 0.00011795166858457997, "loss": 0.2123, "step": 327100 }, { "epoch": 94.10529344073647, "grad_norm": 1.3708149194717407, "learning_rate": 0.00011789413118527043, "loss": 0.276, "step": 327110 }, { "epoch": 94.10817031070195, "grad_norm": 1.1307231187820435, "learning_rate": 0.00011783659378596087, "loss": 0.2241, "step": 327120 }, { "epoch": 94.11104718066743, "grad_norm": 1.8007429838180542, "learning_rate": 0.00011777905638665133, "loss": 0.3372, "step": 327130 }, { "epoch": 94.11392405063292, "grad_norm": 1.239691138267517, "learning_rate": 0.00011772151898734178, "loss": 0.3221, "step": 327140 }, { "epoch": 94.1168009205984, "grad_norm": 1.2639520168304443, "learning_rate": 0.00011766398158803223, "loss": 0.3105, "step": 327150 }, { "epoch": 94.11967779056387, "grad_norm": 0.9325562119483948, "learning_rate": 0.00011760644418872267, "loss": 0.2503, "step": 327160 }, { "epoch": 94.12255466052935, "grad_norm": 0.810379147529602, "learning_rate": 0.00011754890678941311, "loss": 0.2954, "step": 327170 }, { "epoch": 94.12543153049482, "grad_norm": 0.7990702986717224, "learning_rate": 0.00011749136939010358, "loss": 0.2396, "step": 327180 }, { "epoch": 94.1283084004603, "grad_norm": 1.2955104112625122, "learning_rate": 0.00011743383199079402, "loss": 0.3001, "step": 327190 }, { "epoch": 94.13118527042577, "grad_norm": 0.5617355108261108, "learning_rate": 0.00011737629459148446, "loss": 0.214, "step": 327200 }, { "epoch": 94.13406214039125, "grad_norm": 1.0336782932281494, "learning_rate": 0.00011731875719217492, "loss": 0.259, "step": 327210 }, { "epoch": 94.13693901035673, "grad_norm": 0.6560036540031433, "learning_rate": 0.00011726121979286536, "loss": 0.2743, "step": 327220 }, { "epoch": 94.1398158803222, "grad_norm": 1.2078932523727417, "learning_rate": 0.00011720368239355582, "loss": 0.2474, "step": 327230 }, { "epoch": 94.1426927502877, "grad_norm": 1.4989724159240723, "learning_rate": 0.00011714614499424627, "loss": 0.2726, "step": 327240 }, { "epoch": 94.14556962025317, "grad_norm": 1.3413352966308594, "learning_rate": 0.00011708860759493671, "loss": 0.3149, "step": 327250 }, { "epoch": 94.14844649021865, "grad_norm": 1.5021662712097168, "learning_rate": 0.00011703107019562715, "loss": 0.2368, "step": 327260 }, { "epoch": 94.15132336018412, "grad_norm": 0.7402523159980774, "learning_rate": 0.0001169735327963176, "loss": 0.2537, "step": 327270 }, { "epoch": 94.1542002301496, "grad_norm": 1.1304402351379395, "learning_rate": 0.00011691599539700807, "loss": 0.2795, "step": 327280 }, { "epoch": 94.15707710011507, "grad_norm": 0.9929704666137695, "learning_rate": 0.00011685845799769851, "loss": 0.2231, "step": 327290 }, { "epoch": 94.15995397008055, "grad_norm": 1.9110697507858276, "learning_rate": 0.00011680092059838895, "loss": 0.2928, "step": 327300 }, { "epoch": 94.16283084004603, "grad_norm": 1.00438392162323, "learning_rate": 0.0001167433831990794, "loss": 0.334, "step": 327310 }, { "epoch": 94.1657077100115, "grad_norm": 1.4772002696990967, "learning_rate": 0.00011668584579976985, "loss": 0.2568, "step": 327320 }, { "epoch": 94.16858457997698, "grad_norm": 1.8205567598342896, "learning_rate": 0.0001166283084004603, "loss": 0.2985, "step": 327330 }, { "epoch": 94.17146144994246, "grad_norm": 1.4317798614501953, "learning_rate": 0.00011657077100115076, "loss": 0.2642, "step": 327340 }, { "epoch": 94.17433831990795, "grad_norm": 1.185172200202942, "learning_rate": 0.0001165132336018412, "loss": 0.2639, "step": 327350 }, { "epoch": 94.17721518987342, "grad_norm": 1.129797339439392, "learning_rate": 0.00011645569620253164, "loss": 0.2377, "step": 327360 }, { "epoch": 94.1800920598389, "grad_norm": 0.7621638774871826, "learning_rate": 0.00011639815880322208, "loss": 0.3316, "step": 327370 }, { "epoch": 94.18296892980437, "grad_norm": 1.1635866165161133, "learning_rate": 0.00011634062140391255, "loss": 0.2079, "step": 327380 }, { "epoch": 94.18584579976985, "grad_norm": 1.0061620473861694, "learning_rate": 0.000116283084004603, "loss": 0.2132, "step": 327390 }, { "epoch": 94.18872266973533, "grad_norm": 1.1826163530349731, "learning_rate": 0.00011622554660529344, "loss": 0.2986, "step": 327400 }, { "epoch": 94.1915995397008, "grad_norm": 1.692294955253601, "learning_rate": 0.00011616800920598389, "loss": 0.3472, "step": 327410 }, { "epoch": 94.19447640966628, "grad_norm": 1.0968279838562012, "learning_rate": 0.00011611047180667435, "loss": 0.2493, "step": 327420 }, { "epoch": 94.19735327963176, "grad_norm": 0.8578348159790039, "learning_rate": 0.00011605293440736479, "loss": 0.3017, "step": 327430 }, { "epoch": 94.20023014959723, "grad_norm": 1.0956276655197144, "learning_rate": 0.00011599539700805525, "loss": 0.2435, "step": 327440 }, { "epoch": 94.20310701956272, "grad_norm": 0.7558486461639404, "learning_rate": 0.00011593785960874569, "loss": 0.2652, "step": 327450 }, { "epoch": 94.2059838895282, "grad_norm": 1.3467116355895996, "learning_rate": 0.00011588032220943613, "loss": 0.3063, "step": 327460 }, { "epoch": 94.20886075949367, "grad_norm": 0.9154503345489502, "learning_rate": 0.00011582278481012658, "loss": 0.2092, "step": 327470 }, { "epoch": 94.21173762945915, "grad_norm": 1.2336777448654175, "learning_rate": 0.00011576524741081704, "loss": 0.3397, "step": 327480 }, { "epoch": 94.21461449942463, "grad_norm": 0.9550907015800476, "learning_rate": 0.00011570771001150748, "loss": 0.2323, "step": 327490 }, { "epoch": 94.2174913693901, "grad_norm": 1.2830464839935303, "learning_rate": 0.00011565017261219792, "loss": 0.3031, "step": 327500 }, { "epoch": 94.22036823935558, "grad_norm": 1.5536433458328247, "learning_rate": 0.00011559263521288838, "loss": 0.2804, "step": 327510 }, { "epoch": 94.22324510932106, "grad_norm": 1.0029715299606323, "learning_rate": 0.00011553509781357884, "loss": 0.2986, "step": 327520 }, { "epoch": 94.22612197928653, "grad_norm": 0.9844235181808472, "learning_rate": 0.00011547756041426928, "loss": 0.3188, "step": 327530 }, { "epoch": 94.22899884925201, "grad_norm": 2.3089420795440674, "learning_rate": 0.00011542002301495973, "loss": 0.2529, "step": 327540 }, { "epoch": 94.23187571921748, "grad_norm": 0.9108047485351562, "learning_rate": 0.00011536248561565017, "loss": 0.2689, "step": 327550 }, { "epoch": 94.23475258918297, "grad_norm": 1.017340064048767, "learning_rate": 0.00011530494821634062, "loss": 0.2598, "step": 327560 }, { "epoch": 94.23762945914845, "grad_norm": 0.9484134316444397, "learning_rate": 0.00011524741081703107, "loss": 0.2238, "step": 327570 }, { "epoch": 94.24050632911393, "grad_norm": 1.4847975969314575, "learning_rate": 0.00011518987341772153, "loss": 0.241, "step": 327580 }, { "epoch": 94.2433831990794, "grad_norm": 1.2599412202835083, "learning_rate": 0.00011513233601841197, "loss": 0.352, "step": 327590 }, { "epoch": 94.24626006904488, "grad_norm": 1.648138403892517, "learning_rate": 0.00011507479861910241, "loss": 0.2564, "step": 327600 }, { "epoch": 94.24913693901036, "grad_norm": 1.3558293581008911, "learning_rate": 0.00011501726121979287, "loss": 0.2902, "step": 327610 }, { "epoch": 94.25201380897583, "grad_norm": 0.7465510368347168, "learning_rate": 0.00011495972382048332, "loss": 0.2543, "step": 327620 }, { "epoch": 94.25489067894131, "grad_norm": 1.4252920150756836, "learning_rate": 0.00011490218642117376, "loss": 0.2601, "step": 327630 }, { "epoch": 94.25776754890678, "grad_norm": 1.603525996208191, "learning_rate": 0.00011484464902186422, "loss": 0.2763, "step": 327640 }, { "epoch": 94.26064441887226, "grad_norm": 0.9401931166648865, "learning_rate": 0.00011478711162255466, "loss": 0.2543, "step": 327650 }, { "epoch": 94.26352128883775, "grad_norm": 1.4301608800888062, "learning_rate": 0.0001147295742232451, "loss": 0.2466, "step": 327660 }, { "epoch": 94.26639815880323, "grad_norm": 1.1562864780426025, "learning_rate": 0.00011467203682393557, "loss": 0.2608, "step": 327670 }, { "epoch": 94.2692750287687, "grad_norm": 1.5174838304519653, "learning_rate": 0.00011461449942462601, "loss": 0.2461, "step": 327680 }, { "epoch": 94.27215189873418, "grad_norm": 1.69906485080719, "learning_rate": 0.00011455696202531646, "loss": 0.2938, "step": 327690 }, { "epoch": 94.27502876869966, "grad_norm": 1.6763087511062622, "learning_rate": 0.0001144994246260069, "loss": 0.2737, "step": 327700 }, { "epoch": 94.27790563866513, "grad_norm": 1.0456005334854126, "learning_rate": 0.00011444188722669735, "loss": 0.2394, "step": 327710 }, { "epoch": 94.28078250863061, "grad_norm": 0.6285583972930908, "learning_rate": 0.00011438434982738781, "loss": 0.2551, "step": 327720 }, { "epoch": 94.28365937859608, "grad_norm": 0.8378381133079529, "learning_rate": 0.00011432681242807825, "loss": 0.3274, "step": 327730 }, { "epoch": 94.28653624856156, "grad_norm": 0.7524807453155518, "learning_rate": 0.00011426927502876871, "loss": 0.2409, "step": 327740 }, { "epoch": 94.28941311852704, "grad_norm": 1.8416857719421387, "learning_rate": 0.00011421173762945915, "loss": 0.2648, "step": 327750 }, { "epoch": 94.29228998849253, "grad_norm": 1.46660315990448, "learning_rate": 0.00011415420023014959, "loss": 0.3024, "step": 327760 }, { "epoch": 94.295166858458, "grad_norm": 1.0000325441360474, "learning_rate": 0.00011409666283084006, "loss": 0.2165, "step": 327770 }, { "epoch": 94.29804372842348, "grad_norm": 1.493592619895935, "learning_rate": 0.0001140391254315305, "loss": 0.2603, "step": 327780 }, { "epoch": 94.30092059838896, "grad_norm": 0.7859343886375427, "learning_rate": 0.00011398158803222094, "loss": 0.2572, "step": 327790 }, { "epoch": 94.30379746835443, "grad_norm": 0.8680781722068787, "learning_rate": 0.00011392405063291139, "loss": 0.2568, "step": 327800 }, { "epoch": 94.30667433831991, "grad_norm": 0.9403213262557983, "learning_rate": 0.00011386651323360184, "loss": 0.2265, "step": 327810 }, { "epoch": 94.30955120828538, "grad_norm": 1.140864372253418, "learning_rate": 0.0001138089758342923, "loss": 0.2914, "step": 327820 }, { "epoch": 94.31242807825086, "grad_norm": 1.0401268005371094, "learning_rate": 0.00011375143843498274, "loss": 0.272, "step": 327830 }, { "epoch": 94.31530494821634, "grad_norm": 0.9479875564575195, "learning_rate": 0.0001136939010356732, "loss": 0.2565, "step": 327840 }, { "epoch": 94.31818181818181, "grad_norm": 1.1904714107513428, "learning_rate": 0.00011363636363636364, "loss": 0.251, "step": 327850 }, { "epoch": 94.32105868814729, "grad_norm": 0.802463173866272, "learning_rate": 0.00011357882623705408, "loss": 0.2942, "step": 327860 }, { "epoch": 94.32393555811278, "grad_norm": 0.6834057569503784, "learning_rate": 0.00011352128883774455, "loss": 0.2006, "step": 327870 }, { "epoch": 94.32681242807826, "grad_norm": 0.7668585777282715, "learning_rate": 0.00011346375143843499, "loss": 0.2177, "step": 327880 }, { "epoch": 94.32968929804373, "grad_norm": 1.2560213804244995, "learning_rate": 0.00011340621403912543, "loss": 0.3067, "step": 327890 }, { "epoch": 94.33256616800921, "grad_norm": 1.041820764541626, "learning_rate": 0.00011334867663981587, "loss": 0.2344, "step": 327900 }, { "epoch": 94.33544303797468, "grad_norm": 1.8154925107955933, "learning_rate": 0.00011329113924050633, "loss": 0.2206, "step": 327910 }, { "epoch": 94.33831990794016, "grad_norm": 1.201332688331604, "learning_rate": 0.00011323360184119678, "loss": 0.2873, "step": 327920 }, { "epoch": 94.34119677790564, "grad_norm": 0.3951861560344696, "learning_rate": 0.00011317606444188723, "loss": 0.2647, "step": 327930 }, { "epoch": 94.34407364787111, "grad_norm": 1.086495041847229, "learning_rate": 0.00011311852704257768, "loss": 0.2785, "step": 327940 }, { "epoch": 94.34695051783659, "grad_norm": 1.2630800008773804, "learning_rate": 0.00011306098964326812, "loss": 0.2912, "step": 327950 }, { "epoch": 94.34982738780207, "grad_norm": 1.0843807458877563, "learning_rate": 0.00011300345224395858, "loss": 0.2196, "step": 327960 }, { "epoch": 94.35270425776756, "grad_norm": 0.774773359298706, "learning_rate": 0.00011294591484464903, "loss": 0.245, "step": 327970 }, { "epoch": 94.35558112773303, "grad_norm": 1.0892990827560425, "learning_rate": 0.00011288837744533948, "loss": 0.2883, "step": 327980 }, { "epoch": 94.35845799769851, "grad_norm": 0.7614263892173767, "learning_rate": 0.00011283084004602992, "loss": 0.2746, "step": 327990 }, { "epoch": 94.36133486766398, "grad_norm": 1.2252496480941772, "learning_rate": 0.00011277330264672036, "loss": 0.2578, "step": 328000 }, { "epoch": 94.36421173762946, "grad_norm": 0.6438856720924377, "learning_rate": 0.00011271576524741083, "loss": 0.2835, "step": 328010 }, { "epoch": 94.36708860759494, "grad_norm": 0.6051538586616516, "learning_rate": 0.00011265822784810127, "loss": 0.225, "step": 328020 }, { "epoch": 94.36996547756041, "grad_norm": 1.1615232229232788, "learning_rate": 0.00011260069044879171, "loss": 0.256, "step": 328030 }, { "epoch": 94.37284234752589, "grad_norm": 0.8460673689842224, "learning_rate": 0.00011254315304948217, "loss": 0.2104, "step": 328040 }, { "epoch": 94.37571921749137, "grad_norm": 1.4025378227233887, "learning_rate": 0.00011248561565017261, "loss": 0.3174, "step": 328050 }, { "epoch": 94.37859608745684, "grad_norm": 1.1466056108474731, "learning_rate": 0.00011242807825086307, "loss": 0.2357, "step": 328060 }, { "epoch": 94.38147295742232, "grad_norm": 1.650391936302185, "learning_rate": 0.00011237054085155352, "loss": 0.2858, "step": 328070 }, { "epoch": 94.38434982738781, "grad_norm": 1.3357175588607788, "learning_rate": 0.00011231300345224396, "loss": 0.2804, "step": 328080 }, { "epoch": 94.38722669735328, "grad_norm": 1.3817476034164429, "learning_rate": 0.0001122554660529344, "loss": 0.3634, "step": 328090 }, { "epoch": 94.39010356731876, "grad_norm": 2.1073567867279053, "learning_rate": 0.00011219792865362485, "loss": 0.2692, "step": 328100 }, { "epoch": 94.39298043728424, "grad_norm": 1.0699199438095093, "learning_rate": 0.00011214039125431532, "loss": 0.3161, "step": 328110 }, { "epoch": 94.39585730724971, "grad_norm": 1.1120405197143555, "learning_rate": 0.00011208285385500576, "loss": 0.2311, "step": 328120 }, { "epoch": 94.39873417721519, "grad_norm": 1.257819652557373, "learning_rate": 0.0001120253164556962, "loss": 0.2795, "step": 328130 }, { "epoch": 94.40161104718067, "grad_norm": 0.569047212600708, "learning_rate": 0.00011196777905638666, "loss": 0.241, "step": 328140 }, { "epoch": 94.40448791714614, "grad_norm": 0.9196865558624268, "learning_rate": 0.0001119102416570771, "loss": 0.2918, "step": 328150 }, { "epoch": 94.40736478711162, "grad_norm": 1.0164903402328491, "learning_rate": 0.00011185270425776755, "loss": 0.242, "step": 328160 }, { "epoch": 94.4102416570771, "grad_norm": 0.7292895317077637, "learning_rate": 0.00011179516685845801, "loss": 0.2403, "step": 328170 }, { "epoch": 94.41311852704258, "grad_norm": 0.9400175213813782, "learning_rate": 0.00011173762945914845, "loss": 0.2909, "step": 328180 }, { "epoch": 94.41599539700806, "grad_norm": 1.4588396549224854, "learning_rate": 0.00011168009205983889, "loss": 0.2838, "step": 328190 }, { "epoch": 94.41887226697354, "grad_norm": 0.724104642868042, "learning_rate": 0.00011162255466052933, "loss": 0.2486, "step": 328200 }, { "epoch": 94.42174913693901, "grad_norm": 1.331247091293335, "learning_rate": 0.0001115650172612198, "loss": 0.2514, "step": 328210 }, { "epoch": 94.42462600690449, "grad_norm": 0.7294431328773499, "learning_rate": 0.00011150747986191025, "loss": 0.2593, "step": 328220 }, { "epoch": 94.42750287686997, "grad_norm": 0.976518452167511, "learning_rate": 0.00011144994246260069, "loss": 0.2695, "step": 328230 }, { "epoch": 94.43037974683544, "grad_norm": 1.0532118082046509, "learning_rate": 0.00011139240506329114, "loss": 0.306, "step": 328240 }, { "epoch": 94.43325661680092, "grad_norm": 2.319157600402832, "learning_rate": 0.00011133486766398159, "loss": 0.3156, "step": 328250 }, { "epoch": 94.4361334867664, "grad_norm": 0.5973051190376282, "learning_rate": 0.00011127733026467204, "loss": 0.2436, "step": 328260 }, { "epoch": 94.43901035673187, "grad_norm": 1.1720924377441406, "learning_rate": 0.0001112197928653625, "loss": 0.2335, "step": 328270 }, { "epoch": 94.44188722669735, "grad_norm": 1.7013905048370361, "learning_rate": 0.00011116225546605294, "loss": 0.2899, "step": 328280 }, { "epoch": 94.44476409666284, "grad_norm": 1.3922771215438843, "learning_rate": 0.00011110471806674338, "loss": 0.2617, "step": 328290 }, { "epoch": 94.44764096662831, "grad_norm": 0.8174340128898621, "learning_rate": 0.00011104718066743382, "loss": 0.3031, "step": 328300 }, { "epoch": 94.45051783659379, "grad_norm": 1.1692259311676025, "learning_rate": 0.00011098964326812429, "loss": 0.2361, "step": 328310 }, { "epoch": 94.45339470655927, "grad_norm": 0.9855039715766907, "learning_rate": 0.00011093210586881473, "loss": 0.2555, "step": 328320 }, { "epoch": 94.45627157652474, "grad_norm": 1.7897552251815796, "learning_rate": 0.00011087456846950518, "loss": 0.2909, "step": 328330 }, { "epoch": 94.45914844649022, "grad_norm": 1.0190855264663696, "learning_rate": 0.00011081703107019563, "loss": 0.2705, "step": 328340 }, { "epoch": 94.4620253164557, "grad_norm": 1.0508004426956177, "learning_rate": 0.00011075949367088607, "loss": 0.2613, "step": 328350 }, { "epoch": 94.46490218642117, "grad_norm": 1.2875910997390747, "learning_rate": 0.00011070195627157653, "loss": 0.2965, "step": 328360 }, { "epoch": 94.46777905638665, "grad_norm": 0.8550615310668945, "learning_rate": 0.00011064441887226698, "loss": 0.3176, "step": 328370 }, { "epoch": 94.47065592635212, "grad_norm": 2.2394649982452393, "learning_rate": 0.00011058688147295743, "loss": 0.3107, "step": 328380 }, { "epoch": 94.47353279631761, "grad_norm": 1.2207738161087036, "learning_rate": 0.00011052934407364787, "loss": 0.2605, "step": 328390 }, { "epoch": 94.47640966628309, "grad_norm": 1.059441089630127, "learning_rate": 0.00011047180667433831, "loss": 0.3138, "step": 328400 }, { "epoch": 94.47928653624857, "grad_norm": 1.598585605621338, "learning_rate": 0.00011041426927502878, "loss": 0.2522, "step": 328410 }, { "epoch": 94.48216340621404, "grad_norm": 1.564690351486206, "learning_rate": 0.00011035673187571922, "loss": 0.2469, "step": 328420 }, { "epoch": 94.48504027617952, "grad_norm": 0.9423285722732544, "learning_rate": 0.00011029919447640966, "loss": 0.2645, "step": 328430 }, { "epoch": 94.487917146145, "grad_norm": 0.7839896082878113, "learning_rate": 0.00011024165707710012, "loss": 0.2696, "step": 328440 }, { "epoch": 94.49079401611047, "grad_norm": 1.794499397277832, "learning_rate": 0.00011018411967779057, "loss": 0.2489, "step": 328450 }, { "epoch": 94.49367088607595, "grad_norm": 0.6105074286460876, "learning_rate": 0.00011012658227848102, "loss": 0.2532, "step": 328460 }, { "epoch": 94.49654775604142, "grad_norm": 1.8504688739776611, "learning_rate": 0.00011006904487917147, "loss": 0.2537, "step": 328470 }, { "epoch": 94.4994246260069, "grad_norm": 1.1314915418624878, "learning_rate": 0.00011001150747986191, "loss": 0.2734, "step": 328480 }, { "epoch": 94.50230149597238, "grad_norm": 1.3288787603378296, "learning_rate": 0.00010995397008055235, "loss": 0.2601, "step": 328490 }, { "epoch": 94.50517836593787, "grad_norm": 0.8404002785682678, "learning_rate": 0.00010989643268124281, "loss": 0.256, "step": 328500 }, { "epoch": 94.50805523590334, "grad_norm": 1.2848422527313232, "learning_rate": 0.00010983889528193327, "loss": 0.2818, "step": 328510 }, { "epoch": 94.51093210586882, "grad_norm": 1.012866497039795, "learning_rate": 0.00010978135788262371, "loss": 0.3578, "step": 328520 }, { "epoch": 94.5138089758343, "grad_norm": 0.7889034748077393, "learning_rate": 0.00010972382048331415, "loss": 0.2627, "step": 328530 }, { "epoch": 94.51668584579977, "grad_norm": 1.1378443241119385, "learning_rate": 0.0001096662830840046, "loss": 0.2476, "step": 328540 }, { "epoch": 94.51956271576525, "grad_norm": 1.0861672163009644, "learning_rate": 0.00010960874568469506, "loss": 0.327, "step": 328550 }, { "epoch": 94.52243958573072, "grad_norm": 1.612484335899353, "learning_rate": 0.0001095512082853855, "loss": 0.2695, "step": 328560 }, { "epoch": 94.5253164556962, "grad_norm": 0.765892744064331, "learning_rate": 0.00010949367088607596, "loss": 0.2854, "step": 328570 }, { "epoch": 94.52819332566168, "grad_norm": 1.3959342241287231, "learning_rate": 0.0001094361334867664, "loss": 0.2156, "step": 328580 }, { "epoch": 94.53107019562715, "grad_norm": 0.9538525938987732, "learning_rate": 0.00010937859608745684, "loss": 0.25, "step": 328590 }, { "epoch": 94.53394706559264, "grad_norm": 1.6636898517608643, "learning_rate": 0.0001093210586881473, "loss": 0.2632, "step": 328600 }, { "epoch": 94.53682393555812, "grad_norm": 1.5970041751861572, "learning_rate": 0.00010926352128883775, "loss": 0.235, "step": 328610 }, { "epoch": 94.5397008055236, "grad_norm": 1.8417001962661743, "learning_rate": 0.0001092059838895282, "loss": 0.33, "step": 328620 }, { "epoch": 94.54257767548907, "grad_norm": 1.2624232769012451, "learning_rate": 0.00010914844649021864, "loss": 0.2718, "step": 328630 }, { "epoch": 94.54545454545455, "grad_norm": 1.8472265005111694, "learning_rate": 0.00010909090909090909, "loss": 0.3081, "step": 328640 }, { "epoch": 94.54833141542002, "grad_norm": 1.895620584487915, "learning_rate": 0.00010903337169159955, "loss": 0.2614, "step": 328650 }, { "epoch": 94.5512082853855, "grad_norm": 0.9500526785850525, "learning_rate": 0.00010897583429228999, "loss": 0.2513, "step": 328660 }, { "epoch": 94.55408515535098, "grad_norm": 1.523238182067871, "learning_rate": 0.00010891829689298045, "loss": 0.3323, "step": 328670 }, { "epoch": 94.55696202531645, "grad_norm": 1.8750466108322144, "learning_rate": 0.00010886075949367089, "loss": 0.2805, "step": 328680 }, { "epoch": 94.55983889528193, "grad_norm": 1.340720772743225, "learning_rate": 0.00010880322209436133, "loss": 0.2497, "step": 328690 }, { "epoch": 94.5627157652474, "grad_norm": 0.9566484093666077, "learning_rate": 0.00010874568469505178, "loss": 0.3377, "step": 328700 }, { "epoch": 94.5655926352129, "grad_norm": 1.0421350002288818, "learning_rate": 0.00010868814729574224, "loss": 0.2626, "step": 328710 }, { "epoch": 94.56846950517837, "grad_norm": 1.0800687074661255, "learning_rate": 0.00010863060989643268, "loss": 0.2434, "step": 328720 }, { "epoch": 94.57134637514385, "grad_norm": 1.5384036302566528, "learning_rate": 0.00010857307249712312, "loss": 0.2778, "step": 328730 }, { "epoch": 94.57422324510932, "grad_norm": 1.1649388074874878, "learning_rate": 0.00010851553509781358, "loss": 0.3137, "step": 328740 }, { "epoch": 94.5771001150748, "grad_norm": 1.5323100090026855, "learning_rate": 0.00010845799769850404, "loss": 0.2508, "step": 328750 }, { "epoch": 94.57997698504028, "grad_norm": 1.327397346496582, "learning_rate": 0.00010840046029919448, "loss": 0.2664, "step": 328760 }, { "epoch": 94.58285385500575, "grad_norm": 0.7148263454437256, "learning_rate": 0.00010834292289988493, "loss": 0.2334, "step": 328770 }, { "epoch": 94.58573072497123, "grad_norm": 1.0517821311950684, "learning_rate": 0.00010828538550057537, "loss": 0.2283, "step": 328780 }, { "epoch": 94.5886075949367, "grad_norm": 0.9196560978889465, "learning_rate": 0.00010822784810126582, "loss": 0.2264, "step": 328790 }, { "epoch": 94.59148446490218, "grad_norm": 0.9202188849449158, "learning_rate": 0.00010817031070195627, "loss": 0.272, "step": 328800 }, { "epoch": 94.59436133486767, "grad_norm": 1.073607087135315, "learning_rate": 0.00010811277330264673, "loss": 0.2933, "step": 328810 }, { "epoch": 94.59723820483315, "grad_norm": 1.852549433708191, "learning_rate": 0.00010805523590333717, "loss": 0.3105, "step": 328820 }, { "epoch": 94.60011507479862, "grad_norm": 1.8241853713989258, "learning_rate": 0.00010799769850402761, "loss": 0.3966, "step": 328830 }, { "epoch": 94.6029919447641, "grad_norm": 1.0346862077713013, "learning_rate": 0.00010794016110471807, "loss": 0.2629, "step": 328840 }, { "epoch": 94.60586881472958, "grad_norm": 1.579711675643921, "learning_rate": 0.00010788262370540852, "loss": 0.407, "step": 328850 }, { "epoch": 94.60874568469505, "grad_norm": 1.7306923866271973, "learning_rate": 0.00010782508630609896, "loss": 0.3466, "step": 328860 }, { "epoch": 94.61162255466053, "grad_norm": 1.361633062362671, "learning_rate": 0.00010776754890678942, "loss": 0.2185, "step": 328870 }, { "epoch": 94.614499424626, "grad_norm": 1.0552723407745361, "learning_rate": 0.00010771001150747986, "loss": 0.2425, "step": 328880 }, { "epoch": 94.61737629459148, "grad_norm": 1.5142589807510376, "learning_rate": 0.0001076524741081703, "loss": 0.2328, "step": 328890 }, { "epoch": 94.62025316455696, "grad_norm": 1.115578055381775, "learning_rate": 0.00010759493670886077, "loss": 0.2391, "step": 328900 }, { "epoch": 94.62313003452243, "grad_norm": 1.2038291692733765, "learning_rate": 0.00010753739930955121, "loss": 0.3327, "step": 328910 }, { "epoch": 94.62600690448792, "grad_norm": 0.8568629026412964, "learning_rate": 0.00010747986191024166, "loss": 0.2988, "step": 328920 }, { "epoch": 94.6288837744534, "grad_norm": 1.0711488723754883, "learning_rate": 0.0001074223245109321, "loss": 0.2328, "step": 328930 }, { "epoch": 94.63176064441888, "grad_norm": 1.1536785364151, "learning_rate": 0.00010736478711162255, "loss": 0.2929, "step": 328940 }, { "epoch": 94.63463751438435, "grad_norm": 0.933278501033783, "learning_rate": 0.00010730724971231301, "loss": 0.2509, "step": 328950 }, { "epoch": 94.63751438434983, "grad_norm": 1.5990123748779297, "learning_rate": 0.00010724971231300345, "loss": 0.2478, "step": 328960 }, { "epoch": 94.6403912543153, "grad_norm": 0.8023537397384644, "learning_rate": 0.00010719217491369391, "loss": 0.2231, "step": 328970 }, { "epoch": 94.64326812428078, "grad_norm": 0.9525822401046753, "learning_rate": 0.00010713463751438435, "loss": 0.2974, "step": 328980 }, { "epoch": 94.64614499424626, "grad_norm": 1.570056676864624, "learning_rate": 0.0001070771001150748, "loss": 0.2918, "step": 328990 }, { "epoch": 94.64902186421173, "grad_norm": 1.188986897468567, "learning_rate": 0.00010701956271576526, "loss": 0.2508, "step": 329000 }, { "epoch": 94.65189873417721, "grad_norm": 1.576585292816162, "learning_rate": 0.0001069620253164557, "loss": 0.2828, "step": 329010 }, { "epoch": 94.6547756041427, "grad_norm": 1.345949411392212, "learning_rate": 0.00010690448791714614, "loss": 0.3036, "step": 329020 }, { "epoch": 94.65765247410818, "grad_norm": 1.0511693954467773, "learning_rate": 0.00010684695051783659, "loss": 0.2222, "step": 329030 }, { "epoch": 94.66052934407365, "grad_norm": 1.4071611166000366, "learning_rate": 0.00010678941311852706, "loss": 0.2543, "step": 329040 }, { "epoch": 94.66340621403913, "grad_norm": 2.0625953674316406, "learning_rate": 0.0001067318757192175, "loss": 0.2694, "step": 329050 }, { "epoch": 94.6662830840046, "grad_norm": 0.7898781299591064, "learning_rate": 0.00010667433831990794, "loss": 0.2769, "step": 329060 }, { "epoch": 94.66915995397008, "grad_norm": 1.0283987522125244, "learning_rate": 0.0001066168009205984, "loss": 0.2427, "step": 329070 }, { "epoch": 94.67203682393556, "grad_norm": 0.8458985686302185, "learning_rate": 0.00010655926352128884, "loss": 0.2188, "step": 329080 }, { "epoch": 94.67491369390103, "grad_norm": 0.9372623562812805, "learning_rate": 0.00010650172612197929, "loss": 0.2703, "step": 329090 }, { "epoch": 94.67779056386651, "grad_norm": 1.8130968809127808, "learning_rate": 0.00010644418872266975, "loss": 0.2448, "step": 329100 }, { "epoch": 94.68066743383199, "grad_norm": 1.712005615234375, "learning_rate": 0.00010638665132336019, "loss": 0.2914, "step": 329110 }, { "epoch": 94.68354430379746, "grad_norm": 1.2546674013137817, "learning_rate": 0.00010632911392405063, "loss": 0.2416, "step": 329120 }, { "epoch": 94.68642117376295, "grad_norm": 0.9676104784011841, "learning_rate": 0.00010627157652474107, "loss": 0.2745, "step": 329130 }, { "epoch": 94.68929804372843, "grad_norm": 1.8405804634094238, "learning_rate": 0.00010621403912543154, "loss": 0.2399, "step": 329140 }, { "epoch": 94.6921749136939, "grad_norm": 0.6696153283119202, "learning_rate": 0.00010615650172612198, "loss": 0.2302, "step": 329150 }, { "epoch": 94.69505178365938, "grad_norm": 1.287052035331726, "learning_rate": 0.00010609896432681243, "loss": 0.245, "step": 329160 }, { "epoch": 94.69792865362486, "grad_norm": 1.300010085105896, "learning_rate": 0.00010604142692750288, "loss": 0.2576, "step": 329170 }, { "epoch": 94.70080552359033, "grad_norm": 2.233358144760132, "learning_rate": 0.00010598388952819332, "loss": 0.3072, "step": 329180 }, { "epoch": 94.70368239355581, "grad_norm": 0.7952907085418701, "learning_rate": 0.00010592635212888378, "loss": 0.2357, "step": 329190 }, { "epoch": 94.70655926352129, "grad_norm": 1.4246636629104614, "learning_rate": 0.00010586881472957423, "loss": 0.2266, "step": 329200 }, { "epoch": 94.70943613348676, "grad_norm": 0.9441733956336975, "learning_rate": 0.00010581127733026468, "loss": 0.2363, "step": 329210 }, { "epoch": 94.71231300345224, "grad_norm": 1.0949074029922485, "learning_rate": 0.00010575373993095512, "loss": 0.2466, "step": 329220 }, { "epoch": 94.71518987341773, "grad_norm": 1.1331161260604858, "learning_rate": 0.00010569620253164556, "loss": 0.2923, "step": 329230 }, { "epoch": 94.7180667433832, "grad_norm": 0.9011446833610535, "learning_rate": 0.00010563866513233603, "loss": 0.2683, "step": 329240 }, { "epoch": 94.72094361334868, "grad_norm": 1.3460887670516968, "learning_rate": 0.00010558112773302647, "loss": 0.2531, "step": 329250 }, { "epoch": 94.72382048331416, "grad_norm": 1.1848628520965576, "learning_rate": 0.00010552359033371691, "loss": 0.2772, "step": 329260 }, { "epoch": 94.72669735327963, "grad_norm": 1.2576528787612915, "learning_rate": 0.00010546605293440737, "loss": 0.2679, "step": 329270 }, { "epoch": 94.72957422324511, "grad_norm": 2.034714698791504, "learning_rate": 0.00010540851553509781, "loss": 0.2994, "step": 329280 }, { "epoch": 94.73245109321059, "grad_norm": 0.8126630783081055, "learning_rate": 0.00010535097813578827, "loss": 0.2875, "step": 329290 }, { "epoch": 94.73532796317606, "grad_norm": 1.1809828281402588, "learning_rate": 0.00010529344073647872, "loss": 0.2889, "step": 329300 }, { "epoch": 94.73820483314154, "grad_norm": 1.564595341682434, "learning_rate": 0.00010523590333716916, "loss": 0.2471, "step": 329310 }, { "epoch": 94.74108170310701, "grad_norm": 1.752706527709961, "learning_rate": 0.0001051783659378596, "loss": 0.315, "step": 329320 }, { "epoch": 94.74395857307249, "grad_norm": 1.0989292860031128, "learning_rate": 0.00010512082853855005, "loss": 0.2536, "step": 329330 }, { "epoch": 94.74683544303798, "grad_norm": 0.8739280700683594, "learning_rate": 0.00010506329113924052, "loss": 0.2424, "step": 329340 }, { "epoch": 94.74971231300346, "grad_norm": 0.9497767686843872, "learning_rate": 0.00010500575373993096, "loss": 0.2321, "step": 329350 }, { "epoch": 94.75258918296893, "grad_norm": 0.6359221339225769, "learning_rate": 0.0001049482163406214, "loss": 0.2859, "step": 329360 }, { "epoch": 94.75546605293441, "grad_norm": 0.8223579525947571, "learning_rate": 0.00010489067894131186, "loss": 0.2396, "step": 329370 }, { "epoch": 94.75834292289989, "grad_norm": 1.148890495300293, "learning_rate": 0.0001048331415420023, "loss": 0.2644, "step": 329380 }, { "epoch": 94.76121979286536, "grad_norm": 1.3427399396896362, "learning_rate": 0.00010477560414269275, "loss": 0.2345, "step": 329390 }, { "epoch": 94.76409666283084, "grad_norm": 1.0762180089950562, "learning_rate": 0.00010471806674338321, "loss": 0.3286, "step": 329400 }, { "epoch": 94.76697353279631, "grad_norm": 1.7390344142913818, "learning_rate": 0.00010466052934407365, "loss": 0.2648, "step": 329410 }, { "epoch": 94.76985040276179, "grad_norm": 0.9181858897209167, "learning_rate": 0.00010460299194476409, "loss": 0.3668, "step": 329420 }, { "epoch": 94.77272727272727, "grad_norm": 0.959908664226532, "learning_rate": 0.00010454545454545454, "loss": 0.2563, "step": 329430 }, { "epoch": 94.77560414269276, "grad_norm": 1.2104724645614624, "learning_rate": 0.000104487917146145, "loss": 0.2874, "step": 329440 }, { "epoch": 94.77848101265823, "grad_norm": 0.9960565567016602, "learning_rate": 0.00010443037974683545, "loss": 0.2176, "step": 329450 }, { "epoch": 94.78135788262371, "grad_norm": 1.0259507894515991, "learning_rate": 0.00010437284234752589, "loss": 0.2261, "step": 329460 }, { "epoch": 94.78423475258919, "grad_norm": 1.4324389696121216, "learning_rate": 0.00010431530494821634, "loss": 0.2759, "step": 329470 }, { "epoch": 94.78711162255466, "grad_norm": 1.671700119972229, "learning_rate": 0.0001042577675489068, "loss": 0.3086, "step": 329480 }, { "epoch": 94.78998849252014, "grad_norm": 1.1296387910842896, "learning_rate": 0.00010420023014959724, "loss": 0.2944, "step": 329490 }, { "epoch": 94.79286536248561, "grad_norm": 1.1496914625167847, "learning_rate": 0.0001041426927502877, "loss": 0.3185, "step": 329500 }, { "epoch": 94.79574223245109, "grad_norm": 1.3907544612884521, "learning_rate": 0.00010408515535097814, "loss": 0.227, "step": 329510 }, { "epoch": 94.79861910241657, "grad_norm": 1.5100215673446655, "learning_rate": 0.00010402761795166858, "loss": 0.2552, "step": 329520 }, { "epoch": 94.80149597238204, "grad_norm": 1.6204278469085693, "learning_rate": 0.00010397008055235904, "loss": 0.3065, "step": 329530 }, { "epoch": 94.80437284234753, "grad_norm": 1.5276929140090942, "learning_rate": 0.00010391254315304949, "loss": 0.2953, "step": 329540 }, { "epoch": 94.80724971231301, "grad_norm": 1.5256308317184448, "learning_rate": 0.00010385500575373993, "loss": 0.3144, "step": 329550 }, { "epoch": 94.81012658227849, "grad_norm": 0.7558026313781738, "learning_rate": 0.00010379746835443038, "loss": 0.2343, "step": 329560 }, { "epoch": 94.81300345224396, "grad_norm": 0.8160846829414368, "learning_rate": 0.00010373993095512083, "loss": 0.2449, "step": 329570 }, { "epoch": 94.81588032220944, "grad_norm": 0.970757782459259, "learning_rate": 0.00010368239355581129, "loss": 0.2418, "step": 329580 }, { "epoch": 94.81875719217491, "grad_norm": 0.9649892449378967, "learning_rate": 0.00010362485615650173, "loss": 0.3035, "step": 329590 }, { "epoch": 94.82163406214039, "grad_norm": 1.8750609159469604, "learning_rate": 0.00010356731875719218, "loss": 0.2257, "step": 329600 }, { "epoch": 94.82451093210587, "grad_norm": 0.922920823097229, "learning_rate": 0.00010350978135788263, "loss": 0.2609, "step": 329610 }, { "epoch": 94.82738780207134, "grad_norm": 1.0952649116516113, "learning_rate": 0.00010345224395857307, "loss": 0.2748, "step": 329620 }, { "epoch": 94.83026467203682, "grad_norm": 1.0059846639633179, "learning_rate": 0.00010339470655926352, "loss": 0.2868, "step": 329630 }, { "epoch": 94.8331415420023, "grad_norm": 0.75111323595047, "learning_rate": 0.00010333716915995398, "loss": 0.2914, "step": 329640 }, { "epoch": 94.83601841196779, "grad_norm": 0.9813758730888367, "learning_rate": 0.00010327963176064442, "loss": 0.2725, "step": 329650 }, { "epoch": 94.83889528193326, "grad_norm": 1.786601185798645, "learning_rate": 0.00010322209436133486, "loss": 0.2806, "step": 329660 }, { "epoch": 94.84177215189874, "grad_norm": 1.0132561922073364, "learning_rate": 0.00010316455696202532, "loss": 0.3254, "step": 329670 }, { "epoch": 94.84464902186421, "grad_norm": 0.8929244875907898, "learning_rate": 0.00010310701956271577, "loss": 0.2498, "step": 329680 }, { "epoch": 94.84752589182969, "grad_norm": 0.7551122307777405, "learning_rate": 0.00010304948216340622, "loss": 0.2346, "step": 329690 }, { "epoch": 94.85040276179517, "grad_norm": 1.363850474357605, "learning_rate": 0.00010299194476409667, "loss": 0.3065, "step": 329700 }, { "epoch": 94.85327963176064, "grad_norm": 1.370993971824646, "learning_rate": 0.00010293440736478711, "loss": 0.2439, "step": 329710 }, { "epoch": 94.85615650172612, "grad_norm": 1.3033586740493774, "learning_rate": 0.00010287686996547755, "loss": 0.2641, "step": 329720 }, { "epoch": 94.8590333716916, "grad_norm": 1.555112600326538, "learning_rate": 0.00010281933256616801, "loss": 0.2945, "step": 329730 }, { "epoch": 94.86191024165707, "grad_norm": 1.7375353574752808, "learning_rate": 0.00010276179516685847, "loss": 0.3211, "step": 329740 }, { "epoch": 94.86478711162256, "grad_norm": 1.4498804807662964, "learning_rate": 0.00010270425776754891, "loss": 0.2523, "step": 329750 }, { "epoch": 94.86766398158804, "grad_norm": 2.455556869506836, "learning_rate": 0.00010264672036823935, "loss": 0.3897, "step": 329760 }, { "epoch": 94.87054085155351, "grad_norm": 1.7765992879867554, "learning_rate": 0.0001025891829689298, "loss": 0.2829, "step": 329770 }, { "epoch": 94.87341772151899, "grad_norm": 1.513145089149475, "learning_rate": 0.00010253164556962026, "loss": 0.3199, "step": 329780 }, { "epoch": 94.87629459148447, "grad_norm": 0.8386414051055908, "learning_rate": 0.0001024741081703107, "loss": 0.2543, "step": 329790 }, { "epoch": 94.87917146144994, "grad_norm": 1.1183902025222778, "learning_rate": 0.00010241657077100116, "loss": 0.2296, "step": 329800 }, { "epoch": 94.88204833141542, "grad_norm": 0.8750931620597839, "learning_rate": 0.0001023590333716916, "loss": 0.3279, "step": 329810 }, { "epoch": 94.8849252013809, "grad_norm": 0.8096060156822205, "learning_rate": 0.00010230149597238204, "loss": 0.2368, "step": 329820 }, { "epoch": 94.88780207134637, "grad_norm": 1.0649352073669434, "learning_rate": 0.0001022439585730725, "loss": 0.2553, "step": 329830 }, { "epoch": 94.89067894131185, "grad_norm": 1.082470417022705, "learning_rate": 0.00010218642117376295, "loss": 0.2682, "step": 329840 }, { "epoch": 94.89355581127732, "grad_norm": 2.0382158756256104, "learning_rate": 0.0001021288837744534, "loss": 0.3255, "step": 329850 }, { "epoch": 94.89643268124281, "grad_norm": 1.2860225439071655, "learning_rate": 0.00010207134637514384, "loss": 0.2815, "step": 329860 }, { "epoch": 94.89930955120829, "grad_norm": 1.0719046592712402, "learning_rate": 0.00010201380897583429, "loss": 0.236, "step": 329870 }, { "epoch": 94.90218642117377, "grad_norm": 0.9391180872917175, "learning_rate": 0.00010195627157652475, "loss": 0.2714, "step": 329880 }, { "epoch": 94.90506329113924, "grad_norm": 0.6634247303009033, "learning_rate": 0.00010189873417721519, "loss": 0.2272, "step": 329890 }, { "epoch": 94.90794016110472, "grad_norm": 1.3094468116760254, "learning_rate": 0.00010184119677790565, "loss": 0.2566, "step": 329900 }, { "epoch": 94.9108170310702, "grad_norm": 1.75236177444458, "learning_rate": 0.00010178365937859609, "loss": 0.2515, "step": 329910 }, { "epoch": 94.91369390103567, "grad_norm": 1.2523387670516968, "learning_rate": 0.00010172612197928653, "loss": 0.2605, "step": 329920 }, { "epoch": 94.91657077100115, "grad_norm": 0.6986128687858582, "learning_rate": 0.00010166858457997698, "loss": 0.2332, "step": 329930 }, { "epoch": 94.91944764096662, "grad_norm": 1.0839285850524902, "learning_rate": 0.00010161104718066744, "loss": 0.322, "step": 329940 }, { "epoch": 94.9223245109321, "grad_norm": 1.2015219926834106, "learning_rate": 0.00010155350978135788, "loss": 0.2556, "step": 329950 }, { "epoch": 94.92520138089759, "grad_norm": 1.0038100481033325, "learning_rate": 0.00010149597238204832, "loss": 0.2395, "step": 329960 }, { "epoch": 94.92807825086307, "grad_norm": 1.2776802778244019, "learning_rate": 0.0001014384349827388, "loss": 0.3465, "step": 329970 }, { "epoch": 94.93095512082854, "grad_norm": 1.0242841243743896, "learning_rate": 0.00010138089758342924, "loss": 0.2221, "step": 329980 }, { "epoch": 94.93383199079402, "grad_norm": 0.7047044038772583, "learning_rate": 0.00010132336018411968, "loss": 0.2943, "step": 329990 }, { "epoch": 94.9367088607595, "grad_norm": 1.1778360605239868, "learning_rate": 0.00010126582278481013, "loss": 0.2862, "step": 330000 }, { "epoch": 94.93958573072497, "grad_norm": 0.9189364314079285, "learning_rate": 0.00010120828538550057, "loss": 0.2901, "step": 330010 }, { "epoch": 94.94246260069045, "grad_norm": 1.0417548418045044, "learning_rate": 0.00010115074798619103, "loss": 0.2899, "step": 330020 }, { "epoch": 94.94533947065592, "grad_norm": 0.9140356779098511, "learning_rate": 0.00010109321058688147, "loss": 0.2719, "step": 330030 }, { "epoch": 94.9482163406214, "grad_norm": 0.9173753261566162, "learning_rate": 0.00010103567318757193, "loss": 0.2909, "step": 330040 }, { "epoch": 94.95109321058688, "grad_norm": 0.8642417192459106, "learning_rate": 0.00010097813578826237, "loss": 0.2694, "step": 330050 }, { "epoch": 94.95397008055235, "grad_norm": 1.1223348379135132, "learning_rate": 0.00010092059838895281, "loss": 0.2816, "step": 330060 }, { "epoch": 94.95684695051784, "grad_norm": 1.612295150756836, "learning_rate": 0.00010086306098964328, "loss": 0.2502, "step": 330070 }, { "epoch": 94.95972382048332, "grad_norm": 1.0739991664886475, "learning_rate": 0.00010080552359033372, "loss": 0.3297, "step": 330080 }, { "epoch": 94.9626006904488, "grad_norm": 1.0272395610809326, "learning_rate": 0.00010074798619102416, "loss": 0.293, "step": 330090 }, { "epoch": 94.96547756041427, "grad_norm": 1.194346308708191, "learning_rate": 0.00010069044879171462, "loss": 0.2851, "step": 330100 }, { "epoch": 94.96835443037975, "grad_norm": 0.753929078578949, "learning_rate": 0.00010063291139240506, "loss": 0.2438, "step": 330110 }, { "epoch": 94.97123130034522, "grad_norm": 0.9368829727172852, "learning_rate": 0.00010057537399309552, "loss": 0.2813, "step": 330120 }, { "epoch": 94.9741081703107, "grad_norm": 1.2512032985687256, "learning_rate": 0.00010051783659378596, "loss": 0.3798, "step": 330130 }, { "epoch": 94.97698504027618, "grad_norm": 1.2853550910949707, "learning_rate": 0.00010046029919447641, "loss": 0.301, "step": 330140 }, { "epoch": 94.97986191024165, "grad_norm": 0.5323103070259094, "learning_rate": 0.00010040276179516686, "loss": 0.277, "step": 330150 }, { "epoch": 94.98273878020713, "grad_norm": 0.8610302805900574, "learning_rate": 0.0001003452243958573, "loss": 0.2279, "step": 330160 }, { "epoch": 94.98561565017262, "grad_norm": 1.0543248653411865, "learning_rate": 0.00010028768699654777, "loss": 0.2824, "step": 330170 }, { "epoch": 94.9884925201381, "grad_norm": 1.1261438131332397, "learning_rate": 0.00010023014959723821, "loss": 0.2679, "step": 330180 }, { "epoch": 94.99136939010357, "grad_norm": 1.7301928997039795, "learning_rate": 0.00010017261219792865, "loss": 0.2889, "step": 330190 }, { "epoch": 94.99424626006905, "grad_norm": 1.8603103160858154, "learning_rate": 0.00010011507479861911, "loss": 0.2716, "step": 330200 }, { "epoch": 94.99712313003452, "grad_norm": 1.0108970403671265, "learning_rate": 0.00010005753739930955, "loss": 0.2975, "step": 330210 }, { "epoch": 95.0, "grad_norm": 1.7702698707580566, "learning_rate": 0.0001, "loss": 0.2776, "step": 330220 }, { "epoch": 95.00287686996548, "grad_norm": 1.3503072261810303, "learning_rate": 9.994246260069046e-05, "loss": 0.2128, "step": 330230 }, { "epoch": 95.00575373993095, "grad_norm": 0.8373337388038635, "learning_rate": 9.98849252013809e-05, "loss": 0.3028, "step": 330240 }, { "epoch": 95.00863060989643, "grad_norm": 2.2376203536987305, "learning_rate": 9.982738780207134e-05, "loss": 0.2645, "step": 330250 }, { "epoch": 95.0115074798619, "grad_norm": 0.9204315543174744, "learning_rate": 9.976985040276179e-05, "loss": 0.2462, "step": 330260 }, { "epoch": 95.01438434982738, "grad_norm": 1.182666301727295, "learning_rate": 9.971231300345226e-05, "loss": 0.3915, "step": 330270 }, { "epoch": 95.01726121979287, "grad_norm": 1.4189826250076294, "learning_rate": 9.96547756041427e-05, "loss": 0.2136, "step": 330280 }, { "epoch": 95.02013808975835, "grad_norm": 1.0131933689117432, "learning_rate": 9.959723820483314e-05, "loss": 0.3217, "step": 330290 }, { "epoch": 95.02301495972382, "grad_norm": 0.9602558612823486, "learning_rate": 9.95397008055236e-05, "loss": 0.2747, "step": 330300 }, { "epoch": 95.0258918296893, "grad_norm": 1.8469738960266113, "learning_rate": 9.948216340621404e-05, "loss": 0.3071, "step": 330310 }, { "epoch": 95.02876869965478, "grad_norm": 1.1975575685501099, "learning_rate": 9.942462600690449e-05, "loss": 0.3194, "step": 330320 }, { "epoch": 95.03164556962025, "grad_norm": 0.9658759832382202, "learning_rate": 9.936708860759495e-05, "loss": 0.2263, "step": 330330 }, { "epoch": 95.03452243958573, "grad_norm": 0.8642453551292419, "learning_rate": 9.930955120828539e-05, "loss": 0.248, "step": 330340 }, { "epoch": 95.0373993095512, "grad_norm": 1.5657864809036255, "learning_rate": 9.925201380897583e-05, "loss": 0.2315, "step": 330350 }, { "epoch": 95.04027617951668, "grad_norm": 1.907422423362732, "learning_rate": 9.919447640966627e-05, "loss": 0.3081, "step": 330360 }, { "epoch": 95.04315304948216, "grad_norm": 0.6580489873886108, "learning_rate": 9.913693901035674e-05, "loss": 0.2686, "step": 330370 }, { "epoch": 95.04602991944765, "grad_norm": 1.264229655265808, "learning_rate": 9.907940161104718e-05, "loss": 0.3277, "step": 330380 }, { "epoch": 95.04890678941312, "grad_norm": 1.1142656803131104, "learning_rate": 9.902186421173763e-05, "loss": 0.2521, "step": 330390 }, { "epoch": 95.0517836593786, "grad_norm": 1.2407426834106445, "learning_rate": 9.896432681242808e-05, "loss": 0.2946, "step": 330400 }, { "epoch": 95.05466052934408, "grad_norm": 0.8614243268966675, "learning_rate": 9.890678941311852e-05, "loss": 0.2594, "step": 330410 }, { "epoch": 95.05753739930955, "grad_norm": 0.7604190707206726, "learning_rate": 9.884925201380898e-05, "loss": 0.2812, "step": 330420 }, { "epoch": 95.06041426927503, "grad_norm": 1.0548306703567505, "learning_rate": 9.879171461449943e-05, "loss": 0.2159, "step": 330430 }, { "epoch": 95.0632911392405, "grad_norm": 0.5465248227119446, "learning_rate": 9.873417721518988e-05, "loss": 0.2412, "step": 330440 }, { "epoch": 95.06616800920598, "grad_norm": 0.9867873787879944, "learning_rate": 9.867663981588032e-05, "loss": 0.2302, "step": 330450 }, { "epoch": 95.06904487917146, "grad_norm": 1.343077540397644, "learning_rate": 9.861910241657076e-05, "loss": 0.2249, "step": 330460 }, { "epoch": 95.07192174913693, "grad_norm": 1.1682031154632568, "learning_rate": 9.856156501726123e-05, "loss": 0.2126, "step": 330470 }, { "epoch": 95.07479861910241, "grad_norm": 0.9520455598831177, "learning_rate": 9.850402761795167e-05, "loss": 0.2533, "step": 330480 }, { "epoch": 95.0776754890679, "grad_norm": 0.9446020126342773, "learning_rate": 9.844649021864211e-05, "loss": 0.2478, "step": 330490 }, { "epoch": 95.08055235903338, "grad_norm": 1.4520456790924072, "learning_rate": 9.838895281933257e-05, "loss": 0.2745, "step": 330500 }, { "epoch": 95.08342922899885, "grad_norm": 1.7985453605651855, "learning_rate": 9.833141542002302e-05, "loss": 0.2758, "step": 330510 }, { "epoch": 95.08630609896433, "grad_norm": 1.563893437385559, "learning_rate": 9.827387802071347e-05, "loss": 0.2936, "step": 330520 }, { "epoch": 95.0891829689298, "grad_norm": 1.7520277500152588, "learning_rate": 9.821634062140392e-05, "loss": 0.2868, "step": 330530 }, { "epoch": 95.09205983889528, "grad_norm": 0.6930196285247803, "learning_rate": 9.815880322209436e-05, "loss": 0.2563, "step": 330540 }, { "epoch": 95.09493670886076, "grad_norm": 0.9434547424316406, "learning_rate": 9.81012658227848e-05, "loss": 0.2301, "step": 330550 }, { "epoch": 95.09781357882623, "grad_norm": 1.0883979797363281, "learning_rate": 9.804372842347526e-05, "loss": 0.3132, "step": 330560 }, { "epoch": 95.10069044879171, "grad_norm": 0.9236730933189392, "learning_rate": 9.798619102416572e-05, "loss": 0.2208, "step": 330570 }, { "epoch": 95.10356731875719, "grad_norm": 0.7823807597160339, "learning_rate": 9.792865362485616e-05, "loss": 0.296, "step": 330580 }, { "epoch": 95.10644418872268, "grad_norm": 1.2566332817077637, "learning_rate": 9.78711162255466e-05, "loss": 0.2314, "step": 330590 }, { "epoch": 95.10932105868815, "grad_norm": 1.0053664445877075, "learning_rate": 9.781357882623706e-05, "loss": 0.2544, "step": 330600 }, { "epoch": 95.11219792865363, "grad_norm": 1.5130311250686646, "learning_rate": 9.775604142692751e-05, "loss": 0.3239, "step": 330610 }, { "epoch": 95.1150747986191, "grad_norm": 1.147914171218872, "learning_rate": 9.769850402761795e-05, "loss": 0.243, "step": 330620 }, { "epoch": 95.11795166858458, "grad_norm": 0.69263756275177, "learning_rate": 9.764096662830841e-05, "loss": 0.2539, "step": 330630 }, { "epoch": 95.12082853855006, "grad_norm": 1.8472068309783936, "learning_rate": 9.758342922899885e-05, "loss": 0.2641, "step": 330640 }, { "epoch": 95.12370540851553, "grad_norm": 3.3567261695861816, "learning_rate": 9.75258918296893e-05, "loss": 0.341, "step": 330650 }, { "epoch": 95.12658227848101, "grad_norm": 2.3352723121643066, "learning_rate": 9.746835443037975e-05, "loss": 0.2535, "step": 330660 }, { "epoch": 95.12945914844649, "grad_norm": 0.7781589031219482, "learning_rate": 9.74108170310702e-05, "loss": 0.2743, "step": 330670 }, { "epoch": 95.13233601841196, "grad_norm": 1.2282536029815674, "learning_rate": 9.735327963176065e-05, "loss": 0.3257, "step": 330680 }, { "epoch": 95.13521288837744, "grad_norm": 1.09628427028656, "learning_rate": 9.729574223245109e-05, "loss": 0.2585, "step": 330690 }, { "epoch": 95.13808975834293, "grad_norm": 1.0843474864959717, "learning_rate": 9.723820483314154e-05, "loss": 0.2128, "step": 330700 }, { "epoch": 95.1409666283084, "grad_norm": 1.07071852684021, "learning_rate": 9.7180667433832e-05, "loss": 0.263, "step": 330710 }, { "epoch": 95.14384349827388, "grad_norm": 1.3575751781463623, "learning_rate": 9.712313003452244e-05, "loss": 0.387, "step": 330720 }, { "epoch": 95.14672036823936, "grad_norm": 1.1273729801177979, "learning_rate": 9.70655926352129e-05, "loss": 0.2469, "step": 330730 }, { "epoch": 95.14959723820483, "grad_norm": 1.4960813522338867, "learning_rate": 9.700805523590334e-05, "loss": 0.2616, "step": 330740 }, { "epoch": 95.15247410817031, "grad_norm": 0.49693241715431213, "learning_rate": 9.695051783659378e-05, "loss": 0.2051, "step": 330750 }, { "epoch": 95.15535097813579, "grad_norm": 0.7882715463638306, "learning_rate": 9.689298043728424e-05, "loss": 0.2669, "step": 330760 }, { "epoch": 95.15822784810126, "grad_norm": 0.902103066444397, "learning_rate": 9.683544303797469e-05, "loss": 0.2123, "step": 330770 }, { "epoch": 95.16110471806674, "grad_norm": 1.5144046545028687, "learning_rate": 9.677790563866513e-05, "loss": 0.2438, "step": 330780 }, { "epoch": 95.16398158803221, "grad_norm": 1.7784326076507568, "learning_rate": 9.672036823935558e-05, "loss": 0.2499, "step": 330790 }, { "epoch": 95.1668584579977, "grad_norm": 0.7456380724906921, "learning_rate": 9.666283084004603e-05, "loss": 0.2106, "step": 330800 }, { "epoch": 95.16973532796318, "grad_norm": 1.3819599151611328, "learning_rate": 9.660529344073649e-05, "loss": 0.2608, "step": 330810 }, { "epoch": 95.17261219792866, "grad_norm": 0.9889398217201233, "learning_rate": 9.654775604142693e-05, "loss": 0.2534, "step": 330820 }, { "epoch": 95.17548906789413, "grad_norm": 0.9297361373901367, "learning_rate": 9.649021864211738e-05, "loss": 0.2977, "step": 330830 }, { "epoch": 95.17836593785961, "grad_norm": 1.0781314373016357, "learning_rate": 9.643268124280783e-05, "loss": 0.2817, "step": 330840 }, { "epoch": 95.18124280782509, "grad_norm": 0.9278472065925598, "learning_rate": 9.637514384349827e-05, "loss": 0.2377, "step": 330850 }, { "epoch": 95.18411967779056, "grad_norm": 0.7594012022018433, "learning_rate": 9.631760644418872e-05, "loss": 0.2847, "step": 330860 }, { "epoch": 95.18699654775604, "grad_norm": 0.6158739924430847, "learning_rate": 9.626006904487918e-05, "loss": 0.2078, "step": 330870 }, { "epoch": 95.18987341772151, "grad_norm": 0.712063193321228, "learning_rate": 9.620253164556962e-05, "loss": 0.2794, "step": 330880 }, { "epoch": 95.19275028768699, "grad_norm": 1.0302737951278687, "learning_rate": 9.614499424626006e-05, "loss": 0.2656, "step": 330890 }, { "epoch": 95.19562715765247, "grad_norm": 0.5984020233154297, "learning_rate": 9.608745684695052e-05, "loss": 0.3413, "step": 330900 }, { "epoch": 95.19850402761796, "grad_norm": 0.8372993469238281, "learning_rate": 9.602991944764097e-05, "loss": 0.2748, "step": 330910 }, { "epoch": 95.20138089758343, "grad_norm": 1.7215440273284912, "learning_rate": 9.597238204833142e-05, "loss": 0.2778, "step": 330920 }, { "epoch": 95.20425776754891, "grad_norm": 1.2689274549484253, "learning_rate": 9.591484464902187e-05, "loss": 0.2407, "step": 330930 }, { "epoch": 95.20713463751439, "grad_norm": 1.2507719993591309, "learning_rate": 9.585730724971231e-05, "loss": 0.3241, "step": 330940 }, { "epoch": 95.21001150747986, "grad_norm": 1.0274251699447632, "learning_rate": 9.579976985040276e-05, "loss": 0.2559, "step": 330950 }, { "epoch": 95.21288837744534, "grad_norm": 0.978918731212616, "learning_rate": 9.574223245109321e-05, "loss": 0.2054, "step": 330960 }, { "epoch": 95.21576524741081, "grad_norm": 0.8501438498497009, "learning_rate": 9.568469505178367e-05, "loss": 0.2868, "step": 330970 }, { "epoch": 95.21864211737629, "grad_norm": 1.7457730770111084, "learning_rate": 9.562715765247411e-05, "loss": 0.2456, "step": 330980 }, { "epoch": 95.22151898734177, "grad_norm": 0.8386597037315369, "learning_rate": 9.556962025316455e-05, "loss": 0.2479, "step": 330990 }, { "epoch": 95.22439585730724, "grad_norm": 0.6128054857254028, "learning_rate": 9.551208285385502e-05, "loss": 0.233, "step": 331000 }, { "epoch": 95.22727272727273, "grad_norm": 1.2968101501464844, "learning_rate": 9.545454545454546e-05, "loss": 0.2976, "step": 331010 }, { "epoch": 95.23014959723821, "grad_norm": 0.7897170186042786, "learning_rate": 9.53970080552359e-05, "loss": 0.2882, "step": 331020 }, { "epoch": 95.23302646720369, "grad_norm": 0.7601824998855591, "learning_rate": 9.533947065592636e-05, "loss": 0.2043, "step": 331030 }, { "epoch": 95.23590333716916, "grad_norm": 1.056351900100708, "learning_rate": 9.52819332566168e-05, "loss": 0.2594, "step": 331040 }, { "epoch": 95.23878020713464, "grad_norm": 1.6914199590682983, "learning_rate": 9.522439585730726e-05, "loss": 0.2701, "step": 331050 }, { "epoch": 95.24165707710011, "grad_norm": 1.0747536420822144, "learning_rate": 9.51668584579977e-05, "loss": 0.2188, "step": 331060 }, { "epoch": 95.24453394706559, "grad_norm": 1.0237854719161987, "learning_rate": 9.510932105868815e-05, "loss": 0.2307, "step": 331070 }, { "epoch": 95.24741081703107, "grad_norm": 1.3789851665496826, "learning_rate": 9.50517836593786e-05, "loss": 0.2625, "step": 331080 }, { "epoch": 95.25028768699654, "grad_norm": 0.6184459924697876, "learning_rate": 9.499424626006904e-05, "loss": 0.2573, "step": 331090 }, { "epoch": 95.25316455696202, "grad_norm": 1.4855808019638062, "learning_rate": 9.49367088607595e-05, "loss": 0.2487, "step": 331100 }, { "epoch": 95.25604142692751, "grad_norm": 1.385299563407898, "learning_rate": 9.487917146144995e-05, "loss": 0.2789, "step": 331110 }, { "epoch": 95.25891829689299, "grad_norm": 0.9340323209762573, "learning_rate": 9.482163406214039e-05, "loss": 0.2408, "step": 331120 }, { "epoch": 95.26179516685846, "grad_norm": 0.8574644923210144, "learning_rate": 9.476409666283085e-05, "loss": 0.2616, "step": 331130 }, { "epoch": 95.26467203682394, "grad_norm": 1.532912015914917, "learning_rate": 9.470655926352129e-05, "loss": 0.2713, "step": 331140 }, { "epoch": 95.26754890678941, "grad_norm": 0.8682839870452881, "learning_rate": 9.464902186421174e-05, "loss": 0.2807, "step": 331150 }, { "epoch": 95.27042577675489, "grad_norm": 0.8468509912490845, "learning_rate": 9.459148446490219e-05, "loss": 0.2791, "step": 331160 }, { "epoch": 95.27330264672037, "grad_norm": 2.8647620677948, "learning_rate": 9.453394706559264e-05, "loss": 0.2533, "step": 331170 }, { "epoch": 95.27617951668584, "grad_norm": 1.9546055793762207, "learning_rate": 9.447640966628308e-05, "loss": 0.2553, "step": 331180 }, { "epoch": 95.27905638665132, "grad_norm": 2.528721809387207, "learning_rate": 9.441887226697352e-05, "loss": 0.2629, "step": 331190 }, { "epoch": 95.2819332566168, "grad_norm": 1.4230831861495972, "learning_rate": 9.4361334867664e-05, "loss": 0.2666, "step": 331200 }, { "epoch": 95.28481012658227, "grad_norm": 0.7662917971611023, "learning_rate": 9.430379746835444e-05, "loss": 0.2722, "step": 331210 }, { "epoch": 95.28768699654776, "grad_norm": 1.1457651853561401, "learning_rate": 9.424626006904488e-05, "loss": 0.2942, "step": 331220 }, { "epoch": 95.29056386651324, "grad_norm": 1.0125614404678345, "learning_rate": 9.418872266973533e-05, "loss": 0.248, "step": 331230 }, { "epoch": 95.29344073647871, "grad_norm": 0.5614899396896362, "learning_rate": 9.413118527042577e-05, "loss": 0.292, "step": 331240 }, { "epoch": 95.29631760644419, "grad_norm": 1.048568606376648, "learning_rate": 9.407364787111623e-05, "loss": 0.2852, "step": 331250 }, { "epoch": 95.29919447640967, "grad_norm": 1.3862030506134033, "learning_rate": 9.401611047180667e-05, "loss": 0.271, "step": 331260 }, { "epoch": 95.30207134637514, "grad_norm": 1.229543924331665, "learning_rate": 9.395857307249713e-05, "loss": 0.2493, "step": 331270 }, { "epoch": 95.30494821634062, "grad_norm": 1.139847993850708, "learning_rate": 9.390103567318757e-05, "loss": 0.2532, "step": 331280 }, { "epoch": 95.3078250863061, "grad_norm": 1.1955314874649048, "learning_rate": 9.384349827387801e-05, "loss": 0.2753, "step": 331290 }, { "epoch": 95.31070195627157, "grad_norm": 1.1496163606643677, "learning_rate": 9.378596087456848e-05, "loss": 0.2504, "step": 331300 }, { "epoch": 95.31357882623705, "grad_norm": 1.0016366243362427, "learning_rate": 9.372842347525892e-05, "loss": 0.2563, "step": 331310 }, { "epoch": 95.31645569620254, "grad_norm": 1.235860824584961, "learning_rate": 9.367088607594936e-05, "loss": 0.2225, "step": 331320 }, { "epoch": 95.31933256616801, "grad_norm": 1.7417702674865723, "learning_rate": 9.361334867663982e-05, "loss": 0.2363, "step": 331330 }, { "epoch": 95.32220943613349, "grad_norm": 0.6041914224624634, "learning_rate": 9.355581127733026e-05, "loss": 0.2281, "step": 331340 }, { "epoch": 95.32508630609897, "grad_norm": 0.8243401050567627, "learning_rate": 9.349827387802072e-05, "loss": 0.2515, "step": 331350 }, { "epoch": 95.32796317606444, "grad_norm": 1.7521107196807861, "learning_rate": 9.344073647871116e-05, "loss": 0.2443, "step": 331360 }, { "epoch": 95.33084004602992, "grad_norm": 0.7665650248527527, "learning_rate": 9.338319907940162e-05, "loss": 0.2868, "step": 331370 }, { "epoch": 95.3337169159954, "grad_norm": 1.5485097169876099, "learning_rate": 9.332566168009206e-05, "loss": 0.2649, "step": 331380 }, { "epoch": 95.33659378596087, "grad_norm": 1.8824563026428223, "learning_rate": 9.32681242807825e-05, "loss": 0.2877, "step": 331390 }, { "epoch": 95.33947065592635, "grad_norm": 0.9792370200157166, "learning_rate": 9.321058688147297e-05, "loss": 0.2714, "step": 331400 }, { "epoch": 95.34234752589182, "grad_norm": 0.8081331849098206, "learning_rate": 9.315304948216341e-05, "loss": 0.2204, "step": 331410 }, { "epoch": 95.3452243958573, "grad_norm": 0.8343082666397095, "learning_rate": 9.309551208285385e-05, "loss": 0.232, "step": 331420 }, { "epoch": 95.34810126582279, "grad_norm": 1.5753412246704102, "learning_rate": 9.303797468354431e-05, "loss": 0.259, "step": 331430 }, { "epoch": 95.35097813578827, "grad_norm": 1.5161668062210083, "learning_rate": 9.298043728423475e-05, "loss": 0.243, "step": 331440 }, { "epoch": 95.35385500575374, "grad_norm": 1.4002909660339355, "learning_rate": 9.29228998849252e-05, "loss": 0.2774, "step": 331450 }, { "epoch": 95.35673187571922, "grad_norm": 1.1407279968261719, "learning_rate": 9.286536248561566e-05, "loss": 0.2536, "step": 331460 }, { "epoch": 95.3596087456847, "grad_norm": 1.3805745840072632, "learning_rate": 9.28078250863061e-05, "loss": 0.2486, "step": 331470 }, { "epoch": 95.36248561565017, "grad_norm": 0.7404927015304565, "learning_rate": 9.275028768699654e-05, "loss": 0.219, "step": 331480 }, { "epoch": 95.36536248561565, "grad_norm": 0.9048866033554077, "learning_rate": 9.269275028768699e-05, "loss": 0.232, "step": 331490 }, { "epoch": 95.36823935558112, "grad_norm": 0.7727029323577881, "learning_rate": 9.263521288837746e-05, "loss": 0.2276, "step": 331500 }, { "epoch": 95.3711162255466, "grad_norm": 1.8974099159240723, "learning_rate": 9.25776754890679e-05, "loss": 0.2547, "step": 331510 }, { "epoch": 95.37399309551208, "grad_norm": 1.2656476497650146, "learning_rate": 9.252013808975834e-05, "loss": 0.2672, "step": 331520 }, { "epoch": 95.37686996547757, "grad_norm": 1.3527891635894775, "learning_rate": 9.24626006904488e-05, "loss": 0.2538, "step": 331530 }, { "epoch": 95.37974683544304, "grad_norm": 0.9550029039382935, "learning_rate": 9.240506329113925e-05, "loss": 0.3196, "step": 331540 }, { "epoch": 95.38262370540852, "grad_norm": 1.6935878992080688, "learning_rate": 9.234752589182969e-05, "loss": 0.2951, "step": 331550 }, { "epoch": 95.385500575374, "grad_norm": 0.9188149571418762, "learning_rate": 9.228998849252015e-05, "loss": 0.2394, "step": 331560 }, { "epoch": 95.38837744533947, "grad_norm": 1.1849119663238525, "learning_rate": 9.223245109321059e-05, "loss": 0.2515, "step": 331570 }, { "epoch": 95.39125431530495, "grad_norm": 0.659803032875061, "learning_rate": 9.217491369390103e-05, "loss": 0.2428, "step": 331580 }, { "epoch": 95.39413118527042, "grad_norm": 1.4665324687957764, "learning_rate": 9.211737629459149e-05, "loss": 0.2681, "step": 331590 }, { "epoch": 95.3970080552359, "grad_norm": 1.7228788137435913, "learning_rate": 9.205983889528194e-05, "loss": 0.2302, "step": 331600 }, { "epoch": 95.39988492520138, "grad_norm": 1.811721682548523, "learning_rate": 9.200230149597238e-05, "loss": 0.3144, "step": 331610 }, { "epoch": 95.40276179516685, "grad_norm": 0.8623467683792114, "learning_rate": 9.194476409666283e-05, "loss": 0.2448, "step": 331620 }, { "epoch": 95.40563866513233, "grad_norm": 0.9852603673934937, "learning_rate": 9.188722669735328e-05, "loss": 0.2394, "step": 331630 }, { "epoch": 95.40851553509782, "grad_norm": 1.2707488536834717, "learning_rate": 9.182968929804374e-05, "loss": 0.3041, "step": 331640 }, { "epoch": 95.4113924050633, "grad_norm": 2.168039560317993, "learning_rate": 9.177215189873418e-05, "loss": 0.2824, "step": 331650 }, { "epoch": 95.41426927502877, "grad_norm": 1.1496379375457764, "learning_rate": 9.171461449942463e-05, "loss": 0.2692, "step": 331660 }, { "epoch": 95.41714614499425, "grad_norm": 0.7823283076286316, "learning_rate": 9.165707710011508e-05, "loss": 0.2394, "step": 331670 }, { "epoch": 95.42002301495972, "grad_norm": 0.8084007501602173, "learning_rate": 9.159953970080552e-05, "loss": 0.236, "step": 331680 }, { "epoch": 95.4228998849252, "grad_norm": 1.568639874458313, "learning_rate": 9.154200230149597e-05, "loss": 0.3043, "step": 331690 }, { "epoch": 95.42577675489068, "grad_norm": 1.1688225269317627, "learning_rate": 9.148446490218643e-05, "loss": 0.2797, "step": 331700 }, { "epoch": 95.42865362485615, "grad_norm": 1.1132211685180664, "learning_rate": 9.142692750287687e-05, "loss": 0.2474, "step": 331710 }, { "epoch": 95.43153049482163, "grad_norm": 1.3175734281539917, "learning_rate": 9.136939010356731e-05, "loss": 0.2754, "step": 331720 }, { "epoch": 95.4344073647871, "grad_norm": 0.7207631468772888, "learning_rate": 9.131185270425777e-05, "loss": 0.2431, "step": 331730 }, { "epoch": 95.4372842347526, "grad_norm": 1.5146557092666626, "learning_rate": 9.125431530494822e-05, "loss": 0.2575, "step": 331740 }, { "epoch": 95.44016110471807, "grad_norm": 1.2380447387695312, "learning_rate": 9.119677790563867e-05, "loss": 0.2588, "step": 331750 }, { "epoch": 95.44303797468355, "grad_norm": 0.9223893880844116, "learning_rate": 9.113924050632912e-05, "loss": 0.2838, "step": 331760 }, { "epoch": 95.44591484464902, "grad_norm": 1.2737034559249878, "learning_rate": 9.108170310701956e-05, "loss": 0.2469, "step": 331770 }, { "epoch": 95.4487917146145, "grad_norm": 1.6016488075256348, "learning_rate": 9.102416570771e-05, "loss": 0.2658, "step": 331780 }, { "epoch": 95.45166858457998, "grad_norm": 0.730895459651947, "learning_rate": 9.096662830840046e-05, "loss": 0.2996, "step": 331790 }, { "epoch": 95.45454545454545, "grad_norm": 1.4711687564849854, "learning_rate": 9.090909090909092e-05, "loss": 0.2864, "step": 331800 }, { "epoch": 95.45742232451093, "grad_norm": 1.9690595865249634, "learning_rate": 9.085155350978136e-05, "loss": 0.2331, "step": 331810 }, { "epoch": 95.4602991944764, "grad_norm": 1.0808475017547607, "learning_rate": 9.07940161104718e-05, "loss": 0.2574, "step": 331820 }, { "epoch": 95.46317606444188, "grad_norm": 1.1841377019882202, "learning_rate": 9.073647871116226e-05, "loss": 0.2784, "step": 331830 }, { "epoch": 95.46605293440736, "grad_norm": 1.243900179862976, "learning_rate": 9.067894131185271e-05, "loss": 0.2749, "step": 331840 }, { "epoch": 95.46892980437285, "grad_norm": 1.1621804237365723, "learning_rate": 9.062140391254315e-05, "loss": 0.2189, "step": 331850 }, { "epoch": 95.47180667433832, "grad_norm": 1.2359707355499268, "learning_rate": 9.056386651323361e-05, "loss": 0.2662, "step": 331860 }, { "epoch": 95.4746835443038, "grad_norm": 1.0883811712265015, "learning_rate": 9.050632911392405e-05, "loss": 0.238, "step": 331870 }, { "epoch": 95.47756041426928, "grad_norm": 1.627380609512329, "learning_rate": 9.04487917146145e-05, "loss": 0.2831, "step": 331880 }, { "epoch": 95.48043728423475, "grad_norm": 0.8038659691810608, "learning_rate": 9.039125431530495e-05, "loss": 0.2455, "step": 331890 }, { "epoch": 95.48331415420023, "grad_norm": 1.3578834533691406, "learning_rate": 9.03337169159954e-05, "loss": 0.2313, "step": 331900 }, { "epoch": 95.4861910241657, "grad_norm": 0.7904465198516846, "learning_rate": 9.027617951668585e-05, "loss": 0.2557, "step": 331910 }, { "epoch": 95.48906789413118, "grad_norm": 1.3697954416275024, "learning_rate": 9.021864211737629e-05, "loss": 0.2381, "step": 331920 }, { "epoch": 95.49194476409666, "grad_norm": 1.3860307931900024, "learning_rate": 9.016110471806674e-05, "loss": 0.2983, "step": 331930 }, { "epoch": 95.49482163406213, "grad_norm": 0.6881849765777588, "learning_rate": 9.01035673187572e-05, "loss": 0.2317, "step": 331940 }, { "epoch": 95.49769850402762, "grad_norm": 1.8944118022918701, "learning_rate": 9.004602991944764e-05, "loss": 0.2722, "step": 331950 }, { "epoch": 95.5005753739931, "grad_norm": 0.7584204077720642, "learning_rate": 8.99884925201381e-05, "loss": 0.2704, "step": 331960 }, { "epoch": 95.50345224395858, "grad_norm": 1.541936993598938, "learning_rate": 8.993095512082854e-05, "loss": 0.2205, "step": 331970 }, { "epoch": 95.50632911392405, "grad_norm": 1.080428957939148, "learning_rate": 8.987341772151898e-05, "loss": 0.2928, "step": 331980 }, { "epoch": 95.50920598388953, "grad_norm": 1.7254644632339478, "learning_rate": 8.981588032220944e-05, "loss": 0.3164, "step": 331990 }, { "epoch": 95.512082853855, "grad_norm": 1.2915141582489014, "learning_rate": 8.975834292289989e-05, "loss": 0.227, "step": 332000 }, { "epoch": 95.51495972382048, "grad_norm": 0.5323435068130493, "learning_rate": 8.970080552359033e-05, "loss": 0.2724, "step": 332010 }, { "epoch": 95.51783659378596, "grad_norm": 0.6966019868850708, "learning_rate": 8.964326812428078e-05, "loss": 0.2379, "step": 332020 }, { "epoch": 95.52071346375143, "grad_norm": 1.5731827020645142, "learning_rate": 8.958573072497124e-05, "loss": 0.2721, "step": 332030 }, { "epoch": 95.52359033371691, "grad_norm": 0.9583075642585754, "learning_rate": 8.952819332566169e-05, "loss": 0.2181, "step": 332040 }, { "epoch": 95.52646720368239, "grad_norm": 1.176062822341919, "learning_rate": 8.947065592635213e-05, "loss": 0.2661, "step": 332050 }, { "epoch": 95.52934407364788, "grad_norm": 1.1174838542938232, "learning_rate": 8.941311852704258e-05, "loss": 0.2219, "step": 332060 }, { "epoch": 95.53222094361335, "grad_norm": 1.7150802612304688, "learning_rate": 8.935558112773303e-05, "loss": 0.2531, "step": 332070 }, { "epoch": 95.53509781357883, "grad_norm": 1.0190480947494507, "learning_rate": 8.929804372842348e-05, "loss": 0.2781, "step": 332080 }, { "epoch": 95.5379746835443, "grad_norm": 0.8969925045967102, "learning_rate": 8.924050632911392e-05, "loss": 0.2328, "step": 332090 }, { "epoch": 95.54085155350978, "grad_norm": 1.3325705528259277, "learning_rate": 8.918296892980438e-05, "loss": 0.2778, "step": 332100 }, { "epoch": 95.54372842347526, "grad_norm": 0.9255566000938416, "learning_rate": 8.912543153049482e-05, "loss": 0.2575, "step": 332110 }, { "epoch": 95.54660529344073, "grad_norm": 1.2077797651290894, "learning_rate": 8.906789413118526e-05, "loss": 0.2235, "step": 332120 }, { "epoch": 95.54948216340621, "grad_norm": 1.9253098964691162, "learning_rate": 8.901035673187573e-05, "loss": 0.2965, "step": 332130 }, { "epoch": 95.55235903337169, "grad_norm": 1.1386663913726807, "learning_rate": 8.895281933256617e-05, "loss": 0.2902, "step": 332140 }, { "epoch": 95.55523590333716, "grad_norm": 2.7429404258728027, "learning_rate": 8.889528193325662e-05, "loss": 0.3065, "step": 332150 }, { "epoch": 95.55811277330265, "grad_norm": 1.0928019285202026, "learning_rate": 8.883774453394707e-05, "loss": 0.2857, "step": 332160 }, { "epoch": 95.56098964326813, "grad_norm": 0.7997029423713684, "learning_rate": 8.878020713463751e-05, "loss": 0.2378, "step": 332170 }, { "epoch": 95.5638665132336, "grad_norm": 1.1820480823516846, "learning_rate": 8.872266973532797e-05, "loss": 0.2996, "step": 332180 }, { "epoch": 95.56674338319908, "grad_norm": 0.7506201863288879, "learning_rate": 8.866513233601841e-05, "loss": 0.232, "step": 332190 }, { "epoch": 95.56962025316456, "grad_norm": 0.9212546348571777, "learning_rate": 8.860759493670887e-05, "loss": 0.2782, "step": 332200 }, { "epoch": 95.57249712313003, "grad_norm": 0.8707376718521118, "learning_rate": 8.855005753739931e-05, "loss": 0.2461, "step": 332210 }, { "epoch": 95.57537399309551, "grad_norm": 0.5040639042854309, "learning_rate": 8.849252013808975e-05, "loss": 0.2769, "step": 332220 }, { "epoch": 95.57825086306099, "grad_norm": 1.6181074380874634, "learning_rate": 8.843498273878022e-05, "loss": 0.2645, "step": 332230 }, { "epoch": 95.58112773302646, "grad_norm": 0.797053337097168, "learning_rate": 8.837744533947066e-05, "loss": 0.2888, "step": 332240 }, { "epoch": 95.58400460299194, "grad_norm": 1.2574297189712524, "learning_rate": 8.83199079401611e-05, "loss": 0.2304, "step": 332250 }, { "epoch": 95.58688147295742, "grad_norm": 0.9529702067375183, "learning_rate": 8.826237054085156e-05, "loss": 0.2818, "step": 332260 }, { "epoch": 95.5897583429229, "grad_norm": 0.8943048119544983, "learning_rate": 8.8204833141542e-05, "loss": 0.2508, "step": 332270 }, { "epoch": 95.59263521288838, "grad_norm": 1.134364366531372, "learning_rate": 8.814729574223246e-05, "loss": 0.2166, "step": 332280 }, { "epoch": 95.59551208285386, "grad_norm": 1.9175666570663452, "learning_rate": 8.80897583429229e-05, "loss": 0.3086, "step": 332290 }, { "epoch": 95.59838895281933, "grad_norm": 0.788413941860199, "learning_rate": 8.803222094361335e-05, "loss": 0.2839, "step": 332300 }, { "epoch": 95.60126582278481, "grad_norm": 0.9739207625389099, "learning_rate": 8.79746835443038e-05, "loss": 0.3006, "step": 332310 }, { "epoch": 95.60414269275029, "grad_norm": 0.9358388781547546, "learning_rate": 8.791714614499424e-05, "loss": 0.2765, "step": 332320 }, { "epoch": 95.60701956271576, "grad_norm": 1.08974289894104, "learning_rate": 8.78596087456847e-05, "loss": 0.271, "step": 332330 }, { "epoch": 95.60989643268124, "grad_norm": 1.0446397066116333, "learning_rate": 8.780207134637515e-05, "loss": 0.3182, "step": 332340 }, { "epoch": 95.61277330264672, "grad_norm": 0.6833512783050537, "learning_rate": 8.774453394706559e-05, "loss": 0.2098, "step": 332350 }, { "epoch": 95.61565017261219, "grad_norm": 3.149966239929199, "learning_rate": 8.768699654775605e-05, "loss": 0.2234, "step": 332360 }, { "epoch": 95.61852704257768, "grad_norm": 0.9766119718551636, "learning_rate": 8.762945914844649e-05, "loss": 0.264, "step": 332370 }, { "epoch": 95.62140391254316, "grad_norm": 2.1778438091278076, "learning_rate": 8.757192174913694e-05, "loss": 0.2441, "step": 332380 }, { "epoch": 95.62428078250863, "grad_norm": 1.4478219747543335, "learning_rate": 8.751438434982739e-05, "loss": 0.2578, "step": 332390 }, { "epoch": 95.62715765247411, "grad_norm": 0.9547141194343567, "learning_rate": 8.745684695051784e-05, "loss": 0.2548, "step": 332400 }, { "epoch": 95.63003452243959, "grad_norm": 0.7286885976791382, "learning_rate": 8.739930955120828e-05, "loss": 0.2264, "step": 332410 }, { "epoch": 95.63291139240506, "grad_norm": 1.7082018852233887, "learning_rate": 8.734177215189872e-05, "loss": 0.2457, "step": 332420 }, { "epoch": 95.63578826237054, "grad_norm": 1.6459324359893799, "learning_rate": 8.72842347525892e-05, "loss": 0.2919, "step": 332430 }, { "epoch": 95.63866513233602, "grad_norm": 1.5498584508895874, "learning_rate": 8.722669735327964e-05, "loss": 0.3157, "step": 332440 }, { "epoch": 95.64154200230149, "grad_norm": 1.6076223850250244, "learning_rate": 8.716915995397008e-05, "loss": 0.3119, "step": 332450 }, { "epoch": 95.64441887226697, "grad_norm": 1.2769163846969604, "learning_rate": 8.711162255466053e-05, "loss": 0.2496, "step": 332460 }, { "epoch": 95.64729574223244, "grad_norm": 1.190901517868042, "learning_rate": 8.705408515535098e-05, "loss": 0.2796, "step": 332470 }, { "epoch": 95.65017261219793, "grad_norm": 1.1435275077819824, "learning_rate": 8.699654775604143e-05, "loss": 0.211, "step": 332480 }, { "epoch": 95.65304948216341, "grad_norm": 2.220489263534546, "learning_rate": 8.693901035673187e-05, "loss": 0.2568, "step": 332490 }, { "epoch": 95.65592635212889, "grad_norm": 1.2508128881454468, "learning_rate": 8.688147295742233e-05, "loss": 0.2497, "step": 332500 }, { "epoch": 95.65880322209436, "grad_norm": 0.8403205871582031, "learning_rate": 8.682393555811277e-05, "loss": 0.2394, "step": 332510 }, { "epoch": 95.66168009205984, "grad_norm": 1.3297730684280396, "learning_rate": 8.676639815880321e-05, "loss": 0.2152, "step": 332520 }, { "epoch": 95.66455696202532, "grad_norm": 1.235876441001892, "learning_rate": 8.670886075949368e-05, "loss": 0.2601, "step": 332530 }, { "epoch": 95.66743383199079, "grad_norm": 1.376758098602295, "learning_rate": 8.665132336018412e-05, "loss": 0.2604, "step": 332540 }, { "epoch": 95.67031070195627, "grad_norm": 1.746453046798706, "learning_rate": 8.659378596087456e-05, "loss": 0.2699, "step": 332550 }, { "epoch": 95.67318757192174, "grad_norm": 1.289286732673645, "learning_rate": 8.653624856156502e-05, "loss": 0.2935, "step": 332560 }, { "epoch": 95.67606444188722, "grad_norm": 0.8930835127830505, "learning_rate": 8.647871116225548e-05, "loss": 0.2656, "step": 332570 }, { "epoch": 95.67894131185271, "grad_norm": 1.3268662691116333, "learning_rate": 8.642117376294592e-05, "loss": 0.2714, "step": 332580 }, { "epoch": 95.68181818181819, "grad_norm": 1.170397162437439, "learning_rate": 8.636363636363636e-05, "loss": 0.2889, "step": 332590 }, { "epoch": 95.68469505178366, "grad_norm": 1.1344599723815918, "learning_rate": 8.630609896432682e-05, "loss": 0.3077, "step": 332600 }, { "epoch": 95.68757192174914, "grad_norm": 0.9007226228713989, "learning_rate": 8.624856156501726e-05, "loss": 0.316, "step": 332610 }, { "epoch": 95.69044879171462, "grad_norm": 0.9820928573608398, "learning_rate": 8.619102416570771e-05, "loss": 0.224, "step": 332620 }, { "epoch": 95.69332566168009, "grad_norm": 1.5470714569091797, "learning_rate": 8.613348676639817e-05, "loss": 0.2508, "step": 332630 }, { "epoch": 95.69620253164557, "grad_norm": 0.7597443461418152, "learning_rate": 8.607594936708861e-05, "loss": 0.2549, "step": 332640 }, { "epoch": 95.69907940161104, "grad_norm": 1.778246521949768, "learning_rate": 8.601841196777905e-05, "loss": 0.2529, "step": 332650 }, { "epoch": 95.70195627157652, "grad_norm": 1.6980788707733154, "learning_rate": 8.596087456846951e-05, "loss": 0.2847, "step": 332660 }, { "epoch": 95.704833141542, "grad_norm": 0.5989035367965698, "learning_rate": 8.590333716915996e-05, "loss": 0.1949, "step": 332670 }, { "epoch": 95.70771001150747, "grad_norm": 1.2385090589523315, "learning_rate": 8.58457997698504e-05, "loss": 0.2671, "step": 332680 }, { "epoch": 95.71058688147296, "grad_norm": 0.9008600115776062, "learning_rate": 8.578826237054085e-05, "loss": 0.2586, "step": 332690 }, { "epoch": 95.71346375143844, "grad_norm": 0.8823531866073608, "learning_rate": 8.57307249712313e-05, "loss": 0.2256, "step": 332700 }, { "epoch": 95.71634062140392, "grad_norm": 1.5773380994796753, "learning_rate": 8.567318757192174e-05, "loss": 0.2524, "step": 332710 }, { "epoch": 95.71921749136939, "grad_norm": 1.3763052225112915, "learning_rate": 8.56156501726122e-05, "loss": 0.2748, "step": 332720 }, { "epoch": 95.72209436133487, "grad_norm": 1.954156517982483, "learning_rate": 8.555811277330266e-05, "loss": 0.3152, "step": 332730 }, { "epoch": 95.72497123130034, "grad_norm": 0.8265191912651062, "learning_rate": 8.55005753739931e-05, "loss": 0.2357, "step": 332740 }, { "epoch": 95.72784810126582, "grad_norm": 0.7193633913993835, "learning_rate": 8.544303797468354e-05, "loss": 0.2364, "step": 332750 }, { "epoch": 95.7307249712313, "grad_norm": 1.087774395942688, "learning_rate": 8.5385500575374e-05, "loss": 0.2756, "step": 332760 }, { "epoch": 95.73360184119677, "grad_norm": 0.9086180329322815, "learning_rate": 8.532796317606445e-05, "loss": 0.2596, "step": 332770 }, { "epoch": 95.73647871116225, "grad_norm": 0.9817894101142883, "learning_rate": 8.527042577675489e-05, "loss": 0.254, "step": 332780 }, { "epoch": 95.73935558112774, "grad_norm": 1.5675976276397705, "learning_rate": 8.521288837744535e-05, "loss": 0.3403, "step": 332790 }, { "epoch": 95.74223245109322, "grad_norm": 1.1118805408477783, "learning_rate": 8.515535097813579e-05, "loss": 0.2428, "step": 332800 }, { "epoch": 95.74510932105869, "grad_norm": 1.1861063241958618, "learning_rate": 8.509781357882623e-05, "loss": 0.2443, "step": 332810 }, { "epoch": 95.74798619102417, "grad_norm": 1.0928815603256226, "learning_rate": 8.504027617951669e-05, "loss": 0.2256, "step": 332820 }, { "epoch": 95.75086306098964, "grad_norm": 0.7462756633758545, "learning_rate": 8.498273878020714e-05, "loss": 0.2822, "step": 332830 }, { "epoch": 95.75373993095512, "grad_norm": 1.1440165042877197, "learning_rate": 8.492520138089758e-05, "loss": 0.2497, "step": 332840 }, { "epoch": 95.7566168009206, "grad_norm": 1.4342429637908936, "learning_rate": 8.486766398158803e-05, "loss": 0.32, "step": 332850 }, { "epoch": 95.75949367088607, "grad_norm": 0.8526073098182678, "learning_rate": 8.481012658227848e-05, "loss": 0.263, "step": 332860 }, { "epoch": 95.76237054085155, "grad_norm": 1.360019326210022, "learning_rate": 8.475258918296894e-05, "loss": 0.3015, "step": 332870 }, { "epoch": 95.76524741081703, "grad_norm": 1.2020161151885986, "learning_rate": 8.469505178365938e-05, "loss": 0.2405, "step": 332880 }, { "epoch": 95.7681242807825, "grad_norm": 1.632976770401001, "learning_rate": 8.463751438434984e-05, "loss": 0.2525, "step": 332890 }, { "epoch": 95.77100115074799, "grad_norm": 1.3766467571258545, "learning_rate": 8.457997698504028e-05, "loss": 0.2178, "step": 332900 }, { "epoch": 95.77387802071347, "grad_norm": 0.7650837302207947, "learning_rate": 8.452243958573072e-05, "loss": 0.2389, "step": 332910 }, { "epoch": 95.77675489067894, "grad_norm": 1.5300450325012207, "learning_rate": 8.446490218642117e-05, "loss": 0.2405, "step": 332920 }, { "epoch": 95.77963176064442, "grad_norm": 1.2137670516967773, "learning_rate": 8.440736478711163e-05, "loss": 0.2177, "step": 332930 }, { "epoch": 95.7825086306099, "grad_norm": 1.290716290473938, "learning_rate": 8.434982738780207e-05, "loss": 0.2479, "step": 332940 }, { "epoch": 95.78538550057537, "grad_norm": 1.228898286819458, "learning_rate": 8.429228998849251e-05, "loss": 0.2817, "step": 332950 }, { "epoch": 95.78826237054085, "grad_norm": 1.5904394388198853, "learning_rate": 8.423475258918297e-05, "loss": 0.2816, "step": 332960 }, { "epoch": 95.79113924050633, "grad_norm": 1.3367900848388672, "learning_rate": 8.417721518987342e-05, "loss": 0.2873, "step": 332970 }, { "epoch": 95.7940161104718, "grad_norm": 1.1893967390060425, "learning_rate": 8.411967779056387e-05, "loss": 0.2772, "step": 332980 }, { "epoch": 95.79689298043728, "grad_norm": 0.856354296207428, "learning_rate": 8.406214039125432e-05, "loss": 0.2339, "step": 332990 }, { "epoch": 95.79976985040277, "grad_norm": 1.4735227823257446, "learning_rate": 8.400460299194476e-05, "loss": 0.2611, "step": 333000 }, { "epoch": 95.80264672036824, "grad_norm": 0.9076979756355286, "learning_rate": 8.39470655926352e-05, "loss": 0.2597, "step": 333010 }, { "epoch": 95.80552359033372, "grad_norm": 0.8988927006721497, "learning_rate": 8.388952819332566e-05, "loss": 0.2391, "step": 333020 }, { "epoch": 95.8084004602992, "grad_norm": 1.1856932640075684, "learning_rate": 8.383199079401612e-05, "loss": 0.2456, "step": 333030 }, { "epoch": 95.81127733026467, "grad_norm": 0.8708293437957764, "learning_rate": 8.377445339470656e-05, "loss": 0.3455, "step": 333040 }, { "epoch": 95.81415420023015, "grad_norm": 1.067975640296936, "learning_rate": 8.3716915995397e-05, "loss": 0.298, "step": 333050 }, { "epoch": 95.81703107019563, "grad_norm": 0.8188386559486389, "learning_rate": 8.365937859608747e-05, "loss": 0.2605, "step": 333060 }, { "epoch": 95.8199079401611, "grad_norm": 1.5965633392333984, "learning_rate": 8.360184119677791e-05, "loss": 0.3726, "step": 333070 }, { "epoch": 95.82278481012658, "grad_norm": 0.8765289783477783, "learning_rate": 8.354430379746835e-05, "loss": 0.2352, "step": 333080 }, { "epoch": 95.82566168009205, "grad_norm": 1.2065647840499878, "learning_rate": 8.348676639815881e-05, "loss": 0.2882, "step": 333090 }, { "epoch": 95.82853855005754, "grad_norm": 1.2399126291275024, "learning_rate": 8.342922899884925e-05, "loss": 0.3133, "step": 333100 }, { "epoch": 95.83141542002302, "grad_norm": 1.0156103372573853, "learning_rate": 8.337169159953971e-05, "loss": 0.2885, "step": 333110 }, { "epoch": 95.8342922899885, "grad_norm": 1.7181566953659058, "learning_rate": 8.331415420023015e-05, "loss": 0.306, "step": 333120 }, { "epoch": 95.83716915995397, "grad_norm": 0.7784190773963928, "learning_rate": 8.32566168009206e-05, "loss": 0.2884, "step": 333130 }, { "epoch": 95.84004602991945, "grad_norm": 0.9695363640785217, "learning_rate": 8.319907940161105e-05, "loss": 0.2322, "step": 333140 }, { "epoch": 95.84292289988493, "grad_norm": 0.8289562463760376, "learning_rate": 8.314154200230149e-05, "loss": 0.2532, "step": 333150 }, { "epoch": 95.8457997698504, "grad_norm": 0.824026346206665, "learning_rate": 8.308400460299196e-05, "loss": 0.2186, "step": 333160 }, { "epoch": 95.84867663981588, "grad_norm": 1.4515464305877686, "learning_rate": 8.30264672036824e-05, "loss": 0.3303, "step": 333170 }, { "epoch": 95.85155350978135, "grad_norm": 0.6724424362182617, "learning_rate": 8.296892980437284e-05, "loss": 0.2281, "step": 333180 }, { "epoch": 95.85443037974683, "grad_norm": 0.93742436170578, "learning_rate": 8.29113924050633e-05, "loss": 0.2589, "step": 333190 }, { "epoch": 95.8573072497123, "grad_norm": 1.1932076215744019, "learning_rate": 8.285385500575374e-05, "loss": 0.3054, "step": 333200 }, { "epoch": 95.8601841196778, "grad_norm": 1.1952688694000244, "learning_rate": 8.27963176064442e-05, "loss": 0.2196, "step": 333210 }, { "epoch": 95.86306098964327, "grad_norm": 1.1104614734649658, "learning_rate": 8.273878020713464e-05, "loss": 0.352, "step": 333220 }, { "epoch": 95.86593785960875, "grad_norm": 1.5664132833480835, "learning_rate": 8.268124280782509e-05, "loss": 0.2767, "step": 333230 }, { "epoch": 95.86881472957423, "grad_norm": 0.7393918633460999, "learning_rate": 8.262370540851553e-05, "loss": 0.2909, "step": 333240 }, { "epoch": 95.8716915995397, "grad_norm": 1.108046531677246, "learning_rate": 8.256616800920598e-05, "loss": 0.2693, "step": 333250 }, { "epoch": 95.87456846950518, "grad_norm": 1.451006293296814, "learning_rate": 8.250863060989644e-05, "loss": 0.2945, "step": 333260 }, { "epoch": 95.87744533947065, "grad_norm": 2.022867441177368, "learning_rate": 8.245109321058689e-05, "loss": 0.3135, "step": 333270 }, { "epoch": 95.88032220943613, "grad_norm": 1.7134150266647339, "learning_rate": 8.239355581127733e-05, "loss": 0.2682, "step": 333280 }, { "epoch": 95.8831990794016, "grad_norm": 1.3645358085632324, "learning_rate": 8.233601841196778e-05, "loss": 0.2977, "step": 333290 }, { "epoch": 95.88607594936708, "grad_norm": 1.051325798034668, "learning_rate": 8.227848101265823e-05, "loss": 0.3384, "step": 333300 }, { "epoch": 95.88895281933257, "grad_norm": 1.520963430404663, "learning_rate": 8.222094361334868e-05, "loss": 0.2713, "step": 333310 }, { "epoch": 95.89182968929805, "grad_norm": 0.9805607199668884, "learning_rate": 8.216340621403912e-05, "loss": 0.217, "step": 333320 }, { "epoch": 95.89470655926353, "grad_norm": 1.013278603553772, "learning_rate": 8.210586881472958e-05, "loss": 0.2237, "step": 333330 }, { "epoch": 95.897583429229, "grad_norm": 0.7389155626296997, "learning_rate": 8.204833141542002e-05, "loss": 0.2679, "step": 333340 }, { "epoch": 95.90046029919448, "grad_norm": 1.549048900604248, "learning_rate": 8.199079401611046e-05, "loss": 0.2553, "step": 333350 }, { "epoch": 95.90333716915995, "grad_norm": 1.111088752746582, "learning_rate": 8.193325661680093e-05, "loss": 0.262, "step": 333360 }, { "epoch": 95.90621403912543, "grad_norm": 1.3504754304885864, "learning_rate": 8.187571921749137e-05, "loss": 0.32, "step": 333370 }, { "epoch": 95.9090909090909, "grad_norm": 2.015796661376953, "learning_rate": 8.181818181818182e-05, "loss": 0.3857, "step": 333380 }, { "epoch": 95.91196777905638, "grad_norm": 0.9702317118644714, "learning_rate": 8.176064441887227e-05, "loss": 0.3072, "step": 333390 }, { "epoch": 95.91484464902186, "grad_norm": 1.3083078861236572, "learning_rate": 8.170310701956271e-05, "loss": 0.2719, "step": 333400 }, { "epoch": 95.91772151898734, "grad_norm": 0.698885977268219, "learning_rate": 8.164556962025317e-05, "loss": 0.3379, "step": 333410 }, { "epoch": 95.92059838895283, "grad_norm": 0.8668916821479797, "learning_rate": 8.158803222094361e-05, "loss": 0.3205, "step": 333420 }, { "epoch": 95.9234752589183, "grad_norm": 1.7899311780929565, "learning_rate": 8.153049482163407e-05, "loss": 0.2566, "step": 333430 }, { "epoch": 95.92635212888378, "grad_norm": 0.5983623266220093, "learning_rate": 8.147295742232451e-05, "loss": 0.2278, "step": 333440 }, { "epoch": 95.92922899884925, "grad_norm": 1.1436430215835571, "learning_rate": 8.141542002301495e-05, "loss": 0.3012, "step": 333450 }, { "epoch": 95.93210586881473, "grad_norm": 1.107153296470642, "learning_rate": 8.135788262370542e-05, "loss": 0.3406, "step": 333460 }, { "epoch": 95.9349827387802, "grad_norm": 0.883012592792511, "learning_rate": 8.130034522439586e-05, "loss": 0.2702, "step": 333470 }, { "epoch": 95.93785960874568, "grad_norm": 1.1239346265792847, "learning_rate": 8.12428078250863e-05, "loss": 0.3796, "step": 333480 }, { "epoch": 95.94073647871116, "grad_norm": 1.501118779182434, "learning_rate": 8.118527042577676e-05, "loss": 0.2386, "step": 333490 }, { "epoch": 95.94361334867664, "grad_norm": 1.8987210988998413, "learning_rate": 8.11277330264672e-05, "loss": 0.2666, "step": 333500 }, { "epoch": 95.94649021864211, "grad_norm": 1.2624633312225342, "learning_rate": 8.107019562715766e-05, "loss": 0.2359, "step": 333510 }, { "epoch": 95.9493670886076, "grad_norm": 0.8329757452011108, "learning_rate": 8.10126582278481e-05, "loss": 0.2601, "step": 333520 }, { "epoch": 95.95224395857308, "grad_norm": 0.7155967950820923, "learning_rate": 8.095512082853855e-05, "loss": 0.2411, "step": 333530 }, { "epoch": 95.95512082853855, "grad_norm": 1.3684600591659546, "learning_rate": 8.0897583429229e-05, "loss": 0.2334, "step": 333540 }, { "epoch": 95.95799769850403, "grad_norm": 1.4421442747116089, "learning_rate": 8.084004602991945e-05, "loss": 0.3222, "step": 333550 }, { "epoch": 95.9608745684695, "grad_norm": 0.6683627963066101, "learning_rate": 8.07825086306099e-05, "loss": 0.2291, "step": 333560 }, { "epoch": 95.96375143843498, "grad_norm": 0.6745085716247559, "learning_rate": 8.072497123130035e-05, "loss": 0.2742, "step": 333570 }, { "epoch": 95.96662830840046, "grad_norm": 1.9628101587295532, "learning_rate": 8.066743383199079e-05, "loss": 0.3015, "step": 333580 }, { "epoch": 95.96950517836594, "grad_norm": 1.8434542417526245, "learning_rate": 8.060989643268125e-05, "loss": 0.3328, "step": 333590 }, { "epoch": 95.97238204833141, "grad_norm": 1.0923280715942383, "learning_rate": 8.05523590333717e-05, "loss": 0.2278, "step": 333600 }, { "epoch": 95.97525891829689, "grad_norm": 0.9843027591705322, "learning_rate": 8.049482163406214e-05, "loss": 0.2552, "step": 333610 }, { "epoch": 95.97813578826236, "grad_norm": 0.7959150671958923, "learning_rate": 8.043728423475259e-05, "loss": 0.2656, "step": 333620 }, { "epoch": 95.98101265822785, "grad_norm": 0.7823224663734436, "learning_rate": 8.037974683544304e-05, "loss": 0.2065, "step": 333630 }, { "epoch": 95.98388952819333, "grad_norm": 1.3620120286941528, "learning_rate": 8.032220943613348e-05, "loss": 0.2581, "step": 333640 }, { "epoch": 95.9867663981588, "grad_norm": 1.0917104482650757, "learning_rate": 8.026467203682394e-05, "loss": 0.3079, "step": 333650 }, { "epoch": 95.98964326812428, "grad_norm": 0.8557631969451904, "learning_rate": 8.02071346375144e-05, "loss": 0.2289, "step": 333660 }, { "epoch": 95.99252013808976, "grad_norm": 1.1724951267242432, "learning_rate": 8.014959723820484e-05, "loss": 0.2944, "step": 333670 }, { "epoch": 95.99539700805524, "grad_norm": 1.10608971118927, "learning_rate": 8.009205983889528e-05, "loss": 0.2759, "step": 333680 }, { "epoch": 95.99827387802071, "grad_norm": 1.3110822439193726, "learning_rate": 8.003452243958573e-05, "loss": 0.2727, "step": 333690 }, { "epoch": 96.00115074798619, "grad_norm": 0.8748716115951538, "learning_rate": 7.997698504027619e-05, "loss": 0.2408, "step": 333700 }, { "epoch": 96.00402761795166, "grad_norm": 0.737577497959137, "learning_rate": 7.991944764096663e-05, "loss": 0.2425, "step": 333710 }, { "epoch": 96.00690448791714, "grad_norm": 1.1563303470611572, "learning_rate": 7.986191024165707e-05, "loss": 0.2835, "step": 333720 }, { "epoch": 96.00978135788263, "grad_norm": 1.2760920524597168, "learning_rate": 7.980437284234753e-05, "loss": 0.2553, "step": 333730 }, { "epoch": 96.0126582278481, "grad_norm": 0.7506577968597412, "learning_rate": 7.974683544303797e-05, "loss": 0.2588, "step": 333740 }, { "epoch": 96.01553509781358, "grad_norm": 0.704261302947998, "learning_rate": 7.968929804372843e-05, "loss": 0.2711, "step": 333750 }, { "epoch": 96.01841196777906, "grad_norm": 0.7251352667808533, "learning_rate": 7.963176064441888e-05, "loss": 0.2491, "step": 333760 }, { "epoch": 96.02128883774454, "grad_norm": 0.807226300239563, "learning_rate": 7.957422324510932e-05, "loss": 0.2528, "step": 333770 }, { "epoch": 96.02416570771001, "grad_norm": 0.8941640257835388, "learning_rate": 7.951668584579977e-05, "loss": 0.2172, "step": 333780 }, { "epoch": 96.02704257767549, "grad_norm": 0.5924692153930664, "learning_rate": 7.945914844649022e-05, "loss": 0.2511, "step": 333790 }, { "epoch": 96.02991944764096, "grad_norm": 1.0089696645736694, "learning_rate": 7.940161104718068e-05, "loss": 0.2297, "step": 333800 }, { "epoch": 96.03279631760644, "grad_norm": 1.292108178138733, "learning_rate": 7.934407364787112e-05, "loss": 0.3145, "step": 333810 }, { "epoch": 96.03567318757192, "grad_norm": 0.9107731580734253, "learning_rate": 7.928653624856156e-05, "loss": 0.274, "step": 333820 }, { "epoch": 96.03855005753739, "grad_norm": 0.7905691266059875, "learning_rate": 7.922899884925202e-05, "loss": 0.2243, "step": 333830 }, { "epoch": 96.04142692750288, "grad_norm": 0.7799434661865234, "learning_rate": 7.917146144994246e-05, "loss": 0.2371, "step": 333840 }, { "epoch": 96.04430379746836, "grad_norm": 1.2932335138320923, "learning_rate": 7.911392405063291e-05, "loss": 0.2934, "step": 333850 }, { "epoch": 96.04718066743384, "grad_norm": 1.5306991338729858, "learning_rate": 7.905638665132337e-05, "loss": 0.2524, "step": 333860 }, { "epoch": 96.05005753739931, "grad_norm": 0.6391482949256897, "learning_rate": 7.899884925201381e-05, "loss": 0.2435, "step": 333870 }, { "epoch": 96.05293440736479, "grad_norm": 0.9547448754310608, "learning_rate": 7.894131185270425e-05, "loss": 0.3009, "step": 333880 }, { "epoch": 96.05581127733026, "grad_norm": 1.6227189302444458, "learning_rate": 7.888377445339471e-05, "loss": 0.2141, "step": 333890 }, { "epoch": 96.05868814729574, "grad_norm": 1.3102971315383911, "learning_rate": 7.882623705408516e-05, "loss": 0.2759, "step": 333900 }, { "epoch": 96.06156501726122, "grad_norm": 0.8873953223228455, "learning_rate": 7.87686996547756e-05, "loss": 0.2086, "step": 333910 }, { "epoch": 96.06444188722669, "grad_norm": 0.8349300026893616, "learning_rate": 7.871116225546605e-05, "loss": 0.3118, "step": 333920 }, { "epoch": 96.06731875719217, "grad_norm": 1.0530399084091187, "learning_rate": 7.86536248561565e-05, "loss": 0.2662, "step": 333930 }, { "epoch": 96.07019562715766, "grad_norm": 0.7772189378738403, "learning_rate": 7.859608745684694e-05, "loss": 0.2142, "step": 333940 }, { "epoch": 96.07307249712314, "grad_norm": 0.9791553020477295, "learning_rate": 7.85385500575374e-05, "loss": 0.2461, "step": 333950 }, { "epoch": 96.07594936708861, "grad_norm": 1.3783926963806152, "learning_rate": 7.848101265822786e-05, "loss": 0.2645, "step": 333960 }, { "epoch": 96.07882623705409, "grad_norm": 1.2956558465957642, "learning_rate": 7.84234752589183e-05, "loss": 0.3371, "step": 333970 }, { "epoch": 96.08170310701956, "grad_norm": 0.5145241618156433, "learning_rate": 7.836593785960874e-05, "loss": 0.2336, "step": 333980 }, { "epoch": 96.08457997698504, "grad_norm": 1.732306718826294, "learning_rate": 7.83084004602992e-05, "loss": 0.2387, "step": 333990 }, { "epoch": 96.08745684695052, "grad_norm": 1.8910590410232544, "learning_rate": 7.825086306098965e-05, "loss": 0.268, "step": 334000 }, { "epoch": 96.09033371691599, "grad_norm": 1.974069595336914, "learning_rate": 7.819332566168009e-05, "loss": 0.2164, "step": 334010 }, { "epoch": 96.09321058688147, "grad_norm": 1.5342533588409424, "learning_rate": 7.813578826237055e-05, "loss": 0.2784, "step": 334020 }, { "epoch": 96.09608745684694, "grad_norm": 1.1686500310897827, "learning_rate": 7.807825086306099e-05, "loss": 0.2522, "step": 334030 }, { "epoch": 96.09896432681242, "grad_norm": 1.2973712682724, "learning_rate": 7.802071346375143e-05, "loss": 0.2062, "step": 334040 }, { "epoch": 96.10184119677791, "grad_norm": 1.8409098386764526, "learning_rate": 7.796317606444189e-05, "loss": 0.2436, "step": 334050 }, { "epoch": 96.10471806674339, "grad_norm": 0.9832778573036194, "learning_rate": 7.790563866513234e-05, "loss": 0.2648, "step": 334060 }, { "epoch": 96.10759493670886, "grad_norm": 1.390621542930603, "learning_rate": 7.784810126582278e-05, "loss": 0.2618, "step": 334070 }, { "epoch": 96.11047180667434, "grad_norm": 0.6644284129142761, "learning_rate": 7.779056386651323e-05, "loss": 0.2049, "step": 334080 }, { "epoch": 96.11334867663982, "grad_norm": 0.6111105680465698, "learning_rate": 7.77330264672037e-05, "loss": 0.2172, "step": 334090 }, { "epoch": 96.11622554660529, "grad_norm": 0.9821423888206482, "learning_rate": 7.767548906789414e-05, "loss": 0.3345, "step": 334100 }, { "epoch": 96.11910241657077, "grad_norm": 1.662401795387268, "learning_rate": 7.761795166858458e-05, "loss": 0.2862, "step": 334110 }, { "epoch": 96.12197928653625, "grad_norm": 1.1966054439544678, "learning_rate": 7.756041426927504e-05, "loss": 0.233, "step": 334120 }, { "epoch": 96.12485615650172, "grad_norm": 1.5044915676116943, "learning_rate": 7.750287686996548e-05, "loss": 0.2461, "step": 334130 }, { "epoch": 96.1277330264672, "grad_norm": 1.4664689302444458, "learning_rate": 7.744533947065593e-05, "loss": 0.2423, "step": 334140 }, { "epoch": 96.13060989643269, "grad_norm": 1.7154980897903442, "learning_rate": 7.738780207134637e-05, "loss": 0.2219, "step": 334150 }, { "epoch": 96.13348676639816, "grad_norm": 1.5725057125091553, "learning_rate": 7.733026467203683e-05, "loss": 0.2303, "step": 334160 }, { "epoch": 96.13636363636364, "grad_norm": 0.8953549861907959, "learning_rate": 7.727272727272727e-05, "loss": 0.2567, "step": 334170 }, { "epoch": 96.13924050632912, "grad_norm": 1.0447402000427246, "learning_rate": 7.721518987341771e-05, "loss": 0.2986, "step": 334180 }, { "epoch": 96.14211737629459, "grad_norm": 0.5197002291679382, "learning_rate": 7.715765247410818e-05, "loss": 0.2469, "step": 334190 }, { "epoch": 96.14499424626007, "grad_norm": 0.9189786314964294, "learning_rate": 7.710011507479863e-05, "loss": 0.3714, "step": 334200 }, { "epoch": 96.14787111622555, "grad_norm": 1.518485426902771, "learning_rate": 7.704257767548907e-05, "loss": 0.2433, "step": 334210 }, { "epoch": 96.15074798619102, "grad_norm": 0.8658158779144287, "learning_rate": 7.698504027617952e-05, "loss": 0.244, "step": 334220 }, { "epoch": 96.1536248561565, "grad_norm": 1.952839970588684, "learning_rate": 7.692750287686996e-05, "loss": 0.2887, "step": 334230 }, { "epoch": 96.15650172612197, "grad_norm": 0.6921830177307129, "learning_rate": 7.686996547756042e-05, "loss": 0.2681, "step": 334240 }, { "epoch": 96.15937859608745, "grad_norm": 0.9722203016281128, "learning_rate": 7.681242807825086e-05, "loss": 0.2787, "step": 334250 }, { "epoch": 96.16225546605294, "grad_norm": 0.7234296798706055, "learning_rate": 7.675489067894132e-05, "loss": 0.3416, "step": 334260 }, { "epoch": 96.16513233601842, "grad_norm": 0.9462065100669861, "learning_rate": 7.669735327963176e-05, "loss": 0.215, "step": 334270 }, { "epoch": 96.16800920598389, "grad_norm": 1.3242663145065308, "learning_rate": 7.66398158803222e-05, "loss": 0.2437, "step": 334280 }, { "epoch": 96.17088607594937, "grad_norm": 0.690697431564331, "learning_rate": 7.658227848101267e-05, "loss": 0.2172, "step": 334290 }, { "epoch": 96.17376294591485, "grad_norm": 1.3338931798934937, "learning_rate": 7.652474108170311e-05, "loss": 0.2605, "step": 334300 }, { "epoch": 96.17663981588032, "grad_norm": 1.002814769744873, "learning_rate": 7.646720368239355e-05, "loss": 0.2409, "step": 334310 }, { "epoch": 96.1795166858458, "grad_norm": 1.1617799997329712, "learning_rate": 7.640966628308401e-05, "loss": 0.2951, "step": 334320 }, { "epoch": 96.18239355581127, "grad_norm": 1.0358363389968872, "learning_rate": 7.635212888377445e-05, "loss": 0.294, "step": 334330 }, { "epoch": 96.18527042577675, "grad_norm": 0.6305177211761475, "learning_rate": 7.629459148446491e-05, "loss": 0.2835, "step": 334340 }, { "epoch": 96.18814729574223, "grad_norm": 0.8765042424201965, "learning_rate": 7.623705408515535e-05, "loss": 0.2578, "step": 334350 }, { "epoch": 96.19102416570772, "grad_norm": 0.9614129066467285, "learning_rate": 7.61795166858458e-05, "loss": 0.2653, "step": 334360 }, { "epoch": 96.19390103567319, "grad_norm": 1.2768096923828125, "learning_rate": 7.612197928653625e-05, "loss": 0.2738, "step": 334370 }, { "epoch": 96.19677790563867, "grad_norm": 0.8640223741531372, "learning_rate": 7.606444188722669e-05, "loss": 0.2189, "step": 334380 }, { "epoch": 96.19965477560415, "grad_norm": 1.1904021501541138, "learning_rate": 7.600690448791716e-05, "loss": 0.2601, "step": 334390 }, { "epoch": 96.20253164556962, "grad_norm": 1.6412633657455444, "learning_rate": 7.59493670886076e-05, "loss": 0.3183, "step": 334400 }, { "epoch": 96.2054085155351, "grad_norm": 1.701230764389038, "learning_rate": 7.589182968929804e-05, "loss": 0.3095, "step": 334410 }, { "epoch": 96.20828538550057, "grad_norm": 1.5014026165008545, "learning_rate": 7.58342922899885e-05, "loss": 0.2265, "step": 334420 }, { "epoch": 96.21116225546605, "grad_norm": 1.0040264129638672, "learning_rate": 7.577675489067894e-05, "loss": 0.2731, "step": 334430 }, { "epoch": 96.21403912543153, "grad_norm": 0.9959470629692078, "learning_rate": 7.57192174913694e-05, "loss": 0.2505, "step": 334440 }, { "epoch": 96.216915995397, "grad_norm": 1.6439279317855835, "learning_rate": 7.566168009205984e-05, "loss": 0.2942, "step": 334450 }, { "epoch": 96.21979286536248, "grad_norm": 0.6982399225234985, "learning_rate": 7.560414269275029e-05, "loss": 0.2184, "step": 334460 }, { "epoch": 96.22266973532797, "grad_norm": 1.2874079942703247, "learning_rate": 7.554660529344073e-05, "loss": 0.2276, "step": 334470 }, { "epoch": 96.22554660529345, "grad_norm": 0.8767635822296143, "learning_rate": 7.548906789413118e-05, "loss": 0.2822, "step": 334480 }, { "epoch": 96.22842347525892, "grad_norm": 1.4375331401824951, "learning_rate": 7.543153049482164e-05, "loss": 0.2657, "step": 334490 }, { "epoch": 96.2313003452244, "grad_norm": 1.1611112356185913, "learning_rate": 7.537399309551209e-05, "loss": 0.2444, "step": 334500 }, { "epoch": 96.23417721518987, "grad_norm": 0.8259395956993103, "learning_rate": 7.531645569620253e-05, "loss": 0.2488, "step": 334510 }, { "epoch": 96.23705408515535, "grad_norm": 1.640183925628662, "learning_rate": 7.525891829689298e-05, "loss": 0.2263, "step": 334520 }, { "epoch": 96.23993095512083, "grad_norm": 1.4547806978225708, "learning_rate": 7.520138089758343e-05, "loss": 0.2172, "step": 334530 }, { "epoch": 96.2428078250863, "grad_norm": 1.4628463983535767, "learning_rate": 7.514384349827388e-05, "loss": 0.3149, "step": 334540 }, { "epoch": 96.24568469505178, "grad_norm": 0.6085467338562012, "learning_rate": 7.508630609896432e-05, "loss": 0.2457, "step": 334550 }, { "epoch": 96.24856156501725, "grad_norm": 0.9764499068260193, "learning_rate": 7.502876869965478e-05, "loss": 0.2744, "step": 334560 }, { "epoch": 96.25143843498275, "grad_norm": 1.154141902923584, "learning_rate": 7.497123130034522e-05, "loss": 0.2839, "step": 334570 }, { "epoch": 96.25431530494822, "grad_norm": 1.3110188245773315, "learning_rate": 7.491369390103568e-05, "loss": 0.2256, "step": 334580 }, { "epoch": 96.2571921749137, "grad_norm": 0.9551037549972534, "learning_rate": 7.485615650172613e-05, "loss": 0.2944, "step": 334590 }, { "epoch": 96.26006904487917, "grad_norm": 0.7319582104682922, "learning_rate": 7.479861910241657e-05, "loss": 0.2569, "step": 334600 }, { "epoch": 96.26294591484465, "grad_norm": 1.32699716091156, "learning_rate": 7.474108170310702e-05, "loss": 0.2515, "step": 334610 }, { "epoch": 96.26582278481013, "grad_norm": 0.9638704061508179, "learning_rate": 7.468354430379747e-05, "loss": 0.2586, "step": 334620 }, { "epoch": 96.2686996547756, "grad_norm": 1.2412605285644531, "learning_rate": 7.462600690448793e-05, "loss": 0.2411, "step": 334630 }, { "epoch": 96.27157652474108, "grad_norm": 1.034226655960083, "learning_rate": 7.456846950517837e-05, "loss": 0.3006, "step": 334640 }, { "epoch": 96.27445339470655, "grad_norm": 0.9509585499763489, "learning_rate": 7.451093210586881e-05, "loss": 0.22, "step": 334650 }, { "epoch": 96.27733026467203, "grad_norm": 0.9459145069122314, "learning_rate": 7.445339470655927e-05, "loss": 0.2453, "step": 334660 }, { "epoch": 96.28020713463752, "grad_norm": 0.8308127522468567, "learning_rate": 7.439585730724971e-05, "loss": 0.2153, "step": 334670 }, { "epoch": 96.283084004603, "grad_norm": 0.9409906268119812, "learning_rate": 7.433831990794016e-05, "loss": 0.2777, "step": 334680 }, { "epoch": 96.28596087456847, "grad_norm": 0.840569794178009, "learning_rate": 7.428078250863062e-05, "loss": 0.179, "step": 334690 }, { "epoch": 96.28883774453395, "grad_norm": 0.7617945671081543, "learning_rate": 7.422324510932106e-05, "loss": 0.2575, "step": 334700 }, { "epoch": 96.29171461449943, "grad_norm": 0.7506580948829651, "learning_rate": 7.41657077100115e-05, "loss": 0.2042, "step": 334710 }, { "epoch": 96.2945914844649, "grad_norm": 1.2015713453292847, "learning_rate": 7.410817031070196e-05, "loss": 0.2398, "step": 334720 }, { "epoch": 96.29746835443038, "grad_norm": 0.8233402967453003, "learning_rate": 7.405063291139241e-05, "loss": 0.2243, "step": 334730 }, { "epoch": 96.30034522439585, "grad_norm": 1.180517554283142, "learning_rate": 7.399309551208286e-05, "loss": 0.2931, "step": 334740 }, { "epoch": 96.30322209436133, "grad_norm": 1.1754703521728516, "learning_rate": 7.39355581127733e-05, "loss": 0.3048, "step": 334750 }, { "epoch": 96.30609896432681, "grad_norm": 0.986039400100708, "learning_rate": 7.387802071346375e-05, "loss": 0.2206, "step": 334760 }, { "epoch": 96.30897583429228, "grad_norm": 0.9689062237739563, "learning_rate": 7.38204833141542e-05, "loss": 0.2933, "step": 334770 }, { "epoch": 96.31185270425777, "grad_norm": 1.0067461729049683, "learning_rate": 7.376294591484465e-05, "loss": 0.2507, "step": 334780 }, { "epoch": 96.31472957422325, "grad_norm": 0.7649000883102417, "learning_rate": 7.37054085155351e-05, "loss": 0.2262, "step": 334790 }, { "epoch": 96.31760644418873, "grad_norm": 0.7405994534492493, "learning_rate": 7.364787111622555e-05, "loss": 0.2262, "step": 334800 }, { "epoch": 96.3204833141542, "grad_norm": 1.38093900680542, "learning_rate": 7.359033371691599e-05, "loss": 0.2618, "step": 334810 }, { "epoch": 96.32336018411968, "grad_norm": 1.3596248626708984, "learning_rate": 7.353279631760645e-05, "loss": 0.2198, "step": 334820 }, { "epoch": 96.32623705408515, "grad_norm": 1.228515863418579, "learning_rate": 7.34752589182969e-05, "loss": 0.2736, "step": 334830 }, { "epoch": 96.32911392405063, "grad_norm": 1.1736012697219849, "learning_rate": 7.341772151898734e-05, "loss": 0.2661, "step": 334840 }, { "epoch": 96.33199079401611, "grad_norm": 0.7680938243865967, "learning_rate": 7.336018411967779e-05, "loss": 0.2372, "step": 334850 }, { "epoch": 96.33486766398158, "grad_norm": 1.2820497751235962, "learning_rate": 7.330264672036824e-05, "loss": 0.2632, "step": 334860 }, { "epoch": 96.33774453394706, "grad_norm": 0.643311083316803, "learning_rate": 7.324510932105868e-05, "loss": 0.233, "step": 334870 }, { "epoch": 96.34062140391255, "grad_norm": 1.27812659740448, "learning_rate": 7.318757192174914e-05, "loss": 0.2547, "step": 334880 }, { "epoch": 96.34349827387803, "grad_norm": 0.8470661640167236, "learning_rate": 7.31300345224396e-05, "loss": 0.2325, "step": 334890 }, { "epoch": 96.3463751438435, "grad_norm": 1.1744821071624756, "learning_rate": 7.307249712313004e-05, "loss": 0.239, "step": 334900 }, { "epoch": 96.34925201380898, "grad_norm": 0.5956889986991882, "learning_rate": 7.301495972382048e-05, "loss": 0.2774, "step": 334910 }, { "epoch": 96.35212888377445, "grad_norm": 0.7333849668502808, "learning_rate": 7.295742232451093e-05, "loss": 0.2661, "step": 334920 }, { "epoch": 96.35500575373993, "grad_norm": 1.2560421228408813, "learning_rate": 7.289988492520139e-05, "loss": 0.2323, "step": 334930 }, { "epoch": 96.35788262370541, "grad_norm": 0.9663156270980835, "learning_rate": 7.284234752589183e-05, "loss": 0.2578, "step": 334940 }, { "epoch": 96.36075949367088, "grad_norm": 0.9712693095207214, "learning_rate": 7.278481012658227e-05, "loss": 0.3612, "step": 334950 }, { "epoch": 96.36363636363636, "grad_norm": 1.298538088798523, "learning_rate": 7.272727272727273e-05, "loss": 0.2537, "step": 334960 }, { "epoch": 96.36651323360184, "grad_norm": 1.3112374544143677, "learning_rate": 7.266973532796317e-05, "loss": 0.2271, "step": 334970 }, { "epoch": 96.36939010356731, "grad_norm": 1.2602624893188477, "learning_rate": 7.261219792865363e-05, "loss": 0.2519, "step": 334980 }, { "epoch": 96.3722669735328, "grad_norm": 1.1072827577590942, "learning_rate": 7.255466052934408e-05, "loss": 0.3336, "step": 334990 }, { "epoch": 96.37514384349828, "grad_norm": 1.9547942876815796, "learning_rate": 7.249712313003452e-05, "loss": 0.3407, "step": 335000 }, { "epoch": 96.37802071346375, "grad_norm": 0.7449190020561218, "learning_rate": 7.243958573072497e-05, "loss": 0.2511, "step": 335010 }, { "epoch": 96.38089758342923, "grad_norm": 1.1925489902496338, "learning_rate": 7.238204833141542e-05, "loss": 0.2999, "step": 335020 }, { "epoch": 96.38377445339471, "grad_norm": 0.8509834408760071, "learning_rate": 7.232451093210588e-05, "loss": 0.259, "step": 335030 }, { "epoch": 96.38665132336018, "grad_norm": 1.3017730712890625, "learning_rate": 7.226697353279632e-05, "loss": 0.3722, "step": 335040 }, { "epoch": 96.38952819332566, "grad_norm": 0.8365693092346191, "learning_rate": 7.220943613348676e-05, "loss": 0.2753, "step": 335050 }, { "epoch": 96.39240506329114, "grad_norm": 0.7537958025932312, "learning_rate": 7.215189873417722e-05, "loss": 0.3348, "step": 335060 }, { "epoch": 96.39528193325661, "grad_norm": 1.0806920528411865, "learning_rate": 7.209436133486766e-05, "loss": 0.2577, "step": 335070 }, { "epoch": 96.39815880322209, "grad_norm": 0.8344001770019531, "learning_rate": 7.203682393555811e-05, "loss": 0.2351, "step": 335080 }, { "epoch": 96.40103567318758, "grad_norm": 1.1365658044815063, "learning_rate": 7.197928653624857e-05, "loss": 0.2094, "step": 335090 }, { "epoch": 96.40391254315306, "grad_norm": 0.7641244530677795, "learning_rate": 7.192174913693901e-05, "loss": 0.2429, "step": 335100 }, { "epoch": 96.40678941311853, "grad_norm": 1.2352910041809082, "learning_rate": 7.186421173762945e-05, "loss": 0.2208, "step": 335110 }, { "epoch": 96.40966628308401, "grad_norm": 0.9588919878005981, "learning_rate": 7.180667433831992e-05, "loss": 0.2567, "step": 335120 }, { "epoch": 96.41254315304948, "grad_norm": 0.9603439569473267, "learning_rate": 7.174913693901036e-05, "loss": 0.2363, "step": 335130 }, { "epoch": 96.41542002301496, "grad_norm": 0.7421906590461731, "learning_rate": 7.16915995397008e-05, "loss": 0.2397, "step": 335140 }, { "epoch": 96.41829689298044, "grad_norm": 1.191808819770813, "learning_rate": 7.163406214039125e-05, "loss": 0.2882, "step": 335150 }, { "epoch": 96.42117376294591, "grad_norm": 1.3934268951416016, "learning_rate": 7.15765247410817e-05, "loss": 0.3161, "step": 335160 }, { "epoch": 96.42405063291139, "grad_norm": 1.2141221761703491, "learning_rate": 7.151898734177216e-05, "loss": 0.2919, "step": 335170 }, { "epoch": 96.42692750287686, "grad_norm": 1.3717851638793945, "learning_rate": 7.14614499424626e-05, "loss": 0.2544, "step": 335180 }, { "epoch": 96.42980437284234, "grad_norm": 1.255187749862671, "learning_rate": 7.140391254315306e-05, "loss": 0.2105, "step": 335190 }, { "epoch": 96.43268124280783, "grad_norm": 1.0084822177886963, "learning_rate": 7.13463751438435e-05, "loss": 0.2283, "step": 335200 }, { "epoch": 96.43555811277331, "grad_norm": 1.7970929145812988, "learning_rate": 7.128883774453394e-05, "loss": 0.2858, "step": 335210 }, { "epoch": 96.43843498273878, "grad_norm": 1.1890019178390503, "learning_rate": 7.123130034522441e-05, "loss": 0.2499, "step": 335220 }, { "epoch": 96.44131185270426, "grad_norm": 0.9419093728065491, "learning_rate": 7.117376294591485e-05, "loss": 0.2233, "step": 335230 }, { "epoch": 96.44418872266974, "grad_norm": 0.8529404997825623, "learning_rate": 7.111622554660529e-05, "loss": 0.2418, "step": 335240 }, { "epoch": 96.44706559263521, "grad_norm": 0.6747693419456482, "learning_rate": 7.105868814729575e-05, "loss": 0.1876, "step": 335250 }, { "epoch": 96.44994246260069, "grad_norm": 0.9093495607376099, "learning_rate": 7.100115074798619e-05, "loss": 0.295, "step": 335260 }, { "epoch": 96.45281933256616, "grad_norm": 1.4210236072540283, "learning_rate": 7.094361334867665e-05, "loss": 0.2892, "step": 335270 }, { "epoch": 96.45569620253164, "grad_norm": 1.2996196746826172, "learning_rate": 7.088607594936709e-05, "loss": 0.3022, "step": 335280 }, { "epoch": 96.45857307249712, "grad_norm": 1.6344633102416992, "learning_rate": 7.082853855005754e-05, "loss": 0.2868, "step": 335290 }, { "epoch": 96.46144994246261, "grad_norm": 0.6753412485122681, "learning_rate": 7.077100115074798e-05, "loss": 0.2548, "step": 335300 }, { "epoch": 96.46432681242808, "grad_norm": 2.037677526473999, "learning_rate": 7.071346375143843e-05, "loss": 0.2609, "step": 335310 }, { "epoch": 96.46720368239356, "grad_norm": 1.1862002611160278, "learning_rate": 7.06559263521289e-05, "loss": 0.2356, "step": 335320 }, { "epoch": 96.47008055235904, "grad_norm": 1.4452892541885376, "learning_rate": 7.059838895281934e-05, "loss": 0.2499, "step": 335330 }, { "epoch": 96.47295742232451, "grad_norm": 1.0689336061477661, "learning_rate": 7.054085155350978e-05, "loss": 0.2923, "step": 335340 }, { "epoch": 96.47583429228999, "grad_norm": 1.0699944496154785, "learning_rate": 7.048331415420024e-05, "loss": 0.2508, "step": 335350 }, { "epoch": 96.47871116225546, "grad_norm": 1.0147265195846558, "learning_rate": 7.042577675489068e-05, "loss": 0.2448, "step": 335360 }, { "epoch": 96.48158803222094, "grad_norm": 1.6139391660690308, "learning_rate": 7.036823935558113e-05, "loss": 0.2344, "step": 335370 }, { "epoch": 96.48446490218642, "grad_norm": 1.8298704624176025, "learning_rate": 7.031070195627157e-05, "loss": 0.2693, "step": 335380 }, { "epoch": 96.4873417721519, "grad_norm": 1.1811586618423462, "learning_rate": 7.025316455696203e-05, "loss": 0.2774, "step": 335390 }, { "epoch": 96.49021864211737, "grad_norm": 0.88303142786026, "learning_rate": 7.019562715765247e-05, "loss": 0.2002, "step": 335400 }, { "epoch": 96.49309551208286, "grad_norm": 0.7952219247817993, "learning_rate": 7.013808975834291e-05, "loss": 0.2918, "step": 335410 }, { "epoch": 96.49597238204834, "grad_norm": 1.2633785009384155, "learning_rate": 7.008055235903338e-05, "loss": 0.2702, "step": 335420 }, { "epoch": 96.49884925201381, "grad_norm": 0.6841192841529846, "learning_rate": 7.002301495972383e-05, "loss": 0.2842, "step": 335430 }, { "epoch": 96.50172612197929, "grad_norm": 0.7157368063926697, "learning_rate": 6.996547756041427e-05, "loss": 0.3087, "step": 335440 }, { "epoch": 96.50460299194476, "grad_norm": 0.9164406657218933, "learning_rate": 6.990794016110472e-05, "loss": 0.245, "step": 335450 }, { "epoch": 96.50747986191024, "grad_norm": 0.9377124309539795, "learning_rate": 6.985040276179516e-05, "loss": 0.2784, "step": 335460 }, { "epoch": 96.51035673187572, "grad_norm": 1.466934084892273, "learning_rate": 6.979286536248562e-05, "loss": 0.2914, "step": 335470 }, { "epoch": 96.5132336018412, "grad_norm": 0.8679559230804443, "learning_rate": 6.973532796317606e-05, "loss": 0.2812, "step": 335480 }, { "epoch": 96.51611047180667, "grad_norm": 0.7864139080047607, "learning_rate": 6.967779056386652e-05, "loss": 0.2408, "step": 335490 }, { "epoch": 96.51898734177215, "grad_norm": 1.3588106632232666, "learning_rate": 6.962025316455696e-05, "loss": 0.334, "step": 335500 }, { "epoch": 96.52186421173764, "grad_norm": 1.2282958030700684, "learning_rate": 6.95627157652474e-05, "loss": 0.2202, "step": 335510 }, { "epoch": 96.52474108170311, "grad_norm": 0.9770293235778809, "learning_rate": 6.950517836593787e-05, "loss": 0.2606, "step": 335520 }, { "epoch": 96.52761795166859, "grad_norm": 0.9328363537788391, "learning_rate": 6.944764096662831e-05, "loss": 0.3158, "step": 335530 }, { "epoch": 96.53049482163406, "grad_norm": 1.2948484420776367, "learning_rate": 6.939010356731875e-05, "loss": 0.2368, "step": 335540 }, { "epoch": 96.53337169159954, "grad_norm": 0.9696745276451111, "learning_rate": 6.933256616800921e-05, "loss": 0.2774, "step": 335550 }, { "epoch": 96.53624856156502, "grad_norm": 1.0412404537200928, "learning_rate": 6.927502876869965e-05, "loss": 0.2515, "step": 335560 }, { "epoch": 96.5391254315305, "grad_norm": 1.8398274183273315, "learning_rate": 6.921749136939011e-05, "loss": 0.3136, "step": 335570 }, { "epoch": 96.54200230149597, "grad_norm": 1.2970792055130005, "learning_rate": 6.915995397008055e-05, "loss": 0.3, "step": 335580 }, { "epoch": 96.54487917146145, "grad_norm": 1.5776276588439941, "learning_rate": 6.9102416570771e-05, "loss": 0.3853, "step": 335590 }, { "epoch": 96.54775604142692, "grad_norm": 0.7578830718994141, "learning_rate": 6.904487917146145e-05, "loss": 0.2246, "step": 335600 }, { "epoch": 96.5506329113924, "grad_norm": 1.3909214735031128, "learning_rate": 6.89873417721519e-05, "loss": 0.2165, "step": 335610 }, { "epoch": 96.55350978135789, "grad_norm": 0.8736687898635864, "learning_rate": 6.892980437284236e-05, "loss": 0.2159, "step": 335620 }, { "epoch": 96.55638665132336, "grad_norm": 1.1030691862106323, "learning_rate": 6.88722669735328e-05, "loss": 0.2785, "step": 335630 }, { "epoch": 96.55926352128884, "grad_norm": 2.447211265563965, "learning_rate": 6.881472957422324e-05, "loss": 0.2686, "step": 335640 }, { "epoch": 96.56214039125432, "grad_norm": 1.1377097368240356, "learning_rate": 6.87571921749137e-05, "loss": 0.2125, "step": 335650 }, { "epoch": 96.5650172612198, "grad_norm": 1.129292607307434, "learning_rate": 6.869965477560415e-05, "loss": 0.226, "step": 335660 }, { "epoch": 96.56789413118527, "grad_norm": 2.229339599609375, "learning_rate": 6.86421173762946e-05, "loss": 0.2707, "step": 335670 }, { "epoch": 96.57077100115075, "grad_norm": 0.6771567463874817, "learning_rate": 6.858457997698504e-05, "loss": 0.2049, "step": 335680 }, { "epoch": 96.57364787111622, "grad_norm": 1.5246315002441406, "learning_rate": 6.852704257767549e-05, "loss": 0.2277, "step": 335690 }, { "epoch": 96.5765247410817, "grad_norm": 1.0301315784454346, "learning_rate": 6.846950517836593e-05, "loss": 0.2529, "step": 335700 }, { "epoch": 96.57940161104717, "grad_norm": 0.7337216138839722, "learning_rate": 6.841196777905639e-05, "loss": 0.2085, "step": 335710 }, { "epoch": 96.58227848101266, "grad_norm": 1.027300238609314, "learning_rate": 6.835443037974685e-05, "loss": 0.285, "step": 335720 }, { "epoch": 96.58515535097814, "grad_norm": 1.3144714832305908, "learning_rate": 6.829689298043729e-05, "loss": 0.2596, "step": 335730 }, { "epoch": 96.58803222094362, "grad_norm": 0.765630304813385, "learning_rate": 6.823935558112773e-05, "loss": 0.2629, "step": 335740 }, { "epoch": 96.5909090909091, "grad_norm": 0.931865394115448, "learning_rate": 6.818181818181818e-05, "loss": 0.2927, "step": 335750 }, { "epoch": 96.59378596087457, "grad_norm": 1.05826997756958, "learning_rate": 6.812428078250864e-05, "loss": 0.248, "step": 335760 }, { "epoch": 96.59666283084005, "grad_norm": 1.0421910285949707, "learning_rate": 6.806674338319908e-05, "loss": 0.2576, "step": 335770 }, { "epoch": 96.59953970080552, "grad_norm": 1.547939658164978, "learning_rate": 6.800920598388952e-05, "loss": 0.2601, "step": 335780 }, { "epoch": 96.602416570771, "grad_norm": 1.117794394493103, "learning_rate": 6.795166858457998e-05, "loss": 0.2253, "step": 335790 }, { "epoch": 96.60529344073647, "grad_norm": 1.5129756927490234, "learning_rate": 6.789413118527042e-05, "loss": 0.256, "step": 335800 }, { "epoch": 96.60817031070195, "grad_norm": 1.044192910194397, "learning_rate": 6.783659378596088e-05, "loss": 0.2083, "step": 335810 }, { "epoch": 96.61104718066743, "grad_norm": 1.1204750537872314, "learning_rate": 6.777905638665133e-05, "loss": 0.2887, "step": 335820 }, { "epoch": 96.61392405063292, "grad_norm": 0.802332878112793, "learning_rate": 6.772151898734177e-05, "loss": 0.2966, "step": 335830 }, { "epoch": 96.6168009205984, "grad_norm": 0.8836177587509155, "learning_rate": 6.766398158803222e-05, "loss": 0.2166, "step": 335840 }, { "epoch": 96.61967779056387, "grad_norm": 0.7421521544456482, "learning_rate": 6.760644418872267e-05, "loss": 0.2233, "step": 335850 }, { "epoch": 96.62255466052935, "grad_norm": 1.8972837924957275, "learning_rate": 6.754890678941313e-05, "loss": 0.2478, "step": 335860 }, { "epoch": 96.62543153049482, "grad_norm": 1.4757474660873413, "learning_rate": 6.749136939010357e-05, "loss": 0.2763, "step": 335870 }, { "epoch": 96.6283084004603, "grad_norm": 1.2592159509658813, "learning_rate": 6.743383199079401e-05, "loss": 0.2732, "step": 335880 }, { "epoch": 96.63118527042577, "grad_norm": 0.9400672912597656, "learning_rate": 6.737629459148447e-05, "loss": 0.2528, "step": 335890 }, { "epoch": 96.63406214039125, "grad_norm": 0.7945535778999329, "learning_rate": 6.731875719217491e-05, "loss": 0.2346, "step": 335900 }, { "epoch": 96.63693901035673, "grad_norm": 1.2325809001922607, "learning_rate": 6.726121979286536e-05, "loss": 0.2637, "step": 335910 }, { "epoch": 96.6398158803222, "grad_norm": 1.292872428894043, "learning_rate": 6.720368239355582e-05, "loss": 0.257, "step": 335920 }, { "epoch": 96.6426927502877, "grad_norm": 1.672013521194458, "learning_rate": 6.714614499424626e-05, "loss": 0.2808, "step": 335930 }, { "epoch": 96.64556962025317, "grad_norm": 1.22515070438385, "learning_rate": 6.70886075949367e-05, "loss": 0.2552, "step": 335940 }, { "epoch": 96.64844649021865, "grad_norm": 2.616205930709839, "learning_rate": 6.703107019562716e-05, "loss": 0.2628, "step": 335950 }, { "epoch": 96.65132336018412, "grad_norm": 1.2422244548797607, "learning_rate": 6.697353279631761e-05, "loss": 0.2422, "step": 335960 }, { "epoch": 96.6542002301496, "grad_norm": 1.2567662000656128, "learning_rate": 6.691599539700806e-05, "loss": 0.2194, "step": 335970 }, { "epoch": 96.65707710011507, "grad_norm": 1.2403141260147095, "learning_rate": 6.68584579976985e-05, "loss": 0.2797, "step": 335980 }, { "epoch": 96.65995397008055, "grad_norm": 0.9363183975219727, "learning_rate": 6.680092059838895e-05, "loss": 0.239, "step": 335990 }, { "epoch": 96.66283084004603, "grad_norm": 0.8109729886054993, "learning_rate": 6.67433831990794e-05, "loss": 0.266, "step": 336000 }, { "epoch": 96.6657077100115, "grad_norm": 1.0564987659454346, "learning_rate": 6.668584579976985e-05, "loss": 0.2643, "step": 336010 }, { "epoch": 96.66858457997698, "grad_norm": 1.9393298625946045, "learning_rate": 6.662830840046031e-05, "loss": 0.2499, "step": 336020 }, { "epoch": 96.67146144994246, "grad_norm": 1.1431406736373901, "learning_rate": 6.657077100115075e-05, "loss": 0.2751, "step": 336030 }, { "epoch": 96.67433831990795, "grad_norm": 1.033058524131775, "learning_rate": 6.651323360184119e-05, "loss": 0.235, "step": 336040 }, { "epoch": 96.67721518987342, "grad_norm": 2.122342109680176, "learning_rate": 6.645569620253165e-05, "loss": 0.2779, "step": 336050 }, { "epoch": 96.6800920598389, "grad_norm": 1.119500994682312, "learning_rate": 6.63981588032221e-05, "loss": 0.2607, "step": 336060 }, { "epoch": 96.68296892980437, "grad_norm": 2.3486039638519287, "learning_rate": 6.634062140391254e-05, "loss": 0.2941, "step": 336070 }, { "epoch": 96.68584579976985, "grad_norm": 0.7381419539451599, "learning_rate": 6.628308400460299e-05, "loss": 0.2849, "step": 336080 }, { "epoch": 96.68872266973533, "grad_norm": 2.654158353805542, "learning_rate": 6.622554660529344e-05, "loss": 0.2355, "step": 336090 }, { "epoch": 96.6915995397008, "grad_norm": 1.4639873504638672, "learning_rate": 6.61680092059839e-05, "loss": 0.2692, "step": 336100 }, { "epoch": 96.69447640966628, "grad_norm": 1.214276909828186, "learning_rate": 6.611047180667434e-05, "loss": 0.2302, "step": 336110 }, { "epoch": 96.69735327963176, "grad_norm": 1.0556509494781494, "learning_rate": 6.60529344073648e-05, "loss": 0.2605, "step": 336120 }, { "epoch": 96.70023014959723, "grad_norm": 1.4151946306228638, "learning_rate": 6.599539700805524e-05, "loss": 0.2853, "step": 336130 }, { "epoch": 96.70310701956272, "grad_norm": 1.6892905235290527, "learning_rate": 6.593785960874568e-05, "loss": 0.2263, "step": 336140 }, { "epoch": 96.7059838895282, "grad_norm": 1.151833176612854, "learning_rate": 6.588032220943615e-05, "loss": 0.2354, "step": 336150 }, { "epoch": 96.70886075949367, "grad_norm": 1.827094316482544, "learning_rate": 6.582278481012659e-05, "loss": 0.2932, "step": 336160 }, { "epoch": 96.71173762945915, "grad_norm": 0.707205593585968, "learning_rate": 6.576524741081703e-05, "loss": 0.3055, "step": 336170 }, { "epoch": 96.71461449942463, "grad_norm": 1.615390658378601, "learning_rate": 6.570771001150747e-05, "loss": 0.3554, "step": 336180 }, { "epoch": 96.7174913693901, "grad_norm": 1.773682951927185, "learning_rate": 6.565017261219793e-05, "loss": 0.3407, "step": 336190 }, { "epoch": 96.72036823935558, "grad_norm": 1.7591296434402466, "learning_rate": 6.559263521288838e-05, "loss": 0.2252, "step": 336200 }, { "epoch": 96.72324510932106, "grad_norm": 0.6777658462524414, "learning_rate": 6.553509781357883e-05, "loss": 0.2331, "step": 336210 }, { "epoch": 96.72612197928653, "grad_norm": 1.119969129562378, "learning_rate": 6.547756041426928e-05, "loss": 0.2146, "step": 336220 }, { "epoch": 96.72899884925201, "grad_norm": 1.3860771656036377, "learning_rate": 6.542002301495972e-05, "loss": 0.3044, "step": 336230 }, { "epoch": 96.7318757192175, "grad_norm": 0.9766537547111511, "learning_rate": 6.536248561565017e-05, "loss": 0.2495, "step": 336240 }, { "epoch": 96.73475258918297, "grad_norm": 1.5443938970565796, "learning_rate": 6.530494821634063e-05, "loss": 0.2777, "step": 336250 }, { "epoch": 96.73762945914845, "grad_norm": 2.2477481365203857, "learning_rate": 6.524741081703108e-05, "loss": 0.2889, "step": 336260 }, { "epoch": 96.74050632911393, "grad_norm": 1.4096250534057617, "learning_rate": 6.518987341772152e-05, "loss": 0.2928, "step": 336270 }, { "epoch": 96.7433831990794, "grad_norm": 1.0610889196395874, "learning_rate": 6.513233601841196e-05, "loss": 0.2273, "step": 336280 }, { "epoch": 96.74626006904488, "grad_norm": 1.0529967546463013, "learning_rate": 6.507479861910242e-05, "loss": 0.3586, "step": 336290 }, { "epoch": 96.74913693901036, "grad_norm": 1.1095409393310547, "learning_rate": 6.501726121979287e-05, "loss": 0.2163, "step": 336300 }, { "epoch": 96.75201380897583, "grad_norm": 1.6723992824554443, "learning_rate": 6.495972382048331e-05, "loss": 0.2623, "step": 336310 }, { "epoch": 96.75489067894131, "grad_norm": 0.7686030864715576, "learning_rate": 6.490218642117377e-05, "loss": 0.2414, "step": 336320 }, { "epoch": 96.75776754890678, "grad_norm": 1.2025413513183594, "learning_rate": 6.484464902186421e-05, "loss": 0.3005, "step": 336330 }, { "epoch": 96.76064441887226, "grad_norm": 1.7021862268447876, "learning_rate": 6.478711162255465e-05, "loss": 0.2864, "step": 336340 }, { "epoch": 96.76352128883775, "grad_norm": 0.7836635112762451, "learning_rate": 6.472957422324512e-05, "loss": 0.2502, "step": 336350 }, { "epoch": 96.76639815880323, "grad_norm": 0.8183863162994385, "learning_rate": 6.467203682393556e-05, "loss": 0.278, "step": 336360 }, { "epoch": 96.7692750287687, "grad_norm": 1.9806019067764282, "learning_rate": 6.4614499424626e-05, "loss": 0.3142, "step": 336370 }, { "epoch": 96.77215189873418, "grad_norm": 0.9980470538139343, "learning_rate": 6.455696202531645e-05, "loss": 0.235, "step": 336380 }, { "epoch": 96.77502876869966, "grad_norm": 0.9215737581253052, "learning_rate": 6.44994246260069e-05, "loss": 0.2384, "step": 336390 }, { "epoch": 96.77790563866513, "grad_norm": 1.3377101421356201, "learning_rate": 6.444188722669736e-05, "loss": 0.2933, "step": 336400 }, { "epoch": 96.78078250863061, "grad_norm": 1.1214979887008667, "learning_rate": 6.43843498273878e-05, "loss": 0.222, "step": 336410 }, { "epoch": 96.78365937859608, "grad_norm": 1.5690276622772217, "learning_rate": 6.432681242807826e-05, "loss": 0.4146, "step": 336420 }, { "epoch": 96.78653624856156, "grad_norm": 1.0548356771469116, "learning_rate": 6.42692750287687e-05, "loss": 0.2446, "step": 336430 }, { "epoch": 96.78941311852704, "grad_norm": 1.2549843788146973, "learning_rate": 6.421173762945914e-05, "loss": 0.2459, "step": 336440 }, { "epoch": 96.79228998849253, "grad_norm": 1.6286858320236206, "learning_rate": 6.415420023014961e-05, "loss": 0.2667, "step": 336450 }, { "epoch": 96.795166858458, "grad_norm": 0.9941293001174927, "learning_rate": 6.409666283084005e-05, "loss": 0.2398, "step": 336460 }, { "epoch": 96.79804372842348, "grad_norm": 1.2899551391601562, "learning_rate": 6.403912543153049e-05, "loss": 0.266, "step": 336470 }, { "epoch": 96.80092059838896, "grad_norm": 1.413997769355774, "learning_rate": 6.398158803222093e-05, "loss": 0.2562, "step": 336480 }, { "epoch": 96.80379746835443, "grad_norm": 0.685258150100708, "learning_rate": 6.392405063291139e-05, "loss": 0.2843, "step": 336490 }, { "epoch": 96.80667433831991, "grad_norm": 0.865123987197876, "learning_rate": 6.386651323360185e-05, "loss": 0.2899, "step": 336500 }, { "epoch": 96.80955120828538, "grad_norm": 2.428238868713379, "learning_rate": 6.380897583429229e-05, "loss": 0.2706, "step": 336510 }, { "epoch": 96.81242807825086, "grad_norm": 1.6084411144256592, "learning_rate": 6.375143843498274e-05, "loss": 0.2616, "step": 336520 }, { "epoch": 96.81530494821634, "grad_norm": 0.770510196685791, "learning_rate": 6.369390103567319e-05, "loss": 0.2739, "step": 336530 }, { "epoch": 96.81818181818181, "grad_norm": 1.1080535650253296, "learning_rate": 6.363636363636363e-05, "loss": 0.2714, "step": 336540 }, { "epoch": 96.82105868814729, "grad_norm": 0.6902855038642883, "learning_rate": 6.35788262370541e-05, "loss": 0.2106, "step": 336550 }, { "epoch": 96.82393555811278, "grad_norm": 1.5894429683685303, "learning_rate": 6.352128883774454e-05, "loss": 0.3046, "step": 336560 }, { "epoch": 96.82681242807826, "grad_norm": 0.9562681317329407, "learning_rate": 6.346375143843498e-05, "loss": 0.2264, "step": 336570 }, { "epoch": 96.82968929804373, "grad_norm": 1.4501162767410278, "learning_rate": 6.340621403912544e-05, "loss": 0.2565, "step": 336580 }, { "epoch": 96.83256616800921, "grad_norm": 1.7170907258987427, "learning_rate": 6.334867663981588e-05, "loss": 0.2977, "step": 336590 }, { "epoch": 96.83544303797468, "grad_norm": 1.45516836643219, "learning_rate": 6.329113924050633e-05, "loss": 0.2347, "step": 336600 }, { "epoch": 96.83831990794016, "grad_norm": 0.8974364995956421, "learning_rate": 6.323360184119677e-05, "loss": 0.2478, "step": 336610 }, { "epoch": 96.84119677790564, "grad_norm": 0.9473962783813477, "learning_rate": 6.317606444188723e-05, "loss": 0.2324, "step": 336620 }, { "epoch": 96.84407364787111, "grad_norm": 0.9443554282188416, "learning_rate": 6.311852704257767e-05, "loss": 0.2347, "step": 336630 }, { "epoch": 96.84695051783659, "grad_norm": 0.8409125208854675, "learning_rate": 6.306098964326813e-05, "loss": 0.2882, "step": 336640 }, { "epoch": 96.84982738780207, "grad_norm": 1.8472870588302612, "learning_rate": 6.300345224395858e-05, "loss": 0.2202, "step": 336650 }, { "epoch": 96.85270425776756, "grad_norm": 1.6637002229690552, "learning_rate": 6.294591484464903e-05, "loss": 0.313, "step": 336660 }, { "epoch": 96.85558112773303, "grad_norm": 1.2428346872329712, "learning_rate": 6.288837744533947e-05, "loss": 0.2148, "step": 336670 }, { "epoch": 96.85845799769851, "grad_norm": 1.8170567750930786, "learning_rate": 6.283084004602992e-05, "loss": 0.3202, "step": 336680 }, { "epoch": 96.86133486766398, "grad_norm": 0.7527452111244202, "learning_rate": 6.277330264672038e-05, "loss": 0.2829, "step": 336690 }, { "epoch": 96.86421173762946, "grad_norm": 0.8741695284843445, "learning_rate": 6.271576524741082e-05, "loss": 0.245, "step": 336700 }, { "epoch": 96.86708860759494, "grad_norm": 1.297521948814392, "learning_rate": 6.265822784810126e-05, "loss": 0.2334, "step": 336710 }, { "epoch": 96.86996547756041, "grad_norm": 1.6726473569869995, "learning_rate": 6.260069044879172e-05, "loss": 0.2599, "step": 336720 }, { "epoch": 96.87284234752589, "grad_norm": 0.9330535531044006, "learning_rate": 6.254315304948216e-05, "loss": 0.2718, "step": 336730 }, { "epoch": 96.87571921749137, "grad_norm": 1.3509690761566162, "learning_rate": 6.248561565017262e-05, "loss": 0.2709, "step": 336740 }, { "epoch": 96.87859608745684, "grad_norm": 0.9166106581687927, "learning_rate": 6.242807825086307e-05, "loss": 0.2758, "step": 336750 }, { "epoch": 96.88147295742232, "grad_norm": 1.1476001739501953, "learning_rate": 6.237054085155351e-05, "loss": 0.2881, "step": 336760 }, { "epoch": 96.88434982738781, "grad_norm": 1.0302773714065552, "learning_rate": 6.231300345224395e-05, "loss": 0.2915, "step": 336770 }, { "epoch": 96.88722669735328, "grad_norm": 0.863924503326416, "learning_rate": 6.225546605293441e-05, "loss": 0.2305, "step": 336780 }, { "epoch": 96.89010356731876, "grad_norm": 0.8814195990562439, "learning_rate": 6.219792865362485e-05, "loss": 0.2698, "step": 336790 }, { "epoch": 96.89298043728424, "grad_norm": 3.357428789138794, "learning_rate": 6.214039125431531e-05, "loss": 0.2765, "step": 336800 }, { "epoch": 96.89585730724971, "grad_norm": 1.1543077230453491, "learning_rate": 6.208285385500575e-05, "loss": 0.2895, "step": 336810 }, { "epoch": 96.89873417721519, "grad_norm": 0.8615403175354004, "learning_rate": 6.20253164556962e-05, "loss": 0.2382, "step": 336820 }, { "epoch": 96.90161104718067, "grad_norm": 1.172100305557251, "learning_rate": 6.196777905638666e-05, "loss": 0.2376, "step": 336830 }, { "epoch": 96.90448791714614, "grad_norm": 1.2423523664474487, "learning_rate": 6.19102416570771e-05, "loss": 0.2822, "step": 336840 }, { "epoch": 96.90736478711162, "grad_norm": 1.8834881782531738, "learning_rate": 6.185270425776756e-05, "loss": 0.2957, "step": 336850 }, { "epoch": 96.9102416570771, "grad_norm": 0.5893236994743347, "learning_rate": 6.1795166858458e-05, "loss": 0.2544, "step": 336860 }, { "epoch": 96.91311852704258, "grad_norm": 1.367754578590393, "learning_rate": 6.173762945914844e-05, "loss": 0.2835, "step": 336870 }, { "epoch": 96.91599539700806, "grad_norm": 0.9338353276252747, "learning_rate": 6.16800920598389e-05, "loss": 0.257, "step": 336880 }, { "epoch": 96.91887226697354, "grad_norm": 0.9721154570579529, "learning_rate": 6.162255466052934e-05, "loss": 0.24, "step": 336890 }, { "epoch": 96.92174913693901, "grad_norm": 1.234218716621399, "learning_rate": 6.15650172612198e-05, "loss": 0.2878, "step": 336900 }, { "epoch": 96.92462600690449, "grad_norm": 0.8124415278434753, "learning_rate": 6.150747986191024e-05, "loss": 0.2736, "step": 336910 }, { "epoch": 96.92750287686997, "grad_norm": 1.3823308944702148, "learning_rate": 6.144994246260069e-05, "loss": 0.3916, "step": 336920 }, { "epoch": 96.93037974683544, "grad_norm": 1.1997368335723877, "learning_rate": 6.139240506329115e-05, "loss": 0.2469, "step": 336930 }, { "epoch": 96.93325661680092, "grad_norm": 1.0317013263702393, "learning_rate": 6.133486766398159e-05, "loss": 0.2853, "step": 336940 }, { "epoch": 96.9361334867664, "grad_norm": 0.8842900395393372, "learning_rate": 6.127733026467205e-05, "loss": 0.2484, "step": 336950 }, { "epoch": 96.93901035673187, "grad_norm": 0.5321264863014221, "learning_rate": 6.121979286536249e-05, "loss": 0.2687, "step": 336960 }, { "epoch": 96.94188722669735, "grad_norm": 0.6904510259628296, "learning_rate": 6.116225546605293e-05, "loss": 0.2844, "step": 336970 }, { "epoch": 96.94476409666284, "grad_norm": 1.0000944137573242, "learning_rate": 6.110471806674338e-05, "loss": 0.2676, "step": 336980 }, { "epoch": 96.94764096662831, "grad_norm": 1.325251579284668, "learning_rate": 6.104718066743383e-05, "loss": 0.2627, "step": 336990 }, { "epoch": 96.95051783659379, "grad_norm": 1.493870735168457, "learning_rate": 6.098964326812428e-05, "loss": 0.2759, "step": 337000 }, { "epoch": 96.95339470655927, "grad_norm": 1.0473766326904297, "learning_rate": 6.093210586881473e-05, "loss": 0.2544, "step": 337010 }, { "epoch": 96.95627157652474, "grad_norm": 0.7432768940925598, "learning_rate": 6.0874568469505186e-05, "loss": 0.3327, "step": 337020 }, { "epoch": 96.95914844649022, "grad_norm": 1.8987997770309448, "learning_rate": 6.081703107019563e-05, "loss": 0.3179, "step": 337030 }, { "epoch": 96.9620253164557, "grad_norm": 1.09477698802948, "learning_rate": 6.075949367088608e-05, "loss": 0.2654, "step": 337040 }, { "epoch": 96.96490218642117, "grad_norm": 0.5170897245407104, "learning_rate": 6.0701956271576526e-05, "loss": 0.273, "step": 337050 }, { "epoch": 96.96777905638665, "grad_norm": 0.9971150755882263, "learning_rate": 6.0644418872266974e-05, "loss": 0.211, "step": 337060 }, { "epoch": 96.97065592635212, "grad_norm": 1.062721848487854, "learning_rate": 6.058688147295743e-05, "loss": 0.2255, "step": 337070 }, { "epoch": 96.97353279631761, "grad_norm": 0.7818835377693176, "learning_rate": 6.052934407364787e-05, "loss": 0.196, "step": 337080 }, { "epoch": 96.97640966628309, "grad_norm": 1.4591591358184814, "learning_rate": 6.047180667433832e-05, "loss": 0.2571, "step": 337090 }, { "epoch": 96.97928653624857, "grad_norm": 1.1470483541488647, "learning_rate": 6.041426927502877e-05, "loss": 0.2669, "step": 337100 }, { "epoch": 96.98216340621404, "grad_norm": 0.6624881029129028, "learning_rate": 6.035673187571922e-05, "loss": 0.2471, "step": 337110 }, { "epoch": 96.98504027617952, "grad_norm": 1.3565711975097656, "learning_rate": 6.0299194476409674e-05, "loss": 0.2386, "step": 337120 }, { "epoch": 96.987917146145, "grad_norm": 0.978537380695343, "learning_rate": 6.0241657077100116e-05, "loss": 0.2507, "step": 337130 }, { "epoch": 96.99079401611047, "grad_norm": 1.152397871017456, "learning_rate": 6.0184119677790564e-05, "loss": 0.2205, "step": 337140 }, { "epoch": 96.99367088607595, "grad_norm": 1.2822357416152954, "learning_rate": 6.012658227848101e-05, "loss": 0.268, "step": 337150 }, { "epoch": 96.99654775604142, "grad_norm": 1.277077078819275, "learning_rate": 6.006904487917146e-05, "loss": 0.2551, "step": 337160 }, { "epoch": 96.9994246260069, "grad_norm": 1.0462216138839722, "learning_rate": 6.001150747986192e-05, "loss": 0.2282, "step": 337170 }, { "epoch": 97.00230149597238, "grad_norm": 0.7555956840515137, "learning_rate": 5.995397008055236e-05, "loss": 0.2183, "step": 337180 }, { "epoch": 97.00517836593787, "grad_norm": 0.7640622854232788, "learning_rate": 5.989643268124281e-05, "loss": 0.2359, "step": 337190 }, { "epoch": 97.00805523590334, "grad_norm": 0.7184589505195618, "learning_rate": 5.9838895281933257e-05, "loss": 0.2032, "step": 337200 }, { "epoch": 97.01093210586882, "grad_norm": 0.6955248117446899, "learning_rate": 5.9781357882623705e-05, "loss": 0.2012, "step": 337210 }, { "epoch": 97.0138089758343, "grad_norm": 0.9874264597892761, "learning_rate": 5.972382048331416e-05, "loss": 0.271, "step": 337220 }, { "epoch": 97.01668584579977, "grad_norm": 0.9942023158073425, "learning_rate": 5.96662830840046e-05, "loss": 0.2364, "step": 337230 }, { "epoch": 97.01956271576525, "grad_norm": 0.8915383815765381, "learning_rate": 5.960874568469505e-05, "loss": 0.2674, "step": 337240 }, { "epoch": 97.02243958573072, "grad_norm": 1.559147834777832, "learning_rate": 5.95512082853855e-05, "loss": 0.2489, "step": 337250 }, { "epoch": 97.0253164556962, "grad_norm": 1.6962038278579712, "learning_rate": 5.949367088607595e-05, "loss": 0.2416, "step": 337260 }, { "epoch": 97.02819332566168, "grad_norm": 1.3385250568389893, "learning_rate": 5.9436133486766404e-05, "loss": 0.238, "step": 337270 }, { "epoch": 97.03107019562715, "grad_norm": 1.869126796722412, "learning_rate": 5.9378596087456846e-05, "loss": 0.2609, "step": 337280 }, { "epoch": 97.03394706559264, "grad_norm": 0.9387588500976562, "learning_rate": 5.93210586881473e-05, "loss": 0.2777, "step": 337290 }, { "epoch": 97.03682393555812, "grad_norm": 1.3053110837936401, "learning_rate": 5.9263521288837744e-05, "loss": 0.2592, "step": 337300 }, { "epoch": 97.0397008055236, "grad_norm": 1.1442546844482422, "learning_rate": 5.920598388952819e-05, "loss": 0.2439, "step": 337310 }, { "epoch": 97.04257767548907, "grad_norm": 2.3378467559814453, "learning_rate": 5.914844649021865e-05, "loss": 0.289, "step": 337320 }, { "epoch": 97.04545454545455, "grad_norm": 1.3918079137802124, "learning_rate": 5.909090909090909e-05, "loss": 0.2378, "step": 337330 }, { "epoch": 97.04833141542002, "grad_norm": 1.1094483137130737, "learning_rate": 5.9033371691599546e-05, "loss": 0.2266, "step": 337340 }, { "epoch": 97.0512082853855, "grad_norm": 1.706417441368103, "learning_rate": 5.897583429228999e-05, "loss": 0.3658, "step": 337350 }, { "epoch": 97.05408515535098, "grad_norm": 1.2016535997390747, "learning_rate": 5.8918296892980436e-05, "loss": 0.2653, "step": 337360 }, { "epoch": 97.05696202531645, "grad_norm": 0.6257753372192383, "learning_rate": 5.886075949367089e-05, "loss": 0.2385, "step": 337370 }, { "epoch": 97.05983889528193, "grad_norm": 1.5788490772247314, "learning_rate": 5.8803222094361334e-05, "loss": 0.2666, "step": 337380 }, { "epoch": 97.0627157652474, "grad_norm": 1.0468387603759766, "learning_rate": 5.874568469505179e-05, "loss": 0.2298, "step": 337390 }, { "epoch": 97.0655926352129, "grad_norm": 1.2824087142944336, "learning_rate": 5.868814729574223e-05, "loss": 0.2776, "step": 337400 }, { "epoch": 97.06846950517837, "grad_norm": 1.453773021697998, "learning_rate": 5.863060989643268e-05, "loss": 0.2909, "step": 337410 }, { "epoch": 97.07134637514385, "grad_norm": 1.6226361989974976, "learning_rate": 5.8573072497123135e-05, "loss": 0.2854, "step": 337420 }, { "epoch": 97.07422324510932, "grad_norm": 1.0682525634765625, "learning_rate": 5.851553509781358e-05, "loss": 0.2176, "step": 337430 }, { "epoch": 97.0771001150748, "grad_norm": 0.9794442057609558, "learning_rate": 5.845799769850403e-05, "loss": 0.2307, "step": 337440 }, { "epoch": 97.07997698504028, "grad_norm": 1.6920254230499268, "learning_rate": 5.8400460299194475e-05, "loss": 0.2246, "step": 337450 }, { "epoch": 97.08285385500575, "grad_norm": 1.3636534214019775, "learning_rate": 5.8342922899884923e-05, "loss": 0.236, "step": 337460 }, { "epoch": 97.08573072497123, "grad_norm": 0.9015385508537292, "learning_rate": 5.828538550057538e-05, "loss": 0.2663, "step": 337470 }, { "epoch": 97.0886075949367, "grad_norm": 0.9165331721305847, "learning_rate": 5.822784810126582e-05, "loss": 0.3089, "step": 337480 }, { "epoch": 97.09148446490218, "grad_norm": 1.4744526147842407, "learning_rate": 5.8170310701956276e-05, "loss": 0.2422, "step": 337490 }, { "epoch": 97.09436133486767, "grad_norm": 1.0428262948989868, "learning_rate": 5.811277330264672e-05, "loss": 0.2347, "step": 337500 }, { "epoch": 97.09723820483315, "grad_norm": 1.1229851245880127, "learning_rate": 5.8055235903337174e-05, "loss": 0.2574, "step": 337510 }, { "epoch": 97.10011507479862, "grad_norm": 1.2831915616989136, "learning_rate": 5.799769850402762e-05, "loss": 0.2226, "step": 337520 }, { "epoch": 97.1029919447641, "grad_norm": 0.9891877770423889, "learning_rate": 5.7940161104718065e-05, "loss": 0.2028, "step": 337530 }, { "epoch": 97.10586881472958, "grad_norm": 0.8608967065811157, "learning_rate": 5.788262370540852e-05, "loss": 0.2357, "step": 337540 }, { "epoch": 97.10874568469505, "grad_norm": 1.3847252130508423, "learning_rate": 5.782508630609896e-05, "loss": 0.2214, "step": 337550 }, { "epoch": 97.11162255466053, "grad_norm": 2.027737855911255, "learning_rate": 5.776754890678942e-05, "loss": 0.3549, "step": 337560 }, { "epoch": 97.114499424626, "grad_norm": 1.0442215204238892, "learning_rate": 5.7710011507479866e-05, "loss": 0.2381, "step": 337570 }, { "epoch": 97.11737629459148, "grad_norm": 1.3242406845092773, "learning_rate": 5.765247410817031e-05, "loss": 0.2406, "step": 337580 }, { "epoch": 97.12025316455696, "grad_norm": 1.1947970390319824, "learning_rate": 5.7594936708860764e-05, "loss": 0.2423, "step": 337590 }, { "epoch": 97.12313003452243, "grad_norm": 1.182951807975769, "learning_rate": 5.7537399309551206e-05, "loss": 0.2337, "step": 337600 }, { "epoch": 97.12600690448792, "grad_norm": 1.0225107669830322, "learning_rate": 5.747986191024166e-05, "loss": 0.2871, "step": 337610 }, { "epoch": 97.1288837744534, "grad_norm": 0.6158990859985352, "learning_rate": 5.742232451093211e-05, "loss": 0.2142, "step": 337620 }, { "epoch": 97.13176064441888, "grad_norm": 1.354652762413025, "learning_rate": 5.736478711162255e-05, "loss": 0.2976, "step": 337630 }, { "epoch": 97.13463751438435, "grad_norm": 1.3486610651016235, "learning_rate": 5.730724971231301e-05, "loss": 0.2453, "step": 337640 }, { "epoch": 97.13751438434983, "grad_norm": 0.6672971248626709, "learning_rate": 5.724971231300345e-05, "loss": 0.2501, "step": 337650 }, { "epoch": 97.1403912543153, "grad_norm": 0.8993656635284424, "learning_rate": 5.7192174913693905e-05, "loss": 0.2236, "step": 337660 }, { "epoch": 97.14326812428078, "grad_norm": 0.954336941242218, "learning_rate": 5.7134637514384354e-05, "loss": 0.3049, "step": 337670 }, { "epoch": 97.14614499424626, "grad_norm": 1.5315905809402466, "learning_rate": 5.7077100115074795e-05, "loss": 0.3225, "step": 337680 }, { "epoch": 97.14902186421173, "grad_norm": 2.150106430053711, "learning_rate": 5.701956271576525e-05, "loss": 0.2928, "step": 337690 }, { "epoch": 97.15189873417721, "grad_norm": 1.4580087661743164, "learning_rate": 5.696202531645569e-05, "loss": 0.2664, "step": 337700 }, { "epoch": 97.1547756041427, "grad_norm": 1.4018851518630981, "learning_rate": 5.690448791714615e-05, "loss": 0.2171, "step": 337710 }, { "epoch": 97.15765247410818, "grad_norm": 0.6564738154411316, "learning_rate": 5.68469505178366e-05, "loss": 0.2637, "step": 337720 }, { "epoch": 97.16052934407365, "grad_norm": 1.840122938156128, "learning_rate": 5.678941311852704e-05, "loss": 0.2503, "step": 337730 }, { "epoch": 97.16340621403913, "grad_norm": 0.9739744663238525, "learning_rate": 5.6731875719217495e-05, "loss": 0.2309, "step": 337740 }, { "epoch": 97.1662830840046, "grad_norm": 1.1077741384506226, "learning_rate": 5.6674338319907937e-05, "loss": 0.2366, "step": 337750 }, { "epoch": 97.16915995397008, "grad_norm": 0.4829220771789551, "learning_rate": 5.661680092059839e-05, "loss": 0.2027, "step": 337760 }, { "epoch": 97.17203682393556, "grad_norm": 3.2203164100646973, "learning_rate": 5.655926352128884e-05, "loss": 0.3236, "step": 337770 }, { "epoch": 97.17491369390103, "grad_norm": 0.8779129385948181, "learning_rate": 5.650172612197929e-05, "loss": 0.2566, "step": 337780 }, { "epoch": 97.17779056386651, "grad_norm": 0.844218373298645, "learning_rate": 5.644418872266974e-05, "loss": 0.1937, "step": 337790 }, { "epoch": 97.18066743383199, "grad_norm": 1.1613523960113525, "learning_rate": 5.638665132336018e-05, "loss": 0.3156, "step": 337800 }, { "epoch": 97.18354430379746, "grad_norm": 1.0904394388198853, "learning_rate": 5.6329113924050636e-05, "loss": 0.28, "step": 337810 }, { "epoch": 97.18642117376295, "grad_norm": 0.8135716319084167, "learning_rate": 5.6271576524741084e-05, "loss": 0.2109, "step": 337820 }, { "epoch": 97.18929804372843, "grad_norm": 1.520795226097107, "learning_rate": 5.621403912543153e-05, "loss": 0.2757, "step": 337830 }, { "epoch": 97.1921749136939, "grad_norm": 0.8878599405288696, "learning_rate": 5.615650172612198e-05, "loss": 0.2434, "step": 337840 }, { "epoch": 97.19505178365938, "grad_norm": 1.0566257238388062, "learning_rate": 5.6098964326812424e-05, "loss": 0.2362, "step": 337850 }, { "epoch": 97.19792865362486, "grad_norm": 1.3186256885528564, "learning_rate": 5.604142692750288e-05, "loss": 0.2286, "step": 337860 }, { "epoch": 97.20080552359033, "grad_norm": 1.048478126525879, "learning_rate": 5.598388952819333e-05, "loss": 0.2413, "step": 337870 }, { "epoch": 97.20368239355581, "grad_norm": 1.9705934524536133, "learning_rate": 5.592635212888378e-05, "loss": 0.2706, "step": 337880 }, { "epoch": 97.20655926352129, "grad_norm": 0.6483186483383179, "learning_rate": 5.5868814729574226e-05, "loss": 0.2917, "step": 337890 }, { "epoch": 97.20943613348676, "grad_norm": 1.0641624927520752, "learning_rate": 5.581127733026467e-05, "loss": 0.2488, "step": 337900 }, { "epoch": 97.21231300345224, "grad_norm": 2.7110626697540283, "learning_rate": 5.575373993095512e-05, "loss": 0.276, "step": 337910 }, { "epoch": 97.21518987341773, "grad_norm": 1.9835772514343262, "learning_rate": 5.569620253164557e-05, "loss": 0.247, "step": 337920 }, { "epoch": 97.2180667433832, "grad_norm": 1.675865650177002, "learning_rate": 5.563866513233602e-05, "loss": 0.2327, "step": 337930 }, { "epoch": 97.22094361334868, "grad_norm": 1.1188327074050903, "learning_rate": 5.558112773302647e-05, "loss": 0.2293, "step": 337940 }, { "epoch": 97.22382048331416, "grad_norm": 1.2501157522201538, "learning_rate": 5.552359033371691e-05, "loss": 0.2491, "step": 337950 }, { "epoch": 97.22669735327963, "grad_norm": 1.905328631401062, "learning_rate": 5.5466052934407367e-05, "loss": 0.2239, "step": 337960 }, { "epoch": 97.22957422324511, "grad_norm": 0.9310629367828369, "learning_rate": 5.5408515535097815e-05, "loss": 0.2638, "step": 337970 }, { "epoch": 97.23245109321059, "grad_norm": 0.9375734329223633, "learning_rate": 5.5350978135788264e-05, "loss": 0.2311, "step": 337980 }, { "epoch": 97.23532796317606, "grad_norm": 0.7474629878997803, "learning_rate": 5.529344073647871e-05, "loss": 0.2668, "step": 337990 }, { "epoch": 97.23820483314154, "grad_norm": 0.8649928569793701, "learning_rate": 5.5235903337169155e-05, "loss": 0.2266, "step": 338000 }, { "epoch": 97.24108170310701, "grad_norm": 0.885966420173645, "learning_rate": 5.517836593785961e-05, "loss": 0.2113, "step": 338010 }, { "epoch": 97.24395857307249, "grad_norm": 0.7768287062644958, "learning_rate": 5.512082853855006e-05, "loss": 0.2808, "step": 338020 }, { "epoch": 97.24683544303798, "grad_norm": 1.1499528884887695, "learning_rate": 5.506329113924051e-05, "loss": 0.1997, "step": 338030 }, { "epoch": 97.24971231300346, "grad_norm": 1.271701455116272, "learning_rate": 5.5005753739930956e-05, "loss": 0.229, "step": 338040 }, { "epoch": 97.25258918296893, "grad_norm": 1.1689761877059937, "learning_rate": 5.4948216340621405e-05, "loss": 0.2377, "step": 338050 }, { "epoch": 97.25546605293441, "grad_norm": 0.9345697164535522, "learning_rate": 5.4890678941311854e-05, "loss": 0.2443, "step": 338060 }, { "epoch": 97.25834292289989, "grad_norm": 0.952602207660675, "learning_rate": 5.48331415420023e-05, "loss": 0.2266, "step": 338070 }, { "epoch": 97.26121979286536, "grad_norm": 1.1830445528030396, "learning_rate": 5.477560414269275e-05, "loss": 0.2218, "step": 338080 }, { "epoch": 97.26409666283084, "grad_norm": 2.0209403038024902, "learning_rate": 5.47180667433832e-05, "loss": 0.2912, "step": 338090 }, { "epoch": 97.26697353279631, "grad_norm": 0.784379780292511, "learning_rate": 5.466052934407365e-05, "loss": 0.3016, "step": 338100 }, { "epoch": 97.26985040276179, "grad_norm": 1.8575559854507446, "learning_rate": 5.46029919447641e-05, "loss": 0.2521, "step": 338110 }, { "epoch": 97.27272727272727, "grad_norm": 0.6945725679397583, "learning_rate": 5.4545454545454546e-05, "loss": 0.2294, "step": 338120 }, { "epoch": 97.27560414269276, "grad_norm": 1.523381233215332, "learning_rate": 5.4487917146144995e-05, "loss": 0.2614, "step": 338130 }, { "epoch": 97.27848101265823, "grad_norm": 1.320629596710205, "learning_rate": 5.4430379746835444e-05, "loss": 0.3012, "step": 338140 }, { "epoch": 97.28135788262371, "grad_norm": 0.9121729135513306, "learning_rate": 5.437284234752589e-05, "loss": 0.2845, "step": 338150 }, { "epoch": 97.28423475258919, "grad_norm": 1.264987587928772, "learning_rate": 5.431530494821634e-05, "loss": 0.2709, "step": 338160 }, { "epoch": 97.28711162255466, "grad_norm": 1.4728749990463257, "learning_rate": 5.425776754890679e-05, "loss": 0.2639, "step": 338170 }, { "epoch": 97.28998849252014, "grad_norm": 1.414979100227356, "learning_rate": 5.420023014959724e-05, "loss": 0.3018, "step": 338180 }, { "epoch": 97.29286536248561, "grad_norm": 1.8981635570526123, "learning_rate": 5.414269275028769e-05, "loss": 0.246, "step": 338190 }, { "epoch": 97.29574223245109, "grad_norm": 0.6912804841995239, "learning_rate": 5.4085155350978136e-05, "loss": 0.241, "step": 338200 }, { "epoch": 97.29861910241657, "grad_norm": 1.2168941497802734, "learning_rate": 5.4027617951668585e-05, "loss": 0.2608, "step": 338210 }, { "epoch": 97.30149597238204, "grad_norm": 0.7781728506088257, "learning_rate": 5.3970080552359033e-05, "loss": 0.2165, "step": 338220 }, { "epoch": 97.30437284234753, "grad_norm": 1.4358700513839722, "learning_rate": 5.391254315304948e-05, "loss": 0.2669, "step": 338230 }, { "epoch": 97.30724971231301, "grad_norm": 0.7092401385307312, "learning_rate": 5.385500575373993e-05, "loss": 0.2436, "step": 338240 }, { "epoch": 97.31012658227849, "grad_norm": 1.0930430889129639, "learning_rate": 5.3797468354430386e-05, "loss": 0.2415, "step": 338250 }, { "epoch": 97.31300345224396, "grad_norm": 1.254206657409668, "learning_rate": 5.373993095512083e-05, "loss": 0.2931, "step": 338260 }, { "epoch": 97.31588032220944, "grad_norm": 1.4094165563583374, "learning_rate": 5.368239355581128e-05, "loss": 0.2763, "step": 338270 }, { "epoch": 97.31875719217491, "grad_norm": 0.8361220955848694, "learning_rate": 5.3624856156501726e-05, "loss": 0.237, "step": 338280 }, { "epoch": 97.32163406214039, "grad_norm": 1.069108009338379, "learning_rate": 5.3567318757192175e-05, "loss": 0.2689, "step": 338290 }, { "epoch": 97.32451093210587, "grad_norm": 0.9380088448524475, "learning_rate": 5.350978135788263e-05, "loss": 0.2699, "step": 338300 }, { "epoch": 97.32738780207134, "grad_norm": 1.5502705574035645, "learning_rate": 5.345224395857307e-05, "loss": 0.2347, "step": 338310 }, { "epoch": 97.33026467203682, "grad_norm": 0.8858440518379211, "learning_rate": 5.339470655926353e-05, "loss": 0.24, "step": 338320 }, { "epoch": 97.3331415420023, "grad_norm": 0.9760244488716125, "learning_rate": 5.333716915995397e-05, "loss": 0.2513, "step": 338330 }, { "epoch": 97.33601841196779, "grad_norm": 2.0134732723236084, "learning_rate": 5.327963176064442e-05, "loss": 0.2806, "step": 338340 }, { "epoch": 97.33889528193326, "grad_norm": 1.4838019609451294, "learning_rate": 5.3222094361334874e-05, "loss": 0.3283, "step": 338350 }, { "epoch": 97.34177215189874, "grad_norm": 1.7774872779846191, "learning_rate": 5.3164556962025316e-05, "loss": 0.2654, "step": 338360 }, { "epoch": 97.34464902186421, "grad_norm": 1.2986124753952026, "learning_rate": 5.310701956271577e-05, "loss": 0.3089, "step": 338370 }, { "epoch": 97.34752589182969, "grad_norm": 1.4261822700500488, "learning_rate": 5.304948216340621e-05, "loss": 0.2818, "step": 338380 }, { "epoch": 97.35040276179517, "grad_norm": 0.8710718750953674, "learning_rate": 5.299194476409666e-05, "loss": 0.2004, "step": 338390 }, { "epoch": 97.35327963176064, "grad_norm": 1.2281426191329956, "learning_rate": 5.293440736478712e-05, "loss": 0.3142, "step": 338400 }, { "epoch": 97.35615650172612, "grad_norm": 2.295602560043335, "learning_rate": 5.287686996547756e-05, "loss": 0.2408, "step": 338410 }, { "epoch": 97.3590333716916, "grad_norm": 1.1823875904083252, "learning_rate": 5.2819332566168015e-05, "loss": 0.2968, "step": 338420 }, { "epoch": 97.36191024165707, "grad_norm": 0.7969112992286682, "learning_rate": 5.276179516685846e-05, "loss": 0.3044, "step": 338430 }, { "epoch": 97.36478711162256, "grad_norm": 0.7969014644622803, "learning_rate": 5.2704257767548905e-05, "loss": 0.2826, "step": 338440 }, { "epoch": 97.36766398158804, "grad_norm": 1.3650473356246948, "learning_rate": 5.264672036823936e-05, "loss": 0.2519, "step": 338450 }, { "epoch": 97.37054085155351, "grad_norm": 0.9757124781608582, "learning_rate": 5.25891829689298e-05, "loss": 0.3072, "step": 338460 }, { "epoch": 97.37341772151899, "grad_norm": 1.7302350997924805, "learning_rate": 5.253164556962026e-05, "loss": 0.2367, "step": 338470 }, { "epoch": 97.37629459148447, "grad_norm": 0.9492955207824707, "learning_rate": 5.24741081703107e-05, "loss": 0.2512, "step": 338480 }, { "epoch": 97.37917146144994, "grad_norm": 1.3358432054519653, "learning_rate": 5.241657077100115e-05, "loss": 0.3056, "step": 338490 }, { "epoch": 97.38204833141542, "grad_norm": 1.4397940635681152, "learning_rate": 5.2359033371691605e-05, "loss": 0.2544, "step": 338500 }, { "epoch": 97.3849252013809, "grad_norm": 0.9047464728355408, "learning_rate": 5.2301495972382047e-05, "loss": 0.2185, "step": 338510 }, { "epoch": 97.38780207134637, "grad_norm": 0.8469604253768921, "learning_rate": 5.22439585730725e-05, "loss": 0.2018, "step": 338520 }, { "epoch": 97.39067894131185, "grad_norm": 1.3309612274169922, "learning_rate": 5.2186421173762944e-05, "loss": 0.251, "step": 338530 }, { "epoch": 97.39355581127732, "grad_norm": 1.25483238697052, "learning_rate": 5.21288837744534e-05, "loss": 0.2562, "step": 338540 }, { "epoch": 97.39643268124281, "grad_norm": 2.0019755363464355, "learning_rate": 5.207134637514385e-05, "loss": 0.2876, "step": 338550 }, { "epoch": 97.39930955120829, "grad_norm": 1.870851755142212, "learning_rate": 5.201380897583429e-05, "loss": 0.2342, "step": 338560 }, { "epoch": 97.40218642117377, "grad_norm": 0.9691773056983948, "learning_rate": 5.1956271576524746e-05, "loss": 0.2373, "step": 338570 }, { "epoch": 97.40506329113924, "grad_norm": 1.3449491262435913, "learning_rate": 5.189873417721519e-05, "loss": 0.3087, "step": 338580 }, { "epoch": 97.40794016110472, "grad_norm": 0.8420079946517944, "learning_rate": 5.184119677790564e-05, "loss": 0.2134, "step": 338590 }, { "epoch": 97.4108170310702, "grad_norm": 0.7807190418243408, "learning_rate": 5.178365937859609e-05, "loss": 0.2416, "step": 338600 }, { "epoch": 97.41369390103567, "grad_norm": 0.7970596551895142, "learning_rate": 5.1726121979286534e-05, "loss": 0.2568, "step": 338610 }, { "epoch": 97.41657077100115, "grad_norm": 0.533798098564148, "learning_rate": 5.166858457997699e-05, "loss": 0.2333, "step": 338620 }, { "epoch": 97.41944764096662, "grad_norm": 1.022005319595337, "learning_rate": 5.161104718066743e-05, "loss": 0.2657, "step": 338630 }, { "epoch": 97.4223245109321, "grad_norm": 1.5849237442016602, "learning_rate": 5.155350978135789e-05, "loss": 0.2539, "step": 338640 }, { "epoch": 97.42520138089759, "grad_norm": 1.179456353187561, "learning_rate": 5.1495972382048336e-05, "loss": 0.2666, "step": 338650 }, { "epoch": 97.42807825086307, "grad_norm": 0.921326756477356, "learning_rate": 5.143843498273878e-05, "loss": 0.2527, "step": 338660 }, { "epoch": 97.43095512082854, "grad_norm": 1.3249895572662354, "learning_rate": 5.138089758342923e-05, "loss": 0.2373, "step": 338670 }, { "epoch": 97.43383199079402, "grad_norm": 1.0180259943008423, "learning_rate": 5.1323360184119675e-05, "loss": 0.2453, "step": 338680 }, { "epoch": 97.4367088607595, "grad_norm": 1.8228520154953003, "learning_rate": 5.126582278481013e-05, "loss": 0.2312, "step": 338690 }, { "epoch": 97.43958573072497, "grad_norm": 0.8147156834602356, "learning_rate": 5.120828538550058e-05, "loss": 0.2544, "step": 338700 }, { "epoch": 97.44246260069045, "grad_norm": 0.9360862374305725, "learning_rate": 5.115074798619102e-05, "loss": 0.2734, "step": 338710 }, { "epoch": 97.44533947065592, "grad_norm": 0.8652842044830322, "learning_rate": 5.1093210586881477e-05, "loss": 0.216, "step": 338720 }, { "epoch": 97.4482163406214, "grad_norm": 1.989981770515442, "learning_rate": 5.103567318757192e-05, "loss": 0.2348, "step": 338730 }, { "epoch": 97.45109321058688, "grad_norm": 0.8335196375846863, "learning_rate": 5.0978135788262374e-05, "loss": 0.28, "step": 338740 }, { "epoch": 97.45397008055235, "grad_norm": 0.8888846039772034, "learning_rate": 5.092059838895282e-05, "loss": 0.2234, "step": 338750 }, { "epoch": 97.45684695051784, "grad_norm": 2.844115734100342, "learning_rate": 5.0863060989643265e-05, "loss": 0.2745, "step": 338760 }, { "epoch": 97.45972382048332, "grad_norm": 1.009421706199646, "learning_rate": 5.080552359033372e-05, "loss": 0.3693, "step": 338770 }, { "epoch": 97.4626006904488, "grad_norm": 0.7451465725898743, "learning_rate": 5.074798619102416e-05, "loss": 0.2755, "step": 338780 }, { "epoch": 97.46547756041427, "grad_norm": 0.7513288259506226, "learning_rate": 5.069044879171462e-05, "loss": 0.2154, "step": 338790 }, { "epoch": 97.46835443037975, "grad_norm": 1.910792350769043, "learning_rate": 5.0632911392405066e-05, "loss": 0.3005, "step": 338800 }, { "epoch": 97.47123130034522, "grad_norm": 1.3123931884765625, "learning_rate": 5.0575373993095515e-05, "loss": 0.2947, "step": 338810 }, { "epoch": 97.4741081703107, "grad_norm": 1.6647173166275024, "learning_rate": 5.0517836593785964e-05, "loss": 0.2787, "step": 338820 }, { "epoch": 97.47698504027618, "grad_norm": 0.9048742651939392, "learning_rate": 5.0460299194476406e-05, "loss": 0.2477, "step": 338830 }, { "epoch": 97.47986191024165, "grad_norm": 1.4223493337631226, "learning_rate": 5.040276179516686e-05, "loss": 0.2106, "step": 338840 }, { "epoch": 97.48273878020713, "grad_norm": 1.0901768207550049, "learning_rate": 5.034522439585731e-05, "loss": 0.2564, "step": 338850 }, { "epoch": 97.48561565017262, "grad_norm": 0.6688347458839417, "learning_rate": 5.028768699654776e-05, "loss": 0.3419, "step": 338860 }, { "epoch": 97.4884925201381, "grad_norm": 2.365011215209961, "learning_rate": 5.023014959723821e-05, "loss": 0.2539, "step": 338870 }, { "epoch": 97.49136939010357, "grad_norm": 1.3556214570999146, "learning_rate": 5.017261219792865e-05, "loss": 0.2315, "step": 338880 }, { "epoch": 97.49424626006905, "grad_norm": 1.5417547225952148, "learning_rate": 5.0115074798619105e-05, "loss": 0.2756, "step": 338890 }, { "epoch": 97.49712313003452, "grad_norm": 0.9000378251075745, "learning_rate": 5.0057537399309554e-05, "loss": 0.2404, "step": 338900 }, { "epoch": 97.5, "grad_norm": 1.275705337524414, "learning_rate": 5e-05, "loss": 0.283, "step": 338910 }, { "epoch": 97.50287686996548, "grad_norm": 1.2418495416641235, "learning_rate": 4.994246260069045e-05, "loss": 0.2587, "step": 338920 }, { "epoch": 97.50575373993095, "grad_norm": 0.5936278104782104, "learning_rate": 4.988492520138089e-05, "loss": 0.2675, "step": 338930 }, { "epoch": 97.50863060989643, "grad_norm": 0.848995566368103, "learning_rate": 4.982738780207135e-05, "loss": 0.2097, "step": 338940 }, { "epoch": 97.5115074798619, "grad_norm": 1.3515208959579468, "learning_rate": 4.97698504027618e-05, "loss": 0.2994, "step": 338950 }, { "epoch": 97.51438434982738, "grad_norm": 1.6324183940887451, "learning_rate": 4.9712313003452246e-05, "loss": 0.2258, "step": 338960 }, { "epoch": 97.51726121979287, "grad_norm": 1.1857290267944336, "learning_rate": 4.9654775604142695e-05, "loss": 0.2512, "step": 338970 }, { "epoch": 97.52013808975835, "grad_norm": 2.1777138710021973, "learning_rate": 4.959723820483314e-05, "loss": 0.2742, "step": 338980 }, { "epoch": 97.52301495972382, "grad_norm": 0.8836776614189148, "learning_rate": 4.953970080552359e-05, "loss": 0.2622, "step": 338990 }, { "epoch": 97.5258918296893, "grad_norm": 0.7573322653770447, "learning_rate": 4.948216340621404e-05, "loss": 0.2409, "step": 339000 }, { "epoch": 97.52876869965478, "grad_norm": 1.232198715209961, "learning_rate": 4.942462600690449e-05, "loss": 0.2585, "step": 339010 }, { "epoch": 97.53164556962025, "grad_norm": 0.932914674282074, "learning_rate": 4.936708860759494e-05, "loss": 0.2611, "step": 339020 }, { "epoch": 97.53452243958573, "grad_norm": 0.8651649951934814, "learning_rate": 4.930955120828538e-05, "loss": 0.2829, "step": 339030 }, { "epoch": 97.5373993095512, "grad_norm": 1.7916725873947144, "learning_rate": 4.9252013808975836e-05, "loss": 0.2702, "step": 339040 }, { "epoch": 97.54027617951668, "grad_norm": 0.7513349652290344, "learning_rate": 4.9194476409666285e-05, "loss": 0.3019, "step": 339050 }, { "epoch": 97.54315304948216, "grad_norm": 0.9360284805297852, "learning_rate": 4.913693901035673e-05, "loss": 0.2427, "step": 339060 }, { "epoch": 97.54602991944765, "grad_norm": 0.8215953707695007, "learning_rate": 4.907940161104718e-05, "loss": 0.2817, "step": 339070 }, { "epoch": 97.54890678941312, "grad_norm": 0.9360371828079224, "learning_rate": 4.902186421173763e-05, "loss": 0.2186, "step": 339080 }, { "epoch": 97.5517836593786, "grad_norm": 0.7628898620605469, "learning_rate": 4.896432681242808e-05, "loss": 0.2313, "step": 339090 }, { "epoch": 97.55466052934408, "grad_norm": 1.1052045822143555, "learning_rate": 4.890678941311853e-05, "loss": 0.306, "step": 339100 }, { "epoch": 97.55753739930955, "grad_norm": 1.0993777513504028, "learning_rate": 4.884925201380898e-05, "loss": 0.2804, "step": 339110 }, { "epoch": 97.56041426927503, "grad_norm": 0.751132071018219, "learning_rate": 4.8791714614499426e-05, "loss": 0.2334, "step": 339120 }, { "epoch": 97.5632911392405, "grad_norm": 1.2800171375274658, "learning_rate": 4.8734177215189874e-05, "loss": 0.3286, "step": 339130 }, { "epoch": 97.56616800920598, "grad_norm": 1.2180935144424438, "learning_rate": 4.867663981588032e-05, "loss": 0.2477, "step": 339140 }, { "epoch": 97.56904487917146, "grad_norm": 1.0637257099151611, "learning_rate": 4.861910241657077e-05, "loss": 0.2932, "step": 339150 }, { "epoch": 97.57192174913693, "grad_norm": 0.9920893907546997, "learning_rate": 4.856156501726122e-05, "loss": 0.2047, "step": 339160 }, { "epoch": 97.57479861910241, "grad_norm": 0.9350317716598511, "learning_rate": 4.850402761795167e-05, "loss": 0.2592, "step": 339170 }, { "epoch": 97.5776754890679, "grad_norm": 1.2359634637832642, "learning_rate": 4.844649021864212e-05, "loss": 0.2459, "step": 339180 }, { "epoch": 97.58055235903338, "grad_norm": 1.7958757877349854, "learning_rate": 4.838895281933257e-05, "loss": 0.2895, "step": 339190 }, { "epoch": 97.58342922899885, "grad_norm": 1.4173952341079712, "learning_rate": 4.8331415420023015e-05, "loss": 0.2944, "step": 339200 }, { "epoch": 97.58630609896433, "grad_norm": 1.4521851539611816, "learning_rate": 4.8273878020713464e-05, "loss": 0.3079, "step": 339210 }, { "epoch": 97.5891829689298, "grad_norm": 0.794349730014801, "learning_rate": 4.821634062140391e-05, "loss": 0.2211, "step": 339220 }, { "epoch": 97.59205983889528, "grad_norm": 0.9028142094612122, "learning_rate": 4.815880322209436e-05, "loss": 0.2431, "step": 339230 }, { "epoch": 97.59493670886076, "grad_norm": 1.5646300315856934, "learning_rate": 4.810126582278481e-05, "loss": 0.2562, "step": 339240 }, { "epoch": 97.59781357882623, "grad_norm": 1.4314115047454834, "learning_rate": 4.804372842347526e-05, "loss": 0.2888, "step": 339250 }, { "epoch": 97.60069044879171, "grad_norm": 1.316277027130127, "learning_rate": 4.798619102416571e-05, "loss": 0.2277, "step": 339260 }, { "epoch": 97.60356731875719, "grad_norm": 1.4691766500473022, "learning_rate": 4.7928653624856157e-05, "loss": 0.2963, "step": 339270 }, { "epoch": 97.60644418872268, "grad_norm": 0.7680964469909668, "learning_rate": 4.7871116225546605e-05, "loss": 0.2565, "step": 339280 }, { "epoch": 97.60932105868815, "grad_norm": 0.8767337799072266, "learning_rate": 4.7813578826237054e-05, "loss": 0.3608, "step": 339290 }, { "epoch": 97.61219792865363, "grad_norm": 1.4319993257522583, "learning_rate": 4.775604142692751e-05, "loss": 0.2579, "step": 339300 }, { "epoch": 97.6150747986191, "grad_norm": 0.7754153609275818, "learning_rate": 4.769850402761795e-05, "loss": 0.2102, "step": 339310 }, { "epoch": 97.61795166858458, "grad_norm": 0.8147026300430298, "learning_rate": 4.76409666283084e-05, "loss": 0.245, "step": 339320 }, { "epoch": 97.62082853855006, "grad_norm": 0.8772786259651184, "learning_rate": 4.758342922899885e-05, "loss": 0.2008, "step": 339330 }, { "epoch": 97.62370540851553, "grad_norm": 0.7414641976356506, "learning_rate": 4.75258918296893e-05, "loss": 0.2388, "step": 339340 }, { "epoch": 97.62658227848101, "grad_norm": 1.017153263092041, "learning_rate": 4.746835443037975e-05, "loss": 0.2179, "step": 339350 }, { "epoch": 97.62945914844649, "grad_norm": 0.6916298866271973, "learning_rate": 4.7410817031070195e-05, "loss": 0.2453, "step": 339360 }, { "epoch": 97.63233601841196, "grad_norm": 1.4037470817565918, "learning_rate": 4.7353279631760644e-05, "loss": 0.2417, "step": 339370 }, { "epoch": 97.63521288837744, "grad_norm": 1.0416672229766846, "learning_rate": 4.729574223245109e-05, "loss": 0.2425, "step": 339380 }, { "epoch": 97.63808975834293, "grad_norm": 1.472811222076416, "learning_rate": 4.723820483314154e-05, "loss": 0.2512, "step": 339390 }, { "epoch": 97.6409666283084, "grad_norm": 0.8559451699256897, "learning_rate": 4.7180667433832e-05, "loss": 0.2101, "step": 339400 }, { "epoch": 97.64384349827388, "grad_norm": 0.7810558676719666, "learning_rate": 4.712313003452244e-05, "loss": 0.284, "step": 339410 }, { "epoch": 97.64672036823936, "grad_norm": 0.9678342342376709, "learning_rate": 4.706559263521289e-05, "loss": 0.215, "step": 339420 }, { "epoch": 97.64959723820483, "grad_norm": 0.762255847454071, "learning_rate": 4.7008055235903336e-05, "loss": 0.2307, "step": 339430 }, { "epoch": 97.65247410817031, "grad_norm": 1.4814860820770264, "learning_rate": 4.6950517836593785e-05, "loss": 0.2877, "step": 339440 }, { "epoch": 97.65535097813579, "grad_norm": 1.0367498397827148, "learning_rate": 4.689298043728424e-05, "loss": 0.2258, "step": 339450 }, { "epoch": 97.65822784810126, "grad_norm": 1.1185365915298462, "learning_rate": 4.683544303797468e-05, "loss": 0.2678, "step": 339460 }, { "epoch": 97.66110471806674, "grad_norm": 0.8211207985877991, "learning_rate": 4.677790563866513e-05, "loss": 0.2943, "step": 339470 }, { "epoch": 97.66398158803221, "grad_norm": 1.1069010496139526, "learning_rate": 4.672036823935558e-05, "loss": 0.2866, "step": 339480 }, { "epoch": 97.6668584579977, "grad_norm": 0.8705726861953735, "learning_rate": 4.666283084004603e-05, "loss": 0.2469, "step": 339490 }, { "epoch": 97.66973532796318, "grad_norm": 1.0044820308685303, "learning_rate": 4.6605293440736484e-05, "loss": 0.3063, "step": 339500 }, { "epoch": 97.67261219792866, "grad_norm": 1.5965955257415771, "learning_rate": 4.6547756041426926e-05, "loss": 0.242, "step": 339510 }, { "epoch": 97.67548906789413, "grad_norm": 1.424567699432373, "learning_rate": 4.6490218642117375e-05, "loss": 0.2379, "step": 339520 }, { "epoch": 97.67836593785961, "grad_norm": 0.7996363639831543, "learning_rate": 4.643268124280783e-05, "loss": 0.2348, "step": 339530 }, { "epoch": 97.68124280782509, "grad_norm": 1.6075434684753418, "learning_rate": 4.637514384349827e-05, "loss": 0.2453, "step": 339540 }, { "epoch": 97.68411967779056, "grad_norm": 1.3953107595443726, "learning_rate": 4.631760644418873e-05, "loss": 0.2447, "step": 339550 }, { "epoch": 97.68699654775604, "grad_norm": 0.7894698977470398, "learning_rate": 4.626006904487917e-05, "loss": 0.2834, "step": 339560 }, { "epoch": 97.68987341772151, "grad_norm": 1.0465906858444214, "learning_rate": 4.6202531645569625e-05, "loss": 0.3087, "step": 339570 }, { "epoch": 97.69275028768699, "grad_norm": 1.0531660318374634, "learning_rate": 4.6144994246260074e-05, "loss": 0.294, "step": 339580 }, { "epoch": 97.69562715765247, "grad_norm": 1.2792632579803467, "learning_rate": 4.6087456846950516e-05, "loss": 0.3362, "step": 339590 }, { "epoch": 97.69850402761796, "grad_norm": 1.2848989963531494, "learning_rate": 4.602991944764097e-05, "loss": 0.2326, "step": 339600 }, { "epoch": 97.70138089758343, "grad_norm": 0.9854283928871155, "learning_rate": 4.597238204833141e-05, "loss": 0.2988, "step": 339610 }, { "epoch": 97.70425776754891, "grad_norm": 1.454365849494934, "learning_rate": 4.591484464902187e-05, "loss": 0.2675, "step": 339620 }, { "epoch": 97.70713463751439, "grad_norm": 0.9009522199630737, "learning_rate": 4.585730724971232e-05, "loss": 0.2908, "step": 339630 }, { "epoch": 97.71001150747986, "grad_norm": 0.641148567199707, "learning_rate": 4.579976985040276e-05, "loss": 0.2521, "step": 339640 }, { "epoch": 97.71288837744534, "grad_norm": 1.0758635997772217, "learning_rate": 4.5742232451093215e-05, "loss": 0.287, "step": 339650 }, { "epoch": 97.71576524741081, "grad_norm": 1.5910884141921997, "learning_rate": 4.568469505178366e-05, "loss": 0.2389, "step": 339660 }, { "epoch": 97.71864211737629, "grad_norm": 0.7738966345787048, "learning_rate": 4.562715765247411e-05, "loss": 0.3023, "step": 339670 }, { "epoch": 97.72151898734177, "grad_norm": 2.0765419006347656, "learning_rate": 4.556962025316456e-05, "loss": 0.2786, "step": 339680 }, { "epoch": 97.72439585730724, "grad_norm": 1.6032061576843262, "learning_rate": 4.5512082853855e-05, "loss": 0.2196, "step": 339690 }, { "epoch": 97.72727272727273, "grad_norm": 0.7938126921653748, "learning_rate": 4.545454545454546e-05, "loss": 0.2423, "step": 339700 }, { "epoch": 97.73014959723821, "grad_norm": 1.0471181869506836, "learning_rate": 4.53970080552359e-05, "loss": 0.2319, "step": 339710 }, { "epoch": 97.73302646720369, "grad_norm": 0.7248502969741821, "learning_rate": 4.5339470655926356e-05, "loss": 0.2628, "step": 339720 }, { "epoch": 97.73590333716916, "grad_norm": 1.082710862159729, "learning_rate": 4.5281933256616805e-05, "loss": 0.2447, "step": 339730 }, { "epoch": 97.73878020713464, "grad_norm": 1.1132760047912598, "learning_rate": 4.522439585730725e-05, "loss": 0.2439, "step": 339740 }, { "epoch": 97.74165707710011, "grad_norm": 1.565856695175171, "learning_rate": 4.51668584579977e-05, "loss": 0.2462, "step": 339750 }, { "epoch": 97.74453394706559, "grad_norm": 0.8407507538795471, "learning_rate": 4.5109321058688144e-05, "loss": 0.2959, "step": 339760 }, { "epoch": 97.74741081703107, "grad_norm": 2.5445680618286133, "learning_rate": 4.50517836593786e-05, "loss": 0.3905, "step": 339770 }, { "epoch": 97.75028768699654, "grad_norm": 0.9232814311981201, "learning_rate": 4.499424626006905e-05, "loss": 0.2373, "step": 339780 }, { "epoch": 97.75316455696202, "grad_norm": 1.4353399276733398, "learning_rate": 4.493670886075949e-05, "loss": 0.3025, "step": 339790 }, { "epoch": 97.75604142692751, "grad_norm": 0.8693227767944336, "learning_rate": 4.4879171461449946e-05, "loss": 0.2976, "step": 339800 }, { "epoch": 97.75891829689299, "grad_norm": 0.8321044445037842, "learning_rate": 4.482163406214039e-05, "loss": 0.2406, "step": 339810 }, { "epoch": 97.76179516685846, "grad_norm": 1.1622453927993774, "learning_rate": 4.476409666283084e-05, "loss": 0.2602, "step": 339820 }, { "epoch": 97.76467203682394, "grad_norm": 1.3957821130752563, "learning_rate": 4.470655926352129e-05, "loss": 0.2575, "step": 339830 }, { "epoch": 97.76754890678941, "grad_norm": 0.802854061126709, "learning_rate": 4.464902186421174e-05, "loss": 0.2363, "step": 339840 }, { "epoch": 97.77042577675489, "grad_norm": 1.5611027479171753, "learning_rate": 4.459148446490219e-05, "loss": 0.276, "step": 339850 }, { "epoch": 97.77330264672037, "grad_norm": 1.1713749170303345, "learning_rate": 4.453394706559263e-05, "loss": 0.2963, "step": 339860 }, { "epoch": 97.77617951668584, "grad_norm": 1.0806866884231567, "learning_rate": 4.447640966628309e-05, "loss": 0.2207, "step": 339870 }, { "epoch": 97.77905638665132, "grad_norm": 1.2238439321517944, "learning_rate": 4.4418872266973536e-05, "loss": 0.2253, "step": 339880 }, { "epoch": 97.7819332566168, "grad_norm": 0.9187849164009094, "learning_rate": 4.4361334867663984e-05, "loss": 0.2328, "step": 339890 }, { "epoch": 97.78481012658227, "grad_norm": 1.1702156066894531, "learning_rate": 4.430379746835443e-05, "loss": 0.2368, "step": 339900 }, { "epoch": 97.78768699654776, "grad_norm": 0.8381474018096924, "learning_rate": 4.4246260069044875e-05, "loss": 0.2438, "step": 339910 }, { "epoch": 97.79056386651324, "grad_norm": 0.5857372879981995, "learning_rate": 4.418872266973533e-05, "loss": 0.2373, "step": 339920 }, { "epoch": 97.79344073647871, "grad_norm": 0.9879949688911438, "learning_rate": 4.413118527042578e-05, "loss": 0.2861, "step": 339930 }, { "epoch": 97.79631760644419, "grad_norm": 1.2610247135162354, "learning_rate": 4.407364787111623e-05, "loss": 0.2147, "step": 339940 }, { "epoch": 97.79919447640967, "grad_norm": 1.4284690618515015, "learning_rate": 4.401611047180668e-05, "loss": 0.2433, "step": 339950 }, { "epoch": 97.80207134637514, "grad_norm": 0.7871990203857422, "learning_rate": 4.395857307249712e-05, "loss": 0.2585, "step": 339960 }, { "epoch": 97.80494821634062, "grad_norm": 1.167900562286377, "learning_rate": 4.3901035673187574e-05, "loss": 0.2631, "step": 339970 }, { "epoch": 97.8078250863061, "grad_norm": 1.0913087129592896, "learning_rate": 4.384349827387802e-05, "loss": 0.2224, "step": 339980 }, { "epoch": 97.81070195627157, "grad_norm": 1.2930989265441895, "learning_rate": 4.378596087456847e-05, "loss": 0.2917, "step": 339990 }, { "epoch": 97.81357882623705, "grad_norm": 3.391737461090088, "learning_rate": 4.372842347525892e-05, "loss": 0.273, "step": 340000 }, { "epoch": 97.81645569620254, "grad_norm": 1.5720213651657104, "learning_rate": 4.367088607594936e-05, "loss": 0.2839, "step": 340010 }, { "epoch": 97.81933256616801, "grad_norm": 0.7578510046005249, "learning_rate": 4.361334867663982e-05, "loss": 0.2546, "step": 340020 }, { "epoch": 97.82220943613349, "grad_norm": 0.8551876544952393, "learning_rate": 4.3555811277330267e-05, "loss": 0.2686, "step": 340030 }, { "epoch": 97.82508630609897, "grad_norm": 1.6887677907943726, "learning_rate": 4.3498273878020715e-05, "loss": 0.3465, "step": 340040 }, { "epoch": 97.82796317606444, "grad_norm": 1.6243871450424194, "learning_rate": 4.3440736478711164e-05, "loss": 0.2795, "step": 340050 }, { "epoch": 97.83084004602992, "grad_norm": 0.631308376789093, "learning_rate": 4.3383199079401606e-05, "loss": 0.272, "step": 340060 }, { "epoch": 97.8337169159954, "grad_norm": 0.65153968334198, "learning_rate": 4.332566168009206e-05, "loss": 0.2344, "step": 340070 }, { "epoch": 97.83659378596087, "grad_norm": 1.0171113014221191, "learning_rate": 4.326812428078251e-05, "loss": 0.2707, "step": 340080 }, { "epoch": 97.83947065592635, "grad_norm": 1.3013601303100586, "learning_rate": 4.321058688147296e-05, "loss": 0.2509, "step": 340090 }, { "epoch": 97.84234752589182, "grad_norm": 1.255751371383667, "learning_rate": 4.315304948216341e-05, "loss": 0.2733, "step": 340100 }, { "epoch": 97.8452243958573, "grad_norm": 0.9087263345718384, "learning_rate": 4.3095512082853856e-05, "loss": 0.2254, "step": 340110 }, { "epoch": 97.84810126582279, "grad_norm": 1.2013428211212158, "learning_rate": 4.3037974683544305e-05, "loss": 0.2276, "step": 340120 }, { "epoch": 97.85097813578827, "grad_norm": 1.0761388540267944, "learning_rate": 4.2980437284234754e-05, "loss": 0.2539, "step": 340130 }, { "epoch": 97.85385500575374, "grad_norm": 0.7360619902610779, "learning_rate": 4.29228998849252e-05, "loss": 0.2402, "step": 340140 }, { "epoch": 97.85673187571922, "grad_norm": 0.8351040482521057, "learning_rate": 4.286536248561565e-05, "loss": 0.3581, "step": 340150 }, { "epoch": 97.8596087456847, "grad_norm": 0.7319381833076477, "learning_rate": 4.28078250863061e-05, "loss": 0.2054, "step": 340160 }, { "epoch": 97.86248561565017, "grad_norm": 0.7422205209732056, "learning_rate": 4.275028768699655e-05, "loss": 0.2901, "step": 340170 }, { "epoch": 97.86536248561565, "grad_norm": 1.2757760286331177, "learning_rate": 4.2692750287687e-05, "loss": 0.3104, "step": 340180 }, { "epoch": 97.86823935558112, "grad_norm": 1.5824415683746338, "learning_rate": 4.2635212888377446e-05, "loss": 0.3001, "step": 340190 }, { "epoch": 97.8711162255466, "grad_norm": 0.7672368288040161, "learning_rate": 4.2577675489067895e-05, "loss": 0.3032, "step": 340200 }, { "epoch": 97.87399309551208, "grad_norm": 0.801538348197937, "learning_rate": 4.2520138089758344e-05, "loss": 0.2361, "step": 340210 }, { "epoch": 97.87686996547757, "grad_norm": 1.0883914232254028, "learning_rate": 4.246260069044879e-05, "loss": 0.2629, "step": 340220 }, { "epoch": 97.87974683544304, "grad_norm": 2.3020448684692383, "learning_rate": 4.240506329113924e-05, "loss": 0.324, "step": 340230 }, { "epoch": 97.88262370540852, "grad_norm": 1.1457120180130005, "learning_rate": 4.234752589182969e-05, "loss": 0.2799, "step": 340240 }, { "epoch": 97.885500575374, "grad_norm": 0.7114917039871216, "learning_rate": 4.228998849252014e-05, "loss": 0.3013, "step": 340250 }, { "epoch": 97.88837744533947, "grad_norm": 0.9035434722900391, "learning_rate": 4.223245109321059e-05, "loss": 0.2883, "step": 340260 }, { "epoch": 97.89125431530495, "grad_norm": 0.7574723362922668, "learning_rate": 4.2174913693901036e-05, "loss": 0.2421, "step": 340270 }, { "epoch": 97.89413118527042, "grad_norm": 1.0060367584228516, "learning_rate": 4.2117376294591485e-05, "loss": 0.2597, "step": 340280 }, { "epoch": 97.8970080552359, "grad_norm": 1.21909499168396, "learning_rate": 4.2059838895281933e-05, "loss": 0.2223, "step": 340290 }, { "epoch": 97.89988492520138, "grad_norm": 1.3901748657226562, "learning_rate": 4.200230149597238e-05, "loss": 0.2248, "step": 340300 }, { "epoch": 97.90276179516685, "grad_norm": 0.8233561515808105, "learning_rate": 4.194476409666283e-05, "loss": 0.2376, "step": 340310 }, { "epoch": 97.90563866513233, "grad_norm": 0.7563440203666687, "learning_rate": 4.188722669735328e-05, "loss": 0.2461, "step": 340320 }, { "epoch": 97.90851553509782, "grad_norm": 1.37386155128479, "learning_rate": 4.1829689298043735e-05, "loss": 0.2587, "step": 340330 }, { "epoch": 97.9113924050633, "grad_norm": 1.1060898303985596, "learning_rate": 4.177215189873418e-05, "loss": 0.2695, "step": 340340 }, { "epoch": 97.91426927502877, "grad_norm": 1.1913620233535767, "learning_rate": 4.1714614499424626e-05, "loss": 0.2377, "step": 340350 }, { "epoch": 97.91714614499425, "grad_norm": 1.5420207977294922, "learning_rate": 4.1657077100115075e-05, "loss": 0.23, "step": 340360 }, { "epoch": 97.92002301495972, "grad_norm": 0.966530978679657, "learning_rate": 4.159953970080552e-05, "loss": 0.2099, "step": 340370 }, { "epoch": 97.9228998849252, "grad_norm": 1.24337899684906, "learning_rate": 4.154200230149598e-05, "loss": 0.2286, "step": 340380 }, { "epoch": 97.92577675489068, "grad_norm": 1.2845523357391357, "learning_rate": 4.148446490218642e-05, "loss": 0.2482, "step": 340390 }, { "epoch": 97.92865362485615, "grad_norm": 1.4492462873458862, "learning_rate": 4.142692750287687e-05, "loss": 0.2417, "step": 340400 }, { "epoch": 97.93153049482163, "grad_norm": 1.6255820989608765, "learning_rate": 4.136939010356732e-05, "loss": 0.2577, "step": 340410 }, { "epoch": 97.9344073647871, "grad_norm": 0.8362924456596375, "learning_rate": 4.131185270425777e-05, "loss": 0.2107, "step": 340420 }, { "epoch": 97.9372842347526, "grad_norm": 2.2037103176116943, "learning_rate": 4.125431530494822e-05, "loss": 0.2643, "step": 340430 }, { "epoch": 97.94016110471807, "grad_norm": 0.9262727499008179, "learning_rate": 4.1196777905638664e-05, "loss": 0.2426, "step": 340440 }, { "epoch": 97.94303797468355, "grad_norm": 0.8494892716407776, "learning_rate": 4.113924050632911e-05, "loss": 0.2826, "step": 340450 }, { "epoch": 97.94591484464902, "grad_norm": 1.439743161201477, "learning_rate": 4.108170310701956e-05, "loss": 0.2312, "step": 340460 }, { "epoch": 97.9487917146145, "grad_norm": 1.0493592023849487, "learning_rate": 4.102416570771001e-05, "loss": 0.258, "step": 340470 }, { "epoch": 97.95166858457998, "grad_norm": 1.1304008960723877, "learning_rate": 4.0966628308400466e-05, "loss": 0.2369, "step": 340480 }, { "epoch": 97.95454545454545, "grad_norm": 2.125258684158325, "learning_rate": 4.090909090909091e-05, "loss": 0.31, "step": 340490 }, { "epoch": 97.95742232451093, "grad_norm": 1.5602400302886963, "learning_rate": 4.085155350978136e-05, "loss": 0.2719, "step": 340500 }, { "epoch": 97.9602991944764, "grad_norm": 0.8554592132568359, "learning_rate": 4.0794016110471805e-05, "loss": 0.3045, "step": 340510 }, { "epoch": 97.96317606444188, "grad_norm": 0.5851342678070068, "learning_rate": 4.0736478711162254e-05, "loss": 0.2449, "step": 340520 }, { "epoch": 97.96605293440736, "grad_norm": 0.7006160020828247, "learning_rate": 4.067894131185271e-05, "loss": 0.2344, "step": 340530 }, { "epoch": 97.96892980437285, "grad_norm": 1.0098731517791748, "learning_rate": 4.062140391254315e-05, "loss": 0.2997, "step": 340540 }, { "epoch": 97.97180667433832, "grad_norm": 1.3647717237472534, "learning_rate": 4.05638665132336e-05, "loss": 0.2468, "step": 340550 }, { "epoch": 97.9746835443038, "grad_norm": 2.0409483909606934, "learning_rate": 4.050632911392405e-05, "loss": 0.2799, "step": 340560 }, { "epoch": 97.97756041426928, "grad_norm": 1.3158197402954102, "learning_rate": 4.04487917146145e-05, "loss": 0.2564, "step": 340570 }, { "epoch": 97.98043728423475, "grad_norm": 1.0227688550949097, "learning_rate": 4.039125431530495e-05, "loss": 0.2449, "step": 340580 }, { "epoch": 97.98331415420023, "grad_norm": 1.7957606315612793, "learning_rate": 4.0333716915995395e-05, "loss": 0.2718, "step": 340590 }, { "epoch": 97.9861910241657, "grad_norm": 1.0292179584503174, "learning_rate": 4.027617951668585e-05, "loss": 0.2242, "step": 340600 }, { "epoch": 97.98906789413118, "grad_norm": 1.1985249519348145, "learning_rate": 4.021864211737629e-05, "loss": 0.2881, "step": 340610 }, { "epoch": 97.99194476409666, "grad_norm": 1.1688627004623413, "learning_rate": 4.016110471806674e-05, "loss": 0.2671, "step": 340620 }, { "epoch": 97.99482163406213, "grad_norm": 1.0606824159622192, "learning_rate": 4.01035673187572e-05, "loss": 0.2323, "step": 340630 }, { "epoch": 97.99769850402762, "grad_norm": 0.7825783491134644, "learning_rate": 4.004602991944764e-05, "loss": 0.2935, "step": 340640 }, { "epoch": 98.0005753739931, "grad_norm": 1.0480685234069824, "learning_rate": 3.9988492520138094e-05, "loss": 0.2327, "step": 340650 }, { "epoch": 98.00345224395858, "grad_norm": 2.229525566101074, "learning_rate": 3.9930955120828536e-05, "loss": 0.2535, "step": 340660 }, { "epoch": 98.00632911392405, "grad_norm": 1.0942449569702148, "learning_rate": 3.9873417721518985e-05, "loss": 0.3717, "step": 340670 }, { "epoch": 98.00920598388953, "grad_norm": 0.8562994003295898, "learning_rate": 3.981588032220944e-05, "loss": 0.3148, "step": 340680 }, { "epoch": 98.012082853855, "grad_norm": 0.8172922134399414, "learning_rate": 3.975834292289988e-05, "loss": 0.2425, "step": 340690 }, { "epoch": 98.01495972382048, "grad_norm": 1.1134217977523804, "learning_rate": 3.970080552359034e-05, "loss": 0.2424, "step": 340700 }, { "epoch": 98.01783659378596, "grad_norm": 0.779384434223175, "learning_rate": 3.964326812428078e-05, "loss": 0.2595, "step": 340710 }, { "epoch": 98.02071346375143, "grad_norm": 0.5351538062095642, "learning_rate": 3.958573072497123e-05, "loss": 0.2092, "step": 340720 }, { "epoch": 98.02359033371691, "grad_norm": 1.160595178604126, "learning_rate": 3.9528193325661684e-05, "loss": 0.2055, "step": 340730 }, { "epoch": 98.02646720368239, "grad_norm": 0.6762804388999939, "learning_rate": 3.9470655926352126e-05, "loss": 0.2115, "step": 340740 }, { "epoch": 98.02934407364788, "grad_norm": 0.5890066623687744, "learning_rate": 3.941311852704258e-05, "loss": 0.2425, "step": 340750 }, { "epoch": 98.03222094361335, "grad_norm": 0.9324814081192017, "learning_rate": 3.9355581127733024e-05, "loss": 0.2971, "step": 340760 }, { "epoch": 98.03509781357883, "grad_norm": 1.0637699365615845, "learning_rate": 3.929804372842347e-05, "loss": 0.251, "step": 340770 }, { "epoch": 98.0379746835443, "grad_norm": 0.9737790822982788, "learning_rate": 3.924050632911393e-05, "loss": 0.2086, "step": 340780 }, { "epoch": 98.04085155350978, "grad_norm": 1.4257131814956665, "learning_rate": 3.918296892980437e-05, "loss": 0.2413, "step": 340790 }, { "epoch": 98.04372842347526, "grad_norm": 1.219024658203125, "learning_rate": 3.9125431530494825e-05, "loss": 0.243, "step": 340800 }, { "epoch": 98.04660529344073, "grad_norm": 0.8617724776268005, "learning_rate": 3.9067894131185274e-05, "loss": 0.2737, "step": 340810 }, { "epoch": 98.04948216340621, "grad_norm": 1.0625861883163452, "learning_rate": 3.9010356731875716e-05, "loss": 0.262, "step": 340820 }, { "epoch": 98.05235903337169, "grad_norm": 1.4049851894378662, "learning_rate": 3.895281933256617e-05, "loss": 0.2399, "step": 340830 }, { "epoch": 98.05523590333716, "grad_norm": 1.4685535430908203, "learning_rate": 3.8895281933256613e-05, "loss": 0.2389, "step": 340840 }, { "epoch": 98.05811277330265, "grad_norm": 0.8031576871871948, "learning_rate": 3.883774453394707e-05, "loss": 0.2434, "step": 340850 }, { "epoch": 98.06098964326813, "grad_norm": 1.1836813688278198, "learning_rate": 3.878020713463752e-05, "loss": 0.2893, "step": 340860 }, { "epoch": 98.0638665132336, "grad_norm": 0.8999091386795044, "learning_rate": 3.8722669735327966e-05, "loss": 0.3007, "step": 340870 }, { "epoch": 98.06674338319908, "grad_norm": 0.6976573467254639, "learning_rate": 3.8665132336018415e-05, "loss": 0.2232, "step": 340880 }, { "epoch": 98.06962025316456, "grad_norm": 0.9952710866928101, "learning_rate": 3.860759493670886e-05, "loss": 0.2365, "step": 340890 }, { "epoch": 98.07249712313003, "grad_norm": 0.7296663522720337, "learning_rate": 3.855005753739931e-05, "loss": 0.2732, "step": 340900 }, { "epoch": 98.07537399309551, "grad_norm": 0.9010481834411621, "learning_rate": 3.849252013808976e-05, "loss": 0.2479, "step": 340910 }, { "epoch": 98.07825086306099, "grad_norm": 1.114309549331665, "learning_rate": 3.843498273878021e-05, "loss": 0.3091, "step": 340920 }, { "epoch": 98.08112773302646, "grad_norm": 1.0809892416000366, "learning_rate": 3.837744533947066e-05, "loss": 0.2198, "step": 340930 }, { "epoch": 98.08400460299194, "grad_norm": 1.2180839776992798, "learning_rate": 3.83199079401611e-05, "loss": 0.2592, "step": 340940 }, { "epoch": 98.08688147295742, "grad_norm": 0.6600455045700073, "learning_rate": 3.8262370540851556e-05, "loss": 0.2235, "step": 340950 }, { "epoch": 98.0897583429229, "grad_norm": 1.5532348155975342, "learning_rate": 3.8204833141542005e-05, "loss": 0.2557, "step": 340960 }, { "epoch": 98.09263521288838, "grad_norm": 1.4521230459213257, "learning_rate": 3.8147295742232454e-05, "loss": 0.269, "step": 340970 }, { "epoch": 98.09551208285386, "grad_norm": 1.3555067777633667, "learning_rate": 3.80897583429229e-05, "loss": 0.2419, "step": 340980 }, { "epoch": 98.09838895281933, "grad_norm": 1.5922331809997559, "learning_rate": 3.8032220943613344e-05, "loss": 0.3178, "step": 340990 }, { "epoch": 98.10126582278481, "grad_norm": 0.8881219625473022, "learning_rate": 3.79746835443038e-05, "loss": 0.2756, "step": 341000 }, { "epoch": 98.10414269275029, "grad_norm": 0.6121795177459717, "learning_rate": 3.791714614499425e-05, "loss": 0.2938, "step": 341010 }, { "epoch": 98.10701956271576, "grad_norm": 1.2102808952331543, "learning_rate": 3.78596087456847e-05, "loss": 0.2482, "step": 341020 }, { "epoch": 98.10989643268124, "grad_norm": 1.0717650651931763, "learning_rate": 3.7802071346375146e-05, "loss": 0.2529, "step": 341030 }, { "epoch": 98.11277330264672, "grad_norm": 1.072168231010437, "learning_rate": 3.774453394706559e-05, "loss": 0.2727, "step": 341040 }, { "epoch": 98.11565017261219, "grad_norm": 1.0908244848251343, "learning_rate": 3.7686996547756043e-05, "loss": 0.261, "step": 341050 }, { "epoch": 98.11852704257768, "grad_norm": 1.5506311655044556, "learning_rate": 3.762945914844649e-05, "loss": 0.2565, "step": 341060 }, { "epoch": 98.12140391254316, "grad_norm": 0.9232326149940491, "learning_rate": 3.757192174913694e-05, "loss": 0.2109, "step": 341070 }, { "epoch": 98.12428078250863, "grad_norm": 1.8165414333343506, "learning_rate": 3.751438434982739e-05, "loss": 0.2436, "step": 341080 }, { "epoch": 98.12715765247411, "grad_norm": 1.4937307834625244, "learning_rate": 3.745684695051784e-05, "loss": 0.2547, "step": 341090 }, { "epoch": 98.13003452243959, "grad_norm": 1.5074220895767212, "learning_rate": 3.739930955120829e-05, "loss": 0.2323, "step": 341100 }, { "epoch": 98.13291139240506, "grad_norm": 1.099658489227295, "learning_rate": 3.7341772151898736e-05, "loss": 0.3192, "step": 341110 }, { "epoch": 98.13578826237054, "grad_norm": 0.9675432443618774, "learning_rate": 3.7284234752589185e-05, "loss": 0.2538, "step": 341120 }, { "epoch": 98.13866513233602, "grad_norm": 1.131332516670227, "learning_rate": 3.722669735327963e-05, "loss": 0.2357, "step": 341130 }, { "epoch": 98.14154200230149, "grad_norm": 0.6212317943572998, "learning_rate": 3.716915995397008e-05, "loss": 0.2929, "step": 341140 }, { "epoch": 98.14441887226697, "grad_norm": 1.8944793939590454, "learning_rate": 3.711162255466053e-05, "loss": 0.2183, "step": 341150 }, { "epoch": 98.14729574223244, "grad_norm": 1.1139558553695679, "learning_rate": 3.705408515535098e-05, "loss": 0.2588, "step": 341160 }, { "epoch": 98.15017261219793, "grad_norm": 1.76997971534729, "learning_rate": 3.699654775604143e-05, "loss": 0.2094, "step": 341170 }, { "epoch": 98.15304948216341, "grad_norm": 0.9746003746986389, "learning_rate": 3.693901035673188e-05, "loss": 0.213, "step": 341180 }, { "epoch": 98.15592635212889, "grad_norm": 1.1224738359451294, "learning_rate": 3.6881472957422326e-05, "loss": 0.3091, "step": 341190 }, { "epoch": 98.15880322209436, "grad_norm": 0.8711864352226257, "learning_rate": 3.6823935558112774e-05, "loss": 0.2241, "step": 341200 }, { "epoch": 98.16168009205984, "grad_norm": 1.9158238172531128, "learning_rate": 3.676639815880322e-05, "loss": 0.3033, "step": 341210 }, { "epoch": 98.16455696202532, "grad_norm": 1.7678680419921875, "learning_rate": 3.670886075949367e-05, "loss": 0.2778, "step": 341220 }, { "epoch": 98.16743383199079, "grad_norm": 0.7168439626693726, "learning_rate": 3.665132336018412e-05, "loss": 0.1827, "step": 341230 }, { "epoch": 98.17031070195627, "grad_norm": 1.178845763206482, "learning_rate": 3.659378596087457e-05, "loss": 0.1955, "step": 341240 }, { "epoch": 98.17318757192174, "grad_norm": 0.6778704524040222, "learning_rate": 3.653624856156502e-05, "loss": 0.1864, "step": 341250 }, { "epoch": 98.17606444188722, "grad_norm": 1.0503355264663696, "learning_rate": 3.647871116225547e-05, "loss": 0.2774, "step": 341260 }, { "epoch": 98.17894131185271, "grad_norm": 1.0488660335540771, "learning_rate": 3.6421173762945915e-05, "loss": 0.2175, "step": 341270 }, { "epoch": 98.18181818181819, "grad_norm": 0.9313967823982239, "learning_rate": 3.6363636363636364e-05, "loss": 0.2902, "step": 341280 }, { "epoch": 98.18469505178366, "grad_norm": 0.9883120656013489, "learning_rate": 3.630609896432681e-05, "loss": 0.244, "step": 341290 }, { "epoch": 98.18757192174914, "grad_norm": 1.1268941164016724, "learning_rate": 3.624856156501726e-05, "loss": 0.256, "step": 341300 }, { "epoch": 98.19044879171462, "grad_norm": 0.7521207928657532, "learning_rate": 3.619102416570771e-05, "loss": 0.2512, "step": 341310 }, { "epoch": 98.19332566168009, "grad_norm": 1.392680287361145, "learning_rate": 3.613348676639816e-05, "loss": 0.24, "step": 341320 }, { "epoch": 98.19620253164557, "grad_norm": 0.5308014750480652, "learning_rate": 3.607594936708861e-05, "loss": 0.2662, "step": 341330 }, { "epoch": 98.19907940161104, "grad_norm": 0.8151541948318481, "learning_rate": 3.6018411967779057e-05, "loss": 0.243, "step": 341340 }, { "epoch": 98.20195627157652, "grad_norm": 0.9022033214569092, "learning_rate": 3.5960874568469505e-05, "loss": 0.2526, "step": 341350 }, { "epoch": 98.204833141542, "grad_norm": 2.2393569946289062, "learning_rate": 3.590333716915996e-05, "loss": 0.276, "step": 341360 }, { "epoch": 98.20771001150747, "grad_norm": 0.9620497226715088, "learning_rate": 3.58457997698504e-05, "loss": 0.214, "step": 341370 }, { "epoch": 98.21058688147296, "grad_norm": 3.287879467010498, "learning_rate": 3.578826237054085e-05, "loss": 0.2935, "step": 341380 }, { "epoch": 98.21346375143844, "grad_norm": 1.6858830451965332, "learning_rate": 3.57307249712313e-05, "loss": 0.2623, "step": 341390 }, { "epoch": 98.21634062140392, "grad_norm": 1.3871628046035767, "learning_rate": 3.567318757192175e-05, "loss": 0.194, "step": 341400 }, { "epoch": 98.21921749136939, "grad_norm": 1.1465221643447876, "learning_rate": 3.5615650172612204e-05, "loss": 0.2482, "step": 341410 }, { "epoch": 98.22209436133487, "grad_norm": 1.6640974283218384, "learning_rate": 3.5558112773302646e-05, "loss": 0.2952, "step": 341420 }, { "epoch": 98.22497123130034, "grad_norm": 1.1606398820877075, "learning_rate": 3.5500575373993095e-05, "loss": 0.2612, "step": 341430 }, { "epoch": 98.22784810126582, "grad_norm": 0.866303563117981, "learning_rate": 3.5443037974683544e-05, "loss": 0.2488, "step": 341440 }, { "epoch": 98.2307249712313, "grad_norm": 0.8192406892776489, "learning_rate": 3.538550057537399e-05, "loss": 0.2608, "step": 341450 }, { "epoch": 98.23360184119677, "grad_norm": 1.5858805179595947, "learning_rate": 3.532796317606445e-05, "loss": 0.2505, "step": 341460 }, { "epoch": 98.23647871116225, "grad_norm": 0.8661216497421265, "learning_rate": 3.527042577675489e-05, "loss": 0.2457, "step": 341470 }, { "epoch": 98.23935558112774, "grad_norm": 0.8422536849975586, "learning_rate": 3.521288837744534e-05, "loss": 0.2762, "step": 341480 }, { "epoch": 98.24223245109322, "grad_norm": 0.8049980401992798, "learning_rate": 3.515535097813579e-05, "loss": 0.3075, "step": 341490 }, { "epoch": 98.24510932105869, "grad_norm": 1.032362461090088, "learning_rate": 3.5097813578826236e-05, "loss": 0.2895, "step": 341500 }, { "epoch": 98.24798619102417, "grad_norm": 1.0123721361160278, "learning_rate": 3.504027617951669e-05, "loss": 0.2816, "step": 341510 }, { "epoch": 98.25086306098964, "grad_norm": 1.4900492429733276, "learning_rate": 3.4982738780207134e-05, "loss": 0.2769, "step": 341520 }, { "epoch": 98.25373993095512, "grad_norm": 0.716967761516571, "learning_rate": 3.492520138089758e-05, "loss": 0.2177, "step": 341530 }, { "epoch": 98.2566168009206, "grad_norm": 1.99535071849823, "learning_rate": 3.486766398158803e-05, "loss": 0.2479, "step": 341540 }, { "epoch": 98.25949367088607, "grad_norm": 1.4729846715927124, "learning_rate": 3.481012658227848e-05, "loss": 0.2066, "step": 341550 }, { "epoch": 98.26237054085155, "grad_norm": 1.319655179977417, "learning_rate": 3.4752589182968935e-05, "loss": 0.2712, "step": 341560 }, { "epoch": 98.26524741081703, "grad_norm": 0.9835307598114014, "learning_rate": 3.469505178365938e-05, "loss": 0.233, "step": 341570 }, { "epoch": 98.2681242807825, "grad_norm": 1.0853056907653809, "learning_rate": 3.4637514384349826e-05, "loss": 0.2326, "step": 341580 }, { "epoch": 98.27100115074799, "grad_norm": 1.0747182369232178, "learning_rate": 3.4579976985040275e-05, "loss": 0.241, "step": 341590 }, { "epoch": 98.27387802071347, "grad_norm": 0.847117006778717, "learning_rate": 3.4522439585730723e-05, "loss": 0.2511, "step": 341600 }, { "epoch": 98.27675489067894, "grad_norm": 1.2468271255493164, "learning_rate": 3.446490218642118e-05, "loss": 0.2899, "step": 341610 }, { "epoch": 98.27963176064442, "grad_norm": 0.9104768633842468, "learning_rate": 3.440736478711162e-05, "loss": 0.2575, "step": 341620 }, { "epoch": 98.2825086306099, "grad_norm": 0.8842414617538452, "learning_rate": 3.4349827387802076e-05, "loss": 0.2856, "step": 341630 }, { "epoch": 98.28538550057537, "grad_norm": 1.2012476921081543, "learning_rate": 3.429228998849252e-05, "loss": 0.25, "step": 341640 }, { "epoch": 98.28826237054085, "grad_norm": 1.3703374862670898, "learning_rate": 3.423475258918297e-05, "loss": 0.2349, "step": 341650 }, { "epoch": 98.29113924050633, "grad_norm": 1.479710340499878, "learning_rate": 3.417721518987342e-05, "loss": 0.3314, "step": 341660 }, { "epoch": 98.2940161104718, "grad_norm": 0.7791826725006104, "learning_rate": 3.4119677790563864e-05, "loss": 0.2343, "step": 341670 }, { "epoch": 98.29689298043728, "grad_norm": 1.7275077104568481, "learning_rate": 3.406214039125432e-05, "loss": 0.2441, "step": 341680 }, { "epoch": 98.29976985040277, "grad_norm": 1.3443241119384766, "learning_rate": 3.400460299194476e-05, "loss": 0.2651, "step": 341690 }, { "epoch": 98.30264672036824, "grad_norm": 1.332924723625183, "learning_rate": 3.394706559263521e-05, "loss": 0.2649, "step": 341700 }, { "epoch": 98.30552359033372, "grad_norm": 0.982814610004425, "learning_rate": 3.3889528193325666e-05, "loss": 0.2186, "step": 341710 }, { "epoch": 98.3084004602992, "grad_norm": 1.0699743032455444, "learning_rate": 3.383199079401611e-05, "loss": 0.3046, "step": 341720 }, { "epoch": 98.31127733026467, "grad_norm": 1.5075485706329346, "learning_rate": 3.3774453394706564e-05, "loss": 0.198, "step": 341730 }, { "epoch": 98.31415420023015, "grad_norm": 0.8581148386001587, "learning_rate": 3.3716915995397006e-05, "loss": 0.2717, "step": 341740 }, { "epoch": 98.31703107019563, "grad_norm": 2.084408760070801, "learning_rate": 3.3659378596087454e-05, "loss": 0.242, "step": 341750 }, { "epoch": 98.3199079401611, "grad_norm": 1.2800085544586182, "learning_rate": 3.360184119677791e-05, "loss": 0.2388, "step": 341760 }, { "epoch": 98.32278481012658, "grad_norm": 0.8184646964073181, "learning_rate": 3.354430379746835e-05, "loss": 0.2475, "step": 341770 }, { "epoch": 98.32566168009205, "grad_norm": 1.5132004022598267, "learning_rate": 3.348676639815881e-05, "loss": 0.2853, "step": 341780 }, { "epoch": 98.32853855005754, "grad_norm": 1.5275516510009766, "learning_rate": 3.342922899884925e-05, "loss": 0.263, "step": 341790 }, { "epoch": 98.33141542002302, "grad_norm": 0.8975980877876282, "learning_rate": 3.33716915995397e-05, "loss": 0.2451, "step": 341800 }, { "epoch": 98.3342922899885, "grad_norm": 0.5117254257202148, "learning_rate": 3.3314154200230153e-05, "loss": 0.277, "step": 341810 }, { "epoch": 98.33716915995397, "grad_norm": 1.1308369636535645, "learning_rate": 3.3256616800920595e-05, "loss": 0.2655, "step": 341820 }, { "epoch": 98.34004602991945, "grad_norm": 1.7764819860458374, "learning_rate": 3.319907940161105e-05, "loss": 0.297, "step": 341830 }, { "epoch": 98.34292289988493, "grad_norm": 1.369991421699524, "learning_rate": 3.314154200230149e-05, "loss": 0.2233, "step": 341840 }, { "epoch": 98.3457997698504, "grad_norm": 1.5524652004241943, "learning_rate": 3.308400460299195e-05, "loss": 0.2508, "step": 341850 }, { "epoch": 98.34867663981588, "grad_norm": 1.08164644241333, "learning_rate": 3.30264672036824e-05, "loss": 0.2649, "step": 341860 }, { "epoch": 98.35155350978135, "grad_norm": 1.0760570764541626, "learning_rate": 3.296892980437284e-05, "loss": 0.2491, "step": 341870 }, { "epoch": 98.35443037974683, "grad_norm": 0.7548704743385315, "learning_rate": 3.2911392405063295e-05, "loss": 0.1967, "step": 341880 }, { "epoch": 98.3573072497123, "grad_norm": 0.9481642246246338, "learning_rate": 3.2853855005753736e-05, "loss": 0.2709, "step": 341890 }, { "epoch": 98.3601841196778, "grad_norm": 1.6552422046661377, "learning_rate": 3.279631760644419e-05, "loss": 0.2526, "step": 341900 }, { "epoch": 98.36306098964327, "grad_norm": 1.6849477291107178, "learning_rate": 3.273878020713464e-05, "loss": 0.2616, "step": 341910 }, { "epoch": 98.36593785960875, "grad_norm": 1.7064063549041748, "learning_rate": 3.268124280782508e-05, "loss": 0.252, "step": 341920 }, { "epoch": 98.36881472957423, "grad_norm": 1.2420079708099365, "learning_rate": 3.262370540851554e-05, "loss": 0.276, "step": 341930 }, { "epoch": 98.3716915995397, "grad_norm": 0.9918617010116577, "learning_rate": 3.256616800920598e-05, "loss": 0.2281, "step": 341940 }, { "epoch": 98.37456846950518, "grad_norm": 0.4688335955142975, "learning_rate": 3.2508630609896436e-05, "loss": 0.2133, "step": 341950 }, { "epoch": 98.37744533947065, "grad_norm": 1.6992733478546143, "learning_rate": 3.2451093210586884e-05, "loss": 0.258, "step": 341960 }, { "epoch": 98.38032220943613, "grad_norm": 1.055674433708191, "learning_rate": 3.2393555811277326e-05, "loss": 0.2579, "step": 341970 }, { "epoch": 98.3831990794016, "grad_norm": 1.7245054244995117, "learning_rate": 3.233601841196778e-05, "loss": 0.3127, "step": 341980 }, { "epoch": 98.38607594936708, "grad_norm": 1.1549530029296875, "learning_rate": 3.2278481012658224e-05, "loss": 0.2434, "step": 341990 }, { "epoch": 98.38895281933257, "grad_norm": 1.129339337348938, "learning_rate": 3.222094361334868e-05, "loss": 0.201, "step": 342000 }, { "epoch": 98.39182968929805, "grad_norm": 0.9057912230491638, "learning_rate": 3.216340621403913e-05, "loss": 0.2315, "step": 342010 }, { "epoch": 98.39470655926353, "grad_norm": 0.826156497001648, "learning_rate": 3.210586881472957e-05, "loss": 0.2513, "step": 342020 }, { "epoch": 98.397583429229, "grad_norm": 1.4396764039993286, "learning_rate": 3.2048331415420025e-05, "loss": 0.2225, "step": 342030 }, { "epoch": 98.40046029919448, "grad_norm": 1.2257362604141235, "learning_rate": 3.199079401611047e-05, "loss": 0.2212, "step": 342040 }, { "epoch": 98.40333716915995, "grad_norm": 1.471511960029602, "learning_rate": 3.193325661680092e-05, "loss": 0.2798, "step": 342050 }, { "epoch": 98.40621403912543, "grad_norm": 0.7645639777183533, "learning_rate": 3.187571921749137e-05, "loss": 0.2143, "step": 342060 }, { "epoch": 98.4090909090909, "grad_norm": 1.459030270576477, "learning_rate": 3.1818181818181814e-05, "loss": 0.2636, "step": 342070 }, { "epoch": 98.41196777905638, "grad_norm": 0.9240390062332153, "learning_rate": 3.176064441887227e-05, "loss": 0.2109, "step": 342080 }, { "epoch": 98.41484464902186, "grad_norm": 0.943461537361145, "learning_rate": 3.170310701956272e-05, "loss": 0.2266, "step": 342090 }, { "epoch": 98.41772151898734, "grad_norm": 0.6276825666427612, "learning_rate": 3.1645569620253167e-05, "loss": 0.2719, "step": 342100 }, { "epoch": 98.42059838895283, "grad_norm": 1.3407922983169556, "learning_rate": 3.1588032220943615e-05, "loss": 0.1737, "step": 342110 }, { "epoch": 98.4234752589183, "grad_norm": 1.3496497869491577, "learning_rate": 3.1530494821634064e-05, "loss": 0.267, "step": 342120 }, { "epoch": 98.42635212888378, "grad_norm": 1.214255928993225, "learning_rate": 3.147295742232451e-05, "loss": 0.1845, "step": 342130 }, { "epoch": 98.42922899884925, "grad_norm": 0.9546146392822266, "learning_rate": 3.141542002301496e-05, "loss": 0.2674, "step": 342140 }, { "epoch": 98.43210586881473, "grad_norm": 0.5224170088768005, "learning_rate": 3.135788262370541e-05, "loss": 0.2671, "step": 342150 }, { "epoch": 98.4349827387802, "grad_norm": 0.7837913632392883, "learning_rate": 3.130034522439586e-05, "loss": 0.1907, "step": 342160 }, { "epoch": 98.43785960874568, "grad_norm": 0.9026668071746826, "learning_rate": 3.124280782508631e-05, "loss": 0.2581, "step": 342170 }, { "epoch": 98.44073647871116, "grad_norm": 1.4452389478683472, "learning_rate": 3.1185270425776756e-05, "loss": 0.2688, "step": 342180 }, { "epoch": 98.44361334867664, "grad_norm": 1.009721279144287, "learning_rate": 3.1127733026467205e-05, "loss": 0.2595, "step": 342190 }, { "epoch": 98.44649021864211, "grad_norm": 1.4960483312606812, "learning_rate": 3.1070195627157654e-05, "loss": 0.2479, "step": 342200 }, { "epoch": 98.4493670886076, "grad_norm": 1.0942676067352295, "learning_rate": 3.10126582278481e-05, "loss": 0.241, "step": 342210 }, { "epoch": 98.45224395857308, "grad_norm": 1.5067574977874756, "learning_rate": 3.095512082853855e-05, "loss": 0.2241, "step": 342220 }, { "epoch": 98.45512082853855, "grad_norm": 1.2601139545440674, "learning_rate": 3.0897583429229e-05, "loss": 0.2427, "step": 342230 }, { "epoch": 98.45799769850403, "grad_norm": 0.8876769542694092, "learning_rate": 3.084004602991945e-05, "loss": 0.2243, "step": 342240 }, { "epoch": 98.4608745684695, "grad_norm": 0.5345882773399353, "learning_rate": 3.07825086306099e-05, "loss": 0.2466, "step": 342250 }, { "epoch": 98.46375143843498, "grad_norm": 2.2092127799987793, "learning_rate": 3.0724971231300346e-05, "loss": 0.2749, "step": 342260 }, { "epoch": 98.46662830840046, "grad_norm": 0.6393832564353943, "learning_rate": 3.0667433831990795e-05, "loss": 0.2783, "step": 342270 }, { "epoch": 98.46950517836594, "grad_norm": 1.8354389667510986, "learning_rate": 3.0609896432681244e-05, "loss": 0.2655, "step": 342280 }, { "epoch": 98.47238204833141, "grad_norm": 1.458545446395874, "learning_rate": 3.055235903337169e-05, "loss": 0.2141, "step": 342290 }, { "epoch": 98.47525891829689, "grad_norm": 0.7404317259788513, "learning_rate": 3.049482163406214e-05, "loss": 0.1888, "step": 342300 }, { "epoch": 98.47813578826236, "grad_norm": 1.5353305339813232, "learning_rate": 3.0437284234752593e-05, "loss": 0.274, "step": 342310 }, { "epoch": 98.48101265822785, "grad_norm": 1.62283456325531, "learning_rate": 3.037974683544304e-05, "loss": 0.2059, "step": 342320 }, { "epoch": 98.48388952819333, "grad_norm": 1.2463704347610474, "learning_rate": 3.0322209436133487e-05, "loss": 0.2622, "step": 342330 }, { "epoch": 98.4867663981588, "grad_norm": 0.9776034951210022, "learning_rate": 3.0264672036823936e-05, "loss": 0.2217, "step": 342340 }, { "epoch": 98.48964326812428, "grad_norm": 0.9615660905838013, "learning_rate": 3.0207134637514385e-05, "loss": 0.2837, "step": 342350 }, { "epoch": 98.49252013808976, "grad_norm": 0.8343402743339539, "learning_rate": 3.0149597238204837e-05, "loss": 0.2883, "step": 342360 }, { "epoch": 98.49539700805524, "grad_norm": 0.9279578328132629, "learning_rate": 3.0092059838895282e-05, "loss": 0.2694, "step": 342370 }, { "epoch": 98.49827387802071, "grad_norm": 0.8789221048355103, "learning_rate": 3.003452243958573e-05, "loss": 0.2091, "step": 342380 }, { "epoch": 98.50115074798619, "grad_norm": 1.3514833450317383, "learning_rate": 2.997698504027618e-05, "loss": 0.2604, "step": 342390 }, { "epoch": 98.50402761795166, "grad_norm": 0.8055588603019714, "learning_rate": 2.9919447640966628e-05, "loss": 0.2195, "step": 342400 }, { "epoch": 98.50690448791714, "grad_norm": 0.9898144602775574, "learning_rate": 2.986191024165708e-05, "loss": 0.2697, "step": 342410 }, { "epoch": 98.50978135788263, "grad_norm": 1.344184398651123, "learning_rate": 2.9804372842347526e-05, "loss": 0.2175, "step": 342420 }, { "epoch": 98.5126582278481, "grad_norm": 1.0365386009216309, "learning_rate": 2.9746835443037974e-05, "loss": 0.2504, "step": 342430 }, { "epoch": 98.51553509781358, "grad_norm": 1.0646048784255981, "learning_rate": 2.9689298043728423e-05, "loss": 0.2635, "step": 342440 }, { "epoch": 98.51841196777906, "grad_norm": 1.0371299982070923, "learning_rate": 2.9631760644418872e-05, "loss": 0.2542, "step": 342450 }, { "epoch": 98.52128883774454, "grad_norm": 1.1187551021575928, "learning_rate": 2.9574223245109324e-05, "loss": 0.2386, "step": 342460 }, { "epoch": 98.52416570771001, "grad_norm": 1.291356086730957, "learning_rate": 2.9516685845799773e-05, "loss": 0.2865, "step": 342470 }, { "epoch": 98.52704257767549, "grad_norm": 1.3918118476867676, "learning_rate": 2.9459148446490218e-05, "loss": 0.2827, "step": 342480 }, { "epoch": 98.52991944764096, "grad_norm": 0.7447096705436707, "learning_rate": 2.9401611047180667e-05, "loss": 0.3064, "step": 342490 }, { "epoch": 98.53279631760644, "grad_norm": 0.98839271068573, "learning_rate": 2.9344073647871116e-05, "loss": 0.303, "step": 342500 }, { "epoch": 98.53567318757192, "grad_norm": 1.3388209342956543, "learning_rate": 2.9286536248561568e-05, "loss": 0.2488, "step": 342510 }, { "epoch": 98.53855005753739, "grad_norm": 0.6363640427589417, "learning_rate": 2.9228998849252016e-05, "loss": 0.267, "step": 342520 }, { "epoch": 98.54142692750288, "grad_norm": 1.8904272317886353, "learning_rate": 2.9171461449942462e-05, "loss": 0.2677, "step": 342530 }, { "epoch": 98.54430379746836, "grad_norm": 0.7525038123130798, "learning_rate": 2.911392405063291e-05, "loss": 0.2675, "step": 342540 }, { "epoch": 98.54718066743384, "grad_norm": 0.9496176838874817, "learning_rate": 2.905638665132336e-05, "loss": 0.2568, "step": 342550 }, { "epoch": 98.55005753739931, "grad_norm": 0.8495301008224487, "learning_rate": 2.899884925201381e-05, "loss": 0.2486, "step": 342560 }, { "epoch": 98.55293440736479, "grad_norm": 0.9508417844772339, "learning_rate": 2.894131185270426e-05, "loss": 0.1927, "step": 342570 }, { "epoch": 98.55581127733026, "grad_norm": 0.8796753883361816, "learning_rate": 2.888377445339471e-05, "loss": 0.2198, "step": 342580 }, { "epoch": 98.55868814729574, "grad_norm": 1.2114794254302979, "learning_rate": 2.8826237054085154e-05, "loss": 0.3019, "step": 342590 }, { "epoch": 98.56156501726122, "grad_norm": 0.603139340877533, "learning_rate": 2.8768699654775603e-05, "loss": 0.3071, "step": 342600 }, { "epoch": 98.56444188722669, "grad_norm": 0.9128803610801697, "learning_rate": 2.8711162255466055e-05, "loss": 0.2518, "step": 342610 }, { "epoch": 98.56731875719217, "grad_norm": 1.7357869148254395, "learning_rate": 2.8653624856156504e-05, "loss": 0.2391, "step": 342620 }, { "epoch": 98.57019562715766, "grad_norm": 1.0037376880645752, "learning_rate": 2.8596087456846952e-05, "loss": 0.2264, "step": 342630 }, { "epoch": 98.57307249712314, "grad_norm": 0.870611846446991, "learning_rate": 2.8538550057537398e-05, "loss": 0.199, "step": 342640 }, { "epoch": 98.57594936708861, "grad_norm": 1.183604121208191, "learning_rate": 2.8481012658227846e-05, "loss": 0.2763, "step": 342650 }, { "epoch": 98.57882623705409, "grad_norm": 1.0981578826904297, "learning_rate": 2.84234752589183e-05, "loss": 0.246, "step": 342660 }, { "epoch": 98.58170310701956, "grad_norm": 1.4479786157608032, "learning_rate": 2.8365937859608747e-05, "loss": 0.218, "step": 342670 }, { "epoch": 98.58457997698504, "grad_norm": 1.1924775838851929, "learning_rate": 2.8308400460299196e-05, "loss": 0.2815, "step": 342680 }, { "epoch": 98.58745684695052, "grad_norm": 0.7946639657020569, "learning_rate": 2.8250863060989645e-05, "loss": 0.2158, "step": 342690 }, { "epoch": 98.59033371691599, "grad_norm": 0.9586167931556702, "learning_rate": 2.819332566168009e-05, "loss": 0.2881, "step": 342700 }, { "epoch": 98.59321058688147, "grad_norm": 1.491910457611084, "learning_rate": 2.8135788262370542e-05, "loss": 0.2522, "step": 342710 }, { "epoch": 98.59608745684694, "grad_norm": 0.9158319234848022, "learning_rate": 2.807825086306099e-05, "loss": 0.22, "step": 342720 }, { "epoch": 98.59896432681242, "grad_norm": 0.7823840975761414, "learning_rate": 2.802071346375144e-05, "loss": 0.2565, "step": 342730 }, { "epoch": 98.60184119677791, "grad_norm": 2.1384100914001465, "learning_rate": 2.796317606444189e-05, "loss": 0.2604, "step": 342740 }, { "epoch": 98.60471806674339, "grad_norm": 1.4600355625152588, "learning_rate": 2.7905638665132334e-05, "loss": 0.2788, "step": 342750 }, { "epoch": 98.60759493670886, "grad_norm": 2.0282487869262695, "learning_rate": 2.7848101265822786e-05, "loss": 0.2899, "step": 342760 }, { "epoch": 98.61047180667434, "grad_norm": 0.6911695599555969, "learning_rate": 2.7790563866513235e-05, "loss": 0.2348, "step": 342770 }, { "epoch": 98.61334867663982, "grad_norm": 1.6899447441101074, "learning_rate": 2.7733026467203683e-05, "loss": 0.2558, "step": 342780 }, { "epoch": 98.61622554660529, "grad_norm": 1.6068017482757568, "learning_rate": 2.7675489067894132e-05, "loss": 0.2678, "step": 342790 }, { "epoch": 98.61910241657077, "grad_norm": 0.592288613319397, "learning_rate": 2.7617951668584577e-05, "loss": 0.2719, "step": 342800 }, { "epoch": 98.62197928653625, "grad_norm": 1.2833497524261475, "learning_rate": 2.756041426927503e-05, "loss": 0.2861, "step": 342810 }, { "epoch": 98.62485615650172, "grad_norm": 1.4195133447647095, "learning_rate": 2.7502876869965478e-05, "loss": 0.2085, "step": 342820 }, { "epoch": 98.6277330264672, "grad_norm": 1.0342416763305664, "learning_rate": 2.7445339470655927e-05, "loss": 0.296, "step": 342830 }, { "epoch": 98.63060989643269, "grad_norm": 0.9882327318191528, "learning_rate": 2.7387802071346376e-05, "loss": 0.1928, "step": 342840 }, { "epoch": 98.63348676639816, "grad_norm": 1.1564760208129883, "learning_rate": 2.7330264672036824e-05, "loss": 0.2573, "step": 342850 }, { "epoch": 98.63636363636364, "grad_norm": 1.6315010786056519, "learning_rate": 2.7272727272727273e-05, "loss": 0.267, "step": 342860 }, { "epoch": 98.63924050632912, "grad_norm": 1.1169896125793457, "learning_rate": 2.7215189873417722e-05, "loss": 0.1928, "step": 342870 }, { "epoch": 98.64211737629459, "grad_norm": 1.6136332750320435, "learning_rate": 2.715765247410817e-05, "loss": 0.29, "step": 342880 }, { "epoch": 98.64499424626007, "grad_norm": 1.6619726419448853, "learning_rate": 2.710011507479862e-05, "loss": 0.2285, "step": 342890 }, { "epoch": 98.64787111622555, "grad_norm": 1.1846096515655518, "learning_rate": 2.7042577675489068e-05, "loss": 0.277, "step": 342900 }, { "epoch": 98.65074798619102, "grad_norm": 0.6316179633140564, "learning_rate": 2.6985040276179517e-05, "loss": 0.2374, "step": 342910 }, { "epoch": 98.6536248561565, "grad_norm": 1.6760414838790894, "learning_rate": 2.6927502876869965e-05, "loss": 0.2439, "step": 342920 }, { "epoch": 98.65650172612197, "grad_norm": 0.8335948586463928, "learning_rate": 2.6869965477560414e-05, "loss": 0.2535, "step": 342930 }, { "epoch": 98.65937859608745, "grad_norm": 1.4601922035217285, "learning_rate": 2.6812428078250863e-05, "loss": 0.2605, "step": 342940 }, { "epoch": 98.66225546605294, "grad_norm": 0.8969718813896179, "learning_rate": 2.6754890678941315e-05, "loss": 0.2267, "step": 342950 }, { "epoch": 98.66513233601842, "grad_norm": 1.4008954763412476, "learning_rate": 2.6697353279631764e-05, "loss": 0.2842, "step": 342960 }, { "epoch": 98.66800920598389, "grad_norm": 1.80355703830719, "learning_rate": 2.663981588032221e-05, "loss": 0.277, "step": 342970 }, { "epoch": 98.67088607594937, "grad_norm": 0.9228350520133972, "learning_rate": 2.6582278481012658e-05, "loss": 0.2243, "step": 342980 }, { "epoch": 98.67376294591485, "grad_norm": 0.7951944470405579, "learning_rate": 2.6524741081703107e-05, "loss": 0.2433, "step": 342990 }, { "epoch": 98.67663981588032, "grad_norm": 1.076659917831421, "learning_rate": 2.646720368239356e-05, "loss": 0.2631, "step": 343000 }, { "epoch": 98.6795166858458, "grad_norm": 0.7951699495315552, "learning_rate": 2.6409666283084007e-05, "loss": 0.2794, "step": 343010 }, { "epoch": 98.68239355581127, "grad_norm": 1.787745714187622, "learning_rate": 2.6352128883774453e-05, "loss": 0.2526, "step": 343020 }, { "epoch": 98.68527042577675, "grad_norm": 1.0327399969100952, "learning_rate": 2.62945914844649e-05, "loss": 0.232, "step": 343030 }, { "epoch": 98.68814729574223, "grad_norm": 1.1060740947723389, "learning_rate": 2.623705408515535e-05, "loss": 0.2585, "step": 343040 }, { "epoch": 98.69102416570772, "grad_norm": 1.1262603998184204, "learning_rate": 2.6179516685845802e-05, "loss": 0.2362, "step": 343050 }, { "epoch": 98.69390103567319, "grad_norm": 0.9867911338806152, "learning_rate": 2.612197928653625e-05, "loss": 0.2279, "step": 343060 }, { "epoch": 98.69677790563867, "grad_norm": 0.8458698391914368, "learning_rate": 2.60644418872267e-05, "loss": 0.217, "step": 343070 }, { "epoch": 98.69965477560415, "grad_norm": 0.5974214673042297, "learning_rate": 2.6006904487917145e-05, "loss": 0.2398, "step": 343080 }, { "epoch": 98.70253164556962, "grad_norm": 1.354423999786377, "learning_rate": 2.5949367088607594e-05, "loss": 0.2276, "step": 343090 }, { "epoch": 98.7054085155351, "grad_norm": 0.9247008562088013, "learning_rate": 2.5891829689298046e-05, "loss": 0.3182, "step": 343100 }, { "epoch": 98.70828538550057, "grad_norm": 1.0935131311416626, "learning_rate": 2.5834292289988495e-05, "loss": 0.296, "step": 343110 }, { "epoch": 98.71116225546605, "grad_norm": 1.2914032936096191, "learning_rate": 2.5776754890678943e-05, "loss": 0.2672, "step": 343120 }, { "epoch": 98.71403912543153, "grad_norm": 1.31632399559021, "learning_rate": 2.571921749136939e-05, "loss": 0.2692, "step": 343130 }, { "epoch": 98.716915995397, "grad_norm": 0.9244070649147034, "learning_rate": 2.5661680092059837e-05, "loss": 0.2208, "step": 343140 }, { "epoch": 98.71979286536248, "grad_norm": 2.700836658477783, "learning_rate": 2.560414269275029e-05, "loss": 0.3265, "step": 343150 }, { "epoch": 98.72266973532797, "grad_norm": 1.3611313104629517, "learning_rate": 2.5546605293440738e-05, "loss": 0.2428, "step": 343160 }, { "epoch": 98.72554660529345, "grad_norm": 1.587119221687317, "learning_rate": 2.5489067894131187e-05, "loss": 0.2566, "step": 343170 }, { "epoch": 98.72842347525892, "grad_norm": 0.9786141514778137, "learning_rate": 2.5431530494821632e-05, "loss": 0.2118, "step": 343180 }, { "epoch": 98.7313003452244, "grad_norm": 2.9164888858795166, "learning_rate": 2.537399309551208e-05, "loss": 0.1988, "step": 343190 }, { "epoch": 98.73417721518987, "grad_norm": 1.497680902481079, "learning_rate": 2.5316455696202533e-05, "loss": 0.3155, "step": 343200 }, { "epoch": 98.73705408515535, "grad_norm": 1.0598214864730835, "learning_rate": 2.5258918296892982e-05, "loss": 0.3104, "step": 343210 }, { "epoch": 98.73993095512083, "grad_norm": 1.3878698348999023, "learning_rate": 2.520138089758343e-05, "loss": 0.2661, "step": 343220 }, { "epoch": 98.7428078250863, "grad_norm": 1.1242820024490356, "learning_rate": 2.514384349827388e-05, "loss": 0.2653, "step": 343230 }, { "epoch": 98.74568469505178, "grad_norm": 0.966616153717041, "learning_rate": 2.5086306098964325e-05, "loss": 0.2725, "step": 343240 }, { "epoch": 98.74856156501725, "grad_norm": 1.485458493232727, "learning_rate": 2.5028768699654777e-05, "loss": 0.2113, "step": 343250 }, { "epoch": 98.75143843498275, "grad_norm": 2.134500026702881, "learning_rate": 2.4971231300345226e-05, "loss": 0.2285, "step": 343260 }, { "epoch": 98.75431530494822, "grad_norm": 0.6878830790519714, "learning_rate": 2.4913693901035674e-05, "loss": 0.2664, "step": 343270 }, { "epoch": 98.7571921749137, "grad_norm": 2.0887482166290283, "learning_rate": 2.4856156501726123e-05, "loss": 0.3527, "step": 343280 }, { "epoch": 98.76006904487917, "grad_norm": 1.058166742324829, "learning_rate": 2.479861910241657e-05, "loss": 0.2355, "step": 343290 }, { "epoch": 98.76294591484465, "grad_norm": 0.641601026058197, "learning_rate": 2.474108170310702e-05, "loss": 0.2323, "step": 343300 }, { "epoch": 98.76582278481013, "grad_norm": 1.1239017248153687, "learning_rate": 2.468354430379747e-05, "loss": 0.2545, "step": 343310 }, { "epoch": 98.7686996547756, "grad_norm": 1.166350245475769, "learning_rate": 2.4626006904487918e-05, "loss": 0.2316, "step": 343320 }, { "epoch": 98.77157652474108, "grad_norm": 0.9510771632194519, "learning_rate": 2.4568469505178367e-05, "loss": 0.338, "step": 343330 }, { "epoch": 98.77445339470655, "grad_norm": 1.1917105913162231, "learning_rate": 2.4510932105868815e-05, "loss": 0.2358, "step": 343340 }, { "epoch": 98.77733026467203, "grad_norm": 0.803220272064209, "learning_rate": 2.4453394706559264e-05, "loss": 0.2426, "step": 343350 }, { "epoch": 98.78020713463752, "grad_norm": 1.0066885948181152, "learning_rate": 2.4395857307249713e-05, "loss": 0.2058, "step": 343360 }, { "epoch": 98.783084004603, "grad_norm": 1.1684494018554688, "learning_rate": 2.433831990794016e-05, "loss": 0.2381, "step": 343370 }, { "epoch": 98.78596087456847, "grad_norm": 0.9524599313735962, "learning_rate": 2.428078250863061e-05, "loss": 0.2536, "step": 343380 }, { "epoch": 98.78883774453395, "grad_norm": 0.81953364610672, "learning_rate": 2.422324510932106e-05, "loss": 0.2773, "step": 343390 }, { "epoch": 98.79171461449943, "grad_norm": 0.9360904097557068, "learning_rate": 2.4165707710011508e-05, "loss": 0.2559, "step": 343400 }, { "epoch": 98.7945914844649, "grad_norm": 0.7253095507621765, "learning_rate": 2.4108170310701956e-05, "loss": 0.2797, "step": 343410 }, { "epoch": 98.79746835443038, "grad_norm": 1.204071044921875, "learning_rate": 2.4050632911392405e-05, "loss": 0.249, "step": 343420 }, { "epoch": 98.80034522439585, "grad_norm": 1.1431752443313599, "learning_rate": 2.3993095512082854e-05, "loss": 0.2457, "step": 343430 }, { "epoch": 98.80322209436133, "grad_norm": 1.229275107383728, "learning_rate": 2.3935558112773303e-05, "loss": 0.275, "step": 343440 }, { "epoch": 98.80609896432681, "grad_norm": 1.3052724599838257, "learning_rate": 2.3878020713463755e-05, "loss": 0.2122, "step": 343450 }, { "epoch": 98.80897583429228, "grad_norm": 0.9695470333099365, "learning_rate": 2.38204833141542e-05, "loss": 0.2758, "step": 343460 }, { "epoch": 98.81185270425777, "grad_norm": 1.0347585678100586, "learning_rate": 2.376294591484465e-05, "loss": 0.219, "step": 343470 }, { "epoch": 98.81472957422325, "grad_norm": 1.5093330144882202, "learning_rate": 2.3705408515535098e-05, "loss": 0.2746, "step": 343480 }, { "epoch": 98.81760644418873, "grad_norm": 1.5787320137023926, "learning_rate": 2.3647871116225546e-05, "loss": 0.2723, "step": 343490 }, { "epoch": 98.8204833141542, "grad_norm": 1.4050263166427612, "learning_rate": 2.3590333716916e-05, "loss": 0.2714, "step": 343500 }, { "epoch": 98.82336018411968, "grad_norm": 1.9493076801300049, "learning_rate": 2.3532796317606444e-05, "loss": 0.2962, "step": 343510 }, { "epoch": 98.82623705408515, "grad_norm": 0.7417156100273132, "learning_rate": 2.3475258918296892e-05, "loss": 0.278, "step": 343520 }, { "epoch": 98.82911392405063, "grad_norm": 1.9839431047439575, "learning_rate": 2.341772151898734e-05, "loss": 0.2709, "step": 343530 }, { "epoch": 98.83199079401611, "grad_norm": 2.0468902587890625, "learning_rate": 2.336018411967779e-05, "loss": 0.2784, "step": 343540 }, { "epoch": 98.83486766398158, "grad_norm": 1.3792564868927002, "learning_rate": 2.3302646720368242e-05, "loss": 0.2775, "step": 343550 }, { "epoch": 98.83774453394706, "grad_norm": 0.9731492400169373, "learning_rate": 2.3245109321058687e-05, "loss": 0.2231, "step": 343560 }, { "epoch": 98.84062140391255, "grad_norm": 1.4341309070587158, "learning_rate": 2.3187571921749136e-05, "loss": 0.2346, "step": 343570 }, { "epoch": 98.84349827387803, "grad_norm": 1.0407426357269287, "learning_rate": 2.3130034522439585e-05, "loss": 0.3591, "step": 343580 }, { "epoch": 98.8463751438435, "grad_norm": 1.2497974634170532, "learning_rate": 2.3072497123130037e-05, "loss": 0.3021, "step": 343590 }, { "epoch": 98.84925201380898, "grad_norm": 1.1151200532913208, "learning_rate": 2.3014959723820486e-05, "loss": 0.1804, "step": 343600 }, { "epoch": 98.85212888377445, "grad_norm": 1.1882438659667969, "learning_rate": 2.2957422324510934e-05, "loss": 0.2582, "step": 343610 }, { "epoch": 98.85500575373993, "grad_norm": 1.5561848878860474, "learning_rate": 2.289988492520138e-05, "loss": 0.2624, "step": 343620 }, { "epoch": 98.85788262370541, "grad_norm": 1.1978256702423096, "learning_rate": 2.284234752589183e-05, "loss": 0.2895, "step": 343630 }, { "epoch": 98.86075949367088, "grad_norm": 0.8122279644012451, "learning_rate": 2.278481012658228e-05, "loss": 0.2383, "step": 343640 }, { "epoch": 98.86363636363636, "grad_norm": 1.0493863821029663, "learning_rate": 2.272727272727273e-05, "loss": 0.2397, "step": 343650 }, { "epoch": 98.86651323360184, "grad_norm": 0.9272457957267761, "learning_rate": 2.2669735327963178e-05, "loss": 0.2523, "step": 343660 }, { "epoch": 98.86939010356731, "grad_norm": 0.5099489092826843, "learning_rate": 2.2612197928653623e-05, "loss": 0.2338, "step": 343670 }, { "epoch": 98.8722669735328, "grad_norm": 1.0795096158981323, "learning_rate": 2.2554660529344072e-05, "loss": 0.2389, "step": 343680 }, { "epoch": 98.87514384349828, "grad_norm": 1.7449101209640503, "learning_rate": 2.2497123130034524e-05, "loss": 0.2405, "step": 343690 }, { "epoch": 98.87802071346375, "grad_norm": 0.9664406776428223, "learning_rate": 2.2439585730724973e-05, "loss": 0.3443, "step": 343700 }, { "epoch": 98.88089758342923, "grad_norm": 1.2236979007720947, "learning_rate": 2.238204833141542e-05, "loss": 0.2741, "step": 343710 }, { "epoch": 98.88377445339471, "grad_norm": 1.1902129650115967, "learning_rate": 2.232451093210587e-05, "loss": 0.2679, "step": 343720 }, { "epoch": 98.88665132336018, "grad_norm": 1.3590673208236694, "learning_rate": 2.2266973532796316e-05, "loss": 0.2776, "step": 343730 }, { "epoch": 98.88952819332566, "grad_norm": 1.2644567489624023, "learning_rate": 2.2209436133486768e-05, "loss": 0.3931, "step": 343740 }, { "epoch": 98.89240506329114, "grad_norm": 1.6599562168121338, "learning_rate": 2.2151898734177217e-05, "loss": 0.218, "step": 343750 }, { "epoch": 98.89528193325661, "grad_norm": 1.0904712677001953, "learning_rate": 2.2094361334867665e-05, "loss": 0.2795, "step": 343760 }, { "epoch": 98.89815880322209, "grad_norm": 1.0392988920211792, "learning_rate": 2.2036823935558114e-05, "loss": 0.3128, "step": 343770 }, { "epoch": 98.90103567318758, "grad_norm": 1.6677321195602417, "learning_rate": 2.197928653624856e-05, "loss": 0.2483, "step": 343780 }, { "epoch": 98.90391254315306, "grad_norm": 0.61446213722229, "learning_rate": 2.192174913693901e-05, "loss": 0.1805, "step": 343790 }, { "epoch": 98.90678941311853, "grad_norm": 1.213510274887085, "learning_rate": 2.186421173762946e-05, "loss": 0.2604, "step": 343800 }, { "epoch": 98.90966628308401, "grad_norm": 1.7910077571868896, "learning_rate": 2.180667433831991e-05, "loss": 0.2329, "step": 343810 }, { "epoch": 98.91254315304948, "grad_norm": 1.189990758895874, "learning_rate": 2.1749136939010358e-05, "loss": 0.2728, "step": 343820 }, { "epoch": 98.91542002301496, "grad_norm": 0.5391385555267334, "learning_rate": 2.1691599539700803e-05, "loss": 0.2667, "step": 343830 }, { "epoch": 98.91829689298044, "grad_norm": 1.5240967273712158, "learning_rate": 2.1634062140391255e-05, "loss": 0.2787, "step": 343840 }, { "epoch": 98.92117376294591, "grad_norm": 1.0900849103927612, "learning_rate": 2.1576524741081704e-05, "loss": 0.222, "step": 343850 }, { "epoch": 98.92405063291139, "grad_norm": 1.235272765159607, "learning_rate": 2.1518987341772153e-05, "loss": 0.2619, "step": 343860 }, { "epoch": 98.92692750287686, "grad_norm": 0.6385499238967896, "learning_rate": 2.14614499424626e-05, "loss": 0.235, "step": 343870 }, { "epoch": 98.92980437284234, "grad_norm": 0.80967777967453, "learning_rate": 2.140391254315305e-05, "loss": 0.2332, "step": 343880 }, { "epoch": 98.93268124280783, "grad_norm": 0.5759852528572083, "learning_rate": 2.13463751438435e-05, "loss": 0.1948, "step": 343890 }, { "epoch": 98.93555811277331, "grad_norm": 1.6488184928894043, "learning_rate": 2.1288837744533947e-05, "loss": 0.2108, "step": 343900 }, { "epoch": 98.93843498273878, "grad_norm": 0.5215789675712585, "learning_rate": 2.1231300345224396e-05, "loss": 0.2197, "step": 343910 }, { "epoch": 98.94131185270426, "grad_norm": 0.9788842797279358, "learning_rate": 2.1173762945914845e-05, "loss": 0.2847, "step": 343920 }, { "epoch": 98.94418872266974, "grad_norm": 1.059557557106018, "learning_rate": 2.1116225546605294e-05, "loss": 0.2003, "step": 343930 }, { "epoch": 98.94706559263521, "grad_norm": 1.181694746017456, "learning_rate": 2.1058688147295742e-05, "loss": 0.2468, "step": 343940 }, { "epoch": 98.94994246260069, "grad_norm": 2.375155210494995, "learning_rate": 2.100115074798619e-05, "loss": 0.3126, "step": 343950 }, { "epoch": 98.95281933256616, "grad_norm": 1.1215285062789917, "learning_rate": 2.094361334867664e-05, "loss": 0.2797, "step": 343960 }, { "epoch": 98.95569620253164, "grad_norm": 1.2676794528961182, "learning_rate": 2.088607594936709e-05, "loss": 0.248, "step": 343970 }, { "epoch": 98.95857307249712, "grad_norm": 0.8204348087310791, "learning_rate": 2.0828538550057537e-05, "loss": 0.2839, "step": 343980 }, { "epoch": 98.96144994246261, "grad_norm": 1.108241319656372, "learning_rate": 2.077100115074799e-05, "loss": 0.2366, "step": 343990 }, { "epoch": 98.96432681242808, "grad_norm": 0.8239741921424866, "learning_rate": 2.0713463751438435e-05, "loss": 0.2569, "step": 344000 }, { "epoch": 98.96720368239356, "grad_norm": 1.16914963722229, "learning_rate": 2.0655926352128883e-05, "loss": 0.2636, "step": 344010 }, { "epoch": 98.97008055235904, "grad_norm": 0.7646788954734802, "learning_rate": 2.0598388952819332e-05, "loss": 0.2716, "step": 344020 }, { "epoch": 98.97295742232451, "grad_norm": 1.6084575653076172, "learning_rate": 2.054085155350978e-05, "loss": 0.2831, "step": 344030 }, { "epoch": 98.97583429228999, "grad_norm": 1.2276124954223633, "learning_rate": 2.0483314154200233e-05, "loss": 0.2902, "step": 344040 }, { "epoch": 98.97871116225546, "grad_norm": 1.6130828857421875, "learning_rate": 2.042577675489068e-05, "loss": 0.2268, "step": 344050 }, { "epoch": 98.98158803222094, "grad_norm": 1.1285302639007568, "learning_rate": 2.0368239355581127e-05, "loss": 0.2609, "step": 344060 }, { "epoch": 98.98446490218642, "grad_norm": 0.8224568963050842, "learning_rate": 2.0310701956271576e-05, "loss": 0.2109, "step": 344070 }, { "epoch": 98.9873417721519, "grad_norm": 1.089530348777771, "learning_rate": 2.0253164556962025e-05, "loss": 0.2094, "step": 344080 }, { "epoch": 98.99021864211737, "grad_norm": 0.7444837689399719, "learning_rate": 2.0195627157652477e-05, "loss": 0.2115, "step": 344090 }, { "epoch": 98.99309551208286, "grad_norm": 1.6216366291046143, "learning_rate": 2.0138089758342925e-05, "loss": 0.2385, "step": 344100 }, { "epoch": 98.99597238204834, "grad_norm": 1.3557542562484741, "learning_rate": 2.008055235903337e-05, "loss": 0.2819, "step": 344110 }, { "epoch": 98.99884925201381, "grad_norm": 1.4055962562561035, "learning_rate": 2.002301495972382e-05, "loss": 0.3008, "step": 344120 }, { "epoch": 99.00172612197929, "grad_norm": 0.955611526966095, "learning_rate": 1.9965477560414268e-05, "loss": 0.2162, "step": 344130 }, { "epoch": 99.00460299194476, "grad_norm": 1.652909278869629, "learning_rate": 1.990794016110472e-05, "loss": 0.3245, "step": 344140 }, { "epoch": 99.00747986191024, "grad_norm": 1.1859517097473145, "learning_rate": 1.985040276179517e-05, "loss": 0.2011, "step": 344150 }, { "epoch": 99.01035673187572, "grad_norm": 0.9223184585571289, "learning_rate": 1.9792865362485614e-05, "loss": 0.1881, "step": 344160 }, { "epoch": 99.0132336018412, "grad_norm": 1.485580325126648, "learning_rate": 1.9735327963176063e-05, "loss": 0.2165, "step": 344170 }, { "epoch": 99.01611047180667, "grad_norm": 1.2710916996002197, "learning_rate": 1.9677790563866512e-05, "loss": 0.2179, "step": 344180 }, { "epoch": 99.01898734177215, "grad_norm": 1.6377376317977905, "learning_rate": 1.9620253164556964e-05, "loss": 0.2335, "step": 344190 }, { "epoch": 99.02186421173764, "grad_norm": 1.2576913833618164, "learning_rate": 1.9562715765247413e-05, "loss": 0.252, "step": 344200 }, { "epoch": 99.02474108170311, "grad_norm": 0.9038400650024414, "learning_rate": 1.9505178365937858e-05, "loss": 0.3581, "step": 344210 }, { "epoch": 99.02761795166859, "grad_norm": 1.595720887184143, "learning_rate": 1.9447640966628307e-05, "loss": 0.246, "step": 344220 }, { "epoch": 99.03049482163406, "grad_norm": 1.366201639175415, "learning_rate": 1.939010356731876e-05, "loss": 0.2905, "step": 344230 }, { "epoch": 99.03337169159954, "grad_norm": 0.6922603249549866, "learning_rate": 1.9332566168009208e-05, "loss": 0.2713, "step": 344240 }, { "epoch": 99.03624856156502, "grad_norm": 0.6558164954185486, "learning_rate": 1.9275028768699656e-05, "loss": 0.2759, "step": 344250 }, { "epoch": 99.0391254315305, "grad_norm": 1.2361608743667603, "learning_rate": 1.9217491369390105e-05, "loss": 0.2392, "step": 344260 }, { "epoch": 99.04200230149597, "grad_norm": 1.4304522275924683, "learning_rate": 1.915995397008055e-05, "loss": 0.3111, "step": 344270 }, { "epoch": 99.04487917146145, "grad_norm": 0.8720359802246094, "learning_rate": 1.9102416570771002e-05, "loss": 0.2097, "step": 344280 }, { "epoch": 99.04775604142692, "grad_norm": 1.5018694400787354, "learning_rate": 1.904487917146145e-05, "loss": 0.3655, "step": 344290 }, { "epoch": 99.0506329113924, "grad_norm": 1.7062935829162598, "learning_rate": 1.89873417721519e-05, "loss": 0.2528, "step": 344300 }, { "epoch": 99.05350978135789, "grad_norm": 2.042052745819092, "learning_rate": 1.892980437284235e-05, "loss": 0.2895, "step": 344310 }, { "epoch": 99.05638665132336, "grad_norm": 1.5183695554733276, "learning_rate": 1.8872266973532794e-05, "loss": 0.2225, "step": 344320 }, { "epoch": 99.05926352128884, "grad_norm": 1.9399604797363281, "learning_rate": 1.8814729574223246e-05, "loss": 0.2662, "step": 344330 }, { "epoch": 99.06214039125432, "grad_norm": 0.7196126580238342, "learning_rate": 1.8757192174913695e-05, "loss": 0.2155, "step": 344340 }, { "epoch": 99.0650172612198, "grad_norm": 0.9059213399887085, "learning_rate": 1.8699654775604144e-05, "loss": 0.1923, "step": 344350 }, { "epoch": 99.06789413118527, "grad_norm": 0.684747040271759, "learning_rate": 1.8642117376294592e-05, "loss": 0.2898, "step": 344360 }, { "epoch": 99.07077100115075, "grad_norm": 1.1214933395385742, "learning_rate": 1.858457997698504e-05, "loss": 0.2485, "step": 344370 }, { "epoch": 99.07364787111622, "grad_norm": 0.9819902181625366, "learning_rate": 1.852704257767549e-05, "loss": 0.2644, "step": 344380 }, { "epoch": 99.0765247410817, "grad_norm": 0.8149042725563049, "learning_rate": 1.846950517836594e-05, "loss": 0.2447, "step": 344390 }, { "epoch": 99.07940161104717, "grad_norm": 0.7604230642318726, "learning_rate": 1.8411967779056387e-05, "loss": 0.2266, "step": 344400 }, { "epoch": 99.08227848101266, "grad_norm": 1.6331286430358887, "learning_rate": 1.8354430379746836e-05, "loss": 0.2286, "step": 344410 }, { "epoch": 99.08515535097814, "grad_norm": 1.1479644775390625, "learning_rate": 1.8296892980437285e-05, "loss": 0.2464, "step": 344420 }, { "epoch": 99.08803222094362, "grad_norm": 0.9740152955055237, "learning_rate": 1.8239355581127733e-05, "loss": 0.2967, "step": 344430 }, { "epoch": 99.0909090909091, "grad_norm": 0.9618626236915588, "learning_rate": 1.8181818181818182e-05, "loss": 0.2552, "step": 344440 }, { "epoch": 99.09378596087457, "grad_norm": 0.8117769360542297, "learning_rate": 1.812428078250863e-05, "loss": 0.2383, "step": 344450 }, { "epoch": 99.09666283084005, "grad_norm": 0.811028242111206, "learning_rate": 1.806674338319908e-05, "loss": 0.2945, "step": 344460 }, { "epoch": 99.09953970080552, "grad_norm": 0.7708529233932495, "learning_rate": 1.8009205983889528e-05, "loss": 0.2432, "step": 344470 }, { "epoch": 99.102416570771, "grad_norm": 0.7083722352981567, "learning_rate": 1.795166858457998e-05, "loss": 0.3094, "step": 344480 }, { "epoch": 99.10529344073647, "grad_norm": 1.4598809480667114, "learning_rate": 1.7894131185270426e-05, "loss": 0.2227, "step": 344490 }, { "epoch": 99.10817031070195, "grad_norm": 1.197048544883728, "learning_rate": 1.7836593785960874e-05, "loss": 0.2505, "step": 344500 }, { "epoch": 99.11104718066743, "grad_norm": 0.8604323267936707, "learning_rate": 1.7779056386651323e-05, "loss": 0.2539, "step": 344510 }, { "epoch": 99.11392405063292, "grad_norm": 1.981135606765747, "learning_rate": 1.7721518987341772e-05, "loss": 0.243, "step": 344520 }, { "epoch": 99.1168009205984, "grad_norm": 0.7880893349647522, "learning_rate": 1.7663981588032224e-05, "loss": 0.2938, "step": 344530 }, { "epoch": 99.11967779056387, "grad_norm": 1.2814617156982422, "learning_rate": 1.760644418872267e-05, "loss": 0.2299, "step": 344540 }, { "epoch": 99.12255466052935, "grad_norm": 1.3279072046279907, "learning_rate": 1.7548906789413118e-05, "loss": 0.2437, "step": 344550 }, { "epoch": 99.12543153049482, "grad_norm": 1.3249109983444214, "learning_rate": 1.7491369390103567e-05, "loss": 0.2522, "step": 344560 }, { "epoch": 99.1283084004603, "grad_norm": 0.6046386361122131, "learning_rate": 1.7433831990794016e-05, "loss": 0.2615, "step": 344570 }, { "epoch": 99.13118527042577, "grad_norm": 2.2948758602142334, "learning_rate": 1.7376294591484468e-05, "loss": 0.2646, "step": 344580 }, { "epoch": 99.13406214039125, "grad_norm": 0.9525010585784912, "learning_rate": 1.7318757192174913e-05, "loss": 0.2358, "step": 344590 }, { "epoch": 99.13693901035673, "grad_norm": 0.9291172623634338, "learning_rate": 1.7261219792865362e-05, "loss": 0.2358, "step": 344600 }, { "epoch": 99.1398158803222, "grad_norm": 1.5825906991958618, "learning_rate": 1.720368239355581e-05, "loss": 0.2222, "step": 344610 }, { "epoch": 99.1426927502877, "grad_norm": 1.4319061040878296, "learning_rate": 1.714614499424626e-05, "loss": 0.2949, "step": 344620 }, { "epoch": 99.14556962025317, "grad_norm": 0.8949119448661804, "learning_rate": 1.708860759493671e-05, "loss": 0.2081, "step": 344630 }, { "epoch": 99.14844649021865, "grad_norm": 1.0123708248138428, "learning_rate": 1.703107019562716e-05, "loss": 0.2069, "step": 344640 }, { "epoch": 99.15132336018412, "grad_norm": 0.7616326212882996, "learning_rate": 1.6973532796317605e-05, "loss": 0.2389, "step": 344650 }, { "epoch": 99.1542002301496, "grad_norm": 1.4084488153457642, "learning_rate": 1.6915995397008054e-05, "loss": 0.2488, "step": 344660 }, { "epoch": 99.15707710011507, "grad_norm": 1.202344298362732, "learning_rate": 1.6858457997698503e-05, "loss": 0.2627, "step": 344670 }, { "epoch": 99.15995397008055, "grad_norm": 1.8671071529388428, "learning_rate": 1.6800920598388955e-05, "loss": 0.2735, "step": 344680 }, { "epoch": 99.16283084004603, "grad_norm": 1.4852324724197388, "learning_rate": 1.6743383199079404e-05, "loss": 0.1913, "step": 344690 }, { "epoch": 99.1657077100115, "grad_norm": 0.8894421458244324, "learning_rate": 1.668584579976985e-05, "loss": 0.2146, "step": 344700 }, { "epoch": 99.16858457997698, "grad_norm": 1.6569143533706665, "learning_rate": 1.6628308400460298e-05, "loss": 0.2418, "step": 344710 }, { "epoch": 99.17146144994246, "grad_norm": 1.8857946395874023, "learning_rate": 1.6570771001150746e-05, "loss": 0.2457, "step": 344720 }, { "epoch": 99.17433831990795, "grad_norm": 1.2984834909439087, "learning_rate": 1.65132336018412e-05, "loss": 0.2598, "step": 344730 }, { "epoch": 99.17721518987342, "grad_norm": 1.5215260982513428, "learning_rate": 1.6455696202531647e-05, "loss": 0.2573, "step": 344740 }, { "epoch": 99.1800920598389, "grad_norm": 1.5090841054916382, "learning_rate": 1.6398158803222096e-05, "loss": 0.3019, "step": 344750 }, { "epoch": 99.18296892980437, "grad_norm": 1.288021206855774, "learning_rate": 1.634062140391254e-05, "loss": 0.2549, "step": 344760 }, { "epoch": 99.18584579976985, "grad_norm": 0.8235071301460266, "learning_rate": 1.628308400460299e-05, "loss": 0.2365, "step": 344770 }, { "epoch": 99.18872266973533, "grad_norm": 1.0716947317123413, "learning_rate": 1.6225546605293442e-05, "loss": 0.2889, "step": 344780 }, { "epoch": 99.1915995397008, "grad_norm": 1.369665503501892, "learning_rate": 1.616800920598389e-05, "loss": 0.2171, "step": 344790 }, { "epoch": 99.19447640966628, "grad_norm": 1.1572294235229492, "learning_rate": 1.611047180667434e-05, "loss": 0.2409, "step": 344800 }, { "epoch": 99.19735327963176, "grad_norm": 1.1096022129058838, "learning_rate": 1.6052934407364785e-05, "loss": 0.2044, "step": 344810 }, { "epoch": 99.20023014959723, "grad_norm": 0.646987795829773, "learning_rate": 1.5995397008055234e-05, "loss": 0.2809, "step": 344820 }, { "epoch": 99.20310701956272, "grad_norm": 0.8282533884048462, "learning_rate": 1.5937859608745686e-05, "loss": 0.2336, "step": 344830 }, { "epoch": 99.2059838895282, "grad_norm": 1.2391328811645508, "learning_rate": 1.5880322209436135e-05, "loss": 0.2209, "step": 344840 }, { "epoch": 99.20886075949367, "grad_norm": 0.9376530051231384, "learning_rate": 1.5822784810126583e-05, "loss": 0.2497, "step": 344850 }, { "epoch": 99.21173762945915, "grad_norm": 1.2122105360031128, "learning_rate": 1.5765247410817032e-05, "loss": 0.2474, "step": 344860 }, { "epoch": 99.21461449942463, "grad_norm": 0.7335684299468994, "learning_rate": 1.570771001150748e-05, "loss": 0.2363, "step": 344870 }, { "epoch": 99.2174913693901, "grad_norm": 0.8264676928520203, "learning_rate": 1.565017261219793e-05, "loss": 0.2558, "step": 344880 }, { "epoch": 99.22036823935558, "grad_norm": 0.8428528308868408, "learning_rate": 1.5592635212888378e-05, "loss": 0.2534, "step": 344890 }, { "epoch": 99.22324510932106, "grad_norm": 0.7977716326713562, "learning_rate": 1.5535097813578827e-05, "loss": 0.2293, "step": 344900 }, { "epoch": 99.22612197928653, "grad_norm": 0.8327205181121826, "learning_rate": 1.5477560414269276e-05, "loss": 0.2795, "step": 344910 }, { "epoch": 99.22899884925201, "grad_norm": 1.1219050884246826, "learning_rate": 1.5420023014959724e-05, "loss": 0.2478, "step": 344920 }, { "epoch": 99.23187571921748, "grad_norm": 1.5394270420074463, "learning_rate": 1.5362485615650173e-05, "loss": 0.221, "step": 344930 }, { "epoch": 99.23475258918297, "grad_norm": 1.426642656326294, "learning_rate": 1.5304948216340622e-05, "loss": 0.2459, "step": 344940 }, { "epoch": 99.23762945914845, "grad_norm": 2.3028793334960938, "learning_rate": 1.524741081703107e-05, "loss": 0.3527, "step": 344950 }, { "epoch": 99.24050632911393, "grad_norm": 1.039123296737671, "learning_rate": 1.518987341772152e-05, "loss": 0.3052, "step": 344960 }, { "epoch": 99.2433831990794, "grad_norm": 1.691206693649292, "learning_rate": 1.5132336018411968e-05, "loss": 0.3584, "step": 344970 }, { "epoch": 99.24626006904488, "grad_norm": 1.208730697631836, "learning_rate": 1.5074798619102418e-05, "loss": 0.3459, "step": 344980 }, { "epoch": 99.24913693901036, "grad_norm": 0.8996316194534302, "learning_rate": 1.5017261219792865e-05, "loss": 0.2385, "step": 344990 }, { "epoch": 99.25201380897583, "grad_norm": 1.016156554222107, "learning_rate": 1.4959723820483314e-05, "loss": 0.2688, "step": 345000 }, { "epoch": 99.25489067894131, "grad_norm": 0.9919827580451965, "learning_rate": 1.4902186421173763e-05, "loss": 0.2472, "step": 345010 }, { "epoch": 99.25776754890678, "grad_norm": 1.1770676374435425, "learning_rate": 1.4844649021864212e-05, "loss": 0.2334, "step": 345020 }, { "epoch": 99.26064441887226, "grad_norm": 0.9486219882965088, "learning_rate": 1.4787111622554662e-05, "loss": 0.23, "step": 345030 }, { "epoch": 99.26352128883775, "grad_norm": 0.8631941676139832, "learning_rate": 1.4729574223245109e-05, "loss": 0.2682, "step": 345040 }, { "epoch": 99.26639815880323, "grad_norm": 1.2436631917953491, "learning_rate": 1.4672036823935558e-05, "loss": 0.2767, "step": 345050 }, { "epoch": 99.2692750287687, "grad_norm": 0.9163026213645935, "learning_rate": 1.4614499424626008e-05, "loss": 0.258, "step": 345060 }, { "epoch": 99.27215189873418, "grad_norm": 1.2636250257492065, "learning_rate": 1.4556962025316455e-05, "loss": 0.2082, "step": 345070 }, { "epoch": 99.27502876869966, "grad_norm": 1.4482063055038452, "learning_rate": 1.4499424626006906e-05, "loss": 0.2675, "step": 345080 }, { "epoch": 99.27790563866513, "grad_norm": 0.9010289907455444, "learning_rate": 1.4441887226697354e-05, "loss": 0.2195, "step": 345090 }, { "epoch": 99.28078250863061, "grad_norm": 1.8110171556472778, "learning_rate": 1.4384349827387801e-05, "loss": 0.2809, "step": 345100 }, { "epoch": 99.28365937859608, "grad_norm": 0.8826389312744141, "learning_rate": 1.4326812428078252e-05, "loss": 0.2241, "step": 345110 }, { "epoch": 99.28653624856156, "grad_norm": 0.6776638031005859, "learning_rate": 1.4269275028768699e-05, "loss": 0.2158, "step": 345120 }, { "epoch": 99.28941311852704, "grad_norm": 1.4502577781677246, "learning_rate": 1.421173762945915e-05, "loss": 0.2644, "step": 345130 }, { "epoch": 99.29228998849253, "grad_norm": 0.9625729918479919, "learning_rate": 1.4154200230149598e-05, "loss": 0.2188, "step": 345140 }, { "epoch": 99.295166858458, "grad_norm": 1.3132792711257935, "learning_rate": 1.4096662830840045e-05, "loss": 0.288, "step": 345150 }, { "epoch": 99.29804372842348, "grad_norm": 1.1212841272354126, "learning_rate": 1.4039125431530495e-05, "loss": 0.2447, "step": 345160 }, { "epoch": 99.30092059838896, "grad_norm": 0.6722024083137512, "learning_rate": 1.3981588032220944e-05, "loss": 0.2779, "step": 345170 }, { "epoch": 99.30379746835443, "grad_norm": 0.7646071314811707, "learning_rate": 1.3924050632911393e-05, "loss": 0.2478, "step": 345180 }, { "epoch": 99.30667433831991, "grad_norm": 0.7876080274581909, "learning_rate": 1.3866513233601842e-05, "loss": 0.2183, "step": 345190 }, { "epoch": 99.30955120828538, "grad_norm": 0.7768186330795288, "learning_rate": 1.3808975834292289e-05, "loss": 0.217, "step": 345200 }, { "epoch": 99.31242807825086, "grad_norm": 1.2390592098236084, "learning_rate": 1.3751438434982739e-05, "loss": 0.2818, "step": 345210 }, { "epoch": 99.31530494821634, "grad_norm": 1.5343585014343262, "learning_rate": 1.3693901035673188e-05, "loss": 0.2191, "step": 345220 }, { "epoch": 99.31818181818181, "grad_norm": 2.025043249130249, "learning_rate": 1.3636363636363637e-05, "loss": 0.2822, "step": 345230 }, { "epoch": 99.32105868814729, "grad_norm": 1.7869459390640259, "learning_rate": 1.3578826237054085e-05, "loss": 0.3571, "step": 345240 }, { "epoch": 99.32393555811278, "grad_norm": 1.0047242641448975, "learning_rate": 1.3521288837744534e-05, "loss": 0.2151, "step": 345250 }, { "epoch": 99.32681242807826, "grad_norm": 1.3313647508621216, "learning_rate": 1.3463751438434983e-05, "loss": 0.2721, "step": 345260 }, { "epoch": 99.32968929804373, "grad_norm": 1.2115962505340576, "learning_rate": 1.3406214039125431e-05, "loss": 0.2445, "step": 345270 }, { "epoch": 99.33256616800921, "grad_norm": 0.6812325716018677, "learning_rate": 1.3348676639815882e-05, "loss": 0.2479, "step": 345280 }, { "epoch": 99.33544303797468, "grad_norm": 1.8330832719802856, "learning_rate": 1.3291139240506329e-05, "loss": 0.256, "step": 345290 }, { "epoch": 99.33831990794016, "grad_norm": 0.9781385660171509, "learning_rate": 1.323360184119678e-05, "loss": 0.2305, "step": 345300 }, { "epoch": 99.34119677790564, "grad_norm": 1.9007151126861572, "learning_rate": 1.3176064441887226e-05, "loss": 0.2729, "step": 345310 }, { "epoch": 99.34407364787111, "grad_norm": 0.7201894521713257, "learning_rate": 1.3118527042577675e-05, "loss": 0.1946, "step": 345320 }, { "epoch": 99.34695051783659, "grad_norm": 1.2627136707305908, "learning_rate": 1.3060989643268126e-05, "loss": 0.2317, "step": 345330 }, { "epoch": 99.34982738780207, "grad_norm": 1.1088918447494507, "learning_rate": 1.3003452243958573e-05, "loss": 0.2323, "step": 345340 }, { "epoch": 99.35270425776756, "grad_norm": 0.9650281667709351, "learning_rate": 1.2945914844649023e-05, "loss": 0.2765, "step": 345350 }, { "epoch": 99.35558112773303, "grad_norm": 1.0741184949874878, "learning_rate": 1.2888377445339472e-05, "loss": 0.2622, "step": 345360 }, { "epoch": 99.35845799769851, "grad_norm": 1.952007532119751, "learning_rate": 1.2830840046029919e-05, "loss": 0.2965, "step": 345370 }, { "epoch": 99.36133486766398, "grad_norm": 1.0062001943588257, "learning_rate": 1.2773302646720369e-05, "loss": 0.2132, "step": 345380 }, { "epoch": 99.36421173762946, "grad_norm": 1.831687569618225, "learning_rate": 1.2715765247410816e-05, "loss": 0.2754, "step": 345390 }, { "epoch": 99.36708860759494, "grad_norm": 0.7705663442611694, "learning_rate": 1.2658227848101267e-05, "loss": 0.2179, "step": 345400 }, { "epoch": 99.36996547756041, "grad_norm": 0.5324913263320923, "learning_rate": 1.2600690448791715e-05, "loss": 0.2062, "step": 345410 }, { "epoch": 99.37284234752589, "grad_norm": 1.1074997186660767, "learning_rate": 1.2543153049482162e-05, "loss": 0.2297, "step": 345420 }, { "epoch": 99.37571921749137, "grad_norm": 1.300670862197876, "learning_rate": 1.2485615650172613e-05, "loss": 0.2232, "step": 345430 }, { "epoch": 99.37859608745684, "grad_norm": 1.3351863622665405, "learning_rate": 1.2428078250863062e-05, "loss": 0.2811, "step": 345440 }, { "epoch": 99.38147295742232, "grad_norm": 0.7267436385154724, "learning_rate": 1.237054085155351e-05, "loss": 0.2436, "step": 345450 }, { "epoch": 99.38434982738781, "grad_norm": 1.173797607421875, "learning_rate": 1.2313003452243959e-05, "loss": 0.2338, "step": 345460 }, { "epoch": 99.38722669735328, "grad_norm": 1.0123367309570312, "learning_rate": 1.2255466052934408e-05, "loss": 0.2255, "step": 345470 }, { "epoch": 99.39010356731876, "grad_norm": 1.220963716506958, "learning_rate": 1.2197928653624856e-05, "loss": 0.2212, "step": 345480 }, { "epoch": 99.39298043728424, "grad_norm": 0.7238691449165344, "learning_rate": 1.2140391254315305e-05, "loss": 0.2234, "step": 345490 }, { "epoch": 99.39585730724971, "grad_norm": 1.0939913988113403, "learning_rate": 1.2082853855005754e-05, "loss": 0.2803, "step": 345500 }, { "epoch": 99.39873417721519, "grad_norm": 0.6315714120864868, "learning_rate": 1.2025316455696203e-05, "loss": 0.1987, "step": 345510 }, { "epoch": 99.40161104718067, "grad_norm": 1.4927922487258911, "learning_rate": 1.1967779056386651e-05, "loss": 0.2607, "step": 345520 }, { "epoch": 99.40448791714614, "grad_norm": 1.4323804378509521, "learning_rate": 1.19102416570771e-05, "loss": 0.2693, "step": 345530 }, { "epoch": 99.40736478711162, "grad_norm": 1.7301079034805298, "learning_rate": 1.1852704257767549e-05, "loss": 0.2737, "step": 345540 }, { "epoch": 99.4102416570771, "grad_norm": 1.389808177947998, "learning_rate": 1.1795166858458e-05, "loss": 0.2467, "step": 345550 }, { "epoch": 99.41311852704258, "grad_norm": 0.7227850556373596, "learning_rate": 1.1737629459148446e-05, "loss": 0.199, "step": 345560 }, { "epoch": 99.41599539700806, "grad_norm": 1.3424348831176758, "learning_rate": 1.1680092059838895e-05, "loss": 0.2447, "step": 345570 }, { "epoch": 99.41887226697354, "grad_norm": 0.8779851794242859, "learning_rate": 1.1622554660529344e-05, "loss": 0.2485, "step": 345580 }, { "epoch": 99.42174913693901, "grad_norm": 1.9769959449768066, "learning_rate": 1.1565017261219792e-05, "loss": 0.2935, "step": 345590 }, { "epoch": 99.42462600690449, "grad_norm": 1.4931458234786987, "learning_rate": 1.1507479861910243e-05, "loss": 0.2602, "step": 345600 }, { "epoch": 99.42750287686997, "grad_norm": 1.1525486707687378, "learning_rate": 1.144994246260069e-05, "loss": 0.2437, "step": 345610 }, { "epoch": 99.43037974683544, "grad_norm": 1.5926573276519775, "learning_rate": 1.139240506329114e-05, "loss": 0.2655, "step": 345620 }, { "epoch": 99.43325661680092, "grad_norm": 1.0666717290878296, "learning_rate": 1.1334867663981589e-05, "loss": 0.2219, "step": 345630 }, { "epoch": 99.4361334867664, "grad_norm": 0.9070844054222107, "learning_rate": 1.1277330264672036e-05, "loss": 0.2537, "step": 345640 }, { "epoch": 99.43901035673187, "grad_norm": 0.5968155860900879, "learning_rate": 1.1219792865362486e-05, "loss": 0.2297, "step": 345650 }, { "epoch": 99.44188722669735, "grad_norm": 1.1619796752929688, "learning_rate": 1.1162255466052935e-05, "loss": 0.2355, "step": 345660 }, { "epoch": 99.44476409666284, "grad_norm": 1.0273005962371826, "learning_rate": 1.1104718066743384e-05, "loss": 0.28, "step": 345670 }, { "epoch": 99.44764096662831, "grad_norm": 0.712295413017273, "learning_rate": 1.1047180667433833e-05, "loss": 0.2334, "step": 345680 }, { "epoch": 99.45051783659379, "grad_norm": 0.5965291261672974, "learning_rate": 1.098964326812428e-05, "loss": 0.2049, "step": 345690 }, { "epoch": 99.45339470655927, "grad_norm": 0.7905817031860352, "learning_rate": 1.093210586881473e-05, "loss": 0.283, "step": 345700 }, { "epoch": 99.45627157652474, "grad_norm": 1.9055874347686768, "learning_rate": 1.0874568469505179e-05, "loss": 0.2374, "step": 345710 }, { "epoch": 99.45914844649022, "grad_norm": 0.9493276476860046, "learning_rate": 1.0817031070195628e-05, "loss": 0.2689, "step": 345720 }, { "epoch": 99.4620253164557, "grad_norm": 0.926815390586853, "learning_rate": 1.0759493670886076e-05, "loss": 0.2331, "step": 345730 }, { "epoch": 99.46490218642117, "grad_norm": 1.6793766021728516, "learning_rate": 1.0701956271576525e-05, "loss": 0.261, "step": 345740 }, { "epoch": 99.46777905638665, "grad_norm": 0.8238601088523865, "learning_rate": 1.0644418872266974e-05, "loss": 0.2292, "step": 345750 }, { "epoch": 99.47065592635212, "grad_norm": 1.35731840133667, "learning_rate": 1.0586881472957422e-05, "loss": 0.2434, "step": 345760 }, { "epoch": 99.47353279631761, "grad_norm": 2.203495502471924, "learning_rate": 1.0529344073647871e-05, "loss": 0.2775, "step": 345770 }, { "epoch": 99.47640966628309, "grad_norm": 1.3587440252304077, "learning_rate": 1.047180667433832e-05, "loss": 0.3273, "step": 345780 }, { "epoch": 99.47928653624857, "grad_norm": 1.5239801406860352, "learning_rate": 1.0414269275028769e-05, "loss": 0.2123, "step": 345790 }, { "epoch": 99.48216340621404, "grad_norm": 1.6201194524765015, "learning_rate": 1.0356731875719217e-05, "loss": 0.2968, "step": 345800 }, { "epoch": 99.48504027617952, "grad_norm": 0.8999478220939636, "learning_rate": 1.0299194476409666e-05, "loss": 0.309, "step": 345810 }, { "epoch": 99.487917146145, "grad_norm": 0.9950757622718811, "learning_rate": 1.0241657077100117e-05, "loss": 0.2586, "step": 345820 }, { "epoch": 99.49079401611047, "grad_norm": 1.5709806680679321, "learning_rate": 1.0184119677790564e-05, "loss": 0.2205, "step": 345830 }, { "epoch": 99.49367088607595, "grad_norm": 0.8013941049575806, "learning_rate": 1.0126582278481012e-05, "loss": 0.2268, "step": 345840 }, { "epoch": 99.49654775604142, "grad_norm": 0.9214798808097839, "learning_rate": 1.0069044879171463e-05, "loss": 0.2215, "step": 345850 }, { "epoch": 99.4994246260069, "grad_norm": 1.147365927696228, "learning_rate": 1.001150747986191e-05, "loss": 0.215, "step": 345860 }, { "epoch": 99.50230149597238, "grad_norm": 0.615519642829895, "learning_rate": 9.95397008055236e-06, "loss": 0.2276, "step": 345870 }, { "epoch": 99.50517836593787, "grad_norm": 1.2114514112472534, "learning_rate": 9.896432681242807e-06, "loss": 0.2924, "step": 345880 }, { "epoch": 99.50805523590334, "grad_norm": 0.8044854402542114, "learning_rate": 9.838895281933256e-06, "loss": 0.2881, "step": 345890 }, { "epoch": 99.51093210586882, "grad_norm": 1.2022819519042969, "learning_rate": 9.781357882623706e-06, "loss": 0.2583, "step": 345900 }, { "epoch": 99.5138089758343, "grad_norm": 1.039957046508789, "learning_rate": 9.723820483314153e-06, "loss": 0.2002, "step": 345910 }, { "epoch": 99.51668584579977, "grad_norm": 1.3994905948638916, "learning_rate": 9.666283084004604e-06, "loss": 0.2742, "step": 345920 }, { "epoch": 99.51956271576525, "grad_norm": 1.1240882873535156, "learning_rate": 9.608745684695052e-06, "loss": 0.2359, "step": 345930 }, { "epoch": 99.52243958573072, "grad_norm": 1.78367280960083, "learning_rate": 9.551208285385501e-06, "loss": 0.2638, "step": 345940 }, { "epoch": 99.5253164556962, "grad_norm": 0.6994134187698364, "learning_rate": 9.49367088607595e-06, "loss": 0.1862, "step": 345950 }, { "epoch": 99.52819332566168, "grad_norm": 1.3480913639068604, "learning_rate": 9.436133486766397e-06, "loss": 0.2744, "step": 345960 }, { "epoch": 99.53107019562715, "grad_norm": 1.2632492780685425, "learning_rate": 9.378596087456847e-06, "loss": 0.2197, "step": 345970 }, { "epoch": 99.53394706559264, "grad_norm": 0.8755698800086975, "learning_rate": 9.321058688147296e-06, "loss": 0.1987, "step": 345980 }, { "epoch": 99.53682393555812, "grad_norm": 1.003329873085022, "learning_rate": 9.263521288837745e-06, "loss": 0.2768, "step": 345990 }, { "epoch": 99.5397008055236, "grad_norm": 1.4481425285339355, "learning_rate": 9.205983889528194e-06, "loss": 0.2288, "step": 346000 }, { "epoch": 99.54257767548907, "grad_norm": 1.247292160987854, "learning_rate": 9.148446490218642e-06, "loss": 0.2907, "step": 346010 }, { "epoch": 99.54545454545455, "grad_norm": 1.1598743200302124, "learning_rate": 9.090909090909091e-06, "loss": 0.212, "step": 346020 }, { "epoch": 99.54833141542002, "grad_norm": 1.8165746927261353, "learning_rate": 9.03337169159954e-06, "loss": 0.2561, "step": 346030 }, { "epoch": 99.5512082853855, "grad_norm": 1.094373345375061, "learning_rate": 8.97583429228999e-06, "loss": 0.2364, "step": 346040 }, { "epoch": 99.55408515535098, "grad_norm": 0.9522109627723694, "learning_rate": 8.918296892980437e-06, "loss": 0.2595, "step": 346050 }, { "epoch": 99.55696202531645, "grad_norm": 1.4484140872955322, "learning_rate": 8.860759493670886e-06, "loss": 0.2566, "step": 346060 }, { "epoch": 99.55983889528193, "grad_norm": 1.8593907356262207, "learning_rate": 8.803222094361335e-06, "loss": 0.2116, "step": 346070 }, { "epoch": 99.5627157652474, "grad_norm": 1.6654475927352905, "learning_rate": 8.745684695051783e-06, "loss": 0.2603, "step": 346080 }, { "epoch": 99.5655926352129, "grad_norm": 0.7218950390815735, "learning_rate": 8.688147295742234e-06, "loss": 0.2244, "step": 346090 }, { "epoch": 99.56846950517837, "grad_norm": 0.7772055864334106, "learning_rate": 8.630609896432681e-06, "loss": 0.1931, "step": 346100 }, { "epoch": 99.57134637514385, "grad_norm": 1.3244320154190063, "learning_rate": 8.57307249712313e-06, "loss": 0.2878, "step": 346110 }, { "epoch": 99.57422324510932, "grad_norm": 1.6658997535705566, "learning_rate": 8.51553509781358e-06, "loss": 0.2432, "step": 346120 }, { "epoch": 99.5771001150748, "grad_norm": 0.9500097632408142, "learning_rate": 8.457997698504027e-06, "loss": 0.2835, "step": 346130 }, { "epoch": 99.57997698504028, "grad_norm": 1.6644524335861206, "learning_rate": 8.400460299194477e-06, "loss": 0.2459, "step": 346140 }, { "epoch": 99.58285385500575, "grad_norm": 0.7736058831214905, "learning_rate": 8.342922899884924e-06, "loss": 0.1871, "step": 346150 }, { "epoch": 99.58573072497123, "grad_norm": 2.119394540786743, "learning_rate": 8.285385500575373e-06, "loss": 0.2574, "step": 346160 }, { "epoch": 99.5886075949367, "grad_norm": 1.2260979413986206, "learning_rate": 8.227848101265824e-06, "loss": 0.3016, "step": 346170 }, { "epoch": 99.59148446490218, "grad_norm": 1.7071558237075806, "learning_rate": 8.17031070195627e-06, "loss": 0.2265, "step": 346180 }, { "epoch": 99.59436133486767, "grad_norm": 1.2596426010131836, "learning_rate": 8.112773302646721e-06, "loss": 0.2644, "step": 346190 }, { "epoch": 99.59723820483315, "grad_norm": 1.2136114835739136, "learning_rate": 8.05523590333717e-06, "loss": 0.3413, "step": 346200 }, { "epoch": 99.60011507479862, "grad_norm": 0.684646725654602, "learning_rate": 7.997698504027617e-06, "loss": 0.2393, "step": 346210 }, { "epoch": 99.6029919447641, "grad_norm": 1.5769137144088745, "learning_rate": 7.940161104718067e-06, "loss": 0.265, "step": 346220 }, { "epoch": 99.60586881472958, "grad_norm": 1.4826545715332031, "learning_rate": 7.882623705408516e-06, "loss": 0.2, "step": 346230 }, { "epoch": 99.60874568469505, "grad_norm": 0.6405118107795715, "learning_rate": 7.825086306098965e-06, "loss": 0.1958, "step": 346240 }, { "epoch": 99.61162255466053, "grad_norm": 0.8092379570007324, "learning_rate": 7.767548906789413e-06, "loss": 0.2832, "step": 346250 }, { "epoch": 99.614499424626, "grad_norm": 1.9696842432022095, "learning_rate": 7.710011507479862e-06, "loss": 0.2403, "step": 346260 }, { "epoch": 99.61737629459148, "grad_norm": 1.2212203741073608, "learning_rate": 7.652474108170311e-06, "loss": 0.263, "step": 346270 }, { "epoch": 99.62025316455696, "grad_norm": 1.087648868560791, "learning_rate": 7.59493670886076e-06, "loss": 0.2065, "step": 346280 }, { "epoch": 99.62313003452243, "grad_norm": 1.8747222423553467, "learning_rate": 7.537399309551209e-06, "loss": 0.2357, "step": 346290 }, { "epoch": 99.62600690448792, "grad_norm": 1.458372712135315, "learning_rate": 7.479861910241657e-06, "loss": 0.2622, "step": 346300 }, { "epoch": 99.6288837744534, "grad_norm": 1.113430380821228, "learning_rate": 7.422324510932106e-06, "loss": 0.2577, "step": 346310 }, { "epoch": 99.63176064441888, "grad_norm": 0.6448370814323425, "learning_rate": 7.3647871116225545e-06, "loss": 0.2111, "step": 346320 }, { "epoch": 99.63463751438435, "grad_norm": 0.8880612850189209, "learning_rate": 7.307249712313004e-06, "loss": 0.2365, "step": 346330 }, { "epoch": 99.63751438434983, "grad_norm": 1.0637725591659546, "learning_rate": 7.249712313003453e-06, "loss": 0.2192, "step": 346340 }, { "epoch": 99.6403912543153, "grad_norm": 1.5412410497665405, "learning_rate": 7.192174913693901e-06, "loss": 0.2588, "step": 346350 }, { "epoch": 99.64326812428078, "grad_norm": 0.9773905873298645, "learning_rate": 7.1346375143843494e-06, "loss": 0.2062, "step": 346360 }, { "epoch": 99.64614499424626, "grad_norm": 1.5940548181533813, "learning_rate": 7.077100115074799e-06, "loss": 0.2748, "step": 346370 }, { "epoch": 99.64902186421173, "grad_norm": 1.2300776243209839, "learning_rate": 7.019562715765248e-06, "loss": 0.2711, "step": 346380 }, { "epoch": 99.65189873417721, "grad_norm": 1.057633399963379, "learning_rate": 6.9620253164556965e-06, "loss": 0.2784, "step": 346390 }, { "epoch": 99.6547756041427, "grad_norm": 1.286939263343811, "learning_rate": 6.904487917146144e-06, "loss": 0.2806, "step": 346400 }, { "epoch": 99.65765247410818, "grad_norm": 1.4084895849227905, "learning_rate": 6.846950517836594e-06, "loss": 0.2727, "step": 346410 }, { "epoch": 99.66052934407365, "grad_norm": 1.576382040977478, "learning_rate": 6.789413118527043e-06, "loss": 0.2372, "step": 346420 }, { "epoch": 99.66340621403913, "grad_norm": 0.9766784310340881, "learning_rate": 6.731875719217491e-06, "loss": 0.2217, "step": 346430 }, { "epoch": 99.6662830840046, "grad_norm": 0.9059715867042542, "learning_rate": 6.674338319907941e-06, "loss": 0.2817, "step": 346440 }, { "epoch": 99.66915995397008, "grad_norm": 1.1693214178085327, "learning_rate": 6.61680092059839e-06, "loss": 0.2206, "step": 346450 }, { "epoch": 99.67203682393556, "grad_norm": 2.1087520122528076, "learning_rate": 6.5592635212888375e-06, "loss": 0.2851, "step": 346460 }, { "epoch": 99.67491369390103, "grad_norm": 1.5917677879333496, "learning_rate": 6.501726121979286e-06, "loss": 0.2546, "step": 346470 }, { "epoch": 99.67779056386651, "grad_norm": 1.304298758506775, "learning_rate": 6.444188722669736e-06, "loss": 0.2665, "step": 346480 }, { "epoch": 99.68066743383199, "grad_norm": 1.0068209171295166, "learning_rate": 6.3866513233601846e-06, "loss": 0.2075, "step": 346490 }, { "epoch": 99.68354430379746, "grad_norm": 0.8782013654708862, "learning_rate": 6.329113924050633e-06, "loss": 0.2294, "step": 346500 }, { "epoch": 99.68642117376295, "grad_norm": 0.6558417677879333, "learning_rate": 6.271576524741081e-06, "loss": 0.2775, "step": 346510 }, { "epoch": 99.68929804372843, "grad_norm": 0.9026117920875549, "learning_rate": 6.214039125431531e-06, "loss": 0.2559, "step": 346520 }, { "epoch": 99.6921749136939, "grad_norm": 3.0925240516662598, "learning_rate": 6.1565017261219795e-06, "loss": 0.2281, "step": 346530 }, { "epoch": 99.69505178365938, "grad_norm": 1.1510019302368164, "learning_rate": 6.098964326812428e-06, "loss": 0.2735, "step": 346540 }, { "epoch": 99.69792865362486, "grad_norm": 0.9282261729240417, "learning_rate": 6.041426927502877e-06, "loss": 0.2373, "step": 346550 }, { "epoch": 99.70080552359033, "grad_norm": 0.6454014778137207, "learning_rate": 5.983889528193326e-06, "loss": 0.2432, "step": 346560 }, { "epoch": 99.70368239355581, "grad_norm": 0.7883508801460266, "learning_rate": 5.926352128883774e-06, "loss": 0.2202, "step": 346570 }, { "epoch": 99.70655926352129, "grad_norm": 0.9507426023483276, "learning_rate": 5.868814729574223e-06, "loss": 0.2256, "step": 346580 }, { "epoch": 99.70943613348676, "grad_norm": 0.5894128680229187, "learning_rate": 5.811277330264672e-06, "loss": 0.245, "step": 346590 }, { "epoch": 99.71231300345224, "grad_norm": 1.6629548072814941, "learning_rate": 5.753739930955121e-06, "loss": 0.2181, "step": 346600 }, { "epoch": 99.71518987341773, "grad_norm": 1.3135699033737183, "learning_rate": 5.69620253164557e-06, "loss": 0.2651, "step": 346610 }, { "epoch": 99.7180667433832, "grad_norm": 1.0802175998687744, "learning_rate": 5.638665132336018e-06, "loss": 0.2799, "step": 346620 }, { "epoch": 99.72094361334868, "grad_norm": 1.8057448863983154, "learning_rate": 5.581127733026468e-06, "loss": 0.2812, "step": 346630 }, { "epoch": 99.72382048331416, "grad_norm": 0.696365237236023, "learning_rate": 5.523590333716916e-06, "loss": 0.2554, "step": 346640 }, { "epoch": 99.72669735327963, "grad_norm": 1.446332335472107, "learning_rate": 5.466052934407365e-06, "loss": 0.2261, "step": 346650 }, { "epoch": 99.72957422324511, "grad_norm": 0.8575741052627563, "learning_rate": 5.408515535097814e-06, "loss": 0.3099, "step": 346660 }, { "epoch": 99.73245109321059, "grad_norm": 1.2788487672805786, "learning_rate": 5.3509781357882625e-06, "loss": 0.2744, "step": 346670 }, { "epoch": 99.73532796317606, "grad_norm": 1.1739521026611328, "learning_rate": 5.293440736478711e-06, "loss": 0.2965, "step": 346680 }, { "epoch": 99.73820483314154, "grad_norm": 0.6152559518814087, "learning_rate": 5.23590333716916e-06, "loss": 0.2685, "step": 346690 }, { "epoch": 99.74108170310701, "grad_norm": 0.5824266076087952, "learning_rate": 5.178365937859609e-06, "loss": 0.2178, "step": 346700 }, { "epoch": 99.74395857307249, "grad_norm": 0.8769983053207397, "learning_rate": 5.120828538550058e-06, "loss": 0.2628, "step": 346710 }, { "epoch": 99.74683544303798, "grad_norm": 0.7372888326644897, "learning_rate": 5.063291139240506e-06, "loss": 0.2456, "step": 346720 }, { "epoch": 99.74971231300346, "grad_norm": 2.1773014068603516, "learning_rate": 5.005753739930955e-06, "loss": 0.3313, "step": 346730 }, { "epoch": 99.75258918296893, "grad_norm": 1.089095115661621, "learning_rate": 4.948216340621404e-06, "loss": 0.2387, "step": 346740 }, { "epoch": 99.75546605293441, "grad_norm": 0.7333531975746155, "learning_rate": 4.890678941311853e-06, "loss": 0.2294, "step": 346750 }, { "epoch": 99.75834292289989, "grad_norm": 1.1175347566604614, "learning_rate": 4.833141542002302e-06, "loss": 0.3388, "step": 346760 }, { "epoch": 99.76121979286536, "grad_norm": 1.0711122751235962, "learning_rate": 4.775604142692751e-06, "loss": 0.2207, "step": 346770 }, { "epoch": 99.76409666283084, "grad_norm": 2.366724967956543, "learning_rate": 4.7180667433831985e-06, "loss": 0.2664, "step": 346780 }, { "epoch": 99.76697353279631, "grad_norm": 1.1977499723434448, "learning_rate": 4.660529344073648e-06, "loss": 0.2811, "step": 346790 }, { "epoch": 99.76985040276179, "grad_norm": 1.5899988412857056, "learning_rate": 4.602991944764097e-06, "loss": 0.2338, "step": 346800 }, { "epoch": 99.77272727272727, "grad_norm": 1.382252812385559, "learning_rate": 4.5454545454545455e-06, "loss": 0.257, "step": 346810 }, { "epoch": 99.77560414269276, "grad_norm": 0.8866742253303528, "learning_rate": 4.487917146144995e-06, "loss": 0.2708, "step": 346820 }, { "epoch": 99.77848101265823, "grad_norm": 1.8490886688232422, "learning_rate": 4.430379746835443e-06, "loss": 0.234, "step": 346830 }, { "epoch": 99.78135788262371, "grad_norm": 1.1467100381851196, "learning_rate": 4.372842347525892e-06, "loss": 0.2827, "step": 346840 }, { "epoch": 99.78423475258919, "grad_norm": 0.7356424331665039, "learning_rate": 4.3153049482163404e-06, "loss": 0.2483, "step": 346850 }, { "epoch": 99.78711162255466, "grad_norm": 0.7302685976028442, "learning_rate": 4.25776754890679e-06, "loss": 0.2488, "step": 346860 }, { "epoch": 99.78998849252014, "grad_norm": 0.8321601152420044, "learning_rate": 4.200230149597239e-06, "loss": 0.2825, "step": 346870 }, { "epoch": 99.79286536248561, "grad_norm": 1.3959805965423584, "learning_rate": 4.142692750287687e-06, "loss": 0.2511, "step": 346880 }, { "epoch": 99.79574223245109, "grad_norm": 0.9324000477790833, "learning_rate": 4.085155350978135e-06, "loss": 0.2549, "step": 346890 }, { "epoch": 99.79861910241657, "grad_norm": 1.3001210689544678, "learning_rate": 4.027617951668585e-06, "loss": 0.2402, "step": 346900 }, { "epoch": 99.80149597238204, "grad_norm": 0.6506217122077942, "learning_rate": 3.970080552359034e-06, "loss": 0.1909, "step": 346910 }, { "epoch": 99.80437284234753, "grad_norm": 0.784027099609375, "learning_rate": 3.912543153049482e-06, "loss": 0.2316, "step": 346920 }, { "epoch": 99.80724971231301, "grad_norm": 0.5665224194526672, "learning_rate": 3.855005753739931e-06, "loss": 0.2349, "step": 346930 }, { "epoch": 99.81012658227849, "grad_norm": 2.704143762588501, "learning_rate": 3.79746835443038e-06, "loss": 0.2911, "step": 346940 }, { "epoch": 99.81300345224396, "grad_norm": 1.4367598295211792, "learning_rate": 3.7399309551208285e-06, "loss": 0.253, "step": 346950 }, { "epoch": 99.81588032220944, "grad_norm": 1.0760780572891235, "learning_rate": 3.6823935558112773e-06, "loss": 0.2159, "step": 346960 }, { "epoch": 99.81875719217491, "grad_norm": 0.8132625222206116, "learning_rate": 3.6248561565017264e-06, "loss": 0.2612, "step": 346970 }, { "epoch": 99.82163406214039, "grad_norm": 1.8299232721328735, "learning_rate": 3.5673187571921747e-06, "loss": 0.275, "step": 346980 }, { "epoch": 99.82451093210587, "grad_norm": 1.143696665763855, "learning_rate": 3.509781357882624e-06, "loss": 0.2571, "step": 346990 }, { "epoch": 99.82738780207134, "grad_norm": 1.4726918935775757, "learning_rate": 3.452243958573072e-06, "loss": 0.2564, "step": 347000 }, { "epoch": 99.83026467203682, "grad_norm": 0.8910856246948242, "learning_rate": 3.3947065592635213e-06, "loss": 0.1948, "step": 347010 }, { "epoch": 99.8331415420023, "grad_norm": 1.1559157371520996, "learning_rate": 3.3371691599539705e-06, "loss": 0.2486, "step": 347020 }, { "epoch": 99.83601841196779, "grad_norm": 0.9117332696914673, "learning_rate": 3.2796317606444188e-06, "loss": 0.2357, "step": 347030 }, { "epoch": 99.83889528193326, "grad_norm": 0.8709837198257446, "learning_rate": 3.222094361334868e-06, "loss": 0.2245, "step": 347040 }, { "epoch": 99.84177215189874, "grad_norm": 0.7746337652206421, "learning_rate": 3.1645569620253167e-06, "loss": 0.2109, "step": 347050 }, { "epoch": 99.84464902186421, "grad_norm": 1.4076787233352661, "learning_rate": 3.1070195627157654e-06, "loss": 0.2346, "step": 347060 }, { "epoch": 99.84752589182969, "grad_norm": 0.8944039344787598, "learning_rate": 3.049482163406214e-06, "loss": 0.2285, "step": 347070 }, { "epoch": 99.85040276179517, "grad_norm": 1.1270134449005127, "learning_rate": 2.991944764096663e-06, "loss": 0.2074, "step": 347080 }, { "epoch": 99.85327963176064, "grad_norm": 0.9294359087944031, "learning_rate": 2.9344073647871116e-06, "loss": 0.2637, "step": 347090 }, { "epoch": 99.85615650172612, "grad_norm": 0.9399791955947876, "learning_rate": 2.8768699654775607e-06, "loss": 0.2545, "step": 347100 }, { "epoch": 99.8590333716916, "grad_norm": 0.6602448225021362, "learning_rate": 2.819332566168009e-06, "loss": 0.2061, "step": 347110 }, { "epoch": 99.86191024165707, "grad_norm": 1.6345878839492798, "learning_rate": 2.761795166858458e-06, "loss": 0.2218, "step": 347120 }, { "epoch": 99.86478711162256, "grad_norm": 0.9432629942893982, "learning_rate": 2.704257767548907e-06, "loss": 0.2473, "step": 347130 }, { "epoch": 99.86766398158804, "grad_norm": 0.7188307046890259, "learning_rate": 2.6467203682393556e-06, "loss": 0.2145, "step": 347140 }, { "epoch": 99.87054085155351, "grad_norm": 0.9238572120666504, "learning_rate": 2.5891829689298043e-06, "loss": 0.2076, "step": 347150 }, { "epoch": 99.87341772151899, "grad_norm": 0.9890775680541992, "learning_rate": 2.531645569620253e-06, "loss": 0.2652, "step": 347160 }, { "epoch": 99.87629459148447, "grad_norm": 0.7148309350013733, "learning_rate": 2.474108170310702e-06, "loss": 0.2473, "step": 347170 }, { "epoch": 99.87917146144994, "grad_norm": 2.115419626235962, "learning_rate": 2.416570771001151e-06, "loss": 0.2424, "step": 347180 }, { "epoch": 99.88204833141542, "grad_norm": 1.3421555757522583, "learning_rate": 2.3590333716915992e-06, "loss": 0.3619, "step": 347190 }, { "epoch": 99.8849252013809, "grad_norm": 0.7858401536941528, "learning_rate": 2.3014959723820484e-06, "loss": 0.2415, "step": 347200 }, { "epoch": 99.88780207134637, "grad_norm": 0.5686743855476379, "learning_rate": 2.2439585730724975e-06, "loss": 0.2884, "step": 347210 }, { "epoch": 99.89067894131185, "grad_norm": 1.3127750158309937, "learning_rate": 2.186421173762946e-06, "loss": 0.2146, "step": 347220 }, { "epoch": 99.89355581127732, "grad_norm": 1.3415662050247192, "learning_rate": 2.128883774453395e-06, "loss": 0.2361, "step": 347230 }, { "epoch": 99.89643268124281, "grad_norm": 1.4728652238845825, "learning_rate": 2.0713463751438433e-06, "loss": 0.2682, "step": 347240 }, { "epoch": 99.89930955120829, "grad_norm": 1.0377132892608643, "learning_rate": 2.0138089758342925e-06, "loss": 0.2752, "step": 347250 }, { "epoch": 99.90218642117377, "grad_norm": 1.3843544721603394, "learning_rate": 1.956271576524741e-06, "loss": 0.2178, "step": 347260 }, { "epoch": 99.90506329113924, "grad_norm": 1.434136152267456, "learning_rate": 1.89873417721519e-06, "loss": 0.2812, "step": 347270 }, { "epoch": 99.90794016110472, "grad_norm": 1.2659308910369873, "learning_rate": 1.8411967779056386e-06, "loss": 0.2472, "step": 347280 }, { "epoch": 99.9108170310702, "grad_norm": 1.5827100276947021, "learning_rate": 1.7836593785960874e-06, "loss": 0.2378, "step": 347290 }, { "epoch": 99.91369390103567, "grad_norm": 0.7189927697181702, "learning_rate": 1.726121979286536e-06, "loss": 0.2761, "step": 347300 }, { "epoch": 99.91657077100115, "grad_norm": 1.1327438354492188, "learning_rate": 1.6685845799769852e-06, "loss": 0.2213, "step": 347310 }, { "epoch": 99.91944764096662, "grad_norm": 1.9791901111602783, "learning_rate": 1.611047180667434e-06, "loss": 0.2308, "step": 347320 }, { "epoch": 99.9223245109321, "grad_norm": 1.0467275381088257, "learning_rate": 1.5535097813578827e-06, "loss": 0.2586, "step": 347330 }, { "epoch": 99.92520138089759, "grad_norm": 0.7920855283737183, "learning_rate": 1.4959723820483314e-06, "loss": 0.2294, "step": 347340 }, { "epoch": 99.92807825086307, "grad_norm": 1.0518957376480103, "learning_rate": 1.4384349827387804e-06, "loss": 0.2615, "step": 347350 }, { "epoch": 99.93095512082854, "grad_norm": 0.7409064769744873, "learning_rate": 1.380897583429229e-06, "loss": 0.2412, "step": 347360 }, { "epoch": 99.93383199079402, "grad_norm": 0.710264265537262, "learning_rate": 1.3233601841196778e-06, "loss": 0.2173, "step": 347370 }, { "epoch": 99.9367088607595, "grad_norm": 1.2215447425842285, "learning_rate": 1.2658227848101265e-06, "loss": 0.1985, "step": 347380 }, { "epoch": 99.93958573072497, "grad_norm": 0.8385075926780701, "learning_rate": 1.2082853855005755e-06, "loss": 0.2354, "step": 347390 }, { "epoch": 99.94246260069045, "grad_norm": 0.7586975693702698, "learning_rate": 1.1507479861910242e-06, "loss": 0.269, "step": 347400 }, { "epoch": 99.94533947065592, "grad_norm": 0.7921950817108154, "learning_rate": 1.093210586881473e-06, "loss": 0.2267, "step": 347410 }, { "epoch": 99.9482163406214, "grad_norm": 0.745124876499176, "learning_rate": 1.0356731875719217e-06, "loss": 0.238, "step": 347420 }, { "epoch": 99.95109321058688, "grad_norm": 1.0767332315444946, "learning_rate": 9.781357882623706e-07, "loss": 0.2216, "step": 347430 }, { "epoch": 99.95397008055235, "grad_norm": 1.599437952041626, "learning_rate": 9.205983889528193e-07, "loss": 0.248, "step": 347440 }, { "epoch": 99.95684695051784, "grad_norm": 1.7742327451705933, "learning_rate": 8.63060989643268e-07, "loss": 0.2831, "step": 347450 }, { "epoch": 99.95972382048332, "grad_norm": 1.0262647867202759, "learning_rate": 8.05523590333717e-07, "loss": 0.277, "step": 347460 }, { "epoch": 99.9626006904488, "grad_norm": 0.9379214644432068, "learning_rate": 7.479861910241657e-07, "loss": 0.2154, "step": 347470 }, { "epoch": 99.96547756041427, "grad_norm": 0.8467211723327637, "learning_rate": 6.904487917146145e-07, "loss": 0.2485, "step": 347480 }, { "epoch": 99.96835443037975, "grad_norm": 1.0677841901779175, "learning_rate": 6.329113924050633e-07, "loss": 0.2608, "step": 347490 }, { "epoch": 99.97123130034522, "grad_norm": 1.5928685665130615, "learning_rate": 5.753739930955121e-07, "loss": 0.2499, "step": 347500 } ], "logging_steps": 10, "max_steps": 347600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.157484541573837e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }