{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.999379998759997, "eval_steps": 500, "global_step": 40320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00248000496000992, "grad_norm": 0.7131296396255493, "learning_rate": 0.00019999996964510877, "loss": 2.4732, "step": 10 }, { "epoch": 0.00496000992001984, "grad_norm": 0.6677972078323364, "learning_rate": 0.00019999987858045355, "loss": 2.2955, "step": 20 }, { "epoch": 0.00744001488002976, "grad_norm": 0.5878852605819702, "learning_rate": 0.0001999997268060896, "loss": 2.2142, "step": 30 }, { "epoch": 0.00992001984003968, "grad_norm": 0.5556073784828186, "learning_rate": 0.00019999951432210904, "loss": 2.1472, "step": 40 }, { "epoch": 0.0124000248000496, "grad_norm": 0.5539277195930481, "learning_rate": 0.00019999924112864092, "loss": 2.1705, "step": 50 }, { "epoch": 0.01488002976005952, "grad_norm": 0.5625210404396057, "learning_rate": 0.00019999890722585106, "loss": 2.1509, "step": 60 }, { "epoch": 0.01736003472006944, "grad_norm": 0.5463761687278748, "learning_rate": 0.00019999851261394218, "loss": 2.1414, "step": 70 }, { "epoch": 0.01984003968007936, "grad_norm": 0.515055775642395, "learning_rate": 0.00019999805729315381, "loss": 2.1695, "step": 80 }, { "epoch": 0.02232004464008928, "grad_norm": 0.5034283995628357, "learning_rate": 0.00019999754126376248, "loss": 2.13, "step": 90 }, { "epoch": 0.0248000496000992, "grad_norm": 0.6123887896537781, "learning_rate": 0.00019999696452608135, "loss": 2.1778, "step": 100 }, { "epoch": 0.02728005456010912, "grad_norm": 0.5200942158699036, "learning_rate": 0.00019999632708046064, "loss": 2.141, "step": 110 }, { "epoch": 0.02976005952011904, "grad_norm": 0.5305571556091309, "learning_rate": 0.0001999956289272873, "loss": 2.1276, "step": 120 }, { "epoch": 0.03224006448012896, "grad_norm": 0.5080032348632812, "learning_rate": 0.0001999948700669852, "loss": 2.1138, "step": 130 }, { "epoch": 0.03472006944013888, "grad_norm": 0.5774938464164734, "learning_rate": 0.00019999405050001497, "loss": 2.155, "step": 140 }, { "epoch": 0.0372000744001488, "grad_norm": 0.5072618722915649, "learning_rate": 0.0001999931702268743, "loss": 2.1443, "step": 150 }, { "epoch": 0.03968007936015872, "grad_norm": 0.4849551320075989, "learning_rate": 0.0001999922292480975, "loss": 2.1275, "step": 160 }, { "epoch": 0.04216008432016864, "grad_norm": 0.5276527404785156, "learning_rate": 0.00019999122756425585, "loss": 2.1001, "step": 170 }, { "epoch": 0.04464008928017856, "grad_norm": 0.46588486433029175, "learning_rate": 0.00019999016517595753, "loss": 2.0843, "step": 180 }, { "epoch": 0.04712009424018848, "grad_norm": 0.528197169303894, "learning_rate": 0.00019998904208384742, "loss": 2.0699, "step": 190 }, { "epoch": 0.0496000992001984, "grad_norm": 0.5729973912239075, "learning_rate": 0.00019998785828860742, "loss": 2.0668, "step": 200 }, { "epoch": 0.05208010416020832, "grad_norm": 0.5057010650634766, "learning_rate": 0.0001999866137909562, "loss": 2.08, "step": 210 }, { "epoch": 0.05456010912021824, "grad_norm": 0.5133991837501526, "learning_rate": 0.00019998530859164927, "loss": 2.0329, "step": 220 }, { "epoch": 0.05704011408022816, "grad_norm": 0.5246201157569885, "learning_rate": 0.000199983942691479, "loss": 2.0358, "step": 230 }, { "epoch": 0.05952011904023808, "grad_norm": 0.556400716304779, "learning_rate": 0.00019998251609127464, "loss": 2.0534, "step": 240 }, { "epoch": 0.062000124000248, "grad_norm": 0.5591869950294495, "learning_rate": 0.0001999810287919023, "loss": 2.0162, "step": 250 }, { "epoch": 0.06448012896025793, "grad_norm": 0.5347974896430969, "learning_rate": 0.00019997948079426492, "loss": 2.0735, "step": 260 }, { "epoch": 0.06696013392026784, "grad_norm": 0.5143352150917053, "learning_rate": 0.00019997787209930222, "loss": 2.0251, "step": 270 }, { "epoch": 0.06944013888027777, "grad_norm": 0.5554919838905334, "learning_rate": 0.00019997620270799092, "loss": 2.1207, "step": 280 }, { "epoch": 0.07192014384028768, "grad_norm": 0.5315896272659302, "learning_rate": 0.00019997447262134444, "loss": 2.0936, "step": 290 }, { "epoch": 0.0744001488002976, "grad_norm": 0.5028589367866516, "learning_rate": 0.00019997268184041315, "loss": 2.048, "step": 300 }, { "epoch": 0.07688015376030752, "grad_norm": 0.4932255148887634, "learning_rate": 0.00019997083036628422, "loss": 2.0523, "step": 310 }, { "epoch": 0.07936015872031744, "grad_norm": 0.5493288040161133, "learning_rate": 0.00019996891820008164, "loss": 2.0368, "step": 320 }, { "epoch": 0.08184016368032736, "grad_norm": 0.51752769947052, "learning_rate": 0.0001999669453429663, "loss": 2.0567, "step": 330 }, { "epoch": 0.08432016864033728, "grad_norm": 0.5354537963867188, "learning_rate": 0.00019996491179613597, "loss": 2.0354, "step": 340 }, { "epoch": 0.0868001736003472, "grad_norm": 0.536638081073761, "learning_rate": 0.00019996281756082517, "loss": 2.0324, "step": 350 }, { "epoch": 0.08928017856035712, "grad_norm": 0.5761566758155823, "learning_rate": 0.00019996066263830531, "loss": 2.1082, "step": 360 }, { "epoch": 0.09176018352036704, "grad_norm": 0.553530216217041, "learning_rate": 0.00019995844702988463, "loss": 2.0161, "step": 370 }, { "epoch": 0.09424018848037696, "grad_norm": 0.5311240553855896, "learning_rate": 0.0001999561707369082, "loss": 2.0226, "step": 380 }, { "epoch": 0.09672019344038688, "grad_norm": 0.546971321105957, "learning_rate": 0.000199953833760758, "loss": 2.0872, "step": 390 }, { "epoch": 0.0992001984003968, "grad_norm": 0.5235766172409058, "learning_rate": 0.00019995143610285277, "loss": 2.0279, "step": 400 }, { "epoch": 0.10168020336040672, "grad_norm": 0.5465070009231567, "learning_rate": 0.00019994897776464817, "loss": 2.0212, "step": 410 }, { "epoch": 0.10416020832041664, "grad_norm": 0.5408602356910706, "learning_rate": 0.00019994645874763658, "loss": 2.0009, "step": 420 }, { "epoch": 0.10664021328042655, "grad_norm": 0.5485517382621765, "learning_rate": 0.00019994387905334735, "loss": 2.0427, "step": 430 }, { "epoch": 0.10912021824043648, "grad_norm": 0.5387979745864868, "learning_rate": 0.00019994123868334655, "loss": 2.0833, "step": 440 }, { "epoch": 0.1116002232004464, "grad_norm": 0.5328435301780701, "learning_rate": 0.00019993853763923723, "loss": 2.0441, "step": 450 }, { "epoch": 0.11408022816045632, "grad_norm": 0.5278816819190979, "learning_rate": 0.0001999357759226591, "loss": 2.0305, "step": 460 }, { "epoch": 0.11656023312046625, "grad_norm": 0.5591515898704529, "learning_rate": 0.00019993295353528884, "loss": 2.0095, "step": 470 }, { "epoch": 0.11904023808047616, "grad_norm": 0.536952555179596, "learning_rate": 0.00019993007047883988, "loss": 2.0396, "step": 480 }, { "epoch": 0.12152024304048609, "grad_norm": 0.5500532984733582, "learning_rate": 0.00019992712675506253, "loss": 2.1116, "step": 490 }, { "epoch": 0.124000248000496, "grad_norm": 0.5213359594345093, "learning_rate": 0.00019992412236574395, "loss": 2.03, "step": 500 }, { "epoch": 0.1264802529605059, "grad_norm": 0.5710284113883972, "learning_rate": 0.00019992105731270806, "loss": 2.0542, "step": 510 }, { "epoch": 0.12896025792051585, "grad_norm": 0.5325077772140503, "learning_rate": 0.00019991793159781569, "loss": 2.0765, "step": 520 }, { "epoch": 0.13144026288052577, "grad_norm": 0.5777318477630615, "learning_rate": 0.00019991474522296443, "loss": 1.9969, "step": 530 }, { "epoch": 0.13392026784053568, "grad_norm": 0.5040794610977173, "learning_rate": 0.0001999114981900887, "loss": 1.9853, "step": 540 }, { "epoch": 0.1364002728005456, "grad_norm": 0.5974857807159424, "learning_rate": 0.0001999081905011598, "loss": 2.0857, "step": 550 }, { "epoch": 0.13888027776055553, "grad_norm": 0.5376875996589661, "learning_rate": 0.0001999048221581858, "loss": 2.0042, "step": 560 }, { "epoch": 0.14136028272056544, "grad_norm": 0.5114587545394897, "learning_rate": 0.00019990139316321162, "loss": 2.0838, "step": 570 }, { "epoch": 0.14384028768057536, "grad_norm": 0.554987907409668, "learning_rate": 0.00019989790351831896, "loss": 2.0281, "step": 580 }, { "epoch": 0.14632029264058527, "grad_norm": 0.5457165837287903, "learning_rate": 0.00019989435322562645, "loss": 2.0423, "step": 590 }, { "epoch": 0.1488002976005952, "grad_norm": 0.5625684857368469, "learning_rate": 0.0001998907422872894, "loss": 2.0691, "step": 600 }, { "epoch": 0.15128030256060512, "grad_norm": 0.5338137149810791, "learning_rate": 0.00019988707070550007, "loss": 2.0549, "step": 610 }, { "epoch": 0.15376030752061504, "grad_norm": 0.5166420936584473, "learning_rate": 0.0001998833384824874, "loss": 1.9901, "step": 620 }, { "epoch": 0.15624031248062495, "grad_norm": 0.5435931086540222, "learning_rate": 0.00019987954562051725, "loss": 2.0217, "step": 630 }, { "epoch": 0.1587203174406349, "grad_norm": 0.5430518984794617, "learning_rate": 0.00019987569212189224, "loss": 1.9782, "step": 640 }, { "epoch": 0.1612003224006448, "grad_norm": 0.5538361668586731, "learning_rate": 0.00019987177798895184, "loss": 2.0747, "step": 650 }, { "epoch": 0.16368032736065471, "grad_norm": 0.5739989280700684, "learning_rate": 0.0001998678032240723, "loss": 1.9879, "step": 660 }, { "epoch": 0.16616033232066463, "grad_norm": 0.581364631652832, "learning_rate": 0.0001998637678296667, "loss": 2.0, "step": 670 }, { "epoch": 0.16864033728067457, "grad_norm": 0.5130109786987305, "learning_rate": 0.0001998596718081849, "loss": 2.082, "step": 680 }, { "epoch": 0.17112034224068448, "grad_norm": 0.5570313334465027, "learning_rate": 0.00019985551516211363, "loss": 2.0369, "step": 690 }, { "epoch": 0.1736003472006944, "grad_norm": 0.5379239916801453, "learning_rate": 0.00019985129789397633, "loss": 2.0345, "step": 700 }, { "epoch": 0.17608035216070433, "grad_norm": 0.5915724635124207, "learning_rate": 0.0001998470200063333, "loss": 1.9996, "step": 710 }, { "epoch": 0.17856035712071425, "grad_norm": 0.574665904045105, "learning_rate": 0.00019984268150178167, "loss": 1.9805, "step": 720 }, { "epoch": 0.18104036208072416, "grad_norm": 0.5390008687973022, "learning_rate": 0.0001998382823829553, "loss": 1.9605, "step": 730 }, { "epoch": 0.18352036704073407, "grad_norm": 0.5619198083877563, "learning_rate": 0.00019983382265252491, "loss": 2.0536, "step": 740 }, { "epoch": 0.186000372000744, "grad_norm": 0.5629109144210815, "learning_rate": 0.000199829302313198, "loss": 1.9862, "step": 750 }, { "epoch": 0.18848037696075393, "grad_norm": 0.5493379235267639, "learning_rate": 0.00019982472136771882, "loss": 1.9932, "step": 760 }, { "epoch": 0.19096038192076384, "grad_norm": 0.5466563701629639, "learning_rate": 0.00019982007981886847, "loss": 2.0434, "step": 770 }, { "epoch": 0.19344038688077375, "grad_norm": 0.5508472919464111, "learning_rate": 0.00019981537766946484, "loss": 2.0423, "step": 780 }, { "epoch": 0.1959203918407837, "grad_norm": 0.5475140810012817, "learning_rate": 0.00019981061492236262, "loss": 1.9899, "step": 790 }, { "epoch": 0.1984003968007936, "grad_norm": 0.5537005066871643, "learning_rate": 0.0001998057915804532, "loss": 1.9943, "step": 800 }, { "epoch": 0.20088040176080352, "grad_norm": 0.5661734342575073, "learning_rate": 0.00019980090764666484, "loss": 2.0474, "step": 810 }, { "epoch": 0.20336040672081343, "grad_norm": 0.5358636975288391, "learning_rate": 0.0001997959631239626, "loss": 1.9881, "step": 820 }, { "epoch": 0.20584041168082337, "grad_norm": 0.5859567523002625, "learning_rate": 0.00019979095801534821, "loss": 2.0211, "step": 830 }, { "epoch": 0.20832041664083328, "grad_norm": 0.5483726263046265, "learning_rate": 0.00019978589232386035, "loss": 2.0127, "step": 840 }, { "epoch": 0.2108004216008432, "grad_norm": 0.5464625954627991, "learning_rate": 0.00019978076605257436, "loss": 2.0091, "step": 850 }, { "epoch": 0.2132804265608531, "grad_norm": 0.5446053147315979, "learning_rate": 0.00019977557920460237, "loss": 1.9911, "step": 860 }, { "epoch": 0.21576043152086305, "grad_norm": 0.5547277331352234, "learning_rate": 0.00019977033178309332, "loss": 2.0402, "step": 870 }, { "epoch": 0.21824043648087296, "grad_norm": 0.5449572801589966, "learning_rate": 0.0001997650237912329, "loss": 2.0366, "step": 880 }, { "epoch": 0.22072044144088288, "grad_norm": 0.5232194066047668, "learning_rate": 0.00019975965523224358, "loss": 1.9832, "step": 890 }, { "epoch": 0.2232004464008928, "grad_norm": 0.5563146471977234, "learning_rate": 0.00019975422610938462, "loss": 1.9306, "step": 900 }, { "epoch": 0.22568045136090273, "grad_norm": 0.5598681569099426, "learning_rate": 0.000199748736425952, "loss": 1.8938, "step": 910 }, { "epoch": 0.22816045632091264, "grad_norm": 0.5681236386299133, "learning_rate": 0.00019974318618527849, "loss": 1.9727, "step": 920 }, { "epoch": 0.23064046128092255, "grad_norm": 0.5816565155982971, "learning_rate": 0.00019973757539073366, "loss": 1.9525, "step": 930 }, { "epoch": 0.2331204662409325, "grad_norm": 0.5877417922019958, "learning_rate": 0.0001997319040457238, "loss": 2.0935, "step": 940 }, { "epoch": 0.2356004712009424, "grad_norm": 0.5637423992156982, "learning_rate": 0.000199726172153692, "loss": 2.0272, "step": 950 }, { "epoch": 0.23808047616095232, "grad_norm": 0.6446289420127869, "learning_rate": 0.00019972037971811802, "loss": 2.0328, "step": 960 }, { "epoch": 0.24056048112096223, "grad_norm": 0.5652419328689575, "learning_rate": 0.00019971452674251848, "loss": 2.014, "step": 970 }, { "epoch": 0.24304048608097217, "grad_norm": 0.5587862730026245, "learning_rate": 0.00019970861323044667, "loss": 2.0134, "step": 980 }, { "epoch": 0.2455204910409821, "grad_norm": 0.5309275984764099, "learning_rate": 0.00019970263918549274, "loss": 2.0183, "step": 990 }, { "epoch": 0.248000496000992, "grad_norm": 0.5930149555206299, "learning_rate": 0.00019969660461128342, "loss": 1.9548, "step": 1000 }, { "epoch": 0.25048050096100194, "grad_norm": 0.59003746509552, "learning_rate": 0.00019969050951148233, "loss": 1.9657, "step": 1010 }, { "epoch": 0.2529605059210118, "grad_norm": 0.5892198085784912, "learning_rate": 0.00019968435388978984, "loss": 1.9756, "step": 1020 }, { "epoch": 0.25544051088102177, "grad_norm": 0.57949298620224, "learning_rate": 0.00019967813774994294, "loss": 1.9637, "step": 1030 }, { "epoch": 0.2579205158410317, "grad_norm": 0.556459367275238, "learning_rate": 0.00019967186109571552, "loss": 1.991, "step": 1040 }, { "epoch": 0.2604005208010416, "grad_norm": 0.5568014979362488, "learning_rate": 0.00019966552393091804, "loss": 1.9393, "step": 1050 }, { "epoch": 0.26288052576105153, "grad_norm": 0.5933910012245178, "learning_rate": 0.00019965912625939783, "loss": 1.9554, "step": 1060 }, { "epoch": 0.2653605307210614, "grad_norm": 0.5912190079689026, "learning_rate": 0.00019965266808503887, "loss": 1.9702, "step": 1070 }, { "epoch": 0.26784053568107136, "grad_norm": 0.5662450194358826, "learning_rate": 0.00019964614941176195, "loss": 2.0206, "step": 1080 }, { "epoch": 0.2703205406410813, "grad_norm": 0.5403727889060974, "learning_rate": 0.00019963957024352446, "loss": 2.0103, "step": 1090 }, { "epoch": 0.2728005456010912, "grad_norm": 0.5555917024612427, "learning_rate": 0.00019963293058432066, "loss": 1.875, "step": 1100 }, { "epoch": 0.2752805505611011, "grad_norm": 0.5734595060348511, "learning_rate": 0.0001996262304381815, "loss": 2.0566, "step": 1110 }, { "epoch": 0.27776055552111106, "grad_norm": 0.5790075659751892, "learning_rate": 0.00019961946980917456, "loss": 2.0087, "step": 1120 }, { "epoch": 0.28024056048112095, "grad_norm": 0.5560197234153748, "learning_rate": 0.00019961264870140422, "loss": 1.967, "step": 1130 }, { "epoch": 0.2827205654411309, "grad_norm": 0.5903038382530212, "learning_rate": 0.0001996057671190116, "loss": 1.9977, "step": 1140 }, { "epoch": 0.28520057040114083, "grad_norm": 0.5051490068435669, "learning_rate": 0.00019959882506617445, "loss": 1.9385, "step": 1150 }, { "epoch": 0.2876805753611507, "grad_norm": 0.5680397152900696, "learning_rate": 0.0001995918225471073, "loss": 1.9929, "step": 1160 }, { "epoch": 0.29016058032116065, "grad_norm": 0.5738064050674438, "learning_rate": 0.00019958475956606133, "loss": 2.0118, "step": 1170 }, { "epoch": 0.29264058528117054, "grad_norm": 0.5676369667053223, "learning_rate": 0.0001995776361273245, "loss": 1.9608, "step": 1180 }, { "epoch": 0.2951205902411805, "grad_norm": 0.5738691687583923, "learning_rate": 0.0001995704522352214, "loss": 1.9918, "step": 1190 }, { "epoch": 0.2976005952011904, "grad_norm": 0.5853675007820129, "learning_rate": 0.00019956320789411337, "loss": 1.9739, "step": 1200 }, { "epoch": 0.3000806001612003, "grad_norm": 0.5621643662452698, "learning_rate": 0.00019955590310839847, "loss": 1.9861, "step": 1210 }, { "epoch": 0.30256060512121025, "grad_norm": 0.5738389492034912, "learning_rate": 0.00019954853788251137, "loss": 1.9618, "step": 1220 }, { "epoch": 0.3050406100812202, "grad_norm": 0.5730957984924316, "learning_rate": 0.0001995411122209235, "loss": 1.9789, "step": 1230 }, { "epoch": 0.30752061504123007, "grad_norm": 0.5502608418464661, "learning_rate": 0.00019953362612814296, "loss": 1.9983, "step": 1240 }, { "epoch": 0.31000062000124, "grad_norm": 0.6123182773590088, "learning_rate": 0.00019952607960871453, "loss": 2.025, "step": 1250 }, { "epoch": 0.3124806249612499, "grad_norm": 0.562450647354126, "learning_rate": 0.0001995184726672197, "loss": 1.9941, "step": 1260 }, { "epoch": 0.31496062992125984, "grad_norm": 0.5684399008750916, "learning_rate": 0.00019951080530827663, "loss": 1.9974, "step": 1270 }, { "epoch": 0.3174406348812698, "grad_norm": 0.5505920052528381, "learning_rate": 0.00019950307753654017, "loss": 2.0099, "step": 1280 }, { "epoch": 0.31992063984127966, "grad_norm": 0.5620154738426208, "learning_rate": 0.0001994952893567018, "loss": 1.97, "step": 1290 }, { "epoch": 0.3224006448012896, "grad_norm": 0.5569179058074951, "learning_rate": 0.00019948744077348968, "loss": 1.9819, "step": 1300 }, { "epoch": 0.32488064976129954, "grad_norm": 0.6155531406402588, "learning_rate": 0.00019947953179166877, "loss": 1.9119, "step": 1310 }, { "epoch": 0.32736065472130943, "grad_norm": 0.5746706128120422, "learning_rate": 0.0001994715624160405, "loss": 1.925, "step": 1320 }, { "epoch": 0.32984065968131937, "grad_norm": 0.5678473711013794, "learning_rate": 0.00019946353265144312, "loss": 1.9679, "step": 1330 }, { "epoch": 0.33232066464132926, "grad_norm": 0.5691818594932556, "learning_rate": 0.00019945544250275145, "loss": 1.9778, "step": 1340 }, { "epoch": 0.3348006696013392, "grad_norm": 0.5543385744094849, "learning_rate": 0.000199447291974877, "loss": 1.9585, "step": 1350 }, { "epoch": 0.33728067456134914, "grad_norm": 0.5823257565498352, "learning_rate": 0.00019943908107276798, "loss": 1.9624, "step": 1360 }, { "epoch": 0.339760679521359, "grad_norm": 0.5965515971183777, "learning_rate": 0.00019943080980140918, "loss": 2.0268, "step": 1370 }, { "epoch": 0.34224068448136896, "grad_norm": 0.6067745685577393, "learning_rate": 0.00019942247816582206, "loss": 1.9783, "step": 1380 }, { "epoch": 0.3447206894413789, "grad_norm": 0.5427114963531494, "learning_rate": 0.00019941408617106472, "loss": 1.9887, "step": 1390 }, { "epoch": 0.3472006944013888, "grad_norm": 0.5312680602073669, "learning_rate": 0.00019940563382223197, "loss": 1.998, "step": 1400 }, { "epoch": 0.34968069936139873, "grad_norm": 0.6098049283027649, "learning_rate": 0.0001993971211244552, "loss": 1.9817, "step": 1410 }, { "epoch": 0.35216070432140867, "grad_norm": 0.5767588019371033, "learning_rate": 0.00019938854808290244, "loss": 1.951, "step": 1420 }, { "epoch": 0.35464070928141855, "grad_norm": 0.5756887197494507, "learning_rate": 0.00019937991470277835, "loss": 1.9462, "step": 1430 }, { "epoch": 0.3571207142414285, "grad_norm": 0.6078529357910156, "learning_rate": 0.00019937122098932428, "loss": 1.9716, "step": 1440 }, { "epoch": 0.3596007192014384, "grad_norm": 0.5578196048736572, "learning_rate": 0.0001993624669478181, "loss": 1.9097, "step": 1450 }, { "epoch": 0.3620807241614483, "grad_norm": 0.5579915642738342, "learning_rate": 0.00019935365258357446, "loss": 1.9615, "step": 1460 }, { "epoch": 0.36456072912145826, "grad_norm": 0.5643064975738525, "learning_rate": 0.00019934477790194445, "loss": 1.9567, "step": 1470 }, { "epoch": 0.36704073408146815, "grad_norm": 0.5760548114776611, "learning_rate": 0.0001993358429083159, "loss": 1.9476, "step": 1480 }, { "epoch": 0.3695207390414781, "grad_norm": 0.5931412577629089, "learning_rate": 0.00019932684760811328, "loss": 1.984, "step": 1490 }, { "epoch": 0.372000744001488, "grad_norm": 0.5657218098640442, "learning_rate": 0.00019931779200679754, "loss": 1.9508, "step": 1500 }, { "epoch": 0.3744807489614979, "grad_norm": 0.5403887629508972, "learning_rate": 0.00019930867610986637, "loss": 1.9676, "step": 1510 }, { "epoch": 0.37696075392150785, "grad_norm": 0.5199195742607117, "learning_rate": 0.00019929949992285396, "loss": 1.9848, "step": 1520 }, { "epoch": 0.37944075888151774, "grad_norm": 0.5714607834815979, "learning_rate": 0.00019929026345133122, "loss": 1.9478, "step": 1530 }, { "epoch": 0.3819207638415277, "grad_norm": 0.6201160550117493, "learning_rate": 0.0001992809667009055, "loss": 1.9482, "step": 1540 }, { "epoch": 0.3844007688015376, "grad_norm": 0.5621275305747986, "learning_rate": 0.00019927160967722092, "loss": 2.0238, "step": 1550 }, { "epoch": 0.3868807737615475, "grad_norm": 0.5677786469459534, "learning_rate": 0.0001992621923859581, "loss": 1.9221, "step": 1560 }, { "epoch": 0.38936077872155744, "grad_norm": 0.6040672063827515, "learning_rate": 0.0001992527148328342, "loss": 1.9526, "step": 1570 }, { "epoch": 0.3918407836815674, "grad_norm": 0.6181504130363464, "learning_rate": 0.0001992431770236031, "loss": 1.9546, "step": 1580 }, { "epoch": 0.39432078864157727, "grad_norm": 0.5984564423561096, "learning_rate": 0.0001992335789640551, "loss": 1.9511, "step": 1590 }, { "epoch": 0.3968007936015872, "grad_norm": 0.578197717666626, "learning_rate": 0.00019922392066001722, "loss": 1.9471, "step": 1600 }, { "epoch": 0.39928079856159715, "grad_norm": 0.5593079328536987, "learning_rate": 0.00019921420211735299, "loss": 1.9363, "step": 1610 }, { "epoch": 0.40176080352160704, "grad_norm": 0.5923002362251282, "learning_rate": 0.00019920442334196248, "loss": 2.0088, "step": 1620 }, { "epoch": 0.404240808481617, "grad_norm": 0.6107719540596008, "learning_rate": 0.0001991945843397824, "loss": 1.9541, "step": 1630 }, { "epoch": 0.40672081344162686, "grad_norm": 0.6352981925010681, "learning_rate": 0.00019918468511678596, "loss": 1.986, "step": 1640 }, { "epoch": 0.4092008184016368, "grad_norm": 0.5772244334220886, "learning_rate": 0.00019917472567898298, "loss": 1.9746, "step": 1650 }, { "epoch": 0.41168082336164674, "grad_norm": 0.6013234853744507, "learning_rate": 0.0001991647060324198, "loss": 1.9267, "step": 1660 }, { "epoch": 0.4141608283216566, "grad_norm": 0.5733570456504822, "learning_rate": 0.00019915462618317936, "loss": 1.9582, "step": 1670 }, { "epoch": 0.41664083328166657, "grad_norm": 0.6049069166183472, "learning_rate": 0.00019914448613738106, "loss": 1.9562, "step": 1680 }, { "epoch": 0.4191208382416765, "grad_norm": 0.5593000650405884, "learning_rate": 0.00019913428590118094, "loss": 1.9757, "step": 1690 }, { "epoch": 0.4216008432016864, "grad_norm": 0.5543436408042908, "learning_rate": 0.0001991240254807715, "loss": 1.9765, "step": 1700 }, { "epoch": 0.42408084816169633, "grad_norm": 0.5699513554573059, "learning_rate": 0.00019911370488238182, "loss": 1.9864, "step": 1710 }, { "epoch": 0.4265608531217062, "grad_norm": 0.6728619933128357, "learning_rate": 0.00019910332411227758, "loss": 1.9937, "step": 1720 }, { "epoch": 0.42904085808171616, "grad_norm": 0.5685863494873047, "learning_rate": 0.0001990928831767609, "loss": 1.941, "step": 1730 }, { "epoch": 0.4315208630417261, "grad_norm": 0.6041725873947144, "learning_rate": 0.00019908238208217038, "loss": 1.9517, "step": 1740 }, { "epoch": 0.434000868001736, "grad_norm": 0.570541501045227, "learning_rate": 0.00019907182083488129, "loss": 1.974, "step": 1750 }, { "epoch": 0.4364808729617459, "grad_norm": 0.5913785696029663, "learning_rate": 0.0001990611994413053, "loss": 1.9703, "step": 1760 }, { "epoch": 0.43896087792175587, "grad_norm": 0.5988500714302063, "learning_rate": 0.00019905051790789063, "loss": 1.9865, "step": 1770 }, { "epoch": 0.44144088288176575, "grad_norm": 0.67069411277771, "learning_rate": 0.00019903977624112206, "loss": 1.9606, "step": 1780 }, { "epoch": 0.4439208878417757, "grad_norm": 0.5693567991256714, "learning_rate": 0.00019902897444752077, "loss": 1.9973, "step": 1790 }, { "epoch": 0.4464008928017856, "grad_norm": 0.5602978467941284, "learning_rate": 0.00019901811253364456, "loss": 1.98, "step": 1800 }, { "epoch": 0.4488808977617955, "grad_norm": 0.5763017535209656, "learning_rate": 0.00019900719050608764, "loss": 1.9746, "step": 1810 }, { "epoch": 0.45136090272180546, "grad_norm": 0.6153312921524048, "learning_rate": 0.00019899620837148077, "loss": 1.9261, "step": 1820 }, { "epoch": 0.45384090768181534, "grad_norm": 0.5938084721565247, "learning_rate": 0.0001989851661364912, "loss": 1.9688, "step": 1830 }, { "epoch": 0.4563209126418253, "grad_norm": 0.9307805895805359, "learning_rate": 0.00019897406380782261, "loss": 1.963, "step": 1840 }, { "epoch": 0.4588009176018352, "grad_norm": 0.6255986094474792, "learning_rate": 0.0001989629013922152, "loss": 1.9374, "step": 1850 }, { "epoch": 0.4612809225618451, "grad_norm": 0.5881803631782532, "learning_rate": 0.00019895167889644565, "loss": 1.9721, "step": 1860 }, { "epoch": 0.46376092752185505, "grad_norm": 0.5993257761001587, "learning_rate": 0.00019894039632732712, "loss": 1.8728, "step": 1870 }, { "epoch": 0.466240932481865, "grad_norm": 0.6063452959060669, "learning_rate": 0.00019892905369170926, "loss": 1.929, "step": 1880 }, { "epoch": 0.4687209374418749, "grad_norm": 0.6029859185218811, "learning_rate": 0.0001989176509964781, "loss": 1.9544, "step": 1890 }, { "epoch": 0.4712009424018848, "grad_norm": 0.5588030815124512, "learning_rate": 0.00019890618824855625, "loss": 1.9094, "step": 1900 }, { "epoch": 0.4736809473618947, "grad_norm": 0.617078959941864, "learning_rate": 0.00019889466545490269, "loss": 1.9294, "step": 1910 }, { "epoch": 0.47616095232190464, "grad_norm": 0.6039510369300842, "learning_rate": 0.00019888308262251285, "loss": 2.0049, "step": 1920 }, { "epoch": 0.4786409572819146, "grad_norm": 0.5853057503700256, "learning_rate": 0.00019887143975841874, "loss": 1.9758, "step": 1930 }, { "epoch": 0.48112096224192447, "grad_norm": 0.5796060562133789, "learning_rate": 0.0001988597368696886, "loss": 1.9671, "step": 1940 }, { "epoch": 0.4836009672019344, "grad_norm": 0.5971837043762207, "learning_rate": 0.0001988479739634273, "loss": 1.9011, "step": 1950 }, { "epoch": 0.48608097216194435, "grad_norm": 0.5895419716835022, "learning_rate": 0.00019883615104677608, "loss": 1.9518, "step": 1960 }, { "epoch": 0.48856097712195423, "grad_norm": 0.5794113874435425, "learning_rate": 0.00019882426812691258, "loss": 1.9927, "step": 1970 }, { "epoch": 0.4910409820819642, "grad_norm": 0.5955272316932678, "learning_rate": 0.00019881232521105089, "loss": 1.9457, "step": 1980 }, { "epoch": 0.49352098704197406, "grad_norm": 0.5644362568855286, "learning_rate": 0.00019880032230644155, "loss": 1.8852, "step": 1990 }, { "epoch": 0.496000992001984, "grad_norm": 0.5644434690475464, "learning_rate": 0.00019878825942037148, "loss": 1.9762, "step": 2000 }, { "epoch": 0.49848099696199394, "grad_norm": 0.5878923535346985, "learning_rate": 0.00019877613656016404, "loss": 1.8868, "step": 2010 }, { "epoch": 0.5009610019220039, "grad_norm": 0.5987402200698853, "learning_rate": 0.000198763953733179, "loss": 1.9813, "step": 2020 }, { "epoch": 0.5034410068820138, "grad_norm": 0.543759822845459, "learning_rate": 0.00019875171094681248, "loss": 1.9633, "step": 2030 }, { "epoch": 0.5059210118420236, "grad_norm": 0.6039268374443054, "learning_rate": 0.00019873940820849714, "loss": 1.9667, "step": 2040 }, { "epoch": 0.5084010168020336, "grad_norm": 0.5571461319923401, "learning_rate": 0.00019872704552570188, "loss": 1.9474, "step": 2050 }, { "epoch": 0.5108810217620435, "grad_norm": 0.5795441269874573, "learning_rate": 0.00019871462290593206, "loss": 1.9691, "step": 2060 }, { "epoch": 0.5133610267220534, "grad_norm": 0.5893127918243408, "learning_rate": 0.00019870214035672942, "loss": 1.9224, "step": 2070 }, { "epoch": 0.5158410316820634, "grad_norm": 0.6003038883209229, "learning_rate": 0.00019868959788567212, "loss": 1.9734, "step": 2080 }, { "epoch": 0.5183210366420733, "grad_norm": 0.6020908355712891, "learning_rate": 0.00019867699550037466, "loss": 1.963, "step": 2090 }, { "epoch": 0.5208010416020832, "grad_norm": 0.5741065144538879, "learning_rate": 0.0001986643332084879, "loss": 1.9665, "step": 2100 }, { "epoch": 0.5232810465620932, "grad_norm": 0.5555444955825806, "learning_rate": 0.0001986516110176991, "loss": 1.9648, "step": 2110 }, { "epoch": 0.5257610515221031, "grad_norm": 0.563761293888092, "learning_rate": 0.00019863882893573188, "loss": 1.9516, "step": 2120 }, { "epoch": 0.528241056482113, "grad_norm": 0.6273584961891174, "learning_rate": 0.00019862598697034623, "loss": 1.9514, "step": 2130 }, { "epoch": 0.5307210614421228, "grad_norm": 0.5726107954978943, "learning_rate": 0.00019861308512933844, "loss": 1.9276, "step": 2140 }, { "epoch": 0.5332010664021328, "grad_norm": 0.5795220732688904, "learning_rate": 0.00019860012342054123, "loss": 1.9434, "step": 2150 }, { "epoch": 0.5356810713621427, "grad_norm": 0.6007318496704102, "learning_rate": 0.0001985871018518236, "loss": 1.9402, "step": 2160 }, { "epoch": 0.5381610763221526, "grad_norm": 0.6281079053878784, "learning_rate": 0.0001985740204310909, "loss": 1.9485, "step": 2170 }, { "epoch": 0.5406410812821626, "grad_norm": 0.5630472898483276, "learning_rate": 0.00019856087916628488, "loss": 1.9213, "step": 2180 }, { "epoch": 0.5431210862421725, "grad_norm": 0.5595583915710449, "learning_rate": 0.00019854767806538353, "loss": 1.9632, "step": 2190 }, { "epoch": 0.5456010912021824, "grad_norm": 0.5649421215057373, "learning_rate": 0.0001985344171364012, "loss": 1.9448, "step": 2200 }, { "epoch": 0.5480810961621924, "grad_norm": 0.6145544052124023, "learning_rate": 0.00019852109638738866, "loss": 1.9589, "step": 2210 }, { "epoch": 0.5505611011222022, "grad_norm": 0.6031001806259155, "learning_rate": 0.0001985077158264328, "loss": 1.9201, "step": 2220 }, { "epoch": 0.5530411060822121, "grad_norm": 0.6062312722206116, "learning_rate": 0.000198494275461657, "loss": 1.9224, "step": 2230 }, { "epoch": 0.5555211110422221, "grad_norm": 0.5883815288543701, "learning_rate": 0.00019848077530122083, "loss": 1.9887, "step": 2240 }, { "epoch": 0.558001116002232, "grad_norm": 0.5546337962150574, "learning_rate": 0.00019846721535332021, "loss": 1.8561, "step": 2250 }, { "epoch": 0.5604811209622419, "grad_norm": 0.5851352214813232, "learning_rate": 0.0001984535956261874, "loss": 1.9077, "step": 2260 }, { "epoch": 0.5629611259222519, "grad_norm": 0.9459104537963867, "learning_rate": 0.00019843991612809088, "loss": 1.9316, "step": 2270 }, { "epoch": 0.5654411308822618, "grad_norm": 0.5813398957252502, "learning_rate": 0.00019842617686733545, "loss": 1.9449, "step": 2280 }, { "epoch": 0.5679211358422717, "grad_norm": 0.6058304905891418, "learning_rate": 0.00019841237785226214, "loss": 1.9019, "step": 2290 }, { "epoch": 0.5704011408022817, "grad_norm": 0.5609736442565918, "learning_rate": 0.00019839851909124839, "loss": 1.9281, "step": 2300 }, { "epoch": 0.5728811457622915, "grad_norm": 0.6068705916404724, "learning_rate": 0.00019838460059270775, "loss": 1.9794, "step": 2310 }, { "epoch": 0.5753611507223014, "grad_norm": 0.6005422472953796, "learning_rate": 0.00019837062236509014, "loss": 1.9014, "step": 2320 }, { "epoch": 0.5778411556823113, "grad_norm": 0.5585853457450867, "learning_rate": 0.0001983565844168817, "loss": 1.9306, "step": 2330 }, { "epoch": 0.5803211606423213, "grad_norm": 0.5718742609024048, "learning_rate": 0.00019834248675660486, "loss": 1.9956, "step": 2340 }, { "epoch": 0.5828011656023312, "grad_norm": 0.6195574998855591, "learning_rate": 0.00019832832939281825, "loss": 1.9504, "step": 2350 }, { "epoch": 0.5852811705623411, "grad_norm": 0.5761587023735046, "learning_rate": 0.00019831411233411678, "loss": 1.9483, "step": 2360 }, { "epoch": 0.5877611755223511, "grad_norm": 0.5685802698135376, "learning_rate": 0.00019829983558913164, "loss": 1.8909, "step": 2370 }, { "epoch": 0.590241180482361, "grad_norm": 0.5675533413887024, "learning_rate": 0.0001982854991665301, "loss": 1.9896, "step": 2380 }, { "epoch": 0.5927211854423708, "grad_norm": 0.6346814036369324, "learning_rate": 0.00019827110307501592, "loss": 1.962, "step": 2390 }, { "epoch": 0.5952011904023808, "grad_norm": 0.6148980855941772, "learning_rate": 0.00019825664732332884, "loss": 1.9433, "step": 2400 }, { "epoch": 0.5976811953623907, "grad_norm": 0.547666609287262, "learning_rate": 0.00019824213192024491, "loss": 1.9084, "step": 2410 }, { "epoch": 0.6001612003224006, "grad_norm": 0.5827407240867615, "learning_rate": 0.00019822755687457647, "loss": 1.942, "step": 2420 }, { "epoch": 0.6026412052824106, "grad_norm": 0.5866121649742126, "learning_rate": 0.00019821292219517192, "loss": 1.9391, "step": 2430 }, { "epoch": 0.6051212102424205, "grad_norm": 0.5981879830360413, "learning_rate": 0.00019819822789091598, "loss": 1.8883, "step": 2440 }, { "epoch": 0.6076012152024304, "grad_norm": 0.6277055144309998, "learning_rate": 0.00019818347397072955, "loss": 1.8806, "step": 2450 }, { "epoch": 0.6100812201624404, "grad_norm": 0.5904166102409363, "learning_rate": 0.00019816866044356966, "loss": 1.9563, "step": 2460 }, { "epoch": 0.6125612251224503, "grad_norm": 0.5788779258728027, "learning_rate": 0.0001981537873184296, "loss": 1.9807, "step": 2470 }, { "epoch": 0.6150412300824601, "grad_norm": 0.6119228601455688, "learning_rate": 0.00019813885460433879, "loss": 1.942, "step": 2480 }, { "epoch": 0.6175212350424701, "grad_norm": 0.5789150595664978, "learning_rate": 0.00019812386231036287, "loss": 1.9688, "step": 2490 }, { "epoch": 0.62000124000248, "grad_norm": 0.6484764218330383, "learning_rate": 0.0001981088104456036, "loss": 1.9767, "step": 2500 }, { "epoch": 0.6224812449624899, "grad_norm": 0.5884684920310974, "learning_rate": 0.00019809369901919896, "loss": 1.9175, "step": 2510 }, { "epoch": 0.6249612499224998, "grad_norm": 0.615318238735199, "learning_rate": 0.00019807852804032305, "loss": 1.9431, "step": 2520 }, { "epoch": 0.6274412548825098, "grad_norm": 0.606164813041687, "learning_rate": 0.00019806329751818614, "loss": 1.9414, "step": 2530 }, { "epoch": 0.6299212598425197, "grad_norm": 0.6334375143051147, "learning_rate": 0.0001980480074620347, "loss": 1.913, "step": 2540 }, { "epoch": 0.6324012648025296, "grad_norm": 0.6048720479011536, "learning_rate": 0.0001980326578811512, "loss": 1.9082, "step": 2550 }, { "epoch": 0.6348812697625396, "grad_norm": 0.5949190258979797, "learning_rate": 0.00019801724878485438, "loss": 1.8712, "step": 2560 }, { "epoch": 0.6373612747225494, "grad_norm": 0.6880390048027039, "learning_rate": 0.00019800178018249912, "loss": 2.0191, "step": 2570 }, { "epoch": 0.6398412796825593, "grad_norm": 0.6032581329345703, "learning_rate": 0.00019798625208347626, "loss": 1.9965, "step": 2580 }, { "epoch": 0.6423212846425693, "grad_norm": 0.5974915027618408, "learning_rate": 0.00019797066449721295, "loss": 1.9477, "step": 2590 }, { "epoch": 0.6448012896025792, "grad_norm": 0.5914649367332458, "learning_rate": 0.0001979550174331724, "loss": 1.8974, "step": 2600 }, { "epoch": 0.6472812945625891, "grad_norm": 0.6158638000488281, "learning_rate": 0.0001979393109008538, "loss": 1.9837, "step": 2610 }, { "epoch": 0.6497612995225991, "grad_norm": 0.6009888052940369, "learning_rate": 0.0001979235449097927, "loss": 1.936, "step": 2620 }, { "epoch": 0.652241304482609, "grad_norm": 0.6107520461082458, "learning_rate": 0.00019790771946956052, "loss": 1.9553, "step": 2630 }, { "epoch": 0.6547213094426189, "grad_norm": 0.6255472898483276, "learning_rate": 0.00019789183458976484, "loss": 1.934, "step": 2640 }, { "epoch": 0.6572013144026289, "grad_norm": 0.6186276078224182, "learning_rate": 0.00019787589028004936, "loss": 1.952, "step": 2650 }, { "epoch": 0.6596813193626387, "grad_norm": 0.5936594605445862, "learning_rate": 0.00019785988655009385, "loss": 1.9244, "step": 2660 }, { "epoch": 0.6621613243226486, "grad_norm": 0.6173788905143738, "learning_rate": 0.0001978438234096141, "loss": 1.9429, "step": 2670 }, { "epoch": 0.6646413292826585, "grad_norm": 0.5978009104728699, "learning_rate": 0.000197827700868362, "loss": 1.9723, "step": 2680 }, { "epoch": 0.6671213342426685, "grad_norm": 0.6397370100021362, "learning_rate": 0.00019781151893612556, "loss": 1.9479, "step": 2690 }, { "epoch": 0.6696013392026784, "grad_norm": 0.5891425013542175, "learning_rate": 0.00019779527762272877, "loss": 1.9741, "step": 2700 }, { "epoch": 0.6720813441626883, "grad_norm": 0.5772106051445007, "learning_rate": 0.0001977789769380317, "loss": 1.9108, "step": 2710 }, { "epoch": 0.6745613491226983, "grad_norm": 0.6096621155738831, "learning_rate": 0.00019776261689193048, "loss": 1.9042, "step": 2720 }, { "epoch": 0.6770413540827082, "grad_norm": 0.5689034461975098, "learning_rate": 0.0001977461974943572, "loss": 1.9529, "step": 2730 }, { "epoch": 0.679521359042718, "grad_norm": 0.7213597893714905, "learning_rate": 0.00019772971875528007, "loss": 1.9048, "step": 2740 }, { "epoch": 0.682001364002728, "grad_norm": 0.5970284938812256, "learning_rate": 0.00019771318068470333, "loss": 1.8852, "step": 2750 }, { "epoch": 0.6844813689627379, "grad_norm": 0.6357975006103516, "learning_rate": 0.00019769658329266718, "loss": 1.947, "step": 2760 }, { "epoch": 0.6869613739227478, "grad_norm": 0.615311324596405, "learning_rate": 0.00019767992658924785, "loss": 1.9314, "step": 2770 }, { "epoch": 0.6894413788827578, "grad_norm": 0.5649407505989075, "learning_rate": 0.0001976632105845576, "loss": 1.877, "step": 2780 }, { "epoch": 0.6919213838427677, "grad_norm": 0.597957968711853, "learning_rate": 0.0001976464352887447, "loss": 1.904, "step": 2790 }, { "epoch": 0.6944013888027776, "grad_norm": 0.6019652485847473, "learning_rate": 0.00019762960071199333, "loss": 1.9559, "step": 2800 }, { "epoch": 0.6968813937627876, "grad_norm": 0.5853824019432068, "learning_rate": 0.0001976127068645238, "loss": 1.8575, "step": 2810 }, { "epoch": 0.6993613987227975, "grad_norm": 0.6015722751617432, "learning_rate": 0.0001975957537565923, "loss": 1.9112, "step": 2820 }, { "epoch": 0.7018414036828073, "grad_norm": 0.5531386733055115, "learning_rate": 0.00019757874139849103, "loss": 1.8914, "step": 2830 }, { "epoch": 0.7043214086428173, "grad_norm": 0.6410971283912659, "learning_rate": 0.00019756166980054813, "loss": 1.9788, "step": 2840 }, { "epoch": 0.7068014136028272, "grad_norm": 0.6276296377182007, "learning_rate": 0.00019754453897312776, "loss": 1.928, "step": 2850 }, { "epoch": 0.7092814185628371, "grad_norm": 0.6744928956031799, "learning_rate": 0.00019752734892663, "loss": 1.9618, "step": 2860 }, { "epoch": 0.711761423522847, "grad_norm": 0.6264201998710632, "learning_rate": 0.00019751009967149087, "loss": 1.9456, "step": 2870 }, { "epoch": 0.714241428482857, "grad_norm": 0.6102432012557983, "learning_rate": 0.00019749279121818235, "loss": 1.9352, "step": 2880 }, { "epoch": 0.7167214334428669, "grad_norm": 0.6014752388000488, "learning_rate": 0.00019747542357721244, "loss": 1.9279, "step": 2890 }, { "epoch": 0.7192014384028768, "grad_norm": 0.6810415983200073, "learning_rate": 0.0001974579967591249, "loss": 1.9062, "step": 2900 }, { "epoch": 0.7216814433628868, "grad_norm": 0.6644706726074219, "learning_rate": 0.00019744051077449958, "loss": 1.9392, "step": 2910 }, { "epoch": 0.7241614483228966, "grad_norm": 0.6256020069122314, "learning_rate": 0.00019742296563395216, "loss": 2.0146, "step": 2920 }, { "epoch": 0.7266414532829065, "grad_norm": 0.6017519235610962, "learning_rate": 0.00019740536134813422, "loss": 1.9183, "step": 2930 }, { "epoch": 0.7291214582429165, "grad_norm": 0.6205018758773804, "learning_rate": 0.00019738769792773336, "loss": 1.951, "step": 2940 }, { "epoch": 0.7316014632029264, "grad_norm": 0.6017516851425171, "learning_rate": 0.00019736997538347296, "loss": 1.9254, "step": 2950 }, { "epoch": 0.7340814681629363, "grad_norm": 0.5861731767654419, "learning_rate": 0.00019735219372611233, "loss": 1.9687, "step": 2960 }, { "epoch": 0.7365614731229463, "grad_norm": 0.6167269349098206, "learning_rate": 0.0001973343529664467, "loss": 1.9662, "step": 2970 }, { "epoch": 0.7390414780829562, "grad_norm": 0.5838399529457092, "learning_rate": 0.00019731645311530716, "loss": 1.913, "step": 2980 }, { "epoch": 0.7415214830429661, "grad_norm": 0.6024439334869385, "learning_rate": 0.0001972984941835606, "loss": 1.8726, "step": 2990 }, { "epoch": 0.744001488002976, "grad_norm": 0.6209374666213989, "learning_rate": 0.00019728047618210995, "loss": 1.9045, "step": 3000 }, { "epoch": 0.7464814929629859, "grad_norm": 0.6270463466644287, "learning_rate": 0.00019726239912189382, "loss": 1.8652, "step": 3010 }, { "epoch": 0.7489614979229958, "grad_norm": 0.5481278896331787, "learning_rate": 0.0001972442630138868, "loss": 1.9435, "step": 3020 }, { "epoch": 0.7514415028830058, "grad_norm": 0.5757542252540588, "learning_rate": 0.00019722606786909928, "loss": 1.9238, "step": 3030 }, { "epoch": 0.7539215078430157, "grad_norm": 0.6415989995002747, "learning_rate": 0.00019720781369857746, "loss": 1.9384, "step": 3040 }, { "epoch": 0.7564015128030256, "grad_norm": 0.5898995995521545, "learning_rate": 0.00019718950051340347, "loss": 1.9314, "step": 3050 }, { "epoch": 0.7588815177630355, "grad_norm": 0.5926699042320251, "learning_rate": 0.0001971711283246951, "loss": 1.9161, "step": 3060 }, { "epoch": 0.7613615227230455, "grad_norm": 0.624092698097229, "learning_rate": 0.0001971526971436062, "loss": 1.9285, "step": 3070 }, { "epoch": 0.7638415276830554, "grad_norm": 0.606102705001831, "learning_rate": 0.00019713420698132614, "loss": 1.9185, "step": 3080 }, { "epoch": 0.7663215326430652, "grad_norm": 0.6747568845748901, "learning_rate": 0.0001971156578490804, "loss": 1.9096, "step": 3090 }, { "epoch": 0.7688015376030752, "grad_norm": 0.6271367073059082, "learning_rate": 0.00019709704975813007, "loss": 1.9465, "step": 3100 }, { "epoch": 0.7712815425630851, "grad_norm": 0.6246865391731262, "learning_rate": 0.00019707838271977205, "loss": 1.8818, "step": 3110 }, { "epoch": 0.773761547523095, "grad_norm": 0.6215932369232178, "learning_rate": 0.0001970596567453391, "loss": 1.9701, "step": 3120 }, { "epoch": 0.776241552483105, "grad_norm": 0.6191825866699219, "learning_rate": 0.0001970408718461997, "loss": 1.9358, "step": 3130 }, { "epoch": 0.7787215574431149, "grad_norm": 0.6205334663391113, "learning_rate": 0.0001970220280337581, "loss": 1.8874, "step": 3140 }, { "epoch": 0.7812015624031248, "grad_norm": 0.5906861424446106, "learning_rate": 0.00019700312531945442, "loss": 1.8725, "step": 3150 }, { "epoch": 0.7836815673631348, "grad_norm": 0.6469618082046509, "learning_rate": 0.00019698416371476433, "loss": 1.9795, "step": 3160 }, { "epoch": 0.7861615723231447, "grad_norm": 0.6400570273399353, "learning_rate": 0.0001969651432311995, "loss": 1.9573, "step": 3170 }, { "epoch": 0.7886415772831545, "grad_norm": 0.56891930103302, "learning_rate": 0.00019694606388030714, "loss": 1.9016, "step": 3180 }, { "epoch": 0.7911215822431645, "grad_norm": 0.6157445907592773, "learning_rate": 0.00019692692567367033, "loss": 1.866, "step": 3190 }, { "epoch": 0.7936015872031744, "grad_norm": 0.5788578391075134, "learning_rate": 0.0001969077286229078, "loss": 1.8527, "step": 3200 }, { "epoch": 0.7960815921631843, "grad_norm": 0.59492427110672, "learning_rate": 0.00019688847273967408, "loss": 1.885, "step": 3210 }, { "epoch": 0.7985615971231943, "grad_norm": 0.6037836074829102, "learning_rate": 0.00019686915803565934, "loss": 1.9793, "step": 3220 }, { "epoch": 0.8010416020832042, "grad_norm": 0.6103031039237976, "learning_rate": 0.0001968497845225895, "loss": 1.8906, "step": 3230 }, { "epoch": 0.8035216070432141, "grad_norm": 0.5790871381759644, "learning_rate": 0.00019683035221222618, "loss": 1.9323, "step": 3240 }, { "epoch": 0.806001612003224, "grad_norm": 0.6135836243629456, "learning_rate": 0.00019681086111636667, "loss": 1.9197, "step": 3250 }, { "epoch": 0.808481616963234, "grad_norm": 0.5828390717506409, "learning_rate": 0.00019679131124684403, "loss": 1.9198, "step": 3260 }, { "epoch": 0.8109616219232438, "grad_norm": 0.6304100751876831, "learning_rate": 0.0001967717026155269, "loss": 1.9025, "step": 3270 }, { "epoch": 0.8134416268832537, "grad_norm": 0.5930795073509216, "learning_rate": 0.00019675203523431966, "loss": 1.9309, "step": 3280 }, { "epoch": 0.8159216318432637, "grad_norm": 0.6989979147911072, "learning_rate": 0.00019673230911516226, "loss": 1.956, "step": 3290 }, { "epoch": 0.8184016368032736, "grad_norm": 0.7460166811943054, "learning_rate": 0.0001967125242700305, "loss": 1.9574, "step": 3300 }, { "epoch": 0.8208816417632835, "grad_norm": 0.6681553721427917, "learning_rate": 0.00019669268071093566, "loss": 1.9416, "step": 3310 }, { "epoch": 0.8233616467232935, "grad_norm": 0.644540548324585, "learning_rate": 0.00019667277844992475, "loss": 1.8949, "step": 3320 }, { "epoch": 0.8258416516833034, "grad_norm": 0.5839378833770752, "learning_rate": 0.00019665281749908033, "loss": 1.9243, "step": 3330 }, { "epoch": 0.8283216566433133, "grad_norm": 0.6076035499572754, "learning_rate": 0.00019663279787052068, "loss": 1.92, "step": 3340 }, { "epoch": 0.8308016616033232, "grad_norm": 0.6083244681358337, "learning_rate": 0.0001966127195763997, "loss": 1.9054, "step": 3350 }, { "epoch": 0.8332816665633331, "grad_norm": 0.6293017268180847, "learning_rate": 0.00019659258262890683, "loss": 1.9715, "step": 3360 }, { "epoch": 0.835761671523343, "grad_norm": 0.6333664059638977, "learning_rate": 0.00019657238704026721, "loss": 1.9388, "step": 3370 }, { "epoch": 0.838241676483353, "grad_norm": 0.6385533809661865, "learning_rate": 0.00019655213282274153, "loss": 1.8607, "step": 3380 }, { "epoch": 0.8407216814433629, "grad_norm": 0.5880801677703857, "learning_rate": 0.00019653181998862605, "loss": 1.9777, "step": 3390 }, { "epoch": 0.8432016864033728, "grad_norm": 0.610713541507721, "learning_rate": 0.00019651144855025265, "loss": 1.9246, "step": 3400 }, { "epoch": 0.8456816913633828, "grad_norm": 0.6361165046691895, "learning_rate": 0.00019649101851998885, "loss": 1.9277, "step": 3410 }, { "epoch": 0.8481616963233927, "grad_norm": 0.5994930863380432, "learning_rate": 0.0001964705299102376, "loss": 1.8723, "step": 3420 }, { "epoch": 0.8506417012834026, "grad_norm": 0.5819071531295776, "learning_rate": 0.00019644998273343753, "loss": 1.8956, "step": 3430 }, { "epoch": 0.8531217062434124, "grad_norm": 0.6169791221618652, "learning_rate": 0.0001964293770020628, "loss": 1.9188, "step": 3440 }, { "epoch": 0.8556017112034224, "grad_norm": 0.9334343671798706, "learning_rate": 0.00019640871272862302, "loss": 1.9583, "step": 3450 }, { "epoch": 0.8580817161634323, "grad_norm": 0.6008388996124268, "learning_rate": 0.00019638798992566354, "loss": 1.9483, "step": 3460 }, { "epoch": 0.8605617211234422, "grad_norm": 0.642737627029419, "learning_rate": 0.00019636720860576507, "loss": 1.9526, "step": 3470 }, { "epoch": 0.8630417260834522, "grad_norm": 0.6243256330490112, "learning_rate": 0.0001963463687815439, "loss": 1.923, "step": 3480 }, { "epoch": 0.8655217310434621, "grad_norm": 0.5904022455215454, "learning_rate": 0.00019632547046565186, "loss": 1.8666, "step": 3490 }, { "epoch": 0.868001736003472, "grad_norm": 0.6636185646057129, "learning_rate": 0.00019630451367077628, "loss": 1.8892, "step": 3500 }, { "epoch": 0.870481740963482, "grad_norm": 0.5875678062438965, "learning_rate": 0.00019628349840963995, "loss": 1.9218, "step": 3510 }, { "epoch": 0.8729617459234918, "grad_norm": 0.6118807792663574, "learning_rate": 0.0001962624246950012, "loss": 1.8806, "step": 3520 }, { "epoch": 0.8754417508835017, "grad_norm": 0.6047744750976562, "learning_rate": 0.00019624129253965387, "loss": 1.949, "step": 3530 }, { "epoch": 0.8779217558435117, "grad_norm": 0.6061970591545105, "learning_rate": 0.0001962201019564272, "loss": 1.9103, "step": 3540 }, { "epoch": 0.8804017608035216, "grad_norm": 0.6186763048171997, "learning_rate": 0.000196198852958186, "loss": 1.9533, "step": 3550 }, { "epoch": 0.8828817657635315, "grad_norm": 0.602182924747467, "learning_rate": 0.00019617754555783043, "loss": 1.8873, "step": 3560 }, { "epoch": 0.8853617707235415, "grad_norm": 0.6514226794242859, "learning_rate": 0.0001961561797682962, "loss": 1.9642, "step": 3570 }, { "epoch": 0.8878417756835514, "grad_norm": 0.6289548277854919, "learning_rate": 0.00019613475560255442, "loss": 1.8861, "step": 3580 }, { "epoch": 0.8903217806435613, "grad_norm": 0.6488407850265503, "learning_rate": 0.0001961132730736117, "loss": 1.8255, "step": 3590 }, { "epoch": 0.8928017856035712, "grad_norm": 0.6131303906440735, "learning_rate": 0.00019609173219450998, "loss": 1.905, "step": 3600 }, { "epoch": 0.8952817905635811, "grad_norm": 0.6090990304946899, "learning_rate": 0.00019607013297832669, "loss": 1.8714, "step": 3610 }, { "epoch": 0.897761795523591, "grad_norm": 0.6134405732154846, "learning_rate": 0.00019604847543817466, "loss": 1.9458, "step": 3620 }, { "epoch": 0.9002418004836009, "grad_norm": 0.6179640889167786, "learning_rate": 0.00019602675958720222, "loss": 1.9406, "step": 3630 }, { "epoch": 0.9027218054436109, "grad_norm": 0.6475931406021118, "learning_rate": 0.0001960049854385929, "loss": 1.9175, "step": 3640 }, { "epoch": 0.9052018104036208, "grad_norm": 0.6346822381019592, "learning_rate": 0.00019598315300556576, "loss": 1.859, "step": 3650 }, { "epoch": 0.9076818153636307, "grad_norm": 0.6485849618911743, "learning_rate": 0.0001959612623013753, "loss": 1.9199, "step": 3660 }, { "epoch": 0.9101618203236407, "grad_norm": 0.6322528719902039, "learning_rate": 0.00019593931333931127, "loss": 1.9031, "step": 3670 }, { "epoch": 0.9126418252836506, "grad_norm": 0.5780066251754761, "learning_rate": 0.0001959173061326988, "loss": 1.9051, "step": 3680 }, { "epoch": 0.9151218302436605, "grad_norm": 0.5900623202323914, "learning_rate": 0.0001958952406948985, "loss": 1.8254, "step": 3690 }, { "epoch": 0.9176018352036704, "grad_norm": 0.7210493087768555, "learning_rate": 0.00019587311703930614, "loss": 1.891, "step": 3700 }, { "epoch": 0.9200818401636803, "grad_norm": 0.5721245408058167, "learning_rate": 0.00019585093517935305, "loss": 1.8755, "step": 3710 }, { "epoch": 0.9225618451236902, "grad_norm": 0.5767670273780823, "learning_rate": 0.00019582869512850575, "loss": 1.9101, "step": 3720 }, { "epoch": 0.9250418500837002, "grad_norm": 0.591179370880127, "learning_rate": 0.0001958063969002661, "loss": 1.903, "step": 3730 }, { "epoch": 0.9275218550437101, "grad_norm": 0.6186399459838867, "learning_rate": 0.00019578404050817134, "loss": 1.9523, "step": 3740 }, { "epoch": 0.93000186000372, "grad_norm": 0.5627548694610596, "learning_rate": 0.00019576162596579398, "loss": 1.9311, "step": 3750 }, { "epoch": 0.93248186496373, "grad_norm": 0.6412395238876343, "learning_rate": 0.0001957391532867418, "loss": 1.9825, "step": 3760 }, { "epoch": 0.9349618699237399, "grad_norm": 0.6057267189025879, "learning_rate": 0.000195716622484658, "loss": 1.9442, "step": 3770 }, { "epoch": 0.9374418748837497, "grad_norm": 0.6053009629249573, "learning_rate": 0.0001956940335732209, "loss": 1.9398, "step": 3780 }, { "epoch": 0.9399218798437596, "grad_norm": 0.6406477093696594, "learning_rate": 0.0001956713865661442, "loss": 1.8631, "step": 3790 }, { "epoch": 0.9424018848037696, "grad_norm": 0.6801642775535583, "learning_rate": 0.0001956486814771769, "loss": 1.9436, "step": 3800 }, { "epoch": 0.9448818897637795, "grad_norm": 0.7259861826896667, "learning_rate": 0.00019562591832010312, "loss": 1.854, "step": 3810 }, { "epoch": 0.9473618947237894, "grad_norm": 0.6290436387062073, "learning_rate": 0.00019560309710874238, "loss": 1.8815, "step": 3820 }, { "epoch": 0.9498418996837994, "grad_norm": 0.6102296710014343, "learning_rate": 0.0001955802178569494, "loss": 1.9118, "step": 3830 }, { "epoch": 0.9523219046438093, "grad_norm": 0.6027212738990784, "learning_rate": 0.0001955572805786141, "loss": 1.9514, "step": 3840 }, { "epoch": 0.9548019096038192, "grad_norm": 1.153975486755371, "learning_rate": 0.00019553428528766163, "loss": 1.8342, "step": 3850 }, { "epoch": 0.9572819145638292, "grad_norm": 0.6031201481819153, "learning_rate": 0.0001955112319980524, "loss": 1.9253, "step": 3860 }, { "epoch": 0.959761919523839, "grad_norm": 0.6483965516090393, "learning_rate": 0.00019548812072378205, "loss": 1.8936, "step": 3870 }, { "epoch": 0.9622419244838489, "grad_norm": 0.6587181687355042, "learning_rate": 0.00019546495147888132, "loss": 1.9135, "step": 3880 }, { "epoch": 0.9647219294438589, "grad_norm": 0.5917319655418396, "learning_rate": 0.00019544172427741628, "loss": 1.8195, "step": 3890 }, { "epoch": 0.9672019344038688, "grad_norm": 0.6090488433837891, "learning_rate": 0.00019541843913348805, "loss": 1.8985, "step": 3900 }, { "epoch": 0.9696819393638787, "grad_norm": 0.5770931243896484, "learning_rate": 0.000195395096061233, "loss": 1.8363, "step": 3910 }, { "epoch": 0.9721619443238887, "grad_norm": 0.6690269112586975, "learning_rate": 0.0001953716950748227, "loss": 1.9192, "step": 3920 }, { "epoch": 0.9746419492838986, "grad_norm": 0.6674990653991699, "learning_rate": 0.0001953482361884638, "loss": 1.914, "step": 3930 }, { "epoch": 0.9771219542439085, "grad_norm": 0.6217818856239319, "learning_rate": 0.00019532471941639815, "loss": 1.9067, "step": 3940 }, { "epoch": 0.9796019592039185, "grad_norm": 0.569581151008606, "learning_rate": 0.00019530114477290272, "loss": 1.9381, "step": 3950 }, { "epoch": 0.9820819641639283, "grad_norm": 0.6703535318374634, "learning_rate": 0.00019527751227228963, "loss": 1.9191, "step": 3960 }, { "epoch": 0.9845619691239382, "grad_norm": 0.60328209400177, "learning_rate": 0.00019525382192890614, "loss": 1.9204, "step": 3970 }, { "epoch": 0.9870419740839481, "grad_norm": 0.6037915349006653, "learning_rate": 0.0001952300737571346, "loss": 1.8657, "step": 3980 }, { "epoch": 0.9895219790439581, "grad_norm": 0.6142215728759766, "learning_rate": 0.00019520626777139245, "loss": 1.891, "step": 3990 }, { "epoch": 0.992001984003968, "grad_norm": 0.6303610801696777, "learning_rate": 0.00019518240398613227, "loss": 1.9321, "step": 4000 }, { "epoch": 0.9944819889639779, "grad_norm": 0.6166995763778687, "learning_rate": 0.00019515848241584167, "loss": 1.8985, "step": 4010 }, { "epoch": 0.9969619939239879, "grad_norm": 0.6104696989059448, "learning_rate": 0.00019513450307504348, "loss": 1.8478, "step": 4020 }, { "epoch": 0.9994419988839978, "grad_norm": 0.6207946538925171, "learning_rate": 0.0001951104659782954, "loss": 1.9168, "step": 4030 }, { "epoch": 1.0019220038440078, "grad_norm": 0.6029482483863831, "learning_rate": 0.00019508637114019038, "loss": 1.7681, "step": 4040 }, { "epoch": 1.0044020088040175, "grad_norm": 0.6340354084968567, "learning_rate": 0.0001950622185753563, "loss": 1.816, "step": 4050 }, { "epoch": 1.0068820137640275, "grad_norm": 0.6285779476165771, "learning_rate": 0.0001950380082984561, "loss": 1.7906, "step": 4060 }, { "epoch": 1.0093620187240375, "grad_norm": 0.6094495058059692, "learning_rate": 0.0001950137403241879, "loss": 1.7904, "step": 4070 }, { "epoch": 1.0118420236840473, "grad_norm": 0.7566218376159668, "learning_rate": 0.0001949894146672846, "loss": 1.8968, "step": 4080 }, { "epoch": 1.0143220286440573, "grad_norm": 0.6424439549446106, "learning_rate": 0.00019496503134251435, "loss": 1.7873, "step": 4090 }, { "epoch": 1.0168020336040673, "grad_norm": 0.6438541412353516, "learning_rate": 0.00019494059036468016, "loss": 1.8155, "step": 4100 }, { "epoch": 1.019282038564077, "grad_norm": 0.621476948261261, "learning_rate": 0.0001949160917486201, "loss": 1.8174, "step": 4110 }, { "epoch": 1.021762043524087, "grad_norm": 0.6082315444946289, "learning_rate": 0.00019489153550920728, "loss": 1.774, "step": 4120 }, { "epoch": 1.024242048484097, "grad_norm": 0.652134895324707, "learning_rate": 0.00019486692166134964, "loss": 1.909, "step": 4130 }, { "epoch": 1.0267220534441068, "grad_norm": 0.6727526783943176, "learning_rate": 0.0001948422502199903, "loss": 1.8005, "step": 4140 }, { "epoch": 1.0292020584041168, "grad_norm": 0.6741703748703003, "learning_rate": 0.00019481752120010715, "loss": 1.8373, "step": 4150 }, { "epoch": 1.0316820633641268, "grad_norm": 0.6614888906478882, "learning_rate": 0.0001947927346167132, "loss": 1.8277, "step": 4160 }, { "epoch": 1.0341620683241366, "grad_norm": 0.6599487066268921, "learning_rate": 0.00019476789048485625, "loss": 1.8234, "step": 4170 }, { "epoch": 1.0366420732841466, "grad_norm": 0.6961712837219238, "learning_rate": 0.00019474298881961918, "loss": 1.8076, "step": 4180 }, { "epoch": 1.0391220782441566, "grad_norm": 0.6359785795211792, "learning_rate": 0.00019471802963611973, "loss": 1.8765, "step": 4190 }, { "epoch": 1.0416020832041664, "grad_norm": 0.6598185896873474, "learning_rate": 0.0001946930129495106, "loss": 1.8113, "step": 4200 }, { "epoch": 1.0440820881641764, "grad_norm": 0.6477219462394714, "learning_rate": 0.00019466793877497923, "loss": 1.7794, "step": 4210 }, { "epoch": 1.0465620931241864, "grad_norm": 0.6349904537200928, "learning_rate": 0.00019464280712774826, "loss": 1.8253, "step": 4220 }, { "epoch": 1.0490420980841961, "grad_norm": 0.7010254263877869, "learning_rate": 0.00019461761802307495, "loss": 1.8362, "step": 4230 }, { "epoch": 1.0515221030442061, "grad_norm": 0.6054363250732422, "learning_rate": 0.0001945923714762516, "loss": 1.8158, "step": 4240 }, { "epoch": 1.054002108004216, "grad_norm": 0.6621227264404297, "learning_rate": 0.00019456706750260532, "loss": 1.7927, "step": 4250 }, { "epoch": 1.056482112964226, "grad_norm": 0.656527042388916, "learning_rate": 0.0001945417061174981, "loss": 1.8297, "step": 4260 }, { "epoch": 1.058962117924236, "grad_norm": 0.6720926761627197, "learning_rate": 0.0001945162873363268, "loss": 1.8212, "step": 4270 }, { "epoch": 1.0614421228842457, "grad_norm": 0.670739471912384, "learning_rate": 0.00019449081117452302, "loss": 1.8129, "step": 4280 }, { "epoch": 1.0639221278442557, "grad_norm": 0.6356748938560486, "learning_rate": 0.00019446527764755342, "loss": 1.8215, "step": 4290 }, { "epoch": 1.0664021328042657, "grad_norm": 0.6663000583648682, "learning_rate": 0.00019443968677091927, "loss": 1.782, "step": 4300 }, { "epoch": 1.0688821377642754, "grad_norm": 0.6405335068702698, "learning_rate": 0.0001944140385601567, "loss": 1.7916, "step": 4310 }, { "epoch": 1.0713621427242854, "grad_norm": 0.6829027533531189, "learning_rate": 0.00019438833303083678, "loss": 1.8293, "step": 4320 }, { "epoch": 1.0738421476842954, "grad_norm": 0.6544651389122009, "learning_rate": 0.00019436257019856518, "loss": 1.8509, "step": 4330 }, { "epoch": 1.0763221526443052, "grad_norm": 0.6550015211105347, "learning_rate": 0.00019433675007898255, "loss": 1.8203, "step": 4340 }, { "epoch": 1.0788021576043152, "grad_norm": 0.6609222888946533, "learning_rate": 0.00019431087268776412, "loss": 1.7975, "step": 4350 }, { "epoch": 1.0812821625643252, "grad_norm": 0.6910188794136047, "learning_rate": 0.0001942849380406201, "loss": 1.8557, "step": 4360 }, { "epoch": 1.083762167524335, "grad_norm": 0.6951532959938049, "learning_rate": 0.00019425894615329536, "loss": 1.7861, "step": 4370 }, { "epoch": 1.086242172484345, "grad_norm": 0.6598848104476929, "learning_rate": 0.00019423289704156944, "loss": 1.7694, "step": 4380 }, { "epoch": 1.088722177444355, "grad_norm": 0.7436254024505615, "learning_rate": 0.00019420679072125677, "loss": 1.7986, "step": 4390 }, { "epoch": 1.0912021824043647, "grad_norm": 0.6893884539604187, "learning_rate": 0.00019418062720820637, "loss": 1.7821, "step": 4400 }, { "epoch": 1.0936821873643747, "grad_norm": 0.713741660118103, "learning_rate": 0.00019415440651830208, "loss": 1.8634, "step": 4410 }, { "epoch": 1.0961621923243847, "grad_norm": 0.700987696647644, "learning_rate": 0.00019412812866746246, "loss": 1.7938, "step": 4420 }, { "epoch": 1.0986421972843945, "grad_norm": 0.7220914363861084, "learning_rate": 0.00019410179367164071, "loss": 1.8701, "step": 4430 }, { "epoch": 1.1011222022444045, "grad_norm": 0.7219149470329285, "learning_rate": 0.00019407540154682472, "loss": 1.807, "step": 4440 }, { "epoch": 1.1036022072044145, "grad_norm": 0.7215644717216492, "learning_rate": 0.00019404895230903718, "loss": 1.7848, "step": 4450 }, { "epoch": 1.1060822121644243, "grad_norm": 0.6691747307777405, "learning_rate": 0.00019402244597433525, "loss": 1.8294, "step": 4460 }, { "epoch": 1.1085622171244343, "grad_norm": 0.6951769590377808, "learning_rate": 0.00019399588255881095, "loss": 1.7931, "step": 4470 }, { "epoch": 1.1110422220844443, "grad_norm": 0.8239799737930298, "learning_rate": 0.00019396926207859084, "loss": 1.7905, "step": 4480 }, { "epoch": 1.113522227044454, "grad_norm": 0.68569016456604, "learning_rate": 0.00019394258454983617, "loss": 1.774, "step": 4490 }, { "epoch": 1.116002232004464, "grad_norm": 0.7133092880249023, "learning_rate": 0.0001939158499887428, "loss": 1.8231, "step": 4500 }, { "epoch": 1.118482236964474, "grad_norm": 0.7494200468063354, "learning_rate": 0.0001938890584115412, "loss": 1.7985, "step": 4510 }, { "epoch": 1.1209622419244838, "grad_norm": 0.6811527609825134, "learning_rate": 0.00019386220983449653, "loss": 1.8308, "step": 4520 }, { "epoch": 1.1234422468844938, "grad_norm": 0.7043130993843079, "learning_rate": 0.00019383530427390845, "loss": 1.7974, "step": 4530 }, { "epoch": 1.1259222518445038, "grad_norm": 0.6619195938110352, "learning_rate": 0.00019380834174611132, "loss": 1.7768, "step": 4540 }, { "epoch": 1.1284022568045136, "grad_norm": 0.6526765823364258, "learning_rate": 0.00019378132226747398, "loss": 1.7887, "step": 4550 }, { "epoch": 1.1308822617645236, "grad_norm": 0.6597820520401001, "learning_rate": 0.00019375424585439994, "loss": 1.7811, "step": 4560 }, { "epoch": 1.1333622667245336, "grad_norm": 0.6860714554786682, "learning_rate": 0.00019372711252332717, "loss": 1.8105, "step": 4570 }, { "epoch": 1.1358422716845433, "grad_norm": 0.72899329662323, "learning_rate": 0.00019369992229072836, "loss": 1.8321, "step": 4580 }, { "epoch": 1.1383222766445533, "grad_norm": 0.6844159960746765, "learning_rate": 0.00019367267517311057, "loss": 1.7651, "step": 4590 }, { "epoch": 1.140802281604563, "grad_norm": 0.6703256964683533, "learning_rate": 0.00019364537118701542, "loss": 1.7685, "step": 4600 }, { "epoch": 1.143282286564573, "grad_norm": 0.6494504809379578, "learning_rate": 0.00019361801034901922, "loss": 1.8628, "step": 4610 }, { "epoch": 1.145762291524583, "grad_norm": 0.6616336107254028, "learning_rate": 0.0001935905926757326, "loss": 1.7785, "step": 4620 }, { "epoch": 1.148242296484593, "grad_norm": 0.6509258151054382, "learning_rate": 0.00019356311818380075, "loss": 1.8705, "step": 4630 }, { "epoch": 1.1507223014446029, "grad_norm": 0.711273193359375, "learning_rate": 0.0001935355868899034, "loss": 1.8168, "step": 4640 }, { "epoch": 1.1532023064046129, "grad_norm": 0.6760425567626953, "learning_rate": 0.0001935079988107548, "loss": 1.8339, "step": 4650 }, { "epoch": 1.1556823113646226, "grad_norm": 0.6510275602340698, "learning_rate": 0.0001934803539631035, "loss": 1.8179, "step": 4660 }, { "epoch": 1.1581623163246326, "grad_norm": 0.6667962074279785, "learning_rate": 0.00019345265236373274, "loss": 1.8456, "step": 4670 }, { "epoch": 1.1606423212846426, "grad_norm": 0.6934930086135864, "learning_rate": 0.00019342489402945998, "loss": 1.8093, "step": 4680 }, { "epoch": 1.1631223262446524, "grad_norm": 0.6169160604476929, "learning_rate": 0.00019339707897713738, "loss": 1.793, "step": 4690 }, { "epoch": 1.1656023312046624, "grad_norm": 0.6679814457893372, "learning_rate": 0.00019336920722365125, "loss": 1.8465, "step": 4700 }, { "epoch": 1.1680823361646724, "grad_norm": 0.7247648239135742, "learning_rate": 0.0001933412787859226, "loss": 1.8768, "step": 4710 }, { "epoch": 1.1705623411246822, "grad_norm": 0.7149258255958557, "learning_rate": 0.00019331329368090666, "loss": 1.8649, "step": 4720 }, { "epoch": 1.1730423460846922, "grad_norm": 0.6846023797988892, "learning_rate": 0.00019328525192559312, "loss": 1.8084, "step": 4730 }, { "epoch": 1.1755223510447022, "grad_norm": 0.6747673153877258, "learning_rate": 0.00019325715353700612, "loss": 1.8186, "step": 4740 }, { "epoch": 1.178002356004712, "grad_norm": 0.6865365505218506, "learning_rate": 0.0001932289985322041, "loss": 1.8744, "step": 4750 }, { "epoch": 1.180482360964722, "grad_norm": 0.7503153085708618, "learning_rate": 0.00019320078692827987, "loss": 1.8562, "step": 4760 }, { "epoch": 1.182962365924732, "grad_norm": 0.6764112114906311, "learning_rate": 0.00019317251874236067, "loss": 1.8092, "step": 4770 }, { "epoch": 1.1854423708847417, "grad_norm": 0.6778523921966553, "learning_rate": 0.00019314419399160803, "loss": 1.847, "step": 4780 }, { "epoch": 1.1879223758447517, "grad_norm": 0.6746340394020081, "learning_rate": 0.0001931158126932179, "loss": 1.7979, "step": 4790 }, { "epoch": 1.1904023808047617, "grad_norm": 0.7232058644294739, "learning_rate": 0.00019308737486442045, "loss": 1.875, "step": 4800 }, { "epoch": 1.1928823857647715, "grad_norm": 0.7230646014213562, "learning_rate": 0.00019305888052248023, "loss": 1.8013, "step": 4810 }, { "epoch": 1.1953623907247815, "grad_norm": 0.6311892867088318, "learning_rate": 0.0001930303296846961, "loss": 1.8306, "step": 4820 }, { "epoch": 1.1978423956847914, "grad_norm": 0.70650315284729, "learning_rate": 0.00019300172236840122, "loss": 1.8035, "step": 4830 }, { "epoch": 1.2003224006448012, "grad_norm": 0.6846140623092651, "learning_rate": 0.00019297305859096304, "loss": 1.8046, "step": 4840 }, { "epoch": 1.2028024056048112, "grad_norm": 0.6740015149116516, "learning_rate": 0.00019294433836978322, "loss": 1.8784, "step": 4850 }, { "epoch": 1.2052824105648212, "grad_norm": 0.7008079886436462, "learning_rate": 0.00019291556172229785, "loss": 1.8449, "step": 4860 }, { "epoch": 1.207762415524831, "grad_norm": 0.6753029823303223, "learning_rate": 0.00019288672866597705, "loss": 1.7802, "step": 4870 }, { "epoch": 1.210242420484841, "grad_norm": 0.7091749310493469, "learning_rate": 0.00019285783921832537, "loss": 1.8042, "step": 4880 }, { "epoch": 1.212722425444851, "grad_norm": 0.7150561809539795, "learning_rate": 0.00019282889339688152, "loss": 1.9043, "step": 4890 }, { "epoch": 1.2152024304048608, "grad_norm": 0.8080011606216431, "learning_rate": 0.00019279989121921847, "loss": 1.8447, "step": 4900 }, { "epoch": 1.2176824353648708, "grad_norm": 0.705141544342041, "learning_rate": 0.00019277083270294334, "loss": 1.7947, "step": 4910 }, { "epoch": 1.2201624403248807, "grad_norm": 0.6963218450546265, "learning_rate": 0.0001927417178656975, "loss": 1.7765, "step": 4920 }, { "epoch": 1.2226424452848905, "grad_norm": 0.6910169124603271, "learning_rate": 0.00019271254672515654, "loss": 1.7994, "step": 4930 }, { "epoch": 1.2251224502449005, "grad_norm": 0.6857121586799622, "learning_rate": 0.00019268331929903014, "loss": 1.8188, "step": 4940 }, { "epoch": 1.2276024552049103, "grad_norm": 0.6874817609786987, "learning_rate": 0.00019265403560506222, "loss": 1.8254, "step": 4950 }, { "epoch": 1.2300824601649203, "grad_norm": 0.7109423875808716, "learning_rate": 0.0001926246956610309, "loss": 1.7868, "step": 4960 }, { "epoch": 1.2325624651249303, "grad_norm": 0.6798704266548157, "learning_rate": 0.00019259529948474833, "loss": 1.813, "step": 4970 }, { "epoch": 1.2350424700849403, "grad_norm": 0.7131743431091309, "learning_rate": 0.00019256584709406092, "loss": 1.7819, "step": 4980 }, { "epoch": 1.23752247504495, "grad_norm": 0.7012653946876526, "learning_rate": 0.00019253633850684909, "loss": 1.7884, "step": 4990 }, { "epoch": 1.24000248000496, "grad_norm": 0.6879581809043884, "learning_rate": 0.0001925067737410275, "loss": 1.8382, "step": 5000 }, { "epoch": 1.2424824849649698, "grad_norm": 0.6491703987121582, "learning_rate": 0.00019247715281454483, "loss": 1.8413, "step": 5010 }, { "epoch": 1.2449624899249798, "grad_norm": 0.6270185112953186, "learning_rate": 0.00019244747574538384, "loss": 1.7905, "step": 5020 }, { "epoch": 1.2474424948849898, "grad_norm": 0.6901357769966125, "learning_rate": 0.00019241774255156148, "loss": 1.7767, "step": 5030 }, { "epoch": 1.2499224998449998, "grad_norm": 0.6535638570785522, "learning_rate": 0.0001923879532511287, "loss": 1.7687, "step": 5040 }, { "epoch": 1.2524025048050096, "grad_norm": 0.7104007005691528, "learning_rate": 0.00019235810786217048, "loss": 1.7414, "step": 5050 }, { "epoch": 1.2548825097650196, "grad_norm": 0.7325509190559387, "learning_rate": 0.0001923282064028059, "loss": 1.8172, "step": 5060 }, { "epoch": 1.2573625147250294, "grad_norm": 0.6885420083999634, "learning_rate": 0.0001922982488911881, "loss": 1.7878, "step": 5070 }, { "epoch": 1.2598425196850394, "grad_norm": 0.7148260474205017, "learning_rate": 0.00019226823534550418, "loss": 1.8057, "step": 5080 }, { "epoch": 1.2623225246450493, "grad_norm": 0.6735394597053528, "learning_rate": 0.00019223816578397535, "loss": 1.8795, "step": 5090 }, { "epoch": 1.2648025296050593, "grad_norm": 0.7113747000694275, "learning_rate": 0.00019220804022485673, "loss": 1.8209, "step": 5100 }, { "epoch": 1.2672825345650691, "grad_norm": 0.667385458946228, "learning_rate": 0.00019217785868643747, "loss": 1.8239, "step": 5110 }, { "epoch": 1.2697625395250791, "grad_norm": 0.7234987020492554, "learning_rate": 0.00019214762118704076, "loss": 1.779, "step": 5120 }, { "epoch": 1.2722425444850889, "grad_norm": 0.6842116713523865, "learning_rate": 0.00019211732774502372, "loss": 1.7783, "step": 5130 }, { "epoch": 1.2747225494450989, "grad_norm": 0.7074073553085327, "learning_rate": 0.0001920869783787774, "loss": 1.7763, "step": 5140 }, { "epoch": 1.2772025544051089, "grad_norm": 0.6838715076446533, "learning_rate": 0.00019205657310672686, "loss": 1.7461, "step": 5150 }, { "epoch": 1.2796825593651187, "grad_norm": 0.6861848831176758, "learning_rate": 0.00019202611194733108, "loss": 1.7965, "step": 5160 }, { "epoch": 1.2821625643251287, "grad_norm": 0.7556761503219604, "learning_rate": 0.0001919955949190829, "loss": 1.8114, "step": 5170 }, { "epoch": 1.2846425692851386, "grad_norm": 0.6757796406745911, "learning_rate": 0.00019196502204050922, "loss": 1.8087, "step": 5180 }, { "epoch": 1.2871225742451484, "grad_norm": 0.7062699794769287, "learning_rate": 0.00019193439333017076, "loss": 1.8071, "step": 5190 }, { "epoch": 1.2896025792051584, "grad_norm": 0.6406750679016113, "learning_rate": 0.00019190370880666207, "loss": 1.8025, "step": 5200 }, { "epoch": 1.2920825841651684, "grad_norm": 0.6705310940742493, "learning_rate": 0.00019187296848861176, "loss": 1.8674, "step": 5210 }, { "epoch": 1.2945625891251782, "grad_norm": 0.6770936846733093, "learning_rate": 0.00019184217239468212, "loss": 1.8778, "step": 5220 }, { "epoch": 1.2970425940851882, "grad_norm": 0.7093015313148499, "learning_rate": 0.00019181132054356943, "loss": 1.8537, "step": 5230 }, { "epoch": 1.299522599045198, "grad_norm": 0.6702137589454651, "learning_rate": 0.00019178041295400382, "loss": 1.8195, "step": 5240 }, { "epoch": 1.302002604005208, "grad_norm": 0.7661167979240417, "learning_rate": 0.00019174944964474912, "loss": 1.7991, "step": 5250 }, { "epoch": 1.304482608965218, "grad_norm": 0.6782104969024658, "learning_rate": 0.0001917184306346032, "loss": 1.8504, "step": 5260 }, { "epoch": 1.306962613925228, "grad_norm": 0.727779746055603, "learning_rate": 0.00019168735594239758, "loss": 1.8796, "step": 5270 }, { "epoch": 1.3094426188852377, "grad_norm": 0.665866494178772, "learning_rate": 0.00019165622558699763, "loss": 1.8183, "step": 5280 }, { "epoch": 1.3119226238452477, "grad_norm": 0.6575871706008911, "learning_rate": 0.00019162503958730255, "loss": 1.7832, "step": 5290 }, { "epoch": 1.3144026288052575, "grad_norm": 0.6850647926330566, "learning_rate": 0.00019159379796224524, "loss": 1.8544, "step": 5300 }, { "epoch": 1.3168826337652675, "grad_norm": 0.6749188303947449, "learning_rate": 0.0001915625007307925, "loss": 1.805, "step": 5310 }, { "epoch": 1.3193626387252775, "grad_norm": 0.6634512543678284, "learning_rate": 0.00019153114791194473, "loss": 1.8462, "step": 5320 }, { "epoch": 1.3218426436852875, "grad_norm": 0.6670765280723572, "learning_rate": 0.0001914997395247362, "loss": 1.8249, "step": 5330 }, { "epoch": 1.3243226486452973, "grad_norm": 0.6884291768074036, "learning_rate": 0.00019146827558823488, "loss": 1.8354, "step": 5340 }, { "epoch": 1.3268026536053072, "grad_norm": 0.7221989035606384, "learning_rate": 0.00019143675612154245, "loss": 1.7939, "step": 5350 }, { "epoch": 1.329282658565317, "grad_norm": 0.6698116660118103, "learning_rate": 0.00019140518114379434, "loss": 1.8188, "step": 5360 }, { "epoch": 1.331762663525327, "grad_norm": 0.7133959531784058, "learning_rate": 0.00019137355067415956, "loss": 1.8009, "step": 5370 }, { "epoch": 1.334242668485337, "grad_norm": 0.6672759056091309, "learning_rate": 0.000191341864731841, "loss": 1.7672, "step": 5380 }, { "epoch": 1.336722673445347, "grad_norm": 0.6286668181419373, "learning_rate": 0.00019131012333607506, "loss": 1.8311, "step": 5390 }, { "epoch": 1.3392026784053568, "grad_norm": 0.6913756728172302, "learning_rate": 0.00019127832650613189, "loss": 1.8319, "step": 5400 }, { "epoch": 1.3416826833653668, "grad_norm": 0.7492555379867554, "learning_rate": 0.0001912464742613153, "loss": 1.8288, "step": 5410 }, { "epoch": 1.3441626883253766, "grad_norm": 0.7322856187820435, "learning_rate": 0.00019121456662096268, "loss": 1.773, "step": 5420 }, { "epoch": 1.3466426932853865, "grad_norm": 0.6672503352165222, "learning_rate": 0.00019118260360444515, "loss": 1.8674, "step": 5430 }, { "epoch": 1.3491226982453965, "grad_norm": 0.73240065574646, "learning_rate": 0.00019115058523116733, "loss": 1.8439, "step": 5440 }, { "epoch": 1.3516027032054065, "grad_norm": 0.6532642245292664, "learning_rate": 0.0001911185115205675, "loss": 1.8261, "step": 5450 }, { "epoch": 1.3540827081654163, "grad_norm": 0.7535740733146667, "learning_rate": 0.00019108638249211758, "loss": 1.8175, "step": 5460 }, { "epoch": 1.3565627131254263, "grad_norm": 0.6851351261138916, "learning_rate": 0.000191054198165323, "loss": 1.7735, "step": 5470 }, { "epoch": 1.359042718085436, "grad_norm": 0.7507172226905823, "learning_rate": 0.00019102195855972287, "loss": 1.8291, "step": 5480 }, { "epoch": 1.361522723045446, "grad_norm": 0.6462118625640869, "learning_rate": 0.00019098966369488965, "loss": 1.8241, "step": 5490 }, { "epoch": 1.364002728005456, "grad_norm": 0.7370539307594299, "learning_rate": 0.0001909573135904296, "loss": 1.8278, "step": 5500 }, { "epoch": 1.3664827329654659, "grad_norm": 0.6809933185577393, "learning_rate": 0.00019092490826598234, "loss": 1.8321, "step": 5510 }, { "epoch": 1.3689627379254758, "grad_norm": 0.6588488221168518, "learning_rate": 0.0001908924477412211, "loss": 1.763, "step": 5520 }, { "epoch": 1.3714427428854858, "grad_norm": 0.7076486349105835, "learning_rate": 0.0001908599320358526, "loss": 1.8042, "step": 5530 }, { "epoch": 1.3739227478454956, "grad_norm": 0.6753181219100952, "learning_rate": 0.00019082736116961697, "loss": 1.8308, "step": 5540 }, { "epoch": 1.3764027528055056, "grad_norm": 0.6668750643730164, "learning_rate": 0.00019079473516228802, "loss": 1.8132, "step": 5550 }, { "epoch": 1.3788827577655156, "grad_norm": 0.7110525369644165, "learning_rate": 0.00019076205403367285, "loss": 1.8847, "step": 5560 }, { "epoch": 1.3813627627255254, "grad_norm": 1.0364247560501099, "learning_rate": 0.00019072931780361217, "loss": 1.8394, "step": 5570 }, { "epoch": 1.3838427676855354, "grad_norm": 0.7233348488807678, "learning_rate": 0.00019069652649198005, "loss": 1.8338, "step": 5580 }, { "epoch": 1.3863227726455452, "grad_norm": 0.8322575688362122, "learning_rate": 0.00019066368011868401, "loss": 1.7853, "step": 5590 }, { "epoch": 1.3888027776055551, "grad_norm": 0.6443812251091003, "learning_rate": 0.000190630778703665, "loss": 1.8372, "step": 5600 }, { "epoch": 1.3912827825655651, "grad_norm": 0.8004515171051025, "learning_rate": 0.00019059782226689743, "loss": 1.8277, "step": 5610 }, { "epoch": 1.3937627875255751, "grad_norm": 0.7190122008323669, "learning_rate": 0.00019056481082838905, "loss": 1.8278, "step": 5620 }, { "epoch": 1.396242792485585, "grad_norm": 0.6689620018005371, "learning_rate": 0.00019053174440818106, "loss": 1.863, "step": 5630 }, { "epoch": 1.398722797445595, "grad_norm": 0.6717751622200012, "learning_rate": 0.000190498623026348, "loss": 1.7858, "step": 5640 }, { "epoch": 1.4012028024056047, "grad_norm": 0.6776305437088013, "learning_rate": 0.00019046544670299777, "loss": 1.8747, "step": 5650 }, { "epoch": 1.4036828073656147, "grad_norm": 0.6982606053352356, "learning_rate": 0.0001904322154582717, "loss": 1.793, "step": 5660 }, { "epoch": 1.4061628123256247, "grad_norm": 0.7391812205314636, "learning_rate": 0.00019039892931234435, "loss": 1.8151, "step": 5670 }, { "epoch": 1.4086428172856347, "grad_norm": 0.7182745933532715, "learning_rate": 0.00019036558828542367, "loss": 1.7983, "step": 5680 }, { "epoch": 1.4111228222456444, "grad_norm": 0.7192206978797913, "learning_rate": 0.00019033219239775095, "loss": 1.7845, "step": 5690 }, { "epoch": 1.4136028272056544, "grad_norm": 0.7159098386764526, "learning_rate": 0.00019029874166960075, "loss": 1.7565, "step": 5700 }, { "epoch": 1.4160828321656642, "grad_norm": 0.7049682140350342, "learning_rate": 0.00019026523612128096, "loss": 1.872, "step": 5710 }, { "epoch": 1.4185628371256742, "grad_norm": 0.6793913841247559, "learning_rate": 0.00019023167577313264, "loss": 1.8182, "step": 5720 }, { "epoch": 1.4210428420856842, "grad_norm": 0.7400872707366943, "learning_rate": 0.00019019806064553033, "loss": 1.8298, "step": 5730 }, { "epoch": 1.4235228470456942, "grad_norm": 0.6520139575004578, "learning_rate": 0.00019016439075888162, "loss": 1.8588, "step": 5740 }, { "epoch": 1.426002852005704, "grad_norm": 0.6989015936851501, "learning_rate": 0.00019013066613362742, "loss": 1.7805, "step": 5750 }, { "epoch": 1.428482856965714, "grad_norm": 0.6831812262535095, "learning_rate": 0.0001900968867902419, "loss": 1.8152, "step": 5760 }, { "epoch": 1.4309628619257238, "grad_norm": 0.7414357662200928, "learning_rate": 0.00019006305274923247, "loss": 1.767, "step": 5770 }, { "epoch": 1.4334428668857337, "grad_norm": 0.6898692846298218, "learning_rate": 0.00019002916403113962, "loss": 1.7556, "step": 5780 }, { "epoch": 1.4359228718457437, "grad_norm": 0.6934981942176819, "learning_rate": 0.00018999522065653714, "loss": 1.8059, "step": 5790 }, { "epoch": 1.4384028768057537, "grad_norm": 0.7213485240936279, "learning_rate": 0.00018996122264603203, "loss": 1.7809, "step": 5800 }, { "epoch": 1.4408828817657635, "grad_norm": 0.6892271637916565, "learning_rate": 0.00018992717002026433, "loss": 1.8452, "step": 5810 }, { "epoch": 1.4433628867257735, "grad_norm": 0.7585854530334473, "learning_rate": 0.00018989306279990737, "loss": 1.8235, "step": 5820 }, { "epoch": 1.4458428916857833, "grad_norm": 0.6786388158798218, "learning_rate": 0.00018985890100566753, "loss": 1.8044, "step": 5830 }, { "epoch": 1.4483228966457933, "grad_norm": 0.6582244038581848, "learning_rate": 0.00018982468465828442, "loss": 1.8139, "step": 5840 }, { "epoch": 1.4508029016058033, "grad_norm": 0.6639909744262695, "learning_rate": 0.00018979041377853068, "loss": 1.8363, "step": 5850 }, { "epoch": 1.453282906565813, "grad_norm": 0.728732168674469, "learning_rate": 0.0001897560883872121, "loss": 1.8377, "step": 5860 }, { "epoch": 1.455762911525823, "grad_norm": 0.7115894556045532, "learning_rate": 0.00018972170850516747, "loss": 1.8009, "step": 5870 }, { "epoch": 1.458242916485833, "grad_norm": 0.7272158861160278, "learning_rate": 0.00018968727415326884, "loss": 1.8232, "step": 5880 }, { "epoch": 1.4607229214458428, "grad_norm": 0.6763330101966858, "learning_rate": 0.0001896527853524212, "loss": 1.8064, "step": 5890 }, { "epoch": 1.4632029264058528, "grad_norm": 0.6789953708648682, "learning_rate": 0.0001896182421235626, "loss": 1.8653, "step": 5900 }, { "epoch": 1.4656829313658628, "grad_norm": 0.7505097389221191, "learning_rate": 0.0001895836444876642, "loss": 1.8533, "step": 5910 }, { "epoch": 1.4681629363258726, "grad_norm": 0.7787876129150391, "learning_rate": 0.0001895489924657301, "loss": 1.7521, "step": 5920 }, { "epoch": 1.4706429412858826, "grad_norm": 0.7812589406967163, "learning_rate": 0.00018951428607879753, "loss": 1.8075, "step": 5930 }, { "epoch": 1.4731229462458926, "grad_norm": 0.695969820022583, "learning_rate": 0.00018947952534793661, "loss": 1.8117, "step": 5940 }, { "epoch": 1.4756029512059023, "grad_norm": 0.680429995059967, "learning_rate": 0.00018944471029425054, "loss": 1.8116, "step": 5950 }, { "epoch": 1.4780829561659123, "grad_norm": 0.6775833964347839, "learning_rate": 0.00018940984093887542, "loss": 1.7969, "step": 5960 }, { "epoch": 1.4805629611259223, "grad_norm": 0.6806792616844177, "learning_rate": 0.0001893749173029804, "loss": 1.8317, "step": 5970 }, { "epoch": 1.4830429660859321, "grad_norm": 0.7450188398361206, "learning_rate": 0.00018933993940776752, "loss": 1.7515, "step": 5980 }, { "epoch": 1.485522971045942, "grad_norm": 0.6525201201438904, "learning_rate": 0.00018930490727447178, "loss": 1.813, "step": 5990 }, { "epoch": 1.4880029760059519, "grad_norm": 0.6384407877922058, "learning_rate": 0.00018926982092436117, "loss": 1.7391, "step": 6000 }, { "epoch": 1.4904829809659619, "grad_norm": 0.706158459186554, "learning_rate": 0.00018923468037873646, "loss": 1.8431, "step": 6010 }, { "epoch": 1.4929629859259719, "grad_norm": 0.6749498248100281, "learning_rate": 0.00018919948565893142, "loss": 1.7907, "step": 6020 }, { "epoch": 1.4954429908859819, "grad_norm": 0.7001073360443115, "learning_rate": 0.00018916423678631272, "loss": 1.7967, "step": 6030 }, { "epoch": 1.4979229958459916, "grad_norm": 0.6850577592849731, "learning_rate": 0.00018912893378227985, "loss": 1.8312, "step": 6040 }, { "epoch": 1.5004030008060016, "grad_norm": 0.7084920406341553, "learning_rate": 0.00018909357666826518, "loss": 1.7268, "step": 6050 }, { "epoch": 1.5028830057660114, "grad_norm": 0.701327919960022, "learning_rate": 0.00018905816546573397, "loss": 1.8329, "step": 6060 }, { "epoch": 1.5053630107260214, "grad_norm": 0.7240101099014282, "learning_rate": 0.00018902270019618422, "loss": 1.7809, "step": 6070 }, { "epoch": 1.5078430156860314, "grad_norm": 0.6603263020515442, "learning_rate": 0.0001889871808811469, "loss": 1.8182, "step": 6080 }, { "epoch": 1.5103230206460414, "grad_norm": 0.7466841340065002, "learning_rate": 0.00018895160754218561, "loss": 1.8238, "step": 6090 }, { "epoch": 1.5128030256060512, "grad_norm": 0.7204167246818542, "learning_rate": 0.00018891598020089693, "loss": 1.8201, "step": 6100 }, { "epoch": 1.5152830305660612, "grad_norm": 0.6652133464813232, "learning_rate": 0.00018888029887891014, "loss": 1.8013, "step": 6110 }, { "epoch": 1.517763035526071, "grad_norm": 0.6607919335365295, "learning_rate": 0.00018884456359788724, "loss": 1.7745, "step": 6120 }, { "epoch": 1.520243040486081, "grad_norm": 0.7172577381134033, "learning_rate": 0.0001888087743795231, "loss": 1.8148, "step": 6130 }, { "epoch": 1.522723045446091, "grad_norm": 0.7216603755950928, "learning_rate": 0.0001887729312455452, "loss": 1.7853, "step": 6140 }, { "epoch": 1.525203050406101, "grad_norm": 0.7555466294288635, "learning_rate": 0.0001887370342177139, "loss": 1.7995, "step": 6150 }, { "epoch": 1.5276830553661107, "grad_norm": 0.683175265789032, "learning_rate": 0.00018870108331782217, "loss": 1.8145, "step": 6160 }, { "epoch": 1.5301630603261207, "grad_norm": 0.6748200058937073, "learning_rate": 0.00018866507856769575, "loss": 1.8319, "step": 6170 }, { "epoch": 1.5326430652861305, "grad_norm": 0.8977167010307312, "learning_rate": 0.00018862901998919305, "loss": 1.8121, "step": 6180 }, { "epoch": 1.5351230702461405, "grad_norm": 0.732475757598877, "learning_rate": 0.0001885929076042051, "loss": 1.8169, "step": 6190 }, { "epoch": 1.5376030752061505, "grad_norm": 0.6811177134513855, "learning_rate": 0.00018855674143465566, "loss": 1.8847, "step": 6200 }, { "epoch": 1.5400830801661605, "grad_norm": 0.7238559722900391, "learning_rate": 0.00018852052150250122, "loss": 1.8222, "step": 6210 }, { "epoch": 1.5425630851261702, "grad_norm": 0.748440146446228, "learning_rate": 0.00018848424782973074, "loss": 1.8437, "step": 6220 }, { "epoch": 1.54504309008618, "grad_norm": 0.6953113079071045, "learning_rate": 0.00018844792043836589, "loss": 1.7924, "step": 6230 }, { "epoch": 1.54752309504619, "grad_norm": 0.7056795358657837, "learning_rate": 0.00018841153935046098, "loss": 1.8471, "step": 6240 }, { "epoch": 1.5500031000062, "grad_norm": 0.7412731647491455, "learning_rate": 0.0001883751045881029, "loss": 1.749, "step": 6250 }, { "epoch": 1.55248310496621, "grad_norm": 0.7239865660667419, "learning_rate": 0.00018833861617341107, "loss": 1.8143, "step": 6260 }, { "epoch": 1.55496310992622, "grad_norm": 0.708827555179596, "learning_rate": 0.00018830207412853756, "loss": 1.8153, "step": 6270 }, { "epoch": 1.5574431148862298, "grad_norm": 0.6861132383346558, "learning_rate": 0.00018826547847566693, "loss": 1.8874, "step": 6280 }, { "epoch": 1.5599231198462395, "grad_norm": 0.6977689862251282, "learning_rate": 0.00018822882923701638, "loss": 1.8395, "step": 6290 }, { "epoch": 1.5624031248062495, "grad_norm": 0.6514766216278076, "learning_rate": 0.0001881921264348355, "loss": 1.7926, "step": 6300 }, { "epoch": 1.5648831297662595, "grad_norm": 0.6926382780075073, "learning_rate": 0.00018815537009140657, "loss": 1.7719, "step": 6310 }, { "epoch": 1.5673631347262695, "grad_norm": 0.7346818447113037, "learning_rate": 0.00018811856022904423, "loss": 1.819, "step": 6320 }, { "epoch": 1.5698431396862795, "grad_norm": 0.690940260887146, "learning_rate": 0.00018808169687009572, "loss": 1.8205, "step": 6330 }, { "epoch": 1.5723231446462893, "grad_norm": 0.6950753331184387, "learning_rate": 0.00018804478003694067, "loss": 1.7586, "step": 6340 }, { "epoch": 1.574803149606299, "grad_norm": 0.6781951189041138, "learning_rate": 0.00018800780975199115, "loss": 1.8386, "step": 6350 }, { "epoch": 1.577283154566309, "grad_norm": 0.7350020408630371, "learning_rate": 0.00018797078603769184, "loss": 1.894, "step": 6360 }, { "epoch": 1.579763159526319, "grad_norm": 0.7319828867912292, "learning_rate": 0.00018793370891651972, "loss": 1.8185, "step": 6370 }, { "epoch": 1.582243164486329, "grad_norm": 0.6983395218849182, "learning_rate": 0.0001878965784109842, "loss": 1.8203, "step": 6380 }, { "epoch": 1.5847231694463388, "grad_norm": 0.6651434898376465, "learning_rate": 0.00018785939454362717, "loss": 1.7963, "step": 6390 }, { "epoch": 1.5872031744063488, "grad_norm": 0.6983841061592102, "learning_rate": 0.00018782215733702286, "loss": 1.8204, "step": 6400 }, { "epoch": 1.5896831793663586, "grad_norm": 0.7472341656684875, "learning_rate": 0.0001877848668137779, "loss": 1.8136, "step": 6410 }, { "epoch": 1.5921631843263686, "grad_norm": 0.7456172108650208, "learning_rate": 0.00018774752299653127, "loss": 1.863, "step": 6420 }, { "epoch": 1.5946431892863786, "grad_norm": 0.6850917935371399, "learning_rate": 0.00018771012590795434, "loss": 1.7964, "step": 6430 }, { "epoch": 1.5971231942463886, "grad_norm": 0.6574013233184814, "learning_rate": 0.0001876726755707508, "loss": 1.8635, "step": 6440 }, { "epoch": 1.5996031992063984, "grad_norm": 0.7072936296463013, "learning_rate": 0.00018763517200765664, "loss": 1.8926, "step": 6450 }, { "epoch": 1.6020832041664084, "grad_norm": 0.6977803707122803, "learning_rate": 0.00018759761524144022, "loss": 1.8293, "step": 6460 }, { "epoch": 1.6045632091264181, "grad_norm": 0.7164632678031921, "learning_rate": 0.00018756000529490216, "loss": 1.7896, "step": 6470 }, { "epoch": 1.6070432140864281, "grad_norm": 0.7482587695121765, "learning_rate": 0.00018752234219087538, "loss": 1.8141, "step": 6480 }, { "epoch": 1.6095232190464381, "grad_norm": 0.6833829879760742, "learning_rate": 0.00018748462595222508, "loss": 1.7785, "step": 6490 }, { "epoch": 1.6120032240064481, "grad_norm": 0.7720143795013428, "learning_rate": 0.00018744685660184866, "loss": 1.7982, "step": 6500 }, { "epoch": 1.614483228966458, "grad_norm": 0.6675408482551575, "learning_rate": 0.0001874090341626759, "loss": 1.7722, "step": 6510 }, { "epoch": 1.616963233926468, "grad_norm": 0.6653440594673157, "learning_rate": 0.00018737115865766863, "loss": 1.8461, "step": 6520 }, { "epoch": 1.6194432388864777, "grad_norm": 0.7201128602027893, "learning_rate": 0.00018733323010982104, "loss": 1.8344, "step": 6530 }, { "epoch": 1.6219232438464877, "grad_norm": 0.7022073864936829, "learning_rate": 0.00018729524854215943, "loss": 1.8033, "step": 6540 }, { "epoch": 1.6244032488064977, "grad_norm": 0.6909770965576172, "learning_rate": 0.0001872572139777424, "loss": 1.7777, "step": 6550 }, { "epoch": 1.6268832537665077, "grad_norm": 0.6666807532310486, "learning_rate": 0.00018721912643966055, "loss": 1.7996, "step": 6560 }, { "epoch": 1.6293632587265174, "grad_norm": 0.7246910929679871, "learning_rate": 0.0001871809859510368, "loss": 1.81, "step": 6570 }, { "epoch": 1.6318432636865272, "grad_norm": 0.6781728863716125, "learning_rate": 0.00018714279253502616, "loss": 1.7576, "step": 6580 }, { "epoch": 1.6343232686465372, "grad_norm": 0.6999672651290894, "learning_rate": 0.00018710454621481576, "loss": 1.8126, "step": 6590 }, { "epoch": 1.6368032736065472, "grad_norm": 0.703766405582428, "learning_rate": 0.00018706624701362483, "loss": 1.8109, "step": 6600 }, { "epoch": 1.6392832785665572, "grad_norm": 0.7479855418205261, "learning_rate": 0.00018702789495470478, "loss": 1.7835, "step": 6610 }, { "epoch": 1.6417632835265672, "grad_norm": 0.7109487056732178, "learning_rate": 0.00018698949006133904, "loss": 1.8078, "step": 6620 }, { "epoch": 1.644243288486577, "grad_norm": 0.7246449589729309, "learning_rate": 0.00018695103235684312, "loss": 1.7999, "step": 6630 }, { "epoch": 1.6467232934465867, "grad_norm": 0.6888371706008911, "learning_rate": 0.00018691252186456465, "loss": 1.8556, "step": 6640 }, { "epoch": 1.6492032984065967, "grad_norm": 0.6614171862602234, "learning_rate": 0.00018687395860788323, "loss": 1.8626, "step": 6650 }, { "epoch": 1.6516833033666067, "grad_norm": 0.6451037526130676, "learning_rate": 0.00018683534261021057, "loss": 1.8507, "step": 6660 }, { "epoch": 1.6541633083266167, "grad_norm": 0.7350413203239441, "learning_rate": 0.0001867966738949903, "loss": 1.8001, "step": 6670 }, { "epoch": 1.6566433132866267, "grad_norm": 0.761847972869873, "learning_rate": 0.00018675795248569813, "loss": 1.7653, "step": 6680 }, { "epoch": 1.6591233182466365, "grad_norm": 0.6977943778038025, "learning_rate": 0.0001867191784058418, "loss": 1.7868, "step": 6690 }, { "epoch": 1.6616033232066463, "grad_norm": 0.6676158308982849, "learning_rate": 0.00018668035167896088, "loss": 1.854, "step": 6700 }, { "epoch": 1.6640833281666563, "grad_norm": 0.7719148397445679, "learning_rate": 0.00018664147232862706, "loss": 1.7553, "step": 6710 }, { "epoch": 1.6665633331266663, "grad_norm": 0.6652414202690125, "learning_rate": 0.00018660254037844388, "loss": 1.8375, "step": 6720 }, { "epoch": 1.6690433380866763, "grad_norm": 0.7422439455986023, "learning_rate": 0.00018656355585204684, "loss": 1.8659, "step": 6730 }, { "epoch": 1.671523343046686, "grad_norm": 0.6858110427856445, "learning_rate": 0.00018652451877310336, "loss": 1.8021, "step": 6740 }, { "epoch": 1.674003348006696, "grad_norm": 0.6965064406394958, "learning_rate": 0.0001864854291653128, "loss": 1.8454, "step": 6750 }, { "epoch": 1.6764833529667058, "grad_norm": 0.7218753695487976, "learning_rate": 0.00018644628705240636, "loss": 1.8381, "step": 6760 }, { "epoch": 1.6789633579267158, "grad_norm": 0.6853536367416382, "learning_rate": 0.0001864070924581471, "loss": 1.7834, "step": 6770 }, { "epoch": 1.6814433628867258, "grad_norm": 0.7122074365615845, "learning_rate": 0.00018636784540633, "loss": 1.765, "step": 6780 }, { "epoch": 1.6839233678467358, "grad_norm": 0.7236666083335876, "learning_rate": 0.00018632854592078184, "loss": 1.8046, "step": 6790 }, { "epoch": 1.6864033728067456, "grad_norm": 0.6996268033981323, "learning_rate": 0.0001862891940253613, "loss": 1.8089, "step": 6800 }, { "epoch": 1.6888833777667556, "grad_norm": 0.7394871711730957, "learning_rate": 0.0001862497897439588, "loss": 1.7953, "step": 6810 }, { "epoch": 1.6913633827267653, "grad_norm": 0.6939400434494019, "learning_rate": 0.00018621033310049656, "loss": 1.8292, "step": 6820 }, { "epoch": 1.6938433876867753, "grad_norm": 0.6782166957855225, "learning_rate": 0.00018617082411892866, "loss": 1.8001, "step": 6830 }, { "epoch": 1.6963233926467853, "grad_norm": 0.7154285907745361, "learning_rate": 0.00018613126282324092, "loss": 1.7969, "step": 6840 }, { "epoch": 1.6988033976067953, "grad_norm": 0.759038507938385, "learning_rate": 0.00018609164923745087, "loss": 1.7746, "step": 6850 }, { "epoch": 1.701283402566805, "grad_norm": 0.6908287405967712, "learning_rate": 0.0001860519833856079, "loss": 1.7682, "step": 6860 }, { "epoch": 1.703763407526815, "grad_norm": 0.7424526214599609, "learning_rate": 0.00018601226529179306, "loss": 1.8196, "step": 6870 }, { "epoch": 1.7062434124868249, "grad_norm": 0.7291129231452942, "learning_rate": 0.00018597249498011903, "loss": 1.8359, "step": 6880 }, { "epoch": 1.7087234174468349, "grad_norm": 0.7176723480224609, "learning_rate": 0.0001859326724747304, "loss": 1.8016, "step": 6890 }, { "epoch": 1.7112034224068449, "grad_norm": 0.7186075448989868, "learning_rate": 0.0001858927977998032, "loss": 1.7719, "step": 6900 }, { "epoch": 1.7136834273668549, "grad_norm": 0.6708627939224243, "learning_rate": 0.00018585287097954534, "loss": 1.8005, "step": 6910 }, { "epoch": 1.7161634323268646, "grad_norm": 0.6831984519958496, "learning_rate": 0.0001858128920381963, "loss": 1.7962, "step": 6920 }, { "epoch": 1.7186434372868744, "grad_norm": 0.7038664221763611, "learning_rate": 0.00018577286100002723, "loss": 1.761, "step": 6930 }, { "epoch": 1.7211234422468844, "grad_norm": 0.693360447883606, "learning_rate": 0.00018573277788934084, "loss": 1.7751, "step": 6940 }, { "epoch": 1.7236034472068944, "grad_norm": 0.7013189792633057, "learning_rate": 0.00018569264273047146, "loss": 1.8005, "step": 6950 }, { "epoch": 1.7260834521669044, "grad_norm": 0.8188039064407349, "learning_rate": 0.00018565245554778517, "loss": 1.8367, "step": 6960 }, { "epoch": 1.7285634571269144, "grad_norm": 0.7016475796699524, "learning_rate": 0.00018561221636567943, "loss": 1.7609, "step": 6970 }, { "epoch": 1.7310434620869242, "grad_norm": 0.7186803221702576, "learning_rate": 0.0001855719252085834, "loss": 1.8397, "step": 6980 }, { "epoch": 1.733523467046934, "grad_norm": 0.7351244688034058, "learning_rate": 0.00018553158210095772, "loss": 1.7872, "step": 6990 }, { "epoch": 1.736003472006944, "grad_norm": 0.7762275338172913, "learning_rate": 0.00018549118706729468, "loss": 1.8339, "step": 7000 }, { "epoch": 1.738483476966954, "grad_norm": 0.7965319156646729, "learning_rate": 0.00018545074013211793, "loss": 1.7642, "step": 7010 }, { "epoch": 1.740963481926964, "grad_norm": 0.6989258527755737, "learning_rate": 0.00018541024131998274, "loss": 1.7861, "step": 7020 }, { "epoch": 1.743443486886974, "grad_norm": 0.7284238338470459, "learning_rate": 0.0001853696906554759, "loss": 1.7762, "step": 7030 }, { "epoch": 1.7459234918469837, "grad_norm": 0.6847079396247864, "learning_rate": 0.00018532908816321558, "loss": 1.8342, "step": 7040 }, { "epoch": 1.7484034968069935, "grad_norm": 0.6576705574989319, "learning_rate": 0.00018528843386785145, "loss": 1.815, "step": 7050 }, { "epoch": 1.7508835017670035, "grad_norm": 0.7153476476669312, "learning_rate": 0.0001852477277940647, "loss": 1.7593, "step": 7060 }, { "epoch": 1.7533635067270135, "grad_norm": 0.6890503168106079, "learning_rate": 0.00018520696996656788, "loss": 1.7855, "step": 7070 }, { "epoch": 1.7558435116870235, "grad_norm": 0.7169741988182068, "learning_rate": 0.00018516616041010494, "loss": 1.7918, "step": 7080 }, { "epoch": 1.7583235166470335, "grad_norm": 0.7051769495010376, "learning_rate": 0.00018512529914945135, "loss": 1.783, "step": 7090 }, { "epoch": 1.7608035216070432, "grad_norm": 0.7099260091781616, "learning_rate": 0.00018508438620941378, "loss": 1.859, "step": 7100 }, { "epoch": 1.763283526567053, "grad_norm": 0.733961820602417, "learning_rate": 0.00018504342161483047, "loss": 1.8326, "step": 7110 }, { "epoch": 1.765763531527063, "grad_norm": 0.8449649810791016, "learning_rate": 0.0001850024053905709, "loss": 1.7666, "step": 7120 }, { "epoch": 1.768243536487073, "grad_norm": 0.6947612166404724, "learning_rate": 0.00018496133756153602, "loss": 1.7784, "step": 7130 }, { "epoch": 1.770723541447083, "grad_norm": 0.8235635161399841, "learning_rate": 0.0001849202181526579, "loss": 1.7541, "step": 7140 }, { "epoch": 1.7732035464070928, "grad_norm": 0.7722574472427368, "learning_rate": 0.0001848790471889001, "loss": 1.8518, "step": 7150 }, { "epoch": 1.7756835513671028, "grad_norm": 0.7052320241928101, "learning_rate": 0.00018483782469525738, "loss": 1.7835, "step": 7160 }, { "epoch": 1.7781635563271125, "grad_norm": 0.7227842807769775, "learning_rate": 0.0001847965506967559, "loss": 1.788, "step": 7170 }, { "epoch": 1.7806435612871225, "grad_norm": 0.713796854019165, "learning_rate": 0.00018475522521845295, "loss": 1.8347, "step": 7180 }, { "epoch": 1.7831235662471325, "grad_norm": 0.6927456855773926, "learning_rate": 0.00018471384828543718, "loss": 1.787, "step": 7190 }, { "epoch": 1.7856035712071425, "grad_norm": 0.7317062616348267, "learning_rate": 0.00018467241992282843, "loss": 1.8165, "step": 7200 }, { "epoch": 1.7880835761671523, "grad_norm": 0.7170538306236267, "learning_rate": 0.00018463094015577775, "loss": 1.7905, "step": 7210 }, { "epoch": 1.7905635811271623, "grad_norm": 0.6815361380577087, "learning_rate": 0.00018458940900946739, "loss": 1.8998, "step": 7220 }, { "epoch": 1.793043586087172, "grad_norm": 0.7838888764381409, "learning_rate": 0.0001845478265091109, "loss": 1.7994, "step": 7230 }, { "epoch": 1.795523591047182, "grad_norm": 0.7330991625785828, "learning_rate": 0.00018450619267995283, "loss": 1.8262, "step": 7240 }, { "epoch": 1.798003596007192, "grad_norm": 0.7867507934570312, "learning_rate": 0.00018446450754726907, "loss": 1.7856, "step": 7250 }, { "epoch": 1.800483600967202, "grad_norm": 0.6976866126060486, "learning_rate": 0.00018442277113636654, "loss": 1.837, "step": 7260 }, { "epoch": 1.8029636059272118, "grad_norm": 0.7053040266036987, "learning_rate": 0.0001843809834725833, "loss": 1.8075, "step": 7270 }, { "epoch": 1.8054436108872218, "grad_norm": 0.7264782190322876, "learning_rate": 0.0001843391445812886, "loss": 1.8392, "step": 7280 }, { "epoch": 1.8079236158472316, "grad_norm": 0.7384432554244995, "learning_rate": 0.00018429725448788266, "loss": 1.7901, "step": 7290 }, { "epoch": 1.8104036208072416, "grad_norm": 0.7113457918167114, "learning_rate": 0.00018425531321779697, "loss": 1.859, "step": 7300 }, { "epoch": 1.8128836257672516, "grad_norm": 0.693682074546814, "learning_rate": 0.0001842133207964939, "loss": 1.8757, "step": 7310 }, { "epoch": 1.8153636307272616, "grad_norm": 0.696943998336792, "learning_rate": 0.00018417127724946702, "loss": 1.7946, "step": 7320 }, { "epoch": 1.8178436356872714, "grad_norm": 0.6696971654891968, "learning_rate": 0.0001841291826022408, "loss": 1.7775, "step": 7330 }, { "epoch": 1.8203236406472811, "grad_norm": 0.6955931782722473, "learning_rate": 0.00018408703688037087, "loss": 1.7338, "step": 7340 }, { "epoch": 1.8228036456072911, "grad_norm": 0.8137204051017761, "learning_rate": 0.0001840448401094438, "loss": 1.8533, "step": 7350 }, { "epoch": 1.8252836505673011, "grad_norm": 0.6938469409942627, "learning_rate": 0.00018400259231507717, "loss": 1.818, "step": 7360 }, { "epoch": 1.8277636555273111, "grad_norm": 0.7239459156990051, "learning_rate": 0.00018396029352291945, "loss": 1.7605, "step": 7370 }, { "epoch": 1.8302436604873211, "grad_norm": 0.7191905975341797, "learning_rate": 0.00018391794375865024, "loss": 1.8303, "step": 7380 }, { "epoch": 1.832723665447331, "grad_norm": 0.7267665266990662, "learning_rate": 0.0001838755430479799, "loss": 1.818, "step": 7390 }, { "epoch": 1.8352036704073407, "grad_norm": 0.748570442199707, "learning_rate": 0.0001838330914166499, "loss": 1.7961, "step": 7400 }, { "epoch": 1.8376836753673507, "grad_norm": 0.7724071741104126, "learning_rate": 0.00018379058889043246, "loss": 1.8515, "step": 7410 }, { "epoch": 1.8401636803273607, "grad_norm": 0.750237762928009, "learning_rate": 0.0001837480354951308, "loss": 1.867, "step": 7420 }, { "epoch": 1.8426436852873707, "grad_norm": 0.675631582736969, "learning_rate": 0.000183705431256579, "loss": 1.7754, "step": 7430 }, { "epoch": 1.8451236902473807, "grad_norm": 0.7144594192504883, "learning_rate": 0.00018366277620064197, "loss": 1.8192, "step": 7440 }, { "epoch": 1.8476036952073904, "grad_norm": 0.7249010801315308, "learning_rate": 0.00018362007035321556, "loss": 1.8547, "step": 7450 }, { "epoch": 1.8500837001674002, "grad_norm": 0.7054729461669922, "learning_rate": 0.00018357731374022635, "loss": 1.8139, "step": 7460 }, { "epoch": 1.8525637051274102, "grad_norm": 0.741942286491394, "learning_rate": 0.00018353450638763179, "loss": 1.7698, "step": 7470 }, { "epoch": 1.8550437100874202, "grad_norm": 0.7423719763755798, "learning_rate": 0.00018349164832142013, "loss": 1.7934, "step": 7480 }, { "epoch": 1.8575237150474302, "grad_norm": 0.8158997893333435, "learning_rate": 0.00018344873956761045, "loss": 1.893, "step": 7490 }, { "epoch": 1.86000372000744, "grad_norm": 0.7317871451377869, "learning_rate": 0.0001834057801522525, "loss": 1.8123, "step": 7500 }, { "epoch": 1.86248372496745, "grad_norm": 0.7026477456092834, "learning_rate": 0.0001833627701014269, "loss": 1.8213, "step": 7510 }, { "epoch": 1.8649637299274597, "grad_norm": 0.7163852453231812, "learning_rate": 0.0001833197094412449, "loss": 1.7753, "step": 7520 }, { "epoch": 1.8674437348874697, "grad_norm": 0.7176632285118103, "learning_rate": 0.00018327659819784858, "loss": 1.8194, "step": 7530 }, { "epoch": 1.8699237398474797, "grad_norm": 0.7520141005516052, "learning_rate": 0.00018323343639741068, "loss": 1.7382, "step": 7540 }, { "epoch": 1.8724037448074897, "grad_norm": 0.710927426815033, "learning_rate": 0.00018319022406613464, "loss": 1.8185, "step": 7550 }, { "epoch": 1.8748837497674995, "grad_norm": 0.6701392531394958, "learning_rate": 0.00018314696123025454, "loss": 1.7982, "step": 7560 }, { "epoch": 1.8773637547275095, "grad_norm": 0.7263465523719788, "learning_rate": 0.00018310364791603517, "loss": 1.781, "step": 7570 }, { "epoch": 1.8798437596875193, "grad_norm": 0.7514358758926392, "learning_rate": 0.00018306028414977193, "loss": 1.8347, "step": 7580 }, { "epoch": 1.8823237646475293, "grad_norm": 0.7695625424385071, "learning_rate": 0.00018301686995779093, "loss": 1.7763, "step": 7590 }, { "epoch": 1.8848037696075393, "grad_norm": 0.7509194016456604, "learning_rate": 0.00018297340536644875, "loss": 1.796, "step": 7600 }, { "epoch": 1.8872837745675493, "grad_norm": 0.6772938370704651, "learning_rate": 0.00018292989040213272, "loss": 1.7439, "step": 7610 }, { "epoch": 1.889763779527559, "grad_norm": 0.6906134486198425, "learning_rate": 0.00018288632509126066, "loss": 1.8171, "step": 7620 }, { "epoch": 1.892243784487569, "grad_norm": 0.6892842054367065, "learning_rate": 0.00018284270946028092, "loss": 1.7133, "step": 7630 }, { "epoch": 1.8947237894475788, "grad_norm": 0.7177791595458984, "learning_rate": 0.00018279904353567253, "loss": 1.7848, "step": 7640 }, { "epoch": 1.8972037944075888, "grad_norm": 0.7404572367668152, "learning_rate": 0.0001827553273439449, "loss": 1.8338, "step": 7650 }, { "epoch": 1.8996837993675988, "grad_norm": 0.6802579164505005, "learning_rate": 0.00018271156091163813, "loss": 1.8355, "step": 7660 }, { "epoch": 1.9021638043276088, "grad_norm": 0.6848039031028748, "learning_rate": 0.00018266774426532268, "loss": 1.795, "step": 7670 }, { "epoch": 1.9046438092876186, "grad_norm": 0.7783024907112122, "learning_rate": 0.0001826238774315995, "loss": 1.8309, "step": 7680 }, { "epoch": 1.9071238142476283, "grad_norm": 0.7328544855117798, "learning_rate": 0.00018257996043710012, "loss": 1.8229, "step": 7690 }, { "epoch": 1.9096038192076383, "grad_norm": 0.7623286843299866, "learning_rate": 0.00018253599330848638, "loss": 1.8428, "step": 7700 }, { "epoch": 1.9120838241676483, "grad_norm": 0.7214025855064392, "learning_rate": 0.0001824919760724507, "loss": 1.798, "step": 7710 }, { "epoch": 1.9145638291276583, "grad_norm": 0.734126091003418, "learning_rate": 0.0001824479087557158, "loss": 1.7948, "step": 7720 }, { "epoch": 1.9170438340876683, "grad_norm": 0.7752106785774231, "learning_rate": 0.00018240379138503488, "loss": 1.8656, "step": 7730 }, { "epoch": 1.919523839047678, "grad_norm": 0.6882955431938171, "learning_rate": 0.00018235962398719147, "loss": 1.7882, "step": 7740 }, { "epoch": 1.9220038440076879, "grad_norm": 0.7176995873451233, "learning_rate": 0.00018231540658899956, "loss": 1.7967, "step": 7750 }, { "epoch": 1.9244838489676979, "grad_norm": 0.6762322187423706, "learning_rate": 0.00018227113921730334, "loss": 1.7541, "step": 7760 }, { "epoch": 1.9269638539277079, "grad_norm": 0.6771329045295715, "learning_rate": 0.00018222682189897752, "loss": 1.8081, "step": 7770 }, { "epoch": 1.9294438588877179, "grad_norm": 0.6885058283805847, "learning_rate": 0.00018218245466092702, "loss": 1.8035, "step": 7780 }, { "epoch": 1.9319238638477279, "grad_norm": 0.6635348200798035, "learning_rate": 0.00018213803753008707, "loss": 1.8155, "step": 7790 }, { "epoch": 1.9344038688077376, "grad_norm": 0.7170267105102539, "learning_rate": 0.00018209357053342325, "loss": 1.7877, "step": 7800 }, { "epoch": 1.9368838737677474, "grad_norm": 0.7271670699119568, "learning_rate": 0.00018204905369793135, "loss": 1.8503, "step": 7810 }, { "epoch": 1.9393638787277574, "grad_norm": 0.7565329670906067, "learning_rate": 0.00018200448705063747, "loss": 1.7848, "step": 7820 }, { "epoch": 1.9418438836877674, "grad_norm": 0.7370867133140564, "learning_rate": 0.0001819598706185979, "loss": 1.7872, "step": 7830 }, { "epoch": 1.9443238886477774, "grad_norm": 0.7043139934539795, "learning_rate": 0.0001819152044288992, "loss": 1.764, "step": 7840 }, { "epoch": 1.9468038936077872, "grad_norm": 0.7746196985244751, "learning_rate": 0.00018187048850865808, "loss": 1.7954, "step": 7850 }, { "epoch": 1.9492838985677972, "grad_norm": 0.7171877026557922, "learning_rate": 0.00018182572288502152, "loss": 1.7933, "step": 7860 }, { "epoch": 1.951763903527807, "grad_norm": 0.667573094367981, "learning_rate": 0.00018178090758516662, "loss": 1.7796, "step": 7870 }, { "epoch": 1.954243908487817, "grad_norm": 0.7113328576087952, "learning_rate": 0.00018173604263630063, "loss": 1.8426, "step": 7880 }, { "epoch": 1.956723913447827, "grad_norm": 0.7328044772148132, "learning_rate": 0.000181691128065661, "loss": 1.8522, "step": 7890 }, { "epoch": 1.959203918407837, "grad_norm": 0.6921177506446838, "learning_rate": 0.00018164616390051522, "loss": 1.837, "step": 7900 }, { "epoch": 1.9616839233678467, "grad_norm": 0.7000945806503296, "learning_rate": 0.000181601150168161, "loss": 1.7452, "step": 7910 }, { "epoch": 1.9641639283278567, "grad_norm": 0.835935652256012, "learning_rate": 0.00018155608689592604, "loss": 1.8445, "step": 7920 }, { "epoch": 1.9666439332878665, "grad_norm": 0.7225850820541382, "learning_rate": 0.00018151097411116813, "loss": 1.786, "step": 7930 }, { "epoch": 1.9691239382478765, "grad_norm": 0.6941862106323242, "learning_rate": 0.00018146581184127518, "loss": 1.7506, "step": 7940 }, { "epoch": 1.9716039432078865, "grad_norm": 0.7091786861419678, "learning_rate": 0.00018142060011366512, "loss": 1.8744, "step": 7950 }, { "epoch": 1.9740839481678965, "grad_norm": 0.6893685460090637, "learning_rate": 0.00018137533895578583, "loss": 1.7464, "step": 7960 }, { "epoch": 1.9765639531279062, "grad_norm": 0.6945198774337769, "learning_rate": 0.00018133002839511533, "loss": 1.8894, "step": 7970 }, { "epoch": 1.9790439580879162, "grad_norm": 0.7510820031166077, "learning_rate": 0.00018128466845916154, "loss": 1.8154, "step": 7980 }, { "epoch": 1.981523963047926, "grad_norm": 0.7420031428337097, "learning_rate": 0.00018123925917546237, "loss": 1.8146, "step": 7990 }, { "epoch": 1.984003968007936, "grad_norm": 0.7060868740081787, "learning_rate": 0.00018119380057158568, "loss": 1.7797, "step": 8000 }, { "epoch": 1.986483972967946, "grad_norm": 0.7086240649223328, "learning_rate": 0.00018114829267512928, "loss": 1.7934, "step": 8010 }, { "epoch": 1.988963977927956, "grad_norm": 0.7174015045166016, "learning_rate": 0.000181102735513721, "loss": 1.7562, "step": 8020 }, { "epoch": 1.9914439828879658, "grad_norm": 0.6231516599655151, "learning_rate": 0.00018105712911501838, "loss": 1.7924, "step": 8030 }, { "epoch": 1.9939239878479755, "grad_norm": 0.7719596028327942, "learning_rate": 0.00018101147350670907, "loss": 1.8747, "step": 8040 }, { "epoch": 1.9964039928079855, "grad_norm": 0.749664843082428, "learning_rate": 0.00018096576871651042, "loss": 1.8553, "step": 8050 }, { "epoch": 1.9988839977679955, "grad_norm": 0.7149576544761658, "learning_rate": 0.00018092001477216973, "loss": 1.7741, "step": 8060 }, { "epoch": 2.0013640027280055, "grad_norm": 0.7196642160415649, "learning_rate": 0.00018087421170146412, "loss": 1.7028, "step": 8070 }, { "epoch": 2.0038440076880155, "grad_norm": 0.8466257452964783, "learning_rate": 0.00018082835953220054, "loss": 1.7266, "step": 8080 }, { "epoch": 2.0063240126480255, "grad_norm": 0.6822969913482666, "learning_rate": 0.00018078245829221575, "loss": 1.6703, "step": 8090 }, { "epoch": 2.008804017608035, "grad_norm": 0.8267595171928406, "learning_rate": 0.00018073650800937624, "loss": 1.7133, "step": 8100 }, { "epoch": 2.011284022568045, "grad_norm": 0.7269774675369263, "learning_rate": 0.0001806905087115784, "loss": 1.7366, "step": 8110 }, { "epoch": 2.013764027528055, "grad_norm": 0.7767934799194336, "learning_rate": 0.00018064446042674828, "loss": 1.7637, "step": 8120 }, { "epoch": 2.016244032488065, "grad_norm": 0.7064205408096313, "learning_rate": 0.00018059836318284168, "loss": 1.6629, "step": 8130 }, { "epoch": 2.018724037448075, "grad_norm": 0.7273749709129333, "learning_rate": 0.0001805522170078441, "loss": 1.674, "step": 8140 }, { "epoch": 2.0212040424080846, "grad_norm": 0.7249225974082947, "learning_rate": 0.00018050602192977086, "loss": 1.6633, "step": 8150 }, { "epoch": 2.0236840473680946, "grad_norm": 0.746360719203949, "learning_rate": 0.00018045977797666684, "loss": 1.7123, "step": 8160 }, { "epoch": 2.0261640523281046, "grad_norm": 0.714149534702301, "learning_rate": 0.00018041348517660666, "loss": 1.6978, "step": 8170 }, { "epoch": 2.0286440572881146, "grad_norm": 0.719533383846283, "learning_rate": 0.00018036714355769458, "loss": 1.7, "step": 8180 }, { "epoch": 2.0311240622481246, "grad_norm": 0.7731805443763733, "learning_rate": 0.00018032075314806448, "loss": 1.7369, "step": 8190 }, { "epoch": 2.0336040672081346, "grad_norm": 0.739291250705719, "learning_rate": 0.00018027431397587992, "loss": 1.737, "step": 8200 }, { "epoch": 2.036084072168144, "grad_norm": 0.792762041091919, "learning_rate": 0.00018022782606933398, "loss": 1.7221, "step": 8210 }, { "epoch": 2.038564077128154, "grad_norm": 0.8123416304588318, "learning_rate": 0.00018018128945664937, "loss": 1.6844, "step": 8220 }, { "epoch": 2.041044082088164, "grad_norm": 0.7434293031692505, "learning_rate": 0.00018013470416607837, "loss": 1.7407, "step": 8230 }, { "epoch": 2.043524087048174, "grad_norm": 0.6932642459869385, "learning_rate": 0.00018008807022590283, "loss": 1.7084, "step": 8240 }, { "epoch": 2.046004092008184, "grad_norm": 0.7205861210823059, "learning_rate": 0.00018004138766443406, "loss": 1.6771, "step": 8250 }, { "epoch": 2.048484096968194, "grad_norm": 0.828132688999176, "learning_rate": 0.00017999465651001296, "loss": 1.7001, "step": 8260 }, { "epoch": 2.0509641019282037, "grad_norm": 0.8889887928962708, "learning_rate": 0.00017994787679100998, "loss": 1.7458, "step": 8270 }, { "epoch": 2.0534441068882137, "grad_norm": 0.7904499173164368, "learning_rate": 0.00017990104853582493, "loss": 1.6571, "step": 8280 }, { "epoch": 2.0559241118482237, "grad_norm": 0.8607293367385864, "learning_rate": 0.00017985417177288714, "loss": 1.7113, "step": 8290 }, { "epoch": 2.0584041168082337, "grad_norm": 0.7840188145637512, "learning_rate": 0.00017980724653065538, "loss": 1.7306, "step": 8300 }, { "epoch": 2.0608841217682436, "grad_norm": 0.7568309903144836, "learning_rate": 0.00017976027283761787, "loss": 1.7568, "step": 8310 }, { "epoch": 2.0633641267282536, "grad_norm": 0.7962266206741333, "learning_rate": 0.00017971325072229226, "loss": 1.7057, "step": 8320 }, { "epoch": 2.065844131688263, "grad_norm": 0.7668977379798889, "learning_rate": 0.00017966618021322557, "loss": 1.7208, "step": 8330 }, { "epoch": 2.068324136648273, "grad_norm": 0.797734797000885, "learning_rate": 0.0001796190613389942, "loss": 1.7364, "step": 8340 }, { "epoch": 2.070804141608283, "grad_norm": 0.8187463879585266, "learning_rate": 0.00017957189412820386, "loss": 1.6687, "step": 8350 }, { "epoch": 2.073284146568293, "grad_norm": 0.7944939732551575, "learning_rate": 0.00017952467860948973, "loss": 1.6799, "step": 8360 }, { "epoch": 2.075764151528303, "grad_norm": 0.7479957342147827, "learning_rate": 0.0001794774148115163, "loss": 1.6735, "step": 8370 }, { "epoch": 2.078244156488313, "grad_norm": 0.8243913054466248, "learning_rate": 0.00017943010276297717, "loss": 1.7604, "step": 8380 }, { "epoch": 2.0807241614483227, "grad_norm": 0.7490248680114746, "learning_rate": 0.00017938274249259547, "loss": 1.6228, "step": 8390 }, { "epoch": 2.0832041664083327, "grad_norm": 0.7714105844497681, "learning_rate": 0.00017933533402912354, "loss": 1.6705, "step": 8400 }, { "epoch": 2.0856841713683427, "grad_norm": 0.8160969018936157, "learning_rate": 0.00017928787740134288, "loss": 1.707, "step": 8410 }, { "epoch": 2.0881641763283527, "grad_norm": 0.7864915728569031, "learning_rate": 0.0001792403726380644, "loss": 1.6826, "step": 8420 }, { "epoch": 2.0906441812883627, "grad_norm": 0.7798502445220947, "learning_rate": 0.00017919281976812803, "loss": 1.6958, "step": 8430 }, { "epoch": 2.0931241862483727, "grad_norm": 0.8461658954620361, "learning_rate": 0.0001791452188204031, "loss": 1.6742, "step": 8440 }, { "epoch": 2.0956041912083823, "grad_norm": 0.7308477759361267, "learning_rate": 0.000179097569823788, "loss": 1.706, "step": 8450 }, { "epoch": 2.0980841961683923, "grad_norm": 0.9373131990432739, "learning_rate": 0.00017904987280721035, "loss": 1.676, "step": 8460 }, { "epoch": 2.1005642011284023, "grad_norm": 0.7441970705986023, "learning_rate": 0.0001790021277996269, "loss": 1.6748, "step": 8470 }, { "epoch": 2.1030442060884122, "grad_norm": 0.7834221720695496, "learning_rate": 0.00017895433483002354, "loss": 1.6826, "step": 8480 }, { "epoch": 2.1055242110484222, "grad_norm": 0.8050951361656189, "learning_rate": 0.0001789064939274153, "loss": 1.7652, "step": 8490 }, { "epoch": 2.108004216008432, "grad_norm": 0.807235836982727, "learning_rate": 0.00017885860512084623, "loss": 1.7278, "step": 8500 }, { "epoch": 2.110484220968442, "grad_norm": 0.7822979688644409, "learning_rate": 0.00017881066843938956, "loss": 1.703, "step": 8510 }, { "epoch": 2.112964225928452, "grad_norm": 0.8085262179374695, "learning_rate": 0.00017876268391214754, "loss": 1.712, "step": 8520 }, { "epoch": 2.115444230888462, "grad_norm": 0.7531107664108276, "learning_rate": 0.00017871465156825148, "loss": 1.7009, "step": 8530 }, { "epoch": 2.117924235848472, "grad_norm": 0.7848300337791443, "learning_rate": 0.00017866657143686168, "loss": 1.7549, "step": 8540 }, { "epoch": 2.1204042408084818, "grad_norm": 0.7618765830993652, "learning_rate": 0.00017861844354716757, "loss": 1.7733, "step": 8550 }, { "epoch": 2.1228842457684913, "grad_norm": 0.8201743960380554, "learning_rate": 0.00017857026792838737, "loss": 1.7583, "step": 8560 }, { "epoch": 2.1253642507285013, "grad_norm": 0.7396107912063599, "learning_rate": 0.00017852204460976846, "loss": 1.7147, "step": 8570 }, { "epoch": 2.1278442556885113, "grad_norm": 0.7462581396102905, "learning_rate": 0.00017847377362058712, "loss": 1.7035, "step": 8580 }, { "epoch": 2.1303242606485213, "grad_norm": 0.7520790696144104, "learning_rate": 0.00017842545499014852, "loss": 1.6986, "step": 8590 }, { "epoch": 2.1328042656085313, "grad_norm": 0.7530083060264587, "learning_rate": 0.00017837708874778683, "loss": 1.753, "step": 8600 }, { "epoch": 2.1352842705685413, "grad_norm": 0.8094317317008972, "learning_rate": 0.00017832867492286505, "loss": 1.7664, "step": 8610 }, { "epoch": 2.137764275528551, "grad_norm": 0.7181040048599243, "learning_rate": 0.00017828021354477516, "loss": 1.6667, "step": 8620 }, { "epoch": 2.140244280488561, "grad_norm": 0.8137990832328796, "learning_rate": 0.0001782317046429379, "loss": 1.7005, "step": 8630 }, { "epoch": 2.142724285448571, "grad_norm": 0.792864203453064, "learning_rate": 0.000178183148246803, "loss": 1.6757, "step": 8640 }, { "epoch": 2.145204290408581, "grad_norm": 0.7756101489067078, "learning_rate": 0.00017813454438584886, "loss": 1.7149, "step": 8650 }, { "epoch": 2.147684295368591, "grad_norm": 0.8082769513130188, "learning_rate": 0.00017808589308958284, "loss": 1.6721, "step": 8660 }, { "epoch": 2.150164300328601, "grad_norm": 0.8983075022697449, "learning_rate": 0.00017803719438754095, "loss": 1.7542, "step": 8670 }, { "epoch": 2.1526443052886104, "grad_norm": 0.8210811018943787, "learning_rate": 0.00017798844830928817, "loss": 1.6946, "step": 8680 }, { "epoch": 2.1551243102486204, "grad_norm": 0.7229629755020142, "learning_rate": 0.0001779396548844181, "loss": 1.6686, "step": 8690 }, { "epoch": 2.1576043152086304, "grad_norm": 0.7622604370117188, "learning_rate": 0.00017789081414255312, "loss": 1.7335, "step": 8700 }, { "epoch": 2.1600843201686404, "grad_norm": 0.771666944026947, "learning_rate": 0.0001778419261133443, "loss": 1.6763, "step": 8710 }, { "epoch": 2.1625643251286504, "grad_norm": 0.8421552777290344, "learning_rate": 0.00017779299082647148, "loss": 1.741, "step": 8720 }, { "epoch": 2.1650443300886604, "grad_norm": 0.7799654006958008, "learning_rate": 0.00017774400831164323, "loss": 1.6901, "step": 8730 }, { "epoch": 2.16752433504867, "grad_norm": 0.7670300602912903, "learning_rate": 0.00017769497859859663, "loss": 1.7533, "step": 8740 }, { "epoch": 2.17000434000868, "grad_norm": 0.8089250922203064, "learning_rate": 0.0001776459017170976, "loss": 1.7927, "step": 8750 }, { "epoch": 2.17248434496869, "grad_norm": 0.7334123253822327, "learning_rate": 0.0001775967776969405, "loss": 1.7462, "step": 8760 }, { "epoch": 2.1749643499287, "grad_norm": 0.7781856060028076, "learning_rate": 0.0001775476065679485, "loss": 1.6699, "step": 8770 }, { "epoch": 2.17744435488871, "grad_norm": 0.7868925929069519, "learning_rate": 0.00017749838835997328, "loss": 1.712, "step": 8780 }, { "epoch": 2.1799243598487195, "grad_norm": 0.6954182386398315, "learning_rate": 0.0001774491231028951, "loss": 1.7006, "step": 8790 }, { "epoch": 2.1824043648087295, "grad_norm": 0.7828925251960754, "learning_rate": 0.00017739981082662276, "loss": 1.707, "step": 8800 }, { "epoch": 2.1848843697687395, "grad_norm": 0.8318265676498413, "learning_rate": 0.00017735045156109367, "loss": 1.7669, "step": 8810 }, { "epoch": 2.1873643747287494, "grad_norm": 0.844477653503418, "learning_rate": 0.0001773010453362737, "loss": 1.6718, "step": 8820 }, { "epoch": 2.1898443796887594, "grad_norm": 0.8850805759429932, "learning_rate": 0.0001772515921821573, "loss": 1.7371, "step": 8830 }, { "epoch": 2.1923243846487694, "grad_norm": 0.7837343811988831, "learning_rate": 0.00017720209212876737, "loss": 1.6949, "step": 8840 }, { "epoch": 2.1948043896087794, "grad_norm": 0.7297570109367371, "learning_rate": 0.00017715254520615523, "loss": 1.6592, "step": 8850 }, { "epoch": 2.197284394568789, "grad_norm": 0.8145079612731934, "learning_rate": 0.00017710295144440078, "loss": 1.7381, "step": 8860 }, { "epoch": 2.199764399528799, "grad_norm": 0.8012209534645081, "learning_rate": 0.00017705331087361224, "loss": 1.7826, "step": 8870 }, { "epoch": 2.202244404488809, "grad_norm": 0.7602465152740479, "learning_rate": 0.0001770036235239263, "loss": 1.7411, "step": 8880 }, { "epoch": 2.204724409448819, "grad_norm": 0.7813806533813477, "learning_rate": 0.00017695388942550805, "loss": 1.7317, "step": 8890 }, { "epoch": 2.207204414408829, "grad_norm": 0.8720101714134216, "learning_rate": 0.00017690410860855094, "loss": 1.66, "step": 8900 }, { "epoch": 2.2096844193688385, "grad_norm": 0.8317455649375916, "learning_rate": 0.00017685428110327683, "loss": 1.7734, "step": 8910 }, { "epoch": 2.2121644243288485, "grad_norm": 0.7609864473342896, "learning_rate": 0.00017680440693993585, "loss": 1.6641, "step": 8920 }, { "epoch": 2.2146444292888585, "grad_norm": 0.78978431224823, "learning_rate": 0.0001767544861488065, "loss": 1.6812, "step": 8930 }, { "epoch": 2.2171244342488685, "grad_norm": 0.7690613269805908, "learning_rate": 0.00017670451876019563, "loss": 1.7145, "step": 8940 }, { "epoch": 2.2196044392088785, "grad_norm": 0.7488145232200623, "learning_rate": 0.00017665450480443826, "loss": 1.7223, "step": 8950 }, { "epoch": 2.2220844441688885, "grad_norm": 0.766918420791626, "learning_rate": 0.0001766044443118978, "loss": 1.7422, "step": 8960 }, { "epoch": 2.224564449128898, "grad_norm": 0.8061556220054626, "learning_rate": 0.00017655433731296588, "loss": 1.7434, "step": 8970 }, { "epoch": 2.227044454088908, "grad_norm": 0.7330317497253418, "learning_rate": 0.00017650418383806233, "loss": 1.7188, "step": 8980 }, { "epoch": 2.229524459048918, "grad_norm": 0.822127640247345, "learning_rate": 0.00017645398391763516, "loss": 1.6602, "step": 8990 }, { "epoch": 2.232004464008928, "grad_norm": 0.8580800294876099, "learning_rate": 0.00017640373758216077, "loss": 1.7452, "step": 9000 }, { "epoch": 2.234484468968938, "grad_norm": 0.7564178705215454, "learning_rate": 0.00017635344486214346, "loss": 1.7382, "step": 9010 }, { "epoch": 2.236964473928948, "grad_norm": 0.7477293610572815, "learning_rate": 0.0001763031057881159, "loss": 1.7573, "step": 9020 }, { "epoch": 2.2394444788889576, "grad_norm": 0.7672526836395264, "learning_rate": 0.00017625272039063883, "loss": 1.7245, "step": 9030 }, { "epoch": 2.2419244838489676, "grad_norm": 0.8399450778961182, "learning_rate": 0.00017620228870030108, "loss": 1.6886, "step": 9040 }, { "epoch": 2.2444044888089776, "grad_norm": 0.8164432644844055, "learning_rate": 0.00017615181074771968, "loss": 1.6718, "step": 9050 }, { "epoch": 2.2468844937689876, "grad_norm": 0.77805495262146, "learning_rate": 0.00017610128656353962, "loss": 1.653, "step": 9060 }, { "epoch": 2.2493644987289976, "grad_norm": 0.817104697227478, "learning_rate": 0.00017605071617843407, "loss": 1.7316, "step": 9070 }, { "epoch": 2.2518445036890076, "grad_norm": 0.7542750835418701, "learning_rate": 0.00017600009962310417, "loss": 1.732, "step": 9080 }, { "epoch": 2.254324508649017, "grad_norm": 0.8358319997787476, "learning_rate": 0.00017594943692827915, "loss": 1.7192, "step": 9090 }, { "epoch": 2.256804513609027, "grad_norm": 0.7867502570152283, "learning_rate": 0.0001758987281247162, "loss": 1.7219, "step": 9100 }, { "epoch": 2.259284518569037, "grad_norm": 0.7503636479377747, "learning_rate": 0.00017584797324320052, "loss": 1.722, "step": 9110 }, { "epoch": 2.261764523529047, "grad_norm": 0.7286480069160461, "learning_rate": 0.0001757971723145453, "loss": 1.7113, "step": 9120 }, { "epoch": 2.264244528489057, "grad_norm": 0.9005977511405945, "learning_rate": 0.00017574632536959168, "loss": 1.7213, "step": 9130 }, { "epoch": 2.266724533449067, "grad_norm": 0.8166096806526184, "learning_rate": 0.00017569543243920873, "loss": 1.6483, "step": 9140 }, { "epoch": 2.2692045384090767, "grad_norm": 0.8187897801399231, "learning_rate": 0.0001756444935542934, "loss": 1.6985, "step": 9150 }, { "epoch": 2.2716845433690867, "grad_norm": 0.7724904417991638, "learning_rate": 0.00017559350874577065, "loss": 1.7356, "step": 9160 }, { "epoch": 2.2741645483290966, "grad_norm": 0.8105554580688477, "learning_rate": 0.00017554247804459316, "loss": 1.6206, "step": 9170 }, { "epoch": 2.2766445532891066, "grad_norm": 0.7334214448928833, "learning_rate": 0.0001754914014817416, "loss": 1.6594, "step": 9180 }, { "epoch": 2.2791245582491166, "grad_norm": 0.8149214386940002, "learning_rate": 0.00017544027908822444, "loss": 1.6853, "step": 9190 }, { "epoch": 2.281604563209126, "grad_norm": 0.7332905530929565, "learning_rate": 0.000175389110895078, "loss": 1.7218, "step": 9200 }, { "epoch": 2.284084568169136, "grad_norm": 0.743989884853363, "learning_rate": 0.00017533789693336633, "loss": 1.7004, "step": 9210 }, { "epoch": 2.286564573129146, "grad_norm": 0.7727570533752441, "learning_rate": 0.00017528663723418136, "loss": 1.6864, "step": 9220 }, { "epoch": 2.289044578089156, "grad_norm": 0.8129294514656067, "learning_rate": 0.0001752353318286427, "loss": 1.6946, "step": 9230 }, { "epoch": 2.291524583049166, "grad_norm": 0.9174445867538452, "learning_rate": 0.00017518398074789775, "loss": 1.7763, "step": 9240 }, { "epoch": 2.294004588009176, "grad_norm": 0.7691509127616882, "learning_rate": 0.00017513258402312168, "loss": 1.7373, "step": 9250 }, { "epoch": 2.296484592969186, "grad_norm": 0.7788468599319458, "learning_rate": 0.0001750811416855173, "loss": 1.6936, "step": 9260 }, { "epoch": 2.2989645979291957, "grad_norm": 0.7824164628982544, "learning_rate": 0.00017502965376631514, "loss": 1.743, "step": 9270 }, { "epoch": 2.3014446028892057, "grad_norm": 0.7765858173370361, "learning_rate": 0.00017497812029677344, "loss": 1.7117, "step": 9280 }, { "epoch": 2.3039246078492157, "grad_norm": 0.7741357088088989, "learning_rate": 0.000174926541308178, "loss": 1.7012, "step": 9290 }, { "epoch": 2.3064046128092257, "grad_norm": 0.8304239511489868, "learning_rate": 0.00017487491683184235, "loss": 1.6477, "step": 9300 }, { "epoch": 2.3088846177692357, "grad_norm": 0.9060750603675842, "learning_rate": 0.00017482324689910758, "loss": 1.7314, "step": 9310 }, { "epoch": 2.3113646227292453, "grad_norm": 0.7856857180595398, "learning_rate": 0.00017477153154134243, "loss": 1.7262, "step": 9320 }, { "epoch": 2.3138446276892553, "grad_norm": 0.7838661074638367, "learning_rate": 0.00017471977078994315, "loss": 1.7106, "step": 9330 }, { "epoch": 2.3163246326492652, "grad_norm": 0.7551782727241516, "learning_rate": 0.00017466796467633354, "loss": 1.767, "step": 9340 }, { "epoch": 2.3188046376092752, "grad_norm": 0.7626734375953674, "learning_rate": 0.00017461611323196506, "loss": 1.6888, "step": 9350 }, { "epoch": 2.3212846425692852, "grad_norm": 0.7644707560539246, "learning_rate": 0.00017456421648831655, "loss": 1.6997, "step": 9360 }, { "epoch": 2.3237646475292952, "grad_norm": 0.8020896911621094, "learning_rate": 0.00017451227447689446, "loss": 1.7241, "step": 9370 }, { "epoch": 2.326244652489305, "grad_norm": 0.8020631670951843, "learning_rate": 0.00017446028722923265, "loss": 1.689, "step": 9380 }, { "epoch": 2.328724657449315, "grad_norm": 0.8963233232498169, "learning_rate": 0.00017440825477689243, "loss": 1.6784, "step": 9390 }, { "epoch": 2.331204662409325, "grad_norm": 0.7478158473968506, "learning_rate": 0.00017435617715146263, "loss": 1.7703, "step": 9400 }, { "epoch": 2.3336846673693348, "grad_norm": 0.8088776469230652, "learning_rate": 0.0001743040543845594, "loss": 1.6268, "step": 9410 }, { "epoch": 2.3361646723293448, "grad_norm": 0.780013382434845, "learning_rate": 0.00017425188650782648, "loss": 1.6768, "step": 9420 }, { "epoch": 2.3386446772893548, "grad_norm": 0.7265764474868774, "learning_rate": 0.00017419967355293477, "loss": 1.7766, "step": 9430 }, { "epoch": 2.3411246822493643, "grad_norm": 0.8079309463500977, "learning_rate": 0.00017414741555158266, "loss": 1.7487, "step": 9440 }, { "epoch": 2.3436046872093743, "grad_norm": 0.7889912724494934, "learning_rate": 0.00017409511253549593, "loss": 1.7093, "step": 9450 }, { "epoch": 2.3460846921693843, "grad_norm": 0.8098340034484863, "learning_rate": 0.00017404276453642753, "loss": 1.6022, "step": 9460 }, { "epoch": 2.3485646971293943, "grad_norm": 0.7909839749336243, "learning_rate": 0.0001739903715861579, "loss": 1.757, "step": 9470 }, { "epoch": 2.3510447020894043, "grad_norm": 0.8547044396400452, "learning_rate": 0.00017393793371649462, "loss": 1.7027, "step": 9480 }, { "epoch": 2.353524707049414, "grad_norm": 0.7982743382453918, "learning_rate": 0.00017388545095927267, "loss": 1.7402, "step": 9490 }, { "epoch": 2.356004712009424, "grad_norm": 0.8027835488319397, "learning_rate": 0.0001738329233463542, "loss": 1.6997, "step": 9500 }, { "epoch": 2.358484716969434, "grad_norm": 0.9833059310913086, "learning_rate": 0.00017378035090962856, "loss": 1.6549, "step": 9510 }, { "epoch": 2.360964721929444, "grad_norm": 0.7589620351791382, "learning_rate": 0.0001737277336810124, "loss": 1.6797, "step": 9520 }, { "epoch": 2.363444726889454, "grad_norm": 0.8295498490333557, "learning_rate": 0.00017367507169244956, "loss": 1.8149, "step": 9530 }, { "epoch": 2.365924731849464, "grad_norm": 0.8063281178474426, "learning_rate": 0.00017362236497591094, "loss": 1.7239, "step": 9540 }, { "epoch": 2.368404736809474, "grad_norm": 0.747158944606781, "learning_rate": 0.00017356961356339477, "loss": 1.7549, "step": 9550 }, { "epoch": 2.3708847417694834, "grad_norm": 0.8062665462493896, "learning_rate": 0.0001735168174869262, "loss": 1.7706, "step": 9560 }, { "epoch": 2.3733647467294934, "grad_norm": 0.8283780813217163, "learning_rate": 0.0001734639767785577, "loss": 1.6389, "step": 9570 }, { "epoch": 2.3758447516895034, "grad_norm": 0.7413634061813354, "learning_rate": 0.00017341109147036874, "loss": 1.7445, "step": 9580 }, { "epoch": 2.3783247566495134, "grad_norm": 0.8234320878982544, "learning_rate": 0.00017335816159446584, "loss": 1.6611, "step": 9590 }, { "epoch": 2.3808047616095234, "grad_norm": 0.7659749984741211, "learning_rate": 0.00017330518718298264, "loss": 1.6859, "step": 9600 }, { "epoch": 2.383284766569533, "grad_norm": 0.8007732033729553, "learning_rate": 0.0001732521682680798, "loss": 1.7249, "step": 9610 }, { "epoch": 2.385764771529543, "grad_norm": 0.7565622329711914, "learning_rate": 0.00017319910488194492, "loss": 1.7031, "step": 9620 }, { "epoch": 2.388244776489553, "grad_norm": 0.7995969653129578, "learning_rate": 0.00017314599705679277, "loss": 1.7743, "step": 9630 }, { "epoch": 2.390724781449563, "grad_norm": 0.7712788581848145, "learning_rate": 0.00017309284482486495, "loss": 1.7556, "step": 9640 }, { "epoch": 2.393204786409573, "grad_norm": 0.846427857875824, "learning_rate": 0.00017303964821843002, "loss": 1.7141, "step": 9650 }, { "epoch": 2.395684791369583, "grad_norm": 0.7683253884315491, "learning_rate": 0.00017298640726978357, "loss": 1.7192, "step": 9660 }, { "epoch": 2.398164796329593, "grad_norm": 0.797095537185669, "learning_rate": 0.00017293312201124806, "loss": 1.7032, "step": 9670 }, { "epoch": 2.4006448012896024, "grad_norm": 0.7995537519454956, "learning_rate": 0.00017287979247517286, "loss": 1.7381, "step": 9680 }, { "epoch": 2.4031248062496124, "grad_norm": 0.8047399520874023, "learning_rate": 0.00017282641869393418, "loss": 1.7271, "step": 9690 }, { "epoch": 2.4056048112096224, "grad_norm": 0.7926669120788574, "learning_rate": 0.00017277300069993515, "loss": 1.7047, "step": 9700 }, { "epoch": 2.4080848161696324, "grad_norm": 0.7393646240234375, "learning_rate": 0.00017271953852560575, "loss": 1.7405, "step": 9710 }, { "epoch": 2.4105648211296424, "grad_norm": 0.7627696990966797, "learning_rate": 0.0001726660322034027, "loss": 1.6729, "step": 9720 }, { "epoch": 2.413044826089652, "grad_norm": 0.794486403465271, "learning_rate": 0.00017261248176580958, "loss": 1.6695, "step": 9730 }, { "epoch": 2.415524831049662, "grad_norm": 0.8272292017936707, "learning_rate": 0.0001725588872453368, "loss": 1.7694, "step": 9740 }, { "epoch": 2.418004836009672, "grad_norm": 0.7959834337234497, "learning_rate": 0.0001725052486745214, "loss": 1.6821, "step": 9750 }, { "epoch": 2.420484840969682, "grad_norm": 0.8304336071014404, "learning_rate": 0.00017245156608592727, "loss": 1.702, "step": 9760 }, { "epoch": 2.422964845929692, "grad_norm": 0.7784603238105774, "learning_rate": 0.00017239783951214502, "loss": 1.7451, "step": 9770 }, { "epoch": 2.425444850889702, "grad_norm": 0.8049282431602478, "learning_rate": 0.00017234406898579188, "loss": 1.7577, "step": 9780 }, { "epoch": 2.4279248558497115, "grad_norm": 0.7931556105613708, "learning_rate": 0.00017229025453951183, "loss": 1.705, "step": 9790 }, { "epoch": 2.4304048608097215, "grad_norm": 0.7905335426330566, "learning_rate": 0.00017223639620597556, "loss": 1.7001, "step": 9800 }, { "epoch": 2.4328848657697315, "grad_norm": 0.785211443901062, "learning_rate": 0.00017218249401788031, "loss": 1.7204, "step": 9810 }, { "epoch": 2.4353648707297415, "grad_norm": 0.7745071649551392, "learning_rate": 0.00017212854800794996, "loss": 1.7019, "step": 9820 }, { "epoch": 2.4378448756897515, "grad_norm": 0.7807512879371643, "learning_rate": 0.00017207455820893505, "loss": 1.7346, "step": 9830 }, { "epoch": 2.4403248806497615, "grad_norm": 0.7244405746459961, "learning_rate": 0.00017202052465361268, "loss": 1.7758, "step": 9840 }, { "epoch": 2.442804885609771, "grad_norm": 0.7513141632080078, "learning_rate": 0.00017196644737478645, "loss": 1.7316, "step": 9850 }, { "epoch": 2.445284890569781, "grad_norm": 0.7699015736579895, "learning_rate": 0.0001719123264052866, "loss": 1.7933, "step": 9860 }, { "epoch": 2.447764895529791, "grad_norm": 0.7975505590438843, "learning_rate": 0.0001718581617779698, "loss": 1.6899, "step": 9870 }, { "epoch": 2.450244900489801, "grad_norm": 0.7789102792739868, "learning_rate": 0.0001718039535257194, "loss": 1.7527, "step": 9880 }, { "epoch": 2.452724905449811, "grad_norm": 0.8182029128074646, "learning_rate": 0.00017174970168144495, "loss": 1.7485, "step": 9890 }, { "epoch": 2.4552049104098206, "grad_norm": 0.7718999981880188, "learning_rate": 0.00017169540627808274, "loss": 1.7086, "step": 9900 }, { "epoch": 2.4576849153698306, "grad_norm": 0.7620275616645813, "learning_rate": 0.00017164106734859536, "loss": 1.7243, "step": 9910 }, { "epoch": 2.4601649203298406, "grad_norm": 0.8015621304512024, "learning_rate": 0.00017158668492597186, "loss": 1.6978, "step": 9920 }, { "epoch": 2.4626449252898506, "grad_norm": 0.7627947330474854, "learning_rate": 0.00017153225904322766, "loss": 1.6888, "step": 9930 }, { "epoch": 2.4651249302498606, "grad_norm": 0.738109827041626, "learning_rate": 0.00017147778973340466, "loss": 1.7365, "step": 9940 }, { "epoch": 2.4676049352098706, "grad_norm": 0.7651798129081726, "learning_rate": 0.000171423277029571, "loss": 1.7147, "step": 9950 }, { "epoch": 2.4700849401698806, "grad_norm": 0.8257949352264404, "learning_rate": 0.0001713687209648212, "loss": 1.704, "step": 9960 }, { "epoch": 2.47256494512989, "grad_norm": 0.7816750407218933, "learning_rate": 0.00017131412157227625, "loss": 1.7019, "step": 9970 }, { "epoch": 2.4750449500899, "grad_norm": 0.8303975462913513, "learning_rate": 0.0001712594788850832, "loss": 1.7225, "step": 9980 }, { "epoch": 2.47752495504991, "grad_norm": 0.8580611944198608, "learning_rate": 0.00017120479293641555, "loss": 1.7306, "step": 9990 }, { "epoch": 2.48000496000992, "grad_norm": 0.7864581346511841, "learning_rate": 0.00017115006375947303, "loss": 1.6757, "step": 10000 }, { "epoch": 2.48248496496993, "grad_norm": 0.7828495502471924, "learning_rate": 0.00017109529138748157, "loss": 1.6971, "step": 10010 }, { "epoch": 2.4849649699299396, "grad_norm": 0.7776135206222534, "learning_rate": 0.00017104047585369343, "loss": 1.7392, "step": 10020 }, { "epoch": 2.4874449748899496, "grad_norm": 0.8050247430801392, "learning_rate": 0.00017098561719138694, "loss": 1.7947, "step": 10030 }, { "epoch": 2.4899249798499596, "grad_norm": 0.7671525478363037, "learning_rate": 0.00017093071543386668, "loss": 1.7134, "step": 10040 }, { "epoch": 2.4924049848099696, "grad_norm": 0.824383020401001, "learning_rate": 0.00017087577061446337, "loss": 1.7234, "step": 10050 }, { "epoch": 2.4948849897699796, "grad_norm": 0.74275803565979, "learning_rate": 0.00017082078276653393, "loss": 1.7163, "step": 10060 }, { "epoch": 2.4973649947299896, "grad_norm": 0.7931815981864929, "learning_rate": 0.00017076575192346137, "loss": 1.7078, "step": 10070 }, { "epoch": 2.4998449996899996, "grad_norm": 0.7598164081573486, "learning_rate": 0.00017071067811865476, "loss": 1.7395, "step": 10080 }, { "epoch": 2.502325004650009, "grad_norm": 0.8434139490127563, "learning_rate": 0.0001706555613855493, "loss": 1.6828, "step": 10090 }, { "epoch": 2.504805009610019, "grad_norm": 0.740534245967865, "learning_rate": 0.00017060040175760624, "loss": 1.75, "step": 10100 }, { "epoch": 2.507285014570029, "grad_norm": 0.8243404626846313, "learning_rate": 0.0001705451992683129, "loss": 1.6813, "step": 10110 }, { "epoch": 2.509765019530039, "grad_norm": 0.8587687611579895, "learning_rate": 0.00017048995395118252, "loss": 1.6835, "step": 10120 }, { "epoch": 2.512245024490049, "grad_norm": 0.7877902388572693, "learning_rate": 0.00017043466583975447, "loss": 1.6899, "step": 10130 }, { "epoch": 2.5147250294500587, "grad_norm": 0.8048937916755676, "learning_rate": 0.00017037933496759404, "loss": 1.7113, "step": 10140 }, { "epoch": 2.5172050344100687, "grad_norm": 0.8216809630393982, "learning_rate": 0.00017032396136829246, "loss": 1.7106, "step": 10150 }, { "epoch": 2.5196850393700787, "grad_norm": 0.8559943437576294, "learning_rate": 0.00017026854507546692, "loss": 1.7434, "step": 10160 }, { "epoch": 2.5221650443300887, "grad_norm": 0.8496769070625305, "learning_rate": 0.00017021308612276054, "loss": 1.7544, "step": 10170 }, { "epoch": 2.5246450492900987, "grad_norm": 0.7176957130432129, "learning_rate": 0.00017015758454384233, "loss": 1.6993, "step": 10180 }, { "epoch": 2.5271250542501082, "grad_norm": 0.788009524345398, "learning_rate": 0.00017010204037240718, "loss": 1.806, "step": 10190 }, { "epoch": 2.5296050592101187, "grad_norm": 0.7690684795379639, "learning_rate": 0.00017004645364217583, "loss": 1.7622, "step": 10200 }, { "epoch": 2.5320850641701282, "grad_norm": 0.8179535269737244, "learning_rate": 0.0001699908243868949, "loss": 1.7159, "step": 10210 }, { "epoch": 2.5345650691301382, "grad_norm": 0.8218116760253906, "learning_rate": 0.00016993515264033672, "loss": 1.7361, "step": 10220 }, { "epoch": 2.5370450740901482, "grad_norm": 0.8178150057792664, "learning_rate": 0.00016987943843629953, "loss": 1.7788, "step": 10230 }, { "epoch": 2.5395250790501582, "grad_norm": 0.7793101668357849, "learning_rate": 0.00016982368180860728, "loss": 1.7611, "step": 10240 }, { "epoch": 2.5420050840101682, "grad_norm": 0.8476938605308533, "learning_rate": 0.00016976788279110975, "loss": 1.7419, "step": 10250 }, { "epoch": 2.5444850889701778, "grad_norm": 0.8483054041862488, "learning_rate": 0.00016971204141768233, "loss": 1.7384, "step": 10260 }, { "epoch": 2.5469650939301878, "grad_norm": 0.7705952525138855, "learning_rate": 0.00016965615772222625, "loss": 1.7456, "step": 10270 }, { "epoch": 2.5494450988901978, "grad_norm": 0.8043239712715149, "learning_rate": 0.00016960023173866835, "loss": 1.747, "step": 10280 }, { "epoch": 2.5519251038502078, "grad_norm": 0.8316717147827148, "learning_rate": 0.00016954426350096116, "loss": 1.7278, "step": 10290 }, { "epoch": 2.5544051088102178, "grad_norm": 0.7975428104400635, "learning_rate": 0.00016948825304308293, "loss": 1.7185, "step": 10300 }, { "epoch": 2.5568851137702273, "grad_norm": 0.8466578722000122, "learning_rate": 0.00016943220039903744, "loss": 1.7281, "step": 10310 }, { "epoch": 2.5593651187302373, "grad_norm": 0.784413754940033, "learning_rate": 0.00016937610560285418, "loss": 1.7667, "step": 10320 }, { "epoch": 2.5618451236902473, "grad_norm": 0.7354859113693237, "learning_rate": 0.0001693199686885881, "loss": 1.6981, "step": 10330 }, { "epoch": 2.5643251286502573, "grad_norm": 0.830909013748169, "learning_rate": 0.00016926378969031987, "loss": 1.7656, "step": 10340 }, { "epoch": 2.5668051336102673, "grad_norm": 0.7383814454078674, "learning_rate": 0.00016920756864215557, "loss": 1.7628, "step": 10350 }, { "epoch": 2.5692851385702773, "grad_norm": 0.7582780122756958, "learning_rate": 0.00016915130557822695, "loss": 1.7609, "step": 10360 }, { "epoch": 2.5717651435302873, "grad_norm": 0.8444397449493408, "learning_rate": 0.00016909500053269113, "loss": 1.7922, "step": 10370 }, { "epoch": 2.574245148490297, "grad_norm": 0.8196545243263245, "learning_rate": 0.00016903865353973084, "loss": 1.7598, "step": 10380 }, { "epoch": 2.576725153450307, "grad_norm": 0.8253687024116516, "learning_rate": 0.00016898226463355417, "loss": 1.7142, "step": 10390 }, { "epoch": 2.579205158410317, "grad_norm": 0.93372642993927, "learning_rate": 0.0001689258338483947, "loss": 1.7048, "step": 10400 }, { "epoch": 2.581685163370327, "grad_norm": 0.8133403658866882, "learning_rate": 0.0001688693612185115, "loss": 1.7031, "step": 10410 }, { "epoch": 2.584165168330337, "grad_norm": 0.7580595016479492, "learning_rate": 0.00016881284677818892, "loss": 1.7277, "step": 10420 }, { "epoch": 2.5866451732903464, "grad_norm": 0.7730793952941895, "learning_rate": 0.00016875629056173675, "loss": 1.7465, "step": 10430 }, { "epoch": 2.5891251782503564, "grad_norm": 0.8139658570289612, "learning_rate": 0.00016869969260349018, "loss": 1.7008, "step": 10440 }, { "epoch": 2.5916051832103664, "grad_norm": 0.7631027102470398, "learning_rate": 0.00016864305293780966, "loss": 1.8093, "step": 10450 }, { "epoch": 2.5940851881703764, "grad_norm": 0.7781238555908203, "learning_rate": 0.00016858637159908107, "loss": 1.7592, "step": 10460 }, { "epoch": 2.5965651931303864, "grad_norm": 0.7929747104644775, "learning_rate": 0.00016852964862171553, "loss": 1.6993, "step": 10470 }, { "epoch": 2.599045198090396, "grad_norm": 0.828659176826477, "learning_rate": 0.00016847288404014935, "loss": 1.6902, "step": 10480 }, { "epoch": 2.6015252030504064, "grad_norm": 0.8091562986373901, "learning_rate": 0.00016841607788884425, "loss": 1.7654, "step": 10490 }, { "epoch": 2.604005208010416, "grad_norm": 0.7710474729537964, "learning_rate": 0.00016835923020228712, "loss": 1.7146, "step": 10500 }, { "epoch": 2.606485212970426, "grad_norm": 0.8594821095466614, "learning_rate": 0.00016830234101499007, "loss": 1.7195, "step": 10510 }, { "epoch": 2.608965217930436, "grad_norm": 0.7700628638267517, "learning_rate": 0.00016824541036149037, "loss": 1.6829, "step": 10520 }, { "epoch": 2.611445222890446, "grad_norm": 0.852709174156189, "learning_rate": 0.0001681884382763505, "loss": 1.7721, "step": 10530 }, { "epoch": 2.613925227850456, "grad_norm": 0.8819437623023987, "learning_rate": 0.00016813142479415812, "loss": 1.6806, "step": 10540 }, { "epoch": 2.6164052328104654, "grad_norm": 0.8042049407958984, "learning_rate": 0.000168074369949526, "loss": 1.712, "step": 10550 }, { "epoch": 2.6188852377704754, "grad_norm": 0.8049604296684265, "learning_rate": 0.00016801727377709194, "loss": 1.7204, "step": 10560 }, { "epoch": 2.6213652427304854, "grad_norm": 0.8116569519042969, "learning_rate": 0.00016796013631151897, "loss": 1.684, "step": 10570 }, { "epoch": 2.6238452476904954, "grad_norm": 0.718235969543457, "learning_rate": 0.0001679029575874951, "loss": 1.7264, "step": 10580 }, { "epoch": 2.6263252526505054, "grad_norm": 0.9108774065971375, "learning_rate": 0.0001678457376397334, "loss": 1.7857, "step": 10590 }, { "epoch": 2.628805257610515, "grad_norm": 0.7946862578392029, "learning_rate": 0.00016778847650297197, "loss": 1.6848, "step": 10600 }, { "epoch": 2.6312852625705254, "grad_norm": 0.8489319682121277, "learning_rate": 0.00016773117421197396, "loss": 1.7319, "step": 10610 }, { "epoch": 2.633765267530535, "grad_norm": 0.7626186609268188, "learning_rate": 0.00016767383080152742, "loss": 1.7554, "step": 10620 }, { "epoch": 2.636245272490545, "grad_norm": 0.8287044763565063, "learning_rate": 0.00016761644630644546, "loss": 1.7215, "step": 10630 }, { "epoch": 2.638725277450555, "grad_norm": 0.769493043422699, "learning_rate": 0.00016755902076156604, "loss": 1.7282, "step": 10640 }, { "epoch": 2.641205282410565, "grad_norm": 0.770527184009552, "learning_rate": 0.00016750155420175208, "loss": 1.7322, "step": 10650 }, { "epoch": 2.643685287370575, "grad_norm": 0.8423058986663818, "learning_rate": 0.00016744404666189144, "loss": 1.7149, "step": 10660 }, { "epoch": 2.6461652923305845, "grad_norm": 0.8349190354347229, "learning_rate": 0.0001673864981768968, "loss": 1.743, "step": 10670 }, { "epoch": 2.6486452972905945, "grad_norm": 0.8283718824386597, "learning_rate": 0.0001673289087817057, "loss": 1.7103, "step": 10680 }, { "epoch": 2.6511253022506045, "grad_norm": 0.7439224123954773, "learning_rate": 0.00016727127851128056, "loss": 1.7867, "step": 10690 }, { "epoch": 2.6536053072106145, "grad_norm": 0.8084254264831543, "learning_rate": 0.00016721360740060864, "loss": 1.7138, "step": 10700 }, { "epoch": 2.6560853121706245, "grad_norm": 0.7427732944488525, "learning_rate": 0.00016715589548470185, "loss": 1.7044, "step": 10710 }, { "epoch": 2.658565317130634, "grad_norm": 0.7997560501098633, "learning_rate": 0.00016709814279859702, "loss": 1.6955, "step": 10720 }, { "epoch": 2.661045322090644, "grad_norm": 0.8720448017120361, "learning_rate": 0.0001670403493773557, "loss": 1.7229, "step": 10730 }, { "epoch": 2.663525327050654, "grad_norm": 0.8524209856987, "learning_rate": 0.0001669825152560641, "loss": 1.7147, "step": 10740 }, { "epoch": 2.666005332010664, "grad_norm": 0.7974799871444702, "learning_rate": 0.00016692464046983322, "loss": 1.7396, "step": 10750 }, { "epoch": 2.668485336970674, "grad_norm": 0.7441201210021973, "learning_rate": 0.0001668667250537987, "loss": 1.7124, "step": 10760 }, { "epoch": 2.670965341930684, "grad_norm": 0.8221575617790222, "learning_rate": 0.0001668087690431209, "loss": 1.7115, "step": 10770 }, { "epoch": 2.673445346890694, "grad_norm": 0.799111545085907, "learning_rate": 0.00016675077247298474, "loss": 1.727, "step": 10780 }, { "epoch": 2.6759253518507036, "grad_norm": 0.8170874714851379, "learning_rate": 0.00016669273537859986, "loss": 1.6521, "step": 10790 }, { "epoch": 2.6784053568107136, "grad_norm": 0.8116423487663269, "learning_rate": 0.0001666346577952004, "loss": 1.6758, "step": 10800 }, { "epoch": 2.6808853617707236, "grad_norm": 0.7921115159988403, "learning_rate": 0.00016657653975804516, "loss": 1.6891, "step": 10810 }, { "epoch": 2.6833653667307336, "grad_norm": 0.8088225722312927, "learning_rate": 0.0001665183813024175, "loss": 1.7268, "step": 10820 }, { "epoch": 2.6858453716907436, "grad_norm": 0.8141648173332214, "learning_rate": 0.00016646018246362527, "loss": 1.7644, "step": 10830 }, { "epoch": 2.688325376650753, "grad_norm": 0.8276552557945251, "learning_rate": 0.00016640194327700086, "loss": 1.7645, "step": 10840 }, { "epoch": 2.690805381610763, "grad_norm": 0.7764387130737305, "learning_rate": 0.00016634366377790114, "loss": 1.6971, "step": 10850 }, { "epoch": 2.693285386570773, "grad_norm": 0.9159328937530518, "learning_rate": 0.00016628534400170745, "loss": 1.6994, "step": 10860 }, { "epoch": 2.695765391530783, "grad_norm": 0.8896689414978027, "learning_rate": 0.00016622698398382566, "loss": 1.7405, "step": 10870 }, { "epoch": 2.698245396490793, "grad_norm": 0.7660658359527588, "learning_rate": 0.00016616858375968595, "loss": 1.7525, "step": 10880 }, { "epoch": 2.7007254014508026, "grad_norm": 0.7988965511322021, "learning_rate": 0.00016611014336474303, "loss": 1.7569, "step": 10890 }, { "epoch": 2.703205406410813, "grad_norm": 0.7468309998512268, "learning_rate": 0.00016605166283447585, "loss": 1.7449, "step": 10900 }, { "epoch": 2.7056854113708226, "grad_norm": 0.7522343993186951, "learning_rate": 0.00016599314220438794, "loss": 1.7165, "step": 10910 }, { "epoch": 2.7081654163308326, "grad_norm": 0.8288154006004333, "learning_rate": 0.00016593458151000688, "loss": 1.7, "step": 10920 }, { "epoch": 2.7106454212908426, "grad_norm": 0.7801446914672852, "learning_rate": 0.0001658759807868849, "loss": 1.6664, "step": 10930 }, { "epoch": 2.7131254262508526, "grad_norm": 0.8585086464881897, "learning_rate": 0.00016581734007059827, "loss": 1.7183, "step": 10940 }, { "epoch": 2.7156054312108626, "grad_norm": 0.818763017654419, "learning_rate": 0.00016575865939674772, "loss": 1.7009, "step": 10950 }, { "epoch": 2.718085436170872, "grad_norm": 0.7211923003196716, "learning_rate": 0.00016569993880095806, "loss": 1.6735, "step": 10960 }, { "epoch": 2.720565441130882, "grad_norm": 0.7824010252952576, "learning_rate": 0.00016564117831887852, "loss": 1.6974, "step": 10970 }, { "epoch": 2.723045446090892, "grad_norm": 0.8001604080200195, "learning_rate": 0.00016558237798618245, "loss": 1.6868, "step": 10980 }, { "epoch": 2.725525451050902, "grad_norm": 0.7506614327430725, "learning_rate": 0.00016552353783856732, "loss": 1.6618, "step": 10990 }, { "epoch": 2.728005456010912, "grad_norm": 0.774455189704895, "learning_rate": 0.00016546465791175496, "loss": 1.7212, "step": 11000 }, { "epoch": 2.7304854609709217, "grad_norm": 0.7329303622245789, "learning_rate": 0.00016540573824149122, "loss": 1.7457, "step": 11010 }, { "epoch": 2.7329654659309317, "grad_norm": 0.7539793252944946, "learning_rate": 0.00016534677886354606, "loss": 1.7019, "step": 11020 }, { "epoch": 2.7354454708909417, "grad_norm": 0.8947096467018127, "learning_rate": 0.00016528777981371365, "loss": 1.7468, "step": 11030 }, { "epoch": 2.7379254758509517, "grad_norm": 0.7925174832344055, "learning_rate": 0.00016522874112781213, "loss": 1.702, "step": 11040 }, { "epoch": 2.7404054808109617, "grad_norm": 0.7956472635269165, "learning_rate": 0.00016516966284168378, "loss": 1.7347, "step": 11050 }, { "epoch": 2.7428854857709717, "grad_norm": 0.7949102520942688, "learning_rate": 0.0001651105449911949, "loss": 1.7275, "step": 11060 }, { "epoch": 2.7453654907309817, "grad_norm": 0.8082578182220459, "learning_rate": 0.00016505138761223584, "loss": 1.7234, "step": 11070 }, { "epoch": 2.7478454956909912, "grad_norm": 0.7731408476829529, "learning_rate": 0.00016499219074072086, "loss": 1.7376, "step": 11080 }, { "epoch": 2.7503255006510012, "grad_norm": 0.8738985061645508, "learning_rate": 0.00016493295441258828, "loss": 1.8141, "step": 11090 }, { "epoch": 2.7528055056110112, "grad_norm": 0.7660444974899292, "learning_rate": 0.00016487367866380037, "loss": 1.6608, "step": 11100 }, { "epoch": 2.7552855105710212, "grad_norm": 0.8163630366325378, "learning_rate": 0.00016481436353034325, "loss": 1.7539, "step": 11110 }, { "epoch": 2.757765515531031, "grad_norm": 0.8122231960296631, "learning_rate": 0.00016475500904822706, "loss": 1.714, "step": 11120 }, { "epoch": 2.7602455204910408, "grad_norm": 0.7877174615859985, "learning_rate": 0.00016469561525348577, "loss": 1.7644, "step": 11130 }, { "epoch": 2.7627255254510508, "grad_norm": 0.8161805272102356, "learning_rate": 0.00016463618218217718, "loss": 1.6885, "step": 11140 }, { "epoch": 2.7652055304110608, "grad_norm": 0.7412929534912109, "learning_rate": 0.00016457670987038298, "loss": 1.6591, "step": 11150 }, { "epoch": 2.7676855353710708, "grad_norm": 0.8340774774551392, "learning_rate": 0.00016451719835420877, "loss": 1.65, "step": 11160 }, { "epoch": 2.7701655403310808, "grad_norm": 0.8444342613220215, "learning_rate": 0.00016445764766978379, "loss": 1.7755, "step": 11170 }, { "epoch": 2.7726455452910903, "grad_norm": 0.9029738306999207, "learning_rate": 0.00016439805785326112, "loss": 1.688, "step": 11180 }, { "epoch": 2.7751255502511007, "grad_norm": 0.8989600539207458, "learning_rate": 0.00016433842894081764, "loss": 1.7138, "step": 11190 }, { "epoch": 2.7776055552111103, "grad_norm": 0.8586159348487854, "learning_rate": 0.00016427876096865394, "loss": 1.7075, "step": 11200 }, { "epoch": 2.7800855601711203, "grad_norm": 0.7890956401824951, "learning_rate": 0.0001642190539729943, "loss": 1.7397, "step": 11210 }, { "epoch": 2.7825655651311303, "grad_norm": 0.8047048449516296, "learning_rate": 0.00016415930799008668, "loss": 1.7553, "step": 11220 }, { "epoch": 2.7850455700911403, "grad_norm": 0.777072548866272, "learning_rate": 0.0001640995230562028, "loss": 1.767, "step": 11230 }, { "epoch": 2.7875255750511503, "grad_norm": 0.7799883484840393, "learning_rate": 0.00016403969920763788, "loss": 1.6753, "step": 11240 }, { "epoch": 2.79000558001116, "grad_norm": 0.8299691081047058, "learning_rate": 0.00016397983648071095, "loss": 1.7417, "step": 11250 }, { "epoch": 2.79248558497117, "grad_norm": 0.7810803651809692, "learning_rate": 0.00016391993491176446, "loss": 1.7456, "step": 11260 }, { "epoch": 2.79496558993118, "grad_norm": 0.7570012807846069, "learning_rate": 0.00016385999453716454, "loss": 1.6847, "step": 11270 }, { "epoch": 2.79744559489119, "grad_norm": 0.7709137201309204, "learning_rate": 0.0001638000153933009, "loss": 1.7746, "step": 11280 }, { "epoch": 2.7999255998512, "grad_norm": 0.9917616248130798, "learning_rate": 0.00016373999751658665, "loss": 1.7129, "step": 11290 }, { "epoch": 2.8024056048112094, "grad_norm": 0.8247378468513489, "learning_rate": 0.00016367994094345864, "loss": 1.7178, "step": 11300 }, { "epoch": 2.80488560977122, "grad_norm": 0.8372973203659058, "learning_rate": 0.000163619845710377, "loss": 1.6927, "step": 11310 }, { "epoch": 2.8073656147312294, "grad_norm": 0.8099014759063721, "learning_rate": 0.00016355971185382545, "loss": 1.7366, "step": 11320 }, { "epoch": 2.8098456196912394, "grad_norm": 0.7249281406402588, "learning_rate": 0.00016349953941031112, "loss": 1.7108, "step": 11330 }, { "epoch": 2.8123256246512494, "grad_norm": 0.8013809323310852, "learning_rate": 0.00016343932841636456, "loss": 1.7111, "step": 11340 }, { "epoch": 2.8148056296112594, "grad_norm": 0.8951674103736877, "learning_rate": 0.00016337907890853973, "loss": 1.7322, "step": 11350 }, { "epoch": 2.8172856345712693, "grad_norm": 0.7558394074440002, "learning_rate": 0.000163318790923414, "loss": 1.8479, "step": 11360 }, { "epoch": 2.819765639531279, "grad_norm": 0.7628364562988281, "learning_rate": 0.00016325846449758805, "loss": 1.7534, "step": 11370 }, { "epoch": 2.822245644491289, "grad_norm": 0.8447631597518921, "learning_rate": 0.0001631980996676859, "loss": 1.7115, "step": 11380 }, { "epoch": 2.824725649451299, "grad_norm": 0.7334152460098267, "learning_rate": 0.00016313769647035496, "loss": 1.7544, "step": 11390 }, { "epoch": 2.827205654411309, "grad_norm": 0.8144456148147583, "learning_rate": 0.00016307725494226584, "loss": 1.6882, "step": 11400 }, { "epoch": 2.829685659371319, "grad_norm": 0.8416145443916321, "learning_rate": 0.00016301677512011248, "loss": 1.7176, "step": 11410 }, { "epoch": 2.8321656643313284, "grad_norm": 0.8626994490623474, "learning_rate": 0.00016295625704061204, "loss": 1.7225, "step": 11420 }, { "epoch": 2.8346456692913384, "grad_norm": 0.8473905920982361, "learning_rate": 0.00016289570074050493, "loss": 1.6883, "step": 11430 }, { "epoch": 2.8371256742513484, "grad_norm": 0.7284490466117859, "learning_rate": 0.00016283510625655472, "loss": 1.6837, "step": 11440 }, { "epoch": 2.8396056792113584, "grad_norm": 0.8658609986305237, "learning_rate": 0.00016277447362554823, "loss": 1.7612, "step": 11450 }, { "epoch": 2.8420856841713684, "grad_norm": 0.7673606872558594, "learning_rate": 0.00016271380288429533, "loss": 1.7423, "step": 11460 }, { "epoch": 2.8445656891313784, "grad_norm": 0.8407950401306152, "learning_rate": 0.0001626530940696292, "loss": 1.7509, "step": 11470 }, { "epoch": 2.8470456940913884, "grad_norm": 0.8333432078361511, "learning_rate": 0.00016259234721840591, "loss": 1.6779, "step": 11480 }, { "epoch": 2.849525699051398, "grad_norm": 0.7581879496574402, "learning_rate": 0.00016253156236750485, "loss": 1.7077, "step": 11490 }, { "epoch": 2.852005704011408, "grad_norm": 0.7580318450927734, "learning_rate": 0.0001624707395538283, "loss": 1.7709, "step": 11500 }, { "epoch": 2.854485708971418, "grad_norm": 0.7712346911430359, "learning_rate": 0.00016240987881430163, "loss": 1.7841, "step": 11510 }, { "epoch": 2.856965713931428, "grad_norm": 0.7301474809646606, "learning_rate": 0.00016234898018587337, "loss": 1.6845, "step": 11520 }, { "epoch": 2.859445718891438, "grad_norm": 0.7858560681343079, "learning_rate": 0.00016228804370551486, "loss": 1.7104, "step": 11530 }, { "epoch": 2.8619257238514475, "grad_norm": 0.7861492037773132, "learning_rate": 0.00016222706941022055, "loss": 1.7292, "step": 11540 }, { "epoch": 2.8644057288114575, "grad_norm": 0.8644739389419556, "learning_rate": 0.00016216605733700775, "loss": 1.7029, "step": 11550 }, { "epoch": 2.8668857337714675, "grad_norm": 0.801563560962677, "learning_rate": 0.0001621050075229168, "loss": 1.7611, "step": 11560 }, { "epoch": 2.8693657387314775, "grad_norm": 0.768110454082489, "learning_rate": 0.00016204392000501093, "loss": 1.7673, "step": 11570 }, { "epoch": 2.8718457436914875, "grad_norm": 0.8002123832702637, "learning_rate": 0.00016198279482037618, "loss": 1.7176, "step": 11580 }, { "epoch": 2.874325748651497, "grad_norm": 0.8760389685630798, "learning_rate": 0.00016192163200612155, "loss": 1.734, "step": 11590 }, { "epoch": 2.8768057536115075, "grad_norm": 0.7727819681167603, "learning_rate": 0.00016186043159937882, "loss": 1.7203, "step": 11600 }, { "epoch": 2.879285758571517, "grad_norm": 0.7959619164466858, "learning_rate": 0.00016179919363730266, "loss": 1.788, "step": 11610 }, { "epoch": 2.881765763531527, "grad_norm": 0.8371580839157104, "learning_rate": 0.00016173791815707051, "loss": 1.725, "step": 11620 }, { "epoch": 2.884245768491537, "grad_norm": 0.7960420846939087, "learning_rate": 0.00016167660519588256, "loss": 1.7278, "step": 11630 }, { "epoch": 2.886725773451547, "grad_norm": 0.7642151117324829, "learning_rate": 0.00016161525479096178, "loss": 1.7459, "step": 11640 }, { "epoch": 2.889205778411557, "grad_norm": 0.8472262620925903, "learning_rate": 0.00016155386697955387, "loss": 1.7484, "step": 11650 }, { "epoch": 2.8916857833715666, "grad_norm": 0.7980145812034607, "learning_rate": 0.0001614924417989272, "loss": 1.7002, "step": 11660 }, { "epoch": 2.8941657883315766, "grad_norm": 0.888417661190033, "learning_rate": 0.0001614309792863729, "loss": 1.6796, "step": 11670 }, { "epoch": 2.8966457932915866, "grad_norm": 0.8283897042274475, "learning_rate": 0.00016136947947920476, "loss": 1.7446, "step": 11680 }, { "epoch": 2.8991257982515966, "grad_norm": 0.7665685415267944, "learning_rate": 0.0001613079424147591, "loss": 1.8176, "step": 11690 }, { "epoch": 2.9016058032116065, "grad_norm": 0.7955074310302734, "learning_rate": 0.00016124636813039502, "loss": 1.7458, "step": 11700 }, { "epoch": 2.904085808171616, "grad_norm": 0.7898004651069641, "learning_rate": 0.00016118475666349408, "loss": 1.6708, "step": 11710 }, { "epoch": 2.906565813131626, "grad_norm": 0.8602806329727173, "learning_rate": 0.0001611231080514605, "loss": 1.7355, "step": 11720 }, { "epoch": 2.909045818091636, "grad_norm": 0.7743834853172302, "learning_rate": 0.00016106142233172097, "loss": 1.719, "step": 11730 }, { "epoch": 2.911525823051646, "grad_norm": 0.8028146624565125, "learning_rate": 0.0001609996995417248, "loss": 1.7753, "step": 11740 }, { "epoch": 2.914005828011656, "grad_norm": 0.7885788679122925, "learning_rate": 0.00016093793971894374, "loss": 1.73, "step": 11750 }, { "epoch": 2.916485832971666, "grad_norm": 1.0257158279418945, "learning_rate": 0.00016087614290087208, "loss": 1.7334, "step": 11760 }, { "epoch": 2.918965837931676, "grad_norm": 0.8035752177238464, "learning_rate": 0.00016081430912502648, "loss": 1.7246, "step": 11770 }, { "epoch": 2.9214458428916856, "grad_norm": 0.8140546083450317, "learning_rate": 0.00016075243842894615, "loss": 1.7036, "step": 11780 }, { "epoch": 2.9239258478516956, "grad_norm": 0.829397976398468, "learning_rate": 0.00016069053085019256, "loss": 1.7177, "step": 11790 }, { "epoch": 2.9264058528117056, "grad_norm": 0.8287886381149292, "learning_rate": 0.0001606285864263498, "loss": 1.68, "step": 11800 }, { "epoch": 2.9288858577717156, "grad_norm": 0.8033359050750732, "learning_rate": 0.0001605666051950241, "loss": 1.7039, "step": 11810 }, { "epoch": 2.9313658627317256, "grad_norm": 0.7789167761802673, "learning_rate": 0.00016050458719384412, "loss": 1.7292, "step": 11820 }, { "epoch": 2.933845867691735, "grad_norm": 0.7982509136199951, "learning_rate": 0.0001604425324604609, "loss": 1.7573, "step": 11830 }, { "epoch": 2.936325872651745, "grad_norm": 0.8048049807548523, "learning_rate": 0.00016038044103254775, "loss": 1.7235, "step": 11840 }, { "epoch": 2.938805877611755, "grad_norm": 0.8201325535774231, "learning_rate": 0.0001603183129478002, "loss": 1.8011, "step": 11850 }, { "epoch": 2.941285882571765, "grad_norm": 0.7818323373794556, "learning_rate": 0.00016025614824393607, "loss": 1.6917, "step": 11860 }, { "epoch": 2.943765887531775, "grad_norm": 0.8268362283706665, "learning_rate": 0.00016019394695869545, "loss": 1.7088, "step": 11870 }, { "epoch": 2.946245892491785, "grad_norm": 0.7949564456939697, "learning_rate": 0.00016013170912984058, "loss": 1.7006, "step": 11880 }, { "epoch": 2.948725897451795, "grad_norm": 0.8073463439941406, "learning_rate": 0.00016006943479515595, "loss": 1.6718, "step": 11890 }, { "epoch": 2.9512059024118047, "grad_norm": 0.7876682877540588, "learning_rate": 0.0001600071239924481, "loss": 1.7143, "step": 11900 }, { "epoch": 2.9536859073718147, "grad_norm": 0.8282910585403442, "learning_rate": 0.00015994477675954584, "loss": 1.73, "step": 11910 }, { "epoch": 2.9561659123318247, "grad_norm": 0.7657924890518188, "learning_rate": 0.00015988239313430004, "loss": 1.7406, "step": 11920 }, { "epoch": 2.9586459172918347, "grad_norm": 0.8015568256378174, "learning_rate": 0.00015981997315458362, "loss": 1.7657, "step": 11930 }, { "epoch": 2.9611259222518447, "grad_norm": 0.8141234517097473, "learning_rate": 0.00015975751685829166, "loss": 1.6911, "step": 11940 }, { "epoch": 2.9636059272118542, "grad_norm": 0.7737033367156982, "learning_rate": 0.00015969502428334122, "loss": 1.723, "step": 11950 }, { "epoch": 2.9660859321718642, "grad_norm": 0.7937765121459961, "learning_rate": 0.00015963249546767144, "loss": 1.7345, "step": 11960 }, { "epoch": 2.968565937131874, "grad_norm": 0.7321860790252686, "learning_rate": 0.00015956993044924334, "loss": 1.7622, "step": 11970 }, { "epoch": 2.971045942091884, "grad_norm": 0.7269527912139893, "learning_rate": 0.0001595073292660401, "loss": 1.7536, "step": 11980 }, { "epoch": 2.973525947051894, "grad_norm": 0.7339338064193726, "learning_rate": 0.00015944469195606673, "loss": 1.7122, "step": 11990 }, { "epoch": 2.9760059520119038, "grad_norm": 0.7892552614212036, "learning_rate": 0.00015938201855735014, "loss": 1.7115, "step": 12000 }, { "epoch": 2.978485956971914, "grad_norm": 0.7363120317459106, "learning_rate": 0.00015931930910793938, "loss": 1.6846, "step": 12010 }, { "epoch": 2.9809659619319238, "grad_norm": 0.8925533890724182, "learning_rate": 0.00015925656364590505, "loss": 1.7447, "step": 12020 }, { "epoch": 2.9834459668919338, "grad_norm": 0.8083146214485168, "learning_rate": 0.00015919378220933985, "loss": 1.7259, "step": 12030 }, { "epoch": 2.9859259718519437, "grad_norm": 0.7717840671539307, "learning_rate": 0.00015913096483635824, "loss": 1.713, "step": 12040 }, { "epoch": 2.9884059768119537, "grad_norm": 0.7873446345329285, "learning_rate": 0.00015906811156509655, "loss": 1.6912, "step": 12050 }, { "epoch": 2.9908859817719637, "grad_norm": 0.8484950065612793, "learning_rate": 0.00015900522243371282, "loss": 1.698, "step": 12060 }, { "epoch": 2.9933659867319733, "grad_norm": 0.8505200743675232, "learning_rate": 0.00015894229748038695, "loss": 1.7299, "step": 12070 }, { "epoch": 2.9958459916919833, "grad_norm": 0.7281607389450073, "learning_rate": 0.00015887933674332046, "loss": 1.6884, "step": 12080 }, { "epoch": 2.9983259966519933, "grad_norm": 0.7407076954841614, "learning_rate": 0.00015881634026073676, "loss": 1.6939, "step": 12090 }, { "epoch": 3.0008060016120033, "grad_norm": 0.8531381487846375, "learning_rate": 0.00015875330807088087, "loss": 1.7431, "step": 12100 }, { "epoch": 3.0032860065720133, "grad_norm": 0.7499832510948181, "learning_rate": 0.00015869024021201948, "loss": 1.6307, "step": 12110 }, { "epoch": 3.0057660115320233, "grad_norm": 0.7780420184135437, "learning_rate": 0.00015862713672244093, "loss": 1.5834, "step": 12120 }, { "epoch": 3.008246016492033, "grad_norm": 0.8454210758209229, "learning_rate": 0.00015856399764045518, "loss": 1.605, "step": 12130 }, { "epoch": 3.010726021452043, "grad_norm": 0.8125146627426147, "learning_rate": 0.00015850082300439394, "loss": 1.6195, "step": 12140 }, { "epoch": 3.013206026412053, "grad_norm": 0.8854175806045532, "learning_rate": 0.00015843761285261028, "loss": 1.6233, "step": 12150 }, { "epoch": 3.015686031372063, "grad_norm": 0.856911301612854, "learning_rate": 0.000158374367223479, "loss": 1.5868, "step": 12160 }, { "epoch": 3.018166036332073, "grad_norm": 0.8482590317726135, "learning_rate": 0.00015831108615539638, "loss": 1.59, "step": 12170 }, { "epoch": 3.020646041292083, "grad_norm": 0.8031054139137268, "learning_rate": 0.00015824776968678024, "loss": 1.5753, "step": 12180 }, { "epoch": 3.0231260462520924, "grad_norm": 0.8475634455680847, "learning_rate": 0.0001581844178560698, "loss": 1.6346, "step": 12190 }, { "epoch": 3.0256060512121024, "grad_norm": 0.886826753616333, "learning_rate": 0.0001581210307017259, "loss": 1.6747, "step": 12200 }, { "epoch": 3.0280860561721124, "grad_norm": 0.8088538646697998, "learning_rate": 0.0001580576082622307, "loss": 1.553, "step": 12210 }, { "epoch": 3.0305660611321223, "grad_norm": 0.7956720590591431, "learning_rate": 0.00015799415057608785, "loss": 1.5824, "step": 12220 }, { "epoch": 3.0330460660921323, "grad_norm": 0.8272199630737305, "learning_rate": 0.00015793065768182233, "loss": 1.6197, "step": 12230 }, { "epoch": 3.035526071052142, "grad_norm": 0.8243331909179688, "learning_rate": 0.0001578671296179806, "loss": 1.5601, "step": 12240 }, { "epoch": 3.038006076012152, "grad_norm": 0.9027004241943359, "learning_rate": 0.00015780356642313036, "loss": 1.6884, "step": 12250 }, { "epoch": 3.040486080972162, "grad_norm": 0.8033971190452576, "learning_rate": 0.00015773996813586068, "loss": 1.59, "step": 12260 }, { "epoch": 3.042966085932172, "grad_norm": 0.8007137179374695, "learning_rate": 0.00015767633479478197, "loss": 1.5946, "step": 12270 }, { "epoch": 3.045446090892182, "grad_norm": 0.943191409111023, "learning_rate": 0.00015761266643852587, "loss": 1.62, "step": 12280 }, { "epoch": 3.047926095852192, "grad_norm": 0.8191910982131958, "learning_rate": 0.00015754896310574536, "loss": 1.6174, "step": 12290 }, { "epoch": 3.0504061008122014, "grad_norm": 0.8275129795074463, "learning_rate": 0.00015748522483511452, "loss": 1.679, "step": 12300 }, { "epoch": 3.0528861057722114, "grad_norm": 0.8390007615089417, "learning_rate": 0.00015742145166532876, "loss": 1.5712, "step": 12310 }, { "epoch": 3.0553661107322214, "grad_norm": 0.8464158773422241, "learning_rate": 0.0001573576436351046, "loss": 1.6154, "step": 12320 }, { "epoch": 3.0578461156922314, "grad_norm": 0.8567997813224792, "learning_rate": 0.0001572938007831798, "loss": 1.6661, "step": 12330 }, { "epoch": 3.0603261206522414, "grad_norm": 0.9141848087310791, "learning_rate": 0.00015722992314831319, "loss": 1.6535, "step": 12340 }, { "epoch": 3.0628061256122514, "grad_norm": 0.7453342080116272, "learning_rate": 0.00015716601076928475, "loss": 1.6644, "step": 12350 }, { "epoch": 3.065286130572261, "grad_norm": 0.7754812836647034, "learning_rate": 0.00015710206368489552, "loss": 1.6265, "step": 12360 }, { "epoch": 3.067766135532271, "grad_norm": 0.8418968915939331, "learning_rate": 0.00015703808193396766, "loss": 1.6306, "step": 12370 }, { "epoch": 3.070246140492281, "grad_norm": 0.844227135181427, "learning_rate": 0.00015697406555534437, "loss": 1.6495, "step": 12380 }, { "epoch": 3.072726145452291, "grad_norm": 0.814213752746582, "learning_rate": 0.00015691001458788983, "loss": 1.6155, "step": 12390 }, { "epoch": 3.075206150412301, "grad_norm": 0.8387388586997986, "learning_rate": 0.00015684592907048926, "loss": 1.6246, "step": 12400 }, { "epoch": 3.077686155372311, "grad_norm": 0.7879986763000488, "learning_rate": 0.00015678180904204878, "loss": 1.648, "step": 12410 }, { "epoch": 3.0801661603323205, "grad_norm": 0.8591954112052917, "learning_rate": 0.00015671765454149559, "loss": 1.5457, "step": 12420 }, { "epoch": 3.0826461652923305, "grad_norm": 0.8503502607345581, "learning_rate": 0.00015665346560777774, "loss": 1.609, "step": 12430 }, { "epoch": 3.0851261702523405, "grad_norm": 0.8228851556777954, "learning_rate": 0.00015658924227986414, "loss": 1.6144, "step": 12440 }, { "epoch": 3.0876061752123505, "grad_norm": 0.8595585227012634, "learning_rate": 0.00015652498459674467, "loss": 1.629, "step": 12450 }, { "epoch": 3.0900861801723605, "grad_norm": 0.8532552719116211, "learning_rate": 0.00015646069259743007, "loss": 1.6055, "step": 12460 }, { "epoch": 3.0925661851323705, "grad_norm": 0.8255425095558167, "learning_rate": 0.0001563963663209518, "loss": 1.6725, "step": 12470 }, { "epoch": 3.09504619009238, "grad_norm": 0.8189271688461304, "learning_rate": 0.0001563320058063622, "loss": 1.6298, "step": 12480 }, { "epoch": 3.09752619505239, "grad_norm": 0.7872645854949951, "learning_rate": 0.0001562676110927345, "loss": 1.604, "step": 12490 }, { "epoch": 3.1000062000124, "grad_norm": 0.8693544864654541, "learning_rate": 0.00015620318221916247, "loss": 1.5533, "step": 12500 }, { "epoch": 3.10248620497241, "grad_norm": 0.8340660929679871, "learning_rate": 0.0001561387192247608, "loss": 1.61, "step": 12510 }, { "epoch": 3.10496620993242, "grad_norm": 0.8567219972610474, "learning_rate": 0.0001560742221486648, "loss": 1.6695, "step": 12520 }, { "epoch": 3.1074462148924296, "grad_norm": 0.846973717212677, "learning_rate": 0.00015600969103003056, "loss": 1.6645, "step": 12530 }, { "epoch": 3.1099262198524396, "grad_norm": 0.8784561157226562, "learning_rate": 0.00015594512590803473, "loss": 1.6265, "step": 12540 }, { "epoch": 3.1124062248124496, "grad_norm": 0.7776249647140503, "learning_rate": 0.00015588052682187468, "loss": 1.5806, "step": 12550 }, { "epoch": 3.1148862297724595, "grad_norm": 0.8816384077072144, "learning_rate": 0.0001558158938107684, "loss": 1.6276, "step": 12560 }, { "epoch": 3.1173662347324695, "grad_norm": 0.8353577256202698, "learning_rate": 0.0001557512269139544, "loss": 1.6276, "step": 12570 }, { "epoch": 3.1198462396924795, "grad_norm": 0.8443197011947632, "learning_rate": 0.00015568652617069183, "loss": 1.5794, "step": 12580 }, { "epoch": 3.122326244652489, "grad_norm": 0.9142757654190063, "learning_rate": 0.00015562179162026038, "loss": 1.6802, "step": 12590 }, { "epoch": 3.124806249612499, "grad_norm": 0.8508244156837463, "learning_rate": 0.00015555702330196023, "loss": 1.6376, "step": 12600 }, { "epoch": 3.127286254572509, "grad_norm": 0.9337193369865417, "learning_rate": 0.00015549222125511213, "loss": 1.6247, "step": 12610 }, { "epoch": 3.129766259532519, "grad_norm": 0.8202626705169678, "learning_rate": 0.0001554273855190572, "loss": 1.6098, "step": 12620 }, { "epoch": 3.132246264492529, "grad_norm": 0.8608548045158386, "learning_rate": 0.00015536251613315714, "loss": 1.6122, "step": 12630 }, { "epoch": 3.134726269452539, "grad_norm": 0.8218183517456055, "learning_rate": 0.00015529761313679393, "loss": 1.6801, "step": 12640 }, { "epoch": 3.1372062744125486, "grad_norm": 0.8353672027587891, "learning_rate": 0.00015523267656937018, "loss": 1.6018, "step": 12650 }, { "epoch": 3.1396862793725586, "grad_norm": 0.8429569005966187, "learning_rate": 0.00015516770647030858, "loss": 1.6067, "step": 12660 }, { "epoch": 3.1421662843325686, "grad_norm": 0.8685290813446045, "learning_rate": 0.0001551027028790524, "loss": 1.6714, "step": 12670 }, { "epoch": 3.1446462892925786, "grad_norm": 0.963028073310852, "learning_rate": 0.00015503766583506524, "loss": 1.6326, "step": 12680 }, { "epoch": 3.1471262942525886, "grad_norm": 0.8299250602722168, "learning_rate": 0.00015497259537783085, "loss": 1.6665, "step": 12690 }, { "epoch": 3.1496062992125986, "grad_norm": 0.8954488635063171, "learning_rate": 0.0001549074915468534, "loss": 1.6717, "step": 12700 }, { "epoch": 3.152086304172608, "grad_norm": 0.8646858334541321, "learning_rate": 0.00015484235438165727, "loss": 1.6755, "step": 12710 }, { "epoch": 3.154566309132618, "grad_norm": 0.855476975440979, "learning_rate": 0.00015477718392178716, "loss": 1.5664, "step": 12720 }, { "epoch": 3.157046314092628, "grad_norm": 0.7919172048568726, "learning_rate": 0.00015471198020680781, "loss": 1.5712, "step": 12730 }, { "epoch": 3.159526319052638, "grad_norm": 0.8448411226272583, "learning_rate": 0.00015464674327630434, "loss": 1.5894, "step": 12740 }, { "epoch": 3.162006324012648, "grad_norm": 0.8859073519706726, "learning_rate": 0.00015458147316988193, "loss": 1.6677, "step": 12750 }, { "epoch": 3.164486328972658, "grad_norm": 0.8675896525382996, "learning_rate": 0.0001545161699271659, "loss": 1.6389, "step": 12760 }, { "epoch": 3.1669663339326677, "grad_norm": 0.8519783020019531, "learning_rate": 0.0001544508335878017, "loss": 1.6636, "step": 12770 }, { "epoch": 3.1694463388926777, "grad_norm": 0.8615175485610962, "learning_rate": 0.00015438546419145488, "loss": 1.5697, "step": 12780 }, { "epoch": 3.1719263438526877, "grad_norm": 0.8288758397102356, "learning_rate": 0.00015432006177781108, "loss": 1.5953, "step": 12790 }, { "epoch": 3.1744063488126977, "grad_norm": 0.8835858106613159, "learning_rate": 0.00015425462638657595, "loss": 1.6528, "step": 12800 }, { "epoch": 3.1768863537727077, "grad_norm": 0.8913437128067017, "learning_rate": 0.00015418915805747517, "loss": 1.6063, "step": 12810 }, { "epoch": 3.1793663587327177, "grad_norm": 0.8508837819099426, "learning_rate": 0.00015412365683025448, "loss": 1.6699, "step": 12820 }, { "epoch": 3.181846363692727, "grad_norm": 0.7990511655807495, "learning_rate": 0.0001540581227446794, "loss": 1.6572, "step": 12830 }, { "epoch": 3.184326368652737, "grad_norm": 0.857603907585144, "learning_rate": 0.00015399255584053567, "loss": 1.6515, "step": 12840 }, { "epoch": 3.186806373612747, "grad_norm": 0.8712793588638306, "learning_rate": 0.00015392695615762874, "loss": 1.696, "step": 12850 }, { "epoch": 3.189286378572757, "grad_norm": 0.8377465009689331, "learning_rate": 0.00015386132373578404, "loss": 1.6259, "step": 12860 }, { "epoch": 3.191766383532767, "grad_norm": 0.8834864497184753, "learning_rate": 0.00015379565861484688, "loss": 1.7125, "step": 12870 }, { "epoch": 3.194246388492777, "grad_norm": 0.8212023377418518, "learning_rate": 0.0001537299608346824, "loss": 1.6291, "step": 12880 }, { "epoch": 3.1967263934527868, "grad_norm": 0.8849619626998901, "learning_rate": 0.0001536642304351756, "loss": 1.708, "step": 12890 }, { "epoch": 3.1992063984127967, "grad_norm": 0.8786877393722534, "learning_rate": 0.00015359846745623126, "loss": 1.6772, "step": 12900 }, { "epoch": 3.2016864033728067, "grad_norm": 0.9455540776252747, "learning_rate": 0.00015353267193777393, "loss": 1.6662, "step": 12910 }, { "epoch": 3.2041664083328167, "grad_norm": 0.8279966115951538, "learning_rate": 0.00015346684391974794, "loss": 1.6842, "step": 12920 }, { "epoch": 3.2066464132928267, "grad_norm": 0.8265210390090942, "learning_rate": 0.00015340098344211733, "loss": 1.6751, "step": 12930 }, { "epoch": 3.2091264182528363, "grad_norm": 0.8862226009368896, "learning_rate": 0.00015333509054486584, "loss": 1.6636, "step": 12940 }, { "epoch": 3.2116064232128463, "grad_norm": 0.9301692843437195, "learning_rate": 0.00015326916526799692, "loss": 1.587, "step": 12950 }, { "epoch": 3.2140864281728563, "grad_norm": 0.9067266583442688, "learning_rate": 0.00015320320765153367, "loss": 1.6828, "step": 12960 }, { "epoch": 3.2165664331328663, "grad_norm": 0.8438600301742554, "learning_rate": 0.00015313721773551876, "loss": 1.6409, "step": 12970 }, { "epoch": 3.2190464380928763, "grad_norm": 0.890546977519989, "learning_rate": 0.00015307119556001465, "loss": 1.6323, "step": 12980 }, { "epoch": 3.2215264430528863, "grad_norm": 0.7987525463104248, "learning_rate": 0.0001530051411651031, "loss": 1.6684, "step": 12990 }, { "epoch": 3.224006448012896, "grad_norm": 0.9028956890106201, "learning_rate": 0.00015293905459088568, "loss": 1.6021, "step": 13000 }, { "epoch": 3.226486452972906, "grad_norm": 0.8655980229377747, "learning_rate": 0.00015287293587748342, "loss": 1.6022, "step": 13010 }, { "epoch": 3.228966457932916, "grad_norm": 0.8287554383277893, "learning_rate": 0.0001528067850650368, "loss": 1.6954, "step": 13020 }, { "epoch": 3.231446462892926, "grad_norm": 0.8922123312950134, "learning_rate": 0.00015274060219370585, "loss": 1.6897, "step": 13030 }, { "epoch": 3.233926467852936, "grad_norm": 0.8276147246360779, "learning_rate": 0.0001526743873036701, "loss": 1.6058, "step": 13040 }, { "epoch": 3.236406472812946, "grad_norm": 0.8847476840019226, "learning_rate": 0.00015260814043512836, "loss": 1.5953, "step": 13050 }, { "epoch": 3.2388864777729554, "grad_norm": 0.8755683898925781, "learning_rate": 0.00015254186162829903, "loss": 1.6701, "step": 13060 }, { "epoch": 3.2413664827329653, "grad_norm": 0.8448535203933716, "learning_rate": 0.0001524755509234198, "loss": 1.7325, "step": 13070 }, { "epoch": 3.2438464876929753, "grad_norm": 0.8898816108703613, "learning_rate": 0.00015240920836074776, "loss": 1.642, "step": 13080 }, { "epoch": 3.2463264926529853, "grad_norm": 0.8643108010292053, "learning_rate": 0.0001523428339805594, "loss": 1.6345, "step": 13090 }, { "epoch": 3.2488064976129953, "grad_norm": 0.8506256937980652, "learning_rate": 0.00015227642782315038, "loss": 1.6628, "step": 13100 }, { "epoch": 3.2512865025730053, "grad_norm": 0.949025571346283, "learning_rate": 0.00015220998992883573, "loss": 1.6418, "step": 13110 }, { "epoch": 3.253766507533015, "grad_norm": 0.8501860499382019, "learning_rate": 0.0001521435203379498, "loss": 1.6433, "step": 13120 }, { "epoch": 3.256246512493025, "grad_norm": 0.8821784853935242, "learning_rate": 0.00015207701909084612, "loss": 1.6308, "step": 13130 }, { "epoch": 3.258726517453035, "grad_norm": 0.8270078897476196, "learning_rate": 0.00015201048622789747, "loss": 1.6803, "step": 13140 }, { "epoch": 3.261206522413045, "grad_norm": 0.9211381673812866, "learning_rate": 0.00015194392178949576, "loss": 1.6086, "step": 13150 }, { "epoch": 3.263686527373055, "grad_norm": 0.90462327003479, "learning_rate": 0.00015187732581605217, "loss": 1.5827, "step": 13160 }, { "epoch": 3.266166532333065, "grad_norm": 0.8179154992103577, "learning_rate": 0.00015181069834799691, "loss": 1.6422, "step": 13170 }, { "epoch": 3.2686465372930744, "grad_norm": 0.8857276439666748, "learning_rate": 0.00015174403942577942, "loss": 1.6346, "step": 13180 }, { "epoch": 3.2711265422530844, "grad_norm": 0.8045947551727295, "learning_rate": 0.00015167734908986814, "loss": 1.636, "step": 13190 }, { "epoch": 3.2736065472130944, "grad_norm": 0.8784744739532471, "learning_rate": 0.00015161062738075067, "loss": 1.5687, "step": 13200 }, { "epoch": 3.2760865521731044, "grad_norm": 0.7755175828933716, "learning_rate": 0.0001515438743389336, "loss": 1.6515, "step": 13210 }, { "epoch": 3.2785665571331144, "grad_norm": 0.8107953667640686, "learning_rate": 0.00015147709000494256, "loss": 1.6565, "step": 13220 }, { "epoch": 3.281046562093124, "grad_norm": 0.8617423176765442, "learning_rate": 0.00015141027441932216, "loss": 1.6476, "step": 13230 }, { "epoch": 3.283526567053134, "grad_norm": 1.0366381406784058, "learning_rate": 0.00015134342762263605, "loss": 1.6506, "step": 13240 }, { "epoch": 3.286006572013144, "grad_norm": 0.8942514657974243, "learning_rate": 0.00015127654965546672, "loss": 1.6069, "step": 13250 }, { "epoch": 3.288486576973154, "grad_norm": 0.8343939185142517, "learning_rate": 0.00015120964055841563, "loss": 1.639, "step": 13260 }, { "epoch": 3.290966581933164, "grad_norm": 1.0147429704666138, "learning_rate": 0.00015114270037210316, "loss": 1.6268, "step": 13270 }, { "epoch": 3.293446586893174, "grad_norm": 0.862209141254425, "learning_rate": 0.00015107572913716858, "loss": 1.6769, "step": 13280 }, { "epoch": 3.295926591853184, "grad_norm": 0.8578622937202454, "learning_rate": 0.00015100872689426993, "loss": 1.6625, "step": 13290 }, { "epoch": 3.2984065968131935, "grad_norm": 0.8519478440284729, "learning_rate": 0.0001509416936840842, "loss": 1.6312, "step": 13300 }, { "epoch": 3.3008866017732035, "grad_norm": 0.9173608422279358, "learning_rate": 0.00015087462954730703, "loss": 1.6838, "step": 13310 }, { "epoch": 3.3033666067332135, "grad_norm": 0.8745505213737488, "learning_rate": 0.00015080753452465296, "loss": 1.6099, "step": 13320 }, { "epoch": 3.3058466116932235, "grad_norm": 0.8400318622589111, "learning_rate": 0.00015074040865685522, "loss": 1.7042, "step": 13330 }, { "epoch": 3.3083266166532335, "grad_norm": 0.8654444813728333, "learning_rate": 0.00015067325198466576, "loss": 1.6304, "step": 13340 }, { "epoch": 3.310806621613243, "grad_norm": 0.981468915939331, "learning_rate": 0.00015060606454885526, "loss": 1.67, "step": 13350 }, { "epoch": 3.313286626573253, "grad_norm": 0.8678485751152039, "learning_rate": 0.0001505388463902131, "loss": 1.6023, "step": 13360 }, { "epoch": 3.315766631533263, "grad_norm": 0.852952778339386, "learning_rate": 0.0001504715975495472, "loss": 1.6585, "step": 13370 }, { "epoch": 3.318246636493273, "grad_norm": 0.8926320672035217, "learning_rate": 0.00015040431806768428, "loss": 1.6037, "step": 13380 }, { "epoch": 3.320726641453283, "grad_norm": 0.9940789341926575, "learning_rate": 0.00015033700798546953, "loss": 1.6151, "step": 13390 }, { "epoch": 3.323206646413293, "grad_norm": 0.880584180355072, "learning_rate": 0.0001502696673437667, "loss": 1.6386, "step": 13400 }, { "epoch": 3.325686651373303, "grad_norm": 0.8516528010368347, "learning_rate": 0.0001502022961834582, "loss": 1.6993, "step": 13410 }, { "epoch": 3.3281666563333125, "grad_norm": 0.7992837429046631, "learning_rate": 0.00015013489454544494, "loss": 1.6166, "step": 13420 }, { "epoch": 3.3306466612933225, "grad_norm": 0.848483681678772, "learning_rate": 0.00015006746247064624, "loss": 1.6442, "step": 13430 }, { "epoch": 3.3331266662533325, "grad_norm": 0.8658037781715393, "learning_rate": 0.00015000000000000001, "loss": 1.6639, "step": 13440 }, { "epoch": 3.3356066712133425, "grad_norm": 0.8861033320426941, "learning_rate": 0.00014993250717446256, "loss": 1.6886, "step": 13450 }, { "epoch": 3.3380866761733525, "grad_norm": 0.8217837810516357, "learning_rate": 0.00014986498403500863, "loss": 1.6541, "step": 13460 }, { "epoch": 3.340566681133362, "grad_norm": 0.8470338582992554, "learning_rate": 0.0001497974306226314, "loss": 1.5988, "step": 13470 }, { "epoch": 3.343046686093372, "grad_norm": 0.9460231065750122, "learning_rate": 0.0001497298469783424, "loss": 1.5984, "step": 13480 }, { "epoch": 3.345526691053382, "grad_norm": 0.83684903383255, "learning_rate": 0.0001496622331431714, "loss": 1.6649, "step": 13490 }, { "epoch": 3.348006696013392, "grad_norm": 0.8802300691604614, "learning_rate": 0.0001495945891581668, "loss": 1.5957, "step": 13500 }, { "epoch": 3.350486700973402, "grad_norm": 0.8913207054138184, "learning_rate": 0.00014952691506439495, "loss": 1.6574, "step": 13510 }, { "epoch": 3.3529667059334116, "grad_norm": 0.9203808903694153, "learning_rate": 0.00014945921090294076, "loss": 1.6445, "step": 13520 }, { "epoch": 3.3554467108934216, "grad_norm": 0.8547185063362122, "learning_rate": 0.0001493914767149072, "loss": 1.6289, "step": 13530 }, { "epoch": 3.3579267158534316, "grad_norm": 0.9169796705245972, "learning_rate": 0.0001493237125414156, "loss": 1.6561, "step": 13540 }, { "epoch": 3.3604067208134416, "grad_norm": 0.8416388034820557, "learning_rate": 0.00014925591842360542, "loss": 1.6602, "step": 13550 }, { "epoch": 3.3628867257734516, "grad_norm": 0.8726803660392761, "learning_rate": 0.00014918809440263436, "loss": 1.6112, "step": 13560 }, { "epoch": 3.3653667307334616, "grad_norm": 0.8233663439750671, "learning_rate": 0.00014912024051967816, "loss": 1.6279, "step": 13570 }, { "epoch": 3.3678467356934716, "grad_norm": 0.8870377540588379, "learning_rate": 0.0001490523568159308, "loss": 1.5935, "step": 13580 }, { "epoch": 3.370326740653481, "grad_norm": 0.8920510411262512, "learning_rate": 0.00014898444333260436, "loss": 1.6159, "step": 13590 }, { "epoch": 3.372806745613491, "grad_norm": 0.87026447057724, "learning_rate": 0.00014891650011092896, "loss": 1.7169, "step": 13600 }, { "epoch": 3.375286750573501, "grad_norm": 0.8275659680366516, "learning_rate": 0.0001488485271921527, "loss": 1.6774, "step": 13610 }, { "epoch": 3.377766755533511, "grad_norm": 0.8684106469154358, "learning_rate": 0.0001487805246175419, "loss": 1.6216, "step": 13620 }, { "epoch": 3.380246760493521, "grad_norm": 0.8039277195930481, "learning_rate": 0.00014871249242838074, "loss": 1.5738, "step": 13630 }, { "epoch": 3.3827267654535307, "grad_norm": 0.9385021924972534, "learning_rate": 0.00014864443066597139, "loss": 1.6307, "step": 13640 }, { "epoch": 3.3852067704135407, "grad_norm": 0.9041079878807068, "learning_rate": 0.00014857633937163402, "loss": 1.6933, "step": 13650 }, { "epoch": 3.3876867753735507, "grad_norm": 0.8626042604446411, "learning_rate": 0.00014850821858670667, "loss": 1.6563, "step": 13660 }, { "epoch": 3.3901667803335607, "grad_norm": 0.8708447217941284, "learning_rate": 0.0001484400683525454, "loss": 1.6932, "step": 13670 }, { "epoch": 3.3926467852935707, "grad_norm": 0.8160665035247803, "learning_rate": 0.000148371888710524, "loss": 1.6541, "step": 13680 }, { "epoch": 3.3951267902535807, "grad_norm": 0.8544750213623047, "learning_rate": 0.00014830367970203415, "loss": 1.6279, "step": 13690 }, { "epoch": 3.3976067952135907, "grad_norm": 0.8258888125419617, "learning_rate": 0.00014823544136848552, "loss": 1.6896, "step": 13700 }, { "epoch": 3.4000868001736, "grad_norm": 0.8572070598602295, "learning_rate": 0.00014816717375130533, "loss": 1.6395, "step": 13710 }, { "epoch": 3.40256680513361, "grad_norm": 0.8532230854034424, "learning_rate": 0.00014809887689193877, "loss": 1.6723, "step": 13720 }, { "epoch": 3.40504681009362, "grad_norm": 0.918097198009491, "learning_rate": 0.0001480305508318487, "loss": 1.7022, "step": 13730 }, { "epoch": 3.40752681505363, "grad_norm": 0.909914493560791, "learning_rate": 0.00014796219561251568, "loss": 1.6047, "step": 13740 }, { "epoch": 3.41000682001364, "grad_norm": 0.8644607067108154, "learning_rate": 0.00014789381127543807, "loss": 1.6696, "step": 13750 }, { "epoch": 3.4124868249736497, "grad_norm": 0.8513312339782715, "learning_rate": 0.00014782539786213183, "loss": 1.6378, "step": 13760 }, { "epoch": 3.4149668299336597, "grad_norm": 0.8677627444267273, "learning_rate": 0.00014775695541413062, "loss": 1.6185, "step": 13770 }, { "epoch": 3.4174468348936697, "grad_norm": 0.8117983341217041, "learning_rate": 0.00014768848397298562, "loss": 1.6657, "step": 13780 }, { "epoch": 3.4199268398536797, "grad_norm": 0.8320634961128235, "learning_rate": 0.0001476199835802658, "loss": 1.6567, "step": 13790 }, { "epoch": 3.4224068448136897, "grad_norm": 0.8326737880706787, "learning_rate": 0.00014755145427755754, "loss": 1.7125, "step": 13800 }, { "epoch": 3.4248868497736997, "grad_norm": 0.860938549041748, "learning_rate": 0.00014748289610646482, "loss": 1.5955, "step": 13810 }, { "epoch": 3.4273668547337093, "grad_norm": 0.8838903307914734, "learning_rate": 0.0001474143091086092, "loss": 1.6864, "step": 13820 }, { "epoch": 3.4298468596937193, "grad_norm": 0.9615389108657837, "learning_rate": 0.00014734569332562966, "loss": 1.6906, "step": 13830 }, { "epoch": 3.4323268646537293, "grad_norm": 0.8392242193222046, "learning_rate": 0.0001472770487991827, "loss": 1.653, "step": 13840 }, { "epoch": 3.4348068696137393, "grad_norm": 0.8907983303070068, "learning_rate": 0.00014720837557094228, "loss": 1.6522, "step": 13850 }, { "epoch": 3.4372868745737493, "grad_norm": 0.9216994643211365, "learning_rate": 0.0001471396736825998, "loss": 1.6204, "step": 13860 }, { "epoch": 3.4397668795337593, "grad_norm": 0.9157097935676575, "learning_rate": 0.00014707094317586392, "loss": 1.6045, "step": 13870 }, { "epoch": 3.442246884493769, "grad_norm": 0.861574113368988, "learning_rate": 0.00014700218409246087, "loss": 1.7071, "step": 13880 }, { "epoch": 3.444726889453779, "grad_norm": 0.9511358141899109, "learning_rate": 0.0001469333964741341, "loss": 1.6046, "step": 13890 }, { "epoch": 3.447206894413789, "grad_norm": 0.8342337608337402, "learning_rate": 0.00014686458036264447, "loss": 1.6776, "step": 13900 }, { "epoch": 3.449686899373799, "grad_norm": 0.8049719333648682, "learning_rate": 0.00014679573579977004, "loss": 1.641, "step": 13910 }, { "epoch": 3.452166904333809, "grad_norm": 0.8005982041358948, "learning_rate": 0.0001467268628273062, "loss": 1.6753, "step": 13920 }, { "epoch": 3.4546469092938183, "grad_norm": 0.8201278448104858, "learning_rate": 0.0001466579614870656, "loss": 1.6484, "step": 13930 }, { "epoch": 3.4571269142538283, "grad_norm": 0.8669260144233704, "learning_rate": 0.00014658903182087813, "loss": 1.5958, "step": 13940 }, { "epoch": 3.4596069192138383, "grad_norm": 0.9065529704093933, "learning_rate": 0.00014652007387059077, "loss": 1.6607, "step": 13950 }, { "epoch": 3.4620869241738483, "grad_norm": 0.8734999299049377, "learning_rate": 0.00014645108767806777, "loss": 1.6161, "step": 13960 }, { "epoch": 3.4645669291338583, "grad_norm": 0.8499678373336792, "learning_rate": 0.00014638207328519052, "loss": 1.6, "step": 13970 }, { "epoch": 3.4670469340938683, "grad_norm": 0.828967809677124, "learning_rate": 0.00014631303073385746, "loss": 1.5823, "step": 13980 }, { "epoch": 3.4695269390538783, "grad_norm": 0.9271993637084961, "learning_rate": 0.0001462439600659842, "loss": 1.6964, "step": 13990 }, { "epoch": 3.472006944013888, "grad_norm": 0.8467398881912231, "learning_rate": 0.00014617486132350343, "loss": 1.6077, "step": 14000 }, { "epoch": 3.474486948973898, "grad_norm": 0.8329309821128845, "learning_rate": 0.00014610573454836476, "loss": 1.663, "step": 14010 }, { "epoch": 3.476966953933908, "grad_norm": 0.8573420643806458, "learning_rate": 0.00014603657978253497, "loss": 1.6057, "step": 14020 }, { "epoch": 3.479446958893918, "grad_norm": 0.901738703250885, "learning_rate": 0.00014596739706799775, "loss": 1.6707, "step": 14030 }, { "epoch": 3.481926963853928, "grad_norm": 0.8534144163131714, "learning_rate": 0.00014589818644675378, "loss": 1.5936, "step": 14040 }, { "epoch": 3.4844069688139374, "grad_norm": 1.1463650465011597, "learning_rate": 0.00014582894796082068, "loss": 1.5724, "step": 14050 }, { "epoch": 3.4868869737739474, "grad_norm": 0.8025235533714294, "learning_rate": 0.00014575968165223298, "loss": 1.642, "step": 14060 }, { "epoch": 3.4893669787339574, "grad_norm": 0.8681792616844177, "learning_rate": 0.00014569038756304207, "loss": 1.6466, "step": 14070 }, { "epoch": 3.4918469836939674, "grad_norm": 0.9857133030891418, "learning_rate": 0.0001456210657353163, "loss": 1.6794, "step": 14080 }, { "epoch": 3.4943269886539774, "grad_norm": 0.9428927302360535, "learning_rate": 0.0001455517162111408, "loss": 1.6231, "step": 14090 }, { "epoch": 3.4968069936139874, "grad_norm": 0.8979089856147766, "learning_rate": 0.00014548233903261746, "loss": 1.6431, "step": 14100 }, { "epoch": 3.4992869985739974, "grad_norm": 0.9292755126953125, "learning_rate": 0.00014541293424186503, "loss": 1.6476, "step": 14110 }, { "epoch": 3.501767003534007, "grad_norm": 0.9158943295478821, "learning_rate": 0.00014534350188101904, "loss": 1.6577, "step": 14120 }, { "epoch": 3.504247008494017, "grad_norm": 0.8280609250068665, "learning_rate": 0.00014527404199223172, "loss": 1.6099, "step": 14130 }, { "epoch": 3.506727013454027, "grad_norm": 0.9120189547538757, "learning_rate": 0.000145204554617672, "loss": 1.6737, "step": 14140 }, { "epoch": 3.509207018414037, "grad_norm": 0.7935187816619873, "learning_rate": 0.0001451350397995255, "loss": 1.6808, "step": 14150 }, { "epoch": 3.511687023374047, "grad_norm": 0.8127133250236511, "learning_rate": 0.00014506549757999454, "loss": 1.6635, "step": 14160 }, { "epoch": 3.5141670283340565, "grad_norm": 0.8754492402076721, "learning_rate": 0.00014499592800129804, "loss": 1.6821, "step": 14170 }, { "epoch": 3.5166470332940665, "grad_norm": 0.940440833568573, "learning_rate": 0.00014492633110567155, "loss": 1.6923, "step": 14180 }, { "epoch": 3.5191270382540765, "grad_norm": 0.8897917866706848, "learning_rate": 0.00014485670693536716, "loss": 1.5622, "step": 14190 }, { "epoch": 3.5216070432140865, "grad_norm": 0.8506290912628174, "learning_rate": 0.00014478705553265362, "loss": 1.5945, "step": 14200 }, { "epoch": 3.5240870481740965, "grad_norm": 0.8483827114105225, "learning_rate": 0.0001447173769398161, "loss": 1.6169, "step": 14210 }, { "epoch": 3.526567053134106, "grad_norm": 0.8673563003540039, "learning_rate": 0.00014464767119915629, "loss": 1.6915, "step": 14220 }, { "epoch": 3.5290470580941165, "grad_norm": 0.8081308007240295, "learning_rate": 0.00014457793835299246, "loss": 1.6491, "step": 14230 }, { "epoch": 3.531527063054126, "grad_norm": 0.9240626692771912, "learning_rate": 0.00014450817844365921, "loss": 1.6974, "step": 14240 }, { "epoch": 3.534007068014136, "grad_norm": 0.8092256784439087, "learning_rate": 0.00014443839151350772, "loss": 1.6547, "step": 14250 }, { "epoch": 3.536487072974146, "grad_norm": 0.8489810824394226, "learning_rate": 0.0001443685776049054, "loss": 1.6383, "step": 14260 }, { "epoch": 3.538967077934156, "grad_norm": 0.9102332592010498, "learning_rate": 0.0001442987367602361, "loss": 1.6713, "step": 14270 }, { "epoch": 3.541447082894166, "grad_norm": 0.8811954855918884, "learning_rate": 0.00014422886902190014, "loss": 1.628, "step": 14280 }, { "epoch": 3.5439270878541755, "grad_norm": 0.9573650360107422, "learning_rate": 0.000144158974432314, "loss": 1.6651, "step": 14290 }, { "epoch": 3.5464070928141855, "grad_norm": 0.9757610559463501, "learning_rate": 0.00014408905303391054, "loss": 1.6547, "step": 14300 }, { "epoch": 3.5488870977741955, "grad_norm": 0.8832597732543945, "learning_rate": 0.00014401910486913893, "loss": 1.705, "step": 14310 }, { "epoch": 3.5513671027342055, "grad_norm": 0.781204104423523, "learning_rate": 0.0001439491299804645, "loss": 1.6595, "step": 14320 }, { "epoch": 3.5538471076942155, "grad_norm": 0.809134840965271, "learning_rate": 0.00014387912841036887, "loss": 1.6322, "step": 14330 }, { "epoch": 3.556327112654225, "grad_norm": 0.9049520492553711, "learning_rate": 0.00014380910020134988, "loss": 1.6494, "step": 14340 }, { "epoch": 3.558807117614235, "grad_norm": 0.8458988666534424, "learning_rate": 0.00014373904539592144, "loss": 1.6275, "step": 14350 }, { "epoch": 3.561287122574245, "grad_norm": 0.9220494031906128, "learning_rate": 0.0001436689640366137, "loss": 1.6152, "step": 14360 }, { "epoch": 3.563767127534255, "grad_norm": 0.8901864290237427, "learning_rate": 0.0001435988561659729, "loss": 1.6828, "step": 14370 }, { "epoch": 3.566247132494265, "grad_norm": 0.9905135631561279, "learning_rate": 0.00014352872182656139, "loss": 1.6756, "step": 14380 }, { "epoch": 3.568727137454275, "grad_norm": 0.8881046175956726, "learning_rate": 0.00014345856106095752, "loss": 1.6471, "step": 14390 }, { "epoch": 3.571207142414285, "grad_norm": 0.8560605645179749, "learning_rate": 0.00014338837391175582, "loss": 1.6544, "step": 14400 }, { "epoch": 3.5736871473742946, "grad_norm": 0.8330683708190918, "learning_rate": 0.0001433181604215667, "loss": 1.6811, "step": 14410 }, { "epoch": 3.5761671523343046, "grad_norm": 0.7894644737243652, "learning_rate": 0.00014324792063301662, "loss": 1.6553, "step": 14420 }, { "epoch": 3.5786471572943146, "grad_norm": 0.8979082703590393, "learning_rate": 0.00014317765458874803, "loss": 1.6732, "step": 14430 }, { "epoch": 3.5811271622543246, "grad_norm": 0.8586720824241638, "learning_rate": 0.00014310736233141925, "loss": 1.6311, "step": 14440 }, { "epoch": 3.5836071672143346, "grad_norm": 0.9059020280838013, "learning_rate": 0.00014303704390370464, "loss": 1.6694, "step": 14450 }, { "epoch": 3.586087172174344, "grad_norm": 0.8023223876953125, "learning_rate": 0.00014296669934829424, "loss": 1.6898, "step": 14460 }, { "epoch": 3.588567177134354, "grad_norm": 0.8363684415817261, "learning_rate": 0.00014289632870789418, "loss": 1.6077, "step": 14470 }, { "epoch": 3.591047182094364, "grad_norm": 0.9061567187309265, "learning_rate": 0.00014282593202522627, "loss": 1.699, "step": 14480 }, { "epoch": 3.593527187054374, "grad_norm": 0.8779317736625671, "learning_rate": 0.00014275550934302823, "loss": 1.6464, "step": 14490 }, { "epoch": 3.596007192014384, "grad_norm": 0.9246005415916443, "learning_rate": 0.00014268506070405344, "loss": 1.5933, "step": 14500 }, { "epoch": 3.5984871969743937, "grad_norm": 0.8338398933410645, "learning_rate": 0.0001426145861510712, "loss": 1.6311, "step": 14510 }, { "epoch": 3.600967201934404, "grad_norm": 0.8370964527130127, "learning_rate": 0.00014254408572686642, "loss": 1.6231, "step": 14520 }, { "epoch": 3.6034472068944137, "grad_norm": 0.8520059585571289, "learning_rate": 0.00014247355947423973, "loss": 1.7129, "step": 14530 }, { "epoch": 3.6059272118544237, "grad_norm": 0.8423281908035278, "learning_rate": 0.0001424030074360075, "loss": 1.6319, "step": 14540 }, { "epoch": 3.6084072168144337, "grad_norm": 1.005537748336792, "learning_rate": 0.00014233242965500171, "loss": 1.6302, "step": 14550 }, { "epoch": 3.6108872217744437, "grad_norm": 0.8900558352470398, "learning_rate": 0.00014226182617406996, "loss": 1.5912, "step": 14560 }, { "epoch": 3.6133672267344537, "grad_norm": 0.8765195608139038, "learning_rate": 0.00014219119703607548, "loss": 1.627, "step": 14570 }, { "epoch": 3.615847231694463, "grad_norm": 0.7887884974479675, "learning_rate": 0.0001421205422838971, "loss": 1.6835, "step": 14580 }, { "epoch": 3.618327236654473, "grad_norm": 0.9221144914627075, "learning_rate": 0.0001420498619604291, "loss": 1.6755, "step": 14590 }, { "epoch": 3.620807241614483, "grad_norm": 0.8162357211112976, "learning_rate": 0.00014197915610858144, "loss": 1.6397, "step": 14600 }, { "epoch": 3.623287246574493, "grad_norm": 0.8468723297119141, "learning_rate": 0.0001419084247712794, "loss": 1.6348, "step": 14610 }, { "epoch": 3.625767251534503, "grad_norm": 0.891946017742157, "learning_rate": 0.00014183766799146384, "loss": 1.6595, "step": 14620 }, { "epoch": 3.6282472564945127, "grad_norm": 0.9180444478988647, "learning_rate": 0.0001417668858120911, "loss": 1.6729, "step": 14630 }, { "epoch": 3.630727261454523, "grad_norm": 0.9543078541755676, "learning_rate": 0.00014169607827613283, "loss": 1.5954, "step": 14640 }, { "epoch": 3.6332072664145327, "grad_norm": 0.8497787117958069, "learning_rate": 0.00014162524542657616, "loss": 1.645, "step": 14650 }, { "epoch": 3.6356872713745427, "grad_norm": 0.8668899536132812, "learning_rate": 0.00014155438730642354, "loss": 1.6452, "step": 14660 }, { "epoch": 3.6381672763345527, "grad_norm": 0.901395320892334, "learning_rate": 0.00014148350395869278, "loss": 1.6671, "step": 14670 }, { "epoch": 3.6406472812945627, "grad_norm": 1.0655369758605957, "learning_rate": 0.00014141259542641704, "loss": 1.6761, "step": 14680 }, { "epoch": 3.6431272862545727, "grad_norm": 0.8188184499740601, "learning_rate": 0.00014134166175264475, "loss": 1.6836, "step": 14690 }, { "epoch": 3.6456072912145823, "grad_norm": 0.8401299715042114, "learning_rate": 0.00014127070298043947, "loss": 1.6563, "step": 14700 }, { "epoch": 3.6480872961745923, "grad_norm": 0.8492573499679565, "learning_rate": 0.00014119971915288026, "loss": 1.6963, "step": 14710 }, { "epoch": 3.6505673011346023, "grad_norm": 0.8795192837715149, "learning_rate": 0.00014112871031306119, "loss": 1.6859, "step": 14720 }, { "epoch": 3.6530473060946123, "grad_norm": 0.870771050453186, "learning_rate": 0.00014105767650409153, "loss": 1.6251, "step": 14730 }, { "epoch": 3.6555273110546223, "grad_norm": 0.8470786809921265, "learning_rate": 0.00014098661776909582, "loss": 1.6641, "step": 14740 }, { "epoch": 3.658007316014632, "grad_norm": 0.873631477355957, "learning_rate": 0.0001409155341512136, "loss": 1.6614, "step": 14750 }, { "epoch": 3.660487320974642, "grad_norm": 0.8350811004638672, "learning_rate": 0.00014084442569359964, "loss": 1.6961, "step": 14760 }, { "epoch": 3.662967325934652, "grad_norm": 0.8719115853309631, "learning_rate": 0.00014077329243942369, "loss": 1.6678, "step": 14770 }, { "epoch": 3.665447330894662, "grad_norm": 0.8512424230575562, "learning_rate": 0.00014070213443187062, "loss": 1.6482, "step": 14780 }, { "epoch": 3.667927335854672, "grad_norm": 0.9083479046821594, "learning_rate": 0.00014063095171414024, "loss": 1.6229, "step": 14790 }, { "epoch": 3.670407340814682, "grad_norm": 0.9024813771247864, "learning_rate": 0.00014055974432944753, "loss": 1.6918, "step": 14800 }, { "epoch": 3.672887345774692, "grad_norm": 0.8541541695594788, "learning_rate": 0.00014048851232102222, "loss": 1.6573, "step": 14810 }, { "epoch": 3.6753673507347013, "grad_norm": 0.8761372566223145, "learning_rate": 0.0001404172557321092, "loss": 1.5982, "step": 14820 }, { "epoch": 3.6778473556947113, "grad_norm": 0.8903871774673462, "learning_rate": 0.00014034597460596817, "loss": 1.6667, "step": 14830 }, { "epoch": 3.6803273606547213, "grad_norm": 0.9093607664108276, "learning_rate": 0.00014027466898587374, "loss": 1.5902, "step": 14840 }, { "epoch": 3.6828073656147313, "grad_norm": 0.9126347303390503, "learning_rate": 0.00014020333891511536, "loss": 1.6916, "step": 14850 }, { "epoch": 3.6852873705747413, "grad_norm": 0.8325662612915039, "learning_rate": 0.0001401319844369974, "loss": 1.5759, "step": 14860 }, { "epoch": 3.687767375534751, "grad_norm": 0.8358848690986633, "learning_rate": 0.000140060605594839, "loss": 1.6168, "step": 14870 }, { "epoch": 3.690247380494761, "grad_norm": 0.8498730063438416, "learning_rate": 0.00013998920243197407, "loss": 1.6593, "step": 14880 }, { "epoch": 3.692727385454771, "grad_norm": 0.846355676651001, "learning_rate": 0.0001399177749917514, "loss": 1.662, "step": 14890 }, { "epoch": 3.695207390414781, "grad_norm": 0.8313154578208923, "learning_rate": 0.00013984632331753437, "loss": 1.707, "step": 14900 }, { "epoch": 3.697687395374791, "grad_norm": 0.9697901606559753, "learning_rate": 0.00013977484745270112, "loss": 1.6901, "step": 14910 }, { "epoch": 3.7001674003348004, "grad_norm": 0.8515355587005615, "learning_rate": 0.0001397033474406445, "loss": 1.6474, "step": 14920 }, { "epoch": 3.702647405294811, "grad_norm": 0.9480315446853638, "learning_rate": 0.00013963182332477208, "loss": 1.6305, "step": 14930 }, { "epoch": 3.7051274102548204, "grad_norm": 0.8484178185462952, "learning_rate": 0.0001395602751485059, "loss": 1.6325, "step": 14940 }, { "epoch": 3.7076074152148304, "grad_norm": 0.9017485976219177, "learning_rate": 0.00013948870295528275, "loss": 1.6506, "step": 14950 }, { "epoch": 3.7100874201748404, "grad_norm": 0.9036349058151245, "learning_rate": 0.00013941710678855396, "loss": 1.6943, "step": 14960 }, { "epoch": 3.7125674251348504, "grad_norm": 0.848396360874176, "learning_rate": 0.00013934548669178534, "loss": 1.6135, "step": 14970 }, { "epoch": 3.7150474300948604, "grad_norm": 0.8705415725708008, "learning_rate": 0.0001392738427084574, "loss": 1.6606, "step": 14980 }, { "epoch": 3.71752743505487, "grad_norm": 0.9253519177436829, "learning_rate": 0.00013920217488206502, "loss": 1.6466, "step": 14990 }, { "epoch": 3.72000744001488, "grad_norm": 0.876288890838623, "learning_rate": 0.0001391304832561175, "loss": 1.7095, "step": 15000 }, { "epoch": 3.72248744497489, "grad_norm": 0.8728715181350708, "learning_rate": 0.00013905876787413877, "loss": 1.6841, "step": 15010 }, { "epoch": 3.7249674499349, "grad_norm": 0.8103494048118591, "learning_rate": 0.000138987028779667, "loss": 1.6046, "step": 15020 }, { "epoch": 3.72744745489491, "grad_norm": 0.8583076596260071, "learning_rate": 0.0001389152660162549, "loss": 1.6191, "step": 15030 }, { "epoch": 3.7299274598549195, "grad_norm": 0.9058374762535095, "learning_rate": 0.00013884347962746948, "loss": 1.7004, "step": 15040 }, { "epoch": 3.7324074648149295, "grad_norm": 0.884195864200592, "learning_rate": 0.00013877166965689205, "loss": 1.6381, "step": 15050 }, { "epoch": 3.7348874697749395, "grad_norm": 0.8272615075111389, "learning_rate": 0.00013869983614811838, "loss": 1.6541, "step": 15060 }, { "epoch": 3.7373674747349495, "grad_norm": 0.8531262874603271, "learning_rate": 0.00013862797914475833, "loss": 1.6763, "step": 15070 }, { "epoch": 3.7398474796949595, "grad_norm": 0.8404091000556946, "learning_rate": 0.0001385560986904362, "loss": 1.6767, "step": 15080 }, { "epoch": 3.7423274846549694, "grad_norm": 0.8463233113288879, "learning_rate": 0.00013848419482879041, "loss": 1.6518, "step": 15090 }, { "epoch": 3.7448074896149794, "grad_norm": 1.0089993476867676, "learning_rate": 0.0001384122676034737, "loss": 1.6555, "step": 15100 }, { "epoch": 3.747287494574989, "grad_norm": 0.9475327134132385, "learning_rate": 0.00013834031705815288, "loss": 1.6226, "step": 15110 }, { "epoch": 3.749767499534999, "grad_norm": 0.8604915142059326, "learning_rate": 0.000138268343236509, "loss": 1.6499, "step": 15120 }, { "epoch": 3.752247504495009, "grad_norm": 0.8939393162727356, "learning_rate": 0.0001381963461822372, "loss": 1.6966, "step": 15130 }, { "epoch": 3.754727509455019, "grad_norm": 0.9622209668159485, "learning_rate": 0.0001381243259390467, "loss": 1.6915, "step": 15140 }, { "epoch": 3.757207514415029, "grad_norm": 0.8684049248695374, "learning_rate": 0.00013805228255066087, "loss": 1.6985, "step": 15150 }, { "epoch": 3.7596875193750385, "grad_norm": 0.8036013245582581, "learning_rate": 0.00013798021606081714, "loss": 1.6954, "step": 15160 }, { "epoch": 3.7621675243350485, "grad_norm": 0.8598616123199463, "learning_rate": 0.0001379081265132668, "loss": 1.7063, "step": 15170 }, { "epoch": 3.7646475292950585, "grad_norm": 0.9074985384941101, "learning_rate": 0.00013783601395177538, "loss": 1.6476, "step": 15180 }, { "epoch": 3.7671275342550685, "grad_norm": 0.8250356912612915, "learning_rate": 0.00013776387842012218, "loss": 1.6387, "step": 15190 }, { "epoch": 3.7696075392150785, "grad_norm": 0.9312306642532349, "learning_rate": 0.00013769171996210052, "loss": 1.6544, "step": 15200 }, { "epoch": 3.7720875441750885, "grad_norm": 0.8160702586174011, "learning_rate": 0.00013761953862151773, "loss": 1.6221, "step": 15210 }, { "epoch": 3.7745675491350985, "grad_norm": 0.9043015837669373, "learning_rate": 0.00013754733444219487, "loss": 1.621, "step": 15220 }, { "epoch": 3.777047554095108, "grad_norm": 0.909737229347229, "learning_rate": 0.00013747510746796695, "loss": 1.6104, "step": 15230 }, { "epoch": 3.779527559055118, "grad_norm": 0.8393818736076355, "learning_rate": 0.00013740285774268283, "loss": 1.6653, "step": 15240 }, { "epoch": 3.782007564015128, "grad_norm": 0.819762647151947, "learning_rate": 0.0001373305853102051, "loss": 1.6314, "step": 15250 }, { "epoch": 3.784487568975138, "grad_norm": 0.8263760805130005, "learning_rate": 0.0001372582902144103, "loss": 1.5974, "step": 15260 }, { "epoch": 3.786967573935148, "grad_norm": 0.8518638014793396, "learning_rate": 0.00013718597249918855, "loss": 1.5851, "step": 15270 }, { "epoch": 3.7894475788951576, "grad_norm": 0.8781617879867554, "learning_rate": 0.00013711363220844379, "loss": 1.6306, "step": 15280 }, { "epoch": 3.7919275838551676, "grad_norm": 0.8384988903999329, "learning_rate": 0.0001370412693860937, "loss": 1.6278, "step": 15290 }, { "epoch": 3.7944075888151776, "grad_norm": 0.8870386481285095, "learning_rate": 0.00013696888407606952, "loss": 1.6205, "step": 15300 }, { "epoch": 3.7968875937751876, "grad_norm": 0.8454505801200867, "learning_rate": 0.00013689647632231624, "loss": 1.6872, "step": 15310 }, { "epoch": 3.7993675987351976, "grad_norm": 0.852705717086792, "learning_rate": 0.00013682404616879247, "loss": 1.6451, "step": 15320 }, { "epoch": 3.801847603695207, "grad_norm": 0.9460528492927551, "learning_rate": 0.00013675159365947036, "loss": 1.749, "step": 15330 }, { "epoch": 3.8043276086552176, "grad_norm": 0.8498387336730957, "learning_rate": 0.00013667911883833572, "loss": 1.6879, "step": 15340 }, { "epoch": 3.806807613615227, "grad_norm": 0.8632598519325256, "learning_rate": 0.00013660662174938784, "loss": 1.6336, "step": 15350 }, { "epoch": 3.809287618575237, "grad_norm": 0.8933224081993103, "learning_rate": 0.00013653410243663952, "loss": 1.6859, "step": 15360 }, { "epoch": 3.811767623535247, "grad_norm": 0.8379520773887634, "learning_rate": 0.00013646156094411708, "loss": 1.7119, "step": 15370 }, { "epoch": 3.814247628495257, "grad_norm": 0.9459693431854248, "learning_rate": 0.00013638899731586036, "loss": 1.6674, "step": 15380 }, { "epoch": 3.816727633455267, "grad_norm": 0.8293569684028625, "learning_rate": 0.00013631641159592253, "loss": 1.6721, "step": 15390 }, { "epoch": 3.8192076384152767, "grad_norm": 0.8475781083106995, "learning_rate": 0.00013624380382837016, "loss": 1.6964, "step": 15400 }, { "epoch": 3.8216876433752867, "grad_norm": 0.87235426902771, "learning_rate": 0.0001361711740572834, "loss": 1.6833, "step": 15410 }, { "epoch": 3.8241676483352967, "grad_norm": 0.8890097141265869, "learning_rate": 0.00013609852232675558, "loss": 1.6289, "step": 15420 }, { "epoch": 3.8266476532953067, "grad_norm": 0.8693356513977051, "learning_rate": 0.00013602584868089334, "loss": 1.6395, "step": 15430 }, { "epoch": 3.8291276582553166, "grad_norm": 0.8777374029159546, "learning_rate": 0.00013595315316381677, "loss": 1.6592, "step": 15440 }, { "epoch": 3.831607663215326, "grad_norm": 0.8257191181182861, "learning_rate": 0.00013588043581965912, "loss": 1.6437, "step": 15450 }, { "epoch": 3.834087668175336, "grad_norm": 0.8561131954193115, "learning_rate": 0.00013580769669256694, "loss": 1.7308, "step": 15460 }, { "epoch": 3.836567673135346, "grad_norm": 0.8346956372261047, "learning_rate": 0.00013573493582670003, "loss": 1.6415, "step": 15470 }, { "epoch": 3.839047678095356, "grad_norm": 0.8684515357017517, "learning_rate": 0.0001356621532662313, "loss": 1.5912, "step": 15480 }, { "epoch": 3.841527683055366, "grad_norm": 0.8844882845878601, "learning_rate": 0.00013558934905534692, "loss": 1.5855, "step": 15490 }, { "epoch": 3.844007688015376, "grad_norm": 0.8354759216308594, "learning_rate": 0.00013551652323824618, "loss": 1.6571, "step": 15500 }, { "epoch": 3.846487692975386, "grad_norm": 0.8265485763549805, "learning_rate": 0.00013544367585914142, "loss": 1.5934, "step": 15510 }, { "epoch": 3.8489676979353957, "grad_norm": 0.8156879544258118, "learning_rate": 0.00013537080696225814, "loss": 1.6905, "step": 15520 }, { "epoch": 3.8514477028954057, "grad_norm": 0.8880894184112549, "learning_rate": 0.00013529791659183492, "loss": 1.6486, "step": 15530 }, { "epoch": 3.8539277078554157, "grad_norm": 0.8683051466941833, "learning_rate": 0.00013522500479212337, "loss": 1.6602, "step": 15540 }, { "epoch": 3.8564077128154257, "grad_norm": 0.8110718131065369, "learning_rate": 0.000135152071607388, "loss": 1.6421, "step": 15550 }, { "epoch": 3.8588877177754357, "grad_norm": 0.8966351747512817, "learning_rate": 0.00013507911708190645, "loss": 1.6323, "step": 15560 }, { "epoch": 3.8613677227354453, "grad_norm": 0.8900407552719116, "learning_rate": 0.00013500614125996923, "loss": 1.6449, "step": 15570 }, { "epoch": 3.8638477276954553, "grad_norm": 0.8171928524971008, "learning_rate": 0.00013493314418587982, "loss": 1.7337, "step": 15580 }, { "epoch": 3.8663277326554653, "grad_norm": 0.8650508522987366, "learning_rate": 0.00013486012590395457, "loss": 1.6864, "step": 15590 }, { "epoch": 3.8688077376154753, "grad_norm": 0.8077772259712219, "learning_rate": 0.00013478708645852272, "loss": 1.659, "step": 15600 }, { "epoch": 3.8712877425754852, "grad_norm": 0.9582907557487488, "learning_rate": 0.00013471402589392638, "loss": 1.7033, "step": 15610 }, { "epoch": 3.873767747535495, "grad_norm": 0.897682249546051, "learning_rate": 0.00013464094425452044, "loss": 1.7148, "step": 15620 }, { "epoch": 3.8762477524955052, "grad_norm": 0.8741872310638428, "learning_rate": 0.0001345678415846726, "loss": 1.6151, "step": 15630 }, { "epoch": 3.878727757455515, "grad_norm": 0.8569635152816772, "learning_rate": 0.00013449471792876334, "loss": 1.6303, "step": 15640 }, { "epoch": 3.881207762415525, "grad_norm": 0.9225397706031799, "learning_rate": 0.00013442157333118588, "loss": 1.6697, "step": 15650 }, { "epoch": 3.883687767375535, "grad_norm": 0.8708016872406006, "learning_rate": 0.0001343484078363461, "loss": 1.6731, "step": 15660 }, { "epoch": 3.886167772335545, "grad_norm": 0.8538072109222412, "learning_rate": 0.0001342752214886627, "loss": 1.6666, "step": 15670 }, { "epoch": 3.8886477772955548, "grad_norm": 0.8768512606620789, "learning_rate": 0.00013420201433256689, "loss": 1.6598, "step": 15680 }, { "epoch": 3.8911277822555643, "grad_norm": 0.8463048338890076, "learning_rate": 0.00013412878641250257, "loss": 1.6111, "step": 15690 }, { "epoch": 3.8936077872155743, "grad_norm": 0.9186829328536987, "learning_rate": 0.00013405553777292626, "loss": 1.6806, "step": 15700 }, { "epoch": 3.8960877921755843, "grad_norm": 0.8851925730705261, "learning_rate": 0.0001339822684583071, "loss": 1.6311, "step": 15710 }, { "epoch": 3.8985677971355943, "grad_norm": 0.9072033762931824, "learning_rate": 0.00013390897851312667, "loss": 1.6812, "step": 15720 }, { "epoch": 3.9010478020956043, "grad_norm": 0.8682987093925476, "learning_rate": 0.00013383566798187916, "loss": 1.7077, "step": 15730 }, { "epoch": 3.903527807055614, "grad_norm": 0.9126836657524109, "learning_rate": 0.00013376233690907125, "loss": 1.6453, "step": 15740 }, { "epoch": 3.9060078120156243, "grad_norm": 0.8302199840545654, "learning_rate": 0.000133688985339222, "loss": 1.6918, "step": 15750 }, { "epoch": 3.908487816975634, "grad_norm": 0.8872604966163635, "learning_rate": 0.0001336156133168631, "loss": 1.6749, "step": 15760 }, { "epoch": 3.910967821935644, "grad_norm": 0.8368204832077026, "learning_rate": 0.00013354222088653847, "loss": 1.6291, "step": 15770 }, { "epoch": 3.913447826895654, "grad_norm": 0.8654637336730957, "learning_rate": 0.0001334688080928045, "loss": 1.6324, "step": 15780 }, { "epoch": 3.915927831855664, "grad_norm": 0.8717195391654968, "learning_rate": 0.00013339537498022996, "loss": 1.6609, "step": 15790 }, { "epoch": 3.918407836815674, "grad_norm": 1.0029350519180298, "learning_rate": 0.00013332192159339594, "loss": 1.5961, "step": 15800 }, { "epoch": 3.9208878417756834, "grad_norm": 0.8780935406684875, "learning_rate": 0.0001332484479768958, "loss": 1.6268, "step": 15810 }, { "epoch": 3.9233678467356934, "grad_norm": 0.8545321822166443, "learning_rate": 0.00013317495417533524, "loss": 1.6405, "step": 15820 }, { "epoch": 3.9258478516957034, "grad_norm": 0.8589227199554443, "learning_rate": 0.00013310144023333213, "loss": 1.6694, "step": 15830 }, { "epoch": 3.9283278566557134, "grad_norm": 0.820460855960846, "learning_rate": 0.00013302790619551674, "loss": 1.6752, "step": 15840 }, { "epoch": 3.9308078616157234, "grad_norm": 0.9134910106658936, "learning_rate": 0.0001329543521065313, "loss": 1.6383, "step": 15850 }, { "epoch": 3.933287866575733, "grad_norm": 0.8335714340209961, "learning_rate": 0.0001328807780110304, "loss": 1.6459, "step": 15860 }, { "epoch": 3.935767871535743, "grad_norm": 0.8406058549880981, "learning_rate": 0.00013280718395368072, "loss": 1.6956, "step": 15870 }, { "epoch": 3.938247876495753, "grad_norm": 0.7940702438354492, "learning_rate": 0.00013273356997916104, "loss": 1.6769, "step": 15880 }, { "epoch": 3.940727881455763, "grad_norm": 0.9058804512023926, "learning_rate": 0.00013265993613216224, "loss": 1.6029, "step": 15890 }, { "epoch": 3.943207886415773, "grad_norm": 0.8984003663063049, "learning_rate": 0.00013258628245738725, "loss": 1.6405, "step": 15900 }, { "epoch": 3.945687891375783, "grad_norm": 0.8421421051025391, "learning_rate": 0.0001325126089995511, "loss": 1.6769, "step": 15910 }, { "epoch": 3.948167896335793, "grad_norm": 0.9049249887466431, "learning_rate": 0.00013243891580338072, "loss": 1.7335, "step": 15920 }, { "epoch": 3.9506479012958025, "grad_norm": 0.9383307099342346, "learning_rate": 0.00013236520291361515, "loss": 1.6907, "step": 15930 }, { "epoch": 3.9531279062558125, "grad_norm": 0.8742726445198059, "learning_rate": 0.00013229147037500534, "loss": 1.688, "step": 15940 }, { "epoch": 3.9556079112158224, "grad_norm": 0.8476269245147705, "learning_rate": 0.00013221771823231405, "loss": 1.6047, "step": 15950 }, { "epoch": 3.9580879161758324, "grad_norm": 0.9126971364021301, "learning_rate": 0.00013214394653031616, "loss": 1.6945, "step": 15960 }, { "epoch": 3.9605679211358424, "grad_norm": 0.9117973446846008, "learning_rate": 0.00013207015531379828, "loss": 1.6821, "step": 15970 }, { "epoch": 3.963047926095852, "grad_norm": 0.8037660121917725, "learning_rate": 0.00013199634462755886, "loss": 1.5992, "step": 15980 }, { "epoch": 3.965527931055862, "grad_norm": 0.8710912466049194, "learning_rate": 0.0001319225145164082, "loss": 1.6744, "step": 15990 }, { "epoch": 3.968007936015872, "grad_norm": 0.8851284980773926, "learning_rate": 0.00013184866502516845, "loss": 1.6502, "step": 16000 }, { "epoch": 3.970487940975882, "grad_norm": 0.8333200216293335, "learning_rate": 0.00013177479619867345, "loss": 1.6874, "step": 16010 }, { "epoch": 3.972967945935892, "grad_norm": 0.8478822708129883, "learning_rate": 0.00013170090808176883, "loss": 1.6114, "step": 16020 }, { "epoch": 3.9754479508959015, "grad_norm": 0.8424682021141052, "learning_rate": 0.00013162700071931184, "loss": 1.6576, "step": 16030 }, { "epoch": 3.977927955855912, "grad_norm": 0.8300601840019226, "learning_rate": 0.00013155307415617154, "loss": 1.5744, "step": 16040 }, { "epoch": 3.9804079608159215, "grad_norm": 0.8115140199661255, "learning_rate": 0.0001314791284372286, "loss": 1.6163, "step": 16050 }, { "epoch": 3.9828879657759315, "grad_norm": 0.895980715751648, "learning_rate": 0.00013140516360737523, "loss": 1.5989, "step": 16060 }, { "epoch": 3.9853679707359415, "grad_norm": 0.879489004611969, "learning_rate": 0.00013133117971151533, "loss": 1.6623, "step": 16070 }, { "epoch": 3.9878479756959515, "grad_norm": 0.8440234661102295, "learning_rate": 0.00013125717679456447, "loss": 1.7006, "step": 16080 }, { "epoch": 3.9903279806559615, "grad_norm": 0.8498426675796509, "learning_rate": 0.00013118315490144954, "loss": 1.6202, "step": 16090 }, { "epoch": 3.992807985615971, "grad_norm": 0.9186023473739624, "learning_rate": 0.00013110911407710908, "loss": 1.6572, "step": 16100 }, { "epoch": 3.995287990575981, "grad_norm": 0.858972430229187, "learning_rate": 0.00013103505436649318, "loss": 1.6199, "step": 16110 }, { "epoch": 3.997767995535991, "grad_norm": 0.9124534130096436, "learning_rate": 0.0001309609758145633, "loss": 1.6951, "step": 16120 }, { "epoch": 4.000248000496001, "grad_norm": 0.9108834862709045, "learning_rate": 0.00013088687846629234, "loss": 1.6633, "step": 16130 }, { "epoch": 4.002728005456011, "grad_norm": 0.8702978491783142, "learning_rate": 0.00013081276236666468, "loss": 1.5845, "step": 16140 }, { "epoch": 4.005208010416021, "grad_norm": 0.8718990683555603, "learning_rate": 0.00013073862756067604, "loss": 1.562, "step": 16150 }, { "epoch": 4.007688015376031, "grad_norm": 0.8283565044403076, "learning_rate": 0.00013066447409333345, "loss": 1.5418, "step": 16160 }, { "epoch": 4.010168020336041, "grad_norm": 0.833257794380188, "learning_rate": 0.00013059030200965536, "loss": 1.5805, "step": 16170 }, { "epoch": 4.012648025296051, "grad_norm": 0.8424034714698792, "learning_rate": 0.00013051611135467145, "loss": 1.5345, "step": 16180 }, { "epoch": 4.015128030256061, "grad_norm": 0.9445743560791016, "learning_rate": 0.00013044190217342277, "loss": 1.5442, "step": 16190 }, { "epoch": 4.01760803521607, "grad_norm": 0.9044946432113647, "learning_rate": 0.00013036767451096148, "loss": 1.5643, "step": 16200 }, { "epoch": 4.020088040176081, "grad_norm": 0.8936172127723694, "learning_rate": 0.00013029342841235105, "loss": 1.5514, "step": 16210 }, { "epoch": 4.02256804513609, "grad_norm": 0.8858177661895752, "learning_rate": 0.00013021916392266618, "loss": 1.5782, "step": 16220 }, { "epoch": 4.025048050096101, "grad_norm": 0.8533552885055542, "learning_rate": 0.0001301448810869926, "loss": 1.5399, "step": 16230 }, { "epoch": 4.02752805505611, "grad_norm": 0.8473833203315735, "learning_rate": 0.00013007057995042732, "loss": 1.54, "step": 16240 }, { "epoch": 4.03000806001612, "grad_norm": 0.972991943359375, "learning_rate": 0.00012999626055807838, "loss": 1.555, "step": 16250 }, { "epoch": 4.03248806497613, "grad_norm": 0.9364944100379944, "learning_rate": 0.00012992192295506488, "loss": 1.5752, "step": 16260 }, { "epoch": 4.03496806993614, "grad_norm": 0.852487325668335, "learning_rate": 0.00012984756718651704, "loss": 1.5312, "step": 16270 }, { "epoch": 4.03744807489615, "grad_norm": 0.9019510746002197, "learning_rate": 0.00012977319329757615, "loss": 1.5375, "step": 16280 }, { "epoch": 4.03992807985616, "grad_norm": 0.8527230024337769, "learning_rate": 0.00012969880133339437, "loss": 1.5458, "step": 16290 }, { "epoch": 4.042408084816169, "grad_norm": 0.9539262056350708, "learning_rate": 0.00012962439133913488, "loss": 1.5066, "step": 16300 }, { "epoch": 4.04488808977618, "grad_norm": 0.9539197087287903, "learning_rate": 0.0001295499633599719, "loss": 1.5595, "step": 16310 }, { "epoch": 4.047368094736189, "grad_norm": 0.913061261177063, "learning_rate": 0.00012947551744109043, "loss": 1.5516, "step": 16320 }, { "epoch": 4.0498480996962, "grad_norm": 0.9694399833679199, "learning_rate": 0.00012940105362768643, "loss": 1.5797, "step": 16330 }, { "epoch": 4.052328104656209, "grad_norm": 0.8864725232124329, "learning_rate": 0.0001293265719649668, "loss": 1.5784, "step": 16340 }, { "epoch": 4.05480810961622, "grad_norm": 0.9271555542945862, "learning_rate": 0.0001292520724981491, "loss": 1.5129, "step": 16350 }, { "epoch": 4.057288114576229, "grad_norm": 0.9082526564598083, "learning_rate": 0.00012917755527246179, "loss": 1.5535, "step": 16360 }, { "epoch": 4.059768119536239, "grad_norm": 0.8983141183853149, "learning_rate": 0.0001291030203331442, "loss": 1.4634, "step": 16370 }, { "epoch": 4.062248124496249, "grad_norm": 0.8280670046806335, "learning_rate": 0.00012902846772544624, "loss": 1.5698, "step": 16380 }, { "epoch": 4.064728129456259, "grad_norm": 0.8899021148681641, "learning_rate": 0.00012895389749462873, "loss": 1.576, "step": 16390 }, { "epoch": 4.067208134416269, "grad_norm": 0.9653387665748596, "learning_rate": 0.000128879309685963, "loss": 1.5106, "step": 16400 }, { "epoch": 4.069688139376279, "grad_norm": 0.8746720552444458, "learning_rate": 0.00012880470434473116, "loss": 1.6016, "step": 16410 }, { "epoch": 4.072168144336288, "grad_norm": 0.9301053285598755, "learning_rate": 0.00012873008151622606, "loss": 1.6109, "step": 16420 }, { "epoch": 4.074648149296299, "grad_norm": 0.8845133781433105, "learning_rate": 0.00012865544124575096, "loss": 1.5611, "step": 16430 }, { "epoch": 4.077128154256308, "grad_norm": 0.9510053396224976, "learning_rate": 0.0001285807835786198, "loss": 1.5434, "step": 16440 }, { "epoch": 4.079608159216319, "grad_norm": 0.8830054998397827, "learning_rate": 0.0001285061085601571, "loss": 1.5926, "step": 16450 }, { "epoch": 4.082088164176328, "grad_norm": 0.855392336845398, "learning_rate": 0.00012843141623569793, "loss": 1.61, "step": 16460 }, { "epoch": 4.084568169136339, "grad_norm": 0.8734385967254639, "learning_rate": 0.00012835670665058778, "loss": 1.6126, "step": 16470 }, { "epoch": 4.087048174096348, "grad_norm": 0.8623180985450745, "learning_rate": 0.00012828197985018276, "loss": 1.5737, "step": 16480 }, { "epoch": 4.089528179056358, "grad_norm": 0.8639189600944519, "learning_rate": 0.0001282072358798493, "loss": 1.598, "step": 16490 }, { "epoch": 4.092008184016368, "grad_norm": 0.8384979963302612, "learning_rate": 0.00012813247478496429, "loss": 1.5695, "step": 16500 }, { "epoch": 4.094488188976378, "grad_norm": 0.9530404210090637, "learning_rate": 0.00012805769661091505, "loss": 1.5871, "step": 16510 }, { "epoch": 4.096968193936388, "grad_norm": 1.038830280303955, "learning_rate": 0.00012798290140309923, "loss": 1.5774, "step": 16520 }, { "epoch": 4.099448198896398, "grad_norm": 0.9369829893112183, "learning_rate": 0.00012790808920692484, "loss": 1.5159, "step": 16530 }, { "epoch": 4.101928203856407, "grad_norm": 0.9211897253990173, "learning_rate": 0.00012783326006781022, "loss": 1.5691, "step": 16540 }, { "epoch": 4.104408208816418, "grad_norm": 0.9339462518692017, "learning_rate": 0.000127758414031184, "loss": 1.578, "step": 16550 }, { "epoch": 4.106888213776427, "grad_norm": 0.9960739612579346, "learning_rate": 0.00012768355114248494, "loss": 1.5432, "step": 16560 }, { "epoch": 4.109368218736438, "grad_norm": 0.8537956476211548, "learning_rate": 0.00012760867144716228, "loss": 1.5483, "step": 16570 }, { "epoch": 4.111848223696447, "grad_norm": 0.883642852306366, "learning_rate": 0.00012753377499067522, "loss": 1.5365, "step": 16580 }, { "epoch": 4.114328228656458, "grad_norm": 0.9451387524604797, "learning_rate": 0.00012745886181849325, "loss": 1.6028, "step": 16590 }, { "epoch": 4.116808233616467, "grad_norm": 0.9538312554359436, "learning_rate": 0.00012738393197609602, "loss": 1.6162, "step": 16600 }, { "epoch": 4.119288238576477, "grad_norm": 0.8514898419380188, "learning_rate": 0.00012730898550897325, "loss": 1.5132, "step": 16610 }, { "epoch": 4.121768243536487, "grad_norm": 0.9028187394142151, "learning_rate": 0.00012723402246262483, "loss": 1.5294, "step": 16620 }, { "epoch": 4.124248248496497, "grad_norm": 0.8952030539512634, "learning_rate": 0.0001271590428825606, "loss": 1.5304, "step": 16630 }, { "epoch": 4.126728253456507, "grad_norm": 0.9488352537155151, "learning_rate": 0.00012708404681430053, "loss": 1.5866, "step": 16640 }, { "epoch": 4.129208258416517, "grad_norm": 0.9322410821914673, "learning_rate": 0.00012700903430337457, "loss": 1.5799, "step": 16650 }, { "epoch": 4.131688263376526, "grad_norm": 0.9060160517692566, "learning_rate": 0.00012693400539532263, "loss": 1.6343, "step": 16660 }, { "epoch": 4.134168268336537, "grad_norm": 0.9226081967353821, "learning_rate": 0.0001268589601356946, "loss": 1.5547, "step": 16670 }, { "epoch": 4.136648273296546, "grad_norm": 0.9264705181121826, "learning_rate": 0.00012678389857005034, "loss": 1.6106, "step": 16680 }, { "epoch": 4.139128278256557, "grad_norm": 0.9333235621452332, "learning_rate": 0.0001267088207439595, "loss": 1.5993, "step": 16690 }, { "epoch": 4.141608283216566, "grad_norm": 0.9364868998527527, "learning_rate": 0.0001266337267030017, "loss": 1.5662, "step": 16700 }, { "epoch": 4.144088288176576, "grad_norm": 0.8981188535690308, "learning_rate": 0.00012655861649276635, "loss": 1.5942, "step": 16710 }, { "epoch": 4.146568293136586, "grad_norm": 0.916425347328186, "learning_rate": 0.00012648349015885273, "loss": 1.6121, "step": 16720 }, { "epoch": 4.149048298096596, "grad_norm": 0.8678816556930542, "learning_rate": 0.00012640834774686985, "loss": 1.521, "step": 16730 }, { "epoch": 4.151528303056606, "grad_norm": 0.8749919533729553, "learning_rate": 0.00012633318930243648, "loss": 1.5951, "step": 16740 }, { "epoch": 4.154008308016616, "grad_norm": 0.8698053956031799, "learning_rate": 0.00012625801487118121, "loss": 1.5455, "step": 16750 }, { "epoch": 4.156488312976626, "grad_norm": 0.9387723803520203, "learning_rate": 0.00012618282449874222, "loss": 1.5829, "step": 16760 }, { "epoch": 4.158968317936636, "grad_norm": 0.8954786062240601, "learning_rate": 0.00012610761823076745, "loss": 1.5551, "step": 16770 }, { "epoch": 4.1614483228966455, "grad_norm": 0.9120689630508423, "learning_rate": 0.00012603239611291443, "loss": 1.6123, "step": 16780 }, { "epoch": 4.163928327856656, "grad_norm": 0.9066893458366394, "learning_rate": 0.0001259571581908504, "loss": 1.6861, "step": 16790 }, { "epoch": 4.1664083328166654, "grad_norm": 0.9290622472763062, "learning_rate": 0.00012588190451025207, "loss": 1.5792, "step": 16800 }, { "epoch": 4.168888337776676, "grad_norm": 0.9743390679359436, "learning_rate": 0.00012580663511680586, "loss": 1.5547, "step": 16810 }, { "epoch": 4.171368342736685, "grad_norm": 0.8968302607536316, "learning_rate": 0.00012573135005620757, "loss": 1.5424, "step": 16820 }, { "epoch": 4.173848347696695, "grad_norm": 0.8571000099182129, "learning_rate": 0.00012565604937416267, "loss": 1.5457, "step": 16830 }, { "epoch": 4.176328352656705, "grad_norm": 0.9518805146217346, "learning_rate": 0.00012558073311638604, "loss": 1.5602, "step": 16840 }, { "epoch": 4.178808357616715, "grad_norm": 1.0199187994003296, "learning_rate": 0.0001255054013286019, "loss": 1.5634, "step": 16850 }, { "epoch": 4.181288362576725, "grad_norm": 0.9404400587081909, "learning_rate": 0.00012543005405654418, "loss": 1.5479, "step": 16860 }, { "epoch": 4.183768367536735, "grad_norm": 0.9390067458152771, "learning_rate": 0.00012535469134595595, "loss": 1.5106, "step": 16870 }, { "epoch": 4.186248372496745, "grad_norm": 0.9579924941062927, "learning_rate": 0.00012527931324258975, "loss": 1.5743, "step": 16880 }, { "epoch": 4.188728377456755, "grad_norm": 0.9165119528770447, "learning_rate": 0.0001252039197922075, "loss": 1.5944, "step": 16890 }, { "epoch": 4.1912083824167645, "grad_norm": 0.9501695036888123, "learning_rate": 0.00012512851104058036, "loss": 1.6268, "step": 16900 }, { "epoch": 4.193688387376775, "grad_norm": 0.8611862659454346, "learning_rate": 0.00012505308703348884, "loss": 1.5173, "step": 16910 }, { "epoch": 4.1961683923367845, "grad_norm": 0.9533225893974304, "learning_rate": 0.0001249776478167227, "loss": 1.5802, "step": 16920 }, { "epoch": 4.198648397296795, "grad_norm": 0.8947030305862427, "learning_rate": 0.0001249021934360809, "loss": 1.5093, "step": 16930 }, { "epoch": 4.2011284022568045, "grad_norm": 0.886284351348877, "learning_rate": 0.00012482672393737164, "loss": 1.5132, "step": 16940 }, { "epoch": 4.203608407216814, "grad_norm": 0.9174060225486755, "learning_rate": 0.00012475123936641228, "loss": 1.5967, "step": 16950 }, { "epoch": 4.2060884121768245, "grad_norm": 0.9190420508384705, "learning_rate": 0.00012467573976902935, "loss": 1.5946, "step": 16960 }, { "epoch": 4.208568417136834, "grad_norm": 0.8581568598747253, "learning_rate": 0.00012460022519105853, "loss": 1.5895, "step": 16970 }, { "epoch": 4.2110484220968445, "grad_norm": 0.9697762727737427, "learning_rate": 0.00012452469567834448, "loss": 1.5264, "step": 16980 }, { "epoch": 4.213528427056854, "grad_norm": 0.8641528487205505, "learning_rate": 0.00012444915127674103, "loss": 1.5314, "step": 16990 }, { "epoch": 4.216008432016864, "grad_norm": 0.8917909264564514, "learning_rate": 0.00012437359203211108, "loss": 1.5553, "step": 17000 }, { "epoch": 4.218488436976874, "grad_norm": 0.8812795877456665, "learning_rate": 0.0001242980179903264, "loss": 1.5835, "step": 17010 }, { "epoch": 4.220968441936884, "grad_norm": 0.9558031558990479, "learning_rate": 0.00012422242919726785, "loss": 1.6167, "step": 17020 }, { "epoch": 4.223448446896894, "grad_norm": 0.9204807877540588, "learning_rate": 0.00012414682569882526, "loss": 1.5671, "step": 17030 }, { "epoch": 4.225928451856904, "grad_norm": 0.9132686257362366, "learning_rate": 0.00012407120754089732, "loss": 1.5554, "step": 17040 }, { "epoch": 4.228408456816914, "grad_norm": 0.823129415512085, "learning_rate": 0.00012399557476939162, "loss": 1.604, "step": 17050 }, { "epoch": 4.230888461776924, "grad_norm": 0.889136552810669, "learning_rate": 0.00012391992743022472, "loss": 1.5277, "step": 17060 }, { "epoch": 4.233368466736933, "grad_norm": 0.9668729901313782, "learning_rate": 0.0001238442655693219, "loss": 1.5865, "step": 17070 }, { "epoch": 4.235848471696944, "grad_norm": 0.8794295191764832, "learning_rate": 0.00012376858923261733, "loss": 1.6106, "step": 17080 }, { "epoch": 4.238328476656953, "grad_norm": 0.884594202041626, "learning_rate": 0.00012369289846605395, "loss": 1.65, "step": 17090 }, { "epoch": 4.2408084816169636, "grad_norm": 0.9115503430366516, "learning_rate": 0.00012361719331558345, "loss": 1.5805, "step": 17100 }, { "epoch": 4.243288486576973, "grad_norm": 0.9228629469871521, "learning_rate": 0.00012354147382716627, "loss": 1.5496, "step": 17110 }, { "epoch": 4.245768491536983, "grad_norm": 0.9323840737342834, "learning_rate": 0.00012346574004677154, "loss": 1.5651, "step": 17120 }, { "epoch": 4.248248496496993, "grad_norm": 0.9578195810317993, "learning_rate": 0.00012338999202037712, "loss": 1.5877, "step": 17130 }, { "epoch": 4.250728501457003, "grad_norm": 0.9142360687255859, "learning_rate": 0.00012331422979396935, "loss": 1.6024, "step": 17140 }, { "epoch": 4.253208506417013, "grad_norm": 0.8800964951515198, "learning_rate": 0.0001232384534135435, "loss": 1.5427, "step": 17150 }, { "epoch": 4.255688511377023, "grad_norm": 0.9429461359977722, "learning_rate": 0.00012316266292510306, "loss": 1.5537, "step": 17160 }, { "epoch": 4.258168516337033, "grad_norm": 0.9197307229042053, "learning_rate": 0.00012308685837466037, "loss": 1.5751, "step": 17170 }, { "epoch": 4.260648521297043, "grad_norm": 0.936857283115387, "learning_rate": 0.00012301103980823618, "loss": 1.5294, "step": 17180 }, { "epoch": 4.263128526257052, "grad_norm": 0.8872460126876831, "learning_rate": 0.0001229352072718598, "loss": 1.5525, "step": 17190 }, { "epoch": 4.265608531217063, "grad_norm": 0.9296783804893494, "learning_rate": 0.00012285936081156897, "loss": 1.6134, "step": 17200 }, { "epoch": 4.268088536177072, "grad_norm": 0.9605028629302979, "learning_rate": 0.00012278350047340993, "loss": 1.5943, "step": 17210 }, { "epoch": 4.270568541137083, "grad_norm": 0.9095436930656433, "learning_rate": 0.00012270762630343734, "loss": 1.5653, "step": 17220 }, { "epoch": 4.273048546097092, "grad_norm": 0.8624875545501709, "learning_rate": 0.00012263173834771418, "loss": 1.5652, "step": 17230 }, { "epoch": 4.275528551057102, "grad_norm": 0.8745260238647461, "learning_rate": 0.00012255583665231196, "loss": 1.5874, "step": 17240 }, { "epoch": 4.278008556017112, "grad_norm": 0.9086858034133911, "learning_rate": 0.00012247992126331034, "loss": 1.6238, "step": 17250 }, { "epoch": 4.280488560977122, "grad_norm": 0.8863484859466553, "learning_rate": 0.00012240399222679746, "loss": 1.5528, "step": 17260 }, { "epoch": 4.282968565937132, "grad_norm": 0.9518700838088989, "learning_rate": 0.00012232804958886963, "loss": 1.6185, "step": 17270 }, { "epoch": 4.285448570897142, "grad_norm": 0.9303643107414246, "learning_rate": 0.00012225209339563145, "loss": 1.5571, "step": 17280 }, { "epoch": 4.287928575857151, "grad_norm": 0.8627997636795044, "learning_rate": 0.00012217612369319579, "loss": 1.5377, "step": 17290 }, { "epoch": 4.290408580817162, "grad_norm": 0.9821747541427612, "learning_rate": 0.00012210014052768371, "loss": 1.5882, "step": 17300 }, { "epoch": 4.292888585777171, "grad_norm": 0.9349398612976074, "learning_rate": 0.00012202414394522436, "loss": 1.5169, "step": 17310 }, { "epoch": 4.295368590737182, "grad_norm": 0.8663684725761414, "learning_rate": 0.00012194813399195518, "loss": 1.5934, "step": 17320 }, { "epoch": 4.297848595697191, "grad_norm": 0.9309785962104797, "learning_rate": 0.00012187211071402154, "loss": 1.578, "step": 17330 }, { "epoch": 4.300328600657202, "grad_norm": 0.9538763165473938, "learning_rate": 0.0001217960741575771, "loss": 1.593, "step": 17340 }, { "epoch": 4.302808605617211, "grad_norm": 0.8893478512763977, "learning_rate": 0.00012172002436878347, "loss": 1.5809, "step": 17350 }, { "epoch": 4.305288610577221, "grad_norm": 0.9751872420310974, "learning_rate": 0.00012164396139381029, "loss": 1.6679, "step": 17360 }, { "epoch": 4.307768615537231, "grad_norm": 0.908886730670929, "learning_rate": 0.00012156788527883522, "loss": 1.5627, "step": 17370 }, { "epoch": 4.310248620497241, "grad_norm": 0.8988890051841736, "learning_rate": 0.00012149179607004396, "loss": 1.6336, "step": 17380 }, { "epoch": 4.312728625457251, "grad_norm": 0.936722993850708, "learning_rate": 0.00012141569381363004, "loss": 1.6238, "step": 17390 }, { "epoch": 4.315208630417261, "grad_norm": 0.9053364992141724, "learning_rate": 0.00012133957855579501, "loss": 1.585, "step": 17400 }, { "epoch": 4.317688635377271, "grad_norm": 0.8379243016242981, "learning_rate": 0.00012126345034274824, "loss": 1.6305, "step": 17410 }, { "epoch": 4.320168640337281, "grad_norm": 1.0416642427444458, "learning_rate": 0.00012118730922070707, "loss": 1.5581, "step": 17420 }, { "epoch": 4.32264864529729, "grad_norm": 0.9125607013702393, "learning_rate": 0.00012111115523589651, "loss": 1.5019, "step": 17430 }, { "epoch": 4.325128650257301, "grad_norm": 0.9234462976455688, "learning_rate": 0.00012103498843454959, "loss": 1.5838, "step": 17440 }, { "epoch": 4.32760865521731, "grad_norm": 0.9076301455497742, "learning_rate": 0.00012095880886290691, "loss": 1.6045, "step": 17450 }, { "epoch": 4.330088660177321, "grad_norm": 0.8853614330291748, "learning_rate": 0.000120882616567217, "loss": 1.6001, "step": 17460 }, { "epoch": 4.33256866513733, "grad_norm": 0.9661962389945984, "learning_rate": 0.00012080641159373598, "loss": 1.6082, "step": 17470 }, { "epoch": 4.33504867009734, "grad_norm": 0.9738268852233887, "learning_rate": 0.00012073019398872778, "loss": 1.6306, "step": 17480 }, { "epoch": 4.33752867505735, "grad_norm": 0.9750413298606873, "learning_rate": 0.00012065396379846387, "loss": 1.5809, "step": 17490 }, { "epoch": 4.34000868001736, "grad_norm": 0.9576679468154907, "learning_rate": 0.00012057772106922349, "loss": 1.5498, "step": 17500 }, { "epoch": 4.34248868497737, "grad_norm": 0.9164273738861084, "learning_rate": 0.00012050146584729343, "loss": 1.6447, "step": 17510 }, { "epoch": 4.34496868993738, "grad_norm": 0.9457810521125793, "learning_rate": 0.00012042519817896804, "loss": 1.615, "step": 17520 }, { "epoch": 4.347448694897389, "grad_norm": 0.925794243812561, "learning_rate": 0.00012034891811054929, "loss": 1.5567, "step": 17530 }, { "epoch": 4.3499286998574, "grad_norm": 0.8849574327468872, "learning_rate": 0.00012027262568834658, "loss": 1.5638, "step": 17540 }, { "epoch": 4.352408704817409, "grad_norm": 0.851435661315918, "learning_rate": 0.00012019632095867697, "loss": 1.5958, "step": 17550 }, { "epoch": 4.35488870977742, "grad_norm": 0.9845481514930725, "learning_rate": 0.00012012000396786485, "loss": 1.6096, "step": 17560 }, { "epoch": 4.357368714737429, "grad_norm": 0.9225103259086609, "learning_rate": 0.00012004367476224206, "loss": 1.5877, "step": 17570 }, { "epoch": 4.359848719697439, "grad_norm": 0.8676236867904663, "learning_rate": 0.00011996733338814794, "loss": 1.5761, "step": 17580 }, { "epoch": 4.362328724657449, "grad_norm": 1.0130139589309692, "learning_rate": 0.00011989097989192917, "loss": 1.6344, "step": 17590 }, { "epoch": 4.364808729617459, "grad_norm": 0.9105481505393982, "learning_rate": 0.00011981461431993977, "loss": 1.5979, "step": 17600 }, { "epoch": 4.367288734577469, "grad_norm": 0.8567654490470886, "learning_rate": 0.00011973823671854112, "loss": 1.5648, "step": 17610 }, { "epoch": 4.369768739537479, "grad_norm": 0.8724489808082581, "learning_rate": 0.00011966184713410191, "loss": 1.5438, "step": 17620 }, { "epoch": 4.372248744497489, "grad_norm": 0.9080607891082764, "learning_rate": 0.00011958544561299807, "loss": 1.5705, "step": 17630 }, { "epoch": 4.374728749457499, "grad_norm": 0.9310619831085205, "learning_rate": 0.00011950903220161285, "loss": 1.5964, "step": 17640 }, { "epoch": 4.3772087544175085, "grad_norm": 0.9245249032974243, "learning_rate": 0.0001194326069463366, "loss": 1.6202, "step": 17650 }, { "epoch": 4.379688759377519, "grad_norm": 0.8819152116775513, "learning_rate": 0.00011935616989356692, "loss": 1.5045, "step": 17660 }, { "epoch": 4.382168764337528, "grad_norm": 0.9895660877227783, "learning_rate": 0.00011927972108970864, "loss": 1.5295, "step": 17670 }, { "epoch": 4.384648769297539, "grad_norm": 0.923691987991333, "learning_rate": 0.00011920326058117364, "loss": 1.6531, "step": 17680 }, { "epoch": 4.387128774257548, "grad_norm": 0.9826307892799377, "learning_rate": 0.00011912678841438087, "loss": 1.6019, "step": 17690 }, { "epoch": 4.389608779217559, "grad_norm": 0.8674072027206421, "learning_rate": 0.0001190503046357565, "loss": 1.5409, "step": 17700 }, { "epoch": 4.392088784177568, "grad_norm": 0.9578332304954529, "learning_rate": 0.00011897380929173364, "loss": 1.5687, "step": 17710 }, { "epoch": 4.394568789137578, "grad_norm": 0.9147858619689941, "learning_rate": 0.00011889730242875243, "loss": 1.5923, "step": 17720 }, { "epoch": 4.397048794097588, "grad_norm": 0.8567659854888916, "learning_rate": 0.00011882078409326002, "loss": 1.5652, "step": 17730 }, { "epoch": 4.399528799057598, "grad_norm": 0.9948070645332336, "learning_rate": 0.00011874425433171055, "loss": 1.6276, "step": 17740 }, { "epoch": 4.402008804017608, "grad_norm": 0.9563866257667542, "learning_rate": 0.00011866771319056504, "loss": 1.5963, "step": 17750 }, { "epoch": 4.404488808977618, "grad_norm": 0.8760507702827454, "learning_rate": 0.00011859116071629149, "loss": 1.6185, "step": 17760 }, { "epoch": 4.4069688139376275, "grad_norm": 0.9775795340538025, "learning_rate": 0.00011851459695536467, "loss": 1.6253, "step": 17770 }, { "epoch": 4.409448818897638, "grad_norm": 0.9185107350349426, "learning_rate": 0.00011843802195426635, "loss": 1.6111, "step": 17780 }, { "epoch": 4.4119288238576475, "grad_norm": 0.8888441920280457, "learning_rate": 0.00011836143575948501, "loss": 1.5946, "step": 17790 }, { "epoch": 4.414408828817658, "grad_norm": 0.976132333278656, "learning_rate": 0.00011828483841751596, "loss": 1.6458, "step": 17800 }, { "epoch": 4.4168888337776675, "grad_norm": 0.8497588634490967, "learning_rate": 0.0001182082299748613, "loss": 1.5704, "step": 17810 }, { "epoch": 4.419368838737677, "grad_norm": 0.8618581295013428, "learning_rate": 0.00011813161047802985, "loss": 1.5761, "step": 17820 }, { "epoch": 4.4218488436976875, "grad_norm": 0.8814425468444824, "learning_rate": 0.0001180549799735371, "loss": 1.5556, "step": 17830 }, { "epoch": 4.424328848657697, "grad_norm": 0.9566665291786194, "learning_rate": 0.00011797833850790527, "loss": 1.6298, "step": 17840 }, { "epoch": 4.4268088536177075, "grad_norm": 0.9071756601333618, "learning_rate": 0.0001179016861276633, "loss": 1.5629, "step": 17850 }, { "epoch": 4.429288858577717, "grad_norm": 0.8935988545417786, "learning_rate": 0.00011782502287934657, "loss": 1.5592, "step": 17860 }, { "epoch": 4.4317688635377275, "grad_norm": 0.9023693799972534, "learning_rate": 0.00011774834880949728, "loss": 1.6003, "step": 17870 }, { "epoch": 4.434248868497737, "grad_norm": 0.90963214635849, "learning_rate": 0.00011767166396466403, "loss": 1.6143, "step": 17880 }, { "epoch": 4.436728873457747, "grad_norm": 0.9300664663314819, "learning_rate": 0.000117594968391402, "loss": 1.4829, "step": 17890 }, { "epoch": 4.439208878417757, "grad_norm": 0.9050344824790955, "learning_rate": 0.00011751826213627297, "loss": 1.5639, "step": 17900 }, { "epoch": 4.441688883377767, "grad_norm": 0.9257349371910095, "learning_rate": 0.0001174415452458451, "loss": 1.5882, "step": 17910 }, { "epoch": 4.444168888337777, "grad_norm": 0.8953958749771118, "learning_rate": 0.00011736481776669306, "loss": 1.4813, "step": 17920 }, { "epoch": 4.446648893297787, "grad_norm": 0.8810930848121643, "learning_rate": 0.00011728807974539793, "loss": 1.6231, "step": 17930 }, { "epoch": 4.449128898257796, "grad_norm": 0.8710618019104004, "learning_rate": 0.0001172113312285472, "loss": 1.5967, "step": 17940 }, { "epoch": 4.451608903217807, "grad_norm": 1.0707768201828003, "learning_rate": 0.00011713457226273472, "loss": 1.5689, "step": 17950 }, { "epoch": 4.454088908177816, "grad_norm": 1.0256376266479492, "learning_rate": 0.00011705780289456068, "loss": 1.5905, "step": 17960 }, { "epoch": 4.4565689131378265, "grad_norm": 0.9599533677101135, "learning_rate": 0.00011698102317063164, "loss": 1.6043, "step": 17970 }, { "epoch": 4.459048918097836, "grad_norm": 0.9416408538818359, "learning_rate": 0.00011690423313756035, "loss": 1.6063, "step": 17980 }, { "epoch": 4.4615289230578465, "grad_norm": 0.9655886888504028, "learning_rate": 0.00011682743284196593, "loss": 1.5265, "step": 17990 }, { "epoch": 4.464008928017856, "grad_norm": 0.9687894582748413, "learning_rate": 0.00011675062233047364, "loss": 1.633, "step": 18000 }, { "epoch": 4.466488932977866, "grad_norm": 0.8962520956993103, "learning_rate": 0.00011667380164971495, "loss": 1.5742, "step": 18010 }, { "epoch": 4.468968937937876, "grad_norm": 1.0037689208984375, "learning_rate": 0.00011659697084632759, "loss": 1.5413, "step": 18020 }, { "epoch": 4.471448942897886, "grad_norm": 0.8900139927864075, "learning_rate": 0.00011652012996695533, "loss": 1.5923, "step": 18030 }, { "epoch": 4.473928947857896, "grad_norm": 0.9598802924156189, "learning_rate": 0.00011644327905824808, "loss": 1.6259, "step": 18040 }, { "epoch": 4.476408952817906, "grad_norm": 0.8544650077819824, "learning_rate": 0.00011636641816686191, "loss": 1.5554, "step": 18050 }, { "epoch": 4.478888957777915, "grad_norm": 0.9171319007873535, "learning_rate": 0.0001162895473394589, "loss": 1.5885, "step": 18060 }, { "epoch": 4.481368962737926, "grad_norm": 0.8878941535949707, "learning_rate": 0.00011621266662270709, "loss": 1.6038, "step": 18070 }, { "epoch": 4.483848967697935, "grad_norm": 0.895150363445282, "learning_rate": 0.00011613577606328068, "loss": 1.5593, "step": 18080 }, { "epoch": 4.486328972657946, "grad_norm": 0.8921257257461548, "learning_rate": 0.00011605887570785972, "loss": 1.6331, "step": 18090 }, { "epoch": 4.488808977617955, "grad_norm": 0.8955435752868652, "learning_rate": 0.00011598196560313024, "loss": 1.6114, "step": 18100 }, { "epoch": 4.491288982577965, "grad_norm": 0.9963474273681641, "learning_rate": 0.00011590504579578423, "loss": 1.6417, "step": 18110 }, { "epoch": 4.493768987537975, "grad_norm": 1.062529444694519, "learning_rate": 0.0001158281163325195, "loss": 1.6089, "step": 18120 }, { "epoch": 4.496248992497985, "grad_norm": 1.002374291419983, "learning_rate": 0.00011575117726003979, "loss": 1.5972, "step": 18130 }, { "epoch": 4.498728997457995, "grad_norm": 0.9317733645439148, "learning_rate": 0.00011567422862505464, "loss": 1.6363, "step": 18140 }, { "epoch": 4.501209002418005, "grad_norm": 0.8911933302879333, "learning_rate": 0.00011559727047427938, "loss": 1.5452, "step": 18150 }, { "epoch": 4.503689007378015, "grad_norm": 1.041032075881958, "learning_rate": 0.00011552030285443515, "loss": 1.5243, "step": 18160 }, { "epoch": 4.506169012338025, "grad_norm": 0.9466549158096313, "learning_rate": 0.00011544332581224884, "loss": 1.5773, "step": 18170 }, { "epoch": 4.508649017298034, "grad_norm": 0.9585476517677307, "learning_rate": 0.000115366339394453, "loss": 1.5939, "step": 18180 }, { "epoch": 4.511129022258045, "grad_norm": 0.8418630361557007, "learning_rate": 0.00011528934364778598, "loss": 1.6288, "step": 18190 }, { "epoch": 4.513609027218054, "grad_norm": 0.8050907850265503, "learning_rate": 0.00011521233861899167, "loss": 1.6518, "step": 18200 }, { "epoch": 4.516089032178065, "grad_norm": 0.9089813232421875, "learning_rate": 0.00011513532435481968, "loss": 1.6014, "step": 18210 }, { "epoch": 4.518569037138074, "grad_norm": 0.9099536538124084, "learning_rate": 0.00011505830090202523, "loss": 1.5566, "step": 18220 }, { "epoch": 4.521049042098085, "grad_norm": 0.9340983033180237, "learning_rate": 0.00011498126830736904, "loss": 1.5523, "step": 18230 }, { "epoch": 4.523529047058094, "grad_norm": 0.8862547278404236, "learning_rate": 0.00011490422661761744, "loss": 1.575, "step": 18240 }, { "epoch": 4.526009052018104, "grad_norm": 0.9429985284805298, "learning_rate": 0.00011482717587954232, "loss": 1.6204, "step": 18250 }, { "epoch": 4.528489056978114, "grad_norm": 0.9843054413795471, "learning_rate": 0.00011475011613992097, "loss": 1.5703, "step": 18260 }, { "epoch": 4.530969061938124, "grad_norm": 0.9080626964569092, "learning_rate": 0.00011467304744553618, "loss": 1.5923, "step": 18270 }, { "epoch": 4.533449066898134, "grad_norm": 0.8902535438537598, "learning_rate": 0.00011459596984317622, "loss": 1.5964, "step": 18280 }, { "epoch": 4.535929071858144, "grad_norm": 0.9213718175888062, "learning_rate": 0.00011451888337963475, "loss": 1.5722, "step": 18290 }, { "epoch": 4.538409076818153, "grad_norm": 0.8145584464073181, "learning_rate": 0.00011444178810171073, "loss": 1.6199, "step": 18300 }, { "epoch": 4.540889081778164, "grad_norm": 0.9201323390007019, "learning_rate": 0.00011436468405620859, "loss": 1.5207, "step": 18310 }, { "epoch": 4.543369086738173, "grad_norm": 0.8831585645675659, "learning_rate": 0.00011428757128993802, "loss": 1.5228, "step": 18320 }, { "epoch": 4.545849091698184, "grad_norm": 0.8730006814002991, "learning_rate": 0.00011421044984971399, "loss": 1.583, "step": 18330 }, { "epoch": 4.548329096658193, "grad_norm": 0.9736560583114624, "learning_rate": 0.00011413331978235678, "loss": 1.5421, "step": 18340 }, { "epoch": 4.550809101618203, "grad_norm": 0.9348646402359009, "learning_rate": 0.00011405618113469185, "loss": 1.6008, "step": 18350 }, { "epoch": 4.553289106578213, "grad_norm": 0.9965288043022156, "learning_rate": 0.00011397903395354996, "loss": 1.5903, "step": 18360 }, { "epoch": 4.555769111538223, "grad_norm": 0.9236303567886353, "learning_rate": 0.00011390187828576697, "loss": 1.6122, "step": 18370 }, { "epoch": 4.558249116498233, "grad_norm": 0.8591942191123962, "learning_rate": 0.0001138247141781839, "loss": 1.5646, "step": 18380 }, { "epoch": 4.560729121458243, "grad_norm": 0.9395301342010498, "learning_rate": 0.00011374754167764696, "loss": 1.5958, "step": 18390 }, { "epoch": 4.563209126418252, "grad_norm": 0.9221425652503967, "learning_rate": 0.00011367036083100735, "loss": 1.5468, "step": 18400 }, { "epoch": 4.565689131378263, "grad_norm": 0.9344799518585205, "learning_rate": 0.00011359317168512142, "loss": 1.6367, "step": 18410 }, { "epoch": 4.568169136338272, "grad_norm": 1.4354673624038696, "learning_rate": 0.00011351597428685055, "loss": 1.5494, "step": 18420 }, { "epoch": 4.570649141298283, "grad_norm": 0.9043157696723938, "learning_rate": 0.00011343876868306109, "loss": 1.5759, "step": 18430 }, { "epoch": 4.573129146258292, "grad_norm": 1.0231481790542603, "learning_rate": 0.00011336155492062439, "loss": 1.5701, "step": 18440 }, { "epoch": 4.575609151218303, "grad_norm": 0.9277271032333374, "learning_rate": 0.00011328433304641679, "loss": 1.6045, "step": 18450 }, { "epoch": 4.578089156178312, "grad_norm": 0.9549906253814697, "learning_rate": 0.0001132071031073195, "loss": 1.5703, "step": 18460 }, { "epoch": 4.580569161138322, "grad_norm": 0.8800386190414429, "learning_rate": 0.00011312986515021862, "loss": 1.598, "step": 18470 }, { "epoch": 4.583049166098332, "grad_norm": 0.9046245217323303, "learning_rate": 0.00011305261922200519, "loss": 1.563, "step": 18480 }, { "epoch": 4.585529171058342, "grad_norm": 0.9251183867454529, "learning_rate": 0.000112975365369575, "loss": 1.5971, "step": 18490 }, { "epoch": 4.588009176018352, "grad_norm": 0.969687283039093, "learning_rate": 0.00011289810363982875, "loss": 1.6287, "step": 18500 }, { "epoch": 4.590489180978362, "grad_norm": 0.8724190592765808, "learning_rate": 0.00011282083407967183, "loss": 1.5623, "step": 18510 }, { "epoch": 4.592969185938372, "grad_norm": 0.8921563625335693, "learning_rate": 0.00011274355673601444, "loss": 1.5819, "step": 18520 }, { "epoch": 4.595449190898382, "grad_norm": 1.0023810863494873, "learning_rate": 0.00011266627165577148, "loss": 1.5978, "step": 18530 }, { "epoch": 4.597929195858391, "grad_norm": 0.9489151835441589, "learning_rate": 0.00011258897888586255, "loss": 1.5348, "step": 18540 }, { "epoch": 4.600409200818402, "grad_norm": 0.9250749945640564, "learning_rate": 0.00011251167847321193, "loss": 1.5839, "step": 18550 }, { "epoch": 4.602889205778411, "grad_norm": 0.9519440531730652, "learning_rate": 0.00011243437046474853, "loss": 1.6104, "step": 18560 }, { "epoch": 4.605369210738422, "grad_norm": 0.9788894653320312, "learning_rate": 0.00011235705490740589, "loss": 1.5947, "step": 18570 }, { "epoch": 4.607849215698431, "grad_norm": 0.8717751502990723, "learning_rate": 0.00011227973184812206, "loss": 1.5585, "step": 18580 }, { "epoch": 4.610329220658441, "grad_norm": 0.9002294540405273, "learning_rate": 0.00011220240133383974, "loss": 1.5863, "step": 18590 }, { "epoch": 4.612809225618451, "grad_norm": 0.9561557769775391, "learning_rate": 0.00011212506341150615, "loss": 1.6116, "step": 18600 }, { "epoch": 4.615289230578461, "grad_norm": 0.8865216374397278, "learning_rate": 0.00011204771812807295, "loss": 1.6194, "step": 18610 }, { "epoch": 4.617769235538471, "grad_norm": 0.9733191728591919, "learning_rate": 0.00011197036553049625, "loss": 1.6004, "step": 18620 }, { "epoch": 4.620249240498481, "grad_norm": 0.9483121633529663, "learning_rate": 0.0001118930056657367, "loss": 1.6101, "step": 18630 }, { "epoch": 4.6227292454584905, "grad_norm": 0.8942649364471436, "learning_rate": 0.0001118156385807593, "loss": 1.6269, "step": 18640 }, { "epoch": 4.625209250418501, "grad_norm": 0.9124876856803894, "learning_rate": 0.00011173826432253338, "loss": 1.6196, "step": 18650 }, { "epoch": 4.6276892553785105, "grad_norm": 0.9813550114631653, "learning_rate": 0.00011166088293803276, "loss": 1.6095, "step": 18660 }, { "epoch": 4.630169260338521, "grad_norm": 0.8427887558937073, "learning_rate": 0.00011158349447423549, "loss": 1.6013, "step": 18670 }, { "epoch": 4.6326492652985305, "grad_norm": 0.9709888696670532, "learning_rate": 0.00011150609897812387, "loss": 1.5761, "step": 18680 }, { "epoch": 4.63512927025854, "grad_norm": 0.9658144116401672, "learning_rate": 0.00011142869649668466, "loss": 1.5706, "step": 18690 }, { "epoch": 4.6376092752185505, "grad_norm": 0.920049786567688, "learning_rate": 0.0001113512870769086, "loss": 1.5534, "step": 18700 }, { "epoch": 4.64008928017856, "grad_norm": 0.9348146319389343, "learning_rate": 0.00011127387076579092, "loss": 1.6135, "step": 18710 }, { "epoch": 4.6425692851385705, "grad_norm": 1.042134404182434, "learning_rate": 0.00011119644761033078, "loss": 1.5355, "step": 18720 }, { "epoch": 4.64504929009858, "grad_norm": 0.9624781012535095, "learning_rate": 0.00011111901765753168, "loss": 1.5915, "step": 18730 }, { "epoch": 4.6475292950585905, "grad_norm": 0.9615873098373413, "learning_rate": 0.00011104158095440113, "loss": 1.5729, "step": 18740 }, { "epoch": 4.6500093000186, "grad_norm": 0.9324778318405151, "learning_rate": 0.00011096413754795083, "loss": 1.606, "step": 18750 }, { "epoch": 4.65248930497861, "grad_norm": 0.921046793460846, "learning_rate": 0.00011088668748519647, "loss": 1.6428, "step": 18760 }, { "epoch": 4.65496930993862, "grad_norm": 1.008867859840393, "learning_rate": 0.00011080923081315781, "loss": 1.6306, "step": 18770 }, { "epoch": 4.65744931489863, "grad_norm": 0.8651713132858276, "learning_rate": 0.00011073176757885866, "loss": 1.599, "step": 18780 }, { "epoch": 4.65992931985864, "grad_norm": 0.9174283742904663, "learning_rate": 0.00011065429782932673, "loss": 1.5038, "step": 18790 }, { "epoch": 4.66240932481865, "grad_norm": 0.8761575818061829, "learning_rate": 0.00011057682161159379, "loss": 1.5958, "step": 18800 }, { "epoch": 4.66488932977866, "grad_norm": 0.9216639399528503, "learning_rate": 0.00011049933897269547, "loss": 1.5961, "step": 18810 }, { "epoch": 4.6673693347386696, "grad_norm": 0.9265581965446472, "learning_rate": 0.00011042184995967127, "loss": 1.6, "step": 18820 }, { "epoch": 4.669849339698679, "grad_norm": 0.9563526511192322, "learning_rate": 0.00011034435461956466, "loss": 1.5949, "step": 18830 }, { "epoch": 4.6723293446586895, "grad_norm": 0.8865848183631897, "learning_rate": 0.00011026685299942285, "loss": 1.6383, "step": 18840 }, { "epoch": 4.674809349618699, "grad_norm": 0.955040693283081, "learning_rate": 0.00011018934514629693, "loss": 1.5657, "step": 18850 }, { "epoch": 4.6772893545787095, "grad_norm": 0.9501464366912842, "learning_rate": 0.00011011183110724172, "loss": 1.6361, "step": 18860 }, { "epoch": 4.679769359538719, "grad_norm": 0.9693083167076111, "learning_rate": 0.00011003431092931589, "loss": 1.5893, "step": 18870 }, { "epoch": 4.682249364498729, "grad_norm": 0.893928587436676, "learning_rate": 0.00010995678465958168, "loss": 1.5705, "step": 18880 }, { "epoch": 4.684729369458739, "grad_norm": 0.9571881890296936, "learning_rate": 0.00010987925234510519, "loss": 1.5811, "step": 18890 }, { "epoch": 4.687209374418749, "grad_norm": 0.9786219596862793, "learning_rate": 0.0001098017140329561, "loss": 1.5305, "step": 18900 }, { "epoch": 4.689689379378759, "grad_norm": 0.972812294960022, "learning_rate": 0.0001097241697702077, "loss": 1.5485, "step": 18910 }, { "epoch": 4.692169384338769, "grad_norm": 0.929424524307251, "learning_rate": 0.00010964661960393703, "loss": 1.57, "step": 18920 }, { "epoch": 4.694649389298778, "grad_norm": 0.9796428084373474, "learning_rate": 0.00010956906358122456, "loss": 1.5994, "step": 18930 }, { "epoch": 4.697129394258789, "grad_norm": 0.9514657258987427, "learning_rate": 0.00010949150174915442, "loss": 1.6026, "step": 18940 }, { "epoch": 4.699609399218798, "grad_norm": 0.9882819056510925, "learning_rate": 0.00010941393415481423, "loss": 1.6254, "step": 18950 }, { "epoch": 4.702089404178809, "grad_norm": 0.8934924006462097, "learning_rate": 0.00010933636084529506, "loss": 1.5688, "step": 18960 }, { "epoch": 4.704569409138818, "grad_norm": 0.904549241065979, "learning_rate": 0.00010925878186769158, "loss": 1.5813, "step": 18970 }, { "epoch": 4.707049414098828, "grad_norm": 0.9113224744796753, "learning_rate": 0.00010918119726910174, "loss": 1.6022, "step": 18980 }, { "epoch": 4.709529419058838, "grad_norm": 0.9225690364837646, "learning_rate": 0.00010910360709662701, "loss": 1.5954, "step": 18990 }, { "epoch": 4.712009424018848, "grad_norm": 0.8976655602455139, "learning_rate": 0.00010902601139737225, "loss": 1.5457, "step": 19000 }, { "epoch": 4.714489428978858, "grad_norm": 0.9754193425178528, "learning_rate": 0.0001089484102184456, "loss": 1.5562, "step": 19010 }, { "epoch": 4.716969433938868, "grad_norm": 0.9099609851837158, "learning_rate": 0.00010887080360695854, "loss": 1.5565, "step": 19020 }, { "epoch": 4.719449438898878, "grad_norm": 0.9398124814033508, "learning_rate": 0.00010879319161002594, "loss": 1.611, "step": 19030 }, { "epoch": 4.721929443858888, "grad_norm": 0.9173715710639954, "learning_rate": 0.00010871557427476583, "loss": 1.5626, "step": 19040 }, { "epoch": 4.724409448818898, "grad_norm": 0.9978727698326111, "learning_rate": 0.00010863795164829955, "loss": 1.6209, "step": 19050 }, { "epoch": 4.726889453778908, "grad_norm": 0.9299024939537048, "learning_rate": 0.0001085603237777516, "loss": 1.5473, "step": 19060 }, { "epoch": 4.729369458738917, "grad_norm": 0.938007116317749, "learning_rate": 0.00010848269071024975, "loss": 1.6074, "step": 19070 }, { "epoch": 4.731849463698928, "grad_norm": 0.9142996072769165, "learning_rate": 0.00010840505249292476, "loss": 1.5746, "step": 19080 }, { "epoch": 4.734329468658937, "grad_norm": 0.8934489488601685, "learning_rate": 0.00010832740917291073, "loss": 1.6187, "step": 19090 }, { "epoch": 4.736809473618948, "grad_norm": 0.9229673743247986, "learning_rate": 0.00010824976079734472, "loss": 1.602, "step": 19100 }, { "epoch": 4.739289478578957, "grad_norm": 0.90119469165802, "learning_rate": 0.00010817210741336684, "loss": 1.6417, "step": 19110 }, { "epoch": 4.741769483538967, "grad_norm": 0.8615995645523071, "learning_rate": 0.00010809444906812033, "loss": 1.6084, "step": 19120 }, { "epoch": 4.744249488498977, "grad_norm": 0.9701696634292603, "learning_rate": 0.00010801678580875143, "loss": 1.5592, "step": 19130 }, { "epoch": 4.746729493458987, "grad_norm": 0.977692723274231, "learning_rate": 0.00010793911768240929, "loss": 1.5653, "step": 19140 }, { "epoch": 4.749209498418997, "grad_norm": 0.8544299006462097, "learning_rate": 0.00010786144473624608, "loss": 1.5722, "step": 19150 }, { "epoch": 4.751689503379007, "grad_norm": 0.9128935933113098, "learning_rate": 0.00010778376701741689, "loss": 1.5968, "step": 19160 }, { "epoch": 4.754169508339016, "grad_norm": 0.9116058349609375, "learning_rate": 0.00010770608457307965, "loss": 1.6172, "step": 19170 }, { "epoch": 4.756649513299027, "grad_norm": 0.9581488966941833, "learning_rate": 0.00010762839745039526, "loss": 1.5649, "step": 19180 }, { "epoch": 4.759129518259036, "grad_norm": 1.0204472541809082, "learning_rate": 0.00010755070569652735, "loss": 1.532, "step": 19190 }, { "epoch": 4.761609523219047, "grad_norm": 0.9427098035812378, "learning_rate": 0.00010747300935864243, "loss": 1.6342, "step": 19200 }, { "epoch": 4.764089528179056, "grad_norm": 0.8668059706687927, "learning_rate": 0.00010739530848390981, "loss": 1.5539, "step": 19210 }, { "epoch": 4.766569533139066, "grad_norm": 0.9295413494110107, "learning_rate": 0.0001073176031195015, "loss": 1.5857, "step": 19220 }, { "epoch": 4.769049538099076, "grad_norm": 0.92892986536026, "learning_rate": 0.00010723989331259223, "loss": 1.6505, "step": 19230 }, { "epoch": 4.771529543059086, "grad_norm": 0.9731125235557556, "learning_rate": 0.00010716217911035951, "loss": 1.6123, "step": 19240 }, { "epoch": 4.774009548019096, "grad_norm": 0.9232926964759827, "learning_rate": 0.00010708446055998343, "loss": 1.6521, "step": 19250 }, { "epoch": 4.776489552979106, "grad_norm": 0.9706169366836548, "learning_rate": 0.00010700673770864673, "loss": 1.6323, "step": 19260 }, { "epoch": 4.778969557939116, "grad_norm": 0.9299634099006653, "learning_rate": 0.00010692901060353483, "loss": 1.5784, "step": 19270 }, { "epoch": 4.781449562899126, "grad_norm": 0.8783867955207825, "learning_rate": 0.00010685127929183567, "loss": 1.6084, "step": 19280 }, { "epoch": 4.783929567859135, "grad_norm": 0.8926278948783875, "learning_rate": 0.00010677354382073976, "loss": 1.5303, "step": 19290 }, { "epoch": 4.786409572819146, "grad_norm": 0.9633662700653076, "learning_rate": 0.00010669580423744015, "loss": 1.5834, "step": 19300 }, { "epoch": 4.788889577779155, "grad_norm": 0.8755930066108704, "learning_rate": 0.0001066180605891323, "loss": 1.5767, "step": 19310 }, { "epoch": 4.791369582739166, "grad_norm": 0.912855327129364, "learning_rate": 0.00010654031292301432, "loss": 1.5674, "step": 19320 }, { "epoch": 4.793849587699175, "grad_norm": 0.9317062497138977, "learning_rate": 0.00010646256128628657, "loss": 1.5992, "step": 19330 }, { "epoch": 4.796329592659186, "grad_norm": 0.9335405230522156, "learning_rate": 0.0001063848057261519, "loss": 1.576, "step": 19340 }, { "epoch": 4.798809597619195, "grad_norm": 0.9386693239212036, "learning_rate": 0.0001063070462898156, "loss": 1.6253, "step": 19350 }, { "epoch": 4.801289602579205, "grad_norm": 0.9020708203315735, "learning_rate": 0.00010622928302448523, "loss": 1.5704, "step": 19360 }, { "epoch": 4.803769607539215, "grad_norm": 0.9368181228637695, "learning_rate": 0.00010615151597737065, "loss": 1.6198, "step": 19370 }, { "epoch": 4.806249612499225, "grad_norm": 0.8870874643325806, "learning_rate": 0.00010607374519568412, "loss": 1.6187, "step": 19380 }, { "epoch": 4.808729617459235, "grad_norm": 0.8933711647987366, "learning_rate": 0.00010599597072664012, "loss": 1.5932, "step": 19390 }, { "epoch": 4.811209622419245, "grad_norm": 0.9829477667808533, "learning_rate": 0.00010591819261745528, "loss": 1.6349, "step": 19400 }, { "epoch": 4.813689627379254, "grad_norm": 0.9438134431838989, "learning_rate": 0.00010584041091534863, "loss": 1.5833, "step": 19410 }, { "epoch": 4.816169632339265, "grad_norm": 0.9365798234939575, "learning_rate": 0.00010576262566754121, "loss": 1.5672, "step": 19420 }, { "epoch": 4.818649637299274, "grad_norm": 0.8850331902503967, "learning_rate": 0.00010568483692125624, "loss": 1.5688, "step": 19430 }, { "epoch": 4.821129642259285, "grad_norm": 0.988837480545044, "learning_rate": 0.00010560704472371919, "loss": 1.5419, "step": 19440 }, { "epoch": 4.823609647219294, "grad_norm": 0.9462050795555115, "learning_rate": 0.00010552924912215747, "loss": 1.5893, "step": 19450 }, { "epoch": 4.826089652179304, "grad_norm": 0.9274313449859619, "learning_rate": 0.00010545145016380065, "loss": 1.5462, "step": 19460 }, { "epoch": 4.828569657139314, "grad_norm": 0.8957258462905884, "learning_rate": 0.00010537364789588029, "loss": 1.6319, "step": 19470 }, { "epoch": 4.831049662099324, "grad_norm": 0.9592501521110535, "learning_rate": 0.00010529584236562995, "loss": 1.5837, "step": 19480 }, { "epoch": 4.833529667059334, "grad_norm": 0.8686869144439697, "learning_rate": 0.00010521803362028529, "loss": 1.5705, "step": 19490 }, { "epoch": 4.836009672019344, "grad_norm": 0.8717570304870605, "learning_rate": 0.00010514022170708374, "loss": 1.606, "step": 19500 }, { "epoch": 4.8384896769793535, "grad_norm": 0.9135420322418213, "learning_rate": 0.00010506240667326477, "loss": 1.633, "step": 19510 }, { "epoch": 4.840969681939364, "grad_norm": 0.9622677564620972, "learning_rate": 0.00010498458856606972, "loss": 1.6093, "step": 19520 }, { "epoch": 4.8434496868993735, "grad_norm": 0.9665126204490662, "learning_rate": 0.00010490676743274181, "loss": 1.5636, "step": 19530 }, { "epoch": 4.845929691859384, "grad_norm": 0.971638023853302, "learning_rate": 0.00010482894332052607, "loss": 1.631, "step": 19540 }, { "epoch": 4.8484096968193935, "grad_norm": 0.956616997718811, "learning_rate": 0.00010475111627666934, "loss": 1.61, "step": 19550 }, { "epoch": 4.850889701779404, "grad_norm": 0.9185702800750732, "learning_rate": 0.00010467328634842024, "loss": 1.5968, "step": 19560 }, { "epoch": 4.8533697067394135, "grad_norm": 0.910837709903717, "learning_rate": 0.00010459545358302918, "loss": 1.5601, "step": 19570 }, { "epoch": 4.855849711699423, "grad_norm": 0.885400116443634, "learning_rate": 0.00010451761802774824, "loss": 1.6183, "step": 19580 }, { "epoch": 4.8583297166594335, "grad_norm": 0.893608808517456, "learning_rate": 0.00010443977972983126, "loss": 1.5548, "step": 19590 }, { "epoch": 4.860809721619443, "grad_norm": 0.8942586779594421, "learning_rate": 0.00010436193873653361, "loss": 1.6634, "step": 19600 }, { "epoch": 4.8632897265794535, "grad_norm": 0.9296571016311646, "learning_rate": 0.00010428409509511248, "loss": 1.5669, "step": 19610 }, { "epoch": 4.865769731539463, "grad_norm": 0.946972668170929, "learning_rate": 0.00010420624885282653, "loss": 1.5418, "step": 19620 }, { "epoch": 4.8682497364994735, "grad_norm": 0.8497663140296936, "learning_rate": 0.00010412840005693603, "loss": 1.5891, "step": 19630 }, { "epoch": 4.870729741459483, "grad_norm": 0.8625304102897644, "learning_rate": 0.00010405054875470286, "loss": 1.5861, "step": 19640 }, { "epoch": 4.873209746419493, "grad_norm": 0.9989964962005615, "learning_rate": 0.00010397269499339035, "loss": 1.6085, "step": 19650 }, { "epoch": 4.875689751379503, "grad_norm": 0.9507265090942383, "learning_rate": 0.00010389483882026334, "loss": 1.573, "step": 19660 }, { "epoch": 4.8781697563395126, "grad_norm": 0.9283966422080994, "learning_rate": 0.00010381698028258817, "loss": 1.5744, "step": 19670 }, { "epoch": 4.880649761299523, "grad_norm": 0.9498452544212341, "learning_rate": 0.0001037391194276326, "loss": 1.63, "step": 19680 }, { "epoch": 4.8831297662595325, "grad_norm": 0.8902698159217834, "learning_rate": 0.0001036612563026657, "loss": 1.604, "step": 19690 }, { "epoch": 4.885609771219542, "grad_norm": 0.9303712248802185, "learning_rate": 0.0001035833909549581, "loss": 1.569, "step": 19700 }, { "epoch": 4.8880897761795525, "grad_norm": 0.9185006618499756, "learning_rate": 0.00010350552343178163, "loss": 1.5671, "step": 19710 }, { "epoch": 4.890569781139562, "grad_norm": 0.9281051754951477, "learning_rate": 0.00010342765378040953, "loss": 1.6518, "step": 19720 }, { "epoch": 4.8930497860995725, "grad_norm": 0.894377589225769, "learning_rate": 0.00010334978204811629, "loss": 1.5639, "step": 19730 }, { "epoch": 4.895529791059582, "grad_norm": 0.9298607110977173, "learning_rate": 0.00010327190828217763, "loss": 1.5616, "step": 19740 }, { "epoch": 4.898009796019592, "grad_norm": 0.9328896999359131, "learning_rate": 0.00010319403252987059, "loss": 1.5928, "step": 19750 }, { "epoch": 4.900489800979602, "grad_norm": 0.8884739875793457, "learning_rate": 0.00010311615483847332, "loss": 1.5494, "step": 19760 }, { "epoch": 4.902969805939612, "grad_norm": 0.9351387619972229, "learning_rate": 0.00010303827525526523, "loss": 1.5713, "step": 19770 }, { "epoch": 4.905449810899622, "grad_norm": 0.9865672588348389, "learning_rate": 0.00010296039382752687, "loss": 1.6145, "step": 19780 }, { "epoch": 4.907929815859632, "grad_norm": 0.9271677732467651, "learning_rate": 0.00010288251060253986, "loss": 1.5772, "step": 19790 }, { "epoch": 4.910409820819641, "grad_norm": 0.911442220211029, "learning_rate": 0.0001028046256275869, "loss": 1.5749, "step": 19800 }, { "epoch": 4.912889825779652, "grad_norm": 0.9066318869590759, "learning_rate": 0.00010272673894995187, "loss": 1.5969, "step": 19810 }, { "epoch": 4.915369830739661, "grad_norm": 1.0287082195281982, "learning_rate": 0.00010264885061691954, "loss": 1.4949, "step": 19820 }, { "epoch": 4.917849835699672, "grad_norm": 0.9573404788970947, "learning_rate": 0.00010257096067577572, "loss": 1.6417, "step": 19830 }, { "epoch": 4.920329840659681, "grad_norm": 0.9665036201477051, "learning_rate": 0.0001024930691738073, "loss": 1.5804, "step": 19840 }, { "epoch": 4.922809845619692, "grad_norm": 0.9561458826065063, "learning_rate": 0.000102415176158302, "loss": 1.5897, "step": 19850 }, { "epoch": 4.925289850579701, "grad_norm": 0.9099255800247192, "learning_rate": 0.0001023372816765485, "loss": 1.549, "step": 19860 }, { "epoch": 4.927769855539711, "grad_norm": 0.9153082966804504, "learning_rate": 0.0001022593857758364, "loss": 1.5885, "step": 19870 }, { "epoch": 4.930249860499721, "grad_norm": 0.8561480045318604, "learning_rate": 0.00010218148850345613, "loss": 1.5734, "step": 19880 }, { "epoch": 4.932729865459731, "grad_norm": 0.9554060101509094, "learning_rate": 0.00010210358990669888, "loss": 1.6141, "step": 19890 }, { "epoch": 4.935209870419741, "grad_norm": 0.941474199295044, "learning_rate": 0.00010202569003285682, "loss": 1.5803, "step": 19900 }, { "epoch": 4.937689875379751, "grad_norm": 0.8871884346008301, "learning_rate": 0.00010194778892922274, "loss": 1.5363, "step": 19910 }, { "epoch": 4.940169880339761, "grad_norm": 0.9098392128944397, "learning_rate": 0.00010186988664309023, "loss": 1.6852, "step": 19920 }, { "epoch": 4.942649885299771, "grad_norm": 0.9218733310699463, "learning_rate": 0.00010179198322175361, "loss": 1.5865, "step": 19930 }, { "epoch": 4.94512989025978, "grad_norm": 0.9652133584022522, "learning_rate": 0.00010171407871250789, "loss": 1.6027, "step": 19940 }, { "epoch": 4.947609895219791, "grad_norm": 0.8366691470146179, "learning_rate": 0.00010163617316264869, "loss": 1.5675, "step": 19950 }, { "epoch": 4.9500899001798, "grad_norm": 0.9406192898750305, "learning_rate": 0.00010155826661947231, "loss": 1.6375, "step": 19960 }, { "epoch": 4.952569905139811, "grad_norm": 0.9226821064949036, "learning_rate": 0.00010148035913027569, "loss": 1.6013, "step": 19970 }, { "epoch": 4.95504991009982, "grad_norm": 0.9119101762771606, "learning_rate": 0.00010140245074235624, "loss": 1.6033, "step": 19980 }, { "epoch": 4.95752991505983, "grad_norm": 0.9745951890945435, "learning_rate": 0.00010132454150301201, "loss": 1.566, "step": 19990 }, { "epoch": 4.96000992001984, "grad_norm": 0.8798539638519287, "learning_rate": 0.00010124663145954152, "loss": 1.5833, "step": 20000 }, { "epoch": 4.96248992497985, "grad_norm": 0.9379274249076843, "learning_rate": 0.00010116872065924376, "loss": 1.5883, "step": 20010 }, { "epoch": 4.96496992993986, "grad_norm": 0.9305238127708435, "learning_rate": 0.00010109080914941824, "loss": 1.6406, "step": 20020 }, { "epoch": 4.96744993489987, "grad_norm": 0.9678001999855042, "learning_rate": 0.00010101289697736486, "loss": 1.5633, "step": 20030 }, { "epoch": 4.969929939859879, "grad_norm": 1.002220630645752, "learning_rate": 0.00010093498419038394, "loss": 1.5972, "step": 20040 }, { "epoch": 4.97240994481989, "grad_norm": 0.9045056700706482, "learning_rate": 0.00010085707083577613, "loss": 1.5854, "step": 20050 }, { "epoch": 4.974889949779899, "grad_norm": 0.8465107679367065, "learning_rate": 0.00010077915696084248, "loss": 1.6154, "step": 20060 }, { "epoch": 4.97736995473991, "grad_norm": 0.8597650527954102, "learning_rate": 0.00010070124261288436, "loss": 1.5694, "step": 20070 }, { "epoch": 4.979849959699919, "grad_norm": 0.9025321006774902, "learning_rate": 0.00010062332783920336, "loss": 1.6415, "step": 20080 }, { "epoch": 4.982329964659929, "grad_norm": 0.8586678504943848, "learning_rate": 0.00010054541268710138, "loss": 1.5581, "step": 20090 }, { "epoch": 4.984809969619939, "grad_norm": 0.866348147392273, "learning_rate": 0.00010046749720388055, "loss": 1.6083, "step": 20100 }, { "epoch": 4.987289974579949, "grad_norm": 0.9521636962890625, "learning_rate": 0.0001003895814368432, "loss": 1.5893, "step": 20110 }, { "epoch": 4.989769979539959, "grad_norm": 0.9525920748710632, "learning_rate": 0.00010031166543329178, "loss": 1.5775, "step": 20120 }, { "epoch": 4.992249984499969, "grad_norm": 0.9372658133506775, "learning_rate": 0.00010023374924052897, "loss": 1.6016, "step": 20130 }, { "epoch": 4.994729989459979, "grad_norm": 0.9438722729682922, "learning_rate": 0.0001001558329058575, "loss": 1.5807, "step": 20140 }, { "epoch": 4.997209994419989, "grad_norm": 0.9148738980293274, "learning_rate": 0.00010007791647658022, "loss": 1.66, "step": 20150 }, { "epoch": 4.999689999379999, "grad_norm": 0.9490894675254822, "learning_rate": 0.0001, "loss": 1.5962, "step": 20160 }, { "epoch": 5.002170004340009, "grad_norm": 0.9742351770401001, "learning_rate": 9.992208352341978e-05, "loss": 1.5214, "step": 20170 }, { "epoch": 5.004650009300018, "grad_norm": 0.9827502369880676, "learning_rate": 9.98441670941425e-05, "loss": 1.4653, "step": 20180 }, { "epoch": 5.007130014260029, "grad_norm": 0.9274985790252686, "learning_rate": 9.976625075947103e-05, "loss": 1.5592, "step": 20190 }, { "epoch": 5.009610019220038, "grad_norm": 0.9367074370384216, "learning_rate": 9.968833456670824e-05, "loss": 1.5098, "step": 20200 }, { "epoch": 5.012090024180049, "grad_norm": 0.9240421056747437, "learning_rate": 9.96104185631568e-05, "loss": 1.5163, "step": 20210 }, { "epoch": 5.014570029140058, "grad_norm": 0.9595016241073608, "learning_rate": 9.953250279611946e-05, "loss": 1.4962, "step": 20220 }, { "epoch": 5.017050034100068, "grad_norm": 0.9109339118003845, "learning_rate": 9.945458731289862e-05, "loss": 1.526, "step": 20230 }, { "epoch": 5.019530039060078, "grad_norm": 0.8929713368415833, "learning_rate": 9.937667216079665e-05, "loss": 1.5212, "step": 20240 }, { "epoch": 5.022010044020088, "grad_norm": 0.9416952133178711, "learning_rate": 9.929875738711564e-05, "loss": 1.5036, "step": 20250 }, { "epoch": 5.024490048980098, "grad_norm": 0.9692783951759338, "learning_rate": 9.922084303915753e-05, "loss": 1.4676, "step": 20260 }, { "epoch": 5.026970053940108, "grad_norm": 1.0281188488006592, "learning_rate": 9.914292916422387e-05, "loss": 1.5252, "step": 20270 }, { "epoch": 5.029450058900117, "grad_norm": 0.9119085669517517, "learning_rate": 9.90650158096161e-05, "loss": 1.5494, "step": 20280 }, { "epoch": 5.031930063860128, "grad_norm": 0.8939598798751831, "learning_rate": 9.898710302263515e-05, "loss": 1.4699, "step": 20290 }, { "epoch": 5.034410068820137, "grad_norm": 0.8867928981781006, "learning_rate": 9.890919085058178e-05, "loss": 1.4507, "step": 20300 }, { "epoch": 5.036890073780148, "grad_norm": 0.9023534655570984, "learning_rate": 9.883127934075624e-05, "loss": 1.487, "step": 20310 }, { "epoch": 5.039370078740157, "grad_norm": 0.8967320322990417, "learning_rate": 9.875336854045851e-05, "loss": 1.5008, "step": 20320 }, { "epoch": 5.041850083700167, "grad_norm": 0.9554635882377625, "learning_rate": 9.867545849698799e-05, "loss": 1.5245, "step": 20330 }, { "epoch": 5.044330088660177, "grad_norm": 0.9490965604782104, "learning_rate": 9.859754925764378e-05, "loss": 1.4932, "step": 20340 }, { "epoch": 5.046810093620187, "grad_norm": 0.9715158939361572, "learning_rate": 9.851964086972432e-05, "loss": 1.4802, "step": 20350 }, { "epoch": 5.049290098580197, "grad_norm": 1.0020184516906738, "learning_rate": 9.84417333805277e-05, "loss": 1.5061, "step": 20360 }, { "epoch": 5.051770103540207, "grad_norm": 0.9794074892997742, "learning_rate": 9.836382683735132e-05, "loss": 1.4768, "step": 20370 }, { "epoch": 5.054250108500217, "grad_norm": 0.9327630400657654, "learning_rate": 9.828592128749215e-05, "loss": 1.5129, "step": 20380 }, { "epoch": 5.056730113460227, "grad_norm": 0.9747712016105652, "learning_rate": 9.820801677824639e-05, "loss": 1.5984, "step": 20390 }, { "epoch": 5.0592101184202365, "grad_norm": 0.9542945623397827, "learning_rate": 9.813011335690981e-05, "loss": 1.521, "step": 20400 }, { "epoch": 5.061690123380247, "grad_norm": 0.933824896812439, "learning_rate": 9.805221107077726e-05, "loss": 1.4929, "step": 20410 }, { "epoch": 5.0641701283402565, "grad_norm": 0.9382598996162415, "learning_rate": 9.79743099671432e-05, "loss": 1.54, "step": 20420 }, { "epoch": 5.066650133300267, "grad_norm": 0.9482308030128479, "learning_rate": 9.789641009330111e-05, "loss": 1.53, "step": 20430 }, { "epoch": 5.0691301382602765, "grad_norm": 0.9552004337310791, "learning_rate": 9.78185114965439e-05, "loss": 1.4699, "step": 20440 }, { "epoch": 5.071610143220286, "grad_norm": 0.9345937967300415, "learning_rate": 9.774061422416359e-05, "loss": 1.5226, "step": 20450 }, { "epoch": 5.0740901481802965, "grad_norm": 0.878754734992981, "learning_rate": 9.766271832345152e-05, "loss": 1.546, "step": 20460 }, { "epoch": 5.076570153140306, "grad_norm": 0.9064426422119141, "learning_rate": 9.7584823841698e-05, "loss": 1.4589, "step": 20470 }, { "epoch": 5.0790501581003165, "grad_norm": 0.9096664786338806, "learning_rate": 9.750693082619273e-05, "loss": 1.4949, "step": 20480 }, { "epoch": 5.081530163060326, "grad_norm": 0.9073386192321777, "learning_rate": 9.74290393242243e-05, "loss": 1.4912, "step": 20490 }, { "epoch": 5.0840101680203365, "grad_norm": 0.982649564743042, "learning_rate": 9.735114938308051e-05, "loss": 1.4881, "step": 20500 }, { "epoch": 5.086490172980346, "grad_norm": 0.9858987927436829, "learning_rate": 9.727326105004817e-05, "loss": 1.4769, "step": 20510 }, { "epoch": 5.0889701779403556, "grad_norm": 1.029187560081482, "learning_rate": 9.719537437241312e-05, "loss": 1.4935, "step": 20520 }, { "epoch": 5.091450182900366, "grad_norm": 0.9769375920295715, "learning_rate": 9.711748939746016e-05, "loss": 1.4886, "step": 20530 }, { "epoch": 5.0939301878603755, "grad_norm": 1.0216981172561646, "learning_rate": 9.703960617247317e-05, "loss": 1.537, "step": 20540 }, { "epoch": 5.096410192820386, "grad_norm": 1.0032145977020264, "learning_rate": 9.696172474473479e-05, "loss": 1.5073, "step": 20550 }, { "epoch": 5.0988901977803955, "grad_norm": 0.9975841641426086, "learning_rate": 9.688384516152672e-05, "loss": 1.4811, "step": 20560 }, { "epoch": 5.101370202740405, "grad_norm": 0.9111374020576477, "learning_rate": 9.680596747012945e-05, "loss": 1.5231, "step": 20570 }, { "epoch": 5.1038502077004155, "grad_norm": 0.9400519728660583, "learning_rate": 9.67280917178224e-05, "loss": 1.5583, "step": 20580 }, { "epoch": 5.106330212660425, "grad_norm": 0.9704780578613281, "learning_rate": 9.665021795188373e-05, "loss": 1.4422, "step": 20590 }, { "epoch": 5.1088102176204355, "grad_norm": 0.9567640423774719, "learning_rate": 9.657234621959051e-05, "loss": 1.5286, "step": 20600 }, { "epoch": 5.111290222580445, "grad_norm": 0.9019995927810669, "learning_rate": 9.649447656821839e-05, "loss": 1.5245, "step": 20610 }, { "epoch": 5.1137702275404555, "grad_norm": 0.9860844016075134, "learning_rate": 9.641660904504195e-05, "loss": 1.5136, "step": 20620 }, { "epoch": 5.116250232500465, "grad_norm": 0.9451846480369568, "learning_rate": 9.633874369733432e-05, "loss": 1.5181, "step": 20630 }, { "epoch": 5.118730237460475, "grad_norm": 0.9941641688346863, "learning_rate": 9.626088057236745e-05, "loss": 1.4943, "step": 20640 }, { "epoch": 5.121210242420485, "grad_norm": 0.9477522969245911, "learning_rate": 9.618301971741184e-05, "loss": 1.5175, "step": 20650 }, { "epoch": 5.123690247380495, "grad_norm": 0.9193263053894043, "learning_rate": 9.61051611797367e-05, "loss": 1.5415, "step": 20660 }, { "epoch": 5.126170252340505, "grad_norm": 0.9805010557174683, "learning_rate": 9.602730500660967e-05, "loss": 1.5124, "step": 20670 }, { "epoch": 5.128650257300515, "grad_norm": 1.0175899267196655, "learning_rate": 9.594945124529719e-05, "loss": 1.5062, "step": 20680 }, { "epoch": 5.131130262260524, "grad_norm": 0.9650846719741821, "learning_rate": 9.5871599943064e-05, "loss": 1.5027, "step": 20690 }, { "epoch": 5.133610267220535, "grad_norm": 0.977501392364502, "learning_rate": 9.579375114717351e-05, "loss": 1.5471, "step": 20700 }, { "epoch": 5.136090272180544, "grad_norm": 0.9525604248046875, "learning_rate": 9.571590490488755e-05, "loss": 1.5443, "step": 20710 }, { "epoch": 5.138570277140555, "grad_norm": 0.9992299675941467, "learning_rate": 9.563806126346642e-05, "loss": 1.4349, "step": 20720 }, { "epoch": 5.141050282100564, "grad_norm": 0.9223850965499878, "learning_rate": 9.556022027016878e-05, "loss": 1.4943, "step": 20730 }, { "epoch": 5.143530287060574, "grad_norm": 0.9668163657188416, "learning_rate": 9.54823819722518e-05, "loss": 1.5274, "step": 20740 }, { "epoch": 5.146010292020584, "grad_norm": 0.9030009508132935, "learning_rate": 9.540454641697084e-05, "loss": 1.5134, "step": 20750 }, { "epoch": 5.148490296980594, "grad_norm": 0.9662803411483765, "learning_rate": 9.53267136515798e-05, "loss": 1.4816, "step": 20760 }, { "epoch": 5.150970301940604, "grad_norm": 1.0170722007751465, "learning_rate": 9.52488837233307e-05, "loss": 1.4449, "step": 20770 }, { "epoch": 5.153450306900614, "grad_norm": 0.9316909313201904, "learning_rate": 9.517105667947397e-05, "loss": 1.5169, "step": 20780 }, { "epoch": 5.155930311860624, "grad_norm": 0.9504343867301941, "learning_rate": 9.509323256725821e-05, "loss": 1.4605, "step": 20790 }, { "epoch": 5.158410316820634, "grad_norm": 0.9367119669914246, "learning_rate": 9.501541143393028e-05, "loss": 1.5345, "step": 20800 }, { "epoch": 5.160890321780643, "grad_norm": 1.2000889778137207, "learning_rate": 9.493759332673527e-05, "loss": 1.4495, "step": 20810 }, { "epoch": 5.163370326740654, "grad_norm": 0.9850399494171143, "learning_rate": 9.485977829291627e-05, "loss": 1.5541, "step": 20820 }, { "epoch": 5.165850331700663, "grad_norm": 0.9986492395401001, "learning_rate": 9.478196637971475e-05, "loss": 1.5413, "step": 20830 }, { "epoch": 5.168330336660674, "grad_norm": 0.9224624037742615, "learning_rate": 9.470415763437004e-05, "loss": 1.5242, "step": 20840 }, { "epoch": 5.170810341620683, "grad_norm": 0.9644695520401001, "learning_rate": 9.462635210411974e-05, "loss": 1.5029, "step": 20850 }, { "epoch": 5.173290346580693, "grad_norm": 0.9682588577270508, "learning_rate": 9.454854983619936e-05, "loss": 1.5529, "step": 20860 }, { "epoch": 5.175770351540703, "grad_norm": 0.9783344864845276, "learning_rate": 9.447075087784254e-05, "loss": 1.5205, "step": 20870 }, { "epoch": 5.178250356500713, "grad_norm": 0.9680948853492737, "learning_rate": 9.439295527628081e-05, "loss": 1.5127, "step": 20880 }, { "epoch": 5.180730361460723, "grad_norm": 0.9525998830795288, "learning_rate": 9.431516307874377e-05, "loss": 1.4869, "step": 20890 }, { "epoch": 5.183210366420733, "grad_norm": 0.9906119704246521, "learning_rate": 9.423737433245883e-05, "loss": 1.4938, "step": 20900 }, { "epoch": 5.185690371380743, "grad_norm": 0.9663385152816772, "learning_rate": 9.41595890846514e-05, "loss": 1.4462, "step": 20910 }, { "epoch": 5.188170376340753, "grad_norm": 0.9746707081794739, "learning_rate": 9.408180738254471e-05, "loss": 1.4846, "step": 20920 }, { "epoch": 5.190650381300762, "grad_norm": 0.9378529191017151, "learning_rate": 9.400402927335992e-05, "loss": 1.5291, "step": 20930 }, { "epoch": 5.193130386260773, "grad_norm": 0.9330746531486511, "learning_rate": 9.392625480431587e-05, "loss": 1.5204, "step": 20940 }, { "epoch": 5.195610391220782, "grad_norm": 1.0342406034469604, "learning_rate": 9.384848402262938e-05, "loss": 1.5185, "step": 20950 }, { "epoch": 5.198090396180793, "grad_norm": 0.9911377429962158, "learning_rate": 9.37707169755148e-05, "loss": 1.5061, "step": 20960 }, { "epoch": 5.200570401140802, "grad_norm": 0.9533239603042603, "learning_rate": 9.369295371018442e-05, "loss": 1.5666, "step": 20970 }, { "epoch": 5.203050406100812, "grad_norm": 0.9491400122642517, "learning_rate": 9.361519427384808e-05, "loss": 1.5549, "step": 20980 }, { "epoch": 5.205530411060822, "grad_norm": 0.938381016254425, "learning_rate": 9.353743871371345e-05, "loss": 1.4659, "step": 20990 }, { "epoch": 5.208010416020832, "grad_norm": 0.9344185590744019, "learning_rate": 9.345968707698569e-05, "loss": 1.5561, "step": 21000 }, { "epoch": 5.210490420980842, "grad_norm": 0.9927399754524231, "learning_rate": 9.338193941086772e-05, "loss": 1.5511, "step": 21010 }, { "epoch": 5.212970425940852, "grad_norm": 0.9011238217353821, "learning_rate": 9.330419576255986e-05, "loss": 1.4998, "step": 21020 }, { "epoch": 5.215450430900862, "grad_norm": 0.9369461536407471, "learning_rate": 9.322645617926026e-05, "loss": 1.5338, "step": 21030 }, { "epoch": 5.217930435860872, "grad_norm": 0.9484929442405701, "learning_rate": 9.314872070816434e-05, "loss": 1.4942, "step": 21040 }, { "epoch": 5.220410440820881, "grad_norm": 1.034021258354187, "learning_rate": 9.307098939646518e-05, "loss": 1.563, "step": 21050 }, { "epoch": 5.222890445780892, "grad_norm": 1.0780504941940308, "learning_rate": 9.299326229135326e-05, "loss": 1.4832, "step": 21060 }, { "epoch": 5.225370450740901, "grad_norm": 0.9114693403244019, "learning_rate": 9.29155394400166e-05, "loss": 1.5379, "step": 21070 }, { "epoch": 5.227850455700912, "grad_norm": 0.9753614068031311, "learning_rate": 9.283782088964049e-05, "loss": 1.5208, "step": 21080 }, { "epoch": 5.230330460660921, "grad_norm": 0.9457783102989197, "learning_rate": 9.27601066874078e-05, "loss": 1.5519, "step": 21090 }, { "epoch": 5.232810465620931, "grad_norm": 0.9436642527580261, "learning_rate": 9.268239688049854e-05, "loss": 1.5891, "step": 21100 }, { "epoch": 5.235290470580941, "grad_norm": 0.9453377723693848, "learning_rate": 9.260469151609021e-05, "loss": 1.4666, "step": 21110 }, { "epoch": 5.237770475540951, "grad_norm": 0.9990126490592957, "learning_rate": 9.252699064135758e-05, "loss": 1.547, "step": 21120 }, { "epoch": 5.240250480500961, "grad_norm": 0.9898304343223572, "learning_rate": 9.244929430347269e-05, "loss": 1.5728, "step": 21130 }, { "epoch": 5.242730485460971, "grad_norm": 0.8843860626220703, "learning_rate": 9.237160254960476e-05, "loss": 1.5918, "step": 21140 }, { "epoch": 5.24521049042098, "grad_norm": 1.0320794582366943, "learning_rate": 9.229391542692039e-05, "loss": 1.5393, "step": 21150 }, { "epoch": 5.247690495380991, "grad_norm": 1.0260951519012451, "learning_rate": 9.221623298258315e-05, "loss": 1.5119, "step": 21160 }, { "epoch": 5.250170500341, "grad_norm": 1.0024783611297607, "learning_rate": 9.213855526375395e-05, "loss": 1.5714, "step": 21170 }, { "epoch": 5.252650505301011, "grad_norm": 0.9587418437004089, "learning_rate": 9.206088231759073e-05, "loss": 1.5112, "step": 21180 }, { "epoch": 5.25513051026102, "grad_norm": 0.9575029015541077, "learning_rate": 9.19832141912486e-05, "loss": 1.5, "step": 21190 }, { "epoch": 5.257610515221031, "grad_norm": 0.9021822810173035, "learning_rate": 9.190555093187967e-05, "loss": 1.5117, "step": 21200 }, { "epoch": 5.26009052018104, "grad_norm": 1.0176606178283691, "learning_rate": 9.182789258663321e-05, "loss": 1.5093, "step": 21210 }, { "epoch": 5.26257052514105, "grad_norm": 0.8936800360679626, "learning_rate": 9.17502392026553e-05, "loss": 1.4963, "step": 21220 }, { "epoch": 5.26505053010106, "grad_norm": 0.9021929502487183, "learning_rate": 9.16725908270893e-05, "loss": 1.4675, "step": 21230 }, { "epoch": 5.26753053506107, "grad_norm": 0.9234278798103333, "learning_rate": 9.159494750707526e-05, "loss": 1.5487, "step": 21240 }, { "epoch": 5.27001054002108, "grad_norm": 0.9856491088867188, "learning_rate": 9.15173092897503e-05, "loss": 1.4821, "step": 21250 }, { "epoch": 5.27249054498109, "grad_norm": 0.9914385080337524, "learning_rate": 9.143967622224842e-05, "loss": 1.5057, "step": 21260 }, { "epoch": 5.2749705499410995, "grad_norm": 0.9577724933624268, "learning_rate": 9.13620483517005e-05, "loss": 1.5988, "step": 21270 }, { "epoch": 5.27745055490111, "grad_norm": 0.9790413975715637, "learning_rate": 9.128442572523417e-05, "loss": 1.5017, "step": 21280 }, { "epoch": 5.2799305598611195, "grad_norm": 0.9989108443260193, "learning_rate": 9.120680838997411e-05, "loss": 1.5679, "step": 21290 }, { "epoch": 5.28241056482113, "grad_norm": 0.9500874280929565, "learning_rate": 9.112919639304148e-05, "loss": 1.5154, "step": 21300 }, { "epoch": 5.2848905697811395, "grad_norm": 1.0164166688919067, "learning_rate": 9.105158978155445e-05, "loss": 1.53, "step": 21310 }, { "epoch": 5.28737057474115, "grad_norm": 0.9272359609603882, "learning_rate": 9.097398860262776e-05, "loss": 1.4727, "step": 21320 }, { "epoch": 5.2898505797011595, "grad_norm": 1.0567892789840698, "learning_rate": 9.0896392903373e-05, "loss": 1.5458, "step": 21330 }, { "epoch": 5.292330584661169, "grad_norm": 0.9932803511619568, "learning_rate": 9.081880273089827e-05, "loss": 1.5074, "step": 21340 }, { "epoch": 5.2948105896211795, "grad_norm": 0.9956728219985962, "learning_rate": 9.074121813230847e-05, "loss": 1.4898, "step": 21350 }, { "epoch": 5.297290594581189, "grad_norm": 1.0154134035110474, "learning_rate": 9.066363915470495e-05, "loss": 1.5296, "step": 21360 }, { "epoch": 5.2997705995411994, "grad_norm": 1.0684008598327637, "learning_rate": 9.05860658451858e-05, "loss": 1.4418, "step": 21370 }, { "epoch": 5.302250604501209, "grad_norm": 0.9834532141685486, "learning_rate": 9.050849825084559e-05, "loss": 1.4744, "step": 21380 }, { "epoch": 5.3047306094612185, "grad_norm": 0.9449496269226074, "learning_rate": 9.043093641877547e-05, "loss": 1.5429, "step": 21390 }, { "epoch": 5.307210614421229, "grad_norm": 0.8877017498016357, "learning_rate": 9.0353380396063e-05, "loss": 1.4816, "step": 21400 }, { "epoch": 5.3096906193812385, "grad_norm": 0.9575859308242798, "learning_rate": 9.027583022979234e-05, "loss": 1.5607, "step": 21410 }, { "epoch": 5.312170624341249, "grad_norm": 0.954052746295929, "learning_rate": 9.019828596704394e-05, "loss": 1.5308, "step": 21420 }, { "epoch": 5.3146506293012585, "grad_norm": 0.9780444502830505, "learning_rate": 9.012074765489482e-05, "loss": 1.5246, "step": 21430 }, { "epoch": 5.317130634261268, "grad_norm": 0.9581279754638672, "learning_rate": 9.004321534041835e-05, "loss": 1.5133, "step": 21440 }, { "epoch": 5.3196106392212785, "grad_norm": 0.9770628213882446, "learning_rate": 8.996568907068415e-05, "loss": 1.5733, "step": 21450 }, { "epoch": 5.322090644181288, "grad_norm": 0.9912844896316528, "learning_rate": 8.988816889275829e-05, "loss": 1.5536, "step": 21460 }, { "epoch": 5.3245706491412985, "grad_norm": 1.015771746635437, "learning_rate": 8.981065485370308e-05, "loss": 1.5477, "step": 21470 }, { "epoch": 5.327050654101308, "grad_norm": 0.9286255240440369, "learning_rate": 8.973314700057717e-05, "loss": 1.534, "step": 21480 }, { "epoch": 5.3295306590613185, "grad_norm": 0.9836567640304565, "learning_rate": 8.965564538043535e-05, "loss": 1.4998, "step": 21490 }, { "epoch": 5.332010664021328, "grad_norm": 0.9883909821510315, "learning_rate": 8.957815004032876e-05, "loss": 1.5322, "step": 21500 }, { "epoch": 5.334490668981338, "grad_norm": 0.9234176278114319, "learning_rate": 8.950066102730456e-05, "loss": 1.5265, "step": 21510 }, { "epoch": 5.336970673941348, "grad_norm": 1.0736953020095825, "learning_rate": 8.942317838840623e-05, "loss": 1.5229, "step": 21520 }, { "epoch": 5.339450678901358, "grad_norm": 1.023149847984314, "learning_rate": 8.934570217067327e-05, "loss": 1.5687, "step": 21530 }, { "epoch": 5.341930683861368, "grad_norm": 1.065730094909668, "learning_rate": 8.926823242114136e-05, "loss": 1.493, "step": 21540 }, { "epoch": 5.344410688821378, "grad_norm": 0.9680074453353882, "learning_rate": 8.91907691868422e-05, "loss": 1.5618, "step": 21550 }, { "epoch": 5.346890693781387, "grad_norm": 0.9289672374725342, "learning_rate": 8.911331251480357e-05, "loss": 1.5213, "step": 21560 }, { "epoch": 5.349370698741398, "grad_norm": 0.9402804374694824, "learning_rate": 8.903586245204917e-05, "loss": 1.5191, "step": 21570 }, { "epoch": 5.351850703701407, "grad_norm": 0.9461135268211365, "learning_rate": 8.895841904559888e-05, "loss": 1.4828, "step": 21580 }, { "epoch": 5.354330708661418, "grad_norm": 0.9638580679893494, "learning_rate": 8.888098234246832e-05, "loss": 1.4709, "step": 21590 }, { "epoch": 5.356810713621427, "grad_norm": 0.986600935459137, "learning_rate": 8.880355238966923e-05, "loss": 1.5509, "step": 21600 }, { "epoch": 5.359290718581438, "grad_norm": 0.9543759822845459, "learning_rate": 8.872612923420909e-05, "loss": 1.5371, "step": 21610 }, { "epoch": 5.361770723541447, "grad_norm": 1.040656328201294, "learning_rate": 8.86487129230914e-05, "loss": 1.5651, "step": 21620 }, { "epoch": 5.364250728501457, "grad_norm": 0.995304524898529, "learning_rate": 8.857130350331535e-05, "loss": 1.5591, "step": 21630 }, { "epoch": 5.366730733461467, "grad_norm": 0.9509426355361938, "learning_rate": 8.849390102187614e-05, "loss": 1.4872, "step": 21640 }, { "epoch": 5.369210738421477, "grad_norm": 0.9640476703643799, "learning_rate": 8.841650552576453e-05, "loss": 1.5134, "step": 21650 }, { "epoch": 5.371690743381487, "grad_norm": 0.9115557074546814, "learning_rate": 8.833911706196725e-05, "loss": 1.4922, "step": 21660 }, { "epoch": 5.374170748341497, "grad_norm": 0.9620742797851562, "learning_rate": 8.826173567746662e-05, "loss": 1.5666, "step": 21670 }, { "epoch": 5.376650753301506, "grad_norm": 0.9360364079475403, "learning_rate": 8.818436141924072e-05, "loss": 1.5095, "step": 21680 }, { "epoch": 5.379130758261517, "grad_norm": 0.9433877468109131, "learning_rate": 8.81069943342633e-05, "loss": 1.4912, "step": 21690 }, { "epoch": 5.381610763221526, "grad_norm": 0.9260382652282715, "learning_rate": 8.802963446950377e-05, "loss": 1.4416, "step": 21700 }, { "epoch": 5.384090768181537, "grad_norm": 1.1015149354934692, "learning_rate": 8.795228187192707e-05, "loss": 1.5746, "step": 21710 }, { "epoch": 5.386570773141546, "grad_norm": 0.9676945209503174, "learning_rate": 8.787493658849386e-05, "loss": 1.6083, "step": 21720 }, { "epoch": 5.389050778101556, "grad_norm": 0.9367676973342896, "learning_rate": 8.779759866616025e-05, "loss": 1.5372, "step": 21730 }, { "epoch": 5.391530783061566, "grad_norm": 0.9828059673309326, "learning_rate": 8.772026815187796e-05, "loss": 1.6202, "step": 21740 }, { "epoch": 5.394010788021576, "grad_norm": 1.0444278717041016, "learning_rate": 8.764294509259414e-05, "loss": 1.6003, "step": 21750 }, { "epoch": 5.396490792981586, "grad_norm": 0.9950647354125977, "learning_rate": 8.756562953525152e-05, "loss": 1.5197, "step": 21760 }, { "epoch": 5.398970797941596, "grad_norm": 0.9401552677154541, "learning_rate": 8.74883215267881e-05, "loss": 1.5367, "step": 21770 }, { "epoch": 5.401450802901606, "grad_norm": 0.9246699213981628, "learning_rate": 8.741102111413748e-05, "loss": 1.4873, "step": 21780 }, { "epoch": 5.403930807861616, "grad_norm": 1.1276699304580688, "learning_rate": 8.733372834422854e-05, "loss": 1.5845, "step": 21790 }, { "epoch": 5.406410812821625, "grad_norm": 0.9638933539390564, "learning_rate": 8.725644326398558e-05, "loss": 1.5181, "step": 21800 }, { "epoch": 5.408890817781636, "grad_norm": 0.9737806916236877, "learning_rate": 8.717916592032818e-05, "loss": 1.5819, "step": 21810 }, { "epoch": 5.411370822741645, "grad_norm": 0.9329572916030884, "learning_rate": 8.71018963601713e-05, "loss": 1.5322, "step": 21820 }, { "epoch": 5.413850827701656, "grad_norm": 1.0153096914291382, "learning_rate": 8.7024634630425e-05, "loss": 1.5332, "step": 21830 }, { "epoch": 5.416330832661665, "grad_norm": 1.0339939594268799, "learning_rate": 8.694738077799488e-05, "loss": 1.5554, "step": 21840 }, { "epoch": 5.418810837621676, "grad_norm": 0.9483512043952942, "learning_rate": 8.687013484978142e-05, "loss": 1.4977, "step": 21850 }, { "epoch": 5.421290842581685, "grad_norm": 1.008466362953186, "learning_rate": 8.679289689268056e-05, "loss": 1.5357, "step": 21860 }, { "epoch": 5.423770847541695, "grad_norm": 0.9744418263435364, "learning_rate": 8.671566695358324e-05, "loss": 1.5586, "step": 21870 }, { "epoch": 5.426250852501705, "grad_norm": 0.9113421440124512, "learning_rate": 8.663844507937562e-05, "loss": 1.5162, "step": 21880 }, { "epoch": 5.428730857461715, "grad_norm": 1.041822910308838, "learning_rate": 8.656123131693892e-05, "loss": 1.5326, "step": 21890 }, { "epoch": 5.431210862421725, "grad_norm": 1.0268244743347168, "learning_rate": 8.648402571314949e-05, "loss": 1.4946, "step": 21900 }, { "epoch": 5.433690867381735, "grad_norm": 0.9706840515136719, "learning_rate": 8.64068283148786e-05, "loss": 1.5051, "step": 21910 }, { "epoch": 5.436170872341744, "grad_norm": 0.8814148902893066, "learning_rate": 8.632963916899268e-05, "loss": 1.5925, "step": 21920 }, { "epoch": 5.438650877301755, "grad_norm": 0.9990885853767395, "learning_rate": 8.625245832235307e-05, "loss": 1.5186, "step": 21930 }, { "epoch": 5.441130882261764, "grad_norm": 0.9785584211349487, "learning_rate": 8.617528582181612e-05, "loss": 1.4405, "step": 21940 }, { "epoch": 5.443610887221775, "grad_norm": 1.027880072593689, "learning_rate": 8.609812171423305e-05, "loss": 1.5838, "step": 21950 }, { "epoch": 5.446090892181784, "grad_norm": 0.9656705856323242, "learning_rate": 8.602096604645009e-05, "loss": 1.5129, "step": 21960 }, { "epoch": 5.448570897141794, "grad_norm": 0.9562718272209167, "learning_rate": 8.594381886530818e-05, "loss": 1.5247, "step": 21970 }, { "epoch": 5.451050902101804, "grad_norm": 0.9530388116836548, "learning_rate": 8.586668021764329e-05, "loss": 1.5454, "step": 21980 }, { "epoch": 5.453530907061814, "grad_norm": 1.0748162269592285, "learning_rate": 8.578955015028605e-05, "loss": 1.5, "step": 21990 }, { "epoch": 5.456010912021824, "grad_norm": 1.067697286605835, "learning_rate": 8.571242871006202e-05, "loss": 1.5529, "step": 22000 }, { "epoch": 5.458490916981834, "grad_norm": 0.9875476360321045, "learning_rate": 8.563531594379142e-05, "loss": 1.5706, "step": 22010 }, { "epoch": 5.460970921941843, "grad_norm": 1.0351884365081787, "learning_rate": 8.555821189828932e-05, "loss": 1.6127, "step": 22020 }, { "epoch": 5.463450926901854, "grad_norm": 0.9771510362625122, "learning_rate": 8.548111662036528e-05, "loss": 1.5345, "step": 22030 }, { "epoch": 5.465930931861863, "grad_norm": 0.9876402020454407, "learning_rate": 8.540403015682381e-05, "loss": 1.456, "step": 22040 }, { "epoch": 5.468410936821874, "grad_norm": 1.109188199043274, "learning_rate": 8.532695255446383e-05, "loss": 1.5097, "step": 22050 }, { "epoch": 5.470890941781883, "grad_norm": 0.9162520170211792, "learning_rate": 8.524988386007905e-05, "loss": 1.481, "step": 22060 }, { "epoch": 5.473370946741894, "grad_norm": 0.9496723413467407, "learning_rate": 8.517282412045771e-05, "loss": 1.5208, "step": 22070 }, { "epoch": 5.475850951701903, "grad_norm": 1.0151824951171875, "learning_rate": 8.509577338238255e-05, "loss": 1.5203, "step": 22080 }, { "epoch": 5.478330956661913, "grad_norm": 1.076178789138794, "learning_rate": 8.501873169263097e-05, "loss": 1.4665, "step": 22090 }, { "epoch": 5.480810961621923, "grad_norm": 0.9714109301567078, "learning_rate": 8.494169909797478e-05, "loss": 1.5494, "step": 22100 }, { "epoch": 5.483290966581933, "grad_norm": 0.9446751475334167, "learning_rate": 8.486467564518034e-05, "loss": 1.4615, "step": 22110 }, { "epoch": 5.485770971541943, "grad_norm": 0.9611682295799255, "learning_rate": 8.478766138100834e-05, "loss": 1.575, "step": 22120 }, { "epoch": 5.488250976501953, "grad_norm": 0.9424753785133362, "learning_rate": 8.471065635221404e-05, "loss": 1.4713, "step": 22130 }, { "epoch": 5.490730981461963, "grad_norm": 0.9659827947616577, "learning_rate": 8.463366060554698e-05, "loss": 1.5298, "step": 22140 }, { "epoch": 5.493210986421973, "grad_norm": 0.9294324517250061, "learning_rate": 8.455667418775117e-05, "loss": 1.5346, "step": 22150 }, { "epoch": 5.4956909913819825, "grad_norm": 0.9476675391197205, "learning_rate": 8.447969714556484e-05, "loss": 1.5124, "step": 22160 }, { "epoch": 5.498170996341993, "grad_norm": 0.947379469871521, "learning_rate": 8.440272952572064e-05, "loss": 1.5919, "step": 22170 }, { "epoch": 5.5006510013020025, "grad_norm": 0.9382186532020569, "learning_rate": 8.432577137494537e-05, "loss": 1.4837, "step": 22180 }, { "epoch": 5.503131006262013, "grad_norm": 0.927191436290741, "learning_rate": 8.424882273996024e-05, "loss": 1.5346, "step": 22190 }, { "epoch": 5.5056110112220225, "grad_norm": 0.949855387210846, "learning_rate": 8.417188366748052e-05, "loss": 1.4853, "step": 22200 }, { "epoch": 5.508091016182032, "grad_norm": 1.0020701885223389, "learning_rate": 8.40949542042158e-05, "loss": 1.5281, "step": 22210 }, { "epoch": 5.5105710211420424, "grad_norm": 1.042439341545105, "learning_rate": 8.401803439686977e-05, "loss": 1.5147, "step": 22220 }, { "epoch": 5.513051026102052, "grad_norm": 0.9486450552940369, "learning_rate": 8.39411242921403e-05, "loss": 1.5571, "step": 22230 }, { "epoch": 5.515531031062062, "grad_norm": 0.924824595451355, "learning_rate": 8.386422393671933e-05, "loss": 1.5526, "step": 22240 }, { "epoch": 5.518011036022072, "grad_norm": 0.9190182685852051, "learning_rate": 8.378733337729294e-05, "loss": 1.4867, "step": 22250 }, { "epoch": 5.5204910409820815, "grad_norm": 1.0011485815048218, "learning_rate": 8.371045266054114e-05, "loss": 1.5246, "step": 22260 }, { "epoch": 5.522971045942092, "grad_norm": 0.9253968596458435, "learning_rate": 8.36335818331381e-05, "loss": 1.5336, "step": 22270 }, { "epoch": 5.5254510509021015, "grad_norm": 0.9603173136711121, "learning_rate": 8.35567209417519e-05, "loss": 1.5006, "step": 22280 }, { "epoch": 5.527931055862112, "grad_norm": 1.0172189474105835, "learning_rate": 8.347987003304469e-05, "loss": 1.4873, "step": 22290 }, { "epoch": 5.5304110608221215, "grad_norm": 0.9984214305877686, "learning_rate": 8.340302915367241e-05, "loss": 1.5289, "step": 22300 }, { "epoch": 5.532891065782131, "grad_norm": 0.943496584892273, "learning_rate": 8.332619835028506e-05, "loss": 1.5037, "step": 22310 }, { "epoch": 5.5353710707421415, "grad_norm": 0.9430272579193115, "learning_rate": 8.324937766952638e-05, "loss": 1.5323, "step": 22320 }, { "epoch": 5.537851075702151, "grad_norm": 0.9519038200378418, "learning_rate": 8.317256715803407e-05, "loss": 1.5355, "step": 22330 }, { "epoch": 5.5403310806621615, "grad_norm": 1.0110712051391602, "learning_rate": 8.309576686243964e-05, "loss": 1.5617, "step": 22340 }, { "epoch": 5.542811085622171, "grad_norm": 1.0450818538665771, "learning_rate": 8.301897682936838e-05, "loss": 1.4925, "step": 22350 }, { "epoch": 5.5452910905821815, "grad_norm": 1.0279377698898315, "learning_rate": 8.294219710543932e-05, "loss": 1.5183, "step": 22360 }, { "epoch": 5.547771095542191, "grad_norm": 1.0317164659500122, "learning_rate": 8.286542773726534e-05, "loss": 1.5277, "step": 22370 }, { "epoch": 5.5502511005022015, "grad_norm": 0.9843716025352478, "learning_rate": 8.278866877145282e-05, "loss": 1.5274, "step": 22380 }, { "epoch": 5.552731105462211, "grad_norm": 0.9920136332511902, "learning_rate": 8.271192025460211e-05, "loss": 1.5119, "step": 22390 }, { "epoch": 5.555211110422221, "grad_norm": 0.9626421928405762, "learning_rate": 8.263518223330697e-05, "loss": 1.4893, "step": 22400 }, { "epoch": 5.557691115382231, "grad_norm": 0.930276095867157, "learning_rate": 8.255845475415494e-05, "loss": 1.569, "step": 22410 }, { "epoch": 5.560171120342241, "grad_norm": 0.9820217490196228, "learning_rate": 8.248173786372705e-05, "loss": 1.5701, "step": 22420 }, { "epoch": 5.562651125302251, "grad_norm": 0.9253460764884949, "learning_rate": 8.240503160859804e-05, "loss": 1.4679, "step": 22430 }, { "epoch": 5.565131130262261, "grad_norm": 0.947089433670044, "learning_rate": 8.2328336035336e-05, "loss": 1.4811, "step": 22440 }, { "epoch": 5.56761113522227, "grad_norm": 0.9547702670097351, "learning_rate": 8.225165119050277e-05, "loss": 1.5287, "step": 22450 }, { "epoch": 5.570091140182281, "grad_norm": 0.9524838924407959, "learning_rate": 8.217497712065345e-05, "loss": 1.5594, "step": 22460 }, { "epoch": 5.57257114514229, "grad_norm": 0.9504765868186951, "learning_rate": 8.209831387233676e-05, "loss": 1.5289, "step": 22470 }, { "epoch": 5.575051150102301, "grad_norm": 1.5735292434692383, "learning_rate": 8.202166149209474e-05, "loss": 1.5328, "step": 22480 }, { "epoch": 5.57753115506231, "grad_norm": 0.8840258121490479, "learning_rate": 8.194502002646294e-05, "loss": 1.5362, "step": 22490 }, { "epoch": 5.58001116002232, "grad_norm": 0.886695146560669, "learning_rate": 8.186838952197018e-05, "loss": 1.4982, "step": 22500 }, { "epoch": 5.58249116498233, "grad_norm": 1.0302002429962158, "learning_rate": 8.179177002513873e-05, "loss": 1.5819, "step": 22510 }, { "epoch": 5.58497116994234, "grad_norm": 0.9544497132301331, "learning_rate": 8.171516158248406e-05, "loss": 1.5219, "step": 22520 }, { "epoch": 5.58745117490235, "grad_norm": 0.9738973379135132, "learning_rate": 8.163856424051502e-05, "loss": 1.5627, "step": 22530 }, { "epoch": 5.58993117986236, "grad_norm": 1.01767897605896, "learning_rate": 8.156197804573366e-05, "loss": 1.5328, "step": 22540 }, { "epoch": 5.592411184822369, "grad_norm": 0.9286201596260071, "learning_rate": 8.148540304463536e-05, "loss": 1.5356, "step": 22550 }, { "epoch": 5.59489118978238, "grad_norm": 1.0125569105148315, "learning_rate": 8.140883928370855e-05, "loss": 1.5648, "step": 22560 }, { "epoch": 5.597371194742389, "grad_norm": 0.9836313724517822, "learning_rate": 8.133228680943501e-05, "loss": 1.5341, "step": 22570 }, { "epoch": 5.5998511997024, "grad_norm": 0.9896276593208313, "learning_rate": 8.125574566828946e-05, "loss": 1.5399, "step": 22580 }, { "epoch": 5.602331204662409, "grad_norm": 0.9839266538619995, "learning_rate": 8.117921590674001e-05, "loss": 1.5382, "step": 22590 }, { "epoch": 5.60481120962242, "grad_norm": 0.9478524923324585, "learning_rate": 8.11026975712476e-05, "loss": 1.4931, "step": 22600 }, { "epoch": 5.607291214582429, "grad_norm": 0.9081190228462219, "learning_rate": 8.102619070826639e-05, "loss": 1.5147, "step": 22610 }, { "epoch": 5.609771219542439, "grad_norm": 0.9841187000274658, "learning_rate": 8.094969536424351e-05, "loss": 1.5404, "step": 22620 }, { "epoch": 5.612251224502449, "grad_norm": 0.991550624370575, "learning_rate": 8.087321158561917e-05, "loss": 1.5206, "step": 22630 }, { "epoch": 5.614731229462459, "grad_norm": 0.9358656406402588, "learning_rate": 8.07967394188264e-05, "loss": 1.5052, "step": 22640 }, { "epoch": 5.617211234422469, "grad_norm": 0.9936838150024414, "learning_rate": 8.07202789102914e-05, "loss": 1.5927, "step": 22650 }, { "epoch": 5.619691239382479, "grad_norm": 0.9371949434280396, "learning_rate": 8.06438301064331e-05, "loss": 1.5075, "step": 22660 }, { "epoch": 5.622171244342489, "grad_norm": 1.0312095880508423, "learning_rate": 8.056739305366346e-05, "loss": 1.5319, "step": 22670 }, { "epoch": 5.624651249302499, "grad_norm": 0.9725061655044556, "learning_rate": 8.049096779838719e-05, "loss": 1.5334, "step": 22680 }, { "epoch": 5.627131254262508, "grad_norm": 0.9553120732307434, "learning_rate": 8.041455438700191e-05, "loss": 1.5311, "step": 22690 }, { "epoch": 5.629611259222519, "grad_norm": 0.9907311201095581, "learning_rate": 8.03381528658981e-05, "loss": 1.5381, "step": 22700 }, { "epoch": 5.632091264182528, "grad_norm": 0.9710587859153748, "learning_rate": 8.026176328145887e-05, "loss": 1.4894, "step": 22710 }, { "epoch": 5.634571269142539, "grad_norm": 0.8912402391433716, "learning_rate": 8.018538568006027e-05, "loss": 1.5291, "step": 22720 }, { "epoch": 5.637051274102548, "grad_norm": 0.9066137075424194, "learning_rate": 8.010902010807084e-05, "loss": 1.5306, "step": 22730 }, { "epoch": 5.639531279062558, "grad_norm": 0.9754036068916321, "learning_rate": 8.003266661185209e-05, "loss": 1.5521, "step": 22740 }, { "epoch": 5.642011284022568, "grad_norm": 0.9809487462043762, "learning_rate": 7.995632523775795e-05, "loss": 1.5178, "step": 22750 }, { "epoch": 5.644491288982578, "grad_norm": 0.9499403238296509, "learning_rate": 7.987999603213519e-05, "loss": 1.4584, "step": 22760 }, { "epoch": 5.646971293942588, "grad_norm": 1.0232181549072266, "learning_rate": 7.980367904132302e-05, "loss": 1.5577, "step": 22770 }, { "epoch": 5.649451298902598, "grad_norm": 1.1556051969528198, "learning_rate": 7.972737431165343e-05, "loss": 1.5877, "step": 22780 }, { "epoch": 5.651931303862607, "grad_norm": 0.929179310798645, "learning_rate": 7.965108188945072e-05, "loss": 1.5101, "step": 22790 }, { "epoch": 5.654411308822618, "grad_norm": 0.928312361240387, "learning_rate": 7.957480182103198e-05, "loss": 1.5602, "step": 22800 }, { "epoch": 5.656891313782627, "grad_norm": 0.9117720127105713, "learning_rate": 7.949853415270658e-05, "loss": 1.5158, "step": 22810 }, { "epoch": 5.659371318742638, "grad_norm": 0.9475207924842834, "learning_rate": 7.942227893077652e-05, "loss": 1.4721, "step": 22820 }, { "epoch": 5.661851323702647, "grad_norm": 1.001230239868164, "learning_rate": 7.934603620153614e-05, "loss": 1.535, "step": 22830 }, { "epoch": 5.664331328662657, "grad_norm": 0.9647219777107239, "learning_rate": 7.926980601127225e-05, "loss": 1.5649, "step": 22840 }, { "epoch": 5.666811333622667, "grad_norm": 0.9498812556266785, "learning_rate": 7.9193588406264e-05, "loss": 1.5056, "step": 22850 }, { "epoch": 5.669291338582677, "grad_norm": 0.9786957502365112, "learning_rate": 7.911738343278304e-05, "loss": 1.5892, "step": 22860 }, { "epoch": 5.671771343542687, "grad_norm": 0.980808436870575, "learning_rate": 7.90411911370931e-05, "loss": 1.5487, "step": 22870 }, { "epoch": 5.674251348502697, "grad_norm": 1.0204776525497437, "learning_rate": 7.896501156545045e-05, "loss": 1.5215, "step": 22880 }, { "epoch": 5.676731353462707, "grad_norm": 0.9597605466842651, "learning_rate": 7.888884476410348e-05, "loss": 1.5125, "step": 22890 }, { "epoch": 5.679211358422717, "grad_norm": 0.9643506407737732, "learning_rate": 7.881269077929297e-05, "loss": 1.5202, "step": 22900 }, { "epoch": 5.681691363382726, "grad_norm": 0.9709390997886658, "learning_rate": 7.873654965725175e-05, "loss": 1.5712, "step": 22910 }, { "epoch": 5.684171368342737, "grad_norm": 0.9741882681846619, "learning_rate": 7.866042144420502e-05, "loss": 1.5791, "step": 22920 }, { "epoch": 5.686651373302746, "grad_norm": 0.9291117787361145, "learning_rate": 7.858430618636995e-05, "loss": 1.5708, "step": 22930 }, { "epoch": 5.689131378262757, "grad_norm": 0.9480016231536865, "learning_rate": 7.850820392995605e-05, "loss": 1.544, "step": 22940 }, { "epoch": 5.691611383222766, "grad_norm": 0.9766435027122498, "learning_rate": 7.843211472116476e-05, "loss": 1.5171, "step": 22950 }, { "epoch": 5.694091388182777, "grad_norm": 1.0347306728363037, "learning_rate": 7.835603860618972e-05, "loss": 1.5346, "step": 22960 }, { "epoch": 5.696571393142786, "grad_norm": 1.0211946964263916, "learning_rate": 7.827997563121654e-05, "loss": 1.5056, "step": 22970 }, { "epoch": 5.699051398102796, "grad_norm": 1.0085428953170776, "learning_rate": 7.820392584242293e-05, "loss": 1.5418, "step": 22980 }, { "epoch": 5.701531403062806, "grad_norm": 0.8925297260284424, "learning_rate": 7.812788928597845e-05, "loss": 1.5284, "step": 22990 }, { "epoch": 5.704011408022816, "grad_norm": 0.9773134589195251, "learning_rate": 7.805186600804489e-05, "loss": 1.5926, "step": 23000 }, { "epoch": 5.706491412982826, "grad_norm": 1.0136878490447998, "learning_rate": 7.797585605477566e-05, "loss": 1.5515, "step": 23010 }, { "epoch": 5.708971417942836, "grad_norm": 0.9715712666511536, "learning_rate": 7.789985947231632e-05, "loss": 1.5485, "step": 23020 }, { "epoch": 5.7114514229028455, "grad_norm": 0.9595378637313843, "learning_rate": 7.782387630680421e-05, "loss": 1.4779, "step": 23030 }, { "epoch": 5.713931427862856, "grad_norm": 0.9107018709182739, "learning_rate": 7.774790660436858e-05, "loss": 1.4954, "step": 23040 }, { "epoch": 5.7164114328228655, "grad_norm": 1.0032902956008911, "learning_rate": 7.767195041113039e-05, "loss": 1.5585, "step": 23050 }, { "epoch": 5.718891437782876, "grad_norm": 0.9346843957901001, "learning_rate": 7.759600777320258e-05, "loss": 1.5281, "step": 23060 }, { "epoch": 5.7213714427428854, "grad_norm": 0.9562672972679138, "learning_rate": 7.752007873668967e-05, "loss": 1.4774, "step": 23070 }, { "epoch": 5.723851447702895, "grad_norm": 0.9957661628723145, "learning_rate": 7.744416334768808e-05, "loss": 1.5524, "step": 23080 }, { "epoch": 5.726331452662905, "grad_norm": 1.002894401550293, "learning_rate": 7.736826165228581e-05, "loss": 1.485, "step": 23090 }, { "epoch": 5.728811457622915, "grad_norm": 0.9445948600769043, "learning_rate": 7.729237369656269e-05, "loss": 1.5815, "step": 23100 }, { "epoch": 5.731291462582925, "grad_norm": 0.977097749710083, "learning_rate": 7.721649952659007e-05, "loss": 1.5471, "step": 23110 }, { "epoch": 5.733771467542935, "grad_norm": 0.8809939026832581, "learning_rate": 7.714063918843106e-05, "loss": 1.4514, "step": 23120 }, { "epoch": 5.7362514725029445, "grad_norm": 0.9751113057136536, "learning_rate": 7.706479272814023e-05, "loss": 1.4972, "step": 23130 }, { "epoch": 5.738731477462955, "grad_norm": 0.9898456335067749, "learning_rate": 7.698896019176386e-05, "loss": 1.5328, "step": 23140 }, { "epoch": 5.7412114824229645, "grad_norm": 0.9402456879615784, "learning_rate": 7.691314162533967e-05, "loss": 1.5631, "step": 23150 }, { "epoch": 5.743691487382975, "grad_norm": 1.0204651355743408, "learning_rate": 7.683733707489699e-05, "loss": 1.5, "step": 23160 }, { "epoch": 5.7461714923429845, "grad_norm": 0.995430052280426, "learning_rate": 7.676154658645656e-05, "loss": 1.5573, "step": 23170 }, { "epoch": 5.748651497302995, "grad_norm": 1.022791862487793, "learning_rate": 7.668577020603068e-05, "loss": 1.5647, "step": 23180 }, { "epoch": 5.7511315022630045, "grad_norm": 0.98724764585495, "learning_rate": 7.661000797962292e-05, "loss": 1.5313, "step": 23190 }, { "epoch": 5.753611507223014, "grad_norm": 1.041318655014038, "learning_rate": 7.653425995322851e-05, "loss": 1.5309, "step": 23200 }, { "epoch": 5.7560915121830245, "grad_norm": 1.0121514797210693, "learning_rate": 7.645852617283377e-05, "loss": 1.5208, "step": 23210 }, { "epoch": 5.758571517143034, "grad_norm": 0.9732059836387634, "learning_rate": 7.63828066844166e-05, "loss": 1.5598, "step": 23220 }, { "epoch": 5.7610515221030445, "grad_norm": 0.9636842608451843, "learning_rate": 7.630710153394607e-05, "loss": 1.5274, "step": 23230 }, { "epoch": 5.763531527063054, "grad_norm": 0.9335260987281799, "learning_rate": 7.623141076738271e-05, "loss": 1.514, "step": 23240 }, { "epoch": 5.7660115320230645, "grad_norm": 0.977904736995697, "learning_rate": 7.615573443067812e-05, "loss": 1.487, "step": 23250 }, { "epoch": 5.768491536983074, "grad_norm": 0.9485705494880676, "learning_rate": 7.608007256977533e-05, "loss": 1.5116, "step": 23260 }, { "epoch": 5.770971541943084, "grad_norm": 0.8961159586906433, "learning_rate": 7.60044252306084e-05, "loss": 1.4846, "step": 23270 }, { "epoch": 5.773451546903094, "grad_norm": 0.9879873394966125, "learning_rate": 7.592879245910273e-05, "loss": 1.6182, "step": 23280 }, { "epoch": 5.775931551863104, "grad_norm": 0.9821351766586304, "learning_rate": 7.585317430117475e-05, "loss": 1.5422, "step": 23290 }, { "epoch": 5.778411556823114, "grad_norm": 0.9630663394927979, "learning_rate": 7.577757080273216e-05, "loss": 1.5499, "step": 23300 }, { "epoch": 5.780891561783124, "grad_norm": 0.9344465136528015, "learning_rate": 7.570198200967362e-05, "loss": 1.5832, "step": 23310 }, { "epoch": 5.783371566743133, "grad_norm": 0.9779849052429199, "learning_rate": 7.562640796788892e-05, "loss": 1.5778, "step": 23320 }, { "epoch": 5.785851571703144, "grad_norm": 0.9390009641647339, "learning_rate": 7.555084872325897e-05, "loss": 1.4917, "step": 23330 }, { "epoch": 5.788331576663153, "grad_norm": 1.118727207183838, "learning_rate": 7.54753043216555e-05, "loss": 1.5914, "step": 23340 }, { "epoch": 5.790811581623164, "grad_norm": 1.0065404176712036, "learning_rate": 7.53997748089415e-05, "loss": 1.5559, "step": 23350 }, { "epoch": 5.793291586583173, "grad_norm": 0.9158838987350464, "learning_rate": 7.532426023097063e-05, "loss": 1.6127, "step": 23360 }, { "epoch": 5.795771591543183, "grad_norm": 0.9717867970466614, "learning_rate": 7.524876063358773e-05, "loss": 1.5145, "step": 23370 }, { "epoch": 5.798251596503193, "grad_norm": 0.9658146500587463, "learning_rate": 7.517327606262836e-05, "loss": 1.5459, "step": 23380 }, { "epoch": 5.800731601463203, "grad_norm": 0.9799458384513855, "learning_rate": 7.509780656391912e-05, "loss": 1.5463, "step": 23390 }, { "epoch": 5.803211606423213, "grad_norm": 0.919108510017395, "learning_rate": 7.502235218327731e-05, "loss": 1.5224, "step": 23400 }, { "epoch": 5.805691611383223, "grad_norm": 1.0490554571151733, "learning_rate": 7.494691296651118e-05, "loss": 1.5558, "step": 23410 }, { "epoch": 5.808171616343232, "grad_norm": 0.9717322587966919, "learning_rate": 7.487148895941965e-05, "loss": 1.5053, "step": 23420 }, { "epoch": 5.810651621303243, "grad_norm": 0.9787877798080444, "learning_rate": 7.479608020779252e-05, "loss": 1.5595, "step": 23430 }, { "epoch": 5.813131626263252, "grad_norm": 1.015682339668274, "learning_rate": 7.472068675741025e-05, "loss": 1.5414, "step": 23440 }, { "epoch": 5.815611631223263, "grad_norm": 0.9951199889183044, "learning_rate": 7.464530865404407e-05, "loss": 1.5741, "step": 23450 }, { "epoch": 5.818091636183272, "grad_norm": 0.9637252688407898, "learning_rate": 7.45699459434558e-05, "loss": 1.5815, "step": 23460 }, { "epoch": 5.820571641143283, "grad_norm": 0.9071375131607056, "learning_rate": 7.44945986713981e-05, "loss": 1.5273, "step": 23470 }, { "epoch": 5.823051646103292, "grad_norm": 0.9421333074569702, "learning_rate": 7.4419266883614e-05, "loss": 1.5034, "step": 23480 }, { "epoch": 5.825531651063303, "grad_norm": 1.0461348295211792, "learning_rate": 7.434395062583734e-05, "loss": 1.5687, "step": 23490 }, { "epoch": 5.828011656023312, "grad_norm": 0.9921796321868896, "learning_rate": 7.426864994379243e-05, "loss": 1.4845, "step": 23500 }, { "epoch": 5.830491660983322, "grad_norm": 0.9265093207359314, "learning_rate": 7.419336488319417e-05, "loss": 1.6003, "step": 23510 }, { "epoch": 5.832971665943332, "grad_norm": 0.908584475517273, "learning_rate": 7.411809548974792e-05, "loss": 1.5234, "step": 23520 }, { "epoch": 5.835451670903342, "grad_norm": 0.9192255139350891, "learning_rate": 7.404284180914964e-05, "loss": 1.5112, "step": 23530 }, { "epoch": 5.837931675863352, "grad_norm": 0.9425198435783386, "learning_rate": 7.396760388708555e-05, "loss": 1.5185, "step": 23540 }, { "epoch": 5.840411680823362, "grad_norm": 0.9518033862113953, "learning_rate": 7.389238176923258e-05, "loss": 1.5166, "step": 23550 }, { "epoch": 5.842891685783371, "grad_norm": 0.9426175355911255, "learning_rate": 7.38171755012578e-05, "loss": 1.5618, "step": 23560 }, { "epoch": 5.845371690743382, "grad_norm": 0.9501382112503052, "learning_rate": 7.374198512881881e-05, "loss": 1.5021, "step": 23570 }, { "epoch": 5.847851695703391, "grad_norm": 0.94672030210495, "learning_rate": 7.366681069756352e-05, "loss": 1.5153, "step": 23580 }, { "epoch": 5.850331700663402, "grad_norm": 0.9547986388206482, "learning_rate": 7.359165225313019e-05, "loss": 1.5246, "step": 23590 }, { "epoch": 5.852811705623411, "grad_norm": 0.9420895576477051, "learning_rate": 7.351650984114728e-05, "loss": 1.5534, "step": 23600 }, { "epoch": 5.855291710583421, "grad_norm": 0.9720684289932251, "learning_rate": 7.344138350723369e-05, "loss": 1.5364, "step": 23610 }, { "epoch": 5.857771715543431, "grad_norm": 0.9347152709960938, "learning_rate": 7.336627329699833e-05, "loss": 1.5349, "step": 23620 }, { "epoch": 5.860251720503441, "grad_norm": 0.9553906917572021, "learning_rate": 7.329117925604054e-05, "loss": 1.4873, "step": 23630 }, { "epoch": 5.862731725463451, "grad_norm": 0.9500386714935303, "learning_rate": 7.32161014299497e-05, "loss": 1.5453, "step": 23640 }, { "epoch": 5.865211730423461, "grad_norm": 0.92239910364151, "learning_rate": 7.314103986430543e-05, "loss": 1.4598, "step": 23650 }, { "epoch": 5.86769173538347, "grad_norm": 0.9757826924324036, "learning_rate": 7.30659946046774e-05, "loss": 1.5205, "step": 23660 }, { "epoch": 5.870171740343481, "grad_norm": 0.9532195925712585, "learning_rate": 7.29909656966255e-05, "loss": 1.545, "step": 23670 }, { "epoch": 5.87265174530349, "grad_norm": 0.8907900452613831, "learning_rate": 7.291595318569951e-05, "loss": 1.4541, "step": 23680 }, { "epoch": 5.875131750263501, "grad_norm": 1.0454277992248535, "learning_rate": 7.284095711743944e-05, "loss": 1.5117, "step": 23690 }, { "epoch": 5.87761175522351, "grad_norm": 0.9614602327346802, "learning_rate": 7.27659775373752e-05, "loss": 1.5193, "step": 23700 }, { "epoch": 5.880091760183521, "grad_norm": 0.923225462436676, "learning_rate": 7.269101449102676e-05, "loss": 1.5317, "step": 23710 }, { "epoch": 5.88257176514353, "grad_norm": 0.940643310546875, "learning_rate": 7.2616068023904e-05, "loss": 1.494, "step": 23720 }, { "epoch": 5.88505177010354, "grad_norm": 0.9426398873329163, "learning_rate": 7.25411381815068e-05, "loss": 1.4917, "step": 23730 }, { "epoch": 5.88753177506355, "grad_norm": 0.8780081272125244, "learning_rate": 7.24662250093248e-05, "loss": 1.4823, "step": 23740 }, { "epoch": 5.89001178002356, "grad_norm": 0.962202250957489, "learning_rate": 7.239132855283777e-05, "loss": 1.557, "step": 23750 }, { "epoch": 5.89249178498357, "grad_norm": 1.0483089685440063, "learning_rate": 7.231644885751507e-05, "loss": 1.4819, "step": 23760 }, { "epoch": 5.89497178994358, "grad_norm": 0.9742222428321838, "learning_rate": 7.224158596881606e-05, "loss": 1.5385, "step": 23770 }, { "epoch": 5.89745179490359, "grad_norm": 1.055155634880066, "learning_rate": 7.216673993218979e-05, "loss": 1.5791, "step": 23780 }, { "epoch": 5.8999317998636, "grad_norm": 1.012840986251831, "learning_rate": 7.209191079307521e-05, "loss": 1.5722, "step": 23790 }, { "epoch": 5.902411804823609, "grad_norm": 1.5436009168624878, "learning_rate": 7.20170985969008e-05, "loss": 1.5834, "step": 23800 }, { "epoch": 5.90489180978362, "grad_norm": 0.9777030944824219, "learning_rate": 7.194230338908501e-05, "loss": 1.5576, "step": 23810 }, { "epoch": 5.907371814743629, "grad_norm": 1.1380470991134644, "learning_rate": 7.186752521503575e-05, "loss": 1.6199, "step": 23820 }, { "epoch": 5.90985181970364, "grad_norm": 0.9923495650291443, "learning_rate": 7.179276412015075e-05, "loss": 1.5589, "step": 23830 }, { "epoch": 5.912331824663649, "grad_norm": 1.0055482387542725, "learning_rate": 7.171802014981726e-05, "loss": 1.5621, "step": 23840 }, { "epoch": 5.914811829623659, "grad_norm": 0.960023820400238, "learning_rate": 7.164329334941224e-05, "loss": 1.545, "step": 23850 }, { "epoch": 5.917291834583669, "grad_norm": 0.9554881453514099, "learning_rate": 7.15685837643021e-05, "loss": 1.4998, "step": 23860 }, { "epoch": 5.919771839543679, "grad_norm": 0.9166724681854248, "learning_rate": 7.149389143984295e-05, "loss": 1.5257, "step": 23870 }, { "epoch": 5.922251844503689, "grad_norm": 0.9697259664535522, "learning_rate": 7.141921642138025e-05, "loss": 1.5697, "step": 23880 }, { "epoch": 5.924731849463699, "grad_norm": 0.9463170766830444, "learning_rate": 7.13445587542491e-05, "loss": 1.447, "step": 23890 }, { "epoch": 5.9272118544237085, "grad_norm": 0.9994683861732483, "learning_rate": 7.126991848377395e-05, "loss": 1.5664, "step": 23900 }, { "epoch": 5.929691859383719, "grad_norm": 0.9542251229286194, "learning_rate": 7.119529565526883e-05, "loss": 1.5363, "step": 23910 }, { "epoch": 5.9321718643437285, "grad_norm": 0.8708572387695312, "learning_rate": 7.112069031403704e-05, "loss": 1.5512, "step": 23920 }, { "epoch": 5.934651869303739, "grad_norm": 0.9620928168296814, "learning_rate": 7.104610250537133e-05, "loss": 1.5337, "step": 23930 }, { "epoch": 5.937131874263748, "grad_norm": 0.9948837161064148, "learning_rate": 7.097153227455379e-05, "loss": 1.5175, "step": 23940 }, { "epoch": 5.939611879223758, "grad_norm": 0.9141833782196045, "learning_rate": 7.089697966685581e-05, "loss": 1.5532, "step": 23950 }, { "epoch": 5.942091884183768, "grad_norm": 0.9160381555557251, "learning_rate": 7.082244472753822e-05, "loss": 1.5984, "step": 23960 }, { "epoch": 5.944571889143778, "grad_norm": 1.0294259786605835, "learning_rate": 7.074792750185094e-05, "loss": 1.5502, "step": 23970 }, { "epoch": 5.947051894103788, "grad_norm": 1.0058735609054565, "learning_rate": 7.067342803503323e-05, "loss": 1.5325, "step": 23980 }, { "epoch": 5.949531899063798, "grad_norm": 0.9840842485427856, "learning_rate": 7.059894637231354e-05, "loss": 1.5092, "step": 23990 }, { "epoch": 5.952011904023808, "grad_norm": 0.9545488953590393, "learning_rate": 7.052448255890957e-05, "loss": 1.5778, "step": 24000 }, { "epoch": 5.954491908983818, "grad_norm": 0.9650373458862305, "learning_rate": 7.045003664002809e-05, "loss": 1.497, "step": 24010 }, { "epoch": 5.9569719139438275, "grad_norm": 0.971540629863739, "learning_rate": 7.037560866086513e-05, "loss": 1.5427, "step": 24020 }, { "epoch": 5.959451918903838, "grad_norm": 0.9746721982955933, "learning_rate": 7.030119866660564e-05, "loss": 1.5155, "step": 24030 }, { "epoch": 5.9619319238638475, "grad_norm": 0.9826546907424927, "learning_rate": 7.022680670242387e-05, "loss": 1.5574, "step": 24040 }, { "epoch": 5.964411928823858, "grad_norm": 1.0468549728393555, "learning_rate": 7.015243281348293e-05, "loss": 1.5316, "step": 24050 }, { "epoch": 5.9668919337838675, "grad_norm": 1.0100024938583374, "learning_rate": 7.007807704493514e-05, "loss": 1.5668, "step": 24060 }, { "epoch": 5.969371938743878, "grad_norm": 0.9497383832931519, "learning_rate": 7.000373944192165e-05, "loss": 1.4998, "step": 24070 }, { "epoch": 5.9718519437038875, "grad_norm": 0.9240361452102661, "learning_rate": 6.992942004957271e-05, "loss": 1.5402, "step": 24080 }, { "epoch": 5.974331948663897, "grad_norm": 1.0378366708755493, "learning_rate": 6.985511891300737e-05, "loss": 1.5714, "step": 24090 }, { "epoch": 5.9768119536239075, "grad_norm": 1.0124924182891846, "learning_rate": 6.978083607733384e-05, "loss": 1.5335, "step": 24100 }, { "epoch": 5.979291958583917, "grad_norm": 0.9290186166763306, "learning_rate": 6.970657158764894e-05, "loss": 1.4215, "step": 24110 }, { "epoch": 5.9817719635439275, "grad_norm": 0.8907943964004517, "learning_rate": 6.963232548903853e-05, "loss": 1.5144, "step": 24120 }, { "epoch": 5.984251968503937, "grad_norm": 0.9668370485305786, "learning_rate": 6.955809782657724e-05, "loss": 1.5497, "step": 24130 }, { "epoch": 5.986731973463947, "grad_norm": 0.956092119216919, "learning_rate": 6.948388864532857e-05, "loss": 1.6132, "step": 24140 }, { "epoch": 5.989211978423957, "grad_norm": 1.0998320579528809, "learning_rate": 6.940969799034465e-05, "loss": 1.5673, "step": 24150 }, { "epoch": 5.991691983383967, "grad_norm": 1.0120083093643188, "learning_rate": 6.933552590666659e-05, "loss": 1.5247, "step": 24160 }, { "epoch": 5.994171988343977, "grad_norm": 0.9429424405097961, "learning_rate": 6.926137243932398e-05, "loss": 1.5167, "step": 24170 }, { "epoch": 5.996651993303987, "grad_norm": 0.9629198908805847, "learning_rate": 6.918723763333532e-05, "loss": 1.4815, "step": 24180 }, { "epoch": 5.999131998263996, "grad_norm": 1.0192433595657349, "learning_rate": 6.911312153370765e-05, "loss": 1.4817, "step": 24190 }, { "epoch": 6.001612003224007, "grad_norm": 0.9021772742271423, "learning_rate": 6.903902418543671e-05, "loss": 1.5574, "step": 24200 }, { "epoch": 6.004092008184016, "grad_norm": 0.9498673677444458, "learning_rate": 6.89649456335068e-05, "loss": 1.4026, "step": 24210 }, { "epoch": 6.006572013144027, "grad_norm": 1.02396559715271, "learning_rate": 6.889088592289093e-05, "loss": 1.4367, "step": 24220 }, { "epoch": 6.009052018104036, "grad_norm": 0.9916176199913025, "learning_rate": 6.881684509855049e-05, "loss": 1.5412, "step": 24230 }, { "epoch": 6.0115320230640465, "grad_norm": 0.9803375005722046, "learning_rate": 6.874282320543557e-05, "loss": 1.4064, "step": 24240 }, { "epoch": 6.014012028024056, "grad_norm": 1.0052950382232666, "learning_rate": 6.866882028848465e-05, "loss": 1.5316, "step": 24250 }, { "epoch": 6.016492032984066, "grad_norm": 0.9743315577507019, "learning_rate": 6.859483639262481e-05, "loss": 1.4378, "step": 24260 }, { "epoch": 6.018972037944076, "grad_norm": 0.9998024106025696, "learning_rate": 6.852087156277143e-05, "loss": 1.5104, "step": 24270 }, { "epoch": 6.021452042904086, "grad_norm": 1.0471398830413818, "learning_rate": 6.844692584382848e-05, "loss": 1.5089, "step": 24280 }, { "epoch": 6.023932047864096, "grad_norm": 1.002640724182129, "learning_rate": 6.837299928068817e-05, "loss": 1.4579, "step": 24290 }, { "epoch": 6.026412052824106, "grad_norm": 0.9999975562095642, "learning_rate": 6.829909191823121e-05, "loss": 1.4601, "step": 24300 }, { "epoch": 6.028892057784115, "grad_norm": 0.9147332906723022, "learning_rate": 6.822520380132655e-05, "loss": 1.4445, "step": 24310 }, { "epoch": 6.031372062744126, "grad_norm": 0.9814079999923706, "learning_rate": 6.815133497483157e-05, "loss": 1.4039, "step": 24320 }, { "epoch": 6.033852067704135, "grad_norm": 0.9648290872573853, "learning_rate": 6.807748548359181e-05, "loss": 1.4357, "step": 24330 }, { "epoch": 6.036332072664146, "grad_norm": 1.0147061347961426, "learning_rate": 6.80036553724412e-05, "loss": 1.4742, "step": 24340 }, { "epoch": 6.038812077624155, "grad_norm": 0.9684031009674072, "learning_rate": 6.792984468620174e-05, "loss": 1.4793, "step": 24350 }, { "epoch": 6.041292082584166, "grad_norm": 1.0336523056030273, "learning_rate": 6.785605346968386e-05, "loss": 1.483, "step": 24360 }, { "epoch": 6.043772087544175, "grad_norm": 0.947838306427002, "learning_rate": 6.778228176768595e-05, "loss": 1.4407, "step": 24370 }, { "epoch": 6.046252092504185, "grad_norm": 0.9754374623298645, "learning_rate": 6.770852962499471e-05, "loss": 1.4824, "step": 24380 }, { "epoch": 6.048732097464195, "grad_norm": 1.0519452095031738, "learning_rate": 6.763479708638485e-05, "loss": 1.5115, "step": 24390 }, { "epoch": 6.051212102424205, "grad_norm": 0.9581342339515686, "learning_rate": 6.756108419661931e-05, "loss": 1.4636, "step": 24400 }, { "epoch": 6.053692107384215, "grad_norm": 1.0844944715499878, "learning_rate": 6.748739100044892e-05, "loss": 1.5126, "step": 24410 }, { "epoch": 6.056172112344225, "grad_norm": 0.9884443283081055, "learning_rate": 6.741371754261278e-05, "loss": 1.4207, "step": 24420 }, { "epoch": 6.058652117304234, "grad_norm": 0.9929337501525879, "learning_rate": 6.73400638678378e-05, "loss": 1.4127, "step": 24430 }, { "epoch": 6.061132122264245, "grad_norm": 1.0066174268722534, "learning_rate": 6.7266430020839e-05, "loss": 1.4225, "step": 24440 }, { "epoch": 6.063612127224254, "grad_norm": 1.0377559661865234, "learning_rate": 6.71928160463193e-05, "loss": 1.4664, "step": 24450 }, { "epoch": 6.066092132184265, "grad_norm": 0.9668002128601074, "learning_rate": 6.711922198896962e-05, "loss": 1.4753, "step": 24460 }, { "epoch": 6.068572137144274, "grad_norm": 1.0058457851409912, "learning_rate": 6.704564789346873e-05, "loss": 1.5021, "step": 24470 }, { "epoch": 6.071052142104284, "grad_norm": 0.9895146489143372, "learning_rate": 6.697209380448333e-05, "loss": 1.4896, "step": 24480 }, { "epoch": 6.073532147064294, "grad_norm": 1.07613205909729, "learning_rate": 6.689855976666788e-05, "loss": 1.4871, "step": 24490 }, { "epoch": 6.076012152024304, "grad_norm": 1.0612536668777466, "learning_rate": 6.682504582466482e-05, "loss": 1.5251, "step": 24500 }, { "epoch": 6.078492156984314, "grad_norm": 0.9371278285980225, "learning_rate": 6.675155202310424e-05, "loss": 1.4509, "step": 24510 }, { "epoch": 6.080972161944324, "grad_norm": 0.9730448126792908, "learning_rate": 6.66780784066041e-05, "loss": 1.4651, "step": 24520 }, { "epoch": 6.083452166904334, "grad_norm": 1.00874662399292, "learning_rate": 6.660462501977004e-05, "loss": 1.4882, "step": 24530 }, { "epoch": 6.085932171864344, "grad_norm": 1.0154139995574951, "learning_rate": 6.653119190719554e-05, "loss": 1.4462, "step": 24540 }, { "epoch": 6.088412176824353, "grad_norm": 1.04769766330719, "learning_rate": 6.645777911346155e-05, "loss": 1.435, "step": 24550 }, { "epoch": 6.090892181784364, "grad_norm": 1.0066906213760376, "learning_rate": 6.638438668313695e-05, "loss": 1.4698, "step": 24560 }, { "epoch": 6.093372186744373, "grad_norm": 1.0486011505126953, "learning_rate": 6.6311014660778e-05, "loss": 1.4811, "step": 24570 }, { "epoch": 6.095852191704384, "grad_norm": 1.110256552696228, "learning_rate": 6.623766309092879e-05, "loss": 1.4431, "step": 24580 }, { "epoch": 6.098332196664393, "grad_norm": 1.0279182195663452, "learning_rate": 6.616433201812085e-05, "loss": 1.4469, "step": 24590 }, { "epoch": 6.100812201624403, "grad_norm": 1.0100728273391724, "learning_rate": 6.609102148687333e-05, "loss": 1.4593, "step": 24600 }, { "epoch": 6.103292206584413, "grad_norm": 0.9717130661010742, "learning_rate": 6.60177315416929e-05, "loss": 1.4699, "step": 24610 }, { "epoch": 6.105772211544423, "grad_norm": 1.0001333951950073, "learning_rate": 6.59444622270737e-05, "loss": 1.4175, "step": 24620 }, { "epoch": 6.108252216504433, "grad_norm": 0.9675756692886353, "learning_rate": 6.587121358749745e-05, "loss": 1.4331, "step": 24630 }, { "epoch": 6.110732221464443, "grad_norm": 0.9195937514305115, "learning_rate": 6.579798566743314e-05, "loss": 1.483, "step": 24640 }, { "epoch": 6.113212226424453, "grad_norm": 0.9295458197593689, "learning_rate": 6.572477851133733e-05, "loss": 1.4546, "step": 24650 }, { "epoch": 6.115692231384463, "grad_norm": 1.1020594835281372, "learning_rate": 6.565159216365389e-05, "loss": 1.5096, "step": 24660 }, { "epoch": 6.118172236344472, "grad_norm": 0.9957802295684814, "learning_rate": 6.557842666881416e-05, "loss": 1.4511, "step": 24670 }, { "epoch": 6.120652241304483, "grad_norm": 1.0554035902023315, "learning_rate": 6.550528207123668e-05, "loss": 1.4479, "step": 24680 }, { "epoch": 6.123132246264492, "grad_norm": 0.9450856447219849, "learning_rate": 6.543215841532744e-05, "loss": 1.3555, "step": 24690 }, { "epoch": 6.125612251224503, "grad_norm": 0.9754582643508911, "learning_rate": 6.535905574547956e-05, "loss": 1.5365, "step": 24700 }, { "epoch": 6.128092256184512, "grad_norm": 0.9962130784988403, "learning_rate": 6.528597410607364e-05, "loss": 1.5361, "step": 24710 }, { "epoch": 6.130572261144522, "grad_norm": 1.0566751956939697, "learning_rate": 6.521291354147727e-05, "loss": 1.4661, "step": 24720 }, { "epoch": 6.133052266104532, "grad_norm": 1.0275439023971558, "learning_rate": 6.513987409604545e-05, "loss": 1.42, "step": 24730 }, { "epoch": 6.135532271064542, "grad_norm": 1.0249825716018677, "learning_rate": 6.506685581412018e-05, "loss": 1.4582, "step": 24740 }, { "epoch": 6.138012276024552, "grad_norm": 0.9934401512145996, "learning_rate": 6.499385874003076e-05, "loss": 1.436, "step": 24750 }, { "epoch": 6.140492280984562, "grad_norm": 0.9783113598823547, "learning_rate": 6.492088291809354e-05, "loss": 1.5651, "step": 24760 }, { "epoch": 6.1429722859445715, "grad_norm": 0.9646732807159424, "learning_rate": 6.484792839261203e-05, "loss": 1.4806, "step": 24770 }, { "epoch": 6.145452290904582, "grad_norm": 0.9748082756996155, "learning_rate": 6.477499520787665e-05, "loss": 1.4407, "step": 24780 }, { "epoch": 6.1479322958645914, "grad_norm": 0.9589222073554993, "learning_rate": 6.470208340816508e-05, "loss": 1.4854, "step": 24790 }, { "epoch": 6.150412300824602, "grad_norm": 1.0084720849990845, "learning_rate": 6.462919303774186e-05, "loss": 1.4739, "step": 24800 }, { "epoch": 6.152892305784611, "grad_norm": 1.0057191848754883, "learning_rate": 6.455632414085861e-05, "loss": 1.4447, "step": 24810 }, { "epoch": 6.155372310744622, "grad_norm": 0.9664227366447449, "learning_rate": 6.448347676175384e-05, "loss": 1.5124, "step": 24820 }, { "epoch": 6.157852315704631, "grad_norm": 1.0164328813552856, "learning_rate": 6.441065094465311e-05, "loss": 1.5045, "step": 24830 }, { "epoch": 6.160332320664641, "grad_norm": 1.020713448524475, "learning_rate": 6.43378467337687e-05, "loss": 1.4937, "step": 24840 }, { "epoch": 6.162812325624651, "grad_norm": 0.9904918074607849, "learning_rate": 6.42650641733e-05, "loss": 1.4852, "step": 24850 }, { "epoch": 6.165292330584661, "grad_norm": 1.080602765083313, "learning_rate": 6.419230330743305e-05, "loss": 1.4141, "step": 24860 }, { "epoch": 6.167772335544671, "grad_norm": 1.0138860940933228, "learning_rate": 6.41195641803409e-05, "loss": 1.5157, "step": 24870 }, { "epoch": 6.170252340504681, "grad_norm": 0.9689585566520691, "learning_rate": 6.404684683618325e-05, "loss": 1.5049, "step": 24880 }, { "epoch": 6.1727323454646905, "grad_norm": 1.0130665302276611, "learning_rate": 6.397415131910671e-05, "loss": 1.4695, "step": 24890 }, { "epoch": 6.175212350424701, "grad_norm": 0.97395259141922, "learning_rate": 6.390147767324445e-05, "loss": 1.4501, "step": 24900 }, { "epoch": 6.1776923553847105, "grad_norm": 0.9632650017738342, "learning_rate": 6.382882594271663e-05, "loss": 1.4742, "step": 24910 }, { "epoch": 6.180172360344721, "grad_norm": 1.001616358757019, "learning_rate": 6.375619617162985e-05, "loss": 1.4871, "step": 24920 }, { "epoch": 6.1826523653047305, "grad_norm": 1.0696035623550415, "learning_rate": 6.368358840407753e-05, "loss": 1.5157, "step": 24930 }, { "epoch": 6.185132370264741, "grad_norm": 0.9896767735481262, "learning_rate": 6.361100268413967e-05, "loss": 1.3934, "step": 24940 }, { "epoch": 6.1876123752247505, "grad_norm": 0.9482393264770508, "learning_rate": 6.353843905588295e-05, "loss": 1.467, "step": 24950 }, { "epoch": 6.19009238018476, "grad_norm": 0.9930483102798462, "learning_rate": 6.34658975633605e-05, "loss": 1.4932, "step": 24960 }, { "epoch": 6.1925723851447705, "grad_norm": 1.052764654159546, "learning_rate": 6.33933782506122e-05, "loss": 1.4872, "step": 24970 }, { "epoch": 6.19505239010478, "grad_norm": 1.0381749868392944, "learning_rate": 6.33208811616643e-05, "loss": 1.4503, "step": 24980 }, { "epoch": 6.1975323950647905, "grad_norm": 1.0356924533843994, "learning_rate": 6.324840634052967e-05, "loss": 1.4349, "step": 24990 }, { "epoch": 6.2000124000248, "grad_norm": 0.9789268970489502, "learning_rate": 6.317595383120756e-05, "loss": 1.5251, "step": 25000 }, { "epoch": 6.20249240498481, "grad_norm": 0.9749928712844849, "learning_rate": 6.31035236776838e-05, "loss": 1.4467, "step": 25010 }, { "epoch": 6.20497240994482, "grad_norm": 0.978130042552948, "learning_rate": 6.30311159239305e-05, "loss": 1.5107, "step": 25020 }, { "epoch": 6.20745241490483, "grad_norm": 1.008919358253479, "learning_rate": 6.295873061390636e-05, "loss": 1.4652, "step": 25030 }, { "epoch": 6.20993241986484, "grad_norm": 0.9471341967582703, "learning_rate": 6.288636779155621e-05, "loss": 1.4217, "step": 25040 }, { "epoch": 6.21241242482485, "grad_norm": 1.0002670288085938, "learning_rate": 6.281402750081147e-05, "loss": 1.5057, "step": 25050 }, { "epoch": 6.214892429784859, "grad_norm": 0.9456087946891785, "learning_rate": 6.27417097855897e-05, "loss": 1.4608, "step": 25060 }, { "epoch": 6.21737243474487, "grad_norm": 0.9659121632575989, "learning_rate": 6.26694146897949e-05, "loss": 1.4419, "step": 25070 }, { "epoch": 6.219852439704879, "grad_norm": 0.9993132948875427, "learning_rate": 6.259714225731718e-05, "loss": 1.4938, "step": 25080 }, { "epoch": 6.2223324446648896, "grad_norm": 0.9439984560012817, "learning_rate": 6.252489253203309e-05, "loss": 1.4579, "step": 25090 }, { "epoch": 6.224812449624899, "grad_norm": 1.018552303314209, "learning_rate": 6.245266555780513e-05, "loss": 1.4627, "step": 25100 }, { "epoch": 6.2272924545849095, "grad_norm": 0.9674966931343079, "learning_rate": 6.23804613784823e-05, "loss": 1.4592, "step": 25110 }, { "epoch": 6.229772459544919, "grad_norm": 1.0720467567443848, "learning_rate": 6.230828003789949e-05, "loss": 1.518, "step": 25120 }, { "epoch": 6.232252464504929, "grad_norm": 0.9193775653839111, "learning_rate": 6.223612157987786e-05, "loss": 1.4278, "step": 25130 }, { "epoch": 6.234732469464939, "grad_norm": 1.0076197385787964, "learning_rate": 6.216398604822466e-05, "loss": 1.4888, "step": 25140 }, { "epoch": 6.237212474424949, "grad_norm": 0.9544000625610352, "learning_rate": 6.209187348673325e-05, "loss": 1.4576, "step": 25150 }, { "epoch": 6.239692479384959, "grad_norm": 0.9171335101127625, "learning_rate": 6.20197839391829e-05, "loss": 1.4899, "step": 25160 }, { "epoch": 6.242172484344969, "grad_norm": 1.0323432683944702, "learning_rate": 6.194771744933916e-05, "loss": 1.5335, "step": 25170 }, { "epoch": 6.244652489304978, "grad_norm": 0.9690384864807129, "learning_rate": 6.187567406095333e-05, "loss": 1.4252, "step": 25180 }, { "epoch": 6.247132494264989, "grad_norm": 1.0386059284210205, "learning_rate": 6.180365381776285e-05, "loss": 1.5064, "step": 25190 }, { "epoch": 6.249612499224998, "grad_norm": 0.9646037220954895, "learning_rate": 6.173165676349103e-05, "loss": 1.3969, "step": 25200 }, { "epoch": 6.252092504185009, "grad_norm": 1.0465009212493896, "learning_rate": 6.165968294184713e-05, "loss": 1.4816, "step": 25210 }, { "epoch": 6.254572509145018, "grad_norm": 1.0445176362991333, "learning_rate": 6.158773239652633e-05, "loss": 1.4505, "step": 25220 }, { "epoch": 6.257052514105029, "grad_norm": 0.9644328951835632, "learning_rate": 6.151580517120958e-05, "loss": 1.5001, "step": 25230 }, { "epoch": 6.259532519065038, "grad_norm": 1.0155954360961914, "learning_rate": 6.144390130956384e-05, "loss": 1.4321, "step": 25240 }, { "epoch": 6.262012524025048, "grad_norm": 0.985386848449707, "learning_rate": 6.137202085524167e-05, "loss": 1.5258, "step": 25250 }, { "epoch": 6.264492528985058, "grad_norm": 0.9713849425315857, "learning_rate": 6.130016385188166e-05, "loss": 1.5211, "step": 25260 }, { "epoch": 6.266972533945068, "grad_norm": 1.054305911064148, "learning_rate": 6.122833034310793e-05, "loss": 1.4022, "step": 25270 }, { "epoch": 6.269452538905078, "grad_norm": 0.8938084244728088, "learning_rate": 6.115652037253053e-05, "loss": 1.395, "step": 25280 }, { "epoch": 6.271932543865088, "grad_norm": 0.9827483892440796, "learning_rate": 6.10847339837451e-05, "loss": 1.5041, "step": 25290 }, { "epoch": 6.274412548825097, "grad_norm": 1.0114959478378296, "learning_rate": 6.1012971220333016e-05, "loss": 1.4406, "step": 25300 }, { "epoch": 6.276892553785108, "grad_norm": 0.9963436126708984, "learning_rate": 6.0941232125861235e-05, "loss": 1.4858, "step": 25310 }, { "epoch": 6.279372558745117, "grad_norm": 0.9555656313896179, "learning_rate": 6.0869516743882516e-05, "loss": 1.4382, "step": 25320 }, { "epoch": 6.281852563705128, "grad_norm": 0.9409336447715759, "learning_rate": 6.079782511793502e-05, "loss": 1.4517, "step": 25330 }, { "epoch": 6.284332568665137, "grad_norm": 0.9207855463027954, "learning_rate": 6.0726157291542605e-05, "loss": 1.4633, "step": 25340 }, { "epoch": 6.286812573625147, "grad_norm": 1.043115496635437, "learning_rate": 6.0654513308214645e-05, "loss": 1.4632, "step": 25350 }, { "epoch": 6.289292578585157, "grad_norm": 0.9693536758422852, "learning_rate": 6.058289321144608e-05, "loss": 1.4656, "step": 25360 }, { "epoch": 6.291772583545167, "grad_norm": 1.041846752166748, "learning_rate": 6.0511297044717264e-05, "loss": 1.5087, "step": 25370 }, { "epoch": 6.294252588505177, "grad_norm": 0.9643771052360535, "learning_rate": 6.043972485149414e-05, "loss": 1.4855, "step": 25380 }, { "epoch": 6.296732593465187, "grad_norm": 1.0329574346542358, "learning_rate": 6.0368176675227964e-05, "loss": 1.4842, "step": 25390 }, { "epoch": 6.299212598425197, "grad_norm": 1.0316472053527832, "learning_rate": 6.02966525593555e-05, "loss": 1.5037, "step": 25400 }, { "epoch": 6.301692603385207, "grad_norm": 0.9890727400779724, "learning_rate": 6.02251525472989e-05, "loss": 1.4633, "step": 25410 }, { "epoch": 6.304172608345216, "grad_norm": 0.9751850366592407, "learning_rate": 6.0153676682465654e-05, "loss": 1.4507, "step": 25420 }, { "epoch": 6.306652613305227, "grad_norm": 0.8939128518104553, "learning_rate": 6.00822250082486e-05, "loss": 1.4735, "step": 25430 }, { "epoch": 6.309132618265236, "grad_norm": 0.9928026795387268, "learning_rate": 6.001079756802592e-05, "loss": 1.4833, "step": 25440 }, { "epoch": 6.311612623225247, "grad_norm": 1.0220106840133667, "learning_rate": 5.9939394405160986e-05, "loss": 1.4725, "step": 25450 }, { "epoch": 6.314092628185256, "grad_norm": 1.026551604270935, "learning_rate": 5.9868015563002625e-05, "loss": 1.4409, "step": 25460 }, { "epoch": 6.316572633145267, "grad_norm": 0.993131697177887, "learning_rate": 5.9796661084884644e-05, "loss": 1.5148, "step": 25470 }, { "epoch": 6.319052638105276, "grad_norm": 1.0696313381195068, "learning_rate": 5.9725331014126294e-05, "loss": 1.477, "step": 25480 }, { "epoch": 6.321532643065286, "grad_norm": 0.9706017374992371, "learning_rate": 5.965402539403182e-05, "loss": 1.5219, "step": 25490 }, { "epoch": 6.324012648025296, "grad_norm": 1.0308212041854858, "learning_rate": 5.9582744267890814e-05, "loss": 1.5055, "step": 25500 }, { "epoch": 6.326492652985306, "grad_norm": 0.9662330746650696, "learning_rate": 5.951148767897776e-05, "loss": 1.4692, "step": 25510 }, { "epoch": 6.328972657945316, "grad_norm": 1.0245823860168457, "learning_rate": 5.944025567055251e-05, "loss": 1.4989, "step": 25520 }, { "epoch": 6.331452662905326, "grad_norm": 0.990571141242981, "learning_rate": 5.936904828585976e-05, "loss": 1.4467, "step": 25530 }, { "epoch": 6.333932667865335, "grad_norm": 1.0541971921920776, "learning_rate": 5.929786556812943e-05, "loss": 1.4546, "step": 25540 }, { "epoch": 6.336412672825346, "grad_norm": 0.9833094477653503, "learning_rate": 5.922670756057633e-05, "loss": 1.4629, "step": 25550 }, { "epoch": 6.338892677785355, "grad_norm": 0.9783018231391907, "learning_rate": 5.9155574306400395e-05, "loss": 1.4717, "step": 25560 }, { "epoch": 6.341372682745366, "grad_norm": 0.9881133437156677, "learning_rate": 5.908446584878642e-05, "loss": 1.4355, "step": 25570 }, { "epoch": 6.343852687705375, "grad_norm": 1.0198750495910645, "learning_rate": 5.901338223090425e-05, "loss": 1.4109, "step": 25580 }, { "epoch": 6.346332692665385, "grad_norm": 1.0592362880706787, "learning_rate": 5.89423234959085e-05, "loss": 1.499, "step": 25590 }, { "epoch": 6.348812697625395, "grad_norm": 1.0093663930892944, "learning_rate": 5.887128968693887e-05, "loss": 1.4483, "step": 25600 }, { "epoch": 6.351292702585405, "grad_norm": 1.007263422012329, "learning_rate": 5.880028084711976e-05, "loss": 1.4667, "step": 25610 }, { "epoch": 6.353772707545415, "grad_norm": 1.0381349325180054, "learning_rate": 5.872929701956054e-05, "loss": 1.5031, "step": 25620 }, { "epoch": 6.356252712505425, "grad_norm": 1.0256422758102417, "learning_rate": 5.8658338247355296e-05, "loss": 1.5182, "step": 25630 }, { "epoch": 6.358732717465435, "grad_norm": 1.0254257917404175, "learning_rate": 5.858740457358298e-05, "loss": 1.4856, "step": 25640 }, { "epoch": 6.361212722425445, "grad_norm": 0.9955101013183594, "learning_rate": 5.851649604130723e-05, "loss": 1.444, "step": 25650 }, { "epoch": 6.363692727385454, "grad_norm": 1.0298657417297363, "learning_rate": 5.8445612693576504e-05, "loss": 1.5016, "step": 25660 }, { "epoch": 6.366172732345465, "grad_norm": 1.0355310440063477, "learning_rate": 5.837475457342385e-05, "loss": 1.4489, "step": 25670 }, { "epoch": 6.368652737305474, "grad_norm": 0.996111273765564, "learning_rate": 5.830392172386723e-05, "loss": 1.3925, "step": 25680 }, { "epoch": 6.371132742265485, "grad_norm": 0.9839708805084229, "learning_rate": 5.8233114187908935e-05, "loss": 1.4616, "step": 25690 }, { "epoch": 6.373612747225494, "grad_norm": 1.061486840248108, "learning_rate": 5.816233200853618e-05, "loss": 1.4766, "step": 25700 }, { "epoch": 6.376092752185504, "grad_norm": 0.9838336110115051, "learning_rate": 5.809157522872065e-05, "loss": 1.4451, "step": 25710 }, { "epoch": 6.378572757145514, "grad_norm": 0.9915831089019775, "learning_rate": 5.802084389141862e-05, "loss": 1.5084, "step": 25720 }, { "epoch": 6.381052762105524, "grad_norm": 1.028008222579956, "learning_rate": 5.79501380395709e-05, "loss": 1.4982, "step": 25730 }, { "epoch": 6.383532767065534, "grad_norm": 1.0214972496032715, "learning_rate": 5.787945771610296e-05, "loss": 1.5264, "step": 25740 }, { "epoch": 6.386012772025544, "grad_norm": 1.0997072458267212, "learning_rate": 5.7808802963924524e-05, "loss": 1.4507, "step": 25750 }, { "epoch": 6.388492776985554, "grad_norm": 1.0471376180648804, "learning_rate": 5.773817382593008e-05, "loss": 1.4611, "step": 25760 }, { "epoch": 6.390972781945564, "grad_norm": 0.9893296957015991, "learning_rate": 5.7667570344998343e-05, "loss": 1.5209, "step": 25770 }, { "epoch": 6.3934527869055735, "grad_norm": 0.9793508052825928, "learning_rate": 5.759699256399255e-05, "loss": 1.5272, "step": 25780 }, { "epoch": 6.395932791865584, "grad_norm": 1.0266550779342651, "learning_rate": 5.7526440525760284e-05, "loss": 1.5027, "step": 25790 }, { "epoch": 6.3984127968255935, "grad_norm": 0.9966225624084473, "learning_rate": 5.7455914273133646e-05, "loss": 1.4147, "step": 25800 }, { "epoch": 6.400892801785604, "grad_norm": 0.9305916428565979, "learning_rate": 5.7385413848928814e-05, "loss": 1.4721, "step": 25810 }, { "epoch": 6.4033728067456135, "grad_norm": 0.9645832777023315, "learning_rate": 5.731493929594658e-05, "loss": 1.4901, "step": 25820 }, { "epoch": 6.405852811705623, "grad_norm": 1.1395509243011475, "learning_rate": 5.7244490656971815e-05, "loss": 1.5164, "step": 25830 }, { "epoch": 6.4083328166656335, "grad_norm": 1.0750949382781982, "learning_rate": 5.717406797477372e-05, "loss": 1.4958, "step": 25840 }, { "epoch": 6.410812821625643, "grad_norm": 0.987261950969696, "learning_rate": 5.710367129210583e-05, "loss": 1.4071, "step": 25850 }, { "epoch": 6.4132928265856535, "grad_norm": 1.0140446424484253, "learning_rate": 5.703330065170577e-05, "loss": 1.4673, "step": 25860 }, { "epoch": 6.415772831545663, "grad_norm": 0.9446235299110413, "learning_rate": 5.6962956096295404e-05, "loss": 1.512, "step": 25870 }, { "epoch": 6.418252836505673, "grad_norm": 1.0067250728607178, "learning_rate": 5.6892637668580716e-05, "loss": 1.5007, "step": 25880 }, { "epoch": 6.420732841465683, "grad_norm": 0.998441219329834, "learning_rate": 5.6822345411251995e-05, "loss": 1.4786, "step": 25890 }, { "epoch": 6.423212846425693, "grad_norm": 1.0318422317504883, "learning_rate": 5.675207936698337e-05, "loss": 1.4718, "step": 25900 }, { "epoch": 6.425692851385703, "grad_norm": 1.0681356191635132, "learning_rate": 5.6681839578433314e-05, "loss": 1.4734, "step": 25910 }, { "epoch": 6.428172856345713, "grad_norm": 1.0494177341461182, "learning_rate": 5.6611626088244194e-05, "loss": 1.4677, "step": 25920 }, { "epoch": 6.430652861305723, "grad_norm": 0.9486759901046753, "learning_rate": 5.654143893904249e-05, "loss": 1.4856, "step": 25930 }, { "epoch": 6.4331328662657326, "grad_norm": 1.0628020763397217, "learning_rate": 5.647127817343861e-05, "loss": 1.4124, "step": 25940 }, { "epoch": 6.435612871225742, "grad_norm": 0.9767960906028748, "learning_rate": 5.640114383402714e-05, "loss": 1.4999, "step": 25950 }, { "epoch": 6.4380928761857525, "grad_norm": 0.9298993945121765, "learning_rate": 5.6331035963386315e-05, "loss": 1.445, "step": 25960 }, { "epoch": 6.440572881145762, "grad_norm": 1.0534563064575195, "learning_rate": 5.6260954604078585e-05, "loss": 1.5237, "step": 25970 }, { "epoch": 6.4430528861057725, "grad_norm": 0.9476912617683411, "learning_rate": 5.6190899798650156e-05, "loss": 1.4964, "step": 25980 }, { "epoch": 6.445532891065782, "grad_norm": 1.005138635635376, "learning_rate": 5.6120871589631155e-05, "loss": 1.4278, "step": 25990 }, { "epoch": 6.448012896025792, "grad_norm": 1.0275954008102417, "learning_rate": 5.6050870019535494e-05, "loss": 1.4616, "step": 26000 }, { "epoch": 6.450492900985802, "grad_norm": 1.0758296251296997, "learning_rate": 5.598089513086108e-05, "loss": 1.4958, "step": 26010 }, { "epoch": 6.452972905945812, "grad_norm": 0.9819092154502869, "learning_rate": 5.591094696608946e-05, "loss": 1.5186, "step": 26020 }, { "epoch": 6.455452910905822, "grad_norm": 1.0485687255859375, "learning_rate": 5.5841025567686026e-05, "loss": 1.4086, "step": 26030 }, { "epoch": 6.457932915865832, "grad_norm": 1.0583301782608032, "learning_rate": 5.577113097809989e-05, "loss": 1.5041, "step": 26040 }, { "epoch": 6.460412920825842, "grad_norm": 1.018678069114685, "learning_rate": 5.570126323976391e-05, "loss": 1.4579, "step": 26050 }, { "epoch": 6.462892925785852, "grad_norm": 1.063525915145874, "learning_rate": 5.563142239509461e-05, "loss": 1.477, "step": 26060 }, { "epoch": 6.465372930745861, "grad_norm": 0.9450671076774597, "learning_rate": 5.556160848649228e-05, "loss": 1.5033, "step": 26070 }, { "epoch": 6.467852935705872, "grad_norm": 1.0004218816757202, "learning_rate": 5.549182155634076e-05, "loss": 1.4485, "step": 26080 }, { "epoch": 6.470332940665881, "grad_norm": 1.0630313158035278, "learning_rate": 5.542206164700754e-05, "loss": 1.4359, "step": 26090 }, { "epoch": 6.472812945625892, "grad_norm": 1.0103967189788818, "learning_rate": 5.5352328800843724e-05, "loss": 1.4517, "step": 26100 }, { "epoch": 6.475292950585901, "grad_norm": 1.0282511711120605, "learning_rate": 5.528262306018395e-05, "loss": 1.4638, "step": 26110 }, { "epoch": 6.477772955545911, "grad_norm": 1.0227611064910889, "learning_rate": 5.521294446734637e-05, "loss": 1.4756, "step": 26120 }, { "epoch": 6.480252960505921, "grad_norm": 0.9566037058830261, "learning_rate": 5.514329306463284e-05, "loss": 1.4416, "step": 26130 }, { "epoch": 6.482732965465931, "grad_norm": 1.0739294290542603, "learning_rate": 5.5073668894328475e-05, "loss": 1.5094, "step": 26140 }, { "epoch": 6.485212970425941, "grad_norm": 0.9938433766365051, "learning_rate": 5.500407199870199e-05, "loss": 1.4879, "step": 26150 }, { "epoch": 6.487692975385951, "grad_norm": 0.9702029228210449, "learning_rate": 5.493450242000546e-05, "loss": 1.5264, "step": 26160 }, { "epoch": 6.49017298034596, "grad_norm": 0.9910847544670105, "learning_rate": 5.486496020047456e-05, "loss": 1.4107, "step": 26170 }, { "epoch": 6.492652985305971, "grad_norm": 0.9760767817497253, "learning_rate": 5.4795445382328037e-05, "loss": 1.4871, "step": 26180 }, { "epoch": 6.49513299026598, "grad_norm": 0.9848397374153137, "learning_rate": 5.4725958007768304e-05, "loss": 1.4719, "step": 26190 }, { "epoch": 6.497612995225991, "grad_norm": 0.9554773569107056, "learning_rate": 5.465649811898098e-05, "loss": 1.4414, "step": 26200 }, { "epoch": 6.500093000186, "grad_norm": 0.9741032719612122, "learning_rate": 5.458706575813499e-05, "loss": 1.4004, "step": 26210 }, { "epoch": 6.502573005146011, "grad_norm": 0.9804884791374207, "learning_rate": 5.451766096738256e-05, "loss": 1.4557, "step": 26220 }, { "epoch": 6.50505301010602, "grad_norm": 0.9539157152175903, "learning_rate": 5.4448283788859265e-05, "loss": 1.4847, "step": 26230 }, { "epoch": 6.50753301506603, "grad_norm": 1.0004538297653198, "learning_rate": 5.43789342646837e-05, "loss": 1.4715, "step": 26240 }, { "epoch": 6.51001302002604, "grad_norm": 0.9888569712638855, "learning_rate": 5.4309612436957937e-05, "loss": 1.4583, "step": 26250 }, { "epoch": 6.51249302498605, "grad_norm": 1.0565658807754517, "learning_rate": 5.424031834776706e-05, "loss": 1.5336, "step": 26260 }, { "epoch": 6.51497302994606, "grad_norm": 0.9787483215332031, "learning_rate": 5.417105203917936e-05, "loss": 1.4092, "step": 26270 }, { "epoch": 6.51745303490607, "grad_norm": 0.95024573802948, "learning_rate": 5.410181355324622e-05, "loss": 1.408, "step": 26280 }, { "epoch": 6.51993303986608, "grad_norm": 1.0401235818862915, "learning_rate": 5.40326029320023e-05, "loss": 1.5157, "step": 26290 }, { "epoch": 6.52241304482609, "grad_norm": 0.9956122040748596, "learning_rate": 5.396342021746506e-05, "loss": 1.5031, "step": 26300 }, { "epoch": 6.524893049786099, "grad_norm": 1.0054196119308472, "learning_rate": 5.389426545163527e-05, "loss": 1.5093, "step": 26310 }, { "epoch": 6.52737305474611, "grad_norm": 0.9857822060585022, "learning_rate": 5.382513867649663e-05, "loss": 1.5204, "step": 26320 }, { "epoch": 6.529853059706119, "grad_norm": 1.0408052206039429, "learning_rate": 5.3756039934015836e-05, "loss": 1.4885, "step": 26330 }, { "epoch": 6.53233306466613, "grad_norm": 0.9730471968650818, "learning_rate": 5.368696926614255e-05, "loss": 1.4119, "step": 26340 }, { "epoch": 6.534813069626139, "grad_norm": 0.949488639831543, "learning_rate": 5.3617926714809544e-05, "loss": 1.4324, "step": 26350 }, { "epoch": 6.537293074586149, "grad_norm": 0.9932969808578491, "learning_rate": 5.3548912321932244e-05, "loss": 1.4441, "step": 26360 }, { "epoch": 6.539773079546159, "grad_norm": 1.0820375680923462, "learning_rate": 5.347992612940926e-05, "loss": 1.4928, "step": 26370 }, { "epoch": 6.542253084506169, "grad_norm": 1.0019023418426514, "learning_rate": 5.34109681791219e-05, "loss": 1.5409, "step": 26380 }, { "epoch": 6.544733089466179, "grad_norm": 1.0442291498184204, "learning_rate": 5.3342038512934424e-05, "loss": 1.4661, "step": 26390 }, { "epoch": 6.547213094426189, "grad_norm": 1.1074390411376953, "learning_rate": 5.32731371726938e-05, "loss": 1.4568, "step": 26400 }, { "epoch": 6.549693099386198, "grad_norm": 0.9921213984489441, "learning_rate": 5.320426420023001e-05, "loss": 1.4836, "step": 26410 }, { "epoch": 6.552173104346209, "grad_norm": 1.0667870044708252, "learning_rate": 5.313541963735556e-05, "loss": 1.4685, "step": 26420 }, { "epoch": 6.554653109306218, "grad_norm": 0.9324483871459961, "learning_rate": 5.3066603525865924e-05, "loss": 1.4437, "step": 26430 }, { "epoch": 6.557133114266229, "grad_norm": 1.0055015087127686, "learning_rate": 5.2997815907539164e-05, "loss": 1.4864, "step": 26440 }, { "epoch": 6.559613119226238, "grad_norm": 1.0197795629501343, "learning_rate": 5.292905682413612e-05, "loss": 1.4771, "step": 26450 }, { "epoch": 6.562093124186248, "grad_norm": 1.0034635066986084, "learning_rate": 5.286032631740023e-05, "loss": 1.5043, "step": 26460 }, { "epoch": 6.564573129146258, "grad_norm": 1.0399538278579712, "learning_rate": 5.279162442905772e-05, "loss": 1.4874, "step": 26470 }, { "epoch": 6.567053134106268, "grad_norm": 1.0726194381713867, "learning_rate": 5.272295120081732e-05, "loss": 1.4854, "step": 26480 }, { "epoch": 6.569533139066278, "grad_norm": 0.9627480506896973, "learning_rate": 5.265430667437033e-05, "loss": 1.4264, "step": 26490 }, { "epoch": 6.572013144026288, "grad_norm": 0.9933289885520935, "learning_rate": 5.258569089139085e-05, "loss": 1.4828, "step": 26500 }, { "epoch": 6.574493148986298, "grad_norm": 1.0292378664016724, "learning_rate": 5.251710389353519e-05, "loss": 1.5436, "step": 26510 }, { "epoch": 6.576973153946308, "grad_norm": 0.9901852011680603, "learning_rate": 5.2448545722442486e-05, "loss": 1.4329, "step": 26520 }, { "epoch": 6.579453158906317, "grad_norm": 1.0028679370880127, "learning_rate": 5.238001641973422e-05, "loss": 1.4741, "step": 26530 }, { "epoch": 6.581933163866328, "grad_norm": 1.047088384628296, "learning_rate": 5.2311516027014394e-05, "loss": 1.4756, "step": 26540 }, { "epoch": 6.584413168826337, "grad_norm": 1.0282377004623413, "learning_rate": 5.2243044585869394e-05, "loss": 1.4281, "step": 26550 }, { "epoch": 6.586893173786348, "grad_norm": 0.9611074924468994, "learning_rate": 5.217460213786821e-05, "loss": 1.4849, "step": 26560 }, { "epoch": 6.589373178746357, "grad_norm": 0.9675851464271545, "learning_rate": 5.2106188724561944e-05, "loss": 1.4897, "step": 26570 }, { "epoch": 6.591853183706368, "grad_norm": 1.0171164274215698, "learning_rate": 5.203780438748433e-05, "loss": 1.4731, "step": 26580 }, { "epoch": 6.594333188666377, "grad_norm": 0.9999147653579712, "learning_rate": 5.196944916815134e-05, "loss": 1.5018, "step": 26590 }, { "epoch": 6.596813193626387, "grad_norm": 1.0138448476791382, "learning_rate": 5.190112310806126e-05, "loss": 1.5076, "step": 26600 }, { "epoch": 6.599293198586397, "grad_norm": 0.9509438872337341, "learning_rate": 5.1832826248694654e-05, "loss": 1.4431, "step": 26610 }, { "epoch": 6.601773203546407, "grad_norm": 0.9981142282485962, "learning_rate": 5.176455863151448e-05, "loss": 1.4173, "step": 26620 }, { "epoch": 6.604253208506417, "grad_norm": 0.9768161177635193, "learning_rate": 5.1696320297965825e-05, "loss": 1.4908, "step": 26630 }, { "epoch": 6.606733213466427, "grad_norm": 1.0115209817886353, "learning_rate": 5.162811128947602e-05, "loss": 1.5331, "step": 26640 }, { "epoch": 6.6092132184264365, "grad_norm": 1.0393757820129395, "learning_rate": 5.155993164745463e-05, "loss": 1.4673, "step": 26650 }, { "epoch": 6.611693223386447, "grad_norm": 1.0088094472885132, "learning_rate": 5.149178141329334e-05, "loss": 1.4769, "step": 26660 }, { "epoch": 6.6141732283464565, "grad_norm": 1.0713369846343994, "learning_rate": 5.142366062836599e-05, "loss": 1.4762, "step": 26670 }, { "epoch": 6.616653233306467, "grad_norm": 0.9907805919647217, "learning_rate": 5.135556933402862e-05, "loss": 1.5143, "step": 26680 }, { "epoch": 6.6191332382664765, "grad_norm": 1.0409246683120728, "learning_rate": 5.1287507571619275e-05, "loss": 1.4116, "step": 26690 }, { "epoch": 6.621613243226486, "grad_norm": 1.1151947975158691, "learning_rate": 5.12194753824581e-05, "loss": 1.4693, "step": 26700 }, { "epoch": 6.6240932481864965, "grad_norm": 1.0386748313903809, "learning_rate": 5.115147280784727e-05, "loss": 1.5714, "step": 26710 }, { "epoch": 6.626573253146506, "grad_norm": 1.0036121606826782, "learning_rate": 5.108349988907111e-05, "loss": 1.505, "step": 26720 }, { "epoch": 6.6290532581065165, "grad_norm": 1.053237795829773, "learning_rate": 5.1015556667395636e-05, "loss": 1.475, "step": 26730 }, { "epoch": 6.631533263066526, "grad_norm": 0.996638834476471, "learning_rate": 5.09476431840692e-05, "loss": 1.5783, "step": 26740 }, { "epoch": 6.634013268026536, "grad_norm": 0.9656083583831787, "learning_rate": 5.0879759480321866e-05, "loss": 1.4629, "step": 26750 }, { "epoch": 6.636493272986546, "grad_norm": 0.9791948199272156, "learning_rate": 5.0811905597365684e-05, "loss": 1.4609, "step": 26760 }, { "epoch": 6.638973277946556, "grad_norm": 1.0298975706100464, "learning_rate": 5.0744081576394566e-05, "loss": 1.4529, "step": 26770 }, { "epoch": 6.641453282906566, "grad_norm": 1.0415596961975098, "learning_rate": 5.0676287458584436e-05, "loss": 1.4407, "step": 26780 }, { "epoch": 6.6439332878665756, "grad_norm": 1.0678547620773315, "learning_rate": 5.06085232850928e-05, "loss": 1.4811, "step": 26790 }, { "epoch": 6.646413292826586, "grad_norm": 0.9788858890533447, "learning_rate": 5.054078909705926e-05, "loss": 1.4804, "step": 26800 }, { "epoch": 6.6488932977865955, "grad_norm": 1.0442765951156616, "learning_rate": 5.047308493560506e-05, "loss": 1.5068, "step": 26810 }, { "epoch": 6.651373302746606, "grad_norm": 0.9830688238143921, "learning_rate": 5.0405410841833253e-05, "loss": 1.4581, "step": 26820 }, { "epoch": 6.6538533077066155, "grad_norm": 0.9420977234840393, "learning_rate": 5.033776685682858e-05, "loss": 1.4918, "step": 26830 }, { "epoch": 6.656333312666625, "grad_norm": 1.0370205640792847, "learning_rate": 5.0270153021657674e-05, "loss": 1.5327, "step": 26840 }, { "epoch": 6.6588133176266355, "grad_norm": 0.9369286894798279, "learning_rate": 5.0202569377368616e-05, "loss": 1.4392, "step": 26850 }, { "epoch": 6.661293322586645, "grad_norm": 0.9928011298179626, "learning_rate": 5.0135015964991373e-05, "loss": 1.453, "step": 26860 }, { "epoch": 6.6637733275466555, "grad_norm": 1.0058857202529907, "learning_rate": 5.0067492825537466e-05, "loss": 1.4793, "step": 26870 }, { "epoch": 6.666253332506665, "grad_norm": 0.9954456686973572, "learning_rate": 5.000000000000002e-05, "loss": 1.4629, "step": 26880 }, { "epoch": 6.668733337466675, "grad_norm": 0.9622682332992554, "learning_rate": 4.993253752935376e-05, "loss": 1.5008, "step": 26890 }, { "epoch": 6.671213342426685, "grad_norm": 1.0237582921981812, "learning_rate": 4.9865105454555113e-05, "loss": 1.5273, "step": 26900 }, { "epoch": 6.673693347386695, "grad_norm": 1.0076124668121338, "learning_rate": 4.979770381654181e-05, "loss": 1.4979, "step": 26910 }, { "epoch": 6.676173352346705, "grad_norm": 1.091383457183838, "learning_rate": 4.973033265623332e-05, "loss": 1.48, "step": 26920 }, { "epoch": 6.678653357306715, "grad_norm": 1.0162371397018433, "learning_rate": 4.966299201453052e-05, "loss": 1.4391, "step": 26930 }, { "epoch": 6.681133362266724, "grad_norm": 1.0265648365020752, "learning_rate": 4.959568193231575e-05, "loss": 1.468, "step": 26940 }, { "epoch": 6.683613367226735, "grad_norm": 1.0596145391464233, "learning_rate": 4.952840245045278e-05, "loss": 1.4158, "step": 26950 }, { "epoch": 6.686093372186744, "grad_norm": 1.010204792022705, "learning_rate": 4.946115360978696e-05, "loss": 1.4245, "step": 26960 }, { "epoch": 6.688573377146755, "grad_norm": 1.0146294832229614, "learning_rate": 4.939393545114475e-05, "loss": 1.4877, "step": 26970 }, { "epoch": 6.691053382106764, "grad_norm": 1.0546412467956543, "learning_rate": 4.9326748015334266e-05, "loss": 1.4588, "step": 26980 }, { "epoch": 6.693533387066774, "grad_norm": 0.9743219017982483, "learning_rate": 4.925959134314483e-05, "loss": 1.4712, "step": 26990 }, { "epoch": 6.696013392026784, "grad_norm": 1.0180968046188354, "learning_rate": 4.919246547534708e-05, "loss": 1.4652, "step": 27000 }, { "epoch": 6.698493396986794, "grad_norm": 1.0588595867156982, "learning_rate": 4.912537045269298e-05, "loss": 1.5079, "step": 27010 }, { "epoch": 6.700973401946804, "grad_norm": 0.9891775846481323, "learning_rate": 4.9058306315915826e-05, "loss": 1.4951, "step": 27020 }, { "epoch": 6.703453406906814, "grad_norm": 1.017204999923706, "learning_rate": 4.899127310573009e-05, "loss": 1.4805, "step": 27030 }, { "epoch": 6.705933411866823, "grad_norm": 1.0103859901428223, "learning_rate": 4.892427086283147e-05, "loss": 1.4185, "step": 27040 }, { "epoch": 6.708413416826834, "grad_norm": 0.9853695631027222, "learning_rate": 4.885729962789688e-05, "loss": 1.4916, "step": 27050 }, { "epoch": 6.710893421786843, "grad_norm": 0.973719596862793, "learning_rate": 4.879035944158443e-05, "loss": 1.5332, "step": 27060 }, { "epoch": 6.713373426746854, "grad_norm": 0.9500641822814941, "learning_rate": 4.872345034453332e-05, "loss": 1.4972, "step": 27070 }, { "epoch": 6.715853431706863, "grad_norm": 0.9965008497238159, "learning_rate": 4.8656572377363974e-05, "loss": 1.4828, "step": 27080 }, { "epoch": 6.718333436666874, "grad_norm": 1.011154294013977, "learning_rate": 4.8589725580677835e-05, "loss": 1.4871, "step": 27090 }, { "epoch": 6.720813441626883, "grad_norm": 1.0305109024047852, "learning_rate": 4.852290999505742e-05, "loss": 1.4241, "step": 27100 }, { "epoch": 6.723293446586894, "grad_norm": 1.0206142663955688, "learning_rate": 4.845612566106643e-05, "loss": 1.4975, "step": 27110 }, { "epoch": 6.725773451546903, "grad_norm": 1.0499778985977173, "learning_rate": 4.8389372619249326e-05, "loss": 1.4872, "step": 27120 }, { "epoch": 6.728253456506913, "grad_norm": 1.0595289468765259, "learning_rate": 4.8322650910131874e-05, "loss": 1.5371, "step": 27130 }, { "epoch": 6.730733461466923, "grad_norm": 1.0716408491134644, "learning_rate": 4.8255960574220614e-05, "loss": 1.5045, "step": 27140 }, { "epoch": 6.733213466426933, "grad_norm": 1.0024359226226807, "learning_rate": 4.818930165200313e-05, "loss": 1.4937, "step": 27150 }, { "epoch": 6.735693471386943, "grad_norm": 0.9272238612174988, "learning_rate": 4.8122674183947836e-05, "loss": 1.4816, "step": 27160 }, { "epoch": 6.738173476346953, "grad_norm": 1.0077641010284424, "learning_rate": 4.805607821050423e-05, "loss": 1.4926, "step": 27170 }, { "epoch": 6.740653481306962, "grad_norm": 1.0041226148605347, "learning_rate": 4.7989513772102537e-05, "loss": 1.5087, "step": 27180 }, { "epoch": 6.743133486266973, "grad_norm": 0.9688929915428162, "learning_rate": 4.792298090915388e-05, "loss": 1.4847, "step": 27190 }, { "epoch": 6.745613491226982, "grad_norm": 1.0488873720169067, "learning_rate": 4.78564796620502e-05, "loss": 1.4595, "step": 27200 }, { "epoch": 6.748093496186993, "grad_norm": 1.0456444025039673, "learning_rate": 4.7790010071164295e-05, "loss": 1.5137, "step": 27210 }, { "epoch": 6.750573501147002, "grad_norm": 1.0180890560150146, "learning_rate": 4.772357217684964e-05, "loss": 1.4553, "step": 27220 }, { "epoch": 6.753053506107012, "grad_norm": 0.9567440152168274, "learning_rate": 4.7657166019440614e-05, "loss": 1.4726, "step": 27230 }, { "epoch": 6.755533511067022, "grad_norm": 1.0050458908081055, "learning_rate": 4.759079163925223e-05, "loss": 1.4967, "step": 27240 }, { "epoch": 6.758013516027032, "grad_norm": 0.9629562497138977, "learning_rate": 4.7524449076580224e-05, "loss": 1.4792, "step": 27250 }, { "epoch": 6.760493520987042, "grad_norm": 1.0333480834960938, "learning_rate": 4.745813837170101e-05, "loss": 1.4672, "step": 27260 }, { "epoch": 6.762973525947052, "grad_norm": 1.03449547290802, "learning_rate": 4.739185956487169e-05, "loss": 1.5072, "step": 27270 }, { "epoch": 6.765453530907061, "grad_norm": 0.96829754114151, "learning_rate": 4.732561269632992e-05, "loss": 1.4646, "step": 27280 }, { "epoch": 6.767933535867072, "grad_norm": 1.003289818763733, "learning_rate": 4.725939780629415e-05, "loss": 1.4566, "step": 27290 }, { "epoch": 6.770413540827081, "grad_norm": 0.9847796559333801, "learning_rate": 4.7193214934963206e-05, "loss": 1.4673, "step": 27300 }, { "epoch": 6.772893545787092, "grad_norm": 0.8907151818275452, "learning_rate": 4.7127064122516593e-05, "loss": 1.4547, "step": 27310 }, { "epoch": 6.775373550747101, "grad_norm": 1.001332402229309, "learning_rate": 4.706094540911429e-05, "loss": 1.4919, "step": 27320 }, { "epoch": 6.777853555707112, "grad_norm": 1.0360084772109985, "learning_rate": 4.699485883489693e-05, "loss": 1.5503, "step": 27330 }, { "epoch": 6.780333560667121, "grad_norm": 1.0425145626068115, "learning_rate": 4.692880443998538e-05, "loss": 1.4477, "step": 27340 }, { "epoch": 6.782813565627131, "grad_norm": 0.9380311965942383, "learning_rate": 4.686278226448122e-05, "loss": 1.4753, "step": 27350 }, { "epoch": 6.785293570587141, "grad_norm": 0.9759949445724487, "learning_rate": 4.6796792348466356e-05, "loss": 1.5504, "step": 27360 }, { "epoch": 6.787773575547151, "grad_norm": 0.9939712882041931, "learning_rate": 4.6730834732003104e-05, "loss": 1.471, "step": 27370 }, { "epoch": 6.790253580507161, "grad_norm": 1.0386635065078735, "learning_rate": 4.666490945513415e-05, "loss": 1.4876, "step": 27380 }, { "epoch": 6.792733585467171, "grad_norm": 1.0397119522094727, "learning_rate": 4.6599016557882715e-05, "loss": 1.4365, "step": 27390 }, { "epoch": 6.795213590427181, "grad_norm": 1.0358515977859497, "learning_rate": 4.653315608025207e-05, "loss": 1.5524, "step": 27400 }, { "epoch": 6.797693595387191, "grad_norm": 1.0351741313934326, "learning_rate": 4.646732806222608e-05, "loss": 1.4834, "step": 27410 }, { "epoch": 6.8001736003472, "grad_norm": 0.9974995255470276, "learning_rate": 4.6401532543768766e-05, "loss": 1.4947, "step": 27420 }, { "epoch": 6.802653605307211, "grad_norm": 1.01504385471344, "learning_rate": 4.6335769564824425e-05, "loss": 1.4457, "step": 27430 }, { "epoch": 6.80513361026722, "grad_norm": 0.9690166115760803, "learning_rate": 4.6270039165317605e-05, "loss": 1.4985, "step": 27440 }, { "epoch": 6.807613615227231, "grad_norm": 1.0365806818008423, "learning_rate": 4.6204341385153186e-05, "loss": 1.5117, "step": 27450 }, { "epoch": 6.81009362018724, "grad_norm": 1.0430920124053955, "learning_rate": 4.6138676264216e-05, "loss": 1.4598, "step": 27460 }, { "epoch": 6.81257362514725, "grad_norm": 1.0349470376968384, "learning_rate": 4.60730438423713e-05, "loss": 1.4375, "step": 27470 }, { "epoch": 6.81505363010726, "grad_norm": 0.9820765852928162, "learning_rate": 4.600744415946438e-05, "loss": 1.4243, "step": 27480 }, { "epoch": 6.81753363506727, "grad_norm": 1.0257947444915771, "learning_rate": 4.594187725532063e-05, "loss": 1.4724, "step": 27490 }, { "epoch": 6.82001364002728, "grad_norm": 1.0491620302200317, "learning_rate": 4.587634316974555e-05, "loss": 1.3997, "step": 27500 }, { "epoch": 6.82249364498729, "grad_norm": 1.043031096458435, "learning_rate": 4.5810841942524864e-05, "loss": 1.509, "step": 27510 }, { "epoch": 6.8249736499472995, "grad_norm": 1.0190041065216064, "learning_rate": 4.574537361342407e-05, "loss": 1.4837, "step": 27520 }, { "epoch": 6.82745365490731, "grad_norm": 1.0018768310546875, "learning_rate": 4.5679938222188946e-05, "loss": 1.4611, "step": 27530 }, { "epoch": 6.8299336598673195, "grad_norm": 1.0004897117614746, "learning_rate": 4.561453580854516e-05, "loss": 1.4628, "step": 27540 }, { "epoch": 6.83241366482733, "grad_norm": 0.979097843170166, "learning_rate": 4.554916641219835e-05, "loss": 1.4714, "step": 27550 }, { "epoch": 6.8348936697873395, "grad_norm": 1.035117268562317, "learning_rate": 4.548383007283412e-05, "loss": 1.4824, "step": 27560 }, { "epoch": 6.837373674747349, "grad_norm": 0.9248478412628174, "learning_rate": 4.541852683011812e-05, "loss": 1.4493, "step": 27570 }, { "epoch": 6.8398536797073595, "grad_norm": 1.0277906656265259, "learning_rate": 4.535325672369567e-05, "loss": 1.4597, "step": 27580 }, { "epoch": 6.842333684667369, "grad_norm": 1.085394024848938, "learning_rate": 4.52880197931922e-05, "loss": 1.3996, "step": 27590 }, { "epoch": 6.8448136896273795, "grad_norm": 0.9753702878952026, "learning_rate": 4.522281607821288e-05, "loss": 1.4157, "step": 27600 }, { "epoch": 6.847293694587389, "grad_norm": 1.005562663078308, "learning_rate": 4.515764561834276e-05, "loss": 1.519, "step": 27610 }, { "epoch": 6.8497736995473995, "grad_norm": 0.9952786564826965, "learning_rate": 4.509250845314662e-05, "loss": 1.5127, "step": 27620 }, { "epoch": 6.852253704507409, "grad_norm": 0.9692873358726501, "learning_rate": 4.502740462216919e-05, "loss": 1.509, "step": 27630 }, { "epoch": 6.854733709467419, "grad_norm": 0.954460084438324, "learning_rate": 4.4962334164934806e-05, "loss": 1.4875, "step": 27640 }, { "epoch": 6.857213714427429, "grad_norm": 1.0443848371505737, "learning_rate": 4.4897297120947624e-05, "loss": 1.4615, "step": 27650 }, { "epoch": 6.8596937193874385, "grad_norm": 1.0167874097824097, "learning_rate": 4.483229352969146e-05, "loss": 1.459, "step": 27660 }, { "epoch": 6.862173724347449, "grad_norm": 1.013476848602295, "learning_rate": 4.4767323430629894e-05, "loss": 1.5109, "step": 27670 }, { "epoch": 6.8646537293074585, "grad_norm": 0.9592860341072083, "learning_rate": 4.470238686320606e-05, "loss": 1.4595, "step": 27680 }, { "epoch": 6.867133734267469, "grad_norm": 1.0391489267349243, "learning_rate": 4.463748386684289e-05, "loss": 1.4536, "step": 27690 }, { "epoch": 6.8696137392274785, "grad_norm": 1.093828797340393, "learning_rate": 4.457261448094282e-05, "loss": 1.5159, "step": 27700 }, { "epoch": 6.872093744187488, "grad_norm": 0.9685669541358948, "learning_rate": 4.450777874488792e-05, "loss": 1.4964, "step": 27710 }, { "epoch": 6.8745737491474985, "grad_norm": 0.9868682026863098, "learning_rate": 4.444297669803981e-05, "loss": 1.4689, "step": 27720 }, { "epoch": 6.877053754107508, "grad_norm": 0.94803786277771, "learning_rate": 4.437820837973964e-05, "loss": 1.5238, "step": 27730 }, { "epoch": 6.8795337590675185, "grad_norm": 1.0392026901245117, "learning_rate": 4.431347382930818e-05, "loss": 1.5276, "step": 27740 }, { "epoch": 6.882013764027528, "grad_norm": 0.980580747127533, "learning_rate": 4.424877308604563e-05, "loss": 1.4614, "step": 27750 }, { "epoch": 6.884493768987538, "grad_norm": 0.9709716439247131, "learning_rate": 4.4184106189231625e-05, "loss": 1.5172, "step": 27760 }, { "epoch": 6.886973773947548, "grad_norm": 0.9925903081893921, "learning_rate": 4.411947317812529e-05, "loss": 1.5287, "step": 27770 }, { "epoch": 6.889453778907558, "grad_norm": 1.0866552591323853, "learning_rate": 4.405487409196526e-05, "loss": 1.4618, "step": 27780 }, { "epoch": 6.891933783867568, "grad_norm": 1.0447962284088135, "learning_rate": 4.399030896996945e-05, "loss": 1.4656, "step": 27790 }, { "epoch": 6.894413788827578, "grad_norm": 1.1322532892227173, "learning_rate": 4.3925777851335206e-05, "loss": 1.5223, "step": 27800 }, { "epoch": 6.896893793787587, "grad_norm": 1.0157110691070557, "learning_rate": 4.386128077523923e-05, "loss": 1.4398, "step": 27810 }, { "epoch": 6.899373798747598, "grad_norm": 1.016741156578064, "learning_rate": 4.3796817780837574e-05, "loss": 1.4614, "step": 27820 }, { "epoch": 6.901853803707607, "grad_norm": 1.0230491161346436, "learning_rate": 4.373238890726553e-05, "loss": 1.4625, "step": 27830 }, { "epoch": 6.904333808667618, "grad_norm": 0.9990814924240112, "learning_rate": 4.3667994193637796e-05, "loss": 1.5499, "step": 27840 }, { "epoch": 6.906813813627627, "grad_norm": 0.9694319367408752, "learning_rate": 4.360363367904823e-05, "loss": 1.5205, "step": 27850 }, { "epoch": 6.909293818587637, "grad_norm": 1.0149998664855957, "learning_rate": 4.353930740256996e-05, "loss": 1.4735, "step": 27860 }, { "epoch": 6.911773823547647, "grad_norm": 1.0618832111358643, "learning_rate": 4.3475015403255304e-05, "loss": 1.5441, "step": 27870 }, { "epoch": 6.914253828507657, "grad_norm": 0.9641765356063843, "learning_rate": 4.341075772013589e-05, "loss": 1.4671, "step": 27880 }, { "epoch": 6.916733833467667, "grad_norm": 1.0613869428634644, "learning_rate": 4.3346534392222274e-05, "loss": 1.5015, "step": 27890 }, { "epoch": 6.919213838427677, "grad_norm": 1.0948052406311035, "learning_rate": 4.328234545850442e-05, "loss": 1.4689, "step": 27900 }, { "epoch": 6.921693843387687, "grad_norm": 1.0151530504226685, "learning_rate": 4.321819095795123e-05, "loss": 1.4932, "step": 27910 }, { "epoch": 6.924173848347697, "grad_norm": 1.0254912376403809, "learning_rate": 4.315407092951078e-05, "loss": 1.4806, "step": 27920 }, { "epoch": 6.926653853307707, "grad_norm": 1.0084120035171509, "learning_rate": 4.308998541211015e-05, "loss": 1.5049, "step": 27930 }, { "epoch": 6.929133858267717, "grad_norm": 0.9881705045700073, "learning_rate": 4.3025934444655655e-05, "loss": 1.4716, "step": 27940 }, { "epoch": 6.931613863227726, "grad_norm": 1.069356083869934, "learning_rate": 4.2961918066032326e-05, "loss": 1.544, "step": 27950 }, { "epoch": 6.934093868187737, "grad_norm": 0.9830098748207092, "learning_rate": 4.289793631510448e-05, "loss": 1.4613, "step": 27960 }, { "epoch": 6.936573873147746, "grad_norm": 0.971098005771637, "learning_rate": 4.2833989230715275e-05, "loss": 1.4965, "step": 27970 }, { "epoch": 6.939053878107757, "grad_norm": 0.9721178412437439, "learning_rate": 4.2770076851686824e-05, "loss": 1.4694, "step": 27980 }, { "epoch": 6.941533883067766, "grad_norm": 1.0568654537200928, "learning_rate": 4.2706199216820185e-05, "loss": 1.5414, "step": 27990 }, { "epoch": 6.944013888027776, "grad_norm": 0.9629611372947693, "learning_rate": 4.264235636489542e-05, "loss": 1.4599, "step": 28000 }, { "epoch": 6.946493892987786, "grad_norm": 0.9821149706840515, "learning_rate": 4.257854833467124e-05, "loss": 1.4858, "step": 28010 }, { "epoch": 6.948973897947796, "grad_norm": 0.9563151597976685, "learning_rate": 4.2514775164885487e-05, "loss": 1.4879, "step": 28020 }, { "epoch": 6.951453902907806, "grad_norm": 1.0238558053970337, "learning_rate": 4.245103689425467e-05, "loss": 1.5114, "step": 28030 }, { "epoch": 6.953933907867816, "grad_norm": 1.0556073188781738, "learning_rate": 4.238733356147414e-05, "loss": 1.4774, "step": 28040 }, { "epoch": 6.956413912827825, "grad_norm": 1.0477728843688965, "learning_rate": 4.232366520521803e-05, "loss": 1.4576, "step": 28050 }, { "epoch": 6.958893917787836, "grad_norm": 1.0674879550933838, "learning_rate": 4.226003186413937e-05, "loss": 1.4974, "step": 28060 }, { "epoch": 6.961373922747845, "grad_norm": 1.0021710395812988, "learning_rate": 4.219643357686967e-05, "loss": 1.4779, "step": 28070 }, { "epoch": 6.963853927707856, "grad_norm": 1.0351998805999756, "learning_rate": 4.213287038201943e-05, "loss": 1.4574, "step": 28080 }, { "epoch": 6.966333932667865, "grad_norm": 0.9760614633560181, "learning_rate": 4.206934231817768e-05, "loss": 1.5576, "step": 28090 }, { "epoch": 6.968813937627875, "grad_norm": 0.9329560399055481, "learning_rate": 4.200584942391219e-05, "loss": 1.4752, "step": 28100 }, { "epoch": 6.971293942587885, "grad_norm": 0.914653480052948, "learning_rate": 4.194239173776929e-05, "loss": 1.4993, "step": 28110 }, { "epoch": 6.973773947547895, "grad_norm": 0.9832881689071655, "learning_rate": 4.187896929827414e-05, "loss": 1.4533, "step": 28120 }, { "epoch": 6.976253952507905, "grad_norm": 0.9643089175224304, "learning_rate": 4.18155821439302e-05, "loss": 1.4188, "step": 28130 }, { "epoch": 6.978733957467915, "grad_norm": 1.0578384399414062, "learning_rate": 4.17522303132198e-05, "loss": 1.4795, "step": 28140 }, { "epoch": 6.981213962427924, "grad_norm": 1.0531888008117676, "learning_rate": 4.168891384460365e-05, "loss": 1.4477, "step": 28150 }, { "epoch": 6.983693967387935, "grad_norm": 1.0598180294036865, "learning_rate": 4.1625632776521037e-05, "loss": 1.5376, "step": 28160 }, { "epoch": 6.986173972347944, "grad_norm": 0.9898632764816284, "learning_rate": 4.1562387147389735e-05, "loss": 1.5151, "step": 28170 }, { "epoch": 6.988653977307955, "grad_norm": 1.0068705081939697, "learning_rate": 4.14991769956061e-05, "loss": 1.4815, "step": 28180 }, { "epoch": 6.991133982267964, "grad_norm": 1.069098949432373, "learning_rate": 4.143600235954483e-05, "loss": 1.5614, "step": 28190 }, { "epoch": 6.993613987227975, "grad_norm": 0.9832902550697327, "learning_rate": 4.137286327755913e-05, "loss": 1.4777, "step": 28200 }, { "epoch": 6.996093992187984, "grad_norm": 1.042362093925476, "learning_rate": 4.1309759787980565e-05, "loss": 1.4705, "step": 28210 }, { "epoch": 6.998573997147995, "grad_norm": 0.9542574286460876, "learning_rate": 4.124669192911916e-05, "loss": 1.5014, "step": 28220 }, { "epoch": 7.001054002108004, "grad_norm": 0.9545601010322571, "learning_rate": 4.118365973926322e-05, "loss": 1.4232, "step": 28230 }, { "epoch": 7.003534007068014, "grad_norm": 0.9939719438552856, "learning_rate": 4.112066325667954e-05, "loss": 1.3614, "step": 28240 }, { "epoch": 7.006014012028024, "grad_norm": 0.9878036379814148, "learning_rate": 4.1057702519613084e-05, "loss": 1.4261, "step": 28250 }, { "epoch": 7.008494016988034, "grad_norm": 1.09098482131958, "learning_rate": 4.0994777566287204e-05, "loss": 1.4501, "step": 28260 }, { "epoch": 7.010974021948044, "grad_norm": 0.9886322617530823, "learning_rate": 4.0931888434903484e-05, "loss": 1.4651, "step": 28270 }, { "epoch": 7.013454026908054, "grad_norm": 1.0460489988327026, "learning_rate": 4.086903516364179e-05, "loss": 1.4458, "step": 28280 }, { "epoch": 7.015934031868063, "grad_norm": 0.9749070405960083, "learning_rate": 4.0806217790660176e-05, "loss": 1.3642, "step": 28290 }, { "epoch": 7.018414036828074, "grad_norm": 1.0082083940505981, "learning_rate": 4.074343635409499e-05, "loss": 1.3828, "step": 28300 }, { "epoch": 7.020894041788083, "grad_norm": 1.0243860483169556, "learning_rate": 4.068069089206067e-05, "loss": 1.4127, "step": 28310 }, { "epoch": 7.023374046748094, "grad_norm": 0.9525448679924011, "learning_rate": 4.0617981442649855e-05, "loss": 1.4274, "step": 28320 }, { "epoch": 7.025854051708103, "grad_norm": 1.0825144052505493, "learning_rate": 4.055530804393329e-05, "loss": 1.5011, "step": 28330 }, { "epoch": 7.028334056668113, "grad_norm": 0.9980800747871399, "learning_rate": 4.049267073395995e-05, "loss": 1.3625, "step": 28340 }, { "epoch": 7.030814061628123, "grad_norm": 1.0209801197052002, "learning_rate": 4.0430069550756665e-05, "loss": 1.4427, "step": 28350 }, { "epoch": 7.033294066588133, "grad_norm": 0.9010273218154907, "learning_rate": 4.03675045323286e-05, "loss": 1.3683, "step": 28360 }, { "epoch": 7.035774071548143, "grad_norm": 1.0834155082702637, "learning_rate": 4.03049757166588e-05, "loss": 1.4283, "step": 28370 }, { "epoch": 7.038254076508153, "grad_norm": 0.9353013634681702, "learning_rate": 4.0242483141708334e-05, "loss": 1.407, "step": 28380 }, { "epoch": 7.040734081468163, "grad_norm": 1.0415675640106201, "learning_rate": 4.018002684541638e-05, "loss": 1.4974, "step": 28390 }, { "epoch": 7.043214086428173, "grad_norm": 0.987139880657196, "learning_rate": 4.0117606865699975e-05, "loss": 1.4595, "step": 28400 }, { "epoch": 7.0456940913881825, "grad_norm": 0.9571609497070312, "learning_rate": 4.0055223240454167e-05, "loss": 1.4665, "step": 28410 }, { "epoch": 7.048174096348193, "grad_norm": 1.0244213342666626, "learning_rate": 3.999287600755192e-05, "loss": 1.3859, "step": 28420 }, { "epoch": 7.0506541013082025, "grad_norm": 1.0276232957839966, "learning_rate": 3.9930565204844094e-05, "loss": 1.3819, "step": 28430 }, { "epoch": 7.053134106268213, "grad_norm": 0.9392919540405273, "learning_rate": 3.9868290870159405e-05, "loss": 1.364, "step": 28440 }, { "epoch": 7.0556141112282225, "grad_norm": 1.0440912246704102, "learning_rate": 3.980605304130456e-05, "loss": 1.4126, "step": 28450 }, { "epoch": 7.058094116188232, "grad_norm": 1.0756579637527466, "learning_rate": 3.974385175606393e-05, "loss": 1.4206, "step": 28460 }, { "epoch": 7.0605741211482425, "grad_norm": 1.0449657440185547, "learning_rate": 3.968168705219982e-05, "loss": 1.3799, "step": 28470 }, { "epoch": 7.063054126108252, "grad_norm": 1.0110985040664673, "learning_rate": 3.961955896745224e-05, "loss": 1.3762, "step": 28480 }, { "epoch": 7.0655341310682624, "grad_norm": 1.0575342178344727, "learning_rate": 3.9557467539539115e-05, "loss": 1.4568, "step": 28490 }, { "epoch": 7.068014136028272, "grad_norm": 1.0499767065048218, "learning_rate": 3.9495412806155883e-05, "loss": 1.4177, "step": 28500 }, { "epoch": 7.0704941409882816, "grad_norm": 1.0368854999542236, "learning_rate": 3.943339480497593e-05, "loss": 1.3894, "step": 28510 }, { "epoch": 7.072974145948292, "grad_norm": 1.0026966333389282, "learning_rate": 3.937141357365023e-05, "loss": 1.4974, "step": 28520 }, { "epoch": 7.0754541509083015, "grad_norm": 1.004560112953186, "learning_rate": 3.930946914980744e-05, "loss": 1.4786, "step": 28530 }, { "epoch": 7.077934155868312, "grad_norm": 0.9743759036064148, "learning_rate": 3.924756157105387e-05, "loss": 1.4094, "step": 28540 }, { "epoch": 7.0804141608283215, "grad_norm": 1.0317790508270264, "learning_rate": 3.9185690874973555e-05, "loss": 1.4224, "step": 28550 }, { "epoch": 7.082894165788332, "grad_norm": 1.181426763534546, "learning_rate": 3.9123857099127936e-05, "loss": 1.4553, "step": 28560 }, { "epoch": 7.0853741707483415, "grad_norm": 0.9656166434288025, "learning_rate": 3.906206028105627e-05, "loss": 1.4207, "step": 28570 }, { "epoch": 7.087854175708351, "grad_norm": 1.0018301010131836, "learning_rate": 3.9000300458275216e-05, "loss": 1.4154, "step": 28580 }, { "epoch": 7.0903341806683615, "grad_norm": 1.0536874532699585, "learning_rate": 3.8938577668279055e-05, "loss": 1.3925, "step": 28590 }, { "epoch": 7.092814185628371, "grad_norm": 1.0299558639526367, "learning_rate": 3.887689194853951e-05, "loss": 1.3927, "step": 28600 }, { "epoch": 7.0952941905883815, "grad_norm": 1.037526249885559, "learning_rate": 3.8815243336505954e-05, "loss": 1.4441, "step": 28610 }, { "epoch": 7.097774195548391, "grad_norm": 1.0067542791366577, "learning_rate": 3.875363186960499e-05, "loss": 1.4132, "step": 28620 }, { "epoch": 7.100254200508401, "grad_norm": 1.088285207748413, "learning_rate": 3.8692057585240905e-05, "loss": 1.4681, "step": 28630 }, { "epoch": 7.102734205468411, "grad_norm": 1.0379809141159058, "learning_rate": 3.863052052079528e-05, "loss": 1.4277, "step": 28640 }, { "epoch": 7.105214210428421, "grad_norm": 1.0791525840759277, "learning_rate": 3.8569020713627126e-05, "loss": 1.483, "step": 28650 }, { "epoch": 7.107694215388431, "grad_norm": 1.0711592435836792, "learning_rate": 3.850755820107281e-05, "loss": 1.4331, "step": 28660 }, { "epoch": 7.110174220348441, "grad_norm": 0.9801573157310486, "learning_rate": 3.8446133020446204e-05, "loss": 1.4416, "step": 28670 }, { "epoch": 7.112654225308451, "grad_norm": 1.5557962656021118, "learning_rate": 3.838474520903825e-05, "loss": 1.4729, "step": 28680 }, { "epoch": 7.115134230268461, "grad_norm": 1.0418564081192017, "learning_rate": 3.832339480411747e-05, "loss": 1.438, "step": 28690 }, { "epoch": 7.11761423522847, "grad_norm": 0.9985926747322083, "learning_rate": 3.826208184292952e-05, "loss": 1.4208, "step": 28700 }, { "epoch": 7.120094240188481, "grad_norm": 1.0571954250335693, "learning_rate": 3.820080636269737e-05, "loss": 1.4457, "step": 28710 }, { "epoch": 7.12257424514849, "grad_norm": 0.9708046913146973, "learning_rate": 3.813956840062118e-05, "loss": 1.3994, "step": 28720 }, { "epoch": 7.125054250108501, "grad_norm": 1.052629828453064, "learning_rate": 3.807836799387852e-05, "loss": 1.4197, "step": 28730 }, { "epoch": 7.12753425506851, "grad_norm": 1.021501898765564, "learning_rate": 3.801720517962386e-05, "loss": 1.3936, "step": 28740 }, { "epoch": 7.13001426002852, "grad_norm": 0.9857499003410339, "learning_rate": 3.7956079994989115e-05, "loss": 1.4273, "step": 28750 }, { "epoch": 7.13249426498853, "grad_norm": 0.98251873254776, "learning_rate": 3.7894992477083224e-05, "loss": 1.3984, "step": 28760 }, { "epoch": 7.13497426994854, "grad_norm": 1.0209919214248657, "learning_rate": 3.783394266299228e-05, "loss": 1.419, "step": 28770 }, { "epoch": 7.13745427490855, "grad_norm": 1.0622920989990234, "learning_rate": 3.7772930589779474e-05, "loss": 1.4213, "step": 28780 }, { "epoch": 7.13993427986856, "grad_norm": 1.0227277278900146, "learning_rate": 3.771195629448515e-05, "loss": 1.4288, "step": 28790 }, { "epoch": 7.14241428482857, "grad_norm": 1.0391366481781006, "learning_rate": 3.7651019814126654e-05, "loss": 1.4327, "step": 28800 }, { "epoch": 7.14489428978858, "grad_norm": 1.063780426979065, "learning_rate": 3.759012118569838e-05, "loss": 1.4384, "step": 28810 }, { "epoch": 7.147374294748589, "grad_norm": 0.9601597785949707, "learning_rate": 3.7529260446171756e-05, "loss": 1.3743, "step": 28820 }, { "epoch": 7.1498542997086, "grad_norm": 1.036189079284668, "learning_rate": 3.746843763249521e-05, "loss": 1.4047, "step": 28830 }, { "epoch": 7.152334304668609, "grad_norm": 1.0754128694534302, "learning_rate": 3.7407652781594095e-05, "loss": 1.4245, "step": 28840 }, { "epoch": 7.15481430962862, "grad_norm": 1.0699996948242188, "learning_rate": 3.734690593037083e-05, "loss": 1.4439, "step": 28850 }, { "epoch": 7.157294314588629, "grad_norm": 1.0184645652770996, "learning_rate": 3.728619711570468e-05, "loss": 1.3803, "step": 28860 }, { "epoch": 7.159774319548639, "grad_norm": 1.082371473312378, "learning_rate": 3.7225526374451815e-05, "loss": 1.4388, "step": 28870 }, { "epoch": 7.162254324508649, "grad_norm": 1.0035544633865356, "learning_rate": 3.716489374344527e-05, "loss": 1.4139, "step": 28880 }, { "epoch": 7.164734329468659, "grad_norm": 1.1102402210235596, "learning_rate": 3.710429925949511e-05, "loss": 1.4408, "step": 28890 }, { "epoch": 7.167214334428669, "grad_norm": 1.0354952812194824, "learning_rate": 3.704374295938797e-05, "loss": 1.4069, "step": 28900 }, { "epoch": 7.169694339388679, "grad_norm": 1.008049726486206, "learning_rate": 3.698322487988755e-05, "loss": 1.4966, "step": 28910 }, { "epoch": 7.172174344348688, "grad_norm": 1.06466805934906, "learning_rate": 3.692274505773419e-05, "loss": 1.4678, "step": 28920 }, { "epoch": 7.174654349308699, "grad_norm": 1.097727656364441, "learning_rate": 3.686230352964508e-05, "loss": 1.4584, "step": 28930 }, { "epoch": 7.177134354268708, "grad_norm": 0.9777037501335144, "learning_rate": 3.680190033231411e-05, "loss": 1.3806, "step": 28940 }, { "epoch": 7.179614359228719, "grad_norm": 1.014937400817871, "learning_rate": 3.674153550241202e-05, "loss": 1.3776, "step": 28950 }, { "epoch": 7.182094364188728, "grad_norm": 1.1064553260803223, "learning_rate": 3.668120907658603e-05, "loss": 1.4125, "step": 28960 }, { "epoch": 7.184574369148739, "grad_norm": 1.0743889808654785, "learning_rate": 3.662092109146029e-05, "loss": 1.3742, "step": 28970 }, { "epoch": 7.187054374108748, "grad_norm": 1.1073992252349854, "learning_rate": 3.6560671583635467e-05, "loss": 1.3365, "step": 28980 }, { "epoch": 7.189534379068758, "grad_norm": 0.9936246275901794, "learning_rate": 3.650046058968888e-05, "loss": 1.3753, "step": 28990 }, { "epoch": 7.192014384028768, "grad_norm": 1.1510530710220337, "learning_rate": 3.644028814617454e-05, "loss": 1.4367, "step": 29000 }, { "epoch": 7.194494388988778, "grad_norm": 1.0159631967544556, "learning_rate": 3.6380154289623e-05, "loss": 1.3983, "step": 29010 }, { "epoch": 7.196974393948788, "grad_norm": 1.0250071287155151, "learning_rate": 3.632005905654138e-05, "loss": 1.4273, "step": 29020 }, { "epoch": 7.199454398908798, "grad_norm": 1.075727939605713, "learning_rate": 3.6260002483413325e-05, "loss": 1.4309, "step": 29030 }, { "epoch": 7.201934403868807, "grad_norm": 1.1067150831222534, "learning_rate": 3.6199984606699155e-05, "loss": 1.3985, "step": 29040 }, { "epoch": 7.204414408828818, "grad_norm": 1.0309680700302124, "learning_rate": 3.614000546283547e-05, "loss": 1.4569, "step": 29050 }, { "epoch": 7.206894413788827, "grad_norm": 1.014217734336853, "learning_rate": 3.608006508823556e-05, "loss": 1.4271, "step": 29060 }, { "epoch": 7.209374418748838, "grad_norm": 1.09121835231781, "learning_rate": 3.6020163519289076e-05, "loss": 1.4005, "step": 29070 }, { "epoch": 7.211854423708847, "grad_norm": 1.0779592990875244, "learning_rate": 3.596030079236212e-05, "loss": 1.4582, "step": 29080 }, { "epoch": 7.214334428668858, "grad_norm": 1.1280418634414673, "learning_rate": 3.59004769437972e-05, "loss": 1.4004, "step": 29090 }, { "epoch": 7.216814433628867, "grad_norm": 1.0576897859573364, "learning_rate": 3.5840692009913344e-05, "loss": 1.4606, "step": 29100 }, { "epoch": 7.219294438588877, "grad_norm": 0.9991346001625061, "learning_rate": 3.5780946027005715e-05, "loss": 1.4001, "step": 29110 }, { "epoch": 7.221774443548887, "grad_norm": 1.076958179473877, "learning_rate": 3.5721239031346066e-05, "loss": 1.4134, "step": 29120 }, { "epoch": 7.224254448508897, "grad_norm": 1.0537842512130737, "learning_rate": 3.5661571059182366e-05, "loss": 1.4734, "step": 29130 }, { "epoch": 7.226734453468907, "grad_norm": 1.103651762008667, "learning_rate": 3.56019421467389e-05, "loss": 1.4437, "step": 29140 }, { "epoch": 7.229214458428917, "grad_norm": 1.057870864868164, "learning_rate": 3.554235233021621e-05, "loss": 1.4043, "step": 29150 }, { "epoch": 7.231694463388926, "grad_norm": 1.0721110105514526, "learning_rate": 3.548280164579126e-05, "loss": 1.414, "step": 29160 }, { "epoch": 7.234174468348937, "grad_norm": 1.1894139051437378, "learning_rate": 3.5423290129617e-05, "loss": 1.3853, "step": 29170 }, { "epoch": 7.236654473308946, "grad_norm": 1.1009711027145386, "learning_rate": 3.5363817817822855e-05, "loss": 1.4837, "step": 29180 }, { "epoch": 7.239134478268957, "grad_norm": 1.0206698179244995, "learning_rate": 3.530438474651428e-05, "loss": 1.3513, "step": 29190 }, { "epoch": 7.241614483228966, "grad_norm": 1.024185061454773, "learning_rate": 3.5244990951772974e-05, "loss": 1.4392, "step": 29200 }, { "epoch": 7.244094488188976, "grad_norm": 1.1004998683929443, "learning_rate": 3.5185636469656746e-05, "loss": 1.4252, "step": 29210 }, { "epoch": 7.246574493148986, "grad_norm": 0.9977849721908569, "learning_rate": 3.5126321336199676e-05, "loss": 1.4368, "step": 29220 }, { "epoch": 7.249054498108996, "grad_norm": 1.039672613143921, "learning_rate": 3.5067045587411716e-05, "loss": 1.4761, "step": 29230 }, { "epoch": 7.251534503069006, "grad_norm": 0.9992042183876038, "learning_rate": 3.500780925927915e-05, "loss": 1.4159, "step": 29240 }, { "epoch": 7.254014508029016, "grad_norm": 0.9692917466163635, "learning_rate": 3.494861238776418e-05, "loss": 1.4956, "step": 29250 }, { "epoch": 7.256494512989026, "grad_norm": 1.065436840057373, "learning_rate": 3.4889455008805106e-05, "loss": 1.4487, "step": 29260 }, { "epoch": 7.258974517949036, "grad_norm": 1.0740379095077515, "learning_rate": 3.4830337158316216e-05, "loss": 1.4561, "step": 29270 }, { "epoch": 7.2614545229090455, "grad_norm": 1.0296998023986816, "learning_rate": 3.477125887218792e-05, "loss": 1.4657, "step": 29280 }, { "epoch": 7.263934527869056, "grad_norm": 1.0426205396652222, "learning_rate": 3.471222018628638e-05, "loss": 1.4201, "step": 29290 }, { "epoch": 7.2664145328290655, "grad_norm": 1.0202819108963013, "learning_rate": 3.4653221136453965e-05, "loss": 1.4121, "step": 29300 }, { "epoch": 7.268894537789076, "grad_norm": 1.0543783903121948, "learning_rate": 3.459426175850882e-05, "loss": 1.3914, "step": 29310 }, { "epoch": 7.2713745427490855, "grad_norm": 1.0744569301605225, "learning_rate": 3.453534208824507e-05, "loss": 1.4069, "step": 29320 }, { "epoch": 7.273854547709095, "grad_norm": 0.9860808253288269, "learning_rate": 3.447646216143268e-05, "loss": 1.3699, "step": 29330 }, { "epoch": 7.2763345526691054, "grad_norm": 1.036865234375, "learning_rate": 3.4417622013817595e-05, "loss": 1.4141, "step": 29340 }, { "epoch": 7.278814557629115, "grad_norm": 1.0248973369598389, "learning_rate": 3.43588216811215e-05, "loss": 1.4487, "step": 29350 }, { "epoch": 7.281294562589125, "grad_norm": 1.03071928024292, "learning_rate": 3.430006119904197e-05, "loss": 1.4971, "step": 29360 }, { "epoch": 7.283774567549135, "grad_norm": 1.1092323064804077, "learning_rate": 3.424134060325233e-05, "loss": 1.3836, "step": 29370 }, { "epoch": 7.286254572509145, "grad_norm": 1.075713872909546, "learning_rate": 3.418265992940175e-05, "loss": 1.3099, "step": 29380 }, { "epoch": 7.288734577469155, "grad_norm": 1.04108726978302, "learning_rate": 3.412401921311511e-05, "loss": 1.4589, "step": 29390 }, { "epoch": 7.2912145824291645, "grad_norm": 0.9846664071083069, "learning_rate": 3.406541848999312e-05, "loss": 1.3991, "step": 29400 }, { "epoch": 7.293694587389175, "grad_norm": 1.0906990766525269, "learning_rate": 3.4006857795612115e-05, "loss": 1.4416, "step": 29410 }, { "epoch": 7.2961745923491845, "grad_norm": 0.999430239200592, "learning_rate": 3.394833716552416e-05, "loss": 1.4222, "step": 29420 }, { "epoch": 7.298654597309195, "grad_norm": 1.1052192449569702, "learning_rate": 3.388985663525702e-05, "loss": 1.5021, "step": 29430 }, { "epoch": 7.3011346022692045, "grad_norm": 0.9980838894844055, "learning_rate": 3.383141624031408e-05, "loss": 1.3648, "step": 29440 }, { "epoch": 7.303614607229214, "grad_norm": 1.0518790483474731, "learning_rate": 3.3773016016174366e-05, "loss": 1.4328, "step": 29450 }, { "epoch": 7.3060946121892245, "grad_norm": 1.0774750709533691, "learning_rate": 3.371465599829257e-05, "loss": 1.4478, "step": 29460 }, { "epoch": 7.308574617149234, "grad_norm": 1.0285404920578003, "learning_rate": 3.365633622209891e-05, "loss": 1.3548, "step": 29470 }, { "epoch": 7.3110546221092445, "grad_norm": 1.0568714141845703, "learning_rate": 3.359805672299918e-05, "loss": 1.4449, "step": 29480 }, { "epoch": 7.313534627069254, "grad_norm": 1.0464444160461426, "learning_rate": 3.353981753637473e-05, "loss": 1.3494, "step": 29490 }, { "epoch": 7.316014632029264, "grad_norm": 1.117560625076294, "learning_rate": 3.3481618697582526e-05, "loss": 1.4988, "step": 29500 }, { "epoch": 7.318494636989274, "grad_norm": 0.9742351174354553, "learning_rate": 3.342346024195483e-05, "loss": 1.4054, "step": 29510 }, { "epoch": 7.320974641949284, "grad_norm": 1.0255587100982666, "learning_rate": 3.336534220479961e-05, "loss": 1.4537, "step": 29520 }, { "epoch": 7.323454646909294, "grad_norm": 0.9579190015792847, "learning_rate": 3.330726462140017e-05, "loss": 1.407, "step": 29530 }, { "epoch": 7.325934651869304, "grad_norm": 1.1523399353027344, "learning_rate": 3.324922752701528e-05, "loss": 1.3922, "step": 29540 }, { "epoch": 7.328414656829314, "grad_norm": 1.0771806240081787, "learning_rate": 3.3191230956879104e-05, "loss": 1.4265, "step": 29550 }, { "epoch": 7.330894661789324, "grad_norm": 1.105000376701355, "learning_rate": 3.3133274946201334e-05, "loss": 1.4276, "step": 29560 }, { "epoch": 7.333374666749333, "grad_norm": 1.0332446098327637, "learning_rate": 3.307535953016679e-05, "loss": 1.4173, "step": 29570 }, { "epoch": 7.335854671709344, "grad_norm": 1.025925874710083, "learning_rate": 3.301748474393592e-05, "loss": 1.4031, "step": 29580 }, { "epoch": 7.338334676669353, "grad_norm": 0.991253674030304, "learning_rate": 3.295965062264434e-05, "loss": 1.4386, "step": 29590 }, { "epoch": 7.340814681629364, "grad_norm": 1.0498908758163452, "learning_rate": 3.290185720140301e-05, "loss": 1.4447, "step": 29600 }, { "epoch": 7.343294686589373, "grad_norm": 1.1031231880187988, "learning_rate": 3.2844104515298155e-05, "loss": 1.4773, "step": 29610 }, { "epoch": 7.345774691549384, "grad_norm": 1.076324701309204, "learning_rate": 3.278639259939138e-05, "loss": 1.4199, "step": 29620 }, { "epoch": 7.348254696509393, "grad_norm": 1.0134528875350952, "learning_rate": 3.272872148871944e-05, "loss": 1.4166, "step": 29630 }, { "epoch": 7.350734701469403, "grad_norm": 1.0191160440444946, "learning_rate": 3.2671091218294284e-05, "loss": 1.3918, "step": 29640 }, { "epoch": 7.353214706429413, "grad_norm": 1.0939322710037231, "learning_rate": 3.261350182310324e-05, "loss": 1.4418, "step": 29650 }, { "epoch": 7.355694711389423, "grad_norm": 1.0721194744110107, "learning_rate": 3.255595333810857e-05, "loss": 1.4617, "step": 29660 }, { "epoch": 7.358174716349433, "grad_norm": 1.0546571016311646, "learning_rate": 3.2498445798247926e-05, "loss": 1.4557, "step": 29670 }, { "epoch": 7.360654721309443, "grad_norm": 0.9861288666725159, "learning_rate": 3.244097923843398e-05, "loss": 1.4372, "step": 29680 }, { "epoch": 7.363134726269452, "grad_norm": 0.9901741743087769, "learning_rate": 3.238355369355456e-05, "loss": 1.499, "step": 29690 }, { "epoch": 7.365614731229463, "grad_norm": 1.1363624334335327, "learning_rate": 3.2326169198472556e-05, "loss": 1.4539, "step": 29700 }, { "epoch": 7.368094736189472, "grad_norm": 1.0030924081802368, "learning_rate": 3.226882578802607e-05, "loss": 1.4246, "step": 29710 }, { "epoch": 7.370574741149483, "grad_norm": 1.200905680656433, "learning_rate": 3.2211523497028015e-05, "loss": 1.4518, "step": 29720 }, { "epoch": 7.373054746109492, "grad_norm": 1.0047590732574463, "learning_rate": 3.2154262360266607e-05, "loss": 1.437, "step": 29730 }, { "epoch": 7.375534751069502, "grad_norm": 1.0192116498947144, "learning_rate": 3.209704241250492e-05, "loss": 1.3976, "step": 29740 }, { "epoch": 7.378014756029512, "grad_norm": 1.0943766832351685, "learning_rate": 3.2039863688481055e-05, "loss": 1.4706, "step": 29750 }, { "epoch": 7.380494760989522, "grad_norm": 0.9721190929412842, "learning_rate": 3.198272622290804e-05, "loss": 1.4195, "step": 29760 }, { "epoch": 7.382974765949532, "grad_norm": 0.9810141324996948, "learning_rate": 3.192563005047404e-05, "loss": 1.4659, "step": 29770 }, { "epoch": 7.385454770909542, "grad_norm": 1.0326452255249023, "learning_rate": 3.186857520584187e-05, "loss": 1.4838, "step": 29780 }, { "epoch": 7.387934775869551, "grad_norm": 1.0693917274475098, "learning_rate": 3.18115617236495e-05, "loss": 1.4175, "step": 29790 }, { "epoch": 7.390414780829562, "grad_norm": 0.9504159092903137, "learning_rate": 3.1754589638509644e-05, "loss": 1.389, "step": 29800 }, { "epoch": 7.392894785789571, "grad_norm": 0.9387778639793396, "learning_rate": 3.169765898500995e-05, "loss": 1.4293, "step": 29810 }, { "epoch": 7.395374790749582, "grad_norm": 1.0295659303665161, "learning_rate": 3.164076979771287e-05, "loss": 1.4512, "step": 29820 }, { "epoch": 7.397854795709591, "grad_norm": 1.0724929571151733, "learning_rate": 3.1583922111155776e-05, "loss": 1.4934, "step": 29830 }, { "epoch": 7.400334800669602, "grad_norm": 1.1034655570983887, "learning_rate": 3.152711595985065e-05, "loss": 1.4806, "step": 29840 }, { "epoch": 7.402814805629611, "grad_norm": 1.030177116394043, "learning_rate": 3.14703513782845e-05, "loss": 1.4063, "step": 29850 }, { "epoch": 7.405294810589621, "grad_norm": 1.000606656074524, "learning_rate": 3.1413628400918926e-05, "loss": 1.4082, "step": 29860 }, { "epoch": 7.407774815549631, "grad_norm": 1.1129791736602783, "learning_rate": 3.1356947062190345e-05, "loss": 1.5047, "step": 29870 }, { "epoch": 7.410254820509641, "grad_norm": 0.9959533214569092, "learning_rate": 3.130030739650983e-05, "loss": 1.467, "step": 29880 }, { "epoch": 7.412734825469651, "grad_norm": 0.9653241038322449, "learning_rate": 3.1243709438263255e-05, "loss": 1.4495, "step": 29890 }, { "epoch": 7.415214830429661, "grad_norm": 1.1216167211532593, "learning_rate": 3.1187153221811105e-05, "loss": 1.4328, "step": 29900 }, { "epoch": 7.417694835389671, "grad_norm": 1.0418484210968018, "learning_rate": 3.113063878148852e-05, "loss": 1.3936, "step": 29910 }, { "epoch": 7.420174840349681, "grad_norm": 0.994735836982727, "learning_rate": 3.10741661516053e-05, "loss": 1.4255, "step": 29920 }, { "epoch": 7.42265484530969, "grad_norm": 1.0060540437698364, "learning_rate": 3.1017735366445855e-05, "loss": 1.4469, "step": 29930 }, { "epoch": 7.425134850269701, "grad_norm": 1.046750783920288, "learning_rate": 3.096134646026917e-05, "loss": 1.4321, "step": 29940 }, { "epoch": 7.42761485522971, "grad_norm": 1.0146149396896362, "learning_rate": 3.090499946730887e-05, "loss": 1.4625, "step": 29950 }, { "epoch": 7.430094860189721, "grad_norm": 0.9463431239128113, "learning_rate": 3.0848694421773075e-05, "loss": 1.4214, "step": 29960 }, { "epoch": 7.43257486514973, "grad_norm": 1.04469633102417, "learning_rate": 3.079243135784444e-05, "loss": 1.4258, "step": 29970 }, { "epoch": 7.43505487010974, "grad_norm": 1.025856852531433, "learning_rate": 3.073621030968017e-05, "loss": 1.527, "step": 29980 }, { "epoch": 7.43753487506975, "grad_norm": 0.9592713713645935, "learning_rate": 3.068003131141193e-05, "loss": 1.467, "step": 29990 }, { "epoch": 7.44001488002976, "grad_norm": 1.049975872039795, "learning_rate": 3.062389439714584e-05, "loss": 1.4338, "step": 30000 }, { "epoch": 7.44249488498977, "grad_norm": 1.0892877578735352, "learning_rate": 3.056779960096256e-05, "loss": 1.4806, "step": 30010 }, { "epoch": 7.44497488994978, "grad_norm": 1.0573172569274902, "learning_rate": 3.0511746956917085e-05, "loss": 1.4326, "step": 30020 }, { "epoch": 7.447454894909789, "grad_norm": 0.9932147860527039, "learning_rate": 3.0455736499038845e-05, "loss": 1.447, "step": 30030 }, { "epoch": 7.4499348998698, "grad_norm": 1.0222856998443604, "learning_rate": 3.0399768261331662e-05, "loss": 1.4302, "step": 30040 }, { "epoch": 7.452414904829809, "grad_norm": 1.116368055343628, "learning_rate": 3.0343842277773793e-05, "loss": 1.4759, "step": 30050 }, { "epoch": 7.45489490978982, "grad_norm": 0.9921149015426636, "learning_rate": 3.0287958582317676e-05, "loss": 1.3803, "step": 30060 }, { "epoch": 7.457374914749829, "grad_norm": 1.152550220489502, "learning_rate": 3.0232117208890276e-05, "loss": 1.4771, "step": 30070 }, { "epoch": 7.45985491970984, "grad_norm": 0.9995396137237549, "learning_rate": 3.0176318191392726e-05, "loss": 1.4224, "step": 30080 }, { "epoch": 7.462334924669849, "grad_norm": 1.037562608718872, "learning_rate": 3.01205615637005e-05, "loss": 1.499, "step": 30090 }, { "epoch": 7.464814929629859, "grad_norm": 1.0492740869522095, "learning_rate": 3.0064847359663284e-05, "loss": 1.4988, "step": 30100 }, { "epoch": 7.467294934589869, "grad_norm": 1.0149956941604614, "learning_rate": 3.0009175613105147e-05, "loss": 1.4181, "step": 30110 }, { "epoch": 7.469774939549879, "grad_norm": 0.9838835000991821, "learning_rate": 2.995354635782417e-05, "loss": 1.4438, "step": 30120 }, { "epoch": 7.472254944509889, "grad_norm": 1.0166876316070557, "learning_rate": 2.989795962759283e-05, "loss": 1.4298, "step": 30130 }, { "epoch": 7.474734949469899, "grad_norm": 1.0134931802749634, "learning_rate": 2.984241545615768e-05, "loss": 1.4095, "step": 30140 }, { "epoch": 7.4772149544299085, "grad_norm": 1.0498225688934326, "learning_rate": 2.9786913877239487e-05, "loss": 1.413, "step": 30150 }, { "epoch": 7.479694959389919, "grad_norm": 1.09099280834198, "learning_rate": 2.9731454924533086e-05, "loss": 1.4469, "step": 30160 }, { "epoch": 7.4821749643499285, "grad_norm": 0.9902943968772888, "learning_rate": 2.9676038631707593e-05, "loss": 1.421, "step": 30170 }, { "epoch": 7.484654969309939, "grad_norm": 1.0532760620117188, "learning_rate": 2.962066503240598e-05, "loss": 1.4642, "step": 30180 }, { "epoch": 7.4871349742699485, "grad_norm": 0.9733676314353943, "learning_rate": 2.9565334160245538e-05, "loss": 1.4042, "step": 30190 }, { "epoch": 7.489614979229959, "grad_norm": 1.0446985960006714, "learning_rate": 2.9510046048817497e-05, "loss": 1.4819, "step": 30200 }, { "epoch": 7.492094984189968, "grad_norm": 1.1242960691452026, "learning_rate": 2.945480073168714e-05, "loss": 1.473, "step": 30210 }, { "epoch": 7.494574989149978, "grad_norm": 0.9344368577003479, "learning_rate": 2.9399598242393755e-05, "loss": 1.4249, "step": 30220 }, { "epoch": 7.497054994109988, "grad_norm": 1.0807608366012573, "learning_rate": 2.934443861445073e-05, "loss": 1.475, "step": 30230 }, { "epoch": 7.499534999069998, "grad_norm": 1.0247372388839722, "learning_rate": 2.9289321881345254e-05, "loss": 1.4569, "step": 30240 }, { "epoch": 7.502015004030008, "grad_norm": 0.9852992296218872, "learning_rate": 2.923424807653863e-05, "loss": 1.4907, "step": 30250 }, { "epoch": 7.504495008990018, "grad_norm": 1.0893725156784058, "learning_rate": 2.9179217233466095e-05, "loss": 1.409, "step": 30260 }, { "epoch": 7.5069750139500275, "grad_norm": 1.003062129020691, "learning_rate": 2.912422938553664e-05, "loss": 1.4518, "step": 30270 }, { "epoch": 7.509455018910038, "grad_norm": 1.0472670793533325, "learning_rate": 2.906928456613336e-05, "loss": 1.4337, "step": 30280 }, { "epoch": 7.5119350238700475, "grad_norm": 1.045111894607544, "learning_rate": 2.9014382808613093e-05, "loss": 1.4733, "step": 30290 }, { "epoch": 7.514415028830058, "grad_norm": 1.0085594654083252, "learning_rate": 2.8959524146306603e-05, "loss": 1.4333, "step": 30300 }, { "epoch": 7.5168950337900675, "grad_norm": 1.0071136951446533, "learning_rate": 2.8904708612518404e-05, "loss": 1.4055, "step": 30310 }, { "epoch": 7.519375038750077, "grad_norm": 1.0234194993972778, "learning_rate": 2.8849936240527008e-05, "loss": 1.448, "step": 30320 }, { "epoch": 7.5218550437100875, "grad_norm": 1.0164966583251953, "learning_rate": 2.879520706358446e-05, "loss": 1.4866, "step": 30330 }, { "epoch": 7.524335048670097, "grad_norm": 0.995505690574646, "learning_rate": 2.874052111491682e-05, "loss": 1.4713, "step": 30340 }, { "epoch": 7.5268150536301075, "grad_norm": 1.1226674318313599, "learning_rate": 2.8685878427723777e-05, "loss": 1.4977, "step": 30350 }, { "epoch": 7.529295058590117, "grad_norm": 1.1339809894561768, "learning_rate": 2.8631279035178793e-05, "loss": 1.4647, "step": 30360 }, { "epoch": 7.531775063550127, "grad_norm": 1.034926176071167, "learning_rate": 2.8576722970429016e-05, "loss": 1.4658, "step": 30370 }, { "epoch": 7.534255068510137, "grad_norm": 0.9799590706825256, "learning_rate": 2.8522210266595384e-05, "loss": 1.4583, "step": 30380 }, { "epoch": 7.536735073470147, "grad_norm": 1.005009651184082, "learning_rate": 2.846774095677234e-05, "loss": 1.5044, "step": 30390 }, { "epoch": 7.539215078430157, "grad_norm": 0.9981338977813721, "learning_rate": 2.8413315074028158e-05, "loss": 1.4273, "step": 30400 }, { "epoch": 7.541695083390167, "grad_norm": 1.0441522598266602, "learning_rate": 2.835893265140466e-05, "loss": 1.4125, "step": 30410 }, { "epoch": 7.544175088350177, "grad_norm": 1.0379878282546997, "learning_rate": 2.8304593721917285e-05, "loss": 1.4184, "step": 30420 }, { "epoch": 7.546655093310187, "grad_norm": 1.0805764198303223, "learning_rate": 2.8250298318555045e-05, "loss": 1.4633, "step": 30430 }, { "epoch": 7.549135098270197, "grad_norm": 1.0463159084320068, "learning_rate": 2.8196046474280667e-05, "loss": 1.5028, "step": 30440 }, { "epoch": 7.551615103230207, "grad_norm": 0.99134761095047, "learning_rate": 2.814183822203019e-05, "loss": 1.3978, "step": 30450 }, { "epoch": 7.554095108190216, "grad_norm": 0.9535025358200073, "learning_rate": 2.8087673594713438e-05, "loss": 1.4794, "step": 30460 }, { "epoch": 7.556575113150227, "grad_norm": 0.9892309904098511, "learning_rate": 2.803355262521359e-05, "loss": 1.3823, "step": 30470 }, { "epoch": 7.559055118110236, "grad_norm": 1.0789638757705688, "learning_rate": 2.797947534638736e-05, "loss": 1.3913, "step": 30480 }, { "epoch": 7.5615351230702466, "grad_norm": 0.9983831644058228, "learning_rate": 2.7925441791064945e-05, "loss": 1.4396, "step": 30490 }, { "epoch": 7.564015128030256, "grad_norm": 1.0396370887756348, "learning_rate": 2.7871451992050034e-05, "loss": 1.4401, "step": 30500 }, { "epoch": 7.566495132990266, "grad_norm": 1.1130553483963013, "learning_rate": 2.7817505982119706e-05, "loss": 1.4458, "step": 30510 }, { "epoch": 7.568975137950276, "grad_norm": 1.0616374015808105, "learning_rate": 2.776360379402445e-05, "loss": 1.434, "step": 30520 }, { "epoch": 7.571455142910286, "grad_norm": 1.1154457330703735, "learning_rate": 2.7709745460488177e-05, "loss": 1.4303, "step": 30530 }, { "epoch": 7.573935147870296, "grad_norm": 1.104222297668457, "learning_rate": 2.765593101420816e-05, "loss": 1.4674, "step": 30540 }, { "epoch": 7.576415152830306, "grad_norm": 0.9552186727523804, "learning_rate": 2.7602160487855e-05, "loss": 1.4183, "step": 30550 }, { "epoch": 7.578895157790315, "grad_norm": 1.0074481964111328, "learning_rate": 2.7548433914072734e-05, "loss": 1.4802, "step": 30560 }, { "epoch": 7.581375162750326, "grad_norm": 1.0582637786865234, "learning_rate": 2.749475132547862e-05, "loss": 1.4327, "step": 30570 }, { "epoch": 7.583855167710335, "grad_norm": 1.156577706336975, "learning_rate": 2.7441112754663222e-05, "loss": 1.463, "step": 30580 }, { "epoch": 7.586335172670346, "grad_norm": 1.0950535535812378, "learning_rate": 2.738751823419041e-05, "loss": 1.4868, "step": 30590 }, { "epoch": 7.588815177630355, "grad_norm": 1.0288358926773071, "learning_rate": 2.7333967796597315e-05, "loss": 1.4739, "step": 30600 }, { "epoch": 7.591295182590365, "grad_norm": 0.9875478148460388, "learning_rate": 2.7280461474394247e-05, "loss": 1.4421, "step": 30610 }, { "epoch": 7.593775187550375, "grad_norm": 0.9938637018203735, "learning_rate": 2.7226999300064836e-05, "loss": 1.4035, "step": 30620 }, { "epoch": 7.596255192510385, "grad_norm": 0.9095118045806885, "learning_rate": 2.7173581306065833e-05, "loss": 1.4271, "step": 30630 }, { "epoch": 7.598735197470395, "grad_norm": 0.9821234345436096, "learning_rate": 2.7120207524827168e-05, "loss": 1.4309, "step": 30640 }, { "epoch": 7.601215202430405, "grad_norm": 1.0130341053009033, "learning_rate": 2.7066877988751938e-05, "loss": 1.4619, "step": 30650 }, { "epoch": 7.603695207390415, "grad_norm": 1.0122448205947876, "learning_rate": 2.7013592730216465e-05, "loss": 1.4547, "step": 30660 }, { "epoch": 7.606175212350425, "grad_norm": 1.0752753019332886, "learning_rate": 2.6960351781569992e-05, "loss": 1.3997, "step": 30670 }, { "epoch": 7.608655217310434, "grad_norm": 1.0285594463348389, "learning_rate": 2.690715517513508e-05, "loss": 1.4347, "step": 30680 }, { "epoch": 7.611135222270445, "grad_norm": 1.0548043251037598, "learning_rate": 2.6854002943207246e-05, "loss": 1.4606, "step": 30690 }, { "epoch": 7.613615227230454, "grad_norm": 1.1300123929977417, "learning_rate": 2.680089511805508e-05, "loss": 1.4533, "step": 30700 }, { "epoch": 7.616095232190465, "grad_norm": 0.9323368668556213, "learning_rate": 2.6747831731920214e-05, "loss": 1.3743, "step": 30710 }, { "epoch": 7.618575237150474, "grad_norm": 1.109718918800354, "learning_rate": 2.669481281701739e-05, "loss": 1.4525, "step": 30720 }, { "epoch": 7.621055242110485, "grad_norm": 1.0525087118148804, "learning_rate": 2.664183840553417e-05, "loss": 1.4265, "step": 30730 }, { "epoch": 7.623535247070494, "grad_norm": 0.99526447057724, "learning_rate": 2.6588908529631285e-05, "loss": 1.4605, "step": 30740 }, { "epoch": 7.626015252030504, "grad_norm": 1.0765104293823242, "learning_rate": 2.6536023221442318e-05, "loss": 1.5179, "step": 30750 }, { "epoch": 7.628495256990514, "grad_norm": 1.057417631149292, "learning_rate": 2.6483182513073835e-05, "loss": 1.5021, "step": 30760 }, { "epoch": 7.630975261950524, "grad_norm": 1.0523009300231934, "learning_rate": 2.643038643660526e-05, "loss": 1.4259, "step": 30770 }, { "epoch": 7.633455266910534, "grad_norm": 1.0764293670654297, "learning_rate": 2.6377635024089087e-05, "loss": 1.4572, "step": 30780 }, { "epoch": 7.635935271870544, "grad_norm": 0.9474040865898132, "learning_rate": 2.6324928307550466e-05, "loss": 1.4008, "step": 30790 }, { "epoch": 7.638415276830553, "grad_norm": 1.0371317863464355, "learning_rate": 2.6272266318987603e-05, "loss": 1.4276, "step": 30800 }, { "epoch": 7.640895281790564, "grad_norm": 0.966749370098114, "learning_rate": 2.6219649090371466e-05, "loss": 1.4593, "step": 30810 }, { "epoch": 7.643375286750573, "grad_norm": 1.0057498216629028, "learning_rate": 2.616707665364584e-05, "loss": 1.4272, "step": 30820 }, { "epoch": 7.645855291710584, "grad_norm": 1.0172383785247803, "learning_rate": 2.6114549040727332e-05, "loss": 1.4493, "step": 30830 }, { "epoch": 7.648335296670593, "grad_norm": 0.9629692435264587, "learning_rate": 2.6062066283505403e-05, "loss": 1.417, "step": 30840 }, { "epoch": 7.650815301630603, "grad_norm": 1.0054121017456055, "learning_rate": 2.6009628413842123e-05, "loss": 1.4327, "step": 30850 }, { "epoch": 7.653295306590613, "grad_norm": 1.0924439430236816, "learning_rate": 2.5957235463572484e-05, "loss": 1.4825, "step": 30860 }, { "epoch": 7.655775311550623, "grad_norm": 1.0084000825881958, "learning_rate": 2.5904887464504114e-05, "loss": 1.4974, "step": 30870 }, { "epoch": 7.658255316510633, "grad_norm": 1.0829739570617676, "learning_rate": 2.5852584448417328e-05, "loss": 1.3917, "step": 30880 }, { "epoch": 7.660735321470643, "grad_norm": 0.9569641351699829, "learning_rate": 2.580032644706524e-05, "loss": 1.4001, "step": 30890 }, { "epoch": 7.663215326430652, "grad_norm": 1.0015615224838257, "learning_rate": 2.5748113492173543e-05, "loss": 1.4117, "step": 30900 }, { "epoch": 7.665695331390663, "grad_norm": 0.9971137046813965, "learning_rate": 2.5695945615440596e-05, "loss": 1.435, "step": 30910 }, { "epoch": 7.668175336350672, "grad_norm": 1.0712945461273193, "learning_rate": 2.564382284853738e-05, "loss": 1.4487, "step": 30920 }, { "epoch": 7.670655341310683, "grad_norm": 1.0099509954452515, "learning_rate": 2.559174522310761e-05, "loss": 1.4838, "step": 30930 }, { "epoch": 7.673135346270692, "grad_norm": 1.0410501956939697, "learning_rate": 2.5539712770767376e-05, "loss": 1.3944, "step": 30940 }, { "epoch": 7.675615351230703, "grad_norm": 1.0309990644454956, "learning_rate": 2.5487725523105544e-05, "loss": 1.3999, "step": 30950 }, { "epoch": 7.678095356190712, "grad_norm": 1.100633144378662, "learning_rate": 2.5435783511683443e-05, "loss": 1.4263, "step": 30960 }, { "epoch": 7.680575361150722, "grad_norm": 1.046830415725708, "learning_rate": 2.538388676803496e-05, "loss": 1.4382, "step": 30970 }, { "epoch": 7.683055366110732, "grad_norm": 1.0287320613861084, "learning_rate": 2.5332035323666447e-05, "loss": 1.4257, "step": 30980 }, { "epoch": 7.685535371070742, "grad_norm": 1.0454462766647339, "learning_rate": 2.5280229210056895e-05, "loss": 1.4422, "step": 30990 }, { "epoch": 7.688015376030752, "grad_norm": 1.068619728088379, "learning_rate": 2.5228468458657584e-05, "loss": 1.4173, "step": 31000 }, { "epoch": 7.690495380990762, "grad_norm": 1.054749608039856, "learning_rate": 2.5176753100892426e-05, "loss": 1.4263, "step": 31010 }, { "epoch": 7.692975385950772, "grad_norm": 1.009238839149475, "learning_rate": 2.512508316815767e-05, "loss": 1.4302, "step": 31020 }, { "epoch": 7.695455390910782, "grad_norm": 1.03357994556427, "learning_rate": 2.507345869182203e-05, "loss": 1.38, "step": 31030 }, { "epoch": 7.6979353958707915, "grad_norm": 1.0613250732421875, "learning_rate": 2.502187970322657e-05, "loss": 1.38, "step": 31040 }, { "epoch": 7.700415400830802, "grad_norm": 1.0838817358016968, "learning_rate": 2.497034623368486e-05, "loss": 1.4417, "step": 31050 }, { "epoch": 7.7028954057908114, "grad_norm": 1.040667176246643, "learning_rate": 2.4918858314482718e-05, "loss": 1.4634, "step": 31060 }, { "epoch": 7.705375410750822, "grad_norm": 1.1136744022369385, "learning_rate": 2.4867415976878337e-05, "loss": 1.4517, "step": 31070 }, { "epoch": 7.707855415710831, "grad_norm": 1.0463638305664062, "learning_rate": 2.4816019252102273e-05, "loss": 1.4042, "step": 31080 }, { "epoch": 7.710335420670841, "grad_norm": 0.9802637696266174, "learning_rate": 2.4764668171357343e-05, "loss": 1.3391, "step": 31090 }, { "epoch": 7.712815425630851, "grad_norm": 1.0218875408172607, "learning_rate": 2.4713362765818658e-05, "loss": 1.5236, "step": 31100 }, { "epoch": 7.715295430590861, "grad_norm": 1.0065280199050903, "learning_rate": 2.466210306663368e-05, "loss": 1.4363, "step": 31110 }, { "epoch": 7.717775435550871, "grad_norm": 1.0505353212356567, "learning_rate": 2.461088910492202e-05, "loss": 1.4227, "step": 31120 }, { "epoch": 7.720255440510881, "grad_norm": 1.0982780456542969, "learning_rate": 2.4559720911775564e-05, "loss": 1.4404, "step": 31130 }, { "epoch": 7.7227354454708905, "grad_norm": 1.084692358970642, "learning_rate": 2.450859851825842e-05, "loss": 1.4475, "step": 31140 }, { "epoch": 7.725215450430901, "grad_norm": 0.9937741160392761, "learning_rate": 2.445752195540687e-05, "loss": 1.4108, "step": 31150 }, { "epoch": 7.7276954553909105, "grad_norm": 1.0374422073364258, "learning_rate": 2.440649125422937e-05, "loss": 1.4338, "step": 31160 }, { "epoch": 7.730175460350921, "grad_norm": 1.0560063123703003, "learning_rate": 2.43555064457066e-05, "loss": 1.4561, "step": 31170 }, { "epoch": 7.7326554653109305, "grad_norm": 1.0604078769683838, "learning_rate": 2.4304567560791293e-05, "loss": 1.4373, "step": 31180 }, { "epoch": 7.73513547027094, "grad_norm": 1.0357143878936768, "learning_rate": 2.425367463040834e-05, "loss": 1.4736, "step": 31190 }, { "epoch": 7.7376154752309505, "grad_norm": 1.0199404954910278, "learning_rate": 2.420282768545469e-05, "loss": 1.4684, "step": 31200 }, { "epoch": 7.74009548019096, "grad_norm": 1.1008087396621704, "learning_rate": 2.4152026756799506e-05, "loss": 1.4925, "step": 31210 }, { "epoch": 7.7425754851509705, "grad_norm": 0.9964824318885803, "learning_rate": 2.4101271875283817e-05, "loss": 1.4495, "step": 31220 }, { "epoch": 7.74505549011098, "grad_norm": 1.0641846656799316, "learning_rate": 2.4050563071720867e-05, "loss": 1.4402, "step": 31230 }, { "epoch": 7.7475354950709905, "grad_norm": 1.1189098358154297, "learning_rate": 2.3999900376895845e-05, "loss": 1.4009, "step": 31240 }, { "epoch": 7.750015500031, "grad_norm": 1.0134433507919312, "learning_rate": 2.394928382156596e-05, "loss": 1.4331, "step": 31250 }, { "epoch": 7.7524955049910105, "grad_norm": 1.0753346681594849, "learning_rate": 2.3898713436460375e-05, "loss": 1.4288, "step": 31260 }, { "epoch": 7.75497550995102, "grad_norm": 1.0120718479156494, "learning_rate": 2.384818925228036e-05, "loss": 1.5383, "step": 31270 }, { "epoch": 7.75745551491103, "grad_norm": 1.036631464958191, "learning_rate": 2.3797711299698923e-05, "loss": 1.4406, "step": 31280 }, { "epoch": 7.75993551987104, "grad_norm": 1.0271037817001343, "learning_rate": 2.3747279609361196e-05, "loss": 1.5359, "step": 31290 }, { "epoch": 7.76241552483105, "grad_norm": 1.0515282154083252, "learning_rate": 2.3696894211884123e-05, "loss": 1.4693, "step": 31300 }, { "epoch": 7.76489552979106, "grad_norm": 1.0361008644104004, "learning_rate": 2.3646555137856576e-05, "loss": 1.4372, "step": 31310 }, { "epoch": 7.76737553475107, "grad_norm": 1.026922583580017, "learning_rate": 2.3596262417839255e-05, "loss": 1.432, "step": 31320 }, { "epoch": 7.769855539711079, "grad_norm": 1.0789787769317627, "learning_rate": 2.3546016082364852e-05, "loss": 1.388, "step": 31330 }, { "epoch": 7.77233554467109, "grad_norm": 0.9373264908790588, "learning_rate": 2.34958161619377e-05, "loss": 1.3714, "step": 31340 }, { "epoch": 7.774815549631099, "grad_norm": 0.9983879327774048, "learning_rate": 2.3445662687034144e-05, "loss": 1.4818, "step": 31350 }, { "epoch": 7.7772955545911095, "grad_norm": 1.1489768028259277, "learning_rate": 2.339555568810221e-05, "loss": 1.4882, "step": 31360 }, { "epoch": 7.779775559551119, "grad_norm": 1.3525007963180542, "learning_rate": 2.3345495195561762e-05, "loss": 1.5085, "step": 31370 }, { "epoch": 7.782255564511129, "grad_norm": 0.9903725981712341, "learning_rate": 2.3295481239804384e-05, "loss": 1.4513, "step": 31380 }, { "epoch": 7.784735569471139, "grad_norm": 0.9812037944793701, "learning_rate": 2.3245513851193534e-05, "loss": 1.4733, "step": 31390 }, { "epoch": 7.787215574431149, "grad_norm": 1.1686617136001587, "learning_rate": 2.319559306006417e-05, "loss": 1.4414, "step": 31400 }, { "epoch": 7.789695579391159, "grad_norm": 1.058807373046875, "learning_rate": 2.31457188967232e-05, "loss": 1.4858, "step": 31410 }, { "epoch": 7.792175584351169, "grad_norm": 1.070768117904663, "learning_rate": 2.3095891391449086e-05, "loss": 1.4402, "step": 31420 }, { "epoch": 7.794655589311178, "grad_norm": 1.0862927436828613, "learning_rate": 2.3046110574491985e-05, "loss": 1.4959, "step": 31430 }, { "epoch": 7.797135594271189, "grad_norm": 0.9778184294700623, "learning_rate": 2.2996376476073723e-05, "loss": 1.3462, "step": 31440 }, { "epoch": 7.799615599231198, "grad_norm": 1.089483618736267, "learning_rate": 2.294668912638781e-05, "loss": 1.482, "step": 31450 }, { "epoch": 7.802095604191209, "grad_norm": 1.0856155157089233, "learning_rate": 2.2897048555599253e-05, "loss": 1.4108, "step": 31460 }, { "epoch": 7.804575609151218, "grad_norm": 1.0545732975006104, "learning_rate": 2.2847454793844793e-05, "loss": 1.3956, "step": 31470 }, { "epoch": 7.807055614111228, "grad_norm": 1.056648850440979, "learning_rate": 2.279790787123267e-05, "loss": 1.4274, "step": 31480 }, { "epoch": 7.809535619071238, "grad_norm": 1.0907402038574219, "learning_rate": 2.274840781784273e-05, "loss": 1.4366, "step": 31490 }, { "epoch": 7.812015624031248, "grad_norm": 1.0658317804336548, "learning_rate": 2.26989546637263e-05, "loss": 1.456, "step": 31500 }, { "epoch": 7.814495628991258, "grad_norm": 0.9488449096679688, "learning_rate": 2.2649548438906353e-05, "loss": 1.4676, "step": 31510 }, { "epoch": 7.816975633951268, "grad_norm": 1.0413172245025635, "learning_rate": 2.260018917337726e-05, "loss": 1.506, "step": 31520 }, { "epoch": 7.819455638911278, "grad_norm": 1.0351229906082153, "learning_rate": 2.2550876897104913e-05, "loss": 1.4968, "step": 31530 }, { "epoch": 7.821935643871288, "grad_norm": 0.9866580963134766, "learning_rate": 2.2501611640026743e-05, "loss": 1.3963, "step": 31540 }, { "epoch": 7.824415648831298, "grad_norm": 1.0516616106033325, "learning_rate": 2.24523934320515e-05, "loss": 1.4826, "step": 31550 }, { "epoch": 7.826895653791308, "grad_norm": 0.9789044857025146, "learning_rate": 2.240322230305951e-05, "loss": 1.4426, "step": 31560 }, { "epoch": 7.829375658751317, "grad_norm": 1.0472681522369385, "learning_rate": 2.2354098282902446e-05, "loss": 1.4116, "step": 31570 }, { "epoch": 7.831855663711328, "grad_norm": 1.0835953950881958, "learning_rate": 2.2305021401403382e-05, "loss": 1.4867, "step": 31580 }, { "epoch": 7.834335668671337, "grad_norm": 1.1364092826843262, "learning_rate": 2.225599168835677e-05, "loss": 1.4628, "step": 31590 }, { "epoch": 7.836815673631348, "grad_norm": 0.95635586977005, "learning_rate": 2.2207009173528527e-05, "loss": 1.4281, "step": 31600 }, { "epoch": 7.839295678591357, "grad_norm": 1.0518940687179565, "learning_rate": 2.2158073886655705e-05, "loss": 1.4702, "step": 31610 }, { "epoch": 7.841775683551367, "grad_norm": 1.1285464763641357, "learning_rate": 2.2109185857446903e-05, "loss": 1.4512, "step": 31620 }, { "epoch": 7.844255688511377, "grad_norm": 1.1680338382720947, "learning_rate": 2.2060345115581914e-05, "loss": 1.4784, "step": 31630 }, { "epoch": 7.846735693471387, "grad_norm": 1.005530834197998, "learning_rate": 2.201155169071184e-05, "loss": 1.414, "step": 31640 }, { "epoch": 7.849215698431397, "grad_norm": 1.0277854204177856, "learning_rate": 2.1962805612459024e-05, "loss": 1.4305, "step": 31650 }, { "epoch": 7.851695703391407, "grad_norm": 0.9508567452430725, "learning_rate": 2.1914106910417186e-05, "loss": 1.4075, "step": 31660 }, { "epoch": 7.854175708351416, "grad_norm": 0.9974478483200073, "learning_rate": 2.1865455614151155e-05, "loss": 1.4251, "step": 31670 }, { "epoch": 7.856655713311427, "grad_norm": 1.0750144720077515, "learning_rate": 2.181685175319702e-05, "loss": 1.3923, "step": 31680 }, { "epoch": 7.859135718271436, "grad_norm": 0.9576234817504883, "learning_rate": 2.1768295357062107e-05, "loss": 1.448, "step": 31690 }, { "epoch": 7.861615723231447, "grad_norm": 1.0810132026672363, "learning_rate": 2.1719786455224878e-05, "loss": 1.4417, "step": 31700 }, { "epoch": 7.864095728191456, "grad_norm": 1.040737271308899, "learning_rate": 2.1671325077134963e-05, "loss": 1.425, "step": 31710 }, { "epoch": 7.866575733151466, "grad_norm": 1.0674554109573364, "learning_rate": 2.1622911252213197e-05, "loss": 1.4129, "step": 31720 }, { "epoch": 7.869055738111476, "grad_norm": 1.0067627429962158, "learning_rate": 2.15745450098515e-05, "loss": 1.418, "step": 31730 }, { "epoch": 7.871535743071486, "grad_norm": 1.0110206604003906, "learning_rate": 2.1526226379412906e-05, "loss": 1.4551, "step": 31740 }, { "epoch": 7.874015748031496, "grad_norm": 1.0148226022720337, "learning_rate": 2.147795539023153e-05, "loss": 1.4012, "step": 31750 }, { "epoch": 7.876495752991506, "grad_norm": 1.0703665018081665, "learning_rate": 2.142973207161265e-05, "loss": 1.3901, "step": 31760 }, { "epoch": 7.878975757951516, "grad_norm": 1.0552300214767456, "learning_rate": 2.138155645283244e-05, "loss": 1.481, "step": 31770 }, { "epoch": 7.881455762911526, "grad_norm": 1.0581234693527222, "learning_rate": 2.1333428563138303e-05, "loss": 1.3928, "step": 31780 }, { "epoch": 7.883935767871535, "grad_norm": 1.0785514116287231, "learning_rate": 2.128534843174853e-05, "loss": 1.4399, "step": 31790 }, { "epoch": 7.886415772831546, "grad_norm": 1.0023902654647827, "learning_rate": 2.1237316087852466e-05, "loss": 1.4149, "step": 31800 }, { "epoch": 7.888895777791555, "grad_norm": 1.0342328548431396, "learning_rate": 2.1189331560610435e-05, "loss": 1.4297, "step": 31810 }, { "epoch": 7.891375782751566, "grad_norm": 1.1631563901901245, "learning_rate": 2.114139487915381e-05, "loss": 1.5044, "step": 31820 }, { "epoch": 7.893855787711575, "grad_norm": 0.9945201277732849, "learning_rate": 2.1093506072584724e-05, "loss": 1.4679, "step": 31830 }, { "epoch": 7.896335792671586, "grad_norm": 1.0690093040466309, "learning_rate": 2.1045665169976468e-05, "loss": 1.4696, "step": 31840 }, { "epoch": 7.898815797631595, "grad_norm": 1.1721092462539673, "learning_rate": 2.0997872200373116e-05, "loss": 1.4174, "step": 31850 }, { "epoch": 7.901295802591605, "grad_norm": 0.9855526685714722, "learning_rate": 2.095012719278966e-05, "loss": 1.4592, "step": 31860 }, { "epoch": 7.903775807551615, "grad_norm": 1.0813654661178589, "learning_rate": 2.0902430176211995e-05, "loss": 1.4446, "step": 31870 }, { "epoch": 7.906255812511625, "grad_norm": 1.001652479171753, "learning_rate": 2.0854781179596938e-05, "loss": 1.3965, "step": 31880 }, { "epoch": 7.908735817471635, "grad_norm": 1.0926847457885742, "learning_rate": 2.080718023187198e-05, "loss": 1.3916, "step": 31890 }, { "epoch": 7.911215822431645, "grad_norm": 0.972129762172699, "learning_rate": 2.075962736193564e-05, "loss": 1.4492, "step": 31900 }, { "epoch": 7.9136958273916544, "grad_norm": 1.0363141298294067, "learning_rate": 2.0712122598657135e-05, "loss": 1.5137, "step": 31910 }, { "epoch": 7.916175832351665, "grad_norm": 1.038306713104248, "learning_rate": 2.0664665970876496e-05, "loss": 1.4088, "step": 31920 }, { "epoch": 7.918655837311674, "grad_norm": 1.120137333869934, "learning_rate": 2.0617257507404532e-05, "loss": 1.4371, "step": 31930 }, { "epoch": 7.921135842271685, "grad_norm": 1.0201654434204102, "learning_rate": 2.056989723702287e-05, "loss": 1.4596, "step": 31940 }, { "epoch": 7.923615847231694, "grad_norm": 1.0573019981384277, "learning_rate": 2.0522585188483745e-05, "loss": 1.4672, "step": 31950 }, { "epoch": 7.926095852191704, "grad_norm": 1.104080080986023, "learning_rate": 2.047532139051026e-05, "loss": 1.4372, "step": 31960 }, { "epoch": 7.928575857151714, "grad_norm": 0.9933714866638184, "learning_rate": 2.0428105871796154e-05, "loss": 1.4312, "step": 31970 }, { "epoch": 7.931055862111724, "grad_norm": 1.007702350616455, "learning_rate": 2.0380938661005854e-05, "loss": 1.4008, "step": 31980 }, { "epoch": 7.933535867071734, "grad_norm": 1.020708680152893, "learning_rate": 2.0333819786774444e-05, "loss": 1.4521, "step": 31990 }, { "epoch": 7.936015872031744, "grad_norm": 1.0443121194839478, "learning_rate": 2.0286749277707782e-05, "loss": 1.4368, "step": 32000 }, { "epoch": 7.9384958769917535, "grad_norm": 1.0342239141464233, "learning_rate": 2.0239727162382148e-05, "loss": 1.4525, "step": 32010 }, { "epoch": 7.940975881951764, "grad_norm": 1.0420253276824951, "learning_rate": 2.019275346934466e-05, "loss": 1.4377, "step": 32020 }, { "epoch": 7.9434558869117735, "grad_norm": 0.9696030616760254, "learning_rate": 2.0145828227112917e-05, "loss": 1.4235, "step": 32030 }, { "epoch": 7.945935891871784, "grad_norm": 1.0450141429901123, "learning_rate": 2.009895146417512e-05, "loss": 1.4557, "step": 32040 }, { "epoch": 7.9484158968317935, "grad_norm": 1.0528062582015991, "learning_rate": 2.0052123208990025e-05, "loss": 1.4333, "step": 32050 }, { "epoch": 7.950895901791804, "grad_norm": 1.0280351638793945, "learning_rate": 2.000534348998704e-05, "loss": 1.4952, "step": 32060 }, { "epoch": 7.9533759067518135, "grad_norm": 1.0444345474243164, "learning_rate": 1.9958612335565975e-05, "loss": 1.424, "step": 32070 }, { "epoch": 7.955855911711823, "grad_norm": 1.0241684913635254, "learning_rate": 1.9911929774097215e-05, "loss": 1.4075, "step": 32080 }, { "epoch": 7.9583359166718335, "grad_norm": 1.0345664024353027, "learning_rate": 1.986529583392167e-05, "loss": 1.4826, "step": 32090 }, { "epoch": 7.960815921631843, "grad_norm": 0.9985606074333191, "learning_rate": 1.981871054335067e-05, "loss": 1.4267, "step": 32100 }, { "epoch": 7.9632959265918535, "grad_norm": 1.0109633207321167, "learning_rate": 1.977217393066604e-05, "loss": 1.3878, "step": 32110 }, { "epoch": 7.965775931551863, "grad_norm": 1.0159655809402466, "learning_rate": 1.9725686024120095e-05, "loss": 1.4437, "step": 32120 }, { "epoch": 7.9682559365118735, "grad_norm": 1.05583918094635, "learning_rate": 1.967924685193552e-05, "loss": 1.3912, "step": 32130 }, { "epoch": 7.970735941471883, "grad_norm": 1.030286431312561, "learning_rate": 1.9632856442305415e-05, "loss": 1.4066, "step": 32140 }, { "epoch": 7.973215946431893, "grad_norm": 1.0742601156234741, "learning_rate": 1.9586514823393363e-05, "loss": 1.4134, "step": 32150 }, { "epoch": 7.975695951391903, "grad_norm": 0.9984321594238281, "learning_rate": 1.9540222023333166e-05, "loss": 1.5042, "step": 32160 }, { "epoch": 7.978175956351913, "grad_norm": 1.0197498798370361, "learning_rate": 1.949397807022916e-05, "loss": 1.4358, "step": 32170 }, { "epoch": 7.980655961311923, "grad_norm": 1.0034178495407104, "learning_rate": 1.9447782992155916e-05, "loss": 1.4705, "step": 32180 }, { "epoch": 7.983135966271933, "grad_norm": 1.04500150680542, "learning_rate": 1.9401636817158365e-05, "loss": 1.3831, "step": 32190 }, { "epoch": 7.985615971231942, "grad_norm": 1.114035725593567, "learning_rate": 1.9355539573251734e-05, "loss": 1.4582, "step": 32200 }, { "epoch": 7.9880959761919526, "grad_norm": 1.0428582429885864, "learning_rate": 1.93094912884216e-05, "loss": 1.42, "step": 32210 }, { "epoch": 7.990575981151962, "grad_norm": 1.0457404851913452, "learning_rate": 1.926349199062376e-05, "loss": 1.468, "step": 32220 }, { "epoch": 7.9930559861119725, "grad_norm": 1.023844599723816, "learning_rate": 1.921754170778428e-05, "loss": 1.4377, "step": 32230 }, { "epoch": 7.995535991071982, "grad_norm": 1.0685179233551025, "learning_rate": 1.917164046779948e-05, "loss": 1.4771, "step": 32240 }, { "epoch": 7.998015996031992, "grad_norm": 1.034908652305603, "learning_rate": 1.9125788298535908e-05, "loss": 1.4559, "step": 32250 }, { "epoch": 8.000496000992001, "grad_norm": 1.0679064989089966, "learning_rate": 1.9079985227830276e-05, "loss": 1.4761, "step": 32260 }, { "epoch": 8.002976005952013, "grad_norm": 1.0527770519256592, "learning_rate": 1.903423128348959e-05, "loss": 1.3527, "step": 32270 }, { "epoch": 8.005456010912022, "grad_norm": 0.9856534600257874, "learning_rate": 1.898852649329095e-05, "loss": 1.411, "step": 32280 }, { "epoch": 8.007936015872032, "grad_norm": 1.0472939014434814, "learning_rate": 1.894287088498162e-05, "loss": 1.3859, "step": 32290 }, { "epoch": 8.010416020832041, "grad_norm": 1.021368145942688, "learning_rate": 1.889726448627904e-05, "loss": 1.4096, "step": 32300 }, { "epoch": 8.01289602579205, "grad_norm": 1.0529119968414307, "learning_rate": 1.885170732487074e-05, "loss": 1.3803, "step": 32310 }, { "epoch": 8.015376030752062, "grad_norm": 1.087838053703308, "learning_rate": 1.880619942841435e-05, "loss": 1.4731, "step": 32320 }, { "epoch": 8.017856035712072, "grad_norm": 0.9879887700080872, "learning_rate": 1.876074082453766e-05, "loss": 1.3251, "step": 32330 }, { "epoch": 8.020336040672081, "grad_norm": 1.0613797903060913, "learning_rate": 1.8715331540838487e-05, "loss": 1.4409, "step": 32340 }, { "epoch": 8.02281604563209, "grad_norm": 1.0037634372711182, "learning_rate": 1.8669971604884683e-05, "loss": 1.4334, "step": 32350 }, { "epoch": 8.025296050592102, "grad_norm": 1.1097238063812256, "learning_rate": 1.8624661044214152e-05, "loss": 1.3923, "step": 32360 }, { "epoch": 8.027776055552112, "grad_norm": 1.0040903091430664, "learning_rate": 1.8579399886334914e-05, "loss": 1.3527, "step": 32370 }, { "epoch": 8.030256060512121, "grad_norm": 0.9767634272575378, "learning_rate": 1.853418815872482e-05, "loss": 1.3936, "step": 32380 }, { "epoch": 8.03273606547213, "grad_norm": 0.9867961406707764, "learning_rate": 1.848902588883188e-05, "loss": 1.4003, "step": 32390 }, { "epoch": 8.03521607043214, "grad_norm": 1.016300082206726, "learning_rate": 1.8443913104073983e-05, "loss": 1.3895, "step": 32400 }, { "epoch": 8.037696075392152, "grad_norm": 1.042302131652832, "learning_rate": 1.8398849831839014e-05, "loss": 1.3428, "step": 32410 }, { "epoch": 8.040176080352161, "grad_norm": 0.99931401014328, "learning_rate": 1.8353836099484767e-05, "loss": 1.407, "step": 32420 }, { "epoch": 8.04265608531217, "grad_norm": 1.0488470792770386, "learning_rate": 1.8308871934339033e-05, "loss": 1.4504, "step": 32430 }, { "epoch": 8.04513609027218, "grad_norm": 1.020246982574463, "learning_rate": 1.826395736369937e-05, "loss": 1.4135, "step": 32440 }, { "epoch": 8.04761609523219, "grad_norm": 1.080391526222229, "learning_rate": 1.8219092414833394e-05, "loss": 1.4268, "step": 32450 }, { "epoch": 8.050096100192201, "grad_norm": 0.9757678508758545, "learning_rate": 1.81742771149785e-05, "loss": 1.433, "step": 32460 }, { "epoch": 8.05257610515221, "grad_norm": 1.076442003250122, "learning_rate": 1.8129511491341946e-05, "loss": 1.4138, "step": 32470 }, { "epoch": 8.05505611011222, "grad_norm": 1.091922402381897, "learning_rate": 1.808479557110081e-05, "loss": 1.4215, "step": 32480 }, { "epoch": 8.05753611507223, "grad_norm": 1.0268551111221313, "learning_rate": 1.8040129381402137e-05, "loss": 1.4107, "step": 32490 }, { "epoch": 8.06001612003224, "grad_norm": 1.0977098941802979, "learning_rate": 1.7995512949362547e-05, "loss": 1.4162, "step": 32500 }, { "epoch": 8.06249612499225, "grad_norm": 1.0292160511016846, "learning_rate": 1.795094630206866e-05, "loss": 1.3961, "step": 32510 }, { "epoch": 8.06497612995226, "grad_norm": 1.0747003555297852, "learning_rate": 1.7906429466576767e-05, "loss": 1.3931, "step": 32520 }, { "epoch": 8.06745613491227, "grad_norm": 0.9828006029129028, "learning_rate": 1.7861962469912953e-05, "loss": 1.3724, "step": 32530 }, { "epoch": 8.06993613987228, "grad_norm": 1.0837029218673706, "learning_rate": 1.7817545339072994e-05, "loss": 1.3638, "step": 32540 }, { "epoch": 8.072416144832289, "grad_norm": 1.0823603868484497, "learning_rate": 1.7773178101022514e-05, "loss": 1.44, "step": 32550 }, { "epoch": 8.0748961497923, "grad_norm": 1.0632768869400024, "learning_rate": 1.7728860782696664e-05, "loss": 1.4125, "step": 32560 }, { "epoch": 8.07737615475231, "grad_norm": 1.0829638242721558, "learning_rate": 1.7684593411000472e-05, "loss": 1.4523, "step": 32570 }, { "epoch": 8.07985615971232, "grad_norm": 1.0273957252502441, "learning_rate": 1.7640376012808536e-05, "loss": 1.4002, "step": 32580 }, { "epoch": 8.082336164672329, "grad_norm": 0.9694962501525879, "learning_rate": 1.759620861496515e-05, "loss": 1.44, "step": 32590 }, { "epoch": 8.084816169632338, "grad_norm": 0.9955787062644958, "learning_rate": 1.7552091244284197e-05, "loss": 1.3595, "step": 32600 }, { "epoch": 8.08729617459235, "grad_norm": 1.0302938222885132, "learning_rate": 1.7508023927549333e-05, "loss": 1.4095, "step": 32610 }, { "epoch": 8.08977617955236, "grad_norm": 1.0442672967910767, "learning_rate": 1.7464006691513623e-05, "loss": 1.4573, "step": 32620 }, { "epoch": 8.092256184512369, "grad_norm": 1.0897337198257446, "learning_rate": 1.7420039562899915e-05, "loss": 1.4222, "step": 32630 }, { "epoch": 8.094736189472378, "grad_norm": 1.081861138343811, "learning_rate": 1.7376122568400532e-05, "loss": 1.4007, "step": 32640 }, { "epoch": 8.09721619443239, "grad_norm": 0.9963325262069702, "learning_rate": 1.733225573467737e-05, "loss": 1.3782, "step": 32650 }, { "epoch": 8.0996961993924, "grad_norm": 1.0090794563293457, "learning_rate": 1.7288439088361886e-05, "loss": 1.4332, "step": 32660 }, { "epoch": 8.102176204352409, "grad_norm": 0.9832302927970886, "learning_rate": 1.7244672656055106e-05, "loss": 1.4046, "step": 32670 }, { "epoch": 8.104656209312418, "grad_norm": 1.0199371576309204, "learning_rate": 1.720095646432751e-05, "loss": 1.3729, "step": 32680 }, { "epoch": 8.107136214272428, "grad_norm": 1.0891382694244385, "learning_rate": 1.7157290539719106e-05, "loss": 1.4103, "step": 32690 }, { "epoch": 8.10961621923244, "grad_norm": 1.03241765499115, "learning_rate": 1.7113674908739396e-05, "loss": 1.4102, "step": 32700 }, { "epoch": 8.112096224192449, "grad_norm": 0.988692581653595, "learning_rate": 1.7070109597867312e-05, "loss": 1.4142, "step": 32710 }, { "epoch": 8.114576229152458, "grad_norm": 0.9958691596984863, "learning_rate": 1.702659463355125e-05, "loss": 1.374, "step": 32720 }, { "epoch": 8.117056234112468, "grad_norm": 1.058645248413086, "learning_rate": 1.6983130042209094e-05, "loss": 1.3947, "step": 32730 }, { "epoch": 8.119536239072477, "grad_norm": 1.0806529521942139, "learning_rate": 1.693971585022808e-05, "loss": 1.3713, "step": 32740 }, { "epoch": 8.122016244032489, "grad_norm": 1.0624850988388062, "learning_rate": 1.689635208396486e-05, "loss": 1.4297, "step": 32750 }, { "epoch": 8.124496248992498, "grad_norm": 1.0744974613189697, "learning_rate": 1.6853038769745467e-05, "loss": 1.3951, "step": 32760 }, { "epoch": 8.126976253952508, "grad_norm": 1.0920403003692627, "learning_rate": 1.680977593386538e-05, "loss": 1.3603, "step": 32770 }, { "epoch": 8.129456258912517, "grad_norm": 1.0960181951522827, "learning_rate": 1.6766563602589324e-05, "loss": 1.4094, "step": 32780 }, { "epoch": 8.131936263872527, "grad_norm": 1.077284336090088, "learning_rate": 1.672340180215143e-05, "loss": 1.3769, "step": 32790 }, { "epoch": 8.134416268832538, "grad_norm": 1.00383460521698, "learning_rate": 1.668029055875512e-05, "loss": 1.3663, "step": 32800 }, { "epoch": 8.136896273792548, "grad_norm": 1.0465641021728516, "learning_rate": 1.6637229898573113e-05, "loss": 1.3224, "step": 32810 }, { "epoch": 8.139376278752557, "grad_norm": 1.0937128067016602, "learning_rate": 1.6594219847747504e-05, "loss": 1.3983, "step": 32820 }, { "epoch": 8.141856283712567, "grad_norm": 1.0818277597427368, "learning_rate": 1.655126043238957e-05, "loss": 1.4798, "step": 32830 }, { "epoch": 8.144336288672577, "grad_norm": 1.0414772033691406, "learning_rate": 1.6508351678579882e-05, "loss": 1.3921, "step": 32840 }, { "epoch": 8.146816293632588, "grad_norm": 1.0060216188430786, "learning_rate": 1.6465493612368233e-05, "loss": 1.4231, "step": 32850 }, { "epoch": 8.149296298592597, "grad_norm": 1.0131288766860962, "learning_rate": 1.6422686259773678e-05, "loss": 1.3954, "step": 32860 }, { "epoch": 8.151776303552607, "grad_norm": 1.0088540315628052, "learning_rate": 1.6379929646784443e-05, "loss": 1.4203, "step": 32870 }, { "epoch": 8.154256308512617, "grad_norm": 1.0268006324768066, "learning_rate": 1.6337223799358026e-05, "loss": 1.4408, "step": 32880 }, { "epoch": 8.156736313472628, "grad_norm": 1.0829941034317017, "learning_rate": 1.6294568743421025e-05, "loss": 1.4245, "step": 32890 }, { "epoch": 8.159216318432637, "grad_norm": 1.097237229347229, "learning_rate": 1.6251964504869222e-05, "loss": 1.4472, "step": 32900 }, { "epoch": 8.161696323392647, "grad_norm": 1.127237319946289, "learning_rate": 1.620941110956754e-05, "loss": 1.4938, "step": 32910 }, { "epoch": 8.164176328352656, "grad_norm": 1.1016814708709717, "learning_rate": 1.616690858335014e-05, "loss": 1.3906, "step": 32920 }, { "epoch": 8.166656333312666, "grad_norm": 1.079646348953247, "learning_rate": 1.6124456952020093e-05, "loss": 1.4228, "step": 32930 }, { "epoch": 8.169136338272677, "grad_norm": 1.043107032775879, "learning_rate": 1.6082056241349786e-05, "loss": 1.34, "step": 32940 }, { "epoch": 8.171616343232687, "grad_norm": 1.044143795967102, "learning_rate": 1.6039706477080562e-05, "loss": 1.3492, "step": 32950 }, { "epoch": 8.174096348192696, "grad_norm": 1.0955991744995117, "learning_rate": 1.5997407684922862e-05, "loss": 1.4136, "step": 32960 }, { "epoch": 8.176576353152706, "grad_norm": 0.9900245666503906, "learning_rate": 1.595515989055618e-05, "loss": 1.3789, "step": 32970 }, { "epoch": 8.179056358112716, "grad_norm": 0.9846022725105286, "learning_rate": 1.5912963119629144e-05, "loss": 1.3932, "step": 32980 }, { "epoch": 8.181536363072727, "grad_norm": 1.0475287437438965, "learning_rate": 1.5870817397759198e-05, "loss": 1.3597, "step": 32990 }, { "epoch": 8.184016368032736, "grad_norm": 1.1756962537765503, "learning_rate": 1.5828722750533008e-05, "loss": 1.4158, "step": 33000 }, { "epoch": 8.186496372992746, "grad_norm": 1.143308401107788, "learning_rate": 1.5786679203506105e-05, "loss": 1.3885, "step": 33010 }, { "epoch": 8.188976377952756, "grad_norm": 1.0628608465194702, "learning_rate": 1.5744686782203055e-05, "loss": 1.4024, "step": 33020 }, { "epoch": 8.191456382912765, "grad_norm": 1.023871660232544, "learning_rate": 1.5702745512117324e-05, "loss": 1.4226, "step": 33030 }, { "epoch": 8.193936387872776, "grad_norm": 1.109716534614563, "learning_rate": 1.566085541871145e-05, "loss": 1.4659, "step": 33040 }, { "epoch": 8.196416392832786, "grad_norm": 1.1114068031311035, "learning_rate": 1.5619016527416707e-05, "loss": 1.3765, "step": 33050 }, { "epoch": 8.198896397792796, "grad_norm": 1.0773273706436157, "learning_rate": 1.5577228863633486e-05, "loss": 1.3958, "step": 33060 }, { "epoch": 8.201376402752805, "grad_norm": 0.9811004996299744, "learning_rate": 1.5535492452730947e-05, "loss": 1.4084, "step": 33070 }, { "epoch": 8.203856407712815, "grad_norm": 1.0447224378585815, "learning_rate": 1.549380732004718e-05, "loss": 1.4256, "step": 33080 }, { "epoch": 8.206336412672826, "grad_norm": 1.0115346908569336, "learning_rate": 1.545217349088911e-05, "loss": 1.4162, "step": 33090 }, { "epoch": 8.208816417632836, "grad_norm": 1.0720356702804565, "learning_rate": 1.5410590990532625e-05, "loss": 1.484, "step": 33100 }, { "epoch": 8.211296422592845, "grad_norm": 1.0902684926986694, "learning_rate": 1.5369059844222278e-05, "loss": 1.3935, "step": 33110 }, { "epoch": 8.213776427552855, "grad_norm": 1.0302410125732422, "learning_rate": 1.5327580077171587e-05, "loss": 1.3723, "step": 33120 }, { "epoch": 8.216256432512864, "grad_norm": 1.01740562915802, "learning_rate": 1.5286151714562824e-05, "loss": 1.472, "step": 33130 }, { "epoch": 8.218736437472876, "grad_norm": 1.0869848728179932, "learning_rate": 1.5244774781547055e-05, "loss": 1.4054, "step": 33140 }, { "epoch": 8.221216442432885, "grad_norm": 1.1105198860168457, "learning_rate": 1.5203449303244099e-05, "loss": 1.4238, "step": 33150 }, { "epoch": 8.223696447392895, "grad_norm": 1.002714991569519, "learning_rate": 1.5162175304742632e-05, "loss": 1.4326, "step": 33160 }, { "epoch": 8.226176452352904, "grad_norm": 1.0198575258255005, "learning_rate": 1.512095281109992e-05, "loss": 1.4443, "step": 33170 }, { "epoch": 8.228656457312916, "grad_norm": 0.9816635251045227, "learning_rate": 1.5079781847342123e-05, "loss": 1.4123, "step": 33180 }, { "epoch": 8.231136462272925, "grad_norm": 1.0508211851119995, "learning_rate": 1.5038662438464001e-05, "loss": 1.4052, "step": 33190 }, { "epoch": 8.233616467232935, "grad_norm": 1.0052751302719116, "learning_rate": 1.4997594609429088e-05, "loss": 1.3232, "step": 33200 }, { "epoch": 8.236096472192944, "grad_norm": 1.048970341682434, "learning_rate": 1.4956578385169528e-05, "loss": 1.4079, "step": 33210 }, { "epoch": 8.238576477152954, "grad_norm": 1.1010568141937256, "learning_rate": 1.4915613790586225e-05, "loss": 1.3763, "step": 33220 }, { "epoch": 8.241056482112965, "grad_norm": 1.0694609880447388, "learning_rate": 1.4874700850548695e-05, "loss": 1.3859, "step": 33230 }, { "epoch": 8.243536487072975, "grad_norm": 1.1031140089035034, "learning_rate": 1.4833839589895071e-05, "loss": 1.358, "step": 33240 }, { "epoch": 8.246016492032984, "grad_norm": 1.1470513343811035, "learning_rate": 1.4793030033432142e-05, "loss": 1.4303, "step": 33250 }, { "epoch": 8.248496496992994, "grad_norm": 1.1016775369644165, "learning_rate": 1.4752272205935313e-05, "loss": 1.491, "step": 33260 }, { "epoch": 8.250976501953003, "grad_norm": 1.0250742435455322, "learning_rate": 1.4711566132148536e-05, "loss": 1.3813, "step": 33270 }, { "epoch": 8.253456506913015, "grad_norm": 1.0153090953826904, "learning_rate": 1.467091183678444e-05, "loss": 1.4163, "step": 33280 }, { "epoch": 8.255936511873024, "grad_norm": 1.034895420074463, "learning_rate": 1.4630309344524118e-05, "loss": 1.4455, "step": 33290 }, { "epoch": 8.258416516833034, "grad_norm": 0.9518606662750244, "learning_rate": 1.4589758680017263e-05, "loss": 1.4113, "step": 33300 }, { "epoch": 8.260896521793043, "grad_norm": 1.0583425760269165, "learning_rate": 1.4549259867882092e-05, "loss": 1.4058, "step": 33310 }, { "epoch": 8.263376526753053, "grad_norm": 1.0160610675811768, "learning_rate": 1.4508812932705363e-05, "loss": 1.3968, "step": 33320 }, { "epoch": 8.265856531713064, "grad_norm": 1.0411465167999268, "learning_rate": 1.4468417899042275e-05, "loss": 1.4378, "step": 33330 }, { "epoch": 8.268336536673074, "grad_norm": 1.0812145471572876, "learning_rate": 1.4428074791416624e-05, "loss": 1.3769, "step": 33340 }, { "epoch": 8.270816541633083, "grad_norm": 1.0007779598236084, "learning_rate": 1.4387783634320595e-05, "loss": 1.3606, "step": 33350 }, { "epoch": 8.273296546593093, "grad_norm": 1.0087769031524658, "learning_rate": 1.4347544452214868e-05, "loss": 1.3872, "step": 33360 }, { "epoch": 8.275776551553102, "grad_norm": 1.1092948913574219, "learning_rate": 1.4307357269528542e-05, "loss": 1.4013, "step": 33370 }, { "epoch": 8.278256556513114, "grad_norm": 1.0083212852478027, "learning_rate": 1.4267222110659218e-05, "loss": 1.4702, "step": 33380 }, { "epoch": 8.280736561473123, "grad_norm": 1.1285642385482788, "learning_rate": 1.42271389999728e-05, "loss": 1.4447, "step": 33390 }, { "epoch": 8.283216566433133, "grad_norm": 1.034905195236206, "learning_rate": 1.4187107961803703e-05, "loss": 1.4137, "step": 33400 }, { "epoch": 8.285696571393142, "grad_norm": 1.0655313730239868, "learning_rate": 1.4147129020454663e-05, "loss": 1.396, "step": 33410 }, { "epoch": 8.288176576353152, "grad_norm": 1.0830758810043335, "learning_rate": 1.4107202200196801e-05, "loss": 1.4299, "step": 33420 }, { "epoch": 8.290656581313163, "grad_norm": 1.1033685207366943, "learning_rate": 1.4067327525269636e-05, "loss": 1.3837, "step": 33430 }, { "epoch": 8.293136586273173, "grad_norm": 1.054334282875061, "learning_rate": 1.402750501988097e-05, "loss": 1.4024, "step": 33440 }, { "epoch": 8.295616591233182, "grad_norm": 1.046523094177246, "learning_rate": 1.398773470820698e-05, "loss": 1.3954, "step": 33450 }, { "epoch": 8.298096596193192, "grad_norm": 1.0344880819320679, "learning_rate": 1.3948016614392112e-05, "loss": 1.4287, "step": 33460 }, { "epoch": 8.300576601153203, "grad_norm": 1.0231508016586304, "learning_rate": 1.3908350762549138e-05, "loss": 1.381, "step": 33470 }, { "epoch": 8.303056606113213, "grad_norm": 1.1426787376403809, "learning_rate": 1.3868737176759106e-05, "loss": 1.3853, "step": 33480 }, { "epoch": 8.305536611073222, "grad_norm": 1.0430539846420288, "learning_rate": 1.3829175881071354e-05, "loss": 1.4398, "step": 33490 }, { "epoch": 8.308016616033232, "grad_norm": 1.0005583763122559, "learning_rate": 1.3789666899503462e-05, "loss": 1.4349, "step": 33500 }, { "epoch": 8.310496620993241, "grad_norm": 1.0415821075439453, "learning_rate": 1.3750210256041241e-05, "loss": 1.3943, "step": 33510 }, { "epoch": 8.312976625953253, "grad_norm": 1.0108755826950073, "learning_rate": 1.3710805974638696e-05, "loss": 1.3861, "step": 33520 }, { "epoch": 8.315456630913262, "grad_norm": 1.02773916721344, "learning_rate": 1.367145407921817e-05, "loss": 1.4026, "step": 33530 }, { "epoch": 8.317936635873272, "grad_norm": 1.1380155086517334, "learning_rate": 1.363215459367001e-05, "loss": 1.3809, "step": 33540 }, { "epoch": 8.320416640833281, "grad_norm": 1.0224312543869019, "learning_rate": 1.3592907541852906e-05, "loss": 1.4431, "step": 33550 }, { "epoch": 8.322896645793291, "grad_norm": 1.0225660800933838, "learning_rate": 1.3553712947593656e-05, "loss": 1.4379, "step": 33560 }, { "epoch": 8.325376650753302, "grad_norm": 1.1233614683151245, "learning_rate": 1.3514570834687202e-05, "loss": 1.4483, "step": 33570 }, { "epoch": 8.327856655713312, "grad_norm": 1.0314141511917114, "learning_rate": 1.3475481226896624e-05, "loss": 1.3876, "step": 33580 }, { "epoch": 8.330336660673321, "grad_norm": 1.0322446823120117, "learning_rate": 1.3436444147953187e-05, "loss": 1.3916, "step": 33590 }, { "epoch": 8.332816665633331, "grad_norm": 1.1228183507919312, "learning_rate": 1.339745962155613e-05, "loss": 1.4225, "step": 33600 }, { "epoch": 8.33529667059334, "grad_norm": 0.96470046043396, "learning_rate": 1.3358527671372956e-05, "loss": 1.3723, "step": 33610 }, { "epoch": 8.337776675553352, "grad_norm": 1.0331604480743408, "learning_rate": 1.3319648321039136e-05, "loss": 1.3941, "step": 33620 }, { "epoch": 8.340256680513361, "grad_norm": 1.0940603017807007, "learning_rate": 1.3280821594158243e-05, "loss": 1.4059, "step": 33630 }, { "epoch": 8.34273668547337, "grad_norm": 0.9891999959945679, "learning_rate": 1.3242047514301858e-05, "loss": 1.3959, "step": 33640 }, { "epoch": 8.34521669043338, "grad_norm": 1.0726075172424316, "learning_rate": 1.320332610500974e-05, "loss": 1.4053, "step": 33650 }, { "epoch": 8.34769669539339, "grad_norm": 1.1014840602874756, "learning_rate": 1.3164657389789458e-05, "loss": 1.3337, "step": 33660 }, { "epoch": 8.350176700353401, "grad_norm": 1.047423005104065, "learning_rate": 1.3126041392116772e-05, "loss": 1.3769, "step": 33670 }, { "epoch": 8.35265670531341, "grad_norm": 1.0521049499511719, "learning_rate": 1.308747813543536e-05, "loss": 1.4227, "step": 33680 }, { "epoch": 8.35513671027342, "grad_norm": 0.9757634997367859, "learning_rate": 1.3048967643156884e-05, "loss": 1.3741, "step": 33690 }, { "epoch": 8.35761671523343, "grad_norm": 1.0512996912002563, "learning_rate": 1.3010509938660965e-05, "loss": 1.413, "step": 33700 }, { "epoch": 8.36009672019344, "grad_norm": 1.1035054922103882, "learning_rate": 1.2972105045295247e-05, "loss": 1.4607, "step": 33710 }, { "epoch": 8.36257672515345, "grad_norm": 1.0338871479034424, "learning_rate": 1.293375298637518e-05, "loss": 1.4175, "step": 33720 }, { "epoch": 8.36505673011346, "grad_norm": 1.0466989278793335, "learning_rate": 1.2895453785184275e-05, "loss": 1.3691, "step": 33730 }, { "epoch": 8.36753673507347, "grad_norm": 1.0674350261688232, "learning_rate": 1.2857207464973875e-05, "loss": 1.4637, "step": 33740 }, { "epoch": 8.37001674003348, "grad_norm": 1.09450101852417, "learning_rate": 1.281901404896323e-05, "loss": 1.3582, "step": 33750 }, { "epoch": 8.37249674499349, "grad_norm": 1.075958251953125, "learning_rate": 1.2780873560339468e-05, "loss": 1.4301, "step": 33760 }, { "epoch": 8.3749767499535, "grad_norm": 1.1581681966781616, "learning_rate": 1.2742786022257659e-05, "loss": 1.4585, "step": 33770 }, { "epoch": 8.37745675491351, "grad_norm": 1.0443540811538696, "learning_rate": 1.270475145784057e-05, "loss": 1.411, "step": 33780 }, { "epoch": 8.37993675987352, "grad_norm": 1.0078887939453125, "learning_rate": 1.2666769890178975e-05, "loss": 1.3823, "step": 33790 }, { "epoch": 8.382416764833529, "grad_norm": 1.0253844261169434, "learning_rate": 1.2628841342331388e-05, "loss": 1.4208, "step": 33800 }, { "epoch": 8.38489676979354, "grad_norm": 0.9839961528778076, "learning_rate": 1.2590965837324131e-05, "loss": 1.4099, "step": 33810 }, { "epoch": 8.38737677475355, "grad_norm": 1.0767897367477417, "learning_rate": 1.2553143398151324e-05, "loss": 1.4056, "step": 33820 }, { "epoch": 8.38985677971356, "grad_norm": 1.0594230890274048, "learning_rate": 1.2515374047774941e-05, "loss": 1.4008, "step": 33830 }, { "epoch": 8.392336784673569, "grad_norm": 1.0357154607772827, "learning_rate": 1.2477657809124631e-05, "loss": 1.3909, "step": 33840 }, { "epoch": 8.394816789633579, "grad_norm": 1.07870614528656, "learning_rate": 1.2439994705097869e-05, "loss": 1.439, "step": 33850 }, { "epoch": 8.39729679459359, "grad_norm": 1.042245626449585, "learning_rate": 1.2402384758559815e-05, "loss": 1.4808, "step": 33860 }, { "epoch": 8.3997767995536, "grad_norm": 1.1021331548690796, "learning_rate": 1.2364827992343397e-05, "loss": 1.401, "step": 33870 }, { "epoch": 8.402256804513609, "grad_norm": 1.0259779691696167, "learning_rate": 1.2327324429249232e-05, "loss": 1.4524, "step": 33880 }, { "epoch": 8.404736809473619, "grad_norm": 1.0595521926879883, "learning_rate": 1.2289874092045684e-05, "loss": 1.475, "step": 33890 }, { "epoch": 8.407216814433628, "grad_norm": 1.1025539636611938, "learning_rate": 1.2252477003468743e-05, "loss": 1.5016, "step": 33900 }, { "epoch": 8.40969681939364, "grad_norm": 1.0166747570037842, "learning_rate": 1.2215133186222128e-05, "loss": 1.3846, "step": 33910 }, { "epoch": 8.412176824353649, "grad_norm": 0.9710475206375122, "learning_rate": 1.2177842662977135e-05, "loss": 1.3827, "step": 33920 }, { "epoch": 8.414656829313659, "grad_norm": 1.0323448181152344, "learning_rate": 1.2140605456372855e-05, "loss": 1.4043, "step": 33930 }, { "epoch": 8.417136834273668, "grad_norm": 1.0237551927566528, "learning_rate": 1.2103421589015806e-05, "loss": 1.3894, "step": 33940 }, { "epoch": 8.419616839233678, "grad_norm": 1.030096173286438, "learning_rate": 1.2066291083480296e-05, "loss": 1.3108, "step": 33950 }, { "epoch": 8.422096844193689, "grad_norm": 1.0543568134307861, "learning_rate": 1.2029213962308172e-05, "loss": 1.385, "step": 33960 }, { "epoch": 8.424576849153699, "grad_norm": 0.9940221905708313, "learning_rate": 1.1992190248008861e-05, "loss": 1.3932, "step": 33970 }, { "epoch": 8.427056854113708, "grad_norm": 1.0462923049926758, "learning_rate": 1.195521996305936e-05, "loss": 1.3871, "step": 33980 }, { "epoch": 8.429536859073718, "grad_norm": 1.0675877332687378, "learning_rate": 1.1918303129904317e-05, "loss": 1.3765, "step": 33990 }, { "epoch": 8.432016864033727, "grad_norm": 1.0424073934555054, "learning_rate": 1.188143977095576e-05, "loss": 1.4018, "step": 34000 }, { "epoch": 8.434496868993739, "grad_norm": 1.0719586610794067, "learning_rate": 1.1844629908593441e-05, "loss": 1.406, "step": 34010 }, { "epoch": 8.436976873953748, "grad_norm": 1.080687165260315, "learning_rate": 1.1807873565164506e-05, "loss": 1.446, "step": 34020 }, { "epoch": 8.439456878913758, "grad_norm": 1.0027790069580078, "learning_rate": 1.1771170762983641e-05, "loss": 1.4801, "step": 34030 }, { "epoch": 8.441936883873767, "grad_norm": 1.3560655117034912, "learning_rate": 1.1734521524333086e-05, "loss": 1.4105, "step": 34040 }, { "epoch": 8.444416888833779, "grad_norm": 1.0444145202636719, "learning_rate": 1.1697925871462467e-05, "loss": 1.4516, "step": 34050 }, { "epoch": 8.446896893793788, "grad_norm": 1.1016205549240112, "learning_rate": 1.1661383826588957e-05, "loss": 1.4534, "step": 34060 }, { "epoch": 8.449376898753798, "grad_norm": 1.0291553735733032, "learning_rate": 1.1624895411897107e-05, "loss": 1.3971, "step": 34070 }, { "epoch": 8.451856903713807, "grad_norm": 1.014909029006958, "learning_rate": 1.1588460649539035e-05, "loss": 1.4349, "step": 34080 }, { "epoch": 8.454336908673817, "grad_norm": 1.1258878707885742, "learning_rate": 1.155207956163411e-05, "loss": 1.4256, "step": 34090 }, { "epoch": 8.456816913633828, "grad_norm": 1.0256129503250122, "learning_rate": 1.1515752170269278e-05, "loss": 1.3964, "step": 34100 }, { "epoch": 8.459296918593838, "grad_norm": 0.9788112044334412, "learning_rate": 1.1479478497498797e-05, "loss": 1.4095, "step": 34110 }, { "epoch": 8.461776923553847, "grad_norm": 1.0624443292617798, "learning_rate": 1.1443258565344329e-05, "loss": 1.4325, "step": 34120 }, { "epoch": 8.464256928513857, "grad_norm": 1.0203397274017334, "learning_rate": 1.1407092395794906e-05, "loss": 1.4189, "step": 34130 }, { "epoch": 8.466736933473866, "grad_norm": 1.0989471673965454, "learning_rate": 1.1370980010806997e-05, "loss": 1.418, "step": 34140 }, { "epoch": 8.469216938433878, "grad_norm": 1.0702160596847534, "learning_rate": 1.1334921432304257e-05, "loss": 1.3736, "step": 34150 }, { "epoch": 8.471696943393887, "grad_norm": 1.0528777837753296, "learning_rate": 1.129891668217783e-05, "loss": 1.3751, "step": 34160 }, { "epoch": 8.474176948353897, "grad_norm": 0.9908372163772583, "learning_rate": 1.1262965782286116e-05, "loss": 1.4225, "step": 34170 }, { "epoch": 8.476656953313906, "grad_norm": 1.0847793817520142, "learning_rate": 1.1227068754454816e-05, "loss": 1.3987, "step": 34180 }, { "epoch": 8.479136958273916, "grad_norm": 1.0748522281646729, "learning_rate": 1.119122562047692e-05, "loss": 1.392, "step": 34190 }, { "epoch": 8.481616963233927, "grad_norm": 1.1547174453735352, "learning_rate": 1.1155436402112785e-05, "loss": 1.4226, "step": 34200 }, { "epoch": 8.484096968193937, "grad_norm": 0.9955134391784668, "learning_rate": 1.1119701121089876e-05, "loss": 1.4093, "step": 34210 }, { "epoch": 8.486576973153946, "grad_norm": 0.9794337153434753, "learning_rate": 1.1084019799103074e-05, "loss": 1.3061, "step": 34220 }, { "epoch": 8.489056978113956, "grad_norm": 1.0419962406158447, "learning_rate": 1.1048392457814405e-05, "loss": 1.4271, "step": 34230 }, { "epoch": 8.491536983073965, "grad_norm": 1.068056344985962, "learning_rate": 1.1012819118853147e-05, "loss": 1.4002, "step": 34240 }, { "epoch": 8.494016988033977, "grad_norm": 1.0033705234527588, "learning_rate": 1.0977299803815788e-05, "loss": 1.3555, "step": 34250 }, { "epoch": 8.496496992993986, "grad_norm": 1.0239999294281006, "learning_rate": 1.0941834534266083e-05, "loss": 1.4237, "step": 34260 }, { "epoch": 8.498976997953996, "grad_norm": 1.0543749332427979, "learning_rate": 1.0906423331734839e-05, "loss": 1.4626, "step": 34270 }, { "epoch": 8.501457002914005, "grad_norm": 0.9943702816963196, "learning_rate": 1.0871066217720172e-05, "loss": 1.388, "step": 34280 }, { "epoch": 8.503937007874015, "grad_norm": 1.0968214273452759, "learning_rate": 1.08357632136873e-05, "loss": 1.4635, "step": 34290 }, { "epoch": 8.506417012834026, "grad_norm": 1.0669578313827515, "learning_rate": 1.080051434106859e-05, "loss": 1.3861, "step": 34300 }, { "epoch": 8.508897017794036, "grad_norm": 1.0519565343856812, "learning_rate": 1.0765319621263548e-05, "loss": 1.4476, "step": 34310 }, { "epoch": 8.511377022754045, "grad_norm": 1.0022358894348145, "learning_rate": 1.0730179075638868e-05, "loss": 1.4748, "step": 34320 }, { "epoch": 8.513857027714055, "grad_norm": 1.0759047269821167, "learning_rate": 1.069509272552821e-05, "loss": 1.4024, "step": 34330 }, { "epoch": 8.516337032674066, "grad_norm": 1.0005208253860474, "learning_rate": 1.0660060592232491e-05, "loss": 1.4147, "step": 34340 }, { "epoch": 8.518817037634076, "grad_norm": 1.0109820365905762, "learning_rate": 1.0625082697019628e-05, "loss": 1.3991, "step": 34350 }, { "epoch": 8.521297042594085, "grad_norm": 1.0770360231399536, "learning_rate": 1.0590159061124605e-05, "loss": 1.4605, "step": 34360 }, { "epoch": 8.523777047554095, "grad_norm": 1.0791715383529663, "learning_rate": 1.0555289705749483e-05, "loss": 1.4202, "step": 34370 }, { "epoch": 8.526257052514104, "grad_norm": 1.031917929649353, "learning_rate": 1.0520474652063394e-05, "loss": 1.4212, "step": 34380 }, { "epoch": 8.528737057474116, "grad_norm": 1.0541309118270874, "learning_rate": 1.0485713921202489e-05, "loss": 1.4647, "step": 34390 }, { "epoch": 8.531217062434125, "grad_norm": 1.1268516778945923, "learning_rate": 1.0451007534269907e-05, "loss": 1.3857, "step": 34400 }, { "epoch": 8.533697067394135, "grad_norm": 1.0549654960632324, "learning_rate": 1.041635551233583e-05, "loss": 1.4261, "step": 34410 }, { "epoch": 8.536177072354144, "grad_norm": 1.158935785293579, "learning_rate": 1.0381757876437425e-05, "loss": 1.4153, "step": 34420 }, { "epoch": 8.538657077314154, "grad_norm": 1.0003379583358765, "learning_rate": 1.0347214647578818e-05, "loss": 1.4511, "step": 34430 }, { "epoch": 8.541137082274165, "grad_norm": 1.0294896364212036, "learning_rate": 1.0312725846731175e-05, "loss": 1.3688, "step": 34440 }, { "epoch": 8.543617087234175, "grad_norm": 1.0496424436569214, "learning_rate": 1.0278291494832538e-05, "loss": 1.3769, "step": 34450 }, { "epoch": 8.546097092194184, "grad_norm": 1.0207414627075195, "learning_rate": 1.0243911612787948e-05, "loss": 1.4401, "step": 34460 }, { "epoch": 8.548577097154194, "grad_norm": 1.054727554321289, "learning_rate": 1.0209586221469335e-05, "loss": 1.4387, "step": 34470 }, { "epoch": 8.551057102114203, "grad_norm": 1.0674917697906494, "learning_rate": 1.0175315341715597e-05, "loss": 1.4405, "step": 34480 }, { "epoch": 8.553537107074215, "grad_norm": 1.025215744972229, "learning_rate": 1.0141098994332454e-05, "loss": 1.4608, "step": 34490 }, { "epoch": 8.556017112034224, "grad_norm": 1.0221035480499268, "learning_rate": 1.0106937200092648e-05, "loss": 1.4309, "step": 34500 }, { "epoch": 8.558497116994234, "grad_norm": 1.072180151939392, "learning_rate": 1.0072829979735699e-05, "loss": 1.4098, "step": 34510 }, { "epoch": 8.560977121954243, "grad_norm": 1.0292712450027466, "learning_rate": 1.003877735396801e-05, "loss": 1.3924, "step": 34520 }, { "epoch": 8.563457126914255, "grad_norm": 1.0132172107696533, "learning_rate": 1.0004779343462867e-05, "loss": 1.4269, "step": 34530 }, { "epoch": 8.565937131874264, "grad_norm": 1.067787766456604, "learning_rate": 9.970835968860414e-06, "loss": 1.4548, "step": 34540 }, { "epoch": 8.568417136834274, "grad_norm": 1.0257725715637207, "learning_rate": 9.936947250767558e-06, "loss": 1.4267, "step": 34550 }, { "epoch": 8.570897141794283, "grad_norm": 1.1108858585357666, "learning_rate": 9.903113209758096e-06, "loss": 1.3806, "step": 34560 }, { "epoch": 8.573377146754293, "grad_norm": 1.0043811798095703, "learning_rate": 9.869333866372587e-06, "loss": 1.4635, "step": 34570 }, { "epoch": 8.575857151714303, "grad_norm": 0.9807415008544922, "learning_rate": 9.835609241118404e-06, "loss": 1.4018, "step": 34580 }, { "epoch": 8.578337156674314, "grad_norm": 1.0493428707122803, "learning_rate": 9.80193935446967e-06, "loss": 1.4173, "step": 34590 }, { "epoch": 8.580817161634323, "grad_norm": 1.2100938558578491, "learning_rate": 9.768324226867353e-06, "loss": 1.4688, "step": 34600 }, { "epoch": 8.583297166594333, "grad_norm": 1.0792169570922852, "learning_rate": 9.734763878719067e-06, "loss": 1.3982, "step": 34610 }, { "epoch": 8.585777171554343, "grad_norm": 1.0124642848968506, "learning_rate": 9.701258330399255e-06, "loss": 1.4573, "step": 34620 }, { "epoch": 8.588257176514354, "grad_norm": 1.063339114189148, "learning_rate": 9.667807602249057e-06, "loss": 1.4195, "step": 34630 }, { "epoch": 8.590737181474363, "grad_norm": 1.03120756149292, "learning_rate": 9.634411714576353e-06, "loss": 1.4046, "step": 34640 }, { "epoch": 8.593217186434373, "grad_norm": 1.0576833486557007, "learning_rate": 9.601070687655667e-06, "loss": 1.4082, "step": 34650 }, { "epoch": 8.595697191394382, "grad_norm": 1.1593866348266602, "learning_rate": 9.56778454172832e-06, "loss": 1.4311, "step": 34660 }, { "epoch": 8.598177196354392, "grad_norm": 1.015978217124939, "learning_rate": 9.534553297002225e-06, "loss": 1.4241, "step": 34670 }, { "epoch": 8.600657201314403, "grad_norm": 1.0379754304885864, "learning_rate": 9.501376973651999e-06, "loss": 1.409, "step": 34680 }, { "epoch": 8.603137206274413, "grad_norm": 1.065412998199463, "learning_rate": 9.468255591818953e-06, "loss": 1.4255, "step": 34690 }, { "epoch": 8.605617211234422, "grad_norm": 1.0218896865844727, "learning_rate": 9.435189171610948e-06, "loss": 1.3884, "step": 34700 }, { "epoch": 8.608097216194432, "grad_norm": 1.1107914447784424, "learning_rate": 9.402177733102579e-06, "loss": 1.3714, "step": 34710 }, { "epoch": 8.610577221154442, "grad_norm": 1.1440714597702026, "learning_rate": 9.369221296335006e-06, "loss": 1.4462, "step": 34720 }, { "epoch": 8.613057226114453, "grad_norm": 0.9951843023300171, "learning_rate": 9.336319881316014e-06, "loss": 1.4014, "step": 34730 }, { "epoch": 8.615537231074462, "grad_norm": 1.0567536354064941, "learning_rate": 9.303473508019944e-06, "loss": 1.4029, "step": 34740 }, { "epoch": 8.618017236034472, "grad_norm": 1.1181285381317139, "learning_rate": 9.270682196387836e-06, "loss": 1.4163, "step": 34750 }, { "epoch": 8.620497240994482, "grad_norm": 1.0565319061279297, "learning_rate": 9.237945966327133e-06, "loss": 1.3815, "step": 34760 }, { "epoch": 8.622977245954491, "grad_norm": 1.017255187034607, "learning_rate": 9.205264837711991e-06, "loss": 1.4408, "step": 34770 }, { "epoch": 8.625457250914502, "grad_norm": 1.1201671361923218, "learning_rate": 9.17263883038304e-06, "loss": 1.426, "step": 34780 }, { "epoch": 8.627937255874512, "grad_norm": 1.0846129655838013, "learning_rate": 9.140067964147447e-06, "loss": 1.4052, "step": 34790 }, { "epoch": 8.630417260834522, "grad_norm": 1.1419168710708618, "learning_rate": 9.107552258778907e-06, "loss": 1.4286, "step": 34800 }, { "epoch": 8.632897265794531, "grad_norm": 1.0810461044311523, "learning_rate": 9.075091734017682e-06, "loss": 1.4161, "step": 34810 }, { "epoch": 8.635377270754542, "grad_norm": 1.0595791339874268, "learning_rate": 9.04268640957041e-06, "loss": 1.3417, "step": 34820 }, { "epoch": 8.637857275714552, "grad_norm": 1.0288753509521484, "learning_rate": 9.010336305110345e-06, "loss": 1.3905, "step": 34830 }, { "epoch": 8.640337280674562, "grad_norm": 1.0574288368225098, "learning_rate": 8.978041440277163e-06, "loss": 1.4734, "step": 34840 }, { "epoch": 8.642817285634571, "grad_norm": 1.0483571290969849, "learning_rate": 8.945801834676992e-06, "loss": 1.3855, "step": 34850 }, { "epoch": 8.64529729059458, "grad_norm": 1.087746262550354, "learning_rate": 8.91361750788241e-06, "loss": 1.3799, "step": 34860 }, { "epoch": 8.64777729555459, "grad_norm": 1.0188192129135132, "learning_rate": 8.88148847943251e-06, "loss": 1.3831, "step": 34870 }, { "epoch": 8.650257300514602, "grad_norm": 1.0634173154830933, "learning_rate": 8.849414768832687e-06, "loss": 1.4252, "step": 34880 }, { "epoch": 8.652737305474611, "grad_norm": 0.9798762798309326, "learning_rate": 8.817396395554866e-06, "loss": 1.3535, "step": 34890 }, { "epoch": 8.65521731043462, "grad_norm": 1.0635244846343994, "learning_rate": 8.78543337903731e-06, "loss": 1.3672, "step": 34900 }, { "epoch": 8.65769731539463, "grad_norm": 1.0241966247558594, "learning_rate": 8.75352573868471e-06, "loss": 1.4077, "step": 34910 }, { "epoch": 8.660177320354641, "grad_norm": 1.1106797456741333, "learning_rate": 8.72167349386811e-06, "loss": 1.4681, "step": 34920 }, { "epoch": 8.662657325314651, "grad_norm": 1.0830975770950317, "learning_rate": 8.689876663924957e-06, "loss": 1.4076, "step": 34930 }, { "epoch": 8.66513733027466, "grad_norm": 1.0571904182434082, "learning_rate": 8.658135268159018e-06, "loss": 1.4542, "step": 34940 }, { "epoch": 8.66761733523467, "grad_norm": 1.0933750867843628, "learning_rate": 8.626449325840447e-06, "loss": 1.4156, "step": 34950 }, { "epoch": 8.67009734019468, "grad_norm": 1.0800130367279053, "learning_rate": 8.5948188562057e-06, "loss": 1.3905, "step": 34960 }, { "epoch": 8.672577345154691, "grad_norm": 0.9996431469917297, "learning_rate": 8.56324387845756e-06, "loss": 1.5192, "step": 34970 }, { "epoch": 8.6750573501147, "grad_norm": 1.0539569854736328, "learning_rate": 8.531724411765119e-06, "loss": 1.3983, "step": 34980 }, { "epoch": 8.67753735507471, "grad_norm": 1.0754151344299316, "learning_rate": 8.5002604752638e-06, "loss": 1.4119, "step": 34990 }, { "epoch": 8.68001736003472, "grad_norm": 1.1121124029159546, "learning_rate": 8.46885208805529e-06, "loss": 1.4655, "step": 35000 }, { "epoch": 8.68249736499473, "grad_norm": 1.0148959159851074, "learning_rate": 8.437499269207539e-06, "loss": 1.3647, "step": 35010 }, { "epoch": 8.68497736995474, "grad_norm": 1.091356873512268, "learning_rate": 8.406202037754774e-06, "loss": 1.4008, "step": 35020 }, { "epoch": 8.68745737491475, "grad_norm": 1.0934149026870728, "learning_rate": 8.374960412697485e-06, "loss": 1.4645, "step": 35030 }, { "epoch": 8.68993737987476, "grad_norm": 1.1903691291809082, "learning_rate": 8.343774413002381e-06, "loss": 1.4172, "step": 35040 }, { "epoch": 8.69241738483477, "grad_norm": 1.0634151697158813, "learning_rate": 8.312644057602436e-06, "loss": 1.4175, "step": 35050 }, { "epoch": 8.694897389794779, "grad_norm": 1.0880907773971558, "learning_rate": 8.281569365396812e-06, "loss": 1.4677, "step": 35060 }, { "epoch": 8.69737739475479, "grad_norm": 1.0232330560684204, "learning_rate": 8.250550355250875e-06, "loss": 1.459, "step": 35070 }, { "epoch": 8.6998573997148, "grad_norm": 1.0494637489318848, "learning_rate": 8.219587045996203e-06, "loss": 1.4531, "step": 35080 }, { "epoch": 8.70233740467481, "grad_norm": 1.0898422002792358, "learning_rate": 8.18867945643058e-06, "loss": 1.385, "step": 35090 }, { "epoch": 8.704817409634819, "grad_norm": 1.0978202819824219, "learning_rate": 8.157827605317892e-06, "loss": 1.3983, "step": 35100 }, { "epoch": 8.70729741459483, "grad_norm": 1.0695819854736328, "learning_rate": 8.127031511388273e-06, "loss": 1.444, "step": 35110 }, { "epoch": 8.70977741955484, "grad_norm": 1.1173222064971924, "learning_rate": 8.096291193337934e-06, "loss": 1.3457, "step": 35120 }, { "epoch": 8.71225742451485, "grad_norm": 1.062896966934204, "learning_rate": 8.065606669829273e-06, "loss": 1.463, "step": 35130 }, { "epoch": 8.714737429474859, "grad_norm": 1.0228677988052368, "learning_rate": 8.034977959490775e-06, "loss": 1.4077, "step": 35140 }, { "epoch": 8.717217434434868, "grad_norm": 1.1282140016555786, "learning_rate": 8.00440508091711e-06, "loss": 1.4278, "step": 35150 }, { "epoch": 8.719697439394878, "grad_norm": 1.1063153743743896, "learning_rate": 7.973888052668943e-06, "loss": 1.3547, "step": 35160 }, { "epoch": 8.72217744435489, "grad_norm": 1.0718064308166504, "learning_rate": 7.943426893273142e-06, "loss": 1.3838, "step": 35170 }, { "epoch": 8.724657449314899, "grad_norm": 0.9802948832511902, "learning_rate": 7.913021621222604e-06, "loss": 1.4587, "step": 35180 }, { "epoch": 8.727137454274908, "grad_norm": 1.1451220512390137, "learning_rate": 7.882672254976297e-06, "loss": 1.3912, "step": 35190 }, { "epoch": 8.729617459234918, "grad_norm": 1.0279031991958618, "learning_rate": 7.852378812959227e-06, "loss": 1.4077, "step": 35200 }, { "epoch": 8.73209746419493, "grad_norm": 1.0391227006912231, "learning_rate": 7.822141313562547e-06, "loss": 1.4476, "step": 35210 }, { "epoch": 8.734577469154939, "grad_norm": 1.1570180654525757, "learning_rate": 7.791959775143298e-06, "loss": 1.3834, "step": 35220 }, { "epoch": 8.737057474114948, "grad_norm": 1.0537258386611938, "learning_rate": 7.761834216024678e-06, "loss": 1.3613, "step": 35230 }, { "epoch": 8.739537479074958, "grad_norm": 0.986975371837616, "learning_rate": 7.731764654495832e-06, "loss": 1.4096, "step": 35240 }, { "epoch": 8.742017484034967, "grad_norm": 1.1508692502975464, "learning_rate": 7.701751108811938e-06, "loss": 1.4927, "step": 35250 }, { "epoch": 8.744497488994979, "grad_norm": 1.0710281133651733, "learning_rate": 7.67179359719411e-06, "loss": 1.3809, "step": 35260 }, { "epoch": 8.746977493954988, "grad_norm": 1.0391231775283813, "learning_rate": 7.641892137829566e-06, "loss": 1.3815, "step": 35270 }, { "epoch": 8.749457498914998, "grad_norm": 1.0123393535614014, "learning_rate": 7.612046748871327e-06, "loss": 1.3923, "step": 35280 }, { "epoch": 8.751937503875007, "grad_norm": 1.0555802583694458, "learning_rate": 7.582257448438512e-06, "loss": 1.382, "step": 35290 }, { "epoch": 8.754417508835017, "grad_norm": 1.1561102867126465, "learning_rate": 7.552524254616178e-06, "loss": 1.3976, "step": 35300 }, { "epoch": 8.756897513795028, "grad_norm": 1.1613948345184326, "learning_rate": 7.522847185455195e-06, "loss": 1.4498, "step": 35310 }, { "epoch": 8.759377518755038, "grad_norm": 1.0044676065444946, "learning_rate": 7.493226258972519e-06, "loss": 1.3991, "step": 35320 }, { "epoch": 8.761857523715047, "grad_norm": 1.0604032278060913, "learning_rate": 7.4636614931509304e-06, "loss": 1.3658, "step": 35330 }, { "epoch": 8.764337528675057, "grad_norm": 1.0889891386032104, "learning_rate": 7.434152905939118e-06, "loss": 1.4178, "step": 35340 }, { "epoch": 8.766817533635066, "grad_norm": 1.1055028438568115, "learning_rate": 7.404700515251672e-06, "loss": 1.4237, "step": 35350 }, { "epoch": 8.769297538595078, "grad_norm": 1.104724645614624, "learning_rate": 7.375304338969136e-06, "loss": 1.4087, "step": 35360 }, { "epoch": 8.771777543555087, "grad_norm": 1.016101598739624, "learning_rate": 7.345964394937787e-06, "loss": 1.3964, "step": 35370 }, { "epoch": 8.774257548515097, "grad_norm": 1.0164542198181152, "learning_rate": 7.316680700969891e-06, "loss": 1.4447, "step": 35380 }, { "epoch": 8.776737553475106, "grad_norm": 0.9957998991012573, "learning_rate": 7.287453274843492e-06, "loss": 1.3885, "step": 35390 }, { "epoch": 8.779217558435118, "grad_norm": 1.13089919090271, "learning_rate": 7.258282134302519e-06, "loss": 1.4144, "step": 35400 }, { "epoch": 8.781697563395127, "grad_norm": 1.068381428718567, "learning_rate": 7.229167297056671e-06, "loss": 1.3725, "step": 35410 }, { "epoch": 8.784177568355137, "grad_norm": 1.0708982944488525, "learning_rate": 7.200108780781556e-06, "loss": 1.4271, "step": 35420 }, { "epoch": 8.786657573315146, "grad_norm": 1.0746763944625854, "learning_rate": 7.171106603118482e-06, "loss": 1.401, "step": 35430 }, { "epoch": 8.789137578275156, "grad_norm": 1.074387550354004, "learning_rate": 7.142160781674645e-06, "loss": 1.4837, "step": 35440 }, { "epoch": 8.791617583235167, "grad_norm": 1.0946547985076904, "learning_rate": 7.1132713340229755e-06, "loss": 1.3906, "step": 35450 }, { "epoch": 8.794097588195177, "grad_norm": 1.0786830186843872, "learning_rate": 7.084438277702188e-06, "loss": 1.3938, "step": 35460 }, { "epoch": 8.796577593155186, "grad_norm": 1.033219575881958, "learning_rate": 7.05566163021677e-06, "loss": 1.3912, "step": 35470 }, { "epoch": 8.799057598115196, "grad_norm": 1.1158262491226196, "learning_rate": 7.026941409036991e-06, "loss": 1.459, "step": 35480 }, { "epoch": 8.801537603075205, "grad_norm": 0.9801075458526611, "learning_rate": 6.998277631598793e-06, "loss": 1.4496, "step": 35490 }, { "epoch": 8.804017608035217, "grad_norm": 1.01754891872406, "learning_rate": 6.969670315303911e-06, "loss": 1.5087, "step": 35500 }, { "epoch": 8.806497612995226, "grad_norm": 1.0917232036590576, "learning_rate": 6.941119477519798e-06, "loss": 1.3484, "step": 35510 }, { "epoch": 8.808977617955236, "grad_norm": 0.9824590086936951, "learning_rate": 6.9126251355795864e-06, "loss": 1.4118, "step": 35520 }, { "epoch": 8.811457622915245, "grad_norm": 1.0820149183273315, "learning_rate": 6.884187306782119e-06, "loss": 1.4271, "step": 35530 }, { "epoch": 8.813937627875255, "grad_norm": 1.0591022968292236, "learning_rate": 6.855806008391974e-06, "loss": 1.3741, "step": 35540 }, { "epoch": 8.816417632835266, "grad_norm": 0.9803717732429504, "learning_rate": 6.827481257639345e-06, "loss": 1.4386, "step": 35550 }, { "epoch": 8.818897637795276, "grad_norm": 1.0050376653671265, "learning_rate": 6.7992130717201564e-06, "loss": 1.36, "step": 35560 }, { "epoch": 8.821377642755285, "grad_norm": 1.0928151607513428, "learning_rate": 6.771001467795934e-06, "loss": 1.3858, "step": 35570 }, { "epoch": 8.823857647715295, "grad_norm": 1.05537748336792, "learning_rate": 6.742846462993901e-06, "loss": 1.3562, "step": 35580 }, { "epoch": 8.826337652675305, "grad_norm": 1.044603943824768, "learning_rate": 6.714748074406874e-06, "loss": 1.4484, "step": 35590 }, { "epoch": 8.828817657635316, "grad_norm": 1.0399260520935059, "learning_rate": 6.68670631909335e-06, "loss": 1.3723, "step": 35600 }, { "epoch": 8.831297662595325, "grad_norm": 1.0280603170394897, "learning_rate": 6.658721214077412e-06, "loss": 1.4067, "step": 35610 }, { "epoch": 8.833777667555335, "grad_norm": 1.0593305826187134, "learning_rate": 6.630792776348749e-06, "loss": 1.3742, "step": 35620 }, { "epoch": 8.836257672515345, "grad_norm": 1.0912466049194336, "learning_rate": 6.6029210228626626e-06, "loss": 1.4118, "step": 35630 }, { "epoch": 8.838737677475354, "grad_norm": 1.05013108253479, "learning_rate": 6.5751059705400295e-06, "loss": 1.4158, "step": 35640 }, { "epoch": 8.841217682435365, "grad_norm": 0.9891907572746277, "learning_rate": 6.547347636267287e-06, "loss": 1.4039, "step": 35650 }, { "epoch": 8.843697687395375, "grad_norm": 1.0187528133392334, "learning_rate": 6.519646036896499e-06, "loss": 1.3678, "step": 35660 }, { "epoch": 8.846177692355385, "grad_norm": 1.0717898607254028, "learning_rate": 6.492001189245223e-06, "loss": 1.4115, "step": 35670 }, { "epoch": 8.848657697315394, "grad_norm": 1.029092788696289, "learning_rate": 6.464413110096601e-06, "loss": 1.3866, "step": 35680 }, { "epoch": 8.851137702275405, "grad_norm": 1.0228272676467896, "learning_rate": 6.436881816199258e-06, "loss": 1.373, "step": 35690 }, { "epoch": 8.853617707235415, "grad_norm": 1.0140708684921265, "learning_rate": 6.409407324267447e-06, "loss": 1.3329, "step": 35700 }, { "epoch": 8.856097712195425, "grad_norm": 0.9810483455657959, "learning_rate": 6.381989650980802e-06, "loss": 1.4641, "step": 35710 }, { "epoch": 8.858577717155434, "grad_norm": 1.127550721168518, "learning_rate": 6.354628812984576e-06, "loss": 1.4714, "step": 35720 }, { "epoch": 8.861057722115444, "grad_norm": 1.0540467500686646, "learning_rate": 6.327324826889469e-06, "loss": 1.3891, "step": 35730 }, { "epoch": 8.863537727075455, "grad_norm": 1.0507473945617676, "learning_rate": 6.3000777092716566e-06, "loss": 1.3931, "step": 35740 }, { "epoch": 8.866017732035465, "grad_norm": 1.0516712665557861, "learning_rate": 6.272887476672806e-06, "loss": 1.4759, "step": 35750 }, { "epoch": 8.868497736995474, "grad_norm": 1.0787099599838257, "learning_rate": 6.245754145600091e-06, "loss": 1.3865, "step": 35760 }, { "epoch": 8.870977741955484, "grad_norm": 1.0408920049667358, "learning_rate": 6.218677732526035e-06, "loss": 1.4059, "step": 35770 }, { "epoch": 8.873457746915493, "grad_norm": 1.0530054569244385, "learning_rate": 6.191658253888699e-06, "loss": 1.3508, "step": 35780 }, { "epoch": 8.875937751875504, "grad_norm": 1.031050443649292, "learning_rate": 6.164695726091563e-06, "loss": 1.4165, "step": 35790 }, { "epoch": 8.878417756835514, "grad_norm": 1.0282436609268188, "learning_rate": 6.137790165503499e-06, "loss": 1.4261, "step": 35800 }, { "epoch": 8.880897761795524, "grad_norm": 1.0038362741470337, "learning_rate": 6.110941588458807e-06, "loss": 1.4105, "step": 35810 }, { "epoch": 8.883377766755533, "grad_norm": 1.0419639348983765, "learning_rate": 6.084150011257239e-06, "loss": 1.3816, "step": 35820 }, { "epoch": 8.885857771715543, "grad_norm": 1.1044398546218872, "learning_rate": 6.057415450163839e-06, "loss": 1.3929, "step": 35830 }, { "epoch": 8.888337776675554, "grad_norm": 1.10807466506958, "learning_rate": 6.030737921409169e-06, "loss": 1.3409, "step": 35840 }, { "epoch": 8.890817781635564, "grad_norm": 1.1144291162490845, "learning_rate": 6.004117441189061e-06, "loss": 1.4778, "step": 35850 }, { "epoch": 8.893297786595573, "grad_norm": 0.9802186489105225, "learning_rate": 5.977554025664755e-06, "loss": 1.3595, "step": 35860 }, { "epoch": 8.895777791555583, "grad_norm": 1.0574133396148682, "learning_rate": 5.951047690962841e-06, "loss": 1.4007, "step": 35870 }, { "epoch": 8.898257796515592, "grad_norm": 0.9653461575508118, "learning_rate": 5.9245984531752784e-06, "loss": 1.4166, "step": 35880 }, { "epoch": 8.900737801475604, "grad_norm": 1.0629173517227173, "learning_rate": 5.898206328359302e-06, "loss": 1.3397, "step": 35890 }, { "epoch": 8.903217806435613, "grad_norm": 1.0503249168395996, "learning_rate": 5.8718713325375485e-06, "loss": 1.4268, "step": 35900 }, { "epoch": 8.905697811395623, "grad_norm": 1.0458600521087646, "learning_rate": 5.8455934816979305e-06, "loss": 1.4417, "step": 35910 }, { "epoch": 8.908177816355632, "grad_norm": 1.0539132356643677, "learning_rate": 5.8193727917936536e-06, "loss": 1.3868, "step": 35920 }, { "epoch": 8.910657821315642, "grad_norm": 1.0538716316223145, "learning_rate": 5.793209278743261e-06, "loss": 1.4121, "step": 35930 }, { "epoch": 8.913137826275653, "grad_norm": 1.0634170770645142, "learning_rate": 5.76710295843057e-06, "loss": 1.3727, "step": 35940 }, { "epoch": 8.915617831235663, "grad_norm": 1.0303020477294922, "learning_rate": 5.741053846704658e-06, "loss": 1.4504, "step": 35950 }, { "epoch": 8.918097836195672, "grad_norm": 1.0691232681274414, "learning_rate": 5.715061959379875e-06, "loss": 1.3289, "step": 35960 }, { "epoch": 8.920577841155682, "grad_norm": 1.0344038009643555, "learning_rate": 5.689127312235876e-06, "loss": 1.4227, "step": 35970 }, { "epoch": 8.923057846115693, "grad_norm": 1.0390594005584717, "learning_rate": 5.663249921017477e-06, "loss": 1.3365, "step": 35980 }, { "epoch": 8.925537851075703, "grad_norm": 1.0197139978408813, "learning_rate": 5.637429801434824e-06, "loss": 1.3722, "step": 35990 }, { "epoch": 8.928017856035712, "grad_norm": 1.0387476682662964, "learning_rate": 5.611666969163243e-06, "loss": 1.3939, "step": 36000 }, { "epoch": 8.930497860995722, "grad_norm": 1.1280559301376343, "learning_rate": 5.585961439843302e-06, "loss": 1.3973, "step": 36010 }, { "epoch": 8.932977865955731, "grad_norm": 1.0584766864776611, "learning_rate": 5.560313229080749e-06, "loss": 1.4572, "step": 36020 }, { "epoch": 8.935457870915743, "grad_norm": 1.0336962938308716, "learning_rate": 5.534722352446598e-06, "loss": 1.3458, "step": 36030 }, { "epoch": 8.937937875875752, "grad_norm": 1.0421390533447266, "learning_rate": 5.509188825476963e-06, "loss": 1.366, "step": 36040 }, { "epoch": 8.940417880835762, "grad_norm": 1.0823450088500977, "learning_rate": 5.483712663673224e-06, "loss": 1.4318, "step": 36050 }, { "epoch": 8.942897885795771, "grad_norm": 0.9930027723312378, "learning_rate": 5.458293882501908e-06, "loss": 1.445, "step": 36060 }, { "epoch": 8.94537789075578, "grad_norm": 0.9809021353721619, "learning_rate": 5.432932497394694e-06, "loss": 1.3888, "step": 36070 }, { "epoch": 8.947857895715792, "grad_norm": 1.074016809463501, "learning_rate": 5.4076285237483984e-06, "loss": 1.4226, "step": 36080 }, { "epoch": 8.950337900675802, "grad_norm": 1.1049420833587646, "learning_rate": 5.382381976925044e-06, "loss": 1.3478, "step": 36090 }, { "epoch": 8.952817905635811, "grad_norm": 1.0886642932891846, "learning_rate": 5.357192872251748e-06, "loss": 1.3751, "step": 36100 }, { "epoch": 8.95529791059582, "grad_norm": 0.975545346736908, "learning_rate": 5.332061225020757e-06, "loss": 1.5169, "step": 36110 }, { "epoch": 8.95777791555583, "grad_norm": 1.1444214582443237, "learning_rate": 5.306987050489442e-06, "loss": 1.3858, "step": 36120 }, { "epoch": 8.960257920515842, "grad_norm": 1.0759073495864868, "learning_rate": 5.281970363880262e-06, "loss": 1.3536, "step": 36130 }, { "epoch": 8.962737925475851, "grad_norm": 1.1276054382324219, "learning_rate": 5.2570111803808e-06, "loss": 1.4528, "step": 36140 }, { "epoch": 8.96521793043586, "grad_norm": 1.0630743503570557, "learning_rate": 5.232109515143746e-06, "loss": 1.4241, "step": 36150 }, { "epoch": 8.96769793539587, "grad_norm": 1.0743145942687988, "learning_rate": 5.20726538328683e-06, "loss": 1.4425, "step": 36160 }, { "epoch": 8.97017794035588, "grad_norm": 1.0920917987823486, "learning_rate": 5.182478799892865e-06, "loss": 1.4461, "step": 36170 }, { "epoch": 8.972657945315891, "grad_norm": 1.0802286863327026, "learning_rate": 5.157749780009735e-06, "loss": 1.4009, "step": 36180 }, { "epoch": 8.9751379502759, "grad_norm": 1.0261247158050537, "learning_rate": 5.1330783386503765e-06, "loss": 1.4202, "step": 36190 }, { "epoch": 8.97761795523591, "grad_norm": 1.0045171976089478, "learning_rate": 5.1084644907927526e-06, "loss": 1.4174, "step": 36200 }, { "epoch": 8.98009796019592, "grad_norm": 1.1183091402053833, "learning_rate": 5.08390825137991e-06, "loss": 1.4168, "step": 36210 }, { "epoch": 8.98257796515593, "grad_norm": 1.0777711868286133, "learning_rate": 5.059409635319867e-06, "loss": 1.3862, "step": 36220 }, { "epoch": 8.98505797011594, "grad_norm": 1.0636361837387085, "learning_rate": 5.034968657485673e-06, "loss": 1.4373, "step": 36230 }, { "epoch": 8.98753797507595, "grad_norm": 1.0551748275756836, "learning_rate": 5.0105853327154e-06, "loss": 1.3829, "step": 36240 }, { "epoch": 8.99001798003596, "grad_norm": 1.0396533012390137, "learning_rate": 4.986259675812133e-06, "loss": 1.4231, "step": 36250 }, { "epoch": 8.99249798499597, "grad_norm": 1.0980195999145508, "learning_rate": 4.961991701543889e-06, "loss": 1.4699, "step": 36260 }, { "epoch": 8.99497798995598, "grad_norm": 1.020045280456543, "learning_rate": 4.937781424643728e-06, "loss": 1.3617, "step": 36270 }, { "epoch": 8.99745799491599, "grad_norm": 1.0332356691360474, "learning_rate": 4.913628859809638e-06, "loss": 1.3875, "step": 36280 }, { "epoch": 8.999937999876, "grad_norm": 1.0232480764389038, "learning_rate": 4.889534021704611e-06, "loss": 1.401, "step": 36290 }, { "epoch": 9.00241800483601, "grad_norm": 1.0589970350265503, "learning_rate": 4.865496924956536e-06, "loss": 1.4866, "step": 36300 }, { "epoch": 9.004898009796019, "grad_norm": 1.0917316675186157, "learning_rate": 4.84151758415834e-06, "loss": 1.4285, "step": 36310 }, { "epoch": 9.00737801475603, "grad_norm": 0.9955704212188721, "learning_rate": 4.817596013867764e-06, "loss": 1.3862, "step": 36320 }, { "epoch": 9.00985801971604, "grad_norm": 1.0633028745651245, "learning_rate": 4.7937322286075725e-06, "loss": 1.4126, "step": 36330 }, { "epoch": 9.01233802467605, "grad_norm": 1.0090926885604858, "learning_rate": 4.7699262428654305e-06, "loss": 1.411, "step": 36340 }, { "epoch": 9.014818029636059, "grad_norm": 1.0382198095321655, "learning_rate": 4.746178071093876e-06, "loss": 1.4622, "step": 36350 }, { "epoch": 9.017298034596068, "grad_norm": 1.0646711587905884, "learning_rate": 4.722487727710368e-06, "loss": 1.3662, "step": 36360 }, { "epoch": 9.01977803955608, "grad_norm": 1.0893924236297607, "learning_rate": 4.698855227097298e-06, "loss": 1.4297, "step": 36370 }, { "epoch": 9.02225804451609, "grad_norm": 1.06475830078125, "learning_rate": 4.675280583601871e-06, "loss": 1.4096, "step": 36380 }, { "epoch": 9.024738049476099, "grad_norm": 0.9807024598121643, "learning_rate": 4.6517638115362136e-06, "loss": 1.3532, "step": 36390 }, { "epoch": 9.027218054436108, "grad_norm": 1.0107083320617676, "learning_rate": 4.628304925177318e-06, "loss": 1.4175, "step": 36400 }, { "epoch": 9.029698059396118, "grad_norm": 1.0552297830581665, "learning_rate": 4.604903938767002e-06, "loss": 1.4115, "step": 36410 }, { "epoch": 9.03217806435613, "grad_norm": 1.0633686780929565, "learning_rate": 4.581560866511958e-06, "loss": 1.3529, "step": 36420 }, { "epoch": 9.034658069316139, "grad_norm": 1.0776898860931396, "learning_rate": 4.558275722583749e-06, "loss": 1.418, "step": 36430 }, { "epoch": 9.037138074276148, "grad_norm": 1.032347321510315, "learning_rate": 4.5350485211186675e-06, "loss": 1.4072, "step": 36440 }, { "epoch": 9.039618079236158, "grad_norm": 1.0418621301651, "learning_rate": 4.5118792762179676e-06, "loss": 1.3685, "step": 36450 }, { "epoch": 9.042098084196168, "grad_norm": 0.9533702731132507, "learning_rate": 4.4887680019475965e-06, "loss": 1.4239, "step": 36460 }, { "epoch": 9.044578089156179, "grad_norm": 1.0281661748886108, "learning_rate": 4.465714712338398e-06, "loss": 1.358, "step": 36470 }, { "epoch": 9.047058094116188, "grad_norm": 1.0689723491668701, "learning_rate": 4.442719421385922e-06, "loss": 1.4248, "step": 36480 }, { "epoch": 9.049538099076198, "grad_norm": 1.0380345582962036, "learning_rate": 4.419782143050633e-06, "loss": 1.4749, "step": 36490 }, { "epoch": 9.052018104036208, "grad_norm": 1.0402356386184692, "learning_rate": 4.396902891257626e-06, "loss": 1.3932, "step": 36500 }, { "epoch": 9.054498108996219, "grad_norm": 1.0118567943572998, "learning_rate": 4.374081679896902e-06, "loss": 1.3717, "step": 36510 }, { "epoch": 9.056978113956228, "grad_norm": 1.0979758501052856, "learning_rate": 4.351318522823133e-06, "loss": 1.3867, "step": 36520 }, { "epoch": 9.059458118916238, "grad_norm": 0.9887623190879822, "learning_rate": 4.32861343385581e-06, "loss": 1.2905, "step": 36530 }, { "epoch": 9.061938123876248, "grad_norm": 1.1138519048690796, "learning_rate": 4.305966426779118e-06, "loss": 1.4328, "step": 36540 }, { "epoch": 9.064418128836257, "grad_norm": 1.0642791986465454, "learning_rate": 4.2833775153420245e-06, "loss": 1.3976, "step": 36550 }, { "epoch": 9.066898133796268, "grad_norm": 0.9739475846290588, "learning_rate": 4.260846713258193e-06, "loss": 1.4238, "step": 36560 }, { "epoch": 9.069378138756278, "grad_norm": 1.1281789541244507, "learning_rate": 4.238374034206028e-06, "loss": 1.4407, "step": 36570 }, { "epoch": 9.071858143716288, "grad_norm": 1.0865991115570068, "learning_rate": 4.215959491828681e-06, "loss": 1.3598, "step": 36580 }, { "epoch": 9.074338148676297, "grad_norm": 1.1083083152770996, "learning_rate": 4.1936030997339e-06, "loss": 1.4213, "step": 36590 }, { "epoch": 9.076818153636307, "grad_norm": 1.1080453395843506, "learning_rate": 4.171304871494264e-06, "loss": 1.4361, "step": 36600 }, { "epoch": 9.079298158596318, "grad_norm": 1.1190769672393799, "learning_rate": 4.149064820646953e-06, "loss": 1.4086, "step": 36610 }, { "epoch": 9.081778163556327, "grad_norm": 1.0619183778762817, "learning_rate": 4.126882960693868e-06, "loss": 1.4103, "step": 36620 }, { "epoch": 9.084258168516337, "grad_norm": 1.069216012954712, "learning_rate": 4.104759305101525e-06, "loss": 1.3868, "step": 36630 }, { "epoch": 9.086738173476347, "grad_norm": 1.0546233654022217, "learning_rate": 4.082693867301224e-06, "loss": 1.3681, "step": 36640 }, { "epoch": 9.089218178436356, "grad_norm": 1.0817266702651978, "learning_rate": 4.060686660688762e-06, "loss": 1.3694, "step": 36650 }, { "epoch": 9.091698183396367, "grad_norm": 1.005463719367981, "learning_rate": 4.0387376986247174e-06, "loss": 1.4149, "step": 36660 }, { "epoch": 9.094178188356377, "grad_norm": 1.0748347043991089, "learning_rate": 4.016846994434243e-06, "loss": 1.4455, "step": 36670 }, { "epoch": 9.096658193316387, "grad_norm": 1.0070174932479858, "learning_rate": 3.99501456140714e-06, "loss": 1.3758, "step": 36680 }, { "epoch": 9.099138198276396, "grad_norm": 1.0930790901184082, "learning_rate": 3.973240412797819e-06, "loss": 1.3981, "step": 36690 }, { "epoch": 9.101618203236406, "grad_norm": 0.9995220303535461, "learning_rate": 3.951524561825337e-06, "loss": 1.3719, "step": 36700 }, { "epoch": 9.104098208196417, "grad_norm": 0.9938735365867615, "learning_rate": 3.929867021673328e-06, "loss": 1.4177, "step": 36710 }, { "epoch": 9.106578213156427, "grad_norm": 1.043028712272644, "learning_rate": 3.908267805490051e-06, "loss": 1.3514, "step": 36720 }, { "epoch": 9.109058218116436, "grad_norm": 1.078033208847046, "learning_rate": 3.886726926388329e-06, "loss": 1.4582, "step": 36730 }, { "epoch": 9.111538223076446, "grad_norm": 0.9632874131202698, "learning_rate": 3.865244397445577e-06, "loss": 1.3785, "step": 36740 }, { "epoch": 9.114018228036455, "grad_norm": 1.1127055883407593, "learning_rate": 3.8438202317037986e-06, "loss": 1.3847, "step": 36750 }, { "epoch": 9.116498232996467, "grad_norm": 1.082115888595581, "learning_rate": 3.822454442169576e-06, "loss": 1.3243, "step": 36760 }, { "epoch": 9.118978237956476, "grad_norm": 1.1298651695251465, "learning_rate": 3.801147041814013e-06, "loss": 1.3943, "step": 36770 }, { "epoch": 9.121458242916486, "grad_norm": 1.0635226964950562, "learning_rate": 3.779898043572794e-06, "loss": 1.3355, "step": 36780 }, { "epoch": 9.123938247876495, "grad_norm": 1.1357967853546143, "learning_rate": 3.7587074603461227e-06, "loss": 1.4085, "step": 36790 }, { "epoch": 9.126418252836507, "grad_norm": 1.184017539024353, "learning_rate": 3.7375753049987973e-06, "loss": 1.4233, "step": 36800 }, { "epoch": 9.128898257796516, "grad_norm": 1.040810227394104, "learning_rate": 3.7165015903600555e-06, "loss": 1.4008, "step": 36810 }, { "epoch": 9.131378262756526, "grad_norm": 1.0753012895584106, "learning_rate": 3.6954863292237297e-06, "loss": 1.4354, "step": 36820 }, { "epoch": 9.133858267716535, "grad_norm": 0.9710498452186584, "learning_rate": 3.674529534348137e-06, "loss": 1.3908, "step": 36830 }, { "epoch": 9.136338272676545, "grad_norm": 1.0646116733551025, "learning_rate": 3.6536312184560993e-06, "loss": 1.4451, "step": 36840 }, { "epoch": 9.138818277636556, "grad_norm": 1.073171615600586, "learning_rate": 3.6327913942349357e-06, "loss": 1.3632, "step": 36850 }, { "epoch": 9.141298282596566, "grad_norm": 0.9970207214355469, "learning_rate": 3.6120100743364692e-06, "loss": 1.3601, "step": 36860 }, { "epoch": 9.143778287556575, "grad_norm": 1.032759666442871, "learning_rate": 3.5912872713769753e-06, "loss": 1.4208, "step": 36870 }, { "epoch": 9.146258292516585, "grad_norm": 1.0733903646469116, "learning_rate": 3.570622997937234e-06, "loss": 1.407, "step": 36880 }, { "epoch": 9.148738297476594, "grad_norm": 1.0444880723953247, "learning_rate": 3.550017266562489e-06, "loss": 1.4379, "step": 36890 }, { "epoch": 9.151218302436606, "grad_norm": 1.0926703214645386, "learning_rate": 3.529470089762421e-06, "loss": 1.4767, "step": 36900 }, { "epoch": 9.153698307396615, "grad_norm": 1.046228289604187, "learning_rate": 3.5089814800111643e-06, "loss": 1.4199, "step": 36910 }, { "epoch": 9.156178312356625, "grad_norm": 1.09345543384552, "learning_rate": 3.488551449747357e-06, "loss": 1.4341, "step": 36920 }, { "epoch": 9.158658317316634, "grad_norm": 0.9652727842330933, "learning_rate": 3.4681800113739783e-06, "loss": 1.3601, "step": 36930 }, { "epoch": 9.161138322276644, "grad_norm": 1.0334725379943848, "learning_rate": 3.4478671772585012e-06, "loss": 1.3328, "step": 36940 }, { "epoch": 9.163618327236655, "grad_norm": 1.0892077684402466, "learning_rate": 3.4276129597328064e-06, "loss": 1.4341, "step": 36950 }, { "epoch": 9.166098332196665, "grad_norm": 1.0149070024490356, "learning_rate": 3.40741737109318e-06, "loss": 1.4402, "step": 36960 }, { "epoch": 9.168578337156674, "grad_norm": 1.1309213638305664, "learning_rate": 3.3872804236003143e-06, "loss": 1.4286, "step": 36970 }, { "epoch": 9.171058342116684, "grad_norm": 1.0236012935638428, "learning_rate": 3.3672021294793297e-06, "loss": 1.3888, "step": 36980 }, { "epoch": 9.173538347076693, "grad_norm": 1.181558609008789, "learning_rate": 3.3471825009196766e-06, "loss": 1.3897, "step": 36990 }, { "epoch": 9.176018352036705, "grad_norm": 1.0703715085983276, "learning_rate": 3.3272215500752658e-06, "loss": 1.4069, "step": 37000 }, { "epoch": 9.178498356996714, "grad_norm": 1.0862619876861572, "learning_rate": 3.307319289064337e-06, "loss": 1.4278, "step": 37010 }, { "epoch": 9.180978361956724, "grad_norm": 1.0121700763702393, "learning_rate": 3.2874757299694913e-06, "loss": 1.3398, "step": 37020 }, { "epoch": 9.183458366916733, "grad_norm": 1.0737900733947754, "learning_rate": 3.267690884837726e-06, "loss": 1.3361, "step": 37030 }, { "epoch": 9.185938371876743, "grad_norm": 1.0572049617767334, "learning_rate": 3.247964765680389e-06, "loss": 1.3766, "step": 37040 }, { "epoch": 9.188418376836754, "grad_norm": 1.0468642711639404, "learning_rate": 3.2282973844731224e-06, "loss": 1.3749, "step": 37050 }, { "epoch": 9.190898381796764, "grad_norm": 1.0476771593093872, "learning_rate": 3.2086887531559973e-06, "loss": 1.3749, "step": 37060 }, { "epoch": 9.193378386756773, "grad_norm": 1.0572019815444946, "learning_rate": 3.189138883633347e-06, "loss": 1.3872, "step": 37070 }, { "epoch": 9.195858391716783, "grad_norm": 1.0561579465866089, "learning_rate": 3.169647787773866e-06, "loss": 1.3807, "step": 37080 }, { "epoch": 9.198338396676794, "grad_norm": 1.0616618394851685, "learning_rate": 3.150215477410534e-06, "loss": 1.4009, "step": 37090 }, { "epoch": 9.200818401636804, "grad_norm": 1.012891173362732, "learning_rate": 3.130841964340692e-06, "loss": 1.353, "step": 37100 }, { "epoch": 9.203298406596813, "grad_norm": 1.1254786252975464, "learning_rate": 3.1115272603259416e-06, "loss": 1.391, "step": 37110 }, { "epoch": 9.205778411556823, "grad_norm": 1.0111192464828491, "learning_rate": 3.092271377092215e-06, "loss": 1.3792, "step": 37120 }, { "epoch": 9.208258416516832, "grad_norm": 1.0976078510284424, "learning_rate": 3.0730743263296945e-06, "loss": 1.3778, "step": 37130 }, { "epoch": 9.210738421476844, "grad_norm": 1.0674266815185547, "learning_rate": 3.0539361196928682e-06, "loss": 1.3974, "step": 37140 }, { "epoch": 9.213218426436853, "grad_norm": 1.074644684791565, "learning_rate": 3.034856768800509e-06, "loss": 1.4434, "step": 37150 }, { "epoch": 9.215698431396863, "grad_norm": 1.0942853689193726, "learning_rate": 3.0158362852356626e-06, "loss": 1.4539, "step": 37160 }, { "epoch": 9.218178436356872, "grad_norm": 1.0741316080093384, "learning_rate": 2.996874680545603e-06, "loss": 1.3604, "step": 37170 }, { "epoch": 9.220658441316882, "grad_norm": 1.0775376558303833, "learning_rate": 2.977971966241877e-06, "loss": 1.4091, "step": 37180 }, { "epoch": 9.223138446276893, "grad_norm": 1.051910400390625, "learning_rate": 2.9591281538003167e-06, "loss": 1.3887, "step": 37190 }, { "epoch": 9.225618451236903, "grad_norm": 0.9799169898033142, "learning_rate": 2.9403432546609043e-06, "loss": 1.3789, "step": 37200 }, { "epoch": 9.228098456196912, "grad_norm": 1.0778720378875732, "learning_rate": 2.9216172802279507e-06, "loss": 1.3752, "step": 37210 }, { "epoch": 9.230578461156922, "grad_norm": 1.0666970014572144, "learning_rate": 2.9029502418699505e-06, "loss": 1.3777, "step": 37220 }, { "epoch": 9.233058466116931, "grad_norm": 1.0108171701431274, "learning_rate": 2.8843421509196057e-06, "loss": 1.3827, "step": 37230 }, { "epoch": 9.235538471076943, "grad_norm": 1.0825223922729492, "learning_rate": 2.8657930186738567e-06, "loss": 1.3398, "step": 37240 }, { "epoch": 9.238018476036952, "grad_norm": 1.0724096298217773, "learning_rate": 2.8473028563938407e-06, "loss": 1.3843, "step": 37250 }, { "epoch": 9.240498480996962, "grad_norm": 1.083712100982666, "learning_rate": 2.8288716753049005e-06, "loss": 1.4467, "step": 37260 }, { "epoch": 9.242978485956971, "grad_norm": 1.1053098440170288, "learning_rate": 2.810499486596563e-06, "loss": 1.4234, "step": 37270 }, { "epoch": 9.245458490916981, "grad_norm": 1.0849559307098389, "learning_rate": 2.7921863014225503e-06, "loss": 1.3446, "step": 37280 }, { "epoch": 9.247938495876992, "grad_norm": 1.0642001628875732, "learning_rate": 2.773932130900736e-06, "loss": 1.3362, "step": 37290 }, { "epoch": 9.250418500837002, "grad_norm": 1.1150140762329102, "learning_rate": 2.7557369861132e-06, "loss": 1.4089, "step": 37300 }, { "epoch": 9.252898505797011, "grad_norm": 1.0477197170257568, "learning_rate": 2.7376008781061835e-06, "loss": 1.4083, "step": 37310 }, { "epoch": 9.255378510757021, "grad_norm": 1.0632885694503784, "learning_rate": 2.7195238178900684e-06, "loss": 1.396, "step": 37320 }, { "epoch": 9.257858515717032, "grad_norm": 1.0281466245651245, "learning_rate": 2.701505816439409e-06, "loss": 1.4331, "step": 37330 }, { "epoch": 9.260338520677042, "grad_norm": 0.9859536290168762, "learning_rate": 2.683546884692878e-06, "loss": 1.3713, "step": 37340 }, { "epoch": 9.262818525637051, "grad_norm": 1.028248906135559, "learning_rate": 2.6656470335533092e-06, "loss": 1.4008, "step": 37350 }, { "epoch": 9.265298530597061, "grad_norm": 1.0284212827682495, "learning_rate": 2.647806273887665e-06, "loss": 1.3948, "step": 37360 }, { "epoch": 9.26777853555707, "grad_norm": 1.0324586629867554, "learning_rate": 2.630024616527038e-06, "loss": 1.3841, "step": 37370 }, { "epoch": 9.270258540517082, "grad_norm": 1.093057632446289, "learning_rate": 2.612302072266637e-06, "loss": 1.4453, "step": 37380 }, { "epoch": 9.272738545477091, "grad_norm": 1.0433088541030884, "learning_rate": 2.594638651865766e-06, "loss": 1.4044, "step": 37390 }, { "epoch": 9.275218550437101, "grad_norm": 1.1759510040283203, "learning_rate": 2.577034366047848e-06, "loss": 1.5191, "step": 37400 }, { "epoch": 9.27769855539711, "grad_norm": 0.9739142060279846, "learning_rate": 2.5594892255004334e-06, "loss": 1.378, "step": 37410 }, { "epoch": 9.28017856035712, "grad_norm": 1.0965644121170044, "learning_rate": 2.5420032408751014e-06, "loss": 1.4267, "step": 37420 }, { "epoch": 9.282658565317131, "grad_norm": 1.0043507814407349, "learning_rate": 2.5245764227875722e-06, "loss": 1.4089, "step": 37430 }, { "epoch": 9.285138570277141, "grad_norm": 1.0738314390182495, "learning_rate": 2.5072087818176382e-06, "loss": 1.3901, "step": 37440 }, { "epoch": 9.28761857523715, "grad_norm": 1.0553945302963257, "learning_rate": 2.4899003285091536e-06, "loss": 1.4073, "step": 37450 }, { "epoch": 9.29009858019716, "grad_norm": 1.0140724182128906, "learning_rate": 2.4726510733700246e-06, "loss": 1.3535, "step": 37460 }, { "epoch": 9.29257858515717, "grad_norm": 0.9664640426635742, "learning_rate": 2.4554610268722634e-06, "loss": 1.4218, "step": 37470 }, { "epoch": 9.295058590117181, "grad_norm": 1.074286937713623, "learning_rate": 2.438330199451877e-06, "loss": 1.354, "step": 37480 }, { "epoch": 9.29753859507719, "grad_norm": 1.097893476486206, "learning_rate": 2.421258601508991e-06, "loss": 1.3618, "step": 37490 }, { "epoch": 9.3000186000372, "grad_norm": 0.9826263785362244, "learning_rate": 2.404246243407704e-06, "loss": 1.341, "step": 37500 }, { "epoch": 9.30249860499721, "grad_norm": 1.022720217704773, "learning_rate": 2.3872931354761983e-06, "loss": 1.3872, "step": 37510 }, { "epoch": 9.30497860995722, "grad_norm": 1.042087435722351, "learning_rate": 2.3703992880066638e-06, "loss": 1.3552, "step": 37520 }, { "epoch": 9.30745861491723, "grad_norm": 1.06119966506958, "learning_rate": 2.3535647112553294e-06, "loss": 1.3845, "step": 37530 }, { "epoch": 9.30993861987724, "grad_norm": 1.0763623714447021, "learning_rate": 2.3367894154424087e-06, "loss": 1.3719, "step": 37540 }, { "epoch": 9.31241862483725, "grad_norm": 1.0972094535827637, "learning_rate": 2.320073410752155e-06, "loss": 1.3893, "step": 37550 }, { "epoch": 9.31489862979726, "grad_norm": 1.0489115715026855, "learning_rate": 2.3034167073328284e-06, "loss": 1.3093, "step": 37560 }, { "epoch": 9.317378634757269, "grad_norm": 1.042915940284729, "learning_rate": 2.2868193152966734e-06, "loss": 1.3327, "step": 37570 }, { "epoch": 9.31985863971728, "grad_norm": 1.0937442779541016, "learning_rate": 2.2702812447199185e-06, "loss": 1.4009, "step": 37580 }, { "epoch": 9.32233864467729, "grad_norm": 1.0158263444900513, "learning_rate": 2.2538025056428214e-06, "loss": 1.4095, "step": 37590 }, { "epoch": 9.3248186496373, "grad_norm": 1.0596281290054321, "learning_rate": 2.237383108069546e-06, "loss": 1.3821, "step": 37600 }, { "epoch": 9.327298654597309, "grad_norm": 1.0023540258407593, "learning_rate": 2.2210230619683083e-06, "loss": 1.4085, "step": 37610 }, { "epoch": 9.32977865955732, "grad_norm": 1.0688737630844116, "learning_rate": 2.20472237727124e-06, "loss": 1.3995, "step": 37620 }, { "epoch": 9.33225866451733, "grad_norm": 1.1530110836029053, "learning_rate": 2.188481063874448e-06, "loss": 1.4486, "step": 37630 }, { "epoch": 9.334738669477339, "grad_norm": 1.109735131263733, "learning_rate": 2.1722991316380003e-06, "loss": 1.4128, "step": 37640 }, { "epoch": 9.337218674437349, "grad_norm": 1.0421795845031738, "learning_rate": 2.1561765903859275e-06, "loss": 1.475, "step": 37650 }, { "epoch": 9.339698679397358, "grad_norm": 1.0335205793380737, "learning_rate": 2.140113449906167e-06, "loss": 1.3753, "step": 37660 }, { "epoch": 9.34217868435737, "grad_norm": 0.9735974669456482, "learning_rate": 2.12410971995064e-06, "loss": 1.3865, "step": 37670 }, { "epoch": 9.344658689317379, "grad_norm": 1.105315089225769, "learning_rate": 2.1081654102351635e-06, "loss": 1.4218, "step": 37680 }, { "epoch": 9.347138694277389, "grad_norm": 1.0441200733184814, "learning_rate": 2.0922805304394832e-06, "loss": 1.3724, "step": 37690 }, { "epoch": 9.349618699237398, "grad_norm": 1.056541919708252, "learning_rate": 2.0764550902072854e-06, "loss": 1.3433, "step": 37700 }, { "epoch": 9.352098704197408, "grad_norm": 1.038950800895691, "learning_rate": 2.0606890991461737e-06, "loss": 1.3924, "step": 37710 }, { "epoch": 9.354578709157419, "grad_norm": 0.9514971375465393, "learning_rate": 2.0449825668276244e-06, "loss": 1.4014, "step": 37720 }, { "epoch": 9.357058714117429, "grad_norm": 1.1039893627166748, "learning_rate": 2.0293355027870554e-06, "loss": 1.4024, "step": 37730 }, { "epoch": 9.359538719077438, "grad_norm": 1.0915133953094482, "learning_rate": 2.013747916523756e-06, "loss": 1.4493, "step": 37740 }, { "epoch": 9.362018724037448, "grad_norm": 1.0225774049758911, "learning_rate": 1.998219817500924e-06, "loss": 1.4054, "step": 37750 }, { "epoch": 9.364498728997457, "grad_norm": 1.0327415466308594, "learning_rate": 1.9827512151456173e-06, "loss": 1.3863, "step": 37760 }, { "epoch": 9.366978733957469, "grad_norm": 1.0993423461914062, "learning_rate": 1.9673421188488137e-06, "loss": 1.407, "step": 37770 }, { "epoch": 9.369458738917478, "grad_norm": 1.0667732954025269, "learning_rate": 1.951992537965319e-06, "loss": 1.3781, "step": 37780 }, { "epoch": 9.371938743877488, "grad_norm": 1.065179467201233, "learning_rate": 1.936702481813857e-06, "loss": 1.3953, "step": 37790 }, { "epoch": 9.374418748837497, "grad_norm": 1.0528008937835693, "learning_rate": 1.921471959676957e-06, "loss": 1.3768, "step": 37800 }, { "epoch": 9.376898753797507, "grad_norm": 1.0844627618789673, "learning_rate": 1.9063009808010569e-06, "loss": 1.4497, "step": 37810 }, { "epoch": 9.379378758757518, "grad_norm": 1.1173009872436523, "learning_rate": 1.891189554396422e-06, "loss": 1.3281, "step": 37820 }, { "epoch": 9.381858763717528, "grad_norm": 1.0677911043167114, "learning_rate": 1.8761376896371697e-06, "loss": 1.4197, "step": 37830 }, { "epoch": 9.384338768677537, "grad_norm": 1.0368330478668213, "learning_rate": 1.8611453956612347e-06, "loss": 1.4121, "step": 37840 }, { "epoch": 9.386818773637547, "grad_norm": 1.025154948234558, "learning_rate": 1.846212681570425e-06, "loss": 1.3365, "step": 37850 }, { "epoch": 9.389298778597556, "grad_norm": 1.0957260131835938, "learning_rate": 1.831339556430356e-06, "loss": 1.4194, "step": 37860 }, { "epoch": 9.391778783557568, "grad_norm": 1.013924479484558, "learning_rate": 1.8165260292704711e-06, "loss": 1.3767, "step": 37870 }, { "epoch": 9.394258788517577, "grad_norm": 0.9932572841644287, "learning_rate": 1.8017721090840322e-06, "loss": 1.3851, "step": 37880 }, { "epoch": 9.396738793477587, "grad_norm": 1.075989842414856, "learning_rate": 1.7870778048280966e-06, "loss": 1.4236, "step": 37890 }, { "epoch": 9.399218798437596, "grad_norm": 1.0282961130142212, "learning_rate": 1.7724431254235618e-06, "loss": 1.3502, "step": 37900 }, { "epoch": 9.401698803397608, "grad_norm": 1.0960266590118408, "learning_rate": 1.7578680797550872e-06, "loss": 1.3932, "step": 37910 }, { "epoch": 9.404178808357617, "grad_norm": 1.0175817012786865, "learning_rate": 1.7433526766711728e-06, "loss": 1.3902, "step": 37920 }, { "epoch": 9.406658813317627, "grad_norm": 1.090591311454773, "learning_rate": 1.7288969249840915e-06, "loss": 1.4048, "step": 37930 }, { "epoch": 9.409138818277636, "grad_norm": 1.071060299873352, "learning_rate": 1.7145008334698898e-06, "loss": 1.3387, "step": 37940 }, { "epoch": 9.411618823237646, "grad_norm": 1.0652714967727661, "learning_rate": 1.7001644108683878e-06, "loss": 1.2656, "step": 37950 }, { "epoch": 9.414098828197657, "grad_norm": 1.066692590713501, "learning_rate": 1.6858876658832233e-06, "loss": 1.4334, "step": 37960 }, { "epoch": 9.416578833157667, "grad_norm": 1.169921875, "learning_rate": 1.6716706071817633e-06, "loss": 1.3824, "step": 37970 }, { "epoch": 9.419058838117676, "grad_norm": 0.9965091943740845, "learning_rate": 1.657513243395159e-06, "loss": 1.4842, "step": 37980 }, { "epoch": 9.421538843077686, "grad_norm": 1.088774561882019, "learning_rate": 1.6434155831183018e-06, "loss": 1.4068, "step": 37990 }, { "epoch": 9.424018848037695, "grad_norm": 1.0832850933074951, "learning_rate": 1.6293776349098677e-06, "loss": 1.4314, "step": 38000 }, { "epoch": 9.426498852997707, "grad_norm": 1.0543954372406006, "learning_rate": 1.6153994072922506e-06, "loss": 1.4288, "step": 38010 }, { "epoch": 9.428978857957716, "grad_norm": 0.9991889595985413, "learning_rate": 1.6014809087516291e-06, "loss": 1.4025, "step": 38020 }, { "epoch": 9.431458862917726, "grad_norm": 1.0605100393295288, "learning_rate": 1.587622147737855e-06, "loss": 1.3978, "step": 38030 }, { "epoch": 9.433938867877735, "grad_norm": 1.0560320615768433, "learning_rate": 1.5738231326645758e-06, "loss": 1.4002, "step": 38040 }, { "epoch": 9.436418872837745, "grad_norm": 0.9635500311851501, "learning_rate": 1.5600838719091348e-06, "loss": 1.392, "step": 38050 }, { "epoch": 9.438898877797756, "grad_norm": 1.1183180809020996, "learning_rate": 1.5464043738126045e-06, "loss": 1.4105, "step": 38060 }, { "epoch": 9.441378882757766, "grad_norm": 1.0084953308105469, "learning_rate": 1.5327846466797856e-06, "loss": 1.3811, "step": 38070 }, { "epoch": 9.443858887717775, "grad_norm": 1.0079189538955688, "learning_rate": 1.5192246987791981e-06, "loss": 1.384, "step": 38080 }, { "epoch": 9.446338892677785, "grad_norm": 1.1129961013793945, "learning_rate": 1.505724538343023e-06, "loss": 1.37, "step": 38090 }, { "epoch": 9.448818897637794, "grad_norm": 1.0100232362747192, "learning_rate": 1.4922841735672156e-06, "loss": 1.3868, "step": 38100 }, { "epoch": 9.451298902597806, "grad_norm": 1.0200841426849365, "learning_rate": 1.4789036126113597e-06, "loss": 1.3904, "step": 38110 }, { "epoch": 9.453778907557815, "grad_norm": 1.0760002136230469, "learning_rate": 1.465582863598791e-06, "loss": 1.35, "step": 38120 }, { "epoch": 9.456258912517825, "grad_norm": 0.9961051344871521, "learning_rate": 1.4523219346164852e-06, "loss": 1.3225, "step": 38130 }, { "epoch": 9.458738917477834, "grad_norm": 1.0708684921264648, "learning_rate": 1.4391208337151463e-06, "loss": 1.3798, "step": 38140 }, { "epoch": 9.461218922437844, "grad_norm": 1.0574396848678589, "learning_rate": 1.4259795689090972e-06, "loss": 1.417, "step": 38150 }, { "epoch": 9.463698927397855, "grad_norm": 1.1168031692504883, "learning_rate": 1.4128981481764115e-06, "loss": 1.4336, "step": 38160 }, { "epoch": 9.466178932357865, "grad_norm": 1.0774537324905396, "learning_rate": 1.399876579458781e-06, "loss": 1.4477, "step": 38170 }, { "epoch": 9.468658937317874, "grad_norm": 1.075043797492981, "learning_rate": 1.3869148706615598e-06, "loss": 1.4456, "step": 38180 }, { "epoch": 9.471138942277884, "grad_norm": 1.0636255741119385, "learning_rate": 1.3740130296537646e-06, "loss": 1.3887, "step": 38190 }, { "epoch": 9.473618947237895, "grad_norm": 1.0367157459259033, "learning_rate": 1.3611710642681076e-06, "loss": 1.4313, "step": 38200 }, { "epoch": 9.476098952197905, "grad_norm": 1.1512954235076904, "learning_rate": 1.3483889823008965e-06, "loss": 1.3543, "step": 38210 }, { "epoch": 9.478578957157914, "grad_norm": 1.047049880027771, "learning_rate": 1.3356667915121025e-06, "loss": 1.3982, "step": 38220 }, { "epoch": 9.481058962117924, "grad_norm": 1.0412118434906006, "learning_rate": 1.3230044996253466e-06, "loss": 1.4216, "step": 38230 }, { "epoch": 9.483538967077934, "grad_norm": 1.0545369386672974, "learning_rate": 1.3104021143278911e-06, "loss": 1.3765, "step": 38240 }, { "epoch": 9.486018972037945, "grad_norm": 1.1080199480056763, "learning_rate": 1.2978596432705825e-06, "loss": 1.4318, "step": 38250 }, { "epoch": 9.488498976997954, "grad_norm": 1.0659513473510742, "learning_rate": 1.2853770940679633e-06, "loss": 1.3932, "step": 38260 }, { "epoch": 9.490978981957964, "grad_norm": 1.0260642766952515, "learning_rate": 1.2729544742981492e-06, "loss": 1.4366, "step": 38270 }, { "epoch": 9.493458986917974, "grad_norm": 1.0209259986877441, "learning_rate": 1.2605917915028742e-06, "loss": 1.3627, "step": 38280 }, { "epoch": 9.495938991877983, "grad_norm": 1.1583101749420166, "learning_rate": 1.2482890531875125e-06, "loss": 1.4544, "step": 38290 }, { "epoch": 9.498418996837994, "grad_norm": 1.0427088737487793, "learning_rate": 1.2360462668210227e-06, "loss": 1.3453, "step": 38300 }, { "epoch": 9.500899001798004, "grad_norm": 1.0863418579101562, "learning_rate": 1.2238634398359706e-06, "loss": 1.3817, "step": 38310 }, { "epoch": 9.503379006758014, "grad_norm": 1.1137123107910156, "learning_rate": 1.2117405796285286e-06, "loss": 1.3923, "step": 38320 }, { "epoch": 9.505859011718023, "grad_norm": 1.0869648456573486, "learning_rate": 1.1996776935584653e-06, "loss": 1.3358, "step": 38330 }, { "epoch": 9.508339016678033, "grad_norm": 1.069647192955017, "learning_rate": 1.1876747889491223e-06, "loss": 1.4135, "step": 38340 }, { "epoch": 9.510819021638044, "grad_norm": 1.0883690118789673, "learning_rate": 1.1757318730874379e-06, "loss": 1.3843, "step": 38350 }, { "epoch": 9.513299026598053, "grad_norm": 1.023659586906433, "learning_rate": 1.163848953223934e-06, "loss": 1.4235, "step": 38360 }, { "epoch": 9.515779031558063, "grad_norm": 1.1154769659042358, "learning_rate": 1.1520260365726954e-06, "loss": 1.4074, "step": 38370 }, { "epoch": 9.518259036518073, "grad_norm": 1.0261458158493042, "learning_rate": 1.1402631303114032e-06, "loss": 1.4057, "step": 38380 }, { "epoch": 9.520739041478082, "grad_norm": 1.0308420658111572, "learning_rate": 1.128560241581289e-06, "loss": 1.4169, "step": 38390 }, { "epoch": 9.523219046438093, "grad_norm": 1.0664455890655518, "learning_rate": 1.1169173774871478e-06, "loss": 1.4453, "step": 38400 }, { "epoch": 9.525699051398103, "grad_norm": 1.019059419631958, "learning_rate": 1.1053345450973362e-06, "loss": 1.4105, "step": 38410 }, { "epoch": 9.528179056358113, "grad_norm": 1.0536880493164062, "learning_rate": 1.093811751443774e-06, "loss": 1.4134, "step": 38420 }, { "epoch": 9.530659061318122, "grad_norm": 1.056960940361023, "learning_rate": 1.0823490035218987e-06, "loss": 1.4193, "step": 38430 }, { "epoch": 9.533139066278132, "grad_norm": 1.0073970556259155, "learning_rate": 1.0709463082907545e-06, "loss": 1.4197, "step": 38440 }, { "epoch": 9.535619071238143, "grad_norm": 1.1760201454162598, "learning_rate": 1.0596036726728819e-06, "loss": 1.378, "step": 38450 }, { "epoch": 9.538099076198153, "grad_norm": 1.0135118961334229, "learning_rate": 1.0483211035543504e-06, "loss": 1.4017, "step": 38460 }, { "epoch": 9.540579081158162, "grad_norm": 1.0354691743850708, "learning_rate": 1.0370986077848145e-06, "loss": 1.4306, "step": 38470 }, { "epoch": 9.543059086118172, "grad_norm": 1.0982271432876587, "learning_rate": 1.0259361921774013e-06, "loss": 1.4364, "step": 38480 }, { "epoch": 9.545539091078183, "grad_norm": 1.0671141147613525, "learning_rate": 1.0148338635088018e-06, "loss": 1.3727, "step": 38490 }, { "epoch": 9.548019096038193, "grad_norm": 1.1574429273605347, "learning_rate": 1.003791628519213e-06, "loss": 1.4063, "step": 38500 }, { "epoch": 9.550499100998202, "grad_norm": 1.0300366878509521, "learning_rate": 9.9280949391235e-07, "loss": 1.4205, "step": 38510 }, { "epoch": 9.552979105958212, "grad_norm": 1.1044412851333618, "learning_rate": 9.818874663554357e-07, "loss": 1.401, "step": 38520 }, { "epoch": 9.555459110918221, "grad_norm": 1.1097668409347534, "learning_rate": 9.710255524792322e-07, "loss": 1.4522, "step": 38530 }, { "epoch": 9.557939115878233, "grad_norm": 0.9702837467193604, "learning_rate": 9.60223758877965e-07, "loss": 1.3913, "step": 38540 }, { "epoch": 9.560419120838242, "grad_norm": 1.0994696617126465, "learning_rate": 9.494820921093772e-07, "loss": 1.4454, "step": 38550 }, { "epoch": 9.562899125798252, "grad_norm": 1.0229690074920654, "learning_rate": 9.388005586947191e-07, "loss": 1.3578, "step": 38560 }, { "epoch": 9.565379130758261, "grad_norm": 1.0709983110427856, "learning_rate": 9.281791651187366e-07, "loss": 1.4071, "step": 38570 }, { "epoch": 9.56785913571827, "grad_norm": 1.0604861974716187, "learning_rate": 9.176179178296385e-07, "loss": 1.4025, "step": 38580 }, { "epoch": 9.570339140678282, "grad_norm": 0.9834784865379333, "learning_rate": 9.071168232391292e-07, "loss": 1.3858, "step": 38590 }, { "epoch": 9.572819145638292, "grad_norm": 1.063391089439392, "learning_rate": 8.966758877224201e-07, "loss": 1.4405, "step": 38600 }, { "epoch": 9.575299150598301, "grad_norm": 1.0823516845703125, "learning_rate": 8.862951176181744e-07, "loss": 1.3624, "step": 38610 }, { "epoch": 9.57777915555831, "grad_norm": 0.991945743560791, "learning_rate": 8.759745192285285e-07, "loss": 1.3768, "step": 38620 }, { "epoch": 9.58025916051832, "grad_norm": 1.09054696559906, "learning_rate": 8.657140988191037e-07, "loss": 1.4156, "step": 38630 }, { "epoch": 9.582739165478332, "grad_norm": 0.9679309129714966, "learning_rate": 8.555138626189618e-07, "loss": 1.329, "step": 38640 }, { "epoch": 9.585219170438341, "grad_norm": 1.0478179454803467, "learning_rate": 8.453738168206604e-07, "loss": 1.4078, "step": 38650 }, { "epoch": 9.58769917539835, "grad_norm": 1.062233567237854, "learning_rate": 8.352939675801975e-07, "loss": 1.4452, "step": 38660 }, { "epoch": 9.59017918035836, "grad_norm": 1.1104792356491089, "learning_rate": 8.252743210170222e-07, "loss": 1.391, "step": 38670 }, { "epoch": 9.59265918531837, "grad_norm": 1.1112204790115356, "learning_rate": 8.153148832140466e-07, "loss": 1.349, "step": 38680 }, { "epoch": 9.595139190278381, "grad_norm": 1.0226380825042725, "learning_rate": 8.054156602176233e-07, "loss": 1.4159, "step": 38690 }, { "epoch": 9.59761919523839, "grad_norm": 1.1281567811965942, "learning_rate": 7.955766580375335e-07, "loss": 1.407, "step": 38700 }, { "epoch": 9.6000992001984, "grad_norm": 1.0717427730560303, "learning_rate": 7.857978826470325e-07, "loss": 1.4154, "step": 38710 }, { "epoch": 9.60257920515841, "grad_norm": 1.0515215396881104, "learning_rate": 7.760793399827937e-07, "loss": 1.4024, "step": 38720 }, { "epoch": 9.60505921011842, "grad_norm": 1.0398309230804443, "learning_rate": 7.664210359449086e-07, "loss": 1.3898, "step": 38730 }, { "epoch": 9.60753921507843, "grad_norm": 1.1013152599334717, "learning_rate": 7.568229763969204e-07, "loss": 1.4075, "step": 38740 }, { "epoch": 9.61001922003844, "grad_norm": 1.0536187887191772, "learning_rate": 7.47285167165801e-07, "loss": 1.3514, "step": 38750 }, { "epoch": 9.61249922499845, "grad_norm": 1.0260318517684937, "learning_rate": 7.378076140419187e-07, "loss": 1.3927, "step": 38760 }, { "epoch": 9.61497922995846, "grad_norm": 1.0289767980575562, "learning_rate": 7.283903227790823e-07, "loss": 1.4149, "step": 38770 }, { "epoch": 9.61745923491847, "grad_norm": 1.11062753200531, "learning_rate": 7.19033299094496e-07, "loss": 1.3977, "step": 38780 }, { "epoch": 9.61993923987848, "grad_norm": 1.0465054512023926, "learning_rate": 7.097365486688157e-07, "loss": 1.4034, "step": 38790 }, { "epoch": 9.62241924483849, "grad_norm": 1.0386594533920288, "learning_rate": 7.00500077146038e-07, "loss": 1.3796, "step": 38800 }, { "epoch": 9.6248992497985, "grad_norm": 0.9937693476676941, "learning_rate": 6.913238901336549e-07, "loss": 1.3505, "step": 38810 }, { "epoch": 9.627379254758509, "grad_norm": 1.0787221193313599, "learning_rate": 6.822079932024661e-07, "loss": 1.4293, "step": 38820 }, { "epoch": 9.62985925971852, "grad_norm": 1.031509280204773, "learning_rate": 6.731523918867333e-07, "loss": 1.4084, "step": 38830 }, { "epoch": 9.63233926467853, "grad_norm": 1.0514459609985352, "learning_rate": 6.641570916840922e-07, "loss": 1.3825, "step": 38840 }, { "epoch": 9.63481926963854, "grad_norm": 1.0696451663970947, "learning_rate": 6.552220980555635e-07, "loss": 1.3867, "step": 38850 }, { "epoch": 9.637299274598549, "grad_norm": 1.0945204496383667, "learning_rate": 6.463474164255634e-07, "loss": 1.3632, "step": 38860 }, { "epoch": 9.639779279558558, "grad_norm": 1.0794506072998047, "learning_rate": 6.375330521818934e-07, "loss": 1.3657, "step": 38870 }, { "epoch": 9.64225928451857, "grad_norm": 1.0652384757995605, "learning_rate": 6.287790106757396e-07, "loss": 1.3519, "step": 38880 }, { "epoch": 9.64473928947858, "grad_norm": 0.9793534874916077, "learning_rate": 6.20085297221662e-07, "loss": 1.3986, "step": 38890 }, { "epoch": 9.647219294438589, "grad_norm": 1.1023114919662476, "learning_rate": 6.11451917097583e-07, "loss": 1.4228, "step": 38900 }, { "epoch": 9.649699299398598, "grad_norm": 1.117197871208191, "learning_rate": 6.028788755448211e-07, "loss": 1.4341, "step": 38910 }, { "epoch": 9.652179304358608, "grad_norm": 1.0328142642974854, "learning_rate": 5.943661777680354e-07, "loss": 1.4006, "step": 38920 }, { "epoch": 9.65465930931862, "grad_norm": 1.154806137084961, "learning_rate": 5.859138289352917e-07, "loss": 1.3936, "step": 38930 }, { "epoch": 9.657139314278629, "grad_norm": 1.0782853364944458, "learning_rate": 5.775218341779742e-07, "loss": 1.3953, "step": 38940 }, { "epoch": 9.659619319238638, "grad_norm": 0.9896028637886047, "learning_rate": 5.69190198590841e-07, "loss": 1.44, "step": 38950 }, { "epoch": 9.662099324198648, "grad_norm": 1.1631059646606445, "learning_rate": 5.609189272320237e-07, "loss": 1.3734, "step": 38960 }, { "epoch": 9.66457932915866, "grad_norm": 1.133226752281189, "learning_rate": 5.527080251229833e-07, "loss": 1.3878, "step": 38970 }, { "epoch": 9.667059334118669, "grad_norm": 1.1199979782104492, "learning_rate": 5.445574972485434e-07, "loss": 1.4203, "step": 38980 }, { "epoch": 9.669539339078678, "grad_norm": 1.095866084098816, "learning_rate": 5.364673485568794e-07, "loss": 1.3982, "step": 38990 }, { "epoch": 9.672019344038688, "grad_norm": 0.9899543523788452, "learning_rate": 5.284375839594957e-07, "loss": 1.3403, "step": 39000 }, { "epoch": 9.674499348998697, "grad_norm": 1.0644506216049194, "learning_rate": 5.204682083312484e-07, "loss": 1.4253, "step": 39010 }, { "epoch": 9.676979353958707, "grad_norm": 1.0117524862289429, "learning_rate": 5.125592265103118e-07, "loss": 1.4065, "step": 39020 }, { "epoch": 9.679459358918718, "grad_norm": 1.099260687828064, "learning_rate": 5.04710643298234e-07, "loss": 1.3436, "step": 39030 }, { "epoch": 9.681939363878728, "grad_norm": 1.042979121208191, "learning_rate": 4.969224634598591e-07, "loss": 1.4066, "step": 39040 }, { "epoch": 9.684419368838737, "grad_norm": 1.043346881866455, "learning_rate": 4.891946917233825e-07, "loss": 1.3617, "step": 39050 }, { "epoch": 9.686899373798747, "grad_norm": 1.0590424537658691, "learning_rate": 4.815273327803182e-07, "loss": 1.4236, "step": 39060 }, { "epoch": 9.689379378758758, "grad_norm": 1.0161995887756348, "learning_rate": 4.7392039128548725e-07, "loss": 1.3855, "step": 39070 }, { "epoch": 9.691859383718768, "grad_norm": 1.0723992586135864, "learning_rate": 4.6637387185706206e-07, "loss": 1.415, "step": 39080 }, { "epoch": 9.694339388678777, "grad_norm": 1.0755267143249512, "learning_rate": 4.5888777907652227e-07, "loss": 1.4233, "step": 39090 }, { "epoch": 9.696819393638787, "grad_norm": 1.0378844738006592, "learning_rate": 4.514621174886435e-07, "loss": 1.4311, "step": 39100 }, { "epoch": 9.699299398598797, "grad_norm": 0.9756118059158325, "learning_rate": 4.440968916015309e-07, "loss": 1.3828, "step": 39110 }, { "epoch": 9.701779403558808, "grad_norm": 1.0769411325454712, "learning_rate": 4.3679210588661866e-07, "loss": 1.439, "step": 39120 }, { "epoch": 9.704259408518817, "grad_norm": 1.0276910066604614, "learning_rate": 4.2954776477860393e-07, "loss": 1.3836, "step": 39130 }, { "epoch": 9.706739413478827, "grad_norm": 1.0128233432769775, "learning_rate": 4.22363872675513e-07, "loss": 1.3781, "step": 39140 }, { "epoch": 9.709219418438837, "grad_norm": 1.0176806449890137, "learning_rate": 4.152404339386795e-07, "loss": 1.4247, "step": 39150 }, { "epoch": 9.711699423398846, "grad_norm": 1.0066100358963013, "learning_rate": 4.081774528927218e-07, "loss": 1.3822, "step": 39160 }, { "epoch": 9.714179428358857, "grad_norm": 1.0204678773880005, "learning_rate": 4.011749338255544e-07, "loss": 1.4417, "step": 39170 }, { "epoch": 9.716659433318867, "grad_norm": 1.1039975881576538, "learning_rate": 3.942328809884099e-07, "loss": 1.3699, "step": 39180 }, { "epoch": 9.719139438278876, "grad_norm": 1.1025723218917847, "learning_rate": 3.873512985957728e-07, "loss": 1.4301, "step": 39190 }, { "epoch": 9.721619443238886, "grad_norm": 1.0049769878387451, "learning_rate": 3.805301908254455e-07, "loss": 1.3407, "step": 39200 }, { "epoch": 9.724099448198896, "grad_norm": 1.0349602699279785, "learning_rate": 3.737695618185155e-07, "loss": 1.3534, "step": 39210 }, { "epoch": 9.726579453158907, "grad_norm": 0.9897842407226562, "learning_rate": 3.6706941567933304e-07, "loss": 1.4331, "step": 39220 }, { "epoch": 9.729059458118916, "grad_norm": 1.050890564918518, "learning_rate": 3.6042975647554433e-07, "loss": 1.3952, "step": 39230 }, { "epoch": 9.731539463078926, "grad_norm": 1.050793170928955, "learning_rate": 3.5385058823809156e-07, "loss": 1.3724, "step": 39240 }, { "epoch": 9.734019468038936, "grad_norm": 1.1213107109069824, "learning_rate": 3.4733191496113536e-07, "loss": 1.3988, "step": 39250 }, { "epoch": 9.736499472998947, "grad_norm": 1.0219743251800537, "learning_rate": 3.4087374060218777e-07, "loss": 1.3956, "step": 39260 }, { "epoch": 9.738979477958956, "grad_norm": 1.0561084747314453, "learning_rate": 3.3447606908196817e-07, "loss": 1.4519, "step": 39270 }, { "epoch": 9.741459482918966, "grad_norm": 1.0141974687576294, "learning_rate": 3.281389042844918e-07, "loss": 1.4234, "step": 39280 }, { "epoch": 9.743939487878976, "grad_norm": 1.0682047605514526, "learning_rate": 3.2186225005704784e-07, "loss": 1.3464, "step": 39290 }, { "epoch": 9.746419492838985, "grad_norm": 1.016051173210144, "learning_rate": 3.156461102101771e-07, "loss": 1.3778, "step": 39300 }, { "epoch": 9.748899497798995, "grad_norm": 1.0662356615066528, "learning_rate": 3.0949048851767194e-07, "loss": 1.4213, "step": 39310 }, { "epoch": 9.751379502759006, "grad_norm": 1.0327116250991821, "learning_rate": 3.0339538871660965e-07, "loss": 1.3685, "step": 39320 }, { "epoch": 9.753859507719016, "grad_norm": 1.0809624195098877, "learning_rate": 2.9736081450730815e-07, "loss": 1.3436, "step": 39330 }, { "epoch": 9.756339512679025, "grad_norm": 1.104588508605957, "learning_rate": 2.9138676955333676e-07, "loss": 1.4181, "step": 39340 }, { "epoch": 9.758819517639035, "grad_norm": 1.0799038410186768, "learning_rate": 2.8547325748153885e-07, "loss": 1.3374, "step": 39350 }, { "epoch": 9.761299522599046, "grad_norm": 1.0426992177963257, "learning_rate": 2.7962028188198706e-07, "loss": 1.3495, "step": 39360 }, { "epoch": 9.763779527559056, "grad_norm": 1.0788627862930298, "learning_rate": 2.738278463080057e-07, "loss": 1.3959, "step": 39370 }, { "epoch": 9.766259532519065, "grad_norm": 1.0900304317474365, "learning_rate": 2.6809595427618183e-07, "loss": 1.4163, "step": 39380 }, { "epoch": 9.768739537479075, "grad_norm": 1.0566004514694214, "learning_rate": 2.624246092663318e-07, "loss": 1.3932, "step": 39390 }, { "epoch": 9.771219542439084, "grad_norm": 0.9841712713241577, "learning_rate": 2.5681381472151267e-07, "loss": 1.4414, "step": 39400 }, { "epoch": 9.773699547399096, "grad_norm": 1.0372689962387085, "learning_rate": 2.5126357404802183e-07, "loss": 1.3482, "step": 39410 }, { "epoch": 9.776179552359105, "grad_norm": 1.058873176574707, "learning_rate": 2.457738906153972e-07, "loss": 1.3571, "step": 39420 }, { "epoch": 9.778659557319115, "grad_norm": 1.0845701694488525, "learning_rate": 2.4034476775642856e-07, "loss": 1.3415, "step": 39430 }, { "epoch": 9.781139562279124, "grad_norm": 1.0763607025146484, "learning_rate": 2.3497620876711257e-07, "loss": 1.3309, "step": 39440 }, { "epoch": 9.783619567239134, "grad_norm": 1.013174057006836, "learning_rate": 2.2966821690669771e-07, "loss": 1.3979, "step": 39450 }, { "epoch": 9.786099572199145, "grad_norm": 1.1713688373565674, "learning_rate": 2.244207953976396e-07, "loss": 1.3639, "step": 39460 }, { "epoch": 9.788579577159155, "grad_norm": 1.0831536054611206, "learning_rate": 2.1923394742564553e-07, "loss": 1.4203, "step": 39470 }, { "epoch": 9.791059582119164, "grad_norm": 1.056071162223816, "learning_rate": 2.141076761396521e-07, "loss": 1.4416, "step": 39480 }, { "epoch": 9.793539587079174, "grad_norm": 1.087546706199646, "learning_rate": 2.0904198465178105e-07, "loss": 1.4253, "step": 39490 }, { "epoch": 9.796019592039183, "grad_norm": 1.104124903678894, "learning_rate": 2.0403687603742783e-07, "loss": 1.3681, "step": 39500 }, { "epoch": 9.798499596999195, "grad_norm": 1.0968530178070068, "learning_rate": 1.9909235333517295e-07, "loss": 1.4218, "step": 39510 }, { "epoch": 9.800979601959204, "grad_norm": 1.0319266319274902, "learning_rate": 1.9420841954681525e-07, "loss": 1.4737, "step": 39520 }, { "epoch": 9.803459606919214, "grad_norm": 1.1220463514328003, "learning_rate": 1.89385077637394e-07, "loss": 1.3877, "step": 39530 }, { "epoch": 9.805939611879223, "grad_norm": 1.0038042068481445, "learning_rate": 1.8462233053514467e-07, "loss": 1.3989, "step": 39540 }, { "epoch": 9.808419616839235, "grad_norm": 1.0417869091033936, "learning_rate": 1.7992018113152098e-07, "loss": 1.415, "step": 39550 }, { "epoch": 9.810899621799244, "grad_norm": 1.080538272857666, "learning_rate": 1.752786322811839e-07, "loss": 1.3935, "step": 39560 }, { "epoch": 9.813379626759254, "grad_norm": 1.0630862712860107, "learning_rate": 1.706976868020127e-07, "loss": 1.3429, "step": 39570 }, { "epoch": 9.815859631719263, "grad_norm": 1.1879829168319702, "learning_rate": 1.6617734747509383e-07, "loss": 1.3798, "step": 39580 }, { "epoch": 9.818339636679273, "grad_norm": 1.0357943773269653, "learning_rate": 1.6171761704469878e-07, "loss": 1.4031, "step": 39590 }, { "epoch": 9.820819641639282, "grad_norm": 1.0229921340942383, "learning_rate": 1.5731849821833954e-07, "loss": 1.3625, "step": 39600 }, { "epoch": 9.823299646599294, "grad_norm": 1.0634421110153198, "learning_rate": 1.5297999366671312e-07, "loss": 1.3808, "step": 39610 }, { "epoch": 9.825779651559303, "grad_norm": 1.0519945621490479, "learning_rate": 1.487021060236904e-07, "loss": 1.3687, "step": 39620 }, { "epoch": 9.828259656519313, "grad_norm": 1.0606703758239746, "learning_rate": 1.444848378863828e-07, "loss": 1.3501, "step": 39630 }, { "epoch": 9.830739661479322, "grad_norm": 1.1069635152816772, "learning_rate": 1.403281918150978e-07, "loss": 1.416, "step": 39640 }, { "epoch": 9.833219666439334, "grad_norm": 1.057266354560852, "learning_rate": 1.3623217033330583e-07, "loss": 1.3404, "step": 39650 }, { "epoch": 9.835699671399343, "grad_norm": 1.0354572534561157, "learning_rate": 1.3219677592770652e-07, "loss": 1.3787, "step": 39660 }, { "epoch": 9.838179676359353, "grad_norm": 1.0284291505813599, "learning_rate": 1.2822201104816246e-07, "loss": 1.3968, "step": 39670 }, { "epoch": 9.840659681319362, "grad_norm": 1.1412193775177002, "learning_rate": 1.2430787810776555e-07, "loss": 1.3948, "step": 39680 }, { "epoch": 9.843139686279372, "grad_norm": 1.154058814048767, "learning_rate": 1.204543794827595e-07, "loss": 1.3643, "step": 39690 }, { "epoch": 9.845619691239383, "grad_norm": 1.053155541419983, "learning_rate": 1.1666151751260623e-07, "loss": 1.3957, "step": 39700 }, { "epoch": 9.848099696199393, "grad_norm": 1.0365995168685913, "learning_rate": 1.1292929449994161e-07, "loss": 1.3524, "step": 39710 }, { "epoch": 9.850579701159402, "grad_norm": 1.0026479959487915, "learning_rate": 1.0925771271058649e-07, "loss": 1.406, "step": 39720 }, { "epoch": 9.853059706119412, "grad_norm": 1.0312763452529907, "learning_rate": 1.0564677437355786e-07, "loss": 1.3892, "step": 39730 }, { "epoch": 9.855539711079421, "grad_norm": 1.0404436588287354, "learning_rate": 1.0209648168103547e-07, "loss": 1.442, "step": 39740 }, { "epoch": 9.858019716039433, "grad_norm": 1.1056069135665894, "learning_rate": 9.860683678840632e-08, "loss": 1.4487, "step": 39750 }, { "epoch": 9.860499720999442, "grad_norm": 1.0739343166351318, "learning_rate": 9.517784181422019e-08, "loss": 1.4275, "step": 39760 }, { "epoch": 9.862979725959452, "grad_norm": 1.0319334268569946, "learning_rate": 9.180949884022294e-08, "loss": 1.3365, "step": 39770 }, { "epoch": 9.865459730919461, "grad_norm": 1.0002667903900146, "learning_rate": 8.850180991131219e-08, "loss": 1.4084, "step": 39780 }, { "epoch": 9.867939735879471, "grad_norm": 1.0114107131958008, "learning_rate": 8.525477703558161e-08, "loss": 1.3684, "step": 39790 }, { "epoch": 9.870419740839482, "grad_norm": 1.0454875230789185, "learning_rate": 8.206840218430989e-08, "loss": 1.3802, "step": 39800 }, { "epoch": 9.872899745799492, "grad_norm": 1.1178107261657715, "learning_rate": 7.894268729192744e-08, "loss": 1.3888, "step": 39810 }, { "epoch": 9.875379750759501, "grad_norm": 1.1164809465408325, "learning_rate": 7.587763425604965e-08, "loss": 1.3838, "step": 39820 }, { "epoch": 9.877859755719511, "grad_norm": 1.0479484796524048, "learning_rate": 7.287324493747693e-08, "loss": 1.401, "step": 39830 }, { "epoch": 9.880339760679522, "grad_norm": 1.1312552690505981, "learning_rate": 6.992952116013918e-08, "loss": 1.3807, "step": 39840 }, { "epoch": 9.882819765639532, "grad_norm": 1.045345664024353, "learning_rate": 6.70464647111957e-08, "loss": 1.3687, "step": 39850 }, { "epoch": 9.885299770599541, "grad_norm": 1.0771009922027588, "learning_rate": 6.422407734092416e-08, "loss": 1.4415, "step": 39860 }, { "epoch": 9.887779775559551, "grad_norm": 1.0385736227035522, "learning_rate": 6.146236076279843e-08, "loss": 1.3948, "step": 39870 }, { "epoch": 9.89025978051956, "grad_norm": 1.1350256204605103, "learning_rate": 5.8761316653455076e-08, "loss": 1.3736, "step": 39880 }, { "epoch": 9.892739785479572, "grad_norm": 0.9889593720436096, "learning_rate": 5.612094665267131e-08, "loss": 1.3958, "step": 39890 }, { "epoch": 9.895219790439581, "grad_norm": 0.9963741302490234, "learning_rate": 5.354125236343155e-08, "loss": 1.4073, "step": 39900 }, { "epoch": 9.89769979539959, "grad_norm": 0.9665492177009583, "learning_rate": 5.102223535186079e-08, "loss": 1.3998, "step": 39910 }, { "epoch": 9.9001798003596, "grad_norm": 1.0084357261657715, "learning_rate": 4.856389714723575e-08, "loss": 1.4046, "step": 39920 }, { "epoch": 9.90265980531961, "grad_norm": 1.0370889902114868, "learning_rate": 4.616623924201813e-08, "loss": 1.4053, "step": 39930 }, { "epoch": 9.905139810279621, "grad_norm": 0.9871793985366821, "learning_rate": 4.3829263091810235e-08, "loss": 1.4169, "step": 39940 }, { "epoch": 9.90761981523963, "grad_norm": 1.0563249588012695, "learning_rate": 4.1552970115399384e-08, "loss": 1.3651, "step": 39950 }, { "epoch": 9.91009982019964, "grad_norm": 1.017209768295288, "learning_rate": 3.933736169471347e-08, "loss": 1.3772, "step": 39960 }, { "epoch": 9.91257982515965, "grad_norm": 1.0138312578201294, "learning_rate": 3.7182439174832106e-08, "loss": 1.4503, "step": 39970 }, { "epoch": 9.91505983011966, "grad_norm": 1.13675057888031, "learning_rate": 3.5088203864031e-08, "loss": 1.4347, "step": 39980 }, { "epoch": 9.91753983507967, "grad_norm": 1.0499796867370605, "learning_rate": 3.305465703368205e-08, "loss": 1.4008, "step": 39990 }, { "epoch": 9.92001984003968, "grad_norm": 1.042291522026062, "learning_rate": 3.1081799918375454e-08, "loss": 1.4099, "step": 40000 }, { "epoch": 9.92249984499969, "grad_norm": 1.1130834817886353, "learning_rate": 2.916963371580872e-08, "loss": 1.4223, "step": 40010 }, { "epoch": 9.9249798499597, "grad_norm": 1.0592601299285889, "learning_rate": 2.7318159586864344e-08, "loss": 1.3967, "step": 40020 }, { "epoch": 9.927459854919709, "grad_norm": 1.015381097793579, "learning_rate": 2.5527378655565427e-08, "loss": 1.3518, "step": 40030 }, { "epoch": 9.92993985987972, "grad_norm": 1.0501749515533447, "learning_rate": 2.379729200908676e-08, "loss": 1.3567, "step": 40040 }, { "epoch": 9.93241986483973, "grad_norm": 1.1451592445373535, "learning_rate": 2.2127900697777036e-08, "loss": 1.3506, "step": 40050 }, { "epoch": 9.93489986979974, "grad_norm": 1.1427299976348877, "learning_rate": 2.0519205735103353e-08, "loss": 1.3914, "step": 40060 }, { "epoch": 9.937379874759749, "grad_norm": 1.0566608905792236, "learning_rate": 1.897120809769559e-08, "loss": 1.4098, "step": 40070 }, { "epoch": 9.939859879719759, "grad_norm": 1.0093517303466797, "learning_rate": 1.7483908725357545e-08, "loss": 1.4334, "step": 40080 }, { "epoch": 9.94233988467977, "grad_norm": 1.0438250303268433, "learning_rate": 1.6057308521022498e-08, "loss": 1.3979, "step": 40090 }, { "epoch": 9.94481988963978, "grad_norm": 1.0427708625793457, "learning_rate": 1.4691408350764324e-08, "loss": 1.4356, "step": 40100 }, { "epoch": 9.947299894599789, "grad_norm": 1.1051512956619263, "learning_rate": 1.3386209043819708e-08, "loss": 1.3958, "step": 40110 }, { "epoch": 9.949779899559799, "grad_norm": 1.0951145887374878, "learning_rate": 1.2141711392588129e-08, "loss": 1.3822, "step": 40120 }, { "epoch": 9.95225990451981, "grad_norm": 1.099120855331421, "learning_rate": 1.0957916152587456e-08, "loss": 1.4432, "step": 40130 }, { "epoch": 9.95473990947982, "grad_norm": 1.1092506647109985, "learning_rate": 9.834824042498358e-09, "loss": 1.426, "step": 40140 }, { "epoch": 9.957219914439829, "grad_norm": 1.007125973701477, "learning_rate": 8.772435744153206e-09, "loss": 1.3725, "step": 40150 }, { "epoch": 9.959699919399839, "grad_norm": 1.0477224588394165, "learning_rate": 7.770751902513862e-09, "loss": 1.4066, "step": 40160 }, { "epoch": 9.962179924359848, "grad_norm": 1.011325716972351, "learning_rate": 6.829773125716088e-09, "loss": 1.3982, "step": 40170 }, { "epoch": 9.96465992931986, "grad_norm": 1.067104458808899, "learning_rate": 5.949499985025142e-09, "loss": 1.3502, "step": 40180 }, { "epoch": 9.967139934279869, "grad_norm": 1.0777113437652588, "learning_rate": 5.129933014835775e-09, "loss": 1.3502, "step": 40190 }, { "epoch": 9.969619939239879, "grad_norm": 1.0474942922592163, "learning_rate": 4.371072712727742e-09, "loss": 1.4274, "step": 40200 }, { "epoch": 9.972099944199888, "grad_norm": 1.0896284580230713, "learning_rate": 3.6729195393769843e-09, "loss": 1.4181, "step": 40210 }, { "epoch": 9.974579949159898, "grad_norm": 1.1275458335876465, "learning_rate": 3.0354739186555512e-09, "loss": 1.4095, "step": 40220 }, { "epoch": 9.977059954119909, "grad_norm": 1.0350884199142456, "learning_rate": 2.45873623754278e-09, "loss": 1.3916, "step": 40230 }, { "epoch": 9.979539959079919, "grad_norm": 1.0565330982208252, "learning_rate": 1.9427068461808083e-09, "loss": 1.4198, "step": 40240 }, { "epoch": 9.982019964039928, "grad_norm": 1.0948325395584106, "learning_rate": 1.4873860578412668e-09, "loss": 1.3361, "step": 40250 }, { "epoch": 9.984499968999938, "grad_norm": 1.0515812635421753, "learning_rate": 1.0927741489585862e-09, "loss": 1.4043, "step": 40260 }, { "epoch": 9.986979973959947, "grad_norm": 1.0280400514602661, "learning_rate": 7.588713590966911e-10, "loss": 1.3602, "step": 40270 }, { "epoch": 9.989459978919959, "grad_norm": 1.0958027839660645, "learning_rate": 4.856778909601012e-10, "loss": 1.4598, "step": 40280 }, { "epoch": 9.991939983879968, "grad_norm": 1.048204779624939, "learning_rate": 2.731939104161363e-10, "loss": 1.3762, "step": 40290 }, { "epoch": 9.994419988839978, "grad_norm": 1.0323026180267334, "learning_rate": 1.2141954645050747e-10, "loss": 1.399, "step": 40300 }, { "epoch": 9.996899993799987, "grad_norm": 1.0778690576553345, "learning_rate": 3.035489122282797e-11, "loss": 1.3887, "step": 40310 }, { "epoch": 9.999379998759997, "grad_norm": 1.0634547472000122, "learning_rate": 0.0, "loss": 1.3647, "step": 40320 }, { "epoch": 9.999379998759997, "step": 40320, "total_flos": 2.647899280244736e+18, "train_loss": 1.5969342465911593, "train_runtime": 104090.1136, "train_samples_per_second": 1.55, "train_steps_per_second": 0.387 } ], "logging_steps": 10, "max_steps": 40320, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2.647899280244736e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }