{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.409074483035535, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008030515960650472, "grad_norm": 3.689312219619751, "learning_rate": 4.5e-06, "loss": 1.9309, "step": 10 }, { "epoch": 0.0016061031921300944, "grad_norm": 1.117447018623352, "learning_rate": 9.5e-06, "loss": 1.7339, "step": 20 }, { "epoch": 0.0024091547881951417, "grad_norm": 1.1246883869171143, "learning_rate": 1.45e-05, "loss": 1.5095, "step": 30 }, { "epoch": 0.0032122063842601888, "grad_norm": 0.8569725155830383, "learning_rate": 1.9500000000000003e-05, "loss": 1.229, "step": 40 }, { "epoch": 0.004015257980325236, "grad_norm": 0.5154491066932678, "learning_rate": 2.45e-05, "loss": 0.9986, "step": 50 }, { "epoch": 0.004818309576390283, "grad_norm": 0.44289204478263855, "learning_rate": 2.95e-05, "loss": 0.9143, "step": 60 }, { "epoch": 0.0056213611724553305, "grad_norm": 0.40129631757736206, "learning_rate": 3.45e-05, "loss": 0.8261, "step": 70 }, { "epoch": 0.0064244127685203775, "grad_norm": 0.3821078836917877, "learning_rate": 3.9500000000000005e-05, "loss": 0.7933, "step": 80 }, { "epoch": 0.007227464364585425, "grad_norm": 0.3820279836654663, "learning_rate": 4.4500000000000004e-05, "loss": 0.7519, "step": 90 }, { "epoch": 0.008030515960650473, "grad_norm": 0.3348274230957031, "learning_rate": 4.9500000000000004e-05, "loss": 0.6816, "step": 100 }, { "epoch": 0.00883356755671552, "grad_norm": 0.3343372642993927, "learning_rate": 5.45e-05, "loss": 0.6519, "step": 110 }, { "epoch": 0.009636619152780567, "grad_norm": 0.30070552229881287, "learning_rate": 5.95e-05, "loss": 0.6066, "step": 120 }, { "epoch": 0.010439670748845614, "grad_norm": 0.30886363983154297, "learning_rate": 6.450000000000001e-05, "loss": 0.5832, "step": 130 }, { "epoch": 0.011242722344910661, "grad_norm": 0.3156905472278595, "learning_rate": 6.95e-05, "loss": 0.5724, "step": 140 }, { "epoch": 0.012045773940975708, "grad_norm": 0.3148592710494995, "learning_rate": 7.450000000000001e-05, "loss": 0.5695, "step": 150 }, { "epoch": 0.012848825537040755, "grad_norm": 0.2809179723262787, "learning_rate": 7.950000000000001e-05, "loss": 0.5656, "step": 160 }, { "epoch": 0.013651877133105802, "grad_norm": 0.2976081073284149, "learning_rate": 8.450000000000001e-05, "loss": 0.5586, "step": 170 }, { "epoch": 0.01445492872917085, "grad_norm": 0.30838751792907715, "learning_rate": 8.950000000000001e-05, "loss": 0.5414, "step": 180 }, { "epoch": 0.015257980325235896, "grad_norm": 0.3408336043357849, "learning_rate": 9.449999999999999e-05, "loss": 0.5443, "step": 190 }, { "epoch": 0.016061031921300945, "grad_norm": 0.314775288105011, "learning_rate": 9.95e-05, "loss": 0.5104, "step": 200 }, { "epoch": 0.016864083517365992, "grad_norm": 0.2776866853237152, "learning_rate": 0.00010449999999999999, "loss": 0.4885, "step": 210 }, { "epoch": 0.01766713511343104, "grad_norm": 0.2949003279209137, "learning_rate": 0.0001095, "loss": 0.53, "step": 220 }, { "epoch": 0.018470186709496086, "grad_norm": 0.3228117525577545, "learning_rate": 0.0001145, "loss": 0.4951, "step": 230 }, { "epoch": 0.019273238305561134, "grad_norm": 0.2884865999221802, "learning_rate": 0.00011950000000000002, "loss": 0.4972, "step": 240 }, { "epoch": 0.02007628990162618, "grad_norm": 0.2961493730545044, "learning_rate": 0.00012450000000000002, "loss": 0.5213, "step": 250 }, { "epoch": 0.020879341497691228, "grad_norm": 0.2755134105682373, "learning_rate": 0.0001295, "loss": 0.4738, "step": 260 }, { "epoch": 0.021682393093756275, "grad_norm": 0.3073410391807556, "learning_rate": 0.00013450000000000002, "loss": 0.4995, "step": 270 }, { "epoch": 0.022485444689821322, "grad_norm": 0.3017405867576599, "learning_rate": 0.0001395, "loss": 0.5092, "step": 280 }, { "epoch": 0.02328849628588637, "grad_norm": 0.2888385057449341, "learning_rate": 0.00014450000000000002, "loss": 0.4606, "step": 290 }, { "epoch": 0.024091547881951416, "grad_norm": 0.2804155647754669, "learning_rate": 0.00014950000000000003, "loss": 0.5049, "step": 300 }, { "epoch": 0.024894599478016463, "grad_norm": 0.3354802429676056, "learning_rate": 0.0001545, "loss": 0.4937, "step": 310 }, { "epoch": 0.02569765107408151, "grad_norm": 0.2571159601211548, "learning_rate": 0.0001595, "loss": 0.5009, "step": 320 }, { "epoch": 0.026500702670146557, "grad_norm": 0.27897530794143677, "learning_rate": 0.00016450000000000001, "loss": 0.4573, "step": 330 }, { "epoch": 0.027303754266211604, "grad_norm": 0.2837533950805664, "learning_rate": 0.00016950000000000003, "loss": 0.4896, "step": 340 }, { "epoch": 0.02810680586227665, "grad_norm": 0.30231064558029175, "learning_rate": 0.0001745, "loss": 0.4829, "step": 350 }, { "epoch": 0.0289098574583417, "grad_norm": 0.32696929574012756, "learning_rate": 0.0001795, "loss": 0.4935, "step": 360 }, { "epoch": 0.029712909054406746, "grad_norm": 0.307742714881897, "learning_rate": 0.0001845, "loss": 0.4793, "step": 370 }, { "epoch": 0.030515960650471793, "grad_norm": 0.28841400146484375, "learning_rate": 0.0001895, "loss": 0.4661, "step": 380 }, { "epoch": 0.03131901224653684, "grad_norm": 0.3023490607738495, "learning_rate": 0.0001945, "loss": 0.4624, "step": 390 }, { "epoch": 0.03212206384260189, "grad_norm": 0.27655431628227234, "learning_rate": 0.00019950000000000002, "loss": 0.4539, "step": 400 }, { "epoch": 0.032925115438666934, "grad_norm": 0.3161908984184265, "learning_rate": 0.0001999999757848043, "loss": 0.4832, "step": 410 }, { "epoch": 0.033728167034731985, "grad_norm": 0.2971240282058716, "learning_rate": 0.00019999989207797073, "loss": 0.4826, "step": 420 }, { "epoch": 0.03453121863079703, "grad_norm": 0.2571394145488739, "learning_rate": 0.000199999748580599, "loss": 0.4802, "step": 430 }, { "epoch": 0.03533427022686208, "grad_norm": 0.2647230625152588, "learning_rate": 0.00019999954529277938, "loss": 0.4623, "step": 440 }, { "epoch": 0.03613732182292712, "grad_norm": 0.26125994324684143, "learning_rate": 0.00019999928221463978, "loss": 0.4813, "step": 450 }, { "epoch": 0.03694037341899217, "grad_norm": 0.2720921039581299, "learning_rate": 0.00019999895934634586, "loss": 0.4717, "step": 460 }, { "epoch": 0.037743425015057216, "grad_norm": 0.2548144459724426, "learning_rate": 0.00019999857668810076, "loss": 0.4544, "step": 470 }, { "epoch": 0.03854647661112227, "grad_norm": 0.33194082975387573, "learning_rate": 0.00019999813424014532, "loss": 0.4965, "step": 480 }, { "epoch": 0.03934952820718731, "grad_norm": 0.25988927483558655, "learning_rate": 0.00019999763200275806, "loss": 0.4638, "step": 490 }, { "epoch": 0.04015257980325236, "grad_norm": 0.26165077090263367, "learning_rate": 0.00019999706997625506, "loss": 0.4689, "step": 500 }, { "epoch": 0.040955631399317405, "grad_norm": 0.2644157111644745, "learning_rate": 0.00019999644816098998, "loss": 0.4522, "step": 510 }, { "epoch": 0.041758682995382455, "grad_norm": 0.28438523411750793, "learning_rate": 0.00019999576655735423, "loss": 0.4854, "step": 520 }, { "epoch": 0.0425617345914475, "grad_norm": 0.3079022765159607, "learning_rate": 0.00019999502516577678, "loss": 0.4633, "step": 530 }, { "epoch": 0.04336478618751255, "grad_norm": 0.2283269762992859, "learning_rate": 0.00019999422398672424, "loss": 0.4415, "step": 540 }, { "epoch": 0.04416783778357759, "grad_norm": 0.2663537263870239, "learning_rate": 0.00019999336302070083, "loss": 0.4434, "step": 550 }, { "epoch": 0.044970889379642644, "grad_norm": 0.26318255066871643, "learning_rate": 0.0001999924422682485, "loss": 0.4709, "step": 560 }, { "epoch": 0.04577394097570769, "grad_norm": 0.3214571475982666, "learning_rate": 0.00019999146172994668, "loss": 0.4864, "step": 570 }, { "epoch": 0.04657699257177274, "grad_norm": 0.2683916687965393, "learning_rate": 0.0001999904214064125, "loss": 0.4488, "step": 580 }, { "epoch": 0.04738004416783778, "grad_norm": 0.2606896460056305, "learning_rate": 0.00019998932129830074, "loss": 0.4576, "step": 590 }, { "epoch": 0.04818309576390283, "grad_norm": 0.2524845600128174, "learning_rate": 0.00019998816140630375, "loss": 0.4506, "step": 600 }, { "epoch": 0.048986147359967876, "grad_norm": 0.27330440282821655, "learning_rate": 0.00019998694173115156, "loss": 0.4766, "step": 610 }, { "epoch": 0.049789198956032926, "grad_norm": 0.2803438901901245, "learning_rate": 0.00019998566227361178, "loss": 0.4723, "step": 620 }, { "epoch": 0.05059225055209797, "grad_norm": 0.30843380093574524, "learning_rate": 0.00019998432303448973, "loss": 0.4412, "step": 630 }, { "epoch": 0.05139530214816302, "grad_norm": 0.26991620659828186, "learning_rate": 0.0001999829240146282, "loss": 0.4408, "step": 640 }, { "epoch": 0.052198353744228064, "grad_norm": 0.2932959496974945, "learning_rate": 0.00019998146521490778, "loss": 0.4617, "step": 650 }, { "epoch": 0.053001405340293115, "grad_norm": 0.2695216238498688, "learning_rate": 0.00019997994663624655, "loss": 0.4412, "step": 660 }, { "epoch": 0.05380445693635816, "grad_norm": 0.2944924533367157, "learning_rate": 0.00019997836827960027, "loss": 0.4431, "step": 670 }, { "epoch": 0.05460750853242321, "grad_norm": 0.24393890798091888, "learning_rate": 0.00019997673014596237, "loss": 0.4608, "step": 680 }, { "epoch": 0.05541056012848825, "grad_norm": 0.2712589502334595, "learning_rate": 0.00019997503223636375, "loss": 0.4521, "step": 690 }, { "epoch": 0.0562136117245533, "grad_norm": 0.28926366567611694, "learning_rate": 0.00019997327455187314, "loss": 0.481, "step": 700 }, { "epoch": 0.057016663320618347, "grad_norm": 0.2569870948791504, "learning_rate": 0.0001999714570935967, "loss": 0.455, "step": 710 }, { "epoch": 0.0578197149166834, "grad_norm": 0.29588204622268677, "learning_rate": 0.00019996957986267834, "loss": 0.4553, "step": 720 }, { "epoch": 0.05862276651274844, "grad_norm": 0.28197842836380005, "learning_rate": 0.00019996764286029953, "loss": 0.4521, "step": 730 }, { "epoch": 0.05942581810881349, "grad_norm": 0.24128948152065277, "learning_rate": 0.00019996564608767937, "loss": 0.4569, "step": 740 }, { "epoch": 0.06022886970487854, "grad_norm": 0.284736305475235, "learning_rate": 0.00019996358954607458, "loss": 0.4479, "step": 750 }, { "epoch": 0.061031921300943585, "grad_norm": 0.31520378589630127, "learning_rate": 0.0001999614732367795, "loss": 0.4442, "step": 760 }, { "epoch": 0.061834972897008636, "grad_norm": 0.2892698645591736, "learning_rate": 0.0001999592971611261, "loss": 0.4304, "step": 770 }, { "epoch": 0.06263802449307368, "grad_norm": 0.24639274179935455, "learning_rate": 0.00019995706132048386, "loss": 0.4727, "step": 780 }, { "epoch": 0.06344107608913872, "grad_norm": 0.3514740467071533, "learning_rate": 0.00019995476571626003, "loss": 0.4442, "step": 790 }, { "epoch": 0.06424412768520378, "grad_norm": 0.2524435222148895, "learning_rate": 0.00019995241034989945, "loss": 0.4475, "step": 800 }, { "epoch": 0.06504717928126882, "grad_norm": 0.2905042767524719, "learning_rate": 0.00019994999522288443, "loss": 0.4479, "step": 810 }, { "epoch": 0.06585023087733387, "grad_norm": 0.3012372851371765, "learning_rate": 0.00019994752033673506, "loss": 0.4646, "step": 820 }, { "epoch": 0.06665328247339891, "grad_norm": 0.2613834738731384, "learning_rate": 0.00019994498569300893, "loss": 0.4626, "step": 830 }, { "epoch": 0.06745633406946397, "grad_norm": 0.25500670075416565, "learning_rate": 0.00019994239129330133, "loss": 0.4546, "step": 840 }, { "epoch": 0.06825938566552901, "grad_norm": 0.3097769618034363, "learning_rate": 0.00019993973713924504, "loss": 0.4434, "step": 850 }, { "epoch": 0.06906243726159406, "grad_norm": 0.33414244651794434, "learning_rate": 0.00019993702323251055, "loss": 0.4502, "step": 860 }, { "epoch": 0.0698654888576591, "grad_norm": 0.2662931978702545, "learning_rate": 0.00019993424957480597, "loss": 0.4364, "step": 870 }, { "epoch": 0.07066854045372416, "grad_norm": 0.2978106439113617, "learning_rate": 0.00019993141616787691, "loss": 0.475, "step": 880 }, { "epoch": 0.0714715920497892, "grad_norm": 0.24551615118980408, "learning_rate": 0.00019992852301350668, "loss": 0.4286, "step": 890 }, { "epoch": 0.07227464364585424, "grad_norm": 0.27809980511665344, "learning_rate": 0.00019992557011351612, "loss": 0.4395, "step": 900 }, { "epoch": 0.07307769524191929, "grad_norm": 0.24689947068691254, "learning_rate": 0.00019992255746976377, "loss": 0.43, "step": 910 }, { "epoch": 0.07388074683798435, "grad_norm": 0.2925223410129547, "learning_rate": 0.00019991948508414563, "loss": 0.4562, "step": 920 }, { "epoch": 0.07468379843404939, "grad_norm": 0.24244606494903564, "learning_rate": 0.00019991635295859547, "loss": 0.4521, "step": 930 }, { "epoch": 0.07548685003011443, "grad_norm": 0.27935731410980225, "learning_rate": 0.00019991316109508453, "loss": 0.44, "step": 940 }, { "epoch": 0.07628990162617948, "grad_norm": 0.3181900978088379, "learning_rate": 0.00019990990949562168, "loss": 0.4249, "step": 950 }, { "epoch": 0.07709295322224453, "grad_norm": 0.2541589140892029, "learning_rate": 0.00019990659816225342, "loss": 0.4098, "step": 960 }, { "epoch": 0.07789600481830958, "grad_norm": 0.31134289503097534, "learning_rate": 0.00019990322709706378, "loss": 0.4795, "step": 970 }, { "epoch": 0.07869905641437462, "grad_norm": 0.30310437083244324, "learning_rate": 0.00019989979630217445, "loss": 0.4547, "step": 980 }, { "epoch": 0.07950210801043966, "grad_norm": 0.25349077582359314, "learning_rate": 0.00019989630577974468, "loss": 0.4263, "step": 990 }, { "epoch": 0.08030515960650472, "grad_norm": 0.2620362937450409, "learning_rate": 0.00019989275553197129, "loss": 0.4434, "step": 1000 }, { "epoch": 0.08110821120256977, "grad_norm": 0.2222265750169754, "learning_rate": 0.0001998891455610888, "loss": 0.4416, "step": 1010 }, { "epoch": 0.08191126279863481, "grad_norm": 0.2608863115310669, "learning_rate": 0.00019988547586936915, "loss": 0.4468, "step": 1020 }, { "epoch": 0.08271431439469985, "grad_norm": 0.3156661093235016, "learning_rate": 0.00019988174645912198, "loss": 0.4685, "step": 1030 }, { "epoch": 0.08351736599076491, "grad_norm": 0.28053924441337585, "learning_rate": 0.00019987795733269453, "loss": 0.4514, "step": 1040 }, { "epoch": 0.08432041758682995, "grad_norm": 0.24653348326683044, "learning_rate": 0.0001998741084924715, "loss": 0.4521, "step": 1050 }, { "epoch": 0.085123469182895, "grad_norm": 0.2744863033294678, "learning_rate": 0.00019987019994087527, "loss": 0.4461, "step": 1060 }, { "epoch": 0.08592652077896004, "grad_norm": 0.2698332965373993, "learning_rate": 0.00019986623168036585, "loss": 0.4585, "step": 1070 }, { "epoch": 0.0867295723750251, "grad_norm": 0.28703564405441284, "learning_rate": 0.00019986220371344073, "loss": 0.4614, "step": 1080 }, { "epoch": 0.08753262397109014, "grad_norm": 0.2535983622074127, "learning_rate": 0.00019985811604263497, "loss": 0.4408, "step": 1090 }, { "epoch": 0.08833567556715519, "grad_norm": 0.2771255373954773, "learning_rate": 0.0001998539686705213, "loss": 0.4543, "step": 1100 }, { "epoch": 0.08913872716322024, "grad_norm": 0.3209109306335449, "learning_rate": 0.0001998497615997099, "loss": 0.4191, "step": 1110 }, { "epoch": 0.08994177875928529, "grad_norm": 0.31623247265815735, "learning_rate": 0.0001998454948328487, "loss": 0.4564, "step": 1120 }, { "epoch": 0.09074483035535033, "grad_norm": 0.219231516122818, "learning_rate": 0.000199841168372623, "loss": 0.4287, "step": 1130 }, { "epoch": 0.09154788195141537, "grad_norm": 0.23512132465839386, "learning_rate": 0.00019983678222175585, "loss": 0.417, "step": 1140 }, { "epoch": 0.09235093354748043, "grad_norm": 0.22394251823425293, "learning_rate": 0.0001998323363830077, "loss": 0.4243, "step": 1150 }, { "epoch": 0.09315398514354548, "grad_norm": 0.22006626427173615, "learning_rate": 0.0001998278308591767, "loss": 0.4516, "step": 1160 }, { "epoch": 0.09395703673961052, "grad_norm": 0.2771822214126587, "learning_rate": 0.0001998232656530985, "loss": 0.4277, "step": 1170 }, { "epoch": 0.09476008833567556, "grad_norm": 0.31321150064468384, "learning_rate": 0.00019981864076764632, "loss": 0.4158, "step": 1180 }, { "epoch": 0.09556313993174062, "grad_norm": 0.271979957818985, "learning_rate": 0.00019981395620573096, "loss": 0.4466, "step": 1190 }, { "epoch": 0.09636619152780566, "grad_norm": 0.25185930728912354, "learning_rate": 0.00019980921197030072, "loss": 0.4761, "step": 1200 }, { "epoch": 0.09716924312387071, "grad_norm": 0.3032665550708771, "learning_rate": 0.00019980440806434158, "loss": 0.4397, "step": 1210 }, { "epoch": 0.09797229471993575, "grad_norm": 0.3359529376029968, "learning_rate": 0.00019979954449087694, "loss": 0.4317, "step": 1220 }, { "epoch": 0.09877534631600081, "grad_norm": 0.2716955840587616, "learning_rate": 0.00019979462125296784, "loss": 0.4452, "step": 1230 }, { "epoch": 0.09957839791206585, "grad_norm": 0.3040587604045868, "learning_rate": 0.0001997896383537128, "loss": 0.4245, "step": 1240 }, { "epoch": 0.1003814495081309, "grad_norm": 0.2638346254825592, "learning_rate": 0.00019978459579624797, "loss": 0.4564, "step": 1250 }, { "epoch": 0.10118450110419594, "grad_norm": 0.3166419267654419, "learning_rate": 0.000199779493583747, "loss": 0.4236, "step": 1260 }, { "epoch": 0.101987552700261, "grad_norm": 0.2993052005767822, "learning_rate": 0.0001997743317194211, "loss": 0.4141, "step": 1270 }, { "epoch": 0.10279060429632604, "grad_norm": 0.24265843629837036, "learning_rate": 0.00019976911020651902, "loss": 0.441, "step": 1280 }, { "epoch": 0.10359365589239108, "grad_norm": 0.22540034353733063, "learning_rate": 0.00019976382904832702, "loss": 0.44, "step": 1290 }, { "epoch": 0.10439670748845613, "grad_norm": 0.2744080722332001, "learning_rate": 0.00019975848824816897, "loss": 0.4174, "step": 1300 }, { "epoch": 0.10519975908452119, "grad_norm": 0.24610072374343872, "learning_rate": 0.00019975308780940617, "loss": 0.4169, "step": 1310 }, { "epoch": 0.10600281068058623, "grad_norm": 0.2594468295574188, "learning_rate": 0.0001997476277354376, "loss": 0.414, "step": 1320 }, { "epoch": 0.10680586227665127, "grad_norm": 0.2405395656824112, "learning_rate": 0.00019974210802969965, "loss": 0.4199, "step": 1330 }, { "epoch": 0.10760891387271632, "grad_norm": 0.2513597011566162, "learning_rate": 0.00019973652869566624, "loss": 0.4313, "step": 1340 }, { "epoch": 0.10841196546878137, "grad_norm": 0.22517551481723785, "learning_rate": 0.00019973088973684894, "loss": 0.4418, "step": 1350 }, { "epoch": 0.10921501706484642, "grad_norm": 0.2635776698589325, "learning_rate": 0.00019972519115679676, "loss": 0.4237, "step": 1360 }, { "epoch": 0.11001806866091146, "grad_norm": 0.27164286375045776, "learning_rate": 0.00019971943295909618, "loss": 0.4464, "step": 1370 }, { "epoch": 0.1108211202569765, "grad_norm": 0.2682201564311981, "learning_rate": 0.00019971361514737135, "loss": 0.4383, "step": 1380 }, { "epoch": 0.11162417185304156, "grad_norm": 0.22847069799900055, "learning_rate": 0.0001997077377252838, "loss": 0.418, "step": 1390 }, { "epoch": 0.1124272234491066, "grad_norm": 0.28664693236351013, "learning_rate": 0.00019970180069653261, "loss": 0.4256, "step": 1400 }, { "epoch": 0.11323027504517165, "grad_norm": 0.24851150810718536, "learning_rate": 0.00019969580406485444, "loss": 0.4412, "step": 1410 }, { "epoch": 0.11403332664123669, "grad_norm": 0.2393653243780136, "learning_rate": 0.00019968974783402343, "loss": 0.4365, "step": 1420 }, { "epoch": 0.11483637823730175, "grad_norm": 0.3066830635070801, "learning_rate": 0.0001996836320078512, "loss": 0.4428, "step": 1430 }, { "epoch": 0.1156394298333668, "grad_norm": 0.33011773228645325, "learning_rate": 0.00019967745659018685, "loss": 0.4359, "step": 1440 }, { "epoch": 0.11644248142943184, "grad_norm": 0.300329327583313, "learning_rate": 0.0001996712215849171, "loss": 0.4367, "step": 1450 }, { "epoch": 0.11724553302549688, "grad_norm": 0.21983005106449127, "learning_rate": 0.00019966492699596608, "loss": 0.4054, "step": 1460 }, { "epoch": 0.11804858462156194, "grad_norm": 0.29565781354904175, "learning_rate": 0.00019965857282729546, "loss": 0.4282, "step": 1470 }, { "epoch": 0.11885163621762698, "grad_norm": 0.3047868311405182, "learning_rate": 0.00019965215908290435, "loss": 0.4319, "step": 1480 }, { "epoch": 0.11965468781369203, "grad_norm": 0.2538636326789856, "learning_rate": 0.00019964568576682942, "loss": 0.4247, "step": 1490 }, { "epoch": 0.12045773940975708, "grad_norm": 0.2215738296508789, "learning_rate": 0.00019963915288314486, "loss": 0.4439, "step": 1500 }, { "epoch": 0.12126079100582213, "grad_norm": 0.2516620457172394, "learning_rate": 0.00019963256043596225, "loss": 0.4549, "step": 1510 }, { "epoch": 0.12206384260188717, "grad_norm": 0.271609365940094, "learning_rate": 0.0001996259084294307, "loss": 0.4212, "step": 1520 }, { "epoch": 0.12286689419795221, "grad_norm": 0.2880658209323883, "learning_rate": 0.00019961919686773685, "loss": 0.4212, "step": 1530 }, { "epoch": 0.12366994579401727, "grad_norm": 0.21652716398239136, "learning_rate": 0.00019961242575510478, "loss": 0.441, "step": 1540 }, { "epoch": 0.12447299739008232, "grad_norm": 0.2243596315383911, "learning_rate": 0.00019960559509579607, "loss": 0.43, "step": 1550 }, { "epoch": 0.12527604898614736, "grad_norm": 0.2532637119293213, "learning_rate": 0.0001995987048941097, "loss": 0.4625, "step": 1560 }, { "epoch": 0.12607910058221242, "grad_norm": 0.2339617758989334, "learning_rate": 0.0001995917551543823, "loss": 0.406, "step": 1570 }, { "epoch": 0.12688215217827745, "grad_norm": 0.23928692936897278, "learning_rate": 0.00019958474588098777, "loss": 0.4405, "step": 1580 }, { "epoch": 0.1276852037743425, "grad_norm": 0.3313048779964447, "learning_rate": 0.0001995776770783376, "loss": 0.4234, "step": 1590 }, { "epoch": 0.12848825537040756, "grad_norm": 0.23700657486915588, "learning_rate": 0.0001995705487508807, "loss": 0.4506, "step": 1600 }, { "epoch": 0.1292913069664726, "grad_norm": 0.3503771126270294, "learning_rate": 0.00019956336090310348, "loss": 0.4189, "step": 1610 }, { "epoch": 0.13009435856253765, "grad_norm": 0.26418164372444153, "learning_rate": 0.0001995561135395298, "loss": 0.4189, "step": 1620 }, { "epoch": 0.13089741015860268, "grad_norm": 0.24539045989513397, "learning_rate": 0.00019954880666472096, "loss": 0.4229, "step": 1630 }, { "epoch": 0.13170046175466774, "grad_norm": 0.2213798314332962, "learning_rate": 0.0001995414402832757, "loss": 0.4369, "step": 1640 }, { "epoch": 0.1325035133507328, "grad_norm": 0.2704111337661743, "learning_rate": 0.00019953401439983025, "loss": 0.4191, "step": 1650 }, { "epoch": 0.13330656494679782, "grad_norm": 0.2564098536968231, "learning_rate": 0.00019952652901905827, "loss": 0.4575, "step": 1660 }, { "epoch": 0.13410961654286288, "grad_norm": 0.3084269165992737, "learning_rate": 0.00019951898414567086, "loss": 0.4197, "step": 1670 }, { "epoch": 0.13491266813892794, "grad_norm": 0.24151474237442017, "learning_rate": 0.00019951137978441665, "loss": 0.4222, "step": 1680 }, { "epoch": 0.13571571973499297, "grad_norm": 0.26492932438850403, "learning_rate": 0.00019950371594008152, "loss": 0.407, "step": 1690 }, { "epoch": 0.13651877133105803, "grad_norm": 0.24108289182186127, "learning_rate": 0.00019949599261748896, "loss": 0.4452, "step": 1700 }, { "epoch": 0.13732182292712306, "grad_norm": 0.24525177478790283, "learning_rate": 0.00019948820982149985, "loss": 0.403, "step": 1710 }, { "epoch": 0.1381248745231881, "grad_norm": 0.22905588150024414, "learning_rate": 0.00019948036755701242, "loss": 0.4121, "step": 1720 }, { "epoch": 0.13892792611925317, "grad_norm": 0.2403474599123001, "learning_rate": 0.00019947246582896247, "loss": 0.4074, "step": 1730 }, { "epoch": 0.1397309777153182, "grad_norm": 0.26817527413368225, "learning_rate": 0.0001994645046423231, "loss": 0.4083, "step": 1740 }, { "epoch": 0.14053402931138326, "grad_norm": 0.23811475932598114, "learning_rate": 0.00019945648400210487, "loss": 0.4425, "step": 1750 }, { "epoch": 0.14133708090744831, "grad_norm": 0.3011744022369385, "learning_rate": 0.00019944840391335585, "loss": 0.4323, "step": 1760 }, { "epoch": 0.14214013250351334, "grad_norm": 0.2165452092885971, "learning_rate": 0.00019944026438116133, "loss": 0.4072, "step": 1770 }, { "epoch": 0.1429431840995784, "grad_norm": 0.2843163013458252, "learning_rate": 0.00019943206541064422, "loss": 0.4188, "step": 1780 }, { "epoch": 0.14374623569564343, "grad_norm": 0.2741716802120209, "learning_rate": 0.0001994238070069647, "loss": 0.4203, "step": 1790 }, { "epoch": 0.1445492872917085, "grad_norm": 0.23163117468357086, "learning_rate": 0.00019941548917532044, "loss": 0.4422, "step": 1800 }, { "epoch": 0.14535233888777355, "grad_norm": 0.3721010386943817, "learning_rate": 0.0001994071119209464, "loss": 0.4519, "step": 1810 }, { "epoch": 0.14615539048383858, "grad_norm": 0.24776411056518555, "learning_rate": 0.00019939867524911508, "loss": 0.4421, "step": 1820 }, { "epoch": 0.14695844207990363, "grad_norm": 0.2954345643520355, "learning_rate": 0.00019939017916513626, "loss": 0.4301, "step": 1830 }, { "epoch": 0.1477614936759687, "grad_norm": 0.2574305832386017, "learning_rate": 0.0001993816236743572, "loss": 0.4162, "step": 1840 }, { "epoch": 0.14856454527203372, "grad_norm": 0.3324902355670929, "learning_rate": 0.00019937300878216253, "loss": 0.4052, "step": 1850 }, { "epoch": 0.14936759686809878, "grad_norm": 0.32313814759254456, "learning_rate": 0.00019936433449397418, "loss": 0.4299, "step": 1860 }, { "epoch": 0.15017064846416384, "grad_norm": 0.27012696862220764, "learning_rate": 0.00019935560081525157, "loss": 0.4252, "step": 1870 }, { "epoch": 0.15097370006022887, "grad_norm": 0.3864246606826782, "learning_rate": 0.00019934680775149144, "loss": 0.4538, "step": 1880 }, { "epoch": 0.15177675165629392, "grad_norm": 0.20937024056911469, "learning_rate": 0.00019933795530822793, "loss": 0.4257, "step": 1890 }, { "epoch": 0.15257980325235895, "grad_norm": 0.2749878466129303, "learning_rate": 0.00019932904349103257, "loss": 0.4201, "step": 1900 }, { "epoch": 0.153382854848424, "grad_norm": 0.25813013315200806, "learning_rate": 0.0001993200723055142, "loss": 0.4247, "step": 1910 }, { "epoch": 0.15418590644448907, "grad_norm": 0.28514084219932556, "learning_rate": 0.00019931104175731904, "loss": 0.4358, "step": 1920 }, { "epoch": 0.1549889580405541, "grad_norm": 0.2535870373249054, "learning_rate": 0.00019930195185213076, "loss": 0.4185, "step": 1930 }, { "epoch": 0.15579200963661916, "grad_norm": 0.2558729648590088, "learning_rate": 0.00019929280259567026, "loss": 0.4404, "step": 1940 }, { "epoch": 0.1565950612326842, "grad_norm": 0.19585713744163513, "learning_rate": 0.00019928359399369586, "loss": 0.4293, "step": 1950 }, { "epoch": 0.15739811282874924, "grad_norm": 0.25848135352134705, "learning_rate": 0.00019927432605200326, "loss": 0.4297, "step": 1960 }, { "epoch": 0.1582011644248143, "grad_norm": 0.20310550928115845, "learning_rate": 0.00019926499877642538, "loss": 0.4135, "step": 1970 }, { "epoch": 0.15900421602087933, "grad_norm": 0.26933181285858154, "learning_rate": 0.00019925561217283265, "loss": 0.4306, "step": 1980 }, { "epoch": 0.1598072676169444, "grad_norm": 0.3343607485294342, "learning_rate": 0.00019924616624713274, "loss": 0.4208, "step": 1990 }, { "epoch": 0.16061031921300944, "grad_norm": 0.27786028385162354, "learning_rate": 0.0001992366610052707, "loss": 0.4451, "step": 2000 }, { "epoch": 0.16141337080907447, "grad_norm": 0.23709872364997864, "learning_rate": 0.00019922709645322882, "loss": 0.4332, "step": 2010 }, { "epoch": 0.16221642240513953, "grad_norm": 0.24079999327659607, "learning_rate": 0.0001992174725970269, "loss": 0.431, "step": 2020 }, { "epoch": 0.1630194740012046, "grad_norm": 0.2877088189125061, "learning_rate": 0.00019920778944272185, "loss": 0.4077, "step": 2030 }, { "epoch": 0.16382252559726962, "grad_norm": 0.2795614004135132, "learning_rate": 0.00019919804699640805, "loss": 0.4371, "step": 2040 }, { "epoch": 0.16462557719333468, "grad_norm": 0.2564111053943634, "learning_rate": 0.00019918824526421713, "loss": 0.3898, "step": 2050 }, { "epoch": 0.1654286287893997, "grad_norm": 0.2556214928627014, "learning_rate": 0.0001991783842523181, "loss": 0.4146, "step": 2060 }, { "epoch": 0.16623168038546476, "grad_norm": 0.2505608797073364, "learning_rate": 0.00019916846396691716, "loss": 0.417, "step": 2070 }, { "epoch": 0.16703473198152982, "grad_norm": 0.3117467164993286, "learning_rate": 0.00019915848441425795, "loss": 0.4253, "step": 2080 }, { "epoch": 0.16783778357759485, "grad_norm": 0.23363076150417328, "learning_rate": 0.00019914844560062133, "loss": 0.4186, "step": 2090 }, { "epoch": 0.1686408351736599, "grad_norm": 0.22520136833190918, "learning_rate": 0.00019913834753232544, "loss": 0.4402, "step": 2100 }, { "epoch": 0.16944388676972497, "grad_norm": 0.20596590638160706, "learning_rate": 0.00019912819021572582, "loss": 0.4312, "step": 2110 }, { "epoch": 0.17024693836579, "grad_norm": 0.25108909606933594, "learning_rate": 0.0001991179736572152, "loss": 0.4022, "step": 2120 }, { "epoch": 0.17104998996185505, "grad_norm": 0.26635900139808655, "learning_rate": 0.00019910769786322355, "loss": 0.4362, "step": 2130 }, { "epoch": 0.17185304155792008, "grad_norm": 0.2590387761592865, "learning_rate": 0.0001990973628402183, "loss": 0.4141, "step": 2140 }, { "epoch": 0.17265609315398514, "grad_norm": 0.2715902328491211, "learning_rate": 0.00019908696859470402, "loss": 0.4018, "step": 2150 }, { "epoch": 0.1734591447500502, "grad_norm": 0.2484033852815628, "learning_rate": 0.00019907651513322255, "loss": 0.4228, "step": 2160 }, { "epoch": 0.17426219634611523, "grad_norm": 0.4391380250453949, "learning_rate": 0.0001990660024623531, "loss": 0.4141, "step": 2170 }, { "epoch": 0.17506524794218029, "grad_norm": 0.2523720860481262, "learning_rate": 0.00019905543058871202, "loss": 0.4071, "step": 2180 }, { "epoch": 0.17586829953824534, "grad_norm": 0.29365092515945435, "learning_rate": 0.000199044799518953, "loss": 0.4024, "step": 2190 }, { "epoch": 0.17667135113431037, "grad_norm": 0.3026156425476074, "learning_rate": 0.000199034109259767, "loss": 0.4443, "step": 2200 }, { "epoch": 0.17747440273037543, "grad_norm": 0.28730762004852295, "learning_rate": 0.00019902335981788214, "loss": 0.428, "step": 2210 }, { "epoch": 0.1782774543264405, "grad_norm": 0.22475329041481018, "learning_rate": 0.00019901255120006393, "loss": 0.4373, "step": 2220 }, { "epoch": 0.17908050592250552, "grad_norm": 0.2571013569831848, "learning_rate": 0.00019900168341311495, "loss": 0.4357, "step": 2230 }, { "epoch": 0.17988355751857057, "grad_norm": 0.24495641887187958, "learning_rate": 0.00019899075646387515, "loss": 0.3982, "step": 2240 }, { "epoch": 0.1806866091146356, "grad_norm": 0.24661725759506226, "learning_rate": 0.00019897977035922173, "loss": 0.4155, "step": 2250 }, { "epoch": 0.18148966071070066, "grad_norm": 0.2195568084716797, "learning_rate": 0.00019896872510606896, "loss": 0.3838, "step": 2260 }, { "epoch": 0.18229271230676572, "grad_norm": 0.273108571767807, "learning_rate": 0.00019895762071136852, "loss": 0.374, "step": 2270 }, { "epoch": 0.18309576390283075, "grad_norm": 0.2803899049758911, "learning_rate": 0.00019894645718210923, "loss": 0.4423, "step": 2280 }, { "epoch": 0.1838988154988958, "grad_norm": 0.23768208920955658, "learning_rate": 0.00019893523452531713, "loss": 0.4358, "step": 2290 }, { "epoch": 0.18470186709496086, "grad_norm": 0.2413472682237625, "learning_rate": 0.00019892395274805544, "loss": 0.4185, "step": 2300 }, { "epoch": 0.1855049186910259, "grad_norm": 0.22273987531661987, "learning_rate": 0.00019891261185742465, "loss": 0.465, "step": 2310 }, { "epoch": 0.18630797028709095, "grad_norm": 0.27847281098365784, "learning_rate": 0.00019890121186056249, "loss": 0.4154, "step": 2320 }, { "epoch": 0.18711102188315598, "grad_norm": 0.2656284272670746, "learning_rate": 0.00019888975276464375, "loss": 0.4215, "step": 2330 }, { "epoch": 0.18791407347922104, "grad_norm": 0.278242826461792, "learning_rate": 0.00019887823457688048, "loss": 0.4166, "step": 2340 }, { "epoch": 0.1887171250752861, "grad_norm": 0.25722575187683105, "learning_rate": 0.00019886665730452203, "loss": 0.4178, "step": 2350 }, { "epoch": 0.18952017667135113, "grad_norm": 0.21575112640857697, "learning_rate": 0.0001988550209548548, "loss": 0.425, "step": 2360 }, { "epoch": 0.19032322826741618, "grad_norm": 0.22913213074207306, "learning_rate": 0.00019884332553520237, "loss": 0.4086, "step": 2370 }, { "epoch": 0.19112627986348124, "grad_norm": 0.2860106825828552, "learning_rate": 0.0001988315710529256, "loss": 0.3984, "step": 2380 }, { "epoch": 0.19192933145954627, "grad_norm": 0.2391769140958786, "learning_rate": 0.00019881975751542247, "loss": 0.4185, "step": 2390 }, { "epoch": 0.19273238305561133, "grad_norm": 0.2550266683101654, "learning_rate": 0.00019880788493012807, "loss": 0.4129, "step": 2400 }, { "epoch": 0.19353543465167636, "grad_norm": 0.2800440192222595, "learning_rate": 0.00019879595330451474, "loss": 0.4193, "step": 2410 }, { "epoch": 0.19433848624774142, "grad_norm": 0.30267617106437683, "learning_rate": 0.00019878396264609195, "loss": 0.4459, "step": 2420 }, { "epoch": 0.19514153784380647, "grad_norm": 0.2634919285774231, "learning_rate": 0.0001987719129624063, "loss": 0.4119, "step": 2430 }, { "epoch": 0.1959445894398715, "grad_norm": 0.2540627419948578, "learning_rate": 0.00019875980426104157, "loss": 0.3926, "step": 2440 }, { "epoch": 0.19674764103593656, "grad_norm": 0.2531359791755676, "learning_rate": 0.00019874763654961867, "loss": 0.4138, "step": 2450 }, { "epoch": 0.19755069263200162, "grad_norm": 0.24213442206382751, "learning_rate": 0.00019873540983579563, "loss": 0.4294, "step": 2460 }, { "epoch": 0.19835374422806665, "grad_norm": 0.2526065409183502, "learning_rate": 0.00019872312412726767, "loss": 0.4327, "step": 2470 }, { "epoch": 0.1991567958241317, "grad_norm": 0.2455539107322693, "learning_rate": 0.00019871077943176707, "loss": 0.3981, "step": 2480 }, { "epoch": 0.19995984742019673, "grad_norm": 0.2785981297492981, "learning_rate": 0.00019869837575706332, "loss": 0.428, "step": 2490 }, { "epoch": 0.2007628990162618, "grad_norm": 0.2170405238866806, "learning_rate": 0.0001986859131109629, "loss": 0.4348, "step": 2500 }, { "epoch": 0.20156595061232685, "grad_norm": 0.2378895878791809, "learning_rate": 0.0001986733915013096, "loss": 0.4147, "step": 2510 }, { "epoch": 0.20236900220839188, "grad_norm": 0.23596249520778656, "learning_rate": 0.0001986608109359841, "loss": 0.4066, "step": 2520 }, { "epoch": 0.20317205380445694, "grad_norm": 0.25490647554397583, "learning_rate": 0.00019864817142290435, "loss": 0.4212, "step": 2530 }, { "epoch": 0.203975105400522, "grad_norm": 0.28249311447143555, "learning_rate": 0.00019863547297002533, "loss": 0.458, "step": 2540 }, { "epoch": 0.20477815699658702, "grad_norm": 0.24014419317245483, "learning_rate": 0.00019862271558533908, "loss": 0.4134, "step": 2550 }, { "epoch": 0.20558120859265208, "grad_norm": 0.2967718541622162, "learning_rate": 0.00019860989927687484, "loss": 0.404, "step": 2560 }, { "epoch": 0.2063842601887171, "grad_norm": 0.28600576519966125, "learning_rate": 0.00019859702405269883, "loss": 0.4143, "step": 2570 }, { "epoch": 0.20718731178478217, "grad_norm": 0.30197107791900635, "learning_rate": 0.0001985840899209144, "loss": 0.409, "step": 2580 }, { "epoch": 0.20799036338084723, "grad_norm": 0.2531823515892029, "learning_rate": 0.00019857109688966202, "loss": 0.4042, "step": 2590 }, { "epoch": 0.20879341497691226, "grad_norm": 0.24990105628967285, "learning_rate": 0.0001985580449671191, "loss": 0.4314, "step": 2600 }, { "epoch": 0.2095964665729773, "grad_norm": 0.25662386417388916, "learning_rate": 0.00019854493416150023, "loss": 0.4242, "step": 2610 }, { "epoch": 0.21039951816904237, "grad_norm": 0.2728502154350281, "learning_rate": 0.00019853176448105702, "loss": 0.4415, "step": 2620 }, { "epoch": 0.2112025697651074, "grad_norm": 0.2411411702632904, "learning_rate": 0.0001985185359340781, "loss": 0.4154, "step": 2630 }, { "epoch": 0.21200562136117246, "grad_norm": 0.20828589797019958, "learning_rate": 0.00019850524852888925, "loss": 0.4248, "step": 2640 }, { "epoch": 0.21280867295723752, "grad_norm": 0.3309815526008606, "learning_rate": 0.00019849190227385317, "loss": 0.4325, "step": 2650 }, { "epoch": 0.21361172455330255, "grad_norm": 0.2496381253004074, "learning_rate": 0.00019847849717736967, "loss": 0.4281, "step": 2660 }, { "epoch": 0.2144147761493676, "grad_norm": 0.2432192713022232, "learning_rate": 0.0001984650332478756, "loss": 0.4415, "step": 2670 }, { "epoch": 0.21521782774543263, "grad_norm": 0.25351569056510925, "learning_rate": 0.0001984515104938448, "loss": 0.4092, "step": 2680 }, { "epoch": 0.2160208793414977, "grad_norm": 0.2705692648887634, "learning_rate": 0.00019843792892378816, "loss": 0.3899, "step": 2690 }, { "epoch": 0.21682393093756275, "grad_norm": 0.22155418992042542, "learning_rate": 0.0001984242885462536, "loss": 0.4227, "step": 2700 }, { "epoch": 0.21762698253362778, "grad_norm": 0.27241379022598267, "learning_rate": 0.00019841058936982594, "loss": 0.392, "step": 2710 }, { "epoch": 0.21843003412969283, "grad_norm": 0.2642154395580292, "learning_rate": 0.0001983968314031272, "loss": 0.4124, "step": 2720 }, { "epoch": 0.2192330857257579, "grad_norm": 0.25377368927001953, "learning_rate": 0.00019838301465481626, "loss": 0.4366, "step": 2730 }, { "epoch": 0.22003613732182292, "grad_norm": 0.2886144816875458, "learning_rate": 0.000198369139133589, "loss": 0.4137, "step": 2740 }, { "epoch": 0.22083918891788798, "grad_norm": 0.3098689913749695, "learning_rate": 0.0001983552048481784, "loss": 0.4003, "step": 2750 }, { "epoch": 0.221642240513953, "grad_norm": 0.20381991565227509, "learning_rate": 0.00019834121180735428, "loss": 0.4245, "step": 2760 }, { "epoch": 0.22244529211001807, "grad_norm": 0.22995050251483917, "learning_rate": 0.00019832716001992351, "loss": 0.4149, "step": 2770 }, { "epoch": 0.22324834370608312, "grad_norm": 0.2952299416065216, "learning_rate": 0.00019831304949472997, "loss": 0.4412, "step": 2780 }, { "epoch": 0.22405139530214815, "grad_norm": 0.24338269233703613, "learning_rate": 0.00019829888024065447, "loss": 0.3928, "step": 2790 }, { "epoch": 0.2248544468982132, "grad_norm": 0.24572008848190308, "learning_rate": 0.00019828465226661473, "loss": 0.4086, "step": 2800 }, { "epoch": 0.22565749849427827, "grad_norm": 0.3685961365699768, "learning_rate": 0.0001982703655815655, "loss": 0.4244, "step": 2810 }, { "epoch": 0.2264605500903433, "grad_norm": 0.2650720477104187, "learning_rate": 0.0001982560201944985, "loss": 0.4212, "step": 2820 }, { "epoch": 0.22726360168640836, "grad_norm": 0.21160833537578583, "learning_rate": 0.0001982416161144423, "loss": 0.3985, "step": 2830 }, { "epoch": 0.22806665328247339, "grad_norm": 0.3136347830295563, "learning_rate": 0.0001982271533504625, "loss": 0.3726, "step": 2840 }, { "epoch": 0.22886970487853844, "grad_norm": 0.223492830991745, "learning_rate": 0.00019821263191166158, "loss": 0.4495, "step": 2850 }, { "epoch": 0.2296727564746035, "grad_norm": 0.21538667380809784, "learning_rate": 0.00019819805180717895, "loss": 0.4039, "step": 2860 }, { "epoch": 0.23047580807066853, "grad_norm": 0.25754421949386597, "learning_rate": 0.00019818341304619105, "loss": 0.4056, "step": 2870 }, { "epoch": 0.2312788596667336, "grad_norm": 0.20448066294193268, "learning_rate": 0.00019816871563791105, "loss": 0.3997, "step": 2880 }, { "epoch": 0.23208191126279865, "grad_norm": 0.21546851098537445, "learning_rate": 0.00019815395959158914, "loss": 0.417, "step": 2890 }, { "epoch": 0.23288496285886368, "grad_norm": 0.2547251582145691, "learning_rate": 0.00019813914491651247, "loss": 0.4283, "step": 2900 }, { "epoch": 0.23368801445492873, "grad_norm": 0.2827968895435333, "learning_rate": 0.00019812427162200493, "loss": 0.4275, "step": 2910 }, { "epoch": 0.23449106605099376, "grad_norm": 0.2757623493671417, "learning_rate": 0.0001981093397174275, "loss": 0.4145, "step": 2920 }, { "epoch": 0.23529411764705882, "grad_norm": 0.27146077156066895, "learning_rate": 0.00019809434921217784, "loss": 0.4161, "step": 2930 }, { "epoch": 0.23609716924312388, "grad_norm": 0.2602901756763458, "learning_rate": 0.0001980793001156907, "loss": 0.43, "step": 2940 }, { "epoch": 0.2369002208391889, "grad_norm": 0.2029075175523758, "learning_rate": 0.00019806419243743748, "loss": 0.4182, "step": 2950 }, { "epoch": 0.23770327243525396, "grad_norm": 0.22625015676021576, "learning_rate": 0.00019804902618692668, "loss": 0.4435, "step": 2960 }, { "epoch": 0.23850632403131902, "grad_norm": 0.27283939719200134, "learning_rate": 0.00019803380137370353, "loss": 0.4285, "step": 2970 }, { "epoch": 0.23930937562738405, "grad_norm": 0.26332783699035645, "learning_rate": 0.00019801851800735007, "loss": 0.414, "step": 2980 }, { "epoch": 0.2401124272234491, "grad_norm": 0.25488004088401794, "learning_rate": 0.00019800317609748537, "loss": 0.3994, "step": 2990 }, { "epoch": 0.24091547881951417, "grad_norm": 0.232214093208313, "learning_rate": 0.0001979877756537652, "loss": 0.3903, "step": 3000 }, { "epoch": 0.2417185304155792, "grad_norm": 0.2852801978588104, "learning_rate": 0.0001979723166858822, "loss": 0.3755, "step": 3010 }, { "epoch": 0.24252158201164425, "grad_norm": 0.347303569316864, "learning_rate": 0.00019795679920356588, "loss": 0.406, "step": 3020 }, { "epoch": 0.24332463360770928, "grad_norm": 0.23968881368637085, "learning_rate": 0.00019794122321658251, "loss": 0.4041, "step": 3030 }, { "epoch": 0.24412768520377434, "grad_norm": 0.26398399472236633, "learning_rate": 0.00019792558873473528, "loss": 0.4168, "step": 3040 }, { "epoch": 0.2449307367998394, "grad_norm": 0.25920552015304565, "learning_rate": 0.00019790989576786406, "loss": 0.401, "step": 3050 }, { "epoch": 0.24573378839590443, "grad_norm": 0.3455207943916321, "learning_rate": 0.0001978941443258457, "loss": 0.4287, "step": 3060 }, { "epoch": 0.2465368399919695, "grad_norm": 0.28391721844673157, "learning_rate": 0.00019787833441859372, "loss": 0.4458, "step": 3070 }, { "epoch": 0.24733989158803454, "grad_norm": 0.2513786554336548, "learning_rate": 0.00019786246605605845, "loss": 0.4438, "step": 3080 }, { "epoch": 0.24814294318409957, "grad_norm": 0.29112735390663147, "learning_rate": 0.00019784653924822705, "loss": 0.4093, "step": 3090 }, { "epoch": 0.24894599478016463, "grad_norm": 0.2294594794511795, "learning_rate": 0.00019783055400512348, "loss": 0.4262, "step": 3100 }, { "epoch": 0.24974904637622966, "grad_norm": 0.2281404733657837, "learning_rate": 0.00019781451033680842, "loss": 0.414, "step": 3110 }, { "epoch": 0.2505520979722947, "grad_norm": 0.23575541377067566, "learning_rate": 0.00019779840825337938, "loss": 0.4322, "step": 3120 }, { "epoch": 0.25135514956835975, "grad_norm": 0.29296138882637024, "learning_rate": 0.0001977822477649706, "loss": 0.4281, "step": 3130 }, { "epoch": 0.25215820116442483, "grad_norm": 0.3971582353115082, "learning_rate": 0.00019776602888175304, "loss": 0.3851, "step": 3140 }, { "epoch": 0.25296125276048986, "grad_norm": 0.2577185332775116, "learning_rate": 0.00019774975161393447, "loss": 0.4517, "step": 3150 }, { "epoch": 0.2537643043565549, "grad_norm": 0.22963175177574158, "learning_rate": 0.00019773341597175944, "loss": 0.4194, "step": 3160 }, { "epoch": 0.25456735595262, "grad_norm": 0.25718382000923157, "learning_rate": 0.0001977170219655092, "loss": 0.4287, "step": 3170 }, { "epoch": 0.255370407548685, "grad_norm": 0.22592297196388245, "learning_rate": 0.00019770056960550164, "loss": 0.466, "step": 3180 }, { "epoch": 0.25617345914475004, "grad_norm": 0.23403896391391754, "learning_rate": 0.0001976840589020915, "loss": 0.4258, "step": 3190 }, { "epoch": 0.2569765107408151, "grad_norm": 0.3149869740009308, "learning_rate": 0.00019766748986567025, "loss": 0.4347, "step": 3200 }, { "epoch": 0.25777956233688015, "grad_norm": 0.25209617614746094, "learning_rate": 0.00019765086250666598, "loss": 0.4197, "step": 3210 }, { "epoch": 0.2585826139329452, "grad_norm": 0.31252923607826233, "learning_rate": 0.0001976341768355435, "loss": 0.3884, "step": 3220 }, { "epoch": 0.2593856655290102, "grad_norm": 0.24376016855239868, "learning_rate": 0.0001976174328628044, "loss": 0.4086, "step": 3230 }, { "epoch": 0.2601887171250753, "grad_norm": 0.22332137823104858, "learning_rate": 0.00019760063059898688, "loss": 0.398, "step": 3240 }, { "epoch": 0.2609917687211403, "grad_norm": 0.27608510851860046, "learning_rate": 0.00019758377005466593, "loss": 0.4318, "step": 3250 }, { "epoch": 0.26179482031720536, "grad_norm": 0.3297882676124573, "learning_rate": 0.00019756685124045306, "loss": 0.4132, "step": 3260 }, { "epoch": 0.26259787191327044, "grad_norm": 0.2206612378358841, "learning_rate": 0.0001975498741669966, "loss": 0.4245, "step": 3270 }, { "epoch": 0.26340092350933547, "grad_norm": 0.2301427572965622, "learning_rate": 0.00019753283884498145, "loss": 0.4215, "step": 3280 }, { "epoch": 0.2642039751054005, "grad_norm": 0.2943105399608612, "learning_rate": 0.00019751574528512931, "loss": 0.4216, "step": 3290 }, { "epoch": 0.2650070267014656, "grad_norm": 0.24335379898548126, "learning_rate": 0.0001974985934981983, "loss": 0.396, "step": 3300 }, { "epoch": 0.2658100782975306, "grad_norm": 0.24826814234256744, "learning_rate": 0.0001974813834949834, "loss": 0.4502, "step": 3310 }, { "epoch": 0.26661312989359565, "grad_norm": 0.22578881680965424, "learning_rate": 0.0001974641152863162, "loss": 0.3848, "step": 3320 }, { "epoch": 0.26741618148966073, "grad_norm": 0.21702295541763306, "learning_rate": 0.00019744678888306477, "loss": 0.4094, "step": 3330 }, { "epoch": 0.26821923308572576, "grad_norm": 0.19581525027751923, "learning_rate": 0.00019742940429613395, "loss": 0.4139, "step": 3340 }, { "epoch": 0.2690222846817908, "grad_norm": 0.3734869658946991, "learning_rate": 0.0001974119615364652, "loss": 0.407, "step": 3350 }, { "epoch": 0.2698253362778559, "grad_norm": 0.2429914027452469, "learning_rate": 0.0001973944606150365, "loss": 0.4179, "step": 3360 }, { "epoch": 0.2706283878739209, "grad_norm": 0.21789851784706116, "learning_rate": 0.00019737690154286253, "loss": 0.4107, "step": 3370 }, { "epoch": 0.27143143946998594, "grad_norm": 0.22029538452625275, "learning_rate": 0.0001973592843309945, "loss": 0.4268, "step": 3380 }, { "epoch": 0.272234491066051, "grad_norm": 0.27290162444114685, "learning_rate": 0.00019734160899052025, "loss": 0.4147, "step": 3390 }, { "epoch": 0.27303754266211605, "grad_norm": 0.21629735827445984, "learning_rate": 0.0001973238755325642, "loss": 0.4021, "step": 3400 }, { "epoch": 0.2738405942581811, "grad_norm": 0.23839472234249115, "learning_rate": 0.00019730608396828733, "loss": 0.3982, "step": 3410 }, { "epoch": 0.2746436458542461, "grad_norm": 0.33271920680999756, "learning_rate": 0.00019728823430888718, "loss": 0.4375, "step": 3420 }, { "epoch": 0.2754466974503112, "grad_norm": 0.23354139924049377, "learning_rate": 0.00019727032656559795, "loss": 0.406, "step": 3430 }, { "epoch": 0.2762497490463762, "grad_norm": 0.23592320084571838, "learning_rate": 0.00019725236074969027, "loss": 0.3879, "step": 3440 }, { "epoch": 0.27705280064244125, "grad_norm": 0.21572482585906982, "learning_rate": 0.00019723433687247134, "loss": 0.3764, "step": 3450 }, { "epoch": 0.27785585223850634, "grad_norm": 0.25846850872039795, "learning_rate": 0.00019721625494528502, "loss": 0.4148, "step": 3460 }, { "epoch": 0.27865890383457137, "grad_norm": 0.2563669979572296, "learning_rate": 0.00019719811497951155, "loss": 0.395, "step": 3470 }, { "epoch": 0.2794619554306364, "grad_norm": 0.2586056888103485, "learning_rate": 0.0001971799169865678, "loss": 0.4174, "step": 3480 }, { "epoch": 0.2802650070267015, "grad_norm": 0.22834989428520203, "learning_rate": 0.00019716166097790707, "loss": 0.421, "step": 3490 }, { "epoch": 0.2810680586227665, "grad_norm": 0.30397266149520874, "learning_rate": 0.00019714334696501932, "loss": 0.4321, "step": 3500 }, { "epoch": 0.28187111021883154, "grad_norm": 0.29908469319343567, "learning_rate": 0.00019712497495943086, "loss": 0.433, "step": 3510 }, { "epoch": 0.28267416181489663, "grad_norm": 0.2792292535305023, "learning_rate": 0.00019710654497270464, "loss": 0.3975, "step": 3520 }, { "epoch": 0.28347721341096166, "grad_norm": 0.21573412418365479, "learning_rate": 0.00019708805701643993, "loss": 0.3902, "step": 3530 }, { "epoch": 0.2842802650070267, "grad_norm": 0.22266802191734314, "learning_rate": 0.00019706951110227267, "loss": 0.4345, "step": 3540 }, { "epoch": 0.2850833166030918, "grad_norm": 0.2509976327419281, "learning_rate": 0.00019705090724187515, "loss": 0.4187, "step": 3550 }, { "epoch": 0.2858863681991568, "grad_norm": 0.23613792657852173, "learning_rate": 0.00019703224544695617, "loss": 0.3749, "step": 3560 }, { "epoch": 0.28668941979522183, "grad_norm": 0.250315397977829, "learning_rate": 0.00019701352572926102, "loss": 0.4188, "step": 3570 }, { "epoch": 0.28749247139128686, "grad_norm": 0.28120335936546326, "learning_rate": 0.00019699474810057138, "loss": 0.4201, "step": 3580 }, { "epoch": 0.28829552298735195, "grad_norm": 0.23578433692455292, "learning_rate": 0.00019697591257270548, "loss": 0.4074, "step": 3590 }, { "epoch": 0.289098574583417, "grad_norm": 0.21853268146514893, "learning_rate": 0.00019695701915751782, "loss": 0.4293, "step": 3600 }, { "epoch": 0.289901626179482, "grad_norm": 0.24835771322250366, "learning_rate": 0.00019693806786689954, "loss": 0.4127, "step": 3610 }, { "epoch": 0.2907046777755471, "grad_norm": 0.24109549820423126, "learning_rate": 0.00019691905871277806, "loss": 0.3891, "step": 3620 }, { "epoch": 0.2915077293716121, "grad_norm": 0.2725593149662018, "learning_rate": 0.00019689999170711728, "loss": 0.3891, "step": 3630 }, { "epoch": 0.29231078096767715, "grad_norm": 0.2461603879928589, "learning_rate": 0.00019688086686191748, "loss": 0.4257, "step": 3640 }, { "epoch": 0.29311383256374224, "grad_norm": 0.2213657647371292, "learning_rate": 0.00019686168418921534, "loss": 0.3878, "step": 3650 }, { "epoch": 0.29391688415980727, "grad_norm": 0.29648318886756897, "learning_rate": 0.00019684244370108398, "loss": 0.3984, "step": 3660 }, { "epoch": 0.2947199357558723, "grad_norm": 0.26456934213638306, "learning_rate": 0.0001968231454096329, "loss": 0.4342, "step": 3670 }, { "epoch": 0.2955229873519374, "grad_norm": 0.24295799434185028, "learning_rate": 0.00019680378932700787, "loss": 0.4128, "step": 3680 }, { "epoch": 0.2963260389480024, "grad_norm": 0.23266728222370148, "learning_rate": 0.0001967843754653912, "loss": 0.4209, "step": 3690 }, { "epoch": 0.29712909054406744, "grad_norm": 0.19308719038963318, "learning_rate": 0.00019676490383700151, "loss": 0.4179, "step": 3700 }, { "epoch": 0.2979321421401325, "grad_norm": 0.29101601243019104, "learning_rate": 0.00019674537445409367, "loss": 0.4293, "step": 3710 }, { "epoch": 0.29873519373619756, "grad_norm": 0.22185185551643372, "learning_rate": 0.00019672578732895897, "loss": 0.4144, "step": 3720 }, { "epoch": 0.2995382453322626, "grad_norm": 0.26236921548843384, "learning_rate": 0.00019670614247392515, "loss": 0.4198, "step": 3730 }, { "epoch": 0.3003412969283277, "grad_norm": 0.25397491455078125, "learning_rate": 0.00019668643990135615, "loss": 0.4126, "step": 3740 }, { "epoch": 0.3011443485243927, "grad_norm": 0.24769379198551178, "learning_rate": 0.0001966666796236522, "loss": 0.4058, "step": 3750 }, { "epoch": 0.30194740012045773, "grad_norm": 0.2374163568019867, "learning_rate": 0.00019664686165325003, "loss": 0.436, "step": 3760 }, { "epoch": 0.30275045171652276, "grad_norm": 0.19273585081100464, "learning_rate": 0.0001966269860026225, "loss": 0.4209, "step": 3770 }, { "epoch": 0.30355350331258785, "grad_norm": 0.22247867286205292, "learning_rate": 0.0001966070526842789, "loss": 0.4298, "step": 3780 }, { "epoch": 0.3043565549086529, "grad_norm": 0.28769558668136597, "learning_rate": 0.00019658706171076467, "loss": 0.4226, "step": 3790 }, { "epoch": 0.3051596065047179, "grad_norm": 0.23333032429218292, "learning_rate": 0.0001965670130946617, "loss": 0.4253, "step": 3800 }, { "epoch": 0.305962658100783, "grad_norm": 0.20651468634605408, "learning_rate": 0.00019654690684858808, "loss": 0.4281, "step": 3810 }, { "epoch": 0.306765709696848, "grad_norm": 0.23363591730594635, "learning_rate": 0.00019652674298519814, "loss": 0.4138, "step": 3820 }, { "epoch": 0.30756876129291305, "grad_norm": 0.2553456425666809, "learning_rate": 0.0001965065215171825, "loss": 0.3896, "step": 3830 }, { "epoch": 0.30837181288897814, "grad_norm": 0.28723570704460144, "learning_rate": 0.0001964862424572681, "loss": 0.4333, "step": 3840 }, { "epoch": 0.30917486448504317, "grad_norm": 0.23405128717422485, "learning_rate": 0.000196465905818218, "loss": 0.4069, "step": 3850 }, { "epoch": 0.3099779160811082, "grad_norm": 0.2616553008556366, "learning_rate": 0.00019644551161283164, "loss": 0.4194, "step": 3860 }, { "epoch": 0.3107809676771733, "grad_norm": 0.21298611164093018, "learning_rate": 0.00019642505985394457, "loss": 0.4281, "step": 3870 }, { "epoch": 0.3115840192732383, "grad_norm": 0.29617077112197876, "learning_rate": 0.00019640455055442864, "loss": 0.3739, "step": 3880 }, { "epoch": 0.31238707086930334, "grad_norm": 0.23241069912910461, "learning_rate": 0.00019638398372719183, "loss": 0.3913, "step": 3890 }, { "epoch": 0.3131901224653684, "grad_norm": 0.22435371577739716, "learning_rate": 0.00019636335938517845, "loss": 0.4062, "step": 3900 }, { "epoch": 0.31399317406143346, "grad_norm": 0.24457041919231415, "learning_rate": 0.00019634267754136891, "loss": 0.4116, "step": 3910 }, { "epoch": 0.3147962256574985, "grad_norm": 0.20356722176074982, "learning_rate": 0.00019632193820877988, "loss": 0.4135, "step": 3920 }, { "epoch": 0.3155992772535635, "grad_norm": 0.19495058059692383, "learning_rate": 0.0001963011414004641, "loss": 0.4029, "step": 3930 }, { "epoch": 0.3164023288496286, "grad_norm": 0.22952401638031006, "learning_rate": 0.0001962802871295106, "loss": 0.4055, "step": 3940 }, { "epoch": 0.31720538044569363, "grad_norm": 0.2640877366065979, "learning_rate": 0.00019625937540904451, "loss": 0.4407, "step": 3950 }, { "epoch": 0.31800843204175866, "grad_norm": 0.2316255420446396, "learning_rate": 0.0001962384062522272, "loss": 0.4177, "step": 3960 }, { "epoch": 0.31881148363782374, "grad_norm": 0.22001047432422638, "learning_rate": 0.00019621737967225606, "loss": 0.389, "step": 3970 }, { "epoch": 0.3196145352338888, "grad_norm": 0.27387887239456177, "learning_rate": 0.00019619629568236476, "loss": 0.4112, "step": 3980 }, { "epoch": 0.3204175868299538, "grad_norm": 0.21315446496009827, "learning_rate": 0.00019617515429582293, "loss": 0.4142, "step": 3990 }, { "epoch": 0.3212206384260189, "grad_norm": 0.28387251496315, "learning_rate": 0.0001961539555259365, "loss": 0.4325, "step": 4000 }, { "epoch": 0.3220236900220839, "grad_norm": 0.26821061968803406, "learning_rate": 0.00019613269938604743, "loss": 0.3927, "step": 4010 }, { "epoch": 0.32282674161814895, "grad_norm": 0.2472463995218277, "learning_rate": 0.00019611138588953382, "loss": 0.4335, "step": 4020 }, { "epoch": 0.32362979321421403, "grad_norm": 0.24579355120658875, "learning_rate": 0.0001960900150498098, "loss": 0.3879, "step": 4030 }, { "epoch": 0.32443284481027906, "grad_norm": 0.2571146488189697, "learning_rate": 0.00019606858688032568, "loss": 0.3884, "step": 4040 }, { "epoch": 0.3252358964063441, "grad_norm": 0.24032187461853027, "learning_rate": 0.00019604710139456777, "loss": 0.4011, "step": 4050 }, { "epoch": 0.3260389480024092, "grad_norm": 0.2581178545951843, "learning_rate": 0.00019602555860605853, "loss": 0.398, "step": 4060 }, { "epoch": 0.3268419995984742, "grad_norm": 0.24008013308048248, "learning_rate": 0.00019600395852835642, "loss": 0.4013, "step": 4070 }, { "epoch": 0.32764505119453924, "grad_norm": 0.33649250864982605, "learning_rate": 0.00019598230117505599, "loss": 0.4107, "step": 4080 }, { "epoch": 0.3284481027906043, "grad_norm": 0.23610901832580566, "learning_rate": 0.00019596058655978787, "loss": 0.4119, "step": 4090 }, { "epoch": 0.32925115438666935, "grad_norm": 0.21445293724536896, "learning_rate": 0.00019593881469621863, "loss": 0.391, "step": 4100 }, { "epoch": 0.3300542059827344, "grad_norm": 0.2929198145866394, "learning_rate": 0.00019591698559805098, "loss": 0.4233, "step": 4110 }, { "epoch": 0.3308572575787994, "grad_norm": 0.22149035334587097, "learning_rate": 0.00019589509927902358, "loss": 0.4347, "step": 4120 }, { "epoch": 0.3316603091748645, "grad_norm": 0.2639293968677521, "learning_rate": 0.00019587315575291114, "loss": 0.4299, "step": 4130 }, { "epoch": 0.33246336077092953, "grad_norm": 0.39158177375793457, "learning_rate": 0.00019585115503352435, "loss": 0.4375, "step": 4140 }, { "epoch": 0.33326641236699456, "grad_norm": 0.21890054643154144, "learning_rate": 0.00019582909713470993, "loss": 0.4165, "step": 4150 }, { "epoch": 0.33406946396305964, "grad_norm": 0.27459755539894104, "learning_rate": 0.00019580698207035053, "loss": 0.416, "step": 4160 }, { "epoch": 0.3348725155591247, "grad_norm": 0.2410919964313507, "learning_rate": 0.00019578480985436487, "loss": 0.4132, "step": 4170 }, { "epoch": 0.3356755671551897, "grad_norm": 0.22341926395893097, "learning_rate": 0.00019576258050070755, "loss": 0.407, "step": 4180 }, { "epoch": 0.3364786187512548, "grad_norm": 0.2641603350639343, "learning_rate": 0.00019574029402336915, "loss": 0.3979, "step": 4190 }, { "epoch": 0.3372816703473198, "grad_norm": 0.23463548719882965, "learning_rate": 0.00019571795043637623, "loss": 0.4243, "step": 4200 }, { "epoch": 0.33808472194338485, "grad_norm": 0.22591660916805267, "learning_rate": 0.00019569554975379128, "loss": 0.4291, "step": 4210 }, { "epoch": 0.33888777353944993, "grad_norm": 0.28312405943870544, "learning_rate": 0.00019567309198971274, "loss": 0.4249, "step": 4220 }, { "epoch": 0.33969082513551496, "grad_norm": 0.2422867715358734, "learning_rate": 0.00019565057715827493, "loss": 0.4132, "step": 4230 }, { "epoch": 0.34049387673158, "grad_norm": 0.23068667948246002, "learning_rate": 0.00019562800527364815, "loss": 0.4, "step": 4240 }, { "epoch": 0.3412969283276451, "grad_norm": 0.204949751496315, "learning_rate": 0.00019560537635003857, "loss": 0.4167, "step": 4250 }, { "epoch": 0.3420999799237101, "grad_norm": 0.22883160412311554, "learning_rate": 0.00019558269040168824, "loss": 0.4088, "step": 4260 }, { "epoch": 0.34290303151977514, "grad_norm": 0.20862162113189697, "learning_rate": 0.00019555994744287515, "loss": 0.4156, "step": 4270 }, { "epoch": 0.34370608311584017, "grad_norm": 0.228568896651268, "learning_rate": 0.00019553714748791315, "loss": 0.3999, "step": 4280 }, { "epoch": 0.34450913471190525, "grad_norm": 0.25257018208503723, "learning_rate": 0.00019551429055115194, "loss": 0.439, "step": 4290 }, { "epoch": 0.3453121863079703, "grad_norm": 0.26607459783554077, "learning_rate": 0.00019549137664697706, "loss": 0.4241, "step": 4300 }, { "epoch": 0.3461152379040353, "grad_norm": 0.22825919091701508, "learning_rate": 0.00019546840578981003, "loss": 0.4144, "step": 4310 }, { "epoch": 0.3469182895001004, "grad_norm": 0.22419194877147675, "learning_rate": 0.00019544537799410806, "loss": 0.4108, "step": 4320 }, { "epoch": 0.3477213410961654, "grad_norm": 0.26148632168769836, "learning_rate": 0.00019542229327436432, "loss": 0.4474, "step": 4330 }, { "epoch": 0.34852439269223046, "grad_norm": 0.1898316591978073, "learning_rate": 0.0001953991516451077, "loss": 0.3748, "step": 4340 }, { "epoch": 0.34932744428829554, "grad_norm": 0.23988664150238037, "learning_rate": 0.000195375953120903, "loss": 0.401, "step": 4350 }, { "epoch": 0.35013049588436057, "grad_norm": 0.2889849543571472, "learning_rate": 0.00019535269771635074, "loss": 0.3941, "step": 4360 }, { "epoch": 0.3509335474804256, "grad_norm": 0.23460166156291962, "learning_rate": 0.00019532938544608732, "loss": 0.3868, "step": 4370 }, { "epoch": 0.3517365990764907, "grad_norm": 0.19527152180671692, "learning_rate": 0.00019530601632478488, "loss": 0.4034, "step": 4380 }, { "epoch": 0.3525396506725557, "grad_norm": 0.30313417315483093, "learning_rate": 0.0001952825903671514, "loss": 0.4269, "step": 4390 }, { "epoch": 0.35334270226862075, "grad_norm": 0.2850656807422638, "learning_rate": 0.00019525910758793054, "loss": 0.4, "step": 4400 }, { "epoch": 0.35414575386468583, "grad_norm": 0.2195451557636261, "learning_rate": 0.00019523556800190174, "loss": 0.4077, "step": 4410 }, { "epoch": 0.35494880546075086, "grad_norm": 0.24107486009597778, "learning_rate": 0.0001952119716238803, "loss": 0.4033, "step": 4420 }, { "epoch": 0.3557518570568159, "grad_norm": 0.21646204590797424, "learning_rate": 0.0001951883184687171, "loss": 0.3826, "step": 4430 }, { "epoch": 0.356554908652881, "grad_norm": 0.23529164493083954, "learning_rate": 0.00019516460855129893, "loss": 0.3968, "step": 4440 }, { "epoch": 0.357357960248946, "grad_norm": 0.24880169332027435, "learning_rate": 0.00019514084188654816, "loss": 0.3924, "step": 4450 }, { "epoch": 0.35816101184501103, "grad_norm": 0.28833892941474915, "learning_rate": 0.00019511701848942293, "loss": 0.3954, "step": 4460 }, { "epoch": 0.35896406344107606, "grad_norm": 0.24339266121387482, "learning_rate": 0.00019509313837491713, "loss": 0.4131, "step": 4470 }, { "epoch": 0.35976711503714115, "grad_norm": 0.22676114737987518, "learning_rate": 0.00019506920155806025, "loss": 0.3916, "step": 4480 }, { "epoch": 0.3605701666332062, "grad_norm": 0.22020398080348969, "learning_rate": 0.00019504520805391755, "loss": 0.4354, "step": 4490 }, { "epoch": 0.3613732182292712, "grad_norm": 0.2314370572566986, "learning_rate": 0.0001950211578775899, "loss": 0.4167, "step": 4500 }, { "epoch": 0.3621762698253363, "grad_norm": 0.23125658929347992, "learning_rate": 0.00019499705104421392, "loss": 0.3926, "step": 4510 }, { "epoch": 0.3629793214214013, "grad_norm": 0.29559803009033203, "learning_rate": 0.00019497288756896187, "loss": 0.405, "step": 4520 }, { "epoch": 0.36378237301746635, "grad_norm": 0.2431115061044693, "learning_rate": 0.0001949486674670415, "loss": 0.4009, "step": 4530 }, { "epoch": 0.36458542461353144, "grad_norm": 0.24001701176166534, "learning_rate": 0.00019492439075369645, "loss": 0.4144, "step": 4540 }, { "epoch": 0.36538847620959647, "grad_norm": 0.23479294776916504, "learning_rate": 0.0001949000574442059, "loss": 0.4331, "step": 4550 }, { "epoch": 0.3661915278056615, "grad_norm": 0.19959352910518646, "learning_rate": 0.0001948756675538845, "loss": 0.385, "step": 4560 }, { "epoch": 0.3669945794017266, "grad_norm": 0.2564031481742859, "learning_rate": 0.00019485122109808272, "loss": 0.3978, "step": 4570 }, { "epoch": 0.3677976309977916, "grad_norm": 0.2014150768518448, "learning_rate": 0.00019482671809218648, "loss": 0.4086, "step": 4580 }, { "epoch": 0.36860068259385664, "grad_norm": 0.2329677939414978, "learning_rate": 0.00019480215855161744, "loss": 0.4234, "step": 4590 }, { "epoch": 0.36940373418992173, "grad_norm": 0.2647996246814728, "learning_rate": 0.0001947775424918327, "loss": 0.3923, "step": 4600 }, { "epoch": 0.37020678578598676, "grad_norm": 0.2215988039970398, "learning_rate": 0.00019475286992832498, "loss": 0.4293, "step": 4610 }, { "epoch": 0.3710098373820518, "grad_norm": 0.23590205609798431, "learning_rate": 0.0001947281408766226, "loss": 0.4292, "step": 4620 }, { "epoch": 0.3718128889781168, "grad_norm": 0.29801279306411743, "learning_rate": 0.00019470335535228938, "loss": 0.4226, "step": 4630 }, { "epoch": 0.3726159405741819, "grad_norm": 0.24642141163349152, "learning_rate": 0.0001946785133709247, "loss": 0.3807, "step": 4640 }, { "epoch": 0.37341899217024693, "grad_norm": 0.24155759811401367, "learning_rate": 0.00019465361494816353, "loss": 0.3864, "step": 4650 }, { "epoch": 0.37422204376631196, "grad_norm": 0.2536332607269287, "learning_rate": 0.00019462866009967625, "loss": 0.4074, "step": 4660 }, { "epoch": 0.37502509536237705, "grad_norm": 0.2431042641401291, "learning_rate": 0.00019460364884116882, "loss": 0.41, "step": 4670 }, { "epoch": 0.3758281469584421, "grad_norm": 0.20873774588108063, "learning_rate": 0.0001945785811883827, "loss": 0.4096, "step": 4680 }, { "epoch": 0.3766311985545071, "grad_norm": 0.2520483434200287, "learning_rate": 0.00019455345715709487, "loss": 0.4185, "step": 4690 }, { "epoch": 0.3774342501505722, "grad_norm": 0.19780303537845612, "learning_rate": 0.00019452827676311773, "loss": 0.3683, "step": 4700 }, { "epoch": 0.3782373017466372, "grad_norm": 0.22340424358844757, "learning_rate": 0.0001945030400222992, "loss": 0.4155, "step": 4710 }, { "epoch": 0.37904035334270225, "grad_norm": 0.28748923540115356, "learning_rate": 0.00019447774695052262, "loss": 0.3862, "step": 4720 }, { "epoch": 0.37984340493876734, "grad_norm": 0.2585335075855255, "learning_rate": 0.00019445239756370688, "loss": 0.412, "step": 4730 }, { "epoch": 0.38064645653483237, "grad_norm": 0.2341035157442093, "learning_rate": 0.00019442699187780616, "loss": 0.4252, "step": 4740 }, { "epoch": 0.3814495081308974, "grad_norm": 0.27559906244277954, "learning_rate": 0.00019440152990881024, "loss": 0.3938, "step": 4750 }, { "epoch": 0.3822525597269625, "grad_norm": 0.24044209718704224, "learning_rate": 0.00019437601167274416, "loss": 0.4103, "step": 4760 }, { "epoch": 0.3830556113230275, "grad_norm": 0.23778948187828064, "learning_rate": 0.00019435043718566853, "loss": 0.4145, "step": 4770 }, { "epoch": 0.38385866291909254, "grad_norm": 0.23339533805847168, "learning_rate": 0.00019432480646367924, "loss": 0.418, "step": 4780 }, { "epoch": 0.38466171451515757, "grad_norm": 0.2428145706653595, "learning_rate": 0.0001942991195229076, "loss": 0.4249, "step": 4790 }, { "epoch": 0.38546476611122266, "grad_norm": 0.25169894099235535, "learning_rate": 0.00019427337637952042, "loss": 0.4055, "step": 4800 }, { "epoch": 0.3862678177072877, "grad_norm": 0.23322689533233643, "learning_rate": 0.0001942475770497197, "loss": 0.3866, "step": 4810 }, { "epoch": 0.3870708693033527, "grad_norm": 0.23241057991981506, "learning_rate": 0.00019422172154974287, "loss": 0.4015, "step": 4820 }, { "epoch": 0.3878739208994178, "grad_norm": 0.22046995162963867, "learning_rate": 0.00019419580989586284, "loss": 0.4151, "step": 4830 }, { "epoch": 0.38867697249548283, "grad_norm": 0.2632502317428589, "learning_rate": 0.00019416984210438764, "loss": 0.3963, "step": 4840 }, { "epoch": 0.38948002409154786, "grad_norm": 0.2415120005607605, "learning_rate": 0.0001941438181916608, "loss": 0.3834, "step": 4850 }, { "epoch": 0.39028307568761295, "grad_norm": 0.28646889328956604, "learning_rate": 0.0001941177381740611, "loss": 0.4196, "step": 4860 }, { "epoch": 0.391086127283678, "grad_norm": 0.2379418909549713, "learning_rate": 0.00019409160206800263, "loss": 0.4058, "step": 4870 }, { "epoch": 0.391889178879743, "grad_norm": 0.19821414351463318, "learning_rate": 0.00019406540988993484, "loss": 0.3959, "step": 4880 }, { "epoch": 0.3926922304758081, "grad_norm": 0.2230139523744583, "learning_rate": 0.00019403916165634238, "loss": 0.4229, "step": 4890 }, { "epoch": 0.3934952820718731, "grad_norm": 0.22378113865852356, "learning_rate": 0.00019401285738374527, "loss": 0.4159, "step": 4900 }, { "epoch": 0.39429833366793815, "grad_norm": 0.2694958746433258, "learning_rate": 0.0001939864970886987, "loss": 0.3822, "step": 4910 }, { "epoch": 0.39510138526400324, "grad_norm": 0.23877988755702972, "learning_rate": 0.00019396008078779325, "loss": 0.4102, "step": 4920 }, { "epoch": 0.39590443686006827, "grad_norm": 0.23148031532764435, "learning_rate": 0.00019393360849765466, "loss": 0.4028, "step": 4930 }, { "epoch": 0.3967074884561333, "grad_norm": 0.247510626912117, "learning_rate": 0.00019390708023494385, "loss": 0.4077, "step": 4940 }, { "epoch": 0.3975105400521984, "grad_norm": 0.2293589562177658, "learning_rate": 0.00019388049601635706, "loss": 0.3965, "step": 4950 }, { "epoch": 0.3983135916482634, "grad_norm": 0.2634945213794708, "learning_rate": 0.00019385385585862582, "loss": 0.4025, "step": 4960 }, { "epoch": 0.39911664324432844, "grad_norm": 0.23111891746520996, "learning_rate": 0.00019382715977851666, "loss": 0.3838, "step": 4970 }, { "epoch": 0.39991969484039347, "grad_norm": 0.30030253529548645, "learning_rate": 0.00019380040779283145, "loss": 0.4051, "step": 4980 }, { "epoch": 0.40072274643645855, "grad_norm": 0.2468675822019577, "learning_rate": 0.0001937735999184073, "loss": 0.4127, "step": 4990 }, { "epoch": 0.4015257980325236, "grad_norm": 0.2545342445373535, "learning_rate": 0.00019374673617211628, "loss": 0.4193, "step": 5000 }, { "epoch": 0.4023288496285886, "grad_norm": 0.21189703047275543, "learning_rate": 0.00019371981657086588, "loss": 0.4163, "step": 5010 }, { "epoch": 0.4031319012246537, "grad_norm": 0.25947850942611694, "learning_rate": 0.00019369284113159848, "loss": 0.4361, "step": 5020 }, { "epoch": 0.40393495282071873, "grad_norm": 0.25116780400276184, "learning_rate": 0.0001936658098712919, "loss": 0.4037, "step": 5030 }, { "epoch": 0.40473800441678376, "grad_norm": 0.20200896263122559, "learning_rate": 0.0001936387228069588, "loss": 0.3974, "step": 5040 }, { "epoch": 0.40554105601284884, "grad_norm": 0.23786014318466187, "learning_rate": 0.00019361157995564717, "loss": 0.3898, "step": 5050 }, { "epoch": 0.4063441076089139, "grad_norm": 0.2680423855781555, "learning_rate": 0.00019358438133444003, "loss": 0.3793, "step": 5060 }, { "epoch": 0.4071471592049789, "grad_norm": 0.2267007827758789, "learning_rate": 0.00019355712696045546, "loss": 0.4251, "step": 5070 }, { "epoch": 0.407950210801044, "grad_norm": 0.19734500348567963, "learning_rate": 0.00019352981685084674, "loss": 0.4099, "step": 5080 }, { "epoch": 0.408753262397109, "grad_norm": 0.20562127232551575, "learning_rate": 0.0001935024510228021, "loss": 0.3796, "step": 5090 }, { "epoch": 0.40955631399317405, "grad_norm": 0.2551405429840088, "learning_rate": 0.00019347502949354498, "loss": 0.3936, "step": 5100 }, { "epoch": 0.41035936558923913, "grad_norm": 0.26069068908691406, "learning_rate": 0.00019344755228033374, "loss": 0.3973, "step": 5110 }, { "epoch": 0.41116241718530416, "grad_norm": 0.21516703069210052, "learning_rate": 0.00019342001940046187, "loss": 0.4249, "step": 5120 }, { "epoch": 0.4119654687813692, "grad_norm": 0.26459288597106934, "learning_rate": 0.00019339243087125783, "loss": 0.404, "step": 5130 }, { "epoch": 0.4127685203774342, "grad_norm": 0.2111758142709732, "learning_rate": 0.00019336478671008517, "loss": 0.4182, "step": 5140 }, { "epoch": 0.4135715719734993, "grad_norm": 0.24614010751247406, "learning_rate": 0.00019333708693434248, "loss": 0.3989, "step": 5150 }, { "epoch": 0.41437462356956434, "grad_norm": 0.2508879601955414, "learning_rate": 0.00019330933156146322, "loss": 0.414, "step": 5160 }, { "epoch": 0.41517767516562937, "grad_norm": 0.2132941484451294, "learning_rate": 0.000193281520608916, "loss": 0.4063, "step": 5170 }, { "epoch": 0.41598072676169445, "grad_norm": 0.22330623865127563, "learning_rate": 0.00019325365409420426, "loss": 0.4101, "step": 5180 }, { "epoch": 0.4167837783577595, "grad_norm": 0.2597391605377197, "learning_rate": 0.0001932257320348665, "loss": 0.4152, "step": 5190 }, { "epoch": 0.4175868299538245, "grad_norm": 0.20542681217193604, "learning_rate": 0.0001931977544484762, "loss": 0.3916, "step": 5200 }, { "epoch": 0.4183898815498896, "grad_norm": 0.25265881419181824, "learning_rate": 0.00019316972135264174, "loss": 0.3863, "step": 5210 }, { "epoch": 0.4191929331459546, "grad_norm": 0.21475587785243988, "learning_rate": 0.0001931416327650064, "loss": 0.3986, "step": 5220 }, { "epoch": 0.41999598474201966, "grad_norm": 0.25507035851478577, "learning_rate": 0.00019311348870324843, "loss": 0.3987, "step": 5230 }, { "epoch": 0.42079903633808474, "grad_norm": 0.21505789458751678, "learning_rate": 0.00019308528918508106, "loss": 0.4019, "step": 5240 }, { "epoch": 0.42160208793414977, "grad_norm": 0.22257967293262482, "learning_rate": 0.00019305703422825232, "loss": 0.4142, "step": 5250 }, { "epoch": 0.4224051395302148, "grad_norm": 0.22852906584739685, "learning_rate": 0.00019302872385054513, "loss": 0.4172, "step": 5260 }, { "epoch": 0.4232081911262799, "grad_norm": 0.26838418841362, "learning_rate": 0.0001930003580697774, "loss": 0.4224, "step": 5270 }, { "epoch": 0.4240112427223449, "grad_norm": 0.25896695256233215, "learning_rate": 0.00019297193690380178, "loss": 0.4096, "step": 5280 }, { "epoch": 0.42481429431840995, "grad_norm": 0.22285377979278564, "learning_rate": 0.00019294346037050582, "loss": 0.4043, "step": 5290 }, { "epoch": 0.42561734591447503, "grad_norm": 0.3351123034954071, "learning_rate": 0.00019291492848781203, "loss": 0.4234, "step": 5300 }, { "epoch": 0.42642039751054006, "grad_norm": 0.23505814373493195, "learning_rate": 0.00019288634127367755, "loss": 0.3937, "step": 5310 }, { "epoch": 0.4272234491066051, "grad_norm": 0.2881292402744293, "learning_rate": 0.00019285769874609451, "loss": 0.3923, "step": 5320 }, { "epoch": 0.4280265007026701, "grad_norm": 0.21854367852210999, "learning_rate": 0.0001928290009230898, "loss": 0.4215, "step": 5330 }, { "epoch": 0.4288295522987352, "grad_norm": 0.2890002131462097, "learning_rate": 0.00019280024782272507, "loss": 0.4122, "step": 5340 }, { "epoch": 0.42963260389480024, "grad_norm": 0.22099970281124115, "learning_rate": 0.00019277143946309683, "loss": 0.3995, "step": 5350 }, { "epoch": 0.43043565549086527, "grad_norm": 0.2169959843158722, "learning_rate": 0.0001927425758623363, "loss": 0.3902, "step": 5360 }, { "epoch": 0.43123870708693035, "grad_norm": 0.24348892271518707, "learning_rate": 0.00019271365703860956, "loss": 0.4078, "step": 5370 }, { "epoch": 0.4320417586829954, "grad_norm": 0.23780475556850433, "learning_rate": 0.0001926846830101173, "loss": 0.4263, "step": 5380 }, { "epoch": 0.4328448102790604, "grad_norm": 0.2354022115468979, "learning_rate": 0.00019265565379509512, "loss": 0.409, "step": 5390 }, { "epoch": 0.4336478618751255, "grad_norm": 0.26332664489746094, "learning_rate": 0.00019262656941181327, "loss": 0.4377, "step": 5400 }, { "epoch": 0.4344509134711905, "grad_norm": 0.27148425579071045, "learning_rate": 0.0001925974298785767, "loss": 0.4072, "step": 5410 }, { "epoch": 0.43525396506725555, "grad_norm": 0.3029758036136627, "learning_rate": 0.0001925682352137251, "loss": 0.3971, "step": 5420 }, { "epoch": 0.43605701666332064, "grad_norm": 0.24489876627922058, "learning_rate": 0.00019253898543563288, "loss": 0.3623, "step": 5430 }, { "epoch": 0.43686006825938567, "grad_norm": 0.21347884833812714, "learning_rate": 0.0001925096805627091, "loss": 0.4138, "step": 5440 }, { "epoch": 0.4376631198554507, "grad_norm": 0.4747852087020874, "learning_rate": 0.0001924803206133975, "loss": 0.4037, "step": 5450 }, { "epoch": 0.4384661714515158, "grad_norm": 0.22829334437847137, "learning_rate": 0.00019245090560617652, "loss": 0.396, "step": 5460 }, { "epoch": 0.4392692230475808, "grad_norm": 0.22323983907699585, "learning_rate": 0.00019242143555955917, "loss": 0.4159, "step": 5470 }, { "epoch": 0.44007227464364584, "grad_norm": 0.25946852564811707, "learning_rate": 0.00019239191049209321, "loss": 0.3979, "step": 5480 }, { "epoch": 0.4408753262397109, "grad_norm": 0.2568100392818451, "learning_rate": 0.00019236233042236093, "loss": 0.4327, "step": 5490 }, { "epoch": 0.44167837783577596, "grad_norm": 0.22496218979358673, "learning_rate": 0.0001923326953689793, "loss": 0.3879, "step": 5500 }, { "epoch": 0.442481429431841, "grad_norm": 0.2286287248134613, "learning_rate": 0.00019230300535059993, "loss": 0.4167, "step": 5510 }, { "epoch": 0.443284481027906, "grad_norm": 0.24097199738025665, "learning_rate": 0.0001922732603859089, "loss": 0.3951, "step": 5520 }, { "epoch": 0.4440875326239711, "grad_norm": 0.24535052478313446, "learning_rate": 0.00019224346049362694, "loss": 0.4122, "step": 5530 }, { "epoch": 0.44489058422003613, "grad_norm": 0.2105432152748108, "learning_rate": 0.00019221360569250936, "loss": 0.3925, "step": 5540 }, { "epoch": 0.44569363581610116, "grad_norm": 0.2717340290546417, "learning_rate": 0.00019218369600134606, "loss": 0.3974, "step": 5550 }, { "epoch": 0.44649668741216625, "grad_norm": 0.21849365532398224, "learning_rate": 0.00019215373143896138, "loss": 0.404, "step": 5560 }, { "epoch": 0.4472997390082313, "grad_norm": 0.2542933225631714, "learning_rate": 0.00019212371202421432, "loss": 0.3992, "step": 5570 }, { "epoch": 0.4481027906042963, "grad_norm": 0.3299144208431244, "learning_rate": 0.00019209363777599833, "loss": 0.4086, "step": 5580 }, { "epoch": 0.4489058422003614, "grad_norm": 0.2359936535358429, "learning_rate": 0.00019206350871324134, "loss": 0.4099, "step": 5590 }, { "epoch": 0.4497088937964264, "grad_norm": 0.23157168924808502, "learning_rate": 0.0001920333248549059, "loss": 0.3905, "step": 5600 }, { "epoch": 0.45051194539249145, "grad_norm": 0.2585068345069885, "learning_rate": 0.0001920030862199889, "loss": 0.3934, "step": 5610 }, { "epoch": 0.45131499698855654, "grad_norm": 0.25307944416999817, "learning_rate": 0.0001919727928275218, "loss": 0.3801, "step": 5620 }, { "epoch": 0.45211804858462157, "grad_norm": 0.22907043993473053, "learning_rate": 0.00019194244469657051, "loss": 0.3899, "step": 5630 }, { "epoch": 0.4529211001806866, "grad_norm": 0.28393784165382385, "learning_rate": 0.00019191204184623538, "loss": 0.398, "step": 5640 }, { "epoch": 0.4537241517767517, "grad_norm": 0.2734169065952301, "learning_rate": 0.00019188158429565114, "loss": 0.4216, "step": 5650 }, { "epoch": 0.4545272033728167, "grad_norm": 0.1944531500339508, "learning_rate": 0.0001918510720639871, "loss": 0.3794, "step": 5660 }, { "epoch": 0.45533025496888174, "grad_norm": 0.28135764598846436, "learning_rate": 0.0001918205051704468, "loss": 0.3878, "step": 5670 }, { "epoch": 0.45613330656494677, "grad_norm": 0.22071146965026855, "learning_rate": 0.00019178988363426837, "loss": 0.4059, "step": 5680 }, { "epoch": 0.45693635816101186, "grad_norm": 0.2228947877883911, "learning_rate": 0.00019175920747472412, "loss": 0.3961, "step": 5690 }, { "epoch": 0.4577394097570769, "grad_norm": 0.24187617003917694, "learning_rate": 0.00019172847671112094, "loss": 0.409, "step": 5700 }, { "epoch": 0.4585424613531419, "grad_norm": 0.238054558634758, "learning_rate": 0.00019169769136279997, "loss": 0.3867, "step": 5710 }, { "epoch": 0.459345512949207, "grad_norm": 0.21411894261837006, "learning_rate": 0.00019166685144913668, "loss": 0.3837, "step": 5720 }, { "epoch": 0.46014856454527203, "grad_norm": 0.2591460347175598, "learning_rate": 0.00019163595698954103, "loss": 0.4185, "step": 5730 }, { "epoch": 0.46095161614133706, "grad_norm": 0.2219586968421936, "learning_rate": 0.00019160500800345713, "loss": 0.373, "step": 5740 }, { "epoch": 0.46175466773740215, "grad_norm": 0.22929415106773376, "learning_rate": 0.00019157400451036356, "loss": 0.3995, "step": 5750 }, { "epoch": 0.4625577193334672, "grad_norm": 0.23477496206760406, "learning_rate": 0.0001915429465297731, "loss": 0.3876, "step": 5760 }, { "epoch": 0.4633607709295322, "grad_norm": 0.24517874419689178, "learning_rate": 0.00019151183408123288, "loss": 0.4037, "step": 5770 }, { "epoch": 0.4641638225255973, "grad_norm": 0.1972472369670868, "learning_rate": 0.00019148066718432426, "loss": 0.3729, "step": 5780 }, { "epoch": 0.4649668741216623, "grad_norm": 0.20141105353832245, "learning_rate": 0.0001914494458586629, "loss": 0.3975, "step": 5790 }, { "epoch": 0.46576992571772735, "grad_norm": 0.27002057433128357, "learning_rate": 0.00019141817012389878, "loss": 0.4022, "step": 5800 }, { "epoch": 0.46657297731379244, "grad_norm": 0.2164170742034912, "learning_rate": 0.000191386839999716, "loss": 0.4085, "step": 5810 }, { "epoch": 0.46737602890985747, "grad_norm": 0.2119736522436142, "learning_rate": 0.00019135545550583294, "loss": 0.4022, "step": 5820 }, { "epoch": 0.4681790805059225, "grad_norm": 0.28390687704086304, "learning_rate": 0.00019132401666200225, "loss": 0.3999, "step": 5830 }, { "epoch": 0.4689821321019875, "grad_norm": 0.21278752386569977, "learning_rate": 0.00019129252348801072, "loss": 0.3931, "step": 5840 }, { "epoch": 0.4697851836980526, "grad_norm": 0.23051875829696655, "learning_rate": 0.0001912609760036794, "loss": 0.4213, "step": 5850 }, { "epoch": 0.47058823529411764, "grad_norm": 0.22266915440559387, "learning_rate": 0.00019122937422886343, "loss": 0.3837, "step": 5860 }, { "epoch": 0.47139128689018267, "grad_norm": 0.20935536921024323, "learning_rate": 0.00019119771818345215, "loss": 0.4126, "step": 5870 }, { "epoch": 0.47219433848624776, "grad_norm": 0.20687717199325562, "learning_rate": 0.0001911660078873692, "loss": 0.3921, "step": 5880 }, { "epoch": 0.4729973900823128, "grad_norm": 0.22875095903873444, "learning_rate": 0.0001911342433605721, "loss": 0.3988, "step": 5890 }, { "epoch": 0.4738004416783778, "grad_norm": 0.25604599714279175, "learning_rate": 0.00019110242462305272, "loss": 0.3965, "step": 5900 }, { "epoch": 0.4746034932744429, "grad_norm": 0.3062742054462433, "learning_rate": 0.00019107055169483698, "loss": 0.4142, "step": 5910 }, { "epoch": 0.47540654487050793, "grad_norm": 0.21959365904331207, "learning_rate": 0.0001910386245959848, "loss": 0.4119, "step": 5920 }, { "epoch": 0.47620959646657296, "grad_norm": 0.3534756600856781, "learning_rate": 0.00019100664334659043, "loss": 0.4062, "step": 5930 }, { "epoch": 0.47701264806263804, "grad_norm": 0.2529744803905487, "learning_rate": 0.00019097460796678198, "loss": 0.4013, "step": 5940 }, { "epoch": 0.4778156996587031, "grad_norm": 0.2140520066022873, "learning_rate": 0.00019094251847672172, "loss": 0.3964, "step": 5950 }, { "epoch": 0.4786187512547681, "grad_norm": 0.2330731302499771, "learning_rate": 0.00019091037489660603, "loss": 0.4002, "step": 5960 }, { "epoch": 0.4794218028508332, "grad_norm": 0.2545676827430725, "learning_rate": 0.00019087817724666514, "loss": 0.435, "step": 5970 }, { "epoch": 0.4802248544468982, "grad_norm": 0.22503823041915894, "learning_rate": 0.00019084592554716355, "loss": 0.3803, "step": 5980 }, { "epoch": 0.48102790604296325, "grad_norm": 0.22117426991462708, "learning_rate": 0.00019081361981839959, "loss": 0.3851, "step": 5990 }, { "epoch": 0.48183095763902833, "grad_norm": 0.2792169451713562, "learning_rate": 0.00019078126008070573, "loss": 0.3881, "step": 6000 }, { "epoch": 0.48263400923509336, "grad_norm": 0.3301231563091278, "learning_rate": 0.00019074884635444838, "loss": 0.3982, "step": 6010 }, { "epoch": 0.4834370608311584, "grad_norm": 0.23490625619888306, "learning_rate": 0.0001907163786600279, "loss": 0.4324, "step": 6020 }, { "epoch": 0.4842401124272234, "grad_norm": 0.24961413443088531, "learning_rate": 0.0001906838570178786, "loss": 0.4226, "step": 6030 }, { "epoch": 0.4850431640232885, "grad_norm": 0.25633400678634644, "learning_rate": 0.00019065128144846885, "loss": 0.4232, "step": 6040 }, { "epoch": 0.48584621561935354, "grad_norm": 0.26635417342185974, "learning_rate": 0.0001906186519723008, "loss": 0.3891, "step": 6050 }, { "epoch": 0.48664926721541857, "grad_norm": 0.2071598321199417, "learning_rate": 0.00019058596860991074, "loss": 0.3904, "step": 6060 }, { "epoch": 0.48745231881148365, "grad_norm": 0.24502436816692352, "learning_rate": 0.00019055323138186868, "loss": 0.3988, "step": 6070 }, { "epoch": 0.4882553704075487, "grad_norm": 0.26950567960739136, "learning_rate": 0.00019052044030877865, "loss": 0.4333, "step": 6080 }, { "epoch": 0.4890584220036137, "grad_norm": 0.28197935223579407, "learning_rate": 0.00019048759541127845, "loss": 0.4007, "step": 6090 }, { "epoch": 0.4898614735996788, "grad_norm": 0.22891367971897125, "learning_rate": 0.0001904546967100399, "loss": 0.3929, "step": 6100 }, { "epoch": 0.49066452519574383, "grad_norm": 0.206910640001297, "learning_rate": 0.0001904217442257686, "loss": 0.4248, "step": 6110 }, { "epoch": 0.49146757679180886, "grad_norm": 0.2360646277666092, "learning_rate": 0.00019038873797920397, "loss": 0.4047, "step": 6120 }, { "epoch": 0.49227062838787394, "grad_norm": 0.2573632001876831, "learning_rate": 0.0001903556779911194, "loss": 0.4025, "step": 6130 }, { "epoch": 0.493073679983939, "grad_norm": 0.27849552035331726, "learning_rate": 0.00019032256428232194, "loss": 0.4087, "step": 6140 }, { "epoch": 0.493876731580004, "grad_norm": 0.2087561935186386, "learning_rate": 0.0001902893968736525, "loss": 0.3754, "step": 6150 }, { "epoch": 0.4946797831760691, "grad_norm": 0.22215555608272552, "learning_rate": 0.00019025617578598587, "loss": 0.3826, "step": 6160 }, { "epoch": 0.4954828347721341, "grad_norm": 0.23767730593681335, "learning_rate": 0.00019022290104023058, "loss": 0.4134, "step": 6170 }, { "epoch": 0.49628588636819915, "grad_norm": 0.23905590176582336, "learning_rate": 0.00019018957265732887, "loss": 0.3971, "step": 6180 }, { "epoch": 0.4970889379642642, "grad_norm": 0.22282980382442474, "learning_rate": 0.0001901561906582568, "loss": 0.4145, "step": 6190 }, { "epoch": 0.49789198956032926, "grad_norm": 0.2168979048728943, "learning_rate": 0.00019012275506402414, "loss": 0.4057, "step": 6200 }, { "epoch": 0.4986950411563943, "grad_norm": 0.2880740463733673, "learning_rate": 0.00019008926589567443, "loss": 0.3996, "step": 6210 }, { "epoch": 0.4994980927524593, "grad_norm": 0.23460093140602112, "learning_rate": 0.0001900557231742849, "loss": 0.4085, "step": 6220 }, { "epoch": 0.5003011443485244, "grad_norm": 0.23996590077877045, "learning_rate": 0.00019002212692096653, "loss": 0.4123, "step": 6230 }, { "epoch": 0.5011041959445894, "grad_norm": 0.33705589175224304, "learning_rate": 0.00018998847715686392, "loss": 0.4198, "step": 6240 }, { "epoch": 0.5019072475406545, "grad_norm": 0.23651652038097382, "learning_rate": 0.0001899547739031554, "loss": 0.42, "step": 6250 }, { "epoch": 0.5027102991367195, "grad_norm": 0.20431888103485107, "learning_rate": 0.0001899210171810529, "loss": 0.3814, "step": 6260 }, { "epoch": 0.5035133507327846, "grad_norm": 0.27831345796585083, "learning_rate": 0.00018988720701180214, "loss": 0.3814, "step": 6270 }, { "epoch": 0.5043164023288497, "grad_norm": 0.2147676646709442, "learning_rate": 0.0001898533434166823, "loss": 0.359, "step": 6280 }, { "epoch": 0.5051194539249146, "grad_norm": 0.2395770102739334, "learning_rate": 0.0001898194264170063, "loss": 0.4114, "step": 6290 }, { "epoch": 0.5059225055209797, "grad_norm": 0.20979878306388855, "learning_rate": 0.0001897854560341207, "loss": 0.4266, "step": 6300 }, { "epoch": 0.5067255571170448, "grad_norm": 0.20778490602970123, "learning_rate": 0.00018975143228940552, "loss": 0.4227, "step": 6310 }, { "epoch": 0.5075286087131098, "grad_norm": 0.2506459951400757, "learning_rate": 0.00018971735520427452, "loss": 0.3925, "step": 6320 }, { "epoch": 0.5083316603091749, "grad_norm": 0.25285083055496216, "learning_rate": 0.0001896832248001749, "loss": 0.4064, "step": 6330 }, { "epoch": 0.50913471190524, "grad_norm": 0.21155555546283722, "learning_rate": 0.0001896490410985875, "loss": 0.3921, "step": 6340 }, { "epoch": 0.5099377635013049, "grad_norm": 0.25344687700271606, "learning_rate": 0.00018961480412102671, "loss": 0.3948, "step": 6350 }, { "epoch": 0.51074081509737, "grad_norm": 0.20959046483039856, "learning_rate": 0.00018958051388904038, "loss": 0.4001, "step": 6360 }, { "epoch": 0.5115438666934351, "grad_norm": 0.22910785675048828, "learning_rate": 0.0001895461704242099, "loss": 0.3841, "step": 6370 }, { "epoch": 0.5123469182895001, "grad_norm": 0.27860304713249207, "learning_rate": 0.00018951177374815025, "loss": 0.4101, "step": 6380 }, { "epoch": 0.5131499698855652, "grad_norm": 0.2752937376499176, "learning_rate": 0.00018947732388250975, "loss": 0.4066, "step": 6390 }, { "epoch": 0.5139530214816302, "grad_norm": 0.2503402829170227, "learning_rate": 0.00018944282084897037, "loss": 0.3775, "step": 6400 }, { "epoch": 0.5147560730776952, "grad_norm": 0.21920950710773468, "learning_rate": 0.0001894082646692474, "loss": 0.4262, "step": 6410 }, { "epoch": 0.5155591246737603, "grad_norm": 0.2170015126466751, "learning_rate": 0.0001893736553650896, "loss": 0.4377, "step": 6420 }, { "epoch": 0.5163621762698254, "grad_norm": 0.24093946814537048, "learning_rate": 0.0001893389929582792, "loss": 0.3952, "step": 6430 }, { "epoch": 0.5171652278658904, "grad_norm": 0.22773709893226624, "learning_rate": 0.00018930427747063192, "loss": 0.3796, "step": 6440 }, { "epoch": 0.5179682794619554, "grad_norm": 0.21073327958583832, "learning_rate": 0.00018926950892399674, "loss": 0.4029, "step": 6450 }, { "epoch": 0.5187713310580204, "grad_norm": 0.24542798101902008, "learning_rate": 0.00018923468734025612, "loss": 0.4081, "step": 6460 }, { "epoch": 0.5195743826540855, "grad_norm": 0.24575842916965485, "learning_rate": 0.0001891998127413259, "loss": 0.4126, "step": 6470 }, { "epoch": 0.5203774342501506, "grad_norm": 0.1975177675485611, "learning_rate": 0.0001891648851491553, "loss": 0.3849, "step": 6480 }, { "epoch": 0.5211804858462156, "grad_norm": 0.19012582302093506, "learning_rate": 0.0001891299045857268, "loss": 0.3961, "step": 6490 }, { "epoch": 0.5219835374422807, "grad_norm": 0.22669987380504608, "learning_rate": 0.00018909487107305633, "loss": 0.4463, "step": 6500 }, { "epoch": 0.5227865890383457, "grad_norm": 0.2705063819885254, "learning_rate": 0.00018905978463319307, "loss": 0.395, "step": 6510 }, { "epoch": 0.5235896406344107, "grad_norm": 0.18926501274108887, "learning_rate": 0.00018902464528821963, "loss": 0.4151, "step": 6520 }, { "epoch": 0.5243926922304758, "grad_norm": 0.2494933307170868, "learning_rate": 0.0001889894530602517, "loss": 0.4048, "step": 6530 }, { "epoch": 0.5251957438265409, "grad_norm": 0.2140100747346878, "learning_rate": 0.00018895420797143849, "loss": 0.39, "step": 6540 }, { "epoch": 0.5259987954226059, "grad_norm": 0.24057279527187347, "learning_rate": 0.00018891891004396225, "loss": 0.3804, "step": 6550 }, { "epoch": 0.5268018470186709, "grad_norm": 0.21073925495147705, "learning_rate": 0.00018888355930003872, "loss": 0.4176, "step": 6560 }, { "epoch": 0.527604898614736, "grad_norm": 0.20677316188812256, "learning_rate": 0.00018884815576191672, "loss": 0.3988, "step": 6570 }, { "epoch": 0.528407950210801, "grad_norm": 0.2978564202785492, "learning_rate": 0.0001888126994518784, "loss": 0.419, "step": 6580 }, { "epoch": 0.5292110018068661, "grad_norm": 0.21007847785949707, "learning_rate": 0.00018877719039223895, "loss": 0.3966, "step": 6590 }, { "epoch": 0.5300140534029312, "grad_norm": 0.2466752827167511, "learning_rate": 0.00018874162860534698, "loss": 0.428, "step": 6600 }, { "epoch": 0.5308171049989961, "grad_norm": 0.27010902762413025, "learning_rate": 0.00018870601411358415, "loss": 0.4061, "step": 6610 }, { "epoch": 0.5316201565950612, "grad_norm": 0.2642492651939392, "learning_rate": 0.0001886703469393653, "loss": 0.3881, "step": 6620 }, { "epoch": 0.5324232081911263, "grad_norm": 0.17556816339492798, "learning_rate": 0.00018863462710513854, "loss": 0.3709, "step": 6630 }, { "epoch": 0.5332262597871913, "grad_norm": 0.24711012840270996, "learning_rate": 0.0001885988546333849, "loss": 0.4011, "step": 6640 }, { "epoch": 0.5340293113832564, "grad_norm": 0.19060684740543365, "learning_rate": 0.00018856302954661875, "loss": 0.3831, "step": 6650 }, { "epoch": 0.5348323629793215, "grad_norm": 0.2197161167860031, "learning_rate": 0.00018852715186738747, "loss": 0.4217, "step": 6660 }, { "epoch": 0.5356354145753864, "grad_norm": 0.20608733594417572, "learning_rate": 0.0001884912216182716, "loss": 0.3887, "step": 6670 }, { "epoch": 0.5364384661714515, "grad_norm": 0.16759470105171204, "learning_rate": 0.00018845523882188476, "loss": 0.3636, "step": 6680 }, { "epoch": 0.5372415177675166, "grad_norm": 0.22863717377185822, "learning_rate": 0.00018841920350087351, "loss": 0.3882, "step": 6690 }, { "epoch": 0.5380445693635816, "grad_norm": 0.21555203199386597, "learning_rate": 0.00018838311567791769, "loss": 0.4157, "step": 6700 }, { "epoch": 0.5388476209596467, "grad_norm": 0.2482389360666275, "learning_rate": 0.00018834697537573, "loss": 0.4051, "step": 6710 }, { "epoch": 0.5396506725557118, "grad_norm": 0.22927753627300262, "learning_rate": 0.0001883107826170562, "loss": 0.3821, "step": 6720 }, { "epoch": 0.5404537241517767, "grad_norm": 0.2562754154205322, "learning_rate": 0.00018827453742467518, "loss": 0.3922, "step": 6730 }, { "epoch": 0.5412567757478418, "grad_norm": 0.20865361392498016, "learning_rate": 0.0001882382398213987, "loss": 0.4098, "step": 6740 }, { "epoch": 0.5420598273439069, "grad_norm": 0.25448715686798096, "learning_rate": 0.00018820188983007164, "loss": 0.3759, "step": 6750 }, { "epoch": 0.5428628789399719, "grad_norm": 0.19900259375572205, "learning_rate": 0.00018816548747357164, "loss": 0.4099, "step": 6760 }, { "epoch": 0.543665930536037, "grad_norm": 0.22037282586097717, "learning_rate": 0.0001881290327748095, "loss": 0.3893, "step": 6770 }, { "epoch": 0.544468982132102, "grad_norm": 0.24691277742385864, "learning_rate": 0.0001880925257567289, "loss": 0.4136, "step": 6780 }, { "epoch": 0.545272033728167, "grad_norm": 0.23712846636772156, "learning_rate": 0.00018805596644230635, "loss": 0.3825, "step": 6790 }, { "epoch": 0.5460750853242321, "grad_norm": 0.21361668407917023, "learning_rate": 0.00018801935485455152, "loss": 0.4021, "step": 6800 }, { "epoch": 0.5468781369202971, "grad_norm": 0.22351667284965515, "learning_rate": 0.00018798269101650666, "loss": 0.3843, "step": 6810 }, { "epoch": 0.5476811885163622, "grad_norm": 0.24763770401477814, "learning_rate": 0.00018794597495124717, "loss": 0.4188, "step": 6820 }, { "epoch": 0.5484842401124272, "grad_norm": 0.19601532816886902, "learning_rate": 0.00018790920668188116, "loss": 0.3941, "step": 6830 }, { "epoch": 0.5492872917084922, "grad_norm": 0.22133763134479523, "learning_rate": 0.00018787238623154965, "loss": 0.3802, "step": 6840 }, { "epoch": 0.5500903433045573, "grad_norm": 0.24758881330490112, "learning_rate": 0.0001878355136234266, "loss": 0.3841, "step": 6850 }, { "epoch": 0.5508933949006224, "grad_norm": 0.26170071959495544, "learning_rate": 0.00018779858888071855, "loss": 0.3961, "step": 6860 }, { "epoch": 0.5516964464966874, "grad_norm": 0.21910567581653595, "learning_rate": 0.0001877616120266651, "loss": 0.4147, "step": 6870 }, { "epoch": 0.5524994980927525, "grad_norm": 0.20794297754764557, "learning_rate": 0.00018772458308453856, "loss": 0.4006, "step": 6880 }, { "epoch": 0.5533025496888175, "grad_norm": 0.16659502685070038, "learning_rate": 0.000187687502077644, "loss": 0.3912, "step": 6890 }, { "epoch": 0.5541056012848825, "grad_norm": 0.22397157549858093, "learning_rate": 0.00018765036902931918, "loss": 0.4058, "step": 6900 }, { "epoch": 0.5549086528809476, "grad_norm": 0.22604987025260925, "learning_rate": 0.00018761318396293478, "loss": 0.4019, "step": 6910 }, { "epoch": 0.5557117044770127, "grad_norm": 0.23223693668842316, "learning_rate": 0.0001875759469018942, "loss": 0.3931, "step": 6920 }, { "epoch": 0.5565147560730777, "grad_norm": 0.259184330701828, "learning_rate": 0.0001875386578696334, "loss": 0.3725, "step": 6930 }, { "epoch": 0.5573178076691427, "grad_norm": 0.2349756509065628, "learning_rate": 0.00018750131688962125, "loss": 0.4133, "step": 6940 }, { "epoch": 0.5581208592652078, "grad_norm": 0.23266004025936127, "learning_rate": 0.00018746392398535914, "loss": 0.3949, "step": 6950 }, { "epoch": 0.5589239108612728, "grad_norm": 0.256012499332428, "learning_rate": 0.0001874264791803813, "loss": 0.3983, "step": 6960 }, { "epoch": 0.5597269624573379, "grad_norm": 0.2249540537595749, "learning_rate": 0.00018738898249825447, "loss": 0.4226, "step": 6970 }, { "epoch": 0.560530014053403, "grad_norm": 0.19633103907108307, "learning_rate": 0.00018735143396257818, "loss": 0.3938, "step": 6980 }, { "epoch": 0.5613330656494679, "grad_norm": 0.25138577818870544, "learning_rate": 0.00018731383359698448, "loss": 0.3674, "step": 6990 }, { "epoch": 0.562136117245533, "grad_norm": 0.20488348603248596, "learning_rate": 0.00018727618142513815, "loss": 0.4117, "step": 7000 }, { "epoch": 0.5629391688415981, "grad_norm": 0.2229461818933487, "learning_rate": 0.00018723847747073648, "loss": 0.4036, "step": 7010 }, { "epoch": 0.5637422204376631, "grad_norm": 0.20761367678642273, "learning_rate": 0.0001872007217575094, "loss": 0.4085, "step": 7020 }, { "epoch": 0.5645452720337282, "grad_norm": 0.26586493849754333, "learning_rate": 0.00018716291430921937, "loss": 0.395, "step": 7030 }, { "epoch": 0.5653483236297933, "grad_norm": 0.2434822916984558, "learning_rate": 0.0001871250551496615, "loss": 0.3941, "step": 7040 }, { "epoch": 0.5661513752258582, "grad_norm": 0.20937879383563995, "learning_rate": 0.00018708714430266336, "loss": 0.3877, "step": 7050 }, { "epoch": 0.5669544268219233, "grad_norm": 0.18699012696743011, "learning_rate": 0.00018704918179208508, "loss": 0.3953, "step": 7060 }, { "epoch": 0.5677574784179884, "grad_norm": 0.24672946333885193, "learning_rate": 0.00018701116764181932, "loss": 0.392, "step": 7070 }, { "epoch": 0.5685605300140534, "grad_norm": 0.2080293595790863, "learning_rate": 0.00018697310187579125, "loss": 0.4002, "step": 7080 }, { "epoch": 0.5693635816101185, "grad_norm": 0.19961167871952057, "learning_rate": 0.00018693498451795843, "loss": 0.3589, "step": 7090 }, { "epoch": 0.5701666332061835, "grad_norm": 0.22533251345157623, "learning_rate": 0.00018689681559231106, "loss": 0.3609, "step": 7100 }, { "epoch": 0.5709696848022485, "grad_norm": 0.2381286919116974, "learning_rate": 0.00018685859512287162, "loss": 0.3785, "step": 7110 }, { "epoch": 0.5717727363983136, "grad_norm": 0.2534962296485901, "learning_rate": 0.00018682032313369515, "loss": 0.3891, "step": 7120 }, { "epoch": 0.5725757879943787, "grad_norm": 0.17662376165390015, "learning_rate": 0.0001867819996488691, "loss": 0.376, "step": 7130 }, { "epoch": 0.5733788395904437, "grad_norm": 0.2250376045703888, "learning_rate": 0.00018674362469251322, "loss": 0.3943, "step": 7140 }, { "epoch": 0.5741818911865088, "grad_norm": 0.1981520652770996, "learning_rate": 0.00018670519828877983, "loss": 0.3905, "step": 7150 }, { "epoch": 0.5749849427825737, "grad_norm": 0.29976144433021545, "learning_rate": 0.00018666672046185352, "loss": 0.3894, "step": 7160 }, { "epoch": 0.5757879943786388, "grad_norm": 0.21835051476955414, "learning_rate": 0.00018662819123595124, "loss": 0.4124, "step": 7170 }, { "epoch": 0.5765910459747039, "grad_norm": 0.2113235890865326, "learning_rate": 0.00018658961063532228, "loss": 0.4092, "step": 7180 }, { "epoch": 0.5773940975707689, "grad_norm": 0.2419070303440094, "learning_rate": 0.0001865509786842484, "loss": 0.4029, "step": 7190 }, { "epoch": 0.578197149166834, "grad_norm": 0.23955580592155457, "learning_rate": 0.00018651229540704348, "loss": 0.3854, "step": 7200 }, { "epoch": 0.579000200762899, "grad_norm": 0.20884378254413605, "learning_rate": 0.00018647356082805383, "loss": 0.4092, "step": 7210 }, { "epoch": 0.579803252358964, "grad_norm": 0.2025589644908905, "learning_rate": 0.00018643477497165805, "loss": 0.398, "step": 7220 }, { "epoch": 0.5806063039550291, "grad_norm": 0.281221479177475, "learning_rate": 0.00018639593786226697, "loss": 0.4007, "step": 7230 }, { "epoch": 0.5814093555510942, "grad_norm": 0.2180633693933487, "learning_rate": 0.00018635704952432364, "loss": 0.3854, "step": 7240 }, { "epoch": 0.5822124071471592, "grad_norm": 0.23982250690460205, "learning_rate": 0.00018631810998230344, "loss": 0.3819, "step": 7250 }, { "epoch": 0.5830154587432242, "grad_norm": 0.21607322990894318, "learning_rate": 0.00018627911926071396, "loss": 0.4199, "step": 7260 }, { "epoch": 0.5838185103392893, "grad_norm": 0.28813356161117554, "learning_rate": 0.00018624007738409493, "loss": 0.3964, "step": 7270 }, { "epoch": 0.5846215619353543, "grad_norm": 0.20143039524555206, "learning_rate": 0.00018620098437701837, "loss": 0.3873, "step": 7280 }, { "epoch": 0.5854246135314194, "grad_norm": 0.26070600748062134, "learning_rate": 0.00018616184026408835, "loss": 0.4156, "step": 7290 }, { "epoch": 0.5862276651274845, "grad_norm": 0.2765134274959564, "learning_rate": 0.00018612264506994125, "loss": 0.3839, "step": 7300 }, { "epoch": 0.5870307167235495, "grad_norm": 0.2367159128189087, "learning_rate": 0.0001860833988192456, "loss": 0.3948, "step": 7310 }, { "epoch": 0.5878337683196145, "grad_norm": 0.2605177164077759, "learning_rate": 0.00018604410153670185, "loss": 0.3992, "step": 7320 }, { "epoch": 0.5886368199156796, "grad_norm": 0.2558344602584839, "learning_rate": 0.00018600475324704284, "loss": 0.4076, "step": 7330 }, { "epoch": 0.5894398715117446, "grad_norm": 0.2647494971752167, "learning_rate": 0.00018596535397503335, "loss": 0.4222, "step": 7340 }, { "epoch": 0.5902429231078097, "grad_norm": 0.1976073831319809, "learning_rate": 0.00018592590374547028, "loss": 0.3954, "step": 7350 }, { "epoch": 0.5910459747038748, "grad_norm": 0.343650221824646, "learning_rate": 0.00018588640258318263, "loss": 0.4063, "step": 7360 }, { "epoch": 0.5918490262999397, "grad_norm": 0.2675413489341736, "learning_rate": 0.00018584685051303149, "loss": 0.4073, "step": 7370 }, { "epoch": 0.5926520778960048, "grad_norm": 0.24980364739894867, "learning_rate": 0.0001858072475599098, "loss": 0.4051, "step": 7380 }, { "epoch": 0.5934551294920699, "grad_norm": 0.22031521797180176, "learning_rate": 0.00018576759374874276, "loss": 0.3829, "step": 7390 }, { "epoch": 0.5942581810881349, "grad_norm": 0.20192797482013702, "learning_rate": 0.00018572788910448747, "loss": 0.3955, "step": 7400 }, { "epoch": 0.5950612326842, "grad_norm": 0.2092856764793396, "learning_rate": 0.000185688133652133, "loss": 0.3952, "step": 7410 }, { "epoch": 0.595864284280265, "grad_norm": 0.2662719190120697, "learning_rate": 0.00018564832741670045, "loss": 0.4047, "step": 7420 }, { "epoch": 0.59666733587633, "grad_norm": 0.2472047060728073, "learning_rate": 0.00018560847042324287, "loss": 0.4, "step": 7430 }, { "epoch": 0.5974703874723951, "grad_norm": 0.1788444221019745, "learning_rate": 0.00018556856269684522, "loss": 0.3771, "step": 7440 }, { "epoch": 0.5982734390684602, "grad_norm": 0.20003849267959595, "learning_rate": 0.00018552860426262444, "loss": 0.3937, "step": 7450 }, { "epoch": 0.5990764906645252, "grad_norm": 0.19915752112865448, "learning_rate": 0.0001854885951457294, "loss": 0.3844, "step": 7460 }, { "epoch": 0.5998795422605903, "grad_norm": 0.21011032164096832, "learning_rate": 0.00018544853537134072, "loss": 0.4033, "step": 7470 }, { "epoch": 0.6006825938566553, "grad_norm": 0.22801423072814941, "learning_rate": 0.0001854084249646711, "loss": 0.4057, "step": 7480 }, { "epoch": 0.6014856454527203, "grad_norm": 0.22685106098651886, "learning_rate": 0.000185368263950965, "loss": 0.3635, "step": 7490 }, { "epoch": 0.6022886970487854, "grad_norm": 0.2556551992893219, "learning_rate": 0.00018532805235549877, "loss": 0.4097, "step": 7500 }, { "epoch": 0.6030917486448504, "grad_norm": 0.23423558473587036, "learning_rate": 0.0001852877902035805, "loss": 0.3959, "step": 7510 }, { "epoch": 0.6038948002409155, "grad_norm": 0.20022714138031006, "learning_rate": 0.0001852474775205503, "loss": 0.3868, "step": 7520 }, { "epoch": 0.6046978518369805, "grad_norm": 0.29346564412117004, "learning_rate": 0.00018520711433177983, "loss": 0.4352, "step": 7530 }, { "epoch": 0.6055009034330455, "grad_norm": 0.23955214023590088, "learning_rate": 0.00018516670066267275, "loss": 0.4296, "step": 7540 }, { "epoch": 0.6063039550291106, "grad_norm": 0.2470638006925583, "learning_rate": 0.00018512623653866434, "loss": 0.3966, "step": 7550 }, { "epoch": 0.6071070066251757, "grad_norm": 0.207452654838562, "learning_rate": 0.00018508572198522175, "loss": 0.403, "step": 7560 }, { "epoch": 0.6079100582212407, "grad_norm": 0.22814732789993286, "learning_rate": 0.00018504515702784383, "loss": 0.4272, "step": 7570 }, { "epoch": 0.6087131098173058, "grad_norm": 0.25176751613616943, "learning_rate": 0.00018500454169206109, "loss": 0.404, "step": 7580 }, { "epoch": 0.6095161614133708, "grad_norm": 0.2721332013607025, "learning_rate": 0.00018496387600343582, "loss": 0.3929, "step": 7590 }, { "epoch": 0.6103192130094358, "grad_norm": 0.20990660786628723, "learning_rate": 0.00018492315998756197, "loss": 0.3876, "step": 7600 }, { "epoch": 0.6111222646055009, "grad_norm": 0.27433717250823975, "learning_rate": 0.0001848823936700652, "loss": 0.3889, "step": 7610 }, { "epoch": 0.611925316201566, "grad_norm": 0.28143811225891113, "learning_rate": 0.00018484157707660277, "loss": 0.4041, "step": 7620 }, { "epoch": 0.612728367797631, "grad_norm": 0.2882578670978546, "learning_rate": 0.00018480071023286364, "loss": 0.4003, "step": 7630 }, { "epoch": 0.613531419393696, "grad_norm": 0.24294039607048035, "learning_rate": 0.0001847597931645684, "loss": 0.3877, "step": 7640 }, { "epoch": 0.6143344709897611, "grad_norm": 0.21386289596557617, "learning_rate": 0.00018471882589746915, "loss": 0.4067, "step": 7650 }, { "epoch": 0.6151375225858261, "grad_norm": 0.26907631754875183, "learning_rate": 0.0001846778084573497, "loss": 0.3908, "step": 7660 }, { "epoch": 0.6159405741818912, "grad_norm": 0.30849769711494446, "learning_rate": 0.00018463674087002544, "loss": 0.4186, "step": 7670 }, { "epoch": 0.6167436257779563, "grad_norm": 0.21560145914554596, "learning_rate": 0.00018459562316134316, "loss": 0.3699, "step": 7680 }, { "epoch": 0.6175466773740212, "grad_norm": 0.2738913595676422, "learning_rate": 0.0001845544553571814, "loss": 0.4157, "step": 7690 }, { "epoch": 0.6183497289700863, "grad_norm": 0.2101289927959442, "learning_rate": 0.00018451323748345011, "loss": 0.3849, "step": 7700 }, { "epoch": 0.6191527805661514, "grad_norm": 0.23185765743255615, "learning_rate": 0.00018447196956609076, "loss": 0.4177, "step": 7710 }, { "epoch": 0.6199558321622164, "grad_norm": 0.22494180500507355, "learning_rate": 0.00018443065163107638, "loss": 0.3879, "step": 7720 }, { "epoch": 0.6207588837582815, "grad_norm": 0.18990303575992584, "learning_rate": 0.0001843892837044114, "loss": 0.389, "step": 7730 }, { "epoch": 0.6215619353543466, "grad_norm": 0.2365971952676773, "learning_rate": 0.00018434786581213178, "loss": 0.4131, "step": 7740 }, { "epoch": 0.6223649869504115, "grad_norm": 0.2155819535255432, "learning_rate": 0.0001843063979803049, "loss": 0.4027, "step": 7750 }, { "epoch": 0.6231680385464766, "grad_norm": 0.19067560136318207, "learning_rate": 0.00018426488023502952, "loss": 0.3683, "step": 7760 }, { "epoch": 0.6239710901425417, "grad_norm": 0.24813027679920197, "learning_rate": 0.0001842233126024359, "loss": 0.3764, "step": 7770 }, { "epoch": 0.6247741417386067, "grad_norm": 0.23483312129974365, "learning_rate": 0.00018418169510868568, "loss": 0.4103, "step": 7780 }, { "epoch": 0.6255771933346718, "grad_norm": 0.19776403903961182, "learning_rate": 0.0001841400277799718, "loss": 0.3865, "step": 7790 }, { "epoch": 0.6263802449307369, "grad_norm": 0.22929096221923828, "learning_rate": 0.0001840983106425187, "loss": 0.3826, "step": 7800 }, { "epoch": 0.6271832965268018, "grad_norm": 0.20418544113636017, "learning_rate": 0.00018405654372258203, "loss": 0.4171, "step": 7810 }, { "epoch": 0.6279863481228669, "grad_norm": 0.20898815989494324, "learning_rate": 0.00018401472704644887, "loss": 0.4137, "step": 7820 }, { "epoch": 0.628789399718932, "grad_norm": 0.2276865541934967, "learning_rate": 0.0001839728606404376, "loss": 0.3975, "step": 7830 }, { "epoch": 0.629592451314997, "grad_norm": 0.2473233938217163, "learning_rate": 0.00018393094453089784, "loss": 0.4389, "step": 7840 }, { "epoch": 0.6303955029110621, "grad_norm": 0.24145905673503876, "learning_rate": 0.0001838889787442106, "loss": 0.412, "step": 7850 }, { "epoch": 0.631198554507127, "grad_norm": 0.2835339903831482, "learning_rate": 0.00018384696330678798, "loss": 0.4159, "step": 7860 }, { "epoch": 0.6320016061031921, "grad_norm": 0.21569952368736267, "learning_rate": 0.00018380489824507349, "loss": 0.388, "step": 7870 }, { "epoch": 0.6328046576992572, "grad_norm": 0.26097697019577026, "learning_rate": 0.00018376278358554188, "loss": 0.3825, "step": 7880 }, { "epoch": 0.6336077092953222, "grad_norm": 0.2447638213634491, "learning_rate": 0.00018372061935469897, "loss": 0.3901, "step": 7890 }, { "epoch": 0.6344107608913873, "grad_norm": 0.17887325584888458, "learning_rate": 0.00018367840557908193, "loss": 0.3987, "step": 7900 }, { "epoch": 0.6352138124874523, "grad_norm": 0.20007804036140442, "learning_rate": 0.00018363614228525898, "loss": 0.3966, "step": 7910 }, { "epoch": 0.6360168640835173, "grad_norm": 0.23407109081745148, "learning_rate": 0.00018359382949982963, "loss": 0.3796, "step": 7920 }, { "epoch": 0.6368199156795824, "grad_norm": 0.19814062118530273, "learning_rate": 0.00018355146724942446, "loss": 0.3615, "step": 7930 }, { "epoch": 0.6376229672756475, "grad_norm": 0.25817543268203735, "learning_rate": 0.0001835090555607052, "loss": 0.3952, "step": 7940 }, { "epoch": 0.6384260188717125, "grad_norm": 0.24975711107254028, "learning_rate": 0.00018346659446036466, "loss": 0.3905, "step": 7950 }, { "epoch": 0.6392290704677775, "grad_norm": 0.21831083297729492, "learning_rate": 0.00018342408397512686, "loss": 0.3996, "step": 7960 }, { "epoch": 0.6400321220638426, "grad_norm": 0.216525599360466, "learning_rate": 0.0001833815241317468, "loss": 0.4222, "step": 7970 }, { "epoch": 0.6408351736599076, "grad_norm": 0.24411660432815552, "learning_rate": 0.00018333891495701056, "loss": 0.3697, "step": 7980 }, { "epoch": 0.6416382252559727, "grad_norm": 0.43174704909324646, "learning_rate": 0.0001832962564777353, "loss": 0.3883, "step": 7990 }, { "epoch": 0.6424412768520378, "grad_norm": 0.24613375961780548, "learning_rate": 0.00018325354872076913, "loss": 0.4084, "step": 8000 }, { "epoch": 0.6432443284481028, "grad_norm": 0.24125936627388, "learning_rate": 0.00018321079171299134, "loss": 0.4064, "step": 8010 }, { "epoch": 0.6440473800441678, "grad_norm": 0.2194608598947525, "learning_rate": 0.00018316798548131204, "loss": 0.3935, "step": 8020 }, { "epoch": 0.6448504316402329, "grad_norm": 0.21544873714447021, "learning_rate": 0.00018312513005267243, "loss": 0.3739, "step": 8030 }, { "epoch": 0.6456534832362979, "grad_norm": 0.28098297119140625, "learning_rate": 0.0001830822254540446, "loss": 0.4281, "step": 8040 }, { "epoch": 0.646456534832363, "grad_norm": 0.1965770721435547, "learning_rate": 0.00018303927171243165, "loss": 0.3872, "step": 8050 }, { "epoch": 0.6472595864284281, "grad_norm": 0.20592904090881348, "learning_rate": 0.00018299626885486756, "loss": 0.3803, "step": 8060 }, { "epoch": 0.648062638024493, "grad_norm": 0.21880042552947998, "learning_rate": 0.00018295321690841727, "loss": 0.3808, "step": 8070 }, { "epoch": 0.6488656896205581, "grad_norm": 0.26169222593307495, "learning_rate": 0.0001829101159001766, "loss": 0.4108, "step": 8080 }, { "epoch": 0.6496687412166232, "grad_norm": 0.21235030889511108, "learning_rate": 0.0001828669658572722, "loss": 0.4088, "step": 8090 }, { "epoch": 0.6504717928126882, "grad_norm": 0.22417037189006805, "learning_rate": 0.00018282376680686164, "loss": 0.3694, "step": 8100 }, { "epoch": 0.6512748444087533, "grad_norm": 0.2293388992547989, "learning_rate": 0.0001827805187761333, "loss": 0.3979, "step": 8110 }, { "epoch": 0.6520778960048184, "grad_norm": 0.225070983171463, "learning_rate": 0.00018273722179230645, "loss": 0.4, "step": 8120 }, { "epoch": 0.6528809476008833, "grad_norm": 0.2556016743183136, "learning_rate": 0.00018269387588263104, "loss": 0.3966, "step": 8130 }, { "epoch": 0.6536839991969484, "grad_norm": 0.26142725348472595, "learning_rate": 0.00018265048107438795, "loss": 0.4031, "step": 8140 }, { "epoch": 0.6544870507930135, "grad_norm": 0.24790245294570923, "learning_rate": 0.0001826070373948888, "loss": 0.3918, "step": 8150 }, { "epoch": 0.6552901023890785, "grad_norm": 0.2151559442281723, "learning_rate": 0.00018256354487147588, "loss": 0.3961, "step": 8160 }, { "epoch": 0.6560931539851436, "grad_norm": 0.22399644553661346, "learning_rate": 0.00018252000353152233, "loss": 0.3617, "step": 8170 }, { "epoch": 0.6568962055812086, "grad_norm": 0.18134842813014984, "learning_rate": 0.00018247641340243196, "loss": 0.3774, "step": 8180 }, { "epoch": 0.6576992571772736, "grad_norm": 0.2543718218803406, "learning_rate": 0.00018243277451163932, "loss": 0.3798, "step": 8190 }, { "epoch": 0.6585023087733387, "grad_norm": 0.19638460874557495, "learning_rate": 0.0001823890868866096, "loss": 0.3865, "step": 8200 }, { "epoch": 0.6593053603694037, "grad_norm": 0.2515793740749359, "learning_rate": 0.00018234535055483867, "loss": 0.3877, "step": 8210 }, { "epoch": 0.6601084119654688, "grad_norm": 0.20921385288238525, "learning_rate": 0.00018230156554385315, "loss": 0.4099, "step": 8220 }, { "epoch": 0.6609114635615339, "grad_norm": 0.2757245898246765, "learning_rate": 0.00018225773188121016, "loss": 0.4282, "step": 8230 }, { "epoch": 0.6617145151575988, "grad_norm": 0.2325657159090042, "learning_rate": 0.0001822138495944975, "loss": 0.3826, "step": 8240 }, { "epoch": 0.6625175667536639, "grad_norm": 0.2603413760662079, "learning_rate": 0.00018216991871133366, "loss": 0.4166, "step": 8250 }, { "epoch": 0.663320618349729, "grad_norm": 0.2849125266075134, "learning_rate": 0.00018212593925936752, "loss": 0.3777, "step": 8260 }, { "epoch": 0.664123669945794, "grad_norm": 0.2379753142595291, "learning_rate": 0.00018208191126627867, "loss": 0.4037, "step": 8270 }, { "epoch": 0.6649267215418591, "grad_norm": 0.21099522709846497, "learning_rate": 0.00018203783475977727, "loss": 0.3817, "step": 8280 }, { "epoch": 0.6657297731379241, "grad_norm": 0.21475058794021606, "learning_rate": 0.00018199370976760393, "loss": 0.4042, "step": 8290 }, { "epoch": 0.6665328247339891, "grad_norm": 0.25640439987182617, "learning_rate": 0.0001819495363175298, "loss": 0.4076, "step": 8300 }, { "epoch": 0.6673358763300542, "grad_norm": 0.19335483014583588, "learning_rate": 0.00018190531443735652, "loss": 0.3829, "step": 8310 }, { "epoch": 0.6681389279261193, "grad_norm": 0.21216365694999695, "learning_rate": 0.00018186104415491628, "loss": 0.4128, "step": 8320 }, { "epoch": 0.6689419795221843, "grad_norm": 0.27315762639045715, "learning_rate": 0.00018181672549807162, "loss": 0.377, "step": 8330 }, { "epoch": 0.6697450311182493, "grad_norm": 0.2585170269012451, "learning_rate": 0.00018177235849471565, "loss": 0.4003, "step": 8340 }, { "epoch": 0.6705480827143144, "grad_norm": 0.23191003501415253, "learning_rate": 0.0001817279431727718, "loss": 0.4019, "step": 8350 }, { "epoch": 0.6713511343103794, "grad_norm": 0.2946956157684326, "learning_rate": 0.00018168347956019394, "loss": 0.4138, "step": 8360 }, { "epoch": 0.6721541859064445, "grad_norm": 0.22646692395210266, "learning_rate": 0.00018163896768496642, "loss": 0.3942, "step": 8370 }, { "epoch": 0.6729572375025096, "grad_norm": 0.2242247760295868, "learning_rate": 0.0001815944075751038, "loss": 0.3908, "step": 8380 }, { "epoch": 0.6737602890985745, "grad_norm": 0.25752946734428406, "learning_rate": 0.00018154979925865113, "loss": 0.3938, "step": 8390 }, { "epoch": 0.6745633406946396, "grad_norm": 0.23940621316432953, "learning_rate": 0.00018150514276368381, "loss": 0.417, "step": 8400 }, { "epoch": 0.6753663922907047, "grad_norm": 0.22139179706573486, "learning_rate": 0.00018146043811830744, "loss": 0.3915, "step": 8410 }, { "epoch": 0.6761694438867697, "grad_norm": 0.21246902644634247, "learning_rate": 0.00018141568535065802, "loss": 0.4032, "step": 8420 }, { "epoch": 0.6769724954828348, "grad_norm": 0.28037089109420776, "learning_rate": 0.00018137088448890186, "loss": 0.3939, "step": 8430 }, { "epoch": 0.6777755470788999, "grad_norm": 0.20507533848285675, "learning_rate": 0.00018132603556123543, "loss": 0.3985, "step": 8440 }, { "epoch": 0.6785785986749648, "grad_norm": 0.21536138653755188, "learning_rate": 0.00018128113859588552, "loss": 0.4109, "step": 8450 }, { "epoch": 0.6793816502710299, "grad_norm": 0.27040666341781616, "learning_rate": 0.00018123619362110915, "loss": 0.3853, "step": 8460 }, { "epoch": 0.680184701867095, "grad_norm": 0.2892109751701355, "learning_rate": 0.00018119120066519362, "loss": 0.3929, "step": 8470 }, { "epoch": 0.68098775346316, "grad_norm": 0.20533247292041779, "learning_rate": 0.00018114615975645628, "loss": 0.4123, "step": 8480 }, { "epoch": 0.6817908050592251, "grad_norm": 0.22907011210918427, "learning_rate": 0.0001811010709232448, "loss": 0.3907, "step": 8490 }, { "epoch": 0.6825938566552902, "grad_norm": 0.21755269169807434, "learning_rate": 0.00018105593419393692, "loss": 0.3828, "step": 8500 }, { "epoch": 0.6833969082513551, "grad_norm": 0.21787667274475098, "learning_rate": 0.00018101074959694057, "loss": 0.3857, "step": 8510 }, { "epoch": 0.6841999598474202, "grad_norm": 0.2376081347465515, "learning_rate": 0.0001809655171606938, "loss": 0.3877, "step": 8520 }, { "epoch": 0.6850030114434853, "grad_norm": 0.25571152567863464, "learning_rate": 0.00018092023691366473, "loss": 0.416, "step": 8530 }, { "epoch": 0.6858060630395503, "grad_norm": 0.18120457231998444, "learning_rate": 0.00018087490888435168, "loss": 0.3961, "step": 8540 }, { "epoch": 0.6866091146356154, "grad_norm": 0.2685014307498932, "learning_rate": 0.00018082953310128293, "loss": 0.391, "step": 8550 }, { "epoch": 0.6874121662316803, "grad_norm": 0.23632293939590454, "learning_rate": 0.00018078410959301684, "loss": 0.4042, "step": 8560 }, { "epoch": 0.6882152178277454, "grad_norm": 0.2505054175853729, "learning_rate": 0.00018073863838814186, "loss": 0.3888, "step": 8570 }, { "epoch": 0.6890182694238105, "grad_norm": 0.22704572975635529, "learning_rate": 0.00018069311951527636, "loss": 0.3971, "step": 8580 }, { "epoch": 0.6898213210198755, "grad_norm": 0.22792857885360718, "learning_rate": 0.00018064755300306883, "loss": 0.3945, "step": 8590 }, { "epoch": 0.6906243726159406, "grad_norm": 0.2522415220737457, "learning_rate": 0.00018060193888019767, "loss": 0.3793, "step": 8600 }, { "epoch": 0.6914274242120056, "grad_norm": 0.23964688181877136, "learning_rate": 0.00018055627717537123, "loss": 0.3984, "step": 8610 }, { "epoch": 0.6922304758080706, "grad_norm": 0.1806684285402298, "learning_rate": 0.00018051056791732788, "loss": 0.38, "step": 8620 }, { "epoch": 0.6930335274041357, "grad_norm": 0.21252559125423431, "learning_rate": 0.00018046481113483584, "loss": 0.3754, "step": 8630 }, { "epoch": 0.6938365790002008, "grad_norm": 0.22577115893363953, "learning_rate": 0.0001804190068566933, "loss": 0.3737, "step": 8640 }, { "epoch": 0.6946396305962658, "grad_norm": 0.24711287021636963, "learning_rate": 0.00018037315511172831, "loss": 0.384, "step": 8650 }, { "epoch": 0.6954426821923309, "grad_norm": 0.21986767649650574, "learning_rate": 0.0001803272559287988, "loss": 0.3876, "step": 8660 }, { "epoch": 0.6962457337883959, "grad_norm": 0.23126177489757538, "learning_rate": 0.00018028130933679258, "loss": 0.3825, "step": 8670 }, { "epoch": 0.6970487853844609, "grad_norm": 0.24386760592460632, "learning_rate": 0.0001802353153646272, "loss": 0.3696, "step": 8680 }, { "epoch": 0.697851836980526, "grad_norm": 0.2332366704940796, "learning_rate": 0.0001801892740412502, "loss": 0.3963, "step": 8690 }, { "epoch": 0.6986548885765911, "grad_norm": 0.21498219668865204, "learning_rate": 0.00018014318539563882, "loss": 0.3731, "step": 8700 }, { "epoch": 0.6994579401726561, "grad_norm": 0.1991698294878006, "learning_rate": 0.0001800970494568, "loss": 0.3651, "step": 8710 }, { "epoch": 0.7002609917687211, "grad_norm": 0.21116411685943604, "learning_rate": 0.00018005086625377069, "loss": 0.3633, "step": 8720 }, { "epoch": 0.7010640433647862, "grad_norm": 0.20844218134880066, "learning_rate": 0.00018000463581561727, "loss": 0.3882, "step": 8730 }, { "epoch": 0.7018670949608512, "grad_norm": 0.18904000520706177, "learning_rate": 0.00017995835817143611, "loss": 0.3923, "step": 8740 }, { "epoch": 0.7026701465569163, "grad_norm": 0.21291761100292206, "learning_rate": 0.0001799120333503532, "loss": 0.3626, "step": 8750 }, { "epoch": 0.7034731981529814, "grad_norm": 0.22352240979671478, "learning_rate": 0.00017986566138152417, "loss": 0.3933, "step": 8760 }, { "epoch": 0.7042762497490463, "grad_norm": 0.2703261077404022, "learning_rate": 0.00017981924229413436, "loss": 0.4123, "step": 8770 }, { "epoch": 0.7050793013451114, "grad_norm": 0.23805803060531616, "learning_rate": 0.00017977277611739884, "loss": 0.412, "step": 8780 }, { "epoch": 0.7058823529411765, "grad_norm": 0.2864117920398712, "learning_rate": 0.00017972626288056221, "loss": 0.3926, "step": 8790 }, { "epoch": 0.7066854045372415, "grad_norm": 0.24868759512901306, "learning_rate": 0.00017967970261289871, "loss": 0.3994, "step": 8800 }, { "epoch": 0.7074884561333066, "grad_norm": 0.3698653280735016, "learning_rate": 0.00017963309534371223, "loss": 0.39, "step": 8810 }, { "epoch": 0.7082915077293717, "grad_norm": 0.22614340484142303, "learning_rate": 0.0001795864411023362, "loss": 0.4002, "step": 8820 }, { "epoch": 0.7090945593254366, "grad_norm": 0.21136240661144257, "learning_rate": 0.00017953973991813362, "loss": 0.3616, "step": 8830 }, { "epoch": 0.7098976109215017, "grad_norm": 0.1978643536567688, "learning_rate": 0.00017949299182049702, "loss": 0.3998, "step": 8840 }, { "epoch": 0.7107006625175668, "grad_norm": 0.23014165461063385, "learning_rate": 0.00017944619683884854, "loss": 0.3991, "step": 8850 }, { "epoch": 0.7115037141136318, "grad_norm": 0.25488704442977905, "learning_rate": 0.00017939935500263968, "loss": 0.4091, "step": 8860 }, { "epoch": 0.7123067657096969, "grad_norm": 0.28848153352737427, "learning_rate": 0.0001793524663413516, "loss": 0.4018, "step": 8870 }, { "epoch": 0.713109817305762, "grad_norm": 0.25005483627319336, "learning_rate": 0.00017930553088449478, "loss": 0.4327, "step": 8880 }, { "epoch": 0.7139128689018269, "grad_norm": 0.24231795966625214, "learning_rate": 0.0001792585486616092, "loss": 0.3943, "step": 8890 }, { "epoch": 0.714715920497892, "grad_norm": 0.2801241874694824, "learning_rate": 0.00017921151970226433, "loss": 0.385, "step": 8900 }, { "epoch": 0.715518972093957, "grad_norm": 0.28495243191719055, "learning_rate": 0.000179164444036059, "loss": 0.3803, "step": 8910 }, { "epoch": 0.7163220236900221, "grad_norm": 0.24258869886398315, "learning_rate": 0.00017911732169262145, "loss": 0.3831, "step": 8920 }, { "epoch": 0.7171250752860872, "grad_norm": 0.23114562034606934, "learning_rate": 0.00017907015270160928, "loss": 0.3844, "step": 8930 }, { "epoch": 0.7179281268821521, "grad_norm": 0.2252551168203354, "learning_rate": 0.00017902293709270945, "loss": 0.4117, "step": 8940 }, { "epoch": 0.7187311784782172, "grad_norm": 0.23264305293560028, "learning_rate": 0.00017897567489563834, "loss": 0.3887, "step": 8950 }, { "epoch": 0.7195342300742823, "grad_norm": 0.26272451877593994, "learning_rate": 0.0001789283661401415, "loss": 0.4071, "step": 8960 }, { "epoch": 0.7203372816703473, "grad_norm": 0.22586441040039062, "learning_rate": 0.00017888101085599396, "loss": 0.4004, "step": 8970 }, { "epoch": 0.7211403332664124, "grad_norm": 0.22940053045749664, "learning_rate": 0.0001788336090729999, "loss": 0.4016, "step": 8980 }, { "epoch": 0.7219433848624774, "grad_norm": 0.2689235508441925, "learning_rate": 0.00017878616082099276, "loss": 0.3826, "step": 8990 }, { "epoch": 0.7227464364585424, "grad_norm": 0.211382657289505, "learning_rate": 0.00017873866612983533, "loss": 0.4094, "step": 9000 }, { "epoch": 0.7235494880546075, "grad_norm": 0.21285992860794067, "learning_rate": 0.00017869112502941957, "loss": 0.3908, "step": 9010 }, { "epoch": 0.7243525396506726, "grad_norm": 0.2258191555738449, "learning_rate": 0.00017864353754966665, "loss": 0.3832, "step": 9020 }, { "epoch": 0.7251555912467376, "grad_norm": 0.23650678992271423, "learning_rate": 0.00017859590372052689, "loss": 0.4014, "step": 9030 }, { "epoch": 0.7259586428428026, "grad_norm": 0.30802032351493835, "learning_rate": 0.00017854822357197986, "loss": 0.3753, "step": 9040 }, { "epoch": 0.7267616944388677, "grad_norm": 0.23187221586704254, "learning_rate": 0.00017850049713403424, "loss": 0.3785, "step": 9050 }, { "epoch": 0.7275647460349327, "grad_norm": 0.20733876526355743, "learning_rate": 0.00017845272443672787, "loss": 0.3996, "step": 9060 }, { "epoch": 0.7283677976309978, "grad_norm": 0.28873687982559204, "learning_rate": 0.00017840490551012764, "loss": 0.3869, "step": 9070 }, { "epoch": 0.7291708492270629, "grad_norm": 0.19503140449523926, "learning_rate": 0.00017835704038432956, "loss": 0.3867, "step": 9080 }, { "epoch": 0.7299739008231279, "grad_norm": 0.2301810085773468, "learning_rate": 0.0001783091290894588, "loss": 0.4055, "step": 9090 }, { "epoch": 0.7307769524191929, "grad_norm": 0.19899088144302368, "learning_rate": 0.0001782611716556695, "loss": 0.3681, "step": 9100 }, { "epoch": 0.731580004015258, "grad_norm": 0.22330187261104584, "learning_rate": 0.0001782131681131448, "loss": 0.3941, "step": 9110 }, { "epoch": 0.732383055611323, "grad_norm": 0.2142290621995926, "learning_rate": 0.000178165118492097, "loss": 0.3714, "step": 9120 }, { "epoch": 0.7331861072073881, "grad_norm": 0.21483303606510162, "learning_rate": 0.0001781170228227673, "loss": 0.4084, "step": 9130 }, { "epoch": 0.7339891588034532, "grad_norm": 0.2543689012527466, "learning_rate": 0.0001780688811354259, "loss": 0.4139, "step": 9140 }, { "epoch": 0.7347922103995181, "grad_norm": 0.1980743408203125, "learning_rate": 0.00017802069346037192, "loss": 0.4266, "step": 9150 }, { "epoch": 0.7355952619955832, "grad_norm": 0.33610841631889343, "learning_rate": 0.00017797245982793353, "loss": 0.4474, "step": 9160 }, { "epoch": 0.7363983135916483, "grad_norm": 0.2761506736278534, "learning_rate": 0.00017792418026846773, "loss": 0.3953, "step": 9170 }, { "epoch": 0.7372013651877133, "grad_norm": 0.3072396516799927, "learning_rate": 0.00017787585481236046, "loss": 0.3999, "step": 9180 }, { "epoch": 0.7380044167837784, "grad_norm": 0.21221454441547394, "learning_rate": 0.00017782748349002658, "loss": 0.3834, "step": 9190 }, { "epoch": 0.7388074683798435, "grad_norm": 0.23131617903709412, "learning_rate": 0.00017777906633190973, "loss": 0.3906, "step": 9200 }, { "epoch": 0.7396105199759084, "grad_norm": 0.2026665061712265, "learning_rate": 0.00017773060336848245, "loss": 0.4, "step": 9210 }, { "epoch": 0.7404135715719735, "grad_norm": 0.31828537583351135, "learning_rate": 0.00017768209463024615, "loss": 0.4093, "step": 9220 }, { "epoch": 0.7412166231680386, "grad_norm": 0.2184494435787201, "learning_rate": 0.00017763354014773103, "loss": 0.3678, "step": 9230 }, { "epoch": 0.7420196747641036, "grad_norm": 0.20824220776557922, "learning_rate": 0.00017758493995149598, "loss": 0.3924, "step": 9240 }, { "epoch": 0.7428227263601687, "grad_norm": 0.2117159366607666, "learning_rate": 0.00017753629407212874, "loss": 0.4104, "step": 9250 }, { "epoch": 0.7436257779562336, "grad_norm": 0.2718915641307831, "learning_rate": 0.00017748760254024585, "loss": 0.3942, "step": 9260 }, { "epoch": 0.7444288295522987, "grad_norm": 0.22768938541412354, "learning_rate": 0.00017743886538649256, "loss": 0.3868, "step": 9270 }, { "epoch": 0.7452318811483638, "grad_norm": 0.23124480247497559, "learning_rate": 0.00017739008264154276, "loss": 0.3746, "step": 9280 }, { "epoch": 0.7460349327444288, "grad_norm": 0.31417542695999146, "learning_rate": 0.00017734125433609902, "loss": 0.3806, "step": 9290 }, { "epoch": 0.7468379843404939, "grad_norm": 0.26060178875923157, "learning_rate": 0.00017729238050089277, "loss": 0.4061, "step": 9300 }, { "epoch": 0.747641035936559, "grad_norm": 0.23423592746257782, "learning_rate": 0.00017724346116668386, "loss": 0.3945, "step": 9310 }, { "epoch": 0.7484440875326239, "grad_norm": 0.21128147840499878, "learning_rate": 0.00017719449636426098, "loss": 0.3893, "step": 9320 }, { "epoch": 0.749247139128689, "grad_norm": 0.20448382198810577, "learning_rate": 0.0001771454861244413, "loss": 0.389, "step": 9330 }, { "epoch": 0.7500501907247541, "grad_norm": 0.275865375995636, "learning_rate": 0.00017709643047807058, "loss": 0.395, "step": 9340 }, { "epoch": 0.7508532423208191, "grad_norm": 0.23136474192142487, "learning_rate": 0.00017704732945602328, "loss": 0.407, "step": 9350 }, { "epoch": 0.7516562939168842, "grad_norm": 0.21793334186077118, "learning_rate": 0.00017699818308920234, "loss": 0.3556, "step": 9360 }, { "epoch": 0.7524593455129492, "grad_norm": 0.20559340715408325, "learning_rate": 0.00017694899140853924, "loss": 0.3481, "step": 9370 }, { "epoch": 0.7532623971090142, "grad_norm": 0.21088363230228424, "learning_rate": 0.00017689975444499392, "loss": 0.3816, "step": 9380 }, { "epoch": 0.7540654487050793, "grad_norm": 0.20097120106220245, "learning_rate": 0.00017685047222955498, "loss": 0.3943, "step": 9390 }, { "epoch": 0.7548685003011444, "grad_norm": 0.23289404809474945, "learning_rate": 0.00017680114479323933, "loss": 0.394, "step": 9400 }, { "epoch": 0.7556715518972094, "grad_norm": 0.21003173291683197, "learning_rate": 0.00017675177216709248, "loss": 0.3849, "step": 9410 }, { "epoch": 0.7564746034932744, "grad_norm": 0.2648775577545166, "learning_rate": 0.00017670235438218824, "loss": 0.3898, "step": 9420 }, { "epoch": 0.7572776550893395, "grad_norm": 0.22326524555683136, "learning_rate": 0.00017665289146962898, "loss": 0.4036, "step": 9430 }, { "epoch": 0.7580807066854045, "grad_norm": 0.23303963243961334, "learning_rate": 0.00017660338346054538, "loss": 0.4145, "step": 9440 }, { "epoch": 0.7588837582814696, "grad_norm": 0.20515276491641998, "learning_rate": 0.00017655383038609656, "loss": 0.397, "step": 9450 }, { "epoch": 0.7596868098775347, "grad_norm": 0.37429898977279663, "learning_rate": 0.00017650423227746998, "loss": 0.4317, "step": 9460 }, { "epoch": 0.7604898614735996, "grad_norm": 0.2294493019580841, "learning_rate": 0.0001764545891658814, "loss": 0.3627, "step": 9470 }, { "epoch": 0.7612929130696647, "grad_norm": 0.21970562636852264, "learning_rate": 0.00017640490108257494, "loss": 0.4106, "step": 9480 }, { "epoch": 0.7620959646657298, "grad_norm": 0.220896378159523, "learning_rate": 0.00017635516805882307, "loss": 0.3868, "step": 9490 }, { "epoch": 0.7628990162617948, "grad_norm": 0.2595314383506775, "learning_rate": 0.00017630539012592652, "loss": 0.4008, "step": 9500 }, { "epoch": 0.7637020678578599, "grad_norm": 0.2248406559228897, "learning_rate": 0.0001762555673152142, "loss": 0.3934, "step": 9510 }, { "epoch": 0.764505119453925, "grad_norm": 0.22475957870483398, "learning_rate": 0.00017620569965804336, "loss": 0.3877, "step": 9520 }, { "epoch": 0.7653081710499899, "grad_norm": 0.2565280497074127, "learning_rate": 0.00017615578718579945, "loss": 0.4046, "step": 9530 }, { "epoch": 0.766111222646055, "grad_norm": 0.22033476829528809, "learning_rate": 0.0001761058299298961, "loss": 0.3864, "step": 9540 }, { "epoch": 0.7669142742421201, "grad_norm": 0.21526722609996796, "learning_rate": 0.00017605582792177519, "loss": 0.3626, "step": 9550 }, { "epoch": 0.7677173258381851, "grad_norm": 0.2570739686489105, "learning_rate": 0.00017600578119290674, "loss": 0.4134, "step": 9560 }, { "epoch": 0.7685203774342502, "grad_norm": 0.22738192975521088, "learning_rate": 0.00017595568977478882, "loss": 0.3804, "step": 9570 }, { "epoch": 0.7693234290303151, "grad_norm": 0.22180607914924622, "learning_rate": 0.00017590555369894774, "loss": 0.3979, "step": 9580 }, { "epoch": 0.7701264806263802, "grad_norm": 0.21209877729415894, "learning_rate": 0.0001758553729969379, "loss": 0.3788, "step": 9590 }, { "epoch": 0.7709295322224453, "grad_norm": 0.18943189084529877, "learning_rate": 0.0001758051477003418, "loss": 0.3659, "step": 9600 }, { "epoch": 0.7717325838185103, "grad_norm": 0.2436358481645584, "learning_rate": 0.0001757548778407699, "loss": 0.3855, "step": 9610 }, { "epoch": 0.7725356354145754, "grad_norm": 0.2506536841392517, "learning_rate": 0.00017570456344986085, "loss": 0.391, "step": 9620 }, { "epoch": 0.7733386870106405, "grad_norm": 0.23693950474262238, "learning_rate": 0.00017565420455928124, "loss": 0.4002, "step": 9630 }, { "epoch": 0.7741417386067054, "grad_norm": 0.25357210636138916, "learning_rate": 0.00017560380120072562, "loss": 0.4057, "step": 9640 }, { "epoch": 0.7749447902027705, "grad_norm": 0.2179892212152481, "learning_rate": 0.00017555335340591668, "loss": 0.3888, "step": 9650 }, { "epoch": 0.7757478417988356, "grad_norm": 0.2075311690568924, "learning_rate": 0.00017550286120660499, "loss": 0.3835, "step": 9660 }, { "epoch": 0.7765508933949006, "grad_norm": 0.26635420322418213, "learning_rate": 0.000175452324634569, "loss": 0.3901, "step": 9670 }, { "epoch": 0.7773539449909657, "grad_norm": 0.268032968044281, "learning_rate": 0.0001754017437216152, "loss": 0.3858, "step": 9680 }, { "epoch": 0.7781569965870307, "grad_norm": 0.21875862777233124, "learning_rate": 0.00017535111849957793, "loss": 0.3922, "step": 9690 }, { "epoch": 0.7789600481830957, "grad_norm": 0.23733873665332794, "learning_rate": 0.0001753004490003195, "loss": 0.3862, "step": 9700 }, { "epoch": 0.7797630997791608, "grad_norm": 0.2604048252105713, "learning_rate": 0.00017524973525572988, "loss": 0.3865, "step": 9710 }, { "epoch": 0.7805661513752259, "grad_norm": 0.23662330210208893, "learning_rate": 0.0001751989772977271, "loss": 0.4009, "step": 9720 }, { "epoch": 0.7813692029712909, "grad_norm": 0.23925410211086273, "learning_rate": 0.00017514817515825696, "loss": 0.4086, "step": 9730 }, { "epoch": 0.782172254567356, "grad_norm": 0.21597962081432343, "learning_rate": 0.00017509732886929303, "loss": 0.3879, "step": 9740 }, { "epoch": 0.782975306163421, "grad_norm": 0.5957660675048828, "learning_rate": 0.00017504643846283664, "loss": 0.4047, "step": 9750 }, { "epoch": 0.783778357759486, "grad_norm": 0.20510512590408325, "learning_rate": 0.00017499550397091697, "loss": 0.3915, "step": 9760 }, { "epoch": 0.7845814093555511, "grad_norm": 0.20152026414871216, "learning_rate": 0.00017494452542559093, "loss": 0.4169, "step": 9770 }, { "epoch": 0.7853844609516162, "grad_norm": 0.2696515619754791, "learning_rate": 0.00017489350285894306, "loss": 0.3754, "step": 9780 }, { "epoch": 0.7861875125476812, "grad_norm": 0.23385274410247803, "learning_rate": 0.00017484243630308576, "loss": 0.3906, "step": 9790 }, { "epoch": 0.7869905641437462, "grad_norm": 0.21424131095409393, "learning_rate": 0.00017479132579015894, "loss": 0.4061, "step": 9800 }, { "epoch": 0.7877936157398113, "grad_norm": 0.21603573858737946, "learning_rate": 0.00017474017135233038, "loss": 0.3873, "step": 9810 }, { "epoch": 0.7885966673358763, "grad_norm": 0.23877079784870148, "learning_rate": 0.0001746889730217953, "loss": 0.3695, "step": 9820 }, { "epoch": 0.7893997189319414, "grad_norm": 0.2181158810853958, "learning_rate": 0.00017463773083077665, "loss": 0.3997, "step": 9830 }, { "epoch": 0.7902027705280065, "grad_norm": 0.18704405426979065, "learning_rate": 0.00017458644481152505, "loss": 0.3866, "step": 9840 }, { "epoch": 0.7910058221240714, "grad_norm": 0.2406257688999176, "learning_rate": 0.00017453511499631853, "loss": 0.4425, "step": 9850 }, { "epoch": 0.7918088737201365, "grad_norm": 0.29393985867500305, "learning_rate": 0.0001744837414174629, "loss": 0.4074, "step": 9860 }, { "epoch": 0.7926119253162016, "grad_norm": 0.2552378177642822, "learning_rate": 0.00017443232410729124, "loss": 0.4206, "step": 9870 }, { "epoch": 0.7934149769122666, "grad_norm": 0.21950367093086243, "learning_rate": 0.00017438086309816447, "loss": 0.4148, "step": 9880 }, { "epoch": 0.7942180285083317, "grad_norm": 0.22926825284957886, "learning_rate": 0.00017432935842247074, "loss": 0.3792, "step": 9890 }, { "epoch": 0.7950210801043968, "grad_norm": 0.219939187169075, "learning_rate": 0.00017427781011262582, "loss": 0.3909, "step": 9900 }, { "epoch": 0.7958241317004617, "grad_norm": 0.2033262848854065, "learning_rate": 0.00017422621820107294, "loss": 0.3865, "step": 9910 }, { "epoch": 0.7966271832965268, "grad_norm": 0.2212083786725998, "learning_rate": 0.00017417458272028273, "loss": 0.4035, "step": 9920 }, { "epoch": 0.7974302348925918, "grad_norm": 0.18574555218219757, "learning_rate": 0.00017412290370275328, "loss": 0.3927, "step": 9930 }, { "epoch": 0.7982332864886569, "grad_norm": 0.17652761936187744, "learning_rate": 0.00017407118118101004, "loss": 0.3874, "step": 9940 }, { "epoch": 0.799036338084722, "grad_norm": 0.2011159360408783, "learning_rate": 0.00017401941518760587, "loss": 0.3966, "step": 9950 }, { "epoch": 0.7998393896807869, "grad_norm": 0.21914201974868774, "learning_rate": 0.000173967605755121, "loss": 0.3793, "step": 9960 }, { "epoch": 0.800642441276852, "grad_norm": 0.20693127810955048, "learning_rate": 0.00017391575291616295, "loss": 0.4001, "step": 9970 }, { "epoch": 0.8014454928729171, "grad_norm": 0.20415879786014557, "learning_rate": 0.00017386385670336663, "loss": 0.4117, "step": 9980 }, { "epoch": 0.8022485444689821, "grad_norm": 0.2621535360813141, "learning_rate": 0.00017381191714939417, "loss": 0.3809, "step": 9990 }, { "epoch": 0.8030515960650472, "grad_norm": 0.2063026875257492, "learning_rate": 0.000173759934286935, "loss": 0.3871, "step": 10000 }, { "epoch": 0.8038546476611123, "grad_norm": 0.2335769236087799, "learning_rate": 0.0001737079081487059, "loss": 0.3944, "step": 10010 }, { "epoch": 0.8046576992571772, "grad_norm": 0.1818244606256485, "learning_rate": 0.0001736558387674507, "loss": 0.391, "step": 10020 }, { "epoch": 0.8054607508532423, "grad_norm": 0.256436288356781, "learning_rate": 0.0001736037261759407, "loss": 0.3975, "step": 10030 }, { "epoch": 0.8062638024493074, "grad_norm": 0.22372440993785858, "learning_rate": 0.0001735515704069741, "loss": 0.3864, "step": 10040 }, { "epoch": 0.8070668540453724, "grad_norm": 0.19636671245098114, "learning_rate": 0.00017349937149337653, "loss": 0.388, "step": 10050 }, { "epoch": 0.8078699056414375, "grad_norm": 0.21902227401733398, "learning_rate": 0.00017344712946800066, "loss": 0.3911, "step": 10060 }, { "epoch": 0.8086729572375025, "grad_norm": 0.23606406152248383, "learning_rate": 0.00017339484436372624, "loss": 0.3593, "step": 10070 }, { "epoch": 0.8094760088335675, "grad_norm": 0.2473745048046112, "learning_rate": 0.00017334251621346026, "loss": 0.3948, "step": 10080 }, { "epoch": 0.8102790604296326, "grad_norm": 0.23925325274467468, "learning_rate": 0.00017329014505013674, "loss": 0.4135, "step": 10090 }, { "epoch": 0.8110821120256977, "grad_norm": 0.21251752972602844, "learning_rate": 0.00017323773090671676, "loss": 0.3755, "step": 10100 }, { "epoch": 0.8118851636217627, "grad_norm": 0.20152030885219574, "learning_rate": 0.0001731852738161884, "loss": 0.3916, "step": 10110 }, { "epoch": 0.8126882152178277, "grad_norm": 0.2324645221233368, "learning_rate": 0.00017313277381156693, "loss": 0.4093, "step": 10120 }, { "epoch": 0.8134912668138928, "grad_norm": 0.2170443832874298, "learning_rate": 0.00017308023092589444, "loss": 0.3925, "step": 10130 }, { "epoch": 0.8142943184099578, "grad_norm": 0.21443665027618408, "learning_rate": 0.00017302764519224018, "loss": 0.4029, "step": 10140 }, { "epoch": 0.8150973700060229, "grad_norm": 0.22609004378318787, "learning_rate": 0.00017297501664370027, "loss": 0.3902, "step": 10150 }, { "epoch": 0.815900421602088, "grad_norm": 0.2755078375339508, "learning_rate": 0.00017292234531339772, "loss": 0.3989, "step": 10160 }, { "epoch": 0.816703473198153, "grad_norm": 0.23915578424930573, "learning_rate": 0.0001728696312344826, "loss": 0.3769, "step": 10170 }, { "epoch": 0.817506524794218, "grad_norm": 0.20907121896743774, "learning_rate": 0.0001728168744401318, "loss": 0.3625, "step": 10180 }, { "epoch": 0.8183095763902831, "grad_norm": 0.22941021621227264, "learning_rate": 0.00017276407496354915, "loss": 0.415, "step": 10190 }, { "epoch": 0.8191126279863481, "grad_norm": 0.2313588708639145, "learning_rate": 0.00017271123283796525, "loss": 0.4274, "step": 10200 }, { "epoch": 0.8199156795824132, "grad_norm": 0.2399587631225586, "learning_rate": 0.00017265834809663768, "loss": 0.3772, "step": 10210 }, { "epoch": 0.8207187311784783, "grad_norm": 0.22946509718894958, "learning_rate": 0.0001726054207728507, "loss": 0.3837, "step": 10220 }, { "epoch": 0.8215217827745432, "grad_norm": 0.2425631880760193, "learning_rate": 0.00017255245089991547, "loss": 0.3952, "step": 10230 }, { "epoch": 0.8223248343706083, "grad_norm": 0.23920899629592896, "learning_rate": 0.00017249943851116987, "loss": 0.397, "step": 10240 }, { "epoch": 0.8231278859666734, "grad_norm": 0.21412312984466553, "learning_rate": 0.00017244638363997857, "loss": 0.3778, "step": 10250 }, { "epoch": 0.8239309375627384, "grad_norm": 0.2191092073917389, "learning_rate": 0.00017239328631973297, "loss": 0.3784, "step": 10260 }, { "epoch": 0.8247339891588035, "grad_norm": 0.2479000836610794, "learning_rate": 0.0001723401465838512, "loss": 0.4177, "step": 10270 }, { "epoch": 0.8255370407548684, "grad_norm": 0.19038797914981842, "learning_rate": 0.00017228696446577806, "loss": 0.3819, "step": 10280 }, { "epoch": 0.8263400923509335, "grad_norm": 0.20835627615451813, "learning_rate": 0.000172233739998985, "loss": 0.3889, "step": 10290 }, { "epoch": 0.8271431439469986, "grad_norm": 0.23612453043460846, "learning_rate": 0.00017218047321697022, "loss": 0.3633, "step": 10300 }, { "epoch": 0.8279461955430636, "grad_norm": 0.23971840739250183, "learning_rate": 0.00017212716415325845, "loss": 0.3969, "step": 10310 }, { "epoch": 0.8287492471391287, "grad_norm": 0.21604874730110168, "learning_rate": 0.00017207381284140108, "loss": 0.3787, "step": 10320 }, { "epoch": 0.8295522987351938, "grad_norm": 0.20165497064590454, "learning_rate": 0.0001720204193149761, "loss": 0.3728, "step": 10330 }, { "epoch": 0.8303553503312587, "grad_norm": 0.2285691648721695, "learning_rate": 0.000171966983607588, "loss": 0.3791, "step": 10340 }, { "epoch": 0.8311584019273238, "grad_norm": 0.2423318773508072, "learning_rate": 0.00017191350575286796, "loss": 0.4219, "step": 10350 }, { "epoch": 0.8319614535233889, "grad_norm": 0.34159454703330994, "learning_rate": 0.00017185998578447348, "loss": 0.3944, "step": 10360 }, { "epoch": 0.8327645051194539, "grad_norm": 0.23440277576446533, "learning_rate": 0.00017180642373608874, "loss": 0.3785, "step": 10370 }, { "epoch": 0.833567556715519, "grad_norm": 0.2308449149131775, "learning_rate": 0.00017175281964142434, "loss": 0.4087, "step": 10380 }, { "epoch": 0.834370608311584, "grad_norm": 0.20292632281780243, "learning_rate": 0.0001716991735342174, "loss": 0.3683, "step": 10390 }, { "epoch": 0.835173659907649, "grad_norm": 0.22817809879779816, "learning_rate": 0.0001716454854482313, "loss": 0.4103, "step": 10400 }, { "epoch": 0.8359767115037141, "grad_norm": 0.19162170588970184, "learning_rate": 0.00017159175541725608, "loss": 0.3773, "step": 10410 }, { "epoch": 0.8367797630997792, "grad_norm": 0.23191888630390167, "learning_rate": 0.000171537983475108, "loss": 0.4099, "step": 10420 }, { "epoch": 0.8375828146958442, "grad_norm": 0.19668112695217133, "learning_rate": 0.0001714841696556298, "loss": 0.3909, "step": 10430 }, { "epoch": 0.8383858662919093, "grad_norm": 0.2233145534992218, "learning_rate": 0.00017143031399269058, "loss": 0.3712, "step": 10440 }, { "epoch": 0.8391889178879743, "grad_norm": 0.234078049659729, "learning_rate": 0.00017137641652018566, "loss": 0.3871, "step": 10450 }, { "epoch": 0.8399919694840393, "grad_norm": 0.24259483814239502, "learning_rate": 0.00017132247727203676, "loss": 0.3805, "step": 10460 }, { "epoch": 0.8407950210801044, "grad_norm": 0.22303998470306396, "learning_rate": 0.00017126849628219194, "loss": 0.4454, "step": 10470 }, { "epoch": 0.8415980726761695, "grad_norm": 0.2438802570104599, "learning_rate": 0.00017121447358462545, "loss": 0.3871, "step": 10480 }, { "epoch": 0.8424011242722345, "grad_norm": 0.19757722318172455, "learning_rate": 0.00017116040921333778, "loss": 0.3761, "step": 10490 }, { "epoch": 0.8432041758682995, "grad_norm": 0.2515436112880707, "learning_rate": 0.00017110630320235572, "loss": 0.4064, "step": 10500 }, { "epoch": 0.8440072274643646, "grad_norm": 0.18457166850566864, "learning_rate": 0.00017105215558573222, "loss": 0.3893, "step": 10510 }, { "epoch": 0.8448102790604296, "grad_norm": 0.2227247804403305, "learning_rate": 0.00017099796639754642, "loss": 0.3892, "step": 10520 }, { "epoch": 0.8456133306564947, "grad_norm": 0.26140907406806946, "learning_rate": 0.00017094373567190364, "loss": 0.411, "step": 10530 }, { "epoch": 0.8464163822525598, "grad_norm": 0.20250721275806427, "learning_rate": 0.00017088946344293534, "loss": 0.3999, "step": 10540 }, { "epoch": 0.8472194338486247, "grad_norm": 0.21679583191871643, "learning_rate": 0.00017083514974479907, "loss": 0.4379, "step": 10550 }, { "epoch": 0.8480224854446898, "grad_norm": 0.22687940299510956, "learning_rate": 0.0001707807946116785, "loss": 0.3692, "step": 10560 }, { "epoch": 0.8488255370407549, "grad_norm": 0.22261932492256165, "learning_rate": 0.00017072639807778342, "loss": 0.3889, "step": 10570 }, { "epoch": 0.8496285886368199, "grad_norm": 0.20541466772556305, "learning_rate": 0.0001706719601773496, "loss": 0.3658, "step": 10580 }, { "epoch": 0.850431640232885, "grad_norm": 0.19754144549369812, "learning_rate": 0.0001706174809446389, "loss": 0.3684, "step": 10590 }, { "epoch": 0.8512346918289501, "grad_norm": 0.2026623636484146, "learning_rate": 0.00017056296041393916, "loss": 0.3915, "step": 10600 }, { "epoch": 0.852037743425015, "grad_norm": 0.2034168392419815, "learning_rate": 0.00017050839861956424, "loss": 0.3908, "step": 10610 }, { "epoch": 0.8528407950210801, "grad_norm": 0.19928376376628876, "learning_rate": 0.00017045379559585394, "loss": 0.379, "step": 10620 }, { "epoch": 0.8536438466171451, "grad_norm": 0.20712848007678986, "learning_rate": 0.0001703991513771741, "loss": 0.3591, "step": 10630 }, { "epoch": 0.8544468982132102, "grad_norm": 0.25177013874053955, "learning_rate": 0.00017034446599791634, "loss": 0.3974, "step": 10640 }, { "epoch": 0.8552499498092753, "grad_norm": 0.19752544164657593, "learning_rate": 0.00017028973949249828, "loss": 0.4018, "step": 10650 }, { "epoch": 0.8560530014053402, "grad_norm": 0.2893075942993164, "learning_rate": 0.00017023497189536338, "loss": 0.3901, "step": 10660 }, { "epoch": 0.8568560530014053, "grad_norm": 0.1989186406135559, "learning_rate": 0.000170180163240981, "loss": 0.3824, "step": 10670 }, { "epoch": 0.8576591045974704, "grad_norm": 0.22399704158306122, "learning_rate": 0.00017012531356384633, "loss": 0.371, "step": 10680 }, { "epoch": 0.8584621561935354, "grad_norm": 0.2025546133518219, "learning_rate": 0.00017007042289848042, "loss": 0.3707, "step": 10690 }, { "epoch": 0.8592652077896005, "grad_norm": 0.3719926178455353, "learning_rate": 0.00017001549127943, "loss": 0.408, "step": 10700 }, { "epoch": 0.8600682593856656, "grad_norm": 0.18110543489456177, "learning_rate": 0.0001699605187412677, "loss": 0.3806, "step": 10710 }, { "epoch": 0.8608713109817305, "grad_norm": 0.2007499486207962, "learning_rate": 0.00016990550531859183, "loss": 0.395, "step": 10720 }, { "epoch": 0.8616743625777956, "grad_norm": 0.2021133005619049, "learning_rate": 0.0001698504510460264, "loss": 0.3685, "step": 10730 }, { "epoch": 0.8624774141738607, "grad_norm": 0.22817011177539825, "learning_rate": 0.0001697953559582213, "loss": 0.3873, "step": 10740 }, { "epoch": 0.8632804657699257, "grad_norm": 0.23059700429439545, "learning_rate": 0.0001697402200898519, "loss": 0.4046, "step": 10750 }, { "epoch": 0.8640835173659908, "grad_norm": 0.3209605813026428, "learning_rate": 0.0001696850434756193, "loss": 0.3763, "step": 10760 }, { "epoch": 0.8648865689620558, "grad_norm": 0.2518880367279053, "learning_rate": 0.0001696298261502504, "loss": 0.3586, "step": 10770 }, { "epoch": 0.8656896205581208, "grad_norm": 0.23390187323093414, "learning_rate": 0.0001695745681484974, "loss": 0.3611, "step": 10780 }, { "epoch": 0.8664926721541859, "grad_norm": 0.2167815864086151, "learning_rate": 0.00016951926950513848, "loss": 0.3989, "step": 10790 }, { "epoch": 0.867295723750251, "grad_norm": 0.19716718792915344, "learning_rate": 0.00016946393025497712, "loss": 0.3961, "step": 10800 }, { "epoch": 0.868098775346316, "grad_norm": 0.2958979606628418, "learning_rate": 0.00016940855043284247, "loss": 0.4045, "step": 10810 }, { "epoch": 0.868901826942381, "grad_norm": 0.21711929142475128, "learning_rate": 0.0001693531300735892, "loss": 0.3876, "step": 10820 }, { "epoch": 0.8697048785384461, "grad_norm": 0.28414186835289, "learning_rate": 0.0001692976692120975, "loss": 0.3977, "step": 10830 }, { "epoch": 0.8705079301345111, "grad_norm": 0.21933525800704956, "learning_rate": 0.000169242167883273, "loss": 0.4031, "step": 10840 }, { "epoch": 0.8713109817305762, "grad_norm": 0.2437364012002945, "learning_rate": 0.00016918662612204683, "loss": 0.3834, "step": 10850 }, { "epoch": 0.8721140333266413, "grad_norm": 0.22255143523216248, "learning_rate": 0.00016913104396337563, "loss": 0.3704, "step": 10860 }, { "epoch": 0.8729170849227063, "grad_norm": 0.23539595305919647, "learning_rate": 0.00016907542144224138, "loss": 0.3865, "step": 10870 }, { "epoch": 0.8737201365187713, "grad_norm": 0.21141840517520905, "learning_rate": 0.00016901975859365148, "loss": 0.4, "step": 10880 }, { "epoch": 0.8745231881148364, "grad_norm": 0.21460175514221191, "learning_rate": 0.00016896405545263873, "loss": 0.3668, "step": 10890 }, { "epoch": 0.8753262397109014, "grad_norm": 0.24640698730945587, "learning_rate": 0.0001689083120542613, "loss": 0.3993, "step": 10900 }, { "epoch": 0.8761292913069665, "grad_norm": 0.2391546219587326, "learning_rate": 0.0001688525284336027, "loss": 0.4075, "step": 10910 }, { "epoch": 0.8769323429030316, "grad_norm": 0.2253667712211609, "learning_rate": 0.00016879670462577169, "loss": 0.4257, "step": 10920 }, { "epoch": 0.8777353944990965, "grad_norm": 0.21455058455467224, "learning_rate": 0.00016874084066590236, "loss": 0.3716, "step": 10930 }, { "epoch": 0.8785384460951616, "grad_norm": 0.22399316728115082, "learning_rate": 0.00016868493658915413, "loss": 0.3818, "step": 10940 }, { "epoch": 0.8793414976912267, "grad_norm": 0.20563040673732758, "learning_rate": 0.00016862899243071157, "loss": 0.3784, "step": 10950 }, { "epoch": 0.8801445492872917, "grad_norm": 0.2381453663110733, "learning_rate": 0.00016857300822578454, "loss": 0.4014, "step": 10960 }, { "epoch": 0.8809476008833568, "grad_norm": 0.21762752532958984, "learning_rate": 0.00016851698400960813, "loss": 0.4005, "step": 10970 }, { "epoch": 0.8817506524794217, "grad_norm": 0.2178487926721573, "learning_rate": 0.00016846091981744253, "loss": 0.3703, "step": 10980 }, { "epoch": 0.8825537040754868, "grad_norm": 0.2433391809463501, "learning_rate": 0.00016840481568457306, "loss": 0.3706, "step": 10990 }, { "epoch": 0.8833567556715519, "grad_norm": 0.21679739654064178, "learning_rate": 0.0001683486716463104, "loss": 0.3689, "step": 11000 }, { "epoch": 0.8841598072676169, "grad_norm": 0.20312358438968658, "learning_rate": 0.00016829248773799009, "loss": 0.3971, "step": 11010 }, { "epoch": 0.884962858863682, "grad_norm": 0.24343961477279663, "learning_rate": 0.0001682362639949729, "loss": 0.4205, "step": 11020 }, { "epoch": 0.8857659104597471, "grad_norm": 0.19413185119628906, "learning_rate": 0.00016818000045264463, "loss": 0.3695, "step": 11030 }, { "epoch": 0.886568962055812, "grad_norm": 0.17554475367069244, "learning_rate": 0.00016812369714641606, "loss": 0.3708, "step": 11040 }, { "epoch": 0.8873720136518771, "grad_norm": 0.20394432544708252, "learning_rate": 0.00016806735411172322, "loss": 0.4015, "step": 11050 }, { "epoch": 0.8881750652479422, "grad_norm": 0.21549560129642487, "learning_rate": 0.00016801097138402688, "loss": 0.4159, "step": 11060 }, { "epoch": 0.8889781168440072, "grad_norm": 0.23019059002399445, "learning_rate": 0.000167954548998813, "loss": 0.3807, "step": 11070 }, { "epoch": 0.8897811684400723, "grad_norm": 0.25311940908432007, "learning_rate": 0.00016789808699159227, "loss": 0.3966, "step": 11080 }, { "epoch": 0.8905842200361374, "grad_norm": 0.17736291885375977, "learning_rate": 0.00016784158539790065, "loss": 0.3619, "step": 11090 }, { "epoch": 0.8913872716322023, "grad_norm": 0.20521938800811768, "learning_rate": 0.0001677850442532986, "loss": 0.365, "step": 11100 }, { "epoch": 0.8921903232282674, "grad_norm": 0.2632855474948883, "learning_rate": 0.00016772846359337185, "loss": 0.4115, "step": 11110 }, { "epoch": 0.8929933748243325, "grad_norm": 0.21341347694396973, "learning_rate": 0.00016767184345373078, "loss": 0.3802, "step": 11120 }, { "epoch": 0.8937964264203975, "grad_norm": 0.1953052282333374, "learning_rate": 0.00016761518387001074, "loss": 0.3928, "step": 11130 }, { "epoch": 0.8945994780164626, "grad_norm": 0.21254794299602509, "learning_rate": 0.00016755848487787174, "loss": 0.3839, "step": 11140 }, { "epoch": 0.8954025296125276, "grad_norm": 0.2566709518432617, "learning_rate": 0.0001675017465129988, "loss": 0.3977, "step": 11150 }, { "epoch": 0.8962055812085926, "grad_norm": 0.2205357849597931, "learning_rate": 0.00016744496881110154, "loss": 0.4112, "step": 11160 }, { "epoch": 0.8970086328046577, "grad_norm": 0.2155507653951645, "learning_rate": 0.00016738815180791444, "loss": 0.3746, "step": 11170 }, { "epoch": 0.8978116844007228, "grad_norm": 0.25017088651657104, "learning_rate": 0.00016733129553919669, "loss": 0.3789, "step": 11180 }, { "epoch": 0.8986147359967878, "grad_norm": 0.20344428718090057, "learning_rate": 0.0001672744000407322, "loss": 0.3926, "step": 11190 }, { "epoch": 0.8994177875928528, "grad_norm": 0.20587481558322906, "learning_rate": 0.00016721746534832958, "loss": 0.3687, "step": 11200 }, { "epoch": 0.9002208391889179, "grad_norm": 0.2140638828277588, "learning_rate": 0.00016716049149782203, "loss": 0.3796, "step": 11210 }, { "epoch": 0.9010238907849829, "grad_norm": 0.20677605271339417, "learning_rate": 0.00016710347852506757, "loss": 0.3794, "step": 11220 }, { "epoch": 0.901826942381048, "grad_norm": 0.22543197870254517, "learning_rate": 0.00016704642646594861, "loss": 0.4128, "step": 11230 }, { "epoch": 0.9026299939771131, "grad_norm": 0.22827056050300598, "learning_rate": 0.00016698933535637232, "loss": 0.366, "step": 11240 }, { "epoch": 0.903433045573178, "grad_norm": 0.2187155932188034, "learning_rate": 0.00016693220523227045, "loss": 0.3847, "step": 11250 }, { "epoch": 0.9042360971692431, "grad_norm": 0.1943320333957672, "learning_rate": 0.00016687503612959915, "loss": 0.3621, "step": 11260 }, { "epoch": 0.9050391487653082, "grad_norm": 0.23882627487182617, "learning_rate": 0.00016681782808433934, "loss": 0.3873, "step": 11270 }, { "epoch": 0.9058422003613732, "grad_norm": 0.20407848060131073, "learning_rate": 0.00016676058113249627, "loss": 0.3643, "step": 11280 }, { "epoch": 0.9066452519574383, "grad_norm": 0.21808576583862305, "learning_rate": 0.00016670329531009968, "loss": 0.3943, "step": 11290 }, { "epoch": 0.9074483035535034, "grad_norm": 0.1984405368566513, "learning_rate": 0.0001666459706532039, "loss": 0.3959, "step": 11300 }, { "epoch": 0.9082513551495683, "grad_norm": 0.23712213337421417, "learning_rate": 0.00016658860719788758, "loss": 0.4178, "step": 11310 }, { "epoch": 0.9090544067456334, "grad_norm": 0.20988500118255615, "learning_rate": 0.00016653120498025384, "loss": 0.3955, "step": 11320 }, { "epoch": 0.9098574583416984, "grad_norm": 0.18114420771598816, "learning_rate": 0.00016647376403643017, "loss": 0.3902, "step": 11330 }, { "epoch": 0.9106605099377635, "grad_norm": 0.21840542554855347, "learning_rate": 0.00016641628440256854, "loss": 0.3848, "step": 11340 }, { "epoch": 0.9114635615338286, "grad_norm": 0.24785226583480835, "learning_rate": 0.0001663587661148451, "loss": 0.3871, "step": 11350 }, { "epoch": 0.9122666131298935, "grad_norm": 0.1931484192609787, "learning_rate": 0.0001663012092094604, "loss": 0.401, "step": 11360 }, { "epoch": 0.9130696647259586, "grad_norm": 0.3610721230506897, "learning_rate": 0.00016624361372263936, "loss": 0.3958, "step": 11370 }, { "epoch": 0.9138727163220237, "grad_norm": 0.27564680576324463, "learning_rate": 0.00016618597969063106, "loss": 0.3774, "step": 11380 }, { "epoch": 0.9146757679180887, "grad_norm": 0.2448226511478424, "learning_rate": 0.00016612830714970902, "loss": 0.3832, "step": 11390 }, { "epoch": 0.9154788195141538, "grad_norm": 0.2780442535877228, "learning_rate": 0.00016607059613617077, "loss": 0.404, "step": 11400 }, { "epoch": 0.9162818711102189, "grad_norm": 0.21696226298809052, "learning_rate": 0.00016601284668633825, "loss": 0.4125, "step": 11410 }, { "epoch": 0.9170849227062838, "grad_norm": 0.2270691841840744, "learning_rate": 0.00016595505883655744, "loss": 0.41, "step": 11420 }, { "epoch": 0.9178879743023489, "grad_norm": 0.24651728570461273, "learning_rate": 0.00016589723262319862, "loss": 0.3849, "step": 11430 }, { "epoch": 0.918691025898414, "grad_norm": 0.212198406457901, "learning_rate": 0.0001658393680826561, "loss": 0.377, "step": 11440 }, { "epoch": 0.919494077494479, "grad_norm": 0.2539130747318268, "learning_rate": 0.00016578146525134837, "loss": 0.3785, "step": 11450 }, { "epoch": 0.9202971290905441, "grad_norm": 0.22009307146072388, "learning_rate": 0.00016572352416571804, "loss": 0.3914, "step": 11460 }, { "epoch": 0.9211001806866091, "grad_norm": 0.24241310358047485, "learning_rate": 0.00016566554486223176, "loss": 0.388, "step": 11470 }, { "epoch": 0.9219032322826741, "grad_norm": 0.2277384102344513, "learning_rate": 0.00016560752737738023, "loss": 0.3886, "step": 11480 }, { "epoch": 0.9227062838787392, "grad_norm": 0.21074983477592468, "learning_rate": 0.00016554947174767824, "loss": 0.3919, "step": 11490 }, { "epoch": 0.9235093354748043, "grad_norm": 0.1870737075805664, "learning_rate": 0.00016549137800966446, "loss": 0.392, "step": 11500 }, { "epoch": 0.9243123870708693, "grad_norm": 0.17259453237056732, "learning_rate": 0.00016543324619990168, "loss": 0.4059, "step": 11510 }, { "epoch": 0.9251154386669344, "grad_norm": 0.29174187779426575, "learning_rate": 0.0001653750763549766, "loss": 0.3794, "step": 11520 }, { "epoch": 0.9259184902629994, "grad_norm": 0.2542648911476135, "learning_rate": 0.00016531686851149982, "loss": 0.3744, "step": 11530 }, { "epoch": 0.9267215418590644, "grad_norm": 0.20707359910011292, "learning_rate": 0.00016525862270610589, "loss": 0.3769, "step": 11540 }, { "epoch": 0.9275245934551295, "grad_norm": 0.20444117486476898, "learning_rate": 0.0001652003389754533, "loss": 0.3857, "step": 11550 }, { "epoch": 0.9283276450511946, "grad_norm": 0.2325230836868286, "learning_rate": 0.00016514201735622433, "loss": 0.3812, "step": 11560 }, { "epoch": 0.9291306966472596, "grad_norm": 0.2587352395057678, "learning_rate": 0.0001650836578851251, "loss": 0.3873, "step": 11570 }, { "epoch": 0.9299337482433246, "grad_norm": 0.2714049816131592, "learning_rate": 0.0001650252605988857, "loss": 0.3697, "step": 11580 }, { "epoch": 0.9307367998393897, "grad_norm": 0.2240903675556183, "learning_rate": 0.00016496682553425977, "loss": 0.3988, "step": 11590 }, { "epoch": 0.9315398514354547, "grad_norm": 0.20121805369853973, "learning_rate": 0.00016490835272802497, "loss": 0.3646, "step": 11600 }, { "epoch": 0.9323429030315198, "grad_norm": 0.2112904042005539, "learning_rate": 0.00016484984221698258, "loss": 0.3868, "step": 11610 }, { "epoch": 0.9331459546275849, "grad_norm": 0.22558313608169556, "learning_rate": 0.00016479129403795763, "loss": 0.4082, "step": 11620 }, { "epoch": 0.9339490062236498, "grad_norm": 0.21031181514263153, "learning_rate": 0.0001647327082277989, "loss": 0.3939, "step": 11630 }, { "epoch": 0.9347520578197149, "grad_norm": 0.20677433907985687, "learning_rate": 0.00016467408482337878, "loss": 0.3738, "step": 11640 }, { "epoch": 0.93555510941578, "grad_norm": 0.21050354838371277, "learning_rate": 0.00016461542386159337, "loss": 0.3711, "step": 11650 }, { "epoch": 0.936358161011845, "grad_norm": 0.24493633210659027, "learning_rate": 0.00016455672537936243, "loss": 0.3857, "step": 11660 }, { "epoch": 0.9371612126079101, "grad_norm": 0.1847388595342636, "learning_rate": 0.00016449798941362928, "loss": 0.3955, "step": 11670 }, { "epoch": 0.937964264203975, "grad_norm": 0.21129749715328217, "learning_rate": 0.0001644392160013609, "loss": 0.3762, "step": 11680 }, { "epoch": 0.9387673158000401, "grad_norm": 0.24208638072013855, "learning_rate": 0.0001643804051795477, "loss": 0.3634, "step": 11690 }, { "epoch": 0.9395703673961052, "grad_norm": 0.2445094734430313, "learning_rate": 0.00016432155698520385, "loss": 0.3686, "step": 11700 }, { "epoch": 0.9403734189921702, "grad_norm": 0.2306191325187683, "learning_rate": 0.00016426267145536685, "loss": 0.3947, "step": 11710 }, { "epoch": 0.9411764705882353, "grad_norm": 0.21747025847434998, "learning_rate": 0.00016420374862709779, "loss": 0.4033, "step": 11720 }, { "epoch": 0.9419795221843004, "grad_norm": 0.23030199110507965, "learning_rate": 0.00016414478853748118, "loss": 0.4135, "step": 11730 }, { "epoch": 0.9427825737803653, "grad_norm": 0.23792238533496857, "learning_rate": 0.00016408579122362507, "loss": 0.3975, "step": 11740 }, { "epoch": 0.9435856253764304, "grad_norm": 0.19440089166164398, "learning_rate": 0.0001640267567226608, "loss": 0.3851, "step": 11750 }, { "epoch": 0.9443886769724955, "grad_norm": 0.2579725384712219, "learning_rate": 0.00016396768507174328, "loss": 0.3979, "step": 11760 }, { "epoch": 0.9451917285685605, "grad_norm": 0.22423028945922852, "learning_rate": 0.00016390857630805072, "loss": 0.3781, "step": 11770 }, { "epoch": 0.9459947801646256, "grad_norm": 0.22407126426696777, "learning_rate": 0.00016384943046878457, "loss": 0.4117, "step": 11780 }, { "epoch": 0.9467978317606907, "grad_norm": 0.2068873941898346, "learning_rate": 0.0001637902475911699, "loss": 0.3702, "step": 11790 }, { "epoch": 0.9476008833567556, "grad_norm": 0.2051563560962677, "learning_rate": 0.00016373102771245478, "loss": 0.3727, "step": 11800 }, { "epoch": 0.9484039349528207, "grad_norm": 0.20829477906227112, "learning_rate": 0.0001636717708699108, "loss": 0.3469, "step": 11810 }, { "epoch": 0.9492069865488858, "grad_norm": 0.2451125532388687, "learning_rate": 0.00016361247710083268, "loss": 0.3951, "step": 11820 }, { "epoch": 0.9500100381449508, "grad_norm": 0.26416850090026855, "learning_rate": 0.00016355314644253842, "loss": 0.3918, "step": 11830 }, { "epoch": 0.9508130897410159, "grad_norm": 0.20434372127056122, "learning_rate": 0.00016349377893236922, "loss": 0.3645, "step": 11840 }, { "epoch": 0.951616141337081, "grad_norm": 0.2871681749820709, "learning_rate": 0.00016343437460768963, "loss": 0.4054, "step": 11850 }, { "epoch": 0.9524191929331459, "grad_norm": 0.20961476862430573, "learning_rate": 0.00016337493350588705, "loss": 0.4092, "step": 11860 }, { "epoch": 0.953222244529211, "grad_norm": 0.24981115758419037, "learning_rate": 0.00016331545566437234, "loss": 0.3999, "step": 11870 }, { "epoch": 0.9540252961252761, "grad_norm": 0.22366587817668915, "learning_rate": 0.00016325594112057926, "loss": 0.3717, "step": 11880 }, { "epoch": 0.9548283477213411, "grad_norm": 0.23739980161190033, "learning_rate": 0.00016319638991196486, "loss": 0.3795, "step": 11890 }, { "epoch": 0.9556313993174061, "grad_norm": 0.2108997404575348, "learning_rate": 0.00016313680207600913, "loss": 0.3578, "step": 11900 }, { "epoch": 0.9564344509134712, "grad_norm": 0.2276265025138855, "learning_rate": 0.00016307717765021512, "loss": 0.4159, "step": 11910 }, { "epoch": 0.9572375025095362, "grad_norm": 0.23332461714744568, "learning_rate": 0.000163017516672109, "loss": 0.3743, "step": 11920 }, { "epoch": 0.9580405541056013, "grad_norm": 0.20875532925128937, "learning_rate": 0.0001629578191792398, "loss": 0.3758, "step": 11930 }, { "epoch": 0.9588436057016664, "grad_norm": 0.22515438497066498, "learning_rate": 0.00016289808520917972, "loss": 0.3851, "step": 11940 }, { "epoch": 0.9596466572977314, "grad_norm": 0.30109718441963196, "learning_rate": 0.00016283831479952374, "loss": 0.3943, "step": 11950 }, { "epoch": 0.9604497088937964, "grad_norm": 0.20125769078731537, "learning_rate": 0.0001627785079878899, "loss": 0.3715, "step": 11960 }, { "epoch": 0.9612527604898615, "grad_norm": 0.2045583724975586, "learning_rate": 0.00016271866481191907, "loss": 0.3715, "step": 11970 }, { "epoch": 0.9620558120859265, "grad_norm": 0.24240809679031372, "learning_rate": 0.00016265878530927506, "loss": 0.3975, "step": 11980 }, { "epoch": 0.9628588636819916, "grad_norm": 0.2047201544046402, "learning_rate": 0.00016259886951764448, "loss": 0.3762, "step": 11990 }, { "epoch": 0.9636619152780567, "grad_norm": 0.2044094055891037, "learning_rate": 0.00016253891747473686, "loss": 0.3593, "step": 12000 }, { "epoch": 0.9644649668741216, "grad_norm": 0.20810338854789734, "learning_rate": 0.00016247892921828448, "loss": 0.392, "step": 12010 }, { "epoch": 0.9652680184701867, "grad_norm": 0.22421783208847046, "learning_rate": 0.00016241890478604242, "loss": 0.391, "step": 12020 }, { "epoch": 0.9660710700662517, "grad_norm": 0.24564270675182343, "learning_rate": 0.00016235884421578862, "loss": 0.4108, "step": 12030 }, { "epoch": 0.9668741216623168, "grad_norm": 0.20837846398353577, "learning_rate": 0.00016229874754532364, "loss": 0.4041, "step": 12040 }, { "epoch": 0.9676771732583819, "grad_norm": 0.23260825872421265, "learning_rate": 0.00016223861481247078, "loss": 0.3739, "step": 12050 }, { "epoch": 0.9684802248544468, "grad_norm": 0.19550013542175293, "learning_rate": 0.00016217844605507614, "loss": 0.3753, "step": 12060 }, { "epoch": 0.9692832764505119, "grad_norm": 0.20187368988990784, "learning_rate": 0.00016211824131100833, "loss": 0.3746, "step": 12070 }, { "epoch": 0.970086328046577, "grad_norm": 0.2512010633945465, "learning_rate": 0.00016205800061815876, "loss": 0.4006, "step": 12080 }, { "epoch": 0.970889379642642, "grad_norm": 0.18238413333892822, "learning_rate": 0.0001619977240144414, "loss": 0.3874, "step": 12090 }, { "epoch": 0.9716924312387071, "grad_norm": 0.2066279947757721, "learning_rate": 0.00016193741153779282, "loss": 0.3945, "step": 12100 }, { "epoch": 0.9724954828347722, "grad_norm": 0.2411680370569229, "learning_rate": 0.00016187706322617215, "loss": 0.3569, "step": 12110 }, { "epoch": 0.9732985344308371, "grad_norm": 0.22323480248451233, "learning_rate": 0.00016181667911756113, "loss": 0.3761, "step": 12120 }, { "epoch": 0.9741015860269022, "grad_norm": 0.18897737562656403, "learning_rate": 0.000161756259249964, "loss": 0.3788, "step": 12130 }, { "epoch": 0.9749046376229673, "grad_norm": 0.2471979856491089, "learning_rate": 0.00016169580366140747, "loss": 0.3694, "step": 12140 }, { "epoch": 0.9757076892190323, "grad_norm": 0.2586994767189026, "learning_rate": 0.00016163531238994077, "loss": 0.3936, "step": 12150 }, { "epoch": 0.9765107408150974, "grad_norm": 0.2513754069805145, "learning_rate": 0.0001615747854736356, "loss": 0.4007, "step": 12160 }, { "epoch": 0.9773137924111625, "grad_norm": 0.23533135652542114, "learning_rate": 0.00016151422295058605, "loss": 0.385, "step": 12170 }, { "epoch": 0.9781168440072274, "grad_norm": 0.20739638805389404, "learning_rate": 0.00016145362485890863, "loss": 0.4058, "step": 12180 }, { "epoch": 0.9789198956032925, "grad_norm": 0.22134898602962494, "learning_rate": 0.00016139299123674235, "loss": 0.3813, "step": 12190 }, { "epoch": 0.9797229471993576, "grad_norm": 0.2653453052043915, "learning_rate": 0.00016133232212224837, "loss": 0.3783, "step": 12200 }, { "epoch": 0.9805259987954226, "grad_norm": 0.2021111100912094, "learning_rate": 0.0001612716175536104, "loss": 0.3855, "step": 12210 }, { "epoch": 0.9813290503914877, "grad_norm": 0.25272318720817566, "learning_rate": 0.0001612108775690343, "loss": 0.3953, "step": 12220 }, { "epoch": 0.9821321019875527, "grad_norm": 0.22975309193134308, "learning_rate": 0.0001611501022067483, "loss": 0.3817, "step": 12230 }, { "epoch": 0.9829351535836177, "grad_norm": 0.2681120038032532, "learning_rate": 0.00016108929150500286, "loss": 0.3951, "step": 12240 }, { "epoch": 0.9837382051796828, "grad_norm": 0.19752688705921173, "learning_rate": 0.00016102844550207074, "loss": 0.3719, "step": 12250 }, { "epoch": 0.9845412567757479, "grad_norm": 0.20043544471263885, "learning_rate": 0.00016096756423624684, "loss": 0.3787, "step": 12260 }, { "epoch": 0.9853443083718129, "grad_norm": 0.2491510510444641, "learning_rate": 0.00016090664774584835, "loss": 0.3916, "step": 12270 }, { "epoch": 0.986147359967878, "grad_norm": 0.21048179268836975, "learning_rate": 0.00016084569606921457, "loss": 0.3662, "step": 12280 }, { "epoch": 0.986950411563943, "grad_norm": 0.2504265010356903, "learning_rate": 0.0001607847092447069, "loss": 0.3956, "step": 12290 }, { "epoch": 0.987753463160008, "grad_norm": 0.22631047666072845, "learning_rate": 0.00016072368731070893, "loss": 0.3701, "step": 12300 }, { "epoch": 0.9885565147560731, "grad_norm": 0.2040950208902359, "learning_rate": 0.00016066263030562635, "loss": 0.3881, "step": 12310 }, { "epoch": 0.9893595663521382, "grad_norm": 0.20902226865291595, "learning_rate": 0.00016060153826788688, "loss": 0.3861, "step": 12320 }, { "epoch": 0.9901626179482031, "grad_norm": 0.250310480594635, "learning_rate": 0.00016054041123594027, "loss": 0.3898, "step": 12330 }, { "epoch": 0.9909656695442682, "grad_norm": 0.22772696614265442, "learning_rate": 0.00016047924924825845, "loss": 0.3978, "step": 12340 }, { "epoch": 0.9917687211403333, "grad_norm": 0.2238740622997284, "learning_rate": 0.00016041805234333508, "loss": 0.3821, "step": 12350 }, { "epoch": 0.9925717727363983, "grad_norm": 0.22853638231754303, "learning_rate": 0.00016035682055968607, "loss": 0.3765, "step": 12360 }, { "epoch": 0.9933748243324634, "grad_norm": 0.1998811662197113, "learning_rate": 0.00016029555393584914, "loss": 0.3904, "step": 12370 }, { "epoch": 0.9941778759285284, "grad_norm": 0.23843729496002197, "learning_rate": 0.00016023425251038392, "loss": 0.4073, "step": 12380 }, { "epoch": 0.9949809275245934, "grad_norm": 0.2235836684703827, "learning_rate": 0.000160172916321872, "loss": 0.3926, "step": 12390 }, { "epoch": 0.9957839791206585, "grad_norm": 0.23790234327316284, "learning_rate": 0.00016011154540891684, "loss": 0.3974, "step": 12400 }, { "epoch": 0.9965870307167235, "grad_norm": 0.19018179178237915, "learning_rate": 0.00016005013981014373, "loss": 0.3804, "step": 12410 }, { "epoch": 0.9973900823127886, "grad_norm": 0.23500700294971466, "learning_rate": 0.00015998869956419985, "loss": 0.38, "step": 12420 }, { "epoch": 0.9981931339088537, "grad_norm": 0.23316740989685059, "learning_rate": 0.0001599272247097541, "loss": 0.3646, "step": 12430 }, { "epoch": 0.9989961855049186, "grad_norm": 0.2035207599401474, "learning_rate": 0.0001598657152854972, "loss": 0.3747, "step": 12440 }, { "epoch": 0.9997992371009837, "grad_norm": 0.36272937059402466, "learning_rate": 0.00015980417133014166, "loss": 0.3693, "step": 12450 }, { "epoch": 1.0005621361172454, "grad_norm": 0.20568279922008514, "learning_rate": 0.00015974259288242167, "loss": 0.3593, "step": 12460 }, { "epoch": 1.0013651877133105, "grad_norm": 0.272212952375412, "learning_rate": 0.00015968097998109317, "loss": 0.3556, "step": 12470 }, { "epoch": 1.0021682393093756, "grad_norm": 0.24268080294132233, "learning_rate": 0.00015961933266493374, "loss": 0.3448, "step": 12480 }, { "epoch": 1.0029712909054407, "grad_norm": 0.21058140695095062, "learning_rate": 0.0001595576509727427, "loss": 0.3653, "step": 12490 }, { "epoch": 1.0037743425015058, "grad_norm": 0.352271169424057, "learning_rate": 0.0001594959349433409, "loss": 0.3641, "step": 12500 }, { "epoch": 1.0045773940975709, "grad_norm": 0.21685345470905304, "learning_rate": 0.0001594341846155709, "loss": 0.3852, "step": 12510 }, { "epoch": 1.0053804456936357, "grad_norm": 0.19462405145168304, "learning_rate": 0.00015937240002829675, "loss": 0.3614, "step": 12520 }, { "epoch": 1.0061834972897008, "grad_norm": 0.20715761184692383, "learning_rate": 0.00015931058122040415, "loss": 0.3383, "step": 12530 }, { "epoch": 1.0069865488857659, "grad_norm": 0.23805959522724152, "learning_rate": 0.0001592487282308003, "loss": 0.3484, "step": 12540 }, { "epoch": 1.007789600481831, "grad_norm": 0.18878434598445892, "learning_rate": 0.0001591868410984139, "loss": 0.3547, "step": 12550 }, { "epoch": 1.008592652077896, "grad_norm": 0.2479708045721054, "learning_rate": 0.00015912491986219514, "loss": 0.3585, "step": 12560 }, { "epoch": 1.0093957036739611, "grad_norm": 0.21971608698368073, "learning_rate": 0.0001590629645611157, "loss": 0.3668, "step": 12570 }, { "epoch": 1.010198755270026, "grad_norm": 0.2206725776195526, "learning_rate": 0.00015900097523416866, "loss": 0.3657, "step": 12580 }, { "epoch": 1.011001806866091, "grad_norm": 0.2685462236404419, "learning_rate": 0.00015893895192036858, "loss": 0.3545, "step": 12590 }, { "epoch": 1.0118048584621562, "grad_norm": 0.2196381539106369, "learning_rate": 0.00015887689465875134, "loss": 0.3458, "step": 12600 }, { "epoch": 1.0126079100582213, "grad_norm": 0.2576391398906708, "learning_rate": 0.0001588148034883743, "loss": 0.358, "step": 12610 }, { "epoch": 1.0134109616542863, "grad_norm": 0.19495344161987305, "learning_rate": 0.00015875267844831596, "loss": 0.3659, "step": 12620 }, { "epoch": 1.0142140132503514, "grad_norm": 0.22929289937019348, "learning_rate": 0.0001586905195776763, "loss": 0.3643, "step": 12630 }, { "epoch": 1.0150170648464163, "grad_norm": 0.22802314162254333, "learning_rate": 0.00015862832691557657, "loss": 0.37, "step": 12640 }, { "epoch": 1.0158201164424814, "grad_norm": 0.22258809208869934, "learning_rate": 0.00015856610050115922, "loss": 0.3673, "step": 12650 }, { "epoch": 1.0166231680385465, "grad_norm": 0.24601289629936218, "learning_rate": 0.00015850384037358804, "loss": 0.3514, "step": 12660 }, { "epoch": 1.0174262196346116, "grad_norm": 0.21650901436805725, "learning_rate": 0.00015844154657204793, "loss": 0.3562, "step": 12670 }, { "epoch": 1.0182292712306766, "grad_norm": 0.23139341175556183, "learning_rate": 0.00015837921913574508, "loss": 0.3631, "step": 12680 }, { "epoch": 1.0190323228267417, "grad_norm": 0.3136523962020874, "learning_rate": 0.00015831685810390679, "loss": 0.3543, "step": 12690 }, { "epoch": 1.0198353744228066, "grad_norm": 0.21781635284423828, "learning_rate": 0.00015825446351578154, "loss": 0.3714, "step": 12700 }, { "epoch": 1.0206384260188717, "grad_norm": 0.20860549807548523, "learning_rate": 0.0001581920354106389, "loss": 0.3559, "step": 12710 }, { "epoch": 1.0214414776149368, "grad_norm": 0.22569233179092407, "learning_rate": 0.00015812957382776955, "loss": 0.363, "step": 12720 }, { "epoch": 1.0222445292110018, "grad_norm": 0.22582393884658813, "learning_rate": 0.00015806707880648523, "loss": 0.3594, "step": 12730 }, { "epoch": 1.023047580807067, "grad_norm": 0.2554960548877716, "learning_rate": 0.0001580045503861187, "loss": 0.3811, "step": 12740 }, { "epoch": 1.023850632403132, "grad_norm": 0.22790707647800446, "learning_rate": 0.0001579419886060238, "loss": 0.3717, "step": 12750 }, { "epoch": 1.0246536839991969, "grad_norm": 0.24862486124038696, "learning_rate": 0.0001578793935055754, "loss": 0.3661, "step": 12760 }, { "epoch": 1.025456735595262, "grad_norm": 0.24872064590454102, "learning_rate": 0.00015781676512416918, "loss": 0.3779, "step": 12770 }, { "epoch": 1.026259787191327, "grad_norm": 0.21541117131710052, "learning_rate": 0.00015775410350122194, "loss": 0.3638, "step": 12780 }, { "epoch": 1.0270628387873921, "grad_norm": 0.213758647441864, "learning_rate": 0.00015769140867617128, "loss": 0.3699, "step": 12790 }, { "epoch": 1.0278658903834572, "grad_norm": 0.2135878950357437, "learning_rate": 0.00015762868068847573, "loss": 0.3357, "step": 12800 }, { "epoch": 1.028668941979522, "grad_norm": 0.21985405683517456, "learning_rate": 0.00015756591957761474, "loss": 0.3595, "step": 12810 }, { "epoch": 1.0294719935755872, "grad_norm": 0.23515532910823822, "learning_rate": 0.00015750312538308857, "loss": 0.3879, "step": 12820 }, { "epoch": 1.0302750451716522, "grad_norm": 0.27375125885009766, "learning_rate": 0.00015744029814441832, "loss": 0.3888, "step": 12830 }, { "epoch": 1.0310780967677173, "grad_norm": 0.23800234496593475, "learning_rate": 0.0001573774379011458, "loss": 0.3782, "step": 12840 }, { "epoch": 1.0318811483637824, "grad_norm": 0.23717275261878967, "learning_rate": 0.00015731454469283376, "loss": 0.3491, "step": 12850 }, { "epoch": 1.0326841999598475, "grad_norm": 0.2194177657365799, "learning_rate": 0.00015725161855906553, "loss": 0.3721, "step": 12860 }, { "epoch": 1.0334872515559124, "grad_norm": 0.22681769728660583, "learning_rate": 0.00015718865953944525, "loss": 0.394, "step": 12870 }, { "epoch": 1.0342903031519775, "grad_norm": 0.21931907534599304, "learning_rate": 0.00015712566767359774, "loss": 0.3738, "step": 12880 }, { "epoch": 1.0350933547480425, "grad_norm": 0.1933297961950302, "learning_rate": 0.00015706264300116847, "loss": 0.3653, "step": 12890 }, { "epoch": 1.0358964063441076, "grad_norm": 0.2080041915178299, "learning_rate": 0.00015699958556182364, "loss": 0.3853, "step": 12900 }, { "epoch": 1.0366994579401727, "grad_norm": 0.25336548686027527, "learning_rate": 0.00015693649539525, "loss": 0.3737, "step": 12910 }, { "epoch": 1.0375025095362378, "grad_norm": 0.2111888974905014, "learning_rate": 0.00015687337254115485, "loss": 0.3811, "step": 12920 }, { "epoch": 1.0383055611323027, "grad_norm": 0.24476943910121918, "learning_rate": 0.00015681021703926618, "loss": 0.3321, "step": 12930 }, { "epoch": 1.0391086127283677, "grad_norm": 0.31014686822891235, "learning_rate": 0.0001567470289293325, "loss": 0.3767, "step": 12940 }, { "epoch": 1.0399116643244328, "grad_norm": 0.21728768944740295, "learning_rate": 0.00015668380825112273, "loss": 0.3738, "step": 12950 }, { "epoch": 1.040714715920498, "grad_norm": 0.2493576854467392, "learning_rate": 0.00015662055504442646, "loss": 0.3446, "step": 12960 }, { "epoch": 1.041517767516563, "grad_norm": 0.20261907577514648, "learning_rate": 0.00015655726934905357, "loss": 0.336, "step": 12970 }, { "epoch": 1.042320819112628, "grad_norm": 0.2292982041835785, "learning_rate": 0.00015649395120483455, "loss": 0.3729, "step": 12980 }, { "epoch": 1.043123870708693, "grad_norm": 0.2346952259540558, "learning_rate": 0.00015643060065162018, "loss": 0.3954, "step": 12990 }, { "epoch": 1.043926922304758, "grad_norm": 0.20658865571022034, "learning_rate": 0.0001563672177292818, "loss": 0.3654, "step": 13000 }, { "epoch": 1.0447299739008231, "grad_norm": 0.21204674243927002, "learning_rate": 0.00015630380247771096, "loss": 0.381, "step": 13010 }, { "epoch": 1.0455330254968882, "grad_norm": 0.23705600202083588, "learning_rate": 0.0001562403549368196, "loss": 0.3514, "step": 13020 }, { "epoch": 1.0463360770929533, "grad_norm": 0.22020475566387177, "learning_rate": 0.00015617687514654006, "loss": 0.3505, "step": 13030 }, { "epoch": 1.0471391286890184, "grad_norm": 0.21726463735103607, "learning_rate": 0.0001561133631468249, "loss": 0.3736, "step": 13040 }, { "epoch": 1.0479421802850832, "grad_norm": 0.22562284767627716, "learning_rate": 0.00015604981897764697, "loss": 0.3647, "step": 13050 }, { "epoch": 1.0487452318811483, "grad_norm": 0.21871407330036163, "learning_rate": 0.00015598624267899934, "loss": 0.3598, "step": 13060 }, { "epoch": 1.0495482834772134, "grad_norm": 0.2500728964805603, "learning_rate": 0.00015592263429089538, "loss": 0.3607, "step": 13070 }, { "epoch": 1.0503513350732785, "grad_norm": 0.21693852543830872, "learning_rate": 0.00015585899385336856, "loss": 0.3579, "step": 13080 }, { "epoch": 1.0511543866693436, "grad_norm": 0.23051822185516357, "learning_rate": 0.0001557953214064726, "loss": 0.3788, "step": 13090 }, { "epoch": 1.0519574382654087, "grad_norm": 0.2230662703514099, "learning_rate": 0.00015573161699028137, "loss": 0.383, "step": 13100 }, { "epoch": 1.0527604898614735, "grad_norm": 0.19634738564491272, "learning_rate": 0.00015566788064488876, "loss": 0.3696, "step": 13110 }, { "epoch": 1.0535635414575386, "grad_norm": 0.19016587734222412, "learning_rate": 0.0001556041124104088, "loss": 0.3564, "step": 13120 }, { "epoch": 1.0543665930536037, "grad_norm": 0.25980815291404724, "learning_rate": 0.0001555403123269757, "loss": 0.3505, "step": 13130 }, { "epoch": 1.0551696446496688, "grad_norm": 0.22195512056350708, "learning_rate": 0.00015547648043474352, "loss": 0.3736, "step": 13140 }, { "epoch": 1.0559726962457339, "grad_norm": 0.2237214893102646, "learning_rate": 0.00015541261677388653, "loss": 0.3861, "step": 13150 }, { "epoch": 1.0567757478417987, "grad_norm": 0.2394876778125763, "learning_rate": 0.00015534872138459887, "loss": 0.3745, "step": 13160 }, { "epoch": 1.0575787994378638, "grad_norm": 0.23559477925300598, "learning_rate": 0.0001552847943070947, "loss": 0.3692, "step": 13170 }, { "epoch": 1.058381851033929, "grad_norm": 0.2891550362110138, "learning_rate": 0.00015522083558160808, "loss": 0.3601, "step": 13180 }, { "epoch": 1.059184902629994, "grad_norm": 0.22083508968353271, "learning_rate": 0.00015515684524839308, "loss": 0.3506, "step": 13190 }, { "epoch": 1.059987954226059, "grad_norm": 0.23491860926151276, "learning_rate": 0.00015509282334772356, "loss": 0.3869, "step": 13200 }, { "epoch": 1.0607910058221242, "grad_norm": 0.2028515487909317, "learning_rate": 0.0001550287699198933, "loss": 0.3461, "step": 13210 }, { "epoch": 1.061594057418189, "grad_norm": 0.268822580575943, "learning_rate": 0.0001549646850052159, "loss": 0.3545, "step": 13220 }, { "epoch": 1.062397109014254, "grad_norm": 0.24017156660556793, "learning_rate": 0.0001549005686440248, "loss": 0.3613, "step": 13230 }, { "epoch": 1.0632001606103192, "grad_norm": 0.1946527361869812, "learning_rate": 0.0001548364208766733, "loss": 0.3522, "step": 13240 }, { "epoch": 1.0640032122063843, "grad_norm": 0.2398967146873474, "learning_rate": 0.00015477224174353425, "loss": 0.3757, "step": 13250 }, { "epoch": 1.0648062638024494, "grad_norm": 0.2923835515975952, "learning_rate": 0.00015470803128500043, "loss": 0.3531, "step": 13260 }, { "epoch": 1.0656093153985144, "grad_norm": 0.24700918793678284, "learning_rate": 0.00015464378954148432, "loss": 0.3738, "step": 13270 }, { "epoch": 1.0664123669945793, "grad_norm": 0.23645681142807007, "learning_rate": 0.00015457951655341803, "loss": 0.397, "step": 13280 }, { "epoch": 1.0672154185906444, "grad_norm": 0.26535311341285706, "learning_rate": 0.00015451521236125337, "loss": 0.3678, "step": 13290 }, { "epoch": 1.0680184701867095, "grad_norm": 0.183619424700737, "learning_rate": 0.00015445087700546178, "loss": 0.3696, "step": 13300 }, { "epoch": 1.0688215217827746, "grad_norm": 0.22632187604904175, "learning_rate": 0.0001543865105265343, "loss": 0.36, "step": 13310 }, { "epoch": 1.0696245733788396, "grad_norm": 0.24381962418556213, "learning_rate": 0.00015432211296498158, "loss": 0.3476, "step": 13320 }, { "epoch": 1.0704276249749047, "grad_norm": 0.28060051798820496, "learning_rate": 0.00015425768436133378, "loss": 0.3477, "step": 13330 }, { "epoch": 1.0712306765709696, "grad_norm": 0.22932489216327667, "learning_rate": 0.00015419322475614068, "loss": 0.3535, "step": 13340 }, { "epoch": 1.0720337281670347, "grad_norm": 0.19484078884124756, "learning_rate": 0.00015412873418997153, "loss": 0.3438, "step": 13350 }, { "epoch": 1.0728367797630998, "grad_norm": 0.19917187094688416, "learning_rate": 0.00015406421270341508, "loss": 0.3697, "step": 13360 }, { "epoch": 1.0736398313591649, "grad_norm": 0.212655708193779, "learning_rate": 0.00015399966033707952, "loss": 0.3303, "step": 13370 }, { "epoch": 1.07444288295523, "grad_norm": 0.2111903429031372, "learning_rate": 0.00015393507713159246, "loss": 0.3378, "step": 13380 }, { "epoch": 1.075245934551295, "grad_norm": 0.3275805711746216, "learning_rate": 0.00015387046312760096, "loss": 0.352, "step": 13390 }, { "epoch": 1.0760489861473599, "grad_norm": 0.26796185970306396, "learning_rate": 0.00015380581836577144, "loss": 0.3507, "step": 13400 }, { "epoch": 1.076852037743425, "grad_norm": 0.23227065801620483, "learning_rate": 0.00015374114288678966, "loss": 0.3706, "step": 13410 }, { "epoch": 1.07765508933949, "grad_norm": 0.1816856414079666, "learning_rate": 0.0001536764367313608, "loss": 0.376, "step": 13420 }, { "epoch": 1.0784581409355551, "grad_norm": 0.23689280450344086, "learning_rate": 0.00015361169994020923, "loss": 0.3453, "step": 13430 }, { "epoch": 1.0792611925316202, "grad_norm": 0.20987680554389954, "learning_rate": 0.00015354693255407865, "loss": 0.3672, "step": 13440 }, { "epoch": 1.0800642441276853, "grad_norm": 0.22906216979026794, "learning_rate": 0.00015348213461373206, "loss": 0.3696, "step": 13450 }, { "epoch": 1.0808672957237502, "grad_norm": 0.25526055693626404, "learning_rate": 0.00015341730615995162, "loss": 0.374, "step": 13460 }, { "epoch": 1.0816703473198153, "grad_norm": 0.23542800545692444, "learning_rate": 0.0001533524472335388, "loss": 0.3647, "step": 13470 }, { "epoch": 1.0824733989158803, "grad_norm": 0.24260009825229645, "learning_rate": 0.00015328755787531403, "loss": 0.3612, "step": 13480 }, { "epoch": 1.0832764505119454, "grad_norm": 0.2439613938331604, "learning_rate": 0.00015322263812611716, "loss": 0.3701, "step": 13490 }, { "epoch": 1.0840795021080105, "grad_norm": 0.22323745489120483, "learning_rate": 0.00015315768802680698, "loss": 0.37, "step": 13500 }, { "epoch": 1.0848825537040754, "grad_norm": 0.231529101729393, "learning_rate": 0.0001530927076182615, "loss": 0.3814, "step": 13510 }, { "epoch": 1.0856856053001405, "grad_norm": 0.3127823770046234, "learning_rate": 0.0001530276969413777, "loss": 0.3674, "step": 13520 }, { "epoch": 1.0864886568962056, "grad_norm": 0.2632058560848236, "learning_rate": 0.00015296265603707173, "loss": 0.3708, "step": 13530 }, { "epoch": 1.0872917084922706, "grad_norm": 0.22730830311775208, "learning_rate": 0.00015289758494627857, "loss": 0.3765, "step": 13540 }, { "epoch": 1.0880947600883357, "grad_norm": 0.2214752435684204, "learning_rate": 0.00015283248370995245, "loss": 0.3599, "step": 13550 }, { "epoch": 1.0888978116844008, "grad_norm": 0.24189117550849915, "learning_rate": 0.0001527673523690664, "loss": 0.3396, "step": 13560 }, { "epoch": 1.0897008632804657, "grad_norm": 0.31271111965179443, "learning_rate": 0.00015270219096461245, "loss": 0.3523, "step": 13570 }, { "epoch": 1.0905039148765308, "grad_norm": 0.22407500445842743, "learning_rate": 0.0001526369995376015, "loss": 0.3333, "step": 13580 }, { "epoch": 1.0913069664725958, "grad_norm": 0.22127816081047058, "learning_rate": 0.00015257177812906346, "loss": 0.3851, "step": 13590 }, { "epoch": 1.092110018068661, "grad_norm": 0.2134464681148529, "learning_rate": 0.00015250652678004703, "loss": 0.3637, "step": 13600 }, { "epoch": 1.092913069664726, "grad_norm": 0.2050197273492813, "learning_rate": 0.00015244124553161973, "loss": 0.3685, "step": 13610 }, { "epoch": 1.093716121260791, "grad_norm": 0.17919494211673737, "learning_rate": 0.00015237593442486794, "loss": 0.3394, "step": 13620 }, { "epoch": 1.094519172856856, "grad_norm": 0.2465991973876953, "learning_rate": 0.0001523105935008969, "loss": 0.3759, "step": 13630 }, { "epoch": 1.095322224452921, "grad_norm": 0.18376639485359192, "learning_rate": 0.00015224522280083046, "loss": 0.3684, "step": 13640 }, { "epoch": 1.0961252760489861, "grad_norm": 0.25137367844581604, "learning_rate": 0.00015217982236581123, "loss": 0.3887, "step": 13650 }, { "epoch": 1.0969283276450512, "grad_norm": 0.22964181005954742, "learning_rate": 0.00015211439223700076, "loss": 0.3313, "step": 13660 }, { "epoch": 1.0977313792411163, "grad_norm": 0.22041088342666626, "learning_rate": 0.00015204893245557898, "loss": 0.357, "step": 13670 }, { "epoch": 1.0985344308371814, "grad_norm": 0.2327744960784912, "learning_rate": 0.00015198344306274473, "loss": 0.3772, "step": 13680 }, { "epoch": 1.0993374824332462, "grad_norm": 0.279035747051239, "learning_rate": 0.00015191792409971532, "loss": 0.3577, "step": 13690 }, { "epoch": 1.1001405340293113, "grad_norm": 0.22921858727931976, "learning_rate": 0.00015185237560772676, "loss": 0.3823, "step": 13700 }, { "epoch": 1.1009435856253764, "grad_norm": 0.2932458519935608, "learning_rate": 0.0001517867976280336, "loss": 0.3379, "step": 13710 }, { "epoch": 1.1017466372214415, "grad_norm": 0.20722132921218872, "learning_rate": 0.000151721190201909, "loss": 0.3509, "step": 13720 }, { "epoch": 1.1025496888175066, "grad_norm": 0.21235129237174988, "learning_rate": 0.00015165555337064455, "loss": 0.3394, "step": 13730 }, { "epoch": 1.1033527404135717, "grad_norm": 0.24193906784057617, "learning_rate": 0.00015158988717555057, "loss": 0.3597, "step": 13740 }, { "epoch": 1.1041557920096365, "grad_norm": 0.2218015044927597, "learning_rate": 0.00015152419165795553, "loss": 0.3517, "step": 13750 }, { "epoch": 1.1049588436057016, "grad_norm": 0.24626179039478302, "learning_rate": 0.00015145846685920666, "loss": 0.341, "step": 13760 }, { "epoch": 1.1057618952017667, "grad_norm": 0.27104607224464417, "learning_rate": 0.0001513927128206694, "loss": 0.3666, "step": 13770 }, { "epoch": 1.1065649467978318, "grad_norm": 0.18422450125217438, "learning_rate": 0.00015132692958372775, "loss": 0.3633, "step": 13780 }, { "epoch": 1.1073679983938969, "grad_norm": 0.2028447538614273, "learning_rate": 0.00015126111718978397, "loss": 0.3675, "step": 13790 }, { "epoch": 1.108171049989962, "grad_norm": 0.24396829307079315, "learning_rate": 0.0001511952756802588, "loss": 0.3658, "step": 13800 }, { "epoch": 1.1089741015860268, "grad_norm": 0.23070470988750458, "learning_rate": 0.00015112940509659118, "loss": 0.3799, "step": 13810 }, { "epoch": 1.109777153182092, "grad_norm": 0.21535785496234894, "learning_rate": 0.00015106350548023834, "loss": 0.3844, "step": 13820 }, { "epoch": 1.110580204778157, "grad_norm": 0.25049614906311035, "learning_rate": 0.00015099757687267595, "loss": 0.3699, "step": 13830 }, { "epoch": 1.111383256374222, "grad_norm": 0.2815392017364502, "learning_rate": 0.00015093161931539772, "loss": 0.3467, "step": 13840 }, { "epoch": 1.1121863079702872, "grad_norm": 0.21765734255313873, "learning_rate": 0.00015086563284991573, "loss": 0.3382, "step": 13850 }, { "epoch": 1.112989359566352, "grad_norm": 0.22698169946670532, "learning_rate": 0.00015079961751776017, "loss": 0.362, "step": 13860 }, { "epoch": 1.1137924111624171, "grad_norm": 0.21898548305034637, "learning_rate": 0.0001507335733604794, "loss": 0.3643, "step": 13870 }, { "epoch": 1.1145954627584822, "grad_norm": 0.23856307566165924, "learning_rate": 0.00015066750041964005, "loss": 0.3596, "step": 13880 }, { "epoch": 1.1153985143545473, "grad_norm": 0.2723979353904724, "learning_rate": 0.0001506013987368267, "loss": 0.3612, "step": 13890 }, { "epoch": 1.1162015659506124, "grad_norm": 0.24160735309123993, "learning_rate": 0.00015053526835364205, "loss": 0.3621, "step": 13900 }, { "epoch": 1.1170046175466775, "grad_norm": 0.2676889896392822, "learning_rate": 0.00015046910931170696, "loss": 0.3798, "step": 13910 }, { "epoch": 1.1178076691427423, "grad_norm": 0.21599023044109344, "learning_rate": 0.0001504029216526602, "loss": 0.3673, "step": 13920 }, { "epoch": 1.1186107207388074, "grad_norm": 0.22248204052448273, "learning_rate": 0.00015033670541815867, "loss": 0.3666, "step": 13930 }, { "epoch": 1.1194137723348725, "grad_norm": 0.20671497285366058, "learning_rate": 0.00015027046064987717, "loss": 0.3504, "step": 13940 }, { "epoch": 1.1202168239309376, "grad_norm": 0.23115816712379456, "learning_rate": 0.00015020418738950848, "loss": 0.3538, "step": 13950 }, { "epoch": 1.1210198755270027, "grad_norm": 0.24249908328056335, "learning_rate": 0.00015013788567876335, "loss": 0.3872, "step": 13960 }, { "epoch": 1.1218229271230677, "grad_norm": 0.2225581556558609, "learning_rate": 0.0001500715555593704, "loss": 0.3746, "step": 13970 }, { "epoch": 1.1226259787191326, "grad_norm": 0.24591386318206787, "learning_rate": 0.00015000519707307613, "loss": 0.375, "step": 13980 }, { "epoch": 1.1234290303151977, "grad_norm": 0.23468218743801117, "learning_rate": 0.00014993881026164485, "loss": 0.3619, "step": 13990 }, { "epoch": 1.1242320819112628, "grad_norm": 0.24145445227622986, "learning_rate": 0.00014987239516685882, "loss": 0.3679, "step": 14000 }, { "epoch": 1.1250351335073279, "grad_norm": 0.20906691253185272, "learning_rate": 0.00014980595183051803, "loss": 0.3746, "step": 14010 }, { "epoch": 1.125838185103393, "grad_norm": 0.20397093892097473, "learning_rate": 0.0001497394802944402, "loss": 0.3645, "step": 14020 }, { "epoch": 1.126641236699458, "grad_norm": 0.22435276210308075, "learning_rate": 0.00014967298060046083, "loss": 0.3935, "step": 14030 }, { "epoch": 1.127444288295523, "grad_norm": 0.2134280651807785, "learning_rate": 0.0001496064527904332, "loss": 0.33, "step": 14040 }, { "epoch": 1.128247339891588, "grad_norm": 0.24475257098674774, "learning_rate": 0.00014953989690622815, "loss": 0.3609, "step": 14050 }, { "epoch": 1.129050391487653, "grad_norm": 0.24383749067783356, "learning_rate": 0.00014947331298973443, "loss": 0.3822, "step": 14060 }, { "epoch": 1.1298534430837182, "grad_norm": 0.19852443039417267, "learning_rate": 0.00014940670108285812, "loss": 0.3681, "step": 14070 }, { "epoch": 1.1306564946797832, "grad_norm": 0.21554121375083923, "learning_rate": 0.00014934006122752316, "loss": 0.3633, "step": 14080 }, { "epoch": 1.131459546275848, "grad_norm": 0.3357875943183899, "learning_rate": 0.00014927339346567094, "loss": 0.3767, "step": 14090 }, { "epoch": 1.1322625978719132, "grad_norm": 0.22130955755710602, "learning_rate": 0.00014920669783926056, "loss": 0.367, "step": 14100 }, { "epoch": 1.1330656494679783, "grad_norm": 0.24377906322479248, "learning_rate": 0.00014913997439026849, "loss": 0.35, "step": 14110 }, { "epoch": 1.1338687010640434, "grad_norm": 0.22748316824436188, "learning_rate": 0.0001490732231606888, "loss": 0.3874, "step": 14120 }, { "epoch": 1.1346717526601084, "grad_norm": 0.21977785229682922, "learning_rate": 0.00014900644419253307, "loss": 0.3649, "step": 14130 }, { "epoch": 1.1354748042561735, "grad_norm": 0.23348882794380188, "learning_rate": 0.00014893963752783026, "loss": 0.3454, "step": 14140 }, { "epoch": 1.1362778558522386, "grad_norm": 0.29077228903770447, "learning_rate": 0.00014887280320862681, "loss": 0.3856, "step": 14150 }, { "epoch": 1.1370809074483035, "grad_norm": 0.2519056499004364, "learning_rate": 0.00014880594127698657, "loss": 0.3603, "step": 14160 }, { "epoch": 1.1378839590443686, "grad_norm": 0.2090461552143097, "learning_rate": 0.00014873905177499073, "loss": 0.3673, "step": 14170 }, { "epoch": 1.1386870106404337, "grad_norm": 0.2551971971988678, "learning_rate": 0.00014867213474473786, "loss": 0.3558, "step": 14180 }, { "epoch": 1.1394900622364987, "grad_norm": 0.2116887867450714, "learning_rate": 0.0001486051902283439, "loss": 0.3493, "step": 14190 }, { "epoch": 1.1402931138325638, "grad_norm": 0.18818065524101257, "learning_rate": 0.00014853821826794198, "loss": 0.3937, "step": 14200 }, { "epoch": 1.1410961654286287, "grad_norm": 0.18982712924480438, "learning_rate": 0.0001484712189056826, "loss": 0.3637, "step": 14210 }, { "epoch": 1.1418992170246938, "grad_norm": 0.2737131118774414, "learning_rate": 0.00014840419218373348, "loss": 0.3375, "step": 14220 }, { "epoch": 1.1427022686207589, "grad_norm": 0.24221625924110413, "learning_rate": 0.0001483371381442795, "loss": 0.3643, "step": 14230 }, { "epoch": 1.143505320216824, "grad_norm": 0.2147972136735916, "learning_rate": 0.0001482700568295228, "loss": 0.3762, "step": 14240 }, { "epoch": 1.144308371812889, "grad_norm": 0.19829942286014557, "learning_rate": 0.00014820294828168273, "loss": 0.3459, "step": 14250 }, { "epoch": 1.145111423408954, "grad_norm": 0.2462172955274582, "learning_rate": 0.00014813581254299565, "loss": 0.3813, "step": 14260 }, { "epoch": 1.1459144750050192, "grad_norm": 0.2307083159685135, "learning_rate": 0.0001480686496557151, "loss": 0.3574, "step": 14270 }, { "epoch": 1.146717526601084, "grad_norm": 0.2247094213962555, "learning_rate": 0.00014800145966211177, "loss": 0.3636, "step": 14280 }, { "epoch": 1.1475205781971491, "grad_norm": 0.2455109804868698, "learning_rate": 0.00014793424260447328, "loss": 0.3615, "step": 14290 }, { "epoch": 1.1483236297932142, "grad_norm": 0.22117313742637634, "learning_rate": 0.0001478669985251044, "loss": 0.3854, "step": 14300 }, { "epoch": 1.1491266813892793, "grad_norm": 0.2130737006664276, "learning_rate": 0.00014779972746632681, "loss": 0.3629, "step": 14310 }, { "epoch": 1.1499297329853444, "grad_norm": 0.27870503067970276, "learning_rate": 0.0001477324294704793, "loss": 0.3912, "step": 14320 }, { "epoch": 1.1507327845814093, "grad_norm": 0.2788068354129791, "learning_rate": 0.00014766510457991748, "loss": 0.383, "step": 14330 }, { "epoch": 1.1515358361774743, "grad_norm": 0.2523689866065979, "learning_rate": 0.00014759775283701394, "loss": 0.3718, "step": 14340 }, { "epoch": 1.1523388877735394, "grad_norm": 0.24755209684371948, "learning_rate": 0.00014753037428415816, "loss": 0.351, "step": 14350 }, { "epoch": 1.1531419393696045, "grad_norm": 0.18657054007053375, "learning_rate": 0.00014746296896375654, "loss": 0.3586, "step": 14360 }, { "epoch": 1.1539449909656696, "grad_norm": 0.2351766675710678, "learning_rate": 0.0001473955369182322, "loss": 0.3647, "step": 14370 }, { "epoch": 1.1547480425617347, "grad_norm": 0.21045343577861786, "learning_rate": 0.0001473280781900253, "loss": 0.346, "step": 14380 }, { "epoch": 1.1555510941577996, "grad_norm": 0.20967693626880646, "learning_rate": 0.00014726059282159262, "loss": 0.3739, "step": 14390 }, { "epoch": 1.1563541457538646, "grad_norm": 0.2541661560535431, "learning_rate": 0.0001471930808554077, "loss": 0.3716, "step": 14400 }, { "epoch": 1.1571571973499297, "grad_norm": 0.22630064189434052, "learning_rate": 0.00014712554233396087, "loss": 0.3486, "step": 14410 }, { "epoch": 1.1579602489459948, "grad_norm": 0.22613471746444702, "learning_rate": 0.00014705797729975927, "loss": 0.3316, "step": 14420 }, { "epoch": 1.15876330054206, "grad_norm": 0.3030581474304199, "learning_rate": 0.0001469903857953265, "loss": 0.366, "step": 14430 }, { "epoch": 1.1595663521381248, "grad_norm": 0.2155946046113968, "learning_rate": 0.00014692276786320305, "loss": 0.3698, "step": 14440 }, { "epoch": 1.1603694037341898, "grad_norm": 0.2423085719347, "learning_rate": 0.00014685512354594592, "loss": 0.3689, "step": 14450 }, { "epoch": 1.161172455330255, "grad_norm": 0.26056626439094543, "learning_rate": 0.00014678745288612875, "loss": 0.3429, "step": 14460 }, { "epoch": 1.16197550692632, "grad_norm": 0.2114059180021286, "learning_rate": 0.00014671975592634167, "loss": 0.3553, "step": 14470 }, { "epoch": 1.162778558522385, "grad_norm": 0.24662233889102936, "learning_rate": 0.0001466520327091915, "loss": 0.3602, "step": 14480 }, { "epoch": 1.1635816101184502, "grad_norm": 0.2154703587293625, "learning_rate": 0.00014658428327730155, "loss": 0.3555, "step": 14490 }, { "epoch": 1.1643846617145153, "grad_norm": 0.2158900499343872, "learning_rate": 0.00014651650767331156, "loss": 0.3554, "step": 14500 }, { "epoch": 1.1651877133105801, "grad_norm": 0.20012107491493225, "learning_rate": 0.0001464487059398778, "loss": 0.3576, "step": 14510 }, { "epoch": 1.1659907649066452, "grad_norm": 0.2176371067762375, "learning_rate": 0.00014638087811967297, "loss": 0.364, "step": 14520 }, { "epoch": 1.1667938165027103, "grad_norm": 0.2517901659011841, "learning_rate": 0.0001463130242553862, "loss": 0.3619, "step": 14530 }, { "epoch": 1.1675968680987754, "grad_norm": 0.20934440195560455, "learning_rate": 0.000146245144389723, "loss": 0.3644, "step": 14540 }, { "epoch": 1.1683999196948405, "grad_norm": 0.2567499577999115, "learning_rate": 0.00014617723856540527, "loss": 0.3605, "step": 14550 }, { "epoch": 1.1692029712909053, "grad_norm": 0.36634233593940735, "learning_rate": 0.00014610930682517118, "loss": 0.3622, "step": 14560 }, { "epoch": 1.1700060228869704, "grad_norm": 0.2587893307209015, "learning_rate": 0.0001460413492117753, "loss": 0.3636, "step": 14570 }, { "epoch": 1.1708090744830355, "grad_norm": 0.20777659118175507, "learning_rate": 0.0001459733657679884, "loss": 0.3668, "step": 14580 }, { "epoch": 1.1716121260791006, "grad_norm": 0.24207395315170288, "learning_rate": 0.00014590535653659757, "loss": 0.3548, "step": 14590 }, { "epoch": 1.1724151776751657, "grad_norm": 0.2275397628545761, "learning_rate": 0.00014583732156040608, "loss": 0.3727, "step": 14600 }, { "epoch": 1.1732182292712308, "grad_norm": 0.22451961040496826, "learning_rate": 0.00014576926088223348, "loss": 0.3584, "step": 14610 }, { "epoch": 1.1740212808672958, "grad_norm": 0.21630235016345978, "learning_rate": 0.00014570117454491537, "loss": 0.3357, "step": 14620 }, { "epoch": 1.1748243324633607, "grad_norm": 0.21322311460971832, "learning_rate": 0.00014563306259130366, "loss": 0.3542, "step": 14630 }, { "epoch": 1.1756273840594258, "grad_norm": 0.26671719551086426, "learning_rate": 0.00014556492506426625, "loss": 0.3603, "step": 14640 }, { "epoch": 1.1764304356554909, "grad_norm": 0.2702086567878723, "learning_rate": 0.0001454967620066872, "loss": 0.3773, "step": 14650 }, { "epoch": 1.177233487251556, "grad_norm": 0.21533091366291046, "learning_rate": 0.0001454285734614666, "loss": 0.3866, "step": 14660 }, { "epoch": 1.178036538847621, "grad_norm": 0.24762392044067383, "learning_rate": 0.0001453603594715206, "loss": 0.3842, "step": 14670 }, { "epoch": 1.178839590443686, "grad_norm": 0.22172954678535461, "learning_rate": 0.00014529212007978137, "loss": 0.3798, "step": 14680 }, { "epoch": 1.179642642039751, "grad_norm": 0.27102819085121155, "learning_rate": 0.00014522385532919708, "loss": 0.3676, "step": 14690 }, { "epoch": 1.180445693635816, "grad_norm": 0.2441357970237732, "learning_rate": 0.00014515556526273183, "loss": 0.3814, "step": 14700 }, { "epoch": 1.1812487452318812, "grad_norm": 0.1937299370765686, "learning_rate": 0.0001450872499233657, "loss": 0.3776, "step": 14710 }, { "epoch": 1.1820517968279463, "grad_norm": 0.18980099260807037, "learning_rate": 0.00014501890935409462, "loss": 0.3617, "step": 14720 }, { "epoch": 1.1828548484240113, "grad_norm": 0.21997912228107452, "learning_rate": 0.00014495054359793037, "loss": 0.332, "step": 14730 }, { "epoch": 1.1836579000200762, "grad_norm": 0.5819276571273804, "learning_rate": 0.00014488215269790072, "loss": 0.3722, "step": 14740 }, { "epoch": 1.1844609516161413, "grad_norm": 0.1946289986371994, "learning_rate": 0.0001448137366970491, "loss": 0.351, "step": 14750 }, { "epoch": 1.1852640032122064, "grad_norm": 0.24936150014400482, "learning_rate": 0.00014474529563843493, "loss": 0.3525, "step": 14760 }, { "epoch": 1.1860670548082715, "grad_norm": 0.24470986425876617, "learning_rate": 0.0001446768295651332, "loss": 0.3444, "step": 14770 }, { "epoch": 1.1868701064043365, "grad_norm": 0.24420595169067383, "learning_rate": 0.00014460833852023475, "loss": 0.3856, "step": 14780 }, { "epoch": 1.1876731580004014, "grad_norm": 0.21621857583522797, "learning_rate": 0.00014453982254684604, "loss": 0.3361, "step": 14790 }, { "epoch": 1.1884762095964665, "grad_norm": 0.25413256883621216, "learning_rate": 0.00014447128168808946, "loss": 0.3635, "step": 14800 }, { "epoch": 1.1892792611925316, "grad_norm": 0.2027168869972229, "learning_rate": 0.00014440271598710277, "loss": 0.3461, "step": 14810 }, { "epoch": 1.1900823127885967, "grad_norm": 0.2521907687187195, "learning_rate": 0.00014433412548703955, "loss": 0.3564, "step": 14820 }, { "epoch": 1.1908853643846617, "grad_norm": 0.2257986217737198, "learning_rate": 0.0001442655102310689, "loss": 0.3398, "step": 14830 }, { "epoch": 1.1916884159807268, "grad_norm": 0.26712262630462646, "learning_rate": 0.0001441968702623756, "loss": 0.3778, "step": 14840 }, { "epoch": 1.192491467576792, "grad_norm": 0.23575453460216522, "learning_rate": 0.00014412820562415983, "loss": 0.369, "step": 14850 }, { "epoch": 1.1932945191728568, "grad_norm": 0.1903998851776123, "learning_rate": 0.0001440595163596374, "loss": 0.3525, "step": 14860 }, { "epoch": 1.1940975707689219, "grad_norm": 0.20865698158740997, "learning_rate": 0.00014399080251203966, "loss": 0.3807, "step": 14870 }, { "epoch": 1.194900622364987, "grad_norm": 0.214115709066391, "learning_rate": 0.0001439220641246133, "loss": 0.3538, "step": 14880 }, { "epoch": 1.195703673961052, "grad_norm": 0.2079162448644638, "learning_rate": 0.00014385330124062063, "loss": 0.3708, "step": 14890 }, { "epoch": 1.1965067255571171, "grad_norm": 0.22562040388584137, "learning_rate": 0.00014378451390333917, "loss": 0.3774, "step": 14900 }, { "epoch": 1.197309777153182, "grad_norm": 0.22855807840824127, "learning_rate": 0.00014371570215606201, "loss": 0.3776, "step": 14910 }, { "epoch": 1.198112828749247, "grad_norm": 0.23253005743026733, "learning_rate": 0.00014364686604209745, "loss": 0.3652, "step": 14920 }, { "epoch": 1.1989158803453122, "grad_norm": 0.21309100091457367, "learning_rate": 0.00014357800560476928, "loss": 0.3635, "step": 14930 }, { "epoch": 1.1997189319413772, "grad_norm": 0.2514764964580536, "learning_rate": 0.0001435091208874165, "loss": 0.366, "step": 14940 }, { "epoch": 1.2005219835374423, "grad_norm": 0.23203322291374207, "learning_rate": 0.00014344021193339346, "loss": 0.3918, "step": 14950 }, { "epoch": 1.2013250351335074, "grad_norm": 0.2711852788925171, "learning_rate": 0.0001433712787860696, "loss": 0.3633, "step": 14960 }, { "epoch": 1.2021280867295725, "grad_norm": 0.18636895716190338, "learning_rate": 0.00014330232148882982, "loss": 0.3619, "step": 14970 }, { "epoch": 1.2029311383256374, "grad_norm": 0.19680193066596985, "learning_rate": 0.00014323334008507405, "loss": 0.36, "step": 14980 }, { "epoch": 1.2037341899217024, "grad_norm": 0.2117217481136322, "learning_rate": 0.00014316433461821748, "loss": 0.363, "step": 14990 }, { "epoch": 1.2045372415177675, "grad_norm": 0.20111291110515594, "learning_rate": 0.0001430953051316904, "loss": 0.3713, "step": 15000 }, { "epoch": 1.2053402931138326, "grad_norm": 0.20356842875480652, "learning_rate": 0.00014302625166893817, "loss": 0.3711, "step": 15010 }, { "epoch": 1.2061433447098977, "grad_norm": 0.20449994504451752, "learning_rate": 0.0001429571742734214, "loss": 0.357, "step": 15020 }, { "epoch": 1.2069463963059626, "grad_norm": 0.2072397619485855, "learning_rate": 0.00014288807298861562, "loss": 0.3563, "step": 15030 }, { "epoch": 1.2077494479020277, "grad_norm": 0.22128519415855408, "learning_rate": 0.0001428189478580114, "loss": 0.3794, "step": 15040 }, { "epoch": 1.2085524994980927, "grad_norm": 0.26738592982292175, "learning_rate": 0.00014274979892511442, "loss": 0.3684, "step": 15050 }, { "epoch": 1.2093555510941578, "grad_norm": 0.24843327701091766, "learning_rate": 0.00014268062623344524, "loss": 0.3488, "step": 15060 }, { "epoch": 1.210158602690223, "grad_norm": 0.2085292786359787, "learning_rate": 0.0001426114298265394, "loss": 0.3764, "step": 15070 }, { "epoch": 1.210961654286288, "grad_norm": 0.21214967966079712, "learning_rate": 0.0001425422097479474, "loss": 0.365, "step": 15080 }, { "epoch": 1.2117647058823529, "grad_norm": 0.24312300980091095, "learning_rate": 0.0001424729660412346, "loss": 0.3725, "step": 15090 }, { "epoch": 1.212567757478418, "grad_norm": 0.22964461147785187, "learning_rate": 0.00014240369874998123, "loss": 0.3787, "step": 15100 }, { "epoch": 1.213370809074483, "grad_norm": 0.27038419246673584, "learning_rate": 0.0001423344079177824, "loss": 0.373, "step": 15110 }, { "epoch": 1.214173860670548, "grad_norm": 0.22390320897102356, "learning_rate": 0.000142265093588248, "loss": 0.3384, "step": 15120 }, { "epoch": 1.2149769122666132, "grad_norm": 0.25446817278862, "learning_rate": 0.00014219575580500272, "loss": 0.3558, "step": 15130 }, { "epoch": 1.215779963862678, "grad_norm": 0.24429598450660706, "learning_rate": 0.00014212639461168606, "loss": 0.3424, "step": 15140 }, { "epoch": 1.2165830154587431, "grad_norm": 0.22902189195156097, "learning_rate": 0.00014205701005195215, "loss": 0.3777, "step": 15150 }, { "epoch": 1.2173860670548082, "grad_norm": 0.21569588780403137, "learning_rate": 0.0001419876021694699, "loss": 0.3549, "step": 15160 }, { "epoch": 1.2181891186508733, "grad_norm": 0.25858959555625916, "learning_rate": 0.0001419181710079229, "loss": 0.3798, "step": 15170 }, { "epoch": 1.2189921702469384, "grad_norm": 0.17341583967208862, "learning_rate": 0.00014184871661100933, "loss": 0.3618, "step": 15180 }, { "epoch": 1.2197952218430035, "grad_norm": 0.25263941287994385, "learning_rate": 0.00014177923902244208, "loss": 0.3609, "step": 15190 }, { "epoch": 1.2205982734390686, "grad_norm": 0.2502388060092926, "learning_rate": 0.00014170973828594858, "loss": 0.3734, "step": 15200 }, { "epoch": 1.2214013250351334, "grad_norm": 0.24805329740047455, "learning_rate": 0.00014164021444527087, "loss": 0.3409, "step": 15210 }, { "epoch": 1.2222043766311985, "grad_norm": 0.22757889330387115, "learning_rate": 0.00014157066754416544, "loss": 0.3533, "step": 15220 }, { "epoch": 1.2230074282272636, "grad_norm": 0.2058122158050537, "learning_rate": 0.0001415010976264034, "loss": 0.3756, "step": 15230 }, { "epoch": 1.2238104798233287, "grad_norm": 0.2572817802429199, "learning_rate": 0.0001414315047357703, "loss": 0.3697, "step": 15240 }, { "epoch": 1.2246135314193938, "grad_norm": 0.2558475136756897, "learning_rate": 0.00014136188891606614, "loss": 0.3438, "step": 15250 }, { "epoch": 1.2254165830154586, "grad_norm": 0.22284477949142456, "learning_rate": 0.00014129225021110534, "loss": 0.3559, "step": 15260 }, { "epoch": 1.2262196346115237, "grad_norm": 0.23896591365337372, "learning_rate": 0.00014122258866471684, "loss": 0.3509, "step": 15270 }, { "epoch": 1.2270226862075888, "grad_norm": 0.2543688416481018, "learning_rate": 0.00014115290432074377, "loss": 0.3493, "step": 15280 }, { "epoch": 1.227825737803654, "grad_norm": 0.1911967545747757, "learning_rate": 0.00014108319722304374, "loss": 0.3296, "step": 15290 }, { "epoch": 1.228628789399719, "grad_norm": 0.2820916771888733, "learning_rate": 0.0001410134674154886, "loss": 0.3928, "step": 15300 }, { "epoch": 1.229431840995784, "grad_norm": 0.23577670753002167, "learning_rate": 0.00014094371494196458, "loss": 0.3641, "step": 15310 }, { "epoch": 1.2302348925918491, "grad_norm": 0.2493124008178711, "learning_rate": 0.0001408739398463721, "loss": 0.3738, "step": 15320 }, { "epoch": 1.231037944187914, "grad_norm": 0.23139214515686035, "learning_rate": 0.00014080414217262587, "loss": 0.3977, "step": 15330 }, { "epoch": 1.231840995783979, "grad_norm": 0.25773924589157104, "learning_rate": 0.0001407343219646548, "loss": 0.3515, "step": 15340 }, { "epoch": 1.2326440473800442, "grad_norm": 0.20812846720218658, "learning_rate": 0.0001406644792664019, "loss": 0.3514, "step": 15350 }, { "epoch": 1.2334470989761093, "grad_norm": 0.21223288774490356, "learning_rate": 0.00014059461412182448, "loss": 0.366, "step": 15360 }, { "epoch": 1.2342501505721744, "grad_norm": 0.3149060010910034, "learning_rate": 0.00014052472657489386, "loss": 0.358, "step": 15370 }, { "epoch": 1.2350532021682392, "grad_norm": 0.20357945561408997, "learning_rate": 0.00014045481666959554, "loss": 0.3425, "step": 15380 }, { "epoch": 1.2358562537643043, "grad_norm": 0.22117449343204498, "learning_rate": 0.00014038488444992897, "loss": 0.36, "step": 15390 }, { "epoch": 1.2366593053603694, "grad_norm": 0.17473256587982178, "learning_rate": 0.0001403149299599078, "loss": 0.3471, "step": 15400 }, { "epoch": 1.2374623569564345, "grad_norm": 0.21033895015716553, "learning_rate": 0.00014024495324355962, "loss": 0.3554, "step": 15410 }, { "epoch": 1.2382654085524996, "grad_norm": 0.20606380701065063, "learning_rate": 0.000140174954344926, "loss": 0.3692, "step": 15420 }, { "epoch": 1.2390684601485646, "grad_norm": 0.17048214375972748, "learning_rate": 0.00014010493330806245, "loss": 0.3544, "step": 15430 }, { "epoch": 1.2398715117446295, "grad_norm": 0.2535124719142914, "learning_rate": 0.0001400348901770385, "loss": 0.3527, "step": 15440 }, { "epoch": 1.2406745633406946, "grad_norm": 0.22880057990550995, "learning_rate": 0.0001399648249959375, "loss": 0.359, "step": 15450 }, { "epoch": 1.2414776149367597, "grad_norm": 0.21684002876281738, "learning_rate": 0.00013989473780885673, "loss": 0.3567, "step": 15460 }, { "epoch": 1.2422806665328248, "grad_norm": 0.2602676749229431, "learning_rate": 0.0001398246286599073, "loss": 0.377, "step": 15470 }, { "epoch": 1.2430837181288898, "grad_norm": 0.21793153882026672, "learning_rate": 0.0001397544975932141, "loss": 0.3414, "step": 15480 }, { "epoch": 1.2438867697249547, "grad_norm": 0.20635658502578735, "learning_rate": 0.00013968434465291585, "loss": 0.3481, "step": 15490 }, { "epoch": 1.2446898213210198, "grad_norm": 0.2214379459619522, "learning_rate": 0.00013961416988316516, "loss": 0.3711, "step": 15500 }, { "epoch": 1.2454928729170849, "grad_norm": 0.22070766985416412, "learning_rate": 0.0001395439733281281, "loss": 0.342, "step": 15510 }, { "epoch": 1.24629592451315, "grad_norm": 0.2108486443758011, "learning_rate": 0.00013947375503198475, "loss": 0.3504, "step": 15520 }, { "epoch": 1.247098976109215, "grad_norm": 0.2077271044254303, "learning_rate": 0.0001394035150389287, "loss": 0.36, "step": 15530 }, { "epoch": 1.2479020277052801, "grad_norm": 0.2359362542629242, "learning_rate": 0.00013933325339316713, "loss": 0.3814, "step": 15540 }, { "epoch": 1.2487050793013452, "grad_norm": 0.2579987347126007, "learning_rate": 0.0001392629701389211, "loss": 0.3611, "step": 15550 }, { "epoch": 1.24950813089741, "grad_norm": 0.21965356171131134, "learning_rate": 0.00013919266532042498, "loss": 0.3721, "step": 15560 }, { "epoch": 1.2503111824934752, "grad_norm": 0.21819597482681274, "learning_rate": 0.00013912233898192696, "loss": 0.3892, "step": 15570 }, { "epoch": 1.2511142340895403, "grad_norm": 0.2274889498949051, "learning_rate": 0.00013905199116768862, "loss": 0.3798, "step": 15580 }, { "epoch": 1.2519172856856053, "grad_norm": 0.2253030687570572, "learning_rate": 0.0001389816219219851, "loss": 0.3678, "step": 15590 }, { "epoch": 1.2527203372816704, "grad_norm": 0.22527995705604553, "learning_rate": 0.00013891123128910505, "loss": 0.3653, "step": 15600 }, { "epoch": 1.2535233888777353, "grad_norm": 0.2373007833957672, "learning_rate": 0.00013884081931335053, "loss": 0.3573, "step": 15610 }, { "epoch": 1.2543264404738004, "grad_norm": 0.22605758905410767, "learning_rate": 0.00013877038603903708, "loss": 0.3593, "step": 15620 }, { "epoch": 1.2551294920698655, "grad_norm": 0.2471255511045456, "learning_rate": 0.00013869993151049363, "loss": 0.35, "step": 15630 }, { "epoch": 1.2559325436659305, "grad_norm": 0.22890664637088776, "learning_rate": 0.0001386294557720625, "loss": 0.3597, "step": 15640 }, { "epoch": 1.2567355952619956, "grad_norm": 0.26582443714141846, "learning_rate": 0.0001385589588680993, "loss": 0.3585, "step": 15650 }, { "epoch": 1.2575386468580607, "grad_norm": 0.23453570902347565, "learning_rate": 0.00013848844084297305, "loss": 0.359, "step": 15660 }, { "epoch": 1.2583416984541258, "grad_norm": 0.21347646415233612, "learning_rate": 0.000138417901741066, "loss": 0.3512, "step": 15670 }, { "epoch": 1.2591447500501907, "grad_norm": 0.2309814840555191, "learning_rate": 0.00013834734160677372, "loss": 0.3528, "step": 15680 }, { "epoch": 1.2599478016462557, "grad_norm": 0.2207341194152832, "learning_rate": 0.00013827676048450488, "loss": 0.3959, "step": 15690 }, { "epoch": 1.2607508532423208, "grad_norm": 0.1722925454378128, "learning_rate": 0.00013820615841868156, "loss": 0.3177, "step": 15700 }, { "epoch": 1.261553904838386, "grad_norm": 0.2578652799129486, "learning_rate": 0.00013813553545373886, "loss": 0.3907, "step": 15710 }, { "epoch": 1.2623569564344508, "grad_norm": 0.24126489460468292, "learning_rate": 0.00013806489163412515, "loss": 0.3606, "step": 15720 }, { "epoch": 1.2631600080305159, "grad_norm": 0.22154182195663452, "learning_rate": 0.0001379942270043018, "loss": 0.3704, "step": 15730 }, { "epoch": 1.263963059626581, "grad_norm": 0.30037689208984375, "learning_rate": 0.00013792354160874342, "loss": 0.3665, "step": 15740 }, { "epoch": 1.264766111222646, "grad_norm": 0.27220413088798523, "learning_rate": 0.00013785283549193754, "loss": 0.3703, "step": 15750 }, { "epoch": 1.2655691628187111, "grad_norm": 0.24087250232696533, "learning_rate": 0.0001377821086983849, "loss": 0.3683, "step": 15760 }, { "epoch": 1.2663722144147762, "grad_norm": 0.23698106408119202, "learning_rate": 0.00013771136127259907, "loss": 0.3728, "step": 15770 }, { "epoch": 1.2671752660108413, "grad_norm": 0.31331947445869446, "learning_rate": 0.0001376405932591068, "loss": 0.37, "step": 15780 }, { "epoch": 1.2679783176069064, "grad_norm": 0.2114751935005188, "learning_rate": 0.0001375698047024476, "loss": 0.3913, "step": 15790 }, { "epoch": 1.2687813692029712, "grad_norm": 0.21313492953777313, "learning_rate": 0.00013749899564717406, "loss": 0.3539, "step": 15800 }, { "epoch": 1.2695844207990363, "grad_norm": 0.20947280526161194, "learning_rate": 0.0001374281661378515, "loss": 0.338, "step": 15810 }, { "epoch": 1.2703874723951014, "grad_norm": 0.20294860005378723, "learning_rate": 0.00013735731621905843, "loss": 0.3727, "step": 15820 }, { "epoch": 1.2711905239911665, "grad_norm": 0.20112484693527222, "learning_rate": 0.00013728644593538587, "loss": 0.3554, "step": 15830 }, { "epoch": 1.2719935755872314, "grad_norm": 0.20582254230976105, "learning_rate": 0.00013721555533143778, "loss": 0.36, "step": 15840 }, { "epoch": 1.2727966271832964, "grad_norm": 0.24831810593605042, "learning_rate": 0.00013714464445183103, "loss": 0.3485, "step": 15850 }, { "epoch": 1.2735996787793615, "grad_norm": 0.19863009452819824, "learning_rate": 0.00013707371334119499, "loss": 0.4039, "step": 15860 }, { "epoch": 1.2744027303754266, "grad_norm": 0.22307097911834717, "learning_rate": 0.00013700276204417201, "loss": 0.3835, "step": 15870 }, { "epoch": 1.2752057819714917, "grad_norm": 0.20181068778038025, "learning_rate": 0.00013693179060541707, "loss": 0.3862, "step": 15880 }, { "epoch": 1.2760088335675568, "grad_norm": 0.2411166876554489, "learning_rate": 0.0001368607990695977, "loss": 0.3632, "step": 15890 }, { "epoch": 1.2768118851636219, "grad_norm": 0.2259128838777542, "learning_rate": 0.00013678978748139428, "loss": 0.3538, "step": 15900 }, { "epoch": 1.2776149367596867, "grad_norm": 0.21668501198291779, "learning_rate": 0.00013671875588549967, "loss": 0.3569, "step": 15910 }, { "epoch": 1.2784179883557518, "grad_norm": 0.18253757059574127, "learning_rate": 0.00013664770432661937, "loss": 0.3427, "step": 15920 }, { "epoch": 1.279221039951817, "grad_norm": 0.21372996270656586, "learning_rate": 0.0001365766328494715, "loss": 0.3665, "step": 15930 }, { "epoch": 1.280024091547882, "grad_norm": 0.2246398776769638, "learning_rate": 0.00013650554149878655, "loss": 0.3626, "step": 15940 }, { "epoch": 1.280827143143947, "grad_norm": 0.21646837890148163, "learning_rate": 0.00013643443031930772, "loss": 0.3764, "step": 15950 }, { "epoch": 1.281630194740012, "grad_norm": 0.19838599860668182, "learning_rate": 0.00013636329935579053, "loss": 0.3762, "step": 15960 }, { "epoch": 1.282433246336077, "grad_norm": 0.2852643132209778, "learning_rate": 0.00013629214865300304, "loss": 0.3622, "step": 15970 }, { "epoch": 1.2832362979321421, "grad_norm": 0.2471650391817093, "learning_rate": 0.00013622097825572574, "loss": 0.354, "step": 15980 }, { "epoch": 1.2840393495282072, "grad_norm": 0.24565647542476654, "learning_rate": 0.00013614978820875143, "loss": 0.3881, "step": 15990 }, { "epoch": 1.2848424011242723, "grad_norm": 0.2175855040550232, "learning_rate": 0.00013607857855688535, "loss": 0.355, "step": 16000 }, { "epoch": 1.2856454527203374, "grad_norm": 0.25790339708328247, "learning_rate": 0.0001360073493449451, "loss": 0.355, "step": 16010 }, { "epoch": 1.2864485043164025, "grad_norm": 0.24982036650180817, "learning_rate": 0.00013593610061776048, "loss": 0.3479, "step": 16020 }, { "epoch": 1.2872515559124673, "grad_norm": 0.20853908360004425, "learning_rate": 0.0001358648324201737, "loss": 0.3635, "step": 16030 }, { "epoch": 1.2880546075085324, "grad_norm": 0.22436121106147766, "learning_rate": 0.00013579354479703915, "loss": 0.362, "step": 16040 }, { "epoch": 1.2888576591045975, "grad_norm": 0.1992465704679489, "learning_rate": 0.00013572223779322347, "loss": 0.3749, "step": 16050 }, { "epoch": 1.2896607107006626, "grad_norm": 0.23962196707725525, "learning_rate": 0.00013565091145360548, "loss": 0.3571, "step": 16060 }, { "epoch": 1.2904637622967274, "grad_norm": 0.21661421656608582, "learning_rate": 0.00013557956582307615, "loss": 0.3477, "step": 16070 }, { "epoch": 1.2912668138927925, "grad_norm": 0.23265664279460907, "learning_rate": 0.0001355082009465387, "loss": 0.3814, "step": 16080 }, { "epoch": 1.2920698654888576, "grad_norm": 0.29565221071243286, "learning_rate": 0.00013543681686890833, "loss": 0.3588, "step": 16090 }, { "epoch": 1.2928729170849227, "grad_norm": 0.2224903255701065, "learning_rate": 0.0001353654136351124, "loss": 0.3864, "step": 16100 }, { "epoch": 1.2936759686809878, "grad_norm": 0.2251744568347931, "learning_rate": 0.00013529399129009028, "loss": 0.3464, "step": 16110 }, { "epoch": 1.2944790202770529, "grad_norm": 0.22255858778953552, "learning_rate": 0.00013522254987879343, "loss": 0.3906, "step": 16120 }, { "epoch": 1.295282071873118, "grad_norm": 0.22464625537395477, "learning_rate": 0.00013515108944618523, "loss": 0.3611, "step": 16130 }, { "epoch": 1.296085123469183, "grad_norm": 0.24506863951683044, "learning_rate": 0.00013507961003724115, "loss": 0.3773, "step": 16140 }, { "epoch": 1.296888175065248, "grad_norm": 0.3113899528980255, "learning_rate": 0.00013500811169694845, "loss": 0.3639, "step": 16150 }, { "epoch": 1.297691226661313, "grad_norm": 0.20690518617630005, "learning_rate": 0.00013493659447030645, "loss": 0.3451, "step": 16160 }, { "epoch": 1.298494278257378, "grad_norm": 0.24398499727249146, "learning_rate": 0.0001348650584023262, "loss": 0.3844, "step": 16170 }, { "epoch": 1.2992973298534432, "grad_norm": 0.2109779566526413, "learning_rate": 0.00013479350353803077, "loss": 0.3467, "step": 16180 }, { "epoch": 1.300100381449508, "grad_norm": 0.25023457407951355, "learning_rate": 0.000134721929922455, "loss": 0.3827, "step": 16190 }, { "epoch": 1.300903433045573, "grad_norm": 0.2708590030670166, "learning_rate": 0.00013465033760064546, "loss": 0.3663, "step": 16200 }, { "epoch": 1.3017064846416382, "grad_norm": 0.20561988651752472, "learning_rate": 0.0001345787266176606, "loss": 0.357, "step": 16210 }, { "epoch": 1.3025095362377033, "grad_norm": 0.2450905293226242, "learning_rate": 0.00013450709701857055, "loss": 0.3652, "step": 16220 }, { "epoch": 1.3033125878337684, "grad_norm": 0.23391218483448029, "learning_rate": 0.00013443544884845718, "loss": 0.359, "step": 16230 }, { "epoch": 1.3041156394298334, "grad_norm": 0.29627516865730286, "learning_rate": 0.00013436378215241402, "loss": 0.377, "step": 16240 }, { "epoch": 1.3049186910258985, "grad_norm": 0.2352074235677719, "learning_rate": 0.00013429209697554632, "loss": 0.3665, "step": 16250 }, { "epoch": 1.3057217426219634, "grad_norm": 0.24724067747592926, "learning_rate": 0.0001342203933629709, "loss": 0.3577, "step": 16260 }, { "epoch": 1.3065247942180285, "grad_norm": 0.2043708711862564, "learning_rate": 0.00013414867135981624, "loss": 0.3554, "step": 16270 }, { "epoch": 1.3073278458140936, "grad_norm": 0.22650738060474396, "learning_rate": 0.0001340769310112223, "loss": 0.395, "step": 16280 }, { "epoch": 1.3081308974101586, "grad_norm": 0.23204390704631805, "learning_rate": 0.00013400517236234074, "loss": 0.3702, "step": 16290 }, { "epoch": 1.3089339490062237, "grad_norm": 0.2733776569366455, "learning_rate": 0.00013393339545833454, "loss": 0.3431, "step": 16300 }, { "epoch": 1.3097370006022886, "grad_norm": 0.19391649961471558, "learning_rate": 0.00013386160034437833, "loss": 0.3434, "step": 16310 }, { "epoch": 1.3105400521983537, "grad_norm": 0.19205684959888458, "learning_rate": 0.00013378978706565813, "loss": 0.3407, "step": 16320 }, { "epoch": 1.3113431037944188, "grad_norm": 0.2234421670436859, "learning_rate": 0.00013371795566737143, "loss": 0.3486, "step": 16330 }, { "epoch": 1.3121461553904838, "grad_norm": 0.20240291953086853, "learning_rate": 0.00013364610619472707, "loss": 0.3292, "step": 16340 }, { "epoch": 1.312949206986549, "grad_norm": 0.2354898750782013, "learning_rate": 0.0001335742386929454, "loss": 0.3621, "step": 16350 }, { "epoch": 1.313752258582614, "grad_norm": 0.24531564116477966, "learning_rate": 0.00013350235320725784, "loss": 0.3416, "step": 16360 }, { "epoch": 1.314555310178679, "grad_norm": 0.2663232386112213, "learning_rate": 0.00013343044978290743, "loss": 0.3622, "step": 16370 }, { "epoch": 1.315358361774744, "grad_norm": 0.22507476806640625, "learning_rate": 0.00013335852846514833, "loss": 0.3721, "step": 16380 }, { "epoch": 1.316161413370809, "grad_norm": 0.2686161398887634, "learning_rate": 0.000133286589299246, "loss": 0.3562, "step": 16390 }, { "epoch": 1.3169644649668741, "grad_norm": 0.24456973373889923, "learning_rate": 0.00013321463233047717, "loss": 0.3629, "step": 16400 }, { "epoch": 1.3177675165629392, "grad_norm": 0.23876982927322388, "learning_rate": 0.00013314265760412973, "loss": 0.3655, "step": 16410 }, { "epoch": 1.318570568159004, "grad_norm": 0.21368640661239624, "learning_rate": 0.00013307066516550275, "loss": 0.3406, "step": 16420 }, { "epoch": 1.3193736197550692, "grad_norm": 0.20818422734737396, "learning_rate": 0.00013299865505990644, "loss": 0.3785, "step": 16430 }, { "epoch": 1.3201766713511343, "grad_norm": 0.2641404867172241, "learning_rate": 0.00013292662733266222, "loss": 0.3834, "step": 16440 }, { "epoch": 1.3209797229471993, "grad_norm": 0.22542397677898407, "learning_rate": 0.0001328545820291024, "loss": 0.3524, "step": 16450 }, { "epoch": 1.3217827745432644, "grad_norm": 0.23181335628032684, "learning_rate": 0.0001327825191945706, "loss": 0.3716, "step": 16460 }, { "epoch": 1.3225858261393295, "grad_norm": 0.25388774275779724, "learning_rate": 0.00013271043887442128, "loss": 0.377, "step": 16470 }, { "epoch": 1.3233888777353946, "grad_norm": 0.23251797258853912, "learning_rate": 0.00013263834111402003, "loss": 0.3575, "step": 16480 }, { "epoch": 1.3241919293314597, "grad_norm": 0.22272297739982605, "learning_rate": 0.00013256622595874328, "loss": 0.3755, "step": 16490 }, { "epoch": 1.3249949809275245, "grad_norm": 0.23828798532485962, "learning_rate": 0.00013249409345397861, "loss": 0.3538, "step": 16500 }, { "epoch": 1.3257980325235896, "grad_norm": 0.19673992693424225, "learning_rate": 0.00013242194364512428, "loss": 0.3662, "step": 16510 }, { "epoch": 1.3266010841196547, "grad_norm": 0.22187326848506927, "learning_rate": 0.00013234977657758964, "loss": 0.3628, "step": 16520 }, { "epoch": 1.3274041357157198, "grad_norm": 0.21137841045856476, "learning_rate": 0.0001322775922967948, "loss": 0.3335, "step": 16530 }, { "epoch": 1.3282071873117847, "grad_norm": 0.2985902428627014, "learning_rate": 0.00013220539084817076, "loss": 0.3478, "step": 16540 }, { "epoch": 1.3290102389078498, "grad_norm": 0.21824900805950165, "learning_rate": 0.00013213317227715924, "loss": 0.3463, "step": 16550 }, { "epoch": 1.3298132905039148, "grad_norm": 0.2685167193412781, "learning_rate": 0.0001320609366292128, "loss": 0.3635, "step": 16560 }, { "epoch": 1.33061634209998, "grad_norm": 0.2492697536945343, "learning_rate": 0.00013198868394979476, "loss": 0.3634, "step": 16570 }, { "epoch": 1.331419393696045, "grad_norm": 0.2336621880531311, "learning_rate": 0.00013191641428437912, "loss": 0.389, "step": 16580 }, { "epoch": 1.33222244529211, "grad_norm": 0.2557229697704315, "learning_rate": 0.00013184412767845057, "loss": 0.3442, "step": 16590 }, { "epoch": 1.3330254968881752, "grad_norm": 0.22554460167884827, "learning_rate": 0.00013177182417750452, "loss": 0.3501, "step": 16600 }, { "epoch": 1.33382854848424, "grad_norm": 0.24208523333072662, "learning_rate": 0.00013169950382704694, "loss": 0.3634, "step": 16610 }, { "epoch": 1.3346316000803051, "grad_norm": 0.19028149545192719, "learning_rate": 0.00013162716667259445, "loss": 0.3714, "step": 16620 }, { "epoch": 1.3354346516763702, "grad_norm": 0.2475832849740982, "learning_rate": 0.00013155481275967424, "loss": 0.343, "step": 16630 }, { "epoch": 1.3362377032724353, "grad_norm": 0.23119062185287476, "learning_rate": 0.00013148244213382398, "loss": 0.3906, "step": 16640 }, { "epoch": 1.3370407548685004, "grad_norm": 0.24095498025417328, "learning_rate": 0.00013141005484059202, "loss": 0.368, "step": 16650 }, { "epoch": 1.3378438064645652, "grad_norm": 0.24269172549247742, "learning_rate": 0.00013133765092553697, "loss": 0.3406, "step": 16660 }, { "epoch": 1.3386468580606303, "grad_norm": 0.2248685359954834, "learning_rate": 0.00013126523043422822, "loss": 0.3452, "step": 16670 }, { "epoch": 1.3394499096566954, "grad_norm": 0.2315104603767395, "learning_rate": 0.00013119279341224516, "loss": 0.3685, "step": 16680 }, { "epoch": 1.3402529612527605, "grad_norm": 0.2891160249710083, "learning_rate": 0.00013112033990517804, "loss": 0.3604, "step": 16690 }, { "epoch": 1.3410560128488256, "grad_norm": 0.26936033368110657, "learning_rate": 0.00013104786995862716, "loss": 0.3947, "step": 16700 }, { "epoch": 1.3418590644448907, "grad_norm": 0.19612225890159607, "learning_rate": 0.00013097538361820328, "loss": 0.363, "step": 16710 }, { "epoch": 1.3426621160409558, "grad_norm": 0.2137482911348343, "learning_rate": 0.00013090288092952753, "loss": 0.3508, "step": 16720 }, { "epoch": 1.3434651676370206, "grad_norm": 0.2634015679359436, "learning_rate": 0.00013083036193823122, "loss": 0.3574, "step": 16730 }, { "epoch": 1.3442682192330857, "grad_norm": 0.2895856499671936, "learning_rate": 0.00013075782668995603, "loss": 0.3895, "step": 16740 }, { "epoch": 1.3450712708291508, "grad_norm": 0.21924373507499695, "learning_rate": 0.00013068527523035374, "loss": 0.3493, "step": 16750 }, { "epoch": 1.3458743224252159, "grad_norm": 0.2133820801973343, "learning_rate": 0.00013061270760508648, "loss": 0.3736, "step": 16760 }, { "epoch": 1.3466773740212807, "grad_norm": 0.2318306565284729, "learning_rate": 0.00013054012385982643, "loss": 0.3572, "step": 16770 }, { "epoch": 1.3474804256173458, "grad_norm": 0.20007920265197754, "learning_rate": 0.00013046752404025604, "loss": 0.3612, "step": 16780 }, { "epoch": 1.348283477213411, "grad_norm": 0.21844260394573212, "learning_rate": 0.00013039490819206779, "loss": 0.3516, "step": 16790 }, { "epoch": 1.349086528809476, "grad_norm": 0.19390973448753357, "learning_rate": 0.0001303222763609642, "loss": 0.3693, "step": 16800 }, { "epoch": 1.349889580405541, "grad_norm": 0.20452642440795898, "learning_rate": 0.00013024962859265797, "loss": 0.3394, "step": 16810 }, { "epoch": 1.3506926320016062, "grad_norm": 0.2298920899629593, "learning_rate": 0.00013017696493287175, "loss": 0.347, "step": 16820 }, { "epoch": 1.3514956835976712, "grad_norm": 0.20903745293617249, "learning_rate": 0.00013010428542733823, "loss": 0.3405, "step": 16830 }, { "epoch": 1.3522987351937363, "grad_norm": 0.2558334171772003, "learning_rate": 0.00013003159012180007, "loss": 0.3729, "step": 16840 }, { "epoch": 1.3531017867898012, "grad_norm": 0.30961117148399353, "learning_rate": 0.00012995887906200985, "loss": 0.3523, "step": 16850 }, { "epoch": 1.3539048383858663, "grad_norm": 0.22805267572402954, "learning_rate": 0.0001298861522937301, "loss": 0.3569, "step": 16860 }, { "epoch": 1.3547078899819314, "grad_norm": 0.22244533896446228, "learning_rate": 0.0001298134098627332, "loss": 0.3722, "step": 16870 }, { "epoch": 1.3555109415779965, "grad_norm": 0.2234622985124588, "learning_rate": 0.00012974065181480137, "loss": 0.3321, "step": 16880 }, { "epoch": 1.3563139931740613, "grad_norm": 0.23992842435836792, "learning_rate": 0.00012966787819572673, "loss": 0.3465, "step": 16890 }, { "epoch": 1.3571170447701264, "grad_norm": 0.20845840871334076, "learning_rate": 0.00012959508905131112, "loss": 0.3584, "step": 16900 }, { "epoch": 1.3579200963661915, "grad_norm": 0.23163193464279175, "learning_rate": 0.00012952228442736626, "loss": 0.3705, "step": 16910 }, { "epoch": 1.3587231479622566, "grad_norm": 0.22035910189151764, "learning_rate": 0.0001294494643697135, "loss": 0.3605, "step": 16920 }, { "epoch": 1.3595261995583217, "grad_norm": 0.20225444436073303, "learning_rate": 0.000129376628924184, "loss": 0.349, "step": 16930 }, { "epoch": 1.3603292511543867, "grad_norm": 0.22460193932056427, "learning_rate": 0.00012930377813661847, "loss": 0.3715, "step": 16940 }, { "epoch": 1.3611323027504518, "grad_norm": 0.20023660361766815, "learning_rate": 0.00012923091205286743, "loss": 0.3611, "step": 16950 }, { "epoch": 1.3619353543465167, "grad_norm": 0.19968754053115845, "learning_rate": 0.00012915803071879094, "loss": 0.3628, "step": 16960 }, { "epoch": 1.3627384059425818, "grad_norm": 0.20509541034698486, "learning_rate": 0.00012908513418025862, "loss": 0.3502, "step": 16970 }, { "epoch": 1.3635414575386469, "grad_norm": 0.2262030392885208, "learning_rate": 0.00012901222248314981, "loss": 0.3556, "step": 16980 }, { "epoch": 1.364344509134712, "grad_norm": 0.2453491985797882, "learning_rate": 0.00012893929567335324, "loss": 0.3555, "step": 16990 }, { "epoch": 1.365147560730777, "grad_norm": 0.18476475775241852, "learning_rate": 0.0001288663537967672, "loss": 0.3507, "step": 17000 }, { "epoch": 1.365950612326842, "grad_norm": 0.21590785682201385, "learning_rate": 0.0001287933968992995, "loss": 0.3674, "step": 17010 }, { "epoch": 1.366753663922907, "grad_norm": 0.19883811473846436, "learning_rate": 0.00012872042502686733, "loss": 0.3649, "step": 17020 }, { "epoch": 1.367556715518972, "grad_norm": 0.22646869719028473, "learning_rate": 0.00012864743822539737, "loss": 0.3559, "step": 17030 }, { "epoch": 1.3683597671150372, "grad_norm": 0.2395525872707367, "learning_rate": 0.00012857443654082568, "loss": 0.3568, "step": 17040 }, { "epoch": 1.3691628187111022, "grad_norm": 0.2331976443529129, "learning_rate": 0.0001285014200190977, "loss": 0.361, "step": 17050 }, { "epoch": 1.3699658703071673, "grad_norm": 0.23047754168510437, "learning_rate": 0.00012842838870616818, "loss": 0.3578, "step": 17060 }, { "epoch": 1.3707689219032324, "grad_norm": 0.24064384400844574, "learning_rate": 0.00012835534264800112, "loss": 0.3471, "step": 17070 }, { "epoch": 1.3715719734992973, "grad_norm": 0.22094909846782684, "learning_rate": 0.00012828228189056996, "loss": 0.3771, "step": 17080 }, { "epoch": 1.3723750250953624, "grad_norm": 0.23042342066764832, "learning_rate": 0.00012820920647985722, "loss": 0.3545, "step": 17090 }, { "epoch": 1.3731780766914274, "grad_norm": 0.20369772613048553, "learning_rate": 0.00012813611646185483, "loss": 0.3413, "step": 17100 }, { "epoch": 1.3739811282874925, "grad_norm": 0.25838446617126465, "learning_rate": 0.0001280630118825637, "loss": 0.333, "step": 17110 }, { "epoch": 1.3747841798835574, "grad_norm": 0.25149106979370117, "learning_rate": 0.00012798989278799407, "loss": 0.3543, "step": 17120 }, { "epoch": 1.3755872314796225, "grad_norm": 0.21229946613311768, "learning_rate": 0.00012791675922416523, "loss": 0.3486, "step": 17130 }, { "epoch": 1.3763902830756876, "grad_norm": 0.2470630705356598, "learning_rate": 0.0001278436112371056, "loss": 0.3615, "step": 17140 }, { "epoch": 1.3771933346717526, "grad_norm": 0.18704456090927124, "learning_rate": 0.00012777044887285268, "loss": 0.3819, "step": 17150 }, { "epoch": 1.3779963862678177, "grad_norm": 0.2001979649066925, "learning_rate": 0.00012769727217745301, "loss": 0.3313, "step": 17160 }, { "epoch": 1.3787994378638828, "grad_norm": 0.2131490409374237, "learning_rate": 0.0001276240811969622, "loss": 0.3385, "step": 17170 }, { "epoch": 1.379602489459948, "grad_norm": 0.21971255540847778, "learning_rate": 0.0001275508759774448, "loss": 0.3647, "step": 17180 }, { "epoch": 1.380405541056013, "grad_norm": 0.21938082575798035, "learning_rate": 0.0001274776565649743, "loss": 0.3733, "step": 17190 }, { "epoch": 1.3812085926520778, "grad_norm": 0.22388245165348053, "learning_rate": 0.00012740442300563317, "loss": 0.3487, "step": 17200 }, { "epoch": 1.382011644248143, "grad_norm": 0.24035081267356873, "learning_rate": 0.00012733117534551275, "loss": 0.3506, "step": 17210 }, { "epoch": 1.382814695844208, "grad_norm": 0.2129107415676117, "learning_rate": 0.00012725791363071327, "loss": 0.3612, "step": 17220 }, { "epoch": 1.383617747440273, "grad_norm": 0.2335951328277588, "learning_rate": 0.00012718463790734386, "loss": 0.3942, "step": 17230 }, { "epoch": 1.384420799036338, "grad_norm": 0.26704224944114685, "learning_rate": 0.00012711134822152237, "loss": 0.3847, "step": 17240 }, { "epoch": 1.385223850632403, "grad_norm": 0.21710412204265594, "learning_rate": 0.00012703804461937547, "loss": 0.3397, "step": 17250 }, { "epoch": 1.3860269022284681, "grad_norm": 0.2346591055393219, "learning_rate": 0.00012696472714703863, "loss": 0.3526, "step": 17260 }, { "epoch": 1.3868299538245332, "grad_norm": 0.2052374929189682, "learning_rate": 0.00012689139585065598, "loss": 0.3313, "step": 17270 }, { "epoch": 1.3876330054205983, "grad_norm": 0.2095077484846115, "learning_rate": 0.00012681805077638042, "loss": 0.355, "step": 17280 }, { "epoch": 1.3884360570166634, "grad_norm": 0.2190144956111908, "learning_rate": 0.00012674469197037353, "loss": 0.3626, "step": 17290 }, { "epoch": 1.3892391086127285, "grad_norm": 0.23592016100883484, "learning_rate": 0.00012667131947880544, "loss": 0.359, "step": 17300 }, { "epoch": 1.3900421602087933, "grad_norm": 0.24494177103042603, "learning_rate": 0.00012659793334785494, "loss": 0.3666, "step": 17310 }, { "epoch": 1.3908452118048584, "grad_norm": 0.20131094753742218, "learning_rate": 0.00012652453362370944, "loss": 0.3664, "step": 17320 }, { "epoch": 1.3916482634009235, "grad_norm": 0.20424477756023407, "learning_rate": 0.00012645112035256488, "loss": 0.3543, "step": 17330 }, { "epoch": 1.3924513149969886, "grad_norm": 0.23819950222969055, "learning_rate": 0.00012637769358062569, "loss": 0.3501, "step": 17340 }, { "epoch": 1.3932543665930537, "grad_norm": 0.24054613709449768, "learning_rate": 0.00012630425335410488, "loss": 0.3353, "step": 17350 }, { "epoch": 1.3940574181891185, "grad_norm": 0.30730539560317993, "learning_rate": 0.00012623079971922383, "loss": 0.3564, "step": 17360 }, { "epoch": 1.3948604697851836, "grad_norm": 0.21328063309192657, "learning_rate": 0.00012615733272221242, "loss": 0.3494, "step": 17370 }, { "epoch": 1.3956635213812487, "grad_norm": 0.24108783900737762, "learning_rate": 0.00012608385240930892, "loss": 0.3849, "step": 17380 }, { "epoch": 1.3964665729773138, "grad_norm": 0.2150178998708725, "learning_rate": 0.00012601035882676002, "loss": 0.3651, "step": 17390 }, { "epoch": 1.3972696245733789, "grad_norm": 0.22943882644176483, "learning_rate": 0.00012593685202082073, "loss": 0.3704, "step": 17400 }, { "epoch": 1.398072676169444, "grad_norm": 0.21456846594810486, "learning_rate": 0.00012586333203775432, "loss": 0.3421, "step": 17410 }, { "epoch": 1.398875727765509, "grad_norm": 0.21207913756370544, "learning_rate": 0.00012578979892383248, "loss": 0.3445, "step": 17420 }, { "epoch": 1.399678779361574, "grad_norm": 0.23081548511981964, "learning_rate": 0.00012571625272533507, "loss": 0.3352, "step": 17430 }, { "epoch": 1.400481830957639, "grad_norm": 0.2523498833179474, "learning_rate": 0.00012564269348855022, "loss": 0.3624, "step": 17440 }, { "epoch": 1.401284882553704, "grad_norm": 0.22183799743652344, "learning_rate": 0.00012556912125977424, "loss": 0.3483, "step": 17450 }, { "epoch": 1.4020879341497692, "grad_norm": 0.19609279930591583, "learning_rate": 0.00012549553608531168, "loss": 0.3471, "step": 17460 }, { "epoch": 1.402890985745834, "grad_norm": 0.21470342576503754, "learning_rate": 0.00012542193801147515, "loss": 0.37, "step": 17470 }, { "epoch": 1.4036940373418991, "grad_norm": 0.2634105384349823, "learning_rate": 0.00012534832708458547, "loss": 0.3482, "step": 17480 }, { "epoch": 1.4044970889379642, "grad_norm": 0.2000497430562973, "learning_rate": 0.00012527470335097144, "loss": 0.3356, "step": 17490 }, { "epoch": 1.4053001405340293, "grad_norm": 0.1663072407245636, "learning_rate": 0.00012520106685697007, "loss": 0.3695, "step": 17500 }, { "epoch": 1.4061031921300944, "grad_norm": 0.23225881159305573, "learning_rate": 0.00012512741764892618, "loss": 0.3603, "step": 17510 }, { "epoch": 1.4069062437261595, "grad_norm": 0.21267428994178772, "learning_rate": 0.00012505375577319286, "loss": 0.3497, "step": 17520 }, { "epoch": 1.4077092953222246, "grad_norm": 0.208751380443573, "learning_rate": 0.000124980081276131, "loss": 0.3395, "step": 17530 }, { "epoch": 1.4085123469182896, "grad_norm": 0.23712235689163208, "learning_rate": 0.00012490639420410942, "loss": 0.3303, "step": 17540 }, { "epoch": 1.4093153985143545, "grad_norm": 0.2576706111431122, "learning_rate": 0.00012483269460350496, "loss": 0.3318, "step": 17550 }, { "epoch": 1.4101184501104196, "grad_norm": 0.24055072665214539, "learning_rate": 0.0001247589825207023, "loss": 0.3658, "step": 17560 }, { "epoch": 1.4109215017064847, "grad_norm": 0.24889521300792694, "learning_rate": 0.00012468525800209392, "loss": 0.3552, "step": 17570 }, { "epoch": 1.4117245533025498, "grad_norm": 0.23675952851772308, "learning_rate": 0.0001246115210940802, "loss": 0.3652, "step": 17580 }, { "epoch": 1.4125276048986146, "grad_norm": 0.20326939225196838, "learning_rate": 0.00012453777184306928, "loss": 0.3499, "step": 17590 }, { "epoch": 1.4133306564946797, "grad_norm": 0.21912360191345215, "learning_rate": 0.00012446401029547706, "loss": 0.3616, "step": 17600 }, { "epoch": 1.4141337080907448, "grad_norm": 0.23468591272830963, "learning_rate": 0.00012439023649772727, "loss": 0.3469, "step": 17610 }, { "epoch": 1.4149367596868099, "grad_norm": 0.2115103304386139, "learning_rate": 0.00012431645049625122, "loss": 0.372, "step": 17620 }, { "epoch": 1.415739811282875, "grad_norm": 0.26658758521080017, "learning_rate": 0.00012424265233748803, "loss": 0.3592, "step": 17630 }, { "epoch": 1.41654286287894, "grad_norm": 0.24509911239147186, "learning_rate": 0.00012416884206788427, "loss": 0.3577, "step": 17640 }, { "epoch": 1.4173459144750051, "grad_norm": 0.17926208674907684, "learning_rate": 0.0001240950197338944, "loss": 0.346, "step": 17650 }, { "epoch": 1.41814896607107, "grad_norm": 0.22276286780834198, "learning_rate": 0.00012402118538198023, "loss": 0.3453, "step": 17660 }, { "epoch": 1.418952017667135, "grad_norm": 0.256456583738327, "learning_rate": 0.00012394733905861133, "loss": 0.3507, "step": 17670 }, { "epoch": 1.4197550692632002, "grad_norm": 0.247592031955719, "learning_rate": 0.0001238734808102647, "loss": 0.3567, "step": 17680 }, { "epoch": 1.4205581208592652, "grad_norm": 0.19634637236595154, "learning_rate": 0.0001237996106834248, "loss": 0.3479, "step": 17690 }, { "epoch": 1.4213611724553303, "grad_norm": 0.21324452757835388, "learning_rate": 0.00012372572872458365, "loss": 0.3405, "step": 17700 }, { "epoch": 1.4221642240513952, "grad_norm": 0.26794931292533875, "learning_rate": 0.00012365183498024072, "loss": 0.3406, "step": 17710 }, { "epoch": 1.4229672756474603, "grad_norm": 0.2895088493824005, "learning_rate": 0.00012357792949690285, "loss": 0.3777, "step": 17720 }, { "epoch": 1.4237703272435254, "grad_norm": 0.3252345323562622, "learning_rate": 0.00012350401232108434, "loss": 0.3557, "step": 17730 }, { "epoch": 1.4245733788395905, "grad_norm": 0.22079795598983765, "learning_rate": 0.0001234300834993067, "loss": 0.3697, "step": 17740 }, { "epoch": 1.4253764304356555, "grad_norm": 0.2631239593029022, "learning_rate": 0.00012335614307809893, "loss": 0.3779, "step": 17750 }, { "epoch": 1.4261794820317206, "grad_norm": 0.25506797432899475, "learning_rate": 0.00012328219110399727, "loss": 0.3781, "step": 17760 }, { "epoch": 1.4269825336277857, "grad_norm": 0.19937588274478912, "learning_rate": 0.0001232082276235452, "loss": 0.3524, "step": 17770 }, { "epoch": 1.4277855852238506, "grad_norm": 0.19595758616924286, "learning_rate": 0.0001231342526832935, "loss": 0.3435, "step": 17780 }, { "epoch": 1.4285886368199157, "grad_norm": 0.2499721795320511, "learning_rate": 0.00012306026632980012, "loss": 0.3492, "step": 17790 }, { "epoch": 1.4293916884159807, "grad_norm": 0.2052449882030487, "learning_rate": 0.0001229862686096302, "loss": 0.3445, "step": 17800 }, { "epoch": 1.4301947400120458, "grad_norm": 0.2522154152393341, "learning_rate": 0.00012291225956935609, "loss": 0.3904, "step": 17810 }, { "epoch": 1.4309977916081107, "grad_norm": 0.2056715041399002, "learning_rate": 0.00012283823925555713, "loss": 0.3489, "step": 17820 }, { "epoch": 1.4318008432041758, "grad_norm": 0.21509891748428345, "learning_rate": 0.00012276420771481988, "loss": 0.3812, "step": 17830 }, { "epoch": 1.4326038948002409, "grad_norm": 0.23658888041973114, "learning_rate": 0.00012269016499373794, "loss": 0.3757, "step": 17840 }, { "epoch": 1.433406946396306, "grad_norm": 0.17899830639362335, "learning_rate": 0.0001226161111389119, "loss": 0.3672, "step": 17850 }, { "epoch": 1.434209997992371, "grad_norm": 0.20687516033649445, "learning_rate": 0.00012254204619694946, "loss": 0.3734, "step": 17860 }, { "epoch": 1.4350130495884361, "grad_norm": 0.19790667295455933, "learning_rate": 0.00012246797021446516, "loss": 0.3306, "step": 17870 }, { "epoch": 1.4358161011845012, "grad_norm": 0.2134610265493393, "learning_rate": 0.00012239388323808062, "loss": 0.3667, "step": 17880 }, { "epoch": 1.4366191527805663, "grad_norm": 0.22014352679252625, "learning_rate": 0.0001223197853144242, "loss": 0.3537, "step": 17890 }, { "epoch": 1.4374222043766312, "grad_norm": 0.21866799890995026, "learning_rate": 0.0001222456764901314, "loss": 0.362, "step": 17900 }, { "epoch": 1.4382252559726962, "grad_norm": 0.25274455547332764, "learning_rate": 0.00012217155681184435, "loss": 0.3671, "step": 17910 }, { "epoch": 1.4390283075687613, "grad_norm": 0.21309660375118256, "learning_rate": 0.00012209742632621212, "loss": 0.3707, "step": 17920 }, { "epoch": 1.4398313591648264, "grad_norm": 0.29000818729400635, "learning_rate": 0.00012202328507989066, "loss": 0.3558, "step": 17930 }, { "epoch": 1.4406344107608913, "grad_norm": 0.23699535429477692, "learning_rate": 0.00012194913311954248, "loss": 0.3656, "step": 17940 }, { "epoch": 1.4414374623569564, "grad_norm": 0.23169568181037903, "learning_rate": 0.00012187497049183707, "loss": 0.343, "step": 17950 }, { "epoch": 1.4422405139530214, "grad_norm": 0.21598279476165771, "learning_rate": 0.0001218007972434504, "loss": 0.3271, "step": 17960 }, { "epoch": 1.4430435655490865, "grad_norm": 0.2581801116466522, "learning_rate": 0.00012172661342106533, "loss": 0.3668, "step": 17970 }, { "epoch": 1.4438466171451516, "grad_norm": 0.2344127595424652, "learning_rate": 0.00012165241907137125, "loss": 0.3633, "step": 17980 }, { "epoch": 1.4446496687412167, "grad_norm": 0.22278772294521332, "learning_rate": 0.00012157821424106424, "loss": 0.348, "step": 17990 }, { "epoch": 1.4454527203372818, "grad_norm": 0.2565406858921051, "learning_rate": 0.00012150399897684694, "loss": 0.3649, "step": 18000 }, { "epoch": 1.4462557719333466, "grad_norm": 0.23086319863796234, "learning_rate": 0.00012142977332542857, "loss": 0.3592, "step": 18010 }, { "epoch": 1.4470588235294117, "grad_norm": 0.21001124382019043, "learning_rate": 0.00012135553733352488, "loss": 0.3711, "step": 18020 }, { "epoch": 1.4478618751254768, "grad_norm": 0.2515798509120941, "learning_rate": 0.0001212812910478581, "loss": 0.3813, "step": 18030 }, { "epoch": 1.448664926721542, "grad_norm": 0.2199918031692505, "learning_rate": 0.00012120703451515703, "loss": 0.3681, "step": 18040 }, { "epoch": 1.449467978317607, "grad_norm": 0.2369040548801422, "learning_rate": 0.00012113276778215681, "loss": 0.3687, "step": 18050 }, { "epoch": 1.4502710299136718, "grad_norm": 0.22514848411083221, "learning_rate": 0.00012105849089559909, "loss": 0.3637, "step": 18060 }, { "epoch": 1.451074081509737, "grad_norm": 0.272183895111084, "learning_rate": 0.00012098420390223187, "loss": 0.3465, "step": 18070 }, { "epoch": 1.451877133105802, "grad_norm": 0.23029182851314545, "learning_rate": 0.00012090990684880947, "loss": 0.3562, "step": 18080 }, { "epoch": 1.452680184701867, "grad_norm": 0.24774055182933807, "learning_rate": 0.00012083559978209261, "loss": 0.3709, "step": 18090 }, { "epoch": 1.4534832362979322, "grad_norm": 0.256442129611969, "learning_rate": 0.00012076128274884828, "loss": 0.3592, "step": 18100 }, { "epoch": 1.4542862878939973, "grad_norm": 0.25545960664749146, "learning_rate": 0.00012068695579584976, "loss": 0.345, "step": 18110 }, { "epoch": 1.4550893394900624, "grad_norm": 0.2084749937057495, "learning_rate": 0.00012061261896987655, "loss": 0.3556, "step": 18120 }, { "epoch": 1.4558923910861272, "grad_norm": 0.22722353041172028, "learning_rate": 0.0001205382723177144, "loss": 0.3561, "step": 18130 }, { "epoch": 1.4566954426821923, "grad_norm": 0.2150675505399704, "learning_rate": 0.0001204639158861552, "loss": 0.363, "step": 18140 }, { "epoch": 1.4574984942782574, "grad_norm": 0.23994708061218262, "learning_rate": 0.00012038954972199698, "loss": 0.3547, "step": 18150 }, { "epoch": 1.4583015458743225, "grad_norm": 0.28118932247161865, "learning_rate": 0.00012031517387204396, "loss": 0.3475, "step": 18160 }, { "epoch": 1.4591045974703873, "grad_norm": 0.23476941883563995, "learning_rate": 0.00012024078838310644, "loss": 0.3507, "step": 18170 }, { "epoch": 1.4599076490664524, "grad_norm": 0.24053291976451874, "learning_rate": 0.00012016639330200079, "loss": 0.3482, "step": 18180 }, { "epoch": 1.4607107006625175, "grad_norm": 0.24631573259830475, "learning_rate": 0.00012009198867554933, "loss": 0.3611, "step": 18190 }, { "epoch": 1.4615137522585826, "grad_norm": 0.23241114616394043, "learning_rate": 0.00012001757455058052, "loss": 0.3525, "step": 18200 }, { "epoch": 1.4623168038546477, "grad_norm": 0.1940845251083374, "learning_rate": 0.0001199431509739287, "loss": 0.3411, "step": 18210 }, { "epoch": 1.4631198554507128, "grad_norm": 0.22809112071990967, "learning_rate": 0.0001198687179924342, "loss": 0.3646, "step": 18220 }, { "epoch": 1.4639229070467779, "grad_norm": 0.23367823660373688, "learning_rate": 0.00011979427565294327, "loss": 0.3861, "step": 18230 }, { "epoch": 1.464725958642843, "grad_norm": 0.23726876080036163, "learning_rate": 0.00011971982400230806, "loss": 0.3923, "step": 18240 }, { "epoch": 1.4655290102389078, "grad_norm": 0.24578964710235596, "learning_rate": 0.00011964536308738654, "loss": 0.3743, "step": 18250 }, { "epoch": 1.4663320618349729, "grad_norm": 0.20841024816036224, "learning_rate": 0.00011957089295504253, "loss": 0.3579, "step": 18260 }, { "epoch": 1.467135113431038, "grad_norm": 0.21789124608039856, "learning_rate": 0.00011949641365214568, "loss": 0.3521, "step": 18270 }, { "epoch": 1.467938165027103, "grad_norm": 0.21411162614822388, "learning_rate": 0.00011942192522557134, "loss": 0.3513, "step": 18280 }, { "epoch": 1.468741216623168, "grad_norm": 0.19814512133598328, "learning_rate": 0.00011934742772220068, "loss": 0.3201, "step": 18290 }, { "epoch": 1.469544268219233, "grad_norm": 0.21096102893352509, "learning_rate": 0.00011927292118892057, "loss": 0.3501, "step": 18300 }, { "epoch": 1.470347319815298, "grad_norm": 0.22303827106952667, "learning_rate": 0.0001191984056726235, "loss": 0.3721, "step": 18310 }, { "epoch": 1.4711503714113632, "grad_norm": 0.20821788907051086, "learning_rate": 0.00011912388122020772, "loss": 0.3743, "step": 18320 }, { "epoch": 1.4719534230074283, "grad_norm": 0.20943821966648102, "learning_rate": 0.00011904934787857698, "loss": 0.3665, "step": 18330 }, { "epoch": 1.4727564746034933, "grad_norm": 0.21695294976234436, "learning_rate": 0.00011897480569464067, "loss": 0.3365, "step": 18340 }, { "epoch": 1.4735595261995584, "grad_norm": 0.23727230727672577, "learning_rate": 0.00011890025471531385, "loss": 0.3742, "step": 18350 }, { "epoch": 1.4743625777956233, "grad_norm": 0.220000758767128, "learning_rate": 0.00011882569498751697, "loss": 0.3512, "step": 18360 }, { "epoch": 1.4751656293916884, "grad_norm": 0.26766034960746765, "learning_rate": 0.00011875112655817606, "loss": 0.3347, "step": 18370 }, { "epoch": 1.4759686809877535, "grad_norm": 0.22262190282344818, "learning_rate": 0.00011867654947422259, "loss": 0.3679, "step": 18380 }, { "epoch": 1.4767717325838186, "grad_norm": 0.24553526937961578, "learning_rate": 0.00011860196378259354, "loss": 0.3828, "step": 18390 }, { "epoch": 1.4775747841798836, "grad_norm": 0.2511424422264099, "learning_rate": 0.0001185273695302312, "loss": 0.3513, "step": 18400 }, { "epoch": 1.4783778357759485, "grad_norm": 0.2175104022026062, "learning_rate": 0.00011845276676408342, "loss": 0.3437, "step": 18410 }, { "epoch": 1.4791808873720136, "grad_norm": 0.19784000515937805, "learning_rate": 0.00011837815553110316, "loss": 0.346, "step": 18420 }, { "epoch": 1.4799839389680787, "grad_norm": 0.22275295853614807, "learning_rate": 0.00011830353587824894, "loss": 0.3601, "step": 18430 }, { "epoch": 1.4807869905641438, "grad_norm": 0.2309107482433319, "learning_rate": 0.00011822890785248448, "loss": 0.3789, "step": 18440 }, { "epoch": 1.4815900421602088, "grad_norm": 0.24002695083618164, "learning_rate": 0.00011815427150077878, "loss": 0.3489, "step": 18450 }, { "epoch": 1.482393093756274, "grad_norm": 0.29280319809913635, "learning_rate": 0.00011807962687010605, "loss": 0.3584, "step": 18460 }, { "epoch": 1.483196145352339, "grad_norm": 0.2138054072856903, "learning_rate": 0.00011800497400744575, "loss": 0.3471, "step": 18470 }, { "epoch": 1.4839991969484039, "grad_norm": 0.23961058259010315, "learning_rate": 0.00011793031295978252, "loss": 0.3679, "step": 18480 }, { "epoch": 1.484802248544469, "grad_norm": 0.20669585466384888, "learning_rate": 0.00011785564377410616, "loss": 0.3547, "step": 18490 }, { "epoch": 1.485605300140534, "grad_norm": 0.2207762449979782, "learning_rate": 0.00011778096649741155, "loss": 0.3505, "step": 18500 }, { "epoch": 1.4864083517365991, "grad_norm": 0.32196149230003357, "learning_rate": 0.00011770628117669868, "loss": 0.3655, "step": 18510 }, { "epoch": 1.487211403332664, "grad_norm": 0.3133296072483063, "learning_rate": 0.00011763158785897264, "loss": 0.375, "step": 18520 }, { "epoch": 1.488014454928729, "grad_norm": 0.255058616399765, "learning_rate": 0.00011755688659124349, "loss": 0.3679, "step": 18530 }, { "epoch": 1.4888175065247942, "grad_norm": 0.23136073350906372, "learning_rate": 0.00011748217742052632, "loss": 0.359, "step": 18540 }, { "epoch": 1.4896205581208593, "grad_norm": 0.23675096035003662, "learning_rate": 0.00011740746039384122, "loss": 0.3648, "step": 18550 }, { "epoch": 1.4904236097169243, "grad_norm": 0.2168986201286316, "learning_rate": 0.00011733273555821322, "loss": 0.3632, "step": 18560 }, { "epoch": 1.4912266613129894, "grad_norm": 0.24666951596736908, "learning_rate": 0.00011725800296067224, "loss": 0.3503, "step": 18570 }, { "epoch": 1.4920297129090545, "grad_norm": 0.21621036529541016, "learning_rate": 0.00011718326264825307, "loss": 0.3435, "step": 18580 }, { "epoch": 1.4928327645051196, "grad_norm": 0.21856829524040222, "learning_rate": 0.0001171085146679954, "loss": 0.3356, "step": 18590 }, { "epoch": 1.4936358161011845, "grad_norm": 0.24542741477489471, "learning_rate": 0.00011703375906694369, "loss": 0.3662, "step": 18600 }, { "epoch": 1.4944388676972495, "grad_norm": 0.21511881053447723, "learning_rate": 0.00011695899589214727, "loss": 0.3659, "step": 18610 }, { "epoch": 1.4952419192933146, "grad_norm": 0.21155375242233276, "learning_rate": 0.00011688422519066017, "loss": 0.3743, "step": 18620 }, { "epoch": 1.4960449708893797, "grad_norm": 0.21115100383758545, "learning_rate": 0.00011680944700954124, "loss": 0.3248, "step": 18630 }, { "epoch": 1.4968480224854446, "grad_norm": 0.263362854719162, "learning_rate": 0.00011673466139585392, "loss": 0.3539, "step": 18640 }, { "epoch": 1.4976510740815097, "grad_norm": 0.20146341621875763, "learning_rate": 0.00011665986839666643, "loss": 0.3377, "step": 18650 }, { "epoch": 1.4984541256775747, "grad_norm": 0.21654823422431946, "learning_rate": 0.00011658506805905158, "loss": 0.3515, "step": 18660 }, { "epoch": 1.4992571772736398, "grad_norm": 0.1814952939748764, "learning_rate": 0.00011651026043008682, "loss": 0.3627, "step": 18670 }, { "epoch": 1.500060228869705, "grad_norm": 0.2325565218925476, "learning_rate": 0.00011643544555685417, "loss": 0.4111, "step": 18680 }, { "epoch": 1.50086328046577, "grad_norm": 0.2633918523788452, "learning_rate": 0.00011636062348644028, "loss": 0.3787, "step": 18690 }, { "epoch": 1.501666332061835, "grad_norm": 0.2448648065328598, "learning_rate": 0.00011628579426593624, "loss": 0.3607, "step": 18700 }, { "epoch": 1.5024693836579002, "grad_norm": 0.2833770513534546, "learning_rate": 0.00011621095794243768, "loss": 0.3617, "step": 18710 }, { "epoch": 1.503272435253965, "grad_norm": 0.24973605573177338, "learning_rate": 0.00011613611456304465, "loss": 0.3853, "step": 18720 }, { "epoch": 1.5040754868500301, "grad_norm": 0.31341204047203064, "learning_rate": 0.00011606126417486174, "loss": 0.3631, "step": 18730 }, { "epoch": 1.5048785384460952, "grad_norm": 0.19810275733470917, "learning_rate": 0.00011598640682499791, "loss": 0.3465, "step": 18740 }, { "epoch": 1.50568159004216, "grad_norm": 0.2508654296398163, "learning_rate": 0.00011591154256056648, "loss": 0.3916, "step": 18750 }, { "epoch": 1.5064846416382252, "grad_norm": 0.21403494477272034, "learning_rate": 0.0001158366714286851, "loss": 0.355, "step": 18760 }, { "epoch": 1.5072876932342902, "grad_norm": 0.25821787118911743, "learning_rate": 0.0001157617934764758, "loss": 0.3428, "step": 18770 }, { "epoch": 1.5080907448303553, "grad_norm": 0.2243880331516266, "learning_rate": 0.00011568690875106487, "loss": 0.3629, "step": 18780 }, { "epoch": 1.5088937964264204, "grad_norm": 0.25474634766578674, "learning_rate": 0.00011561201729958284, "loss": 0.3539, "step": 18790 }, { "epoch": 1.5096968480224855, "grad_norm": 0.21774046123027802, "learning_rate": 0.00011553711916916458, "loss": 0.3546, "step": 18800 }, { "epoch": 1.5104998996185506, "grad_norm": 0.2363780289888382, "learning_rate": 0.000115462214406949, "loss": 0.3611, "step": 18810 }, { "epoch": 1.5113029512146157, "grad_norm": 0.19771307706832886, "learning_rate": 0.00011538730306007932, "loss": 0.3532, "step": 18820 }, { "epoch": 1.5121060028106807, "grad_norm": 0.22682133316993713, "learning_rate": 0.00011531238517570284, "loss": 0.3316, "step": 18830 }, { "epoch": 1.5129090544067456, "grad_norm": 0.22028154134750366, "learning_rate": 0.000115237460800971, "loss": 0.3633, "step": 18840 }, { "epoch": 1.5137121060028107, "grad_norm": 0.21286757290363312, "learning_rate": 0.00011516252998303929, "loss": 0.3448, "step": 18850 }, { "epoch": 1.5145151575988758, "grad_norm": 0.219071626663208, "learning_rate": 0.00011508759276906729, "loss": 0.3646, "step": 18860 }, { "epoch": 1.5153182091949406, "grad_norm": 0.21599730849266052, "learning_rate": 0.0001150126492062186, "loss": 0.3482, "step": 18870 }, { "epoch": 1.5161212607910057, "grad_norm": 0.22430256009101868, "learning_rate": 0.00011493769934166081, "loss": 0.3585, "step": 18880 }, { "epoch": 1.5169243123870708, "grad_norm": 0.24240989983081818, "learning_rate": 0.00011486274322256547, "loss": 0.3674, "step": 18890 }, { "epoch": 1.517727363983136, "grad_norm": 0.2567283809185028, "learning_rate": 0.00011478778089610807, "loss": 0.3915, "step": 18900 }, { "epoch": 1.518530415579201, "grad_norm": 0.19873562455177307, "learning_rate": 0.00011471281240946801, "loss": 0.3452, "step": 18910 }, { "epoch": 1.519333467175266, "grad_norm": 0.2613579034805298, "learning_rate": 0.00011463783780982857, "loss": 0.3701, "step": 18920 }, { "epoch": 1.5201365187713312, "grad_norm": 0.2717297673225403, "learning_rate": 0.00011456285714437688, "loss": 0.3672, "step": 18930 }, { "epoch": 1.5209395703673962, "grad_norm": 0.22181536257266998, "learning_rate": 0.00011448787046030387, "loss": 0.3771, "step": 18940 }, { "epoch": 1.521742621963461, "grad_norm": 0.19868221879005432, "learning_rate": 0.0001144128778048043, "loss": 0.3358, "step": 18950 }, { "epoch": 1.5225456735595262, "grad_norm": 0.212707981467247, "learning_rate": 0.00011433787922507661, "loss": 0.3448, "step": 18960 }, { "epoch": 1.5233487251555913, "grad_norm": 0.24154768884181976, "learning_rate": 0.00011426287476832304, "loss": 0.3752, "step": 18970 }, { "epoch": 1.5241517767516561, "grad_norm": 0.23004604876041412, "learning_rate": 0.0001141878644817495, "loss": 0.3636, "step": 18980 }, { "epoch": 1.5249548283477212, "grad_norm": 0.20968830585479736, "learning_rate": 0.00011411284841256559, "loss": 0.3769, "step": 18990 }, { "epoch": 1.5257578799437863, "grad_norm": 0.21386472880840302, "learning_rate": 0.0001140378266079845, "loss": 0.3622, "step": 19000 }, { "epoch": 1.5265609315398514, "grad_norm": 0.2628445625305176, "learning_rate": 0.0001139627991152231, "loss": 0.3474, "step": 19010 }, { "epoch": 1.5273639831359165, "grad_norm": 0.212790384888649, "learning_rate": 0.0001138877659815018, "loss": 0.3654, "step": 19020 }, { "epoch": 1.5281670347319816, "grad_norm": 0.26615169644355774, "learning_rate": 0.00011381272725404453, "loss": 0.3472, "step": 19030 }, { "epoch": 1.5289700863280467, "grad_norm": 0.2517569661140442, "learning_rate": 0.00011373768298007878, "loss": 0.3467, "step": 19040 }, { "epoch": 1.5297731379241117, "grad_norm": 0.2675992250442505, "learning_rate": 0.0001136626332068356, "loss": 0.3645, "step": 19050 }, { "epoch": 1.5305761895201768, "grad_norm": 0.2253274917602539, "learning_rate": 0.00011358757798154931, "loss": 0.3345, "step": 19060 }, { "epoch": 1.5313792411162417, "grad_norm": 0.2776845395565033, "learning_rate": 0.00011351251735145785, "loss": 0.3614, "step": 19070 }, { "epoch": 1.5321822927123068, "grad_norm": 0.23385535180568695, "learning_rate": 0.00011343745136380249, "loss": 0.3686, "step": 19080 }, { "epoch": 1.5329853443083719, "grad_norm": 0.2252427637577057, "learning_rate": 0.00011336238006582787, "loss": 0.3283, "step": 19090 }, { "epoch": 1.5337883959044367, "grad_norm": 0.21285894513130188, "learning_rate": 0.00011328730350478192, "loss": 0.3674, "step": 19100 }, { "epoch": 1.5345914475005018, "grad_norm": 0.21477019786834717, "learning_rate": 0.00011321222172791596, "loss": 0.3579, "step": 19110 }, { "epoch": 1.535394499096567, "grad_norm": 0.20603926479816437, "learning_rate": 0.00011313713478248462, "loss": 0.3588, "step": 19120 }, { "epoch": 1.536197550692632, "grad_norm": 0.27597951889038086, "learning_rate": 0.00011306204271574568, "loss": 0.362, "step": 19130 }, { "epoch": 1.537000602288697, "grad_norm": 0.24269598722457886, "learning_rate": 0.00011298694557496022, "loss": 0.3477, "step": 19140 }, { "epoch": 1.5378036538847621, "grad_norm": 0.19088968634605408, "learning_rate": 0.00011291184340739245, "loss": 0.3582, "step": 19150 }, { "epoch": 1.5386067054808272, "grad_norm": 0.2727414667606354, "learning_rate": 0.00011283673626030986, "loss": 0.3777, "step": 19160 }, { "epoch": 1.5394097570768923, "grad_norm": 0.22239810228347778, "learning_rate": 0.00011276162418098289, "loss": 0.3751, "step": 19170 }, { "epoch": 1.5402128086729574, "grad_norm": 0.2166455239057541, "learning_rate": 0.00011268650721668525, "loss": 0.3797, "step": 19180 }, { "epoch": 1.5410158602690223, "grad_norm": 0.2317408174276352, "learning_rate": 0.00011261138541469367, "loss": 0.3435, "step": 19190 }, { "epoch": 1.5418189118650873, "grad_norm": 0.2554263770580292, "learning_rate": 0.00011253625882228791, "loss": 0.3866, "step": 19200 }, { "epoch": 1.5426219634611524, "grad_norm": 0.2065521478652954, "learning_rate": 0.00011246112748675075, "loss": 0.3411, "step": 19210 }, { "epoch": 1.5434250150572173, "grad_norm": 0.22938737273216248, "learning_rate": 0.00011238599145536796, "loss": 0.3565, "step": 19220 }, { "epoch": 1.5442280666532824, "grad_norm": 0.2277815341949463, "learning_rate": 0.00011231085077542824, "loss": 0.3622, "step": 19230 }, { "epoch": 1.5450311182493475, "grad_norm": 0.1831049770116806, "learning_rate": 0.00011223570549422332, "loss": 0.3577, "step": 19240 }, { "epoch": 1.5458341698454126, "grad_norm": 0.22872759401798248, "learning_rate": 0.00011216055565904763, "loss": 0.3453, "step": 19250 }, { "epoch": 1.5466372214414776, "grad_norm": 0.2494538575410843, "learning_rate": 0.0001120854013171987, "loss": 0.3684, "step": 19260 }, { "epoch": 1.5474402730375427, "grad_norm": 0.21708345413208008, "learning_rate": 0.00011201024251597672, "loss": 0.3553, "step": 19270 }, { "epoch": 1.5482433246336078, "grad_norm": 0.25220054388046265, "learning_rate": 0.00011193507930268472, "loss": 0.3654, "step": 19280 }, { "epoch": 1.549046376229673, "grad_norm": 0.212235227227211, "learning_rate": 0.0001118599117246286, "loss": 0.3683, "step": 19290 }, { "epoch": 1.5498494278257378, "grad_norm": 0.2414795607328415, "learning_rate": 0.00011178473982911686, "loss": 0.355, "step": 19300 }, { "epoch": 1.5506524794218028, "grad_norm": 0.21729108691215515, "learning_rate": 0.00011170956366346086, "loss": 0.3431, "step": 19310 }, { "epoch": 1.551455531017868, "grad_norm": 0.21426552534103394, "learning_rate": 0.00011163438327497452, "loss": 0.3808, "step": 19320 }, { "epoch": 1.5522585826139328, "grad_norm": 0.26880306005477905, "learning_rate": 0.0001115591987109746, "loss": 0.354, "step": 19330 }, { "epoch": 1.5530616342099979, "grad_norm": 0.23481176793575287, "learning_rate": 0.00011148401001878026, "loss": 0.3725, "step": 19340 }, { "epoch": 1.553864685806063, "grad_norm": 0.21776485443115234, "learning_rate": 0.00011140881724571342, "loss": 0.3624, "step": 19350 }, { "epoch": 1.554667737402128, "grad_norm": 0.21182815730571747, "learning_rate": 0.00011133362043909843, "loss": 0.3457, "step": 19360 }, { "epoch": 1.5554707889981931, "grad_norm": 0.22543564438819885, "learning_rate": 0.0001112584196462624, "loss": 0.3792, "step": 19370 }, { "epoch": 1.5562738405942582, "grad_norm": 0.2936418354511261, "learning_rate": 0.00011118321491453472, "loss": 0.3474, "step": 19380 }, { "epoch": 1.5570768921903233, "grad_norm": 0.2693498432636261, "learning_rate": 0.0001111080062912474, "loss": 0.3493, "step": 19390 }, { "epoch": 1.5578799437863884, "grad_norm": 0.20420540869235992, "learning_rate": 0.00011103279382373483, "loss": 0.3497, "step": 19400 }, { "epoch": 1.5586829953824535, "grad_norm": 0.2410145401954651, "learning_rate": 0.00011095757755933387, "loss": 0.3365, "step": 19410 }, { "epoch": 1.5594860469785183, "grad_norm": 0.2326405644416809, "learning_rate": 0.00011088235754538368, "loss": 0.3752, "step": 19420 }, { "epoch": 1.5602890985745834, "grad_norm": 0.3034543991088867, "learning_rate": 0.0001108071338292259, "loss": 0.3825, "step": 19430 }, { "epoch": 1.5610921501706485, "grad_norm": 0.21931292116641998, "learning_rate": 0.00011073190645820442, "loss": 0.3725, "step": 19440 }, { "epoch": 1.5618952017667134, "grad_norm": 0.21090419590473175, "learning_rate": 0.00011065667547966543, "loss": 0.385, "step": 19450 }, { "epoch": 1.5626982533627785, "grad_norm": 0.21505311131477356, "learning_rate": 0.00011058144094095744, "loss": 0.3797, "step": 19460 }, { "epoch": 1.5635013049588435, "grad_norm": 0.18967914581298828, "learning_rate": 0.00011050620288943118, "loss": 0.3495, "step": 19470 }, { "epoch": 1.5643043565549086, "grad_norm": 0.22730021178722382, "learning_rate": 0.00011043096137243958, "loss": 0.3844, "step": 19480 }, { "epoch": 1.5651074081509737, "grad_norm": 0.2218935340642929, "learning_rate": 0.00011035571643733773, "loss": 0.3489, "step": 19490 }, { "epoch": 1.5659104597470388, "grad_norm": 0.2052084505558014, "learning_rate": 0.00011028046813148291, "loss": 0.3657, "step": 19500 }, { "epoch": 1.5667135113431039, "grad_norm": 0.27507463097572327, "learning_rate": 0.00011020521650223452, "loss": 0.3564, "step": 19510 }, { "epoch": 1.567516562939169, "grad_norm": 0.19883522391319275, "learning_rate": 0.00011012996159695402, "loss": 0.3998, "step": 19520 }, { "epoch": 1.568319614535234, "grad_norm": 0.26401954889297485, "learning_rate": 0.00011005470346300494, "loss": 0.3563, "step": 19530 }, { "epoch": 1.569122666131299, "grad_norm": 0.2341492623090744, "learning_rate": 0.00010997944214775291, "loss": 0.344, "step": 19540 }, { "epoch": 1.569925717727364, "grad_norm": 0.238798588514328, "learning_rate": 0.00010990417769856545, "loss": 0.3623, "step": 19550 }, { "epoch": 1.5707287693234289, "grad_norm": 0.26482534408569336, "learning_rate": 0.00010982891016281214, "loss": 0.3786, "step": 19560 }, { "epoch": 1.571531820919494, "grad_norm": 0.22434483468532562, "learning_rate": 0.00010975363958786442, "loss": 0.3598, "step": 19570 }, { "epoch": 1.572334872515559, "grad_norm": 0.24821147322654724, "learning_rate": 0.00010967836602109576, "loss": 0.3703, "step": 19580 }, { "epoch": 1.5731379241116241, "grad_norm": 0.24609600007534027, "learning_rate": 0.0001096030895098814, "loss": 0.352, "step": 19590 }, { "epoch": 1.5739409757076892, "grad_norm": 0.2408294677734375, "learning_rate": 0.00010952781010159851, "loss": 0.3515, "step": 19600 }, { "epoch": 1.5747440273037543, "grad_norm": 0.25565239787101746, "learning_rate": 0.00010945252784362605, "loss": 0.3641, "step": 19610 }, { "epoch": 1.5755470788998194, "grad_norm": 0.23592610657215118, "learning_rate": 0.00010937724278334472, "loss": 0.365, "step": 19620 }, { "epoch": 1.5763501304958845, "grad_norm": 0.2054055631160736, "learning_rate": 0.00010930195496813711, "loss": 0.363, "step": 19630 }, { "epoch": 1.5771531820919495, "grad_norm": 0.18682993948459625, "learning_rate": 0.00010922666444538745, "loss": 0.3554, "step": 19640 }, { "epoch": 1.5779562336880144, "grad_norm": 0.24138574302196503, "learning_rate": 0.00010915137126248167, "loss": 0.3587, "step": 19650 }, { "epoch": 1.5787592852840795, "grad_norm": 0.21056433022022247, "learning_rate": 0.00010907607546680747, "loss": 0.3513, "step": 19660 }, { "epoch": 1.5795623368801446, "grad_norm": 0.21268221735954285, "learning_rate": 0.00010900077710575407, "loss": 0.3498, "step": 19670 }, { "epoch": 1.5803653884762094, "grad_norm": 0.22648605704307556, "learning_rate": 0.00010892547622671233, "loss": 0.3496, "step": 19680 }, { "epoch": 1.5811684400722745, "grad_norm": 0.18060685694217682, "learning_rate": 0.00010885017287707481, "loss": 0.3489, "step": 19690 }, { "epoch": 1.5819714916683396, "grad_norm": 0.2557713985443115, "learning_rate": 0.00010877486710423547, "loss": 0.3485, "step": 19700 }, { "epoch": 1.5827745432644047, "grad_norm": 0.22466924786567688, "learning_rate": 0.00010869955895558994, "loss": 0.3407, "step": 19710 }, { "epoch": 1.5835775948604698, "grad_norm": 0.3377053141593933, "learning_rate": 0.0001086242484785352, "loss": 0.3519, "step": 19720 }, { "epoch": 1.5843806464565349, "grad_norm": 0.2619187533855438, "learning_rate": 0.00010854893572046981, "loss": 0.3417, "step": 19730 }, { "epoch": 1.5851836980526, "grad_norm": 0.22539567947387695, "learning_rate": 0.00010847362072879367, "loss": 0.37, "step": 19740 }, { "epoch": 1.585986749648665, "grad_norm": 0.2650134563446045, "learning_rate": 0.0001083983035509082, "loss": 0.3518, "step": 19750 }, { "epoch": 1.5867898012447301, "grad_norm": 0.257466197013855, "learning_rate": 0.0001083229842342161, "loss": 0.3503, "step": 19760 }, { "epoch": 1.587592852840795, "grad_norm": 0.38637763261795044, "learning_rate": 0.00010824766282612149, "loss": 0.3555, "step": 19770 }, { "epoch": 1.58839590443686, "grad_norm": 0.2595391869544983, "learning_rate": 0.00010817233937402973, "loss": 0.3455, "step": 19780 }, { "epoch": 1.5891989560329252, "grad_norm": 0.2219323068857193, "learning_rate": 0.00010809701392534751, "loss": 0.3368, "step": 19790 }, { "epoch": 1.59000200762899, "grad_norm": 0.24162936210632324, "learning_rate": 0.00010802168652748272, "loss": 0.3433, "step": 19800 }, { "epoch": 1.590805059225055, "grad_norm": 0.23002022504806519, "learning_rate": 0.00010794635722784458, "loss": 0.3675, "step": 19810 }, { "epoch": 1.5916081108211202, "grad_norm": 0.22402970492839813, "learning_rate": 0.00010787102607384347, "loss": 0.3631, "step": 19820 }, { "epoch": 1.5924111624171853, "grad_norm": 0.24532955884933472, "learning_rate": 0.00010779569311289086, "loss": 0.3546, "step": 19830 }, { "epoch": 1.5932142140132504, "grad_norm": 0.2268253117799759, "learning_rate": 0.00010772035839239952, "loss": 0.3445, "step": 19840 }, { "epoch": 1.5940172656093154, "grad_norm": 0.2977912127971649, "learning_rate": 0.0001076450219597831, "loss": 0.3798, "step": 19850 }, { "epoch": 1.5948203172053805, "grad_norm": 0.2108163684606552, "learning_rate": 0.00010756968386245654, "loss": 0.359, "step": 19860 }, { "epoch": 1.5956233688014456, "grad_norm": 0.284542977809906, "learning_rate": 0.00010749434414783567, "loss": 0.3662, "step": 19870 }, { "epoch": 1.5964264203975107, "grad_norm": 0.22755348682403564, "learning_rate": 0.0001074190028633375, "loss": 0.3654, "step": 19880 }, { "epoch": 1.5972294719935756, "grad_norm": 0.21132920682430267, "learning_rate": 0.00010734366005637984, "loss": 0.3845, "step": 19890 }, { "epoch": 1.5980325235896407, "grad_norm": 0.1947557032108307, "learning_rate": 0.00010726831577438164, "loss": 0.3555, "step": 19900 }, { "epoch": 1.5988355751857055, "grad_norm": 0.20682880282402039, "learning_rate": 0.00010719297006476261, "loss": 0.3616, "step": 19910 }, { "epoch": 1.5996386267817706, "grad_norm": 0.2381097823381424, "learning_rate": 0.0001071176229749435, "loss": 0.3771, "step": 19920 }, { "epoch": 1.6004416783778357, "grad_norm": 0.20175214111804962, "learning_rate": 0.00010704227455234586, "loss": 0.3399, "step": 19930 }, { "epoch": 1.6012447299739008, "grad_norm": 0.26761212944984436, "learning_rate": 0.00010696692484439206, "loss": 0.3539, "step": 19940 }, { "epoch": 1.6020477815699659, "grad_norm": 0.22140289843082428, "learning_rate": 0.00010689157389850533, "loss": 0.3672, "step": 19950 }, { "epoch": 1.602850833166031, "grad_norm": 0.2282790094614029, "learning_rate": 0.00010681622176210964, "loss": 0.3714, "step": 19960 }, { "epoch": 1.603653884762096, "grad_norm": 0.22340236604213715, "learning_rate": 0.00010674086848262979, "loss": 0.3345, "step": 19970 }, { "epoch": 1.6044569363581611, "grad_norm": 0.21651864051818848, "learning_rate": 0.00010666551410749113, "loss": 0.3521, "step": 19980 }, { "epoch": 1.6052599879542262, "grad_norm": 0.23300786316394806, "learning_rate": 0.0001065901586841199, "loss": 0.3563, "step": 19990 }, { "epoch": 1.606063039550291, "grad_norm": 0.20533804595470428, "learning_rate": 0.00010651480225994284, "loss": 0.3407, "step": 20000 }, { "epoch": 1.6068660911463561, "grad_norm": 0.20314735174179077, "learning_rate": 0.00010643944488238741, "loss": 0.3457, "step": 20010 }, { "epoch": 1.6076691427424212, "grad_norm": 0.2376837432384491, "learning_rate": 0.00010636408659888164, "loss": 0.3535, "step": 20020 }, { "epoch": 1.608472194338486, "grad_norm": 0.21849752962589264, "learning_rate": 0.00010628872745685414, "loss": 0.3504, "step": 20030 }, { "epoch": 1.6092752459345512, "grad_norm": 0.2369389683008194, "learning_rate": 0.00010621336750373408, "loss": 0.365, "step": 20040 }, { "epoch": 1.6100782975306163, "grad_norm": 0.22305577993392944, "learning_rate": 0.00010613800678695107, "loss": 0.3563, "step": 20050 }, { "epoch": 1.6108813491266814, "grad_norm": 0.22452972829341888, "learning_rate": 0.00010606264535393524, "loss": 0.3662, "step": 20060 }, { "epoch": 1.6116844007227464, "grad_norm": 0.2584066689014435, "learning_rate": 0.00010598728325211724, "loss": 0.3532, "step": 20070 }, { "epoch": 1.6124874523188115, "grad_norm": 0.22867850959300995, "learning_rate": 0.000105911920528928, "loss": 0.3792, "step": 20080 }, { "epoch": 1.6132905039148766, "grad_norm": 0.23886001110076904, "learning_rate": 0.000105836557231799, "loss": 0.3362, "step": 20090 }, { "epoch": 1.6140935555109417, "grad_norm": 0.2330983728170395, "learning_rate": 0.00010576119340816196, "loss": 0.3483, "step": 20100 }, { "epoch": 1.6148966071070068, "grad_norm": 0.2546873986721039, "learning_rate": 0.00010568582910544896, "loss": 0.3622, "step": 20110 }, { "epoch": 1.6156996587030716, "grad_norm": 0.18796178698539734, "learning_rate": 0.00010561046437109239, "loss": 0.3698, "step": 20120 }, { "epoch": 1.6165027102991367, "grad_norm": 0.2027808576822281, "learning_rate": 0.0001055350992525249, "loss": 0.3416, "step": 20130 }, { "epoch": 1.6173057618952018, "grad_norm": 0.21678213775157928, "learning_rate": 0.00010545973379717947, "loss": 0.3625, "step": 20140 }, { "epoch": 1.6181088134912667, "grad_norm": 0.2177809625864029, "learning_rate": 0.00010538436805248914, "loss": 0.3408, "step": 20150 }, { "epoch": 1.6189118650873318, "grad_norm": 0.22382032871246338, "learning_rate": 0.00010530900206588727, "loss": 0.3532, "step": 20160 }, { "epoch": 1.6197149166833968, "grad_norm": 0.24277783930301666, "learning_rate": 0.00010523363588480728, "loss": 0.3417, "step": 20170 }, { "epoch": 1.620517968279462, "grad_norm": 0.24982421100139618, "learning_rate": 0.00010515826955668277, "loss": 0.3806, "step": 20180 }, { "epoch": 1.621321019875527, "grad_norm": 0.2175341248512268, "learning_rate": 0.00010508290312894735, "loss": 0.3599, "step": 20190 }, { "epoch": 1.622124071471592, "grad_norm": 0.25291121006011963, "learning_rate": 0.0001050075366490348, "loss": 0.3517, "step": 20200 }, { "epoch": 1.6229271230676572, "grad_norm": 0.20610889792442322, "learning_rate": 0.00010493217016437889, "loss": 0.3257, "step": 20210 }, { "epoch": 1.6237301746637223, "grad_norm": 0.26363039016723633, "learning_rate": 0.00010485680372241336, "loss": 0.3644, "step": 20220 }, { "epoch": 1.6245332262597874, "grad_norm": 0.2796023488044739, "learning_rate": 0.00010478143737057198, "loss": 0.3631, "step": 20230 }, { "epoch": 1.6253362778558522, "grad_norm": 0.21730650961399078, "learning_rate": 0.00010470607115628839, "loss": 0.376, "step": 20240 }, { "epoch": 1.6261393294519173, "grad_norm": 0.2713567912578583, "learning_rate": 0.00010463070512699617, "loss": 0.3396, "step": 20250 }, { "epoch": 1.6269423810479822, "grad_norm": 0.20855602622032166, "learning_rate": 0.00010455533933012886, "loss": 0.3655, "step": 20260 }, { "epoch": 1.6277454326440473, "grad_norm": 0.2624019682407379, "learning_rate": 0.00010447997381311982, "loss": 0.3797, "step": 20270 }, { "epoch": 1.6285484842401123, "grad_norm": 0.2667376399040222, "learning_rate": 0.00010440460862340211, "loss": 0.3716, "step": 20280 }, { "epoch": 1.6293515358361774, "grad_norm": 0.2661930024623871, "learning_rate": 0.00010432924380840876, "loss": 0.374, "step": 20290 }, { "epoch": 1.6301545874322425, "grad_norm": 0.22487317025661469, "learning_rate": 0.00010425387941557245, "loss": 0.36, "step": 20300 }, { "epoch": 1.6309576390283076, "grad_norm": 0.22290433943271637, "learning_rate": 0.00010417851549232563, "loss": 0.3463, "step": 20310 }, { "epoch": 1.6317606906243727, "grad_norm": 0.2038174569606781, "learning_rate": 0.00010410315208610042, "loss": 0.3387, "step": 20320 }, { "epoch": 1.6325637422204378, "grad_norm": 0.22879064083099365, "learning_rate": 0.0001040277892443287, "loss": 0.3449, "step": 20330 }, { "epoch": 1.6333667938165028, "grad_norm": 0.2338641732931137, "learning_rate": 0.00010395242701444199, "loss": 0.3681, "step": 20340 }, { "epoch": 1.6341698454125677, "grad_norm": 0.18378420174121857, "learning_rate": 0.00010387706544387126, "loss": 0.3435, "step": 20350 }, { "epoch": 1.6349728970086328, "grad_norm": 0.24389775097370148, "learning_rate": 0.00010380170458004728, "loss": 0.3925, "step": 20360 }, { "epoch": 1.6357759486046979, "grad_norm": 0.22245857119560242, "learning_rate": 0.00010372634447040019, "loss": 0.3347, "step": 20370 }, { "epoch": 1.6365790002007627, "grad_norm": 0.2272493988275528, "learning_rate": 0.00010365098516235986, "loss": 0.3572, "step": 20380 }, { "epoch": 1.6373820517968278, "grad_norm": 0.22848866879940033, "learning_rate": 0.00010357562670335543, "loss": 0.3705, "step": 20390 }, { "epoch": 1.638185103392893, "grad_norm": 0.218630850315094, "learning_rate": 0.00010350026914081568, "loss": 0.3839, "step": 20400 }, { "epoch": 1.638988154988958, "grad_norm": 0.34791046380996704, "learning_rate": 0.00010342491252216875, "loss": 0.3697, "step": 20410 }, { "epoch": 1.639791206585023, "grad_norm": 0.2211252599954605, "learning_rate": 0.0001033495568948422, "loss": 0.3592, "step": 20420 }, { "epoch": 1.6405942581810882, "grad_norm": 0.22575163841247559, "learning_rate": 0.0001032742023062629, "loss": 0.359, "step": 20430 }, { "epoch": 1.6413973097771533, "grad_norm": 0.22633859515190125, "learning_rate": 0.0001031988488038572, "loss": 0.355, "step": 20440 }, { "epoch": 1.6422003613732183, "grad_norm": 0.23095111548900604, "learning_rate": 0.00010312349643505066, "loss": 0.3521, "step": 20450 }, { "epoch": 1.6430034129692834, "grad_norm": 0.22721755504608154, "learning_rate": 0.00010304814524726824, "loss": 0.367, "step": 20460 }, { "epoch": 1.6438064645653483, "grad_norm": 0.2161111682653427, "learning_rate": 0.00010297279528793395, "loss": 0.3719, "step": 20470 }, { "epoch": 1.6446095161614134, "grad_norm": 0.25198647379875183, "learning_rate": 0.00010289744660447127, "loss": 0.3372, "step": 20480 }, { "epoch": 1.6454125677574785, "grad_norm": 0.22442254424095154, "learning_rate": 0.00010282209924430273, "loss": 0.3609, "step": 20490 }, { "epoch": 1.6462156193535433, "grad_norm": 0.2423294335603714, "learning_rate": 0.00010274675325485, "loss": 0.342, "step": 20500 }, { "epoch": 1.6470186709496084, "grad_norm": 0.23485930263996124, "learning_rate": 0.00010267140868353398, "loss": 0.3535, "step": 20510 }, { "epoch": 1.6478217225456735, "grad_norm": 0.22211475670337677, "learning_rate": 0.00010259606557777467, "loss": 0.3718, "step": 20520 }, { "epoch": 1.6486247741417386, "grad_norm": 0.23540081083774567, "learning_rate": 0.00010252072398499115, "loss": 0.3776, "step": 20530 }, { "epoch": 1.6494278257378037, "grad_norm": 0.2294946014881134, "learning_rate": 0.00010244538395260144, "loss": 0.3618, "step": 20540 }, { "epoch": 1.6502308773338688, "grad_norm": 0.22059917449951172, "learning_rate": 0.00010237004552802271, "loss": 0.3596, "step": 20550 }, { "epoch": 1.6510339289299338, "grad_norm": 0.26206329464912415, "learning_rate": 0.00010229470875867104, "loss": 0.3655, "step": 20560 }, { "epoch": 1.651836980525999, "grad_norm": 0.2479095458984375, "learning_rate": 0.00010221937369196155, "loss": 0.3857, "step": 20570 }, { "epoch": 1.652640032122064, "grad_norm": 0.2313258796930313, "learning_rate": 0.00010214404037530813, "loss": 0.3607, "step": 20580 }, { "epoch": 1.6534430837181289, "grad_norm": 0.23578694462776184, "learning_rate": 0.00010206870885612377, "loss": 0.3628, "step": 20590 }, { "epoch": 1.654246135314194, "grad_norm": 0.21331045031547546, "learning_rate": 0.00010199337918182018, "loss": 0.3281, "step": 20600 }, { "epoch": 1.6550491869102588, "grad_norm": 0.22536399960517883, "learning_rate": 0.00010191805139980799, "loss": 0.3515, "step": 20610 }, { "epoch": 1.655852238506324, "grad_norm": 0.21999405324459076, "learning_rate": 0.00010184272555749655, "loss": 0.3528, "step": 20620 }, { "epoch": 1.656655290102389, "grad_norm": 0.23177073895931244, "learning_rate": 0.00010176740170229409, "loss": 0.3543, "step": 20630 }, { "epoch": 1.657458341698454, "grad_norm": 0.2553349733352661, "learning_rate": 0.00010169207988160758, "loss": 0.3615, "step": 20640 }, { "epoch": 1.6582613932945192, "grad_norm": 0.21935677528381348, "learning_rate": 0.0001016167601428426, "loss": 0.3618, "step": 20650 }, { "epoch": 1.6590644448905842, "grad_norm": 0.23939910531044006, "learning_rate": 0.00010154144253340351, "loss": 0.3391, "step": 20660 }, { "epoch": 1.6598674964866493, "grad_norm": 0.22547832131385803, "learning_rate": 0.00010146612710069338, "loss": 0.354, "step": 20670 }, { "epoch": 1.6606705480827144, "grad_norm": 0.204453706741333, "learning_rate": 0.00010139081389211385, "loss": 0.3653, "step": 20680 }, { "epoch": 1.6614735996787795, "grad_norm": 0.26727473735809326, "learning_rate": 0.00010131550295506505, "loss": 0.3814, "step": 20690 }, { "epoch": 1.6622766512748444, "grad_norm": 0.192289799451828, "learning_rate": 0.00010124019433694583, "loss": 0.3299, "step": 20700 }, { "epoch": 1.6630797028709094, "grad_norm": 0.23895664513111115, "learning_rate": 0.00010116488808515355, "loss": 0.3409, "step": 20710 }, { "epoch": 1.6638827544669745, "grad_norm": 0.22268058359622955, "learning_rate": 0.00010108958424708412, "loss": 0.3491, "step": 20720 }, { "epoch": 1.6646858060630394, "grad_norm": 0.23104093968868256, "learning_rate": 0.0001010142828701318, "loss": 0.3605, "step": 20730 }, { "epoch": 1.6654888576591045, "grad_norm": 0.2057337909936905, "learning_rate": 0.00010093898400168939, "loss": 0.3708, "step": 20740 }, { "epoch": 1.6662919092551696, "grad_norm": 0.20756718516349792, "learning_rate": 0.00010086368768914814, "loss": 0.3687, "step": 20750 }, { "epoch": 1.6670949608512347, "grad_norm": 0.1979016214609146, "learning_rate": 0.00010078839397989763, "loss": 0.316, "step": 20760 }, { "epoch": 1.6678980124472997, "grad_norm": 0.22611697018146515, "learning_rate": 0.00010071310292132581, "loss": 0.346, "step": 20770 }, { "epoch": 1.6687010640433648, "grad_norm": 0.22984950244426727, "learning_rate": 0.00010063781456081898, "loss": 0.3503, "step": 20780 }, { "epoch": 1.66950411563943, "grad_norm": 0.19159242510795593, "learning_rate": 0.00010056252894576175, "loss": 0.3384, "step": 20790 }, { "epoch": 1.670307167235495, "grad_norm": 0.2034764140844345, "learning_rate": 0.00010048724612353701, "loss": 0.3419, "step": 20800 }, { "epoch": 1.67111021883156, "grad_norm": 0.2464211881160736, "learning_rate": 0.00010041196614152582, "loss": 0.3332, "step": 20810 }, { "epoch": 1.671913270427625, "grad_norm": 0.2674948573112488, "learning_rate": 0.00010033668904710751, "loss": 0.3625, "step": 20820 }, { "epoch": 1.67271632202369, "grad_norm": 0.18939973413944244, "learning_rate": 0.00010026141488765964, "loss": 0.3535, "step": 20830 }, { "epoch": 1.6735193736197551, "grad_norm": 0.20562082529067993, "learning_rate": 0.00010018614371055781, "loss": 0.3557, "step": 20840 }, { "epoch": 1.67432242521582, "grad_norm": 0.2507036328315735, "learning_rate": 0.00010011087556317582, "loss": 0.3406, "step": 20850 }, { "epoch": 1.675125476811885, "grad_norm": 0.23387818038463593, "learning_rate": 0.00010003561049288557, "loss": 0.3831, "step": 20860 }, { "epoch": 1.6759285284079501, "grad_norm": 0.1867886632680893, "learning_rate": 9.996034854705699e-05, "loss": 0.3445, "step": 20870 }, { "epoch": 1.6767315800040152, "grad_norm": 0.19693128764629364, "learning_rate": 9.9885089773058e-05, "loss": 0.3287, "step": 20880 }, { "epoch": 1.6775346316000803, "grad_norm": 0.246832937002182, "learning_rate": 9.980983421825462e-05, "loss": 0.3605, "step": 20890 }, { "epoch": 1.6783376831961454, "grad_norm": 0.22004716098308563, "learning_rate": 9.973458193001082e-05, "loss": 0.3549, "step": 20900 }, { "epoch": 1.6791407347922105, "grad_norm": 0.23711735010147095, "learning_rate": 9.96593329556885e-05, "loss": 0.3633, "step": 20910 }, { "epoch": 1.6799437863882756, "grad_norm": 0.2357361614704132, "learning_rate": 9.958408734264738e-05, "loss": 0.3618, "step": 20920 }, { "epoch": 1.6807468379843407, "grad_norm": 0.21374905109405518, "learning_rate": 9.950884513824527e-05, "loss": 0.379, "step": 20930 }, { "epoch": 1.6815498895804055, "grad_norm": 0.21120992302894592, "learning_rate": 9.94336063898376e-05, "loss": 0.3563, "step": 20940 }, { "epoch": 1.6823529411764706, "grad_norm": 0.25473085045814514, "learning_rate": 9.935837114477787e-05, "loss": 0.3378, "step": 20950 }, { "epoch": 1.6831559927725355, "grad_norm": 0.2114132046699524, "learning_rate": 9.928313945041713e-05, "loss": 0.376, "step": 20960 }, { "epoch": 1.6839590443686006, "grad_norm": 0.25723016262054443, "learning_rate": 9.920791135410435e-05, "loss": 0.3792, "step": 20970 }, { "epoch": 1.6847620959646656, "grad_norm": 0.25996133685112, "learning_rate": 9.913268690318627e-05, "loss": 0.377, "step": 20980 }, { "epoch": 1.6855651475607307, "grad_norm": 0.20694802701473236, "learning_rate": 9.905746614500717e-05, "loss": 0.3718, "step": 20990 }, { "epoch": 1.6863681991567958, "grad_norm": 0.23376159369945526, "learning_rate": 9.898224912690913e-05, "loss": 0.358, "step": 21000 }, { "epoch": 1.687171250752861, "grad_norm": 0.2547830045223236, "learning_rate": 9.890703589623184e-05, "loss": 0.353, "step": 21010 }, { "epoch": 1.687974302348926, "grad_norm": 0.22279663383960724, "learning_rate": 9.883182650031265e-05, "loss": 0.3351, "step": 21020 }, { "epoch": 1.688777353944991, "grad_norm": 0.2035725712776184, "learning_rate": 9.875662098648641e-05, "loss": 0.3473, "step": 21030 }, { "epoch": 1.6895804055410562, "grad_norm": 0.24942292273044586, "learning_rate": 9.868141940208558e-05, "loss": 0.3838, "step": 21040 }, { "epoch": 1.690383457137121, "grad_norm": 0.22124066948890686, "learning_rate": 9.86062217944402e-05, "loss": 0.361, "step": 21050 }, { "epoch": 1.691186508733186, "grad_norm": 0.2014891654253006, "learning_rate": 9.853102821087771e-05, "loss": 0.3437, "step": 21060 }, { "epoch": 1.6919895603292512, "grad_norm": 0.2280520796775818, "learning_rate": 9.845583869872301e-05, "loss": 0.3349, "step": 21070 }, { "epoch": 1.692792611925316, "grad_norm": 0.21021735668182373, "learning_rate": 9.838065330529855e-05, "loss": 0.3552, "step": 21080 }, { "epoch": 1.6935956635213811, "grad_norm": 0.26529762148857117, "learning_rate": 9.830547207792408e-05, "loss": 0.3572, "step": 21090 }, { "epoch": 1.6943987151174462, "grad_norm": 0.23544450104236603, "learning_rate": 9.823029506391684e-05, "loss": 0.3546, "step": 21100 }, { "epoch": 1.6952017667135113, "grad_norm": 0.21531236171722412, "learning_rate": 9.815512231059127e-05, "loss": 0.3404, "step": 21110 }, { "epoch": 1.6960048183095764, "grad_norm": 0.3488386869430542, "learning_rate": 9.807995386525922e-05, "loss": 0.3585, "step": 21120 }, { "epoch": 1.6968078699056415, "grad_norm": 0.23662441968917847, "learning_rate": 9.800478977522984e-05, "loss": 0.3712, "step": 21130 }, { "epoch": 1.6976109215017066, "grad_norm": 0.21003665030002594, "learning_rate": 9.792963008780953e-05, "loss": 0.3244, "step": 21140 }, { "epoch": 1.6984139730977716, "grad_norm": 0.2737422585487366, "learning_rate": 9.785447485030186e-05, "loss": 0.3712, "step": 21150 }, { "epoch": 1.6992170246938367, "grad_norm": 0.2392003983259201, "learning_rate": 9.777932411000763e-05, "loss": 0.3631, "step": 21160 }, { "epoch": 1.7000200762899016, "grad_norm": 0.24967364966869354, "learning_rate": 9.770417791422491e-05, "loss": 0.3468, "step": 21170 }, { "epoch": 1.7008231278859667, "grad_norm": 0.21049346029758453, "learning_rate": 9.762903631024875e-05, "loss": 0.3432, "step": 21180 }, { "epoch": 1.7016261794820318, "grad_norm": 0.2500723898410797, "learning_rate": 9.755389934537137e-05, "loss": 0.3763, "step": 21190 }, { "epoch": 1.7024292310780966, "grad_norm": 0.2505439817905426, "learning_rate": 9.747876706688212e-05, "loss": 0.3636, "step": 21200 }, { "epoch": 1.7032322826741617, "grad_norm": 0.24385294318199158, "learning_rate": 9.740363952206739e-05, "loss": 0.347, "step": 21210 }, { "epoch": 1.7040353342702268, "grad_norm": 0.2645030617713928, "learning_rate": 9.732851675821047e-05, "loss": 0.3375, "step": 21220 }, { "epoch": 1.7048383858662919, "grad_norm": 0.21130266785621643, "learning_rate": 9.72533988225918e-05, "loss": 0.3474, "step": 21230 }, { "epoch": 1.705641437462357, "grad_norm": 0.23759393393993378, "learning_rate": 9.717828576248873e-05, "loss": 0.3388, "step": 21240 }, { "epoch": 1.706444489058422, "grad_norm": 0.2236267477273941, "learning_rate": 9.710317762517552e-05, "loss": 0.352, "step": 21250 }, { "epoch": 1.7072475406544871, "grad_norm": 0.2540056109428406, "learning_rate": 9.702807445792327e-05, "loss": 0.3596, "step": 21260 }, { "epoch": 1.7080505922505522, "grad_norm": 0.23096424341201782, "learning_rate": 9.69529763080001e-05, "loss": 0.3274, "step": 21270 }, { "epoch": 1.7088536438466173, "grad_norm": 0.2289983481168747, "learning_rate": 9.687788322267086e-05, "loss": 0.3635, "step": 21280 }, { "epoch": 1.7096566954426822, "grad_norm": 0.24481850862503052, "learning_rate": 9.680279524919729e-05, "loss": 0.3576, "step": 21290 }, { "epoch": 1.7104597470387473, "grad_norm": 0.22816714644432068, "learning_rate": 9.67277124348378e-05, "loss": 0.3435, "step": 21300 }, { "epoch": 1.7112627986348121, "grad_norm": 0.25592148303985596, "learning_rate": 9.665263482684765e-05, "loss": 0.3539, "step": 21310 }, { "epoch": 1.7120658502308772, "grad_norm": 0.2397242784500122, "learning_rate": 9.657756247247882e-05, "loss": 0.3454, "step": 21320 }, { "epoch": 1.7128689018269423, "grad_norm": 0.2593490481376648, "learning_rate": 9.650249541897991e-05, "loss": 0.3816, "step": 21330 }, { "epoch": 1.7136719534230074, "grad_norm": 0.28466448187828064, "learning_rate": 9.642743371359625e-05, "loss": 0.3667, "step": 21340 }, { "epoch": 1.7144750050190725, "grad_norm": 0.30005940794944763, "learning_rate": 9.63523774035698e-05, "loss": 0.3498, "step": 21350 }, { "epoch": 1.7152780566151375, "grad_norm": 0.20771978795528412, "learning_rate": 9.627732653613912e-05, "loss": 0.3438, "step": 21360 }, { "epoch": 1.7160811082112026, "grad_norm": 0.19618244469165802, "learning_rate": 9.620228115853932e-05, "loss": 0.353, "step": 21370 }, { "epoch": 1.7168841598072677, "grad_norm": 0.22194652259349823, "learning_rate": 9.612724131800204e-05, "loss": 0.3684, "step": 21380 }, { "epoch": 1.7176872114033328, "grad_norm": 0.24738769233226776, "learning_rate": 9.60522070617555e-05, "loss": 0.3594, "step": 21390 }, { "epoch": 1.7184902629993977, "grad_norm": 0.2682954668998718, "learning_rate": 9.59771784370244e-05, "loss": 0.3574, "step": 21400 }, { "epoch": 1.7192933145954628, "grad_norm": 0.22649361193180084, "learning_rate": 9.590215549102976e-05, "loss": 0.3677, "step": 21410 }, { "epoch": 1.7200963661915278, "grad_norm": 0.20013847947120667, "learning_rate": 9.582713827098922e-05, "loss": 0.3638, "step": 21420 }, { "epoch": 1.7208994177875927, "grad_norm": 0.22820502519607544, "learning_rate": 9.575212682411673e-05, "loss": 0.3722, "step": 21430 }, { "epoch": 1.7217024693836578, "grad_norm": 0.22874954342842102, "learning_rate": 9.567712119762262e-05, "loss": 0.3717, "step": 21440 }, { "epoch": 1.7225055209797229, "grad_norm": 0.21984310448169708, "learning_rate": 9.560212143871344e-05, "loss": 0.3423, "step": 21450 }, { "epoch": 1.723308572575788, "grad_norm": 0.23317809402942657, "learning_rate": 9.552712759459221e-05, "loss": 0.3499, "step": 21460 }, { "epoch": 1.724111624171853, "grad_norm": 0.22859571874141693, "learning_rate": 9.545213971245818e-05, "loss": 0.3755, "step": 21470 }, { "epoch": 1.7249146757679181, "grad_norm": 0.23624634742736816, "learning_rate": 9.537715783950686e-05, "loss": 0.3519, "step": 21480 }, { "epoch": 1.7257177273639832, "grad_norm": 0.2251552939414978, "learning_rate": 9.530218202292988e-05, "loss": 0.3391, "step": 21490 }, { "epoch": 1.7265207789600483, "grad_norm": 0.2530650198459625, "learning_rate": 9.522721230991516e-05, "loss": 0.361, "step": 21500 }, { "epoch": 1.7273238305561134, "grad_norm": 0.25287941098213196, "learning_rate": 9.515224874764679e-05, "loss": 0.3667, "step": 21510 }, { "epoch": 1.7281268821521782, "grad_norm": 0.283319354057312, "learning_rate": 9.507729138330484e-05, "loss": 0.3656, "step": 21520 }, { "epoch": 1.7289299337482433, "grad_norm": 0.23311743140220642, "learning_rate": 9.500234026406569e-05, "loss": 0.3611, "step": 21530 }, { "epoch": 1.7297329853443084, "grad_norm": 0.18277983367443085, "learning_rate": 9.492739543710162e-05, "loss": 0.362, "step": 21540 }, { "epoch": 1.7305360369403733, "grad_norm": 0.2600090801715851, "learning_rate": 9.48524569495811e-05, "loss": 0.3531, "step": 21550 }, { "epoch": 1.7313390885364384, "grad_norm": 0.21518392860889435, "learning_rate": 9.477752484866844e-05, "loss": 0.3634, "step": 21560 }, { "epoch": 1.7321421401325034, "grad_norm": 0.18309509754180908, "learning_rate": 9.470259918152405e-05, "loss": 0.3489, "step": 21570 }, { "epoch": 1.7329451917285685, "grad_norm": 0.22069819271564484, "learning_rate": 9.462767999530422e-05, "loss": 0.3523, "step": 21580 }, { "epoch": 1.7337482433246336, "grad_norm": 0.22716714441776276, "learning_rate": 9.45527673371613e-05, "loss": 0.3457, "step": 21590 }, { "epoch": 1.7345512949206987, "grad_norm": 0.24055568873882294, "learning_rate": 9.447786125424331e-05, "loss": 0.3612, "step": 21600 }, { "epoch": 1.7353543465167638, "grad_norm": 0.20122528076171875, "learning_rate": 9.44029617936943e-05, "loss": 0.3481, "step": 21610 }, { "epoch": 1.7361573981128289, "grad_norm": 0.2335854172706604, "learning_rate": 9.432806900265412e-05, "loss": 0.3472, "step": 21620 }, { "epoch": 1.736960449708894, "grad_norm": 0.2302168756723404, "learning_rate": 9.42531829282584e-05, "loss": 0.3353, "step": 21630 }, { "epoch": 1.7377635013049588, "grad_norm": 0.20904570817947388, "learning_rate": 9.417830361763854e-05, "loss": 0.3536, "step": 21640 }, { "epoch": 1.738566552901024, "grad_norm": 0.2382701188325882, "learning_rate": 9.410343111792165e-05, "loss": 0.3799, "step": 21650 }, { "epoch": 1.7393696044970888, "grad_norm": 0.2240576148033142, "learning_rate": 9.402856547623069e-05, "loss": 0.3714, "step": 21660 }, { "epoch": 1.7401726560931539, "grad_norm": 0.22663499414920807, "learning_rate": 9.395370673968411e-05, "loss": 0.3734, "step": 21670 }, { "epoch": 1.740975707689219, "grad_norm": 0.2177506536245346, "learning_rate": 9.387885495539619e-05, "loss": 0.3488, "step": 21680 }, { "epoch": 1.741778759285284, "grad_norm": 0.2506709098815918, "learning_rate": 9.380401017047668e-05, "loss": 0.3606, "step": 21690 }, { "epoch": 1.7425818108813491, "grad_norm": 0.23382815718650818, "learning_rate": 9.37291724320311e-05, "loss": 0.3554, "step": 21700 }, { "epoch": 1.7433848624774142, "grad_norm": 0.24811317026615143, "learning_rate": 9.365434178716034e-05, "loss": 0.3716, "step": 21710 }, { "epoch": 1.7441879140734793, "grad_norm": 0.20578646659851074, "learning_rate": 9.357951828296096e-05, "loss": 0.3761, "step": 21720 }, { "epoch": 1.7449909656695444, "grad_norm": 0.2063874751329422, "learning_rate": 9.350470196652498e-05, "loss": 0.352, "step": 21730 }, { "epoch": 1.7457940172656095, "grad_norm": 0.25280505418777466, "learning_rate": 9.342989288493998e-05, "loss": 0.3609, "step": 21740 }, { "epoch": 1.7465970688616743, "grad_norm": 0.22083832323551178, "learning_rate": 9.335509108528881e-05, "loss": 0.3519, "step": 21750 }, { "epoch": 1.7474001204577394, "grad_norm": 0.23083768784999847, "learning_rate": 9.328029661464985e-05, "loss": 0.3667, "step": 21760 }, { "epoch": 1.7482031720538045, "grad_norm": 0.19731494784355164, "learning_rate": 9.320550952009688e-05, "loss": 0.3419, "step": 21770 }, { "epoch": 1.7490062236498694, "grad_norm": 0.28434306383132935, "learning_rate": 9.313072984869905e-05, "loss": 0.3706, "step": 21780 }, { "epoch": 1.7498092752459344, "grad_norm": 0.18695014715194702, "learning_rate": 9.305595764752073e-05, "loss": 0.3358, "step": 21790 }, { "epoch": 1.7506123268419995, "grad_norm": 0.23911550641059875, "learning_rate": 9.29811929636217e-05, "loss": 0.3456, "step": 21800 }, { "epoch": 1.7514153784380646, "grad_norm": 0.2290857434272766, "learning_rate": 9.290643584405697e-05, "loss": 0.3665, "step": 21810 }, { "epoch": 1.7522184300341297, "grad_norm": 0.2196226716041565, "learning_rate": 9.283168633587678e-05, "loss": 0.3531, "step": 21820 }, { "epoch": 1.7530214816301948, "grad_norm": 0.20125652849674225, "learning_rate": 9.275694448612657e-05, "loss": 0.354, "step": 21830 }, { "epoch": 1.7538245332262599, "grad_norm": 0.24214617908000946, "learning_rate": 9.268221034184697e-05, "loss": 0.3682, "step": 21840 }, { "epoch": 1.754627584822325, "grad_norm": 0.2836051881313324, "learning_rate": 9.260748395007385e-05, "loss": 0.3736, "step": 21850 }, { "epoch": 1.75543063641839, "grad_norm": 0.2662922441959381, "learning_rate": 9.253276535783801e-05, "loss": 0.362, "step": 21860 }, { "epoch": 1.756233688014455, "grad_norm": 0.28595948219299316, "learning_rate": 9.24580546121655e-05, "loss": 0.386, "step": 21870 }, { "epoch": 1.75703673961052, "grad_norm": 0.23771610856056213, "learning_rate": 9.238335176007738e-05, "loss": 0.3589, "step": 21880 }, { "epoch": 1.757839791206585, "grad_norm": 0.24067102372646332, "learning_rate": 9.230865684858975e-05, "loss": 0.3695, "step": 21890 }, { "epoch": 1.75864284280265, "grad_norm": 0.2023199498653412, "learning_rate": 9.223396992471367e-05, "loss": 0.3606, "step": 21900 }, { "epoch": 1.759445894398715, "grad_norm": 0.2203502506017685, "learning_rate": 9.215929103545524e-05, "loss": 0.3607, "step": 21910 }, { "epoch": 1.76024894599478, "grad_norm": 0.21028228104114532, "learning_rate": 9.208462022781544e-05, "loss": 0.3737, "step": 21920 }, { "epoch": 1.7610519975908452, "grad_norm": 0.24282899498939514, "learning_rate": 9.200995754879028e-05, "loss": 0.3723, "step": 21930 }, { "epoch": 1.7618550491869103, "grad_norm": 0.20173656940460205, "learning_rate": 9.193530304537047e-05, "loss": 0.3234, "step": 21940 }, { "epoch": 1.7626581007829754, "grad_norm": 0.23165814578533173, "learning_rate": 9.18606567645417e-05, "loss": 0.3487, "step": 21950 }, { "epoch": 1.7634611523790404, "grad_norm": 0.2478218376636505, "learning_rate": 9.178601875328447e-05, "loss": 0.3305, "step": 21960 }, { "epoch": 1.7642642039751055, "grad_norm": 0.21844498813152313, "learning_rate": 9.171138905857408e-05, "loss": 0.3493, "step": 21970 }, { "epoch": 1.7650672555711706, "grad_norm": 0.26850003004074097, "learning_rate": 9.163676772738054e-05, "loss": 0.3798, "step": 21980 }, { "epoch": 1.7658703071672355, "grad_norm": 0.21571029722690582, "learning_rate": 9.156215480666865e-05, "loss": 0.3281, "step": 21990 }, { "epoch": 1.7666733587633006, "grad_norm": 0.21801888942718506, "learning_rate": 9.148755034339796e-05, "loss": 0.3504, "step": 22000 }, { "epoch": 1.7674764103593654, "grad_norm": 0.24234731495380402, "learning_rate": 9.141295438452255e-05, "loss": 0.3595, "step": 22010 }, { "epoch": 1.7682794619554305, "grad_norm": 0.22811876237392426, "learning_rate": 9.133836697699125e-05, "loss": 0.3581, "step": 22020 }, { "epoch": 1.7690825135514956, "grad_norm": 0.20280469954013824, "learning_rate": 9.126378816774754e-05, "loss": 0.3716, "step": 22030 }, { "epoch": 1.7698855651475607, "grad_norm": 0.22133313119411469, "learning_rate": 9.118921800372945e-05, "loss": 0.3552, "step": 22040 }, { "epoch": 1.7706886167436258, "grad_norm": 0.2021932452917099, "learning_rate": 9.111465653186949e-05, "loss": 0.3656, "step": 22050 }, { "epoch": 1.7714916683396909, "grad_norm": 0.22263488173484802, "learning_rate": 9.104010379909484e-05, "loss": 0.331, "step": 22060 }, { "epoch": 1.772294719935756, "grad_norm": 0.19840997457504272, "learning_rate": 9.096555985232706e-05, "loss": 0.3806, "step": 22070 }, { "epoch": 1.773097771531821, "grad_norm": 0.18405921757221222, "learning_rate": 9.089102473848231e-05, "loss": 0.3468, "step": 22080 }, { "epoch": 1.773900823127886, "grad_norm": 0.22060248255729675, "learning_rate": 9.081649850447103e-05, "loss": 0.3421, "step": 22090 }, { "epoch": 1.774703874723951, "grad_norm": 0.2756880223751068, "learning_rate": 9.074198119719816e-05, "loss": 0.3507, "step": 22100 }, { "epoch": 1.775506926320016, "grad_norm": 0.24122105538845062, "learning_rate": 9.066747286356305e-05, "loss": 0.3409, "step": 22110 }, { "epoch": 1.7763099779160811, "grad_norm": 0.22113178670406342, "learning_rate": 9.059297355045941e-05, "loss": 0.3507, "step": 22120 }, { "epoch": 1.777113029512146, "grad_norm": 0.22778575122356415, "learning_rate": 9.051848330477515e-05, "loss": 0.3418, "step": 22130 }, { "epoch": 1.777916081108211, "grad_norm": 0.2502182126045227, "learning_rate": 9.044400217339255e-05, "loss": 0.3428, "step": 22140 }, { "epoch": 1.7787191327042762, "grad_norm": 0.2641085982322693, "learning_rate": 9.03695302031882e-05, "loss": 0.3651, "step": 22150 }, { "epoch": 1.7795221843003413, "grad_norm": 0.23941686749458313, "learning_rate": 9.029506744103292e-05, "loss": 0.3695, "step": 22160 }, { "epoch": 1.7803252358964063, "grad_norm": 0.24728423357009888, "learning_rate": 9.02206139337916e-05, "loss": 0.3341, "step": 22170 }, { "epoch": 1.7811282874924714, "grad_norm": 0.20256154239177704, "learning_rate": 9.014616972832346e-05, "loss": 0.3403, "step": 22180 }, { "epoch": 1.7819313390885365, "grad_norm": 0.21633243560791016, "learning_rate": 9.007173487148181e-05, "loss": 0.3539, "step": 22190 }, { "epoch": 1.7827343906846016, "grad_norm": 0.21422742307186127, "learning_rate": 8.999730941011404e-05, "loss": 0.3425, "step": 22200 }, { "epoch": 1.7835374422806667, "grad_norm": 0.20312748849391937, "learning_rate": 8.992289339106167e-05, "loss": 0.31, "step": 22210 }, { "epoch": 1.7843404938767315, "grad_norm": 0.1958518922328949, "learning_rate": 8.984848686116026e-05, "loss": 0.3165, "step": 22220 }, { "epoch": 1.7851435454727966, "grad_norm": 0.2386859953403473, "learning_rate": 8.977408986723949e-05, "loss": 0.3559, "step": 22230 }, { "epoch": 1.7859465970688617, "grad_norm": 0.23799726366996765, "learning_rate": 8.969970245612282e-05, "loss": 0.3884, "step": 22240 }, { "epoch": 1.7867496486649266, "grad_norm": 0.23579570651054382, "learning_rate": 8.96253246746279e-05, "loss": 0.3422, "step": 22250 }, { "epoch": 1.7875527002609917, "grad_norm": 0.22676023840904236, "learning_rate": 8.95509565695662e-05, "loss": 0.3575, "step": 22260 }, { "epoch": 1.7883557518570568, "grad_norm": 0.25784775614738464, "learning_rate": 8.947659818774318e-05, "loss": 0.3378, "step": 22270 }, { "epoch": 1.7891588034531218, "grad_norm": 0.24826961755752563, "learning_rate": 8.940224957595803e-05, "loss": 0.3442, "step": 22280 }, { "epoch": 1.789961855049187, "grad_norm": 0.24150045216083527, "learning_rate": 8.932791078100397e-05, "loss": 0.3613, "step": 22290 }, { "epoch": 1.790764906645252, "grad_norm": 0.2954409718513489, "learning_rate": 8.925358184966793e-05, "loss": 0.3643, "step": 22300 }, { "epoch": 1.791567958241317, "grad_norm": 0.22633011639118195, "learning_rate": 8.917926282873075e-05, "loss": 0.3577, "step": 22310 }, { "epoch": 1.7923710098373822, "grad_norm": 0.22908411920070648, "learning_rate": 8.910495376496683e-05, "loss": 0.3633, "step": 22320 }, { "epoch": 1.7931740614334473, "grad_norm": 0.28764116764068604, "learning_rate": 8.903065470514447e-05, "loss": 0.3659, "step": 22330 }, { "epoch": 1.7939771130295121, "grad_norm": 0.2382822334766388, "learning_rate": 8.895636569602567e-05, "loss": 0.3726, "step": 22340 }, { "epoch": 1.7947801646255772, "grad_norm": 0.2438185065984726, "learning_rate": 8.888208678436601e-05, "loss": 0.3744, "step": 22350 }, { "epoch": 1.795583216221642, "grad_norm": 0.20823155343532562, "learning_rate": 8.880781801691475e-05, "loss": 0.3279, "step": 22360 }, { "epoch": 1.7963862678177072, "grad_norm": 0.20972733199596405, "learning_rate": 8.873355944041482e-05, "loss": 0.3617, "step": 22370 }, { "epoch": 1.7971893194137722, "grad_norm": 0.16829316318035126, "learning_rate": 8.86593111016027e-05, "loss": 0.3306, "step": 22380 }, { "epoch": 1.7979923710098373, "grad_norm": 0.22438661754131317, "learning_rate": 8.85850730472084e-05, "loss": 0.3669, "step": 22390 }, { "epoch": 1.7987954226059024, "grad_norm": 0.2003881335258484, "learning_rate": 8.851084532395546e-05, "loss": 0.3248, "step": 22400 }, { "epoch": 1.7995984742019675, "grad_norm": 0.24102091789245605, "learning_rate": 8.843662797856099e-05, "loss": 0.3445, "step": 22410 }, { "epoch": 1.8004015257980326, "grad_norm": 0.19846133887767792, "learning_rate": 8.836242105773552e-05, "loss": 0.3361, "step": 22420 }, { "epoch": 1.8012045773940977, "grad_norm": 0.22576576471328735, "learning_rate": 8.828822460818296e-05, "loss": 0.3542, "step": 22430 }, { "epoch": 1.8020076289901628, "grad_norm": 0.21784177422523499, "learning_rate": 8.821403867660073e-05, "loss": 0.3527, "step": 22440 }, { "epoch": 1.8028106805862276, "grad_norm": 0.2283320128917694, "learning_rate": 8.813986330967955e-05, "loss": 0.3636, "step": 22450 }, { "epoch": 1.8036137321822927, "grad_norm": 0.21136026084423065, "learning_rate": 8.806569855410358e-05, "loss": 0.3659, "step": 22460 }, { "epoch": 1.8044167837783578, "grad_norm": 0.17944742739200592, "learning_rate": 8.799154445655019e-05, "loss": 0.354, "step": 22470 }, { "epoch": 1.8052198353744227, "grad_norm": 0.24452611804008484, "learning_rate": 8.791740106369013e-05, "loss": 0.3484, "step": 22480 }, { "epoch": 1.8060228869704877, "grad_norm": 0.24651969969272614, "learning_rate": 8.784326842218737e-05, "loss": 0.3615, "step": 22490 }, { "epoch": 1.8068259385665528, "grad_norm": 0.22076578438282013, "learning_rate": 8.776914657869919e-05, "loss": 0.3631, "step": 22500 }, { "epoch": 1.807628990162618, "grad_norm": 0.22758103907108307, "learning_rate": 8.769503557987591e-05, "loss": 0.3536, "step": 22510 }, { "epoch": 1.808432041758683, "grad_norm": 0.2516574561595917, "learning_rate": 8.762093547236119e-05, "loss": 0.3707, "step": 22520 }, { "epoch": 1.809235093354748, "grad_norm": 0.2854478359222412, "learning_rate": 8.754684630279178e-05, "loss": 0.3428, "step": 22530 }, { "epoch": 1.8100381449508132, "grad_norm": 0.20754745602607727, "learning_rate": 8.747276811779745e-05, "loss": 0.3478, "step": 22540 }, { "epoch": 1.8108411965468783, "grad_norm": 0.3421103060245514, "learning_rate": 8.739870096400122e-05, "loss": 0.3483, "step": 22550 }, { "epoch": 1.8116442481429433, "grad_norm": 0.22516076266765594, "learning_rate": 8.732464488801909e-05, "loss": 0.3602, "step": 22560 }, { "epoch": 1.8124472997390082, "grad_norm": 0.20061512291431427, "learning_rate": 8.725059993646008e-05, "loss": 0.3527, "step": 22570 }, { "epoch": 1.8132503513350733, "grad_norm": 0.24947701394557953, "learning_rate": 8.717656615592618e-05, "loss": 0.3662, "step": 22580 }, { "epoch": 1.8140534029311384, "grad_norm": 0.2566344738006592, "learning_rate": 8.710254359301242e-05, "loss": 0.3493, "step": 22590 }, { "epoch": 1.8148564545272032, "grad_norm": 0.23884867131710052, "learning_rate": 8.702853229430672e-05, "loss": 0.3531, "step": 22600 }, { "epoch": 1.8156595061232683, "grad_norm": 0.2597128748893738, "learning_rate": 8.695453230638997e-05, "loss": 0.3355, "step": 22610 }, { "epoch": 1.8164625577193334, "grad_norm": 0.23023967444896698, "learning_rate": 8.688054367583583e-05, "loss": 0.3593, "step": 22620 }, { "epoch": 1.8172656093153985, "grad_norm": 0.193950816988945, "learning_rate": 8.680656644921094e-05, "loss": 0.3667, "step": 22630 }, { "epoch": 1.8180686609114636, "grad_norm": 0.19820623099803925, "learning_rate": 8.673260067307464e-05, "loss": 0.3554, "step": 22640 }, { "epoch": 1.8188717125075287, "grad_norm": 0.23550105094909668, "learning_rate": 8.665864639397922e-05, "loss": 0.3611, "step": 22650 }, { "epoch": 1.8196747641035937, "grad_norm": 0.21555687487125397, "learning_rate": 8.658470365846953e-05, "loss": 0.347, "step": 22660 }, { "epoch": 1.8204778156996588, "grad_norm": 0.20896002650260925, "learning_rate": 8.65107725130833e-05, "loss": 0.352, "step": 22670 }, { "epoch": 1.821280867295724, "grad_norm": 0.24384430050849915, "learning_rate": 8.6436853004351e-05, "loss": 0.3521, "step": 22680 }, { "epoch": 1.8220839188917888, "grad_norm": 0.24289828538894653, "learning_rate": 8.636294517879562e-05, "loss": 0.3272, "step": 22690 }, { "epoch": 1.8228869704878539, "grad_norm": 0.2224525362253189, "learning_rate": 8.628904908293292e-05, "loss": 0.3449, "step": 22700 }, { "epoch": 1.8236900220839187, "grad_norm": 0.23473824560642242, "learning_rate": 8.621516476327121e-05, "loss": 0.3723, "step": 22710 }, { "epoch": 1.8244930736799838, "grad_norm": 0.24102257192134857, "learning_rate": 8.614129226631151e-05, "loss": 0.3712, "step": 22720 }, { "epoch": 1.825296125276049, "grad_norm": 0.2074304223060608, "learning_rate": 8.606743163854719e-05, "loss": 0.3539, "step": 22730 }, { "epoch": 1.826099176872114, "grad_norm": 0.22692358493804932, "learning_rate": 8.599358292646432e-05, "loss": 0.3642, "step": 22740 }, { "epoch": 1.826902228468179, "grad_norm": 0.21791404485702515, "learning_rate": 8.591974617654143e-05, "loss": 0.3476, "step": 22750 }, { "epoch": 1.8277052800642442, "grad_norm": 0.23091265559196472, "learning_rate": 8.58459214352495e-05, "loss": 0.3517, "step": 22760 }, { "epoch": 1.8285083316603092, "grad_norm": 0.24024757742881775, "learning_rate": 8.577210874905196e-05, "loss": 0.3597, "step": 22770 }, { "epoch": 1.8293113832563743, "grad_norm": 0.25507667660713196, "learning_rate": 8.569830816440464e-05, "loss": 0.3332, "step": 22780 }, { "epoch": 1.8301144348524394, "grad_norm": 0.2168031930923462, "learning_rate": 8.562451972775577e-05, "loss": 0.354, "step": 22790 }, { "epoch": 1.8309174864485043, "grad_norm": 0.20592649281024933, "learning_rate": 8.555074348554599e-05, "loss": 0.3602, "step": 22800 }, { "epoch": 1.8317205380445694, "grad_norm": 0.2282685488462448, "learning_rate": 8.547697948420814e-05, "loss": 0.3552, "step": 22810 }, { "epoch": 1.8325235896406344, "grad_norm": 0.21493947505950928, "learning_rate": 8.540322777016745e-05, "loss": 0.343, "step": 22820 }, { "epoch": 1.8333266412366993, "grad_norm": 0.25337639451026917, "learning_rate": 8.532948838984137e-05, "loss": 0.3611, "step": 22830 }, { "epoch": 1.8341296928327644, "grad_norm": 0.22643820941448212, "learning_rate": 8.525576138963967e-05, "loss": 0.3572, "step": 22840 }, { "epoch": 1.8349327444288295, "grad_norm": 0.23571129143238068, "learning_rate": 8.518204681596418e-05, "loss": 0.3494, "step": 22850 }, { "epoch": 1.8357357960248946, "grad_norm": 0.24769364297389984, "learning_rate": 8.510834471520904e-05, "loss": 0.3693, "step": 22860 }, { "epoch": 1.8365388476209596, "grad_norm": 0.23470690846443176, "learning_rate": 8.503465513376053e-05, "loss": 0.3273, "step": 22870 }, { "epoch": 1.8373418992170247, "grad_norm": 0.25755101442337036, "learning_rate": 8.496097811799693e-05, "loss": 0.3872, "step": 22880 }, { "epoch": 1.8381449508130898, "grad_norm": 0.20236191153526306, "learning_rate": 8.488731371428877e-05, "loss": 0.3331, "step": 22890 }, { "epoch": 1.838948002409155, "grad_norm": 0.24138781428337097, "learning_rate": 8.481366196899854e-05, "loss": 0.3583, "step": 22900 }, { "epoch": 1.83975105400522, "grad_norm": 0.2207489162683487, "learning_rate": 8.474002292848082e-05, "loss": 0.3563, "step": 22910 }, { "epoch": 1.8405541056012849, "grad_norm": 0.24866652488708496, "learning_rate": 8.466639663908208e-05, "loss": 0.3588, "step": 22920 }, { "epoch": 1.84135715719735, "grad_norm": 0.2187635451555252, "learning_rate": 8.459278314714095e-05, "loss": 0.3245, "step": 22930 }, { "epoch": 1.842160208793415, "grad_norm": 0.2418372631072998, "learning_rate": 8.451918249898788e-05, "loss": 0.3669, "step": 22940 }, { "epoch": 1.8429632603894799, "grad_norm": 0.2322666496038437, "learning_rate": 8.444559474094525e-05, "loss": 0.3588, "step": 22950 }, { "epoch": 1.843766311985545, "grad_norm": 0.1734633892774582, "learning_rate": 8.437201991932735e-05, "loss": 0.3534, "step": 22960 }, { "epoch": 1.84456936358161, "grad_norm": 0.23501993715763092, "learning_rate": 8.429845808044029e-05, "loss": 0.318, "step": 22970 }, { "epoch": 1.8453724151776751, "grad_norm": 0.25197839736938477, "learning_rate": 8.422490927058206e-05, "loss": 0.3463, "step": 22980 }, { "epoch": 1.8461754667737402, "grad_norm": 0.20204994082450867, "learning_rate": 8.415137353604247e-05, "loss": 0.3523, "step": 22990 }, { "epoch": 1.8469785183698053, "grad_norm": 0.2414935976266861, "learning_rate": 8.407785092310299e-05, "loss": 0.3472, "step": 23000 }, { "epoch": 1.8477815699658704, "grad_norm": 0.21485155820846558, "learning_rate": 8.400434147803693e-05, "loss": 0.3437, "step": 23010 }, { "epoch": 1.8485846215619355, "grad_norm": 0.21599480509757996, "learning_rate": 8.393084524710929e-05, "loss": 0.337, "step": 23020 }, { "epoch": 1.8493876731580006, "grad_norm": 0.2764146327972412, "learning_rate": 8.385736227657673e-05, "loss": 0.3662, "step": 23030 }, { "epoch": 1.8501907247540654, "grad_norm": 0.211295023560524, "learning_rate": 8.378389261268756e-05, "loss": 0.3353, "step": 23040 }, { "epoch": 1.8509937763501305, "grad_norm": 0.23166891932487488, "learning_rate": 8.371043630168178e-05, "loss": 0.3615, "step": 23050 }, { "epoch": 1.8517968279461954, "grad_norm": 0.22762027382850647, "learning_rate": 8.363699338979094e-05, "loss": 0.3574, "step": 23060 }, { "epoch": 1.8525998795422605, "grad_norm": 0.19282010197639465, "learning_rate": 8.35635639232381e-05, "loss": 0.3563, "step": 23070 }, { "epoch": 1.8534029311383255, "grad_norm": 0.20518025755882263, "learning_rate": 8.3490147948238e-05, "loss": 0.3405, "step": 23080 }, { "epoch": 1.8542059827343906, "grad_norm": 0.18850253522396088, "learning_rate": 8.341674551099672e-05, "loss": 0.3339, "step": 23090 }, { "epoch": 1.8550090343304557, "grad_norm": 0.22141219675540924, "learning_rate": 8.3343356657712e-05, "loss": 0.3315, "step": 23100 }, { "epoch": 1.8558120859265208, "grad_norm": 0.202422097325325, "learning_rate": 8.326998143457282e-05, "loss": 0.3429, "step": 23110 }, { "epoch": 1.856615137522586, "grad_norm": 0.2283453494310379, "learning_rate": 8.319661988775977e-05, "loss": 0.3547, "step": 23120 }, { "epoch": 1.857418189118651, "grad_norm": 0.21976958215236664, "learning_rate": 8.312327206344475e-05, "loss": 0.352, "step": 23130 }, { "epoch": 1.858221240714716, "grad_norm": 0.22879275679588318, "learning_rate": 8.304993800779104e-05, "loss": 0.3413, "step": 23140 }, { "epoch": 1.859024292310781, "grad_norm": 0.31202003359794617, "learning_rate": 8.297661776695323e-05, "loss": 0.355, "step": 23150 }, { "epoch": 1.859827343906846, "grad_norm": 0.2406323105096817, "learning_rate": 8.29033113870772e-05, "loss": 0.3606, "step": 23160 }, { "epoch": 1.860630395502911, "grad_norm": 0.3028995394706726, "learning_rate": 8.283001891430021e-05, "loss": 0.3366, "step": 23170 }, { "epoch": 1.861433447098976, "grad_norm": 0.23760391771793365, "learning_rate": 8.275674039475068e-05, "loss": 0.3658, "step": 23180 }, { "epoch": 1.862236498695041, "grad_norm": 0.22613467276096344, "learning_rate": 8.26834758745482e-05, "loss": 0.3474, "step": 23190 }, { "epoch": 1.8630395502911061, "grad_norm": 0.2023455649614334, "learning_rate": 8.261022539980369e-05, "loss": 0.3449, "step": 23200 }, { "epoch": 1.8638426018871712, "grad_norm": 0.25055548548698425, "learning_rate": 8.253698901661909e-05, "loss": 0.3589, "step": 23210 }, { "epoch": 1.8646456534832363, "grad_norm": 0.2355741411447525, "learning_rate": 8.24637667710876e-05, "loss": 0.3222, "step": 23220 }, { "epoch": 1.8654487050793014, "grad_norm": 0.21974754333496094, "learning_rate": 8.239055870929338e-05, "loss": 0.3268, "step": 23230 }, { "epoch": 1.8662517566753665, "grad_norm": 0.24713043868541718, "learning_rate": 8.231736487731177e-05, "loss": 0.3421, "step": 23240 }, { "epoch": 1.8670548082714316, "grad_norm": 0.2046034336090088, "learning_rate": 8.224418532120919e-05, "loss": 0.3441, "step": 23250 }, { "epoch": 1.8678578598674966, "grad_norm": 0.2254888266324997, "learning_rate": 8.21710200870429e-05, "loss": 0.3659, "step": 23260 }, { "epoch": 1.8686609114635615, "grad_norm": 0.21269816160202026, "learning_rate": 8.209786922086133e-05, "loss": 0.3573, "step": 23270 }, { "epoch": 1.8694639630596266, "grad_norm": 0.2005157172679901, "learning_rate": 8.202473276870376e-05, "loss": 0.3449, "step": 23280 }, { "epoch": 1.8702670146556917, "grad_norm": 0.2899685204029083, "learning_rate": 8.195161077660045e-05, "loss": 0.3673, "step": 23290 }, { "epoch": 1.8710700662517565, "grad_norm": 0.23009485006332397, "learning_rate": 8.187850329057251e-05, "loss": 0.3503, "step": 23300 }, { "epoch": 1.8718731178478216, "grad_norm": 0.2566055655479431, "learning_rate": 8.180541035663197e-05, "loss": 0.4065, "step": 23310 }, { "epoch": 1.8726761694438867, "grad_norm": 0.2027304619550705, "learning_rate": 8.173233202078167e-05, "loss": 0.3587, "step": 23320 }, { "epoch": 1.8734792210399518, "grad_norm": 0.30631059408187866, "learning_rate": 8.165926832901531e-05, "loss": 0.3081, "step": 23330 }, { "epoch": 1.8742822726360169, "grad_norm": 0.2896544337272644, "learning_rate": 8.15862193273173e-05, "loss": 0.3595, "step": 23340 }, { "epoch": 1.875085324232082, "grad_norm": 0.2415016144514084, "learning_rate": 8.151318506166282e-05, "loss": 0.3589, "step": 23350 }, { "epoch": 1.875888375828147, "grad_norm": 0.23022019863128662, "learning_rate": 8.144016557801786e-05, "loss": 0.3542, "step": 23360 }, { "epoch": 1.8766914274242121, "grad_norm": 0.23831671476364136, "learning_rate": 8.136716092233895e-05, "loss": 0.3749, "step": 23370 }, { "epoch": 1.8774944790202772, "grad_norm": 0.25214648246765137, "learning_rate": 8.129417114057347e-05, "loss": 0.3497, "step": 23380 }, { "epoch": 1.878297530616342, "grad_norm": 0.2664155662059784, "learning_rate": 8.122119627865933e-05, "loss": 0.3462, "step": 23390 }, { "epoch": 1.8791005822124072, "grad_norm": 0.21797269582748413, "learning_rate": 8.114823638252506e-05, "loss": 0.3559, "step": 23400 }, { "epoch": 1.879903633808472, "grad_norm": 0.26186615228652954, "learning_rate": 8.107529149808976e-05, "loss": 0.334, "step": 23410 }, { "epoch": 1.8807066854045371, "grad_norm": 0.2288975864648819, "learning_rate": 8.100236167126311e-05, "loss": 0.3514, "step": 23420 }, { "epoch": 1.8815097370006022, "grad_norm": 0.21106231212615967, "learning_rate": 8.092944694794531e-05, "loss": 0.3478, "step": 23430 }, { "epoch": 1.8823127885966673, "grad_norm": 0.21461106836795807, "learning_rate": 8.08565473740271e-05, "loss": 0.3487, "step": 23440 }, { "epoch": 1.8831158401927324, "grad_norm": 0.25448545813560486, "learning_rate": 8.078366299538955e-05, "loss": 0.35, "step": 23450 }, { "epoch": 1.8839188917887975, "grad_norm": 0.2119607925415039, "learning_rate": 8.07107938579043e-05, "loss": 0.3372, "step": 23460 }, { "epoch": 1.8847219433848625, "grad_norm": 0.20407943427562714, "learning_rate": 8.063794000743332e-05, "loss": 0.3155, "step": 23470 }, { "epoch": 1.8855249949809276, "grad_norm": 0.22257941961288452, "learning_rate": 8.056510148982904e-05, "loss": 0.3394, "step": 23480 }, { "epoch": 1.8863280465769927, "grad_norm": 0.2115587741136551, "learning_rate": 8.049227835093413e-05, "loss": 0.3701, "step": 23490 }, { "epoch": 1.8871310981730576, "grad_norm": 0.2510697841644287, "learning_rate": 8.041947063658164e-05, "loss": 0.3622, "step": 23500 }, { "epoch": 1.8879341497691227, "grad_norm": 0.22234708070755005, "learning_rate": 8.034667839259498e-05, "loss": 0.3622, "step": 23510 }, { "epoch": 1.8887372013651877, "grad_norm": 0.20119698345661163, "learning_rate": 8.027390166478768e-05, "loss": 0.36, "step": 23520 }, { "epoch": 1.8895402529612526, "grad_norm": 0.21293535828590393, "learning_rate": 8.020114049896362e-05, "loss": 0.3497, "step": 23530 }, { "epoch": 1.8903433045573177, "grad_norm": 0.20179183781147003, "learning_rate": 8.012839494091682e-05, "loss": 0.3491, "step": 23540 }, { "epoch": 1.8911463561533828, "grad_norm": 0.22621209919452667, "learning_rate": 8.005566503643154e-05, "loss": 0.3404, "step": 23550 }, { "epoch": 1.8919494077494479, "grad_norm": 0.28432193398475647, "learning_rate": 7.998295083128207e-05, "loss": 0.3382, "step": 23560 }, { "epoch": 1.892752459345513, "grad_norm": 0.1970234513282776, "learning_rate": 7.991025237123297e-05, "loss": 0.3659, "step": 23570 }, { "epoch": 1.893555510941578, "grad_norm": 0.27643126249313354, "learning_rate": 7.98375697020388e-05, "loss": 0.3674, "step": 23580 }, { "epoch": 1.8943585625376431, "grad_norm": 0.26336586475372314, "learning_rate": 7.976490286944421e-05, "loss": 0.3179, "step": 23590 }, { "epoch": 1.8951616141337082, "grad_norm": 0.22118203341960907, "learning_rate": 7.969225191918386e-05, "loss": 0.3504, "step": 23600 }, { "epoch": 1.8959646657297733, "grad_norm": 0.23425814509391785, "learning_rate": 7.96196168969824e-05, "loss": 0.3715, "step": 23610 }, { "epoch": 1.8967677173258382, "grad_norm": 0.18626368045806885, "learning_rate": 7.954699784855452e-05, "loss": 0.3604, "step": 23620 }, { "epoch": 1.8975707689219032, "grad_norm": 0.22138318419456482, "learning_rate": 7.947439481960484e-05, "loss": 0.3571, "step": 23630 }, { "epoch": 1.8983738205179683, "grad_norm": 0.24255208671092987, "learning_rate": 7.940180785582781e-05, "loss": 0.3489, "step": 23640 }, { "epoch": 1.8991768721140332, "grad_norm": 0.23412851989269257, "learning_rate": 7.932923700290791e-05, "loss": 0.3525, "step": 23650 }, { "epoch": 1.8999799237100983, "grad_norm": 0.25527167320251465, "learning_rate": 7.925668230651932e-05, "loss": 0.3448, "step": 23660 }, { "epoch": 1.9007829753061634, "grad_norm": 0.2420099526643753, "learning_rate": 7.918414381232626e-05, "loss": 0.3334, "step": 23670 }, { "epoch": 1.9015860269022284, "grad_norm": 0.28295740485191345, "learning_rate": 7.91116215659825e-05, "loss": 0.3524, "step": 23680 }, { "epoch": 1.9023890784982935, "grad_norm": 0.3027486503124237, "learning_rate": 7.903911561313178e-05, "loss": 0.3824, "step": 23690 }, { "epoch": 1.9031921300943586, "grad_norm": 0.377047598361969, "learning_rate": 7.89666259994075e-05, "loss": 0.3375, "step": 23700 }, { "epoch": 1.9039951816904237, "grad_norm": 0.23507441580295563, "learning_rate": 7.889415277043285e-05, "loss": 0.3578, "step": 23710 }, { "epoch": 1.9047982332864888, "grad_norm": 0.2256840020418167, "learning_rate": 7.88216959718206e-05, "loss": 0.3528, "step": 23720 }, { "epoch": 1.9056012848825539, "grad_norm": 0.2503352463245392, "learning_rate": 7.874925564917322e-05, "loss": 0.3539, "step": 23730 }, { "epoch": 1.9064043364786187, "grad_norm": 0.23551350831985474, "learning_rate": 7.867683184808289e-05, "loss": 0.3575, "step": 23740 }, { "epoch": 1.9072073880746838, "grad_norm": 0.22339186072349548, "learning_rate": 7.860442461413123e-05, "loss": 0.3521, "step": 23750 }, { "epoch": 1.9080104396707487, "grad_norm": 0.28662601113319397, "learning_rate": 7.85320339928896e-05, "loss": 0.3647, "step": 23760 }, { "epoch": 1.9088134912668138, "grad_norm": 0.2142658829689026, "learning_rate": 7.845966002991884e-05, "loss": 0.343, "step": 23770 }, { "epoch": 1.9096165428628789, "grad_norm": 0.21114008128643036, "learning_rate": 7.838730277076925e-05, "loss": 0.3425, "step": 23780 }, { "epoch": 1.910419594458944, "grad_norm": 0.20285294950008392, "learning_rate": 7.831496226098072e-05, "loss": 0.3621, "step": 23790 }, { "epoch": 1.911222646055009, "grad_norm": 0.23611877858638763, "learning_rate": 7.824263854608247e-05, "loss": 0.3718, "step": 23800 }, { "epoch": 1.912025697651074, "grad_norm": 0.2206626832485199, "learning_rate": 7.817033167159327e-05, "loss": 0.3508, "step": 23810 }, { "epoch": 1.9128287492471392, "grad_norm": 0.23805946111679077, "learning_rate": 7.809804168302129e-05, "loss": 0.3431, "step": 23820 }, { "epoch": 1.9136318008432043, "grad_norm": 0.18654972314834595, "learning_rate": 7.802576862586395e-05, "loss": 0.3401, "step": 23830 }, { "epoch": 1.9144348524392694, "grad_norm": 0.27555862069129944, "learning_rate": 7.795351254560812e-05, "loss": 0.3688, "step": 23840 }, { "epoch": 1.9152379040353342, "grad_norm": 0.23131133615970612, "learning_rate": 7.788127348772996e-05, "loss": 0.3711, "step": 23850 }, { "epoch": 1.9160409556313993, "grad_norm": 0.20734204351902008, "learning_rate": 7.780905149769493e-05, "loss": 0.3379, "step": 23860 }, { "epoch": 1.9168440072274644, "grad_norm": 0.2341688871383667, "learning_rate": 7.773684662095765e-05, "loss": 0.3391, "step": 23870 }, { "epoch": 1.9176470588235293, "grad_norm": 0.27117741107940674, "learning_rate": 7.766465890296213e-05, "loss": 0.3272, "step": 23880 }, { "epoch": 1.9184501104195943, "grad_norm": 0.21970979869365692, "learning_rate": 7.759248838914153e-05, "loss": 0.3537, "step": 23890 }, { "epoch": 1.9192531620156594, "grad_norm": 0.18365620076656342, "learning_rate": 7.752033512491805e-05, "loss": 0.3249, "step": 23900 }, { "epoch": 1.9200562136117245, "grad_norm": 0.21193769574165344, "learning_rate": 7.744819915570322e-05, "loss": 0.3572, "step": 23910 }, { "epoch": 1.9208592652077896, "grad_norm": 0.23587313294410706, "learning_rate": 7.737608052689755e-05, "loss": 0.3808, "step": 23920 }, { "epoch": 1.9216623168038547, "grad_norm": 0.19992391765117645, "learning_rate": 7.730397928389074e-05, "loss": 0.3617, "step": 23930 }, { "epoch": 1.9224653683999198, "grad_norm": 0.23664049804210663, "learning_rate": 7.723189547206143e-05, "loss": 0.3429, "step": 23940 }, { "epoch": 1.9232684199959849, "grad_norm": 0.24715556204319, "learning_rate": 7.71598291367774e-05, "loss": 0.3423, "step": 23950 }, { "epoch": 1.92407147159205, "grad_norm": 0.26234063506126404, "learning_rate": 7.708778032339537e-05, "loss": 0.3376, "step": 23960 }, { "epoch": 1.9248745231881148, "grad_norm": 0.22616265714168549, "learning_rate": 7.701574907726106e-05, "loss": 0.353, "step": 23970 }, { "epoch": 1.92567757478418, "grad_norm": 0.23215310275554657, "learning_rate": 7.694373544370912e-05, "loss": 0.351, "step": 23980 }, { "epoch": 1.926480626380245, "grad_norm": 0.22120191156864166, "learning_rate": 7.687173946806309e-05, "loss": 0.3463, "step": 23990 }, { "epoch": 1.9272836779763098, "grad_norm": 0.22960764169692993, "learning_rate": 7.679976119563542e-05, "loss": 0.3818, "step": 24000 }, { "epoch": 1.928086729572375, "grad_norm": 0.22896112501621246, "learning_rate": 7.672780067172751e-05, "loss": 0.3606, "step": 24010 }, { "epoch": 1.92888978116844, "grad_norm": 0.21595245599746704, "learning_rate": 7.665585794162938e-05, "loss": 0.3485, "step": 24020 }, { "epoch": 1.929692832764505, "grad_norm": 0.26333653926849365, "learning_rate": 7.658393305062005e-05, "loss": 0.367, "step": 24030 }, { "epoch": 1.9304958843605702, "grad_norm": 0.22725892066955566, "learning_rate": 7.651202604396718e-05, "loss": 0.3529, "step": 24040 }, { "epoch": 1.9312989359566353, "grad_norm": 0.21821071207523346, "learning_rate": 7.644013696692731e-05, "loss": 0.3479, "step": 24050 }, { "epoch": 1.9321019875527004, "grad_norm": 0.26249468326568604, "learning_rate": 7.636826586474551e-05, "loss": 0.3497, "step": 24060 }, { "epoch": 1.9329050391487654, "grad_norm": 0.26029643416404724, "learning_rate": 7.62964127826557e-05, "loss": 0.3325, "step": 24070 }, { "epoch": 1.9337080907448305, "grad_norm": 0.2725798785686493, "learning_rate": 7.622457776588042e-05, "loss": 0.3574, "step": 24080 }, { "epoch": 1.9345111423408954, "grad_norm": 0.22192320227622986, "learning_rate": 7.615276085963075e-05, "loss": 0.3728, "step": 24090 }, { "epoch": 1.9353141939369605, "grad_norm": 0.23183055222034454, "learning_rate": 7.608096210910653e-05, "loss": 0.3531, "step": 24100 }, { "epoch": 1.9361172455330253, "grad_norm": 0.2100285440683365, "learning_rate": 7.600918155949599e-05, "loss": 0.3434, "step": 24110 }, { "epoch": 1.9369202971290904, "grad_norm": 0.22728872299194336, "learning_rate": 7.593741925597609e-05, "loss": 0.3453, "step": 24120 }, { "epoch": 1.9377233487251555, "grad_norm": 0.2323300987482071, "learning_rate": 7.586567524371211e-05, "loss": 0.3724, "step": 24130 }, { "epoch": 1.9385264003212206, "grad_norm": 0.2266131490468979, "learning_rate": 7.5793949567858e-05, "loss": 0.3517, "step": 24140 }, { "epoch": 1.9393294519172857, "grad_norm": 0.20897157490253448, "learning_rate": 7.57222422735561e-05, "loss": 0.3479, "step": 24150 }, { "epoch": 1.9401325035133508, "grad_norm": 0.20940454304218292, "learning_rate": 7.565055340593715e-05, "loss": 0.3488, "step": 24160 }, { "epoch": 1.9409355551094158, "grad_norm": 0.2055739462375641, "learning_rate": 7.55788830101203e-05, "loss": 0.366, "step": 24170 }, { "epoch": 1.941738606705481, "grad_norm": 0.2276800572872162, "learning_rate": 7.550723113121312e-05, "loss": 0.3479, "step": 24180 }, { "epoch": 1.942541658301546, "grad_norm": 0.2524734139442444, "learning_rate": 7.543559781431145e-05, "loss": 0.3573, "step": 24190 }, { "epoch": 1.9433447098976109, "grad_norm": 0.2086971402168274, "learning_rate": 7.536398310449959e-05, "loss": 0.3501, "step": 24200 }, { "epoch": 1.944147761493676, "grad_norm": 0.23695190250873566, "learning_rate": 7.529238704684994e-05, "loss": 0.3544, "step": 24210 }, { "epoch": 1.944950813089741, "grad_norm": 0.234921395778656, "learning_rate": 7.52208096864233e-05, "loss": 0.3785, "step": 24220 }, { "epoch": 1.945753864685806, "grad_norm": 0.22120268642902374, "learning_rate": 7.514925106826863e-05, "loss": 0.3732, "step": 24230 }, { "epoch": 1.946556916281871, "grad_norm": 0.22058194875717163, "learning_rate": 7.507771123742316e-05, "loss": 0.3504, "step": 24240 }, { "epoch": 1.947359967877936, "grad_norm": 0.26833346486091614, "learning_rate": 7.50061902389122e-05, "loss": 0.3488, "step": 24250 }, { "epoch": 1.9481630194740012, "grad_norm": 0.23756884038448334, "learning_rate": 7.493468811774932e-05, "loss": 0.3549, "step": 24260 }, { "epoch": 1.9489660710700663, "grad_norm": 0.2664330005645752, "learning_rate": 7.486320491893615e-05, "loss": 0.3697, "step": 24270 }, { "epoch": 1.9497691226661313, "grad_norm": 0.2532287836074829, "learning_rate": 7.479174068746236e-05, "loss": 0.3263, "step": 24280 }, { "epoch": 1.9505721742621964, "grad_norm": 0.226562038064003, "learning_rate": 7.472029546830578e-05, "loss": 0.3653, "step": 24290 }, { "epoch": 1.9513752258582615, "grad_norm": 0.2808647155761719, "learning_rate": 7.464886930643221e-05, "loss": 0.3477, "step": 24300 }, { "epoch": 1.9521782774543266, "grad_norm": 0.2928006052970886, "learning_rate": 7.457746224679552e-05, "loss": 0.3547, "step": 24310 }, { "epoch": 1.9529813290503915, "grad_norm": 0.2546825408935547, "learning_rate": 7.450607433433747e-05, "loss": 0.3676, "step": 24320 }, { "epoch": 1.9537843806464565, "grad_norm": 0.2583765685558319, "learning_rate": 7.44347056139878e-05, "loss": 0.3686, "step": 24330 }, { "epoch": 1.9545874322425216, "grad_norm": 0.2100476175546646, "learning_rate": 7.436335613066423e-05, "loss": 0.3636, "step": 24340 }, { "epoch": 1.9553904838385865, "grad_norm": 0.26751193404197693, "learning_rate": 7.42920259292723e-05, "loss": 0.3751, "step": 24350 }, { "epoch": 1.9561935354346516, "grad_norm": 0.24072231352329254, "learning_rate": 7.422071505470547e-05, "loss": 0.3651, "step": 24360 }, { "epoch": 1.9569965870307167, "grad_norm": 0.2254866510629654, "learning_rate": 7.414942355184495e-05, "loss": 0.3678, "step": 24370 }, { "epoch": 1.9577996386267817, "grad_norm": 0.2630707025527954, "learning_rate": 7.407815146555987e-05, "loss": 0.3589, "step": 24380 }, { "epoch": 1.9586026902228468, "grad_norm": 0.22625724971294403, "learning_rate": 7.400689884070709e-05, "loss": 0.3633, "step": 24390 }, { "epoch": 1.959405741818912, "grad_norm": 0.22401447594165802, "learning_rate": 7.393566572213115e-05, "loss": 0.3384, "step": 24400 }, { "epoch": 1.960208793414977, "grad_norm": 0.24438561499118805, "learning_rate": 7.386445215466445e-05, "loss": 0.3413, "step": 24410 }, { "epoch": 1.961011845011042, "grad_norm": 0.23210887610912323, "learning_rate": 7.3793258183127e-05, "loss": 0.3573, "step": 24420 }, { "epoch": 1.9618148966071072, "grad_norm": 0.2611130475997925, "learning_rate": 7.372208385232645e-05, "loss": 0.3606, "step": 24430 }, { "epoch": 1.962617948203172, "grad_norm": 0.25835442543029785, "learning_rate": 7.365092920705816e-05, "loss": 0.3445, "step": 24440 }, { "epoch": 1.9634209997992371, "grad_norm": 0.24118094146251678, "learning_rate": 7.357979429210505e-05, "loss": 0.3634, "step": 24450 }, { "epoch": 1.964224051395302, "grad_norm": 0.2709980309009552, "learning_rate": 7.350867915223772e-05, "loss": 0.3415, "step": 24460 }, { "epoch": 1.965027102991367, "grad_norm": 0.23120085895061493, "learning_rate": 7.343758383221415e-05, "loss": 0.3531, "step": 24470 }, { "epoch": 1.9658301545874322, "grad_norm": 0.25775647163391113, "learning_rate": 7.336650837677999e-05, "loss": 0.3313, "step": 24480 }, { "epoch": 1.9666332061834972, "grad_norm": 0.22655822336673737, "learning_rate": 7.329545283066833e-05, "loss": 0.3453, "step": 24490 }, { "epoch": 1.9674362577795623, "grad_norm": 0.240370512008667, "learning_rate": 7.322441723859978e-05, "loss": 0.3289, "step": 24500 }, { "epoch": 1.9682393093756274, "grad_norm": 0.22774915397167206, "learning_rate": 7.315340164528227e-05, "loss": 0.3857, "step": 24510 }, { "epoch": 1.9690423609716925, "grad_norm": 0.21011494100093842, "learning_rate": 7.308240609541128e-05, "loss": 0.3414, "step": 24520 }, { "epoch": 1.9698454125677576, "grad_norm": 0.22871756553649902, "learning_rate": 7.301143063366962e-05, "loss": 0.334, "step": 24530 }, { "epoch": 1.9706484641638227, "grad_norm": 0.2131802886724472, "learning_rate": 7.294047530472744e-05, "loss": 0.3271, "step": 24540 }, { "epoch": 1.9714515157598875, "grad_norm": 0.326091468334198, "learning_rate": 7.286954015324227e-05, "loss": 0.3305, "step": 24550 }, { "epoch": 1.9722545673559526, "grad_norm": 0.30469343066215515, "learning_rate": 7.279862522385884e-05, "loss": 0.3708, "step": 24560 }, { "epoch": 1.9730576189520177, "grad_norm": 0.2285691648721695, "learning_rate": 7.272773056120929e-05, "loss": 0.3176, "step": 24570 }, { "epoch": 1.9738606705480826, "grad_norm": 0.22770445048809052, "learning_rate": 7.26568562099129e-05, "loss": 0.3661, "step": 24580 }, { "epoch": 1.9746637221441476, "grad_norm": 0.203849196434021, "learning_rate": 7.258600221457618e-05, "loss": 0.3815, "step": 24590 }, { "epoch": 1.9754667737402127, "grad_norm": 0.22375687956809998, "learning_rate": 7.251516861979289e-05, "loss": 0.3551, "step": 24600 }, { "epoch": 1.9762698253362778, "grad_norm": 0.2558288276195526, "learning_rate": 7.24443554701439e-05, "loss": 0.3337, "step": 24610 }, { "epoch": 1.977072876932343, "grad_norm": 0.22158297896385193, "learning_rate": 7.237356281019723e-05, "loss": 0.326, "step": 24620 }, { "epoch": 1.977875928528408, "grad_norm": 0.20659767091274261, "learning_rate": 7.230279068450794e-05, "loss": 0.3365, "step": 24630 }, { "epoch": 1.978678980124473, "grad_norm": 0.22562508285045624, "learning_rate": 7.223203913761829e-05, "loss": 0.3333, "step": 24640 }, { "epoch": 1.9794820317205382, "grad_norm": 0.1802888810634613, "learning_rate": 7.21613082140575e-05, "loss": 0.3549, "step": 24650 }, { "epoch": 1.9802850833166032, "grad_norm": 0.2514024078845978, "learning_rate": 7.209059795834181e-05, "loss": 0.3429, "step": 24660 }, { "epoch": 1.981088134912668, "grad_norm": 0.22406888008117676, "learning_rate": 7.20199084149745e-05, "loss": 0.3601, "step": 24670 }, { "epoch": 1.9818911865087332, "grad_norm": 0.23294053971767426, "learning_rate": 7.194923962844576e-05, "loss": 0.374, "step": 24680 }, { "epoch": 1.9826942381047983, "grad_norm": 0.21522653102874756, "learning_rate": 7.187859164323281e-05, "loss": 0.3953, "step": 24690 }, { "epoch": 1.9834972897008631, "grad_norm": 0.22164511680603027, "learning_rate": 7.180796450379961e-05, "loss": 0.3334, "step": 24700 }, { "epoch": 1.9843003412969282, "grad_norm": 0.21359401941299438, "learning_rate": 7.173735825459718e-05, "loss": 0.3545, "step": 24710 }, { "epoch": 1.9851033928929933, "grad_norm": 0.2605522572994232, "learning_rate": 7.166677294006327e-05, "loss": 0.3648, "step": 24720 }, { "epoch": 1.9859064444890584, "grad_norm": 0.2681507468223572, "learning_rate": 7.159620860462253e-05, "loss": 0.3644, "step": 24730 }, { "epoch": 1.9867094960851235, "grad_norm": 0.21421729028224945, "learning_rate": 7.152566529268637e-05, "loss": 0.3758, "step": 24740 }, { "epoch": 1.9875125476811886, "grad_norm": 0.24224638938903809, "learning_rate": 7.145514304865293e-05, "loss": 0.3485, "step": 24750 }, { "epoch": 1.9883155992772537, "grad_norm": 0.25120916962623596, "learning_rate": 7.138464191690722e-05, "loss": 0.3582, "step": 24760 }, { "epoch": 1.9891186508733187, "grad_norm": 0.2042090892791748, "learning_rate": 7.131416194182078e-05, "loss": 0.3361, "step": 24770 }, { "epoch": 1.9899217024693838, "grad_norm": 0.2559480369091034, "learning_rate": 7.124370316775198e-05, "loss": 0.3353, "step": 24780 }, { "epoch": 1.9907247540654487, "grad_norm": 0.24505077302455902, "learning_rate": 7.11732656390458e-05, "loss": 0.3599, "step": 24790 }, { "epoch": 1.9915278056615138, "grad_norm": 0.2639990448951721, "learning_rate": 7.110284940003384e-05, "loss": 0.3719, "step": 24800 }, { "epoch": 1.9923308572575786, "grad_norm": 0.23970429599285126, "learning_rate": 7.103245449503428e-05, "loss": 0.3723, "step": 24810 }, { "epoch": 1.9931339088536437, "grad_norm": 0.25519800186157227, "learning_rate": 7.096208096835193e-05, "loss": 0.3245, "step": 24820 }, { "epoch": 1.9939369604497088, "grad_norm": 0.24239763617515564, "learning_rate": 7.089172886427809e-05, "loss": 0.3836, "step": 24830 }, { "epoch": 1.994740012045774, "grad_norm": 0.19882798194885254, "learning_rate": 7.082139822709067e-05, "loss": 0.3485, "step": 24840 }, { "epoch": 1.995543063641839, "grad_norm": 0.23105105757713318, "learning_rate": 7.075108910105391e-05, "loss": 0.3311, "step": 24850 }, { "epoch": 1.996346115237904, "grad_norm": 0.22424709796905518, "learning_rate": 7.068080153041865e-05, "loss": 0.3494, "step": 24860 }, { "epoch": 1.9971491668339691, "grad_norm": 0.2285398542881012, "learning_rate": 7.061053555942208e-05, "loss": 0.3571, "step": 24870 }, { "epoch": 1.9979522184300342, "grad_norm": 0.2184617519378662, "learning_rate": 7.054029123228792e-05, "loss": 0.3433, "step": 24880 }, { "epoch": 1.9987552700260993, "grad_norm": 0.2646709084510803, "learning_rate": 7.047006859322605e-05, "loss": 0.3412, "step": 24890 }, { "epoch": 1.9995583216221642, "grad_norm": 0.26311400532722473, "learning_rate": 7.039986768643292e-05, "loss": 0.3336, "step": 24900 }, { "epoch": 2.000321220638426, "grad_norm": 0.24872811138629913, "learning_rate": 7.032968855609121e-05, "loss": 0.3679, "step": 24910 }, { "epoch": 2.001124272234491, "grad_norm": 0.22173278033733368, "learning_rate": 7.025953124636983e-05, "loss": 0.3183, "step": 24920 }, { "epoch": 2.001927323830556, "grad_norm": 0.24861803650856018, "learning_rate": 7.018939580142411e-05, "loss": 0.328, "step": 24930 }, { "epoch": 2.002730375426621, "grad_norm": 0.2292541265487671, "learning_rate": 7.011928226539548e-05, "loss": 0.3295, "step": 24940 }, { "epoch": 2.003533427022686, "grad_norm": 0.24193409085273743, "learning_rate": 7.004919068241165e-05, "loss": 0.3049, "step": 24950 }, { "epoch": 2.004336478618751, "grad_norm": 0.23466987907886505, "learning_rate": 6.997912109658647e-05, "loss": 0.3229, "step": 24960 }, { "epoch": 2.0051395302148163, "grad_norm": 0.24660472571849823, "learning_rate": 6.990907355201997e-05, "loss": 0.332, "step": 24970 }, { "epoch": 2.0059425818108814, "grad_norm": 0.24764154851436615, "learning_rate": 6.983904809279836e-05, "loss": 0.2957, "step": 24980 }, { "epoch": 2.0067456334069464, "grad_norm": 0.23329558968544006, "learning_rate": 6.976904476299388e-05, "loss": 0.3032, "step": 24990 }, { "epoch": 2.0075486850030115, "grad_norm": 0.23115824162960052, "learning_rate": 6.969906360666481e-05, "loss": 0.3485, "step": 25000 }, { "epoch": 2.0083517365990766, "grad_norm": 0.2133435755968094, "learning_rate": 6.962910466785558e-05, "loss": 0.3495, "step": 25010 }, { "epoch": 2.0091547881951417, "grad_norm": 0.22377750277519226, "learning_rate": 6.955916799059659e-05, "loss": 0.3165, "step": 25020 }, { "epoch": 2.009957839791207, "grad_norm": 0.9661308526992798, "learning_rate": 6.948925361890421e-05, "loss": 0.3394, "step": 25030 }, { "epoch": 2.0107608913872714, "grad_norm": 0.19444815814495087, "learning_rate": 6.941936159678072e-05, "loss": 0.3145, "step": 25040 }, { "epoch": 2.0115639429833365, "grad_norm": 0.26924946904182434, "learning_rate": 6.934949196821451e-05, "loss": 0.3225, "step": 25050 }, { "epoch": 2.0123669945794016, "grad_norm": 0.2343599945306778, "learning_rate": 6.927964477717967e-05, "loss": 0.3406, "step": 25060 }, { "epoch": 2.0131700461754667, "grad_norm": 0.224501833319664, "learning_rate": 6.920982006763635e-05, "loss": 0.32, "step": 25070 }, { "epoch": 2.0139730977715318, "grad_norm": 0.22549498081207275, "learning_rate": 6.914001788353037e-05, "loss": 0.319, "step": 25080 }, { "epoch": 2.014776149367597, "grad_norm": 0.23336263000965118, "learning_rate": 6.90702382687935e-05, "loss": 0.3568, "step": 25090 }, { "epoch": 2.015579200963662, "grad_norm": 0.24956114590168, "learning_rate": 6.900048126734333e-05, "loss": 0.3323, "step": 25100 }, { "epoch": 2.016382252559727, "grad_norm": 0.2419971525669098, "learning_rate": 6.893074692308305e-05, "loss": 0.327, "step": 25110 }, { "epoch": 2.017185304155792, "grad_norm": 0.24328289926052094, "learning_rate": 6.886103527990179e-05, "loss": 0.3293, "step": 25120 }, { "epoch": 2.017988355751857, "grad_norm": 0.2059687227010727, "learning_rate": 6.879134638167425e-05, "loss": 0.2984, "step": 25130 }, { "epoch": 2.0187914073479223, "grad_norm": 0.20006226003170013, "learning_rate": 6.872168027226092e-05, "loss": 0.3008, "step": 25140 }, { "epoch": 2.0195944589439874, "grad_norm": 0.29468128085136414, "learning_rate": 6.865203699550783e-05, "loss": 0.3565, "step": 25150 }, { "epoch": 2.020397510540052, "grad_norm": 0.27964136004447937, "learning_rate": 6.858241659524672e-05, "loss": 0.3233, "step": 25160 }, { "epoch": 2.021200562136117, "grad_norm": 0.24558515846729279, "learning_rate": 6.851281911529494e-05, "loss": 0.2939, "step": 25170 }, { "epoch": 2.022003613732182, "grad_norm": 0.28955134749412537, "learning_rate": 6.844324459945542e-05, "loss": 0.3313, "step": 25180 }, { "epoch": 2.0228066653282473, "grad_norm": 0.21956397593021393, "learning_rate": 6.837369309151652e-05, "loss": 0.3285, "step": 25190 }, { "epoch": 2.0236097169243124, "grad_norm": 0.23095791041851044, "learning_rate": 6.830416463525224e-05, "loss": 0.328, "step": 25200 }, { "epoch": 2.0244127685203774, "grad_norm": 0.21196947991847992, "learning_rate": 6.823465927442205e-05, "loss": 0.3097, "step": 25210 }, { "epoch": 2.0252158201164425, "grad_norm": 0.23069632053375244, "learning_rate": 6.816517705277092e-05, "loss": 0.3013, "step": 25220 }, { "epoch": 2.0260188717125076, "grad_norm": 0.24248024821281433, "learning_rate": 6.809571801402911e-05, "loss": 0.3086, "step": 25230 }, { "epoch": 2.0268219233085727, "grad_norm": 0.24380698800086975, "learning_rate": 6.802628220191248e-05, "loss": 0.3205, "step": 25240 }, { "epoch": 2.0276249749046378, "grad_norm": 0.22546611726284027, "learning_rate": 6.795686966012214e-05, "loss": 0.311, "step": 25250 }, { "epoch": 2.028428026500703, "grad_norm": 0.20484767854213715, "learning_rate": 6.788748043234456e-05, "loss": 0.3323, "step": 25260 }, { "epoch": 2.0292310780967675, "grad_norm": 0.25045841932296753, "learning_rate": 6.781811456225162e-05, "loss": 0.3267, "step": 25270 }, { "epoch": 2.0300341296928326, "grad_norm": 0.23530364036560059, "learning_rate": 6.774877209350044e-05, "loss": 0.354, "step": 25280 }, { "epoch": 2.0308371812888977, "grad_norm": 0.23538629710674286, "learning_rate": 6.767945306973345e-05, "loss": 0.3575, "step": 25290 }, { "epoch": 2.0316402328849628, "grad_norm": 0.2104635238647461, "learning_rate": 6.761015753457822e-05, "loss": 0.3184, "step": 25300 }, { "epoch": 2.032443284481028, "grad_norm": 0.21716949343681335, "learning_rate": 6.754088553164769e-05, "loss": 0.3154, "step": 25310 }, { "epoch": 2.033246336077093, "grad_norm": 0.25779449939727783, "learning_rate": 6.747163710453988e-05, "loss": 0.3336, "step": 25320 }, { "epoch": 2.034049387673158, "grad_norm": 0.2348909229040146, "learning_rate": 6.740241229683803e-05, "loss": 0.3087, "step": 25330 }, { "epoch": 2.034852439269223, "grad_norm": 0.21285489201545715, "learning_rate": 6.733321115211042e-05, "loss": 0.3256, "step": 25340 }, { "epoch": 2.035655490865288, "grad_norm": 0.22418463230133057, "learning_rate": 6.726403371391057e-05, "loss": 0.3277, "step": 25350 }, { "epoch": 2.0364585424613533, "grad_norm": 0.22189722955226898, "learning_rate": 6.7194880025777e-05, "loss": 0.346, "step": 25360 }, { "epoch": 2.0372615940574184, "grad_norm": 0.2597862780094147, "learning_rate": 6.712575013123334e-05, "loss": 0.3327, "step": 25370 }, { "epoch": 2.0380646456534834, "grad_norm": 0.25206223130226135, "learning_rate": 6.705664407378813e-05, "loss": 0.3336, "step": 25380 }, { "epoch": 2.038867697249548, "grad_norm": 0.2554842233657837, "learning_rate": 6.6987561896935e-05, "loss": 0.3098, "step": 25390 }, { "epoch": 2.039670748845613, "grad_norm": 0.2472432255744934, "learning_rate": 6.691850364415256e-05, "loss": 0.3242, "step": 25400 }, { "epoch": 2.0404738004416783, "grad_norm": 0.2069147229194641, "learning_rate": 6.684946935890436e-05, "loss": 0.3326, "step": 25410 }, { "epoch": 2.0412768520377433, "grad_norm": 0.22389402985572815, "learning_rate": 6.678045908463875e-05, "loss": 0.3225, "step": 25420 }, { "epoch": 2.0420799036338084, "grad_norm": 0.2373104691505432, "learning_rate": 6.671147286478913e-05, "loss": 0.2979, "step": 25430 }, { "epoch": 2.0428829552298735, "grad_norm": 0.24120356142520905, "learning_rate": 6.664251074277369e-05, "loss": 0.3095, "step": 25440 }, { "epoch": 2.0436860068259386, "grad_norm": 0.22056381404399872, "learning_rate": 6.657357276199539e-05, "loss": 0.3207, "step": 25450 }, { "epoch": 2.0444890584220037, "grad_norm": 0.26877960562705994, "learning_rate": 6.65046589658421e-05, "loss": 0.3322, "step": 25460 }, { "epoch": 2.0452921100180688, "grad_norm": 0.2583077847957611, "learning_rate": 6.643576939768641e-05, "loss": 0.3151, "step": 25470 }, { "epoch": 2.046095161614134, "grad_norm": 0.2175104022026062, "learning_rate": 6.636690410088574e-05, "loss": 0.3195, "step": 25480 }, { "epoch": 2.046898213210199, "grad_norm": 0.28643378615379333, "learning_rate": 6.629806311878209e-05, "loss": 0.3385, "step": 25490 }, { "epoch": 2.047701264806264, "grad_norm": 0.26712819933891296, "learning_rate": 6.622924649470227e-05, "loss": 0.3293, "step": 25500 }, { "epoch": 2.0485043164023287, "grad_norm": 0.22565750777721405, "learning_rate": 6.616045427195773e-05, "loss": 0.3303, "step": 25510 }, { "epoch": 2.0493073679983937, "grad_norm": 0.22854040563106537, "learning_rate": 6.60916864938446e-05, "loss": 0.3371, "step": 25520 }, { "epoch": 2.050110419594459, "grad_norm": 0.2307654619216919, "learning_rate": 6.60229432036435e-05, "loss": 0.3318, "step": 25530 }, { "epoch": 2.050913471190524, "grad_norm": 0.2981824278831482, "learning_rate": 6.595422444461979e-05, "loss": 0.3363, "step": 25540 }, { "epoch": 2.051716522786589, "grad_norm": 0.19012506306171417, "learning_rate": 6.588553026002333e-05, "loss": 0.3188, "step": 25550 }, { "epoch": 2.052519574382654, "grad_norm": 0.2876904606819153, "learning_rate": 6.58168606930885e-05, "loss": 0.3271, "step": 25560 }, { "epoch": 2.053322625978719, "grad_norm": 0.20208780467510223, "learning_rate": 6.574821578703415e-05, "loss": 0.3268, "step": 25570 }, { "epoch": 2.0541256775747843, "grad_norm": 0.23064401745796204, "learning_rate": 6.56795955850637e-05, "loss": 0.3296, "step": 25580 }, { "epoch": 2.0549287291708493, "grad_norm": 0.2213357388973236, "learning_rate": 6.561100013036498e-05, "loss": 0.3117, "step": 25590 }, { "epoch": 2.0557317807669144, "grad_norm": 0.22890175879001617, "learning_rate": 6.554242946611018e-05, "loss": 0.326, "step": 25600 }, { "epoch": 2.0565348323629795, "grad_norm": 0.25573232769966125, "learning_rate": 6.547388363545601e-05, "loss": 0.3083, "step": 25610 }, { "epoch": 2.057337883959044, "grad_norm": 0.24693745374679565, "learning_rate": 6.540536268154348e-05, "loss": 0.3387, "step": 25620 }, { "epoch": 2.0581409355551092, "grad_norm": 0.23282228410243988, "learning_rate": 6.533686664749797e-05, "loss": 0.3542, "step": 25630 }, { "epoch": 2.0589439871511743, "grad_norm": 0.22226032614707947, "learning_rate": 6.52683955764291e-05, "loss": 0.3437, "step": 25640 }, { "epoch": 2.0597470387472394, "grad_norm": 0.25506314635276794, "learning_rate": 6.51999495114309e-05, "loss": 0.3388, "step": 25650 }, { "epoch": 2.0605500903433045, "grad_norm": 0.20900098979473114, "learning_rate": 6.513152849558152e-05, "loss": 0.3281, "step": 25660 }, { "epoch": 2.0613531419393696, "grad_norm": 0.2623632848262787, "learning_rate": 6.506313257194352e-05, "loss": 0.3523, "step": 25670 }, { "epoch": 2.0621561935354347, "grad_norm": 0.2637616991996765, "learning_rate": 6.499476178356347e-05, "loss": 0.3159, "step": 25680 }, { "epoch": 2.0629592451314998, "grad_norm": 0.25281214714050293, "learning_rate": 6.492641617347229e-05, "loss": 0.3265, "step": 25690 }, { "epoch": 2.063762296727565, "grad_norm": 0.26380959153175354, "learning_rate": 6.485809578468496e-05, "loss": 0.3232, "step": 25700 }, { "epoch": 2.06456534832363, "grad_norm": 0.2140706181526184, "learning_rate": 6.478980066020065e-05, "loss": 0.2996, "step": 25710 }, { "epoch": 2.065368399919695, "grad_norm": 0.2885824739933014, "learning_rate": 6.472153084300252e-05, "loss": 0.3099, "step": 25720 }, { "epoch": 2.06617145151576, "grad_norm": 0.24739418923854828, "learning_rate": 6.465328637605792e-05, "loss": 0.3257, "step": 25730 }, { "epoch": 2.0669745031118247, "grad_norm": 0.24460576474666595, "learning_rate": 6.458506730231818e-05, "loss": 0.3095, "step": 25740 }, { "epoch": 2.06777755470789, "grad_norm": 0.23061227798461914, "learning_rate": 6.451687366471873e-05, "loss": 0.3216, "step": 25750 }, { "epoch": 2.068580606303955, "grad_norm": 0.19629500806331635, "learning_rate": 6.44487055061788e-05, "loss": 0.3234, "step": 25760 }, { "epoch": 2.06938365790002, "grad_norm": 0.2008221447467804, "learning_rate": 6.438056286960186e-05, "loss": 0.327, "step": 25770 }, { "epoch": 2.070186709496085, "grad_norm": 0.20555250346660614, "learning_rate": 6.431244579787502e-05, "loss": 0.3208, "step": 25780 }, { "epoch": 2.07098976109215, "grad_norm": 0.261018306016922, "learning_rate": 6.424435433386957e-05, "loss": 0.3322, "step": 25790 }, { "epoch": 2.0717928126882152, "grad_norm": 0.20754916965961456, "learning_rate": 6.417628852044048e-05, "loss": 0.3017, "step": 25800 }, { "epoch": 2.0725958642842803, "grad_norm": 0.2506975829601288, "learning_rate": 6.410824840042668e-05, "loss": 0.3129, "step": 25810 }, { "epoch": 2.0733989158803454, "grad_norm": 0.2319159060716629, "learning_rate": 6.404023401665096e-05, "loss": 0.3206, "step": 25820 }, { "epoch": 2.0742019674764105, "grad_norm": 0.2856186628341675, "learning_rate": 6.397224541191978e-05, "loss": 0.3152, "step": 25830 }, { "epoch": 2.0750050190724756, "grad_norm": 0.2231995016336441, "learning_rate": 6.390428262902349e-05, "loss": 0.3382, "step": 25840 }, { "epoch": 2.0758080706685407, "grad_norm": 0.25654909014701843, "learning_rate": 6.383634571073615e-05, "loss": 0.302, "step": 25850 }, { "epoch": 2.0766111222646053, "grad_norm": 0.2303108274936676, "learning_rate": 6.376843469981558e-05, "loss": 0.3313, "step": 25860 }, { "epoch": 2.0774141738606704, "grad_norm": 0.27974948287010193, "learning_rate": 6.370054963900318e-05, "loss": 0.3205, "step": 25870 }, { "epoch": 2.0782172254567355, "grad_norm": 0.2510107457637787, "learning_rate": 6.363269057102416e-05, "loss": 0.3171, "step": 25880 }, { "epoch": 2.0790202770528006, "grad_norm": 0.2555181086063385, "learning_rate": 6.356485753858733e-05, "loss": 0.3392, "step": 25890 }, { "epoch": 2.0798233286488657, "grad_norm": 0.23423372209072113, "learning_rate": 6.3497050584385e-05, "loss": 0.3118, "step": 25900 }, { "epoch": 2.0806263802449307, "grad_norm": 0.24335584044456482, "learning_rate": 6.342926975109325e-05, "loss": 0.3395, "step": 25910 }, { "epoch": 2.081429431840996, "grad_norm": 0.20730699598789215, "learning_rate": 6.336151508137158e-05, "loss": 0.3292, "step": 25920 }, { "epoch": 2.082232483437061, "grad_norm": 0.2269790917634964, "learning_rate": 6.329378661786308e-05, "loss": 0.321, "step": 25930 }, { "epoch": 2.083035535033126, "grad_norm": 0.21575935184955597, "learning_rate": 6.322608440319432e-05, "loss": 0.3123, "step": 25940 }, { "epoch": 2.083838586629191, "grad_norm": 0.2528509497642517, "learning_rate": 6.315840847997538e-05, "loss": 0.347, "step": 25950 }, { "epoch": 2.084641638225256, "grad_norm": 0.25129616260528564, "learning_rate": 6.309075889079978e-05, "loss": 0.3095, "step": 25960 }, { "epoch": 2.085444689821321, "grad_norm": 0.27199697494506836, "learning_rate": 6.302313567824454e-05, "loss": 0.3259, "step": 25970 }, { "epoch": 2.086247741417386, "grad_norm": 0.266250342130661, "learning_rate": 6.295553888486988e-05, "loss": 0.3239, "step": 25980 }, { "epoch": 2.087050793013451, "grad_norm": 0.248186856508255, "learning_rate": 6.288796855321958e-05, "loss": 0.3127, "step": 25990 }, { "epoch": 2.087853844609516, "grad_norm": 0.2224540412425995, "learning_rate": 6.28204247258207e-05, "loss": 0.3054, "step": 26000 }, { "epoch": 2.088656896205581, "grad_norm": 0.25754788517951965, "learning_rate": 6.275290744518366e-05, "loss": 0.3254, "step": 26010 }, { "epoch": 2.0894599478016462, "grad_norm": 0.25697746872901917, "learning_rate": 6.268541675380207e-05, "loss": 0.3248, "step": 26020 }, { "epoch": 2.0902629993977113, "grad_norm": 0.2543553113937378, "learning_rate": 6.261795269415296e-05, "loss": 0.3122, "step": 26030 }, { "epoch": 2.0910660509937764, "grad_norm": 0.2275550365447998, "learning_rate": 6.255051530869639e-05, "loss": 0.3247, "step": 26040 }, { "epoch": 2.0918691025898415, "grad_norm": 0.23328091204166412, "learning_rate": 6.248310463987583e-05, "loss": 0.3363, "step": 26050 }, { "epoch": 2.0926721541859066, "grad_norm": 0.23922231793403625, "learning_rate": 6.241572073011783e-05, "loss": 0.3124, "step": 26060 }, { "epoch": 2.0934752057819717, "grad_norm": 0.28722378611564636, "learning_rate": 6.234836362183213e-05, "loss": 0.3339, "step": 26070 }, { "epoch": 2.0942782573780367, "grad_norm": 0.24498523771762848, "learning_rate": 6.22810333574116e-05, "loss": 0.323, "step": 26080 }, { "epoch": 2.0950813089741014, "grad_norm": 0.2986457347869873, "learning_rate": 6.221372997923222e-05, "loss": 0.3206, "step": 26090 }, { "epoch": 2.0958843605701665, "grad_norm": 0.22956052422523499, "learning_rate": 6.214645352965303e-05, "loss": 0.3214, "step": 26100 }, { "epoch": 2.0966874121662316, "grad_norm": 0.25036323070526123, "learning_rate": 6.207920405101611e-05, "loss": 0.3108, "step": 26110 }, { "epoch": 2.0974904637622966, "grad_norm": 0.20005090534687042, "learning_rate": 6.201198158564667e-05, "loss": 0.3161, "step": 26120 }, { "epoch": 2.0982935153583617, "grad_norm": 0.20378251373767853, "learning_rate": 6.19447861758527e-05, "loss": 0.3326, "step": 26130 }, { "epoch": 2.099096566954427, "grad_norm": 0.24508939683437347, "learning_rate": 6.18776178639254e-05, "loss": 0.3278, "step": 26140 }, { "epoch": 2.099899618550492, "grad_norm": 0.2641811966896057, "learning_rate": 6.181047669213883e-05, "loss": 0.3226, "step": 26150 }, { "epoch": 2.100702670146557, "grad_norm": 0.2503342926502228, "learning_rate": 6.174336270274987e-05, "loss": 0.3055, "step": 26160 }, { "epoch": 2.101505721742622, "grad_norm": 0.2155403345823288, "learning_rate": 6.167627593799844e-05, "loss": 0.3141, "step": 26170 }, { "epoch": 2.102308773338687, "grad_norm": 0.27190712094306946, "learning_rate": 6.16092164401072e-05, "loss": 0.3229, "step": 26180 }, { "epoch": 2.1031118249347522, "grad_norm": 0.22791315615177155, "learning_rate": 6.154218425128171e-05, "loss": 0.3018, "step": 26190 }, { "epoch": 2.1039148765308173, "grad_norm": 0.2954922318458557, "learning_rate": 6.147517941371045e-05, "loss": 0.3315, "step": 26200 }, { "epoch": 2.104717928126882, "grad_norm": 0.2206811010837555, "learning_rate": 6.140820196956445e-05, "loss": 0.3135, "step": 26210 }, { "epoch": 2.105520979722947, "grad_norm": 0.25183379650115967, "learning_rate": 6.134125196099766e-05, "loss": 0.3378, "step": 26220 }, { "epoch": 2.106324031319012, "grad_norm": 0.22396886348724365, "learning_rate": 6.127432943014673e-05, "loss": 0.3242, "step": 26230 }, { "epoch": 2.107127082915077, "grad_norm": 0.22455260157585144, "learning_rate": 6.120743441913109e-05, "loss": 0.3283, "step": 26240 }, { "epoch": 2.1079301345111423, "grad_norm": 0.26888328790664673, "learning_rate": 6.114056697005265e-05, "loss": 0.3281, "step": 26250 }, { "epoch": 2.1087331861072074, "grad_norm": 0.2608834505081177, "learning_rate": 6.107372712499618e-05, "loss": 0.3175, "step": 26260 }, { "epoch": 2.1095362377032725, "grad_norm": 0.25409719347953796, "learning_rate": 6.100691492602898e-05, "loss": 0.3193, "step": 26270 }, { "epoch": 2.1103392892993376, "grad_norm": 0.22387394309043884, "learning_rate": 6.094013041520093e-05, "loss": 0.3187, "step": 26280 }, { "epoch": 2.1111423408954026, "grad_norm": 0.24477700889110565, "learning_rate": 6.087337363454459e-05, "loss": 0.3396, "step": 26290 }, { "epoch": 2.1119453924914677, "grad_norm": 0.2347884327173233, "learning_rate": 6.0806644626074905e-05, "loss": 0.3094, "step": 26300 }, { "epoch": 2.112748444087533, "grad_norm": 0.25021737813949585, "learning_rate": 6.07399434317895e-05, "loss": 0.3296, "step": 26310 }, { "epoch": 2.1135514956835975, "grad_norm": 0.2489921897649765, "learning_rate": 6.067327009366838e-05, "loss": 0.3363, "step": 26320 }, { "epoch": 2.1143545472796625, "grad_norm": 0.2880696952342987, "learning_rate": 6.0606624653674085e-05, "loss": 0.3403, "step": 26330 }, { "epoch": 2.1151575988757276, "grad_norm": 0.25647637248039246, "learning_rate": 6.0540007153751576e-05, "loss": 0.2988, "step": 26340 }, { "epoch": 2.1159606504717927, "grad_norm": 0.2629273533821106, "learning_rate": 6.047341763582825e-05, "loss": 0.3363, "step": 26350 }, { "epoch": 2.116763702067858, "grad_norm": 0.255754679441452, "learning_rate": 6.0406856141813826e-05, "loss": 0.3236, "step": 26360 }, { "epoch": 2.117566753663923, "grad_norm": 0.19593216478824615, "learning_rate": 6.034032271360046e-05, "loss": 0.2898, "step": 26370 }, { "epoch": 2.118369805259988, "grad_norm": 0.24379917979240417, "learning_rate": 6.0273817393062595e-05, "loss": 0.3306, "step": 26380 }, { "epoch": 2.119172856856053, "grad_norm": 0.28716593980789185, "learning_rate": 6.020734022205707e-05, "loss": 0.3534, "step": 26390 }, { "epoch": 2.119975908452118, "grad_norm": 0.2538658678531647, "learning_rate": 6.0140891242422856e-05, "loss": 0.3255, "step": 26400 }, { "epoch": 2.1207789600481832, "grad_norm": 0.2667030990123749, "learning_rate": 6.007447049598133e-05, "loss": 0.3254, "step": 26410 }, { "epoch": 2.1215820116442483, "grad_norm": 0.24329525232315063, "learning_rate": 6.0008078024535975e-05, "loss": 0.3239, "step": 26420 }, { "epoch": 2.1223850632403134, "grad_norm": 0.2141181379556656, "learning_rate": 5.9941713869872606e-05, "loss": 0.3311, "step": 26430 }, { "epoch": 2.123188114836378, "grad_norm": 0.2698177993297577, "learning_rate": 5.987537807375908e-05, "loss": 0.3053, "step": 26440 }, { "epoch": 2.123991166432443, "grad_norm": 0.3024904131889343, "learning_rate": 5.98090706779455e-05, "loss": 0.3379, "step": 26450 }, { "epoch": 2.124794218028508, "grad_norm": 0.23428724706172943, "learning_rate": 5.9742791724164125e-05, "loss": 0.3401, "step": 26460 }, { "epoch": 2.1255972696245733, "grad_norm": 0.260379433631897, "learning_rate": 5.967654125412919e-05, "loss": 0.3139, "step": 26470 }, { "epoch": 2.1264003212206384, "grad_norm": 0.2975679039955139, "learning_rate": 5.961031930953708e-05, "loss": 0.3413, "step": 26480 }, { "epoch": 2.1272033728167035, "grad_norm": 0.22636918723583221, "learning_rate": 5.954412593206624e-05, "loss": 0.3147, "step": 26490 }, { "epoch": 2.1280064244127685, "grad_norm": 0.22091557085514069, "learning_rate": 5.947796116337715e-05, "loss": 0.3477, "step": 26500 }, { "epoch": 2.1288094760088336, "grad_norm": 0.2796545624732971, "learning_rate": 5.9411825045112156e-05, "loss": 0.3027, "step": 26510 }, { "epoch": 2.1296125276048987, "grad_norm": 0.23723001778125763, "learning_rate": 5.9345717618895726e-05, "loss": 0.3385, "step": 26520 }, { "epoch": 2.130415579200964, "grad_norm": 0.2695634663105011, "learning_rate": 5.927963892633422e-05, "loss": 0.3312, "step": 26530 }, { "epoch": 2.131218630797029, "grad_norm": 0.25503623485565186, "learning_rate": 5.921358900901585e-05, "loss": 0.3425, "step": 26540 }, { "epoch": 2.132021682393094, "grad_norm": 0.24527747929096222, "learning_rate": 5.9147567908510835e-05, "loss": 0.3147, "step": 26550 }, { "epoch": 2.1328247339891586, "grad_norm": 0.2914295196533203, "learning_rate": 5.908157566637111e-05, "loss": 0.3372, "step": 26560 }, { "epoch": 2.1336277855852237, "grad_norm": 0.2279946357011795, "learning_rate": 5.9015612324130585e-05, "loss": 0.3302, "step": 26570 }, { "epoch": 2.134430837181289, "grad_norm": 0.2850649654865265, "learning_rate": 5.894967792330495e-05, "loss": 0.3286, "step": 26580 }, { "epoch": 2.135233888777354, "grad_norm": 0.22940348088741302, "learning_rate": 5.8883772505391576e-05, "loss": 0.3347, "step": 26590 }, { "epoch": 2.136036940373419, "grad_norm": 0.2604712247848511, "learning_rate": 5.881789611186972e-05, "loss": 0.3188, "step": 26600 }, { "epoch": 2.136839991969484, "grad_norm": 0.2625724673271179, "learning_rate": 5.875204878420036e-05, "loss": 0.3301, "step": 26610 }, { "epoch": 2.137643043565549, "grad_norm": 0.22988931834697723, "learning_rate": 5.868623056382606e-05, "loss": 0.3389, "step": 26620 }, { "epoch": 2.138446095161614, "grad_norm": 0.29836511611938477, "learning_rate": 5.862044149217121e-05, "loss": 0.3104, "step": 26630 }, { "epoch": 2.1392491467576793, "grad_norm": 0.2626972794532776, "learning_rate": 5.855468161064176e-05, "loss": 0.3447, "step": 26640 }, { "epoch": 2.1400521983537444, "grad_norm": 0.2541082501411438, "learning_rate": 5.848895096062539e-05, "loss": 0.3221, "step": 26650 }, { "epoch": 2.1408552499498095, "grad_norm": 0.22409959137439728, "learning_rate": 5.842324958349122e-05, "loss": 0.3116, "step": 26660 }, { "epoch": 2.141658301545874, "grad_norm": 0.265927791595459, "learning_rate": 5.835757752059013e-05, "loss": 0.3305, "step": 26670 }, { "epoch": 2.142461353141939, "grad_norm": 0.2685490846633911, "learning_rate": 5.829193481325438e-05, "loss": 0.3156, "step": 26680 }, { "epoch": 2.1432644047380043, "grad_norm": 0.2185991108417511, "learning_rate": 5.822632150279791e-05, "loss": 0.3276, "step": 26690 }, { "epoch": 2.1440674563340694, "grad_norm": 0.24959595501422882, "learning_rate": 5.816073763051601e-05, "loss": 0.3238, "step": 26700 }, { "epoch": 2.1448705079301345, "grad_norm": 0.23283201456069946, "learning_rate": 5.809518323768557e-05, "loss": 0.326, "step": 26710 }, { "epoch": 2.1456735595261995, "grad_norm": 0.35999900102615356, "learning_rate": 5.802965836556484e-05, "loss": 0.361, "step": 26720 }, { "epoch": 2.1464766111222646, "grad_norm": 0.2650614380836487, "learning_rate": 5.796416305539359e-05, "loss": 0.3202, "step": 26730 }, { "epoch": 2.1472796627183297, "grad_norm": 0.25292733311653137, "learning_rate": 5.789869734839282e-05, "loss": 0.2985, "step": 26740 }, { "epoch": 2.148082714314395, "grad_norm": 0.26987433433532715, "learning_rate": 5.7833261285765065e-05, "loss": 0.351, "step": 26750 }, { "epoch": 2.14888576591046, "grad_norm": 0.2256740778684616, "learning_rate": 5.776785490869409e-05, "loss": 0.3283, "step": 26760 }, { "epoch": 2.149688817506525, "grad_norm": 0.279121071100235, "learning_rate": 5.7702478258345095e-05, "loss": 0.3332, "step": 26770 }, { "epoch": 2.15049186910259, "grad_norm": 0.2708527147769928, "learning_rate": 5.763713137586441e-05, "loss": 0.3174, "step": 26780 }, { "epoch": 2.1512949206986547, "grad_norm": 0.26240089535713196, "learning_rate": 5.757181430237978e-05, "loss": 0.3293, "step": 26790 }, { "epoch": 2.1520979722947198, "grad_norm": 0.27211594581604004, "learning_rate": 5.750652707900006e-05, "loss": 0.3236, "step": 26800 }, { "epoch": 2.152901023890785, "grad_norm": 0.25894594192504883, "learning_rate": 5.7441269746815475e-05, "loss": 0.3115, "step": 26810 }, { "epoch": 2.15370407548685, "grad_norm": 0.19751548767089844, "learning_rate": 5.7376042346897243e-05, "loss": 0.3283, "step": 26820 }, { "epoch": 2.154507127082915, "grad_norm": 0.271707147359848, "learning_rate": 5.73108449202979e-05, "loss": 0.3218, "step": 26830 }, { "epoch": 2.15531017867898, "grad_norm": 0.23493173718452454, "learning_rate": 5.724567750805109e-05, "loss": 0.3254, "step": 26840 }, { "epoch": 2.156113230275045, "grad_norm": 0.23969268798828125, "learning_rate": 5.718054015117147e-05, "loss": 0.3156, "step": 26850 }, { "epoch": 2.1569162818711103, "grad_norm": 0.3005601167678833, "learning_rate": 5.7115432890654886e-05, "loss": 0.3268, "step": 26860 }, { "epoch": 2.1577193334671754, "grad_norm": 0.23792418837547302, "learning_rate": 5.705035576747821e-05, "loss": 0.3132, "step": 26870 }, { "epoch": 2.1585223850632405, "grad_norm": 0.24272672832012177, "learning_rate": 5.6985308822599394e-05, "loss": 0.3292, "step": 26880 }, { "epoch": 2.1593254366593055, "grad_norm": 0.24107615649700165, "learning_rate": 5.692029209695725e-05, "loss": 0.3363, "step": 26890 }, { "epoch": 2.1601284882553706, "grad_norm": 0.25208666920661926, "learning_rate": 5.68553056314717e-05, "loss": 0.3248, "step": 26900 }, { "epoch": 2.1609315398514353, "grad_norm": 0.23874764144420624, "learning_rate": 5.679034946704366e-05, "loss": 0.3149, "step": 26910 }, { "epoch": 2.1617345914475004, "grad_norm": 0.23480597138404846, "learning_rate": 5.672542364455479e-05, "loss": 0.3022, "step": 26920 }, { "epoch": 2.1625376430435654, "grad_norm": 0.243326336145401, "learning_rate": 5.6660528204867855e-05, "loss": 0.3211, "step": 26930 }, { "epoch": 2.1633406946396305, "grad_norm": 0.30290868878364563, "learning_rate": 5.659566318882634e-05, "loss": 0.3453, "step": 26940 }, { "epoch": 2.1641437462356956, "grad_norm": 0.2512684166431427, "learning_rate": 5.653082863725473e-05, "loss": 0.3399, "step": 26950 }, { "epoch": 2.1649467978317607, "grad_norm": 0.2449852079153061, "learning_rate": 5.646602459095817e-05, "loss": 0.3095, "step": 26960 }, { "epoch": 2.165749849427826, "grad_norm": 0.26402437686920166, "learning_rate": 5.640125109072274e-05, "loss": 0.3372, "step": 26970 }, { "epoch": 2.166552901023891, "grad_norm": 0.30451974272727966, "learning_rate": 5.633650817731524e-05, "loss": 0.314, "step": 26980 }, { "epoch": 2.167355952619956, "grad_norm": 0.24645578861236572, "learning_rate": 5.627179589148328e-05, "loss": 0.335, "step": 26990 }, { "epoch": 2.168159004216021, "grad_norm": 0.22935336828231812, "learning_rate": 5.620711427395505e-05, "loss": 0.3354, "step": 27000 }, { "epoch": 2.168962055812086, "grad_norm": 0.20580662786960602, "learning_rate": 5.614246336543959e-05, "loss": 0.3158, "step": 27010 }, { "epoch": 2.1697651074081508, "grad_norm": 0.21738086640834808, "learning_rate": 5.6077843206626536e-05, "loss": 0.3158, "step": 27020 }, { "epoch": 2.170568159004216, "grad_norm": 0.282398521900177, "learning_rate": 5.6013253838186216e-05, "loss": 0.3126, "step": 27030 }, { "epoch": 2.171371210600281, "grad_norm": 0.2824917137622833, "learning_rate": 5.59486953007695e-05, "loss": 0.3315, "step": 27040 }, { "epoch": 2.172174262196346, "grad_norm": 0.31237155199050903, "learning_rate": 5.588416763500797e-05, "loss": 0.3074, "step": 27050 }, { "epoch": 2.172977313792411, "grad_norm": 0.2221287041902542, "learning_rate": 5.581967088151363e-05, "loss": 0.3299, "step": 27060 }, { "epoch": 2.173780365388476, "grad_norm": 0.264369398355484, "learning_rate": 5.575520508087919e-05, "loss": 0.3194, "step": 27070 }, { "epoch": 2.1745834169845413, "grad_norm": 0.37427571415901184, "learning_rate": 5.569077027367774e-05, "loss": 0.3137, "step": 27080 }, { "epoch": 2.1753864685806064, "grad_norm": 0.22570203244686127, "learning_rate": 5.562636650046293e-05, "loss": 0.3173, "step": 27090 }, { "epoch": 2.1761895201766714, "grad_norm": 0.23777054250240326, "learning_rate": 5.556199380176891e-05, "loss": 0.3195, "step": 27100 }, { "epoch": 2.1769925717727365, "grad_norm": 0.21149227023124695, "learning_rate": 5.549765221811024e-05, "loss": 0.3057, "step": 27110 }, { "epoch": 2.1777956233688016, "grad_norm": 0.2894785404205322, "learning_rate": 5.543334178998184e-05, "loss": 0.3404, "step": 27120 }, { "epoch": 2.1785986749648667, "grad_norm": 0.24409106373786926, "learning_rate": 5.53690625578591e-05, "loss": 0.3166, "step": 27130 }, { "epoch": 2.1794017265609313, "grad_norm": 0.23901721835136414, "learning_rate": 5.530481456219778e-05, "loss": 0.3094, "step": 27140 }, { "epoch": 2.1802047781569964, "grad_norm": 0.21554191410541534, "learning_rate": 5.524059784343388e-05, "loss": 0.3298, "step": 27150 }, { "epoch": 2.1810078297530615, "grad_norm": 0.23435696959495544, "learning_rate": 5.517641244198383e-05, "loss": 0.3261, "step": 27160 }, { "epoch": 2.1818108813491266, "grad_norm": 0.2428428828716278, "learning_rate": 5.511225839824432e-05, "loss": 0.3377, "step": 27170 }, { "epoch": 2.1826139329451917, "grad_norm": 0.2310289591550827, "learning_rate": 5.5048135752592245e-05, "loss": 0.3268, "step": 27180 }, { "epoch": 2.1834169845412568, "grad_norm": 0.23872172832489014, "learning_rate": 5.4984044545384824e-05, "loss": 0.348, "step": 27190 }, { "epoch": 2.184220036137322, "grad_norm": 0.2545344829559326, "learning_rate": 5.4919984816959414e-05, "loss": 0.3257, "step": 27200 }, { "epoch": 2.185023087733387, "grad_norm": 0.2589345872402191, "learning_rate": 5.48559566076336e-05, "loss": 0.3104, "step": 27210 }, { "epoch": 2.185826139329452, "grad_norm": 0.24310199916362762, "learning_rate": 5.4791959957705155e-05, "loss": 0.311, "step": 27220 }, { "epoch": 2.186629190925517, "grad_norm": 0.2285807877779007, "learning_rate": 5.472799490745191e-05, "loss": 0.3288, "step": 27230 }, { "epoch": 2.187432242521582, "grad_norm": 0.2442426234483719, "learning_rate": 5.466406149713187e-05, "loss": 0.3152, "step": 27240 }, { "epoch": 2.1882352941176473, "grad_norm": 0.22888809442520142, "learning_rate": 5.460015976698312e-05, "loss": 0.3255, "step": 27250 }, { "epoch": 2.189038345713712, "grad_norm": 0.29339393973350525, "learning_rate": 5.453628975722384e-05, "loss": 0.3252, "step": 27260 }, { "epoch": 2.189841397309777, "grad_norm": 0.2644522488117218, "learning_rate": 5.447245150805213e-05, "loss": 0.3271, "step": 27270 }, { "epoch": 2.190644448905842, "grad_norm": 0.18203558027744293, "learning_rate": 5.44086450596462e-05, "loss": 0.2942, "step": 27280 }, { "epoch": 2.191447500501907, "grad_norm": 0.2718253433704376, "learning_rate": 5.434487045216429e-05, "loss": 0.3171, "step": 27290 }, { "epoch": 2.1922505520979723, "grad_norm": 0.23929502069950104, "learning_rate": 5.428112772574442e-05, "loss": 0.3201, "step": 27300 }, { "epoch": 2.1930536036940373, "grad_norm": 0.24171558022499084, "learning_rate": 5.421741692050474e-05, "loss": 0.3129, "step": 27310 }, { "epoch": 2.1938566552901024, "grad_norm": 0.228899285197258, "learning_rate": 5.415373807654317e-05, "loss": 0.3087, "step": 27320 }, { "epoch": 2.1946597068861675, "grad_norm": 0.2529512941837311, "learning_rate": 5.4090091233937626e-05, "loss": 0.3343, "step": 27330 }, { "epoch": 2.1954627584822326, "grad_norm": 0.23619462549686432, "learning_rate": 5.402647643274574e-05, "loss": 0.2929, "step": 27340 }, { "epoch": 2.1962658100782977, "grad_norm": 0.24778012931346893, "learning_rate": 5.3962893713005134e-05, "loss": 0.3288, "step": 27350 }, { "epoch": 2.1970688616743628, "grad_norm": 0.2250872552394867, "learning_rate": 5.389934311473316e-05, "loss": 0.3247, "step": 27360 }, { "epoch": 2.1978719132704274, "grad_norm": 0.2531875967979431, "learning_rate": 5.383582467792698e-05, "loss": 0.3199, "step": 27370 }, { "epoch": 2.1986749648664925, "grad_norm": 0.2892155647277832, "learning_rate": 5.377233844256345e-05, "loss": 0.3319, "step": 27380 }, { "epoch": 2.1994780164625576, "grad_norm": 0.2564356327056885, "learning_rate": 5.3708884448599226e-05, "loss": 0.341, "step": 27390 }, { "epoch": 2.2002810680586227, "grad_norm": 0.24837927520275116, "learning_rate": 5.3645462735970654e-05, "loss": 0.3027, "step": 27400 }, { "epoch": 2.2010841196546878, "grad_norm": 0.23384208977222443, "learning_rate": 5.358207334459382e-05, "loss": 0.3394, "step": 27410 }, { "epoch": 2.201887171250753, "grad_norm": 0.2783987820148468, "learning_rate": 5.35187163143643e-05, "loss": 0.319, "step": 27420 }, { "epoch": 2.202690222846818, "grad_norm": 0.24623946845531464, "learning_rate": 5.3455391685157516e-05, "loss": 0.3155, "step": 27430 }, { "epoch": 2.203493274442883, "grad_norm": 0.25523871183395386, "learning_rate": 5.3392099496828304e-05, "loss": 0.3203, "step": 27440 }, { "epoch": 2.204296326038948, "grad_norm": 0.21937350928783417, "learning_rate": 5.332883978921126e-05, "loss": 0.2906, "step": 27450 }, { "epoch": 2.205099377635013, "grad_norm": 0.2767978608608246, "learning_rate": 5.326561260212035e-05, "loss": 0.3121, "step": 27460 }, { "epoch": 2.2059024292310783, "grad_norm": 0.22119396924972534, "learning_rate": 5.320241797534925e-05, "loss": 0.3279, "step": 27470 }, { "epoch": 2.2067054808271434, "grad_norm": 0.25470367074012756, "learning_rate": 5.3139255948671065e-05, "loss": 0.3522, "step": 27480 }, { "epoch": 2.207508532423208, "grad_norm": 0.2745916545391083, "learning_rate": 5.3076126561838325e-05, "loss": 0.3502, "step": 27490 }, { "epoch": 2.208311584019273, "grad_norm": 0.2361493855714798, "learning_rate": 5.301302985458311e-05, "loss": 0.2972, "step": 27500 }, { "epoch": 2.209114635615338, "grad_norm": 0.243666410446167, "learning_rate": 5.29499658666169e-05, "loss": 0.3148, "step": 27510 }, { "epoch": 2.2099176872114032, "grad_norm": 0.25109249353408813, "learning_rate": 5.2886934637630624e-05, "loss": 0.3325, "step": 27520 }, { "epoch": 2.2107207388074683, "grad_norm": 0.2742447853088379, "learning_rate": 5.2823936207294454e-05, "loss": 0.301, "step": 27530 }, { "epoch": 2.2115237904035334, "grad_norm": 0.2418070286512375, "learning_rate": 5.276097061525808e-05, "loss": 0.3116, "step": 27540 }, { "epoch": 2.2123268419995985, "grad_norm": 0.2689466178417206, "learning_rate": 5.269803790115046e-05, "loss": 0.3233, "step": 27550 }, { "epoch": 2.2131298935956636, "grad_norm": 0.2298179417848587, "learning_rate": 5.263513810457985e-05, "loss": 0.3164, "step": 27560 }, { "epoch": 2.2139329451917287, "grad_norm": 0.2958206236362457, "learning_rate": 5.2572271265133746e-05, "loss": 0.3259, "step": 27570 }, { "epoch": 2.2147359967877938, "grad_norm": 0.2586722671985626, "learning_rate": 5.250943742237901e-05, "loss": 0.3307, "step": 27580 }, { "epoch": 2.215539048383859, "grad_norm": 0.2692318558692932, "learning_rate": 5.2446636615861656e-05, "loss": 0.3457, "step": 27590 }, { "epoch": 2.216342099979924, "grad_norm": 0.2167486995458603, "learning_rate": 5.238386888510697e-05, "loss": 0.3273, "step": 27600 }, { "epoch": 2.2171451515759886, "grad_norm": 0.2410346120595932, "learning_rate": 5.232113426961932e-05, "loss": 0.3185, "step": 27610 }, { "epoch": 2.2179482031720537, "grad_norm": 0.22290250658988953, "learning_rate": 5.225843280888232e-05, "loss": 0.3228, "step": 27620 }, { "epoch": 2.2187512547681187, "grad_norm": 0.26900526881217957, "learning_rate": 5.219576454235871e-05, "loss": 0.3548, "step": 27630 }, { "epoch": 2.219554306364184, "grad_norm": 0.243906632065773, "learning_rate": 5.2133129509490274e-05, "loss": 0.3001, "step": 27640 }, { "epoch": 2.220357357960249, "grad_norm": 0.2689366340637207, "learning_rate": 5.207052774969794e-05, "loss": 0.3263, "step": 27650 }, { "epoch": 2.221160409556314, "grad_norm": 0.2556426227092743, "learning_rate": 5.2007959302381684e-05, "loss": 0.3131, "step": 27660 }, { "epoch": 2.221963461152379, "grad_norm": 0.2759295105934143, "learning_rate": 5.194542420692056e-05, "loss": 0.3233, "step": 27670 }, { "epoch": 2.222766512748444, "grad_norm": 0.2164018750190735, "learning_rate": 5.188292250267247e-05, "loss": 0.297, "step": 27680 }, { "epoch": 2.2235695643445093, "grad_norm": 0.26365339756011963, "learning_rate": 5.182045422897454e-05, "loss": 0.3297, "step": 27690 }, { "epoch": 2.2243726159405743, "grad_norm": 0.23182764649391174, "learning_rate": 5.17580194251426e-05, "loss": 0.3232, "step": 27700 }, { "epoch": 2.2251756675366394, "grad_norm": 0.29337170720100403, "learning_rate": 5.1695618130471645e-05, "loss": 0.3317, "step": 27710 }, { "epoch": 2.225978719132704, "grad_norm": 0.25355201959609985, "learning_rate": 5.16332503842354e-05, "loss": 0.315, "step": 27720 }, { "epoch": 2.226781770728769, "grad_norm": 0.23379606008529663, "learning_rate": 5.1570916225686604e-05, "loss": 0.3173, "step": 27730 }, { "epoch": 2.2275848223248342, "grad_norm": 0.2438281923532486, "learning_rate": 5.150861569405676e-05, "loss": 0.3242, "step": 27740 }, { "epoch": 2.2283878739208993, "grad_norm": 0.21012279391288757, "learning_rate": 5.144634882855635e-05, "loss": 0.3397, "step": 27750 }, { "epoch": 2.2291909255169644, "grad_norm": 0.25766420364379883, "learning_rate": 5.138411566837446e-05, "loss": 0.3167, "step": 27760 }, { "epoch": 2.2299939771130295, "grad_norm": 0.43043676018714905, "learning_rate": 5.13219162526791e-05, "loss": 0.3401, "step": 27770 }, { "epoch": 2.2307970287090946, "grad_norm": 0.3053670823574066, "learning_rate": 5.125975062061704e-05, "loss": 0.3329, "step": 27780 }, { "epoch": 2.2316000803051597, "grad_norm": 0.25341978669166565, "learning_rate": 5.1197618811313787e-05, "loss": 0.3164, "step": 27790 }, { "epoch": 2.2324031319012247, "grad_norm": 0.24935176968574524, "learning_rate": 5.1135520863873467e-05, "loss": 0.3243, "step": 27800 }, { "epoch": 2.23320618349729, "grad_norm": 0.2392369955778122, "learning_rate": 5.1073456817379026e-05, "loss": 0.2948, "step": 27810 }, { "epoch": 2.234009235093355, "grad_norm": 0.2684544026851654, "learning_rate": 5.101142671089199e-05, "loss": 0.311, "step": 27820 }, { "epoch": 2.23481228668942, "grad_norm": 0.23088999092578888, "learning_rate": 5.094943058345251e-05, "loss": 0.3425, "step": 27830 }, { "epoch": 2.2356153382854846, "grad_norm": 0.21615049242973328, "learning_rate": 5.088746847407942e-05, "loss": 0.3118, "step": 27840 }, { "epoch": 2.2364183898815497, "grad_norm": 0.22880128026008606, "learning_rate": 5.082554042177008e-05, "loss": 0.3254, "step": 27850 }, { "epoch": 2.237221441477615, "grad_norm": 0.2526080906391144, "learning_rate": 5.076364646550051e-05, "loss": 0.3402, "step": 27860 }, { "epoch": 2.23802449307368, "grad_norm": 0.30191224813461304, "learning_rate": 5.070178664422515e-05, "loss": 0.3124, "step": 27870 }, { "epoch": 2.238827544669745, "grad_norm": 0.26728007197380066, "learning_rate": 5.063996099687701e-05, "loss": 0.3217, "step": 27880 }, { "epoch": 2.23963059626581, "grad_norm": 0.27429091930389404, "learning_rate": 5.057816956236764e-05, "loss": 0.3101, "step": 27890 }, { "epoch": 2.240433647861875, "grad_norm": 0.2550830841064453, "learning_rate": 5.0516412379586996e-05, "loss": 0.3446, "step": 27900 }, { "epoch": 2.2412366994579402, "grad_norm": 0.2598472535610199, "learning_rate": 5.045468948740346e-05, "loss": 0.3372, "step": 27910 }, { "epoch": 2.2420397510540053, "grad_norm": 0.2920839190483093, "learning_rate": 5.039300092466388e-05, "loss": 0.3041, "step": 27920 }, { "epoch": 2.2428428026500704, "grad_norm": 0.29776665568351746, "learning_rate": 5.033134673019353e-05, "loss": 0.3327, "step": 27930 }, { "epoch": 2.2436458542461355, "grad_norm": 0.2594432830810547, "learning_rate": 5.026972694279597e-05, "loss": 0.3266, "step": 27940 }, { "epoch": 2.2444489058422006, "grad_norm": 0.2545328438282013, "learning_rate": 5.020814160125309e-05, "loss": 0.3257, "step": 27950 }, { "epoch": 2.2452519574382652, "grad_norm": 0.2511461675167084, "learning_rate": 5.014659074432522e-05, "loss": 0.3082, "step": 27960 }, { "epoch": 2.2460550090343303, "grad_norm": 0.2675129771232605, "learning_rate": 5.008507441075092e-05, "loss": 0.3297, "step": 27970 }, { "epoch": 2.2468580606303954, "grad_norm": 0.2881471812725067, "learning_rate": 5.0023592639246974e-05, "loss": 0.3287, "step": 27980 }, { "epoch": 2.2476611122264605, "grad_norm": 0.2721409201622009, "learning_rate": 4.996214546850848e-05, "loss": 0.3375, "step": 27990 }, { "epoch": 2.2484641638225256, "grad_norm": 0.2556069791316986, "learning_rate": 4.990073293720874e-05, "loss": 0.3456, "step": 28000 }, { "epoch": 2.2492672154185906, "grad_norm": 0.24009770154953003, "learning_rate": 4.983935508399928e-05, "loss": 0.3256, "step": 28010 }, { "epoch": 2.2500702670146557, "grad_norm": 0.207969069480896, "learning_rate": 4.977801194750974e-05, "loss": 0.312, "step": 28020 }, { "epoch": 2.250873318610721, "grad_norm": 0.24699918925762177, "learning_rate": 4.9716703566347946e-05, "loss": 0.3458, "step": 28030 }, { "epoch": 2.251676370206786, "grad_norm": 0.22685576975345612, "learning_rate": 4.965542997909986e-05, "loss": 0.3136, "step": 28040 }, { "epoch": 2.252479421802851, "grad_norm": 0.24497218430042267, "learning_rate": 4.959419122432957e-05, "loss": 0.314, "step": 28050 }, { "epoch": 2.253282473398916, "grad_norm": 0.24260596930980682, "learning_rate": 4.953298734057913e-05, "loss": 0.3199, "step": 28060 }, { "epoch": 2.2540855249949807, "grad_norm": 0.26565027236938477, "learning_rate": 4.9471818366368786e-05, "loss": 0.3219, "step": 28070 }, { "epoch": 2.254888576591046, "grad_norm": 0.3341734707355499, "learning_rate": 4.941068434019669e-05, "loss": 0.3523, "step": 28080 }, { "epoch": 2.255691628187111, "grad_norm": 0.24764220416545868, "learning_rate": 4.934958530053913e-05, "loss": 0.3343, "step": 28090 }, { "epoch": 2.256494679783176, "grad_norm": 0.2812884747982025, "learning_rate": 4.928852128585022e-05, "loss": 0.3317, "step": 28100 }, { "epoch": 2.257297731379241, "grad_norm": 0.23371656239032745, "learning_rate": 4.9227492334562165e-05, "loss": 0.3216, "step": 28110 }, { "epoch": 2.258100782975306, "grad_norm": 0.2542067766189575, "learning_rate": 4.916649848508502e-05, "loss": 0.3119, "step": 28120 }, { "epoch": 2.2589038345713712, "grad_norm": 0.230075865983963, "learning_rate": 4.9105539775806854e-05, "loss": 0.3203, "step": 28130 }, { "epoch": 2.2597068861674363, "grad_norm": 0.23552577197551727, "learning_rate": 4.904461624509342e-05, "loss": 0.3245, "step": 28140 }, { "epoch": 2.2605099377635014, "grad_norm": 0.26308688521385193, "learning_rate": 4.898372793128854e-05, "loss": 0.3307, "step": 28150 }, { "epoch": 2.2613129893595665, "grad_norm": 0.26124677062034607, "learning_rate": 4.8922874872713774e-05, "loss": 0.3403, "step": 28160 }, { "epoch": 2.2621160409556316, "grad_norm": 0.2556802034378052, "learning_rate": 4.886205710766848e-05, "loss": 0.3297, "step": 28170 }, { "epoch": 2.262919092551696, "grad_norm": 0.26548734307289124, "learning_rate": 4.880127467442982e-05, "loss": 0.3411, "step": 28180 }, { "epoch": 2.2637221441477617, "grad_norm": 0.25166910886764526, "learning_rate": 4.874052761125279e-05, "loss": 0.3281, "step": 28190 }, { "epoch": 2.2645251957438264, "grad_norm": 0.22786086797714233, "learning_rate": 4.867981595636999e-05, "loss": 0.3164, "step": 28200 }, { "epoch": 2.2653282473398915, "grad_norm": 0.21015878021717072, "learning_rate": 4.8619139747991814e-05, "loss": 0.3261, "step": 28210 }, { "epoch": 2.2661312989359566, "grad_norm": 0.21065697073936462, "learning_rate": 4.8558499024306346e-05, "loss": 0.3342, "step": 28220 }, { "epoch": 2.2669343505320216, "grad_norm": 0.2979133427143097, "learning_rate": 4.849789382347934e-05, "loss": 0.3137, "step": 28230 }, { "epoch": 2.2677374021280867, "grad_norm": 0.24678011238574982, "learning_rate": 4.843732418365422e-05, "loss": 0.3325, "step": 28240 }, { "epoch": 2.268540453724152, "grad_norm": 0.24080732464790344, "learning_rate": 4.8376790142951914e-05, "loss": 0.3018, "step": 28250 }, { "epoch": 2.269343505320217, "grad_norm": 0.24335749447345734, "learning_rate": 4.831629173947105e-05, "loss": 0.3266, "step": 28260 }, { "epoch": 2.270146556916282, "grad_norm": 0.2533215880393982, "learning_rate": 4.8255829011287824e-05, "loss": 0.3069, "step": 28270 }, { "epoch": 2.270949608512347, "grad_norm": 0.28821077942848206, "learning_rate": 4.8195401996455955e-05, "loss": 0.328, "step": 28280 }, { "epoch": 2.271752660108412, "grad_norm": 0.2968761920928955, "learning_rate": 4.813501073300664e-05, "loss": 0.3061, "step": 28290 }, { "epoch": 2.2725557117044772, "grad_norm": 0.25312066078186035, "learning_rate": 4.8074655258948644e-05, "loss": 0.3154, "step": 28300 }, { "epoch": 2.273358763300542, "grad_norm": 0.25447604060173035, "learning_rate": 4.80143356122682e-05, "loss": 0.3364, "step": 28310 }, { "epoch": 2.274161814896607, "grad_norm": 0.25344711542129517, "learning_rate": 4.795405183092897e-05, "loss": 0.3074, "step": 28320 }, { "epoch": 2.274964866492672, "grad_norm": 0.2859925925731659, "learning_rate": 4.7893803952872e-05, "loss": 0.3169, "step": 28330 }, { "epoch": 2.275767918088737, "grad_norm": 0.26703768968582153, "learning_rate": 4.7833592016015815e-05, "loss": 0.3179, "step": 28340 }, { "epoch": 2.276570969684802, "grad_norm": 0.28470373153686523, "learning_rate": 4.777341605825631e-05, "loss": 0.3148, "step": 28350 }, { "epoch": 2.2773740212808673, "grad_norm": 0.5845988392829895, "learning_rate": 4.771327611746669e-05, "loss": 0.3437, "step": 28360 }, { "epoch": 2.2781770728769324, "grad_norm": 0.21893131732940674, "learning_rate": 4.765317223149752e-05, "loss": 0.336, "step": 28370 }, { "epoch": 2.2789801244729975, "grad_norm": 0.22528760135173798, "learning_rate": 4.759310443817667e-05, "loss": 0.335, "step": 28380 }, { "epoch": 2.2797831760690626, "grad_norm": 0.2698814570903778, "learning_rate": 4.753307277530935e-05, "loss": 0.3008, "step": 28390 }, { "epoch": 2.2805862276651276, "grad_norm": 0.24168185889720917, "learning_rate": 4.7473077280677894e-05, "loss": 0.2962, "step": 28400 }, { "epoch": 2.2813892792611927, "grad_norm": 0.26473987102508545, "learning_rate": 4.741311799204199e-05, "loss": 0.3401, "step": 28410 }, { "epoch": 2.2821923308572574, "grad_norm": 0.23896387219429016, "learning_rate": 4.7353194947138516e-05, "loss": 0.3226, "step": 28420 }, { "epoch": 2.2829953824533225, "grad_norm": 0.27529165148735046, "learning_rate": 4.729330818368154e-05, "loss": 0.312, "step": 28430 }, { "epoch": 2.2837984340493875, "grad_norm": 0.29678863286972046, "learning_rate": 4.723345773936225e-05, "loss": 0.3245, "step": 28440 }, { "epoch": 2.2846014856454526, "grad_norm": 0.23592343926429749, "learning_rate": 4.7173643651849e-05, "loss": 0.3142, "step": 28450 }, { "epoch": 2.2854045372415177, "grad_norm": 0.25277969241142273, "learning_rate": 4.7113865958787277e-05, "loss": 0.3319, "step": 28460 }, { "epoch": 2.286207588837583, "grad_norm": 0.24960479140281677, "learning_rate": 4.705412469779969e-05, "loss": 0.3683, "step": 28470 }, { "epoch": 2.287010640433648, "grad_norm": 0.25051867961883545, "learning_rate": 4.699441990648582e-05, "loss": 0.3317, "step": 28480 }, { "epoch": 2.287813692029713, "grad_norm": 0.29695940017700195, "learning_rate": 4.693475162242239e-05, "loss": 0.3154, "step": 28490 }, { "epoch": 2.288616743625778, "grad_norm": 0.23378774523735046, "learning_rate": 4.6875119883163165e-05, "loss": 0.3087, "step": 28500 }, { "epoch": 2.289419795221843, "grad_norm": 0.23277121782302856, "learning_rate": 4.681552472623878e-05, "loss": 0.3348, "step": 28510 }, { "epoch": 2.290222846817908, "grad_norm": 0.2874528169631958, "learning_rate": 4.675596618915694e-05, "loss": 0.3182, "step": 28520 }, { "epoch": 2.291025898413973, "grad_norm": 0.2029178887605667, "learning_rate": 4.669644430940231e-05, "loss": 0.3148, "step": 28530 }, { "epoch": 2.2918289500100384, "grad_norm": 0.2593824565410614, "learning_rate": 4.663695912443649e-05, "loss": 0.3511, "step": 28540 }, { "epoch": 2.292632001606103, "grad_norm": 0.24497970938682556, "learning_rate": 4.6577510671697886e-05, "loss": 0.3223, "step": 28550 }, { "epoch": 2.293435053202168, "grad_norm": 0.23585720360279083, "learning_rate": 4.651809898860191e-05, "loss": 0.3377, "step": 28560 }, { "epoch": 2.294238104798233, "grad_norm": 0.20564740896224976, "learning_rate": 4.645872411254078e-05, "loss": 0.3076, "step": 28570 }, { "epoch": 2.2950411563942983, "grad_norm": 0.2470681220293045, "learning_rate": 4.6399386080883525e-05, "loss": 0.3348, "step": 28580 }, { "epoch": 2.2958442079903634, "grad_norm": 0.25927799940109253, "learning_rate": 4.634008493097597e-05, "loss": 0.3212, "step": 28590 }, { "epoch": 2.2966472595864285, "grad_norm": 0.22574757039546967, "learning_rate": 4.6280820700140826e-05, "loss": 0.3311, "step": 28600 }, { "epoch": 2.2974503111824935, "grad_norm": 0.22547219693660736, "learning_rate": 4.622159342567746e-05, "loss": 0.3224, "step": 28610 }, { "epoch": 2.2982533627785586, "grad_norm": 0.2818748354911804, "learning_rate": 4.616240314486209e-05, "loss": 0.2989, "step": 28620 }, { "epoch": 2.2990564143746237, "grad_norm": 0.24027115106582642, "learning_rate": 4.610324989494752e-05, "loss": 0.3151, "step": 28630 }, { "epoch": 2.299859465970689, "grad_norm": 0.2762349545955658, "learning_rate": 4.604413371316335e-05, "loss": 0.3323, "step": 28640 }, { "epoch": 2.300662517566754, "grad_norm": 0.20192471146583557, "learning_rate": 4.5985054636715793e-05, "loss": 0.3096, "step": 28650 }, { "epoch": 2.3014655691628185, "grad_norm": 0.2980937063694, "learning_rate": 4.5926012702787776e-05, "loss": 0.3097, "step": 28660 }, { "epoch": 2.3022686207588836, "grad_norm": 0.3454783260822296, "learning_rate": 4.586700794853875e-05, "loss": 0.3328, "step": 28670 }, { "epoch": 2.3030716723549487, "grad_norm": 0.2345275729894638, "learning_rate": 4.5808040411104834e-05, "loss": 0.3312, "step": 28680 }, { "epoch": 2.303874723951014, "grad_norm": 0.26718756556510925, "learning_rate": 4.574911012759877e-05, "loss": 0.3306, "step": 28690 }, { "epoch": 2.304677775547079, "grad_norm": 0.23916994035243988, "learning_rate": 4.5690217135109736e-05, "loss": 0.3094, "step": 28700 }, { "epoch": 2.305480827143144, "grad_norm": 0.2589437663555145, "learning_rate": 4.563136147070347e-05, "loss": 0.3215, "step": 28710 }, { "epoch": 2.306283878739209, "grad_norm": 0.2573365867137909, "learning_rate": 4.5572543171422286e-05, "loss": 0.3387, "step": 28720 }, { "epoch": 2.307086930335274, "grad_norm": 0.2670202851295471, "learning_rate": 4.5513762274284956e-05, "loss": 0.3152, "step": 28730 }, { "epoch": 2.307889981931339, "grad_norm": 0.22757185995578766, "learning_rate": 4.545501881628664e-05, "loss": 0.332, "step": 28740 }, { "epoch": 2.3086930335274043, "grad_norm": 0.21750754117965698, "learning_rate": 4.5396312834399035e-05, "loss": 0.3092, "step": 28750 }, { "epoch": 2.3094960851234694, "grad_norm": 0.2743428647518158, "learning_rate": 4.533764436557019e-05, "loss": 0.3112, "step": 28760 }, { "epoch": 2.310299136719534, "grad_norm": 0.26206904649734497, "learning_rate": 4.5279013446724594e-05, "loss": 0.3148, "step": 28770 }, { "epoch": 2.311102188315599, "grad_norm": 0.26449859142303467, "learning_rate": 4.522042011476301e-05, "loss": 0.3335, "step": 28780 }, { "epoch": 2.311905239911664, "grad_norm": 0.32160499691963196, "learning_rate": 4.516186440656263e-05, "loss": 0.3097, "step": 28790 }, { "epoch": 2.3127082915077293, "grad_norm": 0.27457621693611145, "learning_rate": 4.510334635897695e-05, "loss": 0.3253, "step": 28800 }, { "epoch": 2.3135113431037944, "grad_norm": 0.23720620572566986, "learning_rate": 4.504486600883579e-05, "loss": 0.3198, "step": 28810 }, { "epoch": 2.3143143946998594, "grad_norm": 0.26284047961235046, "learning_rate": 4.4986423392945154e-05, "loss": 0.3224, "step": 28820 }, { "epoch": 2.3151174462959245, "grad_norm": 0.25880166888237, "learning_rate": 4.4928018548087356e-05, "loss": 0.3169, "step": 28830 }, { "epoch": 2.3159204978919896, "grad_norm": 0.2374906688928604, "learning_rate": 4.486965151102097e-05, "loss": 0.3137, "step": 28840 }, { "epoch": 2.3167235494880547, "grad_norm": 0.2552730143070221, "learning_rate": 4.481132231848068e-05, "loss": 0.3294, "step": 28850 }, { "epoch": 2.31752660108412, "grad_norm": 0.25224679708480835, "learning_rate": 4.475303100717745e-05, "loss": 0.327, "step": 28860 }, { "epoch": 2.318329652680185, "grad_norm": 0.23272863030433655, "learning_rate": 4.469477761379835e-05, "loss": 0.3091, "step": 28870 }, { "epoch": 2.3191327042762495, "grad_norm": 0.30255043506622314, "learning_rate": 4.463656217500664e-05, "loss": 0.3277, "step": 28880 }, { "epoch": 2.319935755872315, "grad_norm": 0.237210214138031, "learning_rate": 4.457838472744158e-05, "loss": 0.3295, "step": 28890 }, { "epoch": 2.3207388074683797, "grad_norm": 0.23671181499958038, "learning_rate": 4.452024530771861e-05, "loss": 0.2887, "step": 28900 }, { "epoch": 2.3215418590644448, "grad_norm": 0.2497985064983368, "learning_rate": 4.446214395242925e-05, "loss": 0.2973, "step": 28910 }, { "epoch": 2.32234491066051, "grad_norm": 0.2471594661474228, "learning_rate": 4.440408069814104e-05, "loss": 0.3311, "step": 28920 }, { "epoch": 2.323147962256575, "grad_norm": 0.23545514047145844, "learning_rate": 4.434605558139745e-05, "loss": 0.3094, "step": 28930 }, { "epoch": 2.32395101385264, "grad_norm": 0.2604195177555084, "learning_rate": 4.4288068638718095e-05, "loss": 0.3266, "step": 28940 }, { "epoch": 2.324754065448705, "grad_norm": 0.2938971221446991, "learning_rate": 4.423011990659849e-05, "loss": 0.3482, "step": 28950 }, { "epoch": 2.32555711704477, "grad_norm": 0.23074907064437866, "learning_rate": 4.4172209421510105e-05, "loss": 0.3258, "step": 28960 }, { "epoch": 2.3263601686408353, "grad_norm": 0.23822887241840363, "learning_rate": 4.4114337219900283e-05, "loss": 0.29, "step": 28970 }, { "epoch": 2.3271632202369004, "grad_norm": 0.2440735250711441, "learning_rate": 4.405650333819238e-05, "loss": 0.3282, "step": 28980 }, { "epoch": 2.3279662718329655, "grad_norm": 0.27449271082878113, "learning_rate": 4.3998707812785564e-05, "loss": 0.333, "step": 28990 }, { "epoch": 2.3287693234290305, "grad_norm": 0.28267404437065125, "learning_rate": 4.394095068005494e-05, "loss": 0.3161, "step": 29000 }, { "epoch": 2.329572375025095, "grad_norm": 0.24980713427066803, "learning_rate": 4.38832319763513e-05, "loss": 0.3203, "step": 29010 }, { "epoch": 2.3303754266211603, "grad_norm": 0.2476373314857483, "learning_rate": 4.382555173800141e-05, "loss": 0.3423, "step": 29020 }, { "epoch": 2.3311784782172253, "grad_norm": 0.26057079434394836, "learning_rate": 4.376791000130776e-05, "loss": 0.3127, "step": 29030 }, { "epoch": 2.3319815298132904, "grad_norm": 0.2954016327857971, "learning_rate": 4.371030680254855e-05, "loss": 0.3493, "step": 29040 }, { "epoch": 2.3327845814093555, "grad_norm": 0.25318437814712524, "learning_rate": 4.3652742177977815e-05, "loss": 0.3267, "step": 29050 }, { "epoch": 2.3335876330054206, "grad_norm": 0.2292611449956894, "learning_rate": 4.359521616382529e-05, "loss": 0.3013, "step": 29060 }, { "epoch": 2.3343906846014857, "grad_norm": 0.22760048508644104, "learning_rate": 4.3537728796296404e-05, "loss": 0.3246, "step": 29070 }, { "epoch": 2.3351937361975508, "grad_norm": 0.2511257529258728, "learning_rate": 4.348028011157225e-05, "loss": 0.3362, "step": 29080 }, { "epoch": 2.335996787793616, "grad_norm": 0.25829094648361206, "learning_rate": 4.3422870145809544e-05, "loss": 0.3121, "step": 29090 }, { "epoch": 2.336799839389681, "grad_norm": 0.24212509393692017, "learning_rate": 4.336549893514069e-05, "loss": 0.3225, "step": 29100 }, { "epoch": 2.337602890985746, "grad_norm": 0.24397864937782288, "learning_rate": 4.330816651567374e-05, "loss": 0.3065, "step": 29110 }, { "epoch": 2.3384059425818107, "grad_norm": 0.25586172938346863, "learning_rate": 4.325087292349219e-05, "loss": 0.3014, "step": 29120 }, { "epoch": 2.3392089941778758, "grad_norm": 0.2334771752357483, "learning_rate": 4.319361819465523e-05, "loss": 0.3143, "step": 29130 }, { "epoch": 2.340012045773941, "grad_norm": 0.23629896342754364, "learning_rate": 4.3136402365197525e-05, "loss": 0.3369, "step": 29140 }, { "epoch": 2.340815097370006, "grad_norm": 0.24888278543949127, "learning_rate": 4.307922547112933e-05, "loss": 0.3206, "step": 29150 }, { "epoch": 2.341618148966071, "grad_norm": 0.33965003490448, "learning_rate": 4.302208754843625e-05, "loss": 0.3254, "step": 29160 }, { "epoch": 2.342421200562136, "grad_norm": 0.4191114604473114, "learning_rate": 4.2964988633079526e-05, "loss": 0.3344, "step": 29170 }, { "epoch": 2.343224252158201, "grad_norm": 0.2389759123325348, "learning_rate": 4.2907928760995765e-05, "loss": 0.3322, "step": 29180 }, { "epoch": 2.3440273037542663, "grad_norm": 0.265163779258728, "learning_rate": 4.2850907968096974e-05, "loss": 0.3234, "step": 29190 }, { "epoch": 2.3448303553503314, "grad_norm": 0.24226877093315125, "learning_rate": 4.279392629027066e-05, "loss": 0.3145, "step": 29200 }, { "epoch": 2.3456334069463964, "grad_norm": 0.23704278469085693, "learning_rate": 4.273698376337959e-05, "loss": 0.3155, "step": 29210 }, { "epoch": 2.3464364585424615, "grad_norm": 0.25295037031173706, "learning_rate": 4.268008042326203e-05, "loss": 0.3106, "step": 29220 }, { "epoch": 2.347239510138526, "grad_norm": 0.22032150626182556, "learning_rate": 4.262321630573142e-05, "loss": 0.3351, "step": 29230 }, { "epoch": 2.3480425617345917, "grad_norm": 0.24687501788139343, "learning_rate": 4.2566391446576656e-05, "loss": 0.3086, "step": 29240 }, { "epoch": 2.3488456133306563, "grad_norm": 0.2632053792476654, "learning_rate": 4.250960588156188e-05, "loss": 0.3379, "step": 29250 }, { "epoch": 2.3496486649267214, "grad_norm": 0.24788551032543182, "learning_rate": 4.245285964642651e-05, "loss": 0.3209, "step": 29260 }, { "epoch": 2.3504517165227865, "grad_norm": 0.22793512046337128, "learning_rate": 4.2396152776885136e-05, "loss": 0.3305, "step": 29270 }, { "epoch": 2.3512547681188516, "grad_norm": 0.24006962776184082, "learning_rate": 4.233948530862766e-05, "loss": 0.3365, "step": 29280 }, { "epoch": 2.3520578197149167, "grad_norm": 0.2816630005836487, "learning_rate": 4.228285727731917e-05, "loss": 0.3114, "step": 29290 }, { "epoch": 2.3528608713109818, "grad_norm": 0.24521668255329132, "learning_rate": 4.222626871859995e-05, "loss": 0.292, "step": 29300 }, { "epoch": 2.353663922907047, "grad_norm": 0.2618454098701477, "learning_rate": 4.216971966808534e-05, "loss": 0.328, "step": 29310 }, { "epoch": 2.354466974503112, "grad_norm": 0.27309906482696533, "learning_rate": 4.2113210161365945e-05, "loss": 0.287, "step": 29320 }, { "epoch": 2.355270026099177, "grad_norm": 0.27452847361564636, "learning_rate": 4.205674023400742e-05, "loss": 0.3193, "step": 29330 }, { "epoch": 2.356073077695242, "grad_norm": 0.28663888573646545, "learning_rate": 4.2000309921550504e-05, "loss": 0.3261, "step": 29340 }, { "epoch": 2.356876129291307, "grad_norm": 0.28551095724105835, "learning_rate": 4.194391925951099e-05, "loss": 0.319, "step": 29350 }, { "epoch": 2.357679180887372, "grad_norm": 0.23526152968406677, "learning_rate": 4.188756828337975e-05, "loss": 0.3201, "step": 29360 }, { "epoch": 2.358482232483437, "grad_norm": 0.2628491222858429, "learning_rate": 4.1831257028622717e-05, "loss": 0.3121, "step": 29370 }, { "epoch": 2.359285284079502, "grad_norm": 0.2715875804424286, "learning_rate": 4.177498553068069e-05, "loss": 0.3356, "step": 29380 }, { "epoch": 2.360088335675567, "grad_norm": 0.3161601126194, "learning_rate": 4.171875382496959e-05, "loss": 0.3419, "step": 29390 }, { "epoch": 2.360891387271632, "grad_norm": 0.2399444282054901, "learning_rate": 4.166256194688023e-05, "loss": 0.3195, "step": 29400 }, { "epoch": 2.3616944388676973, "grad_norm": 0.1995360106229782, "learning_rate": 4.160640993177839e-05, "loss": 0.3264, "step": 29410 }, { "epoch": 2.3624974904637623, "grad_norm": 0.2531709671020508, "learning_rate": 4.1550297815004666e-05, "loss": 0.3191, "step": 29420 }, { "epoch": 2.3633005420598274, "grad_norm": 0.25284549593925476, "learning_rate": 4.149422563187464e-05, "loss": 0.3442, "step": 29430 }, { "epoch": 2.3641035936558925, "grad_norm": 0.22601163387298584, "learning_rate": 4.1438193417678736e-05, "loss": 0.3143, "step": 29440 }, { "epoch": 2.3649066452519576, "grad_norm": 0.2713486850261688, "learning_rate": 4.138220120768225e-05, "loss": 0.3371, "step": 29450 }, { "epoch": 2.3657096968480227, "grad_norm": 0.26210102438926697, "learning_rate": 4.132624903712523e-05, "loss": 0.3037, "step": 29460 }, { "epoch": 2.3665127484440873, "grad_norm": 0.27984827756881714, "learning_rate": 4.127033694122255e-05, "loss": 0.3205, "step": 29470 }, { "epoch": 2.3673158000401524, "grad_norm": 0.2348852902650833, "learning_rate": 4.121446495516388e-05, "loss": 0.2982, "step": 29480 }, { "epoch": 2.3681188516362175, "grad_norm": 0.2720058262348175, "learning_rate": 4.115863311411369e-05, "loss": 0.332, "step": 29490 }, { "epoch": 2.3689219032322826, "grad_norm": 0.3034224510192871, "learning_rate": 4.110284145321106e-05, "loss": 0.3267, "step": 29500 }, { "epoch": 2.3697249548283477, "grad_norm": 0.24784111976623535, "learning_rate": 4.10470900075699e-05, "loss": 0.3415, "step": 29510 }, { "epoch": 2.3705280064244127, "grad_norm": 0.20390728116035461, "learning_rate": 4.099137881227878e-05, "loss": 0.3434, "step": 29520 }, { "epoch": 2.371331058020478, "grad_norm": 0.29914066195487976, "learning_rate": 4.0935707902400876e-05, "loss": 0.3142, "step": 29530 }, { "epoch": 2.372134109616543, "grad_norm": 0.28621378540992737, "learning_rate": 4.0880077312974065e-05, "loss": 0.3237, "step": 29540 }, { "epoch": 2.372937161212608, "grad_norm": 0.2508004307746887, "learning_rate": 4.082448707901087e-05, "loss": 0.332, "step": 29550 }, { "epoch": 2.373740212808673, "grad_norm": 0.28653115034103394, "learning_rate": 4.0768937235498385e-05, "loss": 0.3306, "step": 29560 }, { "epoch": 2.374543264404738, "grad_norm": 0.22405952215194702, "learning_rate": 4.071342781739823e-05, "loss": 0.3223, "step": 29570 }, { "epoch": 2.375346316000803, "grad_norm": 0.2523614466190338, "learning_rate": 4.065795885964669e-05, "loss": 0.3148, "step": 29580 }, { "epoch": 2.3761493675968683, "grad_norm": 0.22318202257156372, "learning_rate": 4.060253039715449e-05, "loss": 0.3217, "step": 29590 }, { "epoch": 2.376952419192933, "grad_norm": 0.22779522836208344, "learning_rate": 4.0547142464806956e-05, "loss": 0.3096, "step": 29600 }, { "epoch": 2.377755470788998, "grad_norm": 0.23044592142105103, "learning_rate": 4.049179509746378e-05, "loss": 0.3182, "step": 29610 }, { "epoch": 2.378558522385063, "grad_norm": 0.2211126983165741, "learning_rate": 4.043648832995926e-05, "loss": 0.3289, "step": 29620 }, { "epoch": 2.3793615739811282, "grad_norm": 0.24399890005588531, "learning_rate": 4.0381222197102045e-05, "loss": 0.3362, "step": 29630 }, { "epoch": 2.3801646255771933, "grad_norm": 0.2686973214149475, "learning_rate": 4.032599673367531e-05, "loss": 0.3387, "step": 29640 }, { "epoch": 2.3809676771732584, "grad_norm": 0.29335010051727295, "learning_rate": 4.02708119744365e-05, "loss": 0.3172, "step": 29650 }, { "epoch": 2.3817707287693235, "grad_norm": 0.2765568792819977, "learning_rate": 4.0215667954117534e-05, "loss": 0.329, "step": 29660 }, { "epoch": 2.3825737803653886, "grad_norm": 0.2627156972885132, "learning_rate": 4.0160564707424677e-05, "loss": 0.3227, "step": 29670 }, { "epoch": 2.3833768319614537, "grad_norm": 0.2421642243862152, "learning_rate": 4.0105502269038545e-05, "loss": 0.2958, "step": 29680 }, { "epoch": 2.3841798835575188, "grad_norm": 0.2540767788887024, "learning_rate": 4.0050480673614e-05, "loss": 0.3466, "step": 29690 }, { "epoch": 2.384982935153584, "grad_norm": 0.22065496444702148, "learning_rate": 3.99954999557803e-05, "loss": 0.3431, "step": 29700 }, { "epoch": 2.3857859867496485, "grad_norm": 0.2849826514720917, "learning_rate": 3.994056015014087e-05, "loss": 0.3237, "step": 29710 }, { "epoch": 2.3865890383457136, "grad_norm": 0.25796282291412354, "learning_rate": 3.988566129127349e-05, "loss": 0.3176, "step": 29720 }, { "epoch": 2.3873920899417787, "grad_norm": 0.2414199560880661, "learning_rate": 3.983080341373008e-05, "loss": 0.3166, "step": 29730 }, { "epoch": 2.3881951415378437, "grad_norm": 0.2328486144542694, "learning_rate": 3.977598655203679e-05, "loss": 0.3062, "step": 29740 }, { "epoch": 2.388998193133909, "grad_norm": 0.23364661633968353, "learning_rate": 3.972121074069406e-05, "loss": 0.3229, "step": 29750 }, { "epoch": 2.389801244729974, "grad_norm": 0.24551227688789368, "learning_rate": 3.9666476014176314e-05, "loss": 0.2915, "step": 29760 }, { "epoch": 2.390604296326039, "grad_norm": 0.24464410543441772, "learning_rate": 3.961178240693223e-05, "loss": 0.3172, "step": 29770 }, { "epoch": 2.391407347922104, "grad_norm": 0.26060187816619873, "learning_rate": 3.9557129953384595e-05, "loss": 0.3417, "step": 29780 }, { "epoch": 2.392210399518169, "grad_norm": 0.2512723505496979, "learning_rate": 3.950251868793034e-05, "loss": 0.2924, "step": 29790 }, { "epoch": 2.3930134511142342, "grad_norm": 0.25268271565437317, "learning_rate": 3.944794864494032e-05, "loss": 0.3196, "step": 29800 }, { "epoch": 2.3938165027102993, "grad_norm": 0.261360764503479, "learning_rate": 3.9393419858759597e-05, "loss": 0.314, "step": 29810 }, { "epoch": 2.394619554306364, "grad_norm": 0.2896828353404999, "learning_rate": 3.933893236370722e-05, "loss": 0.3331, "step": 29820 }, { "epoch": 2.395422605902429, "grad_norm": 0.2601867616176605, "learning_rate": 3.9284486194076256e-05, "loss": 0.3316, "step": 29830 }, { "epoch": 2.396225657498494, "grad_norm": 0.2671002149581909, "learning_rate": 3.923008138413374e-05, "loss": 0.3166, "step": 29840 }, { "epoch": 2.3970287090945592, "grad_norm": 0.243623748421669, "learning_rate": 3.917571796812065e-05, "loss": 0.3042, "step": 29850 }, { "epoch": 2.3978317606906243, "grad_norm": 0.29407835006713867, "learning_rate": 3.912139598025202e-05, "loss": 0.3214, "step": 29860 }, { "epoch": 2.3986348122866894, "grad_norm": 0.21566203236579895, "learning_rate": 3.906711545471667e-05, "loss": 0.33, "step": 29870 }, { "epoch": 2.3994378638827545, "grad_norm": 0.2771897315979004, "learning_rate": 3.9012876425677426e-05, "loss": 0.3374, "step": 29880 }, { "epoch": 2.4002409154788196, "grad_norm": 0.2321242392063141, "learning_rate": 3.895867892727095e-05, "loss": 0.3167, "step": 29890 }, { "epoch": 2.4010439670748847, "grad_norm": 0.2257077544927597, "learning_rate": 3.890452299360785e-05, "loss": 0.3274, "step": 29900 }, { "epoch": 2.4018470186709497, "grad_norm": 0.24979214370250702, "learning_rate": 3.885040865877242e-05, "loss": 0.3232, "step": 29910 }, { "epoch": 2.402650070267015, "grad_norm": 0.26806342601776123, "learning_rate": 3.879633595682288e-05, "loss": 0.2954, "step": 29920 }, { "epoch": 2.4034531218630795, "grad_norm": 0.25100916624069214, "learning_rate": 3.874230492179126e-05, "loss": 0.321, "step": 29930 }, { "epoch": 2.404256173459145, "grad_norm": 0.28666743636131287, "learning_rate": 3.868831558768333e-05, "loss": 0.3079, "step": 29940 }, { "epoch": 2.4050592250552096, "grad_norm": 0.2349349558353424, "learning_rate": 3.863436798847858e-05, "loss": 0.3277, "step": 29950 }, { "epoch": 2.4058622766512747, "grad_norm": 0.2273445576429367, "learning_rate": 3.85804621581303e-05, "loss": 0.3282, "step": 29960 }, { "epoch": 2.40666532824734, "grad_norm": 0.3047908544540405, "learning_rate": 3.852659813056544e-05, "loss": 0.3433, "step": 29970 }, { "epoch": 2.407468379843405, "grad_norm": 0.25385674834251404, "learning_rate": 3.847277593968469e-05, "loss": 0.3194, "step": 29980 }, { "epoch": 2.40827143143947, "grad_norm": 0.23417766392230988, "learning_rate": 3.841899561936233e-05, "loss": 0.3365, "step": 29990 }, { "epoch": 2.409074483035535, "grad_norm": 0.28284162282943726, "learning_rate": 3.836525720344637e-05, "loss": 0.3308, "step": 30000 } ], "logging_steps": 10, "max_steps": 40000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.9838011659358044e+19, "train_batch_size": 3, "trial_name": null, "trial_params": null }