{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.85, "eval_steps": 500, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "grad_norm": 4.447365760803223, "learning_rate": 3.0000000000000004e-07, "loss": 1.3484, "step": 10 }, { "grad_norm": 4.113630771636963, "learning_rate": 6.333333333333333e-07, "loss": 1.3434, "step": 20 }, { "grad_norm": 3.8941195011138916, "learning_rate": 9.666666666666668e-07, "loss": 1.3238, "step": 30 }, { "grad_norm": 2.6234889030456543, "learning_rate": 1.3e-06, "loss": 1.2652, "step": 40 }, { "grad_norm": 1.3288973569869995, "learning_rate": 1.6333333333333333e-06, "loss": 1.1887, "step": 50 }, { "grad_norm": 0.8369770646095276, "learning_rate": 1.9666666666666668e-06, "loss": 1.1305, "step": 60 }, { "grad_norm": 0.4795992970466614, "learning_rate": 2.3e-06, "loss": 1.0973, "step": 70 }, { "grad_norm": 0.2965182065963745, "learning_rate": 2.6333333333333337e-06, "loss": 1.0801, "step": 80 }, { "grad_norm": 0.32636141777038574, "learning_rate": 2.966666666666667e-06, "loss": 1.0785, "step": 90 }, { "grad_norm": 0.3556094169616699, "learning_rate": 3.3e-06, "loss": 1.075, "step": 100 }, { "grad_norm": 0.35541146993637085, "learning_rate": 3.633333333333334e-06, "loss": 1.0656, "step": 110 }, { "grad_norm": 0.3391725420951843, "learning_rate": 3.966666666666667e-06, "loss": 1.0664, "step": 120 }, { "grad_norm": 0.4171597361564636, "learning_rate": 4.2999999999999995e-06, "loss": 1.0477, "step": 130 }, { "grad_norm": 0.3247183859348297, "learning_rate": 4.633333333333334e-06, "loss": 1.0523, "step": 140 }, { "grad_norm": 0.4718841016292572, "learning_rate": 4.966666666666667e-06, "loss": 1.0461, "step": 150 }, { "grad_norm": 0.4053254723548889, "learning_rate": 5.3e-06, "loss": 1.0422, "step": 160 }, { "grad_norm": 0.39676275849342346, "learning_rate": 5.633333333333333e-06, "loss": 1.043, "step": 170 }, { "grad_norm": 0.3670744299888611, "learning_rate": 5.9666666666666666e-06, "loss": 1.0371, "step": 180 }, { "grad_norm": 0.542593777179718, "learning_rate": 6.300000000000001e-06, "loss": 1.0336, "step": 190 }, { "grad_norm": 0.5725336670875549, "learning_rate": 6.633333333333333e-06, "loss": 1.0227, "step": 200 }, { "grad_norm": 0.6448169350624084, "learning_rate": 6.966666666666667e-06, "loss": 1.0158, "step": 210 }, { "grad_norm": 0.5190443992614746, "learning_rate": 7.2999999999999996e-06, "loss": 1.0049, "step": 220 }, { "grad_norm": 0.6504555344581604, "learning_rate": 7.633333333333334e-06, "loss": 0.9838, "step": 230 }, { "grad_norm": 0.8528154492378235, "learning_rate": 7.966666666666666e-06, "loss": 0.9516, "step": 240 }, { "grad_norm": 1.0889426469802856, "learning_rate": 8.3e-06, "loss": 0.8994, "step": 250 }, { "grad_norm": 1.0057774782180786, "learning_rate": 8.633333333333334e-06, "loss": 0.8383, "step": 260 }, { "grad_norm": 1.1228306293487549, "learning_rate": 8.966666666666668e-06, "loss": 0.7645, "step": 270 }, { "grad_norm": 1.5069864988327026, "learning_rate": 9.3e-06, "loss": 0.7127, "step": 280 }, { "grad_norm": 1.491326093673706, "learning_rate": 9.633333333333335e-06, "loss": 0.6578, "step": 290 }, { "grad_norm": 1.7207450866699219, "learning_rate": 9.966666666666667e-06, "loss": 0.6078, "step": 300 }, { "grad_norm": 1.2145884037017822, "learning_rate": 1.03e-05, "loss": 0.5693, "step": 310 }, { "grad_norm": 1.5747157335281372, "learning_rate": 1.0633333333333334e-05, "loss": 0.5098, "step": 320 }, { "grad_norm": 1.5645968914031982, "learning_rate": 1.0966666666666666e-05, "loss": 0.4669, "step": 330 }, { "grad_norm": 1.6364047527313232, "learning_rate": 1.13e-05, "loss": 0.435, "step": 340 }, { "grad_norm": 1.8076460361480713, "learning_rate": 1.1633333333333334e-05, "loss": 0.4071, "step": 350 }, { "grad_norm": 1.8036781549453735, "learning_rate": 1.1966666666666668e-05, "loss": 0.3798, "step": 360 }, { "grad_norm": 1.761277437210083, "learning_rate": 1.23e-05, "loss": 0.3501, "step": 370 }, { "grad_norm": 1.606987476348877, "learning_rate": 1.2633333333333333e-05, "loss": 0.3208, "step": 380 }, { "grad_norm": 1.7726079225540161, "learning_rate": 1.2966666666666669e-05, "loss": 0.3004, "step": 390 }, { "grad_norm": 2.3330554962158203, "learning_rate": 1.3300000000000001e-05, "loss": 0.2878, "step": 400 }, { "grad_norm": 1.6834126710891724, "learning_rate": 1.3633333333333334e-05, "loss": 0.2703, "step": 410 }, { "grad_norm": 1.9400217533111572, "learning_rate": 1.3966666666666666e-05, "loss": 0.2336, "step": 420 }, { "grad_norm": 1.739591360092163, "learning_rate": 1.43e-05, "loss": 0.2212, "step": 430 }, { "grad_norm": 2.0412356853485107, "learning_rate": 1.4633333333333334e-05, "loss": 0.2075, "step": 440 }, { "grad_norm": 2.1918587684631348, "learning_rate": 1.4966666666666668e-05, "loss": 0.2056, "step": 450 }, { "grad_norm": 2.2246336936950684, "learning_rate": 1.53e-05, "loss": 0.1877, "step": 460 }, { "grad_norm": 2.368511199951172, "learning_rate": 1.563333333333333e-05, "loss": 0.187, "step": 470 }, { "grad_norm": 2.1554298400878906, "learning_rate": 1.5966666666666667e-05, "loss": 0.1694, "step": 480 }, { "grad_norm": 2.1426496505737305, "learning_rate": 1.63e-05, "loss": 0.1537, "step": 490 }, { "grad_norm": 2.1911087036132812, "learning_rate": 1.6633333333333336e-05, "loss": 0.147, "step": 500 }, { "grad_norm": 2.10931134223938, "learning_rate": 1.6966666666666668e-05, "loss": 0.1434, "step": 510 }, { "grad_norm": 2.0908308029174805, "learning_rate": 1.73e-05, "loss": 0.1382, "step": 520 }, { "grad_norm": 2.3200180530548096, "learning_rate": 1.7633333333333336e-05, "loss": 0.1346, "step": 530 }, { "grad_norm": 2.0884504318237305, "learning_rate": 1.796666666666667e-05, "loss": 0.1432, "step": 540 }, { "grad_norm": 2.292975425720215, "learning_rate": 1.83e-05, "loss": 0.1266, "step": 550 }, { "grad_norm": 1.8442728519439697, "learning_rate": 1.8633333333333333e-05, "loss": 0.1323, "step": 560 }, { "grad_norm": 2.1122641563415527, "learning_rate": 1.896666666666667e-05, "loss": 0.1322, "step": 570 }, { "grad_norm": 2.50030255317688, "learning_rate": 1.93e-05, "loss": 0.1261, "step": 580 }, { "grad_norm": 2.5016114711761475, "learning_rate": 1.9633333333333334e-05, "loss": 0.1184, "step": 590 }, { "grad_norm": 2.4425137042999268, "learning_rate": 1.9966666666666666e-05, "loss": 0.1247, "step": 600 }, { "grad_norm": 1.9957345724105835, "learning_rate": 2.0300000000000002e-05, "loss": 0.1137, "step": 610 }, { "grad_norm": 2.0271806716918945, "learning_rate": 2.0633333333333335e-05, "loss": 0.1167, "step": 620 }, { "grad_norm": 2.0384092330932617, "learning_rate": 2.0966666666666667e-05, "loss": 0.1129, "step": 630 }, { "grad_norm": 2.214675188064575, "learning_rate": 2.13e-05, "loss": 0.1092, "step": 640 }, { "grad_norm": 2.3753225803375244, "learning_rate": 2.1633333333333332e-05, "loss": 0.1095, "step": 650 }, { "grad_norm": 1.9375733137130737, "learning_rate": 2.1966666666666668e-05, "loss": 0.1152, "step": 660 }, { "grad_norm": 1.9821141958236694, "learning_rate": 2.23e-05, "loss": 0.1122, "step": 670 }, { "grad_norm": 1.9919532537460327, "learning_rate": 2.2633333333333336e-05, "loss": 0.1044, "step": 680 }, { "grad_norm": 2.376053810119629, "learning_rate": 2.2966666666666668e-05, "loss": 0.1048, "step": 690 }, { "grad_norm": 1.8856782913208008, "learning_rate": 2.3300000000000004e-05, "loss": 0.1028, "step": 700 }, { "grad_norm": 2.3740177154541016, "learning_rate": 2.3633333333333336e-05, "loss": 0.1176, "step": 710 }, { "grad_norm": 2.156780958175659, "learning_rate": 2.396666666666667e-05, "loss": 0.1048, "step": 720 }, { "grad_norm": 2.4900104999542236, "learning_rate": 2.43e-05, "loss": 0.0942, "step": 730 }, { "grad_norm": 2.1864829063415527, "learning_rate": 2.4633333333333334e-05, "loss": 0.1058, "step": 740 }, { "grad_norm": 2.5174381732940674, "learning_rate": 2.496666666666667e-05, "loss": 0.1036, "step": 750 }, { "grad_norm": 2.1998066902160645, "learning_rate": 2.5300000000000002e-05, "loss": 0.1084, "step": 760 }, { "grad_norm": 2.03684663772583, "learning_rate": 2.5633333333333338e-05, "loss": 0.1058, "step": 770 }, { "grad_norm": 2.7059671878814697, "learning_rate": 2.5966666666666667e-05, "loss": 0.1025, "step": 780 }, { "grad_norm": 2.069469928741455, "learning_rate": 2.6300000000000002e-05, "loss": 0.1068, "step": 790 }, { "grad_norm": 1.898769497871399, "learning_rate": 2.663333333333333e-05, "loss": 0.102, "step": 800 }, { "grad_norm": 1.8516987562179565, "learning_rate": 2.6966666666666667e-05, "loss": 0.0965, "step": 810 }, { "grad_norm": 1.918587565422058, "learning_rate": 2.7300000000000003e-05, "loss": 0.0971, "step": 820 }, { "grad_norm": 2.0393455028533936, "learning_rate": 2.7633333333333332e-05, "loss": 0.0989, "step": 830 }, { "grad_norm": 1.5016096830368042, "learning_rate": 2.7966666666666668e-05, "loss": 0.0954, "step": 840 }, { "grad_norm": 1.993768572807312, "learning_rate": 2.83e-05, "loss": 0.0997, "step": 850 }, { "grad_norm": 2.1159729957580566, "learning_rate": 2.8633333333333336e-05, "loss": 0.1051, "step": 860 }, { "grad_norm": 1.8566912412643433, "learning_rate": 2.8966666666666668e-05, "loss": 0.0952, "step": 870 }, { "grad_norm": 2.3972713947296143, "learning_rate": 2.93e-05, "loss": 0.0976, "step": 880 }, { "grad_norm": 1.9326415061950684, "learning_rate": 2.9633333333333336e-05, "loss": 0.0942, "step": 890 }, { "grad_norm": 1.6511321067810059, "learning_rate": 2.9966666666666672e-05, "loss": 0.0877, "step": 900 }, { "grad_norm": 1.6019006967544556, "learning_rate": 3.03e-05, "loss": 0.0835, "step": 910 }, { "grad_norm": 2.0079538822174072, "learning_rate": 3.063333333333334e-05, "loss": 0.0883, "step": 920 }, { "grad_norm": 1.8352253437042236, "learning_rate": 3.096666666666666e-05, "loss": 0.0894, "step": 930 }, { "grad_norm": 1.7566516399383545, "learning_rate": 3.13e-05, "loss": 0.0823, "step": 940 }, { "grad_norm": 1.7244350910186768, "learning_rate": 3.1633333333333334e-05, "loss": 0.0904, "step": 950 }, { "grad_norm": 1.6203480958938599, "learning_rate": 3.196666666666667e-05, "loss": 0.0886, "step": 960 }, { "grad_norm": 2.22615122795105, "learning_rate": 3.2300000000000006e-05, "loss": 0.0892, "step": 970 }, { "grad_norm": 2.079116106033325, "learning_rate": 3.263333333333333e-05, "loss": 0.0886, "step": 980 }, { "grad_norm": 1.7760223150253296, "learning_rate": 3.296666666666667e-05, "loss": 0.0894, "step": 990 }, { "grad_norm": 1.8316891193389893, "learning_rate": 3.33e-05, "loss": 0.0936, "step": 1000 }, { "grad_norm": 1.8911479711532593, "learning_rate": 3.3633333333333335e-05, "loss": 0.0873, "step": 1010 }, { "grad_norm": 2.0155932903289795, "learning_rate": 3.396666666666667e-05, "loss": 0.0896, "step": 1020 }, { "grad_norm": 1.7159051895141602, "learning_rate": 3.430000000000001e-05, "loss": 0.0939, "step": 1030 }, { "grad_norm": 1.8582090139389038, "learning_rate": 3.463333333333333e-05, "loss": 0.087, "step": 1040 }, { "grad_norm": 1.7398709058761597, "learning_rate": 3.496666666666667e-05, "loss": 0.0805, "step": 1050 }, { "grad_norm": 1.660385251045227, "learning_rate": 3.53e-05, "loss": 0.0823, "step": 1060 }, { "grad_norm": 1.6338180303573608, "learning_rate": 3.563333333333334e-05, "loss": 0.0803, "step": 1070 }, { "grad_norm": 1.898735523223877, "learning_rate": 3.596666666666667e-05, "loss": 0.0771, "step": 1080 }, { "grad_norm": 1.6656705141067505, "learning_rate": 3.63e-05, "loss": 0.0797, "step": 1090 }, { "grad_norm": 2.257603883743286, "learning_rate": 3.6633333333333334e-05, "loss": 0.085, "step": 1100 }, { "grad_norm": 1.6739178895950317, "learning_rate": 3.6966666666666666e-05, "loss": 0.0746, "step": 1110 }, { "grad_norm": 1.684378981590271, "learning_rate": 3.73e-05, "loss": 0.0842, "step": 1120 }, { "grad_norm": 1.809085488319397, "learning_rate": 3.763333333333334e-05, "loss": 0.0831, "step": 1130 }, { "grad_norm": 1.951213002204895, "learning_rate": 3.796666666666667e-05, "loss": 0.0807, "step": 1140 }, { "grad_norm": 1.5780582427978516, "learning_rate": 3.83e-05, "loss": 0.086, "step": 1150 }, { "grad_norm": 1.7449209690093994, "learning_rate": 3.8633333333333335e-05, "loss": 0.0762, "step": 1160 }, { "grad_norm": 1.9996048212051392, "learning_rate": 3.896666666666667e-05, "loss": 0.0846, "step": 1170 }, { "grad_norm": 1.4134843349456787, "learning_rate": 3.9300000000000007e-05, "loss": 0.0837, "step": 1180 }, { "grad_norm": 1.4462107419967651, "learning_rate": 3.963333333333333e-05, "loss": 0.0787, "step": 1190 }, { "grad_norm": 1.7750245332717896, "learning_rate": 3.996666666666667e-05, "loss": 0.0836, "step": 1200 }, { "grad_norm": 1.5897971391677856, "learning_rate": 4.0300000000000004e-05, "loss": 0.0752, "step": 1210 }, { "grad_norm": 1.4849587678909302, "learning_rate": 4.0633333333333336e-05, "loss": 0.0714, "step": 1220 }, { "grad_norm": 1.1623440980911255, "learning_rate": 4.096666666666667e-05, "loss": 0.0743, "step": 1230 }, { "grad_norm": 1.7267963886260986, "learning_rate": 4.13e-05, "loss": 0.084, "step": 1240 }, { "grad_norm": 1.3627574443817139, "learning_rate": 4.1633333333333333e-05, "loss": 0.0821, "step": 1250 }, { "grad_norm": 1.644944429397583, "learning_rate": 4.196666666666667e-05, "loss": 0.0707, "step": 1260 }, { "grad_norm": 1.4931832551956177, "learning_rate": 4.23e-05, "loss": 0.085, "step": 1270 }, { "grad_norm": 1.327480435371399, "learning_rate": 4.263333333333334e-05, "loss": 0.0788, "step": 1280 }, { "grad_norm": 1.8330137729644775, "learning_rate": 4.296666666666666e-05, "loss": 0.0811, "step": 1290 }, { "grad_norm": 1.6781295537948608, "learning_rate": 4.33e-05, "loss": 0.0784, "step": 1300 }, { "grad_norm": 1.6285682916641235, "learning_rate": 4.3633333333333335e-05, "loss": 0.0698, "step": 1310 }, { "grad_norm": 1.512878179550171, "learning_rate": 4.396666666666667e-05, "loss": 0.0841, "step": 1320 }, { "grad_norm": 1.525999903678894, "learning_rate": 4.43e-05, "loss": 0.0721, "step": 1330 }, { "grad_norm": 1.5048669576644897, "learning_rate": 4.463333333333334e-05, "loss": 0.0683, "step": 1340 }, { "grad_norm": 1.3727238178253174, "learning_rate": 4.496666666666667e-05, "loss": 0.0772, "step": 1350 }, { "grad_norm": 1.4466592073440552, "learning_rate": 4.53e-05, "loss": 0.0815, "step": 1360 }, { "grad_norm": 1.4270600080490112, "learning_rate": 4.5633333333333336e-05, "loss": 0.0721, "step": 1370 }, { "grad_norm": 1.2492244243621826, "learning_rate": 4.596666666666667e-05, "loss": 0.0702, "step": 1380 }, { "grad_norm": 1.3557374477386475, "learning_rate": 4.630000000000001e-05, "loss": 0.0668, "step": 1390 }, { "grad_norm": 1.397212266921997, "learning_rate": 4.663333333333333e-05, "loss": 0.0652, "step": 1400 }, { "grad_norm": 1.514648675918579, "learning_rate": 4.696666666666667e-05, "loss": 0.0676, "step": 1410 }, { "grad_norm": 1.2394548654556274, "learning_rate": 4.73e-05, "loss": 0.0707, "step": 1420 }, { "grad_norm": 1.5269253253936768, "learning_rate": 4.763333333333334e-05, "loss": 0.0804, "step": 1430 }, { "grad_norm": 1.300212025642395, "learning_rate": 4.796666666666667e-05, "loss": 0.0718, "step": 1440 }, { "grad_norm": 1.1953885555267334, "learning_rate": 4.83e-05, "loss": 0.0677, "step": 1450 }, { "grad_norm": 1.2278605699539185, "learning_rate": 4.8633333333333334e-05, "loss": 0.0732, "step": 1460 }, { "grad_norm": 1.4869178533554077, "learning_rate": 4.8966666666666667e-05, "loss": 0.0659, "step": 1470 }, { "grad_norm": 1.6627095937728882, "learning_rate": 4.93e-05, "loss": 0.0665, "step": 1480 }, { "grad_norm": 1.6575415134429932, "learning_rate": 4.963333333333334e-05, "loss": 0.0716, "step": 1490 }, { "grad_norm": 1.3267849683761597, "learning_rate": 4.996666666666667e-05, "loss": 0.0676, "step": 1500 }, { "grad_norm": 1.5192252397537231, "learning_rate": 5.03e-05, "loss": 0.0719, "step": 1510 }, { "grad_norm": 1.2676864862442017, "learning_rate": 5.0633333333333335e-05, "loss": 0.0687, "step": 1520 }, { "grad_norm": 1.3717687129974365, "learning_rate": 5.0966666666666674e-05, "loss": 0.0649, "step": 1530 }, { "grad_norm": 1.2322230339050293, "learning_rate": 5.130000000000001e-05, "loss": 0.0697, "step": 1540 }, { "grad_norm": 1.248760461807251, "learning_rate": 5.163333333333333e-05, "loss": 0.0665, "step": 1550 }, { "grad_norm": 1.2173956632614136, "learning_rate": 5.196666666666667e-05, "loss": 0.0676, "step": 1560 }, { "grad_norm": 1.379817008972168, "learning_rate": 5.2300000000000004e-05, "loss": 0.0686, "step": 1570 }, { "grad_norm": 1.5441510677337646, "learning_rate": 5.2633333333333336e-05, "loss": 0.0648, "step": 1580 }, { "grad_norm": 1.224876880645752, "learning_rate": 5.296666666666666e-05, "loss": 0.0643, "step": 1590 }, { "grad_norm": 1.2213428020477295, "learning_rate": 5.330000000000001e-05, "loss": 0.0721, "step": 1600 }, { "grad_norm": 1.4989992380142212, "learning_rate": 5.3633333333333334e-05, "loss": 0.0666, "step": 1610 }, { "grad_norm": 1.3955914974212646, "learning_rate": 5.3966666666666666e-05, "loss": 0.0659, "step": 1620 }, { "grad_norm": 1.2911821603775024, "learning_rate": 5.4300000000000005e-05, "loss": 0.0669, "step": 1630 }, { "grad_norm": 1.4699312448501587, "learning_rate": 5.463333333333334e-05, "loss": 0.0681, "step": 1640 }, { "grad_norm": 1.3759241104125977, "learning_rate": 5.496666666666666e-05, "loss": 0.0634, "step": 1650 }, { "grad_norm": 1.4657407999038696, "learning_rate": 5.530000000000001e-05, "loss": 0.063, "step": 1660 }, { "grad_norm": 1.4505670070648193, "learning_rate": 5.5633333333333335e-05, "loss": 0.0694, "step": 1670 }, { "grad_norm": 1.1750441789627075, "learning_rate": 5.596666666666667e-05, "loss": 0.0645, "step": 1680 }, { "grad_norm": 1.2160885334014893, "learning_rate": 5.63e-05, "loss": 0.0613, "step": 1690 }, { "grad_norm": 1.3564035892486572, "learning_rate": 5.663333333333334e-05, "loss": 0.073, "step": 1700 }, { "grad_norm": 1.61124849319458, "learning_rate": 5.696666666666667e-05, "loss": 0.0643, "step": 1710 }, { "grad_norm": 1.247754693031311, "learning_rate": 5.73e-05, "loss": 0.0692, "step": 1720 }, { "grad_norm": 1.3943097591400146, "learning_rate": 5.7633333333333336e-05, "loss": 0.0687, "step": 1730 }, { "grad_norm": 0.9708131551742554, "learning_rate": 5.796666666666667e-05, "loss": 0.0641, "step": 1740 }, { "grad_norm": 1.123645544052124, "learning_rate": 5.83e-05, "loss": 0.0604, "step": 1750 }, { "grad_norm": 1.007624626159668, "learning_rate": 5.863333333333334e-05, "loss": 0.0587, "step": 1760 }, { "grad_norm": 1.0768353939056396, "learning_rate": 5.896666666666667e-05, "loss": 0.0704, "step": 1770 }, { "grad_norm": 1.0121560096740723, "learning_rate": 5.93e-05, "loss": 0.0667, "step": 1780 }, { "grad_norm": 1.04828679561615, "learning_rate": 5.9633333333333344e-05, "loss": 0.0649, "step": 1790 }, { "grad_norm": 1.2127963304519653, "learning_rate": 5.996666666666667e-05, "loss": 0.0674, "step": 1800 }, { "grad_norm": 1.0731089115142822, "learning_rate": 6.03e-05, "loss": 0.0611, "step": 1810 }, { "grad_norm": 1.4043693542480469, "learning_rate": 6.063333333333333e-05, "loss": 0.0637, "step": 1820 }, { "grad_norm": 1.1265013217926025, "learning_rate": 6.0966666666666674e-05, "loss": 0.0623, "step": 1830 }, { "grad_norm": 0.9502959847450256, "learning_rate": 6.13e-05, "loss": 0.0603, "step": 1840 }, { "grad_norm": 1.1003267765045166, "learning_rate": 6.163333333333333e-05, "loss": 0.0608, "step": 1850 }, { "grad_norm": 1.1360594034194946, "learning_rate": 6.196666666666668e-05, "loss": 0.0576, "step": 1860 }, { "grad_norm": 1.1677144765853882, "learning_rate": 6.23e-05, "loss": 0.0589, "step": 1870 }, { "grad_norm": 1.2256553173065186, "learning_rate": 6.263333333333333e-05, "loss": 0.0589, "step": 1880 }, { "grad_norm": 1.42429518699646, "learning_rate": 6.296666666666667e-05, "loss": 0.0573, "step": 1890 }, { "grad_norm": 1.1541980504989624, "learning_rate": 6.330000000000001e-05, "loss": 0.0656, "step": 1900 }, { "grad_norm": 1.4295943975448608, "learning_rate": 6.363333333333334e-05, "loss": 0.065, "step": 1910 }, { "grad_norm": 1.0897581577301025, "learning_rate": 6.396666666666667e-05, "loss": 0.0579, "step": 1920 }, { "grad_norm": 1.1042475700378418, "learning_rate": 6.43e-05, "loss": 0.0593, "step": 1930 }, { "grad_norm": 0.8818796277046204, "learning_rate": 6.463333333333334e-05, "loss": 0.0703, "step": 1940 }, { "grad_norm": 1.3052315711975098, "learning_rate": 6.496666666666667e-05, "loss": 0.0585, "step": 1950 }, { "grad_norm": 1.0372258424758911, "learning_rate": 6.53e-05, "loss": 0.0589, "step": 1960 }, { "grad_norm": 1.0483773946762085, "learning_rate": 6.563333333333333e-05, "loss": 0.0598, "step": 1970 }, { "grad_norm": 1.1204289197921753, "learning_rate": 6.596666666666667e-05, "loss": 0.0634, "step": 1980 }, { "grad_norm": 1.2750037908554077, "learning_rate": 6.630000000000001e-05, "loss": 0.0561, "step": 1990 }, { "grad_norm": 1.0252948999404907, "learning_rate": 6.663333333333333e-05, "loss": 0.0576, "step": 2000 }, { "grad_norm": 1.13008713722229, "learning_rate": 6.696666666666666e-05, "loss": 0.0593, "step": 2010 }, { "grad_norm": 0.8467022776603699, "learning_rate": 6.730000000000001e-05, "loss": 0.0589, "step": 2020 }, { "grad_norm": 1.1267976760864258, "learning_rate": 6.763333333333334e-05, "loss": 0.0542, "step": 2030 }, { "grad_norm": 1.2363266944885254, "learning_rate": 6.796666666666666e-05, "loss": 0.0604, "step": 2040 }, { "grad_norm": 0.9148569703102112, "learning_rate": 6.83e-05, "loss": 0.0618, "step": 2050 }, { "grad_norm": 1.1480687856674194, "learning_rate": 6.863333333333334e-05, "loss": 0.0583, "step": 2060 }, { "grad_norm": 1.2516300678253174, "learning_rate": 6.896666666666667e-05, "loss": 0.0597, "step": 2070 }, { "grad_norm": 1.0247350931167603, "learning_rate": 6.93e-05, "loss": 0.0614, "step": 2080 }, { "grad_norm": 0.9880638122558594, "learning_rate": 6.963333333333334e-05, "loss": 0.0626, "step": 2090 }, { "grad_norm": 1.1071597337722778, "learning_rate": 6.996666666666667e-05, "loss": 0.0567, "step": 2100 }, { "grad_norm": 1.1150048971176147, "learning_rate": 7.03e-05, "loss": 0.0572, "step": 2110 }, { "grad_norm": 0.9690060615539551, "learning_rate": 7.063333333333333e-05, "loss": 0.057, "step": 2120 }, { "grad_norm": 1.1471099853515625, "learning_rate": 7.096666666666667e-05, "loss": 0.0619, "step": 2130 }, { "grad_norm": 0.8427419662475586, "learning_rate": 7.13e-05, "loss": 0.0607, "step": 2140 }, { "grad_norm": 0.9163537621498108, "learning_rate": 7.163333333333334e-05, "loss": 0.0556, "step": 2150 }, { "grad_norm": 0.9024648070335388, "learning_rate": 7.196666666666668e-05, "loss": 0.0492, "step": 2160 }, { "grad_norm": 0.9732857346534729, "learning_rate": 7.23e-05, "loss": 0.0583, "step": 2170 }, { "grad_norm": 1.0352048873901367, "learning_rate": 7.263333333333334e-05, "loss": 0.0569, "step": 2180 }, { "grad_norm": 0.9434832334518433, "learning_rate": 7.296666666666667e-05, "loss": 0.0593, "step": 2190 }, { "grad_norm": 1.045331597328186, "learning_rate": 7.33e-05, "loss": 0.0536, "step": 2200 }, { "grad_norm": 1.0376135110855103, "learning_rate": 7.363333333333334e-05, "loss": 0.0538, "step": 2210 }, { "grad_norm": 0.9841770529747009, "learning_rate": 7.396666666666667e-05, "loss": 0.0554, "step": 2220 }, { "grad_norm": 0.9631847739219666, "learning_rate": 7.43e-05, "loss": 0.0547, "step": 2230 }, { "grad_norm": 1.181673526763916, "learning_rate": 7.463333333333334e-05, "loss": 0.0556, "step": 2240 }, { "grad_norm": 1.006360411643982, "learning_rate": 7.496666666666667e-05, "loss": 0.0609, "step": 2250 }, { "grad_norm": 1.2000733613967896, "learning_rate": 7.53e-05, "loss": 0.0627, "step": 2260 }, { "grad_norm": 0.965882420539856, "learning_rate": 7.563333333333333e-05, "loss": 0.0627, "step": 2270 }, { "grad_norm": 0.6869267821311951, "learning_rate": 7.596666666666668e-05, "loss": 0.0553, "step": 2280 }, { "grad_norm": 0.8393562436103821, "learning_rate": 7.630000000000001e-05, "loss": 0.0528, "step": 2290 }, { "grad_norm": 1.001491665840149, "learning_rate": 7.663333333333333e-05, "loss": 0.0559, "step": 2300 }, { "grad_norm": 0.9473748803138733, "learning_rate": 7.696666666666668e-05, "loss": 0.0555, "step": 2310 }, { "grad_norm": 0.9534969925880432, "learning_rate": 7.730000000000001e-05, "loss": 0.054, "step": 2320 }, { "grad_norm": 0.9704079627990723, "learning_rate": 7.763333333333334e-05, "loss": 0.0593, "step": 2330 }, { "grad_norm": 0.875954806804657, "learning_rate": 7.796666666666666e-05, "loss": 0.0575, "step": 2340 }, { "grad_norm": 0.9904036521911621, "learning_rate": 7.83e-05, "loss": 0.0561, "step": 2350 }, { "grad_norm": 0.8696838021278381, "learning_rate": 7.863333333333334e-05, "loss": 0.0603, "step": 2360 }, { "grad_norm": 0.8562178611755371, "learning_rate": 7.896666666666667e-05, "loss": 0.0576, "step": 2370 }, { "grad_norm": 1.0243654251098633, "learning_rate": 7.93e-05, "loss": 0.0552, "step": 2380 }, { "grad_norm": 0.7713583111763, "learning_rate": 7.963333333333334e-05, "loss": 0.0552, "step": 2390 }, { "grad_norm": 0.8817041516304016, "learning_rate": 7.996666666666667e-05, "loss": 0.0546, "step": 2400 }, { "grad_norm": 0.9232308268547058, "learning_rate": 8.030000000000001e-05, "loss": 0.0494, "step": 2410 }, { "grad_norm": 1.128440260887146, "learning_rate": 8.063333333333333e-05, "loss": 0.0532, "step": 2420 }, { "grad_norm": 0.7509755492210388, "learning_rate": 8.096666666666667e-05, "loss": 0.0525, "step": 2430 }, { "grad_norm": 0.9969519376754761, "learning_rate": 8.13e-05, "loss": 0.051, "step": 2440 }, { "grad_norm": 0.7783822417259216, "learning_rate": 8.163333333333334e-05, "loss": 0.0519, "step": 2450 }, { "grad_norm": 1.017827033996582, "learning_rate": 8.196666666666668e-05, "loss": 0.0562, "step": 2460 }, { "grad_norm": 0.8367951512336731, "learning_rate": 8.23e-05, "loss": 0.047, "step": 2470 }, { "grad_norm": 0.8985480070114136, "learning_rate": 8.263333333333334e-05, "loss": 0.0575, "step": 2480 }, { "grad_norm": 0.9090971946716309, "learning_rate": 8.296666666666667e-05, "loss": 0.0517, "step": 2490 }, { "grad_norm": 0.7974523901939392, "learning_rate": 8.33e-05, "loss": 0.0523, "step": 2500 }, { "grad_norm": 0.9519146680831909, "learning_rate": 8.363333333333334e-05, "loss": 0.0542, "step": 2510 }, { "grad_norm": 0.7397735118865967, "learning_rate": 8.396666666666667e-05, "loss": 0.0515, "step": 2520 }, { "grad_norm": 0.9653218984603882, "learning_rate": 8.43e-05, "loss": 0.0488, "step": 2530 }, { "grad_norm": 0.673168957233429, "learning_rate": 8.463333333333335e-05, "loss": 0.0474, "step": 2540 }, { "grad_norm": 0.7991240620613098, "learning_rate": 8.496666666666667e-05, "loss": 0.0538, "step": 2550 }, { "grad_norm": 0.8494511842727661, "learning_rate": 8.53e-05, "loss": 0.0466, "step": 2560 }, { "grad_norm": 0.9272388815879822, "learning_rate": 8.563333333333333e-05, "loss": 0.0473, "step": 2570 }, { "grad_norm": 0.8365880846977234, "learning_rate": 8.596666666666668e-05, "loss": 0.0538, "step": 2580 }, { "grad_norm": 0.7655223608016968, "learning_rate": 8.63e-05, "loss": 0.0499, "step": 2590 }, { "grad_norm": 0.6641157269477844, "learning_rate": 8.663333333333333e-05, "loss": 0.0503, "step": 2600 }, { "grad_norm": 0.8131173849105835, "learning_rate": 8.696666666666668e-05, "loss": 0.0503, "step": 2610 }, { "grad_norm": 0.916050374507904, "learning_rate": 8.730000000000001e-05, "loss": 0.0476, "step": 2620 }, { "grad_norm": 0.7481029629707336, "learning_rate": 8.763333333333334e-05, "loss": 0.0505, "step": 2630 }, { "grad_norm": 0.817760705947876, "learning_rate": 8.796666666666667e-05, "loss": 0.0494, "step": 2640 }, { "grad_norm": 0.8094144463539124, "learning_rate": 8.83e-05, "loss": 0.0501, "step": 2650 }, { "grad_norm": 0.9813537001609802, "learning_rate": 8.863333333333334e-05, "loss": 0.0477, "step": 2660 }, { "grad_norm": 0.6867929697036743, "learning_rate": 8.896666666666667e-05, "loss": 0.0512, "step": 2670 }, { "grad_norm": 0.9549563527107239, "learning_rate": 8.93e-05, "loss": 0.0481, "step": 2680 }, { "grad_norm": 0.7506797909736633, "learning_rate": 8.963333333333333e-05, "loss": 0.0494, "step": 2690 }, { "grad_norm": 0.8433989882469177, "learning_rate": 8.996666666666667e-05, "loss": 0.049, "step": 2700 }, { "grad_norm": 0.8604899644851685, "learning_rate": 9.030000000000001e-05, "loss": 0.0532, "step": 2710 }, { "grad_norm": 0.7850534915924072, "learning_rate": 9.063333333333333e-05, "loss": 0.0455, "step": 2720 }, { "grad_norm": 0.7254740595817566, "learning_rate": 9.096666666666666e-05, "loss": 0.0494, "step": 2730 }, { "grad_norm": 0.7519999146461487, "learning_rate": 9.130000000000001e-05, "loss": 0.0493, "step": 2740 }, { "grad_norm": 0.8911005258560181, "learning_rate": 9.163333333333334e-05, "loss": 0.0487, "step": 2750 }, { "grad_norm": 0.896418571472168, "learning_rate": 9.196666666666666e-05, "loss": 0.0514, "step": 2760 }, { "grad_norm": 0.7974010109901428, "learning_rate": 9.230000000000001e-05, "loss": 0.0445, "step": 2770 }, { "grad_norm": 0.6575005054473877, "learning_rate": 9.263333333333334e-05, "loss": 0.0492, "step": 2780 }, { "grad_norm": 0.7433968782424927, "learning_rate": 9.296666666666667e-05, "loss": 0.0504, "step": 2790 }, { "grad_norm": 0.7195668816566467, "learning_rate": 9.33e-05, "loss": 0.0458, "step": 2800 }, { "grad_norm": 0.8323925733566284, "learning_rate": 9.363333333333334e-05, "loss": 0.0436, "step": 2810 }, { "grad_norm": 0.7287455797195435, "learning_rate": 9.396666666666667e-05, "loss": 0.0429, "step": 2820 }, { "grad_norm": 0.8642809987068176, "learning_rate": 9.43e-05, "loss": 0.0489, "step": 2830 }, { "grad_norm": 0.8033154606819153, "learning_rate": 9.463333333333333e-05, "loss": 0.0501, "step": 2840 }, { "grad_norm": 0.67922443151474, "learning_rate": 9.496666666666667e-05, "loss": 0.0468, "step": 2850 }, { "grad_norm": 0.7234774231910706, "learning_rate": 9.53e-05, "loss": 0.0449, "step": 2860 }, { "grad_norm": 0.7744899392127991, "learning_rate": 9.563333333333334e-05, "loss": 0.0439, "step": 2870 }, { "grad_norm": 0.8154613375663757, "learning_rate": 9.596666666666668e-05, "loss": 0.044, "step": 2880 }, { "grad_norm": 0.7204713821411133, "learning_rate": 9.63e-05, "loss": 0.0428, "step": 2890 }, { "grad_norm": 0.8785443305969238, "learning_rate": 9.663333333333334e-05, "loss": 0.0428, "step": 2900 }, { "grad_norm": 0.849808931350708, "learning_rate": 9.696666666666667e-05, "loss": 0.0449, "step": 2910 }, { "grad_norm": 0.8286228775978088, "learning_rate": 9.730000000000001e-05, "loss": 0.0433, "step": 2920 }, { "grad_norm": 0.6806231737136841, "learning_rate": 9.763333333333334e-05, "loss": 0.0496, "step": 2930 }, { "grad_norm": 0.6595783233642578, "learning_rate": 9.796666666666667e-05, "loss": 0.0501, "step": 2940 }, { "grad_norm": 0.8414114117622375, "learning_rate": 9.83e-05, "loss": 0.0576, "step": 2950 }, { "grad_norm": 0.7837775349617004, "learning_rate": 9.863333333333334e-05, "loss": 0.0589, "step": 2960 }, { "grad_norm": 0.830162525177002, "learning_rate": 9.896666666666667e-05, "loss": 0.0487, "step": 2970 }, { "grad_norm": 0.6498530507087708, "learning_rate": 9.93e-05, "loss": 0.0458, "step": 2980 }, { "grad_norm": 0.624728798866272, "learning_rate": 9.963333333333333e-05, "loss": 0.0443, "step": 2990 }, { "grad_norm": 0.878516435623169, "learning_rate": 9.996666666666668e-05, "loss": 0.0484, "step": 3000 }, { "grad_norm": 0.7892113924026489, "learning_rate": 9.999999384858465e-05, "loss": 0.0531, "step": 3010 }, { "grad_norm": 0.8021721243858337, "learning_rate": 9.999997258443473e-05, "loss": 0.0465, "step": 3020 }, { "grad_norm": 0.7913960218429565, "learning_rate": 9.999993613161331e-05, "loss": 0.0446, "step": 3030 }, { "grad_norm": 0.6336753964424133, "learning_rate": 9.999988449013146e-05, "loss": 0.042, "step": 3040 }, { "grad_norm": 0.683625340461731, "learning_rate": 9.99998176600049e-05, "loss": 0.0439, "step": 3050 }, { "grad_norm": 0.7004140019416809, "learning_rate": 9.999973564125389e-05, "loss": 0.0442, "step": 3060 }, { "grad_norm": 0.8021495342254639, "learning_rate": 9.999963843390335e-05, "loss": 0.0466, "step": 3070 }, { "grad_norm": 0.6171932816505432, "learning_rate": 9.999952603798282e-05, "loss": 0.0455, "step": 3080 }, { "grad_norm": 0.7242380380630493, "learning_rate": 9.999939845352646e-05, "loss": 0.0442, "step": 3090 }, { "grad_norm": 0.7642882466316223, "learning_rate": 9.999925568057298e-05, "loss": 0.0478, "step": 3100 }, { "grad_norm": 0.8305861949920654, "learning_rate": 9.999909771916578e-05, "loss": 0.0511, "step": 3110 }, { "grad_norm": 0.616356372833252, "learning_rate": 9.999892456935285e-05, "loss": 0.0431, "step": 3120 }, { "grad_norm": 0.7295675277709961, "learning_rate": 9.999873623118679e-05, "loss": 0.0532, "step": 3130 }, { "grad_norm": 0.7463228702545166, "learning_rate": 9.999853270472479e-05, "loss": 0.0433, "step": 3140 }, { "grad_norm": 0.7920631766319275, "learning_rate": 9.999831399002871e-05, "loss": 0.0467, "step": 3150 }, { "grad_norm": 0.7888407707214355, "learning_rate": 9.999808008716494e-05, "loss": 0.0403, "step": 3160 }, { "grad_norm": 0.7514359951019287, "learning_rate": 9.999783099620459e-05, "loss": 0.0434, "step": 3170 }, { "grad_norm": 0.6265234351158142, "learning_rate": 9.999756671722328e-05, "loss": 0.0438, "step": 3180 }, { "grad_norm": 0.7002662420272827, "learning_rate": 9.99972872503013e-05, "loss": 0.0444, "step": 3190 }, { "grad_norm": 0.770563006401062, "learning_rate": 9.999699259552359e-05, "loss": 0.0407, "step": 3200 }, { "grad_norm": 0.8374990820884705, "learning_rate": 9.99966827529796e-05, "loss": 0.0393, "step": 3210 }, { "grad_norm": 0.8018321394920349, "learning_rate": 9.999635772276348e-05, "loss": 0.0442, "step": 3220 }, { "grad_norm": 0.5608826279640198, "learning_rate": 9.999601750497396e-05, "loss": 0.0481, "step": 3230 }, { "grad_norm": 0.6996321678161621, "learning_rate": 9.99956620997144e-05, "loss": 0.0416, "step": 3240 }, { "grad_norm": 0.7144750952720642, "learning_rate": 9.999529150709275e-05, "loss": 0.0431, "step": 3250 }, { "grad_norm": 0.6433903574943542, "learning_rate": 9.999490572722158e-05, "loss": 0.0451, "step": 3260 }, { "grad_norm": 0.740658164024353, "learning_rate": 9.99945047602181e-05, "loss": 0.0429, "step": 3270 }, { "grad_norm": 0.6363276839256287, "learning_rate": 9.99940886062041e-05, "loss": 0.0405, "step": 3280 }, { "grad_norm": 0.6644964814186096, "learning_rate": 9.999365726530599e-05, "loss": 0.0423, "step": 3290 }, { "grad_norm": 0.6375532746315002, "learning_rate": 9.999321073765481e-05, "loss": 0.0425, "step": 3300 }, { "grad_norm": 0.620246946811676, "learning_rate": 9.99927490233862e-05, "loss": 0.0428, "step": 3310 }, { "grad_norm": 0.7844054102897644, "learning_rate": 9.999227212264043e-05, "loss": 0.0453, "step": 3320 }, { "grad_norm": 0.6290953755378723, "learning_rate": 9.999178003556236e-05, "loss": 0.0407, "step": 3330 }, { "grad_norm": 0.6946619749069214, "learning_rate": 9.999127276230146e-05, "loss": 0.0421, "step": 3340 }, { "grad_norm": 0.6346951723098755, "learning_rate": 9.999075030301184e-05, "loss": 0.0422, "step": 3350 }, { "grad_norm": 0.6657125353813171, "learning_rate": 9.999021265785221e-05, "loss": 0.0401, "step": 3360 }, { "grad_norm": 0.4986710548400879, "learning_rate": 9.998965982698589e-05, "loss": 0.0399, "step": 3370 }, { "grad_norm": 0.6020874381065369, "learning_rate": 9.998909181058082e-05, "loss": 0.0376, "step": 3380 }, { "grad_norm": 0.6792619228363037, "learning_rate": 9.998850860880953e-05, "loss": 0.0412, "step": 3390 }, { "grad_norm": 0.8668169379234314, "learning_rate": 9.998791022184922e-05, "loss": 0.0421, "step": 3400 }, { "grad_norm": 0.6989952921867371, "learning_rate": 9.99872966498816e-05, "loss": 0.0458, "step": 3410 }, { "grad_norm": 0.5354453921318054, "learning_rate": 9.998666789309313e-05, "loss": 0.04, "step": 3420 }, { "grad_norm": 0.5989109873771667, "learning_rate": 9.998602395167475e-05, "loss": 0.0372, "step": 3430 }, { "grad_norm": 0.620912492275238, "learning_rate": 9.998536482582213e-05, "loss": 0.0412, "step": 3440 }, { "grad_norm": 0.7610539793968201, "learning_rate": 9.998469051573544e-05, "loss": 0.0356, "step": 3450 }, { "grad_norm": 0.7006726264953613, "learning_rate": 9.998400102161954e-05, "loss": 0.0404, "step": 3460 }, { "grad_norm": 0.7211143970489502, "learning_rate": 9.998329634368388e-05, "loss": 0.0461, "step": 3470 }, { "grad_norm": 0.8158736228942871, "learning_rate": 9.998257648214253e-05, "loss": 0.0409, "step": 3480 }, { "grad_norm": 0.5517433881759644, "learning_rate": 9.998184143721417e-05, "loss": 0.0382, "step": 3490 }, { "grad_norm": 0.6554803252220154, "learning_rate": 9.998109120912206e-05, "loss": 0.0373, "step": 3500 }, { "grad_norm": 0.6897868514060974, "learning_rate": 9.998032579809411e-05, "loss": 0.0427, "step": 3510 }, { "grad_norm": 0.6962120532989502, "learning_rate": 9.997954520436286e-05, "loss": 0.0397, "step": 3520 }, { "grad_norm": 0.6989943981170654, "learning_rate": 9.997874942816538e-05, "loss": 0.0404, "step": 3530 }, { "grad_norm": 0.6061151623725891, "learning_rate": 9.997793846974345e-05, "loss": 0.0389, "step": 3540 }, { "grad_norm": 0.5280351638793945, "learning_rate": 9.997711232934341e-05, "loss": 0.0383, "step": 3550 }, { "grad_norm": 0.728611946105957, "learning_rate": 9.99762710072162e-05, "loss": 0.0406, "step": 3560 }, { "grad_norm": 0.7230589389801025, "learning_rate": 9.997541450361743e-05, "loss": 0.0413, "step": 3570 }, { "grad_norm": 0.7686134576797485, "learning_rate": 9.997454281880723e-05, "loss": 0.042, "step": 3580 }, { "grad_norm": 0.5007238984107971, "learning_rate": 9.997365595305044e-05, "loss": 0.0371, "step": 3590 }, { "grad_norm": 0.6437061429023743, "learning_rate": 9.997275390661644e-05, "loss": 0.0365, "step": 3600 }, { "grad_norm": 0.5456061363220215, "learning_rate": 9.997183667977926e-05, "loss": 0.0364, "step": 3610 }, { "grad_norm": 0.6077585220336914, "learning_rate": 9.997090427281752e-05, "loss": 0.0349, "step": 3620 }, { "grad_norm": 0.7311477065086365, "learning_rate": 9.996995668601448e-05, "loss": 0.0396, "step": 3630 }, { "grad_norm": 0.6024607419967651, "learning_rate": 9.996899391965798e-05, "loss": 0.0357, "step": 3640 }, { "grad_norm": 0.673121988773346, "learning_rate": 9.996801597404048e-05, "loss": 0.0357, "step": 3650 }, { "grad_norm": 0.5982141494750977, "learning_rate": 9.996702284945905e-05, "loss": 0.0382, "step": 3660 }, { "grad_norm": 0.512007474899292, "learning_rate": 9.996601454621539e-05, "loss": 0.0369, "step": 3670 }, { "grad_norm": 0.49710696935653687, "learning_rate": 9.996499106461577e-05, "loss": 0.0364, "step": 3680 }, { "grad_norm": 0.7000653743743896, "learning_rate": 9.996395240497112e-05, "loss": 0.0429, "step": 3690 }, { "grad_norm": 0.5541830062866211, "learning_rate": 9.996289856759696e-05, "loss": 0.0352, "step": 3700 }, { "grad_norm": 0.7469106912612915, "learning_rate": 9.996182955281342e-05, "loss": 0.0377, "step": 3710 }, { "grad_norm": 0.7084099650382996, "learning_rate": 9.996074536094519e-05, "loss": 0.0392, "step": 3720 }, { "grad_norm": 0.6888859272003174, "learning_rate": 9.995964599232168e-05, "loss": 0.0427, "step": 3730 }, { "grad_norm": 0.699871838092804, "learning_rate": 9.995853144727683e-05, "loss": 0.0404, "step": 3740 }, { "grad_norm": 0.7385432124137878, "learning_rate": 9.99574017261492e-05, "loss": 0.0394, "step": 3750 }, { "grad_norm": 0.7044745683670044, "learning_rate": 9.995625682928198e-05, "loss": 0.0404, "step": 3760 }, { "grad_norm": 0.7506102919578552, "learning_rate": 9.995509675702295e-05, "loss": 0.0344, "step": 3770 }, { "grad_norm": 0.6009000539779663, "learning_rate": 9.995392150972451e-05, "loss": 0.0399, "step": 3780 }, { "grad_norm": 0.59146648645401, "learning_rate": 9.995273108774366e-05, "loss": 0.0378, "step": 3790 }, { "grad_norm": 0.5564849972724915, "learning_rate": 9.995152549144205e-05, "loss": 0.0372, "step": 3800 }, { "grad_norm": 0.6562843322753906, "learning_rate": 9.995030472118587e-05, "loss": 0.0424, "step": 3810 }, { "grad_norm": 0.7454749345779419, "learning_rate": 9.9949068777346e-05, "loss": 0.0441, "step": 3820 }, { "grad_norm": 0.5691495537757874, "learning_rate": 9.994781766029786e-05, "loss": 0.035, "step": 3830 }, { "grad_norm": 0.5864580869674683, "learning_rate": 9.994655137042151e-05, "loss": 0.036, "step": 3840 }, { "grad_norm": 0.5260812640190125, "learning_rate": 9.99452699081016e-05, "loss": 0.036, "step": 3850 }, { "grad_norm": 0.509746789932251, "learning_rate": 9.994397327372743e-05, "loss": 0.0392, "step": 3860 }, { "grad_norm": 0.6681617498397827, "learning_rate": 9.994266146769286e-05, "loss": 0.0347, "step": 3870 }, { "grad_norm": 0.5469800233840942, "learning_rate": 9.994133449039642e-05, "loss": 0.0354, "step": 3880 }, { "grad_norm": 0.6444597840309143, "learning_rate": 9.993999234224118e-05, "loss": 0.0408, "step": 3890 }, { "grad_norm": 0.5558236241340637, "learning_rate": 9.993863502363485e-05, "loss": 0.0438, "step": 3900 }, { "grad_norm": 0.5329928994178772, "learning_rate": 9.993726253498976e-05, "loss": 0.0362, "step": 3910 }, { "grad_norm": 0.6277582049369812, "learning_rate": 9.993587487672282e-05, "loss": 0.0422, "step": 3920 }, { "grad_norm": 0.655707836151123, "learning_rate": 9.993447204925558e-05, "loss": 0.0342, "step": 3930 }, { "grad_norm": 0.5586263537406921, "learning_rate": 9.993305405301416e-05, "loss": 0.0367, "step": 3940 }, { "grad_norm": 0.44315865635871887, "learning_rate": 9.993162088842935e-05, "loss": 0.0353, "step": 3950 }, { "grad_norm": 0.6656477451324463, "learning_rate": 9.993017255593646e-05, "loss": 0.0343, "step": 3960 }, { "grad_norm": 0.5894978046417236, "learning_rate": 9.992870905597548e-05, "loss": 0.0318, "step": 3970 }, { "grad_norm": 0.6920962333679199, "learning_rate": 9.9927230388991e-05, "loss": 0.0327, "step": 3980 }, { "grad_norm": 0.6823915839195251, "learning_rate": 9.992573655543215e-05, "loss": 0.035, "step": 3990 }, { "grad_norm": 0.6368980407714844, "learning_rate": 9.992422755575277e-05, "loss": 0.0364, "step": 4000 }, { "grad_norm": 0.570302426815033, "learning_rate": 9.992270339041123e-05, "loss": 0.0337, "step": 4010 }, { "grad_norm": 0.5436093807220459, "learning_rate": 9.992116405987053e-05, "loss": 0.0319, "step": 4020 }, { "grad_norm": 0.6327980756759644, "learning_rate": 9.991960956459828e-05, "loss": 0.0352, "step": 4030 }, { "grad_norm": 0.4797522723674774, "learning_rate": 9.991803990506669e-05, "loss": 0.0307, "step": 4040 }, { "grad_norm": 0.5248384475708008, "learning_rate": 9.991645508175258e-05, "loss": 0.0346, "step": 4050 }, { "grad_norm": 0.6256198883056641, "learning_rate": 9.99148550951374e-05, "loss": 0.0334, "step": 4060 }, { "grad_norm": 0.6489942073822021, "learning_rate": 9.991323994570716e-05, "loss": 0.0299, "step": 4070 }, { "grad_norm": 0.6390136480331421, "learning_rate": 9.99116096339525e-05, "loss": 0.0326, "step": 4080 }, { "grad_norm": 0.7176806926727295, "learning_rate": 9.990996416036869e-05, "loss": 0.033, "step": 4090 }, { "grad_norm": 0.6497689485549927, "learning_rate": 9.990830352545555e-05, "loss": 0.0303, "step": 4100 }, { "grad_norm": 0.5362087488174438, "learning_rate": 9.990662772971756e-05, "loss": 0.0361, "step": 4110 }, { "grad_norm": 0.5707340240478516, "learning_rate": 9.990493677366376e-05, "loss": 0.0361, "step": 4120 }, { "grad_norm": 0.6157304644584656, "learning_rate": 9.990323065780786e-05, "loss": 0.0359, "step": 4130 }, { "grad_norm": 0.5689452886581421, "learning_rate": 9.990150938266808e-05, "loss": 0.0322, "step": 4140 }, { "grad_norm": 0.6773049831390381, "learning_rate": 9.989977294876733e-05, "loss": 0.0347, "step": 4150 }, { "grad_norm": 0.5795975923538208, "learning_rate": 9.989802135663308e-05, "loss": 0.0324, "step": 4160 }, { "grad_norm": 0.5491712093353271, "learning_rate": 9.989625460679743e-05, "loss": 0.0355, "step": 4170 }, { "grad_norm": 0.5472167730331421, "learning_rate": 9.989447269979706e-05, "loss": 0.0334, "step": 4180 }, { "grad_norm": 0.6765989065170288, "learning_rate": 9.989267563617328e-05, "loss": 0.0356, "step": 4190 }, { "grad_norm": 0.6238303184509277, "learning_rate": 9.989086341647198e-05, "loss": 0.0296, "step": 4200 }, { "grad_norm": 0.6904252767562866, "learning_rate": 9.988903604124366e-05, "loss": 0.0306, "step": 4210 }, { "grad_norm": 0.6524732708930969, "learning_rate": 9.988719351104343e-05, "loss": 0.0312, "step": 4220 }, { "grad_norm": 0.5553227663040161, "learning_rate": 9.9885335826431e-05, "loss": 0.0311, "step": 4230 }, { "grad_norm": 0.5986655950546265, "learning_rate": 9.988346298797071e-05, "loss": 0.0299, "step": 4240 }, { "grad_norm": 0.6228980422019958, "learning_rate": 9.988157499623146e-05, "loss": 0.0316, "step": 4250 }, { "grad_norm": 0.5520809292793274, "learning_rate": 9.987967185178677e-05, "loss": 0.0301, "step": 4260 }, { "grad_norm": 0.6654737591743469, "learning_rate": 9.987775355521476e-05, "loss": 0.0308, "step": 4270 }, { "grad_norm": 0.5442163348197937, "learning_rate": 9.987582010709817e-05, "loss": 0.0319, "step": 4280 }, { "grad_norm": 0.6658127903938293, "learning_rate": 9.987387150802431e-05, "loss": 0.029, "step": 4290 }, { "grad_norm": 0.5804133415222168, "learning_rate": 9.987190775858517e-05, "loss": 0.0277, "step": 4300 }, { "grad_norm": 0.5722190141677856, "learning_rate": 9.98699288593772e-05, "loss": 0.0292, "step": 4310 }, { "grad_norm": 0.623306393623352, "learning_rate": 9.986793481100161e-05, "loss": 0.0313, "step": 4320 }, { "grad_norm": 0.6122274994850159, "learning_rate": 9.986592561406412e-05, "loss": 0.0292, "step": 4330 }, { "grad_norm": 0.5425803661346436, "learning_rate": 9.986390126917503e-05, "loss": 0.0313, "step": 4340 }, { "grad_norm": 0.511246919631958, "learning_rate": 9.986186177694933e-05, "loss": 0.0349, "step": 4350 }, { "grad_norm": 0.5783056020736694, "learning_rate": 9.985980713800656e-05, "loss": 0.0285, "step": 4360 }, { "grad_norm": 0.810947060585022, "learning_rate": 9.985773735297084e-05, "loss": 0.0315, "step": 4370 }, { "grad_norm": 0.5040915608406067, "learning_rate": 9.985565242247092e-05, "loss": 0.0327, "step": 4380 }, { "grad_norm": 0.4829008877277374, "learning_rate": 9.985355234714016e-05, "loss": 0.0362, "step": 4390 }, { "grad_norm": 0.49304136633872986, "learning_rate": 9.985143712761652e-05, "loss": 0.0288, "step": 4400 }, { "grad_norm": 0.5399729013442993, "learning_rate": 9.984930676454252e-05, "loss": 0.0298, "step": 4410 }, { "grad_norm": 0.6986041069030762, "learning_rate": 9.984716125856532e-05, "loss": 0.0308, "step": 4420 }, { "grad_norm": 0.5839313864707947, "learning_rate": 9.984500061033667e-05, "loss": 0.0286, "step": 4430 }, { "grad_norm": 0.6318590044975281, "learning_rate": 9.984282482051293e-05, "loss": 0.0291, "step": 4440 }, { "grad_norm": 0.7042383551597595, "learning_rate": 9.9840633889755e-05, "loss": 0.032, "step": 4450 }, { "grad_norm": 0.6632933616638184, "learning_rate": 9.983842781872848e-05, "loss": 0.0283, "step": 4460 }, { "grad_norm": 0.454008013010025, "learning_rate": 9.98362066081035e-05, "loss": 0.0273, "step": 4470 }, { "grad_norm": 0.6959115862846375, "learning_rate": 9.983397025855479e-05, "loss": 0.0294, "step": 4480 }, { "grad_norm": 0.6951610445976257, "learning_rate": 9.983171877076171e-05, "loss": 0.033, "step": 4490 }, { "grad_norm": 0.45543181896209717, "learning_rate": 9.98294521454082e-05, "loss": 0.0274, "step": 4500 }, { "grad_norm": 0.6101294755935669, "learning_rate": 9.98271703831828e-05, "loss": 0.0253, "step": 4510 }, { "grad_norm": 0.5760356187820435, "learning_rate": 9.982487348477865e-05, "loss": 0.0264, "step": 4520 }, { "grad_norm": 0.6294093132019043, "learning_rate": 9.982256145089347e-05, "loss": 0.0304, "step": 4530 }, { "grad_norm": 0.539822518825531, "learning_rate": 9.982023428222962e-05, "loss": 0.0283, "step": 4540 }, { "grad_norm": 0.3482538163661957, "learning_rate": 9.981789197949403e-05, "loss": 0.0259, "step": 4550 }, { "grad_norm": 0.5884767770767212, "learning_rate": 9.98155345433982e-05, "loss": 0.0272, "step": 4560 }, { "grad_norm": 0.5896508097648621, "learning_rate": 9.981316197465831e-05, "loss": 0.0259, "step": 4570 }, { "grad_norm": 0.6355373859405518, "learning_rate": 9.981077427399504e-05, "loss": 0.0302, "step": 4580 }, { "grad_norm": 0.6076304912567139, "learning_rate": 9.980837144213371e-05, "loss": 0.0329, "step": 4590 }, { "grad_norm": 0.44014647603034973, "learning_rate": 9.980595347980426e-05, "loss": 0.0264, "step": 4600 }, { "grad_norm": 0.6942553520202637, "learning_rate": 9.980352038774119e-05, "loss": 0.031, "step": 4610 }, { "grad_norm": 0.5383680462837219, "learning_rate": 9.98010721666836e-05, "loss": 0.026, "step": 4620 }, { "grad_norm": 0.5941911339759827, "learning_rate": 9.979860881737523e-05, "loss": 0.0269, "step": 4630 }, { "grad_norm": 0.6166422367095947, "learning_rate": 9.979613034056434e-05, "loss": 0.0285, "step": 4640 }, { "grad_norm": 0.5569292902946472, "learning_rate": 9.979363673700386e-05, "loss": 0.0264, "step": 4650 }, { "grad_norm": 0.4750020205974579, "learning_rate": 9.979112800745124e-05, "loss": 0.0277, "step": 4660 }, { "grad_norm": 0.6898394823074341, "learning_rate": 9.978860415266861e-05, "loss": 0.0251, "step": 4670 }, { "grad_norm": 0.630106508731842, "learning_rate": 9.978606517342262e-05, "loss": 0.0247, "step": 4680 }, { "grad_norm": 0.5730926990509033, "learning_rate": 9.978351107048456e-05, "loss": 0.0252, "step": 4690 }, { "grad_norm": 0.6251030564308167, "learning_rate": 9.978094184463029e-05, "loss": 0.0341, "step": 4700 }, { "grad_norm": 0.49815332889556885, "learning_rate": 9.977835749664029e-05, "loss": 0.0274, "step": 4710 }, { "grad_norm": 0.611866295337677, "learning_rate": 9.97757580272996e-05, "loss": 0.0235, "step": 4720 }, { "grad_norm": 0.5598731637001038, "learning_rate": 9.977314343739786e-05, "loss": 0.0244, "step": 4730 }, { "grad_norm": 0.6545037627220154, "learning_rate": 9.977051372772934e-05, "loss": 0.0256, "step": 4740 }, { "grad_norm": 0.5057151913642883, "learning_rate": 9.976786889909286e-05, "loss": 0.0241, "step": 4750 }, { "grad_norm": 0.7073130011558533, "learning_rate": 9.976520895229185e-05, "loss": 0.0272, "step": 4760 }, { "grad_norm": 0.5498185157775879, "learning_rate": 9.976253388813433e-05, "loss": 0.0321, "step": 4770 }, { "grad_norm": 0.5412503480911255, "learning_rate": 9.975984370743293e-05, "loss": 0.0247, "step": 4780 }, { "grad_norm": 0.5492538213729858, "learning_rate": 9.975713841100485e-05, "loss": 0.0282, "step": 4790 }, { "grad_norm": 0.5769220590591431, "learning_rate": 9.975441799967187e-05, "loss": 0.0274, "step": 4800 }, { "grad_norm": 0.5793959498405457, "learning_rate": 9.975168247426039e-05, "loss": 0.023, "step": 4810 }, { "grad_norm": 0.5048704147338867, "learning_rate": 9.974893183560139e-05, "loss": 0.0256, "step": 4820 }, { "grad_norm": 0.5510044097900391, "learning_rate": 9.974616608453045e-05, "loss": 0.0242, "step": 4830 }, { "grad_norm": 0.645194411277771, "learning_rate": 9.974338522188772e-05, "loss": 0.0241, "step": 4840 }, { "grad_norm": 0.5308319330215454, "learning_rate": 9.974058924851797e-05, "loss": 0.0248, "step": 4850 }, { "grad_norm": 0.5941885709762573, "learning_rate": 9.973777816527051e-05, "loss": 0.0224, "step": 4860 }, { "grad_norm": 0.5658865571022034, "learning_rate": 9.973495197299931e-05, "loss": 0.027, "step": 4870 }, { "grad_norm": 0.5737472772598267, "learning_rate": 9.973211067256287e-05, "loss": 0.025, "step": 4880 }, { "grad_norm": 0.552737832069397, "learning_rate": 9.97292542648243e-05, "loss": 0.0261, "step": 4890 }, { "grad_norm": 0.6258673071861267, "learning_rate": 9.972638275065131e-05, "loss": 0.0243, "step": 4900 }, { "grad_norm": 0.553909957408905, "learning_rate": 9.972349613091621e-05, "loss": 0.0224, "step": 4910 }, { "grad_norm": 0.4020157754421234, "learning_rate": 9.972059440649584e-05, "loss": 0.0248, "step": 4920 }, { "grad_norm": 0.4582313001155853, "learning_rate": 9.971767757827168e-05, "loss": 0.0257, "step": 4930 }, { "grad_norm": 0.5173747539520264, "learning_rate": 9.971474564712982e-05, "loss": 0.0214, "step": 4940 }, { "grad_norm": 0.5914600491523743, "learning_rate": 9.971179861396084e-05, "loss": 0.0265, "step": 4950 }, { "grad_norm": 0.4407144784927368, "learning_rate": 9.970883647966003e-05, "loss": 0.0279, "step": 4960 }, { "grad_norm": 0.6293507218360901, "learning_rate": 9.970585924512717e-05, "loss": 0.0272, "step": 4970 }, { "grad_norm": 0.5919846296310425, "learning_rate": 9.970286691126669e-05, "loss": 0.0345, "step": 4980 }, { "grad_norm": 0.5908362865447998, "learning_rate": 9.969985947898756e-05, "loss": 0.031, "step": 4990 }, { "grad_norm": 0.5150045156478882, "learning_rate": 9.969683694920337e-05, "loss": 0.0265, "step": 5000 }, { "grad_norm": 0.538543701171875, "learning_rate": 9.969379932283228e-05, "loss": 0.0271, "step": 5010 }, { "grad_norm": 0.6217741966247559, "learning_rate": 9.969074660079704e-05, "loss": 0.0288, "step": 5020 }, { "grad_norm": 0.4673011600971222, "learning_rate": 9.968767878402501e-05, "loss": 0.0237, "step": 5030 }, { "grad_norm": 0.4540856182575226, "learning_rate": 9.968459587344808e-05, "loss": 0.0228, "step": 5040 }, { "grad_norm": 0.537109375, "learning_rate": 9.968149787000278e-05, "loss": 0.025, "step": 5050 }, { "grad_norm": 0.5948236584663391, "learning_rate": 9.967838477463018e-05, "loss": 0.027, "step": 5060 }, { "grad_norm": 0.4020276963710785, "learning_rate": 9.967525658827597e-05, "loss": 0.0221, "step": 5070 }, { "grad_norm": 0.3882424831390381, "learning_rate": 9.967211331189042e-05, "loss": 0.021, "step": 5080 }, { "grad_norm": 0.5875966548919678, "learning_rate": 9.966895494642834e-05, "loss": 0.0225, "step": 5090 }, { "grad_norm": 0.592677116394043, "learning_rate": 9.96657814928492e-05, "loss": 0.0237, "step": 5100 }, { "grad_norm": 0.5306417346000671, "learning_rate": 9.966259295211697e-05, "loss": 0.0244, "step": 5110 }, { "grad_norm": 0.4629195034503937, "learning_rate": 9.965938932520028e-05, "loss": 0.0206, "step": 5120 }, { "grad_norm": 0.5218473076820374, "learning_rate": 9.965617061307229e-05, "loss": 0.0219, "step": 5130 }, { "grad_norm": 0.5671105980873108, "learning_rate": 9.965293681671077e-05, "loss": 0.0231, "step": 5140 }, { "grad_norm": 0.49504706263542175, "learning_rate": 9.964968793709804e-05, "loss": 0.023, "step": 5150 }, { "grad_norm": 0.411983460187912, "learning_rate": 9.964642397522106e-05, "loss": 0.0228, "step": 5160 }, { "grad_norm": 0.5020577907562256, "learning_rate": 9.96431449320713e-05, "loss": 0.0207, "step": 5170 }, { "grad_norm": 0.4675840139389038, "learning_rate": 9.963985080864486e-05, "loss": 0.0198, "step": 5180 }, { "grad_norm": 0.4841030240058899, "learning_rate": 9.96365416059424e-05, "loss": 0.0228, "step": 5190 }, { "grad_norm": 0.39482370018959045, "learning_rate": 9.963321732496919e-05, "loss": 0.0213, "step": 5200 }, { "grad_norm": 0.5138295888900757, "learning_rate": 9.962987796673506e-05, "loss": 0.0259, "step": 5210 }, { "grad_norm": 0.41514965891838074, "learning_rate": 9.962652353225438e-05, "loss": 0.0262, "step": 5220 }, { "grad_norm": 0.5538020133972168, "learning_rate": 9.962315402254619e-05, "loss": 0.0232, "step": 5230 }, { "grad_norm": 0.47969186305999756, "learning_rate": 9.9619769438634e-05, "loss": 0.0201, "step": 5240 }, { "grad_norm": 0.4588451683521271, "learning_rate": 9.9616369781546e-05, "loss": 0.0222, "step": 5250 }, { "grad_norm": 0.5873843431472778, "learning_rate": 9.961295505231491e-05, "loss": 0.0216, "step": 5260 }, { "grad_norm": 0.4506019353866577, "learning_rate": 9.960952525197804e-05, "loss": 0.023, "step": 5270 }, { "grad_norm": 0.5607607960700989, "learning_rate": 9.960608038157724e-05, "loss": 0.0223, "step": 5280 }, { "grad_norm": 0.6128898859024048, "learning_rate": 9.960262044215901e-05, "loss": 0.0238, "step": 5290 }, { "grad_norm": 0.4695815443992615, "learning_rate": 9.959914543477435e-05, "loss": 0.0265, "step": 5300 }, { "grad_norm": 0.43593689799308777, "learning_rate": 9.959565536047892e-05, "loss": 0.0275, "step": 5310 }, { "grad_norm": 0.6567972898483276, "learning_rate": 9.959215022033288e-05, "loss": 0.0283, "step": 5320 }, { "grad_norm": 0.38720253109931946, "learning_rate": 9.9588630015401e-05, "loss": 0.0216, "step": 5330 }, { "grad_norm": 0.5306717753410339, "learning_rate": 9.958509474675264e-05, "loss": 0.0249, "step": 5340 }, { "grad_norm": 0.5195200443267822, "learning_rate": 9.958154441546171e-05, "loss": 0.0234, "step": 5350 }, { "grad_norm": 0.411095529794693, "learning_rate": 9.957797902260673e-05, "loss": 0.02, "step": 5360 }, { "grad_norm": 0.5348077416419983, "learning_rate": 9.957439856927073e-05, "loss": 0.0221, "step": 5370 }, { "grad_norm": 0.5276638269424438, "learning_rate": 9.957080305654139e-05, "loss": 0.0228, "step": 5380 }, { "grad_norm": 0.40877729654312134, "learning_rate": 9.956719248551092e-05, "loss": 0.0211, "step": 5390 }, { "grad_norm": 0.5705676674842834, "learning_rate": 9.956356685727612e-05, "loss": 0.0192, "step": 5400 }, { "grad_norm": 0.4318673610687256, "learning_rate": 9.955992617293836e-05, "loss": 0.0204, "step": 5410 }, { "grad_norm": 0.41563689708709717, "learning_rate": 9.955627043360358e-05, "loss": 0.02, "step": 5420 }, { "grad_norm": 0.4354114234447479, "learning_rate": 9.955259964038231e-05, "loss": 0.0231, "step": 5430 }, { "grad_norm": 0.345611572265625, "learning_rate": 9.954891379438962e-05, "loss": 0.0206, "step": 5440 }, { "grad_norm": 0.5379743576049805, "learning_rate": 9.954521289674519e-05, "loss": 0.0203, "step": 5450 }, { "grad_norm": 0.43512454628944397, "learning_rate": 9.954149694857325e-05, "loss": 0.0216, "step": 5460 }, { "grad_norm": 0.4377577006816864, "learning_rate": 9.953776595100258e-05, "loss": 0.0204, "step": 5470 }, { "grad_norm": 0.4804694950580597, "learning_rate": 9.95340199051666e-05, "loss": 0.0231, "step": 5480 }, { "grad_norm": 0.48580053448677063, "learning_rate": 9.953025881220325e-05, "loss": 0.0211, "step": 5490 }, { "grad_norm": 0.4967275559902191, "learning_rate": 9.952648267325504e-05, "loss": 0.0192, "step": 5500 }, { "grad_norm": 0.5465275645256042, "learning_rate": 9.952269148946905e-05, "loss": 0.0218, "step": 5510 }, { "grad_norm": 0.46749478578567505, "learning_rate": 9.951888526199697e-05, "loss": 0.0234, "step": 5520 }, { "grad_norm": 0.4764584004878998, "learning_rate": 9.951506399199501e-05, "loss": 0.0203, "step": 5530 }, { "grad_norm": 0.47356438636779785, "learning_rate": 9.951122768062399e-05, "loss": 0.0216, "step": 5540 }, { "grad_norm": 0.40681713819503784, "learning_rate": 9.950737632904927e-05, "loss": 0.0212, "step": 5550 }, { "grad_norm": 0.43655475974082947, "learning_rate": 9.950350993844077e-05, "loss": 0.02, "step": 5560 }, { "grad_norm": 0.5888035893440247, "learning_rate": 9.949962850997303e-05, "loss": 0.0214, "step": 5570 }, { "grad_norm": 0.5284451246261597, "learning_rate": 9.949573204482512e-05, "loss": 0.0196, "step": 5580 }, { "grad_norm": 0.5056226253509521, "learning_rate": 9.949182054418064e-05, "loss": 0.021, "step": 5590 }, { "grad_norm": 0.42620813846588135, "learning_rate": 9.948789400922787e-05, "loss": 0.0197, "step": 5600 }, { "grad_norm": 0.43998464941978455, "learning_rate": 9.948395244115953e-05, "loss": 0.0187, "step": 5610 }, { "grad_norm": 0.5457742214202881, "learning_rate": 9.9479995841173e-05, "loss": 0.0189, "step": 5620 }, { "grad_norm": 0.5391003489494324, "learning_rate": 9.947602421047017e-05, "loss": 0.0186, "step": 5630 }, { "grad_norm": 0.43806758522987366, "learning_rate": 9.947203755025753e-05, "loss": 0.0177, "step": 5640 }, { "grad_norm": 0.4712775647640228, "learning_rate": 9.946803586174611e-05, "loss": 0.0176, "step": 5650 }, { "grad_norm": 0.49622803926467896, "learning_rate": 9.946401914615151e-05, "loss": 0.0262, "step": 5660 }, { "grad_norm": 0.652975857257843, "learning_rate": 9.945998740469394e-05, "loss": 0.0185, "step": 5670 }, { "grad_norm": 0.4902481436729431, "learning_rate": 9.945594063859809e-05, "loss": 0.0224, "step": 5680 }, { "grad_norm": 0.3783179223537445, "learning_rate": 9.94518788490933e-05, "loss": 0.0179, "step": 5690 }, { "grad_norm": 0.5471696257591248, "learning_rate": 9.944780203741341e-05, "loss": 0.0202, "step": 5700 }, { "grad_norm": 0.4779660701751709, "learning_rate": 9.944371020479686e-05, "loss": 0.019, "step": 5710 }, { "grad_norm": 0.428937703371048, "learning_rate": 9.943960335248662e-05, "loss": 0.0209, "step": 5720 }, { "grad_norm": 0.39229315519332886, "learning_rate": 9.943548148173027e-05, "loss": 0.0189, "step": 5730 }, { "grad_norm": 0.5156890749931335, "learning_rate": 9.943134459377992e-05, "loss": 0.0226, "step": 5740 }, { "grad_norm": 0.3894418478012085, "learning_rate": 9.942719268989222e-05, "loss": 0.0221, "step": 5750 }, { "grad_norm": 0.3629615008831024, "learning_rate": 9.942302577132844e-05, "loss": 0.019, "step": 5760 }, { "grad_norm": 0.42278769612312317, "learning_rate": 9.941884383935438e-05, "loss": 0.0217, "step": 5770 }, { "grad_norm": 0.4613569974899292, "learning_rate": 9.941464689524039e-05, "loss": 0.0214, "step": 5780 }, { "grad_norm": 0.4308469295501709, "learning_rate": 9.941043494026139e-05, "loss": 0.0219, "step": 5790 }, { "grad_norm": 0.43162721395492554, "learning_rate": 9.940620797569685e-05, "loss": 0.0207, "step": 5800 }, { "grad_norm": 0.4408177435398102, "learning_rate": 9.940196600283082e-05, "loss": 0.0187, "step": 5810 }, { "grad_norm": 0.46690818667411804, "learning_rate": 9.939770902295192e-05, "loss": 0.0207, "step": 5820 }, { "grad_norm": 0.5194782614707947, "learning_rate": 9.939343703735329e-05, "loss": 0.0216, "step": 5830 }, { "grad_norm": 0.4704713225364685, "learning_rate": 9.938915004733264e-05, "loss": 0.0202, "step": 5840 }, { "grad_norm": 0.430652916431427, "learning_rate": 9.938484805419224e-05, "loss": 0.0204, "step": 5850 }, { "grad_norm": 0.43421781063079834, "learning_rate": 9.938053105923894e-05, "loss": 0.0215, "step": 5860 }, { "grad_norm": 0.45059508085250854, "learning_rate": 9.937619906378413e-05, "loss": 0.0226, "step": 5870 }, { "grad_norm": 0.47542697191238403, "learning_rate": 9.937185206914374e-05, "loss": 0.0207, "step": 5880 }, { "grad_norm": 0.3854386806488037, "learning_rate": 9.936749007663829e-05, "loss": 0.0184, "step": 5890 }, { "grad_norm": 0.583702564239502, "learning_rate": 9.93631130875928e-05, "loss": 0.0204, "step": 5900 }, { "grad_norm": 0.401313841342926, "learning_rate": 9.935872110333692e-05, "loss": 0.0205, "step": 5910 }, { "grad_norm": 0.37732264399528503, "learning_rate": 9.935431412520484e-05, "loss": 0.0183, "step": 5920 }, { "grad_norm": 0.46058157086372375, "learning_rate": 9.934989215453523e-05, "loss": 0.0161, "step": 5930 }, { "grad_norm": 0.46219077706336975, "learning_rate": 9.934545519267139e-05, "loss": 0.0201, "step": 5940 }, { "grad_norm": 0.41395333409309387, "learning_rate": 9.934100324096117e-05, "loss": 0.0165, "step": 5950 }, { "grad_norm": 0.4123416543006897, "learning_rate": 9.933653630075692e-05, "loss": 0.0166, "step": 5960 }, { "grad_norm": 0.4856509566307068, "learning_rate": 9.93320543734156e-05, "loss": 0.0207, "step": 5970 }, { "grad_norm": 0.4036096930503845, "learning_rate": 9.932755746029871e-05, "loss": 0.0184, "step": 5980 }, { "grad_norm": 0.5678353309631348, "learning_rate": 9.932304556277228e-05, "loss": 0.021, "step": 5990 }, { "grad_norm": 0.5616645812988281, "learning_rate": 9.93185186822069e-05, "loss": 0.022, "step": 6000 }, { "grad_norm": 0.45179468393325806, "learning_rate": 9.931397681997773e-05, "loss": 0.0172, "step": 6010 }, { "grad_norm": 0.4580582082271576, "learning_rate": 9.930941997746446e-05, "loss": 0.0167, "step": 6020 }, { "grad_norm": 0.3947714865207672, "learning_rate": 9.930484815605134e-05, "loss": 0.0163, "step": 6030 }, { "grad_norm": 0.3337259590625763, "learning_rate": 9.930026135712717e-05, "loss": 0.0186, "step": 6040 }, { "grad_norm": 0.33686500787734985, "learning_rate": 9.92956595820853e-05, "loss": 0.0181, "step": 6050 }, { "grad_norm": 0.39745450019836426, "learning_rate": 9.929104283232362e-05, "loss": 0.0167, "step": 6060 }, { "grad_norm": 0.41633832454681396, "learning_rate": 9.92864111092446e-05, "loss": 0.0187, "step": 6070 }, { "grad_norm": 0.48885807394981384, "learning_rate": 9.92817644142552e-05, "loss": 0.0199, "step": 6080 }, { "grad_norm": 0.4180903434753418, "learning_rate": 9.927710274876698e-05, "loss": 0.0161, "step": 6090 }, { "grad_norm": 0.3967433571815491, "learning_rate": 9.927242611419603e-05, "loss": 0.0203, "step": 6100 }, { "grad_norm": 0.4237320125102997, "learning_rate": 9.926773451196301e-05, "loss": 0.0197, "step": 6110 }, { "grad_norm": 0.5263776183128357, "learning_rate": 9.926302794349306e-05, "loss": 0.0239, "step": 6120 }, { "grad_norm": 0.5497742891311646, "learning_rate": 9.925830641021594e-05, "loss": 0.0242, "step": 6130 }, { "grad_norm": 0.4678046405315399, "learning_rate": 9.925356991356593e-05, "loss": 0.0244, "step": 6140 }, { "grad_norm": 0.5880439877510071, "learning_rate": 9.924881845498184e-05, "loss": 0.0263, "step": 6150 }, { "grad_norm": 0.47017771005630493, "learning_rate": 9.924405203590705e-05, "loss": 0.0254, "step": 6160 }, { "grad_norm": 0.4536885619163513, "learning_rate": 9.923927065778946e-05, "loss": 0.0301, "step": 6170 }, { "grad_norm": 0.3195449411869049, "learning_rate": 9.923447432208154e-05, "loss": 0.0256, "step": 6180 }, { "grad_norm": 0.34646308422088623, "learning_rate": 9.922966303024027e-05, "loss": 0.0216, "step": 6190 }, { "grad_norm": 0.4663931131362915, "learning_rate": 9.922483678372721e-05, "loss": 0.0211, "step": 6200 }, { "grad_norm": 0.35806676745414734, "learning_rate": 9.921999558400845e-05, "loss": 0.0208, "step": 6210 }, { "grad_norm": 0.39478111267089844, "learning_rate": 9.92151394325546e-05, "loss": 0.0213, "step": 6220 }, { "grad_norm": 0.49016234278678894, "learning_rate": 9.921026833084084e-05, "loss": 0.0202, "step": 6230 }, { "grad_norm": 0.37187206745147705, "learning_rate": 9.920538228034689e-05, "loss": 0.0191, "step": 6240 }, { "grad_norm": 0.3741544187068939, "learning_rate": 9.920048128255699e-05, "loss": 0.0226, "step": 6250 }, { "grad_norm": 0.4790568947792053, "learning_rate": 9.919556533895995e-05, "loss": 0.0224, "step": 6260 }, { "grad_norm": 0.347062885761261, "learning_rate": 9.919063445104907e-05, "loss": 0.0177, "step": 6270 }, { "grad_norm": 0.42166146636009216, "learning_rate": 9.918568862032227e-05, "loss": 0.0204, "step": 6280 }, { "grad_norm": 0.4601902365684509, "learning_rate": 9.918072784828194e-05, "loss": 0.0175, "step": 6290 }, { "grad_norm": 0.4728532135486603, "learning_rate": 9.917575213643501e-05, "loss": 0.0221, "step": 6300 }, { "grad_norm": 0.42424672842025757, "learning_rate": 9.917076148629302e-05, "loss": 0.0195, "step": 6310 }, { "grad_norm": 0.4475298225879669, "learning_rate": 9.916575589937196e-05, "loss": 0.0178, "step": 6320 }, { "grad_norm": 0.45007506012916565, "learning_rate": 9.916073537719239e-05, "loss": 0.0193, "step": 6330 }, { "grad_norm": 0.3647649884223938, "learning_rate": 9.915569992127944e-05, "loss": 0.0199, "step": 6340 }, { "grad_norm": 0.517101526260376, "learning_rate": 9.915064953316273e-05, "loss": 0.0237, "step": 6350 }, { "grad_norm": 0.46937012672424316, "learning_rate": 9.914558421437645e-05, "loss": 0.0259, "step": 6360 }, { "grad_norm": 0.4201948046684265, "learning_rate": 9.914050396645929e-05, "loss": 0.0227, "step": 6370 }, { "grad_norm": 0.4780450463294983, "learning_rate": 9.913540879095452e-05, "loss": 0.0237, "step": 6380 }, { "grad_norm": 0.4860870838165283, "learning_rate": 9.913029868940987e-05, "loss": 0.0239, "step": 6390 }, { "grad_norm": 0.4339703917503357, "learning_rate": 9.912517366337772e-05, "loss": 0.019, "step": 6400 }, { "grad_norm": 0.43234312534332275, "learning_rate": 9.912003371441487e-05, "loss": 0.0235, "step": 6410 }, { "grad_norm": 0.3167775869369507, "learning_rate": 9.911487884408271e-05, "loss": 0.0225, "step": 6420 }, { "grad_norm": 0.4893382787704468, "learning_rate": 9.910970905394719e-05, "loss": 0.0181, "step": 6430 }, { "grad_norm": 0.4834316074848175, "learning_rate": 9.91045243455787e-05, "loss": 0.0203, "step": 6440 }, { "grad_norm": 0.3625202476978302, "learning_rate": 9.909932472055225e-05, "loss": 0.0186, "step": 6450 }, { "grad_norm": 0.3890365660190582, "learning_rate": 9.909411018044734e-05, "loss": 0.018, "step": 6460 }, { "grad_norm": 0.3136729300022125, "learning_rate": 9.908888072684802e-05, "loss": 0.0177, "step": 6470 }, { "grad_norm": 0.34689581394195557, "learning_rate": 9.908363636134285e-05, "loss": 0.0194, "step": 6480 }, { "grad_norm": 0.46506989002227783, "learning_rate": 9.907837708552493e-05, "loss": 0.02, "step": 6490 }, { "grad_norm": 0.42716407775878906, "learning_rate": 9.90731029009919e-05, "loss": 0.0173, "step": 6500 }, { "grad_norm": 0.45493918657302856, "learning_rate": 9.906781380934589e-05, "loss": 0.017, "step": 6510 }, { "grad_norm": 0.35338613390922546, "learning_rate": 9.906250981219362e-05, "loss": 0.0169, "step": 6520 }, { "grad_norm": 0.38390281796455383, "learning_rate": 9.905719091114628e-05, "loss": 0.0179, "step": 6530 }, { "grad_norm": 0.37599876523017883, "learning_rate": 9.905185710781964e-05, "loss": 0.0184, "step": 6540 }, { "grad_norm": 0.4138750731945038, "learning_rate": 9.904650840383392e-05, "loss": 0.0183, "step": 6550 }, { "grad_norm": 0.4354017674922943, "learning_rate": 9.904114480081397e-05, "loss": 0.0189, "step": 6560 }, { "grad_norm": 0.5025765299797058, "learning_rate": 9.903576630038906e-05, "loss": 0.0221, "step": 6570 }, { "grad_norm": 0.432136207818985, "learning_rate": 9.903037290419309e-05, "loss": 0.0173, "step": 6580 }, { "grad_norm": 0.486317902803421, "learning_rate": 9.902496461386439e-05, "loss": 0.0185, "step": 6590 }, { "grad_norm": 0.5418636798858643, "learning_rate": 9.901954143104588e-05, "loss": 0.0222, "step": 6600 }, { "grad_norm": 0.36305558681488037, "learning_rate": 9.901410335738496e-05, "loss": 0.0228, "step": 6610 }, { "grad_norm": 0.42342790961265564, "learning_rate": 9.900865039453358e-05, "loss": 0.0155, "step": 6620 }, { "grad_norm": 0.4071841537952423, "learning_rate": 9.900318254414821e-05, "loss": 0.0213, "step": 6630 }, { "grad_norm": 0.39720529317855835, "learning_rate": 9.899769980788985e-05, "loss": 0.0195, "step": 6640 }, { "grad_norm": 0.33522626757621765, "learning_rate": 9.899220218742398e-05, "loss": 0.0165, "step": 6650 }, { "grad_norm": 0.3815174698829651, "learning_rate": 9.898668968442066e-05, "loss": 0.0198, "step": 6660 }, { "grad_norm": 0.45704084634780884, "learning_rate": 9.898116230055443e-05, "loss": 0.0203, "step": 6670 }, { "grad_norm": 0.4906638562679291, "learning_rate": 9.897562003750437e-05, "loss": 0.0185, "step": 6680 }, { "grad_norm": 0.4390787184238434, "learning_rate": 9.897006289695407e-05, "loss": 0.0187, "step": 6690 }, { "grad_norm": 0.377570241689682, "learning_rate": 9.896449088059164e-05, "loss": 0.0179, "step": 6700 }, { "grad_norm": 0.42955902218818665, "learning_rate": 9.89589039901097e-05, "loss": 0.0179, "step": 6710 }, { "grad_norm": 0.38894832134246826, "learning_rate": 9.895330222720542e-05, "loss": 0.0184, "step": 6720 }, { "grad_norm": 0.43698614835739136, "learning_rate": 9.894768559358047e-05, "loss": 0.0153, "step": 6730 }, { "grad_norm": 0.3228228986263275, "learning_rate": 9.894205409094101e-05, "loss": 0.0171, "step": 6740 }, { "grad_norm": 0.4363706111907959, "learning_rate": 9.893640772099777e-05, "loss": 0.0153, "step": 6750 }, { "grad_norm": 0.3527734875679016, "learning_rate": 9.893074648546595e-05, "loss": 0.015, "step": 6760 }, { "grad_norm": 0.2969190776348114, "learning_rate": 9.892507038606528e-05, "loss": 0.0152, "step": 6770 }, { "grad_norm": 0.4266859292984009, "learning_rate": 9.891937942452003e-05, "loss": 0.0172, "step": 6780 }, { "grad_norm": 0.440215528011322, "learning_rate": 9.891367360255895e-05, "loss": 0.0165, "step": 6790 }, { "grad_norm": 0.41470351815223694, "learning_rate": 9.890795292191532e-05, "loss": 0.0177, "step": 6800 }, { "grad_norm": 0.3750207722187042, "learning_rate": 9.890221738432694e-05, "loss": 0.0168, "step": 6810 }, { "grad_norm": 0.41080591082572937, "learning_rate": 9.88964669915361e-05, "loss": 0.0189, "step": 6820 }, { "grad_norm": 0.40893933176994324, "learning_rate": 9.889070174528963e-05, "loss": 0.0153, "step": 6830 }, { "grad_norm": 0.4975588023662567, "learning_rate": 9.888492164733883e-05, "loss": 0.0181, "step": 6840 }, { "grad_norm": 0.4919087290763855, "learning_rate": 9.88791266994396e-05, "loss": 0.0197, "step": 6850 }, { "grad_norm": 0.49394693970680237, "learning_rate": 9.887331690335223e-05, "loss": 0.0212, "step": 6860 }, { "grad_norm": 0.3582223653793335, "learning_rate": 9.886749226084163e-05, "loss": 0.0163, "step": 6870 }, { "grad_norm": 0.41114887595176697, "learning_rate": 9.886165277367714e-05, "loss": 0.0197, "step": 6880 }, { "grad_norm": 0.4360108971595764, "learning_rate": 9.885579844363265e-05, "loss": 0.0145, "step": 6890 }, { "grad_norm": 0.35609444975852966, "learning_rate": 9.884992927248656e-05, "loss": 0.0163, "step": 6900 }, { "grad_norm": 0.4820123612880707, "learning_rate": 9.884404526202178e-05, "loss": 0.0162, "step": 6910 }, { "grad_norm": 0.32953253388404846, "learning_rate": 9.883814641402568e-05, "loss": 0.0181, "step": 6920 }, { "grad_norm": 0.3404013216495514, "learning_rate": 9.88322327302902e-05, "loss": 0.0189, "step": 6930 }, { "grad_norm": 0.36602842807769775, "learning_rate": 9.882630421261176e-05, "loss": 0.0149, "step": 6940 }, { "grad_norm": 0.33201056718826294, "learning_rate": 9.88203608627913e-05, "loss": 0.0165, "step": 6950 }, { "grad_norm": 0.33390235900878906, "learning_rate": 9.881440268263422e-05, "loss": 0.0184, "step": 6960 }, { "grad_norm": 0.41959813237190247, "learning_rate": 9.880842967395048e-05, "loss": 0.0158, "step": 6970 }, { "grad_norm": 0.35842469334602356, "learning_rate": 9.880244183855452e-05, "loss": 0.0182, "step": 6980 }, { "grad_norm": 0.2936786413192749, "learning_rate": 9.879643917826527e-05, "loss": 0.0161, "step": 6990 }, { "grad_norm": 0.3380889892578125, "learning_rate": 9.87904216949062e-05, "loss": 0.0164, "step": 7000 }, { "grad_norm": 0.3669847846031189, "learning_rate": 9.878438939030526e-05, "loss": 0.0196, "step": 7010 }, { "grad_norm": 0.3744491934776306, "learning_rate": 9.877834226629489e-05, "loss": 0.0184, "step": 7020 }, { "grad_norm": 0.3672807216644287, "learning_rate": 9.877228032471206e-05, "loss": 0.0149, "step": 7030 }, { "grad_norm": 0.2968101501464844, "learning_rate": 9.876620356739823e-05, "loss": 0.0163, "step": 7040 }, { "grad_norm": 0.4129272401332855, "learning_rate": 9.876011199619935e-05, "loss": 0.0163, "step": 7050 }, { "grad_norm": 0.393873393535614, "learning_rate": 9.875400561296589e-05, "loss": 0.0163, "step": 7060 }, { "grad_norm": 0.34575068950653076, "learning_rate": 9.874788441955278e-05, "loss": 0.0156, "step": 7070 }, { "grad_norm": 0.3829306662082672, "learning_rate": 9.874174841781951e-05, "loss": 0.0156, "step": 7080 }, { "grad_norm": 0.44204407930374146, "learning_rate": 9.873559760963003e-05, "loss": 0.0167, "step": 7090 }, { "grad_norm": 0.397582083940506, "learning_rate": 9.872943199685278e-05, "loss": 0.0144, "step": 7100 }, { "grad_norm": 0.39908432960510254, "learning_rate": 9.872325158136071e-05, "loss": 0.017, "step": 7110 }, { "grad_norm": 0.43825486302375793, "learning_rate": 9.871705636503128e-05, "loss": 0.0171, "step": 7120 }, { "grad_norm": 0.34599193930625916, "learning_rate": 9.871084634974641e-05, "loss": 0.0152, "step": 7130 }, { "grad_norm": 0.43893107771873474, "learning_rate": 9.870462153739257e-05, "loss": 0.0182, "step": 7140 }, { "grad_norm": 0.3930358290672302, "learning_rate": 9.869838192986067e-05, "loss": 0.0157, "step": 7150 }, { "grad_norm": 0.4006841480731964, "learning_rate": 9.869212752904616e-05, "loss": 0.0161, "step": 7160 }, { "grad_norm": 0.3374226689338684, "learning_rate": 9.868585833684894e-05, "loss": 0.0141, "step": 7170 }, { "grad_norm": 0.37965646386146545, "learning_rate": 9.867957435517342e-05, "loss": 0.0163, "step": 7180 }, { "grad_norm": 0.4180934429168701, "learning_rate": 9.867327558592854e-05, "loss": 0.015, "step": 7190 }, { "grad_norm": 0.33238381147384644, "learning_rate": 9.866696203102766e-05, "loss": 0.0174, "step": 7200 }, { "grad_norm": 0.4209561347961426, "learning_rate": 9.86606336923887e-05, "loss": 0.0161, "step": 7210 }, { "grad_norm": 0.4806552231311798, "learning_rate": 9.865429057193403e-05, "loss": 0.0169, "step": 7220 }, { "grad_norm": 0.4034254550933838, "learning_rate": 9.864793267159053e-05, "loss": 0.018, "step": 7230 }, { "grad_norm": 0.3712209463119507, "learning_rate": 9.864155999328957e-05, "loss": 0.0142, "step": 7240 }, { "grad_norm": 0.4380336403846741, "learning_rate": 9.8635172538967e-05, "loss": 0.0142, "step": 7250 }, { "grad_norm": 0.5083938241004944, "learning_rate": 9.862877031056312e-05, "loss": 0.0158, "step": 7260 }, { "grad_norm": 0.5030562281608582, "learning_rate": 9.862235331002279e-05, "loss": 0.0149, "step": 7270 }, { "grad_norm": 0.41710764169692993, "learning_rate": 9.861592153929533e-05, "loss": 0.0134, "step": 7280 }, { "grad_norm": 0.45611754059791565, "learning_rate": 9.860947500033455e-05, "loss": 0.0163, "step": 7290 }, { "grad_norm": 0.39705395698547363, "learning_rate": 9.86030136950987e-05, "loss": 0.0159, "step": 7300 }, { "grad_norm": 0.412016361951828, "learning_rate": 9.85965376255506e-05, "loss": 0.0148, "step": 7310 }, { "grad_norm": 0.44000282883644104, "learning_rate": 9.859004679365747e-05, "loss": 0.0157, "step": 7320 }, { "grad_norm": 0.3972632884979248, "learning_rate": 9.858354120139108e-05, "loss": 0.0181, "step": 7330 }, { "grad_norm": 0.3845224380493164, "learning_rate": 9.857702085072764e-05, "loss": 0.0161, "step": 7340 }, { "grad_norm": 0.5182003974914551, "learning_rate": 9.857048574364787e-05, "loss": 0.0177, "step": 7350 }, { "grad_norm": 0.37718909978866577, "learning_rate": 9.856393588213698e-05, "loss": 0.0148, "step": 7360 }, { "grad_norm": 0.47215864062309265, "learning_rate": 9.855737126818458e-05, "loss": 0.0162, "step": 7370 }, { "grad_norm": 0.38670554757118225, "learning_rate": 9.855079190378491e-05, "loss": 0.016, "step": 7380 }, { "grad_norm": 0.4058266282081604, "learning_rate": 9.854419779093655e-05, "loss": 0.0165, "step": 7390 }, { "grad_norm": 0.38940101861953735, "learning_rate": 9.853758893164264e-05, "loss": 0.0152, "step": 7400 }, { "grad_norm": 0.41507646441459656, "learning_rate": 9.853096532791078e-05, "loss": 0.0178, "step": 7410 }, { "grad_norm": 0.3551158905029297, "learning_rate": 9.852432698175304e-05, "loss": 0.0151, "step": 7420 }, { "grad_norm": 0.4304772615432739, "learning_rate": 9.851767389518597e-05, "loss": 0.0224, "step": 7430 }, { "grad_norm": 0.39942145347595215, "learning_rate": 9.85110060702306e-05, "loss": 0.0183, "step": 7440 }, { "grad_norm": 0.376549631357193, "learning_rate": 9.850432350891245e-05, "loss": 0.0182, "step": 7450 }, { "grad_norm": 0.4297804534435272, "learning_rate": 9.84976262132615e-05, "loss": 0.0189, "step": 7460 }, { "grad_norm": 0.45385608077049255, "learning_rate": 9.849091418531222e-05, "loss": 0.0229, "step": 7470 }, { "grad_norm": 0.35529136657714844, "learning_rate": 9.848418742710353e-05, "loss": 0.0175, "step": 7480 }, { "grad_norm": 0.397545725107193, "learning_rate": 9.847744594067885e-05, "loss": 0.0182, "step": 7490 }, { "grad_norm": 0.3288343846797943, "learning_rate": 9.847068972808607e-05, "loss": 0.0189, "step": 7500 }, { "grad_norm": 0.3809896409511566, "learning_rate": 9.846391879137756e-05, "loss": 0.0149, "step": 7510 }, { "grad_norm": 0.3377668559551239, "learning_rate": 9.845713313261012e-05, "loss": 0.0144, "step": 7520 }, { "grad_norm": 0.3758298456668854, "learning_rate": 9.845033275384505e-05, "loss": 0.0161, "step": 7530 }, { "grad_norm": 0.3665297329425812, "learning_rate": 9.844351765714818e-05, "loss": 0.0157, "step": 7540 }, { "grad_norm": 0.3788328170776367, "learning_rate": 9.843668784458971e-05, "loss": 0.0154, "step": 7550 }, { "grad_norm": 0.5394845008850098, "learning_rate": 9.842984331824437e-05, "loss": 0.0169, "step": 7560 }, { "grad_norm": 0.31362563371658325, "learning_rate": 9.842298408019133e-05, "loss": 0.0146, "step": 7570 }, { "grad_norm": 0.32352402806282043, "learning_rate": 9.841611013251429e-05, "loss": 0.0178, "step": 7580 }, { "grad_norm": 0.35832497477531433, "learning_rate": 9.840922147730133e-05, "loss": 0.0127, "step": 7590 }, { "grad_norm": 0.41615843772888184, "learning_rate": 9.840231811664506e-05, "loss": 0.0139, "step": 7600 }, { "grad_norm": 0.3583919405937195, "learning_rate": 9.839540005264252e-05, "loss": 0.0155, "step": 7610 }, { "grad_norm": 0.45877712965011597, "learning_rate": 9.838846728739527e-05, "loss": 0.0174, "step": 7620 }, { "grad_norm": 0.4497396945953369, "learning_rate": 9.838151982300927e-05, "loss": 0.0158, "step": 7630 }, { "grad_norm": 0.4297942519187927, "learning_rate": 9.8374557661595e-05, "loss": 0.0219, "step": 7640 }, { "grad_norm": 0.4039759933948517, "learning_rate": 9.836758080526735e-05, "loss": 0.018, "step": 7650 }, { "grad_norm": 0.33790767192840576, "learning_rate": 9.836058925614575e-05, "loss": 0.0136, "step": 7660 }, { "grad_norm": 0.3268868029117584, "learning_rate": 9.8353583016354e-05, "loss": 0.0153, "step": 7670 }, { "grad_norm": 0.34531736373901367, "learning_rate": 9.834656208802044e-05, "loss": 0.0161, "step": 7680 }, { "grad_norm": 0.2970593571662903, "learning_rate": 9.833952647327784e-05, "loss": 0.0167, "step": 7690 }, { "grad_norm": 0.3330003619194031, "learning_rate": 9.833247617426342e-05, "loss": 0.0153, "step": 7700 }, { "grad_norm": 0.3993304669857025, "learning_rate": 9.832541119311889e-05, "loss": 0.0165, "step": 7710 }, { "grad_norm": 0.43474718928337097, "learning_rate": 9.83183315319904e-05, "loss": 0.013, "step": 7720 }, { "grad_norm": 0.3155459761619568, "learning_rate": 9.831123719302855e-05, "loss": 0.017, "step": 7730 }, { "grad_norm": 0.2562251687049866, "learning_rate": 9.830412817838842e-05, "loss": 0.0149, "step": 7740 }, { "grad_norm": 0.3912941515445709, "learning_rate": 9.829700449022956e-05, "loss": 0.0153, "step": 7750 }, { "grad_norm": 0.3397212624549866, "learning_rate": 9.828986613071593e-05, "loss": 0.0161, "step": 7760 }, { "grad_norm": 0.33656275272369385, "learning_rate": 9.828271310201601e-05, "loss": 0.0133, "step": 7770 }, { "grad_norm": 0.3815263509750366, "learning_rate": 9.827554540630268e-05, "loss": 0.0144, "step": 7780 }, { "grad_norm": 0.321825236082077, "learning_rate": 9.826836304575329e-05, "loss": 0.0136, "step": 7790 }, { "grad_norm": 0.40393421053886414, "learning_rate": 9.826116602254966e-05, "loss": 0.0176, "step": 7800 }, { "grad_norm": 0.3452446162700653, "learning_rate": 9.825395433887805e-05, "loss": 0.0187, "step": 7810 }, { "grad_norm": 0.29129523038864136, "learning_rate": 9.824672799692917e-05, "loss": 0.0149, "step": 7820 }, { "grad_norm": 0.34453850984573364, "learning_rate": 9.823948699889823e-05, "loss": 0.0144, "step": 7830 }, { "grad_norm": 0.4322754442691803, "learning_rate": 9.823223134698483e-05, "loss": 0.016, "step": 7840 }, { "grad_norm": 0.36726808547973633, "learning_rate": 9.822496104339303e-05, "loss": 0.0141, "step": 7850 }, { "grad_norm": 0.4384458661079407, "learning_rate": 9.821767609033138e-05, "loss": 0.0153, "step": 7860 }, { "grad_norm": 0.4254269003868103, "learning_rate": 9.821037649001284e-05, "loss": 0.0142, "step": 7870 }, { "grad_norm": 0.349990576505661, "learning_rate": 9.820306224465486e-05, "loss": 0.0169, "step": 7880 }, { "grad_norm": 0.3649872839450836, "learning_rate": 9.819573335647928e-05, "loss": 0.0145, "step": 7890 }, { "grad_norm": 0.2825605273246765, "learning_rate": 9.818838982771246e-05, "loss": 0.0167, "step": 7900 }, { "grad_norm": 0.4013703465461731, "learning_rate": 9.818103166058514e-05, "loss": 0.0184, "step": 7910 }, { "grad_norm": 0.3298392593860626, "learning_rate": 9.817365885733254e-05, "loss": 0.0143, "step": 7920 }, { "grad_norm": 0.38088521361351013, "learning_rate": 9.816627142019434e-05, "loss": 0.0161, "step": 7930 }, { "grad_norm": 0.4659990966320038, "learning_rate": 9.815886935141463e-05, "loss": 0.0183, "step": 7940 }, { "grad_norm": 0.3853849172592163, "learning_rate": 9.8151452653242e-05, "loss": 0.0135, "step": 7950 }, { "grad_norm": 0.37548625469207764, "learning_rate": 9.814402132792939e-05, "loss": 0.0158, "step": 7960 }, { "grad_norm": 0.31197187304496765, "learning_rate": 9.813657537773428e-05, "loss": 0.0161, "step": 7970 }, { "grad_norm": 0.32812777161598206, "learning_rate": 9.812911480491854e-05, "loss": 0.0165, "step": 7980 }, { "grad_norm": 0.3862552046775818, "learning_rate": 9.81216396117485e-05, "loss": 0.0136, "step": 7990 }, { "grad_norm": 0.38808831572532654, "learning_rate": 9.811414980049491e-05, "loss": 0.0139, "step": 8000 }, { "grad_norm": 0.32167288661003113, "learning_rate": 9.810664537343301e-05, "loss": 0.0152, "step": 8010 }, { "grad_norm": 0.44605064392089844, "learning_rate": 9.809912633284243e-05, "loss": 0.018, "step": 8020 }, { "grad_norm": 0.33501434326171875, "learning_rate": 9.809159268100725e-05, "loss": 0.0143, "step": 8030 }, { "grad_norm": 0.4212293028831482, "learning_rate": 9.808404442021599e-05, "loss": 0.0168, "step": 8040 }, { "grad_norm": 0.3646804392337799, "learning_rate": 9.807648155276163e-05, "loss": 0.0136, "step": 8050 }, { "grad_norm": 0.3829670250415802, "learning_rate": 9.806890408094156e-05, "loss": 0.0133, "step": 8060 }, { "grad_norm": 0.37975287437438965, "learning_rate": 9.806131200705761e-05, "loss": 0.0153, "step": 8070 }, { "grad_norm": 0.34802359342575073, "learning_rate": 9.805370533341605e-05, "loss": 0.016, "step": 8080 }, { "grad_norm": 0.34443968534469604, "learning_rate": 9.804608406232762e-05, "loss": 0.0134, "step": 8090 }, { "grad_norm": 0.34838855266571045, "learning_rate": 9.803844819610741e-05, "loss": 0.0143, "step": 8100 }, { "grad_norm": 0.3762608766555786, "learning_rate": 9.803079773707504e-05, "loss": 0.0194, "step": 8110 }, { "grad_norm": 0.3491004407405853, "learning_rate": 9.802313268755447e-05, "loss": 0.0168, "step": 8120 }, { "grad_norm": 0.3784998953342438, "learning_rate": 9.801545304987419e-05, "loss": 0.0159, "step": 8130 }, { "grad_norm": 0.4268767237663269, "learning_rate": 9.800775882636704e-05, "loss": 0.0174, "step": 8140 }, { "grad_norm": 0.4905880093574524, "learning_rate": 9.800005001937034e-05, "loss": 0.0162, "step": 8150 }, { "grad_norm": 0.36699673533439636, "learning_rate": 9.79923266312258e-05, "loss": 0.0164, "step": 8160 }, { "grad_norm": 0.39664509892463684, "learning_rate": 9.79845886642796e-05, "loss": 0.0156, "step": 8170 }, { "grad_norm": 0.44191649556159973, "learning_rate": 9.797683612088233e-05, "loss": 0.0167, "step": 8180 }, { "grad_norm": 0.346263587474823, "learning_rate": 9.796906900338898e-05, "loss": 0.0153, "step": 8190 }, { "grad_norm": 0.29614493250846863, "learning_rate": 9.796128731415903e-05, "loss": 0.0125, "step": 8200 }, { "grad_norm": 0.38059693574905396, "learning_rate": 9.795349105555634e-05, "loss": 0.0137, "step": 8210 }, { "grad_norm": 0.3214937746524811, "learning_rate": 9.794568022994922e-05, "loss": 0.0126, "step": 8220 }, { "grad_norm": 0.348738431930542, "learning_rate": 9.793785483971034e-05, "loss": 0.014, "step": 8230 }, { "grad_norm": 0.3860212564468384, "learning_rate": 9.793001488721691e-05, "loss": 0.0138, "step": 8240 }, { "grad_norm": 0.31617406010627747, "learning_rate": 9.792216037485047e-05, "loss": 0.0139, "step": 8250 }, { "grad_norm": 0.4328280985355377, "learning_rate": 9.791429130499704e-05, "loss": 0.0146, "step": 8260 }, { "grad_norm": 0.4389854371547699, "learning_rate": 9.790640768004698e-05, "loss": 0.0168, "step": 8270 }, { "grad_norm": 0.474202960729599, "learning_rate": 9.789850950239518e-05, "loss": 0.0165, "step": 8280 }, { "grad_norm": 0.39522701501846313, "learning_rate": 9.789059677444089e-05, "loss": 0.0154, "step": 8290 }, { "grad_norm": 0.3074120581150055, "learning_rate": 9.788266949858776e-05, "loss": 0.0136, "step": 8300 }, { "grad_norm": 0.35952499508857727, "learning_rate": 9.787472767724392e-05, "loss": 0.016, "step": 8310 }, { "grad_norm": 0.3444002568721771, "learning_rate": 9.786677131282185e-05, "loss": 0.0143, "step": 8320 }, { "grad_norm": 0.32378682494163513, "learning_rate": 9.785880040773853e-05, "loss": 0.0138, "step": 8330 }, { "grad_norm": 0.39044997096061707, "learning_rate": 9.785081496441527e-05, "loss": 0.0144, "step": 8340 }, { "grad_norm": 0.3554976284503937, "learning_rate": 9.784281498527785e-05, "loss": 0.0148, "step": 8350 }, { "grad_norm": 0.30649831891059875, "learning_rate": 9.783480047275646e-05, "loss": 0.0143, "step": 8360 }, { "grad_norm": 0.4501311480998993, "learning_rate": 9.78267714292857e-05, "loss": 0.0154, "step": 8370 }, { "grad_norm": 0.3144357204437256, "learning_rate": 9.781872785730454e-05, "loss": 0.0163, "step": 8380 }, { "grad_norm": 0.383786141872406, "learning_rate": 9.781066975925646e-05, "loss": 0.0144, "step": 8390 }, { "grad_norm": 0.34242933988571167, "learning_rate": 9.780259713758928e-05, "loss": 0.0135, "step": 8400 }, { "grad_norm": 0.43082892894744873, "learning_rate": 9.779450999475524e-05, "loss": 0.014, "step": 8410 }, { "grad_norm": 0.3656945526599884, "learning_rate": 9.7786408333211e-05, "loss": 0.0146, "step": 8420 }, { "grad_norm": 0.39802682399749756, "learning_rate": 9.777829215541764e-05, "loss": 0.0126, "step": 8430 }, { "grad_norm": 0.29535219073295593, "learning_rate": 9.777016146384064e-05, "loss": 0.0151, "step": 8440 }, { "grad_norm": 0.2743058502674103, "learning_rate": 9.776201626094988e-05, "loss": 0.0151, "step": 8450 }, { "grad_norm": 0.47285139560699463, "learning_rate": 9.775385654921965e-05, "loss": 0.0156, "step": 8460 }, { "grad_norm": 0.5085629820823669, "learning_rate": 9.774568233112868e-05, "loss": 0.0243, "step": 8470 }, { "grad_norm": 0.3698887526988983, "learning_rate": 9.773749360916007e-05, "loss": 0.0202, "step": 8480 }, { "grad_norm": 0.35737812519073486, "learning_rate": 9.772929038580134e-05, "loss": 0.0173, "step": 8490 }, { "grad_norm": 0.3153495192527771, "learning_rate": 9.772107266354439e-05, "loss": 0.0175, "step": 8500 }, { "grad_norm": 0.3515191972255707, "learning_rate": 9.77128404448856e-05, "loss": 0.0161, "step": 8510 }, { "grad_norm": 0.3558012843132019, "learning_rate": 9.770459373232565e-05, "loss": 0.0166, "step": 8520 }, { "grad_norm": 0.30575254559516907, "learning_rate": 9.769633252836969e-05, "loss": 0.0188, "step": 8530 }, { "grad_norm": 0.4248638153076172, "learning_rate": 9.768805683552724e-05, "loss": 0.0177, "step": 8540 }, { "grad_norm": 0.4025588631629944, "learning_rate": 9.767976665631228e-05, "loss": 0.0161, "step": 8550 }, { "grad_norm": 0.4045548737049103, "learning_rate": 9.767146199324311e-05, "loss": 0.0166, "step": 8560 }, { "grad_norm": 0.41473588347435, "learning_rate": 9.766314284884249e-05, "loss": 0.0133, "step": 8570 }, { "grad_norm": 0.352109432220459, "learning_rate": 9.765480922563752e-05, "loss": 0.016, "step": 8580 }, { "grad_norm": 0.4061742424964905, "learning_rate": 9.764646112615978e-05, "loss": 0.0158, "step": 8590 }, { "grad_norm": 0.3103123903274536, "learning_rate": 9.763809855294517e-05, "loss": 0.0144, "step": 8600 }, { "grad_norm": 0.3941231369972229, "learning_rate": 9.762972150853404e-05, "loss": 0.0155, "step": 8610 }, { "grad_norm": 0.4432227611541748, "learning_rate": 9.762132999547111e-05, "loss": 0.0146, "step": 8620 }, { "grad_norm": 0.3516373336315155, "learning_rate": 9.761292401630549e-05, "loss": 0.0131, "step": 8630 }, { "grad_norm": 0.37528103590011597, "learning_rate": 9.76045035735907e-05, "loss": 0.0143, "step": 8640 }, { "grad_norm": 0.3847804665565491, "learning_rate": 9.759606866988464e-05, "loss": 0.0161, "step": 8650 }, { "grad_norm": 0.31890586018562317, "learning_rate": 9.758761930774963e-05, "loss": 0.0158, "step": 8660 }, { "grad_norm": 0.3136662542819977, "learning_rate": 9.757915548975235e-05, "loss": 0.0163, "step": 8670 }, { "grad_norm": 0.31251639127731323, "learning_rate": 9.757067721846389e-05, "loss": 0.0154, "step": 8680 }, { "grad_norm": 0.4165763556957245, "learning_rate": 9.756218449645971e-05, "loss": 0.0151, "step": 8690 }, { "grad_norm": 0.41660186648368835, "learning_rate": 9.75536773263197e-05, "loss": 0.014, "step": 8700 }, { "grad_norm": 0.3443741798400879, "learning_rate": 9.75451557106281e-05, "loss": 0.015, "step": 8710 }, { "grad_norm": 0.3316461741924286, "learning_rate": 9.753661965197354e-05, "loss": 0.0123, "step": 8720 }, { "grad_norm": 0.4706900417804718, "learning_rate": 9.752806915294908e-05, "loss": 0.0167, "step": 8730 }, { "grad_norm": 0.32706180214881897, "learning_rate": 9.75195042161521e-05, "loss": 0.0131, "step": 8740 }, { "grad_norm": 0.3663170039653778, "learning_rate": 9.751092484418442e-05, "loss": 0.0176, "step": 8750 }, { "grad_norm": 0.3475320041179657, "learning_rate": 9.750233103965224e-05, "loss": 0.0131, "step": 8760 }, { "grad_norm": 0.3173498511314392, "learning_rate": 9.749372280516611e-05, "loss": 0.0123, "step": 8770 }, { "grad_norm": 0.3239175081253052, "learning_rate": 9.748510014334097e-05, "loss": 0.0131, "step": 8780 }, { "grad_norm": 0.39269745349884033, "learning_rate": 9.747646305679621e-05, "loss": 0.0122, "step": 8790 }, { "grad_norm": 0.3620828092098236, "learning_rate": 9.74678115481555e-05, "loss": 0.0162, "step": 8800 }, { "grad_norm": 0.42276638746261597, "learning_rate": 9.745914562004696e-05, "loss": 0.0148, "step": 8810 }, { "grad_norm": 0.40569648146629333, "learning_rate": 9.745046527510307e-05, "loss": 0.0157, "step": 8820 }, { "grad_norm": 0.3451126515865326, "learning_rate": 9.744177051596068e-05, "loss": 0.0188, "step": 8830 }, { "grad_norm": 0.37246832251548767, "learning_rate": 9.743306134526105e-05, "loss": 0.0178, "step": 8840 }, { "grad_norm": 0.3463205099105835, "learning_rate": 9.742433776564977e-05, "loss": 0.0181, "step": 8850 }, { "grad_norm": 0.39877772331237793, "learning_rate": 9.741559977977683e-05, "loss": 0.0165, "step": 8860 }, { "grad_norm": 0.26505008339881897, "learning_rate": 9.740684739029661e-05, "loss": 0.0167, "step": 8870 }, { "grad_norm": 0.36500832438468933, "learning_rate": 9.739808059986789e-05, "loss": 0.0142, "step": 8880 }, { "grad_norm": 0.3650977313518524, "learning_rate": 9.738929941115373e-05, "loss": 0.0163, "step": 8890 }, { "grad_norm": 0.29584237933158875, "learning_rate": 9.738050382682167e-05, "loss": 0.0117, "step": 8900 }, { "grad_norm": 0.40130600333213806, "learning_rate": 9.737169384954355e-05, "loss": 0.0149, "step": 8910 }, { "grad_norm": 0.36694860458374023, "learning_rate": 9.736286948199562e-05, "loss": 0.0143, "step": 8920 }, { "grad_norm": 0.29328790307044983, "learning_rate": 9.735403072685848e-05, "loss": 0.0149, "step": 8930 }, { "grad_norm": 0.360105961561203, "learning_rate": 9.734517758681712e-05, "loss": 0.0133, "step": 8940 }, { "grad_norm": 0.3908972442150116, "learning_rate": 9.733631006456088e-05, "loss": 0.0157, "step": 8950 }, { "grad_norm": 0.32673004269599915, "learning_rate": 9.732742816278348e-05, "loss": 0.0168, "step": 8960 }, { "grad_norm": 0.389112263917923, "learning_rate": 9.731853188418302e-05, "loss": 0.0146, "step": 8970 }, { "grad_norm": 0.38246992230415344, "learning_rate": 9.730962123146194e-05, "loss": 0.0158, "step": 8980 }, { "grad_norm": 0.3654584288597107, "learning_rate": 9.730069620732709e-05, "loss": 0.016, "step": 8990 }, { "grad_norm": 0.33035150170326233, "learning_rate": 9.72917568144896e-05, "loss": 0.0196, "step": 9000 }, { "grad_norm": 0.31670403480529785, "learning_rate": 9.728280305566509e-05, "loss": 0.0152, "step": 9010 }, { "grad_norm": 0.28820958733558655, "learning_rate": 9.727383493357343e-05, "loss": 0.0123, "step": 9020 }, { "grad_norm": 0.3736238479614258, "learning_rate": 9.726485245093891e-05, "loss": 0.0174, "step": 9030 }, { "grad_norm": 0.3337687849998474, "learning_rate": 9.725585561049018e-05, "loss": 0.0174, "step": 9040 }, { "grad_norm": 0.4167155921459198, "learning_rate": 9.724684441496022e-05, "loss": 0.0136, "step": 9050 }, { "grad_norm": 0.28661665320396423, "learning_rate": 9.72378188670864e-05, "loss": 0.0146, "step": 9060 }, { "grad_norm": 0.3665269911289215, "learning_rate": 9.722877896961047e-05, "loss": 0.0155, "step": 9070 }, { "grad_norm": 0.2954253852367401, "learning_rate": 9.721972472527848e-05, "loss": 0.0156, "step": 9080 }, { "grad_norm": 0.3786759078502655, "learning_rate": 9.721065613684089e-05, "loss": 0.015, "step": 9090 }, { "grad_norm": 0.33991774916648865, "learning_rate": 9.72015732070525e-05, "loss": 0.0151, "step": 9100 }, { "grad_norm": 0.37635087966918945, "learning_rate": 9.719247593867244e-05, "loss": 0.0155, "step": 9110 }, { "grad_norm": 0.31850841641426086, "learning_rate": 9.718336433446423e-05, "loss": 0.016, "step": 9120 }, { "grad_norm": 0.4509117901325226, "learning_rate": 9.717423839719574e-05, "loss": 0.013, "step": 9130 }, { "grad_norm": 0.26356270909309387, "learning_rate": 9.71650981296392e-05, "loss": 0.0154, "step": 9140 }, { "grad_norm": 0.3148396909236908, "learning_rate": 9.715594353457118e-05, "loss": 0.0131, "step": 9150 }, { "grad_norm": 0.39046502113342285, "learning_rate": 9.714677461477257e-05, "loss": 0.0144, "step": 9160 }, { "grad_norm": 0.33206799626350403, "learning_rate": 9.713759137302869e-05, "loss": 0.0158, "step": 9170 }, { "grad_norm": 0.3918338716030121, "learning_rate": 9.712839381212914e-05, "loss": 0.0135, "step": 9180 }, { "grad_norm": 0.3739299774169922, "learning_rate": 9.71191819348679e-05, "loss": 0.0153, "step": 9190 }, { "grad_norm": 0.3122602105140686, "learning_rate": 9.710995574404331e-05, "loss": 0.016, "step": 9200 }, { "grad_norm": 0.3587058186531067, "learning_rate": 9.710071524245802e-05, "loss": 0.0163, "step": 9210 }, { "grad_norm": 0.27872487902641296, "learning_rate": 9.709146043291906e-05, "loss": 0.0172, "step": 9220 }, { "grad_norm": 0.3250398635864258, "learning_rate": 9.70821913182378e-05, "loss": 0.0144, "step": 9230 }, { "grad_norm": 0.30500882863998413, "learning_rate": 9.707290790122995e-05, "loss": 0.0159, "step": 9240 }, { "grad_norm": 0.21951811015605927, "learning_rate": 9.706361018471557e-05, "loss": 0.0136, "step": 9250 }, { "grad_norm": 0.39244839549064636, "learning_rate": 9.705429817151906e-05, "loss": 0.0134, "step": 9260 }, { "grad_norm": 0.29822587966918945, "learning_rate": 9.704497186446917e-05, "loss": 0.0137, "step": 9270 }, { "grad_norm": 0.40413886308670044, "learning_rate": 9.703563126639896e-05, "loss": 0.0148, "step": 9280 }, { "grad_norm": 0.3283483684062958, "learning_rate": 9.70262763801459e-05, "loss": 0.0138, "step": 9290 }, { "grad_norm": 0.2845418155193329, "learning_rate": 9.701690720855171e-05, "loss": 0.0157, "step": 9300 }, { "grad_norm": 0.3722706735134125, "learning_rate": 9.700752375446253e-05, "loss": 0.0131, "step": 9310 }, { "grad_norm": 0.26517462730407715, "learning_rate": 9.69981260207288e-05, "loss": 0.013, "step": 9320 }, { "grad_norm": 0.3415532112121582, "learning_rate": 9.698871401020529e-05, "loss": 0.0127, "step": 9330 }, { "grad_norm": 0.36040860414505005, "learning_rate": 9.697928772575112e-05, "loss": 0.0132, "step": 9340 }, { "grad_norm": 0.3559826612472534, "learning_rate": 9.696984717022976e-05, "loss": 0.0145, "step": 9350 }, { "grad_norm": 0.3137611746788025, "learning_rate": 9.6960392346509e-05, "loss": 0.014, "step": 9360 }, { "grad_norm": 0.2644113302230835, "learning_rate": 9.695092325746097e-05, "loss": 0.0111, "step": 9370 }, { "grad_norm": 0.3597872257232666, "learning_rate": 9.694143990596211e-05, "loss": 0.0137, "step": 9380 }, { "grad_norm": 0.2509511411190033, "learning_rate": 9.693194229489325e-05, "loss": 0.0115, "step": 9390 }, { "grad_norm": 0.3125064969062805, "learning_rate": 9.692243042713944e-05, "loss": 0.0138, "step": 9400 }, { "grad_norm": 0.2905729413032532, "learning_rate": 9.691290430559022e-05, "loss": 0.011, "step": 9410 }, { "grad_norm": 0.32909095287323, "learning_rate": 9.690336393313932e-05, "loss": 0.014, "step": 9420 }, { "grad_norm": 0.3342820703983307, "learning_rate": 9.689380931268487e-05, "loss": 0.0155, "step": 9430 }, { "grad_norm": 0.2983195185661316, "learning_rate": 9.688424044712932e-05, "loss": 0.0136, "step": 9440 }, { "grad_norm": 0.2790825068950653, "learning_rate": 9.687465733937942e-05, "loss": 0.0153, "step": 9450 }, { "grad_norm": 0.38151609897613525, "learning_rate": 9.686505999234627e-05, "loss": 0.0123, "step": 9460 }, { "grad_norm": 0.33219847083091736, "learning_rate": 9.685544840894529e-05, "loss": 0.0132, "step": 9470 }, { "grad_norm": 0.27068987488746643, "learning_rate": 9.684582259209624e-05, "loss": 0.0151, "step": 9480 }, { "grad_norm": 0.32508429884910583, "learning_rate": 9.683618254472317e-05, "loss": 0.0137, "step": 9490 }, { "grad_norm": 0.4124397039413452, "learning_rate": 9.682652826975449e-05, "loss": 0.0134, "step": 9500 }, { "grad_norm": 0.30208060145378113, "learning_rate": 9.681685977012291e-05, "loss": 0.0154, "step": 9510 }, { "grad_norm": 0.31354883313179016, "learning_rate": 9.680717704876546e-05, "loss": 0.0118, "step": 9520 }, { "grad_norm": 0.318999320268631, "learning_rate": 9.679748010862349e-05, "loss": 0.01, "step": 9530 }, { "grad_norm": 0.284979909658432, "learning_rate": 9.678776895264267e-05, "loss": 0.0128, "step": 9540 }, { "grad_norm": 0.32563382387161255, "learning_rate": 9.6778043583773e-05, "loss": 0.0163, "step": 9550 }, { "grad_norm": 0.34569570422172546, "learning_rate": 9.67683040049688e-05, "loss": 0.0153, "step": 9560 }, { "grad_norm": 0.3157880902290344, "learning_rate": 9.675855021918869e-05, "loss": 0.0134, "step": 9570 }, { "grad_norm": 0.3187633156776428, "learning_rate": 9.674878222939561e-05, "loss": 0.0165, "step": 9580 }, { "grad_norm": 0.33771705627441406, "learning_rate": 9.673900003855681e-05, "loss": 0.017, "step": 9590 }, { "grad_norm": 0.2888439893722534, "learning_rate": 9.672920364964389e-05, "loss": 0.0127, "step": 9600 }, { "grad_norm": 0.3588801622390747, "learning_rate": 9.671939306563269e-05, "loss": 0.015, "step": 9610 }, { "grad_norm": 0.3220963478088379, "learning_rate": 9.670956828950345e-05, "loss": 0.0119, "step": 9620 }, { "grad_norm": 0.3182002604007721, "learning_rate": 9.669972932424065e-05, "loss": 0.0127, "step": 9630 }, { "grad_norm": 0.362656831741333, "learning_rate": 9.668987617283312e-05, "loss": 0.0161, "step": 9640 }, { "grad_norm": 0.3145112991333008, "learning_rate": 9.668000883827397e-05, "loss": 0.0158, "step": 9650 }, { "grad_norm": 0.26826515793800354, "learning_rate": 9.667012732356067e-05, "loss": 0.0147, "step": 9660 }, { "grad_norm": 0.33713433146476746, "learning_rate": 9.666023163169493e-05, "loss": 0.0109, "step": 9670 }, { "grad_norm": 0.29669779539108276, "learning_rate": 9.665032176568281e-05, "loss": 0.0108, "step": 9680 }, { "grad_norm": 0.3480658233165741, "learning_rate": 9.664039772853469e-05, "loss": 0.0152, "step": 9690 }, { "grad_norm": 0.395332396030426, "learning_rate": 9.663045952326518e-05, "loss": 0.0148, "step": 9700 }, { "grad_norm": 0.40417957305908203, "learning_rate": 9.662050715289328e-05, "loss": 0.0142, "step": 9710 }, { "grad_norm": 0.3749469220638275, "learning_rate": 9.661054062044226e-05, "loss": 0.0143, "step": 9720 }, { "grad_norm": 0.33351844549179077, "learning_rate": 9.660055992893968e-05, "loss": 0.0127, "step": 9730 }, { "grad_norm": 0.3418681025505066, "learning_rate": 9.659056508141739e-05, "loss": 0.0135, "step": 9740 }, { "grad_norm": 0.47097650170326233, "learning_rate": 9.658055608091161e-05, "loss": 0.0163, "step": 9750 }, { "grad_norm": 0.4399387240409851, "learning_rate": 9.657053293046276e-05, "loss": 0.0149, "step": 9760 }, { "grad_norm": 0.39624348282814026, "learning_rate": 9.656049563311564e-05, "loss": 0.0126, "step": 9770 }, { "grad_norm": 0.3995211720466614, "learning_rate": 9.655044419191929e-05, "loss": 0.0117, "step": 9780 }, { "grad_norm": 0.38814017176628113, "learning_rate": 9.654037860992711e-05, "loss": 0.0133, "step": 9790 }, { "grad_norm": 0.2618493139743805, "learning_rate": 9.653029889019672e-05, "loss": 0.0136, "step": 9800 }, { "grad_norm": 0.3224608898162842, "learning_rate": 9.65202050357901e-05, "loss": 0.0199, "step": 9810 }, { "grad_norm": 0.43517857789993286, "learning_rate": 9.651009704977347e-05, "loss": 0.0166, "step": 9820 }, { "grad_norm": 0.3874225318431854, "learning_rate": 9.649997493521738e-05, "loss": 0.0178, "step": 9830 }, { "grad_norm": 0.27843573689460754, "learning_rate": 9.64898386951967e-05, "loss": 0.0171, "step": 9840 }, { "grad_norm": 0.40193015336990356, "learning_rate": 9.647968833279049e-05, "loss": 0.0163, "step": 9850 }, { "grad_norm": 0.2597414553165436, "learning_rate": 9.646952385108218e-05, "loss": 0.0148, "step": 9860 }, { "grad_norm": 0.27404460310935974, "learning_rate": 9.645934525315951e-05, "loss": 0.0115, "step": 9870 }, { "grad_norm": 0.3651256561279297, "learning_rate": 9.644915254211442e-05, "loss": 0.0121, "step": 9880 }, { "grad_norm": 0.3848916292190552, "learning_rate": 9.643894572104321e-05, "loss": 0.0121, "step": 9890 }, { "grad_norm": 0.3190523386001587, "learning_rate": 9.642872479304644e-05, "loss": 0.0126, "step": 9900 }, { "grad_norm": 0.3862152397632599, "learning_rate": 9.641848976122895e-05, "loss": 0.0145, "step": 9910 }, { "grad_norm": 0.3423781096935272, "learning_rate": 9.64082406286999e-05, "loss": 0.0137, "step": 9920 }, { "grad_norm": 0.35022684931755066, "learning_rate": 9.639797739857269e-05, "loss": 0.0145, "step": 9930 }, { "grad_norm": 0.33175429701805115, "learning_rate": 9.638770007396498e-05, "loss": 0.011, "step": 9940 }, { "grad_norm": 0.35739460587501526, "learning_rate": 9.63774086579988e-05, "loss": 0.0172, "step": 9950 }, { "grad_norm": 0.3348420262336731, "learning_rate": 9.63671031538004e-05, "loss": 0.0133, "step": 9960 }, { "grad_norm": 0.327800452709198, "learning_rate": 9.635678356450031e-05, "loss": 0.0128, "step": 9970 }, { "grad_norm": 0.2548326253890991, "learning_rate": 9.634644989323336e-05, "loss": 0.0109, "step": 9980 }, { "grad_norm": 0.36686092615127563, "learning_rate": 9.633610214313861e-05, "loss": 0.0131, "step": 9990 }, { "grad_norm": 0.37838801741600037, "learning_rate": 9.632574031735951e-05, "loss": 0.0137, "step": 10000 }, { "grad_norm": 0.26175743341445923, "learning_rate": 9.631536441904364e-05, "loss": 0.011, "step": 10010 }, { "grad_norm": 0.3280612826347351, "learning_rate": 9.630497445134293e-05, "loss": 0.0134, "step": 10020 }, { "grad_norm": 0.2718747854232788, "learning_rate": 9.62945704174136e-05, "loss": 0.0141, "step": 10030 }, { "grad_norm": 0.22997650504112244, "learning_rate": 9.628415232041612e-05, "loss": 0.0121, "step": 10040 }, { "grad_norm": 0.2900289297103882, "learning_rate": 9.627372016351524e-05, "loss": 0.0126, "step": 10050 }, { "grad_norm": 0.27054861187934875, "learning_rate": 9.626327394987995e-05, "loss": 0.0116, "step": 10060 }, { "grad_norm": 0.3369966447353363, "learning_rate": 9.625281368268355e-05, "loss": 0.0121, "step": 10070 }, { "grad_norm": 0.2885831594467163, "learning_rate": 9.624233936510357e-05, "loss": 0.0185, "step": 10080 }, { "grad_norm": 0.34082889556884766, "learning_rate": 9.623185100032187e-05, "loss": 0.0128, "step": 10090 }, { "grad_norm": 0.38311946392059326, "learning_rate": 9.62213485915245e-05, "loss": 0.0133, "step": 10100 }, { "grad_norm": 0.3015669584274292, "learning_rate": 9.621083214190186e-05, "loss": 0.0106, "step": 10110 }, { "grad_norm": 0.2607632577419281, "learning_rate": 9.62003016546485e-05, "loss": 0.0122, "step": 10120 }, { "grad_norm": 0.3680003881454468, "learning_rate": 9.618975713296339e-05, "loss": 0.0147, "step": 10130 }, { "grad_norm": 0.4181278347969055, "learning_rate": 9.61791985800496e-05, "loss": 0.0115, "step": 10140 }, { "grad_norm": 0.365619033575058, "learning_rate": 9.616862599911458e-05, "loss": 0.0118, "step": 10150 }, { "grad_norm": 0.3310202658176422, "learning_rate": 9.615803939337e-05, "loss": 0.013, "step": 10160 }, { "grad_norm": 0.31007471680641174, "learning_rate": 9.614743876603178e-05, "loss": 0.0142, "step": 10170 }, { "grad_norm": 0.269034206867218, "learning_rate": 9.613682412032013e-05, "loss": 0.0143, "step": 10180 }, { "grad_norm": 0.34003397822380066, "learning_rate": 9.612619545945947e-05, "loss": 0.0131, "step": 10190 }, { "grad_norm": 0.2762850821018219, "learning_rate": 9.611555278667852e-05, "loss": 0.0128, "step": 10200 }, { "grad_norm": 0.30333468317985535, "learning_rate": 9.610489610521024e-05, "loss": 0.016, "step": 10210 }, { "grad_norm": 0.3439791202545166, "learning_rate": 9.609422541829187e-05, "loss": 0.0168, "step": 10220 }, { "grad_norm": 0.3174102306365967, "learning_rate": 9.608354072916486e-05, "loss": 0.0137, "step": 10230 }, { "grad_norm": 0.26099175214767456, "learning_rate": 9.607284204107493e-05, "loss": 0.0118, "step": 10240 }, { "grad_norm": 0.3374234735965729, "learning_rate": 9.606212935727208e-05, "loss": 0.0113, "step": 10250 }, { "grad_norm": 0.3489407002925873, "learning_rate": 9.605140268101052e-05, "loss": 0.0162, "step": 10260 }, { "grad_norm": 0.3476567566394806, "learning_rate": 9.604066201554875e-05, "loss": 0.0149, "step": 10270 }, { "grad_norm": 0.3224858343601227, "learning_rate": 9.60299073641495e-05, "loss": 0.0156, "step": 10280 }, { "grad_norm": 0.2694963216781616, "learning_rate": 9.601913873007974e-05, "loss": 0.0126, "step": 10290 }, { "grad_norm": 0.3785710036754608, "learning_rate": 9.60083561166107e-05, "loss": 0.0129, "step": 10300 }, { "grad_norm": 0.44782793521881104, "learning_rate": 9.599755952701783e-05, "loss": 0.0125, "step": 10310 }, { "grad_norm": 0.2835095226764679, "learning_rate": 9.598674896458089e-05, "loss": 0.012, "step": 10320 }, { "grad_norm": 0.2620943784713745, "learning_rate": 9.597592443258383e-05, "loss": 0.0139, "step": 10330 }, { "grad_norm": 0.22041644155979156, "learning_rate": 9.596508593431483e-05, "loss": 0.0116, "step": 10340 }, { "grad_norm": 0.4375811815261841, "learning_rate": 9.59542334730664e-05, "loss": 0.0167, "step": 10350 }, { "grad_norm": 0.4061785936355591, "learning_rate": 9.594336705213516e-05, "loss": 0.0155, "step": 10360 }, { "grad_norm": 0.26017096638679504, "learning_rate": 9.593248667482208e-05, "loss": 0.0136, "step": 10370 }, { "grad_norm": 0.3989296853542328, "learning_rate": 9.592159234443233e-05, "loss": 0.0123, "step": 10380 }, { "grad_norm": 0.35283365845680237, "learning_rate": 9.59106840642753e-05, "loss": 0.0124, "step": 10390 }, { "grad_norm": 0.32362040877342224, "learning_rate": 9.589976183766467e-05, "loss": 0.0125, "step": 10400 }, { "grad_norm": 0.33966749906539917, "learning_rate": 9.58888256679183e-05, "loss": 0.0124, "step": 10410 }, { "grad_norm": 0.22488029301166534, "learning_rate": 9.587787555835832e-05, "loss": 0.01, "step": 10420 }, { "grad_norm": 0.2843591570854187, "learning_rate": 9.586691151231107e-05, "loss": 0.0132, "step": 10430 }, { "grad_norm": 0.3061405122280121, "learning_rate": 9.585593353310715e-05, "loss": 0.0121, "step": 10440 }, { "grad_norm": 0.2765599191188812, "learning_rate": 9.58449416240814e-05, "loss": 0.0116, "step": 10450 }, { "grad_norm": 0.3570658266544342, "learning_rate": 9.583393578857283e-05, "loss": 0.0136, "step": 10460 }, { "grad_norm": 0.3393988609313965, "learning_rate": 9.582291602992474e-05, "loss": 0.0132, "step": 10470 }, { "grad_norm": 0.2755260765552521, "learning_rate": 9.581188235148466e-05, "loss": 0.0126, "step": 10480 }, { "grad_norm": 0.3000558614730835, "learning_rate": 9.58008347566043e-05, "loss": 0.0127, "step": 10490 }, { "grad_norm": 0.34652918577194214, "learning_rate": 9.578977324863965e-05, "loss": 0.0135, "step": 10500 }, { "grad_norm": 0.30220451951026917, "learning_rate": 9.577869783095089e-05, "loss": 0.0141, "step": 10510 }, { "grad_norm": 0.38388505578041077, "learning_rate": 9.576760850690245e-05, "loss": 0.0148, "step": 10520 }, { "grad_norm": 0.3771129846572876, "learning_rate": 9.575650527986298e-05, "loss": 0.0142, "step": 10530 }, { "grad_norm": 0.3310950696468353, "learning_rate": 9.574538815320531e-05, "loss": 0.015, "step": 10540 }, { "grad_norm": 0.40225401520729065, "learning_rate": 9.573425713030656e-05, "loss": 0.0151, "step": 10550 }, { "grad_norm": 0.27139759063720703, "learning_rate": 9.572311221454806e-05, "loss": 0.0133, "step": 10560 }, { "grad_norm": 0.4283144772052765, "learning_rate": 9.57119534093153e-05, "loss": 0.0149, "step": 10570 }, { "grad_norm": 0.275490939617157, "learning_rate": 9.570078071799806e-05, "loss": 0.0121, "step": 10580 }, { "grad_norm": 0.318310409784317, "learning_rate": 9.568959414399028e-05, "loss": 0.0144, "step": 10590 }, { "grad_norm": 0.3069532811641693, "learning_rate": 9.567839369069018e-05, "loss": 0.0137, "step": 10600 }, { "grad_norm": 0.2492484301328659, "learning_rate": 9.566717936150013e-05, "loss": 0.0156, "step": 10610 }, { "grad_norm": 0.33000829815864563, "learning_rate": 9.565595115982678e-05, "loss": 0.0175, "step": 10620 }, { "grad_norm": 0.34279122948646545, "learning_rate": 9.564470908908094e-05, "loss": 0.0132, "step": 10630 }, { "grad_norm": 0.37935033440589905, "learning_rate": 9.563345315267764e-05, "loss": 0.0154, "step": 10640 }, { "grad_norm": 0.24085286259651184, "learning_rate": 9.562218335403616e-05, "loss": 0.012, "step": 10650 }, { "grad_norm": 0.37656792998313904, "learning_rate": 9.561089969657999e-05, "loss": 0.0131, "step": 10660 }, { "grad_norm": 0.2326240986585617, "learning_rate": 9.559960218373673e-05, "loss": 0.0113, "step": 10670 }, { "grad_norm": 0.4000895321369171, "learning_rate": 9.558829081893836e-05, "loss": 0.01, "step": 10680 }, { "grad_norm": 0.35430994629859924, "learning_rate": 9.55769656056209e-05, "loss": 0.0121, "step": 10690 }, { "grad_norm": 0.30428677797317505, "learning_rate": 9.556562654722469e-05, "loss": 0.0133, "step": 10700 }, { "grad_norm": 0.25132450461387634, "learning_rate": 9.555427364719422e-05, "loss": 0.011, "step": 10710 }, { "grad_norm": 0.4662877321243286, "learning_rate": 9.55429069089782e-05, "loss": 0.0146, "step": 10720 }, { "grad_norm": 0.29919013381004333, "learning_rate": 9.553152633602956e-05, "loss": 0.014, "step": 10730 }, { "grad_norm": 0.32089173793792725, "learning_rate": 9.552013193180543e-05, "loss": 0.0123, "step": 10740 }, { "grad_norm": 0.3637930154800415, "learning_rate": 9.550872369976707e-05, "loss": 0.0118, "step": 10750 }, { "grad_norm": 0.2593134939670563, "learning_rate": 9.549730164338007e-05, "loss": 0.0125, "step": 10760 }, { "grad_norm": 0.3442279100418091, "learning_rate": 9.548586576611408e-05, "loss": 0.0141, "step": 10770 }, { "grad_norm": 0.32184478640556335, "learning_rate": 9.54744160714431e-05, "loss": 0.0119, "step": 10780 }, { "grad_norm": 0.29462918639183044, "learning_rate": 9.546295256284516e-05, "loss": 0.0165, "step": 10790 }, { "grad_norm": 0.3887004852294922, "learning_rate": 9.545147524380265e-05, "loss": 0.0142, "step": 10800 }, { "grad_norm": 0.3077283501625061, "learning_rate": 9.543998411780201e-05, "loss": 0.0107, "step": 10810 }, { "grad_norm": 0.2644650340080261, "learning_rate": 9.542847918833397e-05, "loss": 0.013, "step": 10820 }, { "grad_norm": 0.2867148220539093, "learning_rate": 9.541696045889343e-05, "loss": 0.0137, "step": 10830 }, { "grad_norm": 0.3509559631347656, "learning_rate": 9.540542793297947e-05, "loss": 0.0129, "step": 10840 }, { "grad_norm": 0.26982375979423523, "learning_rate": 9.539388161409537e-05, "loss": 0.0119, "step": 10850 }, { "grad_norm": 0.26333704590797424, "learning_rate": 9.538232150574857e-05, "loss": 0.0142, "step": 10860 }, { "grad_norm": 0.3465039134025574, "learning_rate": 9.537074761145076e-05, "loss": 0.014, "step": 10870 }, { "grad_norm": 0.27450138330459595, "learning_rate": 9.535915993471778e-05, "loss": 0.0119, "step": 10880 }, { "grad_norm": 0.3773563504219055, "learning_rate": 9.534755847906964e-05, "loss": 0.014, "step": 10890 }, { "grad_norm": 0.30463284254074097, "learning_rate": 9.533594324803057e-05, "loss": 0.0116, "step": 10900 }, { "grad_norm": 0.3722027540206909, "learning_rate": 9.532431424512895e-05, "loss": 0.0127, "step": 10910 }, { "grad_norm": 0.28425273299217224, "learning_rate": 9.531267147389741e-05, "loss": 0.0134, "step": 10920 }, { "grad_norm": 0.30973801016807556, "learning_rate": 9.530101493787266e-05, "loss": 0.0132, "step": 10930 }, { "grad_norm": 0.3881785571575165, "learning_rate": 9.528934464059571e-05, "loss": 0.0139, "step": 10940 }, { "grad_norm": 0.29282981157302856, "learning_rate": 9.527766058561163e-05, "loss": 0.0109, "step": 10950 }, { "grad_norm": 0.3170948624610901, "learning_rate": 9.526596277646976e-05, "loss": 0.0113, "step": 10960 }, { "grad_norm": 0.2517901062965393, "learning_rate": 9.525425121672358e-05, "loss": 0.0115, "step": 10970 }, { "grad_norm": 0.32891780138015747, "learning_rate": 9.524252590993074e-05, "loss": 0.0135, "step": 10980 }, { "grad_norm": 0.2312023788690567, "learning_rate": 9.523078685965309e-05, "loss": 0.0136, "step": 10990 }, { "grad_norm": 0.3014102280139923, "learning_rate": 9.521903406945664e-05, "loss": 0.0128, "step": 11000 }, { "grad_norm": 0.3667161166667938, "learning_rate": 9.520726754291158e-05, "loss": 0.011, "step": 11010 }, { "grad_norm": 0.320538729429245, "learning_rate": 9.519548728359227e-05, "loss": 0.0111, "step": 11020 }, { "grad_norm": 0.31149187684059143, "learning_rate": 9.518369329507726e-05, "loss": 0.0122, "step": 11030 }, { "grad_norm": 0.3280191421508789, "learning_rate": 9.51718855809492e-05, "loss": 0.0112, "step": 11040 }, { "grad_norm": 0.4067302942276001, "learning_rate": 9.516006414479502e-05, "loss": 0.0129, "step": 11050 }, { "grad_norm": 0.23629716038703918, "learning_rate": 9.514822899020572e-05, "loss": 0.0125, "step": 11060 }, { "grad_norm": 0.27616390585899353, "learning_rate": 9.513638012077654e-05, "loss": 0.0124, "step": 11070 }, { "grad_norm": 0.3262661099433899, "learning_rate": 9.512451754010683e-05, "loss": 0.0133, "step": 11080 }, { "grad_norm": 0.28653451800346375, "learning_rate": 9.511264125180013e-05, "loss": 0.0121, "step": 11090 }, { "grad_norm": 0.31544029712677, "learning_rate": 9.510075125946414e-05, "loss": 0.0119, "step": 11100 }, { "grad_norm": 0.24397487938404083, "learning_rate": 9.508884756671075e-05, "loss": 0.011, "step": 11110 }, { "grad_norm": 0.27550360560417175, "learning_rate": 9.507693017715596e-05, "loss": 0.0124, "step": 11120 }, { "grad_norm": 0.2947140038013458, "learning_rate": 9.506499909441997e-05, "loss": 0.012, "step": 11130 }, { "grad_norm": 0.33004024624824524, "learning_rate": 9.505305432212713e-05, "loss": 0.0141, "step": 11140 }, { "grad_norm": 0.3657475709915161, "learning_rate": 9.504109586390595e-05, "loss": 0.0151, "step": 11150 }, { "grad_norm": 0.2962097227573395, "learning_rate": 9.502912372338908e-05, "loss": 0.0104, "step": 11160 }, { "grad_norm": 0.2767464816570282, "learning_rate": 9.501713790421335e-05, "loss": 0.0105, "step": 11170 }, { "grad_norm": 0.3294580578804016, "learning_rate": 9.500513841001974e-05, "loss": 0.0148, "step": 11180 }, { "grad_norm": 0.29677534103393555, "learning_rate": 9.499312524445336e-05, "loss": 0.0125, "step": 11190 }, { "grad_norm": 0.2728707194328308, "learning_rate": 9.498109841116351e-05, "loss": 0.0125, "step": 11200 }, { "grad_norm": 0.3406853973865509, "learning_rate": 9.496905791380363e-05, "loss": 0.0145, "step": 11210 }, { "grad_norm": 0.3211401402950287, "learning_rate": 9.495700375603129e-05, "loss": 0.0117, "step": 11220 }, { "grad_norm": 0.2678249180316925, "learning_rate": 9.494493594150822e-05, "loss": 0.0126, "step": 11230 }, { "grad_norm": 0.24417801201343536, "learning_rate": 9.493285447390032e-05, "loss": 0.011, "step": 11240 }, { "grad_norm": 0.24807077646255493, "learning_rate": 9.492075935687761e-05, "loss": 0.0124, "step": 11250 }, { "grad_norm": 0.37638401985168457, "learning_rate": 9.490865059411427e-05, "loss": 0.0149, "step": 11260 }, { "grad_norm": 0.41031622886657715, "learning_rate": 9.489652818928863e-05, "loss": 0.018, "step": 11270 }, { "grad_norm": 0.26087990403175354, "learning_rate": 9.488439214608315e-05, "loss": 0.0128, "step": 11280 }, { "grad_norm": 0.3037140965461731, "learning_rate": 9.487224246818444e-05, "loss": 0.0106, "step": 11290 }, { "grad_norm": 0.2749677896499634, "learning_rate": 9.486007915928325e-05, "loss": 0.0112, "step": 11300 }, { "grad_norm": 0.3401210904121399, "learning_rate": 9.484790222307448e-05, "loss": 0.0142, "step": 11310 }, { "grad_norm": 0.3041849732398987, "learning_rate": 9.483571166325716e-05, "loss": 0.0101, "step": 11320 }, { "grad_norm": 0.35622477531433105, "learning_rate": 9.482350748353444e-05, "loss": 0.0131, "step": 11330 }, { "grad_norm": 0.3240088224411011, "learning_rate": 9.481128968761363e-05, "loss": 0.0143, "step": 11340 }, { "grad_norm": 0.2930913269519806, "learning_rate": 9.479905827920621e-05, "loss": 0.0145, "step": 11350 }, { "grad_norm": 0.31744933128356934, "learning_rate": 9.478681326202773e-05, "loss": 0.0111, "step": 11360 }, { "grad_norm": 0.31036609411239624, "learning_rate": 9.477455463979791e-05, "loss": 0.0117, "step": 11370 }, { "grad_norm": 0.3229518532752991, "learning_rate": 9.476228241624059e-05, "loss": 0.0142, "step": 11380 }, { "grad_norm": 0.2793971300125122, "learning_rate": 9.474999659508374e-05, "loss": 0.0131, "step": 11390 }, { "grad_norm": 0.2960239350795746, "learning_rate": 9.47376971800595e-05, "loss": 0.0125, "step": 11400 }, { "grad_norm": 0.3085460364818573, "learning_rate": 9.472538417490409e-05, "loss": 0.0139, "step": 11410 }, { "grad_norm": 0.3967900276184082, "learning_rate": 9.471305758335784e-05, "loss": 0.0148, "step": 11420 }, { "grad_norm": 0.3073781430721283, "learning_rate": 9.47007174091653e-05, "loss": 0.0147, "step": 11430 }, { "grad_norm": 0.40549206733703613, "learning_rate": 9.468836365607507e-05, "loss": 0.0143, "step": 11440 }, { "grad_norm": 0.3226219713687897, "learning_rate": 9.467599632783988e-05, "loss": 0.0124, "step": 11450 }, { "grad_norm": 0.3155043125152588, "learning_rate": 9.466361542821662e-05, "loss": 0.0126, "step": 11460 }, { "grad_norm": 0.22298204898834229, "learning_rate": 9.465122096096625e-05, "loss": 0.0137, "step": 11470 }, { "grad_norm": 0.27996012568473816, "learning_rate": 9.463881292985391e-05, "loss": 0.0126, "step": 11480 }, { "grad_norm": 0.27538031339645386, "learning_rate": 9.462639133864881e-05, "loss": 0.0115, "step": 11490 }, { "grad_norm": 0.26578009128570557, "learning_rate": 9.461395619112432e-05, "loss": 0.015, "step": 11500 }, { "grad_norm": 0.3194080591201782, "learning_rate": 9.460150749105791e-05, "loss": 0.0126, "step": 11510 }, { "grad_norm": 0.3216540813446045, "learning_rate": 9.458904524223116e-05, "loss": 0.0136, "step": 11520 }, { "grad_norm": 0.3181364834308624, "learning_rate": 9.457656944842976e-05, "loss": 0.0131, "step": 11530 }, { "grad_norm": 0.3125889301300049, "learning_rate": 9.456408011344353e-05, "loss": 0.0144, "step": 11540 }, { "grad_norm": 0.35436922311782837, "learning_rate": 9.455157724106643e-05, "loss": 0.0105, "step": 11550 }, { "grad_norm": 0.3434251546859741, "learning_rate": 9.453906083509647e-05, "loss": 0.0144, "step": 11560 }, { "grad_norm": 0.3820211887359619, "learning_rate": 9.45265308993358e-05, "loss": 0.0142, "step": 11570 }, { "grad_norm": 0.2705078721046448, "learning_rate": 9.451398743759071e-05, "loss": 0.0113, "step": 11580 }, { "grad_norm": 0.2262042909860611, "learning_rate": 9.450143045367156e-05, "loss": 0.0101, "step": 11590 }, { "grad_norm": 0.21458709239959717, "learning_rate": 9.448885995139283e-05, "loss": 0.0094, "step": 11600 }, { "grad_norm": 0.31339216232299805, "learning_rate": 9.44762759345731e-05, "loss": 0.0118, "step": 11610 }, { "grad_norm": 0.27186083793640137, "learning_rate": 9.446367840703509e-05, "loss": 0.014, "step": 11620 }, { "grad_norm": 0.23883971571922302, "learning_rate": 9.445106737260556e-05, "loss": 0.0108, "step": 11630 }, { "grad_norm": 0.30917060375213623, "learning_rate": 9.443844283511543e-05, "loss": 0.0116, "step": 11640 }, { "grad_norm": 0.3272685408592224, "learning_rate": 9.442580479839968e-05, "loss": 0.0127, "step": 11650 }, { "grad_norm": 0.2757956385612488, "learning_rate": 9.441315326629745e-05, "loss": 0.0122, "step": 11660 }, { "grad_norm": 0.2818768322467804, "learning_rate": 9.44004882426519e-05, "loss": 0.01, "step": 11670 }, { "grad_norm": 0.32806840538978577, "learning_rate": 9.438780973131037e-05, "loss": 0.0095, "step": 11680 }, { "grad_norm": 0.33475878834724426, "learning_rate": 9.437511773612423e-05, "loss": 0.0148, "step": 11690 }, { "grad_norm": 0.3011960983276367, "learning_rate": 9.436241226094896e-05, "loss": 0.0133, "step": 11700 }, { "grad_norm": 0.3173636794090271, "learning_rate": 9.434969330964418e-05, "loss": 0.0116, "step": 11710 }, { "grad_norm": 0.3196856379508972, "learning_rate": 9.433696088607356e-05, "loss": 0.0133, "step": 11720 }, { "grad_norm": 0.29220321774482727, "learning_rate": 9.432421499410486e-05, "loss": 0.0092, "step": 11730 }, { "grad_norm": 0.32554659247398376, "learning_rate": 9.431145563760998e-05, "loss": 0.0108, "step": 11740 }, { "grad_norm": 0.31929922103881836, "learning_rate": 9.429868282046484e-05, "loss": 0.0114, "step": 11750 }, { "grad_norm": 0.2933848798274994, "learning_rate": 9.428589654654951e-05, "loss": 0.0125, "step": 11760 }, { "grad_norm": 0.3677593469619751, "learning_rate": 9.42730968197481e-05, "loss": 0.0127, "step": 11770 }, { "grad_norm": 0.24362000823020935, "learning_rate": 9.426028364394883e-05, "loss": 0.0148, "step": 11780 }, { "grad_norm": 0.26775676012039185, "learning_rate": 9.424745702304402e-05, "loss": 0.0112, "step": 11790 }, { "grad_norm": 0.3327542841434479, "learning_rate": 9.423461696093006e-05, "loss": 0.0152, "step": 11800 }, { "grad_norm": 0.26998060941696167, "learning_rate": 9.422176346150741e-05, "loss": 0.0115, "step": 11810 }, { "grad_norm": 0.34707435965538025, "learning_rate": 9.420889652868063e-05, "loss": 0.014, "step": 11820 }, { "grad_norm": 0.38913461565971375, "learning_rate": 9.419601616635836e-05, "loss": 0.012, "step": 11830 }, { "grad_norm": 0.3392130434513092, "learning_rate": 9.418312237845331e-05, "loss": 0.0114, "step": 11840 }, { "grad_norm": 0.21947836875915527, "learning_rate": 9.417021516888225e-05, "loss": 0.0137, "step": 11850 }, { "grad_norm": 0.2975698709487915, "learning_rate": 9.415729454156608e-05, "loss": 0.014, "step": 11860 }, { "grad_norm": 0.2511023283004761, "learning_rate": 9.414436050042973e-05, "loss": 0.0148, "step": 11870 }, { "grad_norm": 0.41581928730010986, "learning_rate": 9.413141304940223e-05, "loss": 0.0119, "step": 11880 }, { "grad_norm": 0.22800247371196747, "learning_rate": 9.411845219241666e-05, "loss": 0.0119, "step": 11890 }, { "grad_norm": 0.3093186020851135, "learning_rate": 9.410547793341021e-05, "loss": 0.0178, "step": 11900 }, { "grad_norm": 0.35192447900772095, "learning_rate": 9.409249027632408e-05, "loss": 0.0115, "step": 11910 }, { "grad_norm": 0.30905112624168396, "learning_rate": 9.407948922510362e-05, "loss": 0.0117, "step": 11920 }, { "grad_norm": 0.3030983507633209, "learning_rate": 9.406647478369817e-05, "loss": 0.0123, "step": 11930 }, { "grad_norm": 0.3390377461910248, "learning_rate": 9.405344695606118e-05, "loss": 0.0111, "step": 11940 }, { "grad_norm": 0.3951464593410492, "learning_rate": 9.404040574615018e-05, "loss": 0.0158, "step": 11950 }, { "grad_norm": 0.2875381112098694, "learning_rate": 9.402735115792674e-05, "loss": 0.0141, "step": 11960 }, { "grad_norm": 0.26205626130104065, "learning_rate": 9.401428319535649e-05, "loss": 0.0116, "step": 11970 }, { "grad_norm": 0.28623369336128235, "learning_rate": 9.400120186240912e-05, "loss": 0.0157, "step": 11980 }, { "grad_norm": 0.3012058436870575, "learning_rate": 9.398810716305844e-05, "loss": 0.0127, "step": 11990 }, { "grad_norm": 0.27840206027030945, "learning_rate": 9.397499910128222e-05, "loss": 0.0137, "step": 12000 }, { "grad_norm": 0.2904963493347168, "learning_rate": 9.396187768106237e-05, "loss": 0.0162, "step": 12010 }, { "grad_norm": 0.36414679884910583, "learning_rate": 9.394874290638482e-05, "loss": 0.0151, "step": 12020 }, { "grad_norm": 0.29719632863998413, "learning_rate": 9.393559478123959e-05, "loss": 0.0152, "step": 12030 }, { "grad_norm": 0.29279398918151855, "learning_rate": 9.39224333096207e-05, "loss": 0.0147, "step": 12040 }, { "grad_norm": 0.2706153988838196, "learning_rate": 9.390925849552629e-05, "loss": 0.0109, "step": 12050 }, { "grad_norm": 0.25963762402534485, "learning_rate": 9.389607034295849e-05, "loss": 0.0115, "step": 12060 }, { "grad_norm": 0.266626238822937, "learning_rate": 9.388286885592355e-05, "loss": 0.0124, "step": 12070 }, { "grad_norm": 0.2981499135494232, "learning_rate": 9.386965403843168e-05, "loss": 0.0117, "step": 12080 }, { "grad_norm": 0.37225407361984253, "learning_rate": 9.385642589449726e-05, "loss": 0.011, "step": 12090 }, { "grad_norm": 0.2745845317840576, "learning_rate": 9.38431844281386e-05, "loss": 0.0129, "step": 12100 }, { "grad_norm": 0.32825857400894165, "learning_rate": 9.38299296433781e-05, "loss": 0.0113, "step": 12110 }, { "grad_norm": 0.31450754404067993, "learning_rate": 9.381666154424226e-05, "loss": 0.0136, "step": 12120 }, { "grad_norm": 0.3225879669189453, "learning_rate": 9.380338013476157e-05, "loss": 0.0116, "step": 12130 }, { "grad_norm": 0.2857224643230438, "learning_rate": 9.379008541897054e-05, "loss": 0.0098, "step": 12140 }, { "grad_norm": 0.31626152992248535, "learning_rate": 9.377677740090777e-05, "loss": 0.0114, "step": 12150 }, { "grad_norm": 0.30775129795074463, "learning_rate": 9.376345608461588e-05, "loss": 0.0132, "step": 12160 }, { "grad_norm": 0.3363003134727478, "learning_rate": 9.375012147414155e-05, "loss": 0.0129, "step": 12170 }, { "grad_norm": 0.29895225167274475, "learning_rate": 9.373677357353545e-05, "loss": 0.0103, "step": 12180 }, { "grad_norm": 0.28134334087371826, "learning_rate": 9.372341238685237e-05, "loss": 0.0107, "step": 12190 }, { "grad_norm": 0.3287898004055023, "learning_rate": 9.371003791815102e-05, "loss": 0.0141, "step": 12200 }, { "grad_norm": 0.28676196932792664, "learning_rate": 9.369665017149429e-05, "loss": 0.0105, "step": 12210 }, { "grad_norm": 0.335525780916214, "learning_rate": 9.368324915094895e-05, "loss": 0.0126, "step": 12220 }, { "grad_norm": 0.31452497839927673, "learning_rate": 9.366983486058591e-05, "loss": 0.0112, "step": 12230 }, { "grad_norm": 0.4237510561943054, "learning_rate": 9.365640730448009e-05, "loss": 0.0124, "step": 12240 }, { "grad_norm": 0.2449006289243698, "learning_rate": 9.36429664867104e-05, "loss": 0.0139, "step": 12250 }, { "grad_norm": 0.31632760167121887, "learning_rate": 9.362951241135982e-05, "loss": 0.0129, "step": 12260 }, { "grad_norm": 0.29561933875083923, "learning_rate": 9.361604508251534e-05, "loss": 0.0153, "step": 12270 }, { "grad_norm": 0.3216542601585388, "learning_rate": 9.360256450426799e-05, "loss": 0.0125, "step": 12280 }, { "grad_norm": 0.26470035314559937, "learning_rate": 9.358907068071279e-05, "loss": 0.0115, "step": 12290 }, { "grad_norm": 0.2767382264137268, "learning_rate": 9.357556361594882e-05, "loss": 0.0123, "step": 12300 }, { "grad_norm": 0.3290117084980011, "learning_rate": 9.356204331407917e-05, "loss": 0.0124, "step": 12310 }, { "grad_norm": 0.3501572906970978, "learning_rate": 9.354850977921094e-05, "loss": 0.0102, "step": 12320 }, { "grad_norm": 0.2946989834308624, "learning_rate": 9.353496301545529e-05, "loss": 0.012, "step": 12330 }, { "grad_norm": 0.24019166827201843, "learning_rate": 9.352140302692733e-05, "loss": 0.015, "step": 12340 }, { "grad_norm": 0.23021461069583893, "learning_rate": 9.350782981774627e-05, "loss": 0.0116, "step": 12350 }, { "grad_norm": 0.344180703163147, "learning_rate": 9.349424339203526e-05, "loss": 0.0155, "step": 12360 }, { "grad_norm": 0.30660107731819153, "learning_rate": 9.34806437539215e-05, "loss": 0.013, "step": 12370 }, { "grad_norm": 0.3166130483150482, "learning_rate": 9.346703090753622e-05, "loss": 0.0129, "step": 12380 }, { "grad_norm": 0.2784366309642792, "learning_rate": 9.345340485701461e-05, "loss": 0.0112, "step": 12390 }, { "grad_norm": 0.3697854280471802, "learning_rate": 9.343976560649595e-05, "loss": 0.0146, "step": 12400 }, { "grad_norm": 0.28481167554855347, "learning_rate": 9.342611316012344e-05, "loss": 0.0116, "step": 12410 }, { "grad_norm": 0.3195859491825104, "learning_rate": 9.341244752204437e-05, "loss": 0.0108, "step": 12420 }, { "grad_norm": 0.2889546751976013, "learning_rate": 9.339876869640995e-05, "loss": 0.0098, "step": 12430 }, { "grad_norm": 0.3227902054786682, "learning_rate": 9.33850766873755e-05, "loss": 0.0116, "step": 12440 }, { "grad_norm": 0.3246162235736847, "learning_rate": 9.337137149910028e-05, "loss": 0.0179, "step": 12450 }, { "grad_norm": 0.350818395614624, "learning_rate": 9.335765313574753e-05, "loss": 0.0116, "step": 12460 }, { "grad_norm": 0.29043349623680115, "learning_rate": 9.334392160148457e-05, "loss": 0.0154, "step": 12470 }, { "grad_norm": 0.31587493419647217, "learning_rate": 9.333017690048264e-05, "loss": 0.0109, "step": 12480 }, { "grad_norm": 0.29283198714256287, "learning_rate": 9.331641903691706e-05, "loss": 0.0107, "step": 12490 }, { "grad_norm": 0.24044612050056458, "learning_rate": 9.330264801496707e-05, "loss": 0.0102, "step": 12500 }, { "grad_norm": 0.2525232434272766, "learning_rate": 9.328886383881594e-05, "loss": 0.0103, "step": 12510 }, { "grad_norm": 0.28715670108795166, "learning_rate": 9.327506651265095e-05, "loss": 0.0116, "step": 12520 }, { "grad_norm": 0.314951092004776, "learning_rate": 9.326125604066338e-05, "loss": 0.0137, "step": 12530 }, { "grad_norm": 0.3211721181869507, "learning_rate": 9.324743242704847e-05, "loss": 0.0109, "step": 12540 }, { "grad_norm": 0.2860586941242218, "learning_rate": 9.323359567600546e-05, "loss": 0.0112, "step": 12550 }, { "grad_norm": 0.24017620086669922, "learning_rate": 9.321974579173761e-05, "loss": 0.0099, "step": 12560 }, { "grad_norm": 0.3212856948375702, "learning_rate": 9.320588277845213e-05, "loss": 0.0108, "step": 12570 }, { "grad_norm": 0.30985161662101746, "learning_rate": 9.319200664036026e-05, "loss": 0.01, "step": 12580 }, { "grad_norm": 0.3210581839084625, "learning_rate": 9.31781173816772e-05, "loss": 0.0101, "step": 12590 }, { "grad_norm": 0.2976618707180023, "learning_rate": 9.316421500662212e-05, "loss": 0.0114, "step": 12600 }, { "grad_norm": 0.26481467485427856, "learning_rate": 9.31502995194182e-05, "loss": 0.0108, "step": 12610 }, { "grad_norm": 0.2732757031917572, "learning_rate": 9.31363709242926e-05, "loss": 0.0123, "step": 12620 }, { "grad_norm": 0.44056597352027893, "learning_rate": 9.312242922547647e-05, "loss": 0.0108, "step": 12630 }, { "grad_norm": 0.33957093954086304, "learning_rate": 9.310847442720492e-05, "loss": 0.0125, "step": 12640 }, { "grad_norm": 0.30947208404541016, "learning_rate": 9.309450653371706e-05, "loss": 0.0112, "step": 12650 }, { "grad_norm": 0.3137624263763428, "learning_rate": 9.308052554925595e-05, "loss": 0.0091, "step": 12660 }, { "grad_norm": 0.2852679491043091, "learning_rate": 9.306653147806867e-05, "loss": 0.0094, "step": 12670 }, { "grad_norm": 0.3253926634788513, "learning_rate": 9.305252432440622e-05, "loss": 0.0117, "step": 12680 }, { "grad_norm": 0.30271226167678833, "learning_rate": 9.303850409252361e-05, "loss": 0.0121, "step": 12690 }, { "grad_norm": 0.277969628572464, "learning_rate": 9.302447078667985e-05, "loss": 0.0103, "step": 12700 }, { "grad_norm": 0.2837568521499634, "learning_rate": 9.301042441113783e-05, "loss": 0.0096, "step": 12710 }, { "grad_norm": 0.2578808665275574, "learning_rate": 9.299636497016451e-05, "loss": 0.0113, "step": 12720 }, { "grad_norm": 0.25955691933631897, "learning_rate": 9.298229246803076e-05, "loss": 0.01, "step": 12730 }, { "grad_norm": 0.26568764448165894, "learning_rate": 9.296820690901144e-05, "loss": 0.0107, "step": 12740 }, { "grad_norm": 0.2533382475376129, "learning_rate": 9.295410829738539e-05, "loss": 0.0112, "step": 12750 }, { "grad_norm": 0.24403776228427887, "learning_rate": 9.293999663743535e-05, "loss": 0.0108, "step": 12760 }, { "grad_norm": 0.26153847575187683, "learning_rate": 9.292587193344813e-05, "loss": 0.0113, "step": 12770 }, { "grad_norm": 0.24276554584503174, "learning_rate": 9.291173418971437e-05, "loss": 0.0131, "step": 12780 }, { "grad_norm": 0.23707716166973114, "learning_rate": 9.28975834105288e-05, "loss": 0.0093, "step": 12790 }, { "grad_norm": 0.2535558044910431, "learning_rate": 9.288341960019004e-05, "loss": 0.0112, "step": 12800 }, { "grad_norm": 0.3209000527858734, "learning_rate": 9.286924276300067e-05, "loss": 0.0094, "step": 12810 }, { "grad_norm": 0.30806979537010193, "learning_rate": 9.285505290326726e-05, "loss": 0.0118, "step": 12820 }, { "grad_norm": 0.2941007614135742, "learning_rate": 9.284085002530027e-05, "loss": 0.0118, "step": 12830 }, { "grad_norm": 0.2706282138824463, "learning_rate": 9.282663413341422e-05, "loss": 0.0109, "step": 12840 }, { "grad_norm": 0.2256108820438385, "learning_rate": 9.281240523192747e-05, "loss": 0.0104, "step": 12850 }, { "grad_norm": 0.3792514204978943, "learning_rate": 9.279816332516242e-05, "loss": 0.012, "step": 12860 }, { "grad_norm": 0.23446276783943176, "learning_rate": 9.278390841744536e-05, "loss": 0.0101, "step": 12870 }, { "grad_norm": 0.25093555450439453, "learning_rate": 9.276964051310658e-05, "loss": 0.0097, "step": 12880 }, { "grad_norm": 0.23392321169376373, "learning_rate": 9.275535961648027e-05, "loss": 0.011, "step": 12890 }, { "grad_norm": 0.2726021707057953, "learning_rate": 9.274106573190459e-05, "loss": 0.0112, "step": 12900 }, { "grad_norm": 0.3146917521953583, "learning_rate": 9.272675886372168e-05, "loss": 0.009, "step": 12910 }, { "grad_norm": 0.29945164918899536, "learning_rate": 9.271243901627754e-05, "loss": 0.0101, "step": 12920 }, { "grad_norm": 0.3533570468425751, "learning_rate": 9.269810619392219e-05, "loss": 0.0115, "step": 12930 }, { "grad_norm": 0.25372767448425293, "learning_rate": 9.268376040100955e-05, "loss": 0.0119, "step": 12940 }, { "grad_norm": 0.21627280116081238, "learning_rate": 9.266940164189752e-05, "loss": 0.01, "step": 12950 }, { "grad_norm": 0.2865859866142273, "learning_rate": 9.265502992094787e-05, "loss": 0.0113, "step": 12960 }, { "grad_norm": 0.26772934198379517, "learning_rate": 9.264064524252638e-05, "loss": 0.0099, "step": 12970 }, { "grad_norm": 0.27988114953041077, "learning_rate": 9.262624761100271e-05, "loss": 0.0111, "step": 12980 }, { "grad_norm": 0.3677988350391388, "learning_rate": 9.261183703075051e-05, "loss": 0.0157, "step": 12990 }, { "grad_norm": 0.2572009861469269, "learning_rate": 9.259741350614733e-05, "loss": 0.0104, "step": 13000 }, { "grad_norm": 0.31377890706062317, "learning_rate": 9.258297704157464e-05, "loss": 0.0123, "step": 13010 }, { "grad_norm": 0.2724866569042206, "learning_rate": 9.256852764141786e-05, "loss": 0.0112, "step": 13020 }, { "grad_norm": 0.3324366509914398, "learning_rate": 9.255406531006634e-05, "loss": 0.0115, "step": 13030 }, { "grad_norm": 0.3288393020629883, "learning_rate": 9.253959005191335e-05, "loss": 0.0158, "step": 13040 }, { "grad_norm": 0.35578760504722595, "learning_rate": 9.25251018713561e-05, "loss": 0.0132, "step": 13050 }, { "grad_norm": 0.3423280417919159, "learning_rate": 9.251060077279571e-05, "loss": 0.0108, "step": 13060 }, { "grad_norm": 0.3130364418029785, "learning_rate": 9.249608676063724e-05, "loss": 0.0103, "step": 13070 }, { "grad_norm": 0.3021275997161865, "learning_rate": 9.248155983928964e-05, "loss": 0.0101, "step": 13080 }, { "grad_norm": 0.2814575731754303, "learning_rate": 9.246702001316583e-05, "loss": 0.011, "step": 13090 }, { "grad_norm": 0.25663796067237854, "learning_rate": 9.245246728668262e-05, "loss": 0.0086, "step": 13100 }, { "grad_norm": 0.2873521149158478, "learning_rate": 9.243790166426073e-05, "loss": 0.0126, "step": 13110 }, { "grad_norm": 0.21159593760967255, "learning_rate": 9.242332315032484e-05, "loss": 0.0092, "step": 13120 }, { "grad_norm": 0.3208015263080597, "learning_rate": 9.240873174930349e-05, "loss": 0.0132, "step": 13130 }, { "grad_norm": 0.32282617688179016, "learning_rate": 9.239412746562917e-05, "loss": 0.0122, "step": 13140 }, { "grad_norm": 0.35617560148239136, "learning_rate": 9.237951030373828e-05, "loss": 0.0131, "step": 13150 }, { "grad_norm": 0.26838645339012146, "learning_rate": 9.236488026807113e-05, "loss": 0.011, "step": 13160 }, { "grad_norm": 0.30789637565612793, "learning_rate": 9.235023736307193e-05, "loss": 0.0134, "step": 13170 }, { "grad_norm": 0.2878201901912689, "learning_rate": 9.233558159318881e-05, "loss": 0.0114, "step": 13180 }, { "grad_norm": 0.27371957898139954, "learning_rate": 9.232091296287382e-05, "loss": 0.0127, "step": 13190 }, { "grad_norm": 0.26467692852020264, "learning_rate": 9.230623147658288e-05, "loss": 0.0119, "step": 13200 }, { "grad_norm": 0.2501579821109772, "learning_rate": 9.229153713877586e-05, "loss": 0.0106, "step": 13210 }, { "grad_norm": 0.25455471873283386, "learning_rate": 9.227682995391649e-05, "loss": 0.014, "step": 13220 }, { "grad_norm": 0.3140377998352051, "learning_rate": 9.226210992647243e-05, "loss": 0.0139, "step": 13230 }, { "grad_norm": 0.29039689898490906, "learning_rate": 9.224737706091525e-05, "loss": 0.0145, "step": 13240 }, { "grad_norm": 0.2891133725643158, "learning_rate": 9.223263136172039e-05, "loss": 0.0112, "step": 13250 }, { "grad_norm": 0.3550475537776947, "learning_rate": 9.22178728333672e-05, "loss": 0.0106, "step": 13260 }, { "grad_norm": 0.24973979592323303, "learning_rate": 9.220310148033897e-05, "loss": 0.0097, "step": 13270 }, { "grad_norm": 0.34137365221977234, "learning_rate": 9.21883173071228e-05, "loss": 0.011, "step": 13280 }, { "grad_norm": 0.3092743456363678, "learning_rate": 9.217352031820976e-05, "loss": 0.0089, "step": 13290 }, { "grad_norm": 0.3239731192588806, "learning_rate": 9.215871051809477e-05, "loss": 0.0118, "step": 13300 }, { "grad_norm": 0.30375275015830994, "learning_rate": 9.214388791127666e-05, "loss": 0.0107, "step": 13310 }, { "grad_norm": 0.22032277286052704, "learning_rate": 9.212905250225814e-05, "loss": 0.0113, "step": 13320 }, { "grad_norm": 0.25254762172698975, "learning_rate": 9.211420429554583e-05, "loss": 0.0116, "step": 13330 }, { "grad_norm": 0.23423565924167633, "learning_rate": 9.209934329565022e-05, "loss": 0.0101, "step": 13340 }, { "grad_norm": 0.23686887323856354, "learning_rate": 9.208446950708568e-05, "loss": 0.0099, "step": 13350 }, { "grad_norm": 0.21654759347438812, "learning_rate": 9.20695829343705e-05, "loss": 0.0101, "step": 13360 }, { "grad_norm": 0.3012162446975708, "learning_rate": 9.205468358202678e-05, "loss": 0.0107, "step": 13370 }, { "grad_norm": 0.3501008450984955, "learning_rate": 9.203977145458059e-05, "loss": 0.0099, "step": 13380 }, { "grad_norm": 0.24046842753887177, "learning_rate": 9.202484655656182e-05, "loss": 0.0085, "step": 13390 }, { "grad_norm": 0.28026625514030457, "learning_rate": 9.200990889250427e-05, "loss": 0.01, "step": 13400 }, { "grad_norm": 0.30334681272506714, "learning_rate": 9.19949584669456e-05, "loss": 0.0101, "step": 13410 }, { "grad_norm": 0.27469584345817566, "learning_rate": 9.197999528442738e-05, "loss": 0.0093, "step": 13420 }, { "grad_norm": 0.2630292475223541, "learning_rate": 9.196501934949499e-05, "loss": 0.012, "step": 13430 }, { "grad_norm": 0.3277732729911804, "learning_rate": 9.195003066669776e-05, "loss": 0.0137, "step": 13440 }, { "grad_norm": 0.2155776172876358, "learning_rate": 9.193502924058884e-05, "loss": 0.0123, "step": 13450 }, { "grad_norm": 0.33467185497283936, "learning_rate": 9.192001507572526e-05, "loss": 0.0126, "step": 13460 }, { "grad_norm": 0.33216392993927, "learning_rate": 9.190498817666793e-05, "loss": 0.0158, "step": 13470 }, { "grad_norm": 0.30800798535346985, "learning_rate": 9.188994854798163e-05, "loss": 0.0108, "step": 13480 }, { "grad_norm": 0.2823765277862549, "learning_rate": 9.187489619423499e-05, "loss": 0.0097, "step": 13490 }, { "grad_norm": 0.2572925388813019, "learning_rate": 9.185983112000056e-05, "loss": 0.0095, "step": 13500 }, { "grad_norm": 0.2934921085834503, "learning_rate": 9.184475332985464e-05, "loss": 0.0119, "step": 13510 }, { "grad_norm": 0.2932271957397461, "learning_rate": 9.182966282837754e-05, "loss": 0.0098, "step": 13520 }, { "grad_norm": 0.3143003582954407, "learning_rate": 9.18145596201533e-05, "loss": 0.0099, "step": 13530 }, { "grad_norm": 0.2762475907802582, "learning_rate": 9.179944370976991e-05, "loss": 0.009, "step": 13540 }, { "grad_norm": 0.3207020163536072, "learning_rate": 9.178431510181918e-05, "loss": 0.0106, "step": 13550 }, { "grad_norm": 0.3313432037830353, "learning_rate": 9.176917380089675e-05, "loss": 0.014, "step": 13560 }, { "grad_norm": 0.33052942156791687, "learning_rate": 9.175401981160219e-05, "loss": 0.011, "step": 13570 }, { "grad_norm": 0.20004190504550934, "learning_rate": 9.173885313853885e-05, "loss": 0.0095, "step": 13580 }, { "grad_norm": 0.30103132128715515, "learning_rate": 9.172367378631398e-05, "loss": 0.0102, "step": 13590 }, { "grad_norm": 0.27073389291763306, "learning_rate": 9.170848175953866e-05, "loss": 0.0085, "step": 13600 }, { "grad_norm": 0.23978844285011292, "learning_rate": 9.169327706282784e-05, "loss": 0.0109, "step": 13610 }, { "grad_norm": 0.27707400918006897, "learning_rate": 9.167805970080029e-05, "loss": 0.0095, "step": 13620 }, { "grad_norm": 0.24676603078842163, "learning_rate": 9.166282967807864e-05, "loss": 0.0109, "step": 13630 }, { "grad_norm": 0.28476017713546753, "learning_rate": 9.16475869992894e-05, "loss": 0.0104, "step": 13640 }, { "grad_norm": 0.3044326901435852, "learning_rate": 9.163233166906284e-05, "loss": 0.0109, "step": 13650 }, { "grad_norm": 0.2360885739326477, "learning_rate": 9.161706369203317e-05, "loss": 0.0093, "step": 13660 }, { "grad_norm": 0.26497066020965576, "learning_rate": 9.16017830728384e-05, "loss": 0.0114, "step": 13670 }, { "grad_norm": 0.28854620456695557, "learning_rate": 9.158648981612035e-05, "loss": 0.0097, "step": 13680 }, { "grad_norm": 0.3069143295288086, "learning_rate": 9.157118392652472e-05, "loss": 0.011, "step": 13690 }, { "grad_norm": 0.29258447885513306, "learning_rate": 9.155586540870104e-05, "loss": 0.0106, "step": 13700 }, { "grad_norm": 0.3380049765110016, "learning_rate": 9.154053426730267e-05, "loss": 0.0117, "step": 13710 }, { "grad_norm": 0.2883357107639313, "learning_rate": 9.15251905069868e-05, "loss": 0.0126, "step": 13720 }, { "grad_norm": 0.26026731729507446, "learning_rate": 9.150983413241446e-05, "loss": 0.0101, "step": 13730 }, { "grad_norm": 0.34788039326667786, "learning_rate": 9.149446514825051e-05, "loss": 0.012, "step": 13740 }, { "grad_norm": 0.31428471207618713, "learning_rate": 9.147908355916365e-05, "loss": 0.0091, "step": 13750 }, { "grad_norm": 0.2580438256263733, "learning_rate": 9.146368936982642e-05, "loss": 0.009, "step": 13760 }, { "grad_norm": 0.28266477584838867, "learning_rate": 9.144828258491511e-05, "loss": 0.0115, "step": 13770 }, { "grad_norm": 0.281694233417511, "learning_rate": 9.143286320910996e-05, "loss": 0.012, "step": 13780 }, { "grad_norm": 0.23622789978981018, "learning_rate": 9.141743124709491e-05, "loss": 0.0098, "step": 13790 }, { "grad_norm": 0.30679628252983093, "learning_rate": 9.140198670355784e-05, "loss": 0.0106, "step": 13800 }, { "grad_norm": 0.2917974591255188, "learning_rate": 9.138652958319034e-05, "loss": 0.0093, "step": 13810 }, { "grad_norm": 0.2997232973575592, "learning_rate": 9.137105989068791e-05, "loss": 0.0109, "step": 13820 }, { "grad_norm": 0.3329172730445862, "learning_rate": 9.135557763074983e-05, "loss": 0.0089, "step": 13830 }, { "grad_norm": 0.22885291278362274, "learning_rate": 9.13400828080792e-05, "loss": 0.0091, "step": 13840 }, { "grad_norm": 0.2628379464149475, "learning_rate": 9.132457542738292e-05, "loss": 0.0108, "step": 13850 }, { "grad_norm": 0.34049901366233826, "learning_rate": 9.130905549337174e-05, "loss": 0.012, "step": 13860 }, { "grad_norm": 0.33799418807029724, "learning_rate": 9.129352301076021e-05, "loss": 0.0114, "step": 13870 }, { "grad_norm": 0.29019302129745483, "learning_rate": 9.127797798426668e-05, "loss": 0.011, "step": 13880 }, { "grad_norm": 0.22545388340950012, "learning_rate": 9.126242041861333e-05, "loss": 0.0089, "step": 13890 }, { "grad_norm": 0.2698425352573395, "learning_rate": 9.124685031852611e-05, "loss": 0.0109, "step": 13900 }, { "grad_norm": 0.2501087486743927, "learning_rate": 9.123126768873482e-05, "loss": 0.0087, "step": 13910 }, { "grad_norm": 0.2929692566394806, "learning_rate": 9.121567253397308e-05, "loss": 0.0088, "step": 13920 }, { "grad_norm": 0.305990606546402, "learning_rate": 9.120006485897824e-05, "loss": 0.0097, "step": 13930 }, { "grad_norm": 0.22306327521800995, "learning_rate": 9.118444466849152e-05, "loss": 0.0101, "step": 13940 }, { "grad_norm": 0.3023201823234558, "learning_rate": 9.116881196725793e-05, "loss": 0.0113, "step": 13950 }, { "grad_norm": 0.33773621916770935, "learning_rate": 9.115316676002627e-05, "loss": 0.01, "step": 13960 }, { "grad_norm": 0.2963753640651703, "learning_rate": 9.113750905154911e-05, "loss": 0.0107, "step": 13970 }, { "grad_norm": 0.2869108021259308, "learning_rate": 9.112183884658289e-05, "loss": 0.012, "step": 13980 }, { "grad_norm": 0.24208708107471466, "learning_rate": 9.11061561498878e-05, "loss": 0.0124, "step": 13990 }, { "grad_norm": 0.3181329667568207, "learning_rate": 9.109046096622779e-05, "loss": 0.0149, "step": 14000 }, { "grad_norm": 0.2435329407453537, "learning_rate": 9.107475330037069e-05, "loss": 0.0088, "step": 14010 }, { "grad_norm": 0.26075267791748047, "learning_rate": 9.105903315708806e-05, "loss": 0.0101, "step": 14020 }, { "grad_norm": 0.31523725390434265, "learning_rate": 9.104330054115524e-05, "loss": 0.0088, "step": 14030 }, { "grad_norm": 0.2785132825374603, "learning_rate": 9.102755545735141e-05, "loss": 0.0116, "step": 14040 }, { "grad_norm": 0.22342707216739655, "learning_rate": 9.10117979104595e-05, "loss": 0.0112, "step": 14050 }, { "grad_norm": 0.3659726083278656, "learning_rate": 9.099602790526624e-05, "loss": 0.0105, "step": 14060 }, { "grad_norm": 0.2550984025001526, "learning_rate": 9.098024544656212e-05, "loss": 0.0109, "step": 14070 }, { "grad_norm": 0.3669590651988983, "learning_rate": 9.096445053914148e-05, "loss": 0.0106, "step": 14080 }, { "grad_norm": 0.3143695890903473, "learning_rate": 9.094864318780236e-05, "loss": 0.0112, "step": 14090 }, { "grad_norm": 0.25870826840400696, "learning_rate": 9.093282339734663e-05, "loss": 0.0106, "step": 14100 }, { "grad_norm": 0.22017966210842133, "learning_rate": 9.091699117257992e-05, "loss": 0.0095, "step": 14110 }, { "grad_norm": 0.2652941048145294, "learning_rate": 9.090114651831163e-05, "loss": 0.0106, "step": 14120 }, { "grad_norm": 0.2580643594264984, "learning_rate": 9.088528943935497e-05, "loss": 0.0096, "step": 14130 }, { "grad_norm": 0.23172371089458466, "learning_rate": 9.086941994052689e-05, "loss": 0.009, "step": 14140 }, { "grad_norm": 0.2915630042552948, "learning_rate": 9.085353802664813e-05, "loss": 0.0098, "step": 14150 }, { "grad_norm": 0.2782306969165802, "learning_rate": 9.08376437025432e-05, "loss": 0.0111, "step": 14160 }, { "grad_norm": 0.3387666940689087, "learning_rate": 9.082173697304035e-05, "loss": 0.0099, "step": 14170 }, { "grad_norm": 0.3233988881111145, "learning_rate": 9.080581784297166e-05, "loss": 0.0131, "step": 14180 }, { "grad_norm": 0.26535287499427795, "learning_rate": 9.078988631717291e-05, "loss": 0.0103, "step": 14190 }, { "grad_norm": 0.32505011558532715, "learning_rate": 9.077394240048369e-05, "loss": 0.0134, "step": 14200 }, { "grad_norm": 0.3045431077480316, "learning_rate": 9.075798609774736e-05, "loss": 0.0082, "step": 14210 }, { "grad_norm": 0.3463459610939026, "learning_rate": 9.0742017413811e-05, "loss": 0.0109, "step": 14220 }, { "grad_norm": 0.31739935278892517, "learning_rate": 9.072603635352548e-05, "loss": 0.0103, "step": 14230 }, { "grad_norm": 0.3275075852870941, "learning_rate": 9.071004292174541e-05, "loss": 0.0123, "step": 14240 }, { "grad_norm": 0.29646533727645874, "learning_rate": 9.06940371233292e-05, "loss": 0.0119, "step": 14250 }, { "grad_norm": 0.2616863250732422, "learning_rate": 9.067801896313898e-05, "loss": 0.0102, "step": 14260 }, { "grad_norm": 0.3164909780025482, "learning_rate": 9.066198844604064e-05, "loss": 0.0112, "step": 14270 }, { "grad_norm": 0.2876752018928528, "learning_rate": 9.06459455769038e-05, "loss": 0.0116, "step": 14280 }, { "grad_norm": 0.3506101071834564, "learning_rate": 9.062989036060193e-05, "loss": 0.0109, "step": 14290 }, { "grad_norm": 0.32811760902404785, "learning_rate": 9.061382280201212e-05, "loss": 0.0134, "step": 14300 }, { "grad_norm": 0.30635514855384827, "learning_rate": 9.059774290601528e-05, "loss": 0.0103, "step": 14310 }, { "grad_norm": 0.2746696174144745, "learning_rate": 9.058165067749606e-05, "loss": 0.0102, "step": 14320 }, { "grad_norm": 0.30828356742858887, "learning_rate": 9.056554612134288e-05, "loss": 0.0113, "step": 14330 }, { "grad_norm": 0.2606695294380188, "learning_rate": 9.054942924244785e-05, "loss": 0.0091, "step": 14340 }, { "grad_norm": 0.24746853113174438, "learning_rate": 9.053330004570686e-05, "loss": 0.0107, "step": 14350 }, { "grad_norm": 0.28739649057388306, "learning_rate": 9.051715853601955e-05, "loss": 0.0104, "step": 14360 }, { "grad_norm": 0.2748439610004425, "learning_rate": 9.050100471828926e-05, "loss": 0.0084, "step": 14370 }, { "grad_norm": 0.22228635847568512, "learning_rate": 9.048483859742311e-05, "loss": 0.0087, "step": 14380 }, { "grad_norm": 0.2524581849575043, "learning_rate": 9.046866017833193e-05, "loss": 0.0122, "step": 14390 }, { "grad_norm": 0.2601880431175232, "learning_rate": 9.045246946593029e-05, "loss": 0.0112, "step": 14400 }, { "grad_norm": 0.2924646735191345, "learning_rate": 9.043626646513652e-05, "loss": 0.0115, "step": 14410 }, { "grad_norm": 0.25464484095573425, "learning_rate": 9.042005118087267e-05, "loss": 0.0107, "step": 14420 }, { "grad_norm": 0.2606675922870636, "learning_rate": 9.040382361806448e-05, "loss": 0.0095, "step": 14430 }, { "grad_norm": 0.30493468046188354, "learning_rate": 9.038758378164148e-05, "loss": 0.0096, "step": 14440 }, { "grad_norm": 0.3285244107246399, "learning_rate": 9.037133167653691e-05, "loss": 0.0099, "step": 14450 }, { "grad_norm": 0.26329296827316284, "learning_rate": 9.035506730768771e-05, "loss": 0.0106, "step": 14460 }, { "grad_norm": 0.335332989692688, "learning_rate": 9.033879068003458e-05, "loss": 0.0119, "step": 14470 }, { "grad_norm": 0.21017469465732574, "learning_rate": 9.032250179852193e-05, "loss": 0.0104, "step": 14480 }, { "grad_norm": 0.30278006196022034, "learning_rate": 9.030620066809787e-05, "loss": 0.0112, "step": 14490 }, { "grad_norm": 0.27095702290534973, "learning_rate": 9.028988729371428e-05, "loss": 0.0104, "step": 14500 }, { "grad_norm": 0.2386666089296341, "learning_rate": 9.027356168032673e-05, "loss": 0.011, "step": 14510 }, { "grad_norm": 0.22701570391654968, "learning_rate": 9.02572238328945e-05, "loss": 0.0097, "step": 14520 }, { "grad_norm": 0.24391967058181763, "learning_rate": 9.02408737563806e-05, "loss": 0.0093, "step": 14530 }, { "grad_norm": 0.30137214064598083, "learning_rate": 9.022451145575174e-05, "loss": 0.0117, "step": 14540 }, { "grad_norm": 0.3091307282447815, "learning_rate": 9.02081369359784e-05, "loss": 0.0101, "step": 14550 }, { "grad_norm": 0.2756590247154236, "learning_rate": 9.019175020203465e-05, "loss": 0.01, "step": 14560 }, { "grad_norm": 0.2108382284641266, "learning_rate": 9.017535125889842e-05, "loss": 0.0109, "step": 14570 }, { "grad_norm": 0.2355968952178955, "learning_rate": 9.015894011155124e-05, "loss": 0.0103, "step": 14580 }, { "grad_norm": 0.25008463859558105, "learning_rate": 9.014251676497838e-05, "loss": 0.0096, "step": 14590 }, { "grad_norm": 0.2834985852241516, "learning_rate": 9.012608122416884e-05, "loss": 0.0094, "step": 14600 }, { "grad_norm": 0.301725834608078, "learning_rate": 9.010963349411529e-05, "loss": 0.0101, "step": 14610 }, { "grad_norm": 0.25175178050994873, "learning_rate": 9.00931735798141e-05, "loss": 0.0103, "step": 14620 }, { "grad_norm": 0.24005499482154846, "learning_rate": 9.00767014862654e-05, "loss": 0.0106, "step": 14630 }, { "grad_norm": 0.29720914363861084, "learning_rate": 9.006021721847295e-05, "loss": 0.0094, "step": 14640 }, { "grad_norm": 0.34712278842926025, "learning_rate": 9.004372078144423e-05, "loss": 0.0091, "step": 14650 }, { "grad_norm": 0.22825026512145996, "learning_rate": 9.002721218019043e-05, "loss": 0.0132, "step": 14660 }, { "grad_norm": 0.3266952633857727, "learning_rate": 9.001069141972642e-05, "loss": 0.0098, "step": 14670 }, { "grad_norm": 0.26162558794021606, "learning_rate": 8.99941585050708e-05, "loss": 0.0104, "step": 14680 }, { "grad_norm": 0.26830780506134033, "learning_rate": 8.997761344124578e-05, "loss": 0.0106, "step": 14690 }, { "grad_norm": 0.3264312148094177, "learning_rate": 8.996105623327737e-05, "loss": 0.0107, "step": 14700 }, { "grad_norm": 0.2410210818052292, "learning_rate": 8.994448688619517e-05, "loss": 0.0082, "step": 14710 }, { "grad_norm": 0.284929484128952, "learning_rate": 8.992790540503253e-05, "loss": 0.01, "step": 14720 }, { "grad_norm": 0.2587823271751404, "learning_rate": 8.991131179482648e-05, "loss": 0.0094, "step": 14730 }, { "grad_norm": 0.28239545226097107, "learning_rate": 8.989470606061768e-05, "loss": 0.0116, "step": 14740 }, { "grad_norm": 0.30279162526130676, "learning_rate": 8.987808820745056e-05, "loss": 0.0103, "step": 14750 }, { "grad_norm": 0.29264846444129944, "learning_rate": 8.986145824037315e-05, "loss": 0.0088, "step": 14760 }, { "grad_norm": 0.19506973028182983, "learning_rate": 8.984481616443721e-05, "loss": 0.01, "step": 14770 }, { "grad_norm": 0.2171848863363266, "learning_rate": 8.982816198469815e-05, "loss": 0.01, "step": 14780 }, { "grad_norm": 0.3335197865962982, "learning_rate": 8.98114957062151e-05, "loss": 0.0101, "step": 14790 }, { "grad_norm": 0.2951051890850067, "learning_rate": 8.97948173340508e-05, "loss": 0.0103, "step": 14800 }, { "grad_norm": 0.30811333656311035, "learning_rate": 8.977812687327172e-05, "loss": 0.01, "step": 14810 }, { "grad_norm": 0.2803950309753418, "learning_rate": 8.976142432894798e-05, "loss": 0.0109, "step": 14820 }, { "grad_norm": 0.3032907545566559, "learning_rate": 8.974470970615336e-05, "loss": 0.0113, "step": 14830 }, { "grad_norm": 0.2634539306163788, "learning_rate": 8.972798300996534e-05, "loss": 0.0107, "step": 14840 }, { "grad_norm": 0.2380373328924179, "learning_rate": 8.971124424546504e-05, "loss": 0.0095, "step": 14850 }, { "grad_norm": 0.22044654190540314, "learning_rate": 8.969449341773724e-05, "loss": 0.0084, "step": 14860 }, { "grad_norm": 0.171296626329422, "learning_rate": 8.967773053187042e-05, "loss": 0.0082, "step": 14870 }, { "grad_norm": 0.30781790614128113, "learning_rate": 8.966095559295668e-05, "loss": 0.0105, "step": 14880 }, { "grad_norm": 0.2670572102069855, "learning_rate": 8.964416860609184e-05, "loss": 0.0111, "step": 14890 }, { "grad_norm": 0.23065350949764252, "learning_rate": 8.962736957637532e-05, "loss": 0.0095, "step": 14900 }, { "grad_norm": 0.2330165058374405, "learning_rate": 8.96105585089102e-05, "loss": 0.0085, "step": 14910 }, { "grad_norm": 0.2678815424442291, "learning_rate": 8.959373540880329e-05, "loss": 0.0122, "step": 14920 }, { "grad_norm": 0.30811116099357605, "learning_rate": 8.957690028116495e-05, "loss": 0.0123, "step": 14930 }, { "grad_norm": 0.2776390612125397, "learning_rate": 8.956005313110928e-05, "loss": 0.0113, "step": 14940 }, { "grad_norm": 0.2533762454986572, "learning_rate": 8.9543193963754e-05, "loss": 0.0091, "step": 14950 }, { "grad_norm": 0.24179822206497192, "learning_rate": 8.952632278422048e-05, "loss": 0.0083, "step": 14960 }, { "grad_norm": 0.30182337760925293, "learning_rate": 8.95094395976337e-05, "loss": 0.0148, "step": 14970 }, { "grad_norm": 0.21201108396053314, "learning_rate": 8.949254440912239e-05, "loss": 0.0092, "step": 14980 }, { "grad_norm": 0.2630045711994171, "learning_rate": 8.94756372238188e-05, "loss": 0.0085, "step": 14990 }, { "grad_norm": 0.2681374251842499, "learning_rate": 8.945871804685892e-05, "loss": 0.0106, "step": 15000 }, { "grad_norm": 0.2599766254425049, "learning_rate": 8.944178688338236e-05, "loss": 0.0094, "step": 15010 }, { "grad_norm": 0.23193500936031342, "learning_rate": 8.942484373853233e-05, "loss": 0.0101, "step": 15020 }, { "grad_norm": 0.2852785289287567, "learning_rate": 8.940788861745572e-05, "loss": 0.0086, "step": 15030 }, { "grad_norm": 0.2425108402967453, "learning_rate": 8.939092152530308e-05, "loss": 0.0079, "step": 15040 }, { "grad_norm": 0.2836150825023651, "learning_rate": 8.937394246722853e-05, "loss": 0.0094, "step": 15050 }, { "grad_norm": 0.2823534309864044, "learning_rate": 8.935695144838984e-05, "loss": 0.0081, "step": 15060 }, { "grad_norm": 0.292885422706604, "learning_rate": 8.933994847394849e-05, "loss": 0.0083, "step": 15070 }, { "grad_norm": 0.3051721751689911, "learning_rate": 8.932293354906949e-05, "loss": 0.0103, "step": 15080 }, { "grad_norm": 0.2722909450531006, "learning_rate": 8.930590667892153e-05, "loss": 0.0089, "step": 15090 }, { "grad_norm": 0.29498255252838135, "learning_rate": 8.928886786867696e-05, "loss": 0.0096, "step": 15100 }, { "grad_norm": 0.27731165289878845, "learning_rate": 8.927181712351168e-05, "loss": 0.0111, "step": 15110 }, { "grad_norm": 0.2584938406944275, "learning_rate": 8.925475444860527e-05, "loss": 0.0096, "step": 15120 }, { "grad_norm": 0.2471330314874649, "learning_rate": 8.923767984914092e-05, "loss": 0.0092, "step": 15130 }, { "grad_norm": 0.292123019695282, "learning_rate": 8.922059333030545e-05, "loss": 0.0126, "step": 15140 }, { "grad_norm": 0.3022165894508362, "learning_rate": 8.920349489728928e-05, "loss": 0.0112, "step": 15150 }, { "grad_norm": 0.27089715003967285, "learning_rate": 8.918638455528646e-05, "loss": 0.011, "step": 15160 }, { "grad_norm": 0.2372835874557495, "learning_rate": 8.916926230949468e-05, "loss": 0.0105, "step": 15170 }, { "grad_norm": 0.32664525508880615, "learning_rate": 8.915212816511522e-05, "loss": 0.0104, "step": 15180 }, { "grad_norm": 0.3257906436920166, "learning_rate": 8.913498212735296e-05, "loss": 0.0119, "step": 15190 }, { "grad_norm": 0.29897263646125793, "learning_rate": 8.911782420141643e-05, "loss": 0.0107, "step": 15200 }, { "grad_norm": 0.21231438219547272, "learning_rate": 8.910065439251775e-05, "loss": 0.0106, "step": 15210 }, { "grad_norm": 0.28170809149742126, "learning_rate": 8.908347270587268e-05, "loss": 0.0097, "step": 15220 }, { "grad_norm": 0.26248350739479065, "learning_rate": 8.906627914670054e-05, "loss": 0.0095, "step": 15230 }, { "grad_norm": 0.30315592885017395, "learning_rate": 8.904907372022427e-05, "loss": 0.0104, "step": 15240 }, { "grad_norm": 0.2650705575942993, "learning_rate": 8.903185643167042e-05, "loss": 0.0098, "step": 15250 }, { "grad_norm": 0.2631804347038269, "learning_rate": 8.901462728626919e-05, "loss": 0.011, "step": 15260 }, { "grad_norm": 0.24776969850063324, "learning_rate": 8.899738628925429e-05, "loss": 0.0096, "step": 15270 }, { "grad_norm": 0.29048454761505127, "learning_rate": 8.898013344586312e-05, "loss": 0.0106, "step": 15280 }, { "grad_norm": 0.2221129685640335, "learning_rate": 8.896286876133661e-05, "loss": 0.009, "step": 15290 }, { "grad_norm": 0.2735884487628937, "learning_rate": 8.894559224091933e-05, "loss": 0.009, "step": 15300 }, { "grad_norm": 0.23851007223129272, "learning_rate": 8.892830388985942e-05, "loss": 0.009, "step": 15310 }, { "grad_norm": 0.28519192337989807, "learning_rate": 8.891100371340864e-05, "loss": 0.0104, "step": 15320 }, { "grad_norm": 0.21761135756969452, "learning_rate": 8.889369171682231e-05, "loss": 0.0085, "step": 15330 }, { "grad_norm": 0.2853968143463135, "learning_rate": 8.887636790535936e-05, "loss": 0.0105, "step": 15340 }, { "grad_norm": 0.2600754499435425, "learning_rate": 8.885903228428231e-05, "loss": 0.0099, "step": 15350 }, { "grad_norm": 0.22754167020320892, "learning_rate": 8.884168485885727e-05, "loss": 0.0099, "step": 15360 }, { "grad_norm": 0.2530911862850189, "learning_rate": 8.882432563435393e-05, "loss": 0.0121, "step": 15370 }, { "grad_norm": 0.3016035258769989, "learning_rate": 8.880695461604556e-05, "loss": 0.0123, "step": 15380 }, { "grad_norm": 0.30656224489212036, "learning_rate": 8.878957180920901e-05, "loss": 0.0108, "step": 15390 }, { "grad_norm": 0.33010727167129517, "learning_rate": 8.877217721912473e-05, "loss": 0.012, "step": 15400 }, { "grad_norm": 0.2794025242328644, "learning_rate": 8.875477085107673e-05, "loss": 0.011, "step": 15410 }, { "grad_norm": 0.32219621539115906, "learning_rate": 8.87373527103526e-05, "loss": 0.0107, "step": 15420 }, { "grad_norm": 0.27440646290779114, "learning_rate": 8.871992280224353e-05, "loss": 0.0103, "step": 15430 }, { "grad_norm": 0.27229413390159607, "learning_rate": 8.870248113204422e-05, "loss": 0.0115, "step": 15440 }, { "grad_norm": 0.22909151017665863, "learning_rate": 8.868502770505306e-05, "loss": 0.0092, "step": 15450 }, { "grad_norm": 0.24661816656589508, "learning_rate": 8.86675625265719e-05, "loss": 0.0112, "step": 15460 }, { "grad_norm": 0.2945097088813782, "learning_rate": 8.865008560190618e-05, "loss": 0.0113, "step": 15470 }, { "grad_norm": 0.2632730007171631, "learning_rate": 8.863259693636496e-05, "loss": 0.012, "step": 15480 }, { "grad_norm": 0.24616363644599915, "learning_rate": 8.861509653526083e-05, "loss": 0.0157, "step": 15490 }, { "grad_norm": 0.32043564319610596, "learning_rate": 8.859758440390993e-05, "loss": 0.0135, "step": 15500 }, { "grad_norm": 0.28351980447769165, "learning_rate": 8.858006054763202e-05, "loss": 0.012, "step": 15510 }, { "grad_norm": 0.27427908778190613, "learning_rate": 8.856252497175035e-05, "loss": 0.011, "step": 15520 }, { "grad_norm": 0.22871659696102142, "learning_rate": 8.854497768159178e-05, "loss": 0.0104, "step": 15530 }, { "grad_norm": 0.2508438229560852, "learning_rate": 8.852741868248671e-05, "loss": 0.0096, "step": 15540 }, { "grad_norm": 0.27534446120262146, "learning_rate": 8.85098479797691e-05, "loss": 0.0114, "step": 15550 }, { "grad_norm": 0.2810802757740021, "learning_rate": 8.849226557877646e-05, "loss": 0.0092, "step": 15560 }, { "grad_norm": 0.27636170387268066, "learning_rate": 8.84746714848499e-05, "loss": 0.0095, "step": 15570 }, { "grad_norm": 0.2919359505176544, "learning_rate": 8.845706570333397e-05, "loss": 0.0085, "step": 15580 }, { "grad_norm": 0.27650538086891174, "learning_rate": 8.84394482395769e-05, "loss": 0.0112, "step": 15590 }, { "grad_norm": 0.21103999018669128, "learning_rate": 8.842181909893038e-05, "loss": 0.0111, "step": 15600 }, { "grad_norm": 0.27054905891418457, "learning_rate": 8.840417828674969e-05, "loss": 0.0105, "step": 15610 }, { "grad_norm": 0.24890156090259552, "learning_rate": 8.838652580839364e-05, "loss": 0.0087, "step": 15620 }, { "grad_norm": 0.3368520736694336, "learning_rate": 8.836886166922458e-05, "loss": 0.0094, "step": 15630 }, { "grad_norm": 0.27556174993515015, "learning_rate": 8.835118587460844e-05, "loss": 0.0106, "step": 15640 }, { "grad_norm": 0.2553868889808655, "learning_rate": 8.83334984299146e-05, "loss": 0.0101, "step": 15650 }, { "grad_norm": 0.2568078637123108, "learning_rate": 8.83157993405161e-05, "loss": 0.0111, "step": 15660 }, { "grad_norm": 0.28967228531837463, "learning_rate": 8.829808861178943e-05, "loss": 0.0091, "step": 15670 }, { "grad_norm": 0.32905226945877075, "learning_rate": 8.828036624911464e-05, "loss": 0.0104, "step": 15680 }, { "grad_norm": 0.30181950330734253, "learning_rate": 8.826263225787532e-05, "loss": 0.0097, "step": 15690 }, { "grad_norm": 0.28712335228919983, "learning_rate": 8.824488664345858e-05, "loss": 0.0085, "step": 15700 }, { "grad_norm": 0.23175868391990662, "learning_rate": 8.822712941125508e-05, "loss": 0.0096, "step": 15710 }, { "grad_norm": 0.21660515666007996, "learning_rate": 8.820936056665898e-05, "loss": 0.0091, "step": 15720 }, { "grad_norm": 0.3418399691581726, "learning_rate": 8.819158011506801e-05, "loss": 0.0107, "step": 15730 }, { "grad_norm": 0.32794296741485596, "learning_rate": 8.81737880618834e-05, "loss": 0.0107, "step": 15740 }, { "grad_norm": 0.2936713397502899, "learning_rate": 8.815598441250987e-05, "loss": 0.0093, "step": 15750 }, { "grad_norm": 0.2799868583679199, "learning_rate": 8.813816917235576e-05, "loss": 0.0113, "step": 15760 }, { "grad_norm": 0.3164301812648773, "learning_rate": 8.812034234683282e-05, "loss": 0.0115, "step": 15770 }, { "grad_norm": 0.3140501081943512, "learning_rate": 8.810250394135637e-05, "loss": 0.0098, "step": 15780 }, { "grad_norm": 0.2496100664138794, "learning_rate": 8.808465396134529e-05, "loss": 0.0097, "step": 15790 }, { "grad_norm": 0.2808854877948761, "learning_rate": 8.806679241222189e-05, "loss": 0.0101, "step": 15800 }, { "grad_norm": 0.2741963565349579, "learning_rate": 8.804891929941203e-05, "loss": 0.0086, "step": 15810 }, { "grad_norm": 0.2546410858631134, "learning_rate": 8.803103462834514e-05, "loss": 0.0137, "step": 15820 }, { "grad_norm": 0.2717438042163849, "learning_rate": 8.801313840445408e-05, "loss": 0.0085, "step": 15830 }, { "grad_norm": 0.3066229820251465, "learning_rate": 8.799523063317524e-05, "loss": 0.012, "step": 15840 }, { "grad_norm": 0.26512670516967773, "learning_rate": 8.797731131994854e-05, "loss": 0.01, "step": 15850 }, { "grad_norm": 0.35922300815582275, "learning_rate": 8.795938047021739e-05, "loss": 0.0105, "step": 15860 }, { "grad_norm": 0.2575354278087616, "learning_rate": 8.794143808942872e-05, "loss": 0.009, "step": 15870 }, { "grad_norm": 0.2672472596168518, "learning_rate": 8.792348418303296e-05, "loss": 0.0105, "step": 15880 }, { "grad_norm": 0.2731088697910309, "learning_rate": 8.790551875648398e-05, "loss": 0.0107, "step": 15890 }, { "grad_norm": 0.3232043385505676, "learning_rate": 8.788754181523926e-05, "loss": 0.0123, "step": 15900 }, { "grad_norm": 0.23219452798366547, "learning_rate": 8.78695533647597e-05, "loss": 0.0104, "step": 15910 }, { "grad_norm": 0.24780035018920898, "learning_rate": 8.785155341050972e-05, "loss": 0.0136, "step": 15920 }, { "grad_norm": 0.29535433650016785, "learning_rate": 8.783354195795721e-05, "loss": 0.0099, "step": 15930 }, { "grad_norm": 0.28305551409721375, "learning_rate": 8.78155190125736e-05, "loss": 0.0112, "step": 15940 }, { "grad_norm": 0.2864108383655548, "learning_rate": 8.779748457983378e-05, "loss": 0.0126, "step": 15950 }, { "grad_norm": 0.25819188356399536, "learning_rate": 8.777943866521612e-05, "loss": 0.0112, "step": 15960 }, { "grad_norm": 0.25041109323501587, "learning_rate": 8.77613812742025e-05, "loss": 0.0106, "step": 15970 }, { "grad_norm": 0.2609696090221405, "learning_rate": 8.774331241227829e-05, "loss": 0.0106, "step": 15980 }, { "grad_norm": 0.2358810156583786, "learning_rate": 8.772523208493232e-05, "loss": 0.0094, "step": 15990 }, { "grad_norm": 0.25929760932922363, "learning_rate": 8.770714029765692e-05, "loss": 0.0125, "step": 16000 }, { "grad_norm": 0.3153972029685974, "learning_rate": 8.768903705594789e-05, "loss": 0.0108, "step": 16010 }, { "grad_norm": 0.2483811378479004, "learning_rate": 8.767092236530453e-05, "loss": 0.0122, "step": 16020 }, { "grad_norm": 0.17887499928474426, "learning_rate": 8.76527962312296e-05, "loss": 0.0099, "step": 16030 }, { "grad_norm": 0.22678126394748688, "learning_rate": 8.763465865922934e-05, "loss": 0.0093, "step": 16040 }, { "grad_norm": 0.2637060284614563, "learning_rate": 8.761650965481347e-05, "loss": 0.0097, "step": 16050 }, { "grad_norm": 0.27175331115722656, "learning_rate": 8.759834922349516e-05, "loss": 0.0102, "step": 16060 }, { "grad_norm": 0.28748819231987, "learning_rate": 8.758017737079108e-05, "loss": 0.0083, "step": 16070 }, { "grad_norm": 0.3020229935646057, "learning_rate": 8.756199410222137e-05, "loss": 0.0105, "step": 16080 }, { "grad_norm": 0.2474466860294342, "learning_rate": 8.754379942330963e-05, "loss": 0.0091, "step": 16090 }, { "grad_norm": 0.3583427965641022, "learning_rate": 8.75255933395829e-05, "loss": 0.0094, "step": 16100 }, { "grad_norm": 0.3202165365219116, "learning_rate": 8.750737585657171e-05, "loss": 0.01, "step": 16110 }, { "grad_norm": 0.28818318247795105, "learning_rate": 8.748914697981008e-05, "loss": 0.0089, "step": 16120 }, { "grad_norm": 0.306518018245697, "learning_rate": 8.747090671483542e-05, "loss": 0.0102, "step": 16130 }, { "grad_norm": 0.2634550929069519, "learning_rate": 8.745265506718869e-05, "loss": 0.0101, "step": 16140 }, { "grad_norm": 0.2795845568180084, "learning_rate": 8.74343920424142e-05, "loss": 0.0101, "step": 16150 }, { "grad_norm": 0.25344061851501465, "learning_rate": 8.741611764605982e-05, "loss": 0.0107, "step": 16160 }, { "grad_norm": 0.2501373291015625, "learning_rate": 8.739783188367682e-05, "loss": 0.0088, "step": 16170 }, { "grad_norm": 0.2246922254562378, "learning_rate": 8.737953476081991e-05, "loss": 0.0099, "step": 16180 }, { "grad_norm": 0.30105143785476685, "learning_rate": 8.73612262830473e-05, "loss": 0.0096, "step": 16190 }, { "grad_norm": 0.22231817245483398, "learning_rate": 8.734290645592061e-05, "loss": 0.0089, "step": 16200 }, { "grad_norm": 0.3406302034854889, "learning_rate": 8.732457528500493e-05, "loss": 0.0096, "step": 16210 }, { "grad_norm": 0.2609446048736572, "learning_rate": 8.730623277586875e-05, "loss": 0.0104, "step": 16220 }, { "grad_norm": 0.33388611674308777, "learning_rate": 8.72878789340841e-05, "loss": 0.0094, "step": 16230 }, { "grad_norm": 0.25460466742515564, "learning_rate": 8.726951376522635e-05, "loss": 0.0096, "step": 16240 }, { "grad_norm": 0.21380110085010529, "learning_rate": 8.725113727487435e-05, "loss": 0.0079, "step": 16250 }, { "grad_norm": 0.3228558599948883, "learning_rate": 8.723274946861042e-05, "loss": 0.0125, "step": 16260 }, { "grad_norm": 0.27122434973716736, "learning_rate": 8.721435035202026e-05, "loss": 0.0091, "step": 16270 }, { "grad_norm": 0.4003918468952179, "learning_rate": 8.719593993069306e-05, "loss": 0.0121, "step": 16280 }, { "grad_norm": 0.22706764936447144, "learning_rate": 8.717751821022139e-05, "loss": 0.0079, "step": 16290 }, { "grad_norm": 0.2985023856163025, "learning_rate": 8.715908519620134e-05, "loss": 0.009, "step": 16300 }, { "grad_norm": 0.285762757062912, "learning_rate": 8.71406408942323e-05, "loss": 0.011, "step": 16310 }, { "grad_norm": 0.3188495934009552, "learning_rate": 8.712218530991723e-05, "loss": 0.0107, "step": 16320 }, { "grad_norm": 0.20482321083545685, "learning_rate": 8.710371844886241e-05, "loss": 0.0083, "step": 16330 }, { "grad_norm": 0.23664060235023499, "learning_rate": 8.708524031667758e-05, "loss": 0.0085, "step": 16340 }, { "grad_norm": 0.40683987736701965, "learning_rate": 8.706675091897592e-05, "loss": 0.0106, "step": 16350 }, { "grad_norm": 0.22715559601783752, "learning_rate": 8.704825026137404e-05, "loss": 0.0114, "step": 16360 }, { "grad_norm": 0.21525990962982178, "learning_rate": 8.702973834949192e-05, "loss": 0.008, "step": 16370 }, { "grad_norm": 0.25327223539352417, "learning_rate": 8.701121518895301e-05, "loss": 0.0075, "step": 16380 }, { "grad_norm": 0.30771520733833313, "learning_rate": 8.699268078538414e-05, "loss": 0.0083, "step": 16390 }, { "grad_norm": 0.28778135776519775, "learning_rate": 8.69741351444156e-05, "loss": 0.0109, "step": 16400 }, { "grad_norm": 0.3142705261707306, "learning_rate": 8.695557827168101e-05, "loss": 0.0101, "step": 16410 }, { "grad_norm": 0.26974770426750183, "learning_rate": 8.693701017281753e-05, "loss": 0.0104, "step": 16420 }, { "grad_norm": 0.2427842766046524, "learning_rate": 8.691843085346563e-05, "loss": 0.0084, "step": 16430 }, { "grad_norm": 0.23732158541679382, "learning_rate": 8.689984031926919e-05, "loss": 0.0081, "step": 16440 }, { "grad_norm": 0.27599337697029114, "learning_rate": 8.688123857587555e-05, "loss": 0.0093, "step": 16450 }, { "grad_norm": 0.26685118675231934, "learning_rate": 8.686262562893544e-05, "loss": 0.0098, "step": 16460 }, { "grad_norm": 0.2499106228351593, "learning_rate": 8.684400148410294e-05, "loss": 0.0078, "step": 16470 }, { "grad_norm": 0.21405087411403656, "learning_rate": 8.682536614703562e-05, "loss": 0.0094, "step": 16480 }, { "grad_norm": 0.24212081730365753, "learning_rate": 8.680671962339437e-05, "loss": 0.0086, "step": 16490 }, { "grad_norm": 0.26402127742767334, "learning_rate": 8.678806191884352e-05, "loss": 0.0091, "step": 16500 }, { "grad_norm": 0.23224511742591858, "learning_rate": 8.67693930390508e-05, "loss": 0.0064, "step": 16510 }, { "grad_norm": 0.25720784068107605, "learning_rate": 8.67507129896873e-05, "loss": 0.0095, "step": 16520 }, { "grad_norm": 0.2127181887626648, "learning_rate": 8.673202177642757e-05, "loss": 0.0094, "step": 16530 }, { "grad_norm": 0.21197263896465302, "learning_rate": 8.671331940494945e-05, "loss": 0.0081, "step": 16540 }, { "grad_norm": 0.2022964358329773, "learning_rate": 8.669460588093427e-05, "loss": 0.0065, "step": 16550 }, { "grad_norm": 0.24868224561214447, "learning_rate": 8.667588121006667e-05, "loss": 0.0097, "step": 16560 }, { "grad_norm": 0.27824923396110535, "learning_rate": 8.665714539803475e-05, "loss": 0.0103, "step": 16570 }, { "grad_norm": 0.2965802550315857, "learning_rate": 8.663839845052993e-05, "loss": 0.0084, "step": 16580 }, { "grad_norm": 0.21202483773231506, "learning_rate": 8.661964037324703e-05, "loss": 0.009, "step": 16590 }, { "grad_norm": 0.20447774231433868, "learning_rate": 8.660087117188427e-05, "loss": 0.0085, "step": 16600 }, { "grad_norm": 0.2292976975440979, "learning_rate": 8.658209085214325e-05, "loss": 0.0074, "step": 16610 }, { "grad_norm": 0.3335595428943634, "learning_rate": 8.656329941972891e-05, "loss": 0.0083, "step": 16620 }, { "grad_norm": 0.2836003303527832, "learning_rate": 8.654449688034963e-05, "loss": 0.0102, "step": 16630 }, { "grad_norm": 0.3065835237503052, "learning_rate": 8.652568323971706e-05, "loss": 0.0092, "step": 16640 }, { "grad_norm": 0.24776628613471985, "learning_rate": 8.650685850354636e-05, "loss": 0.0153, "step": 16650 }, { "grad_norm": 0.3175615072250366, "learning_rate": 8.648802267755593e-05, "loss": 0.0096, "step": 16660 }, { "grad_norm": 0.23557539284229279, "learning_rate": 8.646917576746764e-05, "loss": 0.0143, "step": 16670 }, { "grad_norm": 0.272316038608551, "learning_rate": 8.645031777900666e-05, "loss": 0.01, "step": 16680 }, { "grad_norm": 0.34998440742492676, "learning_rate": 8.643144871790154e-05, "loss": 0.0106, "step": 16690 }, { "grad_norm": 0.3412337899208069, "learning_rate": 8.641256858988424e-05, "loss": 0.0109, "step": 16700 }, { "grad_norm": 0.1802694946527481, "learning_rate": 8.639367740069e-05, "loss": 0.0111, "step": 16710 }, { "grad_norm": 0.2797043025493622, "learning_rate": 8.63747751560575e-05, "loss": 0.0089, "step": 16720 }, { "grad_norm": 0.32260340452194214, "learning_rate": 8.635586186172871e-05, "loss": 0.0099, "step": 16730 }, { "grad_norm": 0.278450071811676, "learning_rate": 8.633693752344902e-05, "loss": 0.0119, "step": 16740 }, { "grad_norm": 0.25058600306510925, "learning_rate": 8.631800214696713e-05, "loss": 0.0115, "step": 16750 }, { "grad_norm": 0.2945828139781952, "learning_rate": 8.629905573803511e-05, "loss": 0.0107, "step": 16760 }, { "grad_norm": 0.28727853298187256, "learning_rate": 8.628009830240839e-05, "loss": 0.0113, "step": 16770 }, { "grad_norm": 0.2422596514225006, "learning_rate": 8.626112984584571e-05, "loss": 0.0112, "step": 16780 }, { "grad_norm": 0.286075234413147, "learning_rate": 8.62421503741092e-05, "loss": 0.009, "step": 16790 }, { "grad_norm": 0.2758397161960602, "learning_rate": 8.622315989296432e-05, "loss": 0.0102, "step": 16800 }, { "grad_norm": 0.23855245113372803, "learning_rate": 8.62041584081799e-05, "loss": 0.0066, "step": 16810 }, { "grad_norm": 0.21355026960372925, "learning_rate": 8.618514592552807e-05, "loss": 0.0085, "step": 16820 }, { "grad_norm": 0.29938891530036926, "learning_rate": 8.616612245078431e-05, "loss": 0.0095, "step": 16830 }, { "grad_norm": 0.2483750730752945, "learning_rate": 8.614708798972746e-05, "loss": 0.0086, "step": 16840 }, { "grad_norm": 0.25764375925064087, "learning_rate": 8.61280425481397e-05, "loss": 0.0103, "step": 16850 }, { "grad_norm": 0.20467785000801086, "learning_rate": 8.61089861318065e-05, "loss": 0.0078, "step": 16860 }, { "grad_norm": 0.18834789097309113, "learning_rate": 8.608991874651673e-05, "loss": 0.0085, "step": 16870 }, { "grad_norm": 0.20003071427345276, "learning_rate": 8.607084039806255e-05, "loss": 0.0084, "step": 16880 }, { "grad_norm": 0.19845867156982422, "learning_rate": 8.605175109223944e-05, "loss": 0.0082, "step": 16890 }, { "grad_norm": 0.27606236934661865, "learning_rate": 8.603265083484624e-05, "loss": 0.0097, "step": 16900 }, { "grad_norm": 0.28678980469703674, "learning_rate": 8.60135396316851e-05, "loss": 0.0078, "step": 16910 }, { "grad_norm": 0.28309038281440735, "learning_rate": 8.599441748856152e-05, "loss": 0.0125, "step": 16920 }, { "grad_norm": 0.3022243082523346, "learning_rate": 8.597528441128427e-05, "loss": 0.0089, "step": 16930 }, { "grad_norm": 0.23140695691108704, "learning_rate": 8.595614040566549e-05, "loss": 0.009, "step": 16940 }, { "grad_norm": 0.2016071230173111, "learning_rate": 8.593698547752063e-05, "loss": 0.0106, "step": 16950 }, { "grad_norm": 0.2225610762834549, "learning_rate": 8.591781963266843e-05, "loss": 0.0121, "step": 16960 }, { "grad_norm": 0.22754250466823578, "learning_rate": 8.5898642876931e-05, "loss": 0.0113, "step": 16970 }, { "grad_norm": 0.22588352859020233, "learning_rate": 8.587945521613369e-05, "loss": 0.0087, "step": 16980 }, { "grad_norm": 0.2298949807882309, "learning_rate": 8.586025665610524e-05, "loss": 0.0078, "step": 16990 }, { "grad_norm": 0.2997662425041199, "learning_rate": 8.584104720267765e-05, "loss": 0.0099, "step": 17000 }, { "grad_norm": 0.2864239513874054, "learning_rate": 8.582182686168625e-05, "loss": 0.0127, "step": 17010 }, { "grad_norm": 0.29647591710090637, "learning_rate": 8.580259563896967e-05, "loss": 0.0111, "step": 17020 }, { "grad_norm": 0.21788856387138367, "learning_rate": 8.578335354036983e-05, "loss": 0.0087, "step": 17030 }, { "grad_norm": 0.23925253748893738, "learning_rate": 8.576410057173201e-05, "loss": 0.0097, "step": 17040 }, { "grad_norm": 0.4090352952480316, "learning_rate": 8.574483673890474e-05, "loss": 0.0105, "step": 17050 }, { "grad_norm": 0.2814415693283081, "learning_rate": 8.572556204773983e-05, "loss": 0.0105, "step": 17060 }, { "grad_norm": 0.24559694528579712, "learning_rate": 8.570627650409246e-05, "loss": 0.0093, "step": 17070 }, { "grad_norm": 0.3441891372203827, "learning_rate": 8.568698011382107e-05, "loss": 0.0075, "step": 17080 }, { "grad_norm": 0.32313841581344604, "learning_rate": 8.566767288278738e-05, "loss": 0.011, "step": 17090 }, { "grad_norm": 0.2577977180480957, "learning_rate": 8.56483548168564e-05, "loss": 0.0084, "step": 17100 }, { "grad_norm": 0.2673834264278412, "learning_rate": 8.562902592189648e-05, "loss": 0.0084, "step": 17110 }, { "grad_norm": 0.2455013245344162, "learning_rate": 8.560968620377921e-05, "loss": 0.0097, "step": 17120 }, { "grad_norm": 0.17626313865184784, "learning_rate": 8.559033566837951e-05, "loss": 0.0091, "step": 17130 }, { "grad_norm": 0.306132048368454, "learning_rate": 8.557097432157551e-05, "loss": 0.0102, "step": 17140 }, { "grad_norm": 0.2101263701915741, "learning_rate": 8.555160216924872e-05, "loss": 0.0094, "step": 17150 }, { "grad_norm": 0.27102577686309814, "learning_rate": 8.55322192172839e-05, "loss": 0.0102, "step": 17160 }, { "grad_norm": 0.22342561185359955, "learning_rate": 8.551282547156902e-05, "loss": 0.0081, "step": 17170 }, { "grad_norm": 0.19892767071723938, "learning_rate": 8.549342093799544e-05, "loss": 0.0086, "step": 17180 }, { "grad_norm": 0.24145124852657318, "learning_rate": 8.547400562245773e-05, "loss": 0.0115, "step": 17190 }, { "grad_norm": 0.27737998962402344, "learning_rate": 8.545457953085374e-05, "loss": 0.0078, "step": 17200 }, { "grad_norm": 0.22603948414325714, "learning_rate": 8.543514266908463e-05, "loss": 0.0071, "step": 17210 }, { "grad_norm": 0.2690403461456299, "learning_rate": 8.541569504305478e-05, "loss": 0.0087, "step": 17220 }, { "grad_norm": 0.2842450737953186, "learning_rate": 8.539623665867187e-05, "loss": 0.011, "step": 17230 }, { "grad_norm": 0.39218488335609436, "learning_rate": 8.537676752184685e-05, "loss": 0.0109, "step": 17240 }, { "grad_norm": 0.2956484258174896, "learning_rate": 8.53572876384939e-05, "loss": 0.0086, "step": 17250 }, { "grad_norm": 0.23740901052951813, "learning_rate": 8.533779701453056e-05, "loss": 0.0105, "step": 17260 }, { "grad_norm": 0.3055787980556488, "learning_rate": 8.53182956558775e-05, "loss": 0.0095, "step": 17270 }, { "grad_norm": 0.20200638473033905, "learning_rate": 8.529878356845877e-05, "loss": 0.0087, "step": 17280 }, { "grad_norm": 0.24597354233264923, "learning_rate": 8.527926075820158e-05, "loss": 0.0079, "step": 17290 }, { "grad_norm": 0.22677156329154968, "learning_rate": 8.525972723103648e-05, "loss": 0.0094, "step": 17300 }, { "grad_norm": 0.29029905796051025, "learning_rate": 8.524018299289722e-05, "loss": 0.0083, "step": 17310 }, { "grad_norm": 0.2062239795923233, "learning_rate": 8.522062804972083e-05, "loss": 0.009, "step": 17320 }, { "grad_norm": 0.29048898816108704, "learning_rate": 8.520106240744759e-05, "loss": 0.0106, "step": 17330 }, { "grad_norm": 0.1675039380788803, "learning_rate": 8.518148607202102e-05, "loss": 0.0079, "step": 17340 }, { "grad_norm": 0.309238463640213, "learning_rate": 8.51618990493879e-05, "loss": 0.0106, "step": 17350 }, { "grad_norm": 0.20029228925704956, "learning_rate": 8.514230134549823e-05, "loss": 0.009, "step": 17360 }, { "grad_norm": 0.24057692289352417, "learning_rate": 8.51226929663053e-05, "loss": 0.0077, "step": 17370 }, { "grad_norm": 0.2233814150094986, "learning_rate": 8.51030739177656e-05, "loss": 0.0095, "step": 17380 }, { "grad_norm": 0.21940739452838898, "learning_rate": 8.508344420583889e-05, "loss": 0.0097, "step": 17390 }, { "grad_norm": 0.21646778285503387, "learning_rate": 8.506380383648816e-05, "loss": 0.0106, "step": 17400 }, { "grad_norm": 0.2844802439212799, "learning_rate": 8.504415281567963e-05, "loss": 0.0093, "step": 17410 }, { "grad_norm": 0.28693559765815735, "learning_rate": 8.502449114938275e-05, "loss": 0.0091, "step": 17420 }, { "grad_norm": 0.21802087128162384, "learning_rate": 8.500481884357025e-05, "loss": 0.0095, "step": 17430 }, { "grad_norm": 0.3267443776130676, "learning_rate": 8.498513590421801e-05, "loss": 0.0098, "step": 17440 }, { "grad_norm": 0.3616046905517578, "learning_rate": 8.496544233730522e-05, "loss": 0.0106, "step": 17450 }, { "grad_norm": 0.31874823570251465, "learning_rate": 8.494573814881426e-05, "loss": 0.0083, "step": 17460 }, { "grad_norm": 0.2720226049423218, "learning_rate": 8.492602334473074e-05, "loss": 0.0102, "step": 17470 }, { "grad_norm": 0.22996309399604797, "learning_rate": 8.49062979310435e-05, "loss": 0.0094, "step": 17480 }, { "grad_norm": 0.23823527991771698, "learning_rate": 8.488656191374458e-05, "loss": 0.0086, "step": 17490 }, { "grad_norm": 0.2258954793214798, "learning_rate": 8.48668152988293e-05, "loss": 0.0088, "step": 17500 }, { "grad_norm": 0.23459313809871674, "learning_rate": 8.484705809229612e-05, "loss": 0.009, "step": 17510 }, { "grad_norm": 0.213528111577034, "learning_rate": 8.482729030014677e-05, "loss": 0.0067, "step": 17520 }, { "grad_norm": 0.2736048400402069, "learning_rate": 8.48075119283862e-05, "loss": 0.0091, "step": 17530 }, { "grad_norm": 0.22485287487506866, "learning_rate": 8.478772298302254e-05, "loss": 0.0083, "step": 17540 }, { "grad_norm": 0.26154983043670654, "learning_rate": 8.476792347006716e-05, "loss": 0.0072, "step": 17550 }, { "grad_norm": 0.2836498022079468, "learning_rate": 8.474811339553462e-05, "loss": 0.0084, "step": 17560 }, { "grad_norm": 0.2520183324813843, "learning_rate": 8.47282927654427e-05, "loss": 0.009, "step": 17570 }, { "grad_norm": 0.31152215600013733, "learning_rate": 8.470846158581238e-05, "loss": 0.0089, "step": 17580 }, { "grad_norm": 0.21570228040218353, "learning_rate": 8.468861986266787e-05, "loss": 0.0092, "step": 17590 }, { "grad_norm": 0.21828025579452515, "learning_rate": 8.466876760203654e-05, "loss": 0.0071, "step": 17600 }, { "grad_norm": 0.25473111867904663, "learning_rate": 8.464890480994898e-05, "loss": 0.0085, "step": 17610 }, { "grad_norm": 0.2922549843788147, "learning_rate": 8.462903149243899e-05, "loss": 0.0079, "step": 17620 }, { "grad_norm": 0.28819790482521057, "learning_rate": 8.460914765554357e-05, "loss": 0.0095, "step": 17630 }, { "grad_norm": 0.2571139335632324, "learning_rate": 8.458925330530288e-05, "loss": 0.0086, "step": 17640 }, { "grad_norm": 0.2384311407804489, "learning_rate": 8.456934844776032e-05, "loss": 0.0071, "step": 17650 }, { "grad_norm": 0.19701038300991058, "learning_rate": 8.454943308896246e-05, "loss": 0.0062, "step": 17660 }, { "grad_norm": 0.22720931470394135, "learning_rate": 8.452950723495905e-05, "loss": 0.0086, "step": 17670 }, { "grad_norm": 0.24483327567577362, "learning_rate": 8.450957089180303e-05, "loss": 0.0109, "step": 17680 }, { "grad_norm": 0.2452455312013626, "learning_rate": 8.448962406555055e-05, "loss": 0.0095, "step": 17690 }, { "grad_norm": 0.36983492970466614, "learning_rate": 8.446966676226093e-05, "loss": 0.0101, "step": 17700 }, { "grad_norm": 0.2618146538734436, "learning_rate": 8.444969898799667e-05, "loss": 0.0088, "step": 17710 }, { "grad_norm": 0.3063323199748993, "learning_rate": 8.442972074882343e-05, "loss": 0.0104, "step": 17720 }, { "grad_norm": 0.34352055191993713, "learning_rate": 8.44097320508101e-05, "loss": 0.0089, "step": 17730 }, { "grad_norm": 0.24822193384170532, "learning_rate": 8.43897329000287e-05, "loss": 0.0112, "step": 17740 }, { "grad_norm": 0.2098926603794098, "learning_rate": 8.436972330255448e-05, "loss": 0.007, "step": 17750 }, { "grad_norm": 0.2527866065502167, "learning_rate": 8.434970326446579e-05, "loss": 0.0129, "step": 17760 }, { "grad_norm": 0.23664584755897522, "learning_rate": 8.432967279184418e-05, "loss": 0.0093, "step": 17770 }, { "grad_norm": 0.2556487023830414, "learning_rate": 8.430963189077441e-05, "loss": 0.0093, "step": 17780 }, { "grad_norm": 0.21195469796657562, "learning_rate": 8.428958056734437e-05, "loss": 0.0092, "step": 17790 }, { "grad_norm": 0.3128160238265991, "learning_rate": 8.426951882764513e-05, "loss": 0.009, "step": 17800 }, { "grad_norm": 0.31145206093788147, "learning_rate": 8.424944667777089e-05, "loss": 0.0111, "step": 17810 }, { "grad_norm": 0.25404298305511475, "learning_rate": 8.422936412381905e-05, "loss": 0.0095, "step": 17820 }, { "grad_norm": 0.2762012779712677, "learning_rate": 8.420927117189017e-05, "loss": 0.0079, "step": 17830 }, { "grad_norm": 0.2649284899234772, "learning_rate": 8.418916782808795e-05, "loss": 0.0091, "step": 17840 }, { "grad_norm": 0.21584245562553406, "learning_rate": 8.416905409851926e-05, "loss": 0.0078, "step": 17850 }, { "grad_norm": 0.19599056243896484, "learning_rate": 8.41489299892941e-05, "loss": 0.0082, "step": 17860 }, { "grad_norm": 0.3028583228588104, "learning_rate": 8.412879550652566e-05, "loss": 0.0095, "step": 17870 }, { "grad_norm": 0.3360619843006134, "learning_rate": 8.410865065633029e-05, "loss": 0.0075, "step": 17880 }, { "grad_norm": 0.26718395948410034, "learning_rate": 8.408849544482742e-05, "loss": 0.0099, "step": 17890 }, { "grad_norm": 0.2591950297355652, "learning_rate": 8.406832987813968e-05, "loss": 0.009, "step": 17900 }, { "grad_norm": 0.31409353017807007, "learning_rate": 8.404815396239286e-05, "loss": 0.0107, "step": 17910 }, { "grad_norm": 0.3196844756603241, "learning_rate": 8.402796770371587e-05, "loss": 0.011, "step": 17920 }, { "grad_norm": 0.24811924993991852, "learning_rate": 8.400777110824071e-05, "loss": 0.0082, "step": 17930 }, { "grad_norm": 0.2636547088623047, "learning_rate": 8.398756418210263e-05, "loss": 0.0106, "step": 17940 }, { "grad_norm": 0.20335087180137634, "learning_rate": 8.396734693143993e-05, "loss": 0.0075, "step": 17950 }, { "grad_norm": 0.2664903402328491, "learning_rate": 8.39471193623941e-05, "loss": 0.0115, "step": 17960 }, { "grad_norm": 0.21218733489513397, "learning_rate": 8.392688148110974e-05, "loss": 0.0092, "step": 17970 }, { "grad_norm": 0.28692907094955444, "learning_rate": 8.390663329373456e-05, "loss": 0.0087, "step": 17980 }, { "grad_norm": 0.28370440006256104, "learning_rate": 8.388637480641944e-05, "loss": 0.0101, "step": 17990 }, { "grad_norm": 0.29673174023628235, "learning_rate": 8.386610602531837e-05, "loss": 0.0079, "step": 18000 }, { "grad_norm": 0.22297346591949463, "learning_rate": 8.384582695658847e-05, "loss": 0.0092, "step": 18010 }, { "grad_norm": 0.3078713119029999, "learning_rate": 8.382553760638999e-05, "loss": 0.0077, "step": 18020 }, { "grad_norm": 0.22283561527729034, "learning_rate": 8.380523798088631e-05, "loss": 0.0079, "step": 18030 }, { "grad_norm": 0.2426093965768814, "learning_rate": 8.378492808624389e-05, "loss": 0.0083, "step": 18040 }, { "grad_norm": 0.2425232082605362, "learning_rate": 8.376460792863237e-05, "loss": 0.0092, "step": 18050 }, { "grad_norm": 0.2113635241985321, "learning_rate": 8.374427751422444e-05, "loss": 0.0106, "step": 18060 }, { "grad_norm": 0.2851988673210144, "learning_rate": 8.3723936849196e-05, "loss": 0.0106, "step": 18070 }, { "grad_norm": 0.316919207572937, "learning_rate": 8.370358593972595e-05, "loss": 0.0079, "step": 18080 }, { "grad_norm": 0.2205875962972641, "learning_rate": 8.36832247919964e-05, "loss": 0.0094, "step": 18090 }, { "grad_norm": 0.21108581125736237, "learning_rate": 8.36628534121925e-05, "loss": 0.0091, "step": 18100 }, { "grad_norm": 0.21722665429115295, "learning_rate": 8.364247180650254e-05, "loss": 0.0081, "step": 18110 }, { "grad_norm": 0.2520598769187927, "learning_rate": 8.362207998111794e-05, "loss": 0.0124, "step": 18120 }, { "grad_norm": 0.26642319560050964, "learning_rate": 8.360167794223318e-05, "loss": 0.0095, "step": 18130 }, { "grad_norm": 0.21216467022895813, "learning_rate": 8.358126569604586e-05, "loss": 0.0068, "step": 18140 }, { "grad_norm": 0.23055925965309143, "learning_rate": 8.356084324875668e-05, "loss": 0.0071, "step": 18150 }, { "grad_norm": 0.2907140552997589, "learning_rate": 8.354041060656945e-05, "loss": 0.0085, "step": 18160 }, { "grad_norm": 0.26798176765441895, "learning_rate": 8.351996777569106e-05, "loss": 0.0079, "step": 18170 }, { "grad_norm": 0.2686067521572113, "learning_rate": 8.349951476233148e-05, "loss": 0.0091, "step": 18180 }, { "grad_norm": 0.2189292162656784, "learning_rate": 8.347905157270386e-05, "loss": 0.0093, "step": 18190 }, { "grad_norm": 0.28690341114997864, "learning_rate": 8.345857821302432e-05, "loss": 0.0097, "step": 18200 }, { "grad_norm": 0.26576632261276245, "learning_rate": 8.343809468951213e-05, "loss": 0.0091, "step": 18210 }, { "grad_norm": 0.25923672318458557, "learning_rate": 8.341760100838965e-05, "loss": 0.0085, "step": 18220 }, { "grad_norm": 0.24061141908168793, "learning_rate": 8.339709717588233e-05, "loss": 0.0097, "step": 18230 }, { "grad_norm": 0.2769416272640228, "learning_rate": 8.33765831982187e-05, "loss": 0.0104, "step": 18240 }, { "grad_norm": 0.2558574080467224, "learning_rate": 8.335605908163035e-05, "loss": 0.0099, "step": 18250 }, { "grad_norm": 0.2816294729709625, "learning_rate": 8.333552483235196e-05, "loss": 0.0093, "step": 18260 }, { "grad_norm": 0.3014129400253296, "learning_rate": 8.33149804566213e-05, "loss": 0.0096, "step": 18270 }, { "grad_norm": 0.223966583609581, "learning_rate": 8.329442596067921e-05, "loss": 0.0097, "step": 18280 }, { "grad_norm": 0.22248557209968567, "learning_rate": 8.32738613507696e-05, "loss": 0.0081, "step": 18290 }, { "grad_norm": 0.20519164204597473, "learning_rate": 8.325328663313946e-05, "loss": 0.008, "step": 18300 }, { "grad_norm": 0.3648359179496765, "learning_rate": 8.323270181403884e-05, "loss": 0.0125, "step": 18310 }, { "grad_norm": 0.23096631467342377, "learning_rate": 8.321210689972086e-05, "loss": 0.0083, "step": 18320 }, { "grad_norm": 0.2694459557533264, "learning_rate": 8.319150189644174e-05, "loss": 0.0119, "step": 18330 }, { "grad_norm": 0.23120594024658203, "learning_rate": 8.31708868104607e-05, "loss": 0.0088, "step": 18340 }, { "grad_norm": 0.2610611319541931, "learning_rate": 8.315026164804007e-05, "loss": 0.009, "step": 18350 }, { "grad_norm": 0.30400434136390686, "learning_rate": 8.312962641544524e-05, "loss": 0.0082, "step": 18360 }, { "grad_norm": 0.2520270347595215, "learning_rate": 8.310898111894465e-05, "loss": 0.0077, "step": 18370 }, { "grad_norm": 0.25413277745246887, "learning_rate": 8.308832576480977e-05, "loss": 0.0093, "step": 18380 }, { "grad_norm": 0.21032068133354187, "learning_rate": 8.306766035931519e-05, "loss": 0.007, "step": 18390 }, { "grad_norm": 0.24193617701530457, "learning_rate": 8.304698490873847e-05, "loss": 0.0078, "step": 18400 }, { "grad_norm": 0.33122915029525757, "learning_rate": 8.30262994193603e-05, "loss": 0.0082, "step": 18410 }, { "grad_norm": 0.22571302950382233, "learning_rate": 8.300560389746438e-05, "loss": 0.0082, "step": 18420 }, { "grad_norm": 0.21970698237419128, "learning_rate": 8.298489834933745e-05, "loss": 0.0094, "step": 18430 }, { "grad_norm": 0.23111294209957123, "learning_rate": 8.296418278126934e-05, "loss": 0.01, "step": 18440 }, { "grad_norm": 0.2237575501203537, "learning_rate": 8.294345719955284e-05, "loss": 0.0098, "step": 18450 }, { "grad_norm": 0.18173182010650635, "learning_rate": 8.29227216104839e-05, "loss": 0.0074, "step": 18460 }, { "grad_norm": 0.223158597946167, "learning_rate": 8.290197602036137e-05, "loss": 0.0078, "step": 18470 }, { "grad_norm": 0.21327970921993256, "learning_rate": 8.288122043548725e-05, "loss": 0.0073, "step": 18480 }, { "grad_norm": 0.2590601146221161, "learning_rate": 8.286045486216657e-05, "loss": 0.0099, "step": 18490 }, { "grad_norm": 0.33284223079681396, "learning_rate": 8.283967930670733e-05, "loss": 0.0084, "step": 18500 }, { "grad_norm": 0.2591264843940735, "learning_rate": 8.281889377542058e-05, "loss": 0.0087, "step": 18510 }, { "grad_norm": 0.2607881426811218, "learning_rate": 8.279809827462045e-05, "loss": 0.008, "step": 18520 }, { "grad_norm": 0.25031501054763794, "learning_rate": 8.277729281062402e-05, "loss": 0.0075, "step": 18530 }, { "grad_norm": 0.28451529145240784, "learning_rate": 8.27564773897515e-05, "loss": 0.0086, "step": 18540 }, { "grad_norm": 0.27966487407684326, "learning_rate": 8.273565201832602e-05, "loss": 0.0094, "step": 18550 }, { "grad_norm": 0.3083644211292267, "learning_rate": 8.27148167026738e-05, "loss": 0.0084, "step": 18560 }, { "grad_norm": 0.3163197338581085, "learning_rate": 8.269397144912405e-05, "loss": 0.0083, "step": 18570 }, { "grad_norm": 0.21488220989704132, "learning_rate": 8.267311626400899e-05, "loss": 0.0069, "step": 18580 }, { "grad_norm": 0.26235413551330566, "learning_rate": 8.26522511536639e-05, "loss": 0.0082, "step": 18590 }, { "grad_norm": 0.2428647130727768, "learning_rate": 8.263137612442706e-05, "loss": 0.0077, "step": 18600 }, { "grad_norm": 0.26525843143463135, "learning_rate": 8.261049118263971e-05, "loss": 0.0098, "step": 18610 }, { "grad_norm": 0.22844119369983673, "learning_rate": 8.258959633464619e-05, "loss": 0.0097, "step": 18620 }, { "grad_norm": 0.2765507996082306, "learning_rate": 8.256869158679377e-05, "loss": 0.0096, "step": 18630 }, { "grad_norm": 0.19514387845993042, "learning_rate": 8.254777694543278e-05, "loss": 0.0071, "step": 18640 }, { "grad_norm": 0.2237156480550766, "learning_rate": 8.252685241691651e-05, "loss": 0.0076, "step": 18650 }, { "grad_norm": 0.18528668582439423, "learning_rate": 8.250591800760133e-05, "loss": 0.0075, "step": 18660 }, { "grad_norm": 0.2788935899734497, "learning_rate": 8.248497372384649e-05, "loss": 0.0077, "step": 18670 }, { "grad_norm": 0.30191946029663086, "learning_rate": 8.246401957201437e-05, "loss": 0.0105, "step": 18680 }, { "grad_norm": 0.2995465099811554, "learning_rate": 8.244305555847027e-05, "loss": 0.0089, "step": 18690 }, { "grad_norm": 0.2559933662414551, "learning_rate": 8.24220816895825e-05, "loss": 0.0084, "step": 18700 }, { "grad_norm": 0.30890005826950073, "learning_rate": 8.240109797172237e-05, "loss": 0.0077, "step": 18710 }, { "grad_norm": 0.1988600343465805, "learning_rate": 8.238010441126416e-05, "loss": 0.0075, "step": 18720 }, { "grad_norm": 0.19257709383964539, "learning_rate": 8.23591010145852e-05, "loss": 0.01, "step": 18730 }, { "grad_norm": 0.24763116240501404, "learning_rate": 8.233808778806571e-05, "loss": 0.0079, "step": 18740 }, { "grad_norm": 0.29485538601875305, "learning_rate": 8.231706473808903e-05, "loss": 0.0073, "step": 18750 }, { "grad_norm": 0.24101503193378448, "learning_rate": 8.229603187104133e-05, "loss": 0.0057, "step": 18760 }, { "grad_norm": 0.20755937695503235, "learning_rate": 8.22749891933119e-05, "loss": 0.0067, "step": 18770 }, { "grad_norm": 0.25986117124557495, "learning_rate": 8.225393671129291e-05, "loss": 0.0079, "step": 18780 }, { "grad_norm": 0.2040674239397049, "learning_rate": 8.223287443137957e-05, "loss": 0.0068, "step": 18790 }, { "grad_norm": 0.24269916117191315, "learning_rate": 8.221180235997004e-05, "loss": 0.0082, "step": 18800 }, { "grad_norm": 0.1868121176958084, "learning_rate": 8.219072050346544e-05, "loss": 0.0075, "step": 18810 }, { "grad_norm": 0.27327829599380493, "learning_rate": 8.216962886826992e-05, "loss": 0.0103, "step": 18820 }, { "grad_norm": 0.2937210500240326, "learning_rate": 8.214852746079054e-05, "loss": 0.0094, "step": 18830 }, { "grad_norm": 0.27595123648643494, "learning_rate": 8.212741628743732e-05, "loss": 0.0085, "step": 18840 }, { "grad_norm": 0.20546695590019226, "learning_rate": 8.210629535462333e-05, "loss": 0.0083, "step": 18850 }, { "grad_norm": 0.32897746562957764, "learning_rate": 8.208516466876453e-05, "loss": 0.0075, "step": 18860 }, { "grad_norm": 0.2310360074043274, "learning_rate": 8.206402423627986e-05, "loss": 0.0073, "step": 18870 }, { "grad_norm": 0.23503150045871735, "learning_rate": 8.204287406359124e-05, "loss": 0.0093, "step": 18880 }, { "grad_norm": 0.30840203166007996, "learning_rate": 8.20217141571235e-05, "loss": 0.0113, "step": 18890 }, { "grad_norm": 0.25985071063041687, "learning_rate": 8.200054452330449e-05, "loss": 0.0076, "step": 18900 }, { "grad_norm": 0.24178855121135712, "learning_rate": 8.197936516856499e-05, "loss": 0.008, "step": 18910 }, { "grad_norm": 0.28449511528015137, "learning_rate": 8.195817609933871e-05, "loss": 0.0082, "step": 18920 }, { "grad_norm": 0.2819555401802063, "learning_rate": 8.193697732206233e-05, "loss": 0.0088, "step": 18930 }, { "grad_norm": 0.30575448274612427, "learning_rate": 8.19157688431755e-05, "loss": 0.0082, "step": 18940 }, { "grad_norm": 0.23637647926807404, "learning_rate": 8.189455066912077e-05, "loss": 0.0096, "step": 18950 }, { "grad_norm": 0.1974545419216156, "learning_rate": 8.187332280634369e-05, "loss": 0.0099, "step": 18960 }, { "grad_norm": 0.33112841844558716, "learning_rate": 8.18520852612927e-05, "loss": 0.0085, "step": 18970 }, { "grad_norm": 0.25162628293037415, "learning_rate": 8.183083804041921e-05, "loss": 0.0087, "step": 18980 }, { "grad_norm": 0.22467279434204102, "learning_rate": 8.180958115017757e-05, "loss": 0.0089, "step": 18990 }, { "grad_norm": 0.2644553780555725, "learning_rate": 8.178831459702505e-05, "loss": 0.0094, "step": 19000 }, { "grad_norm": 0.2908259332180023, "learning_rate": 8.17670383874219e-05, "loss": 0.0078, "step": 19010 }, { "grad_norm": 0.22885648906230927, "learning_rate": 8.174575252783124e-05, "loss": 0.0075, "step": 19020 }, { "grad_norm": 0.26460787653923035, "learning_rate": 8.172445702471914e-05, "loss": 0.0094, "step": 19030 }, { "grad_norm": 0.23526042699813843, "learning_rate": 8.170315188455466e-05, "loss": 0.007, "step": 19040 }, { "grad_norm": 0.24561813473701477, "learning_rate": 8.168183711380969e-05, "loss": 0.008, "step": 19050 }, { "grad_norm": 0.239788219332695, "learning_rate": 8.166051271895913e-05, "loss": 0.0072, "step": 19060 }, { "grad_norm": 0.22616542875766754, "learning_rate": 8.163917870648075e-05, "loss": 0.0089, "step": 19070 }, { "grad_norm": 0.2480158805847168, "learning_rate": 8.161783508285526e-05, "loss": 0.0112, "step": 19080 }, { "grad_norm": 0.18843553960323334, "learning_rate": 8.159648185456628e-05, "loss": 0.0083, "step": 19090 }, { "grad_norm": 0.2588503360748291, "learning_rate": 8.157511902810038e-05, "loss": 0.0085, "step": 19100 }, { "grad_norm": 0.24212302267551422, "learning_rate": 8.155374660994701e-05, "loss": 0.0078, "step": 19110 }, { "grad_norm": 0.25631436705589294, "learning_rate": 8.153236460659857e-05, "loss": 0.0087, "step": 19120 }, { "grad_norm": 0.22890721261501312, "learning_rate": 8.151097302455031e-05, "loss": 0.0069, "step": 19130 }, { "grad_norm": 0.22997477650642395, "learning_rate": 8.148957187030044e-05, "loss": 0.0076, "step": 19140 }, { "grad_norm": 0.293473482131958, "learning_rate": 8.146816115035006e-05, "loss": 0.0076, "step": 19150 }, { "grad_norm": 0.23699471354484558, "learning_rate": 8.14467408712032e-05, "loss": 0.0066, "step": 19160 }, { "grad_norm": 0.1930815726518631, "learning_rate": 8.142531103936678e-05, "loss": 0.0075, "step": 19170 }, { "grad_norm": 0.25631794333457947, "learning_rate": 8.14038716613506e-05, "loss": 0.0096, "step": 19180 }, { "grad_norm": 0.2409273236989975, "learning_rate": 8.138242274366736e-05, "loss": 0.0092, "step": 19190 }, { "grad_norm": 0.22028669714927673, "learning_rate": 8.136096429283271e-05, "loss": 0.0077, "step": 19200 }, { "grad_norm": 0.20971792936325073, "learning_rate": 8.133949631536515e-05, "loss": 0.0066, "step": 19210 }, { "grad_norm": 0.18486718833446503, "learning_rate": 8.131801881778607e-05, "loss": 0.0059, "step": 19220 }, { "grad_norm": 0.25684654712677, "learning_rate": 8.129653180661978e-05, "loss": 0.008, "step": 19230 }, { "grad_norm": 0.21516060829162598, "learning_rate": 8.127503528839346e-05, "loss": 0.0074, "step": 19240 }, { "grad_norm": 0.30441102385520935, "learning_rate": 8.125352926963721e-05, "loss": 0.01, "step": 19250 }, { "grad_norm": 0.2278432697057724, "learning_rate": 8.123201375688395e-05, "loss": 0.0076, "step": 19260 }, { "grad_norm": 0.20663349330425262, "learning_rate": 8.121048875666954e-05, "loss": 0.0075, "step": 19270 }, { "grad_norm": 0.30783334374427795, "learning_rate": 8.118895427553274e-05, "loss": 0.01, "step": 19280 }, { "grad_norm": 0.23236003518104553, "learning_rate": 8.116741032001511e-05, "loss": 0.0086, "step": 19290 }, { "grad_norm": 0.2601412534713745, "learning_rate": 8.114585689666114e-05, "loss": 0.0064, "step": 19300 }, { "grad_norm": 0.18265371024608612, "learning_rate": 8.112429401201821e-05, "loss": 0.0068, "step": 19310 }, { "grad_norm": 0.24811069667339325, "learning_rate": 8.110272167263656e-05, "loss": 0.0083, "step": 19320 }, { "grad_norm": 0.23625928163528442, "learning_rate": 8.108113988506929e-05, "loss": 0.0102, "step": 19330 }, { "grad_norm": 0.23172149062156677, "learning_rate": 8.105954865587235e-05, "loss": 0.0095, "step": 19340 }, { "grad_norm": 0.23441234230995178, "learning_rate": 8.103794799160463e-05, "loss": 0.0077, "step": 19350 }, { "grad_norm": 0.2271808683872223, "learning_rate": 8.101633789882781e-05, "loss": 0.0085, "step": 19360 }, { "grad_norm": 0.2427125722169876, "learning_rate": 8.099471838410648e-05, "loss": 0.0074, "step": 19370 }, { "grad_norm": 0.2313302904367447, "learning_rate": 8.097308945400806e-05, "loss": 0.0084, "step": 19380 }, { "grad_norm": 0.2358667105436325, "learning_rate": 8.095145111510288e-05, "loss": 0.0093, "step": 19390 }, { "grad_norm": 0.20756182074546814, "learning_rate": 8.092980337396406e-05, "loss": 0.0086, "step": 19400 }, { "grad_norm": 0.2865169048309326, "learning_rate": 8.090814623716763e-05, "loss": 0.0109, "step": 19410 }, { "grad_norm": 0.27583447098731995, "learning_rate": 8.088647971129246e-05, "loss": 0.007, "step": 19420 }, { "grad_norm": 0.2980212867259979, "learning_rate": 8.086480380292026e-05, "loss": 0.0082, "step": 19430 }, { "grad_norm": 0.21550731360912323, "learning_rate": 8.084311851863562e-05, "loss": 0.0076, "step": 19440 }, { "grad_norm": 0.2747206389904022, "learning_rate": 8.082142386502591e-05, "loss": 0.0076, "step": 19450 }, { "grad_norm": 0.3016121983528137, "learning_rate": 8.079971984868145e-05, "loss": 0.0071, "step": 19460 }, { "grad_norm": 0.3915133774280548, "learning_rate": 8.077800647619532e-05, "loss": 0.0082, "step": 19470 }, { "grad_norm": 0.234879732131958, "learning_rate": 8.075628375416345e-05, "loss": 0.0097, "step": 19480 }, { "grad_norm": 0.23224681615829468, "learning_rate": 8.073455168918464e-05, "loss": 0.007, "step": 19490 }, { "grad_norm": 0.23529082536697388, "learning_rate": 8.071281028786055e-05, "loss": 0.0065, "step": 19500 }, { "grad_norm": 0.24942827224731445, "learning_rate": 8.069105955679562e-05, "loss": 0.0095, "step": 19510 }, { "grad_norm": 0.20556573569774628, "learning_rate": 8.066929950259713e-05, "loss": 0.0067, "step": 19520 }, { "grad_norm": 0.25004634261131287, "learning_rate": 8.064753013187522e-05, "loss": 0.0081, "step": 19530 }, { "grad_norm": 0.24985705316066742, "learning_rate": 8.062575145124289e-05, "loss": 0.0071, "step": 19540 }, { "grad_norm": 0.28442367911338806, "learning_rate": 8.060396346731587e-05, "loss": 0.0086, "step": 19550 }, { "grad_norm": 0.2917954623699188, "learning_rate": 8.058216618671281e-05, "loss": 0.0079, "step": 19560 }, { "grad_norm": 0.2235489934682846, "learning_rate": 8.056035961605514e-05, "loss": 0.0089, "step": 19570 }, { "grad_norm": 0.2291390597820282, "learning_rate": 8.05385437619671e-05, "loss": 0.0058, "step": 19580 }, { "grad_norm": 0.26902198791503906, "learning_rate": 8.05167186310758e-05, "loss": 0.0095, "step": 19590 }, { "grad_norm": 0.2794935703277588, "learning_rate": 8.049488423001113e-05, "loss": 0.009, "step": 19600 }, { "grad_norm": 0.24435113370418549, "learning_rate": 8.047304056540581e-05, "loss": 0.0074, "step": 19610 }, { "grad_norm": 0.2055334597826004, "learning_rate": 8.045118764389534e-05, "loss": 0.0085, "step": 19620 }, { "grad_norm": 0.17402079701423645, "learning_rate": 8.042932547211809e-05, "loss": 0.0065, "step": 19630 }, { "grad_norm": 0.22251446545124054, "learning_rate": 8.04074540567152e-05, "loss": 0.0092, "step": 19640 }, { "grad_norm": 0.25850194692611694, "learning_rate": 8.038557340433063e-05, "loss": 0.0074, "step": 19650 }, { "grad_norm": 0.2445290982723236, "learning_rate": 8.036368352161115e-05, "loss": 0.0075, "step": 19660 }, { "grad_norm": 0.21069346368312836, "learning_rate": 8.034178441520633e-05, "loss": 0.0077, "step": 19670 }, { "grad_norm": 0.2779119908809662, "learning_rate": 8.031987609176852e-05, "loss": 0.0094, "step": 19680 }, { "grad_norm": 0.20738078653812408, "learning_rate": 8.02979585579529e-05, "loss": 0.0071, "step": 19690 }, { "grad_norm": 0.2439897060394287, "learning_rate": 8.027603182041745e-05, "loss": 0.0065, "step": 19700 }, { "grad_norm": 0.2629689872264862, "learning_rate": 8.025409588582292e-05, "loss": 0.0089, "step": 19710 }, { "grad_norm": 0.2789883315563202, "learning_rate": 8.023215076083288e-05, "loss": 0.0082, "step": 19720 }, { "grad_norm": 0.22468744218349457, "learning_rate": 8.021019645211367e-05, "loss": 0.0107, "step": 19730 }, { "grad_norm": 0.22162243723869324, "learning_rate": 8.018823296633441e-05, "loss": 0.0081, "step": 19740 }, { "grad_norm": 0.255220502614975, "learning_rate": 8.016626031016708e-05, "loss": 0.0073, "step": 19750 }, { "grad_norm": 0.1920865923166275, "learning_rate": 8.014427849028636e-05, "loss": 0.0062, "step": 19760 }, { "grad_norm": 0.3471209406852722, "learning_rate": 8.012228751336974e-05, "loss": 0.0097, "step": 19770 }, { "grad_norm": 0.2116909772157669, "learning_rate": 8.01002873860975e-05, "loss": 0.0079, "step": 19780 }, { "grad_norm": 0.285729318857193, "learning_rate": 8.00782781151527e-05, "loss": 0.0078, "step": 19790 }, { "grad_norm": 0.18220259249210358, "learning_rate": 8.005625970722119e-05, "loss": 0.007, "step": 19800 }, { "grad_norm": 0.2763148248195648, "learning_rate": 8.003423216899158e-05, "loss": 0.0074, "step": 19810 }, { "grad_norm": 0.29228675365448, "learning_rate": 8.001219550715522e-05, "loss": 0.0083, "step": 19820 }, { "grad_norm": 0.23517456650733948, "learning_rate": 7.999014972840632e-05, "loss": 0.0087, "step": 19830 }, { "grad_norm": 0.3168605864048004, "learning_rate": 7.996809483944174e-05, "loss": 0.0084, "step": 19840 }, { "grad_norm": 0.26795217394828796, "learning_rate": 7.994603084696124e-05, "loss": 0.0075, "step": 19850 }, { "grad_norm": 0.20814606547355652, "learning_rate": 7.992395775766724e-05, "loss": 0.0085, "step": 19860 }, { "grad_norm": 0.24424220621585846, "learning_rate": 7.990187557826497e-05, "loss": 0.009, "step": 19870 }, { "grad_norm": 0.2510085701942444, "learning_rate": 7.987978431546242e-05, "loss": 0.0089, "step": 19880 }, { "grad_norm": 0.25798335671424866, "learning_rate": 7.985768397597031e-05, "loss": 0.0088, "step": 19890 }, { "grad_norm": 0.24623669683933258, "learning_rate": 7.983557456650216e-05, "loss": 0.0089, "step": 19900 }, { "grad_norm": 0.3170531094074249, "learning_rate": 7.981345609377422e-05, "loss": 0.0109, "step": 19910 }, { "grad_norm": 0.2596462666988373, "learning_rate": 7.97913285645055e-05, "loss": 0.0077, "step": 19920 }, { "grad_norm": 0.25415122509002686, "learning_rate": 7.976919198541776e-05, "loss": 0.007, "step": 19930 }, { "grad_norm": 0.21104435622692108, "learning_rate": 7.974704636323548e-05, "loss": 0.0081, "step": 19940 }, { "grad_norm": 0.2091863602399826, "learning_rate": 7.972489170468597e-05, "loss": 0.0067, "step": 19950 }, { "grad_norm": 0.31201303005218506, "learning_rate": 7.970272801649918e-05, "loss": 0.008, "step": 19960 }, { "grad_norm": 0.2933281362056732, "learning_rate": 7.96805553054079e-05, "loss": 0.0096, "step": 19970 }, { "grad_norm": 0.2672351896762848, "learning_rate": 7.965837357814756e-05, "loss": 0.008, "step": 19980 }, { "grad_norm": 0.22798676788806915, "learning_rate": 7.963618284145643e-05, "loss": 0.0073, "step": 19990 }, { "grad_norm": 0.2415589839220047, "learning_rate": 7.961398310207544e-05, "loss": 0.0081, "step": 20000 }, { "grad_norm": 0.2610226273536682, "learning_rate": 7.95917743667483e-05, "loss": 0.011, "step": 20010 }, { "grad_norm": 0.2124694585800171, "learning_rate": 7.956955664222144e-05, "loss": 0.0085, "step": 20020 }, { "grad_norm": 0.16482801735401154, "learning_rate": 7.954732993524399e-05, "loss": 0.0079, "step": 20030 }, { "grad_norm": 0.25093182921409607, "learning_rate": 7.952509425256786e-05, "loss": 0.0067, "step": 20040 }, { "grad_norm": 0.1770930290222168, "learning_rate": 7.950284960094767e-05, "loss": 0.0065, "step": 20050 }, { "grad_norm": 0.18716323375701904, "learning_rate": 7.948059598714076e-05, "loss": 0.008, "step": 20060 }, { "grad_norm": 0.3074689209461212, "learning_rate": 7.945833341790717e-05, "loss": 0.0089, "step": 20070 }, { "grad_norm": 0.20486070215702057, "learning_rate": 7.94360619000097e-05, "loss": 0.0077, "step": 20080 }, { "grad_norm": 0.26866084337234497, "learning_rate": 7.941378144021381e-05, "loss": 0.0099, "step": 20090 }, { "grad_norm": 0.2770794928073883, "learning_rate": 7.939149204528777e-05, "loss": 0.0119, "step": 20100 }, { "grad_norm": 0.22791442275047302, "learning_rate": 7.936919372200246e-05, "loss": 0.0101, "step": 20110 }, { "grad_norm": 0.31353890895843506, "learning_rate": 7.934688647713158e-05, "loss": 0.0106, "step": 20120 }, { "grad_norm": 0.28576356172561646, "learning_rate": 7.932457031745143e-05, "loss": 0.0095, "step": 20130 }, { "grad_norm": 0.3792745769023895, "learning_rate": 7.930224524974108e-05, "loss": 0.0099, "step": 20140 }, { "grad_norm": 0.27687785029411316, "learning_rate": 7.927991128078232e-05, "loss": 0.0101, "step": 20150 }, { "grad_norm": 0.30250903964042664, "learning_rate": 7.925756841735958e-05, "loss": 0.0082, "step": 20160 }, { "grad_norm": 0.24277298152446747, "learning_rate": 7.923521666626008e-05, "loss": 0.0074, "step": 20170 }, { "grad_norm": 0.23286497592926025, "learning_rate": 7.921285603427366e-05, "loss": 0.0076, "step": 20180 }, { "grad_norm": 0.20044639706611633, "learning_rate": 7.91904865281929e-05, "loss": 0.0082, "step": 20190 }, { "grad_norm": 0.22045907378196716, "learning_rate": 7.916810815481307e-05, "loss": 0.0073, "step": 20200 }, { "grad_norm": 0.21045024693012238, "learning_rate": 7.914572092093211e-05, "loss": 0.0078, "step": 20210 }, { "grad_norm": 0.2371012568473816, "learning_rate": 7.912332483335068e-05, "loss": 0.0098, "step": 20220 }, { "grad_norm": 0.21180227398872375, "learning_rate": 7.910091989887213e-05, "loss": 0.0072, "step": 20230 }, { "grad_norm": 0.2546599507331848, "learning_rate": 7.907850612430248e-05, "loss": 0.007, "step": 20240 }, { "grad_norm": 0.28301095962524414, "learning_rate": 7.905608351645044e-05, "loss": 0.0085, "step": 20250 }, { "grad_norm": 0.24904896318912506, "learning_rate": 7.90336520821274e-05, "loss": 0.0103, "step": 20260 }, { "grad_norm": 0.25038453936576843, "learning_rate": 7.901121182814746e-05, "loss": 0.0078, "step": 20270 }, { "grad_norm": 0.26120755076408386, "learning_rate": 7.898876276132736e-05, "loss": 0.0076, "step": 20280 }, { "grad_norm": 0.3288372755050659, "learning_rate": 7.896630488848654e-05, "loss": 0.0074, "step": 20290 }, { "grad_norm": 0.23615920543670654, "learning_rate": 7.89438382164471e-05, "loss": 0.0071, "step": 20300 }, { "grad_norm": 0.3450459837913513, "learning_rate": 7.892136275203383e-05, "loss": 0.0108, "step": 20310 }, { "grad_norm": 0.23624712228775024, "learning_rate": 7.889887850207418e-05, "loss": 0.0082, "step": 20320 }, { "grad_norm": 0.22624339163303375, "learning_rate": 7.887638547339827e-05, "loss": 0.0088, "step": 20330 }, { "grad_norm": 0.21127496659755707, "learning_rate": 7.885388367283891e-05, "loss": 0.0083, "step": 20340 }, { "grad_norm": 0.24286557734012604, "learning_rate": 7.88313731072315e-05, "loss": 0.0101, "step": 20350 }, { "grad_norm": 0.2506529986858368, "learning_rate": 7.88088537834142e-05, "loss": 0.0093, "step": 20360 }, { "grad_norm": 0.238164484500885, "learning_rate": 7.878632570822778e-05, "loss": 0.0092, "step": 20370 }, { "grad_norm": 0.23400647938251495, "learning_rate": 7.876378888851567e-05, "loss": 0.0091, "step": 20380 }, { "grad_norm": 0.21203789114952087, "learning_rate": 7.874124333112396e-05, "loss": 0.0104, "step": 20390 }, { "grad_norm": 0.24512746930122375, "learning_rate": 7.871868904290138e-05, "loss": 0.0091, "step": 20400 }, { "grad_norm": 0.28832200169563293, "learning_rate": 7.869612603069935e-05, "loss": 0.009, "step": 20410 }, { "grad_norm": 0.24954824149608612, "learning_rate": 7.867355430137192e-05, "loss": 0.0059, "step": 20420 }, { "grad_norm": 0.17843236029148102, "learning_rate": 7.865097386177577e-05, "loss": 0.0057, "step": 20430 }, { "grad_norm": 0.21828846633434296, "learning_rate": 7.862838471877023e-05, "loss": 0.008, "step": 20440 }, { "grad_norm": 0.2021966427564621, "learning_rate": 7.860578687921731e-05, "loss": 0.0065, "step": 20450 }, { "grad_norm": 0.17848709225654602, "learning_rate": 7.858318034998164e-05, "loss": 0.0077, "step": 20460 }, { "grad_norm": 0.2623528242111206, "learning_rate": 7.856056513793046e-05, "loss": 0.0076, "step": 20470 }, { "grad_norm": 0.1966804414987564, "learning_rate": 7.85379412499337e-05, "loss": 0.0088, "step": 20480 }, { "grad_norm": 0.2642080783843994, "learning_rate": 7.851530869286389e-05, "loss": 0.0093, "step": 20490 }, { "grad_norm": 0.2431679666042328, "learning_rate": 7.849266747359619e-05, "loss": 0.0082, "step": 20500 }, { "grad_norm": 0.23049116134643555, "learning_rate": 7.847001759900843e-05, "loss": 0.0068, "step": 20510 }, { "grad_norm": 0.172549307346344, "learning_rate": 7.844735907598102e-05, "loss": 0.0112, "step": 20520 }, { "grad_norm": 0.22782915830612183, "learning_rate": 7.842469191139703e-05, "loss": 0.0105, "step": 20530 }, { "grad_norm": 0.22819611430168152, "learning_rate": 7.840201611214215e-05, "loss": 0.0064, "step": 20540 }, { "grad_norm": 0.2272072434425354, "learning_rate": 7.837933168510469e-05, "loss": 0.0088, "step": 20550 }, { "grad_norm": 0.21519364416599274, "learning_rate": 7.835663863717559e-05, "loss": 0.0097, "step": 20560 }, { "grad_norm": 0.3380639851093292, "learning_rate": 7.833393697524838e-05, "loss": 0.0079, "step": 20570 }, { "grad_norm": 0.32779932022094727, "learning_rate": 7.831122670621922e-05, "loss": 0.0101, "step": 20580 }, { "grad_norm": 0.26737362146377563, "learning_rate": 7.82885078369869e-05, "loss": 0.0077, "step": 20590 }, { "grad_norm": 0.1945435255765915, "learning_rate": 7.826578037445283e-05, "loss": 0.0068, "step": 20600 }, { "grad_norm": 0.22408734261989594, "learning_rate": 7.824304432552097e-05, "loss": 0.0074, "step": 20610 }, { "grad_norm": 0.17780053615570068, "learning_rate": 7.822029969709798e-05, "loss": 0.006, "step": 20620 }, { "grad_norm": 0.2417307198047638, "learning_rate": 7.819754649609306e-05, "loss": 0.0094, "step": 20630 }, { "grad_norm": 0.28193891048431396, "learning_rate": 7.817478472941802e-05, "loss": 0.0079, "step": 20640 }, { "grad_norm": 0.3269716501235962, "learning_rate": 7.815201440398727e-05, "loss": 0.0093, "step": 20650 }, { "grad_norm": 0.24986889958381653, "learning_rate": 7.812923552671789e-05, "loss": 0.0098, "step": 20660 }, { "grad_norm": 0.22790886461734772, "learning_rate": 7.810644810452945e-05, "loss": 0.0086, "step": 20670 }, { "grad_norm": 0.21844862401485443, "learning_rate": 7.808365214434417e-05, "loss": 0.0079, "step": 20680 }, { "grad_norm": 0.29999908804893494, "learning_rate": 7.80608476530869e-05, "loss": 0.0076, "step": 20690 }, { "grad_norm": 0.2469121366739273, "learning_rate": 7.8038034637685e-05, "loss": 0.009, "step": 20700 }, { "grad_norm": 0.22816407680511475, "learning_rate": 7.801521310506848e-05, "loss": 0.0076, "step": 20710 }, { "grad_norm": 0.23810826241970062, "learning_rate": 7.799238306216994e-05, "loss": 0.0079, "step": 20720 }, { "grad_norm": 0.27262425422668457, "learning_rate": 7.796954451592448e-05, "loss": 0.0095, "step": 20730 }, { "grad_norm": 0.2820528745651245, "learning_rate": 7.794669747326992e-05, "loss": 0.0107, "step": 20740 }, { "grad_norm": 0.2760917842388153, "learning_rate": 7.792384194114654e-05, "loss": 0.0085, "step": 20750 }, { "grad_norm": 0.21847929060459137, "learning_rate": 7.790097792649729e-05, "loss": 0.008, "step": 20760 }, { "grad_norm": 0.2442878782749176, "learning_rate": 7.787810543626762e-05, "loss": 0.0106, "step": 20770 }, { "grad_norm": 0.18228621780872345, "learning_rate": 7.785522447740558e-05, "loss": 0.0062, "step": 20780 }, { "grad_norm": 0.2158331573009491, "learning_rate": 7.783233505686182e-05, "loss": 0.007, "step": 20790 }, { "grad_norm": 0.2960889935493469, "learning_rate": 7.780943718158955e-05, "loss": 0.0078, "step": 20800 }, { "grad_norm": 0.1813492625951767, "learning_rate": 7.778653085854453e-05, "loss": 0.0064, "step": 20810 }, { "grad_norm": 0.21400456130504608, "learning_rate": 7.77636160946851e-05, "loss": 0.007, "step": 20820 }, { "grad_norm": 0.2015584409236908, "learning_rate": 7.774069289697215e-05, "loss": 0.0077, "step": 20830 }, { "grad_norm": 0.30142104625701904, "learning_rate": 7.771776127236913e-05, "loss": 0.0064, "step": 20840 }, { "grad_norm": 0.24986571073532104, "learning_rate": 7.769482122784212e-05, "loss": 0.007, "step": 20850 }, { "grad_norm": 0.2626262903213501, "learning_rate": 7.767187277035963e-05, "loss": 0.0077, "step": 20860 }, { "grad_norm": 0.28270116448402405, "learning_rate": 7.764891590689285e-05, "loss": 0.0074, "step": 20870 }, { "grad_norm": 0.22431358695030212, "learning_rate": 7.762595064441542e-05, "loss": 0.0077, "step": 20880 }, { "grad_norm": 0.22290213406085968, "learning_rate": 7.760297698990362e-05, "loss": 0.0071, "step": 20890 }, { "grad_norm": 0.19826994836330414, "learning_rate": 7.757999495033623e-05, "loss": 0.0064, "step": 20900 }, { "grad_norm": 0.2533501982688904, "learning_rate": 7.755700453269456e-05, "loss": 0.0081, "step": 20910 }, { "grad_norm": 0.21321673691272736, "learning_rate": 7.753400574396254e-05, "loss": 0.007, "step": 20920 }, { "grad_norm": 0.21878372132778168, "learning_rate": 7.751099859112655e-05, "loss": 0.0063, "step": 20930 }, { "grad_norm": 0.28217369318008423, "learning_rate": 7.748798308117557e-05, "loss": 0.01, "step": 20940 }, { "grad_norm": 0.26973801851272583, "learning_rate": 7.746495922110112e-05, "loss": 0.007, "step": 20950 }, { "grad_norm": 0.2012580782175064, "learning_rate": 7.744192701789723e-05, "loss": 0.0067, "step": 20960 }, { "grad_norm": 0.25700366497039795, "learning_rate": 7.741888647856046e-05, "loss": 0.0076, "step": 20970 }, { "grad_norm": 0.21343129873275757, "learning_rate": 7.739583761008994e-05, "loss": 0.0081, "step": 20980 }, { "grad_norm": 0.2504539489746094, "learning_rate": 7.73727804194873e-05, "loss": 0.0064, "step": 20990 }, { "grad_norm": 0.22741132974624634, "learning_rate": 7.734971491375671e-05, "loss": 0.0104, "step": 21000 }, { "grad_norm": 0.2709076702594757, "learning_rate": 7.732664109990485e-05, "loss": 0.0074, "step": 21010 }, { "grad_norm": 0.22615349292755127, "learning_rate": 7.730355898494095e-05, "loss": 0.0093, "step": 21020 }, { "grad_norm": 0.22514010965824127, "learning_rate": 7.728046857587673e-05, "loss": 0.0076, "step": 21030 }, { "grad_norm": 0.23027095198631287, "learning_rate": 7.725736987972647e-05, "loss": 0.0081, "step": 21040 }, { "grad_norm": 0.2386668622493744, "learning_rate": 7.723426290350691e-05, "loss": 0.0096, "step": 21050 }, { "grad_norm": 0.22972188889980316, "learning_rate": 7.721114765423736e-05, "loss": 0.0067, "step": 21060 }, { "grad_norm": 0.2910795211791992, "learning_rate": 7.718802413893963e-05, "loss": 0.01, "step": 21070 }, { "grad_norm": 0.2245515137910843, "learning_rate": 7.716489236463802e-05, "loss": 0.0092, "step": 21080 }, { "grad_norm": 0.20972386002540588, "learning_rate": 7.714175233835936e-05, "loss": 0.0077, "step": 21090 }, { "grad_norm": 0.2070750743150711, "learning_rate": 7.711860406713299e-05, "loss": 0.0076, "step": 21100 }, { "grad_norm": 0.28407394886016846, "learning_rate": 7.70954475579907e-05, "loss": 0.0093, "step": 21110 }, { "grad_norm": 0.23277485370635986, "learning_rate": 7.707228281796688e-05, "loss": 0.007, "step": 21120 }, { "grad_norm": 0.24423246085643768, "learning_rate": 7.704910985409833e-05, "loss": 0.0087, "step": 21130 }, { "grad_norm": 0.23047204315662384, "learning_rate": 7.702592867342439e-05, "loss": 0.0086, "step": 21140 }, { "grad_norm": 0.23932595551013947, "learning_rate": 7.700273928298691e-05, "loss": 0.0085, "step": 21150 }, { "grad_norm": 0.23834581673145294, "learning_rate": 7.697954168983021e-05, "loss": 0.0075, "step": 21160 }, { "grad_norm": 0.20849624276161194, "learning_rate": 7.695633590100109e-05, "loss": 0.0076, "step": 21170 }, { "grad_norm": 0.26982787251472473, "learning_rate": 7.693312192354886e-05, "loss": 0.0084, "step": 21180 }, { "grad_norm": 0.23794211447238922, "learning_rate": 7.690989976452532e-05, "loss": 0.0091, "step": 21190 }, { "grad_norm": 0.2747703194618225, "learning_rate": 7.688666943098475e-05, "loss": 0.008, "step": 21200 }, { "grad_norm": 0.2960923910140991, "learning_rate": 7.686343092998389e-05, "loss": 0.0075, "step": 21210 }, { "grad_norm": 0.2536032199859619, "learning_rate": 7.684018426858202e-05, "loss": 0.0077, "step": 21220 }, { "grad_norm": 0.258856862783432, "learning_rate": 7.681692945384084e-05, "loss": 0.0079, "step": 21230 }, { "grad_norm": 0.20317433774471283, "learning_rate": 7.679366649282456e-05, "loss": 0.0076, "step": 21240 }, { "grad_norm": 0.2373640388250351, "learning_rate": 7.677039539259983e-05, "loss": 0.0069, "step": 21250 }, { "grad_norm": 0.2372748702764511, "learning_rate": 7.674711616023581e-05, "loss": 0.0072, "step": 21260 }, { "grad_norm": 0.2482282519340515, "learning_rate": 7.672382880280413e-05, "loss": 0.0093, "step": 21270 }, { "grad_norm": 0.25534459948539734, "learning_rate": 7.670053332737885e-05, "loss": 0.0076, "step": 21280 }, { "grad_norm": 0.27775219082832336, "learning_rate": 7.667722974103654e-05, "loss": 0.0088, "step": 21290 }, { "grad_norm": 0.2930542528629303, "learning_rate": 7.66539180508562e-05, "loss": 0.0072, "step": 21300 }, { "grad_norm": 0.21638818085193634, "learning_rate": 7.663059826391932e-05, "loss": 0.0097, "step": 21310 }, { "grad_norm": 0.23934610188007355, "learning_rate": 7.660727038730981e-05, "loss": 0.0085, "step": 21320 }, { "grad_norm": 0.16057957708835602, "learning_rate": 7.65839344281141e-05, "loss": 0.0056, "step": 21330 }, { "grad_norm": 0.22868148982524872, "learning_rate": 7.656059039342101e-05, "loss": 0.0086, "step": 21340 }, { "grad_norm": 0.34593066573143005, "learning_rate": 7.653723829032187e-05, "loss": 0.0084, "step": 21350 }, { "grad_norm": 0.3157254457473755, "learning_rate": 7.65138781259104e-05, "loss": 0.0072, "step": 21360 }, { "grad_norm": 0.2019934505224228, "learning_rate": 7.649050990728279e-05, "loss": 0.0068, "step": 21370 }, { "grad_norm": 0.17256787419319153, "learning_rate": 7.646713364153774e-05, "loss": 0.0065, "step": 21380 }, { "grad_norm": 0.18384099006652832, "learning_rate": 7.64437493357763e-05, "loss": 0.0087, "step": 21390 }, { "grad_norm": 0.2063303291797638, "learning_rate": 7.642035699710202e-05, "loss": 0.0069, "step": 21400 }, { "grad_norm": 0.21035006642341614, "learning_rate": 7.639695663262089e-05, "loss": 0.0077, "step": 21410 }, { "grad_norm": 0.2264682501554489, "learning_rate": 7.637354824944128e-05, "loss": 0.008, "step": 21420 }, { "grad_norm": 0.2054148018360138, "learning_rate": 7.635013185467408e-05, "loss": 0.0068, "step": 21430 }, { "grad_norm": 0.2782699763774872, "learning_rate": 7.632670745543256e-05, "loss": 0.0086, "step": 21440 }, { "grad_norm": 0.26364076137542725, "learning_rate": 7.630327505883242e-05, "loss": 0.0106, "step": 21450 }, { "grad_norm": 0.18610930442810059, "learning_rate": 7.627983467199182e-05, "loss": 0.0065, "step": 21460 }, { "grad_norm": 0.2130100131034851, "learning_rate": 7.625638630203132e-05, "loss": 0.0062, "step": 21470 }, { "grad_norm": 0.2408430278301239, "learning_rate": 7.623292995607394e-05, "loss": 0.0073, "step": 21480 }, { "grad_norm": 0.21088026463985443, "learning_rate": 7.620946564124507e-05, "loss": 0.0075, "step": 21490 }, { "grad_norm": 0.1982228010892868, "learning_rate": 7.618599336467256e-05, "loss": 0.009, "step": 21500 }, { "grad_norm": 0.20202748477458954, "learning_rate": 7.616251313348666e-05, "loss": 0.0062, "step": 21510 }, { "grad_norm": 0.30146968364715576, "learning_rate": 7.613902495482005e-05, "loss": 0.0073, "step": 21520 }, { "grad_norm": 0.21921776235103607, "learning_rate": 7.611552883580784e-05, "loss": 0.0081, "step": 21530 }, { "grad_norm": 0.1917729377746582, "learning_rate": 7.609202478358748e-05, "loss": 0.0084, "step": 21540 }, { "grad_norm": 0.2060953825712204, "learning_rate": 7.606851280529895e-05, "loss": 0.0069, "step": 21550 }, { "grad_norm": 0.2721261978149414, "learning_rate": 7.604499290808449e-05, "loss": 0.007, "step": 21560 }, { "grad_norm": 0.18917472660541534, "learning_rate": 7.602146509908888e-05, "loss": 0.0064, "step": 21570 }, { "grad_norm": 0.25411972403526306, "learning_rate": 7.599792938545921e-05, "loss": 0.0095, "step": 21580 }, { "grad_norm": 0.23172226548194885, "learning_rate": 7.597438577434506e-05, "loss": 0.0088, "step": 21590 }, { "grad_norm": 0.21325945854187012, "learning_rate": 7.595083427289831e-05, "loss": 0.0092, "step": 21600 }, { "grad_norm": 0.20827534794807434, "learning_rate": 7.59272748882733e-05, "loss": 0.0077, "step": 21610 }, { "grad_norm": 0.19146892428398132, "learning_rate": 7.590370762762675e-05, "loss": 0.0077, "step": 21620 }, { "grad_norm": 0.20657746493816376, "learning_rate": 7.588013249811777e-05, "loss": 0.0081, "step": 21630 }, { "grad_norm": 0.24334716796875, "learning_rate": 7.585654950690786e-05, "loss": 0.0076, "step": 21640 }, { "grad_norm": 0.23545455932617188, "learning_rate": 7.583295866116091e-05, "loss": 0.0061, "step": 21650 }, { "grad_norm": 0.23728017508983612, "learning_rate": 7.580935996804321e-05, "loss": 0.0074, "step": 21660 }, { "grad_norm": 0.22799402475357056, "learning_rate": 7.57857534347234e-05, "loss": 0.0076, "step": 21670 }, { "grad_norm": 0.28084051609039307, "learning_rate": 7.576213906837254e-05, "loss": 0.0084, "step": 21680 }, { "grad_norm": 0.2438400685787201, "learning_rate": 7.573851687616403e-05, "loss": 0.0078, "step": 21690 }, { "grad_norm": 0.17609958350658417, "learning_rate": 7.571488686527368e-05, "loss": 0.0087, "step": 21700 }, { "grad_norm": 0.2439611256122589, "learning_rate": 7.569124904287968e-05, "loss": 0.0088, "step": 21710 }, { "grad_norm": 0.22961488366127014, "learning_rate": 7.566760341616254e-05, "loss": 0.0118, "step": 21720 }, { "grad_norm": 0.23086677491664886, "learning_rate": 7.564394999230519e-05, "loss": 0.0101, "step": 21730 }, { "grad_norm": 0.27812400460243225, "learning_rate": 7.562028877849294e-05, "loss": 0.0068, "step": 21740 }, { "grad_norm": 0.26642799377441406, "learning_rate": 7.559661978191341e-05, "loss": 0.0075, "step": 21750 }, { "grad_norm": 0.18844999372959137, "learning_rate": 7.557294300975664e-05, "loss": 0.0083, "step": 21760 }, { "grad_norm": 0.23796232044696808, "learning_rate": 7.554925846921499e-05, "loss": 0.009, "step": 21770 }, { "grad_norm": 0.27837589383125305, "learning_rate": 7.552556616748321e-05, "loss": 0.0071, "step": 21780 }, { "grad_norm": 0.24125047028064728, "learning_rate": 7.550186611175838e-05, "loss": 0.0066, "step": 21790 }, { "grad_norm": 0.17244909703731537, "learning_rate": 7.547815830923998e-05, "loss": 0.007, "step": 21800 }, { "grad_norm": 0.27542996406555176, "learning_rate": 7.54544427671298e-05, "loss": 0.0081, "step": 21810 }, { "grad_norm": 0.22847610712051392, "learning_rate": 7.543071949263198e-05, "loss": 0.006, "step": 21820 }, { "grad_norm": 0.2737162113189697, "learning_rate": 7.540698849295305e-05, "loss": 0.0078, "step": 21830 }, { "grad_norm": 0.2757057547569275, "learning_rate": 7.538324977530183e-05, "loss": 0.007, "step": 21840 }, { "grad_norm": 0.26891055703163147, "learning_rate": 7.535950334688955e-05, "loss": 0.0094, "step": 21850 }, { "grad_norm": 0.14977210760116577, "learning_rate": 7.533574921492972e-05, "loss": 0.0077, "step": 21860 }, { "grad_norm": 0.32933393120765686, "learning_rate": 7.531198738663824e-05, "loss": 0.0085, "step": 21870 }, { "grad_norm": 0.24526238441467285, "learning_rate": 7.528821786923333e-05, "loss": 0.0069, "step": 21880 }, { "grad_norm": 0.2873361110687256, "learning_rate": 7.52644406699355e-05, "loss": 0.0074, "step": 21890 }, { "grad_norm": 0.25182682275772095, "learning_rate": 7.524065579596766e-05, "loss": 0.0091, "step": 21900 }, { "grad_norm": 0.2680301368236542, "learning_rate": 7.521686325455506e-05, "loss": 0.0076, "step": 21910 }, { "grad_norm": 0.24315355718135834, "learning_rate": 7.51930630529252e-05, "loss": 0.0071, "step": 21920 }, { "grad_norm": 0.22277019917964935, "learning_rate": 7.516925519830797e-05, "loss": 0.0081, "step": 21930 }, { "grad_norm": 0.231625497341156, "learning_rate": 7.514543969793557e-05, "loss": 0.0078, "step": 21940 }, { "grad_norm": 0.2680265009403229, "learning_rate": 7.512161655904251e-05, "loss": 0.0058, "step": 21950 }, { "grad_norm": 0.19067281484603882, "learning_rate": 7.509778578886563e-05, "loss": 0.0067, "step": 21960 }, { "grad_norm": 0.26889294385910034, "learning_rate": 7.507394739464412e-05, "loss": 0.0092, "step": 21970 }, { "grad_norm": 0.2563711702823639, "learning_rate": 7.50501013836194e-05, "loss": 0.009, "step": 21980 }, { "grad_norm": 0.2342793047428131, "learning_rate": 7.50262477630353e-05, "loss": 0.0075, "step": 21990 }, { "grad_norm": 0.28217384219169617, "learning_rate": 7.500238654013794e-05, "loss": 0.0061, "step": 22000 }, { "grad_norm": 0.22224083542823792, "learning_rate": 7.497851772217566e-05, "loss": 0.0074, "step": 22010 }, { "grad_norm": 0.15639682114124298, "learning_rate": 7.495464131639924e-05, "loss": 0.0072, "step": 22020 }, { "grad_norm": 0.21873731911182404, "learning_rate": 7.493075733006166e-05, "loss": 0.0063, "step": 22030 }, { "grad_norm": 0.2252448946237564, "learning_rate": 7.490686577041828e-05, "loss": 0.0076, "step": 22040 }, { "grad_norm": 0.21436147391796112, "learning_rate": 7.488296664472668e-05, "loss": 0.0064, "step": 22050 }, { "grad_norm": 0.24536632001399994, "learning_rate": 7.485905996024682e-05, "loss": 0.006, "step": 22060 }, { "grad_norm": 0.30665984749794006, "learning_rate": 7.483514572424093e-05, "loss": 0.0075, "step": 22070 }, { "grad_norm": 0.1688169538974762, "learning_rate": 7.481122394397349e-05, "loss": 0.0065, "step": 22080 }, { "grad_norm": 0.27143412828445435, "learning_rate": 7.478729462671131e-05, "loss": 0.0065, "step": 22090 }, { "grad_norm": 0.2068139612674713, "learning_rate": 7.47633577797235e-05, "loss": 0.0105, "step": 22100 }, { "grad_norm": 0.2206212729215622, "learning_rate": 7.473941341028144e-05, "loss": 0.0072, "step": 22110 }, { "grad_norm": 0.21949850022792816, "learning_rate": 7.471546152565879e-05, "loss": 0.0077, "step": 22120 }, { "grad_norm": 0.2864134609699249, "learning_rate": 7.46915021331315e-05, "loss": 0.0106, "step": 22130 }, { "grad_norm": 0.2223970741033554, "learning_rate": 7.466753523997778e-05, "loss": 0.008, "step": 22140 }, { "grad_norm": 0.1761765331029892, "learning_rate": 7.464356085347819e-05, "loss": 0.0073, "step": 22150 }, { "grad_norm": 0.23056161403656006, "learning_rate": 7.461957898091548e-05, "loss": 0.0068, "step": 22160 }, { "grad_norm": 0.22583626210689545, "learning_rate": 7.459558962957473e-05, "loss": 0.0073, "step": 22170 }, { "grad_norm": 0.20231269299983978, "learning_rate": 7.457159280674326e-05, "loss": 0.0081, "step": 22180 }, { "grad_norm": 0.2062925100326538, "learning_rate": 7.454758851971066e-05, "loss": 0.0068, "step": 22190 }, { "grad_norm": 0.24801717698574066, "learning_rate": 7.45235767757688e-05, "loss": 0.0076, "step": 22200 }, { "grad_norm": 0.22194969654083252, "learning_rate": 7.449955758221183e-05, "loss": 0.0079, "step": 22210 }, { "grad_norm": 0.27872583270072937, "learning_rate": 7.447553094633615e-05, "loss": 0.009, "step": 22220 }, { "grad_norm": 0.26207977533340454, "learning_rate": 7.445149687544039e-05, "loss": 0.0071, "step": 22230 }, { "grad_norm": 0.17819800972938538, "learning_rate": 7.44274553768255e-05, "loss": 0.0076, "step": 22240 }, { "grad_norm": 0.1804770529270172, "learning_rate": 7.440340645779464e-05, "loss": 0.0117, "step": 22250 }, { "grad_norm": 0.23417186737060547, "learning_rate": 7.437935012565322e-05, "loss": 0.0088, "step": 22260 }, { "grad_norm": 0.26907482743263245, "learning_rate": 7.435528638770893e-05, "loss": 0.0067, "step": 22270 }, { "grad_norm": 0.23220942914485931, "learning_rate": 7.433121525127171e-05, "loss": 0.0074, "step": 22280 }, { "grad_norm": 0.18864183127880096, "learning_rate": 7.430713672365371e-05, "loss": 0.0074, "step": 22290 }, { "grad_norm": 0.21367451548576355, "learning_rate": 7.428305081216938e-05, "loss": 0.0064, "step": 22300 }, { "grad_norm": 0.24732628464698792, "learning_rate": 7.425895752413536e-05, "loss": 0.0076, "step": 22310 }, { "grad_norm": 0.23585957288742065, "learning_rate": 7.423485686687057e-05, "loss": 0.0069, "step": 22320 }, { "grad_norm": 0.25474631786346436, "learning_rate": 7.421074884769616e-05, "loss": 0.0104, "step": 22330 }, { "grad_norm": 0.22462978959083557, "learning_rate": 7.418663347393548e-05, "loss": 0.0078, "step": 22340 }, { "grad_norm": 0.269584059715271, "learning_rate": 7.416251075291418e-05, "loss": 0.0072, "step": 22350 }, { "grad_norm": 0.2947555482387543, "learning_rate": 7.413838069196007e-05, "loss": 0.0071, "step": 22360 }, { "grad_norm": 0.2860237658023834, "learning_rate": 7.411424329840324e-05, "loss": 0.0074, "step": 22370 }, { "grad_norm": 0.18522831797599792, "learning_rate": 7.409009857957601e-05, "loss": 0.0065, "step": 22380 }, { "grad_norm": 0.22943225502967834, "learning_rate": 7.40659465428129e-05, "loss": 0.0091, "step": 22390 }, { "grad_norm": 0.25011688470840454, "learning_rate": 7.404178719545063e-05, "loss": 0.009, "step": 22400 }, { "grad_norm": 0.2632109820842743, "learning_rate": 7.401762054482822e-05, "loss": 0.0072, "step": 22410 }, { "grad_norm": 0.24603308737277985, "learning_rate": 7.39934465982868e-05, "loss": 0.0089, "step": 22420 }, { "grad_norm": 0.20079408586025238, "learning_rate": 7.396926536316984e-05, "loss": 0.0087, "step": 22430 }, { "grad_norm": 0.24733145534992218, "learning_rate": 7.394507684682293e-05, "loss": 0.0089, "step": 22440 }, { "grad_norm": 0.24166692793369293, "learning_rate": 7.392088105659393e-05, "loss": 0.0071, "step": 22450 }, { "grad_norm": 0.2011084109544754, "learning_rate": 7.389667799983284e-05, "loss": 0.0069, "step": 22460 }, { "grad_norm": 0.2692710757255554, "learning_rate": 7.387246768389193e-05, "loss": 0.0068, "step": 22470 }, { "grad_norm": 0.20514404773712158, "learning_rate": 7.384825011612563e-05, "loss": 0.0061, "step": 22480 }, { "grad_norm": 0.17967849969863892, "learning_rate": 7.382402530389066e-05, "loss": 0.0066, "step": 22490 }, { "grad_norm": 0.20874296128749847, "learning_rate": 7.379979325454582e-05, "loss": 0.0079, "step": 22500 }, { "grad_norm": 0.2606438398361206, "learning_rate": 7.37755539754522e-05, "loss": 0.007, "step": 22510 }, { "grad_norm": 0.25000566244125366, "learning_rate": 7.375130747397302e-05, "loss": 0.0069, "step": 22520 }, { "grad_norm": 0.2225477546453476, "learning_rate": 7.372705375747377e-05, "loss": 0.0078, "step": 22530 }, { "grad_norm": 0.22811853885650635, "learning_rate": 7.370279283332205e-05, "loss": 0.0084, "step": 22540 }, { "grad_norm": 0.1927633285522461, "learning_rate": 7.36785247088877e-05, "loss": 0.0066, "step": 22550 }, { "grad_norm": 0.2659134566783905, "learning_rate": 7.365424939154275e-05, "loss": 0.0077, "step": 22560 }, { "grad_norm": 0.1849370151758194, "learning_rate": 7.362996688866138e-05, "loss": 0.0073, "step": 22570 }, { "grad_norm": 0.22637829184532166, "learning_rate": 7.360567720761999e-05, "loss": 0.0061, "step": 22580 }, { "grad_norm": 0.22359804809093475, "learning_rate": 7.358138035579711e-05, "loss": 0.0059, "step": 22590 }, { "grad_norm": 0.21463163197040558, "learning_rate": 7.355707634057354e-05, "loss": 0.0069, "step": 22600 }, { "grad_norm": 0.21173028647899628, "learning_rate": 7.353276516933215e-05, "loss": 0.0071, "step": 22610 }, { "grad_norm": 0.166578009724617, "learning_rate": 7.350844684945806e-05, "loss": 0.0056, "step": 22620 }, { "grad_norm": 0.23739005625247955, "learning_rate": 7.348412138833851e-05, "loss": 0.006, "step": 22630 }, { "grad_norm": 0.24811996519565582, "learning_rate": 7.345978879336295e-05, "loss": 0.0077, "step": 22640 }, { "grad_norm": 0.2003709226846695, "learning_rate": 7.343544907192296e-05, "loss": 0.0096, "step": 22650 }, { "grad_norm": 0.2116670310497284, "learning_rate": 7.341110223141235e-05, "loss": 0.0063, "step": 22660 }, { "grad_norm": 0.2476043403148651, "learning_rate": 7.3386748279227e-05, "loss": 0.0101, "step": 22670 }, { "grad_norm": 0.2581193447113037, "learning_rate": 7.336238722276501e-05, "loss": 0.008, "step": 22680 }, { "grad_norm": 0.22614417970180511, "learning_rate": 7.333801906942663e-05, "loss": 0.0078, "step": 22690 }, { "grad_norm": 0.20376884937286377, "learning_rate": 7.331364382661428e-05, "loss": 0.0077, "step": 22700 }, { "grad_norm": 0.2353021800518036, "learning_rate": 7.328926150173248e-05, "loss": 0.0084, "step": 22710 }, { "grad_norm": 0.2068241536617279, "learning_rate": 7.326487210218795e-05, "loss": 0.0069, "step": 22720 }, { "grad_norm": 0.14977042376995087, "learning_rate": 7.324047563538955e-05, "loss": 0.0062, "step": 22730 }, { "grad_norm": 0.21605587005615234, "learning_rate": 7.321607210874828e-05, "loss": 0.0082, "step": 22740 }, { "grad_norm": 0.21768251061439514, "learning_rate": 7.31916615296773e-05, "loss": 0.0075, "step": 22750 }, { "grad_norm": 0.22607772052288055, "learning_rate": 7.316724390559188e-05, "loss": 0.0067, "step": 22760 }, { "grad_norm": 0.20838750898838043, "learning_rate": 7.314281924390946e-05, "loss": 0.0082, "step": 22770 }, { "grad_norm": 0.23569554090499878, "learning_rate": 7.311838755204959e-05, "loss": 0.0068, "step": 22780 }, { "grad_norm": 0.25966617465019226, "learning_rate": 7.3093948837434e-05, "loss": 0.0073, "step": 22790 }, { "grad_norm": 0.2056553214788437, "learning_rate": 7.306950310748651e-05, "loss": 0.0072, "step": 22800 }, { "grad_norm": 0.26063889265060425, "learning_rate": 7.304505036963311e-05, "loss": 0.01, "step": 22810 }, { "grad_norm": 0.2838689088821411, "learning_rate": 7.302059063130186e-05, "loss": 0.0061, "step": 22820 }, { "grad_norm": 0.20723405480384827, "learning_rate": 7.2996123899923e-05, "loss": 0.0078, "step": 22830 }, { "grad_norm": 0.3118131160736084, "learning_rate": 7.297165018292886e-05, "loss": 0.0069, "step": 22840 }, { "grad_norm": 0.24340879917144775, "learning_rate": 7.294716948775396e-05, "loss": 0.0083, "step": 22850 }, { "grad_norm": 0.34460562467575073, "learning_rate": 7.292268182183484e-05, "loss": 0.0074, "step": 22860 }, { "grad_norm": 0.2676179111003876, "learning_rate": 7.28981871926102e-05, "loss": 0.0072, "step": 22870 }, { "grad_norm": 0.23573555052280426, "learning_rate": 7.28736856075209e-05, "loss": 0.0079, "step": 22880 }, { "grad_norm": 0.20188726484775543, "learning_rate": 7.284917707400985e-05, "loss": 0.007, "step": 22890 }, { "grad_norm": 0.22615191340446472, "learning_rate": 7.282466159952212e-05, "loss": 0.0084, "step": 22900 }, { "grad_norm": 0.2418040782213211, "learning_rate": 7.280013919150483e-05, "loss": 0.0073, "step": 22910 }, { "grad_norm": 0.20078390836715698, "learning_rate": 7.277560985740728e-05, "loss": 0.008, "step": 22920 }, { "grad_norm": 0.2556045353412628, "learning_rate": 7.275107360468079e-05, "loss": 0.0066, "step": 22930 }, { "grad_norm": 0.23398616909980774, "learning_rate": 7.272653044077885e-05, "loss": 0.0079, "step": 22940 }, { "grad_norm": 0.2393350750207901, "learning_rate": 7.270198037315703e-05, "loss": 0.0077, "step": 22950 }, { "grad_norm": 0.26894935965538025, "learning_rate": 7.267742340927297e-05, "loss": 0.009, "step": 22960 }, { "grad_norm": 0.3006245195865631, "learning_rate": 7.265285955658645e-05, "loss": 0.0073, "step": 22970 }, { "grad_norm": 0.2342676818370819, "learning_rate": 7.26282888225593e-05, "loss": 0.006, "step": 22980 }, { "grad_norm": 0.21574576199054718, "learning_rate": 7.260371121465548e-05, "loss": 0.006, "step": 22990 }, { "grad_norm": 0.2520429790019989, "learning_rate": 7.2579126740341e-05, "loss": 0.0079, "step": 23000 }, { "grad_norm": 0.23564472794532776, "learning_rate": 7.2554535407084e-05, "loss": 0.0066, "step": 23010 }, { "grad_norm": 0.19552455842494965, "learning_rate": 7.252993722235464e-05, "loss": 0.0068, "step": 23020 }, { "grad_norm": 0.207731693983078, "learning_rate": 7.250533219362523e-05, "loss": 0.006, "step": 23030 }, { "grad_norm": 0.22337056696414948, "learning_rate": 7.248072032837012e-05, "loss": 0.0072, "step": 23040 }, { "grad_norm": 0.19904747605323792, "learning_rate": 7.245610163406575e-05, "loss": 0.0058, "step": 23050 }, { "grad_norm": 0.26901039481163025, "learning_rate": 7.243147611819061e-05, "loss": 0.0075, "step": 23060 }, { "grad_norm": 0.17573612928390503, "learning_rate": 7.240684378822531e-05, "loss": 0.0075, "step": 23070 }, { "grad_norm": 0.23055042326450348, "learning_rate": 7.238220465165248e-05, "loss": 0.0087, "step": 23080 }, { "grad_norm": 0.21898643672466278, "learning_rate": 7.235755871595684e-05, "loss": 0.0074, "step": 23090 }, { "grad_norm": 0.20287759602069855, "learning_rate": 7.233290598862517e-05, "loss": 0.0074, "step": 23100 }, { "grad_norm": 0.19096684455871582, "learning_rate": 7.230824647714635e-05, "loss": 0.0064, "step": 23110 }, { "grad_norm": 0.14748680591583252, "learning_rate": 7.228358018901124e-05, "loss": 0.0057, "step": 23120 }, { "grad_norm": 0.20211993157863617, "learning_rate": 7.225890713171286e-05, "loss": 0.0065, "step": 23130 }, { "grad_norm": 0.17459122836589813, "learning_rate": 7.223422731274618e-05, "loss": 0.0059, "step": 23140 }, { "grad_norm": 0.21621543169021606, "learning_rate": 7.220954073960832e-05, "loss": 0.0073, "step": 23150 }, { "grad_norm": 0.2575087249279022, "learning_rate": 7.218484741979838e-05, "loss": 0.0067, "step": 23160 }, { "grad_norm": 0.22043457627296448, "learning_rate": 7.216014736081756e-05, "loss": 0.0068, "step": 23170 }, { "grad_norm": 0.21308642625808716, "learning_rate": 7.213544057016906e-05, "loss": 0.0063, "step": 23180 }, { "grad_norm": 0.31472212076187134, "learning_rate": 7.211072705535819e-05, "loss": 0.0098, "step": 23190 }, { "grad_norm": 0.22318722307682037, "learning_rate": 7.208600682389224e-05, "loss": 0.0111, "step": 23200 }, { "grad_norm": 0.23881292343139648, "learning_rate": 7.206127988328055e-05, "loss": 0.0062, "step": 23210 }, { "grad_norm": 0.23514747619628906, "learning_rate": 7.203654624103453e-05, "loss": 0.0058, "step": 23220 }, { "grad_norm": 0.24831639230251312, "learning_rate": 7.201180590466761e-05, "loss": 0.0069, "step": 23230 }, { "grad_norm": 0.20711083710193634, "learning_rate": 7.198705888169523e-05, "loss": 0.0079, "step": 23240 }, { "grad_norm": 0.20910988748073578, "learning_rate": 7.196230517963491e-05, "loss": 0.0054, "step": 23250 }, { "grad_norm": 0.2589011490345001, "learning_rate": 7.193754480600615e-05, "loss": 0.0069, "step": 23260 }, { "grad_norm": 0.22907955944538116, "learning_rate": 7.19127777683305e-05, "loss": 0.0069, "step": 23270 }, { "grad_norm": 0.2453407645225525, "learning_rate": 7.188800407413156e-05, "loss": 0.0079, "step": 23280 }, { "grad_norm": 0.25670960545539856, "learning_rate": 7.186322373093489e-05, "loss": 0.0071, "step": 23290 }, { "grad_norm": 0.19476912915706635, "learning_rate": 7.18384367462681e-05, "loss": 0.007, "step": 23300 }, { "grad_norm": 0.2318083643913269, "learning_rate": 7.181364312766085e-05, "loss": 0.007, "step": 23310 }, { "grad_norm": 0.21937629580497742, "learning_rate": 7.178884288264477e-05, "loss": 0.0084, "step": 23320 }, { "grad_norm": 0.23231841623783112, "learning_rate": 7.176403601875353e-05, "loss": 0.0061, "step": 23330 }, { "grad_norm": 0.2002200335264206, "learning_rate": 7.173922254352279e-05, "loss": 0.0053, "step": 23340 }, { "grad_norm": 0.25092506408691406, "learning_rate": 7.171440246449024e-05, "loss": 0.0069, "step": 23350 }, { "grad_norm": 0.23472483456134796, "learning_rate": 7.168957578919555e-05, "loss": 0.0081, "step": 23360 }, { "grad_norm": 0.23603397607803345, "learning_rate": 7.16647425251804e-05, "loss": 0.0073, "step": 23370 }, { "grad_norm": 0.2890200614929199, "learning_rate": 7.163990267998852e-05, "loss": 0.0094, "step": 23380 }, { "grad_norm": 0.18937300145626068, "learning_rate": 7.161505626116556e-05, "loss": 0.0069, "step": 23390 }, { "grad_norm": 0.19407562911510468, "learning_rate": 7.159020327625923e-05, "loss": 0.0073, "step": 23400 }, { "grad_norm": 0.3111112415790558, "learning_rate": 7.15653437328192e-05, "loss": 0.0071, "step": 23410 }, { "grad_norm": 0.24332955479621887, "learning_rate": 7.154047763839713e-05, "loss": 0.0096, "step": 23420 }, { "grad_norm": 0.259840190410614, "learning_rate": 7.15156050005467e-05, "loss": 0.0097, "step": 23430 }, { "grad_norm": 0.20390920341014862, "learning_rate": 7.149072582682357e-05, "loss": 0.0064, "step": 23440 }, { "grad_norm": 0.2323862463235855, "learning_rate": 7.146584012478535e-05, "loss": 0.0084, "step": 23450 }, { "grad_norm": 0.2258659154176712, "learning_rate": 7.144094790199169e-05, "loss": 0.0085, "step": 23460 }, { "grad_norm": 0.18803656101226807, "learning_rate": 7.141604916600415e-05, "loss": 0.0076, "step": 23470 }, { "grad_norm": 0.2015160620212555, "learning_rate": 7.139114392438635e-05, "loss": 0.0062, "step": 23480 }, { "grad_norm": 0.2064451277256012, "learning_rate": 7.136623218470382e-05, "loss": 0.005, "step": 23490 }, { "grad_norm": 0.28598785400390625, "learning_rate": 7.13413139545241e-05, "loss": 0.0067, "step": 23500 }, { "grad_norm": 0.2777937352657318, "learning_rate": 7.131638924141668e-05, "loss": 0.008, "step": 23510 }, { "grad_norm": 0.3024005889892578, "learning_rate": 7.129145805295304e-05, "loss": 0.0081, "step": 23520 }, { "grad_norm": 0.20812225341796875, "learning_rate": 7.126652039670661e-05, "loss": 0.0082, "step": 23530 }, { "grad_norm": 0.30336740612983704, "learning_rate": 7.124157628025278e-05, "loss": 0.0088, "step": 23540 }, { "grad_norm": 0.26776477694511414, "learning_rate": 7.121662571116894e-05, "loss": 0.0071, "step": 23550 }, { "grad_norm": 0.21023081243038177, "learning_rate": 7.119166869703441e-05, "loss": 0.0061, "step": 23560 }, { "grad_norm": 0.2137153446674347, "learning_rate": 7.116670524543044e-05, "loss": 0.0068, "step": 23570 }, { "grad_norm": 0.2735556364059448, "learning_rate": 7.114173536394032e-05, "loss": 0.0098, "step": 23580 }, { "grad_norm": 0.16731102764606476, "learning_rate": 7.111675906014917e-05, "loss": 0.0077, "step": 23590 }, { "grad_norm": 0.23282283544540405, "learning_rate": 7.109177634164421e-05, "loss": 0.0064, "step": 23600 }, { "grad_norm": 0.14320172369480133, "learning_rate": 7.106678721601449e-05, "loss": 0.0057, "step": 23610 }, { "grad_norm": 0.19735263288021088, "learning_rate": 7.104179169085103e-05, "loss": 0.006, "step": 23620 }, { "grad_norm": 0.2534981667995453, "learning_rate": 7.101678977374683e-05, "loss": 0.0065, "step": 23630 }, { "grad_norm": 0.20142443478107452, "learning_rate": 7.099178147229685e-05, "loss": 0.0062, "step": 23640 }, { "grad_norm": 0.19346113502979279, "learning_rate": 7.096676679409789e-05, "loss": 0.006, "step": 23650 }, { "grad_norm": 0.24220320582389832, "learning_rate": 7.094174574674877e-05, "loss": 0.0062, "step": 23660 }, { "grad_norm": 0.23936009407043457, "learning_rate": 7.091671833785025e-05, "loss": 0.0067, "step": 23670 }, { "grad_norm": 0.25479021668434143, "learning_rate": 7.089168457500493e-05, "loss": 0.0058, "step": 23680 }, { "grad_norm": 0.22986072301864624, "learning_rate": 7.086664446581747e-05, "loss": 0.0063, "step": 23690 }, { "grad_norm": 0.21007581055164337, "learning_rate": 7.084159801789438e-05, "loss": 0.0063, "step": 23700 }, { "grad_norm": 0.19588488340377808, "learning_rate": 7.081654523884411e-05, "loss": 0.0056, "step": 23710 }, { "grad_norm": 0.23593758046627045, "learning_rate": 7.0791486136277e-05, "loss": 0.0075, "step": 23720 }, { "grad_norm": 0.2843397557735443, "learning_rate": 7.07664207178054e-05, "loss": 0.0065, "step": 23730 }, { "grad_norm": 0.2020701915025711, "learning_rate": 7.074134899104345e-05, "loss": 0.0085, "step": 23740 }, { "grad_norm": 0.20319929718971252, "learning_rate": 7.071627096360735e-05, "loss": 0.0066, "step": 23750 }, { "grad_norm": 0.2622431218624115, "learning_rate": 7.069118664311511e-05, "loss": 0.0077, "step": 23760 }, { "grad_norm": 0.2573256492614746, "learning_rate": 7.06660960371867e-05, "loss": 0.0062, "step": 23770 }, { "grad_norm": 0.21369421482086182, "learning_rate": 7.064099915344396e-05, "loss": 0.0068, "step": 23780 }, { "grad_norm": 0.2916682958602905, "learning_rate": 7.061589599951066e-05, "loss": 0.0071, "step": 23790 }, { "grad_norm": 0.30225372314453125, "learning_rate": 7.05907865830125e-05, "loss": 0.0071, "step": 23800 }, { "grad_norm": 0.27047479152679443, "learning_rate": 7.056567091157703e-05, "loss": 0.007, "step": 23810 }, { "grad_norm": 0.22229182720184326, "learning_rate": 7.054054899283375e-05, "loss": 0.0082, "step": 23820 }, { "grad_norm": 0.2269914299249649, "learning_rate": 7.051542083441403e-05, "loss": 0.006, "step": 23830 }, { "grad_norm": 0.2352418452501297, "learning_rate": 7.049028644395113e-05, "loss": 0.0074, "step": 23840 }, { "grad_norm": 0.24701836705207825, "learning_rate": 7.046514582908024e-05, "loss": 0.0069, "step": 23850 }, { "grad_norm": 0.26358070969581604, "learning_rate": 7.043999899743838e-05, "loss": 0.0057, "step": 23860 }, { "grad_norm": 0.2301684021949768, "learning_rate": 7.041484595666451e-05, "loss": 0.0071, "step": 23870 }, { "grad_norm": 0.21277542412281036, "learning_rate": 7.038968671439948e-05, "loss": 0.006, "step": 23880 }, { "grad_norm": 0.23770518600940704, "learning_rate": 7.036452127828596e-05, "loss": 0.0072, "step": 23890 }, { "grad_norm": 0.23463623225688934, "learning_rate": 7.033934965596859e-05, "loss": 0.0072, "step": 23900 }, { "grad_norm": 0.2334730327129364, "learning_rate": 7.031417185509381e-05, "loss": 0.0066, "step": 23910 }, { "grad_norm": 0.15661562979221344, "learning_rate": 7.028898788331e-05, "loss": 0.0063, "step": 23920 }, { "grad_norm": 0.21705128252506256, "learning_rate": 7.026379774826736e-05, "loss": 0.0058, "step": 23930 }, { "grad_norm": 0.19458024203777313, "learning_rate": 7.0238601457618e-05, "loss": 0.0059, "step": 23940 }, { "grad_norm": 0.2701191008090973, "learning_rate": 7.02133990190159e-05, "loss": 0.0067, "step": 23950 }, { "grad_norm": 0.23577691614627838, "learning_rate": 7.018819044011687e-05, "loss": 0.0078, "step": 23960 }, { "grad_norm": 0.15558412671089172, "learning_rate": 7.016297572857863e-05, "loss": 0.0053, "step": 23970 }, { "grad_norm": 0.18498459458351135, "learning_rate": 7.013775489206072e-05, "loss": 0.0051, "step": 23980 }, { "grad_norm": 0.2610701322555542, "learning_rate": 7.01125279382246e-05, "loss": 0.0057, "step": 23990 }, { "grad_norm": 0.2301194816827774, "learning_rate": 7.008729487473351e-05, "loss": 0.0066, "step": 24000 }, { "grad_norm": 0.1587142050266266, "learning_rate": 7.006205570925263e-05, "loss": 0.0061, "step": 24010 }, { "grad_norm": 0.2386852502822876, "learning_rate": 7.003681044944892e-05, "loss": 0.0081, "step": 24020 }, { "grad_norm": 0.18989433348178864, "learning_rate": 7.001155910299126e-05, "loss": 0.0061, "step": 24030 }, { "grad_norm": 0.2027624249458313, "learning_rate": 6.99863016775503e-05, "loss": 0.0068, "step": 24040 }, { "grad_norm": 0.253236323595047, "learning_rate": 6.996103818079859e-05, "loss": 0.0079, "step": 24050 }, { "grad_norm": 0.27774831652641296, "learning_rate": 6.993576862041054e-05, "loss": 0.0084, "step": 24060 }, { "grad_norm": 0.21400724351406097, "learning_rate": 6.991049300406235e-05, "loss": 0.0068, "step": 24070 }, { "grad_norm": 0.19312065839767456, "learning_rate": 6.988521133943209e-05, "loss": 0.0047, "step": 24080 }, { "grad_norm": 0.2008533626794815, "learning_rate": 6.985992363419966e-05, "loss": 0.0073, "step": 24090 }, { "grad_norm": 0.1768791675567627, "learning_rate": 6.983462989604682e-05, "loss": 0.0063, "step": 24100 }, { "grad_norm": 0.3028063178062439, "learning_rate": 6.980933013265709e-05, "loss": 0.0069, "step": 24110 }, { "grad_norm": 0.2480514645576477, "learning_rate": 6.978402435171592e-05, "loss": 0.0073, "step": 24120 }, { "grad_norm": 0.17038322985172272, "learning_rate": 6.975871256091052e-05, "loss": 0.0062, "step": 24130 }, { "grad_norm": 0.2071041464805603, "learning_rate": 6.973339476792995e-05, "loss": 0.0065, "step": 24140 }, { "grad_norm": 0.21248899400234222, "learning_rate": 6.970807098046505e-05, "loss": 0.006, "step": 24150 }, { "grad_norm": 0.2238602340221405, "learning_rate": 6.968274120620858e-05, "loss": 0.0074, "step": 24160 }, { "grad_norm": 0.22012795507907867, "learning_rate": 6.965740545285499e-05, "loss": 0.0054, "step": 24170 }, { "grad_norm": 0.29043570160865784, "learning_rate": 6.963206372810068e-05, "loss": 0.0083, "step": 24180 }, { "grad_norm": 0.2822639048099518, "learning_rate": 6.960671603964375e-05, "loss": 0.0087, "step": 24190 }, { "grad_norm": 0.19819259643554688, "learning_rate": 6.958136239518418e-05, "loss": 0.006, "step": 24200 }, { "grad_norm": 0.18711893260478973, "learning_rate": 6.955600280242371e-05, "loss": 0.0069, "step": 24210 }, { "grad_norm": 0.1911400556564331, "learning_rate": 6.953063726906596e-05, "loss": 0.0088, "step": 24220 }, { "grad_norm": 0.195345938205719, "learning_rate": 6.950526580281626e-05, "loss": 0.0085, "step": 24230 }, { "grad_norm": 0.18867981433868408, "learning_rate": 6.947988841138184e-05, "loss": 0.0063, "step": 24240 }, { "grad_norm": 0.21644961833953857, "learning_rate": 6.945450510247165e-05, "loss": 0.0065, "step": 24250 }, { "grad_norm": 0.2197287678718567, "learning_rate": 6.942911588379647e-05, "loss": 0.0064, "step": 24260 }, { "grad_norm": 0.24320431053638458, "learning_rate": 6.940372076306888e-05, "loss": 0.0058, "step": 24270 }, { "grad_norm": 0.1552775651216507, "learning_rate": 6.937831974800326e-05, "loss": 0.0066, "step": 24280 }, { "grad_norm": 0.2661644518375397, "learning_rate": 6.935291284631574e-05, "loss": 0.0087, "step": 24290 }, { "grad_norm": 0.2091807872056961, "learning_rate": 6.932750006572428e-05, "loss": 0.0072, "step": 24300 }, { "grad_norm": 0.2535192668437958, "learning_rate": 6.930208141394863e-05, "loss": 0.0078, "step": 24310 }, { "grad_norm": 0.23283711075782776, "learning_rate": 6.927665689871026e-05, "loss": 0.0062, "step": 24320 }, { "grad_norm": 0.24870415031909943, "learning_rate": 6.925122652773253e-05, "loss": 0.0064, "step": 24330 }, { "grad_norm": 0.3209875822067261, "learning_rate": 6.922579030874046e-05, "loss": 0.0073, "step": 24340 }, { "grad_norm": 0.25944098830223083, "learning_rate": 6.920034824946093e-05, "loss": 0.0059, "step": 24350 }, { "grad_norm": 0.2029070258140564, "learning_rate": 6.917490035762255e-05, "loss": 0.0077, "step": 24360 }, { "grad_norm": 0.16941291093826294, "learning_rate": 6.914944664095573e-05, "loss": 0.0079, "step": 24370 }, { "grad_norm": 0.20521265268325806, "learning_rate": 6.912398710719264e-05, "loss": 0.0069, "step": 24380 }, { "grad_norm": 0.25230786204338074, "learning_rate": 6.90985217640672e-05, "loss": 0.007, "step": 24390 }, { "grad_norm": 0.21439579129219055, "learning_rate": 6.90730506193151e-05, "loss": 0.0068, "step": 24400 }, { "grad_norm": 0.21030278503894806, "learning_rate": 6.904757368067384e-05, "loss": 0.0076, "step": 24410 }, { "grad_norm": 0.17052730917930603, "learning_rate": 6.90220909558826e-05, "loss": 0.0061, "step": 24420 }, { "grad_norm": 0.2733839750289917, "learning_rate": 6.899660245268237e-05, "loss": 0.0075, "step": 24430 }, { "grad_norm": 0.2304183542728424, "learning_rate": 6.897110817881592e-05, "loss": 0.0089, "step": 24440 }, { "grad_norm": 0.2562944293022156, "learning_rate": 6.894560814202769e-05, "loss": 0.0077, "step": 24450 }, { "grad_norm": 0.28432559967041016, "learning_rate": 6.892010235006394e-05, "loss": 0.0061, "step": 24460 }, { "grad_norm": 0.22223351895809174, "learning_rate": 6.889459081067264e-05, "loss": 0.0069, "step": 24470 }, { "grad_norm": 0.24769578874111176, "learning_rate": 6.886907353160356e-05, "loss": 0.0069, "step": 24480 }, { "grad_norm": 0.24927619099617004, "learning_rate": 6.884355052060814e-05, "loss": 0.0075, "step": 24490 }, { "grad_norm": 0.2924117147922516, "learning_rate": 6.88180217854396e-05, "loss": 0.0087, "step": 24500 }, { "grad_norm": 0.2673661708831787, "learning_rate": 6.87924873338529e-05, "loss": 0.0069, "step": 24510 }, { "grad_norm": 0.2333325296640396, "learning_rate": 6.876694717360475e-05, "loss": 0.0078, "step": 24520 }, { "grad_norm": 0.18996460735797882, "learning_rate": 6.874140131245355e-05, "loss": 0.0072, "step": 24530 }, { "grad_norm": 0.23210026323795319, "learning_rate": 6.871584975815948e-05, "loss": 0.0104, "step": 24540 }, { "grad_norm": 0.14410477876663208, "learning_rate": 6.86902925184844e-05, "loss": 0.0072, "step": 24550 }, { "grad_norm": 0.23743757605552673, "learning_rate": 6.866472960119195e-05, "loss": 0.0072, "step": 24560 }, { "grad_norm": 0.2170960158109665, "learning_rate": 6.863916101404748e-05, "loss": 0.0091, "step": 24570 }, { "grad_norm": 0.22801494598388672, "learning_rate": 6.8613586764818e-05, "loss": 0.006, "step": 24580 }, { "grad_norm": 0.24854330718517303, "learning_rate": 6.858800686127233e-05, "loss": 0.0062, "step": 24590 }, { "grad_norm": 0.2309127151966095, "learning_rate": 6.856242131118097e-05, "loss": 0.0088, "step": 24600 }, { "grad_norm": 0.20720410346984863, "learning_rate": 6.853683012231614e-05, "loss": 0.0065, "step": 24610 }, { "grad_norm": 0.2361244559288025, "learning_rate": 6.851123330245173e-05, "loss": 0.0083, "step": 24620 }, { "grad_norm": 0.20022918283939362, "learning_rate": 6.848563085936343e-05, "loss": 0.0062, "step": 24630 }, { "grad_norm": 0.15003590285778046, "learning_rate": 6.846002280082853e-05, "loss": 0.0055, "step": 24640 }, { "grad_norm": 0.16061264276504517, "learning_rate": 6.843440913462614e-05, "loss": 0.0064, "step": 24650 }, { "grad_norm": 0.2030072808265686, "learning_rate": 6.840878986853698e-05, "loss": 0.007, "step": 24660 }, { "grad_norm": 0.236202672123909, "learning_rate": 6.838316501034352e-05, "loss": 0.0066, "step": 24670 }, { "grad_norm": 0.22119833528995514, "learning_rate": 6.83575345678299e-05, "loss": 0.0077, "step": 24680 }, { "grad_norm": 0.2633749842643738, "learning_rate": 6.833189854878196e-05, "loss": 0.008, "step": 24690 }, { "grad_norm": 0.21641162037849426, "learning_rate": 6.83062569609873e-05, "loss": 0.0065, "step": 24700 }, { "grad_norm": 0.23379985988140106, "learning_rate": 6.828060981223512e-05, "loss": 0.006, "step": 24710 }, { "grad_norm": 0.20191489160060883, "learning_rate": 6.825495711031634e-05, "loss": 0.0091, "step": 24720 }, { "grad_norm": 0.21407240629196167, "learning_rate": 6.822929886302359e-05, "loss": 0.0063, "step": 24730 }, { "grad_norm": 0.25733667612075806, "learning_rate": 6.820363507815116e-05, "loss": 0.0075, "step": 24740 }, { "grad_norm": 0.22397202253341675, "learning_rate": 6.817796576349501e-05, "loss": 0.0069, "step": 24750 }, { "grad_norm": 0.2072632610797882, "learning_rate": 6.815229092685285e-05, "loss": 0.0092, "step": 24760 }, { "grad_norm": 0.1787474900484085, "learning_rate": 6.812661057602399e-05, "loss": 0.0056, "step": 24770 }, { "grad_norm": 0.20838628709316254, "learning_rate": 6.810092471880943e-05, "loss": 0.0063, "step": 24780 }, { "grad_norm": 0.20440411567687988, "learning_rate": 6.807523336301187e-05, "loss": 0.0075, "step": 24790 }, { "grad_norm": 0.20928245782852173, "learning_rate": 6.804953651643566e-05, "loss": 0.006, "step": 24800 }, { "grad_norm": 0.21724002063274384, "learning_rate": 6.802383418688685e-05, "loss": 0.0085, "step": 24810 }, { "grad_norm": 0.1721886396408081, "learning_rate": 6.799812638217309e-05, "loss": 0.0057, "step": 24820 }, { "grad_norm": 0.2006726711988449, "learning_rate": 6.797241311010373e-05, "loss": 0.0065, "step": 24830 }, { "grad_norm": 0.21389009058475494, "learning_rate": 6.794669437848982e-05, "loss": 0.0057, "step": 24840 }, { "grad_norm": 0.2328222244977951, "learning_rate": 6.792097019514402e-05, "loss": 0.0064, "step": 24850 }, { "grad_norm": 0.2139717936515808, "learning_rate": 6.789524056788064e-05, "loss": 0.0051, "step": 24860 }, { "grad_norm": 0.18213902413845062, "learning_rate": 6.786950550451567e-05, "loss": 0.0056, "step": 24870 }, { "grad_norm": 0.1845434010028839, "learning_rate": 6.784376501286676e-05, "loss": 0.0062, "step": 24880 }, { "grad_norm": 0.18680481612682343, "learning_rate": 6.781801910075316e-05, "loss": 0.0051, "step": 24890 }, { "grad_norm": 0.1739758849143982, "learning_rate": 6.779226777599581e-05, "loss": 0.0057, "step": 24900 }, { "grad_norm": 0.22760528326034546, "learning_rate": 6.776651104641729e-05, "loss": 0.0059, "step": 24910 }, { "grad_norm": 0.21769170463085175, "learning_rate": 6.774074891984183e-05, "loss": 0.0077, "step": 24920 }, { "grad_norm": 0.22711685299873352, "learning_rate": 6.771498140409526e-05, "loss": 0.0063, "step": 24930 }, { "grad_norm": 0.2919391095638275, "learning_rate": 6.768920850700506e-05, "loss": 0.0062, "step": 24940 }, { "grad_norm": 0.21893933415412903, "learning_rate": 6.766343023640039e-05, "loss": 0.0065, "step": 24950 }, { "grad_norm": 0.18296101689338684, "learning_rate": 6.763764660011198e-05, "loss": 0.007, "step": 24960 }, { "grad_norm": 0.19271834194660187, "learning_rate": 6.761185760597223e-05, "loss": 0.0058, "step": 24970 }, { "grad_norm": 0.33522406220436096, "learning_rate": 6.758606326181515e-05, "loss": 0.0087, "step": 24980 }, { "grad_norm": 0.23418518900871277, "learning_rate": 6.75602635754764e-05, "loss": 0.0072, "step": 24990 }, { "grad_norm": 0.23341238498687744, "learning_rate": 6.75344585547932e-05, "loss": 0.0073, "step": 25000 }, { "grad_norm": 0.22817090153694153, "learning_rate": 6.750864820760449e-05, "loss": 0.0059, "step": 25010 }, { "grad_norm": 0.20839819312095642, "learning_rate": 6.748283254175072e-05, "loss": 0.0056, "step": 25020 }, { "grad_norm": 0.18569742143154144, "learning_rate": 6.745701156507404e-05, "loss": 0.007, "step": 25030 }, { "grad_norm": 0.19279655814170837, "learning_rate": 6.743118528541818e-05, "loss": 0.0053, "step": 25040 }, { "grad_norm": 0.3393954336643219, "learning_rate": 6.740535371062846e-05, "loss": 0.0073, "step": 25050 }, { "grad_norm": 0.24617277085781097, "learning_rate": 6.737951684855185e-05, "loss": 0.0067, "step": 25060 }, { "grad_norm": 0.2199849784374237, "learning_rate": 6.735367470703691e-05, "loss": 0.0085, "step": 25070 }, { "grad_norm": 0.2632351517677307, "learning_rate": 6.732782729393379e-05, "loss": 0.0068, "step": 25080 }, { "grad_norm": 0.20831426978111267, "learning_rate": 6.730197461709425e-05, "loss": 0.006, "step": 25090 }, { "grad_norm": 0.2053185999393463, "learning_rate": 6.727611668437164e-05, "loss": 0.0072, "step": 25100 }, { "grad_norm": 0.19841746985912323, "learning_rate": 6.725025350362094e-05, "loss": 0.0063, "step": 25110 }, { "grad_norm": 0.2458447515964508, "learning_rate": 6.72243850826987e-05, "loss": 0.0074, "step": 25120 }, { "grad_norm": 0.19741925597190857, "learning_rate": 6.719851142946305e-05, "loss": 0.0067, "step": 25130 }, { "grad_norm": 0.21869440376758575, "learning_rate": 6.717263255177372e-05, "loss": 0.0081, "step": 25140 }, { "grad_norm": 0.250583291053772, "learning_rate": 6.714674845749205e-05, "loss": 0.0064, "step": 25150 }, { "grad_norm": 0.20444005727767944, "learning_rate": 6.712085915448092e-05, "loss": 0.0058, "step": 25160 }, { "grad_norm": 0.21273529529571533, "learning_rate": 6.709496465060486e-05, "loss": 0.0059, "step": 25170 }, { "grad_norm": 0.18499837815761566, "learning_rate": 6.706906495372987e-05, "loss": 0.0071, "step": 25180 }, { "grad_norm": 0.2700726389884949, "learning_rate": 6.704316007172365e-05, "loss": 0.0069, "step": 25190 }, { "grad_norm": 0.22663068771362305, "learning_rate": 6.701725001245539e-05, "loss": 0.0052, "step": 25200 }, { "grad_norm": 0.18079528212547302, "learning_rate": 6.699133478379588e-05, "loss": 0.0051, "step": 25210 }, { "grad_norm": 0.19316238164901733, "learning_rate": 6.69654143936175e-05, "loss": 0.0079, "step": 25220 }, { "grad_norm": 0.21459133923053741, "learning_rate": 6.693948884979419e-05, "loss": 0.0072, "step": 25230 }, { "grad_norm": 0.23129896819591522, "learning_rate": 6.691355816020142e-05, "loss": 0.0066, "step": 25240 }, { "grad_norm": 0.1841215342283249, "learning_rate": 6.688762233271624e-05, "loss": 0.0057, "step": 25250 }, { "grad_norm": 0.16104280948638916, "learning_rate": 6.68616813752173e-05, "loss": 0.0062, "step": 25260 }, { "grad_norm": 0.24268488585948944, "learning_rate": 6.683573529558477e-05, "loss": 0.0072, "step": 25270 }, { "grad_norm": 0.259103000164032, "learning_rate": 6.680978410170037e-05, "loss": 0.0064, "step": 25280 }, { "grad_norm": 0.2177886962890625, "learning_rate": 6.678382780144741e-05, "loss": 0.0076, "step": 25290 }, { "grad_norm": 0.1991564780473709, "learning_rate": 6.675786640271071e-05, "loss": 0.0072, "step": 25300 }, { "grad_norm": 0.22543810307979584, "learning_rate": 6.673189991337665e-05, "loss": 0.0073, "step": 25310 }, { "grad_norm": 0.24554254114627838, "learning_rate": 6.670592834133317e-05, "loss": 0.0078, "step": 25320 }, { "grad_norm": 0.15265288949012756, "learning_rate": 6.667995169446979e-05, "loss": 0.0063, "step": 25330 }, { "grad_norm": 0.16831746697425842, "learning_rate": 6.665396998067747e-05, "loss": 0.0057, "step": 25340 }, { "grad_norm": 0.19175328314304352, "learning_rate": 6.66279832078488e-05, "loss": 0.0048, "step": 25350 }, { "grad_norm": 0.18119966983795166, "learning_rate": 6.660199138387786e-05, "loss": 0.0055, "step": 25360 }, { "grad_norm": 0.24650025367736816, "learning_rate": 6.65759945166603e-05, "loss": 0.007, "step": 25370 }, { "grad_norm": 0.21147359907627106, "learning_rate": 6.654999261409326e-05, "loss": 0.0071, "step": 25380 }, { "grad_norm": 0.22879630327224731, "learning_rate": 6.652398568407544e-05, "loss": 0.0074, "step": 25390 }, { "grad_norm": 0.22692294418811798, "learning_rate": 6.649797373450707e-05, "loss": 0.0081, "step": 25400 }, { "grad_norm": 0.2574363946914673, "learning_rate": 6.647195677328988e-05, "loss": 0.0094, "step": 25410 }, { "grad_norm": 0.2013295441865921, "learning_rate": 6.644593480832712e-05, "loss": 0.0086, "step": 25420 }, { "grad_norm": 0.15698054432868958, "learning_rate": 6.641990784752363e-05, "loss": 0.0059, "step": 25430 }, { "grad_norm": 0.1815074235200882, "learning_rate": 6.639387589878566e-05, "loss": 0.0072, "step": 25440 }, { "grad_norm": 0.16183070838451385, "learning_rate": 6.636783897002103e-05, "loss": 0.0049, "step": 25450 }, { "grad_norm": 0.20442305505275726, "learning_rate": 6.63417970691391e-05, "loss": 0.0082, "step": 25460 }, { "grad_norm": 0.2078857719898224, "learning_rate": 6.63157502040507e-05, "loss": 0.0101, "step": 25470 }, { "grad_norm": 0.2201431542634964, "learning_rate": 6.628969838266819e-05, "loss": 0.0088, "step": 25480 }, { "grad_norm": 0.24755345284938812, "learning_rate": 6.626364161290541e-05, "loss": 0.0076, "step": 25490 }, { "grad_norm": 0.22055433690547943, "learning_rate": 6.623757990267774e-05, "loss": 0.0063, "step": 25500 }, { "grad_norm": 0.18375305831432343, "learning_rate": 6.621151325990201e-05, "loss": 0.007, "step": 25510 }, { "grad_norm": 0.2530211806297302, "learning_rate": 6.618544169249657e-05, "loss": 0.0088, "step": 25520 }, { "grad_norm": 0.26939213275909424, "learning_rate": 6.615936520838133e-05, "loss": 0.0082, "step": 25530 }, { "grad_norm": 0.3377273678779602, "learning_rate": 6.613328381547759e-05, "loss": 0.0075, "step": 25540 }, { "grad_norm": 0.26195868849754333, "learning_rate": 6.610719752170821e-05, "loss": 0.0049, "step": 25550 }, { "grad_norm": 0.20831884443759918, "learning_rate": 6.60811063349975e-05, "loss": 0.0054, "step": 25560 }, { "grad_norm": 0.23061248660087585, "learning_rate": 6.605501026327127e-05, "loss": 0.0067, "step": 25570 }, { "grad_norm": 0.19042521715164185, "learning_rate": 6.602890931445685e-05, "loss": 0.0071, "step": 25580 }, { "grad_norm": 0.2882711887359619, "learning_rate": 6.6002803496483e-05, "loss": 0.0083, "step": 25590 }, { "grad_norm": 0.2180003523826599, "learning_rate": 6.597669281727997e-05, "loss": 0.0075, "step": 25600 }, { "grad_norm": 0.20100033283233643, "learning_rate": 6.595057728477949e-05, "loss": 0.0062, "step": 25610 }, { "grad_norm": 0.20027615129947662, "learning_rate": 6.59244569069148e-05, "loss": 0.0066, "step": 25620 }, { "grad_norm": 0.26127099990844727, "learning_rate": 6.589833169162054e-05, "loss": 0.0077, "step": 25630 }, { "grad_norm": 0.24925601482391357, "learning_rate": 6.587220164683291e-05, "loss": 0.0059, "step": 25640 }, { "grad_norm": 0.271277517080307, "learning_rate": 6.58460667804895e-05, "loss": 0.0066, "step": 25650 }, { "grad_norm": 0.20398743450641632, "learning_rate": 6.581992710052938e-05, "loss": 0.0067, "step": 25660 }, { "grad_norm": 0.20166295766830444, "learning_rate": 6.579378261489311e-05, "loss": 0.0096, "step": 25670 }, { "grad_norm": 0.23275814950466156, "learning_rate": 6.576763333152268e-05, "loss": 0.0073, "step": 25680 }, { "grad_norm": 0.135862797498703, "learning_rate": 6.574147925836159e-05, "loss": 0.0049, "step": 25690 }, { "grad_norm": 0.1966680884361267, "learning_rate": 6.571532040335472e-05, "loss": 0.0072, "step": 25700 }, { "grad_norm": 0.2585192024707794, "learning_rate": 6.568915677444845e-05, "loss": 0.0079, "step": 25710 }, { "grad_norm": 0.16709916293621063, "learning_rate": 6.56629883795906e-05, "loss": 0.0053, "step": 25720 }, { "grad_norm": 0.1999332308769226, "learning_rate": 6.563681522673043e-05, "loss": 0.0073, "step": 25730 }, { "grad_norm": 0.22824805974960327, "learning_rate": 6.561063732381867e-05, "loss": 0.0068, "step": 25740 }, { "grad_norm": 0.21648965775966644, "learning_rate": 6.558445467880745e-05, "loss": 0.0071, "step": 25750 }, { "grad_norm": 0.2542528510093689, "learning_rate": 6.55582672996504e-05, "loss": 0.0073, "step": 25760 }, { "grad_norm": 0.2426459789276123, "learning_rate": 6.553207519430253e-05, "loss": 0.0056, "step": 25770 }, { "grad_norm": 0.2039594054222107, "learning_rate": 6.550587837072032e-05, "loss": 0.0062, "step": 25780 }, { "grad_norm": 0.23912934958934784, "learning_rate": 6.547967683686166e-05, "loss": 0.0063, "step": 25790 }, { "grad_norm": 0.18202634155750275, "learning_rate": 6.545347060068591e-05, "loss": 0.005, "step": 25800 }, { "grad_norm": 0.22227893769741058, "learning_rate": 6.542725967015382e-05, "loss": 0.006, "step": 25810 }, { "grad_norm": 0.21977637708187103, "learning_rate": 6.540104405322757e-05, "loss": 0.0072, "step": 25820 }, { "grad_norm": 0.2197333425283432, "learning_rate": 6.537482375787077e-05, "loss": 0.009, "step": 25830 }, { "grad_norm": 0.15945011377334595, "learning_rate": 6.534859879204845e-05, "loss": 0.0072, "step": 25840 }, { "grad_norm": 0.1592593640089035, "learning_rate": 6.532236916372709e-05, "loss": 0.007, "step": 25850 }, { "grad_norm": 0.2686917185783386, "learning_rate": 6.529613488087454e-05, "loss": 0.0097, "step": 25860 }, { "grad_norm": 0.24457047879695892, "learning_rate": 6.526989595146009e-05, "loss": 0.0056, "step": 25870 }, { "grad_norm": 0.21197229623794556, "learning_rate": 6.524365238345441e-05, "loss": 0.009, "step": 25880 }, { "grad_norm": 0.22684945166110992, "learning_rate": 6.521740418482964e-05, "loss": 0.006, "step": 25890 }, { "grad_norm": 0.20982791483402252, "learning_rate": 6.519115136355925e-05, "loss": 0.0058, "step": 25900 }, { "grad_norm": 0.23438315093517303, "learning_rate": 6.51648939276182e-05, "loss": 0.0081, "step": 25910 }, { "grad_norm": 0.21843135356903076, "learning_rate": 6.513863188498277e-05, "loss": 0.007, "step": 25920 }, { "grad_norm": 0.20278333127498627, "learning_rate": 6.511236524363068e-05, "loss": 0.0048, "step": 25930 }, { "grad_norm": 0.20683413743972778, "learning_rate": 6.508609401154104e-05, "loss": 0.0051, "step": 25940 }, { "grad_norm": 0.2596301734447479, "learning_rate": 6.505981819669439e-05, "loss": 0.0062, "step": 25950 }, { "grad_norm": 0.23002870380878448, "learning_rate": 6.503353780707258e-05, "loss": 0.0058, "step": 25960 }, { "grad_norm": 0.20470206439495087, "learning_rate": 6.500725285065895e-05, "loss": 0.0049, "step": 25970 }, { "grad_norm": 0.19926327466964722, "learning_rate": 6.498096333543813e-05, "loss": 0.0055, "step": 25980 }, { "grad_norm": 0.19966010749340057, "learning_rate": 6.49546692693962e-05, "loss": 0.0054, "step": 25990 }, { "grad_norm": 0.19689211249351501, "learning_rate": 6.492837066052059e-05, "loss": 0.0048, "step": 26000 }, { "grad_norm": 0.2969319224357605, "learning_rate": 6.490206751680014e-05, "loss": 0.009, "step": 26010 }, { "grad_norm": 0.22720490396022797, "learning_rate": 6.487575984622505e-05, "loss": 0.0053, "step": 26020 }, { "grad_norm": 0.2233182191848755, "learning_rate": 6.484944765678689e-05, "loss": 0.0079, "step": 26030 }, { "grad_norm": 0.1987072080373764, "learning_rate": 6.482313095647861e-05, "loss": 0.0054, "step": 26040 }, { "grad_norm": 0.1756778359413147, "learning_rate": 6.479680975329451e-05, "loss": 0.0054, "step": 26050 }, { "grad_norm": 0.15614423155784607, "learning_rate": 6.477048405523031e-05, "loss": 0.0055, "step": 26060 }, { "grad_norm": 0.21151113510131836, "learning_rate": 6.474415387028304e-05, "loss": 0.0067, "step": 26070 }, { "grad_norm": 0.19343069195747375, "learning_rate": 6.471781920645114e-05, "loss": 0.0066, "step": 26080 }, { "grad_norm": 0.1986183077096939, "learning_rate": 6.469148007173434e-05, "loss": 0.0074, "step": 26090 }, { "grad_norm": 0.22761370241641998, "learning_rate": 6.466513647413381e-05, "loss": 0.0065, "step": 26100 }, { "grad_norm": 0.18695175647735596, "learning_rate": 6.463878842165203e-05, "loss": 0.006, "step": 26110 }, { "grad_norm": 0.15956836938858032, "learning_rate": 6.461243592229286e-05, "loss": 0.0051, "step": 26120 }, { "grad_norm": 0.1976875364780426, "learning_rate": 6.458607898406146e-05, "loss": 0.0061, "step": 26130 }, { "grad_norm": 0.19613993167877197, "learning_rate": 6.455971761496439e-05, "loss": 0.0057, "step": 26140 }, { "grad_norm": 0.2289198637008667, "learning_rate": 6.453335182300953e-05, "loss": 0.0073, "step": 26150 }, { "grad_norm": 0.2191610187292099, "learning_rate": 6.450698161620612e-05, "loss": 0.0054, "step": 26160 }, { "grad_norm": 0.19832280278205872, "learning_rate": 6.448060700256473e-05, "loss": 0.0082, "step": 26170 }, { "grad_norm": 0.20640571415424347, "learning_rate": 6.445422799009726e-05, "loss": 0.005, "step": 26180 }, { "grad_norm": 0.22177433967590332, "learning_rate": 6.442784458681699e-05, "loss": 0.0056, "step": 26190 }, { "grad_norm": 0.19210496544837952, "learning_rate": 6.440145680073847e-05, "loss": 0.0079, "step": 26200 }, { "grad_norm": 0.26254889369010925, "learning_rate": 6.437506463987762e-05, "loss": 0.0084, "step": 26210 }, { "grad_norm": 0.209589883685112, "learning_rate": 6.434866811225168e-05, "loss": 0.0065, "step": 26220 }, { "grad_norm": 0.22509878873825073, "learning_rate": 6.432226722587923e-05, "loss": 0.0085, "step": 26230 }, { "grad_norm": 0.21649697422981262, "learning_rate": 6.429586198878015e-05, "loss": 0.0059, "step": 26240 }, { "grad_norm": 0.201228067278862, "learning_rate": 6.426945240897566e-05, "loss": 0.0053, "step": 26250 }, { "grad_norm": 0.1925959438085556, "learning_rate": 6.424303849448829e-05, "loss": 0.0063, "step": 26260 }, { "grad_norm": 0.2186427116394043, "learning_rate": 6.42166202533419e-05, "loss": 0.0062, "step": 26270 }, { "grad_norm": 0.23884597420692444, "learning_rate": 6.419019769356164e-05, "loss": 0.0048, "step": 26280 }, { "grad_norm": 0.21245859563350677, "learning_rate": 6.416377082317398e-05, "loss": 0.0052, "step": 26290 }, { "grad_norm": 0.2216310203075409, "learning_rate": 6.413733965020674e-05, "loss": 0.006, "step": 26300 }, { "grad_norm": 0.15961231291294098, "learning_rate": 6.411090418268896e-05, "loss": 0.0057, "step": 26310 }, { "grad_norm": 0.22585490345954895, "learning_rate": 6.408446442865109e-05, "loss": 0.0066, "step": 26320 }, { "grad_norm": 0.20640522241592407, "learning_rate": 6.405802039612479e-05, "loss": 0.0075, "step": 26330 }, { "grad_norm": 0.23104189336299896, "learning_rate": 6.403157209314308e-05, "loss": 0.0075, "step": 26340 }, { "grad_norm": 0.19122198224067688, "learning_rate": 6.400511952774024e-05, "loss": 0.0094, "step": 26350 }, { "grad_norm": 0.2364351451396942, "learning_rate": 6.397866270795187e-05, "loss": 0.0056, "step": 26360 }, { "grad_norm": 0.22780410945415497, "learning_rate": 6.395220164181489e-05, "loss": 0.0055, "step": 26370 }, { "grad_norm": 0.2700885832309723, "learning_rate": 6.39257363373674e-05, "loss": 0.0067, "step": 26380 }, { "grad_norm": 0.26172471046447754, "learning_rate": 6.389926680264892e-05, "loss": 0.0074, "step": 26390 }, { "grad_norm": 0.22210976481437683, "learning_rate": 6.387279304570017e-05, "loss": 0.0057, "step": 26400 }, { "grad_norm": 0.2141304612159729, "learning_rate": 6.384631507456319e-05, "loss": 0.0065, "step": 26410 }, { "grad_norm": 0.22563396394252777, "learning_rate": 6.381983289728126e-05, "loss": 0.0067, "step": 26420 }, { "grad_norm": 0.19412635266780853, "learning_rate": 6.3793346521899e-05, "loss": 0.0058, "step": 26430 }, { "grad_norm": 0.16654404997825623, "learning_rate": 6.376685595646226e-05, "loss": 0.0072, "step": 26440 }, { "grad_norm": 0.16080507636070251, "learning_rate": 6.374036120901816e-05, "loss": 0.0056, "step": 26450 }, { "grad_norm": 0.32743343710899353, "learning_rate": 6.371386228761514e-05, "loss": 0.0065, "step": 26460 }, { "grad_norm": 0.28099605441093445, "learning_rate": 6.368735920030283e-05, "loss": 0.0064, "step": 26470 }, { "grad_norm": 0.19512446224689484, "learning_rate": 6.366085195513218e-05, "loss": 0.0068, "step": 26480 }, { "grad_norm": 0.2368878275156021, "learning_rate": 6.363434056015543e-05, "loss": 0.0073, "step": 26490 }, { "grad_norm": 0.18388403952121735, "learning_rate": 6.360782502342599e-05, "loss": 0.0055, "step": 26500 }, { "grad_norm": 0.1994674950838089, "learning_rate": 6.358130535299862e-05, "loss": 0.0064, "step": 26510 }, { "grad_norm": 0.26185137033462524, "learning_rate": 6.355478155692926e-05, "loss": 0.0065, "step": 26520 }, { "grad_norm": 0.19742263853549957, "learning_rate": 6.352825364327517e-05, "loss": 0.0058, "step": 26530 }, { "grad_norm": 0.24318774044513702, "learning_rate": 6.350172162009482e-05, "loss": 0.0078, "step": 26540 }, { "grad_norm": 0.2802782952785492, "learning_rate": 6.347518549544793e-05, "loss": 0.0075, "step": 26550 }, { "grad_norm": 0.1994744837284088, "learning_rate": 6.344864527739547e-05, "loss": 0.0062, "step": 26560 }, { "grad_norm": 0.2588227093219757, "learning_rate": 6.342210097399966e-05, "loss": 0.0079, "step": 26570 }, { "grad_norm": 0.20334990322589874, "learning_rate": 6.339555259332398e-05, "loss": 0.0076, "step": 26580 }, { "grad_norm": 0.2466357797384262, "learning_rate": 6.33690001434331e-05, "loss": 0.0055, "step": 26590 }, { "grad_norm": 0.1516738384962082, "learning_rate": 6.334244363239296e-05, "loss": 0.0059, "step": 26600 }, { "grad_norm": 0.25755223631858826, "learning_rate": 6.331588306827073e-05, "loss": 0.0082, "step": 26610 }, { "grad_norm": 0.20359931886196136, "learning_rate": 6.328931845913483e-05, "loss": 0.0078, "step": 26620 }, { "grad_norm": 0.26368001103401184, "learning_rate": 6.326274981305484e-05, "loss": 0.0061, "step": 26630 }, { "grad_norm": 0.20655317604541779, "learning_rate": 6.323617713810166e-05, "loss": 0.0056, "step": 26640 }, { "grad_norm": 0.23046205937862396, "learning_rate": 6.320960044234734e-05, "loss": 0.0055, "step": 26650 }, { "grad_norm": 0.2450997680425644, "learning_rate": 6.318301973386518e-05, "loss": 0.0066, "step": 26660 }, { "grad_norm": 0.21167534589767456, "learning_rate": 6.315643502072971e-05, "loss": 0.0056, "step": 26670 }, { "grad_norm": 0.17250072956085205, "learning_rate": 6.312984631101667e-05, "loss": 0.0055, "step": 26680 }, { "grad_norm": 0.24523146450519562, "learning_rate": 6.310325361280297e-05, "loss": 0.0059, "step": 26690 }, { "grad_norm": 0.24308966100215912, "learning_rate": 6.30766569341668e-05, "loss": 0.006, "step": 26700 }, { "grad_norm": 0.18970511853694916, "learning_rate": 6.305005628318753e-05, "loss": 0.0067, "step": 26710 }, { "grad_norm": 0.20905692875385284, "learning_rate": 6.302345166794572e-05, "loss": 0.0055, "step": 26720 }, { "grad_norm": 0.18546536564826965, "learning_rate": 6.299684309652316e-05, "loss": 0.0053, "step": 26730 }, { "grad_norm": 0.12105439603328705, "learning_rate": 6.297023057700283e-05, "loss": 0.0044, "step": 26740 }, { "grad_norm": 0.20489977300167084, "learning_rate": 6.294361411746891e-05, "loss": 0.0062, "step": 26750 }, { "grad_norm": 0.24438318610191345, "learning_rate": 6.291699372600677e-05, "loss": 0.0064, "step": 26760 }, { "grad_norm": 0.22675564885139465, "learning_rate": 6.2890369410703e-05, "loss": 0.005, "step": 26770 }, { "grad_norm": 0.2818538248538971, "learning_rate": 6.286374117964534e-05, "loss": 0.0074, "step": 26780 }, { "grad_norm": 0.18035154044628143, "learning_rate": 6.283710904092277e-05, "loss": 0.0043, "step": 26790 }, { "grad_norm": 0.1388295739889145, "learning_rate": 6.281047300262542e-05, "loss": 0.0059, "step": 26800 }, { "grad_norm": 0.1769358366727829, "learning_rate": 6.278383307284461e-05, "loss": 0.0061, "step": 26810 }, { "grad_norm": 0.16022364795207977, "learning_rate": 6.275718925967284e-05, "loss": 0.0056, "step": 26820 }, { "grad_norm": 0.22695006430149078, "learning_rate": 6.273054157120382e-05, "loss": 0.0068, "step": 26830 }, { "grad_norm": 0.21520768105983734, "learning_rate": 6.270389001553238e-05, "loss": 0.0058, "step": 26840 }, { "grad_norm": 0.19172999262809753, "learning_rate": 6.26772346007546e-05, "loss": 0.0056, "step": 26850 }, { "grad_norm": 0.2523164451122284, "learning_rate": 6.265057533496767e-05, "loss": 0.0049, "step": 26860 }, { "grad_norm": 0.26460522413253784, "learning_rate": 6.262391222626997e-05, "loss": 0.0069, "step": 26870 }, { "grad_norm": 0.25140002369880676, "learning_rate": 6.259724528276106e-05, "loss": 0.0064, "step": 26880 }, { "grad_norm": 0.1940133422613144, "learning_rate": 6.257057451254162e-05, "loss": 0.008, "step": 26890 }, { "grad_norm": 0.22486016154289246, "learning_rate": 6.254389992371357e-05, "loss": 0.0068, "step": 26900 }, { "grad_norm": 0.2173072248697281, "learning_rate": 6.25172215243799e-05, "loss": 0.006, "step": 26910 }, { "grad_norm": 0.2645227909088135, "learning_rate": 6.249053932264486e-05, "loss": 0.0064, "step": 26920 }, { "grad_norm": 0.1636214554309845, "learning_rate": 6.246385332661376e-05, "loss": 0.0066, "step": 26930 }, { "grad_norm": 0.23108375072479248, "learning_rate": 6.24371635443931e-05, "loss": 0.0062, "step": 26940 }, { "grad_norm": 0.20354636013507843, "learning_rate": 6.241046998409054e-05, "loss": 0.0074, "step": 26950 }, { "grad_norm": 0.23577523231506348, "learning_rate": 6.238377265381489e-05, "loss": 0.0063, "step": 26960 }, { "grad_norm": 0.3131958544254303, "learning_rate": 6.235707156167607e-05, "loss": 0.0058, "step": 26970 }, { "grad_norm": 0.25694388151168823, "learning_rate": 6.233036671578519e-05, "loss": 0.0065, "step": 26980 }, { "grad_norm": 0.19088886678218842, "learning_rate": 6.230365812425445e-05, "loss": 0.0069, "step": 26990 }, { "grad_norm": 0.247942715883255, "learning_rate": 6.227694579519724e-05, "loss": 0.0055, "step": 27000 }, { "grad_norm": 0.23688679933547974, "learning_rate": 6.225022973672805e-05, "loss": 0.0061, "step": 27010 }, { "grad_norm": 0.2073313444852829, "learning_rate": 6.222350995696253e-05, "loss": 0.0058, "step": 27020 }, { "grad_norm": 0.24426770210266113, "learning_rate": 6.21967864640174e-05, "loss": 0.0057, "step": 27030 }, { "grad_norm": 0.1389980912208557, "learning_rate": 6.217005926601059e-05, "loss": 0.0052, "step": 27040 }, { "grad_norm": 0.18472421169281006, "learning_rate": 6.214332837106111e-05, "loss": 0.0052, "step": 27050 }, { "grad_norm": 0.2815677523612976, "learning_rate": 6.21165937872891e-05, "loss": 0.0071, "step": 27060 }, { "grad_norm": 0.2159937471151352, "learning_rate": 6.208985552281582e-05, "loss": 0.0063, "step": 27070 }, { "grad_norm": 0.19135768711566925, "learning_rate": 6.206311358576364e-05, "loss": 0.0064, "step": 27080 }, { "grad_norm": 0.1525515466928482, "learning_rate": 6.203636798425608e-05, "loss": 0.0047, "step": 27090 }, { "grad_norm": 0.15612834692001343, "learning_rate": 6.20096187264177e-05, "loss": 0.0053, "step": 27100 }, { "grad_norm": 0.1883126199245453, "learning_rate": 6.198286582037425e-05, "loss": 0.0051, "step": 27110 }, { "grad_norm": 0.21747751533985138, "learning_rate": 6.195610927425256e-05, "loss": 0.0057, "step": 27120 }, { "grad_norm": 0.20136936008930206, "learning_rate": 6.192934909618056e-05, "loss": 0.0054, "step": 27130 }, { "grad_norm": 0.18872247636318207, "learning_rate": 6.190258529428728e-05, "loss": 0.0049, "step": 27140 }, { "grad_norm": 0.28938767313957214, "learning_rate": 6.187581787670285e-05, "loss": 0.0072, "step": 27150 }, { "grad_norm": 0.2653864324092865, "learning_rate": 6.184904685155852e-05, "loss": 0.0077, "step": 27160 }, { "grad_norm": 0.2535940706729889, "learning_rate": 6.18222722269866e-05, "loss": 0.0076, "step": 27170 }, { "grad_norm": 0.1849503070116043, "learning_rate": 6.179549401112053e-05, "loss": 0.0067, "step": 27180 }, { "grad_norm": 0.2222738415002823, "learning_rate": 6.176871221209482e-05, "loss": 0.0079, "step": 27190 }, { "grad_norm": 0.29542919993400574, "learning_rate": 6.174192683804508e-05, "loss": 0.0059, "step": 27200 }, { "grad_norm": 0.22121202945709229, "learning_rate": 6.1715137897108e-05, "loss": 0.0067, "step": 27210 }, { "grad_norm": 0.21276335418224335, "learning_rate": 6.168834539742134e-05, "loss": 0.0055, "step": 27220 }, { "grad_norm": 0.29806381464004517, "learning_rate": 6.166154934712397e-05, "loss": 0.0052, "step": 27230 }, { "grad_norm": 0.2035537213087082, "learning_rate": 6.163474975435581e-05, "loss": 0.0053, "step": 27240 }, { "grad_norm": 0.2204233556985855, "learning_rate": 6.160794662725787e-05, "loss": 0.006, "step": 27250 }, { "grad_norm": 0.23584558069705963, "learning_rate": 6.158113997397222e-05, "loss": 0.0063, "step": 27260 }, { "grad_norm": 0.19361534714698792, "learning_rate": 6.155432980264205e-05, "loss": 0.006, "step": 27270 }, { "grad_norm": 0.18379713594913483, "learning_rate": 6.152751612141156e-05, "loss": 0.0061, "step": 27280 }, { "grad_norm": 0.18036022782325745, "learning_rate": 6.150069893842602e-05, "loss": 0.006, "step": 27290 }, { "grad_norm": 0.13554903864860535, "learning_rate": 6.147387826183182e-05, "loss": 0.0085, "step": 27300 }, { "grad_norm": 0.23212015628814697, "learning_rate": 6.144705409977635e-05, "loss": 0.0061, "step": 27310 }, { "grad_norm": 0.21660080552101135, "learning_rate": 6.142022646040808e-05, "loss": 0.0058, "step": 27320 }, { "grad_norm": 0.18117165565490723, "learning_rate": 6.139339535187653e-05, "loss": 0.0049, "step": 27330 }, { "grad_norm": 0.16411201655864716, "learning_rate": 6.136656078233232e-05, "loss": 0.0051, "step": 27340 }, { "grad_norm": 0.1901129186153412, "learning_rate": 6.133972275992707e-05, "loss": 0.0046, "step": 27350 }, { "grad_norm": 0.1666228026151657, "learning_rate": 6.131288129281342e-05, "loss": 0.0045, "step": 27360 }, { "grad_norm": 0.18389329314231873, "learning_rate": 6.128603638914516e-05, "loss": 0.0066, "step": 27370 }, { "grad_norm": 0.1898213028907776, "learning_rate": 6.125918805707704e-05, "loss": 0.006, "step": 27380 }, { "grad_norm": 0.19179952144622803, "learning_rate": 6.123233630476485e-05, "loss": 0.0059, "step": 27390 }, { "grad_norm": 0.22159911692142487, "learning_rate": 6.120548114036547e-05, "loss": 0.0056, "step": 27400 }, { "grad_norm": 0.2556234896183014, "learning_rate": 6.117862257203679e-05, "loss": 0.006, "step": 27410 }, { "grad_norm": 0.2184114009141922, "learning_rate": 6.115176060793771e-05, "loss": 0.006, "step": 27420 }, { "grad_norm": 0.2209833711385727, "learning_rate": 6.112489525622822e-05, "loss": 0.0074, "step": 27430 }, { "grad_norm": 0.2665866017341614, "learning_rate": 6.109802652506928e-05, "loss": 0.0065, "step": 27440 }, { "grad_norm": 0.21960771083831787, "learning_rate": 6.107115442262291e-05, "loss": 0.0075, "step": 27450 }, { "grad_norm": 0.15529780089855194, "learning_rate": 6.104427895705214e-05, "loss": 0.0048, "step": 27460 }, { "grad_norm": 0.17456737160682678, "learning_rate": 6.101740013652103e-05, "loss": 0.0067, "step": 27470 }, { "grad_norm": 0.17030955851078033, "learning_rate": 6.099051796919465e-05, "loss": 0.0059, "step": 27480 }, { "grad_norm": 0.14730389416217804, "learning_rate": 6.096363246323911e-05, "loss": 0.0047, "step": 27490 }, { "grad_norm": 0.19984987378120422, "learning_rate": 6.0936743626821504e-05, "loss": 0.0058, "step": 27500 }, { "grad_norm": 0.24629443883895874, "learning_rate": 6.090985146810996e-05, "loss": 0.0056, "step": 27510 }, { "grad_norm": 0.19523531198501587, "learning_rate": 6.088295599527357e-05, "loss": 0.0055, "step": 27520 }, { "grad_norm": 0.19997794926166534, "learning_rate": 6.085605721648252e-05, "loss": 0.0044, "step": 27530 }, { "grad_norm": 0.2269504815340042, "learning_rate": 6.082915513990792e-05, "loss": 0.0061, "step": 27540 }, { "grad_norm": 0.19974826276302338, "learning_rate": 6.080224977372192e-05, "loss": 0.0059, "step": 27550 }, { "grad_norm": 0.14757752418518066, "learning_rate": 6.0775341126097666e-05, "loss": 0.0049, "step": 27560 }, { "grad_norm": 0.18435977399349213, "learning_rate": 6.074842920520926e-05, "loss": 0.0065, "step": 27570 }, { "grad_norm": 0.2411748319864273, "learning_rate": 6.072151401923186e-05, "loss": 0.0068, "step": 27580 }, { "grad_norm": 0.20606940984725952, "learning_rate": 6.069459557634159e-05, "loss": 0.008, "step": 27590 }, { "grad_norm": 0.2300151288509369, "learning_rate": 6.066767388471557e-05, "loss": 0.0064, "step": 27600 }, { "grad_norm": 0.19813773036003113, "learning_rate": 6.064074895253188e-05, "loss": 0.007, "step": 27610 }, { "grad_norm": 0.20168276131153107, "learning_rate": 6.061382078796961e-05, "loss": 0.01, "step": 27620 }, { "grad_norm": 0.20673015713691711, "learning_rate": 6.0586889399208814e-05, "loss": 0.0075, "step": 27630 }, { "grad_norm": 0.25116589665412903, "learning_rate": 6.0559954794430565e-05, "loss": 0.0058, "step": 27640 }, { "grad_norm": 0.22388678789138794, "learning_rate": 6.053301698181687e-05, "loss": 0.006, "step": 27650 }, { "grad_norm": 0.1884300410747528, "learning_rate": 6.0506075969550725e-05, "loss": 0.0052, "step": 27660 }, { "grad_norm": 0.16081227362155914, "learning_rate": 6.047913176581609e-05, "loss": 0.0055, "step": 27670 }, { "grad_norm": 0.1714400202035904, "learning_rate": 6.0452184378797904e-05, "loss": 0.0044, "step": 27680 }, { "grad_norm": 0.20271414518356323, "learning_rate": 6.042523381668209e-05, "loss": 0.0063, "step": 27690 }, { "grad_norm": 0.25421056151390076, "learning_rate": 6.03982800876555e-05, "loss": 0.0064, "step": 27700 }, { "grad_norm": 0.20130525529384613, "learning_rate": 6.0371323199905975e-05, "loss": 0.0058, "step": 27710 }, { "grad_norm": 0.19146931171417236, "learning_rate": 6.03443631616223e-05, "loss": 0.0047, "step": 27720 }, { "grad_norm": 0.1841273307800293, "learning_rate": 6.031739998099421e-05, "loss": 0.0044, "step": 27730 }, { "grad_norm": 0.19991715252399445, "learning_rate": 6.029043366621243e-05, "loss": 0.0064, "step": 27740 }, { "grad_norm": 0.2284337431192398, "learning_rate": 6.0263464225468615e-05, "loss": 0.0065, "step": 27750 }, { "grad_norm": 0.29523906111717224, "learning_rate": 6.023649166695534e-05, "loss": 0.0061, "step": 27760 }, { "grad_norm": 0.20799440145492554, "learning_rate": 6.0209515998866186e-05, "loss": 0.0079, "step": 27770 }, { "grad_norm": 0.1822252869606018, "learning_rate": 6.018253722939563e-05, "loss": 0.0051, "step": 27780 }, { "grad_norm": 0.2678062617778778, "learning_rate": 6.015555536673914e-05, "loss": 0.0069, "step": 27790 }, { "grad_norm": 0.23746149241924286, "learning_rate": 6.0128570419093054e-05, "loss": 0.006, "step": 27800 }, { "grad_norm": 0.22552046179771423, "learning_rate": 6.010158239465471e-05, "loss": 0.0061, "step": 27810 }, { "grad_norm": 0.30230170488357544, "learning_rate": 6.007459130162235e-05, "loss": 0.0072, "step": 27820 }, { "grad_norm": 0.22161483764648438, "learning_rate": 6.004759714819516e-05, "loss": 0.0055, "step": 27830 }, { "grad_norm": 0.19161266088485718, "learning_rate": 6.002059994257323e-05, "loss": 0.0051, "step": 27840 }, { "grad_norm": 0.18854838609695435, "learning_rate": 5.999359969295764e-05, "loss": 0.0053, "step": 27850 }, { "grad_norm": 0.1774907410144806, "learning_rate": 5.9966596407550314e-05, "loss": 0.0055, "step": 27860 }, { "grad_norm": 0.19992147386074066, "learning_rate": 5.993959009455416e-05, "loss": 0.0061, "step": 27870 }, { "grad_norm": 0.23237383365631104, "learning_rate": 5.991258076217298e-05, "loss": 0.0062, "step": 27880 }, { "grad_norm": 0.18414562940597534, "learning_rate": 5.988556841861147e-05, "loss": 0.0072, "step": 27890 }, { "grad_norm": 0.22399203479290009, "learning_rate": 5.985855307207531e-05, "loss": 0.0053, "step": 27900 }, { "grad_norm": 0.19938924908638, "learning_rate": 5.9831534730771e-05, "loss": 0.0079, "step": 27910 }, { "grad_norm": 0.18949943780899048, "learning_rate": 5.980451340290605e-05, "loss": 0.0049, "step": 27920 }, { "grad_norm": 0.17120018601417542, "learning_rate": 5.97774890966888e-05, "loss": 0.0073, "step": 27930 }, { "grad_norm": 0.3061679005622864, "learning_rate": 5.975046182032851e-05, "loss": 0.0049, "step": 27940 }, { "grad_norm": 0.25692275166511536, "learning_rate": 5.972343158203537e-05, "loss": 0.0083, "step": 27950 }, { "grad_norm": 0.25221413373947144, "learning_rate": 5.969639839002045e-05, "loss": 0.0058, "step": 27960 }, { "grad_norm": 0.23802326619625092, "learning_rate": 5.966936225249572e-05, "loss": 0.0064, "step": 27970 }, { "grad_norm": 0.2714694142341614, "learning_rate": 5.9642323177674044e-05, "loss": 0.0062, "step": 27980 }, { "grad_norm": 0.16080555319786072, "learning_rate": 5.9615281173769154e-05, "loss": 0.007, "step": 27990 }, { "grad_norm": 0.18601246178150177, "learning_rate": 5.958823624899574e-05, "loss": 0.0051, "step": 28000 }, { "grad_norm": 0.1622346192598343, "learning_rate": 5.956118841156933e-05, "loss": 0.0064, "step": 28010 }, { "grad_norm": 0.2637445330619812, "learning_rate": 5.953413766970631e-05, "loss": 0.0059, "step": 28020 }, { "grad_norm": 0.2083210051059723, "learning_rate": 5.9507084031624e-05, "loss": 0.0066, "step": 28030 }, { "grad_norm": 0.23868678510189056, "learning_rate": 5.948002750554058e-05, "loss": 0.0056, "step": 28040 }, { "grad_norm": 0.20230062305927277, "learning_rate": 5.9452968099675124e-05, "loss": 0.008, "step": 28050 }, { "grad_norm": 0.21888235211372375, "learning_rate": 5.9425905822247527e-05, "loss": 0.0059, "step": 28060 }, { "grad_norm": 0.1834922730922699, "learning_rate": 5.939884068147864e-05, "loss": 0.005, "step": 28070 }, { "grad_norm": 0.2151925414800644, "learning_rate": 5.937177268559011e-05, "loss": 0.005, "step": 28080 }, { "grad_norm": 0.15771934390068054, "learning_rate": 5.934470184280448e-05, "loss": 0.0042, "step": 28090 }, { "grad_norm": 0.1876213699579239, "learning_rate": 5.931762816134516e-05, "loss": 0.0071, "step": 28100 }, { "grad_norm": 0.11851833760738373, "learning_rate": 5.9290551649436434e-05, "loss": 0.0049, "step": 28110 }, { "grad_norm": 0.1731957197189331, "learning_rate": 5.9263472315303416e-05, "loss": 0.0047, "step": 28120 }, { "grad_norm": 0.18775789439678192, "learning_rate": 5.9236390167172096e-05, "loss": 0.0045, "step": 28130 }, { "grad_norm": 0.24825909733772278, "learning_rate": 5.920930521326932e-05, "loss": 0.0058, "step": 28140 }, { "grad_norm": 0.1368711143732071, "learning_rate": 5.918221746182276e-05, "loss": 0.0044, "step": 28150 }, { "grad_norm": 0.21826130151748657, "learning_rate": 5.9155126921061e-05, "loss": 0.0063, "step": 28160 }, { "grad_norm": 0.27194494009017944, "learning_rate": 5.91280335992134e-05, "loss": 0.0068, "step": 28170 }, { "grad_norm": 0.16480326652526855, "learning_rate": 5.91009375045102e-05, "loss": 0.0053, "step": 28180 }, { "grad_norm": 0.2562893331050873, "learning_rate": 5.9073838645182476e-05, "loss": 0.0067, "step": 28190 }, { "grad_norm": 0.28722527623176575, "learning_rate": 5.904673702946217e-05, "loss": 0.0057, "step": 28200 }, { "grad_norm": 0.2712259292602539, "learning_rate": 5.9019632665582004e-05, "loss": 0.0063, "step": 28210 }, { "grad_norm": 0.23969018459320068, "learning_rate": 5.899252556177559e-05, "loss": 0.0054, "step": 28220 }, { "grad_norm": 0.14926593005657196, "learning_rate": 5.896541572627735e-05, "loss": 0.0046, "step": 28230 }, { "grad_norm": 0.21841435134410858, "learning_rate": 5.893830316732253e-05, "loss": 0.0059, "step": 28240 }, { "grad_norm": 0.17031942307949066, "learning_rate": 5.8911187893147214e-05, "loss": 0.0052, "step": 28250 }, { "grad_norm": 0.19262637197971344, "learning_rate": 5.888406991198828e-05, "loss": 0.0058, "step": 28260 }, { "grad_norm": 0.2473677545785904, "learning_rate": 5.885694923208349e-05, "loss": 0.005, "step": 28270 }, { "grad_norm": 0.1710784137248993, "learning_rate": 5.882982586167138e-05, "loss": 0.0056, "step": 28280 }, { "grad_norm": 0.16016383469104767, "learning_rate": 5.880269980899131e-05, "loss": 0.0047, "step": 28290 }, { "grad_norm": 0.21569634974002838, "learning_rate": 5.8775571082283465e-05, "loss": 0.0054, "step": 28300 }, { "grad_norm": 0.2260180413722992, "learning_rate": 5.8748439689788824e-05, "loss": 0.0063, "step": 28310 }, { "grad_norm": 0.25348004698753357, "learning_rate": 5.87213056397492e-05, "loss": 0.0071, "step": 28320 }, { "grad_norm": 0.20415283739566803, "learning_rate": 5.869416894040719e-05, "loss": 0.0052, "step": 28330 }, { "grad_norm": 0.1627565622329712, "learning_rate": 5.866702960000621e-05, "loss": 0.0046, "step": 28340 }, { "grad_norm": 0.18913191556930542, "learning_rate": 5.863988762679048e-05, "loss": 0.0052, "step": 28350 }, { "grad_norm": 0.19791871309280396, "learning_rate": 5.8612743029005e-05, "loss": 0.005, "step": 28360 }, { "grad_norm": 0.17180344462394714, "learning_rate": 5.858559581489561e-05, "loss": 0.0044, "step": 28370 }, { "grad_norm": 0.1873185634613037, "learning_rate": 5.85584459927089e-05, "loss": 0.0059, "step": 28380 }, { "grad_norm": 0.18828752636909485, "learning_rate": 5.853129357069227e-05, "loss": 0.0059, "step": 28390 }, { "grad_norm": 0.1895962655544281, "learning_rate": 5.8504138557093913e-05, "loss": 0.0042, "step": 28400 }, { "grad_norm": 0.17760205268859863, "learning_rate": 5.8476980960162784e-05, "loss": 0.0074, "step": 28410 }, { "grad_norm": 0.20683133602142334, "learning_rate": 5.844982078814868e-05, "loss": 0.0054, "step": 28420 }, { "grad_norm": 0.1892419159412384, "learning_rate": 5.842265804930211e-05, "loss": 0.006, "step": 28430 }, { "grad_norm": 0.18328820168972015, "learning_rate": 5.839549275187444e-05, "loss": 0.0056, "step": 28440 }, { "grad_norm": 0.21130099892616272, "learning_rate": 5.836832490411771e-05, "loss": 0.0055, "step": 28450 }, { "grad_norm": 0.242754265666008, "learning_rate": 5.834115451428485e-05, "loss": 0.0055, "step": 28460 }, { "grad_norm": 0.2330566793680191, "learning_rate": 5.831398159062946e-05, "loss": 0.0075, "step": 28470 }, { "grad_norm": 0.2693495452404022, "learning_rate": 5.828680614140599e-05, "loss": 0.0069, "step": 28480 }, { "grad_norm": 0.22436171770095825, "learning_rate": 5.825962817486962e-05, "loss": 0.0061, "step": 28490 }, { "grad_norm": 0.20326124131679535, "learning_rate": 5.823244769927629e-05, "loss": 0.0062, "step": 28500 }, { "grad_norm": 0.2075069695711136, "learning_rate": 5.8205264722882716e-05, "loss": 0.0043, "step": 28510 }, { "grad_norm": 0.16633442044258118, "learning_rate": 5.817807925394636e-05, "loss": 0.0053, "step": 28520 }, { "grad_norm": 0.14915831387043, "learning_rate": 5.815089130072546e-05, "loss": 0.0061, "step": 28530 }, { "grad_norm": 0.175053209066391, "learning_rate": 5.8123700871479e-05, "loss": 0.0056, "step": 28540 }, { "grad_norm": 0.23188957571983337, "learning_rate": 5.809650797446671e-05, "loss": 0.0058, "step": 28550 }, { "grad_norm": 0.2453858107328415, "learning_rate": 5.806931261794907e-05, "loss": 0.0068, "step": 28560 }, { "grad_norm": 0.20580248534679413, "learning_rate": 5.804211481018731e-05, "loss": 0.0059, "step": 28570 }, { "grad_norm": 0.2458379715681076, "learning_rate": 5.801491455944341e-05, "loss": 0.0058, "step": 28580 }, { "grad_norm": 0.2027692049741745, "learning_rate": 5.79877118739801e-05, "loss": 0.0059, "step": 28590 }, { "grad_norm": 0.22771303355693817, "learning_rate": 5.7960506762060816e-05, "loss": 0.0047, "step": 28600 }, { "grad_norm": 0.21013477444648743, "learning_rate": 5.793329923194977e-05, "loss": 0.0052, "step": 28610 }, { "grad_norm": 0.19989684224128723, "learning_rate": 5.790608929191187e-05, "loss": 0.0056, "step": 28620 }, { "grad_norm": 0.16074591875076294, "learning_rate": 5.78788769502128e-05, "loss": 0.0072, "step": 28630 }, { "grad_norm": 0.17087243497371674, "learning_rate": 5.785166221511894e-05, "loss": 0.0047, "step": 28640 }, { "grad_norm": 0.22725346684455872, "learning_rate": 5.7824445094897415e-05, "loss": 0.0055, "step": 28650 }, { "grad_norm": 0.18366172909736633, "learning_rate": 5.7797225597816065e-05, "loss": 0.0043, "step": 28660 }, { "grad_norm": 0.1574377864599228, "learning_rate": 5.777000373214345e-05, "loss": 0.0045, "step": 28670 }, { "grad_norm": 0.2128681093454361, "learning_rate": 5.774277950614885e-05, "loss": 0.006, "step": 28680 }, { "grad_norm": 0.2590664327144623, "learning_rate": 5.771555292810227e-05, "loss": 0.0052, "step": 28690 }, { "grad_norm": 0.23427312076091766, "learning_rate": 5.768832400627444e-05, "loss": 0.0064, "step": 28700 }, { "grad_norm": 0.22861208021640778, "learning_rate": 5.7661092748936775e-05, "loss": 0.0062, "step": 28710 }, { "grad_norm": 0.20201127231121063, "learning_rate": 5.76338591643614e-05, "loss": 0.0046, "step": 28720 }, { "grad_norm": 0.2151762843132019, "learning_rate": 5.760662326082118e-05, "loss": 0.0059, "step": 28730 }, { "grad_norm": 0.1839437037706375, "learning_rate": 5.757938504658965e-05, "loss": 0.0056, "step": 28740 }, { "grad_norm": 0.1831098049879074, "learning_rate": 5.755214452994107e-05, "loss": 0.0057, "step": 28750 }, { "grad_norm": 0.140682652592659, "learning_rate": 5.752490171915039e-05, "loss": 0.0046, "step": 28760 }, { "grad_norm": 0.16588278114795685, "learning_rate": 5.749765662249324e-05, "loss": 0.0065, "step": 28770 }, { "grad_norm": 0.19379691779613495, "learning_rate": 5.747040924824596e-05, "loss": 0.0059, "step": 28780 }, { "grad_norm": 0.20092302560806274, "learning_rate": 5.7443159604685613e-05, "loss": 0.0052, "step": 28790 }, { "grad_norm": 0.21887630224227905, "learning_rate": 5.74159077000899e-05, "loss": 0.0049, "step": 28800 }, { "grad_norm": 0.2055760771036148, "learning_rate": 5.7388653542737235e-05, "loss": 0.0051, "step": 28810 }, { "grad_norm": 0.22898320853710175, "learning_rate": 5.736139714090672e-05, "loss": 0.008, "step": 28820 }, { "grad_norm": 0.2068106085062027, "learning_rate": 5.73341385028781e-05, "loss": 0.005, "step": 28830 }, { "grad_norm": 0.1926884949207306, "learning_rate": 5.7306877636931855e-05, "loss": 0.0058, "step": 28840 }, { "grad_norm": 0.18023042380809784, "learning_rate": 5.7279614551349125e-05, "loss": 0.0051, "step": 28850 }, { "grad_norm": 0.21538949012756348, "learning_rate": 5.725234925441169e-05, "loss": 0.0048, "step": 28860 }, { "grad_norm": 0.1922045350074768, "learning_rate": 5.7225081754402044e-05, "loss": 0.0054, "step": 28870 }, { "grad_norm": 0.19482499361038208, "learning_rate": 5.7197812059603326e-05, "loss": 0.0054, "step": 28880 }, { "grad_norm": 0.18533092737197876, "learning_rate": 5.717054017829934e-05, "loss": 0.0051, "step": 28890 }, { "grad_norm": 0.2453930377960205, "learning_rate": 5.7143266118774584e-05, "loss": 0.0062, "step": 28900 }, { "grad_norm": 0.24753063917160034, "learning_rate": 5.711598988931418e-05, "loss": 0.005, "step": 28910 }, { "grad_norm": 0.21387207508087158, "learning_rate": 5.7088711498203954e-05, "loss": 0.0046, "step": 28920 }, { "grad_norm": 0.2242448627948761, "learning_rate": 5.706143095373033e-05, "loss": 0.0067, "step": 28930 }, { "grad_norm": 0.17051264643669128, "learning_rate": 5.703414826418042e-05, "loss": 0.0046, "step": 28940 }, { "grad_norm": 0.1759202480316162, "learning_rate": 5.7006863437842007e-05, "loss": 0.0054, "step": 28950 }, { "grad_norm": 0.151921346783638, "learning_rate": 5.697957648300348e-05, "loss": 0.0076, "step": 28960 }, { "grad_norm": 0.22174721956253052, "learning_rate": 5.695228740795391e-05, "loss": 0.0056, "step": 28970 }, { "grad_norm": 0.19715653359889984, "learning_rate": 5.6924996220982985e-05, "loss": 0.0051, "step": 28980 }, { "grad_norm": 0.21052099764347076, "learning_rate": 5.6897702930381045e-05, "loss": 0.0064, "step": 28990 }, { "grad_norm": 0.19617202877998352, "learning_rate": 5.687040754443908e-05, "loss": 0.0056, "step": 29000 }, { "grad_norm": 0.20572853088378906, "learning_rate": 5.6843110071448725e-05, "loss": 0.0057, "step": 29010 }, { "grad_norm": 0.16405658423900604, "learning_rate": 5.6815810519702194e-05, "loss": 0.0057, "step": 29020 }, { "grad_norm": 0.1357373595237732, "learning_rate": 5.6788508897492396e-05, "loss": 0.0044, "step": 29030 }, { "grad_norm": 0.30207711458206177, "learning_rate": 5.676120521311282e-05, "loss": 0.0087, "step": 29040 }, { "grad_norm": 0.18085119128227234, "learning_rate": 5.6733899474857634e-05, "loss": 0.004, "step": 29050 }, { "grad_norm": 0.2029605656862259, "learning_rate": 5.670659169102157e-05, "loss": 0.0051, "step": 29060 }, { "grad_norm": 0.2264328896999359, "learning_rate": 5.6679281869900044e-05, "loss": 0.0063, "step": 29070 }, { "grad_norm": 0.16823899745941162, "learning_rate": 5.6651970019789045e-05, "loss": 0.0055, "step": 29080 }, { "grad_norm": 0.14622972905635834, "learning_rate": 5.662465614898519e-05, "loss": 0.0056, "step": 29090 }, { "grad_norm": 0.2224721759557724, "learning_rate": 5.6597340265785695e-05, "loss": 0.0056, "step": 29100 }, { "grad_norm": 0.18313968181610107, "learning_rate": 5.657002237848843e-05, "loss": 0.0044, "step": 29110 }, { "grad_norm": 0.2091536968946457, "learning_rate": 5.654270249539183e-05, "loss": 0.005, "step": 29120 }, { "grad_norm": 0.15692025423049927, "learning_rate": 5.651538062479498e-05, "loss": 0.0058, "step": 29130 }, { "grad_norm": 0.15955621004104614, "learning_rate": 5.648805677499751e-05, "loss": 0.0067, "step": 29140 }, { "grad_norm": 0.23243653774261475, "learning_rate": 5.646073095429969e-05, "loss": 0.0059, "step": 29150 }, { "grad_norm": 0.25523442029953003, "learning_rate": 5.643340317100241e-05, "loss": 0.0059, "step": 29160 }, { "grad_norm": 0.22557803988456726, "learning_rate": 5.64060734334071e-05, "loss": 0.0052, "step": 29170 }, { "grad_norm": 0.18731147050857544, "learning_rate": 5.637874174981583e-05, "loss": 0.0053, "step": 29180 }, { "grad_norm": 0.20181021094322205, "learning_rate": 5.635140812853124e-05, "loss": 0.005, "step": 29190 }, { "grad_norm": 0.17075319588184357, "learning_rate": 5.6324072577856544e-05, "loss": 0.0043, "step": 29200 }, { "grad_norm": 0.18515950441360474, "learning_rate": 5.629673510609559e-05, "loss": 0.0045, "step": 29210 }, { "grad_norm": 0.2035454660654068, "learning_rate": 5.626939572155276e-05, "loss": 0.0064, "step": 29220 }, { "grad_norm": 0.19095073640346527, "learning_rate": 5.6242054432533054e-05, "loss": 0.0054, "step": 29230 }, { "grad_norm": 0.16964705288410187, "learning_rate": 5.621471124734201e-05, "loss": 0.0069, "step": 29240 }, { "grad_norm": 0.22279876470565796, "learning_rate": 5.6187366174285794e-05, "loss": 0.006, "step": 29250 }, { "grad_norm": 0.18500900268554688, "learning_rate": 5.616001922167109e-05, "loss": 0.0046, "step": 29260 }, { "grad_norm": 0.16835862398147583, "learning_rate": 5.61326703978052e-05, "loss": 0.0048, "step": 29270 }, { "grad_norm": 0.22742272913455963, "learning_rate": 5.6105319710995964e-05, "loss": 0.0052, "step": 29280 }, { "grad_norm": 0.1859036237001419, "learning_rate": 5.60779671695518e-05, "loss": 0.0059, "step": 29290 }, { "grad_norm": 0.20393681526184082, "learning_rate": 5.6050612781781684e-05, "loss": 0.0067, "step": 29300 }, { "grad_norm": 0.19883304834365845, "learning_rate": 5.602325655599516e-05, "loss": 0.0052, "step": 29310 }, { "grad_norm": 0.15367934107780457, "learning_rate": 5.599589850050234e-05, "loss": 0.0056, "step": 29320 }, { "grad_norm": 0.15047653019428253, "learning_rate": 5.5968538623613874e-05, "loss": 0.006, "step": 29330 }, { "grad_norm": 0.18690043687820435, "learning_rate": 5.594117693364095e-05, "loss": 0.0049, "step": 29340 }, { "grad_norm": 0.21890491247177124, "learning_rate": 5.591381343889535e-05, "loss": 0.0062, "step": 29350 }, { "grad_norm": 0.16709180176258087, "learning_rate": 5.5886448147689355e-05, "loss": 0.0058, "step": 29360 }, { "grad_norm": 0.20897939801216125, "learning_rate": 5.585908106833585e-05, "loss": 0.0057, "step": 29370 }, { "grad_norm": 0.21187157928943634, "learning_rate": 5.5831712209148226e-05, "loss": 0.0052, "step": 29380 }, { "grad_norm": 0.16506585478782654, "learning_rate": 5.58043415784404e-05, "loss": 0.0048, "step": 29390 }, { "grad_norm": 0.2711997330188751, "learning_rate": 5.577696918452686e-05, "loss": 0.0056, "step": 29400 }, { "grad_norm": 0.2071024626493454, "learning_rate": 5.5749595035722604e-05, "loss": 0.0041, "step": 29410 }, { "grad_norm": 0.1898985505104065, "learning_rate": 5.5722219140343193e-05, "loss": 0.004, "step": 29420 }, { "grad_norm": 0.2062532603740692, "learning_rate": 5.56948415067047e-05, "loss": 0.006, "step": 29430 }, { "grad_norm": 0.19252778589725494, "learning_rate": 5.5667462143123704e-05, "loss": 0.0053, "step": 29440 }, { "grad_norm": 0.2357863485813141, "learning_rate": 5.564008105791737e-05, "loss": 0.0056, "step": 29450 }, { "grad_norm": 0.2055368274450302, "learning_rate": 5.5612698259403316e-05, "loss": 0.0059, "step": 29460 }, { "grad_norm": 0.2414434850215912, "learning_rate": 5.5585313755899724e-05, "loss": 0.0044, "step": 29470 }, { "grad_norm": 0.20956923067569733, "learning_rate": 5.5557927555725285e-05, "loss": 0.0057, "step": 29480 }, { "grad_norm": 0.14181269705295563, "learning_rate": 5.55305396671992e-05, "loss": 0.0042, "step": 29490 }, { "grad_norm": 0.19984854757785797, "learning_rate": 5.55031500986412e-05, "loss": 0.0056, "step": 29500 }, { "grad_norm": 0.1677868664264679, "learning_rate": 5.547575885837149e-05, "loss": 0.0062, "step": 29510 }, { "grad_norm": 0.2121850699186325, "learning_rate": 5.5448365954710825e-05, "loss": 0.0062, "step": 29520 }, { "grad_norm": 0.16570019721984863, "learning_rate": 5.5420971395980446e-05, "loss": 0.0045, "step": 29530 }, { "grad_norm": 0.30435338616371155, "learning_rate": 5.539357519050209e-05, "loss": 0.0079, "step": 29540 }, { "grad_norm": 0.1941782832145691, "learning_rate": 5.536617734659799e-05, "loss": 0.0055, "step": 29550 }, { "grad_norm": 0.20635464787483215, "learning_rate": 5.533877787259091e-05, "loss": 0.0066, "step": 29560 }, { "grad_norm": 0.22745567560195923, "learning_rate": 5.5311376776804044e-05, "loss": 0.0055, "step": 29570 }, { "grad_norm": 0.1521776020526886, "learning_rate": 5.528397406756118e-05, "loss": 0.005, "step": 29580 }, { "grad_norm": 0.14877501130104065, "learning_rate": 5.525656975318652e-05, "loss": 0.0051, "step": 29590 }, { "grad_norm": 0.18450523912906647, "learning_rate": 5.522916384200474e-05, "loss": 0.0068, "step": 29600 }, { "grad_norm": 0.30232444405555725, "learning_rate": 5.520175634234106e-05, "loss": 0.0086, "step": 29610 }, { "grad_norm": 0.24299657344818115, "learning_rate": 5.517434726252113e-05, "loss": 0.0068, "step": 29620 }, { "grad_norm": 0.22698938846588135, "learning_rate": 5.514693661087113e-05, "loss": 0.0049, "step": 29630 }, { "grad_norm": 0.24346503615379333, "learning_rate": 5.511952439571769e-05, "loss": 0.0058, "step": 29640 }, { "grad_norm": 0.19434130191802979, "learning_rate": 5.509211062538791e-05, "loss": 0.0071, "step": 29650 }, { "grad_norm": 0.39745065569877625, "learning_rate": 5.506469530820939e-05, "loss": 0.0065, "step": 29660 }, { "grad_norm": 0.19955550134181976, "learning_rate": 5.503727845251014e-05, "loss": 0.0072, "step": 29670 }, { "grad_norm": 0.251282662153244, "learning_rate": 5.50098600666187e-05, "loss": 0.0055, "step": 29680 }, { "grad_norm": 0.18520720303058624, "learning_rate": 5.498244015886406e-05, "loss": 0.0072, "step": 29690 }, { "grad_norm": 0.22318704426288605, "learning_rate": 5.495501873757565e-05, "loss": 0.0066, "step": 29700 }, { "grad_norm": 0.19224198162555695, "learning_rate": 5.492759581108336e-05, "loss": 0.0064, "step": 29710 }, { "grad_norm": 0.20975066721439362, "learning_rate": 5.490017138771759e-05, "loss": 0.0067, "step": 29720 }, { "grad_norm": 0.18289147317409515, "learning_rate": 5.487274547580912e-05, "loss": 0.0096, "step": 29730 }, { "grad_norm": 0.15758202970027924, "learning_rate": 5.484531808368923e-05, "loss": 0.0053, "step": 29740 }, { "grad_norm": 0.17166924476623535, "learning_rate": 5.4817889219689656e-05, "loss": 0.0066, "step": 29750 }, { "grad_norm": 0.2286929488182068, "learning_rate": 5.4790458892142536e-05, "loss": 0.0061, "step": 29760 }, { "grad_norm": 0.20322489738464355, "learning_rate": 5.476302710938048e-05, "loss": 0.0073, "step": 29770 }, { "grad_norm": 0.15503044426441193, "learning_rate": 5.473559387973657e-05, "loss": 0.0059, "step": 29780 }, { "grad_norm": 0.21335212886333466, "learning_rate": 5.470815921154425e-05, "loss": 0.0057, "step": 29790 }, { "grad_norm": 0.25170767307281494, "learning_rate": 5.468072311313749e-05, "loss": 0.0074, "step": 29800 }, { "grad_norm": 0.2249269336462021, "learning_rate": 5.465328559285063e-05, "loss": 0.0043, "step": 29810 }, { "grad_norm": 0.15145115554332733, "learning_rate": 5.462584665901849e-05, "loss": 0.0055, "step": 29820 }, { "grad_norm": 0.1966078132390976, "learning_rate": 5.4598406319976235e-05, "loss": 0.0056, "step": 29830 }, { "grad_norm": 0.16924650967121124, "learning_rate": 5.457096458405958e-05, "loss": 0.0052, "step": 29840 }, { "grad_norm": 0.21797440946102142, "learning_rate": 5.454352145960457e-05, "loss": 0.0063, "step": 29850 }, { "grad_norm": 0.2023417055606842, "learning_rate": 5.4516076954947715e-05, "loss": 0.0065, "step": 29860 }, { "grad_norm": 0.23208950459957123, "learning_rate": 5.448863107842591e-05, "loss": 0.0061, "step": 29870 }, { "grad_norm": 0.15935717523097992, "learning_rate": 5.446118383837651e-05, "loss": 0.0068, "step": 29880 }, { "grad_norm": 0.2505706250667572, "learning_rate": 5.443373524313722e-05, "loss": 0.0072, "step": 29890 }, { "grad_norm": 0.27601441740989685, "learning_rate": 5.440628530104626e-05, "loss": 0.0061, "step": 29900 }, { "grad_norm": 0.19104118645191193, "learning_rate": 5.4378834020442146e-05, "loss": 0.0074, "step": 29910 }, { "grad_norm": 0.1839321255683899, "learning_rate": 5.4351381409663884e-05, "loss": 0.0052, "step": 29920 }, { "grad_norm": 0.23367559909820557, "learning_rate": 5.432392747705084e-05, "loss": 0.0071, "step": 29930 }, { "grad_norm": 0.17363567650318146, "learning_rate": 5.429647223094278e-05, "loss": 0.0064, "step": 29940 }, { "grad_norm": 0.21096286177635193, "learning_rate": 5.4269015679679924e-05, "loss": 0.0061, "step": 29950 }, { "grad_norm": 0.15616314113140106, "learning_rate": 5.424155783160281e-05, "loss": 0.0043, "step": 29960 }, { "grad_norm": 0.17585046589374542, "learning_rate": 5.4214098695052415e-05, "loss": 0.0046, "step": 29970 }, { "grad_norm": 0.17712654173374176, "learning_rate": 5.418663827837012e-05, "loss": 0.0039, "step": 29980 }, { "grad_norm": 0.22880172729492188, "learning_rate": 5.415917658989763e-05, "loss": 0.0047, "step": 29990 }, { "grad_norm": 0.17595677077770233, "learning_rate": 5.413171363797713e-05, "loss": 0.0052, "step": 30000 }, { "grad_norm": 0.20251259207725525, "learning_rate": 5.4104249430951116e-05, "loss": 0.0047, "step": 30010 }, { "grad_norm": 0.14635731279850006, "learning_rate": 5.4076783977162494e-05, "loss": 0.0077, "step": 30020 }, { "grad_norm": 0.19002458453178406, "learning_rate": 5.4049317284954525e-05, "loss": 0.0057, "step": 30030 }, { "grad_norm": 0.15300147235393524, "learning_rate": 5.4021849362670884e-05, "loss": 0.0044, "step": 30040 }, { "grad_norm": 0.1581823080778122, "learning_rate": 5.3994380218655604e-05, "loss": 0.0061, "step": 30050 }, { "grad_norm": 0.3429616689682007, "learning_rate": 5.396690986125309e-05, "loss": 0.0071, "step": 30060 }, { "grad_norm": 0.27367961406707764, "learning_rate": 5.3939438298808075e-05, "loss": 0.0053, "step": 30070 }, { "grad_norm": 0.16606593132019043, "learning_rate": 5.3911965539665744e-05, "loss": 0.0049, "step": 30080 }, { "grad_norm": 0.1713843047618866, "learning_rate": 5.388449159217156e-05, "loss": 0.0063, "step": 30090 }, { "grad_norm": 0.1901925504207611, "learning_rate": 5.3857016464671385e-05, "loss": 0.0055, "step": 30100 }, { "grad_norm": 0.20465657114982605, "learning_rate": 5.382954016551146e-05, "loss": 0.0063, "step": 30110 }, { "grad_norm": 0.16736078262329102, "learning_rate": 5.380206270303835e-05, "loss": 0.0049, "step": 30120 }, { "grad_norm": 0.23199190199375153, "learning_rate": 5.377458408559897e-05, "loss": 0.005, "step": 30130 }, { "grad_norm": 0.17988093197345734, "learning_rate": 5.374710432154061e-05, "loss": 0.0089, "step": 30140 }, { "grad_norm": 0.2259960025548935, "learning_rate": 5.3719623419210886e-05, "loss": 0.0057, "step": 30150 }, { "grad_norm": 0.17799469828605652, "learning_rate": 5.3692141386957786e-05, "loss": 0.0057, "step": 30160 }, { "grad_norm": 0.17998215556144714, "learning_rate": 5.3664658233129616e-05, "loss": 0.0064, "step": 30170 }, { "grad_norm": 0.2078496515750885, "learning_rate": 5.363717396607504e-05, "loss": 0.005, "step": 30180 }, { "grad_norm": 0.19409272074699402, "learning_rate": 5.360968859414305e-05, "loss": 0.0051, "step": 30190 }, { "grad_norm": 0.20358821749687195, "learning_rate": 5.358220212568295e-05, "loss": 0.006, "step": 30200 }, { "grad_norm": 0.176359623670578, "learning_rate": 5.355471456904444e-05, "loss": 0.0058, "step": 30210 }, { "grad_norm": 0.2315361052751541, "learning_rate": 5.3527225932577495e-05, "loss": 0.0077, "step": 30220 }, { "grad_norm": 0.22574542462825775, "learning_rate": 5.349973622463246e-05, "loss": 0.0047, "step": 30230 }, { "grad_norm": 0.25905847549438477, "learning_rate": 5.3472245453559956e-05, "loss": 0.0054, "step": 30240 }, { "grad_norm": 0.24247246980667114, "learning_rate": 5.3444753627710955e-05, "loss": 0.0078, "step": 30250 }, { "grad_norm": 0.1722092181444168, "learning_rate": 5.341726075543676e-05, "loss": 0.0048, "step": 30260 }, { "grad_norm": 0.17815691232681274, "learning_rate": 5.338976684508898e-05, "loss": 0.0059, "step": 30270 }, { "grad_norm": 0.23875965178012848, "learning_rate": 5.336227190501953e-05, "loss": 0.0075, "step": 30280 }, { "grad_norm": 0.20787908136844635, "learning_rate": 5.3334775943580664e-05, "loss": 0.0057, "step": 30290 }, { "grad_norm": 0.15233765542507172, "learning_rate": 5.330727896912491e-05, "loss": 0.0049, "step": 30300 }, { "grad_norm": 0.16355381906032562, "learning_rate": 5.327978099000511e-05, "loss": 0.0044, "step": 30310 }, { "grad_norm": 0.18125870823860168, "learning_rate": 5.3252282014574465e-05, "loss": 0.0059, "step": 30320 }, { "grad_norm": 0.19926011562347412, "learning_rate": 5.322478205118641e-05, "loss": 0.0052, "step": 30330 }, { "grad_norm": 0.26833435893058777, "learning_rate": 5.3197281108194704e-05, "loss": 0.0061, "step": 30340 }, { "grad_norm": 0.2054644078016281, "learning_rate": 5.316977919395342e-05, "loss": 0.0059, "step": 30350 }, { "grad_norm": 0.20752538740634918, "learning_rate": 5.314227631681691e-05, "loss": 0.006, "step": 30360 }, { "grad_norm": 0.1510171741247177, "learning_rate": 5.311477248513982e-05, "loss": 0.0065, "step": 30370 }, { "grad_norm": 0.18885274231433868, "learning_rate": 5.30872677072771e-05, "loss": 0.0046, "step": 30380 }, { "grad_norm": 0.16170471906661987, "learning_rate": 5.3059761991583954e-05, "loss": 0.0051, "step": 30390 }, { "grad_norm": 0.14873333275318146, "learning_rate": 5.303225534641592e-05, "loss": 0.0043, "step": 30400 }, { "grad_norm": 0.16585133969783783, "learning_rate": 5.300474778012875e-05, "loss": 0.0051, "step": 30410 }, { "grad_norm": 0.18362580239772797, "learning_rate": 5.297723930107855e-05, "loss": 0.005, "step": 30420 }, { "grad_norm": 0.13625064492225647, "learning_rate": 5.294972991762167e-05, "loss": 0.0031, "step": 30430 }, { "grad_norm": 0.17825248837471008, "learning_rate": 5.292221963811472e-05, "loss": 0.004, "step": 30440 }, { "grad_norm": 0.25440919399261475, "learning_rate": 5.28947084709146e-05, "loss": 0.0045, "step": 30450 }, { "grad_norm": 0.1803164780139923, "learning_rate": 5.2867196424378465e-05, "loss": 0.0041, "step": 30460 }, { "grad_norm": 0.2411777675151825, "learning_rate": 5.2839683506863765e-05, "loss": 0.005, "step": 30470 }, { "grad_norm": 0.17569312453269958, "learning_rate": 5.281216972672821e-05, "loss": 0.0043, "step": 30480 }, { "grad_norm": 0.2172713726758957, "learning_rate": 5.278465509232973e-05, "loss": 0.005, "step": 30490 }, { "grad_norm": 0.23277314007282257, "learning_rate": 5.275713961202655e-05, "loss": 0.0056, "step": 30500 }, { "grad_norm": 0.25411856174468994, "learning_rate": 5.2729623294177165e-05, "loss": 0.0051, "step": 30510 }, { "grad_norm": 0.16799281537532806, "learning_rate": 5.270210614714028e-05, "loss": 0.0046, "step": 30520 }, { "grad_norm": 0.21863852441310883, "learning_rate": 5.267458817927491e-05, "loss": 0.0058, "step": 30530 }, { "grad_norm": 0.16732220351696014, "learning_rate": 5.264706939894026e-05, "loss": 0.0052, "step": 30540 }, { "grad_norm": 0.15635059773921967, "learning_rate": 5.261954981449584e-05, "loss": 0.0042, "step": 30550 }, { "grad_norm": 0.19723793864250183, "learning_rate": 5.2592029434301324e-05, "loss": 0.0058, "step": 30560 }, { "grad_norm": 0.17818093299865723, "learning_rate": 5.256450826671672e-05, "loss": 0.0083, "step": 30570 }, { "grad_norm": 0.20513881742954254, "learning_rate": 5.253698632010221e-05, "loss": 0.0062, "step": 30580 }, { "grad_norm": 0.18832646310329437, "learning_rate": 5.2509463602818246e-05, "loss": 0.0041, "step": 30590 }, { "grad_norm": 0.24967971444129944, "learning_rate": 5.248194012322549e-05, "loss": 0.0049, "step": 30600 }, { "grad_norm": 0.22181031107902527, "learning_rate": 5.245441588968486e-05, "loss": 0.0062, "step": 30610 }, { "grad_norm": 0.19122512638568878, "learning_rate": 5.242689091055748e-05, "loss": 0.004, "step": 30620 }, { "grad_norm": 0.1803915798664093, "learning_rate": 5.239936519420473e-05, "loss": 0.0058, "step": 30630 }, { "grad_norm": 0.22564992308616638, "learning_rate": 5.2371838748988175e-05, "loss": 0.0056, "step": 30640 }, { "grad_norm": 0.184902623295784, "learning_rate": 5.234431158326965e-05, "loss": 0.0047, "step": 30650 }, { "grad_norm": 0.23818105459213257, "learning_rate": 5.231678370541115e-05, "loss": 0.0056, "step": 30660 }, { "grad_norm": 0.1931838095188141, "learning_rate": 5.228925512377495e-05, "loss": 0.0042, "step": 30670 }, { "grad_norm": 0.25122156739234924, "learning_rate": 5.2261725846723465e-05, "loss": 0.0047, "step": 30680 }, { "grad_norm": 0.17127318680286407, "learning_rate": 5.22341958826194e-05, "loss": 0.0053, "step": 30690 }, { "grad_norm": 0.19694863259792328, "learning_rate": 5.22066652398256e-05, "loss": 0.0044, "step": 30700 }, { "grad_norm": 0.17200535535812378, "learning_rate": 5.2179133926705185e-05, "loss": 0.0055, "step": 30710 }, { "grad_norm": 0.20394374430179596, "learning_rate": 5.215160195162141e-05, "loss": 0.0046, "step": 30720 }, { "grad_norm": 0.20772460103034973, "learning_rate": 5.212406932293776e-05, "loss": 0.0055, "step": 30730 }, { "grad_norm": 0.17080987989902496, "learning_rate": 5.209653604901795e-05, "loss": 0.0057, "step": 30740 }, { "grad_norm": 0.16024409234523773, "learning_rate": 5.206900213822584e-05, "loss": 0.0041, "step": 30750 }, { "grad_norm": 0.14029310643672943, "learning_rate": 5.204146759892551e-05, "loss": 0.005, "step": 30760 }, { "grad_norm": 0.20849694311618805, "learning_rate": 5.2013932439481216e-05, "loss": 0.0052, "step": 30770 }, { "grad_norm": 0.18949127197265625, "learning_rate": 5.198639666825743e-05, "loss": 0.0065, "step": 30780 }, { "grad_norm": 0.22011834383010864, "learning_rate": 5.195886029361877e-05, "loss": 0.0043, "step": 30790 }, { "grad_norm": 0.2479807585477829, "learning_rate": 5.193132332393009e-05, "loss": 0.0065, "step": 30800 }, { "grad_norm": 0.23610201478004456, "learning_rate": 5.1903785767556376e-05, "loss": 0.0058, "step": 30810 }, { "grad_norm": 0.2370217740535736, "learning_rate": 5.187624763286282e-05, "loss": 0.0061, "step": 30820 }, { "grad_norm": 0.19059418141841888, "learning_rate": 5.184870892821475e-05, "loss": 0.0065, "step": 30830 }, { "grad_norm": 0.20750829577445984, "learning_rate": 5.182116966197773e-05, "loss": 0.0042, "step": 30840 }, { "grad_norm": 0.1982104480266571, "learning_rate": 5.1793629842517466e-05, "loss": 0.0045, "step": 30850 }, { "grad_norm": 0.2042876034975052, "learning_rate": 5.17660894781998e-05, "loss": 0.0061, "step": 30860 }, { "grad_norm": 0.2952621579170227, "learning_rate": 5.173854857739079e-05, "loss": 0.0076, "step": 30870 }, { "grad_norm": 0.18933899700641632, "learning_rate": 5.171100714845661e-05, "loss": 0.0057, "step": 30880 }, { "grad_norm": 0.1879114955663681, "learning_rate": 5.1683465199763646e-05, "loss": 0.0049, "step": 30890 }, { "grad_norm": 0.23395439982414246, "learning_rate": 5.16559227396784e-05, "loss": 0.0048, "step": 30900 }, { "grad_norm": 0.2327568233013153, "learning_rate": 5.1628379776567556e-05, "loss": 0.0053, "step": 30910 }, { "grad_norm": 0.17762382328510284, "learning_rate": 5.160083631879792e-05, "loss": 0.0052, "step": 30920 }, { "grad_norm": 0.23868829011917114, "learning_rate": 5.1573292374736484e-05, "loss": 0.0052, "step": 30930 }, { "grad_norm": 0.16216345131397247, "learning_rate": 5.1545747952750356e-05, "loss": 0.0042, "step": 30940 }, { "grad_norm": 0.13944071531295776, "learning_rate": 5.151820306120682e-05, "loss": 0.0043, "step": 30950 }, { "grad_norm": 0.14964541792869568, "learning_rate": 5.149065770847328e-05, "loss": 0.0054, "step": 30960 }, { "grad_norm": 0.1824244260787964, "learning_rate": 5.1463111902917297e-05, "loss": 0.0046, "step": 30970 }, { "grad_norm": 0.1795882135629654, "learning_rate": 5.143556565290654e-05, "loss": 0.0044, "step": 30980 }, { "grad_norm": 0.1381848007440567, "learning_rate": 5.140801896680882e-05, "loss": 0.0064, "step": 30990 }, { "grad_norm": 0.15474601089954376, "learning_rate": 5.1380471852992144e-05, "loss": 0.0041, "step": 31000 }, { "grad_norm": 0.1834709495306015, "learning_rate": 5.135292431982457e-05, "loss": 0.0049, "step": 31010 }, { "grad_norm": 0.24421149492263794, "learning_rate": 5.1325376375674294e-05, "loss": 0.0056, "step": 31020 }, { "grad_norm": 0.1933891922235489, "learning_rate": 5.129782802890968e-05, "loss": 0.0045, "step": 31030 }, { "grad_norm": 0.2036387324333191, "learning_rate": 5.127027928789916e-05, "loss": 0.007, "step": 31040 }, { "grad_norm": 0.2099962830543518, "learning_rate": 5.124273016101135e-05, "loss": 0.0049, "step": 31050 }, { "grad_norm": 0.19814443588256836, "learning_rate": 5.121518065661492e-05, "loss": 0.0047, "step": 31060 }, { "grad_norm": 0.2068185955286026, "learning_rate": 5.11876307830787e-05, "loss": 0.0057, "step": 31070 }, { "grad_norm": 0.16235095262527466, "learning_rate": 5.1160080548771596e-05, "loss": 0.0057, "step": 31080 }, { "grad_norm": 0.12750132381916046, "learning_rate": 5.1132529962062656e-05, "loss": 0.0044, "step": 31090 }, { "grad_norm": 0.2374095767736435, "learning_rate": 5.110497903132101e-05, "loss": 0.0057, "step": 31100 }, { "grad_norm": 0.21322056651115417, "learning_rate": 5.107742776491592e-05, "loss": 0.0047, "step": 31110 }, { "grad_norm": 0.24200071394443512, "learning_rate": 5.104987617121673e-05, "loss": 0.0061, "step": 31120 }, { "grad_norm": 0.15461426973342896, "learning_rate": 5.102232425859287e-05, "loss": 0.0046, "step": 31130 }, { "grad_norm": 0.20958973467350006, "learning_rate": 5.09947720354139e-05, "loss": 0.0043, "step": 31140 }, { "grad_norm": 0.17817848920822144, "learning_rate": 5.096721951004942e-05, "loss": 0.0047, "step": 31150 }, { "grad_norm": 0.18468694388866425, "learning_rate": 5.0939666690869227e-05, "loss": 0.0059, "step": 31160 }, { "grad_norm": 0.17305415868759155, "learning_rate": 5.0912113586243096e-05, "loss": 0.0043, "step": 31170 }, { "grad_norm": 0.2267351597547531, "learning_rate": 5.0884560204540935e-05, "loss": 0.005, "step": 31180 }, { "grad_norm": 0.16885721683502197, "learning_rate": 5.0857006554132736e-05, "loss": 0.0039, "step": 31190 }, { "grad_norm": 0.24405206739902496, "learning_rate": 5.0829452643388575e-05, "loss": 0.004, "step": 31200 }, { "grad_norm": 0.20062245428562164, "learning_rate": 5.08018984806786e-05, "loss": 0.0049, "step": 31210 }, { "grad_norm": 0.23667308688163757, "learning_rate": 5.0774344074373036e-05, "loss": 0.0062, "step": 31220 }, { "grad_norm": 0.23339824378490448, "learning_rate": 5.07467894328422e-05, "loss": 0.0055, "step": 31230 }, { "grad_norm": 0.1691211313009262, "learning_rate": 5.0719234564456454e-05, "loss": 0.0071, "step": 31240 }, { "grad_norm": 0.2070344090461731, "learning_rate": 5.0691679477586216e-05, "loss": 0.0052, "step": 31250 }, { "grad_norm": 0.17927901446819305, "learning_rate": 5.0664124180602035e-05, "loss": 0.0045, "step": 31260 }, { "grad_norm": 0.15509898960590363, "learning_rate": 5.063656868187447e-05, "loss": 0.0056, "step": 31270 }, { "grad_norm": 0.14657287299633026, "learning_rate": 5.060901298977413e-05, "loss": 0.0075, "step": 31280 }, { "grad_norm": 0.24408428370952606, "learning_rate": 5.0581457112671725e-05, "loss": 0.0065, "step": 31290 }, { "grad_norm": 0.20375415682792664, "learning_rate": 5.0553901058938016e-05, "loss": 0.007, "step": 31300 }, { "grad_norm": 0.20089338719844818, "learning_rate": 5.052634483694377e-05, "loss": 0.0046, "step": 31310 }, { "grad_norm": 0.18726156651973724, "learning_rate": 5.049878845505988e-05, "loss": 0.0046, "step": 31320 }, { "grad_norm": 0.2967512309551239, "learning_rate": 5.047123192165721e-05, "loss": 0.0052, "step": 31330 }, { "grad_norm": 0.20723484456539154, "learning_rate": 5.0443675245106735e-05, "loss": 0.0055, "step": 31340 }, { "grad_norm": 0.2124812752008438, "learning_rate": 5.0416118433779426e-05, "loss": 0.006, "step": 31350 }, { "grad_norm": 0.19948455691337585, "learning_rate": 5.038856149604633e-05, "loss": 0.0054, "step": 31360 }, { "grad_norm": 0.22487370669841766, "learning_rate": 5.03610044402785e-05, "loss": 0.0059, "step": 31370 }, { "grad_norm": 0.21467886865139008, "learning_rate": 5.033344727484707e-05, "loss": 0.0055, "step": 31380 }, { "grad_norm": 0.19325312972068787, "learning_rate": 5.030589000812315e-05, "loss": 0.0075, "step": 31390 }, { "grad_norm": 0.18458259105682373, "learning_rate": 5.027833264847793e-05, "loss": 0.0055, "step": 31400 }, { "grad_norm": 0.17840997874736786, "learning_rate": 5.025077520428258e-05, "loss": 0.0065, "step": 31410 }, { "grad_norm": 0.21097692847251892, "learning_rate": 5.022321768390837e-05, "loss": 0.0067, "step": 31420 }, { "grad_norm": 0.18814191222190857, "learning_rate": 5.0195660095726516e-05, "loss": 0.0047, "step": 31430 }, { "grad_norm": 0.21292294561862946, "learning_rate": 5.016810244810829e-05, "loss": 0.0052, "step": 31440 }, { "grad_norm": 0.26881682872772217, "learning_rate": 5.0140544749424976e-05, "loss": 0.0048, "step": 31450 }, { "grad_norm": 0.2497543841600418, "learning_rate": 5.0112987008047874e-05, "loss": 0.0063, "step": 31460 }, { "grad_norm": 0.18842750787734985, "learning_rate": 5.008542923234831e-05, "loss": 0.0051, "step": 31470 }, { "grad_norm": 0.17844758927822113, "learning_rate": 5.00578714306976e-05, "loss": 0.0055, "step": 31480 }, { "grad_norm": 0.15989556908607483, "learning_rate": 5.0030313611467084e-05, "loss": 0.0054, "step": 31490 }, { "grad_norm": 0.190887913107872, "learning_rate": 5.0002755783028074e-05, "loss": 0.0055, "step": 31500 }, { "grad_norm": 0.19800716638565063, "learning_rate": 4.997519795375194e-05, "loss": 0.0045, "step": 31510 }, { "grad_norm": 0.16440793871879578, "learning_rate": 4.9947640132010016e-05, "loss": 0.0047, "step": 31520 }, { "grad_norm": 0.18869194388389587, "learning_rate": 4.9920082326173625e-05, "loss": 0.0046, "step": 31530 }, { "grad_norm": 0.20321868360042572, "learning_rate": 4.9892524544614114e-05, "loss": 0.0065, "step": 31540 }, { "grad_norm": 0.27668923139572144, "learning_rate": 4.986496679570283e-05, "loss": 0.0082, "step": 31550 }, { "grad_norm": 0.24034807085990906, "learning_rate": 4.983740908781105e-05, "loss": 0.0044, "step": 31560 }, { "grad_norm": 0.15371045470237732, "learning_rate": 4.9809851429310116e-05, "loss": 0.004, "step": 31570 }, { "grad_norm": 0.20635126531124115, "learning_rate": 4.9782293828571275e-05, "loss": 0.0041, "step": 31580 }, { "grad_norm": 0.1869991570711136, "learning_rate": 4.9754736293965846e-05, "loss": 0.0045, "step": 31590 }, { "grad_norm": 0.17415070533752441, "learning_rate": 4.972717883386502e-05, "loss": 0.0046, "step": 31600 }, { "grad_norm": 0.16830730438232422, "learning_rate": 4.9699621456640075e-05, "loss": 0.0044, "step": 31610 }, { "grad_norm": 0.21755871176719666, "learning_rate": 4.9672064170662214e-05, "loss": 0.007, "step": 31620 }, { "grad_norm": 0.21022994816303253, "learning_rate": 4.9644506984302583e-05, "loss": 0.0036, "step": 31630 }, { "grad_norm": 0.2332187443971634, "learning_rate": 4.9616949905932356e-05, "loss": 0.0059, "step": 31640 }, { "grad_norm": 0.1786932349205017, "learning_rate": 4.9589392943922615e-05, "loss": 0.005, "step": 31650 }, { "grad_norm": 0.27448326349258423, "learning_rate": 4.956183610664447e-05, "loss": 0.0059, "step": 31660 }, { "grad_norm": 0.24794825911521912, "learning_rate": 4.9534279402468945e-05, "loss": 0.0067, "step": 31670 }, { "grad_norm": 0.17630399763584137, "learning_rate": 4.9506722839767036e-05, "loss": 0.0052, "step": 31680 }, { "grad_norm": 0.14124633371829987, "learning_rate": 4.947916642690972e-05, "loss": 0.0042, "step": 31690 }, { "grad_norm": 0.22503885626792908, "learning_rate": 4.9451610172267874e-05, "loss": 0.0054, "step": 31700 }, { "grad_norm": 0.23687101900577545, "learning_rate": 4.9424054084212376e-05, "loss": 0.0044, "step": 31710 }, { "grad_norm": 0.15688809752464294, "learning_rate": 4.939649817111407e-05, "loss": 0.004, "step": 31720 }, { "grad_norm": 0.27571186423301697, "learning_rate": 4.936894244134365e-05, "loss": 0.0043, "step": 31730 }, { "grad_norm": 0.2929707467556, "learning_rate": 4.9341386903271886e-05, "loss": 0.005, "step": 31740 }, { "grad_norm": 0.17888648808002472, "learning_rate": 4.931383156526936e-05, "loss": 0.0058, "step": 31750 }, { "grad_norm": 0.2074800282716751, "learning_rate": 4.92862764357067e-05, "loss": 0.0053, "step": 31760 }, { "grad_norm": 0.24055776000022888, "learning_rate": 4.925872152295443e-05, "loss": 0.0054, "step": 31770 }, { "grad_norm": 0.19162774085998535, "learning_rate": 4.923116683538296e-05, "loss": 0.0045, "step": 31780 }, { "grad_norm": 0.24251529574394226, "learning_rate": 4.920361238136273e-05, "loss": 0.0054, "step": 31790 }, { "grad_norm": 0.2408338189125061, "learning_rate": 4.9176058169264014e-05, "loss": 0.005, "step": 31800 }, { "grad_norm": 0.19583497941493988, "learning_rate": 4.9148504207457074e-05, "loss": 0.0049, "step": 31810 }, { "grad_norm": 0.1445378214120865, "learning_rate": 4.912095050431208e-05, "loss": 0.0043, "step": 31820 }, { "grad_norm": 0.13216830790042877, "learning_rate": 4.909339706819911e-05, "loss": 0.0045, "step": 31830 }, { "grad_norm": 0.18678554892539978, "learning_rate": 4.906584390748819e-05, "loss": 0.0045, "step": 31840 }, { "grad_norm": 0.19915135204792023, "learning_rate": 4.9038291030549195e-05, "loss": 0.0047, "step": 31850 }, { "grad_norm": 0.2730449438095093, "learning_rate": 4.9010738445751995e-05, "loss": 0.0051, "step": 31860 }, { "grad_norm": 0.16995364427566528, "learning_rate": 4.8983186161466364e-05, "loss": 0.0044, "step": 31870 }, { "grad_norm": 0.15751521289348602, "learning_rate": 4.89556341860619e-05, "loss": 0.0044, "step": 31880 }, { "grad_norm": 0.17200878262519836, "learning_rate": 4.892808252790822e-05, "loss": 0.0035, "step": 31890 }, { "grad_norm": 0.2017274796962738, "learning_rate": 4.890053119537475e-05, "loss": 0.0041, "step": 31900 }, { "grad_norm": 0.1692100614309311, "learning_rate": 4.887298019683087e-05, "loss": 0.0048, "step": 31910 }, { "grad_norm": 0.22506344318389893, "learning_rate": 4.884542954064587e-05, "loss": 0.0045, "step": 31920 }, { "grad_norm": 0.24472633004188538, "learning_rate": 4.881787923518887e-05, "loss": 0.0048, "step": 31930 }, { "grad_norm": 0.19130097329616547, "learning_rate": 4.879032928882896e-05, "loss": 0.007, "step": 31940 }, { "grad_norm": 0.2052450329065323, "learning_rate": 4.876277970993505e-05, "loss": 0.0049, "step": 31950 }, { "grad_norm": 0.1908828765153885, "learning_rate": 4.873523050687602e-05, "loss": 0.0082, "step": 31960 }, { "grad_norm": 0.16499602794647217, "learning_rate": 4.870768168802056e-05, "loss": 0.0046, "step": 31970 }, { "grad_norm": 0.15493258833885193, "learning_rate": 4.868013326173728e-05, "loss": 0.0058, "step": 31980 }, { "grad_norm": 0.12490259110927582, "learning_rate": 4.865258523639468e-05, "loss": 0.0045, "step": 31990 }, { "grad_norm": 0.15310634672641754, "learning_rate": 4.862503762036109e-05, "loss": 0.0047, "step": 32000 }, { "grad_norm": 0.19791896641254425, "learning_rate": 4.859749042200478e-05, "loss": 0.0063, "step": 32010 }, { "grad_norm": 0.2279396653175354, "learning_rate": 4.856994364969384e-05, "loss": 0.0047, "step": 32020 }, { "grad_norm": 0.1784597784280777, "learning_rate": 4.854239731179625e-05, "loss": 0.0052, "step": 32030 }, { "grad_norm": 0.14760909974575043, "learning_rate": 4.85148514166799e-05, "loss": 0.0042, "step": 32040 }, { "grad_norm": 0.19168606400489807, "learning_rate": 4.8487305972712456e-05, "loss": 0.004, "step": 32050 }, { "grad_norm": 0.23768550157546997, "learning_rate": 4.8459760988261526e-05, "loss": 0.005, "step": 32060 }, { "grad_norm": 0.15142159163951874, "learning_rate": 4.843221647169453e-05, "loss": 0.0034, "step": 32070 }, { "grad_norm": 0.21645669639110565, "learning_rate": 4.840467243137878e-05, "loss": 0.0053, "step": 32080 }, { "grad_norm": 0.20354746282100677, "learning_rate": 4.837712887568143e-05, "loss": 0.0053, "step": 32090 }, { "grad_norm": 0.2716869115829468, "learning_rate": 4.8349585812969464e-05, "loss": 0.0041, "step": 32100 }, { "grad_norm": 0.20231391489505768, "learning_rate": 4.8322043251609775e-05, "loss": 0.0047, "step": 32110 }, { "grad_norm": 0.14112946391105652, "learning_rate": 4.8294501199969015e-05, "loss": 0.005, "step": 32120 }, { "grad_norm": 0.28037208318710327, "learning_rate": 4.826695966641376e-05, "loss": 0.0048, "step": 32130 }, { "grad_norm": 0.16090203821659088, "learning_rate": 4.823941865931043e-05, "loss": 0.006, "step": 32140 }, { "grad_norm": 0.15256863832473755, "learning_rate": 4.82118781870252e-05, "loss": 0.0066, "step": 32150 }, { "grad_norm": 0.1753431260585785, "learning_rate": 4.8184338257924185e-05, "loss": 0.0041, "step": 32160 }, { "grad_norm": 0.19041666388511658, "learning_rate": 4.815679888037324e-05, "loss": 0.0039, "step": 32170 }, { "grad_norm": 0.22406138479709625, "learning_rate": 4.8129260062738135e-05, "loss": 0.0047, "step": 32180 }, { "grad_norm": 0.1523721069097519, "learning_rate": 4.810172181338445e-05, "loss": 0.0037, "step": 32190 }, { "grad_norm": 0.20484371483325958, "learning_rate": 4.807418414067753e-05, "loss": 0.0041, "step": 32200 }, { "grad_norm": 0.2010655701160431, "learning_rate": 4.804664705298264e-05, "loss": 0.0078, "step": 32210 }, { "grad_norm": 0.23369023203849792, "learning_rate": 4.80191105586648e-05, "loss": 0.0048, "step": 32220 }, { "grad_norm": 0.23071376979351044, "learning_rate": 4.799157466608886e-05, "loss": 0.0047, "step": 32230 }, { "grad_norm": 0.14918065071105957, "learning_rate": 4.796403938361951e-05, "loss": 0.0049, "step": 32240 }, { "grad_norm": 0.18900185823440552, "learning_rate": 4.793650471962123e-05, "loss": 0.0042, "step": 32250 }, { "grad_norm": 0.21083198487758636, "learning_rate": 4.790897068245835e-05, "loss": 0.0064, "step": 32260 }, { "grad_norm": 0.25141823291778564, "learning_rate": 4.7881437280494954e-05, "loss": 0.0061, "step": 32270 }, { "grad_norm": 0.21428143978118896, "learning_rate": 4.7853904522094965e-05, "loss": 0.006, "step": 32280 }, { "grad_norm": 0.1968785971403122, "learning_rate": 4.782637241562215e-05, "loss": 0.0061, "step": 32290 }, { "grad_norm": 0.15142029523849487, "learning_rate": 4.779884096943997e-05, "loss": 0.005, "step": 32300 }, { "grad_norm": 0.18293695151805878, "learning_rate": 4.777131019191182e-05, "loss": 0.0042, "step": 32310 }, { "grad_norm": 0.13914795219898224, "learning_rate": 4.774378009140076e-05, "loss": 0.0033, "step": 32320 }, { "grad_norm": 0.1742292195558548, "learning_rate": 4.7716250676269735e-05, "loss": 0.0049, "step": 32330 }, { "grad_norm": 0.18444855511188507, "learning_rate": 4.7688721954881485e-05, "loss": 0.0048, "step": 32340 }, { "grad_norm": 0.1803952008485794, "learning_rate": 4.7661193935598446e-05, "loss": 0.0044, "step": 32350 }, { "grad_norm": 0.15445967018604279, "learning_rate": 4.763366662678296e-05, "loss": 0.0052, "step": 32360 }, { "grad_norm": 0.18442274630069733, "learning_rate": 4.7606140036797064e-05, "loss": 0.0051, "step": 32370 }, { "grad_norm": 0.17230643332004547, "learning_rate": 4.7578614174002614e-05, "loss": 0.0042, "step": 32380 }, { "grad_norm": 0.20352445542812347, "learning_rate": 4.755108904676125e-05, "loss": 0.0072, "step": 32390 }, { "grad_norm": 0.26936498284339905, "learning_rate": 4.752356466343436e-05, "loss": 0.0053, "step": 32400 }, { "grad_norm": 0.2861011326313019, "learning_rate": 4.7496041032383174e-05, "loss": 0.007, "step": 32410 }, { "grad_norm": 0.190623477101326, "learning_rate": 4.746851816196858e-05, "loss": 0.0056, "step": 32420 }, { "grad_norm": 0.1682569533586502, "learning_rate": 4.744099606055135e-05, "loss": 0.0047, "step": 32430 }, { "grad_norm": 0.22984115779399872, "learning_rate": 4.741347473649193e-05, "loss": 0.0046, "step": 32440 }, { "grad_norm": 0.19816507399082184, "learning_rate": 4.738595419815058e-05, "loss": 0.005, "step": 32450 }, { "grad_norm": 0.15016452968120575, "learning_rate": 4.7358434453887365e-05, "loss": 0.0055, "step": 32460 }, { "grad_norm": 0.23969532549381256, "learning_rate": 4.7330915512061976e-05, "loss": 0.0064, "step": 32470 }, { "grad_norm": 0.17962460219860077, "learning_rate": 4.730339738103402e-05, "loss": 0.0059, "step": 32480 }, { "grad_norm": 0.16040557622909546, "learning_rate": 4.727588006916271e-05, "loss": 0.0045, "step": 32490 }, { "grad_norm": 0.20202873647212982, "learning_rate": 4.724836358480711e-05, "loss": 0.0039, "step": 32500 }, { "grad_norm": 0.12208746373653412, "learning_rate": 4.722084793632601e-05, "loss": 0.004, "step": 32510 }, { "grad_norm": 0.15922726690769196, "learning_rate": 4.719333313207792e-05, "loss": 0.0049, "step": 32520 }, { "grad_norm": 0.1514817327260971, "learning_rate": 4.716581918042114e-05, "loss": 0.0043, "step": 32530 }, { "grad_norm": 0.26068586111068726, "learning_rate": 4.7138306089713636e-05, "loss": 0.0086, "step": 32540 }, { "grad_norm": 0.19129633903503418, "learning_rate": 4.7110793868313183e-05, "loss": 0.0048, "step": 32550 }, { "grad_norm": 0.2191700041294098, "learning_rate": 4.708328252457729e-05, "loss": 0.0043, "step": 32560 }, { "grad_norm": 0.24066494405269623, "learning_rate": 4.7055772066863135e-05, "loss": 0.0048, "step": 32570 }, { "grad_norm": 0.24875414371490479, "learning_rate": 4.702826250352771e-05, "loss": 0.0062, "step": 32580 }, { "grad_norm": 0.24485665559768677, "learning_rate": 4.7000753842927653e-05, "loss": 0.007, "step": 32590 }, { "grad_norm": 0.18917761743068695, "learning_rate": 4.6973246093419384e-05, "loss": 0.0068, "step": 32600 }, { "grad_norm": 0.22483518719673157, "learning_rate": 4.694573926335906e-05, "loss": 0.0036, "step": 32610 }, { "grad_norm": 0.1390780657529831, "learning_rate": 4.6918233361102476e-05, "loss": 0.0032, "step": 32620 }, { "grad_norm": 0.1989414244890213, "learning_rate": 4.689072839500525e-05, "loss": 0.0054, "step": 32630 }, { "grad_norm": 0.18637628853321075, "learning_rate": 4.6863224373422635e-05, "loss": 0.004, "step": 32640 }, { "grad_norm": 0.171858012676239, "learning_rate": 4.683572130470962e-05, "loss": 0.0033, "step": 32650 }, { "grad_norm": 0.16169320046901703, "learning_rate": 4.680821919722094e-05, "loss": 0.005, "step": 32660 }, { "grad_norm": 0.14494270086288452, "learning_rate": 4.6780718059310975e-05, "loss": 0.0044, "step": 32670 }, { "grad_norm": 0.14536269009113312, "learning_rate": 4.675321789933389e-05, "loss": 0.0067, "step": 32680 }, { "grad_norm": 0.199925497174263, "learning_rate": 4.6725718725643464e-05, "loss": 0.0049, "step": 32690 }, { "grad_norm": 0.19077004492282867, "learning_rate": 4.669822054659323e-05, "loss": 0.0049, "step": 32700 }, { "grad_norm": 0.19654829800128937, "learning_rate": 4.667072337053644e-05, "loss": 0.0038, "step": 32710 }, { "grad_norm": 0.20727397501468658, "learning_rate": 4.6643227205825965e-05, "loss": 0.0043, "step": 32720 }, { "grad_norm": 0.21547819674015045, "learning_rate": 4.6615732060814454e-05, "loss": 0.0059, "step": 32730 }, { "grad_norm": 0.1741621047258377, "learning_rate": 4.658823794385417e-05, "loss": 0.0034, "step": 32740 }, { "grad_norm": 0.16072964668273926, "learning_rate": 4.6560744863297115e-05, "loss": 0.0053, "step": 32750 }, { "grad_norm": 0.1594333052635193, "learning_rate": 4.653325282749498e-05, "loss": 0.0044, "step": 32760 }, { "grad_norm": 0.2023230791091919, "learning_rate": 4.6505761844799075e-05, "loss": 0.0053, "step": 32770 }, { "grad_norm": 0.1880958080291748, "learning_rate": 4.647827192356048e-05, "loss": 0.005, "step": 32780 }, { "grad_norm": 0.19728344678878784, "learning_rate": 4.645078307212989e-05, "loss": 0.0039, "step": 32790 }, { "grad_norm": 0.1750764399766922, "learning_rate": 4.642329529885768e-05, "loss": 0.0045, "step": 32800 }, { "grad_norm": 0.14193753898143768, "learning_rate": 4.639580861209393e-05, "loss": 0.0038, "step": 32810 }, { "grad_norm": 0.14580261707305908, "learning_rate": 4.636832302018835e-05, "loss": 0.0039, "step": 32820 }, { "grad_norm": 0.22704246640205383, "learning_rate": 4.6340838531490365e-05, "loss": 0.0066, "step": 32830 }, { "grad_norm": 0.16191266477108002, "learning_rate": 4.6313355154349e-05, "loss": 0.004, "step": 32840 }, { "grad_norm": 0.15272437036037445, "learning_rate": 4.6285872897113025e-05, "loss": 0.0042, "step": 32850 }, { "grad_norm": 0.18530550599098206, "learning_rate": 4.625839176813077e-05, "loss": 0.004, "step": 32860 }, { "grad_norm": 0.14753226935863495, "learning_rate": 4.623091177575031e-05, "loss": 0.0045, "step": 32870 }, { "grad_norm": 0.15381473302841187, "learning_rate": 4.620343292831936e-05, "loss": 0.0064, "step": 32880 }, { "grad_norm": 0.1351233720779419, "learning_rate": 4.6175955234185206e-05, "loss": 0.0042, "step": 32890 }, { "grad_norm": 0.17321446537971497, "learning_rate": 4.614847870169492e-05, "loss": 0.0044, "step": 32900 }, { "grad_norm": 0.17199234664440155, "learning_rate": 4.612100333919509e-05, "loss": 0.0032, "step": 32910 }, { "grad_norm": 0.18708087503910065, "learning_rate": 4.609352915503202e-05, "loss": 0.0046, "step": 32920 }, { "grad_norm": 0.19583621621131897, "learning_rate": 4.606605615755166e-05, "loss": 0.0044, "step": 32930 }, { "grad_norm": 0.25135812163352966, "learning_rate": 4.6038584355099576e-05, "loss": 0.0062, "step": 32940 }, { "grad_norm": 0.2475704401731491, "learning_rate": 4.6011113756020964e-05, "loss": 0.0053, "step": 32950 }, { "grad_norm": 0.13889190554618835, "learning_rate": 4.598364436866066e-05, "loss": 0.0041, "step": 32960 }, { "grad_norm": 0.19973912835121155, "learning_rate": 4.595617620136316e-05, "loss": 0.0036, "step": 32970 }, { "grad_norm": 0.18490223586559296, "learning_rate": 4.592870926247257e-05, "loss": 0.0048, "step": 32980 }, { "grad_norm": 0.1921015828847885, "learning_rate": 4.5901243560332594e-05, "loss": 0.0052, "step": 32990 }, { "grad_norm": 0.23081588745117188, "learning_rate": 4.587377910328662e-05, "loss": 0.0053, "step": 33000 }, { "grad_norm": 0.16712728142738342, "learning_rate": 4.5846315899677586e-05, "loss": 0.0045, "step": 33010 }, { "grad_norm": 0.18717841804027557, "learning_rate": 4.5818853957848114e-05, "loss": 0.0045, "step": 33020 }, { "grad_norm": 0.20282988250255585, "learning_rate": 4.579139328614043e-05, "loss": 0.0064, "step": 33030 }, { "grad_norm": 0.22016045451164246, "learning_rate": 4.576393389289633e-05, "loss": 0.0059, "step": 33040 }, { "grad_norm": 0.17904025316238403, "learning_rate": 4.573647578645728e-05, "loss": 0.006, "step": 33050 }, { "grad_norm": 0.19048282504081726, "learning_rate": 4.57090189751643e-05, "loss": 0.0052, "step": 33060 }, { "grad_norm": 0.1914157122373581, "learning_rate": 4.568156346735806e-05, "loss": 0.0045, "step": 33070 }, { "grad_norm": 0.20221026241779327, "learning_rate": 4.565410927137882e-05, "loss": 0.0065, "step": 33080 }, { "grad_norm": 0.21152468025684357, "learning_rate": 4.562665639556644e-05, "loss": 0.0066, "step": 33090 }, { "grad_norm": 0.17515084147453308, "learning_rate": 4.559920484826037e-05, "loss": 0.0055, "step": 33100 }, { "grad_norm": 0.13399997353553772, "learning_rate": 4.5571754637799665e-05, "loss": 0.0044, "step": 33110 }, { "grad_norm": 0.15194153785705566, "learning_rate": 4.554430577252298e-05, "loss": 0.0043, "step": 33120 }, { "grad_norm": 0.16189908981323242, "learning_rate": 4.551685826076858e-05, "loss": 0.0046, "step": 33130 }, { "grad_norm": 0.22422078251838684, "learning_rate": 4.5489412110874246e-05, "loss": 0.007, "step": 33140 }, { "grad_norm": 0.2511410415172577, "learning_rate": 4.5461967331177444e-05, "loss": 0.0053, "step": 33150 }, { "grad_norm": 0.21357578039169312, "learning_rate": 4.5434523930015115e-05, "loss": 0.0048, "step": 33160 }, { "grad_norm": 0.19267761707305908, "learning_rate": 4.540708191572388e-05, "loss": 0.0062, "step": 33170 }, { "grad_norm": 0.208268940448761, "learning_rate": 4.537964129663991e-05, "loss": 0.0058, "step": 33180 }, { "grad_norm": 0.22184109687805176, "learning_rate": 4.535220208109889e-05, "loss": 0.0041, "step": 33190 }, { "grad_norm": 0.2716061472892761, "learning_rate": 4.5324764277436194e-05, "loss": 0.0042, "step": 33200 }, { "grad_norm": 0.23763802647590637, "learning_rate": 4.529732789398664e-05, "loss": 0.0069, "step": 33210 }, { "grad_norm": 0.19782312214374542, "learning_rate": 4.526989293908472e-05, "loss": 0.0049, "step": 33220 }, { "grad_norm": 0.18328730762004852, "learning_rate": 4.524245942106442e-05, "loss": 0.005, "step": 33230 }, { "grad_norm": 0.18177179992198944, "learning_rate": 4.5215027348259345e-05, "loss": 0.0049, "step": 33240 }, { "grad_norm": 0.15582804381847382, "learning_rate": 4.5187596729002616e-05, "loss": 0.0046, "step": 33250 }, { "grad_norm": 0.1172068864107132, "learning_rate": 4.516016757162693e-05, "loss": 0.0072, "step": 33260 }, { "grad_norm": 0.16688767075538635, "learning_rate": 4.513273988446457e-05, "loss": 0.0051, "step": 33270 }, { "grad_norm": 0.1660337597131729, "learning_rate": 4.5105313675847296e-05, "loss": 0.0051, "step": 33280 }, { "grad_norm": 0.15883983671665192, "learning_rate": 4.5077888954106495e-05, "loss": 0.0047, "step": 33290 }, { "grad_norm": 0.19077563285827637, "learning_rate": 4.505046572757309e-05, "loss": 0.0044, "step": 33300 }, { "grad_norm": 0.18063879013061523, "learning_rate": 4.502304400457749e-05, "loss": 0.0048, "step": 33310 }, { "grad_norm": 0.1593950390815735, "learning_rate": 4.499562379344973e-05, "loss": 0.0038, "step": 33320 }, { "grad_norm": 0.20816084742546082, "learning_rate": 4.4968205102519306e-05, "loss": 0.0079, "step": 33330 }, { "grad_norm": 0.17260810732841492, "learning_rate": 4.494078794011532e-05, "loss": 0.0049, "step": 33340 }, { "grad_norm": 0.1723584085702896, "learning_rate": 4.491337231456639e-05, "loss": 0.0039, "step": 33350 }, { "grad_norm": 0.15594543516635895, "learning_rate": 4.4885958234200634e-05, "loss": 0.0045, "step": 33360 }, { "grad_norm": 0.18546228110790253, "learning_rate": 4.485854570734575e-05, "loss": 0.0044, "step": 33370 }, { "grad_norm": 0.14639759063720703, "learning_rate": 4.483113474232891e-05, "loss": 0.0054, "step": 33380 }, { "grad_norm": 0.21834929287433624, "learning_rate": 4.480372534747688e-05, "loss": 0.0048, "step": 33390 }, { "grad_norm": 0.2139056921005249, "learning_rate": 4.477631753111588e-05, "loss": 0.0051, "step": 33400 }, { "grad_norm": 0.22326582670211792, "learning_rate": 4.4748911301571686e-05, "loss": 0.0049, "step": 33410 }, { "grad_norm": 0.15286721289157867, "learning_rate": 4.472150666716961e-05, "loss": 0.0038, "step": 33420 }, { "grad_norm": 0.1834818422794342, "learning_rate": 4.469410363623442e-05, "loss": 0.0046, "step": 33430 }, { "grad_norm": 0.1544836163520813, "learning_rate": 4.466670221709044e-05, "loss": 0.0082, "step": 33440 }, { "grad_norm": 0.18582512438297272, "learning_rate": 4.463930241806154e-05, "loss": 0.0045, "step": 33450 }, { "grad_norm": 0.1381050944328308, "learning_rate": 4.4611904247471006e-05, "loss": 0.0038, "step": 33460 }, { "grad_norm": 0.19770090281963348, "learning_rate": 4.458450771364171e-05, "loss": 0.0057, "step": 33470 }, { "grad_norm": 0.20572297275066376, "learning_rate": 4.4557112824895965e-05, "loss": 0.0062, "step": 33480 }, { "grad_norm": 0.15222179889678955, "learning_rate": 4.452971958955563e-05, "loss": 0.0041, "step": 33490 }, { "grad_norm": 0.18206976354122162, "learning_rate": 4.450232801594208e-05, "loss": 0.0038, "step": 33500 }, { "grad_norm": 0.22142469882965088, "learning_rate": 4.447493811237609e-05, "loss": 0.007, "step": 33510 }, { "grad_norm": 0.18259239196777344, "learning_rate": 4.444754988717804e-05, "loss": 0.0037, "step": 33520 }, { "grad_norm": 0.1644694209098816, "learning_rate": 4.442016334866771e-05, "loss": 0.0057, "step": 33530 }, { "grad_norm": 0.12861062586307526, "learning_rate": 4.4392778505164445e-05, "loss": 0.0039, "step": 33540 }, { "grad_norm": 0.130414217710495, "learning_rate": 4.436539536498702e-05, "loss": 0.0047, "step": 33550 }, { "grad_norm": 0.1650528758764267, "learning_rate": 4.433801393645369e-05, "loss": 0.0046, "step": 33560 }, { "grad_norm": 0.1523527055978775, "learning_rate": 4.431063422788226e-05, "loss": 0.0041, "step": 33570 }, { "grad_norm": 0.17007499933242798, "learning_rate": 4.428325624758991e-05, "loss": 0.0049, "step": 33580 }, { "grad_norm": 0.20401349663734436, "learning_rate": 4.4255880003893366e-05, "loss": 0.0044, "step": 33590 }, { "grad_norm": 0.14803670346736908, "learning_rate": 4.422850550510884e-05, "loss": 0.0039, "step": 33600 }, { "grad_norm": 0.20467841625213623, "learning_rate": 4.4201132759551934e-05, "loss": 0.0061, "step": 33610 }, { "grad_norm": 0.18328902125358582, "learning_rate": 4.4173761775537804e-05, "loss": 0.0042, "step": 33620 }, { "grad_norm": 0.17253699898719788, "learning_rate": 4.414639256138099e-05, "loss": 0.0053, "step": 33630 }, { "grad_norm": 0.20801103115081787, "learning_rate": 4.411902512539557e-05, "loss": 0.0046, "step": 33640 }, { "grad_norm": 0.1973229944705963, "learning_rate": 4.4091659475895044e-05, "loss": 0.0059, "step": 33650 }, { "grad_norm": 0.16329631209373474, "learning_rate": 4.406429562119235e-05, "loss": 0.0044, "step": 33660 }, { "grad_norm": 0.18277055025100708, "learning_rate": 4.4036933569599945e-05, "loss": 0.0042, "step": 33670 }, { "grad_norm": 0.16051676869392395, "learning_rate": 4.400957332942965e-05, "loss": 0.0038, "step": 33680 }, { "grad_norm": 0.16573944687843323, "learning_rate": 4.3982214908992844e-05, "loss": 0.0057, "step": 33690 }, { "grad_norm": 0.19477687776088715, "learning_rate": 4.3954858316600235e-05, "loss": 0.0035, "step": 33700 }, { "grad_norm": 0.21553660929203033, "learning_rate": 4.392750356056205e-05, "loss": 0.0049, "step": 33710 }, { "grad_norm": 0.21639776229858398, "learning_rate": 4.390015064918798e-05, "loss": 0.0051, "step": 33720 }, { "grad_norm": 0.1857653558254242, "learning_rate": 4.387279959078705e-05, "loss": 0.0044, "step": 33730 }, { "grad_norm": 0.16132286190986633, "learning_rate": 4.384545039366786e-05, "loss": 0.0035, "step": 33740 }, { "grad_norm": 0.1379638910293579, "learning_rate": 4.381810306613831e-05, "loss": 0.0047, "step": 33750 }, { "grad_norm": 0.13853634893894196, "learning_rate": 4.3790757616505826e-05, "loss": 0.004, "step": 33760 }, { "grad_norm": 0.13563930988311768, "learning_rate": 4.376341405307725e-05, "loss": 0.0041, "step": 33770 }, { "grad_norm": 0.21742835640907288, "learning_rate": 4.37360723841588e-05, "loss": 0.004, "step": 33780 }, { "grad_norm": 0.1971443146467209, "learning_rate": 4.370873261805619e-05, "loss": 0.005, "step": 33790 }, { "grad_norm": 0.2042785882949829, "learning_rate": 4.368139476307449e-05, "loss": 0.0053, "step": 33800 }, { "grad_norm": 0.22481749951839447, "learning_rate": 4.365405882751822e-05, "loss": 0.005, "step": 33810 }, { "grad_norm": 0.18183766305446625, "learning_rate": 4.3626724819691326e-05, "loss": 0.0038, "step": 33820 }, { "grad_norm": 0.26173731684684753, "learning_rate": 4.359939274789715e-05, "loss": 0.0053, "step": 33830 }, { "grad_norm": 0.14960384368896484, "learning_rate": 4.357206262043848e-05, "loss": 0.0042, "step": 33840 }, { "grad_norm": 0.23865333199501038, "learning_rate": 4.354473444561745e-05, "loss": 0.0053, "step": 33850 }, { "grad_norm": 0.13766765594482422, "learning_rate": 4.3517408231735644e-05, "loss": 0.0035, "step": 33860 }, { "grad_norm": 0.1455104500055313, "learning_rate": 4.3490083987094086e-05, "loss": 0.0044, "step": 33870 }, { "grad_norm": 0.2013978511095047, "learning_rate": 4.34627617199931e-05, "loss": 0.0047, "step": 33880 }, { "grad_norm": 0.22904083132743835, "learning_rate": 4.3435441438732526e-05, "loss": 0.0048, "step": 33890 }, { "grad_norm": 0.15855951607227325, "learning_rate": 4.340812315161149e-05, "loss": 0.0058, "step": 33900 }, { "grad_norm": 0.14015594124794006, "learning_rate": 4.338080686692859e-05, "loss": 0.0035, "step": 33910 }, { "grad_norm": 0.14604465663433075, "learning_rate": 4.3353492592981816e-05, "loss": 0.0045, "step": 33920 }, { "grad_norm": 0.1883566826581955, "learning_rate": 4.3326180338068485e-05, "loss": 0.0061, "step": 33930 }, { "grad_norm": 0.12513059377670288, "learning_rate": 4.3298870110485356e-05, "loss": 0.004, "step": 33940 }, { "grad_norm": 0.17817538976669312, "learning_rate": 4.3271561918528567e-05, "loss": 0.0037, "step": 33950 }, { "grad_norm": 0.15345628559589386, "learning_rate": 4.324425577049359e-05, "loss": 0.0047, "step": 33960 }, { "grad_norm": 0.1819748431444168, "learning_rate": 4.321695167467535e-05, "loss": 0.0051, "step": 33970 }, { "grad_norm": 0.20217286050319672, "learning_rate": 4.3189649639368093e-05, "loss": 0.0061, "step": 33980 }, { "grad_norm": 0.18917502462863922, "learning_rate": 4.316234967286547e-05, "loss": 0.0045, "step": 33990 }, { "grad_norm": 0.19039934873580933, "learning_rate": 4.313505178346046e-05, "loss": 0.0058, "step": 34000 }, { "grad_norm": 0.18818224966526031, "learning_rate": 4.3107755979445465e-05, "loss": 0.0039, "step": 34010 }, { "grad_norm": 0.18497240543365479, "learning_rate": 4.308046226911224e-05, "loss": 0.0049, "step": 34020 }, { "grad_norm": 0.19601446390151978, "learning_rate": 4.305317066075185e-05, "loss": 0.007, "step": 34030 }, { "grad_norm": 0.19892337918281555, "learning_rate": 4.302588116265482e-05, "loss": 0.005, "step": 34040 }, { "grad_norm": 0.19490763545036316, "learning_rate": 4.299859378311094e-05, "loss": 0.0042, "step": 34050 }, { "grad_norm": 0.14752547442913055, "learning_rate": 4.2971308530409424e-05, "loss": 0.0069, "step": 34060 }, { "grad_norm": 0.18503214418888092, "learning_rate": 4.2944025412838765e-05, "loss": 0.0046, "step": 34070 }, { "grad_norm": 0.1781253069639206, "learning_rate": 4.291674443868689e-05, "loss": 0.0044, "step": 34080 }, { "grad_norm": 0.21360643208026886, "learning_rate": 4.288946561624104e-05, "loss": 0.0051, "step": 34090 }, { "grad_norm": 0.20713235437870026, "learning_rate": 4.2862188953787794e-05, "loss": 0.0039, "step": 34100 }, { "grad_norm": 0.2380281239748001, "learning_rate": 4.283491445961308e-05, "loss": 0.0053, "step": 34110 }, { "grad_norm": 0.1647782176733017, "learning_rate": 4.2807642142002155e-05, "loss": 0.0058, "step": 34120 }, { "grad_norm": 0.20737305283546448, "learning_rate": 4.278037200923966e-05, "loss": 0.0054, "step": 34130 }, { "grad_norm": 0.23220381140708923, "learning_rate": 4.275310406960953e-05, "loss": 0.0035, "step": 34140 }, { "grad_norm": 0.18818068504333496, "learning_rate": 4.272583833139502e-05, "loss": 0.0053, "step": 34150 }, { "grad_norm": 0.17668244242668152, "learning_rate": 4.2698574802878794e-05, "loss": 0.004, "step": 34160 }, { "grad_norm": 0.15770000219345093, "learning_rate": 4.2671313492342734e-05, "loss": 0.004, "step": 34170 }, { "grad_norm": 0.12426867336034775, "learning_rate": 4.264405440806813e-05, "loss": 0.0051, "step": 34180 }, { "grad_norm": 0.20602105557918549, "learning_rate": 4.26167975583356e-05, "loss": 0.0039, "step": 34190 }, { "grad_norm": 0.18737304210662842, "learning_rate": 4.2589542951425e-05, "loss": 0.0036, "step": 34200 }, { "grad_norm": 0.15843060612678528, "learning_rate": 4.2562290595615615e-05, "loss": 0.0032, "step": 34210 }, { "grad_norm": 0.14224781095981598, "learning_rate": 4.2535040499185946e-05, "loss": 0.0036, "step": 34220 }, { "grad_norm": 0.18161417543888092, "learning_rate": 4.250779267041387e-05, "loss": 0.0044, "step": 34230 }, { "grad_norm": 0.16561119258403778, "learning_rate": 4.248054711757657e-05, "loss": 0.0055, "step": 34240 }, { "grad_norm": 0.135898157954216, "learning_rate": 4.245330384895052e-05, "loss": 0.0044, "step": 34250 }, { "grad_norm": 0.1834179162979126, "learning_rate": 4.242606287281151e-05, "loss": 0.0049, "step": 34260 }, { "grad_norm": 0.18382470309734344, "learning_rate": 4.2398824197434595e-05, "loss": 0.0044, "step": 34270 }, { "grad_norm": 0.21192659437656403, "learning_rate": 4.23715878310942e-05, "loss": 0.0057, "step": 34280 }, { "grad_norm": 0.16714639961719513, "learning_rate": 4.234435378206402e-05, "loss": 0.0042, "step": 34290 }, { "grad_norm": 0.16182488203048706, "learning_rate": 4.2317122058617006e-05, "loss": 0.0034, "step": 34300 }, { "grad_norm": 0.16742929816246033, "learning_rate": 4.2289892669025485e-05, "loss": 0.0034, "step": 34310 }, { "grad_norm": 0.18080438673496246, "learning_rate": 4.226266562156097e-05, "loss": 0.0056, "step": 34320 }, { "grad_norm": 0.17973585426807404, "learning_rate": 4.223544092449435e-05, "loss": 0.0046, "step": 34330 }, { "grad_norm": 0.1538865566253662, "learning_rate": 4.2208218586095784e-05, "loss": 0.0034, "step": 34340 }, { "grad_norm": 0.1607210338115692, "learning_rate": 4.218099861463466e-05, "loss": 0.0046, "step": 34350 }, { "grad_norm": 0.17730094492435455, "learning_rate": 4.215378101837972e-05, "loss": 0.0047, "step": 34360 }, { "grad_norm": 0.1639341413974762, "learning_rate": 4.2126565805598937e-05, "loss": 0.0039, "step": 34370 }, { "grad_norm": 0.19921983778476715, "learning_rate": 4.209935298455957e-05, "loss": 0.0046, "step": 34380 }, { "grad_norm": 0.19436313211917877, "learning_rate": 4.207214256352817e-05, "loss": 0.0075, "step": 34390 }, { "grad_norm": 0.191510409116745, "learning_rate": 4.2044934550770524e-05, "loss": 0.0038, "step": 34400 }, { "grad_norm": 0.1624172478914261, "learning_rate": 4.201772895455174e-05, "loss": 0.0038, "step": 34410 }, { "grad_norm": 0.15403953194618225, "learning_rate": 4.199052578313613e-05, "loss": 0.0036, "step": 34420 }, { "grad_norm": 0.15793345868587494, "learning_rate": 4.1963325044787294e-05, "loss": 0.0042, "step": 34430 }, { "grad_norm": 0.13288035988807678, "learning_rate": 4.193612674776814e-05, "loss": 0.0036, "step": 34440 }, { "grad_norm": 0.20503857731819153, "learning_rate": 4.1908930900340745e-05, "loss": 0.0045, "step": 34450 }, { "grad_norm": 0.1907370239496231, "learning_rate": 4.1881737510766536e-05, "loss": 0.0041, "step": 34460 }, { "grad_norm": 0.2307111769914627, "learning_rate": 4.185454658730609e-05, "loss": 0.0038, "step": 34470 }, { "grad_norm": 0.22476862370967865, "learning_rate": 4.1827358138219355e-05, "loss": 0.0047, "step": 34480 }, { "grad_norm": 0.22284075617790222, "learning_rate": 4.1800172171765404e-05, "loss": 0.0038, "step": 34490 }, { "grad_norm": 0.14311498403549194, "learning_rate": 4.177298869620264e-05, "loss": 0.0041, "step": 34500 }, { "grad_norm": 0.1221272423863411, "learning_rate": 4.1745807719788705e-05, "loss": 0.0045, "step": 34510 }, { "grad_norm": 0.16835187375545502, "learning_rate": 4.1718629250780445e-05, "loss": 0.0052, "step": 34520 }, { "grad_norm": 0.23141171038150787, "learning_rate": 4.1691453297433956e-05, "loss": 0.0048, "step": 34530 }, { "grad_norm": 0.13610680401325226, "learning_rate": 4.166427986800457e-05, "loss": 0.0041, "step": 34540 }, { "grad_norm": 0.16812001168727875, "learning_rate": 4.163710897074688e-05, "loss": 0.0044, "step": 34550 }, { "grad_norm": 0.26125040650367737, "learning_rate": 4.1609940613914686e-05, "loss": 0.0041, "step": 34560 }, { "grad_norm": 0.23603585362434387, "learning_rate": 4.1582774805760996e-05, "loss": 0.0047, "step": 34570 }, { "grad_norm": 0.18174543976783752, "learning_rate": 4.155561155453809e-05, "loss": 0.0062, "step": 34580 }, { "grad_norm": 0.1725202053785324, "learning_rate": 4.15284508684974e-05, "loss": 0.004, "step": 34590 }, { "grad_norm": 0.22396813333034515, "learning_rate": 4.1501292755889675e-05, "loss": 0.0037, "step": 34600 }, { "grad_norm": 0.14621104300022125, "learning_rate": 4.1474137224964833e-05, "loss": 0.0052, "step": 34610 }, { "grad_norm": 0.21945036947727203, "learning_rate": 4.144698428397197e-05, "loss": 0.0045, "step": 34620 }, { "grad_norm": 0.19067789614200592, "learning_rate": 4.1419833941159466e-05, "loss": 0.0061, "step": 34630 }, { "grad_norm": 0.17680232226848602, "learning_rate": 4.1392686204774846e-05, "loss": 0.0058, "step": 34640 }, { "grad_norm": 0.17204707860946655, "learning_rate": 4.13655410830649e-05, "loss": 0.0073, "step": 34650 }, { "grad_norm": 0.16198855638504028, "learning_rate": 4.1338398584275594e-05, "loss": 0.004, "step": 34660 }, { "grad_norm": 0.1765734851360321, "learning_rate": 4.1311258716652104e-05, "loss": 0.004, "step": 34670 }, { "grad_norm": 0.18409477174282074, "learning_rate": 4.128412148843881e-05, "loss": 0.0056, "step": 34680 }, { "grad_norm": 0.16727179288864136, "learning_rate": 4.125698690787926e-05, "loss": 0.0045, "step": 34690 }, { "grad_norm": 0.20261996984481812, "learning_rate": 4.1229854983216245e-05, "loss": 0.0055, "step": 34700 }, { "grad_norm": 0.15564371645450592, "learning_rate": 4.120272572269175e-05, "loss": 0.005, "step": 34710 }, { "grad_norm": 0.21736681461334229, "learning_rate": 4.117559913454687e-05, "loss": 0.0044, "step": 34720 }, { "grad_norm": 0.1425514668226242, "learning_rate": 4.114847522702201e-05, "loss": 0.0038, "step": 34730 }, { "grad_norm": 0.17848694324493408, "learning_rate": 4.112135400835664e-05, "loss": 0.0058, "step": 34740 }, { "grad_norm": 0.14816541969776154, "learning_rate": 4.109423548678949e-05, "loss": 0.0049, "step": 34750 }, { "grad_norm": 0.16022250056266785, "learning_rate": 4.106711967055848e-05, "loss": 0.0038, "step": 34760 }, { "grad_norm": 0.20128020644187927, "learning_rate": 4.1040006567900636e-05, "loss": 0.004, "step": 34770 }, { "grad_norm": 0.1692381352186203, "learning_rate": 4.101289618705224e-05, "loss": 0.0034, "step": 34780 }, { "grad_norm": 0.1550602912902832, "learning_rate": 4.0985788536248675e-05, "loss": 0.0037, "step": 34790 }, { "grad_norm": 0.15244947373867035, "learning_rate": 4.095868362372454e-05, "loss": 0.0038, "step": 34800 }, { "grad_norm": 0.1967269331216812, "learning_rate": 4.0931581457713614e-05, "loss": 0.0046, "step": 34810 }, { "grad_norm": 0.17493416368961334, "learning_rate": 4.09044820464488e-05, "loss": 0.0037, "step": 34820 }, { "grad_norm": 0.17064078152179718, "learning_rate": 4.087738539816219e-05, "loss": 0.0051, "step": 34830 }, { "grad_norm": 0.1470010131597519, "learning_rate": 4.085029152108501e-05, "loss": 0.0039, "step": 34840 }, { "grad_norm": 0.19303803145885468, "learning_rate": 4.0823200423447714e-05, "loss": 0.0039, "step": 34850 }, { "grad_norm": 0.21545466780662537, "learning_rate": 4.079611211347981e-05, "loss": 0.0038, "step": 34860 }, { "grad_norm": 0.1646214872598648, "learning_rate": 4.076902659941002e-05, "loss": 0.0059, "step": 34870 }, { "grad_norm": 0.1329984962940216, "learning_rate": 4.074194388946624e-05, "loss": 0.0042, "step": 34880 }, { "grad_norm": 0.19727373123168945, "learning_rate": 4.071486399187545e-05, "loss": 0.0049, "step": 34890 }, { "grad_norm": 0.16062751412391663, "learning_rate": 4.0687786914863836e-05, "loss": 0.0038, "step": 34900 }, { "grad_norm": 0.17265285551548004, "learning_rate": 4.0660712666656666e-05, "loss": 0.0033, "step": 34910 }, { "grad_norm": 0.16694913804531097, "learning_rate": 4.0633641255478394e-05, "loss": 0.0034, "step": 34920 }, { "grad_norm": 0.19818897545337677, "learning_rate": 4.0606572689552624e-05, "loss": 0.004, "step": 34930 }, { "grad_norm": 0.18170298635959625, "learning_rate": 4.0579506977102036e-05, "loss": 0.0056, "step": 34940 }, { "grad_norm": 0.16174526512622833, "learning_rate": 4.055244412634849e-05, "loss": 0.0034, "step": 34950 }, { "grad_norm": 0.1887652575969696, "learning_rate": 4.052538414551298e-05, "loss": 0.0038, "step": 34960 }, { "grad_norm": 0.1330665647983551, "learning_rate": 4.0498327042815596e-05, "loss": 0.0034, "step": 34970 }, { "grad_norm": 0.16771569848060608, "learning_rate": 4.047127282647559e-05, "loss": 0.0059, "step": 34980 }, { "grad_norm": 0.14479343593120575, "learning_rate": 4.04442215047113e-05, "loss": 0.0048, "step": 34990 }, { "grad_norm": 0.18350155651569366, "learning_rate": 4.041717308574023e-05, "loss": 0.0041, "step": 35000 }, { "grad_norm": 0.19537577033042908, "learning_rate": 4.039012757777893e-05, "loss": 0.0088, "step": 35010 }, { "grad_norm": 0.16308967769145966, "learning_rate": 4.036308498904314e-05, "loss": 0.004, "step": 35020 }, { "grad_norm": 0.21453475952148438, "learning_rate": 4.033604532774771e-05, "loss": 0.005, "step": 35030 }, { "grad_norm": 0.1916845738887787, "learning_rate": 4.030900860210652e-05, "loss": 0.0049, "step": 35040 }, { "grad_norm": 0.26032671332359314, "learning_rate": 4.028197482033266e-05, "loss": 0.004, "step": 35050 }, { "grad_norm": 0.1645016223192215, "learning_rate": 4.0254943990638246e-05, "loss": 0.0039, "step": 35060 }, { "grad_norm": 0.17950184643268585, "learning_rate": 4.022791612123454e-05, "loss": 0.0048, "step": 35070 }, { "grad_norm": 0.18055547773838043, "learning_rate": 4.020089122033192e-05, "loss": 0.0071, "step": 35080 }, { "grad_norm": 0.1926897168159485, "learning_rate": 4.01738692961398e-05, "loss": 0.0046, "step": 35090 }, { "grad_norm": 0.19580279290676117, "learning_rate": 4.014685035686675e-05, "loss": 0.0043, "step": 35100 }, { "grad_norm": 0.23293651640415192, "learning_rate": 4.011983441072039e-05, "loss": 0.0067, "step": 35110 }, { "grad_norm": 0.2102246731519699, "learning_rate": 4.0092821465907485e-05, "loss": 0.005, "step": 35120 }, { "grad_norm": 0.14274299144744873, "learning_rate": 4.006581153063383e-05, "loss": 0.0044, "step": 35130 }, { "grad_norm": 0.17626157402992249, "learning_rate": 4.003880461310432e-05, "loss": 0.0037, "step": 35140 }, { "grad_norm": 0.1781103014945984, "learning_rate": 4.001180072152298e-05, "loss": 0.0038, "step": 35150 }, { "grad_norm": 0.169856995344162, "learning_rate": 3.998479986409285e-05, "loss": 0.0041, "step": 35160 }, { "grad_norm": 0.19370587170124054, "learning_rate": 3.995780204901607e-05, "loss": 0.0038, "step": 35170 }, { "grad_norm": 0.182124063372612, "learning_rate": 3.993080728449391e-05, "loss": 0.0054, "step": 35180 }, { "grad_norm": 0.14766483008861542, "learning_rate": 3.990381557872661e-05, "loss": 0.0057, "step": 35190 }, { "grad_norm": 0.2013283669948578, "learning_rate": 3.987682693991359e-05, "loss": 0.0069, "step": 35200 }, { "grad_norm": 0.20138674974441528, "learning_rate": 3.9849841376253226e-05, "loss": 0.0042, "step": 35210 }, { "grad_norm": 0.15418125689029694, "learning_rate": 3.982285889594306e-05, "loss": 0.0044, "step": 35220 }, { "grad_norm": 0.13768728077411652, "learning_rate": 3.9795879507179665e-05, "loss": 0.0047, "step": 35230 }, { "grad_norm": 0.18905511498451233, "learning_rate": 3.9768903218158634e-05, "loss": 0.0043, "step": 35240 }, { "grad_norm": 0.14632026851177216, "learning_rate": 3.974193003707468e-05, "loss": 0.0048, "step": 35250 }, { "grad_norm": 0.14236973226070404, "learning_rate": 3.971495997212152e-05, "loss": 0.005, "step": 35260 }, { "grad_norm": 0.15416570007801056, "learning_rate": 3.9687993031491985e-05, "loss": 0.0041, "step": 35270 }, { "grad_norm": 0.13119575381278992, "learning_rate": 3.966102922337787e-05, "loss": 0.0028, "step": 35280 }, { "grad_norm": 0.20743732154369354, "learning_rate": 3.963406855597009e-05, "loss": 0.0046, "step": 35290 }, { "grad_norm": 0.1820625215768814, "learning_rate": 3.960711103745861e-05, "loss": 0.0045, "step": 35300 }, { "grad_norm": 0.21300853788852692, "learning_rate": 3.958015667603237e-05, "loss": 0.004, "step": 35310 }, { "grad_norm": 0.1771298050880432, "learning_rate": 3.955320547987943e-05, "loss": 0.0046, "step": 35320 }, { "grad_norm": 0.2095687985420227, "learning_rate": 3.952625745718681e-05, "loss": 0.005, "step": 35330 }, { "grad_norm": 0.16684526205062866, "learning_rate": 3.949931261614064e-05, "loss": 0.0046, "step": 35340 }, { "grad_norm": 0.19215929508209229, "learning_rate": 3.947237096492605e-05, "loss": 0.0053, "step": 35350 }, { "grad_norm": 0.1813025027513504, "learning_rate": 3.944543251172719e-05, "loss": 0.0038, "step": 35360 }, { "grad_norm": 0.18504272401332855, "learning_rate": 3.941849726472725e-05, "loss": 0.007, "step": 35370 }, { "grad_norm": 0.12724550068378448, "learning_rate": 3.939156523210846e-05, "loss": 0.0033, "step": 35380 }, { "grad_norm": 0.142240509390831, "learning_rate": 3.9364636422052046e-05, "loss": 0.0049, "step": 35390 }, { "grad_norm": 0.21093280613422394, "learning_rate": 3.933771084273828e-05, "loss": 0.0046, "step": 35400 }, { "grad_norm": 0.17055660486221313, "learning_rate": 3.931078850234643e-05, "loss": 0.0038, "step": 35410 }, { "grad_norm": 0.18784572184085846, "learning_rate": 3.928386940905483e-05, "loss": 0.0054, "step": 35420 }, { "grad_norm": 0.2002873420715332, "learning_rate": 3.925695357104073e-05, "loss": 0.0048, "step": 35430 }, { "grad_norm": 0.21293509006500244, "learning_rate": 3.923004099648049e-05, "loss": 0.0068, "step": 35440 }, { "grad_norm": 0.22828805446624756, "learning_rate": 3.920313169354944e-05, "loss": 0.0051, "step": 35450 }, { "grad_norm": 0.26494100689888, "learning_rate": 3.9176225670421897e-05, "loss": 0.0059, "step": 35460 }, { "grad_norm": 0.18764545023441315, "learning_rate": 3.9149322935271224e-05, "loss": 0.0051, "step": 35470 }, { "grad_norm": 0.1537577360868454, "learning_rate": 3.9122423496269725e-05, "loss": 0.0049, "step": 35480 }, { "grad_norm": 0.13006477057933807, "learning_rate": 3.909552736158877e-05, "loss": 0.0028, "step": 35490 }, { "grad_norm": 0.13274535536766052, "learning_rate": 3.90686345393987e-05, "loss": 0.0053, "step": 35500 }, { "grad_norm": 0.20071713626384735, "learning_rate": 3.9041745037868816e-05, "loss": 0.0066, "step": 35510 }, { "grad_norm": 0.15042737126350403, "learning_rate": 3.9014858865167465e-05, "loss": 0.005, "step": 35520 }, { "grad_norm": 0.18074959516525269, "learning_rate": 3.8987976029461935e-05, "loss": 0.0046, "step": 35530 }, { "grad_norm": 0.1836220771074295, "learning_rate": 3.896109653891853e-05, "loss": 0.0047, "step": 35540 }, { "grad_norm": 0.18061544001102448, "learning_rate": 3.893422040170254e-05, "loss": 0.0039, "step": 35550 }, { "grad_norm": 0.12281706929206848, "learning_rate": 3.8907347625978207e-05, "loss": 0.0046, "step": 35560 }, { "grad_norm": 0.17047938704490662, "learning_rate": 3.88804782199088e-05, "loss": 0.0043, "step": 35570 }, { "grad_norm": 0.1955205798149109, "learning_rate": 3.8853612191656495e-05, "loss": 0.0036, "step": 35580 }, { "grad_norm": 0.2124948799610138, "learning_rate": 3.88267495493825e-05, "loss": 0.0043, "step": 35590 }, { "grad_norm": 0.22286926209926605, "learning_rate": 3.8799890301247004e-05, "loss": 0.0044, "step": 35600 }, { "grad_norm": 0.17063790559768677, "learning_rate": 3.8773034455409096e-05, "loss": 0.0049, "step": 35610 }, { "grad_norm": 0.18034812808036804, "learning_rate": 3.8746182020026904e-05, "loss": 0.0055, "step": 35620 }, { "grad_norm": 0.23504638671875, "learning_rate": 3.871933300325745e-05, "loss": 0.0062, "step": 35630 }, { "grad_norm": 0.2064599096775055, "learning_rate": 3.869248741325679e-05, "loss": 0.0059, "step": 35640 }, { "grad_norm": 0.17354246973991394, "learning_rate": 3.866564525817992e-05, "loss": 0.0042, "step": 35650 }, { "grad_norm": 0.19360032677650452, "learning_rate": 3.8638806546180725e-05, "loss": 0.0047, "step": 35660 }, { "grad_norm": 0.1580348163843155, "learning_rate": 3.861197128541213e-05, "loss": 0.0045, "step": 35670 }, { "grad_norm": 0.14937378466129303, "learning_rate": 3.858513948402599e-05, "loss": 0.0046, "step": 35680 }, { "grad_norm": 0.1953752487897873, "learning_rate": 3.8558311150173077e-05, "loss": 0.0063, "step": 35690 }, { "grad_norm": 0.1868027001619339, "learning_rate": 3.853148629200312e-05, "loss": 0.0046, "step": 35700 }, { "grad_norm": 0.16624651849269867, "learning_rate": 3.850466491766482e-05, "loss": 0.0039, "step": 35710 }, { "grad_norm": 0.15174205601215363, "learning_rate": 3.847784703530583e-05, "loss": 0.0071, "step": 35720 }, { "grad_norm": 0.1895485520362854, "learning_rate": 3.845103265307266e-05, "loss": 0.0051, "step": 35730 }, { "grad_norm": 0.24633023142814636, "learning_rate": 3.842422177911086e-05, "loss": 0.004, "step": 35740 }, { "grad_norm": 0.1753271222114563, "learning_rate": 3.8397414421564826e-05, "loss": 0.0043, "step": 35750 }, { "grad_norm": 0.16444982588291168, "learning_rate": 3.8370610588577935e-05, "loss": 0.0039, "step": 35760 }, { "grad_norm": 0.19640284776687622, "learning_rate": 3.834381028829251e-05, "loss": 0.0053, "step": 35770 }, { "grad_norm": 0.1419774740934372, "learning_rate": 3.8317013528849745e-05, "loss": 0.003, "step": 35780 }, { "grad_norm": 0.15221914649009705, "learning_rate": 3.8290220318389815e-05, "loss": 0.0036, "step": 35790 }, { "grad_norm": 0.17576512694358826, "learning_rate": 3.8263430665051746e-05, "loss": 0.0053, "step": 35800 }, { "grad_norm": 0.18683822453022003, "learning_rate": 3.8236644576973554e-05, "loss": 0.006, "step": 35810 }, { "grad_norm": 0.20936179161071777, "learning_rate": 3.820986206229217e-05, "loss": 0.0037, "step": 35820 }, { "grad_norm": 0.12605148553848267, "learning_rate": 3.8183083129143384e-05, "loss": 0.0039, "step": 35830 }, { "grad_norm": 0.1542615443468094, "learning_rate": 3.815630778566193e-05, "loss": 0.0043, "step": 35840 }, { "grad_norm": 0.14638002216815948, "learning_rate": 3.812953603998145e-05, "loss": 0.004, "step": 35850 }, { "grad_norm": 0.12962792813777924, "learning_rate": 3.8102767900234504e-05, "loss": 0.0032, "step": 35860 }, { "grad_norm": 0.178972065448761, "learning_rate": 3.807600337455256e-05, "loss": 0.0064, "step": 35870 }, { "grad_norm": 0.14596381783485413, "learning_rate": 3.804924247106593e-05, "loss": 0.0034, "step": 35880 }, { "grad_norm": 0.16244550049304962, "learning_rate": 3.8022485197903925e-05, "loss": 0.0038, "step": 35890 }, { "grad_norm": 0.2238420695066452, "learning_rate": 3.799573156319464e-05, "loss": 0.0033, "step": 35900 }, { "grad_norm": 0.16197574138641357, "learning_rate": 3.796898157506515e-05, "loss": 0.0055, "step": 35910 }, { "grad_norm": 0.16722843050956726, "learning_rate": 3.794223524164143e-05, "loss": 0.0032, "step": 35920 }, { "grad_norm": 0.14297403395175934, "learning_rate": 3.7915492571048245e-05, "loss": 0.004, "step": 35930 }, { "grad_norm": 0.16478975117206573, "learning_rate": 3.788875357140937e-05, "loss": 0.0033, "step": 35940 }, { "grad_norm": 0.101627878844738, "learning_rate": 3.786201825084736e-05, "loss": 0.0038, "step": 35950 }, { "grad_norm": 0.14181600511074066, "learning_rate": 3.783528661748372e-05, "loss": 0.0031, "step": 35960 }, { "grad_norm": 0.2099507600069046, "learning_rate": 3.780855867943882e-05, "loss": 0.0045, "step": 35970 }, { "grad_norm": 0.16354510188102722, "learning_rate": 3.778183444483189e-05, "loss": 0.0048, "step": 35980 }, { "grad_norm": 0.20852220058441162, "learning_rate": 3.775511392178108e-05, "loss": 0.0047, "step": 35990 }, { "grad_norm": 0.16216205060482025, "learning_rate": 3.772839711840332e-05, "loss": 0.0057, "step": 36000 }, { "grad_norm": 0.14635902643203735, "learning_rate": 3.7701684042814515e-05, "loss": 0.0043, "step": 36010 }, { "grad_norm": 0.15734851360321045, "learning_rate": 3.76749747031294e-05, "loss": 0.0035, "step": 36020 }, { "grad_norm": 0.19009758532047272, "learning_rate": 3.764826910746152e-05, "loss": 0.0062, "step": 36030 }, { "grad_norm": 0.16363267600536346, "learning_rate": 3.762156726392338e-05, "loss": 0.005, "step": 36040 }, { "grad_norm": 0.17221324145793915, "learning_rate": 3.759486918062625e-05, "loss": 0.0038, "step": 36050 }, { "grad_norm": 0.17455646395683289, "learning_rate": 3.756817486568033e-05, "loss": 0.0054, "step": 36060 }, { "grad_norm": 0.1589495986700058, "learning_rate": 3.7541484327194654e-05, "loss": 0.0045, "step": 36070 }, { "grad_norm": 0.1859559565782547, "learning_rate": 3.751479757327707e-05, "loss": 0.0045, "step": 36080 }, { "grad_norm": 0.16552455723285675, "learning_rate": 3.7488114612034345e-05, "loss": 0.0046, "step": 36090 }, { "grad_norm": 0.17324581742286682, "learning_rate": 3.7461435451572044e-05, "loss": 0.0058, "step": 36100 }, { "grad_norm": 0.1281116008758545, "learning_rate": 3.743476009999459e-05, "loss": 0.0044, "step": 36110 }, { "grad_norm": 0.2255716323852539, "learning_rate": 3.7408088565405245e-05, "loss": 0.0046, "step": 36120 }, { "grad_norm": 0.18869923055171967, "learning_rate": 3.738142085590612e-05, "loss": 0.0039, "step": 36130 }, { "grad_norm": 0.19128362834453583, "learning_rate": 3.7354756979598194e-05, "loss": 0.0035, "step": 36140 }, { "grad_norm": 0.23120026290416718, "learning_rate": 3.7328096944581187e-05, "loss": 0.004, "step": 36150 }, { "grad_norm": 0.23744067549705505, "learning_rate": 3.730144075895377e-05, "loss": 0.0033, "step": 36160 }, { "grad_norm": 0.2170422524213791, "learning_rate": 3.727478843081335e-05, "loss": 0.0049, "step": 36170 }, { "grad_norm": 0.17735883593559265, "learning_rate": 3.72481399682562e-05, "loss": 0.0037, "step": 36180 }, { "grad_norm": 0.20702946186065674, "learning_rate": 3.722149537937747e-05, "loss": 0.0042, "step": 36190 }, { "grad_norm": 0.17318615317344666, "learning_rate": 3.7194854672271015e-05, "loss": 0.0037, "step": 36200 }, { "grad_norm": 0.12289384007453918, "learning_rate": 3.7168217855029644e-05, "loss": 0.0033, "step": 36210 }, { "grad_norm": 0.13628794252872467, "learning_rate": 3.7141584935744856e-05, "loss": 0.0035, "step": 36220 }, { "grad_norm": 0.1993735432624817, "learning_rate": 3.7114955922507055e-05, "loss": 0.0056, "step": 36230 }, { "grad_norm": 0.24133211374282837, "learning_rate": 3.708833082340545e-05, "loss": 0.0045, "step": 36240 }, { "grad_norm": 0.1532207429409027, "learning_rate": 3.7061709646528034e-05, "loss": 0.0041, "step": 36250 }, { "grad_norm": 0.19150801002979279, "learning_rate": 3.7035092399961604e-05, "loss": 0.0041, "step": 36260 }, { "grad_norm": 0.1891794502735138, "learning_rate": 3.700847909179177e-05, "loss": 0.004, "step": 36270 }, { "grad_norm": 0.20939947664737701, "learning_rate": 3.698186973010297e-05, "loss": 0.0044, "step": 36280 }, { "grad_norm": 0.14455635845661163, "learning_rate": 3.695526432297844e-05, "loss": 0.0037, "step": 36290 }, { "grad_norm": 0.1436607986688614, "learning_rate": 3.692866287850017e-05, "loss": 0.0036, "step": 36300 }, { "grad_norm": 0.1685621589422226, "learning_rate": 3.6902065404749006e-05, "loss": 0.0036, "step": 36310 }, { "grad_norm": 0.1561403125524521, "learning_rate": 3.6875471909804516e-05, "loss": 0.0033, "step": 36320 }, { "grad_norm": 0.1796732246875763, "learning_rate": 3.6848882401745135e-05, "loss": 0.0046, "step": 36330 }, { "grad_norm": 0.18659226596355438, "learning_rate": 3.682229688864806e-05, "loss": 0.0048, "step": 36340 }, { "grad_norm": 0.20979976654052734, "learning_rate": 3.6795715378589235e-05, "loss": 0.0042, "step": 36350 }, { "grad_norm": 0.18520836532115936, "learning_rate": 3.676913787964345e-05, "loss": 0.0032, "step": 36360 }, { "grad_norm": 0.15382996201515198, "learning_rate": 3.674256439988423e-05, "loss": 0.0045, "step": 36370 }, { "grad_norm": 0.18960382044315338, "learning_rate": 3.6715994947383904e-05, "loss": 0.0052, "step": 36380 }, { "grad_norm": 0.20157665014266968, "learning_rate": 3.668942953021357e-05, "loss": 0.0054, "step": 36390 }, { "grad_norm": 0.1395639330148697, "learning_rate": 3.66628681564431e-05, "loss": 0.0037, "step": 36400 }, { "grad_norm": 0.16661204397678375, "learning_rate": 3.663631083414114e-05, "loss": 0.004, "step": 36410 }, { "grad_norm": 0.18493399024009705, "learning_rate": 3.660975757137509e-05, "loss": 0.0037, "step": 36420 }, { "grad_norm": 0.15603278577327728, "learning_rate": 3.658320837621114e-05, "loss": 0.005, "step": 36430 }, { "grad_norm": 0.19166652858257294, "learning_rate": 3.655666325671426e-05, "loss": 0.0044, "step": 36440 }, { "grad_norm": 0.19289395213127136, "learning_rate": 3.65301222209481e-05, "loss": 0.004, "step": 36450 }, { "grad_norm": 0.13206928968429565, "learning_rate": 3.650358527697519e-05, "loss": 0.0035, "step": 36460 }, { "grad_norm": 0.15644435584545135, "learning_rate": 3.64770524328567e-05, "loss": 0.0038, "step": 36470 }, { "grad_norm": 0.12828420102596283, "learning_rate": 3.645052369665265e-05, "loss": 0.0036, "step": 36480 }, { "grad_norm": 0.2099485844373703, "learning_rate": 3.6423999076421724e-05, "loss": 0.0042, "step": 36490 }, { "grad_norm": 0.18850077688694, "learning_rate": 3.639747858022142e-05, "loss": 0.0036, "step": 36500 }, { "grad_norm": 0.1435929387807846, "learning_rate": 3.637096221610799e-05, "loss": 0.0029, "step": 36510 }, { "grad_norm": 0.13435561954975128, "learning_rate": 3.634444999213638e-05, "loss": 0.0029, "step": 36520 }, { "grad_norm": 0.16715021431446075, "learning_rate": 3.6317941916360296e-05, "loss": 0.003, "step": 36530 }, { "grad_norm": 0.18665917217731476, "learning_rate": 3.629143799683221e-05, "loss": 0.0042, "step": 36540 }, { "grad_norm": 0.21228618919849396, "learning_rate": 3.626493824160331e-05, "loss": 0.0034, "step": 36550 }, { "grad_norm": 0.1824057251214981, "learning_rate": 3.623844265872352e-05, "loss": 0.003, "step": 36560 }, { "grad_norm": 0.1753162145614624, "learning_rate": 3.621195125624149e-05, "loss": 0.0033, "step": 36570 }, { "grad_norm": 0.22050733864307404, "learning_rate": 3.618546404220463e-05, "loss": 0.0057, "step": 36580 }, { "grad_norm": 0.2052624672651291, "learning_rate": 3.615898102465903e-05, "loss": 0.0055, "step": 36590 }, { "grad_norm": 0.16407272219657898, "learning_rate": 3.6132502211649544e-05, "loss": 0.0047, "step": 36600 }, { "grad_norm": 0.20948605239391327, "learning_rate": 3.610602761121975e-05, "loss": 0.0055, "step": 36610 }, { "grad_norm": 0.16353218257427216, "learning_rate": 3.6079557231411897e-05, "loss": 0.0054, "step": 36620 }, { "grad_norm": 0.14929567277431488, "learning_rate": 3.6053091080267035e-05, "loss": 0.0034, "step": 36630 }, { "grad_norm": 0.18421703577041626, "learning_rate": 3.602662916582483e-05, "loss": 0.0036, "step": 36640 }, { "grad_norm": 0.16220426559448242, "learning_rate": 3.600017149612375e-05, "loss": 0.0059, "step": 36650 }, { "grad_norm": 0.16695435345172882, "learning_rate": 3.5973718079200935e-05, "loss": 0.0032, "step": 36660 }, { "grad_norm": 0.21801723539829254, "learning_rate": 3.5947268923092216e-05, "loss": 0.0035, "step": 36670 }, { "grad_norm": 0.14759376645088196, "learning_rate": 3.592082403583216e-05, "loss": 0.0041, "step": 36680 }, { "grad_norm": 0.21544069051742554, "learning_rate": 3.5894383425454004e-05, "loss": 0.0056, "step": 36690 }, { "grad_norm": 0.12972599267959595, "learning_rate": 3.586794709998975e-05, "loss": 0.0044, "step": 36700 }, { "grad_norm": 0.13849015533924103, "learning_rate": 3.584151506747002e-05, "loss": 0.0039, "step": 36710 }, { "grad_norm": 0.17557428777217865, "learning_rate": 3.581508733592418e-05, "loss": 0.0045, "step": 36720 }, { "grad_norm": 0.17779088020324707, "learning_rate": 3.5788663913380297e-05, "loss": 0.0035, "step": 36730 }, { "grad_norm": 0.16489960253238678, "learning_rate": 3.576224480786506e-05, "loss": 0.0036, "step": 36740 }, { "grad_norm": 0.13422001898288727, "learning_rate": 3.573583002740393e-05, "loss": 0.0059, "step": 36750 }, { "grad_norm": 0.23320846259593964, "learning_rate": 3.570941958002103e-05, "loss": 0.0053, "step": 36760 }, { "grad_norm": 0.153609961271286, "learning_rate": 3.568301347373912e-05, "loss": 0.0033, "step": 36770 }, { "grad_norm": 0.14701387286186218, "learning_rate": 3.5656611716579726e-05, "loss": 0.0033, "step": 36780 }, { "grad_norm": 0.19461458921432495, "learning_rate": 3.5630214316562946e-05, "loss": 0.0046, "step": 36790 }, { "grad_norm": 0.17379187047481537, "learning_rate": 3.560382128170766e-05, "loss": 0.0048, "step": 36800 }, { "grad_norm": 0.21024708449840546, "learning_rate": 3.5577432620031374e-05, "loss": 0.0036, "step": 36810 }, { "grad_norm": 0.20913304388523102, "learning_rate": 3.5551048339550216e-05, "loss": 0.0045, "step": 36820 }, { "grad_norm": 0.19161812961101532, "learning_rate": 3.55246684482791e-05, "loss": 0.0055, "step": 36830 }, { "grad_norm": 0.17264392971992493, "learning_rate": 3.5498292954231496e-05, "loss": 0.0042, "step": 36840 }, { "grad_norm": 0.16684402525424957, "learning_rate": 3.54719218654196e-05, "loss": 0.0047, "step": 36850 }, { "grad_norm": 0.16446812450885773, "learning_rate": 3.544555518985425e-05, "loss": 0.0046, "step": 36860 }, { "grad_norm": 0.12240175902843475, "learning_rate": 3.541919293554494e-05, "loss": 0.0042, "step": 36870 }, { "grad_norm": 0.15868568420410156, "learning_rate": 3.539283511049985e-05, "loss": 0.0041, "step": 36880 }, { "grad_norm": 0.25886955857276917, "learning_rate": 3.5366481722725755e-05, "loss": 0.0032, "step": 36890 }, { "grad_norm": 0.12546958029270172, "learning_rate": 3.534013278022816e-05, "loss": 0.0035, "step": 36900 }, { "grad_norm": 0.15886810421943665, "learning_rate": 3.531378829101113e-05, "loss": 0.0065, "step": 36910 }, { "grad_norm": 0.1262883096933365, "learning_rate": 3.528744826307746e-05, "loss": 0.0037, "step": 36920 }, { "grad_norm": 0.13824865221977234, "learning_rate": 3.5261112704428554e-05, "loss": 0.0031, "step": 36930 }, { "grad_norm": 0.17388708889484406, "learning_rate": 3.523478162306443e-05, "loss": 0.0045, "step": 36940 }, { "grad_norm": 0.17003922164440155, "learning_rate": 3.520845502698381e-05, "loss": 0.0066, "step": 36950 }, { "grad_norm": 0.18128372728824615, "learning_rate": 3.5182132924184005e-05, "loss": 0.0062, "step": 36960 }, { "grad_norm": 0.1506938487291336, "learning_rate": 3.5155815322660966e-05, "loss": 0.0044, "step": 36970 }, { "grad_norm": 0.10939103364944458, "learning_rate": 3.512950223040931e-05, "loss": 0.0039, "step": 36980 }, { "grad_norm": 0.13286690413951874, "learning_rate": 3.5103193655422216e-05, "loss": 0.0035, "step": 36990 }, { "grad_norm": 0.18425068259239197, "learning_rate": 3.5076889605691596e-05, "loss": 0.0052, "step": 37000 }, { "grad_norm": 0.2002500742673874, "learning_rate": 3.505059008920787e-05, "loss": 0.0043, "step": 37010 }, { "grad_norm": 0.17909139394760132, "learning_rate": 3.502429511396016e-05, "loss": 0.0033, "step": 37020 }, { "grad_norm": 0.13977700471878052, "learning_rate": 3.4998004687936196e-05, "loss": 0.0039, "step": 37030 }, { "grad_norm": 0.16155658662319183, "learning_rate": 3.497171881912229e-05, "loss": 0.0059, "step": 37040 }, { "grad_norm": 0.1531539261341095, "learning_rate": 3.494543751550342e-05, "loss": 0.0045, "step": 37050 }, { "grad_norm": 0.16485808789730072, "learning_rate": 3.491916078506313e-05, "loss": 0.0058, "step": 37060 }, { "grad_norm": 0.17877094447612762, "learning_rate": 3.489288863578361e-05, "loss": 0.0037, "step": 37070 }, { "grad_norm": 0.18021926283836365, "learning_rate": 3.4866621075645646e-05, "loss": 0.0037, "step": 37080 }, { "grad_norm": 0.21794147789478302, "learning_rate": 3.4840358112628614e-05, "loss": 0.0043, "step": 37090 }, { "grad_norm": 0.15519224107265472, "learning_rate": 3.481409975471053e-05, "loss": 0.0037, "step": 37100 }, { "grad_norm": 0.17899326980113983, "learning_rate": 3.4787846009867986e-05, "loss": 0.0037, "step": 37110 }, { "grad_norm": 0.17512184381484985, "learning_rate": 3.476159688607615e-05, "loss": 0.0032, "step": 37120 }, { "grad_norm": 0.12973667681217194, "learning_rate": 3.4735352391308854e-05, "loss": 0.0034, "step": 37130 }, { "grad_norm": 0.14092187583446503, "learning_rate": 3.4709112533538446e-05, "loss": 0.0032, "step": 37140 }, { "grad_norm": 0.1747317612171173, "learning_rate": 3.4682877320735934e-05, "loss": 0.0042, "step": 37150 }, { "grad_norm": 0.11954566836357117, "learning_rate": 3.465664676087085e-05, "loss": 0.0037, "step": 37160 }, { "grad_norm": 0.18138955533504486, "learning_rate": 3.463042086191136e-05, "loss": 0.0049, "step": 37170 }, { "grad_norm": 0.1978742778301239, "learning_rate": 3.460419963182423e-05, "loss": 0.0055, "step": 37180 }, { "grad_norm": 0.18544071912765503, "learning_rate": 3.457798307857473e-05, "loss": 0.006, "step": 37190 }, { "grad_norm": 0.17738130688667297, "learning_rate": 3.455177121012678e-05, "loss": 0.0042, "step": 37200 }, { "grad_norm": 0.1703493446111679, "learning_rate": 3.452556403444285e-05, "loss": 0.0062, "step": 37210 }, { "grad_norm": 0.16046540439128876, "learning_rate": 3.4499361559483975e-05, "loss": 0.0044, "step": 37220 }, { "grad_norm": 0.21843072772026062, "learning_rate": 3.44731637932098e-05, "loss": 0.0057, "step": 37230 }, { "grad_norm": 0.1549636721611023, "learning_rate": 3.44469707435785e-05, "loss": 0.0035, "step": 37240 }, { "grad_norm": 0.1514454334974289, "learning_rate": 3.4420782418546835e-05, "loss": 0.0038, "step": 37250 }, { "grad_norm": 0.14715375006198883, "learning_rate": 3.439459882607012e-05, "loss": 0.0041, "step": 37260 }, { "grad_norm": 0.19239705801010132, "learning_rate": 3.436841997410225e-05, "loss": 0.0036, "step": 37270 }, { "grad_norm": 0.23121893405914307, "learning_rate": 3.434224587059567e-05, "loss": 0.0054, "step": 37280 }, { "grad_norm": 0.17497767508029938, "learning_rate": 3.431607652350136e-05, "loss": 0.0032, "step": 37290 }, { "grad_norm": 0.1866466999053955, "learning_rate": 3.428991194076891e-05, "loss": 0.0049, "step": 37300 }, { "grad_norm": 0.17356768250465393, "learning_rate": 3.4263752130346394e-05, "loss": 0.0034, "step": 37310 }, { "grad_norm": 0.12500721216201782, "learning_rate": 3.4237597100180515e-05, "loss": 0.0066, "step": 37320 }, { "grad_norm": 0.25784391164779663, "learning_rate": 3.4211446858216427e-05, "loss": 0.0052, "step": 37330 }, { "grad_norm": 0.19535090029239655, "learning_rate": 3.4185301412397915e-05, "loss": 0.0048, "step": 37340 }, { "grad_norm": 0.1677456945180893, "learning_rate": 3.415916077066729e-05, "loss": 0.0045, "step": 37350 }, { "grad_norm": 0.17326194047927856, "learning_rate": 3.413302494096535e-05, "loss": 0.005, "step": 37360 }, { "grad_norm": 0.15238283574581146, "learning_rate": 3.410689393123151e-05, "loss": 0.0033, "step": 37370 }, { "grad_norm": 0.1412474364042282, "learning_rate": 3.408076774940364e-05, "loss": 0.0039, "step": 37380 }, { "grad_norm": 0.12052754312753677, "learning_rate": 3.40546464034182e-05, "loss": 0.005, "step": 37390 }, { "grad_norm": 0.1928095519542694, "learning_rate": 3.4028529901210185e-05, "loss": 0.0042, "step": 37400 }, { "grad_norm": 0.17098627984523773, "learning_rate": 3.4002418250713086e-05, "loss": 0.0055, "step": 37410 }, { "grad_norm": 0.1517518311738968, "learning_rate": 3.3976311459858936e-05, "loss": 0.0035, "step": 37420 }, { "grad_norm": 0.14871418476104736, "learning_rate": 3.395020953657826e-05, "loss": 0.0031, "step": 37430 }, { "grad_norm": 0.12419089674949646, "learning_rate": 3.3924112488800165e-05, "loss": 0.0057, "step": 37440 }, { "grad_norm": 0.12372568249702454, "learning_rate": 3.389802032445225e-05, "loss": 0.0038, "step": 37450 }, { "grad_norm": 0.19098053872585297, "learning_rate": 3.38719330514606e-05, "loss": 0.0052, "step": 37460 }, { "grad_norm": 0.2306004911661148, "learning_rate": 3.3845850677749866e-05, "loss": 0.0052, "step": 37470 }, { "grad_norm": 0.1845322996377945, "learning_rate": 3.3819773211243157e-05, "loss": 0.0038, "step": 37480 }, { "grad_norm": 0.13190294802188873, "learning_rate": 3.379370065986213e-05, "loss": 0.0049, "step": 37490 }, { "grad_norm": 0.230205237865448, "learning_rate": 3.3767633031526955e-05, "loss": 0.0045, "step": 37500 }, { "grad_norm": 0.15713104605674744, "learning_rate": 3.374157033415626e-05, "loss": 0.0039, "step": 37510 }, { "grad_norm": 0.15131232142448425, "learning_rate": 3.371551257566723e-05, "loss": 0.0058, "step": 37520 }, { "grad_norm": 0.18210501968860626, "learning_rate": 3.36894597639755e-05, "loss": 0.0046, "step": 37530 }, { "grad_norm": 0.14680740237236023, "learning_rate": 3.366341190699523e-05, "loss": 0.0039, "step": 37540 }, { "grad_norm": 0.17157544195652008, "learning_rate": 3.36373690126391e-05, "loss": 0.0038, "step": 37550 }, { "grad_norm": 0.1537700593471527, "learning_rate": 3.3611331088818234e-05, "loss": 0.0042, "step": 37560 }, { "grad_norm": 0.17883045971393585, "learning_rate": 3.3585298143442265e-05, "loss": 0.0061, "step": 37570 }, { "grad_norm": 0.1626027673482895, "learning_rate": 3.35592701844193e-05, "loss": 0.0038, "step": 37580 }, { "grad_norm": 0.20300991833209991, "learning_rate": 3.353324721965596e-05, "loss": 0.0035, "step": 37590 }, { "grad_norm": 0.1403523087501526, "learning_rate": 3.350722925705736e-05, "loss": 0.0026, "step": 37600 }, { "grad_norm": 0.17607346177101135, "learning_rate": 3.348121630452703e-05, "loss": 0.0035, "step": 37610 }, { "grad_norm": 0.1412612348794937, "learning_rate": 3.3455208369967044e-05, "loss": 0.0029, "step": 37620 }, { "grad_norm": 0.12900951504707336, "learning_rate": 3.34292054612779e-05, "loss": 0.0031, "step": 37630 }, { "grad_norm": 0.17048409581184387, "learning_rate": 3.340320758635861e-05, "loss": 0.0034, "step": 37640 }, { "grad_norm": 0.1889721155166626, "learning_rate": 3.337721475310666e-05, "loss": 0.0049, "step": 37650 }, { "grad_norm": 0.16436006128787994, "learning_rate": 3.335122696941795e-05, "loss": 0.0046, "step": 37660 }, { "grad_norm": 0.20078778266906738, "learning_rate": 3.332524424318692e-05, "loss": 0.0047, "step": 37670 }, { "grad_norm": 0.20041373372077942, "learning_rate": 3.32992665823064e-05, "loss": 0.006, "step": 37680 }, { "grad_norm": 0.1866205930709839, "learning_rate": 3.327329399466774e-05, "loss": 0.0053, "step": 37690 }, { "grad_norm": 0.16264699399471283, "learning_rate": 3.324732648816072e-05, "loss": 0.0035, "step": 37700 }, { "grad_norm": 0.15720997750759125, "learning_rate": 3.322136407067358e-05, "loss": 0.0042, "step": 37710 }, { "grad_norm": 0.11677990108728409, "learning_rate": 3.3195406750093036e-05, "loss": 0.0034, "step": 37720 }, { "grad_norm": 0.14262202382087708, "learning_rate": 3.3169454534304205e-05, "loss": 0.0032, "step": 37730 }, { "grad_norm": 0.17104850709438324, "learning_rate": 3.3143507431190725e-05, "loss": 0.0039, "step": 37740 }, { "grad_norm": 0.18489162623882294, "learning_rate": 3.311756544863459e-05, "loss": 0.004, "step": 37750 }, { "grad_norm": 0.17596763372421265, "learning_rate": 3.309162859451633e-05, "loss": 0.0036, "step": 37760 }, { "grad_norm": 0.1680329442024231, "learning_rate": 3.306569687671487e-05, "loss": 0.0032, "step": 37770 }, { "grad_norm": 0.14729122817516327, "learning_rate": 3.303977030310756e-05, "loss": 0.0049, "step": 37780 }, { "grad_norm": 0.15907712280750275, "learning_rate": 3.3013848881570245e-05, "loss": 0.0029, "step": 37790 }, { "grad_norm": 0.19403396546840668, "learning_rate": 3.298793261997712e-05, "loss": 0.0037, "step": 37800 }, { "grad_norm": 0.16667458415031433, "learning_rate": 3.2962021526200893e-05, "loss": 0.0044, "step": 37810 }, { "grad_norm": 0.17531436681747437, "learning_rate": 3.293611560811268e-05, "loss": 0.0042, "step": 37820 }, { "grad_norm": 0.14528824388980865, "learning_rate": 3.291021487358199e-05, "loss": 0.0028, "step": 37830 }, { "grad_norm": 0.178969144821167, "learning_rate": 3.28843193304768e-05, "loss": 0.0035, "step": 37840 }, { "grad_norm": 0.15281599760055542, "learning_rate": 3.2858428986663456e-05, "loss": 0.0044, "step": 37850 }, { "grad_norm": 0.14106035232543945, "learning_rate": 3.283254385000681e-05, "loss": 0.0037, "step": 37860 }, { "grad_norm": 0.13820761442184448, "learning_rate": 3.2806663928370076e-05, "loss": 0.0028, "step": 37870 }, { "grad_norm": 0.192808136343956, "learning_rate": 3.278078922961485e-05, "loss": 0.0034, "step": 37880 }, { "grad_norm": 0.14884616434574127, "learning_rate": 3.275491976160123e-05, "loss": 0.0034, "step": 37890 }, { "grad_norm": 0.11686035245656967, "learning_rate": 3.2729055532187645e-05, "loss": 0.0028, "step": 37900 }, { "grad_norm": 0.141291081905365, "learning_rate": 3.270319654923097e-05, "loss": 0.0032, "step": 37910 }, { "grad_norm": 0.18173593282699585, "learning_rate": 3.2677342820586506e-05, "loss": 0.0031, "step": 37920 }, { "grad_norm": 0.2224915623664856, "learning_rate": 3.2651494354107905e-05, "loss": 0.0036, "step": 37930 }, { "grad_norm": 0.14514945447444916, "learning_rate": 3.2625651157647266e-05, "loss": 0.0033, "step": 37940 }, { "grad_norm": 0.14295557141304016, "learning_rate": 3.259981323905505e-05, "loss": 0.0032, "step": 37950 }, { "grad_norm": 0.13157346844673157, "learning_rate": 3.257398060618014e-05, "loss": 0.0038, "step": 37960 }, { "grad_norm": 0.14938926696777344, "learning_rate": 3.254815326686983e-05, "loss": 0.0042, "step": 37970 }, { "grad_norm": 0.1744973063468933, "learning_rate": 3.2522331228969774e-05, "loss": 0.0038, "step": 37980 }, { "grad_norm": 0.08774067461490631, "learning_rate": 3.2496514500324006e-05, "loss": 0.0027, "step": 37990 }, { "grad_norm": 0.16428858041763306, "learning_rate": 3.247070308877498e-05, "loss": 0.0055, "step": 38000 }, { "grad_norm": 0.26228994131088257, "learning_rate": 3.2444897002163515e-05, "loss": 0.0039, "step": 38010 }, { "grad_norm": 0.16996653378009796, "learning_rate": 3.241909624832885e-05, "loss": 0.0034, "step": 38020 }, { "grad_norm": 0.1171463206410408, "learning_rate": 3.239330083510852e-05, "loss": 0.0026, "step": 38030 }, { "grad_norm": 0.1099950522184372, "learning_rate": 3.236751077033855e-05, "loss": 0.0032, "step": 38040 }, { "grad_norm": 0.15454281866550446, "learning_rate": 3.234172606185322e-05, "loss": 0.0043, "step": 38050 }, { "grad_norm": 0.1872277706861496, "learning_rate": 3.231594671748528e-05, "loss": 0.0034, "step": 38060 }, { "grad_norm": 0.13560956716537476, "learning_rate": 3.2290172745065815e-05, "loss": 0.0032, "step": 38070 }, { "grad_norm": 0.19233420491218567, "learning_rate": 3.226440415242426e-05, "loss": 0.0035, "step": 38080 }, { "grad_norm": 0.13916891813278198, "learning_rate": 3.223864094738846e-05, "loss": 0.0052, "step": 38090 }, { "grad_norm": 0.1733429878950119, "learning_rate": 3.221288313778456e-05, "loss": 0.0061, "step": 38100 }, { "grad_norm": 0.1555292010307312, "learning_rate": 3.2187130731437125e-05, "loss": 0.0037, "step": 38110 }, { "grad_norm": 0.15827955305576324, "learning_rate": 3.216138373616905e-05, "loss": 0.0038, "step": 38120 }, { "grad_norm": 0.17322027683258057, "learning_rate": 3.21356421598016e-05, "loss": 0.0058, "step": 38130 }, { "grad_norm": 0.1480608880519867, "learning_rate": 3.210990601015438e-05, "loss": 0.0027, "step": 38140 }, { "grad_norm": 0.11870847642421722, "learning_rate": 3.208417529504535e-05, "loss": 0.0027, "step": 38150 }, { "grad_norm": 0.1499631553888321, "learning_rate": 3.205845002229084e-05, "loss": 0.0036, "step": 38160 }, { "grad_norm": 0.17069754004478455, "learning_rate": 3.203273019970547e-05, "loss": 0.0053, "step": 38170 }, { "grad_norm": 0.16122272610664368, "learning_rate": 3.200701583510227e-05, "loss": 0.0029, "step": 38180 }, { "grad_norm": 0.14388661086559296, "learning_rate": 3.198130693629261e-05, "loss": 0.0041, "step": 38190 }, { "grad_norm": 0.18408207595348358, "learning_rate": 3.195560351108612e-05, "loss": 0.0037, "step": 38200 }, { "grad_norm": 0.16047623753547668, "learning_rate": 3.1929905567290865e-05, "loss": 0.0042, "step": 38210 }, { "grad_norm": 0.20117397606372833, "learning_rate": 3.1904213112713164e-05, "loss": 0.0042, "step": 38220 }, { "grad_norm": 0.21903356909751892, "learning_rate": 3.187852615515774e-05, "loss": 0.0047, "step": 38230 }, { "grad_norm": 0.12816491723060608, "learning_rate": 3.1852844702427606e-05, "loss": 0.0036, "step": 38240 }, { "grad_norm": 0.19357353448867798, "learning_rate": 3.18271687623241e-05, "loss": 0.0044, "step": 38250 }, { "grad_norm": 0.1963251680135727, "learning_rate": 3.1801498342646896e-05, "loss": 0.0039, "step": 38260 }, { "grad_norm": 0.17671528458595276, "learning_rate": 3.177583345119398e-05, "loss": 0.0039, "step": 38270 }, { "grad_norm": 0.17538240551948547, "learning_rate": 3.17501740957617e-05, "loss": 0.0038, "step": 38280 }, { "grad_norm": 0.1837548166513443, "learning_rate": 3.172452028414467e-05, "loss": 0.0038, "step": 38290 }, { "grad_norm": 0.17383086681365967, "learning_rate": 3.169887202413583e-05, "loss": 0.0035, "step": 38300 }, { "grad_norm": 0.16568560898303986, "learning_rate": 3.167322932352646e-05, "loss": 0.0045, "step": 38310 }, { "grad_norm": 0.20074434578418732, "learning_rate": 3.164759219010613e-05, "loss": 0.0037, "step": 38320 }, { "grad_norm": 0.14772246778011322, "learning_rate": 3.1621960631662725e-05, "loss": 0.005, "step": 38330 }, { "grad_norm": 0.21931256353855133, "learning_rate": 3.159633465598245e-05, "loss": 0.0069, "step": 38340 }, { "grad_norm": 0.21280939877033234, "learning_rate": 3.1570714270849767e-05, "loss": 0.0038, "step": 38350 }, { "grad_norm": 0.1555836945772171, "learning_rate": 3.1545099484047516e-05, "loss": 0.0032, "step": 38360 }, { "grad_norm": 0.22691906988620758, "learning_rate": 3.151949030335674e-05, "loss": 0.0042, "step": 38370 }, { "grad_norm": 0.18548434972763062, "learning_rate": 3.149388673655687e-05, "loss": 0.0038, "step": 38380 }, { "grad_norm": 0.17932584881782532, "learning_rate": 3.146828879142559e-05, "loss": 0.0034, "step": 38390 }, { "grad_norm": 0.13206318020820618, "learning_rate": 3.1442696475738866e-05, "loss": 0.0032, "step": 38400 }, { "grad_norm": 0.19317376613616943, "learning_rate": 3.141710979727098e-05, "loss": 0.0055, "step": 38410 }, { "grad_norm": 0.17274326086044312, "learning_rate": 3.139152876379447e-05, "loss": 0.0047, "step": 38420 }, { "grad_norm": 0.14006538689136505, "learning_rate": 3.1365953383080214e-05, "loss": 0.0042, "step": 38430 }, { "grad_norm": 0.20923461019992828, "learning_rate": 3.134038366289731e-05, "loss": 0.0033, "step": 38440 }, { "grad_norm": 0.1356029212474823, "learning_rate": 3.131481961101317e-05, "loss": 0.0035, "step": 38450 }, { "grad_norm": 0.1677478551864624, "learning_rate": 3.128926123519349e-05, "loss": 0.0051, "step": 38460 }, { "grad_norm": 0.16832514107227325, "learning_rate": 3.1263708543202194e-05, "loss": 0.0042, "step": 38470 }, { "grad_norm": 0.15600119531154633, "learning_rate": 3.123816154280155e-05, "loss": 0.0039, "step": 38480 }, { "grad_norm": 0.19982333481311798, "learning_rate": 3.121262024175207e-05, "loss": 0.004, "step": 38490 }, { "grad_norm": 0.14756712317466736, "learning_rate": 3.118708464781248e-05, "loss": 0.0057, "step": 38500 }, { "grad_norm": 0.16085180640220642, "learning_rate": 3.116155476873987e-05, "loss": 0.0051, "step": 38510 }, { "grad_norm": 0.13968053460121155, "learning_rate": 3.11360306122895e-05, "loss": 0.0032, "step": 38520 }, { "grad_norm": 0.13419944047927856, "learning_rate": 3.1110512186214975e-05, "loss": 0.0033, "step": 38530 }, { "grad_norm": 0.18088099360466003, "learning_rate": 3.1084999498268095e-05, "loss": 0.0049, "step": 38540 }, { "grad_norm": 0.15623392164707184, "learning_rate": 3.1059492556198934e-05, "loss": 0.0056, "step": 38550 }, { "grad_norm": 0.13471689820289612, "learning_rate": 3.103399136775586e-05, "loss": 0.0033, "step": 38560 }, { "grad_norm": 0.16237109899520874, "learning_rate": 3.100849594068541e-05, "loss": 0.0037, "step": 38570 }, { "grad_norm": 0.16273000836372375, "learning_rate": 3.0983006282732484e-05, "loss": 0.0036, "step": 38580 }, { "grad_norm": 0.1692211925983429, "learning_rate": 3.0957522401640116e-05, "loss": 0.0045, "step": 38590 }, { "grad_norm": 0.1641266644001007, "learning_rate": 3.0932044305149645e-05, "loss": 0.0037, "step": 38600 }, { "grad_norm": 0.1974402368068695, "learning_rate": 3.090657200100068e-05, "loss": 0.0041, "step": 38610 }, { "grad_norm": 0.18272998929023743, "learning_rate": 3.088110549693099e-05, "loss": 0.0058, "step": 38620 }, { "grad_norm": 0.13980145752429962, "learning_rate": 3.085564480067667e-05, "loss": 0.0038, "step": 38630 }, { "grad_norm": 0.15106166899204254, "learning_rate": 3.0830189919971955e-05, "loss": 0.0037, "step": 38640 }, { "grad_norm": 0.14273133873939514, "learning_rate": 3.080474086254939e-05, "loss": 0.0039, "step": 38650 }, { "grad_norm": 0.2325126677751541, "learning_rate": 3.077929763613975e-05, "loss": 0.004, "step": 38660 }, { "grad_norm": 0.16288131475448608, "learning_rate": 3.075386024847198e-05, "loss": 0.0035, "step": 38670 }, { "grad_norm": 0.17242486774921417, "learning_rate": 3.072842870727331e-05, "loss": 0.0042, "step": 38680 }, { "grad_norm": 0.16674727201461792, "learning_rate": 3.070300302026916e-05, "loss": 0.004, "step": 38690 }, { "grad_norm": 0.16508355736732483, "learning_rate": 3.067758319518318e-05, "loss": 0.0032, "step": 38700 }, { "grad_norm": 0.18067985773086548, "learning_rate": 3.065216923973725e-05, "loss": 0.0037, "step": 38710 }, { "grad_norm": 0.138757586479187, "learning_rate": 3.062676116165145e-05, "loss": 0.0042, "step": 38720 }, { "grad_norm": 0.1765752136707306, "learning_rate": 3.06013589686441e-05, "loss": 0.0048, "step": 38730 }, { "grad_norm": 0.13081985712051392, "learning_rate": 3.05759626684317e-05, "loss": 0.004, "step": 38740 }, { "grad_norm": 0.15878133475780487, "learning_rate": 3.055057226872896e-05, "loss": 0.0054, "step": 38750 }, { "grad_norm": 0.19448181986808777, "learning_rate": 3.052518777724887e-05, "loss": 0.0033, "step": 38760 }, { "grad_norm": 0.13335123658180237, "learning_rate": 3.04998092017025e-05, "loss": 0.0055, "step": 38770 }, { "grad_norm": 0.14835438132286072, "learning_rate": 3.0474436549799246e-05, "loss": 0.0036, "step": 38780 }, { "grad_norm": 0.14918258786201477, "learning_rate": 3.044906982924661e-05, "loss": 0.0035, "step": 38790 }, { "grad_norm": 0.11435020714998245, "learning_rate": 3.0423709047750337e-05, "loss": 0.0035, "step": 38800 }, { "grad_norm": 0.16265302896499634, "learning_rate": 3.03983542130144e-05, "loss": 0.0044, "step": 38810 }, { "grad_norm": 0.17572930455207825, "learning_rate": 3.0373005332740877e-05, "loss": 0.004, "step": 38820 }, { "grad_norm": 0.20133958756923676, "learning_rate": 3.034766241463013e-05, "loss": 0.0041, "step": 38830 }, { "grad_norm": 0.12758953869342804, "learning_rate": 3.032232546638064e-05, "loss": 0.0039, "step": 38840 }, { "grad_norm": 0.147571861743927, "learning_rate": 3.0296994495689114e-05, "loss": 0.0035, "step": 38850 }, { "grad_norm": 0.16126450896263123, "learning_rate": 3.0271669510250444e-05, "loss": 0.0067, "step": 38860 }, { "grad_norm": 0.2174222469329834, "learning_rate": 3.024635051775766e-05, "loss": 0.0037, "step": 38870 }, { "grad_norm": 0.22608225047588348, "learning_rate": 3.022103752590205e-05, "loss": 0.0041, "step": 38880 }, { "grad_norm": 0.13144844770431519, "learning_rate": 3.0195730542372992e-05, "loss": 0.004, "step": 38890 }, { "grad_norm": 0.14710941910743713, "learning_rate": 3.0170429574858084e-05, "loss": 0.0041, "step": 38900 }, { "grad_norm": 0.15190009772777557, "learning_rate": 3.0145134631043127e-05, "loss": 0.004, "step": 38910 }, { "grad_norm": 0.14443771541118622, "learning_rate": 3.0119845718612018e-05, "loss": 0.0037, "step": 38920 }, { "grad_norm": 0.14645619690418243, "learning_rate": 3.009456284524688e-05, "loss": 0.0027, "step": 38930 }, { "grad_norm": 0.12203684449195862, "learning_rate": 3.0069286018627967e-05, "loss": 0.0037, "step": 38940 }, { "grad_norm": 0.16224557161331177, "learning_rate": 3.0044015246433743e-05, "loss": 0.0059, "step": 38950 }, { "grad_norm": 0.21660684049129486, "learning_rate": 3.0018750536340755e-05, "loss": 0.0033, "step": 38960 }, { "grad_norm": 0.1786169558763504, "learning_rate": 2.999349189602378e-05, "loss": 0.0037, "step": 38970 }, { "grad_norm": 0.206757053732872, "learning_rate": 2.9968239333155733e-05, "loss": 0.0037, "step": 38980 }, { "grad_norm": 0.1491784304380417, "learning_rate": 2.994299285540767e-05, "loss": 0.0049, "step": 38990 }, { "grad_norm": 0.1379040628671646, "learning_rate": 2.9917752470448813e-05, "loss": 0.0037, "step": 39000 }, { "grad_norm": 0.1490938812494278, "learning_rate": 2.9892518185946495e-05, "loss": 0.0049, "step": 39010 }, { "grad_norm": 0.1295795440673828, "learning_rate": 2.986729000956624e-05, "loss": 0.0033, "step": 39020 }, { "grad_norm": 0.10465283691883087, "learning_rate": 2.9842067948971736e-05, "loss": 0.0038, "step": 39030 }, { "grad_norm": 0.20977923274040222, "learning_rate": 2.9816852011824727e-05, "loss": 0.0047, "step": 39040 }, { "grad_norm": 0.18177370727062225, "learning_rate": 2.979164220578519e-05, "loss": 0.0026, "step": 39050 }, { "grad_norm": 0.11753685027360916, "learning_rate": 2.9766438538511165e-05, "loss": 0.0044, "step": 39060 }, { "grad_norm": 0.1519283503293991, "learning_rate": 2.9741241017658873e-05, "loss": 0.0042, "step": 39070 }, { "grad_norm": 0.16709767282009125, "learning_rate": 2.971604965088267e-05, "loss": 0.003, "step": 39080 }, { "grad_norm": 0.25406086444854736, "learning_rate": 2.9690864445835008e-05, "loss": 0.0041, "step": 39090 }, { "grad_norm": 0.18339000642299652, "learning_rate": 2.966568541016651e-05, "loss": 0.0044, "step": 39100 }, { "grad_norm": 0.16771723330020905, "learning_rate": 2.9640512551525867e-05, "loss": 0.003, "step": 39110 }, { "grad_norm": 0.17842097580432892, "learning_rate": 2.961534587755995e-05, "loss": 0.0057, "step": 39120 }, { "grad_norm": 0.14806637167930603, "learning_rate": 2.959018539591375e-05, "loss": 0.0046, "step": 39130 }, { "grad_norm": 0.2111293375492096, "learning_rate": 2.9565031114230325e-05, "loss": 0.0042, "step": 39140 }, { "grad_norm": 0.12681937217712402, "learning_rate": 2.9539883040150895e-05, "loss": 0.0027, "step": 39150 }, { "grad_norm": 0.13751573860645294, "learning_rate": 2.9514741181314774e-05, "loss": 0.0031, "step": 39160 }, { "grad_norm": 0.13514862954616547, "learning_rate": 2.94896055453594e-05, "loss": 0.0028, "step": 39170 }, { "grad_norm": 0.15924686193466187, "learning_rate": 2.9464476139920332e-05, "loss": 0.0026, "step": 39180 }, { "grad_norm": 0.1754576563835144, "learning_rate": 2.9439352972631186e-05, "loss": 0.0044, "step": 39190 }, { "grad_norm": 0.17615650594234467, "learning_rate": 2.9414236051123757e-05, "loss": 0.0031, "step": 39200 }, { "grad_norm": 0.1596420705318451, "learning_rate": 2.938912538302785e-05, "loss": 0.0037, "step": 39210 }, { "grad_norm": 0.18859560787677765, "learning_rate": 2.9364020975971464e-05, "loss": 0.0037, "step": 39220 }, { "grad_norm": 0.14963360130786896, "learning_rate": 2.9338922837580657e-05, "loss": 0.0033, "step": 39230 }, { "grad_norm": 0.25426939129829407, "learning_rate": 2.931383097547955e-05, "loss": 0.0063, "step": 39240 }, { "grad_norm": 0.19684921205043793, "learning_rate": 2.928874539729043e-05, "loss": 0.0029, "step": 39250 }, { "grad_norm": 0.15413810312747955, "learning_rate": 2.926366611063358e-05, "loss": 0.0031, "step": 39260 }, { "grad_norm": 0.1927766352891922, "learning_rate": 2.9238593123127463e-05, "loss": 0.0028, "step": 39270 }, { "grad_norm": 0.17459021508693695, "learning_rate": 2.9213526442388583e-05, "loss": 0.0062, "step": 39280 }, { "grad_norm": 0.18897677958011627, "learning_rate": 2.9188466076031545e-05, "loss": 0.0058, "step": 39290 }, { "grad_norm": 0.2039947509765625, "learning_rate": 2.9163412031669012e-05, "loss": 0.0036, "step": 39300 }, { "grad_norm": 0.21787191927433014, "learning_rate": 2.913836431691175e-05, "loss": 0.0044, "step": 39310 }, { "grad_norm": 0.2503415048122406, "learning_rate": 2.9113322939368583e-05, "loss": 0.004, "step": 39320 }, { "grad_norm": 0.1635635793209076, "learning_rate": 2.9088287906646427e-05, "loss": 0.0055, "step": 39330 }, { "grad_norm": 0.143579363822937, "learning_rate": 2.906325922635024e-05, "loss": 0.0028, "step": 39340 }, { "grad_norm": 0.15177857875823975, "learning_rate": 2.903823690608313e-05, "loss": 0.0043, "step": 39350 }, { "grad_norm": 0.21038132905960083, "learning_rate": 2.9013220953446174e-05, "loss": 0.0043, "step": 39360 }, { "grad_norm": 0.14685139060020447, "learning_rate": 2.8988211376038564e-05, "loss": 0.0032, "step": 39370 }, { "grad_norm": 0.1463918536901474, "learning_rate": 2.8963208181457564e-05, "loss": 0.0035, "step": 39380 }, { "grad_norm": 0.1303980052471161, "learning_rate": 2.8938211377298453e-05, "loss": 0.0029, "step": 39390 }, { "grad_norm": 0.15008729696273804, "learning_rate": 2.8913220971154652e-05, "loss": 0.0029, "step": 39400 }, { "grad_norm": 0.17849008738994598, "learning_rate": 2.888823697061753e-05, "loss": 0.0031, "step": 39410 }, { "grad_norm": 0.16182662546634674, "learning_rate": 2.8863259383276618e-05, "loss": 0.0034, "step": 39420 }, { "grad_norm": 0.19884468615055084, "learning_rate": 2.8838288216719395e-05, "loss": 0.0044, "step": 39430 }, { "grad_norm": 0.1171545535326004, "learning_rate": 2.8813323478531484e-05, "loss": 0.0051, "step": 39440 }, { "grad_norm": 0.15900716185569763, "learning_rate": 2.8788365176296496e-05, "loss": 0.0041, "step": 39450 }, { "grad_norm": 0.18207363784313202, "learning_rate": 2.876341331759611e-05, "loss": 0.003, "step": 39460 }, { "grad_norm": 0.1453072428703308, "learning_rate": 2.8738467910010036e-05, "loss": 0.0032, "step": 39470 }, { "grad_norm": 0.09435635805130005, "learning_rate": 2.8713528961116032e-05, "loss": 0.0029, "step": 39480 }, { "grad_norm": 0.1797606647014618, "learning_rate": 2.8688596478489875e-05, "loss": 0.0038, "step": 39490 }, { "grad_norm": 0.14052733778953552, "learning_rate": 2.8663670469705434e-05, "loss": 0.0029, "step": 39500 }, { "grad_norm": 0.20392489433288574, "learning_rate": 2.8638750942334546e-05, "loss": 0.0066, "step": 39510 }, { "grad_norm": 0.16696028411388397, "learning_rate": 2.8613837903947115e-05, "loss": 0.0044, "step": 39520 }, { "grad_norm": 0.12981997430324554, "learning_rate": 2.858893136211106e-05, "loss": 0.0028, "step": 39530 }, { "grad_norm": 0.15271884202957153, "learning_rate": 2.8564031324392315e-05, "loss": 0.0033, "step": 39540 }, { "grad_norm": 0.12272675335407257, "learning_rate": 2.85391377983549e-05, "loss": 0.0031, "step": 39550 }, { "grad_norm": 0.11483343690633774, "learning_rate": 2.851425079156075e-05, "loss": 0.003, "step": 39560 }, { "grad_norm": 0.13703300058841705, "learning_rate": 2.848937031156994e-05, "loss": 0.0031, "step": 39570 }, { "grad_norm": 0.15563447773456573, "learning_rate": 2.846449636594044e-05, "loss": 0.0046, "step": 39580 }, { "grad_norm": 0.16307058930397034, "learning_rate": 2.843962896222836e-05, "loss": 0.0047, "step": 39590 }, { "grad_norm": 0.2005375623703003, "learning_rate": 2.8414768107987722e-05, "loss": 0.0035, "step": 39600 }, { "grad_norm": 0.166842520236969, "learning_rate": 2.838991381077061e-05, "loss": 0.0047, "step": 39610 }, { "grad_norm": 0.15242613852024078, "learning_rate": 2.83650660781271e-05, "loss": 0.003, "step": 39620 }, { "grad_norm": 0.11560218781232834, "learning_rate": 2.8340224917605285e-05, "loss": 0.0035, "step": 39630 }, { "grad_norm": 0.1581832319498062, "learning_rate": 2.831539033675122e-05, "loss": 0.0033, "step": 39640 }, { "grad_norm": 0.12408707290887833, "learning_rate": 2.8290562343109038e-05, "loss": 0.0025, "step": 39650 }, { "grad_norm": 0.17381906509399414, "learning_rate": 2.826574094422082e-05, "loss": 0.0034, "step": 39660 }, { "grad_norm": 0.17554503679275513, "learning_rate": 2.8240926147626645e-05, "loss": 0.0037, "step": 39670 }, { "grad_norm": 0.14596165716648102, "learning_rate": 2.8216117960864586e-05, "loss": 0.0035, "step": 39680 }, { "grad_norm": 0.15940487384796143, "learning_rate": 2.8191316391470703e-05, "loss": 0.0032, "step": 39690 }, { "grad_norm": 0.12426194548606873, "learning_rate": 2.816652144697911e-05, "loss": 0.0039, "step": 39700 }, { "grad_norm": 0.24336692690849304, "learning_rate": 2.8141733134921783e-05, "loss": 0.005, "step": 39710 }, { "grad_norm": 0.1461370289325714, "learning_rate": 2.811695146282884e-05, "loss": 0.0032, "step": 39720 }, { "grad_norm": 0.1991419494152069, "learning_rate": 2.8092176438228212e-05, "loss": 0.0041, "step": 39730 }, { "grad_norm": 0.18520689010620117, "learning_rate": 2.806740806864598e-05, "loss": 0.007, "step": 39740 }, { "grad_norm": 0.13291534781455994, "learning_rate": 2.804264636160604e-05, "loss": 0.0028, "step": 39750 }, { "grad_norm": 0.14441566169261932, "learning_rate": 2.8017891324630402e-05, "loss": 0.0037, "step": 39760 }, { "grad_norm": 0.2251356840133667, "learning_rate": 2.7993142965238976e-05, "loss": 0.0033, "step": 39770 }, { "grad_norm": 0.14895671606063843, "learning_rate": 2.7968401290949665e-05, "loss": 0.0036, "step": 39780 }, { "grad_norm": 0.1317809373140335, "learning_rate": 2.7943666309278328e-05, "loss": 0.0051, "step": 39790 }, { "grad_norm": 0.1881517618894577, "learning_rate": 2.7918938027738783e-05, "loss": 0.0039, "step": 39800 }, { "grad_norm": 0.1234879419207573, "learning_rate": 2.789421645384287e-05, "loss": 0.0037, "step": 39810 }, { "grad_norm": 0.17134664952754974, "learning_rate": 2.786950159510032e-05, "loss": 0.0043, "step": 39820 }, { "grad_norm": 0.14779159426689148, "learning_rate": 2.7844793459018876e-05, "loss": 0.0033, "step": 39830 }, { "grad_norm": 0.1418488472700119, "learning_rate": 2.7820092053104195e-05, "loss": 0.0034, "step": 39840 }, { "grad_norm": 0.18395917117595673, "learning_rate": 2.7795397384859933e-05, "loss": 0.0046, "step": 39850 }, { "grad_norm": 0.20731322467327118, "learning_rate": 2.7770709461787638e-05, "loss": 0.0037, "step": 39860 }, { "grad_norm": 0.154915913939476, "learning_rate": 2.7746028291386915e-05, "loss": 0.0033, "step": 39870 }, { "grad_norm": 0.1507592797279358, "learning_rate": 2.772135388115519e-05, "loss": 0.0034, "step": 39880 }, { "grad_norm": 0.23889128863811493, "learning_rate": 2.7696686238587945e-05, "loss": 0.0049, "step": 39890 }, { "grad_norm": 0.15484386682510376, "learning_rate": 2.7672025371178505e-05, "loss": 0.0049, "step": 39900 }, { "grad_norm": 0.15138769149780273, "learning_rate": 2.7647371286418238e-05, "loss": 0.0036, "step": 39910 }, { "grad_norm": 0.12590371072292328, "learning_rate": 2.762272399179639e-05, "loss": 0.004, "step": 39920 }, { "grad_norm": 0.1896025687456131, "learning_rate": 2.7598083494800154e-05, "loss": 0.0051, "step": 39930 }, { "grad_norm": 0.19800323247909546, "learning_rate": 2.7573449802914664e-05, "loss": 0.0031, "step": 39940 }, { "grad_norm": 0.13694854080677032, "learning_rate": 2.7548822923622964e-05, "loss": 0.0029, "step": 39950 }, { "grad_norm": 0.1395803689956665, "learning_rate": 2.752420286440609e-05, "loss": 0.004, "step": 39960 }, { "grad_norm": 0.1329280436038971, "learning_rate": 2.749958963274295e-05, "loss": 0.0026, "step": 39970 }, { "grad_norm": 0.10489252954721451, "learning_rate": 2.747498323611039e-05, "loss": 0.0036, "step": 39980 }, { "grad_norm": 0.11536134034395218, "learning_rate": 2.7450383681983184e-05, "loss": 0.0027, "step": 39990 }, { "grad_norm": 0.130319744348526, "learning_rate": 2.742579097783403e-05, "loss": 0.0027, "step": 40000 }, { "grad_norm": 0.16949599981307983, "learning_rate": 2.7401205131133512e-05, "loss": 0.0038, "step": 40010 }, { "grad_norm": 0.19407635927200317, "learning_rate": 2.7376626149350238e-05, "loss": 0.0049, "step": 40020 }, { "grad_norm": 0.12247345596551895, "learning_rate": 2.735205403995056e-05, "loss": 0.0037, "step": 40030 }, { "grad_norm": 0.141652911901474, "learning_rate": 2.7327488810398917e-05, "loss": 0.0032, "step": 40040 }, { "grad_norm": 0.17005136609077454, "learning_rate": 2.7302930468157507e-05, "loss": 0.0048, "step": 40050 }, { "grad_norm": 0.17325459420681, "learning_rate": 2.727837902068655e-05, "loss": 0.0035, "step": 40060 }, { "grad_norm": 0.12086674571037292, "learning_rate": 2.7253834475444123e-05, "loss": 0.0031, "step": 40070 }, { "grad_norm": 0.14297521114349365, "learning_rate": 2.7229296839886204e-05, "loss": 0.0034, "step": 40080 }, { "grad_norm": 0.16402524709701538, "learning_rate": 2.720476612146668e-05, "loss": 0.0045, "step": 40090 }, { "grad_norm": 0.13627465069293976, "learning_rate": 2.7180242327637317e-05, "loss": 0.0049, "step": 40100 }, { "grad_norm": 0.17370890080928802, "learning_rate": 2.7155725465847826e-05, "loss": 0.0036, "step": 40110 }, { "grad_norm": 0.11252837628126144, "learning_rate": 2.713121554354578e-05, "loss": 0.0042, "step": 40120 }, { "grad_norm": 0.0983961671590805, "learning_rate": 2.7106712568176628e-05, "loss": 0.0062, "step": 40130 }, { "grad_norm": 0.16455169022083282, "learning_rate": 2.708221654718374e-05, "loss": 0.0058, "step": 40140 }, { "grad_norm": 0.1063307672739029, "learning_rate": 2.7057727488008357e-05, "loss": 0.0029, "step": 40150 }, { "grad_norm": 0.13380520045757294, "learning_rate": 2.703324539808961e-05, "loss": 0.0035, "step": 40160 }, { "grad_norm": 0.23055113852024078, "learning_rate": 2.7008770284864505e-05, "loss": 0.0047, "step": 40170 }, { "grad_norm": 0.21245726943016052, "learning_rate": 2.6984302155767916e-05, "loss": 0.0033, "step": 40180 }, { "grad_norm": 0.14027787744998932, "learning_rate": 2.6959841018232683e-05, "loss": 0.0035, "step": 40190 }, { "grad_norm": 0.18057382106781006, "learning_rate": 2.693538687968937e-05, "loss": 0.0042, "step": 40200 }, { "grad_norm": 0.13222627341747284, "learning_rate": 2.6910939747566556e-05, "loss": 0.0031, "step": 40210 }, { "grad_norm": 0.1441546380519867, "learning_rate": 2.6886499629290607e-05, "loss": 0.0034, "step": 40220 }, { "grad_norm": 0.15969042479991913, "learning_rate": 2.6862066532285802e-05, "loss": 0.0038, "step": 40230 }, { "grad_norm": 0.11195027083158493, "learning_rate": 2.6837640463974262e-05, "loss": 0.0034, "step": 40240 }, { "grad_norm": 0.09255232661962509, "learning_rate": 2.681322143177596e-05, "loss": 0.0021, "step": 40250 }, { "grad_norm": 0.16341856122016907, "learning_rate": 2.678880944310882e-05, "loss": 0.0038, "step": 40260 }, { "grad_norm": 0.10179568827152252, "learning_rate": 2.6764404505388474e-05, "loss": 0.0023, "step": 40270 }, { "grad_norm": 0.16367743909358978, "learning_rate": 2.6740006626028558e-05, "loss": 0.0047, "step": 40280 }, { "grad_norm": 0.2046918272972107, "learning_rate": 2.671561581244048e-05, "loss": 0.0027, "step": 40290 }, { "grad_norm": 0.20121406018733978, "learning_rate": 2.6691232072033536e-05, "loss": 0.003, "step": 40300 }, { "grad_norm": 0.23787392675876617, "learning_rate": 2.6666855412214852e-05, "loss": 0.0042, "step": 40310 }, { "grad_norm": 0.17558088898658752, "learning_rate": 2.664248584038942e-05, "loss": 0.0048, "step": 40320 }, { "grad_norm": 0.17522291839122772, "learning_rate": 2.6618123363960047e-05, "loss": 0.0052, "step": 40330 }, { "grad_norm": 0.18726029992103577, "learning_rate": 2.659376799032748e-05, "loss": 0.0032, "step": 40340 }, { "grad_norm": 0.1825515180826187, "learning_rate": 2.6569419726890145e-05, "loss": 0.0029, "step": 40350 }, { "grad_norm": 0.14483273029327393, "learning_rate": 2.654507858104447e-05, "loss": 0.0062, "step": 40360 }, { "grad_norm": 0.18235516548156738, "learning_rate": 2.652074456018463e-05, "loss": 0.0035, "step": 40370 }, { "grad_norm": 0.12875507771968842, "learning_rate": 2.6496417671702646e-05, "loss": 0.0028, "step": 40380 }, { "grad_norm": 0.12988851964473724, "learning_rate": 2.6472097922988427e-05, "loss": 0.0027, "step": 40390 }, { "grad_norm": 0.15920481085777283, "learning_rate": 2.6447785321429607e-05, "loss": 0.0047, "step": 40400 }, { "grad_norm": 0.18997079133987427, "learning_rate": 2.6423479874411784e-05, "loss": 0.0032, "step": 40410 }, { "grad_norm": 0.16491113603115082, "learning_rate": 2.6399181589318234e-05, "loss": 0.0028, "step": 40420 }, { "grad_norm": 0.20085182785987854, "learning_rate": 2.6374890473530188e-05, "loss": 0.0037, "step": 40430 }, { "grad_norm": 0.21777567267417908, "learning_rate": 2.635060653442664e-05, "loss": 0.0053, "step": 40440 }, { "grad_norm": 0.12248072773218155, "learning_rate": 2.6326329779384395e-05, "loss": 0.0037, "step": 40450 }, { "grad_norm": 0.14673012495040894, "learning_rate": 2.63020602157781e-05, "loss": 0.0056, "step": 40460 }, { "grad_norm": 0.13401389122009277, "learning_rate": 2.62777978509802e-05, "loss": 0.0029, "step": 40470 }, { "grad_norm": 0.18501581251621246, "learning_rate": 2.6253542692360954e-05, "loss": 0.0036, "step": 40480 }, { "grad_norm": 0.12535792589187622, "learning_rate": 2.6229294747288458e-05, "loss": 0.0028, "step": 40490 }, { "grad_norm": 0.11880595237016678, "learning_rate": 2.6205054023128596e-05, "loss": 0.0028, "step": 40500 }, { "grad_norm": 0.17363210022449493, "learning_rate": 2.6180820527245043e-05, "loss": 0.0036, "step": 40510 }, { "grad_norm": 0.13007038831710815, "learning_rate": 2.6156594266999313e-05, "loss": 0.0033, "step": 40520 }, { "grad_norm": 0.12292372435331345, "learning_rate": 2.6132375249750672e-05, "loss": 0.0037, "step": 40530 }, { "grad_norm": 0.13666892051696777, "learning_rate": 2.6108163482856286e-05, "loss": 0.0075, "step": 40540 }, { "grad_norm": 0.15705054998397827, "learning_rate": 2.6083958973670964e-05, "loss": 0.005, "step": 40550 }, { "grad_norm": 0.1749950349330902, "learning_rate": 2.6059761729547483e-05, "loss": 0.0044, "step": 40560 }, { "grad_norm": 0.1375347375869751, "learning_rate": 2.603557175783624e-05, "loss": 0.0027, "step": 40570 }, { "grad_norm": 0.1371878683567047, "learning_rate": 2.601138906588559e-05, "loss": 0.0031, "step": 40580 }, { "grad_norm": 0.1526304930448532, "learning_rate": 2.598721366104152e-05, "loss": 0.0025, "step": 40590 }, { "grad_norm": 0.11683519184589386, "learning_rate": 2.5963045550647945e-05, "loss": 0.0039, "step": 40600 }, { "grad_norm": 0.15001489222049713, "learning_rate": 2.5938884742046466e-05, "loss": 0.0031, "step": 40610 }, { "grad_norm": 0.13099738955497742, "learning_rate": 2.5914731242576507e-05, "loss": 0.0023, "step": 40620 }, { "grad_norm": 0.15278936922550201, "learning_rate": 2.5890585059575268e-05, "loss": 0.003, "step": 40630 }, { "grad_norm": 0.12717005610466003, "learning_rate": 2.5866446200377688e-05, "loss": 0.003, "step": 40640 }, { "grad_norm": 0.15024015307426453, "learning_rate": 2.5842314672316566e-05, "loss": 0.0027, "step": 40650 }, { "grad_norm": 0.11710398644208908, "learning_rate": 2.581819048272239e-05, "loss": 0.0029, "step": 40660 }, { "grad_norm": 0.10066734254360199, "learning_rate": 2.5794073638923478e-05, "loss": 0.0024, "step": 40670 }, { "grad_norm": 0.13636569678783417, "learning_rate": 2.576996414824586e-05, "loss": 0.0028, "step": 40680 }, { "grad_norm": 0.15097136795520782, "learning_rate": 2.574586201801339e-05, "loss": 0.0055, "step": 40690 }, { "grad_norm": 0.10483056306838989, "learning_rate": 2.572176725554762e-05, "loss": 0.0025, "step": 40700 }, { "grad_norm": 0.19350281357765198, "learning_rate": 2.5697679868167966e-05, "loss": 0.0031, "step": 40710 }, { "grad_norm": 0.16532692313194275, "learning_rate": 2.5673599863191468e-05, "loss": 0.0045, "step": 40720 }, { "grad_norm": 0.11395798623561859, "learning_rate": 2.564952724793306e-05, "loss": 0.0028, "step": 40730 }, { "grad_norm": 0.16589069366455078, "learning_rate": 2.5625462029705306e-05, "loss": 0.0042, "step": 40740 }, { "grad_norm": 0.12468626350164413, "learning_rate": 2.5601404215818624e-05, "loss": 0.003, "step": 40750 }, { "grad_norm": 0.15399008989334106, "learning_rate": 2.5577353813581144e-05, "loss": 0.0033, "step": 40760 }, { "grad_norm": 0.10989420115947723, "learning_rate": 2.5553310830298733e-05, "loss": 0.0021, "step": 40770 }, { "grad_norm": 0.1566045880317688, "learning_rate": 2.5529275273275012e-05, "loss": 0.0034, "step": 40780 }, { "grad_norm": 0.15581819415092468, "learning_rate": 2.550524714981133e-05, "loss": 0.0038, "step": 40790 }, { "grad_norm": 0.23047573864459991, "learning_rate": 2.5481226467206837e-05, "loss": 0.0047, "step": 40800 }, { "grad_norm": 0.18920201063156128, "learning_rate": 2.5457213232758365e-05, "loss": 0.0039, "step": 40810 }, { "grad_norm": 0.14982692897319794, "learning_rate": 2.5433207453760498e-05, "loss": 0.0034, "step": 40820 }, { "grad_norm": 0.17577974498271942, "learning_rate": 2.5409209137505552e-05, "loss": 0.0031, "step": 40830 }, { "grad_norm": 0.12767860293388367, "learning_rate": 2.5385218291283597e-05, "loss": 0.0031, "step": 40840 }, { "grad_norm": 0.23887500166893005, "learning_rate": 2.5361234922382383e-05, "loss": 0.0025, "step": 40850 }, { "grad_norm": 0.13887599110603333, "learning_rate": 2.533725903808749e-05, "loss": 0.0023, "step": 40860 }, { "grad_norm": 0.124891497194767, "learning_rate": 2.5313290645682085e-05, "loss": 0.0028, "step": 40870 }, { "grad_norm": 0.15588347613811493, "learning_rate": 2.52893297524472e-05, "loss": 0.0035, "step": 40880 }, { "grad_norm": 0.20391803979873657, "learning_rate": 2.526537636566145e-05, "loss": 0.0036, "step": 40890 }, { "grad_norm": 0.11534092575311661, "learning_rate": 2.5241430492601305e-05, "loss": 0.0042, "step": 40900 }, { "grad_norm": 0.13671579957008362, "learning_rate": 2.5217492140540867e-05, "loss": 0.0042, "step": 40910 }, { "grad_norm": 0.1937016099691391, "learning_rate": 2.5193561316751967e-05, "loss": 0.0027, "step": 40920 }, { "grad_norm": 0.14325913786888123, "learning_rate": 2.516963802850416e-05, "loss": 0.003, "step": 40930 }, { "grad_norm": 0.13932280242443085, "learning_rate": 2.5145722283064698e-05, "loss": 0.0035, "step": 40940 }, { "grad_norm": 0.11823240667581558, "learning_rate": 2.5121814087698602e-05, "loss": 0.0037, "step": 40950 }, { "grad_norm": 0.12812559306621552, "learning_rate": 2.509791344966848e-05, "loss": 0.0028, "step": 40960 }, { "grad_norm": 0.21148385107517242, "learning_rate": 2.5074020376234768e-05, "loss": 0.0051, "step": 40970 }, { "grad_norm": 0.12552213668823242, "learning_rate": 2.5050134874655534e-05, "loss": 0.0027, "step": 40980 }, { "grad_norm": 0.18470603227615356, "learning_rate": 2.5026256952186566e-05, "loss": 0.0044, "step": 40990 }, { "grad_norm": 0.12672986090183258, "learning_rate": 2.5002386616081335e-05, "loss": 0.0037, "step": 41000 }, { "grad_norm": 0.18359945714473724, "learning_rate": 2.497852387359103e-05, "loss": 0.0043, "step": 41010 }, { "grad_norm": 0.17262490093708038, "learning_rate": 2.4954668731964496e-05, "loss": 0.0031, "step": 41020 }, { "grad_norm": 0.14123176038265228, "learning_rate": 2.4930821198448364e-05, "loss": 0.0036, "step": 41030 }, { "grad_norm": 0.16663289070129395, "learning_rate": 2.4906981280286796e-05, "loss": 0.0027, "step": 41040 }, { "grad_norm": 0.11400409042835236, "learning_rate": 2.488314898472179e-05, "loss": 0.0028, "step": 41050 }, { "grad_norm": 0.09916108846664429, "learning_rate": 2.485932431899295e-05, "loss": 0.0023, "step": 41060 }, { "grad_norm": 0.09815013408660889, "learning_rate": 2.4835507290337584e-05, "loss": 0.0029, "step": 41070 }, { "grad_norm": 0.133224755525589, "learning_rate": 2.4811697905990672e-05, "loss": 0.0035, "step": 41080 }, { "grad_norm": 0.1441335529088974, "learning_rate": 2.4787896173184854e-05, "loss": 0.0034, "step": 41090 }, { "grad_norm": 0.14974042773246765, "learning_rate": 2.4764102099150534e-05, "loss": 0.0033, "step": 41100 }, { "grad_norm": 0.1326851099729538, "learning_rate": 2.4740315691115644e-05, "loss": 0.0027, "step": 41110 }, { "grad_norm": 0.16287770867347717, "learning_rate": 2.4716536956305918e-05, "loss": 0.0026, "step": 41120 }, { "grad_norm": 0.11091754585504532, "learning_rate": 2.4692765901944697e-05, "loss": 0.0026, "step": 41130 }, { "grad_norm": 0.13137423992156982, "learning_rate": 2.4669002535253e-05, "loss": 0.0027, "step": 41140 }, { "grad_norm": 0.09918172657489777, "learning_rate": 2.46452468634495e-05, "loss": 0.0027, "step": 41150 }, { "grad_norm": 0.13535438477993011, "learning_rate": 2.462149889375055e-05, "loss": 0.0025, "step": 41160 }, { "grad_norm": 0.1631234884262085, "learning_rate": 2.459775863337014e-05, "loss": 0.0033, "step": 41170 }, { "grad_norm": 0.1580214500427246, "learning_rate": 2.4574026089519985e-05, "loss": 0.0031, "step": 41180 }, { "grad_norm": 0.21731801331043243, "learning_rate": 2.4550301269409333e-05, "loss": 0.0045, "step": 41190 }, { "grad_norm": 0.19862671196460724, "learning_rate": 2.4526584180245216e-05, "loss": 0.006, "step": 41200 }, { "grad_norm": 0.18760280311107635, "learning_rate": 2.4502874829232236e-05, "loss": 0.003, "step": 41210 }, { "grad_norm": 0.13663454353809357, "learning_rate": 2.447917322357267e-05, "loss": 0.003, "step": 41220 }, { "grad_norm": 0.11657775193452835, "learning_rate": 2.4455479370466443e-05, "loss": 0.0026, "step": 41230 }, { "grad_norm": 0.15587320923805237, "learning_rate": 2.4431793277111097e-05, "loss": 0.0033, "step": 41240 }, { "grad_norm": 0.136717289686203, "learning_rate": 2.4408114950701905e-05, "loss": 0.0053, "step": 41250 }, { "grad_norm": 0.17365571856498718, "learning_rate": 2.4384444398431634e-05, "loss": 0.0035, "step": 41260 }, { "grad_norm": 0.1508485972881317, "learning_rate": 2.4360781627490837e-05, "loss": 0.005, "step": 41270 }, { "grad_norm": 0.15707406401634216, "learning_rate": 2.433712664506762e-05, "loss": 0.0048, "step": 41280 }, { "grad_norm": 0.11897261440753937, "learning_rate": 2.431347945834774e-05, "loss": 0.0023, "step": 41290 }, { "grad_norm": 0.19674824178218842, "learning_rate": 2.428984007451458e-05, "loss": 0.003, "step": 41300 }, { "grad_norm": 0.1379646211862564, "learning_rate": 2.426620850074917e-05, "loss": 0.0029, "step": 41310 }, { "grad_norm": 0.16234001517295837, "learning_rate": 2.424258474423014e-05, "loss": 0.0037, "step": 41320 }, { "grad_norm": 0.16070736944675446, "learning_rate": 2.421896881213382e-05, "loss": 0.0033, "step": 41330 }, { "grad_norm": 0.1205456554889679, "learning_rate": 2.419536071163402e-05, "loss": 0.0037, "step": 41340 }, { "grad_norm": 0.15602603554725647, "learning_rate": 2.417176044990233e-05, "loss": 0.0035, "step": 41350 }, { "grad_norm": 0.17379288375377655, "learning_rate": 2.4148168034107855e-05, "loss": 0.0034, "step": 41360 }, { "grad_norm": 0.12310892343521118, "learning_rate": 2.4124583471417355e-05, "loss": 0.0028, "step": 41370 }, { "grad_norm": 0.14370737969875336, "learning_rate": 2.41010067689952e-05, "loss": 0.0034, "step": 41380 }, { "grad_norm": 0.12322472780942917, "learning_rate": 2.4077437934003338e-05, "loss": 0.005, "step": 41390 }, { "grad_norm": 0.11277265101671219, "learning_rate": 2.405387697360143e-05, "loss": 0.0024, "step": 41400 }, { "grad_norm": 0.13990013301372528, "learning_rate": 2.4030323894946595e-05, "loss": 0.0042, "step": 41410 }, { "grad_norm": 0.12131306529045105, "learning_rate": 2.40067787051937e-05, "loss": 0.0035, "step": 41420 }, { "grad_norm": 0.14810140430927277, "learning_rate": 2.3983241411495087e-05, "loss": 0.0029, "step": 41430 }, { "grad_norm": 0.16509346663951874, "learning_rate": 2.3959712021000823e-05, "loss": 0.0041, "step": 41440 }, { "grad_norm": 0.1655333787202835, "learning_rate": 2.3936190540858495e-05, "loss": 0.003, "step": 41450 }, { "grad_norm": 0.127410888671875, "learning_rate": 2.39126769782133e-05, "loss": 0.0029, "step": 41460 }, { "grad_norm": 0.15603305399417877, "learning_rate": 2.388917134020805e-05, "loss": 0.0042, "step": 41470 }, { "grad_norm": 0.13136667013168335, "learning_rate": 2.3865673633983128e-05, "loss": 0.0033, "step": 41480 }, { "grad_norm": 0.15359167754650116, "learning_rate": 2.3842183866676492e-05, "loss": 0.0039, "step": 41490 }, { "grad_norm": 0.16049325466156006, "learning_rate": 2.381870204542377e-05, "loss": 0.0033, "step": 41500 }, { "grad_norm": 0.14517591893672943, "learning_rate": 2.379522817735808e-05, "loss": 0.0034, "step": 41510 }, { "grad_norm": 0.09945697337388992, "learning_rate": 2.377176226961018e-05, "loss": 0.0028, "step": 41520 }, { "grad_norm": 0.14102882146835327, "learning_rate": 2.3748304329308384e-05, "loss": 0.0036, "step": 41530 }, { "grad_norm": 0.14894405007362366, "learning_rate": 2.372485436357858e-05, "loss": 0.0038, "step": 41540 }, { "grad_norm": 0.18140621483325958, "learning_rate": 2.3701412379544296e-05, "loss": 0.0037, "step": 41550 }, { "grad_norm": 0.14919565618038177, "learning_rate": 2.367797838432653e-05, "loss": 0.0026, "step": 41560 }, { "grad_norm": 0.14559563994407654, "learning_rate": 2.3654552385043967e-05, "loss": 0.0044, "step": 41570 }, { "grad_norm": 0.15985830128192902, "learning_rate": 2.3631134388812742e-05, "loss": 0.0032, "step": 41580 }, { "grad_norm": 0.13166458904743195, "learning_rate": 2.3607724402746684e-05, "loss": 0.0051, "step": 41590 }, { "grad_norm": 0.13355065882205963, "learning_rate": 2.35843224339571e-05, "loss": 0.0029, "step": 41600 }, { "grad_norm": 0.1258516162633896, "learning_rate": 2.3560928489552897e-05, "loss": 0.0028, "step": 41610 }, { "grad_norm": 0.19604630768299103, "learning_rate": 2.353754257664053e-05, "loss": 0.0051, "step": 41620 }, { "grad_norm": 0.19981656968593597, "learning_rate": 2.3514164702324037e-05, "loss": 0.0027, "step": 41630 }, { "grad_norm": 0.14043785631656647, "learning_rate": 2.3490794873704963e-05, "loss": 0.0027, "step": 41640 }, { "grad_norm": 0.15612654387950897, "learning_rate": 2.3467433097882496e-05, "loss": 0.0021, "step": 41650 }, { "grad_norm": 0.10130954533815384, "learning_rate": 2.34440793819533e-05, "loss": 0.0021, "step": 41660 }, { "grad_norm": 0.15227554738521576, "learning_rate": 2.3420733733011617e-05, "loss": 0.0051, "step": 41670 }, { "grad_norm": 0.17008958756923676, "learning_rate": 2.3397396158149243e-05, "loss": 0.0025, "step": 41680 }, { "grad_norm": 0.16074174642562866, "learning_rate": 2.3374066664455498e-05, "loss": 0.0042, "step": 41690 }, { "grad_norm": 0.1633201241493225, "learning_rate": 2.3350745259017315e-05, "loss": 0.0032, "step": 41700 }, { "grad_norm": 0.13288123905658722, "learning_rate": 2.332743194891906e-05, "loss": 0.0035, "step": 41710 }, { "grad_norm": 0.1581203043460846, "learning_rate": 2.330412674124276e-05, "loss": 0.0036, "step": 41720 }, { "grad_norm": 0.10304956883192062, "learning_rate": 2.328082964306786e-05, "loss": 0.0028, "step": 41730 }, { "grad_norm": 0.1855410635471344, "learning_rate": 2.325754066147145e-05, "loss": 0.0037, "step": 41740 }, { "grad_norm": 0.1351453959941864, "learning_rate": 2.32342598035281e-05, "loss": 0.003, "step": 41750 }, { "grad_norm": 0.14050233364105225, "learning_rate": 2.321098707630991e-05, "loss": 0.0024, "step": 41760 }, { "grad_norm": 0.12904061377048492, "learning_rate": 2.318772248688652e-05, "loss": 0.0061, "step": 41770 }, { "grad_norm": 0.14126089215278625, "learning_rate": 2.3164466042325107e-05, "loss": 0.0032, "step": 41780 }, { "grad_norm": 0.15881861746311188, "learning_rate": 2.3141217749690353e-05, "loss": 0.0038, "step": 41790 }, { "grad_norm": 0.12823353707790375, "learning_rate": 2.3117977616044466e-05, "loss": 0.0031, "step": 41800 }, { "grad_norm": 0.124204620718956, "learning_rate": 2.309474564844722e-05, "loss": 0.0029, "step": 41810 }, { "grad_norm": 0.1603548377752304, "learning_rate": 2.307152185395585e-05, "loss": 0.0032, "step": 41820 }, { "grad_norm": 0.1899862140417099, "learning_rate": 2.3048306239625144e-05, "loss": 0.004, "step": 41830 }, { "grad_norm": 0.10973954200744629, "learning_rate": 2.3025098812507378e-05, "loss": 0.0021, "step": 41840 }, { "grad_norm": 0.13352397084236145, "learning_rate": 2.3001899579652366e-05, "loss": 0.0046, "step": 41850 }, { "grad_norm": 0.1227138563990593, "learning_rate": 2.2978708548107393e-05, "loss": 0.0033, "step": 41860 }, { "grad_norm": 0.11199397593736649, "learning_rate": 2.2955525724917348e-05, "loss": 0.0026, "step": 41870 }, { "grad_norm": 0.23392455279827118, "learning_rate": 2.2932351117124477e-05, "loss": 0.0055, "step": 41880 }, { "grad_norm": 0.16378571093082428, "learning_rate": 2.29091847317687e-05, "loss": 0.0027, "step": 41890 }, { "grad_norm": 0.11797003448009491, "learning_rate": 2.2886026575887277e-05, "loss": 0.0029, "step": 41900 }, { "grad_norm": 0.12120306491851807, "learning_rate": 2.2862876656515094e-05, "loss": 0.0042, "step": 41910 }, { "grad_norm": 0.12483644485473633, "learning_rate": 2.2839734980684464e-05, "loss": 0.0028, "step": 41920 }, { "grad_norm": 0.11894072592258453, "learning_rate": 2.281660155542522e-05, "loss": 0.0029, "step": 41930 }, { "grad_norm": 0.16940362751483917, "learning_rate": 2.279347638776469e-05, "loss": 0.0024, "step": 41940 }, { "grad_norm": 0.16561780869960785, "learning_rate": 2.2770359484727665e-05, "loss": 0.0029, "step": 41950 }, { "grad_norm": 0.13039658963680267, "learning_rate": 2.27472508533365e-05, "loss": 0.0032, "step": 41960 }, { "grad_norm": 0.16566585004329681, "learning_rate": 2.2724150500610948e-05, "loss": 0.0033, "step": 41970 }, { "grad_norm": 0.10417769849300385, "learning_rate": 2.2701058433568302e-05, "loss": 0.0031, "step": 41980 }, { "grad_norm": 0.15752018988132477, "learning_rate": 2.2677974659223318e-05, "loss": 0.0035, "step": 41990 }, { "grad_norm": 0.09713495522737503, "learning_rate": 2.2654899184588235e-05, "loss": 0.0025, "step": 42000 }, { "grad_norm": 0.18814073503017426, "learning_rate": 2.2631832016672756e-05, "loss": 0.0031, "step": 42010 }, { "grad_norm": 0.15588198602199554, "learning_rate": 2.2608773162484127e-05, "loss": 0.0028, "step": 42020 }, { "grad_norm": 0.17948514223098755, "learning_rate": 2.2585722629026958e-05, "loss": 0.0045, "step": 42030 }, { "grad_norm": 0.12258731573820114, "learning_rate": 2.2562680423303457e-05, "loss": 0.0033, "step": 42040 }, { "grad_norm": 0.11625772714614868, "learning_rate": 2.2539646552313165e-05, "loss": 0.0024, "step": 42050 }, { "grad_norm": 0.1443842649459839, "learning_rate": 2.251662102305322e-05, "loss": 0.0043, "step": 42060 }, { "grad_norm": 0.1871681809425354, "learning_rate": 2.2493603842518152e-05, "loss": 0.0029, "step": 42070 }, { "grad_norm": 0.12985116243362427, "learning_rate": 2.2470595017699974e-05, "loss": 0.0029, "step": 42080 }, { "grad_norm": 0.16299666464328766, "learning_rate": 2.244759455558816e-05, "loss": 0.0028, "step": 42090 }, { "grad_norm": 0.13300037384033203, "learning_rate": 2.2424602463169614e-05, "loss": 0.0037, "step": 42100 }, { "grad_norm": 0.1265360414981842, "learning_rate": 2.2401618747428776e-05, "loss": 0.0034, "step": 42110 }, { "grad_norm": 0.10668852925300598, "learning_rate": 2.237864341534747e-05, "loss": 0.0026, "step": 42120 }, { "grad_norm": 0.16422423720359802, "learning_rate": 2.2355676473904998e-05, "loss": 0.0029, "step": 42130 }, { "grad_norm": 0.08642151206731796, "learning_rate": 2.2332717930078108e-05, "loss": 0.0025, "step": 42140 }, { "grad_norm": 0.1471499502658844, "learning_rate": 2.2309767790840992e-05, "loss": 0.0031, "step": 42150 }, { "grad_norm": 0.13878025114536285, "learning_rate": 2.228682606316529e-05, "loss": 0.0035, "step": 42160 }, { "grad_norm": 0.195353701710701, "learning_rate": 2.2263892754020138e-05, "loss": 0.0029, "step": 42170 }, { "grad_norm": 0.13064812123775482, "learning_rate": 2.2240967870372004e-05, "loss": 0.0036, "step": 42180 }, { "grad_norm": 0.15243151783943176, "learning_rate": 2.2218051419184933e-05, "loss": 0.0034, "step": 42190 }, { "grad_norm": 0.19037245213985443, "learning_rate": 2.219514340742026e-05, "loss": 0.0033, "step": 42200 }, { "grad_norm": 0.13021117448806763, "learning_rate": 2.2172243842036898e-05, "loss": 0.0032, "step": 42210 }, { "grad_norm": 0.13808530569076538, "learning_rate": 2.2149352729991107e-05, "loss": 0.0026, "step": 42220 }, { "grad_norm": 0.24775077402591705, "learning_rate": 2.2126470078236605e-05, "loss": 0.0035, "step": 42230 }, { "grad_norm": 0.1521518975496292, "learning_rate": 2.2103595893724533e-05, "loss": 0.0033, "step": 42240 }, { "grad_norm": 0.1505928635597229, "learning_rate": 2.208073018340345e-05, "loss": 0.003, "step": 42250 }, { "grad_norm": 0.14481805264949799, "learning_rate": 2.2057872954219405e-05, "loss": 0.0031, "step": 42260 }, { "grad_norm": 0.1576819270849228, "learning_rate": 2.203502421311575e-05, "loss": 0.0036, "step": 42270 }, { "grad_norm": 0.1533893644809723, "learning_rate": 2.2012183967033388e-05, "loss": 0.0025, "step": 42280 }, { "grad_norm": 0.1618533879518509, "learning_rate": 2.198935222291056e-05, "loss": 0.0026, "step": 42290 }, { "grad_norm": 0.1628190279006958, "learning_rate": 2.1966528987682948e-05, "loss": 0.0042, "step": 42300 }, { "grad_norm": 0.13917112350463867, "learning_rate": 2.194371426828365e-05, "loss": 0.0027, "step": 42310 }, { "grad_norm": 0.17998965084552765, "learning_rate": 2.192090807164317e-05, "loss": 0.0061, "step": 42320 }, { "grad_norm": 0.1781017780303955, "learning_rate": 2.1898110404689422e-05, "loss": 0.0038, "step": 42330 }, { "grad_norm": 0.17894701659679413, "learning_rate": 2.1875321274347776e-05, "loss": 0.0028, "step": 42340 }, { "grad_norm": 0.1310628354549408, "learning_rate": 2.18525406875409e-05, "loss": 0.004, "step": 42350 }, { "grad_norm": 0.10396751016378403, "learning_rate": 2.1829768651188997e-05, "loss": 0.0029, "step": 42360 }, { "grad_norm": 0.12844139337539673, "learning_rate": 2.180700517220958e-05, "loss": 0.004, "step": 42370 }, { "grad_norm": 0.20522047579288483, "learning_rate": 2.1784250257517603e-05, "loss": 0.0055, "step": 42380 }, { "grad_norm": 0.1703515201807022, "learning_rate": 2.1761503914025406e-05, "loss": 0.0044, "step": 42390 }, { "grad_norm": 0.14236022531986237, "learning_rate": 2.1738766148642705e-05, "loss": 0.0024, "step": 42400 }, { "grad_norm": 0.11090123653411865, "learning_rate": 2.1716036968276683e-05, "loss": 0.0031, "step": 42410 }, { "grad_norm": 0.16948281228542328, "learning_rate": 2.1693316379831808e-05, "loss": 0.004, "step": 42420 }, { "grad_norm": 0.15160688757896423, "learning_rate": 2.1670604390210037e-05, "loss": 0.0049, "step": 42430 }, { "grad_norm": 0.11981331557035446, "learning_rate": 2.1647901006310656e-05, "loss": 0.0027, "step": 42440 }, { "grad_norm": 0.1265793740749359, "learning_rate": 2.1625206235030353e-05, "loss": 0.0029, "step": 42450 }, { "grad_norm": 0.12867692112922668, "learning_rate": 2.160252008326321e-05, "loss": 0.0041, "step": 42460 }, { "grad_norm": 0.1542431265115738, "learning_rate": 2.157984255790067e-05, "loss": 0.0038, "step": 42470 }, { "grad_norm": 0.12827377021312714, "learning_rate": 2.1557173665831553e-05, "loss": 0.0041, "step": 42480 }, { "grad_norm": 0.1588735729455948, "learning_rate": 2.153451341394212e-05, "loss": 0.0033, "step": 42490 }, { "grad_norm": 0.17535175383090973, "learning_rate": 2.151186180911589e-05, "loss": 0.0036, "step": 42500 }, { "grad_norm": 0.11486001312732697, "learning_rate": 2.1489218858233877e-05, "loss": 0.003, "step": 42510 }, { "grad_norm": 0.16072852909564972, "learning_rate": 2.1466584568174392e-05, "loss": 0.0052, "step": 42520 }, { "grad_norm": 0.14080722630023956, "learning_rate": 2.1443958945813132e-05, "loss": 0.0034, "step": 42530 }, { "grad_norm": 0.13305938243865967, "learning_rate": 2.1421341998023163e-05, "loss": 0.003, "step": 42540 }, { "grad_norm": 0.13315212726593018, "learning_rate": 2.139873373167491e-05, "loss": 0.0032, "step": 42550 }, { "grad_norm": 0.09000915288925171, "learning_rate": 2.13761341536362e-05, "loss": 0.0021, "step": 42560 }, { "grad_norm": 0.11708277463912964, "learning_rate": 2.1353543270772136e-05, "loss": 0.0025, "step": 42570 }, { "grad_norm": 0.15893222391605377, "learning_rate": 2.1330961089945297e-05, "loss": 0.0036, "step": 42580 }, { "grad_norm": 0.16108164191246033, "learning_rate": 2.130838761801548e-05, "loss": 0.0032, "step": 42590 }, { "grad_norm": 0.16260536015033722, "learning_rate": 2.1285822861839966e-05, "loss": 0.0058, "step": 42600 }, { "grad_norm": 0.1277707815170288, "learning_rate": 2.126326682827331e-05, "loss": 0.0031, "step": 42610 }, { "grad_norm": 0.1873548924922943, "learning_rate": 2.124071952416744e-05, "loss": 0.0031, "step": 42620 }, { "grad_norm": 0.17107369005680084, "learning_rate": 2.1218180956371634e-05, "loss": 0.0045, "step": 42630 }, { "grad_norm": 0.1146121546626091, "learning_rate": 2.119565113173252e-05, "loss": 0.003, "step": 42640 }, { "grad_norm": 0.0860452950000763, "learning_rate": 2.1173130057094033e-05, "loss": 0.0043, "step": 42650 }, { "grad_norm": 0.15265463292598724, "learning_rate": 2.115061773929753e-05, "loss": 0.0022, "step": 42660 }, { "grad_norm": 0.13698548078536987, "learning_rate": 2.1128114185181623e-05, "loss": 0.0027, "step": 42670 }, { "grad_norm": 0.12606237828731537, "learning_rate": 2.1105619401582317e-05, "loss": 0.0032, "step": 42680 }, { "grad_norm": 0.1708395928144455, "learning_rate": 2.1083133395332928e-05, "loss": 0.0038, "step": 42690 }, { "grad_norm": 0.2103361338376999, "learning_rate": 2.1060656173264082e-05, "loss": 0.0044, "step": 42700 }, { "grad_norm": 0.21404999494552612, "learning_rate": 2.103818774220383e-05, "loss": 0.0045, "step": 42710 }, { "grad_norm": 0.16158972680568695, "learning_rate": 2.1015728108977412e-05, "loss": 0.0034, "step": 42720 }, { "grad_norm": 0.16464819014072418, "learning_rate": 2.0993277280407548e-05, "loss": 0.0036, "step": 42730 }, { "grad_norm": 0.1364492028951645, "learning_rate": 2.0970835263314132e-05, "loss": 0.0033, "step": 42740 }, { "grad_norm": 0.11004994809627533, "learning_rate": 2.094840206451451e-05, "loss": 0.0022, "step": 42750 }, { "grad_norm": 0.12882307171821594, "learning_rate": 2.0925977690823273e-05, "loss": 0.0033, "step": 42760 }, { "grad_norm": 0.11047834157943726, "learning_rate": 2.0903562149052364e-05, "loss": 0.0022, "step": 42770 }, { "grad_norm": 0.10292172431945801, "learning_rate": 2.0881155446011025e-05, "loss": 0.0035, "step": 42780 }, { "grad_norm": 0.12612217664718628, "learning_rate": 2.0858757588505823e-05, "loss": 0.0029, "step": 42790 }, { "grad_norm": 0.21856506168842316, "learning_rate": 2.0836368583340622e-05, "loss": 0.0032, "step": 42800 }, { "grad_norm": 0.13722337782382965, "learning_rate": 2.081398843731664e-05, "loss": 0.0022, "step": 42810 }, { "grad_norm": 0.14211338758468628, "learning_rate": 2.0791617157232357e-05, "loss": 0.0056, "step": 42820 }, { "grad_norm": 0.17739957571029663, "learning_rate": 2.0769254749883576e-05, "loss": 0.0031, "step": 42830 }, { "grad_norm": 0.11499431729316711, "learning_rate": 2.0746901222063415e-05, "loss": 0.0037, "step": 42840 }, { "grad_norm": 0.1767963021993637, "learning_rate": 2.072455658056226e-05, "loss": 0.003, "step": 42850 }, { "grad_norm": 0.19275100529193878, "learning_rate": 2.0702220832167873e-05, "loss": 0.0034, "step": 42860 }, { "grad_norm": 0.13615760207176208, "learning_rate": 2.0679893983665205e-05, "loss": 0.0022, "step": 42870 }, { "grad_norm": 0.12432663887739182, "learning_rate": 2.0657576041836622e-05, "loss": 0.0051, "step": 42880 }, { "grad_norm": 0.1212100014090538, "learning_rate": 2.0635267013461666e-05, "loss": 0.0025, "step": 42890 }, { "grad_norm": 0.13655124604701996, "learning_rate": 2.061296690531728e-05, "loss": 0.0032, "step": 42900 }, { "grad_norm": 0.12499711662530899, "learning_rate": 2.0590675724177622e-05, "loss": 0.0026, "step": 42910 }, { "grad_norm": 0.14739903807640076, "learning_rate": 2.0568393476814167e-05, "loss": 0.0026, "step": 42920 }, { "grad_norm": 0.14726291596889496, "learning_rate": 2.0546120169995685e-05, "loss": 0.0036, "step": 42930 }, { "grad_norm": 0.16694466769695282, "learning_rate": 2.0523855810488214e-05, "loss": 0.0024, "step": 42940 }, { "grad_norm": 0.12402988970279694, "learning_rate": 2.050160040505505e-05, "loss": 0.0023, "step": 42950 }, { "grad_norm": 0.14468993246555328, "learning_rate": 2.0479353960456843e-05, "loss": 0.0029, "step": 42960 }, { "grad_norm": 0.12729193270206451, "learning_rate": 2.0457116483451456e-05, "loss": 0.0026, "step": 42970 }, { "grad_norm": 0.15954506397247314, "learning_rate": 2.0434887980794043e-05, "loss": 0.0039, "step": 42980 }, { "grad_norm": 0.14968369901180267, "learning_rate": 2.0412668459237043e-05, "loss": 0.0023, "step": 42990 }, { "grad_norm": 0.16360320150852203, "learning_rate": 2.039045792553016e-05, "loss": 0.0029, "step": 43000 }, { "grad_norm": 0.138072207570076, "learning_rate": 2.036825638642036e-05, "loss": 0.0029, "step": 43010 }, { "grad_norm": 0.14269419014453888, "learning_rate": 2.0346063848651868e-05, "loss": 0.0026, "step": 43020 }, { "grad_norm": 0.12009543180465698, "learning_rate": 2.0323880318966254e-05, "loss": 0.0024, "step": 43030 }, { "grad_norm": 0.15685294568538666, "learning_rate": 2.030170580410221e-05, "loss": 0.0031, "step": 43040 }, { "grad_norm": 0.11948579549789429, "learning_rate": 2.0279540310795837e-05, "loss": 0.0032, "step": 43050 }, { "grad_norm": 0.13224561512470245, "learning_rate": 2.0257383845780365e-05, "loss": 0.0031, "step": 43060 }, { "grad_norm": 0.19799253344535828, "learning_rate": 2.0235236415786384e-05, "loss": 0.0051, "step": 43070 }, { "grad_norm": 0.15876567363739014, "learning_rate": 2.021309802754169e-05, "loss": 0.0033, "step": 43080 }, { "grad_norm": 0.15996171534061432, "learning_rate": 2.0190968687771332e-05, "loss": 0.004, "step": 43090 }, { "grad_norm": 0.13225407898426056, "learning_rate": 2.016884840319763e-05, "loss": 0.0025, "step": 43100 }, { "grad_norm": 0.10967284440994263, "learning_rate": 2.0146737180540122e-05, "loss": 0.0035, "step": 43110 }, { "grad_norm": 0.1429014503955841, "learning_rate": 2.012463502651564e-05, "loss": 0.0037, "step": 43120 }, { "grad_norm": 0.142154723405838, "learning_rate": 2.0102541947838228e-05, "loss": 0.0041, "step": 43130 }, { "grad_norm": 0.12910042703151703, "learning_rate": 2.0080457951219173e-05, "loss": 0.002, "step": 43140 }, { "grad_norm": 0.14672163128852844, "learning_rate": 2.0058383043367017e-05, "loss": 0.0033, "step": 43150 }, { "grad_norm": 0.10366083681583405, "learning_rate": 2.0036317230987528e-05, "loss": 0.0025, "step": 43160 }, { "grad_norm": 0.1546521633863449, "learning_rate": 2.0014260520783696e-05, "loss": 0.0034, "step": 43170 }, { "grad_norm": 0.1403999626636505, "learning_rate": 1.9992212919455834e-05, "loss": 0.0034, "step": 43180 }, { "grad_norm": 0.1530219465494156, "learning_rate": 1.9970174433701333e-05, "loss": 0.0032, "step": 43190 }, { "grad_norm": 0.15878364443778992, "learning_rate": 1.9948145070214992e-05, "loss": 0.0028, "step": 43200 }, { "grad_norm": 0.1135379821062088, "learning_rate": 1.9926124835688663e-05, "loss": 0.0033, "step": 43210 }, { "grad_norm": 0.1723741739988327, "learning_rate": 1.9904113736811576e-05, "loss": 0.0049, "step": 43220 }, { "grad_norm": 0.13433456420898438, "learning_rate": 1.9882111780270096e-05, "loss": 0.0058, "step": 43230 }, { "grad_norm": 0.1358075588941574, "learning_rate": 1.986011897274784e-05, "loss": 0.0036, "step": 43240 }, { "grad_norm": 0.11093565821647644, "learning_rate": 1.983813532092565e-05, "loss": 0.0033, "step": 43250 }, { "grad_norm": 0.1462140679359436, "learning_rate": 1.981616083148155e-05, "loss": 0.0031, "step": 43260 }, { "grad_norm": 0.17562538385391235, "learning_rate": 1.9794195511090845e-05, "loss": 0.0025, "step": 43270 }, { "grad_norm": 0.1119430661201477, "learning_rate": 1.977223936642601e-05, "loss": 0.0027, "step": 43280 }, { "grad_norm": 0.1822449415922165, "learning_rate": 1.975029240415674e-05, "loss": 0.0032, "step": 43290 }, { "grad_norm": 0.10740764439105988, "learning_rate": 1.9728354630949936e-05, "loss": 0.0028, "step": 43300 }, { "grad_norm": 0.12288554012775421, "learning_rate": 1.9706426053469716e-05, "loss": 0.0024, "step": 43310 }, { "grad_norm": 0.1317206770181656, "learning_rate": 1.9684506678377396e-05, "loss": 0.0029, "step": 43320 }, { "grad_norm": 0.19511902332305908, "learning_rate": 1.9662596512331544e-05, "loss": 0.0049, "step": 43330 }, { "grad_norm": 0.2005932778120041, "learning_rate": 1.964069556198782e-05, "loss": 0.0029, "step": 43340 }, { "grad_norm": 0.16023850440979004, "learning_rate": 1.9618803833999232e-05, "loss": 0.0038, "step": 43350 }, { "grad_norm": 0.1335248202085495, "learning_rate": 1.9596921335015838e-05, "loss": 0.0034, "step": 43360 }, { "grad_norm": 0.13381657004356384, "learning_rate": 1.957504807168501e-05, "loss": 0.0039, "step": 43370 }, { "grad_norm": 0.11346202343702316, "learning_rate": 1.9553184050651253e-05, "loss": 0.0033, "step": 43380 }, { "grad_norm": 0.14977198839187622, "learning_rate": 1.953132927855628e-05, "loss": 0.0038, "step": 43390 }, { "grad_norm": 0.13065074384212494, "learning_rate": 1.9509483762038995e-05, "loss": 0.0034, "step": 43400 }, { "grad_norm": 0.1366412341594696, "learning_rate": 1.9487647507735467e-05, "loss": 0.0035, "step": 43410 }, { "grad_norm": 0.19153328239917755, "learning_rate": 1.9465820522279032e-05, "loss": 0.0061, "step": 43420 }, { "grad_norm": 0.1296260505914688, "learning_rate": 1.9444002812300078e-05, "loss": 0.0044, "step": 43430 }, { "grad_norm": 0.15517450869083405, "learning_rate": 1.94221943844263e-05, "loss": 0.0033, "step": 43440 }, { "grad_norm": 0.10487982630729675, "learning_rate": 1.9400395245282515e-05, "loss": 0.0025, "step": 43450 }, { "grad_norm": 0.20621170103549957, "learning_rate": 1.937860540149071e-05, "loss": 0.0027, "step": 43460 }, { "grad_norm": 0.10109329223632812, "learning_rate": 1.9356824859670082e-05, "loss": 0.0027, "step": 43470 }, { "grad_norm": 0.1692003309726715, "learning_rate": 1.9335053626436967e-05, "loss": 0.0033, "step": 43480 }, { "grad_norm": 0.1333615928888321, "learning_rate": 1.9313291708404885e-05, "loss": 0.0034, "step": 43490 }, { "grad_norm": 0.15854498744010925, "learning_rate": 1.9291539112184587e-05, "loss": 0.0029, "step": 43500 }, { "grad_norm": 0.14657294750213623, "learning_rate": 1.9269795844383854e-05, "loss": 0.002, "step": 43510 }, { "grad_norm": 0.19784286618232727, "learning_rate": 1.9248061911607777e-05, "loss": 0.003, "step": 43520 }, { "grad_norm": 0.16682776808738708, "learning_rate": 1.9226337320458538e-05, "loss": 0.003, "step": 43530 }, { "grad_norm": 0.1091584786772728, "learning_rate": 1.9204622077535488e-05, "loss": 0.0027, "step": 43540 }, { "grad_norm": 0.08509393781423569, "learning_rate": 1.9182916189435147e-05, "loss": 0.0028, "step": 43550 }, { "grad_norm": 0.12800520658493042, "learning_rate": 1.916121966275117e-05, "loss": 0.0028, "step": 43560 }, { "grad_norm": 0.16889889538288116, "learning_rate": 1.9139532504074443e-05, "loss": 0.0031, "step": 43570 }, { "grad_norm": 0.14365145564079285, "learning_rate": 1.9117854719992885e-05, "loss": 0.003, "step": 43580 }, { "grad_norm": 0.12228386849164963, "learning_rate": 1.9096186317091687e-05, "loss": 0.0025, "step": 43590 }, { "grad_norm": 0.14174194633960724, "learning_rate": 1.9074527301953116e-05, "loss": 0.0034, "step": 43600 }, { "grad_norm": 0.14558447897434235, "learning_rate": 1.9052877681156607e-05, "loss": 0.0031, "step": 43610 }, { "grad_norm": 0.16939060389995575, "learning_rate": 1.903123746127875e-05, "loss": 0.0029, "step": 43620 }, { "grad_norm": 0.1154969111084938, "learning_rate": 1.900960664889327e-05, "loss": 0.0023, "step": 43630 }, { "grad_norm": 0.11383353173732758, "learning_rate": 1.8987985250571015e-05, "loss": 0.0037, "step": 43640 }, { "grad_norm": 0.12167482078075409, "learning_rate": 1.8966373272880054e-05, "loss": 0.0037, "step": 43650 }, { "grad_norm": 0.11626416444778442, "learning_rate": 1.8944770722385462e-05, "loss": 0.0027, "step": 43660 }, { "grad_norm": 0.11196047812700272, "learning_rate": 1.8923177605649576e-05, "loss": 0.0025, "step": 43670 }, { "grad_norm": 0.129922553896904, "learning_rate": 1.8901593929231802e-05, "loss": 0.0036, "step": 43680 }, { "grad_norm": 0.11380775272846222, "learning_rate": 1.8880019699688684e-05, "loss": 0.0036, "step": 43690 }, { "grad_norm": 0.16569480299949646, "learning_rate": 1.8858454923573904e-05, "loss": 0.0037, "step": 43700 }, { "grad_norm": 0.15776319801807404, "learning_rate": 1.8836899607438253e-05, "loss": 0.0034, "step": 43710 }, { "grad_norm": 0.20473143458366394, "learning_rate": 1.8815353757829723e-05, "loss": 0.0044, "step": 43720 }, { "grad_norm": 0.09540721029043198, "learning_rate": 1.879381738129331e-05, "loss": 0.0022, "step": 43730 }, { "grad_norm": 0.1435556560754776, "learning_rate": 1.8772290484371236e-05, "loss": 0.0029, "step": 43740 }, { "grad_norm": 0.1187094897031784, "learning_rate": 1.8750773073602795e-05, "loss": 0.0029, "step": 43750 }, { "grad_norm": 0.16787496209144592, "learning_rate": 1.8729265155524405e-05, "loss": 0.0025, "step": 43760 }, { "grad_norm": 0.12616588175296783, "learning_rate": 1.8707766736669607e-05, "loss": 0.0021, "step": 43770 }, { "grad_norm": 0.14102788269519806, "learning_rate": 1.8686277823569055e-05, "loss": 0.0032, "step": 43780 }, { "grad_norm": 0.1212286725640297, "learning_rate": 1.8664798422750484e-05, "loss": 0.0033, "step": 43790 }, { "grad_norm": 0.1220666915178299, "learning_rate": 1.8643328540738832e-05, "loss": 0.005, "step": 43800 }, { "grad_norm": 0.11089291423559189, "learning_rate": 1.862186818405601e-05, "loss": 0.0018, "step": 43810 }, { "grad_norm": 0.1053742915391922, "learning_rate": 1.8600417359221156e-05, "loss": 0.0037, "step": 43820 }, { "grad_norm": 0.109604611992836, "learning_rate": 1.8578976072750454e-05, "loss": 0.0033, "step": 43830 }, { "grad_norm": 0.18026070296764374, "learning_rate": 1.8557544331157194e-05, "loss": 0.0026, "step": 43840 }, { "grad_norm": 0.21883097290992737, "learning_rate": 1.8536122140951785e-05, "loss": 0.0039, "step": 43850 }, { "grad_norm": 0.15479585528373718, "learning_rate": 1.8514709508641688e-05, "loss": 0.0044, "step": 43860 }, { "grad_norm": 0.16708962619304657, "learning_rate": 1.8493306440731555e-05, "loss": 0.004, "step": 43870 }, { "grad_norm": 0.15324416756629944, "learning_rate": 1.8471912943723013e-05, "loss": 0.0037, "step": 43880 }, { "grad_norm": 0.09840118885040283, "learning_rate": 1.8450529024114894e-05, "loss": 0.0021, "step": 43890 }, { "grad_norm": 0.10716372728347778, "learning_rate": 1.842915468840301e-05, "loss": 0.0032, "step": 43900 }, { "grad_norm": 0.12303082644939423, "learning_rate": 1.840778994308037e-05, "loss": 0.0033, "step": 43910 }, { "grad_norm": 0.13007058203220367, "learning_rate": 1.8386434794637004e-05, "loss": 0.0023, "step": 43920 }, { "grad_norm": 0.15132826566696167, "learning_rate": 1.8365089249560034e-05, "loss": 0.0023, "step": 43930 }, { "grad_norm": 0.09762345999479294, "learning_rate": 1.8343753314333683e-05, "loss": 0.0041, "step": 43940 }, { "grad_norm": 0.19953911006450653, "learning_rate": 1.8322426995439236e-05, "loss": 0.0048, "step": 43950 }, { "grad_norm": 0.10680358111858368, "learning_rate": 1.8301110299355058e-05, "loss": 0.0029, "step": 43960 }, { "grad_norm": 0.12017536908388138, "learning_rate": 1.8279803232556625e-05, "loss": 0.002, "step": 43970 }, { "grad_norm": 0.12636029720306396, "learning_rate": 1.8258505801516444e-05, "loss": 0.0036, "step": 43980 }, { "grad_norm": 0.1203877180814743, "learning_rate": 1.8237218012704117e-05, "loss": 0.0034, "step": 43990 }, { "grad_norm": 0.18636785447597504, "learning_rate": 1.821593987258631e-05, "loss": 0.0027, "step": 44000 }, { "grad_norm": 0.15933936834335327, "learning_rate": 1.8194671387626744e-05, "loss": 0.0025, "step": 44010 }, { "grad_norm": 0.14290201663970947, "learning_rate": 1.8173412564286276e-05, "loss": 0.0028, "step": 44020 }, { "grad_norm": 0.11299658566713333, "learning_rate": 1.8152163409022697e-05, "loss": 0.0026, "step": 44030 }, { "grad_norm": 0.10744449496269226, "learning_rate": 1.8130923928291023e-05, "loss": 0.0024, "step": 44040 }, { "grad_norm": 0.12385797500610352, "learning_rate": 1.8109694128543163e-05, "loss": 0.0019, "step": 44050 }, { "grad_norm": 0.1592237651348114, "learning_rate": 1.8088474016228237e-05, "loss": 0.0032, "step": 44060 }, { "grad_norm": 0.14553458988666534, "learning_rate": 1.8067263597792328e-05, "loss": 0.0032, "step": 44070 }, { "grad_norm": 0.15482890605926514, "learning_rate": 1.80460628796786e-05, "loss": 0.0027, "step": 44080 }, { "grad_norm": 0.09575201570987701, "learning_rate": 1.8024871868327276e-05, "loss": 0.0019, "step": 44090 }, { "grad_norm": 0.11266981065273285, "learning_rate": 1.8003690570175608e-05, "loss": 0.0025, "step": 44100 }, { "grad_norm": 0.1318572610616684, "learning_rate": 1.7982518991657943e-05, "loss": 0.0029, "step": 44110 }, { "grad_norm": 0.11195012927055359, "learning_rate": 1.7961357139205643e-05, "loss": 0.0027, "step": 44120 }, { "grad_norm": 0.09446470439434052, "learning_rate": 1.7940205019247108e-05, "loss": 0.0026, "step": 44130 }, { "grad_norm": 0.1283102184534073, "learning_rate": 1.79190626382078e-05, "loss": 0.003, "step": 44140 }, { "grad_norm": 0.14767558872699738, "learning_rate": 1.7897930002510215e-05, "loss": 0.0026, "step": 44150 }, { "grad_norm": 0.12476933747529984, "learning_rate": 1.787680711857387e-05, "loss": 0.0024, "step": 44160 }, { "grad_norm": 0.1404978185892105, "learning_rate": 1.7855693992815398e-05, "loss": 0.0038, "step": 44170 }, { "grad_norm": 0.1445080041885376, "learning_rate": 1.7834590631648328e-05, "loss": 0.0023, "step": 44180 }, { "grad_norm": 0.13157252967357635, "learning_rate": 1.7813497041483384e-05, "loss": 0.0023, "step": 44190 }, { "grad_norm": 0.1158943697810173, "learning_rate": 1.779241322872817e-05, "loss": 0.0037, "step": 44200 }, { "grad_norm": 0.09384501725435257, "learning_rate": 1.777133919978744e-05, "loss": 0.0024, "step": 44210 }, { "grad_norm": 0.09926478564739227, "learning_rate": 1.7750274961062912e-05, "loss": 0.0023, "step": 44220 }, { "grad_norm": 0.13296474516391754, "learning_rate": 1.772922051895335e-05, "loss": 0.0043, "step": 44230 }, { "grad_norm": 0.16705164313316345, "learning_rate": 1.770817587985453e-05, "loss": 0.0022, "step": 44240 }, { "grad_norm": 0.1257438212633133, "learning_rate": 1.7687141050159246e-05, "loss": 0.0033, "step": 44250 }, { "grad_norm": 0.1263522207736969, "learning_rate": 1.7666116036257375e-05, "loss": 0.0033, "step": 44260 }, { "grad_norm": 0.13838563859462738, "learning_rate": 1.764510084453569e-05, "loss": 0.0034, "step": 44270 }, { "grad_norm": 0.11271598190069199, "learning_rate": 1.76240954813781e-05, "loss": 0.0038, "step": 44280 }, { "grad_norm": 0.10200243443250656, "learning_rate": 1.7603099953165476e-05, "loss": 0.0037, "step": 44290 }, { "grad_norm": 0.1016218364238739, "learning_rate": 1.7582114266275683e-05, "loss": 0.0023, "step": 44300 }, { "grad_norm": 0.10926016420125961, "learning_rate": 1.756113842708364e-05, "loss": 0.0025, "step": 44310 }, { "grad_norm": 0.19211642444133759, "learning_rate": 1.7540172441961245e-05, "loss": 0.0039, "step": 44320 }, { "grad_norm": 0.17272014915943146, "learning_rate": 1.7519216317277387e-05, "loss": 0.0038, "step": 44330 }, { "grad_norm": 0.13535098731517792, "learning_rate": 1.7498270059398046e-05, "loss": 0.003, "step": 44340 }, { "grad_norm": 0.17224624752998352, "learning_rate": 1.7477333674686062e-05, "loss": 0.0031, "step": 44350 }, { "grad_norm": 0.10299841314554214, "learning_rate": 1.745640716950142e-05, "loss": 0.0026, "step": 44360 }, { "grad_norm": 0.07292947918176651, "learning_rate": 1.7435490550201017e-05, "loss": 0.0018, "step": 44370 }, { "grad_norm": 0.12707625329494476, "learning_rate": 1.7414583823138762e-05, "loss": 0.0031, "step": 44380 }, { "grad_norm": 0.18536223471164703, "learning_rate": 1.739368699466558e-05, "loss": 0.005, "step": 44390 }, { "grad_norm": 0.1214776486158371, "learning_rate": 1.737280007112935e-05, "loss": 0.002, "step": 44400 }, { "grad_norm": 0.14398516714572906, "learning_rate": 1.735192305887502e-05, "loss": 0.0033, "step": 44410 }, { "grad_norm": 0.1109752506017685, "learning_rate": 1.733105596424441e-05, "loss": 0.0025, "step": 44420 }, { "grad_norm": 0.1155310571193695, "learning_rate": 1.7310198793576437e-05, "loss": 0.0031, "step": 44430 }, { "grad_norm": 0.11506570130586624, "learning_rate": 1.7289351553206952e-05, "loss": 0.0033, "step": 44440 }, { "grad_norm": 0.10263778269290924, "learning_rate": 1.7268514249468788e-05, "loss": 0.0032, "step": 44450 }, { "grad_norm": 0.12946584820747375, "learning_rate": 1.7247686888691765e-05, "loss": 0.0027, "step": 44460 }, { "grad_norm": 0.12770754098892212, "learning_rate": 1.7226869477202694e-05, "loss": 0.004, "step": 44470 }, { "grad_norm": 0.16242554783821106, "learning_rate": 1.7206062021325336e-05, "loss": 0.0036, "step": 44480 }, { "grad_norm": 0.09878756105899811, "learning_rate": 1.7185264527380502e-05, "loss": 0.0018, "step": 44490 }, { "grad_norm": 0.18068929016590118, "learning_rate": 1.716447700168584e-05, "loss": 0.0026, "step": 44500 }, { "grad_norm": 0.11795713007450104, "learning_rate": 1.714369945055611e-05, "loss": 0.0022, "step": 44510 }, { "grad_norm": 0.12547971308231354, "learning_rate": 1.7122931880302968e-05, "loss": 0.0022, "step": 44520 }, { "grad_norm": 0.10362352430820465, "learning_rate": 1.710217429723505e-05, "loss": 0.0022, "step": 44530 }, { "grad_norm": 0.12436658889055252, "learning_rate": 1.7081426707657972e-05, "loss": 0.0029, "step": 44540 }, { "grad_norm": 0.09812696278095245, "learning_rate": 1.7060689117874275e-05, "loss": 0.0025, "step": 44550 }, { "grad_norm": 0.11503946036100388, "learning_rate": 1.703996153418354e-05, "loss": 0.0027, "step": 44560 }, { "grad_norm": 0.13398377597332, "learning_rate": 1.7019243962882205e-05, "loss": 0.0035, "step": 44570 }, { "grad_norm": 0.17513889074325562, "learning_rate": 1.6998536410263754e-05, "loss": 0.0041, "step": 44580 }, { "grad_norm": 0.15084140002727509, "learning_rate": 1.6977838882618596e-05, "loss": 0.0035, "step": 44590 }, { "grad_norm": 0.11744046211242676, "learning_rate": 1.6957151386234088e-05, "loss": 0.0029, "step": 44600 }, { "grad_norm": 0.14456751942634583, "learning_rate": 1.6936473927394536e-05, "loss": 0.0034, "step": 44610 }, { "grad_norm": 0.15840619802474976, "learning_rate": 1.6915806512381222e-05, "loss": 0.0036, "step": 44620 }, { "grad_norm": 0.1667514145374298, "learning_rate": 1.6895149147472344e-05, "loss": 0.0029, "step": 44630 }, { "grad_norm": 0.14782719314098358, "learning_rate": 1.6874501838943073e-05, "loss": 0.003, "step": 44640 }, { "grad_norm": 0.1551159769296646, "learning_rate": 1.6853864593065506e-05, "loss": 0.0039, "step": 44650 }, { "grad_norm": 0.11422882974147797, "learning_rate": 1.683323741610871e-05, "loss": 0.0028, "step": 44660 }, { "grad_norm": 0.120932936668396, "learning_rate": 1.6812620314338674e-05, "loss": 0.0029, "step": 44670 }, { "grad_norm": 0.15635259449481964, "learning_rate": 1.6792013294018326e-05, "loss": 0.0035, "step": 44680 }, { "grad_norm": 0.12792742252349854, "learning_rate": 1.6771416361407526e-05, "loss": 0.0028, "step": 44690 }, { "grad_norm": 0.12818844616413116, "learning_rate": 1.675082952276308e-05, "loss": 0.0033, "step": 44700 }, { "grad_norm": 0.13887102901935577, "learning_rate": 1.6730252784338757e-05, "loss": 0.0031, "step": 44710 }, { "grad_norm": 0.1204068586230278, "learning_rate": 1.6709686152385166e-05, "loss": 0.0028, "step": 44720 }, { "grad_norm": 0.1587284654378891, "learning_rate": 1.668912963314998e-05, "loss": 0.0025, "step": 44730 }, { "grad_norm": 0.10827206075191498, "learning_rate": 1.6668583232877653e-05, "loss": 0.0036, "step": 44740 }, { "grad_norm": 0.16987235844135284, "learning_rate": 1.6648046957809698e-05, "loss": 0.0034, "step": 44750 }, { "grad_norm": 0.10383376479148865, "learning_rate": 1.6627520814184462e-05, "loss": 0.0023, "step": 44760 }, { "grad_norm": 0.10195975005626678, "learning_rate": 1.660700480823726e-05, "loss": 0.0023, "step": 44770 }, { "grad_norm": 0.15911747515201569, "learning_rate": 1.65864989462003e-05, "loss": 0.004, "step": 44780 }, { "grad_norm": 0.12350042164325714, "learning_rate": 1.656600323430273e-05, "loss": 0.0021, "step": 44790 }, { "grad_norm": 0.10617683082818985, "learning_rate": 1.654551767877059e-05, "loss": 0.0033, "step": 44800 }, { "grad_norm": 0.11906342208385468, "learning_rate": 1.6525042285826874e-05, "loss": 0.002, "step": 44810 }, { "grad_norm": 0.09882324188947678, "learning_rate": 1.6504577061691468e-05, "loss": 0.0035, "step": 44820 }, { "grad_norm": 0.1142059937119484, "learning_rate": 1.6484122012581143e-05, "loss": 0.0056, "step": 44830 }, { "grad_norm": 0.10681496560573578, "learning_rate": 1.6463677144709623e-05, "loss": 0.0035, "step": 44840 }, { "grad_norm": 0.1290084272623062, "learning_rate": 1.6443242464287493e-05, "loss": 0.0025, "step": 44850 }, { "grad_norm": 0.1337740272283554, "learning_rate": 1.642281797752232e-05, "loss": 0.0032, "step": 44860 }, { "grad_norm": 0.11962473392486572, "learning_rate": 1.6402403690618456e-05, "loss": 0.0032, "step": 44870 }, { "grad_norm": 0.15387757122516632, "learning_rate": 1.6381999609777295e-05, "loss": 0.0028, "step": 44880 }, { "grad_norm": 0.10399586707353592, "learning_rate": 1.6361605741196983e-05, "loss": 0.002, "step": 44890 }, { "grad_norm": 0.1211291179060936, "learning_rate": 1.63412220910727e-05, "loss": 0.0029, "step": 44900 }, { "grad_norm": 0.1669653207063675, "learning_rate": 1.6320848665596433e-05, "loss": 0.003, "step": 44910 }, { "grad_norm": 0.09013701975345612, "learning_rate": 1.6300485470957095e-05, "loss": 0.0026, "step": 44920 }, { "grad_norm": 0.15025624632835388, "learning_rate": 1.6280132513340483e-05, "loss": 0.0064, "step": 44930 }, { "grad_norm": 0.141093909740448, "learning_rate": 1.62597897989293e-05, "loss": 0.0029, "step": 44940 }, { "grad_norm": 0.15742972493171692, "learning_rate": 1.623945733390309e-05, "loss": 0.0026, "step": 44950 }, { "grad_norm": 0.1692814975976944, "learning_rate": 1.6219135124438374e-05, "loss": 0.0025, "step": 44960 }, { "grad_norm": 0.09857185184955597, "learning_rate": 1.6198823176708465e-05, "loss": 0.0019, "step": 44970 }, { "grad_norm": 0.14189870655536652, "learning_rate": 1.6178521496883613e-05, "loss": 0.0028, "step": 44980 }, { "grad_norm": 0.1352986991405487, "learning_rate": 1.6158230091130926e-05, "loss": 0.0052, "step": 44990 }, { "grad_norm": 0.13977892696857452, "learning_rate": 1.613794896561438e-05, "loss": 0.0024, "step": 45000 }, { "grad_norm": 0.12037627398967743, "learning_rate": 1.6117678126494894e-05, "loss": 0.0027, "step": 45010 }, { "grad_norm": 0.11894038319587708, "learning_rate": 1.6097417579930153e-05, "loss": 0.003, "step": 45020 }, { "grad_norm": 0.11856943368911743, "learning_rate": 1.6077167332074834e-05, "loss": 0.0031, "step": 45030 }, { "grad_norm": 0.1582639366388321, "learning_rate": 1.605692738908037e-05, "loss": 0.0033, "step": 45040 }, { "grad_norm": 0.1551557034254074, "learning_rate": 1.6036697757095176e-05, "loss": 0.0039, "step": 45050 }, { "grad_norm": 0.1763678640127182, "learning_rate": 1.6016478442264428e-05, "loss": 0.0033, "step": 45060 }, { "grad_norm": 0.11375752091407776, "learning_rate": 1.599626945073026e-05, "loss": 0.0023, "step": 45070 }, { "grad_norm": 0.14403843879699707, "learning_rate": 1.597607078863162e-05, "loss": 0.0031, "step": 45080 }, { "grad_norm": 0.13409040868282318, "learning_rate": 1.595588246210432e-05, "loss": 0.002, "step": 45090 }, { "grad_norm": 0.12490037828683853, "learning_rate": 1.5935704477281048e-05, "loss": 0.0029, "step": 45100 }, { "grad_norm": 0.11826036125421524, "learning_rate": 1.5915536840291323e-05, "loss": 0.0035, "step": 45110 }, { "grad_norm": 0.11063234508037567, "learning_rate": 1.5895379557261576e-05, "loss": 0.0033, "step": 45120 }, { "grad_norm": 0.1400458812713623, "learning_rate": 1.5875232634315033e-05, "loss": 0.0026, "step": 45130 }, { "grad_norm": 0.1644551157951355, "learning_rate": 1.5855096077571812e-05, "loss": 0.0038, "step": 45140 }, { "grad_norm": 0.14464645087718964, "learning_rate": 1.5834969893148855e-05, "loss": 0.0031, "step": 45150 }, { "grad_norm": 0.14953793585300446, "learning_rate": 1.581485408715997e-05, "loss": 0.0033, "step": 45160 }, { "grad_norm": 0.16871076822280884, "learning_rate": 1.5794748665715785e-05, "loss": 0.0031, "step": 45170 }, { "grad_norm": 0.11060309410095215, "learning_rate": 1.5774653634923857e-05, "loss": 0.0042, "step": 45180 }, { "grad_norm": 0.11836708337068558, "learning_rate": 1.575456900088845e-05, "loss": 0.0026, "step": 45190 }, { "grad_norm": 0.15805479884147644, "learning_rate": 1.5734494769710816e-05, "loss": 0.0041, "step": 45200 }, { "grad_norm": 0.1307588368654251, "learning_rate": 1.5714430947488912e-05, "loss": 0.0025, "step": 45210 }, { "grad_norm": 0.14632175862789154, "learning_rate": 1.5694377540317645e-05, "loss": 0.002, "step": 45220 }, { "grad_norm": 0.11879955977201462, "learning_rate": 1.5674334554288694e-05, "loss": 0.0024, "step": 45230 }, { "grad_norm": 0.1097344234585762, "learning_rate": 1.5654301995490582e-05, "loss": 0.003, "step": 45240 }, { "grad_norm": 0.13567927479743958, "learning_rate": 1.5634279870008685e-05, "loss": 0.0049, "step": 45250 }, { "grad_norm": 0.1696065068244934, "learning_rate": 1.5614268183925174e-05, "loss": 0.0029, "step": 45260 }, { "grad_norm": 0.142481729388237, "learning_rate": 1.5594266943319097e-05, "loss": 0.0034, "step": 45270 }, { "grad_norm": 0.12296462804079056, "learning_rate": 1.5574276154266294e-05, "loss": 0.0032, "step": 45280 }, { "grad_norm": 0.13821595907211304, "learning_rate": 1.5554295822839437e-05, "loss": 0.0032, "step": 45290 }, { "grad_norm": 0.18884176015853882, "learning_rate": 1.5534325955108025e-05, "loss": 0.0037, "step": 45300 }, { "grad_norm": 0.1347174197435379, "learning_rate": 1.5514366557138373e-05, "loss": 0.0022, "step": 45310 }, { "grad_norm": 0.1188732236623764, "learning_rate": 1.5494417634993602e-05, "loss": 0.0022, "step": 45320 }, { "grad_norm": 0.12445495277643204, "learning_rate": 1.547447919473372e-05, "loss": 0.0026, "step": 45330 }, { "grad_norm": 0.12494923919439316, "learning_rate": 1.5454551242415434e-05, "loss": 0.0026, "step": 45340 }, { "grad_norm": 0.14090991020202637, "learning_rate": 1.543463378409239e-05, "loss": 0.0024, "step": 45350 }, { "grad_norm": 0.11229303479194641, "learning_rate": 1.541472682581493e-05, "loss": 0.0025, "step": 45360 }, { "grad_norm": 0.12597060203552246, "learning_rate": 1.5394830373630298e-05, "loss": 0.0033, "step": 45370 }, { "grad_norm": 0.19662624597549438, "learning_rate": 1.5374944433582506e-05, "loss": 0.003, "step": 45380 }, { "grad_norm": 0.17440171539783478, "learning_rate": 1.5355069011712375e-05, "loss": 0.0031, "step": 45390 }, { "grad_norm": 0.18217696249485016, "learning_rate": 1.5335204114057526e-05, "loss": 0.0047, "step": 45400 }, { "grad_norm": 0.13746672868728638, "learning_rate": 1.5315349746652387e-05, "loss": 0.0028, "step": 45410 }, { "grad_norm": 0.16245613992214203, "learning_rate": 1.5295505915528212e-05, "loss": 0.0022, "step": 45420 }, { "grad_norm": 0.08981003612279892, "learning_rate": 1.5275672626713024e-05, "loss": 0.002, "step": 45430 }, { "grad_norm": 0.13700507581233978, "learning_rate": 1.5255849886231643e-05, "loss": 0.0028, "step": 45440 }, { "grad_norm": 0.12492302060127258, "learning_rate": 1.523603770010571e-05, "loss": 0.0047, "step": 45450 }, { "grad_norm": 0.13143570721149445, "learning_rate": 1.521623607435363e-05, "loss": 0.0022, "step": 45460 }, { "grad_norm": 0.12899400293827057, "learning_rate": 1.5196445014990612e-05, "loss": 0.0032, "step": 45470 }, { "grad_norm": 0.08515872806310654, "learning_rate": 1.5176664528028672e-05, "loss": 0.0027, "step": 45480 }, { "grad_norm": 0.11894240975379944, "learning_rate": 1.5156894619476574e-05, "loss": 0.0029, "step": 45490 }, { "grad_norm": 0.1269761621952057, "learning_rate": 1.5137135295339938e-05, "loss": 0.0022, "step": 45500 }, { "grad_norm": 0.15310552716255188, "learning_rate": 1.5117386561621073e-05, "loss": 0.0026, "step": 45510 }, { "grad_norm": 0.17156606912612915, "learning_rate": 1.5097648424319167e-05, "loss": 0.004, "step": 45520 }, { "grad_norm": 0.12597014009952545, "learning_rate": 1.5077920889430119e-05, "loss": 0.0024, "step": 45530 }, { "grad_norm": 0.09204896539449692, "learning_rate": 1.5058203962946644e-05, "loss": 0.0029, "step": 45540 }, { "grad_norm": 0.09744611382484436, "learning_rate": 1.503849765085822e-05, "loss": 0.0021, "step": 45550 }, { "grad_norm": 0.14112231135368347, "learning_rate": 1.501880195915109e-05, "loss": 0.0029, "step": 45560 }, { "grad_norm": 0.09753841161727905, "learning_rate": 1.499911689380833e-05, "loss": 0.0032, "step": 45570 }, { "grad_norm": 0.12400601804256439, "learning_rate": 1.4979442460809683e-05, "loss": 0.0026, "step": 45580 }, { "grad_norm": 0.09993378818035126, "learning_rate": 1.4959778666131763e-05, "loss": 0.0033, "step": 45590 }, { "grad_norm": 0.12758682668209076, "learning_rate": 1.4940125515747905e-05, "loss": 0.0048, "step": 45600 }, { "grad_norm": 0.09667088836431503, "learning_rate": 1.4920483015628211e-05, "loss": 0.0028, "step": 45610 }, { "grad_norm": 0.103914774954319, "learning_rate": 1.490085117173956e-05, "loss": 0.0027, "step": 45620 }, { "grad_norm": 0.08405863493680954, "learning_rate": 1.488122999004558e-05, "loss": 0.0018, "step": 45630 }, { "grad_norm": 0.13954687118530273, "learning_rate": 1.486161947650666e-05, "loss": 0.0037, "step": 45640 }, { "grad_norm": 0.11454354226589203, "learning_rate": 1.4842019637079995e-05, "loss": 0.0024, "step": 45650 }, { "grad_norm": 0.12007882446050644, "learning_rate": 1.482243047771944e-05, "loss": 0.002, "step": 45660 }, { "grad_norm": 0.0748705193400383, "learning_rate": 1.4802852004375712e-05, "loss": 0.0023, "step": 45670 }, { "grad_norm": 0.06864310055971146, "learning_rate": 1.4783284222996218e-05, "loss": 0.0028, "step": 45680 }, { "grad_norm": 0.16428780555725098, "learning_rate": 1.4763727139525135e-05, "loss": 0.0031, "step": 45690 }, { "grad_norm": 0.1233481839299202, "learning_rate": 1.4744180759903392e-05, "loss": 0.0022, "step": 45700 }, { "grad_norm": 0.13660785555839539, "learning_rate": 1.4724645090068635e-05, "loss": 0.0038, "step": 45710 }, { "grad_norm": 0.2045556902885437, "learning_rate": 1.4705120135955341e-05, "loss": 0.0024, "step": 45720 }, { "grad_norm": 0.1555570662021637, "learning_rate": 1.4685605903494614e-05, "loss": 0.0043, "step": 45730 }, { "grad_norm": 0.11550207436084747, "learning_rate": 1.46661023986144e-05, "loss": 0.0031, "step": 45740 }, { "grad_norm": 0.13337044417858124, "learning_rate": 1.4646609627239344e-05, "loss": 0.0049, "step": 45750 }, { "grad_norm": 0.17174091935157776, "learning_rate": 1.4627127595290835e-05, "loss": 0.0032, "step": 45760 }, { "grad_norm": 0.17372578382492065, "learning_rate": 1.460765630868699e-05, "loss": 0.004, "step": 45770 }, { "grad_norm": 0.1773354560136795, "learning_rate": 1.4588195773342678e-05, "loss": 0.0038, "step": 45780 }, { "grad_norm": 0.14584480226039886, "learning_rate": 1.4568745995169485e-05, "loss": 0.003, "step": 45790 }, { "grad_norm": 0.18555361032485962, "learning_rate": 1.4549306980075778e-05, "loss": 0.0056, "step": 45800 }, { "grad_norm": 0.1179942637681961, "learning_rate": 1.4529878733966557e-05, "loss": 0.0024, "step": 45810 }, { "grad_norm": 0.12227556109428406, "learning_rate": 1.4510461262743658e-05, "loss": 0.0025, "step": 45820 }, { "grad_norm": 0.10669393837451935, "learning_rate": 1.4491054572305585e-05, "loss": 0.0028, "step": 45830 }, { "grad_norm": 0.1617264449596405, "learning_rate": 1.4471658668547566e-05, "loss": 0.0034, "step": 45840 }, { "grad_norm": 0.1140698790550232, "learning_rate": 1.4452273557361579e-05, "loss": 0.0034, "step": 45850 }, { "grad_norm": 0.17291259765625, "learning_rate": 1.4432899244636282e-05, "loss": 0.003, "step": 45860 }, { "grad_norm": 0.1903328001499176, "learning_rate": 1.4413535736257134e-05, "loss": 0.0044, "step": 45870 }, { "grad_norm": 0.23108699917793274, "learning_rate": 1.439418303810619e-05, "loss": 0.0036, "step": 45880 }, { "grad_norm": 0.17733709514141083, "learning_rate": 1.4374841156062352e-05, "loss": 0.0025, "step": 45890 }, { "grad_norm": 0.10132022202014923, "learning_rate": 1.4355510096001112e-05, "loss": 0.0023, "step": 45900 }, { "grad_norm": 0.09983125329017639, "learning_rate": 1.4336189863794786e-05, "loss": 0.0026, "step": 45910 }, { "grad_norm": 0.141797736287117, "learning_rate": 1.4316880465312327e-05, "loss": 0.0036, "step": 45920 }, { "grad_norm": 0.10112389177083969, "learning_rate": 1.4297581906419426e-05, "loss": 0.0023, "step": 45930 }, { "grad_norm": 0.13054825365543365, "learning_rate": 1.4278294192978475e-05, "loss": 0.002, "step": 45940 }, { "grad_norm": 0.16906709969043732, "learning_rate": 1.4259017330848574e-05, "loss": 0.0022, "step": 45950 }, { "grad_norm": 0.12141149491071701, "learning_rate": 1.4239751325885498e-05, "loss": 0.0024, "step": 45960 }, { "grad_norm": 0.09307458996772766, "learning_rate": 1.4220496183941795e-05, "loss": 0.0027, "step": 45970 }, { "grad_norm": 0.1287001073360443, "learning_rate": 1.4201251910866648e-05, "loss": 0.0023, "step": 45980 }, { "grad_norm": 0.1382073163986206, "learning_rate": 1.4182018512505957e-05, "loss": 0.0027, "step": 45990 }, { "grad_norm": 0.14326943457126617, "learning_rate": 1.4162795994702327e-05, "loss": 0.0041, "step": 46000 }, { "grad_norm": 0.1298130750656128, "learning_rate": 1.4143584363295032e-05, "loss": 0.0026, "step": 46010 }, { "grad_norm": 0.10548557341098785, "learning_rate": 1.4124383624120101e-05, "loss": 0.0023, "step": 46020 }, { "grad_norm": 0.10755957663059235, "learning_rate": 1.4105193783010151e-05, "loss": 0.0025, "step": 46030 }, { "grad_norm": 0.15985740721225739, "learning_rate": 1.4086014845794621e-05, "loss": 0.0033, "step": 46040 }, { "grad_norm": 0.13729777932167053, "learning_rate": 1.4066846818299489e-05, "loss": 0.0041, "step": 46050 }, { "grad_norm": 0.13783517479896545, "learning_rate": 1.4047689706347555e-05, "loss": 0.0037, "step": 46060 }, { "grad_norm": 0.14581967890262604, "learning_rate": 1.402854351575822e-05, "loss": 0.003, "step": 46070 }, { "grad_norm": 0.12558075785636902, "learning_rate": 1.4009408252347588e-05, "loss": 0.0035, "step": 46080 }, { "grad_norm": 0.14792941510677338, "learning_rate": 1.399028392192846e-05, "loss": 0.0023, "step": 46090 }, { "grad_norm": 0.09443023055791855, "learning_rate": 1.397117053031029e-05, "loss": 0.0024, "step": 46100 }, { "grad_norm": 0.11221737414598465, "learning_rate": 1.3952068083299213e-05, "loss": 0.0027, "step": 46110 }, { "grad_norm": 0.1332656890153885, "learning_rate": 1.3932976586698082e-05, "loss": 0.0036, "step": 46120 }, { "grad_norm": 0.1910703182220459, "learning_rate": 1.3913896046306363e-05, "loss": 0.0036, "step": 46130 }, { "grad_norm": 0.1155548170208931, "learning_rate": 1.389482646792023e-05, "loss": 0.003, "step": 46140 }, { "grad_norm": 0.10850763320922852, "learning_rate": 1.387576785733251e-05, "loss": 0.0021, "step": 46150 }, { "grad_norm": 0.09422829747200012, "learning_rate": 1.3856720220332703e-05, "loss": 0.0024, "step": 46160 }, { "grad_norm": 0.1184690073132515, "learning_rate": 1.383768356270701e-05, "loss": 0.0025, "step": 46170 }, { "grad_norm": 0.11434689909219742, "learning_rate": 1.3818657890238207e-05, "loss": 0.0022, "step": 46180 }, { "grad_norm": 0.14526799321174622, "learning_rate": 1.3799643208705859e-05, "loss": 0.003, "step": 46190 }, { "grad_norm": 0.14587469398975372, "learning_rate": 1.3780639523886058e-05, "loss": 0.003, "step": 46200 }, { "grad_norm": 0.10978133231401443, "learning_rate": 1.3761646841551668e-05, "loss": 0.0025, "step": 46210 }, { "grad_norm": 0.06338368356227875, "learning_rate": 1.3742665167472146e-05, "loss": 0.0018, "step": 46220 }, { "grad_norm": 0.13494592905044556, "learning_rate": 1.372369450741363e-05, "loss": 0.0028, "step": 46230 }, { "grad_norm": 0.1185169517993927, "learning_rate": 1.3704734867138901e-05, "loss": 0.0021, "step": 46240 }, { "grad_norm": 0.08597885817289352, "learning_rate": 1.36857862524074e-05, "loss": 0.0021, "step": 46250 }, { "grad_norm": 0.09294528514146805, "learning_rate": 1.3666848668975213e-05, "loss": 0.0019, "step": 46260 }, { "grad_norm": 0.10184340924024582, "learning_rate": 1.3647922122595063e-05, "loss": 0.002, "step": 46270 }, { "grad_norm": 0.10174739360809326, "learning_rate": 1.3629006619016366e-05, "loss": 0.002, "step": 46280 }, { "grad_norm": 0.11188867688179016, "learning_rate": 1.3610102163985139e-05, "loss": 0.0021, "step": 46290 }, { "grad_norm": 0.12490741908550262, "learning_rate": 1.3591208763244057e-05, "loss": 0.0023, "step": 46300 }, { "grad_norm": 0.143723726272583, "learning_rate": 1.3572326422532428e-05, "loss": 0.0029, "step": 46310 }, { "grad_norm": 0.1327531784772873, "learning_rate": 1.355345514758622e-05, "loss": 0.003, "step": 46320 }, { "grad_norm": 0.1411413550376892, "learning_rate": 1.3534594944138007e-05, "loss": 0.0028, "step": 46330 }, { "grad_norm": 0.15518443286418915, "learning_rate": 1.3515745817917069e-05, "loss": 0.0041, "step": 46340 }, { "grad_norm": 0.09662064164876938, "learning_rate": 1.3496907774649208e-05, "loss": 0.0042, "step": 46350 }, { "grad_norm": 0.1721542775630951, "learning_rate": 1.3478080820056987e-05, "loss": 0.003, "step": 46360 }, { "grad_norm": 0.11976107209920883, "learning_rate": 1.3459264959859474e-05, "loss": 0.0033, "step": 46370 }, { "grad_norm": 0.19108441472053528, "learning_rate": 1.3440460199772487e-05, "loss": 0.0024, "step": 46380 }, { "grad_norm": 0.12142565846443176, "learning_rate": 1.3421666545508382e-05, "loss": 0.0034, "step": 46390 }, { "grad_norm": 0.1365564912557602, "learning_rate": 1.3402884002776194e-05, "loss": 0.0022, "step": 46400 }, { "grad_norm": 0.100091852247715, "learning_rate": 1.3384112577281555e-05, "loss": 0.0037, "step": 46410 }, { "grad_norm": 0.1566309630870819, "learning_rate": 1.3365352274726711e-05, "loss": 0.003, "step": 46420 }, { "grad_norm": 0.1333821415901184, "learning_rate": 1.3346603100810578e-05, "loss": 0.0032, "step": 46430 }, { "grad_norm": 0.10575415194034576, "learning_rate": 1.3327865061228645e-05, "loss": 0.0029, "step": 46440 }, { "grad_norm": 0.14837437868118286, "learning_rate": 1.330913816167304e-05, "loss": 0.0025, "step": 46450 }, { "grad_norm": 0.12326712161302567, "learning_rate": 1.3290422407832492e-05, "loss": 0.0032, "step": 46460 }, { "grad_norm": 0.10345402359962463, "learning_rate": 1.3271717805392354e-05, "loss": 0.0027, "step": 46470 }, { "grad_norm": 0.12002383917570114, "learning_rate": 1.3253024360034582e-05, "loss": 0.004, "step": 46480 }, { "grad_norm": 0.09263564646244049, "learning_rate": 1.323434207743779e-05, "loss": 0.0025, "step": 46490 }, { "grad_norm": 0.10615521669387817, "learning_rate": 1.3215670963277105e-05, "loss": 0.0018, "step": 46500 }, { "grad_norm": 0.15661312639713287, "learning_rate": 1.3197011023224376e-05, "loss": 0.0035, "step": 46510 }, { "grad_norm": 0.1299227774143219, "learning_rate": 1.3178362262947941e-05, "loss": 0.0021, "step": 46520 }, { "grad_norm": 0.14572419226169586, "learning_rate": 1.3159724688112845e-05, "loss": 0.0028, "step": 46530 }, { "grad_norm": 0.13466009497642517, "learning_rate": 1.3141098304380683e-05, "loss": 0.002, "step": 46540 }, { "grad_norm": 0.1126943975687027, "learning_rate": 1.3122483117409651e-05, "loss": 0.0029, "step": 46550 }, { "grad_norm": 0.14645519852638245, "learning_rate": 1.3103879132854552e-05, "loss": 0.0022, "step": 46560 }, { "grad_norm": 0.10317640006542206, "learning_rate": 1.3085286356366771e-05, "loss": 0.0026, "step": 46570 }, { "grad_norm": 0.1310623437166214, "learning_rate": 1.3066704793594337e-05, "loss": 0.0029, "step": 46580 }, { "grad_norm": 0.12525010108947754, "learning_rate": 1.3048134450181816e-05, "loss": 0.0022, "step": 46590 }, { "grad_norm": 0.126299187541008, "learning_rate": 1.3029575331770394e-05, "loss": 0.0022, "step": 46600 }, { "grad_norm": 0.07357867062091827, "learning_rate": 1.3011027443997837e-05, "loss": 0.0026, "step": 46610 }, { "grad_norm": 0.10681796818971634, "learning_rate": 1.2992490792498507e-05, "loss": 0.0034, "step": 46620 }, { "grad_norm": 0.16927707195281982, "learning_rate": 1.297396538290333e-05, "loss": 0.0028, "step": 46630 }, { "grad_norm": 0.10481976717710495, "learning_rate": 1.2955451220839888e-05, "loss": 0.0018, "step": 46640 }, { "grad_norm": 0.12929436564445496, "learning_rate": 1.2936948311932223e-05, "loss": 0.0025, "step": 46650 }, { "grad_norm": 0.08394632488489151, "learning_rate": 1.2918456661801104e-05, "loss": 0.0023, "step": 46660 }, { "grad_norm": 0.11800488084554672, "learning_rate": 1.2899976276063736e-05, "loss": 0.0025, "step": 46670 }, { "grad_norm": 0.10719099640846252, "learning_rate": 1.2881507160334022e-05, "loss": 0.0033, "step": 46680 }, { "grad_norm": 0.1297837644815445, "learning_rate": 1.286304932022238e-05, "loss": 0.0032, "step": 46690 }, { "grad_norm": 0.14928150177001953, "learning_rate": 1.2844602761335806e-05, "loss": 0.0048, "step": 46700 }, { "grad_norm": 0.10335986316204071, "learning_rate": 1.2826167489277885e-05, "loss": 0.0023, "step": 46710 }, { "grad_norm": 0.12226234376430511, "learning_rate": 1.2807743509648745e-05, "loss": 0.0029, "step": 46720 }, { "grad_norm": 0.1272212266921997, "learning_rate": 1.2789330828045149e-05, "loss": 0.0035, "step": 46730 }, { "grad_norm": 0.1568411886692047, "learning_rate": 1.2770929450060332e-05, "loss": 0.0024, "step": 46740 }, { "grad_norm": 0.12614105641841888, "learning_rate": 1.2752539381284184e-05, "loss": 0.0032, "step": 46750 }, { "grad_norm": 0.13119417428970337, "learning_rate": 1.273416062730311e-05, "loss": 0.0023, "step": 46760 }, { "grad_norm": 0.12374354153871536, "learning_rate": 1.2715793193700088e-05, "loss": 0.0025, "step": 46770 }, { "grad_norm": 0.08981849253177643, "learning_rate": 1.2697437086054664e-05, "loss": 0.0021, "step": 46780 }, { "grad_norm": 0.10945316404104233, "learning_rate": 1.2679092309942937e-05, "loss": 0.0027, "step": 46790 }, { "grad_norm": 0.21132531762123108, "learning_rate": 1.266075887093755e-05, "loss": 0.0033, "step": 46800 }, { "grad_norm": 0.11576346307992935, "learning_rate": 1.2642436774607757e-05, "loss": 0.0023, "step": 46810 }, { "grad_norm": 0.15515871345996857, "learning_rate": 1.2624126026519278e-05, "loss": 0.0023, "step": 46820 }, { "grad_norm": 0.09275900572538376, "learning_rate": 1.2605826632234474e-05, "loss": 0.0031, "step": 46830 }, { "grad_norm": 0.11182555556297302, "learning_rate": 1.2587538597312198e-05, "loss": 0.0019, "step": 46840 }, { "grad_norm": 0.08751679956912994, "learning_rate": 1.2569261927307884e-05, "loss": 0.0018, "step": 46850 }, { "grad_norm": 0.12337523698806763, "learning_rate": 1.2550996627773493e-05, "loss": 0.0027, "step": 46860 }, { "grad_norm": 0.07998523116111755, "learning_rate": 1.2532742704257527e-05, "loss": 0.0016, "step": 46870 }, { "grad_norm": 0.1518530547618866, "learning_rate": 1.2514500162305087e-05, "loss": 0.0041, "step": 46880 }, { "grad_norm": 0.08548180758953094, "learning_rate": 1.2496269007457728e-05, "loss": 0.0016, "step": 46890 }, { "grad_norm": 0.12792423367500305, "learning_rate": 1.2478049245253625e-05, "loss": 0.002, "step": 46900 }, { "grad_norm": 0.11787933856248856, "learning_rate": 1.2459840881227459e-05, "loss": 0.0022, "step": 46910 }, { "grad_norm": 0.0767868235707283, "learning_rate": 1.2441643920910435e-05, "loss": 0.0017, "step": 46920 }, { "grad_norm": 0.07796741276979446, "learning_rate": 1.2423458369830322e-05, "loss": 0.0028, "step": 46930 }, { "grad_norm": 0.11237072199583054, "learning_rate": 1.2405284233511406e-05, "loss": 0.0027, "step": 46940 }, { "grad_norm": 0.1674765795469284, "learning_rate": 1.2387121517474487e-05, "loss": 0.0032, "step": 46950 }, { "grad_norm": 0.13526573777198792, "learning_rate": 1.2368970227236975e-05, "loss": 0.0028, "step": 46960 }, { "grad_norm": 0.09588257968425751, "learning_rate": 1.2350830368312688e-05, "loss": 0.0025, "step": 46970 }, { "grad_norm": 0.16969755291938782, "learning_rate": 1.2332701946212083e-05, "loss": 0.004, "step": 46980 }, { "grad_norm": 0.1334334909915924, "learning_rate": 1.2314584966442077e-05, "loss": 0.002, "step": 46990 }, { "grad_norm": 0.10175280272960663, "learning_rate": 1.2296479434506136e-05, "loss": 0.0016, "step": 47000 }, { "grad_norm": 0.11347930878400803, "learning_rate": 1.2278385355904232e-05, "loss": 0.0025, "step": 47010 }, { "grad_norm": 0.11240120232105255, "learning_rate": 1.2260302736132867e-05, "loss": 0.0022, "step": 47020 }, { "grad_norm": 0.12349376827478409, "learning_rate": 1.2242231580685098e-05, "loss": 0.0025, "step": 47030 }, { "grad_norm": 0.19221635162830353, "learning_rate": 1.2224171895050413e-05, "loss": 0.004, "step": 47040 }, { "grad_norm": 0.1625974476337433, "learning_rate": 1.2206123684714903e-05, "loss": 0.0026, "step": 47050 }, { "grad_norm": 0.13469119369983673, "learning_rate": 1.2188086955161132e-05, "loss": 0.0029, "step": 47060 }, { "grad_norm": 0.08854442089796066, "learning_rate": 1.2170061711868175e-05, "loss": 0.0029, "step": 47070 }, { "grad_norm": 0.08915482461452484, "learning_rate": 1.215204796031163e-05, "loss": 0.002, "step": 47080 }, { "grad_norm": 0.1259669065475464, "learning_rate": 1.2134045705963599e-05, "loss": 0.0024, "step": 47090 }, { "grad_norm": 0.10597129911184311, "learning_rate": 1.2116054954292689e-05, "loss": 0.0034, "step": 47100 }, { "grad_norm": 0.1195705458521843, "learning_rate": 1.2098075710764011e-05, "loss": 0.0027, "step": 47110 }, { "grad_norm": 0.1130211278796196, "learning_rate": 1.2080107980839183e-05, "loss": 0.0023, "step": 47120 }, { "grad_norm": 0.1250617951154709, "learning_rate": 1.2062151769976343e-05, "loss": 0.0025, "step": 47130 }, { "grad_norm": 0.1296345293521881, "learning_rate": 1.204420708363011e-05, "loss": 0.0024, "step": 47140 }, { "grad_norm": 0.1487611085176468, "learning_rate": 1.2026273927251597e-05, "loss": 0.003, "step": 47150 }, { "grad_norm": 0.1340097188949585, "learning_rate": 1.2008352306288424e-05, "loss": 0.003, "step": 47160 }, { "grad_norm": 0.12724849581718445, "learning_rate": 1.1990442226184695e-05, "loss": 0.0018, "step": 47170 }, { "grad_norm": 0.14960534870624542, "learning_rate": 1.1972543692381066e-05, "loss": 0.0033, "step": 47180 }, { "grad_norm": 0.12741266191005707, "learning_rate": 1.1954656710314576e-05, "loss": 0.0024, "step": 47190 }, { "grad_norm": 0.10716425627470016, "learning_rate": 1.1936781285418875e-05, "loss": 0.0045, "step": 47200 }, { "grad_norm": 0.1180964782834053, "learning_rate": 1.1918917423123993e-05, "loss": 0.003, "step": 47210 }, { "grad_norm": 0.1549995392560959, "learning_rate": 1.1901065128856537e-05, "loss": 0.0031, "step": 47220 }, { "grad_norm": 0.1273275911808014, "learning_rate": 1.1883224408039551e-05, "loss": 0.0023, "step": 47230 }, { "grad_norm": 0.08809608221054077, "learning_rate": 1.1865395266092578e-05, "loss": 0.0029, "step": 47240 }, { "grad_norm": 0.12624306976795197, "learning_rate": 1.1847577708431633e-05, "loss": 0.0025, "step": 47250 }, { "grad_norm": 0.10553889721632004, "learning_rate": 1.1829771740469225e-05, "loss": 0.0024, "step": 47260 }, { "grad_norm": 0.12091819196939468, "learning_rate": 1.1811977367614324e-05, "loss": 0.0038, "step": 47270 }, { "grad_norm": 0.116727814078331, "learning_rate": 1.1794194595272412e-05, "loss": 0.0024, "step": 47280 }, { "grad_norm": 0.1217162162065506, "learning_rate": 1.1776423428845423e-05, "loss": 0.0031, "step": 47290 }, { "grad_norm": 0.09838945418596268, "learning_rate": 1.1758663873731756e-05, "loss": 0.0016, "step": 47300 }, { "grad_norm": 0.08561576157808304, "learning_rate": 1.1740915935326302e-05, "loss": 0.0022, "step": 47310 }, { "grad_norm": 0.09892681241035461, "learning_rate": 1.1723179619020396e-05, "loss": 0.0027, "step": 47320 }, { "grad_norm": 0.10146355628967285, "learning_rate": 1.1705454930201914e-05, "loss": 0.0018, "step": 47330 }, { "grad_norm": 0.13942992687225342, "learning_rate": 1.1687741874255087e-05, "loss": 0.0022, "step": 47340 }, { "grad_norm": 0.1388404220342636, "learning_rate": 1.1670040456560728e-05, "loss": 0.004, "step": 47350 }, { "grad_norm": 0.10307937860488892, "learning_rate": 1.1652350682496005e-05, "loss": 0.003, "step": 47360 }, { "grad_norm": 0.12538784742355347, "learning_rate": 1.163467255743465e-05, "loss": 0.0023, "step": 47370 }, { "grad_norm": 0.1075529232621193, "learning_rate": 1.1617006086746796e-05, "loss": 0.0027, "step": 47380 }, { "grad_norm": 0.10925360023975372, "learning_rate": 1.1599351275799047e-05, "loss": 0.0027, "step": 47390 }, { "grad_norm": 0.15213201940059662, "learning_rate": 1.1581708129954466e-05, "loss": 0.0022, "step": 47400 }, { "grad_norm": 0.22908484935760498, "learning_rate": 1.1564076654572587e-05, "loss": 0.0032, "step": 47410 }, { "grad_norm": 0.12508384883403778, "learning_rate": 1.1546456855009358e-05, "loss": 0.0027, "step": 47420 }, { "grad_norm": 0.12114439904689789, "learning_rate": 1.1528848736617248e-05, "loss": 0.0025, "step": 47430 }, { "grad_norm": 0.1250842660665512, "learning_rate": 1.1511252304745112e-05, "loss": 0.0024, "step": 47440 }, { "grad_norm": 0.11026407033205032, "learning_rate": 1.1493667564738297e-05, "loss": 0.0024, "step": 47450 }, { "grad_norm": 0.16509422659873962, "learning_rate": 1.1476094521938574e-05, "loss": 0.0038, "step": 47460 }, { "grad_norm": 0.11542849242687225, "learning_rate": 1.1458533181684167e-05, "loss": 0.002, "step": 47470 }, { "grad_norm": 0.142963707447052, "learning_rate": 1.1440983549309753e-05, "loss": 0.0032, "step": 47480 }, { "grad_norm": 0.10162800550460815, "learning_rate": 1.1423445630146434e-05, "loss": 0.0023, "step": 47490 }, { "grad_norm": 0.11569596081972122, "learning_rate": 1.1405919429521799e-05, "loss": 0.0024, "step": 47500 }, { "grad_norm": 0.12443464249372482, "learning_rate": 1.1388404952759802e-05, "loss": 0.0024, "step": 47510 }, { "grad_norm": 0.08241724967956543, "learning_rate": 1.1370902205180923e-05, "loss": 0.0018, "step": 47520 }, { "grad_norm": 0.09481204301118851, "learning_rate": 1.1353411192101987e-05, "loss": 0.0019, "step": 47530 }, { "grad_norm": 0.1298399120569229, "learning_rate": 1.133593191883634e-05, "loss": 0.0028, "step": 47540 }, { "grad_norm": 0.11450059711933136, "learning_rate": 1.1318464390693711e-05, "loss": 0.0022, "step": 47550 }, { "grad_norm": 0.1073218435049057, "learning_rate": 1.1301008612980257e-05, "loss": 0.0028, "step": 47560 }, { "grad_norm": 0.10504373162984848, "learning_rate": 1.128356459099863e-05, "loss": 0.0025, "step": 47570 }, { "grad_norm": 0.135113924741745, "learning_rate": 1.1266132330047802e-05, "loss": 0.0033, "step": 47580 }, { "grad_norm": 0.12440724670886993, "learning_rate": 1.1248711835423281e-05, "loss": 0.0024, "step": 47590 }, { "grad_norm": 0.08071115612983704, "learning_rate": 1.123130311241693e-05, "loss": 0.0015, "step": 47600 }, { "grad_norm": 0.09911885112524033, "learning_rate": 1.1213906166317068e-05, "loss": 0.0049, "step": 47610 }, { "grad_norm": 0.0713583305478096, "learning_rate": 1.1196521002408427e-05, "loss": 0.0039, "step": 47620 }, { "grad_norm": 0.08284076303243637, "learning_rate": 1.1179147625972159e-05, "loss": 0.0018, "step": 47630 }, { "grad_norm": 0.08442492038011551, "learning_rate": 1.1161786042285822e-05, "loss": 0.0022, "step": 47640 }, { "grad_norm": 0.10332383215427399, "learning_rate": 1.1144436256623447e-05, "loss": 0.0029, "step": 47650 }, { "grad_norm": 0.08446500450372696, "learning_rate": 1.1127098274255392e-05, "loss": 0.0019, "step": 47660 }, { "grad_norm": 0.11181280761957169, "learning_rate": 1.1109772100448512e-05, "loss": 0.0022, "step": 47670 }, { "grad_norm": 0.0828646644949913, "learning_rate": 1.1092457740466033e-05, "loss": 0.0021, "step": 47680 }, { "grad_norm": 0.1020873636007309, "learning_rate": 1.10751551995676e-05, "loss": 0.0021, "step": 47690 }, { "grad_norm": 0.09450958669185638, "learning_rate": 1.1057864483009262e-05, "loss": 0.0018, "step": 47700 }, { "grad_norm": 0.16156481206417084, "learning_rate": 1.1040585596043473e-05, "loss": 0.0025, "step": 47710 }, { "grad_norm": 0.12269661575555801, "learning_rate": 1.1023318543919148e-05, "loss": 0.0021, "step": 47720 }, { "grad_norm": 0.105267733335495, "learning_rate": 1.10060633318815e-05, "loss": 0.0025, "step": 47730 }, { "grad_norm": 0.12487838417291641, "learning_rate": 1.0988819965172248e-05, "loss": 0.0032, "step": 47740 }, { "grad_norm": 0.1382634937763214, "learning_rate": 1.0971588449029462e-05, "loss": 0.0035, "step": 47750 }, { "grad_norm": 0.08704044669866562, "learning_rate": 1.095436878868762e-05, "loss": 0.0021, "step": 47760 }, { "grad_norm": 0.12546859681606293, "learning_rate": 1.0937160989377598e-05, "loss": 0.002, "step": 47770 }, { "grad_norm": 0.09371322393417358, "learning_rate": 1.0919965056326676e-05, "loss": 0.0022, "step": 47780 }, { "grad_norm": 0.13431386649608612, "learning_rate": 1.0902780994758504e-05, "loss": 0.0024, "step": 47790 }, { "grad_norm": 0.10775809735059738, "learning_rate": 1.0885608809893193e-05, "loss": 0.0039, "step": 47800 }, { "grad_norm": 0.1157064437866211, "learning_rate": 1.0868448506947142e-05, "loss": 0.0023, "step": 47810 }, { "grad_norm": 0.10234591364860535, "learning_rate": 1.0851300091133243e-05, "loss": 0.0026, "step": 47820 }, { "grad_norm": 0.14829765260219574, "learning_rate": 1.083416356766071e-05, "loss": 0.0023, "step": 47830 }, { "grad_norm": 0.07441359758377075, "learning_rate": 1.0817038941735175e-05, "loss": 0.0018, "step": 47840 }, { "grad_norm": 0.15241529047489166, "learning_rate": 1.0799926218558642e-05, "loss": 0.0024, "step": 47850 }, { "grad_norm": 0.09379903972148895, "learning_rate": 1.0782825403329488e-05, "loss": 0.0019, "step": 47860 }, { "grad_norm": 0.13571493327617645, "learning_rate": 1.076573650124254e-05, "loss": 0.0028, "step": 47870 }, { "grad_norm": 0.17612747848033905, "learning_rate": 1.0748659517488891e-05, "loss": 0.0033, "step": 47880 }, { "grad_norm": 0.17345304787158966, "learning_rate": 1.0731594457256138e-05, "loss": 0.0039, "step": 47890 }, { "grad_norm": 0.11342008411884308, "learning_rate": 1.0714541325728139e-05, "loss": 0.0023, "step": 47900 }, { "grad_norm": 0.1162128821015358, "learning_rate": 1.0697500128085231e-05, "loss": 0.0022, "step": 47910 }, { "grad_norm": 0.12873190641403198, "learning_rate": 1.0680470869504055e-05, "loss": 0.0025, "step": 47920 }, { "grad_norm": 0.11322584003210068, "learning_rate": 1.066345355515766e-05, "loss": 0.0033, "step": 47930 }, { "grad_norm": 0.11463586241006851, "learning_rate": 1.0646448190215453e-05, "loss": 0.002, "step": 47940 }, { "grad_norm": 0.08118673413991928, "learning_rate": 1.0629454779843217e-05, "loss": 0.0029, "step": 47950 }, { "grad_norm": 0.11862143129110336, "learning_rate": 1.0612473329203082e-05, "loss": 0.0029, "step": 47960 }, { "grad_norm": 0.12997980415821075, "learning_rate": 1.0595503843453596e-05, "loss": 0.0038, "step": 47970 }, { "grad_norm": 0.1317794770002365, "learning_rate": 1.0578546327749634e-05, "loss": 0.0024, "step": 47980 }, { "grad_norm": 0.10178518295288086, "learning_rate": 1.0561600787242425e-05, "loss": 0.004, "step": 47990 }, { "grad_norm": 0.09716015309095383, "learning_rate": 1.0544667227079591e-05, "loss": 0.0029, "step": 48000 }, { "grad_norm": 0.12273216247558594, "learning_rate": 1.0527745652405085e-05, "loss": 0.0022, "step": 48010 }, { "grad_norm": 0.10136248171329498, "learning_rate": 1.051083606835927e-05, "loss": 0.0023, "step": 48020 }, { "grad_norm": 0.084026038646698, "learning_rate": 1.049393848007878e-05, "loss": 0.0027, "step": 48030 }, { "grad_norm": 0.11330205947160721, "learning_rate": 1.0477052892696709e-05, "loss": 0.002, "step": 48040 }, { "grad_norm": 0.08676755428314209, "learning_rate": 1.0460179311342394e-05, "loss": 0.0022, "step": 48050 }, { "grad_norm": 0.09153099358081818, "learning_rate": 1.0443317741141634e-05, "loss": 0.0024, "step": 48060 }, { "grad_norm": 0.11852974444627762, "learning_rate": 1.0426468187216514e-05, "loss": 0.0027, "step": 48070 }, { "grad_norm": 0.1029120534658432, "learning_rate": 1.0409630654685477e-05, "loss": 0.0018, "step": 48080 }, { "grad_norm": 0.11213282495737076, "learning_rate": 1.039280514866332e-05, "loss": 0.0033, "step": 48090 }, { "grad_norm": 0.11281349509954453, "learning_rate": 1.0375991674261198e-05, "loss": 0.0041, "step": 48100 }, { "grad_norm": 0.11131361871957779, "learning_rate": 1.0359190236586575e-05, "loss": 0.0024, "step": 48110 }, { "grad_norm": 0.10350585728883743, "learning_rate": 1.0342400840743322e-05, "loss": 0.0025, "step": 48120 }, { "grad_norm": 0.1123080626130104, "learning_rate": 1.0325623491831593e-05, "loss": 0.0024, "step": 48130 }, { "grad_norm": 0.08169658482074738, "learning_rate": 1.0308858194947906e-05, "loss": 0.002, "step": 48140 }, { "grad_norm": 0.10029498487710953, "learning_rate": 1.0292104955185111e-05, "loss": 0.003, "step": 48150 }, { "grad_norm": 0.08469241112470627, "learning_rate": 1.0275363777632396e-05, "loss": 0.0019, "step": 48160 }, { "grad_norm": 0.10511776059865952, "learning_rate": 1.0258634667375321e-05, "loss": 0.002, "step": 48170 }, { "grad_norm": 0.09187564998865128, "learning_rate": 1.02419176294957e-05, "loss": 0.0025, "step": 48180 }, { "grad_norm": 0.08772830665111542, "learning_rate": 1.0225212669071782e-05, "loss": 0.002, "step": 48190 }, { "grad_norm": 0.10771210491657257, "learning_rate": 1.0208519791178029e-05, "loss": 0.0022, "step": 48200 }, { "grad_norm": 0.08751191943883896, "learning_rate": 1.019183900088535e-05, "loss": 0.0022, "step": 48210 }, { "grad_norm": 0.08429927378892899, "learning_rate": 1.0175170303260906e-05, "loss": 0.0019, "step": 48220 }, { "grad_norm": 0.11151190847158432, "learning_rate": 1.0158513703368206e-05, "loss": 0.0026, "step": 48230 }, { "grad_norm": 0.10996969044208527, "learning_rate": 1.0141869206267095e-05, "loss": 0.0033, "step": 48240 }, { "grad_norm": 0.11298898607492447, "learning_rate": 1.0125236817013723e-05, "loss": 0.003, "step": 48250 }, { "grad_norm": 0.11981301009654999, "learning_rate": 1.010861654066056e-05, "loss": 0.0023, "step": 48260 }, { "grad_norm": 0.11962216347455978, "learning_rate": 1.0092008382256434e-05, "loss": 0.0024, "step": 48270 }, { "grad_norm": 0.10287439078092575, "learning_rate": 1.0075412346846458e-05, "loss": 0.0021, "step": 48280 }, { "grad_norm": 0.08729320019483566, "learning_rate": 1.0058828439472056e-05, "loss": 0.002, "step": 48290 }, { "grad_norm": 0.15193483233451843, "learning_rate": 1.0042256665170996e-05, "loss": 0.0026, "step": 48300 }, { "grad_norm": 0.14248016476631165, "learning_rate": 1.0025697028977332e-05, "loss": 0.0034, "step": 48310 }, { "grad_norm": 0.13116611540317535, "learning_rate": 1.0009149535921454e-05, "loss": 0.0033, "step": 48320 }, { "grad_norm": 0.11773749440908432, "learning_rate": 9.992614191030031e-06, "loss": 0.0019, "step": 48330 }, { "grad_norm": 0.1203927993774414, "learning_rate": 9.976090999326115e-06, "loss": 0.0025, "step": 48340 }, { "grad_norm": 0.09188685566186905, "learning_rate": 9.959579965828952e-06, "loss": 0.002, "step": 48350 }, { "grad_norm": 0.15167517960071564, "learning_rate": 9.943081095554218e-06, "loss": 0.0038, "step": 48360 }, { "grad_norm": 0.1040549948811531, "learning_rate": 9.926594393513783e-06, "loss": 0.0034, "step": 48370 }, { "grad_norm": 0.15255528688430786, "learning_rate": 9.910119864715906e-06, "loss": 0.0026, "step": 48380 }, { "grad_norm": 0.11402716487646103, "learning_rate": 9.8936575141651e-06, "loss": 0.0028, "step": 48390 }, { "grad_norm": 0.11590831726789474, "learning_rate": 9.877207346862194e-06, "loss": 0.0026, "step": 48400 }, { "grad_norm": 0.07933210581541061, "learning_rate": 9.860769367804312e-06, "loss": 0.0019, "step": 48410 }, { "grad_norm": 0.0818268209695816, "learning_rate": 9.844343581984877e-06, "loss": 0.0022, "step": 48420 }, { "grad_norm": 0.09188905358314514, "learning_rate": 9.82792999439362e-06, "loss": 0.002, "step": 48430 }, { "grad_norm": 0.1119978129863739, "learning_rate": 9.811528610016546e-06, "loss": 0.0035, "step": 48440 }, { "grad_norm": 0.10409771651029587, "learning_rate": 9.79513943383597e-06, "loss": 0.0019, "step": 48450 }, { "grad_norm": 0.09029753506183624, "learning_rate": 9.778762470830489e-06, "loss": 0.0028, "step": 48460 }, { "grad_norm": 0.11212477833032608, "learning_rate": 9.762397725974982e-06, "loss": 0.002, "step": 48470 }, { "grad_norm": 0.11352349072694778, "learning_rate": 9.746045204240622e-06, "loss": 0.0017, "step": 48480 }, { "grad_norm": 0.10118387639522552, "learning_rate": 9.729704910594917e-06, "loss": 0.0033, "step": 48490 }, { "grad_norm": 0.0925510972738266, "learning_rate": 9.713376850001554e-06, "loss": 0.0028, "step": 48500 }, { "grad_norm": 0.1536151021718979, "learning_rate": 9.697061027420622e-06, "loss": 0.0035, "step": 48510 }, { "grad_norm": 0.13699395954608917, "learning_rate": 9.680757447808385e-06, "loss": 0.005, "step": 48520 }, { "grad_norm": 0.14559189975261688, "learning_rate": 9.664466116117488e-06, "loss": 0.0031, "step": 48530 }, { "grad_norm": 0.13472463190555573, "learning_rate": 9.64818703729678e-06, "loss": 0.0026, "step": 48540 }, { "grad_norm": 0.10501401126384735, "learning_rate": 9.631920216291423e-06, "loss": 0.0022, "step": 48550 }, { "grad_norm": 0.12361995875835419, "learning_rate": 9.615665658042849e-06, "loss": 0.0029, "step": 48560 }, { "grad_norm": 0.09984498471021652, "learning_rate": 9.599423367488747e-06, "loss": 0.0017, "step": 48570 }, { "grad_norm": 0.12647372484207153, "learning_rate": 9.583193349563124e-06, "loss": 0.0023, "step": 48580 }, { "grad_norm": 0.13131414353847504, "learning_rate": 9.566975609196216e-06, "loss": 0.0026, "step": 48590 }, { "grad_norm": 0.11458467692136765, "learning_rate": 9.550770151314548e-06, "loss": 0.0021, "step": 48600 }, { "grad_norm": 0.12321095168590546, "learning_rate": 9.53457698084091e-06, "loss": 0.0037, "step": 48610 }, { "grad_norm": 0.12712985277175903, "learning_rate": 9.518396102694355e-06, "loss": 0.0021, "step": 48620 }, { "grad_norm": 0.15009577572345734, "learning_rate": 9.502227521790198e-06, "loss": 0.0019, "step": 48630 }, { "grad_norm": 0.11379413306713104, "learning_rate": 9.486071243040063e-06, "loss": 0.0024, "step": 48640 }, { "grad_norm": 0.149822399020195, "learning_rate": 9.469927271351747e-06, "loss": 0.0037, "step": 48650 }, { "grad_norm": 0.09868599474430084, "learning_rate": 9.453795611629419e-06, "loss": 0.0021, "step": 48660 }, { "grad_norm": 0.13636882603168488, "learning_rate": 9.437676268773399e-06, "loss": 0.0025, "step": 48670 }, { "grad_norm": 0.09216433018445969, "learning_rate": 9.421569247680357e-06, "loss": 0.0019, "step": 48680 }, { "grad_norm": 0.07814184576272964, "learning_rate": 9.40547455324316e-06, "loss": 0.0018, "step": 48690 }, { "grad_norm": 0.12194598466157913, "learning_rate": 9.389392190350965e-06, "loss": 0.0034, "step": 48700 }, { "grad_norm": 0.10378128290176392, "learning_rate": 9.373322163889153e-06, "loss": 0.0028, "step": 48710 }, { "grad_norm": 0.0951819121837616, "learning_rate": 9.357264478739375e-06, "loss": 0.0019, "step": 48720 }, { "grad_norm": 0.1617048680782318, "learning_rate": 9.341219139779567e-06, "loss": 0.0029, "step": 48730 }, { "grad_norm": 0.11434527486562729, "learning_rate": 9.325186151883824e-06, "loss": 0.0018, "step": 48740 }, { "grad_norm": 0.10056687146425247, "learning_rate": 9.30916551992258e-06, "loss": 0.0021, "step": 48750 }, { "grad_norm": 0.13249000906944275, "learning_rate": 9.293157248762479e-06, "loss": 0.0018, "step": 48760 }, { "grad_norm": 0.13988246023654938, "learning_rate": 9.2771613432664e-06, "loss": 0.0025, "step": 48770 }, { "grad_norm": 0.1329503208398819, "learning_rate": 9.261177808293481e-06, "loss": 0.0029, "step": 48780 }, { "grad_norm": 0.11927880346775055, "learning_rate": 9.245206648699096e-06, "loss": 0.0023, "step": 48790 }, { "grad_norm": 0.12103567272424698, "learning_rate": 9.22924786933485e-06, "loss": 0.0018, "step": 48800 }, { "grad_norm": 0.10801254957914352, "learning_rate": 9.213301475048642e-06, "loss": 0.0018, "step": 48810 }, { "grad_norm": 0.09657008945941925, "learning_rate": 9.197367470684504e-06, "loss": 0.0022, "step": 48820 }, { "grad_norm": 0.10457472503185272, "learning_rate": 9.181445861082816e-06, "loss": 0.0028, "step": 48830 }, { "grad_norm": 0.14161191880702972, "learning_rate": 9.16553665108012e-06, "loss": 0.0048, "step": 48840 }, { "grad_norm": 0.11951187252998352, "learning_rate": 9.149639845509223e-06, "loss": 0.0042, "step": 48850 }, { "grad_norm": 0.14019037783145905, "learning_rate": 9.133755449199144e-06, "loss": 0.003, "step": 48860 }, { "grad_norm": 0.128916397690773, "learning_rate": 9.117883466975135e-06, "loss": 0.0026, "step": 48870 }, { "grad_norm": 0.1488710343837738, "learning_rate": 9.10202390365873e-06, "loss": 0.0025, "step": 48880 }, { "grad_norm": 0.11417711526155472, "learning_rate": 9.086176764067583e-06, "loss": 0.0018, "step": 48890 }, { "grad_norm": 0.11061181128025055, "learning_rate": 9.070342053015684e-06, "loss": 0.0031, "step": 48900 }, { "grad_norm": 0.08189591765403748, "learning_rate": 9.054519775313187e-06, "loss": 0.0018, "step": 48910 }, { "grad_norm": 0.09690409898757935, "learning_rate": 9.038709935766476e-06, "loss": 0.0021, "step": 48920 }, { "grad_norm": 0.06724182516336441, "learning_rate": 9.02291253917817e-06, "loss": 0.0017, "step": 48930 }, { "grad_norm": 0.12101121991872787, "learning_rate": 9.007127590347091e-06, "loss": 0.0039, "step": 48940 }, { "grad_norm": 0.08156918734312057, "learning_rate": 8.991355094068288e-06, "loss": 0.0028, "step": 48950 }, { "grad_norm": 0.07716041803359985, "learning_rate": 8.975595055133062e-06, "loss": 0.0018, "step": 48960 }, { "grad_norm": 0.07253929227590561, "learning_rate": 8.959847478328848e-06, "loss": 0.0023, "step": 48970 }, { "grad_norm": 0.11566736549139023, "learning_rate": 8.944112368439378e-06, "loss": 0.0024, "step": 48980 }, { "grad_norm": 0.07291276007890701, "learning_rate": 8.928389730244552e-06, "loss": 0.002, "step": 48990 }, { "grad_norm": 0.15503861010074615, "learning_rate": 8.912679568520494e-06, "loss": 0.0035, "step": 49000 }, { "grad_norm": 0.09338108450174332, "learning_rate": 8.896981888039534e-06, "loss": 0.002, "step": 49010 }, { "grad_norm": 0.1186317428946495, "learning_rate": 8.881296693570201e-06, "loss": 0.0021, "step": 49020 }, { "grad_norm": 0.09802825003862381, "learning_rate": 8.865623989877281e-06, "loss": 0.003, "step": 49030 }, { "grad_norm": 0.09408789128065109, "learning_rate": 8.849963781721681e-06, "loss": 0.0024, "step": 49040 }, { "grad_norm": 0.09908238798379898, "learning_rate": 8.834316073860588e-06, "loss": 0.0031, "step": 49050 }, { "grad_norm": 0.07211890816688538, "learning_rate": 8.818680871047357e-06, "loss": 0.0016, "step": 49060 }, { "grad_norm": 0.07321076095104218, "learning_rate": 8.803058178031549e-06, "loss": 0.0016, "step": 49070 }, { "grad_norm": 0.12595193088054657, "learning_rate": 8.787447999558922e-06, "loss": 0.0055, "step": 49080 }, { "grad_norm": 0.11518527567386627, "learning_rate": 8.77185034037144e-06, "loss": 0.0016, "step": 49090 }, { "grad_norm": 0.11184072494506836, "learning_rate": 8.756265205207259e-06, "loss": 0.0018, "step": 49100 }, { "grad_norm": 0.15995003283023834, "learning_rate": 8.740692598800732e-06, "loss": 0.0036, "step": 49110 }, { "grad_norm": 0.13087674975395203, "learning_rate": 8.72513252588239e-06, "loss": 0.0015, "step": 49120 }, { "grad_norm": 0.10785440355539322, "learning_rate": 8.709584991178998e-06, "loss": 0.0017, "step": 49130 }, { "grad_norm": 0.07876037061214447, "learning_rate": 8.694049999413479e-06, "loss": 0.0029, "step": 49140 }, { "grad_norm": 0.1258798986673355, "learning_rate": 8.678527555304945e-06, "loss": 0.002, "step": 49150 }, { "grad_norm": 0.16029977798461914, "learning_rate": 8.663017663568712e-06, "loss": 0.0034, "step": 49160 }, { "grad_norm": 0.09392597526311874, "learning_rate": 8.647520328916259e-06, "loss": 0.0022, "step": 49170 }, { "grad_norm": 0.07043136656284332, "learning_rate": 8.632035556055307e-06, "loss": 0.0023, "step": 49180 }, { "grad_norm": 0.1295660138130188, "learning_rate": 8.616563349689672e-06, "loss": 0.0026, "step": 49190 }, { "grad_norm": 0.11023680865764618, "learning_rate": 8.601103714519448e-06, "loss": 0.0043, "step": 49200 }, { "grad_norm": 0.11860691756010056, "learning_rate": 8.58565665524082e-06, "loss": 0.0024, "step": 49210 }, { "grad_norm": 0.10789217054843903, "learning_rate": 8.570222176546222e-06, "loss": 0.0024, "step": 49220 }, { "grad_norm": 0.09861379116773605, "learning_rate": 8.554800283124242e-06, "loss": 0.0023, "step": 49230 }, { "grad_norm": 0.11307317763566971, "learning_rate": 8.539390979659639e-06, "loss": 0.0026, "step": 49240 }, { "grad_norm": 0.17224572598934174, "learning_rate": 8.523994270833352e-06, "loss": 0.0037, "step": 49250 }, { "grad_norm": 0.11919666826725006, "learning_rate": 8.5086101613225e-06, "loss": 0.0019, "step": 49260 }, { "grad_norm": 0.1128411591053009, "learning_rate": 8.493238655800346e-06, "loss": 0.0026, "step": 49270 }, { "grad_norm": 0.08248217403888702, "learning_rate": 8.47787975893638e-06, "loss": 0.0021, "step": 49280 }, { "grad_norm": 0.07551082223653793, "learning_rate": 8.462533475396211e-06, "loss": 0.0019, "step": 49290 }, { "grad_norm": 0.10485408455133438, "learning_rate": 8.447199809841643e-06, "loss": 0.003, "step": 49300 }, { "grad_norm": 0.10779853910207748, "learning_rate": 8.431878766930635e-06, "loss": 0.0017, "step": 49310 }, { "grad_norm": 0.102345772087574, "learning_rate": 8.416570351317304e-06, "loss": 0.0017, "step": 49320 }, { "grad_norm": 0.10136539489030838, "learning_rate": 8.401274567651973e-06, "loss": 0.0021, "step": 49330 }, { "grad_norm": 0.10500148683786392, "learning_rate": 8.385991420581058e-06, "loss": 0.0035, "step": 49340 }, { "grad_norm": 0.07619517296552658, "learning_rate": 8.370720914747215e-06, "loss": 0.0021, "step": 49350 }, { "grad_norm": 0.1480957418680191, "learning_rate": 8.355463054789181e-06, "loss": 0.0019, "step": 49360 }, { "grad_norm": 0.08972138166427612, "learning_rate": 8.340217845341919e-06, "loss": 0.0023, "step": 49370 }, { "grad_norm": 0.17312607169151306, "learning_rate": 8.324985291036514e-06, "loss": 0.0029, "step": 49380 }, { "grad_norm": 0.1291695237159729, "learning_rate": 8.309765396500213e-06, "loss": 0.0035, "step": 49390 }, { "grad_norm": 0.07369519025087357, "learning_rate": 8.294558166356419e-06, "loss": 0.0017, "step": 49400 }, { "grad_norm": 0.11528974026441574, "learning_rate": 8.279363605224683e-06, "loss": 0.0031, "step": 49410 }, { "grad_norm": 0.07922019809484482, "learning_rate": 8.264181717720704e-06, "loss": 0.0036, "step": 49420 }, { "grad_norm": 0.11961455643177032, "learning_rate": 8.249012508456361e-06, "loss": 0.0023, "step": 49430 }, { "grad_norm": 0.11232568323612213, "learning_rate": 8.233855982039646e-06, "loss": 0.002, "step": 49440 }, { "grad_norm": 0.06828329712152481, "learning_rate": 8.218712143074708e-06, "loss": 0.0017, "step": 49450 }, { "grad_norm": 0.09473037719726562, "learning_rate": 8.203580996161858e-06, "loss": 0.0023, "step": 49460 }, { "grad_norm": 0.07595035433769226, "learning_rate": 8.188462545897512e-06, "loss": 0.0017, "step": 49470 }, { "grad_norm": 0.07802329212427139, "learning_rate": 8.173356796874304e-06, "loss": 0.0024, "step": 49480 }, { "grad_norm": 0.09403416514396667, "learning_rate": 8.158263753680906e-06, "loss": 0.003, "step": 49490 }, { "grad_norm": 0.07932818681001663, "learning_rate": 8.143183420902239e-06, "loss": 0.0026, "step": 49500 }, { "grad_norm": 0.12975916266441345, "learning_rate": 8.128115803119258e-06, "loss": 0.0025, "step": 49510 }, { "grad_norm": 0.09234490245580673, "learning_rate": 8.11306090490916e-06, "loss": 0.0018, "step": 49520 }, { "grad_norm": 0.09094514697790146, "learning_rate": 8.098018730845169e-06, "loss": 0.0017, "step": 49530 }, { "grad_norm": 0.12187396734952927, "learning_rate": 8.082989285496745e-06, "loss": 0.0022, "step": 49540 }, { "grad_norm": 0.1424001008272171, "learning_rate": 8.067972573429416e-06, "loss": 0.002, "step": 49550 }, { "grad_norm": 0.09127826243638992, "learning_rate": 8.052968599204874e-06, "loss": 0.0023, "step": 49560 }, { "grad_norm": 0.1185259222984314, "learning_rate": 8.037977367380922e-06, "loss": 0.0027, "step": 49570 }, { "grad_norm": 0.11775805801153183, "learning_rate": 8.022998882511495e-06, "loss": 0.0024, "step": 49580 }, { "grad_norm": 0.09068689495325089, "learning_rate": 8.008033149146677e-06, "loss": 0.0053, "step": 49590 }, { "grad_norm": 0.08658009022474289, "learning_rate": 7.993080171832656e-06, "loss": 0.0027, "step": 49600 }, { "grad_norm": 0.09561969339847565, "learning_rate": 7.978139955111752e-06, "loss": 0.0023, "step": 49610 }, { "grad_norm": 0.0997595265507698, "learning_rate": 7.9632125035224e-06, "loss": 0.0017, "step": 49620 }, { "grad_norm": 0.11469525843858719, "learning_rate": 7.948297821599177e-06, "loss": 0.0022, "step": 49630 }, { "grad_norm": 0.1325279176235199, "learning_rate": 7.933395913872755e-06, "loss": 0.0022, "step": 49640 }, { "grad_norm": 0.09660835564136505, "learning_rate": 7.918506784869972e-06, "loss": 0.0021, "step": 49650 }, { "grad_norm": 0.10351784527301788, "learning_rate": 7.903630439113707e-06, "loss": 0.0017, "step": 49660 }, { "grad_norm": 0.0911022424697876, "learning_rate": 7.888766881123044e-06, "loss": 0.0039, "step": 49670 }, { "grad_norm": 0.10272479802370071, "learning_rate": 7.873916115413099e-06, "loss": 0.0024, "step": 49680 }, { "grad_norm": 0.09037468582391739, "learning_rate": 7.85907814649518e-06, "loss": 0.0041, "step": 49690 }, { "grad_norm": 0.11527078598737717, "learning_rate": 7.844252978876649e-06, "loss": 0.002, "step": 49700 }, { "grad_norm": 0.15726034343242645, "learning_rate": 7.829440617061001e-06, "loss": 0.0025, "step": 49710 }, { "grad_norm": 0.09577140212059021, "learning_rate": 7.814641065547851e-06, "loss": 0.0036, "step": 49720 }, { "grad_norm": 0.12195814400911331, "learning_rate": 7.79985432883289e-06, "loss": 0.0027, "step": 49730 }, { "grad_norm": 0.11293379217386246, "learning_rate": 7.78508041140797e-06, "loss": 0.0036, "step": 49740 }, { "grad_norm": 0.07693137228488922, "learning_rate": 7.770319317760993e-06, "loss": 0.0019, "step": 49750 }, { "grad_norm": 0.08573081344366074, "learning_rate": 7.755571052376004e-06, "loss": 0.0024, "step": 49760 }, { "grad_norm": 0.11723636835813522, "learning_rate": 7.740835619733128e-06, "loss": 0.0028, "step": 49770 }, { "grad_norm": 0.09361232817173004, "learning_rate": 7.726113024308601e-06, "loss": 0.0017, "step": 49780 }, { "grad_norm": 0.08562222123146057, "learning_rate": 7.711403270574746e-06, "loss": 0.0024, "step": 49790 }, { "grad_norm": 0.08299718052148819, "learning_rate": 7.696706363000039e-06, "loss": 0.0024, "step": 49800 }, { "grad_norm": 0.08675315976142883, "learning_rate": 7.682022306048959e-06, "loss": 0.002, "step": 49810 }, { "grad_norm": 0.12459217756986618, "learning_rate": 7.667351104182186e-06, "loss": 0.0022, "step": 49820 }, { "grad_norm": 0.09263528138399124, "learning_rate": 7.652692761856395e-06, "loss": 0.002, "step": 49830 }, { "grad_norm": 0.08735678344964981, "learning_rate": 7.63804728352444e-06, "loss": 0.0024, "step": 49840 }, { "grad_norm": 0.13695622980594635, "learning_rate": 7.623414673635215e-06, "loss": 0.0029, "step": 49850 }, { "grad_norm": 0.08536655455827713, "learning_rate": 7.608794936633723e-06, "loss": 0.0018, "step": 49860 }, { "grad_norm": 0.13196313381195068, "learning_rate": 7.594188076961056e-06, "loss": 0.0024, "step": 49870 }, { "grad_norm": 0.09964917600154877, "learning_rate": 7.579594099054382e-06, "loss": 0.0021, "step": 49880 }, { "grad_norm": 0.10163655132055283, "learning_rate": 7.565013007346983e-06, "loss": 0.0026, "step": 49890 }, { "grad_norm": 0.07348078489303589, "learning_rate": 7.5504448062682035e-06, "loss": 0.0022, "step": 49900 }, { "grad_norm": 0.0986529067158699, "learning_rate": 7.53588950024347e-06, "loss": 0.0017, "step": 49910 }, { "grad_norm": 0.09526107460260391, "learning_rate": 7.5213470936943145e-06, "loss": 0.0025, "step": 49920 }, { "grad_norm": 0.13952034711837769, "learning_rate": 7.506817591038323e-06, "loss": 0.0038, "step": 49930 }, { "grad_norm": 0.09679154306650162, "learning_rate": 7.492300996689183e-06, "loss": 0.0025, "step": 49940 }, { "grad_norm": 0.10739537328481674, "learning_rate": 7.477797315056645e-06, "loss": 0.0018, "step": 49950 }, { "grad_norm": 0.11501108109951019, "learning_rate": 7.463306550546539e-06, "loss": 0.002, "step": 49960 }, { "grad_norm": 0.09192699939012527, "learning_rate": 7.448828707560812e-06, "loss": 0.0018, "step": 49970 }, { "grad_norm": 0.1040724515914917, "learning_rate": 7.4343637904974e-06, "loss": 0.0019, "step": 49980 }, { "grad_norm": 0.10239234566688538, "learning_rate": 7.419911803750401e-06, "loss": 0.0025, "step": 49990 }, { "grad_norm": 0.10671626031398773, "learning_rate": 7.405472751709935e-06, "loss": 0.0025, "step": 50000 }, { "grad_norm": 0.10485080629587173, "learning_rate": 7.3910466387622e-06, "loss": 0.0052, "step": 50010 }, { "grad_norm": 0.1321965903043747, "learning_rate": 7.3766334692894735e-06, "loss": 0.0022, "step": 50020 }, { "grad_norm": 0.12933506071567535, "learning_rate": 7.3622332476700865e-06, "loss": 0.0033, "step": 50030 }, { "grad_norm": 0.10197607427835464, "learning_rate": 7.347845978278472e-06, "loss": 0.0022, "step": 50040 }, { "grad_norm": 0.16024062037467957, "learning_rate": 7.333471665485065e-06, "loss": 0.003, "step": 50050 }, { "grad_norm": 0.09591895341873169, "learning_rate": 7.31911031365643e-06, "loss": 0.0022, "step": 50060 }, { "grad_norm": 0.09845178574323654, "learning_rate": 7.304761927155157e-06, "loss": 0.0015, "step": 50070 }, { "grad_norm": 0.11886335909366608, "learning_rate": 7.29042651033991e-06, "loss": 0.0022, "step": 50080 }, { "grad_norm": 0.17459557950496674, "learning_rate": 7.276104067565409e-06, "loss": 0.0032, "step": 50090 }, { "grad_norm": 0.10992390662431717, "learning_rate": 7.261794603182431e-06, "loss": 0.0023, "step": 50100 }, { "grad_norm": 0.10051076114177704, "learning_rate": 7.24749812153781e-06, "loss": 0.0023, "step": 50110 }, { "grad_norm": 0.09980118274688721, "learning_rate": 7.2332146269744605e-06, "loss": 0.0041, "step": 50120 }, { "grad_norm": 0.1500774770975113, "learning_rate": 7.218944123831295e-06, "loss": 0.0041, "step": 50130 }, { "grad_norm": 0.11630157381296158, "learning_rate": 7.204686616443351e-06, "loss": 0.0019, "step": 50140 }, { "grad_norm": 0.10871115326881409, "learning_rate": 7.190442109141665e-06, "loss": 0.0025, "step": 50150 }, { "grad_norm": 0.1109437569975853, "learning_rate": 7.176210606253347e-06, "loss": 0.002, "step": 50160 }, { "grad_norm": 0.08186427503824234, "learning_rate": 7.161992112101551e-06, "loss": 0.0023, "step": 50170 }, { "grad_norm": 0.0854170173406601, "learning_rate": 7.147786631005465e-06, "loss": 0.002, "step": 50180 }, { "grad_norm": 0.09184639900922775, "learning_rate": 7.1335941672803775e-06, "loss": 0.0022, "step": 50190 }, { "grad_norm": 0.10300204902887344, "learning_rate": 7.1194147252375384e-06, "loss": 0.002, "step": 50200 }, { "grad_norm": 0.08573468029499054, "learning_rate": 7.10524830918432e-06, "loss": 0.0023, "step": 50210 }, { "grad_norm": 0.0983940064907074, "learning_rate": 7.091094923424097e-06, "loss": 0.0018, "step": 50220 }, { "grad_norm": 0.14008750021457672, "learning_rate": 7.0769545722562894e-06, "loss": 0.0017, "step": 50230 }, { "grad_norm": 0.07016535848379135, "learning_rate": 7.0628272599763675e-06, "loss": 0.0019, "step": 50240 }, { "grad_norm": 0.10257017612457275, "learning_rate": 7.048712990875828e-06, "loss": 0.0017, "step": 50250 }, { "grad_norm": 0.09225355088710785, "learning_rate": 7.034611769242216e-06, "loss": 0.0026, "step": 50260 }, { "grad_norm": 0.1315232515335083, "learning_rate": 7.02052359935913e-06, "loss": 0.0031, "step": 50270 }, { "grad_norm": 0.10144662111997604, "learning_rate": 7.006448485506145e-06, "loss": 0.003, "step": 50280 }, { "grad_norm": 0.14347226917743683, "learning_rate": 6.992386431958942e-06, "loss": 0.0018, "step": 50290 }, { "grad_norm": 0.10548096150159836, "learning_rate": 6.978337442989197e-06, "loss": 0.0018, "step": 50300 }, { "grad_norm": 0.13043780624866486, "learning_rate": 6.964301522864608e-06, "loss": 0.0026, "step": 50310 }, { "grad_norm": 0.108993299305439, "learning_rate": 6.950278675848926e-06, "loss": 0.0033, "step": 50320 }, { "grad_norm": 0.10760018974542618, "learning_rate": 6.9362689062019145e-06, "loss": 0.0022, "step": 50330 }, { "grad_norm": 0.07013796269893646, "learning_rate": 6.922272218179393e-06, "loss": 0.0019, "step": 50340 }, { "grad_norm": 0.0823139101266861, "learning_rate": 6.908288616033148e-06, "loss": 0.0019, "step": 50350 }, { "grad_norm": 0.11102023720741272, "learning_rate": 6.894318104011077e-06, "loss": 0.0025, "step": 50360 }, { "grad_norm": 0.10970284044742584, "learning_rate": 6.880360686357007e-06, "loss": 0.0022, "step": 50370 }, { "grad_norm": 0.0939311683177948, "learning_rate": 6.8664163673108575e-06, "loss": 0.0019, "step": 50380 }, { "grad_norm": 0.09024213254451752, "learning_rate": 6.85248515110854e-06, "loss": 0.0022, "step": 50390 }, { "grad_norm": 0.08812045305967331, "learning_rate": 6.838567041981992e-06, "loss": 0.0021, "step": 50400 }, { "grad_norm": 0.14436450600624084, "learning_rate": 6.8246620441591634e-06, "loss": 0.0025, "step": 50410 }, { "grad_norm": 0.1031867042183876, "learning_rate": 6.8107701618640275e-06, "loss": 0.002, "step": 50420 }, { "grad_norm": 0.06161723658442497, "learning_rate": 6.796891399316557e-06, "loss": 0.0017, "step": 50430 }, { "grad_norm": 0.16692671179771423, "learning_rate": 6.7830257607327804e-06, "loss": 0.0043, "step": 50440 }, { "grad_norm": 0.11882632225751877, "learning_rate": 6.7691732503247e-06, "loss": 0.0021, "step": 50450 }, { "grad_norm": 0.10401830077171326, "learning_rate": 6.755333872300346e-06, "loss": 0.0022, "step": 50460 }, { "grad_norm": 0.0780961662530899, "learning_rate": 6.741507630863747e-06, "loss": 0.0037, "step": 50470 }, { "grad_norm": 0.09252814948558807, "learning_rate": 6.727694530214945e-06, "loss": 0.002, "step": 50480 }, { "grad_norm": 0.09737398475408554, "learning_rate": 6.713894574550028e-06, "loss": 0.0023, "step": 50490 }, { "grad_norm": 0.09577210992574692, "learning_rate": 6.700107768061015e-06, "loss": 0.0032, "step": 50500 }, { "grad_norm": 0.0999421775341034, "learning_rate": 6.686334114936016e-06, "loss": 0.0029, "step": 50510 }, { "grad_norm": 0.08325758576393127, "learning_rate": 6.672573619359063e-06, "loss": 0.0015, "step": 50520 }, { "grad_norm": 0.09199460595846176, "learning_rate": 6.658826285510256e-06, "loss": 0.0019, "step": 50530 }, { "grad_norm": 0.07033105939626694, "learning_rate": 6.645092117565666e-06, "loss": 0.0018, "step": 50540 }, { "grad_norm": 0.09250234812498093, "learning_rate": 6.631371119697371e-06, "loss": 0.0022, "step": 50550 }, { "grad_norm": 0.14956402778625488, "learning_rate": 6.6176632960734505e-06, "loss": 0.0028, "step": 50560 }, { "grad_norm": 0.11683869361877441, "learning_rate": 6.603968650857978e-06, "loss": 0.0026, "step": 50570 }, { "grad_norm": 0.12380315363407135, "learning_rate": 6.5902871882110085e-06, "loss": 0.0018, "step": 50580 }, { "grad_norm": 0.0813598781824112, "learning_rate": 6.576618912288635e-06, "loss": 0.0018, "step": 50590 }, { "grad_norm": 0.09748382866382599, "learning_rate": 6.562963827242913e-06, "loss": 0.0022, "step": 50600 }, { "grad_norm": 0.10563912242650986, "learning_rate": 6.549321937221886e-06, "loss": 0.0026, "step": 50610 }, { "grad_norm": 0.10386590659618378, "learning_rate": 6.5356932463696064e-06, "loss": 0.0026, "step": 50620 }, { "grad_norm": 0.10276325792074203, "learning_rate": 6.522077758826101e-06, "loss": 0.0027, "step": 50630 }, { "grad_norm": 0.07653278112411499, "learning_rate": 6.5084754787274275e-06, "loss": 0.0026, "step": 50640 }, { "grad_norm": 0.07074282318353653, "learning_rate": 6.494886410205553e-06, "loss": 0.0015, "step": 50650 }, { "grad_norm": 0.08966255933046341, "learning_rate": 6.481310557388521e-06, "loss": 0.0021, "step": 50660 }, { "grad_norm": 0.07163316756486893, "learning_rate": 6.46774792440028e-06, "loss": 0.0015, "step": 50670 }, { "grad_norm": 0.09443012624979019, "learning_rate": 6.4541985153608206e-06, "loss": 0.0018, "step": 50680 }, { "grad_norm": 0.10940142720937729, "learning_rate": 6.4406623343861e-06, "loss": 0.0016, "step": 50690 }, { "grad_norm": 0.07139239460229874, "learning_rate": 6.427139385588038e-06, "loss": 0.0024, "step": 50700 }, { "grad_norm": 0.09700915217399597, "learning_rate": 6.413629673074561e-06, "loss": 0.002, "step": 50710 }, { "grad_norm": 0.0829000398516655, "learning_rate": 6.400133200949554e-06, "loss": 0.0017, "step": 50720 }, { "grad_norm": 0.09213865548372269, "learning_rate": 6.386649973312897e-06, "loss": 0.0023, "step": 50730 }, { "grad_norm": 0.06495214253664017, "learning_rate": 6.37317999426042e-06, "loss": 0.0019, "step": 50740 }, { "grad_norm": 0.1298716962337494, "learning_rate": 6.359723267883977e-06, "loss": 0.0024, "step": 50750 }, { "grad_norm": 0.11895918101072311, "learning_rate": 6.346279798271343e-06, "loss": 0.0025, "step": 50760 }, { "grad_norm": 0.10852912068367004, "learning_rate": 6.332849589506301e-06, "loss": 0.003, "step": 50770 }, { "grad_norm": 0.08950640261173248, "learning_rate": 6.319432645668588e-06, "loss": 0.002, "step": 50780 }, { "grad_norm": 0.08060760796070099, "learning_rate": 6.306028970833922e-06, "loss": 0.0019, "step": 50790 }, { "grad_norm": 0.0718727558851242, "learning_rate": 6.2926385690739665e-06, "loss": 0.0018, "step": 50800 }, { "grad_norm": 0.10022716969251633, "learning_rate": 6.279261444456413e-06, "loss": 0.0019, "step": 50810 }, { "grad_norm": 0.06802347302436829, "learning_rate": 6.265897601044829e-06, "loss": 0.0022, "step": 50820 }, { "grad_norm": 0.10718294978141785, "learning_rate": 6.2525470428988434e-06, "loss": 0.0032, "step": 50830 }, { "grad_norm": 0.09632886946201324, "learning_rate": 6.239209774073962e-06, "loss": 0.0022, "step": 50840 }, { "grad_norm": 0.11730077862739563, "learning_rate": 6.225885798621728e-06, "loss": 0.0017, "step": 50850 }, { "grad_norm": 0.1273166835308075, "learning_rate": 6.2125751205895925e-06, "loss": 0.0031, "step": 50860 }, { "grad_norm": 0.1078796461224556, "learning_rate": 6.199277744020998e-06, "loss": 0.0022, "step": 50870 }, { "grad_norm": 0.1520080268383026, "learning_rate": 6.185993672955337e-06, "loss": 0.0033, "step": 50880 }, { "grad_norm": 0.06875384598970413, "learning_rate": 6.172722911427947e-06, "loss": 0.002, "step": 50890 }, { "grad_norm": 0.1419181376695633, "learning_rate": 6.159465463470149e-06, "loss": 0.0028, "step": 50900 }, { "grad_norm": 0.1305476278066635, "learning_rate": 6.146221333109204e-06, "loss": 0.0031, "step": 50910 }, { "grad_norm": 0.10579439252614975, "learning_rate": 6.132990524368326e-06, "loss": 0.0021, "step": 50920 }, { "grad_norm": 0.06911089271306992, "learning_rate": 6.119773041266685e-06, "loss": 0.0021, "step": 50930 }, { "grad_norm": 0.08394245058298111, "learning_rate": 6.106568887819402e-06, "loss": 0.003, "step": 50940 }, { "grad_norm": 0.08618783205747604, "learning_rate": 6.093378068037548e-06, "loss": 0.0023, "step": 50950 }, { "grad_norm": 0.1016187071800232, "learning_rate": 6.080200585928164e-06, "loss": 0.0019, "step": 50960 }, { "grad_norm": 0.0799061506986618, "learning_rate": 6.06703644549419e-06, "loss": 0.0019, "step": 50970 }, { "grad_norm": 0.05634094774723053, "learning_rate": 6.053885650734576e-06, "loss": 0.0015, "step": 50980 }, { "grad_norm": 0.06543285399675369, "learning_rate": 6.040748205644153e-06, "loss": 0.0017, "step": 50990 }, { "grad_norm": 0.11701717972755432, "learning_rate": 6.0276241142137646e-06, "loss": 0.0018, "step": 51000 }, { "grad_norm": 0.07951046526432037, "learning_rate": 6.014513380430142e-06, "loss": 0.0017, "step": 51010 }, { "grad_norm": 0.07820898294448853, "learning_rate": 6.001416008275984e-06, "loss": 0.0026, "step": 51020 }, { "grad_norm": 0.09279879927635193, "learning_rate": 5.988332001729929e-06, "loss": 0.0021, "step": 51030 }, { "grad_norm": 0.08558051288127899, "learning_rate": 5.975261364766543e-06, "loss": 0.0018, "step": 51040 }, { "grad_norm": 0.09042573720216751, "learning_rate": 5.962204101356356e-06, "loss": 0.0021, "step": 51050 }, { "grad_norm": 0.07813333719968796, "learning_rate": 5.94916021546581e-06, "loss": 0.0029, "step": 51060 }, { "grad_norm": 0.10995281487703323, "learning_rate": 5.936129711057298e-06, "loss": 0.0027, "step": 51070 }, { "grad_norm": 0.16042004525661469, "learning_rate": 5.923112592089142e-06, "loss": 0.0026, "step": 51080 }, { "grad_norm": 0.062433984130620956, "learning_rate": 5.9101088625155954e-06, "loss": 0.0013, "step": 51090 }, { "grad_norm": 0.11844075471162796, "learning_rate": 5.897118526286843e-06, "loss": 0.0035, "step": 51100 }, { "grad_norm": 0.10972762107849121, "learning_rate": 5.884141587349035e-06, "loss": 0.0026, "step": 51110 }, { "grad_norm": 0.12986643612384796, "learning_rate": 5.871178049644177e-06, "loss": 0.0025, "step": 51120 }, { "grad_norm": 0.12324147671461105, "learning_rate": 5.858227917110293e-06, "loss": 0.0023, "step": 51130 }, { "grad_norm": 0.056577168405056, "learning_rate": 5.845291193681252e-06, "loss": 0.0019, "step": 51140 }, { "grad_norm": 0.08215837925672531, "learning_rate": 5.832367883286921e-06, "loss": 0.002, "step": 51150 }, { "grad_norm": 0.08361116051673889, "learning_rate": 5.81945798985305e-06, "loss": 0.0022, "step": 51160 }, { "grad_norm": 0.05113065242767334, "learning_rate": 5.806561517301306e-06, "loss": 0.0014, "step": 51170 }, { "grad_norm": 0.14439591765403748, "learning_rate": 5.793678469549335e-06, "loss": 0.0022, "step": 51180 }, { "grad_norm": 0.08118671178817749, "learning_rate": 5.780808850510627e-06, "loss": 0.0026, "step": 51190 }, { "grad_norm": 0.1326126754283905, "learning_rate": 5.767952664094673e-06, "loss": 0.002, "step": 51200 }, { "grad_norm": 0.10221609473228455, "learning_rate": 5.755109914206791e-06, "loss": 0.0017, "step": 51210 }, { "grad_norm": 0.07862391322851181, "learning_rate": 5.7422806047483125e-06, "loss": 0.0019, "step": 51220 }, { "grad_norm": 0.10415737330913544, "learning_rate": 5.72946473961643e-06, "loss": 0.0022, "step": 51230 }, { "grad_norm": 0.09854258596897125, "learning_rate": 5.716662322704264e-06, "loss": 0.003, "step": 51240 }, { "grad_norm": 0.12208380550146103, "learning_rate": 5.703873357900852e-06, "loss": 0.0022, "step": 51250 }, { "grad_norm": 0.12907817959785461, "learning_rate": 5.691097849091143e-06, "loss": 0.0024, "step": 51260 }, { "grad_norm": 0.1858043521642685, "learning_rate": 5.678335800155982e-06, "loss": 0.0033, "step": 51270 }, { "grad_norm": 0.13309243321418762, "learning_rate": 5.665587214972174e-06, "loss": 0.0027, "step": 51280 }, { "grad_norm": 0.08783125132322311, "learning_rate": 5.652852097412386e-06, "loss": 0.002, "step": 51290 }, { "grad_norm": 0.11275880038738251, "learning_rate": 5.640130451345216e-06, "loss": 0.0023, "step": 51300 }, { "grad_norm": 0.062131527811288834, "learning_rate": 5.627422280635159e-06, "loss": 0.0025, "step": 51310 }, { "grad_norm": 0.07860226929187775, "learning_rate": 5.61472758914261e-06, "loss": 0.0022, "step": 51320 }, { "grad_norm": 0.05721055343747139, "learning_rate": 5.602046380723918e-06, "loss": 0.0019, "step": 51330 }, { "grad_norm": 0.05749586969614029, "learning_rate": 5.5893786592312535e-06, "loss": 0.0025, "step": 51340 }, { "grad_norm": 0.09556373953819275, "learning_rate": 5.576724428512775e-06, "loss": 0.0023, "step": 51350 }, { "grad_norm": 0.08765895664691925, "learning_rate": 5.56408369241247e-06, "loss": 0.002, "step": 51360 }, { "grad_norm": 0.07582288235425949, "learning_rate": 5.5514564547702875e-06, "loss": 0.0027, "step": 51370 }, { "grad_norm": 0.08892741799354553, "learning_rate": 5.538842719422038e-06, "loss": 0.0018, "step": 51380 }, { "grad_norm": 0.080121710896492, "learning_rate": 5.52624249019944e-06, "loss": 0.0018, "step": 51390 }, { "grad_norm": 0.09261548519134521, "learning_rate": 5.51365577093011e-06, "loss": 0.0018, "step": 51400 }, { "grad_norm": 0.10181830078363419, "learning_rate": 5.501082565437565e-06, "loss": 0.0028, "step": 51410 }, { "grad_norm": 0.08727855235338211, "learning_rate": 5.488522877541202e-06, "loss": 0.0022, "step": 51420 }, { "grad_norm": 0.11304180324077606, "learning_rate": 5.475976711056341e-06, "loss": 0.0025, "step": 51430 }, { "grad_norm": 0.14425863325595856, "learning_rate": 5.463444069794166e-06, "loss": 0.0024, "step": 51440 }, { "grad_norm": 0.09790679067373276, "learning_rate": 5.4509249575617594e-06, "loss": 0.0029, "step": 51450 }, { "grad_norm": 0.10836488008499146, "learning_rate": 5.438419378162107e-06, "loss": 0.0021, "step": 51460 }, { "grad_norm": 0.1139838844537735, "learning_rate": 5.425927335394054e-06, "loss": 0.0026, "step": 51470 }, { "grad_norm": 0.14746569097042084, "learning_rate": 5.413448833052387e-06, "loss": 0.0031, "step": 51480 }, { "grad_norm": 0.07528004050254822, "learning_rate": 5.400983874927701e-06, "loss": 0.0014, "step": 51490 }, { "grad_norm": 0.07845738530158997, "learning_rate": 5.388532464806567e-06, "loss": 0.0023, "step": 51500 }, { "grad_norm": 0.04815568029880524, "learning_rate": 5.3760946064713546e-06, "loss": 0.0014, "step": 51510 }, { "grad_norm": 0.12126094847917557, "learning_rate": 5.363670303700386e-06, "loss": 0.0016, "step": 51520 }, { "grad_norm": 0.13244712352752686, "learning_rate": 5.351259560267824e-06, "loss": 0.0027, "step": 51530 }, { "grad_norm": 0.0808882862329483, "learning_rate": 5.338862379943721e-06, "loss": 0.0021, "step": 51540 }, { "grad_norm": 0.06990167498588562, "learning_rate": 5.326478766494025e-06, "loss": 0.0016, "step": 51550 }, { "grad_norm": 0.08757064491510391, "learning_rate": 5.3141087236805385e-06, "loss": 0.0024, "step": 51560 }, { "grad_norm": 0.179501473903656, "learning_rate": 5.3017522552609615e-06, "loss": 0.0028, "step": 51570 }, { "grad_norm": 0.09101392328739166, "learning_rate": 5.289409364988851e-06, "loss": 0.0017, "step": 51580 }, { "grad_norm": 0.12195906788110733, "learning_rate": 5.277080056613671e-06, "loss": 0.0019, "step": 51590 }, { "grad_norm": 0.18641230463981628, "learning_rate": 5.264764333880729e-06, "loss": 0.0033, "step": 51600 }, { "grad_norm": 0.07430040091276169, "learning_rate": 5.252462200531216e-06, "loss": 0.0013, "step": 51610 }, { "grad_norm": 0.08553164452314377, "learning_rate": 5.240173660302194e-06, "loss": 0.0016, "step": 51620 }, { "grad_norm": 0.15592463314533234, "learning_rate": 5.2278987169266044e-06, "loss": 0.0021, "step": 51630 }, { "grad_norm": 0.12957355380058289, "learning_rate": 5.215637374133231e-06, "loss": 0.0031, "step": 51640 }, { "grad_norm": 0.09143024682998657, "learning_rate": 5.203389635646782e-06, "loss": 0.0021, "step": 51650 }, { "grad_norm": 0.19930338859558105, "learning_rate": 5.191155505187756e-06, "loss": 0.0024, "step": 51660 }, { "grad_norm": 0.11485683917999268, "learning_rate": 5.1789349864726e-06, "loss": 0.0019, "step": 51670 }, { "grad_norm": 0.08265043050050735, "learning_rate": 5.16672808321354e-06, "loss": 0.0041, "step": 51680 }, { "grad_norm": 0.08319453150033951, "learning_rate": 5.154534799118749e-06, "loss": 0.0023, "step": 51690 }, { "grad_norm": 0.06945076584815979, "learning_rate": 5.142355137892207e-06, "loss": 0.0014, "step": 51700 }, { "grad_norm": 0.05214717611670494, "learning_rate": 5.130189103233779e-06, "loss": 0.0016, "step": 51710 }, { "grad_norm": 0.11276382952928543, "learning_rate": 5.118036698839179e-06, "loss": 0.0024, "step": 51720 }, { "grad_norm": 0.08282233029603958, "learning_rate": 5.105897928399983e-06, "loss": 0.0013, "step": 51730 }, { "grad_norm": 0.0844469889998436, "learning_rate": 5.0937727956036464e-06, "loss": 0.0023, "step": 51740 }, { "grad_norm": 0.07699114084243774, "learning_rate": 5.081661304133456e-06, "loss": 0.0025, "step": 51750 }, { "grad_norm": 0.07751737534999847, "learning_rate": 5.069563457668558e-06, "loss": 0.0027, "step": 51760 }, { "grad_norm": 0.08662010729312897, "learning_rate": 5.0574792598839624e-06, "loss": 0.0026, "step": 51770 }, { "grad_norm": 0.0768296867609024, "learning_rate": 5.0454087144505276e-06, "loss": 0.0039, "step": 51780 }, { "grad_norm": 0.07720673084259033, "learning_rate": 5.0333518250349655e-06, "loss": 0.0018, "step": 51790 }, { "grad_norm": 0.08852304518222809, "learning_rate": 5.021308595299856e-06, "loss": 0.004, "step": 51800 }, { "grad_norm": 0.11592481285333633, "learning_rate": 5.009279028903585e-06, "loss": 0.0018, "step": 51810 }, { "grad_norm": 0.16419260203838348, "learning_rate": 4.997263129500452e-06, "loss": 0.0048, "step": 51820 }, { "grad_norm": 0.09170050919055939, "learning_rate": 4.985260900740535e-06, "loss": 0.0017, "step": 51830 }, { "grad_norm": 0.12243098765611649, "learning_rate": 4.973272346269814e-06, "loss": 0.0031, "step": 51840 }, { "grad_norm": 0.15324784815311432, "learning_rate": 4.961297469730097e-06, "loss": 0.0025, "step": 51850 }, { "grad_norm": 0.1402556151151657, "learning_rate": 4.949336274759031e-06, "loss": 0.0027, "step": 51860 }, { "grad_norm": 0.08906861394643784, "learning_rate": 4.9373887649901144e-06, "loss": 0.0018, "step": 51870 }, { "grad_norm": 0.07055895030498505, "learning_rate": 4.925454944052666e-06, "loss": 0.0021, "step": 51880 }, { "grad_norm": 0.1100592315196991, "learning_rate": 4.913534815571891e-06, "loss": 0.0019, "step": 51890 }, { "grad_norm": 0.13338735699653625, "learning_rate": 4.901628383168805e-06, "loss": 0.0022, "step": 51900 }, { "grad_norm": 0.0836353525519371, "learning_rate": 4.8897356504602585e-06, "loss": 0.0025, "step": 51910 }, { "grad_norm": 0.08108049631118774, "learning_rate": 4.877856621058957e-06, "loss": 0.0026, "step": 51920 }, { "grad_norm": 0.08092668652534485, "learning_rate": 4.86599129857343e-06, "loss": 0.0016, "step": 51930 }, { "grad_norm": 0.07439397275447845, "learning_rate": 4.85413968660805e-06, "loss": 0.0016, "step": 51940 }, { "grad_norm": 0.06807383894920349, "learning_rate": 4.842301788763031e-06, "loss": 0.0019, "step": 51950 }, { "grad_norm": 0.09692571312189102, "learning_rate": 4.830477608634393e-06, "loss": 0.0014, "step": 51960 }, { "grad_norm": 0.07418592274188995, "learning_rate": 4.818667149814049e-06, "loss": 0.0016, "step": 51970 }, { "grad_norm": 0.06503136456012726, "learning_rate": 4.806870415889664e-06, "loss": 0.0017, "step": 51980 }, { "grad_norm": 0.132238507270813, "learning_rate": 4.795087410444798e-06, "loss": 0.0036, "step": 51990 }, { "grad_norm": 0.11996836215257645, "learning_rate": 4.783318137058807e-06, "loss": 0.0029, "step": 52000 }, { "grad_norm": 0.07210696488618851, "learning_rate": 4.771562599306895e-06, "loss": 0.0016, "step": 52010 }, { "grad_norm": 0.0911535993218422, "learning_rate": 4.759820800760073e-06, "loss": 0.0027, "step": 52020 }, { "grad_norm": 0.0797157809138298, "learning_rate": 4.7480927449851834e-06, "loss": 0.002, "step": 52030 }, { "grad_norm": 0.07628306746482849, "learning_rate": 4.73637843554493e-06, "loss": 0.0021, "step": 52040 }, { "grad_norm": 0.07218877226114273, "learning_rate": 4.724677875997774e-06, "loss": 0.0016, "step": 52050 }, { "grad_norm": 0.08089448511600494, "learning_rate": 4.712991069898065e-06, "loss": 0.002, "step": 52060 }, { "grad_norm": 0.09109161049127579, "learning_rate": 4.7013180207959305e-06, "loss": 0.0019, "step": 52070 }, { "grad_norm": 0.05619505047798157, "learning_rate": 4.6896587322373395e-06, "loss": 0.0016, "step": 52080 }, { "grad_norm": 0.0744321346282959, "learning_rate": 4.678013207764081e-06, "loss": 0.0019, "step": 52090 }, { "grad_norm": 0.11107102781534195, "learning_rate": 4.666381450913748e-06, "loss": 0.0021, "step": 52100 }, { "grad_norm": 0.10511229187250137, "learning_rate": 4.654763465219752e-06, "loss": 0.0019, "step": 52110 }, { "grad_norm": 0.04569906368851662, "learning_rate": 4.643159254211371e-06, "loss": 0.0015, "step": 52120 }, { "grad_norm": 0.09290259331464767, "learning_rate": 4.631568821413606e-06, "loss": 0.0018, "step": 52130 }, { "grad_norm": 0.08251085132360458, "learning_rate": 4.619992170347359e-06, "loss": 0.0017, "step": 52140 }, { "grad_norm": 0.09558875858783722, "learning_rate": 4.608429304529305e-06, "loss": 0.002, "step": 52150 }, { "grad_norm": 0.08549585938453674, "learning_rate": 4.596880227471928e-06, "loss": 0.0019, "step": 52160 }, { "grad_norm": 0.09379017353057861, "learning_rate": 4.585344942683539e-06, "loss": 0.0016, "step": 52170 }, { "grad_norm": 0.061152227222919464, "learning_rate": 4.573823453668241e-06, "loss": 0.0015, "step": 52180 }, { "grad_norm": 0.08461473137140274, "learning_rate": 4.562315763925995e-06, "loss": 0.0033, "step": 52190 }, { "grad_norm": 0.06666522473096848, "learning_rate": 4.5508218769524825e-06, "loss": 0.0027, "step": 52200 }, { "grad_norm": 0.0728725865483284, "learning_rate": 4.539341796239277e-06, "loss": 0.0016, "step": 52210 }, { "grad_norm": 0.08726110309362411, "learning_rate": 4.527875525273717e-06, "loss": 0.002, "step": 52220 }, { "grad_norm": 0.08086732774972916, "learning_rate": 4.51642306753895e-06, "loss": 0.0025, "step": 52230 }, { "grad_norm": 0.07932565361261368, "learning_rate": 4.5049844265139306e-06, "loss": 0.0025, "step": 52240 }, { "grad_norm": 0.07438625395298004, "learning_rate": 4.4935596056734144e-06, "loss": 0.0019, "step": 52250 }, { "grad_norm": 0.09787704050540924, "learning_rate": 4.482148608487957e-06, "loss": 0.0016, "step": 52260 }, { "grad_norm": 0.058351270854473114, "learning_rate": 4.4707514384239365e-06, "loss": 0.0026, "step": 52270 }, { "grad_norm": 0.07209163159132004, "learning_rate": 4.459368098943484e-06, "loss": 0.0029, "step": 52280 }, { "grad_norm": 0.05841663107275963, "learning_rate": 4.447998593504582e-06, "loss": 0.0017, "step": 52290 }, { "grad_norm": 0.09702879190444946, "learning_rate": 4.4366429255609744e-06, "loss": 0.0041, "step": 52300 }, { "grad_norm": 0.08758071064949036, "learning_rate": 4.425301098562212e-06, "loss": 0.002, "step": 52310 }, { "grad_norm": 0.1202865019440651, "learning_rate": 4.413973115953651e-06, "loss": 0.002, "step": 52320 }, { "grad_norm": 0.06758531928062439, "learning_rate": 4.402658981176416e-06, "loss": 0.0025, "step": 52330 }, { "grad_norm": 0.11205927282571793, "learning_rate": 4.391358697667475e-06, "loss": 0.0021, "step": 52340 }, { "grad_norm": 0.09588846564292908, "learning_rate": 4.3800722688595195e-06, "loss": 0.002, "step": 52350 }, { "grad_norm": 0.11360388249158859, "learning_rate": 4.368799698181097e-06, "loss": 0.003, "step": 52360 }, { "grad_norm": 0.07467757910490036, "learning_rate": 4.357540989056486e-06, "loss": 0.0019, "step": 52370 }, { "grad_norm": 0.06339841336011887, "learning_rate": 4.346296144905815e-06, "loss": 0.0021, "step": 52380 }, { "grad_norm": 0.12538567185401917, "learning_rate": 4.335065169144958e-06, "loss": 0.0016, "step": 52390 }, { "grad_norm": 0.06279294192790985, "learning_rate": 4.323848065185593e-06, "loss": 0.0016, "step": 52400 }, { "grad_norm": 0.0800292119383812, "learning_rate": 4.31264483643517e-06, "loss": 0.0051, "step": 52410 }, { "grad_norm": 0.10177330672740936, "learning_rate": 4.301455486296946e-06, "loss": 0.0032, "step": 52420 }, { "grad_norm": 0.1045570895075798, "learning_rate": 4.290280018169935e-06, "loss": 0.0028, "step": 52430 }, { "grad_norm": 0.06981983780860901, "learning_rate": 4.27911843544897e-06, "loss": 0.0025, "step": 52440 }, { "grad_norm": 0.07184161990880966, "learning_rate": 4.2679707415246294e-06, "loss": 0.0028, "step": 52450 }, { "grad_norm": 0.11678671091794968, "learning_rate": 4.256836939783299e-06, "loss": 0.003, "step": 52460 }, { "grad_norm": 0.12661907076835632, "learning_rate": 4.245717033607127e-06, "loss": 0.0034, "step": 52470 }, { "grad_norm": 0.10484634339809418, "learning_rate": 4.234611026374035e-06, "loss": 0.0026, "step": 52480 }, { "grad_norm": 0.09484592080116272, "learning_rate": 4.2235189214577694e-06, "loss": 0.0032, "step": 52490 }, { "grad_norm": 0.1650460809469223, "learning_rate": 4.212440722227779e-06, "loss": 0.0035, "step": 52500 }, { "grad_norm": 0.14628027379512787, "learning_rate": 4.201376432049364e-06, "loss": 0.0024, "step": 52510 }, { "grad_norm": 0.13583262264728546, "learning_rate": 4.1903260542835275e-06, "loss": 0.0019, "step": 52520 }, { "grad_norm": 0.13224215805530548, "learning_rate": 4.1792895922871114e-06, "loss": 0.0021, "step": 52530 }, { "grad_norm": 0.10863717645406723, "learning_rate": 4.168267049412694e-06, "loss": 0.0017, "step": 52540 }, { "grad_norm": 0.0944783166050911, "learning_rate": 4.157258429008626e-06, "loss": 0.0023, "step": 52550 }, { "grad_norm": 0.12254945933818817, "learning_rate": 4.146263734419043e-06, "loss": 0.0028, "step": 52560 }, { "grad_norm": 0.0702785775065422, "learning_rate": 4.135282968983839e-06, "loss": 0.0018, "step": 52570 }, { "grad_norm": 0.12036824226379395, "learning_rate": 4.124316136038675e-06, "loss": 0.002, "step": 52580 }, { "grad_norm": 0.06493907421827316, "learning_rate": 4.1133632389149965e-06, "loss": 0.0018, "step": 52590 }, { "grad_norm": 0.07807005941867828, "learning_rate": 4.102424280939998e-06, "loss": 0.0026, "step": 52600 }, { "grad_norm": 0.058739788830280304, "learning_rate": 4.091499265436649e-06, "loss": 0.0027, "step": 52610 }, { "grad_norm": 0.06150120496749878, "learning_rate": 4.080588195723684e-06, "loss": 0.0015, "step": 52620 }, { "grad_norm": 0.08427347987890244, "learning_rate": 4.069691075115578e-06, "loss": 0.0015, "step": 52630 }, { "grad_norm": 0.10936084389686584, "learning_rate": 4.0588079069226235e-06, "loss": 0.0014, "step": 52640 }, { "grad_norm": 0.03771369531750679, "learning_rate": 4.0479386944508034e-06, "loss": 0.0019, "step": 52650 }, { "grad_norm": 0.04970606416463852, "learning_rate": 4.037083441001932e-06, "loss": 0.0013, "step": 52660 }, { "grad_norm": 0.06829822808504105, "learning_rate": 4.026242149873516e-06, "loss": 0.0017, "step": 52670 }, { "grad_norm": 0.04925454035401344, "learning_rate": 4.015414824358871e-06, "loss": 0.0022, "step": 52680 }, { "grad_norm": 0.06825439631938934, "learning_rate": 4.004601467747054e-06, "loss": 0.0013, "step": 52690 }, { "grad_norm": 0.05642141029238701, "learning_rate": 3.993802083322873e-06, "loss": 0.0023, "step": 52700 }, { "grad_norm": 0.09069765359163284, "learning_rate": 3.9830166743668906e-06, "loss": 0.0017, "step": 52710 }, { "grad_norm": 0.08697748184204102, "learning_rate": 3.9722452441554425e-06, "loss": 0.0026, "step": 52720 }, { "grad_norm": 0.05683028697967529, "learning_rate": 3.961487795960584e-06, "loss": 0.0015, "step": 52730 }, { "grad_norm": 0.059424031525850296, "learning_rate": 3.95074433305016e-06, "loss": 0.0036, "step": 52740 }, { "grad_norm": 0.08129140734672546, "learning_rate": 3.940014858687752e-06, "loss": 0.0014, "step": 52750 }, { "grad_norm": 0.11415276676416397, "learning_rate": 3.929299376132689e-06, "loss": 0.0047, "step": 52760 }, { "grad_norm": 0.07907243072986603, "learning_rate": 3.918597888640047e-06, "loss": 0.0025, "step": 52770 }, { "grad_norm": 0.08863958716392517, "learning_rate": 3.907910399460657e-06, "loss": 0.0015, "step": 52780 }, { "grad_norm": 0.05264083668589592, "learning_rate": 3.8972369118410956e-06, "loss": 0.0013, "step": 52790 }, { "grad_norm": 0.1157156378030777, "learning_rate": 3.88657742902368e-06, "loss": 0.0027, "step": 52800 }, { "grad_norm": 0.16505104303359985, "learning_rate": 3.875931954246504e-06, "loss": 0.0025, "step": 52810 }, { "grad_norm": 0.06951690465211868, "learning_rate": 3.865300490743351e-06, "loss": 0.0023, "step": 52820 }, { "grad_norm": 0.0755576565861702, "learning_rate": 3.854683041743806e-06, "loss": 0.0032, "step": 52830 }, { "grad_norm": 0.11203145980834961, "learning_rate": 3.844079610473139e-06, "loss": 0.0017, "step": 52840 }, { "grad_norm": 0.056018002331256866, "learning_rate": 3.833490200152423e-06, "loss": 0.0019, "step": 52850 }, { "grad_norm": 0.12049967050552368, "learning_rate": 3.822914813998424e-06, "loss": 0.0018, "step": 52860 }, { "grad_norm": 0.08500275015830994, "learning_rate": 3.812353455223666e-06, "loss": 0.0027, "step": 52870 }, { "grad_norm": 0.07628992199897766, "learning_rate": 3.8018061270364225e-06, "loss": 0.0026, "step": 52880 }, { "grad_norm": 0.08573714643716812, "learning_rate": 3.7912728326406688e-06, "loss": 0.0028, "step": 52890 }, { "grad_norm": 0.061480812728405, "learning_rate": 3.7807535752361732e-06, "loss": 0.0016, "step": 52900 }, { "grad_norm": 0.09375834465026855, "learning_rate": 3.7702483580183855e-06, "loss": 0.0038, "step": 52910 }, { "grad_norm": 0.09858549386262894, "learning_rate": 3.759757184178525e-06, "loss": 0.0046, "step": 52920 }, { "grad_norm": 0.07815971225500107, "learning_rate": 3.7492800569035312e-06, "loss": 0.0029, "step": 52930 }, { "grad_norm": 0.10285542160272598, "learning_rate": 3.7388169793760754e-06, "loss": 0.0017, "step": 52940 }, { "grad_norm": 0.13717767596244812, "learning_rate": 3.728367954774553e-06, "loss": 0.0021, "step": 52950 }, { "grad_norm": 0.07499244064092636, "learning_rate": 3.7179329862731317e-06, "loss": 0.0013, "step": 52960 }, { "grad_norm": 0.08867670595645905, "learning_rate": 3.707512077041647e-06, "loss": 0.0021, "step": 52970 }, { "grad_norm": 0.09590643644332886, "learning_rate": 3.6971052302457288e-06, "loss": 0.0022, "step": 52980 }, { "grad_norm": 0.11790993809700012, "learning_rate": 3.6867124490466698e-06, "loss": 0.0019, "step": 52990 }, { "grad_norm": 0.06500764936208725, "learning_rate": 3.6763337366015393e-06, "loss": 0.0014, "step": 53000 }, { "grad_norm": 0.07242599129676819, "learning_rate": 3.665969096063121e-06, "loss": 0.0021, "step": 53010 }, { "grad_norm": 0.09093352407217026, "learning_rate": 3.6556185305799074e-06, "loss": 0.0022, "step": 53020 }, { "grad_norm": 0.08295711129903793, "learning_rate": 3.645282043296133e-06, "loss": 0.0018, "step": 53030 }, { "grad_norm": 0.059141773730516434, "learning_rate": 3.6349596373517427e-06, "loss": 0.0024, "step": 53040 }, { "grad_norm": 0.11438831686973572, "learning_rate": 3.6246513158824215e-06, "loss": 0.0025, "step": 53050 }, { "grad_norm": 0.07515961676836014, "learning_rate": 3.6143570820195593e-06, "loss": 0.0025, "step": 53060 }, { "grad_norm": 0.15036208927631378, "learning_rate": 3.6040769388902773e-06, "loss": 0.003, "step": 53070 }, { "grad_norm": 0.07366809993982315, "learning_rate": 3.593810889617405e-06, "loss": 0.0016, "step": 53080 }, { "grad_norm": 0.1306273639202118, "learning_rate": 3.5835589373194978e-06, "loss": 0.0057, "step": 53090 }, { "grad_norm": 0.11764480918645859, "learning_rate": 3.5733210851108257e-06, "loss": 0.0019, "step": 53100 }, { "grad_norm": 0.08813925832509995, "learning_rate": 3.5630973361014008e-06, "loss": 0.0023, "step": 53110 }, { "grad_norm": 0.07190808653831482, "learning_rate": 3.552887693396889e-06, "loss": 0.0019, "step": 53120 }, { "grad_norm": 0.09255789965391159, "learning_rate": 3.542692160098754e-06, "loss": 0.0022, "step": 53130 }, { "grad_norm": 0.13056059181690216, "learning_rate": 3.5325107393040846e-06, "loss": 0.0016, "step": 53140 }, { "grad_norm": 0.08913110196590424, "learning_rate": 3.522343434105757e-06, "loss": 0.0016, "step": 53150 }, { "grad_norm": 0.056747760623693466, "learning_rate": 3.512190247592323e-06, "loss": 0.0014, "step": 53160 }, { "grad_norm": 0.08582012355327606, "learning_rate": 3.502051182848054e-06, "loss": 0.0025, "step": 53170 }, { "grad_norm": 0.09070461243391037, "learning_rate": 3.4919262429529308e-06, "loss": 0.0019, "step": 53180 }, { "grad_norm": 0.07242628186941147, "learning_rate": 3.4818154309826325e-06, "loss": 0.0025, "step": 53190 }, { "grad_norm": 0.0876319631934166, "learning_rate": 3.4717187500085734e-06, "loss": 0.0019, "step": 53200 }, { "grad_norm": 0.07199565321207047, "learning_rate": 3.46163620309784e-06, "loss": 0.002, "step": 53210 }, { "grad_norm": 0.055618077516555786, "learning_rate": 3.4515677933132595e-06, "loss": 0.0016, "step": 53220 }, { "grad_norm": 0.05885442718863487, "learning_rate": 3.4415135237133466e-06, "loss": 0.0014, "step": 53230 }, { "grad_norm": 0.0822029858827591, "learning_rate": 3.4314733973523196e-06, "loss": 0.0019, "step": 53240 }, { "grad_norm": 0.05147421732544899, "learning_rate": 3.4214474172800993e-06, "loss": 0.0014, "step": 53250 }, { "grad_norm": 0.07879266887903214, "learning_rate": 3.411435586542322e-06, "loss": 0.0016, "step": 53260 }, { "grad_norm": 0.061414893716573715, "learning_rate": 3.4014379081802995e-06, "loss": 0.0018, "step": 53270 }, { "grad_norm": 0.07778201997280121, "learning_rate": 3.391454385231102e-06, "loss": 0.0019, "step": 53280 }, { "grad_norm": 0.07013441622257233, "learning_rate": 3.3814850207274095e-06, "loss": 0.0021, "step": 53290 }, { "grad_norm": 0.10824336856603622, "learning_rate": 3.371529817697694e-06, "loss": 0.0027, "step": 53300 }, { "grad_norm": 0.09860820323228836, "learning_rate": 3.3615887791660585e-06, "loss": 0.0024, "step": 53310 }, { "grad_norm": 0.08826929330825806, "learning_rate": 3.3516619081523426e-06, "loss": 0.0017, "step": 53320 }, { "grad_norm": 0.06803655624389648, "learning_rate": 3.3417492076720567e-06, "loss": 0.0035, "step": 53330 }, { "grad_norm": 0.05405272915959358, "learning_rate": 3.3318506807364147e-06, "loss": 0.0028, "step": 53340 }, { "grad_norm": 0.09187202900648117, "learning_rate": 3.3219663303523553e-06, "loss": 0.0019, "step": 53350 }, { "grad_norm": 0.07730714231729507, "learning_rate": 3.3120961595224374e-06, "loss": 0.002, "step": 53360 }, { "grad_norm": 0.10263852030038834, "learning_rate": 3.302240171245002e-06, "loss": 0.0029, "step": 53370 }, { "grad_norm": 0.08190537244081497, "learning_rate": 3.29239836851401e-06, "loss": 0.0026, "step": 53380 }, { "grad_norm": 0.0946035087108612, "learning_rate": 3.2825707543191588e-06, "loss": 0.0023, "step": 53390 }, { "grad_norm": 0.07189280539751053, "learning_rate": 3.272757331645804e-06, "loss": 0.002, "step": 53400 }, { "grad_norm": 0.05237928405404091, "learning_rate": 3.2629581034750166e-06, "loss": 0.002, "step": 53410 }, { "grad_norm": 0.07859513908624649, "learning_rate": 3.2531730727835218e-06, "loss": 0.0025, "step": 53420 }, { "grad_norm": 0.1653396189212799, "learning_rate": 3.2434022425437914e-06, "loss": 0.0027, "step": 53430 }, { "grad_norm": 0.07641898840665817, "learning_rate": 3.233645615723907e-06, "loss": 0.0044, "step": 53440 }, { "grad_norm": 0.0855804979801178, "learning_rate": 3.2239031952876918e-06, "loss": 0.0023, "step": 53450 }, { "grad_norm": 0.12052163481712341, "learning_rate": 3.21417498419464e-06, "loss": 0.002, "step": 53460 }, { "grad_norm": 0.11012811958789825, "learning_rate": 3.204460985399921e-06, "loss": 0.0027, "step": 53470 }, { "grad_norm": 0.06710916757583618, "learning_rate": 3.1947612018543903e-06, "loss": 0.0022, "step": 53480 }, { "grad_norm": 0.14122194051742554, "learning_rate": 3.1850756365045753e-06, "loss": 0.0017, "step": 53490 }, { "grad_norm": 0.07987992465496063, "learning_rate": 3.175404292292722e-06, "loss": 0.0023, "step": 53500 }, { "grad_norm": 0.06671053916215897, "learning_rate": 3.1657471721566965e-06, "loss": 0.0019, "step": 53510 }, { "grad_norm": 0.10298892110586166, "learning_rate": 3.1561042790300977e-06, "loss": 0.0017, "step": 53520 }, { "grad_norm": 0.06646808236837387, "learning_rate": 3.1464756158421816e-06, "loss": 0.0022, "step": 53530 }, { "grad_norm": 0.09134387969970703, "learning_rate": 3.136861185517875e-06, "loss": 0.0019, "step": 53540 }, { "grad_norm": 0.07304519414901733, "learning_rate": 3.127260990977798e-06, "loss": 0.0035, "step": 53550 }, { "grad_norm": 0.08984380960464478, "learning_rate": 3.1176750351382235e-06, "loss": 0.0034, "step": 53560 }, { "grad_norm": 0.06049609184265137, "learning_rate": 3.1081033209111153e-06, "loss": 0.0012, "step": 53570 }, { "grad_norm": 0.1328003853559494, "learning_rate": 3.0985458512041155e-06, "loss": 0.0026, "step": 53580 }, { "grad_norm": 0.06519491970539093, "learning_rate": 3.089002628920512e-06, "loss": 0.0022, "step": 53590 }, { "grad_norm": 0.08595166355371475, "learning_rate": 3.079473656959303e-06, "loss": 0.0017, "step": 53600 }, { "grad_norm": 0.2910189628601074, "learning_rate": 3.0699589382151393e-06, "loss": 0.0016, "step": 53610 }, { "grad_norm": 0.05026281625032425, "learning_rate": 3.060458475578326e-06, "loss": 0.0023, "step": 53620 }, { "grad_norm": 0.08304903656244278, "learning_rate": 3.0509722719348656e-06, "loss": 0.0024, "step": 53630 }, { "grad_norm": 0.05646422505378723, "learning_rate": 3.041500330166408e-06, "loss": 0.0014, "step": 53640 }, { "grad_norm": 0.07882935553789139, "learning_rate": 3.032042653150291e-06, "loss": 0.0035, "step": 53650 }, { "grad_norm": 0.17481666803359985, "learning_rate": 3.0225992437594887e-06, "loss": 0.0037, "step": 53660 }, { "grad_norm": 0.11247922480106354, "learning_rate": 3.0131701048626783e-06, "loss": 0.0026, "step": 53670 }, { "grad_norm": 0.06159326434135437, "learning_rate": 3.003755239324163e-06, "loss": 0.0022, "step": 53680 }, { "grad_norm": 0.07959142327308655, "learning_rate": 2.9943546500039553e-06, "loss": 0.0014, "step": 53690 }, { "grad_norm": 0.06081371009349823, "learning_rate": 2.9849683397576877e-06, "loss": 0.0018, "step": 53700 }, { "grad_norm": 0.07618306577205658, "learning_rate": 2.975596311436679e-06, "loss": 0.0016, "step": 53710 }, { "grad_norm": 0.10597028583288193, "learning_rate": 2.966238567887902e-06, "loss": 0.0028, "step": 53720 }, { "grad_norm": 0.06285658478736877, "learning_rate": 2.9568951119539943e-06, "loss": 0.0033, "step": 53730 }, { "grad_norm": 0.09460027515888214, "learning_rate": 2.947565946473241e-06, "loss": 0.0034, "step": 53740 }, { "grad_norm": 0.06624805927276611, "learning_rate": 2.9382510742796188e-06, "loss": 0.0016, "step": 53750 }, { "grad_norm": 0.09915328025817871, "learning_rate": 2.9289504982027204e-06, "loss": 0.0018, "step": 53760 }, { "grad_norm": 0.06237504631280899, "learning_rate": 2.9196642210678248e-06, "loss": 0.0016, "step": 53770 }, { "grad_norm": 0.07811791449785233, "learning_rate": 2.910392245695853e-06, "loss": 0.0026, "step": 53780 }, { "grad_norm": 0.06234855577349663, "learning_rate": 2.9011345749033804e-06, "loss": 0.0015, "step": 53790 }, { "grad_norm": 0.06481218338012695, "learning_rate": 2.8918912115026677e-06, "loss": 0.0019, "step": 53800 }, { "grad_norm": 0.08792757987976074, "learning_rate": 2.8826621583015743e-06, "loss": 0.0023, "step": 53810 }, { "grad_norm": 0.07245846837759018, "learning_rate": 2.8734474181036643e-06, "loss": 0.0024, "step": 53820 }, { "grad_norm": 0.07888416945934296, "learning_rate": 2.864246993708114e-06, "loss": 0.0019, "step": 53830 }, { "grad_norm": 0.1260496973991394, "learning_rate": 2.8550608879097884e-06, "loss": 0.0017, "step": 53840 }, { "grad_norm": 0.06381430476903915, "learning_rate": 2.845889103499172e-06, "loss": 0.0026, "step": 53850 }, { "grad_norm": 0.06190713122487068, "learning_rate": 2.8367316432624138e-06, "loss": 0.0015, "step": 53860 }, { "grad_norm": 0.09318691492080688, "learning_rate": 2.8275885099813105e-06, "loss": 0.0018, "step": 53870 }, { "grad_norm": 0.08710166066884995, "learning_rate": 2.8184597064332963e-06, "loss": 0.0026, "step": 53880 }, { "grad_norm": 0.05651054531335831, "learning_rate": 2.809345235391464e-06, "loss": 0.0017, "step": 53890 }, { "grad_norm": 0.08247019350528717, "learning_rate": 2.80024509962456e-06, "loss": 0.0035, "step": 53900 }, { "grad_norm": 0.0925832986831665, "learning_rate": 2.7911593018969563e-06, "loss": 0.0015, "step": 53910 }, { "grad_norm": 0.07202859967947006, "learning_rate": 2.7820878449686838e-06, "loss": 0.0027, "step": 53920 }, { "grad_norm": 0.06817338615655899, "learning_rate": 2.7730307315953995e-06, "loss": 0.0012, "step": 53930 }, { "grad_norm": 0.0424310527741909, "learning_rate": 2.763987964528425e-06, "loss": 0.0015, "step": 53940 }, { "grad_norm": 0.08619671314954758, "learning_rate": 2.754959546514718e-06, "loss": 0.0021, "step": 53950 }, { "grad_norm": 0.10410776734352112, "learning_rate": 2.7459454802968576e-06, "loss": 0.0022, "step": 53960 }, { "grad_norm": 0.06249423325061798, "learning_rate": 2.7369457686131028e-06, "loss": 0.0017, "step": 53970 }, { "grad_norm": 0.1610705703496933, "learning_rate": 2.7279604141973004e-06, "loss": 0.0025, "step": 53980 }, { "grad_norm": 0.08082373440265656, "learning_rate": 2.7189894197789946e-06, "loss": 0.0018, "step": 53990 }, { "grad_norm": 0.10258739441633224, "learning_rate": 2.7100327880833055e-06, "loss": 0.0015, "step": 54000 }, { "grad_norm": 0.0672992393374443, "learning_rate": 2.70109052183104e-06, "loss": 0.0019, "step": 54010 }, { "grad_norm": 0.07101277261972427, "learning_rate": 2.69216262373862e-06, "loss": 0.002, "step": 54020 }, { "grad_norm": 0.06556190550327301, "learning_rate": 2.6832490965181036e-06, "loss": 0.0018, "step": 54030 }, { "grad_norm": 0.09636989235877991, "learning_rate": 2.6743499428771857e-06, "loss": 0.0013, "step": 54040 }, { "grad_norm": 0.05531807616353035, "learning_rate": 2.6654651655191875e-06, "loss": 0.003, "step": 54050 }, { "grad_norm": 0.12198468297719955, "learning_rate": 2.6565947671430836e-06, "loss": 0.0015, "step": 54060 }, { "grad_norm": 0.0538400262594223, "learning_rate": 2.647738750443457e-06, "loss": 0.0015, "step": 54070 }, { "grad_norm": 0.10752842575311661, "learning_rate": 2.6388971181105393e-06, "loss": 0.0023, "step": 54080 }, { "grad_norm": 0.0972432792186737, "learning_rate": 2.630069872830171e-06, "loss": 0.0021, "step": 54090 }, { "grad_norm": 0.0944426953792572, "learning_rate": 2.6212570172838514e-06, "loss": 0.0018, "step": 54100 }, { "grad_norm": 0.08618577569723129, "learning_rate": 2.6124585541486778e-06, "loss": 0.0028, "step": 54110 }, { "grad_norm": 0.055841874331235886, "learning_rate": 2.6036744860974127e-06, "loss": 0.0015, "step": 54120 }, { "grad_norm": 0.1259326934814453, "learning_rate": 2.594904815798399e-06, "loss": 0.0019, "step": 54130 }, { "grad_norm": 0.09413226693868637, "learning_rate": 2.586149545915656e-06, "loss": 0.002, "step": 54140 }, { "grad_norm": 0.11932085454463959, "learning_rate": 2.577408679108778e-06, "loss": 0.0038, "step": 54150 }, { "grad_norm": 0.1117199957370758, "learning_rate": 2.5686822180330306e-06, "loss": 0.0026, "step": 54160 }, { "grad_norm": 0.10979679971933365, "learning_rate": 2.5599701653392703e-06, "loss": 0.0019, "step": 54170 }, { "grad_norm": 0.07426301389932632, "learning_rate": 2.551272523673992e-06, "loss": 0.0022, "step": 54180 }, { "grad_norm": 0.0741279125213623, "learning_rate": 2.542589295679315e-06, "loss": 0.0014, "step": 54190 }, { "grad_norm": 0.050029970705509186, "learning_rate": 2.5339204839929575e-06, "loss": 0.0014, "step": 54200 }, { "grad_norm": 0.0576753169298172, "learning_rate": 2.525266091248296e-06, "loss": 0.0027, "step": 54210 }, { "grad_norm": 0.06492278724908829, "learning_rate": 2.5166261200743e-06, "loss": 0.0016, "step": 54220 }, { "grad_norm": 0.096565842628479, "learning_rate": 2.5080005730955646e-06, "loss": 0.0018, "step": 54230 }, { "grad_norm": 0.10656166076660156, "learning_rate": 2.499389452932299e-06, "loss": 0.0019, "step": 54240 }, { "grad_norm": 0.0756705030798912, "learning_rate": 2.4907927622003336e-06, "loss": 0.0016, "step": 54250 }, { "grad_norm": 0.08639020472764969, "learning_rate": 2.4822105035111177e-06, "loss": 0.0021, "step": 54260 }, { "grad_norm": 0.07466516643762589, "learning_rate": 2.4736426794717273e-06, "loss": 0.002, "step": 54270 }, { "grad_norm": 0.10140484571456909, "learning_rate": 2.4650892926848135e-06, "loss": 0.0023, "step": 54280 }, { "grad_norm": 0.09284615516662598, "learning_rate": 2.456550345748704e-06, "loss": 0.0024, "step": 54290 }, { "grad_norm": 0.09955625236034393, "learning_rate": 2.4480258412572733e-06, "loss": 0.0027, "step": 54300 }, { "grad_norm": 0.0661083534359932, "learning_rate": 2.4395157818000612e-06, "loss": 0.0034, "step": 54310 }, { "grad_norm": 0.09930426627397537, "learning_rate": 2.431020169962189e-06, "loss": 0.0027, "step": 54320 }, { "grad_norm": 0.04940907657146454, "learning_rate": 2.422539008324409e-06, "loss": 0.0014, "step": 54330 }, { "grad_norm": 0.08067351579666138, "learning_rate": 2.414072299463066e-06, "loss": 0.0021, "step": 54340 }, { "grad_norm": 0.13216720521450043, "learning_rate": 2.4056200459501186e-06, "loss": 0.0032, "step": 54350 }, { "grad_norm": 0.20564766228199005, "learning_rate": 2.397182250353147e-06, "loss": 0.0026, "step": 54360 }, { "grad_norm": 0.0816926583647728, "learning_rate": 2.388758915235334e-06, "loss": 0.0016, "step": 54370 }, { "grad_norm": 0.0625016987323761, "learning_rate": 2.380350043155455e-06, "loss": 0.0018, "step": 54380 }, { "grad_norm": 0.057611215859651566, "learning_rate": 2.371955636667911e-06, "loss": 0.0018, "step": 54390 }, { "grad_norm": 0.059634532779455185, "learning_rate": 2.3635756983227008e-06, "loss": 0.0024, "step": 54400 }, { "grad_norm": 0.04650024697184563, "learning_rate": 2.3552102306654278e-06, "loss": 0.0016, "step": 54410 }, { "grad_norm": 0.07028137892484665, "learning_rate": 2.346859236237292e-06, "loss": 0.0019, "step": 54420 }, { "grad_norm": 0.09724060446023941, "learning_rate": 2.3385227175751145e-06, "loss": 0.0025, "step": 54430 }, { "grad_norm": 0.09171615540981293, "learning_rate": 2.330200677211314e-06, "loss": 0.0022, "step": 54440 }, { "grad_norm": 0.09668052196502686, "learning_rate": 2.3218931176738847e-06, "loss": 0.0017, "step": 54450 }, { "grad_norm": 0.10153709352016449, "learning_rate": 2.313600041486469e-06, "loss": 0.0016, "step": 54460 }, { "grad_norm": 0.07403513789176941, "learning_rate": 2.3053214511682743e-06, "loss": 0.0015, "step": 54470 }, { "grad_norm": 0.07765621691942215, "learning_rate": 2.2970573492341163e-06, "loss": 0.0026, "step": 54480 }, { "grad_norm": 0.1168711856007576, "learning_rate": 2.288807738194415e-06, "loss": 0.0024, "step": 54490 }, { "grad_norm": 0.18862630426883698, "learning_rate": 2.2805726205551768e-06, "loss": 0.0033, "step": 54500 }, { "grad_norm": 0.06827455013990402, "learning_rate": 2.272351998818023e-06, "loss": 0.0025, "step": 54510 }, { "grad_norm": 0.05661782622337341, "learning_rate": 2.2641458754801505e-06, "loss": 0.0018, "step": 54520 }, { "grad_norm": 0.07161679863929749, "learning_rate": 2.2559542530343756e-06, "loss": 0.0018, "step": 54530 }, { "grad_norm": 0.04397375509142876, "learning_rate": 2.247777133969087e-06, "loss": 0.0017, "step": 54540 }, { "grad_norm": 0.058602336794137955, "learning_rate": 2.2396145207682795e-06, "loss": 0.0019, "step": 54550 }, { "grad_norm": 0.06978696584701538, "learning_rate": 2.231466415911543e-06, "loss": 0.0014, "step": 54560 }, { "grad_norm": 0.057848718017339706, "learning_rate": 2.2233328218740524e-06, "loss": 0.0016, "step": 54570 }, { "grad_norm": 0.05732574686408043, "learning_rate": 2.215213741126576e-06, "loss": 0.0022, "step": 54580 }, { "grad_norm": 0.09724109619855881, "learning_rate": 2.2071091761354912e-06, "loss": 0.0046, "step": 54590 }, { "grad_norm": 0.09100291877985, "learning_rate": 2.1990191293627337e-06, "loss": 0.0017, "step": 54600 }, { "grad_norm": 0.0882551372051239, "learning_rate": 2.1909436032658548e-06, "loss": 0.0018, "step": 54610 }, { "grad_norm": 0.1002589613199234, "learning_rate": 2.1828826002979806e-06, "loss": 0.003, "step": 54620 }, { "grad_norm": 0.08412455767393112, "learning_rate": 2.1748361229078362e-06, "loss": 0.0017, "step": 54630 }, { "grad_norm": 0.1040043756365776, "learning_rate": 2.1668041735397327e-06, "loss": 0.0022, "step": 54640 }, { "grad_norm": 0.07975007593631744, "learning_rate": 2.1587867546335514e-06, "loss": 0.0014, "step": 54650 }, { "grad_norm": 0.03711052983999252, "learning_rate": 2.1507838686247894e-06, "loss": 0.0029, "step": 54660 }, { "grad_norm": 0.06248718127608299, "learning_rate": 2.1427955179444957e-06, "loss": 0.0016, "step": 54670 }, { "grad_norm": 0.04533735662698746, "learning_rate": 2.13482170501933e-06, "loss": 0.002, "step": 54680 }, { "grad_norm": 0.05845937877893448, "learning_rate": 2.1268624322715202e-06, "loss": 0.0016, "step": 54690 }, { "grad_norm": 0.07439695298671722, "learning_rate": 2.118917702118889e-06, "loss": 0.0022, "step": 54700 }, { "grad_norm": 0.0428188256919384, "learning_rate": 2.1109875169748327e-06, "loss": 0.0029, "step": 54710 }, { "grad_norm": 0.05631381645798683, "learning_rate": 2.103071879248336e-06, "loss": 0.0027, "step": 54720 }, { "grad_norm": 0.07006761431694031, "learning_rate": 2.0951707913439478e-06, "loss": 0.0035, "step": 54730 }, { "grad_norm": 0.05126510560512543, "learning_rate": 2.0872842556618255e-06, "loss": 0.0018, "step": 54740 }, { "grad_norm": 0.04206765070557594, "learning_rate": 2.079412274597686e-06, "loss": 0.002, "step": 54750 }, { "grad_norm": 0.06521360576152802, "learning_rate": 2.0715548505428284e-06, "loss": 0.0018, "step": 54760 }, { "grad_norm": 0.09802603721618652, "learning_rate": 2.0637119858841258e-06, "loss": 0.0014, "step": 54770 }, { "grad_norm": 0.04573657363653183, "learning_rate": 2.055883683004034e-06, "loss": 0.0019, "step": 54780 }, { "grad_norm": 0.0576496422290802, "learning_rate": 2.0480699442806006e-06, "loss": 0.0025, "step": 54790 }, { "grad_norm": 0.0925966128706932, "learning_rate": 2.0402707720874105e-06, "loss": 0.0017, "step": 54800 }, { "grad_norm": 0.06119620054960251, "learning_rate": 2.032486168793668e-06, "loss": 0.0049, "step": 54810 }, { "grad_norm": 0.11019590497016907, "learning_rate": 2.024716136764104e-06, "loss": 0.0026, "step": 54820 }, { "grad_norm": 0.08656821399927139, "learning_rate": 2.0169606783590757e-06, "loss": 0.0023, "step": 54830 }, { "grad_norm": 0.10480524599552155, "learning_rate": 2.0092197959344638e-06, "loss": 0.0017, "step": 54840 }, { "grad_norm": 0.1236996054649353, "learning_rate": 2.0014934918417606e-06, "loss": 0.0026, "step": 54850 }, { "grad_norm": 0.08634085208177567, "learning_rate": 1.99378176842801e-06, "loss": 0.0018, "step": 54860 }, { "grad_norm": 0.08186342567205429, "learning_rate": 1.9860846280358224e-06, "loss": 0.0016, "step": 54870 }, { "grad_norm": 0.054384924471378326, "learning_rate": 1.978402073003394e-06, "loss": 0.0016, "step": 54880 }, { "grad_norm": 0.06738172471523285, "learning_rate": 1.9707341056644736e-06, "loss": 0.0019, "step": 54890 }, { "grad_norm": 0.17576219141483307, "learning_rate": 1.963080728348399e-06, "loss": 0.0016, "step": 54900 }, { "grad_norm": 0.10620465129613876, "learning_rate": 1.955441943380054e-06, "loss": 0.0043, "step": 54910 }, { "grad_norm": 0.05007859319448471, "learning_rate": 1.947817753079906e-06, "loss": 0.0028, "step": 54920 }, { "grad_norm": 0.04470022767782211, "learning_rate": 1.9402081597639785e-06, "loss": 0.0017, "step": 54930 }, { "grad_norm": 0.0445830412209034, "learning_rate": 1.9326131657438683e-06, "loss": 0.0016, "step": 54940 }, { "grad_norm": 0.0491841658949852, "learning_rate": 1.925032773326724e-06, "loss": 0.0016, "step": 54950 }, { "grad_norm": 0.05340069159865379, "learning_rate": 1.9174669848152916e-06, "loss": 0.0021, "step": 54960 }, { "grad_norm": 0.060233116149902344, "learning_rate": 1.9099158025078336e-06, "loss": 0.002, "step": 54970 }, { "grad_norm": 0.08575578778982162, "learning_rate": 1.90237922869822e-06, "loss": 0.0016, "step": 54980 }, { "grad_norm": 0.04427783936262131, "learning_rate": 1.8948572656758367e-06, "loss": 0.0016, "step": 54990 }, { "grad_norm": 0.06264875829219818, "learning_rate": 1.8873499157256834e-06, "loss": 0.0017, "step": 55000 }, { "grad_norm": 0.04102405160665512, "learning_rate": 1.879857181128286e-06, "loss": 0.0033, "step": 55010 }, { "grad_norm": 0.08111399412155151, "learning_rate": 1.8723790641597349e-06, "loss": 0.0028, "step": 55020 }, { "grad_norm": 0.06301755458116531, "learning_rate": 1.8649155670916906e-06, "loss": 0.0017, "step": 55030 }, { "grad_norm": 0.09790223091840744, "learning_rate": 1.8574666921913565e-06, "loss": 0.0021, "step": 55040 }, { "grad_norm": 0.16046346724033356, "learning_rate": 1.8500324417215166e-06, "loss": 0.0021, "step": 55050 }, { "grad_norm": 0.05608521029353142, "learning_rate": 1.8426128179404977e-06, "loss": 0.0022, "step": 55060 }, { "grad_norm": 0.10060373693704605, "learning_rate": 1.8352078231021807e-06, "loss": 0.0022, "step": 55070 }, { "grad_norm": 0.06295880675315857, "learning_rate": 1.8278174594560049e-06, "loss": 0.002, "step": 55080 }, { "grad_norm": 0.08510169386863708, "learning_rate": 1.82044172924698e-06, "loss": 0.0017, "step": 55090 }, { "grad_norm": 0.10186333954334259, "learning_rate": 1.813080634715636e-06, "loss": 0.0026, "step": 55100 }, { "grad_norm": 0.059168219566345215, "learning_rate": 1.8057341780981119e-06, "loss": 0.0027, "step": 55110 }, { "grad_norm": 0.11927201598882675, "learning_rate": 1.7984023616260338e-06, "loss": 0.0019, "step": 55120 }, { "grad_norm": 0.05390871688723564, "learning_rate": 1.7910851875266421e-06, "loss": 0.0033, "step": 55130 }, { "grad_norm": 0.06839112937450409, "learning_rate": 1.7837826580226757e-06, "loss": 0.0018, "step": 55140 }, { "grad_norm": 0.06399611383676529, "learning_rate": 1.7764947753324656e-06, "loss": 0.0018, "step": 55150 }, { "grad_norm": 0.06992144137620926, "learning_rate": 1.7692215416698799e-06, "loss": 0.0022, "step": 55160 }, { "grad_norm": 0.11019055545330048, "learning_rate": 1.7619629592443233e-06, "loss": 0.0018, "step": 55170 }, { "grad_norm": 0.09786662459373474, "learning_rate": 1.7547190302607709e-06, "loss": 0.003, "step": 55180 }, { "grad_norm": 0.06553267687559128, "learning_rate": 1.747489756919729e-06, "loss": 0.0024, "step": 55190 }, { "grad_norm": 0.06700772047042847, "learning_rate": 1.7402751414172802e-06, "loss": 0.003, "step": 55200 }, { "grad_norm": 0.30077818036079407, "learning_rate": 1.7330751859450044e-06, "loss": 0.0017, "step": 55210 }, { "grad_norm": 0.062278904020786285, "learning_rate": 1.7258898926900801e-06, "loss": 0.0017, "step": 55220 }, { "grad_norm": 0.15523922443389893, "learning_rate": 1.7187192638352002e-06, "loss": 0.0016, "step": 55230 }, { "grad_norm": 0.04551497474312782, "learning_rate": 1.7115633015586163e-06, "loss": 0.0017, "step": 55240 }, { "grad_norm": 0.05989941954612732, "learning_rate": 1.7044220080341178e-06, "loss": 0.0021, "step": 55250 }, { "grad_norm": 0.06417721509933472, "learning_rate": 1.6972953854310414e-06, "loss": 0.0021, "step": 55260 }, { "grad_norm": 0.038663625717163086, "learning_rate": 1.690183435914261e-06, "loss": 0.0013, "step": 55270 }, { "grad_norm": 0.05625640228390694, "learning_rate": 1.6830861616442206e-06, "loss": 0.0019, "step": 55280 }, { "grad_norm": 0.059589698910713196, "learning_rate": 1.6760035647768568e-06, "loss": 0.0027, "step": 55290 }, { "grad_norm": 0.05900309979915619, "learning_rate": 1.6689356474636985e-06, "loss": 0.002, "step": 55300 }, { "grad_norm": 0.10812920331954956, "learning_rate": 1.6618824118517784e-06, "loss": 0.0019, "step": 55310 }, { "grad_norm": 0.07384541630744934, "learning_rate": 1.654843860083688e-06, "loss": 0.0019, "step": 55320 }, { "grad_norm": 0.055850252509117126, "learning_rate": 1.647819994297556e-06, "loss": 0.0013, "step": 55330 }, { "grad_norm": 0.06195919215679169, "learning_rate": 1.6408108166270363e-06, "loss": 0.0016, "step": 55340 }, { "grad_norm": 0.05866905301809311, "learning_rate": 1.6338163292013486e-06, "loss": 0.0033, "step": 55350 }, { "grad_norm": 0.06020285189151764, "learning_rate": 1.6268365341452208e-06, "loss": 0.0013, "step": 55360 }, { "grad_norm": 0.10561487078666687, "learning_rate": 1.6198714335789345e-06, "loss": 0.0019, "step": 55370 }, { "grad_norm": 0.06198647990822792, "learning_rate": 1.612921029618303e-06, "loss": 0.0015, "step": 55380 }, { "grad_norm": 0.056997813284397125, "learning_rate": 1.6059853243746815e-06, "loss": 0.0018, "step": 55390 }, { "grad_norm": 0.04382305592298508, "learning_rate": 1.5990643199549404e-06, "loss": 0.0021, "step": 55400 }, { "grad_norm": 0.09489727765321732, "learning_rate": 1.5921580184615147e-06, "loss": 0.0023, "step": 55410 }, { "grad_norm": 0.03671587258577347, "learning_rate": 1.5852664219923374e-06, "loss": 0.0014, "step": 55420 }, { "grad_norm": 0.06806158274412155, "learning_rate": 1.5783895326409172e-06, "loss": 0.002, "step": 55430 }, { "grad_norm": 0.04714014753699303, "learning_rate": 1.5715273524962438e-06, "loss": 0.0019, "step": 55440 }, { "grad_norm": 0.10765212029218674, "learning_rate": 1.564679883642889e-06, "loss": 0.0017, "step": 55450 }, { "grad_norm": 0.054942239075899124, "learning_rate": 1.5578471281609276e-06, "loss": 0.0016, "step": 55460 }, { "grad_norm": 0.06616050750017166, "learning_rate": 1.551029088125966e-06, "loss": 0.0017, "step": 55470 }, { "grad_norm": 0.06391224265098572, "learning_rate": 1.544225765609142e-06, "loss": 0.0014, "step": 55480 }, { "grad_norm": 0.052012715488672256, "learning_rate": 1.53743716267713e-06, "loss": 0.0017, "step": 55490 }, { "grad_norm": 0.06327026337385178, "learning_rate": 1.5306632813921361e-06, "loss": 0.0032, "step": 55500 }, { "grad_norm": 0.05464669689536095, "learning_rate": 1.52390412381187e-06, "loss": 0.0015, "step": 55510 }, { "grad_norm": 0.1308482438325882, "learning_rate": 1.5171596919895948e-06, "loss": 0.0024, "step": 55520 }, { "grad_norm": 0.07215800136327744, "learning_rate": 1.5104299879740936e-06, "loss": 0.0012, "step": 55530 }, { "grad_norm": 0.07016236335039139, "learning_rate": 1.5037150138096701e-06, "loss": 0.0029, "step": 55540 }, { "grad_norm": 0.03346060588955879, "learning_rate": 1.4970147715361538e-06, "loss": 0.0014, "step": 55550 }, { "grad_norm": 0.07614639401435852, "learning_rate": 1.4903292631889054e-06, "loss": 0.0024, "step": 55560 }, { "grad_norm": 0.03969057649374008, "learning_rate": 1.483658490798795e-06, "loss": 0.0014, "step": 55570 }, { "grad_norm": 0.04972626641392708, "learning_rate": 1.4770024563922457e-06, "loss": 0.0018, "step": 55580 }, { "grad_norm": 0.09137051552534103, "learning_rate": 1.4703611619911628e-06, "loss": 0.002, "step": 55590 }, { "grad_norm": 0.13551737368106842, "learning_rate": 1.4637346096130155e-06, "loss": 0.0025, "step": 55600 }, { "grad_norm": 0.04698452353477478, "learning_rate": 1.4571228012707662e-06, "loss": 0.0017, "step": 55610 }, { "grad_norm": 0.05467325448989868, "learning_rate": 1.4505257389729132e-06, "loss": 0.0014, "step": 55620 }, { "grad_norm": 0.04777725785970688, "learning_rate": 1.4439434247234596e-06, "loss": 0.0017, "step": 55630 }, { "grad_norm": 0.06600505113601685, "learning_rate": 1.4373758605219445e-06, "loss": 0.0022, "step": 55640 }, { "grad_norm": 0.07806967198848724, "learning_rate": 1.4308230483634333e-06, "loss": 0.0023, "step": 55650 }, { "grad_norm": 0.06175815314054489, "learning_rate": 1.4242849902384724e-06, "loss": 0.0012, "step": 55660 }, { "grad_norm": 0.07971997559070587, "learning_rate": 1.4177616881331734e-06, "loss": 0.002, "step": 55670 }, { "grad_norm": 0.09943245351314545, "learning_rate": 1.4112531440291233e-06, "loss": 0.0023, "step": 55680 }, { "grad_norm": 0.05424543842673302, "learning_rate": 1.4047593599034624e-06, "loss": 0.0036, "step": 55690 }, { "grad_norm": 0.05612809211015701, "learning_rate": 1.3982803377288246e-06, "loss": 0.0024, "step": 55700 }, { "grad_norm": 0.037639278918504715, "learning_rate": 1.3918160794733681e-06, "loss": 0.0019, "step": 55710 }, { "grad_norm": 0.10856914520263672, "learning_rate": 1.3853665871007615e-06, "loss": 0.0019, "step": 55720 }, { "grad_norm": 0.053919292986392975, "learning_rate": 1.378931862570193e-06, "loss": 0.0014, "step": 55730 }, { "grad_norm": 0.04358123242855072, "learning_rate": 1.372511907836349e-06, "loss": 0.0017, "step": 55740 }, { "grad_norm": 0.03900609537959099, "learning_rate": 1.3661067248494586e-06, "loss": 0.0015, "step": 55750 }, { "grad_norm": 0.06084549054503441, "learning_rate": 1.3597163155552429e-06, "loss": 0.0018, "step": 55760 }, { "grad_norm": 0.06153925880789757, "learning_rate": 1.3533406818949434e-06, "loss": 0.0021, "step": 55770 }, { "grad_norm": 0.0626777708530426, "learning_rate": 1.3469798258053002e-06, "loss": 0.0035, "step": 55780 }, { "grad_norm": 0.0658852681517601, "learning_rate": 1.3406337492185672e-06, "loss": 0.0034, "step": 55790 }, { "grad_norm": 0.07038184255361557, "learning_rate": 1.3343024540625414e-06, "loss": 0.0021, "step": 55800 }, { "grad_norm": 0.06997904926538467, "learning_rate": 1.3279859422604735e-06, "loss": 0.0026, "step": 55810 }, { "grad_norm": 0.0552855022251606, "learning_rate": 1.3216842157311781e-06, "loss": 0.0013, "step": 55820 }, { "grad_norm": 0.12711909413337708, "learning_rate": 1.3153972763889355e-06, "loss": 0.0021, "step": 55830 }, { "grad_norm": 0.06278539448976517, "learning_rate": 1.3091251261435566e-06, "loss": 0.0022, "step": 55840 }, { "grad_norm": 0.06203445419669151, "learning_rate": 1.3028677669003564e-06, "loss": 0.0017, "step": 55850 }, { "grad_norm": 0.06531459838151932, "learning_rate": 1.2966252005601587e-06, "loss": 0.0019, "step": 55860 }, { "grad_norm": 0.06296014040708542, "learning_rate": 1.2903974290192855e-06, "loss": 0.0027, "step": 55870 }, { "grad_norm": 0.04302409291267395, "learning_rate": 1.284184454169568e-06, "loss": 0.0022, "step": 55880 }, { "grad_norm": 0.04586528614163399, "learning_rate": 1.2779862778983464e-06, "loss": 0.0015, "step": 55890 }, { "grad_norm": 0.07205494493246078, "learning_rate": 1.2718029020884647e-06, "loss": 0.002, "step": 55900 }, { "grad_norm": 0.06471510231494904, "learning_rate": 1.2656343286182703e-06, "loss": 0.0023, "step": 55910 }, { "grad_norm": 0.07436578720808029, "learning_rate": 1.2594805593616088e-06, "loss": 0.0016, "step": 55920 }, { "grad_norm": 0.1629922240972519, "learning_rate": 1.2533415961878404e-06, "loss": 0.0016, "step": 55930 }, { "grad_norm": 0.07494357973337173, "learning_rate": 1.2472174409618009e-06, "loss": 0.0021, "step": 55940 }, { "grad_norm": 0.0752205103635788, "learning_rate": 1.2411080955438747e-06, "loss": 0.002, "step": 55950 }, { "grad_norm": 0.049530670046806335, "learning_rate": 1.235013561789894e-06, "loss": 0.0037, "step": 55960 }, { "grad_norm": 0.10082685202360153, "learning_rate": 1.2289338415512385e-06, "loss": 0.0018, "step": 55970 }, { "grad_norm": 0.054863251745700836, "learning_rate": 1.222868936674748e-06, "loss": 0.0012, "step": 55980 }, { "grad_norm": 0.057741425931453705, "learning_rate": 1.2168188490027876e-06, "loss": 0.0019, "step": 55990 }, { "grad_norm": 0.030309228226542473, "learning_rate": 1.2107835803732204e-06, "loss": 0.0012, "step": 56000 }, { "grad_norm": 0.03874985873699188, "learning_rate": 1.2047631326193964e-06, "loss": 0.0027, "step": 56010 }, { "grad_norm": 0.08432970941066742, "learning_rate": 1.1987575075701696e-06, "loss": 0.002, "step": 56020 }, { "grad_norm": 0.041321296244859695, "learning_rate": 1.1927667070498916e-06, "loss": 0.0015, "step": 56030 }, { "grad_norm": 0.09103959053754807, "learning_rate": 1.1867907328784067e-06, "loss": 0.0015, "step": 56040 }, { "grad_norm": 0.12052654474973679, "learning_rate": 1.1808295868710518e-06, "loss": 0.0017, "step": 56050 }, { "grad_norm": 0.04596017673611641, "learning_rate": 1.174883270838678e-06, "loss": 0.0014, "step": 56060 }, { "grad_norm": 0.09621170908212662, "learning_rate": 1.1689517865876187e-06, "loss": 0.0035, "step": 56070 }, { "grad_norm": 0.051761675626039505, "learning_rate": 1.1630351359196933e-06, "loss": 0.0026, "step": 56080 }, { "grad_norm": 0.12357889115810394, "learning_rate": 1.1571333206322255e-06, "loss": 0.0044, "step": 56090 }, { "grad_norm": 0.07829266041517258, "learning_rate": 1.1512463425180365e-06, "loss": 0.0024, "step": 56100 }, { "grad_norm": 0.07623567432165146, "learning_rate": 1.145374203365429e-06, "loss": 0.0018, "step": 56110 }, { "grad_norm": 0.13896743953227997, "learning_rate": 1.1395169049582155e-06, "loss": 0.0018, "step": 56120 }, { "grad_norm": 0.0428655743598938, "learning_rate": 1.1336744490756722e-06, "loss": 0.0014, "step": 56130 }, { "grad_norm": 0.10808618366718292, "learning_rate": 1.1278468374925967e-06, "loss": 0.0017, "step": 56140 }, { "grad_norm": 0.11178108304738998, "learning_rate": 1.12203407197925e-06, "loss": 0.0021, "step": 56150 }, { "grad_norm": 0.06898672133684158, "learning_rate": 1.116236154301409e-06, "loss": 0.0016, "step": 56160 }, { "grad_norm": 0.036168038845062256, "learning_rate": 1.11045308622032e-06, "loss": 0.0014, "step": 56170 }, { "grad_norm": 0.03508417308330536, "learning_rate": 1.1046848694927337e-06, "loss": 0.0022, "step": 56180 }, { "grad_norm": 0.07750049978494644, "learning_rate": 1.098931505870876e-06, "loss": 0.002, "step": 56190 }, { "grad_norm": 0.04469189792871475, "learning_rate": 1.0931929971024657e-06, "loss": 0.0024, "step": 56200 }, { "grad_norm": 0.09542711824178696, "learning_rate": 1.0874693449307193e-06, "loss": 0.0021, "step": 56210 }, { "grad_norm": 0.04920828714966774, "learning_rate": 1.081760551094324e-06, "loss": 0.0019, "step": 56220 }, { "grad_norm": 0.08028838038444519, "learning_rate": 1.0760666173274592e-06, "loss": 0.0028, "step": 56230 }, { "grad_norm": 0.04915463551878929, "learning_rate": 1.0703875453597967e-06, "loss": 0.0014, "step": 56240 }, { "grad_norm": 0.04620865732431412, "learning_rate": 1.0647233369164845e-06, "loss": 0.002, "step": 56250 }, { "grad_norm": 0.06789971888065338, "learning_rate": 1.0590739937181625e-06, "loss": 0.0018, "step": 56260 }, { "grad_norm": 0.07244718819856644, "learning_rate": 1.053439517480953e-06, "loss": 0.0022, "step": 56270 }, { "grad_norm": 0.06584817916154861, "learning_rate": 1.047819909916453e-06, "loss": 0.0019, "step": 56280 }, { "grad_norm": 0.0596274696290493, "learning_rate": 1.0422151727317697e-06, "loss": 0.0022, "step": 56290 }, { "grad_norm": 0.06513825803995132, "learning_rate": 1.0366253076294462e-06, "loss": 0.0017, "step": 56300 }, { "grad_norm": 0.050875693559646606, "learning_rate": 1.031050316307558e-06, "loss": 0.0012, "step": 56310 }, { "grad_norm": 0.11654133349657059, "learning_rate": 1.0254902004596333e-06, "loss": 0.0019, "step": 56320 }, { "grad_norm": 0.09935937821865082, "learning_rate": 1.0199449617746882e-06, "loss": 0.0017, "step": 56330 }, { "grad_norm": 0.050849977880716324, "learning_rate": 1.0144146019372247e-06, "loss": 0.0022, "step": 56340 }, { "grad_norm": 0.06618990749120712, "learning_rate": 1.0088991226272048e-06, "loss": 0.0017, "step": 56350 }, { "grad_norm": 0.05514708161354065, "learning_rate": 1.003398525520105e-06, "loss": 0.0018, "step": 56360 }, { "grad_norm": 0.07978447526693344, "learning_rate": 9.979128122868552e-07, "loss": 0.0018, "step": 56370 }, { "grad_norm": 0.03807741031050682, "learning_rate": 9.924419845938614e-07, "loss": 0.0017, "step": 56380 }, { "grad_norm": 0.07537122070789337, "learning_rate": 9.869860441030276e-07, "loss": 0.0017, "step": 56390 }, { "grad_norm": 0.07776734977960587, "learning_rate": 9.815449924717169e-07, "loss": 0.0017, "step": 56400 }, { "grad_norm": 0.06905701756477356, "learning_rate": 9.761188313527791e-07, "loss": 0.002, "step": 56410 }, { "grad_norm": 0.08049152046442032, "learning_rate": 9.70707562394546e-07, "loss": 0.0025, "step": 56420 }, { "grad_norm": 0.11917521059513092, "learning_rate": 9.653111872408027e-07, "loss": 0.002, "step": 56430 }, { "grad_norm": 0.07971474528312683, "learning_rate": 9.599297075308434e-07, "loss": 0.0022, "step": 56440 }, { "grad_norm": 0.03614998236298561, "learning_rate": 9.545631248994048e-07, "loss": 0.0015, "step": 56450 }, { "grad_norm": 0.0247368011623621, "learning_rate": 9.492114409767217e-07, "loss": 0.0018, "step": 56460 }, { "grad_norm": 0.05091584846377373, "learning_rate": 9.438746573884938e-07, "loss": 0.002, "step": 56470 }, { "grad_norm": 0.05932494252920151, "learning_rate": 9.385527757558909e-07, "loss": 0.0016, "step": 56480 }, { "grad_norm": 0.07539341598749161, "learning_rate": 9.332457976955644e-07, "loss": 0.0039, "step": 56490 }, { "grad_norm": 0.17069612443447113, "learning_rate": 9.279537248196247e-07, "loss": 0.0032, "step": 56500 }, { "grad_norm": 0.04852021113038063, "learning_rate": 9.2267655873568e-07, "loss": 0.0016, "step": 56510 }, { "grad_norm": 0.05712112411856651, "learning_rate": 9.174143010467762e-07, "loss": 0.0018, "step": 56520 }, { "grad_norm": 0.06710833311080933, "learning_rate": 9.12166953351462e-07, "loss": 0.0013, "step": 56530 }, { "grad_norm": 0.058544598519802094, "learning_rate": 9.069345172437404e-07, "loss": 0.0025, "step": 56540 }, { "grad_norm": 0.05729273334145546, "learning_rate": 9.017169943130843e-07, "loss": 0.0019, "step": 56550 }, { "grad_norm": 0.05204881727695465, "learning_rate": 8.965143861444425e-07, "loss": 0.0019, "step": 56560 }, { "grad_norm": 0.05346278101205826, "learning_rate": 8.91326694318223e-07, "loss": 0.0042, "step": 56570 }, { "grad_norm": 0.04623052105307579, "learning_rate": 8.861539204103098e-07, "loss": 0.0025, "step": 56580 }, { "grad_norm": 0.046255018562078476, "learning_rate": 8.809960659920735e-07, "loss": 0.002, "step": 56590 }, { "grad_norm": 0.03455488383769989, "learning_rate": 8.758531326303055e-07, "loss": 0.0039, "step": 56600 }, { "grad_norm": 0.05625394359230995, "learning_rate": 8.707251218873169e-07, "loss": 0.0016, "step": 56610 }, { "grad_norm": 0.09812338650226593, "learning_rate": 8.656120353208507e-07, "loss": 0.002, "step": 56620 }, { "grad_norm": 0.08530284464359283, "learning_rate": 8.605138744841312e-07, "loss": 0.0021, "step": 56630 }, { "grad_norm": 0.044126808643341064, "learning_rate": 8.554306409258417e-07, "loss": 0.0021, "step": 56640 }, { "grad_norm": 0.04840657487511635, "learning_rate": 8.503623361901358e-07, "loss": 0.0018, "step": 56650 }, { "grad_norm": 0.06996975094079971, "learning_rate": 8.453089618166377e-07, "loss": 0.0027, "step": 56660 }, { "grad_norm": 0.0683833658695221, "learning_rate": 8.402705193404137e-07, "loss": 0.0017, "step": 56670 }, { "grad_norm": 0.06494048982858658, "learning_rate": 8.352470102920174e-07, "loss": 0.0015, "step": 56680 }, { "grad_norm": 0.05499904975295067, "learning_rate": 8.302384361974669e-07, "loss": 0.0031, "step": 56690 }, { "grad_norm": 0.285785436630249, "learning_rate": 8.252447985782231e-07, "loss": 0.0025, "step": 56700 }, { "grad_norm": 0.05633128434419632, "learning_rate": 8.202660989512279e-07, "loss": 0.0021, "step": 56710 }, { "grad_norm": 0.06436681747436523, "learning_rate": 8.153023388288772e-07, "loss": 0.0023, "step": 56720 }, { "grad_norm": 0.099615179002285, "learning_rate": 8.103535197190204e-07, "loss": 0.0026, "step": 56730 }, { "grad_norm": 0.06618428975343704, "learning_rate": 8.054196431249993e-07, "loss": 0.0019, "step": 56740 }, { "grad_norm": 0.12161441892385483, "learning_rate": 8.005007105455709e-07, "loss": 0.0019, "step": 56750 }, { "grad_norm": 0.06798754632472992, "learning_rate": 7.95596723474995e-07, "loss": 0.0014, "step": 56760 }, { "grad_norm": 0.035703256726264954, "learning_rate": 7.907076834029692e-07, "loss": 0.0013, "step": 56770 }, { "grad_norm": 0.05448156222701073, "learning_rate": 7.858335918146498e-07, "loss": 0.0022, "step": 56780 }, { "grad_norm": 0.04690316319465637, "learning_rate": 7.809744501906635e-07, "loss": 0.0021, "step": 56790 }, { "grad_norm": 0.046710677444934845, "learning_rate": 7.761302600070797e-07, "loss": 0.0019, "step": 56800 }, { "grad_norm": 0.05189380422234535, "learning_rate": 7.713010227354545e-07, "loss": 0.0018, "step": 56810 }, { "grad_norm": 0.04737582430243492, "learning_rate": 7.664867398427589e-07, "loss": 0.0014, "step": 56820 }, { "grad_norm": 0.09771882742643356, "learning_rate": 7.616874127914619e-07, "loss": 0.0018, "step": 56830 }, { "grad_norm": 0.09414016455411911, "learning_rate": 7.569030430394641e-07, "loss": 0.0028, "step": 56840 }, { "grad_norm": 0.048337697982788086, "learning_rate": 7.521336320401306e-07, "loss": 0.0019, "step": 56850 }, { "grad_norm": 0.0667913556098938, "learning_rate": 7.473791812422915e-07, "loss": 0.0018, "step": 56860 }, { "grad_norm": 0.04438870772719383, "learning_rate": 7.426396920902134e-07, "loss": 0.0019, "step": 56870 }, { "grad_norm": 0.06593619287014008, "learning_rate": 7.379151660236283e-07, "loss": 0.0039, "step": 56880 }, { "grad_norm": 0.0466214120388031, "learning_rate": 7.332056044777324e-07, "loss": 0.002, "step": 56890 }, { "grad_norm": 0.05017360299825668, "learning_rate": 7.285110088831537e-07, "loss": 0.0024, "step": 56900 }, { "grad_norm": 0.17466281354427338, "learning_rate": 7.238313806659902e-07, "loss": 0.0025, "step": 56910 }, { "grad_norm": 0.05216871201992035, "learning_rate": 7.191667212477993e-07, "loss": 0.0027, "step": 56920 }, { "grad_norm": 0.0463014654815197, "learning_rate": 7.145170320455697e-07, "loss": 0.0019, "step": 56930 }, { "grad_norm": 0.054463278502225876, "learning_rate": 7.098823144717604e-07, "loss": 0.0022, "step": 56940 }, { "grad_norm": 0.08143548667430878, "learning_rate": 7.052625699342674e-07, "loss": 0.0017, "step": 56950 }, { "grad_norm": 0.02746555395424366, "learning_rate": 7.006577998364628e-07, "loss": 0.0024, "step": 56960 }, { "grad_norm": 0.1048780083656311, "learning_rate": 6.96068005577133e-07, "loss": 0.0021, "step": 56970 }, { "grad_norm": 0.16140638291835785, "learning_rate": 6.914931885505627e-07, "loss": 0.0015, "step": 56980 }, { "grad_norm": 0.03862803801894188, "learning_rate": 6.869333501464347e-07, "loss": 0.0018, "step": 56990 }, { "grad_norm": 0.051892779767513275, "learning_rate": 6.823884917499246e-07, "loss": 0.0035, "step": 57000 }, { "grad_norm": 0.08538272976875305, "learning_rate": 6.778586147416278e-07, "loss": 0.0014, "step": 57010 }, { "grad_norm": 0.03845364972949028, "learning_rate": 6.733437204976156e-07, "loss": 0.001, "step": 57020 }, { "grad_norm": 0.10623897612094879, "learning_rate": 6.688438103893857e-07, "loss": 0.0042, "step": 57030 }, { "grad_norm": 0.09667661786079407, "learning_rate": 6.64358885783889e-07, "loss": 0.0026, "step": 57040 }, { "grad_norm": 0.051102884113788605, "learning_rate": 6.598889480435299e-07, "loss": 0.0019, "step": 57050 }, { "grad_norm": 0.06881003826856613, "learning_rate": 6.554339985261615e-07, "loss": 0.0016, "step": 57060 }, { "grad_norm": 0.061392948031425476, "learning_rate": 6.509940385850733e-07, "loss": 0.0016, "step": 57070 }, { "grad_norm": 0.10281597077846527, "learning_rate": 6.465690695690141e-07, "loss": 0.0018, "step": 57080 }, { "grad_norm": 0.042001206427812576, "learning_rate": 6.421590928221699e-07, "loss": 0.0014, "step": 57090 }, { "grad_norm": 0.05065804347395897, "learning_rate": 6.377641096841691e-07, "loss": 0.0014, "step": 57100 }, { "grad_norm": 0.030709296464920044, "learning_rate": 6.333841214901048e-07, "loss": 0.0017, "step": 57110 }, { "grad_norm": 0.04658455401659012, "learning_rate": 6.290191295704906e-07, "loss": 0.002, "step": 57120 }, { "grad_norm": 0.09820462018251419, "learning_rate": 6.246691352513046e-07, "loss": 0.0032, "step": 57130 }, { "grad_norm": 0.06811260432004929, "learning_rate": 6.203341398539452e-07, "loss": 0.0015, "step": 57140 }, { "grad_norm": 0.03459732607007027, "learning_rate": 6.160141446952872e-07, "loss": 0.0014, "step": 57150 }, { "grad_norm": 0.04866626858711243, "learning_rate": 6.11709151087625e-07, "loss": 0.0016, "step": 57160 }, { "grad_norm": 0.041527654975652695, "learning_rate": 6.074191603386958e-07, "loss": 0.0014, "step": 57170 }, { "grad_norm": 0.03288004919886589, "learning_rate": 6.031441737516907e-07, "loss": 0.0013, "step": 57180 }, { "grad_norm": 0.060558710247278214, "learning_rate": 5.988841926252431e-07, "loss": 0.0028, "step": 57190 }, { "grad_norm": 0.08243124186992645, "learning_rate": 5.946392182534066e-07, "loss": 0.0019, "step": 57200 }, { "grad_norm": 0.05785783380270004, "learning_rate": 5.90409251925711e-07, "loss": 0.0016, "step": 57210 }, { "grad_norm": 0.04605141282081604, "learning_rate": 5.861942949270949e-07, "loss": 0.005, "step": 57220 }, { "grad_norm": 0.031177379190921783, "learning_rate": 5.819943485379564e-07, "loss": 0.0016, "step": 57230 }, { "grad_norm": 0.042267605662345886, "learning_rate": 5.778094140341306e-07, "loss": 0.0018, "step": 57240 }, { "grad_norm": 0.05929802730679512, "learning_rate": 5.736394926868893e-07, "loss": 0.0015, "step": 57250 }, { "grad_norm": 0.04832405596971512, "learning_rate": 5.694845857629416e-07, "loss": 0.0018, "step": 57260 }, { "grad_norm": 0.072139210999012, "learning_rate": 5.653446945244334e-07, "loss": 0.0014, "step": 57270 }, { "grad_norm": 0.05935519561171532, "learning_rate": 5.612198202289698e-07, "loss": 0.0023, "step": 57280 }, { "grad_norm": 0.045992977917194366, "learning_rate": 5.571099641295596e-07, "loss": 0.0026, "step": 57290 }, { "grad_norm": 0.05129547789692879, "learning_rate": 5.530151274746875e-07, "loss": 0.0014, "step": 57300 }, { "grad_norm": 0.03503677621483803, "learning_rate": 5.489353115082418e-07, "loss": 0.0015, "step": 57310 }, { "grad_norm": 0.0757785364985466, "learning_rate": 5.4487051746957e-07, "loss": 0.0021, "step": 57320 }, { "grad_norm": 0.033482201397418976, "learning_rate": 5.408207465934511e-07, "loss": 0.0021, "step": 57330 }, { "grad_norm": 0.06210426613688469, "learning_rate": 5.3678600011009e-07, "loss": 0.0013, "step": 57340 }, { "grad_norm": 0.05117350071668625, "learning_rate": 5.327662792451449e-07, "loss": 0.0028, "step": 57350 }, { "grad_norm": 0.04117773473262787, "learning_rate": 5.287615852196947e-07, "loss": 0.0018, "step": 57360 }, { "grad_norm": 0.05955406650900841, "learning_rate": 5.247719192502665e-07, "loss": 0.0016, "step": 57370 }, { "grad_norm": 0.029013805091381073, "learning_rate": 5.207972825488128e-07, "loss": 0.0026, "step": 57380 }, { "grad_norm": 0.03855122998356819, "learning_rate": 5.168376763227178e-07, "loss": 0.002, "step": 57390 }, { "grad_norm": 0.03751670569181442, "learning_rate": 5.12893101774814e-07, "loss": 0.002, "step": 57400 }, { "grad_norm": 0.12298837304115295, "learning_rate": 5.089635601033483e-07, "loss": 0.0014, "step": 57410 }, { "grad_norm": 0.044152356684207916, "learning_rate": 5.050490525020213e-07, "loss": 0.002, "step": 57420 }, { "grad_norm": 0.03840535134077072, "learning_rate": 5.011495801599541e-07, "loss": 0.0021, "step": 57430 }, { "grad_norm": 0.04537270963191986, "learning_rate": 4.972651442616994e-07, "loss": 0.0019, "step": 57440 }, { "grad_norm": 0.036976296454668045, "learning_rate": 4.933957459872574e-07, "loss": 0.0034, "step": 57450 }, { "grad_norm": 0.06119784340262413, "learning_rate": 4.895413865120324e-07, "loss": 0.0016, "step": 57460 }, { "grad_norm": 0.06850314140319824, "learning_rate": 4.857020670068935e-07, "loss": 0.002, "step": 57470 }, { "grad_norm": 0.026897938922047615, "learning_rate": 4.818777886381132e-07, "loss": 0.0011, "step": 57480 }, { "grad_norm": 0.04337281733751297, "learning_rate": 4.780685525674122e-07, "loss": 0.0018, "step": 57490 }, { "grad_norm": 0.034391701221466064, "learning_rate": 4.7427435995193724e-07, "loss": 0.0017, "step": 57500 }, { "grad_norm": 0.03225114941596985, "learning_rate": 4.7049521194425515e-07, "loss": 0.0021, "step": 57510 }, { "grad_norm": 0.024797815829515457, "learning_rate": 4.667311096923754e-07, "loss": 0.0019, "step": 57520 }, { "grad_norm": 0.05999648571014404, "learning_rate": 4.6298205433973895e-07, "loss": 0.0018, "step": 57530 }, { "grad_norm": 0.06903325766324997, "learning_rate": 4.5924804702520696e-07, "loss": 0.0027, "step": 57540 }, { "grad_norm": 0.05884411931037903, "learning_rate": 4.5552908888306655e-07, "loss": 0.0026, "step": 57550 }, { "grad_norm": 0.029378321021795273, "learning_rate": 4.5182518104304185e-07, "loss": 0.0042, "step": 57560 }, { "grad_norm": 0.0833432748913765, "learning_rate": 4.4813632463028277e-07, "loss": 0.0015, "step": 57570 }, { "grad_norm": 0.03921504318714142, "learning_rate": 4.444625207653763e-07, "loss": 0.0017, "step": 57580 }, { "grad_norm": 0.04709751904010773, "learning_rate": 4.4080377056430753e-07, "loss": 0.0018, "step": 57590 }, { "grad_norm": 0.040002815425395966, "learning_rate": 4.371600751385263e-07, "loss": 0.0023, "step": 57600 }, { "grad_norm": 0.04730124771595001, "learning_rate": 4.3353143559487495e-07, "loss": 0.0012, "step": 57610 }, { "grad_norm": 0.07143151760101318, "learning_rate": 4.2991785303565513e-07, "loss": 0.0031, "step": 57620 }, { "grad_norm": 0.07616531848907471, "learning_rate": 4.2631932855856647e-07, "loss": 0.0018, "step": 57630 }, { "grad_norm": 0.03569434583187103, "learning_rate": 4.2273586325674576e-07, "loss": 0.0031, "step": 57640 }, { "grad_norm": 0.04422670602798462, "learning_rate": 4.1916745821876103e-07, "loss": 0.0035, "step": 57650 }, { "grad_norm": 0.03967597335577011, "learning_rate": 4.156141145285897e-07, "loss": 0.0015, "step": 57660 }, { "grad_norm": 0.05007532611489296, "learning_rate": 4.1207583326566267e-07, "loss": 0.0018, "step": 57670 }, { "grad_norm": 0.03677612170577049, "learning_rate": 4.085526155047925e-07, "loss": 0.0016, "step": 57680 }, { "grad_norm": 0.10075671970844269, "learning_rate": 4.050444623162564e-07, "loss": 0.003, "step": 57690 }, { "grad_norm": 0.030502984300255775, "learning_rate": 4.015513747657351e-07, "loss": 0.0022, "step": 57700 }, { "grad_norm": 0.06658079475164413, "learning_rate": 3.9807335391433554e-07, "loss": 0.0038, "step": 57710 }, { "grad_norm": 0.029139438644051552, "learning_rate": 3.946104008185847e-07, "loss": 0.0015, "step": 57720 }, { "grad_norm": 0.061930879950523376, "learning_rate": 3.9116251653044113e-07, "loss": 0.0018, "step": 57730 }, { "grad_norm": 0.05950324982404709, "learning_rate": 3.877297020972781e-07, "loss": 0.0022, "step": 57740 }, { "grad_norm": 0.05202266946434975, "learning_rate": 3.8431195856190036e-07, "loss": 0.0015, "step": 57750 }, { "grad_norm": 0.042618997395038605, "learning_rate": 3.8090928696251635e-07, "loss": 0.0024, "step": 57760 }, { "grad_norm": 0.0490972101688385, "learning_rate": 3.77521688332777e-07, "loss": 0.0019, "step": 57770 }, { "grad_norm": 0.0705968514084816, "learning_rate": 3.741491637017425e-07, "loss": 0.0015, "step": 57780 }, { "grad_norm": 0.04218076914548874, "learning_rate": 3.707917140939043e-07, "loss": 0.0019, "step": 57790 }, { "grad_norm": 0.027439618483185768, "learning_rate": 3.6744934052915235e-07, "loss": 0.0026, "step": 57800 }, { "grad_norm": 0.08499769866466522, "learning_rate": 3.641220440228188e-07, "loss": 0.0015, "step": 57810 }, { "grad_norm": 0.07947226613759995, "learning_rate": 3.608098255856562e-07, "loss": 0.0029, "step": 57820 }, { "grad_norm": 0.035306788980960846, "learning_rate": 3.575126862238154e-07, "loss": 0.0019, "step": 57830 }, { "grad_norm": 0.07461398839950562, "learning_rate": 3.5423062693888955e-07, "loss": 0.003, "step": 57840 }, { "grad_norm": 0.05418021231889725, "learning_rate": 3.509636487278756e-07, "loss": 0.0013, "step": 57850 }, { "grad_norm": 0.03223951905965805, "learning_rate": 3.4771175258320186e-07, "loss": 0.0015, "step": 57860 }, { "grad_norm": 0.10770311951637268, "learning_rate": 3.4447493949270047e-07, "loss": 0.0022, "step": 57870 }, { "grad_norm": 0.0410153791308403, "learning_rate": 3.4125321043964045e-07, "loss": 0.0014, "step": 57880 }, { "grad_norm": 0.036266446113586426, "learning_rate": 3.380465664026833e-07, "loss": 0.002, "step": 57890 }, { "grad_norm": 0.04671436920762062, "learning_rate": 3.348550083559388e-07, "loss": 0.0013, "step": 57900 }, { "grad_norm": 0.036069322377443314, "learning_rate": 3.316785372689091e-07, "loss": 0.0016, "step": 57910 }, { "grad_norm": 0.05417667701840401, "learning_rate": 3.285171541065224e-07, "loss": 0.0015, "step": 57920 }, { "grad_norm": 0.035045161843299866, "learning_rate": 3.253708598291272e-07, "loss": 0.0021, "step": 57930 }, { "grad_norm": 0.0699935331940651, "learning_rate": 3.2223965539248116e-07, "loss": 0.0014, "step": 57940 }, { "grad_norm": 0.03070792928338051, "learning_rate": 3.1912354174776227e-07, "loss": 0.0014, "step": 57950 }, { "grad_norm": 0.037877488881349564, "learning_rate": 3.1602251984155786e-07, "loss": 0.0014, "step": 57960 }, { "grad_norm": 0.11186408996582031, "learning_rate": 3.1293659061589207e-07, "loss": 0.0024, "step": 57970 }, { "grad_norm": 0.039434753358364105, "learning_rate": 3.098657550081707e-07, "loss": 0.0014, "step": 57980 }, { "grad_norm": 0.04078147932887077, "learning_rate": 3.068100139512475e-07, "loss": 0.0026, "step": 57990 }, { "grad_norm": 0.033300504088401794, "learning_rate": 3.037693683733689e-07, "loss": 0.002, "step": 58000 }, { "grad_norm": 0.028592251241207123, "learning_rate": 3.007438191982015e-07, "loss": 0.0023, "step": 58010 }, { "grad_norm": 0.14351193606853485, "learning_rate": 2.9773336734482684e-07, "loss": 0.0014, "step": 58020 }, { "grad_norm": 0.03563110530376434, "learning_rate": 2.9473801372774667e-07, "loss": 0.0015, "step": 58030 }, { "grad_norm": 0.04076527804136276, "learning_rate": 2.91757759256861e-07, "loss": 0.0018, "step": 58040 }, { "grad_norm": 0.04505648463964462, "learning_rate": 2.887926048375067e-07, "loss": 0.0018, "step": 58050 }, { "grad_norm": 0.08677639067173004, "learning_rate": 2.858425513704022e-07, "loss": 0.0023, "step": 58060 }, { "grad_norm": 0.024720491841435432, "learning_rate": 2.8290759975170834e-07, "loss": 0.0024, "step": 58070 }, { "grad_norm": 0.10004983097314835, "learning_rate": 2.799877508729787e-07, "loss": 0.0048, "step": 58080 }, { "grad_norm": 0.04419567808508873, "learning_rate": 2.770830056211926e-07, "loss": 0.0014, "step": 58090 }, { "grad_norm": 0.12463217228651047, "learning_rate": 2.741933648787331e-07, "loss": 0.0023, "step": 58100 }, { "grad_norm": 0.02443358488380909, "learning_rate": 2.7131882952339263e-07, "loss": 0.002, "step": 58110 }, { "grad_norm": 0.04266953840851784, "learning_rate": 2.684594004283836e-07, "loss": 0.0022, "step": 58120 }, { "grad_norm": 0.04970623552799225, "learning_rate": 2.6561507846232234e-07, "loss": 0.0037, "step": 58130 }, { "grad_norm": 0.05502042546868324, "learning_rate": 2.6278586448924005e-07, "loss": 0.0024, "step": 58140 }, { "grad_norm": 0.053655046969652176, "learning_rate": 2.5997175936857685e-07, "loss": 0.0016, "step": 58150 }, { "grad_norm": 0.057660698890686035, "learning_rate": 2.57172763955188e-07, "loss": 0.0022, "step": 58160 }, { "grad_norm": 0.06838681548833847, "learning_rate": 2.543888790993265e-07, "loss": 0.0019, "step": 58170 }, { "grad_norm": 0.060685619711875916, "learning_rate": 2.5162010564666607e-07, "loss": 0.0017, "step": 58180 }, { "grad_norm": 0.14120259881019592, "learning_rate": 2.488664444382893e-07, "loss": 0.0018, "step": 58190 }, { "grad_norm": 0.06408952176570892, "learning_rate": 2.461278963106828e-07, "loss": 0.0022, "step": 58200 }, { "grad_norm": 0.036832425743341446, "learning_rate": 2.434044620957421e-07, "loss": 0.0026, "step": 58210 }, { "grad_norm": 0.03708605840802193, "learning_rate": 2.406961426207832e-07, "loss": 0.0014, "step": 58220 }, { "grad_norm": 0.025073770433664322, "learning_rate": 2.380029387085203e-07, "loss": 0.0014, "step": 58230 }, { "grad_norm": 0.040609683841466904, "learning_rate": 2.353248511770767e-07, "loss": 0.0017, "step": 58240 }, { "grad_norm": 0.026789173483848572, "learning_rate": 2.3266188083997942e-07, "loss": 0.0017, "step": 58250 }, { "grad_norm": 0.05461307615041733, "learning_rate": 2.3001402850617027e-07, "loss": 0.0027, "step": 58260 }, { "grad_norm": 0.03069292940199375, "learning_rate": 2.2738129498000581e-07, "loss": 0.002, "step": 58270 }, { "grad_norm": 0.03969322144985199, "learning_rate": 2.2476368106122414e-07, "loss": 0.0018, "step": 58280 }, { "grad_norm": 0.10775385051965714, "learning_rate": 2.2216118754500582e-07, "loss": 0.0023, "step": 58290 }, { "grad_norm": 0.07354037463665009, "learning_rate": 2.1957381522190735e-07, "loss": 0.0016, "step": 58300 }, { "grad_norm": 0.039317790418863297, "learning_rate": 2.1700156487790558e-07, "loss": 0.0018, "step": 58310 }, { "grad_norm": 0.026262616738677025, "learning_rate": 2.1444443729439213e-07, "loss": 0.0012, "step": 58320 }, { "grad_norm": 0.06699593365192413, "learning_rate": 2.1190243324814007e-07, "loss": 0.0021, "step": 58330 }, { "grad_norm": 0.10579883307218552, "learning_rate": 2.0937555351135395e-07, "loss": 0.0022, "step": 58340 }, { "grad_norm": 0.08626481890678406, "learning_rate": 2.0686379885162532e-07, "loss": 0.0021, "step": 58350 }, { "grad_norm": 0.04022153094410896, "learning_rate": 2.0436717003196604e-07, "loss": 0.0015, "step": 58360 }, { "grad_norm": 0.0913291722536087, "learning_rate": 2.0188566781078057e-07, "loss": 0.0019, "step": 58370 }, { "grad_norm": 0.030319202691316605, "learning_rate": 1.994192929418881e-07, "loss": 0.0015, "step": 58380 }, { "grad_norm": 0.03326492756605148, "learning_rate": 1.9696804617451158e-07, "loss": 0.0018, "step": 58390 }, { "grad_norm": 0.039654720574617386, "learning_rate": 1.9453192825326093e-07, "loss": 0.0033, "step": 58400 }, { "grad_norm": 0.15418066084384918, "learning_rate": 1.9211093991817753e-07, "loss": 0.0024, "step": 58410 }, { "grad_norm": 0.04638614505529404, "learning_rate": 1.8970508190468973e-07, "loss": 0.0016, "step": 58420 }, { "grad_norm": 0.05325758457183838, "learning_rate": 1.8731435494362958e-07, "loss": 0.0012, "step": 58430 }, { "grad_norm": 0.08779995888471603, "learning_rate": 1.849387597612495e-07, "loss": 0.0016, "step": 58440 }, { "grad_norm": 0.05808218568563461, "learning_rate": 1.8257829707917228e-07, "loss": 0.0019, "step": 58450 }, { "grad_norm": 0.025890076532959938, "learning_rate": 1.8023296761446317e-07, "loss": 0.0029, "step": 58460 }, { "grad_norm": 0.0482134073972702, "learning_rate": 1.7790277207956341e-07, "loss": 0.0019, "step": 58470 }, { "grad_norm": 0.0647921934723854, "learning_rate": 1.7558771118232343e-07, "loss": 0.0016, "step": 58480 }, { "grad_norm": 0.048067670315504074, "learning_rate": 1.7328778562599734e-07, "loss": 0.0021, "step": 58490 }, { "grad_norm": 0.0886903703212738, "learning_rate": 1.7100299610924298e-07, "loss": 0.0022, "step": 58500 }, { "grad_norm": 0.04870017617940903, "learning_rate": 1.6873334332612733e-07, "loss": 0.0015, "step": 58510 }, { "grad_norm": 0.04579079523682594, "learning_rate": 1.6647882796609894e-07, "loss": 0.0029, "step": 58520 }, { "grad_norm": 0.07362257689237595, "learning_rate": 1.6423945071402102e-07, "loss": 0.0016, "step": 58530 }, { "grad_norm": 0.064451202750206, "learning_rate": 1.6201521225016614e-07, "loss": 0.0031, "step": 58540 }, { "grad_norm": 0.086424820125103, "learning_rate": 1.598061132501938e-07, "loss": 0.0022, "step": 58550 }, { "grad_norm": 0.032967064529657364, "learning_rate": 1.576121543851672e-07, "loss": 0.0018, "step": 58560 }, { "grad_norm": 0.06411159038543701, "learning_rate": 1.5543333632155876e-07, "loss": 0.0017, "step": 58570 }, { "grad_norm": 0.055354807525873184, "learning_rate": 1.5326965972123352e-07, "loss": 0.0026, "step": 58580 }, { "grad_norm": 0.028955847024917603, "learning_rate": 1.5112112524146016e-07, "loss": 0.0019, "step": 58590 }, { "grad_norm": 0.04863233119249344, "learning_rate": 1.4898773353489992e-07, "loss": 0.0015, "step": 58600 }, { "grad_norm": 0.03290300816297531, "learning_rate": 1.4686948524962884e-07, "loss": 0.002, "step": 58610 }, { "grad_norm": 0.06219300627708435, "learning_rate": 1.4476638102911556e-07, "loss": 0.0021, "step": 58620 }, { "grad_norm": 0.06562041491270065, "learning_rate": 1.4267842151222123e-07, "loss": 0.0021, "step": 58630 }, { "grad_norm": 0.036848705261945724, "learning_rate": 1.4060560733321626e-07, "loss": 0.0022, "step": 58640 }, { "grad_norm": 0.06273098289966583, "learning_rate": 1.385479391217692e-07, "loss": 0.0015, "step": 58650 }, { "grad_norm": 0.0530925877392292, "learning_rate": 1.365054175029412e-07, "loss": 0.0027, "step": 58660 }, { "grad_norm": 0.048895213752985, "learning_rate": 1.3447804309719702e-07, "loss": 0.0013, "step": 58670 }, { "grad_norm": 0.028824251145124435, "learning_rate": 1.3246581652040512e-07, "loss": 0.0013, "step": 58680 }, { "grad_norm": 0.028173640370368958, "learning_rate": 1.3046873838381546e-07, "loss": 0.0018, "step": 58690 }, { "grad_norm": 0.031453412026166916, "learning_rate": 1.2848680929409828e-07, "loss": 0.002, "step": 58700 }, { "grad_norm": 0.06685216724872589, "learning_rate": 1.2652002985331091e-07, "loss": 0.0014, "step": 58710 }, { "grad_norm": 0.05645514279603958, "learning_rate": 1.2456840065889764e-07, "loss": 0.0016, "step": 58720 }, { "grad_norm": 0.053065910935401917, "learning_rate": 1.226319223037231e-07, "loss": 0.0026, "step": 58730 }, { "grad_norm": 0.054537683725357056, "learning_rate": 1.2071059537603902e-07, "loss": 0.0018, "step": 58740 }, { "grad_norm": 0.05256020650267601, "learning_rate": 1.1880442045948403e-07, "loss": 0.0019, "step": 58750 }, { "grad_norm": 0.1144891232252121, "learning_rate": 1.1691339813311164e-07, "loss": 0.0018, "step": 58760 }, { "grad_norm": 0.03753571957349777, "learning_rate": 1.1503752897136233e-07, "loss": 0.0023, "step": 58770 }, { "grad_norm": 0.07236386090517044, "learning_rate": 1.1317681354407472e-07, "loss": 0.0027, "step": 58780 }, { "grad_norm": 0.04716004803776741, "learning_rate": 1.1133125241649111e-07, "loss": 0.0015, "step": 58790 }, { "grad_norm": 0.1047186553478241, "learning_rate": 1.0950084614922973e-07, "loss": 0.0031, "step": 58800 }, { "grad_norm": 0.10258243978023529, "learning_rate": 1.0768559529834021e-07, "loss": 0.0029, "step": 58810 }, { "grad_norm": 0.0595087893307209, "learning_rate": 1.0588550041522594e-07, "loss": 0.0013, "step": 58820 }, { "grad_norm": 0.08410868048667908, "learning_rate": 1.0410056204672169e-07, "loss": 0.0033, "step": 58830 }, { "grad_norm": 0.03289599344134331, "learning_rate": 1.0233078073504376e-07, "loss": 0.0017, "step": 58840 }, { "grad_norm": 0.07193457335233688, "learning_rate": 1.0057615701780654e-07, "loss": 0.0019, "step": 58850 }, { "grad_norm": 0.10032462328672409, "learning_rate": 9.883669142801144e-08, "loss": 0.0023, "step": 58860 }, { "grad_norm": 0.05668910965323448, "learning_rate": 9.711238449406356e-08, "loss": 0.0027, "step": 58870 }, { "grad_norm": 0.08455364406108856, "learning_rate": 9.540323673976614e-08, "loss": 0.0026, "step": 58880 }, { "grad_norm": 0.08835799247026443, "learning_rate": 9.370924868430942e-08, "loss": 0.0026, "step": 58890 }, { "grad_norm": 0.02371213398873806, "learning_rate": 9.203042084228175e-08, "loss": 0.0018, "step": 58900 }, { "grad_norm": 0.02446998469531536, "learning_rate": 9.036675372366965e-08, "loss": 0.0016, "step": 58910 }, { "grad_norm": 0.03439794480800629, "learning_rate": 8.871824783385218e-08, "loss": 0.0012, "step": 58920 }, { "grad_norm": 0.06286773830652237, "learning_rate": 8.7084903673601e-08, "loss": 0.0023, "step": 58930 }, { "grad_norm": 0.037395279854536057, "learning_rate": 8.546672173908032e-08, "loss": 0.0014, "step": 58940 }, { "grad_norm": 0.05481887608766556, "learning_rate": 8.386370252185249e-08, "loss": 0.0044, "step": 58950 }, { "grad_norm": 0.05990765243768692, "learning_rate": 8.227584650887243e-08, "loss": 0.0024, "step": 58960 }, { "grad_norm": 0.10528717935085297, "learning_rate": 8.070315418249319e-08, "loss": 0.0017, "step": 58970 }, { "grad_norm": 0.03271739184856415, "learning_rate": 7.914562602044929e-08, "loss": 0.0024, "step": 58980 }, { "grad_norm": 0.039924196898937225, "learning_rate": 7.76032624958789e-08, "loss": 0.0036, "step": 58990 }, { "grad_norm": 0.07227745652198792, "learning_rate": 7.607606407731282e-08, "loss": 0.0021, "step": 59000 }, { "grad_norm": 0.033133938908576965, "learning_rate": 7.45640312286744e-08, "loss": 0.0023, "step": 59010 }, { "grad_norm": 0.04029903933405876, "learning_rate": 7.306716440927952e-08, "loss": 0.0027, "step": 59020 }, { "grad_norm": 0.08826343715190887, "learning_rate": 7.15854640738367e-08, "loss": 0.0024, "step": 59030 }, { "grad_norm": 0.14936865866184235, "learning_rate": 7.011893067244701e-08, "loss": 0.0025, "step": 59040 }, { "grad_norm": 0.04518784210085869, "learning_rate": 6.866756465060408e-08, "loss": 0.0018, "step": 59050 }, { "grad_norm": 0.0326828695833683, "learning_rate": 6.723136644918859e-08, "loss": 0.0017, "step": 59060 }, { "grad_norm": 0.03347652778029442, "learning_rate": 6.581033650449042e-08, "loss": 0.0013, "step": 59070 }, { "grad_norm": 0.047391701489686966, "learning_rate": 6.440447524817539e-08, "loss": 0.0013, "step": 59080 }, { "grad_norm": 0.036413270980119705, "learning_rate": 6.301378310730743e-08, "loss": 0.0018, "step": 59090 }, { "grad_norm": 0.08045385777950287, "learning_rate": 6.163826050434307e-08, "loss": 0.0021, "step": 59100 }, { "grad_norm": 0.061411552131175995, "learning_rate": 6.027790785713139e-08, "loss": 0.0019, "step": 59110 }, { "grad_norm": 0.08067141473293304, "learning_rate": 5.89327255789085e-08, "loss": 0.0016, "step": 59120 }, { "grad_norm": 0.032342471182346344, "learning_rate": 5.7602714078303085e-08, "loss": 0.0015, "step": 59130 }, { "grad_norm": 0.07008244842290878, "learning_rate": 5.628787375934197e-08, "loss": 0.0025, "step": 59140 }, { "grad_norm": 0.03568591922521591, "learning_rate": 5.498820502143898e-08, "loss": 0.0019, "step": 59150 }, { "grad_norm": 0.02357313223183155, "learning_rate": 5.370370825939497e-08, "loss": 0.0014, "step": 59160 }, { "grad_norm": 0.0540211983025074, "learning_rate": 5.243438386340893e-08, "loss": 0.0017, "step": 59170 }, { "grad_norm": 0.03671962395310402, "learning_rate": 5.118023221907242e-08, "loss": 0.0018, "step": 59180 }, { "grad_norm": 0.05022633075714111, "learning_rate": 4.994125370735292e-08, "loss": 0.0018, "step": 59190 }, { "grad_norm": 0.02455817349255085, "learning_rate": 4.871744870462713e-08, "loss": 0.0018, "step": 59200 }, { "grad_norm": 0.07708315551280975, "learning_rate": 4.7508817582658794e-08, "loss": 0.0017, "step": 59210 }, { "grad_norm": 0.033112216740846634, "learning_rate": 4.631536070858755e-08, "loss": 0.0023, "step": 59220 }, { "grad_norm": 0.034977734088897705, "learning_rate": 4.513707844495674e-08, "loss": 0.0022, "step": 59230 }, { "grad_norm": 0.06507162749767303, "learning_rate": 4.3973971149702255e-08, "loss": 0.0016, "step": 59240 }, { "grad_norm": 0.054538268595933914, "learning_rate": 4.2826039176147025e-08, "loss": 0.0016, "step": 59250 }, { "grad_norm": 0.04904944822192192, "learning_rate": 4.169328287299545e-08, "loss": 0.002, "step": 59260 }, { "grad_norm": 0.07659133523702621, "learning_rate": 4.057570258435006e-08, "loss": 0.0025, "step": 59270 }, { "grad_norm": 0.02684958651661873, "learning_rate": 3.947329864970595e-08, "loss": 0.0016, "step": 59280 }, { "grad_norm": 0.06208379194140434, "learning_rate": 3.8386071403939686e-08, "loss": 0.0017, "step": 59290 }, { "grad_norm": 0.03156406059861183, "learning_rate": 3.731402117733152e-08, "loss": 0.0015, "step": 59300 }, { "grad_norm": 0.07186618447303772, "learning_rate": 3.625714829552651e-08, "loss": 0.0021, "step": 59310 }, { "grad_norm": 0.06333423405885696, "learning_rate": 3.5215453079590065e-08, "loss": 0.0014, "step": 59320 }, { "grad_norm": 0.08392772823572159, "learning_rate": 3.4188935845952396e-08, "loss": 0.002, "step": 59330 }, { "grad_norm": 0.03192640095949173, "learning_rate": 3.3177596906447396e-08, "loss": 0.0017, "step": 59340 }, { "grad_norm": 0.07749873399734497, "learning_rate": 3.218143656829043e-08, "loss": 0.002, "step": 59350 }, { "grad_norm": 0.026460831984877586, "learning_rate": 3.120045513408387e-08, "loss": 0.0011, "step": 59360 }, { "grad_norm": 0.032364800572395325, "learning_rate": 3.02346529018338e-08, "loss": 0.0037, "step": 59370 }, { "grad_norm": 0.04694696143269539, "learning_rate": 2.9284030164922204e-08, "loss": 0.0019, "step": 59380 }, { "grad_norm": 0.028310921043157578, "learning_rate": 2.8348587212123634e-08, "loss": 0.0028, "step": 59390 }, { "grad_norm": 0.12441930174827576, "learning_rate": 2.7428324327594125e-08, "loss": 0.0022, "step": 59400 }, { "grad_norm": 0.03990059718489647, "learning_rate": 2.6523241790893383e-08, "loss": 0.0021, "step": 59410 }, { "grad_norm": 0.042742300778627396, "learning_rate": 2.563333987695704e-08, "loss": 0.0021, "step": 59420 }, { "grad_norm": 0.09080519527196884, "learning_rate": 2.4758618856118852e-08, "loss": 0.0031, "step": 59430 }, { "grad_norm": 0.02846841886639595, "learning_rate": 2.3899078994088497e-08, "loss": 0.0025, "step": 59440 }, { "grad_norm": 0.048731427639722824, "learning_rate": 2.3054720551973775e-08, "loss": 0.0015, "step": 59450 }, { "grad_norm": 0.03363591060042381, "learning_rate": 2.222554378627506e-08, "loss": 0.0012, "step": 59460 }, { "grad_norm": 0.020043428987264633, "learning_rate": 2.1411548948868653e-08, "loss": 0.0012, "step": 59470 }, { "grad_norm": 0.21088097989559174, "learning_rate": 2.0612736287023426e-08, "loss": 0.0026, "step": 59480 }, { "grad_norm": 0.0361279733479023, "learning_rate": 1.9829106043400826e-08, "loss": 0.0017, "step": 59490 }, { "grad_norm": 0.02286909893155098, "learning_rate": 1.9060658456043768e-08, "loss": 0.0015, "step": 59500 }, { "grad_norm": 0.031134311109781265, "learning_rate": 1.830739375838775e-08, "loss": 0.0013, "step": 59510 }, { "grad_norm": 0.09717237204313278, "learning_rate": 1.7569312179260832e-08, "loss": 0.0013, "step": 59520 }, { "grad_norm": 0.08429041504859924, "learning_rate": 1.684641394286146e-08, "loss": 0.0022, "step": 59530 }, { "grad_norm": 0.045938536524772644, "learning_rate": 1.6138699268797296e-08, "loss": 0.0013, "step": 59540 }, { "grad_norm": 0.042852360755205154, "learning_rate": 1.5446168372046376e-08, "loss": 0.0019, "step": 59550 }, { "grad_norm": 0.07190746068954468, "learning_rate": 1.4768821462984861e-08, "loss": 0.0017, "step": 59560 }, { "grad_norm": 0.027321411296725273, "learning_rate": 1.4106658747370383e-08, "loss": 0.0043, "step": 59570 }, { "grad_norm": 0.05060608312487602, "learning_rate": 1.3459680426353149e-08, "loss": 0.0032, "step": 59580 }, { "grad_norm": 0.04011053591966629, "learning_rate": 1.2827886696464841e-08, "loss": 0.0016, "step": 59590 }, { "grad_norm": 0.06210273504257202, "learning_rate": 1.2211277749635264e-08, "loss": 0.0024, "step": 59600 }, { "grad_norm": 0.04004385694861412, "learning_rate": 1.1609853773164592e-08, "loss": 0.0016, "step": 59610 }, { "grad_norm": 0.0310466680675745, "learning_rate": 1.1023614949751127e-08, "loss": 0.0024, "step": 59620 }, { "grad_norm": 0.03447660803794861, "learning_rate": 1.0452561457485744e-08, "loss": 0.0034, "step": 59630 }, { "grad_norm": 0.06696496903896332, "learning_rate": 9.896693469829688e-09, "loss": 0.0014, "step": 59640 }, { "grad_norm": 0.028710516169667244, "learning_rate": 9.35601115564788e-09, "loss": 0.0014, "step": 59650 }, { "grad_norm": 0.05129091069102287, "learning_rate": 8.830514679186719e-09, "loss": 0.0017, "step": 59660 }, { "grad_norm": 0.10318212956190109, "learning_rate": 8.320204200074066e-09, "loss": 0.0019, "step": 59670 }, { "grad_norm": 0.061326656490564346, "learning_rate": 7.825079873324814e-09, "loss": 0.0011, "step": 59680 }, { "grad_norm": 0.05653013661503792, "learning_rate": 7.345141849351977e-09, "loss": 0.0015, "step": 59690 }, { "grad_norm": 0.05302873253822327, "learning_rate": 6.880390273944493e-09, "loss": 0.0014, "step": 59700 }, { "grad_norm": 0.0874003916978836, "learning_rate": 6.4308252882838704e-09, "loss": 0.0014, "step": 59710 }, { "grad_norm": 0.08187376707792282, "learning_rate": 5.9964470289386455e-09, "loss": 0.0013, "step": 59720 }, { "grad_norm": 0.07469252496957779, "learning_rate": 5.577255627853273e-09, "loss": 0.0016, "step": 59730 }, { "grad_norm": 0.10403530299663544, "learning_rate": 5.173251212370334e-09, "loss": 0.0024, "step": 59740 }, { "grad_norm": 0.026600105687975883, "learning_rate": 4.784433905219432e-09, "loss": 0.0025, "step": 59750 }, { "grad_norm": 0.06172705069184303, "learning_rate": 4.4108038245060934e-09, "loss": 0.002, "step": 59760 }, { "grad_norm": 0.02699715830385685, "learning_rate": 4.05236108373952e-09, "loss": 0.0019, "step": 59770 }, { "grad_norm": 0.051058750599622726, "learning_rate": 3.7091057917937324e-09, "loss": 0.0021, "step": 59780 }, { "grad_norm": 0.05513815954327583, "learning_rate": 3.381038052946428e-09, "loss": 0.0019, "step": 59790 }, { "grad_norm": 0.06892674416303635, "learning_rate": 3.0681579668623283e-09, "loss": 0.0018, "step": 59800 }, { "grad_norm": 0.11579745262861252, "learning_rate": 2.7704656285709727e-09, "loss": 0.0019, "step": 59810 }, { "grad_norm": 0.031310658901929855, "learning_rate": 2.4879611285166803e-09, "loss": 0.0013, "step": 59820 }, { "grad_norm": 0.0371757410466671, "learning_rate": 2.2206445525085883e-09, "loss": 0.0014, "step": 59830 }, { "grad_norm": 0.04418807476758957, "learning_rate": 1.9685159817595112e-09, "loss": 0.0017, "step": 59840 }, { "grad_norm": 0.05054246261715889, "learning_rate": 1.7315754928470817e-09, "loss": 0.0019, "step": 59850 }, { "grad_norm": 0.06414949148893356, "learning_rate": 1.5098231577581611e-09, "loss": 0.0023, "step": 59860 }, { "grad_norm": 0.023615892976522446, "learning_rate": 1.3032590438499804e-09, "loss": 0.0019, "step": 59870 }, { "grad_norm": 0.09313354641199112, "learning_rate": 1.1118832138723444e-09, "loss": 0.0029, "step": 59880 }, { "grad_norm": 0.08225814998149872, "learning_rate": 9.356957259620825e-10, "loss": 0.0032, "step": 59890 }, { "grad_norm": 0.03423801064491272, "learning_rate": 7.74696633637495e-10, "loss": 0.0023, "step": 59900 }, { "grad_norm": 0.025128740817308426, "learning_rate": 6.288859858039064e-10, "loss": 0.0017, "step": 59910 }, { "grad_norm": 0.03376555070281029, "learning_rate": 4.982638267647665e-10, "loss": 0.0026, "step": 59920 }, { "grad_norm": 0.040921635925769806, "learning_rate": 3.8283019618834406e-10, "loss": 0.0016, "step": 59930 }, { "grad_norm": 0.02604549750685692, "learning_rate": 2.825851291410331e-10, "loss": 0.0019, "step": 59940 }, { "grad_norm": 0.029058320447802544, "learning_rate": 1.975286560873535e-10, "loss": 0.0012, "step": 59950 }, { "grad_norm": 0.021468672901391983, "learning_rate": 1.2766080285109284e-10, "loss": 0.0015, "step": 59960 }, { "grad_norm": 0.061713118106126785, "learning_rate": 7.298159065971533e-11, "loss": 0.0015, "step": 59970 }, { "grad_norm": 0.057619862258434296, "learning_rate": 3.349103612770854e-11, "loss": 0.0036, "step": 59980 }, { "grad_norm": 0.0311071015894413, "learning_rate": 9.189151245481142e-12, "loss": 0.0017, "step": 59990 }, { "grad_norm": 0.06286779046058655, "learning_rate": 7.594340156735769e-14, "loss": 0.0028, "step": 60000 } ], "logging_steps": 10, "max_steps": 60000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 48, "trial_name": null, "trial_params": null }