{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 642.8571428571429, "eval_steps": 500, "global_step": 36000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008787346221441124, "grad_norm": 3.463869571685791, "learning_rate": 1.8000000000000002e-08, "loss": 1.2749, "step": 10 }, { "epoch": 0.0017574692442882249, "grad_norm": 2.8309216499328613, "learning_rate": 3.8e-08, "loss": 1.2673, "step": 20 }, { "epoch": 0.0026362038664323375, "grad_norm": 3.3412649631500244, "learning_rate": 5.8e-08, "loss": 1.2685, "step": 30 }, { "epoch": 0.0035149384885764497, "grad_norm": 2.3116111755371094, "learning_rate": 7.8e-08, "loss": 1.2608, "step": 40 }, { "epoch": 0.004393673110720563, "grad_norm": 2.0663321018218994, "learning_rate": 9.8e-08, "loss": 1.2411, "step": 50 }, { "epoch": 0.005272407732864675, "grad_norm": 1.6630572080612183, "learning_rate": 1.1800000000000001e-07, "loss": 1.237, "step": 60 }, { "epoch": 0.006151142355008787, "grad_norm": 1.003572702407837, "learning_rate": 1.38e-07, "loss": 1.2356, "step": 70 }, { "epoch": 0.007029876977152899, "grad_norm": 1.1782159805297852, "learning_rate": 1.58e-07, "loss": 1.238, "step": 80 }, { "epoch": 0.007908611599297012, "grad_norm": 0.9716034531593323, "learning_rate": 1.78e-07, "loss": 1.2308, "step": 90 }, { "epoch": 0.008787346221441126, "grad_norm": 0.9351742267608643, "learning_rate": 1.98e-07, "loss": 1.2184, "step": 100 }, { "epoch": 0.009666080843585237, "grad_norm": 0.9983887672424316, "learning_rate": 2.1800000000000002e-07, "loss": 1.2286, "step": 110 }, { "epoch": 0.01054481546572935, "grad_norm": 0.9736628532409668, "learning_rate": 2.3800000000000004e-07, "loss": 1.2261, "step": 120 }, { "epoch": 0.011423550087873463, "grad_norm": 0.8565272688865662, "learning_rate": 2.58e-07, "loss": 1.2234, "step": 130 }, { "epoch": 0.012302284710017574, "grad_norm": 1.1082686185836792, "learning_rate": 2.7800000000000003e-07, "loss": 1.2207, "step": 140 }, { "epoch": 0.013181019332161687, "grad_norm": 1.318981647491455, "learning_rate": 2.98e-07, "loss": 1.2222, "step": 150 }, { "epoch": 0.014059753954305799, "grad_norm": 0.9183272123336792, "learning_rate": 3.18e-07, "loss": 1.2179, "step": 160 }, { "epoch": 0.014938488576449912, "grad_norm": 1.218881607055664, "learning_rate": 3.3800000000000004e-07, "loss": 1.2142, "step": 170 }, { "epoch": 0.015817223198594025, "grad_norm": 0.9174835085868835, "learning_rate": 3.58e-07, "loss": 1.2043, "step": 180 }, { "epoch": 0.016695957820738138, "grad_norm": 0.9799721240997314, "learning_rate": 3.78e-07, "loss": 1.2083, "step": 190 }, { "epoch": 0.01757469244288225, "grad_norm": 0.9706547856330872, "learning_rate": 3.9800000000000004e-07, "loss": 1.1962, "step": 200 }, { "epoch": 0.01845342706502636, "grad_norm": 1.0952097177505493, "learning_rate": 4.18e-07, "loss": 1.2003, "step": 210 }, { "epoch": 0.019332161687170474, "grad_norm": 1.1255017518997192, "learning_rate": 4.3800000000000003e-07, "loss": 1.1839, "step": 220 }, { "epoch": 0.020210896309314587, "grad_norm": 1.2500284910202026, "learning_rate": 4.58e-07, "loss": 1.2006, "step": 230 }, { "epoch": 0.0210896309314587, "grad_norm": 1.2024898529052734, "learning_rate": 4.78e-07, "loss": 1.1865, "step": 240 }, { "epoch": 0.021968365553602813, "grad_norm": 1.55277419090271, "learning_rate": 4.98e-07, "loss": 1.1761, "step": 250 }, { "epoch": 0.022847100175746926, "grad_norm": 1.3900305032730103, "learning_rate": 5.18e-07, "loss": 1.1609, "step": 260 }, { "epoch": 0.023725834797891036, "grad_norm": 2.296308755874634, "learning_rate": 5.380000000000001e-07, "loss": 1.1588, "step": 270 }, { "epoch": 0.02460456942003515, "grad_norm": 1.5932459831237793, "learning_rate": 5.58e-07, "loss": 1.1346, "step": 280 }, { "epoch": 0.025483304042179262, "grad_norm": 1.4375121593475342, "learning_rate": 5.780000000000001e-07, "loss": 1.1236, "step": 290 }, { "epoch": 0.026362038664323375, "grad_norm": 4.208409309387207, "learning_rate": 5.98e-07, "loss": 1.1294, "step": 300 }, { "epoch": 0.027240773286467488, "grad_norm": 4.129127502441406, "learning_rate": 6.180000000000001e-07, "loss": 1.1058, "step": 310 }, { "epoch": 0.028119507908611598, "grad_norm": 4.015860080718994, "learning_rate": 6.380000000000001e-07, "loss": 1.1077, "step": 320 }, { "epoch": 0.02899824253075571, "grad_norm": 2.1062886714935303, "learning_rate": 6.58e-07, "loss": 1.1002, "step": 330 }, { "epoch": 0.029876977152899824, "grad_norm": 2.479001522064209, "learning_rate": 6.78e-07, "loss": 1.0948, "step": 340 }, { "epoch": 0.030755711775043937, "grad_norm": 2.5701727867126465, "learning_rate": 6.98e-07, "loss": 1.0869, "step": 350 }, { "epoch": 0.03163444639718805, "grad_norm": 1.8001195192337036, "learning_rate": 7.18e-07, "loss": 1.0974, "step": 360 }, { "epoch": 0.03251318101933216, "grad_norm": 3.4848244190216064, "learning_rate": 7.380000000000001e-07, "loss": 1.0894, "step": 370 }, { "epoch": 0.033391915641476276, "grad_norm": 2.265071392059326, "learning_rate": 7.58e-07, "loss": 1.0912, "step": 380 }, { "epoch": 0.03427065026362039, "grad_norm": 3.0545144081115723, "learning_rate": 7.78e-07, "loss": 1.0981, "step": 390 }, { "epoch": 0.0351493848857645, "grad_norm": 2.742431879043579, "learning_rate": 7.979999999999999e-07, "loss": 1.086, "step": 400 }, { "epoch": 0.03602811950790861, "grad_norm": 2.800856828689575, "learning_rate": 8.18e-07, "loss": 1.0904, "step": 410 }, { "epoch": 0.03690685413005272, "grad_norm": 2.66064190864563, "learning_rate": 8.380000000000001e-07, "loss": 1.0701, "step": 420 }, { "epoch": 0.037785588752196834, "grad_norm": 1.4373632669448853, "learning_rate": 8.580000000000001e-07, "loss": 1.0779, "step": 430 }, { "epoch": 0.03866432337434095, "grad_norm": 2.483046531677246, "learning_rate": 8.78e-07, "loss": 1.0695, "step": 440 }, { "epoch": 0.03954305799648506, "grad_norm": 2.563528537750244, "learning_rate": 8.98e-07, "loss": 1.0744, "step": 450 }, { "epoch": 0.040421792618629174, "grad_norm": 2.1164588928222656, "learning_rate": 9.180000000000001e-07, "loss": 1.0885, "step": 460 }, { "epoch": 0.04130052724077329, "grad_norm": 1.9856990575790405, "learning_rate": 9.38e-07, "loss": 1.0615, "step": 470 }, { "epoch": 0.0421792618629174, "grad_norm": 2.7458105087280273, "learning_rate": 9.58e-07, "loss": 1.0722, "step": 480 }, { "epoch": 0.04305799648506151, "grad_norm": 3.1617209911346436, "learning_rate": 9.78e-07, "loss": 1.0729, "step": 490 }, { "epoch": 0.043936731107205626, "grad_norm": 1.97232985496521, "learning_rate": 9.98e-07, "loss": 1.0802, "step": 500 }, { "epoch": 0.04481546572934974, "grad_norm": 1.5699235200881958, "learning_rate": 1.018e-06, "loss": 1.0717, "step": 510 }, { "epoch": 0.04569420035149385, "grad_norm": 1.6749203205108643, "learning_rate": 1.038e-06, "loss": 1.0727, "step": 520 }, { "epoch": 0.04657293497363796, "grad_norm": 1.6090953350067139, "learning_rate": 1.058e-06, "loss": 1.0622, "step": 530 }, { "epoch": 0.04745166959578207, "grad_norm": 1.9539084434509277, "learning_rate": 1.078e-06, "loss": 1.0578, "step": 540 }, { "epoch": 0.048330404217926184, "grad_norm": 2.618823289871216, "learning_rate": 1.0980000000000001e-06, "loss": 1.0685, "step": 550 }, { "epoch": 0.0492091388400703, "grad_norm": 2.2720987796783447, "learning_rate": 1.1180000000000001e-06, "loss": 1.0661, "step": 560 }, { "epoch": 0.05008787346221441, "grad_norm": 1.720299482345581, "learning_rate": 1.138e-06, "loss": 1.0566, "step": 570 }, { "epoch": 0.050966608084358524, "grad_norm": 3.6626269817352295, "learning_rate": 1.158e-06, "loss": 1.0629, "step": 580 }, { "epoch": 0.05184534270650264, "grad_norm": 1.520838975906372, "learning_rate": 1.1780000000000002e-06, "loss": 1.0587, "step": 590 }, { "epoch": 0.05272407732864675, "grad_norm": 3.7160165309906006, "learning_rate": 1.198e-06, "loss": 1.0625, "step": 600 }, { "epoch": 0.05360281195079086, "grad_norm": 2.158292293548584, "learning_rate": 1.218e-06, "loss": 1.0642, "step": 610 }, { "epoch": 0.054481546572934976, "grad_norm": 3.5635032653808594, "learning_rate": 1.238e-06, "loss": 1.0614, "step": 620 }, { "epoch": 0.05536028119507909, "grad_norm": 3.262779712677002, "learning_rate": 1.258e-06, "loss": 1.0674, "step": 630 }, { "epoch": 0.056239015817223195, "grad_norm": 2.4983654022216797, "learning_rate": 1.278e-06, "loss": 1.0651, "step": 640 }, { "epoch": 0.05711775043936731, "grad_norm": 2.9171204566955566, "learning_rate": 1.2980000000000001e-06, "loss": 1.0591, "step": 650 }, { "epoch": 0.05799648506151142, "grad_norm": 2.9060566425323486, "learning_rate": 1.3180000000000001e-06, "loss": 1.0602, "step": 660 }, { "epoch": 0.058875219683655534, "grad_norm": 3.25357723236084, "learning_rate": 1.338e-06, "loss": 1.063, "step": 670 }, { "epoch": 0.05975395430579965, "grad_norm": 2.3171072006225586, "learning_rate": 1.3580000000000002e-06, "loss": 1.0577, "step": 680 }, { "epoch": 0.06063268892794376, "grad_norm": 3.0793352127075195, "learning_rate": 1.3780000000000002e-06, "loss": 1.0649, "step": 690 }, { "epoch": 0.061511423550087874, "grad_norm": 1.7567499876022339, "learning_rate": 1.398e-06, "loss": 1.0494, "step": 700 }, { "epoch": 0.06239015817223199, "grad_norm": 1.7746974229812622, "learning_rate": 1.418e-06, "loss": 1.0606, "step": 710 }, { "epoch": 0.0632688927943761, "grad_norm": 2.6496479511260986, "learning_rate": 1.438e-06, "loss": 1.0592, "step": 720 }, { "epoch": 0.0641476274165202, "grad_norm": 1.9937723875045776, "learning_rate": 1.458e-06, "loss": 1.053, "step": 730 }, { "epoch": 0.06502636203866433, "grad_norm": 1.6786264181137085, "learning_rate": 1.478e-06, "loss": 1.0546, "step": 740 }, { "epoch": 0.06590509666080843, "grad_norm": 2.0175304412841797, "learning_rate": 1.498e-06, "loss": 1.0558, "step": 750 }, { "epoch": 0.06678383128295255, "grad_norm": 2.601452350616455, "learning_rate": 1.5180000000000001e-06, "loss": 1.0492, "step": 760 }, { "epoch": 0.06766256590509666, "grad_norm": 2.490344524383545, "learning_rate": 1.538e-06, "loss": 1.0497, "step": 770 }, { "epoch": 0.06854130052724078, "grad_norm": 3.1689963340759277, "learning_rate": 1.5580000000000002e-06, "loss": 1.0478, "step": 780 }, { "epoch": 0.06942003514938488, "grad_norm": 2.9290425777435303, "learning_rate": 1.578e-06, "loss": 1.0605, "step": 790 }, { "epoch": 0.070298769771529, "grad_norm": 2.5105092525482178, "learning_rate": 1.5980000000000002e-06, "loss": 1.0505, "step": 800 }, { "epoch": 0.07117750439367311, "grad_norm": 4.053808689117432, "learning_rate": 1.618e-06, "loss": 1.0524, "step": 810 }, { "epoch": 0.07205623901581722, "grad_norm": 1.8167824745178223, "learning_rate": 1.638e-06, "loss": 1.0492, "step": 820 }, { "epoch": 0.07293497363796134, "grad_norm": 2.3400559425354004, "learning_rate": 1.6580000000000003e-06, "loss": 1.0382, "step": 830 }, { "epoch": 0.07381370826010544, "grad_norm": 2.029547929763794, "learning_rate": 1.678e-06, "loss": 1.0476, "step": 840 }, { "epoch": 0.07469244288224956, "grad_norm": 1.6860438585281372, "learning_rate": 1.6979999999999999e-06, "loss": 1.0479, "step": 850 }, { "epoch": 0.07557117750439367, "grad_norm": 1.5329878330230713, "learning_rate": 1.7180000000000001e-06, "loss": 1.0413, "step": 860 }, { "epoch": 0.07644991212653779, "grad_norm": 1.5208624601364136, "learning_rate": 1.7380000000000001e-06, "loss": 1.0411, "step": 870 }, { "epoch": 0.0773286467486819, "grad_norm": 1.953786015510559, "learning_rate": 1.758e-06, "loss": 1.0412, "step": 880 }, { "epoch": 0.07820738137082602, "grad_norm": 1.6580291986465454, "learning_rate": 1.7780000000000002e-06, "loss": 1.0372, "step": 890 }, { "epoch": 0.07908611599297012, "grad_norm": 1.713304042816162, "learning_rate": 1.798e-06, "loss": 1.0295, "step": 900 }, { "epoch": 0.07996485061511424, "grad_norm": 2.206963300704956, "learning_rate": 1.8180000000000002e-06, "loss": 1.0338, "step": 910 }, { "epoch": 0.08084358523725835, "grad_norm": 2.2312278747558594, "learning_rate": 1.8380000000000002e-06, "loss": 1.0383, "step": 920 }, { "epoch": 0.08172231985940245, "grad_norm": 3.9925506114959717, "learning_rate": 1.858e-06, "loss": 1.0251, "step": 930 }, { "epoch": 0.08260105448154657, "grad_norm": 1.6212830543518066, "learning_rate": 1.8780000000000003e-06, "loss": 1.0239, "step": 940 }, { "epoch": 0.08347978910369068, "grad_norm": 1.6375181674957275, "learning_rate": 1.898e-06, "loss": 1.0204, "step": 950 }, { "epoch": 0.0843585237258348, "grad_norm": 2.2830593585968018, "learning_rate": 1.918e-06, "loss": 1.0187, "step": 960 }, { "epoch": 0.0852372583479789, "grad_norm": 2.3687996864318848, "learning_rate": 1.9380000000000003e-06, "loss": 1.0239, "step": 970 }, { "epoch": 0.08611599297012303, "grad_norm": 2.5130574703216553, "learning_rate": 1.958e-06, "loss": 1.0125, "step": 980 }, { "epoch": 0.08699472759226713, "grad_norm": 1.6806515455245972, "learning_rate": 1.978e-06, "loss": 0.9979, "step": 990 }, { "epoch": 0.08787346221441125, "grad_norm": 2.6505160331726074, "learning_rate": 1.998e-06, "loss": 1.0083, "step": 1000 }, { "epoch": 0.08875219683655536, "grad_norm": 2.801907539367676, "learning_rate": 2.018e-06, "loss": 0.9997, "step": 1010 }, { "epoch": 0.08963093145869948, "grad_norm": 1.9019992351531982, "learning_rate": 2.038e-06, "loss": 0.9922, "step": 1020 }, { "epoch": 0.09050966608084358, "grad_norm": 3.4847166538238525, "learning_rate": 2.058e-06, "loss": 0.9838, "step": 1030 }, { "epoch": 0.0913884007029877, "grad_norm": 2.375701904296875, "learning_rate": 2.0780000000000003e-06, "loss": 0.9733, "step": 1040 }, { "epoch": 0.09226713532513181, "grad_norm": 3.301974058151245, "learning_rate": 2.098e-06, "loss": 0.9522, "step": 1050 }, { "epoch": 0.09314586994727592, "grad_norm": 1.7148151397705078, "learning_rate": 2.1180000000000003e-06, "loss": 0.9452, "step": 1060 }, { "epoch": 0.09402460456942004, "grad_norm": 3.3245508670806885, "learning_rate": 2.138e-06, "loss": 0.9421, "step": 1070 }, { "epoch": 0.09490333919156414, "grad_norm": 2.658867120742798, "learning_rate": 2.158e-06, "loss": 0.9225, "step": 1080 }, { "epoch": 0.09578207381370826, "grad_norm": 3.5111141204833984, "learning_rate": 2.178e-06, "loss": 0.9107, "step": 1090 }, { "epoch": 0.09666080843585237, "grad_norm": 3.2645533084869385, "learning_rate": 2.198e-06, "loss": 0.8892, "step": 1100 }, { "epoch": 0.09753954305799649, "grad_norm": 5.194536209106445, "learning_rate": 2.2179999999999998e-06, "loss": 0.8702, "step": 1110 }, { "epoch": 0.0984182776801406, "grad_norm": 4.27847146987915, "learning_rate": 2.238e-06, "loss": 0.8416, "step": 1120 }, { "epoch": 0.09929701230228472, "grad_norm": 5.4441752433776855, "learning_rate": 2.2580000000000002e-06, "loss": 0.828, "step": 1130 }, { "epoch": 0.10017574692442882, "grad_norm": 3.82067608833313, "learning_rate": 2.2780000000000005e-06, "loss": 0.8127, "step": 1140 }, { "epoch": 0.10105448154657294, "grad_norm": 2.8788838386535645, "learning_rate": 2.2980000000000003e-06, "loss": 0.7732, "step": 1150 }, { "epoch": 0.10193321616871705, "grad_norm": 4.3162736892700195, "learning_rate": 2.318e-06, "loss": 0.7533, "step": 1160 }, { "epoch": 0.10281195079086115, "grad_norm": 4.732647895812988, "learning_rate": 2.3380000000000003e-06, "loss": 0.7445, "step": 1170 }, { "epoch": 0.10369068541300527, "grad_norm": 3.976848602294922, "learning_rate": 2.358e-06, "loss": 0.723, "step": 1180 }, { "epoch": 0.10456942003514938, "grad_norm": 5.475327491760254, "learning_rate": 2.378e-06, "loss": 0.6848, "step": 1190 }, { "epoch": 0.1054481546572935, "grad_norm": 5.6328043937683105, "learning_rate": 2.398e-06, "loss": 0.6848, "step": 1200 }, { "epoch": 0.1063268892794376, "grad_norm": 4.06898307800293, "learning_rate": 2.418e-06, "loss": 0.6496, "step": 1210 }, { "epoch": 0.10720562390158173, "grad_norm": 5.250406265258789, "learning_rate": 2.438e-06, "loss": 0.6411, "step": 1220 }, { "epoch": 0.10808435852372583, "grad_norm": 4.9642815589904785, "learning_rate": 2.4580000000000004e-06, "loss": 0.6294, "step": 1230 }, { "epoch": 0.10896309314586995, "grad_norm": 5.2756667137146, "learning_rate": 2.4780000000000002e-06, "loss": 0.5843, "step": 1240 }, { "epoch": 0.10984182776801406, "grad_norm": 4.9830451011657715, "learning_rate": 2.498e-06, "loss": 0.5847, "step": 1250 }, { "epoch": 0.11072056239015818, "grad_norm": 5.453825950622559, "learning_rate": 2.5180000000000003e-06, "loss": 0.5603, "step": 1260 }, { "epoch": 0.11159929701230228, "grad_norm": 5.195611000061035, "learning_rate": 2.538e-06, "loss": 0.5402, "step": 1270 }, { "epoch": 0.11247803163444639, "grad_norm": 5.605865478515625, "learning_rate": 2.558e-06, "loss": 0.5365, "step": 1280 }, { "epoch": 0.11335676625659051, "grad_norm": 5.1030731201171875, "learning_rate": 2.578e-06, "loss": 0.5149, "step": 1290 }, { "epoch": 0.11423550087873462, "grad_norm": 4.508848190307617, "learning_rate": 2.598e-06, "loss": 0.4947, "step": 1300 }, { "epoch": 0.11511423550087874, "grad_norm": 6.794380187988281, "learning_rate": 2.618e-06, "loss": 0.4676, "step": 1310 }, { "epoch": 0.11599297012302284, "grad_norm": 5.3257527351379395, "learning_rate": 2.638e-06, "loss": 0.4426, "step": 1320 }, { "epoch": 0.11687170474516696, "grad_norm": 5.038968563079834, "learning_rate": 2.6580000000000002e-06, "loss": 0.465, "step": 1330 }, { "epoch": 0.11775043936731107, "grad_norm": 3.0532450675964355, "learning_rate": 2.6780000000000004e-06, "loss": 0.4367, "step": 1340 }, { "epoch": 0.11862917398945519, "grad_norm": 4.21120548248291, "learning_rate": 2.6980000000000003e-06, "loss": 0.4314, "step": 1350 }, { "epoch": 0.1195079086115993, "grad_norm": 4.6664252281188965, "learning_rate": 2.718e-06, "loss": 0.4081, "step": 1360 }, { "epoch": 0.12038664323374342, "grad_norm": 5.621894836425781, "learning_rate": 2.7380000000000003e-06, "loss": 0.4066, "step": 1370 }, { "epoch": 0.12126537785588752, "grad_norm": 5.374266147613525, "learning_rate": 2.758e-06, "loss": 0.3745, "step": 1380 }, { "epoch": 0.12214411247803164, "grad_norm": 5.121448040008545, "learning_rate": 2.778e-06, "loss": 0.3636, "step": 1390 }, { "epoch": 0.12302284710017575, "grad_norm": 5.401526927947998, "learning_rate": 2.798e-06, "loss": 0.3616, "step": 1400 }, { "epoch": 0.12390158172231985, "grad_norm": 4.060187339782715, "learning_rate": 2.818e-06, "loss": 0.3437, "step": 1410 }, { "epoch": 0.12478031634446397, "grad_norm": 3.808840751647949, "learning_rate": 2.838e-06, "loss": 0.3431, "step": 1420 }, { "epoch": 0.1256590509666081, "grad_norm": 3.1423850059509277, "learning_rate": 2.8580000000000004e-06, "loss": 0.3229, "step": 1430 }, { "epoch": 0.1265377855887522, "grad_norm": 4.208380699157715, "learning_rate": 2.8780000000000002e-06, "loss": 0.3186, "step": 1440 }, { "epoch": 0.1274165202108963, "grad_norm": 6.508106708526611, "learning_rate": 2.898e-06, "loss": 0.3305, "step": 1450 }, { "epoch": 0.1282952548330404, "grad_norm": 3.5621750354766846, "learning_rate": 2.9180000000000003e-06, "loss": 0.3291, "step": 1460 }, { "epoch": 0.12917398945518455, "grad_norm": 3.6666269302368164, "learning_rate": 2.938e-06, "loss": 0.2932, "step": 1470 }, { "epoch": 0.13005272407732865, "grad_norm": 4.305308818817139, "learning_rate": 2.958e-06, "loss": 0.3013, "step": 1480 }, { "epoch": 0.13093145869947276, "grad_norm": 3.6606287956237793, "learning_rate": 2.978e-06, "loss": 0.2895, "step": 1490 }, { "epoch": 0.13181019332161686, "grad_norm": 4.306281566619873, "learning_rate": 2.998e-06, "loss": 0.2879, "step": 1500 }, { "epoch": 0.13268892794376097, "grad_norm": 2.9988508224487305, "learning_rate": 3.018e-06, "loss": 0.2823, "step": 1510 }, { "epoch": 0.1335676625659051, "grad_norm": 3.6735427379608154, "learning_rate": 3.0380000000000004e-06, "loss": 0.2608, "step": 1520 }, { "epoch": 0.1344463971880492, "grad_norm": 3.464357614517212, "learning_rate": 3.058e-06, "loss": 0.2788, "step": 1530 }, { "epoch": 0.13532513181019332, "grad_norm": 3.9073052406311035, "learning_rate": 3.078e-06, "loss": 0.2746, "step": 1540 }, { "epoch": 0.13620386643233742, "grad_norm": 2.313807249069214, "learning_rate": 3.0980000000000002e-06, "loss": 0.2677, "step": 1550 }, { "epoch": 0.13708260105448156, "grad_norm": 3.230877637863159, "learning_rate": 3.118e-06, "loss": 0.2625, "step": 1560 }, { "epoch": 0.13796133567662566, "grad_norm": 3.9138760566711426, "learning_rate": 3.138e-06, "loss": 0.2563, "step": 1570 }, { "epoch": 0.13884007029876977, "grad_norm": 3.109079599380493, "learning_rate": 3.1579999999999997e-06, "loss": 0.2448, "step": 1580 }, { "epoch": 0.13971880492091387, "grad_norm": 1.976616382598877, "learning_rate": 3.1780000000000003e-06, "loss": 0.2343, "step": 1590 }, { "epoch": 0.140597539543058, "grad_norm": 2.9246182441711426, "learning_rate": 3.198e-06, "loss": 0.2457, "step": 1600 }, { "epoch": 0.14147627416520211, "grad_norm": 1.660309910774231, "learning_rate": 3.2180000000000004e-06, "loss": 0.2282, "step": 1610 }, { "epoch": 0.14235500878734622, "grad_norm": 1.763709306716919, "learning_rate": 3.238e-06, "loss": 0.2143, "step": 1620 }, { "epoch": 0.14323374340949033, "grad_norm": 2.8652684688568115, "learning_rate": 3.258e-06, "loss": 0.2236, "step": 1630 }, { "epoch": 0.14411247803163443, "grad_norm": 2.7996826171875, "learning_rate": 3.2779999999999998e-06, "loss": 0.2113, "step": 1640 }, { "epoch": 0.14499121265377857, "grad_norm": 3.3106772899627686, "learning_rate": 3.2980000000000004e-06, "loss": 0.214, "step": 1650 }, { "epoch": 0.14586994727592267, "grad_norm": 2.583939552307129, "learning_rate": 3.3180000000000003e-06, "loss": 0.2031, "step": 1660 }, { "epoch": 0.14674868189806678, "grad_norm": 2.360971212387085, "learning_rate": 3.338e-06, "loss": 0.2132, "step": 1670 }, { "epoch": 0.14762741652021089, "grad_norm": 2.3245885372161865, "learning_rate": 3.358e-06, "loss": 0.2076, "step": 1680 }, { "epoch": 0.14850615114235502, "grad_norm": 2.427187442779541, "learning_rate": 3.378e-06, "loss": 0.2214, "step": 1690 }, { "epoch": 0.14938488576449913, "grad_norm": 2.6368818283081055, "learning_rate": 3.3980000000000003e-06, "loss": 0.2172, "step": 1700 }, { "epoch": 0.15026362038664323, "grad_norm": 2.802570104598999, "learning_rate": 3.4180000000000006e-06, "loss": 0.2089, "step": 1710 }, { "epoch": 0.15114235500878734, "grad_norm": 3.2071008682250977, "learning_rate": 3.4380000000000004e-06, "loss": 0.1944, "step": 1720 }, { "epoch": 0.15202108963093147, "grad_norm": 1.8590292930603027, "learning_rate": 3.458e-06, "loss": 0.1971, "step": 1730 }, { "epoch": 0.15289982425307558, "grad_norm": 3.3348464965820312, "learning_rate": 3.478e-06, "loss": 0.1935, "step": 1740 }, { "epoch": 0.15377855887521968, "grad_norm": 3.339822769165039, "learning_rate": 3.498e-06, "loss": 0.2019, "step": 1750 }, { "epoch": 0.1546572934973638, "grad_norm": 1.8955796957015991, "learning_rate": 3.5180000000000005e-06, "loss": 0.192, "step": 1760 }, { "epoch": 0.1555360281195079, "grad_norm": 2.588034152984619, "learning_rate": 3.5380000000000003e-06, "loss": 0.1889, "step": 1770 }, { "epoch": 0.15641476274165203, "grad_norm": 2.7756404876708984, "learning_rate": 3.558e-06, "loss": 0.1944, "step": 1780 }, { "epoch": 0.15729349736379614, "grad_norm": 2.370884895324707, "learning_rate": 3.5780000000000003e-06, "loss": 0.1948, "step": 1790 }, { "epoch": 0.15817223198594024, "grad_norm": 1.7515318393707275, "learning_rate": 3.598e-06, "loss": 0.1851, "step": 1800 }, { "epoch": 0.15905096660808435, "grad_norm": 2.5112240314483643, "learning_rate": 3.618e-06, "loss": 0.1841, "step": 1810 }, { "epoch": 0.15992970123022848, "grad_norm": 2.9127445220947266, "learning_rate": 3.6380000000000006e-06, "loss": 0.1836, "step": 1820 }, { "epoch": 0.1608084358523726, "grad_norm": 2.128004550933838, "learning_rate": 3.6580000000000004e-06, "loss": 0.1746, "step": 1830 }, { "epoch": 0.1616871704745167, "grad_norm": 2.4589202404022217, "learning_rate": 3.678e-06, "loss": 0.1823, "step": 1840 }, { "epoch": 0.1625659050966608, "grad_norm": 2.79221773147583, "learning_rate": 3.698e-06, "loss": 0.1806, "step": 1850 }, { "epoch": 0.1634446397188049, "grad_norm": 5.28768253326416, "learning_rate": 3.718e-06, "loss": 0.1791, "step": 1860 }, { "epoch": 0.16432337434094904, "grad_norm": 1.8026188611984253, "learning_rate": 3.738e-06, "loss": 0.1681, "step": 1870 }, { "epoch": 0.16520210896309315, "grad_norm": 2.4017155170440674, "learning_rate": 3.7580000000000003e-06, "loss": 0.1755, "step": 1880 }, { "epoch": 0.16608084358523725, "grad_norm": 2.3145155906677246, "learning_rate": 3.778e-06, "loss": 0.1809, "step": 1890 }, { "epoch": 0.16695957820738136, "grad_norm": 2.4866645336151123, "learning_rate": 3.7980000000000003e-06, "loss": 0.1662, "step": 1900 }, { "epoch": 0.1678383128295255, "grad_norm": 2.165557622909546, "learning_rate": 3.818e-06, "loss": 0.1705, "step": 1910 }, { "epoch": 0.1687170474516696, "grad_norm": 2.1025986671447754, "learning_rate": 3.8379999999999995e-06, "loss": 0.1691, "step": 1920 }, { "epoch": 0.1695957820738137, "grad_norm": 2.5322656631469727, "learning_rate": 3.858e-06, "loss": 0.165, "step": 1930 }, { "epoch": 0.1704745166959578, "grad_norm": 1.802471399307251, "learning_rate": 3.878e-06, "loss": 0.1719, "step": 1940 }, { "epoch": 0.17135325131810195, "grad_norm": 1.8138082027435303, "learning_rate": 3.898000000000001e-06, "loss": 0.1655, "step": 1950 }, { "epoch": 0.17223198594024605, "grad_norm": 2.0405566692352295, "learning_rate": 3.9180000000000004e-06, "loss": 0.1681, "step": 1960 }, { "epoch": 0.17311072056239016, "grad_norm": 2.5012738704681396, "learning_rate": 3.938e-06, "loss": 0.1693, "step": 1970 }, { "epoch": 0.17398945518453426, "grad_norm": 2.5141043663024902, "learning_rate": 3.958e-06, "loss": 0.1667, "step": 1980 }, { "epoch": 0.17486818980667837, "grad_norm": 1.9684289693832397, "learning_rate": 3.978000000000001e-06, "loss": 0.1647, "step": 1990 }, { "epoch": 0.1757469244288225, "grad_norm": 1.9283367395401, "learning_rate": 3.9980000000000005e-06, "loss": 0.1633, "step": 2000 }, { "epoch": 0.1766256590509666, "grad_norm": 2.5559628009796143, "learning_rate": 4.018e-06, "loss": 0.1609, "step": 2010 }, { "epoch": 0.17750439367311072, "grad_norm": 2.1618239879608154, "learning_rate": 4.038e-06, "loss": 0.1626, "step": 2020 }, { "epoch": 0.17838312829525482, "grad_norm": 3.445176601409912, "learning_rate": 4.058e-06, "loss": 0.1632, "step": 2030 }, { "epoch": 0.17926186291739896, "grad_norm": 2.6116364002227783, "learning_rate": 4.078e-06, "loss": 0.155, "step": 2040 }, { "epoch": 0.18014059753954306, "grad_norm": 2.3290865421295166, "learning_rate": 4.098e-06, "loss": 0.1599, "step": 2050 }, { "epoch": 0.18101933216168717, "grad_norm": 2.273310661315918, "learning_rate": 4.118e-06, "loss": 0.1582, "step": 2060 }, { "epoch": 0.18189806678383127, "grad_norm": 2.0280520915985107, "learning_rate": 4.138e-06, "loss": 0.1579, "step": 2070 }, { "epoch": 0.1827768014059754, "grad_norm": 2.0889034271240234, "learning_rate": 4.158e-06, "loss": 0.1423, "step": 2080 }, { "epoch": 0.18365553602811951, "grad_norm": 1.810962200164795, "learning_rate": 4.178e-06, "loss": 0.1546, "step": 2090 }, { "epoch": 0.18453427065026362, "grad_norm": 1.9627288579940796, "learning_rate": 4.198e-06, "loss": 0.1489, "step": 2100 }, { "epoch": 0.18541300527240773, "grad_norm": 1.6989834308624268, "learning_rate": 4.218e-06, "loss": 0.1481, "step": 2110 }, { "epoch": 0.18629173989455183, "grad_norm": 2.5804450511932373, "learning_rate": 4.238e-06, "loss": 0.1465, "step": 2120 }, { "epoch": 0.18717047451669597, "grad_norm": 1.6843068599700928, "learning_rate": 4.2580000000000006e-06, "loss": 0.1534, "step": 2130 }, { "epoch": 0.18804920913884007, "grad_norm": 2.3447439670562744, "learning_rate": 4.278e-06, "loss": 0.1413, "step": 2140 }, { "epoch": 0.18892794376098418, "grad_norm": 1.9160220623016357, "learning_rate": 4.298e-06, "loss": 0.1457, "step": 2150 }, { "epoch": 0.18980667838312829, "grad_norm": 1.8627796173095703, "learning_rate": 4.318000000000001e-06, "loss": 0.1383, "step": 2160 }, { "epoch": 0.19068541300527242, "grad_norm": 2.171884775161743, "learning_rate": 4.338000000000001e-06, "loss": 0.1388, "step": 2170 }, { "epoch": 0.19156414762741653, "grad_norm": 2.092026472091675, "learning_rate": 4.3580000000000005e-06, "loss": 0.1395, "step": 2180 }, { "epoch": 0.19244288224956063, "grad_norm": 2.034827470779419, "learning_rate": 4.378e-06, "loss": 0.1394, "step": 2190 }, { "epoch": 0.19332161687170474, "grad_norm": 2.778254747390747, "learning_rate": 4.398e-06, "loss": 0.1536, "step": 2200 }, { "epoch": 0.19420035149384884, "grad_norm": 2.5266220569610596, "learning_rate": 4.418e-06, "loss": 0.1416, "step": 2210 }, { "epoch": 0.19507908611599298, "grad_norm": 2.804205894470215, "learning_rate": 4.4380000000000005e-06, "loss": 0.1479, "step": 2220 }, { "epoch": 0.19595782073813708, "grad_norm": 2.5284831523895264, "learning_rate": 4.458e-06, "loss": 0.1382, "step": 2230 }, { "epoch": 0.1968365553602812, "grad_norm": 2.431641101837158, "learning_rate": 4.478e-06, "loss": 0.1385, "step": 2240 }, { "epoch": 0.1977152899824253, "grad_norm": 2.5469958782196045, "learning_rate": 4.498e-06, "loss": 0.1408, "step": 2250 }, { "epoch": 0.19859402460456943, "grad_norm": 2.5016205310821533, "learning_rate": 4.518e-06, "loss": 0.1442, "step": 2260 }, { "epoch": 0.19947275922671354, "grad_norm": 2.403317451477051, "learning_rate": 4.538e-06, "loss": 0.1393, "step": 2270 }, { "epoch": 0.20035149384885764, "grad_norm": 2.061683416366577, "learning_rate": 4.558e-06, "loss": 0.1302, "step": 2280 }, { "epoch": 0.20123022847100175, "grad_norm": 2.353365898132324, "learning_rate": 4.578e-06, "loss": 0.1365, "step": 2290 }, { "epoch": 0.20210896309314588, "grad_norm": 2.067767381668091, "learning_rate": 4.598e-06, "loss": 0.1352, "step": 2300 }, { "epoch": 0.20298769771529, "grad_norm": 1.978219747543335, "learning_rate": 4.6180000000000005e-06, "loss": 0.1398, "step": 2310 }, { "epoch": 0.2038664323374341, "grad_norm": 2.370285749435425, "learning_rate": 4.638e-06, "loss": 0.1327, "step": 2320 }, { "epoch": 0.2047451669595782, "grad_norm": 1.9646838903427124, "learning_rate": 4.658000000000001e-06, "loss": 0.1409, "step": 2330 }, { "epoch": 0.2056239015817223, "grad_norm": 1.5867830514907837, "learning_rate": 4.678000000000001e-06, "loss": 0.1284, "step": 2340 }, { "epoch": 0.20650263620386644, "grad_norm": 2.3547122478485107, "learning_rate": 4.698000000000001e-06, "loss": 0.1334, "step": 2350 }, { "epoch": 0.20738137082601055, "grad_norm": 2.3618783950805664, "learning_rate": 4.718e-06, "loss": 0.1361, "step": 2360 }, { "epoch": 0.20826010544815465, "grad_norm": 2.877232313156128, "learning_rate": 4.738e-06, "loss": 0.1284, "step": 2370 }, { "epoch": 0.20913884007029876, "grad_norm": 2.48787522315979, "learning_rate": 4.758e-06, "loss": 0.1316, "step": 2380 }, { "epoch": 0.2100175746924429, "grad_norm": 2.332261323928833, "learning_rate": 4.778000000000001e-06, "loss": 0.1306, "step": 2390 }, { "epoch": 0.210896309314587, "grad_norm": 2.069209098815918, "learning_rate": 4.7980000000000005e-06, "loss": 0.1255, "step": 2400 }, { "epoch": 0.2117750439367311, "grad_norm": 1.8646126985549927, "learning_rate": 4.818e-06, "loss": 0.1217, "step": 2410 }, { "epoch": 0.2126537785588752, "grad_norm": 1.2744523286819458, "learning_rate": 4.838e-06, "loss": 0.1342, "step": 2420 }, { "epoch": 0.21353251318101935, "grad_norm": 1.5507313013076782, "learning_rate": 4.858e-06, "loss": 0.1256, "step": 2430 }, { "epoch": 0.21441124780316345, "grad_norm": 1.8727538585662842, "learning_rate": 4.878e-06, "loss": 0.1281, "step": 2440 }, { "epoch": 0.21528998242530756, "grad_norm": 2.6271703243255615, "learning_rate": 4.898e-06, "loss": 0.127, "step": 2450 }, { "epoch": 0.21616871704745166, "grad_norm": 1.92905592918396, "learning_rate": 4.918e-06, "loss": 0.1204, "step": 2460 }, { "epoch": 0.21704745166959577, "grad_norm": 2.1970415115356445, "learning_rate": 4.938e-06, "loss": 0.1272, "step": 2470 }, { "epoch": 0.2179261862917399, "grad_norm": 2.2590746879577637, "learning_rate": 4.958e-06, "loss": 0.1229, "step": 2480 }, { "epoch": 0.218804920913884, "grad_norm": 1.7315181493759155, "learning_rate": 4.978e-06, "loss": 0.1294, "step": 2490 }, { "epoch": 0.21968365553602812, "grad_norm": 2.187256097793579, "learning_rate": 4.998e-06, "loss": 0.1254, "step": 2500 }, { "epoch": 0.22056239015817222, "grad_norm": 2.330751895904541, "learning_rate": 5.018e-06, "loss": 0.1239, "step": 2510 }, { "epoch": 0.22144112478031636, "grad_norm": 1.802083969116211, "learning_rate": 5.038000000000001e-06, "loss": 0.1215, "step": 2520 }, { "epoch": 0.22231985940246046, "grad_norm": 2.8694326877593994, "learning_rate": 5.0580000000000005e-06, "loss": 0.1215, "step": 2530 }, { "epoch": 0.22319859402460457, "grad_norm": 2.380300760269165, "learning_rate": 5.078e-06, "loss": 0.116, "step": 2540 }, { "epoch": 0.22407732864674867, "grad_norm": 2.7429630756378174, "learning_rate": 5.098e-06, "loss": 0.126, "step": 2550 }, { "epoch": 0.22495606326889278, "grad_norm": 3.1724154949188232, "learning_rate": 5.118000000000001e-06, "loss": 0.1305, "step": 2560 }, { "epoch": 0.22583479789103691, "grad_norm": 2.4605634212493896, "learning_rate": 5.138000000000001e-06, "loss": 0.1183, "step": 2570 }, { "epoch": 0.22671353251318102, "grad_norm": 3.17720890045166, "learning_rate": 5.158e-06, "loss": 0.1212, "step": 2580 }, { "epoch": 0.22759226713532513, "grad_norm": 2.523125648498535, "learning_rate": 5.178e-06, "loss": 0.1109, "step": 2590 }, { "epoch": 0.22847100175746923, "grad_norm": 2.741697311401367, "learning_rate": 5.198e-06, "loss": 0.124, "step": 2600 }, { "epoch": 0.22934973637961337, "grad_norm": 2.240185022354126, "learning_rate": 5.218e-06, "loss": 0.1172, "step": 2610 }, { "epoch": 0.23022847100175747, "grad_norm": 3.0612666606903076, "learning_rate": 5.2380000000000005e-06, "loss": 0.1181, "step": 2620 }, { "epoch": 0.23110720562390158, "grad_norm": 1.7941676378250122, "learning_rate": 5.258e-06, "loss": 0.1163, "step": 2630 }, { "epoch": 0.23198594024604569, "grad_norm": 2.060241460800171, "learning_rate": 5.278e-06, "loss": 0.1184, "step": 2640 }, { "epoch": 0.23286467486818982, "grad_norm": 2.2126176357269287, "learning_rate": 5.298e-06, "loss": 0.1162, "step": 2650 }, { "epoch": 0.23374340949033393, "grad_norm": 2.1816959381103516, "learning_rate": 5.318e-06, "loss": 0.1169, "step": 2660 }, { "epoch": 0.23462214411247803, "grad_norm": 1.7983324527740479, "learning_rate": 5.3379999999999995e-06, "loss": 0.1173, "step": 2670 }, { "epoch": 0.23550087873462214, "grad_norm": 1.35907781124115, "learning_rate": 5.358e-06, "loss": 0.1163, "step": 2680 }, { "epoch": 0.23637961335676624, "grad_norm": 1.972160816192627, "learning_rate": 5.378e-06, "loss": 0.1148, "step": 2690 }, { "epoch": 0.23725834797891038, "grad_norm": 2.1841633319854736, "learning_rate": 5.398000000000001e-06, "loss": 0.11, "step": 2700 }, { "epoch": 0.23813708260105448, "grad_norm": 2.338524341583252, "learning_rate": 5.4180000000000005e-06, "loss": 0.1135, "step": 2710 }, { "epoch": 0.2390158172231986, "grad_norm": 2.043558359146118, "learning_rate": 5.438e-06, "loss": 0.1147, "step": 2720 }, { "epoch": 0.2398945518453427, "grad_norm": 2.6492252349853516, "learning_rate": 5.458e-06, "loss": 0.1144, "step": 2730 }, { "epoch": 0.24077328646748683, "grad_norm": 1.8510220050811768, "learning_rate": 5.478000000000001e-06, "loss": 0.1081, "step": 2740 }, { "epoch": 0.24165202108963094, "grad_norm": 2.0844838619232178, "learning_rate": 5.4980000000000006e-06, "loss": 0.1034, "step": 2750 }, { "epoch": 0.24253075571177504, "grad_norm": 2.42154860496521, "learning_rate": 5.518e-06, "loss": 0.1077, "step": 2760 }, { "epoch": 0.24340949033391915, "grad_norm": 1.5807160139083862, "learning_rate": 5.538e-06, "loss": 0.1139, "step": 2770 }, { "epoch": 0.24428822495606328, "grad_norm": 1.771907925605774, "learning_rate": 5.558e-06, "loss": 0.1054, "step": 2780 }, { "epoch": 0.2451669595782074, "grad_norm": 1.9723347425460815, "learning_rate": 5.578000000000001e-06, "loss": 0.1092, "step": 2790 }, { "epoch": 0.2460456942003515, "grad_norm": 2.0771758556365967, "learning_rate": 5.5980000000000004e-06, "loss": 0.1117, "step": 2800 }, { "epoch": 0.2469244288224956, "grad_norm": 2.2821900844573975, "learning_rate": 5.618e-06, "loss": 0.1148, "step": 2810 }, { "epoch": 0.2478031634446397, "grad_norm": 1.4923632144927979, "learning_rate": 5.638e-06, "loss": 0.1081, "step": 2820 }, { "epoch": 0.24868189806678384, "grad_norm": 2.3057737350463867, "learning_rate": 5.658e-06, "loss": 0.1056, "step": 2830 }, { "epoch": 0.24956063268892795, "grad_norm": 2.066838026046753, "learning_rate": 5.678e-06, "loss": 0.107, "step": 2840 }, { "epoch": 0.2504393673110721, "grad_norm": 2.1274232864379883, "learning_rate": 5.698e-06, "loss": 0.1119, "step": 2850 }, { "epoch": 0.2513181019332162, "grad_norm": 2.9787166118621826, "learning_rate": 5.718e-06, "loss": 0.1167, "step": 2860 }, { "epoch": 0.2521968365553603, "grad_norm": 2.5948004722595215, "learning_rate": 5.738e-06, "loss": 0.1101, "step": 2870 }, { "epoch": 0.2530755711775044, "grad_norm": 1.8119170665740967, "learning_rate": 5.758000000000001e-06, "loss": 0.1069, "step": 2880 }, { "epoch": 0.2539543057996485, "grad_norm": 3.234044313430786, "learning_rate": 5.778e-06, "loss": 0.1087, "step": 2890 }, { "epoch": 0.2548330404217926, "grad_norm": 1.924469232559204, "learning_rate": 5.798e-06, "loss": 0.1025, "step": 2900 }, { "epoch": 0.2557117750439367, "grad_norm": 2.408850908279419, "learning_rate": 5.818000000000001e-06, "loss": 0.1029, "step": 2910 }, { "epoch": 0.2565905096660808, "grad_norm": 2.4093515872955322, "learning_rate": 5.838000000000001e-06, "loss": 0.1105, "step": 2920 }, { "epoch": 0.25746924428822493, "grad_norm": 1.8465616703033447, "learning_rate": 5.8580000000000005e-06, "loss": 0.1115, "step": 2930 }, { "epoch": 0.2583479789103691, "grad_norm": 2.247119426727295, "learning_rate": 5.878e-06, "loss": 0.1132, "step": 2940 }, { "epoch": 0.2592267135325132, "grad_norm": 2.5168042182922363, "learning_rate": 5.898e-06, "loss": 0.1065, "step": 2950 }, { "epoch": 0.2601054481546573, "grad_norm": 2.0775747299194336, "learning_rate": 5.918000000000001e-06, "loss": 0.1073, "step": 2960 }, { "epoch": 0.2609841827768014, "grad_norm": 1.3593918085098267, "learning_rate": 5.9380000000000006e-06, "loss": 0.105, "step": 2970 }, { "epoch": 0.2618629173989455, "grad_norm": 1.7398275136947632, "learning_rate": 5.958e-06, "loss": 0.1014, "step": 2980 }, { "epoch": 0.2627416520210896, "grad_norm": 2.5759592056274414, "learning_rate": 5.978e-06, "loss": 0.1036, "step": 2990 }, { "epoch": 0.26362038664323373, "grad_norm": 2.644089460372925, "learning_rate": 5.998e-06, "loss": 0.1047, "step": 3000 }, { "epoch": 0.26449912126537783, "grad_norm": 2.86651349067688, "learning_rate": 6.018e-06, "loss": 0.0995, "step": 3010 }, { "epoch": 0.26537785588752194, "grad_norm": 2.1200671195983887, "learning_rate": 6.0380000000000005e-06, "loss": 0.1042, "step": 3020 }, { "epoch": 0.2662565905096661, "grad_norm": 2.216271162033081, "learning_rate": 6.058e-06, "loss": 0.0988, "step": 3030 }, { "epoch": 0.2671353251318102, "grad_norm": 2.3444623947143555, "learning_rate": 6.078e-06, "loss": 0.103, "step": 3040 }, { "epoch": 0.2680140597539543, "grad_norm": 1.936098575592041, "learning_rate": 6.098e-06, "loss": 0.0995, "step": 3050 }, { "epoch": 0.2688927943760984, "grad_norm": 1.6453566551208496, "learning_rate": 6.118e-06, "loss": 0.1001, "step": 3060 }, { "epoch": 0.2697715289982425, "grad_norm": 1.6412616968154907, "learning_rate": 6.138e-06, "loss": 0.1032, "step": 3070 }, { "epoch": 0.27065026362038663, "grad_norm": 1.7546085119247437, "learning_rate": 6.158e-06, "loss": 0.0982, "step": 3080 }, { "epoch": 0.27152899824253074, "grad_norm": 1.8211009502410889, "learning_rate": 6.178000000000001e-06, "loss": 0.1038, "step": 3090 }, { "epoch": 0.27240773286467485, "grad_norm": 1.630876064300537, "learning_rate": 6.198000000000001e-06, "loss": 0.1027, "step": 3100 }, { "epoch": 0.273286467486819, "grad_norm": 1.5953483581542969, "learning_rate": 6.2180000000000004e-06, "loss": 0.0992, "step": 3110 }, { "epoch": 0.2741652021089631, "grad_norm": 1.9550857543945312, "learning_rate": 6.238e-06, "loss": 0.1013, "step": 3120 }, { "epoch": 0.2750439367311072, "grad_norm": 1.7180119752883911, "learning_rate": 6.258e-06, "loss": 0.0985, "step": 3130 }, { "epoch": 0.2759226713532513, "grad_norm": 1.5465073585510254, "learning_rate": 6.278000000000001e-06, "loss": 0.1024, "step": 3140 }, { "epoch": 0.27680140597539543, "grad_norm": 2.0354764461517334, "learning_rate": 6.298e-06, "loss": 0.0984, "step": 3150 }, { "epoch": 0.27768014059753954, "grad_norm": 2.0395641326904297, "learning_rate": 6.318e-06, "loss": 0.0989, "step": 3160 }, { "epoch": 0.27855887521968364, "grad_norm": 1.581426978111267, "learning_rate": 6.338000000000001e-06, "loss": 0.0969, "step": 3170 }, { "epoch": 0.27943760984182775, "grad_norm": 1.978210210800171, "learning_rate": 6.358e-06, "loss": 0.0949, "step": 3180 }, { "epoch": 0.28031634446397186, "grad_norm": 2.4446499347686768, "learning_rate": 6.378000000000001e-06, "loss": 0.098, "step": 3190 }, { "epoch": 0.281195079086116, "grad_norm": 1.303643822669983, "learning_rate": 6.3979999999999996e-06, "loss": 0.0996, "step": 3200 }, { "epoch": 0.2820738137082601, "grad_norm": 1.683947205543518, "learning_rate": 6.418e-06, "loss": 0.1, "step": 3210 }, { "epoch": 0.28295254833040423, "grad_norm": 2.4734599590301514, "learning_rate": 6.438000000000001e-06, "loss": 0.0936, "step": 3220 }, { "epoch": 0.28383128295254834, "grad_norm": 2.3828072547912598, "learning_rate": 6.458e-06, "loss": 0.0925, "step": 3230 }, { "epoch": 0.28471001757469244, "grad_norm": 2.4500648975372314, "learning_rate": 6.4780000000000005e-06, "loss": 0.099, "step": 3240 }, { "epoch": 0.28558875219683655, "grad_norm": 1.8788725137710571, "learning_rate": 6.498e-06, "loss": 0.0917, "step": 3250 }, { "epoch": 0.28646748681898065, "grad_norm": 1.8513479232788086, "learning_rate": 6.518e-06, "loss": 0.1029, "step": 3260 }, { "epoch": 0.28734622144112476, "grad_norm": 1.384954571723938, "learning_rate": 6.538e-06, "loss": 0.0969, "step": 3270 }, { "epoch": 0.28822495606326887, "grad_norm": 2.052896022796631, "learning_rate": 6.5580000000000006e-06, "loss": 0.0985, "step": 3280 }, { "epoch": 0.28910369068541303, "grad_norm": 1.710828185081482, "learning_rate": 6.578000000000001e-06, "loss": 0.1047, "step": 3290 }, { "epoch": 0.28998242530755713, "grad_norm": 2.2466678619384766, "learning_rate": 6.598e-06, "loss": 0.0959, "step": 3300 }, { "epoch": 0.29086115992970124, "grad_norm": 2.151179313659668, "learning_rate": 6.618000000000001e-06, "loss": 0.096, "step": 3310 }, { "epoch": 0.29173989455184535, "grad_norm": 2.450803756713867, "learning_rate": 6.638e-06, "loss": 0.0981, "step": 3320 }, { "epoch": 0.29261862917398945, "grad_norm": 1.995654821395874, "learning_rate": 6.6580000000000005e-06, "loss": 0.0936, "step": 3330 }, { "epoch": 0.29349736379613356, "grad_norm": 1.233574628829956, "learning_rate": 6.678000000000001e-06, "loss": 0.0949, "step": 3340 }, { "epoch": 0.29437609841827767, "grad_norm": 1.802309274673462, "learning_rate": 6.698e-06, "loss": 0.0933, "step": 3350 }, { "epoch": 0.29525483304042177, "grad_norm": 1.7199766635894775, "learning_rate": 6.718000000000001e-06, "loss": 0.0934, "step": 3360 }, { "epoch": 0.2961335676625659, "grad_norm": 1.4234678745269775, "learning_rate": 6.738e-06, "loss": 0.0982, "step": 3370 }, { "epoch": 0.29701230228471004, "grad_norm": 1.9691678285598755, "learning_rate": 6.758e-06, "loss": 0.0916, "step": 3380 }, { "epoch": 0.29789103690685415, "grad_norm": 2.1342763900756836, "learning_rate": 6.778000000000001e-06, "loss": 0.0889, "step": 3390 }, { "epoch": 0.29876977152899825, "grad_norm": 2.3572824001312256, "learning_rate": 6.798e-06, "loss": 0.1012, "step": 3400 }, { "epoch": 0.29964850615114236, "grad_norm": 1.845125436782837, "learning_rate": 6.818000000000001e-06, "loss": 0.0865, "step": 3410 }, { "epoch": 0.30052724077328646, "grad_norm": 1.5901755094528198, "learning_rate": 6.838e-06, "loss": 0.0873, "step": 3420 }, { "epoch": 0.30140597539543057, "grad_norm": 1.7708345651626587, "learning_rate": 6.858e-06, "loss": 0.0944, "step": 3430 }, { "epoch": 0.3022847100175747, "grad_norm": 1.9271360635757446, "learning_rate": 6.878e-06, "loss": 0.0992, "step": 3440 }, { "epoch": 0.3031634446397188, "grad_norm": 1.821614146232605, "learning_rate": 6.898000000000001e-06, "loss": 0.0949, "step": 3450 }, { "epoch": 0.30404217926186294, "grad_norm": 1.6612801551818848, "learning_rate": 6.9180000000000005e-06, "loss": 0.0966, "step": 3460 }, { "epoch": 0.30492091388400705, "grad_norm": 1.4196553230285645, "learning_rate": 6.938e-06, "loss": 0.0847, "step": 3470 }, { "epoch": 0.30579964850615116, "grad_norm": 2.2649617195129395, "learning_rate": 6.958000000000001e-06, "loss": 0.0941, "step": 3480 }, { "epoch": 0.30667838312829526, "grad_norm": 2.283384084701538, "learning_rate": 6.978e-06, "loss": 0.0894, "step": 3490 }, { "epoch": 0.30755711775043937, "grad_norm": 1.497635841369629, "learning_rate": 6.998000000000001e-06, "loss": 0.0884, "step": 3500 }, { "epoch": 0.3084358523725835, "grad_norm": 2.2702796459198, "learning_rate": 7.018000000000001e-06, "loss": 0.0868, "step": 3510 }, { "epoch": 0.3093145869947276, "grad_norm": 1.8912830352783203, "learning_rate": 7.038e-06, "loss": 0.0907, "step": 3520 }, { "epoch": 0.3101933216168717, "grad_norm": 1.430221676826477, "learning_rate": 7.058000000000001e-06, "loss": 0.0901, "step": 3530 }, { "epoch": 0.3110720562390158, "grad_norm": 1.5795549154281616, "learning_rate": 7.078e-06, "loss": 0.0891, "step": 3540 }, { "epoch": 0.31195079086115995, "grad_norm": 1.5496853590011597, "learning_rate": 7.0980000000000005e-06, "loss": 0.0886, "step": 3550 }, { "epoch": 0.31282952548330406, "grad_norm": 1.4072401523590088, "learning_rate": 7.1179999999999994e-06, "loss": 0.0913, "step": 3560 }, { "epoch": 0.31370826010544817, "grad_norm": 2.0257627964019775, "learning_rate": 7.138e-06, "loss": 0.0894, "step": 3570 }, { "epoch": 0.3145869947275923, "grad_norm": 1.736790418624878, "learning_rate": 7.158000000000001e-06, "loss": 0.0935, "step": 3580 }, { "epoch": 0.3154657293497364, "grad_norm": 1.4992486238479614, "learning_rate": 7.178e-06, "loss": 0.089, "step": 3590 }, { "epoch": 0.3163444639718805, "grad_norm": 1.9044816493988037, "learning_rate": 7.198e-06, "loss": 0.0935, "step": 3600 }, { "epoch": 0.3172231985940246, "grad_norm": 1.5368399620056152, "learning_rate": 7.218e-06, "loss": 0.086, "step": 3610 }, { "epoch": 0.3181019332161687, "grad_norm": 1.3563343286514282, "learning_rate": 7.238e-06, "loss": 0.0859, "step": 3620 }, { "epoch": 0.3189806678383128, "grad_norm": 2.433441162109375, "learning_rate": 7.258000000000001e-06, "loss": 0.0882, "step": 3630 }, { "epoch": 0.31985940246045697, "grad_norm": 1.7010964155197144, "learning_rate": 7.2780000000000005e-06, "loss": 0.084, "step": 3640 }, { "epoch": 0.32073813708260107, "grad_norm": 2.0799717903137207, "learning_rate": 7.298e-06, "loss": 0.0891, "step": 3650 }, { "epoch": 0.3216168717047452, "grad_norm": 1.861369013786316, "learning_rate": 7.318e-06, "loss": 0.0885, "step": 3660 }, { "epoch": 0.3224956063268893, "grad_norm": 1.9444239139556885, "learning_rate": 7.338000000000001e-06, "loss": 0.0881, "step": 3670 }, { "epoch": 0.3233743409490334, "grad_norm": 1.8194053173065186, "learning_rate": 7.358000000000001e-06, "loss": 0.0855, "step": 3680 }, { "epoch": 0.3242530755711775, "grad_norm": 1.343105673789978, "learning_rate": 7.378e-06, "loss": 0.0872, "step": 3690 }, { "epoch": 0.3251318101933216, "grad_norm": 2.030895471572876, "learning_rate": 7.398000000000001e-06, "loss": 0.0886, "step": 3700 }, { "epoch": 0.3260105448154657, "grad_norm": 2.0013155937194824, "learning_rate": 7.418e-06, "loss": 0.0843, "step": 3710 }, { "epoch": 0.3268892794376098, "grad_norm": 1.5180984735488892, "learning_rate": 7.438000000000001e-06, "loss": 0.088, "step": 3720 }, { "epoch": 0.327768014059754, "grad_norm": 1.8028545379638672, "learning_rate": 7.4579999999999996e-06, "loss": 0.0861, "step": 3730 }, { "epoch": 0.3286467486818981, "grad_norm": 1.6797069311141968, "learning_rate": 7.478e-06, "loss": 0.0869, "step": 3740 }, { "epoch": 0.3295254833040422, "grad_norm": 1.7574487924575806, "learning_rate": 7.498000000000001e-06, "loss": 0.0882, "step": 3750 }, { "epoch": 0.3304042179261863, "grad_norm": 2.0643138885498047, "learning_rate": 7.518e-06, "loss": 0.0828, "step": 3760 }, { "epoch": 0.3312829525483304, "grad_norm": 1.5959566831588745, "learning_rate": 7.5380000000000005e-06, "loss": 0.0868, "step": 3770 }, { "epoch": 0.3321616871704745, "grad_norm": 2.5017781257629395, "learning_rate": 7.5579999999999995e-06, "loss": 0.0821, "step": 3780 }, { "epoch": 0.3330404217926186, "grad_norm": 2.7113587856292725, "learning_rate": 7.578e-06, "loss": 0.0905, "step": 3790 }, { "epoch": 0.3339191564147627, "grad_norm": 1.8532171249389648, "learning_rate": 7.598000000000001e-06, "loss": 0.092, "step": 3800 }, { "epoch": 0.3347978910369069, "grad_norm": 1.5728623867034912, "learning_rate": 7.618e-06, "loss": 0.0812, "step": 3810 }, { "epoch": 0.335676625659051, "grad_norm": 1.6414227485656738, "learning_rate": 7.638e-06, "loss": 0.0876, "step": 3820 }, { "epoch": 0.3365553602811951, "grad_norm": 1.8019486665725708, "learning_rate": 7.658e-06, "loss": 0.0874, "step": 3830 }, { "epoch": 0.3374340949033392, "grad_norm": 1.8055164813995361, "learning_rate": 7.678e-06, "loss": 0.0859, "step": 3840 }, { "epoch": 0.3383128295254833, "grad_norm": 1.2197480201721191, "learning_rate": 7.698000000000002e-06, "loss": 0.0809, "step": 3850 }, { "epoch": 0.3391915641476274, "grad_norm": 1.7019689083099365, "learning_rate": 7.718e-06, "loss": 0.0849, "step": 3860 }, { "epoch": 0.3400702987697715, "grad_norm": 1.5270233154296875, "learning_rate": 7.738000000000001e-06, "loss": 0.0867, "step": 3870 }, { "epoch": 0.3409490333919156, "grad_norm": 1.7653752565383911, "learning_rate": 7.758e-06, "loss": 0.0835, "step": 3880 }, { "epoch": 0.34182776801405973, "grad_norm": 1.7155466079711914, "learning_rate": 7.778e-06, "loss": 0.0844, "step": 3890 }, { "epoch": 0.3427065026362039, "grad_norm": 1.4172050952911377, "learning_rate": 7.798e-06, "loss": 0.0823, "step": 3900 }, { "epoch": 0.343585237258348, "grad_norm": 1.658683180809021, "learning_rate": 7.818e-06, "loss": 0.0878, "step": 3910 }, { "epoch": 0.3444639718804921, "grad_norm": 1.683134913444519, "learning_rate": 7.838e-06, "loss": 0.0905, "step": 3920 }, { "epoch": 0.3453427065026362, "grad_norm": 1.904125452041626, "learning_rate": 7.858e-06, "loss": 0.0782, "step": 3930 }, { "epoch": 0.3462214411247803, "grad_norm": 1.839957356452942, "learning_rate": 7.878000000000001e-06, "loss": 0.0827, "step": 3940 }, { "epoch": 0.3471001757469244, "grad_norm": 1.7248456478118896, "learning_rate": 7.898e-06, "loss": 0.085, "step": 3950 }, { "epoch": 0.34797891036906853, "grad_norm": 2.092602252960205, "learning_rate": 7.918000000000001e-06, "loss": 0.0885, "step": 3960 }, { "epoch": 0.34885764499121263, "grad_norm": 1.9122580289840698, "learning_rate": 7.938000000000001e-06, "loss": 0.0879, "step": 3970 }, { "epoch": 0.34973637961335674, "grad_norm": 2.432152032852173, "learning_rate": 7.958e-06, "loss": 0.093, "step": 3980 }, { "epoch": 0.3506151142355009, "grad_norm": 2.101242780685425, "learning_rate": 7.978e-06, "loss": 0.089, "step": 3990 }, { "epoch": 0.351493848857645, "grad_norm": 1.302430510520935, "learning_rate": 7.998e-06, "loss": 0.0829, "step": 4000 }, { "epoch": 0.3523725834797891, "grad_norm": 1.7957179546356201, "learning_rate": 8.018e-06, "loss": 0.0861, "step": 4010 }, { "epoch": 0.3532513181019332, "grad_norm": 1.7636044025421143, "learning_rate": 8.038e-06, "loss": 0.0827, "step": 4020 }, { "epoch": 0.3541300527240773, "grad_norm": 1.97199547290802, "learning_rate": 8.058e-06, "loss": 0.0831, "step": 4030 }, { "epoch": 0.35500878734622143, "grad_norm": 1.2015912532806396, "learning_rate": 8.078000000000001e-06, "loss": 0.0783, "step": 4040 }, { "epoch": 0.35588752196836554, "grad_norm": 1.3419209718704224, "learning_rate": 8.098e-06, "loss": 0.0763, "step": 4050 }, { "epoch": 0.35676625659050965, "grad_norm": 1.2393898963928223, "learning_rate": 8.118000000000001e-06, "loss": 0.0806, "step": 4060 }, { "epoch": 0.35764499121265375, "grad_norm": 1.351449728012085, "learning_rate": 8.137999999999999e-06, "loss": 0.0783, "step": 4070 }, { "epoch": 0.3585237258347979, "grad_norm": 1.3512752056121826, "learning_rate": 8.158e-06, "loss": 0.0807, "step": 4080 }, { "epoch": 0.359402460456942, "grad_norm": 1.8526815176010132, "learning_rate": 8.178e-06, "loss": 0.083, "step": 4090 }, { "epoch": 0.3602811950790861, "grad_norm": 2.068988084793091, "learning_rate": 8.198e-06, "loss": 0.0822, "step": 4100 }, { "epoch": 0.36115992970123023, "grad_norm": 1.5281312465667725, "learning_rate": 8.218e-06, "loss": 0.0801, "step": 4110 }, { "epoch": 0.36203866432337434, "grad_norm": 1.578158974647522, "learning_rate": 8.238e-06, "loss": 0.083, "step": 4120 }, { "epoch": 0.36291739894551844, "grad_norm": 1.3071130514144897, "learning_rate": 8.258000000000001e-06, "loss": 0.0763, "step": 4130 }, { "epoch": 0.36379613356766255, "grad_norm": 1.780167818069458, "learning_rate": 8.278000000000001e-06, "loss": 0.0843, "step": 4140 }, { "epoch": 0.36467486818980666, "grad_norm": 1.4668567180633545, "learning_rate": 8.298000000000001e-06, "loss": 0.0771, "step": 4150 }, { "epoch": 0.3655536028119508, "grad_norm": 2.02119517326355, "learning_rate": 8.318e-06, "loss": 0.0838, "step": 4160 }, { "epoch": 0.3664323374340949, "grad_norm": 1.365477442741394, "learning_rate": 8.338e-06, "loss": 0.0798, "step": 4170 }, { "epoch": 0.36731107205623903, "grad_norm": 1.273240089416504, "learning_rate": 8.358e-06, "loss": 0.0802, "step": 4180 }, { "epoch": 0.36818980667838314, "grad_norm": 1.5557680130004883, "learning_rate": 8.378e-06, "loss": 0.0771, "step": 4190 }, { "epoch": 0.36906854130052724, "grad_norm": 1.1207354068756104, "learning_rate": 8.398e-06, "loss": 0.0774, "step": 4200 }, { "epoch": 0.36994727592267135, "grad_norm": 1.3448114395141602, "learning_rate": 8.418000000000001e-06, "loss": 0.0787, "step": 4210 }, { "epoch": 0.37082601054481545, "grad_norm": 1.436302900314331, "learning_rate": 8.438e-06, "loss": 0.0769, "step": 4220 }, { "epoch": 0.37170474516695956, "grad_norm": 1.0816749334335327, "learning_rate": 8.458000000000001e-06, "loss": 0.0712, "step": 4230 }, { "epoch": 0.37258347978910367, "grad_norm": 0.9576603174209595, "learning_rate": 8.477999999999999e-06, "loss": 0.0783, "step": 4240 }, { "epoch": 0.37346221441124783, "grad_norm": 2.375091552734375, "learning_rate": 8.498e-06, "loss": 0.078, "step": 4250 }, { "epoch": 0.37434094903339193, "grad_norm": 1.8340996503829956, "learning_rate": 8.518e-06, "loss": 0.0803, "step": 4260 }, { "epoch": 0.37521968365553604, "grad_norm": 1.3901652097702026, "learning_rate": 8.538e-06, "loss": 0.0745, "step": 4270 }, { "epoch": 0.37609841827768015, "grad_norm": 1.0132428407669067, "learning_rate": 8.558e-06, "loss": 0.0788, "step": 4280 }, { "epoch": 0.37697715289982425, "grad_norm": 1.5363519191741943, "learning_rate": 8.578e-06, "loss": 0.0808, "step": 4290 }, { "epoch": 0.37785588752196836, "grad_norm": 1.1054880619049072, "learning_rate": 8.598e-06, "loss": 0.0819, "step": 4300 }, { "epoch": 0.37873462214411246, "grad_norm": 1.7495173215866089, "learning_rate": 8.618000000000001e-06, "loss": 0.0792, "step": 4310 }, { "epoch": 0.37961335676625657, "grad_norm": 1.2746564149856567, "learning_rate": 8.638000000000001e-06, "loss": 0.075, "step": 4320 }, { "epoch": 0.3804920913884007, "grad_norm": 1.492939829826355, "learning_rate": 8.658e-06, "loss": 0.0766, "step": 4330 }, { "epoch": 0.38137082601054484, "grad_norm": 1.7918190956115723, "learning_rate": 8.678e-06, "loss": 0.0802, "step": 4340 }, { "epoch": 0.38224956063268895, "grad_norm": 1.54453444480896, "learning_rate": 8.698e-06, "loss": 0.0757, "step": 4350 }, { "epoch": 0.38312829525483305, "grad_norm": 1.0565292835235596, "learning_rate": 8.718e-06, "loss": 0.0737, "step": 4360 }, { "epoch": 0.38400702987697716, "grad_norm": 1.2128345966339111, "learning_rate": 8.738e-06, "loss": 0.081, "step": 4370 }, { "epoch": 0.38488576449912126, "grad_norm": 1.2913795709609985, "learning_rate": 8.758000000000002e-06, "loss": 0.0794, "step": 4380 }, { "epoch": 0.38576449912126537, "grad_norm": 1.330307126045227, "learning_rate": 8.778e-06, "loss": 0.0782, "step": 4390 }, { "epoch": 0.3866432337434095, "grad_norm": 1.4693490266799927, "learning_rate": 8.798000000000001e-06, "loss": 0.0823, "step": 4400 }, { "epoch": 0.3875219683655536, "grad_norm": 1.3467656373977661, "learning_rate": 8.818e-06, "loss": 0.0827, "step": 4410 }, { "epoch": 0.3884007029876977, "grad_norm": 1.8994150161743164, "learning_rate": 8.838e-06, "loss": 0.0763, "step": 4420 }, { "epoch": 0.38927943760984185, "grad_norm": 1.1950037479400635, "learning_rate": 8.858e-06, "loss": 0.0806, "step": 4430 }, { "epoch": 0.39015817223198596, "grad_norm": 1.7948187589645386, "learning_rate": 8.878e-06, "loss": 0.0764, "step": 4440 }, { "epoch": 0.39103690685413006, "grad_norm": 1.1279445886611938, "learning_rate": 8.898e-06, "loss": 0.0782, "step": 4450 }, { "epoch": 0.39191564147627417, "grad_norm": 1.8394153118133545, "learning_rate": 8.918e-06, "loss": 0.0732, "step": 4460 }, { "epoch": 0.3927943760984183, "grad_norm": 1.4815815687179565, "learning_rate": 8.938e-06, "loss": 0.0742, "step": 4470 }, { "epoch": 0.3936731107205624, "grad_norm": 1.9996079206466675, "learning_rate": 8.958000000000001e-06, "loss": 0.0802, "step": 4480 }, { "epoch": 0.3945518453427065, "grad_norm": 1.4785722494125366, "learning_rate": 8.978000000000001e-06, "loss": 0.076, "step": 4490 }, { "epoch": 0.3954305799648506, "grad_norm": 1.9405239820480347, "learning_rate": 8.998000000000001e-06, "loss": 0.082, "step": 4500 }, { "epoch": 0.39630931458699475, "grad_norm": 1.029089093208313, "learning_rate": 9.018e-06, "loss": 0.0748, "step": 4510 }, { "epoch": 0.39718804920913886, "grad_norm": 1.6018834114074707, "learning_rate": 9.038e-06, "loss": 0.0732, "step": 4520 }, { "epoch": 0.39806678383128297, "grad_norm": 1.4526143074035645, "learning_rate": 9.058e-06, "loss": 0.0751, "step": 4530 }, { "epoch": 0.3989455184534271, "grad_norm": 1.807190179824829, "learning_rate": 9.078e-06, "loss": 0.0743, "step": 4540 }, { "epoch": 0.3998242530755712, "grad_norm": 1.472328782081604, "learning_rate": 9.098000000000002e-06, "loss": 0.0778, "step": 4550 }, { "epoch": 0.4007029876977153, "grad_norm": 1.4430283308029175, "learning_rate": 9.118e-06, "loss": 0.0796, "step": 4560 }, { "epoch": 0.4015817223198594, "grad_norm": 1.2544517517089844, "learning_rate": 9.138000000000001e-06, "loss": 0.0716, "step": 4570 }, { "epoch": 0.4024604569420035, "grad_norm": 1.3429677486419678, "learning_rate": 9.158e-06, "loss": 0.0771, "step": 4580 }, { "epoch": 0.4033391915641476, "grad_norm": 1.6763536930084229, "learning_rate": 9.178000000000001e-06, "loss": 0.082, "step": 4590 }, { "epoch": 0.40421792618629176, "grad_norm": 1.6407326459884644, "learning_rate": 9.198e-06, "loss": 0.0751, "step": 4600 }, { "epoch": 0.40509666080843587, "grad_norm": 1.3331291675567627, "learning_rate": 9.218e-06, "loss": 0.0765, "step": 4610 }, { "epoch": 0.40597539543058, "grad_norm": 1.6689263582229614, "learning_rate": 9.238e-06, "loss": 0.0736, "step": 4620 }, { "epoch": 0.4068541300527241, "grad_norm": 1.710863471031189, "learning_rate": 9.258e-06, "loss": 0.076, "step": 4630 }, { "epoch": 0.4077328646748682, "grad_norm": 1.2745252847671509, "learning_rate": 9.278e-06, "loss": 0.0735, "step": 4640 }, { "epoch": 0.4086115992970123, "grad_norm": 1.6209636926651, "learning_rate": 9.298e-06, "loss": 0.0761, "step": 4650 }, { "epoch": 0.4094903339191564, "grad_norm": 2.0351877212524414, "learning_rate": 9.318e-06, "loss": 0.0771, "step": 4660 }, { "epoch": 0.4103690685413005, "grad_norm": 1.382647156715393, "learning_rate": 9.338000000000001e-06, "loss": 0.0788, "step": 4670 }, { "epoch": 0.4112478031634446, "grad_norm": 1.8995236158370972, "learning_rate": 9.358000000000001e-06, "loss": 0.0722, "step": 4680 }, { "epoch": 0.4121265377855888, "grad_norm": 1.5053101778030396, "learning_rate": 9.378e-06, "loss": 0.0824, "step": 4690 }, { "epoch": 0.4130052724077329, "grad_norm": 1.1820427179336548, "learning_rate": 9.398e-06, "loss": 0.0737, "step": 4700 }, { "epoch": 0.413884007029877, "grad_norm": 1.329729676246643, "learning_rate": 9.418e-06, "loss": 0.0759, "step": 4710 }, { "epoch": 0.4147627416520211, "grad_norm": 1.4497474431991577, "learning_rate": 9.438000000000002e-06, "loss": 0.0777, "step": 4720 }, { "epoch": 0.4156414762741652, "grad_norm": 1.363439679145813, "learning_rate": 9.458e-06, "loss": 0.0747, "step": 4730 }, { "epoch": 0.4165202108963093, "grad_norm": 1.3846591711044312, "learning_rate": 9.478000000000001e-06, "loss": 0.0736, "step": 4740 }, { "epoch": 0.4173989455184534, "grad_norm": 1.3375447988510132, "learning_rate": 9.498e-06, "loss": 0.0769, "step": 4750 }, { "epoch": 0.4182776801405975, "grad_norm": 1.2131069898605347, "learning_rate": 9.518000000000001e-06, "loss": 0.0774, "step": 4760 }, { "epoch": 0.4191564147627416, "grad_norm": 1.288740873336792, "learning_rate": 9.538e-06, "loss": 0.0773, "step": 4770 }, { "epoch": 0.4200351493848858, "grad_norm": 1.3559858798980713, "learning_rate": 9.558e-06, "loss": 0.0798, "step": 4780 }, { "epoch": 0.4209138840070299, "grad_norm": 1.9042999744415283, "learning_rate": 9.578e-06, "loss": 0.0794, "step": 4790 }, { "epoch": 0.421792618629174, "grad_norm": 1.2908687591552734, "learning_rate": 9.598e-06, "loss": 0.0715, "step": 4800 }, { "epoch": 0.4226713532513181, "grad_norm": 1.500057339668274, "learning_rate": 9.618e-06, "loss": 0.0772, "step": 4810 }, { "epoch": 0.4235500878734622, "grad_norm": 1.2668381929397583, "learning_rate": 9.638e-06, "loss": 0.0712, "step": 4820 }, { "epoch": 0.4244288224956063, "grad_norm": 1.5379369258880615, "learning_rate": 9.658e-06, "loss": 0.0764, "step": 4830 }, { "epoch": 0.4253075571177504, "grad_norm": 1.5847082138061523, "learning_rate": 9.678000000000001e-06, "loss": 0.0755, "step": 4840 }, { "epoch": 0.42618629173989453, "grad_norm": 1.5394196510314941, "learning_rate": 9.698e-06, "loss": 0.0725, "step": 4850 }, { "epoch": 0.4270650263620387, "grad_norm": 1.42989981174469, "learning_rate": 9.718e-06, "loss": 0.0778, "step": 4860 }, { "epoch": 0.4279437609841828, "grad_norm": 1.095538854598999, "learning_rate": 9.738e-06, "loss": 0.0732, "step": 4870 }, { "epoch": 0.4288224956063269, "grad_norm": 1.2941069602966309, "learning_rate": 9.758e-06, "loss": 0.0703, "step": 4880 }, { "epoch": 0.429701230228471, "grad_norm": 0.9937630891799927, "learning_rate": 9.778e-06, "loss": 0.0707, "step": 4890 }, { "epoch": 0.4305799648506151, "grad_norm": 1.377485752105713, "learning_rate": 9.798e-06, "loss": 0.0722, "step": 4900 }, { "epoch": 0.4314586994727592, "grad_norm": 1.1031206846237183, "learning_rate": 9.818000000000002e-06, "loss": 0.0711, "step": 4910 }, { "epoch": 0.43233743409490333, "grad_norm": 1.791684865951538, "learning_rate": 9.838e-06, "loss": 0.0778, "step": 4920 }, { "epoch": 0.43321616871704743, "grad_norm": 2.2197062969207764, "learning_rate": 9.858000000000001e-06, "loss": 0.0777, "step": 4930 }, { "epoch": 0.43409490333919154, "grad_norm": 1.6057512760162354, "learning_rate": 9.878000000000001e-06, "loss": 0.0716, "step": 4940 }, { "epoch": 0.4349736379613357, "grad_norm": 1.7177897691726685, "learning_rate": 9.898e-06, "loss": 0.079, "step": 4950 }, { "epoch": 0.4358523725834798, "grad_norm": 1.711667537689209, "learning_rate": 9.918e-06, "loss": 0.0742, "step": 4960 }, { "epoch": 0.4367311072056239, "grad_norm": 1.1543787717819214, "learning_rate": 9.938e-06, "loss": 0.0743, "step": 4970 }, { "epoch": 0.437609841827768, "grad_norm": 1.4856468439102173, "learning_rate": 9.958e-06, "loss": 0.0749, "step": 4980 }, { "epoch": 0.4384885764499121, "grad_norm": 1.723510503768921, "learning_rate": 9.978e-06, "loss": 0.0734, "step": 4990 }, { "epoch": 0.43936731107205623, "grad_norm": 1.340665340423584, "learning_rate": 9.998e-06, "loss": 0.0752, "step": 5000 }, { "epoch": 0.44024604569420034, "grad_norm": 1.5930957794189453, "learning_rate": 1.0018000000000001e-05, "loss": 0.0707, "step": 5010 }, { "epoch": 0.44112478031634444, "grad_norm": 1.51628839969635, "learning_rate": 1.0038e-05, "loss": 0.0771, "step": 5020 }, { "epoch": 0.44200351493848855, "grad_norm": 1.4665250778198242, "learning_rate": 1.0058000000000001e-05, "loss": 0.0707, "step": 5030 }, { "epoch": 0.4428822495606327, "grad_norm": 1.0812610387802124, "learning_rate": 1.0078e-05, "loss": 0.0706, "step": 5040 }, { "epoch": 0.4437609841827768, "grad_norm": 1.27137291431427, "learning_rate": 1.0098e-05, "loss": 0.0692, "step": 5050 }, { "epoch": 0.4446397188049209, "grad_norm": 1.723771333694458, "learning_rate": 1.0118e-05, "loss": 0.0736, "step": 5060 }, { "epoch": 0.44551845342706503, "grad_norm": 1.4355685710906982, "learning_rate": 1.0138e-05, "loss": 0.0698, "step": 5070 }, { "epoch": 0.44639718804920914, "grad_norm": 1.3532863855361938, "learning_rate": 1.0158000000000002e-05, "loss": 0.0695, "step": 5080 }, { "epoch": 0.44727592267135324, "grad_norm": 1.1342772245407104, "learning_rate": 1.0178e-05, "loss": 0.0706, "step": 5090 }, { "epoch": 0.44815465729349735, "grad_norm": 1.389132022857666, "learning_rate": 1.0198000000000001e-05, "loss": 0.0707, "step": 5100 }, { "epoch": 0.44903339191564146, "grad_norm": 1.5353913307189941, "learning_rate": 1.0218000000000001e-05, "loss": 0.0724, "step": 5110 }, { "epoch": 0.44991212653778556, "grad_norm": 1.5393531322479248, "learning_rate": 1.0238000000000001e-05, "loss": 0.0707, "step": 5120 }, { "epoch": 0.4507908611599297, "grad_norm": 1.3386510610580444, "learning_rate": 1.0258e-05, "loss": 0.0681, "step": 5130 }, { "epoch": 0.45166959578207383, "grad_norm": 1.4414421319961548, "learning_rate": 1.0278e-05, "loss": 0.0767, "step": 5140 }, { "epoch": 0.45254833040421794, "grad_norm": 1.3767026662826538, "learning_rate": 1.0298e-05, "loss": 0.0712, "step": 5150 }, { "epoch": 0.45342706502636204, "grad_norm": 1.086778998374939, "learning_rate": 1.0318e-05, "loss": 0.0723, "step": 5160 }, { "epoch": 0.45430579964850615, "grad_norm": 1.0133328437805176, "learning_rate": 1.0338e-05, "loss": 0.0676, "step": 5170 }, { "epoch": 0.45518453427065025, "grad_norm": 1.3533023595809937, "learning_rate": 1.0358000000000001e-05, "loss": 0.0682, "step": 5180 }, { "epoch": 0.45606326889279436, "grad_norm": 1.066445231437683, "learning_rate": 1.0378e-05, "loss": 0.0745, "step": 5190 }, { "epoch": 0.45694200351493847, "grad_norm": 0.9903545379638672, "learning_rate": 1.0398000000000001e-05, "loss": 0.0724, "step": 5200 }, { "epoch": 0.45782073813708263, "grad_norm": 1.2453259229660034, "learning_rate": 1.0418e-05, "loss": 0.071, "step": 5210 }, { "epoch": 0.45869947275922673, "grad_norm": 1.1613255739212036, "learning_rate": 1.0438e-05, "loss": 0.0713, "step": 5220 }, { "epoch": 0.45957820738137084, "grad_norm": 1.3289642333984375, "learning_rate": 1.0458e-05, "loss": 0.0715, "step": 5230 }, { "epoch": 0.46045694200351495, "grad_norm": 1.054144263267517, "learning_rate": 1.0478e-05, "loss": 0.0721, "step": 5240 }, { "epoch": 0.46133567662565905, "grad_norm": 1.0418885946273804, "learning_rate": 1.0498e-05, "loss": 0.0705, "step": 5250 }, { "epoch": 0.46221441124780316, "grad_norm": 1.3572636842727661, "learning_rate": 1.0518e-05, "loss": 0.072, "step": 5260 }, { "epoch": 0.46309314586994726, "grad_norm": 1.018161416053772, "learning_rate": 1.0538000000000001e-05, "loss": 0.0728, "step": 5270 }, { "epoch": 0.46397188049209137, "grad_norm": 1.541266679763794, "learning_rate": 1.0558e-05, "loss": 0.0715, "step": 5280 }, { "epoch": 0.4648506151142355, "grad_norm": 1.1153682470321655, "learning_rate": 1.0578000000000001e-05, "loss": 0.0655, "step": 5290 }, { "epoch": 0.46572934973637964, "grad_norm": 1.1930108070373535, "learning_rate": 1.0598000000000001e-05, "loss": 0.068, "step": 5300 }, { "epoch": 0.46660808435852374, "grad_norm": 1.1185739040374756, "learning_rate": 1.0618e-05, "loss": 0.0676, "step": 5310 }, { "epoch": 0.46748681898066785, "grad_norm": 1.2425848245620728, "learning_rate": 1.0638e-05, "loss": 0.0696, "step": 5320 }, { "epoch": 0.46836555360281196, "grad_norm": 1.1115821599960327, "learning_rate": 1.0658e-05, "loss": 0.0681, "step": 5330 }, { "epoch": 0.46924428822495606, "grad_norm": 1.2552802562713623, "learning_rate": 1.0678e-05, "loss": 0.0697, "step": 5340 }, { "epoch": 0.47012302284710017, "grad_norm": 0.9254797697067261, "learning_rate": 1.0698000000000002e-05, "loss": 0.0671, "step": 5350 }, { "epoch": 0.4710017574692443, "grad_norm": 1.1946330070495605, "learning_rate": 1.0718e-05, "loss": 0.0708, "step": 5360 }, { "epoch": 0.4718804920913884, "grad_norm": 1.5972368717193604, "learning_rate": 1.0738000000000001e-05, "loss": 0.0682, "step": 5370 }, { "epoch": 0.4727592267135325, "grad_norm": 1.7945982217788696, "learning_rate": 1.0758e-05, "loss": 0.0699, "step": 5380 }, { "epoch": 0.47363796133567665, "grad_norm": 1.35090970993042, "learning_rate": 1.0778e-05, "loss": 0.0692, "step": 5390 }, { "epoch": 0.47451669595782076, "grad_norm": 1.6243902444839478, "learning_rate": 1.0798e-05, "loss": 0.0711, "step": 5400 }, { "epoch": 0.47539543057996486, "grad_norm": 1.4769642353057861, "learning_rate": 1.0818e-05, "loss": 0.0712, "step": 5410 }, { "epoch": 0.47627416520210897, "grad_norm": 1.1383129358291626, "learning_rate": 1.0838e-05, "loss": 0.0717, "step": 5420 }, { "epoch": 0.4771528998242531, "grad_norm": 1.1415034532546997, "learning_rate": 1.0858e-05, "loss": 0.0668, "step": 5430 }, { "epoch": 0.4780316344463972, "grad_norm": 1.2548372745513916, "learning_rate": 1.0878e-05, "loss": 0.0667, "step": 5440 }, { "epoch": 0.4789103690685413, "grad_norm": 1.634475827217102, "learning_rate": 1.0898e-05, "loss": 0.068, "step": 5450 }, { "epoch": 0.4797891036906854, "grad_norm": 1.4803670644760132, "learning_rate": 1.0918000000000001e-05, "loss": 0.0671, "step": 5460 }, { "epoch": 0.4806678383128295, "grad_norm": 1.5789378881454468, "learning_rate": 1.0938000000000001e-05, "loss": 0.0707, "step": 5470 }, { "epoch": 0.48154657293497366, "grad_norm": 1.0540963411331177, "learning_rate": 1.0958e-05, "loss": 0.068, "step": 5480 }, { "epoch": 0.48242530755711777, "grad_norm": 1.5120412111282349, "learning_rate": 1.0978e-05, "loss": 0.0688, "step": 5490 }, { "epoch": 0.4833040421792619, "grad_norm": 1.3413174152374268, "learning_rate": 1.0998e-05, "loss": 0.0729, "step": 5500 }, { "epoch": 0.484182776801406, "grad_norm": 1.2082793712615967, "learning_rate": 1.1018e-05, "loss": 0.066, "step": 5510 }, { "epoch": 0.4850615114235501, "grad_norm": 1.025377869606018, "learning_rate": 1.1038000000000002e-05, "loss": 0.0698, "step": 5520 }, { "epoch": 0.4859402460456942, "grad_norm": 1.570175290107727, "learning_rate": 1.1058e-05, "loss": 0.0659, "step": 5530 }, { "epoch": 0.4868189806678383, "grad_norm": 1.488970398902893, "learning_rate": 1.1078000000000001e-05, "loss": 0.0704, "step": 5540 }, { "epoch": 0.4876977152899824, "grad_norm": 1.5006330013275146, "learning_rate": 1.1098e-05, "loss": 0.0678, "step": 5550 }, { "epoch": 0.48857644991212656, "grad_norm": 1.274294376373291, "learning_rate": 1.1118000000000001e-05, "loss": 0.0735, "step": 5560 }, { "epoch": 0.48945518453427067, "grad_norm": 1.25430166721344, "learning_rate": 1.1138e-05, "loss": 0.0701, "step": 5570 }, { "epoch": 0.4903339191564148, "grad_norm": 0.8462620973587036, "learning_rate": 1.1158e-05, "loss": 0.0716, "step": 5580 }, { "epoch": 0.4912126537785589, "grad_norm": 1.3691872358322144, "learning_rate": 1.1178e-05, "loss": 0.0695, "step": 5590 }, { "epoch": 0.492091388400703, "grad_norm": 1.397708535194397, "learning_rate": 1.1198e-05, "loss": 0.0712, "step": 5600 }, { "epoch": 0.4929701230228471, "grad_norm": 1.5477691888809204, "learning_rate": 1.1218e-05, "loss": 0.0719, "step": 5610 }, { "epoch": 0.4938488576449912, "grad_norm": 0.9066592454910278, "learning_rate": 1.1238e-05, "loss": 0.0682, "step": 5620 }, { "epoch": 0.4947275922671353, "grad_norm": 1.4178621768951416, "learning_rate": 1.1258000000000001e-05, "loss": 0.071, "step": 5630 }, { "epoch": 0.4956063268892794, "grad_norm": 1.413733959197998, "learning_rate": 1.1278000000000001e-05, "loss": 0.0638, "step": 5640 }, { "epoch": 0.4964850615114236, "grad_norm": 1.5736613273620605, "learning_rate": 1.1298000000000001e-05, "loss": 0.0688, "step": 5650 }, { "epoch": 0.4973637961335677, "grad_norm": 1.173417568206787, "learning_rate": 1.1318e-05, "loss": 0.066, "step": 5660 }, { "epoch": 0.4982425307557118, "grad_norm": 1.5823478698730469, "learning_rate": 1.1338e-05, "loss": 0.0722, "step": 5670 }, { "epoch": 0.4991212653778559, "grad_norm": 0.9899019598960876, "learning_rate": 1.1358e-05, "loss": 0.0649, "step": 5680 }, { "epoch": 0.5, "grad_norm": 1.274638295173645, "learning_rate": 1.1378000000000002e-05, "loss": 0.0693, "step": 5690 }, { "epoch": 0.5008787346221442, "grad_norm": 1.153896450996399, "learning_rate": 1.1398e-05, "loss": 0.0618, "step": 5700 }, { "epoch": 0.5017574692442882, "grad_norm": 1.188037633895874, "learning_rate": 1.1418000000000001e-05, "loss": 0.0707, "step": 5710 }, { "epoch": 0.5026362038664324, "grad_norm": 1.4087270498275757, "learning_rate": 1.1438e-05, "loss": 0.0665, "step": 5720 }, { "epoch": 0.5035149384885764, "grad_norm": 1.133388638496399, "learning_rate": 1.1458000000000001e-05, "loss": 0.0718, "step": 5730 }, { "epoch": 0.5043936731107206, "grad_norm": 1.3622926473617554, "learning_rate": 1.1478e-05, "loss": 0.0664, "step": 5740 }, { "epoch": 0.5052724077328646, "grad_norm": 1.1535447835922241, "learning_rate": 1.1498e-05, "loss": 0.0645, "step": 5750 }, { "epoch": 0.5061511423550088, "grad_norm": 1.5850107669830322, "learning_rate": 1.1518e-05, "loss": 0.0663, "step": 5760 }, { "epoch": 0.5070298769771528, "grad_norm": 1.0446351766586304, "learning_rate": 1.1538e-05, "loss": 0.068, "step": 5770 }, { "epoch": 0.507908611599297, "grad_norm": 1.1535921096801758, "learning_rate": 1.1558e-05, "loss": 0.0684, "step": 5780 }, { "epoch": 0.5087873462214412, "grad_norm": 1.0433881282806396, "learning_rate": 1.1578e-05, "loss": 0.061, "step": 5790 }, { "epoch": 0.5096660808435852, "grad_norm": 1.038731575012207, "learning_rate": 1.1598e-05, "loss": 0.0663, "step": 5800 }, { "epoch": 0.5105448154657294, "grad_norm": 1.3039507865905762, "learning_rate": 1.1618000000000001e-05, "loss": 0.067, "step": 5810 }, { "epoch": 0.5114235500878734, "grad_norm": 1.077332854270935, "learning_rate": 1.1638000000000001e-05, "loss": 0.0632, "step": 5820 }, { "epoch": 0.5123022847100176, "grad_norm": 1.4187805652618408, "learning_rate": 1.1658000000000001e-05, "loss": 0.0651, "step": 5830 }, { "epoch": 0.5131810193321616, "grad_norm": 1.3279038667678833, "learning_rate": 1.1678e-05, "loss": 0.0661, "step": 5840 }, { "epoch": 0.5140597539543058, "grad_norm": 1.583889365196228, "learning_rate": 1.1698e-05, "loss": 0.0677, "step": 5850 }, { "epoch": 0.5149384885764499, "grad_norm": 1.3593908548355103, "learning_rate": 1.1718000000000002e-05, "loss": 0.0673, "step": 5860 }, { "epoch": 0.515817223198594, "grad_norm": 1.1138986349105835, "learning_rate": 1.1738e-05, "loss": 0.0631, "step": 5870 }, { "epoch": 0.5166959578207382, "grad_norm": 1.0587695837020874, "learning_rate": 1.1758000000000002e-05, "loss": 0.0641, "step": 5880 }, { "epoch": 0.5175746924428822, "grad_norm": 1.147458553314209, "learning_rate": 1.1778e-05, "loss": 0.0585, "step": 5890 }, { "epoch": 0.5184534270650264, "grad_norm": 1.2803547382354736, "learning_rate": 1.1798000000000001e-05, "loss": 0.066, "step": 5900 }, { "epoch": 0.5193321616871704, "grad_norm": 1.447007656097412, "learning_rate": 1.1818e-05, "loss": 0.0664, "step": 5910 }, { "epoch": 0.5202108963093146, "grad_norm": 0.8291473388671875, "learning_rate": 1.1838e-05, "loss": 0.0607, "step": 5920 }, { "epoch": 0.5210896309314587, "grad_norm": 1.0639077425003052, "learning_rate": 1.1858e-05, "loss": 0.0648, "step": 5930 }, { "epoch": 0.5219683655536028, "grad_norm": 1.296423316001892, "learning_rate": 1.1878e-05, "loss": 0.0638, "step": 5940 }, { "epoch": 0.5228471001757469, "grad_norm": 1.4167884588241577, "learning_rate": 1.1898e-05, "loss": 0.0691, "step": 5950 }, { "epoch": 0.523725834797891, "grad_norm": 1.3369683027267456, "learning_rate": 1.1918e-05, "loss": 0.0643, "step": 5960 }, { "epoch": 0.5246045694200352, "grad_norm": 1.0160760879516602, "learning_rate": 1.1938e-05, "loss": 0.0627, "step": 5970 }, { "epoch": 0.5254833040421792, "grad_norm": 1.0247163772583008, "learning_rate": 1.1958000000000001e-05, "loss": 0.0623, "step": 5980 }, { "epoch": 0.5263620386643234, "grad_norm": 1.8771427869796753, "learning_rate": 1.1978e-05, "loss": 0.0659, "step": 5990 }, { "epoch": 0.5272407732864675, "grad_norm": 1.1833455562591553, "learning_rate": 1.1998000000000001e-05, "loss": 0.0596, "step": 6000 }, { "epoch": 0.5281195079086116, "grad_norm": 1.1735892295837402, "learning_rate": 1.2018e-05, "loss": 0.0652, "step": 6010 }, { "epoch": 0.5289982425307557, "grad_norm": 0.672307550907135, "learning_rate": 1.2038e-05, "loss": 0.0588, "step": 6020 }, { "epoch": 0.5298769771528998, "grad_norm": 1.069530963897705, "learning_rate": 1.2058e-05, "loss": 0.0662, "step": 6030 }, { "epoch": 0.5307557117750439, "grad_norm": 1.0258772373199463, "learning_rate": 1.2078e-05, "loss": 0.0576, "step": 6040 }, { "epoch": 0.531634446397188, "grad_norm": 0.8064135909080505, "learning_rate": 1.2098000000000002e-05, "loss": 0.0603, "step": 6050 }, { "epoch": 0.5325131810193322, "grad_norm": 1.107426404953003, "learning_rate": 1.2118e-05, "loss": 0.0596, "step": 6060 }, { "epoch": 0.5333919156414763, "grad_norm": 0.8232565522193909, "learning_rate": 1.2138000000000001e-05, "loss": 0.0582, "step": 6070 }, { "epoch": 0.5342706502636204, "grad_norm": 1.390871286392212, "learning_rate": 1.2158e-05, "loss": 0.0662, "step": 6080 }, { "epoch": 0.5351493848857645, "grad_norm": 1.2778838872909546, "learning_rate": 1.2178000000000001e-05, "loss": 0.0623, "step": 6090 }, { "epoch": 0.5360281195079086, "grad_norm": 1.1849502325057983, "learning_rate": 1.2198e-05, "loss": 0.062, "step": 6100 }, { "epoch": 0.5369068541300527, "grad_norm": 1.1744307279586792, "learning_rate": 1.2218e-05, "loss": 0.0639, "step": 6110 }, { "epoch": 0.5377855887521968, "grad_norm": 1.1337134838104248, "learning_rate": 1.2238e-05, "loss": 0.0622, "step": 6120 }, { "epoch": 0.538664323374341, "grad_norm": 1.3445965051651, "learning_rate": 1.2258e-05, "loss": 0.0599, "step": 6130 }, { "epoch": 0.539543057996485, "grad_norm": 1.5361723899841309, "learning_rate": 1.2278e-05, "loss": 0.0619, "step": 6140 }, { "epoch": 0.5404217926186292, "grad_norm": 0.9543642997741699, "learning_rate": 1.2298000000000002e-05, "loss": 0.0571, "step": 6150 }, { "epoch": 0.5413005272407733, "grad_norm": 1.1797324419021606, "learning_rate": 1.2318e-05, "loss": 0.0559, "step": 6160 }, { "epoch": 0.5421792618629174, "grad_norm": 1.5906414985656738, "learning_rate": 1.2338000000000001e-05, "loss": 0.0566, "step": 6170 }, { "epoch": 0.5430579964850615, "grad_norm": 1.5075336694717407, "learning_rate": 1.2358000000000001e-05, "loss": 0.0573, "step": 6180 }, { "epoch": 0.5439367311072056, "grad_norm": 1.4666519165039062, "learning_rate": 1.2378e-05, "loss": 0.0607, "step": 6190 }, { "epoch": 0.5448154657293497, "grad_norm": 1.314322829246521, "learning_rate": 1.2398e-05, "loss": 0.0593, "step": 6200 }, { "epoch": 0.5456942003514939, "grad_norm": 1.3848793506622314, "learning_rate": 1.2418e-05, "loss": 0.0632, "step": 6210 }, { "epoch": 0.546572934973638, "grad_norm": 1.0889935493469238, "learning_rate": 1.2438000000000002e-05, "loss": 0.0596, "step": 6220 }, { "epoch": 0.5474516695957821, "grad_norm": 0.8941905498504639, "learning_rate": 1.2458e-05, "loss": 0.0554, "step": 6230 }, { "epoch": 0.5483304042179262, "grad_norm": 1.1513161659240723, "learning_rate": 1.2478000000000001e-05, "loss": 0.059, "step": 6240 }, { "epoch": 0.5492091388400703, "grad_norm": 1.4198933839797974, "learning_rate": 1.2498e-05, "loss": 0.0558, "step": 6250 }, { "epoch": 0.5500878734622144, "grad_norm": 1.6610264778137207, "learning_rate": 1.2518000000000001e-05, "loss": 0.0543, "step": 6260 }, { "epoch": 0.5509666080843585, "grad_norm": 1.27925443649292, "learning_rate": 1.2538e-05, "loss": 0.0546, "step": 6270 }, { "epoch": 0.5518453427065027, "grad_norm": 1.1144756078720093, "learning_rate": 1.2558e-05, "loss": 0.0557, "step": 6280 }, { "epoch": 0.5527240773286467, "grad_norm": 1.1637948751449585, "learning_rate": 1.2578e-05, "loss": 0.0585, "step": 6290 }, { "epoch": 0.5536028119507909, "grad_norm": 1.125502109527588, "learning_rate": 1.2598000000000002e-05, "loss": 0.056, "step": 6300 }, { "epoch": 0.554481546572935, "grad_norm": 0.8727356791496277, "learning_rate": 1.2617999999999998e-05, "loss": 0.0542, "step": 6310 }, { "epoch": 0.5553602811950791, "grad_norm": 1.1747361421585083, "learning_rate": 1.2638e-05, "loss": 0.0576, "step": 6320 }, { "epoch": 0.5562390158172232, "grad_norm": 1.3037147521972656, "learning_rate": 1.2658e-05, "loss": 0.0557, "step": 6330 }, { "epoch": 0.5571177504393673, "grad_norm": 1.313866138458252, "learning_rate": 1.2678000000000001e-05, "loss": 0.0567, "step": 6340 }, { "epoch": 0.5579964850615114, "grad_norm": 0.8570563793182373, "learning_rate": 1.2698000000000001e-05, "loss": 0.0625, "step": 6350 }, { "epoch": 0.5588752196836555, "grad_norm": 0.9330220222473145, "learning_rate": 1.2718e-05, "loss": 0.0538, "step": 6360 }, { "epoch": 0.5597539543057997, "grad_norm": 1.493088960647583, "learning_rate": 1.2738e-05, "loss": 0.0556, "step": 6370 }, { "epoch": 0.5606326889279437, "grad_norm": 0.9183053374290466, "learning_rate": 1.2758e-05, "loss": 0.0541, "step": 6380 }, { "epoch": 0.5615114235500879, "grad_norm": 1.2265267372131348, "learning_rate": 1.2778e-05, "loss": 0.0566, "step": 6390 }, { "epoch": 0.562390158172232, "grad_norm": 1.3201960325241089, "learning_rate": 1.2798000000000002e-05, "loss": 0.0596, "step": 6400 }, { "epoch": 0.5632688927943761, "grad_norm": 1.2632077932357788, "learning_rate": 1.2818e-05, "loss": 0.0595, "step": 6410 }, { "epoch": 0.5641476274165202, "grad_norm": 0.81128990650177, "learning_rate": 1.2838e-05, "loss": 0.0575, "step": 6420 }, { "epoch": 0.5650263620386643, "grad_norm": 0.9677115678787231, "learning_rate": 1.2858000000000001e-05, "loss": 0.0539, "step": 6430 }, { "epoch": 0.5659050966608085, "grad_norm": 0.8692770600318909, "learning_rate": 1.2878000000000001e-05, "loss": 0.0542, "step": 6440 }, { "epoch": 0.5667838312829525, "grad_norm": 0.8990409970283508, "learning_rate": 1.2898000000000003e-05, "loss": 0.0545, "step": 6450 }, { "epoch": 0.5676625659050967, "grad_norm": 1.1862324476242065, "learning_rate": 1.2917999999999999e-05, "loss": 0.0555, "step": 6460 }, { "epoch": 0.5685413005272407, "grad_norm": 1.1130549907684326, "learning_rate": 1.2938e-05, "loss": 0.0507, "step": 6470 }, { "epoch": 0.5694200351493849, "grad_norm": 1.2198177576065063, "learning_rate": 1.2958e-05, "loss": 0.0557, "step": 6480 }, { "epoch": 0.570298769771529, "grad_norm": 1.1491122245788574, "learning_rate": 1.2978000000000002e-05, "loss": 0.0584, "step": 6490 }, { "epoch": 0.5711775043936731, "grad_norm": 1.443427562713623, "learning_rate": 1.2998000000000002e-05, "loss": 0.0556, "step": 6500 }, { "epoch": 0.5720562390158173, "grad_norm": 1.47344970703125, "learning_rate": 1.3018e-05, "loss": 0.0567, "step": 6510 }, { "epoch": 0.5729349736379613, "grad_norm": 1.242917776107788, "learning_rate": 1.3038e-05, "loss": 0.0542, "step": 6520 }, { "epoch": 0.5738137082601055, "grad_norm": 1.3073241710662842, "learning_rate": 1.3058000000000001e-05, "loss": 0.0546, "step": 6530 }, { "epoch": 0.5746924428822495, "grad_norm": 0.804561972618103, "learning_rate": 1.3078e-05, "loss": 0.0484, "step": 6540 }, { "epoch": 0.5755711775043937, "grad_norm": 1.2710630893707275, "learning_rate": 1.3098000000000002e-05, "loss": 0.0549, "step": 6550 }, { "epoch": 0.5764499121265377, "grad_norm": 1.225601315498352, "learning_rate": 1.3118e-05, "loss": 0.0536, "step": 6560 }, { "epoch": 0.5773286467486819, "grad_norm": 1.2965461015701294, "learning_rate": 1.3138e-05, "loss": 0.0541, "step": 6570 }, { "epoch": 0.5782073813708261, "grad_norm": 0.891914427280426, "learning_rate": 1.3158e-05, "loss": 0.0498, "step": 6580 }, { "epoch": 0.5790861159929701, "grad_norm": 1.6163685321807861, "learning_rate": 1.3178000000000002e-05, "loss": 0.0563, "step": 6590 }, { "epoch": 0.5799648506151143, "grad_norm": 1.0565282106399536, "learning_rate": 1.3198e-05, "loss": 0.0572, "step": 6600 }, { "epoch": 0.5808435852372583, "grad_norm": 1.0166242122650146, "learning_rate": 1.3218e-05, "loss": 0.0565, "step": 6610 }, { "epoch": 0.5817223198594025, "grad_norm": 0.9570269584655762, "learning_rate": 1.3238000000000001e-05, "loss": 0.0574, "step": 6620 }, { "epoch": 0.5826010544815465, "grad_norm": 1.0868866443634033, "learning_rate": 1.3258e-05, "loss": 0.0597, "step": 6630 }, { "epoch": 0.5834797891036907, "grad_norm": 1.4116953611373901, "learning_rate": 1.3278000000000002e-05, "loss": 0.0606, "step": 6640 }, { "epoch": 0.5843585237258347, "grad_norm": 1.1332725286483765, "learning_rate": 1.3297999999999999e-05, "loss": 0.0597, "step": 6650 }, { "epoch": 0.5852372583479789, "grad_norm": 1.2747186422348022, "learning_rate": 1.3318e-05, "loss": 0.0535, "step": 6660 }, { "epoch": 0.5861159929701231, "grad_norm": 1.6930937767028809, "learning_rate": 1.3338e-05, "loss": 0.0564, "step": 6670 }, { "epoch": 0.5869947275922671, "grad_norm": 1.245792031288147, "learning_rate": 1.3358000000000002e-05, "loss": 0.0507, "step": 6680 }, { "epoch": 0.5878734622144113, "grad_norm": 1.2702313661575317, "learning_rate": 1.3378000000000001e-05, "loss": 0.0535, "step": 6690 }, { "epoch": 0.5887521968365553, "grad_norm": 0.9170592427253723, "learning_rate": 1.3398e-05, "loss": 0.0517, "step": 6700 }, { "epoch": 0.5896309314586995, "grad_norm": 1.07599937915802, "learning_rate": 1.3418e-05, "loss": 0.0575, "step": 6710 }, { "epoch": 0.5905096660808435, "grad_norm": 1.4922980070114136, "learning_rate": 1.3438e-05, "loss": 0.0587, "step": 6720 }, { "epoch": 0.5913884007029877, "grad_norm": 0.9133854508399963, "learning_rate": 1.3458e-05, "loss": 0.0535, "step": 6730 }, { "epoch": 0.5922671353251318, "grad_norm": 1.1268037557601929, "learning_rate": 1.3478000000000002e-05, "loss": 0.0528, "step": 6740 }, { "epoch": 0.5931458699472759, "grad_norm": 1.4689587354660034, "learning_rate": 1.3498e-05, "loss": 0.0624, "step": 6750 }, { "epoch": 0.5940246045694201, "grad_norm": 1.2506121397018433, "learning_rate": 1.3518e-05, "loss": 0.053, "step": 6760 }, { "epoch": 0.5949033391915641, "grad_norm": 1.082794189453125, "learning_rate": 1.3538000000000002e-05, "loss": 0.0532, "step": 6770 }, { "epoch": 0.5957820738137083, "grad_norm": 1.1495486497879028, "learning_rate": 1.3558000000000001e-05, "loss": 0.0532, "step": 6780 }, { "epoch": 0.5966608084358523, "grad_norm": 1.387743353843689, "learning_rate": 1.3578000000000001e-05, "loss": 0.0512, "step": 6790 }, { "epoch": 0.5975395430579965, "grad_norm": 0.9727322459220886, "learning_rate": 1.3598e-05, "loss": 0.0538, "step": 6800 }, { "epoch": 0.5984182776801406, "grad_norm": 0.9360994696617126, "learning_rate": 1.3618e-05, "loss": 0.0513, "step": 6810 }, { "epoch": 0.5992970123022847, "grad_norm": 0.9344448447227478, "learning_rate": 1.3638e-05, "loss": 0.0555, "step": 6820 }, { "epoch": 0.6001757469244289, "grad_norm": 1.2419686317443848, "learning_rate": 1.3658000000000002e-05, "loss": 0.0528, "step": 6830 }, { "epoch": 0.6010544815465729, "grad_norm": 0.9263470768928528, "learning_rate": 1.3678000000000002e-05, "loss": 0.0533, "step": 6840 }, { "epoch": 0.6019332161687171, "grad_norm": 1.2621417045593262, "learning_rate": 1.3698e-05, "loss": 0.0512, "step": 6850 }, { "epoch": 0.6028119507908611, "grad_norm": 1.0682573318481445, "learning_rate": 1.3718e-05, "loss": 0.0505, "step": 6860 }, { "epoch": 0.6036906854130053, "grad_norm": 1.2588106393814087, "learning_rate": 1.3738000000000001e-05, "loss": 0.0536, "step": 6870 }, { "epoch": 0.6045694200351494, "grad_norm": 1.180705189704895, "learning_rate": 1.3758000000000001e-05, "loss": 0.0551, "step": 6880 }, { "epoch": 0.6054481546572935, "grad_norm": 1.1021357774734497, "learning_rate": 1.3778000000000003e-05, "loss": 0.0512, "step": 6890 }, { "epoch": 0.6063268892794376, "grad_norm": 1.1557438373565674, "learning_rate": 1.3797999999999999e-05, "loss": 0.0535, "step": 6900 }, { "epoch": 0.6072056239015817, "grad_norm": 1.1761081218719482, "learning_rate": 1.3818e-05, "loss": 0.0551, "step": 6910 }, { "epoch": 0.6080843585237259, "grad_norm": 0.8858229517936707, "learning_rate": 1.3838e-05, "loss": 0.0533, "step": 6920 }, { "epoch": 0.6089630931458699, "grad_norm": 1.3045294284820557, "learning_rate": 1.3858000000000002e-05, "loss": 0.0531, "step": 6930 }, { "epoch": 0.6098418277680141, "grad_norm": 1.063245177268982, "learning_rate": 1.3878e-05, "loss": 0.0488, "step": 6940 }, { "epoch": 0.6107205623901582, "grad_norm": 1.3416905403137207, "learning_rate": 1.3898e-05, "loss": 0.0531, "step": 6950 }, { "epoch": 0.6115992970123023, "grad_norm": 1.527098536491394, "learning_rate": 1.3918000000000001e-05, "loss": 0.052, "step": 6960 }, { "epoch": 0.6124780316344464, "grad_norm": 1.075090765953064, "learning_rate": 1.3938000000000001e-05, "loss": 0.05, "step": 6970 }, { "epoch": 0.6133567662565905, "grad_norm": 1.2158530950546265, "learning_rate": 1.3958000000000001e-05, "loss": 0.0544, "step": 6980 }, { "epoch": 0.6142355008787346, "grad_norm": 0.8266593813896179, "learning_rate": 1.3977999999999999e-05, "loss": 0.0469, "step": 6990 }, { "epoch": 0.6151142355008787, "grad_norm": 0.9415361881256104, "learning_rate": 1.3998e-05, "loss": 0.0487, "step": 7000 }, { "epoch": 0.6159929701230229, "grad_norm": 1.333661675453186, "learning_rate": 1.4018e-05, "loss": 0.0523, "step": 7010 }, { "epoch": 0.616871704745167, "grad_norm": 0.9589647054672241, "learning_rate": 1.4038000000000002e-05, "loss": 0.051, "step": 7020 }, { "epoch": 0.6177504393673111, "grad_norm": 0.989940345287323, "learning_rate": 1.4058000000000002e-05, "loss": 0.0483, "step": 7030 }, { "epoch": 0.6186291739894552, "grad_norm": 0.8877699971199036, "learning_rate": 1.4078e-05, "loss": 0.0508, "step": 7040 }, { "epoch": 0.6195079086115993, "grad_norm": 0.9965460300445557, "learning_rate": 1.4098e-05, "loss": 0.0519, "step": 7050 }, { "epoch": 0.6203866432337434, "grad_norm": 1.0452256202697754, "learning_rate": 1.4118000000000001e-05, "loss": 0.0452, "step": 7060 }, { "epoch": 0.6212653778558875, "grad_norm": 0.7711632251739502, "learning_rate": 1.4138e-05, "loss": 0.0514, "step": 7070 }, { "epoch": 0.6221441124780316, "grad_norm": 1.1785640716552734, "learning_rate": 1.4158000000000002e-05, "loss": 0.0491, "step": 7080 }, { "epoch": 0.6230228471001757, "grad_norm": 1.1993991136550903, "learning_rate": 1.4178e-05, "loss": 0.0501, "step": 7090 }, { "epoch": 0.6239015817223199, "grad_norm": 1.0742818117141724, "learning_rate": 1.4198e-05, "loss": 0.0493, "step": 7100 }, { "epoch": 0.624780316344464, "grad_norm": 0.8369795083999634, "learning_rate": 1.4218e-05, "loss": 0.0522, "step": 7110 }, { "epoch": 0.6256590509666081, "grad_norm": 0.8882085084915161, "learning_rate": 1.4238000000000002e-05, "loss": 0.0491, "step": 7120 }, { "epoch": 0.6265377855887522, "grad_norm": 1.1926536560058594, "learning_rate": 1.4258000000000001e-05, "loss": 0.0528, "step": 7130 }, { "epoch": 0.6274165202108963, "grad_norm": 0.9693045616149902, "learning_rate": 1.4278e-05, "loss": 0.0494, "step": 7140 }, { "epoch": 0.6282952548330404, "grad_norm": 0.9167799353599548, "learning_rate": 1.4298000000000001e-05, "loss": 0.0487, "step": 7150 }, { "epoch": 0.6291739894551845, "grad_norm": 1.4945253133773804, "learning_rate": 1.4318e-05, "loss": 0.0529, "step": 7160 }, { "epoch": 0.6300527240773286, "grad_norm": 1.0629793405532837, "learning_rate": 1.4338e-05, "loss": 0.0551, "step": 7170 }, { "epoch": 0.6309314586994728, "grad_norm": 1.1229746341705322, "learning_rate": 1.4358000000000002e-05, "loss": 0.0524, "step": 7180 }, { "epoch": 0.6318101933216169, "grad_norm": 0.7518585920333862, "learning_rate": 1.4378e-05, "loss": 0.052, "step": 7190 }, { "epoch": 0.632688927943761, "grad_norm": 1.2062993049621582, "learning_rate": 1.4398e-05, "loss": 0.0477, "step": 7200 }, { "epoch": 0.6335676625659051, "grad_norm": 0.8847637176513672, "learning_rate": 1.4418000000000002e-05, "loss": 0.0491, "step": 7210 }, { "epoch": 0.6344463971880492, "grad_norm": 1.0517674684524536, "learning_rate": 1.4438000000000001e-05, "loss": 0.0439, "step": 7220 }, { "epoch": 0.6353251318101933, "grad_norm": 1.2888931035995483, "learning_rate": 1.4458e-05, "loss": 0.0522, "step": 7230 }, { "epoch": 0.6362038664323374, "grad_norm": 1.4016321897506714, "learning_rate": 1.4478e-05, "loss": 0.0533, "step": 7240 }, { "epoch": 0.6370826010544816, "grad_norm": 1.1638407707214355, "learning_rate": 1.4498e-05, "loss": 0.0466, "step": 7250 }, { "epoch": 0.6379613356766256, "grad_norm": 0.8852329254150391, "learning_rate": 1.4518e-05, "loss": 0.0469, "step": 7260 }, { "epoch": 0.6388400702987698, "grad_norm": 1.289658784866333, "learning_rate": 1.4538000000000002e-05, "loss": 0.0505, "step": 7270 }, { "epoch": 0.6397188049209139, "grad_norm": 1.268968939781189, "learning_rate": 1.4558e-05, "loss": 0.0498, "step": 7280 }, { "epoch": 0.640597539543058, "grad_norm": 1.2018814086914062, "learning_rate": 1.4578e-05, "loss": 0.0489, "step": 7290 }, { "epoch": 0.6414762741652021, "grad_norm": 0.916374146938324, "learning_rate": 1.4598e-05, "loss": 0.0466, "step": 7300 }, { "epoch": 0.6423550087873462, "grad_norm": 1.0416017770767212, "learning_rate": 1.4618000000000001e-05, "loss": 0.0477, "step": 7310 }, { "epoch": 0.6432337434094904, "grad_norm": 1.2166956663131714, "learning_rate": 1.4638000000000001e-05, "loss": 0.0528, "step": 7320 }, { "epoch": 0.6441124780316344, "grad_norm": 0.9110180139541626, "learning_rate": 1.4658e-05, "loss": 0.0458, "step": 7330 }, { "epoch": 0.6449912126537786, "grad_norm": 0.9326126575469971, "learning_rate": 1.4678e-05, "loss": 0.0456, "step": 7340 }, { "epoch": 0.6458699472759226, "grad_norm": 1.1574902534484863, "learning_rate": 1.4698e-05, "loss": 0.0462, "step": 7350 }, { "epoch": 0.6467486818980668, "grad_norm": 1.2315402030944824, "learning_rate": 1.4718000000000002e-05, "loss": 0.0577, "step": 7360 }, { "epoch": 0.6476274165202109, "grad_norm": 1.2243436574935913, "learning_rate": 1.4738000000000002e-05, "loss": 0.0538, "step": 7370 }, { "epoch": 0.648506151142355, "grad_norm": 0.7321206331253052, "learning_rate": 1.4758e-05, "loss": 0.0501, "step": 7380 }, { "epoch": 0.6493848857644992, "grad_norm": 0.8444899320602417, "learning_rate": 1.4778e-05, "loss": 0.0506, "step": 7390 }, { "epoch": 0.6502636203866432, "grad_norm": 0.823876678943634, "learning_rate": 1.4798000000000001e-05, "loss": 0.0464, "step": 7400 }, { "epoch": 0.6511423550087874, "grad_norm": 0.7690760493278503, "learning_rate": 1.4818000000000001e-05, "loss": 0.049, "step": 7410 }, { "epoch": 0.6520210896309314, "grad_norm": 1.089821696281433, "learning_rate": 1.4838000000000003e-05, "loss": 0.0466, "step": 7420 }, { "epoch": 0.6528998242530756, "grad_norm": 0.7772980332374573, "learning_rate": 1.4857999999999999e-05, "loss": 0.0492, "step": 7430 }, { "epoch": 0.6537785588752196, "grad_norm": 1.1532515287399292, "learning_rate": 1.4878e-05, "loss": 0.0487, "step": 7440 }, { "epoch": 0.6546572934973638, "grad_norm": 0.7214043736457825, "learning_rate": 1.4898e-05, "loss": 0.0467, "step": 7450 }, { "epoch": 0.655536028119508, "grad_norm": 1.2670365571975708, "learning_rate": 1.4918000000000002e-05, "loss": 0.0509, "step": 7460 }, { "epoch": 0.656414762741652, "grad_norm": 0.8854920268058777, "learning_rate": 1.4938000000000002e-05, "loss": 0.0466, "step": 7470 }, { "epoch": 0.6572934973637962, "grad_norm": 0.9513890743255615, "learning_rate": 1.4958e-05, "loss": 0.046, "step": 7480 }, { "epoch": 0.6581722319859402, "grad_norm": 0.8908875584602356, "learning_rate": 1.4978e-05, "loss": 0.0428, "step": 7490 }, { "epoch": 0.6590509666080844, "grad_norm": 0.7934906482696533, "learning_rate": 1.4998000000000001e-05, "loss": 0.0482, "step": 7500 }, { "epoch": 0.6599297012302284, "grad_norm": 0.6023195385932922, "learning_rate": 1.5018000000000001e-05, "loss": 0.043, "step": 7510 }, { "epoch": 0.6608084358523726, "grad_norm": 0.8144250512123108, "learning_rate": 1.5038000000000002e-05, "loss": 0.0502, "step": 7520 }, { "epoch": 0.6616871704745168, "grad_norm": 1.1019787788391113, "learning_rate": 1.5058e-05, "loss": 0.0473, "step": 7530 }, { "epoch": 0.6625659050966608, "grad_norm": 0.5509560704231262, "learning_rate": 1.5078e-05, "loss": 0.0467, "step": 7540 }, { "epoch": 0.663444639718805, "grad_norm": 0.7435941100120544, "learning_rate": 1.5098000000000002e-05, "loss": 0.045, "step": 7550 }, { "epoch": 0.664323374340949, "grad_norm": 0.5606623291969299, "learning_rate": 1.5118000000000002e-05, "loss": 0.0516, "step": 7560 }, { "epoch": 0.6652021089630932, "grad_norm": 0.9421476721763611, "learning_rate": 1.5138e-05, "loss": 0.0465, "step": 7570 }, { "epoch": 0.6660808435852372, "grad_norm": 1.0393770933151245, "learning_rate": 1.5158e-05, "loss": 0.0441, "step": 7580 }, { "epoch": 0.6669595782073814, "grad_norm": 0.9660190939903259, "learning_rate": 1.5178000000000001e-05, "loss": 0.0462, "step": 7590 }, { "epoch": 0.6678383128295254, "grad_norm": 0.960306704044342, "learning_rate": 1.5198000000000001e-05, "loss": 0.0476, "step": 7600 }, { "epoch": 0.6687170474516696, "grad_norm": 0.8123681545257568, "learning_rate": 1.5218000000000002e-05, "loss": 0.0436, "step": 7610 }, { "epoch": 0.6695957820738138, "grad_norm": 1.0410940647125244, "learning_rate": 1.5237999999999999e-05, "loss": 0.0467, "step": 7620 }, { "epoch": 0.6704745166959578, "grad_norm": 0.9898613691329956, "learning_rate": 1.5258e-05, "loss": 0.0475, "step": 7630 }, { "epoch": 0.671353251318102, "grad_norm": 1.0882841348648071, "learning_rate": 1.5278000000000002e-05, "loss": 0.0462, "step": 7640 }, { "epoch": 0.672231985940246, "grad_norm": 0.8267814517021179, "learning_rate": 1.5298e-05, "loss": 0.0456, "step": 7650 }, { "epoch": 0.6731107205623902, "grad_norm": 1.0817722082138062, "learning_rate": 1.5318e-05, "loss": 0.0516, "step": 7660 }, { "epoch": 0.6739894551845342, "grad_norm": 0.9799589514732361, "learning_rate": 1.5338e-05, "loss": 0.0483, "step": 7670 }, { "epoch": 0.6748681898066784, "grad_norm": 1.011676549911499, "learning_rate": 1.5358e-05, "loss": 0.0426, "step": 7680 }, { "epoch": 0.6757469244288224, "grad_norm": 1.219343900680542, "learning_rate": 1.5378e-05, "loss": 0.0444, "step": 7690 }, { "epoch": 0.6766256590509666, "grad_norm": 1.105164647102356, "learning_rate": 1.5398e-05, "loss": 0.0511, "step": 7700 }, { "epoch": 0.6775043936731108, "grad_norm": 1.101932168006897, "learning_rate": 1.5418e-05, "loss": 0.048, "step": 7710 }, { "epoch": 0.6783831282952548, "grad_norm": 0.7285661101341248, "learning_rate": 1.5438e-05, "loss": 0.0439, "step": 7720 }, { "epoch": 0.679261862917399, "grad_norm": 0.9005547761917114, "learning_rate": 1.5458e-05, "loss": 0.048, "step": 7730 }, { "epoch": 0.680140597539543, "grad_norm": 0.7544947266578674, "learning_rate": 1.5478e-05, "loss": 0.0442, "step": 7740 }, { "epoch": 0.6810193321616872, "grad_norm": 1.154849886894226, "learning_rate": 1.5498000000000003e-05, "loss": 0.0484, "step": 7750 }, { "epoch": 0.6818980667838312, "grad_norm": 0.8253381252288818, "learning_rate": 1.5518000000000003e-05, "loss": 0.0444, "step": 7760 }, { "epoch": 0.6827768014059754, "grad_norm": 1.1139674186706543, "learning_rate": 1.5538e-05, "loss": 0.0525, "step": 7770 }, { "epoch": 0.6836555360281195, "grad_norm": 0.8859580159187317, "learning_rate": 1.5558e-05, "loss": 0.0449, "step": 7780 }, { "epoch": 0.6845342706502636, "grad_norm": 0.82254558801651, "learning_rate": 1.5578000000000002e-05, "loss": 0.0449, "step": 7790 }, { "epoch": 0.6854130052724078, "grad_norm": 0.8442023992538452, "learning_rate": 1.5598000000000002e-05, "loss": 0.0448, "step": 7800 }, { "epoch": 0.6862917398945518, "grad_norm": 1.0135929584503174, "learning_rate": 1.5618000000000002e-05, "loss": 0.045, "step": 7810 }, { "epoch": 0.687170474516696, "grad_norm": 0.8330069780349731, "learning_rate": 1.5638e-05, "loss": 0.0429, "step": 7820 }, { "epoch": 0.68804920913884, "grad_norm": 1.1279186010360718, "learning_rate": 1.5658e-05, "loss": 0.047, "step": 7830 }, { "epoch": 0.6889279437609842, "grad_norm": 1.0576441287994385, "learning_rate": 1.5678e-05, "loss": 0.043, "step": 7840 }, { "epoch": 0.6898066783831283, "grad_norm": 1.0668011903762817, "learning_rate": 1.5698e-05, "loss": 0.0464, "step": 7850 }, { "epoch": 0.6906854130052724, "grad_norm": 0.7779638171195984, "learning_rate": 1.5718e-05, "loss": 0.0456, "step": 7860 }, { "epoch": 0.6915641476274165, "grad_norm": 0.7307767868041992, "learning_rate": 1.5738e-05, "loss": 0.0442, "step": 7870 }, { "epoch": 0.6924428822495606, "grad_norm": 0.8882766366004944, "learning_rate": 1.5758e-05, "loss": 0.043, "step": 7880 }, { "epoch": 0.6933216168717048, "grad_norm": 1.174443244934082, "learning_rate": 1.5778e-05, "loss": 0.0462, "step": 7890 }, { "epoch": 0.6942003514938488, "grad_norm": 1.2718632221221924, "learning_rate": 1.5798e-05, "loss": 0.0458, "step": 7900 }, { "epoch": 0.695079086115993, "grad_norm": 0.6352127194404602, "learning_rate": 1.5818e-05, "loss": 0.0468, "step": 7910 }, { "epoch": 0.6959578207381371, "grad_norm": 0.8908881545066833, "learning_rate": 1.5838e-05, "loss": 0.0441, "step": 7920 }, { "epoch": 0.6968365553602812, "grad_norm": 0.9582550525665283, "learning_rate": 1.5858e-05, "loss": 0.0455, "step": 7930 }, { "epoch": 0.6977152899824253, "grad_norm": 0.9445775747299194, "learning_rate": 1.5878000000000003e-05, "loss": 0.0482, "step": 7940 }, { "epoch": 0.6985940246045694, "grad_norm": 0.9115543961524963, "learning_rate": 1.5898000000000003e-05, "loss": 0.0458, "step": 7950 }, { "epoch": 0.6994727592267135, "grad_norm": 0.9642423987388611, "learning_rate": 1.5918e-05, "loss": 0.0484, "step": 7960 }, { "epoch": 0.7003514938488576, "grad_norm": 1.526833176612854, "learning_rate": 1.5938e-05, "loss": 0.0493, "step": 7970 }, { "epoch": 0.7012302284710018, "grad_norm": 0.7598287463188171, "learning_rate": 1.5958000000000002e-05, "loss": 0.0505, "step": 7980 }, { "epoch": 0.7021089630931459, "grad_norm": 1.0083378553390503, "learning_rate": 1.5978000000000002e-05, "loss": 0.0448, "step": 7990 }, { "epoch": 0.70298769771529, "grad_norm": 1.0609333515167236, "learning_rate": 1.5998e-05, "loss": 0.0417, "step": 8000 }, { "epoch": 0.7038664323374341, "grad_norm": 0.9088830947875977, "learning_rate": 1.6017999999999998e-05, "loss": 0.0412, "step": 8010 }, { "epoch": 0.7047451669595782, "grad_norm": 1.0972785949707031, "learning_rate": 1.6038e-05, "loss": 0.0447, "step": 8020 }, { "epoch": 0.7056239015817223, "grad_norm": 0.8793069124221802, "learning_rate": 1.6058e-05, "loss": 0.0442, "step": 8030 }, { "epoch": 0.7065026362038664, "grad_norm": 0.7648331522941589, "learning_rate": 1.6078e-05, "loss": 0.0452, "step": 8040 }, { "epoch": 0.7073813708260105, "grad_norm": 1.1602716445922852, "learning_rate": 1.6098e-05, "loss": 0.0447, "step": 8050 }, { "epoch": 0.7082601054481547, "grad_norm": 0.8101522922515869, "learning_rate": 1.6118e-05, "loss": 0.0424, "step": 8060 }, { "epoch": 0.7091388400702988, "grad_norm": 1.0090534687042236, "learning_rate": 1.6138e-05, "loss": 0.0423, "step": 8070 }, { "epoch": 0.7100175746924429, "grad_norm": 1.133711814880371, "learning_rate": 1.6158e-05, "loss": 0.0451, "step": 8080 }, { "epoch": 0.710896309314587, "grad_norm": 0.7729128003120422, "learning_rate": 1.6178e-05, "loss": 0.0448, "step": 8090 }, { "epoch": 0.7117750439367311, "grad_norm": 0.7567286491394043, "learning_rate": 1.6198000000000003e-05, "loss": 0.0469, "step": 8100 }, { "epoch": 0.7126537785588752, "grad_norm": 0.7114367485046387, "learning_rate": 1.6218e-05, "loss": 0.0402, "step": 8110 }, { "epoch": 0.7135325131810193, "grad_norm": 0.98529052734375, "learning_rate": 1.6238e-05, "loss": 0.0449, "step": 8120 }, { "epoch": 0.7144112478031635, "grad_norm": 1.2089589834213257, "learning_rate": 1.6258000000000003e-05, "loss": 0.0439, "step": 8130 }, { "epoch": 0.7152899824253075, "grad_norm": 0.9934971332550049, "learning_rate": 1.6278000000000002e-05, "loss": 0.0416, "step": 8140 }, { "epoch": 0.7161687170474517, "grad_norm": 0.8863129615783691, "learning_rate": 1.6298000000000002e-05, "loss": 0.0444, "step": 8150 }, { "epoch": 0.7170474516695958, "grad_norm": 0.6983170509338379, "learning_rate": 1.6318e-05, "loss": 0.0451, "step": 8160 }, { "epoch": 0.7179261862917399, "grad_norm": 0.7218720316886902, "learning_rate": 1.6338000000000002e-05, "loss": 0.044, "step": 8170 }, { "epoch": 0.718804920913884, "grad_norm": 0.908745527267456, "learning_rate": 1.6358e-05, "loss": 0.043, "step": 8180 }, { "epoch": 0.7196836555360281, "grad_norm": 1.0619219541549683, "learning_rate": 1.6378e-05, "loss": 0.0447, "step": 8190 }, { "epoch": 0.7205623901581723, "grad_norm": 1.0376293659210205, "learning_rate": 1.6397999999999998e-05, "loss": 0.0439, "step": 8200 }, { "epoch": 0.7214411247803163, "grad_norm": 0.8047411441802979, "learning_rate": 1.6418e-05, "loss": 0.0418, "step": 8210 }, { "epoch": 0.7223198594024605, "grad_norm": 0.9350674748420715, "learning_rate": 1.6438e-05, "loss": 0.04, "step": 8220 }, { "epoch": 0.7231985940246046, "grad_norm": 0.7493903040885925, "learning_rate": 1.6458e-05, "loss": 0.0422, "step": 8230 }, { "epoch": 0.7240773286467487, "grad_norm": 0.6668313145637512, "learning_rate": 1.6478e-05, "loss": 0.0435, "step": 8240 }, { "epoch": 0.7249560632688928, "grad_norm": 0.8368672728538513, "learning_rate": 1.6498e-05, "loss": 0.045, "step": 8250 }, { "epoch": 0.7258347978910369, "grad_norm": 0.7532497048377991, "learning_rate": 1.6518e-05, "loss": 0.0421, "step": 8260 }, { "epoch": 0.726713532513181, "grad_norm": 0.6674710512161255, "learning_rate": 1.6538e-05, "loss": 0.0413, "step": 8270 }, { "epoch": 0.7275922671353251, "grad_norm": 0.6711191534996033, "learning_rate": 1.6558e-05, "loss": 0.0436, "step": 8280 }, { "epoch": 0.7284710017574693, "grad_norm": 1.062362790107727, "learning_rate": 1.6578000000000003e-05, "loss": 0.045, "step": 8290 }, { "epoch": 0.7293497363796133, "grad_norm": 1.2090237140655518, "learning_rate": 1.6598e-05, "loss": 0.0427, "step": 8300 }, { "epoch": 0.7302284710017575, "grad_norm": 1.3807828426361084, "learning_rate": 1.6618e-05, "loss": 0.0454, "step": 8310 }, { "epoch": 0.7311072056239016, "grad_norm": 0.9406363368034363, "learning_rate": 1.6638000000000002e-05, "loss": 0.0422, "step": 8320 }, { "epoch": 0.7319859402460457, "grad_norm": 1.1144884824752808, "learning_rate": 1.6658000000000002e-05, "loss": 0.0423, "step": 8330 }, { "epoch": 0.7328646748681898, "grad_norm": 0.9416878819465637, "learning_rate": 1.6678000000000002e-05, "loss": 0.0423, "step": 8340 }, { "epoch": 0.7337434094903339, "grad_norm": 1.0770896673202515, "learning_rate": 1.6698e-05, "loss": 0.0455, "step": 8350 }, { "epoch": 0.7346221441124781, "grad_norm": 1.2926645278930664, "learning_rate": 1.6718e-05, "loss": 0.0422, "step": 8360 }, { "epoch": 0.7355008787346221, "grad_norm": 0.923851728439331, "learning_rate": 1.6738e-05, "loss": 0.0396, "step": 8370 }, { "epoch": 0.7363796133567663, "grad_norm": 0.7585502862930298, "learning_rate": 1.6758e-05, "loss": 0.0406, "step": 8380 }, { "epoch": 0.7372583479789103, "grad_norm": 0.7343850135803223, "learning_rate": 1.6778e-05, "loss": 0.0438, "step": 8390 }, { "epoch": 0.7381370826010545, "grad_norm": 0.9294388294219971, "learning_rate": 1.6798e-05, "loss": 0.0427, "step": 8400 }, { "epoch": 0.7390158172231986, "grad_norm": 0.7951410412788391, "learning_rate": 1.6818e-05, "loss": 0.0449, "step": 8410 }, { "epoch": 0.7398945518453427, "grad_norm": 0.9810773134231567, "learning_rate": 1.6838e-05, "loss": 0.0415, "step": 8420 }, { "epoch": 0.7407732864674869, "grad_norm": 0.601721465587616, "learning_rate": 1.6858e-05, "loss": 0.0414, "step": 8430 }, { "epoch": 0.7416520210896309, "grad_norm": 1.0042251348495483, "learning_rate": 1.6878000000000003e-05, "loss": 0.0432, "step": 8440 }, { "epoch": 0.7425307557117751, "grad_norm": 1.0446043014526367, "learning_rate": 1.6898e-05, "loss": 0.0493, "step": 8450 }, { "epoch": 0.7434094903339191, "grad_norm": 1.041822075843811, "learning_rate": 1.6918e-05, "loss": 0.0431, "step": 8460 }, { "epoch": 0.7442882249560633, "grad_norm": 1.044788122177124, "learning_rate": 1.6938e-05, "loss": 0.0414, "step": 8470 }, { "epoch": 0.7451669595782073, "grad_norm": 0.9744192361831665, "learning_rate": 1.6958000000000003e-05, "loss": 0.0435, "step": 8480 }, { "epoch": 0.7460456942003515, "grad_norm": 0.8327853679656982, "learning_rate": 1.6978e-05, "loss": 0.0424, "step": 8490 }, { "epoch": 0.7469244288224957, "grad_norm": 1.1088013648986816, "learning_rate": 1.6998e-05, "loss": 0.0462, "step": 8500 }, { "epoch": 0.7478031634446397, "grad_norm": 1.0683618783950806, "learning_rate": 1.7018000000000002e-05, "loss": 0.0421, "step": 8510 }, { "epoch": 0.7486818980667839, "grad_norm": 0.5904213786125183, "learning_rate": 1.7038000000000002e-05, "loss": 0.0421, "step": 8520 }, { "epoch": 0.7495606326889279, "grad_norm": 0.963259220123291, "learning_rate": 1.7058e-05, "loss": 0.0419, "step": 8530 }, { "epoch": 0.7504393673110721, "grad_norm": 0.858403742313385, "learning_rate": 1.7077999999999998e-05, "loss": 0.0447, "step": 8540 }, { "epoch": 0.7513181019332161, "grad_norm": 1.3853710889816284, "learning_rate": 1.7098e-05, "loss": 0.0418, "step": 8550 }, { "epoch": 0.7521968365553603, "grad_norm": 1.0350157022476196, "learning_rate": 1.7118e-05, "loss": 0.0465, "step": 8560 }, { "epoch": 0.7530755711775043, "grad_norm": 1.0401486158370972, "learning_rate": 1.7138e-05, "loss": 0.0473, "step": 8570 }, { "epoch": 0.7539543057996485, "grad_norm": 0.7854179739952087, "learning_rate": 1.7158e-05, "loss": 0.044, "step": 8580 }, { "epoch": 0.7548330404217927, "grad_norm": 1.0115795135498047, "learning_rate": 1.7178e-05, "loss": 0.0417, "step": 8590 }, { "epoch": 0.7557117750439367, "grad_norm": 0.9423606991767883, "learning_rate": 1.7198e-05, "loss": 0.0416, "step": 8600 }, { "epoch": 0.7565905096660809, "grad_norm": 1.1308754682540894, "learning_rate": 1.7218e-05, "loss": 0.0431, "step": 8610 }, { "epoch": 0.7574692442882249, "grad_norm": 0.763620913028717, "learning_rate": 1.7238e-05, "loss": 0.0401, "step": 8620 }, { "epoch": 0.7583479789103691, "grad_norm": 1.0169329643249512, "learning_rate": 1.7258000000000003e-05, "loss": 0.0426, "step": 8630 }, { "epoch": 0.7592267135325131, "grad_norm": 0.8249424695968628, "learning_rate": 1.7278e-05, "loss": 0.0413, "step": 8640 }, { "epoch": 0.7601054481546573, "grad_norm": 1.0291988849639893, "learning_rate": 1.7298e-05, "loss": 0.0442, "step": 8650 }, { "epoch": 0.7609841827768014, "grad_norm": 0.9045040011405945, "learning_rate": 1.7318000000000003e-05, "loss": 0.0412, "step": 8660 }, { "epoch": 0.7618629173989455, "grad_norm": 1.1636890172958374, "learning_rate": 1.7338000000000002e-05, "loss": 0.0434, "step": 8670 }, { "epoch": 0.7627416520210897, "grad_norm": 0.6519398093223572, "learning_rate": 1.7358000000000002e-05, "loss": 0.042, "step": 8680 }, { "epoch": 0.7636203866432337, "grad_norm": 0.7625969052314758, "learning_rate": 1.7378e-05, "loss": 0.0377, "step": 8690 }, { "epoch": 0.7644991212653779, "grad_norm": 0.7796596884727478, "learning_rate": 1.7398000000000002e-05, "loss": 0.0376, "step": 8700 }, { "epoch": 0.7653778558875219, "grad_norm": 0.983058750629425, "learning_rate": 1.7418e-05, "loss": 0.0418, "step": 8710 }, { "epoch": 0.7662565905096661, "grad_norm": 0.7431403398513794, "learning_rate": 1.7438e-05, "loss": 0.0412, "step": 8720 }, { "epoch": 0.7671353251318102, "grad_norm": 0.7926545143127441, "learning_rate": 1.7458e-05, "loss": 0.0409, "step": 8730 }, { "epoch": 0.7680140597539543, "grad_norm": 0.796971321105957, "learning_rate": 1.7478e-05, "loss": 0.04, "step": 8740 }, { "epoch": 0.7688927943760984, "grad_norm": 0.7147412896156311, "learning_rate": 1.7498e-05, "loss": 0.0362, "step": 8750 }, { "epoch": 0.7697715289982425, "grad_norm": 0.8948741555213928, "learning_rate": 1.7518e-05, "loss": 0.0395, "step": 8760 }, { "epoch": 0.7706502636203867, "grad_norm": 0.7725838422775269, "learning_rate": 1.7538e-05, "loss": 0.0445, "step": 8770 }, { "epoch": 0.7715289982425307, "grad_norm": 0.8621135354042053, "learning_rate": 1.7558000000000004e-05, "loss": 0.0425, "step": 8780 }, { "epoch": 0.7724077328646749, "grad_norm": 0.6942624449729919, "learning_rate": 1.7578e-05, "loss": 0.0411, "step": 8790 }, { "epoch": 0.773286467486819, "grad_norm": 0.7800189256668091, "learning_rate": 1.7598e-05, "loss": 0.0364, "step": 8800 }, { "epoch": 0.7741652021089631, "grad_norm": 0.6706297993659973, "learning_rate": 1.7618e-05, "loss": 0.0382, "step": 8810 }, { "epoch": 0.7750439367311072, "grad_norm": 0.8158588409423828, "learning_rate": 1.7638000000000003e-05, "loss": 0.0433, "step": 8820 }, { "epoch": 0.7759226713532513, "grad_norm": 0.8118991851806641, "learning_rate": 1.7658e-05, "loss": 0.0397, "step": 8830 }, { "epoch": 0.7768014059753954, "grad_norm": 0.8072343468666077, "learning_rate": 1.7678e-05, "loss": 0.0431, "step": 8840 }, { "epoch": 0.7776801405975395, "grad_norm": 0.7001318335533142, "learning_rate": 1.7698000000000002e-05, "loss": 0.0452, "step": 8850 }, { "epoch": 0.7785588752196837, "grad_norm": 0.7780551314353943, "learning_rate": 1.7718000000000002e-05, "loss": 0.0421, "step": 8860 }, { "epoch": 0.7794376098418277, "grad_norm": 0.7299732565879822, "learning_rate": 1.7738000000000002e-05, "loss": 0.0442, "step": 8870 }, { "epoch": 0.7803163444639719, "grad_norm": 0.6805367469787598, "learning_rate": 1.7758e-05, "loss": 0.0417, "step": 8880 }, { "epoch": 0.781195079086116, "grad_norm": 0.6234753727912903, "learning_rate": 1.7778e-05, "loss": 0.0441, "step": 8890 }, { "epoch": 0.7820738137082601, "grad_norm": 0.4457421600818634, "learning_rate": 1.7798e-05, "loss": 0.0451, "step": 8900 }, { "epoch": 0.7829525483304042, "grad_norm": 0.8471882939338684, "learning_rate": 1.7818e-05, "loss": 0.0413, "step": 8910 }, { "epoch": 0.7838312829525483, "grad_norm": 0.8237369656562805, "learning_rate": 1.7838e-05, "loss": 0.0419, "step": 8920 }, { "epoch": 0.7847100175746925, "grad_norm": 0.9636493921279907, "learning_rate": 1.7858e-05, "loss": 0.0467, "step": 8930 }, { "epoch": 0.7855887521968365, "grad_norm": 0.820115864276886, "learning_rate": 1.7878e-05, "loss": 0.0423, "step": 8940 }, { "epoch": 0.7864674868189807, "grad_norm": 0.8796687722206116, "learning_rate": 1.7898e-05, "loss": 0.0421, "step": 8950 }, { "epoch": 0.7873462214411248, "grad_norm": 1.0134191513061523, "learning_rate": 1.7918e-05, "loss": 0.0418, "step": 8960 }, { "epoch": 0.7882249560632689, "grad_norm": 1.0322152376174927, "learning_rate": 1.7938000000000003e-05, "loss": 0.0446, "step": 8970 }, { "epoch": 0.789103690685413, "grad_norm": 0.8455127477645874, "learning_rate": 1.7958e-05, "loss": 0.042, "step": 8980 }, { "epoch": 0.7899824253075571, "grad_norm": 1.1513714790344238, "learning_rate": 1.7978e-05, "loss": 0.0469, "step": 8990 }, { "epoch": 0.7908611599297012, "grad_norm": 0.8271756172180176, "learning_rate": 1.7998e-05, "loss": 0.0456, "step": 9000 }, { "epoch": 0.7917398945518453, "grad_norm": 0.6156802177429199, "learning_rate": 1.8018000000000003e-05, "loss": 0.0436, "step": 9010 }, { "epoch": 0.7926186291739895, "grad_norm": 0.8158534169197083, "learning_rate": 1.8038000000000002e-05, "loss": 0.0429, "step": 9020 }, { "epoch": 0.7934973637961336, "grad_norm": 0.6900830864906311, "learning_rate": 1.8058e-05, "loss": 0.042, "step": 9030 }, { "epoch": 0.7943760984182777, "grad_norm": 0.8780141472816467, "learning_rate": 1.8078000000000002e-05, "loss": 0.0395, "step": 9040 }, { "epoch": 0.7952548330404218, "grad_norm": 0.8368458151817322, "learning_rate": 1.8098000000000002e-05, "loss": 0.0385, "step": 9050 }, { "epoch": 0.7961335676625659, "grad_norm": 0.993640661239624, "learning_rate": 1.8118000000000002e-05, "loss": 0.0411, "step": 9060 }, { "epoch": 0.79701230228471, "grad_norm": 0.816169023513794, "learning_rate": 1.8138e-05, "loss": 0.0408, "step": 9070 }, { "epoch": 0.7978910369068541, "grad_norm": 0.9092068076133728, "learning_rate": 1.8158e-05, "loss": 0.0405, "step": 9080 }, { "epoch": 0.7987697715289982, "grad_norm": 0.5644040107727051, "learning_rate": 1.8178e-05, "loss": 0.0396, "step": 9090 }, { "epoch": 0.7996485061511424, "grad_norm": 0.7712323069572449, "learning_rate": 1.8198e-05, "loss": 0.0384, "step": 9100 }, { "epoch": 0.8005272407732865, "grad_norm": 0.7490242719650269, "learning_rate": 1.8218e-05, "loss": 0.0368, "step": 9110 }, { "epoch": 0.8014059753954306, "grad_norm": 0.9151948690414429, "learning_rate": 1.8238e-05, "loss": 0.0404, "step": 9120 }, { "epoch": 0.8022847100175747, "grad_norm": 0.8109952211380005, "learning_rate": 1.8258e-05, "loss": 0.0401, "step": 9130 }, { "epoch": 0.8031634446397188, "grad_norm": 0.7636911273002625, "learning_rate": 1.8278e-05, "loss": 0.0387, "step": 9140 }, { "epoch": 0.8040421792618629, "grad_norm": 0.7101365327835083, "learning_rate": 1.8298e-05, "loss": 0.0419, "step": 9150 }, { "epoch": 0.804920913884007, "grad_norm": 0.8036380410194397, "learning_rate": 1.8318000000000003e-05, "loss": 0.0368, "step": 9160 }, { "epoch": 0.8057996485061512, "grad_norm": 0.6688329577445984, "learning_rate": 1.8338e-05, "loss": 0.0395, "step": 9170 }, { "epoch": 0.8066783831282952, "grad_norm": 0.594559907913208, "learning_rate": 1.8358e-05, "loss": 0.0391, "step": 9180 }, { "epoch": 0.8075571177504394, "grad_norm": 0.7707840204238892, "learning_rate": 1.8378e-05, "loss": 0.0405, "step": 9190 }, { "epoch": 0.8084358523725835, "grad_norm": 0.4332924783229828, "learning_rate": 1.8398000000000002e-05, "loss": 0.0403, "step": 9200 }, { "epoch": 0.8093145869947276, "grad_norm": 0.5154026746749878, "learning_rate": 1.8418000000000002e-05, "loss": 0.0444, "step": 9210 }, { "epoch": 0.8101933216168717, "grad_norm": 0.4688563048839569, "learning_rate": 1.8438e-05, "loss": 0.0406, "step": 9220 }, { "epoch": 0.8110720562390158, "grad_norm": 0.8550302386283875, "learning_rate": 1.8458000000000002e-05, "loss": 0.0407, "step": 9230 }, { "epoch": 0.81195079086116, "grad_norm": 0.8244479894638062, "learning_rate": 1.8478e-05, "loss": 0.0383, "step": 9240 }, { "epoch": 0.812829525483304, "grad_norm": 0.6967799067497253, "learning_rate": 1.8498e-05, "loss": 0.039, "step": 9250 }, { "epoch": 0.8137082601054482, "grad_norm": 0.8962905406951904, "learning_rate": 1.8518e-05, "loss": 0.0395, "step": 9260 }, { "epoch": 0.8145869947275922, "grad_norm": 0.5815776586532593, "learning_rate": 1.8538e-05, "loss": 0.0434, "step": 9270 }, { "epoch": 0.8154657293497364, "grad_norm": 0.7581464648246765, "learning_rate": 1.8558e-05, "loss": 0.0393, "step": 9280 }, { "epoch": 0.8163444639718805, "grad_norm": 0.9699270129203796, "learning_rate": 1.8578e-05, "loss": 0.0386, "step": 9290 }, { "epoch": 0.8172231985940246, "grad_norm": 1.1551704406738281, "learning_rate": 1.8598e-05, "loss": 0.0419, "step": 9300 }, { "epoch": 0.8181019332161688, "grad_norm": 0.8947121500968933, "learning_rate": 1.8618000000000004e-05, "loss": 0.0395, "step": 9310 }, { "epoch": 0.8189806678383128, "grad_norm": 0.9064276814460754, "learning_rate": 1.8638e-05, "loss": 0.0393, "step": 9320 }, { "epoch": 0.819859402460457, "grad_norm": 0.6649875640869141, "learning_rate": 1.8658e-05, "loss": 0.039, "step": 9330 }, { "epoch": 0.820738137082601, "grad_norm": 0.898710310459137, "learning_rate": 1.8678e-05, "loss": 0.0439, "step": 9340 }, { "epoch": 0.8216168717047452, "grad_norm": 0.6703824400901794, "learning_rate": 1.8698000000000003e-05, "loss": 0.0424, "step": 9350 }, { "epoch": 0.8224956063268892, "grad_norm": 0.8106133937835693, "learning_rate": 1.8718000000000003e-05, "loss": 0.0407, "step": 9360 }, { "epoch": 0.8233743409490334, "grad_norm": 0.6209238767623901, "learning_rate": 1.8738e-05, "loss": 0.037, "step": 9370 }, { "epoch": 0.8242530755711776, "grad_norm": 0.8079084753990173, "learning_rate": 1.8758e-05, "loss": 0.0396, "step": 9380 }, { "epoch": 0.8251318101933216, "grad_norm": 0.9081114530563354, "learning_rate": 1.8778000000000002e-05, "loss": 0.0372, "step": 9390 }, { "epoch": 0.8260105448154658, "grad_norm": 0.8924810886383057, "learning_rate": 1.8798000000000002e-05, "loss": 0.0398, "step": 9400 }, { "epoch": 0.8268892794376098, "grad_norm": 0.8233388662338257, "learning_rate": 1.8818e-05, "loss": 0.0405, "step": 9410 }, { "epoch": 0.827768014059754, "grad_norm": 0.6217953562736511, "learning_rate": 1.8838e-05, "loss": 0.0357, "step": 9420 }, { "epoch": 0.828646748681898, "grad_norm": 0.9237642884254456, "learning_rate": 1.8858e-05, "loss": 0.0371, "step": 9430 }, { "epoch": 0.8295254833040422, "grad_norm": 0.813280463218689, "learning_rate": 1.8878e-05, "loss": 0.0384, "step": 9440 }, { "epoch": 0.8304042179261862, "grad_norm": 0.6726194620132446, "learning_rate": 1.8898e-05, "loss": 0.0373, "step": 9450 }, { "epoch": 0.8312829525483304, "grad_norm": 0.7461214065551758, "learning_rate": 1.8918e-05, "loss": 0.0389, "step": 9460 }, { "epoch": 0.8321616871704746, "grad_norm": 0.6651244759559631, "learning_rate": 1.8938e-05, "loss": 0.0383, "step": 9470 }, { "epoch": 0.8330404217926186, "grad_norm": 0.8500538468360901, "learning_rate": 1.8958e-05, "loss": 0.038, "step": 9480 }, { "epoch": 0.8339191564147628, "grad_norm": 0.7841906547546387, "learning_rate": 1.8978e-05, "loss": 0.0384, "step": 9490 }, { "epoch": 0.8347978910369068, "grad_norm": 0.523756206035614, "learning_rate": 1.8998000000000003e-05, "loss": 0.0348, "step": 9500 }, { "epoch": 0.835676625659051, "grad_norm": 0.9485006928443909, "learning_rate": 1.9018e-05, "loss": 0.0386, "step": 9510 }, { "epoch": 0.836555360281195, "grad_norm": 0.5500234961509705, "learning_rate": 1.9038e-05, "loss": 0.04, "step": 9520 }, { "epoch": 0.8374340949033392, "grad_norm": 0.6616672277450562, "learning_rate": 1.9058e-05, "loss": 0.0349, "step": 9530 }, { "epoch": 0.8383128295254832, "grad_norm": 0.6896331906318665, "learning_rate": 1.9078000000000003e-05, "loss": 0.0362, "step": 9540 }, { "epoch": 0.8391915641476274, "grad_norm": 0.7097848057746887, "learning_rate": 1.9098000000000002e-05, "loss": 0.0364, "step": 9550 }, { "epoch": 0.8400702987697716, "grad_norm": 0.5699155926704407, "learning_rate": 1.9118e-05, "loss": 0.0356, "step": 9560 }, { "epoch": 0.8409490333919156, "grad_norm": 0.6800719499588013, "learning_rate": 1.9138e-05, "loss": 0.0366, "step": 9570 }, { "epoch": 0.8418277680140598, "grad_norm": 0.7568923830986023, "learning_rate": 1.9158000000000002e-05, "loss": 0.038, "step": 9580 }, { "epoch": 0.8427065026362038, "grad_norm": 0.5202877521514893, "learning_rate": 1.9178000000000002e-05, "loss": 0.0382, "step": 9590 }, { "epoch": 0.843585237258348, "grad_norm": 0.6875913143157959, "learning_rate": 1.9198e-05, "loss": 0.0362, "step": 9600 }, { "epoch": 0.844463971880492, "grad_norm": 0.817875325679779, "learning_rate": 1.9218e-05, "loss": 0.0368, "step": 9610 }, { "epoch": 0.8453427065026362, "grad_norm": 0.8442402482032776, "learning_rate": 1.9238e-05, "loss": 0.0397, "step": 9620 }, { "epoch": 0.8462214411247804, "grad_norm": 0.8659873604774475, "learning_rate": 1.9258e-05, "loss": 0.0387, "step": 9630 }, { "epoch": 0.8471001757469244, "grad_norm": 0.6221631765365601, "learning_rate": 1.9278e-05, "loss": 0.0389, "step": 9640 }, { "epoch": 0.8479789103690686, "grad_norm": 0.9509397745132446, "learning_rate": 1.9298e-05, "loss": 0.038, "step": 9650 }, { "epoch": 0.8488576449912126, "grad_norm": 0.7205158472061157, "learning_rate": 1.9318e-05, "loss": 0.0359, "step": 9660 }, { "epoch": 0.8497363796133568, "grad_norm": 0.5356127023696899, "learning_rate": 1.9338e-05, "loss": 0.0397, "step": 9670 }, { "epoch": 0.8506151142355008, "grad_norm": 0.7912895679473877, "learning_rate": 1.9358e-05, "loss": 0.0389, "step": 9680 }, { "epoch": 0.851493848857645, "grad_norm": 0.7488508224487305, "learning_rate": 1.9378000000000003e-05, "loss": 0.0385, "step": 9690 }, { "epoch": 0.8523725834797891, "grad_norm": 0.7743740081787109, "learning_rate": 1.9398000000000003e-05, "loss": 0.0371, "step": 9700 }, { "epoch": 0.8532513181019332, "grad_norm": 0.7572956681251526, "learning_rate": 1.9418e-05, "loss": 0.0386, "step": 9710 }, { "epoch": 0.8541300527240774, "grad_norm": 0.7941198945045471, "learning_rate": 1.9438e-05, "loss": 0.0388, "step": 9720 }, { "epoch": 0.8550087873462214, "grad_norm": 1.0071325302124023, "learning_rate": 1.9458000000000002e-05, "loss": 0.0352, "step": 9730 }, { "epoch": 0.8558875219683656, "grad_norm": 0.8539367914199829, "learning_rate": 1.9478000000000002e-05, "loss": 0.0403, "step": 9740 }, { "epoch": 0.8567662565905096, "grad_norm": 0.584865391254425, "learning_rate": 1.9498e-05, "loss": 0.0392, "step": 9750 }, { "epoch": 0.8576449912126538, "grad_norm": 0.5888796448707581, "learning_rate": 1.9518000000000002e-05, "loss": 0.0367, "step": 9760 }, { "epoch": 0.8585237258347979, "grad_norm": 0.5317763090133667, "learning_rate": 1.9538e-05, "loss": 0.0376, "step": 9770 }, { "epoch": 0.859402460456942, "grad_norm": 0.6545144319534302, "learning_rate": 1.9558e-05, "loss": 0.0409, "step": 9780 }, { "epoch": 0.8602811950790861, "grad_norm": 0.6890981197357178, "learning_rate": 1.9578e-05, "loss": 0.0406, "step": 9790 }, { "epoch": 0.8611599297012302, "grad_norm": 0.8718016147613525, "learning_rate": 1.9598e-05, "loss": 0.0372, "step": 9800 }, { "epoch": 0.8620386643233744, "grad_norm": 0.8227787017822266, "learning_rate": 1.9618e-05, "loss": 0.0349, "step": 9810 }, { "epoch": 0.8629173989455184, "grad_norm": 0.6571164727210999, "learning_rate": 1.9638e-05, "loss": 0.0361, "step": 9820 }, { "epoch": 0.8637961335676626, "grad_norm": 0.6815102696418762, "learning_rate": 1.9658e-05, "loss": 0.0369, "step": 9830 }, { "epoch": 0.8646748681898067, "grad_norm": 0.5572563409805298, "learning_rate": 1.9678e-05, "loss": 0.0363, "step": 9840 }, { "epoch": 0.8655536028119508, "grad_norm": 0.8430776596069336, "learning_rate": 1.9698e-05, "loss": 0.0403, "step": 9850 }, { "epoch": 0.8664323374340949, "grad_norm": 0.7193848490715027, "learning_rate": 1.9718e-05, "loss": 0.0367, "step": 9860 }, { "epoch": 0.867311072056239, "grad_norm": 1.0121779441833496, "learning_rate": 1.9738e-05, "loss": 0.038, "step": 9870 }, { "epoch": 0.8681898066783831, "grad_norm": 0.9494460821151733, "learning_rate": 1.9758000000000003e-05, "loss": 0.039, "step": 9880 }, { "epoch": 0.8690685413005272, "grad_norm": 0.5572819113731384, "learning_rate": 1.9778000000000003e-05, "loss": 0.0408, "step": 9890 }, { "epoch": 0.8699472759226714, "grad_norm": 0.9024667143821716, "learning_rate": 1.9798e-05, "loss": 0.0386, "step": 9900 }, { "epoch": 0.8708260105448155, "grad_norm": 0.5988235473632812, "learning_rate": 1.9818e-05, "loss": 0.038, "step": 9910 }, { "epoch": 0.8717047451669596, "grad_norm": 0.6786329746246338, "learning_rate": 1.9838000000000002e-05, "loss": 0.0439, "step": 9920 }, { "epoch": 0.8725834797891037, "grad_norm": 0.7467644214630127, "learning_rate": 1.9858000000000002e-05, "loss": 0.0421, "step": 9930 }, { "epoch": 0.8734622144112478, "grad_norm": 0.5100950002670288, "learning_rate": 1.9878000000000002e-05, "loss": 0.0369, "step": 9940 }, { "epoch": 0.8743409490333919, "grad_norm": 0.7369251847267151, "learning_rate": 1.9898e-05, "loss": 0.0394, "step": 9950 }, { "epoch": 0.875219683655536, "grad_norm": 0.5747088193893433, "learning_rate": 1.9918e-05, "loss": 0.037, "step": 9960 }, { "epoch": 0.8760984182776801, "grad_norm": 0.4598296582698822, "learning_rate": 1.9938e-05, "loss": 0.0362, "step": 9970 }, { "epoch": 0.8769771528998243, "grad_norm": 0.6410236954689026, "learning_rate": 1.9958e-05, "loss": 0.0385, "step": 9980 }, { "epoch": 0.8778558875219684, "grad_norm": 0.7327964305877686, "learning_rate": 1.9978e-05, "loss": 0.0373, "step": 9990 }, { "epoch": 0.8787346221441125, "grad_norm": 0.991978645324707, "learning_rate": 1.9998e-05, "loss": 0.0372, "step": 10000 }, { "epoch": 0.8796133567662566, "grad_norm": 0.5166749954223633, "learning_rate": 2.0018e-05, "loss": 0.0356, "step": 10010 }, { "epoch": 0.8804920913884007, "grad_norm": 0.6014555096626282, "learning_rate": 2.0038e-05, "loss": 0.0376, "step": 10020 }, { "epoch": 0.8813708260105448, "grad_norm": 0.7864572405815125, "learning_rate": 2.0058000000000003e-05, "loss": 0.0363, "step": 10030 }, { "epoch": 0.8822495606326889, "grad_norm": 0.5697535872459412, "learning_rate": 2.0078e-05, "loss": 0.0348, "step": 10040 }, { "epoch": 0.883128295254833, "grad_norm": 0.659483790397644, "learning_rate": 2.0098e-05, "loss": 0.0358, "step": 10050 }, { "epoch": 0.8840070298769771, "grad_norm": 0.5906124711036682, "learning_rate": 2.0118e-05, "loss": 0.0347, "step": 10060 }, { "epoch": 0.8848857644991213, "grad_norm": 0.48392796516418457, "learning_rate": 2.0138000000000003e-05, "loss": 0.037, "step": 10070 }, { "epoch": 0.8857644991212654, "grad_norm": 0.590290904045105, "learning_rate": 2.0158000000000002e-05, "loss": 0.0383, "step": 10080 }, { "epoch": 0.8866432337434095, "grad_norm": 0.6385877728462219, "learning_rate": 2.0178e-05, "loss": 0.0365, "step": 10090 }, { "epoch": 0.8875219683655536, "grad_norm": 1.1011266708374023, "learning_rate": 2.0198e-05, "loss": 0.0381, "step": 10100 }, { "epoch": 0.8884007029876977, "grad_norm": 0.7373857498168945, "learning_rate": 2.0218000000000002e-05, "loss": 0.0352, "step": 10110 }, { "epoch": 0.8892794376098418, "grad_norm": 0.908666729927063, "learning_rate": 2.0238000000000002e-05, "loss": 0.0377, "step": 10120 }, { "epoch": 0.8901581722319859, "grad_norm": 0.6858969330787659, "learning_rate": 2.0258e-05, "loss": 0.0354, "step": 10130 }, { "epoch": 0.8910369068541301, "grad_norm": 0.6740691065788269, "learning_rate": 2.0278e-05, "loss": 0.0362, "step": 10140 }, { "epoch": 0.8919156414762741, "grad_norm": 0.47136616706848145, "learning_rate": 2.0298e-05, "loss": 0.0359, "step": 10150 }, { "epoch": 0.8927943760984183, "grad_norm": 0.7404363751411438, "learning_rate": 2.0318e-05, "loss": 0.0353, "step": 10160 }, { "epoch": 0.8936731107205624, "grad_norm": 0.8132708072662354, "learning_rate": 2.0338e-05, "loss": 0.0402, "step": 10170 }, { "epoch": 0.8945518453427065, "grad_norm": 0.737639844417572, "learning_rate": 2.0358e-05, "loss": 0.0369, "step": 10180 }, { "epoch": 0.8954305799648506, "grad_norm": 0.6776847839355469, "learning_rate": 2.0378e-05, "loss": 0.0339, "step": 10190 }, { "epoch": 0.8963093145869947, "grad_norm": 0.5755254030227661, "learning_rate": 2.0398e-05, "loss": 0.0346, "step": 10200 }, { "epoch": 0.8971880492091389, "grad_norm": 0.6927011013031006, "learning_rate": 2.0418e-05, "loss": 0.037, "step": 10210 }, { "epoch": 0.8980667838312829, "grad_norm": 0.8353449702262878, "learning_rate": 2.0438000000000003e-05, "loss": 0.0392, "step": 10220 }, { "epoch": 0.8989455184534271, "grad_norm": 0.6027050018310547, "learning_rate": 2.0458000000000003e-05, "loss": 0.0358, "step": 10230 }, { "epoch": 0.8998242530755711, "grad_norm": 0.656103789806366, "learning_rate": 2.0478e-05, "loss": 0.0364, "step": 10240 }, { "epoch": 0.9007029876977153, "grad_norm": 0.6603544354438782, "learning_rate": 2.0498e-05, "loss": 0.0345, "step": 10250 }, { "epoch": 0.9015817223198594, "grad_norm": 0.5290365815162659, "learning_rate": 2.0518000000000002e-05, "loss": 0.0374, "step": 10260 }, { "epoch": 0.9024604569420035, "grad_norm": 0.5377839207649231, "learning_rate": 2.0538000000000002e-05, "loss": 0.036, "step": 10270 }, { "epoch": 0.9033391915641477, "grad_norm": 0.7930485606193542, "learning_rate": 2.0558000000000002e-05, "loss": 0.0385, "step": 10280 }, { "epoch": 0.9042179261862917, "grad_norm": 0.8507276773452759, "learning_rate": 2.0578e-05, "loss": 0.0382, "step": 10290 }, { "epoch": 0.9050966608084359, "grad_norm": 0.6501576900482178, "learning_rate": 2.0598e-05, "loss": 0.0396, "step": 10300 }, { "epoch": 0.9059753954305799, "grad_norm": 0.7334262728691101, "learning_rate": 2.0618e-05, "loss": 0.0372, "step": 10310 }, { "epoch": 0.9068541300527241, "grad_norm": 0.7140448689460754, "learning_rate": 2.0638e-05, "loss": 0.0326, "step": 10320 }, { "epoch": 0.9077328646748682, "grad_norm": 0.6385663151741028, "learning_rate": 2.0658e-05, "loss": 0.0388, "step": 10330 }, { "epoch": 0.9086115992970123, "grad_norm": 0.7276721596717834, "learning_rate": 2.0678e-05, "loss": 0.036, "step": 10340 }, { "epoch": 0.9094903339191565, "grad_norm": 0.509782075881958, "learning_rate": 2.0698e-05, "loss": 0.0385, "step": 10350 }, { "epoch": 0.9103690685413005, "grad_norm": 0.5569102764129639, "learning_rate": 2.0718e-05, "loss": 0.036, "step": 10360 }, { "epoch": 0.9112478031634447, "grad_norm": 0.6110565066337585, "learning_rate": 2.0738e-05, "loss": 0.0365, "step": 10370 }, { "epoch": 0.9121265377855887, "grad_norm": 0.6353746056556702, "learning_rate": 2.0758e-05, "loss": 0.0352, "step": 10380 }, { "epoch": 0.9130052724077329, "grad_norm": 0.8047717809677124, "learning_rate": 2.0778e-05, "loss": 0.0376, "step": 10390 }, { "epoch": 0.9138840070298769, "grad_norm": 0.7215612530708313, "learning_rate": 2.0798e-05, "loss": 0.0404, "step": 10400 }, { "epoch": 0.9147627416520211, "grad_norm": 0.7832387089729309, "learning_rate": 2.0818000000000003e-05, "loss": 0.0368, "step": 10410 }, { "epoch": 0.9156414762741653, "grad_norm": 0.7010772824287415, "learning_rate": 2.0838000000000003e-05, "loss": 0.0362, "step": 10420 }, { "epoch": 0.9165202108963093, "grad_norm": 0.5065191388130188, "learning_rate": 2.0858e-05, "loss": 0.0342, "step": 10430 }, { "epoch": 0.9173989455184535, "grad_norm": 0.7418439984321594, "learning_rate": 2.0878e-05, "loss": 0.0326, "step": 10440 }, { "epoch": 0.9182776801405975, "grad_norm": 0.7441498637199402, "learning_rate": 2.0898000000000002e-05, "loss": 0.0336, "step": 10450 }, { "epoch": 0.9191564147627417, "grad_norm": 0.6294314861297607, "learning_rate": 2.0918000000000002e-05, "loss": 0.0362, "step": 10460 }, { "epoch": 0.9200351493848857, "grad_norm": 0.502199113368988, "learning_rate": 2.0938000000000002e-05, "loss": 0.0349, "step": 10470 }, { "epoch": 0.9209138840070299, "grad_norm": 0.9756407737731934, "learning_rate": 2.0957999999999998e-05, "loss": 0.0356, "step": 10480 }, { "epoch": 0.9217926186291739, "grad_norm": 0.8378863334655762, "learning_rate": 2.0978e-05, "loss": 0.0374, "step": 10490 }, { "epoch": 0.9226713532513181, "grad_norm": 0.8449456691741943, "learning_rate": 2.0998e-05, "loss": 0.0411, "step": 10500 }, { "epoch": 0.9235500878734623, "grad_norm": 0.9215894341468811, "learning_rate": 2.1018e-05, "loss": 0.0331, "step": 10510 }, { "epoch": 0.9244288224956063, "grad_norm": 0.806915819644928, "learning_rate": 2.1038e-05, "loss": 0.0355, "step": 10520 }, { "epoch": 0.9253075571177505, "grad_norm": 0.6396540999412537, "learning_rate": 2.1058e-05, "loss": 0.0377, "step": 10530 }, { "epoch": 0.9261862917398945, "grad_norm": 0.870961606502533, "learning_rate": 2.1078e-05, "loss": 0.0395, "step": 10540 }, { "epoch": 0.9270650263620387, "grad_norm": 1.0244332551956177, "learning_rate": 2.1098e-05, "loss": 0.0375, "step": 10550 }, { "epoch": 0.9279437609841827, "grad_norm": 0.5943139791488647, "learning_rate": 2.1118e-05, "loss": 0.0364, "step": 10560 }, { "epoch": 0.9288224956063269, "grad_norm": 0.9526610374450684, "learning_rate": 2.1138000000000003e-05, "loss": 0.0361, "step": 10570 }, { "epoch": 0.929701230228471, "grad_norm": 0.7066686749458313, "learning_rate": 2.1158e-05, "loss": 0.0354, "step": 10580 }, { "epoch": 0.9305799648506151, "grad_norm": 0.6154652237892151, "learning_rate": 2.1178e-05, "loss": 0.0352, "step": 10590 }, { "epoch": 0.9314586994727593, "grad_norm": 0.7722954154014587, "learning_rate": 2.1198000000000003e-05, "loss": 0.0353, "step": 10600 }, { "epoch": 0.9323374340949033, "grad_norm": 0.623588502407074, "learning_rate": 2.1218000000000003e-05, "loss": 0.0342, "step": 10610 }, { "epoch": 0.9332161687170475, "grad_norm": 0.6622728109359741, "learning_rate": 2.1238000000000002e-05, "loss": 0.0374, "step": 10620 }, { "epoch": 0.9340949033391915, "grad_norm": 0.6658490300178528, "learning_rate": 2.1258e-05, "loss": 0.0382, "step": 10630 }, { "epoch": 0.9349736379613357, "grad_norm": 0.7851880788803101, "learning_rate": 2.1278000000000002e-05, "loss": 0.0407, "step": 10640 }, { "epoch": 0.9358523725834798, "grad_norm": 0.5121892094612122, "learning_rate": 2.1298000000000002e-05, "loss": 0.0379, "step": 10650 }, { "epoch": 0.9367311072056239, "grad_norm": 0.5957114696502686, "learning_rate": 2.1318e-05, "loss": 0.0349, "step": 10660 }, { "epoch": 0.937609841827768, "grad_norm": 0.635485827922821, "learning_rate": 2.1337999999999998e-05, "loss": 0.0374, "step": 10670 }, { "epoch": 0.9384885764499121, "grad_norm": 0.7365423440933228, "learning_rate": 2.1358e-05, "loss": 0.0385, "step": 10680 }, { "epoch": 0.9393673110720563, "grad_norm": 0.7242499589920044, "learning_rate": 2.1378e-05, "loss": 0.0386, "step": 10690 }, { "epoch": 0.9402460456942003, "grad_norm": 0.8411391973495483, "learning_rate": 2.1398e-05, "loss": 0.0352, "step": 10700 }, { "epoch": 0.9411247803163445, "grad_norm": 0.6560684442520142, "learning_rate": 2.1418e-05, "loss": 0.0367, "step": 10710 }, { "epoch": 0.9420035149384886, "grad_norm": 0.9359292387962341, "learning_rate": 2.1438e-05, "loss": 0.0377, "step": 10720 }, { "epoch": 0.9428822495606327, "grad_norm": 0.6028962135314941, "learning_rate": 2.1458e-05, "loss": 0.0366, "step": 10730 }, { "epoch": 0.9437609841827768, "grad_norm": 0.6565437316894531, "learning_rate": 2.1478e-05, "loss": 0.0349, "step": 10740 }, { "epoch": 0.9446397188049209, "grad_norm": 0.5641800165176392, "learning_rate": 2.1498e-05, "loss": 0.0363, "step": 10750 }, { "epoch": 0.945518453427065, "grad_norm": 0.7129019498825073, "learning_rate": 2.1518000000000003e-05, "loss": 0.037, "step": 10760 }, { "epoch": 0.9463971880492091, "grad_norm": 0.6075322031974792, "learning_rate": 2.1538e-05, "loss": 0.034, "step": 10770 }, { "epoch": 0.9472759226713533, "grad_norm": 0.4578143060207367, "learning_rate": 2.1558e-05, "loss": 0.0326, "step": 10780 }, { "epoch": 0.9481546572934973, "grad_norm": 0.6707172989845276, "learning_rate": 2.1578000000000002e-05, "loss": 0.0358, "step": 10790 }, { "epoch": 0.9490333919156415, "grad_norm": 0.7991960644721985, "learning_rate": 2.1598000000000002e-05, "loss": 0.0359, "step": 10800 }, { "epoch": 0.9499121265377856, "grad_norm": 0.7062135934829712, "learning_rate": 2.1618000000000002e-05, "loss": 0.0361, "step": 10810 }, { "epoch": 0.9507908611599297, "grad_norm": 0.757217288017273, "learning_rate": 2.1638e-05, "loss": 0.0353, "step": 10820 }, { "epoch": 0.9516695957820738, "grad_norm": 0.5408464074134827, "learning_rate": 2.1658e-05, "loss": 0.034, "step": 10830 }, { "epoch": 0.9525483304042179, "grad_norm": 0.6284731030464172, "learning_rate": 2.1678e-05, "loss": 0.0346, "step": 10840 }, { "epoch": 0.953427065026362, "grad_norm": 0.5471599698066711, "learning_rate": 2.1698e-05, "loss": 0.0367, "step": 10850 }, { "epoch": 0.9543057996485061, "grad_norm": 0.5463921427726746, "learning_rate": 2.1718e-05, "loss": 0.0334, "step": 10860 }, { "epoch": 0.9551845342706503, "grad_norm": 0.7230879068374634, "learning_rate": 2.1738e-05, "loss": 0.036, "step": 10870 }, { "epoch": 0.9560632688927944, "grad_norm": 0.5279799699783325, "learning_rate": 2.1758e-05, "loss": 0.0368, "step": 10880 }, { "epoch": 0.9569420035149385, "grad_norm": 0.7827208042144775, "learning_rate": 2.1778e-05, "loss": 0.0348, "step": 10890 }, { "epoch": 0.9578207381370826, "grad_norm": 0.9197952747344971, "learning_rate": 2.1798e-05, "loss": 0.0361, "step": 10900 }, { "epoch": 0.9586994727592267, "grad_norm": 0.5793194770812988, "learning_rate": 2.1818000000000004e-05, "loss": 0.0359, "step": 10910 }, { "epoch": 0.9595782073813708, "grad_norm": 0.7226101160049438, "learning_rate": 2.1838e-05, "loss": 0.0355, "step": 10920 }, { "epoch": 0.960456942003515, "grad_norm": 0.7634719014167786, "learning_rate": 2.1858e-05, "loss": 0.0371, "step": 10930 }, { "epoch": 0.961335676625659, "grad_norm": 0.6101735234260559, "learning_rate": 2.1878e-05, "loss": 0.0359, "step": 10940 }, { "epoch": 0.9622144112478032, "grad_norm": 0.5591304302215576, "learning_rate": 2.1898000000000003e-05, "loss": 0.0342, "step": 10950 }, { "epoch": 0.9630931458699473, "grad_norm": 0.8403410911560059, "learning_rate": 2.1918000000000003e-05, "loss": 0.0326, "step": 10960 }, { "epoch": 0.9639718804920914, "grad_norm": 0.6004418134689331, "learning_rate": 2.1938e-05, "loss": 0.038, "step": 10970 }, { "epoch": 0.9648506151142355, "grad_norm": 0.7476364374160767, "learning_rate": 2.1958000000000002e-05, "loss": 0.0339, "step": 10980 }, { "epoch": 0.9657293497363796, "grad_norm": 0.6138219237327576, "learning_rate": 2.1978000000000002e-05, "loss": 0.0385, "step": 10990 }, { "epoch": 0.9666080843585237, "grad_norm": 0.4401990473270416, "learning_rate": 2.1998000000000002e-05, "loss": 0.0351, "step": 11000 }, { "epoch": 0.9674868189806678, "grad_norm": 0.5704279541969299, "learning_rate": 2.2017999999999998e-05, "loss": 0.0363, "step": 11010 }, { "epoch": 0.968365553602812, "grad_norm": 0.7811922430992126, "learning_rate": 2.2038e-05, "loss": 0.0362, "step": 11020 }, { "epoch": 0.9692442882249561, "grad_norm": 0.6717191338539124, "learning_rate": 2.2058e-05, "loss": 0.0351, "step": 11030 }, { "epoch": 0.9701230228471002, "grad_norm": 0.7580554485321045, "learning_rate": 2.2078e-05, "loss": 0.0362, "step": 11040 }, { "epoch": 0.9710017574692443, "grad_norm": 0.6775755286216736, "learning_rate": 2.2098e-05, "loss": 0.0359, "step": 11050 }, { "epoch": 0.9718804920913884, "grad_norm": 0.6980703473091125, "learning_rate": 2.2118e-05, "loss": 0.0367, "step": 11060 }, { "epoch": 0.9727592267135325, "grad_norm": 0.5485110282897949, "learning_rate": 2.2138e-05, "loss": 0.0338, "step": 11070 }, { "epoch": 0.9736379613356766, "grad_norm": 0.6423590183258057, "learning_rate": 2.2158e-05, "loss": 0.0364, "step": 11080 }, { "epoch": 0.9745166959578208, "grad_norm": 0.6257197260856628, "learning_rate": 2.2178e-05, "loss": 0.0343, "step": 11090 }, { "epoch": 0.9753954305799648, "grad_norm": 0.6757160425186157, "learning_rate": 2.2198000000000003e-05, "loss": 0.0348, "step": 11100 }, { "epoch": 0.976274165202109, "grad_norm": 0.5027305483818054, "learning_rate": 2.2218e-05, "loss": 0.0355, "step": 11110 }, { "epoch": 0.9771528998242531, "grad_norm": 0.7978763580322266, "learning_rate": 2.2238e-05, "loss": 0.0357, "step": 11120 }, { "epoch": 0.9780316344463972, "grad_norm": 0.6257880926132202, "learning_rate": 2.2258000000000003e-05, "loss": 0.0361, "step": 11130 }, { "epoch": 0.9789103690685413, "grad_norm": 0.7718781232833862, "learning_rate": 2.2278000000000003e-05, "loss": 0.0348, "step": 11140 }, { "epoch": 0.9797891036906854, "grad_norm": 0.7559919357299805, "learning_rate": 2.2298000000000002e-05, "loss": 0.0344, "step": 11150 }, { "epoch": 0.9806678383128296, "grad_norm": 0.7872924208641052, "learning_rate": 2.2318e-05, "loss": 0.037, "step": 11160 }, { "epoch": 0.9815465729349736, "grad_norm": 0.8317897915840149, "learning_rate": 2.2338000000000002e-05, "loss": 0.0354, "step": 11170 }, { "epoch": 0.9824253075571178, "grad_norm": 0.901872456073761, "learning_rate": 2.2358000000000002e-05, "loss": 0.0339, "step": 11180 }, { "epoch": 0.9833040421792618, "grad_norm": 0.5760416388511658, "learning_rate": 2.2378e-05, "loss": 0.0366, "step": 11190 }, { "epoch": 0.984182776801406, "grad_norm": 0.5457079410552979, "learning_rate": 2.2398e-05, "loss": 0.0364, "step": 11200 }, { "epoch": 0.9850615114235501, "grad_norm": 0.6340052485466003, "learning_rate": 2.2418e-05, "loss": 0.0366, "step": 11210 }, { "epoch": 0.9859402460456942, "grad_norm": 0.6269063353538513, "learning_rate": 2.2438e-05, "loss": 0.0331, "step": 11220 }, { "epoch": 0.9868189806678384, "grad_norm": 0.8265933394432068, "learning_rate": 2.2458e-05, "loss": 0.042, "step": 11230 }, { "epoch": 0.9876977152899824, "grad_norm": 0.5357298851013184, "learning_rate": 2.2478e-05, "loss": 0.0326, "step": 11240 }, { "epoch": 0.9885764499121266, "grad_norm": 0.6196725368499756, "learning_rate": 2.2498000000000004e-05, "loss": 0.0345, "step": 11250 }, { "epoch": 0.9894551845342706, "grad_norm": 0.5149360299110413, "learning_rate": 2.2518e-05, "loss": 0.0329, "step": 11260 }, { "epoch": 0.9903339191564148, "grad_norm": 0.5315958261489868, "learning_rate": 2.2538e-05, "loss": 0.0331, "step": 11270 }, { "epoch": 0.9912126537785588, "grad_norm": 0.5073951482772827, "learning_rate": 2.2558e-05, "loss": 0.0333, "step": 11280 }, { "epoch": 0.992091388400703, "grad_norm": 0.6162111163139343, "learning_rate": 2.2578000000000003e-05, "loss": 0.0333, "step": 11290 }, { "epoch": 0.9929701230228472, "grad_norm": 0.4642693102359772, "learning_rate": 2.2598e-05, "loss": 0.0338, "step": 11300 }, { "epoch": 0.9938488576449912, "grad_norm": 0.6230454444885254, "learning_rate": 2.2618e-05, "loss": 0.0341, "step": 11310 }, { "epoch": 0.9947275922671354, "grad_norm": 0.6759862899780273, "learning_rate": 2.2638000000000002e-05, "loss": 0.0339, "step": 11320 }, { "epoch": 0.9956063268892794, "grad_norm": 0.5988819003105164, "learning_rate": 2.2658000000000002e-05, "loss": 0.0337, "step": 11330 }, { "epoch": 0.9964850615114236, "grad_norm": 0.48579996824264526, "learning_rate": 2.2678000000000002e-05, "loss": 0.0336, "step": 11340 }, { "epoch": 0.9973637961335676, "grad_norm": 0.4851973056793213, "learning_rate": 2.2698e-05, "loss": 0.0366, "step": 11350 }, { "epoch": 0.9982425307557118, "grad_norm": 0.494454562664032, "learning_rate": 2.2718000000000002e-05, "loss": 0.0361, "step": 11360 }, { "epoch": 0.9991212653778558, "grad_norm": 0.7050285339355469, "learning_rate": 2.2738e-05, "loss": 0.0368, "step": 11370 }, { "epoch": 1.0, "grad_norm": 0.781132698059082, "learning_rate": 2.2758e-05, "loss": 0.0366, "step": 11380 }, { "epoch": 1.0008787346221442, "grad_norm": 0.5526796579360962, "learning_rate": 2.2778e-05, "loss": 0.0354, "step": 11390 }, { "epoch": 1.0017574692442883, "grad_norm": 0.8166113495826721, "learning_rate": 2.2798e-05, "loss": 0.0363, "step": 11400 }, { "epoch": 1.0026362038664323, "grad_norm": 0.6619054675102234, "learning_rate": 2.2818e-05, "loss": 0.0375, "step": 11410 }, { "epoch": 1.0035149384885764, "grad_norm": 0.9565250873565674, "learning_rate": 2.2838e-05, "loss": 0.0368, "step": 11420 }, { "epoch": 1.0043936731107206, "grad_norm": 0.5230754613876343, "learning_rate": 2.2858e-05, "loss": 0.0349, "step": 11430 }, { "epoch": 1.0052724077328647, "grad_norm": 0.6461597084999084, "learning_rate": 2.2878000000000004e-05, "loss": 0.0337, "step": 11440 }, { "epoch": 1.0061511423550087, "grad_norm": 0.44210436940193176, "learning_rate": 2.2898e-05, "loss": 0.0359, "step": 11450 }, { "epoch": 1.0070298769771528, "grad_norm": 0.6429334282875061, "learning_rate": 2.2918e-05, "loss": 0.0348, "step": 11460 }, { "epoch": 1.007908611599297, "grad_norm": 0.3853527903556824, "learning_rate": 2.2938e-05, "loss": 0.0318, "step": 11470 }, { "epoch": 1.0087873462214412, "grad_norm": 0.5267785787582397, "learning_rate": 2.2958000000000003e-05, "loss": 0.0316, "step": 11480 }, { "epoch": 1.0096660808435853, "grad_norm": 0.5622298121452332, "learning_rate": 2.2978000000000003e-05, "loss": 0.0327, "step": 11490 }, { "epoch": 1.0105448154657293, "grad_norm": 0.6146053075790405, "learning_rate": 2.2998e-05, "loss": 0.0335, "step": 11500 }, { "epoch": 1.0114235500878734, "grad_norm": 0.4289880692958832, "learning_rate": 2.3018000000000002e-05, "loss": 0.0339, "step": 11510 }, { "epoch": 1.0123022847100176, "grad_norm": 0.5256696343421936, "learning_rate": 2.3038000000000002e-05, "loss": 0.0344, "step": 11520 }, { "epoch": 1.0131810193321618, "grad_norm": 0.5782315731048584, "learning_rate": 2.3058000000000002e-05, "loss": 0.0373, "step": 11530 }, { "epoch": 1.0140597539543057, "grad_norm": 0.6777941584587097, "learning_rate": 2.3078e-05, "loss": 0.0368, "step": 11540 }, { "epoch": 1.0149384885764499, "grad_norm": 0.5793466567993164, "learning_rate": 2.3098e-05, "loss": 0.036, "step": 11550 }, { "epoch": 1.015817223198594, "grad_norm": 0.8668937087059021, "learning_rate": 2.3118e-05, "loss": 0.0364, "step": 11560 }, { "epoch": 1.0166959578207382, "grad_norm": 0.6745285391807556, "learning_rate": 2.3138e-05, "loss": 0.033, "step": 11570 }, { "epoch": 1.0175746924428823, "grad_norm": 0.5228933095932007, "learning_rate": 2.3158e-05, "loss": 0.0367, "step": 11580 }, { "epoch": 1.0184534270650263, "grad_norm": 0.9872060418128967, "learning_rate": 2.3178000000000004e-05, "loss": 0.0349, "step": 11590 }, { "epoch": 1.0193321616871704, "grad_norm": 0.7311321496963501, "learning_rate": 2.3198e-05, "loss": 0.0336, "step": 11600 }, { "epoch": 1.0202108963093146, "grad_norm": 0.4466843903064728, "learning_rate": 2.3218e-05, "loss": 0.0375, "step": 11610 }, { "epoch": 1.0210896309314588, "grad_norm": 0.7241096496582031, "learning_rate": 2.3238e-05, "loss": 0.0397, "step": 11620 }, { "epoch": 1.0219683655536027, "grad_norm": 0.7272554636001587, "learning_rate": 2.3258000000000003e-05, "loss": 0.0401, "step": 11630 }, { "epoch": 1.0228471001757469, "grad_norm": 0.5368269681930542, "learning_rate": 2.3278e-05, "loss": 0.0365, "step": 11640 }, { "epoch": 1.023725834797891, "grad_norm": 0.6514606475830078, "learning_rate": 2.3298e-05, "loss": 0.0363, "step": 11650 }, { "epoch": 1.0246045694200352, "grad_norm": 0.6050959229469299, "learning_rate": 2.3318e-05, "loss": 0.0356, "step": 11660 }, { "epoch": 1.0254833040421794, "grad_norm": 0.5975491404533386, "learning_rate": 2.3338000000000003e-05, "loss": 0.0356, "step": 11670 }, { "epoch": 1.0263620386643233, "grad_norm": 0.6251665949821472, "learning_rate": 2.3358000000000002e-05, "loss": 0.0331, "step": 11680 }, { "epoch": 1.0272407732864675, "grad_norm": 0.541267454624176, "learning_rate": 2.3378e-05, "loss": 0.0332, "step": 11690 }, { "epoch": 1.0281195079086116, "grad_norm": 0.5702481269836426, "learning_rate": 2.3398000000000002e-05, "loss": 0.0363, "step": 11700 }, { "epoch": 1.0289982425307558, "grad_norm": 0.48166361451148987, "learning_rate": 2.3418000000000002e-05, "loss": 0.0342, "step": 11710 }, { "epoch": 1.0298769771528997, "grad_norm": 0.7461867928504944, "learning_rate": 2.3438e-05, "loss": 0.0339, "step": 11720 }, { "epoch": 1.0307557117750439, "grad_norm": 0.6627786755561829, "learning_rate": 2.3458e-05, "loss": 0.0364, "step": 11730 }, { "epoch": 1.031634446397188, "grad_norm": 0.3975735306739807, "learning_rate": 2.3478e-05, "loss": 0.0358, "step": 11740 }, { "epoch": 1.0325131810193322, "grad_norm": 0.5368557572364807, "learning_rate": 2.3498e-05, "loss": 0.0366, "step": 11750 }, { "epoch": 1.0333919156414764, "grad_norm": 0.4199780523777008, "learning_rate": 2.3518e-05, "loss": 0.0345, "step": 11760 }, { "epoch": 1.0342706502636203, "grad_norm": 0.42523059248924255, "learning_rate": 2.3538e-05, "loss": 0.0325, "step": 11770 }, { "epoch": 1.0351493848857645, "grad_norm": 0.606063723564148, "learning_rate": 2.3558000000000004e-05, "loss": 0.0306, "step": 11780 }, { "epoch": 1.0360281195079086, "grad_norm": 0.5297714471817017, "learning_rate": 2.3578e-05, "loss": 0.0337, "step": 11790 }, { "epoch": 1.0369068541300528, "grad_norm": 0.6080306172370911, "learning_rate": 2.3598e-05, "loss": 0.0358, "step": 11800 }, { "epoch": 1.0377855887521967, "grad_norm": 0.5932711362838745, "learning_rate": 2.3618e-05, "loss": 0.0334, "step": 11810 }, { "epoch": 1.038664323374341, "grad_norm": 0.6557393670082092, "learning_rate": 2.3638000000000003e-05, "loss": 0.0345, "step": 11820 }, { "epoch": 1.039543057996485, "grad_norm": 0.5284473896026611, "learning_rate": 2.3658000000000003e-05, "loss": 0.0338, "step": 11830 }, { "epoch": 1.0404217926186292, "grad_norm": 0.5884192585945129, "learning_rate": 2.3678e-05, "loss": 0.0351, "step": 11840 }, { "epoch": 1.0413005272407734, "grad_norm": 0.4392316937446594, "learning_rate": 2.3698e-05, "loss": 0.0326, "step": 11850 }, { "epoch": 1.0421792618629173, "grad_norm": 0.6492966413497925, "learning_rate": 2.3718000000000002e-05, "loss": 0.0324, "step": 11860 }, { "epoch": 1.0430579964850615, "grad_norm": 0.6499637961387634, "learning_rate": 2.3738000000000002e-05, "loss": 0.0344, "step": 11870 }, { "epoch": 1.0439367311072056, "grad_norm": 0.5523832440376282, "learning_rate": 2.3758000000000002e-05, "loss": 0.0357, "step": 11880 }, { "epoch": 1.0448154657293498, "grad_norm": 0.5314735174179077, "learning_rate": 2.3778000000000002e-05, "loss": 0.0324, "step": 11890 }, { "epoch": 1.0456942003514937, "grad_norm": 0.5756626725196838, "learning_rate": 2.3798e-05, "loss": 0.033, "step": 11900 }, { "epoch": 1.046572934973638, "grad_norm": 0.5254719257354736, "learning_rate": 2.3818e-05, "loss": 0.0331, "step": 11910 }, { "epoch": 1.047451669595782, "grad_norm": 0.7090378403663635, "learning_rate": 2.3838e-05, "loss": 0.0345, "step": 11920 }, { "epoch": 1.0483304042179262, "grad_norm": 0.4748328626155853, "learning_rate": 2.3858e-05, "loss": 0.0393, "step": 11930 }, { "epoch": 1.0492091388400704, "grad_norm": 0.776798665523529, "learning_rate": 2.3878e-05, "loss": 0.0372, "step": 11940 }, { "epoch": 1.0500878734622143, "grad_norm": 0.5411202311515808, "learning_rate": 2.3898e-05, "loss": 0.032, "step": 11950 }, { "epoch": 1.0509666080843585, "grad_norm": 0.5582253336906433, "learning_rate": 2.3918e-05, "loss": 0.0367, "step": 11960 }, { "epoch": 1.0518453427065027, "grad_norm": 0.45917436480522156, "learning_rate": 2.3938000000000004e-05, "loss": 0.0354, "step": 11970 }, { "epoch": 1.0527240773286468, "grad_norm": 0.6220127940177917, "learning_rate": 2.3958e-05, "loss": 0.0341, "step": 11980 }, { "epoch": 1.0536028119507908, "grad_norm": 0.5291969776153564, "learning_rate": 2.3978e-05, "loss": 0.0325, "step": 11990 }, { "epoch": 1.054481546572935, "grad_norm": 0.6090192198753357, "learning_rate": 2.3998e-05, "loss": 0.0371, "step": 12000 }, { "epoch": 1.055360281195079, "grad_norm": 0.5772424340248108, "learning_rate": 2.4018000000000003e-05, "loss": 0.0362, "step": 12010 }, { "epoch": 1.0562390158172232, "grad_norm": 0.7474581599235535, "learning_rate": 2.4038000000000003e-05, "loss": 0.0323, "step": 12020 }, { "epoch": 1.0571177504393674, "grad_norm": 0.8087471723556519, "learning_rate": 2.4058e-05, "loss": 0.0341, "step": 12030 }, { "epoch": 1.0579964850615113, "grad_norm": 0.6531078219413757, "learning_rate": 2.4078000000000002e-05, "loss": 0.0335, "step": 12040 }, { "epoch": 1.0588752196836555, "grad_norm": 0.42805245518684387, "learning_rate": 2.4098000000000002e-05, "loss": 0.034, "step": 12050 }, { "epoch": 1.0597539543057997, "grad_norm": 0.4970327615737915, "learning_rate": 2.4118000000000002e-05, "loss": 0.0349, "step": 12060 }, { "epoch": 1.0606326889279438, "grad_norm": 0.42906785011291504, "learning_rate": 2.4138e-05, "loss": 0.0322, "step": 12070 }, { "epoch": 1.0615114235500878, "grad_norm": 0.6533353328704834, "learning_rate": 2.4158e-05, "loss": 0.0341, "step": 12080 }, { "epoch": 1.062390158172232, "grad_norm": 0.8055566549301147, "learning_rate": 2.4178e-05, "loss": 0.0329, "step": 12090 }, { "epoch": 1.063268892794376, "grad_norm": 0.6014014482498169, "learning_rate": 2.4198e-05, "loss": 0.0348, "step": 12100 }, { "epoch": 1.0641476274165202, "grad_norm": 0.7228134870529175, "learning_rate": 2.4218e-05, "loss": 0.0343, "step": 12110 }, { "epoch": 1.0650263620386644, "grad_norm": 0.8864094018936157, "learning_rate": 2.4238e-05, "loss": 0.0335, "step": 12120 }, { "epoch": 1.0659050966608083, "grad_norm": 0.5909647941589355, "learning_rate": 2.4258e-05, "loss": 0.0343, "step": 12130 }, { "epoch": 1.0667838312829525, "grad_norm": 0.6480389833450317, "learning_rate": 2.4278e-05, "loss": 0.0334, "step": 12140 }, { "epoch": 1.0676625659050967, "grad_norm": 0.56613689661026, "learning_rate": 2.4298e-05, "loss": 0.0335, "step": 12150 }, { "epoch": 1.0685413005272408, "grad_norm": 0.4483303725719452, "learning_rate": 2.4318000000000003e-05, "loss": 0.0346, "step": 12160 }, { "epoch": 1.0694200351493848, "grad_norm": 0.7536498308181763, "learning_rate": 2.4338000000000003e-05, "loss": 0.0334, "step": 12170 }, { "epoch": 1.070298769771529, "grad_norm": 0.7894177436828613, "learning_rate": 2.4358e-05, "loss": 0.0358, "step": 12180 }, { "epoch": 1.071177504393673, "grad_norm": 0.6240437030792236, "learning_rate": 2.4378e-05, "loss": 0.0337, "step": 12190 }, { "epoch": 1.0720562390158173, "grad_norm": 0.3538649380207062, "learning_rate": 2.4398000000000003e-05, "loss": 0.0349, "step": 12200 }, { "epoch": 1.0729349736379614, "grad_norm": 0.6592252254486084, "learning_rate": 2.4418000000000002e-05, "loss": 0.0363, "step": 12210 }, { "epoch": 1.0738137082601054, "grad_norm": 0.7146226763725281, "learning_rate": 2.4438e-05, "loss": 0.0322, "step": 12220 }, { "epoch": 1.0746924428822495, "grad_norm": 0.5937340259552002, "learning_rate": 2.4458000000000002e-05, "loss": 0.0343, "step": 12230 }, { "epoch": 1.0755711775043937, "grad_norm": 0.5461322069168091, "learning_rate": 2.4478000000000002e-05, "loss": 0.0354, "step": 12240 }, { "epoch": 1.0764499121265378, "grad_norm": 0.4049326479434967, "learning_rate": 2.4498e-05, "loss": 0.0309, "step": 12250 }, { "epoch": 1.0773286467486818, "grad_norm": 0.5037710666656494, "learning_rate": 2.4518e-05, "loss": 0.0346, "step": 12260 }, { "epoch": 1.078207381370826, "grad_norm": 0.5660841464996338, "learning_rate": 2.4538e-05, "loss": 0.0366, "step": 12270 }, { "epoch": 1.07908611599297, "grad_norm": 0.6561073064804077, "learning_rate": 2.4558e-05, "loss": 0.033, "step": 12280 }, { "epoch": 1.0799648506151143, "grad_norm": 0.6088632941246033, "learning_rate": 2.4578e-05, "loss": 0.0329, "step": 12290 }, { "epoch": 1.0808435852372584, "grad_norm": 0.8530681133270264, "learning_rate": 2.4598e-05, "loss": 0.0378, "step": 12300 }, { "epoch": 1.0817223198594024, "grad_norm": 0.486627995967865, "learning_rate": 2.4618000000000004e-05, "loss": 0.0359, "step": 12310 }, { "epoch": 1.0826010544815465, "grad_norm": 0.5641435384750366, "learning_rate": 2.4638e-05, "loss": 0.0327, "step": 12320 }, { "epoch": 1.0834797891036907, "grad_norm": 0.6868342161178589, "learning_rate": 2.4658e-05, "loss": 0.0315, "step": 12330 }, { "epoch": 1.0843585237258349, "grad_norm": 0.4865725636482239, "learning_rate": 2.4678e-05, "loss": 0.0321, "step": 12340 }, { "epoch": 1.0852372583479788, "grad_norm": 0.6847585439682007, "learning_rate": 2.4698000000000003e-05, "loss": 0.035, "step": 12350 }, { "epoch": 1.086115992970123, "grad_norm": 0.5146521925926208, "learning_rate": 2.4718000000000003e-05, "loss": 0.0335, "step": 12360 }, { "epoch": 1.0869947275922671, "grad_norm": 0.5456010103225708, "learning_rate": 2.4738e-05, "loss": 0.0307, "step": 12370 }, { "epoch": 1.0878734622144113, "grad_norm": 0.5613805651664734, "learning_rate": 2.4758e-05, "loss": 0.0315, "step": 12380 }, { "epoch": 1.0887521968365554, "grad_norm": 0.3833884298801422, "learning_rate": 2.4778000000000002e-05, "loss": 0.0322, "step": 12390 }, { "epoch": 1.0896309314586994, "grad_norm": 0.620926022529602, "learning_rate": 2.4798000000000002e-05, "loss": 0.0327, "step": 12400 }, { "epoch": 1.0905096660808435, "grad_norm": 0.43322327733039856, "learning_rate": 2.4818000000000002e-05, "loss": 0.0326, "step": 12410 }, { "epoch": 1.0913884007029877, "grad_norm": 0.8584910035133362, "learning_rate": 2.4838000000000002e-05, "loss": 0.0341, "step": 12420 }, { "epoch": 1.0922671353251319, "grad_norm": 0.5527920722961426, "learning_rate": 2.4858e-05, "loss": 0.0371, "step": 12430 }, { "epoch": 1.0931458699472758, "grad_norm": 0.5108375549316406, "learning_rate": 2.4878e-05, "loss": 0.0337, "step": 12440 }, { "epoch": 1.09402460456942, "grad_norm": 0.5109298825263977, "learning_rate": 2.4898e-05, "loss": 0.0354, "step": 12450 }, { "epoch": 1.0949033391915641, "grad_norm": 0.6599330306053162, "learning_rate": 2.4918e-05, "loss": 0.0317, "step": 12460 }, { "epoch": 1.0957820738137083, "grad_norm": 0.5916787385940552, "learning_rate": 2.4938e-05, "loss": 0.0377, "step": 12470 }, { "epoch": 1.0966608084358525, "grad_norm": 0.415406733751297, "learning_rate": 2.4958e-05, "loss": 0.0339, "step": 12480 }, { "epoch": 1.0975395430579964, "grad_norm": 0.46630552411079407, "learning_rate": 2.4978e-05, "loss": 0.0377, "step": 12490 }, { "epoch": 1.0984182776801406, "grad_norm": 0.7234543561935425, "learning_rate": 2.4998000000000004e-05, "loss": 0.034, "step": 12500 }, { "epoch": 1.0992970123022847, "grad_norm": 0.492112398147583, "learning_rate": 2.5018000000000003e-05, "loss": 0.0335, "step": 12510 }, { "epoch": 1.1001757469244289, "grad_norm": 0.5175300240516663, "learning_rate": 2.5038e-05, "loss": 0.0305, "step": 12520 }, { "epoch": 1.101054481546573, "grad_norm": 0.3991245627403259, "learning_rate": 2.5058000000000003e-05, "loss": 0.0325, "step": 12530 }, { "epoch": 1.101933216168717, "grad_norm": 0.3706790506839752, "learning_rate": 2.5078000000000003e-05, "loss": 0.0308, "step": 12540 }, { "epoch": 1.1028119507908611, "grad_norm": 0.47286516427993774, "learning_rate": 2.5098e-05, "loss": 0.0327, "step": 12550 }, { "epoch": 1.1036906854130053, "grad_norm": 0.518574059009552, "learning_rate": 2.5118000000000002e-05, "loss": 0.037, "step": 12560 }, { "epoch": 1.1045694200351495, "grad_norm": 0.6702398061752319, "learning_rate": 2.5138e-05, "loss": 0.0321, "step": 12570 }, { "epoch": 1.1054481546572934, "grad_norm": 0.6278778314590454, "learning_rate": 2.5158000000000005e-05, "loss": 0.0352, "step": 12580 }, { "epoch": 1.1063268892794376, "grad_norm": 0.5264450907707214, "learning_rate": 2.5178000000000002e-05, "loss": 0.0326, "step": 12590 }, { "epoch": 1.1072056239015817, "grad_norm": 0.6567831039428711, "learning_rate": 2.5197999999999998e-05, "loss": 0.0339, "step": 12600 }, { "epoch": 1.1080843585237259, "grad_norm": 0.545574426651001, "learning_rate": 2.5218e-05, "loss": 0.0306, "step": 12610 }, { "epoch": 1.10896309314587, "grad_norm": 0.45788735151290894, "learning_rate": 2.5238e-05, "loss": 0.0319, "step": 12620 }, { "epoch": 1.109841827768014, "grad_norm": 0.6662684082984924, "learning_rate": 2.5258000000000004e-05, "loss": 0.0341, "step": 12630 }, { "epoch": 1.1107205623901582, "grad_norm": 0.6381824016571045, "learning_rate": 2.5278e-05, "loss": 0.0344, "step": 12640 }, { "epoch": 1.1115992970123023, "grad_norm": 0.5870642066001892, "learning_rate": 2.5298e-05, "loss": 0.0327, "step": 12650 }, { "epoch": 1.1124780316344465, "grad_norm": 0.533662736415863, "learning_rate": 2.5318000000000004e-05, "loss": 0.0327, "step": 12660 }, { "epoch": 1.1133567662565904, "grad_norm": 0.40351536870002747, "learning_rate": 2.5338e-05, "loss": 0.0328, "step": 12670 }, { "epoch": 1.1142355008787346, "grad_norm": 0.6075906753540039, "learning_rate": 2.5358000000000004e-05, "loss": 0.033, "step": 12680 }, { "epoch": 1.1151142355008787, "grad_norm": 0.5959701538085938, "learning_rate": 2.5378000000000003e-05, "loss": 0.0349, "step": 12690 }, { "epoch": 1.115992970123023, "grad_norm": 0.6647385954856873, "learning_rate": 2.5398e-05, "loss": 0.031, "step": 12700 }, { "epoch": 1.116871704745167, "grad_norm": 0.4992883503437042, "learning_rate": 2.5418000000000003e-05, "loss": 0.0334, "step": 12710 }, { "epoch": 1.117750439367311, "grad_norm": 0.32385602593421936, "learning_rate": 2.5438e-05, "loss": 0.0355, "step": 12720 }, { "epoch": 1.1186291739894552, "grad_norm": 0.42279303073883057, "learning_rate": 2.5458e-05, "loss": 0.0357, "step": 12730 }, { "epoch": 1.1195079086115993, "grad_norm": 0.6184004545211792, "learning_rate": 2.5478000000000002e-05, "loss": 0.0358, "step": 12740 }, { "epoch": 1.1203866432337435, "grad_norm": 0.7231355905532837, "learning_rate": 2.5498e-05, "loss": 0.0355, "step": 12750 }, { "epoch": 1.1212653778558874, "grad_norm": 0.48102471232414246, "learning_rate": 2.5518000000000002e-05, "loss": 0.0376, "step": 12760 }, { "epoch": 1.1221441124780316, "grad_norm": 0.5343582034111023, "learning_rate": 2.5538000000000002e-05, "loss": 0.0359, "step": 12770 }, { "epoch": 1.1230228471001757, "grad_norm": 0.8644925355911255, "learning_rate": 2.5557999999999998e-05, "loss": 0.0365, "step": 12780 }, { "epoch": 1.12390158172232, "grad_norm": 0.7694138288497925, "learning_rate": 2.5578e-05, "loss": 0.0381, "step": 12790 }, { "epoch": 1.124780316344464, "grad_norm": 0.598847508430481, "learning_rate": 2.5598e-05, "loss": 0.0369, "step": 12800 }, { "epoch": 1.125659050966608, "grad_norm": 0.5805948972702026, "learning_rate": 2.5618000000000004e-05, "loss": 0.0327, "step": 12810 }, { "epoch": 1.1265377855887522, "grad_norm": 0.417802095413208, "learning_rate": 2.5638e-05, "loss": 0.0341, "step": 12820 }, { "epoch": 1.1274165202108963, "grad_norm": 0.5690740942955017, "learning_rate": 2.5657999999999997e-05, "loss": 0.0354, "step": 12830 }, { "epoch": 1.1282952548330405, "grad_norm": 0.6874098181724548, "learning_rate": 2.5678e-05, "loss": 0.0342, "step": 12840 }, { "epoch": 1.1291739894551847, "grad_norm": 0.6951627135276794, "learning_rate": 2.5698e-05, "loss": 0.0376, "step": 12850 }, { "epoch": 1.1300527240773286, "grad_norm": 0.4887341558933258, "learning_rate": 2.5718000000000003e-05, "loss": 0.0349, "step": 12860 }, { "epoch": 1.1309314586994728, "grad_norm": 0.5008218884468079, "learning_rate": 2.5738e-05, "loss": 0.0353, "step": 12870 }, { "epoch": 1.131810193321617, "grad_norm": 0.3749786913394928, "learning_rate": 2.5758e-05, "loss": 0.0313, "step": 12880 }, { "epoch": 1.1326889279437609, "grad_norm": 0.6888092756271362, "learning_rate": 2.5778000000000003e-05, "loss": 0.0344, "step": 12890 }, { "epoch": 1.133567662565905, "grad_norm": 0.536831259727478, "learning_rate": 2.5798e-05, "loss": 0.0323, "step": 12900 }, { "epoch": 1.1344463971880492, "grad_norm": 0.4843059182167053, "learning_rate": 2.5818000000000003e-05, "loss": 0.0308, "step": 12910 }, { "epoch": 1.1353251318101933, "grad_norm": 0.5331262946128845, "learning_rate": 2.5838000000000002e-05, "loss": 0.0344, "step": 12920 }, { "epoch": 1.1362038664323375, "grad_norm": 0.6473107933998108, "learning_rate": 2.5858e-05, "loss": 0.0375, "step": 12930 }, { "epoch": 1.1370826010544817, "grad_norm": 0.5112519860267639, "learning_rate": 2.5878000000000002e-05, "loss": 0.0322, "step": 12940 }, { "epoch": 1.1379613356766256, "grad_norm": 0.722721517086029, "learning_rate": 2.5898e-05, "loss": 0.035, "step": 12950 }, { "epoch": 1.1388400702987698, "grad_norm": 0.6780510544776917, "learning_rate": 2.5918000000000005e-05, "loss": 0.0338, "step": 12960 }, { "epoch": 1.139718804920914, "grad_norm": 0.524528443813324, "learning_rate": 2.5938e-05, "loss": 0.0327, "step": 12970 }, { "epoch": 1.140597539543058, "grad_norm": 0.577532947063446, "learning_rate": 2.5957999999999998e-05, "loss": 0.0333, "step": 12980 }, { "epoch": 1.141476274165202, "grad_norm": 0.5291146636009216, "learning_rate": 2.5978e-05, "loss": 0.0343, "step": 12990 }, { "epoch": 1.1423550087873462, "grad_norm": 0.4438979923725128, "learning_rate": 2.5998e-05, "loss": 0.0366, "step": 13000 }, { "epoch": 1.1432337434094904, "grad_norm": 0.6831320524215698, "learning_rate": 2.6018000000000004e-05, "loss": 0.0326, "step": 13010 }, { "epoch": 1.1441124780316345, "grad_norm": 0.40810784697532654, "learning_rate": 2.6038e-05, "loss": 0.0324, "step": 13020 }, { "epoch": 1.1449912126537787, "grad_norm": 0.5959415435791016, "learning_rate": 2.6058e-05, "loss": 0.0321, "step": 13030 }, { "epoch": 1.1458699472759226, "grad_norm": 0.3609844446182251, "learning_rate": 2.6078000000000003e-05, "loss": 0.0314, "step": 13040 }, { "epoch": 1.1467486818980668, "grad_norm": 0.4347589612007141, "learning_rate": 2.6098e-05, "loss": 0.0336, "step": 13050 }, { "epoch": 1.147627416520211, "grad_norm": 0.36888161301612854, "learning_rate": 2.6118000000000003e-05, "loss": 0.032, "step": 13060 }, { "epoch": 1.148506151142355, "grad_norm": 0.6449065804481506, "learning_rate": 2.6138000000000003e-05, "loss": 0.0314, "step": 13070 }, { "epoch": 1.149384885764499, "grad_norm": 0.43291008472442627, "learning_rate": 2.6158e-05, "loss": 0.0364, "step": 13080 }, { "epoch": 1.1502636203866432, "grad_norm": 0.515650749206543, "learning_rate": 2.6178000000000002e-05, "loss": 0.0342, "step": 13090 }, { "epoch": 1.1511423550087874, "grad_norm": 0.680261492729187, "learning_rate": 2.6198e-05, "loss": 0.0343, "step": 13100 }, { "epoch": 1.1520210896309315, "grad_norm": 0.47335416078567505, "learning_rate": 2.6218000000000002e-05, "loss": 0.0339, "step": 13110 }, { "epoch": 1.1528998242530757, "grad_norm": 0.45423558354377747, "learning_rate": 2.6238000000000002e-05, "loss": 0.0327, "step": 13120 }, { "epoch": 1.1537785588752196, "grad_norm": 0.4885488748550415, "learning_rate": 2.6257999999999998e-05, "loss": 0.0327, "step": 13130 }, { "epoch": 1.1546572934973638, "grad_norm": 0.52451092004776, "learning_rate": 2.6278e-05, "loss": 0.0309, "step": 13140 }, { "epoch": 1.155536028119508, "grad_norm": 0.613347053527832, "learning_rate": 2.6298e-05, "loss": 0.0329, "step": 13150 }, { "epoch": 1.1564147627416521, "grad_norm": 1.722760796546936, "learning_rate": 2.6318000000000005e-05, "loss": 0.0332, "step": 13160 }, { "epoch": 1.157293497363796, "grad_norm": 0.4251764416694641, "learning_rate": 2.6338e-05, "loss": 0.0342, "step": 13170 }, { "epoch": 1.1581722319859402, "grad_norm": 0.6354678869247437, "learning_rate": 2.6358e-05, "loss": 0.0342, "step": 13180 }, { "epoch": 1.1590509666080844, "grad_norm": 0.5279765725135803, "learning_rate": 2.6378000000000004e-05, "loss": 0.037, "step": 13190 }, { "epoch": 1.1599297012302285, "grad_norm": 0.8038347959518433, "learning_rate": 2.6398e-05, "loss": 0.032, "step": 13200 }, { "epoch": 1.1608084358523727, "grad_norm": 0.4772154986858368, "learning_rate": 2.6418000000000004e-05, "loss": 0.0336, "step": 13210 }, { "epoch": 1.1616871704745166, "grad_norm": 0.3761778175830841, "learning_rate": 2.6438e-05, "loss": 0.0323, "step": 13220 }, { "epoch": 1.1625659050966608, "grad_norm": 0.39828240871429443, "learning_rate": 2.6458e-05, "loss": 0.0362, "step": 13230 }, { "epoch": 1.163444639718805, "grad_norm": 0.46127939224243164, "learning_rate": 2.6478000000000003e-05, "loss": 0.0311, "step": 13240 }, { "epoch": 1.1643233743409491, "grad_norm": 0.47640079259872437, "learning_rate": 2.6498e-05, "loss": 0.0325, "step": 13250 }, { "epoch": 1.165202108963093, "grad_norm": 0.5360355377197266, "learning_rate": 2.6518000000000003e-05, "loss": 0.0334, "step": 13260 }, { "epoch": 1.1660808435852372, "grad_norm": 0.37436994910240173, "learning_rate": 2.6538000000000002e-05, "loss": 0.0316, "step": 13270 }, { "epoch": 1.1669595782073814, "grad_norm": 0.4621749222278595, "learning_rate": 2.6558e-05, "loss": 0.032, "step": 13280 }, { "epoch": 1.1678383128295255, "grad_norm": 0.5866767168045044, "learning_rate": 2.6578000000000002e-05, "loss": 0.0343, "step": 13290 }, { "epoch": 1.1687170474516697, "grad_norm": 0.5683937668800354, "learning_rate": 2.6598000000000002e-05, "loss": 0.0354, "step": 13300 }, { "epoch": 1.1695957820738137, "grad_norm": 0.5859082341194153, "learning_rate": 2.6618000000000005e-05, "loss": 0.0334, "step": 13310 }, { "epoch": 1.1704745166959578, "grad_norm": 0.718777060508728, "learning_rate": 2.6638e-05, "loss": 0.0384, "step": 13320 }, { "epoch": 1.171353251318102, "grad_norm": 0.4475327134132385, "learning_rate": 2.6658e-05, "loss": 0.0363, "step": 13330 }, { "epoch": 1.1722319859402461, "grad_norm": 0.5124778151512146, "learning_rate": 2.6678000000000004e-05, "loss": 0.0336, "step": 13340 }, { "epoch": 1.17311072056239, "grad_norm": 0.4543285369873047, "learning_rate": 2.6698e-05, "loss": 0.0327, "step": 13350 }, { "epoch": 1.1739894551845342, "grad_norm": 0.5955966711044312, "learning_rate": 2.6717999999999997e-05, "loss": 0.0357, "step": 13360 }, { "epoch": 1.1748681898066784, "grad_norm": 0.3684990704059601, "learning_rate": 2.6738e-05, "loss": 0.0328, "step": 13370 }, { "epoch": 1.1757469244288226, "grad_norm": 0.4813845753669739, "learning_rate": 2.6758e-05, "loss": 0.0337, "step": 13380 }, { "epoch": 1.1766256590509667, "grad_norm": 0.39394405484199524, "learning_rate": 2.6778000000000003e-05, "loss": 0.0312, "step": 13390 }, { "epoch": 1.1775043936731107, "grad_norm": 0.4171236455440521, "learning_rate": 2.6798e-05, "loss": 0.034, "step": 13400 }, { "epoch": 1.1783831282952548, "grad_norm": 0.48966217041015625, "learning_rate": 2.6818e-05, "loss": 0.0334, "step": 13410 }, { "epoch": 1.179261862917399, "grad_norm": 0.3433203101158142, "learning_rate": 2.6838000000000003e-05, "loss": 0.0338, "step": 13420 }, { "epoch": 1.1801405975395431, "grad_norm": 0.6826770901679993, "learning_rate": 2.6858e-05, "loss": 0.0358, "step": 13430 }, { "epoch": 1.181019332161687, "grad_norm": 0.5491447448730469, "learning_rate": 2.6878000000000003e-05, "loss": 0.0331, "step": 13440 }, { "epoch": 1.1818980667838312, "grad_norm": 0.5606110095977783, "learning_rate": 2.6898000000000002e-05, "loss": 0.0311, "step": 13450 }, { "epoch": 1.1827768014059754, "grad_norm": 0.5357977151870728, "learning_rate": 2.6918e-05, "loss": 0.0315, "step": 13460 }, { "epoch": 1.1836555360281196, "grad_norm": 0.4644031822681427, "learning_rate": 2.6938000000000002e-05, "loss": 0.0331, "step": 13470 }, { "epoch": 1.1845342706502637, "grad_norm": 0.6015061736106873, "learning_rate": 2.6958e-05, "loss": 0.0343, "step": 13480 }, { "epoch": 1.1854130052724077, "grad_norm": 0.5272778272628784, "learning_rate": 2.6978000000000005e-05, "loss": 0.0328, "step": 13490 }, { "epoch": 1.1862917398945518, "grad_norm": 0.4662376046180725, "learning_rate": 2.6998e-05, "loss": 0.0332, "step": 13500 }, { "epoch": 1.187170474516696, "grad_norm": 0.6035333871841431, "learning_rate": 2.7017999999999998e-05, "loss": 0.0331, "step": 13510 }, { "epoch": 1.1880492091388402, "grad_norm": 0.5305371880531311, "learning_rate": 2.7038e-05, "loss": 0.0306, "step": 13520 }, { "epoch": 1.188927943760984, "grad_norm": 0.5647196173667908, "learning_rate": 2.7058e-05, "loss": 0.0317, "step": 13530 }, { "epoch": 1.1898066783831283, "grad_norm": 0.5606021881103516, "learning_rate": 2.7078000000000004e-05, "loss": 0.0322, "step": 13540 }, { "epoch": 1.1906854130052724, "grad_norm": 0.49526992440223694, "learning_rate": 2.7098e-05, "loss": 0.0349, "step": 13550 }, { "epoch": 1.1915641476274166, "grad_norm": 0.486958384513855, "learning_rate": 2.7118e-05, "loss": 0.0338, "step": 13560 }, { "epoch": 1.1924428822495607, "grad_norm": 0.5294201374053955, "learning_rate": 2.7138000000000003e-05, "loss": 0.0364, "step": 13570 }, { "epoch": 1.1933216168717047, "grad_norm": 0.46090689301490784, "learning_rate": 2.7158e-05, "loss": 0.0337, "step": 13580 }, { "epoch": 1.1942003514938488, "grad_norm": 0.49006831645965576, "learning_rate": 2.7178000000000003e-05, "loss": 0.0359, "step": 13590 }, { "epoch": 1.195079086115993, "grad_norm": 1.9598753452301025, "learning_rate": 2.7198000000000003e-05, "loss": 0.0356, "step": 13600 }, { "epoch": 1.1959578207381372, "grad_norm": 0.5415491461753845, "learning_rate": 2.7218e-05, "loss": 0.0329, "step": 13610 }, { "epoch": 1.196836555360281, "grad_norm": 0.5847928524017334, "learning_rate": 2.7238000000000002e-05, "loss": 0.0354, "step": 13620 }, { "epoch": 1.1977152899824253, "grad_norm": 0.45793765783309937, "learning_rate": 2.7258e-05, "loss": 0.0357, "step": 13630 }, { "epoch": 1.1985940246045694, "grad_norm": 0.5422604084014893, "learning_rate": 2.7278000000000002e-05, "loss": 0.0354, "step": 13640 }, { "epoch": 1.1994727592267136, "grad_norm": 0.5318675637245178, "learning_rate": 2.7298000000000002e-05, "loss": 0.0366, "step": 13650 }, { "epoch": 1.2003514938488578, "grad_norm": 0.4779163897037506, "learning_rate": 2.7318e-05, "loss": 0.0338, "step": 13660 }, { "epoch": 1.2012302284710017, "grad_norm": 0.6049703359603882, "learning_rate": 2.7338e-05, "loss": 0.0347, "step": 13670 }, { "epoch": 1.2021089630931459, "grad_norm": 0.5396398901939392, "learning_rate": 2.7358e-05, "loss": 0.0369, "step": 13680 }, { "epoch": 1.20298769771529, "grad_norm": 0.5361047387123108, "learning_rate": 2.7378000000000005e-05, "loss": 0.0338, "step": 13690 }, { "epoch": 1.2038664323374342, "grad_norm": 0.41096919775009155, "learning_rate": 2.7398e-05, "loss": 0.0352, "step": 13700 }, { "epoch": 1.2047451669595781, "grad_norm": 0.48263663053512573, "learning_rate": 2.7418e-05, "loss": 0.0349, "step": 13710 }, { "epoch": 1.2056239015817223, "grad_norm": 0.5283039808273315, "learning_rate": 2.7438000000000004e-05, "loss": 0.0328, "step": 13720 }, { "epoch": 1.2065026362038664, "grad_norm": 0.5720680356025696, "learning_rate": 2.7458e-05, "loss": 0.0353, "step": 13730 }, { "epoch": 1.2073813708260106, "grad_norm": 0.5917161703109741, "learning_rate": 2.7478000000000004e-05, "loss": 0.0311, "step": 13740 }, { "epoch": 1.2082601054481548, "grad_norm": 0.7818168997764587, "learning_rate": 2.7498e-05, "loss": 0.0367, "step": 13750 }, { "epoch": 1.2091388400702987, "grad_norm": 0.9145519137382507, "learning_rate": 2.7518e-05, "loss": 0.0368, "step": 13760 }, { "epoch": 1.2100175746924429, "grad_norm": 0.4580551087856293, "learning_rate": 2.7538000000000003e-05, "loss": 0.0329, "step": 13770 }, { "epoch": 1.210896309314587, "grad_norm": 0.4414786100387573, "learning_rate": 2.7558e-05, "loss": 0.0328, "step": 13780 }, { "epoch": 1.2117750439367312, "grad_norm": 0.5174933671951294, "learning_rate": 2.7578000000000003e-05, "loss": 0.0344, "step": 13790 }, { "epoch": 1.2126537785588751, "grad_norm": 0.6850188970565796, "learning_rate": 2.7598000000000002e-05, "loss": 0.0341, "step": 13800 }, { "epoch": 1.2135325131810193, "grad_norm": 0.6719142198562622, "learning_rate": 2.7618e-05, "loss": 0.0358, "step": 13810 }, { "epoch": 1.2144112478031635, "grad_norm": 0.741354763507843, "learning_rate": 2.7638000000000002e-05, "loss": 0.0367, "step": 13820 }, { "epoch": 1.2152899824253076, "grad_norm": 0.595305860042572, "learning_rate": 2.7658000000000002e-05, "loss": 0.0389, "step": 13830 }, { "epoch": 1.2161687170474518, "grad_norm": 0.5444684624671936, "learning_rate": 2.7678000000000005e-05, "loss": 0.036, "step": 13840 }, { "epoch": 1.2170474516695957, "grad_norm": 0.4468037188053131, "learning_rate": 2.7698e-05, "loss": 0.0333, "step": 13850 }, { "epoch": 1.2179261862917399, "grad_norm": 0.4106488525867462, "learning_rate": 2.7717999999999998e-05, "loss": 0.0323, "step": 13860 }, { "epoch": 1.218804920913884, "grad_norm": 0.3698160946369171, "learning_rate": 2.7738000000000004e-05, "loss": 0.0354, "step": 13870 }, { "epoch": 1.2196836555360282, "grad_norm": 0.4175899922847748, "learning_rate": 2.7758e-05, "loss": 0.0336, "step": 13880 }, { "epoch": 1.2205623901581721, "grad_norm": 0.46444782614707947, "learning_rate": 2.7778000000000004e-05, "loss": 0.0336, "step": 13890 }, { "epoch": 1.2214411247803163, "grad_norm": 0.4804862439632416, "learning_rate": 2.7798e-05, "loss": 0.0323, "step": 13900 }, { "epoch": 1.2223198594024605, "grad_norm": 0.5309175848960876, "learning_rate": 2.7818e-05, "loss": 0.0353, "step": 13910 }, { "epoch": 1.2231985940246046, "grad_norm": 0.7772464156150818, "learning_rate": 2.7838000000000004e-05, "loss": 0.0329, "step": 13920 }, { "epoch": 1.2240773286467488, "grad_norm": 0.5054795145988464, "learning_rate": 2.7858e-05, "loss": 0.031, "step": 13930 }, { "epoch": 1.2249560632688927, "grad_norm": 0.6611114740371704, "learning_rate": 2.7878e-05, "loss": 0.0314, "step": 13940 }, { "epoch": 1.2258347978910369, "grad_norm": 0.37652623653411865, "learning_rate": 2.7898000000000003e-05, "loss": 0.0317, "step": 13950 }, { "epoch": 1.226713532513181, "grad_norm": 0.39295336604118347, "learning_rate": 2.7918e-05, "loss": 0.0329, "step": 13960 }, { "epoch": 1.2275922671353252, "grad_norm": 0.44425010681152344, "learning_rate": 2.7938000000000003e-05, "loss": 0.0354, "step": 13970 }, { "epoch": 1.2284710017574691, "grad_norm": 0.5044276118278503, "learning_rate": 2.7958000000000002e-05, "loss": 0.033, "step": 13980 }, { "epoch": 1.2293497363796133, "grad_norm": 0.439517617225647, "learning_rate": 2.7978e-05, "loss": 0.0331, "step": 13990 }, { "epoch": 1.2302284710017575, "grad_norm": 0.37256255745887756, "learning_rate": 2.7998000000000002e-05, "loss": 0.0301, "step": 14000 }, { "epoch": 1.2311072056239016, "grad_norm": 0.6911427974700928, "learning_rate": 2.8018e-05, "loss": 0.0332, "step": 14010 }, { "epoch": 1.2319859402460458, "grad_norm": 0.4936477839946747, "learning_rate": 2.8038e-05, "loss": 0.0332, "step": 14020 }, { "epoch": 1.2328646748681897, "grad_norm": 0.4756612181663513, "learning_rate": 2.8058e-05, "loss": 0.0313, "step": 14030 }, { "epoch": 1.233743409490334, "grad_norm": 0.4310343861579895, "learning_rate": 2.8077999999999998e-05, "loss": 0.0311, "step": 14040 }, { "epoch": 1.234622144112478, "grad_norm": 0.5280249118804932, "learning_rate": 2.8098e-05, "loss": 0.0335, "step": 14050 }, { "epoch": 1.2355008787346222, "grad_norm": 0.5910183191299438, "learning_rate": 2.8118e-05, "loss": 0.0317, "step": 14060 }, { "epoch": 1.2363796133567662, "grad_norm": 0.4127860963344574, "learning_rate": 2.8138000000000004e-05, "loss": 0.028, "step": 14070 }, { "epoch": 1.2372583479789103, "grad_norm": 0.6509653925895691, "learning_rate": 2.8158e-05, "loss": 0.0294, "step": 14080 }, { "epoch": 1.2381370826010545, "grad_norm": 0.5763230919837952, "learning_rate": 2.8178e-05, "loss": 0.0317, "step": 14090 }, { "epoch": 1.2390158172231986, "grad_norm": 0.48277243971824646, "learning_rate": 2.8198000000000003e-05, "loss": 0.0324, "step": 14100 }, { "epoch": 1.2398945518453428, "grad_norm": 0.4606180787086487, "learning_rate": 2.8218e-05, "loss": 0.0321, "step": 14110 }, { "epoch": 1.2407732864674867, "grad_norm": 0.5056268572807312, "learning_rate": 2.8238000000000003e-05, "loss": 0.033, "step": 14120 }, { "epoch": 1.241652021089631, "grad_norm": 0.4729219377040863, "learning_rate": 2.8258e-05, "loss": 0.0319, "step": 14130 }, { "epoch": 1.242530755711775, "grad_norm": 0.48317766189575195, "learning_rate": 2.8278e-05, "loss": 0.0308, "step": 14140 }, { "epoch": 1.2434094903339192, "grad_norm": 0.6497467756271362, "learning_rate": 2.8298000000000002e-05, "loss": 0.0295, "step": 14150 }, { "epoch": 1.2442882249560632, "grad_norm": 0.4496748149394989, "learning_rate": 2.8318e-05, "loss": 0.0299, "step": 14160 }, { "epoch": 1.2451669595782073, "grad_norm": 0.611882746219635, "learning_rate": 2.8338000000000002e-05, "loss": 0.0332, "step": 14170 }, { "epoch": 1.2460456942003515, "grad_norm": 0.4735523462295532, "learning_rate": 2.8358000000000002e-05, "loss": 0.0339, "step": 14180 }, { "epoch": 1.2469244288224957, "grad_norm": 0.6218748688697815, "learning_rate": 2.8378e-05, "loss": 0.0341, "step": 14190 }, { "epoch": 1.2478031634446398, "grad_norm": 0.6606560349464417, "learning_rate": 2.8398e-05, "loss": 0.0332, "step": 14200 }, { "epoch": 1.2486818980667838, "grad_norm": 0.47621941566467285, "learning_rate": 2.8418e-05, "loss": 0.0335, "step": 14210 }, { "epoch": 1.249560632688928, "grad_norm": 0.6968988180160522, "learning_rate": 2.8438000000000005e-05, "loss": 0.0348, "step": 14220 }, { "epoch": 1.250439367311072, "grad_norm": 0.4677242934703827, "learning_rate": 2.8458e-05, "loss": 0.0344, "step": 14230 }, { "epoch": 1.2513181019332162, "grad_norm": 0.46110808849334717, "learning_rate": 2.8478e-05, "loss": 0.0342, "step": 14240 }, { "epoch": 1.2521968365553602, "grad_norm": 0.550952136516571, "learning_rate": 2.8498000000000004e-05, "loss": 0.0357, "step": 14250 }, { "epoch": 1.2530755711775043, "grad_norm": 0.4722099006175995, "learning_rate": 2.8518e-05, "loss": 0.0322, "step": 14260 }, { "epoch": 1.2539543057996485, "grad_norm": 0.6099050641059875, "learning_rate": 2.8538000000000004e-05, "loss": 0.0332, "step": 14270 }, { "epoch": 1.2548330404217927, "grad_norm": 0.4815683960914612, "learning_rate": 2.8558e-05, "loss": 0.0319, "step": 14280 }, { "epoch": 1.2557117750439368, "grad_norm": 0.478014200925827, "learning_rate": 2.8578e-05, "loss": 0.0325, "step": 14290 }, { "epoch": 1.2565905096660808, "grad_norm": 0.44079068303108215, "learning_rate": 2.8598000000000003e-05, "loss": 0.0324, "step": 14300 }, { "epoch": 1.257469244288225, "grad_norm": 0.4173274040222168, "learning_rate": 2.8618e-05, "loss": 0.0375, "step": 14310 }, { "epoch": 1.258347978910369, "grad_norm": 0.4604024589061737, "learning_rate": 2.8638000000000003e-05, "loss": 0.0354, "step": 14320 }, { "epoch": 1.2592267135325133, "grad_norm": 0.38611385226249695, "learning_rate": 2.8658000000000002e-05, "loss": 0.0313, "step": 14330 }, { "epoch": 1.2601054481546572, "grad_norm": 0.5293319225311279, "learning_rate": 2.8678e-05, "loss": 0.0342, "step": 14340 }, { "epoch": 1.2609841827768014, "grad_norm": 0.5170547366142273, "learning_rate": 2.8698000000000002e-05, "loss": 0.0329, "step": 14350 }, { "epoch": 1.2618629173989455, "grad_norm": 0.44833096861839294, "learning_rate": 2.8718000000000002e-05, "loss": 0.0349, "step": 14360 }, { "epoch": 1.2627416520210897, "grad_norm": 0.445536345243454, "learning_rate": 2.8738000000000005e-05, "loss": 0.0326, "step": 14370 }, { "epoch": 1.2636203866432338, "grad_norm": 0.6560038328170776, "learning_rate": 2.8758e-05, "loss": 0.0345, "step": 14380 }, { "epoch": 1.2644991212653778, "grad_norm": 0.6477782130241394, "learning_rate": 2.8777999999999998e-05, "loss": 0.0344, "step": 14390 }, { "epoch": 1.265377855887522, "grad_norm": 0.5171456933021545, "learning_rate": 2.8798e-05, "loss": 0.0315, "step": 14400 }, { "epoch": 1.266256590509666, "grad_norm": 0.5196077823638916, "learning_rate": 2.8818e-05, "loss": 0.0306, "step": 14410 }, { "epoch": 1.2671353251318103, "grad_norm": 0.2846094071865082, "learning_rate": 2.8838000000000004e-05, "loss": 0.0281, "step": 14420 }, { "epoch": 1.2680140597539542, "grad_norm": 0.4698847830295563, "learning_rate": 2.8858e-05, "loss": 0.0312, "step": 14430 }, { "epoch": 1.2688927943760984, "grad_norm": 0.4284623861312866, "learning_rate": 2.8878e-05, "loss": 0.0315, "step": 14440 }, { "epoch": 1.2697715289982425, "grad_norm": 0.5656578540802002, "learning_rate": 2.8898000000000004e-05, "loss": 0.0304, "step": 14450 }, { "epoch": 1.2706502636203867, "grad_norm": 0.5294116139411926, "learning_rate": 2.8918e-05, "loss": 0.0313, "step": 14460 }, { "epoch": 1.2715289982425309, "grad_norm": 0.419134259223938, "learning_rate": 2.8938000000000003e-05, "loss": 0.0321, "step": 14470 }, { "epoch": 1.2724077328646748, "grad_norm": 0.4889072775840759, "learning_rate": 2.8958000000000003e-05, "loss": 0.0309, "step": 14480 }, { "epoch": 1.273286467486819, "grad_norm": 0.4842917323112488, "learning_rate": 2.8978e-05, "loss": 0.0334, "step": 14490 }, { "epoch": 1.2741652021089631, "grad_norm": 0.44684603810310364, "learning_rate": 2.8998000000000003e-05, "loss": 0.0353, "step": 14500 }, { "epoch": 1.2750439367311073, "grad_norm": 0.40302079916000366, "learning_rate": 2.9018000000000002e-05, "loss": 0.0346, "step": 14510 }, { "epoch": 1.2759226713532512, "grad_norm": 0.7448258996009827, "learning_rate": 2.9038000000000006e-05, "loss": 0.0336, "step": 14520 }, { "epoch": 1.2768014059753954, "grad_norm": 0.37633562088012695, "learning_rate": 2.9058000000000002e-05, "loss": 0.0364, "step": 14530 }, { "epoch": 1.2776801405975395, "grad_norm": 0.5132938623428345, "learning_rate": 2.9078e-05, "loss": 0.0347, "step": 14540 }, { "epoch": 1.2785588752196837, "grad_norm": 0.43769240379333496, "learning_rate": 2.9098e-05, "loss": 0.037, "step": 14550 }, { "epoch": 1.2794376098418279, "grad_norm": 0.40800175070762634, "learning_rate": 2.9118e-05, "loss": 0.0338, "step": 14560 }, { "epoch": 1.2803163444639718, "grad_norm": 0.3854919373989105, "learning_rate": 2.9137999999999998e-05, "loss": 0.0332, "step": 14570 }, { "epoch": 1.281195079086116, "grad_norm": 0.5110515356063843, "learning_rate": 2.9158e-05, "loss": 0.0328, "step": 14580 }, { "epoch": 1.2820738137082601, "grad_norm": 0.405998557806015, "learning_rate": 2.9178e-05, "loss": 0.034, "step": 14590 }, { "epoch": 1.2829525483304043, "grad_norm": 0.36525818705558777, "learning_rate": 2.9198000000000004e-05, "loss": 0.0333, "step": 14600 }, { "epoch": 1.2838312829525482, "grad_norm": 0.44705450534820557, "learning_rate": 2.9218e-05, "loss": 0.0346, "step": 14610 }, { "epoch": 1.2847100175746924, "grad_norm": 0.3977420926094055, "learning_rate": 2.9238e-05, "loss": 0.0339, "step": 14620 }, { "epoch": 1.2855887521968365, "grad_norm": 0.3618879020214081, "learning_rate": 2.9258000000000003e-05, "loss": 0.0358, "step": 14630 }, { "epoch": 1.2864674868189807, "grad_norm": 0.4203489124774933, "learning_rate": 2.9278e-05, "loss": 0.0343, "step": 14640 }, { "epoch": 1.2873462214411249, "grad_norm": 0.5044721961021423, "learning_rate": 2.9298000000000003e-05, "loss": 0.033, "step": 14650 }, { "epoch": 1.2882249560632688, "grad_norm": 0.4955662786960602, "learning_rate": 2.9318e-05, "loss": 0.0353, "step": 14660 }, { "epoch": 1.289103690685413, "grad_norm": 0.4104270935058594, "learning_rate": 2.9338e-05, "loss": 0.0355, "step": 14670 }, { "epoch": 1.2899824253075571, "grad_norm": 0.44277676939964294, "learning_rate": 2.9358000000000003e-05, "loss": 0.0333, "step": 14680 }, { "epoch": 1.2908611599297013, "grad_norm": 0.43760624527931213, "learning_rate": 2.9378e-05, "loss": 0.0328, "step": 14690 }, { "epoch": 1.2917398945518452, "grad_norm": 0.6059921979904175, "learning_rate": 2.9398000000000002e-05, "loss": 0.0344, "step": 14700 }, { "epoch": 1.2926186291739894, "grad_norm": 0.5604166388511658, "learning_rate": 2.9418000000000002e-05, "loss": 0.0324, "step": 14710 }, { "epoch": 1.2934973637961336, "grad_norm": 0.35083940625190735, "learning_rate": 2.9438e-05, "loss": 0.033, "step": 14720 }, { "epoch": 1.2943760984182777, "grad_norm": 0.3806609511375427, "learning_rate": 2.9458e-05, "loss": 0.0328, "step": 14730 }, { "epoch": 1.2952548330404219, "grad_norm": 0.4807817041873932, "learning_rate": 2.9478e-05, "loss": 0.0323, "step": 14740 }, { "epoch": 1.2961335676625658, "grad_norm": 0.5698063969612122, "learning_rate": 2.9498000000000005e-05, "loss": 0.0317, "step": 14750 }, { "epoch": 1.29701230228471, "grad_norm": 0.46172472834587097, "learning_rate": 2.9518e-05, "loss": 0.0357, "step": 14760 }, { "epoch": 1.2978910369068541, "grad_norm": 0.3014800548553467, "learning_rate": 2.9537999999999997e-05, "loss": 0.0326, "step": 14770 }, { "epoch": 1.2987697715289983, "grad_norm": 0.6967650651931763, "learning_rate": 2.9558000000000004e-05, "loss": 0.0349, "step": 14780 }, { "epoch": 1.2996485061511422, "grad_norm": 0.36610397696495056, "learning_rate": 2.9578e-05, "loss": 0.0321, "step": 14790 }, { "epoch": 1.3005272407732864, "grad_norm": 0.4686068594455719, "learning_rate": 2.9598000000000004e-05, "loss": 0.0353, "step": 14800 }, { "epoch": 1.3014059753954306, "grad_norm": 0.411689817905426, "learning_rate": 2.9618e-05, "loss": 0.035, "step": 14810 }, { "epoch": 1.3022847100175747, "grad_norm": 0.8821938037872314, "learning_rate": 2.9638e-05, "loss": 0.0322, "step": 14820 }, { "epoch": 1.303163444639719, "grad_norm": 0.44967585802078247, "learning_rate": 2.9658000000000003e-05, "loss": 0.0326, "step": 14830 }, { "epoch": 1.304042179261863, "grad_norm": 0.4320845901966095, "learning_rate": 2.9678e-05, "loss": 0.0335, "step": 14840 }, { "epoch": 1.304920913884007, "grad_norm": 0.42321810126304626, "learning_rate": 2.9698000000000003e-05, "loss": 0.0299, "step": 14850 }, { "epoch": 1.3057996485061512, "grad_norm": 0.553941011428833, "learning_rate": 2.9718000000000002e-05, "loss": 0.0338, "step": 14860 }, { "epoch": 1.3066783831282953, "grad_norm": 0.3943984806537628, "learning_rate": 2.9738e-05, "loss": 0.0317, "step": 14870 }, { "epoch": 1.3075571177504393, "grad_norm": 0.3916983902454376, "learning_rate": 2.9758000000000002e-05, "loss": 0.0331, "step": 14880 }, { "epoch": 1.3084358523725834, "grad_norm": 0.4370555877685547, "learning_rate": 2.9778000000000002e-05, "loss": 0.0311, "step": 14890 }, { "epoch": 1.3093145869947276, "grad_norm": 0.4062500596046448, "learning_rate": 2.9798000000000005e-05, "loss": 0.0362, "step": 14900 }, { "epoch": 1.3101933216168717, "grad_norm": 0.3829629421234131, "learning_rate": 2.9818e-05, "loss": 0.0326, "step": 14910 }, { "epoch": 1.311072056239016, "grad_norm": 0.4552094042301178, "learning_rate": 2.9837999999999998e-05, "loss": 0.0307, "step": 14920 }, { "epoch": 1.31195079086116, "grad_norm": 0.5945309400558472, "learning_rate": 2.9858e-05, "loss": 0.0338, "step": 14930 }, { "epoch": 1.312829525483304, "grad_norm": 0.47871240973472595, "learning_rate": 2.9878e-05, "loss": 0.034, "step": 14940 }, { "epoch": 1.3137082601054482, "grad_norm": 0.6387138366699219, "learning_rate": 2.9898000000000004e-05, "loss": 0.0328, "step": 14950 }, { "epoch": 1.3145869947275923, "grad_norm": 0.5139346122741699, "learning_rate": 2.9918e-05, "loss": 0.0287, "step": 14960 }, { "epoch": 1.3154657293497363, "grad_norm": 0.5720702409744263, "learning_rate": 2.9938e-05, "loss": 0.0332, "step": 14970 }, { "epoch": 1.3163444639718804, "grad_norm": 0.609650194644928, "learning_rate": 2.9958000000000004e-05, "loss": 0.0334, "step": 14980 }, { "epoch": 1.3172231985940246, "grad_norm": 0.5256709456443787, "learning_rate": 2.9978e-05, "loss": 0.0319, "step": 14990 }, { "epoch": 1.3181019332161688, "grad_norm": 0.4382863938808441, "learning_rate": 2.9998000000000003e-05, "loss": 0.0328, "step": 15000 }, { "epoch": 1.318980667838313, "grad_norm": 0.4035584330558777, "learning_rate": 3.0018000000000003e-05, "loss": 0.035, "step": 15010 }, { "epoch": 1.319859402460457, "grad_norm": 0.48336756229400635, "learning_rate": 3.0038e-05, "loss": 0.0349, "step": 15020 }, { "epoch": 1.320738137082601, "grad_norm": 1.0409501791000366, "learning_rate": 3.0058000000000003e-05, "loss": 0.0322, "step": 15030 }, { "epoch": 1.3216168717047452, "grad_norm": 0.4250957667827606, "learning_rate": 3.0078e-05, "loss": 0.0318, "step": 15040 }, { "epoch": 1.3224956063268893, "grad_norm": 0.3751996159553528, "learning_rate": 3.0098000000000006e-05, "loss": 0.0338, "step": 15050 }, { "epoch": 1.3233743409490333, "grad_norm": 0.37352776527404785, "learning_rate": 3.0118000000000002e-05, "loss": 0.0317, "step": 15060 }, { "epoch": 1.3242530755711774, "grad_norm": 0.5317201018333435, "learning_rate": 3.0138e-05, "loss": 0.037, "step": 15070 }, { "epoch": 1.3251318101933216, "grad_norm": 0.3536349833011627, "learning_rate": 3.0158e-05, "loss": 0.0349, "step": 15080 }, { "epoch": 1.3260105448154658, "grad_norm": 0.669782817363739, "learning_rate": 3.0178e-05, "loss": 0.0361, "step": 15090 }, { "epoch": 1.32688927943761, "grad_norm": 0.5161206722259521, "learning_rate": 3.0198000000000005e-05, "loss": 0.0336, "step": 15100 }, { "epoch": 1.327768014059754, "grad_norm": 0.7538968920707703, "learning_rate": 3.0218e-05, "loss": 0.0353, "step": 15110 }, { "epoch": 1.328646748681898, "grad_norm": 0.5084847211837769, "learning_rate": 3.0238e-05, "loss": 0.0338, "step": 15120 }, { "epoch": 1.3295254833040422, "grad_norm": 0.4151563346385956, "learning_rate": 3.0258000000000004e-05, "loss": 0.0309, "step": 15130 }, { "epoch": 1.3304042179261863, "grad_norm": 0.545931339263916, "learning_rate": 3.0278e-05, "loss": 0.0344, "step": 15140 }, { "epoch": 1.3312829525483303, "grad_norm": 0.47535139322280884, "learning_rate": 3.0298000000000004e-05, "loss": 0.0302, "step": 15150 }, { "epoch": 1.3321616871704745, "grad_norm": 0.5122387409210205, "learning_rate": 3.0318000000000003e-05, "loss": 0.0309, "step": 15160 }, { "epoch": 1.3330404217926186, "grad_norm": 0.38979372382164, "learning_rate": 3.0338e-05, "loss": 0.0314, "step": 15170 }, { "epoch": 1.3339191564147628, "grad_norm": 0.43365478515625, "learning_rate": 3.0358000000000003e-05, "loss": 0.032, "step": 15180 }, { "epoch": 1.334797891036907, "grad_norm": 0.39796534180641174, "learning_rate": 3.0378e-05, "loss": 0.0341, "step": 15190 }, { "epoch": 1.335676625659051, "grad_norm": 0.29923954606056213, "learning_rate": 3.0398e-05, "loss": 0.0326, "step": 15200 }, { "epoch": 1.336555360281195, "grad_norm": 0.3453194200992584, "learning_rate": 3.0418000000000003e-05, "loss": 0.0312, "step": 15210 }, { "epoch": 1.3374340949033392, "grad_norm": 0.34856754541397095, "learning_rate": 3.0438e-05, "loss": 0.032, "step": 15220 }, { "epoch": 1.3383128295254834, "grad_norm": 0.49982157349586487, "learning_rate": 3.0458000000000002e-05, "loss": 0.0314, "step": 15230 }, { "epoch": 1.3391915641476273, "grad_norm": 0.5478368997573853, "learning_rate": 3.0478000000000002e-05, "loss": 0.033, "step": 15240 }, { "epoch": 1.3400702987697715, "grad_norm": 0.7633512020111084, "learning_rate": 3.0498e-05, "loss": 0.0323, "step": 15250 }, { "epoch": 1.3409490333919156, "grad_norm": 0.5476495623588562, "learning_rate": 3.0518e-05, "loss": 0.0318, "step": 15260 }, { "epoch": 1.3418277680140598, "grad_norm": 0.41742751002311707, "learning_rate": 3.0538e-05, "loss": 0.0339, "step": 15270 }, { "epoch": 1.342706502636204, "grad_norm": 0.5141993761062622, "learning_rate": 3.0558e-05, "loss": 0.0338, "step": 15280 }, { "epoch": 1.343585237258348, "grad_norm": 0.47573402523994446, "learning_rate": 3.0578000000000004e-05, "loss": 0.0373, "step": 15290 }, { "epoch": 1.344463971880492, "grad_norm": 0.602078914642334, "learning_rate": 3.0598e-05, "loss": 0.0376, "step": 15300 }, { "epoch": 1.3453427065026362, "grad_norm": 0.45903724431991577, "learning_rate": 3.0618000000000004e-05, "loss": 0.0375, "step": 15310 }, { "epoch": 1.3462214411247804, "grad_norm": 0.4364699423313141, "learning_rate": 3.0638e-05, "loss": 0.0335, "step": 15320 }, { "epoch": 1.3471001757469243, "grad_norm": 0.4606728255748749, "learning_rate": 3.0658000000000004e-05, "loss": 0.0363, "step": 15330 }, { "epoch": 1.3479789103690685, "grad_norm": 0.4919450879096985, "learning_rate": 3.0678e-05, "loss": 0.0353, "step": 15340 }, { "epoch": 1.3488576449912126, "grad_norm": 0.3485974073410034, "learning_rate": 3.0697999999999996e-05, "loss": 0.0338, "step": 15350 }, { "epoch": 1.3497363796133568, "grad_norm": 0.46027883887290955, "learning_rate": 3.0718e-05, "loss": 0.0387, "step": 15360 }, { "epoch": 1.350615114235501, "grad_norm": 0.5182762145996094, "learning_rate": 3.0738e-05, "loss": 0.0393, "step": 15370 }, { "epoch": 1.3514938488576451, "grad_norm": 0.4986664652824402, "learning_rate": 3.0758000000000006e-05, "loss": 0.0349, "step": 15380 }, { "epoch": 1.352372583479789, "grad_norm": 0.4052332043647766, "learning_rate": 3.0778e-05, "loss": 0.0342, "step": 15390 }, { "epoch": 1.3532513181019332, "grad_norm": 0.5446585416793823, "learning_rate": 3.0798e-05, "loss": 0.0354, "step": 15400 }, { "epoch": 1.3541300527240774, "grad_norm": 0.45764586329460144, "learning_rate": 3.0818e-05, "loss": 0.0308, "step": 15410 }, { "epoch": 1.3550087873462213, "grad_norm": 0.5031924247741699, "learning_rate": 3.0838e-05, "loss": 0.0347, "step": 15420 }, { "epoch": 1.3558875219683655, "grad_norm": 0.36113518476486206, "learning_rate": 3.0858e-05, "loss": 0.0318, "step": 15430 }, { "epoch": 1.3567662565905096, "grad_norm": 0.5056775212287903, "learning_rate": 3.0878e-05, "loss": 0.0362, "step": 15440 }, { "epoch": 1.3576449912126538, "grad_norm": 0.34840086102485657, "learning_rate": 3.0898e-05, "loss": 0.0338, "step": 15450 }, { "epoch": 1.358523725834798, "grad_norm": 0.5364977121353149, "learning_rate": 3.0918000000000005e-05, "loss": 0.0299, "step": 15460 }, { "epoch": 1.3594024604569421, "grad_norm": 0.5142046213150024, "learning_rate": 3.0938e-05, "loss": 0.0324, "step": 15470 }, { "epoch": 1.360281195079086, "grad_norm": 0.501118540763855, "learning_rate": 3.0958000000000004e-05, "loss": 0.0374, "step": 15480 }, { "epoch": 1.3611599297012302, "grad_norm": 0.3839934468269348, "learning_rate": 3.0978e-05, "loss": 0.0349, "step": 15490 }, { "epoch": 1.3620386643233744, "grad_norm": 0.4047946333885193, "learning_rate": 3.0998e-05, "loss": 0.0357, "step": 15500 }, { "epoch": 1.3629173989455183, "grad_norm": 0.6224887371063232, "learning_rate": 3.1018e-05, "loss": 0.0342, "step": 15510 }, { "epoch": 1.3637961335676625, "grad_norm": 0.4678182601928711, "learning_rate": 3.1038e-05, "loss": 0.034, "step": 15520 }, { "epoch": 1.3646748681898067, "grad_norm": 0.47321781516075134, "learning_rate": 3.1058000000000007e-05, "loss": 0.0327, "step": 15530 }, { "epoch": 1.3655536028119508, "grad_norm": 0.4784434139728546, "learning_rate": 3.1078e-05, "loss": 0.0324, "step": 15540 }, { "epoch": 1.366432337434095, "grad_norm": 0.44063800573349, "learning_rate": 3.1098e-05, "loss": 0.0342, "step": 15550 }, { "epoch": 1.3673110720562391, "grad_norm": 0.4977007508277893, "learning_rate": 3.1118e-05, "loss": 0.0352, "step": 15560 }, { "epoch": 1.368189806678383, "grad_norm": 0.6147475838661194, "learning_rate": 3.1138e-05, "loss": 0.0358, "step": 15570 }, { "epoch": 1.3690685413005272, "grad_norm": 0.7062366604804993, "learning_rate": 3.1158e-05, "loss": 0.0362, "step": 15580 }, { "epoch": 1.3699472759226714, "grad_norm": 0.463225781917572, "learning_rate": 3.1178e-05, "loss": 0.0294, "step": 15590 }, { "epoch": 1.3708260105448153, "grad_norm": 0.3200031518936157, "learning_rate": 3.1198e-05, "loss": 0.0311, "step": 15600 }, { "epoch": 1.3717047451669595, "grad_norm": 0.37633004784584045, "learning_rate": 3.1218000000000005e-05, "loss": 0.0304, "step": 15610 }, { "epoch": 1.3725834797891037, "grad_norm": 0.3924047648906708, "learning_rate": 3.1238e-05, "loss": 0.0304, "step": 15620 }, { "epoch": 1.3734622144112478, "grad_norm": 0.45266616344451904, "learning_rate": 3.1258000000000005e-05, "loss": 0.0285, "step": 15630 }, { "epoch": 1.374340949033392, "grad_norm": 0.3154696226119995, "learning_rate": 3.1278e-05, "loss": 0.0316, "step": 15640 }, { "epoch": 1.3752196836555362, "grad_norm": 0.5161053538322449, "learning_rate": 3.1298e-05, "loss": 0.0317, "step": 15650 }, { "epoch": 1.37609841827768, "grad_norm": 0.5103948712348938, "learning_rate": 3.1318e-05, "loss": 0.0321, "step": 15660 }, { "epoch": 1.3769771528998243, "grad_norm": 0.37690961360931396, "learning_rate": 3.1338000000000004e-05, "loss": 0.031, "step": 15670 }, { "epoch": 1.3778558875219684, "grad_norm": 0.4728568494319916, "learning_rate": 3.135800000000001e-05, "loss": 0.0304, "step": 15680 }, { "epoch": 1.3787346221441124, "grad_norm": 0.5346733927726746, "learning_rate": 3.1378000000000003e-05, "loss": 0.0303, "step": 15690 }, { "epoch": 1.3796133567662565, "grad_norm": 0.3771136999130249, "learning_rate": 3.1398e-05, "loss": 0.0329, "step": 15700 }, { "epoch": 1.3804920913884007, "grad_norm": 0.42604202032089233, "learning_rate": 3.1418e-05, "loss": 0.0354, "step": 15710 }, { "epoch": 1.3813708260105448, "grad_norm": 0.32131293416023254, "learning_rate": 3.1438e-05, "loss": 0.0307, "step": 15720 }, { "epoch": 1.382249560632689, "grad_norm": 0.388258695602417, "learning_rate": 3.1458e-05, "loss": 0.0312, "step": 15730 }, { "epoch": 1.3831282952548332, "grad_norm": 0.4113108813762665, "learning_rate": 3.1478e-05, "loss": 0.0308, "step": 15740 }, { "epoch": 1.384007029876977, "grad_norm": 0.42699897289276123, "learning_rate": 3.1498e-05, "loss": 0.0336, "step": 15750 }, { "epoch": 1.3848857644991213, "grad_norm": 0.3930269479751587, "learning_rate": 3.1518000000000006e-05, "loss": 0.0306, "step": 15760 }, { "epoch": 1.3857644991212654, "grad_norm": 0.3326951861381531, "learning_rate": 3.1538e-05, "loss": 0.0335, "step": 15770 }, { "epoch": 1.3866432337434094, "grad_norm": 0.5150085091590881, "learning_rate": 3.1558000000000005e-05, "loss": 0.0309, "step": 15780 }, { "epoch": 1.3875219683655535, "grad_norm": 0.5016921758651733, "learning_rate": 3.1578e-05, "loss": 0.0323, "step": 15790 }, { "epoch": 1.3884007029876977, "grad_norm": 0.45075711607933044, "learning_rate": 3.1598e-05, "loss": 0.0335, "step": 15800 }, { "epoch": 1.3892794376098418, "grad_norm": 0.4692087173461914, "learning_rate": 3.1618e-05, "loss": 0.0308, "step": 15810 }, { "epoch": 1.390158172231986, "grad_norm": 0.45531055331230164, "learning_rate": 3.1638e-05, "loss": 0.032, "step": 15820 }, { "epoch": 1.3910369068541302, "grad_norm": 0.5617169737815857, "learning_rate": 3.1658e-05, "loss": 0.0332, "step": 15830 }, { "epoch": 1.3919156414762741, "grad_norm": 0.4111024737358093, "learning_rate": 3.1678000000000004e-05, "loss": 0.0321, "step": 15840 }, { "epoch": 1.3927943760984183, "grad_norm": 0.37415584921836853, "learning_rate": 3.1698e-05, "loss": 0.0334, "step": 15850 }, { "epoch": 1.3936731107205624, "grad_norm": 0.5614737868309021, "learning_rate": 3.1718000000000004e-05, "loss": 0.0351, "step": 15860 }, { "epoch": 1.3945518453427064, "grad_norm": 0.48443683981895447, "learning_rate": 3.1738e-05, "loss": 0.0325, "step": 15870 }, { "epoch": 1.3954305799648505, "grad_norm": 0.6002009510993958, "learning_rate": 3.1757999999999996e-05, "loss": 0.0329, "step": 15880 }, { "epoch": 1.3963093145869947, "grad_norm": 0.5311535596847534, "learning_rate": 3.1778e-05, "loss": 0.036, "step": 15890 }, { "epoch": 1.3971880492091389, "grad_norm": 0.5063393712043762, "learning_rate": 3.1798e-05, "loss": 0.0322, "step": 15900 }, { "epoch": 1.398066783831283, "grad_norm": 0.472586065530777, "learning_rate": 3.1818000000000006e-05, "loss": 0.0323, "step": 15910 }, { "epoch": 1.3989455184534272, "grad_norm": 0.5953604578971863, "learning_rate": 3.1838e-05, "loss": 0.0324, "step": 15920 }, { "epoch": 1.3998242530755711, "grad_norm": 0.3617658317089081, "learning_rate": 3.1858e-05, "loss": 0.0344, "step": 15930 }, { "epoch": 1.4007029876977153, "grad_norm": 0.5993531942367554, "learning_rate": 3.1878e-05, "loss": 0.0344, "step": 15940 }, { "epoch": 1.4015817223198594, "grad_norm": 0.678981602191925, "learning_rate": 3.1898e-05, "loss": 0.0334, "step": 15950 }, { "epoch": 1.4024604569420034, "grad_norm": 0.42375651001930237, "learning_rate": 3.1918e-05, "loss": 0.032, "step": 15960 }, { "epoch": 1.4033391915641475, "grad_norm": 0.5370731949806213, "learning_rate": 3.1938e-05, "loss": 0.0341, "step": 15970 }, { "epoch": 1.4042179261862917, "grad_norm": 0.7220519781112671, "learning_rate": 3.1958e-05, "loss": 0.035, "step": 15980 }, { "epoch": 1.4050966608084359, "grad_norm": 0.41921404004096985, "learning_rate": 3.1978000000000005e-05, "loss": 0.0328, "step": 15990 }, { "epoch": 1.40597539543058, "grad_norm": 0.4645719826221466, "learning_rate": 3.1998e-05, "loss": 0.0337, "step": 16000 }, { "epoch": 1.4068541300527242, "grad_norm": 0.5637190937995911, "learning_rate": 3.2018000000000004e-05, "loss": 0.031, "step": 16010 }, { "epoch": 1.4077328646748681, "grad_norm": 0.3942726254463196, "learning_rate": 3.2038e-05, "loss": 0.0331, "step": 16020 }, { "epoch": 1.4086115992970123, "grad_norm": 0.9019116163253784, "learning_rate": 3.2058e-05, "loss": 0.0303, "step": 16030 }, { "epoch": 1.4094903339191565, "grad_norm": 0.3753403425216675, "learning_rate": 3.2078e-05, "loss": 0.03, "step": 16040 }, { "epoch": 1.4103690685413004, "grad_norm": 0.8036108613014221, "learning_rate": 3.2098e-05, "loss": 0.032, "step": 16050 }, { "epoch": 1.4112478031634446, "grad_norm": 0.6036341190338135, "learning_rate": 3.2118000000000007e-05, "loss": 0.0329, "step": 16060 }, { "epoch": 1.4121265377855887, "grad_norm": 0.6228379011154175, "learning_rate": 3.2138e-05, "loss": 0.0334, "step": 16070 }, { "epoch": 1.4130052724077329, "grad_norm": 0.34382137656211853, "learning_rate": 3.2158e-05, "loss": 0.0343, "step": 16080 }, { "epoch": 1.413884007029877, "grad_norm": 0.5795236229896545, "learning_rate": 3.2178e-05, "loss": 0.0369, "step": 16090 }, { "epoch": 1.4147627416520212, "grad_norm": 0.4919991195201874, "learning_rate": 3.2198e-05, "loss": 0.0306, "step": 16100 }, { "epoch": 1.4156414762741651, "grad_norm": 0.39509284496307373, "learning_rate": 3.2218e-05, "loss": 0.0341, "step": 16110 }, { "epoch": 1.4165202108963093, "grad_norm": 0.41017696261405945, "learning_rate": 3.2238e-05, "loss": 0.0372, "step": 16120 }, { "epoch": 1.4173989455184535, "grad_norm": 0.3784236013889313, "learning_rate": 3.2258e-05, "loss": 0.0348, "step": 16130 }, { "epoch": 1.4182776801405974, "grad_norm": 0.28759902715682983, "learning_rate": 3.2278000000000005e-05, "loss": 0.0337, "step": 16140 }, { "epoch": 1.4191564147627416, "grad_norm": 0.42520850896835327, "learning_rate": 3.2298e-05, "loss": 0.0315, "step": 16150 }, { "epoch": 1.4200351493848857, "grad_norm": 0.4009518623352051, "learning_rate": 3.2318000000000005e-05, "loss": 0.032, "step": 16160 }, { "epoch": 1.42091388400703, "grad_norm": 0.4249320328235626, "learning_rate": 3.2338e-05, "loss": 0.0318, "step": 16170 }, { "epoch": 1.421792618629174, "grad_norm": 0.4736495316028595, "learning_rate": 3.2358e-05, "loss": 0.0311, "step": 16180 }, { "epoch": 1.4226713532513182, "grad_norm": 0.38267478346824646, "learning_rate": 3.2378e-05, "loss": 0.0316, "step": 16190 }, { "epoch": 1.4235500878734622, "grad_norm": 0.2924399971961975, "learning_rate": 3.2398000000000004e-05, "loss": 0.0332, "step": 16200 }, { "epoch": 1.4244288224956063, "grad_norm": 0.42754697799682617, "learning_rate": 3.241800000000001e-05, "loss": 0.0318, "step": 16210 }, { "epoch": 1.4253075571177505, "grad_norm": 0.39129143953323364, "learning_rate": 3.2438000000000004e-05, "loss": 0.0345, "step": 16220 }, { "epoch": 1.4261862917398944, "grad_norm": 0.9636648893356323, "learning_rate": 3.2458e-05, "loss": 0.0317, "step": 16230 }, { "epoch": 1.4270650263620386, "grad_norm": 0.4188474118709564, "learning_rate": 3.2478e-05, "loss": 0.0308, "step": 16240 }, { "epoch": 1.4279437609841827, "grad_norm": 0.32051369547843933, "learning_rate": 3.2498e-05, "loss": 0.0354, "step": 16250 }, { "epoch": 1.428822495606327, "grad_norm": 0.49614110589027405, "learning_rate": 3.2518e-05, "loss": 0.0333, "step": 16260 }, { "epoch": 1.429701230228471, "grad_norm": 0.3594072461128235, "learning_rate": 3.2538e-05, "loss": 0.0344, "step": 16270 }, { "epoch": 1.4305799648506152, "grad_norm": 0.37504783272743225, "learning_rate": 3.2558e-05, "loss": 0.0329, "step": 16280 }, { "epoch": 1.4314586994727592, "grad_norm": 0.48637551069259644, "learning_rate": 3.2578000000000006e-05, "loss": 0.0357, "step": 16290 }, { "epoch": 1.4323374340949033, "grad_norm": 0.3975578248500824, "learning_rate": 3.2598e-05, "loss": 0.0339, "step": 16300 }, { "epoch": 1.4332161687170475, "grad_norm": 0.5625788569450378, "learning_rate": 3.2618000000000005e-05, "loss": 0.031, "step": 16310 }, { "epoch": 1.4340949033391914, "grad_norm": 0.48675912618637085, "learning_rate": 3.2638e-05, "loss": 0.0336, "step": 16320 }, { "epoch": 1.4349736379613356, "grad_norm": 0.566201388835907, "learning_rate": 3.2658e-05, "loss": 0.0344, "step": 16330 }, { "epoch": 1.4358523725834798, "grad_norm": 0.5417633056640625, "learning_rate": 3.2678e-05, "loss": 0.0366, "step": 16340 }, { "epoch": 1.436731107205624, "grad_norm": 0.6730843186378479, "learning_rate": 3.2698e-05, "loss": 0.0347, "step": 16350 }, { "epoch": 1.437609841827768, "grad_norm": 0.42798376083374023, "learning_rate": 3.2718e-05, "loss": 0.0373, "step": 16360 }, { "epoch": 1.4384885764499122, "grad_norm": 0.3132336735725403, "learning_rate": 3.2738000000000004e-05, "loss": 0.0362, "step": 16370 }, { "epoch": 1.4393673110720562, "grad_norm": 0.36344870924949646, "learning_rate": 3.2758e-05, "loss": 0.0348, "step": 16380 }, { "epoch": 1.4402460456942003, "grad_norm": 0.46098822355270386, "learning_rate": 3.2778000000000004e-05, "loss": 0.0374, "step": 16390 }, { "epoch": 1.4411247803163445, "grad_norm": 0.45711565017700195, "learning_rate": 3.2798e-05, "loss": 0.0335, "step": 16400 }, { "epoch": 1.4420035149384884, "grad_norm": 0.36659324169158936, "learning_rate": 3.2818e-05, "loss": 0.0323, "step": 16410 }, { "epoch": 1.4428822495606326, "grad_norm": 0.31711089611053467, "learning_rate": 3.2838e-05, "loss": 0.0314, "step": 16420 }, { "epoch": 1.4437609841827768, "grad_norm": 0.47796231508255005, "learning_rate": 3.2858e-05, "loss": 0.0342, "step": 16430 }, { "epoch": 1.444639718804921, "grad_norm": 0.2767942547798157, "learning_rate": 3.2878000000000006e-05, "loss": 0.031, "step": 16440 }, { "epoch": 1.445518453427065, "grad_norm": 0.2625356912612915, "learning_rate": 3.2898e-05, "loss": 0.033, "step": 16450 }, { "epoch": 1.4463971880492092, "grad_norm": 0.26889482140541077, "learning_rate": 3.2918e-05, "loss": 0.0317, "step": 16460 }, { "epoch": 1.4472759226713532, "grad_norm": 1.0923831462860107, "learning_rate": 3.2938e-05, "loss": 0.0322, "step": 16470 }, { "epoch": 1.4481546572934973, "grad_norm": 0.5004123449325562, "learning_rate": 3.2958e-05, "loss": 0.0411, "step": 16480 }, { "epoch": 1.4490333919156415, "grad_norm": 0.554145336151123, "learning_rate": 3.2978e-05, "loss": 0.0406, "step": 16490 }, { "epoch": 1.4499121265377855, "grad_norm": 0.5339035987854004, "learning_rate": 3.2998e-05, "loss": 0.0341, "step": 16500 }, { "epoch": 1.4507908611599296, "grad_norm": 0.3869035542011261, "learning_rate": 3.3018e-05, "loss": 0.0359, "step": 16510 }, { "epoch": 1.4516695957820738, "grad_norm": 0.47544848918914795, "learning_rate": 3.3038000000000005e-05, "loss": 0.0343, "step": 16520 }, { "epoch": 1.452548330404218, "grad_norm": 0.6894603371620178, "learning_rate": 3.3058e-05, "loss": 0.0324, "step": 16530 }, { "epoch": 1.453427065026362, "grad_norm": 0.4221839904785156, "learning_rate": 3.3078000000000004e-05, "loss": 0.0308, "step": 16540 }, { "epoch": 1.4543057996485063, "grad_norm": 0.28816232085227966, "learning_rate": 3.3098e-05, "loss": 0.0306, "step": 16550 }, { "epoch": 1.4551845342706502, "grad_norm": 0.32212528586387634, "learning_rate": 3.3118e-05, "loss": 0.0305, "step": 16560 }, { "epoch": 1.4560632688927944, "grad_norm": 0.2801029086112976, "learning_rate": 3.3138e-05, "loss": 0.031, "step": 16570 }, { "epoch": 1.4569420035149385, "grad_norm": 0.25634753704071045, "learning_rate": 3.3158000000000003e-05, "loss": 0.0313, "step": 16580 }, { "epoch": 1.4578207381370827, "grad_norm": 0.33657583594322205, "learning_rate": 3.3178000000000007e-05, "loss": 0.0309, "step": 16590 }, { "epoch": 1.4586994727592266, "grad_norm": 0.31468451023101807, "learning_rate": 3.3198e-05, "loss": 0.0339, "step": 16600 }, { "epoch": 1.4595782073813708, "grad_norm": 0.49141189455986023, "learning_rate": 3.3218e-05, "loss": 0.0301, "step": 16610 }, { "epoch": 1.460456942003515, "grad_norm": 0.33522623777389526, "learning_rate": 3.3238e-05, "loss": 0.0296, "step": 16620 }, { "epoch": 1.461335676625659, "grad_norm": 0.30419984459877014, "learning_rate": 3.3258e-05, "loss": 0.0298, "step": 16630 }, { "epoch": 1.4622144112478033, "grad_norm": 0.529350757598877, "learning_rate": 3.3278e-05, "loss": 0.0334, "step": 16640 }, { "epoch": 1.4630931458699472, "grad_norm": 0.4603848159313202, "learning_rate": 3.3298e-05, "loss": 0.029, "step": 16650 }, { "epoch": 1.4639718804920914, "grad_norm": 0.4120873808860779, "learning_rate": 3.3318e-05, "loss": 0.0296, "step": 16660 }, { "epoch": 1.4648506151142355, "grad_norm": 0.3772662580013275, "learning_rate": 3.3338000000000005e-05, "loss": 0.0299, "step": 16670 }, { "epoch": 1.4657293497363797, "grad_norm": 0.37952518463134766, "learning_rate": 3.3358e-05, "loss": 0.0292, "step": 16680 }, { "epoch": 1.4666080843585236, "grad_norm": 0.4039608836174011, "learning_rate": 3.3378000000000005e-05, "loss": 0.0331, "step": 16690 }, { "epoch": 1.4674868189806678, "grad_norm": 0.33011436462402344, "learning_rate": 3.3398e-05, "loss": 0.0306, "step": 16700 }, { "epoch": 1.468365553602812, "grad_norm": 0.330858439207077, "learning_rate": 3.3418e-05, "loss": 0.0274, "step": 16710 }, { "epoch": 1.4692442882249561, "grad_norm": 0.3586142063140869, "learning_rate": 3.3438e-05, "loss": 0.0288, "step": 16720 }, { "epoch": 1.4701230228471003, "grad_norm": 0.39356735348701477, "learning_rate": 3.3458e-05, "loss": 0.0286, "step": 16730 }, { "epoch": 1.4710017574692442, "grad_norm": 0.3560757040977478, "learning_rate": 3.347800000000001e-05, "loss": 0.0278, "step": 16740 }, { "epoch": 1.4718804920913884, "grad_norm": 0.39722543954849243, "learning_rate": 3.3498000000000004e-05, "loss": 0.0314, "step": 16750 }, { "epoch": 1.4727592267135325, "grad_norm": 0.43690699338912964, "learning_rate": 3.3518e-05, "loss": 0.029, "step": 16760 }, { "epoch": 1.4736379613356767, "grad_norm": 0.33053165674209595, "learning_rate": 3.3538e-05, "loss": 0.0278, "step": 16770 }, { "epoch": 1.4745166959578206, "grad_norm": 0.41223272681236267, "learning_rate": 3.3558e-05, "loss": 0.0283, "step": 16780 }, { "epoch": 1.4753954305799648, "grad_norm": 0.2889290750026703, "learning_rate": 3.3578e-05, "loss": 0.0275, "step": 16790 }, { "epoch": 1.476274165202109, "grad_norm": 0.3082895576953888, "learning_rate": 3.3598e-05, "loss": 0.0305, "step": 16800 }, { "epoch": 1.4771528998242531, "grad_norm": 0.31355395913124084, "learning_rate": 3.3618e-05, "loss": 0.0299, "step": 16810 }, { "epoch": 1.4780316344463973, "grad_norm": 0.4386684000492096, "learning_rate": 3.3638000000000006e-05, "loss": 0.028, "step": 16820 }, { "epoch": 1.4789103690685412, "grad_norm": 0.35013777017593384, "learning_rate": 3.3658e-05, "loss": 0.0312, "step": 16830 }, { "epoch": 1.4797891036906854, "grad_norm": 0.39046117663383484, "learning_rate": 3.3678000000000005e-05, "loss": 0.0288, "step": 16840 }, { "epoch": 1.4806678383128296, "grad_norm": 0.39903926849365234, "learning_rate": 3.3698e-05, "loss": 0.0277, "step": 16850 }, { "epoch": 1.4815465729349737, "grad_norm": 0.3434886038303375, "learning_rate": 3.3718e-05, "loss": 0.0281, "step": 16860 }, { "epoch": 1.4824253075571177, "grad_norm": 0.27506107091903687, "learning_rate": 3.3738e-05, "loss": 0.0282, "step": 16870 }, { "epoch": 1.4833040421792618, "grad_norm": 0.5269370675086975, "learning_rate": 3.3758e-05, "loss": 0.028, "step": 16880 }, { "epoch": 1.484182776801406, "grad_norm": 0.40288087725639343, "learning_rate": 3.3778e-05, "loss": 0.0282, "step": 16890 }, { "epoch": 1.4850615114235501, "grad_norm": 0.5256991982460022, "learning_rate": 3.3798000000000004e-05, "loss": 0.0302, "step": 16900 }, { "epoch": 1.4859402460456943, "grad_norm": 0.44673770666122437, "learning_rate": 3.3818e-05, "loss": 0.0267, "step": 16910 }, { "epoch": 1.4868189806678382, "grad_norm": 0.3610461354255676, "learning_rate": 3.3838000000000004e-05, "loss": 0.0286, "step": 16920 }, { "epoch": 1.4876977152899824, "grad_norm": 0.5344422459602356, "learning_rate": 3.3858e-05, "loss": 0.0288, "step": 16930 }, { "epoch": 1.4885764499121266, "grad_norm": 0.26839151978492737, "learning_rate": 3.3878e-05, "loss": 0.0257, "step": 16940 }, { "epoch": 1.4894551845342707, "grad_norm": 0.24629580974578857, "learning_rate": 3.3898e-05, "loss": 0.0266, "step": 16950 }, { "epoch": 1.4903339191564147, "grad_norm": 0.2885098457336426, "learning_rate": 3.3918e-05, "loss": 0.0263, "step": 16960 }, { "epoch": 1.4912126537785588, "grad_norm": 0.30727308988571167, "learning_rate": 3.3938000000000006e-05, "loss": 0.0255, "step": 16970 }, { "epoch": 1.492091388400703, "grad_norm": 0.2697884440422058, "learning_rate": 3.3958e-05, "loss": 0.03, "step": 16980 }, { "epoch": 1.4929701230228472, "grad_norm": 0.1924544721841812, "learning_rate": 3.3978000000000006e-05, "loss": 0.0287, "step": 16990 }, { "epoch": 1.4938488576449913, "grad_norm": 0.4509918987751007, "learning_rate": 3.3998e-05, "loss": 0.0287, "step": 17000 }, { "epoch": 1.4947275922671353, "grad_norm": 0.34734034538269043, "learning_rate": 3.4018e-05, "loss": 0.0276, "step": 17010 }, { "epoch": 1.4956063268892794, "grad_norm": 0.3584314286708832, "learning_rate": 3.4038e-05, "loss": 0.0308, "step": 17020 }, { "epoch": 1.4964850615114236, "grad_norm": 0.2986353039741516, "learning_rate": 3.4058e-05, "loss": 0.0267, "step": 17030 }, { "epoch": 1.4973637961335677, "grad_norm": 0.3148731291294098, "learning_rate": 3.4078e-05, "loss": 0.0289, "step": 17040 }, { "epoch": 1.4982425307557117, "grad_norm": 0.27651792764663696, "learning_rate": 3.4098000000000005e-05, "loss": 0.0257, "step": 17050 }, { "epoch": 1.4991212653778558, "grad_norm": 0.45059439539909363, "learning_rate": 3.4118e-05, "loss": 0.0296, "step": 17060 }, { "epoch": 1.5, "grad_norm": 0.35662299394607544, "learning_rate": 3.4138000000000004e-05, "loss": 0.0253, "step": 17070 }, { "epoch": 1.5008787346221442, "grad_norm": 0.39294853806495667, "learning_rate": 3.4158e-05, "loss": 0.029, "step": 17080 }, { "epoch": 1.5017574692442883, "grad_norm": 0.4299938976764679, "learning_rate": 3.4178e-05, "loss": 0.0289, "step": 17090 }, { "epoch": 1.5026362038664325, "grad_norm": 0.5488709211349487, "learning_rate": 3.4198e-05, "loss": 0.0286, "step": 17100 }, { "epoch": 1.5035149384885764, "grad_norm": 0.2633499205112457, "learning_rate": 3.4218000000000003e-05, "loss": 0.0306, "step": 17110 }, { "epoch": 1.5043936731107206, "grad_norm": 0.4203757047653198, "learning_rate": 3.4238000000000007e-05, "loss": 0.0277, "step": 17120 }, { "epoch": 1.5052724077328645, "grad_norm": 0.27922582626342773, "learning_rate": 3.4258e-05, "loss": 0.0289, "step": 17130 }, { "epoch": 1.5061511423550087, "grad_norm": 0.5536366701126099, "learning_rate": 3.4278e-05, "loss": 0.0289, "step": 17140 }, { "epoch": 1.5070298769771528, "grad_norm": 0.5592535734176636, "learning_rate": 3.4298e-05, "loss": 0.0276, "step": 17150 }, { "epoch": 1.507908611599297, "grad_norm": 0.38431981205940247, "learning_rate": 3.4318e-05, "loss": 0.0293, "step": 17160 }, { "epoch": 1.5087873462214412, "grad_norm": 0.37994110584259033, "learning_rate": 3.4338e-05, "loss": 0.0282, "step": 17170 }, { "epoch": 1.5096660808435853, "grad_norm": 0.4396600127220154, "learning_rate": 3.4358e-05, "loss": 0.0299, "step": 17180 }, { "epoch": 1.5105448154657295, "grad_norm": 0.5814457535743713, "learning_rate": 3.4378e-05, "loss": 0.0278, "step": 17190 }, { "epoch": 1.5114235500878734, "grad_norm": 0.5311217308044434, "learning_rate": 3.4398000000000005e-05, "loss": 0.0264, "step": 17200 }, { "epoch": 1.5123022847100176, "grad_norm": 0.4885469079017639, "learning_rate": 3.4418e-05, "loss": 0.027, "step": 17210 }, { "epoch": 1.5131810193321615, "grad_norm": 0.4479878842830658, "learning_rate": 3.4438000000000005e-05, "loss": 0.0295, "step": 17220 }, { "epoch": 1.5140597539543057, "grad_norm": 0.5072970390319824, "learning_rate": 3.4458e-05, "loss": 0.028, "step": 17230 }, { "epoch": 1.5149384885764499, "grad_norm": 0.4594135582447052, "learning_rate": 3.4478e-05, "loss": 0.0274, "step": 17240 }, { "epoch": 1.515817223198594, "grad_norm": 0.36413681507110596, "learning_rate": 3.4498e-05, "loss": 0.0275, "step": 17250 }, { "epoch": 1.5166959578207382, "grad_norm": 0.3280104696750641, "learning_rate": 3.4518e-05, "loss": 0.0281, "step": 17260 }, { "epoch": 1.5175746924428823, "grad_norm": 0.264160692691803, "learning_rate": 3.4538e-05, "loss": 0.0256, "step": 17270 }, { "epoch": 1.5184534270650265, "grad_norm": 0.5538557171821594, "learning_rate": 3.4558000000000004e-05, "loss": 0.0275, "step": 17280 }, { "epoch": 1.5193321616871704, "grad_norm": 0.3335443437099457, "learning_rate": 3.4578e-05, "loss": 0.027, "step": 17290 }, { "epoch": 1.5202108963093146, "grad_norm": 0.40634748339653015, "learning_rate": 3.4598e-05, "loss": 0.0283, "step": 17300 }, { "epoch": 1.5210896309314585, "grad_norm": 0.465692937374115, "learning_rate": 3.4618e-05, "loss": 0.0299, "step": 17310 }, { "epoch": 1.5219683655536027, "grad_norm": 0.4428754150867462, "learning_rate": 3.4638e-05, "loss": 0.0291, "step": 17320 }, { "epoch": 1.5228471001757469, "grad_norm": 0.36537113785743713, "learning_rate": 3.4658e-05, "loss": 0.0286, "step": 17330 }, { "epoch": 1.523725834797891, "grad_norm": 0.2686469852924347, "learning_rate": 3.4678e-05, "loss": 0.0262, "step": 17340 }, { "epoch": 1.5246045694200352, "grad_norm": 0.3861306607723236, "learning_rate": 3.4698000000000006e-05, "loss": 0.0265, "step": 17350 }, { "epoch": 1.5254833040421794, "grad_norm": 0.3113792836666107, "learning_rate": 3.4718e-05, "loss": 0.0267, "step": 17360 }, { "epoch": 1.5263620386643235, "grad_norm": 0.26025307178497314, "learning_rate": 3.4738000000000005e-05, "loss": 0.027, "step": 17370 }, { "epoch": 1.5272407732864675, "grad_norm": 0.6024535298347473, "learning_rate": 3.4758e-05, "loss": 0.028, "step": 17380 }, { "epoch": 1.5281195079086116, "grad_norm": 0.4874661862850189, "learning_rate": 3.4778e-05, "loss": 0.0286, "step": 17390 }, { "epoch": 1.5289982425307556, "grad_norm": 0.35125112533569336, "learning_rate": 3.4798e-05, "loss": 0.0306, "step": 17400 }, { "epoch": 1.5298769771528997, "grad_norm": 0.4087780714035034, "learning_rate": 3.4818e-05, "loss": 0.0263, "step": 17410 }, { "epoch": 1.5307557117750439, "grad_norm": 0.3164629340171814, "learning_rate": 3.4838e-05, "loss": 0.0277, "step": 17420 }, { "epoch": 1.531634446397188, "grad_norm": 0.36570289731025696, "learning_rate": 3.4858000000000004e-05, "loss": 0.027, "step": 17430 }, { "epoch": 1.5325131810193322, "grad_norm": 0.2339581996202469, "learning_rate": 3.4878e-05, "loss": 0.0251, "step": 17440 }, { "epoch": 1.5333919156414764, "grad_norm": 0.31251534819602966, "learning_rate": 3.4898000000000004e-05, "loss": 0.0262, "step": 17450 }, { "epoch": 1.5342706502636205, "grad_norm": 0.4867303669452667, "learning_rate": 3.4918e-05, "loss": 0.0291, "step": 17460 }, { "epoch": 1.5351493848857645, "grad_norm": 0.37176427245140076, "learning_rate": 3.4938e-05, "loss": 0.0284, "step": 17470 }, { "epoch": 1.5360281195079086, "grad_norm": 0.40921953320503235, "learning_rate": 3.4958e-05, "loss": 0.0282, "step": 17480 }, { "epoch": 1.5369068541300526, "grad_norm": 0.3902413547039032, "learning_rate": 3.4978e-05, "loss": 0.0283, "step": 17490 }, { "epoch": 1.5377855887521967, "grad_norm": 0.3384402394294739, "learning_rate": 3.4998000000000006e-05, "loss": 0.0258, "step": 17500 }, { "epoch": 1.538664323374341, "grad_norm": 0.3331215977668762, "learning_rate": 3.5018e-05, "loss": 0.0256, "step": 17510 }, { "epoch": 1.539543057996485, "grad_norm": 0.4437127113342285, "learning_rate": 3.5038000000000006e-05, "loss": 0.0292, "step": 17520 }, { "epoch": 1.5404217926186292, "grad_norm": 0.2931359112262726, "learning_rate": 3.5058e-05, "loss": 0.0288, "step": 17530 }, { "epoch": 1.5413005272407734, "grad_norm": 0.4418531060218811, "learning_rate": 3.5078e-05, "loss": 0.0282, "step": 17540 }, { "epoch": 1.5421792618629175, "grad_norm": 0.4532262682914734, "learning_rate": 3.5098e-05, "loss": 0.0274, "step": 17550 }, { "epoch": 1.5430579964850615, "grad_norm": 0.39529597759246826, "learning_rate": 3.5118e-05, "loss": 0.0288, "step": 17560 }, { "epoch": 1.5439367311072056, "grad_norm": 0.45109742879867554, "learning_rate": 3.5138e-05, "loss": 0.0293, "step": 17570 }, { "epoch": 1.5448154657293496, "grad_norm": 0.35711440443992615, "learning_rate": 3.5158000000000005e-05, "loss": 0.0285, "step": 17580 }, { "epoch": 1.5456942003514937, "grad_norm": 0.3920532464981079, "learning_rate": 3.5178e-05, "loss": 0.0282, "step": 17590 }, { "epoch": 1.546572934973638, "grad_norm": 0.35828927159309387, "learning_rate": 3.5198000000000004e-05, "loss": 0.0286, "step": 17600 }, { "epoch": 1.547451669595782, "grad_norm": 0.2775271236896515, "learning_rate": 3.5218e-05, "loss": 0.0256, "step": 17610 }, { "epoch": 1.5483304042179262, "grad_norm": 0.2846187651157379, "learning_rate": 3.5238000000000004e-05, "loss": 0.0249, "step": 17620 }, { "epoch": 1.5492091388400704, "grad_norm": 0.4410446882247925, "learning_rate": 3.5258e-05, "loss": 0.0301, "step": 17630 }, { "epoch": 1.5500878734622145, "grad_norm": 0.5103135108947754, "learning_rate": 3.5278e-05, "loss": 0.0283, "step": 17640 }, { "epoch": 1.5509666080843585, "grad_norm": 0.34004703164100647, "learning_rate": 3.529800000000001e-05, "loss": 0.0287, "step": 17650 }, { "epoch": 1.5518453427065027, "grad_norm": 0.37869665026664734, "learning_rate": 3.5318e-05, "loss": 0.0296, "step": 17660 }, { "epoch": 1.5527240773286466, "grad_norm": 0.353164941072464, "learning_rate": 3.5338000000000006e-05, "loss": 0.0251, "step": 17670 }, { "epoch": 1.5536028119507908, "grad_norm": 0.24357189238071442, "learning_rate": 3.5358e-05, "loss": 0.0274, "step": 17680 }, { "epoch": 1.554481546572935, "grad_norm": 0.3418588638305664, "learning_rate": 3.5378e-05, "loss": 0.0302, "step": 17690 }, { "epoch": 1.555360281195079, "grad_norm": 0.3373928666114807, "learning_rate": 3.5398e-05, "loss": 0.0276, "step": 17700 }, { "epoch": 1.5562390158172232, "grad_norm": 0.29208049178123474, "learning_rate": 3.5418e-05, "loss": 0.0264, "step": 17710 }, { "epoch": 1.5571177504393674, "grad_norm": 0.2879009246826172, "learning_rate": 3.5438e-05, "loss": 0.0267, "step": 17720 }, { "epoch": 1.5579964850615116, "grad_norm": 0.3376672565937042, "learning_rate": 3.5458000000000005e-05, "loss": 0.0268, "step": 17730 }, { "epoch": 1.5588752196836555, "grad_norm": 0.29250842332839966, "learning_rate": 3.5478e-05, "loss": 0.0263, "step": 17740 }, { "epoch": 1.5597539543057997, "grad_norm": 0.26206573843955994, "learning_rate": 3.5498000000000005e-05, "loss": 0.0264, "step": 17750 }, { "epoch": 1.5606326889279436, "grad_norm": 0.2627841532230377, "learning_rate": 3.5518e-05, "loss": 0.0263, "step": 17760 }, { "epoch": 1.5615114235500878, "grad_norm": 0.22968581318855286, "learning_rate": 3.5538e-05, "loss": 0.0275, "step": 17770 }, { "epoch": 1.562390158172232, "grad_norm": 0.2318480759859085, "learning_rate": 3.5558e-05, "loss": 0.0285, "step": 17780 }, { "epoch": 1.563268892794376, "grad_norm": 0.2733573913574219, "learning_rate": 3.5578e-05, "loss": 0.0277, "step": 17790 }, { "epoch": 1.5641476274165202, "grad_norm": 0.34545087814331055, "learning_rate": 3.5598e-05, "loss": 0.0264, "step": 17800 }, { "epoch": 1.5650263620386644, "grad_norm": 0.3566303253173828, "learning_rate": 3.5618000000000004e-05, "loss": 0.027, "step": 17810 }, { "epoch": 1.5659050966608086, "grad_norm": 0.4635716676712036, "learning_rate": 3.5638e-05, "loss": 0.0289, "step": 17820 }, { "epoch": 1.5667838312829525, "grad_norm": 0.32288724184036255, "learning_rate": 3.5658e-05, "loss": 0.0255, "step": 17830 }, { "epoch": 1.5676625659050967, "grad_norm": 0.3132362961769104, "learning_rate": 3.5678e-05, "loss": 0.0275, "step": 17840 }, { "epoch": 1.5685413005272406, "grad_norm": 0.3818802535533905, "learning_rate": 3.5698e-05, "loss": 0.0292, "step": 17850 }, { "epoch": 1.5694200351493848, "grad_norm": 0.362228661775589, "learning_rate": 3.5718e-05, "loss": 0.0256, "step": 17860 }, { "epoch": 1.570298769771529, "grad_norm": 0.42782506346702576, "learning_rate": 3.5738e-05, "loss": 0.0273, "step": 17870 }, { "epoch": 1.571177504393673, "grad_norm": 0.2495250105857849, "learning_rate": 3.5758000000000006e-05, "loss": 0.0256, "step": 17880 }, { "epoch": 1.5720562390158173, "grad_norm": 0.2989693880081177, "learning_rate": 3.5778e-05, "loss": 0.0271, "step": 17890 }, { "epoch": 1.5729349736379614, "grad_norm": 0.30611351132392883, "learning_rate": 3.5798000000000005e-05, "loss": 0.0253, "step": 17900 }, { "epoch": 1.5738137082601056, "grad_norm": 0.30499207973480225, "learning_rate": 3.5818e-05, "loss": 0.0269, "step": 17910 }, { "epoch": 1.5746924428822495, "grad_norm": 0.3863859474658966, "learning_rate": 3.5838e-05, "loss": 0.0281, "step": 17920 }, { "epoch": 1.5755711775043937, "grad_norm": 0.3046998977661133, "learning_rate": 3.5858e-05, "loss": 0.0278, "step": 17930 }, { "epoch": 1.5764499121265376, "grad_norm": 0.39119455218315125, "learning_rate": 3.5878e-05, "loss": 0.0271, "step": 17940 }, { "epoch": 1.5773286467486818, "grad_norm": 0.4929792582988739, "learning_rate": 3.5898e-05, "loss": 0.0263, "step": 17950 }, { "epoch": 1.578207381370826, "grad_norm": 0.381782591342926, "learning_rate": 3.5918000000000004e-05, "loss": 0.0262, "step": 17960 }, { "epoch": 1.57908611599297, "grad_norm": 0.4035857021808624, "learning_rate": 3.5938e-05, "loss": 0.0281, "step": 17970 }, { "epoch": 1.5799648506151143, "grad_norm": 0.4553943872451782, "learning_rate": 3.5958000000000004e-05, "loss": 0.0243, "step": 17980 }, { "epoch": 1.5808435852372584, "grad_norm": 0.40477609634399414, "learning_rate": 3.5978e-05, "loss": 0.0258, "step": 17990 }, { "epoch": 1.5817223198594026, "grad_norm": 0.3404220640659332, "learning_rate": 3.5998e-05, "loss": 0.0276, "step": 18000 }, { "epoch": 1.5826010544815465, "grad_norm": 0.4867801368236542, "learning_rate": 3.6018e-05, "loss": 0.0278, "step": 18010 }, { "epoch": 1.5834797891036907, "grad_norm": 0.4373556971549988, "learning_rate": 3.6038e-05, "loss": 0.0265, "step": 18020 }, { "epoch": 1.5843585237258346, "grad_norm": 0.3678586483001709, "learning_rate": 3.6058000000000006e-05, "loss": 0.0253, "step": 18030 }, { "epoch": 1.5852372583479788, "grad_norm": 0.22998802363872528, "learning_rate": 3.6078e-05, "loss": 0.028, "step": 18040 }, { "epoch": 1.586115992970123, "grad_norm": 0.21984513103961945, "learning_rate": 3.6098000000000006e-05, "loss": 0.0273, "step": 18050 }, { "epoch": 1.5869947275922671, "grad_norm": 0.5300821661949158, "learning_rate": 3.6118e-05, "loss": 0.0258, "step": 18060 }, { "epoch": 1.5878734622144113, "grad_norm": 0.4388774335384369, "learning_rate": 3.6138e-05, "loss": 0.0273, "step": 18070 }, { "epoch": 1.5887521968365554, "grad_norm": 0.5105413794517517, "learning_rate": 3.6158e-05, "loss": 0.0271, "step": 18080 }, { "epoch": 1.5896309314586996, "grad_norm": 0.3928118944168091, "learning_rate": 3.6178e-05, "loss": 0.0283, "step": 18090 }, { "epoch": 1.5905096660808435, "grad_norm": 0.3409395217895508, "learning_rate": 3.6198e-05, "loss": 0.0275, "step": 18100 }, { "epoch": 1.5913884007029877, "grad_norm": 0.263976514339447, "learning_rate": 3.6218000000000005e-05, "loss": 0.0266, "step": 18110 }, { "epoch": 1.5922671353251316, "grad_norm": 0.45610591769218445, "learning_rate": 3.6238e-05, "loss": 0.0251, "step": 18120 }, { "epoch": 1.5931458699472758, "grad_norm": 0.26211366057395935, "learning_rate": 3.6258000000000004e-05, "loss": 0.0257, "step": 18130 }, { "epoch": 1.59402460456942, "grad_norm": 0.38341736793518066, "learning_rate": 3.6278e-05, "loss": 0.0309, "step": 18140 }, { "epoch": 1.5949033391915641, "grad_norm": 0.32906100153923035, "learning_rate": 3.6298000000000004e-05, "loss": 0.0259, "step": 18150 }, { "epoch": 1.5957820738137083, "grad_norm": 0.2528334856033325, "learning_rate": 3.6318e-05, "loss": 0.0262, "step": 18160 }, { "epoch": 1.5966608084358525, "grad_norm": 0.26621541380882263, "learning_rate": 3.6338e-05, "loss": 0.0298, "step": 18170 }, { "epoch": 1.5975395430579966, "grad_norm": 0.3228946924209595, "learning_rate": 3.6358e-05, "loss": 0.0259, "step": 18180 }, { "epoch": 1.5984182776801406, "grad_norm": 0.41037073731422424, "learning_rate": 3.6378e-05, "loss": 0.0274, "step": 18190 }, { "epoch": 1.5992970123022847, "grad_norm": 0.5190422534942627, "learning_rate": 3.6398000000000006e-05, "loss": 0.025, "step": 18200 }, { "epoch": 1.6001757469244289, "grad_norm": 0.4056777358055115, "learning_rate": 3.6418e-05, "loss": 0.0262, "step": 18210 }, { "epoch": 1.6010544815465728, "grad_norm": 0.4261966347694397, "learning_rate": 3.6438e-05, "loss": 0.0309, "step": 18220 }, { "epoch": 1.601933216168717, "grad_norm": 0.4257802665233612, "learning_rate": 3.6458e-05, "loss": 0.0289, "step": 18230 }, { "epoch": 1.6028119507908611, "grad_norm": 0.3446098864078522, "learning_rate": 3.6478e-05, "loss": 0.0265, "step": 18240 }, { "epoch": 1.6036906854130053, "grad_norm": 0.2964709401130676, "learning_rate": 3.6498e-05, "loss": 0.0262, "step": 18250 }, { "epoch": 1.6045694200351495, "grad_norm": 0.3353453278541565, "learning_rate": 3.6518000000000005e-05, "loss": 0.0271, "step": 18260 }, { "epoch": 1.6054481546572936, "grad_norm": 0.37630587816238403, "learning_rate": 3.6538e-05, "loss": 0.0267, "step": 18270 }, { "epoch": 1.6063268892794376, "grad_norm": 0.322000116109848, "learning_rate": 3.6558000000000005e-05, "loss": 0.0278, "step": 18280 }, { "epoch": 1.6072056239015817, "grad_norm": 0.3495213985443115, "learning_rate": 3.6578e-05, "loss": 0.0291, "step": 18290 }, { "epoch": 1.6080843585237259, "grad_norm": 0.3635331988334656, "learning_rate": 3.6598000000000004e-05, "loss": 0.0271, "step": 18300 }, { "epoch": 1.6089630931458698, "grad_norm": 0.44378504157066345, "learning_rate": 3.6618e-05, "loss": 0.0271, "step": 18310 }, { "epoch": 1.609841827768014, "grad_norm": 0.36657053232192993, "learning_rate": 3.6638e-05, "loss": 0.0256, "step": 18320 }, { "epoch": 1.6107205623901582, "grad_norm": 0.4730789065361023, "learning_rate": 3.6658e-05, "loss": 0.0302, "step": 18330 }, { "epoch": 1.6115992970123023, "grad_norm": 0.28399229049682617, "learning_rate": 3.6678000000000004e-05, "loss": 0.028, "step": 18340 }, { "epoch": 1.6124780316344465, "grad_norm": 0.3643015921115875, "learning_rate": 3.6698e-05, "loss": 0.0262, "step": 18350 }, { "epoch": 1.6133567662565906, "grad_norm": 0.37623366713523865, "learning_rate": 3.6718e-05, "loss": 0.028, "step": 18360 }, { "epoch": 1.6142355008787346, "grad_norm": 0.5350509285926819, "learning_rate": 3.6738e-05, "loss": 0.0283, "step": 18370 }, { "epoch": 1.6151142355008787, "grad_norm": 0.18593397736549377, "learning_rate": 3.6758e-05, "loss": 0.0295, "step": 18380 }, { "epoch": 1.615992970123023, "grad_norm": 0.4009331464767456, "learning_rate": 3.6778e-05, "loss": 0.03, "step": 18390 }, { "epoch": 1.6168717047451668, "grad_norm": 0.39933133125305176, "learning_rate": 3.6798e-05, "loss": 0.0283, "step": 18400 }, { "epoch": 1.617750439367311, "grad_norm": 0.2436313033103943, "learning_rate": 3.6818000000000006e-05, "loss": 0.0311, "step": 18410 }, { "epoch": 1.6186291739894552, "grad_norm": 0.3544790744781494, "learning_rate": 3.6838e-05, "loss": 0.0277, "step": 18420 }, { "epoch": 1.6195079086115993, "grad_norm": 0.32900679111480713, "learning_rate": 3.6858000000000005e-05, "loss": 0.0302, "step": 18430 }, { "epoch": 1.6203866432337435, "grad_norm": 0.2884068489074707, "learning_rate": 3.6878e-05, "loss": 0.0256, "step": 18440 }, { "epoch": 1.6212653778558876, "grad_norm": 0.3534647226333618, "learning_rate": 3.6898e-05, "loss": 0.0274, "step": 18450 }, { "epoch": 1.6221441124780316, "grad_norm": 0.34560635685920715, "learning_rate": 3.6918e-05, "loss": 0.0276, "step": 18460 }, { "epoch": 1.6230228471001757, "grad_norm": 0.19267602264881134, "learning_rate": 3.6938e-05, "loss": 0.0268, "step": 18470 }, { "epoch": 1.62390158172232, "grad_norm": 0.3481060266494751, "learning_rate": 3.6958e-05, "loss": 0.025, "step": 18480 }, { "epoch": 1.6247803163444638, "grad_norm": 0.30811524391174316, "learning_rate": 3.6978000000000004e-05, "loss": 0.0268, "step": 18490 }, { "epoch": 1.625659050966608, "grad_norm": 0.45587244629859924, "learning_rate": 3.6998e-05, "loss": 0.0272, "step": 18500 }, { "epoch": 1.6265377855887522, "grad_norm": 0.2888101637363434, "learning_rate": 3.7018000000000004e-05, "loss": 0.029, "step": 18510 }, { "epoch": 1.6274165202108963, "grad_norm": 0.3180406391620636, "learning_rate": 3.7038e-05, "loss": 0.0283, "step": 18520 }, { "epoch": 1.6282952548330405, "grad_norm": 0.4503192603588104, "learning_rate": 3.7058e-05, "loss": 0.0256, "step": 18530 }, { "epoch": 1.6291739894551847, "grad_norm": 0.5254066586494446, "learning_rate": 3.7078e-05, "loss": 0.0311, "step": 18540 }, { "epoch": 1.6300527240773286, "grad_norm": 0.3498695492744446, "learning_rate": 3.7097999999999996e-05, "loss": 0.0285, "step": 18550 }, { "epoch": 1.6309314586994728, "grad_norm": 0.23650312423706055, "learning_rate": 3.7118000000000006e-05, "loss": 0.027, "step": 18560 }, { "epoch": 1.631810193321617, "grad_norm": 0.3123376965522766, "learning_rate": 3.7138e-05, "loss": 0.0297, "step": 18570 }, { "epoch": 1.6326889279437609, "grad_norm": 0.27688848972320557, "learning_rate": 3.7158000000000006e-05, "loss": 0.0246, "step": 18580 }, { "epoch": 1.633567662565905, "grad_norm": 0.2534264028072357, "learning_rate": 3.7178e-05, "loss": 0.0266, "step": 18590 }, { "epoch": 1.6344463971880492, "grad_norm": 0.4991530776023865, "learning_rate": 3.7198e-05, "loss": 0.0272, "step": 18600 }, { "epoch": 1.6353251318101933, "grad_norm": 0.47657909989356995, "learning_rate": 3.7218e-05, "loss": 0.0279, "step": 18610 }, { "epoch": 1.6362038664323375, "grad_norm": 0.3545703887939453, "learning_rate": 3.7238e-05, "loss": 0.0268, "step": 18620 }, { "epoch": 1.6370826010544817, "grad_norm": 0.29108309745788574, "learning_rate": 3.7258e-05, "loss": 0.0266, "step": 18630 }, { "epoch": 1.6379613356766256, "grad_norm": 0.3392850160598755, "learning_rate": 3.7278000000000005e-05, "loss": 0.0262, "step": 18640 }, { "epoch": 1.6388400702987698, "grad_norm": 0.2589740753173828, "learning_rate": 3.7298e-05, "loss": 0.0248, "step": 18650 }, { "epoch": 1.639718804920914, "grad_norm": 0.333300918340683, "learning_rate": 3.7318000000000004e-05, "loss": 0.026, "step": 18660 }, { "epoch": 1.6405975395430579, "grad_norm": 0.408833384513855, "learning_rate": 3.7338e-05, "loss": 0.0247, "step": 18670 }, { "epoch": 1.641476274165202, "grad_norm": 0.3323635160923004, "learning_rate": 3.7358000000000004e-05, "loss": 0.0274, "step": 18680 }, { "epoch": 1.6423550087873462, "grad_norm": 0.3875831067562103, "learning_rate": 3.7378e-05, "loss": 0.0285, "step": 18690 }, { "epoch": 1.6432337434094904, "grad_norm": 0.35734856128692627, "learning_rate": 3.7398e-05, "loss": 0.0289, "step": 18700 }, { "epoch": 1.6441124780316345, "grad_norm": 0.3814052939414978, "learning_rate": 3.7418e-05, "loss": 0.027, "step": 18710 }, { "epoch": 1.6449912126537787, "grad_norm": 0.4620251953601837, "learning_rate": 3.7438e-05, "loss": 0.026, "step": 18720 }, { "epoch": 1.6458699472759226, "grad_norm": 0.43296971917152405, "learning_rate": 3.7458000000000006e-05, "loss": 0.0248, "step": 18730 }, { "epoch": 1.6467486818980668, "grad_norm": 0.2310459464788437, "learning_rate": 3.7478e-05, "loss": 0.03, "step": 18740 }, { "epoch": 1.647627416520211, "grad_norm": 0.2664235532283783, "learning_rate": 3.7498e-05, "loss": 0.0263, "step": 18750 }, { "epoch": 1.6485061511423549, "grad_norm": 0.34016433358192444, "learning_rate": 3.7518e-05, "loss": 0.0233, "step": 18760 }, { "epoch": 1.649384885764499, "grad_norm": 0.3043590188026428, "learning_rate": 3.7538e-05, "loss": 0.0258, "step": 18770 }, { "epoch": 1.6502636203866432, "grad_norm": 0.2902829945087433, "learning_rate": 3.7558e-05, "loss": 0.0261, "step": 18780 }, { "epoch": 1.6511423550087874, "grad_norm": 0.3819049894809723, "learning_rate": 3.7578000000000005e-05, "loss": 0.0273, "step": 18790 }, { "epoch": 1.6520210896309315, "grad_norm": 0.3790261447429657, "learning_rate": 3.7598e-05, "loss": 0.03, "step": 18800 }, { "epoch": 1.6528998242530757, "grad_norm": 0.3333602845668793, "learning_rate": 3.7618000000000005e-05, "loss": 0.0266, "step": 18810 }, { "epoch": 1.6537785588752196, "grad_norm": 0.45146846771240234, "learning_rate": 3.7638e-05, "loss": 0.0286, "step": 18820 }, { "epoch": 1.6546572934973638, "grad_norm": 0.319824755191803, "learning_rate": 3.7658000000000004e-05, "loss": 0.0266, "step": 18830 }, { "epoch": 1.655536028119508, "grad_norm": 0.37407949566841125, "learning_rate": 3.7678e-05, "loss": 0.0281, "step": 18840 }, { "epoch": 1.656414762741652, "grad_norm": 0.3010409474372864, "learning_rate": 3.7698e-05, "loss": 0.028, "step": 18850 }, { "epoch": 1.657293497363796, "grad_norm": 0.2403719574213028, "learning_rate": 3.7718e-05, "loss": 0.0267, "step": 18860 }, { "epoch": 1.6581722319859402, "grad_norm": 0.28193098306655884, "learning_rate": 3.7738000000000004e-05, "loss": 0.0258, "step": 18870 }, { "epoch": 1.6590509666080844, "grad_norm": 0.34975892305374146, "learning_rate": 3.775800000000001e-05, "loss": 0.0265, "step": 18880 }, { "epoch": 1.6599297012302285, "grad_norm": 0.31749671697616577, "learning_rate": 3.7778e-05, "loss": 0.0264, "step": 18890 }, { "epoch": 1.6608084358523727, "grad_norm": 0.360320121049881, "learning_rate": 3.7798e-05, "loss": 0.0265, "step": 18900 }, { "epoch": 1.6616871704745169, "grad_norm": 0.3004800081253052, "learning_rate": 3.7818e-05, "loss": 0.0275, "step": 18910 }, { "epoch": 1.6625659050966608, "grad_norm": 0.3183133006095886, "learning_rate": 3.7838e-05, "loss": 0.0302, "step": 18920 }, { "epoch": 1.663444639718805, "grad_norm": 0.30980950593948364, "learning_rate": 3.7858e-05, "loss": 0.0287, "step": 18930 }, { "epoch": 1.664323374340949, "grad_norm": 0.3199068009853363, "learning_rate": 3.7878000000000006e-05, "loss": 0.0257, "step": 18940 }, { "epoch": 1.665202108963093, "grad_norm": 0.26168450713157654, "learning_rate": 3.7898e-05, "loss": 0.0265, "step": 18950 }, { "epoch": 1.6660808435852372, "grad_norm": 0.34014981985092163, "learning_rate": 3.7918000000000005e-05, "loss": 0.0258, "step": 18960 }, { "epoch": 1.6669595782073814, "grad_norm": 0.1966772973537445, "learning_rate": 3.7938e-05, "loss": 0.0284, "step": 18970 }, { "epoch": 1.6678383128295255, "grad_norm": 0.3469763398170471, "learning_rate": 3.7958e-05, "loss": 0.0281, "step": 18980 }, { "epoch": 1.6687170474516697, "grad_norm": 0.32918357849121094, "learning_rate": 3.7978e-05, "loss": 0.027, "step": 18990 }, { "epoch": 1.6695957820738139, "grad_norm": 0.2727913558483124, "learning_rate": 3.7998e-05, "loss": 0.0263, "step": 19000 }, { "epoch": 1.6704745166959578, "grad_norm": 0.23251743614673615, "learning_rate": 3.8018e-05, "loss": 0.0242, "step": 19010 }, { "epoch": 1.671353251318102, "grad_norm": 0.2699318528175354, "learning_rate": 3.8038000000000004e-05, "loss": 0.0266, "step": 19020 }, { "epoch": 1.672231985940246, "grad_norm": 0.3054729998111725, "learning_rate": 3.8058e-05, "loss": 0.0279, "step": 19030 }, { "epoch": 1.67311072056239, "grad_norm": 0.31116047501564026, "learning_rate": 3.8078000000000004e-05, "loss": 0.0289, "step": 19040 }, { "epoch": 1.6739894551845342, "grad_norm": 0.32500240206718445, "learning_rate": 3.8098e-05, "loss": 0.0264, "step": 19050 }, { "epoch": 1.6748681898066784, "grad_norm": 0.3725212812423706, "learning_rate": 3.8118e-05, "loss": 0.0303, "step": 19060 }, { "epoch": 1.6757469244288226, "grad_norm": 0.4428686499595642, "learning_rate": 3.8138e-05, "loss": 0.0269, "step": 19070 }, { "epoch": 1.6766256590509667, "grad_norm": 0.3206555247306824, "learning_rate": 3.8157999999999996e-05, "loss": 0.027, "step": 19080 }, { "epoch": 1.6775043936731109, "grad_norm": 0.33358636498451233, "learning_rate": 3.8178e-05, "loss": 0.0273, "step": 19090 }, { "epoch": 1.6783831282952548, "grad_norm": 0.44346457719802856, "learning_rate": 3.8198e-05, "loss": 0.029, "step": 19100 }, { "epoch": 1.679261862917399, "grad_norm": 0.33140337467193604, "learning_rate": 3.8218000000000006e-05, "loss": 0.0315, "step": 19110 }, { "epoch": 1.680140597539543, "grad_norm": 0.409489244222641, "learning_rate": 3.8238e-05, "loss": 0.0285, "step": 19120 }, { "epoch": 1.681019332161687, "grad_norm": 0.44681230187416077, "learning_rate": 3.8258e-05, "loss": 0.0291, "step": 19130 }, { "epoch": 1.6818980667838312, "grad_norm": 0.3439991772174835, "learning_rate": 3.8278e-05, "loss": 0.0294, "step": 19140 }, { "epoch": 1.6827768014059754, "grad_norm": 0.41083019971847534, "learning_rate": 3.8298e-05, "loss": 0.0306, "step": 19150 }, { "epoch": 1.6836555360281196, "grad_norm": 0.3338320255279541, "learning_rate": 3.8318e-05, "loss": 0.0281, "step": 19160 }, { "epoch": 1.6845342706502637, "grad_norm": 0.3328593075275421, "learning_rate": 3.8338000000000005e-05, "loss": 0.0281, "step": 19170 }, { "epoch": 1.685413005272408, "grad_norm": 0.35839834809303284, "learning_rate": 3.8358e-05, "loss": 0.0284, "step": 19180 }, { "epoch": 1.6862917398945518, "grad_norm": 0.4696662127971649, "learning_rate": 3.8378000000000004e-05, "loss": 0.0293, "step": 19190 }, { "epoch": 1.687170474516696, "grad_norm": 0.36811256408691406, "learning_rate": 3.8398e-05, "loss": 0.0289, "step": 19200 }, { "epoch": 1.68804920913884, "grad_norm": 0.33950385451316833, "learning_rate": 3.8418000000000004e-05, "loss": 0.0267, "step": 19210 }, { "epoch": 1.688927943760984, "grad_norm": 0.5395058393478394, "learning_rate": 3.8438e-05, "loss": 0.0275, "step": 19220 }, { "epoch": 1.6898066783831283, "grad_norm": 0.3749678134918213, "learning_rate": 3.8458e-05, "loss": 0.027, "step": 19230 }, { "epoch": 1.6906854130052724, "grad_norm": 0.24799400568008423, "learning_rate": 3.8478e-05, "loss": 0.0266, "step": 19240 }, { "epoch": 1.6915641476274166, "grad_norm": 0.39947330951690674, "learning_rate": 3.8498e-05, "loss": 0.0311, "step": 19250 }, { "epoch": 1.6924428822495607, "grad_norm": 0.2714920938014984, "learning_rate": 3.8518000000000006e-05, "loss": 0.0277, "step": 19260 }, { "epoch": 1.693321616871705, "grad_norm": 0.3117375075817108, "learning_rate": 3.8538e-05, "loss": 0.024, "step": 19270 }, { "epoch": 1.6942003514938488, "grad_norm": 0.5016921758651733, "learning_rate": 3.8558e-05, "loss": 0.0299, "step": 19280 }, { "epoch": 1.695079086115993, "grad_norm": 0.321635365486145, "learning_rate": 3.8578e-05, "loss": 0.0262, "step": 19290 }, { "epoch": 1.695957820738137, "grad_norm": 0.49725353717803955, "learning_rate": 3.8598e-05, "loss": 0.028, "step": 19300 }, { "epoch": 1.696836555360281, "grad_norm": 0.294913113117218, "learning_rate": 3.8618e-05, "loss": 0.0272, "step": 19310 }, { "epoch": 1.6977152899824253, "grad_norm": 0.4389573037624359, "learning_rate": 3.8638000000000005e-05, "loss": 0.0303, "step": 19320 }, { "epoch": 1.6985940246045694, "grad_norm": 0.3519786298274994, "learning_rate": 3.8658e-05, "loss": 0.0271, "step": 19330 }, { "epoch": 1.6994727592267136, "grad_norm": 0.3638388514518738, "learning_rate": 3.8678000000000005e-05, "loss": 0.0272, "step": 19340 }, { "epoch": 1.7003514938488578, "grad_norm": 0.3443089425563812, "learning_rate": 3.8698e-05, "loss": 0.0273, "step": 19350 }, { "epoch": 1.701230228471002, "grad_norm": 0.3549979627132416, "learning_rate": 3.8718000000000004e-05, "loss": 0.0253, "step": 19360 }, { "epoch": 1.7021089630931459, "grad_norm": 0.40370601415634155, "learning_rate": 3.8738e-05, "loss": 0.0303, "step": 19370 }, { "epoch": 1.70298769771529, "grad_norm": 0.2621031403541565, "learning_rate": 3.8758e-05, "loss": 0.0269, "step": 19380 }, { "epoch": 1.703866432337434, "grad_norm": 0.3457932770252228, "learning_rate": 3.8778e-05, "loss": 0.0272, "step": 19390 }, { "epoch": 1.7047451669595781, "grad_norm": 0.2679772675037384, "learning_rate": 3.8798000000000004e-05, "loss": 0.0269, "step": 19400 }, { "epoch": 1.7056239015817223, "grad_norm": 0.3461378514766693, "learning_rate": 3.881800000000001e-05, "loss": 0.0273, "step": 19410 }, { "epoch": 1.7065026362038664, "grad_norm": 0.35147929191589355, "learning_rate": 3.8838e-05, "loss": 0.0257, "step": 19420 }, { "epoch": 1.7073813708260106, "grad_norm": 0.4590533673763275, "learning_rate": 3.8858e-05, "loss": 0.0269, "step": 19430 }, { "epoch": 1.7082601054481548, "grad_norm": 0.391988068819046, "learning_rate": 3.8878e-05, "loss": 0.0291, "step": 19440 }, { "epoch": 1.709138840070299, "grad_norm": 0.38870206475257874, "learning_rate": 3.8898e-05, "loss": 0.0234, "step": 19450 }, { "epoch": 1.7100175746924429, "grad_norm": 0.3678790032863617, "learning_rate": 3.8918e-05, "loss": 0.0257, "step": 19460 }, { "epoch": 1.710896309314587, "grad_norm": 0.2905931770801544, "learning_rate": 3.8938e-05, "loss": 0.0244, "step": 19470 }, { "epoch": 1.711775043936731, "grad_norm": 0.423878937959671, "learning_rate": 3.8958e-05, "loss": 0.0268, "step": 19480 }, { "epoch": 1.7126537785588751, "grad_norm": 0.24136272072792053, "learning_rate": 3.8978000000000005e-05, "loss": 0.0277, "step": 19490 }, { "epoch": 1.7135325131810193, "grad_norm": 0.24880680441856384, "learning_rate": 3.8998e-05, "loss": 0.0254, "step": 19500 }, { "epoch": 1.7144112478031635, "grad_norm": 0.26837778091430664, "learning_rate": 3.9018000000000005e-05, "loss": 0.0303, "step": 19510 }, { "epoch": 1.7152899824253076, "grad_norm": 0.3645654618740082, "learning_rate": 3.9038e-05, "loss": 0.0265, "step": 19520 }, { "epoch": 1.7161687170474518, "grad_norm": 0.3498166799545288, "learning_rate": 3.9058e-05, "loss": 0.0271, "step": 19530 }, { "epoch": 1.717047451669596, "grad_norm": 0.4815354645252228, "learning_rate": 3.9078e-05, "loss": 0.0244, "step": 19540 }, { "epoch": 1.7179261862917399, "grad_norm": 0.4344193935394287, "learning_rate": 3.9098000000000004e-05, "loss": 0.0275, "step": 19550 }, { "epoch": 1.718804920913884, "grad_norm": 0.4371820092201233, "learning_rate": 3.911800000000001e-05, "loss": 0.0272, "step": 19560 }, { "epoch": 1.719683655536028, "grad_norm": 0.3177238702774048, "learning_rate": 3.9138000000000004e-05, "loss": 0.0275, "step": 19570 }, { "epoch": 1.7205623901581721, "grad_norm": 0.3926061689853668, "learning_rate": 3.9158e-05, "loss": 0.0262, "step": 19580 }, { "epoch": 1.7214411247803163, "grad_norm": 0.33470770716667175, "learning_rate": 3.9178e-05, "loss": 0.024, "step": 19590 }, { "epoch": 1.7223198594024605, "grad_norm": 0.31291821599006653, "learning_rate": 3.9198e-05, "loss": 0.0268, "step": 19600 }, { "epoch": 1.7231985940246046, "grad_norm": 0.26830482482910156, "learning_rate": 3.9217999999999996e-05, "loss": 0.0266, "step": 19610 }, { "epoch": 1.7240773286467488, "grad_norm": 0.2809064984321594, "learning_rate": 3.9238e-05, "loss": 0.0257, "step": 19620 }, { "epoch": 1.724956063268893, "grad_norm": 0.3823087215423584, "learning_rate": 3.9258e-05, "loss": 0.0277, "step": 19630 }, { "epoch": 1.7258347978910369, "grad_norm": 0.3199980854988098, "learning_rate": 3.9278000000000006e-05, "loss": 0.0258, "step": 19640 }, { "epoch": 1.726713532513181, "grad_norm": 0.43033623695373535, "learning_rate": 3.9298e-05, "loss": 0.0277, "step": 19650 }, { "epoch": 1.727592267135325, "grad_norm": 0.4424465596675873, "learning_rate": 3.9318e-05, "loss": 0.0282, "step": 19660 }, { "epoch": 1.7284710017574691, "grad_norm": 0.35997894406318665, "learning_rate": 3.9338e-05, "loss": 0.0287, "step": 19670 }, { "epoch": 1.7293497363796133, "grad_norm": 0.4338673949241638, "learning_rate": 3.9358e-05, "loss": 0.0278, "step": 19680 }, { "epoch": 1.7302284710017575, "grad_norm": 0.45513612031936646, "learning_rate": 3.9378e-05, "loss": 0.0257, "step": 19690 }, { "epoch": 1.7311072056239016, "grad_norm": 0.2934495210647583, "learning_rate": 3.9398000000000005e-05, "loss": 0.0256, "step": 19700 }, { "epoch": 1.7319859402460458, "grad_norm": 0.21169567108154297, "learning_rate": 3.9418e-05, "loss": 0.0243, "step": 19710 }, { "epoch": 1.73286467486819, "grad_norm": 0.2398345023393631, "learning_rate": 3.9438000000000004e-05, "loss": 0.0261, "step": 19720 }, { "epoch": 1.733743409490334, "grad_norm": 0.26330938935279846, "learning_rate": 3.9458e-05, "loss": 0.025, "step": 19730 }, { "epoch": 1.734622144112478, "grad_norm": 0.3107210397720337, "learning_rate": 3.9478000000000004e-05, "loss": 0.0262, "step": 19740 }, { "epoch": 1.735500878734622, "grad_norm": 0.31115245819091797, "learning_rate": 3.9498e-05, "loss": 0.0278, "step": 19750 }, { "epoch": 1.7363796133567662, "grad_norm": 0.28761231899261475, "learning_rate": 3.9518e-05, "loss": 0.0291, "step": 19760 }, { "epoch": 1.7372583479789103, "grad_norm": 0.357497900724411, "learning_rate": 3.9538e-05, "loss": 0.0269, "step": 19770 }, { "epoch": 1.7381370826010545, "grad_norm": 0.3526709973812103, "learning_rate": 3.9558e-05, "loss": 0.0252, "step": 19780 }, { "epoch": 1.7390158172231986, "grad_norm": 0.39353859424591064, "learning_rate": 3.9578000000000006e-05, "loss": 0.0283, "step": 19790 }, { "epoch": 1.7398945518453428, "grad_norm": 0.3247212767601013, "learning_rate": 3.9598e-05, "loss": 0.0273, "step": 19800 }, { "epoch": 1.740773286467487, "grad_norm": 0.34664642810821533, "learning_rate": 3.9618e-05, "loss": 0.027, "step": 19810 }, { "epoch": 1.741652021089631, "grad_norm": 0.5089440941810608, "learning_rate": 3.9638e-05, "loss": 0.0264, "step": 19820 }, { "epoch": 1.742530755711775, "grad_norm": 0.2917691171169281, "learning_rate": 3.9658e-05, "loss": 0.0251, "step": 19830 }, { "epoch": 1.743409490333919, "grad_norm": 0.3668069839477539, "learning_rate": 3.9678e-05, "loss": 0.0263, "step": 19840 }, { "epoch": 1.7442882249560632, "grad_norm": 0.259416788816452, "learning_rate": 3.9698000000000005e-05, "loss": 0.0259, "step": 19850 }, { "epoch": 1.7451669595782073, "grad_norm": 0.4098031520843506, "learning_rate": 3.9718e-05, "loss": 0.0275, "step": 19860 }, { "epoch": 1.7460456942003515, "grad_norm": 0.4145653247833252, "learning_rate": 3.9738000000000005e-05, "loss": 0.0265, "step": 19870 }, { "epoch": 1.7469244288224957, "grad_norm": 0.30411168932914734, "learning_rate": 3.9758e-05, "loss": 0.0272, "step": 19880 }, { "epoch": 1.7478031634446398, "grad_norm": 0.3051583766937256, "learning_rate": 3.9778000000000004e-05, "loss": 0.0281, "step": 19890 }, { "epoch": 1.748681898066784, "grad_norm": 0.28869524598121643, "learning_rate": 3.9798e-05, "loss": 0.0262, "step": 19900 }, { "epoch": 1.749560632688928, "grad_norm": 0.2575744390487671, "learning_rate": 3.9818e-05, "loss": 0.0288, "step": 19910 }, { "epoch": 1.750439367311072, "grad_norm": 0.25861629843711853, "learning_rate": 3.9838e-05, "loss": 0.027, "step": 19920 }, { "epoch": 1.751318101933216, "grad_norm": 0.3245207369327545, "learning_rate": 3.9858000000000004e-05, "loss": 0.0297, "step": 19930 }, { "epoch": 1.7521968365553602, "grad_norm": 0.2810225784778595, "learning_rate": 3.987800000000001e-05, "loss": 0.0266, "step": 19940 }, { "epoch": 1.7530755711775043, "grad_norm": 0.23918156325817108, "learning_rate": 3.9898e-05, "loss": 0.026, "step": 19950 }, { "epoch": 1.7539543057996485, "grad_norm": 0.4179322123527527, "learning_rate": 3.9918e-05, "loss": 0.0285, "step": 19960 }, { "epoch": 1.7548330404217927, "grad_norm": 0.36444246768951416, "learning_rate": 3.9938e-05, "loss": 0.0259, "step": 19970 }, { "epoch": 1.7557117750439368, "grad_norm": 0.393347829580307, "learning_rate": 3.9958e-05, "loss": 0.026, "step": 19980 }, { "epoch": 1.756590509666081, "grad_norm": 0.2893217206001282, "learning_rate": 3.9978e-05, "loss": 0.0252, "step": 19990 }, { "epoch": 1.757469244288225, "grad_norm": 0.20919393002986908, "learning_rate": 3.9998e-05, "loss": 0.026, "step": 20000 }, { "epoch": 1.758347978910369, "grad_norm": 0.25964483618736267, "learning_rate": 4.0018e-05, "loss": 0.0252, "step": 20010 }, { "epoch": 1.759226713532513, "grad_norm": 0.39830705523490906, "learning_rate": 4.0038000000000005e-05, "loss": 0.0253, "step": 20020 }, { "epoch": 1.7601054481546572, "grad_norm": 0.49062541127204895, "learning_rate": 4.0058e-05, "loss": 0.0275, "step": 20030 }, { "epoch": 1.7609841827768014, "grad_norm": 0.28100401163101196, "learning_rate": 4.0078000000000005e-05, "loss": 0.0264, "step": 20040 }, { "epoch": 1.7618629173989455, "grad_norm": 0.3232133686542511, "learning_rate": 4.0098e-05, "loss": 0.0256, "step": 20050 }, { "epoch": 1.7627416520210897, "grad_norm": 0.36816665530204773, "learning_rate": 4.0118e-05, "loss": 0.0246, "step": 20060 }, { "epoch": 1.7636203866432338, "grad_norm": 0.29883095622062683, "learning_rate": 4.0138e-05, "loss": 0.0286, "step": 20070 }, { "epoch": 1.764499121265378, "grad_norm": 0.3134177029132843, "learning_rate": 4.0158000000000004e-05, "loss": 0.0262, "step": 20080 }, { "epoch": 1.765377855887522, "grad_norm": 0.2467358112335205, "learning_rate": 4.017800000000001e-05, "loss": 0.0254, "step": 20090 }, { "epoch": 1.766256590509666, "grad_norm": 0.29543718695640564, "learning_rate": 4.0198000000000004e-05, "loss": 0.0243, "step": 20100 }, { "epoch": 1.76713532513181, "grad_norm": 0.23855756223201752, "learning_rate": 4.0218e-05, "loss": 0.026, "step": 20110 }, { "epoch": 1.7680140597539542, "grad_norm": 0.29584023356437683, "learning_rate": 4.0238e-05, "loss": 0.0258, "step": 20120 }, { "epoch": 1.7688927943760984, "grad_norm": 0.2714744210243225, "learning_rate": 4.0258e-05, "loss": 0.026, "step": 20130 }, { "epoch": 1.7697715289982425, "grad_norm": 0.31321981549263, "learning_rate": 4.0278e-05, "loss": 0.0254, "step": 20140 }, { "epoch": 1.7706502636203867, "grad_norm": 0.363172709941864, "learning_rate": 4.0298e-05, "loss": 0.0265, "step": 20150 }, { "epoch": 1.7715289982425309, "grad_norm": 0.26532670855522156, "learning_rate": 4.0318e-05, "loss": 0.0284, "step": 20160 }, { "epoch": 1.772407732864675, "grad_norm": 0.3295498192310333, "learning_rate": 4.0338000000000006e-05, "loss": 0.0277, "step": 20170 }, { "epoch": 1.773286467486819, "grad_norm": 0.3167981803417206, "learning_rate": 4.0358e-05, "loss": 0.0255, "step": 20180 }, { "epoch": 1.7741652021089631, "grad_norm": 0.41293156147003174, "learning_rate": 4.0378e-05, "loss": 0.0268, "step": 20190 }, { "epoch": 1.775043936731107, "grad_norm": 0.31164658069610596, "learning_rate": 4.0398e-05, "loss": 0.0248, "step": 20200 }, { "epoch": 1.7759226713532512, "grad_norm": 0.3399678170681, "learning_rate": 4.0418e-05, "loss": 0.0277, "step": 20210 }, { "epoch": 1.7768014059753954, "grad_norm": 0.3551381528377533, "learning_rate": 4.0438e-05, "loss": 0.0276, "step": 20220 }, { "epoch": 1.7776801405975395, "grad_norm": 0.29276493191719055, "learning_rate": 4.0458000000000005e-05, "loss": 0.0258, "step": 20230 }, { "epoch": 1.7785588752196837, "grad_norm": 0.4279690086841583, "learning_rate": 4.0478e-05, "loss": 0.0262, "step": 20240 }, { "epoch": 1.7794376098418279, "grad_norm": 0.33200716972351074, "learning_rate": 4.0498000000000004e-05, "loss": 0.0248, "step": 20250 }, { "epoch": 1.780316344463972, "grad_norm": 0.3021490275859833, "learning_rate": 4.0518e-05, "loss": 0.0262, "step": 20260 }, { "epoch": 1.781195079086116, "grad_norm": 0.31303611397743225, "learning_rate": 4.0538000000000004e-05, "loss": 0.0262, "step": 20270 }, { "epoch": 1.7820738137082601, "grad_norm": 0.2477438747882843, "learning_rate": 4.0558e-05, "loss": 0.0245, "step": 20280 }, { "epoch": 1.782952548330404, "grad_norm": 0.293721467256546, "learning_rate": 4.0578e-05, "loss": 0.0263, "step": 20290 }, { "epoch": 1.7838312829525482, "grad_norm": 0.3756994307041168, "learning_rate": 4.0598e-05, "loss": 0.0286, "step": 20300 }, { "epoch": 1.7847100175746924, "grad_norm": 0.3520333170890808, "learning_rate": 4.0618e-05, "loss": 0.0255, "step": 20310 }, { "epoch": 1.7855887521968365, "grad_norm": 0.2387007474899292, "learning_rate": 4.0638000000000006e-05, "loss": 0.0234, "step": 20320 }, { "epoch": 1.7864674868189807, "grad_norm": 0.2360846847295761, "learning_rate": 4.0658e-05, "loss": 0.0253, "step": 20330 }, { "epoch": 1.7873462214411249, "grad_norm": 0.2381294220685959, "learning_rate": 4.0678e-05, "loss": 0.0257, "step": 20340 }, { "epoch": 1.788224956063269, "grad_norm": 0.18114107847213745, "learning_rate": 4.0698e-05, "loss": 0.0248, "step": 20350 }, { "epoch": 1.789103690685413, "grad_norm": 0.2538262605667114, "learning_rate": 4.0718e-05, "loss": 0.0262, "step": 20360 }, { "epoch": 1.7899824253075571, "grad_norm": 0.25318434834480286, "learning_rate": 4.0738e-05, "loss": 0.0274, "step": 20370 }, { "epoch": 1.790861159929701, "grad_norm": 0.31495317816734314, "learning_rate": 4.0758e-05, "loss": 0.0289, "step": 20380 }, { "epoch": 1.7917398945518452, "grad_norm": 0.2442825436592102, "learning_rate": 4.0778e-05, "loss": 0.0248, "step": 20390 }, { "epoch": 1.7926186291739894, "grad_norm": 0.338117778301239, "learning_rate": 4.0798000000000005e-05, "loss": 0.0268, "step": 20400 }, { "epoch": 1.7934973637961336, "grad_norm": 0.20796389877796173, "learning_rate": 4.0818e-05, "loss": 0.0251, "step": 20410 }, { "epoch": 1.7943760984182777, "grad_norm": 0.3478718101978302, "learning_rate": 4.0838000000000004e-05, "loss": 0.0254, "step": 20420 }, { "epoch": 1.7952548330404219, "grad_norm": 0.2476649433374405, "learning_rate": 4.0858e-05, "loss": 0.0246, "step": 20430 }, { "epoch": 1.796133567662566, "grad_norm": 0.39784857630729675, "learning_rate": 4.0878e-05, "loss": 0.0267, "step": 20440 }, { "epoch": 1.79701230228471, "grad_norm": 0.2780655026435852, "learning_rate": 4.0898e-05, "loss": 0.0262, "step": 20450 }, { "epoch": 1.7978910369068541, "grad_norm": 0.28608956933021545, "learning_rate": 4.0918000000000004e-05, "loss": 0.0247, "step": 20460 }, { "epoch": 1.798769771528998, "grad_norm": 0.27252301573753357, "learning_rate": 4.093800000000001e-05, "loss": 0.0262, "step": 20470 }, { "epoch": 1.7996485061511422, "grad_norm": 0.18516850471496582, "learning_rate": 4.0958e-05, "loss": 0.0285, "step": 20480 }, { "epoch": 1.8005272407732864, "grad_norm": 0.3807215988636017, "learning_rate": 4.0978e-05, "loss": 0.0266, "step": 20490 }, { "epoch": 1.8014059753954306, "grad_norm": 0.21109262108802795, "learning_rate": 4.0998e-05, "loss": 0.0248, "step": 20500 }, { "epoch": 1.8022847100175747, "grad_norm": 0.35265496373176575, "learning_rate": 4.1018e-05, "loss": 0.026, "step": 20510 }, { "epoch": 1.803163444639719, "grad_norm": 0.42745861411094666, "learning_rate": 4.1038e-05, "loss": 0.0251, "step": 20520 }, { "epoch": 1.804042179261863, "grad_norm": 0.3345975875854492, "learning_rate": 4.1058e-05, "loss": 0.0288, "step": 20530 }, { "epoch": 1.804920913884007, "grad_norm": 0.36742761731147766, "learning_rate": 4.1078e-05, "loss": 0.0275, "step": 20540 }, { "epoch": 1.8057996485061512, "grad_norm": 0.3274065852165222, "learning_rate": 4.1098000000000005e-05, "loss": 0.0253, "step": 20550 }, { "epoch": 1.806678383128295, "grad_norm": 0.38029810786247253, "learning_rate": 4.1118e-05, "loss": 0.026, "step": 20560 }, { "epoch": 1.8075571177504393, "grad_norm": 0.21288740634918213, "learning_rate": 4.1138000000000005e-05, "loss": 0.026, "step": 20570 }, { "epoch": 1.8084358523725834, "grad_norm": 0.3912142813205719, "learning_rate": 4.1158e-05, "loss": 0.028, "step": 20580 }, { "epoch": 1.8093145869947276, "grad_norm": 0.2741037607192993, "learning_rate": 4.1178e-05, "loss": 0.0262, "step": 20590 }, { "epoch": 1.8101933216168717, "grad_norm": 0.3604153096675873, "learning_rate": 4.1198e-05, "loss": 0.0265, "step": 20600 }, { "epoch": 1.811072056239016, "grad_norm": 0.24551226198673248, "learning_rate": 4.1218000000000004e-05, "loss": 0.0286, "step": 20610 }, { "epoch": 1.81195079086116, "grad_norm": 0.24318155646324158, "learning_rate": 4.123800000000001e-05, "loss": 0.0243, "step": 20620 }, { "epoch": 1.812829525483304, "grad_norm": 0.2537330389022827, "learning_rate": 4.1258000000000004e-05, "loss": 0.0266, "step": 20630 }, { "epoch": 1.8137082601054482, "grad_norm": 0.30612069368362427, "learning_rate": 4.1278e-05, "loss": 0.0244, "step": 20640 }, { "epoch": 1.814586994727592, "grad_norm": 0.321065217256546, "learning_rate": 4.1298e-05, "loss": 0.0235, "step": 20650 }, { "epoch": 1.8154657293497363, "grad_norm": 0.3237490653991699, "learning_rate": 4.1318e-05, "loss": 0.0267, "step": 20660 }, { "epoch": 1.8163444639718804, "grad_norm": 0.21598926186561584, "learning_rate": 4.1338e-05, "loss": 0.025, "step": 20670 }, { "epoch": 1.8172231985940246, "grad_norm": 0.3431110978126526, "learning_rate": 4.1358e-05, "loss": 0.0276, "step": 20680 }, { "epoch": 1.8181019332161688, "grad_norm": 0.2708272933959961, "learning_rate": 4.1378e-05, "loss": 0.0276, "step": 20690 }, { "epoch": 1.818980667838313, "grad_norm": 0.31991925835609436, "learning_rate": 4.1398000000000006e-05, "loss": 0.0279, "step": 20700 }, { "epoch": 1.819859402460457, "grad_norm": 0.33765795826911926, "learning_rate": 4.1418e-05, "loss": 0.0281, "step": 20710 }, { "epoch": 1.820738137082601, "grad_norm": 0.3429203927516937, "learning_rate": 4.1438000000000005e-05, "loss": 0.0298, "step": 20720 }, { "epoch": 1.8216168717047452, "grad_norm": 0.32085099816322327, "learning_rate": 4.1458e-05, "loss": 0.0269, "step": 20730 }, { "epoch": 1.8224956063268891, "grad_norm": 0.26043522357940674, "learning_rate": 4.1478e-05, "loss": 0.0245, "step": 20740 }, { "epoch": 1.8233743409490333, "grad_norm": 0.29192811250686646, "learning_rate": 4.1498e-05, "loss": 0.0262, "step": 20750 }, { "epoch": 1.8242530755711774, "grad_norm": 0.3423249125480652, "learning_rate": 4.1518000000000005e-05, "loss": 0.0239, "step": 20760 }, { "epoch": 1.8251318101933216, "grad_norm": 0.2816118597984314, "learning_rate": 4.153800000000001e-05, "loss": 0.0272, "step": 20770 }, { "epoch": 1.8260105448154658, "grad_norm": 0.2796817421913147, "learning_rate": 4.1558000000000004e-05, "loss": 0.0256, "step": 20780 }, { "epoch": 1.82688927943761, "grad_norm": 0.2710303068161011, "learning_rate": 4.1578e-05, "loss": 0.0246, "step": 20790 }, { "epoch": 1.827768014059754, "grad_norm": 0.3250398337841034, "learning_rate": 4.1598000000000004e-05, "loss": 0.0241, "step": 20800 }, { "epoch": 1.828646748681898, "grad_norm": 0.3295214772224426, "learning_rate": 4.1618e-05, "loss": 0.0274, "step": 20810 }, { "epoch": 1.8295254833040422, "grad_norm": 0.32638394832611084, "learning_rate": 4.1638e-05, "loss": 0.0252, "step": 20820 }, { "epoch": 1.8304042179261861, "grad_norm": 0.4222949147224426, "learning_rate": 4.1658e-05, "loss": 0.0286, "step": 20830 }, { "epoch": 1.8312829525483303, "grad_norm": 0.2674826681613922, "learning_rate": 4.1678e-05, "loss": 0.0268, "step": 20840 }, { "epoch": 1.8321616871704745, "grad_norm": 0.37801140546798706, "learning_rate": 4.1698000000000006e-05, "loss": 0.0273, "step": 20850 }, { "epoch": 1.8330404217926186, "grad_norm": 0.2632582187652588, "learning_rate": 4.1718e-05, "loss": 0.0279, "step": 20860 }, { "epoch": 1.8339191564147628, "grad_norm": 0.25582143664360046, "learning_rate": 4.1738e-05, "loss": 0.0279, "step": 20870 }, { "epoch": 1.834797891036907, "grad_norm": 0.22345741093158722, "learning_rate": 4.1758e-05, "loss": 0.0246, "step": 20880 }, { "epoch": 1.835676625659051, "grad_norm": 0.3082350790500641, "learning_rate": 4.1778e-05, "loss": 0.026, "step": 20890 }, { "epoch": 1.836555360281195, "grad_norm": 0.24615082144737244, "learning_rate": 4.1798e-05, "loss": 0.026, "step": 20900 }, { "epoch": 1.8374340949033392, "grad_norm": 0.28904807567596436, "learning_rate": 4.1818e-05, "loss": 0.0262, "step": 20910 }, { "epoch": 1.8383128295254831, "grad_norm": 0.28285351395606995, "learning_rate": 4.1838e-05, "loss": 0.0265, "step": 20920 }, { "epoch": 1.8391915641476273, "grad_norm": 0.2950380742549896, "learning_rate": 4.1858000000000005e-05, "loss": 0.0262, "step": 20930 }, { "epoch": 1.8400702987697715, "grad_norm": 0.24591466784477234, "learning_rate": 4.1878e-05, "loss": 0.0277, "step": 20940 }, { "epoch": 1.8409490333919156, "grad_norm": 0.24327854812145233, "learning_rate": 4.1898000000000004e-05, "loss": 0.0268, "step": 20950 }, { "epoch": 1.8418277680140598, "grad_norm": 0.2528746724128723, "learning_rate": 4.1918e-05, "loss": 0.026, "step": 20960 }, { "epoch": 1.842706502636204, "grad_norm": 0.3276069760322571, "learning_rate": 4.1938e-05, "loss": 0.025, "step": 20970 }, { "epoch": 1.843585237258348, "grad_norm": 0.3410789668560028, "learning_rate": 4.1958e-05, "loss": 0.0267, "step": 20980 }, { "epoch": 1.844463971880492, "grad_norm": 0.28738391399383545, "learning_rate": 4.1978000000000004e-05, "loss": 0.0264, "step": 20990 }, { "epoch": 1.8453427065026362, "grad_norm": 0.29430481791496277, "learning_rate": 4.199800000000001e-05, "loss": 0.0284, "step": 21000 }, { "epoch": 1.8462214411247804, "grad_norm": 0.3552565574645996, "learning_rate": 4.2018e-05, "loss": 0.0285, "step": 21010 }, { "epoch": 1.8471001757469243, "grad_norm": 0.28838545083999634, "learning_rate": 4.2038e-05, "loss": 0.0289, "step": 21020 }, { "epoch": 1.8479789103690685, "grad_norm": 0.2637741267681122, "learning_rate": 4.2058e-05, "loss": 0.0266, "step": 21030 }, { "epoch": 1.8488576449912126, "grad_norm": 0.3063446879386902, "learning_rate": 4.2078e-05, "loss": 0.028, "step": 21040 }, { "epoch": 1.8497363796133568, "grad_norm": 0.2997719943523407, "learning_rate": 4.2098e-05, "loss": 0.0257, "step": 21050 }, { "epoch": 1.850615114235501, "grad_norm": 0.2654675543308258, "learning_rate": 4.2118e-05, "loss": 0.0247, "step": 21060 }, { "epoch": 1.8514938488576451, "grad_norm": 0.22962406277656555, "learning_rate": 4.2138e-05, "loss": 0.0244, "step": 21070 }, { "epoch": 1.852372583479789, "grad_norm": 0.21780161559581757, "learning_rate": 4.2158000000000005e-05, "loss": 0.0269, "step": 21080 }, { "epoch": 1.8532513181019332, "grad_norm": 0.2492867261171341, "learning_rate": 4.2178e-05, "loss": 0.0251, "step": 21090 }, { "epoch": 1.8541300527240774, "grad_norm": 0.31712639331817627, "learning_rate": 4.2198000000000005e-05, "loss": 0.0249, "step": 21100 }, { "epoch": 1.8550087873462213, "grad_norm": 0.29131263494491577, "learning_rate": 4.2218e-05, "loss": 0.0242, "step": 21110 }, { "epoch": 1.8558875219683655, "grad_norm": 0.3063405752182007, "learning_rate": 4.2238e-05, "loss": 0.0257, "step": 21120 }, { "epoch": 1.8567662565905096, "grad_norm": 0.2362366020679474, "learning_rate": 4.2258e-05, "loss": 0.0278, "step": 21130 }, { "epoch": 1.8576449912126538, "grad_norm": 0.34768566489219666, "learning_rate": 4.2278000000000004e-05, "loss": 0.0254, "step": 21140 }, { "epoch": 1.858523725834798, "grad_norm": 0.3080756664276123, "learning_rate": 4.229800000000001e-05, "loss": 0.0268, "step": 21150 }, { "epoch": 1.8594024604569421, "grad_norm": 0.42181649804115295, "learning_rate": 4.2318000000000004e-05, "loss": 0.0265, "step": 21160 }, { "epoch": 1.860281195079086, "grad_norm": 0.4408959746360779, "learning_rate": 4.2338e-05, "loss": 0.0275, "step": 21170 }, { "epoch": 1.8611599297012302, "grad_norm": 0.39280587434768677, "learning_rate": 4.2358000000000003e-05, "loss": 0.0265, "step": 21180 }, { "epoch": 1.8620386643233744, "grad_norm": 0.2843494117259979, "learning_rate": 4.2378e-05, "loss": 0.0268, "step": 21190 }, { "epoch": 1.8629173989455183, "grad_norm": 0.2241947203874588, "learning_rate": 4.2398e-05, "loss": 0.024, "step": 21200 }, { "epoch": 1.8637961335676625, "grad_norm": 0.20625483989715576, "learning_rate": 4.2418e-05, "loss": 0.0243, "step": 21210 }, { "epoch": 1.8646748681898067, "grad_norm": 0.23911845684051514, "learning_rate": 4.2438e-05, "loss": 0.0244, "step": 21220 }, { "epoch": 1.8655536028119508, "grad_norm": 0.243997260928154, "learning_rate": 4.2458000000000006e-05, "loss": 0.0245, "step": 21230 }, { "epoch": 1.866432337434095, "grad_norm": 0.26263487339019775, "learning_rate": 4.2478e-05, "loss": 0.0257, "step": 21240 }, { "epoch": 1.8673110720562391, "grad_norm": 0.24608957767486572, "learning_rate": 4.2498000000000005e-05, "loss": 0.0228, "step": 21250 }, { "epoch": 1.868189806678383, "grad_norm": 0.38480982184410095, "learning_rate": 4.2518e-05, "loss": 0.0249, "step": 21260 }, { "epoch": 1.8690685413005272, "grad_norm": 0.3263947367668152, "learning_rate": 4.2538e-05, "loss": 0.0253, "step": 21270 }, { "epoch": 1.8699472759226714, "grad_norm": 0.235094353556633, "learning_rate": 4.2558e-05, "loss": 0.0242, "step": 21280 }, { "epoch": 1.8708260105448153, "grad_norm": 0.32286518812179565, "learning_rate": 4.2578e-05, "loss": 0.0244, "step": 21290 }, { "epoch": 1.8717047451669595, "grad_norm": 0.1966005265712738, "learning_rate": 4.259800000000001e-05, "loss": 0.0245, "step": 21300 }, { "epoch": 1.8725834797891037, "grad_norm": 0.3061169981956482, "learning_rate": 4.2618000000000004e-05, "loss": 0.0297, "step": 21310 }, { "epoch": 1.8734622144112478, "grad_norm": 0.3154331147670746, "learning_rate": 4.2638e-05, "loss": 0.0263, "step": 21320 }, { "epoch": 1.874340949033392, "grad_norm": 0.32688096165657043, "learning_rate": 4.2658000000000004e-05, "loss": 0.0267, "step": 21330 }, { "epoch": 1.8752196836555362, "grad_norm": 0.32004836201667786, "learning_rate": 4.2678e-05, "loss": 0.0254, "step": 21340 }, { "epoch": 1.87609841827768, "grad_norm": 0.2941858768463135, "learning_rate": 4.2698000000000004e-05, "loss": 0.026, "step": 21350 }, { "epoch": 1.8769771528998243, "grad_norm": 0.36076799035072327, "learning_rate": 4.2718e-05, "loss": 0.0268, "step": 21360 }, { "epoch": 1.8778558875219684, "grad_norm": 0.3418593108654022, "learning_rate": 4.2738e-05, "loss": 0.0228, "step": 21370 }, { "epoch": 1.8787346221441124, "grad_norm": 0.3235760033130646, "learning_rate": 4.2758000000000006e-05, "loss": 0.0248, "step": 21380 }, { "epoch": 1.8796133567662565, "grad_norm": 0.2464119791984558, "learning_rate": 4.2778e-05, "loss": 0.0248, "step": 21390 }, { "epoch": 1.8804920913884007, "grad_norm": 0.3214205801486969, "learning_rate": 4.2798000000000006e-05, "loss": 0.0265, "step": 21400 }, { "epoch": 1.8813708260105448, "grad_norm": 0.23806022107601166, "learning_rate": 4.2818e-05, "loss": 0.0264, "step": 21410 }, { "epoch": 1.882249560632689, "grad_norm": 0.2367316335439682, "learning_rate": 4.2838e-05, "loss": 0.0248, "step": 21420 }, { "epoch": 1.8831282952548332, "grad_norm": 0.30713167786598206, "learning_rate": 4.2858e-05, "loss": 0.0247, "step": 21430 }, { "epoch": 1.884007029876977, "grad_norm": 0.188310444355011, "learning_rate": 4.2878e-05, "loss": 0.0256, "step": 21440 }, { "epoch": 1.8848857644991213, "grad_norm": 0.28957951068878174, "learning_rate": 4.2898e-05, "loss": 0.0248, "step": 21450 }, { "epoch": 1.8857644991212654, "grad_norm": 0.2248542606830597, "learning_rate": 4.2918000000000005e-05, "loss": 0.0255, "step": 21460 }, { "epoch": 1.8866432337434094, "grad_norm": 0.31164243817329407, "learning_rate": 4.2938e-05, "loss": 0.0272, "step": 21470 }, { "epoch": 1.8875219683655535, "grad_norm": 0.2591785490512848, "learning_rate": 4.2958000000000004e-05, "loss": 0.0234, "step": 21480 }, { "epoch": 1.8884007029876977, "grad_norm": 0.2504434883594513, "learning_rate": 4.2978e-05, "loss": 0.025, "step": 21490 }, { "epoch": 1.8892794376098418, "grad_norm": 0.2676185965538025, "learning_rate": 4.2998e-05, "loss": 0.024, "step": 21500 }, { "epoch": 1.890158172231986, "grad_norm": 0.31066712737083435, "learning_rate": 4.3018e-05, "loss": 0.0247, "step": 21510 }, { "epoch": 1.8910369068541302, "grad_norm": 0.33019718527793884, "learning_rate": 4.3038000000000004e-05, "loss": 0.0283, "step": 21520 }, { "epoch": 1.8919156414762741, "grad_norm": 0.29634493589401245, "learning_rate": 4.305800000000001e-05, "loss": 0.025, "step": 21530 }, { "epoch": 1.8927943760984183, "grad_norm": 0.2297309786081314, "learning_rate": 4.3078e-05, "loss": 0.024, "step": 21540 }, { "epoch": 1.8936731107205624, "grad_norm": 0.36586177349090576, "learning_rate": 4.3098e-05, "loss": 0.0254, "step": 21550 }, { "epoch": 1.8945518453427064, "grad_norm": 0.3592677116394043, "learning_rate": 4.3118e-05, "loss": 0.0281, "step": 21560 }, { "epoch": 1.8954305799648505, "grad_norm": 0.2726651728153229, "learning_rate": 4.3138e-05, "loss": 0.0258, "step": 21570 }, { "epoch": 1.8963093145869947, "grad_norm": 0.40631917119026184, "learning_rate": 4.3158e-05, "loss": 0.0273, "step": 21580 }, { "epoch": 1.8971880492091389, "grad_norm": 0.2604122757911682, "learning_rate": 4.3178e-05, "loss": 0.025, "step": 21590 }, { "epoch": 1.898066783831283, "grad_norm": 0.3373049795627594, "learning_rate": 4.3198e-05, "loss": 0.0246, "step": 21600 }, { "epoch": 1.8989455184534272, "grad_norm": 0.2716085612773895, "learning_rate": 4.3218000000000005e-05, "loss": 0.0287, "step": 21610 }, { "epoch": 1.8998242530755711, "grad_norm": 0.3588009178638458, "learning_rate": 4.3238e-05, "loss": 0.0269, "step": 21620 }, { "epoch": 1.9007029876977153, "grad_norm": 0.3310633897781372, "learning_rate": 4.3258000000000005e-05, "loss": 0.0275, "step": 21630 }, { "epoch": 1.9015817223198594, "grad_norm": 0.2819397449493408, "learning_rate": 4.3278e-05, "loss": 0.0246, "step": 21640 }, { "epoch": 1.9024604569420034, "grad_norm": 0.23822270333766937, "learning_rate": 4.3298e-05, "loss": 0.0258, "step": 21650 }, { "epoch": 1.9033391915641475, "grad_norm": 0.24692010879516602, "learning_rate": 4.3318e-05, "loss": 0.025, "step": 21660 }, { "epoch": 1.9042179261862917, "grad_norm": 0.3691697418689728, "learning_rate": 4.3338000000000004e-05, "loss": 0.0266, "step": 21670 }, { "epoch": 1.9050966608084359, "grad_norm": 0.3858849108219147, "learning_rate": 4.335800000000001e-05, "loss": 0.0253, "step": 21680 }, { "epoch": 1.90597539543058, "grad_norm": 0.2861391007900238, "learning_rate": 4.3378000000000004e-05, "loss": 0.0235, "step": 21690 }, { "epoch": 1.9068541300527242, "grad_norm": 0.24528411030769348, "learning_rate": 4.3398e-05, "loss": 0.026, "step": 21700 }, { "epoch": 1.9077328646748684, "grad_norm": 0.25874122977256775, "learning_rate": 4.3418000000000003e-05, "loss": 0.0257, "step": 21710 }, { "epoch": 1.9086115992970123, "grad_norm": 0.3099939823150635, "learning_rate": 4.3438e-05, "loss": 0.027, "step": 21720 }, { "epoch": 1.9094903339191565, "grad_norm": 0.36025118827819824, "learning_rate": 4.3458e-05, "loss": 0.0248, "step": 21730 }, { "epoch": 1.9103690685413004, "grad_norm": 0.25379249453544617, "learning_rate": 4.3478e-05, "loss": 0.0242, "step": 21740 }, { "epoch": 1.9112478031634446, "grad_norm": 0.27377185225486755, "learning_rate": 4.3498e-05, "loss": 0.0269, "step": 21750 }, { "epoch": 1.9121265377855887, "grad_norm": 0.2885150909423828, "learning_rate": 4.3518000000000006e-05, "loss": 0.0263, "step": 21760 }, { "epoch": 1.9130052724077329, "grad_norm": 0.24848034977912903, "learning_rate": 4.3538e-05, "loss": 0.0247, "step": 21770 }, { "epoch": 1.913884007029877, "grad_norm": 0.27853286266326904, "learning_rate": 4.3558000000000005e-05, "loss": 0.0277, "step": 21780 }, { "epoch": 1.9147627416520212, "grad_norm": 0.28189942240715027, "learning_rate": 4.3578e-05, "loss": 0.0223, "step": 21790 }, { "epoch": 1.9156414762741654, "grad_norm": 0.3109968602657318, "learning_rate": 4.3598e-05, "loss": 0.0266, "step": 21800 }, { "epoch": 1.9165202108963093, "grad_norm": 0.3383229076862335, "learning_rate": 4.3618e-05, "loss": 0.0254, "step": 21810 }, { "epoch": 1.9173989455184535, "grad_norm": 0.32797351479530334, "learning_rate": 4.3638e-05, "loss": 0.0256, "step": 21820 }, { "epoch": 1.9182776801405974, "grad_norm": 0.26600638031959534, "learning_rate": 4.3658e-05, "loss": 0.0252, "step": 21830 }, { "epoch": 1.9191564147627416, "grad_norm": 0.4431403875350952, "learning_rate": 4.3678000000000004e-05, "loss": 0.0259, "step": 21840 }, { "epoch": 1.9200351493848857, "grad_norm": 0.31588324904441833, "learning_rate": 4.3698e-05, "loss": 0.0285, "step": 21850 }, { "epoch": 1.92091388400703, "grad_norm": 0.3754591643810272, "learning_rate": 4.3718000000000004e-05, "loss": 0.0263, "step": 21860 }, { "epoch": 1.921792618629174, "grad_norm": 0.4109876751899719, "learning_rate": 4.3738e-05, "loss": 0.0273, "step": 21870 }, { "epoch": 1.9226713532513182, "grad_norm": 0.22761918604373932, "learning_rate": 4.3758000000000004e-05, "loss": 0.0254, "step": 21880 }, { "epoch": 1.9235500878734624, "grad_norm": 0.24765415489673615, "learning_rate": 4.3778e-05, "loss": 0.027, "step": 21890 }, { "epoch": 1.9244288224956063, "grad_norm": 0.29988527297973633, "learning_rate": 4.3798e-05, "loss": 0.0251, "step": 21900 }, { "epoch": 1.9253075571177505, "grad_norm": 0.2818260192871094, "learning_rate": 4.3818000000000006e-05, "loss": 0.0278, "step": 21910 }, { "epoch": 1.9261862917398944, "grad_norm": 0.2593751549720764, "learning_rate": 4.3838e-05, "loss": 0.0258, "step": 21920 }, { "epoch": 1.9270650263620386, "grad_norm": 0.4656081795692444, "learning_rate": 4.3858000000000006e-05, "loss": 0.0284, "step": 21930 }, { "epoch": 1.9279437609841827, "grad_norm": 0.261507511138916, "learning_rate": 4.3878e-05, "loss": 0.0275, "step": 21940 }, { "epoch": 1.928822495606327, "grad_norm": 0.261738121509552, "learning_rate": 4.3898e-05, "loss": 0.0253, "step": 21950 }, { "epoch": 1.929701230228471, "grad_norm": 0.33368441462516785, "learning_rate": 4.3918e-05, "loss": 0.0246, "step": 21960 }, { "epoch": 1.9305799648506152, "grad_norm": 0.2601669430732727, "learning_rate": 4.3938e-05, "loss": 0.0273, "step": 21970 }, { "epoch": 1.9314586994727594, "grad_norm": 0.31258171796798706, "learning_rate": 4.3958e-05, "loss": 0.026, "step": 21980 }, { "epoch": 1.9323374340949033, "grad_norm": 0.3122083246707916, "learning_rate": 4.3978000000000005e-05, "loss": 0.0254, "step": 21990 }, { "epoch": 1.9332161687170475, "grad_norm": 0.3453373610973358, "learning_rate": 4.3998e-05, "loss": 0.0271, "step": 22000 }, { "epoch": 1.9340949033391914, "grad_norm": 0.38188791275024414, "learning_rate": 4.4018000000000004e-05, "loss": 0.0258, "step": 22010 }, { "epoch": 1.9349736379613356, "grad_norm": 0.33194220066070557, "learning_rate": 4.4038e-05, "loss": 0.0254, "step": 22020 }, { "epoch": 1.9358523725834798, "grad_norm": 0.3467579185962677, "learning_rate": 4.4058000000000004e-05, "loss": 0.0261, "step": 22030 }, { "epoch": 1.936731107205624, "grad_norm": 0.3584032654762268, "learning_rate": 4.4078e-05, "loss": 0.025, "step": 22040 }, { "epoch": 1.937609841827768, "grad_norm": 0.328664630651474, "learning_rate": 4.4098000000000004e-05, "loss": 0.0251, "step": 22050 }, { "epoch": 1.9384885764499122, "grad_norm": 0.19540640711784363, "learning_rate": 4.411800000000001e-05, "loss": 0.0241, "step": 22060 }, { "epoch": 1.9393673110720564, "grad_norm": 0.3712926208972931, "learning_rate": 4.4138e-05, "loss": 0.024, "step": 22070 }, { "epoch": 1.9402460456942003, "grad_norm": 0.21721385419368744, "learning_rate": 4.4158e-05, "loss": 0.0259, "step": 22080 }, { "epoch": 1.9411247803163445, "grad_norm": 0.31322911381721497, "learning_rate": 4.4178e-05, "loss": 0.024, "step": 22090 }, { "epoch": 1.9420035149384884, "grad_norm": 0.3006669878959656, "learning_rate": 4.4198e-05, "loss": 0.0249, "step": 22100 }, { "epoch": 1.9428822495606326, "grad_norm": 0.2157297283411026, "learning_rate": 4.4218e-05, "loss": 0.0253, "step": 22110 }, { "epoch": 1.9437609841827768, "grad_norm": 0.22179488837718964, "learning_rate": 4.4238e-05, "loss": 0.0245, "step": 22120 }, { "epoch": 1.944639718804921, "grad_norm": 0.21534523367881775, "learning_rate": 4.4258e-05, "loss": 0.0274, "step": 22130 }, { "epoch": 1.945518453427065, "grad_norm": 0.22621093690395355, "learning_rate": 4.4278000000000005e-05, "loss": 0.0246, "step": 22140 }, { "epoch": 1.9463971880492092, "grad_norm": 0.29132938385009766, "learning_rate": 4.4298e-05, "loss": 0.0245, "step": 22150 }, { "epoch": 1.9472759226713534, "grad_norm": 0.19002275168895721, "learning_rate": 4.4318000000000005e-05, "loss": 0.0266, "step": 22160 }, { "epoch": 1.9481546572934973, "grad_norm": 0.297720342874527, "learning_rate": 4.4338e-05, "loss": 0.0274, "step": 22170 }, { "epoch": 1.9490333919156415, "grad_norm": 0.2904730439186096, "learning_rate": 4.4358e-05, "loss": 0.0241, "step": 22180 }, { "epoch": 1.9499121265377855, "grad_norm": 0.24537548422813416, "learning_rate": 4.4378e-05, "loss": 0.0224, "step": 22190 }, { "epoch": 1.9507908611599296, "grad_norm": 0.30251801013946533, "learning_rate": 4.4398e-05, "loss": 0.0248, "step": 22200 }, { "epoch": 1.9516695957820738, "grad_norm": 0.21064577996730804, "learning_rate": 4.441800000000001e-05, "loss": 0.0268, "step": 22210 }, { "epoch": 1.952548330404218, "grad_norm": 0.22643427550792694, "learning_rate": 4.4438000000000004e-05, "loss": 0.0233, "step": 22220 }, { "epoch": 1.953427065026362, "grad_norm": 0.2412668615579605, "learning_rate": 4.4458e-05, "loss": 0.0237, "step": 22230 }, { "epoch": 1.9543057996485063, "grad_norm": 0.2868073880672455, "learning_rate": 4.4478000000000003e-05, "loss": 0.0223, "step": 22240 }, { "epoch": 1.9551845342706504, "grad_norm": 0.2663196325302124, "learning_rate": 4.4498e-05, "loss": 0.024, "step": 22250 }, { "epoch": 1.9560632688927944, "grad_norm": 0.3058953285217285, "learning_rate": 4.4518e-05, "loss": 0.0239, "step": 22260 }, { "epoch": 1.9569420035149385, "grad_norm": 0.32324522733688354, "learning_rate": 4.4538e-05, "loss": 0.0252, "step": 22270 }, { "epoch": 1.9578207381370825, "grad_norm": 0.245005264878273, "learning_rate": 4.4558e-05, "loss": 0.0256, "step": 22280 }, { "epoch": 1.9586994727592266, "grad_norm": 0.3949161171913147, "learning_rate": 4.4578000000000006e-05, "loss": 0.0234, "step": 22290 }, { "epoch": 1.9595782073813708, "grad_norm": 0.2858911156654358, "learning_rate": 4.4598e-05, "loss": 0.0261, "step": 22300 }, { "epoch": 1.960456942003515, "grad_norm": 0.3661840260028839, "learning_rate": 4.4618000000000005e-05, "loss": 0.0265, "step": 22310 }, { "epoch": 1.961335676625659, "grad_norm": 0.3659648299217224, "learning_rate": 4.4638e-05, "loss": 0.0256, "step": 22320 }, { "epoch": 1.9622144112478033, "grad_norm": 0.3419761657714844, "learning_rate": 4.4658e-05, "loss": 0.0271, "step": 22330 }, { "epoch": 1.9630931458699474, "grad_norm": 0.2072383314371109, "learning_rate": 4.4678e-05, "loss": 0.0268, "step": 22340 }, { "epoch": 1.9639718804920914, "grad_norm": 0.30006492137908936, "learning_rate": 4.4698e-05, "loss": 0.0288, "step": 22350 }, { "epoch": 1.9648506151142355, "grad_norm": 0.237926185131073, "learning_rate": 4.4718e-05, "loss": 0.0236, "step": 22360 }, { "epoch": 1.9657293497363795, "grad_norm": 0.2811640501022339, "learning_rate": 4.4738000000000004e-05, "loss": 0.0254, "step": 22370 }, { "epoch": 1.9666080843585236, "grad_norm": 0.2616775333881378, "learning_rate": 4.4758e-05, "loss": 0.0239, "step": 22380 }, { "epoch": 1.9674868189806678, "grad_norm": 0.24968041479587555, "learning_rate": 4.4778000000000004e-05, "loss": 0.0271, "step": 22390 }, { "epoch": 1.968365553602812, "grad_norm": 0.2861277461051941, "learning_rate": 4.4798e-05, "loss": 0.0273, "step": 22400 }, { "epoch": 1.9692442882249561, "grad_norm": 0.3057953119277954, "learning_rate": 4.4818000000000004e-05, "loss": 0.0261, "step": 22410 }, { "epoch": 1.9701230228471003, "grad_norm": 0.3878457248210907, "learning_rate": 4.4838e-05, "loss": 0.0272, "step": 22420 }, { "epoch": 1.9710017574692444, "grad_norm": 0.3977612257003784, "learning_rate": 4.4858e-05, "loss": 0.0256, "step": 22430 }, { "epoch": 1.9718804920913884, "grad_norm": 0.47900325059890747, "learning_rate": 4.4878000000000006e-05, "loss": 0.0278, "step": 22440 }, { "epoch": 1.9727592267135325, "grad_norm": 0.332751601934433, "learning_rate": 4.4898e-05, "loss": 0.0285, "step": 22450 }, { "epoch": 1.9736379613356765, "grad_norm": 0.36186152696609497, "learning_rate": 4.4918000000000006e-05, "loss": 0.0266, "step": 22460 }, { "epoch": 1.9745166959578206, "grad_norm": 0.4099121689796448, "learning_rate": 4.4938e-05, "loss": 0.0255, "step": 22470 }, { "epoch": 1.9753954305799648, "grad_norm": 0.32561248540878296, "learning_rate": 4.4958e-05, "loss": 0.0246, "step": 22480 }, { "epoch": 1.976274165202109, "grad_norm": 0.2443913221359253, "learning_rate": 4.4978e-05, "loss": 0.0231, "step": 22490 }, { "epoch": 1.9771528998242531, "grad_norm": 0.3354743421077728, "learning_rate": 4.4998e-05, "loss": 0.027, "step": 22500 }, { "epoch": 1.9780316344463973, "grad_norm": 0.33702781796455383, "learning_rate": 4.5018e-05, "loss": 0.0267, "step": 22510 }, { "epoch": 1.9789103690685415, "grad_norm": 0.3569600284099579, "learning_rate": 4.5038000000000005e-05, "loss": 0.0273, "step": 22520 }, { "epoch": 1.9797891036906854, "grad_norm": 0.26657742261886597, "learning_rate": 4.5058e-05, "loss": 0.0242, "step": 22530 }, { "epoch": 1.9806678383128296, "grad_norm": 0.3611448109149933, "learning_rate": 4.5078000000000004e-05, "loss": 0.0262, "step": 22540 }, { "epoch": 1.9815465729349735, "grad_norm": 0.19387802481651306, "learning_rate": 4.5098e-05, "loss": 0.027, "step": 22550 }, { "epoch": 1.9824253075571177, "grad_norm": 0.2618274390697479, "learning_rate": 4.5118000000000004e-05, "loss": 0.0249, "step": 22560 }, { "epoch": 1.9833040421792618, "grad_norm": 0.1845492422580719, "learning_rate": 4.5138e-05, "loss": 0.0258, "step": 22570 }, { "epoch": 1.984182776801406, "grad_norm": 0.3049406409263611, "learning_rate": 4.5158000000000004e-05, "loss": 0.0262, "step": 22580 }, { "epoch": 1.9850615114235501, "grad_norm": 0.25801050662994385, "learning_rate": 4.517800000000001e-05, "loss": 0.0257, "step": 22590 }, { "epoch": 1.9859402460456943, "grad_norm": 0.5731838345527649, "learning_rate": 4.5198e-05, "loss": 0.0244, "step": 22600 }, { "epoch": 1.9868189806678385, "grad_norm": 0.2474963665008545, "learning_rate": 4.5218000000000007e-05, "loss": 0.0281, "step": 22610 }, { "epoch": 1.9876977152899824, "grad_norm": 0.21953123807907104, "learning_rate": 4.5238e-05, "loss": 0.0268, "step": 22620 }, { "epoch": 1.9885764499121266, "grad_norm": 0.2254832535982132, "learning_rate": 4.5258e-05, "loss": 0.0266, "step": 22630 }, { "epoch": 1.9894551845342705, "grad_norm": 0.25243639945983887, "learning_rate": 4.5278e-05, "loss": 0.0238, "step": 22640 }, { "epoch": 1.9903339191564147, "grad_norm": 0.39777740836143494, "learning_rate": 4.5298e-05, "loss": 0.0276, "step": 22650 }, { "epoch": 1.9912126537785588, "grad_norm": 0.26933425664901733, "learning_rate": 4.5318e-05, "loss": 0.0255, "step": 22660 }, { "epoch": 1.992091388400703, "grad_norm": 0.3177735507488251, "learning_rate": 4.5338000000000005e-05, "loss": 0.028, "step": 22670 }, { "epoch": 1.9929701230228472, "grad_norm": 0.22894474864006042, "learning_rate": 4.5358e-05, "loss": 0.0246, "step": 22680 }, { "epoch": 1.9938488576449913, "grad_norm": 0.22230014204978943, "learning_rate": 4.5378000000000005e-05, "loss": 0.0264, "step": 22690 }, { "epoch": 1.9947275922671355, "grad_norm": 0.25370028614997864, "learning_rate": 4.5398e-05, "loss": 0.0269, "step": 22700 }, { "epoch": 1.9956063268892794, "grad_norm": 0.17896434664726257, "learning_rate": 4.5418e-05, "loss": 0.0261, "step": 22710 }, { "epoch": 1.9964850615114236, "grad_norm": 0.2650851905345917, "learning_rate": 4.5438e-05, "loss": 0.0257, "step": 22720 }, { "epoch": 1.9973637961335675, "grad_norm": 0.3151187300682068, "learning_rate": 4.5458e-05, "loss": 0.024, "step": 22730 }, { "epoch": 1.9982425307557117, "grad_norm": 0.22128546237945557, "learning_rate": 4.5478e-05, "loss": 0.0256, "step": 22740 }, { "epoch": 1.9991212653778558, "grad_norm": 0.26904425024986267, "learning_rate": 4.5498000000000004e-05, "loss": 0.0264, "step": 22750 }, { "epoch": 2.0, "grad_norm": 0.24021464586257935, "learning_rate": 4.5518e-05, "loss": 0.0286, "step": 22760 }, { "epoch": 2.000878734622144, "grad_norm": 0.3065584599971771, "learning_rate": 4.5538000000000003e-05, "loss": 0.0286, "step": 22770 }, { "epoch": 2.0017574692442883, "grad_norm": 0.38795819878578186, "learning_rate": 4.5558e-05, "loss": 0.0278, "step": 22780 }, { "epoch": 2.0026362038664325, "grad_norm": 0.26804637908935547, "learning_rate": 4.5578e-05, "loss": 0.0265, "step": 22790 }, { "epoch": 2.0035149384885766, "grad_norm": 0.2157072275876999, "learning_rate": 4.5598e-05, "loss": 0.0245, "step": 22800 }, { "epoch": 2.0043936731107204, "grad_norm": 0.27306607365608215, "learning_rate": 4.5618e-05, "loss": 0.0255, "step": 22810 }, { "epoch": 2.0052724077328645, "grad_norm": 0.2299346625804901, "learning_rate": 4.5638000000000006e-05, "loss": 0.0226, "step": 22820 }, { "epoch": 2.0061511423550087, "grad_norm": 0.2776416540145874, "learning_rate": 4.5658e-05, "loss": 0.0221, "step": 22830 }, { "epoch": 2.007029876977153, "grad_norm": 0.22891908884048462, "learning_rate": 4.5678000000000005e-05, "loss": 0.0259, "step": 22840 }, { "epoch": 2.007908611599297, "grad_norm": 0.23792710900306702, "learning_rate": 4.5698e-05, "loss": 0.0249, "step": 22850 }, { "epoch": 2.008787346221441, "grad_norm": 0.2660064101219177, "learning_rate": 4.5718e-05, "loss": 0.0251, "step": 22860 }, { "epoch": 2.0096660808435853, "grad_norm": 0.2554968595504761, "learning_rate": 4.5738e-05, "loss": 0.0248, "step": 22870 }, { "epoch": 2.0105448154657295, "grad_norm": 0.21883758902549744, "learning_rate": 4.5758e-05, "loss": 0.0264, "step": 22880 }, { "epoch": 2.0114235500878737, "grad_norm": 0.20676937699317932, "learning_rate": 4.5778e-05, "loss": 0.0231, "step": 22890 }, { "epoch": 2.0123022847100174, "grad_norm": 0.23917116224765778, "learning_rate": 4.5798000000000004e-05, "loss": 0.0238, "step": 22900 }, { "epoch": 2.0131810193321615, "grad_norm": 0.21849209070205688, "learning_rate": 4.5818e-05, "loss": 0.0243, "step": 22910 }, { "epoch": 2.0140597539543057, "grad_norm": 0.2662329375743866, "learning_rate": 4.5838000000000004e-05, "loss": 0.0265, "step": 22920 }, { "epoch": 2.01493848857645, "grad_norm": 0.3434867858886719, "learning_rate": 4.5858e-05, "loss": 0.0262, "step": 22930 }, { "epoch": 2.015817223198594, "grad_norm": 0.3529421091079712, "learning_rate": 4.5878000000000004e-05, "loss": 0.0259, "step": 22940 }, { "epoch": 2.016695957820738, "grad_norm": 0.3918481767177582, "learning_rate": 4.5898e-05, "loss": 0.0237, "step": 22950 }, { "epoch": 2.0175746924428823, "grad_norm": 0.3073890805244446, "learning_rate": 4.5918e-05, "loss": 0.0242, "step": 22960 }, { "epoch": 2.0184534270650265, "grad_norm": 0.3114066421985626, "learning_rate": 4.5938000000000006e-05, "loss": 0.0254, "step": 22970 }, { "epoch": 2.0193321616871707, "grad_norm": 0.233526349067688, "learning_rate": 4.5958e-05, "loss": 0.0248, "step": 22980 }, { "epoch": 2.0202108963093144, "grad_norm": 0.2520209848880768, "learning_rate": 4.5978000000000006e-05, "loss": 0.0252, "step": 22990 }, { "epoch": 2.0210896309314585, "grad_norm": 0.17094671726226807, "learning_rate": 4.5998e-05, "loss": 0.0235, "step": 23000 }, { "epoch": 2.0219683655536027, "grad_norm": 0.2612821161746979, "learning_rate": 4.6018e-05, "loss": 0.0259, "step": 23010 }, { "epoch": 2.022847100175747, "grad_norm": 0.32313546538352966, "learning_rate": 4.6038e-05, "loss": 0.0246, "step": 23020 }, { "epoch": 2.023725834797891, "grad_norm": 0.3696606457233429, "learning_rate": 4.6058e-05, "loss": 0.0272, "step": 23030 }, { "epoch": 2.024604569420035, "grad_norm": 0.22313067317008972, "learning_rate": 4.6078e-05, "loss": 0.0257, "step": 23040 }, { "epoch": 2.0254833040421794, "grad_norm": 0.4357368052005768, "learning_rate": 4.6098000000000005e-05, "loss": 0.027, "step": 23050 }, { "epoch": 2.0263620386643235, "grad_norm": 0.2735729217529297, "learning_rate": 4.6118e-05, "loss": 0.0246, "step": 23060 }, { "epoch": 2.0272407732864677, "grad_norm": 0.26690927147865295, "learning_rate": 4.6138000000000004e-05, "loss": 0.0244, "step": 23070 }, { "epoch": 2.0281195079086114, "grad_norm": 0.20741726458072662, "learning_rate": 4.6158e-05, "loss": 0.0254, "step": 23080 }, { "epoch": 2.0289982425307556, "grad_norm": 0.26538312435150146, "learning_rate": 4.6178000000000004e-05, "loss": 0.0241, "step": 23090 }, { "epoch": 2.0298769771528997, "grad_norm": 0.30671796202659607, "learning_rate": 4.6198e-05, "loss": 0.026, "step": 23100 }, { "epoch": 2.030755711775044, "grad_norm": 0.2819511592388153, "learning_rate": 4.6218e-05, "loss": 0.0247, "step": 23110 }, { "epoch": 2.031634446397188, "grad_norm": 0.2909702956676483, "learning_rate": 4.623800000000001e-05, "loss": 0.0259, "step": 23120 }, { "epoch": 2.032513181019332, "grad_norm": 0.3370380401611328, "learning_rate": 4.6258e-05, "loss": 0.026, "step": 23130 }, { "epoch": 2.0333919156414764, "grad_norm": 0.31068259477615356, "learning_rate": 4.6278000000000007e-05, "loss": 0.025, "step": 23140 }, { "epoch": 2.0342706502636205, "grad_norm": 0.2957508862018585, "learning_rate": 4.6298e-05, "loss": 0.0279, "step": 23150 }, { "epoch": 2.0351493848857647, "grad_norm": 0.22328390181064606, "learning_rate": 4.6318e-05, "loss": 0.0256, "step": 23160 }, { "epoch": 2.0360281195079084, "grad_norm": 0.22589127719402313, "learning_rate": 4.6338e-05, "loss": 0.0229, "step": 23170 }, { "epoch": 2.0369068541300526, "grad_norm": 0.3189900815486908, "learning_rate": 4.6358e-05, "loss": 0.0246, "step": 23180 }, { "epoch": 2.0377855887521967, "grad_norm": 0.365426242351532, "learning_rate": 4.6378e-05, "loss": 0.025, "step": 23190 }, { "epoch": 2.038664323374341, "grad_norm": 0.30175867676734924, "learning_rate": 4.6398000000000005e-05, "loss": 0.025, "step": 23200 }, { "epoch": 2.039543057996485, "grad_norm": 0.31367993354797363, "learning_rate": 4.6418e-05, "loss": 0.0238, "step": 23210 }, { "epoch": 2.040421792618629, "grad_norm": 0.2825661301612854, "learning_rate": 4.6438000000000005e-05, "loss": 0.0256, "step": 23220 }, { "epoch": 2.0413005272407734, "grad_norm": 0.2332545667886734, "learning_rate": 4.6458e-05, "loss": 0.025, "step": 23230 }, { "epoch": 2.0421792618629175, "grad_norm": 0.2834702134132385, "learning_rate": 4.6478000000000005e-05, "loss": 0.025, "step": 23240 }, { "epoch": 2.0430579964850617, "grad_norm": 0.30565086007118225, "learning_rate": 4.6498e-05, "loss": 0.0254, "step": 23250 }, { "epoch": 2.0439367311072054, "grad_norm": 0.2864646017551422, "learning_rate": 4.6518e-05, "loss": 0.0271, "step": 23260 }, { "epoch": 2.0448154657293496, "grad_norm": 0.21942220628261566, "learning_rate": 4.6538e-05, "loss": 0.0224, "step": 23270 }, { "epoch": 2.0456942003514937, "grad_norm": 0.20732176303863525, "learning_rate": 4.6558000000000004e-05, "loss": 0.0261, "step": 23280 }, { "epoch": 2.046572934973638, "grad_norm": 0.20933859050273895, "learning_rate": 4.657800000000001e-05, "loss": 0.0228, "step": 23290 }, { "epoch": 2.047451669595782, "grad_norm": 0.2618546485900879, "learning_rate": 4.6598000000000003e-05, "loss": 0.0253, "step": 23300 }, { "epoch": 2.0483304042179262, "grad_norm": 0.25168901681900024, "learning_rate": 4.6618e-05, "loss": 0.0244, "step": 23310 }, { "epoch": 2.0492091388400704, "grad_norm": 0.27238020300865173, "learning_rate": 4.6638e-05, "loss": 0.023, "step": 23320 }, { "epoch": 2.0500878734622145, "grad_norm": 0.2937231659889221, "learning_rate": 4.6658e-05, "loss": 0.0237, "step": 23330 }, { "epoch": 2.0509666080843587, "grad_norm": 0.3167555630207062, "learning_rate": 4.6678e-05, "loss": 0.0273, "step": 23340 }, { "epoch": 2.0518453427065024, "grad_norm": 0.1882835477590561, "learning_rate": 4.6698000000000006e-05, "loss": 0.0247, "step": 23350 }, { "epoch": 2.0527240773286466, "grad_norm": 0.27204975485801697, "learning_rate": 4.6718e-05, "loss": 0.0244, "step": 23360 }, { "epoch": 2.0536028119507908, "grad_norm": 0.28763896226882935, "learning_rate": 4.6738000000000006e-05, "loss": 0.0256, "step": 23370 }, { "epoch": 2.054481546572935, "grad_norm": 0.2624853253364563, "learning_rate": 4.6758e-05, "loss": 0.0247, "step": 23380 }, { "epoch": 2.055360281195079, "grad_norm": 0.2688407003879547, "learning_rate": 4.6778e-05, "loss": 0.0269, "step": 23390 }, { "epoch": 2.0562390158172232, "grad_norm": 0.24839811027050018, "learning_rate": 4.6798e-05, "loss": 0.0238, "step": 23400 }, { "epoch": 2.0571177504393674, "grad_norm": 0.2667486071586609, "learning_rate": 4.6818e-05, "loss": 0.0255, "step": 23410 }, { "epoch": 2.0579964850615116, "grad_norm": 0.24169981479644775, "learning_rate": 4.6838e-05, "loss": 0.0263, "step": 23420 }, { "epoch": 2.0588752196836557, "grad_norm": 0.3409883379936218, "learning_rate": 4.6858000000000004e-05, "loss": 0.0267, "step": 23430 }, { "epoch": 2.0597539543057994, "grad_norm": 0.2658964991569519, "learning_rate": 4.6878e-05, "loss": 0.0256, "step": 23440 }, { "epoch": 2.0606326889279436, "grad_norm": 0.33390480279922485, "learning_rate": 4.6898000000000004e-05, "loss": 0.0245, "step": 23450 }, { "epoch": 2.0615114235500878, "grad_norm": 0.19729584455490112, "learning_rate": 4.6918e-05, "loss": 0.0263, "step": 23460 }, { "epoch": 2.062390158172232, "grad_norm": 0.27124661207199097, "learning_rate": 4.6938000000000004e-05, "loss": 0.0271, "step": 23470 }, { "epoch": 2.063268892794376, "grad_norm": 0.33333227038383484, "learning_rate": 4.6958e-05, "loss": 0.0251, "step": 23480 }, { "epoch": 2.0641476274165202, "grad_norm": 0.22700011730194092, "learning_rate": 4.6977999999999996e-05, "loss": 0.0238, "step": 23490 }, { "epoch": 2.0650263620386644, "grad_norm": 0.24922244250774384, "learning_rate": 4.6998000000000006e-05, "loss": 0.0264, "step": 23500 }, { "epoch": 2.0659050966608086, "grad_norm": 0.29094764590263367, "learning_rate": 4.7018e-05, "loss": 0.025, "step": 23510 }, { "epoch": 2.0667838312829527, "grad_norm": 0.3307070732116699, "learning_rate": 4.7038000000000006e-05, "loss": 0.027, "step": 23520 }, { "epoch": 2.0676625659050965, "grad_norm": 0.22653955221176147, "learning_rate": 4.7058e-05, "loss": 0.025, "step": 23530 }, { "epoch": 2.0685413005272406, "grad_norm": 0.202428936958313, "learning_rate": 4.7078e-05, "loss": 0.0271, "step": 23540 }, { "epoch": 2.0694200351493848, "grad_norm": 0.21499773859977722, "learning_rate": 4.7098e-05, "loss": 0.0249, "step": 23550 }, { "epoch": 2.070298769771529, "grad_norm": 0.2652522325515747, "learning_rate": 4.7118e-05, "loss": 0.0261, "step": 23560 }, { "epoch": 2.071177504393673, "grad_norm": 0.26710498332977295, "learning_rate": 4.7138e-05, "loss": 0.0266, "step": 23570 }, { "epoch": 2.0720562390158173, "grad_norm": 0.3622592091560364, "learning_rate": 4.7158000000000005e-05, "loss": 0.0261, "step": 23580 }, { "epoch": 2.0729349736379614, "grad_norm": 0.2996024489402771, "learning_rate": 4.7178e-05, "loss": 0.0269, "step": 23590 }, { "epoch": 2.0738137082601056, "grad_norm": 0.2992555499076843, "learning_rate": 4.7198000000000004e-05, "loss": 0.0255, "step": 23600 }, { "epoch": 2.0746924428822497, "grad_norm": 0.347791463136673, "learning_rate": 4.7218e-05, "loss": 0.0253, "step": 23610 }, { "epoch": 2.0755711775043935, "grad_norm": 0.2581549286842346, "learning_rate": 4.7238000000000004e-05, "loss": 0.0276, "step": 23620 }, { "epoch": 2.0764499121265376, "grad_norm": 0.294903039932251, "learning_rate": 4.7258e-05, "loss": 0.0265, "step": 23630 }, { "epoch": 2.077328646748682, "grad_norm": 0.2357836663722992, "learning_rate": 4.7278e-05, "loss": 0.0261, "step": 23640 }, { "epoch": 2.078207381370826, "grad_norm": 0.32269513607025146, "learning_rate": 4.7298e-05, "loss": 0.0266, "step": 23650 }, { "epoch": 2.07908611599297, "grad_norm": 0.20380020141601562, "learning_rate": 4.7318e-05, "loss": 0.0264, "step": 23660 }, { "epoch": 2.0799648506151143, "grad_norm": 0.2889593541622162, "learning_rate": 4.7338000000000007e-05, "loss": 0.0264, "step": 23670 }, { "epoch": 2.0808435852372584, "grad_norm": 0.3332526981830597, "learning_rate": 4.7358e-05, "loss": 0.023, "step": 23680 }, { "epoch": 2.0817223198594026, "grad_norm": 0.3244920074939728, "learning_rate": 4.7378e-05, "loss": 0.0267, "step": 23690 }, { "epoch": 2.0826010544815468, "grad_norm": 0.3518829345703125, "learning_rate": 4.7398e-05, "loss": 0.0251, "step": 23700 }, { "epoch": 2.0834797891036905, "grad_norm": 0.28583407402038574, "learning_rate": 4.7418e-05, "loss": 0.0252, "step": 23710 }, { "epoch": 2.0843585237258346, "grad_norm": 0.21507388353347778, "learning_rate": 4.7438e-05, "loss": 0.0263, "step": 23720 }, { "epoch": 2.085237258347979, "grad_norm": 0.35600078105926514, "learning_rate": 4.7458000000000005e-05, "loss": 0.0252, "step": 23730 }, { "epoch": 2.086115992970123, "grad_norm": 0.31652000546455383, "learning_rate": 4.7478e-05, "loss": 0.0254, "step": 23740 }, { "epoch": 2.086994727592267, "grad_norm": 0.23522259294986725, "learning_rate": 4.7498000000000005e-05, "loss": 0.0261, "step": 23750 }, { "epoch": 2.0878734622144113, "grad_norm": 0.2656204104423523, "learning_rate": 4.7518e-05, "loss": 0.0259, "step": 23760 }, { "epoch": 2.0887521968365554, "grad_norm": 0.3238801658153534, "learning_rate": 4.7538000000000005e-05, "loss": 0.0249, "step": 23770 }, { "epoch": 2.0896309314586996, "grad_norm": 0.21263010799884796, "learning_rate": 4.7558e-05, "loss": 0.0256, "step": 23780 }, { "epoch": 2.0905096660808438, "grad_norm": 0.20381878316402435, "learning_rate": 4.7578e-05, "loss": 0.0235, "step": 23790 }, { "epoch": 2.0913884007029875, "grad_norm": 0.22556418180465698, "learning_rate": 4.7598e-05, "loss": 0.0236, "step": 23800 }, { "epoch": 2.0922671353251316, "grad_norm": 0.3299870193004608, "learning_rate": 4.7618000000000004e-05, "loss": 0.0262, "step": 23810 }, { "epoch": 2.093145869947276, "grad_norm": 0.2744443118572235, "learning_rate": 4.763800000000001e-05, "loss": 0.0251, "step": 23820 }, { "epoch": 2.09402460456942, "grad_norm": 0.2858784794807434, "learning_rate": 4.7658000000000003e-05, "loss": 0.0253, "step": 23830 }, { "epoch": 2.094903339191564, "grad_norm": 0.3235347270965576, "learning_rate": 4.7678e-05, "loss": 0.0241, "step": 23840 }, { "epoch": 2.0957820738137083, "grad_norm": 0.34611645340919495, "learning_rate": 4.7698e-05, "loss": 0.0272, "step": 23850 }, { "epoch": 2.0966608084358525, "grad_norm": 0.3315395712852478, "learning_rate": 4.7718e-05, "loss": 0.0252, "step": 23860 }, { "epoch": 2.0975395430579966, "grad_norm": 0.2822474241256714, "learning_rate": 4.7738e-05, "loss": 0.0248, "step": 23870 }, { "epoch": 2.0984182776801408, "grad_norm": 0.27195441722869873, "learning_rate": 4.7758000000000006e-05, "loss": 0.0241, "step": 23880 }, { "epoch": 2.0992970123022845, "grad_norm": 0.3352242708206177, "learning_rate": 4.7778e-05, "loss": 0.0247, "step": 23890 }, { "epoch": 2.1001757469244287, "grad_norm": 0.30077502131462097, "learning_rate": 4.7798000000000006e-05, "loss": 0.0257, "step": 23900 }, { "epoch": 2.101054481546573, "grad_norm": 0.26494845747947693, "learning_rate": 4.7818e-05, "loss": 0.0241, "step": 23910 }, { "epoch": 2.101933216168717, "grad_norm": 0.29607757925987244, "learning_rate": 4.7838000000000005e-05, "loss": 0.0224, "step": 23920 }, { "epoch": 2.102811950790861, "grad_norm": 0.19356662034988403, "learning_rate": 4.7858e-05, "loss": 0.0252, "step": 23930 }, { "epoch": 2.1036906854130053, "grad_norm": 0.22386303544044495, "learning_rate": 4.7878e-05, "loss": 0.0225, "step": 23940 }, { "epoch": 2.1045694200351495, "grad_norm": 0.30531108379364014, "learning_rate": 4.7898e-05, "loss": 0.025, "step": 23950 }, { "epoch": 2.1054481546572936, "grad_norm": 0.2819540202617645, "learning_rate": 4.7918000000000004e-05, "loss": 0.0255, "step": 23960 }, { "epoch": 2.106326889279438, "grad_norm": 0.3311958312988281, "learning_rate": 4.7938e-05, "loss": 0.0256, "step": 23970 }, { "epoch": 2.1072056239015815, "grad_norm": 0.302600234746933, "learning_rate": 4.7958000000000004e-05, "loss": 0.0258, "step": 23980 }, { "epoch": 2.1080843585237257, "grad_norm": 0.3927766680717468, "learning_rate": 4.7978e-05, "loss": 0.0246, "step": 23990 }, { "epoch": 2.10896309314587, "grad_norm": 0.32047194242477417, "learning_rate": 4.7998000000000004e-05, "loss": 0.0252, "step": 24000 }, { "epoch": 2.109841827768014, "grad_norm": 0.237257719039917, "learning_rate": 4.8018e-05, "loss": 0.027, "step": 24010 }, { "epoch": 2.110720562390158, "grad_norm": 0.22873666882514954, "learning_rate": 4.8037999999999996e-05, "loss": 0.023, "step": 24020 }, { "epoch": 2.1115992970123023, "grad_norm": 0.29507583379745483, "learning_rate": 4.8058e-05, "loss": 0.0237, "step": 24030 }, { "epoch": 2.1124780316344465, "grad_norm": 0.27245965600013733, "learning_rate": 4.8078e-05, "loss": 0.0237, "step": 24040 }, { "epoch": 2.1133567662565906, "grad_norm": 0.28809916973114014, "learning_rate": 4.8098000000000006e-05, "loss": 0.0232, "step": 24050 }, { "epoch": 2.114235500878735, "grad_norm": 0.2564117908477783, "learning_rate": 4.8118e-05, "loss": 0.024, "step": 24060 }, { "epoch": 2.1151142355008785, "grad_norm": 0.17284615337848663, "learning_rate": 4.8138e-05, "loss": 0.0242, "step": 24070 }, { "epoch": 2.1159929701230227, "grad_norm": 0.2585204839706421, "learning_rate": 4.8158e-05, "loss": 0.0234, "step": 24080 }, { "epoch": 2.116871704745167, "grad_norm": 0.2656245231628418, "learning_rate": 4.8178e-05, "loss": 0.0225, "step": 24090 }, { "epoch": 2.117750439367311, "grad_norm": 0.26547980308532715, "learning_rate": 4.8198e-05, "loss": 0.0262, "step": 24100 }, { "epoch": 2.118629173989455, "grad_norm": 0.20999674499034882, "learning_rate": 4.8218000000000005e-05, "loss": 0.0265, "step": 24110 }, { "epoch": 2.1195079086115993, "grad_norm": 0.21815647184848785, "learning_rate": 4.8238e-05, "loss": 0.0234, "step": 24120 }, { "epoch": 2.1203866432337435, "grad_norm": 0.24322591722011566, "learning_rate": 4.8258000000000005e-05, "loss": 0.0242, "step": 24130 }, { "epoch": 2.1212653778558876, "grad_norm": 0.17931954562664032, "learning_rate": 4.8278e-05, "loss": 0.0238, "step": 24140 }, { "epoch": 2.122144112478032, "grad_norm": 0.1666475087404251, "learning_rate": 4.8298000000000004e-05, "loss": 0.0224, "step": 24150 }, { "epoch": 2.1230228471001755, "grad_norm": 0.22711360454559326, "learning_rate": 4.8318e-05, "loss": 0.0263, "step": 24160 }, { "epoch": 2.1239015817223197, "grad_norm": 0.4498386085033417, "learning_rate": 4.8338e-05, "loss": 0.026, "step": 24170 }, { "epoch": 2.124780316344464, "grad_norm": 0.2453557848930359, "learning_rate": 4.8358e-05, "loss": 0.0254, "step": 24180 }, { "epoch": 2.125659050966608, "grad_norm": 0.24061763286590576, "learning_rate": 4.8378e-05, "loss": 0.0219, "step": 24190 }, { "epoch": 2.126537785588752, "grad_norm": 0.2131642997264862, "learning_rate": 4.8398000000000007e-05, "loss": 0.0273, "step": 24200 }, { "epoch": 2.1274165202108963, "grad_norm": 0.3493885099887848, "learning_rate": 4.8418e-05, "loss": 0.0257, "step": 24210 }, { "epoch": 2.1282952548330405, "grad_norm": 0.22453556954860687, "learning_rate": 4.8438e-05, "loss": 0.0252, "step": 24220 }, { "epoch": 2.1291739894551847, "grad_norm": 0.24320904910564423, "learning_rate": 4.8458e-05, "loss": 0.0229, "step": 24230 }, { "epoch": 2.130052724077329, "grad_norm": 0.2785133421421051, "learning_rate": 4.8478e-05, "loss": 0.0235, "step": 24240 }, { "epoch": 2.1309314586994725, "grad_norm": 0.3402400314807892, "learning_rate": 4.8498e-05, "loss": 0.0238, "step": 24250 }, { "epoch": 2.1318101933216167, "grad_norm": 0.2986193299293518, "learning_rate": 4.8518000000000005e-05, "loss": 0.0235, "step": 24260 }, { "epoch": 2.132688927943761, "grad_norm": 0.21634316444396973, "learning_rate": 4.8538e-05, "loss": 0.024, "step": 24270 }, { "epoch": 2.133567662565905, "grad_norm": 0.2650652229785919, "learning_rate": 4.8558000000000005e-05, "loss": 0.0235, "step": 24280 }, { "epoch": 2.134446397188049, "grad_norm": 0.18183249235153198, "learning_rate": 4.8578e-05, "loss": 0.0244, "step": 24290 }, { "epoch": 2.1353251318101933, "grad_norm": 0.2958601415157318, "learning_rate": 4.8598000000000005e-05, "loss": 0.0266, "step": 24300 }, { "epoch": 2.1362038664323375, "grad_norm": 0.2570926249027252, "learning_rate": 4.8618e-05, "loss": 0.0272, "step": 24310 }, { "epoch": 2.1370826010544817, "grad_norm": 0.25271522998809814, "learning_rate": 4.8638e-05, "loss": 0.0251, "step": 24320 }, { "epoch": 2.137961335676626, "grad_norm": 0.2556265592575073, "learning_rate": 4.8658e-05, "loss": 0.0241, "step": 24330 }, { "epoch": 2.1388400702987695, "grad_norm": 0.27193683385849, "learning_rate": 4.8678000000000004e-05, "loss": 0.0258, "step": 24340 }, { "epoch": 2.1397188049209137, "grad_norm": 0.3960658609867096, "learning_rate": 4.869800000000001e-05, "loss": 0.0266, "step": 24350 }, { "epoch": 2.140597539543058, "grad_norm": 0.23463529348373413, "learning_rate": 4.8718000000000003e-05, "loss": 0.0255, "step": 24360 }, { "epoch": 2.141476274165202, "grad_norm": 0.2897809147834778, "learning_rate": 4.8738e-05, "loss": 0.0238, "step": 24370 }, { "epoch": 2.142355008787346, "grad_norm": 0.2259867787361145, "learning_rate": 4.8758e-05, "loss": 0.0235, "step": 24380 }, { "epoch": 2.1432337434094904, "grad_norm": 0.2243587076663971, "learning_rate": 4.8778e-05, "loss": 0.0225, "step": 24390 }, { "epoch": 2.1441124780316345, "grad_norm": 0.2788099944591522, "learning_rate": 4.8798e-05, "loss": 0.0265, "step": 24400 }, { "epoch": 2.1449912126537787, "grad_norm": 0.29764440655708313, "learning_rate": 4.8818000000000006e-05, "loss": 0.0262, "step": 24410 }, { "epoch": 2.145869947275923, "grad_norm": 0.28261563181877136, "learning_rate": 4.8838e-05, "loss": 0.0247, "step": 24420 }, { "epoch": 2.1467486818980666, "grad_norm": 0.36846473813056946, "learning_rate": 4.8858000000000006e-05, "loss": 0.0274, "step": 24430 }, { "epoch": 2.1476274165202107, "grad_norm": 0.28484389185905457, "learning_rate": 4.8878e-05, "loss": 0.0278, "step": 24440 }, { "epoch": 2.148506151142355, "grad_norm": 0.3451750576496124, "learning_rate": 4.8898000000000005e-05, "loss": 0.0306, "step": 24450 }, { "epoch": 2.149384885764499, "grad_norm": 0.24555623531341553, "learning_rate": 4.8918e-05, "loss": 0.0242, "step": 24460 }, { "epoch": 2.150263620386643, "grad_norm": 0.267197847366333, "learning_rate": 4.8938e-05, "loss": 0.0233, "step": 24470 }, { "epoch": 2.1511423550087874, "grad_norm": 0.2781601548194885, "learning_rate": 4.8958e-05, "loss": 0.0256, "step": 24480 }, { "epoch": 2.1520210896309315, "grad_norm": 0.21142669022083282, "learning_rate": 4.8978000000000004e-05, "loss": 0.0247, "step": 24490 }, { "epoch": 2.1528998242530757, "grad_norm": 0.30092698335647583, "learning_rate": 4.899800000000001e-05, "loss": 0.0261, "step": 24500 }, { "epoch": 2.15377855887522, "grad_norm": 0.23197683691978455, "learning_rate": 4.9018000000000004e-05, "loss": 0.0252, "step": 24510 }, { "epoch": 2.1546572934973636, "grad_norm": 0.21930190920829773, "learning_rate": 4.9038e-05, "loss": 0.0242, "step": 24520 }, { "epoch": 2.1555360281195077, "grad_norm": 0.28211626410484314, "learning_rate": 4.9058000000000004e-05, "loss": 0.0252, "step": 24530 }, { "epoch": 2.156414762741652, "grad_norm": 0.27866846323013306, "learning_rate": 4.9078e-05, "loss": 0.0243, "step": 24540 }, { "epoch": 2.157293497363796, "grad_norm": 0.2860143184661865, "learning_rate": 4.9098e-05, "loss": 0.025, "step": 24550 }, { "epoch": 2.15817223198594, "grad_norm": 0.39985066652297974, "learning_rate": 4.9118e-05, "loss": 0.0256, "step": 24560 }, { "epoch": 2.1590509666080844, "grad_norm": 0.23309779167175293, "learning_rate": 4.9138e-05, "loss": 0.0247, "step": 24570 }, { "epoch": 2.1599297012302285, "grad_norm": 0.2761567533016205, "learning_rate": 4.9158000000000006e-05, "loss": 0.0257, "step": 24580 }, { "epoch": 2.1608084358523727, "grad_norm": 0.24926088750362396, "learning_rate": 4.9178e-05, "loss": 0.0233, "step": 24590 }, { "epoch": 2.161687170474517, "grad_norm": 0.2153415083885193, "learning_rate": 4.9198e-05, "loss": 0.0245, "step": 24600 }, { "epoch": 2.1625659050966606, "grad_norm": 0.34606316685676575, "learning_rate": 4.9218e-05, "loss": 0.0243, "step": 24610 }, { "epoch": 2.1634446397188047, "grad_norm": 0.2356669306755066, "learning_rate": 4.9238e-05, "loss": 0.0244, "step": 24620 }, { "epoch": 2.164323374340949, "grad_norm": 0.27867165207862854, "learning_rate": 4.9258e-05, "loss": 0.0247, "step": 24630 }, { "epoch": 2.165202108963093, "grad_norm": 0.3052630126476288, "learning_rate": 4.9278000000000005e-05, "loss": 0.0236, "step": 24640 }, { "epoch": 2.1660808435852372, "grad_norm": 0.28766438364982605, "learning_rate": 4.9298e-05, "loss": 0.0225, "step": 24650 }, { "epoch": 2.1669595782073814, "grad_norm": 0.33783721923828125, "learning_rate": 4.9318000000000005e-05, "loss": 0.0253, "step": 24660 }, { "epoch": 2.1678383128295255, "grad_norm": 0.2667042315006256, "learning_rate": 4.9338e-05, "loss": 0.0223, "step": 24670 }, { "epoch": 2.1687170474516697, "grad_norm": 0.17895984649658203, "learning_rate": 4.9358000000000004e-05, "loss": 0.0221, "step": 24680 }, { "epoch": 2.169595782073814, "grad_norm": 0.20765148103237152, "learning_rate": 4.9378e-05, "loss": 0.0233, "step": 24690 }, { "epoch": 2.1704745166959576, "grad_norm": 0.2627381980419159, "learning_rate": 4.9398e-05, "loss": 0.0227, "step": 24700 }, { "epoch": 2.1713532513181018, "grad_norm": 0.2765902280807495, "learning_rate": 4.9418e-05, "loss": 0.0252, "step": 24710 }, { "epoch": 2.172231985940246, "grad_norm": 0.28311771154403687, "learning_rate": 4.9438e-05, "loss": 0.0225, "step": 24720 }, { "epoch": 2.17311072056239, "grad_norm": 0.3013139069080353, "learning_rate": 4.9458000000000007e-05, "loss": 0.0218, "step": 24730 }, { "epoch": 2.1739894551845342, "grad_norm": 0.2827947735786438, "learning_rate": 4.9478e-05, "loss": 0.0237, "step": 24740 }, { "epoch": 2.1748681898066784, "grad_norm": 0.32333114743232727, "learning_rate": 4.9498e-05, "loss": 0.0254, "step": 24750 }, { "epoch": 2.1757469244288226, "grad_norm": 0.22261536121368408, "learning_rate": 4.9518e-05, "loss": 0.0231, "step": 24760 }, { "epoch": 2.1766256590509667, "grad_norm": 0.2605843245983124, "learning_rate": 4.9538e-05, "loss": 0.0269, "step": 24770 }, { "epoch": 2.177504393673111, "grad_norm": 0.2749735713005066, "learning_rate": 4.9558e-05, "loss": 0.026, "step": 24780 }, { "epoch": 2.1783831282952546, "grad_norm": 0.29945138096809387, "learning_rate": 4.9578000000000005e-05, "loss": 0.0248, "step": 24790 }, { "epoch": 2.1792618629173988, "grad_norm": 0.3056463599205017, "learning_rate": 4.9598e-05, "loss": 0.0247, "step": 24800 }, { "epoch": 2.180140597539543, "grad_norm": 0.2567957043647766, "learning_rate": 4.9618000000000005e-05, "loss": 0.0247, "step": 24810 }, { "epoch": 2.181019332161687, "grad_norm": 0.34532904624938965, "learning_rate": 4.9638e-05, "loss": 0.0254, "step": 24820 }, { "epoch": 2.1818980667838312, "grad_norm": 0.2715056836605072, "learning_rate": 4.9658000000000005e-05, "loss": 0.0251, "step": 24830 }, { "epoch": 2.1827768014059754, "grad_norm": 0.19216975569725037, "learning_rate": 4.9678e-05, "loss": 0.0233, "step": 24840 }, { "epoch": 2.1836555360281196, "grad_norm": 0.20727278292179108, "learning_rate": 4.9698e-05, "loss": 0.0245, "step": 24850 }, { "epoch": 2.1845342706502637, "grad_norm": 0.3119242489337921, "learning_rate": 4.9718e-05, "loss": 0.0288, "step": 24860 }, { "epoch": 2.185413005272408, "grad_norm": 0.2942597568035126, "learning_rate": 4.9738000000000004e-05, "loss": 0.025, "step": 24870 }, { "epoch": 2.1862917398945516, "grad_norm": 0.42303040623664856, "learning_rate": 4.975800000000001e-05, "loss": 0.0255, "step": 24880 }, { "epoch": 2.1871704745166958, "grad_norm": 0.26813748478889465, "learning_rate": 4.9778000000000004e-05, "loss": 0.0239, "step": 24890 }, { "epoch": 2.18804920913884, "grad_norm": 0.3227483332157135, "learning_rate": 4.9798e-05, "loss": 0.0242, "step": 24900 }, { "epoch": 2.188927943760984, "grad_norm": 0.3196532130241394, "learning_rate": 4.9818e-05, "loss": 0.0276, "step": 24910 }, { "epoch": 2.1898066783831283, "grad_norm": 0.31067386269569397, "learning_rate": 4.9838e-05, "loss": 0.0258, "step": 24920 }, { "epoch": 2.1906854130052724, "grad_norm": 0.369011253118515, "learning_rate": 4.9858e-05, "loss": 0.0254, "step": 24930 }, { "epoch": 2.1915641476274166, "grad_norm": 0.38773250579833984, "learning_rate": 4.9878e-05, "loss": 0.0257, "step": 24940 }, { "epoch": 2.1924428822495607, "grad_norm": 0.301504909992218, "learning_rate": 4.9898e-05, "loss": 0.0236, "step": 24950 }, { "epoch": 2.193321616871705, "grad_norm": 0.19325008988380432, "learning_rate": 4.9918000000000006e-05, "loss": 0.0255, "step": 24960 }, { "epoch": 2.1942003514938486, "grad_norm": 0.28254956007003784, "learning_rate": 4.9938e-05, "loss": 0.0228, "step": 24970 }, { "epoch": 2.195079086115993, "grad_norm": 0.2064584642648697, "learning_rate": 4.9958000000000005e-05, "loss": 0.0229, "step": 24980 }, { "epoch": 2.195957820738137, "grad_norm": 0.15744370222091675, "learning_rate": 4.9978e-05, "loss": 0.0227, "step": 24990 }, { "epoch": 2.196836555360281, "grad_norm": 0.21432368457317352, "learning_rate": 4.9998e-05, "loss": 0.0273, "step": 25000 }, { "epoch": 2.1977152899824253, "grad_norm": 0.3279573917388916, "learning_rate": 5.0018e-05, "loss": 0.0241, "step": 25010 }, { "epoch": 2.1985940246045694, "grad_norm": 0.23338817059993744, "learning_rate": 5.0038000000000004e-05, "loss": 0.0246, "step": 25020 }, { "epoch": 2.1994727592267136, "grad_norm": 0.18994443118572235, "learning_rate": 5.005800000000001e-05, "loss": 0.0231, "step": 25030 }, { "epoch": 2.2003514938488578, "grad_norm": 0.1945120245218277, "learning_rate": 5.0078000000000004e-05, "loss": 0.0247, "step": 25040 }, { "epoch": 2.201230228471002, "grad_norm": 0.20795650780200958, "learning_rate": 5.0098e-05, "loss": 0.0245, "step": 25050 }, { "epoch": 2.202108963093146, "grad_norm": 0.21652546525001526, "learning_rate": 5.0118e-05, "loss": 0.0253, "step": 25060 }, { "epoch": 2.20298769771529, "grad_norm": 0.18280932307243347, "learning_rate": 5.013800000000001e-05, "loss": 0.0234, "step": 25070 }, { "epoch": 2.203866432337434, "grad_norm": 0.2059442549943924, "learning_rate": 5.0158e-05, "loss": 0.0268, "step": 25080 }, { "epoch": 2.204745166959578, "grad_norm": 0.20422165095806122, "learning_rate": 5.0178e-05, "loss": 0.0235, "step": 25090 }, { "epoch": 2.2056239015817223, "grad_norm": 0.2133624404668808, "learning_rate": 5.0198e-05, "loss": 0.0248, "step": 25100 }, { "epoch": 2.2065026362038664, "grad_norm": 0.20743653178215027, "learning_rate": 5.0218e-05, "loss": 0.0257, "step": 25110 }, { "epoch": 2.2073813708260106, "grad_norm": 0.1803218424320221, "learning_rate": 5.023800000000001e-05, "loss": 0.0219, "step": 25120 }, { "epoch": 2.2082601054481548, "grad_norm": 0.23563887178897858, "learning_rate": 5.0258000000000006e-05, "loss": 0.0258, "step": 25130 }, { "epoch": 2.209138840070299, "grad_norm": 0.21626387536525726, "learning_rate": 5.0278e-05, "loss": 0.0234, "step": 25140 }, { "epoch": 2.210017574692443, "grad_norm": 0.22092179954051971, "learning_rate": 5.0298e-05, "loss": 0.024, "step": 25150 }, { "epoch": 2.210896309314587, "grad_norm": 0.1577003002166748, "learning_rate": 5.0318e-05, "loss": 0.0228, "step": 25160 }, { "epoch": 2.211775043936731, "grad_norm": 0.16344551742076874, "learning_rate": 5.0338000000000005e-05, "loss": 0.0226, "step": 25170 }, { "epoch": 2.212653778558875, "grad_norm": 0.2658519446849823, "learning_rate": 5.035800000000001e-05, "loss": 0.0237, "step": 25180 }, { "epoch": 2.2135325131810193, "grad_norm": 0.17818626761436462, "learning_rate": 5.0378000000000005e-05, "loss": 0.0251, "step": 25190 }, { "epoch": 2.2144112478031635, "grad_norm": 0.22074659168720245, "learning_rate": 5.0398e-05, "loss": 0.0247, "step": 25200 }, { "epoch": 2.2152899824253076, "grad_norm": 0.22766032814979553, "learning_rate": 5.0418e-05, "loss": 0.0251, "step": 25210 }, { "epoch": 2.2161687170474518, "grad_norm": 0.31237563490867615, "learning_rate": 5.043800000000001e-05, "loss": 0.0253, "step": 25220 }, { "epoch": 2.217047451669596, "grad_norm": 0.15112519264221191, "learning_rate": 5.0458000000000004e-05, "loss": 0.0229, "step": 25230 }, { "epoch": 2.21792618629174, "grad_norm": 0.2149231880903244, "learning_rate": 5.0478e-05, "loss": 0.0247, "step": 25240 }, { "epoch": 2.218804920913884, "grad_norm": 0.2952043414115906, "learning_rate": 5.0498e-05, "loss": 0.0247, "step": 25250 }, { "epoch": 2.219683655536028, "grad_norm": 0.24469199776649475, "learning_rate": 5.0518e-05, "loss": 0.023, "step": 25260 }, { "epoch": 2.220562390158172, "grad_norm": 0.24357815086841583, "learning_rate": 5.053800000000001e-05, "loss": 0.0274, "step": 25270 }, { "epoch": 2.2214411247803163, "grad_norm": 0.23514555394649506, "learning_rate": 5.0558000000000006e-05, "loss": 0.0219, "step": 25280 }, { "epoch": 2.2223198594024605, "grad_norm": 0.3107793629169464, "learning_rate": 5.0578e-05, "loss": 0.0255, "step": 25290 }, { "epoch": 2.2231985940246046, "grad_norm": 0.21511657536029816, "learning_rate": 5.0598e-05, "loss": 0.0216, "step": 25300 }, { "epoch": 2.224077328646749, "grad_norm": 0.23593628406524658, "learning_rate": 5.0617999999999995e-05, "loss": 0.0249, "step": 25310 }, { "epoch": 2.224956063268893, "grad_norm": 0.22926676273345947, "learning_rate": 5.0638000000000005e-05, "loss": 0.0223, "step": 25320 }, { "epoch": 2.225834797891037, "grad_norm": 0.3054211139678955, "learning_rate": 5.065800000000001e-05, "loss": 0.0244, "step": 25330 }, { "epoch": 2.226713532513181, "grad_norm": 0.24706760048866272, "learning_rate": 5.0678000000000005e-05, "loss": 0.0234, "step": 25340 }, { "epoch": 2.227592267135325, "grad_norm": 0.2564692795276642, "learning_rate": 5.0698e-05, "loss": 0.0259, "step": 25350 }, { "epoch": 2.228471001757469, "grad_norm": 0.283652663230896, "learning_rate": 5.0718e-05, "loss": 0.0245, "step": 25360 }, { "epoch": 2.2293497363796133, "grad_norm": 0.3029247224330902, "learning_rate": 5.073800000000001e-05, "loss": 0.0244, "step": 25370 }, { "epoch": 2.2302284710017575, "grad_norm": 0.2836477756500244, "learning_rate": 5.0758000000000004e-05, "loss": 0.0234, "step": 25380 }, { "epoch": 2.2311072056239016, "grad_norm": 0.2917945981025696, "learning_rate": 5.0778e-05, "loss": 0.0247, "step": 25390 }, { "epoch": 2.231985940246046, "grad_norm": 0.30585068464279175, "learning_rate": 5.0798000000000004e-05, "loss": 0.0238, "step": 25400 }, { "epoch": 2.23286467486819, "grad_norm": 0.2682947814464569, "learning_rate": 5.0818e-05, "loss": 0.027, "step": 25410 }, { "epoch": 2.233743409490334, "grad_norm": 0.3226286768913269, "learning_rate": 5.083800000000001e-05, "loss": 0.0253, "step": 25420 }, { "epoch": 2.234622144112478, "grad_norm": 0.24718575179576874, "learning_rate": 5.085800000000001e-05, "loss": 0.0253, "step": 25430 }, { "epoch": 2.235500878734622, "grad_norm": 0.241078183054924, "learning_rate": 5.0878e-05, "loss": 0.0241, "step": 25440 }, { "epoch": 2.236379613356766, "grad_norm": 0.2640663683414459, "learning_rate": 5.0898e-05, "loss": 0.0275, "step": 25450 }, { "epoch": 2.2372583479789103, "grad_norm": 0.32292506098747253, "learning_rate": 5.0917999999999996e-05, "loss": 0.0243, "step": 25460 }, { "epoch": 2.2381370826010545, "grad_norm": 0.2638123035430908, "learning_rate": 5.0938000000000006e-05, "loss": 0.0263, "step": 25470 }, { "epoch": 2.2390158172231986, "grad_norm": 0.2427927553653717, "learning_rate": 5.0958e-05, "loss": 0.0267, "step": 25480 }, { "epoch": 2.239894551845343, "grad_norm": 0.23896604776382446, "learning_rate": 5.0978000000000006e-05, "loss": 0.0252, "step": 25490 }, { "epoch": 2.240773286467487, "grad_norm": 0.30919283628463745, "learning_rate": 5.0998e-05, "loss": 0.0278, "step": 25500 }, { "epoch": 2.241652021089631, "grad_norm": 0.39934563636779785, "learning_rate": 5.1018e-05, "loss": 0.024, "step": 25510 }, { "epoch": 2.242530755711775, "grad_norm": 0.27975204586982727, "learning_rate": 5.1037999999999995e-05, "loss": 0.0255, "step": 25520 }, { "epoch": 2.243409490333919, "grad_norm": 0.26910972595214844, "learning_rate": 5.1058000000000005e-05, "loss": 0.0278, "step": 25530 }, { "epoch": 2.244288224956063, "grad_norm": 0.2192414551973343, "learning_rate": 5.1078e-05, "loss": 0.0251, "step": 25540 }, { "epoch": 2.2451669595782073, "grad_norm": 0.20352932810783386, "learning_rate": 5.1098000000000004e-05, "loss": 0.0267, "step": 25550 }, { "epoch": 2.2460456942003515, "grad_norm": 0.23536446690559387, "learning_rate": 5.1118e-05, "loss": 0.0254, "step": 25560 }, { "epoch": 2.2469244288224957, "grad_norm": 0.24273881316184998, "learning_rate": 5.1138e-05, "loss": 0.025, "step": 25570 }, { "epoch": 2.24780316344464, "grad_norm": 0.27134349942207336, "learning_rate": 5.115800000000001e-05, "loss": 0.0235, "step": 25580 }, { "epoch": 2.248681898066784, "grad_norm": 0.30812838673591614, "learning_rate": 5.1178000000000004e-05, "loss": 0.0267, "step": 25590 }, { "epoch": 2.249560632688928, "grad_norm": 0.27372217178344727, "learning_rate": 5.1198e-05, "loss": 0.0242, "step": 25600 }, { "epoch": 2.2504393673110723, "grad_norm": 0.28480255603790283, "learning_rate": 5.1217999999999996e-05, "loss": 0.0243, "step": 25610 }, { "epoch": 2.251318101933216, "grad_norm": 0.3457868993282318, "learning_rate": 5.1238e-05, "loss": 0.0218, "step": 25620 }, { "epoch": 2.25219683655536, "grad_norm": 0.21781392395496368, "learning_rate": 5.1258e-05, "loss": 0.0248, "step": 25630 }, { "epoch": 2.2530755711775043, "grad_norm": 0.205312117934227, "learning_rate": 5.1278000000000006e-05, "loss": 0.0224, "step": 25640 }, { "epoch": 2.2539543057996485, "grad_norm": 0.45324990153312683, "learning_rate": 5.1298e-05, "loss": 0.0236, "step": 25650 }, { "epoch": 2.2548330404217927, "grad_norm": 0.3346847891807556, "learning_rate": 5.1318e-05, "loss": 0.0217, "step": 25660 }, { "epoch": 2.255711775043937, "grad_norm": 0.26908302307128906, "learning_rate": 5.1337999999999995e-05, "loss": 0.0257, "step": 25670 }, { "epoch": 2.256590509666081, "grad_norm": 0.33044373989105225, "learning_rate": 5.1358000000000005e-05, "loss": 0.0253, "step": 25680 }, { "epoch": 2.2574692442882247, "grad_norm": 0.27299121022224426, "learning_rate": 5.1378e-05, "loss": 0.0244, "step": 25690 }, { "epoch": 2.2583479789103693, "grad_norm": 0.30768823623657227, "learning_rate": 5.1398000000000005e-05, "loss": 0.0241, "step": 25700 }, { "epoch": 2.259226713532513, "grad_norm": 0.3039794862270355, "learning_rate": 5.1418e-05, "loss": 0.0258, "step": 25710 }, { "epoch": 2.260105448154657, "grad_norm": 0.2357749491930008, "learning_rate": 5.1438e-05, "loss": 0.0245, "step": 25720 }, { "epoch": 2.2609841827768014, "grad_norm": 0.23209120333194733, "learning_rate": 5.145800000000001e-05, "loss": 0.023, "step": 25730 }, { "epoch": 2.2618629173989455, "grad_norm": 0.2992802560329437, "learning_rate": 5.1478000000000004e-05, "loss": 0.0235, "step": 25740 }, { "epoch": 2.2627416520210897, "grad_norm": 0.19915875792503357, "learning_rate": 5.1498e-05, "loss": 0.0229, "step": 25750 }, { "epoch": 2.263620386643234, "grad_norm": 0.2618875205516815, "learning_rate": 5.1518e-05, "loss": 0.0245, "step": 25760 }, { "epoch": 2.264499121265378, "grad_norm": 0.2695561349391937, "learning_rate": 5.1538e-05, "loss": 0.0248, "step": 25770 }, { "epoch": 2.2653778558875217, "grad_norm": 0.3497191369533539, "learning_rate": 5.1558000000000003e-05, "loss": 0.0241, "step": 25780 }, { "epoch": 2.2662565905096663, "grad_norm": 0.2571616470813751, "learning_rate": 5.1578000000000007e-05, "loss": 0.0265, "step": 25790 }, { "epoch": 2.26713532513181, "grad_norm": 0.242437943816185, "learning_rate": 5.1598e-05, "loss": 0.0228, "step": 25800 }, { "epoch": 2.268014059753954, "grad_norm": 0.33221715688705444, "learning_rate": 5.1618e-05, "loss": 0.0235, "step": 25810 }, { "epoch": 2.2688927943760984, "grad_norm": 0.2732490003108978, "learning_rate": 5.1637999999999996e-05, "loss": 0.0248, "step": 25820 }, { "epoch": 2.2697715289982425, "grad_norm": 0.1690388023853302, "learning_rate": 5.1658000000000006e-05, "loss": 0.0225, "step": 25830 }, { "epoch": 2.2706502636203867, "grad_norm": 0.3766641318798065, "learning_rate": 5.1678e-05, "loss": 0.0242, "step": 25840 }, { "epoch": 2.271528998242531, "grad_norm": 0.32408827543258667, "learning_rate": 5.1698e-05, "loss": 0.0228, "step": 25850 }, { "epoch": 2.272407732864675, "grad_norm": 0.22614051401615143, "learning_rate": 5.1718e-05, "loss": 0.0244, "step": 25860 }, { "epoch": 2.273286467486819, "grad_norm": 0.2350919544696808, "learning_rate": 5.1738e-05, "loss": 0.0239, "step": 25870 }, { "epoch": 2.2741652021089633, "grad_norm": 0.33264702558517456, "learning_rate": 5.175800000000001e-05, "loss": 0.0228, "step": 25880 }, { "epoch": 2.275043936731107, "grad_norm": 0.2932528853416443, "learning_rate": 5.1778000000000005e-05, "loss": 0.024, "step": 25890 }, { "epoch": 2.275922671353251, "grad_norm": 0.3133087158203125, "learning_rate": 5.1798e-05, "loss": 0.0245, "step": 25900 }, { "epoch": 2.2768014059753954, "grad_norm": 0.1846410185098648, "learning_rate": 5.1818e-05, "loss": 0.0238, "step": 25910 }, { "epoch": 2.2776801405975395, "grad_norm": 0.18364492058753967, "learning_rate": 5.1838e-05, "loss": 0.0212, "step": 25920 }, { "epoch": 2.2785588752196837, "grad_norm": 0.16654613614082336, "learning_rate": 5.1858000000000004e-05, "loss": 0.0239, "step": 25930 }, { "epoch": 2.279437609841828, "grad_norm": 0.19059202075004578, "learning_rate": 5.187800000000001e-05, "loss": 0.0245, "step": 25940 }, { "epoch": 2.280316344463972, "grad_norm": 0.2962977886199951, "learning_rate": 5.1898000000000004e-05, "loss": 0.0235, "step": 25950 }, { "epoch": 2.281195079086116, "grad_norm": 0.25650694966316223, "learning_rate": 5.1918e-05, "loss": 0.023, "step": 25960 }, { "epoch": 2.2820738137082603, "grad_norm": 0.2794906497001648, "learning_rate": 5.1937999999999996e-05, "loss": 0.0249, "step": 25970 }, { "epoch": 2.282952548330404, "grad_norm": 0.22609536349773407, "learning_rate": 5.1958000000000006e-05, "loss": 0.0279, "step": 25980 }, { "epoch": 2.2838312829525482, "grad_norm": 0.16158843040466309, "learning_rate": 5.1978e-05, "loss": 0.0259, "step": 25990 }, { "epoch": 2.2847100175746924, "grad_norm": 0.34395667910575867, "learning_rate": 5.1998e-05, "loss": 0.0229, "step": 26000 }, { "epoch": 2.2855887521968365, "grad_norm": 0.18940965831279755, "learning_rate": 5.2018e-05, "loss": 0.0257, "step": 26010 }, { "epoch": 2.2864674868189807, "grad_norm": 0.23012946546077728, "learning_rate": 5.2038e-05, "loss": 0.0248, "step": 26020 }, { "epoch": 2.287346221441125, "grad_norm": 0.20202912390232086, "learning_rate": 5.205800000000001e-05, "loss": 0.0234, "step": 26030 }, { "epoch": 2.288224956063269, "grad_norm": 0.25276148319244385, "learning_rate": 5.2078000000000005e-05, "loss": 0.0254, "step": 26040 }, { "epoch": 2.289103690685413, "grad_norm": 0.22297267615795135, "learning_rate": 5.2098e-05, "loss": 0.0227, "step": 26050 }, { "epoch": 2.2899824253075574, "grad_norm": 0.26706600189208984, "learning_rate": 5.2118e-05, "loss": 0.0281, "step": 26060 }, { "epoch": 2.290861159929701, "grad_norm": 0.26285526156425476, "learning_rate": 5.2138e-05, "loss": 0.0228, "step": 26070 }, { "epoch": 2.2917398945518452, "grad_norm": 0.23124012351036072, "learning_rate": 5.2158000000000004e-05, "loss": 0.0239, "step": 26080 }, { "epoch": 2.2926186291739894, "grad_norm": 0.2682701349258423, "learning_rate": 5.217800000000001e-05, "loss": 0.0228, "step": 26090 }, { "epoch": 2.2934973637961336, "grad_norm": 0.3146224319934845, "learning_rate": 5.2198000000000004e-05, "loss": 0.0229, "step": 26100 }, { "epoch": 2.2943760984182777, "grad_norm": 0.21320082247257233, "learning_rate": 5.2218e-05, "loss": 0.0253, "step": 26110 }, { "epoch": 2.295254833040422, "grad_norm": 0.29514428973197937, "learning_rate": 5.2238e-05, "loss": 0.0255, "step": 26120 }, { "epoch": 2.296133567662566, "grad_norm": 0.40070995688438416, "learning_rate": 5.225800000000001e-05, "loss": 0.0238, "step": 26130 }, { "epoch": 2.29701230228471, "grad_norm": 0.24065178632736206, "learning_rate": 5.2278e-05, "loss": 0.0227, "step": 26140 }, { "epoch": 2.2978910369068544, "grad_norm": 0.24027056992053986, "learning_rate": 5.2298e-05, "loss": 0.0252, "step": 26150 }, { "epoch": 2.298769771528998, "grad_norm": 0.20408575236797333, "learning_rate": 5.2318e-05, "loss": 0.0231, "step": 26160 }, { "epoch": 2.2996485061511422, "grad_norm": 0.22561314702033997, "learning_rate": 5.2338e-05, "loss": 0.0249, "step": 26170 }, { "epoch": 2.3005272407732864, "grad_norm": 0.20207937061786652, "learning_rate": 5.235800000000001e-05, "loss": 0.0225, "step": 26180 }, { "epoch": 2.3014059753954306, "grad_norm": 0.25492531061172485, "learning_rate": 5.2378000000000006e-05, "loss": 0.0224, "step": 26190 }, { "epoch": 2.3022847100175747, "grad_norm": 0.3227454423904419, "learning_rate": 5.2398e-05, "loss": 0.0256, "step": 26200 }, { "epoch": 2.303163444639719, "grad_norm": 0.3547140061855316, "learning_rate": 5.2418e-05, "loss": 0.0237, "step": 26210 }, { "epoch": 2.304042179261863, "grad_norm": 0.27597516775131226, "learning_rate": 5.2437999999999995e-05, "loss": 0.0237, "step": 26220 }, { "epoch": 2.304920913884007, "grad_norm": 0.35260865092277527, "learning_rate": 5.2458000000000005e-05, "loss": 0.0239, "step": 26230 }, { "epoch": 2.3057996485061514, "grad_norm": 0.4070754945278168, "learning_rate": 5.247800000000001e-05, "loss": 0.0259, "step": 26240 }, { "epoch": 2.306678383128295, "grad_norm": 0.33939218521118164, "learning_rate": 5.2498000000000005e-05, "loss": 0.0258, "step": 26250 }, { "epoch": 2.3075571177504393, "grad_norm": 0.4226858913898468, "learning_rate": 5.2518e-05, "loss": 0.0275, "step": 26260 }, { "epoch": 2.3084358523725834, "grad_norm": 0.3232779800891876, "learning_rate": 5.2538e-05, "loss": 0.0267, "step": 26270 }, { "epoch": 2.3093145869947276, "grad_norm": 0.189116969704628, "learning_rate": 5.255800000000001e-05, "loss": 0.0253, "step": 26280 }, { "epoch": 2.3101933216168717, "grad_norm": 0.2810094952583313, "learning_rate": 5.2578000000000004e-05, "loss": 0.0244, "step": 26290 }, { "epoch": 2.311072056239016, "grad_norm": 0.2315996140241623, "learning_rate": 5.2598e-05, "loss": 0.0232, "step": 26300 }, { "epoch": 2.31195079086116, "grad_norm": 0.38299867510795593, "learning_rate": 5.2618000000000003e-05, "loss": 0.0291, "step": 26310 }, { "epoch": 2.3128295254833042, "grad_norm": 0.1810326874256134, "learning_rate": 5.2638e-05, "loss": 0.0245, "step": 26320 }, { "epoch": 2.3137082601054484, "grad_norm": 0.2154727578163147, "learning_rate": 5.265800000000001e-05, "loss": 0.0246, "step": 26330 }, { "epoch": 2.314586994727592, "grad_norm": 0.23565255105495453, "learning_rate": 5.2678000000000006e-05, "loss": 0.0263, "step": 26340 }, { "epoch": 2.3154657293497363, "grad_norm": 0.3613020181655884, "learning_rate": 5.2698e-05, "loss": 0.0239, "step": 26350 }, { "epoch": 2.3163444639718804, "grad_norm": 0.27200213074684143, "learning_rate": 5.2718e-05, "loss": 0.0223, "step": 26360 }, { "epoch": 2.3172231985940246, "grad_norm": 0.25472211837768555, "learning_rate": 5.2737999999999995e-05, "loss": 0.024, "step": 26370 }, { "epoch": 2.3181019332161688, "grad_norm": 0.2583603262901306, "learning_rate": 5.2758000000000005e-05, "loss": 0.0249, "step": 26380 }, { "epoch": 2.318980667838313, "grad_norm": 0.29174354672431946, "learning_rate": 5.2778e-05, "loss": 0.0263, "step": 26390 }, { "epoch": 2.319859402460457, "grad_norm": 0.2734518051147461, "learning_rate": 5.2798000000000005e-05, "loss": 0.0256, "step": 26400 }, { "epoch": 2.3207381370826012, "grad_norm": 0.2573036849498749, "learning_rate": 5.2818e-05, "loss": 0.0235, "step": 26410 }, { "epoch": 2.3216168717047454, "grad_norm": 0.25694605708122253, "learning_rate": 5.2838e-05, "loss": 0.0232, "step": 26420 }, { "epoch": 2.322495606326889, "grad_norm": 0.2266259342432022, "learning_rate": 5.285800000000001e-05, "loss": 0.0238, "step": 26430 }, { "epoch": 2.3233743409490333, "grad_norm": 0.21852530539035797, "learning_rate": 5.2878000000000004e-05, "loss": 0.0244, "step": 26440 }, { "epoch": 2.3242530755711774, "grad_norm": 0.21246416866779327, "learning_rate": 5.2898e-05, "loss": 0.0229, "step": 26450 }, { "epoch": 2.3251318101933216, "grad_norm": 0.22259926795959473, "learning_rate": 5.2918000000000004e-05, "loss": 0.0235, "step": 26460 }, { "epoch": 2.3260105448154658, "grad_norm": 0.20966239273548126, "learning_rate": 5.2938e-05, "loss": 0.0233, "step": 26470 }, { "epoch": 2.32688927943761, "grad_norm": 0.2733669877052307, "learning_rate": 5.295800000000001e-05, "loss": 0.0251, "step": 26480 }, { "epoch": 2.327768014059754, "grad_norm": 0.27635475993156433, "learning_rate": 5.297800000000001e-05, "loss": 0.0243, "step": 26490 }, { "epoch": 2.3286467486818982, "grad_norm": 0.24403096735477448, "learning_rate": 5.2998e-05, "loss": 0.026, "step": 26500 }, { "epoch": 2.3295254833040424, "grad_norm": 0.2878713011741638, "learning_rate": 5.3018e-05, "loss": 0.0261, "step": 26510 }, { "epoch": 2.330404217926186, "grad_norm": 0.21332615613937378, "learning_rate": 5.3037999999999996e-05, "loss": 0.0236, "step": 26520 }, { "epoch": 2.3312829525483303, "grad_norm": 0.31303271651268005, "learning_rate": 5.3058000000000006e-05, "loss": 0.0272, "step": 26530 }, { "epoch": 2.3321616871704745, "grad_norm": 0.1896185427904129, "learning_rate": 5.3078e-05, "loss": 0.0244, "step": 26540 }, { "epoch": 2.3330404217926186, "grad_norm": 0.2203862965106964, "learning_rate": 5.3098000000000006e-05, "loss": 0.0244, "step": 26550 }, { "epoch": 2.3339191564147628, "grad_norm": 0.3442118167877197, "learning_rate": 5.3118e-05, "loss": 0.0246, "step": 26560 }, { "epoch": 2.334797891036907, "grad_norm": 0.18568043410778046, "learning_rate": 5.3138e-05, "loss": 0.0259, "step": 26570 }, { "epoch": 2.335676625659051, "grad_norm": 0.22568322718143463, "learning_rate": 5.315800000000001e-05, "loss": 0.0258, "step": 26580 }, { "epoch": 2.3365553602811953, "grad_norm": 0.25992146134376526, "learning_rate": 5.3178000000000005e-05, "loss": 0.0241, "step": 26590 }, { "epoch": 2.3374340949033394, "grad_norm": 0.3609178364276886, "learning_rate": 5.3198e-05, "loss": 0.0281, "step": 26600 }, { "epoch": 2.338312829525483, "grad_norm": 0.33806225657463074, "learning_rate": 5.3218000000000004e-05, "loss": 0.0256, "step": 26610 }, { "epoch": 2.3391915641476273, "grad_norm": 0.3022158443927765, "learning_rate": 5.3238e-05, "loss": 0.0253, "step": 26620 }, { "epoch": 2.3400702987697715, "grad_norm": 0.27142754197120667, "learning_rate": 5.325800000000001e-05, "loss": 0.0259, "step": 26630 }, { "epoch": 2.3409490333919156, "grad_norm": 0.19219818711280823, "learning_rate": 5.327800000000001e-05, "loss": 0.0256, "step": 26640 }, { "epoch": 2.34182776801406, "grad_norm": 0.27228549122810364, "learning_rate": 5.3298000000000004e-05, "loss": 0.0243, "step": 26650 }, { "epoch": 2.342706502636204, "grad_norm": 0.21305842697620392, "learning_rate": 5.3318e-05, "loss": 0.0225, "step": 26660 }, { "epoch": 2.343585237258348, "grad_norm": 0.24271272122859955, "learning_rate": 5.3337999999999997e-05, "loss": 0.0225, "step": 26670 }, { "epoch": 2.3444639718804923, "grad_norm": 0.21097959578037262, "learning_rate": 5.3358000000000006e-05, "loss": 0.0227, "step": 26680 }, { "epoch": 2.3453427065026364, "grad_norm": 0.25375479459762573, "learning_rate": 5.3378e-05, "loss": 0.0224, "step": 26690 }, { "epoch": 2.34622144112478, "grad_norm": 0.3640991449356079, "learning_rate": 5.3398000000000006e-05, "loss": 0.0266, "step": 26700 }, { "epoch": 2.3471001757469243, "grad_norm": 0.36332082748413086, "learning_rate": 5.3418e-05, "loss": 0.0257, "step": 26710 }, { "epoch": 2.3479789103690685, "grad_norm": 0.19745701551437378, "learning_rate": 5.3438e-05, "loss": 0.0238, "step": 26720 }, { "epoch": 2.3488576449912126, "grad_norm": 0.27521514892578125, "learning_rate": 5.345800000000001e-05, "loss": 0.0273, "step": 26730 }, { "epoch": 2.349736379613357, "grad_norm": 0.2275242954492569, "learning_rate": 5.3478000000000005e-05, "loss": 0.0249, "step": 26740 }, { "epoch": 2.350615114235501, "grad_norm": 0.1763858199119568, "learning_rate": 5.3498e-05, "loss": 0.0219, "step": 26750 }, { "epoch": 2.351493848857645, "grad_norm": 0.25470125675201416, "learning_rate": 5.3518e-05, "loss": 0.0245, "step": 26760 }, { "epoch": 2.3523725834797893, "grad_norm": 0.17052198946475983, "learning_rate": 5.3538e-05, "loss": 0.0252, "step": 26770 }, { "epoch": 2.3532513181019334, "grad_norm": 0.22500234842300415, "learning_rate": 5.3558e-05, "loss": 0.0239, "step": 26780 }, { "epoch": 2.354130052724077, "grad_norm": 0.26183021068573, "learning_rate": 5.357800000000001e-05, "loss": 0.0269, "step": 26790 }, { "epoch": 2.3550087873462213, "grad_norm": 0.20969243347644806, "learning_rate": 5.3598000000000004e-05, "loss": 0.0256, "step": 26800 }, { "epoch": 2.3558875219683655, "grad_norm": 0.30910956859588623, "learning_rate": 5.3618e-05, "loss": 0.0238, "step": 26810 }, { "epoch": 2.3567662565905096, "grad_norm": 0.2514209747314453, "learning_rate": 5.3638e-05, "loss": 0.0247, "step": 26820 }, { "epoch": 2.357644991212654, "grad_norm": 0.21725864708423615, "learning_rate": 5.3658e-05, "loss": 0.0243, "step": 26830 }, { "epoch": 2.358523725834798, "grad_norm": 0.2578730881214142, "learning_rate": 5.3678000000000003e-05, "loss": 0.0232, "step": 26840 }, { "epoch": 2.359402460456942, "grad_norm": 0.22804056107997894, "learning_rate": 5.369800000000001e-05, "loss": 0.0229, "step": 26850 }, { "epoch": 2.3602811950790863, "grad_norm": 0.19937732815742493, "learning_rate": 5.3718e-05, "loss": 0.0252, "step": 26860 }, { "epoch": 2.3611599297012305, "grad_norm": 0.18041600286960602, "learning_rate": 5.3738e-05, "loss": 0.0236, "step": 26870 }, { "epoch": 2.362038664323374, "grad_norm": 0.24834372103214264, "learning_rate": 5.3757999999999996e-05, "loss": 0.0229, "step": 26880 }, { "epoch": 2.3629173989455183, "grad_norm": 0.23292692005634308, "learning_rate": 5.3778000000000006e-05, "loss": 0.024, "step": 26890 }, { "epoch": 2.3637961335676625, "grad_norm": 0.24060675501823425, "learning_rate": 5.3798e-05, "loss": 0.0235, "step": 26900 }, { "epoch": 2.3646748681898067, "grad_norm": 0.2922380566596985, "learning_rate": 5.3818e-05, "loss": 0.0239, "step": 26910 }, { "epoch": 2.365553602811951, "grad_norm": 0.2669958174228668, "learning_rate": 5.3838e-05, "loss": 0.0238, "step": 26920 }, { "epoch": 2.366432337434095, "grad_norm": 0.2674262821674347, "learning_rate": 5.3858e-05, "loss": 0.025, "step": 26930 }, { "epoch": 2.367311072056239, "grad_norm": 0.31285396218299866, "learning_rate": 5.387800000000001e-05, "loss": 0.0234, "step": 26940 }, { "epoch": 2.3681898066783833, "grad_norm": 0.250734806060791, "learning_rate": 5.3898000000000005e-05, "loss": 0.0242, "step": 26950 }, { "epoch": 2.3690685413005275, "grad_norm": 0.376048743724823, "learning_rate": 5.3918e-05, "loss": 0.025, "step": 26960 }, { "epoch": 2.369947275922671, "grad_norm": 0.22468189895153046, "learning_rate": 5.3938e-05, "loss": 0.0231, "step": 26970 }, { "epoch": 2.3708260105448153, "grad_norm": 0.2990213632583618, "learning_rate": 5.3958e-05, "loss": 0.0266, "step": 26980 }, { "epoch": 2.3717047451669595, "grad_norm": 0.2226438671350479, "learning_rate": 5.3978000000000004e-05, "loss": 0.0247, "step": 26990 }, { "epoch": 2.3725834797891037, "grad_norm": 0.31972405314445496, "learning_rate": 5.399800000000001e-05, "loss": 0.025, "step": 27000 }, { "epoch": 2.373462214411248, "grad_norm": 0.18782219290733337, "learning_rate": 5.4018000000000004e-05, "loss": 0.0242, "step": 27010 }, { "epoch": 2.374340949033392, "grad_norm": 0.1882195621728897, "learning_rate": 5.4038e-05, "loss": 0.0249, "step": 27020 }, { "epoch": 2.375219683655536, "grad_norm": 0.16405613720417023, "learning_rate": 5.4057999999999996e-05, "loss": 0.0262, "step": 27030 }, { "epoch": 2.3760984182776803, "grad_norm": 0.21642546355724335, "learning_rate": 5.4078000000000006e-05, "loss": 0.025, "step": 27040 }, { "epoch": 2.3769771528998245, "grad_norm": 0.20344139635562897, "learning_rate": 5.4098e-05, "loss": 0.0238, "step": 27050 }, { "epoch": 2.377855887521968, "grad_norm": 0.26751136779785156, "learning_rate": 5.4118e-05, "loss": 0.0243, "step": 27060 }, { "epoch": 2.3787346221441124, "grad_norm": 0.19875475764274597, "learning_rate": 5.4138e-05, "loss": 0.0237, "step": 27070 }, { "epoch": 2.3796133567662565, "grad_norm": 0.24799372255802155, "learning_rate": 5.4158e-05, "loss": 0.0243, "step": 27080 }, { "epoch": 2.3804920913884007, "grad_norm": 0.2168177366256714, "learning_rate": 5.417800000000001e-05, "loss": 0.0243, "step": 27090 }, { "epoch": 2.381370826010545, "grad_norm": 0.196205273270607, "learning_rate": 5.4198000000000005e-05, "loss": 0.0222, "step": 27100 }, { "epoch": 2.382249560632689, "grad_norm": 0.24395999312400818, "learning_rate": 5.4218e-05, "loss": 0.023, "step": 27110 }, { "epoch": 2.383128295254833, "grad_norm": 0.13379743695259094, "learning_rate": 5.4238e-05, "loss": 0.0228, "step": 27120 }, { "epoch": 2.3840070298769773, "grad_norm": 0.1987210512161255, "learning_rate": 5.4257999999999994e-05, "loss": 0.0251, "step": 27130 }, { "epoch": 2.3848857644991215, "grad_norm": 0.20157836377620697, "learning_rate": 5.4278000000000004e-05, "loss": 0.0223, "step": 27140 }, { "epoch": 2.385764499121265, "grad_norm": 0.1950172334909439, "learning_rate": 5.429800000000001e-05, "loss": 0.0219, "step": 27150 }, { "epoch": 2.3866432337434094, "grad_norm": 0.24388112127780914, "learning_rate": 5.4318000000000004e-05, "loss": 0.0232, "step": 27160 }, { "epoch": 2.3875219683655535, "grad_norm": 0.23198376595973969, "learning_rate": 5.4338e-05, "loss": 0.0225, "step": 27170 }, { "epoch": 2.3884007029876977, "grad_norm": 0.20723293721675873, "learning_rate": 5.4358e-05, "loss": 0.0258, "step": 27180 }, { "epoch": 2.389279437609842, "grad_norm": 0.2686028480529785, "learning_rate": 5.437800000000001e-05, "loss": 0.0233, "step": 27190 }, { "epoch": 2.390158172231986, "grad_norm": 0.2175588607788086, "learning_rate": 5.4398e-05, "loss": 0.024, "step": 27200 }, { "epoch": 2.39103690685413, "grad_norm": 0.2212730050086975, "learning_rate": 5.4418e-05, "loss": 0.0236, "step": 27210 }, { "epoch": 2.3919156414762743, "grad_norm": 0.30563074350357056, "learning_rate": 5.4438e-05, "loss": 0.0237, "step": 27220 }, { "epoch": 2.3927943760984185, "grad_norm": 0.2583383023738861, "learning_rate": 5.4458e-05, "loss": 0.0222, "step": 27230 }, { "epoch": 2.393673110720562, "grad_norm": 0.20732031762599945, "learning_rate": 5.447800000000001e-05, "loss": 0.0244, "step": 27240 }, { "epoch": 2.3945518453427064, "grad_norm": 0.30781763792037964, "learning_rate": 5.4498000000000006e-05, "loss": 0.0255, "step": 27250 }, { "epoch": 2.3954305799648505, "grad_norm": 0.17601880431175232, "learning_rate": 5.4518e-05, "loss": 0.0245, "step": 27260 }, { "epoch": 2.3963093145869947, "grad_norm": 0.20166301727294922, "learning_rate": 5.4538e-05, "loss": 0.0215, "step": 27270 }, { "epoch": 2.397188049209139, "grad_norm": 0.22126267850399017, "learning_rate": 5.4557999999999995e-05, "loss": 0.0234, "step": 27280 }, { "epoch": 2.398066783831283, "grad_norm": 0.3165261745452881, "learning_rate": 5.4578000000000005e-05, "loss": 0.0245, "step": 27290 }, { "epoch": 2.398945518453427, "grad_norm": 0.2540392279624939, "learning_rate": 5.4598e-05, "loss": 0.0259, "step": 27300 }, { "epoch": 2.3998242530755713, "grad_norm": 0.17529097199440002, "learning_rate": 5.4618000000000005e-05, "loss": 0.0213, "step": 27310 }, { "epoch": 2.4007029876977155, "grad_norm": 0.2222749888896942, "learning_rate": 5.4638e-05, "loss": 0.0221, "step": 27320 }, { "epoch": 2.4015817223198592, "grad_norm": 0.2063647210597992, "learning_rate": 5.4658e-05, "loss": 0.025, "step": 27330 }, { "epoch": 2.4024604569420034, "grad_norm": 0.20726321637630463, "learning_rate": 5.467800000000001e-05, "loss": 0.0251, "step": 27340 }, { "epoch": 2.4033391915641475, "grad_norm": 0.20893211662769318, "learning_rate": 5.4698000000000004e-05, "loss": 0.0232, "step": 27350 }, { "epoch": 2.4042179261862917, "grad_norm": 0.1442466527223587, "learning_rate": 5.4718e-05, "loss": 0.0221, "step": 27360 }, { "epoch": 2.405096660808436, "grad_norm": 0.1829877495765686, "learning_rate": 5.4738000000000003e-05, "loss": 0.0235, "step": 27370 }, { "epoch": 2.40597539543058, "grad_norm": 0.21843282878398895, "learning_rate": 5.4758e-05, "loss": 0.0239, "step": 27380 }, { "epoch": 2.406854130052724, "grad_norm": 0.2082715779542923, "learning_rate": 5.477800000000001e-05, "loss": 0.0216, "step": 27390 }, { "epoch": 2.4077328646748684, "grad_norm": 0.20949327945709229, "learning_rate": 5.4798000000000006e-05, "loss": 0.0233, "step": 27400 }, { "epoch": 2.4086115992970125, "grad_norm": 0.30336785316467285, "learning_rate": 5.4818e-05, "loss": 0.0236, "step": 27410 }, { "epoch": 2.4094903339191562, "grad_norm": 0.2828904986381531, "learning_rate": 5.4838e-05, "loss": 0.0234, "step": 27420 }, { "epoch": 2.4103690685413004, "grad_norm": 0.2957141101360321, "learning_rate": 5.4857999999999996e-05, "loss": 0.025, "step": 27430 }, { "epoch": 2.4112478031634446, "grad_norm": 0.28145167231559753, "learning_rate": 5.4878000000000005e-05, "loss": 0.0249, "step": 27440 }, { "epoch": 2.4121265377855887, "grad_norm": 0.21672123670578003, "learning_rate": 5.4898e-05, "loss": 0.0266, "step": 27450 }, { "epoch": 2.413005272407733, "grad_norm": 0.27834638953208923, "learning_rate": 5.4918000000000005e-05, "loss": 0.0249, "step": 27460 }, { "epoch": 2.413884007029877, "grad_norm": 0.2621001899242401, "learning_rate": 5.4938e-05, "loss": 0.0239, "step": 27470 }, { "epoch": 2.414762741652021, "grad_norm": 0.17789024114608765, "learning_rate": 5.4958e-05, "loss": 0.0223, "step": 27480 }, { "epoch": 2.4156414762741654, "grad_norm": 0.21423931419849396, "learning_rate": 5.497800000000001e-05, "loss": 0.0239, "step": 27490 }, { "epoch": 2.4165202108963095, "grad_norm": 0.24904079735279083, "learning_rate": 5.4998000000000004e-05, "loss": 0.0212, "step": 27500 }, { "epoch": 2.4173989455184532, "grad_norm": 0.27474260330200195, "learning_rate": 5.5018e-05, "loss": 0.025, "step": 27510 }, { "epoch": 2.4182776801405974, "grad_norm": 0.2782505750656128, "learning_rate": 5.5038000000000004e-05, "loss": 0.0253, "step": 27520 }, { "epoch": 2.4191564147627416, "grad_norm": 0.2858966290950775, "learning_rate": 5.5058e-05, "loss": 0.0225, "step": 27530 }, { "epoch": 2.4200351493848857, "grad_norm": 0.33210504055023193, "learning_rate": 5.507800000000001e-05, "loss": 0.0245, "step": 27540 }, { "epoch": 2.42091388400703, "grad_norm": 0.26047948002815247, "learning_rate": 5.509800000000001e-05, "loss": 0.0229, "step": 27550 }, { "epoch": 2.421792618629174, "grad_norm": 0.2546370327472687, "learning_rate": 5.5118e-05, "loss": 0.0256, "step": 27560 }, { "epoch": 2.422671353251318, "grad_norm": 0.2232128381729126, "learning_rate": 5.5138e-05, "loss": 0.0236, "step": 27570 }, { "epoch": 2.4235500878734624, "grad_norm": 0.2330678552389145, "learning_rate": 5.5157999999999996e-05, "loss": 0.0215, "step": 27580 }, { "epoch": 2.4244288224956065, "grad_norm": 0.2285596877336502, "learning_rate": 5.5178000000000006e-05, "loss": 0.0231, "step": 27590 }, { "epoch": 2.4253075571177503, "grad_norm": 0.2334977388381958, "learning_rate": 5.5198e-05, "loss": 0.0252, "step": 27600 }, { "epoch": 2.4261862917398944, "grad_norm": 0.20243436098098755, "learning_rate": 5.5218000000000006e-05, "loss": 0.0243, "step": 27610 }, { "epoch": 2.4270650263620386, "grad_norm": 0.2660866975784302, "learning_rate": 5.5238e-05, "loss": 0.0238, "step": 27620 }, { "epoch": 2.4279437609841827, "grad_norm": 0.19491936266422272, "learning_rate": 5.5258e-05, "loss": 0.024, "step": 27630 }, { "epoch": 2.428822495606327, "grad_norm": 0.2636050879955292, "learning_rate": 5.527800000000001e-05, "loss": 0.0245, "step": 27640 }, { "epoch": 2.429701230228471, "grad_norm": 0.17918676137924194, "learning_rate": 5.5298000000000005e-05, "loss": 0.0223, "step": 27650 }, { "epoch": 2.4305799648506152, "grad_norm": 0.2159094661474228, "learning_rate": 5.5318e-05, "loss": 0.0248, "step": 27660 }, { "epoch": 2.4314586994727594, "grad_norm": 0.21090030670166016, "learning_rate": 5.5338e-05, "loss": 0.0232, "step": 27670 }, { "epoch": 2.4323374340949035, "grad_norm": 0.21100269258022308, "learning_rate": 5.5358e-05, "loss": 0.0243, "step": 27680 }, { "epoch": 2.4332161687170473, "grad_norm": 0.30341944098472595, "learning_rate": 5.537800000000001e-05, "loss": 0.0255, "step": 27690 }, { "epoch": 2.4340949033391914, "grad_norm": 0.28144583106040955, "learning_rate": 5.539800000000001e-05, "loss": 0.0226, "step": 27700 }, { "epoch": 2.4349736379613356, "grad_norm": 0.23257943987846375, "learning_rate": 5.5418000000000004e-05, "loss": 0.0258, "step": 27710 }, { "epoch": 2.4358523725834798, "grad_norm": 0.22647713124752045, "learning_rate": 5.5438e-05, "loss": 0.0266, "step": 27720 }, { "epoch": 2.436731107205624, "grad_norm": 0.2758958637714386, "learning_rate": 5.5457999999999997e-05, "loss": 0.0241, "step": 27730 }, { "epoch": 2.437609841827768, "grad_norm": 0.24054035544395447, "learning_rate": 5.5478000000000007e-05, "loss": 0.0229, "step": 27740 }, { "epoch": 2.4384885764499122, "grad_norm": 0.28282877802848816, "learning_rate": 5.5498e-05, "loss": 0.0245, "step": 27750 }, { "epoch": 2.4393673110720564, "grad_norm": 0.2785070538520813, "learning_rate": 5.5518000000000006e-05, "loss": 0.0232, "step": 27760 }, { "epoch": 2.4402460456942006, "grad_norm": 0.2254485785961151, "learning_rate": 5.5538e-05, "loss": 0.0228, "step": 27770 }, { "epoch": 2.4411247803163443, "grad_norm": 0.2902337610721588, "learning_rate": 5.5558e-05, "loss": 0.0267, "step": 27780 }, { "epoch": 2.4420035149384884, "grad_norm": 0.1934742033481598, "learning_rate": 5.557800000000001e-05, "loss": 0.0259, "step": 27790 }, { "epoch": 2.4428822495606326, "grad_norm": 0.3717823326587677, "learning_rate": 5.5598000000000005e-05, "loss": 0.0269, "step": 27800 }, { "epoch": 2.4437609841827768, "grad_norm": 0.19699794054031372, "learning_rate": 5.5618e-05, "loss": 0.0243, "step": 27810 }, { "epoch": 2.444639718804921, "grad_norm": 0.29406583309173584, "learning_rate": 5.5638e-05, "loss": 0.0259, "step": 27820 }, { "epoch": 2.445518453427065, "grad_norm": 0.19926786422729492, "learning_rate": 5.5658e-05, "loss": 0.0243, "step": 27830 }, { "epoch": 2.4463971880492092, "grad_norm": 0.2602544128894806, "learning_rate": 5.5678000000000005e-05, "loss": 0.0221, "step": 27840 }, { "epoch": 2.4472759226713534, "grad_norm": 0.2248588651418686, "learning_rate": 5.569800000000001e-05, "loss": 0.023, "step": 27850 }, { "epoch": 2.4481546572934976, "grad_norm": 0.22242416441440582, "learning_rate": 5.5718000000000004e-05, "loss": 0.0241, "step": 27860 }, { "epoch": 2.4490333919156413, "grad_norm": 0.24202419817447662, "learning_rate": 5.5738e-05, "loss": 0.0251, "step": 27870 }, { "epoch": 2.4499121265377855, "grad_norm": 0.177146315574646, "learning_rate": 5.5758e-05, "loss": 0.026, "step": 27880 }, { "epoch": 2.4507908611599296, "grad_norm": 0.2350604087114334, "learning_rate": 5.577800000000001e-05, "loss": 0.0244, "step": 27890 }, { "epoch": 2.4516695957820738, "grad_norm": 0.2823990285396576, "learning_rate": 5.5798000000000003e-05, "loss": 0.025, "step": 27900 }, { "epoch": 2.452548330404218, "grad_norm": 0.25868546962738037, "learning_rate": 5.581800000000001e-05, "loss": 0.0254, "step": 27910 }, { "epoch": 2.453427065026362, "grad_norm": 0.20561915636062622, "learning_rate": 5.5838e-05, "loss": 0.0251, "step": 27920 }, { "epoch": 2.4543057996485063, "grad_norm": 0.2883884906768799, "learning_rate": 5.5858e-05, "loss": 0.0227, "step": 27930 }, { "epoch": 2.4551845342706504, "grad_norm": 0.23336032032966614, "learning_rate": 5.587800000000001e-05, "loss": 0.0244, "step": 27940 }, { "epoch": 2.4560632688927946, "grad_norm": 0.34367766976356506, "learning_rate": 5.5898000000000006e-05, "loss": 0.0234, "step": 27950 }, { "epoch": 2.4569420035149383, "grad_norm": 0.2691735625267029, "learning_rate": 5.5918e-05, "loss": 0.0257, "step": 27960 }, { "epoch": 2.4578207381370825, "grad_norm": 0.2605743110179901, "learning_rate": 5.5938e-05, "loss": 0.0235, "step": 27970 }, { "epoch": 2.4586994727592266, "grad_norm": 0.2632691264152527, "learning_rate": 5.5958e-05, "loss": 0.0244, "step": 27980 }, { "epoch": 2.459578207381371, "grad_norm": 0.2307945340871811, "learning_rate": 5.5978000000000005e-05, "loss": 0.0263, "step": 27990 }, { "epoch": 2.460456942003515, "grad_norm": 0.23559105396270752, "learning_rate": 5.599800000000001e-05, "loss": 0.0249, "step": 28000 }, { "epoch": 2.461335676625659, "grad_norm": 0.26758408546447754, "learning_rate": 5.6018000000000005e-05, "loss": 0.0267, "step": 28010 }, { "epoch": 2.4622144112478033, "grad_norm": 0.2727244198322296, "learning_rate": 5.6038e-05, "loss": 0.0262, "step": 28020 }, { "epoch": 2.4630931458699474, "grad_norm": 0.2519964873790741, "learning_rate": 5.6058e-05, "loss": 0.0225, "step": 28030 }, { "epoch": 2.4639718804920916, "grad_norm": 0.2708396017551422, "learning_rate": 5.6077999999999994e-05, "loss": 0.0252, "step": 28040 }, { "epoch": 2.4648506151142353, "grad_norm": 0.25494590401649475, "learning_rate": 5.6098000000000004e-05, "loss": 0.0245, "step": 28050 }, { "epoch": 2.4657293497363795, "grad_norm": 0.2391340285539627, "learning_rate": 5.611800000000001e-05, "loss": 0.0254, "step": 28060 }, { "epoch": 2.4666080843585236, "grad_norm": 0.2226860076189041, "learning_rate": 5.6138000000000004e-05, "loss": 0.023, "step": 28070 }, { "epoch": 2.467486818980668, "grad_norm": 0.32635849714279175, "learning_rate": 5.6158e-05, "loss": 0.027, "step": 28080 }, { "epoch": 2.468365553602812, "grad_norm": 0.3104507327079773, "learning_rate": 5.6177999999999996e-05, "loss": 0.025, "step": 28090 }, { "epoch": 2.469244288224956, "grad_norm": 0.2205335795879364, "learning_rate": 5.6198000000000006e-05, "loss": 0.0226, "step": 28100 }, { "epoch": 2.4701230228471003, "grad_norm": 0.26624271273612976, "learning_rate": 5.6218e-05, "loss": 0.0254, "step": 28110 }, { "epoch": 2.4710017574692444, "grad_norm": 0.21644483506679535, "learning_rate": 5.6238e-05, "loss": 0.0251, "step": 28120 }, { "epoch": 2.4718804920913886, "grad_norm": 0.2692057490348816, "learning_rate": 5.6258e-05, "loss": 0.0257, "step": 28130 }, { "epoch": 2.4727592267135323, "grad_norm": 0.24082361161708832, "learning_rate": 5.6278e-05, "loss": 0.0256, "step": 28140 }, { "epoch": 2.4736379613356765, "grad_norm": 0.2718566954135895, "learning_rate": 5.629800000000001e-05, "loss": 0.0252, "step": 28150 }, { "epoch": 2.4745166959578206, "grad_norm": 0.25887417793273926, "learning_rate": 5.6318000000000005e-05, "loss": 0.0242, "step": 28160 }, { "epoch": 2.475395430579965, "grad_norm": 0.21900875866413116, "learning_rate": 5.6338e-05, "loss": 0.025, "step": 28170 }, { "epoch": 2.476274165202109, "grad_norm": 0.3183000683784485, "learning_rate": 5.6358e-05, "loss": 0.0271, "step": 28180 }, { "epoch": 2.477152899824253, "grad_norm": 0.17250777781009674, "learning_rate": 5.6377999999999995e-05, "loss": 0.0254, "step": 28190 }, { "epoch": 2.4780316344463973, "grad_norm": 0.20259076356887817, "learning_rate": 5.6398000000000004e-05, "loss": 0.0221, "step": 28200 }, { "epoch": 2.4789103690685415, "grad_norm": 0.1746387779712677, "learning_rate": 5.6418e-05, "loss": 0.025, "step": 28210 }, { "epoch": 2.4797891036906856, "grad_norm": 0.2376514971256256, "learning_rate": 5.6438000000000004e-05, "loss": 0.0232, "step": 28220 }, { "epoch": 2.4806678383128293, "grad_norm": 0.36266154050827026, "learning_rate": 5.6458e-05, "loss": 0.022, "step": 28230 }, { "epoch": 2.4815465729349735, "grad_norm": 0.2557600736618042, "learning_rate": 5.6478e-05, "loss": 0.0249, "step": 28240 }, { "epoch": 2.4824253075571177, "grad_norm": 0.2307996302843094, "learning_rate": 5.649800000000001e-05, "loss": 0.0252, "step": 28250 }, { "epoch": 2.483304042179262, "grad_norm": 0.28988271951675415, "learning_rate": 5.6518e-05, "loss": 0.0248, "step": 28260 }, { "epoch": 2.484182776801406, "grad_norm": 0.2064240276813507, "learning_rate": 5.6538e-05, "loss": 0.024, "step": 28270 }, { "epoch": 2.48506151142355, "grad_norm": 0.19671279191970825, "learning_rate": 5.6558e-05, "loss": 0.0228, "step": 28280 }, { "epoch": 2.4859402460456943, "grad_norm": 0.2699902355670929, "learning_rate": 5.6578e-05, "loss": 0.0233, "step": 28290 }, { "epoch": 2.4868189806678385, "grad_norm": 0.15498103201389313, "learning_rate": 5.659800000000001e-05, "loss": 0.0225, "step": 28300 }, { "epoch": 2.4876977152899826, "grad_norm": 0.20898652076721191, "learning_rate": 5.6618000000000006e-05, "loss": 0.0252, "step": 28310 }, { "epoch": 2.4885764499121263, "grad_norm": 0.23219530284404755, "learning_rate": 5.6638e-05, "loss": 0.0229, "step": 28320 }, { "epoch": 2.4894551845342705, "grad_norm": 0.25383976101875305, "learning_rate": 5.6658e-05, "loss": 0.0231, "step": 28330 }, { "epoch": 2.4903339191564147, "grad_norm": 0.20777764916419983, "learning_rate": 5.6677999999999995e-05, "loss": 0.0234, "step": 28340 }, { "epoch": 2.491212653778559, "grad_norm": 0.25890016555786133, "learning_rate": 5.6698000000000005e-05, "loss": 0.0216, "step": 28350 }, { "epoch": 2.492091388400703, "grad_norm": 0.18845528364181519, "learning_rate": 5.6718e-05, "loss": 0.0238, "step": 28360 }, { "epoch": 2.492970123022847, "grad_norm": 0.20170755684375763, "learning_rate": 5.6738000000000005e-05, "loss": 0.0233, "step": 28370 }, { "epoch": 2.4938488576449913, "grad_norm": 0.23593056201934814, "learning_rate": 5.6758e-05, "loss": 0.0217, "step": 28380 }, { "epoch": 2.4947275922671355, "grad_norm": 0.20006349682807922, "learning_rate": 5.6778e-05, "loss": 0.0245, "step": 28390 }, { "epoch": 2.4956063268892796, "grad_norm": 0.19750022888183594, "learning_rate": 5.679800000000001e-05, "loss": 0.0221, "step": 28400 }, { "epoch": 2.4964850615114234, "grad_norm": 0.3032025992870331, "learning_rate": 5.6818000000000004e-05, "loss": 0.0239, "step": 28410 }, { "epoch": 2.4973637961335675, "grad_norm": 0.27559131383895874, "learning_rate": 5.6838e-05, "loss": 0.0219, "step": 28420 }, { "epoch": 2.4982425307557117, "grad_norm": 0.20802472531795502, "learning_rate": 5.6858000000000003e-05, "loss": 0.0213, "step": 28430 }, { "epoch": 2.499121265377856, "grad_norm": 0.18999968469142914, "learning_rate": 5.6878e-05, "loss": 0.0226, "step": 28440 }, { "epoch": 2.5, "grad_norm": 0.20562319457530975, "learning_rate": 5.689800000000001e-05, "loss": 0.0242, "step": 28450 }, { "epoch": 2.500878734622144, "grad_norm": 0.22660794854164124, "learning_rate": 5.6918000000000006e-05, "loss": 0.023, "step": 28460 }, { "epoch": 2.5017574692442883, "grad_norm": 0.3146689832210541, "learning_rate": 5.6938e-05, "loss": 0.0245, "step": 28470 }, { "epoch": 2.5026362038664325, "grad_norm": 0.21134275197982788, "learning_rate": 5.6958e-05, "loss": 0.0256, "step": 28480 }, { "epoch": 2.5035149384885766, "grad_norm": 0.2570085823535919, "learning_rate": 5.6977999999999996e-05, "loss": 0.0226, "step": 28490 }, { "epoch": 2.5043936731107204, "grad_norm": 0.24817943572998047, "learning_rate": 5.6998000000000006e-05, "loss": 0.025, "step": 28500 }, { "epoch": 2.5052724077328645, "grad_norm": 0.24424727261066437, "learning_rate": 5.7018e-05, "loss": 0.025, "step": 28510 }, { "epoch": 2.5061511423550087, "grad_norm": 0.20152126252651215, "learning_rate": 5.7038000000000005e-05, "loss": 0.023, "step": 28520 }, { "epoch": 2.507029876977153, "grad_norm": 0.2706468403339386, "learning_rate": 5.7058e-05, "loss": 0.0236, "step": 28530 }, { "epoch": 2.507908611599297, "grad_norm": 0.2205171287059784, "learning_rate": 5.7078e-05, "loss": 0.0251, "step": 28540 }, { "epoch": 2.508787346221441, "grad_norm": 0.29872915148735046, "learning_rate": 5.709800000000001e-05, "loss": 0.022, "step": 28550 }, { "epoch": 2.5096660808435853, "grad_norm": 0.2679526209831238, "learning_rate": 5.7118000000000004e-05, "loss": 0.0229, "step": 28560 }, { "epoch": 2.5105448154657295, "grad_norm": 0.2676604688167572, "learning_rate": 5.7138e-05, "loss": 0.0247, "step": 28570 }, { "epoch": 2.5114235500878737, "grad_norm": 0.21679352223873138, "learning_rate": 5.7158e-05, "loss": 0.0261, "step": 28580 }, { "epoch": 2.5123022847100174, "grad_norm": 0.5451251268386841, "learning_rate": 5.7178e-05, "loss": 0.0226, "step": 28590 }, { "epoch": 2.5131810193321615, "grad_norm": 0.2416870892047882, "learning_rate": 5.719800000000001e-05, "loss": 0.0247, "step": 28600 }, { "epoch": 2.5140597539543057, "grad_norm": 0.25192782282829285, "learning_rate": 5.721800000000001e-05, "loss": 0.0256, "step": 28610 }, { "epoch": 2.51493848857645, "grad_norm": 0.15753620862960815, "learning_rate": 5.7238e-05, "loss": 0.0253, "step": 28620 }, { "epoch": 2.515817223198594, "grad_norm": 0.18430152535438538, "learning_rate": 5.7258e-05, "loss": 0.025, "step": 28630 }, { "epoch": 2.516695957820738, "grad_norm": 0.23803357779979706, "learning_rate": 5.7277999999999996e-05, "loss": 0.0225, "step": 28640 }, { "epoch": 2.5175746924428823, "grad_norm": 0.24928820133209229, "learning_rate": 5.7298000000000006e-05, "loss": 0.0235, "step": 28650 }, { "epoch": 2.5184534270650265, "grad_norm": 0.3287147283554077, "learning_rate": 5.7318e-05, "loss": 0.0236, "step": 28660 }, { "epoch": 2.5193321616871707, "grad_norm": 0.29009056091308594, "learning_rate": 5.7338000000000006e-05, "loss": 0.0251, "step": 28670 }, { "epoch": 2.5202108963093144, "grad_norm": 0.19667309522628784, "learning_rate": 5.7358e-05, "loss": 0.022, "step": 28680 }, { "epoch": 2.5210896309314585, "grad_norm": 0.20892763137817383, "learning_rate": 5.7378e-05, "loss": 0.0244, "step": 28690 }, { "epoch": 2.5219683655536027, "grad_norm": 0.1826179176568985, "learning_rate": 5.739800000000001e-05, "loss": 0.024, "step": 28700 }, { "epoch": 2.522847100175747, "grad_norm": 0.20837418735027313, "learning_rate": 5.7418000000000005e-05, "loss": 0.024, "step": 28710 }, { "epoch": 2.523725834797891, "grad_norm": 0.311541348695755, "learning_rate": 5.7438e-05, "loss": 0.0243, "step": 28720 }, { "epoch": 2.524604569420035, "grad_norm": 0.20761100947856903, "learning_rate": 5.7458e-05, "loss": 0.0226, "step": 28730 }, { "epoch": 2.5254833040421794, "grad_norm": 0.1768271028995514, "learning_rate": 5.7478e-05, "loss": 0.0232, "step": 28740 }, { "epoch": 2.5263620386643235, "grad_norm": 0.2124328911304474, "learning_rate": 5.7498000000000004e-05, "loss": 0.0246, "step": 28750 }, { "epoch": 2.5272407732864677, "grad_norm": 0.2483387291431427, "learning_rate": 5.751800000000001e-05, "loss": 0.0255, "step": 28760 }, { "epoch": 2.5281195079086114, "grad_norm": 0.2836155295372009, "learning_rate": 5.7538000000000004e-05, "loss": 0.0235, "step": 28770 }, { "epoch": 2.5289982425307556, "grad_norm": 0.25669726729393005, "learning_rate": 5.7558e-05, "loss": 0.026, "step": 28780 }, { "epoch": 2.5298769771528997, "grad_norm": 0.21845978498458862, "learning_rate": 5.7577999999999997e-05, "loss": 0.0241, "step": 28790 }, { "epoch": 2.530755711775044, "grad_norm": 0.2723180055618286, "learning_rate": 5.7598000000000007e-05, "loss": 0.0226, "step": 28800 }, { "epoch": 2.531634446397188, "grad_norm": 0.23103754222393036, "learning_rate": 5.7618e-05, "loss": 0.025, "step": 28810 }, { "epoch": 2.532513181019332, "grad_norm": 0.33700987696647644, "learning_rate": 5.7638000000000006e-05, "loss": 0.0257, "step": 28820 }, { "epoch": 2.5333919156414764, "grad_norm": 0.3470402956008911, "learning_rate": 5.7658e-05, "loss": 0.0239, "step": 28830 }, { "epoch": 2.5342706502636205, "grad_norm": 0.33611801266670227, "learning_rate": 5.7678e-05, "loss": 0.0255, "step": 28840 }, { "epoch": 2.5351493848857647, "grad_norm": 0.3229193687438965, "learning_rate": 5.769800000000001e-05, "loss": 0.0254, "step": 28850 }, { "epoch": 2.5360281195079084, "grad_norm": 0.21164748072624207, "learning_rate": 5.7718000000000005e-05, "loss": 0.0251, "step": 28860 }, { "epoch": 2.5369068541300526, "grad_norm": 0.24294698238372803, "learning_rate": 5.7738e-05, "loss": 0.0243, "step": 28870 }, { "epoch": 2.5377855887521967, "grad_norm": 0.22285783290863037, "learning_rate": 5.7758e-05, "loss": 0.0267, "step": 28880 }, { "epoch": 2.538664323374341, "grad_norm": 0.2208998203277588, "learning_rate": 5.7778e-05, "loss": 0.0207, "step": 28890 }, { "epoch": 2.539543057996485, "grad_norm": 0.2388780564069748, "learning_rate": 5.7798000000000005e-05, "loss": 0.0231, "step": 28900 }, { "epoch": 2.540421792618629, "grad_norm": 0.352311372756958, "learning_rate": 5.781800000000001e-05, "loss": 0.0263, "step": 28910 }, { "epoch": 2.5413005272407734, "grad_norm": 0.2712363004684448, "learning_rate": 5.7838000000000004e-05, "loss": 0.0224, "step": 28920 }, { "epoch": 2.5421792618629175, "grad_norm": 0.201191246509552, "learning_rate": 5.7858e-05, "loss": 0.0227, "step": 28930 }, { "epoch": 2.5430579964850617, "grad_norm": 0.1808079481124878, "learning_rate": 5.7878e-05, "loss": 0.0222, "step": 28940 }, { "epoch": 2.5439367311072054, "grad_norm": 0.22402602434158325, "learning_rate": 5.789800000000001e-05, "loss": 0.024, "step": 28950 }, { "epoch": 2.5448154657293496, "grad_norm": 0.2024383544921875, "learning_rate": 5.7918000000000003e-05, "loss": 0.0244, "step": 28960 }, { "epoch": 2.5456942003514937, "grad_norm": 0.2325199842453003, "learning_rate": 5.793800000000001e-05, "loss": 0.0243, "step": 28970 }, { "epoch": 2.546572934973638, "grad_norm": 0.2542589008808136, "learning_rate": 5.7958e-05, "loss": 0.0239, "step": 28980 }, { "epoch": 2.547451669595782, "grad_norm": 0.26289939880371094, "learning_rate": 5.7978e-05, "loss": 0.0229, "step": 28990 }, { "epoch": 2.5483304042179262, "grad_norm": 0.2281097024679184, "learning_rate": 5.799800000000001e-05, "loss": 0.023, "step": 29000 }, { "epoch": 2.5492091388400704, "grad_norm": 0.21027615666389465, "learning_rate": 5.8018000000000006e-05, "loss": 0.0233, "step": 29010 }, { "epoch": 2.5500878734622145, "grad_norm": 0.2996568977832794, "learning_rate": 5.8038e-05, "loss": 0.0228, "step": 29020 }, { "epoch": 2.5509666080843587, "grad_norm": 0.462344765663147, "learning_rate": 5.8058e-05, "loss": 0.0235, "step": 29030 }, { "epoch": 2.5518453427065024, "grad_norm": 0.18134427070617676, "learning_rate": 5.8078e-05, "loss": 0.0229, "step": 29040 }, { "epoch": 2.5527240773286466, "grad_norm": 0.1727302372455597, "learning_rate": 5.8098000000000005e-05, "loss": 0.0256, "step": 29050 }, { "epoch": 2.5536028119507908, "grad_norm": 0.20856261253356934, "learning_rate": 5.811800000000001e-05, "loss": 0.0247, "step": 29060 }, { "epoch": 2.554481546572935, "grad_norm": 0.31379613280296326, "learning_rate": 5.8138000000000005e-05, "loss": 0.0241, "step": 29070 }, { "epoch": 2.555360281195079, "grad_norm": 0.278610497713089, "learning_rate": 5.8158e-05, "loss": 0.0247, "step": 29080 }, { "epoch": 2.5562390158172232, "grad_norm": 0.22506268322467804, "learning_rate": 5.8178e-05, "loss": 0.023, "step": 29090 }, { "epoch": 2.5571177504393674, "grad_norm": 0.19933432340621948, "learning_rate": 5.819800000000001e-05, "loss": 0.0228, "step": 29100 }, { "epoch": 2.5579964850615116, "grad_norm": 0.21220363676548004, "learning_rate": 5.8218000000000004e-05, "loss": 0.025, "step": 29110 }, { "epoch": 2.5588752196836557, "grad_norm": 0.21447032690048218, "learning_rate": 5.8238e-05, "loss": 0.0246, "step": 29120 }, { "epoch": 2.5597539543057994, "grad_norm": 0.2219160795211792, "learning_rate": 5.8258000000000004e-05, "loss": 0.0232, "step": 29130 }, { "epoch": 2.5606326889279436, "grad_norm": 0.18914449214935303, "learning_rate": 5.8278e-05, "loss": 0.0237, "step": 29140 }, { "epoch": 2.5615114235500878, "grad_norm": 0.2030240297317505, "learning_rate": 5.829800000000001e-05, "loss": 0.0238, "step": 29150 }, { "epoch": 2.562390158172232, "grad_norm": 0.3603041172027588, "learning_rate": 5.8318000000000006e-05, "loss": 0.0239, "step": 29160 }, { "epoch": 2.563268892794376, "grad_norm": 0.2866019904613495, "learning_rate": 5.8338e-05, "loss": 0.0227, "step": 29170 }, { "epoch": 2.5641476274165202, "grad_norm": 0.30266276001930237, "learning_rate": 5.8358e-05, "loss": 0.0252, "step": 29180 }, { "epoch": 2.5650263620386644, "grad_norm": 0.19688734412193298, "learning_rate": 5.8378e-05, "loss": 0.0232, "step": 29190 }, { "epoch": 2.5659050966608086, "grad_norm": 0.2573350965976715, "learning_rate": 5.8398000000000006e-05, "loss": 0.0231, "step": 29200 }, { "epoch": 2.5667838312829527, "grad_norm": 0.18213754892349243, "learning_rate": 5.841800000000001e-05, "loss": 0.0254, "step": 29210 }, { "epoch": 2.5676625659050965, "grad_norm": 0.29184725880622864, "learning_rate": 5.8438000000000005e-05, "loss": 0.0247, "step": 29220 }, { "epoch": 2.5685413005272406, "grad_norm": 0.19407621026039124, "learning_rate": 5.8458e-05, "loss": 0.0227, "step": 29230 }, { "epoch": 2.5694200351493848, "grad_norm": 0.2212926596403122, "learning_rate": 5.8478e-05, "loss": 0.0241, "step": 29240 }, { "epoch": 2.570298769771529, "grad_norm": 0.2341824322938919, "learning_rate": 5.849800000000001e-05, "loss": 0.0239, "step": 29250 }, { "epoch": 2.571177504393673, "grad_norm": 0.16558334231376648, "learning_rate": 5.8518000000000005e-05, "loss": 0.0264, "step": 29260 }, { "epoch": 2.5720562390158173, "grad_norm": 0.24929696321487427, "learning_rate": 5.8538e-05, "loss": 0.0247, "step": 29270 }, { "epoch": 2.5729349736379614, "grad_norm": 0.2626969814300537, "learning_rate": 5.8558000000000004e-05, "loss": 0.0249, "step": 29280 }, { "epoch": 2.5738137082601056, "grad_norm": 0.27330610156059265, "learning_rate": 5.8578e-05, "loss": 0.0235, "step": 29290 }, { "epoch": 2.5746924428822497, "grad_norm": 0.2791828215122223, "learning_rate": 5.8598e-05, "loss": 0.0246, "step": 29300 }, { "epoch": 2.5755711775043935, "grad_norm": 0.4215807020664215, "learning_rate": 5.861800000000001e-05, "loss": 0.0251, "step": 29310 }, { "epoch": 2.5764499121265376, "grad_norm": 0.2579632103443146, "learning_rate": 5.8638e-05, "loss": 0.0253, "step": 29320 }, { "epoch": 2.577328646748682, "grad_norm": 0.26577651500701904, "learning_rate": 5.8658e-05, "loss": 0.0238, "step": 29330 }, { "epoch": 2.578207381370826, "grad_norm": 0.3672355115413666, "learning_rate": 5.8678e-05, "loss": 0.0252, "step": 29340 }, { "epoch": 2.57908611599297, "grad_norm": 0.19223976135253906, "learning_rate": 5.8698e-05, "loss": 0.0256, "step": 29350 }, { "epoch": 2.5799648506151143, "grad_norm": 0.1767713725566864, "learning_rate": 5.871800000000001e-05, "loss": 0.0223, "step": 29360 }, { "epoch": 2.5808435852372584, "grad_norm": 0.21505245566368103, "learning_rate": 5.8738000000000006e-05, "loss": 0.0232, "step": 29370 }, { "epoch": 2.5817223198594026, "grad_norm": 0.26271429657936096, "learning_rate": 5.8758e-05, "loss": 0.0237, "step": 29380 }, { "epoch": 2.5826010544815468, "grad_norm": 0.18891048431396484, "learning_rate": 5.8778e-05, "loss": 0.0236, "step": 29390 }, { "epoch": 2.5834797891036905, "grad_norm": 0.20438648760318756, "learning_rate": 5.8797999999999995e-05, "loss": 0.0233, "step": 29400 }, { "epoch": 2.5843585237258346, "grad_norm": 0.2235652059316635, "learning_rate": 5.8818000000000005e-05, "loss": 0.0232, "step": 29410 }, { "epoch": 2.585237258347979, "grad_norm": 0.23337024450302124, "learning_rate": 5.8838e-05, "loss": 0.024, "step": 29420 }, { "epoch": 2.586115992970123, "grad_norm": 0.2826691269874573, "learning_rate": 5.8858000000000005e-05, "loss": 0.0268, "step": 29430 }, { "epoch": 2.586994727592267, "grad_norm": 0.19029784202575684, "learning_rate": 5.8878e-05, "loss": 0.021, "step": 29440 }, { "epoch": 2.5878734622144113, "grad_norm": 0.24542316794395447, "learning_rate": 5.8898e-05, "loss": 0.022, "step": 29450 }, { "epoch": 2.5887521968365554, "grad_norm": 0.18858331441879272, "learning_rate": 5.891800000000001e-05, "loss": 0.0254, "step": 29460 }, { "epoch": 2.5896309314586996, "grad_norm": 0.1757849156856537, "learning_rate": 5.8938000000000004e-05, "loss": 0.0243, "step": 29470 }, { "epoch": 2.5905096660808438, "grad_norm": 0.19494014978408813, "learning_rate": 5.8958e-05, "loss": 0.0231, "step": 29480 }, { "epoch": 2.5913884007029875, "grad_norm": 0.31032997369766235, "learning_rate": 5.8978e-05, "loss": 0.0258, "step": 29490 }, { "epoch": 2.5922671353251316, "grad_norm": 0.22916044294834137, "learning_rate": 5.8998e-05, "loss": 0.0246, "step": 29500 }, { "epoch": 2.593145869947276, "grad_norm": 0.2527830898761749, "learning_rate": 5.901800000000001e-05, "loss": 0.0235, "step": 29510 }, { "epoch": 2.59402460456942, "grad_norm": 0.3744308054447174, "learning_rate": 5.9038000000000006e-05, "loss": 0.0252, "step": 29520 }, { "epoch": 2.594903339191564, "grad_norm": 0.21717679500579834, "learning_rate": 5.9058e-05, "loss": 0.0238, "step": 29530 }, { "epoch": 2.5957820738137083, "grad_norm": 0.22760769724845886, "learning_rate": 5.9078e-05, "loss": 0.0241, "step": 29540 }, { "epoch": 2.5966608084358525, "grad_norm": 0.31806105375289917, "learning_rate": 5.9097999999999996e-05, "loss": 0.0225, "step": 29550 }, { "epoch": 2.5975395430579966, "grad_norm": 0.3011299669742584, "learning_rate": 5.9118000000000006e-05, "loss": 0.0239, "step": 29560 }, { "epoch": 2.5984182776801408, "grad_norm": 0.3361513912677765, "learning_rate": 5.9138e-05, "loss": 0.0225, "step": 29570 }, { "epoch": 2.5992970123022845, "grad_norm": 0.20532000064849854, "learning_rate": 5.9158000000000005e-05, "loss": 0.0268, "step": 29580 }, { "epoch": 2.600175746924429, "grad_norm": 0.2454250603914261, "learning_rate": 5.9178e-05, "loss": 0.0237, "step": 29590 }, { "epoch": 2.601054481546573, "grad_norm": 0.19137358665466309, "learning_rate": 5.9198e-05, "loss": 0.0244, "step": 29600 }, { "epoch": 2.601933216168717, "grad_norm": 0.15667429566383362, "learning_rate": 5.921800000000001e-05, "loss": 0.0235, "step": 29610 }, { "epoch": 2.602811950790861, "grad_norm": 0.2573208808898926, "learning_rate": 5.9238000000000004e-05, "loss": 0.024, "step": 29620 }, { "epoch": 2.6036906854130053, "grad_norm": 0.276623010635376, "learning_rate": 5.9258e-05, "loss": 0.0246, "step": 29630 }, { "epoch": 2.6045694200351495, "grad_norm": 0.30163949728012085, "learning_rate": 5.9278e-05, "loss": 0.0257, "step": 29640 }, { "epoch": 2.6054481546572936, "grad_norm": 0.35464924573898315, "learning_rate": 5.9298e-05, "loss": 0.0249, "step": 29650 }, { "epoch": 2.606326889279438, "grad_norm": 0.22232092916965485, "learning_rate": 5.9318000000000004e-05, "loss": 0.0218, "step": 29660 }, { "epoch": 2.6072056239015815, "grad_norm": 0.21447403728961945, "learning_rate": 5.933800000000001e-05, "loss": 0.0225, "step": 29670 }, { "epoch": 2.608084358523726, "grad_norm": 0.2991069555282593, "learning_rate": 5.9358e-05, "loss": 0.0217, "step": 29680 }, { "epoch": 2.60896309314587, "grad_norm": 0.2857450544834137, "learning_rate": 5.9378e-05, "loss": 0.0237, "step": 29690 }, { "epoch": 2.609841827768014, "grad_norm": 0.17908118665218353, "learning_rate": 5.9397999999999996e-05, "loss": 0.024, "step": 29700 }, { "epoch": 2.610720562390158, "grad_norm": 0.24938873946666718, "learning_rate": 5.9418000000000006e-05, "loss": 0.0236, "step": 29710 }, { "epoch": 2.6115992970123023, "grad_norm": 0.19030120968818665, "learning_rate": 5.9438e-05, "loss": 0.0228, "step": 29720 }, { "epoch": 2.6124780316344465, "grad_norm": 0.2522960603237152, "learning_rate": 5.9458000000000006e-05, "loss": 0.0228, "step": 29730 }, { "epoch": 2.6133567662565906, "grad_norm": 0.21651019155979156, "learning_rate": 5.9478e-05, "loss": 0.0221, "step": 29740 }, { "epoch": 2.614235500878735, "grad_norm": 0.29647740721702576, "learning_rate": 5.9498e-05, "loss": 0.0241, "step": 29750 }, { "epoch": 2.6151142355008785, "grad_norm": 0.2568770945072174, "learning_rate": 5.951800000000001e-05, "loss": 0.0233, "step": 29760 }, { "epoch": 2.615992970123023, "grad_norm": 0.25758475065231323, "learning_rate": 5.9538000000000005e-05, "loss": 0.024, "step": 29770 }, { "epoch": 2.616871704745167, "grad_norm": 0.2525334358215332, "learning_rate": 5.9558e-05, "loss": 0.0257, "step": 29780 }, { "epoch": 2.617750439367311, "grad_norm": 0.2307051122188568, "learning_rate": 5.9578e-05, "loss": 0.0245, "step": 29790 }, { "epoch": 2.618629173989455, "grad_norm": 0.21386203169822693, "learning_rate": 5.9598e-05, "loss": 0.025, "step": 29800 }, { "epoch": 2.6195079086115993, "grad_norm": 0.20228539407253265, "learning_rate": 5.9618000000000004e-05, "loss": 0.026, "step": 29810 }, { "epoch": 2.6203866432337435, "grad_norm": 0.21880574524402618, "learning_rate": 5.963800000000001e-05, "loss": 0.0238, "step": 29820 }, { "epoch": 2.6212653778558876, "grad_norm": 0.2253495454788208, "learning_rate": 5.9658000000000004e-05, "loss": 0.023, "step": 29830 }, { "epoch": 2.622144112478032, "grad_norm": 0.27972331643104553, "learning_rate": 5.9678e-05, "loss": 0.0246, "step": 29840 }, { "epoch": 2.6230228471001755, "grad_norm": 0.22790464758872986, "learning_rate": 5.9698e-05, "loss": 0.0242, "step": 29850 }, { "epoch": 2.62390158172232, "grad_norm": 0.2729972004890442, "learning_rate": 5.9718000000000007e-05, "loss": 0.0249, "step": 29860 }, { "epoch": 2.624780316344464, "grad_norm": 0.288981169462204, "learning_rate": 5.9738e-05, "loss": 0.0254, "step": 29870 }, { "epoch": 2.625659050966608, "grad_norm": 0.21629875898361206, "learning_rate": 5.9758000000000006e-05, "loss": 0.0249, "step": 29880 }, { "epoch": 2.626537785588752, "grad_norm": 0.1643693745136261, "learning_rate": 5.9778e-05, "loss": 0.026, "step": 29890 }, { "epoch": 2.6274165202108963, "grad_norm": 0.24887621402740479, "learning_rate": 5.9798e-05, "loss": 0.0246, "step": 29900 }, { "epoch": 2.6282952548330405, "grad_norm": 0.19160936772823334, "learning_rate": 5.981800000000001e-05, "loss": 0.0241, "step": 29910 }, { "epoch": 2.6291739894551847, "grad_norm": 0.2921736538410187, "learning_rate": 5.9838000000000005e-05, "loss": 0.0254, "step": 29920 }, { "epoch": 2.630052724077329, "grad_norm": 0.23489633202552795, "learning_rate": 5.9858e-05, "loss": 0.0261, "step": 29930 }, { "epoch": 2.6309314586994725, "grad_norm": 0.1895064413547516, "learning_rate": 5.9878e-05, "loss": 0.0234, "step": 29940 }, { "epoch": 2.631810193321617, "grad_norm": 0.27643704414367676, "learning_rate": 5.9898e-05, "loss": 0.024, "step": 29950 }, { "epoch": 2.632688927943761, "grad_norm": 0.27418455481529236, "learning_rate": 5.9918000000000005e-05, "loss": 0.022, "step": 29960 }, { "epoch": 2.633567662565905, "grad_norm": 0.17672473192214966, "learning_rate": 5.993800000000001e-05, "loss": 0.0229, "step": 29970 }, { "epoch": 2.634446397188049, "grad_norm": 0.1736183911561966, "learning_rate": 5.9958000000000004e-05, "loss": 0.0224, "step": 29980 }, { "epoch": 2.6353251318101933, "grad_norm": 0.18858736753463745, "learning_rate": 5.9978e-05, "loss": 0.0221, "step": 29990 }, { "epoch": 2.6362038664323375, "grad_norm": 0.21524402499198914, "learning_rate": 5.9998e-05, "loss": 0.0226, "step": 30000 }, { "epoch": 535.8928571428571, "grad_norm": 0.8295404314994812, "learning_rate": 6.001800000000001e-05, "loss": 0.2067, "step": 30010 }, { "epoch": 536.0714285714286, "grad_norm": 0.6381756663322449, "learning_rate": 6.0038000000000004e-05, "loss": 0.0948, "step": 30020 }, { "epoch": 536.25, "grad_norm": 0.9255214333534241, "learning_rate": 6.0058e-05, "loss": 0.0773, "step": 30030 }, { "epoch": 536.4285714285714, "grad_norm": 0.7327773571014404, "learning_rate": 6.0078e-05, "loss": 0.063, "step": 30040 }, { "epoch": 536.6071428571429, "grad_norm": 0.6379172205924988, "learning_rate": 6.0098e-05, "loss": 0.0584, "step": 30050 }, { "epoch": 536.7857142857143, "grad_norm": 0.49742043018341064, "learning_rate": 6.011800000000001e-05, "loss": 0.0534, "step": 30060 }, { "epoch": 536.9642857142857, "grad_norm": 0.5963789820671082, "learning_rate": 6.0138000000000006e-05, "loss": 0.0542, "step": 30070 }, { "epoch": 537.1428571428571, "grad_norm": 0.3682718276977539, "learning_rate": 6.0158e-05, "loss": 0.0441, "step": 30080 }, { "epoch": 537.3214285714286, "grad_norm": 0.3341223895549774, "learning_rate": 6.0178e-05, "loss": 0.0439, "step": 30090 }, { "epoch": 537.5, "grad_norm": 0.3379054665565491, "learning_rate": 6.0198e-05, "loss": 0.0446, "step": 30100 }, { "epoch": 537.6785714285714, "grad_norm": 0.3546058237552643, "learning_rate": 6.0218000000000005e-05, "loss": 0.0384, "step": 30110 }, { "epoch": 537.8571428571429, "grad_norm": 0.3468504250049591, "learning_rate": 6.023800000000001e-05, "loss": 0.0358, "step": 30120 }, { "epoch": 538.0357142857143, "grad_norm": 0.3291664123535156, "learning_rate": 6.0258000000000005e-05, "loss": 0.0388, "step": 30130 }, { "epoch": 538.2142857142857, "grad_norm": 0.291990727186203, "learning_rate": 6.0278e-05, "loss": 0.0339, "step": 30140 }, { "epoch": 538.3928571428571, "grad_norm": 0.34485387802124023, "learning_rate": 6.0298e-05, "loss": 0.0347, "step": 30150 }, { "epoch": 538.5714285714286, "grad_norm": 0.3350243866443634, "learning_rate": 6.031800000000001e-05, "loss": 0.0307, "step": 30160 }, { "epoch": 538.75, "grad_norm": 0.36345741152763367, "learning_rate": 6.0338000000000004e-05, "loss": 0.0309, "step": 30170 }, { "epoch": 538.9285714285714, "grad_norm": 0.2263478934764862, "learning_rate": 6.0358e-05, "loss": 0.0309, "step": 30180 }, { "epoch": 539.1071428571429, "grad_norm": 0.257233589887619, "learning_rate": 6.0378000000000004e-05, "loss": 0.0251, "step": 30190 }, { "epoch": 539.2857142857143, "grad_norm": 0.3484329879283905, "learning_rate": 6.0398e-05, "loss": 0.0282, "step": 30200 }, { "epoch": 539.4642857142857, "grad_norm": 0.3121212124824524, "learning_rate": 6.041800000000001e-05, "loss": 0.0303, "step": 30210 }, { "epoch": 539.6428571428571, "grad_norm": 0.3382284641265869, "learning_rate": 6.0438000000000006e-05, "loss": 0.0258, "step": 30220 }, { "epoch": 539.8214285714286, "grad_norm": 0.5041925311088562, "learning_rate": 6.0458e-05, "loss": 0.0286, "step": 30230 }, { "epoch": 540.0, "grad_norm": 0.23477523028850555, "learning_rate": 6.0478e-05, "loss": 0.0282, "step": 30240 }, { "epoch": 540.1785714285714, "grad_norm": 0.34757229685783386, "learning_rate": 6.0497999999999996e-05, "loss": 0.0271, "step": 30250 }, { "epoch": 540.3571428571429, "grad_norm": 0.2760128080844879, "learning_rate": 6.0518000000000006e-05, "loss": 0.0232, "step": 30260 }, { "epoch": 540.5357142857143, "grad_norm": 0.20670850574970245, "learning_rate": 6.053800000000001e-05, "loss": 0.0251, "step": 30270 }, { "epoch": 540.7142857142857, "grad_norm": 0.3189743459224701, "learning_rate": 6.0558000000000005e-05, "loss": 0.0235, "step": 30280 }, { "epoch": 540.8928571428571, "grad_norm": 0.2595438063144684, "learning_rate": 6.0578e-05, "loss": 0.0239, "step": 30290 }, { "epoch": 541.0714285714286, "grad_norm": 0.26622509956359863, "learning_rate": 6.0598e-05, "loss": 0.0248, "step": 30300 }, { "epoch": 541.25, "grad_norm": 0.3851238191127777, "learning_rate": 6.061800000000001e-05, "loss": 0.0223, "step": 30310 }, { "epoch": 541.4285714285714, "grad_norm": 0.3310007154941559, "learning_rate": 6.0638000000000005e-05, "loss": 0.0221, "step": 30320 }, { "epoch": 541.6071428571429, "grad_norm": 0.2931680679321289, "learning_rate": 6.0658e-05, "loss": 0.0254, "step": 30330 }, { "epoch": 541.7857142857143, "grad_norm": 0.28556278347969055, "learning_rate": 6.0678000000000004e-05, "loss": 0.0226, "step": 30340 }, { "epoch": 541.9642857142857, "grad_norm": 0.321854829788208, "learning_rate": 6.0698e-05, "loss": 0.0228, "step": 30350 }, { "epoch": 542.1428571428571, "grad_norm": 0.28416070342063904, "learning_rate": 6.071800000000001e-05, "loss": 0.0218, "step": 30360 }, { "epoch": 542.3214285714286, "grad_norm": 0.25775235891342163, "learning_rate": 6.073800000000001e-05, "loss": 0.022, "step": 30370 }, { "epoch": 542.5, "grad_norm": 0.27598392963409424, "learning_rate": 6.0758000000000003e-05, "loss": 0.0203, "step": 30380 }, { "epoch": 542.6785714285714, "grad_norm": 0.4076850712299347, "learning_rate": 6.0778e-05, "loss": 0.0225, "step": 30390 }, { "epoch": 542.8571428571429, "grad_norm": 0.2831838130950928, "learning_rate": 6.0797999999999996e-05, "loss": 0.0267, "step": 30400 }, { "epoch": 543.0357142857143, "grad_norm": 0.25233373045921326, "learning_rate": 6.0818000000000006e-05, "loss": 0.0234, "step": 30410 }, { "epoch": 543.2142857142857, "grad_norm": 0.21917417645454407, "learning_rate": 6.083800000000001e-05, "loss": 0.0215, "step": 30420 }, { "epoch": 543.3928571428571, "grad_norm": 0.18100547790527344, "learning_rate": 6.0858000000000006e-05, "loss": 0.0189, "step": 30430 }, { "epoch": 543.5714285714286, "grad_norm": 0.27086183428764343, "learning_rate": 6.0878e-05, "loss": 0.0185, "step": 30440 }, { "epoch": 543.75, "grad_norm": 0.430787056684494, "learning_rate": 6.0898e-05, "loss": 0.0195, "step": 30450 }, { "epoch": 543.9285714285714, "grad_norm": 0.23886050283908844, "learning_rate": 6.091800000000001e-05, "loss": 0.019, "step": 30460 }, { "epoch": 544.1071428571429, "grad_norm": 0.2139621078968048, "learning_rate": 6.0938000000000005e-05, "loss": 0.017, "step": 30470 }, { "epoch": 544.2857142857143, "grad_norm": 0.303571492433548, "learning_rate": 6.0958e-05, "loss": 0.0194, "step": 30480 }, { "epoch": 544.4642857142857, "grad_norm": 0.32088571786880493, "learning_rate": 6.0978000000000005e-05, "loss": 0.0192, "step": 30490 }, { "epoch": 544.6428571428571, "grad_norm": 0.20978201925754547, "learning_rate": 6.0998e-05, "loss": 0.0178, "step": 30500 }, { "epoch": 544.8214285714286, "grad_norm": 0.22928866744041443, "learning_rate": 6.1018e-05, "loss": 0.0182, "step": 30510 }, { "epoch": 545.0, "grad_norm": 0.33176538348197937, "learning_rate": 6.103800000000001e-05, "loss": 0.0198, "step": 30520 }, { "epoch": 545.1785714285714, "grad_norm": 0.275335431098938, "learning_rate": 6.105800000000001e-05, "loss": 0.0183, "step": 30530 }, { "epoch": 545.3571428571429, "grad_norm": 0.2602602541446686, "learning_rate": 6.1078e-05, "loss": 0.0212, "step": 30540 }, { "epoch": 545.5357142857143, "grad_norm": 0.2758956551551819, "learning_rate": 6.1098e-05, "loss": 0.0189, "step": 30550 }, { "epoch": 545.7142857142857, "grad_norm": 0.211493581533432, "learning_rate": 6.111799999999999e-05, "loss": 0.0201, "step": 30560 }, { "epoch": 545.8928571428571, "grad_norm": 0.32160094380378723, "learning_rate": 6.113800000000001e-05, "loss": 0.0205, "step": 30570 }, { "epoch": 546.0714285714286, "grad_norm": 0.2825220823287964, "learning_rate": 6.1158e-05, "loss": 0.0179, "step": 30580 }, { "epoch": 546.25, "grad_norm": 0.2961184084415436, "learning_rate": 6.1178e-05, "loss": 0.0204, "step": 30590 }, { "epoch": 546.4285714285714, "grad_norm": 0.15553322434425354, "learning_rate": 6.1198e-05, "loss": 0.0182, "step": 30600 }, { "epoch": 546.6071428571429, "grad_norm": 0.28147992491722107, "learning_rate": 6.1218e-05, "loss": 0.0195, "step": 30610 }, { "epoch": 546.7857142857143, "grad_norm": 0.2374066263437271, "learning_rate": 6.123800000000001e-05, "loss": 0.0176, "step": 30620 }, { "epoch": 546.9642857142857, "grad_norm": 0.1862960010766983, "learning_rate": 6.1258e-05, "loss": 0.0166, "step": 30630 }, { "epoch": 547.1428571428571, "grad_norm": 0.4425911605358124, "learning_rate": 6.1278e-05, "loss": 0.0175, "step": 30640 }, { "epoch": 547.3214285714286, "grad_norm": 0.3539193570613861, "learning_rate": 6.1298e-05, "loss": 0.0168, "step": 30650 }, { "epoch": 547.5, "grad_norm": 0.2806050777435303, "learning_rate": 6.1318e-05, "loss": 0.0173, "step": 30660 }, { "epoch": 547.6785714285714, "grad_norm": 0.2564849555492401, "learning_rate": 6.1338e-05, "loss": 0.0186, "step": 30670 }, { "epoch": 547.8571428571429, "grad_norm": 0.257759690284729, "learning_rate": 6.1358e-05, "loss": 0.0203, "step": 30680 }, { "epoch": 548.0357142857143, "grad_norm": 0.43082937598228455, "learning_rate": 6.137800000000001e-05, "loss": 0.0182, "step": 30690 }, { "epoch": 548.2142857142857, "grad_norm": 0.24106952548027039, "learning_rate": 6.1398e-05, "loss": 0.0193, "step": 30700 }, { "epoch": 548.3928571428571, "grad_norm": 0.19925588369369507, "learning_rate": 6.1418e-05, "loss": 0.0193, "step": 30710 }, { "epoch": 548.5714285714286, "grad_norm": 0.3220268785953522, "learning_rate": 6.1438e-05, "loss": 0.0185, "step": 30720 }, { "epoch": 548.75, "grad_norm": 0.37179991602897644, "learning_rate": 6.145800000000001e-05, "loss": 0.0215, "step": 30730 }, { "epoch": 548.9285714285714, "grad_norm": 0.20839427411556244, "learning_rate": 6.1478e-05, "loss": 0.0177, "step": 30740 }, { "epoch": 549.1071428571429, "grad_norm": 0.2357696294784546, "learning_rate": 6.1498e-05, "loss": 0.0195, "step": 30750 }, { "epoch": 549.2857142857143, "grad_norm": 0.18023119866847992, "learning_rate": 6.1518e-05, "loss": 0.0188, "step": 30760 }, { "epoch": 549.4642857142857, "grad_norm": 0.32350361347198486, "learning_rate": 6.1538e-05, "loss": 0.0171, "step": 30770 }, { "epoch": 549.6428571428571, "grad_norm": 0.34990981221199036, "learning_rate": 6.155800000000001e-05, "loss": 0.0177, "step": 30780 }, { "epoch": 549.8214285714286, "grad_norm": 0.29821670055389404, "learning_rate": 6.1578e-05, "loss": 0.0173, "step": 30790 }, { "epoch": 550.0, "grad_norm": 0.21906858682632446, "learning_rate": 6.1598e-05, "loss": 0.0173, "step": 30800 }, { "epoch": 550.1785714285714, "grad_norm": 0.26966288685798645, "learning_rate": 6.161799999999999e-05, "loss": 0.0158, "step": 30810 }, { "epoch": 550.3571428571429, "grad_norm": 0.2308344841003418, "learning_rate": 6.163800000000001e-05, "loss": 0.0173, "step": 30820 }, { "epoch": 550.5357142857143, "grad_norm": 0.22460833191871643, "learning_rate": 6.1658e-05, "loss": 0.0171, "step": 30830 }, { "epoch": 550.7142857142857, "grad_norm": 0.23826542496681213, "learning_rate": 6.1678e-05, "loss": 0.0147, "step": 30840 }, { "epoch": 550.8928571428571, "grad_norm": 0.20007003843784332, "learning_rate": 6.1698e-05, "loss": 0.0148, "step": 30850 }, { "epoch": 551.0714285714286, "grad_norm": 0.20566034317016602, "learning_rate": 6.1718e-05, "loss": 0.0159, "step": 30860 }, { "epoch": 551.25, "grad_norm": 0.2738024592399597, "learning_rate": 6.173800000000001e-05, "loss": 0.0138, "step": 30870 }, { "epoch": 551.4285714285714, "grad_norm": 0.22104357182979584, "learning_rate": 6.1758e-05, "loss": 0.0161, "step": 30880 }, { "epoch": 551.6071428571429, "grad_norm": 0.27797460556030273, "learning_rate": 6.1778e-05, "loss": 0.0175, "step": 30890 }, { "epoch": 551.7857142857143, "grad_norm": 0.272727906703949, "learning_rate": 6.179800000000001e-05, "loss": 0.0198, "step": 30900 }, { "epoch": 551.9642857142857, "grad_norm": 0.28774604201316833, "learning_rate": 6.1818e-05, "loss": 0.0143, "step": 30910 }, { "epoch": 552.1428571428571, "grad_norm": 0.24690589308738708, "learning_rate": 6.183800000000001e-05, "loss": 0.0155, "step": 30920 }, { "epoch": 552.3214285714286, "grad_norm": 0.28828904032707214, "learning_rate": 6.1858e-05, "loss": 0.0163, "step": 30930 }, { "epoch": 552.5, "grad_norm": 0.3980543613433838, "learning_rate": 6.1878e-05, "loss": 0.0187, "step": 30940 }, { "epoch": 552.6785714285714, "grad_norm": 0.3805142939090729, "learning_rate": 6.1898e-05, "loss": 0.0194, "step": 30950 }, { "epoch": 552.8571428571429, "grad_norm": 0.25523197650909424, "learning_rate": 6.1918e-05, "loss": 0.0194, "step": 30960 }, { "epoch": 553.0357142857143, "grad_norm": 0.34959420561790466, "learning_rate": 6.1938e-05, "loss": 0.0148, "step": 30970 }, { "epoch": 553.2142857142857, "grad_norm": 0.2675730884075165, "learning_rate": 6.1958e-05, "loss": 0.0164, "step": 30980 }, { "epoch": 553.3928571428571, "grad_norm": 0.2756670415401459, "learning_rate": 6.197800000000001e-05, "loss": 0.0177, "step": 30990 }, { "epoch": 553.5714285714286, "grad_norm": 0.2620287835597992, "learning_rate": 6.1998e-05, "loss": 0.0164, "step": 31000 }, { "epoch": 553.75, "grad_norm": 0.21860894560813904, "learning_rate": 6.2018e-05, "loss": 0.0175, "step": 31010 }, { "epoch": 553.9285714285714, "grad_norm": 0.24189580976963043, "learning_rate": 6.2038e-05, "loss": 0.0164, "step": 31020 }, { "epoch": 554.1071428571429, "grad_norm": 0.3334757685661316, "learning_rate": 6.205800000000001e-05, "loss": 0.0168, "step": 31030 }, { "epoch": 554.2857142857143, "grad_norm": 0.31243929266929626, "learning_rate": 6.2078e-05, "loss": 0.0147, "step": 31040 }, { "epoch": 554.4642857142857, "grad_norm": 0.1898679882287979, "learning_rate": 6.2098e-05, "loss": 0.0167, "step": 31050 }, { "epoch": 554.6428571428571, "grad_norm": 0.2883601188659668, "learning_rate": 6.2118e-05, "loss": 0.0187, "step": 31060 }, { "epoch": 554.8214285714286, "grad_norm": 0.19006390869617462, "learning_rate": 6.213800000000001e-05, "loss": 0.0143, "step": 31070 }, { "epoch": 555.0, "grad_norm": 0.3040248453617096, "learning_rate": 6.215800000000001e-05, "loss": 0.019, "step": 31080 }, { "epoch": 555.1785714285714, "grad_norm": 0.31413981318473816, "learning_rate": 6.2178e-05, "loss": 0.0157, "step": 31090 }, { "epoch": 555.3571428571429, "grad_norm": 0.41140228509902954, "learning_rate": 6.2198e-05, "loss": 0.0175, "step": 31100 }, { "epoch": 555.5357142857143, "grad_norm": 0.3243526518344879, "learning_rate": 6.221799999999999e-05, "loss": 0.0167, "step": 31110 }, { "epoch": 555.7142857142857, "grad_norm": 0.20840394496917725, "learning_rate": 6.223800000000001e-05, "loss": 0.0188, "step": 31120 }, { "epoch": 555.8928571428571, "grad_norm": 0.23771779239177704, "learning_rate": 6.2258e-05, "loss": 0.0156, "step": 31130 }, { "epoch": 556.0714285714286, "grad_norm": 0.17054596543312073, "learning_rate": 6.2278e-05, "loss": 0.0148, "step": 31140 }, { "epoch": 556.25, "grad_norm": 0.15231561660766602, "learning_rate": 6.2298e-05, "loss": 0.0149, "step": 31150 }, { "epoch": 556.4285714285714, "grad_norm": 0.22953499853610992, "learning_rate": 6.2318e-05, "loss": 0.0149, "step": 31160 }, { "epoch": 556.6071428571429, "grad_norm": 0.2876657247543335, "learning_rate": 6.233800000000001e-05, "loss": 0.0157, "step": 31170 }, { "epoch": 556.7857142857143, "grad_norm": 0.3009263873100281, "learning_rate": 6.2358e-05, "loss": 0.0164, "step": 31180 }, { "epoch": 556.9642857142857, "grad_norm": 0.28852519392967224, "learning_rate": 6.2378e-05, "loss": 0.0196, "step": 31190 }, { "epoch": 557.1428571428571, "grad_norm": 0.2886342406272888, "learning_rate": 6.2398e-05, "loss": 0.0158, "step": 31200 }, { "epoch": 557.3214285714286, "grad_norm": 0.27074167132377625, "learning_rate": 6.2418e-05, "loss": 0.0157, "step": 31210 }, { "epoch": 557.5, "grad_norm": 0.382783979177475, "learning_rate": 6.2438e-05, "loss": 0.0178, "step": 31220 }, { "epoch": 557.6785714285714, "grad_norm": 0.2499755471944809, "learning_rate": 6.2458e-05, "loss": 0.0153, "step": 31230 }, { "epoch": 557.8571428571429, "grad_norm": 0.30805525183677673, "learning_rate": 6.247800000000001e-05, "loss": 0.0147, "step": 31240 }, { "epoch": 558.0357142857143, "grad_norm": 0.26752278208732605, "learning_rate": 6.2498e-05, "loss": 0.016, "step": 31250 }, { "epoch": 558.2142857142857, "grad_norm": 0.20478519797325134, "learning_rate": 6.2518e-05, "loss": 0.0147, "step": 31260 }, { "epoch": 558.3928571428571, "grad_norm": 0.24120016396045685, "learning_rate": 6.2538e-05, "loss": 0.0165, "step": 31270 }, { "epoch": 558.5714285714286, "grad_norm": 0.17173568904399872, "learning_rate": 6.2558e-05, "loss": 0.0149, "step": 31280 }, { "epoch": 558.75, "grad_norm": 0.1834830790758133, "learning_rate": 6.257800000000001e-05, "loss": 0.0164, "step": 31290 }, { "epoch": 558.9285714285714, "grad_norm": 0.24880382418632507, "learning_rate": 6.2598e-05, "loss": 0.0143, "step": 31300 }, { "epoch": 559.1071428571429, "grad_norm": 0.1804461032152176, "learning_rate": 6.2618e-05, "loss": 0.0173, "step": 31310 }, { "epoch": 559.2857142857143, "grad_norm": 0.25798383355140686, "learning_rate": 6.2638e-05, "loss": 0.0164, "step": 31320 }, { "epoch": 559.4642857142857, "grad_norm": 0.25129345059394836, "learning_rate": 6.265800000000001e-05, "loss": 0.019, "step": 31330 }, { "epoch": 559.6428571428571, "grad_norm": 0.23374931514263153, "learning_rate": 6.2678e-05, "loss": 0.0179, "step": 31340 }, { "epoch": 559.8214285714286, "grad_norm": 0.2643740773200989, "learning_rate": 6.2698e-05, "loss": 0.0156, "step": 31350 }, { "epoch": 560.0, "grad_norm": 0.3175007700920105, "learning_rate": 6.2718e-05, "loss": 0.0145, "step": 31360 }, { "epoch": 560.1785714285714, "grad_norm": 0.30858489871025085, "learning_rate": 6.273800000000001e-05, "loss": 0.0185, "step": 31370 }, { "epoch": 560.3571428571429, "grad_norm": 0.23640286922454834, "learning_rate": 6.275800000000001e-05, "loss": 0.0137, "step": 31380 }, { "epoch": 560.5357142857143, "grad_norm": 0.22879087924957275, "learning_rate": 6.2778e-05, "loss": 0.0173, "step": 31390 }, { "epoch": 560.7142857142857, "grad_norm": 0.26281845569610596, "learning_rate": 6.2798e-05, "loss": 0.0165, "step": 31400 }, { "epoch": 560.8928571428571, "grad_norm": 0.2946714758872986, "learning_rate": 6.2818e-05, "loss": 0.0181, "step": 31410 }, { "epoch": 561.0714285714286, "grad_norm": 0.2160150706768036, "learning_rate": 6.283800000000001e-05, "loss": 0.0147, "step": 31420 }, { "epoch": 561.25, "grad_norm": 0.25680509209632874, "learning_rate": 6.2858e-05, "loss": 0.0158, "step": 31430 }, { "epoch": 561.4285714285714, "grad_norm": 0.2221389263868332, "learning_rate": 6.2878e-05, "loss": 0.0151, "step": 31440 }, { "epoch": 561.6071428571429, "grad_norm": 0.3929239809513092, "learning_rate": 6.2898e-05, "loss": 0.0164, "step": 31450 }, { "epoch": 561.7857142857143, "grad_norm": 0.2578808665275574, "learning_rate": 6.2918e-05, "loss": 0.0131, "step": 31460 }, { "epoch": 561.9642857142857, "grad_norm": 0.25252604484558105, "learning_rate": 6.293800000000001e-05, "loss": 0.0145, "step": 31470 }, { "epoch": 562.1428571428571, "grad_norm": 0.2148098647594452, "learning_rate": 6.2958e-05, "loss": 0.0157, "step": 31480 }, { "epoch": 562.3214285714286, "grad_norm": 0.24088717997074127, "learning_rate": 6.2978e-05, "loss": 0.0142, "step": 31490 }, { "epoch": 562.5, "grad_norm": 0.22890691459178925, "learning_rate": 6.2998e-05, "loss": 0.015, "step": 31500 }, { "epoch": 562.6785714285714, "grad_norm": 0.23063288629055023, "learning_rate": 6.3018e-05, "loss": 0.0147, "step": 31510 }, { "epoch": 562.8571428571429, "grad_norm": 0.26395007967948914, "learning_rate": 6.3038e-05, "loss": 0.0139, "step": 31520 }, { "epoch": 563.0357142857143, "grad_norm": 0.2215006798505783, "learning_rate": 6.3058e-05, "loss": 0.015, "step": 31530 }, { "epoch": 563.2142857142857, "grad_norm": 0.25293275713920593, "learning_rate": 6.307800000000001e-05, "loss": 0.0138, "step": 31540 }, { "epoch": 563.3928571428571, "grad_norm": 0.22684246301651, "learning_rate": 6.3098e-05, "loss": 0.0166, "step": 31550 }, { "epoch": 563.5714285714286, "grad_norm": 0.28450748324394226, "learning_rate": 6.3118e-05, "loss": 0.0145, "step": 31560 }, { "epoch": 563.75, "grad_norm": 0.22888632118701935, "learning_rate": 6.3138e-05, "loss": 0.0146, "step": 31570 }, { "epoch": 563.9285714285714, "grad_norm": 0.25990498065948486, "learning_rate": 6.315800000000001e-05, "loss": 0.0144, "step": 31580 }, { "epoch": 564.1071428571429, "grad_norm": 0.23908282816410065, "learning_rate": 6.3178e-05, "loss": 0.0145, "step": 31590 }, { "epoch": 564.2857142857143, "grad_norm": 0.21010041236877441, "learning_rate": 6.3198e-05, "loss": 0.0132, "step": 31600 }, { "epoch": 564.4642857142857, "grad_norm": 0.28477105498313904, "learning_rate": 6.3218e-05, "loss": 0.0145, "step": 31610 }, { "epoch": 564.6428571428571, "grad_norm": 0.28544583916664124, "learning_rate": 6.3238e-05, "loss": 0.0147, "step": 31620 }, { "epoch": 564.8214285714286, "grad_norm": 0.33127614855766296, "learning_rate": 6.325800000000001e-05, "loss": 0.0162, "step": 31630 }, { "epoch": 565.0, "grad_norm": 0.1811511218547821, "learning_rate": 6.3278e-05, "loss": 0.0144, "step": 31640 }, { "epoch": 565.1785714285714, "grad_norm": 0.24207283556461334, "learning_rate": 6.3298e-05, "loss": 0.0161, "step": 31650 }, { "epoch": 565.3571428571429, "grad_norm": 0.2631942927837372, "learning_rate": 6.3318e-05, "loss": 0.0132, "step": 31660 }, { "epoch": 565.5357142857143, "grad_norm": 0.2743905186653137, "learning_rate": 6.333800000000001e-05, "loss": 0.014, "step": 31670 }, { "epoch": 565.7142857142857, "grad_norm": 0.3108574450016022, "learning_rate": 6.335800000000001e-05, "loss": 0.0145, "step": 31680 }, { "epoch": 565.8928571428571, "grad_norm": 0.3611335754394531, "learning_rate": 6.3378e-05, "loss": 0.0132, "step": 31690 }, { "epoch": 566.0714285714286, "grad_norm": 0.29785647988319397, "learning_rate": 6.3398e-05, "loss": 0.0137, "step": 31700 }, { "epoch": 566.25, "grad_norm": 0.2329464703798294, "learning_rate": 6.3418e-05, "loss": 0.014, "step": 31710 }, { "epoch": 566.4285714285714, "grad_norm": 0.22924591600894928, "learning_rate": 6.343800000000001e-05, "loss": 0.0133, "step": 31720 }, { "epoch": 566.6071428571429, "grad_norm": 0.23536041378974915, "learning_rate": 6.3458e-05, "loss": 0.0137, "step": 31730 }, { "epoch": 566.7857142857143, "grad_norm": 0.16819316148757935, "learning_rate": 6.3478e-05, "loss": 0.0164, "step": 31740 }, { "epoch": 566.9642857142857, "grad_norm": 0.1726657599210739, "learning_rate": 6.349800000000001e-05, "loss": 0.0164, "step": 31750 }, { "epoch": 567.1428571428571, "grad_norm": 0.2831055819988251, "learning_rate": 6.3518e-05, "loss": 0.013, "step": 31760 }, { "epoch": 567.3214285714286, "grad_norm": 0.21098649501800537, "learning_rate": 6.3538e-05, "loss": 0.0146, "step": 31770 }, { "epoch": 567.5, "grad_norm": 0.2277112603187561, "learning_rate": 6.3558e-05, "loss": 0.0159, "step": 31780 }, { "epoch": 567.6785714285714, "grad_norm": 0.267535924911499, "learning_rate": 6.357800000000001e-05, "loss": 0.018, "step": 31790 }, { "epoch": 567.8571428571429, "grad_norm": 0.36105528473854065, "learning_rate": 6.3598e-05, "loss": 0.014, "step": 31800 }, { "epoch": 568.0357142857143, "grad_norm": 0.22820217907428741, "learning_rate": 6.3618e-05, "loss": 0.0144, "step": 31810 }, { "epoch": 568.2142857142857, "grad_norm": 0.23887357115745544, "learning_rate": 6.3638e-05, "loss": 0.0151, "step": 31820 }, { "epoch": 568.3928571428571, "grad_norm": 0.2586442828178406, "learning_rate": 6.3658e-05, "loss": 0.0162, "step": 31830 }, { "epoch": 568.5714285714286, "grad_norm": 0.21184217929840088, "learning_rate": 6.367800000000001e-05, "loss": 0.0159, "step": 31840 }, { "epoch": 568.75, "grad_norm": 0.2237294465303421, "learning_rate": 6.3698e-05, "loss": 0.0161, "step": 31850 }, { "epoch": 568.9285714285714, "grad_norm": 0.2742435932159424, "learning_rate": 6.3718e-05, "loss": 0.0166, "step": 31860 }, { "epoch": 569.1071428571429, "grad_norm": 0.26743000745773315, "learning_rate": 6.373799999999999e-05, "loss": 0.0142, "step": 31870 }, { "epoch": 569.2857142857143, "grad_norm": 0.3324533700942993, "learning_rate": 6.375800000000001e-05, "loss": 0.0167, "step": 31880 }, { "epoch": 569.4642857142857, "grad_norm": 0.24467164278030396, "learning_rate": 6.3778e-05, "loss": 0.0168, "step": 31890 }, { "epoch": 569.6428571428571, "grad_norm": 0.19232843816280365, "learning_rate": 6.3798e-05, "loss": 0.0157, "step": 31900 }, { "epoch": 569.8214285714286, "grad_norm": 0.27459418773651123, "learning_rate": 6.3818e-05, "loss": 0.0158, "step": 31910 }, { "epoch": 570.0, "grad_norm": 0.17561480402946472, "learning_rate": 6.3838e-05, "loss": 0.0122, "step": 31920 }, { "epoch": 570.1785714285714, "grad_norm": 0.1646227240562439, "learning_rate": 6.385800000000001e-05, "loss": 0.0128, "step": 31930 }, { "epoch": 570.3571428571429, "grad_norm": 0.15148717164993286, "learning_rate": 6.3878e-05, "loss": 0.0145, "step": 31940 }, { "epoch": 570.5357142857143, "grad_norm": 0.16599619388580322, "learning_rate": 6.3898e-05, "loss": 0.0181, "step": 31950 }, { "epoch": 570.7142857142857, "grad_norm": 0.18912053108215332, "learning_rate": 6.3918e-05, "loss": 0.0133, "step": 31960 }, { "epoch": 570.8928571428571, "grad_norm": 0.2720184326171875, "learning_rate": 6.3938e-05, "loss": 0.0158, "step": 31970 }, { "epoch": 571.0714285714286, "grad_norm": 0.21616306900978088, "learning_rate": 6.395800000000001e-05, "loss": 0.0144, "step": 31980 }, { "epoch": 571.25, "grad_norm": 0.2618716061115265, "learning_rate": 6.3978e-05, "loss": 0.0133, "step": 31990 }, { "epoch": 571.4285714285714, "grad_norm": 0.27495405077934265, "learning_rate": 6.3998e-05, "loss": 0.0125, "step": 32000 }, { "epoch": 571.6071428571429, "grad_norm": 0.18436101078987122, "learning_rate": 6.4018e-05, "loss": 0.0144, "step": 32010 }, { "epoch": 571.7857142857143, "grad_norm": 0.25347471237182617, "learning_rate": 6.4038e-05, "loss": 0.0154, "step": 32020 }, { "epoch": 571.9642857142857, "grad_norm": 0.23195503652095795, "learning_rate": 6.4058e-05, "loss": 0.0156, "step": 32030 }, { "epoch": 572.1428571428571, "grad_norm": 0.23730112612247467, "learning_rate": 6.4078e-05, "loss": 0.0145, "step": 32040 }, { "epoch": 572.3214285714286, "grad_norm": 0.22636431455612183, "learning_rate": 6.409800000000001e-05, "loss": 0.0142, "step": 32050 }, { "epoch": 572.5, "grad_norm": 0.21586433053016663, "learning_rate": 6.4118e-05, "loss": 0.0129, "step": 32060 }, { "epoch": 572.6785714285714, "grad_norm": 0.27406108379364014, "learning_rate": 6.4138e-05, "loss": 0.0135, "step": 32070 }, { "epoch": 572.8571428571429, "grad_norm": 0.23142433166503906, "learning_rate": 6.4158e-05, "loss": 0.0143, "step": 32080 }, { "epoch": 573.0357142857143, "grad_norm": 0.20073240995407104, "learning_rate": 6.417800000000001e-05, "loss": 0.0141, "step": 32090 }, { "epoch": 573.2142857142857, "grad_norm": 0.28513556718826294, "learning_rate": 6.4198e-05, "loss": 0.0155, "step": 32100 }, { "epoch": 573.3928571428571, "grad_norm": 0.22574909031391144, "learning_rate": 6.4218e-05, "loss": 0.0153, "step": 32110 }, { "epoch": 573.5714285714286, "grad_norm": 0.24241489171981812, "learning_rate": 6.4238e-05, "loss": 0.0151, "step": 32120 }, { "epoch": 573.75, "grad_norm": 0.2502438724040985, "learning_rate": 6.425800000000001e-05, "loss": 0.0163, "step": 32130 }, { "epoch": 573.9285714285714, "grad_norm": 0.2843800187110901, "learning_rate": 6.427800000000001e-05, "loss": 0.0154, "step": 32140 }, { "epoch": 574.1071428571429, "grad_norm": 0.2418842762708664, "learning_rate": 6.4298e-05, "loss": 0.0147, "step": 32150 }, { "epoch": 574.2857142857143, "grad_norm": 0.2621355950832367, "learning_rate": 6.4318e-05, "loss": 0.0162, "step": 32160 }, { "epoch": 574.4642857142857, "grad_norm": 0.19287893176078796, "learning_rate": 6.433799999999999e-05, "loss": 0.0135, "step": 32170 }, { "epoch": 574.6428571428571, "grad_norm": 0.14918416738510132, "learning_rate": 6.435800000000001e-05, "loss": 0.0138, "step": 32180 }, { "epoch": 574.8214285714286, "grad_norm": 0.1678963601589203, "learning_rate": 6.4378e-05, "loss": 0.0138, "step": 32190 }, { "epoch": 575.0, "grad_norm": 0.19409194588661194, "learning_rate": 6.4398e-05, "loss": 0.0143, "step": 32200 }, { "epoch": 575.1785714285714, "grad_norm": 0.45534124970436096, "learning_rate": 6.4418e-05, "loss": 0.0138, "step": 32210 }, { "epoch": 575.3571428571429, "grad_norm": 0.1797991544008255, "learning_rate": 6.4438e-05, "loss": 0.0163, "step": 32220 }, { "epoch": 575.5357142857143, "grad_norm": 0.24596107006072998, "learning_rate": 6.445800000000001e-05, "loss": 0.0157, "step": 32230 }, { "epoch": 575.7142857142857, "grad_norm": 0.12887056171894073, "learning_rate": 6.4478e-05, "loss": 0.0147, "step": 32240 }, { "epoch": 575.8928571428571, "grad_norm": 0.22241422533988953, "learning_rate": 6.4498e-05, "loss": 0.0142, "step": 32250 }, { "epoch": 576.0714285714286, "grad_norm": 0.23547980189323425, "learning_rate": 6.4518e-05, "loss": 0.0141, "step": 32260 }, { "epoch": 576.25, "grad_norm": 0.1589604914188385, "learning_rate": 6.4538e-05, "loss": 0.014, "step": 32270 }, { "epoch": 576.4285714285714, "grad_norm": 0.26177650690078735, "learning_rate": 6.4558e-05, "loss": 0.014, "step": 32280 }, { "epoch": 576.6071428571429, "grad_norm": 0.20755338668823242, "learning_rate": 6.4578e-05, "loss": 0.0141, "step": 32290 }, { "epoch": 576.7857142857143, "grad_norm": 0.20666837692260742, "learning_rate": 6.459800000000001e-05, "loss": 0.0149, "step": 32300 }, { "epoch": 576.9642857142857, "grad_norm": 0.4009536802768707, "learning_rate": 6.4618e-05, "loss": 0.017, "step": 32310 }, { "epoch": 577.1428571428571, "grad_norm": 0.2630116939544678, "learning_rate": 6.4638e-05, "loss": 0.0168, "step": 32320 }, { "epoch": 577.3214285714286, "grad_norm": 0.2525438070297241, "learning_rate": 6.4658e-05, "loss": 0.0145, "step": 32330 }, { "epoch": 577.5, "grad_norm": 0.2826937139034271, "learning_rate": 6.4678e-05, "loss": 0.0168, "step": 32340 }, { "epoch": 577.6785714285714, "grad_norm": 0.3092265725135803, "learning_rate": 6.469800000000001e-05, "loss": 0.0185, "step": 32350 }, { "epoch": 577.8571428571429, "grad_norm": 0.31710970401763916, "learning_rate": 6.4718e-05, "loss": 0.0166, "step": 32360 }, { "epoch": 578.0357142857143, "grad_norm": 0.26587265729904175, "learning_rate": 6.4738e-05, "loss": 0.0152, "step": 32370 }, { "epoch": 578.2142857142857, "grad_norm": 0.32053208351135254, "learning_rate": 6.4758e-05, "loss": 0.0172, "step": 32380 }, { "epoch": 578.3928571428571, "grad_norm": 0.2190794050693512, "learning_rate": 6.477800000000001e-05, "loss": 0.0158, "step": 32390 }, { "epoch": 578.5714285714286, "grad_norm": 0.2289978712797165, "learning_rate": 6.4798e-05, "loss": 0.0164, "step": 32400 }, { "epoch": 578.75, "grad_norm": 0.25581109523773193, "learning_rate": 6.4818e-05, "loss": 0.0167, "step": 32410 }, { "epoch": 578.9285714285714, "grad_norm": 0.2302786409854889, "learning_rate": 6.4838e-05, "loss": 0.0225, "step": 32420 }, { "epoch": 579.1071428571429, "grad_norm": 0.23553496599197388, "learning_rate": 6.485800000000001e-05, "loss": 0.018, "step": 32430 }, { "epoch": 579.2857142857143, "grad_norm": 0.25681042671203613, "learning_rate": 6.487800000000001e-05, "loss": 0.0178, "step": 32440 }, { "epoch": 579.4642857142857, "grad_norm": 0.24104423820972443, "learning_rate": 6.4898e-05, "loss": 0.0161, "step": 32450 }, { "epoch": 579.6428571428571, "grad_norm": 0.3267906606197357, "learning_rate": 6.4918e-05, "loss": 0.0158, "step": 32460 }, { "epoch": 579.8214285714286, "grad_norm": 0.23799748718738556, "learning_rate": 6.4938e-05, "loss": 0.016, "step": 32470 }, { "epoch": 580.0, "grad_norm": 0.2612999975681305, "learning_rate": 6.495800000000001e-05, "loss": 0.0142, "step": 32480 }, { "epoch": 580.1785714285714, "grad_norm": 0.2618653476238251, "learning_rate": 6.4978e-05, "loss": 0.015, "step": 32490 }, { "epoch": 580.3571428571429, "grad_norm": 0.2634671628475189, "learning_rate": 6.4998e-05, "loss": 0.0167, "step": 32500 }, { "epoch": 580.5357142857143, "grad_norm": 0.2390412986278534, "learning_rate": 6.5018e-05, "loss": 0.0146, "step": 32510 }, { "epoch": 580.7142857142857, "grad_norm": 0.24774129688739777, "learning_rate": 6.5038e-05, "loss": 0.0162, "step": 32520 }, { "epoch": 580.8928571428571, "grad_norm": 0.18433013558387756, "learning_rate": 6.505800000000001e-05, "loss": 0.0151, "step": 32530 }, { "epoch": 581.0714285714286, "grad_norm": 0.21200227737426758, "learning_rate": 6.5078e-05, "loss": 0.0141, "step": 32540 }, { "epoch": 581.25, "grad_norm": 0.2512839734554291, "learning_rate": 6.5098e-05, "loss": 0.0154, "step": 32550 }, { "epoch": 581.4285714285714, "grad_norm": 0.21169604361057281, "learning_rate": 6.5118e-05, "loss": 0.0155, "step": 32560 }, { "epoch": 581.6071428571429, "grad_norm": 0.23867985606193542, "learning_rate": 6.5138e-05, "loss": 0.0175, "step": 32570 }, { "epoch": 581.7857142857143, "grad_norm": 0.21671076118946075, "learning_rate": 6.5158e-05, "loss": 0.0149, "step": 32580 }, { "epoch": 581.9642857142857, "grad_norm": 0.38097333908081055, "learning_rate": 6.5178e-05, "loss": 0.0132, "step": 32590 }, { "epoch": 582.1428571428571, "grad_norm": 0.2582288086414337, "learning_rate": 6.519800000000001e-05, "loss": 0.0135, "step": 32600 }, { "epoch": 582.3214285714286, "grad_norm": 0.21684856712818146, "learning_rate": 6.5218e-05, "loss": 0.0166, "step": 32610 }, { "epoch": 582.5, "grad_norm": 0.2970295548439026, "learning_rate": 6.5238e-05, "loss": 0.0149, "step": 32620 }, { "epoch": 582.6785714285714, "grad_norm": 0.30883872509002686, "learning_rate": 6.5258e-05, "loss": 0.0157, "step": 32630 }, { "epoch": 582.8571428571429, "grad_norm": 0.2590865194797516, "learning_rate": 6.527800000000001e-05, "loss": 0.0151, "step": 32640 }, { "epoch": 583.0357142857143, "grad_norm": 0.18850407004356384, "learning_rate": 6.5298e-05, "loss": 0.016, "step": 32650 }, { "epoch": 583.2142857142857, "grad_norm": 0.2485005110502243, "learning_rate": 6.5318e-05, "loss": 0.0123, "step": 32660 }, { "epoch": 583.3928571428571, "grad_norm": 0.20352470874786377, "learning_rate": 6.5338e-05, "loss": 0.0142, "step": 32670 }, { "epoch": 583.5714285714286, "grad_norm": 0.34710270166397095, "learning_rate": 6.535800000000001e-05, "loss": 0.0137, "step": 32680 }, { "epoch": 583.75, "grad_norm": 0.2102855145931244, "learning_rate": 6.537800000000001e-05, "loss": 0.0135, "step": 32690 }, { "epoch": 583.9285714285714, "grad_norm": 0.22627215087413788, "learning_rate": 6.5398e-05, "loss": 0.0143, "step": 32700 }, { "epoch": 584.1071428571429, "grad_norm": 0.281726598739624, "learning_rate": 6.5418e-05, "loss": 0.014, "step": 32710 }, { "epoch": 584.2857142857143, "grad_norm": 0.25257325172424316, "learning_rate": 6.5438e-05, "loss": 0.0127, "step": 32720 }, { "epoch": 584.4642857142857, "grad_norm": 0.24925050139427185, "learning_rate": 6.545800000000001e-05, "loss": 0.0142, "step": 32730 }, { "epoch": 584.6428571428571, "grad_norm": 0.18744413554668427, "learning_rate": 6.547800000000001e-05, "loss": 0.0127, "step": 32740 }, { "epoch": 584.8214285714286, "grad_norm": 0.18833599984645844, "learning_rate": 6.5498e-05, "loss": 0.0135, "step": 32750 }, { "epoch": 585.0, "grad_norm": 0.18337799608707428, "learning_rate": 6.5518e-05, "loss": 0.0139, "step": 32760 }, { "epoch": 585.1785714285714, "grad_norm": 0.22392909228801727, "learning_rate": 6.5538e-05, "loss": 0.0137, "step": 32770 }, { "epoch": 585.3571428571429, "grad_norm": 0.2693372964859009, "learning_rate": 6.555800000000001e-05, "loss": 0.0142, "step": 32780 }, { "epoch": 585.5357142857143, "grad_norm": 0.25966888666152954, "learning_rate": 6.5578e-05, "loss": 0.0156, "step": 32790 }, { "epoch": 585.7142857142857, "grad_norm": 0.2690941095352173, "learning_rate": 6.5598e-05, "loss": 0.0119, "step": 32800 }, { "epoch": 585.8928571428571, "grad_norm": 0.22807225584983826, "learning_rate": 6.561800000000001e-05, "loss": 0.0134, "step": 32810 }, { "epoch": 586.0714285714286, "grad_norm": 0.23539184033870697, "learning_rate": 6.5638e-05, "loss": 0.0125, "step": 32820 }, { "epoch": 586.25, "grad_norm": 0.28872737288475037, "learning_rate": 6.565800000000001e-05, "loss": 0.0113, "step": 32830 }, { "epoch": 586.4285714285714, "grad_norm": 0.14526396989822388, "learning_rate": 6.5678e-05, "loss": 0.0137, "step": 32840 }, { "epoch": 586.6071428571429, "grad_norm": 0.2265174388885498, "learning_rate": 6.569800000000001e-05, "loss": 0.0145, "step": 32850 }, { "epoch": 586.7857142857143, "grad_norm": 0.1606399267911911, "learning_rate": 6.5718e-05, "loss": 0.0117, "step": 32860 }, { "epoch": 586.9642857142857, "grad_norm": 0.20325569808483124, "learning_rate": 6.5738e-05, "loss": 0.0139, "step": 32870 }, { "epoch": 587.1428571428571, "grad_norm": 0.1409047693014145, "learning_rate": 6.5758e-05, "loss": 0.0121, "step": 32880 }, { "epoch": 587.3214285714286, "grad_norm": 0.1848263293504715, "learning_rate": 6.5778e-05, "loss": 0.0129, "step": 32890 }, { "epoch": 587.5, "grad_norm": 0.23661817610263824, "learning_rate": 6.579800000000001e-05, "loss": 0.0139, "step": 32900 }, { "epoch": 587.6785714285714, "grad_norm": 0.16990642249584198, "learning_rate": 6.5818e-05, "loss": 0.0136, "step": 32910 }, { "epoch": 587.8571428571429, "grad_norm": 0.2503138780593872, "learning_rate": 6.5838e-05, "loss": 0.0116, "step": 32920 }, { "epoch": 588.0357142857143, "grad_norm": 0.19892694056034088, "learning_rate": 6.5858e-05, "loss": 0.0132, "step": 32930 }, { "epoch": 588.2142857142857, "grad_norm": 0.20373745262622833, "learning_rate": 6.587800000000001e-05, "loss": 0.0113, "step": 32940 }, { "epoch": 588.3928571428571, "grad_norm": 0.1855716109275818, "learning_rate": 6.5898e-05, "loss": 0.0124, "step": 32950 }, { "epoch": 588.5714285714286, "grad_norm": 0.16489370167255402, "learning_rate": 6.5918e-05, "loss": 0.014, "step": 32960 }, { "epoch": 588.75, "grad_norm": 0.23086391389369965, "learning_rate": 6.5938e-05, "loss": 0.0132, "step": 32970 }, { "epoch": 588.9285714285714, "grad_norm": 0.21350811421871185, "learning_rate": 6.595800000000001e-05, "loss": 0.011, "step": 32980 }, { "epoch": 589.1071428571429, "grad_norm": 0.2027156949043274, "learning_rate": 6.597800000000001e-05, "loss": 0.0133, "step": 32990 }, { "epoch": 589.2857142857143, "grad_norm": 0.21233128011226654, "learning_rate": 6.5998e-05, "loss": 0.0129, "step": 33000 }, { "epoch": 589.4642857142857, "grad_norm": 0.19447460770606995, "learning_rate": 6.6018e-05, "loss": 0.0128, "step": 33010 }, { "epoch": 589.6428571428571, "grad_norm": 0.31100475788116455, "learning_rate": 6.6038e-05, "loss": 0.0115, "step": 33020 }, { "epoch": 589.8214285714286, "grad_norm": 0.2892075181007385, "learning_rate": 6.6058e-05, "loss": 0.0139, "step": 33030 }, { "epoch": 590.0, "grad_norm": 0.23933656513690948, "learning_rate": 6.6078e-05, "loss": 0.0125, "step": 33040 }, { "epoch": 590.1785714285714, "grad_norm": 0.2161177545785904, "learning_rate": 6.6098e-05, "loss": 0.0124, "step": 33050 }, { "epoch": 590.3571428571429, "grad_norm": 0.266937792301178, "learning_rate": 6.6118e-05, "loss": 0.0119, "step": 33060 }, { "epoch": 590.5357142857143, "grad_norm": 0.23895949125289917, "learning_rate": 6.6138e-05, "loss": 0.0117, "step": 33070 }, { "epoch": 590.7142857142857, "grad_norm": 0.17119815945625305, "learning_rate": 6.6158e-05, "loss": 0.0132, "step": 33080 }, { "epoch": 590.8928571428571, "grad_norm": 0.21070575714111328, "learning_rate": 6.6178e-05, "loss": 0.0127, "step": 33090 }, { "epoch": 591.0714285714286, "grad_norm": 0.23489516973495483, "learning_rate": 6.6198e-05, "loss": 0.0128, "step": 33100 }, { "epoch": 591.25, "grad_norm": 0.16938075423240662, "learning_rate": 6.621800000000001e-05, "loss": 0.0121, "step": 33110 }, { "epoch": 591.4285714285714, "grad_norm": 0.18940480053424835, "learning_rate": 6.6238e-05, "loss": 0.0115, "step": 33120 }, { "epoch": 591.6071428571429, "grad_norm": 0.2626052498817444, "learning_rate": 6.6258e-05, "loss": 0.0136, "step": 33130 }, { "epoch": 591.7857142857143, "grad_norm": 0.2483232170343399, "learning_rate": 6.6278e-05, "loss": 0.0114, "step": 33140 }, { "epoch": 591.9642857142857, "grad_norm": 0.23437601327896118, "learning_rate": 6.629800000000001e-05, "loss": 0.014, "step": 33150 }, { "epoch": 592.1428571428571, "grad_norm": 0.19743669033050537, "learning_rate": 6.6318e-05, "loss": 0.0127, "step": 33160 }, { "epoch": 592.3214285714286, "grad_norm": 0.3026680648326874, "learning_rate": 6.6338e-05, "loss": 0.0139, "step": 33170 }, { "epoch": 592.5, "grad_norm": 0.25887244939804077, "learning_rate": 6.6358e-05, "loss": 0.013, "step": 33180 }, { "epoch": 592.6785714285714, "grad_norm": 0.23532646894454956, "learning_rate": 6.637800000000001e-05, "loss": 0.0134, "step": 33190 }, { "epoch": 592.8571428571429, "grad_norm": 0.18952050805091858, "learning_rate": 6.639800000000001e-05, "loss": 0.0132, "step": 33200 }, { "epoch": 593.0357142857143, "grad_norm": 0.35023438930511475, "learning_rate": 6.6418e-05, "loss": 0.015, "step": 33210 }, { "epoch": 593.2142857142857, "grad_norm": 0.19648635387420654, "learning_rate": 6.6438e-05, "loss": 0.0141, "step": 33220 }, { "epoch": 593.3928571428571, "grad_norm": 0.264178991317749, "learning_rate": 6.645799999999999e-05, "loss": 0.014, "step": 33230 }, { "epoch": 593.5714285714286, "grad_norm": 0.25932008028030396, "learning_rate": 6.647800000000001e-05, "loss": 0.015, "step": 33240 }, { "epoch": 593.75, "grad_norm": 0.2687787711620331, "learning_rate": 6.6498e-05, "loss": 0.0148, "step": 33250 }, { "epoch": 593.9285714285714, "grad_norm": 0.27314963936805725, "learning_rate": 6.6518e-05, "loss": 0.0145, "step": 33260 }, { "epoch": 594.1071428571429, "grad_norm": 0.26256272196769714, "learning_rate": 6.6538e-05, "loss": 0.0142, "step": 33270 }, { "epoch": 594.2857142857143, "grad_norm": 0.31458014249801636, "learning_rate": 6.6558e-05, "loss": 0.0117, "step": 33280 }, { "epoch": 594.4642857142857, "grad_norm": 0.2561814486980438, "learning_rate": 6.657800000000001e-05, "loss": 0.0142, "step": 33290 }, { "epoch": 594.6428571428571, "grad_norm": 0.20299720764160156, "learning_rate": 6.6598e-05, "loss": 0.0127, "step": 33300 }, { "epoch": 594.8214285714286, "grad_norm": 0.2574617266654968, "learning_rate": 6.6618e-05, "loss": 0.0142, "step": 33310 }, { "epoch": 595.0, "grad_norm": 0.2695096731185913, "learning_rate": 6.6638e-05, "loss": 0.0135, "step": 33320 }, { "epoch": 595.1785714285714, "grad_norm": 0.2070390284061432, "learning_rate": 6.6658e-05, "loss": 0.013, "step": 33330 }, { "epoch": 595.3571428571429, "grad_norm": 0.20449298620224, "learning_rate": 6.6678e-05, "loss": 0.0126, "step": 33340 }, { "epoch": 595.5357142857143, "grad_norm": 0.21951235830783844, "learning_rate": 6.6698e-05, "loss": 0.0154, "step": 33350 }, { "epoch": 595.7142857142857, "grad_norm": 0.26164552569389343, "learning_rate": 6.671800000000001e-05, "loss": 0.0126, "step": 33360 }, { "epoch": 595.8928571428571, "grad_norm": 0.3319559097290039, "learning_rate": 6.6738e-05, "loss": 0.0126, "step": 33370 }, { "epoch": 596.0714285714286, "grad_norm": 0.2830018997192383, "learning_rate": 6.6758e-05, "loss": 0.0138, "step": 33380 }, { "epoch": 596.25, "grad_norm": 0.2396959513425827, "learning_rate": 6.6778e-05, "loss": 0.0128, "step": 33390 }, { "epoch": 596.4285714285714, "grad_norm": 0.33530840277671814, "learning_rate": 6.6798e-05, "loss": 0.0127, "step": 33400 }, { "epoch": 596.6071428571429, "grad_norm": 0.2804027497768402, "learning_rate": 6.6818e-05, "loss": 0.0152, "step": 33410 }, { "epoch": 596.7857142857143, "grad_norm": 0.27027231454849243, "learning_rate": 6.6838e-05, "loss": 0.0145, "step": 33420 }, { "epoch": 596.9642857142857, "grad_norm": 0.2692406475543976, "learning_rate": 6.6858e-05, "loss": 0.0139, "step": 33430 }, { "epoch": 597.1428571428571, "grad_norm": 0.248239666223526, "learning_rate": 6.6878e-05, "loss": 0.0161, "step": 33440 }, { "epoch": 597.3214285714286, "grad_norm": 0.19885358214378357, "learning_rate": 6.689800000000001e-05, "loss": 0.0143, "step": 33450 }, { "epoch": 597.5, "grad_norm": 0.2614540159702301, "learning_rate": 6.6918e-05, "loss": 0.0152, "step": 33460 }, { "epoch": 597.6785714285714, "grad_norm": 0.23318836092948914, "learning_rate": 6.6938e-05, "loss": 0.0139, "step": 33470 }, { "epoch": 597.8571428571429, "grad_norm": 0.19380785524845123, "learning_rate": 6.6958e-05, "loss": 0.0147, "step": 33480 }, { "epoch": 598.0357142857143, "grad_norm": 0.20495004951953888, "learning_rate": 6.697800000000001e-05, "loss": 0.014, "step": 33490 }, { "epoch": 598.2142857142857, "grad_norm": 0.15789780020713806, "learning_rate": 6.699800000000001e-05, "loss": 0.0127, "step": 33500 }, { "epoch": 598.3928571428571, "grad_norm": 0.24772250652313232, "learning_rate": 6.7018e-05, "loss": 0.0141, "step": 33510 }, { "epoch": 598.5714285714286, "grad_norm": 0.2594005763530731, "learning_rate": 6.7038e-05, "loss": 0.0129, "step": 33520 }, { "epoch": 598.75, "grad_norm": 0.2677764296531677, "learning_rate": 6.7058e-05, "loss": 0.0146, "step": 33530 }, { "epoch": 598.9285714285714, "grad_norm": 0.2429964691400528, "learning_rate": 6.707800000000001e-05, "loss": 0.013, "step": 33540 }, { "epoch": 599.1071428571429, "grad_norm": 0.38381141424179077, "learning_rate": 6.7098e-05, "loss": 0.0143, "step": 33550 }, { "epoch": 599.2857142857143, "grad_norm": 0.2621883749961853, "learning_rate": 6.7118e-05, "loss": 0.0125, "step": 33560 }, { "epoch": 599.4642857142857, "grad_norm": 0.23225048184394836, "learning_rate": 6.7138e-05, "loss": 0.0116, "step": 33570 }, { "epoch": 599.6428571428571, "grad_norm": 0.1599252074956894, "learning_rate": 6.7158e-05, "loss": 0.0134, "step": 33580 }, { "epoch": 599.8214285714286, "grad_norm": 0.24138851463794708, "learning_rate": 6.717800000000001e-05, "loss": 0.0114, "step": 33590 }, { "epoch": 600.0, "grad_norm": 0.20352159440517426, "learning_rate": 6.7198e-05, "loss": 0.0114, "step": 33600 }, { "epoch": 600.1785714285714, "grad_norm": 0.2562340795993805, "learning_rate": 6.7218e-05, "loss": 0.0113, "step": 33610 }, { "epoch": 600.3571428571429, "grad_norm": 0.21993690729141235, "learning_rate": 6.7238e-05, "loss": 0.0131, "step": 33620 }, { "epoch": 600.5357142857143, "grad_norm": 0.24511313438415527, "learning_rate": 6.7258e-05, "loss": 0.0149, "step": 33630 }, { "epoch": 600.7142857142857, "grad_norm": 0.22254815697669983, "learning_rate": 6.7278e-05, "loss": 0.0131, "step": 33640 }, { "epoch": 600.8928571428571, "grad_norm": 0.2533663511276245, "learning_rate": 6.7298e-05, "loss": 0.0148, "step": 33650 }, { "epoch": 601.0714285714286, "grad_norm": 0.2304009050130844, "learning_rate": 6.731800000000001e-05, "loss": 0.0129, "step": 33660 }, { "epoch": 601.25, "grad_norm": 0.28271427750587463, "learning_rate": 6.7338e-05, "loss": 0.0123, "step": 33670 }, { "epoch": 601.4285714285714, "grad_norm": 0.21143494546413422, "learning_rate": 6.7358e-05, "loss": 0.0126, "step": 33680 }, { "epoch": 601.6071428571429, "grad_norm": 0.2025323510169983, "learning_rate": 6.7378e-05, "loss": 0.0121, "step": 33690 }, { "epoch": 601.7857142857143, "grad_norm": 0.2018558830022812, "learning_rate": 6.739800000000001e-05, "loss": 0.0167, "step": 33700 }, { "epoch": 601.9642857142857, "grad_norm": 0.22614786028862, "learning_rate": 6.7418e-05, "loss": 0.0125, "step": 33710 }, { "epoch": 602.1428571428571, "grad_norm": 0.20346243679523468, "learning_rate": 6.7438e-05, "loss": 0.0128, "step": 33720 }, { "epoch": 602.3214285714286, "grad_norm": 0.16153177618980408, "learning_rate": 6.7458e-05, "loss": 0.012, "step": 33730 }, { "epoch": 602.5, "grad_norm": 0.22267043590545654, "learning_rate": 6.747800000000001e-05, "loss": 0.0144, "step": 33740 }, { "epoch": 602.6785714285714, "grad_norm": 0.20118318498134613, "learning_rate": 6.749800000000001e-05, "loss": 0.0136, "step": 33750 }, { "epoch": 602.8571428571429, "grad_norm": 0.22223062813282013, "learning_rate": 6.7518e-05, "loss": 0.0148, "step": 33760 }, { "epoch": 603.0357142857143, "grad_norm": 0.13085858523845673, "learning_rate": 6.7538e-05, "loss": 0.012, "step": 33770 }, { "epoch": 603.2142857142857, "grad_norm": 0.23880510032176971, "learning_rate": 6.755799999999999e-05, "loss": 0.0153, "step": 33780 }, { "epoch": 603.3928571428571, "grad_norm": 0.24046090245246887, "learning_rate": 6.757800000000001e-05, "loss": 0.0143, "step": 33790 }, { "epoch": 603.5714285714286, "grad_norm": 0.19886226952075958, "learning_rate": 6.759800000000001e-05, "loss": 0.0138, "step": 33800 }, { "epoch": 603.75, "grad_norm": 0.3077443540096283, "learning_rate": 6.7618e-05, "loss": 0.0134, "step": 33810 }, { "epoch": 603.9285714285714, "grad_norm": 0.28910356760025024, "learning_rate": 6.7638e-05, "loss": 0.0133, "step": 33820 }, { "epoch": 604.1071428571429, "grad_norm": 0.21837382018566132, "learning_rate": 6.7658e-05, "loss": 0.015, "step": 33830 }, { "epoch": 604.2857142857143, "grad_norm": 0.35471928119659424, "learning_rate": 6.767800000000001e-05, "loss": 0.0157, "step": 33840 }, { "epoch": 604.4642857142857, "grad_norm": 0.18573322892189026, "learning_rate": 6.7698e-05, "loss": 0.015, "step": 33850 }, { "epoch": 604.6428571428571, "grad_norm": 0.2736486494541168, "learning_rate": 6.7718e-05, "loss": 0.0165, "step": 33860 }, { "epoch": 604.8214285714286, "grad_norm": 0.2750754654407501, "learning_rate": 6.773800000000001e-05, "loss": 0.017, "step": 33870 }, { "epoch": 605.0, "grad_norm": 0.17478111386299133, "learning_rate": 6.7758e-05, "loss": 0.0146, "step": 33880 }, { "epoch": 605.1785714285714, "grad_norm": 0.3243459463119507, "learning_rate": 6.777800000000001e-05, "loss": 0.0166, "step": 33890 }, { "epoch": 605.3571428571429, "grad_norm": 0.30519065260887146, "learning_rate": 6.7798e-05, "loss": 0.0158, "step": 33900 }, { "epoch": 605.5357142857143, "grad_norm": 0.1749008297920227, "learning_rate": 6.781800000000001e-05, "loss": 0.0134, "step": 33910 }, { "epoch": 605.7142857142857, "grad_norm": 0.21072712540626526, "learning_rate": 6.7838e-05, "loss": 0.0118, "step": 33920 }, { "epoch": 605.8928571428571, "grad_norm": 0.16391725838184357, "learning_rate": 6.7858e-05, "loss": 0.012, "step": 33930 }, { "epoch": 606.0714285714286, "grad_norm": 0.2329922914505005, "learning_rate": 6.7878e-05, "loss": 0.0143, "step": 33940 }, { "epoch": 606.25, "grad_norm": 0.20741745829582214, "learning_rate": 6.7898e-05, "loss": 0.0129, "step": 33950 }, { "epoch": 606.4285714285714, "grad_norm": 0.2062619924545288, "learning_rate": 6.791800000000001e-05, "loss": 0.0133, "step": 33960 }, { "epoch": 606.6071428571429, "grad_norm": 0.16471494734287262, "learning_rate": 6.7938e-05, "loss": 0.012, "step": 33970 }, { "epoch": 606.7857142857143, "grad_norm": 0.31114959716796875, "learning_rate": 6.7958e-05, "loss": 0.0132, "step": 33980 }, { "epoch": 606.9642857142857, "grad_norm": 0.3664761781692505, "learning_rate": 6.7978e-05, "loss": 0.0118, "step": 33990 }, { "epoch": 607.1428571428571, "grad_norm": 0.2981029152870178, "learning_rate": 6.799800000000001e-05, "loss": 0.0127, "step": 34000 }, { "epoch": 607.3214285714286, "grad_norm": 0.2586006820201874, "learning_rate": 6.8018e-05, "loss": 0.0132, "step": 34010 }, { "epoch": 607.5, "grad_norm": 0.22958695888519287, "learning_rate": 6.8038e-05, "loss": 0.012, "step": 34020 }, { "epoch": 607.6785714285714, "grad_norm": 0.19521358609199524, "learning_rate": 6.8058e-05, "loss": 0.0144, "step": 34030 }, { "epoch": 607.8571428571429, "grad_norm": 0.23584172129631042, "learning_rate": 6.807800000000001e-05, "loss": 0.0127, "step": 34040 }, { "epoch": 608.0357142857143, "grad_norm": 0.2839738130569458, "learning_rate": 6.809800000000001e-05, "loss": 0.0155, "step": 34050 }, { "epoch": 608.2142857142857, "grad_norm": 0.31649404764175415, "learning_rate": 6.8118e-05, "loss": 0.0132, "step": 34060 }, { "epoch": 608.3928571428571, "grad_norm": 0.17829568684101105, "learning_rate": 6.8138e-05, "loss": 0.0114, "step": 34070 }, { "epoch": 608.5714285714286, "grad_norm": 0.17327775061130524, "learning_rate": 6.8158e-05, "loss": 0.0126, "step": 34080 }, { "epoch": 608.75, "grad_norm": 0.17699852585792542, "learning_rate": 6.817800000000001e-05, "loss": 0.0146, "step": 34090 }, { "epoch": 608.9285714285714, "grad_norm": 0.17251180112361908, "learning_rate": 6.8198e-05, "loss": 0.0122, "step": 34100 }, { "epoch": 609.1071428571429, "grad_norm": 0.19925224781036377, "learning_rate": 6.8218e-05, "loss": 0.0125, "step": 34110 }, { "epoch": 609.2857142857143, "grad_norm": 0.13357572257518768, "learning_rate": 6.8238e-05, "loss": 0.0116, "step": 34120 }, { "epoch": 609.4642857142857, "grad_norm": 0.183492511510849, "learning_rate": 6.8258e-05, "loss": 0.0117, "step": 34130 }, { "epoch": 609.6428571428571, "grad_norm": 0.2445039302110672, "learning_rate": 6.827800000000001e-05, "loss": 0.0146, "step": 34140 }, { "epoch": 609.8214285714286, "grad_norm": 0.24988611042499542, "learning_rate": 6.8298e-05, "loss": 0.0112, "step": 34150 }, { "epoch": 610.0, "grad_norm": 0.17439305782318115, "learning_rate": 6.8318e-05, "loss": 0.0124, "step": 34160 }, { "epoch": 610.1785714285714, "grad_norm": 0.21176370978355408, "learning_rate": 6.833800000000001e-05, "loss": 0.0117, "step": 34170 }, { "epoch": 610.3571428571429, "grad_norm": 0.24642756581306458, "learning_rate": 6.8358e-05, "loss": 0.0124, "step": 34180 }, { "epoch": 610.5357142857143, "grad_norm": 0.19078418612480164, "learning_rate": 6.837800000000002e-05, "loss": 0.0114, "step": 34190 }, { "epoch": 610.7142857142857, "grad_norm": 0.1562032401561737, "learning_rate": 6.8398e-05, "loss": 0.0146, "step": 34200 }, { "epoch": 610.8928571428571, "grad_norm": 0.2226502150297165, "learning_rate": 6.841800000000001e-05, "loss": 0.0126, "step": 34210 }, { "epoch": 611.0714285714286, "grad_norm": 0.2266032099723816, "learning_rate": 6.8438e-05, "loss": 0.0111, "step": 34220 }, { "epoch": 611.25, "grad_norm": 0.2775794565677643, "learning_rate": 6.8458e-05, "loss": 0.0128, "step": 34230 }, { "epoch": 611.4285714285714, "grad_norm": 0.20697028934955597, "learning_rate": 6.8478e-05, "loss": 0.0108, "step": 34240 }, { "epoch": 611.6071428571429, "grad_norm": 0.24752941727638245, "learning_rate": 6.849800000000001e-05, "loss": 0.0115, "step": 34250 }, { "epoch": 611.7857142857143, "grad_norm": 0.23442591726779938, "learning_rate": 6.851800000000001e-05, "loss": 0.0129, "step": 34260 }, { "epoch": 611.9642857142857, "grad_norm": 0.29594922065734863, "learning_rate": 6.8538e-05, "loss": 0.0133, "step": 34270 }, { "epoch": 612.1428571428571, "grad_norm": 0.36140990257263184, "learning_rate": 6.8558e-05, "loss": 0.0146, "step": 34280 }, { "epoch": 612.3214285714286, "grad_norm": 0.24410800635814667, "learning_rate": 6.857799999999999e-05, "loss": 0.0118, "step": 34290 }, { "epoch": 612.5, "grad_norm": 0.19479626417160034, "learning_rate": 6.859800000000001e-05, "loss": 0.0133, "step": 34300 }, { "epoch": 612.6785714285714, "grad_norm": 0.28765273094177246, "learning_rate": 6.8618e-05, "loss": 0.0152, "step": 34310 }, { "epoch": 612.8571428571429, "grad_norm": 0.19947116076946259, "learning_rate": 6.8638e-05, "loss": 0.0127, "step": 34320 }, { "epoch": 613.0357142857143, "grad_norm": 0.2201048582792282, "learning_rate": 6.8658e-05, "loss": 0.0149, "step": 34330 }, { "epoch": 613.2142857142857, "grad_norm": 0.22748170793056488, "learning_rate": 6.8678e-05, "loss": 0.0148, "step": 34340 }, { "epoch": 613.3928571428571, "grad_norm": 0.23009935021400452, "learning_rate": 6.869800000000001e-05, "loss": 0.0159, "step": 34350 }, { "epoch": 613.5714285714286, "grad_norm": 0.2176533192396164, "learning_rate": 6.8718e-05, "loss": 0.0127, "step": 34360 }, { "epoch": 613.75, "grad_norm": 0.2746637463569641, "learning_rate": 6.8738e-05, "loss": 0.0113, "step": 34370 }, { "epoch": 613.9285714285714, "grad_norm": 0.2188718616962433, "learning_rate": 6.8758e-05, "loss": 0.0126, "step": 34380 }, { "epoch": 614.1071428571429, "grad_norm": 0.2917541563510895, "learning_rate": 6.8778e-05, "loss": 0.0125, "step": 34390 }, { "epoch": 614.2857142857143, "grad_norm": 0.19848018884658813, "learning_rate": 6.8798e-05, "loss": 0.013, "step": 34400 }, { "epoch": 614.4642857142857, "grad_norm": 0.30409055948257446, "learning_rate": 6.8818e-05, "loss": 0.0151, "step": 34410 }, { "epoch": 614.6428571428571, "grad_norm": 0.2011013776063919, "learning_rate": 6.883800000000001e-05, "loss": 0.0113, "step": 34420 }, { "epoch": 614.8214285714286, "grad_norm": 0.13936841487884521, "learning_rate": 6.8858e-05, "loss": 0.0111, "step": 34430 }, { "epoch": 615.0, "grad_norm": 0.2785061001777649, "learning_rate": 6.8878e-05, "loss": 0.0127, "step": 34440 }, { "epoch": 615.1785714285714, "grad_norm": 0.18450671434402466, "learning_rate": 6.8898e-05, "loss": 0.0114, "step": 34450 }, { "epoch": 615.3571428571429, "grad_norm": 0.17758265137672424, "learning_rate": 6.8918e-05, "loss": 0.0109, "step": 34460 }, { "epoch": 615.5357142857143, "grad_norm": 0.2341727763414383, "learning_rate": 6.8938e-05, "loss": 0.0116, "step": 34470 }, { "epoch": 615.7142857142857, "grad_norm": 0.33931592106819153, "learning_rate": 6.8958e-05, "loss": 0.0125, "step": 34480 }, { "epoch": 615.8928571428571, "grad_norm": 0.29875504970550537, "learning_rate": 6.8978e-05, "loss": 0.0118, "step": 34490 }, { "epoch": 616.0714285714286, "grad_norm": 0.229914128780365, "learning_rate": 6.8998e-05, "loss": 0.0143, "step": 34500 }, { "epoch": 616.25, "grad_norm": 0.20901063084602356, "learning_rate": 6.901800000000001e-05, "loss": 0.0127, "step": 34510 }, { "epoch": 616.4285714285714, "grad_norm": 0.2284872680902481, "learning_rate": 6.9038e-05, "loss": 0.011, "step": 34520 }, { "epoch": 616.6071428571429, "grad_norm": 0.16617362201213837, "learning_rate": 6.9058e-05, "loss": 0.0133, "step": 34530 }, { "epoch": 616.7857142857143, "grad_norm": 0.24923290312290192, "learning_rate": 6.907799999999999e-05, "loss": 0.0138, "step": 34540 }, { "epoch": 616.9642857142857, "grad_norm": 0.27955514192581177, "learning_rate": 6.909800000000001e-05, "loss": 0.0139, "step": 34550 }, { "epoch": 617.1428571428571, "grad_norm": 0.25648316740989685, "learning_rate": 6.911800000000001e-05, "loss": 0.0126, "step": 34560 }, { "epoch": 617.3214285714286, "grad_norm": 0.2293093502521515, "learning_rate": 6.9138e-05, "loss": 0.011, "step": 34570 }, { "epoch": 617.5, "grad_norm": 0.28340646624565125, "learning_rate": 6.9158e-05, "loss": 0.012, "step": 34580 }, { "epoch": 617.6785714285714, "grad_norm": 0.2757665514945984, "learning_rate": 6.9178e-05, "loss": 0.0154, "step": 34590 }, { "epoch": 617.8571428571429, "grad_norm": 0.23254184424877167, "learning_rate": 6.919800000000001e-05, "loss": 0.0129, "step": 34600 }, { "epoch": 618.0357142857143, "grad_norm": 0.21720951795578003, "learning_rate": 6.9218e-05, "loss": 0.0115, "step": 34610 }, { "epoch": 618.2142857142857, "grad_norm": 0.19609160721302032, "learning_rate": 6.9238e-05, "loss": 0.0113, "step": 34620 }, { "epoch": 618.3928571428571, "grad_norm": 0.18751917779445648, "learning_rate": 6.9258e-05, "loss": 0.0108, "step": 34630 }, { "epoch": 618.5714285714286, "grad_norm": 0.2131335735321045, "learning_rate": 6.9278e-05, "loss": 0.0113, "step": 34640 }, { "epoch": 618.75, "grad_norm": 0.2306707501411438, "learning_rate": 6.929800000000001e-05, "loss": 0.0131, "step": 34650 }, { "epoch": 618.9285714285714, "grad_norm": 0.1841747909784317, "learning_rate": 6.9318e-05, "loss": 0.0113, "step": 34660 }, { "epoch": 619.1071428571429, "grad_norm": 0.16676002740859985, "learning_rate": 6.9338e-05, "loss": 0.0109, "step": 34670 }, { "epoch": 619.2857142857143, "grad_norm": 0.21075759828090668, "learning_rate": 6.9358e-05, "loss": 0.0113, "step": 34680 }, { "epoch": 619.4642857142857, "grad_norm": 0.1590062528848648, "learning_rate": 6.9378e-05, "loss": 0.0104, "step": 34690 }, { "epoch": 619.6428571428571, "grad_norm": 0.27841681241989136, "learning_rate": 6.9398e-05, "loss": 0.0118, "step": 34700 }, { "epoch": 619.8214285714286, "grad_norm": 0.21608693897724152, "learning_rate": 6.9418e-05, "loss": 0.0121, "step": 34710 }, { "epoch": 620.0, "grad_norm": 0.3129778206348419, "learning_rate": 6.943800000000001e-05, "loss": 0.0119, "step": 34720 }, { "epoch": 620.1785714285714, "grad_norm": 0.26365843415260315, "learning_rate": 6.9458e-05, "loss": 0.0125, "step": 34730 }, { "epoch": 620.3571428571429, "grad_norm": 0.17500616610050201, "learning_rate": 6.9478e-05, "loss": 0.0133, "step": 34740 }, { "epoch": 620.5357142857143, "grad_norm": 0.21855266392230988, "learning_rate": 6.9498e-05, "loss": 0.014, "step": 34750 }, { "epoch": 620.7142857142857, "grad_norm": 0.26168251037597656, "learning_rate": 6.951800000000001e-05, "loss": 0.0139, "step": 34760 }, { "epoch": 620.8928571428571, "grad_norm": 0.17954811453819275, "learning_rate": 6.9538e-05, "loss": 0.0116, "step": 34770 }, { "epoch": 621.0714285714286, "grad_norm": 0.14278073608875275, "learning_rate": 6.9558e-05, "loss": 0.0123, "step": 34780 }, { "epoch": 621.25, "grad_norm": 0.10802464187145233, "learning_rate": 6.9578e-05, "loss": 0.013, "step": 34790 }, { "epoch": 621.4285714285714, "grad_norm": 0.2037927210330963, "learning_rate": 6.959800000000001e-05, "loss": 0.0112, "step": 34800 }, { "epoch": 621.6071428571429, "grad_norm": 0.19072984158992767, "learning_rate": 6.961800000000001e-05, "loss": 0.0137, "step": 34810 }, { "epoch": 621.7857142857143, "grad_norm": 0.16711966693401337, "learning_rate": 6.9638e-05, "loss": 0.0117, "step": 34820 }, { "epoch": 621.9642857142857, "grad_norm": 0.19930455088615417, "learning_rate": 6.9658e-05, "loss": 0.012, "step": 34830 }, { "epoch": 622.1428571428571, "grad_norm": 0.1379341036081314, "learning_rate": 6.967799999999999e-05, "loss": 0.0117, "step": 34840 }, { "epoch": 622.3214285714286, "grad_norm": 0.21207371354103088, "learning_rate": 6.969800000000001e-05, "loss": 0.0135, "step": 34850 }, { "epoch": 622.5, "grad_norm": 0.19126583635807037, "learning_rate": 6.9718e-05, "loss": 0.013, "step": 34860 }, { "epoch": 622.6785714285714, "grad_norm": 0.17721018195152283, "learning_rate": 6.9738e-05, "loss": 0.0107, "step": 34870 }, { "epoch": 622.8571428571429, "grad_norm": 0.20206379890441895, "learning_rate": 6.9758e-05, "loss": 0.0128, "step": 34880 }, { "epoch": 623.0357142857143, "grad_norm": 0.17595505714416504, "learning_rate": 6.9778e-05, "loss": 0.0119, "step": 34890 }, { "epoch": 623.2142857142857, "grad_norm": 0.15465469658374786, "learning_rate": 6.979800000000001e-05, "loss": 0.0114, "step": 34900 }, { "epoch": 623.3928571428571, "grad_norm": 0.20041996240615845, "learning_rate": 6.9818e-05, "loss": 0.012, "step": 34910 }, { "epoch": 623.5714285714286, "grad_norm": 0.21492457389831543, "learning_rate": 6.9838e-05, "loss": 0.0115, "step": 34920 }, { "epoch": 623.75, "grad_norm": 0.14631399512290955, "learning_rate": 6.985800000000001e-05, "loss": 0.0109, "step": 34930 }, { "epoch": 623.9285714285714, "grad_norm": 0.20969027280807495, "learning_rate": 6.9878e-05, "loss": 0.0116, "step": 34940 }, { "epoch": 624.1071428571429, "grad_norm": 0.17886649072170258, "learning_rate": 6.989800000000001e-05, "loss": 0.0112, "step": 34950 }, { "epoch": 624.2857142857143, "grad_norm": 0.14830215275287628, "learning_rate": 6.9918e-05, "loss": 0.0098, "step": 34960 }, { "epoch": 624.4642857142857, "grad_norm": 0.22721046209335327, "learning_rate": 6.993800000000001e-05, "loss": 0.0124, "step": 34970 }, { "epoch": 624.6428571428571, "grad_norm": 0.2791197896003723, "learning_rate": 6.9958e-05, "loss": 0.0098, "step": 34980 }, { "epoch": 624.8214285714286, "grad_norm": 0.275938481092453, "learning_rate": 6.9978e-05, "loss": 0.0116, "step": 34990 }, { "epoch": 625.0, "grad_norm": 0.19566549360752106, "learning_rate": 6.9998e-05, "loss": 0.0125, "step": 35000 }, { "epoch": 625.1785714285714, "grad_norm": 0.1904924511909485, "learning_rate": 7.0018e-05, "loss": 0.0107, "step": 35010 }, { "epoch": 625.3571428571429, "grad_norm": 0.35160931944847107, "learning_rate": 7.003800000000001e-05, "loss": 0.0126, "step": 35020 }, { "epoch": 625.5357142857143, "grad_norm": 0.2852135896682739, "learning_rate": 7.0058e-05, "loss": 0.0141, "step": 35030 }, { "epoch": 625.7142857142857, "grad_norm": 0.24805408716201782, "learning_rate": 7.0078e-05, "loss": 0.0108, "step": 35040 }, { "epoch": 625.8928571428571, "grad_norm": 0.3333788812160492, "learning_rate": 7.0098e-05, "loss": 0.0158, "step": 35050 }, { "epoch": 626.0714285714286, "grad_norm": 0.1983613222837448, "learning_rate": 7.011800000000001e-05, "loss": 0.0122, "step": 35060 }, { "epoch": 626.25, "grad_norm": 0.25508540868759155, "learning_rate": 7.0138e-05, "loss": 0.0133, "step": 35070 }, { "epoch": 626.4285714285714, "grad_norm": 0.2545822560787201, "learning_rate": 7.0158e-05, "loss": 0.0122, "step": 35080 }, { "epoch": 626.6071428571429, "grad_norm": 0.15674133598804474, "learning_rate": 7.0178e-05, "loss": 0.0123, "step": 35090 }, { "epoch": 626.7857142857143, "grad_norm": 0.2554071247577667, "learning_rate": 7.019800000000001e-05, "loss": 0.0132, "step": 35100 }, { "epoch": 626.9642857142857, "grad_norm": 0.14632220566272736, "learning_rate": 7.021800000000001e-05, "loss": 0.012, "step": 35110 }, { "epoch": 627.1428571428571, "grad_norm": 0.22783036530017853, "learning_rate": 7.0238e-05, "loss": 0.0125, "step": 35120 }, { "epoch": 627.3214285714286, "grad_norm": 0.1839084029197693, "learning_rate": 7.0258e-05, "loss": 0.0104, "step": 35130 }, { "epoch": 627.5, "grad_norm": 0.23784837126731873, "learning_rate": 7.0278e-05, "loss": 0.0106, "step": 35140 }, { "epoch": 627.6785714285714, "grad_norm": 0.20474335551261902, "learning_rate": 7.029800000000001e-05, "loss": 0.0106, "step": 35150 }, { "epoch": 627.8571428571429, "grad_norm": 0.18745851516723633, "learning_rate": 7.0318e-05, "loss": 0.0121, "step": 35160 }, { "epoch": 628.0357142857143, "grad_norm": 0.17222553491592407, "learning_rate": 7.0338e-05, "loss": 0.0093, "step": 35170 }, { "epoch": 628.2142857142857, "grad_norm": 0.226962149143219, "learning_rate": 7.0358e-05, "loss": 0.0103, "step": 35180 }, { "epoch": 628.3928571428571, "grad_norm": 0.23839795589447021, "learning_rate": 7.0378e-05, "loss": 0.01, "step": 35190 }, { "epoch": 628.5714285714286, "grad_norm": 0.23220841586589813, "learning_rate": 7.039800000000001e-05, "loss": 0.0109, "step": 35200 }, { "epoch": 628.75, "grad_norm": 0.3055848479270935, "learning_rate": 7.0418e-05, "loss": 0.0138, "step": 35210 }, { "epoch": 628.9285714285714, "grad_norm": 0.14866341650485992, "learning_rate": 7.0438e-05, "loss": 0.0123, "step": 35220 }, { "epoch": 629.1071428571429, "grad_norm": 0.1731225848197937, "learning_rate": 7.0458e-05, "loss": 0.0138, "step": 35230 }, { "epoch": 629.2857142857143, "grad_norm": 0.3113473355770111, "learning_rate": 7.0478e-05, "loss": 0.0144, "step": 35240 }, { "epoch": 629.4642857142857, "grad_norm": 0.3184945583343506, "learning_rate": 7.049800000000002e-05, "loss": 0.0113, "step": 35250 }, { "epoch": 629.6428571428571, "grad_norm": 0.25418487191200256, "learning_rate": 7.0518e-05, "loss": 0.0119, "step": 35260 }, { "epoch": 629.8214285714286, "grad_norm": 0.1768251657485962, "learning_rate": 7.053800000000001e-05, "loss": 0.011, "step": 35270 }, { "epoch": 630.0, "grad_norm": 0.27400800585746765, "learning_rate": 7.0558e-05, "loss": 0.0138, "step": 35280 }, { "epoch": 630.1785714285714, "grad_norm": 0.257712185382843, "learning_rate": 7.0578e-05, "loss": 0.0118, "step": 35290 }, { "epoch": 630.3571428571429, "grad_norm": 0.19005373120307922, "learning_rate": 7.0598e-05, "loss": 0.0125, "step": 35300 }, { "epoch": 630.5357142857143, "grad_norm": 0.2187575250864029, "learning_rate": 7.061800000000001e-05, "loss": 0.0103, "step": 35310 }, { "epoch": 630.7142857142857, "grad_norm": 0.2916545569896698, "learning_rate": 7.063800000000001e-05, "loss": 0.0114, "step": 35320 }, { "epoch": 630.8928571428571, "grad_norm": 0.21091759204864502, "learning_rate": 7.0658e-05, "loss": 0.0126, "step": 35330 }, { "epoch": 631.0714285714286, "grad_norm": 0.2358870804309845, "learning_rate": 7.0678e-05, "loss": 0.0111, "step": 35340 }, { "epoch": 631.25, "grad_norm": 0.1715797334909439, "learning_rate": 7.0698e-05, "loss": 0.0109, "step": 35350 }, { "epoch": 631.4285714285714, "grad_norm": 0.21355409920215607, "learning_rate": 7.071800000000001e-05, "loss": 0.0119, "step": 35360 }, { "epoch": 631.6071428571429, "grad_norm": 0.18554998934268951, "learning_rate": 7.0738e-05, "loss": 0.0125, "step": 35370 }, { "epoch": 631.7857142857143, "grad_norm": 0.25238949060440063, "learning_rate": 7.0758e-05, "loss": 0.0123, "step": 35380 }, { "epoch": 631.9642857142857, "grad_norm": 0.2531481385231018, "learning_rate": 7.0778e-05, "loss": 0.0097, "step": 35390 }, { "epoch": 632.1428571428571, "grad_norm": 0.2350180298089981, "learning_rate": 7.079800000000001e-05, "loss": 0.0124, "step": 35400 }, { "epoch": 632.3214285714286, "grad_norm": 0.22645239531993866, "learning_rate": 7.081800000000001e-05, "loss": 0.012, "step": 35410 }, { "epoch": 632.5, "grad_norm": 0.18030314147472382, "learning_rate": 7.0838e-05, "loss": 0.0124, "step": 35420 }, { "epoch": 632.6785714285714, "grad_norm": 0.18234004080295563, "learning_rate": 7.0858e-05, "loss": 0.0132, "step": 35430 }, { "epoch": 632.8571428571429, "grad_norm": 0.2581194043159485, "learning_rate": 7.0878e-05, "loss": 0.0143, "step": 35440 }, { "epoch": 633.0357142857143, "grad_norm": 0.2877500653266907, "learning_rate": 7.089800000000001e-05, "loss": 0.013, "step": 35450 }, { "epoch": 633.2142857142857, "grad_norm": 0.1600055694580078, "learning_rate": 7.0918e-05, "loss": 0.0127, "step": 35460 }, { "epoch": 633.3928571428571, "grad_norm": 0.17430758476257324, "learning_rate": 7.0938e-05, "loss": 0.0141, "step": 35470 }, { "epoch": 633.5714285714286, "grad_norm": 0.26863232254981995, "learning_rate": 7.095800000000001e-05, "loss": 0.0116, "step": 35480 }, { "epoch": 633.75, "grad_norm": 0.2562435567378998, "learning_rate": 7.0978e-05, "loss": 0.0146, "step": 35490 }, { "epoch": 633.9285714285714, "grad_norm": 0.30834150314331055, "learning_rate": 7.099800000000001e-05, "loss": 0.0125, "step": 35500 }, { "epoch": 634.1071428571429, "grad_norm": 0.22947654128074646, "learning_rate": 7.1018e-05, "loss": 0.0122, "step": 35510 }, { "epoch": 634.2857142857143, "grad_norm": 0.2962089776992798, "learning_rate": 7.1038e-05, "loss": 0.0148, "step": 35520 }, { "epoch": 634.4642857142857, "grad_norm": 0.24798107147216797, "learning_rate": 7.1058e-05, "loss": 0.0141, "step": 35530 }, { "epoch": 634.6428571428571, "grad_norm": 0.3732598125934601, "learning_rate": 7.1078e-05, "loss": 0.0148, "step": 35540 }, { "epoch": 634.8214285714286, "grad_norm": 0.2347463220357895, "learning_rate": 7.1098e-05, "loss": 0.0134, "step": 35550 }, { "epoch": 635.0, "grad_norm": 0.20543870329856873, "learning_rate": 7.1118e-05, "loss": 0.014, "step": 35560 }, { "epoch": 635.1785714285714, "grad_norm": 0.3416565954685211, "learning_rate": 7.113800000000001e-05, "loss": 0.0127, "step": 35570 }, { "epoch": 635.3571428571429, "grad_norm": 0.19205991923809052, "learning_rate": 7.1158e-05, "loss": 0.0145, "step": 35580 }, { "epoch": 635.5357142857143, "grad_norm": 0.16635844111442566, "learning_rate": 7.1178e-05, "loss": 0.0124, "step": 35590 }, { "epoch": 635.7142857142857, "grad_norm": 0.2010444849729538, "learning_rate": 7.119799999999999e-05, "loss": 0.0135, "step": 35600 }, { "epoch": 635.8928571428571, "grad_norm": 0.17847272753715515, "learning_rate": 7.121800000000001e-05, "loss": 0.0142, "step": 35610 }, { "epoch": 636.0714285714286, "grad_norm": 0.18223658204078674, "learning_rate": 7.123800000000001e-05, "loss": 0.0125, "step": 35620 }, { "epoch": 636.25, "grad_norm": 0.1728585958480835, "learning_rate": 7.1258e-05, "loss": 0.0112, "step": 35630 }, { "epoch": 636.4285714285714, "grad_norm": 0.2069527953863144, "learning_rate": 7.1278e-05, "loss": 0.0117, "step": 35640 }, { "epoch": 636.6071428571429, "grad_norm": 0.24408350884914398, "learning_rate": 7.1298e-05, "loss": 0.0131, "step": 35650 }, { "epoch": 636.7857142857143, "grad_norm": 0.19737385213375092, "learning_rate": 7.131800000000001e-05, "loss": 0.0126, "step": 35660 }, { "epoch": 636.9642857142857, "grad_norm": 0.24563272297382355, "learning_rate": 7.1338e-05, "loss": 0.012, "step": 35670 }, { "epoch": 637.1428571428571, "grad_norm": 0.20680861175060272, "learning_rate": 7.1358e-05, "loss": 0.0118, "step": 35680 }, { "epoch": 637.3214285714286, "grad_norm": 0.18067632615566254, "learning_rate": 7.1378e-05, "loss": 0.0128, "step": 35690 }, { "epoch": 637.5, "grad_norm": 0.22440527379512787, "learning_rate": 7.1398e-05, "loss": 0.0105, "step": 35700 }, { "epoch": 637.6785714285714, "grad_norm": 0.20285136997699738, "learning_rate": 7.141800000000001e-05, "loss": 0.0122, "step": 35710 }, { "epoch": 637.8571428571429, "grad_norm": 0.28111732006073, "learning_rate": 7.1438e-05, "loss": 0.0119, "step": 35720 }, { "epoch": 638.0357142857143, "grad_norm": 0.2850992977619171, "learning_rate": 7.1458e-05, "loss": 0.011, "step": 35730 }, { "epoch": 638.2142857142857, "grad_norm": 0.2496289759874344, "learning_rate": 7.1478e-05, "loss": 0.0121, "step": 35740 }, { "epoch": 638.3928571428571, "grad_norm": 0.1802910566329956, "learning_rate": 7.1498e-05, "loss": 0.0111, "step": 35750 }, { "epoch": 638.5714285714286, "grad_norm": 0.17500489950180054, "learning_rate": 7.1518e-05, "loss": 0.0114, "step": 35760 }, { "epoch": 638.75, "grad_norm": 0.17398259043693542, "learning_rate": 7.1538e-05, "loss": 0.0121, "step": 35770 }, { "epoch": 638.9285714285714, "grad_norm": 0.17188967764377594, "learning_rate": 7.155800000000001e-05, "loss": 0.0117, "step": 35780 }, { "epoch": 639.1071428571429, "grad_norm": 0.24489906430244446, "learning_rate": 7.1578e-05, "loss": 0.0116, "step": 35790 }, { "epoch": 639.2857142857143, "grad_norm": 0.2440083622932434, "learning_rate": 7.1598e-05, "loss": 0.0121, "step": 35800 }, { "epoch": 639.4642857142857, "grad_norm": 0.21797975897789001, "learning_rate": 7.1618e-05, "loss": 0.0131, "step": 35810 }, { "epoch": 639.6428571428571, "grad_norm": 0.29448655247688293, "learning_rate": 7.163800000000001e-05, "loss": 0.0128, "step": 35820 }, { "epoch": 639.8214285714286, "grad_norm": 0.19223840534687042, "learning_rate": 7.1658e-05, "loss": 0.0114, "step": 35830 }, { "epoch": 640.0, "grad_norm": 0.23293046653270721, "learning_rate": 7.1678e-05, "loss": 0.0118, "step": 35840 }, { "epoch": 640.1785714285714, "grad_norm": 0.20988169312477112, "learning_rate": 7.1698e-05, "loss": 0.012, "step": 35850 }, { "epoch": 640.3571428571429, "grad_norm": 0.18778681755065918, "learning_rate": 7.171800000000001e-05, "loss": 0.0106, "step": 35860 }, { "epoch": 640.5357142857143, "grad_norm": 0.22858130931854248, "learning_rate": 7.173800000000001e-05, "loss": 0.0127, "step": 35870 }, { "epoch": 640.7142857142857, "grad_norm": 0.1425427943468094, "learning_rate": 7.1758e-05, "loss": 0.0096, "step": 35880 }, { "epoch": 640.8928571428571, "grad_norm": 0.22344441711902618, "learning_rate": 7.1778e-05, "loss": 0.0102, "step": 35890 }, { "epoch": 641.0714285714286, "grad_norm": 0.18301625549793243, "learning_rate": 7.179799999999999e-05, "loss": 0.011, "step": 35900 }, { "epoch": 641.25, "grad_norm": 0.20662106573581696, "learning_rate": 7.181800000000001e-05, "loss": 0.0121, "step": 35910 }, { "epoch": 641.4285714285714, "grad_norm": 0.11410978436470032, "learning_rate": 7.1838e-05, "loss": 0.0114, "step": 35920 }, { "epoch": 641.6071428571429, "grad_norm": 0.1877937912940979, "learning_rate": 7.1858e-05, "loss": 0.0101, "step": 35930 }, { "epoch": 641.7857142857143, "grad_norm": 0.175204798579216, "learning_rate": 7.1878e-05, "loss": 0.0102, "step": 35940 }, { "epoch": 641.9642857142857, "grad_norm": 0.18520772457122803, "learning_rate": 7.1898e-05, "loss": 0.0115, "step": 35950 }, { "epoch": 642.1428571428571, "grad_norm": 0.16644275188446045, "learning_rate": 7.191800000000001e-05, "loss": 0.0096, "step": 35960 }, { "epoch": 642.3214285714286, "grad_norm": 0.23529595136642456, "learning_rate": 7.1938e-05, "loss": 0.0127, "step": 35970 }, { "epoch": 642.5, "grad_norm": 0.28598278760910034, "learning_rate": 7.1958e-05, "loss": 0.0141, "step": 35980 }, { "epoch": 642.6785714285714, "grad_norm": 0.3149554431438446, "learning_rate": 7.1978e-05, "loss": 0.0117, "step": 35990 }, { "epoch": 642.8571428571429, "grad_norm": 0.21391679346561432, "learning_rate": 7.1998e-05, "loss": 0.014, "step": 36000 } ], "logging_steps": 10, "max_steps": 1000000, "num_input_tokens_seen": 0, "num_train_epochs": 17858, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }